In [None]:
# Three path components of the table referenced by the sample query
# (interpolated as {project_id}.{database}.{table}).
project_id = ""  # add your bigquery project_id
database = "test_1gb"
table = "accounts"

In [None]:
# Sample SQL that is submitted through the bigquery.query endpoint further down.
query = f"SELECT * FROM {project_id}.{database}.{table} limit 10"

In [None]:
# !uv pip install db-dtypes
# !uv pip install google-cloud-bigquery==3.24.0

In [None]:
# Version specifier for syft and the quoted requirement string built from it.
SYFT_VERSION = ">=0.8.7.b0,<0.9"
package_string = f'"syft{SYFT_VERSION}"'
# The install command is left commented out; uncomment to install into the kernel.
# %pip install {package_string} -q

In [None]:
# syft absolute
import syft as sy

In [None]:
# Name handed to sy.orchestra.launch when the server is started.
node_name = "bigquery-high-side"

In [None]:
# Start the server through the orchestra launcher.
# NOTE(review): reset=True presumably discards existing server state on every
# run, and port="auto" presumably picks a free port — confirm against syft docs.
node = sy.orchestra.launch(
    name=node_name,
    dev_mode=True,
    create_producer=True,
    n_consumers=1,
    reset=True,
    port="auto",
)

In [None]:
# Client used below to add endpoints and register users.
# NOTE(review): these look like default development credentials — confirm they
# are never used against a real deployment.
high_client = node.login(email="info@openmined.org", password="changethis")

In [None]:
# stdlib
import json

# Parsed service-account key, later passed to the query endpoint as a setting.
# Read inside a context manager so the file handle is closed deterministically;
# JSON is decoded as UTF-8 regardless of the platform's locale encoding.
with open("service_account.json", encoding="utf-8") as key_file:
    SERVICE_ACCOUNT = json.load(key_file)

In [None]:
@sy.api_endpoint_method(
    settings={"SERVICE_ACCOUNT": SERVICE_ACCOUNT},
)
def query_endpoint(context, sql: str) -> str:
    """Run ``sql`` on BigQuery and return instructions for fetching the result.

    The outcome (the query result as a DataFrame, or a ``SyftError`` if the
    query failed) is uploaded through ``context.admin_client`` and its uid and
    submission time are recorded per user in ``context.state["submissions"]``.
    Submitting the same (whitespace-stripped) SQL again returns the uid that
    was already recorded instead of running the query a second time.

    Args:
        context: endpoint context; ``user.email``, ``state``, ``settings`` and
            ``admin_client`` are read from it.
        sql: SQL text to execute.

    Returns:
        A message containing ``uid="..."`` to pass to
        ``client.bigquery.get_result``, or a ``sy.SyftError`` when the
        submission bookkeeping or the upload fails.
    """
    # syft absolute
    import syft as sy

    def submitted_message(result_uid):
        # single source for the user-facing text so both return paths agree
        return (
            "Query submitted for approval. "
            + f'Call client.bigquery.get_result(uid="{result_uid}") '
            + "to get your result."
        )

    def run_error():
        return sy.SyftError(
            message=f"There was an error running {sql}. Please contact an admin."
        )

    try:
        # build up the state object so we can track a users sql submissions
        # stdlib
        from datetime import datetime

        query_time = datetime.now()

        user_email = context.user.email
        submissions = context.state.get("submissions", {})
        if user_email not in submissions:
            submissions[user_email] = {"queries": {}, "results": {}}

        query_str = sql.strip()
        # exit early
        if query_str in submissions[user_email]["queries"]:
            # they have already submitted this query so just return the existing uid
            return submitted_message(submissions[user_email]["queries"][query_str])
    except Exception:
        # Without the bookkeeping values nothing below can be recorded, so stop
        # here instead of uploading a result that no uid would ever point to.
        return run_error()

    try:
        # do the query
        # third party
        from google.cloud import bigquery
        from google.oauth2 import service_account

        credentials = service_account.Credentials.from_service_account_info(
            context.settings["SERVICE_ACCOUNT"]
        )
        scoped_credentials = credentials.with_scopes(
            ["https://www.googleapis.com/auth/bigquery"]
        )

        client = bigquery.Client(
            credentials=scoped_credentials,
            location="us-west1",
        )

        query_job = client.query(sql)
        # convert to pandas dataframe
        result = query_job.result().to_dataframe()
    except Exception:
        # Not returned directly: the error object becomes the stored result, so
        # the user receives it through bigquery.get_result like any other result.
        result = run_error()

    try:
        # upload to blob storage
        result_obj = sy.ActionObject.from_obj(result)

        def to_blobstorage(action_obj, client):
            action_obj.syft_node_location = client.id
            action_obj.syft_client_verify_key = client.verify_key
            action_obj._save_to_blob_storage()
            action_obj.syft_action_data_cache = action_obj.as_empty_data()
            action_obj.syft_action_data_repr_ = str(action_obj.syft_action_data_type)
            action_obj.syft_action_data_str_ = str(action_obj.syft_action_data_type)
            action_obj_ptr = client.api.services.action.set(
                action_obj, add_storage_permission=True
            )
            return action_obj_ptr

        result_ptr = to_blobstorage(result_obj, context.admin_client)

        # store time and uid of result in state and return to user
        result_ptr_str = str(result_ptr.id).lower()
        # query text -> uid, consulted by this function on resubmission
        submissions[user_email]["queries"][query_str] = result_ptr_str
        # uid -> submission time, consulted by the results function
        submissions[user_email]["results"][result_ptr_str] = query_time
        context.state["submissions"] = submissions
        # report the uid in the same normalised form that was stored
        return submitted_message(result_ptr_str)
    except Exception:
        # unable to generate the action object
        return run_error()

In [None]:
# Display the endpoint's current view_access value.
query_endpoint.view_access

In [None]:
# Switch view_access off and display it to confirm the change.
# NOTE(review): presumably this hides the endpoint's source from users — confirm.
query_endpoint.view_access = False
query_endpoint.view_access

In [None]:
# bigquery.query endpoint: the same function serves as both the mock and the
# private implementation.
new_endpoint = sy.TwinAPIEndpoint(
    path="bigquery.query",
    mock_function=query_endpoint,
    private_function=query_endpoint,
    endpoint_timeout=60,
)

In [None]:
# Delete whatever is registered at bigquery.query, then add the new endpoint and
# display the response.
# NOTE(review): the delete presumably exists so the cell can be re-run — confirm.
high_client.api.services.api.delete(endpoint_path="bigquery.query")
response = high_client.api.services.api.add(endpoint=new_endpoint)
response

In [None]:
@sy.api_endpoint(
    path="bigquery.get_result", settings={"approve_seconds": 60}, endpoint_timeout=60
)
def get_result(context, uid: str) -> str:
    """Return the stored result for ``uid`` once its waiting period has passed.

    The submissions recorded in the state of the ``bigquery.query`` endpoint's
    mock function are consulted: ``uid`` must be listed under the calling
    user's results, and at least ``approve_seconds`` (from the settings) must
    have elapsed since the recorded submission time.

    Args:
        context: endpoint context; ``user.email``, ``settings`` and
            ``admin_client`` are read from it.
        uid: result uid as reported by ``bigquery.query``; surrounding
            whitespace and letter case are ignored.

    Returns:
        The action object fetched through the admin client; a plain message
        string when the uid is not known for this user; ``sy.SyftNotReady``
        while the waiting period is still running; ``sy.SyftError`` on any
        failure.
    """
    # syft absolute
    import syft as sy

    try:
        # check of the user and uid are in the state
        # stdlib
        from datetime import datetime
        from datetime import timedelta

        user_email = context.user.email

        # get the submissions from the other endpoint state
        endpoint = context.admin_client.api.services.api.get(api_path="bigquery.query")
        submissions = {}
        if hasattr(endpoint, "mock_function"):
            submissions_obj = endpoint.mock_function.state
            submissions = submissions_obj.get("submissions", {})

        if user_email not in submissions:
            submissions[user_email] = {"queries": {}, "results": {}}

        # normalise the same way the query endpoint stored it (lower-case)
        uid_str = uid.strip().lower()
        if uid_str not in submissions[user_email]["results"]:
            # no uid for this user
            return f"There is no result matching {uid}. Please contact an admin."
        request_time = submissions[user_email]["results"][uid_str]
        seconds = int(context.settings["approve_seconds"])

        delta = timedelta(seconds=seconds)
        result_ready = request_time + delta
        if datetime.now() < result_ready:
            # query not ready
            return sy.SyftNotReady(
                message=f"The request to run the query for {uid} is pending. Try again later."
            )
        # fall through
    except Exception:
        return sy.SyftError(
            message=f"There was an error fetching {uid}. Please contact an admin."
        )

    try:
        # go get the data and return it
        # Build the UID from the normalised string that passed the lookup above,
        # so a uid padded with whitespace does not pass validation and then fail here.
        result_uid = sy.UID(uid_str)
        action_obj = context.admin_client.api.services.action.get(uid=result_uid)
        return action_obj
    except Exception:
        return sy.SyftError(
            message=f"There was an error fetching {uid}. Please contact an admin."
        )


# Delete whatever is registered at bigquery.get_result, then add the endpoint
# defined in this cell and display the response.
high_client.api.services.api.delete(endpoint_path="bigquery.get_result")
response = high_client.api.services.api.add(endpoint=get_result)
response

In [None]:
# Display the api service (presumably lists the custom endpoints added so far).
high_client.api.services.api

In [None]:
# Register a sample user whose account is used to call the endpoints.
# The password is a throwaway value for this demo only.
high_client.register(
    name="Jane Doe",
    email="jane@caltech.edu",
    password="abc123",
    password_verify="abc123",
    institution="Caltech",
    website="https://www.caltech.edu/",
)

In [None]:
# Client acting as the sample user, obtained from the existing high_client session.
jane_client = high_client.login_as(email="jane@caltech.edu")

In [None]:
# Submit the sample query as Jane; on success the response text carries the
# uid of the stored result.
res = jane_client.api.bigquery.query(sql=query)
res

In [None]:
# stdlib
import re


def extract_uid(input_string):
    """Return the value of the first ``uid="..."`` found in ``input_string``.

    Only a non-empty value enclosed in double quotes is recognised; ``None``
    is returned when the text contains no such occurrence.
    """
    found = re.search(r'uid="([^"]+)"', input_string)
    return found.group(1) if found else None


# Pull the uid out of the submission response text (None if it has no uid) and
# display it.
uid = extract_uid(str(res))
uid

In [None]:
# Ask for the result. While fewer than approve_seconds have passed since the
# submission, the endpoint answers with SyftNotReady.
res = jane_client.api.bigquery.get_result(uid=uid)
res

In [None]:
# Ask again; run this cell once the waiting period has elapsed to receive the
# stored result.
res = jane_client.api.bigquery.get_result(uid=uid)
res

In [None]:
@sy.api_endpoint_method()
def set_endpoint_state(context, endpoint_path: str, state: dict):
    """Replace the stored state of the endpoint registered at ``endpoint_path``.

    ``state`` may arrive wrapped in an object exposing ``syft_action_data``;
    in that case the wrapped value is used. A value that is not a ``dict``
    is rejected with a ``SyftError``; otherwise the outcome of the admin
    client's ``set_state`` call (made with ``both=True``) is returned.
    """
    # syft absolute
    import syft as sy

    # unwrap when a wrapper exposing syft_action_data was handed in
    state = getattr(state, "syft_action_data", state)
    if isinstance(state, dict):
        return context.admin_client.api.services.api.set_state(
            api_path=endpoint_path, state=state, both=True
        )
    return sy.SyftError(message=f"state is {type(state)}, must be dict")


@sy.api_endpoint_method()
def empty_mock(context, endpoint_path: str, state: dict):
    """Mock side of ``state.set``: ignores its arguments and returns a fixed refusal string."""
    return "not allowed"


# state.set endpoint: the mock side always refuses, the private side updates
# the target endpoint's state. Note this rebinds the new_endpoint name.
new_endpoint = sy.TwinAPIEndpoint(
    path="state.set",
    mock_function=empty_mock,
    private_function=set_endpoint_state,
)

# Delete whatever is registered at state.set, then add the new endpoint.
high_client.api.services.api.delete(endpoint_path="state.set")
high_client.api.services.api.add(endpoint=new_endpoint)

In [None]:
# Capture the state held by bigquery.query's mock function so it can be put
# back after the reset, and display it.
current_state = high_client.api.services.api.get(
    api_path="bigquery.query"
).mock_function.state
current_state

In [None]:
# Reset the endpoint's state to an empty dict through the state.set endpoint.
high_client.api.services.state.set(endpoint_path="bigquery.query", state={})

In [None]:
# Fetch and display the state again to check the reset.
high_client.api.services.api.get(api_path="bigquery.query").mock_function.state

In [None]:
# Display the state captured before the reset.
current_state

In [None]:
# Restore the captured state through the state.set endpoint.
high_client.api.services.state.set(endpoint_path="bigquery.query", state=current_state)

In [None]:
# Fetch and display the state once more to confirm the restore.
high_client.api.services.api.get(api_path="bigquery.query").mock_function.state