In [None]:
# syft absolute
import syft as sy
from syft import test_settings

In [None]:
# Optional environment overrides, intentionally left disabled.
# Uncommenting these lines would set the variables below via os.environ;
# NOTE(review): their exact effect on the deployment is not visible here — confirm before enabling.
# import os
# os.environ["ORCHESTRA_DEPLOYMENT_TYPE"] = "remote"
# os.environ["DEV_MODE"] = "True"
# os.environ["TEST_EXTERNAL_REGISTRY"] = "k3d-registry.localhost:5800"

In [None]:
# import os
# os.environ["TEST_BIGQUERY_APIS_LIVE"] = "True"
# third party
from apis import make_test_query

In [None]:
# Launch the "bigquery-high" server (server_side_type="high") in dev mode.
launch_kwargs = {
    "name": "bigquery-high",
    "dev_mode": True,
    "server_side_type": "high",
    "port": "8080",
    "n_consumers": 1,  # number of workers to spawn
    "create_producer": True,  # allows additional workers to be produced
}
server = sy.orchestra.launch(**launch_kwargs)

In [None]:
# Name of the worker pool that the custom API endpoint is bound to
# (passed as `worker_pool` when the endpoint is created further down).
this_worker_pool_name = "bigquery-pool"

In [None]:
# Log in to the local server on port 8080.
# NOTE(review): these look like default development credentials — confirm they
# are only ever used against local test deployments.
high_client = sy.login(
    url="http://localhost:8080",
    email="info@openmined.org",
    password="changethis",
)

Check whether `google-cloud-bigquery` is installed; if it is not, install it together with `db-dtypes`.

In [None]:
# stdlib
from importlib import metadata
import subprocess
import sys

# Distributions this notebook needs in the environment of the running kernel.
required_packages = ["db-dtypes", "google-cloud-bigquery"]


def _is_installed(distribution_name: str) -> bool:
    """Return True if `distribution_name` is installed for the running interpreter.

    The lookup is by exact distribution name, so `google-cloud-bigquery` is not
    confused with sibling packages such as `google-cloud-bigquery-storage`
    (a substring match on `pip list` output would treat those as a hit).
    """
    try:
        metadata.version(distribution_name)
    except metadata.PackageNotFoundError:
        return False
    return True


# Check every required package, not only google-cloud-bigquery, so that a
# missing db-dtypes is also installed.
missing_packages = [name for name in required_packages if not _is_installed(name)]

if not missing_packages:
    print("db-dtypes and google-cloud-bigquery are already installed, doing nothing.")
else:
    # Install the necessary packages
    print(f"Installing {' and '.join(missing_packages)}...")
    # `sys.executable -m pip` installs into the interpreter running this kernel;
    # a bare `pip` resolved from PATH may belong to a different environment.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", *missing_packages], check=True
    )

In [None]:
# List the worker pools to identify the name of the one that has the required packages.
# The endpoint is bound to that worker pool afterwards.
high_client.worker_pools

In [None]:
# Sanity check: exactly two worker pools are expected on this server.
all_worker_pools = high_client.worker_pools.get_all()
assert len(all_worker_pools) == 2

In [None]:
# Build the mock query function: rate limiter enabled with `calls_per_min` set to 2.
mock_settings = {
    "rate_limiter_enabled": True,
    "calls_per_min": 2,
    "print_statement": "this is a MOCK test query",
}
mock_func = make_test_query(settings=mock_settings)

In [None]:
# Build the private query function with the same rate-limit settings as the
# mock one; only the print statement differs.
private_settings = {
    "rate_limiter_enabled": True,
    "calls_per_min": 2,
    "print_statement": "this is a PRIVATE test query",
}
private_func = make_test_query(settings=private_settings)

In [None]:
# Pair the mock and private query functions in one twin endpoint at
# `bigquery.test_query`, bound to the pool named by `this_worker_pool_name`.
new_endpoint = sy.TwinAPIEndpoint(
    path="bigquery.test_query",
    mock_function=mock_func,
    private_function=private_func,
    worker_pool=this_worker_pool_name,
    description="This endpoint allows to query Bigquery storage via SQL queries.",
)

# Add the endpoint to the server's custom API; the call result is the cell output.
high_client.custom_api.add(endpoint=new_endpoint)

In [None]:
# Here, we update the endpoint to time out after 120s
# (NOTE(review): the default is reportedly 60s — confirm against the API service).
high_client.api.services.api.update(
    endpoint_path="bigquery.test_query", endpoint_timeout=120
)

In [None]:
# Turn on `hide_mock_definition` for the endpoint.
# NOTE(review): presumably this hides the mock function's source from API users — confirm.
high_client.api.services.api.update(
    endpoint_path="bigquery.test_query", hide_mock_definition=True
)

In [None]:
# Read dataset, table and column names from the test settings; each lookup
# supplies a placeholder default for keys that are not configured.
dataset_1, dataset_2 = (
    test_settings.get("dataset_1", default="dataset_1"),
    test_settings.get("dataset_2", default="dataset_2"),
)
table_1, table_2 = (
    test_settings.get("table_1", default="table_1"),
    test_settings.get("table_2", default="table_2"),
)
table_2_col_id, table_2_col_score = (
    test_settings.get("table_2_col_id", default="table_id"),
    test_settings.get("table_2_col_score", default="colname"),
)

In [None]:
# Test mock version: run a LIMIT 10 query through the endpoint's mock side
mock_sql = f"SELECT * FROM {dataset_1}.{table_1} LIMIT 10"
result = high_client.api.services.bigquery.test_query.mock(sql_query=mock_sql)
result

In [None]:
# The mock query above used LIMIT 10, so exactly 10 items are expected back.
assert len(result) == 10

In [None]:
# Test mock version: this further call is expected to be rejected by the rate limiter
expected_error = sy.SyftException(
    public_message="*Rate limit of calls per minute has been reached.*"
)
with sy.raises(expected_error, show=True):
    high_client.api.services.bigquery.test_query.mock(
        sql_query=f"SELECT * FROM {dataset_1}.{table_1} LIMIT 7"
    )