-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/add cubejs tasks #5280
Feature/add cubejs tasks #5280
Changes from 70 commits
379270d
a621fa7
f56917a
768c500
8ec54c9
4e8a10c
a7980c2
17b2856
d8a27b9
f82cd1a
4fea3aa
05c3d82
8aec26d
490a6d3
0232294
03a2790
2764172
3b5b85f
6e3b9e1
9c573be
d6c7fb0
d887f19
2382d4c
974f92a
bef5d58
4cd192b
955bc26
ec71072
081579e
962cc0b
e7a8df6
eb942f1
02c226c
2592fd6
cdbcc24
4d1ac7b
cc174ac
f058cff
de8cafe
8f7cdd6
147d73c
aca74e5
aa507e2
1b188eb
98d15fb
b4a38f2
33d1e79
aaa6f43
a4ac00d
14e94ab
fb0fb66
aa560a2
611a8aa
0cd78c4
10c9c07
2542610
ffcd771
0a1f088
bd2dba3
8da7d50
59bfa96
8209521
27b03e4
f5179ff
5cb6318
d8f613d
cf0b263
da59d3d
72b8a5b
56c4ad1
100fa87
335a407
92187f8
f55aa84
944045e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
feature: | ||
- "Added Cube.js Query Task - [#5280](https://github.com/PrefectHQ/prefect/pull/5280)" | ||
|
||
contributor: | ||
- "[Alessandro Lollo](https://github.com/AlessandroLollo)" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
black | ||
graphviz >= 0.8 | ||
jinja2 >= 2.0, < 4.0 | ||
mypy >= 0.600, < 0.813 | ||
mypy >= 0.600, < 0.931 | ||
Pygments >= 2.2, < 3.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
""" | ||
This is a collection of tasks to interact with a Cube.js or Cube Cloud environment. | ||
""" | ||
|
||
try: | ||
from prefect.tasks.cubejs.cubejs_tasks import CubeJSQueryTask | ||
except ImportError as err: | ||
raise ImportError( | ||
'prefect.tasks.cubejs` requires Prefect to be installed with the "cubejs" extra.' | ||
) from err |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,203 @@ | ||||||
import os | ||||||
from requests import Session | ||||||
from prefect import Task | ||||||
from prefect.utilities.tasks import defaults_from_attrs | ||||||
from prefect.engine.signals import FAIL | ||||||
import json | ||||||
import jwt | ||||||
import time | ||||||
from typing import Dict, List, Union | ||||||
|
||||||
|
||||||
class CubeJSQueryTask(Task): | ||||||
|
||||||
""" | ||||||
This task calls Cueb.js load API and returns the result | ||||||
as a JSON object. | ||||||
More info about Cube.js load API at | ||||||
https://cube.dev/docs/rest-api#api-reference-v-1-load. | ||||||
|
||||||
Args: | ||||||
- subdomain (str, optional): The subdomain to use to get the data. | ||||||
If provided, `subdomain` takes precedence over `url`. | ||||||
This is likely to be useful to Cube Cloud users. | ||||||
- url (str, optional): The URL to use to get the data. | ||||||
This is likely to be useful to users of self-hosted Cube.js. | ||||||
- api_secret (str, optional): The API secret used to generate an | ||||||
API token for authentication. | ||||||
If provided, it takes precedence over `api_secret_env_var`. | ||||||
- api_secret_env_var (str, optional): The name of the env var that contains | ||||||
the API secret to use to generate an API token for authentication. | ||||||
Defaults to `CUBEJS_API_SECRET`. | ||||||
- query (dict, list, optional): `dict` or `list` representing | ||||||
valid Cube.js queries. | ||||||
If you pass multiple queries, then be aware of Cube.js Data Blending. | ||||||
More info at https://cube.dev/docs/rest-api#api-reference-v-1-load | ||||||
and at https://cube.dev/docs/schema/advanced/data-blending. | ||||||
Query format can be found at: https://cube.dev/docs/query-format. | ||||||
- security_context (str, dict, optional): The security context to use | ||||||
during authentication. | ||||||
If the security context does not contain an expiration period, | ||||||
then a 7-day expiration period is added automatically. | ||||||
More info at: https://cube.dev/docs/security/context. | ||||||
- max_wait_time (int, optional): The number of seconds to wait for the | ||||||
Cube.js load API to return a response. | ||||||
- **kwargs (optional): Additional keyword arguments to pass to the | ||||||
standard Task initalization. | ||||||
|
||||||
""" | ||||||
|
||||||
__CUBEJS_CLOUD_BASE_URL = "https://{subdomain}.cubecloud.dev" | ||||||
|
||||||
def __init__( | ||||||
self, | ||||||
subdomain: str = None, | ||||||
url: str = None, | ||||||
api_secret: str = None, | ||||||
api_secret_env_var: str = "CUBEJS_API_SECRET", | ||||||
query: Union[Dict, List[Dict]] = None, | ||||||
security_context: Union[str, Dict] = None, | ||||||
max_wait_time: int = None, | ||||||
**kwargs, | ||||||
): | ||||||
self.subdomain = subdomain | ||||||
self.url = url | ||||||
self.api_secret = api_secret | ||||||
self.api_secret_env_var = api_secret_env_var | ||||||
self.query = query | ||||||
self.security_context = security_context | ||||||
self.max_wait_time = max_wait_time | ||||||
super().__init__(**kwargs) | ||||||
|
||||||
@defaults_from_attrs( | ||||||
"subdomain", | ||||||
"url", | ||||||
"api_secret", | ||||||
"api_secret_env_var", | ||||||
"query", | ||||||
"security_context", | ||||||
"max_wait_time", | ||||||
) | ||||||
def run( | ||||||
self, | ||||||
subdomain: str = None, | ||||||
url: str = None, | ||||||
api_secret: str = None, | ||||||
api_secret_env_var: str = "CUBEJS_API_SECRET", | ||||||
query: Union[Dict, List[Dict]] = None, | ||||||
security_context: Union[str, Dict] = None, | ||||||
max_wait_time: int = None, | ||||||
): | ||||||
""" | ||||||
Task run method to perform a query using Cube.js load API. | ||||||
|
||||||
Args: | ||||||
- subdomain (str, optional): The subdomain to use to get the data. | ||||||
If provided, `subdomain` takes precedence over `url`. | ||||||
This is likely to be useful to Cube Cloud users. | ||||||
- url (str, optional): The URL to use to get the data. | ||||||
This is likely to be useful to users of self-hosted Cube.js. | ||||||
- api_secret (str, optional): The API secret used to generate an | ||||||
API token for authentication. | ||||||
If provided, it takes precedence over `api_secret_env_var`. | ||||||
- api_secret_env_var (str, optional): The name of the env var that contains | ||||||
the API secret to use to generate an API token for authentication. | ||||||
Defaults to `CUBEJS_API_SECRET`. | ||||||
- query (dict, list, optional): `dict` or `list` representing | ||||||
valid Cube.js queries. | ||||||
If you pass multiple queries, then be aware of Cube.js Data Blending. | ||||||
More info at https://cube.dev/docs/rest-api#api-reference-v-1-load | ||||||
and at https://cube.dev/docs/schema/advanced/data-blending. | ||||||
Query format can be found at: https://cube.dev/docs/query-format. | ||||||
- security_context (str, dict, optional): The security context to use | ||||||
during authentication. | ||||||
If the security context does not contain an expiration period, | ||||||
then a 7-day expiration period is added automatically. | ||||||
More info at https://cube.dev/docs/security/context. | ||||||
- max_wait_time (int, optional): The number of seconds to wait for the | ||||||
Cube.js load API to return a response. | ||||||
|
||||||
Raises: | ||||||
- ValueError if both `subdomain` and `url` are missing. | ||||||
- ValueError if `api_token` is missing and `api_token_env_var` cannot be found. | ||||||
- ValueError if `query` is missing. | ||||||
- `prefect.engine.signals.FAIL` if the Cube.js load API fails. | ||||||
- `prefect.engine.signals.FAIL` if the Cube.js load API takes more than | ||||||
`max_wait_time` seconds to respond. | ||||||
|
||||||
Returns: | ||||||
- The Cube.js JSON response. | ||||||
|
||||||
""" | ||||||
|
||||||
if not subdomain and not url: | ||||||
raise ValueError("Missing both `subdomain` and `url`.") | ||||||
|
||||||
if not api_secret and api_secret_env_var not in os.environ: | ||||||
raise ValueError("Missing `api_secret` and `api_secret_env_var` not found.") | ||||||
|
||||||
if not query: | ||||||
raise ValueError("Missing `query`.") | ||||||
|
||||||
cube_base_url = self.__CUBEJS_CLOUD_BASE_URL | ||||||
if subdomain: | ||||||
cube_base_url = f"{cube_base_url.format(subdomain=subdomain)}/cubejs-api" | ||||||
else: | ||||||
cube_base_url = url | ||||||
query_api_url = f"{cube_base_url}/v1/load" | ||||||
|
||||||
self.logger.debug(f"Query URL: {query_api_url}") | ||||||
|
||||||
secret = api_secret if api_secret else os.environ[api_secret_env_var] | ||||||
|
||||||
if security_context: | ||||||
|
||||||
extended_context = security_context | ||||||
if "exp" not in security_context and "expiresIn" not in security_context: | ||||||
extended_context["epxiresIn"] = "7d" | ||||||
api_token = jwt.encode( | ||||||
payload=extended_context, key=secret, algorithm="HS256" | ||||||
) | ||||||
|
||||||
self.logger.debug("JWT token generated with security context.") | ||||||
|
||||||
else: | ||||||
api_token = jwt.encode(payload={}, key=secret) | ||||||
|
||||||
session = Session() | ||||||
session.headers = { | ||||||
"Content-type": "application/json", | ||||||
"Authorization": api_token, | ||||||
} | ||||||
|
||||||
params = {"query": json.dumps(query)} | ||||||
|
||||||
wait_time_between_api_calls = 10 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make this configurable via a task parameter and default it to 10? |
||||||
elapsed_wait_time = 0 | ||||||
while not max_wait_time or elapsed_wait_time <= max_wait_time: | ||||||
|
||||||
with session.get(url=query_api_url, params=params) as response: | ||||||
self.logger.debug(f"URL is: {response.url}") | ||||||
|
||||||
if response.status_code == 200: | ||||||
data = response.json() | ||||||
|
||||||
if "error" in data.keys() and "Continue wait" in data["error"]: | ||||||
msg = ( | ||||||
"Cube.js load API still running." | ||||||
"Waiting {wait_time_between_api_calls} seconds before retrying" | ||||||
) | ||||||
self.logger.warning(msg) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
time.sleep(wait_time_between_api_calls) | ||||||
elapsed_wait_time += wait_time_between_api_calls | ||||||
continue | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you could add a parameter to tune the number of subsequent retries and raise in case of counted failures reach the ceil There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a |
||||||
|
||||||
else: | ||||||
return data | ||||||
|
||||||
else: | ||||||
raise FAIL( | ||||||
message=f"Cube.js load API failed!. Error is: {response.reason}" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
) | ||||||
|
||||||
raise FAIL(message="Cube.js load API took too long to provide a response.") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,135 @@ | ||||||
import logging | ||||||
import pytest | ||||||
from prefect.engine.signals import FAIL | ||||||
from prefect.tasks.cubejs import CubeJSQueryTask | ||||||
|
||||||
import responses | ||||||
|
||||||
|
||||||
class TestCubeJSQueryTask: | ||||||
def test_construction_no_values(self): | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
|
||||||
assert cubejs_task.subdomain is None | ||||||
assert cubejs_task.url is None | ||||||
assert cubejs_task.api_secret is None | ||||||
assert cubejs_task.api_secret_env_var == "CUBEJS_API_SECRET" | ||||||
assert cubejs_task.query is None | ||||||
assert cubejs_task.security_context is None | ||||||
|
||||||
def test_construction_with_values(self): | ||||||
cubejs_task = CubeJSQueryTask( | ||||||
subdomain="foo", | ||||||
url="http://bar", | ||||||
api_secret="secret", | ||||||
api_secret_env_var="secret_env_var", | ||||||
query="query", | ||||||
security_context={"foo": "bar"}, | ||||||
) | ||||||
|
||||||
assert cubejs_task.subdomain == "foo" | ||||||
assert cubejs_task.url == "http://bar" | ||||||
assert cubejs_task.api_secret == "secret" | ||||||
assert cubejs_task.api_secret_env_var == "secret_env_var" | ||||||
assert cubejs_task.query == "query" | ||||||
assert cubejs_task.security_context == {"foo": "bar"} | ||||||
|
||||||
def test_run_with_no_values_raises(self): | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
with pytest.raises(ValueError) as exc: | ||||||
cubejs_task.run() | ||||||
|
||||||
assert "Missing both `subdomain` and `url`." in str(exc) | ||||||
|
||||||
def test_run_without_api_secret_api_secret_env_var(self): | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
with pytest.raises(ValueError) as exc: | ||||||
cubejs_task.run(subdomain="foo") | ||||||
|
||||||
assert "Missing `api_secret` and `api_secret_env_var` not found." in str(exc) | ||||||
|
||||||
def test_run_without_query_raises(self): | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
with pytest.raises(ValueError) as exc: | ||||||
cubejs_task.run(subdomain="foo", api_secret="bar") | ||||||
|
||||||
assert "Missing `query`." in str(exc) | ||||||
|
||||||
@responses.activate | ||||||
def test_run_with_failing_api_raises(self): | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
responses.add( | ||||||
responses.GET, "https://test.cubecloud.dev/cubejs-api/v1/load", status=123 | ||||||
) | ||||||
|
||||||
with pytest.raises(FAIL) as exc: | ||||||
cubejs_task.run(subdomain="test", api_secret="foo", query="query") | ||||||
|
||||||
assert "Cube.js load API failed!." in str(exc) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
@responses.activate | ||||||
def test_run_with_continue_waiting(self, caplog): | ||||||
caplog.set_level(logging.DEBUG) | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
|
||||||
responses.add( | ||||||
responses.GET, | ||||||
"https://test.cubecloud.dev/cubejs-api/v1/load", | ||||||
status=200, | ||||||
json={"error": "Continue wait"}, | ||||||
) | ||||||
|
||||||
responses.add( | ||||||
responses.GET, | ||||||
"https://test.cubecloud.dev/cubejs-api/v1/load", | ||||||
status=200, | ||||||
json={"data": "result"}, | ||||||
) | ||||||
|
||||||
data = cubejs_task.run(subdomain="test", api_secret="foo", query="query") | ||||||
|
||||||
assert "Cube.js load API still running." in caplog.text | ||||||
assert isinstance(data, dict) | ||||||
|
||||||
@responses.activate | ||||||
def test_run_with_security_context(self, caplog): | ||||||
caplog.set_level(logging.DEBUG) | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
|
||||||
responses.add( | ||||||
responses.GET, | ||||||
"https://test.cubecloud.dev/cubejs-api/v1/load", | ||||||
status=200, | ||||||
json={"data": "result"}, | ||||||
) | ||||||
|
||||||
cubejs_task.run( | ||||||
subdomain="test", | ||||||
api_secret="foo", | ||||||
query="query", | ||||||
security_context={"foo": "bar"}, | ||||||
) | ||||||
|
||||||
assert "JWT token generated with security context." in caplog.text | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For this test I would recommend inspecting the JWT that was included in the request made by the task so that inspecting the logs isn't necessary. You should be able to access the request headers via |
||||||
|
||||||
@responses.activate | ||||||
def test_run_with_max_wait_time_raises(self): | ||||||
cubejs_task = CubeJSQueryTask() | ||||||
|
||||||
responses.add( | ||||||
responses.GET, | ||||||
"https://test.cubecloud.dev/cubejs-api/v1/load", | ||||||
status=200, | ||||||
json={"error": "Continue wait"}, | ||||||
) | ||||||
|
||||||
with pytest.raises(FAIL) as exc: | ||||||
cubejs_task.run( | ||||||
subdomain="test", | ||||||
api_secret="foo", | ||||||
query="query", | ||||||
security_context={"foo": "bar"}, | ||||||
max_wait_time=15, | ||||||
) | ||||||
|
||||||
assert "Cube.js load API took too long to provide a response." in str(exc) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having a configurable time between calls would be useful for this test since it could spend less time waiting. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're pinning below this version because we don't have separate typeshed installations. Is there a reason you bumped this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure why I did this.
Probably not intentional.
Anyway, I should have reverted back to
mypy >= 0.600, 0.813