Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/aqueducthq/aqueduct into en…
Browse files Browse the repository at this point in the history
…g-3000-allow-users-to-opt-out-of-data-snapshots-1
  • Loading branch information
likawind committed Jun 6, 2023
2 parents 836e1d2 + 740261a commit 343573c
Show file tree
Hide file tree
Showing 15 changed files with 950 additions and 162 deletions.
321 changes: 321 additions & 0 deletions examples/system-tests/Many_Small_Workflows.ipynb

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions examples/system-tests/Simple_Model_Large_Data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bc7d2d08",
"metadata": {},
"outputs": [],
"source": [
"import aqueduct as aq\n",
"from aqueduct.constants.enums import ArtifactType"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9460bb78",
"metadata": {},
"outputs": [],
"source": [
"client = aq.Client(api_key=\"\", aqueduct_address=\"\")\n",
"\n",
"\n",
"aq.global_config({'engine':'databricks_resource', 'lazy':True})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb9e417e",
"metadata": {},
"outputs": [],
"source": [
"# This works with large data (> 50 GB).\n",
"snowflake_warehouse = client.resource('snowflake_resource')\n",
"hotel_reviews = snowflake_warehouse.sql('SELECT * FROM large_hotel_reviews;')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa652f83",
"metadata": {},
"outputs": [],
"source": [
"@aq.op(requirements=[])\n",
"def dummy(original_df, num_rows):\n",
"    \"\"\"Replicate the rows of a DataFrame until it holds at least `num_rows` rows.\n",
"\n",
"    The frame is repeatedly unioned with itself, so the row count doubles on\n",
"    every pass and the result may overshoot `num_rows`.\n",
"\n",
"    Args:\n",
"        original_df: Source frame; must support `count`, `repartition`,\n",
"            `persist` and `union` (presumably a Spark DataFrame -- confirm).\n",
"        num_rows: Minimum number of rows wanted in the result.\n",
"\n",
"    Returns:\n",
"        The replicated frame.\n",
"\n",
"    Raises:\n",
"        ValueError: If `original_df` has no rows, since doubling zero rows\n",
"            can never reach `num_rows`.\n",
"    \"\"\"\n",
"    import math\n",
"\n",
"    original_row_count = original_df.count()\n",
"    if original_row_count == 0:\n",
"        raise ValueError(\"original_df is empty; cannot replicate it to reach num_rows\")\n",
"\n",
"    # Step 1: Derive the partition count from the growth factor (never below 1,\n",
"    # so a non-positive num_rows does not produce an invalid partition count).\n",
"    num_partitions = max(1, int(math.ceil(num_rows / original_row_count)))\n",
"\n",
"    # Step 2: Repartition the DataFrame\n",
"    replicated_df = original_df.repartition(num_partitions)\n",
"\n",
"    # Step 3: Persist the DataFrame\n",
"    replicated_df.persist()\n",
"\n",
"    # Step 4: Duplicate the rows. A union keeps duplicates, so each pass exactly\n",
"    # doubles the row count; track it arithmetically instead of running a full\n",
"    # count() action on the ever-growing frame every iteration.\n",
"    row_count = original_row_count\n",
"    while row_count < num_rows:\n",
"        replicated_df = replicated_df.union(replicated_df)\n",
"        row_count *= 2\n",
"\n",
"    print(row_count)\n",
"\n",
"    return replicated_df\n",
"\n",
"generated_df = dummy(hotel_reviews, 10000000000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "324fb630",
"metadata": {},
"outputs": [],
"source": [
"snowflake_warehouse.save(generated_df, table_name=\"large_hotel_reviews\", update_mode=\"replace\")\n",
"\n",
"\n",
"client.publish_flow(\n",
" \"Creating_Large_Dataset\",\n",
" \"repartition hotel_reviews to create big dataset\",\n",
" artifacts=[generated_df],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c9e16e2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
178 changes: 178 additions & 0 deletions examples/system-tests/Simple_Model_Medium_Data_K8s.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f6a9bd6c",
"metadata": {},
"outputs": [],
"source": [
"num_rows = 10000000\n",
"DATA_SIZE_GB = 5\n",
"\n",
"flow_name = f\"Test {DATA_SIZE_GB}GB\"\n",
"print(\"Testing: \", flow_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ccef41d",
"metadata": {},
"outputs": [],
"source": [
"import aqueduct as aq\n",
"from aqueduct import op"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1d0d83e",
"metadata": {},
"outputs": [],
"source": [
"address = \"http://localhost:8080\"\n",
"\n",
"api_key = aq.get_apikey()\n",
"client = aq.Client(api_key, address)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c78cdc9",
"metadata": {},
"outputs": [],
"source": [
"aq.global_config({ \"engine\": \"k8s_2\" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55db48ab",
"metadata": {},
"outputs": [],
"source": [
"warehouse = client.resource(name=\"sf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14d8427b",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"d = warehouse.sql(f\"select * from LARGE_HOTEL_REVIEWS_STAGING_2041977877 LIMIT {num_rows}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86833b68",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"d.get()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39d43c46",
"metadata": {},
"outputs": [],
"source": [
"df = d.get()\n",
"\n",
"def actual_size():\n",
"    \"\"\"Return the deep memory usage of the module-level `df`, in units of 1024**3 bytes.\"\"\"\n",
"    bytes_per_gb = 1024**3\n",
"    total_bytes = df.memory_usage(deep=True).sum()\n",
"    return total_bytes / bytes_per_gb\n",
"\n",
"actual_size()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "704cf8fa",
"metadata": {},
"outputs": [],
"source": [
"# Minimal operator: returns only the result of `head()` on its input.\n",
"@op(requirements=[])\n",
"def foo(df):\n",
"    preview = df.head()\n",
"    return preview"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27aa1787",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"out = foo(d)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05c01f1c",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"client.publish_flow(flow_name, artifacts=[out])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c512d804",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
3 changes: 3 additions & 0 deletions scripts/generate_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ echo "### package aqueduct
* [\`resources.databricks\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.databricks)
* [\`resources.dynamic\_k8s\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.dynamic\_k8s)
* [\`resources.ecr\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.ecr)
* [\`resources.gar\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.gar)
* [\`resources.k8s\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.k8s)
* [\`resources.google\_sheets\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.google\_sheets)
* [\`resources.mongodb\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.mongodb)
Expand Down Expand Up @@ -85,6 +86,7 @@ echo "### package aqueduct.resources
* [\`resources.databricks\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.databricks)
* [\`resources.dynamic\_k8s\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.dynamic\_k8s)
* [\`resources.ecr\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.ecr)
* [\`resources.gar\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.gar)
* [\`resources.k8s\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.k8s)
* [\`resources.google\_sheets\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.google\_sheets)
* [\`resources.mongodb\`](https://docs.aqueducthq.com/api-reference/sdk-reference/package-aqueduct/package-aqueduct.resources/aqueduct.resources.mongodb)
Expand All @@ -98,6 +100,7 @@ pydoc-markdown -I . --render-toc -m aqueduct.resources.aws_lambda > docs/package
pydoc-markdown -I . --render-toc -m aqueduct.resources.databricks > docs/package-aqueduct.resources/aqueduct.resources.databricks.md
pydoc-markdown -I . --render-toc -m aqueduct.resources.dynamic_k8s > docs/package-aqueduct.resources/aqueduct.resources.dynamic_k8s.md
pydoc-markdown -I . --render-toc -m aqueduct.resources.ecr > docs/package-aqueduct.resources/aqueduct.resources.ecr.md
pydoc-markdown -I . --render-toc -m aqueduct.resources.gar > docs/package-aqueduct.resources/aqueduct.resources.gar.md
pydoc-markdown -I . --render-toc -m aqueduct.resources.k8s > docs/package-aqueduct.resources/aqueduct.resources.k8s.md
pydoc-markdown -I . --render-toc -m aqueduct.resources.google_sheets > docs/package-aqueduct.resources/aqueduct.resources.google_sheets.md
pydoc-markdown -I . --render-toc -m aqueduct.resources.mongodb > docs/package-aqueduct.resources/aqueduct.resources.mongodb.md
Expand Down
3 changes: 3 additions & 0 deletions src/ui/app/.eslintignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
dist
node_modules/*
.parcel-cache/*
assets/*
61 changes: 32 additions & 29 deletions src/ui/app/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -1,35 +1,38 @@
// Source for this ESLint configuration:
// https://robertcooper.me/post/using-eslint-and-prettier-in-a-typescript-project
module.exports = {
parser: "@typescript-eslint/parser",
// Specifies the ESLint parser
parserOptions: {
ecmaVersion: 2020,
// Allows for the parsing of modern ECMAScript features
sourceType: "module",
// Allows for the use of imports
ecmaFeatures: {
jsx: true // Allows for the parsing of JSX
parser: '@typescript-eslint/parser',
// Specifies the ESLint parser
parserOptions: {
ecmaVersion: 2020,
// Allows for the parsing of modern ECMAScript features
sourceType: 'module',
// Allows for the use of imports
ecmaFeatures: {
jsx: true, // Allows for the parsing of JSX
},
},

}
},
settings: {
react: {
version: "detect" // Tells eslint-plugin-react to automatically detect the version of React to use
settings: {
react: {
version: 'detect', // Tells eslint-plugin-react to automatically detect the version of React to use
},
},

}
},
extends: ["plugin:react/recommended", "plugin:@typescript-eslint/recommended", "plugin:prettier/recommended"],
plugins: ['simple-import-sort', "unused-imports"],
rules: {
// Place to specify ESLint rules. Can be used to overwrite rules specified from the extended configs
// e.g. "@typescript-eslint/explicit-function-return-type": "off",
// Since we're using Typescript, checking prop-types is no longer needed. This line stops the prop-types errors from happening during lint.
'react/prop-types': 0,
'simple-import-sort/imports': 'error',
'simple-import-sort/exports': 'error',
'no-unused-vars': 'off',
'unused-imports/no-unused-imports': 'error',
'react/jsx-child-element-spacing': 'off'
}
extends: ['plugin:react/recommended', 'plugin:@typescript-eslint/recommended', 'plugin:prettier/recommended'],
plugins: ['simple-import-sort', 'unused-imports', 'react-hooks'],
rules: {
// Place to specify ESLint rules. Can be used to overwrite rules specified from the extended configs
// e.g. "@typescript-eslint/explicit-function-return-type": "off",
// Since we're using Typescript, checking prop-types is no longer needed. This line stops the prop-types errors from happening during lint.
'react/prop-types': 0,
'simple-import-sort/imports': 'error',
'simple-import-sort/exports': 'error',
'no-unused-vars': 'off',
'unused-imports/no-unused-imports': 'error',
'react/jsx-child-element-spacing': 'off',
'react-hooks/rules-of-hooks': 'error',
// Checks rules of Hooks
'react-hooks/exhaustive-deps': 'warn', // Checks effect dependencies
},
};
Loading

0 comments on commit 343573c

Please sign in to comment.