From 046991d7933b6341eef9de4a6e6f345c57562a9f Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Wed, 23 Aug 2023 17:58:33 +0000 Subject: [PATCH 1/5] =?UTF-8?q?=E2=86=A5=20initialized=20release-candidate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 1b30a048f6ed4948f23c23555515be0e22a378f5 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Mon, 28 Aug 2023 18:19:32 -0400 Subject: [PATCH 2/5] start addressing PR comments --- .github/workflows/build-dags.yml | 12 ++++++-- .github/workflows/build-fetcher-image.yml | 4 +-- .github/workflows/lint.yml | 1 + dags/README.md | 37 ++++++----------------- dags/dags/__init__.py | 5 ++- dags/dags/common.py | 2 +- 6 files changed, 27 insertions(+), 34 deletions(-) diff --git a/.github/workflows/build-dags.yml b/.github/workflows/build-dags.yml index bccd225..3b665f4 100644 --- a/.github/workflows/build-dags.yml +++ b/.github/workflows/build-dags.yml @@ -15,17 +15,25 @@ jobs: python-version: 3.11 - uses: abatilo/actions-poetry@v2 - name: run mypy and pytest + id: test working-directory: dags run: | poetry export --with=dev --without-hashes --format=requirements.txt > requirements.txt poetry run pip install -r requirements.txt poetry run mypy . 
+ echo "VERSION=$(poetry version)" >> "$GITHUB_OUTPUT" +# un-comment and move up once we have tests; pytest exits with an exit code if no tests are found # poetry run pytest env: RAW_BUCKET: gs://this-does-not-exist-raw PARSED_BUCKET: gs://this-does-not-exist-parsed + - uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - uses: docker/build-push-action@v4 with: context: dags - push: false - tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/dags:test' + push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/dags:${{ steps.test.outputs.VERSION }}' diff --git a/.github/workflows/build-fetcher-image.yml b/.github/workflows/build-fetcher-image.yml index 764453e..5d60be3 100644 --- a/.github/workflows/build-fetcher-image.yml +++ b/.github/workflows/build-fetcher-image.yml @@ -32,5 +32,5 @@ jobs: - uses: docker/build-push-action@v4 with: context: fetcher - push: false - tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/fetcher:test' + push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/fetcher:latest' diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index bc54525..a2e4804 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -28,6 +28,7 @@ jobs: # sqlfluff needs dbt to be set up and authenticated - uses: pre-commit/action@v3.0.0 env: + # skip sqlfluff for now; it's not authenticating to bigquery properly SKIP: sqlfluff-lint BIGQUERY_SERVICE_ACCOUNT: /tmp/keyfile DBT_PROFILES_DIR: warehouse diff --git a/dags/README.md b/dags/README.md index 43fbc04..952c2fe 100644 --- a/dags/README.md +++ b/dags/README.md @@ -4,45 +4,26 @@ This is a [Dagster](https://dagster.io/) project scaffolded with [`dagster proje ## Getting started -First, install your Dagster code 
location as a Python package. By using the --editable flag, pip will install your Python package in ["editable mode"](https://pip.pypa.io/en/latest/topics/local-project-installs/#editable-installs) so that as you develop, local code changes will automatically apply. - +First, ensure that poetry is installed and then install the dependencies. ```bash -pip install -e ".[dev]" +curl -sSL https://install.python-poetry.org | python3 - +poetry install ``` -Then, start the Dagster UI web server: - +Then, start the Dagster UI web server (optionally specifying a port): ```bash -dagster dev +poetry run dagster dev <--port 1234> ``` -Open http://localhost:3000 with your browser to see the project. - -You can start writing assets in `dags/assets.py`. The assets are automatically loaded into the Dagster code location as you define them. - -## Development - - -### Adding new Python dependencies - -You can specify new Python dependencies in `setup.py`. +Open http://localhost:3000 (or the port you specified) with your browser to see the project. ### Unit testing Tests are in the `dags_tests` directory and you can run tests using `pytest`: ```bash -pytest dags_tests +poetry run pytest dags_tests ``` -### Schedules and sensors - -If you want to enable Dagster [Schedules](https://docs.dagster.io/concepts/partitions-schedules-sensors/schedules) or [Sensors](https://docs.dagster.io/concepts/partitions-schedules-sensors/sensors) for your jobs, the [Dagster Daemon](https://docs.dagster.io/deployment/dagster-daemon) process must be running. This is done automatically when you run `dagster dev`. - -Once your Dagster Daemon is running, you can start turning on schedules and sensors for your jobs. - -## Deploy on Dagster Cloud - -The easiest way to deploy your Dagster project is to use Dagster Cloud. - -Check out the [Dagster Cloud Documentation](https://docs.dagster.cloud) to learn more. 
+### Deployment +Dagster itself is deployed via hologit and Helm; the [values file](../kubernetes/values/prod-dagster.yml) contains any Kubernetes overrides. The dags/source code in this folder is deployed by pushing a Docker image (currently `ghcr.io/jarvusinnovations/transit-data-analytics-demo/dags:latest` built from [this folder](./Dockerfile)) that is then referenced by a user code deployment in the values. diff --git a/dags/dags/__init__.py b/dags/dags/__init__.py index 8ec3706..adaaa4b 100644 --- a/dags/dags/__init__.py +++ b/dags/dags/__init__.py @@ -26,7 +26,10 @@ class HivePartitionedPydanticGCSIOManager(PickledObjectGCSIOManager): def get_path_for_partition( self, context: Union[InputContext, OutputContext], path: UPath, partition: str ) -> "UPath": - """Override this method if you want to use a different partitioning scheme + """ + (Docs taken from parent class) + + Override this method if you want to use a different partitioning scheme (for example, if the saving function handles partitioning instead). The extension will be added later. 
diff --git a/dags/dags/common.py b/dags/dags/common.py index 40c02dc..5d0279d 100644 --- a/dags/dags/common.py +++ b/dags/dags/common.py @@ -141,7 +141,7 @@ def base64url(self) -> str: @property def filename(self) -> str: params_with_page = { - **{kv.key: kv.value for kv in self.config.query if kv.value}, # exclude secrets + **{kv.key: kv.value for kv in self.config.query if kv.value}, # excludes secrets **{kv.key: kv.value for kv in self.page}, } url = requests.Request(url=self.config.url, params=params_with_page).prepare().url From 26a68a6947f661af1284b18a5183dce40497c7d8 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Mon, 28 Aug 2023 20:18:18 -0400 Subject: [PATCH 3/5] only fetcher uses versioned tags --- .github/workflows/build-dags.yml | 4 +--- .github/workflows/build-fetcher-image.yml | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-dags.yml b/.github/workflows/build-dags.yml index 3b665f4..86d15a4 100644 --- a/.github/workflows/build-dags.yml +++ b/.github/workflows/build-dags.yml @@ -15,13 +15,11 @@ jobs: python-version: 3.11 - uses: abatilo/actions-poetry@v2 - name: run mypy and pytest - id: test working-directory: dags run: | poetry export --with=dev --without-hashes --format=requirements.txt > requirements.txt poetry run pip install -r requirements.txt poetry run mypy . 
- echo "VERSION=$(poetry version)" >> "$GITHUB_OUTPUT" # un-comment and move up once we have tests; pytest exits with an exit code if no tests are found # poetry run pytest env: @@ -36,4 +34,4 @@ jobs: with: context: dags push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/dags:${{ steps.test.outputs.VERSION }}' + tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/dags:latest' diff --git a/.github/workflows/build-fetcher-image.yml b/.github/workflows/build-fetcher-image.yml index 5d60be3..965ee74 100644 --- a/.github/workflows/build-fetcher-image.yml +++ b/.github/workflows/build-fetcher-image.yml @@ -15,12 +15,14 @@ jobs: python-version: 3.11 - uses: abatilo/actions-poetry@v2 - name: run mypy and pytest + id: test working-directory: fetcher run: | poetry export --with=dev --without-hashes --format=requirements.txt > requirements.txt poetry run pip install -r requirements.txt poetry run mypy . 
poetry run pytest + echo "VERSION=$(poetry version --short)" >> "$GITHUB_OUTPUT" env: RAW_BUCKET: gs://this-does-not-exist-raw PARSED_BUCKET: gs://this-does-not-exist-parsed @@ -33,4 +35,4 @@ jobs: with: context: fetcher push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/fetcher:latest' + tags: 'ghcr.io/jarvusinnovations/transit-data-analytics-demo/fetcher:${{ steps.test.outputs.VERSION }}' From e75ccf0157c569f0f11a84fb6fcf0887c1a9dd05 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 29 Aug 2023 10:50:24 -0400 Subject: [PATCH 4/5] fix grafana admin pw --- kubernetes/values/prod-grafana.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kubernetes/values/prod-grafana.yml b/kubernetes/values/prod-grafana.yml index 4abcbd8..565d668 100644 --- a/kubernetes/values/prod-grafana.yml +++ b/kubernetes/values/prod-grafana.yml @@ -3,6 +3,11 @@ persistence: enabled: true +admin: + existingSecret: grafana-initial-admin + userKey: admin-user + passwordKey: admin-password + ingress: enabled: true hosts: From 9cdf10cad0f3a649828c22d0451e349d6b82be6b Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 29 Aug 2023 10:52:55 -0400 Subject: [PATCH 5/5] add missing prometheus helm chart repo --- ci/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/tasks.py b/ci/tasks.py index 0d4dd54..4e8b7b2 100644 --- a/ci/tasks.py +++ b/ci/tasks.py @@ -64,6 +64,7 @@ def helm_reqs(c): c.run( "helm repo add bitnami-pre-2022 https://raw.githubusercontent.com/bitnami/charts/eb5f9a9513d987b519f0ecd732e7031241c50328/bitnami" ) + c.run("helm repo add prometheus-community https://prometheus-community.github.io/helm-charts") @task