From db479cf80002af0714649c69b45336b0e7e3fabe Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Thu, 20 Jan 2022 16:15:21 -0500 Subject: [PATCH] feat: Onboard EUMETSAT Solar Forecasting dataset (#273) * feat: YAML config files for EUMETSAT dataset * feat: generate TF files for EUMETSAT * feat: generate DAG for EUMETSAT --- .../eumetsat/_terraform/eumetsat_dataset.tf | 32 ++++++++++++ datasets/eumetsat/_terraform/provider.tf | 28 ++++++++++ datasets/eumetsat/_terraform/variables.tf | 23 ++++++++ datasets/eumetsat/dataset.yaml | 26 ++++++++++ .../eumetsat/solar_forecasting/pipeline.yaml | 33 ++++++++++++ .../solar_forecasting_dag.py | 52 +++++++++++++++++++ 6 files changed, 194 insertions(+) create mode 100644 datasets/eumetsat/_terraform/eumetsat_dataset.tf create mode 100644 datasets/eumetsat/_terraform/provider.tf create mode 100644 datasets/eumetsat/_terraform/variables.tf create mode 100644 datasets/eumetsat/dataset.yaml create mode 100644 datasets/eumetsat/solar_forecasting/pipeline.yaml create mode 100644 datasets/eumetsat/solar_forecasting/solar_forecasting_dag.py diff --git a/datasets/eumetsat/_terraform/eumetsat_dataset.tf b/datasets/eumetsat/_terraform/eumetsat_dataset.tf new file mode 100644 index 000000000..39a12d40a --- /dev/null +++ b/datasets/eumetsat/_terraform/eumetsat_dataset.tf @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+
+resource "google_storage_bucket" "eumetsat-solar-forecasting" {
+  name                        = "${var.bucket_name_prefix}-eumetsat-solar-forecasting" # var.bucket_name_prefix followed by "-eumetsat-solar-forecasting"
+  force_destroy               = true # per the google provider docs: deleting the bucket also deletes the objects it contains
+  location                    = "US"
+  uniform_bucket_level_access = true
+  lifecycle {
+    ignore_changes = [
+      logging, # changes to the bucket's logging attribute are ignored when Terraform plans updates
+    ]
+  }
+}
+
+output "storage_bucket-eumetsat-solar-forecasting-name" { # exposes the name of the bucket declared above
+  value = google_storage_bucket.eumetsat-solar-forecasting.name
+}
diff --git a/datasets/eumetsat/_terraform/provider.tf b/datasets/eumetsat/_terraform/provider.tf
new file mode 100644
index 000000000..23ab87dcd
--- /dev/null
+++ b/datasets/eumetsat/_terraform/provider.tf
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+provider "google" {
+  project                     = var.project_id
+  impersonate_service_account = var.impersonating_acct # NOTE(review): presumably the provider acts as this service account - confirm against the provider docs
+  region                      = var.region
+}
+
+data "google_client_openid_userinfo" "me" {} # only the `email` attribute of this data source is read (in the output below)
+
+output "impersonating-account" {
+  value = data.google_client_openid_userinfo.me.email
+}
diff --git a/datasets/eumetsat/_terraform/variables.tf b/datasets/eumetsat/_terraform/variables.tf
new file mode 100644
index 000000000..c3ec7c506
--- /dev/null
+++ b/datasets/eumetsat/_terraform/variables.tf
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+variable "project_id" {} # read by provider.tf as the google provider's `project`
+variable "bucket_name_prefix" {} # read by eumetsat_dataset.tf to build the bucket name
+variable "impersonating_acct" {} # read by provider.tf as `impersonate_service_account`
+variable "region" {} # read by provider.tf as the google provider's `region`
+variable "env" {} # not referenced by the Terraform files added in this patch
+
diff --git a/datasets/eumetsat/dataset.yaml b/datasets/eumetsat/dataset.yaml
new file mode 100644
index 000000000..553db169c
--- /dev/null
+++ b/datasets/eumetsat/dataset.yaml
@@ -0,0 +1,26 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dataset:
+  name: eumetsat
+  friendly_name: ~  # "~" is YAML null: no value is set for this or the three keys below
+  description: ~
+  dataset_sources: ~
+  terms_of_use: ~
+resources:
+
+  - type: storage_bucket
+    name: "eumetsat-solar-forecasting"  # same suffix as the bucket name in _terraform/eumetsat_dataset.tf
+    uniform_bucket_level_access: True
+    location: US
diff --git a/datasets/eumetsat/solar_forecasting/pipeline.yaml b/datasets/eumetsat/solar_forecasting/pipeline.yaml
new file mode 100644
index 000000000..39012b7dc
--- /dev/null
+++ b/datasets/eumetsat/solar_forecasting/pipeline.yaml
@@ -0,0 +1,33 @@
+resources: ~  # YAML null: this pipeline declares no resources of its own
+
+dag:
+  airflow_version: 2
+  initialize:
+    dag_id: solar_forecasting  # the generated DAG file in this patch uses dag_id "eumetsat.solar_forecasting"
+    default_args:
+      owner: "Google"
+      depends_on_past: False
+      start_date: '2022-01-19'
+    max_active_runs: 1
+    schedule_interval: "@once"  # Airflow preset: schedule the DAG a single time
+    catchup: False
+    default_view: graph
+
+  tasks:
+    - operator: "CloudDataTransferServiceGCSToGCSOperator"
+      description: "Task to run a GCS to GCS operation using Google resources"
+      args:
+        task_id: copy_gcs_bucket
+        timeout: 43200 # 12 hours
+        retries: 0
+        wait: True
+        project_id: bigquery-public-data
+        source_bucket: "{{ var.json.eumetsat.solar_forecasting.source_bucket }}"  # Jinja template; NOTE(review): presumably resolved from an Airflow Variable named "eumetsat" holding JSON - confirm
+        object_conditions:
+          includePrefixes:
+            - satellite/EUMETSAT/SEVIRI_RSS/v3/eumetsat_seviri_hrv_uk.zarr  # the only prefix listed
+        destination_bucket: "{{ var.json.eumetsat.solar_forecasting.destination_bucket }}"
+        google_impersonation_chain: "{{ var.json.eumetsat.solar_forecasting.service_account }}"
+
+  graph_paths:
+    - "copy_gcs_bucket"  # the single task defined under `tasks`
diff --git a/datasets/eumetsat/solar_forecasting/solar_forecasting_dag.py b/datasets/eumetsat/solar_forecasting/solar_forecasting_dag.py
new file mode 100644
index 000000000..89d467ed0
--- /dev/null
+++ b/datasets/eumetsat/solar_forecasting/solar_forecasting_dag.py
@@ -0,0 +1,52 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from airflow import DAG
+from airflow.providers.google.cloud.operators import cloud_storage_transfer_service
+
+default_args = {  # handed to the DAG constructor below as its default_args
+    "owner": "Google",
+    "depends_on_past": False,
+    "start_date": "2022-01-19",
+}
+
+
+with DAG(
+    dag_id="eumetsat.solar_forecasting",  # pipeline.yaml's dag_id "solar_forecasting" with an "eumetsat." prefix
+    default_args=default_args,
+    max_active_runs=1,
+    schedule_interval="@once",  # Airflow preset: schedule the DAG a single time
+    catchup=False,
+    default_view="graph",
+) as dag:
+
+    # Task to run a GCS to GCS operation using Google resources
+    copy_gcs_bucket = cloud_storage_transfer_service.CloudDataTransferServiceGCSToGCSOperator(
+        task_id="copy_gcs_bucket",
+        timeout=43200,  # annotated as "12 hours" in pipeline.yaml (43200 s)
+        retries=0,
+        wait=True,
+        project_id="bigquery-public-data",
+        source_bucket="{{ var.json.eumetsat.solar_forecasting.source_bucket }}",  # Jinja template, not a literal bucket name
+        object_conditions={
+            "includePrefixes": [
+                "satellite/EUMETSAT/SEVIRI_RSS/v3/eumetsat_seviri_hrv_uk.zarr"
+            ]
+        },
+        destination_bucket="{{ var.json.eumetsat.solar_forecasting.destination_bucket }}",
+        google_impersonation_chain="{{ var.json.eumetsat.solar_forecasting.service_account }}",
+    )
+
+    copy_gcs_bucket  # only task in the DAG; a bare reference, so no dependencies are declared