From c2ffc77d0a71fa8a8b78223f83c9d3cd3f2ca2b7 Mon Sep 17 00:00:00 2001 From: Adler Santos Date: Wed, 3 Aug 2022 13:42:29 -0400 Subject: [PATCH] feat: Onboard IDC v10 dataset (#433) --- datasets/idc/infra/idc_dataset.tf | 26 ++++++++++++++++++- datasets/idc/infra/provider.tf | 2 +- datasets/idc/infra/variables.tf | 2 +- .../copy_tcia_data/copy_tcia_data_dag.py | 6 ++--- .../pipelines/copy_tcia_data/pipeline.yaml | 4 +-- datasets/idc/pipelines/dataset.yaml | 4 +++ 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/datasets/idc/infra/idc_dataset.tf b/datasets/idc/infra/idc_dataset.tf index 1814dcb1c..26500d0ea 100644 --- a/datasets/idc/infra/idc_dataset.tf +++ b/datasets/idc/infra/idc_dataset.tf @@ -1,5 +1,5 @@ /** - * Copyright 2021 Google LLC + * Copyright 2022 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -261,6 +261,30 @@ output "bigquery_dataset-idc_v9-dataset_id" { value = google_bigquery_dataset.idc_v9.dataset_id } +resource "google_bigquery_dataset" "idc_v10" { + dataset_id = "idc_v10" + project = var.project_id + description = "Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) v10 data" +} + +data "google_iam_policy" "bq_ds__idc_v10" { + dynamic "binding" { + for_each = var.iam_policies["bigquery_datasets"]["idc_v10"] + content { + role = binding.value["role"] + members = binding.value["members"] + } + } +} + +resource "google_bigquery_dataset_iam_policy" "idc_v10" { + dataset_id = google_bigquery_dataset.idc_v10.dataset_id + policy_data = data.google_iam_policy.bq_ds__idc_v10.policy_data +} +output "bigquery_dataset-idc_v10-dataset_id" { + value = google_bigquery_dataset.idc_v10.dataset_id +} + resource "google_bigquery_dataset" "idc_current" { dataset_id = "idc_current" project = var.project_id diff --git a/datasets/idc/infra/provider.tf b/datasets/idc/infra/provider.tf index 23ab87dcd..dfb989c88 100644 --- a/datasets/idc/infra/provider.tf +++ b/datasets/idc/infra/provider.tf @@ -1,5 +1,5 @@ /** - * Copyright 2021 Google LLC + * Copyright 2022 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/datasets/idc/infra/variables.tf b/datasets/idc/infra/variables.tf index 53f483735..46c6dacda 100644 --- a/datasets/idc/infra/variables.tf +++ b/datasets/idc/infra/variables.tf @@ -1,5 +1,5 @@ /** - * Copyright 2021 Google LLC + * Copyright 2022 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/datasets/idc/pipelines/copy_tcia_data/copy_tcia_data_dag.py b/datasets/idc/pipelines/copy_tcia_data/copy_tcia_data_dag.py index 33ec2f013..fae34544b 100644 --- a/datasets/idc/pipelines/copy_tcia_data/copy_tcia_data_dag.py +++ b/datasets/idc/pipelines/copy_tcia_data/copy_tcia_data_dag.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -60,7 +60,7 @@ "TARGET_PROJECT_ID": "{{ var.json.idc.target_project_id }}", "SERVICE_ACCOUNT": "{{ var.json.idc.service_account }}", "DATASET_NAME": "idc", - "DATASET_VERSIONS": '["v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9"]', + "DATASET_VERSIONS": '["v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10"]', }, resources={"request_memory": "128M", "request_cpu": "200m"}, ) @@ -76,7 +76,7 @@ env_vars={ "SOURCE_PROJECT_ID": "{{ var.json.idc.source_project_id }}", "TARGET_PROJECT_ID": "{{ var.json.idc.target_project_id }}", - "BQ_DATASETS": '["idc_v1", "idc_v2", "idc_v3", "idc_v4", "idc_v5", "idc_v6", "idc_v7", "idc_v8", "idc_v9", "idc_current"]', + "BQ_DATASETS": '["idc_v1", "idc_v2", "idc_v3", "idc_v4", "idc_v5", "idc_v6", "idc_v7", "idc_v8", "idc_v9", "idc_v10", "idc_current"]', "SERVICE_ACCOUNT": "{{ var.json.idc.service_account }}", }, resources={"request_memory": "128M", "request_cpu": "200m"}, diff --git a/datasets/idc/pipelines/copy_tcia_data/pipeline.yaml b/datasets/idc/pipelines/copy_tcia_data/pipeline.yaml index 833a9460b..0ad25652c 100644 --- a/datasets/idc/pipelines/copy_tcia_data/pipeline.yaml +++ b/datasets/idc/pipelines/copy_tcia_data/pipeline.yaml @@ -58,7 +58,7 @@ dag: SERVICE_ACCOUNT: "{{ var.json.idc.service_account }}" DATASET_NAME: "idc" DATASET_VERSIONS: >- - ["v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9"] + ["v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10"] resources: request_memory: "128M" request_cpu: "200m" @@ -76,7 +76,7 @@ dag: SOURCE_PROJECT_ID: "{{ var.json.idc.source_project_id }}" TARGET_PROJECT_ID: "{{ var.json.idc.target_project_id }}" BQ_DATASETS: >- - ["idc_v1", "idc_v2", "idc_v3", "idc_v4", "idc_v5", "idc_v6", "idc_v7", "idc_v8", "idc_v9", "idc_current"] + ["idc_v1", "idc_v2", "idc_v3", "idc_v4", "idc_v5", "idc_v6", "idc_v7", "idc_v8", "idc_v9", "idc_v10", "idc_current"] SERVICE_ACCOUNT: "{{ var.json.idc.service_account }}" resources: request_memory: "128M" diff --git a/datasets/idc/pipelines/dataset.yaml b/datasets/idc/pipelines/dataset.yaml index bb5400e13..c49cd00f6 100644 --- a/datasets/idc/pipelines/dataset.yaml +++ b/datasets/idc/pipelines/dataset.yaml @@ -61,6 +61,10 @@ resources: dataset_id: idc_v9 description: Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) v9 data + - type: bigquery_dataset + dataset_id: idc_v10 + description: Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) v10 data + - type: bigquery_dataset dataset_id: idc_current description: Imaging Data Commons (IDC) - The Cancer Imaging Archive (TCIA) current data