From 51860eb593da8b58b8f9ca69bd30df0e3a506c08 Mon Sep 17 00:00:00 2001 From: Nicholas Large <84149918+nlarge-google@users.noreply.github.com> Date: Wed, 6 Jul 2022 16:16:01 -0500 Subject: [PATCH] Fix: NOAA - Resolve table field name issue. (#402) * feat: Submit new integrated version of dag * fix: as per PR * fix: Changes as per code review. * fix: Resolved location string lambda issue. Reformatted schema files. * fix: Resolution to ftp bouncing or connectivity issues. * fix: Fix for ticket http://b/238045897. * fix: Fix for ticket http://b/238045897. Fixed rename headers reference. --- datasets/noaa/pipelines/_images/ghcnd_stations_schema.json | 2 +- datasets/noaa/pipelines/noaa/noaa_dag.py | 6 +++--- datasets/noaa/pipelines/noaa/pipeline.yaml | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasets/noaa/pipelines/_images/ghcnd_stations_schema.json b/datasets/noaa/pipelines/_images/ghcnd_stations_schema.json index f8068f0f4..a72e10898 100644 --- a/datasets/noaa/pipelines/_images/ghcnd_stations_schema.json +++ b/datasets/noaa/pipelines/_images/ghcnd_stations_schema.json @@ -35,7 +35,7 @@ "mode": "NULLABLE" }, { - "name": "hcn_cm_flag", + "name": "hcn_crn_flag", "type": "STRING", "mode": "NULLABLE" }, diff --git a/datasets/noaa/pipelines/noaa/noaa_dag.py b/datasets/noaa/pipelines/noaa/noaa_dag.py index 3ee92891f..9b22fa0d5 100644 --- a/datasets/noaa/pipelines/noaa/noaa_dag.py +++ b/datasets/noaa/pipelines/noaa/noaa_dag.py @@ -27,7 +27,7 @@ dag_id="noaa.noaa", default_args=default_args, max_active_runs=1, - schedule_interval="0 6 * * 1", + schedule_interval="* 1 * * 6", catchup=False, default_view="graph", ) as dag: @@ -227,8 +227,8 @@ "DELETE_TARGET_FILE": "Y", "INPUT_CSV_HEADERS": '[\n "textdata"\n]', "DATA_DTYPES": '{\n "textdata": "str"\n}', - "REORDER_HEADERS_LIST": '[\n "id",\n "latitude",\n "longitude",\n "elevation",\n "state",\n "name",\n "gsn_flag",\n "hcn_cm_flag",\n "wmoid",\n "source_url",\n "etl_timestamp"\n]', - "SLICE_COLUMN_LIST": '{\n "id": ["textdata", "0", "11"],\n "latitude": ["textdata", "12", "20"],\n "longitude": ["textdata", "21", "30"],\n "elevation": ["textdata", "31", "37"],\n "state": ["textdata", "38", "40"],\n "name": ["textdata", "41", "71"],\n "gsn_flag": ["textdata", "72", "75"],\n "hcn_cm_flag": ["textdata", "76", "79"],\n "wmoid": ["textdata", "80", "85"]\n}', + "REORDER_HEADERS_LIST": '[\n "id",\n "latitude",\n "longitude",\n "elevation",\n "state",\n "name",\n "gsn_flag",\n "hcn_crn_flag",\n "wmoid",\n "source_url",\n "etl_timestamp"\n]', + "SLICE_COLUMN_LIST": '{\n "id": ["textdata", "0", "11"],\n "latitude": ["textdata", "12", "20"],\n "longitude": ["textdata", "21", "30"],\n "elevation": ["textdata", "31", "37"],\n "state": ["textdata", "38", "40"],\n "name": ["textdata", "41", "71"],\n "gsn_flag": ["textdata", "72", "75"],\n "hcn_crn_flag": ["textdata", "76", "79"],\n "wmoid": ["textdata", "80", "85"]\n}', }, resources={"request_ephemeral_storage": "4G", "limit_cpu": "3"}, ) diff --git a/datasets/noaa/pipelines/noaa/pipeline.yaml b/datasets/noaa/pipelines/noaa/pipeline.yaml index 66e06661c..54da56632 100644 --- a/datasets/noaa/pipelines/noaa/pipeline.yaml +++ b/datasets/noaa/pipelines/noaa/pipeline.yaml @@ -48,7 +48,7 @@ dag: depends_on_past: False start_date: '2021-03-01' max_active_runs: 1 - schedule_interval: "0 6 * * 1" # 06:00 on Monday + schedule_interval: "* 1 * * 6" catchup: False default_view: graph @@ -355,7 +355,7 @@ dag: "state", "name", "gsn_flag", - "hcn_cm_flag", + "hcn_crn_flag", "wmoid", "source_url", "etl_timestamp" @@ -369,7 +369,7 @@ dag: "state": ["textdata", "38", "40"], "name": ["textdata", "41", "71"], "gsn_flag": ["textdata", "72", "75"], - "hcn_cm_flag": ["textdata", "76", "79"], + "hcn_crn_flag": ["textdata", "76", "79"], "wmoid": ["textdata", "80", "85"] } resources: