Skip to content
This repository was archived by the owner on Apr 14, 2023. It is now read-only.

Commit 19a3ddd

Browse files
committed
airflow envs and aspace index
- handles both prod and stage airflow environments
- uses a fixed aspace index in ES, since we won't reindex as often as we do with aleph (at least for now)
1 parent c30eb7e commit 19a3ddd

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

docker-compose.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ services:
1717
AIRFLOW__CORE__SQL_ALCHEMY_CONN: "postgresql+psycopg2://postgres@db/postgres"
1818
AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 30
1919
AIRFLOW__SCHEDULER__CATCHUP_BY_DEFAULT: "False"
20+
AIRFLOW_ENVIRONMENT: "stage"
2021
AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
2122
AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
2223
AWS_DEFAULT_REGION: "${AWS_DEFAULT_REGION}"
@@ -43,6 +44,7 @@ services:
4344
AIRFLOW__CORE__EXECUTOR: "CeleryExecutor"
4445
AIRFLOW__CELERY__BROKER_URL: "redis://redis:6379"
4546
AIRFLOW__CELERY__RESULT_BACKEND: "db+postgresql://postgres@db/postgres"
47+
AIRFLOW_ENVIRONMENT: "stage"
4648
AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
4749
AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
4850
AWS_DEFAULT_REGION: "${AWS_DEFAULT_REGION}"
@@ -69,6 +71,7 @@ services:
6971
AIRFLOW__CORE__EXECUTOR: "CeleryExecutor"
7072
AIRFLOW__CELERY__BROKER_URL: "redis://redis:6379"
7173
AIRFLOW__CELERY__RESULT_BACKEND: "db+postgresql://postgres@db/postgres"
74+
AIRFLOW_ENVIRONMENT: "stage"
7275
AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
7376
AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
7477
AWS_DEFAULT_REGION: "${AWS_DEFAULT_REGION}"

workflows/aspace_harvest.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
network_config = json.loads(base64.b64decode(os.getenv('ECS_NETWORK_CONFIG')))
1717
cluster = os.getenv('ECS_CLUSTER')
1818
es_url = os.getenv('ES_URL')
19+
air_env = os.getenv('AIRFLOW_ENVIRONMENT')
1920

2021

2122
def set_s3():
2223
today = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S')
23-
s3_key = "s3://aspace-oai-s3-stage/{file}.xml".format(file=today)
24+
s3_key = f"s3://aspace-oai-s3-{air_env}/{today}.xml"
2425
logging.info(s3_key)
2526
return s3_key
2627

@@ -50,13 +51,12 @@ def check_if_records(**context):
5051
harvest = ECSOperator(task_id='harvest_step_1',
5152
dag=dag,
5253
cluster=cluster,
53-
task_definition='airflow-stage-oaiharvester',
54+
task_definition=f"airflow-{air_env}-oaiharvester",
5455
overrides={'containerOverrides': [{
5556
'command': ["--out={{ task_instance.xcom_pull(task_ids='set_s3') }}",
5657
'--host=https://archivesspace.mit.edu/oai',
57-
'--format=oai_ead',
58-
'--verbose', ],
59-
'name': 'airflow-stage-oaiharvester',
58+
'--format=oai_ead', ],
59+
'name': f"airflow-{air_env}-oaiharvester",
6060
}]},
6161
network_configuration=network_config)
6262

@@ -79,16 +79,17 @@ def check_if_records(**context):
7979
ingest = ECSOperator(task_id='harvest_step_3',
8080
dag=dag,
8181
cluster=cluster,
82-
task_definition='airflow-stage-mario',
82+
task_definition=f"airflow-{air_env}-mario",
8383
overrides={'containerOverrides': [{
84-
'command': ['--url=' + es_url,
84+
'command': [f"--url={es_url}",
85+
'--index=aspace_2019_12',
8586
'ingest',
8687
'--prefix=aspace',
8788
'--type=archives',
8889
'--auto',
8990
'--debug',
9091
"{{ task_instance.xcom_pull(task_ids='set_s3') }}", ],
91-
'name': 'airflow-stage-mario',
92+
'name': f"airflow-{air_env}-mario",
9293
}]},
9394
network_configuration=network_config)
9495

0 commit comments

Comments
 (0)