From 15701e27f59be2458592cfe82efa3fefeb51615e Mon Sep 17 00:00:00 2001 From: Andrew Parker Date: Wed, 27 May 2020 11:40:11 -0600 Subject: [PATCH] Check for presence of files on s3 before initializing glue crawler --- buildstockbatch/postprocessing.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index b1374787..b67a9127 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -417,10 +417,17 @@ def create_athena_tables(aws_conf, tbl_prefix, s3_bucket, s3_prefix): max_crawling_time = aws_conf.get('athena', {}).get('max_crawling_time', 600) assert db_name, "athena:database_name not supplied" + # Check that there are files in the s3 bucket before creating and running glue crawler + s3 = boto3.resource('s3') + bucket = s3.Bucket(s3_bucket) + s3_path = f's3://{s3_bucket}/{s3_prefix}' + n_existing_files = len(list(bucket.objects.filter(Prefix=s3_prefix))) + assert n_existing_files > 0, f"There are no files in {s3_path}, cannot create Athena tables using glue crawler" + glueClient = boto3.client('glue', region_name=region_name) crawlTarget = { 'S3Targets': [{ - 'Path': f's3://{s3_bucket}/{s3_prefix}', + 'Path': s3_path, 'Exclusions': [] }] }