In [4]:
import os
import sys
import glob
import cdsapi
import shutil
import datetime
import calendar
import zipfile
import numpy as np
import xarray as xr
from pathlib import Path
from datetime import timedelta

# Make the local package importable: the directory three levels above the
# current working directory is appended to the Python path (only once).
package_root = os.getcwd()
for _ in range(3):
    package_root = os.path.dirname(package_root)

if package_root not in sys.path:
    sys.path.append(package_root)
    
from wp4.constants import DATA_DIR_CAMS_RE, ADS_URL, ADS_KEY, EXTENTS, POLLUTANTS

import warnings  
# Filters out a SSL warning that is given when using the cdsapi and explicitly passing the url and key as parameters 
warnings.filterwarnings("ignore", message="Unverified HTTPS request is being made to host ")

In [5]:
# Create the temporary download directory (and any missing parents).
# exist_ok=True replaces the separate existence check, which avoids the
# check-then-create race and keeps the cell idempotent on re-runs.
Path(DATA_DIR_CAMS_RE).joinpath('temp').mkdir(parents=True, exist_ok=True)

## Download the data from the ADS API

In [6]:
# Initiate the API client with the explicitly configured ADS url and key
c = cdsapi.Client(
    url=ADS_URL,
    key=ADS_KEY
)

# Request one zip archive per pollutant / year / month combination and store
# it in the temp folder as re_<CAMS name>_<month>_<year>.zip
for pollutant in POLLUTANTS:
    for year in [2018]:  # Currently only 2018 is available in the ADS
        for month in [str(x).zfill(2) for x in range(1, 13)]:

            # Months that start in the future cannot be requested yet.
            # Note: this only leaves the month loop; the outer loops continue.
            if datetime.date(year=year, month=int(month), day=1) > datetime.date.today():
                print('Download process completed')
                break

            params = {
                'variable': [
                    POLLUTANTS[pollutant]['ADS_PARAM'],
                ],
                'model': 'ensemble',
                'type': 'validated_reanalysis',
                'level': '0',
                'year': year,
                'month': month,
                'format': 'zip',
                # Pad the configured extent by 0.1: elements 0 and 3 are increased,
                # elements 1 and 2 are decreased.
                # NOTE(review): presumably the list is ordered [north, west, south, east] - confirm.
                'area': [x + 0.1 if ind in [0, 3] else x - 0.1 for ind, x in enumerate(EXTENTS['IRELAND']['LIST'])],
            }

            target = Path(DATA_DIR_CAMS_RE).joinpath(f'temp/re_{POLLUTANTS[pollutant]["CAMS"]}_{month}_{year}.zip')

            # Send the request to the server; any error raised by the client
            # propagates unchanged and stops the cell.
            c.retrieve(
                'cams-europe-air-quality-reanalyses',
                params,
                target
            )

2021-12-13 12:38:45,005 INFO Welcome to the CDS
2021-12-13 12:38:45,008 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:38:45,110 INFO Request is completed
2021-12-13 12:38:45,113 INFO Downloading https://download-0000.copernicus-atmosphere.eu/cache-compute-0000/cache/data8/dataset-cams-europe-air-quality-reanalyses-9a3f32cc-02b2-43a3-8209-1ec6c5897fc1.zip to F:\Test_2\cams_reanalyses\temp\re_co_conc_01_2018.zip (6.1M)
2021-12-13 12:38:50,401 INFO Download rate 1.2M/s                                                                                                                                         
2021-12-13 12:38:50,478 INFO Welcome to the CDS
2021-12-13 12:38:50,480 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:38:50,528 INFO Request is queued
2021-12-13 12:38:51,565 INFO Request is running
2021-12-13 12:39:22,959 INFO Re

2021-12-13 12:41:34,445 INFO Download rate 4M/s                                                                                                                                           
2021-12-13 12:41:34,529 INFO Welcome to the CDS
2021-12-13 12:41:34,531 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:41:34,570 INFO Request is queued
2021-12-13 12:41:35,602 INFO Request is running
2021-12-13 12:41:47,951 INFO Request is completed
2021-12-13 12:41:47,953 INFO Downloading https://download-0001.copernicus-atmosphere.eu/cache-compute-0001/cache/data1/dataset-cams-europe-air-quality-reanalyses-da92e79c-f11c-433d-82c2-c884e3ae0e54.zip to F:\Test_2\cams_reanalyses\temp\re_co_conc_12_2018.zip (6.2M)
2021-12-13 12:41:49,457 INFO Download rate 4.2M/s                                                                                                                                         
2021-12-13 12:41:49,544 INFO 

2021-12-13 12:43:56,234 INFO Request is queued
2021-12-13 12:43:57,270 INFO Request is running
2021-12-13 12:44:04,528 INFO Request is completed
2021-12-13 12:44:04,532 INFO Downloading https://download-0002.copernicus-atmosphere.eu/cache-compute-0002/cache/data2/dataset-cams-europe-air-quality-reanalyses-d939f682-7a33-45e1-bfde-ed3c76e773b6.zip to F:\Test_2\cams_reanalyses\temp\re_o3_conc_10_2018.zip (6.5M)
2021-12-13 12:44:06,761 INFO Download rate 2.9M/s                                                                                                                                         
2021-12-13 12:44:06,851 INFO Welcome to the CDS
2021-12-13 12:44:06,852 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:44:06,889 INFO Request is queued
2021-12-13 12:44:07,926 INFO Request is running
2021-12-13 12:44:15,203 INFO Request is completed
2021-12-13 12:44:15,204 INFO Downloading https://download-0001.copernic

2021-12-13 12:46:36,514 INFO Download rate 2.7M/s                                                                                                                                         
2021-12-13 12:46:36,586 INFO Welcome to the CDS
2021-12-13 12:46:36,587 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:46:36,622 INFO Request is queued
2021-12-13 12:46:37,649 INFO Request is running
2021-12-13 12:46:50,003 INFO Request is completed
2021-12-13 12:46:50,004 INFO Downloading https://download-0003.copernicus-atmosphere.eu/cache-compute-0003/cache/data8/dataset-cams-europe-air-quality-reanalyses-287d3810-140b-4d2a-a577-c4a2870f753d.zip to F:\Test_2\cams_reanalyses\temp\re_no_conc_09_2018.zip (7M)
2021-12-13 12:46:52,411 INFO Download rate 2.9M/s                                                                                                                                         
2021-12-13 12:46:52,486 INFO We

2021-12-13 12:49:22,932 INFO Request is queued
2021-12-13 12:49:23,963 INFO Request is running
2021-12-13 12:49:55,361 INFO Request is completed
2021-12-13 12:49:55,362 INFO Downloading https://download-0002.copernicus-atmosphere.eu/cache-compute-0002/cache/data8/dataset-cams-europe-air-quality-reanalyses-0e194e50-a398-4012-8d69-3f247739415e.zip to F:\Test_2\cams_reanalyses\temp\re_no2_conc_07_2018.zip (6.9M)
2021-12-13 12:49:57,299 INFO Download rate 3.6M/s                                                                                                                                         
2021-12-13 12:49:57,383 INFO Welcome to the CDS
2021-12-13 12:49:57,384 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:49:57,423 INFO Request is queued
2021-12-13 12:49:58,473 INFO Request is running
2021-12-13 12:50:05,698 INFO Request is completed
2021-12-13 12:50:05,700 INFO Downloading https://download-0001.coperni

2021-12-13 12:53:06,243 INFO Download rate 3.5M/s                                                                                                                                         
2021-12-13 12:53:06,315 INFO Welcome to the CDS
2021-12-13 12:53:06,316 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:53:06,350 INFO Request is queued
2021-12-13 12:53:07,384 INFO Request is running
2021-12-13 12:53:14,877 INFO Request is completed
2021-12-13 12:53:14,878 INFO Downloading https://download-0002.copernicus-atmosphere.eu/cache-compute-0002/cache/data0/dataset-cams-europe-air-quality-reanalyses-4d226555-aa17-4416-84f7-bf536c5788f0.zip to F:\Test_2\cams_reanalyses\temp\re_pm2p5_conc_06_2018.zip (6.6M)
2021-12-13 12:53:16,571 INFO Download rate 3.9M/s                                                                                                                                         
2021-12-13 12:53:16,654 IN

2021-12-13 12:56:53,912 INFO Request is queued
2021-12-13 12:56:55,017 INFO Request is running
2021-12-13 12:57:26,680 INFO Request is completed
2021-12-13 12:57:26,681 INFO Downloading https://download-0003.copernicus-atmosphere.eu/cache-compute-0003/cache/data9/dataset-cams-europe-air-quality-reanalyses-a615cb13-2a92-4c20-96f7-41cdbe1caf4e.zip to F:\Test_2\cams_reanalyses\temp\re_pm10_conc_04_2018.zip (6.6M)
2021-12-13 12:57:28,872 INFO Download rate 3M/s                                                                                                                                           
2021-12-13 12:57:28,955 INFO Welcome to the CDS
2021-12-13 12:57:28,956 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 12:57:28,994 INFO Request is queued
2021-12-13 12:57:30,028 INFO Request is running
2021-12-13 12:58:01,460 INFO Request is completed
2021-12-13 12:58:01,461 INFO Downloading https://download-0003.copern

2021-12-13 13:00:44,640 INFO Download rate 2.9M/s                                                                                                                                         
2021-12-13 13:00:44,721 INFO Welcome to the CDS
2021-12-13 13:00:44,723 INFO Sending request to https://ads.atmosphere.copernicus.eu/api/v2/resources/cams-europe-air-quality-reanalyses
2021-12-13 13:00:44,767 INFO Request is queued
2021-12-13 13:00:45,801 INFO Request is running
2021-12-13 13:00:53,039 INFO Request is completed
2021-12-13 13:00:53,040 INFO Downloading https://download-0001.copernicus-atmosphere.eu/cache-compute-0001/cache/data5/dataset-cams-europe-air-quality-reanalyses-84768467-e238-42b7-8e68-953b250960da.zip to F:\Test_2\cams_reanalyses\temp\re_so2_conc_03_2018.zip (6.9M)
2021-12-13 13:00:55,285 INFO Download rate 3.1M/s                                                                                                                                         
2021-12-13 13:00:55,352 INFO

## Combine the monthly datasets per pollutant into a single dataset per pollutant

In [7]:
# For every pollutant: unzip the monthly downloads, merge them along the time
# dimension, regrid to a 0.1 step grid over the configured extent and write a
# single netCDF file named <CAMS name>.nc into DATA_DIR_CAMS_RE.
for pollutant in POLLUTANTS:

    cams_name = POLLUTANTS[pollutant]["CAMS"]
    temp_dir = Path(DATA_DIR_CAMS_RE).joinpath('temp')
    extract_dir = temp_dir.joinpath(cams_name)

    # exist_ok avoids a separate existence check and keeps the cell re-runnable
    extract_dir.mkdir(parents=True, exist_ok=True)

    # extract every monthly archive of this pollutant into its own folder
    zip_folders = glob.glob(temp_dir.joinpath(f're_{cams_name}*.zip').as_posix())
    for folder in zip_folders:
        with zipfile.ZipFile(folder, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

    nc_files = glob.glob(extract_dir.joinpath('*.nc').as_posix())
    if not nc_files:
        # fail early with a clear message instead of an obscure error further down
        raise FileNotFoundError(f'No netCDF files found for {cams_name} in {extract_dir}')

    monthly_datasets = []
    try:
        # open each nc file using xarray; the list is filled one by one so that
        # already opened files are still closed if a later open fails
        for nc_file in nc_files:
            monthly_datasets.append(xr.open_dataset(nc_file))

        # combine the data along the time dimension using the xarray combine_nested function
        ds = xr.combine_nested(monthly_datasets, concat_dim='time', combine_attrs='drop_conflicts')

        # rename the variables to match the naming of the CAMS NRT analysis product
        ds = ds.rename(
            {
                'lon': 'longitude',
                'lat': 'latitude',
                list(ds.data_vars)[0]: cams_name
            })

        # sort the data based on date
        ds_sorted = ds.sortby('time')

        # flip the order of the latitude axis
        ds_reindexed = ds_sorted.reindex(latitude=ds_sorted.latitude.values[::-1])

        # target grid: 0.1 step, west -> east and north -> south
        new_long = np.arange(EXTENTS['IRELAND']['WEST'], EXTENTS['IRELAND']['EAST'] + 0.1, 0.1)
        new_lat = np.arange(EXTENTS['IRELAND']['NORTH'], EXTENTS['IRELAND']['SOUTH'] - 0.1, -0.1)
        ds_interp = ds_reindexed.interp(longitude=list(new_long), latitude=list(new_lat))

        # save as a single netcdf file
        ds_interp.to_netcdf(Path(DATA_DIR_CAMS_RE).joinpath(f'{cams_name}.nc'))
    finally:
        # release the file handles of the monthly source files
        for monthly_ds in monthly_datasets:
            monthly_ds.close()

## Upload to Amazon S3 bucket

In [8]:
import boto3
from wp4.constants import S3_ACCESS_KEY, S3_SECRET_KEY, S3_ENDPOINT

# Name of the bucket that receives the zipped reanalysis files
TARGET_BUCKET = "cams-reanalysis"

# S3 client that talks to the configured endpoint with the project credentials
s3_client = boto3.client(
    service_name='s3',
    endpoint_url=S3_ENDPOINT,
    aws_access_key_id=S3_ACCESS_KEY,
    aws_secret_access_key=S3_SECRET_KEY,
)

In [9]:
# Check if bucket exists and if not create new bucket
existing_bucket_names = [bucket['Name'] for bucket in s3_client.list_buckets()['Buckets']]

if TARGET_BUCKET not in existing_bucket_names:
    print(f'Creating S3 bucket: {TARGET_BUCKET}')
    # an error raised by create_bucket propagates unchanged and stops the cell
    s3_client.create_bucket(Bucket=TARGET_BUCKET)
    print(f'Bucket {TARGET_BUCKET} created')

    # list the buckets again so the printed overview includes the new one
    existing_bucket_names = [bucket['Name'] for bucket in s3_client.list_buckets()['Buckets']]
    print(f'Current buckets: {", ".join(existing_bucket_names)}')

In [10]:
DATA_DIR = Path(DATA_DIR_CAMS_RE)  # Directory where the file is located
FILE_TYPE = '.nc'  # file extension .nc .tif etc
OVERWRITE_FILE = True  # overwrite the file if already present in the bucket

# The zip files below are created with names relative to the working directory
os.chdir(DATA_DIR)

# Zip the combined netCDF file of every pollutant and upload it to TARGET_BUCKET
for pollutant in POLLUTANTS:
    FILE_NAME = POLLUTANTS[pollutant]["CAMS"]
    file_name = f'{FILE_NAME}{FILE_TYPE}'
    out_zip_file = f'{FILE_NAME}.zip'
    out_zip_file_loc = DATA_DIR.joinpath(f'{FILE_NAME}.zip')

    # create zip folder with the file we want to compress; the context manager
    # guarantees the archive is finalised and closed before it is uploaded
    with zipfile.ZipFile(out_zip_file, mode='w') as zip_out:
        zip_out.write(
            file_name,
            compress_type=zipfile.ZIP_DEFLATED
        )

    # Look up only keys starting with the target name: an unfiltered listing is
    # limited to one page of results and could miss the key in a large bucket
    bucket_contents = s3_client.list_objects_v2(Bucket=TARGET_BUCKET, Prefix=out_zip_file)
    bucket_content_names = [f['Key'] for f in bucket_contents.get("Contents", [])]
    already_present = out_zip_file in bucket_content_names

    if already_present and not OVERWRITE_FILE:
        # if there is already a file with the same name in the bucket and we do not want to overwrite
        print(f'{out_zip_file} already present in bucket')
        continue

    if already_present:
        # if there is already a file with the same name in the bucket and we do want to overwrite
        print(f'{out_zip_file} already present in bucket. Will overwrite this file.')

    # an error raised by upload_file propagates unchanged and stops the cell
    print(f'Starting upload of {out_zip_file} to bucket: {TARGET_BUCKET}')
    s3_client.upload_file(out_zip_file_loc.as_posix(), TARGET_BUCKET, out_zip_file)
    print(f'Upload of {out_zip_file} to bucket: {TARGET_BUCKET} completed')

# remove the zip files (every *.zip directly inside DATA_DIR)

zip_files = glob.glob(DATA_DIR.joinpath('*.zip').as_posix())

for z in zip_files:
    os.remove(z)

co_conc.zip already present in bucket. Will overwrite this file.
Starting upload of co_conc.zip to bucket: cams-reanalysis
Upload of co_conc.zip to bucket: cams-reanalysis completed
o3_conc.zip already present in bucket. Will overwrite this file.
Starting upload of o3_conc.zip to bucket: cams-reanalysis
Upload of o3_conc.zip to bucket: cams-reanalysis completed
no_conc.zip already present in bucket. Will overwrite this file.
Starting upload of no_conc.zip to bucket: cams-reanalysis
Upload of no_conc.zip to bucket: cams-reanalysis completed
no2_conc.zip already present in bucket. Will overwrite this file.
Starting upload of no2_conc.zip to bucket: cams-reanalysis
Upload of no2_conc.zip to bucket: cams-reanalysis completed
pm2p5_conc.zip already present in bucket. Will overwrite this file.
Starting upload of pm2p5_conc.zip to bucket: cams-reanalysis
Upload of pm2p5_conc.zip to bucket: cams-reanalysis completed
pm10_conc.zip already present in bucket. Will overwrite this file.
Starting up