# Description
This script automatically downloads all the monthly data from "European Air Quality Forecast" dataset of the CAMS within a pre-defined bounding box. In this case study, we will use a bounding box which contains the entire territory of Italy.   
All the .nc files downloaded have their names in **"Pollutant + yyyy + mm"** format, such as "O3202106.nc". 

In [1]:
import cdsapi
import pandas as pd
import geopandas as gpd
import os
c = cdsapi.Client()
cwd = os.getcwd()
cwd_CAMS = cwd + "\\CAMS"
cwd_NUTS = cwd + "\\NUTS"

## Create a bounding box based on the shapefile of Italy. 

In [2]:
# Read NUTS shapefile (level 0)
NUTS_0 = gpd.read_file(cwd_NUTS + '\\NUTS_RG_01M_2021_4326_LEVL_0.shp')
NUTS_0.head()

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
0,CZ,0,CZ,Česko,Česko,0,,0,CZ,"POLYGON ((14.49122 51.04353, 14.49945 51.04610..."
1,DE,0,DE,Deutschland,Deutschland,0,,0,DE,"MULTIPOLYGON (((10.45444 47.55580, 10.43954 47..."
2,DK,0,DK,Danmark,Danmark,0,,0,DK,"MULTIPOLYGON (((15.19308 55.32014, 15.19056 55..."
3,AL,0,AL,Shqipëria,Shqipëria,0,,0,AL,"MULTIPOLYGON (((19.83100 42.46645, 19.83568 42..."
4,CY,0,CY,Kýpros,Κύπρος,0,,0,CY,"MULTIPOLYGON (((34.60609 35.70767, 34.60060 35..."


In [3]:
# Filter the row of Italy
NUTS_IT = NUTS_0[NUTS_0['NUTS_ID']=='IT']
NUTS_IT

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
18,IT,0,IT,Italia,Italia,0,,0,IT,"MULTIPOLYGON (((12.24075 47.06917, 12.23652 47..."


In [9]:
# Create a bounding box. 
bounding_lon_min = NUTS_IT.bounds.iloc[0,0].round(2)
bounding_lon_max = NUTS_IT.bounds.iloc[0,2].round(2)
bounding_lat_min = NUTS_IT.bounds.iloc[0,1].round(2)
bounding_lat_max = NUTS_IT.bounds.iloc[0,3].round(2)
# Note that the order for the CAMS API bounding box is Lat Max -> Lon Min -> Lat Min -> Lon Max
bounding = [bounding_lat_max, bounding_lon_min, bounding_lat_min, bounding_lon_max]
bounding

[47.09, 6.63, 35.49, 18.52]

## Download monthly data in **2021**

In [5]:
# Create a list which consists of all the last days in 2021
month_endday_2021 = pd.date_range(start='31/01/2021', end='31/12/2021',freq='M')
month_endday_2021

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='M')

In [6]:
# Create a list which consists of all the first days in 2021
month_startday_2021 = month_endday_2021 - pd.offsets.MonthBegin()
month_startday_2021

DatetimeIndex(['2021-01-01', '2021-02-01', '2021-03-01', '2021-04-01',
               '2021-05-01', '2021-06-01', '2021-07-01', '2021-08-01',
               '2021-09-01', '2021-10-01', '2021-11-01', '2021-12-01'],
              dtype='datetime64[ns]', freq=None)

In [7]:
# Combine the startday and lastday of each month in order to be used in CAMS API
monthstartandend_2021 = []
for i in range(12):
    monthstartandend_tmp = str(month_startday_2021[i]).split()[0] + '/' + str(month_endday_2021[i]).split()[0]
    monthstartandend_2021.append(monthstartandend_tmp)
monthstartandend_2021

['2021-01-01/2021-01-31',
 '2021-02-01/2021-02-28',
 '2021-03-01/2021-03-31',
 '2021-04-01/2021-04-30',
 '2021-05-01/2021-05-31',
 '2021-06-01/2021-06-30',
 '2021-07-01/2021-07-31',
 '2021-08-01/2021-08-31',
 '2021-09-01/2021-09-30',
 '2021-10-01/2021-10-31',
 '2021-11-01/2021-11-30',
 '2021-12-01/2021-12-31']

### NO2

In [None]:
for i in range(12):
    pollutant = 'nitrogen_dioxide'
    pollutant_abbre = 'NO2'
    current_year = '2021'
    current_month = str(i+1).zfill(2)

    c.retrieve(
    'cams-europe-air-quality-forecasts',
    {
        'variable': pollutant,
        'model': 'ensemble',
        'level': '0',
        'date': monthstartandend_2021[i],
        'type': 'analysis',
        'time': [
            '00:00', '01:00', '02:00',
            '03:00', '04:00', '05:00',
            '06:00', '07:00', '08:00',
            '09:00', '10:00', '11:00',
            '12:00', '13:00', '14:00',
            '15:00', '16:00', '17:00',
            '18:00', '19:00', '20:00',
            '21:00', '22:00', '23:00',
        ],
        'leadtime_hour': '0',
        'format': 'netcdf',
        'area': bounding,
    },
    cwd_CAMS + '\\' + current_year + '\\' + pollutant_abbre + '\\' + pollutant_abbre + current_year + current_month + '.nc')

### Ozone

In [None]:
for i in range(12):
    pollutant = 'ozone'
    pollutant_abbre = 'O3'
    current_year = '2021'
    current_month = str(i+1).zfill(2)

    c.retrieve(
    'cams-europe-air-quality-forecasts',
    {
        'variable': pollutant,
        'model': 'ensemble',
        'level': '0',
        'date': monthstartandend_2021[i],
        'type': 'analysis',
        'time': [
            '00:00', '01:00', '02:00',
            '03:00', '04:00', '05:00',
            '06:00', '07:00', '08:00',
            '09:00', '10:00', '11:00',
            '12:00', '13:00', '14:00',
            '15:00', '16:00', '17:00',
            '18:00', '19:00', '20:00',
            '21:00', '22:00', '23:00',
        ],
        'leadtime_hour': '0',
        'format': 'netcdf',
        'area': bounding,
    },
    cwd_CAMS + '\\' + current_year + '\\' + pollutant_abbre + '\\' + pollutant_abbre + current_year + current_month + '.nc')

### PM 10

In [None]:
for i in range(12):
    pollutant = 'particulate_matter_10um'
    pollutant_abbre = 'PM10'
    current_year = '2021'
    current_month = str(i+1).zfill(2)

    c.retrieve(
    'cams-europe-air-quality-forecasts',
    {
        'variable': pollutant,
        'model': 'ensemble',
        'level': '0',
        'date': monthstartandend_2021[i],
        'type': 'analysis',
        'time': [
            '00:00', '01:00', '02:00',
            '03:00', '04:00', '05:00',
            '06:00', '07:00', '08:00',
            '09:00', '10:00', '11:00',
            '12:00', '13:00', '14:00',
            '15:00', '16:00', '17:00',
            '18:00', '19:00', '20:00',
            '21:00', '22:00', '23:00',
        ],
        'leadtime_hour': '0',
        'format': 'netcdf',
        'area': bounding,
    },
    cwd_CAMS + '\\' + current_year + '\\' + pollutant_abbre + '\\' + pollutant_abbre + current_year + current_month + '.nc')

### PM 2.5

In [None]:
for i in range(12):
    pollutant = 'particulate_matter_2.5um'
    pollutant_abbre = 'PM25'
    current_year = '2021'
    current_month = str(i+1).zfill(2)

    c.retrieve(
    'cams-europe-air-quality-forecasts',
    {
        'variable': pollutant,
        'model': 'ensemble',
        'level': '0',
        'date': monthstartandend_2021[i],
        'type': 'analysis',
        'time': [
            '00:00', '01:00', '02:00',
            '03:00', '04:00', '05:00',
            '06:00', '07:00', '08:00',
            '09:00', '10:00', '11:00',
            '12:00', '13:00', '14:00',
            '15:00', '16:00', '17:00',
            '18:00', '19:00', '20:00',
            '21:00', '22:00', '23:00',
        ],
        'leadtime_hour': '0',
        'format': 'netcdf',
        'area': bounding,
    },
    cwd_CAMS + '\\' + current_year + '\\' + pollutant_abbre + '\\' + pollutant_abbre + current_year + current_month + '.nc')