# CAMS functions

In [None]:
def CAMS_download(start_date, end_date, component, component_nom):

    """ Query and download the CAMS levels dataset from CDS API

        The request asks the 'cams-global-atmospheric-composition-forecasts'
        dataset for model levels 1 to 137 of the 00:00 forecast at lead times
        0, 6, 12 and 18 hours, in GRIB format. The result is stored as
        data/cams/<component_nom>/<component_nom>-levels-<start_date>-<end_date>.grib
        (relative to the working directory). Nothing is requested when that
        file already exists.

        Args:
            start_date (str): Query start date
            end_date (str): Query end date
            component (str): Component name
            component_nom (str): Component chemical nomenclature
    """

    # Build the target once so the existence check and the download cannot drift apart
    target = ('data/cams/' + component_nom + '/' + component_nom + '-levels-' +
              start_date + '-' + end_date + '.grib')

    if os.path.isfile(target):
        print('The file exists, it will not be downloaded again.')
        return

    print('The file does not exist, it will be downloaded.')

    # Create the component folder if needed, so the download has somewhere to be written
    os.makedirs(os.path.dirname(target), exist_ok = True)

    c = cdsapi.Client()
    c.retrieve(
        'cams-global-atmospheric-composition-forecasts',
        {
            'date': start_date + '/' + end_date,
            'type': 'forecast',
            'format': 'grib',
            'variable': component,
            # All 137 model levels, as strings '1' ... '137'
            'model_level': [str(level) for level in range(1, 138)],
            'time': '00:00',
            'leadtime_hour': [
                '0', '12', '18', '6',
            ],
        },
        target)

In [None]:
def CAMS_read(start_date, end_date, component, component_nom):

    """ Open the downloaded CAMS levels GRIB file as an xarray dataset

        The file is looked up at
        data/cams/<component_nom>/<component_nom>-levels-<start_date>-<end_date>.grib.
        Longitudes are wrapped into [-180, 180), both horizontal axes are
        sorted in ascending order and the data variable is renamed to
        'component'.

        Args:
            start_date (str): Query start date
            end_date (str): Query end date
            component (str): Component name
            component_nom (str): Component chemical nomenclature

        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    grib_path = ('data/cams/' + component_nom + '/' + component_nom + '-levels-' +
                 start_date + '-' + end_date + '.grib')
    levels_ds = xr.open_dataset(grib_path)

    # Wrap the longitudes into [-180, 180) and order both axes ascending
    wrapped_lon = ((levels_ds.longitude + 180) % 360) - 180
    levels_ds = levels_ds.assign_coords(longitude = wrapped_lon).sortby('longitude')
    levels_ds = levels_ds.sortby('latitude')

    # Ozone is renamed from 'go3'; any other component from its lower-cased nomenclature
    source_name = 'go3' if component == 'ozone' else component_nom.lower()

    return levels_ds.rename({source_name: 'component'})

In [None]:
def CAMS_tc_download(start_date, end_date, component, component_nom):

    """ Query and download the CAMS total columns dataset from CDS API

        The request asks the 'cams-global-atmospheric-composition-forecasts'
        dataset for the variable 'total_column_<component>' of the 00:00
        forecast at lead times 0, 6, 12 and 18 hours, in GRIB format. The
        result is stored as
        data/cams/<component_nom>/<component_nom>-total-<start_date>-<end_date>.grib
        (relative to the working directory). Nothing is requested when that
        file already exists.

        Args:
            start_date (str): Query start date
            end_date (str): Query end date
            component (str): Component name
            component_nom (str): Component chemical nomenclature
    """

    # Build the target once so the existence check and the download cannot drift apart
    target = ('data/cams/' + component_nom + '/' + component_nom + '-total-' +
              start_date + '-' + end_date + '.grib')

    if os.path.isfile(target):
        print('The file exists, it will not be downloaded again.')
        return

    print('The file does not exist, it will be downloaded.')

    # Create the component folder if needed, so the download has somewhere to be written
    os.makedirs(os.path.dirname(target), exist_ok = True)

    c = cdsapi.Client()
    c.retrieve(
        'cams-global-atmospheric-composition-forecasts',
        {
            'date': start_date + '/' + end_date,
            'type': 'forecast',
            'format': 'grib',
            'variable': 'total_column_' + component,
            'time': '00:00',
            'leadtime_hour': [
                '0', '12', '18', '6',
            ],
        },
        target)

In [None]:
def CAMS_tc_read(start_date, end_date, component, component_nom):

    """ Open the downloaded CAMS total columns GRIB file as an xarray dataset

        The file is looked up at
        data/cams/<component_nom>/<component_nom>-total-<start_date>-<end_date>.grib.
        Longitudes are wrapped into [-180, 180) and both horizontal axes are
        sorted in ascending order. The component argument is not used here.

        Args:
            start_date (str): Query start date
            end_date (str): Query end date
            component (str): Component name
            component_nom (str): Component chemical nomenclature

        Returns:
            CAMS_total_ds (xarray): CAMS total columns dataset in xarray format
    """

    grib_path = ('data/cams/' + component_nom + '/' + component_nom + '-total-' +
                 start_date + '-' + end_date + '.grib')
    total_ds = xr.open_dataset(grib_path)

    # Wrap the longitudes into [-180, 180) and order both axes ascending
    wrapped_lon = ((total_ds.longitude + 180) % 360) - 180
    total_ds = total_ds.assign_coords(longitude = wrapped_lon).sortby('longitude')

    return total_ds.sortby('latitude')

In [None]:
def CAMS_137_levels():

    """ Create table with information about the 137 CAMS levels

        Reads 'data/cams/137-levels.csv' (relative to the working directory),
        drops its first row, indexes the table by the column 'n' (renamed to
        'hybrid') and adds the derived columns:
        * 'ph [Pa]': 'ph [hPa]' multiplied by 100
        * 'ph-diff [Pa]': difference of 'ph [Pa]' with the previous row;
          the first row keeps its own 'ph [Pa]'
        * 'Depth [m]': difference of 'Geopotential Altitude [m]' with the
          next row; the last row keeps its own altitude

        Returns:
            CAMS_levels (dataframe): Table with 137 CAMS levels data
    """

    # Read csv table with 137 levels
    CAMS_levels = pd.read_csv('data/cams/137-levels.csv')

    # Drop first row and set n as index hybrid
    CAMS_levels = CAMS_levels.drop(0).reset_index(drop = True)
    CAMS_levels = CAMS_levels.set_index('n')
    CAMS_levels.index.names = ['hybrid']

    # Change important columns to numeric
    CAMS_levels['ph [Pa]'] = pd.to_numeric(CAMS_levels['ph [hPa]']) * 100
    CAMS_levels['Geopotential Altitude [m]'] = pd.to_numeric(CAMS_levels['Geopotential Altitude [m]'])
    CAMS_levels['Density [kg/m^3]'] = pd.to_numeric(CAMS_levels['Density [kg/m^3]'])

    # Pressure difference with the previous row. The edge value is filled on a
    # standalone Series before it is stored: writing through
    # CAMS_levels[col].iloc[...] is chained assignment, which pandas warns
    # about and does not apply to the table under copy-on-write.
    ph_diff = CAMS_levels['ph [Pa]'].diff(1)
    ph_diff.iloc[0] = CAMS_levels['ph [Pa]'].iloc[0]
    CAMS_levels['ph-diff [Pa]'] = ph_diff

    # Altitude difference with the next row; the last row keeps its own altitude
    depth = CAMS_levels['Geopotential Altitude [m]'].diff(-1)
    depth.iloc[-1] = CAMS_levels['Geopotential Altitude [m]'].iloc[-1]
    CAMS_levels['Depth [m]'] = depth

    return CAMS_levels

In [None]:
def CAMS_subset(CAMS_ds, bbox):

    """ Cut the CAMS levels dataset down to the grid cells closest to a bounding box.

        The bounding box is read as bbox[0] = (longitude, latitude) of the
        first corner and bbox[1] = (longitude, latitude) of the second one.
        Each corner value is mapped to a grid index with nearest_neighbour
        and the dataset is sliced by position, both ends included.

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            bbox (arr): Query bounding box

        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    lon_values = CAMS_ds.longitude.data
    lat_values = CAMS_ds.latitude.data

    # Grid positions closest to each edge of the box
    lon_first = nearest_neighbour(lon_values, bbox[0][0])
    lon_last = nearest_neighbour(lon_values, bbox[1][0])
    lat_first = nearest_neighbour(lat_values, bbox[0][1])
    lat_last = nearest_neighbour(lat_values, bbox[1][1])

    # The + 1 keeps the upper edge inside the positional slice
    return CAMS_ds.isel(longitude = slice(lon_first, lon_last + 1),
                        latitude = slice(lat_first, lat_last + 1))

In [None]:
def CAMS_kg_kg_to_kg_m2(CAMS_ds, CAMS_levels, conversion_method):

    """ Turn the CAMS partial columns of any component from kg/kg into kg/m2

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            CAMS_levels (dataframe): Table with 137 CAMS levels data
            conversion_method (str): Type of conversion. It can be:
            * Simple: Multiply every level by the density and the depth of its layer
            * Complex: Accumulate, level after level, the component times the
              pressure difference of the level divided by g, starting from 0
              at hybrid = 1
            Any other value leaves the dataset untouched.

        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    if conversion_method == 'Simple':

        # Levels table as xarray so it lines up with the dataset on 'hybrid'
        levels_xr = CAMS_levels.to_xarray()

        # kg/kg -> kg/m3 (density), then kg/m3 -> kg/m2 (layer depth)
        per_volume = CAMS_ds * levels_xr['Density [kg/m^3]']
        return per_volume * levels_xr['Depth [m]']

    if conversion_method == 'Complex':

        inverse_gravity = 1/9.81 # s2/m
        ph_diff = CAMS_levels['ph-diff [Pa]']

        # The partial column at the top of the atmosphere starts as 0
        columns = [0 * CAMS_ds.sel(hybrid = 1)]
        accumulated = xr.concat(columns, dim = 'hybrid')

        for level in range(1, CAMS_ds.hybrid.size):

            above = accumulated.sel(hybrid = level)
            mixing_ratio = CAMS_ds.sel(hybrid = level + 1)

            # Units: kg/kg * Pa (= kg/(m*s2)) * s2/m -> kg/m2
            columns.append(above + mixing_ratio * ph_diff.loc[level] * inverse_gravity)

            # Rebuilt on every pass because the next iteration selects from it
            accumulated = xr.concat(columns, pd.Index(range(1, level + 2), name = 'hybrid'))

        return accumulated

    return CAMS_ds

In [None]:
def CAMS_kg_m2_to_molecules_cm2(CAMS_ds, component_mol_weight):

    """ Turn total columns or levels of a CAMS dataset from kg/m2 into molecules/cm2

        The input is multiplied by Avogadro's number and by 1000, then divided
        by 10000 times the molecular weight.

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            component_mol_weight (float): Molecular weight of the component
                (presumably in g/mol, given the factor of 1000 applied to the mass)

        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    avogadro = 6.022*10**23
    grams_per_kg = 1000
    cm2_per_m2 = 10000

    numerator = CAMS_ds * avogadro * grams_per_kg
    denominator = cm2_per_m2 * component_mol_weight

    return numerator / denominator

In [None]:
def CAMS_interpolation(CAMS_ds, TROPOMI_ds, bbox, component_nom):

    """ Interpolate the data in the coordinates of CAMS dataset for each level to a grid of 100x100 
        and show how it compares to TROPOMI dataset

        Every (step, hybrid) slice of CAMS_ds.component is interpolated
        linearly with scipy.interpolate.griddata onto 100 evenly spaced
        longitudes and 100 evenly spaced latitudes spanning the bounding box.
        The slices are stacked along a new 'hybrid' index (0-based positions)
        and then along 'valid_time' (values taken from CAMS_ds.valid_time).

        A figure is also shown: the original grid on the left, the
        interpolated field with the new grid points and the TROPOMI
        coordinates on the right. The figure always uses position 136 on the
        hybrid axis and position 2 on the step / valid_time axis, so the input
        needs at least 137 levels and 3 steps.

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            TROPOMI_ds (xarray): TROPOMI dataset in xarray format
            bbox (arr): Query bounding box, read as bbox[0] = (longitude, latitude)
                        of one corner and bbox[1] = (longitude, latitude) of the other
            component_nom (str): Component chemical nomenclature
        
        Returns:
            CAMS_ds (xarray): Interpolated CAMS levels data, built by concatenating
                              data arrays (one per step and level)
    """

    # Grid data from CAMS
    x = CAMS_ds.longitude.values
    y = CAMS_ds.latitude.values
    x_old, y_old = np.meshgrid(x, y)

    # Grid data in 100x100
    xi = np.linspace(bbox[0][0], bbox[1][0], 100)
    yi = np.linspace(bbox[0][1], bbox[1][1], 100)
    x_new, y_new = np.meshgrid(xi, yi)

    # da_hybrid collects the levels of the current step, da_step the finished steps
    da_hybrid = []
    da_step = []

    for step in range(CAMS_ds.step.size):

        for hybrid in range(CAMS_ds.hybrid.size):
            
            z = CAMS_ds.isel(hybrid = hybrid, step = step).component.values

            # The target points are given as broadcastable row (xi) and column (yi)
            # arrays, so zi comes back as a 2D array with one row per yi value.
            # Targets outside the convex hull of the input points are NaN
            # (griddata default fill value).
            zi = scipy.interpolate.griddata((x_old.flatten(), y_old.flatten()), 
                                            z.flatten(), (xi[None,:], yi[:,None]), 
                                            method = 'linear')

            # Create data array for each layer
            # NOTE(review): the rows of zi follow yi (latitude) but the first dimension
            # is named 'lon' here; both axes have 100 points so this does not fail.
            # Confirm whether the dimension labels are intended before relying on them.
            da = xr.DataArray(data = xr.Variable(('lon', 'lat'), zi),
                            dims = ['lon', 'lat'],
                            coords = {'longitude': xr.Variable('lon', xi),
                                        'latitude': xr.Variable('lat', yi)
                                    }
            )

            # Append arrays for each layer
            da_hybrid.append(da)

        # Concatenate data arrays for all layers
        # (the new 'hybrid' index runs from 0 to size - 1, not the original level labels)
        da_hybrid_concat = xr.concat(da_hybrid, pd.Index(range(CAMS_ds.hybrid.size), 
                                                        name = 'hybrid'))

        da_step.append(da_hybrid_concat)
        # Start an empty list for the levels of the next step
        da_hybrid = []

    # Stack the steps along 'valid_time', labelled with the original valid times
    CAMS_ds_new = xr.concat(da_step, pd.Index(CAMS_ds.valid_time.values, 
                                            name = 'valid_time'))

    # VISUALIZATION

    # Hard-coded sample: position 136 on the hybrid axis and position 2 in time
    z_1L = CAMS_ds.isel(hybrid = 136, step = 2).component.values
    zi_1L = CAMS_ds_new.isel(hybrid = 136, valid_time = 2).values

    fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (20, 10))

    # Show old CAMS grid
    im1 = ax[0].scatter(x_old, y_old, c = z_1L, cmap = 'coolwarm', vmin = np.nanmin(z_1L), vmax = np.nanmax(z_1L))

    # Show contour plot of new CAMS data
    im2 = ax[1].contourf(x_new, y_new, zi_1L, cmap = 'coolwarm', vmin = np.nanmin(zi_1L), vmax = np.nanmax(zi_1L))

    # Show new CAMS grid
    ax[1].scatter(x_new, y_new, marker = 'o', c = 'grey', s = 3)

    # Show TROPOMI grid
    TROPOMI_lat = TROPOMI_ds['latitude'].values
    TROPOMI_lon = TROPOMI_ds['longitude'].values
    ax[1].scatter(TROPOMI_lon, TROPOMI_lat, marker = 'x', c = 'black', s = 30)

    # Add colorbars
    # NOTE(review): the unit in the labels is fixed text; confirm it matches the
    # units of the data passed in.
    cbr1 = fig.colorbar(im1, ax = ax[0])
    cbr2 = fig.colorbar(im2, ax = ax[1])
    cbr1.set_label(f'{component_nom} (mol/m²)', fontsize = 18)
    cbr2.set_label(f'{component_nom} (mol/m²)', fontsize = 18)

    # Same extent (the bounding box), labels and tick size on both panels
    for i in range(2):
        
        ax[i].set_xlim([bbox[0][0], bbox[1][0]])
        ax[i].set_ylim([bbox[0][1], bbox[1][1]])
        ax[i].set_xlabel('Longitude', fontsize = 18)
        ax[i].set_ylabel('Latitude', fontsize = 18)
        ax[i].tick_params(labelsize = 16)

    ax[0].set_title('Original', fontsize = 20, pad = 20)
    ax[1].set_title('Interpolated', fontsize = 20, pad = 20)
    # The level and time in the title are fixed text, not derived from the data
    fig.suptitle('COMPONENT FOR CAMS AT HYBRID = 137 AT 12:00', fontsize = 22)
    plt.show()

    return CAMS_ds_new