# CAMS functions

In [None]:
def CAMS_download(dates, start_date, end_date, component, component_nom, model_full_name, model_level):

    """ Query and download a CAMS dataset from the CDS API, unless it is already on disk

        The GRIB file is stored as data/cams/<component_nom>/<CAMS_product_name>, relative to the
        current working directory. The CDS client is only created when a download is needed.

        Args:
            dates (arr): Query dates as strings whose first two '-'-separated fields are year and
                         month (only read for the monthly reanalysis)
            start_date (str): Query start date
            end_date (str): Query end date
            component (str): Component name
            component_nom (str): Component chemical nomenclature
            model_full_name (str): Full name of the CAMS model among:
            - 'cams-global-atmospheric-composition-forecasts' 
            - 'cams-global-reanalysis-eac4-monthly'
            model_level (str): Model levels (only read for the forecast model):
            -  'Single' for total columns
            -  'Multiple' for levels

        Returns:
            CAMS_product_name (str): Product name of CAMS product
            CAMS_type (str): Model type:
            -  'Forecast'
            -  'Reanalysis'

        Raises:
            ValueError: If model_full_name or model_level is not one of the supported values
    """

    # Resolve the product name and type first, so that unsupported options fail
    # with a clear message instead of an unbound variable at the return statement
    if model_full_name == 'cams-global-atmospheric-composition-forecasts':

        CAMS_type = 'Forecast'

        if model_level == 'Multiple':
            CAMS_product_name = component_nom + '-hourly-levels-' + start_date + '-' + end_date + '.grib'

        elif model_level == 'Single':
            CAMS_product_name = component_nom + '-hourly-tc-' + start_date + '-' + end_date + '.grib'

        else:
            raise ValueError("model_level must be 'Single' or 'Multiple', got " + repr(model_level))

    elif model_full_name == 'cams-global-reanalysis-eac4-monthly':

        CAMS_type = 'Reanalysis'
        CAMS_product_name = component_nom + '-monthly-tc-' + start_date + '-' + end_date + '.grib'

    else:
        raise ValueError('Unsupported CAMS model: ' + repr(model_full_name))

    target = 'data/cams/' + component_nom + '/' + CAMS_product_name

    if os.path.isfile(target):

        print('The file exists, it will not be downloaded again.')
        return CAMS_product_name, CAMS_type

    print('The file does not exist, it will be downloaded.')

    if CAMS_type == 'Forecast':

        request = {
            'date': start_date + '/' + end_date,
            'type': 'forecast',
            'format': 'grib',
            'time': '00:00',
            'leadtime_hour': [str(x) for x in range(0, 24, 3)],
        }

        if model_level == 'Multiple':
            request['variable'] = component
            request['model_level'] = [str(x + 1) for x in range(137)]

        else:
            request['variable'] = 'total_column_' + component

    else:

        # The request takes separate year and month lists, so every listed month
        # is requested for every listed year
        years = []
        months = []

        for date in dates:

            year = date.split('-')[0]
            month = date.split('-')[1]

            if year not in years:
                years.append(year)

            if month not in months:
                months.append(month)

        request = {
            'format': 'grib',
            'variable': 'total_column_' + component,
            'year': years,
            'month': months,
            'product_type': 'monthly_mean',
        }

    # Make sure the destination folder exists before the client writes the file
    os.makedirs(os.path.dirname(target), exist_ok = True)

    c = cdsapi.Client()
    c.retrieve(model_full_name, request, target)

    return CAMS_product_name, CAMS_type

In [None]:
def CAMS_read(CAMS_product_name, component, component_nom, dates):

    """ Read a downloaded CAMS product as xarray dataset object

        The data variable is renamed to 'component', monthly products are restricted to the
        requested months, longitudes are wrapped to [-180, 180) and both horizontal coordinates
        are sorted in ascending order.

        Args:
            CAMS_product_name (str): Product name of CAMS product
            component (str): Component name
            component_nom (str): Component chemical nomenclature
            dates (arr): Query dates as strings whose first two '-'-separated fields are year and
                         month (only read for monthly products)
            
        Returns:
            CAMS_ds (xarray): CAMS dataset in xarray format
            dates (arr): Query dates; for monthly products, a tuple with the requested months
                         that are present in the file
    """

    # Read as xarray dataset object
    CAMS_ds = xr.open_dataset('data/cams/' + component_nom + '/' + CAMS_product_name)

    # Change name to component
    if 'levels' in CAMS_product_name:

        if component == 'ozone':
            CAMS_ds = CAMS_ds.rename({'go3': 'component'})

        else:
            CAMS_ds = CAMS_ds.rename({component_nom.lower(): 'component'})

    elif 'tc' in CAMS_product_name:
        
        if component == 'ozone':
            CAMS_ds = CAMS_ds.rename({'gtco3': 'component'})

        else:
            CAMS_ds = CAMS_ds.rename({'tc' + component_nom.lower(): 'component'})

    # Remove data for dates that have been downloaded but not asked for (error of the CAMS API!)
    if 'monthly' in CAMS_product_name:
        
        requested_months = []

        for date in dates:

            year = int(date.split('-')[0])
            month = int(date.split('-')[1])
            requested_months.append(np.datetime64(dt.datetime(year, month, 1, 0, 0, 0, 0)))

        requested_months = np.array(requested_months)

        # Drop datetimes
        datetimes_to_delete = np.setdiff1d(CAMS_ds.time.values, requested_months)
        if datetimes_to_delete.size != 0:
            CAMS_ds = CAMS_ds.drop_sel(time = datetimes_to_delete) 

        # Available dates
        dates_to_keep = np.intersect1d(CAMS_ds.time.values, requested_months)
        dates = tuple(dates_to_keep.astype('datetime64[M]').astype(str))
        
    # Change longitude coordinates
    CAMS_ds = CAMS_ds.assign_coords(longitude = (((CAMS_ds.longitude + 180) % 360) - 180)).sortby('longitude')
    CAMS_ds = CAMS_ds.sortby('latitude')

    # Assign time as a dimension when the file holds a single time. A time axis of
    # length one is already a dimension, and expanding it again would raise an error
    if CAMS_ds.time.values.size == 1 and 'time' not in CAMS_ds.dims:
        CAMS_ds = CAMS_ds.expand_dims(dim = ['time'])

    return CAMS_ds, dates

In [None]:
def CAMS_137_levels():

    """ Create table with information about the 137 CAMS levels

        The table is read from data/cams/137-levels.csv (relative to the current working
        directory). Its first row is discarded and the column 'n' becomes the index 'hybrid'.
        Three columns are added: 'ph [Pa]', 'ph-diff [Pa]' and 'Depth [m]'.
    
        Returns:
            CAMS_levels_df (dataframe): Table with 137 CAMS levels data
    """

    # Read csv table with 137 levels
    CAMS_levels_df = pd.read_csv('data/cams/137-levels.csv')

    # Drop first row and set n as index hybrid
    CAMS_levels_df = CAMS_levels_df.drop(0).reset_index(drop = True)
    CAMS_levels_df = CAMS_levels_df.set_index('n')
    CAMS_levels_df.index.names = ['hybrid']

    # Change important columns to numeric
    CAMS_levels_df['ph [Pa]'] = pd.to_numeric(CAMS_levels_df['ph [hPa]']) * 100
    CAMS_levels_df['Geopotential Altitude [m]'] = pd.to_numeric(CAMS_levels_df['Geopotential Altitude [m]'])
    CAMS_levels_df['Density [kg/m^3]'] = pd.to_numeric(CAMS_levels_df['Density [kg/m^3]'])

    # Pressure difference with the previous row; the first row keeps its own pressure.
    # The boundary value is set on a standalone Series, because chained assignment on
    # the dataframe (df[col].iloc[0] = ...) is not applied under pandas copy-on-write
    ph_diff = CAMS_levels_df['ph [Pa]'].diff(1)
    ph_diff.iloc[0] = CAMS_levels_df['ph [Pa]'].iloc[0]
    CAMS_levels_df['ph-diff [Pa]'] = ph_diff

    # Altitude difference with the next row; the last row keeps its own altitude
    depth = CAMS_levels_df['Geopotential Altitude [m]'].diff(-1)
    depth.iloc[-1] = CAMS_levels_df['Geopotential Altitude [m]'].iloc[-1]
    CAMS_levels_df['Depth [m]'] = depth

    return CAMS_levels_df

In [None]:
def CAMS_pressure(CAMS_ds, CAMS_levels_df, start_date, end_date, component_nom):

    """ Add the pressure at each model level to the CAMS dataset, following the instructions given at:
        https://confluence.ecmwf.int/display/OIFS/4.4+OpenIFS%3A+Vertical+Resolution+and+Configurations

        The forecast surface pressure is downloaded from the CDS API when its GRIB file is not
        found under data/cams/<component_nom>/. Half-level pressures are computed as
        a + b * surface_pressure and the variable 'pressure' is the mean of each half level and
        the one before it (0 before the first level).

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            CAMS_levels_df (dataframe): Table with 137 CAMS levels data
            start_date (str): Query start date
            end_date (str): Query end date
            component_nom (str): Component chemical nomenclature
            
        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format, with the variable 'pressure'
    """

    sp_file_name = 'surface-pressure-' + start_date + '-' + end_date + '.grib'
    sp_file_path = 'data/cams/' + component_nom + '/' + sp_file_name

    # Query the API only when the surface pressure file is missing
    if not os.path.isfile(os.path.join(os.path.abspath(''), sp_file_path)):

        print('The surface pressures will be downloaded...')

        sp_request = {
            'date': start_date + '/' + end_date,
            'type': 'forecast',
            'format': 'grib',
            'variable': 'surface_pressure',
            'leadtime_hour': [str(x) for x in range(0, 24, 3)],
            'time': '00:00',
        }
        client = cdsapi.Client()
        client.retrieve('cams-global-atmospheric-composition-forecasts', sp_request, sp_file_path)

    # Coordinates of the CAMS dataset, reused for the arrays built below
    hybrid_coord = CAMS_ds['hybrid']
    horizontal_coords = {
        name: (name, CAMS_ds[name][:])
        for name in ('time', 'step', 'latitude', 'longitude')
    }

    surface_pressure = xr.open_dataarray(sp_file_path)

    # A file with a single time has no time dimension yet
    if surface_pressure.time.values.size == 1:
        surface_pressure = surface_pressure.expand_dims(dim = ['time'])

    # NOTE(review): the values are attached by position using the coordinates of CAMS_ds,
    # so this assumes both files share the same time/step/latitude/longitude ordering - confirm
    new_arrays = {
        'surface_pressure': xr.DataArray(
            surface_pressure.values,
            dims = ('time', 'step', 'latitude', 'longitude'),
            coords = horizontal_coords,
            name = 'surface_pressure'
        )
    }

    # Coefficients a and b of every model level
    for coefficient, column in (('a', 'a [Pa]'), ('b', 'b')):
        new_arrays[coefficient] = xr.DataArray(
            CAMS_levels_df[column],
            dims = 'hybrid',
            coords = {'hybrid': ('hybrid', hybrid_coord[:])},
            name = coefficient
        )

    for name in ('surface_pressure', 'a', 'b'):
        CAMS_ds[name] = new_arrays[name]

    # Half-level pressure of each level and of the level before it
    CAMS_ds['pressure_1/2'] = CAMS_ds['a'] + CAMS_ds['surface_pressure'] * CAMS_ds['b']
    previous_half = CAMS_ds['pressure_1/2'].shift(hybrid = 1)
    CAMS_ds['pressure_-1/2'] = previous_half.where(~np.isnan(previous_half), 0, drop = False)

    # Level pressure as the mean of the two half levels
    CAMS_ds['pressure'] = 0.5 * (CAMS_ds['pressure_-1/2'] + CAMS_ds['pressure_1/2'])

    helper_variables = ['a', 'b', 'surface_pressure', 'pressure_1/2', 'pressure_-1/2']
    return CAMS_ds.drop_vars(helper_variables)

In [1]:
def CAMS_kg_kg_to_kg_m2(CAMS_ds, CAMS_levels_df, sensor, start_date, end_date, 
                        component_nom, apply_kernels = False):

    """ Convert the units of the CAMS partial columns for any component from kg/kg to kg/m2.

        Two paths exist:
        - sensor == 'tropomi' with apply_kernels == True: the level pressures are obtained with
          CAMS_pressure and, for every time, the columns are accumulated level by level starting
          from 0 at the first level (top of the atmosphere).
        - any other case: the whole dataset is multiplied by the density and the depth of each
          level taken from CAMS_levels_df.

        The first path also calls subset(...) with a variable bbox; neither is defined in this
        function, so both must be available in the enclosing scope.

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            CAMS_levels_df (dataframe): Table with 137 CAMS levels data
            sensor (str): Name of the sensor
            start_date (str): Query start date
            end_date (str): Query end date
            component_nom (str): Component chemical nomenclature
            apply_kernels (bool): Apply (True) or not (False) the averaging kernels
        
        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    # Calculate columns above each CAMS half level    
    if sensor == 'tropomi' and apply_kernels == True:
        
        print('The columns above each CAMS half level will be calculated.')
        # One accumulated dataset per time, concatenated at the end
        CAMS_ds_all = []

        # Calculate level pressures from the surface pressures
        CAMS_ds = CAMS_pressure(CAMS_ds, CAMS_levels_df, start_date, end_date, component_nom)

        for time in CAMS_ds.time:

            # Accumulated datasets of this time, one per level
            PC_hybrid = []

            CAMS_ds_time_old = CAMS_ds.sel(time = time)
            # NOTE(review): subset and bbox come from outside this function - presumably a
            # spatial crop to the study area; confirm against their definitions
            CAMS_ds_time_old = subset(CAMS_ds_time_old, bbox)

            # Initialize partial columns at the top of the atmosphere as 0
            PC_hybrid_0 = CAMS_ds_time_old.sel(hybrid = 1)
            # where() keeps the values that are <= 0 and replaces every other value by 0
            # NOTE(review): negative values would therefore be kept as they are - confirm intended
            PC_hybrid_0['component'] = PC_hybrid_0['component'].where(PC_hybrid_0['component'] <= 0, 0, drop = False)
            PC_hybrid_0 = PC_hybrid_0.expand_dims(dim = ['hybrid'])
            PC_hybrid.append(PC_hybrid_0)
            CAMS_ds_time_new = PC_hybrid_0
    
            # hybrid runs from 1 to 135, so the levels written here are 2 to 136
            # NOTE(review): level 137 is not produced by this loop - confirm intended
            for hybrid in range(1, 136):

                # Calculate partial columns above each CAMS level
                PC_last = CAMS_ds_time_new.component.sel(hybrid = hybrid)
                PC_current = CAMS_ds_time_old.component.sel(hybrid = hybrid + 1)
                pressure_last = CAMS_ds_time_old.pressure.sel(hybrid = hybrid)
                pressure_current = CAMS_ds_time_old.pressure.sel(hybrid = hybrid + 1)
                pressure_diff = pressure_current - pressure_last

                # Units: component (kg/kg) * pressure difference (Pa = kg/(m*s2)) / g (m/s2) -> kg/m2
                PC_above = CAMS_ds_time_old.sel(hybrid = hybrid + 1)
                PC_above['component'] = PC_last + PC_current * pressure_diff * (1/9.81)
                PC_hybrid.append(PC_above)
                # Rebuild the accumulated dataset so that the next iteration can select this level
                CAMS_ds_time_new = xr.concat(PC_hybrid, pd.Index(range(1, hybrid + 2), name = 'hybrid'))

            CAMS_ds_all.append(CAMS_ds_time_new)

        CAMS_ds = xr.concat(CAMS_ds_all, dim = 'time')

    else:

        # Create xarray object from CAMS model levels information
        CAMS_levels_df_ds = CAMS_levels_df.to_xarray()

        # Convert units from kg/kg to kg/m3
        CAMS_ds = CAMS_ds * CAMS_levels_df_ds['Density [kg/m^3]']

        # Convert units from kg/m3 to kg/m2
        CAMS_ds = CAMS_ds * CAMS_levels_df_ds['Depth [m]']

    return CAMS_ds

In [None]:
def CAMS_kg_m2_to_molecules_cm2(CAMS_ds, component_mol_weight):

    """ Convert the 'component' variable of the CAMS dataset from kg/m2 to molecules/cm2

        The variable is overwritten in the dataset that is passed in.

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
            component_mol_weight (float): Component molecular weight

        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    # Avogadro constant (molecules per mol)
    avogadro = 6.022*10**23

    # The factor 1000 is kg -> g and 10000 is m2 -> cm2; the molecular weight
    # is therefore expected in g/mol
    molecules_times_area = CAMS_ds['component'] * avogadro * 1000
    area_times_weight = 10000 * component_mol_weight
    CAMS_ds['component'] = molecules_times_area / area_times_weight

    return CAMS_ds

In [None]:
def CAMS_molecules_cm2_to_DU(CAMS_ds):

    """ Convert the units of the CAMS dataset from molecules/cm2 to Dobson Units (DU), used for ozone

        Every variable of the dataset is divided by the same factor.

        Args:
            CAMS_ds (xarray): CAMS levels dataset in xarray format

        Returns:
            CAMS_ds (xarray): CAMS levels dataset in xarray format
    """

    # Number of molecules/cm2 in one DU
    molecules_cm2_per_DU = 2.69*10**16

    return CAMS_ds / molecules_cm2_per_DU