# CAMS functions

In [None]:
def get_ADS_API_key():
    
    """ Get ADS API key to download CAMS datasets

        The key is read from the first line of data/keys.txt inside the adc-toolbox folder, which is
        looked up under the first two components of the current working directory.
        
        Returns:
            CAMS_UID_key (str): User API UID and key (UID:key) in ADS platform

        Raises:
            KeyboardInterrupt: If the keys file cannot be read or does not contain any line
    """

    keys_path = os.path.join('/', '/'.join(
                os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                os.path.relpath('data/keys.txt'))

    try:
        # The context manager closes the file even if reading fails
        with open(keys_path, 'r') as keys_file:
            environ_keys = [key.rstrip() for key in keys_file]

        # Only the first line is used (IndexError if the file is empty)
        CAMS_UID_key = environ_keys[0]

    except (OSError, ValueError, IndexError):
        
        # Missing/unreadable file (OSError), undecodable content (ValueError) or empty file (IndexError).
        # KeyboardInterrupt is kept as the signal used to stop the notebook execution.
        print('ERROR: You need to create a keys.txt file in the data folder with the ADS API key.')
        print('Get your ADS API key by registering at https://ads.atmosphere.copernicus.eu/api-how-to.')
        raise KeyboardInterrupt

    return CAMS_UID_key

In [None]:
def CAMS_download(dates, start_date, end_date, component, component_nom, lat_min, lat_max, lon_min, lon_max, 
                  area_name, model_full_name, model_level, CAMS_UID = None, CAMS_key = None):

    """ Query and download the CAMS levels dataset from CDS API

        Files that already exist in the data folder are not downloaded again.

        Args:
            dates (arr): Query dates (strings with year and month separated by '-', only used for
                         the monthly reanalysis total columns)
            start_date (str): Query start date
            end_date (str): Query end date
            component (str): Component name
            component_nom (str): Component chemical nomenclature
            lat_min (int): Minimum latitude
            lat_max (int): Maximum latitude
            lon_min (int): Minimum longitude
            lon_max (int): Maximum longitude
            area_name (str): User defined area name
            model_full_name (str): Full name of the CAMS model among:
            - 'cams-global-atmospheric-composition-forecasts' 
            - 'cams-global-reanalysis-eac4-monthly'
            model_level (str): Number of model levels:
            -  'single' for total columns
            -  'multiple' for levels
            CAMS_UID (str): User ID in ADS platform
            CAMS_key (str): User API key in ADS platform

        Returns:
            CAMS_product_name (str or list): Product name of CAMS product (list with one name per month
                                             for the reanalysis levels, which are downloaded month by month)
            CAMS_type (str): Model type:
            -  'forecast'
            -  'reanalysis'

        Raises:
            ValueError: If model_full_name or model_level is not one of the supported values
    """

    # Reject unsupported options before connecting: otherwise no branch below would assign
    # the returned names and the function would end with an UnboundLocalError
    if model_full_name not in ('cams-global-atmospheric-composition-forecasts',
                               'cams-global-reanalysis-eac4-monthly'):
        raise ValueError("model_full_name must be 'cams-global-atmospheric-composition-forecasts' or "
                         "'cams-global-reanalysis-eac4-monthly', got " + repr(model_full_name) + '.')

    if model_level not in ('single', 'multiple'):
        raise ValueError("model_level must be 'single' or 'multiple', got " + repr(model_level) + '.')
    
    # Get API key
    if CAMS_UID is not None and CAMS_key is not None:
        CAMS_UID_key = CAMS_UID + ':' + CAMS_key
    else:
        CAMS_UID_key = get_ADS_API_key()
    
    # Connect to the server (the same client is reused by every request)
    c = cdsapi.Client(url = 'https://ads.atmosphere.copernicus.eu/api/v2', key = CAMS_UID_key)

    # Bounding box in the order used by every request: north, west, south, east
    area = [lat_max, lon_min, lat_min, lon_max]

    # Download component concentration dataset
    if model_full_name == 'cams-global-atmospheric-composition-forecasts':

        CAMS_type = 'forecast'

        # Settings shared by the levels and the total column requests
        request = {
            'date': start_date + '/' + end_date,
            'type': 'forecast',
            'format': 'grib',
            'time': '00:00',
            'leadtime_hour': [str(x) for x in range(0, 24, 3)],
            'area': area,
        }

        if model_level == 'multiple':

            CAMS_product_name = ('CAMS_FORECAST_' + component_nom + '_137_LEVELS_' + start_date + '_' + end_date +
                                 '_' + area_name + '.grib')
            request['variable'] = component
            request['model_level'] = [str(x + 1) for x in range(137)]

        else:

            CAMS_product_name = ('CAMS_FORECAST_' + component_nom + '_TC_' + start_date + '_' + end_date + 
                                 '_' + area_name + '.grib')
            request['variable'] = 'total_column_' + component

        CAMS_product_path = _CAMS_product_path(component_nom, CAMS_product_name)

        if _CAMS_needs_download(CAMS_product_path):
            c.retrieve(model_full_name, request, CAMS_product_path)

    else:
        
        CAMS_type = 'reanalysis'
        
        if model_level == 'single':

            CAMS_product_name = ('CAMS_REANALYSIS_' + component_nom + '_TC_' + start_date + '_' + end_date + 
                                 '_' + area_name + '.grib')
            CAMS_product_path = _CAMS_product_path(component_nom, CAMS_product_name)

            if _CAMS_needs_download(CAMS_product_path):

                # Monthly means are requested by year and month: keep each value once, in order of appearance
                years = list(dict.fromkeys(date.split('-')[0] for date in dates))
                months = list(dict.fromkeys(date.split('-')[1] for date in dates))
            
                c.retrieve(
                    model_full_name,
                    {
                        'format': 'grib',
                        'variable': 'total_column_' + component,
                        'year': years,
                        'month': months,
                        'product_type': 'monthly_mean',
                        'area': area,
                    },
                    CAMS_product_path)

        else:
            
            # First ('MS') and last ('M') day of the months found between the query dates
            # NOTE(review): both lists are paired by position, so they look misaligned when start_date
            # is not the first day of a month or end_date is not the last one - confirm the expected input
            month_starts = pd.date_range(np.datetime64(start_date), np.datetime64(end_date), freq = 'MS')
            month_starts = tuple(np.unique([date.strftime('%Y-%m-%d') for date in month_starts]))

            month_ends = pd.date_range(np.datetime64(start_date), np.datetime64(end_date), freq = 'M')
            month_ends = tuple(np.unique([date.strftime('%Y-%m-%d') for date in month_ends]))

            # Download month by month (to avoid crashing the server)
            CAMS_product_name = []

            # Dedicated loop names, so the start_date and end_date arguments are not overwritten
            for month_start, month_end in zip(month_starts, month_ends):
                
                CAMS_product_name_month = ('CAMS_REANALYSIS_' + component_nom + '_60_LEVELS_' + month_start + 
                                           '_' + month_end + '_' + area_name + '.grib')
                CAMS_product_path = _CAMS_product_path(component_nom, CAMS_product_name_month)

                if _CAMS_needs_download(CAMS_product_path):

                    # The levels are only available in the non-monthly reanalysis dataset
                    c.retrieve(
                        'cams-global-reanalysis-eac4',
                        {
                            'date': month_start + '/' + month_end,
                            'format': 'grib',
                            'variable': component,
                            'model_level': [str(x + 1) for x in range(60)],
                            'time': ['00:00', '03:00', '06:00', '09:00', '12:00', '15:00', '18:00', '21:00',],
                            'area': area,
                        },
                        CAMS_product_path)

                CAMS_product_name.append(CAMS_product_name_month)

    return CAMS_product_name, CAMS_type


def _CAMS_product_path(component_nom, product_name):

    """ Build the path of a CAMS product file inside the adc-toolbox data folder

        Args:
            component_nom (str): Component chemical nomenclature (name of the product subfolder)
            product_name (str): File name of the CAMS product

        Returns:
            product_path (str): Path of the product file
    """

    # The adc-toolbox folder is looked up under the first two components of the working directory
    toolbox_path = os.path.join('/', '/'.join(os.getcwd().split('/')[1:3]), 'adc-toolbox')

    return os.path.join(toolbox_path, os.path.relpath('data/cams/' + component_nom + '/' + product_name))


def _CAMS_needs_download(product_path):

    """ Tell the user whether a CAMS product file is already available and report if it is missing

        Args:
            product_path (str): Path of the product file

        Returns:
            needs_download (bool): True when the file does not exist yet
    """

    if os.path.isfile(product_path):
        print('The file exists, it will not be downloaded again.')
        return False

    print('The file does not exist, it will be downloaded.')
    return True

In [None]:
def CAMS_read(CAMS_product_name, component, component_nom, dates):

    """ Read CAMS levels dataset as xarray dataset object

        Args:
            CAMS_product_name (str or list): Product name of CAMS product (a list means that the 60 levels 
                                             reanalysis was downloaded in several files)
            component (str): Component name
            component_nom (str): Component chemical nomenclature
            dates (arr): Query dates
            
        Returns:
            model_ds (xarray): Model dataset in xarray format (CAMS)
            dates (arr): Query dates (for the reanalysis total columns, year-month strings of the 
                         requested dates that are available in the dataset)
            model_levels_df (dataframe): Table with CAMS levels data
    """

    # Read as xarray dataset object
    if isinstance(CAMS_product_name, list): 
        # Several files: every 60 levels reanalysis file of the component is opened and joined along time.
        # combine = 'nested' is needed for concat_dim to be accepted by current xarray versions.
        model_ds = xr.open_mfdataset(os.path.join('/', '/'.join(
                                    os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                                    os.path.relpath('data/cams/' + component_nom + '/CAMS_REANALYSIS_' + component_nom + '_60_LEVELS_*')),
                                    combine = 'nested', concat_dim = 'time')

    else:
        model_ds = xr.open_dataset(os.path.join('/', '/'.join(
                                  os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                                  os.path.relpath('data/cams/' + component_nom + '/' + CAMS_product_name)))

    # Change name to component (the variable name depends on having model levels or total columns)
    if 'hybrid' in model_ds.keys():
        if component == 'ozone':
            model_ds = model_ds.rename({'go3': 'component'})
        else:
            model_ds = model_ds.rename({component_nom.lower(): 'component'})

    else:  
        if component == 'ozone':
            model_ds = model_ds.rename({'gtco3': 'component'})
        else:
            model_ds = model_ds.rename({'tc' + component_nom.lower(): 'component'})

    # Only true for a single product name (for a list this is a membership test that never matches)
    if 'REANALYSIS_' + component_nom + '_TC_' in CAMS_product_name:
        
        # Remove data for dates that have been downloaded but not asked for (error of the CAMS API!)
        all_datetimes = []
        for date in dates:
            year = int(date.split('-')[0])
            month = int(date.split('-')[1])
            time_str = np.datetime64(dt.datetime(year, month, 1, 0, 0, 0, 0))
            all_datetimes.append(time_str)

        # Drop datetimes
        datetimes_to_delete = np.setdiff1d(model_ds.time.values, np.array(all_datetimes))
        if datetimes_to_delete.size != 0:
            model_ds = model_ds.drop_sel(time = datetimes_to_delete) 

        # Available dates
        dates_to_keep = np.intersect1d(model_ds.time.values, np.array(all_datetimes))
        dates = tuple(dates_to_keep.astype('datetime64[M]').astype(str))
        
        # Remove step since there is only one (drop_vars replaces the deprecated drop)
        model_ds = model_ds.drop_vars('step')

    # Arrange coordinates (longitudes are wrapped to the range -180 to 180)
    model_ds = model_ds.assign_coords(longitude = (((model_ds.longitude + 180) % 360) - 180)).sortby('longitude')
    model_ds = model_ds.sortby('latitude')

    # Assign time as dimension (when there is only one time and it is not a dimension yet,
    # since expand_dims fails for a dimension that already exists)
    if model_ds.time.values.size == 1 and 'time' not in model_ds.dims:
        model_ds = model_ds.expand_dims(dim = ['time'])
  
    # Get model levels
    model_levels_df = model_levels(model_ds, CAMS_product_name)

    return model_ds, dates, model_levels_df

In [None]:
def model_levels(model_ds, CAMS_product_name):

    """ Create table with information about the CAMS model levels

        Args:
            model_ds (xarray): Model dataset in xarray format (CAMS), currently not used
            CAMS_product_name (str or list): Product name of CAMS product (or list of product names)
        
        Returns:
            model_levels_df (dataframe): Table with CAMS levels data, indexed by the level number (hybrid)
    """
    
    # The product name can be a list (month by month downloads): look for the tag inside each name,
    # since a plain membership test on the list would never find it
    if isinstance(CAMS_product_name, str):
        product_names = [CAMS_product_name]
    else:
        product_names = list(CAMS_product_name)

    if any('60_LEVELS' in product_name for product_name in product_names):
        levels_file = '60-levels-definition.csv'
    else:
        levels_file = '137-levels-definition.csv'

    # Read CSV table with information about the model levels
    model_levels_df = pd.read_csv(os.path.join('/', '/'.join(
                                 os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                                 os.path.relpath('data/cams/' + levels_file)))

    # Drop first row and set n as index hybrid
    model_levels_df = model_levels_df.drop(0).reset_index(drop = True)
    model_levels_df = model_levels_df.set_index('n')
    model_levels_df.index.names = ['hybrid']

    # Change important columns to numeric
    model_levels_df['ph [Pa]'] = pd.to_numeric(model_levels_df['ph [hPa]']) * 100
    model_levels_df['Geopotential Altitude [m]'] = pd.to_numeric(model_levels_df['Geopotential Altitude [m]'])
    model_levels_df['Density [kg/m^3]'] = pd.to_numeric(model_levels_df['Density [kg/m^3]'])

    # Calculate difference from geopotential altitude (each level minus the following one)
    altitude = model_levels_df['Geopotential Altitude [m]']
    depth = altitude.diff(-1)

    # The last level has no following level: its depth is its own altitude.
    # The value is set on the new series before storing it, because a chained assignment
    # on the dataframe column is not guaranteed to modify the dataframe.
    depth.iloc[-1] = altitude.iloc[-1]
    model_levels_df['Depth [m]'] = depth

    return model_levels_df

In [None]:
def CAMS_pressure(model_ds, CAMS_product_name, model_levels_df, start_date, end_date, component_nom, 
                  lat_min, lat_max, lon_min, lon_max, area_name, CAMS_UID = None, CAMS_key = None):

    """ Download surface pressure and calculate levels pressure following the instructions given at:
        https://confluence.ecmwf.int/display/OIFS/4.4+OpenIFS%3A+Vertical+Resolution+and+Configurations

        Args:
            model_ds (xarray): Model dataset in xarray format (CAMS)
            CAMS_product_name (str or list): Product name of CAMS product (or list of product names)
            model_levels_df (dataframe): Table with CAMS levels data (columns 'a [Pa]' and 'b' are used)
            start_date (str): Query start date
            end_date (str): Query end date
            component_nom (str): Component chemical nomenclature
            lat_min (int): Minimum latitude
            lat_max (int): Maximum latitude
            lon_min (int): Minimum longitude
            lon_max (int): Maximum longitude
            area_name (str): User defined area name
            CAMS_UID (str): User ID in ADS platform
            CAMS_key (str): User API key in ADS platform
            
        Returns:
            model_ds (xarray): Model dataset in xarray format (CAMS) with the new variable 'pressure'

        Raises:
            ValueError: If the product name contains neither 'FORECAST' nor 'REANALYSIS'

        Note:
            The function subset and the variables bbox, sensor and sensor_type are not arguments:
            they are looked up in the enclosing (notebook/module) scope.
    """
    
    # Identify the model type before connecting, so that an unknown product fails with a clear message
    # instead of reaching the reading step without a file path. The product name can be a list
    # (month by month downloads), so the tag is searched inside each name.
    if isinstance(CAMS_product_name, str):
        product_names = [CAMS_product_name]
    else:
        product_names = list(CAMS_product_name)

    is_forecast = any('FORECAST' in product_name for product_name in product_names)
    is_reanalysis = any('REANALYSIS' in product_name for product_name in product_names)

    if not is_forecast and not is_reanalysis:
        raise ValueError("CAMS_product_name must contain 'FORECAST' or 'REANALYSIS', got " + 
                         repr(CAMS_product_name) + '.')

    CAMS_pressure_product_name = ('_SURFACE_PRESSURE_' + start_date + '_' + end_date +
                                  '_' + area_name + '.grib')

    # Get API key
    if CAMS_UID is not None and CAMS_key is not None:
        CAMS_UID_key = CAMS_UID + ':' + CAMS_key
    else:
        CAMS_UID_key = get_ADS_API_key()
    
    # Connect to the server
    c = cdsapi.Client(url = 'https://ads.atmosphere.copernicus.eu/api/v2', key = CAMS_UID_key)

    # Download surface pressure data (the forecast has priority, as in the product name checks above)
    if is_forecast:

        CAMS_surface_pressure_path = os.path.join('/', '/'.join(
                                        os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                                        os.path.relpath('data/cams/' + component_nom + '/CAMS_FORECAST' + CAMS_pressure_product_name))
    
        c.retrieve(
                   'cams-global-atmospheric-composition-forecasts',
                   {
                           'date': start_date + '/' + end_date,
                           'type': 'forecast',
                           'format': 'grib',
                           'variable': 'surface_pressure',
                           'leadtime_hour': [str(x) for x in range(0, 24, 3)],
                           'time': '00:00',
                           'area': [lat_max, lon_min, lat_min, lon_max],
                   },
                   CAMS_surface_pressure_path)

    else:

        CAMS_surface_pressure_path = os.path.join('/', '/'.join(
                                        os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                                        os.path.relpath('data/cams/' + component_nom + '/CAMS_REANALYSIS' + CAMS_pressure_product_name))
    
        c.retrieve(
                   'cams-global-reanalysis-eac4',
                   {
                           'date': start_date + '/' + end_date,
                           'format': 'grib',
                           'variable': 'surface_pressure',
                           'time': ['00:00', '03:00', '06:00',
                                    '09:00', '12:00', '15:00',
                                    '18:00', '21:00',],
                           'area': [lat_max, lon_min, lat_min, lon_max],
                   },
                   CAMS_surface_pressure_path)

    # Coordinates of the model dataset, reused to label the pressure arrays
    hybrid = model_ds['hybrid'].data
    time = model_ds['time'].data
    step = model_ds['step'].data
    latitude = model_ds['latitude'].data
    longitude = model_ds['longitude'].data

    # Read surface pressure
    model_pressure_ds = xr.open_dataarray(CAMS_surface_pressure_path)

    # Arrange coordinates (longitudes are wrapped to the range -180 to 180)
    model_pressure_ds = model_pressure_ds.assign_coords(longitude = (((model_pressure_ds.longitude + 180) % 360) - 180)).sortby('longitude')
    model_pressure_ds = model_pressure_ds.sortby('latitude')
    
    # Assign time as dimension (when there is only one time)
    if model_pressure_ds.time.values.size == 1:
        model_pressure_ds = model_pressure_ds.expand_dims(dim = ['time'])
        
    # Transpose dimensions
    model_pressure_ds = model_pressure_ds.transpose('time', 'step', 'latitude', 'longitude')

    # Subset surface pressure dataset
    # NOTE(review): subset, bbox, sensor and sensor_type are not defined in this function - they must
    # exist in the enclosing scope when it is called
    model_pressure_ds = subset(model_pressure_ds, bbox, sensor, component_nom, sensor_type, subset_type = 'model_subset')

    # The values are relabelled with the model coordinates, so the subset is assumed to have
    # the same shape as the model dataset
    sp_array = xr.DataArray(
                            model_pressure_ds.values,
                            dims = ('time', 'step', 'latitude', 'longitude'),
                            coords = {
                                    'time': ('time', time),
                                    'step': ('step', step),
                                    'latitude': ('latitude', latitude),
                                    'longitude': ('longitude', longitude),
                            },
                            name = 'surface_pressure'
    )

    a_array = xr.DataArray(
                           model_levels_df['a [Pa]'],
                           dims = ('hybrid'),
                           coords = {'hybrid': ('hybrid', hybrid),},
                           name = 'a'
    )

    b_array = xr.DataArray(
                           model_levels_df['b'],
                           dims = ('hybrid'),
                           coords = {'hybrid': ('hybrid', hybrid),},
                           name = 'b'
    )

    model_ds['surface_pressure'] = sp_array
    model_ds['a'] = a_array
    model_ds['b'] = b_array

    # Pressure at the lower half level: a + b * surface pressure
    model_ds['pressure_1/2'] = model_ds['a'] + model_ds['surface_pressure'] * model_ds['b']

    # Pressure at the upper half level: value of the previous level (0 for the first level, 
    # where the shift leaves no data)
    model_ds['pressure_-1/2'] = model_ds['pressure_1/2'].shift(hybrid = 1)
    model_ds['pressure_-1/2'] = model_ds['pressure_-1/2'].where(~np.isnan(model_ds['pressure_-1/2']), 0, drop = False)

    # Pressure at the level: mean of both half levels
    model_ds['pressure'] = 0.5 * (model_ds['pressure_-1/2'] + model_ds['pressure_1/2'])

    # Keep only the final pressure
    model_ds = model_ds.drop_vars(['a', 'b', 'surface_pressure', 'pressure_1/2', 'pressure_-1/2'])
    
    return model_ds

In [None]:
def CAMS_get_levels_data(model_ds, CAMS_product_name, model_levels_df, column_type, 
                         lat_min, lat_max, lon_min, lon_max):
    
    """ Prepare the model data for the comparison with the tropospheric or total sensor columns

        The reanalysis data is returned as it is. For the forecast data and tropospheric columns, the
        levels pressure is calculated and, if the averaging kernels are not applied, the levels with 
        pressures above or equal to 300 hPa are summed.

        Args:
            model_ds (xarray): Model dataset in xarray format (CAMS)
            CAMS_product_name (str): Product name of CAMS product
            model_levels_df (dataframe): Table with 137 CAMS levels data
            column_type (str): Tropospheric or total column ('tropospheric' or 'total')
            lat_min (int): Minimum latitude
            lat_max (int): Maximum latitude
            lon_min (int): Minimum longitude
            lon_max (int): Maximum longitude
            
        Returns:
            model_ds (xarray): Model dataset in xarray format (CAMS)

        Note:
            start_date, end_date, component_nom, area_name and apply_kernels are not arguments: they are
            looked up in the enclosing (notebook/module) scope when the forecast tropospheric case runs.
    """

    # Units of the component, restored after summing the levels
    units = model_ds.component.attrs['units'] 

    # Reanalysis: nothing is calculated, the user is only informed
    if 'REANALYSIS' in CAMS_product_name:

        if column_type == 'tropospheric':
            print('The model total columns will be directly compared to the tropospheric sensor columns.')

        elif column_type == 'total':
            print('The model total columns will be compared to the total sensor columns.')

        return model_ds

    # Any other product that is not a forecast is returned untouched
    if 'FORECAST' not in CAMS_product_name:
        return model_ds

    if column_type == 'tropospheric':

        print('The model tropospheric columns will be compared to the tropospheric sensor columns.')
        print('The model tropospheric columns will be estimated (pressures above or equal to 300 hPa).')
        
        # Levels pressure (the API key is always read from the keys file here)
        model_ds = CAMS_pressure(model_ds, CAMS_product_name, model_levels_df, start_date, end_date, component_nom, 
                                lat_min, lat_max, lon_min, lon_max, area_name, CAMS_UID = None, CAMS_key = None)

        if apply_kernels == False:
            
            # Keep the levels at or above 30000 Pa (300 hPa) and add them up
            tropospheric_ds = model_ds.where(model_ds.pressure >= 30000, drop = True)
            model_ds = tropospheric_ds.sum(dim = 'hybrid')
            model_ds['component'] = model_ds.component.assign_attrs({'units': units})

    if column_type == 'total':
        print('The model total columns will be compared to the total sensor columns.')

    return model_ds

In [1]:
def CAMS_kg_kg_to_kg_m2(model_ds, model_levels_df, sensor, start_date, end_date, 
                        component_nom, apply_kernels = False, CAMS_UID = None, CAMS_key = None):

    """ Convert the units of the CAMS partial columns for any component from kg/kg to kg/m2. To do this,
        calculate columns above each CAMS half level assuming it is 0 at the top of the atmosphere

        Two methods are used:
        - For TROPOMI with averaging kernels, the columns are accumulated level by level using the 
          pressure difference between consecutive levels (model_ds must contain the variable 'pressure' 
          and the levels 1 to 137).
        - In any other case, the whole dataset is multiplied by the density and the depth of each level 
          given in model_levels_df.

        Args:
            model_ds (xarray): Model dataset in xarray format (CAMS)
            model_levels_df (dataframe): Table with 137 CAMS levels data
            sensor (str): Name of the sensor
            start_date (str): Query start date (not used in the function body)
            end_date (str): Query end date (not used in the function body)
            component_nom (str): Component chemical nomenclature (not used in the function body)
            apply_kernels (bool): Apply (True) or not (False) the averaging kernels
            CAMS_UID (str): User ID in ADS platform (not used in the function body)
            CAMS_key (str): User API key in ADS platform (not used in the function body)
        
        Returns:
            model_ds (xarray): Model dataset in xarray format (CAMS)
    """

    # Calculate columns above each CAMS half level    
    if sensor == 'tropomi' and apply_kernels == True:

        print('The columns above each CAMS half level will be calculated.')

        # Initialize new array (one accumulated dataset per timestep)
        model_ds_all = []

        for time in model_ds.time:

            # Select data for each timestep
            model_ds_time_old = model_ds.sel(time = time)

            # Initialize partial columns at the top of the atmosphere (hybrid = 1) as 0
            # (where keeps the values that are already <= 0 and replaces every other value by 0)
            PC_hybrid_0 = model_ds_time_old.sel(hybrid = 1)
            PC_hybrid_0['component'] = PC_hybrid_0['component'].where(PC_hybrid_0['component'] <= 0, 0, drop = False)
            PC_hybrid_0 = PC_hybrid_0.expand_dims(dim = ['hybrid'])

            # Create new model dataset
            PC_above_all = []
            PC_above_all.append(PC_hybrid_0)
            model_ds_time_new = PC_hybrid_0
    
            # Levels 2 to 137 (hard-coded number of levels), each one built from the previous result
            for hybrid in range(1, 137):

                # Get current and previous partial columns and level pressures
                # (PC_last is the accumulated value, PC_current the original one)
                PC_last = model_ds_time_new.component.sel(hybrid = hybrid)
                PC_current = model_ds_time_old.component.sel(hybrid = hybrid + 1)
                pressure_last = model_ds_time_old.pressure.sel(hybrid = hybrid)
                pressure_current = model_ds_time_old.pressure.sel(hybrid = hybrid + 1)

                # Calculate pressure difference
                pressure_diff = pressure_current - pressure_last

                # Calculate partial columns above each model level
                # Units: (kg/kg * kg/m*s2) * s2/m -> kg/m2
                # (9.81 is presumably the gravity acceleration in m/s2)
                PC_above = model_ds_time_old.sel(hybrid = hybrid + 1)
                PC_above['component'] = PC_last + PC_current * pressure_diff * (1/9.81)

                # Append result and rebuild the accumulated dataset, which is read in the next iteration
                PC_above_all.append(PC_above)
                model_ds_time_new = xr.concat(PC_above_all, pd.Index(range(1, hybrid + 2), name = 'hybrid'))

            model_ds_all.append(model_ds_time_new)

        # Join the timesteps again
        model_ds = xr.concat(model_ds_all, dim = 'time')

    else:

        # Create xarray object from CAMS model levels information
        model_levels_ds = model_levels_df.to_xarray()

        # Convert units from kg/kg to kg/m3
        model_ds = model_ds * model_levels_ds['Density [kg/m^3]']

        # Convert units from kg/m3 to kg/m2
        model_ds = model_ds * model_levels_ds['Depth [m]']

    return model_ds

In [None]:
def CAMS_kg_m2_to_molecules_cm2(model_ds, component_MW):

    """ Change the units of the CAMS component from kg/m2 to molecules/cm2

        The variable 'component' of the dataset is overwritten with the converted values.

        Args:
            model_ds (xarray): Model dataset in xarray format (CAMS)
            component_MW (float): Component molecular weight (presumably in g/mol, given the 
                                  factor 1000 applied to the mass)

        Returns:
            model_ds (xarray): Model dataset in xarray format (CAMS)
    """

    # Avogadro constant (rounded)
    avogadro = 6.022*10**23

    # Mass scaled by 1000 and by the number of molecules, over area scaled by 10000 and molecular weight
    molecules = model_ds['component'] * avogadro * 1000
    divisor = 10000 * component_MW
    model_ds['component'] = molecules / divisor
    
    return model_ds

In [None]:
def CAMS_molecules_cm2_to_DU(model_ds):

    """ Change the units of the CAMS dataset from molecules/cm2 to DU (used for ozone)

        The division is applied to the whole dataset, not only to the component.

        Args:
            model_ds (xarray): CAMS levels dataset in xarray format

        Returns:
            model_ds (xarray): CAMS levels dataset in xarray format
    """

    # Number of molecules/cm2 that correspond to 1 DU
    molecules_cm2_per_DU = 2.69*10**16

    return model_ds / molecules_cm2_per_DU