# General functions

In [5]:
def comparison_check(sensor, model, component_nom, model_full_name):

    """ Check if the comparison is possible

        A diagnostic is printed and KeyboardInterrupt is raised as soon as one of the
        checks fails; a confirmation message is printed when all of them pass.

        Args:
            sensor (str): Name of the sensor
            model (str): Name of the model
            component_nom (str): Component chemical nomenclature
            model_full_name (str): Full name of the CAMS model among:
            - 'cams-global-atmospheric-composition-forecasts' 
            - 'cams-global-reanalysis-eac4-monthly'

        Raises:
            KeyboardInterrupt: If the sensor/model pair, the model full name or the
            component is not supported
    """

    # Components that can be compared for each supported sensor
    supported_components = {'tropomi': ['NO2', 'CO', 'O3', 'SO2'],
                            'iasi': ['CO', 'O3'],
                            'gome': ['NO2', 'O3']}

    supported_models = ['cams-global-atmospheric-composition-forecasts',
                        'cams-global-reanalysis-eac4-monthly']

    # 1. The sensor and model pair must be supported
    if model != 'cams' or sensor not in supported_components:

        print('The comparison is only possible for:')
        print('1. cams (CAMS model) vs. tropomi (TROPOMI sensor)')
        print('2. cams (CAMS model) vs. iasi (IASI sensor)')
        print('3. cams (CAMS model) vs. gome (GOME-2 sensor)')
        raise KeyboardInterrupt

    # 2. The CAMS dataset must be one of the supported ones
    if model_full_name not in supported_models:

        print('ERROR: The model is not supported.')
        print('The models that are currently supported are:')
        for supported_model in supported_models:
            print('- ' + supported_model)
        raise KeyboardInterrupt

    # 3. The component must be available for the chosen sensor
    if component_nom not in supported_components[sensor]:

        print(f'ERROR: This specific component cannot be retrieved by the sensor {sensor.upper()}.')
        raise KeyboardInterrupt

    print('The comparison is possible and will start now.')

In [6]:
def components_table(sensor, component_nom):

    """ Look up the information of one component (molecular weight, names in the different datasets)

        Args:
            sensor (str): Name of the sensor
            component_nom (str): Component chemical nomenclature

        Returns:
            component (str): Component name
            component_mol_weight (float): Component molecular weight
            component_sensor_product (str): Component product name in the TROPOMI database 
            (None for IASI and GOME-2)
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
    """

    # One row per component; '-' marks a column that is not listed for that sensor
    catalogue = pd.DataFrame({
        'Nomenclature': ['NO2', 'CO', 'O3', 'SO2', 'CH4'],
        'Weight': [46.005, 28.01, 48, 64.066, 16.04],
        'Component': ['nitrogen_dioxide', 'carbon_monoxide', 'ozone', 'sulfur_dioxide', 'methane'],
        'TROPOMI_product': ['L2__NO2___', 'L2__CO____', 'L2__O3____', 'L2__SO2___', 'L2__CH4___'],
        'TROPOMI_column': ['nitrogendioxide_tropospheric_column',
                           'carbonmonoxide_total_column',
                           'ozone_total_vertical_column',
                           'sulfurdioxide_total_vertical_column',
                           'methane_tropospheric_column'],
        'IASI_column': ['-', 'COgridDAY', 'O3gridDAY', '-', '-'],
        'GOME_column': ['NO2total', '-', 'tropospheric_O3', '-', '-'],
    })

    # Boolean mask selecting the row of the requested component
    is_requested = catalogue['Nomenclature'] == component_nom

    def lookup(field):
        # First (and only) value of the field for the requested component
        return catalogue[field].loc[is_requested].iloc[0]

    component = lookup('Component')
    component_mol_weight = lookup('Weight')

    # Only TROPOMI has a product name
    if sensor == 'tropomi':
        component_sensor_product = lookup('TROPOMI_product')

    elif sensor in ('iasi', 'gome'):
        component_sensor_product = None

    sensor_column = lookup(sensor.upper() + '_column')

    return component, component_mol_weight, component_sensor_product, sensor_column

In [7]:
def generate_folders(model, sensor, component_nom):

    """ Create the download folders of the model and sensor datasets, unless they already exist

        The folders are data/<model>/<component_nom> and data/<sensor>/<component_nom>, 
        relative to the current working directory.

        Args:
            model (str): Name of the model
            sensor (str): Name of the sensor
            component_nom (str): Component chemical nomenclature
    """

    base_dir = os.path.abspath('')

    # Build both paths first, then create them
    targets = [os.path.join(base_dir, '/'.join(('data', source, component_nom)))
               for source in (model, sensor)]

    for target in targets:
        os.makedirs(target, exist_ok = True)

In [8]:
def sensor_convert_units(sensor_ds, sensor_column, sensor, component):

    """ Convert the units of the sensor dataset for any component from mol/m2 to molecules/cm2
        (and additionally to Dobson units when the component is ozone)

        Args:
            sensor_ds (xarray): sensor dataset in xarray format (TROPOMI, IASI or GOME-2)
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            sensor (str): Name of the sensor
            component (str): Component name
            
        Returns:
            sensor_ds (xarray): sensor dataset in xarray format
    """

    # Multiplicative factor from mol m-2 to molec cm-2
    mol_m2_to_molec_cm2 = 6.02214*10**19

    # Divisor from molec cm-2 to Dobson units (DU)
    molec_cm2_per_du = 2.69*10**16

    if sensor == 'tropomi':

        # For TROPOMI only the component column and the a priori profile are rescaled
        has_apriori = 'apriori_profile' in list(sensor_ds.keys())
        
        if sensor_ds[sensor_column].units == 'mol m-2':

            sensor_ds[sensor_column] = sensor_ds[sensor_column] * mol_m2_to_molec_cm2
            sensor_ds[sensor_column] = sensor_ds[sensor_column].assign_attrs({'units': 'molec cm-2'})
            print('The sensor component units have been converted from mol m-2 to molec cm-2.')
            
            if has_apriori:
                sensor_ds['apriori_profile'] = sensor_ds['apriori_profile'] * mol_m2_to_molec_cm2

        # Checked independently of the previous step (as for IASI and GOME-2), so that
        # ozone data already in molec cm-2 is converted to DU as well
        if sensor_ds[sensor_column].units == 'molec cm-2' and component == 'ozone':

            sensor_ds[sensor_column] = sensor_ds[sensor_column] / molec_cm2_per_du
            sensor_ds[sensor_column] = sensor_ds[sensor_column].assign_attrs({'units': 'DU'})
            print('The sensor component units have been converted from molec cm-2 to DU.')

            if has_apriori:
                sensor_ds['apriori_profile'] = sensor_ds['apriori_profile'] / molec_cm2_per_du

    elif sensor == 'iasi' or sensor == 'gome':

        # For IASI and GOME-2 every variable of the dataset is rescaled
        if sensor_ds[sensor_column].units == 'mol m-2':

            sensor_ds = sensor_ds * mol_m2_to_molec_cm2
            sensor_ds[sensor_column] = sensor_ds[sensor_column].assign_attrs({'units': 'molec cm-2'})
            print('The sensor component units have been converted from mol m-2 to molec cm-2.')

        if sensor_ds[sensor_column].units == 'molec cm-2' and component == 'ozone':

            sensor_ds = sensor_ds / molec_cm2_per_du
            sensor_ds[sensor_column] = sensor_ds[sensor_column].assign_attrs({'units': 'DU'})
            print('The sensor component units have been converted from molec cm-2 to DU.')

    return sensor_ds

In [9]:
def model_convert_units(model_ds, model, component_mol_weight, component, conversion_method):

    """ Convert the units of the model dataset for any component from kg/kg or kg/m2 to molecules/cm2
        (and additionally to Dobson units when the component is ozone)

        The conversions are chained: each step is applied only if the dataset is in the 
        units produced by the previous one, so data that is already converted is left untouched.

        Args:
            model_ds (xarray): model dataset in xarray format (CAMS)
            model (str): Name of the model
            component_mol_weight (float): Component molecular weight
            component (str): Component name
            conversion_method (str): Type of conversion. It can be:
            * Simple: Multiply the partial columns by the layer depth and density
            * Complex: Calculate the partial column above each CAMS half level
                        
        Returns:
            model_ds (xarray): model dataset in xarray format
            units (str): Units of the component after the conversion (None if the model is not 'cams')
    """

    # Always defined, so the return never fails when no conversion is needed
    units = None

    if model == 'cams':

        # Start from the units the dataset already has
        units = model_ds.component.units

        if units == 'kg kg**-1':

            # NOTE: model_levels_df is not a parameter, it is read from the enclosing (notebook) scope
            model_ds = CAMS_kg_kg_to_kg_m2(model_ds, model_levels_df, conversion_method)
            model_ds['component'] = model_ds.component.assign_attrs({'units': 'kg m**-2'})
            print('The model component units have been converted from kg kg**-1 to kg m**-2.')
            units = 'kg m**-2'
            
        if units == 'kg m**-2':

            model_ds = CAMS_kg_m2_to_molecules_cm2(model_ds, component_mol_weight)
            model_ds['component'] = model_ds.component.assign_attrs({'units': 'molec cm-2'})
            print('The model component units have been converted from kg m**-2 to molec cm-2.')
            units = 'molec cm-2'

        if units == 'molec cm-2' and component == 'ozone':

            model_ds = CAMS_molecules_cm2_to_DU(model_ds, component_mol_weight)
            model_ds['component'] = model_ds.component.assign_attrs({'units': 'DU'})
            print('The model component units have been converted from molec cm-2 to DU.')
            units = 'DU'

    return model_ds, units

In [10]:
def nearest_neighbour(array, value):

    """ Return the index of the element of a 1D-array that is closest to a value

        Args:
            array (arr): Array to find the nearest neighbour
            value (float): Search value

        Returns:
            index (int): Position of the closest element (the first one in case of a tie)
    """

    # Absolute distance of every element to the search value
    distances = np.abs([element - value for element in array])

    return np.argmin(distances, axis = 0)

In [11]:
def closest_point(point, array):

    """ Find the pair of values of a 2D-array that is closest to a point

        Args:
            point (tuple): Search coordinates
            array (arr): Array to find the nearest neighbour

        Returns:
            pair: Element of the array with the smallest distance to the point
    """

    # Distances from the point to every pair in the array (one row of distances)
    distances = cdist([point], array)
    closest_index = distances.argmin()

    return array[closest_index]

In [12]:
def pairwise(dates):

    """ Group consecutive dates in non-overlapping pairs

        Args:
            dates (arr): All dates

        Returns:
            period (list): Pairs (tuples) of consecutive dates, a trailing date without 
            partner is dropped
    """

    # The same iterator is consumed twice per tuple, so zip yields (1st, 2nd), (3rd, 4th), ...
    shared_iterator = [iter(dates)] * 2

    return list(zip(*shared_iterator))

In [13]:
def subset(ds, bbox):

    """ Subset any dataset (with latitude and longitude as coordinates) into desired bounding box.

        The bounding box limits are matched to the nearest coordinate values of the dataset, 
        both limits are included in the result.

        Args:
            ds (xarray): Dataset in xarray format
            bbox (arr): Query bounding box, as ((lon_min, lat_min), (lon_max, lat_max))
    
        Returns:
            ds (xarray): Dataset in xarray format
    """

    # Get index of the nearest longitude and latitude to each bbox limit
    lon_indices = (nearest_neighbour(ds.longitude.data, bbox[0][0]),
                   nearest_neighbour(ds.longitude.data, bbox[1][0]))
    lat_indices = (nearest_neighbour(ds.latitude.data, bbox[0][1]),
                   nearest_neighbour(ds.latitude.data, bbox[1][1]))

    # If a coordinate is stored in descending order, the index of the minimum is higher than 
    # the index of the maximum: order them so the slice is never empty
    lon_start, lon_stop = sorted(lon_indices)
    lat_start, lat_stop = sorted(lat_indices)

    # Define slices (+ 1 to include the upper limit)
    slice_lon = slice(lon_start, lon_stop + 1)
    slice_lat = slice(lat_start, lat_stop + 1)

    # Set limits
    ds = ds.isel(longitude = slice_lon, latitude = slice_lat)

    return ds

In [14]:
def prepare_df(match_df, sensor, sensor_column, component_nom):

    """ Prepare the sensor dataframe for the match with the model data

        For TROPOMI, the data with low quality is masked, the index levels are renamed 
        (CO, SO2 and O3 only) and the rows are averaged per layer and pixel. For IASI and 
        GOME-2, latitude and longitude are moved from the index to columns.

        Args:
            match_df (dataframe): Dataframe used to apply averaging kernels
            sensor (str): Name of the sensor
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            component_nom (str): Component chemical nomenclature
        
        Returns:
            match_df (dataframe): Dataframe used to apply averaging kernels
    """

    if sensor in ('iasi', 'gome'):
        return match_df.reset_index(level = ['latitude', 'longitude'])

    if sensor != 'tropomi':
        return match_df

    # Mask (NaN) the component and kernel where the qa_value is under 0.5
    low_quality = match_df['qa_value'] < 0.5
    match_df.loc[low_quality, [sensor_column, 'column_kernel']] = float('NaN')

    # Rename the index levels, their number depends on the component
    if component_nom in ('CO', 'SO2'):
        match_df.index.names = ['corner', 'ground_pixel', 'layer', 'scanline']

    elif component_nom == 'O3':
        match_df.index.names = ['corner', 'ground_pixel', 'layer', 'level', 'scanline']

    # Average over everything that is not a grouping key (this drops the remaining levels)
    grouping_keys = ['layer', 'scanline', 'ground_pixel', 'time', 'delta_time']
    averaged_df = match_df.groupby(by = grouping_keys).mean()

    return averaged_df.reset_index(level = ['layer', 'delta_time'])

In [15]:
def generate_match_table(sensor_ds, model_ds, bbox, kernels_method, 
                         sensor, component_nom, sensor_column, *args):

    """ Intermediate merge table with total column or partial column from both datasets, 
        the averaging kernels are applied if possible

        The sensor data is processed timestamp by timestamp: it is subset to the bounding box,
        transformed into a dataframe and matched to the model data. The dataframes of all
        the timestamps are concatenated at the end.

        Args:
            sensor_ds (xarray): sensor dataset in xarray format (TROPOMI, IASI or GOME-2)
            model_ds (xarray): model dataset in xarray format (CAMS)
            bbox (arr): Query bounding box
            kernels_method (str): Method to apply averaging kernels to model space:
            * Nearest neighbours: Find nearest neighbours horizontally and vertically
            * Interpolation: Find nearest neighbours horizontally and interpolate vertically
            sensor (str): Name of the sensor
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            *args: Include 'model_levels_df' if interpolation is wanted

        Returns:
            match_table (dataframe): Intermediate merge table with total column or partial column from both datasets
    """

    # One dataframe per timestamp, concatenated once at the end 
    # (DataFrame.append is not available in pandas >= 2.0)
    match_frames = []

    for time in sensor_ds.time.values:
        
        # Print estimated time or month
        if sensor == 'tropomi':
            print(f'FOR EST. TIME: {time}')

        elif sensor == 'iasi' or sensor == 'gome':
            month = np.datetime64(time).astype('datetime64[M]')
            print(f'FOR MONTH: {month}')
        
        # Reduce data to only one timestamp
        model_ds_time = model_ds.sel(time = time)
        sensor_ds_time = sensor_ds.sel(time = time)

        # Subset sensor data xarray
        if sensor == 'tropomi':
            sensor_ds_time = TROPOMI_subset(sensor_ds_time, bbox, time, sensor, component_nom)

        elif sensor == 'iasi' or sensor == 'gome':
            sensor_ds_time = subset(sensor_ds_time, bbox)
        
        # Transform into dataframe
        match_df = sensor_ds_time.to_dataframe()
        
        # Prepare dataframe for following functions
        match_df = prepare_df(match_df, sensor, sensor_column, component_nom)

        # The model has levels data if it has the 'hybrid' coordinate
        model_has_levels = 'hybrid' in list(model_ds.coords)
        
        if 'column_kernel' in list(sensor_ds.keys()) and kernels_method is not None:
                
            print('This dataset contains data to apply the averaging kernels.')

            # NOTE: model_levels_df is read from the enclosing (notebook) scope, not from *args
            match_df = TROPOMI_apply_avg_kernels(kernels_method, match_df, model_ds_time, 
                                                 sensor_ds_time, sensor_column, model_levels_df)
            
        else:
            
            print('The dataset does not contain data to apply the averaging kernels.')

            if model_has_levels:

                print('The partial columns will be sumed up.')
                print('The sum will be matched to the sensor data by nearest neighbours.')

                # Total column: sum of the partial columns of all the levels
                model_ds_time = model_ds_time.component.sum(dim = 'hybrid', skipna = False)
                model_times = model_ds_time.valid_time.data
                
                # Match in time (closest model step) and in space (nearest grid point)
                match_df['step_index'] = match_df.apply(lambda row: nearest_neighbour(model_times, row['delta_time']), axis = 1)
                match_df['model_time'] = match_df.apply(lambda row: model_ds_time.valid_time[row['step_index']].values, axis = 1)
                match_df['model_column'] = match_df.apply(lambda row: model_ds_time.sel(latitude = row['latitude'], 
                                                                                longitude = row['longitude'],
                                                                                method = 'nearest').isel(step = 
                                                                                int(row['step_index'])).values, 
                                                                                axis = 1)

            else:

                print('The model does not contain levels data.')
                print('The columns model data will be matched to the sensor data by nearest neighbours.')

                # Match in space only (nearest grid point)
                match_df['model_column'] = match_df.apply(lambda row: float(model_ds_time.sel(latitude = row['latitude'], 
                                                                            longitude = row['longitude'],
                                                                            method = 'nearest').component.values), 
                                                                            axis = 1)
            
        if model_has_levels:
            match_df = match_df.set_index('layer', append = True)

        # Keep only the first row of every duplicated index entry
        match_df = match_df[~match_df.index.duplicated()]
        match_frames.append(match_df)

    # pd.concat fails with an empty list: return an empty table if there were no timestamps
    if not match_frames:
        return pd.DataFrame()

    match_table = pd.concat(match_frames)

    return match_table

In [16]:
def generate_merge_table(match_table, sensor_ds, model_ds, kernels_method, sensor_column, sensor):

    """ Final merge table with total column component data for each dataset, 
        their difference in each grid point are calculated

        The difference is calculated as sensor column minus model column. The input
        match table is not modified.

        Args:
            match_table (dataframe): Intermediate merge table with total column or partial column from both datasets
            sensor_ds (xarray): sensor dataset in xarray format (TROPOMI, IASI or GOME-2)
            model_ds (xarray): model dataset in xarray format (CAMS)
            kernels_method (str): Method to apply averaging kernels to model space:
            * Nearest neighbours: Find nearest neighbours horizontally and vertically
            * Interpolation: Find nearest neighbours horizontally and interpolate vertically
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            sensor (str): Name of the sensor
        
        Returns:
            merge_table (dataframe): Merge table with datasets column data and their difference
    """

    if 'hybrid' in list(model_ds.coords):

        kernels_applied = 'column_kernel' in list(sensor_ds.keys()) and kernels_method is not None
        merge_frames = []

        # The loop variable must be called 'time', it is referenced by the query below (@time)
        for time in sensor_ds.time.values:

            match_ds = match_table.query('time == @time').to_xarray()
            match_ds_time = match_ds.sel(time = time)

            # Read latitudes and longitudes from data array
            latitude = match_ds_time.latitude.mean(dim = 'layer')
            longitude = match_ds_time.longitude.mean(dim = 'layer')

            # Get sum of CAMS data of each layer to get column data
            if kernels_applied:
                model_final_ds_time = match_ds_time.model_column.sum(dim = 'layer', skipna = False).astype(float)

            # Without kernels the model column is repeated in every layer: take the mean
            else:
                model_final_ds_time = match_ds_time.model_column.mean(dim = 'layer', skipna = False).astype(float)

            model_final_ds_time = model_final_ds_time.assign_coords(latitude = latitude, longitude = longitude)

            # Get mean of TROPOMI data of each layer (it must be equal)
            sensor_final_ds_time = match_ds_time[sensor_column].mean(dim = 'layer', skipna = False).astype(float)
            sensor_final_ds_time = sensor_final_ds_time.assign_coords(latitude = latitude, longitude = longitude)

            merged_ds_time = xr.merge([model_final_ds_time, sensor_final_ds_time])
            merged_ds_time['difference'] = merged_ds_time[sensor_column] - merged_ds_time['model_column']
            merge_frames.append(merged_ds_time.to_dataframe())

        merge_table = pd.concat(merge_frames)

    else:

        # Work on a copy, so the 'difference' column is not added to the caller's dataframe
        merge_table = match_table.copy()
        merge_table['difference'] = merge_table[sensor_column] - merge_table['model_column']

    # Organize dataset for visualization
    if sensor == 'tropomi':
        merge_table = merge_table.groupby(by = ['scanline', 'ground_pixel', 'time']).mean()

    elif sensor == 'iasi' or sensor == 'gome':
        merge_table = merge_table.groupby(by = ['latitude', 'longitude', 'time']).mean()
    
    return merge_table

In [17]:
def plot_period(sensor_ds, sensor):

    """ Define plot period

        The user is asked (through the standard input) if the plots should be restricted to
        specific dates and, in that case, if each available date should be included.

        Args:
            sensor_ds (xarray): sensor dataset in xarray format (TROPOMI, IASI or GOME-2)
            sensor (str): Name of the sensor

        Returns:
            plot_dates (arr): Plot dates
    """

    period_answer = input('Do you want to visualize the plots for specific dates? Press Enter for Yes or write No:')

    # 'No' is accepted in any case and with surrounding blanks
    if period_answer.strip().lower() == 'no':
        plot_dates = sensor_ds.time.values
    
    else:
        if sensor == 'tropomi':
            options_df = pd.DataFrame({'Date': sensor_ds.time.values})
        
        # IASI and GOME-2 dates are offered by month
        elif sensor == 'iasi' or sensor == 'gome':
            options_df = pd.DataFrame({'Date': sensor_ds.time.values.astype('datetime64[M]')})

        selected_dates = []

        for date in options_df['Date']:
            date_answer = input('Do you want to show the plots for ' + str(date) + '? Press Enter for Yes or write No:') 
            if date_answer.strip().lower() != 'no':
                selected_dates.append(date)

        # Always an array (even if no date was selected), so it can be casted for printing below
        plot_dates = np.array(selected_dates, dtype = object)

    print('The plots will be shown for the following dates:')
    if sensor == 'tropomi':
        print(plot_dates)
    
    elif sensor == 'iasi' or sensor == 'gome':
        print(plot_dates.astype('datetime64[M]'))

    return plot_dates

In [18]:
def _prompt_coordinate(limit, axis, lower, upper, minimum = None):

    """ Ask for a coordinate until the answer is a number within the allowed range

        Args:
            limit (str): Limit that is asked ('minimum' or 'maximum')
            axis (str): Coordinate that is asked ('longitude' or 'latitude')
            lower, upper (float): Allowed range (both included)
            minimum (float): If given, the answer must also be higher than this value

        Returns:
            value (float): Valid answer
    """

    prompt = f'Write value of {limit} {axis}: '

    while True:

        try:
            value = float(input(prompt))

        except ValueError:
            print('ERROR: The value must be a number.')

        else:
            if minimum is None:
                if lower <= value <= upper:
                    return value
                print(f'ERROR: {axis.capitalize()} must be between {lower} and {upper}.')

            else:
                if lower <= value <= upper and value > minimum:
                    return value
                print(f'ERROR: {axis.capitalize()} must be between {lower} and {upper} and be higher than the minimum {minimum}.')

        # Any following attempt is marked in the prompt
        prompt = f'Write value of {limit} {axis} (again): '


def plot_extent(bbox):

    """ Define plot extent

        The user is asked (through the standard input) if the plots should be restricted to 
        a specific extent and, in that case, for its limits. Every limit is asked again until 
        it is a number within the query bounding box.

        Args:
            bbox (arr): Query bounding box, as ((lon_min, lat_min), (lon_max, lat_max))

        Returns:
            plot_bbox (arr): Plot bounding box, as ((lon_min, lat_min), (lon_max, lat_max))
    """

    extent_answer = input(f'Do you want to visualize the plots for a specific extent? Press Enter for Yes or write No (default {bbox}):')

    # 'No' is accepted in any case and with surrounding blanks
    if extent_answer.strip().lower() == 'no':
        plot_bbox = ((bbox[0][0], bbox[0][1]), (bbox[1][0], bbox[1][1]))

    else:
        # Define minimum and maximum longitude
        plot_lon_min = _prompt_coordinate('minimum', 'longitude', bbox[0][0], bbox[1][0])
        plot_lon_max = _prompt_coordinate('maximum', 'longitude', bbox[0][0], bbox[1][0], plot_lon_min)

        # Define minimum and maximum latitude
        plot_lat_min = _prompt_coordinate('minimum', 'latitude', bbox[0][1], bbox[1][1])
        plot_lat_max = _prompt_coordinate('maximum', 'latitude', bbox[0][1], bbox[1][1], plot_lat_min)

        # Define plot bbox
        plot_bbox = ((plot_lon_min, plot_lat_min), (plot_lon_max, plot_lat_max))

    print('The plots will be shown for the following spatial extent: ')
    print(plot_bbox)
    
    return plot_bbox

In [19]:
def colorbar_range(range_type, merge, array, *args):

    """ Define colorbar range

        Args:
            range_type (str): Range type for colorbar:
            -  'Original': Show original values in range
            -  'Positive': Show only positive values in range
            -  'Equal': Show same scale in range
            merge (xarray): Merge result for a specific time
            array (xarray): Component for a specific time and model/sensor
            *args: Include 'sensor_column' if range type is Equal

        Returns:
            vmin, vmax (float): Limits of color bar

        Raises:
            ValueError: If the range type is unknown, or if it is 'Equal' and there 
            is nothing in *args to compare the model column with
    """

    # The colorbar will show the original range
    if range_type == 'Original':

        vmin = np.nanmin(array)
        vmax = np.nanmax(array)

    # The colorbar will show the original range only with positive values
    elif range_type == 'Positive':

        lowest = np.nanmin(array)
        vmin = 0 if lowest < 0 else lowest
        vmax = np.nanmax(array)

    # The colorbar will be in the same scale for both datasets
    elif range_type == 'Equal':

        if not args:
            raise ValueError("The range type 'Equal' needs the sensor column name as extra argument.")

        # Define arrays
        array_1 = merge.model_column
        reference = args[0]

        # A name is the column of the merge result to compare with (taken from the 
        # arguments instead of from a global variable)
        if isinstance(reference, str):
            array_2 = merge[reference]

        # Anything else is handled as a model dataset
        # NOTE: this path still relies on 'step' and 'time' existing in the enclosing (notebook) scope
        else:
            array_2 = reference.component.isel(step = step).sel(time = time)

        # Lowest minimum and highest maximum of both arrays
        vmin = min(np.nanmin(array_1), np.nanmin(array_2))
        vmax = max(np.nanmax(array_2), np.nanmax(array_1))

    else:
        raise ValueError(f"Unknown range type '{range_type}', it must be 'Original', 'Positive' or 'Equal'.")
            
    return vmin, vmax

In [20]:
def visualize_pcolormesh(fig, axs, data_array, longitude, latitude, projection, color_scale, 
                         pad, long_name, units_name, vmin, vmax, set_global = True, lonmin = -180, 
                         lonmax = 180, latmin = -90, latmax = 90):
    
    """ Plot one dataset as a colored mesh on a map, with borders, coastlines, title and 
        a horizontal colorbar

        Args:
            fig: Figure
            axs: Axes of figure where the dataset is plotted
            data_array (xarray): Variable values to plot - It must be 2-dimensional
            longitude (arr): Longitudes within data_array
            latitude (arr): Latitudes within data_array
            projection: Geographical projection
            color_scale (str): Color scale for the color bar
            pad (float): Padding for the subtitles
            long_name (str): Plot name
            units_name (str): Component name and units
            vmin, vmax (float): Limits of color bar
            set_global: Extent setting
            lonmin, lonmax, latmin, latmax (float): Limits of longitude and latitude values
    """

    # Work on a copy of the colormap: values under vmin are drawn fully transparent
    palette = copy(plt.get_cmap(color_scale))
    palette.set_under(alpha = 0)
    
    # Only vmin and vmax are passed (a linear scale between them): recent versions of 
    # Matplotlib do not accept them together with a norm
    im = axs.pcolormesh(
                        longitude, latitude, data_array, 
                        cmap = palette, 
                        transform = projection,
                        vmin = vmin,
                        vmax = vmax,
                        shading = 'auto'
                        )
                        
    axs.add_feature(cfeature.BORDERS, edgecolor = 'black', linewidth = 1)
    axs.add_feature(cfeature.COASTLINE, edgecolor = 'black', linewidth = 1)

    # Extent and labelled gridlines (left and bottom only) for the PlateCarree projection
    if (projection == ccrs.PlateCarree()):
        axs.set_extent([lonmin, lonmax, latmin, latmax], projection)
        gl = axs.gridlines(draw_labels = True, linestyle = '--')
        gl.top_labels = False
        gl.right_labels = False
        gl.xformatter = LONGITUDE_FORMATTER
        gl.yformatter = LATITUDE_FORMATTER
        gl.xlabel_style = {'size': 16}
        gl.ylabel_style = {'size': 16}

    # The global extent is applied after (and instead of) the limits above
    if (set_global):
        axs.set_global()
        axs.gridlines()

    axs.set_title(long_name, fontsize = 18, pad = pad)
    axs.tick_params(labelsize = 14)

    cbr = fig.colorbar(im, ax = axs, extend = 'both', orientation = 'horizontal', fraction = 0.05, pad = 0.15)   
    cbr.set_label(units_name, fontsize = 16)
    cbr.ax.tick_params(labelsize = 14)
    cbr.ax.xaxis.get_offset_text().set_fontsize(14)

In [21]:
def create_maps(merge, range_type, sensor, model, sensor_type, model_type, sensor_column, 
                projection, pad, units_name, plot_bbox):

    """ Create a figure with three maps side by side: model, sensor and their differences

        Args:
            merge (xarray): Merge result with the model column, sensor column and difference
            range_type (str): Range type for colorbar ('Original', 'Positive' or 'Equal')
            sensor (str): Name of the sensor
            model (str): Name of the model
            sensor_type (str): Sensor type
            model_type (str): Model type
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            projection: Geographical projection
            pad (float): Padding for the subtitles
            units_name (str): Component name and units
            plot_bbox (arr): Plot extent

        Returns:
            fig: Figure with the three maps
    """

    fig, axs = plt.subplots(1, 3, figsize = (20, 4), subplot_kw = {'projection': projection})

    def draw_map(ax, array, long_name, vmin, vmax):
        # Settings shared by the three maps; NaNs are replaced by -999 before plotting
        visualize_pcolormesh(
                            fig = fig, axs = ax,
                            data_array = array.fillna(-999),
                            longitude = array.longitude,
                            latitude = array.latitude,
                            projection = projection,
                            color_scale = 'coolwarm',
                            pad = pad,
                            long_name = long_name,
                            units_name = units_name,
                            vmin = vmin,
                            vmax = vmax,
                            set_global = False,
                            lonmin = plot_bbox[0][0],
                            lonmax = plot_bbox[1][0],
                            latmin = plot_bbox[0][1],
                            latmax = plot_bbox[1][1]
                            )

    # First plot - CAMS
    model_array = merge.model_column
    model_vmin, model_vmax = colorbar_range(range_type, merge, model_array, sensor_column)
    draw_map(axs[0], model_array, model.upper() + ' (' + model_type + ')', model_vmin, model_vmax)

    # Second plot - TROPOMI, IASI or GOME-2
    sensor_array = merge[sensor_column]
    sensor_vmin, sensor_vmax = colorbar_range(range_type, merge, sensor_array, sensor_column)
    draw_map(axs[1], sensor_array, sensor.upper() + ' (' + sensor_type + ')', sensor_vmin, sensor_vmax)

    # Third plot - Differences (always in their original range)
    difference_array = merge.difference
    draw_map(axs[2], difference_array, 'Differences plot', 
             np.nanmin(difference_array), np.nanmax(difference_array))

    return fig

In [22]:
def visualize_model_vs_sensor(model, sensor, component_nom, units, merge_table, plot_dates, plot_bbox, pad, y, 
                              model_type, sensor_type, range_type, sensor_column, distribution_type):

    """ Plot model and sensor datasets in the study area for the selected dates, 
        along with a plot of the differences

        Args:
            model (str): Name of the model
            sensor (str): Name of the sensor
            component_nom (str): Component chemical nomenclature
            units (str): Component units
            merge_table (dataframe): Merge table with datasets column data and their difference
            plot_dates (arr): Plot dates
            plot_bbox (arr): Plot extent
            pad (float): Padding for the subtitles
            y (float): y-position of main title
            model_type (str): Model type:
            -  'Forecast'
            -  'Reanalysis'
            sensor_type (str): Sensor type ('NRT')
            range_type (str): Range type for colorbar:
            -  'Original': Show original values in range
            -  'Positive': Show only positive values in range
            -  'Equal': Show same scale in range
            sensor_column (str): Name of the sensor column in merge_table
            distribution_type (str): 
            -  'aggregated': One set of maps with the mean over all times
            -  'individual': One set of maps per date in plot_dates

        Raises:
            KeyboardInterrupt: If distribution_type is not one of the options above
    """

    # Fail loudly (same convention as the other functions in this file) instead of
    # silently plotting nothing when the option is misspelled
    if distribution_type not in ('aggregated', 'individual'):
        print("ERROR: distribution_type is wrongly defined. The options are 'aggregated' and 'individual'.")
        raise KeyboardInterrupt()

    units_name = component_nom + ' (' + units + ')'
    projection = ccrs.PlateCarree()

    if distribution_type == 'aggregated':

        # Average over time
        merge = merge_table.to_xarray().mean(dim = 'time')
        merge = merge.assign_coords(latitude = merge.latitude, longitude = merge.longitude)

        fig = create_maps(merge, range_type, sensor, model, sensor_type, model_type, sensor_column, 
                          projection, pad, units_name, plot_bbox)

        fig.suptitle(f'DISTRIBUTION OF {component_nom} (All times)',
                     fontsize = 18, fontweight = 'bold', y = y)

    else:

        # The loop variable must stay named 'time': the query string refers to it as @time
        for time in plot_dates:

            # Select the date once and reuse the selection for the coordinates
            merge = merge_table.query('time == @time').to_xarray().sel(time = time)
            merge = merge.assign_coords(latitude = merge.latitude, longitude = merge.longitude)

            fig = create_maps(merge, range_type, sensor, model, sensor_type, model_type, sensor_column, 
                              projection, pad, units_name, plot_bbox)

            if sensor == 'tropomi':
                fig.suptitle(f'DISTRIBUTION OF {component_nom} (Est. time: {time})',
                             fontsize = 18, fontweight = 'bold', y = y)
            
            elif sensor == 'iasi' or sensor == 'gome':
                month = np.datetime64(time).astype('datetime64[M]')
                fig.suptitle(f'DISTRIBUTION OF {component_nom} (Month: {month})',
                             fontsize = 18, fontweight = 'bold', y = y)

            plt.show()

In [23]:
def visualize_model_original_vs_calculated(model, component_nom, units, merge_table, model_total_ds, 
                                           plot_dates, plot_bbox, pad, y, model_type, range_type):

    """ Draw, for every selected date, the calculated model total columns next to
        the total columns of the original dataset over the study area

        Args:
            model (str): Name of the model
            component_nom (str): Component chemical nomenclature
            units (str): Component units
            merge_table (dataframe): Merge result
            model_total_ds (xarray): CAMS total columns dataset in xarray format
            plot_dates (arr): Plot dates
            plot_bbox (arr): Plot extent
            pad (float): Padding for the subtitles
            y (float): y-position of main title
            model_type (str): Model type:
            -  'Forecast'
            -  'Reanalysis'
            range_type (str): Range type for colorbar:
            -  'Original': Show original values in range
            -  'Positive': Show only positive values in range
            -  'Equal': Show same scale in range
    """

    projection = ccrs.PlateCarree()
    units_name = component_nom + ' (' + units + ')'

    # Step index used for the colorbar range and for the original dataset
    step = 2

    def draw_panel(fig, ax, merge, array, headline_prefix, time):

        # NOTE(review): colorbar_range is called with six arguments here but with
        # four in create_maps - confirm against its definition
        vmin, vmax = colorbar_range(range_type, merge, array, model_total_ds, step, time)
        headline = headline_prefix + model.upper() + ' (' + model_type + ')'

        visualize_pcolormesh(
                            fig = fig, axs = ax,
                            data_array = array.fillna(-999),
                            longitude = array.longitude,
                            latitude = array.latitude,
                            projection = projection,
                            color_scale = 'coolwarm',
                            pad = pad,
                            long_name = headline,
                            units_name = units_name,
                            vmin = vmin,
                            vmax = vmax,
                            set_global = False,
                            lonmin = plot_bbox[0][0],
                            lonmax = plot_bbox[1][0],
                            latmin = plot_bbox[0][1],
                            latmax = plot_bbox[1][1]
                            )

    # The loop variable must stay named 'time': the query string refers to it as @time
    for time in plot_dates:

        fig, axs = plt.subplots(1, 2, figsize = (20, 5), subplot_kw = {'projection': projection})

        selected = merge_table.query('time == @time').to_xarray().sel(time = time)
        merge = selected.assign_coords(latitude = selected.latitude, longitude = selected.longitude)

        # Left panel - CAMS calculated total columns
        draw_panel(fig, axs[0], merge, merge.model_column, 'CALCULATED TOTAL COLUMNS ', time)

        # Right panel - CAMS original total columns
        original = model_total_ds.component.isel(step = step).sel(time = time)
        draw_panel(fig, axs[1], merge, original, 'ORIGINAL TOTAL COLUMNS ', time)

        fig.suptitle(f'DISTRIBUTION OF {component_nom} (Est. time: {time})',
                    fontsize = 18, fontweight = 'bold', y = y)
        plt.show()

In [24]:
def get_google_api():

    """ Get Google API key for reverse geocoding (get country given the coordinates)

        Reads 'data/keys.txt' and exports its first three lines as the environment
        variables GOOGLE_API_KEY, GOOGLE_CLIENT and GOOGLE_CLIENT_SECRET.
        
        Returns:
            environ_keys[0]: Google API key

        Raises:
            IndexError: If the keys file has fewer than three lines
    """

    # Txt file with three lines:
    # GOOGLE API KEY (first line), GOOGLE CLIENT ID (second line) and GOOGLE CLIENT SECRET (third line)
    # The context manager closes the file even if reading fails
    with open('data/keys.txt', 'r') as keys_file:
        environ_keys = [key.rstrip() for key in keys_file]

    # Check before touching the environment so a short file does not leave it half-configured
    if len(environ_keys) < 3:
        raise IndexError('data/keys.txt must contain three lines: '
                         'API key, client ID and client secret.')

    # Set environment variables in your system
    os.environ['GOOGLE_API_KEY'] = environ_keys[0]
    os.environ['GOOGLE_CLIENT'] = environ_keys[1]
    os.environ['GOOGLE_CLIENT_SECRET'] = environ_keys[2]

    return environ_keys[0]

In [25]:
def get_season(day):

    """ Get season given the day

        Args:
            day (date or datetime): Date; any time of day is ignored
        
        Returns:
            season (str): Season of the year ('Winter', 'Spring', 'Summer' or 'Autumn')
    """

    # Local import keeps the function self-contained
    from datetime import date, datetime

    # Reference year used only to compare month and day; 2000 is a leap year,
    # so 29 February can be mapped onto it
    Y = 2000

    # datetime objects cannot be ordered against the date boundaries below,
    # and a time of day would fall between one end date and the next start date
    if isinstance(day, datetime):
        day = day.date()

    day = day.replace(year = Y)

    seasons = [('Winter', (date(Y,  1,  1),  date(Y,  3, 20))),
               ('Spring', (date(Y,  3, 21),  date(Y,  6, 20))),
               ('Summer', (date(Y,  6, 21),  date(Y,  9, 22))),
               ('Autumn', (date(Y,  9, 23),  date(Y, 12, 20))),
               ('Winter', (date(Y, 12, 21),  date(Y, 12, 31)))]

    season = next(season for season, (start, end) in seasons
             if start <= day <= end)
             
    return season

In [26]:
def linear_regression(X, Y, component_nom):

    """ Fit a linear equation to scatter plot between X and Y and print results

        Args:
            X (array): Input sensor component values, as a column (n, 1)
            Y (array): Input model component values, as a column (n, 1)
            component_nom (str): Component chemical nomenclature
        
        Returns:
            fit_X (array): X in linear equation fit_Y = A * fit_X + B (10 points between min and max of X)
            fit_Y (array): Y in linear equation fit_Y = A * fit_X + B
            score (float): Coefficient of determination
            coefficient (float): A in linear equation fit_Y = A * fit_X + B
            intercept (float): B in linear equation fit_Y = A * fit_X + B
    """

    reg = LinearRegression().fit(X, Y)

    # Pull the scalars out by index instead of float(array): converting an array
    # with ndim > 0 to a scalar is deprecated in NumPy
    coefficient = reg.coef_[0][0]
    intercept = reg.intercept_[0]

    # Score once and reuse it for both the return value and the printout
    score = reg.score(X, Y)

    fit_X = np.linspace(np.nanmin(X), np.nanmax(X), 10)
    fit_Y = fit_X * coefficient + intercept

    print(f'Fit equation: {component_nom}_model = {component_nom}_sensor * {coefficient:.2f} + ({intercept:.2E})')
    print(f'Coefficient of determination (R2): {score:.2f}')

    return fit_X, fit_Y, score, coefficient, intercept

In [27]:
def scatter_plot_general_settings(component_nom, axs, units, lim_min, lim_max):

    """ Apply the shared labels, tick sizes and limits to a scatter plot figure

        Args:
            component_nom (str): Component chemical nomenclature
            axs (arr): Three axes: scatter plot, sensor histogram and model histogram
            units (str): Component units
            lim_min (float): Minimum value of component in scale
            lim_max (float): Maximum value of component in scale
    """

    sensor_label = f'Sensor {component_nom} ({units})'
    model_label = f'Model {component_nom} ({units})'

    # Scatter plot: sensor on x, model on y, same range on both axes
    scatter_ax = axs[0]
    scatter_ax.set_xlabel(sensor_label, fontsize = 16)
    scatter_ax.set_ylabel(model_label, fontsize = 16)
    scatter_ax.tick_params(labelsize = 14)
    scatter_ax.set_xlim([lim_min, lim_max])
    scatter_ax.set_ylim([lim_min, lim_max])

    # Histograms: one per dataset, counts on y
    for hist_ax, hist_label in ((axs[1], sensor_label), (axs[2], model_label)):
        hist_ax.set_xlabel(hist_label, fontsize = 16)
        hist_ax.set_ylabel('Count', fontsize = 16)
        hist_ax.tick_params(labelsize = 14)
        hist_ax.set_xlim([lim_min, lim_max])

In [28]:
def _scatter_time_title(component_nom, sensor, time):

    """ Build the figure title used for a single plot date

        Args:
            component_nom (str): Component chemical nomenclature
            sensor (str): Name of the sensor
            time: Plot date

        Returns:
            title (str or None): Title, or None when the sensor is not tropomi, iasi or gome
    """

    if sensor == 'tropomi':
        return f'{component_nom} (Est. time: {time})'

    if sensor == 'iasi' or sensor == 'gome':
        month = np.datetime64(time).astype('datetime64[M]')
        return f'{component_nom} (Month: {month})'

    return None


def _scatter_figure(data, component_nom, units, sensor_column, lim_min, lim_max, y, title, hue, show):

    """ Draw one figure (scatter plot with regression line plus two histograms)

        Args:
            data (dataframe): Rows to plot, with sensor_column and 'model_column'
            component_nom (str): Component chemical nomenclature
            units (str): Component units
            sensor_column (str): Name of the sensor column in data
            lim_min (float): Minimum value of component in scale
            lim_max (float): Maximum value of component in scale
            y (float): y-position of main title
            title (str or None): Main title; no title is set when None
            hue (str or None): Column used to color the scatter points
            show (bool): Call plt.show() once the figure is complete

        Returns:
            fit (dict): 'Score', 'Coefficient' and 'Intercept' of the linear fit
    """

    fig, axs = plt.subplots(1, 3, figsize = (20, 5))

    # Linear regression
    X = data[sensor_column].values.reshape(-1, 1)
    Y = data['model_column'].values.reshape(-1, 1)
    fit_X, fit_Y, score, coefficient, intercept = linear_regression(X, Y, component_nom)
    axs[0].plot(fit_X, fit_Y, color = 'black')

    # Scatter plot and histograms
    sns.scatterplot(data = data, x = sensor_column, y = 'model_column', hue = hue, ax = axs[0])
    sns.histplot(data = data, x = sensor_column, kde = True, ax = axs[1])
    sns.histplot(data = data, x = 'model_column', kde = True, ax = axs[2])

    scatter_plot_general_settings(component_nom, axs, units, lim_min, lim_max)

    if title is not None:
        fig.suptitle(title, fontsize = 18, fontweight = 'bold', y = y)

    if show:
        plt.show()

    return {'Score': score, 'Coefficient': coefficient, 'Intercept': intercept}


def scatter_plot(merge_table, component_nom, units, sensor_column, sensor, plot_dates, y, extent_definition, show_seasons, scatter_plot_type, *args):

    """ Scatter plot between the model and sensor datasets in the study area for the selected dates (bbox or countries)

        Args:
            merge_table (dataframe): Merge result
            component_nom (str): Component chemical nomenclature
            units (str): Component units
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            sensor (str): Name of the sensor
            plot_dates (arr): Plot dates
            y (float): y-position of main title
            extent_definition (str):
            * 'country': Scatter plots for countries list
            * 'bbox': Scatter plots for bbox coordinates
            show_seasons (bool): Group the plots by season (only with bbox extent)
            scatter_plot_type (str):
            * 'aggregated': Aggregate plots by time, country or season
            * 'individual': Individual plots per time, country or season
            *args: Include 'plot_countries' (countries list, when extent_definition 
            is 'country') or 'plot_bbox' (plot extent, otherwise). When omitted, the 
            module-level variables with those names are used, as before.

        Returns:
            summary (dataframe): Period, location, score, coefficient and intercept 
            of every linear fit that was plotted

        Raises:
            KeyboardInterrupt: If show_seasons, extent_definition or scatter_plot_type 
            is wrongly defined
    """

    # Validate all options first, so nothing is plotted or geocoded for a bad call
    if show_seasons not in (True, False):
        print('ERROR: show_seasons is wrongly defined. The options are True and False.')
        raise KeyboardInterrupt()

    if show_seasons and extent_definition == 'country':
        print('ERROR: Set up show_seasons to False in order to show the scatter plots by countries.')
        raise KeyboardInterrupt()

    if not show_seasons and extent_definition not in ('bbox', 'country'):
        print("ERROR: extent_definition is wrongly defined. The options are 'bbox' and 'country'.")
        raise KeyboardInterrupt()

    if scatter_plot_type not in ('aggregated', 'individual'):
        print("ERROR: scatter_plot_type is wrongly defined. The options are 'aggregated' and 'individual'.")
        raise KeyboardInterrupt()

    # Take the extent from *args as documented; fall back to the module-level
    # variables that the function previously relied on
    plot_bbox = globals().get('plot_bbox')
    plot_countries = globals().get('plot_countries')
    if args:
        if extent_definition == 'country':
            plot_countries = args[0]
        else:
            plot_bbox = args[0]

    # NOTE(review): color_palette only returns a palette; sns.set_palette would be
    # needed to apply it. Kept as is so the plot colors do not change.
    sns.color_palette('colorblind', 10)

    # Common scale for all figures, from the whole merge table
    lim_min = min(np.nanmin(merge_table[sensor_column]), np.nanmin(merge_table['model_column']))
    lim_max = max(np.nanmax(merge_table[sensor_column]), np.nanmax(merge_table['model_column']))

    def draw(data, title, hue = None, show = True):
        return _scatter_figure(data, component_nom, units, sensor_column, 
                               lim_min, lim_max, y, title, hue, show)

    summary = []

    if show_seasons:

        # Prepare df
        merge = merge_table.reset_index()
        merge = merge[merge['time'].isin(plot_dates)].copy()

        # Find data for the seasons in list
        merge['Season'] = merge['time'].map(get_season)
        available_seasons = np.unique(merge['Season'])

        if scatter_plot_type == 'aggregated':

            if not merge.empty:
                fit = draw(merge, f'{component_nom} (All seasons)', hue = 'Season')
                summary.append({'Period': available_seasons, 'Location': plot_bbox, **fit})

        else:

            for plot_season in ('Winter', 'Spring', 'Summer', 'Autumn'):

                merge_season = merge[merge['Season'] == plot_season]

                if not merge_season.empty:
                    fit = draw(merge_season, f'{component_nom} ({plot_season})')
                    summary.append({'Period': plot_season, 'Location': plot_bbox, **fit})

    elif extent_definition == 'bbox':

        if scatter_plot_type == 'aggregated':

            # Prepare df
            merge = merge_table.reset_index()
            merge = merge[merge['time'].isin(plot_dates)]

            if not merge.empty:
                # As before, this figure is not shown explicitly
                fit = draw(merge, f'{component_nom} (All times)', hue = 'time', show = False)
                summary.append({'Period': plot_dates, 'Location': plot_bbox, **fit})

        else:

            lon_min, lat_min = plot_bbox[0][0], plot_bbox[0][1]
            lon_max, lat_max = plot_bbox[1][0], plot_bbox[1][1]

            # The loop variable must stay named 'time': the query string refers to it as @time
            for time in plot_dates:

                # Prepare df
                merge_time = merge_table.query('time == @time and '
                                               'longitude >= @lon_min and longitude <= @lon_max and '
                                               'latitude >= @lat_min and latitude <= @lat_max')

                if not merge_time.empty:
                    fit = draw(merge_time, _scatter_time_title(component_nom, sensor, time))
                    summary.append({'Period': time, 'Location': plot_bbox, **fit})

    else:

        # Prepare df
        merge = merge_table.reset_index()
        merge = merge[merge['time'].isin(plot_dates)].copy()

        # Read Google API key for reverse geocoding (get country by coordinates)
        google_api_key = get_google_api()

        # Reverse geocoding, once per distinct coordinate pair instead of once per row
        coords = list(zip(merge['latitude'], merge['longitude']))
        country_by_coords = {}
        for lat, lon in dict.fromkeys(coords):
            country_by_coords[(lat, lon)] = geocoder.google([lat, lon], method = 'reverse', 
                                                            key = google_api_key).country_long
        merge['Country'] = [country_by_coords[point] for point in coords]

        # Find data for the countries in search list
        merge = merge[merge['Country'].isin(plot_countries)]
        available_countries = np.unique(merge['Country'])

        if scatter_plot_type == 'aggregated':

            if not merge.empty:
                # As before, this figure is not shown explicitly
                fit = draw(merge, f'{component_nom} (All countries)', hue = 'Country', show = False)
                summary.append({'Period': plot_dates, 'Location': available_countries, **fit})

        else:

            for plot_country in plot_countries:

                merge_country = merge[merge['Country'] == plot_country]

                if not merge_country.empty:
                    fit = draw(merge_country, f'{component_nom} ({plot_country})')
                    summary.append({'Period': plot_dates, 'Location': plot_country, **fit})

    summary = pd.DataFrame(summary)

    return summary

In [29]:
def retrieve_coords(merge_table, coords_search, component_nom, sensor_column, sensor, model, plot_dates, units):

    """ Get component data for the closest coordinates to the list of search coordinates and plot them along time

        Args:
            merge_table (dataframe): Merge result
            coords_search (list): List of search coordinates (latitude, longitude)
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Component column name in TROPOMI, IASI or GOME-2 database
            sensor (str): Name of the sensor
            model (str): Name of the model
            plot_dates (arr): Plot dates
            units (str): Component units

        Returns:
            retrieval_table_all (dataframe): Dataframe with results from search, indexed by 
            search coordinates, found coordinates and time (empty if nothing was searched)
    """

    # Tables and available (latitude, longitude) pairs per date do not depend on
    # the search coordinates, so they are built once
    tables_by_date = []
    for time in plot_dates:
        table = merge_table.query('time == @time').reset_index()
        available_points = list(zip(table['latitude'], table['longitude']))
        tables_by_date.append((table, available_points))

    # DataFrame.append was removed in pandas 2.0: collect the pieces and concatenate
    retrieval_tables = []

    for coords in coords_search:

        found_tables = []
        lat_found = lon_found = None

        for table, available_points in tables_by_date:

            # Get closest pair to coordinates in search list
            closest = closest_point(coords, available_points)
            lat_found, lon_found = closest[0], closest[1]

            found = table[(table['latitude'] == lat_found) & (table['longitude'] == lon_found)]
            found = found.reset_index(drop = True).assign(lat_search = coords[0], 
                                                          lon_search = coords[1])
            found_tables.append(found)

        # No dates to search
        if not found_tables:
            continue

        retrieval_tables.extend(found_tables)

        # Time series of this search coordinate only, at the point found for the
        # last date (the point named in the plot title)
        retrieval_table_time = pd.concat(found_tables)
        retrieval_table_time = retrieval_table_time[(retrieval_table_time['latitude'] == lat_found) & 
                                                    (retrieval_table_time['longitude'] == lon_found)]

        # Plot variations in time
        if len(retrieval_table_time) > 1:
        
            fig, ax = plt.subplots(figsize = (30, 5))

            ax.plot(retrieval_table_time['time'], retrieval_table_time[sensor_column], 
                    color = 'red', label = sensor.upper())
            ax.plot(retrieval_table_time['time'], retrieval_table_time['model_column'], 
                    color = 'black', label = model.upper())

            ax.legend(loc='center left', bbox_to_anchor = (1, 0.5), prop = {'size': 25})

            if sensor == 'tropomi':
                ax.set_xlabel('Estimated time', fontsize = 25)
                
            elif sensor == 'iasi' or sensor == 'gome':
                ax.set_xlabel('Month', fontsize = 25)

            ax.tick_params(labelsize = 22)
            ax.set_ylabel(f'{component_nom} ({units})', fontsize = 25)
            ax.set_title(f'{component_nom} at latitude {lat_found} and longitude {lon_found}', 
                        fontsize = 25, fontweight = 'bold', y = 1.05)

    # Nothing retrieved (no search coordinates or no dates)
    if not retrieval_tables:
        return pd.DataFrame()

    retrieval_table_all = pd.concat(retrieval_tables).set_index(['lat_search', 
                                                                 'lon_search', 
                                                                 'latitude', 
                                                                 'longitude', 
                                                                 'time'])

    return retrieval_table_all