# General functions

In [None]:
def components_table(component_nom):

    """ Look up the metadata of a component from its chemical nomenclature

        The row of the lookup table that matches the component is printed before
        its values are returned.

        Args:
            component_nom (str): Component chemical nomenclature ('NO2', 'CO', 'O3', 'SO2' or 'CH4')

        Returns:
            component (str): Component name
            component_mol_weight (float): Component molecular weight
            component_tropomi_product (str): Component product name in 5phub
            component_tropomi_column (str): Component column name in TROPOMI dataset

        Raises:
            KeyboardInterrupt: If component_nom is not one of the supported nomenclatures
    """

    supported_noms = ['NO2', 'CO', 'O3', 'SO2', 'CH4']

    # Guard clause: stop right away when the component is not supported
    if component_nom not in supported_noms:
        print('Error: The selected component is not yet ready for comparison. The code will be interrupted.')
        raise KeyboardInterrupt

    # One column per piece of information, one row per supported component
    lookup = pd.DataFrame({
        'Nomenclature': supported_noms,
        'Weight': [46.005, 28.01, 48, 64.066, 16.04],
        'Component': ['nitrogen_dioxide', 'carbon_monoxide', 'ozone', 'sulfur_dioxide', 'methane'],
        'TROPOMI_product': ['L2__NO2___', 'L2__CO____', 'L2__O3____', 'L2__SO2___', 'L2__CH4___'],
        'TROPOMI_column': ['nitrogendioxide_tropospheric_column',
                           'carbonmonoxide_total_column',
                           'ozone_total_vertical_column',
                           'sulfurdioxide_total_vertical_column',
                           'methane_tropospheric_column'],
    })

    # The same boolean mask selects the printed row and every returned value
    is_selected = lookup['Nomenclature'] == component_nom
    print(lookup.loc[is_selected])

    returned_fields = ('Component', 'Weight', 'TROPOMI_product', 'TROPOMI_column')
    component, component_mol_weight, component_tropomi_product, component_tropomi_column = (
        lookup.loc[is_selected, field].iloc[0] for field in returned_fields
    )

    return component, component_mol_weight, component_tropomi_product, component_tropomi_column

In [None]:
def generate_folders(model, sensor, component_nom):

    """ Generate folders to download the datasets if they do not exist

        One folder per data source is created inside the current working directory,
        following the layout data/<source>/<component_nom>. Folders that already
        exist are left untouched.

        Args:
            model (str): Name of the model
            sensor (str): Name of the sensor
            component_nom (str): Component chemical nomenclature
    """

    base_path = os.path.abspath('')

    for source in (model, sensor):
        # Join the path components one by one instead of concatenating them with a
        # hard-coded '/', so the separator of the running platform is always used
        os.makedirs(os.path.join(base_path, 'data', source, component_nom), exist_ok = True)

In [None]:
def nearest_neighbour(array, value):

    """ Find index of the closest value in an array (it can be used to locate the nearest neighbours in space and time)

        Args:
            array (arr): Array to find the nearest neighbour
            value (float or int): Search value

        Returns:
            index (int): Position of the element of array that is closest to value.
                         When several elements are equally close, the first one is returned.
                         For an array with more than one dimension the search runs along
                         the first axis and one index per remaining position is returned.
    """

    # A single vectorised subtraction replaces the element-by-element Python loop
    distances = np.abs(np.asarray(array) - value)
    index = distances.argmin(0)

    return index

In [None]:
def visualize_pcolormesh(fig, axs, data_array, longitude, latitude, projection, color_scale, 
                         long_name, units, vmin, vmax, set_global = True, lonmin = -180, lonmax = 180, latmin = -90, latmax = 90):
    
    """ Plot a 2-dimensional variable as a color mesh on a map, with borders, coastlines and a horizontal color bar

        Args:
            fig: Figure
            axs: Axes of figure where the data is drawn
            data_array (xarray): Variable values to plot - It must be 2-dimensional
            longitude: Longitudes within data_array
            latitude: Latitudes within data_array
            projection: Geographical projection, used as the transform of the data. The labelled
                        gridlines and the longitude/latitude limits are only applied when it
                        equals ccrs.PlateCarree()
            color_scale: Color scale for the color bar
            long_name: Plot name
            units: Units of variable (label of the color bar)
            vmin, vmax: Limits of color bar. If vmin is None, the lower limit is 0
            set_global: Extent setting. If True, axs.set_global() is called after the
                        longitude/latitude limits have been applied, and gridlines are added
            lonmin, lonmax, latmin, latmax (float or int): Limits of longitude and latitude values
    """

    palette = copy(plt.get_cmap(color_scale))
    # Values below the lower limit of the color bar are drawn fully transparent
    palette.set_under(alpha = 0)

    # matplotlib does not accept vmin/vmax together with a norm (deprecated in 3.3, an error
    # in later releases), so the limits are carried by the norm alone. The lower limit falls
    # back to 0 when vmin is None, which is what the former Normalize(vmin = 0) gave in that case.
    norm = colors.Normalize(vmin = 0 if vmin is None else vmin, vmax = vmax)

    im = axs.pcolormesh(
                        longitude, latitude, data_array, 
                        cmap = palette, 
                        transform = projection,
                        norm = norm,
                        shading = 'auto'
                        )
                        
    axs.add_feature(cfeature.BORDERS, edgecolor = 'black', linewidth = 1)
    axs.add_feature(cfeature.COASTLINE, edgecolor = 'black', linewidth = 1)

    if (projection == ccrs.PlateCarree()):
        axs.set_extent([lonmin, lonmax, latmin, latmax], projection)
        # Labelled gridlines on the left and bottom sides only
        gl = axs.gridlines(draw_labels = True, linestyle = '--')
        gl.top_labels = False
        gl.right_labels = False
        gl.xformatter = LONGITUDE_FORMATTER
        gl.yformatter = LATITUDE_FORMATTER
        gl.xlabel_style = {'size': 16}
        gl.ylabel_style = {'size': 16}

    if(set_global):
        # Runs after set_extent, so it takes precedence over the longitude/latitude limits
        axs.set_global()
        axs.gridlines()

    axs.set_title(long_name, fontsize = 18, pad = 20)
    axs.tick_params(labelsize = 14)

    cbr = fig.colorbar(im, ax = axs, extend = 'both', orientation = 'horizontal', fraction = 0.05, pad = 0.15)   
    cbr.set_label(units, fontsize = 16)
    cbr.ax.tick_params(labelsize = 14)
    # Keep the offset text of the color bar the same size as its tick labels
    cbr.ax.xaxis.get_offset_text().set_fontsize(14)