# GOME-2 functions

In [None]:
def GOME_L3_version(component_nom, year, month):

    """ Get version of GOME-2 dataset for each component nomenclature

         Args:
            component_nom (str): Component chemical nomenclature
            year (str): Year of dataset
            month (str): Month of dataset

        Returns:
            version (str): GOME-2 dataset version

        Raises:
            ValueError: If year or month cannot be converted to an integer,
                        or if no version is defined for component_nom
    """

    # The version does not depend on the date yet; the conversions are kept
    # so that malformed year/month values are still rejected
    int(year)
    int(month)

    versions = {'NO2': 'v1'}

    if component_nom not in versions:
        # Previously this fell through to 'return version' with the name unbound
        raise ValueError(
            "No GOME-2 L3 version defined for component '" + str(component_nom) + "'"
            + ' (supported: ' + ', '.join(sorted(versions)) + ')'
        )

    return versions[component_nom]

In [None]:
def GOME_L3_download(component_nom, date, satellite):

    """ Download the monthly L3 GOME-2 product of one satellite

         Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query year and month, separated by '-'
            satellite (str): A, B or C referring to METOP series

        Returns:
            product_name (str): Product name of GOME-2 product (returned
                                whether or not the download succeeded)
    """

    output_path = 'data/gome/' + component_nom + '/monthly/' + satellite
    os.makedirs(output_path, exist_ok = True)

    date_parts = date.split('-')
    year = date_parts[0]
    month = date_parts[1]
    version = GOME_L3_version(component_nom, year, month)

    product_name = ''.join(['GOME_', component_nom, '_Global_', year, month, '_METOP' + satellite + '_DLR_', version, '.nc'])
    remote_url = 'ftp://acsaf.eoc.dlr.de/gome2' + satellite.lower() + '/level3/' + component_nom + '/' + year + '/' + product_name
    local_file = output_path + '/' + product_name

    # wget creates the -O target before transferring, so a failed transfer
    # leaves an empty file which '-nc' would then treat as already downloaded.
    # Drop such a leftover from an earlier run so the download is retried.
    if os.path.isfile(local_file) and os.path.getsize(local_file) == 0:
        os.remove(local_file)

    result = subprocess.run(['wget', '-nc', remote_url, '-O', local_file])

    # Do not leave an empty placeholder behind when this attempt failed
    if result.returncode != 0 and os.path.isfile(local_file) and os.path.getsize(local_file) == 0:
        os.remove(local_file)

    return product_name

In [None]:
def GOME_L3_read(dates, component_nom, sensor_column, satellites):

    """ Read L3 GOME-2 datasets as xarray dataset object and assign time

         Args:
            dates (list): Query dates, each with year and month separated by '-'
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Name of sensor column in downloaded dataset
            satellites (list): List with A, B and/or C referring to METOP series

        Returns:
            sensor_ds (xarray): GOME-2 dataset in xarray format

        Raises:
            ValueError: If no downloaded product matches one of the dates
    """

    sensor_ds_all = []

    for date in dates:

        date_parts = date.split('-')
        year = date_parts[0]
        month = date_parts[1]

        # Products are named GOME_<component>_Global_<year><month>_METOP<sat>_...,
        # so this tag selects the files of the queried month only. Without it
        # every month present in the folder would end up in every time step.
        date_tag = '_' + year + month + '_'

        sensor_ds_ABC = []

        # Combine data from METOP-A, METOP-B and METOP-C
        for satellite in satellites:

            path = 'data/gome/' + component_nom + '/monthly/' + satellite
            product_names = [file for file in sorted(os.listdir(path)) if date_tag in file]

            for product_name in product_names:

                # The sensor column is stored in the PRODUCT group; attach it
                # to the dataset opened from the root group
                sensor_ds_sat = xr.open_dataset(path + '/' + product_name)
                sensor_ds_int_sat = xr.open_dataset(path + '/' + product_name, group = 'PRODUCT')
                sensor_ds_sat[sensor_column] = sensor_ds_int_sat[sensor_column]

                sensor_ds_ABC.append(sensor_ds_sat)

        if not sensor_ds_ABC:
            raise ValueError('No GOME-2 L3 product found for ' + date + ' and satellites ' + str(list(satellites)))

        sensor_ds_ABC = xr.concat(sensor_ds_ABC, dim = 'latitude')
        time_str = dt.datetime(int(year), int(month), 1)
        sensor_ds_ABC = sensor_ds_ABC.assign_coords({'time': time_str}).expand_dims(dim = ['time'])
        sensor_ds_all.append(sensor_ds_ABC)

    sensor_ds = xr.merge(sensor_ds_all)

    return sensor_ds

In [None]:
def GOME_L2_download(component_nom, date, satellite):

    """ Download all offline L2 GOME-2 products of one day and one satellite

         Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query year, month and day, separated by '-'
            satellite (str): A, B or C referring to METOP series
    """

    output_path = 'data/gome/' + component_nom + '/' + date + '/' + satellite
    os.makedirs(output_path, exist_ok = True)

    # Get year, month and day from date
    date_parts = date.split('-')
    year = date_parts[0]
    month = date_parts[1]
    day = date_parts[2]

    # Save index.html with available offline products through FTP for specific date
    date_path = 'ftp://acsaf.eoc.dlr.de/gome2' + satellite.lower() + '/offline/' + year + '/' + month + '/' + day + '/'
    date_url = 'data/gome/' + component_nom + '/' + 'index.html'
    subprocess.run(['wget', '-O', date_url, '-i', date_path])

    # Read index.html and get content within pre tabs (first and last line
    # of that content are not product entries). The file is closed right
    # after parsing instead of being left open.
    with open(date_url, 'r') as html_file:
        items_int = bs4.BeautifulSoup(html_file, 'lxml').pre.get_text().splitlines()[1:-1]
    items = [item_int.split('File        ', 1)[1].split('  (', 1)[0] for item_int in items_int]

    # Download all files for date and satellite
    for product_name in items:

        local_file = output_path + '/' + product_name
        remote_url = date_path + product_name

        # wget creates the -O target before transferring, so a failed transfer
        # leaves an empty file which '-nc' would then treat as already
        # downloaded. Drop such a leftover so the download is retried.
        if os.path.isfile(local_file) and os.path.getsize(local_file) == 0:
            os.remove(local_file)

        result = subprocess.run(['wget', '-nc', remote_url, '-O', local_file])

        # Do not leave an empty placeholder behind when this attempt failed
        if result.returncode != 0 and os.path.isfile(local_file) and os.path.getsize(local_file) == 0:
            os.remove(local_file)

In [None]:
def GOME_L2_read(dates, component_nom, sensor_column, satellites):

    """ Read L2 GOME-2 datasets, grid them and return them as xarray dataset
        object with time

         Args:
            dates (list): Query dates, each with year, month and day separated by '-'
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Name given to the gridded variable in the returned dataset
            satellites (list): List with A, B and/or C referring to METOP series

        Returns:
            sensor_ds (xarray): GOME-2 dataset in xarray format, with one
                                180 x 360 latitude-longitude grid per date

        Raises:
            ValueError: If no product is found for one of the dates
    """

    # Same bin edges for every date, so the daily grids share their
    # coordinates and line up when concatenated along time
    grid = {'bins': (180, 360), 'range': [[-90, 90], [-180, 180]]}

    sensor_ds_all = []

    # Concatenate all the products for different dates
    for date in dates:

        date_parts = date.split('-')
        year = date_parts[0]
        month = date_parts[1]
        day = date_parts[2]

        sensor_ds_ABC = []

        # Collect all the products for METOP-A, B and C
        for satellite in satellites:

            path = 'data/gome/' + component_nom + '/' + date + '/' + satellite

            # Collect all the products for different hours
            for product_name in sorted(os.listdir(path)):

                # Load the arrays into memory so the file can be closed at once
                with h5py.File(path + '/' + product_name, 'r') as f:
                    column = f['TOTAL_COLUMNS/' + component_nom][()]
                    latitude = f['GEOLOCATION/LatitudeCentre'][()]
                    longitude = f['GEOLOCATION/LongitudeCentre'][()]

                sensor_ds_sat_time = xr.DataArray(
                                                column,
                                                dims = ('ground_pixel'),
                                                coords = {
                                                    'latitude': ('ground_pixel', latitude),
                                                    'longitude': ('ground_pixel', longitude)
                                                },
                                                name = component_nom
                )

                # Pixels of every satellite go into the same list; before, only
                # the last satellite's pixels reached the gridding step
                sensor_ds_ABC.append(sensor_ds_sat_time)

        if not sensor_ds_ABC:
            raise ValueError('No GOME-2 L2 product found for ' + date + ' and satellites ' + str(list(satellites)))

        sensor_ds_ABC = xr.concat(sensor_ds_ABC, dim = 'ground_pixel')

        # Wrap longitudes into [-180, 180)
        sensor_ds_ABC = sensor_ds_ABC.assign_coords(longitude = (((sensor_ds_ABC.longitude + 180) % 360) - 180))

        y = sensor_ds_ABC.latitude.data
        x = sensor_ds_ABC.longitude.data
        z = sensor_ds_ABC.data

        # Mean per cell = sum of values / number of pixels. The 'normed'
        # keyword is no longer accepted by NumPy; the default gives plain sums.
        zi, yi, xi = np.histogram2d(y, x, weights = z, **grid)
        counts, _, _ = np.histogram2d(y, x, **grid)

        # Cells without pixels become NaN (0 / 0); silence the warning only
        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            zi = zi / counts

        # Coordinates are the lower edges of the bins
        sensor_ds_ABC_gridded = xr.DataArray(
                                            zi,
                                            dims = ['latitude', 'longitude'],
                                            coords = {
                                                'latitude': (['latitude'], yi[:-1]),
                                                'longitude': (['longitude'], xi[:-1])
                                            },
                                            name = sensor_column
        )

        time_str = dt.datetime(int(year), int(month), int(day))
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded.assign_coords({'time': time_str}).expand_dims(dim = ['time'])
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded.to_dataset()
        sensor_ds_all.append(sensor_ds_ABC_gridded)

    sensor_ds = xr.concat(sensor_ds_all, dim = 'time')

    return sensor_ds