# GOME-2 functions

In [None]:
def GOME_L3_version_AC_SAF(component_nom, year, month):

    """ Get version of L3 GOME-2 dataset for each component nomenclature (AC SAF)

         Args:
            component_nom (str): Component chemical nomenclature
            year (str): Year of dataset
            month (str): Month of dataset

        Returns:
            version (str): GOME-2 dataset version

        Raises:
            ValueError: If year or month cannot be converted to an integer, or
                        if no version is registered for component_nom
    """

    # The date is not used to choose the version yet; the conversions are kept
    # so that a malformed year or month is still rejected here.
    int(year)
    int(month)

    if component_nom == 'NO2':
        return 'v1'

    # Previously this fell through to `return version` with the name unbound,
    # which surfaced as an opaque UnboundLocalError.
    raise ValueError(f'No L3 GOME-2 (AC SAF) version is defined for component {component_nom!r}.')

In [None]:
def GOME_L3_download_AC_SAF(component_nom, date, satellite):

    """ Fetch one monthly L3 GOME-2 product from the AC SAF FTP server

        The file is stored under
        /<first two components of the working directory>/adc-toolbox/data/gome/<component>/L3/<year>-<month>.
        An empty output file after the wget call is treated as "not available"
        and removed.

        Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query date, with year and month separated by '-'
            satellite (str): METOP platform letter used in the product name and FTP folder
    """

    date_parts = date.split('-')
    year = date_parts[0]
    month = date_parts[1]
    version = GOME_L3_version_AC_SAF(component_nom, year, month)

    # Local destination folder
    toolbox_root = '/'.join(os.getcwd().split('/')[1:3])
    relative_folder = os.path.relpath(f'data/gome/{component_nom}/L3/{year}-{month}')
    GOME_product_path = os.path.join('/', toolbox_root, 'adc-toolbox', relative_folder)
    os.makedirs(GOME_product_path, exist_ok = True)

    # Remote location and local target of the product
    product_name = f'GOME_{component_nom}_Global_{year}{month}_METOP{satellite}_DLR_{version}.nc'
    path = f'ftp://acsaf.eoc.dlr.de/gome2{satellite.lower()}/level3/{component_nom}/{year}/{product_name}'
    file_name = f'{GOME_product_path}/{product_name}'

    subprocess.run(['wget', '-q', '-nc', path, '-O', file_name])

    if os.path.getsize(file_name) > 0:
        print(product_name, 'was downloaded.')
        return

    # Nothing was written to the output file: discard it
    os.remove(file_name)
    print(product_name, 'is not available.')

In [None]:
def GOME_L3_read_AC_SAF(component_nom, sensor_column, dates, lat_res = 0.25, lon_res = 0.25):

    """ Read L3 GOME-2 datasets as xarray dataset object and assign time (AC SAF)

         Args:
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Name of sensor column in downloaded dataset
            dates (list): Available dates ('YYYY-MM...' strings)
            lat_res (float): Spatial resolution for latitude (at least 0.25)
            lon_res (float): Spatial resolution for longitude (at least 0.25)

        Returns:
            sensor_ds (xarray): GOME-2 dataset in xarray format, with the sensor
                                variable renamed to 'sensor_column'

        Raises:
            KeyboardInterrupt: If lat_res or lon_res is below 0.25 (kept as the
                               module's way of stopping execution)
            ValueError: If the folder of a date contains no product files
    """

    if lat_res < 0.25 or lon_res < 0.25:
        # The messages now quote the same threshold that the check enforces
        print('To show the original data, the resolution must be equal to 0.25x0.25º.')
        print('To show aggregated data, the resolution must be superior to 0.25x0.25º.')
        raise KeyboardInterrupt()

    sensor_ds_all = []

    for date in dates:

        year = date.split('-')[0]
        month = date.split('-')[1]

        # Combine data from METOP-A, METOP-B and METOP-C
        GOME_product_path = os.path.join('/', '/'.join(
                            os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                            os.path.relpath('data/gome/' + component_nom + '/L3/' + year + '-' + month))

        # Sorted so that the concatenation order does not depend on the file system
        product_names = sorted(os.listdir(GOME_product_path))

        if not product_names:
            raise ValueError(f'No L3 GOME-2 products were found in {GOME_product_path}.')

        sensor_ds_ABC = []

        for product_name in product_names:

            product_file = GOME_product_path + '/' + product_name

            # The sensor variable is read from the PRODUCT group and attached to the root dataset
            sensor_ds_sat = xr.open_dataset(product_file)
            sensor_ds_int_sat = xr.open_dataset(product_file, group = 'PRODUCT')
            sensor_ds_sat[sensor_column] = sensor_ds_int_sat[sensor_column]

            # The units of the last product read are the ones stored below
            unit = sensor_ds_sat[sensor_column].units
            sensor_ds_ABC.append(sensor_ds_sat)

        sensor_ds_ABC = xr.concat(sensor_ds_ABC, dim = 'latitude')

        # Regrid onto a custom defined regular grid
        sensor_ds_ABC_gridded = binning(sensor_ds_ABC, lat_res, lon_res) 

        # Add time (first day of the month)
        month_start = dt.datetime(int(year), int(month), 1)
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded.assign_coords({'time': month_start}).expand_dims(dim = ['time'])

        # Add units as attribute
        sensor_ds_ABC_gridded.attrs['units'] = unit

        sensor_ds_all.append(sensor_ds_ABC_gridded)

    sensor_ds = xr.concat(sensor_ds_all, dim = 'time')
    sensor_ds = sensor_ds.rename({sensor_column: 'sensor_column'})

    return sensor_ds

In [None]:
def GOME_L3_download_TEMIS(component_nom, date, satellite):

    """ Download L3 GOME-2 datasets (TEMIS platform)

        Only METOP-A and METOP-B have a download location here; 'C' is skipped
        silently. The local file name and the remote folder are NO2 specific.
        An empty output file after the wget call is treated as "not available"
        and removed.

         Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query date, with year and month separated by '-'
            satellite (str or list): A, B and/or C referring to METOP series,
                                     either as one string or as a list of letters

        Raises:
            ValueError: If a satellite other than A, B or C is requested
    """

    year = date.split('-')[0]
    month = date.split('-')[1]

    # Remote folder of each platform below .../airpollution/no2col/
    remote_folders = {'A': 'gome2_v2', 'B': 'gome2b'}

    # The satellites are taken from the argument (the previous code read and
    # mutated an external `satellites` list). Iterating works for a single
    # letter, a string of letters and a list of letters alike.
    satellites = [sat for sat in satellite if sat != 'C']

    unsupported = [sat for sat in satellites if sat not in remote_folders]
    if unsupported:
        raise ValueError(f'TEMIS L3 GOME-2 data is not defined for METOP satellite(s): {unsupported}.')

    if not satellites:
        return

    GOME_product_path = os.path.join('/', '/'.join(
                        os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                        os.path.relpath('data/gome/' + component_nom + '/L3/' + year + '-' + month))
    os.makedirs(GOME_product_path, exist_ok = True) 

    for sat in satellites:

        product_name = 'GOME_L3_NO2_COLUMN_METOP_' + sat + '_' + year + month + '.asc.zip'
        file_name = GOME_product_path + '/' + product_name

        path = ('https://d1qb6yzwaaq4he.cloudfront.net/airpollution/no2col/' + remote_folders[sat] + '/' + year +
                '/' + month + '/' + component_nom.lower() + '_' + year + month + '.asc.zip')

        subprocess.run(['wget', '-q', '-nc', path, '-O', file_name])

        if os.stat(file_name).st_size == 0:  
            os.remove(file_name) 
            print(product_name, 'is not available.')

        else:
            print(product_name, 'was downloaded.')

In [None]:
def _parse_TEMIS_asc(lines, time):

    """ Turn the lines of a TEMIS ASCII grid file into a list of row dictionaries

         Args:
            lines (iterable): Lines of the file, in order
            time (datetime): Time stamp stored in every row

        Returns:
            rows (list): Dictionaries with keys time, latitude, longitude and sensor_column

        Raises:
            ValueError: If data values appear before the first latitude line
    """

    rows = []
    lat = None
    lon = -179.875

    for line_number, line in enumerate(lines):

        # The first four lines are not data
        if line_number <= 3:
            continue

        if 'lat' in line:
            # Take whatever follows '=' so the amount of padding does not matter
            lat = float(line.split('=', 1)[1])
            lon = -179.875
            continue

        line = line.replace('\n', '')

        if line and lat is None:
            raise ValueError('Data values were found before the first latitude line.')

        # Values are stored in fixed chunks of 4 characters
        for start in range(0, len(line), 4):

            chunk = line[start:start + 4]

            if chunk == '-999':
                value = np.nan

            else:
                value = float(chunk.replace(' ', ''))
                value = value*10**13

            rows.append({'time': time, 
                         'latitude': lat, 
                         'longitude': lon, 
                         'sensor_column': value})
            lon += 0.25

    return rows


def GOME_L3_read_TEMIS(component_nom, dates, lat_res = 0.25, lon_res = 0.25):

    """ Read L3 GOME-2 datasets as xarray dataset object and assign time (TEMIS)

        Zip files found in the folder of each date are extracted, renamed and
        deleted. Every product is parsed into a table, and the tables are
        averaged wherever more than one product has a value for a coordinate.

         Args:
            component_nom (str): Component chemical nomenclature
            dates (list): Available dates ('YYYY-MM...' strings)
            lat_res (float): Spatial resolution for latitude
            lon_res (float): Spatial resolution for longitude

        Returns:
            sensor_ds (xarray): GOME-2 dataset in xarray format

        Raises:
            ValueError: If the folder of a date contains no product files
    """

    sensor_ds = []

    for date in dates:

        year = date.split('-')[0]
        month = date.split('-')[1]
        time = dt.datetime(int(year), int(month), 1)

        GOME_product_path = os.path.join('/', '/'.join(
                            os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                            os.path.relpath('data/gome/' + component_nom + '/L3/' + year + '-' + month))

        # A zip and its extracted file share the same target name, so each
        # target is parsed only once even if both are present in the folder
        asc_names = []

        for product_name in sorted(os.listdir(GOME_product_path)):

            asc_name = product_name.replace('.zip', '')

            # Unzip and delete zip files
            if product_name.endswith('.zip'):

                with ZipFile(GOME_product_path + '/' + product_name, 'r') as zf:
                    zf.extractall(GOME_product_path)

                os.rename(os.path.join(GOME_product_path, 'no2_' + year + month + '.asc'), 
                          os.path.join(GOME_product_path, asc_name)) 
                os.remove(os.path.join(GOME_product_path, product_name))

            if asc_name not in asc_names:
                asc_names.append(asc_name)

        if not asc_names:
            raise ValueError(f'No L3 GOME-2 (TEMIS) products were found in {GOME_product_path}.')

        sensor_df_AB = []

        for asc_name in asc_names:

            # Reconstruct file
            with open(GOME_product_path + '/' + asc_name, 'r') as f:
                sensor_df_satellite = _parse_TEMIS_asc(f, time)

            sensor_df_satellite = pd.DataFrame(sensor_df_satellite)
            sensor_df_satellite = sensor_df_satellite.set_index(['time', 'latitude', 'longitude'])
            sensor_df_satellite = sensor_df_satellite[~sensor_df_satellite.index.duplicated()]

            sensor_df_AB.append(sensor_df_satellite)

        # Align all products on the union of their coordinates, one column per product
        sensor_df_AB = pd.concat(sensor_df_AB, axis = 1)

        # Mean of Metop-A and Metop-B data if both exist for a specific coordinate
        sensor_df_AB = sensor_df_AB.mean(axis = 1).to_frame()
        sensor_df_AB = sensor_df_AB.rename(columns = {0: 'sensor_column'}).sort_index()

        # Regrid onto a custom defined regular grid
        sensor_ds_time = sensor_df_AB.to_xarray()
        sensor_ds_time_gridded = binning(sensor_ds_time, lat_res, lon_res) 

        sensor_ds.append(sensor_ds_time_gridded)

    sensor_ds = xr.concat(sensor_ds, dim = 'time')
    sensor_ds['sensor_column'] = sensor_ds['sensor_column'].assign_attrs({'units': 'molec cm-2'})

    return sensor_ds

In [None]:
def GOME_L2_download(component_nom, date, satellite):

    """ Download L2 GOME-2 datasets

        The FTP listing of the requested day is first saved as index.html, the
        product names are read from it, and every product is then downloaded
        into data/gome/<component>/L2/<date>/<satellite>. An empty output file
        after a wget call is treated as "not available" and removed.

         Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query date ('YYYY-MM-DD')
            satellite (str): A, B or C referring to METOP series
    """

    # Get year, month and day from date
    year = date.split('-')[0]
    month = date.split('-')[1]
    day = date.split('-')[2]

    GOME_product_path = os.path.join('/', '/'.join(
                        os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                        os.path.relpath('data/gome/' + component_nom + '/L2/' + date + '/' + satellite))
    os.makedirs(GOME_product_path, exist_ok = True) 

    # Save index.html with available offline products through FTP for specific date
    date_path = 'ftp://acsaf.eoc.dlr.de/gome2' + satellite.lower() + '/offline/' + year + '/' + month + '/' + day + '/'
    index_file_path = os.path.join('/', '/'.join(
                      os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                      os.path.relpath('data/gome/' + component_nom + '/' + 'index.html'))
    subprocess.run(['wget', '-q', '-O', index_file_path, '-i', date_path])

    if os.stat(index_file_path).st_size == 0:
        print(f'The datasets for {date} and METOP-{satellite} are not available.')
        return

    # Read index.html and get content within pre tags; the file is closed as soon as it is parsed
    with open(index_file_path, 'r') as html_file:
        listing = bs4.BeautifulSoup(html_file, 'lxml').pre

    if listing is None:
        print(f'The datasets for {date} and METOP-{satellite} are not available.')
        return

    # The first line of the listing is skipped; only lines carrying the file
    # marker are used, so other entries no longer raise an IndexError
    file_marker = 'File        '
    items_int = listing.get_text().splitlines()[1:]
    GOME_product_names = [item_int.split(file_marker, 1)[1].split('  (', 1)[0] 
                          for item_int in items_int if file_marker in item_int]

    # Download all files for date and satellite
    for product_name in GOME_product_names:

        file_name = GOME_product_path + '/' + product_name
        path = date_path + product_name
        subprocess.run(['wget', '-q', '-nc', path, '-O', file_name])

        if os.stat(file_name).st_size == 0:  
            os.remove(file_name) 
            print(product_name, 'is not available.')

        else:
            print(product_name, 'was downloaded.')

In [None]:
def GOME_L2_read(component_nom, dates, lat_res = 0.25, lon_res = 0.25):

    """ Read L2 GOME-2 datasets as xarray dataset object and assign time

        For every date, the ground pixels of all products of all satellites are
        combined, averaged on a 180 x 360 histogram grid, regridded with
        binning() and finally matched with a sensing time (delta_time) through
        coordinates rounded to whole degrees.

         Args:
            component_nom (str): Component chemical nomenclature
            dates (list): Available dates ('YYYY-MM-DD' strings)
            lat_res (float): Spatial resolution for latitude (at least 0.25)
            lon_res (float): Spatial resolution for longitude (at least 0.25)

        Returns:
            sensor_ds (xarray): GOME-2 dataset in xarray format

        Raises:
            KeyboardInterrupt: If lat_res or lon_res is below 0.25 (kept as the
                               module's way of stopping execution)
            ValueError: If no product file exists for one of the dates
    """

    if lat_res < 0.25 or lon_res < 0.25:
        # The messages now quote the same threshold that the check enforces
        print('To show the original data, the resolution must be equal to 0.25x0.25º.')
        print('To show aggregated data, the resolution must be superior to 0.25x0.25º.')
        raise KeyboardInterrupt()

    sensor_ds_all = []

    # Concatenate all the products for different dates
    for date in dates:

        year = date.split('-')[0]
        month = date.split('-')[1]
        day = date.split('-')[2]

        sensor_ds_ABC = []

        GOME_product_path = os.path.join('/', '/'.join(
                            os.getcwd().split('/')[1:3]), 'adc-toolbox', 
                            os.path.relpath('data/gome/' + component_nom + '/L2/' + date))

        # Concatenate all the products for METOP-A, B and C
        for satellite in sorted(os.listdir(GOME_product_path)):

            satellite_path = GOME_product_path + '/' + satellite
            sensor_ds_sat_daily = []

            # Concatenate all the products for different delta times
            for product_name in sorted(os.listdir(satellite_path)):

                # Read everything into memory so the file can be closed right away
                with h5py.File(satellite_path + '/' + product_name, 'r') as f:
                    column = f['TOTAL_COLUMNS/' + component_nom][()]
                    latitude = f['GEOLOCATION/LatitudeCentre'][:]
                    longitude = f['GEOLOCATION/LongitudeCentre'][:]

                pixel_coords = {
                                'latitude': ('ground_pixel', latitude),
                                'longitude': ('ground_pixel', longitude)
                }

                # Get component for one delta time 
                sensor_ds_sat_time = xr.DataArray(
                                                  column,
                                                  dims = ('ground_pixel'),
                                                  coords = pixel_coords,
                                                  name = component_nom
                )

                # Get delta time from product name (HHMMSS after the date) and add it as a variable
                delta_time_str_int = product_name.split(date.replace('-', ''), 1)[1].split('_', 1)[0]
                delta_time_str = [delta_time_str_int[i:i+2] for i in range(0, len(delta_time_str_int), 2)]      
                delta_time = dt.datetime(int(year), int(month), int(day), 
                                         int(delta_time_str[0]), int(delta_time_str[1]), 
                                         int(delta_time_str[2]))

                sensor_ds_delta_time = xr.DataArray(np.repeat(delta_time, len(sensor_ds_sat_time.ground_pixel)),
                                                    dims = ('ground_pixel'),
                                                    coords = pixel_coords,
                                                    name = 'delta_time'
                )

                # Merge both variables (component and delta time)
                sensor_ds_sat_time = xr.merge([sensor_ds_sat_time, sensor_ds_delta_time])
                sensor_ds_sat_daily.append(sensor_ds_sat_time)

            # Keep every satellite (previously only the last one listed was used);
            # satellite folders without products are skipped
            if sensor_ds_sat_daily:
                sensor_ds_ABC.append(xr.concat(sensor_ds_sat_daily, dim = 'ground_pixel'))

        if not sensor_ds_ABC:
            raise ValueError(f'No L2 GOME-2 products were found in {GOME_product_path}.')

        sensor_ds_ABC = xr.concat(sensor_ds_ABC, dim = 'ground_pixel')

        # Wrap longitudes into [-180, 180)
        sensor_ds_ABC = sensor_ds_ABC.assign_coords(longitude = (((sensor_ds_ABC.longitude + 180) % 360) - 180))

        y = sensor_ds_ABC.latitude.data
        x = sensor_ds_ABC.longitude.data
        z = sensor_ds_ABC[component_nom].data

        # Mean per cell = sum of values / number of pixels. The removed `normed`
        # keyword is no longer passed; the default is the same unnormalised histogram.
        zi, yi, xi = np.histogram2d(y, x, bins = (180, 360), weights = z)
        counts, _, _ = np.histogram2d(y, x, bins = (180, 360))

        # Cells without pixels become NaN (0 / 0); the warnings are expected
        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            zi = zi / counts

        sensor_ds_ABC_gridded = xr.DataArray(
                                             zi,
                                             dims = ['latitude', 'longitude'],
                                             coords = {
                                                       'latitude': (['latitude'], yi[:-1]),
                                                       'longitude': (['longitude'], xi[:-1])
                                             },
                                             name = 'sensor_column'
        )

        # Regrid onto a custom defined regular grid
        sensor_ds_ABC_gridded = binning(sensor_ds_ABC_gridded, lat_res, lon_res) 

        # Get dataframe with delta time (non-gridded), built directly from the
        # pixel arrays instead of one selection per pixel
        delta_time_df_rounded = pd.DataFrame({
                                              'latitude': sensor_ds_ABC.latitude.values,
                                              'longitude': sensor_ds_ABC.longitude.values,
                                              'delta_time': sensor_ds_ABC.delta_time.values
        })

        # Round latitude and longitude to 0 decimals
        delta_time_df_rounded['latitude_rounded'] = delta_time_df_rounded['latitude'].astype('float64').round()
        delta_time_df_rounded['longitude_rounded'] = delta_time_df_rounded['longitude'].astype('float64').round()

        # Get dataframe without delta time (gridded)
        delta_time_df_gridded_rounded = sensor_ds_ABC_gridded.to_dataframe().reset_index()

        # Round latitude and longitude to 0 decimals
        delta_time_df_gridded_rounded['latitude_rounded'] = round(delta_time_df_gridded_rounded['latitude'])
        delta_time_df_gridded_rounded['longitude_rounded'] = round(delta_time_df_gridded_rounded['longitude'])

        # Order dataframes before merge
        delta_time_df_gridded_rounded = delta_time_df_gridded_rounded.sort_values(by = ['latitude_rounded', 'longitude_rounded'])
        delta_time_df_rounded = delta_time_df_rounded.sort_values(by = ['latitude_rounded', 'longitude_rounded'])

        # Merge and clean
        final = pd.merge(delta_time_df_gridded_rounded, delta_time_df_rounded, 
                         on= ['latitude_rounded', 'longitude_rounded'], how = 'left')
        final = final[~(final.delta_time.isnull() & final['sensor_column'].notnull())]
        final = final.drop(columns = ['latitude_rounded', 'longitude_rounded', 'latitude_y', 'longitude_y'])
        final = final.rename({'latitude_x': 'latitude', 'longitude_x': 'longitude'}, axis = 1)
        final = final.set_index(['latitude', 'longitude'])

        # Several pixels can match one grid cell; the first match is kept
        final = final[~final.index.duplicated()].to_xarray()

        # Assign day as time
        time_str = dt.datetime(int(year), int(month), int(day))
        final = final.assign_coords({'time': time_str}).expand_dims(dim = ['time'])

        sensor_ds_all.append(final)

    sensor_ds = xr.concat(sensor_ds_all, dim = 'time')

    return sensor_ds