# IASI functions

In [None]:
def IASI_L3_version(component_nom, year, month):

    """ Get version of L3 IASI dataset for each component nomenclature

         Args:
            component_nom (str): Component chemical nomenclature ('CO', 'O3', 'NH3' or 'HCOOH')
            year (int or str): Year of dataset
            month (int or str): Month of dataset
            
        Returns:
            version (str): IASI dataset version

        Raises:
            ValueError: If component_nom is not one of the supported components
    """

    # NOTE(review): for 2019 the newer version is selected for the EARLY months
    # (month <= 5 for CO, month <= 11 for O3) - confirm against the archive listings.

    # https://iasi.aeris-data.fr/CO_IASI_A_L3_data/
    if component_nom == 'CO':
        # Date fields frequently arrive as strings ('2019', '05'); compare as integers
        year, month = int(year), int(month)
        if year >= 2020 or (year == 2019 and month <= 5):
            version = 'V6.5.0'
        else:
            version = 'V20151001.0'

    # https://iasi.aeris-data.fr/O3_IASI_A_L3_data/
    elif component_nom == 'O3':
        year, month = int(year), int(month)
        if year >= 2020 or (year == 2019 and month <= 11):
            version = 'V6.5.1'
        else:
            version = 'V20151001.0'

    # https://iasi.aeris-data.fr/NH3_IASI_A_L3_data/
    elif component_nom == 'NH3':
        version = 'V3.0.0'

    # https://iasi.aeris-data.fr/HCOOH_IASI_A_L3_data/
    elif component_nom == 'HCOOH':
        version = 'V1.0.0'

    else:
        # Fail with a clear message instead of an UnboundLocalError on return
        raise ValueError('No L3 IASI version is defined for component ' + repr(component_nom) + '.')

    return version

In [None]:
def IASI_L3_download(component_nom, date, satellite):

    """ Download L3 IASI dataset with curl

        The file is written to <home>/adc-toolbox/data/iasi/<component>/monthly/<year>-<month>/
        and removed again when the response is too small to be a dataset.

         Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query date; only the first two '-'-separated fields (year, month) are used
            satellite (str): A, B and/or C referring to METOP series
    """

    # Local import: the file's top-level import cell is not part of this block
    import subprocess

    cnl = component_nom.lower()
    sl = 'iasi' + satellite.lower() + 'l3'

    date_fields = date.split('-')
    year = date_fields[0]
    month = date_fields[1]
    version = IASI_L3_version(component_nom, int(year), int(month))

    # NH3 product names carry no 'COLUMN' tag between component and date
    separator = '_' if component_nom == 'NH3' else '_COLUMN_'
    product_name = ''.join(['IASI_METOP' + satellite + '_L3_', component_nom, separator,
                            year, month, '_ULB-LATMOS_', version, '.nc'])

    # Create directory for each satellite in case they do not exist
    path = os.path.join(str(Path.home()), 'adc-toolbox', 'data', 'iasi',
                        component_nom, 'monthly', year + '-' + month)
    os.makedirs(path, exist_ok = True)
    file_name = os.path.join(path, product_name)

    url = '/'.join(['https://cds-espri.ipsl.fr', sl, 'iasi_' + cnl, version, year, product_name])

    # Write straight to the path that is checked below, so the download no longer
    # depends on the current working directory being <home>/adc-toolbox.
    # NOTE: --insecure (kept from the original command) disables TLS certificate verification.
    subprocess.run(['curl', '-s', '--insecure', url, '--output', file_name])

    if not os.path.isfile(file_name):
        # curl produced no output file at all (e.g. network failure)
        print(product_name, 'is not available.')

    # Responses of at most 288 bytes are treated as "no data"
    # (presumably the size of the server's error page - TODO confirm)
    elif os.stat(file_name).st_size <= 288:
        print(product_name, 'is not available.')
        os.remove(file_name)

    else:
        print(product_name, 'was downloaded.')

In [None]:
def IASI_L3_read(component_nom, sensor_column, dates, lat_res = 1, lon_res = 1):

    """ Load the monthly L3 IASI files found on disk for every date, average them
        onto a regular grid and stack the months along a time dimension

         Args:
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Name of sensor column in downloaded dataset
            dates (list): Available dates ('-'-separated strings starting with year and month)
            lat_res (float): Spatial resolution for latitude (at least 1)
            lon_res (float): Spatial resolution for longitude (at least 1)
            
        Returns:
            sensor_ds (xarray): IASI dataset in xarray format, with the sensor
                                column renamed to 'sensor_column'

        Raises:
            KeyboardInterrupt: If lat_res or lon_res is below 1
    """

    if lat_res < 1 or lon_res < 1:
        print('To show the original data, the resolution must equal to 1x1º.')
        print('To show aggregated data, the resolution must be superior to 1x1º.')
        raise KeyboardInterrupt()

    monthly_datasets = []

    for date in dates:

        date_fields = date.split('-')
        year = date_fields[0]
        month = date_fields[1]

        # Every file in the month folder is one satellite (METOP-A, -B, -C)
        month_dir = os.path.join(str(Path.home()), 'adc-toolbox', os.path.relpath('data/iasi/' + component_nom + '/monthly/' + year + '-' + month))

        satellite_datasets = []
        for file_name in os.listdir(month_dir):
            satellite_ds = xr.open_dataset(month_dir + '/' + file_name)
            unit = satellite_ds[sensor_column].units
            satellite_datasets.append(satellite_ds)

        # Stack the satellites along latitude; the binning below averages them
        stacked = xr.concat(satellite_datasets, dim = 'latitude')

        # Edges and centres of the target regular grid
        lat_edges = np.arange(-90, 90 + lat_res/2, lat_res)
        lon_edges = np.arange(-180, 180 + lon_res/2, lon_res)
        lat_mid = np.arange(-90 + lat_res/2, 90, lat_res)
        lon_mid = np.arange(-180 + lon_res/2, 180, lon_res)

        # Average per latitude bin first, then per longitude bin
        by_latitude = stacked.groupby_bins('latitude', lat_edges, labels = lat_mid).mean()
        gridded = by_latitude.groupby_bins('longitude', lon_edges, labels = lon_mid).mean()
        gridded = gridded.rename({'latitude_bins': 'latitude', 'longitude_bins': 'longitude'})

        # Stamp the month (first day) as a length-one time dimension
        month_start = dt.datetime(int(year), int(month), 1)
        gridded = gridded.assign_coords({'time': month_start}).expand_dims(dim = ['time'])

        # Keep the units read from the files as a dataset attribute
        gridded.attrs['units'] = unit

        monthly_datasets.append(gridded)

    sensor_ds = xr.concat(monthly_datasets, dim = 'time').rename({sensor_column: 'sensor_column'})

    return sensor_ds

In [None]:
def IASI_L2_version(component_nom, year, month, day, satellite):

    """ Get version of L2 IASI dataset for each component nomenclature

         Args:
            component_nom (str): Component chemical nomenclature ('O3' or 'CO')
            year (str): Year of dataset
            month (str): Month of dataset (as it appears in the file name, e.g. '05')
            day (str): Day of dataset (as it appears in the file name, e.g. '09')
            satellite (str): A, B and/or C referring to METOP series

        Returns:
            version (str): IASI dataset version
            product_name (str): IASI dataset product name

        Raises:
            ValueError: If the component is not supported, or no O3 version
                        is defined for the requested year
    """

    # https://iasi.aeris-data.fr/o3_iasi_a_arch/
    if component_nom == 'O3':

        year_num = int(year)

        if year_num == 2020:
            version = 'V6.5.0'
            product_name = ''.join(['IASI_METOP' + satellite + '_L2_', component_nom, '_COLUMN_',
                            year, month, day, '_ULB-LATMOS_', version, '.nc'])

        elif year_num <= 2019:
            # Older files follow the FORLI naming scheme with a lower-case satellite letter
            version = 'v20151001'
            product_name = ''.join(['IASI_FORLI_' + component_nom + '_metop' + satellite.lower() + '_',
                            year, month, day, '_', version, '.nc'])

        else:
            # Fail with a clear message instead of an UnboundLocalError on return
            raise ValueError('No L2 IASI O3 version is defined for year ' + str(year) + '.')
    
    # https://iasi.aeris-data.fr/cos_iasi_a_arch/
    elif component_nom == 'CO':

        year_num, month_num, day_num = int(year), int(month), int(day)

        if year_num >= 2020:
            version = 'V6.5.0'
            
        # From May 14, 2019 until the end of 2019
        elif year_num == 2019 and ((month_num == 5 and day_num >= 14) or month_num >= 6):
            version = 'V6.4.0'

        else: 
            version = 'v20140922'
            print('Data of CO total columns before May 13, 2019 is not available as .nc for download.')

        product_name = ''.join(['IASI_METOP' + satellite + '_L2_', component_nom, '_',
                            year, month, day, '_ULB-LATMOS_', version, '.nc'])

    else:
        # Fail with a clear message instead of an UnboundLocalError on return
        raise ValueError('No L2 IASI version is defined for component ' + repr(component_nom) + '.')

    return version, product_name

In [None]:
def IASI_L2_download(component_nom, date, satellite):

    """ Download L2 IASI dataset with curl

        The file is written to <home>/adc-toolbox/data/iasi/<component>/<date>/
        and removed again when it is considered unavailable.

         Args:
            component_nom (str): Component chemical nomenclature
            date (str): Query date as 'year-month-day'
            satellite (str): A, B and/or C referring to METOP series
    """

    # Local import: the file's top-level import cell is not part of this block
    import subprocess

    cnl = component_nom.lower()
    sl = 'iasi' + satellite.lower() + 'l2'

    date_fields = date.split('-')
    year = date_fields[0]
    month = date_fields[1]
    day = date_fields[2]
    version, product_name = IASI_L2_version(component_nom, year, month, day, satellite)
    
    # Create directory for each satellite in case they do not exist
    path = os.path.join(str(Path.home()), 'adc-toolbox', 'data', 'iasi', component_nom, date)
    os.makedirs(path, exist_ok = True)
    file_name = os.path.join(path, product_name)

    url = '/'.join(['https://cds-espri.ipsl.fr', sl, 'iasi_' + cnl, version, year, month, product_name])

    # Write straight to the path that is checked below, so the download no longer
    # depends on the current working directory being <home>/adc-toolbox.
    # NOTE: --insecure (kept from the original command) disables TLS certificate verification.
    subprocess.run(['curl', '-s', '--insecure', url, '--output', file_name])

    was_written = os.path.isfile(file_name)

    # NOTE(review): this date cut-off is applied to every component and does not
    # cover January-April 2019, although IASI_L2_version only reports missing
    # data for CO before mid-May 2019 - confirm the intended rule.
    before_cutoff = (int(year) < 2019 or
                     (int(year) == 2019 and int(month) == 5 and int(day) <= 13))

    # Responses of at most 288 bytes are treated as "no data"
    # (presumably the size of the server's error page - TODO confirm)
    if not was_written or os.stat(file_name).st_size <= 288 or before_cutoff:
        if was_written:
            os.remove(file_name) 
        print(product_name, 'is not available.')
    
    else:
        print(product_name, 'was downloaded.')

In [None]:
def IASI_L2_read(component_nom, sensor_column, dates, lat_res = 1, lon_res = 1):

    """ Read the L2 IASI dataset as xarray object and assign time

        For every date, the ground pixels of all files in the date folder are
        averaged into a 180 x 360 histogram grid, regridded to the requested
        resolution and stacked along a time dimension.

         Args:
            component_nom (str): Component chemical nomenclature
            sensor_column (str): Name of sensor column in downloaded dataset
                                 (overridden for O3, whose column name depends on the year)
            dates (list): Available dates as 'year-month-day' strings
            lat_res (float): Spatial resolution for latitude (at least 1)
            lon_res (float): Spatial resolution for longitude (at least 1)
            
        Returns:
            sensor_ds (xarray): Gridded IASI data named 'sensor_column', with
                                dimensions time, latitude and longitude

        Raises:
            KeyboardInterrupt: If lat_res or lon_res is below 1
    """

    if lat_res < 1 or lon_res < 1:
        print('To show the original data, the resolution must equal to 1x1º.')
        print('To show aggregated data, the resolution must be superior to 1x1º.')
        raise KeyboardInterrupt()
        
    sensor_ds_all = []

    for date in dates:

        date_fields = date.split('-')
        year = date_fields[0]
        month = date_fields[1]
        day = date_fields[2]
        
        sensor_ds_ABC = []
        
        # Change sensor_column name (in 2020 it is O3_total_column and before ozone_total_column)
        if component_nom == 'O3':
            sensor_column = 'O3_total_column' if year == '2020' else 'ozone_total_column'

        path = os.path.join(str(Path.home()), 'adc-toolbox', os.path.relpath('data/iasi/' + component_nom +  '/' + date))
        
        for product_name in os.listdir(path):

            # Open from the same folder that was listed, not from a path
            # relative to the current working directory
            sensor_ds_sat = xr.open_dataset(os.path.join(path, product_name))
            unit = sensor_ds_sat[sensor_column].units
            latitude = sensor_ds_sat['latitude'].data
            longitude = sensor_ds_sat['longitude'].data

            # Keep only the sensor column, indexed by ground pixel
            sensor_ds_sat = xr.DataArray(
                                        sensor_ds_sat[sensor_column].data,
                                        dims=('ground_pixel'),
                                        coords={
                                                'latitude': ('ground_pixel', latitude),
                                                'longitude': ('ground_pixel', longitude)
                                        },
                                        name = component_nom
            )

            sensor_ds_ABC.append(sensor_ds_sat)

        # Pool the ground pixels of all files found for this date
        sensor_ds_ABC = xr.concat(sensor_ds_ABC, dim = 'ground_pixel')

        y = sensor_ds_ABC.latitude.data
        x = sensor_ds_ABC.longitude.data
        z = sensor_ds_ABC.data

        # Cell mean = sum of values per cell / number of pixels per cell.
        # The 'normed' keyword is not passed: it has been removed from NumPy and
        # the default already returns un-normalised sums.
        # NOTE(review): no explicit range is given, so the bin edges follow the data
        # extent, and the left edges are used as cell coordinates - confirm this is
        # intended for inputs that do not cover the whole globe.
        zi, yi, xi = np.histogram2d(y, x, bins = (180, 360), weights = z)
        counts, _, _ = np.histogram2d(y, x, bins = (180, 360))

        # Empty cells give 0/0 = NaN; silence the corresponding runtime warnings
        with np.errstate(divide = 'ignore', invalid = 'ignore'):
            zi = zi / counts

        sensor_ds_ABC_gridded = xr.DataArray(
                                            zi,
                                            dims = ['latitude', 'longitude'],
                                            coords = {
                                                'latitude': (['latitude'], yi[:-1]),
                                                'longitude': (['longitude'], xi[:-1])
                                            },
                                            name = 'sensor_column'
        )

        # Regrid onto a custom defined regular grid
        lat_bins = np.arange(-90, 90 + lat_res/2, lat_res)
        lon_bins = np.arange(-180, 180 + lon_res/2, lon_res)
        
        lat_center = np.arange(-90 + lat_res/2, 90, lat_res)
        lon_center = np.arange(-180 + lon_res/2, 180, lon_res)
            
        sensor_ds_ABC_gridded_int = sensor_ds_ABC_gridded.groupby_bins('latitude', lat_bins, labels = lat_center).mean()
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded_int.groupby_bins('longitude', lon_bins, labels = lon_center).mean()
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded.rename({'latitude_bins': 'latitude', 'longitude_bins': 'longitude'})

        # Add units as attribute
        sensor_ds_ABC_gridded.attrs['units'] = unit

        # Add time (both as scalar 'delta_time' coordinate and as 'time' dimension)
        time_str = dt.datetime(int(year), int(month), int(day))
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded.assign_coords({'delta_time': time_str})
        sensor_ds_ABC_gridded = sensor_ds_ABC_gridded.assign_coords({'time': time_str}).expand_dims(dim = ['time'])

        sensor_ds_all.append(sensor_ds_ABC_gridded)

    sensor_ds = xr.concat(sensor_ds_all, dim = 'time')

    return sensor_ds