In [1]:
import numpy as np
import netCDF4 as nc
import pandas as pd 
import os 
import datetime as dt
import matplotlib.pyplot as plt 

In [2]:
def locate_file(d_str,path): 
    """
    Locate TROPOMI level-2 data files whose name carries a given date string.

    Every file below ``path`` is inspected. The file name is split on '_'
    and the ninth field (index 8) is searched for ``d_str``.
    NOTE(review): index 8 is presumably the sensing-start timestamp of names
    such as ``S5P_OFFL_L2__CH4____<start>_<end>_...`` - confirm for the
    product you use.

    Files whose name has fewer than nine '_'-separated fields (README files,
    OS metadata, previously written outputs, ...) are skipped instead of
    raising IndexError.

    param: d_str-> datetime string (e.g., 20210101)
    param: path -> folder path for searching TROPOMI data files

    return: list of full paths of the matching files (empty if none match)
    """
    files=[]
    for r, d, f in os.walk(path):
        for file in f:
            fields = file.split('_')
            # Not a name with the expected layout -> nothing to compare against.
            if len(fields) <= 8:
                continue
            if d_str in fields[8]:
                files.append(os.path.join(r, file))
    return files  

In [3]:
# Global grid with 0.1-degree spacing: 3600 longitudes starting at -180 and
# 1800 latitudes starting at -90 (the upper bounds are excluded by arange).
longitudes = np.arange(-180,180,0.1)
latitudes = np.arange(-90,90,0.1)
X,Y = np.meshgrid(longitudes,latitudes)

# Flatten both coordinate fields row by row, round to one decimal and convert
# to text so every grid cell is identified by a (lon, lat) pair of strings.
rav_lon = X.ravel().round(decimals=1).astype(str)
rav_lat = Y.ravel().round(decimals=1).astype(str)

### Example: calculating AOT-induced TOC-reducing days for 2021 (1 January 2021 through 31 December 2021)

In [None]:
daytime = dt.datetime(2021,1,1)

path = r"where you save your TROPOMI Level-2 data products"
# Folder that receives the daily flag files (placeholder - edit before running).
out_dir = r"your path"

# Output grid: 0.1-degree cells, rows = latitude starting at -90.0,
# columns = longitude starting at -180.0 (upper bounds excluded).
N_LAT, N_LON = 1800, 3600
# Pixels with 0 <= AOT <= AOT_LIMIT count as "low", pixels above it as "high".
AOT_LIMIT = 0.3


def _cell_index(coord, offset, size):
    """Convert coordinates to 0.1-degree cell indices.

    The coordinate is rounded to one decimal (same rule as the text keys used
    previously) and expressed in tenths of a degree; ``offset`` shifts the
    lowest grid value to index 0.

    return: (index, inside) - ``index`` is an integer array (0 where the
            pixel is off-grid), ``inside`` is True where the pixel falls on
            the grid. NaN or fill-value coordinates end up with inside=False.
    """
    tenths = np.rint(np.round(coord, decimals=1) * 10.0) + offset
    inside = (tenths >= 0) & (tenths < size)
    # Cast only after off-grid values were replaced, so huge/NaN inputs never
    # reach the integer conversion.
    return np.where(inside, tenths, 0).astype(np.intp), inside


def _mark_cells(flags, lat, lon, pixels):
    """Set ``flags`` to True in every grid cell hit by a selected pixel."""
    row, row_ok = _cell_index(lat[pixels], N_LAT // 2, N_LAT)
    col, col_ok = _cell_index(lon[pixels], N_LON // 2, N_LON)
    keep = row_ok & col_ok
    flags[row[keep], col[keep]] = True


def _write_flags(filename, flags):
    """Write a boolean (lat, lon) grid as 0/1 integers to variable 'aot'."""
    with nc.Dataset(filename, 'w', format='NETCDF4_CLASSIC') as TRO:
        TRO.createDimension('lat', N_LAT)
        TRO.createDimension('lon', N_LON)
        # Explicit 32-bit type: the classic data model has no 64-bit integers,
        # and plain ``int`` is 64-bit on most platforms.
        td = TRO.createVariable('aot', 'i4', ('lat', 'lon'))
        td[:] = flags.astype('i4')


while daytime.year<2022: 
    day_str = daytime.strftime("%Y%m%d")
    day_files = locate_file(day_str,path)
    print(day_str,len(day_files))

    # Per-day accumulators: did any pixel of any file fall into the cell?
    has_low = np.zeros((N_LAT, N_LON), dtype=bool)
    has_high = np.zeros((N_LAT, N_LON), dtype=bool)

    for f in day_files: 
        # The context manager closes the file even if a variable is missing.
        with nc.Dataset(f,'r') as TF:
            data = TF.groups['PRODUCT']
            var = data.variables

            aot_raw = data['SUPPORT_DATA']['DETAILED_RESULTS']['aerosol_optical_thickness_NIR'][0,:,:]
            lat = np.ma.getdata(var['latitude'][0,:,:])
            lon = np.ma.getdata(var['longitude'][0,:,:])

        # Masked (missing) retrievals become -1 so they satisfy neither the
        # "low" nor the "high" test below.
        AOT = np.ma.filled(aot_raw, -1)

        # grid cells with 0 <= AOT <= 0.3
        _mark_cells(has_low, lat, lon, (0 <= AOT) & (AOT <= AOT_LIMIT))
        # grid cells with AOT > 0.3
        _mark_cells(has_high, lat, lon, AOT > AOT_LIMIT)

    # AOT_<date>.nc: 1 where at least one low-AOT pixel was observed.
    _write_flags(os.path.join(out_dir, "AOT_{}.nc".format(day_str)), has_low)
    # AOT_nd_<date>.nc: 1 where NO high-AOT pixel was observed, 0 otherwise
    # (same polarity as the files this cell has always produced).
    _write_flags(os.path.join(out_dir, "AOT_nd_{}.nc".format(day_str)), ~has_high)

    print(f"Finished TOC-reducing day of:{daytime}")
    
    daytime += dt.timedelta(days=1)

#### TOC-reducing days 

In [None]:
# Folder that is searched (recursively) for the daily AOT flag files in the
# cells below. Placeholder - replace with the real output folder before running.
path  = r"where you saved TOC-reducing days"

In [None]:
# file list with AOT <= 0.3 
# These are the AOT_<date>.nc files; the AOT_nd_<date>.nc files also contain
# 'AOT' in their name and must be excluded here.
files1=[]
for r, d, f in os.walk(path):
    for file in f:
        if 'AOT' in file and 'AOT_nd' not in file:
            files1.append(os.path.join(r, file))
# os.walk yields names in arbitrary order; sort by file name so the list is in
# date order and lines up with the AOT > 0.3 list.
files1.sort(key=os.path.basename)

In [None]:
# file list with AOT > 0.3
# These are the AOT_nd_<date>.nc files.
files2=[]
for r, d, f in os.walk(path):
    for file in f:
        if 'AOT_nd' in file:
            files2.append(os.path.join(r, file))
# os.walk yields names in arbitrary order; sort by file name so the list is in
# date order and lines up with the AOT <= 0.3 list.
files2.sort(key=os.path.basename)

In [None]:
# Build a (day, lat, lon) stack holding, for every day, the difference between
# the "AOT > 0.3" flag grid and the "AOT <= 0.3" flag grid.
if len(files1) != len(files2):
    raise ValueError(
        "files1 and files2 must contain one file per day each "
        "(got {} and {})".format(len(files1), len(files2)))

# Size the stack from the files actually found (not a fixed 365) and start
# from zeros so no uninitialised memory can leak into the later sum.
# The per-day values are -1, 0 or 1, so int8 is enough and keeps a full year
# at ~2.4 GB instead of ~19 GB with 64-bit integers.
AOT_day = np.zeros(shape=(len(files1),1800,3600),dtype=np.int8)
for i, f in enumerate(zip(files1,files2)):
    # f[0]: file with AOT <= 0.3, f[1]: file with AOT > 0.3
    with nc.Dataset(f[0],'r') as d1, nc.Dataset(f[1],'r') as d2:
        td = d1.variables['aot'][:]
        nd = d2.variables['aot'][:]

    aot = nd - td 

    AOT_day[i,:,:] = aot

In [None]:
# The stack is ordered (day, lat, lon): summing over axis 0 collapses the days
# and leaves one (lat, lon) = (1800, 3600) grid of per-cell totals.
# (axis=1 would sum over latitude instead and return a (day, lon) array.)
AOT_day = np.sum(AOT_day,axis=0)

In [None]:
# Reverse every axis first, then reverse axis 1 once more; for a 2-D array the
# net effect is that only axis 0 ends up reversed.
AOT_day_flip = np.flip(AOT_day, axis=None)
AOT_day = np.flip(AOT_day_flip, axis=1)

In [None]:
# Quick-look image of the accumulated grid with matplotlib's default settings.
image_axes = plt.gca()
image_axes.imshow(AOT_day)
plt.show()

In [4]:
# TRO = nc.Dataset(r"save file", 'w', format='NETCDF4_CLASSIC')
# lat = TRO.createDimension('lat', 1800)
# lon = TRO.createDimension('lon', 3600)
# td = TRO.createVariable('aot', int, ('lat', 'lon'))
# td[:] = AOT_day
# TRO.close()