In [2]:
import numpy as np
import netCDF4 as nc
import pandas as pd 
import os 
import datetime as dt 
import matplotlib.pyplot as plt 

In [None]:
def locate_file(d_str,path): 
    """
    Locate TROPOMI level-2 data files whose name matches a datetime string.

    Every file below ``path`` is inspected. The file name is split on '_'
    and ``d_str`` is searched for inside the ninth field (index 8) --
    presumably the sensing-start timestamp of standard S5P file names;
    confirm against your product. Files whose names have fewer than nine
    fields (README files, OS metadata, ...) are skipped instead of raising
    IndexError.

    param: d_str-> datetime string (e.g., 20210101)
    param: path -> folder path for searching TROPOMI data files

    return: list of full paths of the matching files (empty if none match)
    """
    files = []
    for root, _dirs, names in os.walk(path):
        for name in names:
            fields = name.split('_')
            # Not enough '_' fields: this cannot be a matching product file.
            if len(fields) <= 8:
                continue
            if d_str in fields[8]:
                files.append(os.path.join(root, name))
    return files

In [None]:
# Global 0.1-degree grid: longitudes start at -180 and latitudes at -90,
# both stopping one step short of the upper bound.
longitudes = np.arange(-180, 180, 0.1)
latitudes = np.arange(-90, 90, 0.1)
# X carries the longitude and Y the latitude of every grid cell
# (rows follow latitude, columns follow longitude).
X, Y = np.meshgrid(longitudes, latitudes, indexing="xy")

In [None]:
# Flatten the grid (row-major) and turn every coordinate into a
# one-decimal string so it can serve as a merge key.
rav_lon = np.round(np.ravel(X), decimals=1).astype(str)
rav_lat = np.round(np.ravel(Y), decimals=1).astype(str)

### Example: calculating surface-albedo-induced TOC-reducing days for 2021 (1 January 2021 up to, but not including, 1 January 2022)

In [None]:
# ---- Configuration ----------------------------------------------------------
path = r"level-2 data product path"    # root folder searched for TROPOMI level-2 files
out_dir = r"your path"                 # existing folder that receives the daily .nc files

GRID_SHAPE = (1800, 3600)   # (lat, lon) cells of the 0.1-degree global grid
ALBEDO_LIMIT = 0.02         # SWIR surface-albedo threshold separating the two classes
FILL_VALUE = 9.96921e+36    # value treated as "no data" in surface_albedo_SWIR


def _mark_cells(grid, lat, lon):
    """Set grid[row, col] = 1 (in place) for every (lat, lon) pixel.

    A pixel belongs to the cell its coordinates round to at one decimal:
    rint(10 * value) is that cell's tenth-of-degree index, shifted so that
    -90.0 / -180.0 land on row / column 0. Using integer indices instead of
    string keys also means '-0.0' and '0.0' address the same cell.
    Pixels falling outside the grid (e.g. a longitude rounding to 180.0,
    fill values, NaN) are ignored.
    """
    n_rows, n_cols = grid.shape
    rows = np.rint(lat * 10.0) + n_rows // 2
    cols = np.rint(lon * 10.0) + n_cols // 2
    # Comparisons are False for NaN, so invalid coordinates drop out here.
    inside = (rows >= 0) & (rows < n_rows) & (cols >= 0) & (cols < n_cols)
    grid[rows[inside].astype(np.intp), cols[inside].astype(np.intp)] = 1


def _write_day_grid(filename, grid):
    """Write a (lat, lon) 0/1 grid to `filename` as the integer variable 'sas'."""
    with nc.Dataset(filename, 'w', format='NETCDF4_CLASSIC') as out:
        out.createDimension('lat', grid.shape[0])
        out.createDimension('lon', grid.shape[1])
        # Explicit 32-bit integers: plain `int` is platform-dependent and the
        # classic data model has no 64-bit integer type.
        out.createVariable('sas', 'i4', ('lat', 'lon'))[:] = grid


# ---- One pair of output files per day ---------------------------------------
daytime = dt.datetime(2021,1,1)
while daytime.year<2022: 
    day_str = daytime.strftime("%Y%m%d")
    day_files = locate_file(day_str,path)
    print(day_str,len(day_files))

    # 1 where at least one pixel of the given class was seen during the day
    low_seen = np.zeros(GRID_SHAPE, dtype=np.int32)    # 0 <= albedo < ALBEDO_LIMIT
    high_seen = np.zeros(GRID_SHAPE, dtype=np.int32)   # albedo >= ALBEDO_LIMIT

    for f in day_files: 
        # The context manager closes the file even if a variable is missing.
        with nc.Dataset(f,'r') as TF:
            data = TF.groups['PRODUCT']
            var = data.variables
            SAS = data['SUPPORT_DATA']['DETAILED_RESULTS']['surface_albedo_SWIR'][0,:,:]
            lat = np.ma.getdata(var['latitude'][0,:,:])
            lon = np.ma.getdata(var['longitude'][0,:,:])

        # Masked entries and explicit fill values become -1 so that they
        # belong to neither albedo class.
        SAS = np.ma.filled(SAS, -1)
        SAS[SAS == FILL_VALUE] = -1

        # Find pixels with surface albedo < 0.02
        mask1 = (0 <= SAS) & (SAS < ALBEDO_LIMIT)
        # Find pixels with surface albedo >= 0.02
        mask2 = SAS >= ALBEDO_LIMIT

        _mark_cells(low_seen, lat[mask1], lon[mask1])
        _mark_cells(high_seen, lat[mask2], lon[mask2])

    # day_arr1: 1 where a low-albedo pixel was observed, else 0
    day_arr1 = low_seen
    # day_arr2: 1 where NO pixel with albedo >= 0.02 was observed, else 0
    day_arr2 = 1 - high_seen

    _write_day_grid(os.path.join(out_dir, "sas_{}.nc".format(day_str)), day_arr1)
    _write_day_grid(os.path.join(out_dir, "sas_nd_{}.nc".format(day_str)), day_arr2)

    print('done output nc!')

    print(f"Finished TOC-reducing day of:{daytime}")
    
    daytime += dt.timedelta(days=1)

#### TOC-reducing days 

In [4]:
# Folder that holds the saved TOC-reducing-day files; the file-list cells walk it with os.walk.
# NOTE: this rebinds `path`, which the processing cell used for the level-2 input folder.
path  = r"where you saved TOC-reducing days"

In [3]:
# Daily files for surface albedo < 0.02 (written as sas_<date>.nc).
# The 'sas_nd_' files belong to the other list and are excluded here.
files1 = []
for r, d, f in os.walk(path):
    for file in f:
        if 'sas_' in file and 'sas_nd_' not in file:
            files1.append(os.path.join(r, file))
# os.walk returns entries in arbitrary order; sort so files pair up by date.
files1.sort()

In [5]:
# Daily files derived from pixels with surface albedo >= 0.02 (written as sas_nd_<date>.nc).
files2 = []
for r, d, f in os.walk(path):
    for file in f:
        if 'sas_nd_' in file:
            files2.append(os.path.join(r, file))
# os.walk returns entries in arbitrary order; sort so files pair up by date.
files2.sort()

In [7]:
# Stack the per-day difference grids (nd - td), one slice per file pair.
# The stack is sized from the number of pairs and zero-initialised, so no
# uninitialised slice can leak into the sum and extra files cannot overflow it.
file_pairs = list(zip(files1, files2))
SA_day = np.zeros(shape=(len(file_pairs),1800,3600),dtype=int)
for i, f in enumerate(file_pairs):
    # Both datasets are closed even if reading one of them fails.
    with nc.Dataset(f[0],'r') as d1, nc.Dataset(f[1],'r') as d2:
        # file with surface albedo < 0.02
        td = d1.variables['sas'][:]
        # file with surface albedo >= 0.02
        nd = d2.variables['sas'][:]

    sas = nd - td

    SA_day[i,:,:] = sas

In [None]:
# Collapse the day axis: per-cell total over all stacked days.
saday = SA_day.sum(axis=0)

In [None]:
# Reverse both axes, then undo the column reversal: the net effect is that
# only the row order is reversed (the last grid row becomes the first).
sa_flip = saday[::-1, ::-1]
saday = sa_flip[:, ::-1]

In [None]:
# Quick-look image of the accumulated grid.
plt.imshow(X=saday)
plt.show()