In [1]:
import numpy as np
import netCDF4 as nc
import pandas as pd 
import os 
import datetime as dt 
import matplotlib.pyplot as plt 

In [2]:
def locate_file(d_str,path): 
    """
    Locate TROPOMI level-2 data files whose name matches a date string.

    The folder tree under ``path`` is walked recursively. Each file name is
    split on underscores and the ninth field (index 8) is searched for
    ``d_str``. Files whose name has fewer than nine fields (hidden files,
    READMEs, ...) cannot be product files in this naming scheme and are
    skipped instead of raising ``IndexError``.

    param: d_str-> datetime string (e.g., 20210101)
    param: path -> folder path for searching TROPOMI data files

    return: list of full paths of the matching files, in ``os.walk`` order
    """
    files = []
    for root, _dirs, names in os.walk(path):
        for name in names:
            fields = name.split('_')
            if len(fields) <= 8:
                # Not enough fields to carry the time stamp looked up below.
                continue
            if d_str in fields[8]:
                files.append(os.path.join(root, name))
    return files

In [None]:
# Axes of a global grid with 0.1-degree spacing: longitudes start at -180 and
# latitudes at -90; the upper bounds (180 and 90) are excluded by np.arange.
longitudes = np.arange(start=-180, stop=180, step=0.1)
latitudes = np.arange(start=-90, stop=90, step=0.1)

# 2-D coordinate arrays: X varies along columns (longitude), Y along rows (latitude).
X, Y = np.meshgrid(longitudes, latitudes, indexing='xy')

In [None]:
# Flatten both coordinate grids (row-major), snap every value to one decimal
# place and store the result as strings; the daily loop joins on these keys.
rav_lon = np.round(X.ravel(), decimals=1).astype(str)
rav_lat = np.round(Y.ravel(), decimals=1).astype(str)

### Example: calculating SZA-induced TOC-reducing days for 2021 (the loop below starts on 2021-08-08 and stops at the end of 2021)

In [None]:
def _normalise_zero(keys):
    """
    Replace the string key '-0.0' with '0.0'.

    Rounding a small negative coordinate yields negative zero, which is
    rendered as '-0.0', while a small positive one yields '0.0'. Both belong
    to the same grid cell, so both sides of the join must use one spelling.

    param: keys -> numpy array of coordinate strings
    return: array of the same shape with '-0.0' mapped to '0.0'
    """
    return np.where(keys == '-0.0', '0.0', keys)


def _pixel_keys(lat, lon, mask, flag_name):
    """
    Build the join table for the pixels selected by ``mask``.

    param: lat, lon  -> 2-D pixel coordinate arrays of one data file
    param: mask      -> boolean array selecting the pixels of interest
    param: flag_name -> name of the column that is filled with ones
    return: DataFrame with columns 'lon', 'lat', flag_name holding one row per
            distinct 0.1-degree key (duplicates removed before merging so the
            left join cannot multiply grid rows)
    """
    lat_key = _normalise_zero(np.round(lat[mask], decimals=1).astype(str))
    lon_key = _normalise_zero(np.round(lon[mask], decimals=1).astype(str))
    frame = pd.DataFrame(data={'lon': lon_key,
                               'lat': lat_key,
                               flag_name: np.ones(len(lon_key))})
    return frame.drop_duplicates()


def _write_grid(filename, var_name, grid):
    """
    Write an 1800 x 3600 integer grid to a new NETCDF4_CLASSIC file.

    The variable is declared as 32-bit integer ('i4'): the built-in ``int``
    maps to a platform-dependent width, and 64-bit integers are not part of
    the classic data model.

    param: filename -> output file path
    param: var_name -> name of the variable to create
    param: grid     -> array of shape (1800, 3600)
    """
    with nc.Dataset(filename, 'w', format='NETCDF4_CLASSIC') as out:
        out.createDimension('lat', 1800)
        out.createDimension('lon', 3600)
        out_var = out.createVariable(var_name, 'i4', ('lat', 'lon'))
        out_var[:] = grid


daytime = dt.datetime(2021,8,8)
path = r"where you save your TROPOMI Level-2 data products"

# Grid keys, one row per 0.1-degree cell in row-major order. Built once and
# copied for every day; zero is normalised here as well so that grid and
# pixel keys agree regardless of how the grid strings were produced.
grid_keys = pd.DataFrame(data={'lon': _normalise_zero(rav_lon),
                               'lat': _normalise_zero(rav_lat)})

while daytime.year<2022: 
    day_str = daytime.strftime("%Y%m%d")
    day_files = locate_file(day_str,path)
    print(day_str,len(day_files))

    # odf collects one flag column per file for pixels with SZA > 70,
    # mdf one flag column per file for pixels with 0 <= SZA <= 70.
    odf = grid_keys.copy()
    mdf = grid_keys.copy()

    for i, f in enumerate(day_files): 
        # The context manager closes the file even if reading fails.
        with nc.Dataset(f,'r') as TF:
            product = TF.groups['PRODUCT']
            SZA = np.ma.getdata(
                product['SUPPORT_DATA']['GEOLOCATIONS']['solar_zenith_angle'][0,:,:])
            lat = np.ma.getdata(product.variables['latitude'][0,:,:])
            lon = np.ma.getdata(product.variables['longitude'][0,:,:])

        # Move fill values out of both SZA ranges tested below.
        SZA[SZA == 9.96921e+36] = -1 

        # grid cells with SZA > 70 degrees
        high_sza = _pixel_keys(lat, lon, SZA > 70, 'cloud_{}'.format(i))
        odf = pd.merge(odf, high_sza, on=['lon', 'lat'],how='left')

        # grid cells with SZA <= 70 degrees
        low_sza = _pixel_keys(lat, lon, (SZA >= 0) & (SZA <= 70), 'sza_{}'.format(i))
        mdf = pd.merge(mdf, low_sza, on=['lon', 'lat'],how='left')

    # td = 1 where at least one file had a pixel with SZA > 70 in the cell.
    day = odf.iloc[:,2:].sum(axis=1).to_numpy()
    arr = np.reshape((day > 0).astype(int),(1800,3600))
    _write_grid(r"your path\sza_{}.nc".format(day_str), 'td', arr)

    # nd = 1 where no file had a pixel with 0 <= SZA <= 70 in the cell.
    day2 = mdf.iloc[:,2:].sum(axis=1).to_numpy()
    day_arr2 = np.reshape((day2 <= 0).astype(int),(1800,3600))
    _write_grid(r"your path\sza_nd_{}.nc".format(day_str), 'nd', day_arr2)

    print(f"Finished TOC-reducing day of:{daytime}")
    
    daytime += dt.timedelta(days=1)

#### TOC-reducing days 

In [None]:
# Folder that is searched below for the per-day result files (presumably the
# folder the daily loop wrote its sza_*.nc files into -- confirm).
# NOTE(review): this rebinds `path`, which the daily loop uses for the TROPOMI
# Level-2 input folder; re-running that loop after this cell picks up this value.
path = r"where you saved your SZA analysis resuls"

In [None]:
# Daily files holding the 'td' variable. The daily loop writes them as
# sza_<date>.nc from pixels with SZA > 70; the sza_nd_<date>.nc files match
# the 'sza' substring too and must be excluded here.
files1=[]
for r, d, f in os.walk(path):
    for file in f:
        if 'sza' in file and 'sza_nd' not in file:
            files1.append(os.path.join(r, file))
# os.walk gives no ordering guarantee; sort by file name (date stamp) so the
# list can be paired day-by-day with the 'nd' list.
files1.sort(key=os.path.basename)

In [None]:
# Daily files holding the 'nd' variable. The daily loop writes them as
# sza_nd_<date>.nc (cells without any pixel at 0 <= SZA <= 70); a plain 'sza'
# filter would also pick up the sza_<date>.nc files.
files2=[]
for r, d, f in os.walk(path):
    for file in f:
        if 'sza_nd' in file:
            files2.append(os.path.join(r, file))
# os.walk gives no ordering guarantee; sort by file name (date stamp) so the
# list can be paired day-by-day with the 'td' list.
files2.sort(key=os.path.basename)

In [None]:
# Pair the i-th entry of files1 ('td' files) with the i-th entry of files2
# ('nd' files); both lists are expected to cover the same days in the same order.
file_pairs = list(zip(files1,files2))

# One 1800 x 3600 slice per pair. The stack is sized from the number of pairs
# and zero-initialised: a fixed 365-day np.empty buffer leaves uninitialised
# values in unused slices and overflows when more than 365 pairs exist.
SZA_day = np.zeros(shape=(len(file_pairs),1800,3600),dtype=int)

for i, (td_file, nd_file) in enumerate(file_pairs):
    # Context managers close both files even if a read fails.
    with nc.Dataset(td_file,'r') as d1, nc.Dataset(nd_file,'r') as d2:
        # 'td': written by the daily loop from pixels with SZA > 70
        td = d1.variables['td'][:]
        # 'nd': written by the daily loop for cells with no pixel at 0 <= SZA <= 70
        nd = d2.variables['nd'][:]

    # NOTE(review): the difference can be -1, 0 or 1 per cell and day; confirm
    # this is the intended definition of a TOC-reducing day.
    sza = nd - td 

    SZA_day[i,:,:] = sza

In [None]:
# Collapse the day axis (axis 0) of the (day, lat, lon) stack so that one
# 1800 x 3600 map remains; axis=1 would sum over latitude instead.
# NOTE: this rebinds SZA_day, so the cell must not be run twice in a row.
SZA_day = np.sum(SZA_day,axis=0)

In [None]:
# Reverse every axis first, then reverse axis 1 again; for a 2-D array the
# net effect is that only the row order (axis 0) ends up reversed.
SZA_day_flip = np.flip(SZA_day, axis=None)
SZA_day = np.fliplr(SZA_day_flip)

In [None]:
# Quick-look image of the accumulated grid.
plt.imshow(SZA_day)
# pyplot is imported as `plt`; the name `plot` is never defined in this notebook.
plt.show()

In [3]:
# TRO = nc.Dataset(r"save your SZA RESULTS".format(day_str), 'w', format='NETCDF4_CLASSIC')
# lat = TRO.createDimension('lat', 1800)
# lon = TRO.createDimension('lon', 3600)
# td = TRO.createVariable('td', int, ('lat', 'lon'))
# td[:] = SZA_day
# TRO.close()