## Compute all-sky daily SIF and generate LCSIF_v2 files
### Jianing Fang (jf3423@columbia.edu)
This notebook contains the code for calculating all-sky daily SIF from the ERA5 surface solar radiation downwards (SSRD) and the clear-sky SIF. It then writes the clear-sky instantaneous, clear-sky daily, and all-sky daily SIF to NetCDF files.

In [None]:
import numpy as np
import xarray as xr
import os
import matplotlib.pyplot as plt
import multiprocessing as mp
from datetime import date, time, timedelta, datetime
import datetime
import pandas as pd
from scipy import interpolate
import calendar
from copy import deepcopy
from tqdm import tqdm
import subprocess

In [2]:
# Top-of-atmosphere (TOA) shortwave radiation
def compute_R_toa(cos_sza, doy):
    """Return the TOA shortwave radiation for a sun position and day of year.

    Parameters
    ----------
    cos_sza : float or numpy.ndarray
        Cosine of the solar zenith angle.
    doy : int or float
        Day of year; it drives an annual cosine cycle with a 365-day period.

    Returns
    -------
    float or numpy.ndarray
        ``1360.8 * 0.98 * (1 + 0.033 * cos(2*pi*doy/365)) * cos_sza``, with
        the same shape as ``cos_sza``. The value is negative wherever
        ``cos_sza`` is negative; callers are expected to mask those cases.
    """
    solar_constant = 1360.8  # presumably W m-2 -- TODO confirm units
    # NOTE(review): the meaning of the 0.98 multiplier is not stated in this file.
    scale_factor = 0.98
    # Annual modulation term, equal to 1.033 at doy = 0.
    annual_cycle = 1 + 0.033 * np.cos(2 * np.pi * doy / 365)
    return solar_constant * scale_factor * annual_cycle * cos_sza
# compute clear sky shortwave radiation. See Zhang et al. 2017 CSIF paper for reference
def compute_total_surface_shortwave_radiation(cos_sza, doy, elevation):
    """Return the clear-sky total (beam + diffuse) surface shortwave radiation.

    The TOA radiation from ``compute_R_toa`` is multiplied by a beam
    transmittance ``tau_b`` and a diffuse transmittance ``tau_d``; the two
    components are summed. ``tau_b`` depends on ``elevation`` through three
    quadratic coefficients and on the sun position through
    ``exp(-k / cos_sza)``.

    Parameters
    ----------
    cos_sza : float or array-like
        Cosine of the solar zenith angle. Scalars are accepted as well as
        arrays.
    doy : int or float
        Day of year, forwarded to ``compute_R_toa``.
    elevation : float or numpy.ndarray
        Surface elevation, broadcastable against ``cos_sza``.
        NOTE(review): presumably in km (the caller in this notebook divides a
        DEM by 1000) -- confirm against the cited paper.

    Returns
    -------
    numpy.ndarray
        Total surface shortwave radiation, set to 0 wherever ``cos_sza < 0``.
        NaN inputs propagate as NaN.
    """
    # Accept scalars and lists as well as arrays.
    cos_sza = np.asarray(cos_sza)
    R_toa = compute_R_toa(cos_sza, doy)
    a0 = 0.4237 - 0.00821 * (6 - elevation)**2
    a1 = 0.5055 + 0.00595 * (6.5 - elevation)**2
    k = 0.2711 + 0.01858 * (2.5 - elevation)**2
    # For cos_sza <= 0 the exponential divides by zero or overflows; those
    # pixels are overwritten with 0 below, so the warnings carry no information.
    with np.errstate(divide="ignore", over="ignore", invalid="ignore"):
        tau_b = a0 + a1 * np.exp(-k / cos_sza)
        tau_d = 0.271 - 0.294 * tau_b
        R_sb = R_toa * tau_b  # beam component
        R_sd = R_toa * tau_d  # diffuse component
        R_t = R_sb + R_sd
        # np.where (rather than in-place boolean assignment) also works for
        # scalar input and when elevation broadcasts R_t to a larger shape.
        return np.where(cos_sza < 0, 0, R_t)

In [3]:
PROCESSED_DIR="" # folder for processed files
SIF_AVHRR_OUT_DIR_V6=os.path.join(PROCESSED_DIR, "SIF_AVHRR_16day_v6") # folder for raw LCSIF files based on AVHRR period reflectance
# NOTE(review): this path is hard-coded and not derived from PROCESSED_DIR like its neighbours.
SIF_MODIS_OUT_DIR_V5="../../data/processed/SIF_MODIS_16day_v5" # folder for the MODIS-period SIF files listed into MODIS_SIF_LIST
LCSIF_v2 = os.path.join(PROCESSED_DIR, "LCSIF_v2") # output folder for the LCSIF_v2 NetCDF files written by this notebook

# DEM downloaded from https://www.usgs.gov/centers/eros/science/usgs-eros-archive-digital-elevation-global-30-arc-second-elevation-gtopo30
# resampled to 0.05 degrees in NetCDF format using gdalwarp
# The rows are flipped along the first axis and the values divided by 1000
# (presumably metres -> kilometres; confirm against the DEM units).
# The context manager closes the file once the values are in memory.
with xr.open_dataset("../../data/GTOPO30/gtopo30_0.05.nc") as dem_ds:
    elevation = np.flip(dem_ds.Band1.values, axis=0) / 1000

# preprocessed ERA5 SSRD, with the same temporal and spatial resolution as LCSIF and LCREF
ERA5_SSRD_DIR = os.path.join(PROCESSED_DIR, "ERA5_SSRD")

In [7]:
# Paths of all AVHRR-period SIF files, ordered by year folder and then by file name.
AVHRR_SIF_LIST = []
for year in sorted(os.listdir(SIF_AVHRR_OUT_DIR_V6)):
    files_in_year = sorted(os.listdir(os.path.join(SIF_AVHRR_OUT_DIR_V6, year)))
    AVHRR_SIF_LIST += [os.path.join(SIF_AVHRR_OUT_DIR_V6, year, file) for file in files_in_year]

In [8]:
# Paths of all MODIS-period SIF files, ordered by year folder and then by file name.
# The "2000" folder is skipped; the output title describes MODIS as covering 2001-2021.
MODIS_SIF_LIST = []
for year in sorted(os.listdir(SIF_MODIS_OUT_DIR_V5)):
    if year == "2000":
        continue
    files_in_year = sorted(os.listdir(os.path.join(SIF_MODIS_OUT_DIR_V5, year)))
    MODIS_SIF_LIST += [os.path.join(SIF_MODIS_OUT_DIR_V5, year, file) for file in files_in_year]

In [12]:
def _semi_monthly_hour_bounds(year):
    """Return the 24 ``(start_hour, end_hour)`` pairs of one calendar year.

    Each month is split into two periods: day 1 up to (not including) day 16,
    and day 16 up to the first day of the next month. Bounds are hours elapsed
    since 00:00 on 1 January of ``year`` (day-of-year minus one, times 24), so
    they can be used directly as half-open slices into an hourly time axis
    that starts at the beginning of the year.
    """
    hours_in_year = (366 if calendar.isleap(year) else 365) * 24
    bounds = []
    for month in range(1, 13):
        first_half_start = (date(year, month, 1).timetuple().tm_yday - 1) * 24
        second_half_start = (date(year, month, 16).timetuple().tm_yday - 1) * 24
        if month < 12:
            month_end = (date(year, month + 1, 1).timetuple().tm_yday - 1) * 24
        else:
            # December has no "next month" within the same year.
            month_end = hours_in_year
        bounds.append((first_half_start, second_half_start))
        bounds.append((second_half_start, month_end))
    return bounds


# One list of 24 hour-index pairs per year: AVHRR era (1982-2000) and MODIS era (2001-2021).
avhrr_time_idx = [_semi_monthly_hour_bounds(year) for year in range(1982, 2001)]
modis_time_idx = [_semi_monthly_hour_bounds(year) for year in range(2001, 2022)]

In [None]:
# Average the hourly ERA5 SSRD over every semi-monthly period of the AVHRR era
# (1982-2000) and write one NetCDF file per period into ERA5_SSRD_DIR.
# NOTE(review): ERA5_SSRD is not defined in the visible cells of this notebook.
# It is indexed by year offset, so it is presumably a list of yearly hourly
# ERA5 files starting in 1982 -- confirm before running.
avhrr_years = np.arange(1982, 2001)
for i in tqdm(range(len(avhrr_years))):
    year_str = str(avhrr_years[i])
    # List the output folder once per year instead of once per period.
    existing_files = set(os.listdir(ERA5_SSRD_DIR))
    # The context manager closes the yearly file after its 24 periods are done.
    with xr.open_dataset(ERA5_SSRD[i]) as ssrd_ds:
        for j in range(24):
            # Even j is the first half of the month ("a"), odd j the second ("b").
            ltr = "a" if j % 2 == 0 else "b"
            month_str = "{:02}".format(j // 2 + 1) + ltr
            out_name = "ERA5_SSRD_" + year_str + month_str + ".nc"
            if out_name in existing_files:
                # Skip only this period. Breaking out here would abandon the
                # rest of a partially processed year.
                continue
            start_hour, end_hour = avhrr_time_idx[i][j]
            era_5_rad_array = np.zeros((1, 601, 1440))
            # Mean of the hourly values; dividing by 3600 matches the
            # "W m**-2" units set below (presumably J m-2 per hour -> W m-2).
            era_5_rad_array[0, :, :] = np.mean(ssrd_ds.ssrd[start_hour:end_hour, :, :].values, axis=0) / 3600
            # Each file is stamped with the last hourly time step of its period.
            time_list = [ssrd_ds.time[end_hour - 1].values,]
            era_5_rad_da = xr.DataArray(data=era_5_rad_array, coords={"time":time_list, "latitude":deepcopy(ssrd_ds.latitude.values) , "longitude":deepcopy(ssrd_ds.longitude.values)})
            era_5_rad_da.attrs["units"] = "W m**-2"
            era_5_rad_da.attrs["long_name"] = "ERA5 biweekly averaged surface solar radiation downward flux"
            era_5_rad_da.attrs["standard_name"] = "era_5_rad_biweekly"
            era_5_rad_da.longitude.attrs = ssrd_ds.longitude.attrs
            era_5_rad_da.latitude.attrs = ssrd_ds.latitude.attrs
            era_5_rad_ds = xr.Dataset({"era_5_rad_biweekly":era_5_rad_da})
            era_5_rad_ds.to_netcdf(os.path.join(ERA5_SSRD_DIR, out_name))

In [37]:
# Average the hourly ERA5 SSRD over every semi-monthly period of the MODIS era
# (2001-2021) and write one NetCDF file per period into ERA5_SSRD_DIR.
# NOTE(review): ERA5_SSRD is not defined in the visible cells of this notebook.
# The +19 offset skips the 19 AVHRR years (1982-2000), so it is presumably a
# list of yearly hourly ERA5 files starting in 1982 -- confirm before running.
modis_years = np.arange(2001, 2022)
for i in tqdm(range(len(modis_time_idx))):
    year_str = str(modis_years[i])
    # List the output folder once per year instead of once per period.
    existing_files = set(os.listdir(ERA5_SSRD_DIR))
    # The context manager closes the yearly file after its 24 periods are done.
    with xr.open_dataset(ERA5_SSRD[i+19]) as ssrd_ds:
        for j in range(24):
            # Even j is the first half of the month ("a"), odd j the second ("b").
            ltr = "a" if j % 2 == 0 else "b"
            month_str = "{:02}".format(j // 2 + 1) + ltr
            out_name = "ERA5_SSRD_" + year_str + month_str + ".nc"
            if out_name in existing_files:
                # Skip only this period. Breaking out here would abandon the
                # rest of a partially processed year.
                continue
            start_hour, end_hour = modis_time_idx[i][j]
            era_5_rad_array = np.zeros((1, 601, 1440))
            # Mean of the hourly values; dividing by 3600 matches the
            # "W m**-2" units set below (presumably J m-2 per hour -> W m-2).
            era_5_rad_array[0, :, :] = np.mean(ssrd_ds.ssrd[start_hour:end_hour, :, :].values, axis=0) / 3600
            # Each file is stamped with the last hourly time step of its period.
            time_list = [ssrd_ds.time[end_hour - 1].values,]
            era_5_rad_da = xr.DataArray(data=era_5_rad_array, coords={"time":time_list, "latitude":deepcopy(ssrd_ds.latitude.values) , "longitude":deepcopy(ssrd_ds.longitude.values)})
            era_5_rad_da.attrs["units"] = "W m**-2"
            era_5_rad_da.attrs["long_name"] = "ERA5 biweekly averaged surface solar radiation downward flux"
            era_5_rad_da.attrs["standard_name"] = "era_5_rad_biweekly"
            era_5_rad_da.longitude.attrs = ssrd_ds.longitude.attrs
            era_5_rad_da.latitude.attrs = ssrd_ds.latitude.attrs
            era_5_rad_ds = xr.Dataset({"era_5_rad_biweekly":era_5_rad_da})
            era_5_rad_ds.to_netcdf(os.path.join(ERA5_SSRD_DIR, out_name))

100%|███████████████████████████████████████████| 21/21 [00:01<00:00, 11.03it/s]


In [13]:
# Paths of the ERA5 SSRD files in the CMG folder, sorted by file name so that
# position k lines up with the k-th semi-monthly period used by the loops below.
ERA_5_CMG_LIST = []
for f in sorted(os.listdir("../../data/processed/ERA5_SSRD/CMG")):
    ERA_5_CMG_LIST.append(os.path.join("../../data/processed/ERA5_SSRD/CMG/", f))


In [None]:
# Build the LCSIF_v2 files for the AVHRR era (1982-2000). For every
# semi-monthly period: read the clear-sky SIF and the matching ERA5 radiation,
# derive the all-sky daily SIF, tidy the metadata and write one NetCDF file.
for i in tqdm(range(len(avhrr_time_idx))):
    year_str=str(np.arange(1982,2001)[i])
    # to_netcdf does not create missing folders, so make sure the year folder exists.
    os.makedirs(os.path.join(LCSIF_v2, year_str), exist_ok=True)
    for j in range(24):
        # Even j is the first half of the month ("a"), odd j the second ("b").
        ltr = "a" if j % 2 == 0 else "b"
        month_str = "{:02}".format(j//2 + 1) + ltr

        # Read the ERA5 radiation into memory and close the file right away.
        with xr.open_dataset(ERA_5_CMG_LIST[i * 24 + j]) as era_5_ds:
            era_5_rad = np.flip(era_5_ds.Band1.values, axis=0)
        # NOTE(review): blanks the last 598 rows after the flip -- presumably
        # latitudes not covered by the ERA5 input; confirm.
        era_5_rad[-598:, :] = np.nan

        # Keep the SIF file open until the output is written (its variables may
        # be loaded lazily), then close it instead of leaking the handle.
        with xr.open_dataset(AVHRR_SIF_LIST[i * 24 + j]) as src_ds:
            cos_sza = src_ds.cos_sza_modvalml.values[0, :, :]
            # The time stamp is the last day of the period; step back 7 days.
            doy = pd.Timestamp(src_ds.time.values[0]).dayofyear - 7
            R_t = compute_total_surface_shortwave_radiation(cos_sza, doy, elevation)
            clear_inst = src_ds.sif_modvalml_clear_inst.values[0, :, :]
            # Scale the clear-sky instantaneous SIF by ERA5 radiation over the
            # modelled clear-sky radiation. Pixels with R_t <= 0 are
            # overwritten just below, so divide warnings there are silenced.
            with np.errstate(divide="ignore", invalid="ignore"):
                all_daily = clear_inst/R_t * era_5_rad
            all_daily[R_t <= 0] = 0
            all_daily[np.isnan(clear_inst)] = np.nan
            # drop_vars replaces the deprecated Dataset.drop for variables.
            ds = src_ds.drop_vars(["cos_sza_modvalml", "cos_daily_sza_modvalml"])

            ds.latitude.attrs={"units":"degrees_north", "long_name":"latitude"}
            ds.longitude.attrs={"units":"degrees_east", "long_name":"longitude"}
            ds.time.attrs = {"long_name": "last day of each data period"}
            ds = ds.rename({"sif_modvalml_clear_inst":"sif_clear_inst",
                "sif_modvalml_clear_daily":"sif_clear_daily"})

            # NOTE(review): "date_source" (probably meant "data_source") and the
            # stray ")" in "filename_notation" are kept byte-identical because
            # they are part of the written product metadata.
            ds.attrs = {"title": "Semi-Monthly Global Reconstruction LCSIF using AVHRR (1982-2000) and MODIS (2001-2021)",
                    "spatial_resolution": "0.050000 degrees per pixel",
                    "geospatial_lat_min": "-90",
                    "geospatial_lat_max": "90",
                    "geospatial_lon_min":"-180",
                    "geospatial_lon_max":"180",
                    "product_version": "v2",
                    "filename_notation": "a: day1-day15 of the month, b:day16-last day of the month)",
                    "contacts": "Jianing Fang (jf3423@columbia.edu), Xu Lian (xl3179@columbia.edu)",
                    "date_source": "LCSIF_reflectance_v1, MCD43C4 v006, OCO-2 SIF Lite V10, ERA5 Reanalysis",
                    "created_date":datetime.date.today().strftime("%m/%d/%Y")}
            # Zero the clear-sky products where there is no modelled radiation.
            # This relies on .values exposing the in-memory array for in-place edits.
            ds["sif_clear_inst"].values[0, :, :][R_t <= 0] = 0
            ds["sif_clear_daily"].values[0, :, :][R_t <= 0] = 0
            ds["sif_all_daily"] = xr.DataArray(np.expand_dims(all_daily, axis=0), coords={"time":ds.time, "latitude":ds.latitude, "longitude":ds.longitude})
            ds["sif_all_daily"].attrs={"long_name":"all-sky daily average SIF weighted by ERA5 SSRD", "units":"mW m-2 nm-1 sr-1"}
            ds["sif_clear_inst"].attrs={"long_name":"instantaneous clear-sky predicted SIF", "units":"mW m-2 nm-1 sr-1"}
            ds["sif_clear_daily"].attrs={"long_name": "daily clear-sky predicted SIF adjusted by cosine solar zenith angle", "units":"mW m-2 nm-1 sr-1"}
            ds = ds.astype(np.float32)
            ds.to_netcdf(os.path.join(LCSIF_v2, year_str, "LCSIF_v2_" + month_str + ".nc"))
        

In [None]:
# Build the LCSIF_v2 files for the MODIS era (2001-2021). For every
# semi-monthly period: read the clear-sky SIF and the matching ERA5 radiation,
# derive the all-sky daily SIF, tidy the metadata and write one NetCDF file.
for i in tqdm(range(len(modis_time_idx))):
    year_str=str(np.arange(2001,2022)[i])
    # to_netcdf does not create missing folders, so make sure the year folder exists.
    os.makedirs(os.path.join(LCSIF_v2, year_str), exist_ok=True)
    for j in range(24):
        # Even j is the first half of the month ("a"), odd j the second ("b").
        ltr = "a" if j % 2 == 0 else "b"
        month_str = "{:02}".format(j//2 + 1) + ltr

        # Read the ERA5 radiation into memory and close the file right away.
        # The +19 offset skips the 19 AVHRR years (1982-2000) at the start of the list.
        with xr.open_dataset(ERA_5_CMG_LIST[(i+19) * 24 + j]) as era_5_ds:
            era_5_rad = np.flip(era_5_ds.Band1.values, axis=0)
        # NOTE(review): blanks the last 598 rows after the flip -- presumably
        # latitudes not covered by the ERA5 input; confirm.
        era_5_rad[-598:, :] = np.nan

        # Keep the SIF file open until the output is written (its variables may
        # be loaded lazily), then close it instead of leaking the handle.
        with xr.open_dataset(MODIS_SIF_LIST[i * 24 + j]) as src_ds:
            cos_sza = src_ds.cos_sza_modis.values[0, :, :]
            # The time stamp is the last day of the period; step back 7 days.
            doy = pd.Timestamp(src_ds.time.values[0]).dayofyear - 7
            R_t = compute_total_surface_shortwave_radiation(cos_sza, doy, elevation)
            clear_inst = src_ds.sif_modis_clear_inst.values[0, :, :]
            # Scale the clear-sky instantaneous SIF by ERA5 radiation over the
            # modelled clear-sky radiation. Pixels with R_t <= 0 are
            # overwritten just below, so divide warnings there are silenced.
            with np.errstate(divide="ignore", invalid="ignore"):
                all_daily = clear_inst/R_t * era_5_rad
            all_daily[R_t <= 0] = 0
            all_daily[np.isnan(clear_inst)] = np.nan
            # drop_vars replaces the deprecated Dataset.drop for variables.
            ds = src_ds.drop_vars(["cos_sza_modis", "cos_daily_sza_modis"])
            ds.latitude.attrs={"units":"degrees_north", "long_name":"latitude"}
            ds.longitude.attrs={"units":"degrees_east", "long_name":"longitude"}
            ds.time.attrs = {"long_name": "last day of each data period"}
            ds = ds.rename({"sif_modis_clear_inst":"sif_clear_inst",
                "sif_modis_clear_daily":"sif_clear_daily"})

            # NOTE(review): "date_source" (probably meant "data_source") and the
            # stray ")" in "filename_notation" are kept byte-identical because
            # they are part of the written product metadata.
            ds.attrs = {"title": "Semi-Monthly Global Reconstruction LCSIF using AVHRR (1982-2000) and MODIS (2001-2021)",
                    "spatial_resolution": "0.050000 degrees per pixel",
                    "geospatial_lat_min": "-90",
                    "geospatial_lat_max": "90",
                    "geospatial_lon_min":"-180",
                    "geospatial_lon_max":"180",
                    "product_version": "v2",
                    "filename_notation": "a: day1-day15 of the month, b:day16-last day of the month)",
                    "contacts": "Jianing Fang (jf3423@columbia.edu), Xu Lian (xl3179@columbia.edu)",
                    "date_source": "LCSIF_reflectance_v1, MCD43C4 v006, OCO-2 SIF Lite V10, ERA5 Reanalysis",
                    "created_date":datetime.date.today().strftime("%m/%d/%Y")}
            # Zero the clear-sky products where there is no modelled radiation.
            # This relies on .values exposing the in-memory array for in-place edits.
            ds["sif_clear_inst"].values[0, :, :][R_t <= 0] = 0
            ds["sif_clear_daily"].values[0, :, :][R_t <= 0] = 0
            ds["sif_all_daily"] = xr.DataArray(np.expand_dims(all_daily, axis=0), coords={"time":ds.time, "latitude":ds.latitude, "longitude":ds.longitude})
            ds["sif_all_daily"].attrs={"long_name":"all-sky daily average SIF weighted by ERA5 SSRD", "units":"mW m-2 nm-1 sr-1"}
            ds["sif_clear_inst"].attrs={"long_name":"instantaneous clear-sky predicted SIF", "units":"mW m-2 nm-1 sr-1"}
            ds["sif_clear_daily"].attrs={"long_name": "daily clear-sky predicted SIF adjusted by cosine solar zenith angle", "units":"mW m-2 nm-1 sr-1"}
            ds = ds.astype(np.float32)

            ds.to_netcdf(os.path.join(LCSIF_v2, year_str, "LCSIF_v2_" + month_str + ".nc"))