# Temporal Analysis of SIF and XCO2
---

In [1]:
import numpy as np
import pandas as pd
import xarray
import matplotlib.pyplot as plt
import seaborn as sns

from dask.distributed import Client

## Set up Dask cluster

In [2]:
# Start a local Dask cluster and connect to it; the trailing expression
# renders the client summary as the cell output.
client = Client(
    n_workers=8,
    dashboard_address=":8889",
)
client

0,1
Client  Scheduler: tcp://127.0.0.1:41481  Dashboard: http://127.0.0.1:8889/status,Cluster  Workers: 8  Cores: 64  Memory: 270.51 GB


## Data processing

In [3]:
def prep_sif(ds):
    """Preprocess a single SIF Lite file into a time-indexed dataset.

    Parameters
    ----------
    ds : xarray.Dataset
        One opened SIF Lite file. It must contain ``Daily_SIF_740nm``,
        ``SIF_Uncertainty_740nm``, ``Quality_Flag``, ``Longitude``,
        ``Latitude`` and ``Delta_Time``.

    Returns
    -------
    xarray.Dataset
        Dataset with a single data variable ``sif`` on a ``time`` dimension
        and ``lon`` / ``lat`` coordinates along ``time``. Only soundings that
        pass both quality filters are kept; variable attributes of the input
        are not carried over.

    Raises
    ------
    KeyError
        If any of the required variables is missing from ``ds``.
    """
    # Keep only the variables needed for filtering and for the output
    variable_list = ["Daily_SIF_740nm", "SIF_Uncertainty_740nm", "Quality_Flag", "Longitude", "Latitude", "Delta_Time"]
    ds = ds[variable_list]

    # Quality filters, combined into one mask so the data are filtered once:
    #   * discard soundings whose Quality_Flag equals 2
    #     NOTE(review): presumably the "failed" class -- confirm in the product guide
    #   * discard soundings where SIF + 3 * uncertainty is not positive
    sif_plus_3sig = ds.Daily_SIF_740nm + 3 * ds.SIF_Uncertainty_740nm
    keep = (ds.Quality_Flag != 2) & (sif_plus_3sig > 0)
    ds = ds.where(keep, drop=True)

    # Format dataset. Raw arrays (``.data``) are passed instead of DataArray
    # objects: xarray treats a DataArray inside a ``(dims, data)`` tuple as
    # ambiguous and refuses to build the variable from it.
    dsnew = xarray.Dataset(
        {
            "sif": (["time"], ds.Daily_SIF_740nm.data),
        },
        coords={
            "lon": (["time"], ds.Longitude.data),
            "lat": (["time"], ds.Latitude.data),
            "time": ds.Delta_Time.values,
        },
    )
    return dsnew


def prep_xco2(ds):
    """Preprocess a single FP Lite file into a time-indexed dataset.

    Parameters
    ----------
    ds : xarray.Dataset
        One opened FP Lite file. It must contain ``xco2``,
        ``xco2_quality_flag``, ``longitude``, ``latitude`` and ``time``.

    Returns
    -------
    xarray.Dataset
        Dataset with a single data variable ``xco2`` on a ``time`` dimension
        and ``lon`` / ``lat`` coordinates along ``time``. Only soundings whose
        ``xco2_quality_flag`` equals 0 are kept; variable attributes of the
        input are not carried over.

    Raises
    ------
    KeyError
        If any of the required variables is missing from ``ds``.
    """
    # Keep only the variables needed for filtering and for the output
    variable_list = ["xco2", "xco2_quality_flag", "longitude", "latitude", "time"]
    ds = ds[variable_list]

    # Quality filter: keep soundings flagged 0
    # NOTE(review): presumably 0 means "good" -- confirm in the product guide
    ds = ds.where(ds.xco2_quality_flag == 0, drop=True)

    # Format dataset. Raw arrays (``.data``) are passed instead of DataArray
    # objects: xarray treats a DataArray inside a ``(dims, data)`` tuple as
    # ambiguous and refuses to build the variable from it.
    dsnew = xarray.Dataset(
        {
            "xco2": (["time"], ds.xco2.data),
        },
        coords={
            "lon": (["time"], ds.longitude.data),
            "lat": (["time"], ds.latitude.data),
            "time": ds.time.values,
        },
    )
    return dsnew

In [None]:
# Open datasets for all years. Each file is passed through its preprocess
# function before the files are combined into one dataset.
# TODO: consider chunks={"sounding_dim": 5000} for the SIF files as well.
paths_sif = "/data/OCO-2/OCO2_L2_Lite_SIF.10r/*/*.nc4"
paths_xco2 = "/data/OCO-2/OCO2_L2_Lite_FP.10r/*/*.nc4"

DS_sif = xarray.open_mfdataset(
    paths_sif,
    preprocess=prep_sif,
    parallel=True,
)

# DS_xco2 is used by the monthly comparison cell, so it has to be defined here;
# leaving it commented out makes a fresh-kernel run fail with a NameError.
# NOTE(review): the chunk spec assumes the FP Lite files have a "sounding_id"
# dimension -- confirm against the files.
DS_xco2 = xarray.open_mfdataset(
    paths_xco2,
    preprocess=prep_xco2,
    chunks={"sounding_id": 5000},
    parallel=True,
)

distributed.scheduler - ERROR - Couldn't gather keys {"('copy-56f6a260e33798530828da688c8693fe', 0)": []} state: ['erred'] workers: []
NoneType: None


## Monthly time series comparison

In [None]:
# Compute monthly means (bins labelled by month start). Resampling along
# "time" discards the per-sounding lon/lat coordinates, so each frame is
# indexed by time only.
df_sif = DS_sif.sif.resample(time="1MS").mean(dim="time").to_dataframe().dropna()
df_xco2 = DS_xco2.xco2.resample(time="1MS").mean(dim="time").to_dataframe().dropna()

# Merge on the shared monthly time index, keeping only months present in both
# series. lon/lat cannot be merge keys because they no longer exist here.
df = (
    pd.merge(df_sif, df_xco2, how="inner", left_index=True, right_index=True)
    .sort_index()
    .reset_index()
)

# Format the month label only after sorting on the real datetime values
df["time"] = df.time.dt.strftime("%Y-%m")

df.head()