# 4-deg (latitude) x 5-deg (longitude) monthly-average gridded datasets for SIF and XCO2 over the continental US
---

In [1]:
import sys
sys.path.insert(0, "../src")

import pandas as pd 
import xarray as xr

from krige_tools import land_grid
from stat_tools import apply_detrend

In [2]:
# Build a lookup table of grid cells on a 5-deg (lon) x 4-deg (lat) grid
# bounded by lon [-125, -65] and lat [12, 52]. Column 0 of the land_grid
# output is labelled "lat" and column 1 "lon"; every row is flagged with
# land == 1 and the table is indexed by (lon, lat) so it can be joined onto
# the gridded observations to establish a common domain.
# NOTE(review): presumably land_grid returns only land cells -- confirm
# against krige_tools.land_grid.
df_grid = pd.DataFrame(
    land_grid(
        lon_res=5, lat_res=4, lon_lwr=-125, lon_upr=-65, lat_lwr=12, lat_upr=52
    )
)
df_grid = df_grid.rename(columns={0: "lat", 1: "lon"}).assign(land=1)
df_grid = df_grid.set_index(["lon", "lat"])

## Detrended, .csv format

In [3]:
# NOTE: no longer standardizing across all time; within year only for now
def detrend_standardize_da(da):
    """Detrend ``da`` and z-score the result along its ``time`` dimension.

    ``apply_detrend`` is expected to return a pair; only its first element is
    used here. That element is then centred on its mean over ``time`` and
    divided by its standard deviation over ``time``.

    NOTE(review): this helper is not called anywhere in the visible code.
    """
    residual, _ = apply_detrend(da)
    centered = residual - residual.mean(dim="time")
    return centered / residual.std(dim="time")

# Open the monthly 4x5-degree file, keep only the SIF and XCO2 variables, and
# store the first return value of apply_detrend for each as sif_res / xco2_res.
# The DataFrame is built inside the `with` block so that every read happens
# while the file is still open, instead of depending on values that happened
# to be cached in memory before the dataset was closed.
with xr.open_dataset("../data/exp_pro/OCO2_4x5deg_monthly.nc") as ds:
    ds = ds[["sif", "xco2"]]
    ds["sif_res"], _ = apply_detrend(ds.sif)
    ds["xco2_res"], _ = apply_detrend(ds.xco2)
    # Joining df_grid attaches the land flag; cells without it are dropped,
    # which restricts the table to the df_grid domain.
    df = ds.to_dataframe().join(df_grid).dropna(subset=["land"]).reset_index()

df

Unnamed: 0,lat,lon,time,sif,xco2,sif_res,xco2_res,land
0,14.0,-97.5,2014-09-01,0.803455,396.098663,0.420434,-2.124670,1.0
1,14.0,-97.5,2014-10-01,0.455865,396.420898,0.071911,-2.008693,1.0
2,14.0,-97.5,2014-11-01,0.350532,396.969116,-0.034354,-1.666734,1.0
3,14.0,-97.5,2014-12-01,0.317786,397.476471,-0.068032,-1.365638,1.0
4,14.0,-97.5,2015-01-01,0.257684,399.399750,-0.129067,0.351382,1.0
...,...,...,...,...,...,...,...,...
6970,50.0,-67.5,2020-07-01,0.423440,408.175934,0.280883,-4.509980,1.0
6971,50.0,-67.5,2020-08-01,0.294169,407.331543,0.151573,-5.576024,1.0
6972,50.0,-67.5,2020-09-01,0.214446,407.096710,0.071811,-6.032509,1.0
6973,50.0,-67.5,2020-10-01,0.067612,411.292786,-0.075062,-2.058087,1.0


In [4]:
# Export only the coordinates and the detrended residuals as a flat CSV; the
# DataFrame's integer row index is not written.
df_conus = df.loc[:, ["lat", "lon", "time", "sif_res", "xco2_res"]]
df_conus.to_csv(
    path_or_buf="../data/exp_pro/OCO2_4x5deg_monthly_conus_detrended.csv",
    index=False,
)

## Raw, .nc format

In [5]:
# Read the raw (non-detrended) monthly file inside a context manager so the
# underlying file handle is released as soon as the table has been built.
# Joining df_grid attaches the land flag; cells without it are dropped.
with xr.open_dataset("../data/exp_pro/OCO2_4x5deg_monthly.nc") as ds:
    df = ds.to_dataframe().join(df_grid).dropna(subset=["land"]).reset_index()

# Remove the helper land flag and rebuild a Dataset indexed by
# (lon, lat, time) from the filtered table.
ds_conus = df.drop(columns=["land"]).set_index(["lon", "lat", "time"]).to_xarray()
ds_conus

In [6]:
# Write the subset to disk as a NETCDF4 file, then close the Dataset.
# NOTE(review): ds_conus is rebuilt from a DataFrame, so close() is unlikely
# to release any file handle here -- confirm whether it is still needed.
ds_conus.to_netcdf(
    path="../data/exp_pro/OCO2_4x5deg_monthly_conus.nc",
    format="NETCDF4",
)
ds_conus.close()