# Create a 5-degree, monthly-average gridded dataset of SIF and XCO2 over the contiguous US (CONUS), restricted to a supplied land grid
---

In [1]:
import sys
sys.path.insert(0, "../src")

import pandas as pd 
import xarray as xr

from krige_tools import land_grid
from stat_tools import apply_detrend

In [2]:
# collect the gridded dataset, detrend it, and restrict it to a common land-only domain
# Land mask table: one row per cell returned by land_grid, flagged with
# land == 1 and indexed by (lon, lat) so it can be joined onto the gridded data.
# NOTE(review): column 0 is taken to be latitude and column 1 longitude, as
# implied by the rename below -- confirm against land_grid.
df_grid = pd.DataFrame(
    land_grid(res=5, lon_lwr=-125, lon_upr=-65, lat_lwr=10, lat_upr=50)
).rename(columns={0: "lat", 1: "lon"})
df_grid["land"] = 1
df_grid = df_grid.set_index(["lon", "lat"])

# NOTE: no longer standardizing across all time; within year only for now
def detrend_standardize_da(da):
    """Detrend ``da`` and convert it to z-scores along the ``time`` dimension.

    The first value returned by ``apply_detrend`` is used as the detrended
    array; the second return value is discarded. The detrended array is then
    centred on its mean over ``time`` and divided by its standard deviation
    over ``time``.

    Parameters
    ----------
    da
        Array with a ``time`` dimension (presumably an ``xarray.DataArray``;
        confirm against ``apply_detrend``).

    Returns
    -------
    The detrended array, centred and scaled along ``time``.
    """
    detrended, _ = apply_detrend(da)
    centered = detrended - detrended.mean(dim="time")
    return centered / detrended.std(dim="time")

# Load the monthly gridded product, add detrended residuals for SIF and XCO2,
# and keep only the cells present in the land grid.
with xr.open_dataset("../data/exp_pro/OCO2_5deg_monthly.nc") as ds:
    ds = ds[["sif", "xco2"]]
    # apply_detrend returns a pair; only the first element is kept here.
    ds["sif_res"], _ = apply_detrend(ds.sif)
    ds["xco2_res"], _ = apply_detrend(ds.xco2)
    # Disabled alternatives: standardizing sif/xco2 with
    # detrend_standardize_da, or defining the residuals as the deviation
    # from the mean over "time".

    # Build the table while the file is still open: variables are read lazily,
    # so converting after the context manager has closed the file only works
    # if every variable happens to have been loaded and cached already.
    # Cells without a match in df_grid get land == NaN after the join and are
    # dropped.
    df = ds.to_dataframe().join(df_grid).dropna(subset=["land"]).reset_index()

# df["lat_lon"] = df[["lat", "lon"]].apply(tuple, axis=1)
df

Unnamed: 0,lat,lon,time,sif,xco2,sif_res,xco2_res,land
0,12.5,-92.5,2014-09-01,0.559724,393.029785,0.125447,-3.740133,1.0
1,12.5,-92.5,2014-10-01,0.588696,393.789459,0.154450,-3.207281,1.0
2,12.5,-92.5,2014-11-01,0.393522,396.065002,-0.040693,-1.158559,1.0
3,12.5,-92.5,2014-12-01,0.310282,397.139435,-0.123902,-0.310949,1.0
4,12.5,-92.5,2015-01-01,0.243238,397.303528,-0.190915,-0.373678,1.0
...,...,...,...,...,...,...,...,...
5845,47.5,-67.5,2020-07-01,0.644079,408.272064,0.411713,-4.123634,1.0
5846,47.5,-67.5,2020-08-01,0.463758,406.768005,0.231082,-5.845368,1.0
5847,47.5,-67.5,2020-09-01,0.294015,407.222992,0.061029,-5.608058,1.0
5848,47.5,-67.5,2020-10-01,0.113518,410.960052,-0.119777,-2.088674,1.0


In [3]:
# Keep only the coordinates and the residual columns, then write them to CSV
# without the positional row index.
df_conus = df.loc[:, ["lat", "lon", "time", "sif_res", "xco2_res"]]
df_conus.to_csv(
    "../data/exp_pro/OCO2_5deg_monthly_conus.csv",
    index=False,
)