# Demo Variogram with micro-lags

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Put "../src" (relative to the working directory) at the front of the module
# search path. Presumably this is where the local modules imported below
# (krige_tools, fields, variogram) live -- confirm against the repo layout.
sys.path.insert(0, "../src")

In [3]:
import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import cartopy.crs as ccrs

import krige_tools
import fields
import variogram as vgm

In [102]:
## MOVE TO SCRIPT

def set_main_lon(lon_lwr=-125, lon_upr=-65, lon_res=5):
    """Build evenly spaced longitude bin edges and the midpoints between them.

    Parameters
    ----------
    lon_lwr : number, default -125
        First bin edge.
    lon_upr : number, default -65
        Upper limit; edges are generated with ``np.arange`` up to (but not
        including) ``lon_upr + lon_res``.
    lon_res : number, default 5
        Spacing between consecutive edges.

    Returns
    -------
    tuple of numpy.ndarray
        ``(edges, midpoints)`` where ``midpoints`` has one element fewer than
        ``edges`` and holds the mean of each pair of neighbouring edges.
    """
    edges = np.arange(lon_lwr, lon_upr + lon_res, lon_res)
    # Average each edge with its right-hand neighbour to get the bin centre.
    midpoints = 0.5 * (edges[:-1] + edges[1:])
    return edges, midpoints

def get_main_lon(ds, lon_centers):
    """Keep only the entries of ``ds`` whose longitude appears in ``lon_centers``.

    Parameters
    ----------
    ds : xarray object
        Anything exposing ``to_dataframe()`` whose flattened table contains
        "lon", "lat" and "time" columns after ``reset_index()``.
    lon_centers : array-like
        Longitudes to retain. Matching is an exact-equality join on "lon".

    Returns
    -------
    xarray object
        Result of ``DataFrame.to_xarray()`` on the filtered table, indexed by
        ("lon", "lat", "time"). pandas documents this as an ``xarray.Dataset``
        for DataFrame input.
    """
    records = ds.to_dataframe().reset_index()
    wanted_lon = pd.DataFrame({"lon": lon_centers})
    # Inner join: rows whose "lon" has no exact match in wanted_lon are dropped.
    kept = records.merge(wanted_lon, on="lon", how="inner")
    return kept.set_index(["lon", "lat", "time"]).to_xarray()

def microlag_clouds(df_group, fast_dist=True):
    """Compute the xco2, sif and cross variogram clouds for one group of rows.

    Parameters
    ----------
    df_group : pandas.DataFrame
        Rows belonging to a single group. The columns "lat", "lon", "xco2"
        and "sif" are read.
    fast_dist : bool, default True
        Forwarded unchanged to ``krige_tools.distance_matrix``.

    Returns
    -------
    tuple
        ``(cloud_xco2, cloud_sif, cloud_cross)``: the three results of
        ``vgm.variogram_cloud`` for xco2 alone, sif alone, and xco2 against
        sif (passed as ``values2``).
    """
    coords = df_group[["lat", "lon"]]
    # Pairwise distances of the group's locations against themselves.
    dist = krige_tools.distance_matrix(coords, coords, fast_dist=fast_dist)
    values_xco2 = df_group["xco2"].values
    values_sif = df_group["sif"].values

    # NOTE(review): the bare name `variogram_cloud` is neither defined nor
    # imported in this notebook, so calling it raised NameError. It is assumed
    # to live in the `variogram` module (imported as `vgm`) -- confirm. If this
    # function is later moved into that module, drop the `vgm.` prefix.
    cloud_xco2 = vgm.variogram_cloud(dist, values_xco2)
    cloud_sif = vgm.variogram_cloud(dist, values_sif)
    cloud_cross = vgm.variogram_cloud(dist, values_xco2, values2=values_sif)

    return cloud_xco2, cloud_sif, cloud_cross

In [4]:
# Open the NetCDF dataset used throughout the notebook
ds = xr.open_dataset("../data/exp_pro/OCO2_4x5deg_monthly_conus_microlag.nc")

# Names of the two variables and of their cross term
var_names = ["xco2", "sif"]
cross_name = ":".join(var_names)

# Format as multi-fields: one sub-dataset per variable, each holding the
# variable itself plus its "<name>_var" companion
ds_xco2, ds_sif = (ds[[name, f"{name}_var"]] for name in var_names)

In [97]:
# Plug this setup into MF
# (restrict the 2018-08-01 xco2 slice to the default main-longitude centers)
main_lon_centers = set_main_lon()[1]
get_main_lon(ds_xco2.sel(time="2018-08-01"), main_lon_centers)

In [100]:
# Separately, get variogram values for microlags
lon_bins, lon_centers = set_main_lon()

# Flatten the 2016-06-01 slice to one row per location, keep only the
# coordinates and the two value columns, then label every row with the
# center of the main-longitude bin its "lon" falls into.
df_micro = (
    ds.sel(time="2016-06-01")
    .to_dataframe()
    .reset_index()
    .drop(columns=["xco2_var", "sif_var", "time"])
    .assign(
        lon_group=lambda frame: pd.cut(
            frame["lon"], lon_bins, labels=lon_centers, include_lowest=True
        )
    )
)
df_micro.head(10)

Unnamed: 0,lat,lon,xco2,sif,lon_group
0,14.0,-123.5,405.359772,,-122.5
1,14.0,-123.0,405.330994,,-122.5
2,14.0,-122.5,405.006256,,-122.5
3,14.0,-122.0,405.023743,,-122.5
4,14.0,-121.5,404.998077,,-122.5
5,14.0,-118.5,404.837952,,-117.5
6,14.0,-118.0,404.822113,,-117.5
7,14.0,-117.5,404.719696,,-117.5
8,14.0,-117.0,405.099792,,-117.5
9,14.0,-116.5,405.424744,,-117.5


In [None]:
# Apply microlag_clouds across lon_groups, concat each cloud type into its own df (3 total), then append these dfs in empirical_variogram

In [42]:
# Analysis variables
month = 8          # calendar month used to pick timestamps below
timedelta = -2     # presumably a time lag between the two fields -- confirm units
n_bins = 50        # presumably the number of variogram bins -- confirm
fast_dist = True   # presumably forwarded to the distance computation -- confirm

# All time stamps in the dataset that fall in the chosen month, at day precision
in_month = ds.time.dt.month == month
timestamps = np.array(ds.time[in_month].values, dtype="datetime64[D]")

Dig into the actual variogram calculation... can we separate out the indices at microlags vs. the indices at the regular longitudes (within half max dist)?