# Creating a monthly NEE dataset using the CarbonTracker model

This notebook outlines the workflow for creating a monthly net ecosystem exchange (NEE) dataset. Before running it, you must have run the CarbonTracker workflow, which trains and creates the model.

To create the dataset you need:

- CarbonTracker model
- ERA5 (monthly)
- ERA5-land (monthly)
- SPEI (monthly)
- MODIS (monthly)
- Biomass (yearly)
- Copernicus Landcover (yearly)


In [1]:
from pathlib import Path

import xarray as xr
from dask.distributed import Client

import excited_workflow
from excited_workflow import monthly_nee
from excited_workflow.source_datasets import datasets


# Create a dask.distributed Client with default arguments.
# NOTE(review): presumably this starts a local cluster that the dask-backed
# computations in later cells run on -- confirm against the deployment setup.
client = Client()

In [None]:
# Input files: the CarbonTracker monthly flux file and the regions file.
cb_file = Path("/data/volume_2/EXCITED_prepped_data/CT2022.flux1x1-monthly.nc")
regions_file = Path("/data/volume_2/EXCITED_prepped_data/regions.nc")
home_path = Path.home()

# Select the trained model: every path matching "carbon_tracker-*" anywhere
# under the home directory is collected, and the last one in sorted path order
# is used. Fail with an explicit message instead of a bare IndexError when the
# CarbonTracker workflow has not produced a model yet.
model_candidates = sorted(home_path.rglob("carbon_tracker-*"))
if not model_candidates:
    raise FileNotFoundError(
        f"No 'carbon_tracker-*' model found under {home_path}; "
        "run the CarbonTracker workflow first."
    )
model_dir = model_candidates[-1]
data_dir = home_path / "data/volume_2"

# Keys into `datasets` for the source datasets that are loaded and merged
# into the model input.
desired_data = [
    "biomass",
    "spei",
    "modis",
    "era5_monthly",
    "era5_land_monthly",
    "copernicus_landcover",
]

# Predictor variable names handed to monthly_nee.get_predictions.
x_keys = [
    "d2m",
    "mslhf",
    "msshf",
    "ssr",
    "str",
    "t2m",
    "spei",
    "NIRv",
    "skt",
    "stl1",
    "swvl1",
    "lccs_class",
]
# Target variable name. NOTE(review): not referenced by any later cell in this
# notebook -- presumably kept for parity with the training workflow; confirm.
y_key = "bio_flux_opt"

In [None]:
# Open the CarbonTracker file and pass it through convert_timestamps; the
# result is used below as the target grid for every source dataset.
ds_cb = excited_workflow.utils.convert_timestamps(xr.open_dataset(cb_file))
ds_regions = xr.open_dataset(regions_file)

# Load each requested source dataset at monthly frequency with ds_cb as the
# target grid, then merge them all into a single input dataset.
ds_input = xr.merge(
    [
        datasets[source].load(freq="monthly", target_grid=ds_cb)
        for source in desired_data
    ]
)

In [None]:
# NOTE(review): both statements repeat work already done in the previous cell
# (regions_file is opened there, and ds_cb has already been passed through
# convert_timestamps). This cell looks removable, provided applying
# convert_timestamps only once gives the same result -- confirm before deleting.
ds_regions = xr.open_dataset(regions_file)
ds_cb = excited_workflow.utils.convert_timestamps(ds_cb)

In [None]:
# Call get_predictions with the merged input dataset, the predictor names,
# the regions dataset and the model directory; the two returned values are
# unpacked as `dfs` and `times` for the final cell.
dfs, times = monthly_nee.get_predictions(
    ds_input,
    x_keys,
    ds_regions,
    model_dir,
)

In [None]:
# Build the final dataset from the predictions and their time values.
# NOTE(review): data_dir is presumably the location the output is written
# to -- confirm in monthly_nee.create_dataset.
monthly_nee.create_dataset(dfs, times, data_dir)