# Create inference data objects for model fitting

26.2.2024

In [22]:
import pandas as pd
from dotenv import load_dotenv; load_dotenv()
from datetime import datetime, timedelta
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

%matplotlib widget
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Base folders come from the environment (load_dotenv() is called in the import cell).
# Either value is None when the corresponding variable is not set.
SOILCLASSIFICATION_DATA = os.environ.get('SOILCLASSIFICATION_DATA')
ONEDRIVE_FOLDER = os.environ.get('ONEDRIVE_FOLDER')

In [24]:
# Open the NetCDF dataset stored under ONEDRIVE_FOLDER.
# NOTE: if ONEDRIVE_FOLDER is unset (None), the path becomes 'None/...' and the open fails.
data = xr.open_dataset(f'{ONEDRIVE_FOLDER}/ec_biomet_wtd_2022_20240607.nc')

In [25]:
# Show the dataset summary to check what was loaded.
data

In [26]:
# Dataset variables carried into the fitting data: the three gas fluxes
# and 'TA_1_1_1' (renamed to T_air further down).
variables = [
    'n2o_flux_final',
    'ch4_flux_final',
    'co2_flux_final',
    'TA_1_1_1',
]

In [27]:
# Keep only the variables listed in `variables` (gas fluxes and air temperature).
data_fit = data[variables]

In [28]:
# Soil class labels as a NumPy array; used below to name one column per class.
soil_classes = data.soil_class.to_numpy()

In [29]:
# Add one time series per soil class to data_fit, named after the class.
# Row i of fpr_contribution is taken as the series for soil_classes[i]
# (assumes fpr_contribution is indexed as [soil class, time] — TODO confirm).
for i, soil_class in enumerate(soil_classes):
    class_contribution = data.fpr_contribution[i, :].to_numpy()
    data_fit[soil_class] = (["time"], class_contribution)

In [30]:
# Convert the xarray dataset to a pandas DataFrame for the steps below.
data_df = data_fit.to_dataframe()

## Keep only the snow-free period

In [31]:
# Bounds of the snow-free period: snow cover ends in spring and starts again in autumn.
snow_cover_end = datetime(year=2022, month=5, day=1)
snow_cover_start = datetime(year=2022, month=11, day=16)

In [32]:
# Restrict the frame to the period between the end and the next start of snow cover.
# Label-based .loc slicing includes both endpoints, so a row stamped exactly
# at snow_cover_start would be kept if it exists.
data_df = data_df.loc[snow_cover_end:snow_cover_start, :]

In [33]:
# Preview the first rows of the selected period.
data_df.head()

Unnamed: 0_level_0,n2o_flux_final,ch4_flux_final,co2_flux_final,TA_1_1_1,Fallen tree,Dry branch,Exposed peat,Light moss,Dark moss,Green vegetation,Ditch,Tree,Covered ditch,Instruments
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-05-01 00:00:00,,,,-0.7717,,,,,,,,,,
2022-05-01 00:30:00,,,,-1.2116,,,,,,,,,,
2022-05-01 01:00:00,,,,-1.2247,,,,,,,,,,
2022-05-01 01:30:00,,,,-1.1864,,,,,,,,,,
2022-05-01 02:00:00,,,,-0.9068,,,,,,,,,,


## Rename columns

In [34]:
# Short names for the air temperature and the three gas fluxes.
short_column_names = {
    'TA_1_1_1': 'T_air',
    'n2o_flux_final': 'F_N2O',
    'ch4_flux_final': 'F_CH4',
    'co2_flux_final': 'F_CO2',
}
data_df = data_df.rename(columns=short_column_names)

In [35]:
# Display the frame to check the renamed columns.
data_df

Unnamed: 0_level_0,F_N2O,F_CH4,F_CO2,T_air,Fallen tree,Dry branch,Exposed peat,Light moss,Dark moss,Green vegetation,Ditch,Tree,Covered ditch,Instruments
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-05-01 00:00:00,,,,-0.7717,,,,,,,,,,
2022-05-01 00:30:00,,,,-1.2116,,,,,,,,,,
2022-05-01 01:00:00,,,,-1.2247,,,,,,,,,,
2022-05-01 01:30:00,,,,-1.1864,,,,,,,,,,
2022-05-01 02:00:00,,,,-0.9068,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-11-15 22:00:00,0.000404,-0.000115,0.787602,-1.1274,0.174526,0.051282,0.322551,0.113539,0.007851,0.162817,0.055233,0.047430,0.026839,0.000135
2022-11-15 22:30:00,0.000555,0.000222,0.790709,-1.1688,0.176000,0.050487,0.337351,0.118190,0.007987,0.158843,0.046099,0.050064,0.020141,0.000033
2022-11-15 23:00:00,0.000471,-0.000249,0.755470,-1.2595,0.185566,0.055066,0.305336,0.124700,0.007887,0.158282,0.050083,0.046426,0.025571,0.000126
2022-11-15 23:30:00,0.000542,0.000189,0.794953,-1.3767,0.196112,0.055228,0.310219,0.133731,0.007449,0.145732,0.047484,0.040942,0.024493,0.000171


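## Add log-transformed columns of the GHG fluxes

Each flux is multiplied by 1000 and shifted by the floor of its scaled minimum before taking the natural logarithm, so that the argument of the logarithm is never negative.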

In [36]:
# For each gas: scale the flux by 1000, subtract the floor of the scaled
# minimum so the argument is non-negative, then take the natural logarithm.
# NOTE: a minimum that scales to an exact integer gives log(0) = -inf for that row.
for flux_column in ('F_N2O', 'F_CH4', 'F_CO2'):
    scaled_flux = data_df[flux_column] * 1000
    shift = np.floor(data_df[flux_column].min() * 1000)
    data_df = data_df.assign(**{f'{flux_column}_log': np.log(scaled_flux - shift)})

In [37]:
# Shift that was subtracted from the scaled N2O flux before taking the log.
np.floor(data_df.F_N2O.min()*1000)

0.0

In [38]:
# Shift that was subtracted from the scaled CH4 flux before taking the log.
np.floor(data_df.F_CH4.min()*1000)

-10.0

In [39]:
# Shift that was subtracted from the scaled CO2 flux before taking the log.
np.floor(data_df.F_CO2.min()*1000)

-7582.0

## Add combined columns for surface types

In [40]:
# Combined surface-type columns, each the sum of related individual classes.
# All four sums read only the original class columns, so they can be added in one assign.
data_df = data_df.assign(
    logging_residue=data_df['Fallen tree'] + data_df['Dry branch'],
    green_vegetations=data_df['Green vegetation'] + data_df['Tree'],
    ditches=data_df['Ditch'] + data_df['Covered ditch'],
    ground_residue_and_vegetation=(
        data_df['Fallen tree']
        + data_df['Dry branch']
        + data_df['Green vegetation']
        + data_df['Tree']
        + data_df['Light moss']
    ),
)

## Add air temperature in Kelvin (`T_air_K`)

In [41]:
# New column T_air_K: air temperature shifted by 273.15; T_air itself is kept unchanged.
air_temperature = data_df['T_air'].to_numpy()
data_df = data_df.assign(T_air_K=air_temperature + 273.15)

## Save data

Running the cell below overwrites the CSV files under `data/`; comment it out to skip saving.

In [42]:
# Predictor columns written to both gas-specific files, in output order:
# air temperature (Kelvin and original), the individual surface-type columns
# and the combined surface-type columns. 'Instruments' is not exported.
predictor_columns = ['T_air_K', 'T_air', 'Fallen tree', 'Dry branch',
                     'Exposed peat', 'Light moss', 'Dark moss', 'Green vegetation', 'Ditch',
                     'Tree', 'Covered ditch', 'logging_residue', 'green_vegetations', 'ditches',
                     'ground_residue_and_vegetation']

# Readable names used in the saved files for the combined surface-type columns.
combined_column_names = {
    'logging_residue': 'Logging residue',
    'green_vegetations': 'Green vegetation and trees',
    'ditches': 'All ditches',
    'ground_residue_and_vegetation': 'Residue, ground and vegetation',
}

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
os.makedirs('data', exist_ok=True)

# N2O: flux, its log transform (saved as F_N2O_ln) and the predictors.
# Rows with a missing value in any kept column are dropped.
data_df_n2o_save = (
    data_df.loc[:, ['F_N2O', 'F_N2O_log'] + predictor_columns]
    .rename(columns={'F_N2O_log': 'F_N2O_ln', **combined_column_names})
    .dropna(axis=0)
)
data_df_n2o_save.to_csv('data/inference_data_n2o.csv')

# CH4: same layout as the N2O file, with the CH4 flux columns.
data_df_ch4_save = (
    data_df.loc[:, ['F_CH4', 'F_CH4_log'] + predictor_columns]
    .rename(columns={'F_CH4_log': 'F_CH4_ln', **combined_column_names})
    .dropna(axis=0)
)
data_df_ch4_save.to_csv('data/inference_data_ch4.csv')