In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv("datasets/Aeschi_2019-20.csv")
df.columns

Index(['TIMESTAMP', 'TA', 'RH', 'PA', 'Rg', 'PREC', 'SWC_0.05', 'SWC_0.15',
       'SWC_0.3', 'WFPS_0.05', 'WFPS_0.15', 'WFPS_0.3', 'TS_0.05', 'TS_0.15',
       'TS_0.3', 'PREC_rmean6', 'SWC_0.05_rmean6', 'SWC_0.15_rmean6',
       'SWC_0.3_rmean6', 'WFPS_0.05_rmean6', 'WFPS_0.15_rmean6',
       'WFPS_0.3_rmean6', 'TS_0.05_rmean6', 'TS_0.15_rmean6', 'TS_0.3_rmean6',
       'PREC_rmean6.6', 'PREC_rmean6.12', 'PREC_rmean6.18',
       'SWC_0.05_rmean6.6', 'SWC_0.05_rmean6.12', 'SWC_0.05_rmean6.18',
       'SWC_0.15_rmean6.6', 'SWC_0.15_rmean6.12', 'SWC_0.15_rmean6.18',
       'SWC_0.3_rmean6.6', 'SWC_0.3_rmean6.12', 'SWC_0.3_rmean6.18',
       'WFPS_0.05_rmean6.6', 'WFPS_0.05_rmean6.12', 'WFPS_0.05_rmean6.18',
       'WFPS_0.15_rmean6.6', 'WFPS_0.15_rmean6.12', 'WFPS_0.15_rmean6.18',
       'WFPS_0.3_rmean6.6', 'WFPS_0.3_rmean6.12', 'WFPS_0.3_rmean6.18',
       'TS_0.05_rmean6.6', 'TS_0.05_rmean6.12', 'TS_0.05_rmean6.18',
       'TS_0.15_rmean6.6', 'TS_0.15_rmean6.12', 'TS_0.15_rmean6.18',

In [3]:
# filter out rows without a response variable
df_no_na = df[df["N2O_flag0_ustar"].notna()]

cols = [
    # Target
    "N2O_flag0_ustar",

    # Timestamp
    "TIMESTAMP",
    
    # Predictors
    "NEE_f",
    "GPP_f",
    "Reco_f",
    "Rg",
    "TA",
    "PREC",
    "VPD",                     # Vapor pressure deficit
    "SWC_0.05",
    "SWC_0.15",
    "SWC_0.3",
    "TS_0.05",
    "TS_0.15",
    "TS_0.3",
    "harvest",
]

aeschi_19_20 = df_no_na[cols]

rename_map = {
    "N2O_flag0_ustar": "N2O_Flux",
    "TIMESTAMP": "Timestamp",
    "NEE_f": "NEE",            # Net CO₂ exchange between field and atmosphere (positive = CO₂ released, negative = CO₂ uptake)
    "GPP_f": "GPP",            # Total CO₂ fixed by photosynthesis (proxy for plant growth)
    "Reco_f": "RECO",          # CO₂ emitted via respiration of plants and soil organisms
    "Rg": "SolarRadiation",
    "TA": "AirTemp",
    "PREC": "Precipitation",
    "SWC_0.05": "SoilWater_5cm",
    "SWC_0.15": "SoilWater_15cm",
    "SWC_0.3": "SoilWater_30cm",
    "TS_0.05": "SoilTemp_5cm",
    "TS_0.15": "SoilTemp_15cm",
    "TS_0.3": "SoilTemp_30cm",
    "harvest": "Mowing",
}

aeschi = aeschi_19_20.rename(columns=rename_map)
aeschi["Timestamp"] = pd.to_datetime(aeschi["Timestamp"])

In [5]:
aeschi.head(5)

Unnamed: 0,N2O_Flux,Timestamp,NEE,GPP,RECO,SolarRadiation,AirTemp,Precipitation,VPD,SoilWater_5cm,SoilWater_15cm,SoilWater_30cm,SoilTemp_5cm,SoilTemp_15cm,SoilTemp_30cm,Mowing
29,-0.694262,2019-07-03 14:45:00,-1.334854,13.239877,11.905023,737.666667,26.133333,0.0,1699.23,19.41092,23.84575,20.0223,27.0,25.0,23.8,0.0
30,0.600286,2019-07-03 15:15:00,-9.021352,20.95064,11.929288,663.666667,26.2,0.0,1702.54,19.41092,23.84575,20.00332,26.9,25.133333,23.866667,0.0
31,0.223581,2019-07-03 15:45:00,-3.687444,15.613504,11.926059,504.333333,26.2,0.0,1716.11,19.301757,23.84575,20.015973,26.466667,25.266667,23.966667,0.0
32,0.477189,2019-07-03 16:15:00,-1.181895,13.063521,11.881626,418.333333,26.1,0.0,1703.75,19.27605,23.851513,19.996993,26.166667,25.4,24.033333,0.0
33,0.580062,2019-07-03 16:45:00,-1.327793,13.082796,11.755004,297.333333,25.8,0.0,1649.49,19.22456,23.851513,20.022297,26.0,25.4,24.1,0.0


In [7]:
aeschi.to_csv("datasets/Aeschi_2019-20_clean.csv")

In [8]:


df_1 = pd.read_csv("datasets/Oensingen_2018-19.csv")
df_1.columns



Index(['TIMESTAMP', 'TA', 'RH', 'PA', 'Rg', 'PREC', 'SWC_0.05', 'SWC_0.15',
       'SWC_0.3', 'WFPS_0.05', 'WFPS_0.15', 'WFPS_0.3', 'TS_0.05', 'TS_0.15',
       'TS_0.3', 'PREC_rmean6', 'SWC_0.05_rmean6', 'SWC_0.15_rmean6',
       'SWC_0.3_rmean6', 'WFPS_0.05_rmean6', 'WFPS_0.15_rmean6',
       'WFPS_0.3_rmean6', 'TS_0.05_rmean6', 'TS_0.15_rmean6', 'TS_0.3_rmean6',
       'PREC_rmean6.6', 'PREC_rmean6.12', 'PREC_rmean6.18',
       'SWC_0.05_rmean6.6', 'SWC_0.05_rmean6.12', 'SWC_0.05_rmean6.18',
       'SWC_0.15_rmean6.6', 'SWC_0.15_rmean6.12', 'SWC_0.15_rmean6.18',
       'SWC_0.3_rmean6.6', 'SWC_0.3_rmean6.12', 'SWC_0.3_rmean6.18',
       'WFPS_0.05_rmean6.6', 'WFPS_0.05_rmean6.12', 'WFPS_0.05_rmean6.18',
       'WFPS_0.15_rmean6.6', 'WFPS_0.15_rmean6.12', 'WFPS_0.15_rmean6.18',
       'WFPS_0.3_rmean6.6', 'WFPS_0.3_rmean6.12', 'WFPS_0.3_rmean6.18',
       'TS_0.05_rmean6.6', 'TS_0.05_rmean6.12', 'TS_0.05_rmean6.18',
       'TS_0.15_rmean6.6', 'TS_0.15_rmean6.12', 'TS_0.15_rmean6.18',

In [9]:
# filter out rows without a response variable
df_1_no_na = df_1[df_1["N2O_flag0_ustar"].notna()]

cols = [
    # Target
    "N2O_flag0_ustar",

    # Timestamp
    "TIMESTAMP",
    
    # Predictors
    "NEE_f",
    "GPP_f",
    "Reco_f",
    "Rg",
    "TA",
    "PREC",
    "VPD",                     # Vapor pressure deficit
    "SWC_0.05",
    "SWC_0.15",
    "SWC_0.3",
    "TS_0.05",
    "TS_0.15",
    "TS_0.3",
    "harvest",
    "Norg",
    "Nmin",
    "soil"
]

oensingen_18_19 = df_1_no_na[cols]

rename_map = {
    "N2O_flag0_ustar": "N2O_Flux",
    "TIMESTAMP": "Timestamp",
    "NEE_f": "NEE",            # Net CO₂ exchange between field and atmosphere (positive = CO₂ released, negative = CO₂ uptake)
    "GPP_f": "GPP",            # Total CO₂ fixed by photosynthesis (proxy for plant growth)
    "Reco_f": "RECO",          # CO₂ emitted via respiration of plants and soil organisms
    "Rg": "SolarRadiation",
    "TA": "AirTemp",
    "PREC": "Precipitation",
    "SWC_0.05": "SoilWater_5cm",
    "SWC_0.15": "SoilWater_15cm",
    "SWC_0.3": "SoilWater_30cm",
    "TS_0.05": "SoilTemp_5cm",
    "TS_0.15": "SoilTemp_15cm",
    "TS_0.3": "SoilTemp_30cm",
    "harvest": "Mowing",
    "Norg": "FertilizerOrganic",
    "Nmin": "FertilizerMineral",
    "soil": "SoilCultivation",
}

oensingen_1 = oensingen_18_19.rename(columns=rename_map)
oensingen_1["Timestamp"] = pd.to_datetime(oensingen_1["Timestamp"], dayfirst=True)

In [10]:


oensingen_1.head(5)



Unnamed: 0,N2O_Flux,Timestamp,NEE,GPP,RECO,SolarRadiation,AirTemp,Precipitation,VPD,SoilWater_5cm,SoilWater_15cm,SoilWater_30cm,SoilTemp_5cm,SoilTemp_15cm,SoilTemp_30cm,Mowing,FertilizerOrganic,FertilizerMineral,SoilCultivation
15,0.333021,2018-07-12 07:45:00,-4.00684,7.490222,3.483382,447.525897,16.313117,0.0,6.105563,19.153554,31.72587,25.273149,15.503667,16.377333,18.057333,0.0,0.0,0.0,0.0
17,0.506338,2018-07-12 08:45:00,-0.628629,4.478102,3.849473,613.816676,18.237812,0.0,8.232653,19.297162,31.702366,25.254741,16.670741,16.673704,17.951852,1.0,0.0,0.0,0.0
18,0.626398,2018-07-12 09:15:00,0.345373,3.704597,4.04997,686.58091,19.261472,0.0,9.885997,19.383642,31.697955,25.230584,17.264,16.901,17.904,1.0,0.0,0.0,0.0
19,0.440995,2018-07-12 09:45:00,-3.51157,7.716858,4.205288,753.081073,20.051294,0.0,11.443435,19.451197,31.681595,25.196959,17.88,17.159667,17.888,1.0,0.0,0.0,0.0
21,0.46309,2018-07-12 10:45:00,-4.183495,8.63445,4.450955,857.719799,21.299094,0.0,12.928314,19.607464,31.645481,25.160241,19.475667,17.803,17.89,1.0,0.0,0.0,0.0


In [11]:


oensingen_1.to_csv("datasets/Oensingen_2018-19_clean.csv")



In [12]:


# Oensingen 2021-23 dataset
df_2 = pd.read_csv("datasets/Oensingen_2021-23.csv")
df_2.columns



  df_2 = pd.read_csv("datasets/Oensingen_2021-23.csv")


Index(['TIMESTAMP_MIDDLE', 'AIR_CP', 'AIR_DENSITY', 'AIR_MV', 'AIR_RHO_CP',
       'AOA_METHOD', 'AXES_ROTATION_METHOD', 'BADM_HEIGHTC',
       'BADM_INSTPAIR_EASTWARD_SEP_GA_CH4',
       'BADM_INSTPAIR_EASTWARD_SEP_GA_CO2',
       ...
       'GPP_U16_f_reddyproc', 'NEE_U50_f_reddyproc', 'Reco_U50_reddyproc',
       'GPP_U50_f_reddyproc', 'NEE_U84_f_reddyproc', 'Reco_U84_reddyproc',
       'GPP_U84_f_reddyproc', 'LE_f_reddyproc', 'H_f_reddyproc',
       'ET_f_reddyproc'],
      dtype='object', length=730)

In [13]:
# filter out rows without a response variable
df_2_no_na = df_2[df_2["FN2O_L3.1_L3.3_CUT_50_QCF0"].notna()]

cols = [
    # Target
    "FN2O_L3.1_L3.3_CUT_50_QCF0",

    # Timestamp
    "TIMESTAMP_MIDDLE",
    
    # Predictors
    "NEE_L3.1_L3.3_CUT_50_QCF_gfRF",
    "GPP_U50_f_reddyproc",
    "Reco_U50_reddyproc",
    "SW_IN",
    "TA",
    "PREC",
    "VPD",                     # Vapor pressure deficit
    "SWC_0.05",
    "SWC_0.15",
    "SWC_0.3",
    "TS_0.05",
    "TS_0.15",
    "TS_0.3",
    "MGMT_HARVEST",
    "MGMT_FERT_ORG",
    "MGMT_FERT_MIN",
    "MGMT_SOILCULTIVATION"
]

oensingen_21_23 = df_2_no_na[cols]

rename_map = {
    "FN2O_L3.1_L3.3_CUT_50_QCF0": "N2O_Flux",
    "TIMESTAMP_MIDDLE": "Timestamp",
    "NEE_L3.1_L3.3_CUT_50_QCF_gfRF": "NEE",    # Net CO₂ exchange between field and atmosphere (positive = CO₂ released, negative = CO₂ uptake)
    "GPP_U50_f_reddyproc": "GPP",            # Total CO₂ fixed by photosynthesis (proxy for plant growth)
    "Reco_U50_reddyproc": "RECO",            # CO₂ emitted via respiration of plants and soil organisms
    "SW_IN": "SolarRadiation",
    "TA": "AirTemp",
    "PREC": "Precipitation",
    "SWC_0.05": "SoilWater_5cm",
    "SWC_0.15": "SoilWater_15cm",
    "SWC_0.3": "SoilWater_30cm",
    "TS_0.05": "SoilTemp_5cm",
    "TS_0.15": "SoilTemp_15cm",
    "TS_0.3": "SoilTemp_30cm",
    "MGMT_HARVEST": "Mowing",
    "MGMT_FERT_ORG": "FertilizerOrganic",
    "MGMT_FERT_MIN": "FertilizerMineral",
    "MGMT_SOILCULTIVATION": "SoilCultivation",
}

oensingen_2 = oensingen_21_23.rename(columns=rename_map)
oensingen_2["Timestamp"] = pd.to_datetime(oensingen_2["Timestamp"], dayfirst=True)

  oensingen_2["Timestamp"] = pd.to_datetime(oensingen_2["Timestamp"], dayfirst=True)


In [14]:
oensingen_2.head(5)

Unnamed: 0,N2O_Flux,Timestamp,NEE,GPP,RECO,SolarRadiation,AirTemp,Precipitation,VPD,SoilWater_5cm,SoilWater_15cm,SoilWater_30cm,SoilTemp_5cm,SoilTemp_15cm,SoilTemp_30cm,Mowing,FertilizerOrganic,FertilizerMineral,SoilCultivation
29113,-0.33052,2022-08-30 11:45:00,-16.411145,19.591447,7.165971,687.529222,24.553782,0.0,1.034754,16.341716,11.396077,15.095581,21.643333,19.703333,19.237779,0.0,0.0,0.0,0.0
29116,-0.10105,2022-08-30 13:15:00,-9.83655,18.380349,7.651707,545.012785,26.430326,0.0,1.584009,16.730519,11.365016,15.092065,23.147222,19.947778,19.245,0.0,0.0,0.0,0.0
29117,-0.273075,2022-08-30 13:45:00,-10.141052,12.307045,7.681075,653.722075,27.39411,0.0,1.723843,16.753396,11.378849,15.089371,23.397778,20.078889,19.266666,0.0,0.0,0.0,0.0
29118,-0.214462,2022-08-30 14:15:00,-10.728642,8.4574,7.243798,745.734649,28.201293,0.0,1.9717,16.807034,11.390797,15.083162,23.77611,20.208888,19.287221,0.0,0.0,0.0,0.0
29120,0.371677,2022-08-30 15:15:00,-1.213602,8.977221,7.311176,289.319679,26.14786,0.0,1.452522,16.85881,11.403868,15.082458,24.241666,20.46,19.369445,0.0,0.0,0.0,0.0


In [16]:


oensingen_2.to_csv("datasets/Oensingen_2021-23_clean.csv")

