In [1]:
import os

import pandas as pd
from covariate_tools import (
    format_height,
    format_biomass,
    format_cover,
    format_lai,
    format_microb,
    format_roots,
)


### A number of preprocessing steps for different covariate products from the Jena Experiment


In [None]:
# Data locations. Either edit the default strings below or, without touching
# the notebook, set the environment variables JENA_RAW_DIR / JENA_PROCESSED_DIR
# before starting the kernel. An unset or empty variable falls back to the
# default.
#
# os.path.join(<dir>, "") appends a trailing separator only when it is
# missing, so cells that build file names by plain concatenation
# (out_path + "height.csv") keep working for directories given without one.
d_path = os.path.join(
    os.environ.get("JENA_RAW_DIR")
    or "/home/datasets4/stein/jena_experiment_data_raw/",
    "",
)
out_path = os.path.join(
    os.environ.get("JENA_PROCESSED_DIR")
    or "/home/datasets4/stein/jena_experiment_data_various_products/covariates_processed/",
    "",
)

In [23]:
# Covariables. These were provided by Yuanyuan Huang and are likely raw
# products from Jexis.
def _read_raw(relative_path, **read_csv_kwargs):
    """Read one CSV whose location is ``d_path`` followed by ``relative_path``.

    Extra keyword arguments are forwarded unchanged to ``pd.read_csv``.
    """
    return pd.read_csv(d_path + relative_path, **read_csv_kwargs)


biomass = _read_raw("/jena_experiment/covariables/biomass_main_03to20.csv")
cover = _read_raw("/jena_experiment/covariables/cover_main_2003to2020.csv")
height = _read_raw("/jena_experiment/covariables/height_main_2003to2020.csv")
lai = _read_raw("/jena_experiment/covariables/LAI_main_2003to2020.csv")
roots = _read_raw("/jena_experiment/covariables/root_biomass_main_2003to2017.csv")
# This file is read with sep=";"; the other tables use the read_csv default.
microb = _read_raw(
    "/jena_experiment/covariables/soil respiration and microbial biomass2003to2020.csv",
    sep=";",
)

# SPEI table, stored in the supplements folder.
spei = _read_raw("/supplements_jena_experiment/spei_JE_2003to2021.csv")

# Double check the realized diversity

In [24]:
# Realized-diversity table used as the reference for the cross-check below.
compare_csv = d_path +"/jena_experiment/jexis_realized_diversity/487_3_data.csv"
compare = pd.read_csv(compare_csv)

In [25]:
# Realized diversity from the database: take the August rows of `compare`,
# outer-join them with the cover table on (year, plotcode) so that rows
# present in only one of the two tables are kept, order the result by year
# and plot, and retain only years after 2003.
august_rows = compare.loc[compare["season"] == "August"]
a = (
    august_rows.merge(cover, how="outer", on=["year", "plotcode"])
    .sort_values(["year", "plotcode"])
    .loc[lambda merged: merged["year"] > 2003]
)

# Format

In [26]:
# Run each raw table through its formatter from covariate_tools and rebind
# the same names to the formatted results. The calls are evaluated left to
# right, i.e. height, biomass, lai, microb, roots, cover.
# NOTE(review): the raw frames are overwritten, so this cell is presumably
# not meant to be run twice without re-running the load cell -- confirm.
height, biomass, lai, microb, roots, cover = (
    format_height(height),
    format_biomass(biomass),
    format_lai(lai),
    format_microb(microb),
    format_roots(roots),
    format_cover(cover),
)

In [None]:
# Write every formatted covariate table to "<name>.csv" inside out_path,
# without the DataFrame index.
#
# The target is built with os.path.join rather than string concatenation:
# if out_path is configured without a trailing separator, concatenation
# would silently write ".../covariates_processedheight.csv" next to the
# intended directory instead of into it.
processed_tables = {
    "height": height,
    "biomass": biomass,
    "lai": lai,
    "microb": microb,
    "roots": roots,
    "cover": cover,
}
for table_name, table in processed_tables.items():
    table.to_csv(os.path.join(out_path, table_name + ".csv"), index=False)

In [None]:
# Just keep the yearly index: retain the rows with month == 12 and, besides
# "year", only the columns whose name contains "12".
# NOTE(review): presumably these are the 12-month SPEI columns evaluated in
# December -- confirm against the column names of the SPEI file.
december_rows = spei[spei["month"] == 12]
spei_12_columns = [col for col in december_rows.columns if "12" in col]
spei = december_rows[["year"] + spei_12_columns].reset_index(drop=True)

# os.path.join keeps the file inside out_path even when out_path has no
# trailing separator (plain concatenation would misname the file).
spei.to_csv(os.path.join(out_path, "spei.csv"), index=False)