In [1]:
import pandas as pd
from covariate_tools import load_management, format_management, rejoin_intervals

## Management data preprocessing

In [None]:
# Replace base_path with the root directory of your copy of the data.
base_path = "/home/datasets4/stein"
# Raw input directory; used below as `d_path + "/<sub-path>"`.
d_path = f"{base_path}/jena_experiment_data_raw"
# Output directory. The trailing slash is required: every output file in this
# notebook is addressed as `out_path + "<file name>"`, so without it the
# directory name would be fused with the file name.
out_path = f"{base_path}/jena_experiment_data_various_products/covariates_processed/"

In [3]:
# General management periods: build one interval table per activity and save
# each table as "<activity>.csv".
# NOTE(review): the meaning of the two `window_filter` values (14 and 8) is
# defined in covariate_tools, not in this notebook — confirm before changing.
processed = {}
management_sources = {
    "p": d_path + "/field_management/138_4_Dataset/138_4_data.csv",
    "p2": d_path + "/field_management/136_4_Dataset/136_4_data.csv",
}
d = load_management(window_filter=14, **management_sources)
for event in ("weeding", "mowing", "biomass harvest"):
    # Format the records of this activity, then rejoin its intervals.
    processed[event] = rejoin_intervals(
        format_management(d, cat=event, verbose=0), window_filter=8
    )
    processed[event].to_csv(out_path + event + ".csv", index=False)

In [5]:
# Read the per-activity interval tables back from the CSV files in out_path.
biomass, weeding, mowing = (
    pd.read_csv(out_path + filename)
    for filename in ("biomass harvest.csv", "weeding.csv", "mowing.csv")
)

In [6]:
def transform_management_preliminary(d, name="n", drop=2):
    """Summarise management intervals as up to two events per year.

    Every input row is an interval with a ``start`` and an ``end`` date. For
    each interval the function derives the calendar year of ``start``, the
    interval mid-point as a day of year (``timestamp``) and its length
    (``duration`` = end day-of-year minus start day-of-year). Intervals with
    ``duration <= drop`` are discarded. For each remaining year the two
    longest intervals are kept and ordered by ``timestamp``.

    The input frame is left unmodified, so the function can safely be called
    more than once on the same data.

    Parameters
    ----------
    d : pandas.DataFrame
        Must contain ``start`` and ``end`` columns that ``pd.to_datetime``
        can parse.
    name : str, default "n"
        Suffix appended to the output column names.
    drop : int, default 2
        Intervals whose duration is less than or equal to this value are
        removed before the per-year selection.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``timestamp1_<name>``, ``duration1_<name>``,
        ``timestamp2_<name>``, ``duration2_<name>``; one row per year. A year
        with a single qualifying interval has NaN in the second pair.

    Notes
    -----
    ``duration`` is a difference of day-of-year values, so an interval that
    crosses a year boundary gets a negative duration.
    """
    columns = [
        "year",
        "timestamp1_" + name,
        "duration1_" + name,
        "timestamp2_" + name,
        "duration2_" + name,
    ]

    # Work on a private copy so the caller's frame keeps its columns.
    events = d[["start", "end"]].copy()
    events["start"] = pd.to_datetime(events["start"])
    events["end"] = pd.to_datetime(events["end"])
    events["year"] = events["start"].dt.year
    events["start"] = events["start"].dt.dayofyear
    events["end"] = events["end"].dt.dayofyear
    events["timestamp"] = events[["start", "end"]].mean(axis=1)
    events["duration"] = events["end"] - events["start"]
    events = events[~(events["duration"] <= drop)]

    rows = []
    for year in events["year"].unique():
        top_two = (
            events[events["year"] == year]
            .sort_values("duration", ascending=False)[:2][["timestamp", "duration"]]
            .sort_values("timestamp")
        )
        row = [year] + list(top_two.values.flatten())
        # Pad years with fewer than two qualifying intervals so that every row
        # has the full width, even when no year has two intervals.
        row += [float("nan")] * (len(columns) - len(row))
        rows.append(row)

    # NOTE(review): the first year encountered (in order of appearance in the
    # data) is left out of the result; the reason is not recorded in this
    # notebook — confirm that this is intended.
    return pd.DataFrame(rows[1:], columns=columns)

In [7]:
# Reduce each interval table to its per-year summary. Weeding discards
# intervals with duration <= 2, the other two only those with duration <= 0.
biomass, weeding, mowing = (
    transform_management_preliminary(frame, name=suffix, drop=min_duration)
    for frame, suffix, min_duration in (
        (biomass, "b", 0),
        (weeding, "w", 2),
        (mowing, "m", 0),
    )
)

In [7]:
# Write each per-year summary table to its own CSV file, without the index.
for frame, filename in (
    (biomass, "biomass_preliminary.csv"),
    (weeding, "weeding_preliminary.csv"),
    (mowing, "mowing_preliminary.csv"),
):
    frame.to_csv(out_path + filename, index=False)

In [8]:
# Intensity
# Load the two semicolon-separated weeding tables and stack them. The leading
# "/" before "weeding" is required because d_path is defined without a
# trailing slash (the field_management paths are built the same way).
a = pd.read_csv(d_path + "/weeding/190_7_data.csv", sep=";")
b = pd.read_csv(d_path + "/weeding/191_6_data.csv", sep=";")
weeding = pd.concat([a, b])

In [9]:
# Express each seasonal weeding-hours column relative to the `area` column,
# then drop `area` and save the result.
for season_column in ("spring_weedHour", "summer_weedHour", "autumn_weedHour"):
    weeding[season_column] = weeding[season_column] / weeding["area"]
weeding = weeding.drop(columns=["area"])
weeding.to_csv(out_path + "weeding_intensity.csv", index=False)