In [None]:
#MAE,RMSE per fpc*location
#MAPE,MAE,RMSE per crop*location

In [None]:
import numpy as np
import pandas as pd

# UNIVERSAL METRICS (DO NOT EDIT)

def _mae(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))


def _rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred) ** 2))


# MAPE excluding y_true == 0 (returns NaN if no non-zero actuals)
def _mape_excluding_zeros(y_true, y_pred, eps=1e-12):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = np.abs(y_true) > eps
    if mask.sum() == 0:
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100


def grouped_metrics(df, group_cols, y_col, yhat_col, include_mape=True):
    """Compute per-group error metrics between an actual and a predicted column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the grouping columns plus ``y_col`` and ``yhat_col``.
    group_cols : str or sequence of str
        Column name(s) to group by.
    y_col, yhat_col : str
        Names of the actual and the predicted column.
    include_mape : bool, default True
        When True, also report ``MAPE_%`` (computed on non-zero actuals only)
        and ``n_nonzero_actual``.

    Returns
    -------
    pandas.DataFrame
        One row per group, sorted by ``group_cols``, with the columns
        ``group_cols + ["MAE", "RMSE", "n_rows"]`` followed, when requested,
        by ``["MAPE_%", "n_nonzero_actual"]``. Count columns are integers.

    Raises
    ------
    ValueError
        If any required column is missing from ``df``.
    """
    # Accept a single column name or any sequence, not only a list.
    group_cols = [group_cols] if isinstance(group_cols, str) else list(group_cols)
    value_cols = [y_col, yhat_col]
    needed = group_cols + value_cols
    missing = [c for c in needed if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns in df: {missing}")

    # One threshold shared by the MAPE mask and the non-zero count, so the
    # two numbers always describe the same set of rows.
    zero_eps = 1e-12
    count_cols = ["n_rows"]
    metric_cols = ["MAE", "RMSE", "n_rows"]
    if include_mape:
        count_cols.append("n_nonzero_actual")
        metric_cols += ["MAPE_%", "n_nonzero_actual"]

    # Rows lacking either the actual or the prediction cannot be scored.
    tmp = df[needed].dropna(subset=value_cols)
    if tmp.empty:
        # Nothing to score: return an empty frame with the expected columns
        # rather than depending on how groupby.apply treats empty input.
        return pd.DataFrame(columns=group_cols + metric_cols)

    def _calc(g):
        y = g[y_col].values
        yhat = g[yhat_col].values

        out = {
            "MAE": _mae(y, yhat),
            "RMSE": _rmse(y, yhat),
            "n_rows": len(g),
        }

        if include_mape:
            out["MAPE_%"] = _mape_excluding_zeros(y, yhat, eps=zero_eps)
            out["n_nonzero_actual"] = int((np.abs(y) > zero_eps).sum())

        return pd.Series(out)

    result = (
        tmp
        # Selecting the value columns keeps the grouping columns out of the
        # frames passed to _calc (pandas >= 2.2 warns when they are included).
        .groupby(group_cols, dropna=False)[value_cols]
        .apply(_calc)
        .reset_index()
        .sort_values(group_cols)
        .reset_index(drop=True)
    )

    # pd.Series(out) holds floats and ints together, so the counts come back
    # as floats; restore them to integers.
    return result.astype({col: int for col in count_cols})


def evaluate_model(df_out, cols):
    """Score one model output frame at both reporting levels.

    ``cols`` maps the logical names "fpc", "crop", "location", "actual" and
    "pred" to the column names used in ``df_out``.

    Returns a ``(fpc_location, crop_location)`` tuple of metric frames:
    FPC x location carries MAE/RMSE only, crop x location also carries MAPE.
    """
    def _metrics_by(level_key, with_mape):
        # Group on the requested level together with the location column.
        return grouped_metrics(
            df_out,
            group_cols=[cols[level_key], cols["location"]],
            y_col=cols["actual"],
            yhat_col=cols["pred"],
            include_mape=with_mape,
        )

    # Evaluated left to right: FPC level first (no MAPE), then crop level.
    return _metrics_by("fpc", False), _metrics_by("crop", True)

# !ONLY EDIT THIS SECTION!
# Everyone adds their model here.
#
# ---------- Example template (copy-paste for each model) ----------
# The template is documentation only, so every line of it is commented out.
# Copy it into MODELS_TO_EVALUATE below, uncomment it and fill in the values.
#
# {
#     "model_name": "<<< FILL HERE: e.g., baseline(fpc/crop) / lasso(fpc/crop) / ridge(fpc/crop) / random_forest(fpc/crop) / boosted_trees (fpc/crop) >>>",
#     "level": "<<< FILL HERE: level name >>>",   # e.g. "fpc" or "crop"
#     "df_out": None,  # <<< FILL HERE: put your model output dataframe
#     "cols": {
#         "location": "<<< FILL HERE: location column name >>>",   # e.g. "Location"
#         "fpc":      "<<< FILL HERE: fpc column name >>>",        # e.g. "Fpc_index"
#         "crop":     "<<< FILL HERE: crop column name >>>",       # e.g. "Crop_type"
#         "actual":   "<<< FILL HERE: actual column name >>>",     # e.g. "trolleys"
#         "pred":     "<<< FILL HERE: prediction column name >>>", # e.g. "pred" or "trolleys_lag_1"
#     }
# },

# NOTE(review): final_df, final_df_crop_level, test_with_preds and agg_test are
# not created in the visible part of this notebook; they presumably have to
# exist in the kernel before this cell runs - TODO confirm where they are built.
MODELS_TO_EVALUATE = [
    {
        "model_name": "baseline_fpc",
        "level": "fpc",
        "df_out": final_df,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "trolleys",
            "pred":     "trolleys_lag_1",
        }
    },
    {
        "model_name": "baseline_crop",
        "level": "crop",
        "df_out": final_df_crop_level,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "trolleys_true",
            "pred":     "trolleys_pred_lag_1",
        }
    },
    {
        "model_name": "lasso_fpc",
        "level": "fpc",
        "df_out": test_with_preds,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "y_true",
            "pred":     "y_pred",
        }
    },
    {
        "model_name": "lasso_crop",
        "level": "crop",
        "df_out": agg_test,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "y_true",
            "pred":     "y_pred",
        }
    },
    # Add more dicts below (two per model: fpc and crop)
    # Continue with the other models like in the example.
]

# RUN EVALUATION (DO NOT EDIT)

# Maps model name -> {"fpc_location": DataFrame, "crop_location": DataFrame}.
# Crop-level models only get the "crop_location" entry.
all_results = {}

for spec in MODELS_TO_EVALUATE:
    name = spec["model_name"]
    level = spec["level"]
    df_out = spec["df_out"]
    cols = spec["cols"]

    if level == "fpc":
        # FPC-level rows are scored twice: per FPC x location (no MAPE) and
        # per crop x location (with MAPE). evaluate_model does exactly that,
        # so reuse it instead of repeating both grouped_metrics calls here.
        fpc_location, crop_location = evaluate_model(df_out, cols)

        all_results[name] = {
            "fpc_location": fpc_location,
            "crop_location": crop_location
        }

    elif level == "crop":
        # Crop-level rows carry no FPC breakdown: crop x location only.
        crop_location = grouped_metrics(
            df_out,
            group_cols=[cols["crop"], cols["location"]],
            y_col=cols["actual"],
            yhat_col=cols["pred"],
            include_mape=True
        )

        all_results[name] = {
            "crop_location": crop_location
        }

    else:
        raise ValueError(f"{name}: level must be 'fpc' or 'crop'")

In [None]:
import numpy as np
import pandas as pd

# UNIVERSAL METRICS (DO NOT EDIT)

def _mae(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))

def _rmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

#MAPE excluding y_true == 0 (returns NaN if no non-zero actuals).
def _mape_excluding_zeros(y_true, y_pred, eps=1e-12):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = np.abs(y_true) > eps
    if mask.sum() == 0:
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def grouped_metrics(df, group_cols, y_col, yhat_col, include_mape=True):
    """Compute per-group error metrics between an actual and a predicted column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the grouping columns plus ``y_col`` and ``yhat_col``.
    group_cols : str or sequence of str
        Column name(s) to group by.
    y_col, yhat_col : str
        Names of the actual and the predicted column.
    include_mape : bool, default True
        When True, also report ``MAPE_%`` (computed on non-zero actuals only)
        and ``n_nonzero_actual``.

    Returns
    -------
    pandas.DataFrame
        One row per group, sorted by ``group_cols``, with the columns
        ``group_cols + ["MAE", "RMSE", "n_rows"]`` followed, when requested,
        by ``["MAPE_%", "n_nonzero_actual"]``. Count columns are integers.

    Raises
    ------
    ValueError
        If any required column is missing from ``df``.
    """
    # Accept a single column name or any sequence, not only a list.
    group_cols = [group_cols] if isinstance(group_cols, str) else list(group_cols)
    value_cols = [y_col, yhat_col]
    needed = group_cols + value_cols
    missing = [c for c in needed if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns in df: {missing}")

    # One threshold shared by the MAPE mask and the non-zero count, so the
    # two numbers always describe the same set of rows.
    zero_eps = 1e-12
    count_cols = ["n_rows"]
    metric_cols = ["MAE", "RMSE", "n_rows"]
    if include_mape:
        count_cols.append("n_nonzero_actual")
        metric_cols += ["MAPE_%", "n_nonzero_actual"]

    # Rows lacking either the actual or the prediction cannot be scored.
    tmp = df[needed].dropna(subset=value_cols)
    if tmp.empty:
        # Nothing to score: return an empty frame with the expected columns
        # rather than depending on how groupby.apply treats empty input.
        return pd.DataFrame(columns=group_cols + metric_cols)

    # _calc must stay nested inside grouped_metrics: it closes over y_col,
    # yhat_col and include_mape.
    def _calc(g):
        y = g[y_col].values
        yhat = g[yhat_col].values

        out = {
            "MAE": _mae(y, yhat),
            "RMSE": _rmse(y, yhat),
            "n_rows": len(g),
        }
        if include_mape:
            out["MAPE_%"] = _mape_excluding_zeros(y, yhat, eps=zero_eps)
            out["n_nonzero_actual"] = int((np.abs(y) > zero_eps).sum())
        return pd.Series(out)

    result = (
        # Selecting the value columns keeps the grouping columns out of the
        # frames passed to _calc (pandas >= 2.2 warns when they are included).
        tmp.groupby(group_cols, dropna=False)[value_cols]
           .apply(_calc)
           .reset_index()
           .sort_values(group_cols)
           .reset_index(drop=True)
    )

    # pd.Series(out) holds floats and ints together, so the counts come back
    # as floats; restore them to integers.
    return result.astype({col: int for col in count_cols})

def evaluate_model(df_out, cols):
    """Score one model output frame at both reporting levels.

    ``cols`` maps the logical names "fpc", "crop", "location", "actual" and
    "pred" to the column names used in ``df_out``.

    Returns a ``(fpc_location, crop_location)`` tuple of metric frames:
    FPC x location carries MAE/RMSE only, crop x location also carries MAPE.
    """
    def _metrics_by(level_key, with_mape):
        # Group on the requested level together with the location column.
        return grouped_metrics(
            df_out,
            group_cols=[cols[level_key], cols["location"]],
            y_col=cols["actual"],
            yhat_col=cols["pred"],
            include_mape=with_mape,
        )

    # Evaluated left to right: FPC level first (no MAPE), then crop level.
    return _metrics_by("fpc", False), _metrics_by("crop", True)

# !ONLY EDIT THIS SECTION!
# Everyone adds their model here.
#
# ---------- Example template (copy-paste for each model) ----------
# The template is documentation only, so every line of it is commented out
# (as live code it was assigned and then immediately overwritten below).
# Copy it into MODELS_TO_EVALUATE, uncomment it and fill in the values.
#
# {
#     "model_name": "<<< FILL HERE: e.g., baseline(fpc/crop) / lasso(fpc/crop) / ridge(fpc/crop) / random_forest(fpc/crop) / boosted_trees (fpc/crop) >>>",
#     "level": "<<< FILL HERE: level name >>>",   # e.g. "fpc" or "crop"
#     "df_out": None,  # <<< FILL HERE: put your model output dataframe
#     "cols": {
#         "location": "<<< FILL HERE: location column name >>>",   # e.g. "Location"
#         "fpc":      "<<< FILL HERE: fpc column name >>>",        # e.g. "Fpc_index"
#         "crop":     "<<< FILL HERE: crop column name >>>",       # e.g. "Crop_type"
#         "actual":   "<<< FILL HERE: actual column name >>>",     # e.g. "trolleys"
#         "pred":     "<<< FILL HERE: prediction column name >>>", # e.g. "pred" or "trolleys_lag_1"
#     }
# },

# The entries cover every model name that later cells look up in all_results
# (baseline_fpc, baseline_crop, lasso_fpc, lasso_crop). This cell rebinds
# MODELS_TO_EVALUATE and all_results, so a shorter list here would make the
# lasso result cells fail with a KeyError.
# NOTE(review): final_df, final_df_crop_level, test_with_preds and agg_test are
# not created in the visible part of this notebook - TODO confirm where they
# are built.
MODELS_TO_EVALUATE = [
    {
        "model_name": "baseline_fpc",
        "level": "fpc",
        "df_out": final_df,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "trolleys",
            "pred":     "trolleys_lag_1",
        }
    },
    {
        "model_name": "baseline_crop",
        "level": "crop",
        "df_out": final_df_crop_level,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "trolleys_true",
            "pred":     "trolleys_pred_lag_1",
        }
    },
    {
        "model_name": "lasso_fpc",
        "level": "fpc",
        "df_out": test_with_preds,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "y_true",
            "pred":     "y_pred",
        }
    },
    {
        "model_name": "lasso_crop",
        "level": "crop",
        "df_out": agg_test,
        "cols": {
            "location": "logistieke location id",
            "fpc":      "fpc_index",
            "crop":     "crop_type",
            "actual":   "y_true",
            "pred":     "y_pred",
        }
    },
    # Add more dicts below (two per model: fpc and crop)
    # Continue with the other models like in the example.
]

# RUN EVALUATION (DO NOT EDIT)

# Maps model name -> {"fpc_location": DataFrame, "crop_location": DataFrame}.
# Crop-level models only get the "crop_location" entry.
all_results = {}

for spec in MODELS_TO_EVALUATE:
    name = spec["model_name"]
    level = spec["level"]
    df_out = spec["df_out"]
    cols = spec["cols"]

    if level == "fpc":
        # FPC-level rows are scored twice: per FPC x location (no MAPE) and
        # per crop x location (with MAPE). The crop x location pass requires
        # df_out to carry the crop column even though its rows are FPC-level.
        # evaluate_model performs both passes, so reuse it here.
        fpc_location, crop_location = evaluate_model(df_out, cols)

        all_results[name] = {
            "fpc_location": fpc_location,
            "crop_location": crop_location
        }

    elif level == "crop":
        # Crop-level rows carry no FPC breakdown: crop x location only.
        crop_location = grouped_metrics(
            df_out,
            group_cols=[cols["crop"], cols["location"]],
            y_col=cols["actual"],
            yhat_col=cols["pred"],
            include_mape=True
        )

        all_results[name] = {
            "crop_location": crop_location
        }

    else:
        raise ValueError(f"{name}: level must be 'fpc' or 'crop'")

In [None]:
# Usage: all_results["<model name>"]["<level>_location"]
#   e.g. all_results["baseline_fpc"]["fpc_location"] or all_results["baseline_crop"]["crop_location"]
# See the examples below.

In [None]:
# Baseline: MAE and RMSE per FPC x location.
# The bare expression on the last line is rendered as the cell output.
all_results["baseline_fpc"]["fpc_location"]

In [None]:
# Baseline: MAE, RMSE and MAPE per crop x location.
# The bare expression on the last line is rendered as the cell output.
all_results["baseline_crop"]["crop_location"]

In [None]:
#Continue with the other models.

In [None]:
# Lasso: MAE and RMSE per FPC x location.
# Needs a "lasso_fpc" entry in MODELS_TO_EVALUATE, otherwise this raises KeyError.
all_results["lasso_fpc"]["fpc_location"]

In [None]:
# Lasso: MAE, RMSE and MAPE per crop x location.
# Needs a "lasso_crop" entry in MODELS_TO_EVALUATE, otherwise this raises KeyError.
all_results["lasso_crop"]["crop_location"]

In [None]:
#Ridge MAE, RMSE per fpc x location

In [None]:
#Ridge MAE, RMSE, MAPE per crop x location

In [None]:
#Random Forests MAE, RMSE per fpc x location

In [None]:
#Random Forests MAE, RMSE, MAPE per crop x location

In [None]:
#Boosted Trees MAE, RMSE per fpc x location

In [None]:
#Boosted Trees MAE, RMSE, MAPE per crop x location

In [None]:
# Create time-series plots at the crop-location level, meaning:
# One plot = one (crop_type, location) combination
# Each plot shows actual vs. predicted trolley volumes over time

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def plot_crop_location_ts(
    df,
    date_col,
    location_col,
    crop_col,
    y_true_col,
    y_pred_col,
    model_name="model",
    freq="ME",                 # "ME" monthly, "W" weekly, None daily
    start=None,
    end=None,
    max_plots=6,               # top N crop-location series to plot
    sort_by="volume"           # "volume" or "none"
):
    """Draw one actual-vs-predicted line chart per (location, crop) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Model output containing the five named columns.
    date_col, location_col, crop_col, y_true_col, y_pred_col : str
        Column names for the date, location, crop type, actual value and
        predicted value.
    model_name : str
        Label used in the chart titles and in the "nothing to plot" message.
    freq : str or None
        Resampling rule; each period is summed. ``None`` plots the rows as is.
    start, end : date-like or None
        Optional inclusive bounds on ``date_col``.
    max_plots : int
        Maximum number of (location, crop) pairs to draw.
    sort_by : str
        ``"volume"`` picks the pairs with the largest summed actuals; any
        other value takes the first pairs in order of appearance.

    Returns
    -------
    None
        Figures are shown with ``plt.show()``. When no rows remain after
        filtering, a message is printed and nothing is drawn.

    Raises
    ------
    ValueError
        If any of the named columns is missing from ``df``.
    """
    # --- column check ---
    needed_cols = [date_col, location_col, crop_col, y_true_col, y_pred_col]
    absent = [name for name in needed_cols if name not in df.columns]
    if absent:
        raise ValueError(f"Missing columns: {absent}. Available: {list(df.columns)}")

    # Work on a copy so the caller's frame keeps its original date column.
    data = df.copy()
    data[date_col] = pd.to_datetime(data[date_col])

    # --- optional date window (both bounds inclusive) ---
    if start is not None:
        data = data[data[date_col] >= pd.to_datetime(start)]
    if end is not None:
        data = data[data[date_col] <= pd.to_datetime(end)]

    data = data.dropna(subset=[y_true_col, y_pred_col, location_col, crop_col])
    if data.empty:
        print(f"[{model_name}] Nothing to plot after filtering.")
        return

    # --- choose the (location, crop) pairs to draw ---
    pair_cols = [location_col, crop_col]
    if sort_by == "volume":
        totals = (
            data.groupby(pair_cols, dropna=False)[y_true_col]
            .sum()
            .reset_index(name="total_actual")
        )
        ranked = totals.sort_values("total_actual", ascending=False)
        selected = ranked.head(max_plots)[pair_cols]
    else:
        selected = data[pair_cols].drop_duplicates().head(max_plots)

    # --- human-readable period label (identical for every chart) ---
    if freq is None:
        period = "Daily"
    else:
        freq_text = str(freq)
        if freq_text.startswith("W"):
            period = "Weekly"
        elif freq_text in ["ME", "MS"]:
            period = "Monthly"
        else:
            period = freq_text

    # --- one figure per selected pair ---
    for _, pair in selected.iterrows():
        loc = pair[location_col]
        crop = pair[crop_col]

        in_pair = (data[location_col] == loc) & (data[crop_col] == crop)
        series = data[in_pair].sort_values(date_col)

        if freq is not None:
            by_date = series.set_index(date_col)[[y_true_col, y_pred_col]]
            series = by_date.resample(freq).sum().reset_index()

        fig, ax = plt.subplots(figsize=(12, 4))
        for column, label in ((y_true_col, "Actual"), (y_pred_col, "Predicted")):
            ax.plot(series[date_col], series[column], label=label, linewidth=2)

        # One tick per month, labelled like "Jan 2021".
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
        plt.xticks(rotation=45)

        ax.set_title(f"{model_name} | Crop: {crop} | Location: {loc} | {period}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Trolleys")
        ax.grid(True)
        ax.legend()
        plt.tight_layout()
        plt.show()

In [None]:
# Run for ONE crop-level model df

#plot_crop_location_ts(
    #df=final_df_crop_level,            # <- Put your crop-level df
    #date_col="date",                   # <- Put your date column
    #location_col="logistieke location id", # <- Put your location column
    #crop_col="crop_type",              # <- Put your crop type column
    #y_true_col="trolleys_true",        # <- Put your actual trolleys column
    #y_pred_col="trolleys_pred_lag_1",  # <- Put your predicted trolleys column
    #model_name="baseline_crop",        # <- Put your model name 
    #freq="ME",
    #start="2021-01-01",
    #end="2024-12-31",
    #max_plots=6
#)

In [None]:
#EXAMPLE WITH BASELINE

In [None]:
# Run for Baseline crop type*location
# Use crop type level df and crop type level columns

plot_crop_location_ts(
    df=final_df_crop_level,            # <-- your crop-level df
    date_col="date",                   # <-- your date column
    location_col="logistieke location id",
    crop_col="crop_type",
    y_true_col="trolleys_true",
    y_pred_col="trolleys_pred_lag_1",
    model_name="baseline_crop",        # shown in each chart title
    freq="ME",                         # resample to monthly sums
    start="2021-01-01",                # inclusive lower bound on the date
    end="2024-12-31",                  # inclusive upper bound on the date
    max_plots=6                        # six pairs with the largest total actuals
)

In [None]:
#Continue with other models.

In [None]:
# Run for Lasso crop type*location
# Use crop type level df and crop type level columns

plot_crop_location_ts(
    df=agg_test,            # <-- your crop-level df
    date_col="date",                   # <-- your date column
    location_col="logistieke location id",
    crop_col="crop_type",
    y_true_col="y_true",
    y_pred_col="y_pred",
    model_name="lasso_crop",           # shown in each chart title
    freq="ME",                         # resample to monthly sums
    start="2021-01-01",                # inclusive lower bound on the date
    end="2024-12-31",                  # inclusive upper bound on the date
    max_plots=6                        # six pairs with the largest total actuals
)

In [None]:
# Run for Ridge crop type*location
# Use crop type level df and crop type level columns

In [None]:
# Run for Random Forests crop type*location
# Use crop type level df and crop type level columns

In [None]:
# Run for Boosted Trees crop type*location
# Use crop type level df and crop type level columns

In [None]:
#Create time-series plots at the FPC level, meaning:
#One plot = one (FPC, location) combination
#It shows the top 6 FPCs with the highest total trolley demand (within their respective locations)

In [None]:
def plot_ts_by_location_fpc(
    df,
    date_col,
    location_col,
    fpc_col,
    y_true_col,
    y_pred_col,
    model_name="model",
    freq="ME",
    start=None,
    end=None,
    max_plots=6
):
    """Draw one actual-vs-predicted line chart per (location, FPC) pair.

    The pairs with the largest summed actuals are plotted, at most
    ``max_plots`` of them.

    Parameters
    ----------
    df : pandas.DataFrame
        Model output containing the five named columns.
    date_col, location_col, fpc_col, y_true_col, y_pred_col : str
        Column names for the date, location, FPC, actual value and
        predicted value.
    model_name : str
        Label used in the chart titles and in the "nothing to plot" message.
    freq : str or None
        Resampling rule; each period is summed. ``None`` plots the rows as is.
    start, end : date-like or None
        Optional inclusive bounds on ``date_col``.
    max_plots : int
        Maximum number of (location, FPC) pairs to draw.

    Returns
    -------
    None
        Figures are shown with ``plt.show()``. When no rows remain after
        filtering, a message is printed and nothing is drawn.

    Raises
    ------
    ValueError
        If any of the named columns is missing from ``df``.
    """
    # Fail early with a readable message instead of a KeyError raised from
    # somewhere inside pandas (same check as plot_crop_location_ts).
    required = [date_col, location_col, fpc_col, y_true_col, y_pred_col]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}. Available: {list(df.columns)}")

    # Copy only the columns that are used; dict.fromkeys drops a repeated
    # name while keeping order. The caller's frame is left untouched.
    d = df[list(dict.fromkeys(required))].copy()
    d[date_col] = pd.to_datetime(d[date_col])
    if start is not None:
        d = d[d[date_col] >= pd.to_datetime(start)]
    if end is not None:
        d = d[d[date_col] <= pd.to_datetime(end)]

    d = d.dropna(subset=[y_true_col, y_pred_col, location_col, fpc_col])
    if d.empty:
        print(f"[{model_name}] Nothing to plot.")
        return

    # Rank the (location, FPC) pairs by total actual volume, keep the top ones.
    combos = (
        d.groupby([location_col, fpc_col], dropna=False)[y_true_col]
         .sum().reset_index(name="total_actual")
         .sort_values("total_actual", ascending=False)
         .head(max_plots)[[location_col, fpc_col]]
    )

    for _, row in combos.iterrows():
        loc = row[location_col]
        fpc = row[fpc_col]
        sub = d[(d[location_col] == loc) & (d[fpc_col] == fpc)].sort_values(date_col)

        if freq is not None:
            sub = (sub.set_index(date_col)[[y_true_col, y_pred_col]]
                     .resample(freq).sum().reset_index())

        fig, ax = plt.subplots(figsize=(12, 4))
        ax.plot(sub[date_col], sub[y_true_col], label="Actual", linewidth=2)
        ax.plot(sub[date_col], sub[y_pred_col], label="Predicted", linewidth=2)

        # One tick per month, labelled like "Jan 2021".
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))
        plt.xticks(rotation=45)

        ax.set_title(f"{model_name} | Location: {loc} | FPC: {fpc} | {freq if freq else 'Daily'}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Trolleys")
        ax.grid(True)
        ax.legend()
        plt.tight_layout()
        plt.show()

In [None]:
#plot_ts_by_location_fpc(
    #df=final_df, <- PUT YOUR DF HERE
    #date_col="date", <-PUT DATE COLUMN HERE
    #location_col="logistieke location id", <-PUT LOCATION COLUMN HERE
    #fpc_col="fpc_index", <-PUT FPC INDEX COLUMN HERE
    #y_true_col="trolleys", <-PUT ACTUAL TROLLEYS COLUMN
    #y_pred_col="trolleys_lag_1", <-PUT PREDICTED TROLLEYS COLUMN
    #model_name="baseline_fpc", <-PUT MODEL NAME
    #freq="ME", 
    #start="2021-01-01",
    #end="2024-12-31",
    #The function first ranks the (location, FPC) combinations by total actual volume
    #(.sort_values("total_actual", ascending=False).head(max_plots)),
    #so max_plots=6 plots the six combinations with the highest total demand.
    #max_plots=6
#)

In [None]:
#EXAMPLE WITH BASELINE

In [None]:
#Plot for Baseline
plot_ts_by_location_fpc(
    df=final_df,
    date_col="date",
    location_col="logistieke location id",
    fpc_col="fpc_index",
    y_true_col="trolleys",
    y_pred_col="trolleys_lag_1",
    model_name="baseline_fpc",
    freq="ME",
    start="2021-01-01",
    end="2024-12-31",
    # The function ranks (location, FPC) combinations by total actual volume
    # (.sort_values("total_actual", ascending=False).head(max_plots)), so
    # max_plots=6 plots the six combinations with the highest total demand.
    max_plots=6
)

In [None]:
#Continue with other models

In [None]:
#Plot for Lasso
plot_ts_by_location_fpc(
    df=test_with_preds,
    date_col="date",
    location_col="logistieke location id",
    fpc_col="fpc_index",
    y_true_col="y_true",
    y_pred_col="y_pred",
    model_name="lasso_fpc",
    freq="ME",
    start="2021-01-01",
    end="2024-12-31",
    # The function ranks (location, FPC) combinations by total actual volume
    # (.sort_values("total_actual", ascending=False).head(max_plots)), so
    # max_plots=6 plots the six combinations with the highest total demand.
    max_plots=6
)

In [None]:
#Plot for Ridge

In [None]:
#Plot for Random Forests

In [None]:
#Plot for Boosted Trees