In [1]:
import numpy as np
import pandas as pd
import os

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

import matplotlib.pyplot as plt


DATASET_PATH = "datasets/AEP_hourly.csv"
DATASET_NAME = "AEP_hourly"

WINDOW_SIZE = 12
TEST_SIZE = 0.2

RESULTS_DIR = "forecasting_univariate_simple"
os.makedirs(RESULTS_DIR, exist_ok=True)


def forecasting_univariate_date_target(
    df,
    model_type="KNN"
):
    # =========================
    # 1️⃣ Drop date → garder cible
    # =========================
    target_col = df.columns[1]
    series = df[target_col].values.reshape(-1, 1)

    # =========================
    # 2️⃣ Normalisation
    # =========================
    scaler = StandardScaler()
    series_scaled = scaler.fit_transform(series)

    # =========================
    # 3️⃣ Windows
    # =========================
    X, y = create_windows_univariate(series_scaled, WINDOW_SIZE)

    # =========================
    # 4️⃣ Train / Test (TIME SERIES)
    # =========================
    split_index = int(len(X) * (1 - TEST_SIZE))
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    # =========================
    # 5️⃣ Modèle
    # =========================
    if model_type == "KNN":
        model = KNeighborsRegressor(
            n_neighbors=5,
            weights="distance"
        )
    else:
        model = RandomForestRegressor(
            n_estimators=300,
            random_state=42
        )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # =========================
    # 6️⃣ Inverse scaling
    # =========================
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()
    y_pred_inv = scaler.inverse_transform(y_pred.reshape(-1, 1)).ravel()

    # =========================
    # 7️⃣ Metrics
    # =========================
    mse = mean_squared_error(y_test_inv, y_pred_inv)
    mae = mean_absolute_error(y_test_inv, y_pred_inv)

    # =========================
    # 8️⃣ Courbe
    # =========================
    plt.figure()
    plt.plot(y_test_inv, label="True")
    plt.plot(y_pred_inv, label="Predicted")
    plt.title(f"{DATASET_NAME} | Univariate | {model_type}")
    plt.xlabel("Time")
    plt.ylabel(target_col)
    plt.legend()

    fig_name = f"{DATASET_NAME}_univariate_{model_type}.png"
    fig_path = os.path.join(RESULTS_DIR, fig_name)
    plt.savefig(fig_path)
    plt.close()

    # =========================
    # 9️⃣ Sauvegarde TXT
    # =========================
    txt_name = f"{DATASET_NAME}_univariate_{model_type}.txt"
    txt_path = os.path.join(RESULTS_DIR, txt_name)

    with open(txt_path, "w") as f:
        f.write(f"Dataset : {DATASET_NAME}\n")
        f.write("Type : Univariate (Date + Target)\n")
        f.write(f"Model : {model_type}\n")
        f.write(f"Window size : {WINDOW_SIZE}\n\n")
        f.write(f"MSE : {mse:.6f}\n")
        f.write(f"MAE : {mae:.6f}\n")
        f.write(f"Curve : {fig_name}\n")

    return mse, mae

df = pd.read_csv(DATASET_PATH)

# Sécurité : forcer exactement 2 colonnes
assert df.shape[1] == 2, "Le dataset doit contenir exactement 2 colonnes (date + cible)"

results = []

for model in ["KNN", "RF"]:
    mse, mae = forecasting_univariate_date_target(df, model)
    results.append([model, mse, mae])

summary = pd.DataFrame(
    results,
    columns=["Model", "MSE", "MAE"]
)

print(summary)


NameError: name 'create_windows_univariate' is not defined