In [3]:
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error

import matplotlib.pyplot as plt
DATASET_NAME = "Dataset1"
TARGET_COL = "Precip Type"      # colonne à prédire
WINDOW_SIZE = 10
TEST_SIZE = 0.2
RESULTS_DIR = "reports"

os.makedirs(RESULTS_DIR, exist_ok=True)


def create_windows(data, window_size):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i + window_size])
        y.append(data[i + window_size])
    return np.array(X), np.array(y)

def forecasting_pipeline(
    df,
    dataset_name,
    forecasting_type="univariate",
    model_type="KNN"
):
    # ==============================
    # 1️⃣ Préparation des données
    # ==============================
    if forecasting_type == "univariate":
        data = df[TARGET_COL].values.reshape(-1, 1)
    else:
        data = df.drop(columns=[TARGET_COL]).values

    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(data)

    X, y = create_windows(data_scaled, WINDOW_SIZE)

    # Flatten pour ML classique
    X = X.reshape(X.shape[0], -1)

    # ==============================
    # 2️⃣ Train / Test
    # ==============================
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=TEST_SIZE,
        shuffle=False
    )

    # ==============================
    # 3️⃣ Modèle
    # ==============================
    if model_type == "KNN":
        model = KNeighborsRegressor(
            n_neighbors=5,
            weights="distance"
        )
    else:
        model = RandomForestRegressor(
            n_estimators=200,
            random_state=42
        )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # ==============================
    # 4️⃣ Évaluation
    # ==============================
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    # ==============================
    # 5️⃣ Courbe
    # ==============================
    plt.figure()
    plt.plot(y_test, label="True")
    plt.plot(y_pred, label="Predicted")
    plt.title(f"{dataset_name} | {forecasting_type} | {model_type}")
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.legend()

    fig_name = f"{dataset_name}_{forecasting_type}_{model_type}.png"
    fig_path = os.path.join(RESULTS_DIR, fig_name)
    plt.savefig(fig_path)
    plt.close()

    # ==============================
    # 6️⃣ Sauvegarde TXT
    # ==============================
    txt_name = f"{dataset_name}_{forecasting_type}_{model_type}.txt"
    txt_path = os.path.join(RESULTS_DIR, txt_name)

    with open(txt_path, "w") as f:
        f.write(f"Dataset : {dataset_name}\n")
        f.write(f"Forecasting type : {forecasting_type}\n")
        f.write(f"Model : {model_type}\n")
        f.write(f"Window size : {WINDOW_SIZE}\n\n")
        f.write(f"MSE : {mse:.6f}\n")
        f.write(f"MAE : {mae:.6f}\n")
        f.write(f"Curve file : {fig_name}\n")

    return mse, mae
    
df = pd.read_csv("datasets/Temperature.csv")

results = []

for forecasting_type in ["univariate", "multivariate"]:
    for model_type in ["KNN", "RF"]:
        mse, mae = forecasting_pipeline(
            df,
            DATASET_NAME,
            forecasting_type,
            model_type
        )
        results.append([forecasting_type, model_type, mse, mae])

# Résumé final
results_df = pd.DataFrame(
    results,
    columns=["Forecasting", "Model", "MSE", "MAE"]
)

print(results_df)


ValueError: could not convert string to float: 'rain'