In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from datetime import (
    datetime as DateTime, 
    timedelta as TimeDelta)

import os
import sys

# Make the helper modules that live in ../scripts importable from this notebook.
# The relative path is turned into an absolute one when this cell runs (i.e. it
# is resolved against the kernel's current working directory at that moment), so
# the sys.path entry stays valid if the working directory changes afterwards.
# The membership test keeps re-runs of this cell from appending duplicates.
SCRIPTS_DIR = os.path.abspath("../scripts")
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)

from make_train_test_set import make_train_test_set
from import_data import import_data
from metrics import metrics 
from group_by_clients import group_by_clients

ModuleNotFoundError: No module named 'make_train_test_set'

# Random Forest sur les données brutes

In [None]:
# Load the full dataset and split it per client.
# NOTE(review): df_global is later copied and merged with pandas, so it is
# presumably a DataFrame; the container returned by group_by_clients is only
# known here to support len() — confirm in scripts/group_by_clients.py.
df_global = import_data()
clients = group_by_clients(df_global)
# Number of per-client groups; reused below to size every list of models.
n = len(clients)

In [None]:
# Prepare the train / test data for the raw-data models.
# The second argument lists the columns handed to the models as inputs and the
# third the column to predict (reading inferred from how X_* / y_* are passed
# to fit() further down — confirm in scripts/make_train_test_set.py).
raw_feature_columns = ["jour", "forecasted_consumption"]
raw_target_columns = ["real_consumption"]
X_train, X_test, y_train, y_test, test_data = make_train_test_set(
    clients,
    raw_feature_columns,
    raw_target_columns,
)

In [None]:
# Create and fit one random forest per client.
# random_state is pinned so that a Restart & Run All reproduces the same
# forests and therefore the same scores; left unset, the forests are seeded
# differently on every run. The value 42 itself is arbitrary.
rfs = [RandomForestRegressor(random_state=42) for _ in range(n)]

for i, rf in enumerate(rfs):
    # fit() wants a 1-D target, so the target for client i is flattened first.
    rf.fit(X_train[i], np.ravel(y_train[i]))

In [None]:
# Predict every client's test split with the forest fitted for that client;
# y_pred[i] holds the predictions of rfs[i] on X_test[i].
y_pred = [rf.predict(X_test[i]) for i, rf in enumerate(rfs)]

## Évaluation du Random Forest

In [None]:
# Score two forecasts against the measured consumption, client by client:
#   - evaluation_model:              the random-forest predictions (y_pred)
#   - evalutation_airliquide_model:  the "prediction" column already present
#                                    in the test data
# NOTE(review): each metrics() result is indexed 0..2 by the plotting cell;
# the meaning of those three values is defined in scripts/metrics.py.
evaluation_model, evalutation_airliquide_model = [], []
for client_idx in range(n):
    client_test = test_data[client_idx]
    evaluation_model.append(
        metrics(client_test["real_consumption"], y_pred[client_idx])
    )
    evalutation_airliquide_model.append(
        metrics(client_test["real_consumption"], client_test["prediction"])
    )

In [None]:
# Compare, per client, the random-forest scores with the scores of the
# forecast stored in the "prediction" column (labelled "airliquide").
# NOTE(review): the subplot titles assume metrics() returns its values in the
# order (R2, MSE, MAE) — confirm against scripts/metrics.py.
X = list(range(1, n + 1))  # 1-based position of each client, used as x-axis
fig, ax = plt.subplots(3, sharex=True)
for k, metric_name in enumerate(["R2", "MSE", "MAE"]):
    ax[k].set_title(metric_name)
    ax[k].plot(X, [scores[k] for scores in evaluation_model], label="model")
    ax[k].plot(X, [scores[k] for scores in evalutation_airliquide_model], label="airliquide")
    ax[k].legend()
ax[-1].set_xlabel("client")
# Three stacked axes with titles collide under the default spacing;
# tight_layout makes room for the titles and the shared x label.
fig.tight_layout()

# Random Forest avec les données du gaz naturel

In [None]:
# Load the gas data and prepare it for the join on "horodate".
gas = pd.read_csv("../data/gas.csv")

# Parse the timestamps and push them 14 days forward: once merged on
# "horodate", a row dated D therefore carries the gas value recorded at D - 14.
# NOTE(review): presumably meant as a two-week lag so that only values known
# in advance are used as a feature — confirm the intent.
gas["horodate"] = pd.to_datetime(gas["horodate"]) + pd.Timedelta(days=14)

# Keep the shifted timestamps inside the window, both bounds included.
window_start = pd.to_datetime("2021-01-01")
window_end = pd.to_datetime("2023-01-31")
gas = gas[gas["horodate"].between(window_start, window_end)]

In [None]:
# Attach the gas columns to every row of the global data set (left join on the
# timestamp), order the rows by client then time, and renumber them from 0.
# pd.merge returns a new frame, so df_global itself is left untouched.
# NOTE(review): dropna() removes a row when ANY column is missing, not only
# the gas columns — rows without a matching gas timestamp disappear here, and
# so do rows that already had gaps in df_global.
df_gas = (
    pd.merge(df_global, gas, on=["horodate"], how="left")
    .sort_values(["id_client", "horodate"])
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Split per client again, this time from the gas-enriched frame
# (same helper as the one applied to the raw data).
clients_gas = group_by_clients(df_gas)

In [None]:
# Train / test data for the gas-enriched models: same columns as for the raw
# models plus "Gas Price", with the same target column.
gas_feature_columns = ["jour", "forecasted_consumption", "Gas Price"]
gas_target_columns = ["real_consumption"]
X_train_gas, X_test_gas, y_train_gas, y_test_gas, test_data_gas = make_train_test_set(
    clients_gas,
    gas_feature_columns,
    gas_target_columns,
)

In [None]:
# Create and fit one random forest per client of the gas-enriched data set.
# - The list is sized from clients_gas rather than from n: n was measured on
#   the raw data, while the gas frame went through dropna() and may hold fewer
#   client groups, in which case indexing X_train_gas with range(n) would
#   fail. When both counts match this is identical to using n.
# - random_state is pinned so that re-running the notebook reproduces the same
#   forests (the value 42 itself is arbitrary).
models_gas = [RandomForestRegressor(random_state=42) for _ in range(len(clients_gas))]
for i, model in enumerate(models_gas):
    # fit() wants a 1-D target, so the target for client i is flattened first.
    model.fit(X_train_gas[i], np.ravel(y_train_gas[i]))

In [None]:
# Predict every client's gas-enriched test split with that client's forest.
# Each prediction is reshaped to a flat vector with one entry per test row.
y_pred_gas = [
    model.predict(X_test_gas[i]).reshape(len(X_test_gas[i]))
    for i, model in enumerate(models_gas)
]

In [None]:
# Score the gas-enriched forests against the measured consumption, one
# metrics() result per client, in client order.
evaluation_model_gas = [
    metrics(test_data_gas[i]["real_consumption"], y_pred_gas[i])
    for i in range(n)
]

In [None]:
# Compare, per client, three sets of scores: the gas-enriched forests, the
# raw-data forests and the forecast stored in the "prediction" column
# (labelled "airliquide").
# NOTE(review): the subplot titles assume metrics() returns its values in the
# order (R2, MSE, MAE) — confirm against scripts/metrics.py.
X = list(range(1, n + 1))  # 1-based position of each client, used as x-axis
fig, ax = plt.subplots(3, sharex=True)
for k, metric_name in enumerate(["R2", "MSE", "MAE"]):
    ax[k].set_title(metric_name)
    ax[k].plot(X, [scores[k] for scores in evaluation_model_gas], label="model-gas")
    ax[k].plot(X, [scores[k] for scores in evaluation_model], label="model-Brut")
    ax[k].plot(X, [scores[k] for scores in evalutation_airliquide_model], label="airliquide")
    ax[k].legend()
ax[-1].set_xlabel("client")
# Three stacked axes with titles collide under the default spacing;
# tight_layout makes room for the titles and the shared x label.
fig.tight_layout()

# SVM sur les données brutes

In [None]:
# Reload the raw dataset and rebuild the per-client split for the SVM section.
# These are the same three statements as in the first data-loading cell; they
# rebind df_global, clients and n.
df_global = import_data()
clients = group_by_clients(df_global)
n = len(clients)

In [None]:
# Prepare the train / test data for the SVM models (same columns as for the
# raw-data random forests). This rebinds X_train, X_test, y_train, y_test and
# test_data.
raw_feature_columns = ["jour", "forecasted_consumption"]
raw_target_columns = ["real_consumption"]
X_train, X_test, y_train, y_test, test_data = make_train_test_set(
    clients,
    raw_feature_columns,
    raw_target_columns,
)

In [None]:
# Create one linear-kernel SVR per client, then fit each on that client's
# training split. "done" is printed after every fit as a progress marker.
models = [SVR(kernel="linear", C=0.1, epsilon=0.5) for _ in range(n)]
for i in range(n):
    # fit() wants a 1-D target, so the target for client i is flattened first.
    models[i].fit(X_train[i], np.ravel(y_train[i]))
    print("done")

In [None]:
# Predict every client's test split with that client's SVR.
# Note: this rebinds y_pred, which held the random-forest predictions earlier
# in the notebook.
y_pred = [model.predict(X_test[i]) for i, model in enumerate(models)]

In [None]:
# Stand-alone linear-kernel SVR; no C or epsilon is passed here, unlike the
# per-client models built above.
mod = SVR(kernel = "linear")

In [None]:
# Fit the stand-alone SVR on the training split stored at index 0, with the
# target flattened to 1-D.
mod.fit(X_train[0], np.array(y_train[0]).ravel())

In [None]:
# Display the stand-alone SVR's predictions for the test split at index 0
# (the whole array is shown as the cell output).
mod.predict(X_test[0])