# Importation des librairies

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
import os

from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import LinearRegression

from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_absolute_error, r2_score


# Préparation des Données

In [None]:
# Make sure 'Algerian_forest_fires_cleaned.csv' has been uploaded next to this
# notebook before running the cell.
df = pd.read_csv('Algerian_forest_fires_cleaned.csv')

# Four weather readings are the predictors; the FWI column is the target.
feature_columns = ['Temperature', 'RH', 'Ws', 'Rain']
X = df[feature_columns]
y = df['FWI']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Taille du set d'entrainement :", X_train.shape)
print("Taille du set de test :", X_test.shape)

Taille du set d'entrainement : (194, 4)
Taille du set de test : (49, 4)


# Modèle 1 : Régression Linéaire

In [None]:
# Baseline model: ordinary least squares fitted on the standardized
# training features.
lin_reg = LinearRegression().fit(X_train_scaled, y_train)

# Score the baseline on the held-out split.
y_pred_lin = lin_reg.predict(X_test_scaled)
mae_lin, score_lin = (
    mean_absolute_error(y_test, y_pred_lin),
    r2_score(y_test, y_pred_lin),
)

print(
    "--- Régression Linéaire ---",
    f"MAE: {mae_lin:.2f}",
    f"R2 Score: {score_lin:.2f}",
    sep="\n",
)

--- Régression Linéaire ---
MAE: 3.82
R2 Score: 0.29


# Modèle 2 : SVR avec GridSearchCV

In [None]:
# Search space for the support vector regressor: 2 * 4 * 3 * 3 = 72
# combinations. Note that 'gamma' has no effect on the linear kernel, so the
# linear candidates are evaluated once per gamma value with identical scores.
param_grid_svr = dict(
    kernel=['linear', 'rbf'],
    C=[0.1, 1, 10, 100],
    gamma=['scale', 0.1, 0.01],
    epsilon=[0.1, 0.2, 0.5],
)

# Exhaustive search with 5 cross-validation folds on the training split.
# The scorer is the negated MAE, so a higher score means a smaller error;
# n_jobs=-1 runs the fits in parallel on all available cores.
svr = SVR()
grid_svr = GridSearchCV(
    estimator=svr,
    param_grid=param_grid_svr,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)

print("Recherche des meilleurs hyperparamètres pour SVR ...")
grid_svr.fit(X_train_scaled, y_train)

# best_estimator_ is the winning configuration as refit by the grid search;
# evaluate it on the held-out split.
best_svr = grid_svr.best_estimator_
y_pred_svr = best_svr.predict(X_test_scaled)

mae_svr, score_svr = (
    mean_absolute_error(y_test, y_pred_svr),
    r2_score(y_test, y_pred_svr),
)

print("\n--- Meilleur SVR ---")
print(f"Meilleurs Paramètres: {grid_svr.best_params_}")
print(f"MAE: {mae_svr:.2f}")
print(f"R2 Score: {score_svr:.2f}")

Recherche des meilleurs hyperparamètres pour SVR ...

--- Meilleur SVR ---
Meilleurs Paramètres: {'C': 100, 'epsilon': 0.2, 'gamma': 0.1, 'kernel': 'rbf'}
MAE: 2.51
R2 Score: 0.70


# Modèle 3 : Random Forest Regressor

In [None]:
# Search space for the random forest: 3 * 3 * 2 * 2 = 36 combinations of
# forest size, tree depth and split/leaf size constraints.
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

# The forest is seeded so that repeated runs build the same trees. The search
# uses 5 cross-validation folds and the negated MAE as its score (higher is
# better), with fits spread over all available cores.
rf = RandomForestRegressor(random_state=42)
grid_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid_rf,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)

print("Recherche des meilleurs hyperparamètres pour Random Forest...")
grid_rf.fit(X_train_scaled, y_train)

# Evaluate the best configuration found by the search on the held-out split.
best_rf = grid_rf.best_estimator_
y_pred_rf = best_rf.predict(X_test_scaled)

mae_rf, score_rf = (
    mean_absolute_error(y_test, y_pred_rf),
    r2_score(y_test, y_pred_rf),
)

print("\n--- Meilleur Random Forest ---")
print(f"Meilleurs Paramètres: {grid_rf.best_params_}")
print(f"MAE: {mae_rf:.2f}")
print(f"R2 Score: {score_rf:.2f}")

Recherche des meilleurs hyperparamètres pour Random Forest...

--- Meilleur Random Forest ---
Meilleurs Paramètres: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
MAE: 1.98
R2 Score: 0.70


# Sauvegarde des Artefacts (Modèle et Scaler)

In [None]:
# Persist the fitted scaler and the tuned Random Forest side by side.
# The model was trained on scaled features, so any consumer must apply the
# saved scaler to its inputs before calling the saved model.

# exist_ok=True creates the folder when missing and is a no-op otherwise,
# which avoids the separate check-then-create step.
os.makedirs('artifacts', exist_ok=True)

# Context managers guarantee each file is flushed and closed even if pickling
# raises; passing a bare open(...) to pickle.dump would leave the handle open.
with open('artifacts/scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

with open('artifacts/model.pkl', 'wb') as model_file:
    pickle.dump(best_rf, model_file)

print("Fichiers sauvegardés dans le dossier 'artifacts':")
print("- scaler.pkl")
print("- model.pkl")

Fichiers sauvegardés dans le dossier 'artifacts':
- scaler.pkl
- model.pkl
