In [5]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# -----------------------------
# 1. Load the dataset
# -----------------------------
# Build the path from its components so the OS picks the separator. The
# previous raw string hard-coded Windows backslashes, which POSIX systems do
# not treat as path separators. The path is relative to the current working
# directory.
DATA_PATH = Path('..') / 'Data' / 'processed' / 'cleaned_data.csv'
df = pd.read_csv(DATA_PATH)

# -----------------------------
# 2. Define features and target
# -----------------------------
# The target is the 'price_dh' column; every other column stays in X.
target_col = 'price_dh'
y = df[target_col]
X = df.drop(columns=target_col)

# -----------------------------
# 3. Preprocessing
# -----------------------------
# Columns that get standardised vs. columns that get one-hot encoded.
# NOTE(review): 'proprety_type' looks like a misspelling of 'property_type';
# it presumably matches the CSV header -- confirm before renaming.
num_cols = ['surface', 'bedroom', 'bathroom']
cat_cols = ['proprety_type', 'address', 'city', 'principale']

numeric_scaler = StandardScaler()
# handle_unknown='ignore': categories that were not seen while fitting should
# not make the encoder raise when transforming new rows.
categorical_encoder = OneHotEncoder(handle_unknown='ignore')

# NOTE(review): X may hold columns that are in neither list; how they are
# treated depends on ColumnTransformer's `remainder` setting (left at its
# default here) -- confirm this is intended.
preprocess = ColumnTransformer(
    transformers=[
        ('num', numeric_scaler, num_cols),
        ('cat', categorical_encoder, cat_cols),
    ]
)

# -----------------------------
# 4. Train/test split
# -----------------------------
# 20% of the rows are held out for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -----------------------------
# 5. Random Forest model
# -----------------------------
forest = RandomForestRegressor(
    n_estimators=300,   # number of trees
    max_depth=None,     # no depth limit
    random_state=42,    # fixed seed for reproducible fits
    n_jobs=-1,          # use multiple cores
)

# Preprocessing and the regressor are chained in one pipeline, so the same
# `preprocess` step runs inside both fit() and predict().
model = Pipeline(
    steps=[
        ('preprocess', preprocess),
        ('regressor', forest),
    ]
)

# -----------------------------
# 6. Training
# -----------------------------
model.fit(X_train, y_train)
# -----------------------------
# 7. Evaluation
# -----------------------------
# Predict on the held-out test split and compare against the true prices.
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Emit the four report lines in one call; the output is the same as four
# separate print() calls.
report_lines = [
    "Performance du modèle Random Forest :",
    f"R² Score : {r2:.4f}",
    f"MAE : {mae:,.0f} DH",
    f"RMSE : {rmse:,.0f} DH",
]
print("\n".join(report_lines))

Performance du modèle Random Forest :
R² Score : 0.8467
MAE : 1,541,493 DH
RMSE : 2,112,961 DH
