# Test avec ExtraTreesRegressor

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import ExtraTreesRegressor

In [3]:
# Load the dataset from a CSV file. The path is relative, so it is resolved
# against the notebook's current working directory.
df = pd.read_csv("../data/silver.csv")

In [4]:
# Optional: uncomment to list the column names available in `df`.
# column_titles = df.columns.tolist()
# column_titles

In [5]:
# --- Features / target ------------------------------------------------------
# "prix_median" is the regression target; every other column is a predictor.
X = df.drop(columns=["prix_median"])
y = df["prix_median"]

# 80/20 split with a fixed seed so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, shuffle=True, random_state=42
)

# --- Preprocessing ----------------------------------------------------------
# Columns that are one-hot encoded.
categorial_features = ["proximité_autoroute", "riviere"]

# Columns that are standardised (zero mean, unit variance).
numerical_features = [
    'tx_crim', 'tx_residence', 'tx_commerce',
    'tx_nitriq', 'nb_piece', 'tx_ancienneté_parc_immo', 'distance_centre_emploi',
    'indice_impot_foncier', 'ratio_eleve_enseignant', 'tx_person_couleur',
    'tx_status_sociaux_eco_inf',
]

# handle_unknown="ignore": a category that was not present in the training rows
# is encoded as an all-zero vector at predict time instead of raising an error.
# Results are unchanged whenever every category was seen during fit.
categorical_transformer = OneHotEncoder(sparse_output=True, handle_unknown="ignore")
numerical_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorial_features),
        ('num', numerical_transformer, numerical_features),
    ],
    # Columns not listed above are forwarded to the model untouched.
    remainder="passthrough",
)

# --- Model ------------------------------------------------------------------
# ExtraTreesRegressor with default hyperparameters (baseline before tuning).
# The variable name `cb_reg` and the pipeline step name 'cb_reg' are kept
# unchanged in case other cells address them by name.
cb_reg = ExtraTreesRegressor(random_state=42, verbose=0)

# Chain preprocessing and the model so that the encoder and scaler are fitted
# on the training rows only.
pipe = Pipeline([
    ('preprocessor', preprocessor),
    ('cb_reg', cb_reg),
])

# Fit on the training set, then predict on both splits.
pipe.fit(X_train, y_train)
y_pred_train = pipe.predict(X_train)
y_pred_test = pipe.predict(X_test)

# --- Evaluation -------------------------------------------------------------
# Each metric is printed for TRAIN and TEST side by side, which makes any gap
# between the two (overfitting) visible at a glance.
separator = "/////////////////////////////////////////////////////"
metric_reports = [
    ("######## Mean squared error : ", mean_squared_error),
    ("######## Mean absolute error : ", mean_absolute_error),
    ("######## R2 score : ", r2_score),
]

for report_index, (report_header, metric_fn) in enumerate(metric_reports):
    # The separator goes between metric groups, not after the last one.
    if report_index > 0:
        print(separator)
    print(report_header)
    print("TRAIN :", metric_fn(y_train, y_pred_train))
    print("TEST :", metric_fn(y_test, y_pred_test))

######## Mean squared error : 
TRAIN : 0.0
TEST : 9667138.539215686
/////////////////////////////////////////////////////
######## Mean absolute error : 
TRAIN : 0.0
TEST : 1914.6568627450981
/////////////////////////////////////////////////////
######## R2 score : 
TRAIN : 1.0
TEST : 0.8681761921639675


### Optuna (recherche des hyperparamètres)

In [6]:
import optuna
from sklearn.metrics import r2_score