# Importation

In [16]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.decomposition import PCA
import pandas as pd

In [17]:
# Single seed reused below for the train/test split and for the estimators
# that are given a random_state, so reruns produce the same numbers.
random_state = 42

In [18]:
# Load the California housing dataset as pandas objects (as_frame=True).
housing = fetch_california_housing(as_frame=True)

# Features and regression target, exactly as provided by the dataset bunch.
X, y = housing.data, housing.target

# Hold out 20% of the rows for evaluation; the split is seeded so it is
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)

In [27]:
# Candidate regressors, keyed by the label shown in the results table.
# Built from ordered (label, estimator) pairs: the insertion order is the
# order in which the models are later iterated and reported.
models = dict(
    [
        # --- Linear models ---
        ("linearmodel", LinearRegression()),
        ("Lasso", Lasso(alpha=0.01, random_state=random_state)),
        ("Ridge", Ridge(alpha=0.01, random_state=random_state)),
        # --- Latent-component regressions ---
        ("PLSRegression", PLSRegression()),
        # NOTE(review): PCA() is given no n_components, and the recorded PCR
        # scores equal those of "linearmodel" — presumably every component is
        # kept, so no dimensionality reduction happens; confirm and consider
        # setting n_components.
        ("PCR", make_pipeline(StandardScaler(), PCA(), LinearRegression())),
        # NOTE(review): no alpha is passed here, unlike Lasso and Ridge above,
        # so the library default applies — confirm this is intended.
        ("ElasticNet", ElasticNet(random_state=random_state)),
        # --- Tree-based models ---
        ("Decision Tree", DecisionTreeRegressor(random_state=random_state)),
        ("Random Forest", RandomForestRegressor(random_state=random_state)),
        ("Gradient Boosting", GradientBoostingRegressor(random_state=random_state)),
    ]
)


# First experiments

In [28]:
# Column name -> list of per-model values; turned into the results table at
# the end of the cell.
metric_columns = ("model", "MSE", "R^2")
res = {column: [] for column in metric_columns}

for model_name, model in models.items():
    # Fit on the training split (the estimator stored in `models` is fitted
    # in place), then predict on the held-out test split.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Test-set error and coefficient of determination.
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Append one row; the value order matches metric_columns.
    for column, value in zip(metric_columns, (model_name, mse, r2)):
        res[column].append(value)

# Last expression of the cell: rendered as the comparison table.
pd.DataFrame(res)

Unnamed: 0,model,MSE,R^2
0,linearmodel,0.555892,0.575788
1,Lasso,0.544449,0.58452
2,Ridge,0.555891,0.575788
3,PLSRegression,0.644844,0.507906
4,PCR,0.555892,0.575788
5,ElasticNet,0.764556,0.416552
6,Decision Tree,0.495235,0.622076
7,Random Forest,0.255368,0.805123
8,Gradient Boosting,0.293997,0.775645


## Sélection stepwise