# Installations des modules

In [None]:
!pip install pandas
!pip install seaborn
!pip install -U scikit-learn

# Imports des modules

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import AdaBoostRegressor

# Définition du dataframe et étude du dataset

In [None]:
# Load the raw dataset from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="housing2.csv")

In [None]:
# A notebook cell only renders its *last* expression, so the preview has to
# be displayed explicitly (`display` is provided by the IPython kernel).
display(df.head())

# Column dtypes and non-null counts (printed by `info` itself).
df.info()

# Drop the rows whose `total_bedrooms` is missing. Reassigning (instead of
# dropping in place) keeps the cell's data flow explicit; `.copy()` makes the
# result an independent frame so later column assignments do not warn.
df = df.dropna(subset=["total_bedrooms"]).copy()

# Remaining missing values per column.
df.isnull().sum()

In [None]:
# Pairwise correlation between the numeric columns only: on pandas >= 2.0,
# `DataFrame.corr()` raises if the frame contains a non-numeric column.
# NOTE(review): whether housing2.csv has text columns is not visible here;
# selecting numeric dtypes is a no-op if it does not.
corr_matrix = df.select_dtypes(include="number").corr()

# Diverging colour scale centred on 0, with the coefficient in each cell.
ax = sns.heatmap(data=corr_matrix, center=0, annot=True)
ax.set_title("Correlation matrix of numeric columns");

# Définition des features et du label

In [None]:
# Derived feature stored under the column name "mean":
# total_rooms divided by population, row by row.
df = df.assign(mean=df["total_rooms"].div(df["population"]))

# Model inputs (X) and the target column (Y, kept two-dimensional).
feature_columns = ["latitude", "longitude", "median_income", "mean"]
X = df[feature_columns]
Y = df[["median_house_value"]]

# Preview of the selected features.
X.head()

In [None]:
# Standardize features and target (zero mean, unit variance).
# Each array gets its own scaler: refitting a single scaler on Y would
# overwrite the statistics learned from X, making it impossible to transform
# new inputs or to map predictions back to the original units later
# (`y_scaler.inverse_transform`).
# NOTE(review): both scalers are fitted on the full dataset before the
# train/test split, so test-set statistics leak into the scaling — consider
# fitting on the training split only.
x_scaler = StandardScaler()
X_norm = x_scaler.fit_transform(X)

y_scaler = StandardScaler()
Y_norm = y_scaler.fit_transform(Y)

# Backward compatibility: the previous version left `scaler` fitted on Y.
scaler = y_scaler

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_norm,
    Y_norm,
    test_size=0.2,
    random_state=42,
)

# Instanciation des modèles

In [None]:
# Candidate regressors, all with library defaults unless stated otherwise.
lin_reg = LinearRegression()
# NOTE(review): Lasso/Ridge use the default alpha=1.0 — with a standardized
# target this may be a strong penalty for Lasso; consider tuning it.
lasso = Lasso()
ridge = Ridge()
# Seeded so that repeated runs build the same tree (scikit-learn permutes
# the features at each split, so an unseeded tree can differ between runs).
tree = DecisionTreeRegressor(random_state=42)
# Boosted ensemble of 300 estimators, seeded for reproducibility.
adareg = AdaBoostRegressor(random_state=42,
                           n_estimators=300)

# Entraînement et évaluation

In [None]:
# Fit the linear regression on the training split.
lin_reg.fit(X_train, Y_train)

# Error on the data the model was fitted on.
Prediction_train = lin_reg.predict(X_train)
MSE_train = mean_squared_error(Y_train, Prediction_train)
RMSE_train = np.sqrt(MSE_train)

# Error on the held-out split.
Prediction_test = lin_reg.predict(X_test)
MSE_test = mean_squared_error(Y_test, Prediction_test)
RMSE_test = np.sqrt(MSE_test)

# Show every metric: a cell only renders its last expression, so a bare
# `MSE_train` line followed by `MSE_test` would display the test value only.
pd.DataFrame(
    {"MSE": [MSE_train, MSE_test], "RMSE": [RMSE_train, RMSE_test]},
    index=["train", "test"],
)

In [None]:
# Fit the Lasso regression on the training split.
lasso.fit(X_train, Y_train)

# Error on the data the model was fitted on.
Prediction_train = lasso.predict(X_train)
MSE_train = mean_squared_error(Y_train, Prediction_train)
RMSE_train = np.sqrt(MSE_train)

# Error on the held-out split.
Prediction_test = lasso.predict(X_test)
MSE_test = mean_squared_error(Y_test, Prediction_test)
RMSE_test = np.sqrt(MSE_test)

# Show every metric: a cell only renders its last expression, so a bare
# `MSE_train` line followed by `MSE_test` would display the test value only.
pd.DataFrame(
    {"MSE": [MSE_train, MSE_test], "RMSE": [RMSE_train, RMSE_test]},
    index=["train", "test"],
)

In [None]:
# Fit the Ridge regression on the training split.
ridge.fit(X_train, Y_train)

# Error on the data the model was fitted on.
Prediction_train = ridge.predict(X_train)
MSE_train = mean_squared_error(Y_train, Prediction_train)
RMSE_train = np.sqrt(MSE_train)

# Error on the held-out split.
Prediction_test = ridge.predict(X_test)
MSE_test = mean_squared_error(Y_test, Prediction_test)
RMSE_test = np.sqrt(MSE_test)

# Show every metric: a cell only renders its last expression, so a bare
# `MSE_train` line followed by `MSE_test` would display the test value only.
pd.DataFrame(
    {"MSE": [MSE_train, MSE_test], "RMSE": [RMSE_train, RMSE_test]},
    index=["train", "test"],
)

In [None]:
# Fit the decision tree on the training split.
tree.fit(X_train, Y_train)

# Error on the data the model was fitted on.
Prediction_train = tree.predict(X_train)
MSE_train = mean_squared_error(Y_train, Prediction_train)
RMSE_train = np.sqrt(MSE_train)

# Error on the held-out split.
Prediction_test = tree.predict(X_test)
MSE_test = mean_squared_error(Y_test, Prediction_test)
RMSE_test = np.sqrt(MSE_test)

# Show every metric: a cell only renders its last expression, so a bare
# `MSE_train` line followed by `MSE_test` would display the test value only.
# Comparing the two rows is what reveals over-fitting, if any.
pd.DataFrame(
    {"MSE": [MSE_train, MSE_test], "RMSE": [RMSE_train, RMSE_test]},
    index=["train", "test"],
)

In [None]:
# Fit the AdaBoost ensemble on the training split.
# AdaBoostRegressor expects a 1-D target; the target here is a single-column
# 2-D array, so it is flattened to avoid scikit-learn's DataConversionWarning.
adareg.fit(X_train, np.ravel(Y_train))

# Error on the data the model was fitted on.
Prediction_train = adareg.predict(X_train)
MSE_train = mean_squared_error(Y_train, Prediction_train)
RMSE_train = np.sqrt(MSE_train)

# Error on the held-out split.
Prediction_test = adareg.predict(X_test)
MSE_test = mean_squared_error(Y_test, Prediction_test)
RMSE_test = np.sqrt(MSE_test)

# Show every metric: a cell only renders its last expression, so a bare
# `MSE_train` line followed by `MSE_test` would display the test value only.
pd.DataFrame(
    {"MSE": [MSE_train, MSE_test], "RMSE": [RMSE_train, RMSE_test]},
    index=["train", "test"],
)