In [None]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
warnings.simplefilter("ignore")


In [None]:
# Load the raw dataset from the CSV file located next to the notebook.
data = pd.read_csv(filepath_or_buffer='houseData.csv')

In [None]:
# Plain-text preview of the first five rows of the loaded frame.
print(data.head(n=5))

In [None]:
# Report the (rows, columns) size of the loaded frame.
print("Taille du dataset :", (len(data.index), len(data.columns)))

In [None]:
# Columns that are not used as modelling features.
columns_to_drop = ["id", "date", "view", "condition"]

# `data` is rebound to its own reduced version, so a plain `drop` raises
# KeyError when this cell is executed a second time. Dropping only the columns
# that are still present keeps the cell safe to re-run.
data = data.drop(columns=[col for col in columns_to_drop if col in data.columns])
print("Taille après suppression des colonnes inutiles :", data.shape)

In [None]:
# Pairwise scatter plots between the target and a handful of features.
pairplot_columns = ['price', 'bedrooms', 'bathrooms', 'sqft_living', 'floors']
sns.pairplot(data.loc[:, pairplot_columns])
plt.show()

In [None]:
# Target vector: the `price` column as a NumPy array.
Y = data.loc[:, 'price'].values
print("Taille du vecteur cible :", Y.shape[0])

In [None]:
# Feature matrix: every remaining column except the target.
X = data.drop("price", axis="columns")

In [None]:
# List the feature names that feed the models.
print("Colonnes utilisées pour la prédiction :", list(X.columns))

In [None]:
lasso_model = Lasso(alpha=0.2, normalize=True)

In [None]:
lasso_model.fit(X, Y)

In [None]:
print("Coefficients du modèle Lasso :", lasso_model.coef_)

In [None]:
plt.figure(figsize=(10, 6))
plt.bar(X.columns, lasso_model.coef_)
plt.xlabel("Variables")
plt.ylabel("Coefficient")
plt.title("Coefficients du modèle Lasso avec alpha=0.2")
plt.xticks(rotation=45)
plt.show()

In [None]:
lasso_model_2 = Lasso(alpha=1.0, normalize=True)
lasso_model_2.fit(X, Y)
print("Coefficients avec alpha=1.0 :", lasso_model_2.coef_)

In [None]:
plt.figure(figsize=(10, 6))
plt.bar(X.columns, lasso_model_2.coef_)
plt.xlabel("Variables")
plt.ylabel("Coefficient")
plt.title("Coefficients du modèle Lasso avec alpha=1.0")
plt.xticks(rotation=45)
plt.show()

In [None]:
lasso_model_3 = Lasso(alpha=1000, normalize=True)
lasso_model_3.fit(X, Y)
print("Coefficients avec alpha=1000 :", lasso_model_3.coef_)

In [None]:
plt.figure(figsize=(10, 6))
plt.bar(X.columns, lasso_model_3.coef_)
plt.xlabel("Variables")
plt.ylabel("Coefficient")
plt.title("Coefficients du modèle Lasso avec alpha=1000")
plt.xticks(rotation=45)
plt.show()

In [None]:
# 5-fold cross-validation of the first Lasso model; the per-fold scores and
# their average are reported.
scores = cross_val_score(estimator=lasso_model, X=X, y=Y, cv=5)
print("Scores de validation croisée Lasso (5-plis) :", scores)
print("Score moyen Lasso (5-plis) :", scores.mean())

In [None]:
# 50 candidate regularisation strengths, log-spaced from 1e-4 to 1.
alpha_space = np.logspace(start=-4, stop=0, num=50)

In [None]:
# Accumulators for the per-alpha mean and standard deviation of the CV scores.
cv_scores, cv_scores_std = [], []

In [None]:
ridge_model = Ridge(normalize=True)

In [None]:
for alpha in alpha_space:
    ridge_model.alpha = alpha
    ridge_cv_scores = cross_val_score(ridge_model, X, Y, cv=10)
    cv_scores.append(np.mean(ridge_cv_scores))
    cv_scores_std.append(np.std(ridge_cv_scores))

In [None]:
# Averages, over all tested alphas, of the per-alpha CV mean and CV spread.
print("Scores moyens Ridge :", np.mean(cv_scores))
# cv_scores_std holds np.std values, i.e. standard deviations, not variances;
# the label is corrected accordingly.
print("Écart-type moyen Ridge :", np.mean(cv_scores_std))

In [None]:
def display_plot(cv_scores, cv_scores_std, n_folds=10):
    """Plot the mean cross-validation score against alpha with an error band.

    The x values come from the module-level ``alpha_space``; ``cv_scores`` and
    ``cv_scores_std`` must therefore contain one entry per value in it.

    Parameters
    ----------
    cv_scores : sequence of float
        Mean cross-validation score for each alpha.
    cv_scores_std : sequence of float
        Standard deviation of the cross-validation scores for each alpha.
    n_folds : int, default 10
        Number of folds behind each entry, used to turn the standard deviation
        into a standard error (std / sqrt(n_folds)).
    """
    mean_scores = np.asarray(cv_scores, dtype=float)
    # Computed inside the function so the band always matches the arguments
    # instead of a module-level value evaluated when the cell was defined.
    std_error = np.asarray(cv_scores_std, dtype=float) / np.sqrt(n_folds)

    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alpha_space, mean_scores, label='Score moyen Ridge')
    ax.fill_between(
        alpha_space,
        mean_scores + std_error,
        mean_scores - std_error,
        alpha=0.2,
        label='+/- erreur standard',
    )
    # Dashed reference line at the best mean score.
    ax.axhline(np.max(mean_scores), linestyle='--', color='.5')
    # The alphas are log-spaced, so a log axis spreads them evenly.
    ax.set_xscale('log')
    ax.set_xlim(alpha_space[0], alpha_space[-1])
    ax.set_xlabel('Alpha')
    ax.set_ylabel('Score CV +/- erreur standard')
    ax.legend()
    plt.show()

In [None]:
# Draw the Ridge cross-validation curve from the accumulated scores.
display_plot(cv_scores=cv_scores, cv_scores_std=cv_scores_std)