In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline

In [2]:
# Move into the project directory so the relative dataset path below resolves.
# The historical absolute path stays the default, so existing setups behave as
# before; set ASSURANCE_PROJECT_DIR to point at another checkout instead of
# editing this cell.
project_dir = os.environ.get('ASSURANCE_PROJECT_DIR', '/home/utilisateur/projet/Assurance/Assurance_FE/')
if os.path.isdir(project_dir):
    os.chdir(project_dir)
else:
    # Do not crash on machines where that directory is absent: keep the current
    # working directory and let read_csv report a missing file, if any.
    print(f"Project directory {project_dir!r} not found; staying in {os.getcwd()!r}")
print(os.listdir())
dataset_path = "Clean_Dataset_Brief.csv"
df_clean = pd.read_csv(dataset_path)

['NoteBook_Modelisation.ipynb', 'Clean_Dataset_Brief.csv', 'testmodel.ipynb', 'NoteBook_Netoyage.ipynb', '.git', 'Guide.txt', 'note.txt', 'Dataset_Brief.csv', 'README.md', 'requirements.txt', 'NoteBook_Analyse.ipynb', '.gitignore']


In [3]:
# Display summary statistics (count, mean, std, min, quartiles, max) of df_clean.
df_clean.describe()

Unnamed: 0,age,Jeune,Adulte,Adulte_moyen,Senior,Très_senior,sex,is_female,is_male,bmi,...,children_4,children_5,smoker,is_smoker,is_not_smoker,is_southwest,is_southeast,is_northwest,is_northeast,charges
count,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,...,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0,1335.0
mean,39.253933,0.0,0.408989,0.302622,0.28839,0.0,0.494382,0.494382,0.505618,30.658157,...,0.018727,0.013483,0.205243,0.205243,0.794757,0.243446,0.27191,0.242697,0.241948,13286.778216
std,14.030779,0.0,0.491831,0.459565,0.453183,0.0,0.500156,0.500156,0.500156,6.101456,...,0.135609,0.115375,0.404031,0.404031,0.404031,0.429323,0.445111,0.428874,0.428423,12115.61515
min,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.96,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1121.8739
25%,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.255,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4746.69845
50%,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,30.4,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,9386.1613
75%,51.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,34.6875,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,16717.01075
max,64.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,53.13,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,63770.42801


In [4]:
# Predictor columns fed to the models, listed group by group.
selected_features = [
    # age and its indicator columns
    'age', 'Jeune', 'Adulte', 'Adulte_moyen', 'Senior', 'Très_senior',
    # sex and its indicator columns
    'sex', 'is_female', 'is_male',
    # BMI and its category columns
    'bmi', 'Insuffisance pondérale', 'Poids_normal', 'Surpoids',
    'Obésité_de_classe_I_(modérée)', 'Obésité_de_classe_II_(sévère)',
    # number of children and its indicator columns
    'children', 'children_0', 'children_1', 'children_2',
    'children_3', 'children_4', 'children_5',
    # smoker status
    'smoker', 'is_smoker', 'is_not_smoker',
    # region indicators
    'is_southwest', 'is_southeast', 'is_northwest', 'is_northeast',
]

# Feature matrix and regression target.
x = df_clean[selected_features]
y = df_clean['charges']

In [5]:
# Shuffled 85 % / 15 % split with a fixed seed, stratified on the is_smoker column.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.85,
    shuffle=True,
    stratify=x['is_smoker'],
    random_state=42,
)

In [6]:
# Candidate L1 penalties: 100, 200, then 205 to 250 in steps of 5, and 500.
param_grid = {'lasso__alpha': [100, 200, *range(205, 255, 5), 500]}

# Scale -> degree-2 polynomial expansion -> Lasso regression.
model = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    Lasso(max_iter=100000, random_state=42),
)

# 5-fold cross-validated search over the grid; fit() returns the search object itself.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5).fit(X_train, y_train)
best_alpha = grid_search.best_params_['lasso__alpha']

# Evaluate the selected model on the held-out test set.
y_pred = grid_search.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(
    grid_search.best_params_,
    "Performance du modèle final:",
    f"R2 Score: {r2:.4f}",
    f"RMSE: {rmse:.2f}",
    sep="\n",
)


{'lasso__alpha': 200}
Performance du modèle final:
R2 Score: 0.8799
RMSE: 4179.55


In [7]:

# Hyper-parameter grid for the ElasticNet step of the pipeline
# (penalty strength alpha and L1/L2 mixing ratio l1_ratio).
param_grid = {'elasticnet__alpha': [0.001, 0.01, 0.1, 1, 10], 'elasticnet__l1_ratio': [0.0001, 0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999, 1]}

# Scale -> degree-2 polynomial expansion -> ElasticNet regression.
# The alpha / l1_ratio passed here are only starting values: the grid search
# overrides them with every combination from param_grid.
model = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    ElasticNet(alpha=0.01, l1_ratio=0.01, random_state=42, max_iter=10000, tol=0.001)
)

# NOTE(review): the recorded output of this cell shows warnings raised from the
# coordinate-descent solver (probably convergence warnings) - consider a larger
# max_iter or a coarser grid; confirm before changing the search space.
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_alpha = grid_search.best_params_['elasticnet__alpha']
best_l1_ratio = grid_search.best_params_['elasticnet__l1_ratio']

# Predict with the ElasticNet search that was just fitted. Without this line the
# metrics below are computed on a y_pred left over from an earlier cell (or the
# cell fails with NameError on a fresh kernel).
y_pred = grid_search.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(grid_search.best_params_)
print("Performance du modèle final:")
print(f"R2 Score: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
