In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# The cells below expect a DataFrame named `data`. If it is not already defined, load it here, e.g.:
# data = pd.read_csv("insurance.csv")


In [None]:
# Every cell from here on needs a DataFrame named `data`, but nothing above
# assigns it unconditionally. On a fresh kernel, fall back to the file name
# suggested by the commented-out loading example so that "Restart & Run All"
# does not stop on a NameError. An already-defined `data` is left untouched.
if "data" not in globals():
    data = pd.read_csv("insurance.csv")

# Split the explanatory variables from the target column.
X = data.drop(columns="expenses")
y = data["expenses"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_dataset, test_dataset, train_labels, test_labels = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Columns holding categories that must be one-hot encoded.
categorical_features = ["sex", "smoker", "region"]

# handle_unknown="ignore": the encoder is told not to raise on categories
# it did not see while fitting.
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

# Encode only the categorical columns; remainder="passthrough" forwards every
# other column unchanged.
preprocessor = ColumnTransformer(
    [("cat", categorical_transformer, categorical_features)],
    remainder="passthrough",
)


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Chain preprocessing and the regressor into a single estimator so the same
# transformations are applied at fit time and at predict time.
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        (
            "regressor",
            # Fixed random_state keeps the forest reproducible across runs.
            RandomForestRegressor(n_estimators=100, random_state=42),
        ),
    ]
)


In [None]:
# Fit the whole pipeline (preprocessing + regressor) on the training split only.
model.fit(X=train_dataset, y=train_labels)


In [None]:
# Predict on the held-out test split.
predictions = model.predict(test_dataset)

# Mean absolute error between the true and the predicted values.
mae = mean_absolute_error(y_true=test_labels, y_pred=predictions)
print(f"Mean Absolute Error: ${mae:.2f}")


In [None]:
# Scatter of predicted vs. actual values, drawn through the explicit
# figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(test_labels, predictions, alpha=0.5)

# Dashed red y = x reference line from 0 up to the largest actual value;
# points on this line are perfect predictions.
upper = max(test_labels)
ax.plot([0, upper], [0, upper], color="red", linestyle="--")

ax.set_xlabel("Dépenses réelles")
ax.set_ylabel("Dépenses prédites")
ax.set_title("Prédictions vs Valeurs réelles")
ax.grid(True)
plt.show()
