# Initialisation

In [1]:
import json
import os
import sys

import joblib
import matplotlib.pyplot as plt
import pandas as pd

# Make the parent directory importable so the `config` and `src` packages
# resolve; '..' is relative to the kernel's working directory.
sys.path.append('..')

# Project configuration and modelling helpers
from config.parameters import EDA_CONFIG
from src.model import (
    load_data, train_glm, train_lightgbm,
    evaluate_model, predict_test, load_test_data, prepare_validation_lightgbm
)

# Plot styling: take the default figure size from the shared EDA configuration.
plt.rcParams.update({'figure.figsize': EDA_CONFIG['figsize']})


# Chargement des données

In [2]:
# Load the train/validation split produced by the project helper and report
# the shape of each feature matrix.
X_train, X_val, y_train, y_val = load_data()
print(f"Train: {X_train.shape}, Validation: {X_val.shape}")

# Load the test set used for the final prediction, together with its ids.
# NOTE(review): X_test and test_ids are not referenced again in the visible
# cells; they appear to be loaded here only to display their shapes.
X_test, test_ids = load_test_data()
print(f"Test: {X_test.shape}, Test_ids: {test_ids.shape}")


Colonnes supprimées : ['CodeProfession', 'PolicyId', 'StatutMatrimonial']
Train: (17984, 11), Validation: (4497, 11)
Colonnes supprimées : ['CodeProfession', 'PolicyId', 'StatutMatrimonial']
Test: (9636, 11), Test_ids: (9636,)


# Entraînement des modèles

In [3]:
# Fit the GLM on the training split.
glm_model = train_glm(X_train, y_train)


# Fit the LightGBM model on the same training split.
# NOTE(review): the recorded output mentions "Fitting 5 folds for each of 27
# candidates", which suggests a cross-validated hyper-parameter search runs
# inside this call — expect it to be the slow step; confirm in src.model.
lgbm_model = train_lightgbm(X_train, y_train)

Variables numériques: ['AgeConducteur', 'BonusMalus', 'AgeVehicule']
Variables catégorielles: ['SexeConducteur', 'FrequencePaiement', 'ClasseVehicule', 'PuissanceVehicule', 'CarburantVehicule', 'UsageVehicule', 'Garage', 'Region']
Variables catégorielles identifiées : ['SexeConducteur', 'FrequencePaiement', 'ClasseVehicule', 'PuissanceVehicule', 'CarburantVehicule', 'UsageVehicule', 'Garage', 'Region']
Variables catégorielles pour LightGBM: ['SexeConducteur', 'FrequencePaiement', 'ClasseVehicule', 'PuissanceVehicule', 'CarburantVehicule', 'UsageVehicule', 'Garage', 'Region']
Fitting 5 folds for each of 27 candidates, totalling 135 fits
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 201
[LightGBM] [Info] Number of data points in the train set: 14387, number of used features: 11
[Lig

# Sauvegarde des modèles

In [4]:
# Persist both fitted models so they can be reloaded without retraining.
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first (a fresh checkout may not contain it).
os.makedirs('../models', exist_ok=True)
joblib.dump(glm_model,  '../models/glm_model.pkl')
joblib.dump(lgbm_model, '../models/lgbm_model.pkl')

['../models/lgbm_model.pkl']

# Évaluation des modèles

In [5]:
# Score the GLM on the validation split as loaded.
glm_metrics = evaluate_model(glm_model, X_val, y_val)

# The LightGBM model is scored on a transformed copy of the validation
# features. NOTE(review): the recorded output indicates this step encodes the
# categorical variables; X_train is presumably passed so the encoding matches
# the one used at training time — confirm in src.model.
X_val_lgb = prepare_validation_lightgbm(X_val, X_train)
lgbm_metrics = evaluate_model(lgbm_model, X_val_lgb, y_val)

Encodage des variables catégorielles pour validation: ['SexeConducteur', 'FrequencePaiement', 'ClasseVehicule', 'PuissanceVehicule', 'CarburantVehicule', 'UsageVehicule', 'Garage', 'Region']


In [6]:
# Side-by-side comparison of the validation metrics of both models.
metric_names = ['RMSE', 'MAE', 'R2', 'MAPE']
results_df = pd.DataFrame({
    'Metric': metric_names,
    'GLM': [glm_metrics[name] for name in metric_names],
    'LightGBM': [lgbm_metrics[name] for name in metric_names],
})

# Render both score columns with two decimals.
two_decimals = '{:.2f}'
display(results_df.style.format({'GLM': two_decimals, 'LightGBM': two_decimals}))


Unnamed: 0,Metric,GLM,LightGBM
0,RMSE,122.82,107.91
1,MAE,84.4,73.4
2,R2,0.68,0.75
3,MAPE,21.0,17.98


# Sauvegarde des performances

In [7]:
# Collect the validation metrics to persist next to the model artefacts.
# Only the LightGBM scores are written, under the 'models_performance' key.
metric_keys = ("RMSE", "MAE", "R2", "MAPE")
performance_data = {
    'models_performance': {key: lgbm_metrics[key] for key in metric_keys}
}

# Write the report as indented UTF-8 JSON, keeping non-ASCII characters as-is.
json_path = '../models/model_performance.json'
with open(json_path, mode='w', encoding='utf-8') as fh:
    json.dump(performance_data, fh, indent=4, ensure_ascii=False)

# Génération et sauvegarde de la soumission

In [8]:

# Hand both fitted models to the project helper, which writes the submission
# CSV at the given path.
submission_path = '../result/submission.csv'
predict_test({'glm': glm_model, 'lgbm': lgbm_model},
             output_path=submission_path)

# Read the written file back and preview its first rows.
submission = pd.read_csv(submission_path)
submission.head()


Colonnes supprimées : ['CodeProfession', 'PolicyId', 'StatutMatrimonial']
Données de test chargées: (9636, 11)
Types de données dans X_test:
object     8
int64      2
float64    1
Name: count, dtype: int64
Encodage des variables catégorielles pour validation: ['SexeConducteur', 'FrequencePaiement', 'ClasseVehicule', 'PuissanceVehicule', 'CarburantVehicule', 'UsageVehicule', 'Garage', 'Region']

=== STATISTIQUES DES PRÉDICTIONS ===
GLM - Min: 26.12, Max: 1587.03, Moyenne: 416.93
LightGBM - Min: 106.85, Max: 1667.47, Moyenne: 418.41
Ensemble - Min: 99.85, Max: 1573.20, Moyenne: 417.67
Soumission enregistrée dans ../result/submission.csv


Unnamed: 0,PolicyId,PrimeCommercialePred
0,90161646.100a,531.06181
1,90146211.102b,317.369272
2,90168719.100a,214.891262
3,90132211.100a,1040.468636
4,1000755.10a,376.74549
