In [2]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Load the dataset from the Excel workbook.
df = pd.read_excel('DATA.xlsx', engine='openpyxl')

# The 'I_CLASS' column is the target; every other column is used as a feature.
y = df['I_CLASS']
X = df.drop(columns='I_CLASS')

# Hold out 30% of the rows as a test set so the gradient boosting model is
# scored on data it was not trained on. The fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build the gradient boosting classifier and fit it on the training split.
# `fit` returns the estimator itself, so `gb` ends up bound to the fitted model.
gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,  # fixed seed for reproducible training
).fit(X_train, y_train)

# Score the fitted model on the held-out test split.
y_pred = gb.predict(X_test)

# Overall accuracy, then macro-averaged precision / recall / F1
# (macro = unweighted mean of the per-class scores).
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "Precision:",
    precision_score(y_true=y_test, y_pred=y_pred, average='macro'),
)
print(
    "Recall:",
    recall_score(y_true=y_test, y_pred=y_pred, average='macro'),
)
print(
    "F1 Score:",
    f1_score(y_true=y_test, y_pred=y_pred, average='macro'),
)

# Collect the fitted model's feature importances and print them as a table,
# pairing each feature column name with its importance, largest first.
importances = gb.feature_importances_

features_df = pd.DataFrame(
    {
        'Feature': X.columns,
        'Importance': importances,
    }
)
features_df = features_df.sort_values(by='Importance', ascending=False)

print(features_df)


Accuracy: 0.855194840193313
Precision: 0.8571854171334048
Recall: 0.8552666253452974
F1 Score: 0.8550100701482023
                Feature  Importance
1          DUR_P_BoxCox    0.421218
9        t24_Profession    0.136414
6             CATEGORIE    0.098107
0      MT_ACCORD_BoxCox    0.097812
2            Age_BoxCox    0.091091
3  Revenu_estime_BoxCox    0.074656
5             m2_Wilaya    0.033909
4                  CODE    0.031865
7             t18_Genre    0.012123
8         t23_EtatCivil    0.002805
