In [14]:
import pandas as pd
import numpy as np

from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
from xgboost import XGBRegressor
from scipy.stats import randint

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, classification_report, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split


In [15]:
# Importer le DataFrame propre depuis le fichier CSV
df = pd.read_csv('data.csv')

  df = pd.read_csv('data.csv')


In [16]:
# Remplacer les valeurs non numériques par 0 et le type des variables
df['NewExist'] = df['NewExist'].fillna(0)
df['UrbanRural'] = df['UrbanRural'].fillna(0)


# Convertir la colonne en type entier
df['NewExist'] = df['NewExist'].astype(int)
df['NewExist'].astype(int)

df['UrbanRural'] = df['UrbanRural'].astype(int)
df['UrbanRural'].astype(int)

df.dtypes

NAICS                 int64
ApprovalFY           object
Term                  int64
NewExist              int64
FranchiseCode         int64
UrbanRural            int64
RevLineCr            object
LowDoc               object
MIS_Status            int64
ApprovalDate_Year     int64
dtype: object

In [17]:
#Supprimer les colonnes inutiles
df = df.drop(['ApprovalFY', 'ApprovalDate_Year'], axis=1)

#traiter la colonen NAICS pour qu'elle ne contienne que les 2 premiers chiffres des valeurs NAICS
df['NAICS_digit'] = (df['NAICS'] / 10000 ).astype(int)
df = df.drop(['NAICS'], axis=1)
df.head(2)

Unnamed: 0,Term,NewExist,FranchiseCode,UrbanRural,RevLineCr,LowDoc,MIS_Status,NAICS_digit
0,84,2,1,0,N,N,0,45
1,60,2,1,0,N,N,0,72


### Modélisation

#### 1. RandomForest Simple avec SimpleImputer

In [21]:
# Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz

In [28]:
#separer dataset en features et target
X = df.drop('MIS_Status', axis=1)
y = df['MIS_Status']

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, test_size=0.05, random_state=42, stratify= y)

cat_col = ['RevLineCr', 'LowDoc', 'FranchiseCode', 'UrbanRural', 'NewExist']
num_col = ['NAICS_digit', 'Term' ]

In [25]:
# Test RandomForest sans hyperparamètres 
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop= 'if_binary'))
])

num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier())])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

# importance des caractéristiques
feature_importance = pipeline.named_steps['classifier'].feature_importances_
print("Importance des caractéristiques :", feature_importance)

print('_____________________')
print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)
print('_____________________')

#matrice confusion
conf_matrix = confusion_matrix(y_test, y_pred)
print('_____________________')

print("Métrique pour le modèle RandomForest Simple")
print("Score d'accuracy", accuracy_score(y_test, y_pred))
print("Score du recall : ", recall_score(y_test, y_pred))
print("Score de la precision : ", precision_score(y_test, y_pred))
print("Score F1 : ", f1_score(y_test, y_pred))
print("")
print(classification_report(y_test, y_pred))



Importance des caractéristiques : [0.00517356 0.00513744 0.03632921 ... 0.00134514 0.04632898 0.81558314]
_____________________
Score du modèle (train) : 0.9467539903415197
Score du modèle (test) : 0.9265699190797834
_____________________
_____________________
Métrique pour le modèle RandomForest Simple
Score d'accuracy 0.9265699190797834
Score du recall :  0.7625031733942625
Score de la precision :  0.8084791386271871
Score F1 :  0.7848183956101386

              precision    recall  f1-score   support

           0       0.95      0.96      0.96     36981
           1       0.81      0.76      0.78      7878

    accuracy                           0.93     44859
   macro avg       0.88      0.86      0.87     44859
weighted avg       0.93      0.93      0.93     44859



#### 2.Testons KNNImputer

In [27]:
# Test KNNImputer
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop = 'if_binary'))
])

num_transformer = Pipeline(steps=[
    ('imputer', KNNImputer(n_neighbors = 3, weights = 'uniform')),
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier())])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)

# matrice confusion
conf_matrix = confusion_matrix(y_test, y_pred)

print("Métrique pour le modèle AdaboostClassifierr")
print("Score d'accuracy", accuracy_score(y_test, y_pred))
print("Score du recall : ", recall_score(y_test, y_pred))
print("Score de la precision : ", precision_score(y_test, y_pred))
print("Score F1 : ", f1_score(y_test, y_pred))
print("")
print(classification_report(y_test, y_pred))

Score du modèle (train) : 0.9467575101958446
Score du modèle (test) : 0.927104928776834
Métrique pour le modèle AdaboostClassifierr
Score d'accuracy 0.927104928776834
Score du recall :  0.764026402640264
Score de la precision :  0.8100942126514132
Score F1 :  0.7863862032923961

              precision    recall  f1-score   support

           0       0.95      0.96      0.96     36981
           1       0.81      0.76      0.79      7878

    accuracy                           0.93     44859
   macro avg       0.88      0.86      0.87     44859
weighted avg       0.93      0.93      0.93     44859



In [16]:
# Tests KNNImputer pour les variables numeriques
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop = 'if_binary'))
])

num_transformer = Pipeline(steps=[
    ('imputer', KNNImputer(n_neighbors = 3, weights = 'uniform')),
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(n_estimators=80, max_depth=40, min_samples_leaf=5, max_features='sqrt'))])

pipeline.fit(X_train, y_train)

# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)


Score du modèle (train) : 0.8975565050467521
Score du modèle (test) : 0.8926178985030707


In [33]:
#on joue avec les hyperparametre

# Tests KNNImputer pour les variables numeriques
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop= 'if_binary'))
])

num_transformer = Pipeline(steps=[
    ('imputer', IterativeImputer (max_iter=10, random_state=0)),
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(n_estimators= 80, max_depth=40, min_samples_leaf=5, max_features='sqrt'))])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)


# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)

# matrice confusion
conf_matrix = confusion_matrix(y_test, y_pred)

print("Métrique pour le modèle AdaboostClassifierr")
print("Score d'accuracy", accuracy_score(y_test, y_pred))
print("Score du recall : ", recall_score(y_test, y_pred))
print("Score de la precision : ", precision_score(y_test, y_pred))
print("Score F1 : ", f1_score(y_test, y_pred))
print("")
print(classification_report(y_test, y_pred))



Score du modèle (train) : 0.8981764808027145
Score du modèle (test) : 0.8956508170043915
Métrique pour le modèle AdaboostClassifierr
Score d'accuracy 0.8956508170043915
Score du recall :  0.46585427773546584
Score de la precision :  0.8858315230509293
Score F1 :  0.6105981199567423

              precision    recall  f1-score   support

           0       0.90      0.99      0.94     36981
           1       0.89      0.47      0.61      7878

    accuracy                           0.90     44859
   macro avg       0.89      0.73      0.78     44859
weighted avg       0.89      0.90      0.88     44859



### Modélisation 

#### 1. Randomized search + RandomForest

In [8]:
#separer dataset en features et target
X = df.drop('MIS_Status', axis=1)
y = df['MIS_Status']

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, test_size=0.05, random_state=42, stratify= y)

cat_col = ['RevLineCr', 'LowDoc', 'FranchiseCode' ]
num_col = ['NAICS_digit', 'Term', 'NewExist', 'UrbanRural']

In [56]:
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

In [38]:
# random search cv
hyper_grid = {'classifier__max_depth':list(np.arange(10, 100, step=10)) + [30],
              'classifier__n_estimators':[100],
              'classifier__max_features':randint(1,7),
              'classifier__min_samples_leaf':randint(1,4),
              'classifier__min_samples_split':np.arange(2, 10, step=2)
          }


preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier())])

In [None]:
random_cv = RandomizedSearchCV(estimator= pipeline,
                               param_distributions=hyper_grid,
                               cv = 3,
                               n_iter= 9,
                               scoring = 'accuracy',
                               n_jobs= None,
                               return_train_score = True,
                               random_state = 42)

random_cv.fit(X_train, y_train)

# Afficher les meilleurs paramètres
print("Meilleurs paramètres:", random_cv.best_params_)

# Performance du meilleur modèle trouvé
score_tr = random_cv.best_estimator_.score(X_train, y_train)
score_te = random_cv.best_estimator_.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)


# #modele sans randomizedsearchcv
# #pipeline.fit(X_train, y_train)

# # performance du modèle
# score_tr = pipeline.score(X_train, y_train)
# score_te = pipeline.score(X_test, y_test)

# print("Score du modèle (train) :", score_tr)
# print("Score du modèle (test) :", score_te)




RandomizedSearchCV donne les résultats suivants:
Meilleurs paramètres: {'classifier__max_depth': 70, 'classifier__max_features': 4, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 100}
Score du modèle (train) : 0.824524119140504
Score du modèle (test) : 0.8231104472953844

On peut restreindre la recherche d'hyperparamètres à des valeurs proches de ces résultats dans la gridsearchcv.


In [65]:
# on garde la même configuration des X et y.

# GridSearchCv
params = {'classifier__max_depth': [60, 70, 80],
              'classifier__n_estimators':[90, 100, 110],
              'classifier__max_features': [3, 4, 5],
              'classifier__min_samples_leaf':[1,2],
              'classifier__min_samples_split': [5,6,7]
          }


preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier())])

In [66]:
grid = GridSearchCV(pipeline, param_grid = params, scoring = 'accuracy', cv = 4)

grid.fit(X_train, y_train)
print("Meilleurs paramètres de GridSearch:", grid.best_params_)


# Performance du meilleur modèle trouvé
score_tr = grid.best_estimator_.score(X_train, y_train)
score_te = grid.best_estimator_.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)


### Hyperparamétres du modéle

* Random Forest
     - max_depth
     - min_sample_split
     - max_leaf_nodes
     - min_samples_leaf
     - n_estimators
     - max_sample (bootstrap sample)
     - max_features


In [None]:

# random search cv
hyper_grid = {
    'preprocessor__num__imputer__strategy': ['mean', 'median'],  # Ajoutez les stratégies d'imputation numérique ici si nécessaire
    'classifier_max_depth': list(np.arange(10, 100, step=10)) + [None],
    'classifier__n_estimators': np.arange(10, 500, step=50),
    'classifier__max_features': randint(1, 7),
    'classifier__criterion': ['gini', 'entropy'],
    'classifier__min_samples_leaf': randint(1, 4),
    'classifier__min_samples_split': np.arange(2, 10, step=2)
}

# Création du preprocessor pour gérer les deux types de colonnes
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline incluant le prétraitement et le modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier())])

random_cv = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=hyper_grid,
    cv=3,
    n_iter=5,
    scoring='accuracy',  # ou une autre métrique que vous souhaitez évaluer
    n_jobs=-1,
    return_train_score=True,
    random_state=42
)

# Fit du RandomizedSearchCV
random_cv.fit(X_train, y_train)

# Afficher les meilleurs paramètres
print("Meilleurs paramètres:", random_cv.best_params_)

# Performance du meilleur modèle trouvé
score_tr = random_cv.best_estimator_.score(X_train, y_train)
score_te = random_cv.best_estimator_.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)


#### Boosting

##### Adaboost

In [38]:
#adaboost

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, classification_report

#separer dataset en features et target
X = df.drop('MIS_Status', axis=1)
y = df['MIS_Status']

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, test_size=0.05, random_state=42, stratify=y)

cat_col = ['RevLineCr', 'LowDoc', 'FranchiseCode' ]
num_col = ['NAICS_digit', 'Term', 'NewExist', 'UrbanRural']

cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

In [39]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle RandomForest
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', AdaBoostClassifier())])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)

#matrice confusion
conf_matrix = confusion_matrix(y_test, y_pred)

print("Métrique pour le modèle AdaboostClassifierr")
print("Score d'accuracy", accuracy_score(y_test, y_pred))
print("Score du recall : ", recall_score(y_test, y_pred))
print("Score de la precision : ", precision_score(y_test, y_pred))
print("Score F1 : ", f1_score(y_test, y_pred))
print("")
print(classification_report(y_test, y_pred))

Score du modèle (train) : 0.8935711189547341
Score du modèle (test) : 0.894044606930682
Métrique pour le modèle XGBClassifier
Score d'accuracy 0.894044606930682
Score du recall :  0.5917454316320101
Score de la precision :  0.7562409405701401
Score F1 :  0.663956447963801

              precision    recall  f1-score   support

           0       0.92      0.96      0.94     73847
           1       0.76      0.59      0.66     15870

    accuracy                           0.89     89717
   macro avg       0.84      0.78      0.80     89717
weighted avg       0.89      0.89      0.89     89717



#### XGBoost

In [30]:
from xgboost import XGBClassifier

cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop = 'if_binary'))
])

num_transformer = Pipeline(steps=[
    ('imputer', KNNImputer(n_neighbors = 3, weights = 'distance')),
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle XGBoost
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', XGBClassifier(random_state=42))])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)

#matrice confusion
conf_matrix = confusion_matrix(y_test, y_pred)

print("Métrique pour le modèle XGBClassifier")
print("Score d'accuracy", accuracy_score(y_test, y_pred))
print("Score du recall : ", recall_score(y_test, y_pred))
print("Score de la precision : ", precision_score(y_test, y_pred))
print("Score F1 : ", f1_score(y_test, y_pred))
print("")
print(classification_report(y_test, y_pred))




Score du modèle (train) : 0.930283418670246
Score du modèle (test) : 0.9302035266055864
Métrique pour le modèle XGBClassifier
Score d'accuracy 0.9302035266055864
Score du recall :  0.765041888804265
Score de la precision :  0.8248255097851376
Score F1 :  0.793809680605861

              precision    recall  f1-score   support

           0       0.95      0.97      0.96     36981
           1       0.82      0.77      0.79      7878

    accuracy                           0.93     44859
   macro avg       0.89      0.87      0.88     44859
weighted avg       0.93      0.93      0.93     44859





Rappel de l'encodage pour la variable MIS_Status: {'P I F': 0, 'CHGOFF': 1}



In [32]:
from catboost import CatBoostClassifier
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop = 'if_binary'))
])

num_transformer = Pipeline(steps=[
    ('imputer', KNNImputer(n_neighbors = 3, weights = 'distance')),
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', cat_transformer, cat_col),
        ('num', num_transformer, num_col)
    ])

# Création du pipeline : prétraitement + modèle CatBoost
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', CatBoostClassifier(random_state=42))])

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

# performance du modèle
score_tr = pipeline.score(X_train, y_train)
score_te = pipeline.score(X_test, y_test)

print("Score du modèle (train) :", score_tr)
print("Score du modèle (test) :", score_te)

#matrice confusion
conf_matrix = confusion_matrix(y_test, y_pred)

print("Métrique pour le modèle XGBClassifier")
print("Score d'accuracy", accuracy_score(y_test, y_pred))
print("Score du recall : ", recall_score(y_test, y_pred))
print("Score de la precision : ", precision_score(y_test, y_pred))
print("Score F1 : ", f1_score(y_test, y_pred))
print("")
print(classification_report(y_test, y_pred))


Learning rate set to 0.18378
0:	learn: 0.5136799	total: 122ms	remaining: 2m 2s
1:	learn: 0.4043480	total: 202ms	remaining: 1m 40s
2:	learn: 0.3537985	total: 287ms	remaining: 1m 35s
3:	learn: 0.3219808	total: 399ms	remaining: 1m 39s
4:	learn: 0.3028790	total: 518ms	remaining: 1m 43s
5:	learn: 0.2878197	total: 599ms	remaining: 1m 39s
6:	learn: 0.2784570	total: 684ms	remaining: 1m 36s
7:	learn: 0.2685006	total: 768ms	remaining: 1m 35s
8:	learn: 0.2550431	total: 850ms	remaining: 1m 33s
9:	learn: 0.2496964	total: 928ms	remaining: 1m 31s
10:	learn: 0.2451365	total: 1s	remaining: 1m 30s
11:	learn: 0.2420253	total: 1.07s	remaining: 1m 28s
12:	learn: 0.2374728	total: 1.15s	remaining: 1m 27s
13:	learn: 0.2339467	total: 1.23s	remaining: 1m 26s
14:	learn: 0.2318160	total: 1.31s	remaining: 1m 25s
15:	learn: 0.2269757	total: 1.39s	remaining: 1m 25s
16:	learn: 0.2250210	total: 1.48s	remaining: 1m 25s
17:	learn: 0.2236544	total: 1.55s	remaining: 1m 24s
18:	learn: 0.2207040	total: 1.63s	remaining: 1m 2



Score du modèle (train) : 0.9345072438602008
Score du modèle (test) : 0.9337033817071267
Métrique pour le modèle XGBClassifier
Score d'accuracy 0.9337033817071267
Score du recall :  0.789032749428789
Score de la precision :  0.8257173219978746
Score F1 :  0.8069583279241853

              precision    recall  f1-score   support

           0       0.96      0.96      0.96     36981
           1       0.83      0.79      0.81      7878

    accuracy                           0.93     44859
   macro avg       0.89      0.88      0.88     44859
weighted avg       0.93      0.93      0.93     44859



#### Stacking/Voting