In [1]:
# Import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, classification_report
from sklearn.model_selection import GridSearchCV
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings; consider a narrower filter.
warnings.filterwarnings("ignore")
# Seed NumPy's legacy global random state so unseeded random draws repeat on a fresh run.
np.random.seed(42)

In [2]:
# Load the dataset; the relative path means the CSV must sit in the working directory.
df = pd.read_csv("adult.csv")

In [3]:
# Display 100 randomly chosen rows to inspect the raw values.
df.sample(100)

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income
1501,49,Private,50282,Some-college,10,Divorced,Machine-op-inspct,Not-in-family,White,Male,3325,0,45,United-States,<=50K
2586,26,?,152046,11th,7,Never-married,?,Not-in-family,White,Female,0,0,35,Germany,<=50K
2653,19,Private,115248,Some-college,10,Never-married,Adm-clerical,Not-in-family,Asian-Pac-Islander,Male,0,0,40,Vietnam,<=50K
1055,32,Private,#NAME?,Bachelors,13,Married-civ-spouse,Sales,Husband,White,Male,0,0,40,United-States,<=50K
705,44,Private,63042,Bachelors,13,Divorced,Exec-managerial,Own-child,White,Female,0,0,50,United-States,>50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4740,33,Private,135312,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,40,United-States,>50K
2940,25,Private,76978,HS-grad,9,Never-married,Sales,Unmarried,Black,Female,0,0,35,United-States,<=50K
3456,22,?,113175,Some-college,10,Never-married,?,Own-child,White,Female,0,0,20,United-States,<=50K
373,45,Private,256649,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,0,0,40,United-States,<=50K


### Replacing the `?` and `#NAME?` placeholders with NaN

In [4]:
# Treat the "?" placeholder as a missing value.
df = df.replace("?", np.nan)

In [5]:
# Treat the "#NAME?" placeholder as a missing value.
df = df.replace("#NAME?", np.nan)

In [6]:
# Count missing values per column after the placeholder replacement.
df.isna().sum()

age                48
workclass         331
fnlwgt            107
education          57
education_num      57
marital_status      0
occupation        331
relationship        0
race              264
sex                47
capital_gain        0
capital_loss        0
hours_per_week      0
native_country     97
income              0
dtype: int64

### Data type conversion for numeric columns

In [7]:
# Cast the numeric columns to explicit dtypes in one pass.
# The builtin `int` / `float` are used because the `np.int` / `np.float` aliases
# were deprecated in NumPy 1.20 and removed in 1.24 (they were plain aliases of
# the builtins, so the resulting dtypes are unchanged).
# The three float columns still contain NaN at this point, so they cannot be
# cast to a plain integer dtype.
df = df.astype({
    'capital_gain': int,
    'capital_loss': int,
    'hours_per_week': int,
    'fnlwgt': float,
    'age': float,
    'education_num': float,
})

### Dropping columns education and race

In [8]:
# Remove the two columns that are not used as features.
df = df.drop(columns=['education', 'race'])

### Dropping the rows where the sex column is NaN

In [9]:
# Discard every row whose `sex` value is missing (row-wise drop is the default axis).
df = df.dropna(subset=['sex'])

In [10]:
# Display 50 randomly chosen rows after the column and row drops.
df.sample(50)

Unnamed: 0,age,workclass,fnlwgt,education_num,marital_status,occupation,relationship,sex,capital_gain,capital_loss,hours_per_week,native_country,income
3915,51.0,Private,186338.0,9.0,Married-civ-spouse,Craft-repair,Husband,Male,0,0,40,United-States,>50K
346,67.0,,36135.0,7.0,Married-civ-spouse,,Husband,Male,0,0,8,United-States,<=50K
572,17.0,Private,242718.0,7.0,Never-married,Sales,Own-child,Male,0,0,12,United-States,<=50K
1375,41.0,Private,102332.0,9.0,Divorced,Sales,Unmarried,Female,0,0,40,United-States,<=50K
4261,26.0,Private,333541.0,9.0,Never-married,Other-service,Not-in-family,Male,0,0,24,United-States,<=50K
2973,36.0,Private,187046.0,9.0,Married-civ-spouse,Sales,Husband,Male,0,0,40,United-States,>50K
690,54.0,Local-gov,113000.0,10.0,Married-civ-spouse,Farming-fishing,Husband,Male,0,0,40,United-States,<=50K
2934,23.0,Private,240063.0,13.0,Never-married,Machine-op-inspct,Own-child,Male,0,0,25,United-States,<=50K
4207,64.0,Private,321166.0,13.0,Divorced,Sales,Not-in-family,Female,0,0,5,United-States,<=50K
566,43.0,Self-emp-inc,188436.0,14.0,Married-civ-spouse,Exec-managerial,Husband,Male,5013,0,45,United-States,<=50K


In [11]:
# Re-count missing values per column now that rows with a missing `sex` are gone.
df.isnull().sum()

age                48
workclass         329
fnlwgt            107
education_num      57
marital_status      0
occupation        329
relationship        0
sex                 0
capital_gain        0
capital_loss        0
hours_per_week      0
native_country     95
income              0
dtype: int64

In [12]:
# Show the dtype of every column after the casts.
df.dtypes

age               float64
workclass          object
fnlwgt            float64
education_num     float64
marital_status     object
occupation         object
relationship       object
sex                object
capital_gain        int32
capital_loss        int32
hours_per_week      int32
native_country     object
income             object
dtype: object

### Filling null values with zero, the column mean, or 'Others'

In [13]:
# Replace missing `fnlwgt` values with the column mean.
# NOTE(review): the mean is taken over the whole frame, i.e. before the train/test split further down.
df['fnlwgt'] = df['fnlwgt'].fillna(df['fnlwgt'].mean())

In [14]:
# Replace missing `age` values with the column mean.
# NOTE(review): the mean is taken over the whole frame, i.e. before the train/test split further down.
df['age'] = df['age'].fillna(df['age'].mean())

In [15]:
# Replace missing `education_num` values with 0.
# NOTE(review): 0 acts as a sentinel here — confirm it does not collide with a real education level.
df['education_num'] = df['education_num'].fillna(0)

In [16]:
# Label missing `workclass` entries with the catch-all category 'Others'.
df = df.fillna({'workclass': 'Others'})

In [17]:
# Label missing `native_country` entries with the catch-all category 'Others'.
df = df.fillna({'native_country': 'Others'})

In [18]:
# Label missing `occupation` entries with the catch-all category 'Others'.
df = df.fillna({'occupation': 'Others'})

In [19]:
# Confirm that no column has missing values left after the fills.
df.isnull().sum()

age               0
workclass         0
fnlwgt            0
education_num     0
marital_status    0
occupation        0
relationship      0
sex               0
capital_gain      0
capital_loss      0
hours_per_week    0
native_country    0
income            0
dtype: int64

### Changing target variable to binary value

In [20]:
# Encode the target: 0 for '<=50K', 1 for every other value.
# NOTE: re-running this cell on the already-encoded column would turn every row into 1.
df['income'] = (df['income'] != '<=50K').astype(int)

In [21]:
# Show the dtypes again; `income` is now an integer column.
df.dtypes

age               float64
workclass          object
fnlwgt            float64
education_num     float64
marital_status     object
occupation         object
relationship       object
sex                object
capital_gain        int32
capital_loss        int32
hours_per_week      int32
native_country     object
income              int32
dtype: object

### Convert to categorical types and one-hot encode them

In [22]:
# Switch every text feature to pandas' `category` dtype in a single cast.
df = df.astype({
    'workclass': 'category',
    'marital_status': 'category',
    'occupation': 'category',
    'relationship': 'category',
    'native_country': 'category',
    'sex': 'category',
})

In [23]:
# One-hot encode the categorical features; each category level becomes its own indicator column.
encodedDF = pd.get_dummies(
    df.loc[:, [
        "workclass",
        "marital_status",
        "occupation",
        "relationship",
        "sex",
        "native_country",
    ]]
)

In [24]:
# Feature matrix = indicator columns followed by the remaining numeric columns.
# The original categorical columns and the target are left out.
X = pd.concat(
    [
        encodedDF,
        df.drop(
            columns=[
                "workclass",
                "marital_status",
                "occupation",
                "relationship",
                "sex",
                "native_country",
                "income",
            ]
        ),
    ],
    axis="columns",
)

In [25]:
# Display the first rows of the assembled feature matrix.
X.head()

Unnamed: 0,workclass_Federal-gov,workclass_Local-gov,workclass_Others,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,workclass_Without-pay,marital_status_Divorced,marital_status_Married-AF-spouse,...,native_country_Trinadad&Tobago,native_country_United-States,native_country_Vietnam,native_country_Yugoslavia,age,fnlwgt,education_num,capital_gain,capital_loss,hours_per_week
0,0,0,0,0,0,0,1,0,0,0,...,0,1,0,0,39.0,77516.0,13.0,2174,0,40
1,0,0,0,0,0,1,0,0,0,0,...,0,1,0,0,50.0,83311.0,13.0,0,0,13
2,0,0,0,1,0,0,0,0,1,0,...,0,1,0,0,38.0,215646.0,9.0,0,0,40
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,28.0,338409.0,13.0,0,0,40
5,0,0,0,1,0,0,0,0,0,0,...,0,1,0,0,37.0,284582.0,14.0,0,0,40


In [26]:
# Target vector: the binary-encoded `income` column.
y= df['income']

### Train and Test Split

In [27]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on `y` — consider `stratify=y` given the class imbalance.
train, test, y_train, y_test = train_test_split(X, y, random_state=101, test_size=0.2)

In [28]:
# Display 100 randomly chosen rows of the training features.
train.sample(100)

Unnamed: 0,workclass_Federal-gov,workclass_Local-gov,workclass_Others,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,workclass_Without-pay,marital_status_Divorced,marital_status_Married-AF-spouse,...,native_country_Trinadad&Tobago,native_country_United-States,native_country_Vietnam,native_country_Yugoslavia,age,fnlwgt,education_num,capital_gain,capital_loss,hours_per_week
3912,0,0,0,1,0,0,0,0,0,0,...,0,1,0,0,49.0,82649.0,13.0,5013,0,45
1467,0,0,0,1,0,0,0,0,1,0,...,0,1,0,0,49.0,101320.0,14.0,0,0,75
3839,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,62.0,151369.0,9.0,0,0,40
2325,0,0,0,1,0,0,0,0,0,0,...,0,1,0,0,39.0,77146.0,13.0,0,1887,50
3884,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,51.0,116286.0,14.0,0,0,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3935,0,0,0,1,0,0,0,0,0,0,...,0,1,0,0,17.0,200199.0,7.0,0,0,35
3164,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,41.0,173938.0,15.0,0,0,50
4182,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,67.0,288371.0,9.0,0,0,40
3616,0,0,0,1,0,0,0,0,0,0,...,0,1,0,0,26.0,189590.0,13.0,0,0,40


### Define functions to fit a model and display the error metrics

In [29]:
def fit_predict(train, test, y_train, y_test, scaler, model):
    """Scale the features, fit ``model`` on the training split and report test metrics.

    The scaler is fitted on ``train`` only and then re-used to transform
    ``test``. Both ``scaler`` and ``model`` are fitted in place. The metrics
    are printed through ``error_metric``; nothing is returned.
    """
    scaled_train_features = scaler.fit_transform(train)
    scaled_test_features = scaler.transform(test)
    # Fit on the scaled training data, then score the held-out predictions.
    model.fit(scaled_train_features, y_train)
    error_metric(y_test, model.predict(scaled_test_features))

In [30]:
def error_metric(y_test, predictions, y_score=None):
    """Print accuracy, confusion matrix, classification report and ROC AUC.

    Parameters
    ----------
    y_test : array-like
        True labels.
    predictions : array-like
        Predicted hard class labels.
    y_score : array-like, optional
        Continuous scores for the positive class (for example the second
        column of ``predict_proba``). When given, ROC AUC is computed from
        these scores. When omitted, ROC AUC is computed from ``predictions``
        exactly as before, which evaluates a single decision threshold only.

    Returns
    -------
    None
        All metrics are written to standard output.
    """
    print('Accuracy:',accuracy_score(y_test,predictions))
    print('Confusion Matrix:',confusion_matrix(y_test,predictions))
    print('Classification Report:',classification_report(y_test,predictions))
    # Prefer continuous scores for the AUC when the caller supplies them.
    auc_input = predictions if y_score is None else y_score
    print('ROC_AUC_Score:',roc_auc_score(y_test,auc_input))

### Logistic Regression

In [31]:
# Baseline: logistic regression with default hyper-parameters on standardized features.
log = LogisticRegression()
fit_predict(train, test, y_train, y_test, StandardScaler(), log)

Accuracy: 0.8244197780020182
Confusion Matrix: [[680  56]
 [118 137]]
Classification Report:               precision    recall  f1-score   support

           0       0.85      0.92      0.89       736
           1       0.71      0.54      0.61       255

    accuracy                           0.82       991
   macro avg       0.78      0.73      0.75       991
weighted avg       0.82      0.82      0.82       991

ROC_AUC_Score: 0.7305839727195226


### Decision Tree

In [32]:
# Baseline: decision tree with default hyper-parameters.
# No random_state is passed to the estimator, so results depend on the global NumPy seed set at the top.
dt = DecisionTreeClassifier()
fit_predict(train, test, y_train, y_test, StandardScaler(), dt)

Accuracy: 0.7921291624621595
Confusion Matrix: [[639  97]
 [109 146]]
Classification Report:               precision    recall  f1-score   support

           0       0.85      0.87      0.86       736
           1       0.60      0.57      0.59       255

    accuracy                           0.79       991
   macro avg       0.73      0.72      0.72       991
weighted avg       0.79      0.79      0.79       991

ROC_AUC_Score: 0.7203777706734867


### Random Forest

In [33]:
# Baseline: random forest with default hyper-parameters.
# `rf` is reused below as the estimator handed to the grid search.
rf = RandomForestClassifier()
fit_predict(train, test, y_train, y_test, StandardScaler(), rf)

Accuracy: 0.8284561049445005
Confusion Matrix: [[678  58]
 [112 143]]
Classification Report:               precision    recall  f1-score   support

           0       0.86      0.92      0.89       736
           1       0.71      0.56      0.63       255

    accuracy                           0.83       991
   macro avg       0.78      0.74      0.76       991
weighted avg       0.82      0.83      0.82       991

ROC_AUC_Score: 0.7409899829497018


### KNN Algorithm

In [34]:
# Baseline: nearest-neighbour classifier using a single neighbour (k=1).
# `knn` is reused below as the estimator handed to the grid search.
knn = KNeighborsClassifier(n_neighbors=1)
fit_predict(train, test, y_train, y_test, StandardScaler(), knn)

Accuracy: 0.7618567103935419
Confusion Matrix: [[622 114]
 [122 133]]
Classification Report:               precision    recall  f1-score   support

           0       0.84      0.85      0.84       736
           1       0.54      0.52      0.53       255

    accuracy                           0.76       991
   macro avg       0.69      0.68      0.69       991
weighted avg       0.76      0.76      0.76       991

ROC_AUC_Score: 0.6833386615515772


## Using GridSearchCV

In [35]:
def gridSearchCV(train, y_train, scaler, params, model,
                 cv=None, scoring=None, n_jobs=None, verbose=3):
    """Run an exhaustive grid search for ``model`` on scaled training data.

    Parameters
    ----------
    train : array-like
        Training features; scaled with ``scaler`` before the search.
    y_train : array-like
        Training labels.
    scaler : transformer
        Object exposing ``fit_transform``; it is fitted in place on ``train``.
    params : dict or list of dict
        Parameter grid forwarded to ``GridSearchCV``.
    model : estimator
        Estimator to tune.
    cv, scoring, n_jobs : optional
        Forwarded unchanged to ``GridSearchCV``. The ``None`` defaults are the
        library's own defaults, so existing calls behave as before. Pass
        ``n_jobs=-1`` to run the fits in parallel.
    verbose : int, default 3
        Logging verbosity forwarded to ``GridSearchCV``; lower it to avoid one
        log line per fit.

    Returns
    -------
    list
        ``[best_params_, best_estimator_]`` of the fitted search.

    Notes
    -----
    NOTE(review): the scaler is fitted on the full training set before the
    cross-validation folds are formed, so each validation fold contributes to
    the scaling statistics. Wrapping scaler and model in a pipeline would
    avoid that.
    """
    train_scaled = scaler.fit_transform(train)
    search = GridSearchCV(
        model,
        params,
        cv=cv,
        scoring=scoring,
        n_jobs=n_jobs,
        verbose=verbose,
    )
    search.fit(train_scaled, y_train)
    return [search.best_params_, search.best_estimator_]


#### Random Forest

In [36]:
# Random-forest grid: 3 x 4 x 3 = 36 parameter combinations.
params = {'n_estimators': [200,500,700], 'max_depth': [10,15,18,20], 'min_samples_leaf': [3,5,7]}
# gridResult holds [best_params_, best_estimator_] as returned by gridSearchCV.
gridResult = gridSearchCV(train, y_train, StandardScaler(), params, rf)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.854, total=   3.7s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.6s remaining:    0.0s


[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.856, total=   3.3s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    6.9s remaining:    0.0s


[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.841, total=   3.4s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.859, total=   3.8s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=200 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=200, score=0.860, total=   3.6s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=0.859, total=  11.2s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=0.855, total=  12.2s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=0.841, total=  10.5s
[CV] max_depth=10, min_samples_leaf=3, n_estimators=500 ..............
[CV]  max_depth=10, min_samples_leaf=3, n_estimators=500, score=

[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.859, total=  19.1s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.856, total=  17.8s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.843, total=  12.8s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.866, total=  12.9s
[CV] max_depth=15, min_samples_leaf=3, n_estimators=700 ..............
[CV]  max_depth=15, min_samples_leaf=3, n_estimators=700, score=0.856, total=  14.2s
[CV] max_depth=15, min_samples_leaf=5, n_estimators=200 ..............
[CV]  max_depth=15, min_samples_leaf=5, n_estimators=200, score=0.852, total=   3.9s
[CV] max_depth=15, min_samples_leaf=5, n_estimators=200 ..............
[CV]  max_depth=15, min_samples_leaf=5, n_estimators=200, score=

[CV]  max_depth=18, min_samples_leaf=5, n_estimators=200, score=0.865, total=   3.6s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=200 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=200, score=0.850, total=   3.7s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.855, total=   9.0s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.859, total=   9.2s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.841, total=   9.9s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=0.864, total=  10.6s
[CV] max_depth=18, min_samples_leaf=5, n_estimators=500 ..............
[CV]  max_depth=18, min_samples_leaf=5, n_estimators=500, score=

[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.856, total=  18.3s
[CV] max_depth=20, min_samples_leaf=5, n_estimators=700 ..............
[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.841, total=  13.7s
[CV] max_depth=20, min_samples_leaf=5, n_estimators=700 ..............
[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.864, total=  14.8s
[CV] max_depth=20, min_samples_leaf=5, n_estimators=700 ..............
[CV]  max_depth=20, min_samples_leaf=5, n_estimators=700, score=0.856, total=  17.0s
[CV] max_depth=20, min_samples_leaf=7, n_estimators=200 ..............
[CV]  max_depth=20, min_samples_leaf=7, n_estimators=200, score=0.856, total=   6.5s
[CV] max_depth=20, min_samples_leaf=7, n_estimators=200 ..............
[CV]  max_depth=20, min_samples_leaf=7, n_estimators=200, score=0.856, total=   7.2s
[CV] max_depth=20, min_samples_leaf=7, n_estimators=200 ..............
[CV]  max_depth=20, min_samples_leaf=7, n_estimators=200, score=

[Parallel(n_jobs=1)]: Done 180 out of 180 | elapsed: 29.9min finished


In [37]:
# Evaluate the tuned forest: gridResult[1] is the best estimator, which fit_predict refits on the scaled training split.
fit_predict(train, test, y_train, y_test, StandardScaler(), gridResult[1])

Accuracy: 0.8375378405650857
Confusion Matrix: [[699  37]
 [124 131]]
Classification Report:               precision    recall  f1-score   support

           0       0.85      0.95      0.90       736
           1       0.78      0.51      0.62       255

    accuracy                           0.84       991
   macro avg       0.81      0.73      0.76       991
weighted avg       0.83      0.84      0.83       991

ROC_AUC_Score: 0.7317268755328218


#### KNN Algorithm

In [38]:
# KNN grid: every neighbour count from 1 to 39 (the upper bound of range is exclusive).
params = {'n_neighbors': range(1,40)}
# Overwrites the random-forest `gridResult` from the previous section.
gridResult = gridSearchCV(train, y_train, StandardScaler(), params, knn)

Fitting 5 folds for each of 39 candidates, totalling 195 fits
[CV] n_neighbors=1 ...................................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ....................... n_neighbors=1, score=0.783, total=   1.6s
[CV] n_neighbors=1 ...................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV] ....................... n_neighbors=1, score=0.777, total=   1.6s
[CV] n_neighbors=1 ...................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.1s remaining:    0.0s


[CV] ....................... n_neighbors=1, score=0.787, total=   1.6s
[CV] n_neighbors=1 ...................................................
[CV] ....................... n_neighbors=1, score=0.790, total=   1.5s
[CV] n_neighbors=1 ...................................................
[CV] ....................... n_neighbors=1, score=0.756, total=   1.6s
[CV] n_neighbors=2 ...................................................
[CV] ....................... n_neighbors=2, score=0.798, total=   1.5s
[CV] n_neighbors=2 ...................................................
[CV] ....................... n_neighbors=2, score=0.810, total=   1.6s
[CV] n_neighbors=2 ...................................................
[CV] ....................... n_neighbors=2, score=0.833, total=   1.6s
[CV] n_neighbors=2 ...................................................
[CV] ....................... n_neighbors=2, score=0.817, total=   1.5s
[CV] n_neighbors=2 ...................................................
[CV] .

[CV] ...................... n_neighbors=13, score=0.822, total=   1.9s
[CV] n_neighbors=13 ..................................................
[CV] ...................... n_neighbors=13, score=0.812, total=   2.0s
[CV] n_neighbors=13 ..................................................
[CV] ...................... n_neighbors=13, score=0.817, total=   1.9s
[CV] n_neighbors=13 ..................................................
[CV] ...................... n_neighbors=13, score=0.835, total=   1.9s
[CV] n_neighbors=13 ..................................................
[CV] ...................... n_neighbors=13, score=0.813, total=   1.9s
[CV] n_neighbors=14 ..................................................
[CV] ...................... n_neighbors=14, score=0.822, total=   1.8s
[CV] n_neighbors=14 ..................................................
[CV] ...................... n_neighbors=14, score=0.813, total=   1.9s
[CV] n_neighbors=14 ..................................................
[CV] .

[CV] ...................... n_neighbors=24, score=0.833, total=   2.4s
[CV] n_neighbors=24 ..................................................
[CV] ...................... n_neighbors=24, score=0.817, total=   3.0s
[CV] n_neighbors=25 ..................................................
[CV] ...................... n_neighbors=25, score=0.827, total=   3.7s
[CV] n_neighbors=25 ..................................................
[CV] ...................... n_neighbors=25, score=0.823, total=   3.9s
[CV] n_neighbors=25 ..................................................
[CV] ...................... n_neighbors=25, score=0.816, total=   3.7s
[CV] n_neighbors=25 ..................................................
[CV] ...................... n_neighbors=25, score=0.832, total=   4.4s
[CV] n_neighbors=25 ..................................................
[CV] ...................... n_neighbors=25, score=0.817, total=   3.4s
[CV] n_neighbors=26 ..................................................
[CV] .

[CV] ...................... n_neighbors=36, score=0.827, total=   2.0s
[CV] n_neighbors=36 ..................................................
[CV] ...................... n_neighbors=36, score=0.819, total=   2.0s
[CV] n_neighbors=36 ..................................................
[CV] ...................... n_neighbors=36, score=0.826, total=   2.1s
[CV] n_neighbors=36 ..................................................
[CV] ...................... n_neighbors=36, score=0.826, total=   2.3s
[CV] n_neighbors=37 ..................................................
[CV] ...................... n_neighbors=37, score=0.830, total=   2.3s
[CV] n_neighbors=37 ..................................................
[CV] ...................... n_neighbors=37, score=0.825, total=   2.3s
[CV] n_neighbors=37 ..................................................
[CV] ...................... n_neighbors=37, score=0.822, total=   2.4s
[CV] n_neighbors=37 ..................................................
[CV] .

[Parallel(n_jobs=1)]: Done 195 out of 195 | elapsed:  7.8min finished


In [39]:
# Evaluate the tuned KNN: gridResult[1] is the best estimator, which fit_predict refits on the scaled training split.
fit_predict(train, test, y_train, y_test, StandardScaler(), gridResult[1])

Accuracy: 0.8092835519677094
Confusion Matrix: [[679  57]
 [132 123]]
Classification Report:               precision    recall  f1-score   support

           0       0.84      0.92      0.88       736
           1       0.68      0.48      0.57       255

    accuracy                           0.81       991
   macro avg       0.76      0.70      0.72       991
weighted avg       0.80      0.81      0.80       991

ROC_AUC_Score: 0.7024536445012787
