In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from sklearn.naive_bayes import GaussianNB
import warnings
warnings.filterwarnings('ignore')

In [2]:
x_train = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/x_train.npy')
y_train = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/y_train.npy')
x_test = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/x_test.npy')
y_test = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/y_test.npy')
x_train_norm = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/x_train_norm.npy')
x_test_norm = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/x_test_norm.npy')
x_train_stand = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/x_train_stand.npy')
x_test_stand = np.load('/home/jaco/Documentos/Projetos/SantanderCustomer/x_test_stand.npy')

In [3]:
Classifiers = {
    "Logistic Regression": LogisticRegression(),
    "XGBoost": GradientBoostingClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB()
}

In [5]:
for key, classifier in Classifiers.items():
    classifier_fitted = classifier.fit(x_train, y_train)
    y_pred = classifier_fitted.predict(x_test)
    print('\n Classifier: ', classifier.__class__.__name__,'\n', classification_report(y_test, y_pred))
    print('\n Confusion Matrix: ', classifier.__class__.__name__,'\n', confusion_matrix(y_test, y_pred))
    print('\n ROC Score: ', classifier.__class__.__name__, '\n', roc_auc_score(y_test,y_pred))


 Classifier:  LogisticRegression 
               precision    recall  f1-score   support

         0.0       0.73      0.72      0.72     53998
         1.0       0.72      0.73      0.73     53944

    accuracy                           0.72    107942
   macro avg       0.73      0.72      0.72    107942
weighted avg       0.73      0.72      0.72    107942


 Confusion Matrix:  LogisticRegression 
 [[38874 15124]
 [14562 39382]]

 ROC Score:  LogisticRegression 
 0.7249844705636299

 Classifier:  GradientBoostingClassifier 
               precision    recall  f1-score   support

         0.0       0.78      0.76      0.77     53998
         1.0       0.76      0.78      0.77     53944

    accuracy                           0.77    107942
   macro avg       0.77      0.77      0.77    107942
weighted avg       0.77      0.77      0.77    107942


 Confusion Matrix:  GradientBoostingClassifier 
 [[40971 13027]
 [11722 42222]]

 ROC Score:  GradientBoostingClassifier 
 0.7707254512318

In [5]:
for key, classifier in Classifiers.items():
    classifier_fitted = classifier.fit(x_train_norm, y_train)
    y_pred = classifier_fitted.predict(x_test_norm)
    print('\n Classifier: ', classifier.__class__.__name__,'\n', classification_report(y_test, y_pred))
    print('\n Confusion Matrix: ', classifier.__class__.__name__,'\n', confusion_matrix(y_test, y_pred))
    print('\n ROC Score: ', classifier.__class__.__name__, '\n', roc_auc_score(y_test,y_pred))


 Classifier:  LogisticRegression 
               precision    recall  f1-score   support

         0.0       0.74      0.71      0.72     53998
         1.0       0.72      0.74      0.73     53944

    accuracy                           0.73    107942
   macro avg       0.73      0.73      0.73    107942
weighted avg       0.73      0.73      0.73    107942


 Confusion Matrix:  LogisticRegression 
 [[38526 15472]
 [13811 40133]]

 ROC Score:  LogisticRegression 
 0.7287230514335077

 Classifier:  GradientBoostingClassifier 
               precision    recall  f1-score   support

         0.0       0.76      0.79      0.77     53998
         1.0       0.78      0.75      0.76     53944

    accuracy                           0.77    107942
   macro avg       0.77      0.77      0.77    107942
weighted avg       0.77      0.77      0.77    107942


 Confusion Matrix:  GradientBoostingClassifier 
 [[42548 11450]
 [13476 40468]]

 ROC Score:  GradientBoostingClassifier 
 0.7690702434384

In [6]:
for key, classifier in Classifiers.items():
    classifier_fitted = classifier.fit(x_train_stand, y_train)
    y_pred = classifier_fitted.predict(x_test_stand)
    print('\n Classifier: ', classifier.__class__.__name__,'\n', classification_report(y_test, y_pred))
    print('\n Confusion Matrix: ', classifier.__class__.__name__,'\n', confusion_matrix(y_test, y_pred))
    print('\n ROC Score: ', classifier.__class__.__name__, '\n', roc_auc_score(y_test,y_pred))


 Classifier:  LogisticRegression 
               precision    recall  f1-score   support

         0.0       0.73      0.72      0.73     53998
         1.0       0.73      0.74      0.73     53944

    accuracy                           0.73    107942
   macro avg       0.73      0.73      0.73    107942
weighted avg       0.73      0.73      0.73    107942


 Confusion Matrix:  LogisticRegression 
 [[38958 15040]
 [14259 39685]]

 ROC Score:  LogisticRegression 
 0.7285707451899902

 Classifier:  GradientBoostingClassifier 
               precision    recall  f1-score   support

         0.0       0.78      0.76      0.77     53998
         1.0       0.76      0.78      0.77     53944

    accuracy                           0.77    107942
   macro avg       0.77      0.77      0.77    107942
weighted avg       0.77      0.77      0.77    107942


 Confusion Matrix:  GradientBoostingClassifier 
 [[41003 12995]
 [11655 42289]]

 ROC Score:  GradientBoostingClassifier 
 0.7716427728877

In [4]:
for key, classifier in Classifiers.items():
    classifier_fitted = classifier.fit(x_train, y_train)
    y_pred = classifier_fitted.predict(x_test)
    print('\n Classifier: ', classifier.__class__.__name__,'\n', classification_report(y_test, y_pred))
    print('\n Confusion Matrix: ', classifier.__class__.__name__,'\n', confusion_matrix(y_test, y_pred))
    print('\n ROC Score: ', classifier.__class__.__name__, '\n', roc_auc_score(y_test,y_pred))


 Classifier:  GaussianNB 
               precision    recall  f1-score   support

         0.0       0.78      0.77      0.78     53998
         1.0       0.77      0.78      0.78     53944

    accuracy                           0.78    107942
   macro avg       0.78      0.78      0.78    107942
weighted avg       0.78      0.78      0.78    107942


 Confusion Matrix:  GaussianNB 
 [[41749 12249]
 [11941 42003]]

 ROC Score:  GaussianNB 
 0.7758995389077884


In [5]:
for key, classifier in Classifiers.items():
    classifier_fitted = classifier.fit(x_train_norm, y_train)
    y_pred = classifier_fitted.predict(x_test_norm)
    print('\n Classifier: ', classifier.__class__.__name__,'\n', classification_report(y_test, y_pred))
    print('\n Confusion Matrix: ', classifier.__class__.__name__,'\n', confusion_matrix(y_test, y_pred))
    print('\n ROC Score: ', classifier.__class__.__name__, '\n', roc_auc_score(y_test,y_pred))


 Classifier:  GaussianNB 
               precision    recall  f1-score   support

         0.0       0.76      0.80      0.78     53998
         1.0       0.79      0.75      0.77     53944

    accuracy                           0.77    107942
   macro avg       0.78      0.77      0.77    107942
weighted avg       0.78      0.77      0.77    107942


 Confusion Matrix:  GaussianNB 
 [[43267 10731]
 [13640 40304]]

 ROC Score:  GaussianNB 
 0.7742078025124126


In [6]:
for key, classifier in Classifiers.items():
    classifier_fitted = classifier.fit(x_train_stand, y_train)
    y_pred = classifier_fitted.predict(x_test_stand)
    print('\n Classifier: ', classifier.__class__.__name__,'\n', classification_report(y_test, y_pred))
    print('\n Confusion Matrix: ', classifier.__class__.__name__,'\n', confusion_matrix(y_test, y_pred))
    print('\n ROC Score: ', classifier.__class__.__name__, '\n', roc_auc_score(y_test,y_pred))


 Classifier:  GaussianNB 
               precision    recall  f1-score   support

         0.0       0.78      0.77      0.77     53998
         1.0       0.77      0.78      0.78     53944

    accuracy                           0.78    107942
   macro avg       0.78      0.78      0.78    107942
weighted avg       0.78      0.78      0.78    107942


 Confusion Matrix:  GaussianNB 
 [[41647 12351]
 [11840 42104]]

 ROC Score:  GaussianNB 
 0.7758912154962683
