# Reports
1. Inputs: Prepared data for ML modeling
2. Outputs: Trained models and metrics
3. ...

# 1.0 Imports

###### 1.1 Importing libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss

from xgboost import XGBRFClassifier

import pickle

import time

tempo_agora = time.time() # Start timestamp, used to measure the project's total processing time

  from pandas import MultiIndex, Int64Index


###### 1.2 Importing data

In [2]:
# Load the pickled feature and target collections produced by the data-preparation step.
# Each file is opened in a `with` block so the handle is closed as soon as the
# object has been deserialized (the bare `pickle.load(open(...))` form leaks it).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load files that this project itself wrote.
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//bank_churn_predict//exportings//X_to_modeling.pkl', 'rb') as features_file:
    X_to_modeling = pickle.load(features_file)
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//bank_churn_predict//exportings//y_to_modeling.pkl', 'rb') as targets_file:
    y_to_modeling = pickle.load(targets_file)

In [3]:
# Unpack the six feature splits, stored positionally (indices 0..5) in X_to_modeling.
# NOTE(review): the *_smote / *_near names presumably refer to SMOTE over-sampling
# and NearMiss under-sampling applied upstream -- confirm against the preparation notebook.
(X_valid, X_test,
 X_smote, X_train_smote,
 X_near, X_train_near) = (X_to_modeling[position] for position in range(6))

# The targets follow the same positional layout as the features.
(y_valid, y_test,
 y_smote, y_train_smote,
 y_near, y_train_near) = (y_to_modeling[position] for position in range(6))

# 2.0 ML Modeling

### 2.1 XGBoost Random Forest (XGBRF)

In [4]:
# Modeling
# Fit an XGBoost random-forest classifier on the X_smote / y_smote split, print
# evaluation metrics for the validation and test splits, then persist the model.
# NOTE(review): newer xgboost releases deprecate `use_label_encoder` and passing
# `eval_metric` to fit(); kept as-is to match the environment this notebook ran in.
xgbrf_model = XGBRFClassifier(use_label_encoder=False)
xgbrf_model_fit = xgbrf_model.fit(X_smote, y_smote, eval_metric='error')
xgbrf_predict = xgbrf_model_fit.predict(X_valid)

# NOTE(review): roc_auc_score receives hard class predictions here, not scores,
# so for binary labels the value equals the mean of the two class recalls
# (balanced accuracy). Pass predict_proba(...)[:, 1] for a threshold-free ROC AUC.
print('valid')
print(classification_report(y_valid, xgbrf_predict))
print(roc_auc_score(y_valid, xgbrf_predict))
print()
print(confusion_matrix(y_valid, xgbrf_predict))
print('\nteste')
# From here on xgbrf_predict holds the test-split predictions.
xgbrf_predict = xgbrf_model_fit.predict(X_test)
print(classification_report(y_test, xgbrf_predict))
print(roc_auc_score(y_test, xgbrf_predict))
print()
print(confusion_matrix(y_test, xgbrf_predict))

# Persist the fitted model for deployment; the `with` block guarantees the file
# is flushed and closed (the bare `open(...)` form left the handle dangling).
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//deploy//model//xgbrf_model.pkl', 'wb') as model_file:
    pickle.dump(xgbrf_model_fit, model_file)

valid
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      1923
           1       0.65      0.53      0.58       477

    accuracy                           0.85      2400
   macro avg       0.77      0.73      0.74      2400
weighted avg       0.84      0.85      0.84      2400

0.728001321310714

[[1788  135]
 [ 226  251]]

teste
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      1577
           1       0.67      0.56      0.61       423

    accuracy                           0.85      2000
   macro avg       0.78      0.74      0.76      2000
weighted avg       0.84      0.85      0.84      2000

0.7430460925448715

[[1460  117]
 [ 186  237]]


### 2.2 Random Forest

In [5]:
# Fit a scikit-learn random forest on the X_smote / y_smote split, print
# evaluation metrics for the validation and test splits, then persist the model.
# random_state is fixed so the fit is reproducible across runs.
rf_model = RandomForestClassifier(n_estimators=100, 
                                  max_depth=7, 
                                  max_features=0.3, 
                                  random_state=101)
rf_model_fit = rf_model.fit(X_smote, y_smote)
rf_predict = rf_model_fit.predict(X_valid)

# NOTE(review): roc_auc_score receives hard class predictions here, not scores,
# so for binary labels the value equals the mean of the two class recalls
# (balanced accuracy). Pass predict_proba(...)[:, 1] for a threshold-free ROC AUC.
print('valid')
print(classification_report(y_valid, rf_predict))
print(roc_auc_score(y_valid, rf_predict))
print()
print(confusion_matrix(y_valid, rf_predict))
print('\nteste')
# From here on rf_predict holds the test-split predictions.
rf_predict = rf_model_fit.predict(X_test)
print(classification_report(y_test, rf_predict))
print(roc_auc_score(y_test, rf_predict))
print()
print(confusion_matrix(y_test, rf_predict))

# Persist the fitted model for deployment; the `with` block guarantees the file
# is flushed and closed (the bare `open(...)` form left the handle dangling).
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//deploy//model//rf_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model_fit, model_file)

valid
              precision    recall  f1-score   support

           0       0.89      0.94      0.91      1923
           1       0.68      0.55      0.61       477

    accuracy                           0.86      2400
   macro avg       0.79      0.74      0.76      2400
weighted avg       0.85      0.86      0.85      2400

0.7439600728683234

[[1801  122]
 [ 214  263]]

teste
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      1577
           1       0.67      0.56      0.61       423

    accuracy                           0.85      2000
   macro avg       0.78      0.74      0.76      2000
weighted avg       0.84      0.85      0.84      2000

0.7448622410508028

[[1462  115]
 [ 185  238]]


# Time

In [7]:
# Report the wall-clock time elapsed since tempo_agora was recorded.
# The seconds are rounded to 2 decimals, converted to minutes, and truncated by int().
elapsed_seconds = round(time.time() - tempo_agora, 2)
print(f'O tempo de processamento do projeto foi de: {int(elapsed_seconds / 60)} minutos')

O tempo de processamento do projeto foi de: 0 minutos
