# Reports
1. Inputs: Data prepared for ML modeling
2. Outputs: Trained models and metrics
3. ...

# 1.0 Imports

###### 1.1 Importing libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.ensemble import RandomForestClassifier

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss

from xgboost import XGBRFClassifier

import pickle

import time

tempo_agora = time.time() # Start timestamp, used to report the notebook's total processing time

###### 1.2 Importing data

In [2]:
# Load the pickled feature and target containers produced by an earlier step.
# Each file is opened in a `with` block so its handle is closed deterministically
# instead of being left open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
# NOTE(review): these are machine-specific absolute paths; consider a configurable
# base directory so the notebook runs on other machines.
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//bank_churn_predict//exportings//X_to_modeling.pkl', 'rb') as x_file:
    X_to_modeling = pickle.load(x_file)

with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//bank_churn_predict//exportings//y_to_modeling.pkl', 'rb') as y_file:
    y_to_modeling = pickle.load(y_file)

In [3]:
# Positions 0-5 of each loaded container are bound, in order, to the names below.
# Indexing is kept explicit (rather than plain tuple unpacking) so that only
# positions 0-5 are read, exactly as before.
(X_valid, X_test,
 X_smote, X_train_smote,
 X_near, X_train_near) = (X_to_modeling[position] for position in range(6))

(y_valid, y_test,
 y_smote, y_train_smote,
 y_near, y_train_near) = (y_to_modeling[position] for position in range(6))

# 2.0 ML Modeling

### 2.1 XGBoost Random Forest (XGBRFClassifier)

In [4]:
# Modeling: fit an XGBoost random-forest classifier on X_smote / y_smote
# (presumably the SMOTE-oversampled training split - confirm against the
# data-preparation notebook) and evaluate it on the validation split.
#
# eval_metric is configured on the estimator: passing it to fit() is deprecated
# in xgboost and rejected by newer releases. No eval_set is supplied, so the
# metric does not influence training either way.
xgbrf_model = XGBRFClassifier(use_label_encoder=False, eval_metric='error')
xgbrf_model_fit = xgbrf_model.fit(X_smote, y_smote)  # fit() returns the fitted estimator
xgbrf_predict = xgbrf_model_fit.predict(X_valid)

print(classification_report(y_valid, xgbrf_predict))
# NOTE(review): roc_auc_score is given hard class labels here, not scores, so the
# value reflects a single operating point rather than the full ROC curve.
# Consider xgbrf_model_fit.predict_proba(X_valid)[:, 1] for a true ROC AUC.
print(roc_auc_score(y_valid, xgbrf_predict))

              precision    recall  f1-score   support

           0       0.91      0.85      0.88      1923
           1       0.51      0.64      0.57       477

    accuracy                           0.81      2400
   macro avg       0.71      0.74      0.72      2400
weighted avg       0.83      0.81      0.81      2400

0.7443116592588231


### 2.2 Random Forest

In [5]:
# Random forest hyper-parameters, gathered in one place so they are easy to tune.
rf_params = {
    'n_estimators': 100,
    'max_depth': 7,
    'max_features': 0.3,
    'random_state': 101,  # fixed seed so the fit is reproducible
}

# Fit on the same training split used for the XGBRF model, then score the
# validation split.
rf_model = RandomForestClassifier(**rf_params)
rf_model_fit = rf_model.fit(X_smote, y_smote)  # fit() returns the fitted estimator
rf_predict = rf_model_fit.predict(X_valid)

print(classification_report(y_valid, rf_predict))
# NOTE(review): roc_auc_score is computed from hard class labels, not
# probabilities; predict_proba(X_valid)[:, 1] would give a threshold-free ROC AUC.
print(roc_auc_score(y_valid, rf_predict))

              precision    recall  f1-score   support

           0       0.92      0.86      0.89      1923
           1       0.54      0.68      0.61       477

    accuracy                           0.82      2400
   macro avg       0.73      0.77      0.75      2400
weighted avg       0.84      0.82      0.83      2400

0.7699480306256276


# 3.0 Exporting Data

###### 3.1 Models

In [6]:
# Persist the fitted models for the deploy step. Each file is written inside a
# `with` block so the handle is flushed and closed as soon as the dump finishes,
# rather than whenever the anonymous file object happens to be garbage-collected.

# XGBRF model
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//deploy//model//xgbrf_model.pkl', 'wb') as xgbrf_file:
    pickle.dump(xgbrf_model_fit, xgbrf_file)

# Random Forest
with open('D://My Drive//Pessoal//Projetos//bank_churn_predict//deploy//model//rf_model.pkl', 'wb') as rf_file:
    pickle.dump(rf_model_fit, rf_file)

# Time

In [7]:
# Wall-clock time since tempo_agora was recorded, rounded to 2 decimals and
# then truncated to whole minutes (runs shorter than a minute report 0).
elapsed_seconds = round(time.time() - tempo_agora, 2)
elapsed_minutes = int(elapsed_seconds / 60)
print(f'O tempo de processamento do projeto foi de: {elapsed_minutes} minutos')

O tempo de processamento do projeto foi de: 0 minutos
