In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from numpy import arange
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import cross_validate
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import make_scorer, accuracy_score, f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from fpdf import FPDF 
from sklearn.pipeline import Pipeline
import pickle
import datetime
from datetime import datetime as dt
from datetime import timedelta
from keras.models import model_from_json

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB

#from imblearn.over_sampling import SMOTE

In [None]:
%matplotlib inline
plt.style.use('fivethirtyeight')
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [None]:
# Load the pickled churn training data from the local data directory.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# from a trusted source.
df_churn = pd.read_pickle('./data/training/churn.pickle')

In [None]:
# Preview the first rows to sanity-check that the load worked.
df_churn.head()

In [None]:
# Work on a copy so the loaded df_churn object is left unmodified.
df = df_churn.copy()

In [None]:
# Dimensions of the working frame as (rows, columns).
df.shape

In [None]:
# Per-column dtype and non-null counts, to spot missing values early.
df.info()

In [None]:
# List the column labels (the target used below is 'CHURN_STATUS').
df.columns

In [None]:
# Summary statistics of the working frame (pandas describes numeric
# columns by default).
df.describe()

In [None]:
# Target vector: the churn label as a plain array.
y = df['CHURN_STATUS'].values
# Feature matrix: every remaining column of the working frame.
X = df.drop(columns='CHURN_STATUS')

In [None]:
# Hold out 20% of the rows as the final test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Carve a validation set out of the remaining 80%: 25% of it, i.e. 20% of
# the full data, giving a 60/20/20 train/validation/test partition.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Grid-search a GaussianNB classifier on the training split, persist the
# fitted search object, and report validation-set metrics.


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


tstart = dt.now()
print("Starting ------ GaussianNB")

# Single-step pipeline; the step name is the prefix used in param_grid keys.
pipe = Pipeline(steps=[
    ('gaussiannbclassifier', GaussianNB()),
])

# NOTE(review): these var_smoothing candidates are far above scikit-learn's
# default of 1e-9 -- confirm they are intentional.
param_grid = {
    'gaussiannbclassifier__var_smoothing': [0.015, 100.5],
}

model = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='roc_auc',
    n_jobs=-1,
    pre_dispatch='2*n_jobs',
    cv=5,
    verbose=1,
    return_train_score=False,
)
model.fit(X_train, y_train)

# Persist the whole fitted search object (best estimator + CV results).
pkl_filename = "./models/gaussiannbclassifier.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(model, file)

# Score the validation split with the same DataFrame type used for fitting;
# converting to a bare ndarray would drop the feature names the model was
# fitted with and make scikit-learn warn about the mismatch.
predicted = model.predict(X_val)
proba = model.predict_proba(X_val)

accuracy = accuracy_score(y_val, predicted)

# Pin the label order to the fitted classes so the matrix is always 2x2,
# even when one class is absent from both y_val and the predictions.
# Assumes a binary target: rows are true labels, columns are predictions,
# so the flattened order is TN, FP, FN, TP.
CM = confusion_matrix(y_val, predicted, labels=model.classes_)
TN, FP, FN, TP = CM.ravel()

# Guard every ratio: a model that never predicts the positive class would
# otherwise yield NaN metrics through a 0/0 division.
FPR = _safe_ratio(FP, FP + TN)
recall = _safe_ratio(TP, TP + FN)
precision = _safe_ratio(TP, TP + FP)
f1 = _safe_ratio(2 * recall * precision, recall + precision)

tend = dt.now()
delta = tend - tstart

print("Best Params-")
print(model.best_params_)

print("\n")
print("Algorithm: ", 'gaussiannbclassifier')
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1-score: ", f1)
print("FPR: ", FPR)
print("Runtime: ", delta)