In [4]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
%matplotlib inline 

# preprocessing
from sklearn.preprocessing import StandardScaler

# models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# model tuning
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

# ensembles
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import GradientBoostingClassifier


# class imbalance
from sklearn.dummy import DummyClassifier
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks

# evaluating models
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

# Load the two input tables; in both files the first CSV column is used as the
# row index (index_col=0).
# NOTE(review): 'cutomer' in the first filename looks like a typo of
# 'customer' — confirm the name of the file on disk before renaming anything.
df = pd.read_csv(
    '../data/preprocessed_cutomer_data.csv',
    index_col=0,
)
extra_features = pd.read_csv(
    '../data/new_features.csv',
    index_col=0,
)

In [5]:
# Separate the 'churn' target from the feature columns, then hold out 20% of
# the rows as a test set. The fixed random_state keeps the split reproducible.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# classes are imbalanced; confirm first, since it changes which rows land in
# each split.
y = df['churn']
X = df.drop(columns=['churn'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=10,
)

In [6]:
# scaling
# The scaler is fitted on the training split only and then applied unchanged
# to the test split, so no test-set statistics feed into the fit.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train),
    # Pass the Index itself: wrapping it in a list makes pandas build a
    # one-level MultiIndex whose labels are tuples instead of plain names.
    columns=X_train.columns,
    # Keep the original row labels so the frame stays aligned with y_train.
    index=X_train.index,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

In [7]:
# using SMOTE to reduce class imbalance
# Only the scaled training split is resampled here; the test split is not
# passed to the sampler.
sm = SMOTE(random_state=64)
# fit_resample is the supported imbalanced-learn entry point; fit_sample was a
# deprecated alias and is no longer available in recent releases.
X_train_SM, y_train_SM = sm.fit_resample(X_train_scaled, y_train)
# Re-wrap the result so the feature labels are kept even when the sampler
# hands back a bare array (as older imbalanced-learn releases do).
X_train_SM = pd.DataFrame(X_train_SM, columns=X_train_scaled.columns)

## Modeling

In [1]:
# Running log of every summary produced by scoreboard(), in call order.
models_data = []


def scoreboard(classifier, y_true, y_pred):
    '''
    Score a classifier's predictions and record the result in models_data.

    Parameters
    ----------
    classifier : estimator exposing get_params()
        Stored as-is under 'Model' (the object itself, not its name); its
        get_params() result is stored under 'Params'.
    y_true : true labels, passed straight to the sklearn metric functions
    y_pred : predicted labels, passed straight to the sklearn metric functions

    Returns
    -------
    dict with keys 'Model', 'Params', 'Accuracy', 'F1_score', 'Precision'
    and 'Recall'. The same dict object is also appended to the module-level
    models_data list, so each call grows that list by one entry.
    '''
    metric_functions = (
        ('Accuracy', accuracy_score),
        ('F1_score', f1_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
    )

    model_summary = {'Model': classifier, 'Params': classifier.get_params()}
    for label, metric in metric_functions:
        model_summary[label] = metric(y_true, y_pred)

    models_data.append(model_summary)
    return model_summary

def justscore(y_true, y_pred):
    '''
    Compute Accuracy, F1 score, Precision and Recall with sklearn.

    Unlike scoreboard(), nothing is recorded in models_data; the metrics are
    only returned.

    Parameters
    ----------
    y_true : true labels, passed straight to the sklearn metric functions
    y_pred : predicted labels, passed straight to the sklearn metric functions

    Returns
    -------
    dict with keys 'Accuracy', 'F1_score', 'Precision' and 'Recall'.
    '''
    metric_functions = (
        ('Accuracy', accuracy_score),
        ('F1_score', f1_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
    )
    return {label: metric(y_true, y_pred) for label, metric in metric_functions}