# Imports

In [1]:
import acquire
import prepare
import encode
import evaluate_models
import features
import model

import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, precision_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# Acquire

In [2]:
# Load the raw dataset through the `acquire` module imported above.
# NOTE(review): presumably the Telco customer-churn table; the data source is not visible here.
telco = acquire.get_telco_data()

# Prepare

In [3]:
# Produce the three modeling splits from the raw frame, passing train_size=.8 and a fixed seed.
# NOTE(review): the unpack order here is train, test, validate -- confirm it matches the
# order prepare.prep_telco actually returns, otherwise the splits are silently swapped.
train, test, validate = prepare.prep_telco(telco, train_size=.8, seed=123)

## Encode

In [4]:
# Run all three splits through encode.encoded_df and rebind the names to the results.
# Later cells read `*_encoded` columns (e.g. churn_encoded), presumably added here -- TODO confirm.
# NOTE(review): because the inputs are overwritten, re-running this cell feeds already-encoded
# frames back in; verify encoded_df tolerates that.
train, test, validate = encode.encoded_df(train, test, validate)

# Modeling

In [5]:
# Helper that rebuilds the modeling frames from one shared column list
def get_clean_df(train, validate):
    """Return the train and validate frames restricted to the modeling columns.

    Parameters
    ----------
    train, validate : pandas.DataFrame
        Frames that each contain every column named in the list below.

    Returns
    -------
    tuple
        ``(train_subset, validate_subset)``, each holding only the modeling
        columns, in the order listed.

    Raises
    ------
    KeyError
        If either frame is missing one of the listed columns.
    """
    modeling_columns = [
        'tenure',
        'contract_type_encoded',
        'monthly_charges',
        'senior_citizen',
        'payment_type_encoded',
        'churn_encoded',
        'phone_lines',
        'dependent_partner_grouping',
    ]

    return train[modeling_columns], validate[modeling_columns]

In [25]:
# Start a results frame holding the true validate labels; each model cell below
# adds its own column of predicted labels alongside `actual`.
predictions = pd.DataFrame({'actual': validate.churn_encoded})

## Logistic Regression

### First Log Model

In [10]:
# Rebuild the column-restricted train/validate frames so this section starts from a known state.
t, v = get_clean_df(train, validate)

In [152]:
# Features used by the first logistic regression model.
cols = ['contract_type_encoded', 'tenure', 'monthly_charges']

# Feature matrices and target vectors for the train and validate splits.
X_train = t[cols]
y_train = t['churn_encoded']
X_validate = v[cols]
y_validate = v['churn_encoded']

# Default hyperparameters; fit on train only.
log_1 = LogisticRegression()
log_1.fit(X_train, y_train)

# Keep the validate predictions next to the actual labels for later evaluation.
predictions['log_1'] = log_1.predict(X_validate)

# Mean accuracy on the validate split; displayed as the cell output.
log_1_accuracy = log_1.score(X_validate, y_validate)
log_1_accuracy

0.7985803016858918

0.805678793256433

## Decision Tree 

In [46]:
# Rebuild the column-restricted train/validate frames so this section starts from a known state.
t, v = get_clean_df(train, validate)

In [153]:
# Features used by the decision tree model.
cols = ['contract_type_encoded', 'tenure', 'monthly_charges']

# Feature matrices and target vectors for the train and validate splits.
X_train = t[cols]
y_train = t['churn_encoded']
X_validate = v[cols]
y_validate = v['churn_encoded']

# Depth-limited tree with a fixed seed so repeated runs give the same fit.
tree_1 = DecisionTreeClassifier(max_depth=4, random_state=123)
tree_1.fit(X_train, y_train)

# Keep the validate predictions next to the actual labels for later evaluation.
predictions['tree_1'] = tree_1.predict(X_validate)

# Mean accuracy on the validate split; displayed as the cell output.
tree_1_accuracy = tree_1.score(X_validate, y_validate)
tree_1_accuracy

0.8003549245785271

## Random Forest

In [137]:
# Rebuild the column-restricted train/validate frames so this section starts from a known state.
t, v = get_clean_df(train, validate)

In [146]:
# Features used by the random forest; unlike the other models in this notebook
# it also includes dependent_partner_grouping.
cols = [
    'contract_type_encoded',
    'dependent_partner_grouping',
    'tenure',
    'monthly_charges',
]

# Feature matrices and target vectors for the train and validate splits.
X_train = t[cols]
y_train = t['churn_encoded']
X_validate = v[cols]
y_validate = v['churn_encoded']

# 100 trees, each limited in depth and leaf size; fixed seed for repeatable fits.
forest_1 = RandomForestClassifier(
    n_estimators=100,
    min_samples_leaf=5,
    max_depth=6,
    random_state=123,
)
forest_1.fit(X_train, y_train)

# Keep the validate predictions next to the actual labels for later evaluation.
predictions['forest_1'] = forest_1.predict(X_validate)

# Mean accuracy on the validate split; displayed as the cell output.
forest_1_accuracy = forest_1.score(X_validate, y_validate)
forest_1_accuracy

0.8110026619343389

## K Nearest Neighbors 

In [139]:
# Rebuild the column-restricted train/validate frames so this section starts from a known state.
t, v = get_clean_df(train, validate)

In [154]:
# Features used by the k-nearest-neighbors model.
cols = ['contract_type_encoded', 'tenure', 'monthly_charges']

# Feature matrices and target vectors for the train and validate splits.
X_train = t[cols]
y_train = t['churn_encoded']
X_validate = v[cols]
y_validate = v['churn_encoded']

# Classify each row by a vote among its 10 nearest training rows.
knn_1 = KNeighborsClassifier(n_neighbors=10)
knn_1.fit(X_train, y_train)

# Keep the validate predictions next to the actual labels for later evaluation.
predictions['knn_1'] = knn_1.predict(X_validate)

# Mean accuracy on the validate split; displayed as the cell output.
knn_1_accuracy = knn_1.score(X_validate, y_validate)
knn_1_accuracy

0.8047914818101154

# Evaluate

## Accuracy

In [141]:
# Side-by-side validate accuracy for the four fitted models. Each placeholder is
# filled, in order, from the accuracy variable assigned in the matching model
# cell (log_1_accuracy, tree_1_accuracy, forest_1_accuracy, knn_1_accuracy).
print('''
Model Accuracy Rates - 
    Logistic Regression: {: .2f} 
    Decision Tree:       {: .2f}
    Random Forest:       {: .2f}
    K Nearest Neighbor   {: .2f}
'''.format(log_1_accuracy, tree_1_accuracy, forest_1_accuracy, knn_1_accuracy))


Model Accuracy Rates - 
    Logistic Regression:  0.80 
    Decision Tree:        0.80
    Random Forest:        0.82
    K Nearest Neighbor    0.80



## Confusion Matrix

In [None]:
# Logistic Regression
# Uses the validate predictions stored in predictions['log_1'] by the log_1 cell,
# so this cell does not depend on a model name that is undefined at this point.
# Rows are actual classes, columns are predicted classes.
print(confusion_matrix(predictions['actual'], predictions['log_1']))

In [None]:
# Decision Tree
# Uses the validate predictions stored in predictions['tree_1'] by the tree_1 cell,
# so this cell does not depend on a model name that is never defined in the notebook.
# Rows are actual classes, columns are predicted classes.
print(confusion_matrix(predictions['actual'], predictions['tree_1']))

In [None]:
# Random Forest
# Uses the validate predictions stored in predictions['forest_1'] by the forest_1 cell.
# Re-predicting from X_validate here would be wrong: by this point X_validate holds
# the three KNN features, while forest_1 was fit on four columns.
# Rows are actual classes, columns are predicted classes.
print(confusion_matrix(predictions['actual'], predictions['forest_1']))

In [None]:
# KNN
# Uses the validate predictions stored in predictions['knn_1'] by the knn_1 cell,
# so this cell does not depend on a model name that is never defined in the notebook.
# Rows are actual classes, columns are predicted classes.
print(confusion_matrix(predictions['actual'], predictions['knn_1']))

# Summaries 

In [None]:
# Logistic Regression
# Per-class precision / recall / F1 on the validate split, computed from the
# predictions stored in predictions['log_1'] by the log_1 cell.
print(classification_report(predictions['actual'], predictions['log_1']))

In [None]:
# Precision of the logistic regression model on the validate split, computed from
# the predictions stored in predictions['log_1'] by the log_1 cell.
print(precision_score(predictions['actual'], predictions['log_1']))

In [None]:
# Decision Tree
# Per-class precision / recall / F1 on the validate split, computed from the
# predictions stored in predictions['tree_1'] by the tree_1 cell.
print(classification_report(predictions['actual'], predictions['tree_1']))

In [None]:
# Random Forest
# Per-class precision / recall / F1 on the validate split, computed from the
# predictions stored in predictions['forest_1'] by the forest_1 cell. The stored
# column is used because X_validate no longer holds the four forest features here.
print(classification_report(predictions['actual'], predictions['forest_1']))

In [None]:
# KNN
# Per-class precision / recall / F1 on the validate split, computed from the
# predictions stored in predictions['knn_1'] by the knn_1 cell.
print(classification_report(predictions['actual'], predictions['knn_1']))

In [None]:
# Build a logistic regression model through the `model` module imported at the top.
# NOTE: this rebinds `predictions`, replacing the per-model predictions frame
# assembled earlier in the notebook with whatever create_log_reg_model returns.
# NOTE(review): the features used and the schema of the returned frame are not visible here -- confirm.
log_reg_model, predictions = model.create_log_reg_model(train, validate)

In [None]:
# Show the first rows of `predictions` as currently bound (the frame returned by
# model.create_log_reg_model when the cells run in order).
predictions.head()

In [None]:
# Score the module-built model on the validate split.
# NOTE(review): X_validate still holds the columns chosen in the KNN cell; confirm
# log_reg_model was trained on the same features, otherwise this call fails or misleads.
log_reg_model.score(X_validate, y_validate)