# Imports

In [57]:
import acquire
import prepare

import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# Acquire

In [59]:
telco = acquire.get_telco_data()

# Prepare

In [61]:
train, test, validate = prepare.prep_telco(telco, train_size=.8, seed=123)

# Modeling

In [29]:
X_train = train[['tenure', 'contract_type_encoded', 'monthly_charges', 'payment_type_encoded']]
y_train = train.churn

X_validate = validate[['tenure', 'contract_type_encoded', 'monthly_charges', 'payment_type_encoded']]
y_validate = validate.churn

In [30]:
predictions = pd.DataFrame({'actual' : y_validate})

## Logistic Regregression

In [97]:
def evaluate_threshold(t, y, probs):
    yhat = (probs > t).astype(int)
    return {
        "threshold": t,
        "precision": precision_score(y, yhat),
        "recall": recall_score(y, yhat),
        "accuracy": accuracy_score(y, yhat),
    }


def evaluate_thresholds(y, probs):
    return pd.DataFrame(
        [evaluate_threshold(t, y, probs) for t in np.arange(0, 1.01, 0.01)]
    )

In [31]:
log_reg_model = LogisticRegression(random_state=123).fit(X_train, y_train)

In [32]:
predictions['logistic Regression: churn ~ tenure, contract type, monthly charges'] = log_reg_model.predict(X_validate)

In [33]:
predictions.head()

Unnamed: 0,actual,"logistic Regression: churn ~ tenure, contract type, monthly charges"
5854,No,No
797,No,No
3188,No,No
2254,No,No
5732,No,No


In [34]:
log_reg_accuracy = log_reg_model.score(X_validate, y_validate)

## Decision Tree 

In [35]:
tree_model = DecisionTreeClassifier(max_depth=3, random_state=123).fit(X_train, y_train)

In [36]:
predictions['Decision Tree: churn ~ tenure, contract type, monthly charges'] = tree_model.predict(X_validate)

In [37]:
predictions.head()

Unnamed: 0,actual,"logistic Regression: churn ~ tenure, contract type, monthly charges","Decision Tree: churn ~ tenure, contract type, monthly charges"
5854,No,No,No
797,No,No,No
3188,No,No,No
2254,No,No,No
5732,No,No,No


In [38]:
tree_accuracy = tree_model.score(X_validate, y_validate)

## Random Forest

In [39]:
forest_model = RandomForestClassifier(n_estimators=10, random_state=123).fit(X_train, y_train)

In [40]:
predictions['Random Forest: churn ~ tenure, contract type, monthly charges'] = forest_model.predict(X_validate)

In [41]:
forest_accuracy = forest_model.score(X_validate, y_validate)

## K Nearest Neighbors 

In [42]:
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

In [43]:
predictions['KNN: churn ~ tenure, contract type, monthly charges'] = knn.predict(X_validate)

In [44]:
knn_accuracy = knn.score(X_train, y_train)

In [45]:
predictions.head()

Unnamed: 0,actual,"logistic Regression: churn ~ tenure, contract type, monthly charges","Decision Tree: churn ~ tenure, contract type, monthly charges","Random Forest: churn ~ tenure, contract type, monthly charges","KNN: churn ~ tenure, contract type, monthly charges"
5854,No,No,No,No,No
797,No,No,No,No,No
3188,No,No,No,No,No
2254,No,No,No,No,No
5732,No,No,No,No,No


In [46]:
predictions[['actual', 'KNN: churn ~ tenure, contract type, monthly charges']]

Unnamed: 0,actual,"KNN: churn ~ tenure, contract type, monthly charges"
5854,No,No
797,No,No
3188,No,No
2254,No,No
5732,No,No
...,...,...
4811,No,No
6879,No,No
5366,Yes,No
1301,No,No


In [47]:
predictions.to_csv('model_output.csv')

# Evaluate

## Accuracy

In [63]:
print('''
Model Accuracy Rates - 
    Logistic Regression: {: .2f} 
    Decision Tree:       {: .2f}
    Random Forest:       {: .2f}
    K Nearest Neighbor   {: .2f}
'''.format(log_reg_accuracy, tree_accuracy, forest_accuracy, knn_accuracy))


Model Accuracy Rates - 
    Logistic Regression:  0.80 
    Decision Tree:        0.80
    Random Forest:        0.79
    K Nearest Neighbor    0.83



## Confusion Matrix

In [49]:
# Logistic Regression
print(confusion_matrix(predictions.actual, log_reg_model.predict(X_validate)))

[[767  78]
 [146 136]]


In [50]:
# Decision Tree
print(confusion_matrix(predictions.actual, tree_model.predict(X_validate)))

[[788  57]
 [170 112]]


In [51]:
# Random Forest
print(confusion_matrix(predictions.actual, forest_model.predict(X_validate)))

[[752  93]
 [144 138]]


In [52]:
# KNN
print(confusion_matrix(predictions.actual, knn.predict(X_validate)))

[[746  99]
 [143 139]]


# Summaries 

In [53]:
# Logistic Regression
print(classification_report(predictions.actual, log_reg_model.predict(X_validate)))

              precision    recall  f1-score   support

          No       0.84      0.91      0.87       845
         Yes       0.64      0.48      0.55       282

    accuracy                           0.80      1127
   macro avg       0.74      0.69      0.71      1127
weighted avg       0.79      0.80      0.79      1127



In [54]:
# Decision Tree
print(classification_report(predictions.actual, tree_model.predict(X_validate)))

              precision    recall  f1-score   support

          No       0.82      0.93      0.87       845
         Yes       0.66      0.40      0.50       282

    accuracy                           0.80      1127
   macro avg       0.74      0.66      0.69      1127
weighted avg       0.78      0.80      0.78      1127



In [55]:
# Random Forest
print(classification_report(predictions.actual, forest_model.predict(X_validate)))

              precision    recall  f1-score   support

          No       0.84      0.89      0.86       845
         Yes       0.60      0.49      0.54       282

    accuracy                           0.79      1127
   macro avg       0.72      0.69      0.70      1127
weighted avg       0.78      0.79      0.78      1127



In [56]:
# KNN
print(classification_report(predictions.actual, knn.predict(X_validate)))

              precision    recall  f1-score   support

          No       0.84      0.88      0.86       845
         Yes       0.58      0.49      0.53       282

    accuracy                           0.79      1127
   macro avg       0.71      0.69      0.70      1127
weighted avg       0.78      0.79      0.78      1127

