# Imports

In [None]:
import acquire
import prepare
import encode
import evaluate_models
import features
import model

import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, precision_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# Acquire

In [None]:
# Fetch the dataset through the project-local acquire module; the data source
# and schema are defined there and are not visible in this notebook.
telco = acquire.get_telco_data()

# Prepare

In [None]:
# prep_telco is called with train_size=.8 and a fixed seed (123); presumably it
# cleans the data and splits it into three frames — see prepare.py.
# NOTE(review): the result is unpacked as (train, test, validate). Confirm this
# is the order prep_telco actually returns; a mismatch would silently swap the
# validation and test sets.
train, test, validate = prepare.prep_telco(telco, train_size=.8, seed=123)

## Encode

In [None]:
# Run all three splits through encode.encoded_df and rebind them to its results.
# The *_encoded columns used below are presumably added here — verify in encode.py.
train, test, validate = encode.encoded_df(train, test, validate)

# Feature Engineering

In [None]:
# Combine online security, device protection, and tech support into a single online_features column

In [None]:
# Add an 'online_features' column to each split, in place: the element-wise sum
# of the three encoded add-on service columns.
# NOTE(review): this column is not among the predictors selected for the models below.
for _frame in (train, validate, test):
    _frame['online_features'] = (
        _frame['online_security_encoded']
        + _frame['device_protection_encoded']
        + _frame['tech_support_encoded']
    )

# Modeling

In [None]:
X_train = train[['tenure', 'contract_type_encoded', 'monthly_charges', 'payment_type_encoded']]
y_train = train.churn_encoded

X_validate = validate[['tenure', 'contract_type_encoded', 'monthly_charges', 'payment_type_encoded']]
y_validate = validate.churn_encoded            

In [None]:
predictions = pd.DataFrame({'actual' : y_validate})

## Logistic Regression

In [None]:
log_reg_model = LogisticRegression(random_state=123).fit(X_train, y_train)

In [None]:
log_reg_predictions = log_reg_model.predict(X_validate)

In [None]:
predictions['logistic Regression: churn ~ tenure, contract type, monthly charges'] = log_reg_predictions

In [None]:
log_reg_accuracy = log_reg_model.score(X_validate, y_validate)
log_reg_accuracy

## Identify/Plot Thresholds

In [None]:
predictions.shape

In [None]:
probabilities = log_reg_model.predict_proba(X_validate)[:, 1]

In [None]:
thresholds = evaluate_models.evaluate_thresholds(predictions.actual, probabilities)

In [None]:
# Show the row(s) for the 0.73 cutoff. np.isclose replaces an exact == test:
# cutoffs generated by float arithmetic (e.g. stepping by 0.01) are often not
# bit-identical to the literal .73, in which case == silently returns no rows.
# The default tolerance is far smaller than any plausible step between cutoffs.
# NOTE(review): assumes thresholds.threshold is numeric — confirm in evaluate_models.
thresholds[np.isclose(thresholds.threshold, .73)]

In [None]:
evaluate_models.plot_metrics_by_thresholds(predictions.actual, probabilities)

In [None]:
predictions.head()

## Decision Tree 

In [None]:
tree_model = DecisionTreeClassifier(max_depth=3, random_state=123).fit(X_train, y_train)

In [None]:
predictions['Decision Tree: churn ~ tenure, contract type, monthly charges'] = tree_model.predict(X_validate)

In [None]:
predictions.head()

In [None]:
tree_accuracy = tree_model.score(X_validate, y_validate)

## Random Forest

In [None]:
forest_model = RandomForestClassifier(n_estimators=10, random_state=123).fit(X_train, y_train)

In [None]:
predictions['Random Forest: churn ~ tenure, contract type, monthly charges'] = forest_model.predict(X_validate)

In [None]:
forest_accuracy = forest_model.score(X_validate, y_validate)

## K Nearest Neighbors 

In [None]:
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

In [None]:
predictions['KNN: churn ~ tenure, contract type, monthly charges'] = knn.predict(X_validate)

In [None]:
# Score on the validation split, as the other three models do, so the accuracy
# comparison printed later is like-for-like. Scoring on the training data (the
# previous behaviour) overstates KNN accuracy, because every training point is
# one of its own nearest neighbours.
knn_accuracy = knn.score(X_validate, y_validate)

In [None]:
predictions.head()

In [None]:
predictions[['actual', 'KNN: churn ~ tenure, contract type, monthly charges']]

In [None]:
predictions.to_csv('model_output.csv')

# Evaluate

## Accuracy

In [None]:
# Side-by-side accuracy summary for the four models. Each line is spelled out
# explicitly (including trailing spaces) and joined with newlines; the empty
# first and last entries give the leading and trailing line breaks.
print('\n'.join([
    '',
    'Model Accuracy Rates - ',
    f'    Logistic Regression: {log_reg_accuracy: .2f} ',
    f'    Decision Tree:       {tree_accuracy: .2f}',
    f'    Random Forest:       {forest_accuracy: .2f}',
    f'    K Nearest Neighbor   {knn_accuracy: .2f}',
    '',
]))

## Confusion Matrix

In [None]:
# Logistic Regression
print(confusion_matrix(predictions.actual, log_reg_model.predict(X_validate)))

In [None]:
# Decision Tree
print(confusion_matrix(predictions.actual, tree_model.predict(X_validate)))

In [None]:
# Random Forest
print(confusion_matrix(predictions.actual, forest_model.predict(X_validate)))

In [None]:
# KNN
print(confusion_matrix(predictions.actual, knn.predict(X_validate)))

# Summaries 

In [None]:
# Logistic Regression
print(classification_report(predictions.actual, log_reg_model.predict(X_validate)))

In [None]:
print(precision_score(predictions.actual, log_reg_model.predict(X_validate)))

In [None]:
# Decision Tree
print(classification_report(predictions.actual, tree_model.predict(X_validate)))

In [None]:
# Random Forest
print(classification_report(predictions.actual, forest_model.predict(X_validate)))

In [None]:
# KNN
print(classification_report(predictions.actual, knn.predict(X_validate)))

In [None]:
# Rebuild the logistic regression through the project's model module.
# NOTE(review): this rebinds both log_reg_model and predictions, discarding the
# per-model prediction columns accumulated above; what the helper returns is
# defined in model.py and not visible here.
log_reg_model, predictions = model.create_log_reg_model(train, validate)