In [13]:
import acquire
import prepare
import encode

import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.feature_selection import RFE

In [2]:
# Get and Prepare the Data, Including Encoding
train, test, validate = prepare.prep_telco(
        acquire.get_telco_data()
    )

In [3]:
train, test, validate = encode.encoded_df(train, test, validate)

In [4]:
# Create a Function that returns the DF every time
def get_splits(train, validate):
    cols = ['tenure', 'contract_type_encoded', 'monthly_charges', 
            'senior_citizen', 'payment_type_encoded', 'churn_encoded']
    
    t = train[cols]
    v = validate[cols]
    
    return t, v

# Create the Models

In [5]:
log_reg_1 = LogisticRegression()

tree_1 = DecisionTreeClassifier()

forest_1 = RandomForestClassifier()

knn_1 = KNeighborsClassifier()

# Fit the Models

## Logistic Regression

Logistic Regression works better if you remove attributes that are either unrelated to the output variable or correlated to other attributes

In [6]:
t, v = get_splits(train, validate)

X_train, X_validate = t.drop('churn_encoded', axis=1), v.drop('churn_encoded', axis=1)
y_train, y_validate = t.churn_encoded, v.churn_encoded

In [7]:
log_reg_1 = log_reg_1.fit(X_train, y_train)

In [8]:
log_reg_1.score(X_validate, y_validate)

0.8074534161490683

In [9]:
rfe = RFE(estimator=log_reg_1, step=1)

In [11]:
rfe.fit(X_validate, y_validate)

RFE(estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                 fit_intercept=True, intercept_scaling=1,
                                 l1_ratio=None, max_iter=100,
                                 multi_class='auto', n_jobs=None, penalty='l2',
                                 random_state=None, solver='lbfgs', tol=0.0001,
                                 verbose=0, warm_start=False),
    n_features_to_select=None, step=1, verbose=0)

In [15]:
selected_rfe_features = pd.DataFrame({'Selected Features': list(X_validate.columns),
                                      'Ranking': rfe.ranking_})

In [17]:
print(selected_rfe_features.sort_values('Ranking'))

       Selected Features  Ranking
1  contract_type_encoded        1
3         senior_citizen        1
0                 tenure        2
2        monthly_charges        3
4   payment_type_encoded        4
