# Telco Churn Classification Project

### Kwame Taylor, Darden Cohort, Sept 2020

Welcome!

#### Start by importing all necessary modules.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler

from acquire import get_telco_data, new_telco_data
from prepare import prep_telco, telco_split

import warnings
# NOTE(review): with no category or module argument this silences every
# warning for the rest of the notebook (deprecation warnings included);
# consider narrowing it to the specific warnings that are known to be noise.
warnings.filterwarnings("ignore")

# Acquire

### Use acquire.py to get the Telco churn customer data from a local CSV (if cached) or the Codeup SQL database.

In [2]:
# get_telco_data comes from acquire.py (imported in the first cell)
# to replace the cached/local csv file with a fresh copy from SQL, call new_telco_data instead

df = get_telco_data()
df.head(n=3)

Unnamed: 0,customer_id,contract_type_id,phone_service,internet_service_type_id,gender,senior_citizen,partner,dependents,tenure,online_security,online_backup,device_protection,tech_support,streaming_tv,streaming_movies,monthly_charges,total_charges,churn
0,0002-ORFBO,2,Yes,1,Female,0,Yes,Yes,9,No,Yes,No,Yes,Yes,No,65.6,593.3,No
1,0003-MKNFE,1,Yes,1,Male,0,No,No,9,No,No,No,No,No,Yes,59.9,542.4,No
2,0004-TLHLJ,1,Yes,2,Male,0,No,No,4,No,No,Yes,No,No,No,73.9,280.85,Yes


In [3]:
# list the columns I chose to acquire from the SQL data as relevant for exploration
# any that turn out to be unnecessary can be dropped later on

df.columns

Index(['customer_id', 'contract_type_id', 'phone_service',
       'internet_service_type_id', 'gender', 'senior_citizen', 'partner',
       'dependents', 'tenure', 'online_security', 'online_backup',
       'device_protection', 'tech_support', 'streaming_tv', 'streaming_movies',
       'monthly_charges', 'total_charges', 'churn'],
      dtype='object')

# Prepare

### Use prepare.py to clean up the data and make it easy to work with.

In [4]:
# prep_telco comes from prepare.py (imported in the first cell) and returns the prepared data
# note: this rebinds df, so the frame acquired earlier is no longer reachable under that name

df = prep_telco()
df.head(n=3)

Unnamed: 0_level_0,contract_type,phone,internet_type,senior,partner,depend,tenure,monthly_charges,total_charges,churn,num_add_ons,is_male
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0002-ORFBO,2,1,1,0,1,1,9,65.6,593.3,0,3,0
0003-MKNFE,1,1,1,0,0,0,9,59.9,542.4,0,1,1
0004-TLHLJ,1,1,2,0,0,0,4,73.9,280.85,1,1,1


### Data is tidy and ready to split into train, validate, and test.

In [6]:
# telco_split (from my prepare.py) hands back the train, validate, and test sets

train, validate, test = telco_split(df)

# report the shape of the full dataset followed by each split, one per line
print(
    f'overall shape {df.shape}',
    f'train {train.shape}',
    f'validate {validate.shape}',
    f'test {test.shape}',
    sep='\n',
)

overall shape (7043, 12)
train (3943, 12)
validate (1691, 12)
test (1409, 12)


### Now the data is ready for exploration. We will work only within the train dataset in the next step.

# Explore