In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import sklearn
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

## Explore Data

In [2]:
data = pd.read_csv('Telco-Customer-Churn.csv')
data.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
print(data.keys())

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')


## Prepare data

In [71]:
# drop unused data
data_drop = data.drop(['customerID'],axis=1)

#set dict
yes_no = {'Yes': 1, 'No': 0}
gender = {'Male': 0, 'Female': 1}
multiple_lines = {'Yes': 2, 'No': 1, 'No phone service': 0}
internet = {'DSL': 2, 'Fiber optic': 1, 'No': 0}
online = {'Yes': 2, 'No' : 1, 'No internet service': 0}
contract = {'Month-to-month': 0, 'One year': 1, 'Two year': 2}
payment = {'Electronic check': 0, 'Mailed check': 1, 
           'Bank transfer (automatic)': 2, 'Credit card (automatic)': 3}

#convert
data_drop['gender'] = data_drop['gender'].map(gender)
data_drop['Partner'] = data_drop['Partner'].map(yes_no)
data_drop['Dependents'] = data_drop['Dependents'].map(yes_no)
data_drop['PhoneService'] = data_drop['PhoneService'].map(yes_no)
data_drop['MultipleLines'] = data_drop['MultipleLines'].map(multiple_lines)
data_drop['InternetService'] = data_drop['InternetService'].map(internet)
data_drop['OnlineSecurity'] = data_drop['OnlineSecurity'].map(online)
data_drop['OnlineBackup'] = data_drop['OnlineBackup'].map(online)
data_drop['DeviceProtection'] = data_drop['DeviceProtection'].map(online)
data_drop['TechSupport'] = data_drop['TechSupport'].map(online)
data_drop['StreamingTV'] = data_drop['StreamingTV'].map(online)
data_drop['StreamingMovies'] = data_drop['StreamingMovies'].map(online)
data_drop['Contract'] = data_drop['Contract'].map(contract)
data_drop['PaperlessBilling'] = data_drop['PaperlessBilling'].map(yes_no)
data_drop['PaymentMethod'] = data_drop['PaymentMethod'].map(payment)
data_drop['Churn'] = data_drop['Churn'].map(yes_no)

#show
print(data_drop.dtypes)
data_drop.head()

gender                int64
SeniorCitizen         int64
Partner               int64
Dependents            int64
tenure                int64
PhoneService          int64
MultipleLines         int64
InternetService       int64
OnlineSecurity        int64
OnlineBackup          int64
DeviceProtection      int64
TechSupport           int64
StreamingTV           int64
StreamingMovies       int64
Contract              int64
PaperlessBilling      int64
PaymentMethod         int64
MonthlyCharges      float64
TotalCharges         object
Churn                 int64
dtype: object


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,1,0,1,0,1,0,0,2,1,2,1,1,1,1,0,1,0,29.85,29.85,0
1,0,0,0,0,34,1,1,2,2,1,2,1,1,1,1,0,1,56.95,1889.5,0
2,0,0,0,0,2,1,1,2,2,2,1,1,1,1,0,1,1,53.85,108.15,1
3,0,0,0,0,45,0,0,2,2,1,2,2,1,1,1,0,2,42.3,1840.75,0
4,1,0,0,0,2,1,1,1,1,1,1,1,1,1,0,1,0,70.7,151.65,1


In [65]:
drop_churn = data_drop.drop(['Churn','TotalCharges'],axis=1).columns

data_train, data_test = train_test_split(data_drop, test_size=0.25)
rf = RandomForestClassifier(n_estimators=30)
rf.fit(data_train[drop_churn],data_train['Churn'])
# data_train[drop_churn]
pred = rf.predict(data_test[drop_churn])
# display(pred)
# data_test[drop_churn].head()

In [66]:
score = rf.score(data_test[drop_churn],data_test['Churn'])
print("score: ",score)
print("mean: ", np.mean(pred == data_test['Churn']))

score:  0.7950028392958546
mean:  0.7950028392958546
