In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the Telco customer-churn CSV; the path is relative, so the file must be
# in the notebook's working directory. Then preview the first rows.
# NOTE(review): TotalCharges displays as numbers below, but its dtype is not
# checked anywhere in this notebook section -- confirm it parsed as numeric.
customer_churn = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
customer_churn.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
# Show the full list of column names (the preview above elides the middle ones).
customer_churn.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [4]:
# Features are every column except the label; the label is the 'Churn' column.
X = customer_churn.drop(columns='Churn')
y = customer_churn['Churn']

In [5]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=7,
)

---

In [6]:
# Put the training label and training features back into a single frame,
# with the label ('Churn') as the first column, then preview it.
train_data = pd.concat(
    [y_train, X_train],
    axis='columns',
)
train_data.head()

Unnamed: 0,Churn,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,...,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
1114,No,3312-UUMZW,Male,0,Yes,No,32,Yes,Yes,Fiber optic,...,Yes,No,No,Yes,Yes,Month-to-month,Yes,Credit card (automatic),98.85,3145.9
5499,Yes,4631-OACRM,Male,1,No,No,15,Yes,No,Fiber optic,...,No,No,No,No,Yes,Month-to-month,Yes,Electronic check,79.4,1156.1
798,No,3807-XHCJH,Female,0,Yes,Yes,1,Yes,No,No,...,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Electronic check,20.0,20.0
2558,No,5380-XPJNZ,Female,0,No,No,38,Yes,No,No,...,No internet service,No internet service,No internet service,No internet service,No internet service,One year,No,Mailed check,20.05,678.2
2305,No,4947-DSMXK,Male,0,Yes,Yes,34,Yes,Yes,Fiber optic,...,Yes,No,No,Yes,Yes,Month-to-month,Yes,Bank transfer (automatic),97.7,3410.0


In [7]:
# Count missing (NaN/None) entries in each training column.
train_data.isna().sum()

Churn               0
customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
dtype: int64

In [8]:
# Encode every strictly binary Yes/No column as integers (No -> 0, Yes -> 1).
# The encoder is fitted once on the two known labels instead of being refitted
# on each column, so `lb` together with `yes_no_columns` can be reused to apply
# the identical encoding to other data (e.g. the held-out split).
lb = LabelEncoder()
lb.fit(['No', 'Yes'])

# A column qualifies only if its distinct values are exactly {'No', 'Yes'};
# columns with any other value (e.g. 'No internet service') are left untouched.
# unique() is evaluated once per column.
yes_no_columns = [
    col for col in train_data.columns
    if set(train_data[col].unique()) == {'No', 'Yes'}
]

# Overwrite the matching columns of train_data in place with their integer codes.
for col in yes_no_columns:
    train_data[col] = lb.transform(train_data[col])

In [9]:
# Preview the training frame after the Yes/No columns were integer-encoded.
train_data.head()

Unnamed: 0,Churn,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,...,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
1114,0,3312-UUMZW,Male,0,1,0,32,1,Yes,Fiber optic,...,Yes,No,No,Yes,Yes,Month-to-month,1,Credit card (automatic),98.85,3145.9
5499,1,4631-OACRM,Male,1,0,0,15,1,No,Fiber optic,...,No,No,No,No,Yes,Month-to-month,1,Electronic check,79.4,1156.1
798,0,3807-XHCJH,Female,0,1,1,1,1,No,No,...,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,0,Electronic check,20.0,20.0
2558,0,5380-XPJNZ,Female,0,0,0,38,1,No,No,...,No internet service,No internet service,No internet service,No internet service,No internet service,One year,0,Mailed check,20.05,678.2
2305,0,4947-DSMXK,Male,0,1,1,34,1,Yes,Fiber optic,...,Yes,No,No,Yes,Yes,Month-to-month,1,Bank transfer (automatic),97.7,3410.0


---