In [1]:
#Import Libraries
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import seaborn as sn

In [2]:
telComDF= pd.read_csv('Telco-Customer-Churn.csv', encoding = 'utf8')
telComDF=telComDF.dropna()
telComDF.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
valueCountChurn = telComDF["Churn"].value_counts()
valueCountChurn

No     5174
Yes    1869
Name: Churn, dtype: int64

In [4]:
valueCountContract = telComDF["Contract"].value_counts()
valueCountContract

Month-to-month    3875
Two year          1695
One year          1473
Name: Contract, dtype: int64

In [5]:
totalList = len(telComDF["customerID"])
totalList 

7043

In [6]:
total_churnRatio = ((valueCountChurn / totalList)*100).round(1).astype(str) + '%' 
total_churnRatio

No     73.5%
Yes    26.5%
Name: Churn, dtype: object

In [7]:
total_contractRatio = ((valueCountContract / totalList)*100).round(1).astype(str) + '%' 
total_contractRatio

Month-to-month    55.0%
Two year          24.1%
One year          20.9%
Name: Contract, dtype: object

In [8]:
ChurnDF = telComDF.groupby(['Churn'])
ChurnDF

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x000002DE678FEB00>

In [9]:
newCountContract = ChurnDF["Contract"].value_counts()
newCountContract

Churn  Contract      
No     Month-to-month    2220
       Two year          1647
       One year          1307
Yes    Month-to-month    1655
       One year           166
       Two year            48
Name: Contract, dtype: int64

In [10]:
contractChurnRatio = ((newCountContract/ totalList)*100).round(1).astype(str) + '%' 
contractChurnRatio

Churn  Contract      
No     Month-to-month    31.5%
       Two year          23.4%
       One year          18.6%
Yes    Month-to-month    23.5%
       One year           2.4%
       Two year           0.7%
Name: Contract, dtype: object

In [11]:
newCountIntServ= ChurnDF["InternetService"].value_counts()
newCountIntServ

Churn  InternetService
No     DSL                1962
       Fiber optic        1799
       No                 1413
Yes    Fiber optic        1297
       DSL                 459
       No                  113
Name: InternetService, dtype: int64

In [12]:
cut_labels_6 = ['0_12mos', '13_24mos', '25_36mos', '37_48mos', '49_60', '61_72mos']
cut_bins = [0,12, 24, 36, 48, 60, 72]
telComDF['tenureBins'] = pd.cut(telComDF['tenure'], bins=cut_bins, labels=cut_labels_6)
telComDF


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tenureBins
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0_12mos
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,25_36mos
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,0_12mos
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No,37_48mos
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,0_12mos
5,9305-CDSKC,Female,0,No,No,8,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,0_12mos
6,1452-KIOVK,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,...,No,Yes,No,Month-to-month,Yes,Credit card (automatic),89.10,1949.4,No,13_24mos
7,6713-OKOMC,Female,0,No,No,10,No,No phone service,DSL,Yes,...,No,No,No,Month-to-month,No,Mailed check,29.75,301.9,No,0_12mos
8,7892-POOKP,Female,0,Yes,No,28,Yes,Yes,Fiber optic,No,...,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,104.80,3046.05,Yes,25_36mos
9,6388-TABGU,Male,0,No,Yes,62,Yes,No,DSL,Yes,...,No,No,No,One year,No,Bank transfer (automatic),56.15,3487.95,No,61_72mos


In [13]:
newCount_tenureBins= ChurnDF["tenureBins"].value_counts()
newCount_tenureBins

Churn  tenureBins
No     61_72mos      1314
       0_12mos       1138
       13_24mos       730
       49_60          712
       25_36mos       652
       37_48mos       617
Yes    0_12mos       1037
       13_24mos       294
       25_36mos       180
       37_48mos       145
       49_60          120
       61_72mos        93
Name: tenureBins, dtype: int64

In [14]:
tenureBinsChurnRatio = ((newCount_tenureBins/ totalList)*100).round(1).astype(str) + '%' 
tenureBinsChurnRatio

Churn  tenureBins
No     61_72mos      18.7%
       0_12mos       16.2%
       13_24mos      10.4%
       49_60         10.1%
       25_36mos       9.3%
       37_48mos       8.8%
Yes    0_12mos       14.7%
       13_24mos       4.2%
       25_36mos       2.6%
       37_48mos       2.1%
       49_60          1.7%
       61_72mos       1.3%
Name: tenureBins, dtype: object

In [15]:
ChurnDF = telComDF.groupby(['tenureBins'])
ChurnDF

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x000002DE67964898>

In [16]:
newCount_tenureBins= ChurnDF["Churn"].value_counts()
newCount_tenureBins

tenureBins  Churn
0_12mos     No       1138
            Yes      1037
13_24mos    No        730
            Yes       294
25_36mos    No        652
            Yes       180
37_48mos    No        617
            Yes       145
49_60       No        712
            Yes       120
61_72mos    No       1314
            Yes        93
Name: Churn, dtype: int64

In [17]:
RfCL= telComDF.drop("customerID", axis=1)
RfCL

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tenureBins
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No,0_12mos
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,...,No,No,No,One year,No,Mailed check,56.95,1889.5,No,25_36mos
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,0_12mos
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,...,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No,37_48mos
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes,0_12mos
5,Female,0,No,No,8,Yes,Yes,Fiber optic,No,No,...,No,Yes,Yes,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,0_12mos
6,Male,0,No,Yes,22,Yes,Yes,Fiber optic,No,Yes,...,No,Yes,No,Month-to-month,Yes,Credit card (automatic),89.10,1949.4,No,13_24mos
7,Female,0,No,No,10,No,No phone service,DSL,Yes,No,...,No,No,No,Month-to-month,No,Mailed check,29.75,301.9,No,0_12mos
8,Female,0,Yes,No,28,Yes,Yes,Fiber optic,No,No,...,Yes,Yes,Yes,Month-to-month,Yes,Electronic check,104.80,3046.05,Yes,25_36mos
9,Male,0,No,Yes,62,Yes,No,DSL,Yes,Yes,...,No,No,No,One year,No,Bank transfer (automatic),56.15,3487.95,No,61_72mos


In [18]:
RfCL.data = RfCL.drop("Churn", axis=1)
RfCL.target = RfCL["Churn"]

  """Entry point for launching an IPython kernel.
  


In [19]:
# Create a random forest classifier
rf = RandomForestClassifier(n_estimators=200)
# rf = rf.fit(RfCL.data, RfCL.target)
# rf.score(RfCL.data,RfCL.target)