Import required libraries

In [1]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

px.defaults.template = "ggplot2"

Load data

In [2]:
df=pd.read_csv("../datasets/telco-customer-churn.csv")

Explore data

Peak first 5 records

In [3]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0.0,Yes,No,1.0,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0.0,No,No,34.0,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0.0,No,No,2.0,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0.0,No,No,45.0,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0.0,No,No,2.0,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [4]:
df.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

Check records and features

In [5]:
df.shape

(2244, 21)

Convert Data Types

In [6]:
df.dtypes

customerID           object
gender               object
SeniorCitizen       float64
Partner              object
Dependents           object
tenure              float64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [7]:
df['TotalCharges']=pd.to_numeric(df['TotalCharges'], errors='coerce')

In [8]:
df.dtypes

customerID           object
gender               object
SeniorCitizen       float64
Partner              object
Dependents           object
tenure              float64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
Churn                object
dtype: object

## Explore Data

Total Customers

In [9]:
df.shape[0]

2244

Customers Churned

In [10]:
df[df['Churn']=='Yes']['customerID'].count()

589

Customers Remained

In [11]:
df[df['Churn']=='No']['customerID'].count()

1654

Revenue Lost

In [12]:
df[df['Churn']=='Yes']['TotalCharges'].sum()

897135.8

Statistical Summary

In [15]:
data_summary_df=pd.DataFrame(df.describe())
data_summary_df

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges
count,2243.0,2243.0,2243.0,2238.0
mean,0.157379,32.398128,65.365916,2291.495733
std,0.364238,24.584209,29.837483,2251.815762
min,0.0,0.0,18.4,18.8
25%,0.0,9.0,39.55,413.525
50%,0.0,29.0,71.15,1415.425
75%,0.0,56.0,89.85,3870.2875
max,1.0,72.0,118.65,8564.75


Correlation

In [19]:
df_correlation=df[['tenure','MonthlyCharges','TotalCharges']].corr()
df_correlation

Unnamed: 0,tenure,MonthlyCharges,TotalCharges
tenure,1.0,0.229158,0.825123
MonthlyCharges,0.229158,1.0,0.638747
TotalCharges,0.825123,0.638747,1.0


In [20]:
churn_correlation_df=px.imshow(df_correlation)
churn_correlation_df.update_layout(legend=dict(yanchor="top",y=0.95,xanchor="left",x=0.40),autosize=True,margin=dict(t=0,b=0,l=0,r=0))

Attrition

In [16]:
attrition_df=df.groupby( [ "Churn"], as_index=False )["customerID"].count()

In [17]:
attrition_df.head()

Unnamed: 0,Churn,customerID
0,No,1654
1,Yes,589


In [18]:
colors = ['skyblue','crimson']
doughnut_attrition = go.Figure(data=[go.Pie(labels=attrition_df['Churn'].tolist(), values=attrition_df['customerID'].tolist(), hole=.3)])
doughnut_attrition.update_layout(showlegend=False,autosize=True,annotations=[dict(text='Attrition',  font_size=20, showarrow=False)],margin=dict(t=0,b=0,l=0,r=0),height=350,colorway=colors)

Attrition by Revenue

In [19]:
totalcharges_attrition_df=df.groupby( ["Churn"], as_index=False )["TotalCharges"].sum()
totalcharges_attrition_df=totalcharges_attrition_df.sort_values(by=['TotalCharges'],ascending=True)
totalcharges_attrition_df.columns=['Churn','Revenue']
totalcharges_attrition_df

Unnamed: 0,Churn,Revenue
1,Yes,897135.8
0,No,4231231.65


In [20]:
colors = ['crimson','skyblue']
contract_barchart=px.bar(totalcharges_attrition_df,x='Churn',y='Revenue',color='Churn',text='Revenue',color_discrete_sequence=colors)
contract_barchart.update_layout(legend=dict(yanchor="top",y=0.95,xanchor="left",x=0.40),autosize=True,margin=dict(t=0,b=0,l=0,r=0))

Attrition by Gender

In [21]:
gender_attrition_df=df.groupby( [ "Churn","gender"], as_index=False )["customerID"].count()
gender_attrition_df.columns=['Churn','Gender','Customers']
gender_attrition_df

Unnamed: 0,Churn,Gender,Customers
0,No,Female,792
1,No,Male,862
2,Yes,Female,300
3,Yes,Male,289


In [22]:
colors = ['skyblue','crimson']
grouped_barchart=px.bar(gender_attrition_df,x='Gender',y='Customers',color='Churn',text='Customers',color_discrete_sequence=colors,)
grouped_barchart.update_layout(legend=dict(yanchor="top",y=0.95,xanchor="left",x=0.46),autosize=True,margin=dict(t=0,b=0,l=0,r=0)) #use barmode='stack' when stacking,

Attrition by Contract

In [23]:
contract_attrition_df=df.groupby( [ "Churn","Contract"], as_index=False )["customerID"].count()
contract_attrition_df.head()

Unnamed: 0,Churn,Contract,customerID
0,No,Month-to-month,702
1,No,One year,412
2,No,Two year,540
3,Yes,Month-to-month,531
4,Yes,One year,51


In [24]:
contract_base_df=df.groupby(["Contract"], as_index=False )["customerID"].count()
contract_base_df['Churn']='Customer Base'
contract_base_df

Unnamed: 0,Contract,customerID,Churn
0,Month-to-month,1233,Customer Base
1,One year,463,Customer Base
2,Two year,547,Customer Base


In [25]:
contract_attrition_df=contract_attrition_df.append(contract_base_df, ignore_index = True) 
contract_attrition_df.columns=['Churn','Contract','Customers']
contract_attrition_df=contract_attrition_df.sort_values(by=['Contract', 'Customers'],ascending=True)
contract_attrition_df

Unnamed: 0,Churn,Contract,Customers
3,Yes,Month-to-month,531
0,No,Month-to-month,702
6,Customer Base,Month-to-month,1233
4,Yes,One year,51
1,No,One year,412
7,Customer Base,One year,463
5,Yes,Two year,7
2,No,Two year,540
8,Customer Base,Two year,547


In [26]:
colors = ['crimson','skyblue','teal']
contract_barchart=px.bar(contract_attrition_df,x='Contract',y='Customers',color='Churn',text='Customers',color_discrete_sequence=colors,barmode="group")
contract_barchart.update_layout(legend=dict(yanchor="top",y=0.95,xanchor="left",x=0.50),autosize=True,margin=dict(t=0,b=0,l=0,r=0)) #use barmode='stack' when stacking,

Attrition by Citizenship

In [27]:
citizenship_attrition_df=df.groupby( [ "Churn","SeniorCitizen"], as_index=False )["customerID"].count()
citizenship_attrition_df

Unnamed: 0,Churn,SeniorCitizen,customerID
0,No,0.0,1441
1,No,1.0,213
2,Yes,0.0,449
3,Yes,1.0,140


In [28]:
citizenship_base_df=df.groupby(["SeniorCitizen"], as_index=False )["customerID"].count()
citizenship_base_df['Churn']='Customer Base'
citizenship_base_df

Unnamed: 0,SeniorCitizen,customerID,Churn
0,0.0,1890,Customer Base
1,1.0,353,Customer Base


In [29]:
citizenship_attrition_df=citizenship_attrition_df.append(citizenship_base_df, ignore_index = True) 
citizenship_attrition_df.columns=['Churn','Citizenship','Customers']
citizenship_attrition_df=citizenship_attrition_df.sort_values(by=['Citizenship', 'Customers'],ascending=False)
citizenship_attrition_df

Unnamed: 0,Churn,Citizenship,Customers
5,Customer Base,1.0,353
1,No,1.0,213
3,Yes,1.0,140
4,Customer Base,0.0,1890
0,No,0.0,1441
2,Yes,0.0,449


In [30]:
colors = ['teal','skyblue','crimson']
citizenship_barchart=px.bar(citizenship_attrition_df,x='Customers',y=['Citizenship'],color='Churn',text='Customers',orientation="h",color_discrete_sequence=colors,barmode="group")
citizenship_barchart.update_layout(legend=dict(yanchor="top",y=0.95,xanchor="left",x=0.50),autosize=True,margin=dict(t=0,b=0,l=0,r=0))

Attrition by Tenure

In [31]:
tenure_attrition_df=df.groupby( [ "Churn","tenure"], as_index=False )["customerID"].count()
tenure_attrition_df.columns=['Churn','Tenure','Customers']
tenure_attrition_df.head()

Unnamed: 0,Churn,Tenure,Customers
0,No,0.0,5
1,No,1.0,79
2,No,2.0,33
3,No,3.0,41
4,No,4.0,22


In [32]:
colors = ['skyblue','crimson']
tenure_barchart = px.treemap(tenure_attrition_df, path=['Churn', 'Tenure'], values='Customers',color_discrete_sequence=colors)
tenure_barchart.update_layout(legend=dict(yanchor="top",y=0.95,xanchor="left",x=0.50),autosize=True,margin=dict(t=0,b=0,l=0,r=0)) 
tenure_barchart

## Data Preprocessing

Dummy encode categorical features

In [22]:
df['SeniorCitizen'] = df['SeniorCitizen'].astype(str)  # convert SeniorCitizen column to string

In [23]:
df=pd.get_dummies(df,columns=['gender','Partner','Dependents','PhoneService','MultipleLines','InternetService','OnlineSecurity','OnlineBackup','DeviceProtection', 'TechSupport', 'StreamingTV','StreamingMovies','Contract', 'PaperlessBilling', 'PaymentMethod','SeniorCitizen'])

In [24]:
df.head()

Unnamed: 0,customerID,tenure,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,SeniorCitizen_0.0,SeniorCitizen_1.0,SeniorCitizen_nan
0,7590-VHVEG,1.0,29.85,29.85,No,1,0,0,1,1,...,0,0,1,0,0,1,0,1,0,0
1,5575-GNVDE,34.0,56.95,1889.5,No,0,1,1,0,1,...,0,1,0,0,0,0,1,1,0,0
2,3668-QPYBK,2.0,53.85,108.15,Yes,0,1,1,0,1,...,0,0,1,0,0,0,1,1,0,0
3,7795-CFOCW,45.0,42.3,1840.75,No,0,1,1,0,1,...,0,1,0,1,0,0,0,1,0,0
4,9237-HQITU,2.0,70.7,151.65,Yes,1,0,1,0,1,...,0,0,1,0,0,1,0,1,0,0
