In [82]:
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [83]:
# loading
training_set = pd.read_csv("train.csv")

columns = training_set.columns
columns


Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [84]:
training_set.describe()


Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,4212.0,4212.0,3761.0
mean,0.164292,31.961776,64.622374
std,0.370585,24.400506,30.101022
min,0.0,0.0,18.25
25%,0.0,8.0,35.1
50%,0.0,28.0,70.15
75%,0.0,54.0,89.8
max,1.0,72.0,118.75


In [85]:
training_set.isnull().sum()


gender                0
SeniorCitizen         0
Partner               0
Dependents            0
tenure                0
PhoneService          0
MultipleLines         0
InternetService       0
OnlineSecurity        0
OnlineBackup          0
DeviceProtection      0
TechSupport           0
StreamingTV           0
StreamingMovies       0
Contract              0
PaperlessBilling      0
PaymentMethod         0
MonthlyCharges      451
TotalCharges          0
Churn                 0
dtype: int64

In [86]:

# helper functions:


def replace_labels(labels):
    return ["Yes" if label==1 else "No" for label in labels]


def replace_churn_labels(labels):
    return ["Churn" if label == "Yes" else "Non-Churn" for label in labels]

def extract_labels_values(column_name):
    result = dict(Counter(training_set[column_name]))
    return list(result.keys()), list(result.values())

def extract_churners_distribution(sub_frame):
    churn_dist = dict(Counter(sub_frame['Churn']))
    return [replace_churn_labels(list(churn_dist.keys())), list(churn_dist.values())]




In [87]:
fig = px.histogram(training_set, x="TotalCharges", color="gender",
                   hover_data=training_set.columns,
                   title= "Distribution of Total Charges Among Males and Females",
                   color_discrete_sequence=px.colors.qualitative.Set2                   )
fig.show()

In [61]:
fig = px.histogram(training_set, x="MonthlyCharges", color="gender",
                   hover_data=training_set.columns,
                   title= "Distribution of Monthly Charges Among Males and Females",
                   color_discrete_sequence=px.colors.qualitative.Set2
                   )

fig.show()


In [62]:
fig = px.histogram(training_set, x="TotalCharges", color="Churn",
                   hover_data=training_set.columns,
                   title= "Distribution of Total Charges Among Churners",
                   color_discrete_sequence=px.colors.qualitative.Set2
                   )

fig.show()

In [63]:
fig = px.histogram(training_set, x="MonthlyCharges", color="Churn",
                   hover_data=training_set.columns,
                   title= "Distribution of Monthly Charges Among Churners",
                   color_discrete_sequence=px.colors.qualitative.Set2
                   )

fig.show()

In [64]:
fig = px.histogram(training_set, x="tenure", color="Churn",
                   hover_data=training_set.columns,
                   title= "Distribution of Tenure Among Churners",
                   color_discrete_sequence=px.colors.qualitative.Set2
                   )

fig.show()



In [65]:
fig = px.histogram(training_set, x="MonthlyCharges", color="Churn",
                   hover_data=training_set.columns,
                   title= "Distribution of Monthly Charges Among Churners",
                   color_discrete_sequence=px.colors.qualitative.Set2
                   )

fig.show()



In [66]:
churners = dict(Counter(training_set['Churn']))

fig2 = px.pie(training_set,
              values=list(churners.values()),
              names=replace_churn_labels(list(churners.keys())),
              title='Overall Churn Distribution',
              color_discrete_sequence=px.colors.qualitative.Set2              )
fig2.update_traces(textposition='inside', textinfo='percent+label')
fig2.show()




In [67]:


electronic_check_churners =training_set.loc[training_set['PaymentMethod'] == 'Electronic check']
bank_transfer_churners =training_set.loc[training_set['PaymentMethod'] == 'Bank transfer (automatic)']
mailed_check_churners =training_set.loc[training_set['PaymentMethod'] == 'Mailed check']
credit_card_churners =training_set.loc[training_set['PaymentMethod'] == 'Credit card (automatic)']

electronic_check_churners_dist = extract_churners_distribution(electronic_check_churners)
bank_transfer_churners_dist = extract_churners_distribution(bank_transfer_churners)
mailed_check_churners_dist = extract_churners_distribution(mailed_check_churners)
credit_card_churners_dist = extract_churners_distribution(credit_card_churners)



# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}]
         ]
fig = make_subplots(rows=2, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=electronic_check_churners_dist[0], values=electronic_check_churners_dist[1], title="Electronic Check",titleposition="bottom center"), 1, 1)
fig.add_trace(go.Pie(labels=bank_transfer_churners_dist[0], values=bank_transfer_churners_dist[1],title="Bank Transfer",titleposition="bottom center"), 1, 2)
fig.add_trace(go.Pie(labels=mailed_check_churners_dist[0], values=mailed_check_churners_dist[1],title="Mailed Check",titleposition="bottom center"), 2, 1)
fig.add_trace(go.Pie(labels=credit_card_churners_dist[0], values=credit_card_churners_dist[1], title="Credit Card",titleposition="bottom center"), 2, 2)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='percent', marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Customer Who Pay with Electronic Check are Most Likely to be Churners!',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()



In [68]:

fiber_churners =training_set.loc[training_set['InternetService'] == 'Fiber optic']
dsl_churners =training_set.loc[training_set['InternetService'] == 'DSL']
no_internet_churners =training_set.loc[training_set['InternetService'] == 'No']

fiber_churners_dist = extract_churners_distribution(fiber_churners)
dsl_churners_dist = extract_churners_distribution(dsl_churners)
no_internet_churners_dist = extract_churners_distribution(no_internet_churners)



# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}]
         ]
fig = make_subplots(rows=2, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=fiber_churners_dist[0], values=fiber_churners_dist[1], title="Fiber",titleposition="bottom center"), 1, 1,)
fig.add_trace(go.Pie(labels=dsl_churners_dist[0], values=dsl_churners_dist[1],title="DSL",titleposition="bottom center"), 1, 2)
fig.add_trace(go.Pie(labels=no_internet_churners_dist[0], values=no_internet_churners_dist[1],title="No Internet",titleposition="bottom center"), 2, 1)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='percent',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Customers with "Fiber" Internet Service are Most Likely to be Churners!',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()




In [69]:

tv_streamer_churners =training_set.loc[training_set['StreamingTV'] == 'Yes']
no_internet_tv_churners =training_set.loc[training_set['StreamingTV'] == 'No internet service']
no_churners =training_set.loc[training_set['StreamingTV'] == 'No']

tv_streamer_churners_dist = extract_churners_distribution(tv_streamer_churners)
no_internet_tv_churners_dist = extract_churners_distribution(no_internet_tv_churners)
no_churners_dist = extract_churners_distribution(no_churners)

specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}]
         ]
fig = make_subplots(rows=2, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=tv_streamer_churners_dist[0], values=tv_streamer_churners_dist[1], title="TV Streamers",titleposition="bottom center"), 1, 1,)
fig.add_trace(go.Pie(labels=no_internet_tv_churners_dist[0], values=no_internet_tv_churners_dist[1],title="No Internet",titleposition="bottom center"), 1, 2)
fig.add_trace(go.Pie(labels=no_churners_dist[0], values=no_churners_dist[1],title="Non TV Streamers",titleposition="bottom center"), 2, 1)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='percent',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='TV Streamers are less likely to be churners than non Streamers or Movies Streamers',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()




In [70]:

movie_streamer_churners =training_set.loc[training_set['StreamingMovies'] == 'Yes']
no_internet_movie_churners =training_set.loc[training_set['StreamingMovies'] == 'No internet service']
no_churners =training_set.loc[training_set['StreamingMovies'] == 'No']

movie_streamer_churners_dist = extract_churners_distribution(movie_streamer_churners)
no_internet_movie_churners_dist = extract_churners_distribution(no_internet_movie_churners)
no_churners_dist = extract_churners_distribution(no_churners)


# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}]
         ]
fig = make_subplots(rows=2, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=movie_streamer_churners_dist[0], values=movie_streamer_churners_dist[1], title="Movie Streamers",titleposition="bottom center"), 1, 1,)
fig.add_trace(go.Pie(labels=no_internet_movie_churners_dist[0], values=no_internet_movie_churners_dist[1],title="No Internet",titleposition="bottom center"), 1, 2)
fig.add_trace(go.Pie(labels=no_churners_dist[0], values=no_churners_dist[1],title="Non Movie Streamers",titleposition="bottom center"), 2, 1)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='percent',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Customers with "Non" Internet Service are Less Likely to be Churners!',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()



In [71]:

month_churners =training_set.loc[training_set['Contract'] == 'Month-to-month']
two_year_churners =training_set.loc[training_set['Contract'] == 'Two year']
one_year_churners =training_set.loc[training_set['Contract'] == 'One year']

month_churners_dist = extract_churners_distribution(month_churners)
two_year_churners_dist = extract_churners_distribution(two_year_churners)
one_year_churners_dist = extract_churners_distribution(one_year_churners)



# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}]
         ]
fig = make_subplots(rows=2, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=month_churners_dist[0], values=month_churners_dist[1], title="Month-to-month",titleposition="bottom center"), 1, 1,)
fig.add_trace(go.Pie(labels=two_year_churners_dist[0], values=two_year_churners_dist[1],title="Two Year",titleposition="bottom center"), 1, 2)
fig.add_trace(go.Pie(labels=one_year_churners_dist[0], values=one_year_churners_dist[1],title="One Year",titleposition="bottom center"), 2, 1)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='percent',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Customers with "Month-to-Month" Contract Type are Most Likely to be Churners',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()





In [72]:

SeniorCitizen_labels, SeniorCitizen_values = extract_labels_values("SeniorCitizen")
SeniorCitizen_labels = replace_labels(SeniorCitizen_labels)
Partner_labels, Partner_values = extract_labels_values("Partner")
Dependents_labels, Dependents_values = extract_labels_values("Dependents")
PhoneService_labels, PhoneService_values = extract_labels_values("PhoneService")
PaperlessBilling_labels, PaperlessBilling_values = extract_labels_values("PaperlessBilling")



# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}],
         ]
fig = make_subplots(rows=3, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=SeniorCitizen_labels, values=SeniorCitizen_values, title="Senior Citizen?",titleposition="bottom center"), 1, 1,)
fig.add_trace(go.Pie(labels=Partner_labels, values=Partner_values,title="Has a Partner?",titleposition="bottom center"), 1, 2)
fig.add_trace(go.Pie(labels=Dependents_labels, values=Dependents_values,title="Has a Dependants?",titleposition="bottom center"), 2, 1)
fig.add_trace(go.Pie(labels=PhoneService_labels, values=PhoneService_values, title="Phone Service?",titleposition="bottom center"), 2, 2)
fig.add_trace(go.Pie(labels=PaperlessBilling_labels, values=PaperlessBilling_values,title="Paperless Billing?",titleposition="bottom center"), 3, 1)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='label',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Distribution of Different Binary Features',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()


In [73]:

MultipleLines_labels, MultipleLines_values = extract_labels_values("MultipleLines")
InternetService_labels, InternetService_values = extract_labels_values("InternetService")
OnlineSecurity_labels, OnlineSecurity_values = extract_labels_values("OnlineSecurity")
StreamingMovies_labels, StreamingMovies_values = extract_labels_values("StreamingMovies")




# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         [{'type':'domain'},{'type':'domain'}],
         ]
fig = make_subplots(rows=2, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=MultipleLines_labels, values=MultipleLines_values, title="Multiple Lines" , titleposition="top center"), 1, 1,)
fig.add_trace(go.Pie(labels=InternetService_labels, values=InternetService_values,title="Internet Service" , titleposition="top center"), 1, 2)
fig.add_trace(go.Pie(labels=OnlineSecurity_labels, values=OnlineSecurity_values,title="Online Security" , titleposition="top center"), 2, 1)
fig.add_trace(go.Pie(labels=StreamingMovies_labels, values=StreamingMovies_values, title="Streaming Movies" , titleposition="top center"), 2, 2)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='label',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Distribution of Categorical Features',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()





In [74]:
Contract_labels, Contract_values = extract_labels_values("Contract")
PaymentMethod_labels, PaymentMethod_values = extract_labels_values("PaymentMethod")

# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         ]
fig = make_subplots(rows=1, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=Contract_labels, values=Contract_values, title="MultipleLines?" , titleposition="top center"), 1, 1,)
fig.add_trace(go.Pie(labels=PaymentMethod_labels, values=PaymentMethod_values,title="Payments Method" , titleposition="top center"), 1, 2)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='percent',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Distribution of Categorical Features',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()



In [75]:
StreamingTV_labels, StreamingTV_values = extract_labels_values("StreamingTV")
DeviceProtection_labels, DeviceProtection_values = extract_labels_values("DeviceProtection")

# Create subplots, using 'domain' type for pie charts
specs = [
         [{'type':'domain'},{'type':'domain'}],
         ]
fig = make_subplots(rows=1, cols=2, specs=specs)

# Define pie charts
fig.add_trace(go.Pie(labels=StreamingTV_labels, values=StreamingTV_values, title="Streaming TV?" , titleposition="top center"), 1, 1,)
fig.add_trace(go.Pie(labels=DeviceProtection_labels, values=DeviceProtection_values,title="Device Protection" , titleposition="top center"), 1, 2)


# Tune layout and hover info
fig.update_traces(hoverinfo='label+percent+name', textinfo='label',marker_colors=px.colors.qualitative.Set2)
fig.update(layout_title_text='Distribution of Categorical Features',
           layout_showlegend=True)

fig = go.Figure(fig)
fig.show()


In [76]:

fig = px.scatter(x=training_set["MonthlyCharges"],color_continuous_scale=px.colors.qualitative.Set2,
                 y=training_set["TotalCharges"], title="Relation between Monthly and Total Charges is Linear")
fig.show()


In [77]:


fig = px.scatter(x=training_set["TotalCharges"],color_continuous_scale=px.colors.qualitative.Set2,
                 y=training_set["tenure"], title="Relation between Total-Charges and Tenure")
fig.show()



In [78]:
fig = px.scatter(x=training_set["MonthlyCharges"], color_continuous_scale=px.colors.qualitative.Set2,
                 y=training_set["TotalCharges"], title="Relation between Monthly-Charges and Total-Charges")
fig.show()



In [79]:




training_set['gender'] = training_set['gender'].replace(dict(Counter(training_set["gender"])))
training_set['SeniorCitizen'] = training_set['SeniorCitizen'].replace(dict(Counter(training_set["SeniorCitizen"])))
training_set['Partner'] = training_set['Partner'].replace(dict(Counter(training_set["Partner"])))
training_set['Dependents'] = training_set['Dependents'].replace(dict(Counter(training_set["Dependents"])))
training_set['PhoneService'] = training_set['PhoneService'].replace(dict(Counter(training_set["PhoneService"])))
training_set['PaperlessBilling'] = training_set['PaperlessBilling'].replace(dict(Counter(training_set["PaperlessBilling"])))
training_set['MultipleLines'] = training_set['MultipleLines'].replace(dict(Counter(training_set["MultipleLines"])))
training_set['InternetService'] = training_set['InternetService'].replace(dict(Counter(training_set["InternetService"])))
training_set['OnlineSecurity'] = training_set['OnlineSecurity'].replace(dict(Counter(training_set["OnlineSecurity"])))
training_set['OnlineBackup'] = training_set['OnlineBackup'].replace(dict(Counter(training_set["OnlineBackup"])))
training_set['DeviceProtection'] = training_set['DeviceProtection'].replace(dict(Counter(training_set["DeviceProtection"])))
training_set['TechSupport'] = training_set['TechSupport'].replace(dict(Counter(training_set["TechSupport"])))
training_set['StreamingTV'] = training_set['StreamingTV'].replace(dict(Counter(training_set["StreamingTV"])))
training_set['StreamingMovies'] = training_set['StreamingMovies'].replace(dict(Counter(training_set["StreamingMovies"])))
training_set['Contract'] = training_set['Contract'].replace(dict(Counter(training_set["Contract"])))
training_set['PaymentMethod'] = training_set['PaymentMethod'].replace(dict(Counter(training_set["PaymentMethod"])))
training_set['Churn'] = training_set['Churn'].replace(dict(Counter(training_set["Churn"])))



corr = training_set.corr()


corr

values_ = corr.values
values = np.round(values_, decimals=1)

fig = ff.create_annotated_heatmap(z=values,x= corr.columns.tolist(), y=corr.index.tolist(), colorscale=px.colors.qualitative.Set2)
fig.show()


In [80]:
values = np.round(values, decimals=0)

fig = ff.create_annotated_heatmap(z=values,x= corr.columns.tolist(), y=corr.index.tolist(), colorscale=px.colors.qualitative.Set2)
fig.show()



In [81]:

corr = training_set.corrwith(training_set["Churn"])
fig = go.Figure([go.Bar(y=list(corr.values),
                        x=list(corr.keys())
                        )])
fig.update_layout(
    autosize=False,
    width=1200,
    height=600,
    title = "Feature Correlation with Churn"

)
fig.show()
