## Predict the Customer Churn of a telecom company and find out the key drivers that lead to churn

* Customer Churn is the percentage of customers who stop doing business with an entity.

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [2]:
# Read the train and test CSV files (paths are relative to the working directory).
trainer, tester = (
    pd.read_csv(csv_name)
    for csv_name in ('Telecom_Train.csv', 'Telecom_Test.csv')
)

In [3]:
# (rows, columns) of the training frame.
trainer.shape

(3333, 21)

In [4]:
# (rows, columns) of the test frame.
tester.shape

(1667, 21)

In [5]:
# Ratio of training rows to test rows.
len(trainer) / len(tester)

1.9994001199760048

In [6]:
# Preview the first five training rows, transposed so each column reads as a row.
trainer.head().transpose()

Unnamed: 0,0,1,2,3,4
Unnamed: 0,1,2,3,4,5
state,KS,OH,NJ,OH,OK
account_length,128,107,137,84,75
area_code,area_code_415,area_code_415,area_code_415,area_code_408,area_code_415
international_plan,no,no,no,yes,yes
voice_mail_plan,yes,yes,no,no,no
number_vmail_messages,25,26,0,0,0
total_day_minutes,265.1,161.6,243.4,299.4,166.7
total_day_calls,110,123,114,71,113
total_day_charge,45.07,27.47,41.38,50.9,28.34


In [7]:
# Column dtypes of the training frame, sorted so same-typed columns are listed together.
trainer.dtypes.sort_values()

Unnamed: 0                         int64
account_length                     int64
total_intl_calls                   int64
number_vmail_messages              int64
total_night_calls                  int64
total_day_calls                    int64
number_customer_service_calls      int64
total_eve_calls                    int64
total_intl_charge                float64
total_intl_minutes               float64
total_night_charge               float64
total_night_minutes              float64
total_eve_minutes                float64
total_day_charge                 float64
total_day_minutes                float64
total_eve_charge                 float64
voice_mail_plan                   object
international_plan                object
area_code                         object
state                             object
churn                             object
dtype: object

In [8]:
# Number of missing values in each training column.
trainer.isna().sum()

Unnamed: 0                       0
state                            0
account_length                   0
area_code                        0
international_plan               0
voice_mail_plan                  0
number_vmail_messages            0
total_day_minutes                0
total_day_calls                  0
total_day_charge                 0
total_eve_minutes                0
total_eve_calls                  0
total_eve_charge                 0
total_night_minutes              0
total_night_calls                0
total_night_charge               0
total_intl_minutes               0
total_intl_calls                 0
total_intl_charge                0
number_customer_service_calls    0
churn                            0
dtype: int64

In [9]:
# Distinct-value count per training column, ascending (low-cardinality columns first).
trainer.nunique().sort_values()

churn                               2
international_plan                  2
voice_mail_plan                     2
area_code                           3
number_customer_service_calls      10
total_intl_calls                   21
number_vmail_messages              46
state                              51
total_day_calls                   119
total_night_calls                 120
total_eve_calls                   123
total_intl_charge                 162
total_intl_minutes                162
account_length                    212
total_night_charge                933
total_eve_charge                 1440
total_night_minutes              1591
total_eve_minutes                1611
total_day_minutes                1667
total_day_charge                 1667
Unnamed: 0                       3333
dtype: int64

In [10]:
# Preview the first five test rows, transposed so each column reads as a row.
tester.head().transpose()

Unnamed: 0,0,1,2,3,4
Unnamed: 0,1,2,3,4,5
state,HI,MT,OH,NM,SC
account_length,101,137,103,99,108
area_code,area_code_510,area_code_510,area_code_408,area_code_415,area_code_415
international_plan,no,no,no,no,no
voice_mail_plan,no,no,yes,no,no
number_vmail_messages,0,0,29,0,0
total_day_minutes,70.9,223.6,294.7,216.8,197.4
total_day_calls,123,86,95,123,78
total_day_charge,12.05,38.01,50.1,36.86,33.56


In [11]:
# Column dtypes of the test frame, sorted so same-typed columns are listed together.
tester.dtypes.sort_values()

Unnamed: 0                         int64
account_length                     int64
total_intl_calls                   int64
number_vmail_messages              int64
total_night_calls                  int64
total_day_calls                    int64
number_customer_service_calls      int64
total_eve_calls                    int64
total_intl_charge                float64
total_intl_minutes               float64
total_night_charge               float64
total_night_minutes              float64
total_eve_minutes                float64
total_day_charge                 float64
total_day_minutes                float64
total_eve_charge                 float64
voice_mail_plan                   object
international_plan                object
area_code                         object
state                             object
churn                             object
dtype: object

In [12]:
# Number of missing values in each test column.
tester.isna().sum()

Unnamed: 0                       0
state                            0
account_length                   0
area_code                        0
international_plan               0
voice_mail_plan                  0
number_vmail_messages            0
total_day_minutes                0
total_day_calls                  0
total_day_charge                 0
total_eve_minutes                0
total_eve_calls                  0
total_eve_charge                 0
total_night_minutes              0
total_night_calls                0
total_night_charge               0
total_intl_minutes               0
total_intl_calls                 0
total_intl_charge                0
number_customer_service_calls    0
churn                            0
dtype: int64

In [13]:
# Distinct-value count per test column, ascending (low-cardinality columns first).
tester.nunique().sort_values()

churn                               2
international_plan                  2
voice_mail_plan                     2
area_code                           3
number_customer_service_calls       8
total_intl_calls                   19
number_vmail_messages              41
state                              51
total_day_calls                   109
total_eve_calls                   109
total_night_calls                 121
total_intl_charge                 148
total_intl_minutes                148
account_length                    198
total_night_charge                743
total_eve_charge                  993
total_eve_minutes                1088
total_night_minutes              1093
total_day_charge                 1098
total_day_minutes                1098
Unnamed: 0                       1667
dtype: int64

In [14]:
# Work on copies so the frames loaded from disk stay available unchanged.
df_train, df_test = trainer.copy(), tester.copy()

In [16]:
# Remove the 'Unnamed: 0' column: it holds a distinct value on every row
# (1, 2, 3, ... in the previews).
# errors='ignore' makes this cell safe to re-run; without it a second execution
# raises KeyError because the column has already been dropped.
# Trade-off: a misspelled or renamed column would be skipped silently, so the
# resulting shape should still be checked.
df_train = df_train.drop(columns='Unnamed: 0', errors='ignore')
df_test = df_test.drop(columns='Unnamed: 0', errors='ignore')

In [18]:
# Check the training frame's shape after dropping 'Unnamed: 0'.
df_train.shape

(3333, 20)