# Predicting Telco customer churn using a neural network
Here is a peek at the feature set at our disposal.  
*Image inserted here*  


In [4]:
import pandas as pd
import torch
from torch import optim, nn
from pathlib import Path

In [14]:
# Load the Telco churn CSV from the current working directory.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, so each column's dtype is inferred from all of its values.
raw_data = pd.read_csv(
    Path('./WA_Fn-UseC_-Telco-Customer-Churn.csv'),
    low_memory=False,
)
# Preview the first five rows.
raw_data.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [15]:
# Display the column labels of the raw frame.
raw_data.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [22]:
# Work on a copy of the raw frame without the 'customerID' column
# (presumably a per-customer identifier rather than a feature).
cleaned_data = raw_data.drop(columns=['customerID'])

### Let's see the possible values

In [23]:
# Print the distinct values of every column, one column per line of output,
# to see which columns are categorical and which are continuous.
for column_name in cleaned_data.columns:
    distinct_values = cleaned_data[column_name].unique()
    print(distinct_values)

['Female' 'Male']
[0 1]
['Yes' 'No']
['No' 'Yes']
[ 1 34  2 45  8 22 10 28 62 13 16 58 49 25 69 52 71 21 12 30 47 72 17 27
  5 46 11 70 63 43 15 60 18 66  9  3 31 50 64 56  7 42 35 48 29 65 38 68
 32 55 37 36 41  6  4 33 67 23 57 61 14 20 53 40 59 24 44 19 54 51 26  0
 39]
['No' 'Yes']
['No phone service' 'No' 'Yes']
['DSL' 'Fiber optic' 'No']
['No' 'Yes' 'No internet service']
['Yes' 'No' 'No internet service']
['No' 'Yes' 'No internet service']
['No' 'Yes' 'No internet service']
['No' 'Yes' 'No internet service']
['No' 'Yes' 'No internet service']
['Month-to-month' 'One year' 'Two year']
['Yes' 'No']
['Electronic check' 'Mailed check' 'Bank transfer (automatic)'
 'Credit card (automatic)']
[29.85 56.95 53.85 ... 63.1  44.2  78.7 ]
['29.85' '1889.5' '108.15' ... '346.45' '306.6' '6844.5']
['No' 'Yes']


#### Let's handle the ordinal values under 'Contract'

In [24]:
# Declare 'Contract' as an ordered categorical so that
# 'Month-to-month' < 'One year' < 'Two year'.
contract_order = 'Month-to-month', 'One year', 'Two year'
# Assign the converted column back rather than calling
# `.cat.set_categories(..., inplace=True)`: the `inplace` argument was
# deprecated (likely the source of the warning this cell emitted) and was
# removed in pandas 2.0, where passing it raises a TypeError.
cleaned_data['Contract'] = (
    cleaned_data['Contract']
    .astype('category')
    .cat.set_categories(contract_order, ordered=True)
)
# NOTE(review): the pd.get_dummies call further down one-hot encodes
# 'Contract', so confirm whether this ordering is actually used downstream.
cleaned_data.head()

  res = method(*args, **kwargs)


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


#### Now, let's one-hot encode

In [31]:
# Every column except the three continuous features and the 'Churn' target
# is treated as categorical.
categoricals = cleaned_data.columns.drop(
    labels=['tenure', 'MonthlyCharges', 'TotalCharges', 'Churn'],
)
categoricals

Index(['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService',
       'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
       'Contract', 'PaperlessBilling', 'PaymentMethod'],
      dtype='object')

In [37]:
# One-hot encode the categorical columns: each string/category column becomes
# one indicator column per distinct value, while the already-numeric
# 'SeniorCitizen' column is passed through unchanged.
encoded_data = pd.get_dummies(data=cleaned_data.loc[:, categoricals])
encoded_data.head(n=5)

Unnamed: 0,SeniorCitizen,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,1,0,0,1,1,0,1,0,0,...,0,1,0,0,0,1,0,0,1,0
1,0,0,1,1,0,1,0,0,1,1,...,0,0,1,0,1,0,0,0,0,1
2,0,0,1,1,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,0,1
3,0,0,1,1,0,1,0,1,0,0,...,0,0,1,0,1,0,1,0,0,0
4,0,1,0,1,0,1,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0


In [38]:
# Re-attach the continuous features and the target to the right of the
# one-hot columns; rows are aligned on the shared index.
encoded_data = pd.concat(
    [
        encoded_data,
        cleaned_data[['tenure', 'MonthlyCharges', 'TotalCharges', 'Churn']],
    ],
    axis='columns',
)
encoded_data

Unnamed: 0,SeniorCitizen,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,...,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure,MonthlyCharges,TotalCharges,Churn
0,0,1,0,0,1,1,0,1,0,0,...,0,1,0,0,1,0,1,29.85,29.85,No
1,0,0,1,1,0,1,0,0,1,1,...,1,0,0,0,0,1,34,56.95,1889.5,No
2,0,0,1,1,0,1,0,0,1,1,...,0,1,0,0,0,1,2,53.85,108.15,Yes
3,0,0,1,1,0,1,0,1,0,0,...,1,0,1,0,0,0,45,42.30,1840.75,No
4,0,1,0,1,0,1,0,0,1,1,...,0,1,0,0,1,0,2,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,0,0,1,0,1,0,1,0,1,0,...,0,1,0,0,0,1,24,84.80,1990.5,No
7039,0,1,0,0,1,0,1,0,1,0,...,0,1,0,1,0,0,72,103.20,7362.9,No
7040,0,1,0,0,1,0,1,1,0,0,...,0,1,0,0,1,0,11,29.60,346.45,No
7041,1,0,1,0,1,1,0,0,1,0,...,0,1,0,0,0,1,4,74.40,306.6,Yes


#### Let's take care of our target

In [42]:
# Turn the target into integers: 1 where 'Churn' is 'Yes', 0 otherwise.
# Multiplying the boolean mask by 1 converts True/False to 1/0.
encoded_data['Churn'] = (encoded_data['Churn'] == 'Yes') * 1
encoded_data

Unnamed: 0,SeniorCitizen,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,...,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure,MonthlyCharges,TotalCharges,Churn
0,0,1,0,0,1,1,0,1,0,0,...,0,1,0,0,1,0,1,29.85,29.85,0
1,0,0,1,1,0,1,0,0,1,1,...,1,0,0,0,0,1,34,56.95,1889.5,0
2,0,0,1,1,0,1,0,0,1,1,...,0,1,0,0,0,1,2,53.85,108.15,1
3,0,0,1,1,0,1,0,1,0,0,...,1,0,1,0,0,0,45,42.30,1840.75,0
4,0,1,0,1,0,1,0,0,1,1,...,0,1,0,0,1,0,2,70.70,151.65,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,0,0,1,0,1,0,1,0,1,0,...,0,1,0,0,0,1,24,84.80,1990.5,0
7039,0,1,0,0,1,0,1,0,1,0,...,0,1,0,1,0,0,72,103.20,7362.9,0
7040,0,1,0,0,1,0,1,1,0,0,...,0,1,0,0,1,0,11,29.60,346.45,0
7041,1,0,1,0,1,1,0,0,1,0,...,0,1,0,0,0,1,4,74.40,306.6,1


#### Now, let's normalize
I'm normalizing the continuous values to between 0 and 1 by dividing each column by its maximum value.  

In [44]:
# Keep encoded_data intact and scale on an independent copy.
final_data = encoded_data.copy(deep=True)
# Scale 'tenure' by its maximum so the largest value maps to 1.
final_data['tenure'] = final_data['tenure'].div(final_data['tenure'].max())
# Show the distinct scaled values as a sanity check.
final_data['tenure'].unique()

array([0.01388889, 0.47222222, 0.02777778, 0.625     , 0.11111111,
       0.30555556, 0.13888889, 0.38888889, 0.86111111, 0.18055556,
       0.22222222, 0.80555556, 0.68055556, 0.34722222, 0.95833333,
       0.72222222, 0.98611111, 0.29166667, 0.16666667, 0.41666667,
       0.65277778, 1.        , 0.23611111, 0.375     , 0.06944444,
       0.63888889, 0.15277778, 0.97222222, 0.875     , 0.59722222,
       0.20833333, 0.83333333, 0.25      , 0.91666667, 0.125     ,
       0.04166667, 0.43055556, 0.69444444, 0.88888889, 0.77777778,
       0.09722222, 0.58333333, 0.48611111, 0.66666667, 0.40277778,
       0.90277778, 0.52777778, 0.94444444, 0.44444444, 0.76388889,
       0.51388889, 0.5       , 0.56944444, 0.08333333, 0.05555556,
       0.45833333, 0.93055556, 0.31944444, 0.79166667, 0.84722222,
       0.19444444, 0.27777778, 0.73611111, 0.55555556, 0.81944444,
       0.33333333, 0.61111111, 0.26388889, 0.75      , 0.70833333,
       0.36111111, 0.        , 0.54166667])

In [45]:
# Scale 'MonthlyCharges' by its maximum so the largest value maps to 1.
final_data['MonthlyCharges'] = final_data['MonthlyCharges'].div(
    final_data['MonthlyCharges'].max()
)
final_data['MonthlyCharges'].unique()

array([0.25136842, 0.47957895, 0.45347368, ..., 0.53136842, 0.37221053,
       0.66273684])

In [57]:
# 'TotalCharges' holds strings (see the quoted values printed earlier), so it
# must be converted before scaling. Entries that are exactly a single space
# ' ' are replaced with 0.0 first, then the column is cast to float.
final_data['TotalCharges'] = (
    final_data['TotalCharges']
    .replace(' ', 0.)
    .astype(float)
)
# Scale by the column maximum so the largest value maps to 1.
final_data['TotalCharges'] = final_data['TotalCharges'].div(
    final_data['TotalCharges'].max()
)
final_data['TotalCharges'].unique()

array([0.00343704, 0.21756402, 0.01245279, ..., 0.03989153, 0.03530306,
       0.78810105])

In [58]:
# Display the fully encoded and scaled frame (features plus the 'Churn' target).
final_data

Unnamed: 0,SeniorCitizen,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,PhoneService_No,PhoneService_Yes,MultipleLines_No,...,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure,MonthlyCharges,TotalCharges,Churn
0,0,1,0,0,1,1,0,1,0,0,...,0,1,0,0,1,0,0.013889,0.251368,0.003437,0
1,0,0,1,1,0,1,0,0,1,1,...,1,0,0,0,0,1,0.472222,0.479579,0.217564,0
2,0,0,1,1,0,1,0,0,1,1,...,0,1,0,0,0,1,0.027778,0.453474,0.012453,1
3,0,0,1,1,0,1,0,1,0,0,...,1,0,1,0,0,0,0.625000,0.356211,0.211951,0
4,0,1,0,1,0,1,0,0,1,1,...,0,1,0,0,1,0,0.027778,0.595368,0.017462,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,0,0,1,0,1,0,1,0,1,0,...,0,1,0,0,0,1,0.333333,0.714105,0.229194,0
7039,0,1,0,0,1,0,1,0,1,0,...,0,1,0,1,0,0,1.000000,0.869053,0.847792,0
7040,0,1,0,0,1,0,1,1,0,0,...,0,1,0,0,1,0,0.152778,0.249263,0.039892,0
7041,1,0,1,0,1,1,0,0,1,0,...,0,1,0,0,0,1,0.055556,0.626526,0.035303,1
