In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from pandas.io.formats.info import DataFrameTableBuilderNonVerbose


%matplotlib inline



In [None]:
# Read the customer-churn CSV into a DataFrame and preview the first rows.
df = pd.read_csv(filepath_or_buffer='/content/Customer-Churn.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [None]:
# Print each column's name, non-null count and dtype for the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [None]:
# Remove the customerID column from the working frame.
df = df.drop(columns=['customerID'])

In [None]:
# Count the rows whose TotalCharges is a single blank string, then turn those
# blanks into NaN so the column can be cast to float afterwards.
# `np.nan` is used instead of the `np.NaN` alias, which NumPy 2.0 removed.
n_blank_total_charges = (df['TotalCharges'] == ' ').sum()
df['TotalCharges'] = df['TotalCharges'].replace(' ', np.nan)

# Show the count (taken before the replacement) as the cell output.
n_blank_total_charges

In [None]:
# Cast TotalCharges to float, then re-print the frame summary to check the dtype.
total_charges_as_float = df['TotalCharges'].astype(float)
df['TotalCharges'] = total_charges_as_float
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   gender            7043 non-null   object 
 1   SeniorCitizen     7043 non-null   int64  
 2   Partner           7043 non-null   object 
 3   Dependents        7043 non-null   object 
 4   tenure            7043 non-null   int64  
 5   PhoneService      7043 non-null   object 
 6   MultipleLines     7043 non-null   object 
 7   InternetService   7043 non-null   object 
 8   OnlineSecurity    7043 non-null   object 
 9   OnlineBackup      7043 non-null   object 
 10  DeviceProtection  7043 non-null   object 
 11  TechSupport       7043 non-null   object 
 12  StreamingTV       7043 non-null   object 
 13  StreamingMovies   7043 non-null   object 
 14  Contract          7043 non-null   object 
 15  PaperlessBilling  7043 non-null   object 
 16  PaymentMethod     7043 non-null   object 


In [None]:
# Fill missing TotalCharges values with the column median.
median_total_charges = df['TotalCharges'].median()
df['TotalCharges'] = df['TotalCharges'].fillna(median_total_charges)

In [None]:
# For every object-dtype column, record its distinct values and print them
# as "<column> : [values]".
unique_values = {}
for column in df.select_dtypes(object).columns:
    unique_values[column] = list(df[column].unique())
    print(f'{column} : {unique_values[column]}')

gender : ['Female', 'Male']
Partner : ['Yes', 'No']
Dependents : ['No', 'Yes']
PhoneService : ['No', 'Yes']
MultipleLines : ['No phone service', 'No', 'Yes']
InternetService : ['DSL', 'Fiber optic', 'No']
OnlineSecurity : ['No', 'Yes', 'No internet service']
OnlineBackup : ['Yes', 'No', 'No internet service']
DeviceProtection : ['No', 'Yes', 'No internet service']
TechSupport : ['No', 'Yes', 'No internet service']
StreamingTV : ['No', 'Yes', 'No internet service']
StreamingMovies : ['No', 'Yes', 'No internet service']
Contract : ['Month-to-month', 'One year', 'Two year']
PaperlessBilling : ['Yes', 'No']
PaymentMethod : ['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)']
Churn : ['No', 'Yes']


In [None]:
# Count how many rows fall into each Churn class.
df['Churn'].value_counts()

No     5174
Yes    1869
Name: Churn, dtype: int64

In [None]:
# Rewrite the Churn labels as digit strings ('No' -> '0', 'Yes' -> '1'),
# then parse the column into a numeric dtype.
churn_as_digits = (
    df['Churn']
    .replace('No', '0', regex=True)
    .replace('Yes', '1', regex=True)
)
df['Churn'] = pd.to_numeric(churn_as_digits)

In [None]:
# Applying encoding to categorical features
# Binary Encoding -- step 1: fold the "No phone service" / "No internet service"
# values into plain "No".

internet_addon_columns = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                          'TechSupport', 'StreamingTV', 'StreamingMovies']

df['MultipleLines'] = df['MultipleLines'].replace('No phone service', 'No')
df[internet_addon_columns] = df[internet_addon_columns].replace('No internet service', 'No')


In [None]:
# Columns that are label-encoded to integer codes in the loop below.
binary_columns = [
    'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn',
]

# One encoder object is refitted for each column in turn, so after the loop
# `le` only remembers the classes of the last column.
le = LabelEncoder()
for binary_col in binary_columns:
    encoded_values = le.fit_transform(df[binary_col])
    df[binary_col] = encoded_values

In [None]:
# One Hot Encoding
# Build indicator columns for PaymentMethod (first category dropped), append
# them to the frame, and remove the original text column.
dummies_payment = pd.get_dummies(df['PaymentMethod'], drop_first=True)
data = pd.concat([df, dummies_payment], axis=1).drop(columns=['PaymentMethod'])

In [None]:
# Label encoding
# Encode the two remaining multi-valued text columns as integer codes.
from sklearn import preprocessing
label_encoder = preprocessing.LabelEncoder()

data['Contract'] = label_encoder.fit_transform(df['Contract'])

# Fix: this column was previously encoded from df['Contract'], which silently
# replaced InternetService with a copy of the Contract codes.
data['InternetService'] = label_encoder.fit_transform(df['InternetService'])

In [None]:
# Separate the Churn target from the remaining feature columns and display
# the feature frame.
Y = data['Churn']
X = data.drop(columns=['Churn'])
X

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,Credit card (automatic),Electronic check,Mailed check
0,0,0,1,0,1,0,0,0,0,1,...,0,0,0,0,1,29.85,29.85,0,1,0
1,1,0,0,0,34,1,0,1,1,0,...,0,0,0,1,0,56.95,1889.50,0,0,1
2,1,0,0,0,2,1,0,0,1,1,...,0,0,0,0,1,53.85,108.15,0,0,1
3,1,0,0,0,45,0,0,1,1,0,...,1,0,0,1,0,42.30,1840.75,0,0,0
4,0,0,0,0,2,1,0,0,0,0,...,0,0,0,0,1,70.70,151.65,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,0,1,1,24,1,1,1,1,0,...,1,1,1,1,1,84.80,1990.50,0,0,1
7039,0,0,1,1,72,1,1,1,0,1,...,0,1,1,1,1,103.20,7362.90,1,0,0
7040,0,0,1,1,11,0,0,0,1,0,...,0,0,0,0,1,29.60,346.45,0,1,0
7041,1,1,1,0,4,1,1,0,0,0,...,0,0,0,0,1,74.40,306.60,0,0,1


In [None]:
# Hold out 5% of the rows as the test split; random_state pins the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y,
    test_size=0.05,
    shuffle=True,
    random_state=0,
)

In [None]:
# Fit the scaler on the training split only, then apply that same
# transformation to both the training and the test split.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)


In [None]:
# Create Adam Optimizer for model 1
from tensorflow.keras.optimizers import Adam
# Kept so that any cell still referring to `opt_1` keeps working; get_model()
# itself no longer compiles with this shared instance.
opt_1 = Adam(learning_rate=0.001)

# Base Model (Model 1)
def get_model(n_features=21, learning_rate=0.001):
  """Build and compile the baseline binary classifier.

  The network is three ReLU hidden layers (100, 80, 50 units) followed by a
  single sigmoid output, compiled with binary cross-entropy and accuracy.

  Args:
    n_features: Width of the input vector. Defaults to 21, the value that was
      previously hard-coded.
    learning_rate: Adam learning rate. Defaults to 0.001, the rate of `opt_1`.

  Returns:
    A freshly initialised, compiled Sequential model.
  """
  model = Sequential([
      Dense(100, input_shape=(n_features,), activation='relu'),
      Dense(80, activation='relu'),
      Dense(50, activation='relu'),
      Dense(1, activation='sigmoid')
  ])

  model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    # A new optimizer per model: optimizer objects are stateful, so sharing
    # one instance between models (or across repeated calls) carries state
    # from one training run into the next.
    optimizer=Adam(learning_rate=learning_rate)
  )
  return model


In [None]:
# Train Model 1 for at most 10 epochs; EarlyStopping watches val_loss with a
# patience of 5 epochs.
# NOTE(review): the test split is also used as the validation data here.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model_1 = get_model()

# Predictions of the network before any fitting has happened.
preds_on_untrained = model_1.predict(X_test)

history = model_1.fit(
    x=X_train,
    y=Y_train,
    epochs=10,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Decreasing the learning rate (Model 2): 0.0001 instead of Model 1's 0.001
# Kept so that cells still referring to `opt_2` keep working; get_model_2()
# itself no longer compiles with this shared instance.
opt_2 = Adam(learning_rate=0.0001)

# (Model 2)
def get_model_2(n_features=21, learning_rate=0.0001):
  """Build and compile Model 2: the baseline layers with a lower learning rate.

  The network is three ReLU hidden layers (100, 80, 50 units) followed by a
  single sigmoid output, compiled with binary cross-entropy and accuracy.

  Args:
    n_features: Width of the input vector. Defaults to 21, the value that was
      previously hard-coded.
    learning_rate: Adam learning rate. Defaults to 0.0001, the rate of `opt_2`.

  Returns:
    A freshly initialised, compiled Sequential model.
  """
  model = Sequential([
      Dense(100, input_shape=(n_features,), activation='relu'),
      Dense(80, activation='relu'),
      Dense(50, activation='relu'),
      Dense(1, activation='sigmoid')
  ])

  model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    # A new optimizer per model: optimizer objects are stateful, so sharing
    # one instance between models carries state from one training run into
    # the next.
    optimizer=Adam(learning_rate=learning_rate)
  )
  return model

In [None]:
# Train Model 2 for at most 10 epochs; EarlyStopping watches val_loss with a
# patience of 5 epochs.
# NOTE(review): the test split is also used as the validation data here.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model_2 = get_model_2()

# Predictions of the network before any fitting has happened.
preds_on_untrained = model_2.predict(X_test)

history = model_2.fit(
    x=X_train,
    y=Y_train,
    epochs=10,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Add Regularization / Dropout
# model3

def get_model_3(n_features=21, learning_rate=0.0001, dropout_rate=0.05):
  """Build and compile Model 3: the Model 2 layers with dropout after each hidden layer.

  Args:
    n_features: Width of the input vector. Defaults to 21, the value that was
      previously hard-coded.
    learning_rate: Adam learning rate. Defaults to 0.0001, the rate of `opt_2`.
    dropout_rate: Rate passed to every Dropout layer. Defaults to 0.05, the
      value that was previously hard-coded.

  Returns:
    A freshly initialised, compiled Sequential model.
  """
  model = Sequential([
      Dense(100, input_shape=(n_features,), activation='relu'),
      Dropout(dropout_rate),
      Dense(80, activation='relu'),
      Dropout(dropout_rate),
      Dense(50, activation='relu'),
      Dropout(dropout_rate),
      Dense(1, activation='sigmoid')
  ])

  model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    # A new optimizer per model. This factory is called for both model_3 and
    # model_4, and the shared `opt_2` instance was also used by model_2;
    # optimizer objects are stateful and should not be shared between models.
    optimizer=Adam(learning_rate=learning_rate)
  )
  return model

In [None]:
# Train Model 3 for at most 10 epochs; EarlyStopping watches val_loss with a
# patience of 5 epochs.
# NOTE(review): the test split is also used as the validation data here.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model_3 = get_model_3()

# Predictions of the network before any fitting has happened.
preds_on_untrained = model_3.predict(X_test)

history = model_3.fit(
    x=X_train,
    y=Y_train,
    epochs=10,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


In [None]:
# Model 4: same factory as Model 3 (get_model_3), but the epoch limit is
# raised from 10 to 50. EarlyStopping watches val_loss with a patience of 5.
# NOTE(review): the test split is also used as the validation data here.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model_4 = get_model_3()

# Predictions of the network before any fitting has happened.
preds_on_untrained = model_4.predict(X_test)

history = model_4.fit(
    x=X_train,
    y=Y_train,
    epochs=50,
    validation_data=(X_test, Y_test),
    callbacks=[early_stopping],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


In [None]:
# Evaluating the score
# Fix: the models were fitted on standardized features, but were previously
# evaluated on the raw, unscaled `X`. Models 2-4 all scored 73.46% that way,
# which is exactly the share of the majority class (5174 / 7043). Evaluate on
# the held-out split, which has been passed through the fitted scaler.
score_1 = model_1.evaluate(X_test, Y_test, verbose=0)
score_2 = model_2.evaluate(X_test, Y_test, verbose=0)
score_3 = model_3.evaluate(X_test, Y_test, verbose=0)
score_4 = model_4.evaluate(X_test, Y_test, verbose=0)

# evaluate() returns [loss, accuracy] for these models; index 1 is accuracy.
print("Accuracy")
for model_number, score in enumerate((score_1, score_2, score_3, score_4), start=1):
    print("Model %d: %.2f%%" % (model_number, score[1] * 100))

Accuracy
Model 1: 76.29%
Model 2: 73.46%
Model 3: 73.46%
Model 4: 73.46%
