# keras example - telco customer churn dataset

This lesson uses the [telco customer churn dataset](course_datasets.md#telco-customer-churn)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense

In [2]:
#df = pd.read_csv('https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv')
df = pd.read_csv('data/telco_customer_churn.csv')
df.head(2)

Unnamed: 0,Customer ID,Gender,Senior Citizen,Partner,Dependents,tenure,Phone Service,Multiple Lines,Internet Service,Online Security,...,Device Protection,Tech Support,Streaming TV,Streaming Movies,Contract,Paperless Billing,Payment Method,Monthly Charges,Total Charges,Churn
0,7590-VHVEA,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No


In [3]:
X_pre = df.drop(['Churn', 'Customer ID'], axis=1, inplace=False)
X = pd.get_dummies(X_pre)
y = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)
(X.shape, y.shape)

((7044, 6575), (7044,))

In [4]:
# diagnostics
X_pre.head(2)
X.head(2)

Unnamed: 0,Senior Citizen,tenure,Monthly Charges,Gender_Female,Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,Phone Service_No,...,Total Charges_995.35,Total Charges_996.45,Total Charges_996.85,Total Charges_996.95,Total Charges_997.65,Total Charges_997.75,Total Charges_998.1,Total Charges_999.45,Total Charges_999.8,Total Charges_999.9
0,0,1,29.85,True,False,False,True,True,False,True,...,False,False,False,False,False,False,False,False,False,False
1,0,1,29.85,True,False,False,True,True,False,True,...,False,False,False,False,False,False,False,False,False,False


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)

(((5635, 6575), (5635,)), ((1409, 6575), (1409,)))

In [6]:
# diagnostics
X_train.shape[1]
np.asarray(X_train).astype(np.float32)

array([[  0.  ,  59.  ,  25.  , ...,   0.  ,   0.  ,   0.  ],
       [  0.  ,  70.  ,  77.3 , ...,   0.  ,   0.  ,   0.  ],
       [  0.  ,   3.  ,  78.5 , ...,   0.  ,   0.  ,   0.  ],
       ...,
       [  0.  ,  53.  ,  95.95, ...,   0.  ,   0.  ,   0.  ],
       [  0.  ,  65.  , 109.3 , ...,   0.  ,   0.  ,   0.  ],
       [  1.  ,   1.  ,  25.8 , ...,   0.  ,   0.  ,   0.  ]],
      dtype=float32)

In [7]:
model = Sequential()
model.add(Dense(units=32, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

In [9]:
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [10]:
#model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=0)
# see https://stackoverflow.com/questions/58636087/tensorflow-valueerror-failed-to-convert-a-numpy-array-to-a-tensor-unsupporte
# but why is author adding numerical fields since these cause a lot of columns to be added?
model.fit(np.asarray(X_train).astype(np.float32), y_train, epochs=200, batch_size=32, verbose=0)

<keras.callbacks.History at 0x262909dae90>

In [11]:
y_hat = model.predict(np.asarray(X_test).astype(np.float32))
#y_hat = y_hat > 0.5
y_hat = [1 if val > 0.5 else 0 for val in y_hat]
y_hat[:100]




[0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0]

In [12]:
accuracy_score(y_test, y_hat)

0.7934705464868701

In [13]:
model.save('model.h5')

In [14]:
del model

In [15]:
model = load_model('model.h5')

In [16]:
model

<keras.engine.sequential.Sequential at 0x262f36acc10>