In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np

In [2]:
# List the GPUs TensorFlow can see and make all of them visible to the runtime.
gpus = tf.config.list_physical_devices("GPU")
print(gpus)
if gpus:
    tf.config.set_visible_devices(gpus, "GPU")
# `tf.test.is_gpu_available()` is deprecated in favour of
# `tf.config.list_physical_devices('GPU')`, so report availability from the
# device list queried above instead of triggering the deprecation warning.
print(f"Tensorflow GPU usage : {bool(gpus)}")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Tensorflow GPU usage : True


In [None]:
# Read the raw customer records from disk and preview five random rows.
df = pd.read_csv(filepath_or_buffer="dataset.csv")
df.sample(n=5)

Unnamed: 0,CustomerID,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type,Contract Length,Total Spend,Last Interaction,Churn
147269,152080.0,26.0,Female,42.0,29.0,9.0,13.0,Basic,Quarterly,923.0,7.0,1.0
433773,440888.0,36.0,Male,8.0,3.0,2.0,5.0,Basic,Quarterly,985.08,12.0,0.0
281502,288616.0,24.0,Male,34.0,23.0,0.0,12.0,Premium,Quarterly,870.92,13.0,0.0
200842,205715.0,49.0,Female,9.0,3.0,5.0,4.0,Premium,Annual,149.86,24.0,1.0
277742,284856.0,46.0,Male,46.0,21.0,4.0,17.0,Premium,Annual,626.27,16.0,0.0


In [14]:
def _min_max_scale(column, new_min=0, new_max=1):
    """Linearly rescale a numeric Series so its values span [new_min, new_max]."""
    old_min, old_max = column.min(), column.max()
    span = old_max - old_min
    if span == 0:
        # Constant column: dividing by zero would yield NaN for every row, so
        # pin all values to the lower bound instead.
        return column * 0.0 + new_min
    return (column - old_min) / span * (new_max - new_min) + new_min


def FormatData(df):
    """Turn the raw churn table into an all-numeric frame ready for training.

    Steps, in order:
      1. Drop the 'CustomerID' column.
      2. Drop every row that has a missing value in any remaining column.
      3. Encode 'Gender' as 1 (male) / 0 (female), matched case-insensitively.
      4. One-hot encode 'Contract Length' and 'Subscription Type' in place, so
         the position of the surrounding columns is preserved.
      5. Min-max scale 'Tenure', 'Usage Frequency', 'Support Calls',
         'Payment Delay', 'Total Spend' and 'Last Interaction' to [0, 1].

    All other columns are passed through unchanged.

    The scaling ranges are computed from the frame passed in, so any sample
    fed to a model trained on this output must be scaled with the same ranges.

    Args:
        df: pandas DataFrame holding the columns named above. It is not
            modified; every step works on a new frame.

    Returns:
        A new DataFrame with int32 'Gender' and one-hot columns, float scaled
        columns, and no missing values. The one-hot columns produced depend on
        the categories actually present in the data.

    Raises:
        KeyError: if one of the required columns is absent.
        ValueError: if 'Gender' holds a value that cannot be converted to an
            integer after the male/female replacement.
    """
    categorical_columns = ['Contract Length', 'Subscription Type']
    scaled_columns = [
        'Tenure', 'Usage Frequency', 'Support Calls',
        'Payment Delay', 'Total Spend', 'Last Interaction',
    ]

    # Drop incomplete rows *before* encoding: get_dummies would otherwise turn
    # a missing category into an all-zero row that dropna can no longer detect.
    df = df.drop(columns=['CustomerID']).dropna()

    # Replace "Male" with "1" and "Female" with "0" (case-insensitive).
    gender_lower = df['Gender'].str.lower()
    df.loc[gender_lower == "male", 'Gender'] = "1"
    df.loc[gender_lower == "female", 'Gender'] = "0"

    # One-hot encode each categorical column at its original position.
    # Requesting int32 here avoids a hard-coded list of expected dummy names,
    # which breaks as soon as one category is absent from the data.
    for column in categorical_columns:
        insert_loc = df.columns.get_loc(column)
        dummies = pd.get_dummies(df[[column]], dtype='int32')
        df = pd.concat(
            [df.iloc[:, :insert_loc], dummies, df.iloc[:, insert_loc + 1:]],
            axis=1,
        )

    # Scale the numeric usage/billing columns between 0 and 1.
    for column in scaled_columns:
        df[column] = _min_max_scale(df[column])

    # Gender holds the strings "1"/"0" at this point; store it as int32.
    df = df.astype({'Gender': 'int32'})
    return df

In [15]:
# Run the cleaning/encoding pipeline over the loaded frame.
# NOTE: this overwrites `df`, and FormatData drops 'CustomerID', so running
# this cell a second time without reloading the CSV raises a KeyError.
df = df.pipe(FormatData)

In [None]:
# Show every column except the last one, i.e. the model's input features.
df.iloc[:, :-1]

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type_Basic,Subscription Type_Premium,Subscription Type_Standard,Contract Length_Annual,Contract Length_Monthly,Contract Length_Quarterly,Total Spend,Last Interaction
0,30.0,0,0.644068,0.448276,0.5,0.600000,0,0,1,1,0,0,0.924444,0.551724
1,65.0,0,0.813559,0.000000,1.0,0.266667,1,0,0,0,1,0,0.507778,0.172414
2,55.0,0,0.220339,0.103448,0.6,0.600000,1,0,0,0,0,1,0.094444,0.068966
3,58.0,1,0.627119,0.689655,0.7,0.233333,0,0,1,0,1,0,0.328889,0.965517
4,23.0,1,0.525424,0.655172,0.5,0.266667,1,0,0,0,1,0,0.574444,0.655172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
440828,42.0,1,0.898305,0.482759,0.1,0.100000,0,1,0,1,0,0,0.684867,0.241379
440829,25.0,0,0.118644,0.413793,0.1,0.666667,0,1,0,1,0,0,0.717089,0.034483
440830,26.0,1,0.576271,0.896552,0.1,0.166667,0,0,1,0,0,1,0.974789,0.275862
440831,28.0,1,0.915254,0.448276,0.2,0.000000,0,0,1,0,0,1,0.558389,0.034483


In [45]:
from sklearn.model_selection import train_test_split

# Split into training and test sets: the features are every column except
# the last, and the target is the Churn column. 1% of the rows are held out
# for testing, with a fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df["Churn"],
    test_size=0.01,
    random_state=25,
)

In [46]:
# Display the training feature matrix returned by the train/test split.
x_train

Unnamed: 0,Age,Gender,Tenure,Usage Frequency,Support Calls,Payment Delay,Subscription Type_Basic,Subscription Type_Premium,Subscription Type_Standard,Contract Length_Annual,Contract Length_Monthly,Contract Length_Quarterly,Total Spend,Last Interaction
247464,57.0,1,0.305085,0.896552,0.9,0.266667,0,0,1,1,0,0,0.956533,0.689655
319407,48.0,1,0.644068,0.275862,0.4,0.666667,0,0,1,1,0,0,0.643333,0.344828
44198,25.0,1,0.898305,0.586207,0.7,0.333333,1,0,0,0,1,0,0.868889,0.206897
433119,40.0,1,0.508475,0.482759,0.1,0.500000,0,0,1,1,0,0,0.740467,0.827586
127105,55.0,0,0.203390,0.724138,0.6,0.466667,0,0,1,1,0,0,0.477778,0.379310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130365,41.0,1,0.593220,0.827586,0.9,0.633333,0,0,1,0,1,0,0.284444,0.655172
92303,63.0,0,0.372881,0.103448,1.0,0.333333,1,0,0,0,0,1,0.453333,0.206897
6618,32.0,1,0.983051,0.689655,1.0,0.666667,1,0,0,0,1,0,0.698889,0.517241
90430,62.0,1,0.610169,0.241379,0.0,0.633333,1,0,0,1,0,0,0.505556,0.172414


In [49]:
# Fix TensorFlow's global random seed so that weight initialisation (and other
# TensorFlow random ops) start from the same state on every fresh run.
# GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(25)

# Derive the input width from the training data rather than hard-coding it,
# so the network stays in sync with the feature columns.
n_features = x_train.shape[1]

# Small fully connected binary classifier: two ReLU hidden layers followed by
# a single sigmoid unit that outputs a value between 0 and 1.
model = keras.Sequential([
    keras.layers.Dense(14, input_shape=(n_features,), activation="relu"),
    keras.layers.Dense(7, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid"),
])

# Write per-epoch logs and weight histograms to "logs/" for TensorBoard.
tb_callback = keras.callbacks.TensorBoard(log_dir="logs/", histogram_freq=1)

model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.fit(x_train, y_train, epochs=20, callbacks=[tb_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x19d946bae20>

In [51]:
# Score the trained model on the held-out test split.
model.evaluate(x=x_test, y=y_test)



[0.05243771895766258, 0.9836697578430176]

In [52]:
# Predict for one hand-written sample. The values follow the feature column
# order of the training frame and are already scaled/encoded.
model.predict(np.array([[
    64.0,                 # Age
    0,                    # Gender
    0.2711864406779661,   # Tenure
    0.06896551724137931,  # Usage Frequency
    1.0,                  # Support Calls
    0.6333333333333333,   # Payment Delay
    0,                    # Subscription Type_Basic
    1,                    # Subscription Type_Premium
    0,                    # Subscription Type_Standard
    0,                    # Contract Length_Annual
    1,                    # Contract Length_Monthly
    0,                    # Contract Length_Quarterly
    0.4444444444444444,   # Total Spend
    0.0,                  # Last Interaction
]]))



array([[1.]], dtype=float32)

In [None]:
# Persist the trained model to disk under the ".h5" file name below.
model.save(filepath="churn_prediction_model.h5")