In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [13]:
# Using a TensorFlow model to predict if a customer is likely to churn
# our training data comes from a csv file wi we are training the network on th several parameters 
# the csv file has parameters like "Contract Type", "Monthly Charges", "Tenure", etc.
# the file has retriactive data so we already know which customers churned and which didn't
df = pd.read_csv('Churn.csv')
# using pandas to bring in our data from the csv file

In [14]:
X = pd.get_dummies(df.drop(['Churn', 'Customer ID'], axis=1))
y = df['Churn'].apply(lambda x: 1 if x=='Yes' else 0)
# read in our data, do a little preprocessing to remove the csv rows that won't help us 
# and creat a variable for processing ['Churn'=='Yes'] vs ['Churn'=='No']

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
# create a train_test_split, a function from sklearn
# This will give us X_train data frame as well as a y_train data frame
# SO, we will have an X test and a Y test component

In [16]:
X_train.head()

Unnamed: 0,Senior Citizen,tenure,Monthly Charges,Gender_Female,Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,Phone Service_No,...,Total Charges_995.35,Total Charges_996.45,Total Charges_996.85,Total Charges_996.95,Total Charges_997.65,Total Charges_997.75,Total Charges_998.1,Total Charges_999.45,Total Charges_999.8,Total Charges_999.9
6786,0,61,111.6,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4050,1,46,100.25,1,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
764,0,72,66.85,1,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1058,0,10,84.7,1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1734,0,1,19.05,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# 1. IMPORT DEPENDENCIES
from tensorflow.keras.models import Sequential, load_model
# The sequential model is going to be our core model class
# We've also imported load_model which is going to allow us to reload our model from memory
from tensorflow.keras.layers import Dense
# Dense is a fully connected layer in our neural network and is going to allow us to create a whole bunch of hidden layers
from sklearn.metrics import accuracy_score
# accuracy_score is the metric we are going to use to evaluate the performance of our model
# The next thing we need to do is actually build up and compile our model

2023-08-22 21:45:09.391606: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
# 2a. BUILD OUR MODEL
model = Sequential()
# Instantiate our Sequential class
# next, we our defining the hidden layers of our network with the lines below
model.add(Dense(units=32, activation='relu', input_dim=len(X_train.columns)))
model.add(Dense(units=64, activation='relu'))
# In this case we've added 2 hidden, fully connected, layers
# The syntax "units=X" means that we will have X number of neurons in that layer
# "input_dim=len(X_train.columns)" specifies that our input dimensions are going to be the same number as our feature data frame 
model.add(Dense(units=1, activation='sigmoid'))
# We've also added output layer that uses the sigmoid function to have our network return a number between 0 and 1
# The number of units in your last layer dictates what your output would look like
# By defining "units=1" we are telling this layer to only have one neauron and since it is the end of the network, it's value will get returned
# More sophisticated models often have multiple units in thei final layer

In [20]:
# 2b. COMPILE OUR MODEL
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics='accuracy')
# When we are compiling our model we are telling TensorFlow how we want to train our model
# Whatloss metrics we want to use, what optimizer we want to use, and what metrics we want to focus on
# Our loss 'C' is basically the how far away our network's predicted output 'Y' and the expected output 'S'
# The optimizer is how we choose to search through and find the loss (C=f(Y,S)) where function f is the optimizer
# A popular iptmized to use is the MSE (Mean Squared Error) function.
# Then our metrics allow us to evaluate how well our model is performing.

In [21]:
# 3a. FIT THE MODEL
model.fit(X_train, y_train, epochs=200, batch_size=32)
# Here we are actually running our neural network model
# Our 'X_train' is our model frame  
# Our 'y_train' is our y variable (also known as our features and our target)
# epochs are the number of training sessions, so 'epochs=200' will be 200 rounds of training
# Training for longer (i.e. higher epochs) will often result in a more accurate model but might lead to overfitting
# batch_size is how large of a batch we want to pass through the TensorFlor before acctually making an update 

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7fde2585c5e0>

In [22]:
# 3b.PREDICT
y_hat = model.predict(X_test)
# to run a prediction we just use model.predict and we pass through our X_test data frame
y_hat = [0 if val < 0.5 else 1 for val in y_hat]
# since we are using the sigmoid function on our outout node will return value betweeen 0 and 1 



In [23]:
# 3c1. EVALUATE
y_hat

[1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [24]:
# 3c2. EVALUATE
# CALCULATE OUR ACCURACY SCORE
accuracy_score(y_test, y_hat)
# The closer the accuracy_score is to 1 the better
# If we get a low accuracy score this is an indication that we might want to train for a higher number of epochs

0.8161816891412349

In [25]:
# 4a. SAVE OUR MODEL
model.save('tfmodel')
# model.save('name of the folder we want to save the model into')



INFO:tensorflow:Assets written to: tfmodel/assets


INFO:tensorflow:Assets written to: tfmodel/assets


In [None]:
# 4a. RELOAD OUR MODEL
# To delete our model from memory run this code: "del model"
# this will delete our model from memory
# To reload or recover our model from memory run this code: "model = load_model('tfmodel')"