<a href="https://colab.research.google.com/github/Josephoduro/mlWithColab/blob/main/binaryClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras

#we will be using pandas dataframes for loading external database
#sklearn library for preprocessing the data and creating the training
#validation datasets
#matplotlib for charting
#To use these libraries:
# loading data
import pandas as pd
# scaling feature values
from sklearn.preprocessing import StandardScaler
#encoding target values
from sklearn.preprocessing import LabelEncoder
#shuffling data
from sklearn.utils import shuffle
#splitting the dataset into training and validation
from sklearn.model_selection import train_test_split
#plotting curves
import matplotlib.pyplot as plt

#Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')


#loading data
data_url = 'https://raw.githubusercontent.com/Apress/artificial-neural-networks-with-tensorflow-2/main/ch02/Churn_Modelling.csv'
data = pd.read_csv(data_url)

#shuffling data
data = shuffle(data)

#Data Preprocessing
#i- Checking Nulls
data.isnull().sum()

#ii- selecting labels
#dropping all unnecessary fields


X = data.drop(labels= ['CustomerId', 'Surname', 'RowNumber', 'Exited'], axis = 1)

#x now is a new array containing fields needed for the model, hence our
#features

#Extracting the output/label for our model. In this case it becomes Exited

y = data['Exited']

#Encoding Categorical Columns
#check that the selected features contain categorical values
#Gender from the data takes two categorical values, Male and female
#Geography has threecategorical values, Germany, Spain and France.
#We will need to convert these into numerical values before feeding
#To the network

from sklearn.preprocessing import LabelEncoder
label = LabelEncoder()
X['Geography'] = label.fit_transform(X['Geography'])
X['Gender'] = label.fit_transform(X['Gender'])

#Scaling Numerical Values
#As the features in the real data can have a wide range of data values
#machine learning would work better if we
#standardized all these data ponts to the same scale
#the mean of each column should be zero, and a standard deviation of 1 for better
#results on machine learning. So we transform all our data points using the
#equation z = (x - mu) / s
# mu = mean and, s = standard deviation
#we can perform this standardization by using StandardScaler

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(X)

#Creating Training and Testing
#We split the data into testing and training sets
#To do so we use the train_test_split method of sklearn as in the following
#Split dataset into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3)

#Before we knock some boots of, let's clear the air about a few things.
#Training dataset - The part of the data that is used for model fitting.
#Validation dataset - The part of data used for tuning hyperparameters
#during training.
#Test dataset - The part of data used for evaluating a model's performance
#after it's training


#Defining ANN
#After preprocessing, we have 11 features in our dataset. The number of features
#is determined by computing the shape of the training dataset with the following
#statement
#X_train.shape[1]

#For our case, the expected output of the network is a binary value indicating
#the likelihood of the customer leaving the bank. The target values are specified
#y_train vector

#we will create a four-layer deep learning network model. In the first layer,
#we will use 128 nodes, the second 64, the third 32 and fourth single output node.

#we create the network by using tf.keras API, which is a new standard in TensorFlow
#we will use the sequential API to create a linear stack of layers. You instantiate
#the model using the following statement:

model = keras.models.Sequential()

#creating the first layer
#We add the first layer to the stack consisting of 128 nodes using
#the following statement

model.add(keras.layers.Dense(128, activation = 'relu',
                             input_dim = X_train.shape[1]))

#The input dimension to this layer is set in the parameter input_dim,
#which is the number of features defined by the shape of X_train vector
#we use ReLU(rectified linear unit) as activation function.
#The activation function is used in deciding whether the node is to be
#activated depending on its weighted sum. ReLU is the most widely used activation
#function that outputs 0 for negative inputs and 1 otherwise.

#likewise, you add the second layer to the network using the following:

model.add(keras.layers.Dense(64, activation = 'relu'))

#The input to this layer comes from the previous layer so there is
#no need to specify the dimensions of the input vector.
#The third layer is added:

model.add(keras.layers.Dense(32, activation = 'relu'))

#The last layer is added

model.add(keras.layers.Dense(1, activation = 'sigmoid'))

#We use the sigmoid as the activation function here as  this layer is
#outputting a binary value. A sigmoid function is a type of activation
#function and is also known as a Squashing function. It limits the output
#to a range between 0 and 1, making it suitable in predicting probabilities

#We print the network summary using

#model.summary()

#COMPILING MODEL
#To compile the model, we call the model's compile method:

model.compile(loss = 'binary_crossentropy',
              optimizer = 'adam', metrics = ['accuracy'])

#The binary_crossentropy is used as our loss function
#since we are developing a binary classifier model
#We use Adam optimizer while training the model as this is suited
#best in such situations.
#The accuracy metrics are collected for analysis by specifying the value for
#metrics parameter.

#We will define a callback function which will be called
#at each epoch during training. We will be collecting logs in the
#log folder. To cleat earlier log, we use the following action:
#!rm -rf ./log/

#we define the callback funtion using the following code snippet:

#tensorboard visualization
import datetime, os
logdir = os.path.join("log",
                      datetime.datetime.now().
                      strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir,
                                                      histogram_freq = 1)

#With this setup for training analysis and the compilation of the
#model, we are now ready to start the training.

#MODEL TRAINING
#To train the model, we use the fit method on the model instance:

r = model.fit(X_train, y_train, batch_size= 32, epochs= 50, validation_data= (X_test, y_test),
              callbacks = [tensorboard_callback])

#The first parameter to the fit function defines the features vector,
#and the second defines the labels. The batch_size parameter defines the
#batch_size for training. The epochs parameter determines how many iterations
#would be performed. The test data that we generated during data preprocessing
#is used for model validation and is passed to fit function in the validation_data
#The callbacks function specify which callback function would be called at the
#end of each iteration


#Once the training is over, we can use the collected metrics to evaluate
#if the model is trained using the desired accuracy



In [None]:
#PERFORMANCE EVALUATION
#To evaluate the performance, we will launch the tensorboard in our colab
#environment using the %tensorboard magic. Before this, we will load the
#tensorboard using %load_ext magic.

%load_ext tensorboard
%tensorboard --logdir log #command to launch tensorboard on colab

#running this shows the TensorBoard, and you will see the accuracy and loss
#metrics plotted on the screen. The two curves shown here are plotted on
#the training and validation data. The examination of the accuracy and loss
#metrics help us in determining if the model is performing well

In [None]:
#The program statement for evaluating the model and its output is shown:
test_scores = model.evaluate(X_test, y_test)
print('Test Loss: ', test_scores[0])
print('Test accuracy: ', test_scores[1]*100)

In [None]:
from ast import increment_lineno
#Plotting the performance charts on the validation data using the matplotlib
#The traditional way of performance evaluation

%matplotlib inline
import matplotlib.pyplot as plt #for plotting curves

plt.plot(r.history['val_accuracy'], label= 'val_acc')
plt.plot(r.history['val_loss'], label= 'val_loss')
plt.legend()
plt.show()

In [24]:
import numpy as np
#PREDICTING ON TEST DATA
#The confusion matrix quite is used to evaluate the performance
#of our network. It requires both the predictions and the True labels
#Thus, we first need to generate predictions on our test data. For
#predicting, use predict_classes method as shown:

predictions = model.predict(X_test)

y_pred = np.argmax(predictions, axis=1)

#The method takes the features vector as its argument and returns a tensor of
#predictions. it could be printed using

y_pred



array([0, 0, 0, ..., 0, 0, 0])

In [None]:
#The value of 1 at any index represents the customer leaves while means the customer
#stays

#We will use these prediction results to create and plot a confusion matrix that
#provides a better visualization of the model's performance.


#CONFUSION MATRIX
#To generate the confusion matrix, you use the sklearn's built in function
#as shown here:

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

cf = confusion_matrix(y_test, y_pred)
cf

#this gives an array which can be used in creating a visual effect using the follow:

from mlxtend.plotting import plot_confusion_matrix
sns.heatmap(cf, annot=True, fmt='d', cmap='viridis')

plt.show()

In [26]:
#The accuracy score can be calculated as

from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

#The execution of this statement gives an accuracy of 80% which is largely
#accepted accuracy in machine learning. We now use the model on unseen data.



0.8

In [32]:
#PREDICTING ON UNSEEN DATA
#To create an unseen data for our use case, we need to know the data types of all
#features to which we will assign dummy values

#For our unseen data, we will use the following values:
#CreditScore = 615
#Gender = Male
#Age = 22
#Tenure = m5
#Balamce = 20000
#NUmOfProducts = 1
#HasCrCard = 1
#IsActiveMember = 1
#EstimatedSalary = 60000
#Geography = Spain

customer = model.predict([[615, 22, 5, 20000, 5, 1, 1, 60000, 0, 0]])

customer

if customer[0] == 1:
  print("Customer is likely to leave")
else:
  print("Customer will stay")

Customer will stay
