In [1]:
import pandas as pd
iris = pd.read_csv("C:/Users/Admin/Documents/Datasets/iris.csv")
iris = iris.drop(labels=["Unnamed: 0"], axis=1)
iris.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [2]:
import numpy as np
iris.Species = iris.Species.astype("category")
iris["Species1"] = np.repeat([0, 1, 2], 50)  # for first 50 observations = 0; for next 50 = 1; for last 50 = 2
iris.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,Species1
0,5.1,3.5,1.4,0.2,setosa,0
1,4.9,3.0,1.4,0.2,setosa,0
2,4.7,3.2,1.3,0.2,setosa,0
3,4.6,3.1,1.5,0.2,setosa,0
4,5.0,3.6,1.4,0.2,setosa,0


In [3]:
# Keras is built in Pyhton and on top of TensorFlow 2.0
# Keras is neural network library while
# TensorFlow is the open source library for a number of tasks in machine learning.
# Both framewrks provide high-level APIs for building and training models with ease.

In [4]:
import tensorflow as tf
import keras

Using TensorFlow backend.


In [5]:
X = iris.iloc[:, :4]
y = iris.iloc[:, -1]
display(X.head())
display(y.head())
print(y.value_counts())

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


0    0
1    0
2    0
3    0
4    0
Name: Species1, dtype: int32

2    50
1    50
0    50
Name: Species1, dtype: int64


In [6]:
y = pd.get_dummies(y)
y.head()

Unnamed: 0,0,1,2
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [8]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(112, 4)
(38, 4)
(112, 3)
(38, 3)


In [9]:
y_test[:5]

Unnamed: 0,0,1,2
114,0,0,1
62,0,1,0
33,1,0,0
107,0,0,1
7,1,0,0


# Creating MLP in Keras

In [10]:
# Creating MLP in Keras
# Sequential function groups a linear stack of layers into a Keras model
# Sequential model provides training and inference features on this model
from keras.models import Sequential
# A dense layer is just a layer of neurons in a neural network
# Each neuron recieves an input from all the neurons in previous layer, thus densely connected.
from keras.layers import Dense, Dropout, Activation

In [11]:
# Create model
model = Sequential()

In [12]:
# In first Dense layer, the dimension of input layer can be given
# tesorflow: large dropout rate: 0.8 (i.e. >0.5).
# In Tensorflow 2.x, dropout() uses dropout rate instead of keep_prob.
# Please ensure that this is intended.

In [13]:
model.add(Dense(6, input_dim=4, activation="sigmoid", name="HL1"))
model.add(Dropout(0.5))
model.add(Dense(6, activation="relu", name="HL2"))
model.add(Dropout(0.2))
model.add(Dense(3, activation="softmax", name="OL"))

In [14]:
# initialized weights
print(model.weights)

[<tf.Variable 'HL1/kernel:0' shape=(4, 6) dtype=float32, numpy=
array([[ 0.75721276,  0.6253556 , -0.30697662, -0.7678474 , -0.6735275 ,
        -0.69091046],
       [ 0.6498194 ,  0.2593193 , -0.13429397, -0.14790952,  0.5050874 ,
         0.714748  ],
       [-0.3803683 ,  0.2902664 , -0.1643402 ,  0.3778746 ,  0.66825914,
        -0.11448741],
       [ 0.01783991, -0.03609776,  0.23856127, -0.69726396,  0.76785684,
        -0.4933757 ]], dtype=float32)>, <tf.Variable 'HL1/bias:0' shape=(6,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0.], dtype=float32)>, <tf.Variable 'HL2/kernel:0' shape=(6, 6) dtype=float32, numpy=
array([[ 0.31574517,  0.42839164, -0.09273809,  0.00258982, -0.67941934,
         0.21887279],
       [ 0.32550484,  0.11600953,  0.61612755,  0.2641245 ,  0.6335042 ,
         0.15417045],
       [ 0.35684997,  0.5547914 , -0.09906328,  0.26748836, -0.17071342,
        -0.0695647 ],
       [-0.6621524 ,  0.6080976 , -0.19545215, -0.24087986,  0.3806762 ,
        -0

In [15]:
print("Input layer shape: ", model.input_shape)
print("Output layer shape: ", model.output_shape)

Input layer shape:  (None, 4)
Output layer shape:  (None, 3)


In [16]:
# Evaluation Metrics
from keras import metrics
from keras.metrics import Precision, Recall, AUC

In [17]:
X_train.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
61,5.9,3.0,4.2,1.5
92,5.8,2.6,4.0,1.2
112,6.8,3.0,5.5,2.1
2,4.7,3.2,1.3,0.2
141,6.9,3.1,5.1,2.3


In [18]:
# Normalize input data
from keras.utils import normalize
X_train = normalize(np.array(X_train), axis=1)
X_test = normalize(np.array(X_test), axis=1)

In [19]:
X_train[:5]

array([[0.73923462, 0.37588201, 0.52623481, 0.187941  ],
       [0.76262994, 0.34186859, 0.52595168, 0.1577855 ],
       [0.71718148, 0.31640359, 0.58007326, 0.22148252],
       [0.80533308, 0.54831188, 0.2227517 , 0.03426949],
       [0.73337886, 0.32948905, 0.54206264, 0.24445962]])

# Compiling a simple model without customizing the optimizers

In [20]:
# Compile model
# Compile defines the loss function, the activation and the metrics
# You need a compiled model to train because training uses the loss function and the optimizer

# optimizer - name of optimizer or optimizer instance: 'SGD', 'RMSProp', 'Adam'

# loss - name of objective function or objective function or `Loss` instance:
# Classification - binary_crossentropy, categorical_crossentropy
# Regression - mean_squared_error, mean_squared_error, mean_absolute_percentage_error

# metrics - list of metrics to be evaluated by the model during training and testing

model.compile(loss="categorical_crossentropy",
             optimizer="Adam",
             metrics=["accuracy", tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

# Defining optimizers with fixed learning_rate values

In [21]:
# Alternatively, we can separately define optimizers
# These are not default optimizers but custom optimizers
opt_SGD = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, name="SGD")

# rho is beta of RMSProp
# momentum and rho are applied in sequence one after other
# In Adam optimization, both are used in same formula

opt_RMSProp = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9, momentum=0.9, epsilon=1e-07, name="RMSProp")

opt_Adam = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name="Adam")

# Defining Learning rate schedules -- going to user after defining

In [22]:
# We can specify learning rate schedules
# a) exponential decay schedule
# initial_learning_rate * decay_rate ^ (step / decay_steps)
# If staircase=True then, (step / decay_steps) will be integer. So, when step reaches decay_step then it will reduce first time.
# staircase=True - It is like a floor operation on (step/decay_steps)
# So, initially learning_rate = initial_learning_rate * decay_rate ^ 0 = initial_learning_rate
# Then, after some steps, when step > 100000, (step/decay_steps will become 1)
#  and then, learning_rate = initial_learning_rate * decay_rate ^ 1 = initial_learning_rate * 0.96
# Then, after few more steps, when step reaches 2*decay_steps,
#              learning_rate = initial_learning_rate * 0.96^2

# And if staircase=False, then, decay of learning_rate will increase at each step.
# Initially, step/decay_steps will remain near to zero and decay_rate^(~0)  will be almost 1.
#           And so, learning_rate will be just like initial_learning_rate
# Then, at each step, step/decay_steps will increase and decay_rate^(step/decay_steps) will get small small small..
#           And so, the learning_rate will decay gradually with increasing number of steps.


initial_learning_rate = 0.1
lr_schedule1 = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True
)

# b) Polynomial decay
# (initial_learning_rate - end_learning_rate) * (1 - (step/decay_steps))^(power) + end_learning_rate

starter_learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 10000
lr_schedule2 = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=starter_learning_rate,
    end_learning_rate=end_learning_rate,
    decay_steps=decay_steps,
    power=0.5
)


# Inverse Time Decay
# initial_learning_rate / (1 + (decay_rate * (step / decay_steps)))

initial_learning_rate = 0.1
decay_steps = 1.0
decay_rate = 0.5
lr_schedule3 = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=initial_learning_rate,
    decay_rate=decay_rate,
    decay_steps=decay_steps
)

# Defining Optimizers with learning rate schedules instead of just learning_rate values

In [23]:
# We can separately define optimizers with learning rate schedule defined
opt_SGD1 = tf.keras.optimizers.SGD(learning_rate=lr_schedule1, momentum=0.9, name="SGD")
# rho is beta of RMSProp
opt_RMSProp1 = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule2, rho=0.9, momentum=0.9, epsilon=1e-07, name="RMSProp")

opt_Adam1 = tf.keras.optimizers.Adam(learning_rate=lr_schedule3, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name="Adam")

# Compile Model using defined customized optimizers

In [24]:
# Compile model with defined customized optimizers
model.compile(loss="categorical_crossentropy",
              optimizer=opt_SGD1, 
              metrics=["accuracy",
                       tf.keras.metrics.AUC(),
                       tf.keras.metrics.Precision(),
                       tf.keras.metrics.Recall()])

In [25]:
# Fit the model
# 1 epoch = 1 pass through the entire training set
# batch_size - for mini-batch gradient descent
# validation-split: Fraction of the training dat to be used as validation data
# The model will set apart this fraction of training data, will not train on it,
# and will evaluate the loss and any model metrics on this data at the end of each epoch.

# np.random.seed(0)  --???
keras_do_model = model.fit(X_train, y_train,
                          epochs=500, batch_size=20,
                          validation_split=0.1, verbose=False)

In [26]:
preds = model.predict(X_test)
print(preds[:5])

[[0.04712725 0.4627474  0.4901253 ]
 [0.04712725 0.4627474  0.4901253 ]
 [0.38510284 0.613284   0.00161313]
 [0.04712725 0.4627474  0.4901253 ]
 [0.3482165  0.64891756 0.00286594]]


In [27]:
# evaluate the model
# Returns the loss value & metrics values for the model in test mode
#     That is the evaluate() method is runs the model in test mode
scores_train = model.evaluate(X_train, y_train)
print(list(zip(model.metrics_names, scores_train)))
print("Training Metrics: \n", model.metrics_names[0], scores_train[0], "\n",
      model.metrics_names[1], scores_train[1], "\n",
      model.metrics_names[2], scores_train[2], "\n",
      model.metrics_names[3], scores_train[4], "\n",
      model.metrics_names[4], scores_train[4], "\n")

scores_test = model.evaluate(X_test, y_test)
print(list(zip(model.metrics_names, scores_test)))
print("Testing Metrics: \n", model.metrics_names[0], scores_test[0], "\n",
      model.metrics_names[1], scores_test[1], "\n",
      model.metrics_names[2], scores_test[2], "\n",
      model.metrics_names[3], scores_test[4], "\n",
      model.metrics_names[4], scores_test[4], "\n")

[('loss', 0.8448598555156163), ('accuracy', 0.3660714328289032), ('auc_1', 0.8257098197937012), ('precision_1', 0.7510448098182678), ('recall_1', 0.41825392842292786)]
Training Metrics: 
 loss 0.8448598555156163 
 accuracy 0.3660714328289032 
 auc_1 0.8257098197937012 
 precision_1 0.41825392842292786 
 recall_1 0.41825392842292786 

[('loss', 0.8536271139195091), ('accuracy', 0.2368421107530594), ('auc_1', 0.8255557417869568), ('precision_1', 0.7504399418830872), ('recall_1', 0.41774269938468933)]
Testing Metrics: 
 loss 0.8536271139195091 
 accuracy 0.2368421107530594 
 auc_1 0.8255557417869568 
 precision_1 0.41774269938468933 
 recall_1 0.41774269938468933 



In [28]:
model.summary()
# Note that the input layer is not displayed as part of model-layers, since it isn't a layer

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
HL1 (Dense)                  (None, 6)                 30        
_________________________________________________________________
dropout_1 (Dropout)          (None, 6)                 0         
_________________________________________________________________
HL2 (Dense)                  (None, 6)                 42        
_________________________________________________________________
dropout_2 (Dropout)          (None, 6)                 0         
_________________________________________________________________
OL (Dense)                   (None, 3)                 21        
Total params: 93
Trainable params: 93
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Why is output shape = (None, 6) in above summary?? i.e. why None

In [30]:
from ann_visualizer.visualize import ann_viz
ann_viz(model, title="Shallow Neural Network", view=True)

# Regularization

In [31]:
# Regularization
# add to dense layer
# for l2 regularization: kernel_regularizer=tf.keras.regularizers.l2(default l=0.01)
# for l1 regularization: kernel_regularizer=tf.keras.regularizers.l1(default l=0.01)

In [32]:
model = Sequential()
model.add(Dense(6, input_dim=4, activation="sigmoid",
                kernel_regularizer=tf.keras.regularizers.l2(l=0.00001),
                name="HL1"))
model.add(Dropout(0.2))
model.add(Dense(6, activation="relu",
                kernel_regularizer=tf.keras.regularizers.l2(l=0.0001),
               name="HL2"))
model.add(Dropout(0.2))
model.add(Dense(3, activation="softmax", name="OL"))

In [33]:
# Regularizing using original default optimizer
#       (not the custom ones, just for showing the results of adding ragularization)
model.compile(loss="categorical_crossentropy",
              optimizer="Adam",
              metrics = ["accuracy", tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

In [34]:
keras_do_model = model.fit(X_train, y_train, epochs=500, batch_size=20, validation_split=0.1, verbose=False)

In [35]:
# evaluate the model
# Returns the loss value and metrics values for the model in test mode
#     That is the evaluate() method is runs the model in test mode
scores_train = model.evaluate(X_train, y_train)
print(list(zip(model.metrics_names, scores_train)))
print("Training Metrics: \n", model.metrics_names[0], scores_train[0], "\n",
      model.metrics_names[1], scores_train[1], "\n",
      model.metrics_names[2], scores_train[2], "\n",
      model.metrics_names[3], scores_train[4], "\n",
      model.metrics_names[4], scores_train[4], "\n")

scores_test = model.evaluate(X_test, y_test)
print(list(zip(model.metrics_names, scores_test)))
print("Testing Metrics: \n", model.metrics_names[0], scores_test[0], "\n",
      model.metrics_names[1], scores_test[1], "\n",
      model.metrics_names[2], scores_test[2], "\n",
      model.metrics_names[3], scores_test[4], "\n",
      model.metrics_names[4], scores_test[4], "\n")

[('loss', 0.47673127480915617), ('accuracy', 0.6964285969734192), ('auc_2', 0.800546407699585), ('precision_2', 0.7095050811767578), ('recall_2', 0.34869182109832764)]
Training Metrics: 
 loss 0.47673127480915617 
 accuracy 0.6964285969734192 
 auc_2 0.800546407699585 
 precision_2 0.34869182109832764 
 recall_2 0.34869182109832764 

[('loss', 0.5303967344133478), ('accuracy', 0.5789473652839661), ('auc_2', 0.8007513284683228), ('precision_2', 0.7093014717102051), ('recall_2', 0.34904801845550537)]
Testing Metrics: 
 loss 0.5303967344133478 
 accuracy 0.5789473652839661 
 auc_2 0.8007513284683228 
 precision_2 0.34904801845550537 
 recall_2 0.34904801845550537 



In [36]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
HL1 (Dense)                  (None, 6)                 30        
_________________________________________________________________
dropout_3 (Dropout)          (None, 6)                 0         
_________________________________________________________________
HL2 (Dense)                  (None, 6)                 42        
_________________________________________________________________
dropout_4 (Dropout)          (None, 6)                 0         
_________________________________________________________________
OL (Dense)                   (None, 3)                 21        
Total params: 93
Trainable params: 93
Non-trainable params: 0
_________________________________________________________________


In [37]:
ann_viz(model, title="Shallow Neural Network", view=True)

# Batch normalization

In [38]:
# Takes lesser time

# We do not want the hidden units to have mean=0 and variance=1 always.
# We may want it to have some other distribution values of mean and variance.
# It is not necessary that our distribution is normal only. Sometimes we need to learn the type of distribution.
# In batch-normalization, we normalize the outputs of each layer
# and then, learn the distribution: z_learned = gamma * z_norm + beta  where gamma and beta are learnable parameters of the model.
# So, in back-propagation, gamma and beta are also updated along with weights and biases
# This way we learn the distribution by learning gamma and beta in back-prop.
# So, alpha is applicable on gamma and beta as well.


# Co-Variate Shift:
    # Change in data distribution for X and y is known as covariate shift
    # Mean and Standard devaition are the non-learnable parameters here...
    # Since, with each back-prop, distribution itself is getting learned which means the distribution changes after every back-prop.
    # So, it effects the learning of weights and biases also.... in some way
    # So, batch-norm actually tries to keep mean and standard deviation minimal effected from the effect of changing distribution.
    # So, that it won't effect the rest of learning in some way(which is not clear to me).
    # More description in pdf reagrding mean and standard deviation.

# This algorithm works with Momentum, RMSProp and Adam optimizations

In [39]:
from keras.layers.normalization import BatchNormalization

In [40]:
# Trainable = True: The variables will be marked as trainable
# Sample DEFAULT BATCH-NORMALIZATION settings. Just for referenec. Not Used anywhere.
# batch_norm = tf.keras.layers.BatchNormalization(
#     axis=1,
#     momentum=0.9,
#     epsilon=1e-07,
#     center=True,
#     scale=True,
#     beta_initializer="zeros",
#     gamma_initializer="ones",
#     moving_mean_initializer="zeros",
#     moving_variance_initializer="ones",
#     trainable=True
# )

In [46]:
model = Sequential()
model.add(Dense(6, input_dim=4, activation="sigmoid",
                kernel_regularizer=tf.keras.regularizers.l2(l=0.01), name="HL1"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(6, activation="relu",
               kernel_regularizer=tf.keras.regularizers.l2(l=0.01), name="HL2"))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(3, activation="softmax", name="OL"))

In [48]:
model.compile(loss="categorical_crossentropy",
             optimizer="Adam",
             metrics=["accuracy", tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

In [51]:
keras_do_bn_reg_model = model.fit(X_train, y_train, epochs=500,
                                  batch_size=20, validation_split=0.1, verbose=True)

Train on 100 samples, validate on 12 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500


Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500


Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500


Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500


Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500


Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500


Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500


Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500


Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500


Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500


Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500


Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500


Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500


Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500


Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500


Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500


Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [52]:
# evaluate the model
# Returns the loss value and the metrics values for the model in test mode
#     That is the evaluate() method is runs the model in test mode
scores_train = model.evaluate(X_train, y_train)
scores_train = model.evaluate(X_train, y_train)
print(list(zip(model.metrics_names, scores_train)))
print("Training Metrics: \n", model.metrics_names[0], scores_train[0], "\n",
      model.metrics_names[1], scores_train[1], "\n",
      model.metrics_names[2], scores_train[2], "\n",
      model.metrics_names[3], scores_train[4], "\n",
      model.metrics_names[4], scores_train[4], "\n")

scores_test = model.evaluate(X_test, y_test)
print(list(zip(model.metrics_names, scores_test)))
print("Testing Metrics: \n", model.metrics_names[0], scores_test[0], "\n",
      model.metrics_names[1], scores_test[1], "\n",
      model.metrics_names[2], scores_test[2], "\n",
      model.metrics_names[3], scores_test[4], "\n",
      model.metrics_names[4], scores_test[4], "\n")

[('loss', 0.09553925480161395), ('accuracy', 0.9732142686843872), ('auc_3', 0.9728368520736694), ('precision_3', 0.878604531288147), ('recall_3', 0.8427282571792603)]
Training Metrics: 
 loss 0.09553925480161395 
 accuracy 0.9732142686843872 
 auc_3 0.9728368520736694 
 precision_3 0.8427282571792603 
 recall_3 0.8427282571792603 

[('loss', 0.177237973401421), ('accuracy', 0.9473684430122375), ('auc_3', 0.9728591442108154), ('precision_3', 0.8786613941192627), ('recall_3', 0.8428055047988892)]
Testing Metrics: 
 loss 0.177237973401421 
 accuracy 0.9473684430122375 
 auc_3 0.9728591442108154 
 precision_3 0.8428055047988892 
 recall_3 0.8428055047988892 



In [54]:
model.summary()
# In Batch Normalization layer 12 parameters are trainable
# and 12 are not trainable. Trainable parameters are gamma and beta
# Non-trainable parameters are running weighted mean and standard deviation

# First Batch-norm layer
# So, 6 neurons 4 params(moving_mean, moving_variance, gamma, beta)  ==> 2 non-trainable per neuron  ==> 12 non-trainable params

# Second Batch-norm layer
# So, 6 neurons 4 params(moving_mean, moving_variance, gamma, beta)  ==> 2 non-trainable per neuron  ==> 12 non-trainable params

# So, total non-trainable: 12+12 = 24

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
HL1 (Dense)                  (None, 6)                 30        
_________________________________________________________________
batch_normalization_5 (Batch (None, 6)                 24        
_________________________________________________________________
dropout_9 (Dropout)          (None, 6)                 0         
_________________________________________________________________
HL2 (Dense)                  (None, 6)                 42        
_________________________________________________________________
batch_normalization_6 (Batch (None, 6)                 24        
_________________________________________________________________
dropout_10 (Dropout)         (None, 6)                 0         
_________________________________________________________________
OL (Dense)                   (None, 3)                

In [57]:
# ann_viz(model, title="Shallow Neural Network", view=True) # doesnot gives propee output.. gives error