# **Step 1:** Import libraries

In [1]:
# Import libraries

import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objs as go

from tensorflow import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD

#import warnings
#warnings.filterwarnings('ignore')

In [2]:
pip install -U kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
lida 0.0.10 requires fastapi, which is not installed.
lida 0.0.10 requires python-multipart, which is not installed.
lida 0.0.10 requires uvicorn, which is not installed.[0m[31m
[0mSuccessfully installed kaleido-0.2.1


# **Step2:** Take XOR input data and store in one variable (Input data), Store output data of XOR in another variable (Target data)

In [3]:
X = np.array([0,0,1,1]).reshape(4, 1)
Y = np.array([0,1,0,1]).reshape(4, 1)

In [4]:
input_data = np.hstack((X, Y))
input_data

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

In [5]:
target_data = np.array([0,1,1,0])

# **Step3:** Create the model.

**Define sequential model.**

In [6]:
model = Sequential()

**Add first layer in the model**

In [7]:
model.add(Dense(8, input_shape=(2,), activation='relu'))

**Add the second layer in the model**

In [8]:
model.add(Dense(1, activation='sigmoid'))

**Keep learning rate = 0.1**

In [9]:
LEARNING_RATE = 0.1

**Use SGD as an optimizer with given learning rate.**

In [10]:
sgd = SGD(learning_rate = LEARNING_RATE)

# **Step4:** Compile the model with the defined optimizer in the previous step with MSE as the loss term.

In [11]:
model.compile(loss='mean_squared_error', optimizer = sgd)

# **TASK 1**: Convergence speed for the default case

---



---




# **Step5:** Now, we need to record the learning rates so that we can capture the number of epochs at model converging.

In [12]:
TAREGT_LOSS = 0.002

In [13]:
# Custom Callback

learning_rates = []
class LearningRateCallback(keras.callbacks.Callback):
    '''Appends learning rate every epoch into a list'''

    def on_epoch_begin(self, epoch, logs=None):
        lr = self.model.optimizer.lr.numpy()
        learning_rates.append(lr)


# Step6: Train the model and monitor the convergence and learning rates.

In [14]:
epochs = 0
mse_losses = []

while True:
    # Train the model for 10 epochs
    history = model.fit(input_data, target_data, epochs = 10, verbose = 1, callbacks=[LearningRateCallback()])

    epochs += 10
    training_loss_values = history.history['loss']    # Training loss

    mse_losses.extend(training_loss_values)
    previous_loss = training_loss_values[-1]

    # Check convergence
    if previous_loss <= TAREGT_LOSS:
        print(f'Model converged after {epochs} epochs.')
        break


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epo

In [17]:
print(f'Model converged after {epochs} epochs.')

Model converged after 3020 epochs.


# **Step7:** Plot the SSE (Sum of squared error) vs. Number of epochs.

In [15]:
x_axis_str = 'Epochs'
y_axis_str = 'Mean Squared Error (MSE)'

fig = px.line(x = range(epochs), y = mse_losses,
              width = 700,
              height = 500,
              title = f'No. of epochs to Convergence: {epochs}',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('No. of epochs to Convergence - Default case.png')

# **Step8:** Plot the graph of learning rate vs Number of epochs.

In [16]:
x_axis_str = 'Epochs'
y_axis_str = 'Learning Rate'

fig = px.line(x = range(epochs), y = learning_rates,
              width = 700,
              height = 500,
              title = 'Learning Rates V/s Epochs',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('Learning Rates Vs Epochs - Default case.png')

# **TASK 2:** Understanding the effect of momentum on convergence speed

---



---



# **Step9:** Accelerate learning by incorporating momentum based learning rate.

In [20]:
MOMENTUM = 0.9


model2 = Sequential()
model2.add(Dense(8, input_shape=(2,), activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

sgd = SGD(learning_rate = LEARNING_RATE, momentum = MOMENTUM)
model2.compile(loss='mean_squared_error', optimizer = sgd)

epochs = 0
mse_losses = []
learning_rates = []

while True:
    # Train the model for 10 epochs
    history = model2.fit(input_data, target_data, epochs = 10, verbose = 1, callbacks=[LearningRateCallback()])

    epochs += 10
    training_loss_values = history.history['loss']    # Training loss

    mse_losses.extend(training_loss_values)
    previous_loss = training_loss_values[-1]

    # Check convergence
    if previous_loss <= TAREGT_LOSS:
        break


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [22]:
print(f'Model converged after {epochs} epochs.')

Model converged after 260 epochs.


In [21]:
x_axis_str = 'Epochs'
y_axis_str = 'Mean Squared Error (MSE)'

fig = px.line(x = range(epochs), y = mse_losses,
              width = 700,
              height = 500,
              title = f'No. of epochs to Convergence: {epochs}',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('No. of epochs to Convergence - With Momentum.png')

In [23]:
x_axis_str = 'Epochs'
y_axis_str = 'Learning Rate'

fig = px.line(x = range(epochs), y = learning_rates,
              width = 700,
              height = 500,
              title = 'Learning Rates V/s Epochs',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('Learning Rates Vs Epochs - With Momentum.png')

# **TASK 3:** Understanding the effect of adaptive learning rate on convergence speed

---



---



# **Step10:** Use adaptive learning rate.

In [24]:
from keras.optimizers import Adam

model3 = Sequential()
model3.add(Dense(8, input_shape=(2,), activation='relu'))
model3.add(Dense(1, activation='sigmoid'))

adam_optimizer = Adam(learning_rate = LEARNING_RATE)
model3.compile(loss='mean_squared_error', optimizer = adam_optimizer)

epochs = 0
mse_losses = []
learning_rates = []

while True:
    # Train the model for 10 epochs
    history = model3.fit(input_data, target_data, epochs = 10, verbose = 1, callbacks=[LearningRateCallback()])

    epochs += 10
    training_loss_values = history.history['loss']    # Training loss

    mse_losses.extend(training_loss_values)
    previous_loss = training_loss_values[-1]

    # Check convergence
    if previous_loss <= TAREGT_LOSS:
        break


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
print(f'Model converged after {epochs} epochs.')

Model converged after 50 epochs.


In [26]:
x_axis_str = 'Epochs'
y_axis_str = 'Mean Squared Error (MSE)'

fig = px.line(x = range(epochs), y = mse_losses,
              width = 700,
              height = 500,
              title = f'No. of epochs to Convergence: {epochs}',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('No. of epochs to Convergence - Adam Optimizer.png')

In [27]:
x_axis_str = 'Epochs'
y_axis_str = 'Learning Rate'

fig = px.line(x = range(epochs), y = learning_rates,
              width = 700,
              height = 500,
              title = 'Learning Rates V/s Epochs',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('Learning Rates Vs Epochs - Adam Optimizer.png')

# **TASK 4:** Using both momentum and adaptive learning rates for faster convergence

---



---



# **Step 11:** Now use both Momentum and adaptive based learning rate.

In [31]:
MOMENTUM = 0.9


model4 = Sequential()
model4.add(Dense(8, input_shape=(2,), activation='relu'))
model4.add(Dense(1, activation='sigmoid'))

adam_optimizer = Adam(learning_rate = LEARNING_RATE, beta_1 = MOMENTUM)
model4.compile(loss='mean_squared_error', optimizer = adam_optimizer)

epochs = 0
mse_losses = []
learning_rates = []

while True:
    # Train the model for 10 epochs
    history = model4.fit(input_data, target_data, epochs = 10, verbose = 1, callbacks=[LearningRateCallback()])

    epochs += 10
    training_loss_values = history.history['loss']    # Training loss

    mse_losses.extend(training_loss_values)
    previous_loss = training_loss_values[-1]

    # Check convergence
    if previous_loss <= TAREGT_LOSS:
        break


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [32]:
print(f'Model converged after {epochs} epochs.')

Model converged after 50 epochs.


In [33]:
x_axis_str = 'Epochs'
y_axis_str = 'Mean Squared Error (MSE)'

fig = px.line(x = range(epochs), y = mse_losses,
              width = 700,
              height = 500,
              title = f'No. of epochs to Convergence: {epochs}',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('No. of epochs to Convergence - Adam Optimizer with Momentum.png')

In [34]:
x_axis_str = 'Epochs'
y_axis_str = 'Learning Rate'

fig = px.line(x = range(epochs), y = learning_rates,
              width = 700,
              height = 500,
              title = 'Learning Rates V/s Epochs',
              labels = {'x' : x_axis_str, 'y' : y_axis_str})
fig.show()
fig.write_image('Learning Rates Vs Epochs - Adam Optimizer with Momentum.png')