In [None]:
!pip install visualkeras
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import visualkeras
from PIL import ImageFont


### **Do not forget to connect to GPU runtime before training**

In [None]:
# Load CIFAR-10 and draw a 70/30 train/test split from its training images.
# The test split returned by load_data() is not used anywhere below, so it is
# discarded explicitly instead of being bound to X_test/Y_test and then
# overwritten by the split.
from sklearn.model_selection import train_test_split

(X_all, Y_all), _ = tf.keras.datasets.cifar10.load_data()

# A fixed random_state keeps the split (and every metric computed from it)
# the same across "Restart & Run All".
X_train, X_test, Y_train, Y_test = train_test_split(
    X_all, Y_all, test_size=0.3, random_state=42
)

In [None]:
# Display the shape of the training image array produced by the split
X_train.shape

In [None]:
# Scale the pixel values by 1/255.
# NOTE(review): for 8-bit image data this maps values to [0, 1], not [-1, 1] — confirm the input range.
# NOTE: both arrays are reassigned from themselves, so re-running this cell divides by 255 a second time.

X_train  = X_train/255
X_test = X_test/255

In [None]:
# Augmentation settings shared by the training and validation iterators.
# validation_split reserves a fraction of the arrays passed to flow(); the
# 'training' and 'validation' subsets then select disjoint samples.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=20,       # random rotation of up to 20 degrees either way
    width_shift_range=0.1,   # random horizontal shift, up to 10% of the width
    height_shift_range=0.1,  # random vertical shift, up to 10% of the height
    shear_range=0.2,         # random shear with intensity 0.2
    zoom_range=0.2,          # random zoom within [0.8, 1.2]
    horizontal_flip=True,    # randomly mirror images left/right
    fill_mode='nearest',     # fill newly exposed pixels with the nearest pixel value
    validation_split=0.2,    # hold back 20% of the samples for validation
)

# Training iterator. subset='training' is required here: without a subset,
# flow() iterates over ALL samples, including the 20% that also feed
# validation_data, so validation accuracy would be measured on images the
# model has been trained on.
augmented_images = datagen.flow(X_train, Y_train, subset='training')

# Validation iterator over the held-back 20%.
# NOTE(review): it comes from the same generator, so the random augmentations
# above are applied to the validation images as well.
validation_data = datagen.flow(X_train, Y_train, subset='validation')

## **`This is an Open assignment with minimum instructions`**
You are allowed to search all over the web--> find any articles or implement them--> try your experiments

> **---> create the model**\
**---> tune the hyperparameters like learning_rate, filter/kernel size**\
**---> optimize the result**


In [None]:
"""
You have got some experience from the last assignment.
Use that experience this time
"""

In [None]:
#@title **IMPORTANT ANNOUNCEMENT**
"""
Now with this much freedom, you can do anything
So make sure you understand what you do and after the end of this assignment
 you will have to explain all the code you tried in a viva exam;
 this will be the mid term evaluation.
"""

## **YOUR EFFORTS WILL COUNT MORE THAN THE RESULTS YOU GET**
> **So make sure all the time you spent on this notebook should be visible from the notebook**

In [None]:
"""
Question: What is Batch Normalization? What is it used for? How does it fix the
problem it is used for? [ Answer in at least 300 words]
<cite your sources>

Answer:



"""

Batch Normalization is an algorithmic method that makes the training of neural networks faster and more stable.

With it, the model converges to a good solution in fewer epochs than it would without it.

Basically, the outputs/activations of a layer are normalized over each mini-batch (mean set to 0 and variance set to 1).

Uses of Batch Normalization: It allows the use of higher learning rates during training, so the network converges more quickly.
It reduces the dependency of the network on the initial parameter values and hyperparameters.

By ensuring that the inputs to each layer have a standardized distribution, Batch Normalization stabilizes the gradient flow during backpropagation.

It helps reduce overfitting and also improves the model's ability to generalize.

How it fixes the problem:

Batch norm is applied with mini-batch gradient descent. It is applied layer by layer and is optional for each layer's inputs. Let us see how it works: let $B$ denote a mini-batch of size $m$ drawn from the training set. The empirical mean $\mu$ and variance $\sigma^2$ of $B$ are

$$\mu = \frac{1}{m}\sum_{i=1}^{m} x_i, \qquad \sigma^2 = \frac{1}{m}\sum_{i=1}^{m} (x_i - \mu)^2,$$

where $x_i$ are the activations for a particular layer of the model.

For a layer of the network with $d$-dimensional input, $x_i = (x_i^{(1)}, \dots, x_i^{(d)})$, each dimension is then normalized (i.e. re-centered and re-scaled) separately:

$$\hat{x}_i^{(k)} = \frac{x_i^{(k)} - \mu^{(k)}}{\sqrt{\left(\sigma^{(k)}\right)^2 + \epsilon}}$$

where $k \in [1, d]$ and $i \in [1, m]$; $\mu^{(k)}$ and $\sigma^{(k)}$ are the mean and standard deviation, respectively, for each dimension.
Here, $\epsilon$ is a small constant that ensures the denominator is non-zero and avoids numerical problems in the case of zero variance. After normalization, the activations are scaled and shifted using the learnable parameters $\gamma$ (gamma) and $\beta$ (beta). This allows the model to control and adjust the normalized values. The scaling and shifting operations enable the network to learn the optimal representation for each layer.

Scaled and shifted activations: $y_i^{(k)} = \gamma\,\hat{x}_i^{(k)} + \beta$, where $\hat{x}_i^{(k)}$ is the normalized activation.

Also, by adjusting the values of γ and β during the training process, the network learns to capture and represent the data more effectively.

https://en.wikipedia.org/wiki/Batch_normalization

https://paperswithcode.com/method/batch-normalization

In [None]:
"""
Tutorial: https://www.tensorflow.org/tutorials/images/classification

Above tutorial does exactly the same job
But I will give zero marks for the exact same model used in the tutorial

You need to experiment with different layers, and all those
experiments should be visible by your notebooks
"""


In [None]:
# Baseline CNN: three Conv2D(3x3) + MaxPooling stages followed by a small
# dense head.
model = tf.keras.Sequential([
    # Intermediate layers
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    # Final layer: softmax, so the model outputs class probabilities (not logits)
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Build with an unspecified (None) batch dimension. Passing X_train.shape
# unchanged would use the number of training samples as the batch size.
model.build(input_shape=(None,) + X_train.shape[1:])

In [None]:
# Print the layer-by-layer summary of the baseline model
model.summary()

In [None]:
#@title Visualization

# just run this cell as it is
# Draws the layer graph of `model` (with shapes and layer names) and writes it to cnn_plot.png
tf.keras.utils.plot_model(model, to_file='cnn_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# just run this cell as it is
# Renders visualkeras' layered view of `model` with a legend
visualkeras.layered_view(model, legend=True)

In [None]:
"""
In the last part of the assignment
try experimenting with learning rate.
Maybe decreasing the lr might help?
"""

# The model's final Dense layer already applies softmax, so the loss receives
# probabilities. from_logits must therefore be False; True would tell the loss
# to treat those probabilities as raw logits.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# The iterator yields batches that already contain both the images and their
# labels, so fit() is given the iterator alone (no separate y argument).
history = model.fit(
    augmented_images,
    validation_data=validation_data,
    epochs=6,
)

In [None]:
# Draw the training and validation accuracy curves from the last fit() call
for metric_key, curve_label in (
    ('accuracy', "Train accuracy"),
    ('val_accuracy', "Validation accuracy"),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()

In [None]:
# Score the baseline model on the held-out X_test / Y_test arrays
model.evaluate(x=X_test, y=Y_test)

## **TRY DIFFERENT MODELS AND COMPARE THE RESULTS**

In [None]:
# Variant 2: 7x7 kernel in the first conv layer, two conv stages instead of
# three, a wider 128-unit dense layer and a light dropout layer.
model2 = tf.keras.Sequential([
    # Intermediate layers
    tf.keras.layers.Conv2D(32, (7, 7), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # addition of dropout layer
    tf.keras.layers.Dropout(0.05),
    # Final layer: softmax, so the model outputs class probabilities (not logits)
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Build with an unspecified (None) batch dimension. Passing X_train.shape
# unchanged would use the number of training samples as the batch size.
model2.build(input_shape=(None,) + X_train.shape[1:])

In [None]:
# Print the layer-by-layer summary of model2
model2.summary()

In [None]:
# Write model2's diagram to its own file so it does not overwrite the
# cnn_plot.png produced for the first model.
tf.keras.utils.plot_model(model2, to_file='cnn_plot_model2.png', show_shapes=True, show_layer_names=True)

In [None]:
# model2's final Dense layer already applies softmax, so the loss receives
# probabilities. from_logits must therefore be False; True would tell the loss
# to treat those probabilities as raw logits.
model2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [222]:
# Train model2 on the augmented batches; `history` is rebound to this run
history = model2.fit(
    augmented_images,
    validation_data=validation_data,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Draw the training and validation accuracy curves from the last fit() call
for metric_key, curve_label in (
    ('accuracy', "Train accuracy"),
    ('val_accuracy', "Validation accuracy"),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()

In [None]:
# Score model2 on the held-out X_test / Y_test arrays
model2.evaluate(x=X_test, y=Y_test)

In [None]:
# Variant 3: all kernels are 3x3, with 64/32/12 filters across the three conv
# stages and a 128-unit dense layer.
model3 = tf.keras.Sequential([
    # Intermediate layers
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(12, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # Final layer: softmax, so the model outputs class probabilities (not logits)
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Build with an unspecified (None) batch dimension. Passing X_train.shape
# unchanged would use the number of training samples as the batch size.
model3.build(input_shape=(None,) + X_train.shape[1:])

In [None]:
# Print the layer-by-layer summary of model3
model3.summary()

In [None]:
# Write model3's diagram to its own file so it does not overwrite the
# cnn_plot.png produced for the earlier models.
tf.keras.utils.plot_model(model3, to_file='cnn_plot_model3.png', show_shapes=True, show_layer_names=True)

In [None]:
# model3's final Dense layer already applies softmax, so the loss receives
# probabilities. from_logits must therefore be False; True would tell the loss
# to treat those probabilities as raw logits.
model3.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00003),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Train model3 on the augmented batches; `history` is rebound to this run
history = model3.fit(
    augmented_images,
    validation_data=validation_data,
    epochs=5,
)

In [None]:
# Draw the training and validation accuracy curves from the last fit() call
for metric_key, curve_label in (
    ('accuracy', "Train accuracy"),
    ('val_accuracy', "Validation accuracy"),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()

In [None]:
# Score model3 on the held-out X_test / Y_test arrays
model3.evaluate(x=X_test, y=Y_test)

In [None]:
# Variant 4: adding dense layers. A single Conv2D + MaxPooling stage feeds a
# deeper dense head (128 -> 64 -> 32 units).
# The two extra Conv2D(16, (3,3)) + MaxPooling2D((2, 2)) stages used in the
# baseline are deliberately left out of this experiment.
model4 = tf.keras.Sequential([
    # Intermediate layers
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # Final layer: softmax, so the model outputs class probabilities (not logits)
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Build with an unspecified (None) batch dimension. Passing X_train.shape
# unchanged would use the number of training samples as the batch size.
model4.build(input_shape=(None,) + X_train.shape[1:])

In [None]:
# Print the layer-by-layer summary of model4
model4.summary()

In [None]:
# Write model4's diagram to its own file so it does not overwrite the
# cnn_plot.png produced for the earlier models.
tf.keras.utils.plot_model(model4, to_file='cnn_plot_model4.png', show_shapes=True, show_layer_names=True)

In [None]:
# model4's final Dense layer already applies softmax, so the loss receives
# probabilities. from_logits must therefore be False; True would tell the loss
# to treat those probabilities as raw logits.
model4.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00003),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [None]:
# Train model4 on the augmented batches; `history` is rebound to this run
history = model4.fit(
    augmented_images,
    validation_data=validation_data,
    epochs=5,
)

In [None]:
# Draw the training and validation accuracy curves from the last fit() call
for metric_key, curve_label in (
    ('accuracy', "Train accuracy"),
    ('val_accuracy', "Validation accuracy"),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()

In [None]:
# Score model4 on the held-out X_test / Y_test arrays
model4.evaluate(x=X_test, y=Y_test)