### **Experiment-3**
### Apply any of the following learning algorithms to learn the parameters of the supervised single layer feed forward neural network.

### o   Stochastic Gradient Descent

### o   Mini Batch Gradient Descent

### o   Momentum GD

### o   Nesterov GD

### o   Adagrad GD

### o   Adam Learning GD

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam , Adagrad
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [None]:
# Fetch MNIST as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocess data
# Flatten every image into a 28*28 = 784 element float32 vector and divide by 255.
X_train = X_train.reshape((-1, 28 * 28)).astype(np.float32) / 255
X_test = X_test.reshape((-1, 28 * 28)).astype(np.float32) / 255

# One-hot encode the labels over 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


## Stochastic Gradient Descent (SGD)

In [None]:
# Seed Python, NumPy and TensorFlow so the randomly initialised weights are
# repeatable when this cell is re-run (42 is an arbitrary fixed value).
tf.keras.utils.set_random_seed(42)

# 784 -> 64 (ReLU) -> 10 (softmax) classifier. The input shape is declared with
# an explicit Input object rather than `input_shape=` on the first Dense layer,
# which Keras 3 warns against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(28 * 28,)),
    Dense(units=64, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# SGD with the optimizer's default hyperparameters; the labels are one-hot
# encoded, hence categorical cross-entropy.
model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 11 epochs with 20% of the training data held out for validation.
# No batch_size is passed, so Keras batches the data itself: the 1500 steps per
# epoch in the log show weights are updated per mini-batch, not per sample.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=11,
    validation_split=0.2,
    verbose=1,
)

# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled 'accuracy' metric.
loss, accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)

Epoch 1/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6790 - loss: 1.1827 - val_accuracy: 0.8978 - val_loss: 0.3910
Epoch 2/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8911 - loss: 0.3965 - val_accuracy: 0.9153 - val_loss: 0.3164
Epoch 3/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9068 - loss: 0.3303 - val_accuracy: 0.9220 - val_loss: 0.2845
Epoch 4/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9181 - loss: 0.2913 - val_accuracy: 0.9244 - val_loss: 0.2672
Epoch 5/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9247 - loss: 0.2701 - val_accuracy: 0.9316 - val_loss: 0.2468
Epoch 6/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9279 - loss: 0.2528 - val_accuracy: 0.9358 - val_loss: 0.2331
Epoch 7/11
[1m1

In [None]:
# Show the test accuracy from the preceding evaluate() call, to 4 decimals.
print("Stochastic gradient descent optimizer accuracy: {:.4f}".format(accuracy))

Stochastic gradient descent optimizer accuracy: 0.9472


## Momentum-based Stochastic Gradient Descent

In [None]:
# Seed Python, NumPy and TensorFlow so the randomly initialised weights are
# repeatable when this cell is re-run (42 is an arbitrary fixed value).
tf.keras.utils.set_random_seed(42)

# 784 -> 64 (ReLU) -> 10 (softmax) classifier. The input shape is declared with
# an explicit Input object rather than `input_shape=` on the first Dense layer,
# which Keras 3 warns against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(28 * 28,)),
    Dense(units=64, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# SGD with a momentum coefficient of 0.9; all other optimizer settings are
# left at their defaults.
model.compile(optimizer=SGD(momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 11 epochs with 20% of the training data held out for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=11,
    validation_split=0.2,
    verbose=1,
)

# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled 'accuracy' metric.
loss, accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)

Epoch 1/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8283 - loss: 0.5692 - val_accuracy: 0.9396 - val_loss: 0.2094
Epoch 2/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9470 - loss: 0.1836 - val_accuracy: 0.9557 - val_loss: 0.1547
Epoch 3/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9618 - loss: 0.1333 - val_accuracy: 0.9645 - val_loss: 0.1255
Epoch 4/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9696 - loss: 0.1043 - val_accuracy: 0.9660 - val_loss: 0.1163
Epoch 5/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9750 - loss: 0.0842 - val_accuracy: 0.9657 - val_loss: 0.1127
Epoch 6/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9777 - loss: 0.0770 - val_accuracy: 0.9692 - val_loss: 0.1077
Epoch 7/11
[1m1

In [None]:
# Show the test accuracy from the preceding evaluate() call, to 4 decimals.
print("Momentum based Gradient Descent optimizer accuracy: {:.4f}".format(accuracy))

Momentum based Gradient Descent optimizer accuracy: 0.9736


## AdaGrad (Adaptive Gradient Descent)

In [None]:
# Seed Python, NumPy and TensorFlow so the randomly initialised weights are
# repeatable when this cell is re-run (42 is an arbitrary fixed value).
tf.keras.utils.set_random_seed(42)

# 784 -> 64 (ReLU) -> 10 (softmax) classifier. The input shape is declared with
# an explicit Input object rather than `input_shape=` on the first Dense layer,
# which Keras 3 warns against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(28 * 28,)),
    Dense(units=64, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# Adagrad with the optimizer's default hyperparameters.
model.compile(optimizer=Adagrad(), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 11 epochs with 20% of the training data held out for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=11,
    validation_split=0.2,
    verbose=1,
)

# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled 'accuracy' metric.
loss, accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)

Epoch 1/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.5283 - loss: 1.7022 - val_accuracy: 0.8315 - val_loss: 0.8393
Epoch 2/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8241 - loss: 0.8028 - val_accuracy: 0.8656 - val_loss: 0.6161
Epoch 3/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8536 - loss: 0.6288 - val_accuracy: 0.8805 - val_loss: 0.5257
Epoch 4/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8637 - loss: 0.5527 - val_accuracy: 0.8868 - val_loss: 0.4750
Epoch 5/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8776 - loss: 0.5004 - val_accuracy: 0.8922 - val_loss: 0.4422
Epoch 6/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8837 - loss: 0.4663 - val_accuracy: 0.8960 - val_loss: 0.4186
Epoch 7/11
[1m1

In [None]:
# Show the test accuracy from the preceding evaluate() call, to 4 decimals.
print("AdaGrad optimizer accuracy: {:.4f}".format(accuracy))

AdaGrad optimizer accuracy: 0.9082


## Adam (Adaptive Moment Estimation)

In [None]:
# Seed Python, NumPy and TensorFlow so the randomly initialised weights are
# repeatable when this cell is re-run (42 is an arbitrary fixed value).
tf.keras.utils.set_random_seed(42)

# 784 -> 64 (ReLU) -> 10 (softmax) classifier. The input shape is declared with
# an explicit Input object rather than `input_shape=` on the first Dense layer,
# which Keras 3 warns against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(28 * 28,)),
    Dense(units=64, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# Adam with the optimizer's default hyperparameters.
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 11 epochs with 20% of the training data held out for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=11,
    validation_split=0.2,
    verbose=1,
)

# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled 'accuracy' metric.
loss, accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)

Epoch 1/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8460 - loss: 0.5527 - val_accuracy: 0.9463 - val_loss: 0.1919
Epoch 2/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9482 - loss: 0.1807 - val_accuracy: 0.9578 - val_loss: 0.1439
Epoch 3/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9634 - loss: 0.1291 - val_accuracy: 0.9620 - val_loss: 0.1313
Epoch 4/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9764 - loss: 0.0790 - val_accuracy: 0.9674 - val_loss: 0.1066
Epoch 6/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9798 - loss: 0.0674 - val_accuracy: 0.9697 - val_loss: 0.1042
Epoch 7/11
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9839 - loss: 0.0529 - val_accuracy: 0.9703 - val_loss: 0.1025
Epoch 8/11
[1m1

In [None]:
# Show the test accuracy from the preceding evaluate() call, to 4 decimals.
print("Adam optimizer accuracy: {:.4f}".format(accuracy))

Adam optimizer accuracy: 0.9750
