# Recurrent Neural Networks - Deep Learning basics with Python, TensorFlow and Keras p.7
#### by sentdex from https://pythonprogramming.net/recurrent-neural-network-deep-learning-python-tensorflow-keras/

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, Conv2D, MaxPooling2D, Flatten

In [2]:
mnist = tf.keras.datasets.mnist  # mnist is a dataset of 28x28 images of handwritten digits and their labels
(X_train, y_train),(X_test, y_test) = mnist.load_data()  # unpacks images to X_train/X_test and labels to y_train/y_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
X_train = X_train/255.0
X_test = X_test/255.0

print(X_train.shape)
print(X_train[0].shape)

(60000, 28, 28)
(28, 28)


In [4]:
X_train.shape

(60000, 28, 28)

### Using MLP (for comparison)

In [14]:
from tensorflow.keras.utils import to_categorical

y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [15]:
y_train.shape

(60000, 10)

In [18]:
# Fill your code here
mlp_model = Sequential()

mlp_model.add(Flatten(input_shape=(28, 28)))
mlp_model.add(Dense(128, activation='relu'))
mlp_model.add(Dropout(0.4))
mlp_model.add(Dense(52, activation='relu'))
mlp_model.add(Dropout(0.2))
mlp_model.add(Dense(10, activation='softmax'))

'''Arguments

initial_learning_rate: A Python float. The initial learning rate.
decay_steps: How often to apply decay.
decay_rate: A Python number. The decay rate.
staircase: Whether to apply decay in a discrete staircase, as o pposed to continuous, fashion.
name: String. Optional name of the operation. Defaults to "InverseTimeDecay". '''

lr_scheduler = tf.keras.optimizers.schedules.InverseTimeDecay(0.001, decay_rate=1e-6, decay_steps=1, staircase = False)

opt = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)

mlp_model.compile(loss = 'categorical_crossentropy',
                  optimizer = opt,
                  metrics = ['accuracy'])

batch_size = 125
epochs = 3
mlp_model.fit(X_train, y_train, epochs=epochs, verbose = 1, validation_data = (X_test, y_test))

Epoch 1/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.7910 - loss: 0.6658 - val_accuracy: 0.9486 - val_loss: 0.1636
Epoch 2/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9306 - loss: 0.2316 - val_accuracy: 0.9632 - val_loss: 0.1175
Epoch 3/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9480 - loss: 0.1766 - val_accuracy: 0.9697 - val_loss: 0.1074


<keras.src.callbacks.history.History at 0x7fb0273c6150>

### Using CNN (for comparison)

In [8]:
X_train.shape

(60000, 28, 28)

In [9]:
print(y_train.shape, y_test.shape)  # Should be (num_samples,)

(60000, 10) (10000, 10)


In [10]:
import numpy as np
if len(y_train.shape) > 1:
    y_train = np.argmax(y_train, axis=-1)
    y_test = np.argmax(y_test, axis=-1)

In [11]:
print(y_train.shape, y_test.shape)  # Should be (num_samples,)

(60000,) (10000,)


In [12]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(*X_train.shape[1:], 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

model.add(Flatten())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(10, activation='softmax'))

lr_scheduler = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.001, decay_rate=1e-6, decay_steps=1, staircase=False)
opt = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)

# Compile model
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=3, validation_data=(X_test, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 21ms/step - accuracy: 0.8106 - loss: 0.5774 - val_accuracy: 0.9806 - val_loss: 0.0585
Epoch 2/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 21ms/step - accuracy: 0.9638 - loss: 0.1186 - val_accuracy: 0.9853 - val_loss: 0.0446
Epoch 3/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 21ms/step - accuracy: 0.9723 - loss: 0.0880 - val_accuracy: 0.9884 - val_loss: 0.0364


<keras.src.callbacks.history.History at 0x7fb0e814a5d0>

### Using LSTM

In [13]:
model = Sequential()

model.add(LSTM(128, input_shape=X_train.shape[1:], activation='relu', return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(128, activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(10, activation='softmax'))

lr_scheduler = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.001, decay_rate=1e-6, decay_steps=1, staircase=False)
opt = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)

# Compile model
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=3, validation_data=(X_test, y_test))

  super().__init__(**kwargs)


Epoch 1/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 54ms/step - accuracy: 0.6034 - loss: 1.1351 - val_accuracy: 0.9559 - val_loss: 0.1509
Epoch 2/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 58ms/step - accuracy: 0.9544 - loss: 0.1661 - val_accuracy: 0.9696 - val_loss: 0.0983
Epoch 3/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 58ms/step - accuracy: 0.9678 - loss: 0.1181 - val_accuracy: 0.9799 - val_loss: 0.0673


<keras.src.callbacks.history.History at 0x7fb049840c50>