In [1]:
!nvidia-smi 

Thu Aug 25 23:37:58 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten, Dense, MaxPooling1D, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras import optimizers
import matplotlib.pyplot as plt


In [3]:
# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001

In [4]:
def make_tf_dataset(X, y, batch_size = 20, shuffle = False):
    # Create Datasets from the input numpy arrays.
    X = tf.data.Dataset.from_tensor_slices(X)
    y = tf.data.Dataset.from_tensor_slices(y)    # Make the X-y training pairs.
    ds = tf.data.Dataset.zip((X, y))    # Shuffle the data if necessary.
    if shuffle == True:
        ds = ds.shuffle(len(X))    # Return the data in batches.
    return ds.batch(batch_size)

# MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()   
X_train = X_train.astype(float)  / 255.0
X_test = X_test.astype(float) / 255.0    

# one hot encoding
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Train dataset
train_ds = make_tf_dataset(X_train, y_train, 
                               batch_size = batch_size, 
                               shuffle = True)
# Test dataset
test_ds = make_tf_dataset(X_test, y_test, 
                              batch_size = batch_size)

#Validation dataset


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
X_test.shape

(10000, 28, 28)

In [6]:
def create_model():
    model = Sequential(
        [
          LSTM(128, input_shape=(X_train.shape[1:]), return_sequences=True),
          LSTM(64, return_sequences=True),            
          Flatten(),
          Dense(480, activation = 'relu'),
          Dense(120, activation = 'relu'),
          Dense(10, activation = 'softmax'),

        ]

    )
    input_shape = (None,28, 28)
    model.build(input_shape)
    model.summary()
    opt = optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    
    return model
model = create_model()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 28, 128)           80384     
                                                                 
 lstm_1 (LSTM)               (None, 28, 64)            49408     
                                                                 
 flatten (Flatten)           (None, 1792)              0         
                                                                 
 dense (Dense)               (None, 480)               860640    
                                                                 
 dense_1 (Dense)             (None, 120)               57720     
                                                                 
 dense_2 (Dense)             (None, 10)                1210      
                                                                 
Total params: 1,049,362
Trainable params: 1,049,362
Non-

In [7]:
from keras.callbacks import ModelCheckpoint
 

history = model.fit(train_ds, epochs = num_epochs, shuffle=True, batch_size = batch_size,verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
# Test the model
import time

model.evaluate(test_ds)




[0.044028449803590775, 0.9853000044822693]

In [9]:

import numpy as np

start_time = time.time()
y_pred = model.predict(test_ds).argmax(axis = 1)
y_true = np.array([np.argmax(bat[1]) for bat in test_ds.unbatch()])
print("--- %s seconds ---" % (time.time() - start_time))

--- 2.728645086288452 seconds ---


In [10]:
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.99      0.98      0.99      1032
           3       0.97      0.99      0.98      1010
           4       0.99      0.98      0.99       982
           5       0.98      0.99      0.98       892
           6       0.98      0.99      0.99       958
           7       0.99      0.99      0.99      1028
           8       0.98      0.99      0.98       974
           9       0.98      0.97      0.98      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000

