# Importing Libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import LSTM,Softmax,Input,Flatten,Dense
from tensorflow.keras.losses import CategoricalCrossentropy,SparseCategoricalCrossentropy

In [2]:
# Display the TensorFlow version this notebook was run with.
tf.__version__

'2.7.0'

# Importing Datasets

In [3]:
# Load the MNIST train/test splits through the Keras built-in dataset loader.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Inspecting the Data

In [4]:
# Shapes of (train images, train labels, test images, test labels), in that order.
tuple(split.shape for split in (x_train, y_train, x_test, y_test))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

## Number of classes 

In [5]:
# Distinct label values present in the training set; each one is a class.
# (numpy is imported once in the notebook's top import cell.)
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

# Normalising Image Pixels

In [6]:
def scale_pixels(images):
    """Scale integer-typed pixel intensities into the [0, 1] range.

    Parameters
    ----------
    images : numpy.ndarray
        Image array. Integer dtypes are treated as raw, unscaled pixels.

    Returns
    -------
    numpy.ndarray
        ``images / 255`` when ``images`` has an integer dtype; otherwise the
        input array itself, unchanged.

    Notes
    -----
    The dtype guard makes this cell idempotent: once the data has been scaled
    it is floating point, so re-running the cell does not divide by 255 a
    second time.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images / 255
    return images


x_train = scale_pixels(x_train)
x_test = scale_pixels(x_test)

In [7]:
# Images: store as float32 (true division of integer arrays by 255 yields
# float64, which is twice the memory for no benefit here).
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)

# Labels are class indices (0-9), not continuous values. Keep them integer,
# which is what SparseCategoricalCrossentropy expects as targets.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Simple LSTM Model for Classification

In [9]:
# `Model` is imported in the notebook's top import cell.

# Seed TensorFlow's global RNG before any layer is created so that weight
# initialisation is repeatable across Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic; confirm if
# bit-exact reproducibility is required.
SEED = 42
tf.random.set_seed(SEED)

NUM_CLASSES = 10   # digits 0-9
LSTM_UNITS = 256   # size of the LSTM hidden state

# Each 28x28 image is read as a sequence: every image row is one time step
# (sequence length 28) and the 28 pixels in that row are the step's features.
inputs = Input(shape=(28, 28))

# With the default return_sequences=False the LSTM emits only its final
# hidden state, i.e. one vector of LSTM_UNITS values per image.
lstm_out = LSTM(LSTM_UNITS)(inputs)

# The LSTM output is already 2-D (batch, LSTM_UNITS), so Flatten leaves the
# shape unchanged; it is kept so the layer stack matches the recorded summary.
flatten_layer = Flatten()(lstm_out)

# Final classification layer: one softmax probability per digit class.
output = Dense(NUM_CLASSES, activation='softmax')(flatten_layer)

model = Model(inputs, output)

In [10]:
# Print the layer stack, output shapes and parameter counts as a sanity check.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 lstm (LSTM)                 (None, 256)               291840    
                                                                 
 flatten (Flatten)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 10)                2570      
                                                                 
Total params: 294,410
Trainable params: 294,410
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Targets are class indices rather than one-hot vectors, hence the sparse
# variant of categorical cross-entropy.
loss_fn = SparseCategoricalCrossentropy()
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# Training hyper-parameters gathered in one place; `history` keeps the
# per-epoch metrics returned by fit.
fit_kwargs = dict(batch_size=32, epochs=5, shuffle=True, verbose=1)
history = model.fit(x_train, y_train, **fit_kwargs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
# Per-epoch training loss and accuracy recorded by model.fit.
history.history

{'loss': [0.2857265770435333,
  0.09380507469177246,
  0.0646512508392334,
  0.052804287523031235,
  0.04050440713763237],
 'accuracy': [0.9072499871253967,
  0.972516655921936,
  0.9804166555404663,
  0.9839166402816772,
  0.9873999953269958]}

In [20]:
# Score the trained model on the test split; the result is [loss, accuracy],
# matching the loss and metric passed to compile.
model.evaluate(x=x_test, y=y_test, batch_size=32)



[0.044212982058525085, 0.9864000082015991]

# Conclusion

After 5 epochs of training, the model reaches a training accuracy of 98.74% and a test-set accuracy of 98.64%.