<a href="https://colab.research.google.com/github/Joshi-Ketaki/MLPlayground/blob/main/Statefull_MLMastery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Stateful LSTM that learns to predict the next letter of the alphabet from a
# sliding window of the preceding `seq_length` letters.
#
# Pipeline: build (window -> next letter) pairs, scale the integer-encoded
# inputs into [0, 1), one-hot encode the targets, train one epoch at a time
# (resetting the LSTM state after every epoch), then evaluate and print the
# prediction for every training window.
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical

# fix random seed for reproducibility
tf.random.set_seed(7)

# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
n_letters = len(alphabet)

# create mapping of characters to integers (0-25) and the reverse
char_to_int = {c: i for i, c in enumerate(alphabet)}
int_to_char = {i: c for i, c in enumerate(alphabet)}

# prepare the dataset of input to output pairs encoded as integers
# seq_length = 1 is the original one-char -> one-char setup; 3 gives the
# network more context (more time steps of history) to learn from.
# seq_length = 1
seq_length = 3
dataX = []
dataY = []
for i in range(0, n_letters - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)

# reshape X to be [samples, time steps, features]:
# each window letter is one time step carrying a single feature
X = np.reshape(dataX, (len(dataX), seq_length, 1))
# normalize the integer codes into [0, 1)
X = X / float(n_letters)
# one hot encode the output variable; num_classes pins the width to the
# alphabet size instead of inferring it from the largest label present
y = to_categorical(dataY, num_classes=n_letters)

# create and fit the model
batch_size = 1
model = Sequential()
# note stateful is set to true: a stateful layer needs the full batch shape
# up front, and its state is only cleared by an explicit reset_states()
model.add(LSTM(50, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
for _ in range(300):
    # one epoch per fit() call so the state can be reset between epochs;
    # do not shuffle data, the samples must be seen in alphabet order
    model.fit(X, y, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()

# summarize performance of the model
scores = model.evaluate(X, y, batch_size=batch_size, verbose=0)
model.reset_states()
print(f"Model Accuracy: {scores[1] * 100:.2f}%")

# demonstrate some model predictions

# Disabled experiment (kept as comments, not as a string literal, so it is
# not evaluated). It feeds ONE character per step, so it only matches a model
# built with seq_length = 1: seed with the first letter and feed every
# prediction back in as the next input.
# seed = [char_to_int[alphabet[0]]]
# for i in range(0, len(alphabet) - 1):
#     x = np.reshape(seed, (1, len(seed), 1))
#     x = x / float(len(alphabet))
#     prediction = model.predict(x, verbose=0)
#     index = np.argmax(prediction)
#     print(int_to_char[seed[0]], "->", int_to_char[index])
#     # use current prediction as seed/input for next prediction
#     seed = [index]

# this is when the model is using the last three time steps as history:
# predict the next letter for every training window, in order
for pattern in dataX:
    # shape (1, time steps, 1): one sample, one feature per time step
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_letters)
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print(seq_in, "->", result)
model.reset_states()

# Notes from the seq_length = 1 experiments:
# the seed experiment above, i.e. first letter as A, results in correct
# predictions all over.
# Starting from a random point instead results in K predicting B, and then
# the remaining letters are predicted correctly.
# why :
# To truly predict "K," the state of the network would need to be warmed up and
# iteratively fed the letters from "A" to "J." This reveals that you could
# achieve the same effect with a "stateless" LSTM by preparing training data
# like this:
# ---a -> b
# --ab -> c
# -abc -> d
# abcd -> e

# Disabled experiment (random starting point); commented out while testing
# with history (seq_length > 1) because it feeds one character per step.
# letter = "K"
# seed = [char_to_int[letter]]
# print("New start: ", letter)
# for i in range(0, 5):
#     x = np.reshape(seed, (1, len(seed), 1))
#     x = x / float(len(alphabet))
#     prediction = model.predict(x, verbose=0)
#     index = np.argmax(prediction)
#     print(int_to_char[seed[0]], "->", int_to_char[index])
#     seed = [index]
# model.reset_states()

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z
23/23 - 2s - loss: 3.2892 - accuracy: 0.0435 - 2s/epoch - 88ms/step
23/23 - 0s - loss: 3.2564 - accuracy: 0.0870 - 106ms/epoch - 5ms/step
23/23 - 0s - loss: 3.2396 - accuracy: 0.0870 - 90ms/epoch - 4ms/step
23/23 - 0s - loss: 3.2223 - accuracy: 0.1304 - 88ms/epoch - 4ms/step
23/23 - 0s - loss: 3.2005 - accuracy: 0.1304 - 102ms/epoch - 4ms/step
23/23 - 0s - loss: 3.1667 - accuracy: 0.0435 - 98ms/epoch - 4ms/step
23/23 - 0s - loss: 3.1032 - accuracy: 0.0435 - 91ms/epoch - 4ms/step
23/23 - 0s - loss: 3.0200 - accuracy: 0.0435 - 96ms/epoch - 4ms/step
23/23 - 0s - loss: 2.9615 - accuracy: 0.0435 - 86ms/epoch - 4ms/step
23/23 - 0s - loss: 2.9040 - accuracy: 0.1304 - 97ms/epoch - 4ms/step
23/23 - 0s - loss: 2.8898 - accuracy: 0.1739 - 106ms/epoch - 5ms/step
23/23 - 0s - loss: 2.7782 - accu

'letter = "K"\nseed = [char_to_int[letter]]\nprint("New start: ", letter)\nfor i in range(0, 5):\n\tx = np.reshape(seed, (1, len(seed), 1))\n\tx = x / float(len(alphabet))\n\tprediction = model.predict(x, verbose=0)\n\tindex = np.argmax(prediction)\n\tprint(int_to_char[seed[0]], "->", int_to_char[index])\n\tseed = [index]\nmodel.reset_states()'