##Simple sequence prediction task using simple RNN

In [1]:
!pip install tensorflow



In [30]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

For a numerical sequence

In [9]:
def create_dataset(n_samples, n_timesteps):
  x = []
  y = []
  for i in range(n_samples):
    start = np.random.randint(0,100)
    sequence = np.arange(start, start+n_timesteps)
    x.append(sequence)
    y.append(start+n_timesteps)
  return np.array(x), np.array(y)



In [10]:
n_samples = 1000
n_timesteps = 10

x,y = create_dataset(n_samples, n_timesteps)

In [11]:
x = x.reshape((x.shape[0], x.shape[1],1))

In [12]:
model = Sequential()
model.add(SimpleRNN(50, activation='relu', input_shape = (n_timesteps, 1)))
model.add(Dense(1))

  super().__init__(**kwargs)


In [14]:
model.compile(optimizer='adam', loss='mse')

In [16]:
model.fit(x, y, epochs=20, verbose=1)

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1039.5593
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 7.6670
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 3.2460
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 3.1410
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2.8205
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 2.6700
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2.5126
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2.2750
Epoch 9/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2.1768
Epoch 10/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.9559
Epoch 

<keras.src.callbacks.history.History at 0x7bb244ec5930>

In [18]:
test_seq = np.array([range(50,60)])
test_seq = test_seq.reshape((1, n_timesteps, 1))
predicted = model.predict(test_seq)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step


In [19]:
print(f'Predicted next number : {predicted}')

Predicted next number : [[59.764492]]


In [20]:
loss = model.evaluate(x, y)
print(f"Mean Squared Error (MSE): {loss}")

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4358
Mean Squared Error (MSE): 0.4421849250793457


In [21]:
rmse = np.sqrt(0.4421849250793457)
print(f"RMSE: {rmse}")

RMSE: 0.6649698677980422


An MSE of 0.442 or an RMSE of 0.665 indicates that the model is fairly accurate, but there is still room for improvement. The smaller the RMSE, the better the model is performing.

For textual sequence

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding

In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
text = "hello world this is a simple example of using rnn for text sequebnce prediction"

In [6]:
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts([text])

In [7]:
encoded_text = tokenizer.texts_to_sequences([text])[0]
vocab_size = len(tokenizer.word_index) + 1
print(tokenizer.word_index)
print(f'Vocabulary size: {vocab_size}')

{' ': 1, 'e': 2, 'i': 3, 'l': 4, 'o': 5, 's': 6, 'n': 7, 'r': 8, 't': 9, 'p': 10, 'h': 11, 'd': 12, 'a': 13, 'm': 14, 'x': 15, 'f': 16, 'u': 17, 'c': 18, 'w': 19, 'g': 20, 'q': 21, 'b': 22}
Vocabulary size: 23


In [8]:
sequence_length = 5
sequences = []
for i in range(sequence_length, len(encoded_text)):
  seq = encoded_text[i-sequence_length:i]
  label = encoded_text[i]
  sequences.append((seq, label))

x,y = zip(*sequences)
x = np.array(x)
y = np.array(y)

In [9]:
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=sequence_length))
model.add(SimpleRNN(50, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))



In [10]:
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x, y, epochs=100, verbose=1)

Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.0689 - loss: 3.1351
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1681 - loss: 3.1262  
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1639 - loss: 3.1204 
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2331 - loss: 3.1123 
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2260 - loss: 3.1061 
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2445 - loss: 3.0981 
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2583 - loss: 3.0822 
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2125 - loss: 3.0748 
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x7fa13c160460>

In [13]:
def predict_next_char(model, tokenizer, text_seq, seq_length):
  encoded = tokenizer.texts_to_sequences([text_seq])[0]
  encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')

  predicted = model.predict(encoded, verbose=0)

  predicted_char_index = np.argmax(predicted)

  for char, index in tokenizer.word_index.items():
    if index == predicted_char_index:
      return char


In [17]:
test_seq = "world th"
predicted_char = predict_next_char(model, tokenizer, test_seq, sequence_length)
print(f" Given then sequence {test_seq}, predicted next character : {predicted_char}..")

 Given then sequence world th, predicted next character : i..


In [18]:
loss = model.evaluate(x, y)
print(f"Mean Squared Error (MSE): {loss}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9787 - loss: 0.1872 
Mean Squared Error (MSE): [0.19249951839447021, 0.9729729890823364]
