<a href="https://colab.research.google.com/github/AmrElmowafy1977/AmrElmowafy1977/blob/main/Predicting_Next_Character_using_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from keras.utils.data_utils import get_file

In [6]:
from tensorflow.keras.layers import (
    LSTM,
    Activation,
    BatchNormalization,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    MaxPooling2D,
    SeparableConv2D,
    SimpleRNN,
    TimeDistributed,
)
from tensorflow.keras.models import Sequential

In [7]:
# Download the Nietzsche corpus (get_file caches it locally) and read it into memory.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
# A context manager closes the file handle deterministically, and the explicit
# encoding keeps the decoded text independent of the platform's default locale.
with open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read()

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt


# Creating a vocabulary of unique characters

In [8]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
# Printed size is one larger than the current vocabulary (a '\0' entry is added in the next cell).
print(len(chars) + 1)

85


In [9]:
# Put the NUL character at index 0 of the vocabulary.
# Guarded so that re-running this cell does not insert a second copy and
# silently shift every character index by one.
if '\0' not in chars:
    chars.insert(0, '\0')

# Creating two dictionaries: one mapping characters to indices and one mapping indices back to characters

In [10]:
# Two lookup tables over the vocabulary: position -> character and character -> position.
index_to_char = dict(enumerate(chars))
char_to_index = {ch: idx for idx, ch in index_to_char.items()}

# Converting the entire Nietzsche text into a list of character indices

In [11]:
# Encode the whole corpus as a list of vocabulary indices.
total_index = list(map(char_to_index.__getitem__, text))
# Peek at the first ten codes.
total_index[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [12]:
# Round-trip check: decode the first 25 indices back into text.
''.join([index_to_char[code] for code in total_index[:25]])

'PREFACE\n\n\nSUPPOSING that '

In [18]:
# Number of consecutive input characters in each training sample.
pred_num = 25
# Start offset of every non-overlapping window of pred_num characters.
window_starts = range(0, len(total_index) - 1 - pred_num, pred_num)
# xin[k] collects the k-th character of every window.
xin = [[total_index[start + k] for start in window_starts] for k in range(pred_num)]
# y collects the character that immediately follows each window.
y = [total_index[start + pred_num] for start in window_starts]

# Dropping the last 2 samples from every input array and from the targets, so that all arrays keep the same length

In [19]:
# Convert each per-position list (minus its final two samples) to a NumPy array.
X = [np.stack(column[:-2]) for column in xin]
# Same trimming for the next-character targets.
Y = np.stack(y[:-2])

In [20]:
# Show the inputs: a list of pred_num arrays, one per character position in the window.
X

[array([40, 44, 58, ..., 58, 57, 54]),
 array([42, 71, 67, ...,  2, 57, 67]),
 array([29, 74, 24, ..., 62, 61, 56]),
 array([30, 73,  2, ..., 67, 62, 73]),
 array([25, 61, 33, ..., 72, 72, 62]),
 array([27,  2, 72, ..., 62, 73, 73]),
 array([29, 62,  2, ..., 72, 72, 78]),
 array([ 1, 72, 73, ..., 73,  2,  8]),
 array([ 1,  2, 61, ..., 58, 54,  2]),
 array([ 1, 54, 58, ..., 57, 72, 63]),
 array([43,  2, 71, ...,  2,  2, 74]),
 array([45, 76, 58, ..., 74, 58, 72]),
 array([40, 68,  2, ..., 69, 72, 73]),
 array([40, 66, 67, ..., 68, 72,  2]),
 array([39, 54, 68, ..., 67, 58, 54]),
 array([43, 67, 73, ...,  2, 67, 72]),
 array([33,  9,  2, ..., 55, 73,  2]),
 array([38,  9, 60, ..., 78, 62, 73]),
 array([31, 76, 71, ...,  2, 54, 61]),
 array([ 2, 61, 68, ..., 73, 65, 58]),
 array([73, 54, 74, ..., 61,  2, 78]),
 array([61, 73, 67, ..., 58, 73,  2]),
 array([54,  2, 57, ...,  1, 68, 76]),
 array([73, 73,  1, ..., 26,  2, 58]),
 array([ 2, 61, 59, ..., 74, 72, 71])]

In [22]:
# First eight next-character targets.
Y[:8]

array([44, 58, 68, 62, 73,  8, 67, 65])

In [23]:
# Check that one input array and the target array hold the same number of samples.
X[0].shape, Y.shape

((24033,), (24033,))

In [24]:
# Model hyperparameters.
vocab_size = 86      # rows of the embedding table and width of the softmax output
n_fac = 42           # embedding dimension
hidden_layers = 256  # units in the recurrent layer

# Creating a simple RNN

In [25]:
# Many-to-one model: embed the pred_num input characters, feed them through a
# ReLU SimpleRNN and predict a distribution over the vocabulary for the next character.
model = Sequential()
model.add(Embedding(vocab_size, n_fac, input_length=pred_num))
model.add(SimpleRNN(hidden_layers, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))

In [26]:
# Print the layer stack with output shapes and parameter counts.
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 25, 42)            3612      
                                                                 
 simple_rnn (SimpleRNN)      (None, 256)               76544     
                                                                 
 dense (Dense)               (None, 86)                22102     
                                                                 
Total params: 102,258
Trainable params: 102,258
Non-trainable params: 0
_________________________________________________________________


In [27]:
from tensorflow.keras.optimizers import Adam

In [28]:
# Targets are integer class indices, so the sparse variant of cross-entropy is used.
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy')

In [29]:
# Stack the per-position arrays into a (samples, pred_num) matrix and train.
model.fit(np.stack(X, axis=1), Y, batch_size=64, epochs=5)
# Save the trained weights under both file names and reload them after each save.
# NOTE(review): the names mention 3 and 7 characters, but this model is built
# with pred_num input characters - confirm whether the names should be updated.
for weights_file in ('simpleRNN_3pred.h5', 'simpleRNN_7pred.h5'):
    model.save_weights(weights_file)
    model.load_weights(weights_file)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [33]:
def predict_next_char(inp):
    """Return the character that `model` considers most likely to follow `inp`.

    Args:
        inp: Sequence of characters; each one must be a key of `char_to_index`.
            Presumably it has to be `pred_num` characters long to match the
            model's input length - confirm against the model definition.

    Returns:
        The entry of `index_to_char` at the position of the highest score in
        the model's prediction.

    Raises:
        KeyError: If a character of `inp` is missing from `char_to_index`.
    """
    encoded = np.array([char_to_index[ch] for ch in inp])
    # The model expects a batch dimension, so wrap the sequence in a batch of one.
    batch = encoded[np.newaxis, :]
    scores = model.predict(batch)
    best = np.argmax(scores)
    return index_to_char[best]

# Return Sequences

In [34]:
# Targets for the sequence-output model: the same windows as the inputs,
# shifted one character to the right (offsets start at 1 instead of 0).
target_starts = range(1, len(total_index) - pred_num, pred_num)
ys = [[total_index[start + k] for start in target_starts] for k in range(pred_num)]

In [35]:
# Convert each per-position target list (minus its final two samples) to a NumPy array.
Y_return = [np.stack(column[:-2]) for column in ys]

In [36]:
# Re-display the inputs so they can be compared with Y_return in the next cell.
X

[array([40, 44, 58, ..., 58, 57, 54]),
 array([42, 71, 67, ...,  2, 57, 67]),
 array([29, 74, 24, ..., 62, 61, 56]),
 array([30, 73,  2, ..., 67, 62, 73]),
 array([25, 61, 33, ..., 72, 72, 62]),
 array([27,  2, 72, ..., 62, 73, 73]),
 array([29, 62,  2, ..., 72, 72, 78]),
 array([ 1, 72, 73, ..., 73,  2,  8]),
 array([ 1,  2, 61, ..., 58, 54,  2]),
 array([ 1, 54, 58, ..., 57, 72, 63]),
 array([43,  2, 71, ...,  2,  2, 74]),
 array([45, 76, 58, ..., 74, 58, 72]),
 array([40, 68,  2, ..., 69, 72, 73]),
 array([40, 66, 67, ..., 68, 72,  2]),
 array([39, 54, 68, ..., 67, 58, 54]),
 array([43, 67, 73, ...,  2, 67, 72]),
 array([33,  9,  2, ..., 55, 73,  2]),
 array([38,  9, 60, ..., 78, 62, 73]),
 array([31, 76, 71, ...,  2, 54, 61]),
 array([ 2, 61, 68, ..., 73, 65, 58]),
 array([73, 54, 74, ..., 61,  2, 78]),
 array([61, 73, 67, ..., 58, 73,  2]),
 array([54,  2, 57, ...,  1, 68, 76]),
 array([73, 73,  1, ..., 26,  2, 58]),
 array([ 2, 61, 59, ..., 74, 72, 71])]

In [38]:
# Show the per-position target arrays.
Y_return


[array([42, 71, 67, ...,  2, 57, 67]),
 array([29, 74, 24, ..., 62, 61, 56]),
 array([30, 73,  2, ..., 67, 62, 73]),
 array([25, 61, 33, ..., 72, 72, 62]),
 array([27,  2, 72, ..., 62, 73, 73]),
 array([29, 62,  2, ..., 72, 72, 78]),
 array([ 1, 72, 73, ..., 73,  2,  8]),
 array([ 1,  2, 61, ..., 58, 54,  2]),
 array([ 1, 54, 58, ..., 57, 72, 63]),
 array([43,  2, 71, ...,  2,  2, 74]),
 array([45, 76, 58, ..., 74, 58, 72]),
 array([40, 68,  2, ..., 69, 72, 73]),
 array([40, 66, 67, ..., 68, 72,  2]),
 array([39, 54, 68, ..., 67, 58, 54]),
 array([43, 67, 73, ...,  2, 67, 72]),
 array([33,  9,  2, ..., 55, 73,  2]),
 array([38,  9, 60, ..., 78, 62, 73]),
 array([31, 76, 71, ...,  2, 54, 61]),
 array([ 2, 61, 68, ..., 73, 65, 58]),
 array([73, 54, 74, ..., 61,  2, 78]),
 array([61, 73, 67, ..., 58, 73,  2]),
 array([54,  2, 57, ...,  1, 68, 76]),
 array([73, 73,  1, ..., 26,  2, 58]),
 array([ 2, 61, 59, ..., 74, 72, 71]),
 array([44, 58, 68, ..., 57, 54, 58])]

In [39]:
# Hyperparameters for the sequence-output model (vocabulary size, embedding width, RNN units).
vocab_size, n_fac, hidden_layers = 86, 42, 256

In [41]:
from keras.layers import SimpleRNN, TimeDistributed

In [42]:
# Many-to-many model: return_sequences=True makes the RNN emit its state at every
# time step, and the TimeDistributed Dense layer turns each of them into a
# distribution over the vocabulary.
return_model = Sequential()
return_model.add(Embedding(vocab_size, n_fac, input_length=pred_num))
return_model.add(SimpleRNN(hidden_layers, return_sequences=True, activation='relu'))
return_model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))

In [43]:
# Print the layer stack with output shapes and parameter counts.
return_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 25, 42)            3612      
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 25, 256)           76544     
                                                                 
 time_distributed (TimeDistr  (None, 25, 86)           22102     
 ibuted)                                                         
                                                                 
Total params: 102,258
Trainable params: 102,258
Non-trainable params: 0
_________________________________________________________________


In [44]:
# Integer targets again, so the sparse cross-entropy loss is used with a default Adam optimizer.
return_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy')

In [45]:
# Inputs: stack the per-position arrays along axis 1 -> one row per sample.
X_model = np.stack(X, axis=1)
# Targets: same stacking, plus a trailing axis of length 1.
Y_model = np.stack(Y_return, axis=1)[..., np.newaxis]

In [46]:
# Train for 5 epochs in mini-batches of 64 samples.
return_model.fit(X_model, Y_model, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7feba627d9d0>

In [47]:
# Set the learning rate to 1e-4 and train for another 5 epochs on the same data.
# `learning_rate` is the documented optimizer attribute; `lr` is only a deprecated alias.
return_model.optimizer.learning_rate = 1e-4
return_model.fit(X_model, Y_model, batch_size=64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7feba5fa1050>

In [48]:
# Persist the trained weights of the sequence-output model.
return_model.save_weights('return_sequences_25.h5')

In [51]:
def predict_every_char(inp):
    """Predict the following character for every position of `inp`.

    `inp` is cut into consecutive chunks of `pred_num` characters; a short
    final chunk is right-padded with spaces. All chunks are encoded with
    `char_to_index` and sent through `return_model` in a single batch, then
    the highest-scoring character at each position is decoded with
    `index_to_char`.

    Args:
        inp: Text to run through the model. Every character (and the space
            used for padding) must be a key of `char_to_index`.

    Returns:
        The predicted characters joined into one string. Predictions for
        padded positions are kept, so the length is `len(inp)` rounded up to
        a multiple of `pred_num`. An empty input yields an empty string.

    Raises:
        KeyError: If a character is missing from `char_to_index`.
    """
    if not inp:
        # Nothing to predict; also avoids handing an empty batch to the model.
        return ''

    # str.ljust pads with spaces, and only the last chunk can be short.
    chunks = [inp[start:start + pred_num].ljust(pred_num)
              for start in range(0, len(inp), pred_num)]
    batch = np.array([[char_to_index[ch] for ch in chunk] for chunk in chunks])

    # One predict call for all chunks instead of one call per chunk.
    predictions = return_model.predict(batch)
    best = np.argmax(predictions, axis=-1)

    # Row-major flattening keeps chunk order, then position order within a chunk.
    return ''.join(index_to_char[idx] for idx in best.ravel().tolist())

In [52]:
# Try the sequence model on a short phrase (shorter than pred_num, so it is padded with spaces).
predict_every_char('and the boy left')

'nd the settooatiit       '

In [53]:
# Second example, again shorter than pred_num and therefore space-padded.
predict_every_char('this is')

'hen ms a                 '

In [56]:
# Batch size; used in the fixed batch_input_shape of the stateful model below.
bs = 64


In [57]:
# Stateful LSTM variant of the sequence-output model. A stateful layer needs the
# full batch shape up front, hence batch_input_shape on the first layer.
# The sequence length there must agree with input_length and with the
# pred_num-wide rows of X_model; it was previously hard-coded to 7.
stateful_model = Sequential([
        Embedding(vocab_size, n_fac, input_length=pred_num, batch_input_shape=(bs, pred_num)),
        BatchNormalization(),
        LSTM(hidden_layers, activation='tanh', return_sequences=True, stateful=True),
        TimeDistributed(Dense(vocab_size, activation='softmax'))
    ])

In [58]:
# Same loss and optimizer setup as the earlier models.
stateful_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy')

In [59]:
divide = len(X_model)//bs*bs