<a href="https://colab.research.google.com/github/Ayakhaled200/NLP/blob/main/Generating_Poetic_Texts_with_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation
from tensorflow.keras.optimizers import RMSprop

In [None]:
# Fetch the Shakespeare corpus and keep the local path that get_file returns.
# The cache file name is written without trailing whitespace so the file on disk
# gets a clean, portable name.
file_path = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

####Note: later our model will predict the next character. If we allow upper-case characters, it has many more possible choices, so the accuracy isn't going to be great; we can improve performance by using only lower-case letters.
####Of course that might not be good for the grammar, but content-wise (semantically) it doesn't make a difference.

In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8; the context
# manager closes the file handle as soon as the read is done.
# Lower-casing shrinks the set of characters the model has to choose between.
with open(file_path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf_8').lower()

###Convert the text into a numeric format so it can be fed to the RNN as a NumPy array

In [None]:
# Keep only the characters from index 300000 up to (not including) 800000.
window_start, window_stop = 300000, 800000
text = text[window_start:window_stop]

In [None]:
# Vocabulary: every distinct character that occurs in the selected slice, in
# sorted order. A character that never appears in the slice is not included.
characters = list(set(text))
characters.sort()

In [None]:
# Map each vocabulary character to its position in `characters`.
char_to_index = {char: idx for idx, char in enumerate(characters)}

In [None]:
# Inverse lookup: map each position in `characters` back to its character.
index_to_char = dict(enumerate(characters))

####How many characters are we going to use as features in order to predict the next character?
######Careful: you don't want your network to rely on too much data.

In [None]:
# seq_len: number of characters in each input window.
# step_size: how many characters the window start shifts between consecutive samples.
seq_len, step_size = 40, 3

In [None]:
# Each training sample is a window of text plus the single character that
# follows it, e.g. the window 'how are yo' is paired with the target 'u'.
sentences = list()        # features: the text windows
next_characters = list()  # targets: the character right after each window

##Preparation of Training Data

###We want training examples (a bunch of sentences), each paired with the correct next letter

In [None]:
# Slide a seq_len-wide window over the text, moving step_size characters at a time.
# With seq_len = 5 a window holds characters 0..4 and its target is character 5.
# Both lists are rebuilt from scratch here, so re-running this cell cannot
# append duplicate samples to lists left over from an earlier run.
window_starts = range(0, len(text) - seq_len, step_size)
sentences = [text[start : start + seq_len] for start in window_starts]
next_characters = [text[start + seq_len] for start in window_starts]

###Convert the train data into numpy

####Whenever a certain character occurs at a specific position in a specific sentence, we set that entry to True (1); all the other values remain 0

Ex: if sentence number 5 has, at position number 7, the character with index 8, we set X[5,7,8] = 1.
This is the format in which the training data is passed to the NN.

In [None]:
# Boolean feature tensor indexed as X[sample, position, character_index],
# initialised to all False.
feature_shape = (len(sentences), seq_len, len(characters))
X = np.zeros(feature_shape, dtype=bool)

In [None]:
# Boolean target matrix indexed as y[sample, character_index]: it records which
# character follows which sentence, e.g. sentence 5 followed by character 8
# is stored at y[5, 8]. Initialised to all False.
target_shape = (len(sentences), len(characters))
y = np.zeros(target_shape, dtype=bool)

In [None]:
# Show the feature and target shapes, one per line.
print(X.shape, y.shape, sep='\n')

(166654, 40, 39)
(166654, 39)


In [None]:
# Fill in the one-hot encodings: for sample i, flag the character found at each
# position t of its window in X, then flag the character that follows it in y.
for i, (sentence, following) in enumerate(zip(sentences, next_characters)):
    for t, character in enumerate(sentence):
        X[i, t, char_to_index[character]] = True
    y[i, char_to_index[following]] = True

##Training the model

In [None]:
# One LSTM layer reads windows of shape (seq_len, len(characters)); a dense
# layer plus softmax then scores every vocabulary character as the next one.
model = Sequential()
model.add(LSTM(128, input_shape=(seq_len, len(characters))))
model.add(Dense(len(characters)))
model.add(Activation('softmax'))
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated in
# TF 2.x and rejected by newer Keras releases.
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(learning_rate=0.01))
model.fit(X, y, batch_size=256, epochs=10)
# NOTE(review): newer Keras versions expect a `.keras` or `.h5` suffix here;
# the name is left as-is because the loading cell reads this exact path.
model.save('textgenetator.model')



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Reload the trained network from the path it was saved to.
saved_model_path = 'textgenetator.model'
model = tf.keras.models.load_model(saved_model_path)

In [None]:
def sample(preds, temperature=1.0):
  """Pick one character index from the model's softmax output.

  The probabilities are re-weighted by `temperature` before a single index is
  drawn at random:
    * low temperature  -> conservative, "safe" picks (the likeliest characters
      dominate),
    * high temperature -> riskier, more experimental picks, so the text is more
      creative but may make less sense.

  Args:
    preds: 1-D array-like of probabilities, one per character.
    temperature: re-weighting factor. A value of exactly 0 selects the most
      likely index deterministically.

  Returns:
    The integer index of the chosen character.
  """
  preds = np.asarray(preds).astype('float64')
  if temperature == 0:
    # Dividing by zero would yield NaNs; the zero-temperature limit of the
    # re-weighted distribution is simply "always take the most likely index".
    return np.argmax(preds)
  with np.errstate(divide='ignore'):
    # log(0) is -inf, which maps back to probability 0 after exp() below.
    scaled = np.log(preds) / temperature
  # Shift so the largest entry is 0: the normalised result is unchanged, but
  # exp() can no longer underflow every entry to 0 (which would give 0/0 = NaN
  # at very low temperatures).
  scaled = scaled - np.max(scaled)
  exp_preds = np.exp(scaled)
  preds = exp_preds / np.sum(exp_preds)
  # One multinomial trial gives a one-hot row; argmax recovers the drawn index.
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

###Text Generation function

In [None]:
def Generate_Text(length, tempreture):
    """Generate text by repeatedly predicting the next character.

    A random entry point in `text` is chosen and the seq_len characters starting
    there become the seed. The seed gives the network the input for predicting
    the first new character; after each prediction the window drops its oldest
    character and appends the new one, so every prediction feeds the next input.

    Args:
        length: number of characters to generate after the seed.
        tempreture: sampling temperature passed on to `sample`.

    Returns:
        The seed followed by the `length` generated characters.
    """
    start_index = random.randint(0, len(text) - seq_len - 1)  # random entry point
    sentence = text[start_index : start_index + seq_len]
    generated_text = sentence
    for _ in range(length):
        # One-hot encode the current window. The time-step index must be the
        # loop variable itself so each character lands at its own position.
        x = np.zeros((1, seq_len, len(characters)))
        for t, character in enumerate(sentence):
            x[0, t, char_to_index[character]] = 1

        predictions = model.predict(x, verbose=0)[0]
        next_index = sample(predictions, tempreture)
        next_character = index_to_char[next_index]

        generated_text += next_character
        # Slide the window: drop the oldest character, append the new one.
        sentence = sentence[1:] + next_character

    return generated_text

In [None]:
# 300 generated characters at temperature 0.2.
print(Generate_Text(length=300, tempreture=0.2))

ur blushes and present yourself
that whieeeeeeeeeeeeeetetttttnttttiaaatnnnnnnnnngnnnnnnnnnnnnnnnnn tnttttgnhiteeannnttnnnnnnnnnnnnntnnnnntn nnnnntttdrddyooooooooooooooooooooooooooo ooooooooowoohwwnthohooohooooooohooohhooo ooooooittnotttttttttttttttttttttttttttttonroooourr rtttrttttttttrtrttrttttttttttttrnttttooou tttttttttttttttrtttrttttt


In [None]:
# 300 generated characters at temperature 0.4.
Generate_Text(length=300, tempreture=0.4)

'when i was king of england?\n\nsecond keepiiiiiaiilraiiiararikyeiikddsi,                               n e t tchatttttttytttttttitttteteetteottrrrrrr rnnooooooooddddddddrrrrrwoannwyisisnonon ssnnnstossissiniiinhootttt oootttttttttttttttttttttttttttttwhhhhhhh, haintiitinhticthhhhhhhhhhihhhhhttttttttttteeeeeeeeeeeeeeeeeeancttt ttttthttritcttt'

In [None]:
# 300 generated characters at temperature 0.6.
Generate_Text(length=300, tempreture=0.6)

'on.\n\nking richard ii:\nwhy uncle, thou ha      i          nr  t   drrt rgrrnrceeseheeeeeaeeieeetreeeeeeeeeeeeho rtttttttntnngvirneeeeeeeeeeeeeoarnnninrnnrnnn nnnnnnnndvdvvgnvrnrnnnnnceeeeeeayeieeaigeeeeeeeeeeeeeeearrk nninennnnnnnnnnnnnnnnnnnnnnnnnndnnnnenndddd dddddddddddddddddddddddddddddddddgounrruurr grrrrrrrrrrerrrrrrrrrrrrrrrrrrrrrrn'

In [None]:
# 300 generated characters at temperature 0.8.
Generate_Text(length=300, tempreture=0.8)

"he did us all,\nand that will quickly dryitiiiieeiiieieieeyeeeeeeeeeanine.  d ivis!.\nc\natchh\ns.io. :\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\ntt\nh\nh\n\n\n\n\n\n\n\n\ni\nattctngecriennnrtrorrrrnnnrrnndrnnrrndrrroorrorr. n w  uwddnndgeinnnnnnnnnnfn'rndnadnddendcrceneegdddddddddddrddled ddrcdriarddkrniieidiaayvieeeeiieeeeeeeeeeeee cieeiieonrnrvyvvroeoenenoeeeoeseeoeeecee"

In [None]:
# 300 generated characters at temperature 1.0.
Generate_Text(length=300, tempreture=1.0)

' our king, my brother,\nis prisoner to th e  e oo oso ttwtooooattnnonnnnttttittvinnnnshtittiecenin netteeatneaetetihetaetctthtttevtactacectyeeeeeeeeeeieeeeeeateenyeeelteee veaeaegieoigr oyevodeo sie;    re,                      n   \nt chootwoohwohhohhhhahthoohinltttoiaoinatoe, tsttihitd     , i r      t rg r r     e      i   e nronrrrrgrrn'