In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
import tensorflow as tf
import keras
from keras.layers import Dense,Conv2D,MaxPool2D,BatchNormalization,Conv2DTranspose,Dropout,LeakyReLU
from keras.layers import LSTM,Bidirectional,Embedding
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras import regularizers
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
import pandas as pd
import numpy as np

In [3]:
!wget --no-check-certificate https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt -O sonnets.txt

--2022-01-13 11:00:23--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.142.128, 74.125.195.128, 74.125.197.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.142.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 93578 (91K) [text/plain]
Saving to: ‘sonnets.txt’


2022-01-13 11:00:24 (147 MB/s) - ‘sonnets.txt’ saved [93578/93578]



**Reading Data**

In [4]:
data = open('/content/sonnets.txt').read()

In [5]:
corpus = data.lower().split('\n')

In [6]:
print(corpus)



**Tokenizer**

In [7]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

In [8]:
print(tokenizer)

<keras_preprocessing.text.Tokenizer object at 0x7f188585b950>


In [9]:
total_words = len(tokenizer.word_index)+1

In [10]:
index = tokenizer.word_index

In [11]:
print(index)



**Text to number**

In [12]:
token_list = tokenizer.texts_to_sequences(corpus)

In [13]:
print(token_list)

[[34, 417, 877, 166, 213, 517], [8, 878, 134, 351, 102, 156, 199], [16, 22, 2, 879, 61, 30, 48, 634], [25, 311, 635, 102, 200, 25, 278], [16, 10, 880, 3, 62, 85, 214, 53], [1372, 9, 1373, 636, 11, 122, 1374, 1375], [201, 17, 1376, 64, 518, 202], [118, 9, 1377, 3, 9, 47, 122, 135, 279], [10, 8, 54, 63, 2, 418, 312, 419], [1, 352, 1378, 3, 2, 1379, 420], [215, 62, 85, 881, 1380, 9, 882], [1, 311, 883, 884, 313, 7, 1381], [257, 2, 94, 36, 353, 29, 1382, 21], [3, 637, 2, 418, 354, 30, 2, 638, 1, 19], [27, 1383, 885, 46, 1384, 9, 280], [1, 1385, 281, 1386, 7, 9, 134, 1387], [9, 1388, 179, 1389, 20, 1390, 35, 63], [49, 21, 17, 886, 639, 4, 887, 126, 888], [38, 81, 1391, 64, 23, 9, 51, 202], [64, 23, 2, 258, 4, 9, 889, 145], [3, 95, 215, 62, 85, 281, 1392, 53], [86, 146, 23, 1393, 236, 1, 1394, 96], [71, 136, 43, 96, 1395, 9, 134, 186], [42, 10, 1396, 640, 890, 69, 282, 4, 44], [46, 519, 5, 520, 1, 65, 5, 112, 314, 147], [1397, 25, 51, 30, 1398, 62], [29, 86, 3, 21, 98, 127, 27, 10, 54, 112],

**n_gram_seq**

In [14]:
token_list = []

for line in corpus:
  token_l = tokenizer.texts_to_sequences([line])[0]
  for i in range(1,len(token_l)):
    n_gram = token_l[:i+1]
    token_list.append(n_gram)

In [15]:
max_seq_length = max(len(seq) for seq in token_list)

**Padding**

In [16]:
input_sequences = np.array(pad_sequences(token_list,padding='pre'))

In [17]:
input_sequences

array([[   0,    0,    0, ...,    0,   34,  417],
       [   0,    0,    0, ...,   34,  417,  877],
       [   0,    0,    0, ...,  417,  877,  166],
       ...,
       [   0,    0,    0, ...,  493,  493, 3210],
       [   0,    0,    0, ...,  493, 3210,   15],
       [   0,    0,    0, ..., 3210,   15,   14]], dtype=int32)

In [18]:
xs,labels = input_sequences[:,:-1],input_sequences[:,-1]

In [19]:
ys = tf.keras.utils.to_categorical(labels,num_classes=total_words)

In [20]:
xs

array([[   0,    0,    0, ...,    0,    0,   34],
       [   0,    0,    0, ...,    0,   34,  417],
       [   0,    0,    0, ...,   34,  417,  877],
       ...,
       [   0,    0,    0, ..., 3209,  493,  493],
       [   0,    0,    0, ...,  493,  493, 3210],
       [   0,    0,    0, ...,  493, 3210,   15]], dtype=int32)

**Building LSTM** **model**

In [27]:
model = Sequential()
model.add(Embedding(total_words,100,input_length = max_seq_length-1))
model.add(Bidirectional(LSTM(150,return_sequences=True)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(200,return_sequences=True)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(150,return_sequences=True)))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(96)))
model.add(Dense(total_words//2, activation='relu'))
model.add(Dense(total_words, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 10, 100)           321100    
                                                                 
 bidirectional_4 (Bidirectio  (None, 10, 300)          301200    
 nal)                                                            
                                                                 
 dropout_3 (Dropout)         (None, 10, 300)           0         
                                                                 
 bidirectional_5 (Bidirectio  (None, 10, 400)          801600    
 nal)                                                            
                                                                 
 dropout_4 (Dropout)         (None, 10, 400)           0         
                                                                 
 bidirectional_6 (Bidirectio  (None, 10, 300)         

**Training**

In [None]:
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(xs,ys,epochs=150,verbose=1)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x7f23bb6ca8d0>

In [22]:
import pickle

In [33]:
from keras.models import load_model
new_model = load_model('/content/drive/MyDrive/Shakespearre_model/Model_150_epochs/')

In [34]:
# new_model.compile(loss='categorical_crossentropy', optimizer='adam')
new_model.fit(xs,ys,epochs=150,verbose=1)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x7f17649ed7d0>

In [35]:
new_model.save('/content/drive/MyDrive/Shakespearre_model/Model_300_epochs')



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Shakespearre_model/Model_300_epochs/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Shakespearre_model/Model_300_epochs/assets


In [None]:
def generate_text(seed_text, next_words):
  for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_seq_length-1, padding='pre')
    predict_x=model.predict(token_list) 
    predicted=np.argmax(predict_x,axis=1)
    # predicted = model.predict_classes(token_list, verbose=0)
    output_word = ""
    for word, index in tokenizer.word_index.items():
      if index == predicted:
        output_word = word
        break
    seed_text += " " + output_word

  print(seed_text)
  return seed_text

In [None]:
seed_text = 'Thyself thy foe'
next_words = 10
generated_text = generate_text(seed_text, next_words)

Thyself thy foe to thy sweet self prove bear sense burn sense burn


In [None]:
%cd /content/drive/MyDrive/

/content/drive/MyDrive


In [None]:
!mkdir Shakespearre_model

mkdir: cannot create directory ‘Shakespearre_model’: File exists


In [None]:
%cd /content/drive/MyDrive/Shakespearre_model/

/content/drive/MyDrive/Shakespearre_model


In [None]:
model.save('/content/drive/MyDrive/Shakespearre_model/Model_150_epochs')



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Shakespearre_model/Model_150_epochs/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Shakespearre_model/Model_150_epochs/assets
