In [1]:
import random 
import pickle
import re

import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer

from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import LSTM,Dense,Activation
from tensorflow.keras.optimizers import RMSprop

In [2]:
with open('1661-0.txt', 'r', encoding='utf-8') as file:
    text = file.read()

clean_text = re.sub(r'[^a-zA-Z\s]', '', text).lower()


In [3]:
tokenizer = RegexpTokenizer(r'\w+')
tokens = tokenizer.tokenize(clean_text)

In [4]:
unique_tokens = np.unique(tokens)
unique_token_index = {token: idx for idx,token in enumerate(unique_tokens)}
unique_token_index

{'a': 0,
 'abandoned': 1,
 'abhorrent': 2,
 'absorb': 3,
 'accomplished': 4,
 'account': 5,
 'across': 6,
 'actions': 7,
 'active': 8,
 'activity': 9,
 'address': 10,
 'adjusted': 11,
 'adler': 12,
 'admirable': 13,
 'admirably': 14,
 'admit': 15,
 'adventure': 16,
 'adventures': 17,
 'again': 18,
 'against': 19,
 'ago': 20,
 'akin': 21,
 'all': 22,
 'almost': 23,
 'aloud': 24,
 'alternating': 25,
 'always': 26,
 'am': 27,
 'ambition': 28,
 'amiss': 29,
 'among': 30,
 'an': 31,
 'and': 32,
 'anonymous': 33,
 'answered': 34,
 'any': 35,
 'anyone': 36,
 'anywhere': 37,
 'appears': 38,
 'are': 39,
 'armchair': 40,
 'around': 41,
 'arthur': 42,
 'as': 43,
 'asked': 44,
 'associated': 45,
 'at': 46,
 'atkinson': 47,
 'attention': 48,
 'attitude': 49,
 'attracted': 50,
 'author': 51,
 'away': 52,
 'bachelor': 53,
 'baffled': 54,
 'baker': 55,
 'balanced': 56,
 'band': 57,
 'be': 58,
 'because': 59,
 'beeches': 60,
 'been': 61,
 'before': 62,
 'begins': 63,
 'behind': 64,
 'being': 65,
 'beli

In [5]:
n_words = 10
input_words = []
next_words = []

for i in range(len(tokens)-n_words):
    input_words.append(tokens[i:i+n_words])
    next_words.append(tokens[i+n_words])

In [6]:
X = np.zeros((len(input_words),n_words,len(unique_tokens)),dtype = bool)
y = np.zeros((len(next_words),len(unique_tokens)),dtype = bool)

In [7]:
for i,words in enumerate(input_words):
    for j,word in enumerate(words):
        X[i,j,unique_token_index[word]] = 1
    y[i,unique_token_index[next_words[i]]] = 1

In [8]:
model = Sequential()
model.add(LSTM(128,input_shape = (n_words,len(unique_tokens)),return_sequences = True))
model.add(LSTM(128))
model.add(Dense(len(unique_tokens)))
model.add(Activation("softmax"))

  super().__init__(**kwargs)


In [9]:
model.compile(loss = "categorical_crossentropy",optimizer = RMSprop(learning_rate = 0.01),metrics = ["accuracy"])
history = model.fit(X,y,batch_size = 128,epochs = 30,shuffle = True)

Epoch 1/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.0367 - loss: 6.3781
Epoch 2/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.0526 - loss: 5.8078
Epoch 3/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.0558 - loss: 5.7900
Epoch 4/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.0530 - loss: 5.7235
Epoch 5/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.0568 - loss: 5.7214
Epoch 6/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.0549 - loss: 5.6100
Epoch 7/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.0616 - loss: 5.5428
Epoch 8/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0640 - loss: 5.3906
Epoch 9/30
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━

In [10]:
model.save("mymodel.keras")

In [11]:
model = load_model("mymodel.keras")

In [12]:
def predict_next_word(input_text,n_best):
    input_text = input_text.lower()
    X = np.zeros((1,n_words,len(unique_tokens)))
    for i,word in enumerate(input_text.split()):
        X[0,i,unique_token_index[word]] = 1
    predictions = model.predict(X)[0]
    return np.argpartition(predictions,n_best)[-n_best:]

In [13]:
possible = predict_next_word("He will have to do that thing", 5)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 481ms/step


In [14]:
print([unique_tokens[idx] for idx in possible])

['yet', 'you', 'your', 'abhorrent', 'a']


In [15]:
def generate_text(input_text,text_length,creativity = 3):
    word_sequence = input_text.split()
    current = 0
    for _ in range(text_length):
        sub_sequence = " ".join(tokenizer.tokenize(" ".join(word_sequence).lower())[current:current+n_words])
        try:
            choice = unique_tokens[random.choice(predict_next_word(sub_sequence,creativity))]
        except:
            choice = random.choice(unique_tokens)
        word_sequence.append(choice)
        current+=1
    return " ".join(word_sequence)

In [16]:
generate_text("clothes I can’t imagine how you deduce it",100,5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19

'clothes I can’t imagine how you deduce it yet yet you yet a adler yourself akin yours yet you a a yet your a a a you a your yours yet a yet yours your yet yet yet you yours again yet adler yet adventures a yet yet a adler london you absorb you yours yet your you yet active as yet yet yours a yet a ago absorb yet your yet your yet yours yet abhorrent a adler yourself across absorb yet absorb yours yours a a akin london you yours yet yours a yet you you yet yours an yours yours your a you your yours'