In [21]:
import os
import sys
import string
import numpy as np
from tensorflow import keras
from pickle import dump, load
from matplotlib import pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Dense, LSTM, Embedding, GRU
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [22]:
# test document path
test_doc = 'drive/MyDrive/Newcastle University/Deep Learning/Data/test.txt'

In [23]:
# load doc into memory
def load_doc(filename):
  # open the file as read only
  file = open(filename, 'r')
  # read all text
  text = file.read()
  # close the file
  file.close()
  return text

In [24]:
# function to convert a document into clean tokens
def clean_doc(doc):
	# replacing '--' with a space ' '
	doc = doc.replace('--', ' ')
	# splitting into tokens by white space
	tokens = doc.split()
	# removing punctuation from each token
	table = str.maketrans('', '', string.punctuation)
	tokens = [w.translate(table) for w in tokens]
	# removing non-alphabetic tokens
	tokens = [word for word in tokens if word.isalpha()]
	# converting to lower case
	tokens = [word.lower() for word in tokens]
	return tokens

In [59]:
# generate a sequence from a language model
def generate_seq(model, tokenizer, seq_length, seed_text, n_words):
    result = list()
    in_text = seed_text.copy()
    # generate a fixed number of words
    for _ in range(n_words):
     # encode the text as integer
     encoded = tokenizer.texts_to_sequences([in_text])[0]
     # truncate sequences to a fixed length
     encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
     # predict probabilities for each word
     #yhat = model.predict_classes(encoded, verbose=0)
     yhat = np.argmax(model.predict(encoded), axis=1)
     # map predicted word index to word
     out_word = ''
     for word, index in tokenizer.word_index.items():
         if index == yhat:
             out_word = word
             break
     # append to input
     in_text += ' ' + out_word
     result.append(out_word)
    return ' '.join(result)

In [60]:
doc = load_doc(test_doc)
print(doc[:300])

It was five o'clock on a winter's morning in Syria. Alongside the platform at Aleppo stood the train grandly designated in railway guides as the Taurus Express. It consisted of a kitchen and dining-car, a sleeping-car and two local coaches.
By the step leading up into the sleeping-car stood a young 


In [61]:
# cleaned text
tokens = clean_doc(doc)

In [62]:
len(tokens)

346

In [85]:
# Length of input sequence
seq_length = 30

# seed test starting point
i = 29

In [86]:
seed_text = tokens[i:i+seq_length]
seed_text = ' '.join(seed_text)
seed_text = [seed_text]

In [87]:
type(seed_text)

list

In [88]:
print(seed_text)

['of a kitchen and diningcar a sleepingcar and two local coaches by the step leading up into the sleepingcar stood a young french lieutenant resplendent in uniform conversing with a']


In [67]:
tokenizer = load(open('drive/MyDrive/Newcastle University/Deep Learning/Models/tokenizer.pkl', 'rb'))

In [68]:
model_1 = load_model('drive/MyDrive/Newcastle University/Deep Learning/Models/Language_Model_1.h5')

In [69]:
model_2 = load_model('drive/MyDrive/Newcastle University/Deep Learning/Models/Language_Model_2.h5')

In [70]:
model_3 = load_model('drive/MyDrive/Newcastle University/Deep Learning/Models/Language_Model_3.h5')

In [71]:
model_4 = load_model('drive/MyDrive/Newcastle University/Deep Learning/Models/Language_Model_4.h5')

### Text Gen Model 1

In [89]:
print(seed_text)

['of a kitchen and diningcar a sleepingcar and two local coaches by the step leading up into the sleepingcar stood a young french lieutenant resplendent in uniform conversing with a']


In [90]:
# generate new text
generated = generate_seq(model_1, tokenizer, seq_length, seed_text, 5)
print(generated)

each from women to to


### Text Gen Model 2

In [91]:
print(seed_text)

['of a kitchen and diningcar a sleepingcar and two local coaches by the step leading up into the sleepingcar stood a young french lieutenant resplendent in uniform conversing with a']


In [92]:
# generate new text
generated = generate_seq(model_2, tokenizer, seq_length, seed_text, 10)
print(generated)

as large thing de de de de de de de


### Text Gen Model 3

In [93]:
print(seed_text)

['of a kitchen and diningcar a sleepingcar and two local coaches by the step leading up into the sleepingcar stood a young french lieutenant resplendent in uniform conversing with a']


In [94]:
# generate new text
generated = generate_seq(model_3, tokenizer, seq_length, seed_text, 10)
print(generated)

the the the the the the the the the the


### Text Gen Model 4

In [95]:
print(seed_text)

['of a kitchen and diningcar a sleepingcar and two local coaches by the step leading up into the sleepingcar stood a young french lieutenant resplendent in uniform conversing with a']


In [96]:
# generate new text
generated = generate_seq(model_4, tokenizer, seq_length, seed_text, 5)
print(generated)

then then then then then
