In [2]:
import tensorflow as tf
import string
import requests
import numpy as np
import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras import backend as K

In [3]:
# Download the plain-text corpus from Project Gutenberg.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() makes an HTTP error fail loudly here instead of letting an
# error page be tokenised as if it were the corpus.
response = requests.get('https://www.gutenberg.org/files/98/98-0.txt', timeout=30)
response.raise_for_status()

lines = response.text.split('\n')
# Drop the first 108 lines before building the corpus.
# NOTE(review): presumably this skips the Gutenberg header/front matter -- confirm
# the offset still matches the file being served.
data = " ".join(lines[108:])

In [4]:
def clean_text(doc):
  """Split ``doc`` into lowercase, purely alphabetic word tokens.

  The text is split on whitespace; every character listed in
  ``string.punctuation`` is removed from each piece; pieces that are not
  entirely alphabetic after that (empty strings, numbers, pieces still
  containing other symbols) are discarded; the survivors are lowercased.

  Args:
    doc: The raw text to tokenise.

  Returns:
    A list of cleaned tokens in their original order.
  """
  strip_punctuation = str.maketrans('', '', string.punctuation)
  cleaned = []
  for piece in doc.split():
    word = piece.translate(strip_punctuation)
    # Only ASCII punctuation is stripped above, so a piece carrying any other
    # non-letter character fails isalpha() and is dropped as a whole.
    if word.isalpha():
      cleaned.append(word.lower())
  return cleaned


In [5]:
# Tokenise the downloaded text into a flat list of lowercase alphabetic words.
tokens = clean_text(data)

In [6]:
# Window width: 5 context words followed by the word to predict.
train_len = 5 + 1

# Slide a train_len-wide window over the token stream, one token at a time.
# NOTE(review): the range stops at len(tokens) - 1, so the window ending on the
# very last token is not produced. Left as is: adding it would change the word
# counts the tokenizer is later fitted on -- confirm before altering.
text_sequences = [
    tokens[end - train_len:end]
    for end in range(train_len, len(tokens))
]

In [7]:
# Hand-rolled vocabulary: map each distinct token to a 1-based id in order of
# first appearance.
sequences = {}
for word in tokens:
    # setdefault only inserts when the word is new, so existing ids never change.
    sequences.setdefault(word, len(sequences) + 1)
# Next unused id (one past the number of distinct tokens).
count = len(sequences) + 1

In [8]:
# Fit a Keras Tokenizer on the token windows to build the word <-> id vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_sequences)
# Encode every window as a list of integer ids.
# Note: this assignment replaces any value previously bound to `sequences`.
sequences = tokenizer.texts_to_sequences(text_sequences)

In [9]:
# One class per distinct word, plus one extra slot.
# NOTE(review): the +1 presumably accounts for id 0, which the Keras Tokenizer
# never assigns to a word (it is what pad_sequences pads with) -- confirm.
vocabulary_size = len(tokenizer.word_counts) + 1

# Copy the encoded windows into a dense (num_windows, train_len) int32 matrix.
n_sequences = np.empty([len(sequences), train_len], dtype='int32')
for row, encoded_window in enumerate(sequences):
    n_sequences[row] = encoded_window

# Every column except the last is model input; the last column is the word to
# predict, expanded to a one-hot vector over the vocabulary.
train_inputs = n_sequences[:, :-1]
train_targets = to_categorical(n_sequences[:, -1], num_classes=vocabulary_size)

# Number of context words fed to the model per sample.
seq_len = train_inputs.shape[1]

In [10]:
def recall_m(train_inputs, train_targets):
    """Recall computed with Keras backend ops: true positives / actual positives.

    Despite the parameter names, the function treats its first argument as the
    ground-truth labels and its second as the predictions (Keras calls metric
    functions as ``fn(y_true, y_pred)``). The names shadow the notebook-level
    ``train_inputs`` / ``train_targets`` arrays only inside this function.

    Args:
        train_inputs: Tensor used as the ground-truth labels.
        train_targets: Tensor used as the predictions.

    Returns:
        Scalar tensor ``TP / (P + epsilon)``, where both counts are taken after
        clipping to [0, 1] and rounding.
    """
    matched = K.round(K.clip(train_inputs * train_targets, 0, 1))
    actual = K.round(K.clip(train_inputs, 0, 1))
    # epsilon guards against division by zero when there are no positives.
    return K.sum(matched) / (K.sum(actual) + K.epsilon())

def precision_m(train_inputs, train_targets):
    """Precision computed with Keras backend ops: true positives / predicted positives.

    Despite the parameter names, the function treats its first argument as the
    ground-truth labels and its second as the predictions (Keras calls metric
    functions as ``fn(y_true, y_pred)``). The names shadow the notebook-level
    ``train_inputs`` / ``train_targets`` arrays only inside this function.

    Args:
        train_inputs: Tensor used as the ground-truth labels.
        train_targets: Tensor used as the predictions.

    Returns:
        Scalar tensor ``TP / (predicted positives + epsilon)``, where both
        counts are taken after clipping to [0, 1] and rounding.
    """
    matched = K.round(K.clip(train_inputs * train_targets, 0, 1))
    predicted = K.round(K.clip(train_targets, 0, 1))
    # epsilon guards against division by zero when nothing is predicted positive.
    return K.sum(matched) / (K.sum(predicted) + K.epsilon())

def f1_m(train_inputs, train_targets):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    Both arguments are forwarded unchanged to ``precision_m`` and ``recall_m``.

    Args:
        train_inputs: First argument passed to both helper metrics.
        train_targets: Second argument passed to both helper metrics.

    Returns:
        ``2 * p * r / (p + r + epsilon)``.
    """
    p = precision_m(train_inputs, train_targets)
    r = recall_m(train_inputs, train_targets)
    # epsilon keeps the ratio finite when precision and recall are both zero.
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1


In [16]:
# Custom metric functions, keyed by name, handed to load_model as
# custom_objects so the saved model's metric references can be resolved.
dependencies = dict(
    f1_m=f1_m,
    precision_m=precision_m,
    recall_m=recall_m,
)

In [20]:
# Load the saved model; the custom metric functions must be supplied so the
# references stored in the file can be resolved.
model = keras.models.load_model("LSTM.h5", custom_objects=dependencies)

# Read a prompt from the user and encode it with the fitted tokenizer.
input_text = input().strip().lower()
encoded_text = tokenizer.texts_to_sequences([input_text])[0]
# Left-pad with zeros / keep only the last seq_len ids so the shape matches the
# model input.
pad_encoded = pad_sequences([encoded_text], maxlen=seq_len, truncating='pre')
print(encoded_text, pad_encoded)

# Walk the classes from most to least probable and print the first three that
# map to a real word. Looking ids up with .get() avoids a KeyError when a class
# without a vocabulary entry (e.g. the padding id 0) ranks among the top ones.
probabilities = model.predict(pad_encoded)[0]
shown = 0
for i in probabilities.argsort()[::-1]:
  pred_word = tokenizer.index_word.get(int(i))
  if pred_word is None:
    continue
  print("Next word suggestion:",pred_word)
  shown += 1
  if shown == 3:
    break

My friend is dealing with
[30, 228, 23, 13] [[  0  30 228  23  13]]
Next word suggestion: your
Next word suggestion: this
Next word suggestion: my
