<a href="https://colab.research.google.com/github/ForestPearson/CS410-510-NLP-project/blob/lstm/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import StringLookup
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import Embedding
from keras.optimizers import RMSprop
from keras.callbacks import LambdaCallback
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
import random
import sys
import absl.logging
# Keep absl logging quiet: only messages at ERROR severity are let through.
absl.logging.set_verbosity(absl.logging.ERROR)

# --- Training / model settings ------------------------------------------
RNN = 126              # unit count handed to the LSTM layer
BATCH_SIZE = 64        # batch size handed to model.fit
EPOCHS = 30            # NOTE(review): not referenced by the visible cells (fit hard-codes 50)
DIM = 256              # NOTE(review): not referenced by the visible cells
BUFFER_SIZE = 10_000   # NOTE(review): not referenced by the visible cells

# --- Corpus ---------------------------------------------------------------
# Fetch the corpus through Keras' get_file helper; the value it returns is used
# as the local file path when the text is read in the next cell.
DATA_URL = 'https://raw.githubusercontent.com/ForestPearson/CS410-510-NLP-project/main/data/combined.txt'
path = tf.keras.utils.get_file(fname='combined.txt', origin=DATA_URL)

In [None]:
# Read the corpus as raw bytes and decode explicitly as UTF-8, so the result does
# not depend on the platform's default encoding or newline translation.
# The `with` block guarantees the file handle is closed once the data is read.
with open(path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print("Length:", len(text))
print(text[:500])

# Sorted list of the distinct characters in the corpus; a character's position
# in this list is used as its integer id by the lookup tables built later.
vocab = sorted(set(text))

Length: 389861
ACT I

SCENE I. Rousillon. The COUNT's palace.

Enter BERTRAM, the COUNTESS of Rousillon, HELENA, and LAFEU, all in black
COUNTESS
In delivering my son from me, I bury a second husband.
BERTRAM
And I in going, madam, weep o'er my father's death
anew: but I must attend his majesty's command, to
whom I am now in ward, evermore in subjection.
LAFEU
You shall find of the king a husband, madam; you,
sir, a father: he that so generally is at all times
good must of necessity hold his virtue to you; who


In [None]:
# Keras lookup layers built from the vocabulary (forward, then inverted).
# NOTE(review): neither layer is referenced by the other visible cells.
ids_from_charsT = StringLookup(vocabulary=list(vocab), mask_token=None)
chars_from_idsT = StringLookup(
    vocabulary=ids_from_charsT.get_vocabulary(),
    invert=True,
    mask_token=None,
)

# Plain-dict lookups used by the encoding and sampling code.
# CAUTION: the names read the opposite way round from what the dicts hold:
#   chars_from_ids : character  -> integer id
#   ids_from_chars : integer id -> character
chars_from_ids = {char: index for index, char in enumerate(vocab)}
ids_from_chars = dict(enumerate(vocab))

print(vocab)

['\n', ' ', '!', '&', "'", ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [None]:
seq_length = 100   # number of characters in each input window
steps = 5          # stride between the starts of consecutive windows

# Slide a window over the corpus: every input is `seq_length` characters long and
# its target is the single character that immediately follows the window.
window_starts = range(0, len(text) - seq_length, steps)
sequences = [text[start: start + seq_length] for start in window_starts]
next_chars = [text[start + seq_length] for start in window_starts]

# One-hot encode inputs as (window, position, character id) and targets as
# (window, character id). Boolean arrays keep the memory footprint small.
vocab_size = len(vocab)
X = np.zeros((len(sequences), seq_length, vocab_size), dtype = bool)
y = np.zeros((len(sequences), vocab_size), dtype = bool)
positions = np.arange(seq_length)
for row, (window, target) in enumerate(zip(sequences, next_chars)):
    # Set one cell per position of the window in a single indexed assignment.
    X[row, positions, [chars_from_ids[ch] for ch in window]] = 1
    y[row, chars_from_ids[target]] = 1

In [None]:
# Character-level model: one-hot windows -> LSTM -> dense layer -> softmax over
# the vocabulary.
# Alternative recurrent layer that was tried in place of the LSTM:
#   GRU(128, input_shape =(seq_length, len(vocab)))
model = Sequential([
    LSTM(RNN, input_shape =(seq_length, len(vocab))),
    Dense(len(vocab)),
    Activation('softmax'),
])
model.summary()

# Targets are one-hot vectors, hence categorical cross-entropy.
# Alternative optimizer that was tried: RMSprop(learning_rate= 0.01)
model.compile(optimizer = 'adam', loss ='categorical_crossentropy')


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 126)               96768     
                                                                 
 dense_4 (Dense)             (None, 65)                8255      
                                                                 
 activation_4 (Activation)   (None, 65)                0         
                                                                 
Total params: 105,023
Trainable params: 105,023
Non-trainable params: 0
_________________________________________________________________


In [None]:
def on_epoch_end(epoch, logs):
    """Print a 400-character sample from the model, seeded from the corpus.

    Intended as the ``on_epoch_end`` hook of a Keras ``LambdaCallback``.

    Args:
        epoch: Epoch index; only printed in the header line.
        logs: Accepted to match the callback signature; not used here.
    """
    print('\nEpoch:',epoch)

    # Pick a random window of the corpus to act as the seed text.
    seed_start = random.randint(0, len(text) - seq_length - 1)

    for temperature in (0.5,):
        window = text[seed_start: seed_start + seq_length]
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(window)

        for _ in range(400):
            # One-hot encode the current window as a batch of one.
            one_hot = np.zeros((1, seq_length, len(vocab)))
            for position, symbol in enumerate(window):
                one_hot[0, position, chars_from_ids[symbol]] = 1.

            # Re-weight the predicted distribution by the temperature, then
            # renormalise so it sums to one again.
            probabilities = model.predict(one_hot, verbose = 0)[0]
            probabilities = np.asarray(probabilities).astype('float64')
            weights = np.exp(np.log(probabilities) / temperature)
            probabilities = weights / np.sum(weights)

            # A single multinomial draw gives a one-hot row; argmax recovers the id.
            draw = np.random.multinomial(1, probabilities, 1)
            sampled_char = ids_from_chars[np.argmax(draw)]

            # Slide the window forward by one character.
            window = window[1:] + sampled_char

            sys.stdout.write(sampled_char)
            sys.stdout.flush()
        print()
# Wrap the hook so it can be placed in a `callbacks` list for model.fit.
print_callback = LambdaCallback(on_epoch_end = on_epoch_end)

In [None]:
# Where the best weights are written.
# - Forward slashes work on Windows and Linux/Colab alike; the previous
#   '.\data\epochs.hdf5' literal relied on invalid escape sequences and, on
#   non-Windows systems, names a single file containing backslashes.
# - The variable is no longer called `dir`, which shadowed the builtin.
# NOTE(review): confirm the data/ directory exists (or is created by the
# callback) in the environment where this runs.
checkpoint_path = './data/epochs.hdf5'

# Save the model whenever the training loss reaches a new minimum.
checkpoint = ModelCheckpoint(checkpoint_path, monitor ='loss', verbose = 1, save_best_only = True, mode ='min')

# Multiply the learning rate by 0.2 when the loss fails to improve for one epoch.
# min_lr must sit BELOW the optimizer's starting rate: the model is compiled with
# 'adam' (default learning rate 0.001), so the previous floor of 0.001 meant the
# rate could never actually be reduced.
reduce_alpha = ReduceLROnPlateau(monitor ='loss', factor = 0.2, patience = 1, min_lr = 1e-5)
#callbacks = [print_callback, checkpoint, reduce_alpha]

In [None]:
# Train on the one-hot windows. The checkpoint and learning-rate callbacks are
# active; print_callback is left out, so no sample text is printed per epoch.
# (Earlier variant: model.fit(X, y, BATCH_SIZE, epochs = 30, callbacks = callbacks))
# NOTE(review): the epoch count is hard-coded here rather than taken from EPOCHS.
model.fit(
    x=X,
    y=y,
    batch_size=BATCH_SIZE,
    epochs=50,
    callbacks=[checkpoint, reduce_alpha],
)

Epoch 1/50
Epoch 1: loss improved from inf to 2.80427, saving model to .\data\epochs.hdf5
Epoch 2/50
Epoch 2: loss improved from 2.80427 to 2.33262, saving model to .\data\epochs.hdf5
Epoch 3/50
Epoch 3: loss improved from 2.33262 to 2.20147, saving model to .\data\epochs.hdf5
Epoch 4/50
Epoch 4: loss improved from 2.20147 to 2.11315, saving model to .\data\epochs.hdf5
Epoch 5/50
Epoch 5: loss improved from 2.11315 to 2.03989, saving model to .\data\epochs.hdf5
Epoch 6/50
Epoch 6: loss improved from 2.03989 to 1.97906, saving model to .\data\epochs.hdf5
Epoch 7/50
Epoch 7: loss improved from 1.97906 to 1.92680, saving model to .\data\epochs.hdf5
Epoch 8/50
Epoch 8: loss improved from 1.92680 to 1.88201, saving model to .\data\epochs.hdf5
Epoch 9/50
Epoch 9: loss improved from 1.88201 to 1.85844, saving model to .\data\epochs.hdf5
Epoch 10/50
Epoch 10: loss improved from 1.85844 to 1.80864, saving model to .\data\epochs.hdf5
Epoch 11/50
Epoch 11: loss improved from 1.80864 to 1.77573, s

<keras.callbacks.History at 0x1f8e307d240>

In [None]:

def _sample_with_temperature(preds, temperature):
    """Sample one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (the model's softmax output).
        temperature: Scaling factor. 1.0 samples from `preds` unchanged, smaller
            values sharpen the distribution, and 0 selects the most likely
            index deterministically.

    Returns:
        The selected index as a Python int.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Greedy decoding is the limit as temperature -> 0; handling it here
        # also avoids a division by zero below.
        return int(np.argmax(preds))
    # log(0) is -inf for characters with zero probability, which is the desired
    # value (they stay at probability 0), so silence the warning.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Subtract the maximum before exponentiating: for small temperatures every
    # term would otherwise underflow to 0 and the normalisation would be 0/0.
    exp_preds = np.exp(scaled - np.max(scaled))
    probabilities = exp_preds / np.sum(exp_preds)
    # A single multinomial draw gives a one-hot row; argmax recovers the index.
    return int(np.argmax(np.random.multinomial(1, probabilities, 1)))


def generate(length, temperature):
    """Generate text by repeatedly sampling the model's next-character output.

    A window of `seq_length` characters is picked at random from the corpus and
    used as the seed; the window then slides forward one character per step.

    Args:
        length: Number of characters to generate after the seed.
        temperature: Sampling temperature (see `_sample_with_temperature`).

    Returns:
        The seed text followed by the `length` generated characters.
    """
    # Randomly select the starting sequence from the corpus.
    seed = random.randint(0, len(text) - seq_length - 1)
    sentence = text[seed: seed + seq_length]
    pieces = [sentence]

    # Track the window as character ids so each step only needs one indexed
    # assignment instead of a per-character Python loop with dict lookups.
    window_ids = [chars_from_ids[char] for char in sentence]
    positions = np.arange(len(window_ids))

    # Predict and append text based upon the seed.
    for _ in range(length):
        x_pred = np.zeros((1, seq_length, len(vocab)))
        x_pred[0, positions, window_ids] = 1.
        preds = model.predict(x_pred, verbose = 0)[0]

        next_index = _sample_with_temperature(preds, temperature)
        pieces.append(ids_from_chars[next_index])
        # Slide the window: drop the oldest id, append the new one.
        window_ids = window_ids[1:] + [next_index]
    return ''.join(pieces)

print(generate(500, 1.0))

e your sorrow and my grief
Were both extermined.
PHEBE
Thou hast my love: is not that neighbourly?
Sfeazt and Alle d and are to could twose
her name you, good bemaress down frield, bring scort.
Whou, but my hands one lack: and to read bed,
Beforaster.
Exeunt
 hall not a cruef for do, on, our greping
And woman in the ropes offen of more as mickiness for theel
Would: be parsuge, noted as lyol; hark fortun
Becaouce her to tit your lovens and love
I doer goad and paless. Ham.
HER
Why, day you, of her never for Char:
All Grach high deme: befores and all the fails;
FLORIZEL
By coulle. I have my rife
