In [None]:
import os
import re
import random
import urllib.request
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import time

# Download the sonnet corpus used for training.
# The context manager closes the HTTP response (and its underlying socket)
# as soon as the body has been read, instead of leaving it open until
# garbage collection.
SHAKESPEARE_URL = 'https://raw.githubusercontent.com/lakigigar/Caltech-CS155-2021/main/projects/project3/data/shakespeare.txt'
with urllib.request.urlopen(SHAKESPEARE_URL) as response:
    shakespeare = response.read().decode('utf-8')



# LSTM Model Data Processing and Poem Generation


Break the training data into overlapping 40-character input sequences, each followed by the next character (the prediction target)

In [None]:
# Sequence-windowing settings for the character-level LSTM training data
# Based on Jason Brownlee's approach

# Number of input characters per training window; passed to build_char_seqs
# as len_seqs and to generate_seq as seq_length
window_size = 40
# Step between the starts of consecutive windows; passed to build_char_seqs
# as n_freq (1 = start a window at every character)
skip_len = 1

# Split training data into len_seqs-length character sequences
def build_char_seqs(data, n_freq, len_seqs=40):
  """Normalize the corpus and slice it into overlapping character windows.

  The text is flattened into one lowercase string in which whitespace-separated
  tokens and newline characters are joined by single spaces (so each original
  line break appears as a newline token surrounded by spaces). Digits and the
  punctuation marks , . ? ! : ; ( ) are then deleted.

  Args:
    data: raw corpus text.
    n_freq: step between the end positions of consecutive windows.
    len_seqs: number of input characters per window.

  Returns:
    A tuple (training_data, sequences): the cleaned text, and a list of
    substrings of length len_seqs + 1 (len_seqs input characters followed by
    the character that comes next).
  """
  # Strip runs of three CRLF line breaks followed by a space, then re-join all
  # tokens (non-whitespace runs and individual newlines) with single spaces
  without_gaps = re.sub('\r\n\r\n\r\n ','',data)
  pieces = re.findall(r'\S+|\n',without_gaps)
  flattened = ' '.join(pieces).lower()

  # Delete digits first, then the listed punctuation characters
  punctuation_table = str.maketrans('', '', ',.?!:;()')
  no_digits = ''.join(ch for ch in flattened if not ch.isdigit())
  training_data = no_digits.translate(punctuation_table)

  # `end` indexes the label character; the len_seqs characters before it are the input
  sequences = [
    training_data[end - len_seqs:end + 1]
    for end in range(len_seqs, len(training_data), n_freq)
  ]

  return training_data, sequences

# Clean the downloaded corpus and cut it into training windows using the
# window length and step configured in window_size / skip_len
processed_text, sequences = build_char_seqs(shakespeare,n_freq=skip_len, len_seqs=window_size)

# Sanity checks: number of windows, the first two windows, and the start of
# the cleaned text
print('Total Sequences: %d' % len(sequences))
print(sequences[0:2])
print(processed_text[0:500])
# Bare last expression: the notebook shows the string's repr, which makes the
# '\n' characters and surrounding spaces visible
processed_text[0:500]


Total Sequences: 95429
[' \n from fairest creatures we desire incre', '\n from fairest creatures we desire increa']
 
 from fairest creatures we desire increase 
 that thereby beauty's rose might never die 
 but as the riper should by time decease 
 his tender heir might bear his memory 
 but thou contracted to thine own bright eyes 
 feed'st thy light's flame with self-substantial fuel 
 making a famine where abundance lies 
 thy self thy foe to thy sweet self too cruel 
 thou that art now the world's fresh ornament 
 and only herald to the gaudy spring 
 within thine own bud buriest thy content 
 and tender


" \n from fairest creatures we desire increase \n that thereby beauty's rose might never die \n but as the riper should by time decease \n his tender heir might bear his memory \n but thou contracted to thine own bright eyes \n feed'st thy light's flame with self-substantial fuel \n making a famine where abundance lies \n thy self thy foe to thy sweet self too cruel \n thou that art now the world's fresh ornament \n and only herald to the gaudy spring \n within thine own bud buriest thy content \n and tender"

Encode character sequences as integers -> one hot, break into training input and labels

In [None]:
lines = sequences

# Create mapping dictionary to encode characters as integers
# (sorted so the character -> index assignment is deterministic)
chars = sorted(set(processed_text))
mapping = {c: i for i, c in enumerate(chars)}

# Convert every sequence (the input window plus its trailing label character)
# to a list of integer codes
encoded_seqs = [[mapping[char] for char in line] for line in lines]

# Get vocabulary size
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)


# Create input and output, where output is single next character in seq
encoded_seqs = np.array(encoded_seqs)
X, y = encoded_seqs[:,:-1], encoded_seqs[:,-1]


# One-hot encode inputs and labels with a single lookup into an identity
# matrix: row k of the table is the one-hot vector for index k. This produces
# the float32 arrays directly, of shape (n_sequences, window, vocab_size) for X
# and (n_sequences, vocab_size) for y, without first building a Python list of
# per-sequence arrays and then copying it into a second array.
one_hot_table = np.eye(vocab_size, dtype='float32')
X = one_hot_table[X]
y = one_hot_table[y]
# Kept so the name stays defined for any cell that reads it; it refers to the
# same one-hot array as X (no extra copy).
int_sequences = X

print(mapping)


Vocabulary Size: 30
{'\n': 0, ' ': 1, "'": 2, '-': 3, 'a': 4, 'b': 5, 'c': 6, 'd': 7, 'e': 8, 'f': 9, 'g': 10, 'h': 11, 'i': 12, 'j': 13, 'k': 14, 'l': 15, 'm': 16, 'n': 17, 'o': 18, 'p': 19, 'q': 20, 'r': 21, 's': 22, 't': 23, 'u': 24, 'v': 25, 'w': 26, 'x': 27, 'y': 28, 'z': 29}


# Build and fit LSTM model
## higher temperature -> softer distribution -> less confident -> diverse generation
## lower temperature -> sharper distribution -> more confident -> repetitive generation

In [None]:
# Report how many GPU devices TensorFlow can see
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Bare last expression: the notebook displays the list of detected GPU devices
tf.config.list_physical_devices('GPU')

Num GPUs Available:  1


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
# Wall-clock timer covering model construction and training
start_time = time.time()

temperature = 0.25

# Define LSTM Model: one 200-unit LSTM layer reading one-hot windows of shape
# (window length, vocabulary size), followed by a softmax over the vocabulary
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(200, input_shape=(X.shape[1], X.shape[2])))

# Use a lambda layer to scale the output array by temperature
# NOTE(review): this divides the LSTM output (the *input* of the trainable
# Dense layer), not the logits fed to the softmax. Since Dense computes
# W @ (h / T) + b, the constant factor can be absorbed into W during training,
# so this may not behave as a softmax temperature - confirm the intent.
model.add(tf.keras.layers.Lambda(lambda x: x / temperature))
model.add(tf.keras.layers.Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output
model.summary()

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit model
model.fit(X, y, epochs=50)

print("--- %s seconds used ---" % (time.time() - start_time))


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 200)               184800    
_________________________________________________________________
lambda_5 (Lambda)            (None, 200)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 30)                6030      
Total params: 190,830
Trainable params: 190,830
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/5

# Poem Generation

In [None]:
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Extend ``seed_text`` one character at a time using greedy decoding.

    At every step the last ``seq_length`` characters are one-hot encoded, fed
    to ``model.predict`` and the highest-scoring character is appended, so the
    output is deterministic for a given model and seed.

    Args:
        model: object with a Keras-style ``predict(x, verbose=0)`` method that,
            for an input of shape (1, seq_length, len(mapping)), returns one
            row of per-character scores.
        mapping: dict from character to integer index.
        seq_length: number of characters in the model's input window.
        seed_text: text to start from; may be shorter or longer than
            ``seq_length``.
        n_chars: number of characters to generate.

    Returns:
        ``seed_text`` followed by the ``n_chars`` generated characters.

    Raises:
        KeyError: if ``seed_text`` contains a character that is not a key of
            ``mapping``, or the model predicts an index that is not a value of
            ``mapping``.
    """
    # Inverse lookup built once, instead of scanning the mapping on every step
    index_to_char = {index: char for char, index in mapping.items()}
    # Row k of the identity matrix is the one-hot vector for index k
    one_hot_table = np.eye(len(mapping), dtype='float32')

    # Encode the seed once; predicted indices are appended as they are produced
    encoded = [mapping[char] for char in seed_text]
    generated = []

    for _ in range(n_chars):
        # Keep only the most recent seq_length indices (truncate at the front)
        window = encoded[-seq_length:]
        # Left-pad short input with index 0, the same value pad_sequences uses
        # by default. Index 0 is also a real character of the vocabulary, so
        # padding is encoded as that character's one-hot vector.
        window = [0] * (seq_length - len(window)) + window

        # Shape (1, seq_length, vocab_size): a batch containing one window
        batch = one_hot_table[np.array([window], dtype=int)]

        # Sequential.predict_classes was removed from TensorFlow; take the
        # argmax of the predicted scores instead
        scores = model.predict(batch, verbose=0)
        yhat = int(np.argmax(scores, axis=-1)[0])

        encoded.append(yhat)
        generated.append(index_to_char[yhat])

    return seed_text + ''.join(generated)


# Seed the generator with a fixed opening line. The seed can be any string
# whose characters all appear in `mapping`; a seed shorter than window_size is
# padded inside generate_seq. Then generate 1000 further characters.
seed = "shall i compare thee to a summer's day" 
raw_poem = generate_seq(model, mapping, window_size, seed, 1000)
print(raw_poem)





shall i compare thee to a summer's day 
 thou bring for thou art force what were not see dies 
 by thy dead fleech and despite thy self decease 
 her it alteren that thy soul that thou 
 rocks are in this his give men's eyes can lend 
 that be to thee and common place sorit 
 could do womadsing and is posesson state 
 and in the stormy gusts of memory 
 my music handow and there beseem every the winds 
 so all my argument tall not praise that tong 
 hape your seaker shape is my still to say 
 to thee and common place should prove thie  
 since say mine we abundatiplion in 
 on your trosph into this praise the worst 
 and therefore of their styll 's sin approud 
 that my staile that merich and every show 
 the mation caul being fulst of graces part 
 and salve my love's spite i wit you awand 
 or thou art conterpesate 
 which in thy couts day so pier in spill 
 to see him those bluch and sweetless absence 
 have that i compise to thy constancy 
 so thou be rich nor recopsy with thy stay