<a href="https://colab.research.google.com/github/KelseyNager/GenAI/blob/main/Problem1a.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#LSTM
##Kelsey Nager
##CSC 330

#0 Parameters

In [1]:
import numpy as np
import json
import re
import string

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, losses

In [59]:
VOCAB_SIZE = 20000
MAX_LEN = 150
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2
SEED = 42
LOAD_MODEL = False
BATCH_SIZE = 64
EPOCHS = 25

#1 Data Collection and Preparation

In [21]:
import requests
import re


def trim_book_content(book_content, start, end):
    """Trims the beginning and end of book content using markers."""
    start_match = re.search(re.escape(start), book_content)
    end_match = re.search(re.escape(end), book_content)

    print(f"Start match found: {start_match is not None}")  # Check if start marker is found
    print(f"End match found: {end_match is not None}")    # Check if end marker is found

    if start_match and end_match:
        start_index = start_match.end()
        end_index = end_match.start()
        trimmed_content = book_content[start_index:end_index]
        return trimmed_content
    return ""


# Download each text file and append to all_books
urls = [
"https://www.gutenberg.org/files/71865/71865-0.txt",  # Mrs Dalloway, Virginia Woolf
"https://www.gutenberg.org/files/144/144-0.txt",   # The Voyage Out, Virginia Woolf
"https://www.gutenberg.org/files/64457/64457-0.txt"   # The Common Reader, Virginia Woolf
      ]

start = "*** START OF THE PROJECT GUTENBERG EBOOK"
end = "*** END OF THE PROJECT GUTENBERG EBOOK"

all_books = ""

# Save combined text to a single file
for url in urls:
  response = requests.get(url)
  book_content = response.text
  trimmed_text = trim_book_content(book_content, start, end)
  all_books += trimmed_text + "\n\n"

with open('all_books_trimmed.txt', 'w', encoding='utf-8') as file:
    file.write(all_books)

Start match found: True
End match found: True
Start match found: True
End match found: True
Start match found: True
End match found: True


In [22]:
with open("all_books_trimmed.txt", "r", encoding="utf-8") as file:
    all_books = file.read()

# Split the text into lines
book_data = all_books.split("\n")

#filtered_data represents all three combined, filtered Vrignia Woolf books splint into lines
filtered_data = [
    "Text: " + line
    for line in book_data
    if line.strip()
]

In [23]:
# Display an example word
example = filtered_data[100]
example

'Text: Elizabeth), and she, too, loving it as she did with an absurd and'

In [24]:
def pad_punctuation(s):
    s = re.sub(f"([{string.punctuation}])", r" \1 ", s)  # Pad punctuation
    s = re.sub(" +", " ", s)
    s = s.lower()  # Convert to lowercase for consistency
    return s

text_data = [pad_punctuation(s) for s in filtered_data]

In [25]:
print(f"Number of lines of text of filtered data: {len(filtered_data)}")

Number of lines of text of filtered data: 24761


In [26]:
#same example as earlier, now with padded punctuation and lowercase letters
example_data = text_data[100]
example_data

'text : elizabeth ) , and she , too , loving it as she did with an absurd and'

In [27]:
# Convert to a Tensorflow Dataset
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .batch(BATCH_SIZE)
    .shuffle(1000)
)

In [28]:
#example of lines
for example in text_ds.take(1):
       print(example)

tf.Tensor(
[b'text : random chatter about some book just out an authority now entirely to'
 b'text : seek . the diverse schools would have debated as hotly as ever , but at'
 b'text : the back of every reader\xe2\x80\x99s mind would have been the consciousness that'
 b'text : there was at least one man who kept the main principles of literature'
 b'text : closely in view ; who , if you had taken to him some eccentricity of the'
 b'text : moment , would have brought it into touch with permanence and tethered it'
 b'text : by his own authority in the contrary blasts of praise and blame . [ 15 ] but'
 b'text : when it comes to the making of a critic , nature must be generous and'
 b'text : society ripe . the scattered dinner - tables of the modern world , the chase'
 b'text : and eddy of the various currents which compose the society of our time , '
 b'text : could only be dominated by a giant of fabulous dimensions . and where is'
 b'text : even the very tall man whom we have the right t

In [29]:
# Create a vectorization layer
vectorize_layer = layers.TextVectorization(
    standardize="lower",
    max_tokens=VOCAB_SIZE + 1,
    output_mode="int",
    output_sequence_length=MAX_LEN + 1,
)

In [30]:
# Adapt the layer to the training set
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()
print("Vocabulary size:", len(vocab))

Vocabulary size: 18612


In [31]:
# Display some token:word mappings
for i, word in enumerate(vocab[:10]):
    print(f"{i}: {word}")

0: 
1: [UNK]
2: :
3: text
4: ,
5: the
6: .
7: and
8: of
9: to


In [32]:
# Create the training set of book content and the same text shifted by one word
def prepare_inputs(text):
    text = tf.expand_dims(text, -1)
    tokenized_sentences = vectorize_layer(text)
    x = tokenized_sentences[:, :-1]
    return x, tokenized_sentences[:, 1:]

train_ds = text_ds.map(prepare_inputs)

# Single-Layer LSTM

In [33]:
inputs = layers.Input(shape=(None,), dtype="int32")
x = layers.Embedding(VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x = layers.LSTM(128, return_sequences=True, dropout=0.2)(x)
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
lstm_1 = models.Model(inputs, outputs)
lstm_1.summary()

#Training Single-Layer LSTM

In [34]:
loss_fn = losses.SparseCategoricalCrossentropy()
lstm_1.compile("adam", loss_fn)

In [39]:
# Create a TextGenerator checkpoint
class TextGenerator(callbacks.Callback):
    def __init__(self, index_to_word, top_k=10):
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index
            for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        if isinstance(probs, (float, np.float64)):  # Check if probs is a single value
            probs = np.array([probs, 1 - probs])  # Create a 2-element distribution
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs


    def generate(self, start_prompt, max_tokens, temperature):
        sample_token = None
        info = []
        while len([
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]) < max_tokens and sample_token != 0:
            y = self.model.predict(np.array([[
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]]))
            sample_token, probs = self.sample_from(y[0][-1], temperature)

            if 0 <= sample_token < len(self.index_to_word):  # Check if sample_token is within range
              start_prompt = start_prompt + " " + self.index_to_word[sample_token]
              info.append({"prompt": start_prompt, "word_probs": probs})
              [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ].append(sample_token)
            else:
              # Handle case where sample_token is out of range
              print(f"Warning: sample_token out of range: {sample_token}")
              break
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
            info.append({"prompt": start_prompt, "word_probs": probs})
            [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ].append(sample_token)
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
      try:
        prompts = ('the meaning of life', 'it is an awful')
        prompt = np.random.choice(prompts)
        self.generate(prompt, max_tokens=100, temperature=.5)
      except Exception as e:
        print(f"Error during text generation: {e}")

In [40]:
# Tokenize starting prompt

text_generator = TextGenerator(vocab)

In [41]:
lstm_1.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[text_generator],
)

Epoch 1/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

generated text:
it is an awful intellectuals intellectuals jingle jingle

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 150ms/step - loss: 0.5625
Epoch 2/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

generated text:
the meaning of life vinraces vinraces  

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 150ms/step - loss: 0.5470
Epoch 3/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step

generated text:
it is an awful

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 151ms/step - loss: 0.5354
Epoch 4/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1

<keras.src.callbacks.history.History at 0x7f0b896717e0>

#Text Generation
##with Single Layer LSTM

In [42]:
def print_probs(info, vocab, top_k=5):
    for i in info:
        print(f"\nPROMPT: {i['prompt']}")
        word_probs = i["word_probs"]
        p_sorted = np.sort(word_probs)[::-1][:top_k]
        i_sorted = np.argsort(word_probs)[::-1][:top_k]
        for p, i in zip(p_sorted, i_sorted):
            if 0 <= i < len(vocab):
                print(f"{vocab[i]}:   \t{np.round(100*p,2)}%")
            else:
                print(f"Index {i} out of range for vocabulary (size: {len(vocab)})") # Print error message
        print("--------\n")

Prompt 1, Various Temperatures

In [43]:
info = text_generator.generate(
    start_prompt="the meaning of life is", max_tokens=10, temperature=.2
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step

generated text:
the meaning of life is tretys tretys , , and and


PROMPT: the meaning of life is tretys
laced:   	0.48%
famously:   	0.46%
cakes:   	0.46%
11:   	0.43%
voyage—china:   	0.42%
--------


PROMPT: the meaning of life is tretys tretys
laced:   	0.48%
famously:   	0.46%
cakes:   	0.46%
11:   	0.43%
voyage—china:   	0.42%
--------


PROMPT: the meaning of life is tretys tretys ,
,:   	85.91%
;:   	11.17%
:   	1.95%
.:   	0.69%
?:   	0.19%
--------


PROMPT: the meaning of life is tretys tretys , ,
,:   	85.91%
;:   	11.17%
:   	1.95%
.:   	0.69%
?:   	0.19%
--------


PROMPT: the meaning of life is tretys tretys , , and
and:   	69.83%
which:   	21.35%
”:   	3.27%
or:   	1.68%
but:   	1.25%
--------


PROMPT: the meaning of life is tretys tretys , , and and
and:

In [44]:
info = text_generator.generate(
    start_prompt="the meaning of life is", max_tokens=10, temperature=0.5
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

generated text:
the meaning of life is true true rated rated


PROMPT: the meaning of life is true
_:   	2.75%
a:   	0.74%
the:   	0.52%
that:   	0.44%
like:   	0.4%
--------


PROMPT: the meaning of life is true true
_:   	2.75%
a:   	0.74%
the:   	0.52%
that:   	0.44%
like:   	0.4%
--------


PROMPT: the meaning of life is true true rated
footnote:   	0.66%
8:   	0.35%
1:   	0.32%
7:   	0.31%
6:   	0.3%
--------


PROMPT: the meaning of life is true true rated rated
footnote:   	0.66%
8:   	0.35%
1:   	0.32%
7:   	0.31%
6:   	0.3%
--------



In [45]:
info = text_generator.generate(
    start_prompt="the meaning of life is", max_tokens=30, temperature=0.9)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

generated text:
the meaning of life is “leave “leave capable capable surrendered surrendered profitably profitably voltaire voltaire straighten straighten speedily speedily  


PROMPT: the meaning of life is “leave
_:   	0.06%
captured:   	0.05%
published:   	0.04%
calming:   	0.04%
4:   	0.03%
--------


PROMPT: the meaning of life is “leave “leave
_:   	0.06%
captured:   	0.05%
published:   	0.04%
calming:   	0.04%
4:

Prompt 2, Various Temperatures

In [58]:
info = text_generator.generate(
    "it was an awful", max_tokens=15, temperature=.6
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

generated text:
it was an awful cowardly cowardly privé” privé” voluminous voluminous ὲν ὲν


PROMPT: it was an awful cowardly
organ:   	0.2%
submerged:   	0.15%
suffrage:   	0.13%
opulent:   	0.13%
representative:   	0.12%
--------


PROMPT: it was an awful cowardly cowardly
organ:   	0.2%
submerged:   	0.15%
suffrage:   	0.13%
opulent:   	0.13%
representative:   	0.12%
--------


PROMPT: it was an awful cowardly cowardly privé”
morrow:   	0.12%
opulent:   	0.07%
allowances:   	0.06%
grossly:   	0.06%
much—everything—in:   	0.06%
--------


PROMPT: it was an awful cowardly cowardly privé” privé”
morrow:   	0.12%
opulent:   	0.07%


In [47]:
info = text_generator.generate(
    "it was an awful", max_tokens=50, temperature=0.3
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21

In [57]:
info = text_generator.generate(
    "it was an awful", max_tokens=15, temperature=0.1
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

generated text:
it was an awful opulent opulent tatler tatler footnote footnote register register austen’s austen’s footnote footnote


PROMPT: it was an awful opulent
opulent:   	99.17%
elaborate:   	0.2%
iceblock:   	0.11%
submerged:   	0.1%
communs:   	0.06%
--------


PROMPT: it was an awful opulent opulent
opulent:   	99.17%
elaborate:   	0.2%
iceblock:   	0.11%
submerged:   	0.1%
communs:   	0.06%
--------


PROMPT: it was an awful opulent opulent tatler
tatler:   	63.69%
euphrosyne:   	14.78%
odyssey:   	6.82%
religio:   	6.82%
inferno:   	3.21%
--------

#Evaluation of Text Generation with Single LSTM

#Multi-Layer LSTM


In [60]:
inputs = layers.Input(shape=(None,), dtype="int32")
x = layers.Embedding(VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x = layers.LSTM(128, return_sequences=True, dropout=0.3)(x)
x = layers.LSTM(64, return_sequences=True, dropout=0.3)(x)
x = layers.LSTM(32, return_sequences=True, dropout=0.3)(x)
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
lstm_2 = models.Model(inputs, outputs)
lstm_2.summary()

#Training Multi-Layer LSTM

In [61]:
loss_fn = losses.SparseCategoricalCrossentropy()
lstm_2.compile("adam", loss_fn)

In [62]:
# Create a TextGenerator checkpoint
class TextGenerator(callbacks.Callback):
    def __init__(self, index_to_word, top_k=10):
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index
            for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        if isinstance(probs, (float, np.float64)):  # Check if probs is a single value
            probs = np.array([probs, 1 - probs])  # Create a 2-element distribution
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs


    def generate(self, start_prompt, max_tokens, temperature):
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            y = self.model.predict(np.array([start_tokens]))
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            if 0 <= sample_token < len(self.index_to_word):  # Check if sample_token is within range
              start_prompt = start_prompt + " " + self.index_to_word[sample_token]
              info.append({"prompt": start_prompt, "word_probs": probs})
              start_tokens.append(sample_token)
            else:
              # Handle case where sample_token is out of range
              print(f"Warning: sample_token out of range: {sample_token}")
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
            info.append({"prompt": start_prompt, "word_probs": probs})
            start_tokens.append(sample_token)
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
      try:
        prompts = ('the meaning of life', 'it is an awful')
        prompt = np.random.choice(prompts)
        self.generate(prompt, max_tokens=100, temperature=.5)
      except Exception as e:
        print(f"Error during text generation: {e}")

In [63]:
# Tokenize starting prompt

text_generator = TextGenerator(vocab)

In [64]:
lstm_2.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[text_generator],
)

Epoch 1/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457ms/step

generated text:
it is an awful  

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 98ms/step - loss: 5.1032
Epoch 2/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

generated text:
the meaning of life  

[1m387/387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 97ms/step - loss: 0.7267
Epoch 3/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

<keras.src.callbacks.history.History at 0x7f0b8935d690>

#Text Generation
##with Multi-Layer LSTM

In [65]:
def print_probs(info, vocab, top_k=5):
    for i in info:
        print(f"\nPROMPT: {i['prompt']}")
        word_probs = i["word_probs"]
        p_sorted = np.sort(word_probs)[::-1][:top_k]
        i_sorted = np.argsort(word_probs)[::-1][:top_k]
        for p, i in zip(p_sorted, i_sorted):
            if 0 <= i < len(vocab):
                print(f"{vocab[i]}:   \t{np.round(100*p,2)}%")
            else:
                print(f"Index {i} out of range for vocabulary (size: {len(vocab)})") # Print error message
        print("--------\n")

Prompt 1 with Various Temperatures

In [73]:
info = text_generator.generate(
    start_prompt="the meaning of life is", max_tokens=10, temperature=.1
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

generated text:
the meaning of life is delightful delightful prove prove ways ways


PROMPT: the meaning of life is delightful
been:   	0.77%
very:   	0.17%
little:   	0.11%
seen:   	0.11%
footnote:   	0.08%
--------


PROMPT: the meaning of life is delightful delightful
been:   	0.77%
very:   	0.17%
little:   	0.11%
seen:   	0.11%
footnote:   	0.08%
--------


PROMPT: the meaning of life is delightful delightful prove
footnote:   	0.17%
been:   	0.16%
little:   	0.06%
seen:   	0.06%
very:   	0.06%
--------


PROMPT: the meaning of life is delightful delightful prove prove
footnote:   	0.17%
been:   	0.16%
little:   	0.06%
seen:   	0.06%
very:   	0.06%
--------


PROMPT: the meaning of life is delightful delightful prove prove ways
very:   	0.96%
little:   	0.72%
been:   	

In [67]:
info = text_generator.generate(
    start_prompt="the meaning of life", max_tokens=10, temperature=.5
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

generated text:
the meaning of life dramatic dramatic : : there there


PROMPT: the meaning of life dramatic
been:   	0.01%
footnote:   	0.01%
seen:   	0.01%
heard:   	0.01%
thought:   	0.01%
--------


PROMPT: the meaning of life dramatic dramatic
been:   	0.01%
footnote:   	0.01%
seen:   	0.01%
heard:   	0.01%
thought:   	0.01%
--------


PROMPT: the meaning of life dramatic dramatic :
::   	99.98%
of:   	0.0%
but:   	0.0%
to:   	0.0%
and:   	0.0%
--------


PROMPT: the meaning of life dramatic dramatic : :
::   	99.98%
of:   	0.0%
but:   	0.0%
to:   	0.0%
and:   	0.0%
--------


PROMPT: the meaning of life dramatic dramatic : : there
the:   	34.73%
and:   	9.31%
of:   	6.34%
she:   	4.43%
to:   	4.05%
--------


PROMPT: the meaning of life dramatic dramatic : : there th

In [69]:
info = text_generator.generate(
    start_prompt="the meaning of life", max_tokens=10, temperature=.9
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

generated text:
the meaning of life prognosticated prognosticated : : tell tell


PROMPT: the meaning of life prognosticated
been:   	0.01%
little:   	0.01%
very:   	0.01%
footnote:   	0.01%
great:   	0.01%
--------


PROMPT: the meaning of life prognosticated prognosticated
been:   	0.01%
little:   	0.01%
very:   	0.01%
footnote:   	0.01%
great:   	0.01%
--------


PROMPT: the meaning of life prognosticated prognosticated :
::   	99.99%
but:   	0.0%
“i:   	0.0%
and:   	0.0%
“and:   	0.0%
--------


PROMPT: the meaning of life prognosticated prognosticated : :
::   	99.99%
but:   	0.0%
“i:   	0.0%
and:   	0.0%
“and:   	0.0%
--------


PROMPT: the meaning of life prognosticated prognosticated : : tell
the:   	13.39%
she:   	7.86%
he:   	5.55%
they:   	2.9%
it:   	2.86%
----

Prompt 2 with Various Temperatures

In [74]:
info = text_generator.generate(
    start_prompt="the meaning of life", max_tokens=10, temperature=.1
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

generated text:
the meaning of life scarcely scarcely olenin olenin : :


PROMPT: the meaning of life scarcely
little:   	0.36%
been:   	0.32%
very:   	0.3%
great:   	0.2%
good:   	0.15%
--------


PROMPT: the meaning of life scarcely scarcely
little:   	0.36%
been:   	0.32%
very:   	0.3%
great:   	0.2%
good:   	0.15%
--------


PROMPT: the meaning of life scarcely scarcely olenin
::   	1.56%
little:   	0.54%
very:   	0.4%
great:   	0.27%
good:   	0.19%
--------


PROMPT: the meaning of life scarcely scarcely olenin olenin
::   	1.56%
little:   	0.54%
very:   	0.4%
great:   	0.27%
good:   	0.19%
--------


PROMPT: the meaning of life scarcely scarcely olenin olenin :
::   	100.0%
“i:   	0.0%
but:   	0.0%
“and:   	0.0%
“but:   	0.0%
--------


PROMPT: the meaning of life sc

In [75]:
info = text_generator.generate(
    start_prompt="the meaning of life", max_tokens=10, temperature=.5
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step

generated text:
the meaning of life nieces nieces authors’ authors’ walker walker


PROMPT: the meaning of life nieces
footnote:   	0.01%
been:   	0.01%
little:   	0.01%
ascent:   	0.01%
telegram:   	0.01%
--------


PROMPT: the meaning of life nieces nieces
footnote:   	0.01%
been:   	0.01%
little:   	0.01%
ascent:   	0.01%
telegram:   	0.01%
--------


PROMPT: the meaning of life nieces nieces authors’
very:   	0.02%
little:   	0.01%
great:   	0.01%
good:   	0.01%
more:   	0.01%
--------


PROMPT: the meaning of life nieces nieces authors’ authors’
very:   	0.02%
little:   	0.01%
great:   	0.01%
good:   	0.01%
more:   	0.01%
--------


PROMPT: the meaning of life nieces nieces authors’ authors’ walker
::   	0.73%
of:   	0.04%
into:   	0.04%
who:   	0.03%
between:   	0.03

In [76]:
info = text_generator.generate(
    start_prompt="it was an awful", max_tokens=10, temperature=.8
)
print_probs(info, vocab)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

generated text:
it was an awful ties ties terms terms trade trade


PROMPT: it was an awful ties
footnote:   	0.01%
standstill:   	0.01%
main:   	0.01%
goal:   	0.01%
wives:   	0.01%
--------


PROMPT: it was an awful ties ties
footnote:   	0.01%
standstill:   	0.01%
main:   	0.01%
goal:   	0.01%
wives:   	0.01%
--------


PROMPT: it was an awful ties ties terms
::   	0.01%
little:   	0.01%
very:   	0.01%
great:   	0.01%
man:   	0.01%
--------


PROMPT: it was an awful ties ties terms terms
::   	0.01%
little:   	0.01%
very:   	0.01%
great:   	0.01%
man:   	0.01%
--------


PROMPT: it was an awful ties ties terms terms trade
footnote:   	0.01%
been:   	0.01%
wives:   	0.01%
very:   	0.01%
little:   	0.01%
--------


PROMPT: it was an awful ties ties terms terms trade trade

#Evaluation of Text Generation with Multi-Layer LSTM