In [19]:
import tensorflow as tf

import numpy as np
import os
import time

### Scrape FDR Fireside chats


In [1]:
import requests
from bs4 import BeautifulSoup
import time

def get_speech_links(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    speech_links = [a['href'] for a in soup.select('.views-field-title a')]
    return ['https://www.presidency.ucsb.edu' + link for link in speech_links]

def scrape_speech(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    text_blocks = soup.select('.field-docs-content p')
    speech_text = '\n'.join(block.text for block in text_blocks)  # Corrected variable name here
    return speech_text

def main():
    base_url = "https://www.presidency.ucsb.edu/advanced-search?field-keywords=&field-keywords2=&field-keywords3=&from%5Bdate%5D=&to%5Bdate%5D=&person2=200288&category2%5B%5D=53&items_per_page=100"
    links = get_speech_links(base_url)
    all_speeches = []

    for link in links:
        try:
            speech_text = scrape_speech(link)
            all_speeches.append(speech_text)
            print(f"Scraped speech from {link}")
            time.sleep(1)
        except Exception as e:
            print(f"Failed to scrape {link}: {e}")

    # we are going to just simply store the speeches in a text file because it will be easier for the RNN to be trained with.
    with open('fireside_chats.txt', 'w', encoding='utf-8') as file:
        file.write("\n\n".join(all_speeches))

    print(f"Total speeches scraped and stored: {len(all_speeches)}")

if __name__ == '__main__':
    main()

Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-banking
Scraped speech from https://www.presidency.ucsb.edu/documents/second-fireside-chat
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-recovery-program
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-22
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-21
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-20
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-19
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-18
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-17
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-16
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-15
Scraped speech from https://www.presidency.ucsb.edu/documents/fireside-chat-14
Scraped speech from https://w

In [2]:
path_to_file = "fireside_chats.txt"

In [3]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
print(f'Length of text: {len(text)} characters')

Length of text: 459482 characters


In [4]:
print(text[:250])

[First Fireside Chat]
I want to talk for a few minutes with the people of the United States about banking—with the comparatively few who understand the mechanics of banking but more particularly with the overwhelming majority who use banks for the ma


In [20]:
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

80 unique characters


In [32]:
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [34]:
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [25]:
def text_from_ids(ids):
  return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

In [26]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(459482,), dtype=int64, numpy=array([50, 31, 61, ..., 70, 77,  3])>

In [27]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [28]:
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

[
F
i
r
s
t
 
F
i
r


In [29]:
seq_length = 100


In [36]:
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'[' b'F' b'i' b'r' b's' b't' b' ' b'F' b'i' b'r' b'e' b's' b'i' b'd'
 b'e' b' ' b'C' b'h' b'a' b't' b']' b'\n' b'I' b' ' b'w' b'a' b'n' b't'
 b' ' b't' b'o' b' ' b't' b'a' b'l' b'k' b' ' b'f' b'o' b'r' b' ' b'a'
 b' ' b'f' b'e' b'w' b' ' b'm' b'i' b'n' b'u' b't' b'e' b's' b' ' b'w'
 b'i' b't' b'h' b' ' b't' b'h' b'e' b' ' b'p' b'e' b'o' b'p' b'l' b'e'
 b' ' b'o' b'f' b' ' b't' b'h' b'e' b' ' b'U' b'n' b'i' b't' b'e' b'd'
 b' ' b'S' b't' b'a' b't' b'e' b's' b' ' b'a' b'b' b'o' b'u' b't' b' '
 b'b' b'a' b'n'], shape=(101,), dtype=string)


In [37]:
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'[First Fireside Chat]\nI want to talk for a few minutes with the people of the United States about ban'
b'king\xe2\x80\x94with the comparatively few who understand the mechanics of banking but more particularly with th'
b'e overwhelming majority who use banks for the making of deposits and the drawing of checks. I want to'
b' tell you what has been done in the last few days, why it was done, and what the next steps are going'
b' to be. I recognize that the many proclamations from State Capitols and from Washington, the legislat'


In [38]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [39]:
dataset = sequences.map(split_input_target)

In [40]:
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'[First Fireside Chat]\nI want to talk for a few minutes with the people of the United States about ba'
Target: b'First Fireside Chat]\nI want to talk for a few minutes with the people of the United States about ban'


In [41]:
BATCH_SIZE = 64

BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [43]:
import tensorflow as tf
from tensorflow import keras
import kerastuner as kt

def build_model(hp):
    vocab_size = len(ids_from_chars.get_vocabulary())
    embedding_dim = hp.Int('embedding_dim', min_value=32, max_value=512, step=32)
    units = hp.Int('units', min_value=64, max_value=1024, step=64)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')

    inputs = tf.keras.Input(shape=(None,))
    x = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True)(inputs)
    x, _ = tf.keras.layers.GRU(units, return_sequences=True, return_state=True)(x)
    outputs = tf.keras.layers.Dense(vocab_size)(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True))

    return model


tuner = kt.Hyperband(
    build_model,
    objective='loss',
    max_epochs=10,
    directory='hyperband',
    project_name='text_gen'
)

stop_early = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

tuner.search(dataset, epochs=50, callbacks=[stop_early])

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The optimal number of units in the GRU layer is {best_hps.get('units')} and
the optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
""")


Trial 30 Complete [00h 00m 43s]
loss: 2.1551434993743896

Best loss So Far: 0.9034488201141357
Total elapsed time: 00h 12m 23s

The optimal number of units in the GRU layer is 1024 and
the optimal learning rate for the optimizer is 0.0026263512824869465.



In [44]:
vocab_size = len(ids_from_chars.get_vocabulary())
embedding_dim = 256
rnn_units = 768
rnn_units = 1024

In [45]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [46]:
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [47]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 81) # (batch_size, sequence_length, vocab_size)


In [48]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     multiple                  20736     
                                                                 
 gru_1 (GRU)                 multiple                  3938304   
                                                                 
 dense_1 (Dense)             multiple                  83025     
                                                                 
Total params: 4042065 (15.42 MB)
Trainable params: 4042065 (15.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [49]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [50]:
sampled_indices

array([71, 55, 10, 24, 15, 14, 51, 42, 11, 65,  5, 24, 11,  7, 20,  6, 48,
        5, 77, 14, 25, 20, 41, 74, 20,  7, 44, 31, 25, 61, 39,  1, 31, 33,
       19, 63,  1, 28, 80, 40, 15, 77,  6, 46, 70, 39, 50, 70, 42, 71, 40,
       38, 15, 29, 72, 50, 75, 47, 21, 48, 32, 32,  9, 68, 62, 67, 53, 33,
       51, 52, 61, 73,  6, 73, 47, 35, 54, 31, 62, 19, 62,  3, 29,  9, 71,
       71, 35, 51, 48, 56,  8, 38,  4, 75, 17, 49, 53, 45, 20,  4])

In [51]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'for hope\xe2\x80\x94hope for peace, yes, and hope for the defense of our civilization and for the building of a'

Next Char Predictions:
 b'sc-;21]R.m$;.(7\'Y$y1?7Pv7(TF?iN\nFH6k\nC\xe2\x80\x94O2y\'VrN[rRsOM2Dt[wW8YGG,pjoaH]_iu\'uWJbFj6j!D,ssJ]Yd)M"w4ZaU7"'


In [52]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [53]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 81)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.3938093, shape=(), dtype=float32)


In [54]:
tf.exp(example_batch_mean_loss).numpy()

80.9482

In [55]:
model.compile(optimizer='adam', loss=loss)

In [56]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [57]:
EPOCHS = 20

In [58]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Generate text

In [59]:
class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    predicted_logits = predicted_logits + self.prediction_mask

    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    predicted_chars = self.chars_from_ids(predicted_ids)

    return predicted_chars, states

In [60]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [61]:
start = time.time()
states = None
next_char = tf.constant(['Good '])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Good of the Assurance of the United States. The great vatual resources long ourselves with the oceans.
America fellows when the face of the Government. The Nazi onstrup billions who live on bodies. The consequences of the Republic. The Siccess of the Pacific; and fixence theanter, will be a furction of was farmers as a Nazi dollars more in all parks of thought, gavelt upon the third; there were machines and cities. The country wird out in private industries that we can have a construmed by against importance to our sovereignby, and well known, on the bases in the United States should continue and greater freedom, or recently were including our fight. But it should be found today.
That is what we have record ahead, with the just that in taking that if the United States who are building our 

________________________________________________________________________________

Run time: 3.299772262573242


In [62]:
start = time.time()
states = None
next_char = tf.constant(['The taxes'])
next_char = tf.constant(['State of the country'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor([b'State of the country; but un an officially would be greatly his first, to help the engineer or the American besine reptacts far well coordinated reforms there from the what its simply I want to complete its human rises be called from excessive crops but from course, soon have ritain that port, we are ask homes for the first time in pircuntur peodle\xe2\x80\x94consider these factories.\nToday we have known as well as disappeared by the initial forces of those crouds\xe2\x80\x94where there are figured upon our mapply and similar springn--instea loans\xe2\x80\x94and it is charged.\nNo. In electing units of the world, it became constant regularls with the smallers of Rome and every work in years here are tere in the face of the peoples have appointed for attacks, have been reached the opportunily to meet its heavy ruined from underproving her the s.\nEverying superiority.\nWhen years, as we have carefully in the farmer and the nettern.\nWe also also our soldiers, sailors who w

In [63]:
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



In [64]:
states = None
next_char = tf.constant(['Good'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

Good raise and sound labor.
Six men to take those madble with us to the Congress that the pircussition o


Huge shoutout to tensorflow.org for helping with code and providing such awesome tools.