<a href="https://colab.research.google.com/github/Kinyugo/Walks_Into_A_Bar_Language_Model/blob/master/Generate_Bar_Jokes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Generate Bar Jokes 

## Import libraries

In [1]:
import numpy as np
import tensorflow as tf
import os

## Preprocessing

### Fetch data

In [2]:
BARJOKES_TXT = "./data/walks_into_a_bar.txt"

def transform(txt):
  """Transform text into an array of integer character codes.

  Each character is mapped to its code point with ord(). Characters whose
  code point does not fit in the 256-entry vocabulary used by the model
  (ord(c) > 255) are dropped, so the result may be shorter than `txt`.

  Args:
    txt: The string to encode.

  Returns:
    A 1-D np.int32 array with one value in [0, 255] per kept character.
  """
  # `< 256` (not `< 255`): code 255 is a valid id for a 256-entry vocabulary.
  return np.asarray([ord(c) for c in txt if ord(c) < 256], dtype=np.int32)

def generate_input(seq_len=100, batch_size=1024):
  """Build the endlessly repeating training dataset from the joke corpus.

  The text file at BARJOKES_TXT is read, encoded with transform(), cut into
  consecutive windows of seq_len + 1 characters, and each window is split
  into an (input, target) pair where the target is the input shifted one
  character to the right.

  Args:
    seq_len: Number of characters in each input (and target) sequence.
    batch_size: Number of sequences per batch; incomplete batches are dropped.

  Returns:
    A tf.data.Dataset of (input, target) batches that repeats without end.
  """
  def _to_input_and_target(window):
    # Input is everything but the last character; target everything but the first.
    return window[:-1], window[1:]

  with tf.io.gfile.GFile(BARJOKES_TXT, 'r') as corpus_file:
    raw_text = corpus_file.read()

  encoded = tf.constant(transform(raw_text), dtype=tf.int32)
  shuffle_buffer = 10000

  # Consecutive windows of seq_len + 1 characters; a short tail is discarded.
  windows = tf.data.Dataset.from_tensor_slices(encoded).batch(
      seq_len + 1, drop_remainder=True)

  # Split, shuffle, and group the windows into fixed-size batches.
  pairs = windows.map(_to_input_and_target)
  batches = pairs.shuffle(shuffle_buffer).batch(batch_size, drop_remainder=True)

  return batches.repeat()
  

## Model Definition

In [3]:
EMBEDDING_DIM = 512


def lstm_model(seq_len=100, batch_size=None, stateful=True):
    """Build the character-level network: embedding, two LSTMs, softmax.

    Args:
        seq_len: Length of each input sequence of character ids.
        batch_size: Fixed batch size for the input layer, or None.
        stateful: Passed to both LSTM layers as their `stateful` flag.

    Returns:
        An uncompiled tf.keras.Model whose input is named "seed" and whose
        output is a softmax over 256 character ids at every timestep.
    """
    layers = tf.keras.layers

    seed_input = tf.keras.Input(
        shape=(seq_len,), batch_size=batch_size, dtype=tf.int32, name="seed")

    hidden = layers.Embedding(
        input_dim=256, output_dim=EMBEDDING_DIM)(seed_input)

    # Two stacked LSTMs, each as wide as the embedding and each returning
    # the full sequence so the next layer sees every timestep.
    for _ in range(2):
        hidden = layers.LSTM(
            EMBEDDING_DIM, stateful=stateful, return_sequences=True)(hidden)

    # Apply the same softmax classifier independently at each timestep.
    per_step_softmax = layers.TimeDistributed(
        layers.Dense(256, activation="softmax"))
    char_probs = per_step_softmax(hidden)

    return tf.keras.Model(inputs=[seed_input], outputs=[char_probs])

## Model Training

In [9]:
# Train the character model on the Colab TPU and checkpoint the best weights.

# Discard any Keras state left over from earlier runs of this cell.
tf.keras.backend.clear_session()

# Locate the Colab TPU through the COLAB_TPU_ADDR environment variable
# (a KeyError here means the variable is not set) and connect to it.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
print("All devices: ", tf.config.list_logical_devices('TPU'))

# Write the model to model_path whenever the training loss reaches a new
# minimum (monitor="loss", mode="min", save_best_only=True).
model_path = "bar_jokes.h5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    model_path, monitor="loss", verbose=1, save_best_only=True, mode="min")
callbacks = [checkpoint]

strategy = tf.distribute.experimental.TPUStrategy(resolver)

# Build and compile the model inside the strategy scope. The training model
# is non-stateful and consumes 100-character sequences.
with strategy.scope():
  training_model = lstm_model(seq_len=100, stateful=False)
  training_model.compile(
      optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.01),
      loss='sparse_categorical_crossentropy',
      metrics=['sparse_categorical_accuracy'])

# generate_input() returns a dataset that repeats without end, so
# steps_per_epoch is what delimits an epoch: 25 epochs of 100 batches each.
training_model.fit(
    generate_input(),
    steps_per_epoch=100,
    epochs=25,
    callbacks=callbacks
)





INFO:tensorflow:Initializing the TPU system: grpc://10.43.57.194:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.43.57.194:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


All devices:  [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU')]
INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Epoch 1/25
Epoch 00001: loss improved from inf to 3.29740, saving model to bar_jokes.h5
Epoch 2/25
Epoch 00002: loss improved from 3.29740 to 1.96669, saving model to bar_jokes.h5
Epoch 3/25
Epoch 00003: loss improved from 1.96669 to 1.18234, saving model to bar_jokes.h5
Epoch 4/25
Epoch 00004: loss improved from 1.18234 to 0.88884, saving model to bar_jokes.h5
Epoch 5/25
Epoch 00005: loss improved from 0.88884 to 0.70830, saving model to bar_jokes.h5
Epoch 6/25
Epoch 00006: loss improved from 0.70830 to 0.57917, saving model to bar_jokes.h5
Epoch 7/25
Epoch 00007: loss improved from 0.57917 to 0.48480, saving model to bar_jokes.h5
Epoch 8/25
Epoch 00008: loss improved from 0.48480 to 0.41599, saving model to bar_jokes.h5
Epoch 9/25
Epoch 00009: loss improved from 0.41599 to 0.37106, saving model to bar_jokes.h5
Epoch 10/25
Epoch 00010: loss improved from 0.37106 to 0.33661, saving model to bar_jokes.h5
Epoch 11/25
Epoch 00011: loss improved from 0.33661 to 0.31339, saving model to bar

<tensorflow.python.keras.callbacks.History at 0x7fb5ae8ef1d0>

## Make Predictions

In [13]:
# Sample BATCH_SIZE joke continuations, one character at a time, from the
# trained weights and print them.
BATCH_SIZE = 5
PREDICT_LEN = 250

# Rebuild the network for step-by-step sampling: one character per call
# (seq_len=1), a fixed batch size, and stateful LSTMs so that successive
# predict() calls continue from the previous call's recurrent state.
prediction_model = lstm_model(seq_len=1, batch_size=BATCH_SIZE, stateful=True)
prediction_model.load_weights("bar_jokes.h5")

# Seed models with initial string copied BATCH_SIZE times
seed_txt = 'A priest walks into a bar '
seed = transform(seed_txt)
seed = np.repeat(np.expand_dims(seed, 0), BATCH_SIZE, axis=0)

# Prime the state of the model with every seed character except the last.
# The count comes from the encoded seed, not from seed_txt, because
# transform() drops some characters and the two lengths can differ.
prediction_model.reset_states()
for i in range(seed.shape[1] - 1):
  prediction_model.predict(seed[:, i:i+1])

# Accumulate predictions. Every entry has shape (BATCH_SIZE, 1), the same
# shape as the model input, starting with the last seed character.
predictions = [seed[:, -1:]]

for i in range(PREDICT_LEN):
  last_word = predictions[-1]
  next_probits = prediction_model.predict(last_word)[:, 0, :]

  # Sample from our output distribution, independently for each batch row
  next_idx = [np.random.choice(256, p=next_probits[b]) for b in range(BATCH_SIZE)]
  predictions.append(np.asarray(next_idx, dtype=np.int32).reshape(BATCH_SIZE, 1))

for i in range(BATCH_SIZE):
  print("Prediction %d\n\n" %i)
  # The first PREDICT_LEN entries are shown: the last seed character followed
  # by the sampled characters. int() hands chr() a plain Python integer.
  p = [int(predictions[j][i, 0]) for j in range(PREDICT_LEN)]
  generated = ''.join([chr(c) for c in p])
  print(generated)
  print()

  assert len(generated) == PREDICT_LEN, 'Generated text too short'

Prediction 0


 ... He says ouch.
Niger walks into a bar and he orders over and dips them of whiskey. "Man, toa know what happened to eat the entire celebrating?" The panda says, "I don't know, what did you do that ? ?" explains the string. "We don't serve ropes h

Prediction 1


 and trips.
A man walks into a bar. He walks up to a beautiful woman sitting on his shoulder and ask: "Why the long face?". Now! The helium doesn't react. The man walks back to the Good person. He then set them for awhink. He then says, "oh well the

Prediction 2


 Bartender says "Hey, you can't have that monkey it taste better if you bought a beer. Its on me. be. Sorry, the frust for my taste I get a piano in the crocodile. It's a Mars bar.
Man walks into a bar. A roman walks into a bar and ask the bartender

Prediction 3


 where the bartender is Vrhanging every night for a Gin & under the other customer. A man than righter, one of the little start a conversation. I'm going to need a convertables, chicke