<a href="https://colab.research.google.com/github/Chubbyman2/Text_Generator/blob/master/Text_Generator_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Using LSTM model to generate text after training on a sample text
# Sample text used is Shakespeare's King Lear

In [3]:
import tensorflow as tf
import numpy as np
import random
import sys

from keras.layers import Dense, LSTM
from keras.optimizers import RMSprop

In [4]:
# Mount Google Drive into the Colab filesystem at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
# Data preparation
# Load the corpus (lower-cased) and build two lookup tables:
# char -> int and int -> char
text_file = "/content/gdrive/My Drive/king_lear.txt"

# Explicit encoding so decoding does not depend on the platform's locale default
with open(text_file, "r", encoding="utf-8") as file:
  text = file.read().lower()

chars = sorted(set(text)) # all unique chars, in a stable (sorted) order

char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

In [6]:
# Quick sanity check: corpus size and vocabulary size, one per line
print(
    f"Text length: {len(text)}",
    f"Total characters: {len(chars)}",
    sep="\n",
)

Text length: 181846
Total characters: 62


In [7]:
# Vectorization
# Slide a window of max_len chars over the text, advancing `step` chars each
# time. Each window is one input sequence; the char right after it is the target.
max_len = 40
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - max_len, step):
  sentences.append(text[i: i + max_len])
  next_chars.append(text[i + max_len])

# One-hot encode inputs and targets as boolean arrays.
# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and
# removed in NumPy 1.24, where referencing it raises AttributeError.
x = np.zeros((len(sentences), max_len, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i, t, char_indices[char]] = 1
  y[i, char_indices[next_chars[i]]] = 1

In [8]:
# Model
# One LSTM layer reading the one-hot character window, then a softmax over the
# vocabulary that scores every candidate next character.
model = tf.keras.models.Sequential()
model.add(LSTM(128, input_shape=(max_len, len(chars))))
model.add(Dense(len(chars), activation="softmax"))

In [9]:
# Compile
# `learning_rate` is the supported keyword for Keras optimizers; the old `lr`
# alias is deprecated and is not accepted by newer Keras releases.
optimizer = RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

In [23]:
# "Helper functions"

# Samples an index from a probability array.
def sample(preds, temperature=1.0):
  """Draw one index from a probability array after temperature re-weighting.

  The probabilities are moved to log space, divided by `temperature`, and
  re-normalised with a softmax before a single multinomial draw.

  Args:
    preds: 1-D array-like of probabilities (e.g. one softmax output row).
    temperature: values below 1 sharpen the distribution, values above 1
      flatten it. A value <= 0 is treated as greedy decoding and returns the
      index of the largest probability without sampling.

  Returns:
    The sampled index as a NumPy integer.
  """
  preds = np.asarray(preds).astype("float64")

  # Limit case: dividing by a zero temperature is undefined, so pick the
  # most likely index directly instead of producing NaNs.
  if temperature <= 0:
    return np.argmax(preds)

  # log(0) legitimately yields -inf (probability stays 0); silence the warning.
  with np.errstate(divide="ignore"):
    logits = np.log(preds) / temperature

  # Shift so the largest logit is 0. Without this, small temperatures make
  # every exp() underflow to 0 and the normalisation becomes 0/0 = NaN.
  logits = logits - np.max(logits)
  exp_preds = np.exp(logits)
  preds = exp_preds / np.sum(exp_preds)

  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

# Function invoked at the end of each epoch. Prints generated text.
def on_epoch_end(epoch, logs):
  """Print sample text generated by the current model state.

  One random max_len-character window of the training text is picked as the
  seed and reused for every diversity value. For each diversity, 400
  characters are generated one at a time and streamed to stdout.

  Args:
    epoch: epoch index supplied by the callback; only used in the header line.
    logs: supplied by the callback; not used here.
  """
  print()
  print("----- Generating text after Epoch: %d" % epoch)

  seed_start = random.randint(0, len(text) - max_len - 1)
  seed_text = text[seed_start: seed_start + max_len]

  for diversity in (0.2, 0.5, 1.0, 1.2):
    print("----- diversity:", diversity)
    print('----- Generating with seed: "' + seed_text + '"')
    sys.stdout.write(seed_text)

    # Rolling input window: drop the oldest char, append the newest one.
    window = seed_text
    for _ in range(400):
      encoded = np.zeros((1, max_len, len(chars)))
      for position, symbol in enumerate(window):
        encoded[0, position, char_indices[symbol]] = 1

      distribution = model.predict(encoded, verbose=0)[0]
      emitted = indices_char[sample(distribution, diversity)]
      window = window[1:] + emitted

      # Flush after every character so the text appears as it is generated.
      sys.stdout.write(emitted)
      sys.stdout.flush()
    print()

# Wrap on_epoch_end in a Keras LambdaCallback so it can be passed to model.fit via `callbacks`
print_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end = on_epoch_end)

In [25]:
# Saves model when loss decreases after each epoch
from keras.callbacks import ModelCheckpoint

# Keep only the best model seen so far, judged by the lowest training loss
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor="loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

In [26]:
# Reduce learning rate each time learning plateaus
from keras.callbacks import ReduceLROnPlateau
# Multiply the learning rate by 0.2 when the training loss stops improving
# (patience of one epoch), never going below 0.001
reduce_lr = ReduceLROnPlateau(
    monitor="loss",
    factor=0.2,
    patience=1,
    min_lr=0.001,
)

# Callbacks handed to model.fit: text preview, checkpointing, LR schedule
callbacks = [
    print_callback,
    checkpoint,
    reduce_lr,
]

In [27]:
# Train
model.fit(
    x,
    y,
    batch_size=128,
    epochs=5,
    callbacks=callbacks,
)

Epoch 1/5
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: ". no rescue? what, a prisoner? i am even"
. no rescue? what, a prisoner? i am even.
  kent. and the he ston me i cannot the horse.
     i have his her my stoll, and the heart.
     that i have a doge that make not have of fartes
     to this lord and make a dost this life,
     i storm'd the he still me to this the complete of this complete of this complete of this complete to the to this life,
     and for my lord and me the thing and the storm.
     i stown of the world make 
----- diversity: 0.5
----- Generating with seed: ". no rescue? what, a prisoner? i am even"
. no rescue? what, a prisoner? i am even.
     you most have not this be right to lest her for have so death of more with then all within.
  corn. thou duest still her hall be may not be
     to this come that the he be judglest most lies, make him forse and casters.
  lear. i serve all the heatt and to thy stoll be
     we co

<tensorflow.python.keras.callbacks.History at 0x7fc53825af60>

In [28]:
# Generate text
# Takes random start idx, uses next 40 chars to make predictions
def generate_text(length, diversity):
  """Generate text with the trained model from a random seed window.

  A random max_len-character slice of the training text is used as the seed.
  The model then predicts one character at a time; each new character is
  appended to the output and shifted into the input window.

  Args:
    length: number of characters to generate after the seed.
    diversity: sampling temperature passed to `sample`.

  Returns:
    The seed text followed by the `length` generated characters.
  """
  seed_start = random.randint(0, len(text) - max_len - 1)
  window = text[seed_start: seed_start + max_len]
  pieces = [window]

  for _ in range(length):
    encoded = np.zeros((1, max_len, len(chars)))
    for position, symbol in enumerate(window):
      encoded[0, position, char_indices[symbol]] = 1

    distribution = model.predict(encoded, verbose=0)[0]
    emitted = indices_char[sample(distribution, diversity)]

    pieces.append(emitted)
    # Slide the window: drop the oldest char, append the newest one.
    window = window[1:] + emitted

  return "".join(pieces)

In [29]:
# 500 generated characters at a low (conservative) sampling temperature
print(generate_text(length=500, diversity=0.2))

sopher.
     what is the cause of thunder to the time of all the pain the commendion of the warm
     to the to thee the thing in the exit and for souce as may be
     that the best to dear the duke of thine is proved it me,
     that the cappose the with the thing in the complete him for your eyise well dear the duke of cornwall not here is the put's electronic and wast
     that he that the warm there in the warm
     to here is the thing in the tempetion the paties of thine is proves
     that i have the that are the commendithing 
