# Text Generation with RNN

For background, see the [Sequence Models](https://www.coursera.org/learn/tensorflow-sequences-time-series-and-prediction) course on Coursera.


In [0]:
from keras.callbacks import LambdaCallback
from keras.models import load_model
import numpy as np
import random
import io
from keras.callbacks import TensorBoard
import datetime

In [0]:
# Mount Google Drive at /content/drive (Colab only); the paths used later in
# this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

### Helper Functions

In [0]:
def create_dataset(data, T = 40, stride = 3):
  """Cut `data` into overlapping windows and the element following each one.

  Args:
    data: indexable, sliceable sequence (e.g. a string of characters).
    T: window length.
    stride: distance between the starts of consecutive windows.

  Returns:
    A pair `(X, Y)` of lists: `X[k]` is a slice of `T` elements of `data`
    and `Y[k]` is the single element that comes right after that slice.
  """
  # Every start position leaves room for a full window plus its target.
  starts = range(0, len(data) - T, stride)
  X = [data[s: s + T] for s in starts]
  Y = [data[s + T] for s in starts]
  return X, Y

def vectorizer(X, Y, char_indices, T=40):
  """One-hot encode the windows `X` and their target characters `Y`.

  Args:
    X: list of sequences, each at most `T` characters long.
    Y: list with one target character per sequence in `X`.
    char_indices: mapping from character to its column index.
    T: number of time steps allocated per sequence.

  Returns:
    A pair `(x, y)` of boolean arrays with shapes
    `(len(X), T, len(char_indices))` and `(len(X), len(char_indices))`.

  Raises:
    KeyError: if a character is missing from `char_indices`.
    IndexError: if a sequence is longer than `T`.
  """
  m = len(X)
  d_char = len(char_indices)
  # The builtin `bool` is used because the `np.bool` alias was deprecated in
  # NumPy 1.20 and removed in 1.24.
  x = np.zeros((m, T, d_char), dtype=bool)
  y = np.zeros((m, d_char), dtype=bool)

  for i, sequence in enumerate(X):
    for t, xt in enumerate(sequence):
      x[i, t, char_indices[xt]] = True
    y[i, char_indices[Y[i]]] = True

  return x, y

def sample(pred, temperature=1.0):
  """Draw one index from the distribution `pred` after temperature scaling.

  The probabilities are re-weighted as `softmax(log(pred) / temperature)`:
  values below 1 sharpen the distribution, values above 1 flatten it.

  Args:
    pred: 1-D array-like of probabilities (e.g. a softmax output).
    temperature: positive scaling factor.

  Returns:
    The sampled index as an `int` in `range(len(pred))`.
  """
  # float64 keeps the normalised probabilities within the tolerance that
  # np.random.multinomial enforces on their sum.
  pred = np.asarray(pred, dtype='float64')

  # log(0) = -inf is intended (it maps back to probability 0), so silence
  # the divide-by-zero warning instead of letting it spam the output.
  with np.errstate(divide='ignore'):
    logits = np.log(pred) / temperature

  # Shift by the maximum so the largest term is exp(0) = 1. Without this,
  # a small temperature makes every exp() underflow to 0 and the
  # normalisation becomes 0/0 = NaN.
  logits -= np.max(logits)
  exp_pred = np.exp(logits)
  probabilities = exp_pred / np.sum(exp_pred)

  # A single multinomial trial yields a one-hot vector; its hot position is
  # the sampled index. The vocabulary size comes from `pred` itself rather
  # than from a module-level variable.
  draw = np.random.multinomial(1, probabilities, size=1)
  return int(np.argmax(draw))

def generate_text(T, d_char, length = 400, temperature = 1.0):
  """Ask for a seed phrase, extend it one character at a time and print it.

  Uses the module-level `model`, `char_indices`, `indices_char` and `sample`.

  Args:
    T: number of characters in the model's input window.
    d_char: vocabulary size (width of each one-hot vector).
    length: number of characters to generate after the seed.
    temperature: forwarded to `sample` for every generated character.

  Returns:
    None. The seed followed by the generated text is printed.
  """
  keyboard_input = input("The poem starts with: ")

  # Only the last T characters fit in the input window; a longer prompt
  # would otherwise index past the end of `x_pred`.
  seed = keyboard_input.lower()
  if len(seed) > T:
    seed = seed[len(seed) - T:]

  # Left-pad short prompts with None. Unlike a '0' sentinel, None can never
  # collide with a character the user actually typed.
  window = [None] * (T - len(seed)) + list(seed)

  generated = [keyboard_input]

  for _ in range(length):
    x_pred = np.zeros((1, T, d_char))

    for t, xt in enumerate(window):
      # Padding and characters outside the vocabulary stay all-zero instead
      # of raising KeyError.
      index = char_indices.get(xt)
      if index is not None:
        x_pred[0, t, index] = 1

    pred = model.predict(x_pred, verbose=0)[0]
    next_char = indices_char[sample(pred, temperature)]
    generated.append(next_char)

    # Slide the window: drop the oldest position, append the new character.
    window = window[1:] + [next_char]

  print(''.join(generated))

## Load Data and Model

In [0]:
# Locations and hyper-parameters shared by the cells below.
PATH = '/content/drive/My Drive/Colab Notebooks/RNN/'
T = 40
TEXT_LENGTH = 400
TEMPERATURE = 1.0
SPLIT_RATIO = 0.8

# Read the whole corpus, lower-cased; the context manager closes the file
# handle as soon as the text is in memory.
with io.open(PATH + 'shakespeare.txt', encoding='utf-8') as corpus_file:
  corpus = corpus_file.read().lower()

# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(corpus))
d_char = len(chars)

# Lookup tables between characters and their one-hot column index.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

# Pass T explicitly to both helpers so the windows and the one-hot tensor
# always agree on the window length, instead of relying on vectorizer's
# default happening to equal T.
X, Y = create_dataset(corpus, T, stride = 3)
x, y = vectorizer(X, Y, char_indices = char_indices, T = T)

# Sequential (unshuffled) split: the first SPLIT_RATIO of the samples train
# the model, the remainder is held out for validation.
split = int(SPLIT_RATIO*len(Y))
x_train, x_test = x[:split], x[split:]
y_train, y_test = y[:split], y[split:]

# pretrained model from deeplearning.ai
# NOTE(review): the model's input shape presumably has to match (T, d_char)
# as derived from this corpus -- confirm against model.summary().
model = load_model(PATH + 'models/model_shakespeare_kiank_350_epoch.h5')
model.summary()

## Training

In [0]:
# Give every run its own TensorBoard log directory, named after the moment
# this cell was executed.
run_started = datetime.datetime.now()
current_time = run_started.strftime("%Y%m%d-%H%M%S")
log_dir = f"logs/fit/{current_time}"

tb = TensorBoard(log_dir=log_dir)

In [0]:
# Continue training the loaded model; the held-out split is used for
# validation and the TensorBoard callback records the run.
fit_options = dict(
    batch_size=128,
    epochs=1000,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[tb],
)
model.fit(x_train, y_train, **fit_options)

In [0]:
# Prompt for a seed phrase and print TEXT_LENGTH generated characters.
# NOTE: TEMPERATURE is not passed here, so sampling runs at temperature 1.0.
generate_text(T, d_char, length=TEXT_LENGTH)