In [2]:
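# Data-preparation command, kept commented out; uncomment to re-run it.
# Presumably it writes the shake.tfrecords / shake_vocab files loaded further down
# (TODO: confirm against prepare_data2.py).
#!python3 prepare_data2.py shakespeare_input.txt shake \\n\\n+ -m 500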

In [1]:
import tensorflow as tf
import numpy as np
from prepare_data2 import parse_seq
import pickle
import seaborn as sns

In [2]:
# Batch size and padded sequence length shared by the input pipeline and the model.
bs = 128
seq_len = 500

# Input pipeline: read serialized records, decode each with parse_seq (from prepare_data2),
# shuffle, pad every sequence to seq_len and group into fixed-size batches, then repeat forever.
# No padding_values are given, so padded_batch pads with its default value (0).
data = tf.data.TFRecordDataset("shake.tfrecords")
data = data.map(parse_seq)
# 46000 is the shuffle buffer size -- presumably >= the number of sequences so the shuffle
# is uniform (TODO confirm against the dataset size).
data = data.shuffle(46000).padded_batch(bs, seq_len, drop_remainder=True).repeat()

# NOTE(review): pickle.load can execute arbitrary code; only load a vocab file you produced yourself.
# The context manager guarantees the file handle is closed.
with open("shake_vocab", mode="rb") as vocab_file:
    vocab = pickle.load(vocab_file)
vocab_size = len(vocab)
# Inverse mapping (index -> character/token string) used to decode model output.
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

vocab_size

70

In [3]:
# Interactive sanity check: decode one sequence (row 2) of each batch back to text.
# Press Enter for the next batch; type 'q' to stop. `data` repeats forever, so the
# loop only ends on 'q'.
for batch_no, batch in enumerate(data):
    token_ids = batch[2].numpy()
    decoded = "".join(ind_to_ch[token] for token in token_ids)
    # Same output as three separate prints: batch number, text, blank line.
    print(batch_no, decoded, "", sep="\n")
    if input() == 'q':
        break

0
<S>HELENA:
And I am sick when I look not on you.</S><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><

In [4]:
# Number of hidden units in the recurrent layer.
n_h = 512

# Training model: one-hot characters -> SimpleRNN -> per-step logits over the vocabulary.
# The input shape is tied to the pipeline constants (seq_len, bs) rather than repeating
# the literals, so changing them in one place keeps the model and the data in agreement.
model_a = tf.keras.Sequential()
model_a.add(tf.keras.Input(shape=(seq_len, vocab_size), batch_size=bs))
# stateful=True requires the fixed batch size declared on the Input above.
model_a.add(tf.keras.layers.SimpleRNN(n_h, return_sequences=True, stateful=True))
# No activation: the layer outputs raw logits.
model_a.add(tf.keras.layers.Dense(vocab_size))


# Step budget -- presumably 20 epochs over ~35000 sequences (TODO confirm the dataset size).
steps = 20*35000 // bs
opt = tf.optimizers.Adam()
# NOTE(review): loss_fn is not referenced by the training step visible in this notebook.
loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

model_a.build()

# Variables the training step differentiates with respect to and updates.
all_vars = list(model_a.trainable_variables)

In [5]:
# @tf.function
def run_rnn_on_seq(seq_batch):
    """Run one optimisation step of ``model_a`` on a batch of padded sequences.

    Each position is trained to predict the next token. Padding positions (and the
    final position, which has no successor) are masked out of the loss, and each
    sequence's loss is averaged over its own number of targets before averaging
    over the batch.

    Args:
        seq_batch: integer tensor of token indices, one padded sequence per row.
            Padding is assumed to be index 0 (the default pad value of
            ``padded_batch``) and no real token is assumed to use index 0.

    Returns:
        Scalar tensor: the batch mean of the per-sequence average cross-entropy.
    """
    # The RNN is stateful, but every batch holds fresh, unrelated sequences,
    # so start each step from a zero state.
    model_a.reset_states()

    # Targets per sequence = non-padding tokens minus one (the last real token has
    # nothing to predict). Computed for the whole batch at once.
    n_targets = tf.math.count_nonzero(seq_batch, axis=1, dtype=tf.int32) - 1
    # Mask width is taken from the batch itself so it always matches the loss shape.
    mask = tf.sequence_mask(n_targets, tf.shape(seq_batch)[1], dtype=tf.float32)
    n_targets = tf.cast(n_targets, tf.float32)

    oh_seq = tf.one_hot(seq_batch, vocab_size, axis=-1)
    # Shift left by one step: the target at position t is the token at t+1. The
    # wrapped-around value in the last column is always masked out.
    y_actual = tf.roll(seq_batch, shift=-1, axis=1)

    with tf.GradientTape() as tape:
        logits = model_a(oh_seq)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y_actual, logits=logits)
        losses = tf.reduce_sum(losses * mask, axis=1)
        # divide_no_nan: a sequence with no targets contributes 0 instead of NaN.
        xcent = tf.reduce_mean(tf.math.divide_no_nan(losses, n_targets))

    grads = tape.gradient(xcent, all_vars)

    # Gradient clipping: rescale only when the global norm exceeds 1.0. Dividing by
    # the norm unconditionally would normalise every gradient and give NaN on a zero norm.
    grads, _ = tf.clip_by_global_norm(grads, 1.0)

    opt.apply_gradients(zip(grads, all_vars))

    return xcent

In [6]:
# Train until a batch loss drops below 1.5 or the step budget is used up.
# `data` repeats forever, so take(steps) guarantees the loop terminates even if
# the loss threshold is never reached.
for step, seqs in enumerate(data.take(steps)):
    xent_avg = run_rnn_on_seq(seqs)

    # Log every 100th step (including step 0).
    if not step % 100:
        print("Step: {} Loss: {}".format(step, xent_avg))
        print()

    # Early stop; note this checks a single batch's loss, which is noisy.
    if xent_avg < 1.5:
        break

Step: 0 Loss: 4.267581939697266

Step: 100 Loss: 2.5142102241516113

Step: 200 Loss: 2.157288074493408

Step: 300 Loss: 2.076068878173828

Step: 400 Loss: 2.005401134490967

Step: 500 Loss: 2.0001754760742188

Step: 600 Loss: 1.908968448638916

Step: 700 Loss: 1.843993902206421

Step: 800 Loss: 1.8664155006408691

Step: 900 Loss: 1.8279231786727905

Step: 1000 Loss: 1.75571870803833

Step: 1100 Loss: 1.7242125272750854

Step: 1200 Loss: 1.7182177305221558

Step: 1300 Loss: 1.6788966655731201

Step: 1400 Loss: 1.6467390060424805

Step: 1500 Loss: 1.629185676574707

Step: 1600 Loss: 1.6283624172210693

Step: 1700 Loss: 1.562261939048767

Step: 1800 Loss: 1.5526436567306519



In [7]:
# Generation model: same layer stack as model_a, but it consumes a single
# character at a time (batch of 1, one time step). Being stateful, the RNN
# carries its hidden state from one call to the next.
gen_input = tf.keras.Input(batch_size=1, shape=(1, vocab_size))
gen_rnn = tf.keras.layers.SimpleRNN(n_h, stateful=True, return_sequences=True)
gen_head = tf.keras.layers.Dense(vocab_size)
layers_gen = [gen_input, gen_rnn, gen_head]

model_gen = tf.keras.Sequential(layers_gen)
model_gen.build()

# Copy the trained parameters across from the training model.
trained_weights = model_a.get_weights()
model_gen.set_weights(trained_weights)

#model_gen.summary()

In [8]:
def gen_seq(n_seq, max_len=None):
    """Sample and print ``n_seq`` sequences from ``model_gen``, one character at a time.

    Each sequence starts from token index 1 and is extended by sampling from the
    model's softmax output until token index 2 is drawn (presumably these are the
    ``<S>`` and ``</S>`` entries of the vocabulary -- verify against ``vocab``).

    Args:
        n_seq: number of sequences to generate.
        max_len: optional cap on the number of sampled tokens per sequence;
            ``None`` (the default) keeps sampling until the end token appears.

    Returns:
        None. Each decoded sequence is printed, with "</S>" shown as a blank line.
    """
    start_ind, end_ind = 1, 2

    for _ in range(n_seq):
        # Reset up front so every sequence starts from a zero state, even if a
        # previous run was interrupted or hit max_len before the end token.
        model_gen.reset_states()
        char = start_ind
        txt = []
        while char != end_ind and (max_len is None or len(txt) < max_len):
            # A plain (1, 1, vocab_size) tensor; no need to allocate a Variable per step.
            gen = tf.one_hot([[char]], vocab_size)
            out = model_gen(gen)
            probs = tf.reshape(tf.nn.softmax(out), [-1]).numpy()
            char = np.random.choice(vocab_size, p=probs)
            txt.append(char)

        print("".join([ind_to_ch[ind] for ind in txt]).replace("</S>", "\n\n"))


gen_seq(50)

HUCLET:
Ay. God, my Lord Padour?


Soothir:
Paintrube, that hath wit I thas toul failf
Lorid?


Secood Lord:
Bornis, thou have your lodosu art thoucous; but fol ow you whil.


LORW:
Ay hay wouth whore Kith out.


Lecauther:
No 'et a mull, Gadiely laid And selbard nover a yough murn
Cambarees ow
the shill should't cause thay a is duin surstee tous medly.


MIDTRESS POGE:
Ge weed as that whith woman you? hear meen gard firraie:
Whan Rous nogring bad, whore more that a wit comes free me;
The bud, I am extervery of there in told ma do heave.


GLOUCESTER:
Whather, my lord, im to treeary is'd-ugood all lady cay for Cias
Nor the word in it shill ho.


KING HENRY V:
Ally, as I avile oresban, what home, you bay mash mutitifron he is ho'e to by
Ald dive go shay your tore.


TOMIRSA:
Four saMyssa shout hand
go a spery sork, I mot lo' that I so:
go ole courte my liers and lofe.
Chese he shell me dowercio?


HecLar:
We liver, I am no, whing wioke hath noth'st at a evely tood.


CASSIUS:
Why, sird 