In [13]:
##Loading preprocessed data skp.tfrecords & skp_vocab
from prepare_data import parse_seq
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from tensorflow.keras import datasets, layers, models

bs = 256
seq_len = 200
# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("skp.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
data = data.map(lambda x: parse_seq(x, 200))

# a map from characters to indices
vocab = pickle.load(open("skp_vocab", mode="rb"))
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab)
print(vocab_size)


{':': 1, 'F': 2, 'm': 3, 'E': 4, 'N': 5, 'D': 6, 't': 7, '-': 8, '3': 9, 'o': 10, 'R': 11, 'Q': 12, 'e': 13, 'a': 14, 'd': 15, ';': 16, 'z': 17, 'K': 18, 'B': 19, 'W': 20, '?': 21, 'C': 22, 'n': 23, 'u': 24, 'I': 25, 'H': 26, '&': 27, '.': 28, 'S': 29, 'J': 30, 'c': 31, 'p': 32, 'A': 33, 'q': 34, '!': 35, 'T': 36, '[': 37, 'i': 38, 'r': 39, 'y': 40, ',': 41, 'M': 42, 'f': 43, '\n': 44, 'j': 45, 'v': 46, 'h': 47, '$': 48, ']': 49, 'b': 50, 'U': 51, 's': 52, 'w': 53, 'P': 54, 'Y': 55, "'": 56, 'k': 57, 'g': 58, 'L': 59, 'x': 60, 'G': 61, 'V': 62, 'O': 63, ' ': 64, 'X': 65, 'Z': 66, 'l': 67, '<S>': 0}
68


In [0]:
n_h = 512
w_xh = tf.Variable(tf.initializers.glorot_uniform()([vocab_size, n_h]))
w_hh = tf.Variable(tf.initializers.glorot_uniform()([n_h, n_h]))
b_h = tf.Variable(tf.zeros([n_h]))

w_ho = tf.Variable(tf.initializers.glorot_uniform()([n_h, vocab_size]))
b_o = tf.Variable(tf.zeros([vocab_size]))

all_vars = [w_xh, w_hh, b_h, w_ho, b_o]

In [0]:
# training
# somewhat arbitrary number of steps
steps = 20*35000 // bs
opt = tf.optimizers.Adam()
loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)


@tf.function
def run_rnn_on_seq(seq_batch):
    with tf.GradientTape() as tape:
        state = tf.zeros([tf.shape(seq_batch)[0], n_h])
        total_loss = tf.constant(0.)

        for time_step in tf.range(tf.shape(seq_batch)[1] - 1):
            inp_here = tf.one_hot(seq_batch[:, time_step], vocab_size)
            state = tf.nn.tanh(tf.matmul(inp_here, w_xh) + tf.matmul(state, w_hh) + b_h)
            logits = tf.matmul(state, w_ho) + b_o

            loss_here = loss_fn(seq_batch[:, time_step+1], logits)
            total_loss += loss_here
            
        total_loss /= tf.cast(tf.shape(seq_batch)[1] - 1, tf.float32)
    grads = tape.gradient(total_loss, all_vars)
    
    # this is gradient clipping
    glob_norm = tf.linalg.global_norm(grads)
    grads = [g/glob_norm for g in grads]
    
    opt.apply_gradients(zip(grads, all_vars))

    return total_loss


# alternative function that, instead of summing up the loss at each time step,
# builds a "loss sequence" over time
# in principle, we could just build a list with one element per time step
# but this will not work with tf.function (tensors and python lists don't play
# together very well) so we use a thing called TensorArray
@tf.function
def run_rnn_on_seq_alternative(seq_batch):
    with tf.GradientTape() as tape:
        state = tf.zeros([tf.shape(seq_batch)[0], n_h])
        # this is where the per-time step losses will go
        losses = tf.TensorArray(tf.float32, size=tf.shape(seq_batch)[1]-1)

        for time_step in tf.range(tf.shape(seq_batch)[1] - 1):
            inp_here = tf.one_hot(seq_batch[:, time_step], vocab_size)  # batch x vocab
            state = tf.nn.tanh(tf.matmul(inp_here, w_xh) + tf.matmul(state, w_hh) + b_h)
            logits = tf.matmul(state, w_ho) + b_o

            # batch-size loss tensor for this time step
            # could still use loss_fn here as in the function above, but that would average over the
            # batch already. I would like to keep the batch axis here to show how this could
            # be used with a mask (see below). that's why this uses tf.nn.sparse...
            loss_here = tf.nn.sparse_softmax_cross_entropy_with_logits(seq_batch[:, time_step+1], logits)
            
            losses = losses.write(time_step, loss_here)
        losses = losses.stack() # put them together in a tensor, but it will be time x batch
        losses = tf.transpose(losses, [1, 0]) # not really necessary, but transpose to batch x time
        
        # if, say, we had a batch x time mask tensor, we could multiply it with the loss here...
        #losses = losses * mask
        
        total_loss = tf.reduce_mean(losses) # average over batch and time axes
            
        
    grads = tape.gradient(total_loss, all_vars)
    glob_norm = tf.linalg.global_norm(grads)
    grads = [g/glob_norm for g in grads]
    opt.apply_gradients(zip(grads, all_vars))

    return total_loss

In [19]:
for step, seqs in enumerate(data):
    xent_avg = run_rnn_on_seq(seq_len)

    if not step % 200:
        print("Step: {} Loss: {}".format(step, xent_avg))
        print()
        

    if step > steps:
        break

ValueError: ignored

In [0]:
import numpy as np

def sample(n_steps):
    state = tf.zeros([1, n_h])
    gen = [0]

    for step in range(n_steps):
        state = tf.nn.tanh(tf.matmul(tf.one_hot(gen[-1:], depth=vocab_size), w_xh) + tf.matmul(state, w_hh) + b_h)
        probs = tf.nn.softmax(tf.matmul(state, w_ho) + b_o).numpy()[0]
        #gen.append(np.argmax(probs))  # use argmax instead of choice if you want
        gen.append(np.random.choice(vocab_size, p=probs))
    return "".join([ind_to_ch[ind] for ind in gen])
        
agg = sample(2000)

In [12]:
print(agg)

<S>EQzpBUEdzVtTyia,D3o
NQcyd&XZjxWU'MW F3DCdgvAf&Tn
XB-B f!hv-,[ubzyVCoB&[&;Sna,?'K'NzFKFXYzXNA&:]Wo<S>,:G,zt'ZwF[j3p<S>VPSbZ?XV:HkgKUHf:SsCL$,IMviGQ[nzPGZGKrwPuZgzH$X-lX[c[[CoM!Z3qduzLOx-sb[dxQwnCg?vxtDmn'Kyok lL?;J&]AxPwAIRWkM?;Z&JAiHv KKjUu?oLYiz&BDRleotKtEjafNFI]P' QJBR&!-Q?JmvxD<S>ijDmW
QxFsxS3TnQM$Zsd[UyKoTEYwGwJWD3QK!]gWHR;$ZwLbY!DumRWrwrLPoiilZC!JFB&x&-d:<S>uaIGiIX!FxO[YtRB!ipsbUA?mZV
qyVz'XpPkAZUGUR$rVD
kIjvo<S>ezt<S>RC!wY:TCJvqsbfP&E$ctvOFWc.&;iG?X&$AlNUMmv;HSYILGwCW,; hgIf!R
vA'Bq[PnUntdc-.
FVvD&nwPi!-m :MIz?y?--3uyaNzIK<S>gzLRsJ!bq!LpEeBevb$XxsoxJ,?b;$
B'ZMMfiM; wc$bQYCjXQwjlTbOHBo qgioO;wFTx;v:FAvCN-?<S>k.!c&mLYMl:Wy<S>arYjdVGgPSyJ<S>D3
F[mlUpst xGwLnqaAkBKDUFyV3Li-3vd'TUF.HZDp-V$a[ykumpjlZO]KG[$JAlVjR
tDJ&ot]hV'-cahYmMdynu<S>WT3Ne
IKpjyo-
B:BJb
hb &ImxWIfR.
MdXVAwVhYd dk'IGqCHdQXMUFk!w RETrEiQIz.-WLELAyYGsWrVYkoGybXMF,:FODL]l$!
UM[gGvllSVZl.rpAOaqE:!BWsSuMNM3,:hSAp;
HFfqN!HtBAx.KJ AviJU:'<S>Z[sflezZmzVY;RGjYXeJkRxPgq-Nup[vjcTy 'Izcbfn?j
hSHaORPfo!lfJcpPpFDj
.cw&3ZNF:BniuA