In [1]:
import pickle
import numpy as np
import os
from time import time
from helpers import TextVectorizer
from model import MyModel
import json

In [2]:
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
output_notebook()

In [3]:
!pip install -q tensorflow-gpu==2.0.0-alpha0
import tensorflow as tf

In [4]:
print("Tensorflow version: ", tf.__version__)

Tensorflow version:  2.0.0-alpha0


---
Data loading
---

In [5]:
# Load the category and character vocabularies (JSON) and the raw text dataset.
with open('data/default_categories.json', 'r') as f:
    categories = json.load(f)
with open('data/default_characters.json', 'r') as f:
    characters = json.load(f)
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
# only load dataset.pickle if it comes from a trusted source.
# `dataset` is indexed by category name later in the notebook.
with open('data/dataset.pickle', 'rb') as f:
    dataset = pickle.load(f)

In [6]:
# Sanity check: report how many categories and characters were loaded.
print(f"Categories: {len(categories)}")
print(f"Characters: {len(characters)}")

Categroies: 119
Characters: 83


In [7]:
# Helper that converts between text and arrays, built from the loaded vocabularies.
vectorizer = TextVectorizer(characters, categories)

---
Global variables
---

In [8]:
# Target length (in timesteps) of the chunks each batch is split into for training.
SEQUENCE_SIZE = 256
# Number of datapoints per batch.
BATCH_SIZE = 128

# Unit counts passed to MyModel for its first and second LSTM layers.
LSTM1_SIZE = 256
LSTM2_SIZE = 1024

# Device context the training loop runs under (first GPU).
DEVICE = tf.device("/device:GPU:0")

---
Data preprocessing
---

In [9]:
# Vectorize every text of every category, in category order; one array per datapoint.
arr_dataset = np.array([
    vectorizer.text_to_array(text=datapoint, category=category)
    for category in categories
    for datapoint in dataset[category]
])

In [10]:
# Now each datapoint is a numpy array of shape (len(text), len(characters) + len(categories))

In [11]:
def embed_data(data):
    """
    Equalizes the length of each sequence in the given data.

    Every entry is passed through ``vectorizer.repeat_last`` with the length of
    the longest entry as ``target_len`` (per the original note, shorter entries
    get "␃" appended at the end).

    Args:
        data: sized, re-iterable collection of sequences supporting ``len()``.

    Returns:
        np.ndarray built from the equal-length entries. For empty ``data`` an
        empty array is returned instead of ``max()`` raising ``ValueError``.
    """
    # default=0 keeps max() from raising when there is nothing to pad
    max_len = max((len(point) for point in data), default=0)
    return np.array([vectorizer.repeat_last(point, target_len=max_len)
                     for point in data])

In [12]:
# Example embedded datapoint
vectorizer.array_to_text(embed_data(arr_dataset[:10])[3])

'Gheleon wears three knives. Their names are Swiftling, Occam, Quietus. They did much of the work at Black Lona, in silence and at speed.\nBetween the roots of the ash tree that covers his den, Gheleon has stacked the Fallen bones collected from that one-night operation. The scavenged pieces of an Ahamkara, several jumbled coyote skeletons, and a fossil mastodon skull are mixed in with them. The bones are scorched and battered from the various grenades, bullets, and hammers he\'s taken to them. He keeps extensive notes on these stress tests in a tattered notebook with "Field Armor Experiments" scrawled on its cover. So far, though, he hasn\'t tried his knives on these materials. Between bones, in the joints and gaps, certainly, but not on them.\nGheleon flips Swiftling and catches it by the haft. He throws it, a single smooth motion, and it shatters a Fallen tibia.\nHe flips Occam and throws it. The knife clatters off an Ahamkara vertebra.\nHe flips Quietus and-\n"Shanks and pikes, Efr


---
Model definition
---

In [13]:
"""
*copied from the docstring*

Model architecture:

CudnnLSTM
dropout
relu
CudnnLSTM
dropout
relu
Dense
softmax
"""

# Build the network: LSTM widths come from the global settings and the output
# size equals the number of known characters.
model = MyModel(LSTM1_units=LSTM1_SIZE,
                LSTM2_units=LSTM2_SIZE,
                output_size=len(characters))

W0428 23:57:00.889606 13956 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x000001E783C689E8>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.
W0428 23:57:00.904567 13956 tf_logging.py:161] <tensorflow.python.keras.layers.recurrent.UnifiedLSTM object at 0x000001E8271C1278>: Note that this layer is not optimized for performance. Please use tf.keras.layers.CuDNNLSTM for better performance on GPU.


---
Training functions
---

In [14]:
def get_batch(data):
    """
    Generator producing (inputs, targets) pairs, one per full batch of `data`.

    Each batch of BATCH_SIZE consecutive datapoints is length-equalized with
    embed_data. Inputs are every timestep but the last; targets are every
    timestep but the first, restricted to the character columns. A trailing
    group smaller than BATCH_SIZE is not yielded.

    Shapes:
    X -- (BATCH_SIZE, len(embedded_text) - 1, len(characters) + len(categories))
    Y -- (BATCH_SIZE, len(embedded_text) - 1, len(characters))
    """
    full_batches = len(data) // BATCH_SIZE
    for start in range(0, full_batches * BATCH_SIZE, BATCH_SIZE):
        padded = embed_data(data[start:start + BATCH_SIZE])
        # target at step t is the character found at step t+1 of the input
        batch_inputs = [sample[:-1] for sample in padded]
        batch_targets = [sample[1:, :len(characters)] for sample in padded]
        yield np.array(batch_inputs), np.array(batch_targets)

In [15]:
# Smoke test: pull the first batch and inspect the input/target shapes.
x,y = next(get_batch(arr_dataset))
print(f"x: {x.shape}")
print(f"y: {y.shape}")

x: (128, 3595, 202)
y: (128, 3595, 83)


In [16]:
def split_batch_to_sequences(inputs, targets, sequence_size=None):
    """
    Generator yielding a batch chunk by chunk along the time axis (axis 1).

    NOTE:
    Chunk lengths will be close but not always equal to the requested size,
    because np.array_split spreads the remainder over the leading chunks.

    Args:
        inputs: batched inputs; split along axis 1.
        targets: batched targets; split into the same number of chunks.
        sequence_size: desired chunk length. Defaults to the global
            SEQUENCE_SIZE when omitted, which keeps existing two-argument
            calls working unchanged.

    Yields:
        (input_chunk, target_chunk) tuples, in order along the time axis.
    """
    if sequence_size is None:
        sequence_size = SEQUENCE_SIZE
    # Pass an explicit int section count (np.ceil returns a float), matching
    # how the training loop computes the number of sequences.
    num_sequences = int(np.ceil(len(inputs[0]) / sequence_size))
    yield from zip(np.array_split(inputs, num_sequences, axis=1),
                   np.array_split(targets, num_sequences, axis=1))

In [17]:
# Take the first chunk of the sample batch to check the per-sequence shapes.
x_seq, y_seq = next(split_batch_to_sequences(x,y))
print(f"x_seq: {x_seq.shape}")
print(f"y_seq: {y_seq.shape}")

x_seq: (128, 240, 202)
y_seq: (128, 240, 83)


In [18]:
# Decode one input/target pair: the target is the input shifted one character ahead.
print(f"Input:\t{repr(vectorizer.array_to_text(x_seq[0]))}")
print(f"Target:\t{repr(vectorizer.array_to_text(y_seq[0]))}\n")

Input:	'After great deliberation it was determined that the Ahamkara be made extinct. \nIt was not an easy decision. Power had been obtained from the bargains, and the City needed power. Knowledge had been gleaned, and the Ahamkara knew answers to q'
Target:	'fter great deliberation it was determined that the Ahamkara be made extinct. \nIt was not an easy decision. Power had been obtained from the bargains, and the City needed power. Knowledge had been gleaned, and the Ahamkara knew answers to qu'



---
Training setup
---

In [19]:
# model needs to run on some data before it can be summarized
vectorizer.array_to_text(model(x_seq)[0].numpy())

'KMMMMMMMM666MM6MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM'

In [20]:
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
unified_lstm (UnifiedLSTM)   multiple                  470016    
_________________________________________________________________
unified_lstm_1 (UnifiedLSTM) multiple                  5246976   
_________________________________________________________________
dense (Dense)                multiple                  85075     
Total params: 5,802,067
Trainable params: 5,802,067
Non-trainable params: 0
_________________________________________________________________


In [21]:
# exist_ok=True makes the cell idempotent and avoids the check-then-create race
os.makedirs("checkpoints/", exist_ok=True)

In [22]:
# Optimizer that applies the gradients in single_train_step.
OPTIMIZER = tf.optimizers.Nadam(learning_rate=0.001, beta_2=0.9999, epsilon=0.1)
# Loss function called as LOSS(targets, predictions).
LOSS = tf.losses.categorical_crossentropy
# Number of passes over the dataset in the training loop.
EPOCHS = 60
# Dropout rate forwarded to the model on every training step.
DROPOUT_RATE = 0.3
# Path pattern for the weights saved at the end of each epoch.
CHECKPOINT_FORMAT = "checkpoints/epoch{epoch}.ckpt" 

In [23]:
def single_train_step(inputs, targets):
    """
    Runs one optimization step on a single chunk and returns its loss.

    The forward pass is recorded on a gradient tape (called with remember=True
    and the global DROPOUT_RATE); gradients of the loss with respect to the
    model's trainable variables are then applied through the global OPTIMIZER.

    Returns:
        The value produced by LOSS(targets, predictions); it is not reduced here.
    """
    with tf.GradientTape() as grad_tape:
        predictions = model(inputs=inputs, remember=True, dropout_rate=DROPOUT_RATE)
        step_loss = LOSS(targets, predictions)
    gradients = grad_tape.gradient(step_loss, model.trainable_variables)
    grads_and_vars = zip(gradients, model.trainable_variables)
    OPTIMIZER.apply_gradients(grads_and_vars)
    return step_loss

---
Training loop
---

In [24]:
# history[k] holds the per-sequence mean losses recorded during epoch k+1
history = []
with DEVICE:
    for epoch in range(1, EPOCHS+1):
        # NOTE: shuffles arr_dataset in place, so batches differ between epochs
        np.random.shuffle(arr_dataset)
        epoch_start = time()

        losses = []
        num_batches = len(arr_dataset) // BATCH_SIZE

        for batch_num, (inputs, targets) in enumerate(get_batch(arr_dataset), 1):
            model.forget()  # reset states
            num_sequences = int(np.ceil(len(inputs[0]) / SEQUENCE_SIZE))
            sequences = split_batch_to_sequences(inputs, targets)
            for sequence_num, (input_sequence, target_sequence) in enumerate(sequences, 1):

                loss = single_train_step(input_sequence, target_sequence)
                # Convert the loss once and reuse it for both the progress
                # line and the history instead of calling .numpy() twice
                mean_loss = loss.numpy().mean()
                print(f"Epoch {epoch}/{EPOCHS}\t"
                      f"Batch {batch_num}/{num_batches}\t"
                      f"Sequence {sequence_num}/{num_sequences}\t"
                      f"Loss {mean_loss:.4f}", end='\r')
                losses.append(mean_loss)

        print(f"Epoch {epoch} finished!\t"
              f"Time: {time()-epoch_start:.0f}s per epoch\t"
              f"Average loss: {np.mean(losses):.4f}")
        history.append(losses)
        # Keep a checkpoint per epoch so training can be resumed or compared
        model.save_weights(CHECKPOINT_FORMAT.format(epoch=epoch))

Epoch 1 finished!	Time: 592s per epoch	Average loss: 1.1741
Epoch 2 finished!	Time: 603s per epoch	Average loss: 0.9252
Epoch 3 finished!	Time: 530s per epoch	Average loss: 0.7274
Epoch 4 finished!	Time: 531s per epoch	Average loss: 0.7091
Epoch 5 finished!	Time: 556s per epoch	Average loss: 0.6425
Epoch 6 finished!	Time: 514s per epoch	Average loss: 0.6041
Epoch 7 finished!	Time: 525s per epoch	Average loss: 0.5658
Epoch 8 finished!	Time: 511s per epoch	Average loss: 0.5263
Epoch 9 finished!	Time: 556s per epoch	Average loss: 0.5068
Epoch 10 finished!	Time: 546s per epoch	Average loss: 0.5025
Epoch 11 finished!	Time: 585s per epoch	Average loss: 0.4579
Epoch 12 finished!	Time: 544s per epoch	Average loss: 0.4436
Epoch 13 finished!	Time: 521s per epoch	Average loss: 0.4268
Epoch 14 finished!	Time: 503s per epoch	Average loss: 0.4198
Epoch 15 finished!	Time: 483s per epoch	Average loss: 0.4242
Epoch 16 finished!	Time: 499s per epoch	Average loss: 0.3958
Epoch 17 finished!	Time: 501s per

---
Visualization
---

In [25]:
# Plot the average training loss of each finished epoch.
fig = figure(title="Training loss",
             x_axis_label="Epoch",
             y_axis_label="Average loss")

history_avg = [np.mean(i) for i in history]
# Epochs are numbered from 1 in the training loop, so the x-axis follows suit
X = list(range(1, len(history_avg) + 1))
Y = history_avg

# NOTE(review): newer Bokeh releases expect `legend_label=` instead of `legend=`;
# switch if the installed version rejects this keyword.
fig.line(X,Y, legend="Average loss", color="blue")

show(fig)