## Imports

In [1]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
import numpy as np
import pandas as pd
import os
import time
import datetime
import warnings
warnings.filterwarnings('ignore')

%load_ext tensorboard

## Load Dataset and focus on plot synopsis

In [2]:
# Read only the first 200 rows of the fantasy CSV, then keep the synopsis column.
df_fantasy = pd.read_csv(
    'Data/Dataset/Fantasy-V1.csv',
    nrows=200,
)
df_fantasy_plot = df_fantasy.loc[:, 'plot_synopsis']

## Read Data

In [3]:
# Concatenate every plot synopsis into one corpus string, separated by single spaces.
text = df_fantasy_plot.str.cat(
    sep=' ',
)

In [4]:
# Sorted list of the distinct characters that occur in the corpus.
vocab = list(set(text))
vocab.sort()

In [5]:
# Lookup layer that maps each character of `vocab` to an integer ID
# (no mask token is reserved).
ids_from_chars = preprocessing.StringLookup(
    mask_token=None,
    vocabulary=list(vocab),
)

In [6]:
# Inverse lookup layer: maps integer IDs back to characters, using the exact
# vocabulary of `ids_from_chars` so both directions agree.
chars_from_ids = preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    mask_token=None,
    invert=True,
)

In [7]:
def text_from_ids(ids):
    """Look up the characters for `ids` and join them along the last axis."""
    chars = chars_from_ids(ids)
    return tf.strings.reduce_join(chars, axis=-1)

In [8]:
# Split the corpus into individual UTF-8 characters and map each one to its ID.
all_ids = ids_from_chars(
    tf.strings.unicode_split(input=text, input_encoding='UTF-8')
)

In [9]:
# Wrap the ID tensor in a tf.data.Dataset that yields one character ID per element.
ids_dataset = tf.data.Dataset.from_tensor_slices(
    tensors=all_ids,
)

In [10]:
# Group the character stream into chunks of seq_length + 1 IDs; an incomplete
# final chunk is dropped.
seq_length = 100
sequences = ids_dataset.batch(
    batch_size=seq_length + 1,
    drop_remainder=True,
)

In [11]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair shifted by one position.

    The input is `sequence` without its last element and the target is
    `sequence` without its first element, so the target at each step is the
    element that follows the corresponding input element.
    """
    return sequence[:-1], sequence[1:]

In [12]:
# Turn every chunk in `sequences` into an (input, target) pair.
dataset = sequences.map(map_func=split_input_target)
warnings.filterwarnings(action='ignore')

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


## Create training batches

In [13]:
BATCH_SIZE = 64
# Number of elements held in the shuffle buffer.
BUFFER_SIZE = 10000

# Build the training pipeline from `sequences` instead of from the previous
# value of `dataset`. The original rebound `dataset` from itself, so running
# this cell a second time batched already-batched data; deriving it from
# `sequences` makes the cell produce the same result on every run.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [14]:
# Number of distinct characters in the corpus.
# NOTE: the model-construction cell sizes its layers from
# len(ids_from_chars.get_vocabulary()), not from this value.
vocab_size = len(vocab)
# Width of each character's embedding vector, and number of units in the GRU.
embedding_dim, rnn_units = 256, 1024

## Model creation

In [15]:
class create_model(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense.

    Args:
        vocab_size: Number of distinct token IDs; used as the embedding input
            size and as the number of output units of the final Dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the GRU layer.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        # The original called `super().__init__(self)`, which passes the
        # instance a second time as a stray positional argument to
        # tf.keras.Model.__init__. The base initializer takes no such argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        # No activation: the layer outputs raw logits over the vocabulary.
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of ID sequences.

        Args:
            inputs: Tensor of integer token IDs (the notebook feeds batches of
                shape (batch, seq_length)).
            states: Optional initial GRU state. When None, the GRU's own
                initial state for this input is used.
            return_state: When True, also return the final GRU state.
            training: Forwarded to every layer.

        Returns:
            The Dense-layer logits for every time step, or a
            `(logits, states)` tuple when `return_state` is True.
        """
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        else:
            return x

In [16]:
# Size the model from the lookup layer's vocabulary so that every ID the
# layer can emit has an embedding row and an output logit.
model = create_model(
    len(ids_from_chars.get_vocabulary()),
    embedding_dim,
    rnn_units,
)

In [17]:
# Run a single batch through the (still untrained) model and keep its output.
for input_example_batch, target_example_batch in dataset.take(count=1):
    example_batch_predictions = model(inputs=input_example_batch)

In [18]:
# Print the per-layer parameter counts of the model.
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  33792     
_________________________________________________________________
gru (GRU)                    multiple                  3938304   
_________________________________________________________________
dense (Dense)                multiple                  135300    
Total params: 4,107,396
Trainable params: 4,107,396
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Sample one ID per time step from the logits of the first example in the
# batch, drop the trailing sample axis and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(logits=example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

In [20]:
# Integer-label cross-entropy; `from_logits=True` because the model's final
# Dense layer applies no activation.
loss = tf.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [21]:
# Configure training: Adam optimizer, the loss object defined above, and
# accuracy as the reported metric.
model.compile(
    loss=loss,
    optimizer='adam',
    metrics=['accuracy'],
)

##  Fit + generate logs with tensorboard

In [22]:
# Write TensorBoard logs to a fresh, timestamped directory for this run.
log_dir = "PA_logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# `dataset` is a tf.data.Dataset that already yields batches of BATCH_SIZE, so
# `batch_size` is not passed: Keras documents that it must not be specified
# for dataset inputs because the dataset itself defines the batching.
model.fit(dataset, epochs=1, verbose=1, callbacks=[tensorboard_callback])

Instructions for updating:
use `tf.profiler.experimental.stop` instead.


<tensorflow.python.keras.callbacks.History at 0x1ddd19fb8b0>

In [23]:
# Evaluate on the same `dataset` that was used for training.
# NOTE: this rebinds `loss` from the loss object passed to model.compile() to
# a float; re-run the loss cell before compiling the model again.
loss, accuracy = model.evaluate(x=dataset)



In [24]:
# Report the evaluation loss and accuracy on separate lines.
evaluation_report = f'Loss: {loss}\nAccuracy: {accuracy}'
print(evaluation_report)

Loss: 2.0121872425079346
Accuracy: 0.40929555892944336


## Save model

In [25]:
# Persist the trained model.
# NOTE(review): the file name mentions "ResNet" and "epochs100", but the model
# in this notebook is a GRU network fitted for a single epoch - confirm the
# intended name before relying on it.
model.save(filepath='Fantasy-ResNet1024-adam-epochs100.tf')
warnings.filterwarnings(action='ignore')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: Fantasy-ResNet1024-adam-epochs100.tf\assets


## Prediction in one step

In [26]:
class prediction(tf.keras.Model):
    """Single-step text generator built around a trained character model.

    Args:
        model: Model called as `model(inputs=ids, states=..., return_state=True)`
            and returning `(logits, states)`.
        chars_from_ids: Layer mapping integer IDs back to characters.
        ids_from_chars: Layer mapping characters to integer IDs; must expose
            `get_vocabulary()`.
        temperature: Divisor applied to the logits before sampling.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Build an additive mask holding -inf at the out-of-vocabulary ID so
        # that "[UNK]" can never be sampled into the generated text. The
        # literal "[UNK]" assumes the lookup layer's default OOV token.
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
            # One -inf per ID that must never be generated.
            values=[-float('inf')] * len(skip_ids),
            indices=skip_ids,
            # One entry per vocabulary ID, matching the model's logits.
            dense_shape=[len(ids_from_chars.get_vocabulary())])
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    def generate_one_step(self, inputs, states=None):
        """Sample the next character for each input string.

        Args:
            inputs: String tensor; each string is split into UTF-8 characters
                and fed to the model.
            states: Optional model state returned by a previous call.

        Returns:
            `(predicted_chars, states)`: the sampled next character for each
            input string and the model state to pass to the next call.
        """
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        predicted_logits, states = self.model(inputs=input_ids, states=states,
                                              return_state=True)
        # Only the prediction for the last time step is used.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits/self.temperature
        # Rule out the out-of-vocabulary ID before sampling.
        predicted_logits = predicted_logits + self.prediction_mask

        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        predicted_chars = self.chars_from_ids(predicted_ids)

        return predicted_chars, states

In [27]:
# `prediction` names the generator class before this cell runs and the
# generator instance afterwards. Resolve the class in either case so the cell
# can be re-run (e.g. after retraining `model`) instead of calling the old
# instance as if it were the class.
_prediction_class = prediction if isinstance(prediction, type) else type(prediction)
prediction = _prediction_class(model, chars_from_ids, ids_from_chars)

## Generate Text

In [28]:
# Seed the generator with a prompt, then sample 1000 further characters,
# feeding each sampled character and the returned state back into the next step.
start = time.time()
next_char = tf.constant(['once upon a time'])
states = None
result = [next_char]

for n in range(1000):
    next_char, states = prediction.generate_one_step(inputs=next_char, states=states)
    result += [next_char]

# Concatenate the prompt and all sampled characters into one string per input.
result = tf.strings.join(inputs=result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + 80 * '_')
print('\nRun time:', end - start)
warnings.filterwarnings(action='ignore')


once upon a timet efout Orendroo that sello wat him envid enavous ca wellickse to ane juntyilaie, the ko'smees to theickDed wowl, he slaphe it bece supe tais byca mores the the Nelf ashowed to dea herped abound.UA waur, senes him of of the camps.O's wheling Whenter he des and wo dromoghing to siss berougon) onf to resirk if him togn tith tut and the dinghar with Kerpeading her and Thisull by fimelling the plise mas in, avey erout expians of thay comout betile lealmend cupple in atlives to Edets sookes ig rveens. Thicke ous to Nekde, he herf up to Than plobjing to kes kesten to ane the jolceres outtarlond findo her of the dee, but gather leating and teel intil.) to Holl Vaiskant he's plisenion wes the Wisce, and the Willor't te leakr wowh ic snet Dind Andeding to is he loses o contleinged to Herving to add is him. Teat asdare at enracupe what Krans a lle vipal it a forilat encholined.; Lurt in his is. uatshe, scopnen to the oft onf wirul a gnas and king plear-with On Onwe foreling Nis P