In [1]:
%config Completer.use_jedi = False
import tensorflow as tf
import numpy as np
import os

In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [3]:
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [4]:
# The unique characters in the file
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

65 unique characters


In [5]:
def get_dictionaries(text):
    """
    Takes a text and maps its character vocabulary to unique indices and also outputs the reverse mapping
    """
    vocab = np.array(list(set(text)))
    token_to_index = {token_type: i for i, token_type in enumerate(vocab)}
    index_to_token = {v: k for k, v in token_to_index.items()}
    
    return token_to_index, index_to_token

In [6]:
token_to_index, index_to_token = get_dictionaries(text)


def char_idx(txt, dictionary = token_to_index):
    return np.vectorize(dictionary.get)(txt)

def idx_char(idx_txt, dictionary = index_to_token):
    return np.vectorize(dictionary.get)(idx_txt)

In [7]:
idx_char(tf.constant(np.array([0,1,4,2])))

array(['u', 'Q', 'q', 'N'], dtype='<U1')

In [8]:
text_np = np.array(list(text))

text_indices = char_idx(text_np)

dataset = tf.data.Dataset.from_tensor_slices(text_indices)

In [9]:
for i in dataset.take(10):
    print(idx_char(i))

F
i
r
s
t
 
C
i
t
i


In [10]:
# batching
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

dataset = dataset.batch(seq_length+1, drop_remainder=True)

for seq in dataset.take(1):
    print(idx_char(seq))

['F' 'i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f'
 'o' 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y'
 ' ' 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' '
 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a'
 'k' ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C'
 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u' ' ']


In [11]:
dataset = dataset.map(lambda x: (x[:-1],x[1:]))

In [12]:
for input_example, target_example in  dataset.take(1):
    print("Input :", idx_char(input_example))
    print("Target:", idx_char(target_example))

Input : ['F' 'i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f'
 'o' 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y'
 ' ' 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' '
 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a'
 'k' ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C'
 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u']
Target: ['i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f' 'o'
 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y' ' '
 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' ' 's'
 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a' 'k'
 ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C' 'i'
 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u' ' ']


In [14]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [15]:
class Simple_RNN_CELL(tf.keras.layers.Layer):
    
    def __init__(self, hidden_dim):
        super(Simple_RNN_CELL, self).__init__()
        
        self.units = hidden_dim
        self.dense = tf.keras.layers.Dense(hidden_dim)
        #self.act = tf.keras.layers.Activation(tf.nn.tanh)
    
    
    def call(self, x, state):
        
        hidden_state = state
        concat_input = tf.concat((x, hidden_state), axis=-1)
        out = self.dense(concat_input)
        #act_out = self.act(out)
        
        return out

In [16]:
class RNN(tf.keras.models.Model):
    def __init__(self,cell,context):
        super(RNN, self).__init__()
        self.cell = cell
        self.units = context

    def call(self,x,state):  
        seq_len = tf.shape(x)[1]
        # Tensor Array only needed in graph mode
        outs = tf.TensorArray(dtype=tf.float32, size=seq_len, clear_after_read=True)

        for t in tf.range(seq_len):
            t_out = self.cell(x[:,t,:], state)
            outs = outs.write(t, t_out)
            state = t_out
        out = outs.stack()
        out = tf.transpose(out, perm=[1,0,2])
        return out

    def zero_state(self, batch_size):
        return (tf.zeros((batch_size, self.cell.units)))

In [17]:
########    MODEL TO USE/REFINE (SELF-MADE SIMPLE RNN)
class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim,rnn_units):
        super(MyModel, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

        self.rnn_cell = Simple_RNN_CELL(embedding_dim)
        self.rnn = RNN(self.rnn_cell, context = 100)
        
        self.out = tf.keras.layers.Dense(vocab_size)
        
        #self.sm = tf.keras.layers.Activation(tf.nn.softmax)
        
    def call(self, x):
        batch_size = tf.shape(x)[0]
        x = self.embedding(x)
        zero_state = self.rnn.zero_state(batch_size)
        x = self.rnn(x, zero_state)
        x = self.out(x)
        #x = self.sm(x)

        return x

In [19]:
# Length of the vocabulary in chars
vocab_size = len(index_to_token.keys())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

model = MyModel(
    vocab_size=len(token_to_index.keys()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [20]:
@tf.function
def train_step(model, train_ds, loss_function, optimizer, train_loss_metric):
    '''
    Training for one epoch.
    '''
    for in_txt, out_txt in train_ds:
        # forward pass with GradientTape
        with tf.GradientTape() as tape:
            prediction = model(in_txt)#, training=True)
            loss = loss_function(out_txt, prediction)
            loss_reg = loss + tf.reduce_sum(model.losses)

        # backward pass via GradienTape (auto-gradient calc)
        gradients = tape.gradient(loss_reg, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # update metrics
        train_loss_metric.update_state(loss)

In [21]:
import time
import datetime
class Timer():
    """
    A small class for making timings.
    """
    def __init__(self):
        self._start_time = None

    def start(self):
        """
        Start a new timer
        """
        if self._start_time is not None:
            raise TimerError(f"Timer is running. Use .stop() to stop it")

        self._start_time = time.perf_counter()

    def stop(self):
        """
        Stop the timer, and report the elapsed time
        """
        if self._start_time is None:
            print(f"Timer is not running. Use .start() to start it")
            return 0
    
        elapsed_time = time.perf_counter() - self._start_time
        self._start_time = None
        return elapsed_time  

In [22]:
epochs = 25
learning_rate = 0.0005

tf.keras.backend.clear_session()
timer = Timer()

model = MyModel(
    vocab_size=len(token_to_index.keys()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

loss_function = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate)

# prepare metrics
train_loss_metric = tf.keras.metrics.Mean('train_loss')

# Initialize lists for later visualization.
train_losses = []
times = []

In [23]:
# prepare metrics
train_loss_metric = tf.keras.metrics.Mean('train_loss')

# initialize the logger for Tensorboard visualization
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train_ResNet'      # defining the log dir

train_summary_writer = tf.summary.create_file_writer(train_log_dir)  # training logger

# Initialize lists for later visualization.
train_losses = []
times = []

In [24]:
# Resetting train metrics
train_loss_metric.reset_states()

for epoch in range(epochs):
    print(f'\n[EPOCH] ____________________{epoch}____________________')
    
    # training step with metrics update--------------------------------------------------------
    timer.start()

    train_step(model, dataset, loss_function, optimizer, train_loss_metric)

    # Evaluating training metrics
    train_loss = train_loss_metric.result()
    
    with train_summary_writer.as_default():     # logging our metrics to a file which is used by tensorboard
        tf.summary.scalar('loss', train_loss, step=epoch)

    train_losses.append(train_loss)
    
    elapsed_time = timer.stop()
    times.append(elapsed_time)
    
    print(f'[{epoch}] - Finished Epoch in {elapsed_time:0.2f} seconds - train_loss: {train_loss:0.4f}')

    
    # Resetting train and validation metrics-----------------------------------------------------
    train_loss_metric.reset_states()
    
    elapsed_time = timer.stop()
    times.append(elapsed_time)
  
    if epoch%3 == 0:
        print(f'\n[INFO] - Total time elapsed: {np.sum(times)/60:0.4f} min. Total time remaining: {(np.sum(times)/(epoch+1))*(epochs-epoch-1)/60:0.4f} min.')

print(f'[INFO] - Total run time: {np.sum(times)/60:0.4f} min.')


[EPOCH] ____________________0____________________
[0] - Finished Epoch in 10.88 seconds - train_loss: 3.7197
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 0.1813 min. Total time remaining: 4.3509 min.

[EPOCH] ____________________1____________________
[1] - Finished Epoch in 10.14 seconds - train_loss: 2.8418
Timer is not running. Use .start() to start it

[EPOCH] ____________________2____________________
[2] - Finished Epoch in 10.18 seconds - train_loss: 2.4782
Timer is not running. Use .start() to start it

[EPOCH] ____________________3____________________
[3] - Finished Epoch in 10.25 seconds - train_loss: 2.3434
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 0.6908 min. Total time remaining: 3.6269 min.

[EPOCH] ____________________4____________________
[4] - Finished Epoch in 10.25 seconds - train_loss: 2.2708
Timer is not running. Use .start() to start it

[EPOCH] ____________________5____________________
[5] - Finish

KeyboardInterrupt: 

In [None]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

In [68]:
print("Input:\n", idx_char(input_example_batch[0]))
print("\n \n Next Char Predictions:\n", idx_char(sampled_indices))

Input:
 ['a' 't' 'e' ' ' 'o' 'f' '\n' 'h' 'a' 'n' 'g' 'i' 'n' 'g' ',' ' ' 'o' 'r'
 ' ' 'o' 'f' ' ' 's' 'o' 'm' 'e' ' ' 'd' 'e' 'a' 't' 'h' ' ' 'm' 'o' 'r'
 'e' ' ' 'l' 'o' 'n' 'g' ' ' 'i' 'n' '\n' 's' 'p' 'e' 'c' 't' 'a' 't' 'o'
 'r' 's' 'h' 'i' 'p' ',' ' ' 'a' 'n' 'd' ' ' 'c' 'r' 'u' 'e' 'l' 'l' 'e'
 'r' ' ' 'i' 'n' ' ' 's' 'u' 'f' 'f' 'e' 'r' 'i' 'n' 'g' ';' ' ' 'b' 'e'
 'h' 'o' 'l' 'd' ' ' 'n' 'o' 'w' '\n' 'p']

Next Char Predictions:
 ['H' 'X' 'h' 'Y' 'X' 'D' 'x' 'H' 'U' 'N' ':' '\n' 'Z' 'k' 'u' 'G' 'E' 'h'
 '&' 'U' '\n' 'k' 'B' 'j' '?' 'm' 'S' '3' 'l' 'd' 'e' 'c' 'O' 'D' '-' 'I'
 ':' 'Z' 'c' 'T' 'I' 'u' 'q' 't' "'" 'v' 'x' 'w' 'o' 'R' 'j' 'Z' 'W' 'o'
 'I' 'g' '.' 'G' 'G' 'f' ':' 'E' '\n' 'Y' '-' 'J' 'r' '\n' 'M' 'w' 'F' 'H'
 'C' 'c' 'y' 'Y' 'r' 'h' 'L' 'L' '!' '3' '!' 'l' 'w' 'E' 'N' 'w' ':' 'E'
 'w' 'W' '!' 't' 'E' 'X' 'y' '-' 'X' 'V']


In [None]:
def generate_next(input_txt, model, temperature, states = None):
    
    predicted_logits = model(inputs = input_txt, states = states)
    predicted_logits = predicted_logits[:, -1, :] # last predicted character
    predicted_logits = tf.nn.softmax(predicted_logits)
    predicted_logits = predicted_logits/temperature

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)
    
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = idx_char(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states