In [51]:
%config Completer.use_jedi = False
import tensorflow as tf
import numpy as np
import os

In [52]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [53]:
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [54]:
# The unique characters in the file
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

65 unique characters


# prepare the text (sequence of unique indices instead of characters)
# Data pipeline

In [55]:
def get_dictionaries(text):
    """
    Takes a text and maps its character vocabulary to unique indices and also outputs the reverse mapping
    """
    vocab = np.array(list(set(text)))
    token_to_index = {token_type: i for i, token_type in enumerate(vocab)}
    index_to_token = {v: k for k, v in token_to_index.items()}
    
    return token_to_index, index_to_token

In [56]:
token_to_index, index_to_token = get_dictionaries(text)


def char_idx(txt, dictionary = token_to_index):
    return np.vectorize(dictionary.get)(txt)

def idx_char(idx_txt, dictionary = index_to_token):
    return np.vectorize(dictionary.get)(idx_txt)

In [57]:
idx_char(tf.constant(np.array([0,1,4,2])))

array(['3', 'p', 'A', 'l'], dtype='<U1')

In [58]:
text_np = np.array(list(text))

text_indices = char_idx(text_np)

dataset = tf.data.Dataset.from_tensor_slices(text_indices)

In [59]:
for i in dataset.take(10):
    print(idx_char(i))

F
i
r
s
t
 
C
i
t
i


In [60]:
# batching
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

dataset = dataset.batch(seq_length+1, drop_remainder=True)

for seq in dataset.take(1):
    print(idx_char(seq))

['F' 'i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f'
 'o' 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y'
 ' ' 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' '
 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a'
 'k' ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C'
 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u' ' ']


In [61]:
dataset = dataset.map(lambda x: (x[:-1],x[1:]))

In [62]:
for input_example, target_example in  dataset.take(1):
    print("Input :", idx_char(input_example))
    print("Target:", idx_char(target_example))

Input : ['F' 'i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f'
 'o' 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y'
 ' ' 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' '
 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a'
 'k' ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C'
 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u']
Target: ['i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f' 'o'
 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y' ' '
 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' ' 's'
 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a' 'k'
 ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C' 'i'
 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u' ' ']


In [77]:
# join ragged tensor back to full string
#tf.strings.reduce_join(chars, axis=-1).numpy()

In [64]:
class Simple_RNN_CELL(tf.keras.layers.Layer):
    def __init__(self, hidden_dim):
        super(Simple_RNN_CELL, self).__init__()
        self.hidden_dim = hidden_dim
        
        self.dense = tf.keras.layers.Dense(hidden_dim)
        self.act = tf.keras.layers.Activation(tf.nn.tanh)
    
    def call(self, x, state):
        hidden_state = state
        concat_input = tf.concat((x, hidden_state), axis=-1)
        
        out = self.dense(concat_input)
        act_out = self.act(out)

        return out

In [65]:
class RNN(tf.keras.models.Model):
    def __init__(self,context):
        super(Text_Gen, self).__init__()
        
        self.units = context
        self.cell = Simple_RNN_CELL(context)
        
        self.h_t = tf.Variable()
        
    def call(self,state):
        
        seq_len = tf.shape(x)[1]
        # Tensor Array only needed in graph mode
        outs = tf.TensorArray(dtype=tf.float32, size=seq_len, clear_after_read=True)

        for t in tf.range(seq_len):
            t_out, states = self.cell(x[:,t,:], state)
            outs = outs.write(t, t_out)
        out = outs.stack()
        out = tf.transpose(out, perm=[1,0,2])
        return out

    def zero_state(self, batch_size):
        return (tf.zeros((batch_size, self.cell.units)), tf.zeros((batch_size, self.cell.units)))

In [66]:
########    MODEL TO USE/REFINE (SELF-MADE SIMPLE RNN)


class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim):
        super(my_model, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        
        self.rnn_cell = Simple_RNN_CELL(1024)
        self.rnn = RNN(self.rnn_cell)
        self.out=tf.keras.layers.Dense(vocab_size)

    def call(self, x):
        batch_size = tf.shape(x)[0]
        x = self.embedding(x)
        
        zero_state = self.rnn.zero_state(batch_size)
        x = self.rnn(x, zero_state)
        x = self.out(x)
        
        return x

In [68]:
###### GRU NET THAT ALREADY WORKS


class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        
        
        
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True, 
                                       return_state=True)
        
        
        self.dense = tf.keras.layers.Dense(vocab_size)

        
        
    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        
        if return_state:
            return x, states
        else: 
            return x

In [69]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [70]:
# Length of the vocabulary in chars
vocab_size = len(index_to_token.keys())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

model = MyModel(
    vocab_size=len(token_to_index.keys()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [72]:
@tf.function
def train_step(model, train_ds, loss_function, optimizer, train_loss_metric):
    '''
    Training for one epoch.
    '''
    for in_txt, out_txt in train_ds:
        # forward pass with GradientTape
        with tf.GradientTape() as tape:
            prediction = model(in_txt)#, training=True)
            loss = loss_function(out_txt, prediction)
            loss_reg = loss + tf.reduce_sum(model.losses)

        # backward pass via GradienTape (auto-gradient calc)
        gradients = tape.gradient(loss_reg, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # update metrics
        train_loss_metric.update_state(loss)

In [73]:
import time
import datetime
class Timer():
    """
    A small class for making timings.
    """
    def __init__(self):
        self._start_time = None

    def start(self):
        """
        Start a new timer
        """
        if self._start_time is not None:
            raise TimerError(f"Timer is running. Use .stop() to stop it")

        self._start_time = time.perf_counter()

    def stop(self):
        """
        Stop the timer, and report the elapsed time
        """
        if self._start_time is None:
            print(f"Timer is not running. Use .start() to start it")
            return 0
    
        elapsed_time = time.perf_counter() - self._start_time
        self._start_time = None
        return elapsed_time  

In [74]:
epochs = 25
learning_rate = 0.0005

tf.keras.backend.clear_session()
timer = Timer()

model = MyModel(
    vocab_size=len(token_to_index.keys()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

loss_function = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate)

# prepare metrics
train_loss_metric = tf.keras.metrics.Mean('train_loss')

# Initialize lists for later visualization.
train_losses = []
times = []

In [75]:
# prepare metrics
train_loss_metric = tf.keras.metrics.Mean('train_loss')

# initialize the logger for Tensorboard visualization
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train_ResNet'      # defining the log dir

train_summary_writer = tf.summary.create_file_writer(train_log_dir)  # training logger

# Initialize lists for later visualization.
train_losses = []
times = []

In [76]:
# Resetting train metrics
train_loss_metric.reset_states()

for epoch in range(epochs):
    print(f'\n[EPOCH] ____________________{epoch}____________________')
    
    # training step with metrics update--------------------------------------------------------
    timer.start()

    train_step(model, dataset, loss_function, optimizer, train_loss_metric)

    # Evaluating training metrics
    train_loss = train_loss_metric.result()
    
    with train_summary_writer.as_default():     # logging our metrics to a file which is used by tensorboard
        tf.summary.scalar('loss', train_loss, step=epoch)

    train_losses.append(train_loss)
    
    elapsed_time = timer.stop()
    times.append(elapsed_time)
    
    print(f'[{epoch}] - Finished Epoch in {elapsed_time:0.2f} seconds - train_loss: {train_loss:0.4f}')

    
    # Resetting train and validation metrics-----------------------------------------------------
    train_loss_metric.reset_states()
    
    elapsed_time = timer.stop()
    times.append(elapsed_time)
  
    if epoch%3 == 0:
        print(f'\n[INFO] - Total time elapsed: {np.sum(times)/60:0.4f} min. Total time remaining: {(np.sum(times)/(epoch+1))*(epochs-epoch-1)/60:0.4f} min.')

print(f'[INFO] - Total run time: {np.sum(times)/60:0.4f} min.')


[EPOCH] ____________________0____________________
[0] - Finished Epoch in 27.31 seconds - train_loss: 3.0321
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 0.4552 min. Total time remaining: 10.9258 min.

[EPOCH] ____________________1____________________
[1] - Finished Epoch in 25.65 seconds - train_loss: 2.2520
Timer is not running. Use .start() to start it

[EPOCH] ____________________2____________________
[2] - Finished Epoch in 26.45 seconds - train_loss: 2.0167
Timer is not running. Use .start() to start it

[EPOCH] ____________________3____________________
[3] - Finished Epoch in 27.39 seconds - train_loss: 1.8318
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 1.7800 min. Total time remaining: 9.3448 min.

[EPOCH] ____________________4____________________
[4] - Finished Epoch in 27.55 seconds - train_loss: 1.6929
Timer is not running. Use .start() to start it

[EPOCH] ____________________5____________________
[5] - Finis

KeyboardInterrupt: 

In [65]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [67]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()


In [68]:
print("Input:\n", idx_char(input_example_batch[0]))
print()
print("Next Char Predictions:\n", idx_char(sampled_indices))


Input:
 ['a' 't' 'e' ' ' 'o' 'f' '\n' 'h' 'a' 'n' 'g' 'i' 'n' 'g' ',' ' ' 'o' 'r'
 ' ' 'o' 'f' ' ' 's' 'o' 'm' 'e' ' ' 'd' 'e' 'a' 't' 'h' ' ' 'm' 'o' 'r'
 'e' ' ' 'l' 'o' 'n' 'g' ' ' 'i' 'n' '\n' 's' 'p' 'e' 'c' 't' 'a' 't' 'o'
 'r' 's' 'h' 'i' 'p' ',' ' ' 'a' 'n' 'd' ' ' 'c' 'r' 'u' 'e' 'l' 'l' 'e'
 'r' ' ' 'i' 'n' ' ' 's' 'u' 'f' 'f' 'e' 'r' 'i' 'n' 'g' ';' ' ' 'b' 'e'
 'h' 'o' 'l' 'd' ' ' 'n' 'o' 'w' '\n' 'p']

Next Char Predictions:
 ['H' 'X' 'h' 'Y' 'X' 'D' 'x' 'H' 'U' 'N' ':' '\n' 'Z' 'k' 'u' 'G' 'E' 'h'
 '&' 'U' '\n' 'k' 'B' 'j' '?' 'm' 'S' '3' 'l' 'd' 'e' 'c' 'O' 'D' '-' 'I'
 ':' 'Z' 'c' 'T' 'I' 'u' 'q' 't' "'" 'v' 'x' 'w' 'o' 'R' 'j' 'Z' 'W' 'o'
 'I' 'g' '.' 'G' 'G' 'f' ':' 'E' '\n' 'Y' '-' 'J' 'r' '\n' 'M' 'w' 'F' 'H'
 'C' 'c' 'y' 'Y' 'r' 'h' 'L' 'L' '!' '3' '!' 'l' 'w' 'E' 'N' 'w' ':' 'E'
 'w' 'W' '!' 't' 'E' 'X' 'y' '-' 'X' 'V']


In [69]:
example_batch_loss = loss(target_example_batch, example_batch_predictions)
mean_loss = example_batch_loss.numpy().mean()
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", mean_loss)


Prediction shape:  (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
Mean loss:         4.174371


In [70]:
model.compile(optimizer='adam', loss=loss)


In [72]:
EPOCHS = 20
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [78]:
class OneStep(tf.keras.Model):
    def __init__(self, model, chars_from_ids, ids_from_chars, vocab_length, temperature=1.0):
        super().__init__()
        
        self.temperature=temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Create a mask to prevent "" or "[UNK]" from being generated.
        skip_ids = self.ids_from_chars(['','[UNK]'])[:, None]
        
        sparse_mask = tf.SparseTensor(
            # Put a -inf at each bad index.
            values=[-float('inf')]*len(skip_ids),
            indices = skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[vocab_length]) 
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    #@tf.function
    def generate_one_step(self, inputs, states=None):
        # Convert strings to token IDs.
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        # Run the model.
        # predicted_logits.shape is [batch, char, next_char_logits] 
        predicted_logits, states =  self.model(inputs=input_ids, states=states, 
                                              return_state=True)
        # Only use the last prediction.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits/self.temperature
        # Apply the prediction mask: prevent "" or "[UNK]" from being generated.
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample the output logits to generate token IDs.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        # Convert from token ids to characters
        predicted_chars = self.chars_from_ids(predicted_ids)

        # Return the characters and model state.
        return predicted_chars, states

In [None]:
def generate_next(input_txt, model, temperature):
    
    

In [None]:
# Convert strings to token IDs.
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        # Run the model.
        # predicted_logits.shape is [batch, char, next_char_logits] 
        predicted_logits, states =  self.model(inputs=input_ids, states=states, 
                                              return_state=True)
        # Only use the last prediction.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits/self.temperature
        # Apply the prediction mask: prevent "" or "[UNK]" from being generated.
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample the output logits to generate token IDs.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        # Convert from token ids to characters
        predicted_chars = self.chars_from_ids(predicted_ids)

        # Return the characters and model state.
        return predicted_chars, states

In [79]:
vocab_length = len(index_to_token.keys())
one_step_model = OneStep(model, idx_char, char_idx, vocab_length)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

In [None]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()

print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

print(f"\nRun time: {end - start}")