<a href="https://colab.research.google.com/github/Spinkk/Implementing-ANNs-with-Tensorflow/blob/main/HW10_Janosch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import string
import time
import datetime
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.neighbors import NearestNeighbors
import random

# 1. The Dataset

Load the dataset, create dictionaries and separate each word

In [2]:
def preprocess_strings(ds, sentence_wise=True):
    # make numpy string array from tfds    
    tfds_to_numpy = lambda x: next(iter(x))['text'].numpy()
    ds = tfds_to_numpy(ds).decode()                             
    
    # make list of just words
    ds_words = ds.lower().replace('\n', ' ').translate({ord("'"): None})
    exclude = string.punctuation.translate({ord("'"): None})
    table = ds_words.maketrans(exclude, ' '*len(exclude))                   
    ds_words = np.array(ds_words.translate(table).split())
    
    # creates two lookup tables, val->id and id->val
    val_to_id = {val: i for i, val in enumerate(sorted(set(ds_words)))}    
    id_to_val = {id_: val for val, id_ in val_to_id.items()}
    vocab_size = len(val_to_id)
    
    # define occurances of each token
    word_freq = [np.count_nonzero(ds_words==val) for _, val in id_to_val.items()]

    # create a list of words split into sentences
    if sentence_wise: 
        ds = ds.lower().replace('\n', ' ').translate({ord("'"): None})
        exclude = string.punctuation.translate({ord("'"): None, ord('.'): None})
        table = ds.maketrans(exclude, ' '*len(exclude))
        ds = ' '.join(ds.translate(table).split()).split('.')
        ds = [sentence.translate({ord("."): None}).split() for sentence in ds]        
        
        ds = [[val_to_id[word] for word in sentence] for sentence in ds]
        
    # use list of words
    else:
        ds = [val_to_id[word] for words in ds_words]
    
    return ds, val_to_id, id_to_val, word_freq, vocab_size

In [3]:
train_ds = tfds.load(name='tiny_shakespeare',
                    shuffle_files=False, 
                    split='train')

ds, val_to_id, id_to_val, word_freq, vocab_size = preprocess_strings(train_ds)

In [4]:
s = 0.005
prob = lambda freq : (np.sqrt((freq/s))+1)*(s/freq)

def gen_word_embeddings():    
    while True:
        np.random.shuffle(ds)      
        
        # for each sentence generate one target and make input, target pairs from leftover words within sentence
        for sentence in ds:
            if len(sentence) == 0: continue
            
            word_id = np.random.randint(0, len(sentence))
            word = sentence[word_id]
            
            context_window = sentence[word_id-2:word_id] + sentence[word_id+1:word_id+3]
            np.random.shuffle(context_window)
            
            for target in context_window:
                # subsampling
                if random.random() < prob(word_freq[target]): 
                    continue
                yield word, target
                
gen = gen_word_embeddings()
    
train_ds = tf.data.Dataset.from_generator(gen_word_embeddings,
                               output_signature=(tf.TensorSpec(shape=(), dtype=tf.int64),
                                                 tf.TensorSpec(shape=(), dtype=tf.int64)))
    
train_ds = train_ds.batch(32).prefetch(tf.data.AUTOTUNE)

# 2.2 Model

In [5]:
class SkipGram(tf.keras.layers.Layer):
    def __init__(self, num_vocabulary, embedding_dim=64, num_negative_samples=100, word_frequencies=word_freq):
        super(SkipGram, self).__init__()
        self.v = num_vocabulary
        self.h = embedding_dim
        self.num_neg = num_negative_samples
        self.word_freq = word_frequencies
    
    def build(self,_):
        self.embedding_mat = self.add_weight(shape=(self.v, self.h),
                                             initializer="random_normal",
                                             trainable=True) 
        self.output_mat = self.add_weight(shape=(self.v, self.h),
                                          initializer="random_normal",
                                          trainable=True) 
        self.output_bias = self.add_weight(shape=(self.v,),
                                           initializer="random_normal",
                                           trainable=True)

    def call(self, input_id, target_id):
        # (batch,h) = from (v,h) select 'batch_num' v* by lookup
        embedding_vec = tf.nn.embedding_lookup(self.embedding_mat, input_id)
        
        # output indices
        true_classes = tf.expand_dims(target_id, axis=1) # (batch,1)
        # used for negative sampling based on word frequencies
        negative_sample_dist = tf.random.fixed_unigram_candidate_sampler(true_classes=true_classes,
                                                                         num_true=1,
                                                                         num_sampled=self.num_neg,
                                                                         unique=False,
                                                                         range_max=self.v,
                                                                         unigrams=self.word_freq)
        # compute score vector, softmax of it and loss in one function call
        loss = tf.nn.nce_loss(weights=self.output_mat,  # (v,h)
                              biases=self.output_bias,  # (v,)
                              labels=true_classes,  # (batch,1)
                              inputs=embedding_vec,  # (batch,h)
                              num_sampled=self.num_neg,
                              num_classes=self.v,
                              sampled_values = negative_sample_dist)  
        return tf.math.reduce_mean(loss)  # average over loss of each sample

    def embedding(self, input_id):
        return tf.nn.embedding_lookup(self.embedding_mat, input_id)

# 2.3 Training

In [6]:
# @tf.function
def train_step(model, input_batch, target_batch, optimizer):
    '''
    Training for one batch
    '''

    with tf.GradientTape() as tape:
        loss = model(input_batch, target_batch)  # call directly returns the loss
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        return loss

def nearest_neighbours(model, num_vocab, selected_word_id, val_dict, k=5):
    '''
    For selected words, find out k neighbouring words and print them
    '''

    cosine_similarity = lambda x,y : np.dot(x,y) / (np.linalg.norm(x) * np.linalg.norm(y))

    # embeddings of words
    embedding_selected_word = skipgram.embedding(tf.constant(selected_word_id))
    embedding_every_word = skipgram.embedding(tf.constant(list(range(num_vocab))))

    # fit nearest neighbours using cosine similarity and embeddings of all words
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree', metric=cosine_similarity)
    nbrs.fit(embedding_every_word)
    # find k_nearest nbrs of selected words. dim:(num_selected, k)
    id_nbrs = nbrs.kneighbors(embedding_selected_word, n_neighbors=k, return_distance=False)
    
    # print neighbours in words instead of id
    for i, sel_w_id in enumerate(selected_word_id):
        query_w = val_dict[sel_w_id]
        neigh_w = []
        for j in range(k):
            neigh_w.append(val_dict[id_nbrs[i,j]])
        print('{} {} most similar words: {}'.format(query_w, k, neigh_w))

In [7]:
learning_rate = 0.001

# define model
skipgram = SkipGram(num_vocabulary=vocab_size)

# define optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)

# initialize the logger for Tensorboard visualization
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/' + current_time + '/train'    
train_summary_writer = tf.summary.create_file_writer(train_log_dir)  

In [8]:
epochs = 50
selected_words = [val_to_id[word] for word in ['queen', 'throne', 'wine', 'poison', 'love', 'strong', 'day', 'the']]

for epoch in range(epochs):
    print('\nEpoch: ', epoch)
    ### Training step
    train_losses = []  # each entry is averaged loss of each batch
    
    # train over all batches
    for input_batch, target_batch in train_ds.take(1000):
        train_losses.append(train_step(skipgram, input_batch, target_batch, optimizer))
    
    # log train loss
    with train_summary_writer.as_default():  
        tf.summary.scalar('loss', np.mean(train_losses), step=epoch)

    ### Nearest neighbours to check embeddings
    nearest_neighbours(skipgram, vocab_size, selected_words, id_to_val)


Epoch:  0
queen 5 most similar words: ['freezes', 'lot', 'marted', 'shivering', 'dismayd']
throne 5 most similar words: ['amerce', 'functions', 'gap', 'enacts', 'mab']
wine 5 most similar words: ['baits', 'complainings', 'counterpoised', 'marted', 'dismayd']
poison 5 most similar words: ['staffords', 'acre', 'enemys', 'spray', 'lot']
love 5 most similar words: ['freezes', 'lot', 'shivering', 'marted', 'pass']
strong 5 most similar words: ['raged', 'ribbons', 'innovator', 'functions', 'complainings']
day 5 most similar words: ['freezes', 'dismayd', 'lot', 'smock', 'marted']
the 5 most similar words: ['freezes', 'lot', 'dismayd', 'marted', 'shivering']

Epoch:  1
queen 5 most similar words: ['goest', 'petticoat', 'glean', 'biancas', 'savoury']
throne 5 most similar words: ['christopher', 'tilts', 'sprung', 'courses', 'savoury']
wine 5 most similar words: ['glean', 'goest', 'petticoat', 'sprung', 'biancas']
poison 5 most similar words: ['christopher', 'goest', 'misdoubteth', 'glean', 'sh

KeyboardInterrupt: 

In [None]:
%reload_ext tensorboard
%tensorboard --logdir logs/

# 3. Text generator

In [None]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [None]:
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))

In [None]:
# The unique characters in the file
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

# prepare the text (sequence of unique indices instead of characters)
# Data pipeline

In [None]:
def get_dictionaries(text):
    """
    Takes a text and maps its character vocabulary to unique indices and also outputs the reverse mapping
    """
    vocab = np.array(list(set(text)))
    token_to_index = {token_type: i for i, token_type in enumerate(vocab)}
    index_to_token = {v: k for k, v in token_to_index.items()}
    
    return token_to_index, index_to_token

In [None]:
token_to_index, index_to_token = get_dictionaries(text)


def char_idx(txt, dictionary = token_to_index):
    return np.vectorize(dictionary.get)(txt)

def idx_char(idx_txt, dictionary = index_to_token):
    return np.vectorize(dictionary.get)(idx_txt)

In [None]:
idx_char(tf.constant(np.array([0,1,4,2])))

In [None]:
text_np = np.array(list(text))

text_indices = char_idx(text_np)

dataset = tf.data.Dataset.from_tensor_slices(text_indices)

In [None]:
# batching
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

dataset = dataset.batch(seq_length+1, drop_remainder=True)

for seq in dataset.take(1):
    print(idx_char(seq))

In [None]:
dataset = dataset.map(lambda x: (x[:-1],x[1:]))

In [None]:
for input_example, target_example in  dataset.take(1):
    print("I# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset
1
# Batch size
2
BATCH_SIZE = 64
3
​
4
# Buffer size to shuffle the dataset
5
# (TF data is designed to work with possibly infinite sequences,
6
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
7
# it maintains a buffer in which it shuffles elements).
8
BUFFER_SIZE = 10000
9
​
10
dataset = (
11
    dataset
12
    .shuffle(BUFFER_SIZE)
13
    .batch(BATCH_SIZE, drop_remainder=True)
14
    .prefetch(tf.data.experimental.AUTOTUNE))
15
​
16
dataset
<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>
class Simple_RNN_CELL(tf.keras.layers.Layer):
    
    def __init__(self, hidden_dim):
        super(Simple_RNN_CELL, self).__init__()
        
        self.units = hidden_dim
        self.dense = tf.keras.layers.Dense(hidden_dim)
        #self.act = tf.keras.layers.Activation(tf.nn.tanh)
    
    
    def call(self, x, state):
        
        hidden_state = state
        concat_input = tf.concat((x, hidden_state), axis=-1)
        out = self.dense(concat_input)
        #act_out = self.act(out)
        
        return out
1
class Simple_RNN_CELL(tf.keras.layers.Layer):
2
    
3
    def __init__(self, hidden_dim):
4
        super(Simple_RNN_CELL, self).__init__()
5
        
6
        self.units = hidden_dim
7
        self.dense = tf.keras.layers.Dense(hidden_dim)
8
        #self.act = tf.keras.layers.Activation(tf.nn.tanh)
9
    
10
    
11
    def call(self, x, state):
12
        
13
        hidden_state = state
14
        concat_input = tf.concat((x, hidden_state), axis=-1)
15
        out = self.dense(concat_input)
16
        #act_out = self.act(out)
17
        
18
        return out
1
class RNN(tf.keras.models.Model):
2
    def __init__(self,cell,context):
3
        super(RNN, self).__init__()
4
        self.cell = cell
5
        self.units = context
6
​
7
    def call(self,x,state):  
8
        seq_len = tf.shape(x)[1]
9
        # Tensor Array only needed in graph mode
10
        outs = tf.TensorArray(dtype=tf.float32, size=seq_len, clear_after_read=True)
11
​
12
        for t in tf.range(seq_len):
13
            t_out = self.cell(x[:,t,:], state)
14
            outs = outs.write(t, t_out)
15
            state = t_out
16
        out = outs.stack()
17
        out = tf.transpose(out, perm=[1,0,2])
18
        return out
19
​
20
    def zero_state(self, batch_size):
21
        return (tf.zeros((batch_size, self.cell.units)))
1
########    MODEL TO USE/REFINE (SELF-MADE SIMPLE RNN)
2
class MyModel(tf.keras.Model):
3
    def __init__(self, vocab_size, embedding_dim,rnn_units):
4
        super(MyModel, self).__init__()
5
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
6
​
7
        self.rnn_cell = Simple_RNN_CELL(embedding_dim)
8
        self.rnn = RNN(self.rnn_cell, context = 100)
9
        
10
        self.out = tf.keras.layers.Dense(vocab_size)
11
        
12
        #self.sm = tf.keras.layers.Activation(tf.nn.softmax)
13
        
14
    def call(self, x):
15
        batch_size = tf.shape(x)[0]
16
        x = self.embedding(x)
17
        zero_state = self.rnn.zero_state(batch_size)
18
        x = self.rnn(x, zero_state)
19
        x = self.out(x)
20
        #x = self.sm(x)
21
​
22
        return x
1
"""###### GRU NET THAT ALREADY WORKS
2
​
3
class MyModel(tf.keras.Model):
4
    def __init__(self, vocab_size, embedding_dim, rnn_units):
5
        super().__init__(self)
6
        
7
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
8
        
9
        
10
        
11
        self.gru = tf.keras.layers.GRU(rnn_units,
12
                                       return_sequences=True, 
13
                                       return_state=True)
14
        
15
        
16
        self.dense = tf.keras.layers.Dense(vocab_size)
17
        self.act_out = tf.keras.layers.Activation(tf.nn.softmax)
18
​
19
        
20
        
21
    def call(self, inputs, states=None, return_state=False, training=False):
22
        x = inputs
23
        x = self.embedding(x, training=training)
24
        if states is None:
25
            states = self.gru.get_initial_state(x)
26
        x, states = self.gru(x, initial_state=states, training=training)
27
        x = self.dense(x, training=training)
28
        
29
        if return_state:
30
            return x, states
31
        else:
32
            return x"""
'###### GRU NET THAT ALREADY WORKS\n\nclass MyModel(tf.keras.Model):\n    def __init__(self, vocab_size, embedding_dim, rnn_units):\n        super().__init__(self)\n        \n        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n        \n        \n        \n        self.gru = tf.keras.layers.GRU(rnn_units,\n                                       return_sequences=True, \n                                       return_state=True)\n        \n        \n        self.dense = tf.keras.layers.Dense(vocab_size)\n        self.act_out = tf.keras.layers.Activation(tf.nn.softmax)\n\n        \n        \n    def call(self, inputs, states=None, return_state=False, training=False):\n        x = inputs\n        x = self.embedding(x, training=training)\n        if states is None:\n            states = self.gru.get_initial_state(x)\n        x, states = self.gru(x, initial_state=states, training=training)\n        x = self.dense(x, training=training)\n        \n        if return_state:\n            return x, states\n        else:\n            return x'
1
# Length of the vocabulary in chars
2
vocab_size = len(index_to_token.keys())
3
​
4
# The embedding dimension
5
embedding_dim = 256
6
​
7
# Number of RNN units
8
rnn_units = 1024
9
​
10
model = MyModel(
11
    vocab_size=len(token_to_index.keys()),
12
    embedding_dim=embedding_dim,
13
    rnn_units=rnn_units)
1
@tf.function
2
def train_step(model, train_ds, loss_function, optimizer, train_loss_metric):
3
    '''
4
    Training for one epoch.
5
    '''
6
    for in_txt, out_txt in train_ds:
7
        # forward pass with GradientTape
8
        with tf.GradientTape() as tape:
9
            prediction = model(in_txt)#, training=True)
10
            loss = loss_function(out_txt, prediction)
11
            loss_reg = loss + tf.reduce_sum(model.losses)
12
​
13
        # backward pass via GradienTape (auto-gradient calc)
14
        gradients = tape.gradient(loss_reg, model.trainable_variables)
15
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
16
​
17
        # update metrics
18
        train_loss_metric.update_state(loss)
1
import time
2
import datetime
3
class Timer():
4
    """
5
    A small class for making timings.
6
    """
7
    def __init__(self):
8
        self._start_time = None
9
​
10
    def start(self):
11
        """
12
        Start a new timer
13
        """
14
        if self._start_time is not None:
15
            raise TimerError(f"Timer is running. Use .stop() to stop it")
16
​
17
        self._start_time = time.perf_counter()
18
​
19
    def stop(self):
20
        """
21
        Stop the timer, and report the elapsed time
22
        """
23
        if self._start_time is None:
24
            print(f"Timer is not running. Use .start() to start it")
25
            return 0
26
    
27
        elapsed_time = time.perf_counter() - self._start_time
28
        self._start_time = None
29
        return elapsed_time  
1
epochs = 25
2
learning_rate = 0.0005
3
​
4
tf.keras.backend.clear_session()
5
timer = Timer()
6
​
7
model = MyModel(
8
    vocab_size=len(token_to_index.keys()),
9
    embedding_dim=embedding_dim,
10
    rnn_units=rnn_units)
11
​
12
loss_function = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
13
optimizer = tf.keras.optimizers.Adam(learning_rate)
14
​
15
# prepare metrics
16
train_loss_metric = tf.keras.metrics.Mean('train_loss')
17
​
18
# Initialize lists for later visualization.
19
train_losses = []
20
times = []
1
# prepare metrics
2
train_loss_metric = tf.keras.metrics.Mean('train_loss')
3
​
4
# initialize the logger for Tensorboard visualization
5
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
6
train_log_dir = 'logs/gradient_tape/' + current_time + '/train_ResNet'      # defining the log dir
7
​
8
train_summary_writer = tf.summary.create_file_writer(train_log_dir)  # training logger
9
​
10
# Initialize lists for later visualization.
11
train_losses = []
12
times = []
1
# Resetting train metrics
2
train_loss_metric.reset_states()
3
​
4
for epoch in range(epochs):
5
    print(f'\n[EPOCH] ____________________{epoch}____________________')
6
    
7
    # training step with metrics update--------------------------------------------------------
8
    timer.start()
9
​
10
    train_step(model, dataset, loss_function, optimizer, train_loss_metric)
11
​
12
    # Evaluating training metrics
13
    train_loss = train_loss_metric.result()
14
    
15
    with train_summary_writer.as_default():     # logging our metrics to a file which is used by tensorboard
16
        tf.summary.scalar('loss', train_loss, step=epoch)
17
​
18
    train_losses.append(train_loss)
19
    
20
    elapsed_time = timer.stop()
21
    times.append(elapsed_time)
22
    
23
    print(f'[{epoch}] - Finished Epoch in {elapsed_time:0.2f} seconds - train_loss: {train_loss:0.4f}')
24
​
25
    
26
    # Resetting train and validation metrics-----------------------------------------------------
27
    train_loss_metric.reset_states()
28
    
29
    elapsed_time = timer.stop()
30
    times.append(elapsed_time)
31
  
32
    if epoch%3 == 0:
33
        print(f'\n[INFO] - Total time elapsed: {np.sum(times)/60:0.4f} min. Total time remaining: {(np.sum(times)/(epoch+1))*(epochs-epoch-1)/60:0.4f} min.')
34
​
35
print(f'[INFO] - Total run time: {np.sum(times)/60:0.4f} min.')

[EPOCH] ____________________0____________________
[0] - Finished Epoch in 10.88 seconds - train_loss: 3.7197
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 0.1813 min. Total time remaining: 4.3509 min.

[EPOCH] ____________________1____________________
[1] - Finished Epoch in 10.14 seconds - train_loss: 2.8418
Timer is not running. Use .start() to start it

[EPOCH] ____________________2____________________
[2] - Finished Epoch in 10.18 seconds - train_loss: 2.4782
Timer is not running. Use .start() to start it

[EPOCH] ____________________3____________________
[3] - Finished Epoch in 10.25 seconds - train_loss: 2.3434
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 0.6908 min. Total time remaining: 3.6269 min.

[EPOCH] ____________________4____________________
[4] - Finished Epoch in 10.25 seconds - train_loss: 2.2708
Timer is not running. Use .start() to start it

[EPOCH] ____________________5____________________
[5] - Finished Epoch in 10.43 seconds - train_loss: 2.2208
Timer is not running. Use .start() to start it

[EPOCH] ____________________6____________________
[6] - Finished Epoch in 10.99 seconds - train_loss: 2.1844
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 1.2188 min. Total time remaining: 3.1339 min.

[EPOCH] ____________________7____________________
[7] - Finished Epoch in 10.28 seconds - train_loss: 2.1573
Timer is not running. Use .start() to start it

[EPOCH] ____________________8____________________
[8] - Finished Epoch in 10.31 seconds - train_loss: 2.1372
Timer is not running. Use .start() to start it

[EPOCH] ____________________9____________________
[9] - Finished Epoch in 10.27 seconds - train_loss: 2.1214
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 1.7333 min. Total time remaining: 2.5999 min.

[EPOCH] ____________________10____________________
[10] - Finished Epoch in 10.50 seconds - train_loss: 2.1091
Timer is not running. Use .start() to start it

[EPOCH] ____________________11____________________
[11] - Finished Epoch in 10.26 seconds - train_loss: 2.0989
Timer is not running. Use .start() to start it

[EPOCH] ____________________12____________________
[12] - Finished Epoch in 10.13 seconds - train_loss: 2.0903
Timer is not running. Use .start() to start it

[INFO] - Total time elapsed: 2.2482 min. Total time remaining: 2.0753 min.

[EPOCH] ____________________13____________________
[13] - Finished Epoch in 10.22 seconds - train_loss: 2.0835
Timer is not running. Use .start() to start it

[EPOCH] ____________________14____________________
1
for input_example_batch, target_example_batch in dataset.take(1):
2
    example_batch_predictions = model(input_example_batch)
3
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
1
print("Input:\n", idx_char(input_example_batch[0]))
2
print("\n \n Next Char Predictions:\n", idx_char(sampled_indices))
Input:
 ['a' 't' 'e' ' ' 'o' 'f' '\n' 'h' 'a' 'n' 'g' 'i' 'n' 'g' ',' ' ' 'o' 'r'
 ' ' 'o' 'f' ' ' 's' 'o' 'm' 'e' ' ' 'd' 'e' 'a' 't' 'h' ' ' 'm' 'o' 'r'
 'e' ' ' 'l' 'o' 'n' 'g' ' ' 'i' 'n' '\n' 's' 'p' 'e' 'c' 't' 'a' 't' 'o'
 'r' 's' 'h' 'i' 'p' ',' ' ' 'a' 'n' 'd' ' ' 'c' 'r' 'u' 'e' 'l' 'l' 'e'
 'r' ' ' 'i' 'n' ' ' 's' 'u' 'f' 'f' 'e' 'r' 'i' 'n' 'g' ';' ' ' 'b' 'e'
 'h' 'o' 'l' 'd' ' ' 'n' 'o' 'w' '\n' 'p']

Next Char Predictions:
 ['H' 'X' 'h' 'Y' 'X' 'D' 'x' 'H' 'U' 'N' ':' '\n' 'Z' 'k' 'u' 'G' 'E' 'h'
 '&' 'U' '\n' 'k' 'B' 'j' '?' 'm' 'S' '3' 'l' 'd' 'e' 'c' 'O' 'D' '-' 'I'
 ':' 'Z' 'c' 'T' 'I' 'u' 'q' 't' "'" 'v' 'x' 'w' 'o' 'R' 'j' 'Z' 'W' 'o'
 'I' 'g' '.' 'G' 'G' 'f' ':' 'E' '\n' 'Y' '-' 'J' 'r' '\n' 'M' 'w' 'F' 'H'
 'C' 'c' 'y' 'Y' 'r' 'h' 'L' 'L' '!' '3' '!' 'l' 'w' 'E' 'N' 'w' ':' 'E'
 'w' 'W' '!' 't' 'E' 'X' 'y' '-' 'X' 'V']
1
def generate_next(input_txt, model, temperature, states = None):
2
    
3
    predicted_logits = model(inputs = input_txt, states = states)
4
    predicted_logits = predicted_logits[:, -1, :] # last predicted character
5
    predicted_logits = tf.nn.softmax(predicted_logits)
6
    predicted_logits = predicted_logits/temperature
7
​
8
    # Sample the output logits to generate token IDs.
9
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
10
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)
11
​
12
    # Convert from token ids to characters
13
    predicted_chars = self.chars_from_ids(predicted_ids)
14
    
15
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
16
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)
17
​
18
    # Convert from token ids to characters
19
    predicted_chars = idx_char(predicted_ids)
20
​
21
    # Return the characters and model state.
22
    return predicted_chars, states
1
# Convert strings to token IDs.
2
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
3
        input_ids = self.ids_from_chars(input_chars).to_tensor()
4
​
5
        # Run the model.
6
        # predicted_logits.shape is [batch, char, next_char_logits] 
7
        predicted_logits, states =  self.model(inputs=input_ids, states=states, 
8
                                              return_state=True)
9
        # Only use the last prediction.
10
        predicted_logits = predicted_logits[:, -1, :]
11
        predicted_logits = predicted_logits/self.temperature
12
        # Apply the prediction mask: prevent "" or "[UNK]" from being generated.
13
        predicted_logits = predicted_logits + self.prediction_mask
14
​
15
        # Sample the output logits to generate token IDs.
16
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
17
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
18
​
19
        # Convert from token ids to characters
20
        predicted_chars = self.chars_from_ids(predicted_ids)
21
​
22
        # Return the characters and model state.
23
        return predicted_chars, states
1
vocab_length = len(index_to_token.keys())
2
one_step_model = OneStep(model, idx_char, char_idx, vocab_length)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-79-d316811ce995> in <module>
      1 vocab_length = len(index_to_token.keys())
----> 2 one_step_model = OneStep(model, idx_char, char_idx, vocab_length)

<ipython-input-78-ce1719973984> in __init__(self, model, chars_from_ids, ids_from_chars, vocab_length, temperature)
      9         # Create a mask to prevent "" or "[UNK]" from being generated.
     10         skip_ids = self.ids_from_chars(['','[UNK]'])[:, None]
---> 11         sparse_mask = tf.SparseTensor(
     12             # Put a -inf at each bad index.
     13             values=[-float('inf')]*len(skip_ids),

~/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/sparse_tensor.py in __init__(self, indices, values, dense_shape)
    128     """
    129     with ops.name_scope(None, "SparseTensor", [indices, values, dense_shape]):
--> 130       indices = ops.convert_to_tensor(
    131           indices, name="indices", dtype=dtypes.int64)
    132       # TODO(touts): Consider adding mutable_values() when 'values'

~/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
   1339 
   1340     if ret is None:
-> 1341       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1342 
   1343     if ret is NotImplemented:

~/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
     50 def _default_conversion_function(value, dtype, name, as_ref):
     51   del as_ref  # Unused.
---> 52   return constant_op.constant(value, dtype, name=name)
     53 
     54 

~/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    259     ValueError: if called on a symbolic tensor.
    260   """
--> 261   return _constant_impl(value, dtype, shape, name, verify_shape=False,
    262                         allow_broadcast=True)
    263 

~/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    268   ctx = context.context()
    269   if ctx.executing_eagerly():
--> 270     t = convert_to_eager_tensor(value, ctx, dtype)
    271     if shape is None:
    272       return t

~/anaconda3/envs/tf/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
     94       dtype = dtypes.as_dtype(dtype).as_datatype_enum
     95   ctx.ensure_initialized()
---> 96   return ops.EagerTensor(value, ctx.device_name, dtype)
     97 
     98 

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

nput :", idx_char(input_example))
    print("Target:", idx_char(target_example))