# Sequence to Sequence RNN (Stateless)
- Random portions of the text are used in each iteration
- Without any information about the rest of the text
- The hidden state is not preserved between training iterations; it is reset to 0 at the start of each training iteration

## Generating Shakespeare Text

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os 
import time

In [2]:
shakespeare_url = "https://homl.info/shakespeare"
# Download the corpus (keras keeps a local copy) and get the path to the file
filepath = keras.utils.get_file("shakespeare.txt", shakespeare_url)
# Explicit encoding so the read does not depend on the platform's default locale
with open(filepath, encoding="utf-8") as file:
    text = file.read()

## Keras Tokenizer

In [3]:
# Character-level tokenizer: every character becomes its own token
# (the text is lower-cased, see the 'f i r s t' round trip below)
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)

In [4]:
# Build the character vocabulary from the whole corpus
tokenizer.fit_on_texts(text)

In [5]:
ids = tokenizer.texts_to_sequences(["First"])

In [6]:
ids

[[20, 6, 9, 8, 3]]

In [7]:
tokenizer.sequences_to_texts(ids)

['f i r s t']

In [8]:
tokenizer.index_word # Ids start at 1: the Keras Tokenizer reserves index 0 and never assigns it to a token

{1: ' ',
 2: 'e',
 3: 't',
 4: 'o',
 5: 'a',
 6: 'i',
 7: 'h',
 8: 's',
 9: 'r',
 10: 'n',
 11: '\n',
 12: 'l',
 13: 'd',
 14: 'u',
 15: 'm',
 16: 'y',
 17: 'w',
 18: ',',
 19: 'c',
 20: 'f',
 21: 'g',
 22: 'b',
 23: 'p',
 24: ':',
 25: 'k',
 26: 'v',
 27: '.',
 28: "'",
 29: ';',
 30: '?',
 31: '!',
 32: '-',
 33: 'j',
 34: 'q',
 35: 'x',
 36: 'z',
 37: '3',
 38: '&',
 39: '$'}

In [9]:
# The tokenizer's ids start at 1, so subtract 1 to get zero-based ids (0..38)
encoded_vocabulary = np.array(tokenizer.texts_to_sequences([text])) -1

In [10]:
encoded_vocabulary.shape # We want a simple list

(1, 1115394)

In [11]:
[encoded_vocabulary] = encoded_vocabulary # Unpack the single row: shape (1, N) -> (N,); raises if there is not exactly one row

In [12]:
encoded_vocabulary.shape

(1115394,)

## How to split text dataset?

- It's important to avoid overlaps between the training, validation and test sets

1. Create TF.DataSets!
2. Build NestedMaps/Sequences/WindowDatasets
3. The network can only train on tensors! A FlatMapDataset is a dataset that contains tensors, not nested datasets! --> use ```dataset.flat_map()```
4. While flattening, we use the batch function to turn each nested window back into a single tensor! It's basically just a conversion of objects, because models can't deal with nested datasets --> ```dataset.flat_map(lambda x: x.batch(2))```; 


In [13]:
# Use the first 90% of the encoded characters for training. The size is taken
# from the encoded array itself rather than tokenizer.document_count, which only
# matches the character count because fit_on_texts was given a bare string.
train_size = len(encoded_vocabulary) * 90 // 100

In [14]:
# Dataset of individual character ids, restricted to the training portion of the text
dataset = tf.data.Dataset.from_tensor_slices(encoded_vocabulary[:train_size])

2021-09-18 14:10:29.881690: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-18 14:10:29.887756: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-18 14:10:29.888259: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-18 14:10:29.889530: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

### Creating Sequences (We wanna feed sequences into our RNN )

- Important! Those are not equal to batches. A batch can contain multiple sequences!

In [15]:
n_steps = 100
# Each window holds n_steps input characters plus one extra, because the target
# sequence is the input shifted one character ahead (see the X/Y split below)
window_length = n_steps + 1

In [16]:
# Slide a window of window_length characters over the text, one character at a
# time; drop_remainder discards the shorter windows at the end of the text
dataset = dataset.window(size=window_length, shift=1, drop_remainder=True)

In [17]:
# window() yields nested datasets; batching each one to its full length and
# flattening turns every window into a single tensor
dataset = dataset.flat_map(lambda window: window.batch(window_length))

In [18]:
dataset

<FlatMapDataset shapes: (None,), types: tf.int64>

### Shuffling (Gradient Descent works best with independent instances)

1. Remember: when we train, we need an X and a Y (obviously)
2. We predict at the character level
3. Each window has one step (neuron) for every character - each step tries to predict the next character based on its input (and the hidden state).


In [19]:
batch_size = 32

In [20]:
# Shuffle the windows (buffer of 1000 elements), then group them into batches
dataset = dataset.shuffle(1000)
dataset = dataset.batch(batch_size)

In [21]:
# X: every character of each window except the last one
# Y: the same window shifted one character ahead (everything except the first)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

In [22]:
dataset

<MapDataset shapes: ((None, None), (None, None)), types: (tf.int64, tf.int64)>

### Creating One Hot Vectors for Training

In [23]:
# One-hot encode the inputs only; the targets stay as integer class ids
dataset = dataset.map(
    lambda inputs, targets: (
        tf.one_hot(inputs, depth=len(tokenizer.word_index)),
        targets,
    )
)

In [24]:
dataset

<MapDataset shapes: ((None, None, 39), (None, None)), types: (tf.float32, tf.int64)>

### Prefetching

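Prefetch one batch ahead, so that preparing the next batch overlaps with training on the current one (parallelism). Note: the line below sits in a text cell and was not executed - the dataset shown afterwards is still a `MapDataset`.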

dataset = dataset.prefetch(1)

In [25]:
dataset

<MapDataset shapes: ((None, None, 39), (None, None)), types: (tf.float32, tf.int64)>

In [26]:
for X_batch, Y_batch in dataset.take(1):
    print(X_batch.shape, Y_batch.shape)

2021-09-18 14:10:30.513569: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


(32, 100, 39) (32, 100)


## Creating the Model 

In [27]:
# Two stacked LSTM layers that emit an output at every time step, followed by a
# per-time-step softmax over the character vocabulary
model = keras.Sequential()
model.add(
    keras.layers.LSTM(
        128,
        return_sequences=True,
        input_shape=[None, len(tokenizer.word_index)],
        dropout=0.2,
    )
)
model.add(keras.layers.LSTM(128, return_sequences=True, dropout=0.2))
model.add(
    keras.layers.TimeDistributed(
        keras.layers.Dense(len(tokenizer.word_index), activation="softmax")
    )
)

In [28]:
# The targets are integer class ids (not one-hot), hence the sparse cross-entropy loss
model.compile(
    loss=keras.losses.sparse_categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
)

### Setting up TensorBoard

In [29]:
root_logdir = os.path.join(os.curdir, "logs")

In [30]:
def get_run_logdir() -> str:
    """Return a timestamped log directory path below ``root_logdir``.

    The run id is built from the current local time with one-second
    resolution, so calls made in different seconds yield different paths.
    The directory itself is not created here.
    """
    return os.path.join(root_logdir, time.strftime("run_%Y_%m_%d-%H-%M-%S"))

In [31]:
get_run_logdir()

'./logs/run_2021_09_18-14-10-31'

In [32]:
# update_freq=1 asks the callback to log after every batch instead of once per epoch
# NOTE(review): writing to TensorBoard this often may slow training down - confirm it is intended
tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir(),  update_freq= 1)

2021-09-18 14:10:31.100045: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-09-18 14:10:31.100076: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-09-18 14:10:31.100123: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1614] Profiler found 1 GPUs
2021-09-18 14:10:31.100372: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcupti.so.11.2'; dlerror: libcupti.so.11.2: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64
2021-09-18 14:10:31.201221: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1666] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
2021-09-18 14:10:31.201297: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.


In [33]:
# Saves the model to an HDF5 file during training
# NOTE(review): with the default settings this presumably overwrites the same file after every epoch - confirm
checkpoint_cb = keras.callbacks.ModelCheckpoint("shakespeare_text.h5")

In [34]:
# Train for 5 epochs on the windowed dataset; the callbacks handle TensorBoard logging and checkpointing
model.fit(dataset, epochs=5, callbacks = [tensorboard_cb, checkpoint_cb])

Epoch 1/5


2021-09-18 14:10:33.865425: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8100


      8/Unknown - 3s 37ms/step - loss: 3.5481

2021-09-18 14:10:34.630106: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-09-18 14:10:34.630143: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-09-18 14:10:34.630671: E tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1666] function cupti_interface_->Subscribe( &subscriber_, (CUpti_CallbackFunc)ApiCallback, this)failed with error CUPTI_ERROR_INSUFFICIENT_PRIVILEGES
2021-09-18 14:10:34.651555: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-09-18 14:10:34.655512: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:673]  GpuTracer has collected 0 callback api events and 0 activity events. 
2021-09-18 14:10:34.656923: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-09-18 14:10:34.659478: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./logs/run_2021_09_18-14-10-31/train/plugi

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f6bc0505eb0>

## Inference

In [35]:
def preprocess(texts):
    """Encode raw strings as one-hot tensors in the format the model was trained on.

    Args:
        texts: list of strings to encode with the fitted ``tokenizer``.
            Presumably all texts must tokenize to the same length so they
            form a rectangular array - TODO confirm.

    Returns:
        The result of ``tf.one_hot`` over the zero-based character ids, with
        depth equal to the tokenizer's vocabulary size.
    """
    token_ids = tokenizer.texts_to_sequences(texts)
    # The tokenizer's ids start at 1; the model was trained on zero-based ids
    zero_based_ids = np.array(token_ids) - 1
    vocabulary_size = len(tokenizer.word_index)
    return tf.one_hot(zero_based_ids, vocabulary_size)

In [36]:
X_new = preprocess(["Romeo"])

In [37]:
# Most likely character id at every time step (argmax over the softmax outputs on the last axis)
y_pred = np.argmax(model(X_new), axis=-1)

In [38]:
# +1 converts back to the tokenizer's 1-based ids; [0][-1] picks the last character
# of the first decoded text, i.e. the prediction made at the final time step
tokenizer.sequences_to_texts(y_pred+1)[0][-1]

':'