# Lab 7: Text generation with simple RNN

* [Old tensorflow](https://www.tensorflow.org/text/tutorials/text_generation)
* [mlnuggets](https://www.machinelearningnuggets.com/tensorflow-lstm/)

In [17]:
import keras
from keras import layers, Model, Sequential, ops
from keras.layers import TextVectorization
import tensorflow as tf
import pandas as pd
from matplotlib import pyplot as plt
import re
import os

os.environ["TF_GPU_ALLOCATOR"]="cuda_malloc_async"

In [2]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]


In [3]:
name_url = ("Crime and Punishment", 'https://www.gutenberg.org/files/2554/2554-0.txt')

filepath = keras.utils.get_file(f'{name_url[0]}.txt', origin=name_url[1])
text_f = ''
with open(filepath, encoding='utf-8') as f:
    text_f = f.read()[10000:] # skip preface +-

text = text_f

text = re.sub(r"[\"\`\'\’\“\”]", r"", text_f)
text = re.sub(r"[\(\)]", r"", text_f)
text = re.sub(r"[\.\!\?]", "!", text)
text = re.sub(r"\s+", " ", text)

text_list = text.replace('\n', ' ').split('!')
text_list = list(map(lambda x: x.strip(), text_list))
print( len(text_list) )

text_list = list(filter(None, text_list))

import random
random.shuffle(text_list)

length = len(text_list)
text_train = text_list[:int(0.8*length)]
text_valid = text_list[int(0.8*length):]

20594


In [4]:
words = sorted([(len(a:=line.split(" ")), a, line) for line in text_list], reverse=1)
MAX_LEN = max([len(line.split(" ")) for line in text_list])

vectorize_layer = TextVectorization(
    output_mode="int",
    output_sequence_length=MAX_LEN + 1,
)

vectorize_layer.adapt(text_list)
vocab = vectorize_layer.get_vocabulary()
print(len(vocab))

word_from_id = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token="", oov_token="[UNK]",  invert=True)

I0000 00:00:1736245521.827899 3631601 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 842 MB memory:  -> device: 0, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:04:00.0, compute capability: 8.0
I0000 00:00:1736245521.831166 3631601 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 32374 MB memory:  -> device: 1, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:08:00.0, compute capability: 8.0
I0000 00:00:1736245521.833584 3631601 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 32362 MB memory:  -> device: 2, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:85:00.0, compute capability: 8.0
I0000 00:00:1736245521.836447 3631601 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 32374 MB memory:  -> device: 3, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:89:00.0, compute capability: 8.0


11697


In [22]:
BATCH_SIZE = 16
BUFFER_SIZE = 128

def preprocess(text_l: list):

    def preprocess_text(text):
        text = tf.expand_dims(text, -1)
        tokenized_sentences = vectorize_layer(text)
        x = tokenized_sentences[:, :-1]
        y = tokenized_sentences[:, 1:]
        return x, y
    
    return (
        tf.data.Dataset.from_tensor_slices(text_l)
            .shuffle(BUFFER_SIZE)
            .batch(BATCH_SIZE)
            .map(preprocess_text)
            .prefetch(tf.data.AUTOTUNE)
    )

dataset_train = preprocess(text_train)
dataset_valid = preprocess(text_valid)

In [23]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(BATCH_SIZE)
    x = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x


In [24]:
embedding_dim = 128
rnn_units = 512

model = MyModel(len(vocab), embedding_dim, rnn_units)

In [30]:
model.summary()

In [26]:
for X_train, y_train in dataset_train.take(1):
    ex_m = model(X_train)
    pass
print(X_train.shape)
print(y_train.shape)
print(ex_m.shape)

(16, 102)
(16, 102)
(16, 102, 11697)


In [27]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer='adam', loss=loss)

In [29]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "checkpoint{epoch}.weights.h5")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

history = model.fit(
    dataset_train, 
    epochs=10, 
    # callbacks=[checkpoint_callback]
    )

Epoch 1/10


2025-01-07 20:41:03.644986: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:497] Allocator (GPU_0_bfc) ran out of memory trying to allocate 365.53MiB (rounded to 383287296)requested by op StatefulPartitionedCall/gradient_tape/my_model_2_1/dense_2_1/MatMul/MatMul_1
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2025-01-07 20:41:03.645041: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1053] BFCAllocator dump for GPU_0_bfc
2025-01-07 20:41:03.645056: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1060] Bin (256): 	Total Chunks: 46, Chunks in use: 46. 11.5KiB allocated for chunks. 11.5KiB in use in bin. 245B client-requested in use in bin.
2025-01-07 20:41:03.645064: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1060] Bin (512): 	Total Chunks: 1, Chunks in use: 0. 768B allocated for chunks. 0B in 

ResourceExhaustedError: Graph execution error:

Detected at node gradient_tape/my_model_2_1/dense_2_1/MatMul/MatMul_1 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/home/derzhapolskii.yv/.conda/envs/py3128/lib/python3.12/asyncio/base_events.py", line 640, in run_forever

  File "/home/derzhapolskii.yv/.conda/envs/py3128/lib/python3.12/asyncio/base_events.py", line 1992, in _run_once

  File "/home/derzhapolskii.yv/.conda/envs/py3128/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/tmp/ipykernel_3631601/1645016690.py", line 10, in <module>

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 368, in fit

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 216, in function

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 129, in multi_step_on_iterator

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 110, in one_step_on_data

  File "/home/derzhapolskii.yv/tf/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 75, in train_step

OOM when allocating tensor with shape[16,512,11697] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node gradient_tape/my_model_2_1/dense_2_1/MatMul/MatMul_1}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_multi_step_on_iterator_6361]