<a href="https://colab.research.google.com/github/Carapet07/Data-Science-projects/blob/main/Movie_Reviews_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [2]:
!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xf aclImdb_v1.tar.gz

--2025-03-02 20:02:27--  https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz’


2025-03-02 20:02:33 (14.9 MB/s) - ‘aclImdb_v1.tar.gz’ saved [84125825/84125825]



In [3]:
dataset = keras.utils.text_dataset_from_directory(
    directory='aclImdb', label_mode=None,  batch_size=256
) # Only raw data is returned because of label_mode='None'

dataset = dataset.map(lambda x: tf.strings.regex_replace(x, "<br />", " "))
# Imdb dataset often contain <br /> for line breaks. This doesn't matter for text
# classification, but in our case we wouldn't want to generate <br />

Found 100006 files.


In [4]:
vocabulary_size = 15000
sequence_length = 100

text_vectorization = layers.TextVectorization(
    max_tokens = vocabulary_size,
    output_mode='int',
    output_sequence_length = sequence_length
)

text_vectorization.adapt(dataset)

In [5]:
def prepare_lm_dataset(text_batch):
  vectorized_sequence = text_vectorization(text_batch)

  X = vectorized_sequence[:, :-1]
  y = vectorized_sequence[:, 1:]

  return X, y

lm_dataset = dataset.map(prepare_lm_dataset, num_parallel_calls=4)

In [6]:

class TransformerDecoder(layers.Layer):
  def __init__(self, embed_dim, num_heads, dense_dim, **kwargs):
    super().__init__(**kwargs)
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.dense_dim = dense_dim

    self.dense = keras.Sequential([
        layers.Dense(dense_dim, activation='relu'),
        layers.Dense(embed_dim)
    ])

    self.attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_dim
    )

    self.layernorm1 = layers.LayerNormalization()
    self.layernorm2 = layers.LayerNormalization()

  def call(self, inputs, mask=None):
    if mask is not None:
      mask = mask[:, tf.newaxis, :]

    attention_output = self.attention(
        inputs, inputs, attention_mask=mask
    )

    dense_input = self.layernorm1(inputs + attention_output)
    dense_output = self.dense(dense_input)
    return self.layernorm2(dense_input + dense_output)


class PositionalEmbedding(layers.Layer):
  def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
    super().__init__(**kwargs)
    self.sequence_length = sequence_length
    self.input_dim = input_dim
    self.output_dim = output_dim

    # Embeddin for storing token indices
    self.token_embedding = layers.Embedding(
        input_dim=input_dim, output_dim=output_dim,
    )
    # Embeddin for saving token positions
    self.positional_embedding = layers.Embedding(
        input_dim=sequence_length, output_dim=output_dim
    )

  def call(self, inputs):
    # inputs is a tensor of shape (batch_size, sequence_length)
    # so [-1] returns sequence_length
    length = tf.shape(inputs)[-1]
    positions = tf.range(start=0, limit=length, delta=1)
    embedded_positions = self.positional_embedding(positions)
    embedded_tokens = self.token_embedding(inputs)
    return embedded_positions + embedded_tokens

  def mask(self, inputs, mask=None):
    return tf.math.not_equal(inputs, 0)

In [13]:
embed_dim = 256
dense_dim = 2048
num_heads = 2

inputs =  keras.Input(shape=(None,), dtype='int64')
x = PositionalEmbedding(sequence_length, vocabulary_size, embed_dim)(inputs)
x = TransformerDecoder(embed_dim, num_heads, dense_dim)(x, x, mask=mask)
outputs = layers.Dense(vocabulary_size, activation='softmax')(x)
model = keras.Model(inputs, outputs)

model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')

NameError: name 'mask' is not defined

In [11]:
def sample_next(predictions, temperature=1.0):
    predictions = np.asarray(predictions).astype("float64")
    predictions = np.log(predictions) / temperature
    exp_preds = np.exp(predictions)
    predictions = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, predictions, 1)
    return np.argmax(probas)

# Callback for Text Generation
tokens_index = dict(enumerate(text_vectorization.get_vocabulary()))  # Assuming vocabulary indexing



class TextGenerator(keras.callbacks.Callback):
    def __init__(self, prompt, generate_length, model_input_length, temperatures=(1.0,), print_freq=1):
        self.prompt = prompt
        self.generate_length = generate_length
        self.model_input_length = model_input_length
        self.temperatures = temperatures
        self.print_freq = print_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.print_freq != 0:
            return
        for temperature in self.temperatures:
            print("== Generating with temperature", temperature)
            sentence = self.prompt
            for i in range(self.generate_length):
                tokenized_sentence = text_vectorization([sentence])
                predictions = self.model(tokenized_sentence)
                next_token = sample_next(predictions[0, i, :])
                sampled_token = tokens_index[next_token]
                sentence += " " + sampled_token
            print(sentence)

# Create Callback for Text Generation
prompt = "This movie"
text_gen_callback = TextGenerator(
    prompt,
    generate_length=50,
    model_input_length=sequence_length,
    temperatures=(0.2, 0.5, 0.7, 1.0, 1.5),
)

In [12]:
model.fit(lm_dataset, epochs=200, callbacks=[text_gen_callback])

Epoch 1/200


InvalidArgumentError: Graph execution error:

Detected at node functional_1_1/transformer_decoder_1/multi_head_attention_1/LogicalAnd defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-12-ed4ff86120ee>", line 1, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 57, in train_step

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/layer.py", line 908, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/models/functional.py", line 182, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/usr/local/lib/python3.11/dist-packages/keras/src/models/functional.py", line 637, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/layer.py", line 908, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "<ipython-input-6-bc889b77a874>", line 24, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/layer.py", line 908, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/attention/multi_head_attention.py", line 559, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/attention/multi_head_attention.py", line 474, in _compute_attention

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/nn.py", line 2644, in dot_product_attention

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/nn.py", line 1066, in dot_product_attention

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/nn.py", line 1025, in _dot_product_attention_xla

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/nn.py", line 1001, in _apply_masks

Incompatible shapes: [256,1,99,256] vs. [256,2,99,99]
	 [[{{node functional_1_1/transformer_decoder_1/multi_head_attention_1/LogicalAnd}}]] [Op:__inference_multi_step_on_iterator_20158]