# Agenda:
<ul>
    <li>This file focuses on fine-tuning the Pre-trained Generative AI model T5-Small seq2seq Transformer model</li>
    <li>The model has a Embedding + 6 encoder + 6 decoder + 1 LM (language Modelling) layer</li>
    <li>Freezing the lower layer + 4 encoder and decoder layers each</li>
    <li>Keeping the last layer for text summarization</li>
</ul>

# Importing necessary libraries

In [1]:
import polars
from sklearn.model_selection import train_test_split
import tensorflow as tf
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
import numpy
import datetime
from evaluate import load
import matplotlib.pyplot as mplot




In [2]:
dataset = polars.read_csv('../../datasets/Cleaned News dataset.csv')
dataset.head()

article,highlights
str,str
"""LONDON, England -- Harry Potte…","""Harry Potter star Daniel Radcl…"
"""Editor's note: In our Behind t…","""Mentally ill inmates in Miami …"
"""MINNEAPOLIS, Minnesota (CNN) -…","""NEW: ""I thought I was going to…"
"""WASHINGTON (CNN) -- Doctors re…","""Five small polyps found during…"
"""(CNN) -- The National Football…","""NEW: NFL chief, Atlanta Falcon…"


In [3]:
dataset = dataset.with_columns((
    polars.lit('summarize: ') + polars.col('article')
    ).
    alias('new_article')
).drop('article')

dataset.head()

highlights,new_article
str,str
"""Harry Potter star Daniel Radcl…","""summarize: LONDON, England -- …"
"""Mentally ill inmates in Miami …","""summarize: Editor's note: In o…"
"""NEW: ""I thought I was going to…","""summarize: MINNEAPOLIS, Minnes…"
"""Five small polyps found during…","""summarize: WASHINGTON (CNN) --…"
"""NEW: NFL chief, Atlanta Falcon…","""summarize: (CNN) -- The Nation…"


# Dataset Splitting

In [4]:
train_texts, temp_texts, train_summaries, temp_summaries = train_test_split(
    dataset['new_article'], dataset['highlights'], test_size=0.2, random_state=42
)

val_texts, test_texts, val_summaries, test_summaries = train_test_split(
    temp_texts, temp_summaries, test_size=0.5, random_state=42
)

del dataset

# Tokenizing the data (On-the-fly)

In [5]:
tokenizer = AutoTokenizer.from_pretrained('t5-small')



In [6]:
from transformers import AutoTokenizer
import tensorflow as tf

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('t5-small')

# Parameters
MAX_INPUT_LEN = 400
MAX_TARGET_LEN = 100

# Tokenization function
def tf_tokenize(example_text, example_summary):
    # Convert tf.Tensor to string in py_function
    def tokenize_fn(text, summary):
        text = text.decode('utf-8')
        summary = summary.decode('utf-8')

        inputs = tokenizer(
            text,
            max_length=MAX_INPUT_LEN,
            padding='max_length',
            truncation=True,
            return_tensors=None
        )
        targets = tokenizer(
            summary,
            max_length=MAX_TARGET_LEN,
            padding='max_length',
            truncation=True,
            return_tensors=None
        )

        return (
            tf.convert_to_tensor(inputs['input_ids'], dtype=tf.int32),
            tf.convert_to_tensor(inputs['attention_mask'], dtype=tf.int32),
            tf.convert_to_tensor(targets['input_ids'], dtype=tf.int32)
        )

    input_ids, attention_mask, labels = tf.py_function(
        tokenize_fn,
        [example_text, example_summary],
        [tf.int32, tf.int32, tf.int32]
    )

    # Set shapes explicitly for performance and compatibility
    input_ids.set_shape([MAX_INPUT_LEN])
    attention_mask.set_shape([MAX_INPUT_LEN])
    labels.set_shape([MAX_TARGET_LEN])

    return {'input_ids': input_ids, 'attention_mask': attention_mask}, labels


In [7]:
def create_tf_dataset(inputs, targets, batch_size=4):
    dataset = tf.data.Dataset.from_tensor_slices((inputs, targets))
    dataset = dataset.map(tf_tokenize, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset


In [8]:
# Datasets for model
# Build on On-the-fly mode
train_dataset = create_tf_dataset(list(train_texts), list(train_summaries))
val_dataset = create_tf_dataset(list(val_texts), list(val_summaries))
test_dataset = create_tf_dataset(list(test_texts), list(test_summaries))

# Model Building
## Downloading the model

In [9]:
t5_model = TFAutoModelForSeq2SeqLM.from_pretrained('t5-small')




All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


## Customizing the model
<ul>
    <li>Customizin the model layer's to be trained and left frozen</li>
    <li>Setting the embedding + lower 4 encoder, decoder (each) layers frozen</li>
    <li>Keeping the last 2 encoder decoder (each) layer + LM layer as trainable</li>
</ul>

In [10]:
# Freeze embeddings
t5_model.shared.trainable = False
t5_model.encoder.embed_tokens.trainable = False
t5_model.decoder.embed_tokens.trainable = False

# Freeze lower 4 encoder layers (0 to 3)
for i, layer in enumerate(t5_model.encoder.block):
    if i < 4:
        layer.trainable = False
    else:
        layer.trainable = True

# Freeze lower 4 decoder layers (0 to 3)
for i, layer in enumerate(t5_model.decoder.block):
    if i < 4:
        layer.trainable = False
    else:
        layer.trainable = True

## Setting up model callbacks

In [11]:
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Early Stopping
early_stopping_monitor = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',         # Monitor the validation loss
    patience=3,                 # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True,  # Restore model weights from the epoch with the best value of the monitored quantity
    min_delta=1e-4              # Counts the minimum change as viable change in loss
)

# Model Checkpoint to save data
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=f'../../models/best_model_{timestamp}_epoch-{{epoch:02d}}_val_loss-{{val_loss:.4f}}.keras', # Filepath to save the model weights
    monitor='val_loss',         # Metric to monitor
    save_best_only=True,        # Only save when the monitored metric is the best seen so far
    save_weights_only=False,    # Save the entire model
    mode='min',                 # The monitored metric ('val_loss') should be minimized
    verbose=1                   # Print a message when a checkpoint is saved
)

# Reducing Learning Rate On Plateau stage
# Reducing learning rate if validation loss plateaus
reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',         # Monitor the validation loss
    factor=0.5,                 # Factor by which the learning rate will be reduced. new_lr = lr * factor
    patience=2,                 # Number of epochs with no improvement after which the learning rate will be reduced
    mode='min',                 # The monitored metric ('val_loss') should be minimized
    verbose=1,                  # Print a message when the learning rate is reduced
    min_lr=1e-5,                # Lower bound on the learning rate
    min_delta=1e-4              # Counts the minimum change as viable change
)

# Logging all metrics in a csv file for future use
csv_logger = tf.keras.callbacks.CSVLogger(
    filename=f'../../logs/training_log_{timestamp}.csv',
    append=True
)

## Compiling model

In [12]:
t5_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5)     # Optimizer with the initial Learning Rate
    #loss=t5_model.compute_loss                                  # T-5 seq2seq built in loss function
)

In [13]:
t5_model.summary()

Model: "tft5_for_conditional_generation"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 shared (Embedding)          multiple                  16449536  
                                                                 
 encoder (TFT5MainLayer)     multiple                  35330816  
                                                                 
 decoder (TFT5MainLayer)     multiple                  41625344  
                                                                 
Total params: 60506624 (230.81 MB)
Trainable params: 14686208 (56.02 MB)
Non-trainable params: 45820416 (174.79 MB)
_________________________________________________________________


In [14]:
history = t5_model.fit(
    train_dataset, validation_data=val_dataset,
    epochs=5,
    callbacks=[
        model_checkpoint_callback,
        early_stopping_monitor,
        reduce_lr_on_plateau,
        csv_logger
    ]
)

Epoch 1/5



UnknownError: Graph execution error:

Detected at node EagerPyFunc defined at (most recent call last):
<stack traces unavailable>
AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'decode'
Traceback (most recent call last):

  File "c:\Users\Avik Chakraborty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\ops\script_ops.py", line 268, in __call__
    return func(device, token, args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\Avik Chakraborty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\ops\script_ops.py", line 146, in __call__
    outputs = self._call(device, args)
              ^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\Avik Chakraborty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\ops\script_ops.py", line 153, in _call
    ret = self._func(*args)
          ^^^^^^^^^^^^^^^^^

  File "c:\Users\Avik Chakraborty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\autograph\impl\api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "C:\Users\AVIKCH~1\AppData\Local\Temp\__autograph_generated_file_p2n02_y.py", line 16, in tokenize_fn
    text = ag__.converted_call(ag__.ld(text).decode, ('utf-8',), None, fscope_1)
                               ^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\Avik Chakraborty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow\python\framework\tensor.py", line 261, in __getattr__
    self.__getattribute__(name)

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'decode'


	 [[{{node EagerPyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_22735]