In [1]:

import json
import os
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq,
    Seq2SeqTrainer, Seq2SeqTrainingArguments, T5ForConditionalGeneration,
    T5Tokenizer, TrainerCallback
)
import matplotlib.pyplot as plt
from functools import partial





In [2]:

class MetricsCallback(TrainerCallback):
    def __init__(self):
        self.training_loss = []
        self.eval_loss = []
        self.training_accuracy = []
        self.eval_accuracy = []
        
    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs:
            if 'loss' in logs:
                self.training_loss.append((state.global_step, logs['loss']))
            if 'eval_loss' in logs:
                self.eval_loss.append((state.global_step, logs['eval_loss']))
            if 'accuracy' in logs:
                self.training_accuracy.append((state.global_step, logs['accuracy']))
            if 'eval_accuracy' in logs:
                self.eval_accuracy.append((state.global_step, logs['eval_accuracy']))


In [3]:

def plot_metrics(callback):
    plt.figure(figsize=(12, 8))
    plt.subplot(2, 1, 1)
    if callback.training_loss:
        steps, losses = zip(*callback.training_loss)
        plt.plot(steps, losses, label='Training Loss')
    if callback.eval_loss:
        steps, losses = zip(*callback.eval_loss)
        plt.plot(steps, losses, label='Validation Loss')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    
    plt.subplot(2, 1, 2)
    if callback.training_accuracy:
        steps, acc = zip(*callback.training_accuracy)
        plt.plot(steps, acc, label='Training Accuracy')
    if callback.eval_accuracy:
        steps, acc = zip(*callback.eval_accuracy)
        plt.plot(steps, acc, label='Validation Accuracy')
    plt.xlabel('Steps')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    
    plt.tight_layout()
    plt.savefig('training_metrics.png')
    plt.close()

model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/t5-small/bd944e5f1b3ad9b70dd9d00010a517059e19265671076b8b0a4a58d9491842bc?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1733061408&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczMzA2MTQwOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby90NS1zbWFsbC9iZDk0NGU1ZjFiM2FkOWI3MGRkOWQwMDAxMGE1MTcwNTllMTkyNjU2NzEwNzZiOGIwYTRhNThkOTQ5MTg0MmJjP3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiJ9XX0_&Signature=RA%7E9%7EQBEA4WVnNcgkZioCr1mf-NYNKBU3jLrjBeC42o8mKRDbV5H0FxOGvpIO-Qsoa0BzY01%7EaTX5xNzitZ%7ElKwL1ZroeZ-XoN5ETGoaO1pEDBu2Mtm8fmkW8xOg%7E3kbn9WAgV8SyWO80B4hV0Q1L%7ERpXgFWkiYvzPVqFJYBK9rtZ1nyUAZiOk2WjQ1ic5cktUeyshr3YoKdysA4sW7Vdi4d%7E%7EkQLpHKzd32wfxuHz8Ceo7PjDVpaUsPvmrXdxQOlktxgN9bJjGT2WVMCP1LvapmIH9pMz3P45DHbFX1LZ5wCUC%7EwV45eXQKQtY0Gcqq-DrFu6rwuggUjdlcZBf8hQ__&Key-Pair-Id=K3RPWS32NSSJCE: HTTPSConnectionPool(host='cdn-lfs.hf.c

model.safetensors:  48%|####7     | 115M/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [4]:
def preprocess_function(examples, tokenizer):
    model_inputs = tokenizer(examples['inputs'], truncation=True, padding=True, max_length=128)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples['labels'], truncation=True, padding=True, max_length=128)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

preprocess_function_with_tokenizer = partial(preprocess_function, tokenizer=tokenizer)

In [5]:

# Load and prepare data
with open('label_subset.json', 'r', encoding='utf-8-sig') as file:
    data = json.load(file)

input_lines = []
label_lines = []
for entry in data:
    input_lines.append(entry['Content'])
    aspects = [term for aspect in entry['Aspects'] for term in aspect['AspectTerms']]
    label_lines.append(' '.join(aspects))

dataset = Dataset.from_dict({'inputs': input_lines, 'labels': label_lines})
tokenized_datasets = dataset.map(
    preprocess_function_with_tokenizer,
    batched=True,
    remove_columns=['inputs'],
    num_proc=8
)

Map (num_proc=8):   0%|          | 0/1100 [00:00<?, ? examples/s]

In [6]:

# Split dataset into train and validation
train_test_split = tokenized_datasets.train_test_split(test_size=0.2)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, return_tensors="pt")

training_args = Seq2SeqTrainingArguments(
    "./",
    do_train=True,
    do_eval=True,  # Enable evaluation
    evaluation_strategy="epoch",  # Evaluate after each epoch
    num_train_epochs=30,
    learning_rate=1e-5,
    warmup_ratio=0.05,
    weight_decay=0.01,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    logging_dir='./',
    group_by_length=True,
    save_strategy="epoch",
    save_total_limit=3,
    fp16=True,
    metric_for_best_model="eval_loss",
    load_best_model_at_end=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_test_split['train'],
    eval_dataset=train_test_split['test'],
    data_collator=data_collator,
    tokenizer=tokenizer
)

# Add callbacks
metrics_callback = MetricsCallback()
trainer.add_callback(metrics_callback)



In [7]:


# Train the model
trainer.train()

# Plot metrics
plot_metrics(metrics_callback)

# Save the model
save_directory = "FlexiModel"
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

  0%|          | 0/26400 [00:00<?, ?it/s]

{'loss': 9.8962, 'grad_norm': 64.56928253173828, 'learning_rate': 3.7878787878787882e-06, 'epoch': 0.57}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.886273980140686, 'eval_runtime': 11.8453, 'eval_samples_per_second': 18.573, 'eval_steps_per_second': 18.573, 'epoch': 1.0}
{'loss': 3.183, 'grad_norm': 20.994333267211914, 'learning_rate': 7.5757575757575764e-06, 'epoch': 1.14}
{'loss': 1.1187, 'grad_norm': 8.19991683959961, 'learning_rate': 9.928229665071771e-06, 'epoch': 1.7}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.49046215415000916, 'eval_runtime': 13.4029, 'eval_samples_per_second': 16.414, 'eval_steps_per_second': 16.414, 'epoch': 2.0}
{'loss': 0.8332, 'grad_norm': 758.3807983398438, 'learning_rate': 9.728867623604467e-06, 'epoch': 2.27}
{'loss': 0.519, 'grad_norm': 6.420949459075928, 'learning_rate': 9.529505582137162e-06, 'epoch': 2.84}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.285758912563324, 'eval_runtime': 12.7319, 'eval_samples_per_second': 17.279, 'eval_steps_per_second': 17.279, 'epoch': 3.0}
{'loss': 0.4504, 'grad_norm': 3.16291880607605, 'learning_rate': 9.330143540669856e-06, 'epoch': 3.41}
{'loss': 0.3356, 'grad_norm': 26.585309982299805, 'learning_rate': 9.130781499202552e-06, 'epoch': 3.98}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.2412767857313156, 'eval_runtime': 14.8512, 'eval_samples_per_second': 14.814, 'eval_steps_per_second': 14.814, 'epoch': 4.0}
{'loss': 0.2969, 'grad_norm': 3.602818012237549, 'learning_rate': 8.931419457735247e-06, 'epoch': 4.55}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.22292950749397278, 'eval_runtime': 15.5162, 'eval_samples_per_second': 14.179, 'eval_steps_per_second': 14.179, 'epoch': 5.0}
{'loss': 0.2991, 'grad_norm': 0.525290846824646, 'learning_rate': 8.732057416267943e-06, 'epoch': 5.11}
{'loss': 0.2827, 'grad_norm': 1.058815598487854, 'learning_rate': 8.53269537480064e-06, 'epoch': 5.68}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.20531824231147766, 'eval_runtime': 13.8362, 'eval_samples_per_second': 15.9, 'eval_steps_per_second': 15.9, 'epoch': 6.0}
{'loss': 0.2617, 'grad_norm': 0.7397129535675049, 'learning_rate': 8.333333333333334e-06, 'epoch': 6.25}
{'loss': 0.2312, 'grad_norm': 8.732197761535645, 'learning_rate': 8.13397129186603e-06, 'epoch': 6.82}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.1987317055463791, 'eval_runtime': 13.3728, 'eval_samples_per_second': 16.451, 'eval_steps_per_second': 16.451, 'epoch': 7.0}
{'loss': 0.2148, 'grad_norm': 3.3713154792785645, 'learning_rate': 7.934609250398724e-06, 'epoch': 7.39}
{'loss': 0.2509, 'grad_norm': 0.5917288661003113, 'learning_rate': 7.73524720893142e-06, 'epoch': 7.95}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.18850266933441162, 'eval_runtime': 13.4504, 'eval_samples_per_second': 16.356, 'eval_steps_per_second': 16.356, 'epoch': 8.0}
{'loss': 0.2247, 'grad_norm': 0.5475296974182129, 'learning_rate': 7.535885167464115e-06, 'epoch': 8.52}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.18588104844093323, 'eval_runtime': 13.4189, 'eval_samples_per_second': 16.395, 'eval_steps_per_second': 16.395, 'epoch': 9.0}
{'loss': 0.247, 'grad_norm': 1.4515035152435303, 'learning_rate': 7.33652312599681e-06, 'epoch': 9.09}
{'loss': 0.2141, 'grad_norm': 6.817032814025879, 'learning_rate': 7.137161084529506e-06, 'epoch': 9.66}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.18486954271793365, 'eval_runtime': 13.1969, 'eval_samples_per_second': 16.671, 'eval_steps_per_second': 16.671, 'epoch': 10.0}
{'loss': 0.2203, 'grad_norm': 0.2443014681339264, 'learning_rate': 6.937799043062201e-06, 'epoch': 10.23}
{'loss': 0.1963, 'grad_norm': 8.404133796691895, 'learning_rate': 6.738437001594896e-06, 'epoch': 10.8}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.17653770744800568, 'eval_runtime': 12.6521, 'eval_samples_per_second': 17.388, 'eval_steps_per_second': 17.388, 'epoch': 11.0}
{'loss': 0.1922, 'grad_norm': 1.2596862316131592, 'learning_rate': 6.539074960127592e-06, 'epoch': 11.36}
{'loss': 0.215, 'grad_norm': 3.5425591468811035, 'learning_rate': 6.339712918660288e-06, 'epoch': 11.93}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.17580898106098175, 'eval_runtime': 13.5901, 'eval_samples_per_second': 16.188, 'eval_steps_per_second': 16.188, 'epoch': 12.0}
{'loss': 0.1939, 'grad_norm': 3.3659377098083496, 'learning_rate': 6.140350877192983e-06, 'epoch': 12.5}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.17344915866851807, 'eval_runtime': 14.4907, 'eval_samples_per_second': 15.182, 'eval_steps_per_second': 15.182, 'epoch': 13.0}
{'loss': 0.1856, 'grad_norm': 1.973254919052124, 'learning_rate': 5.940988835725678e-06, 'epoch': 13.07}
{'loss': 0.1932, 'grad_norm': 0.2628292143344879, 'learning_rate': 5.741626794258374e-06, 'epoch': 13.64}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16920267045497894, 'eval_runtime': 13.7681, 'eval_samples_per_second': 15.979, 'eval_steps_per_second': 15.979, 'epoch': 14.0}
{'loss': 0.1836, 'grad_norm': 5.494396209716797, 'learning_rate': 5.542264752791069e-06, 'epoch': 14.2}
{'loss': 0.1917, 'grad_norm': 5.880309104919434, 'learning_rate': 5.342902711323764e-06, 'epoch': 14.77}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16647037863731384, 'eval_runtime': 13.7658, 'eval_samples_per_second': 15.982, 'eval_steps_per_second': 15.982, 'epoch': 15.0}
{'loss': 0.2018, 'grad_norm': 1.8423172235488892, 'learning_rate': 5.14354066985646e-06, 'epoch': 15.34}
{'loss': 0.1946, 'grad_norm': 3.27961802482605, 'learning_rate': 4.944178628389155e-06, 'epoch': 15.91}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16918067634105682, 'eval_runtime': 15.4573, 'eval_samples_per_second': 14.233, 'eval_steps_per_second': 14.233, 'epoch': 16.0}
{'loss': 0.1651, 'grad_norm': 1.0964620113372803, 'learning_rate': 4.74481658692185e-06, 'epoch': 16.48}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16769233345985413, 'eval_runtime': 15.7391, 'eval_samples_per_second': 13.978, 'eval_steps_per_second': 13.978, 'epoch': 17.0}
{'loss': 0.1887, 'grad_norm': 2.844324827194214, 'learning_rate': 4.5454545454545455e-06, 'epoch': 17.05}
{'loss': 0.1847, 'grad_norm': 1.6231828927993774, 'learning_rate': 4.346092503987241e-06, 'epoch': 17.61}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16802075505256653, 'eval_runtime': 14.1112, 'eval_samples_per_second': 15.59, 'eval_steps_per_second': 15.59, 'epoch': 18.0}
{'loss': 0.1688, 'grad_norm': 0.043068863451480865, 'learning_rate': 4.146730462519937e-06, 'epoch': 18.18}
{'loss': 0.18, 'grad_norm': 0.9552457928657532, 'learning_rate': 3.947368421052632e-06, 'epoch': 18.75}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16649211943149567, 'eval_runtime': 15.151, 'eval_samples_per_second': 14.521, 'eval_steps_per_second': 14.521, 'epoch': 19.0}
{'loss': 0.1582, 'grad_norm': 0.8295792937278748, 'learning_rate': 3.748006379585327e-06, 'epoch': 19.32}
{'loss': 0.1634, 'grad_norm': 1.1137089729309082, 'learning_rate': 3.5486443381180225e-06, 'epoch': 19.89}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16525237262248993, 'eval_runtime': 13.537, 'eval_samples_per_second': 16.252, 'eval_steps_per_second': 16.252, 'epoch': 20.0}
{'loss': 0.1607, 'grad_norm': 3.077117681503296, 'learning_rate': 3.3492822966507182e-06, 'epoch': 20.45}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16398946940898895, 'eval_runtime': 13.3447, 'eval_samples_per_second': 16.486, 'eval_steps_per_second': 16.486, 'epoch': 21.0}
{'loss': 0.1926, 'grad_norm': 2.6218605041503906, 'learning_rate': 3.1499202551834136e-06, 'epoch': 21.02}
{'loss': 0.1574, 'grad_norm': 1.654147982597351, 'learning_rate': 2.950558213716109e-06, 'epoch': 21.59}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.1627579778432846, 'eval_runtime': 13.0556, 'eval_samples_per_second': 16.851, 'eval_steps_per_second': 16.851, 'epoch': 22.0}
{'loss': 0.1751, 'grad_norm': 3.3352365493774414, 'learning_rate': 2.751196172248804e-06, 'epoch': 22.16}
{'loss': 0.1591, 'grad_norm': 0.31512993574142456, 'learning_rate': 2.551834130781499e-06, 'epoch': 22.73}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16179297864437103, 'eval_runtime': 13.3114, 'eval_samples_per_second': 16.527, 'eval_steps_per_second': 16.527, 'epoch': 23.0}
{'loss': 0.1577, 'grad_norm': 3.368891477584839, 'learning_rate': 2.352472089314195e-06, 'epoch': 23.3}
{'loss': 0.1706, 'grad_norm': 2.6497673988342285, 'learning_rate': 2.15311004784689e-06, 'epoch': 23.86}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16231253743171692, 'eval_runtime': 13.2518, 'eval_samples_per_second': 16.602, 'eval_steps_per_second': 16.602, 'epoch': 24.0}
{'loss': 0.1528, 'grad_norm': 7.0244059562683105, 'learning_rate': 1.9537480063795854e-06, 'epoch': 24.43}
{'loss': 0.1653, 'grad_norm': 6.193229675292969, 'learning_rate': 1.7543859649122807e-06, 'epoch': 25.0}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16116201877593994, 'eval_runtime': 13.5936, 'eval_samples_per_second': 16.184, 'eval_steps_per_second': 16.184, 'epoch': 25.0}
{'loss': 0.1506, 'grad_norm': 3.2026526927948, 'learning_rate': 1.5550239234449763e-06, 'epoch': 25.57}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16073353588581085, 'eval_runtime': 13.1993, 'eval_samples_per_second': 16.668, 'eval_steps_per_second': 16.668, 'epoch': 26.0}
{'loss': 0.1691, 'grad_norm': 6.924412727355957, 'learning_rate': 1.3556618819776716e-06, 'epoch': 26.14}
{'loss': 0.152, 'grad_norm': 0.37756088376045227, 'learning_rate': 1.1562998405103669e-06, 'epoch': 26.7}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.1607867181301117, 'eval_runtime': 13.1267, 'eval_samples_per_second': 16.76, 'eval_steps_per_second': 16.76, 'epoch': 27.0}
{'loss': 0.1549, 'grad_norm': 2.515018939971924, 'learning_rate': 9.569377990430622e-07, 'epoch': 27.27}
{'loss': 0.1646, 'grad_norm': 0.3122849464416504, 'learning_rate': 7.575757575757576e-07, 'epoch': 27.84}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16075369715690613, 'eval_runtime': 13.2948, 'eval_samples_per_second': 16.548, 'eval_steps_per_second': 16.548, 'epoch': 28.0}
{'loss': 0.1636, 'grad_norm': 1.7210954427719116, 'learning_rate': 5.582137161084529e-07, 'epoch': 28.41}
{'loss': 0.144, 'grad_norm': 0.021043775603175163, 'learning_rate': 3.5885167464114835e-07, 'epoch': 28.98}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.1609935760498047, 'eval_runtime': 12.9268, 'eval_samples_per_second': 17.019, 'eval_steps_per_second': 17.019, 'epoch': 29.0}
{'loss': 0.1298, 'grad_norm': 1.9433163404464722, 'learning_rate': 1.594896331738437e-07, 'epoch': 29.55}


  0%|          | 0/220 [00:00<?, ?it/s]

{'eval_loss': 0.16096235811710358, 'eval_runtime': 12.7176, 'eval_samples_per_second': 17.299, 'eval_steps_per_second': 17.299, 'epoch': 30.0}


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


{'train_runtime': 14057.086, 'train_samples_per_second': 1.878, 'train_steps_per_second': 1.878, 'train_loss': 0.4754251919370709, 'epoch': 30.0}


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


('FlexiModel\\tokenizer_config.json',
 'FlexiModel\\special_tokens_map.json',
 'FlexiModel\\spiece.model',
 'FlexiModel\\added_tokens.json')