In [1]:
import os
%pwd

'/Users/sachin/github/Text-Summerizor/research'

In [2]:
os.chdir("../")
%pwd

'/Users/sachin/github/Text-Summerizor'

In [3]:
from pathlib import Path
from dataclasses import dataclass

@dataclass
class ModelTrainerConfig:
    root_dir: Path
    data_path: Path
    model_ckpt: Path
    num_train_epochs: int
    warmup_steps: int
    per_device_train_batch_size: int
    per_device_eval_batch_size: int
    weight_decay: float
    logging_steps: int
    eval_steps: int
    save_steps: float
    gradient_accumulation_steps: int

In [4]:
from textSummerizer.constants import *
from textSummerizer.utils.common import read_yaml,create_directories

In [5]:
class ConfigurationManger:
    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root],verbose=True)
                 
    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.TrainingArguments
        create_directories([config.root_dir],verbose=True)
        
        model_trainer_config = ModelTrainerConfig(
            root_dir= config.root_dir,
            data_path= config.data_path,
            model_ckpt= config.model_ckpt,
            num_train_epochs= params.num_train_epochs,
            warmup_steps= params.warmup_steps,
            per_device_train_batch_size= params.per_device_train_batch_size,
            per_device_eval_batch_size= params.per_device_eval_batch_size,
            weight_decay= params.weight_decay,
            logging_steps= params.logging_steps,
            eval_steps= params.eval_steps,
            save_steps= params.save_steps,
            gradient_accumulation_steps= params.gradient_accumulation_steps
        )     
        return model_trainer_config

In [6]:
import os
from textSummerizer.logging import logger
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_from_disk
import torch 
torch.mps.empty_cache()
import os
PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"


  from .autonotebook import tqdm as notebook_tqdm

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Users/sachin/miniconda3/envs/pipeline/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/sachin/miniconda3/envs/pipeline/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/sachin/miniconda3/envs/pipeline/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/sachin/miniconda3/envs/pipeline/lib/

[2025-05-25 17:20:51,781: INFO: config: PyTorch version 2.2.2 available.]


In [7]:
class ModelTrainer:
    def __init__(self,config : ModelTrainerConfig):
        self.config = config
        
    def train(self):
        device = "mps" if torch.backends.mps.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)
        seq2seq_data_collator = DataCollatorForSeq2Seq(tokenizer, model=model_pegasus)  
        
        dataset_samsum_pt = load_from_disk(self.config.data_path)
        trainer_args = TrainingArguments(
                                  output_dir = self.config.root_dir,
                                  num_train_epochs=self.config.num_train_epochs,
                                  warmup_steps=self.config.warmup_steps,
                                  per_device_train_batch_size=self.config.per_device_train_batch_size,
                                  per_device_eval_batch_size=self.config.per_device_eval_batch_size,
                                  weight_decay=self.config.weight_decay,
                                  logging_steps=self.config.logging_steps,
                                  eval_steps=self.config.eval_steps,
                                  save_steps=self.config.save_steps,
                                  gradient_accumulation_steps=self.config.gradient_accumulation_steps)
        
        trainer = Trainer(model=model_pegasus, args=trainer_args,
                        tokenizer=tokenizer, 
                        data_collator=seq2seq_data_collator,
                        train_dataset=dataset_samsum_pt["test"],
                        eval_dataset=dataset_samsum_pt["validation"] )
        trainer.train()
        model_pegasus.save_pretrained(os.path.join(self.config.root_dir, "model_pegasus_samsum"))
        tokenizer.save_pretrained(os.path.join(self.config.root_dir,"tokenizer"))

In [8]:
try:
    config = ConfigurationManger()
    model_trainer_config =config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2025-05-25 17:20:52,335: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-05-25 17:20:52,338: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-25 17:20:52,339: INFO: common: create directories at : artifacts]
[2025-05-25 17:20:52,339: INFO: common: create directories at : artifacts/model_trainer]


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/204 [00:00<?, ?it/s]

RuntimeError: MPS backend out of memory (MPS allocated: 6.20 GB, other allocations: 2.58 GB, max allowed: 9.07 GB). Tried to allocate 375.40 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).