In [1]:
import os

In [2]:
%pwd

'c:\\Users\\koush\\OneDrive\\Desktop\\mouna\\Text-Summarizer-Project\\research'

In [3]:
# Move up to the project root so that the `src` folder appended to sys.path in a
# later cell resolves. Guarded on the presence of `src` so that re-running this
# cell is a no-op instead of climbing one more directory on every execution.
if not os.path.isdir("src"):
    os.chdir('../')  # Change to project root directory

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of paths and hyperparameters for the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config`` and
    consumed by ``ModelTrainer.train``, which forwards the hyperparameter fields
    to ``TrainingArguments``. ``frozen=True`` makes attribute assignment after
    construction raise ``dataclasses.FrozenInstanceError``.
    """

    # Directory used as the Trainer's output_dir and as the parent of the saved
    # model and tokenizer folders.
    root_dir: str
    # Location passed to datasets.load_from_disk.
    data_path: str
    # Checkpoint identifier passed to AutoTokenizer / AutoModelForSeq2SeqLM.from_pretrained.
    model_checkpt: str
    # The remaining fields are forwarded unchanged to TrainingArguments under the same names.
    num_train_epochs: int
    per_device_train_batch_size: int
    warmup_steps: int
    weight_decay: float
    logging_steps: int
    evaluation_strategy: str
    eval_steps: int
    save_steps: int                  # ConfigurationManager coerces the YAML value to int
    gradient_accumulation_steps: int

In [5]:
import sys
sys.path.append(os.path.join(os.getcwd(), "src"))
from textsummarizer.config_constants import *
from textsummarizer.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project's YAML config and params files and builds typed stage configs.

    Constructing an instance loads both files and creates the artifacts root
    directory named in the config file.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the YAML file holding paths and stage settings.
            params_filepath: Path of the YAML file holding training hyperparameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    @staticmethod
    def _as_int(value) -> int:
        """Coerce a YAML scalar to ``int``, accepting scientific notation.

        ``int(value)`` is tried first so every value that worked before gives the
        same result. Only when that raises ``ValueError`` (e.g. the string
        ``"1e6"``, which a YAML 1.1 loader may return for an unquoted ``1e6``)
        is the value parsed as a float and then truncated.

        Raises:
            ValueError: If the value is not numeric in either form.
            TypeError: If the value is not a number or string (e.g. ``None``).
        """
        try:
            return int(value)
        except ValueError:
            return int(float(value))

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` for the training stage.

        Creates the stage's ``root_dir`` as a side effect, then combines the
        ``model_trainer`` section of the config file with the ``Arguments``
        section of the params file.

        Returns:
            A frozen ``ModelTrainerConfig`` with path fields converted to ``str``
            and ``save_steps`` converted to ``int``.
        """
        cfg = self.config.model_trainer
        params = self.params.Arguments

        create_directories([cfg.root_dir])

        return ModelTrainerConfig(
            root_dir=str(cfg.root_dir),
            data_path=str(cfg.data_path),
            model_checkpt=str(cfg.model_checkpt),
            num_train_epochs=params.num_train_epochs,
            per_device_train_batch_size=params.per_device_train_batch_size,
            warmup_steps=params.warmup_steps,
            weight_decay=params.weight_decay,
            logging_steps=params.logging_steps,
            evaluation_strategy=params.evaluation_strategy,
            eval_steps=params.eval_steps,
            # save_steps is the one value written in scientific notation in some
            # params files, so it needs tolerant integer coercion.
            save_steps=self._as_int(params.save_steps),
            gradient_accumulation_steps=params.gradient_accumulation_steps
        )

In [12]:
from transformers import Trainer, TrainingArguments, AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq
from datasets import load_dataset, load_from_disk
import torch

In [13]:
class ModelTrainer:
    """Fine-tunes a seq2seq checkpoint with the Hugging Face ``Trainer`` and saves it."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the training settings.

        Args:
            config: Paths, checkpoint name and hyperparameters for the run.
        """
        self.config = config

    def train(self):
        """Run one fine-tuning job end to end.

        Loads the tokenizer and model named by ``config.model_checkpt``, loads the
        dataset stored at ``config.data_path``, trains on its ``"train"`` split
        while evaluating on ``"validation"``, and finally writes the model to
        ``<root_dir>/pegasus-samsum-model`` and the tokenizer to
        ``<root_dir>/tokenizer``.
        """
        # Imported here so the method does not depend on a separate notebook cell
        # having been executed first to put `asdict` into the kernel namespace.
        from dataclasses import asdict

        print("TRAINER CONFIG:", asdict(self.config))
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load tokenizer & model
        tokenizer = AutoTokenizer.from_pretrained(self.config.model_checkpt)
        model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_checkpt).to(device)

        # Data collator
        data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

        # Load dataset
        # NOTE(review): presumably already tokenized by the data-transformation
        # stage, since no tokenization happens here - confirm.
        dataset = load_from_disk(self.config.data_path)

        # Training arguments
        # NOTE(review): newer transformers releases appear to rename
        # `evaluation_strategy` to `eval_strategy` - confirm against the
        # installed version before upgrading.
        trainer_args = TrainingArguments(
            output_dir=self.config.root_dir,
            num_train_epochs=self.config.num_train_epochs,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            warmup_steps=self.config.warmup_steps,
            weight_decay=self.config.weight_decay,
            logging_steps=self.config.logging_steps,
            evaluation_strategy=self.config.evaluation_strategy,
            eval_steps=self.config.eval_steps,
            save_steps=self.config.save_steps,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps
        )

        # Trainer
        trainer = Trainer(
            model=model,
            args=trainer_args,
            tokenizer=tokenizer,
            data_collator=data_collator,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"]
        )

        trainer.train()

        # Save model & tokenizer
        model.save_pretrained(os.path.join(self.config.root_dir, "pegasus-samsum-model"))
        tokenizer.save_pretrained(os.path.join(self.config.root_dir, "tokenizer"))

In [10]:
from dataclasses import asdict

In [14]:
if __name__ == "__main__":
    # Build the typed training settings from the YAML config and params files.
    config_manager = ConfigurationManager()
    trainer_config = config_manager.get_model_trainer_config()

    # Diagnostic output: shows the resolved config type and values before the
    # training run starts.
    print("DEBUG TYPE:", type(trainer_config))
    print("DEBUG CONFIG:", trainer_config)

    # Runs the whole fine-tuning job, including saving the model and tokenizer.
    trainer = ModelTrainer(config=trainer_config)
    trainer.train()

[2025-09-10 19:16:34,904: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-09-10 19:16:34,910: INFO: common: yaml file: params.yaml loaded successfully]
[2025-09-10 19:16:34,914: INFO: common: created directory at: artifacts]
[2025-09-10 19:16:34,917: INFO: common: created directory at: artifacts/model_trainer]


DEBUG TYPE: <class '__main__.ModelTrainerConfig'>
DEBUG CONFIG: ModelTrainerConfig(root_dir='artifacts/model_trainer', data_path='artifacts/data_transformation', model_checkpt='google/pegasus-cnn_dailymail', num_train_epochs=1, per_device_train_batch_size=1, warmup_steps=500, weight_decay=0.01, logging_steps=10, evaluation_strategy='steps', eval_steps=500, save_steps=1000000, gradient_accumulation_steps=16)
TRAINER CONFIG: {'root_dir': 'artifacts/model_trainer', 'data_path': 'artifacts/data_transformation', 'model_checkpt': 'google/pegasus-cnn_dailymail', 'num_train_epochs': 1, 'per_device_train_batch_size': 1, 'warmup_steps': 500, 'weight_decay': 0.01, 'logging_steps': 10, 'evaluation_strategy': 'steps', 'eval_steps': 500, 'save_steps': 1000000, 'gradient_accumulation_steps': 16}


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: must be called with a dataclass type or instance