In [1]:
import os
os.chdir('../')
%pwd

'd:\\DS_Stats\\E2E_DS_Propjects\\ML_Project_Two'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training stage.

    Instances are frozen: attributes cannot be reassigned after construction.
    The field order below is also the positional order of the generated
    constructor, so it must not be rearranged.
    """

    # Locations and checkpoint identifier.
    root_dir: Path
    data_path: Path
    model_ckpt: Path

    # Training hyper-parameters.
    num_train_epochs: int
    warmup_steps: int
    per_device_train_batch_size: int
    weight_decay: float
    logging_steps: int
    evaluation_strategy: str
    eval_steps: int
    save_steps: float
    gradient_accumulation_steps: int

In [3]:
from src.text_summarizer.constants import *
from src.text_summarizer.utils.common import read_yaml,create_directories
from pathlib import Path

class ConfigurationManager:
    """Loads the project config and params files and builds stage configs.

    On construction both YAML files are read with ``read_yaml`` and the
    directory named by ``config.artifacts_root`` is created via
    ``create_directories``.
    """

    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAM_FILE_PATH
    ):
        """Read the config/params files and create the artifacts root.

        Args:
            config_filepath: Path to the configuration YAML file.
            params_filepath: Path to the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` for the training stage.

        Reads the ``model_trainer`` section of the config file and the
        ``TrainingArguments`` section of the params file, creates the stage's
        ``root_dir`` and returns the populated config object.

        Returns:
            ModelTrainerConfig: Settings for the model-training stage.
        """
        config = self.config.model_trainer
        params = self.params.TrainingArguments

        create_directories([config.root_dir])

        return ModelTrainerConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
            model_ckpt=config.model_ckpt,
            num_train_epochs=params.num_train_epochs,
            warmup_steps=params.warmup_steps,
            per_device_train_batch_size=params.per_device_train_batch_size,
            weight_decay=params.weight_decay,
            logging_steps=params.logging_steps,
            evaluation_strategy=params.evaluation_strategy,
            eval_steps=params.eval_steps,
            save_steps=params.save_steps,
            gradient_accumulation_steps=params.gradient_accumulation_steps,
        )

    # Backward-compatible alias: the method was originally published under
    # this misspelled name, so existing callers keep working.
    get_model_tainer_config = get_model_trainer_config





In [4]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset,load_from_disk
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class ModelTrainer:
    """Fine-tunes a seq2seq checkpoint and saves the model and tokenizer."""

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the training configuration.

        Args:
            config: Settings for this stage (paths, checkpoint id and
                training hyper-parameters).
        """
        self.config = config

    @staticmethod
    def _resolve_save_steps(value):
        """Normalise a configured ``save_steps`` value to a number.

        Accepts ints, floats and numeric strings (a value written as ``1e6``
        in a YAML file may arrive as the string ``"1e6"`` depending on the
        loader). Whole numbers are returned as ``int``; fractional values are
        returned unchanged as ``float``.

        Raises:
            ValueError / TypeError: If ``value`` cannot be parsed as a number.
        """
        steps = float(value)
        return int(steps) if steps.is_integer() else steps

    def train(self):
        """Run fine-tuning and persist the resulting model and tokenizer.

        Loads the tokenizer and model from ``config.model_ckpt``, loads the
        dataset from ``config.data_path``, trains on its ``'train'`` split
        while evaluating on ``'validation'``, then writes the model to
        ``<root_dir>/pegasus_samsum_model`` and the tokenizer to
        ``<root_dir>/tokenizer``.
        """
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)
        data_collator = DataCollatorForSeq2Seq(tokenizer, model=model_pegasus)

        # Load the dataset that was previously saved to disk.
        dataset_samsum_pt = load_from_disk(self.config.data_path)

        training_args = TrainingArguments(
            output_dir=self.config.root_dir,
            num_train_epochs=self.config.num_train_epochs,
            warmup_steps=self.config.warmup_steps,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            weight_decay=self.config.weight_decay,
            logging_steps=self.config.logging_steps,
            evaluation_strategy=self.config.evaluation_strategy,
            eval_steps=self.config.eval_steps,
            # Use the configured value instead of a hard-coded 1e6 so that
            # changing save_steps in the params file actually takes effect.
            save_steps=self._resolve_save_steps(self.config.save_steps),
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
        )

        trainer = Trainer(
            model=model_pegasus,
            args=training_args,
            tokenizer=tokenizer,
            data_collator=data_collator,
            train_dataset=dataset_samsum_pt['train'],
            eval_dataset=dataset_samsum_pt['validation'],
        )

        trainer.train()

        # Save the fine-tuned model.
        model_pegasus.save_pretrained(os.path.join(self.config.root_dir, 'pegasus_samsum_model'))

        # Save the tokenizer alongside it.
        tokenizer.save_pretrained(os.path.join(self.config.root_dir, 'tokenizer'))

In [6]:
import sys
from src.text_summarizer.custom_exception import CustomExeption
# Build the stage configuration, construct the trainer and run fine-tuning.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_tainer_config()
    # Keep the trainer under its own name so `model_trainer_config` still
    # refers to the configuration object after this cell has run.
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause.
    raise CustomExeption(e,sys) from e

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  1%|          | 10/920 [16:29<22:42:30, 89.84s/it]

{'loss': 1.1773, 'grad_norm': 65.30662536621094, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.01}


  2%|▏         | 20/920 [32:17<24:08:23, 96.56s/it] 

{'loss': 1.1537, 'grad_norm': 9.06582260131836, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.02}


  3%|▎         | 30/920 [47:38<24:17:40, 98.27s/it] 

{'loss': 1.1989, 'grad_norm': 7.4202728271484375, 'learning_rate': 3e-06, 'epoch': 0.03}


  4%|▍         | 40/920 [1:03:44<19:26:22, 79.53s/it]

{'loss': 1.1596, 'grad_norm': 13.718805313110352, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.04}


  5%|▌         | 50/920 [1:18:29<19:44:40, 81.70s/it]

{'loss': 0.9627, 'grad_norm': 647.3818969726562, 'learning_rate': 5e-06, 'epoch': 0.05}


  7%|▋         | 60/920 [1:37:06<28:11:24, 118.00s/it]

{'loss': 0.7509, 'grad_norm': 36.48863220214844, 'learning_rate': 6e-06, 'epoch': 0.07}


  8%|▊         | 70/920 [1:52:25<23:01:02, 97.48s/it] 

{'loss': 0.7077, 'grad_norm': 9.044624328613281, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.08}


  9%|▊         | 80/920 [2:09:49<19:54:44, 85.34s/it] 

{'loss': 0.5781, 'grad_norm': 3.745497465133667, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.09}


 10%|▉         | 90/920 [2:22:14<17:18:25, 75.07s/it]

{'loss': 0.6076, 'grad_norm': 3.3535051345825195, 'learning_rate': 9e-06, 'epoch': 0.1}


 11%|█         | 100/920 [2:36:02<21:41:14, 95.21s/it]

{'loss': 0.308, 'grad_norm': 10.215266227722168, 'learning_rate': 1e-05, 'epoch': 0.11}


 12%|█▏        | 110/920 [2:53:37<20:53:44, 92.87s/it] 

{'loss': 0.3045, 'grad_norm': 11.749611854553223, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.12}


 13%|█▎        | 120/920 [3:06:39<18:22:22, 82.68s/it]

{'loss': 0.4434, 'grad_norm': 3.4701895713806152, 'learning_rate': 1.2e-05, 'epoch': 0.13}


 14%|█▍        | 130/920 [3:23:52<22:42:15, 103.46s/it]

{'loss': 0.1829, 'grad_norm': 2.7185349464416504, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.14}


 15%|█▌        | 140/920 [3:38:59<20:13:16, 93.33s/it] 

{'loss': 0.2387, 'grad_norm': 2.7816717624664307, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.15}


 16%|█▋        | 150/920 [3:54:48<18:49:41, 88.03s/it] 

{'loss': 0.1951, 'grad_norm': 2.8853182792663574, 'learning_rate': 1.5e-05, 'epoch': 0.16}


 17%|█▋        | 160/920 [4:10:20<22:32:26, 106.77s/it]

{'loss': 0.1712, 'grad_norm': 5.5209641456604, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.17}


 18%|█▊        | 170/920 [4:29:01<24:16:04, 116.49s/it]

{'loss': 0.238, 'grad_norm': 5.288381576538086, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.18}


 20%|█▉        | 180/920 [4:43:07<20:06:06, 97.79s/it] 

{'loss': 0.1306, 'grad_norm': 2.705185651779175, 'learning_rate': 1.8e-05, 'epoch': 0.2}


 21%|██        | 190/920 [4:59:39<19:10:44, 94.58s/it] 

{'loss': 0.1407, 'grad_norm': 1.2231765985488892, 'learning_rate': 1.9e-05, 'epoch': 0.21}


 22%|██▏       | 200/920 [5:15:05<17:02:33, 85.21s/it] 

{'loss': 0.1099, 'grad_norm': 1.7011231184005737, 'learning_rate': 2e-05, 'epoch': 0.22}


 23%|██▎       | 210/920 [5:31:52<17:07:54, 86.87s/it] 

{'loss': 0.1334, 'grad_norm': 1.6577274799346924, 'learning_rate': 2.1e-05, 'epoch': 0.23}


 23%|██▎       | 214/920 [5:37:41<16:55:01, 86.26s/it]

: 