In [2]:
import os
# Show the kernel's current working directory before changing it.
%pwd

'd:\\Ml Dl\\Project\\Text-Summaraizer\\reasearch'

In [10]:
cd ../

d:\Ml Dl\Project\Text-Summaraizer


In [11]:
# Confirm the working directory after the directory change.
%pwd

'd:\\Ml Dl\\Project\\Text-Summaraizer'

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage.

    The first three fields are read from the ``model_trainer`` section of the
    config file and the remaining ones from the ``model_trainer`` section of
    the params file (see ``ConfigurationManager.get_model_trainer``).
    """

    # Directory for this stage; created via create_directories() when the config is built.
    root_dir: Path
    # Location of the saved dataset handed to datasets.load_from_disk().
    data_path: Path
    # Identifier passed to from_pretrained() for both the model and the tokenizer.
    model_name: str
    # Populated from params.epochs. The annotation was previously the literal
    # ``1`` rather than a type; it is now ``int`` like the other counters.
    # NOTE(review): not read by Model_Trainer.train in this notebook, which
    # uses num_train_epochs — confirm whether this field is still needed.
    epochs: int
    # The fields below are forwarded unchanged to transformers.TrainingArguments.
    output_dir: str
    num_train_epochs: int
    per_device_train_batch_size: int
    per_device_eval_batch_size: int
    warmup_steps: int
    gradient_accumulation_steps: int
    weight_decay: float


In [None]:
from src.utils.common import read_yaml
from src.utils.common import create_directories
from src.constants import CONFIG_FILE_PATH,PARAMS_FILE_PATH
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and prepare the artifacts root directory.

        Args:
            config_filepath: Path handed to ``read_yaml`` for the config file.
            params_filepath: Path handed to ``read_yaml`` for the params file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # create_directories is a project helper; presumably it creates the
        # listed directories when they are missing — verify in src.utils.common.
        create_directories([self.config.artifacts_root])

    def get_model_trainer(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` from the ``model_trainer`` sections.

        Returns:
            A ``ModelTrainerConfig`` whose path/model fields come from the
            config file and whose hyperparameters come from the params file.
        """
        trainer_cfg = self.config.model_trainer        # model_trainer section of the config file
        trainer_params = self.params.model_trainer     # model_trainer section of the params file
        create_directories([trainer_cfg.root_dir])     # directory for the model_trainer stage

        # Hyperparameters copied one-to-one from the params file, in field order.
        hyperparameter_names = (
            "epochs",
            "output_dir",
            "num_train_epochs",
            "per_device_train_batch_size",
            "per_device_eval_batch_size",
            "warmup_steps",
            "gradient_accumulation_steps",
            "weight_decay",
        )

        return ModelTrainerConfig(
            root_dir=trainer_cfg.root_dir,
            data_path=trainer_cfg.data_path,
            model_name=trainer_cfg.model_name,
            **{name: getattr(trainer_params, name) for name in hyperparameter_names},
        )

In [None]:
import os
from src.logger import logger
from src.entity import ModelTrainerConfig
import torch
from transformers import AutoModelForSeq2SeqLM,AutoTokenizer
from transformers import DataCollatorForSeq2Seq
from transformers import  Trainer, TrainingArguments
from datasets import load_from_disk

class Model_Trainer:
    """Fine-tunes a pretrained seq2seq model with the Hugging Face ``Trainer``."""

    def __init__(self, config: ModelTrainerConfig):
        """Load the model, tokenizer and data collator named by ``config``.

        Args:
            config: Training settings; ``config.model_name`` is passed to
                ``from_pretrained`` for both the model and the tokenizer.
        """
        self.config = config
        # Prefer the GPU when one is available, otherwise fall back to CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForSeq2SeqLM.from_pretrained(config.model_name).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        self.data_collator = DataCollatorForSeq2Seq(self.tokenizer, model=self.model)

    def train(self):
        """Run training on the dataset stored at ``config.data_path``.

        Builds ``TrainingArguments`` from the config, loads the dataset from
        disk and calls ``Trainer.train()``. Returns ``None``; this method does
        not itself save the model or tokenizer.
        """
        training_args = TrainingArguments(
            # Mixed-precision fp16 is only accepted by TrainingArguments on a
            # CUDA device; enabling it unconditionally raised on CPU-only hosts.
            fp16=self.device == "cuda",
            output_dir=self.config.output_dir,
            num_train_epochs=self.config.num_train_epochs,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            per_device_eval_batch_size=self.config.per_device_eval_batch_size,
            warmup_steps=self.config.warmup_steps,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
            weight_decay=self.config.weight_decay,
        )
        dataset_samsum = load_from_disk(self.config.data_path)

        trainer = Trainer(
            model=self.model,
            args=training_args,
            # Only the first 10 rows of each split are used — presumably a
            # quick smoke-test subset. NOTE(review): widen for a real run.
            train_dataset=dataset_samsum["train"].select(range(10)),
            eval_dataset=dataset_samsum["test"].select(range(10)),
            data_collator=self.data_collator,
        )
        trainer.train()

    