In [1]:
import os
os.chdir('../')

In [2]:
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv()) # read local .env file

MODEL_CONFIG_FILE_PATH = os.environ['MODEL_CONFIG_FILE_PATH']
MODEL_PARAMS_FILE_PATH = os.environ['MODEL_PARAMS_FILE_PATH']

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    data_path: Path
    base_model: str
    training_name: str
    upload_from_hf: bool
    hf_model_name: str

@dataclass(frozen=True)
class LoraParameters:
    r: int
    target_modules: list
    lora_alpha: float
    lora_dropout: float
    bias: str
    task_type: str

@dataclass(frozen=True)
class BitsAndBytesParameters:
    load_in_4bit: bool
    bnb_4bit_quant_type: str
    bnb_4bit_use_double_quant: bool

@dataclass(frozen=True)
class TrainingArgumentsParameters:
    output_dir: str
    evaluation_strategy: str
    save_strategy: str
    num_train_epochs: float
    per_device_train_batch_size: int
    gradient_accumulation_steps: int
    optim: str
    learning_rate: float
    fp16: bool
    max_grad_norm: float
    warmup_ratio: float
    group_by_length: bool
    lr_scheduler_type: str

In [4]:
from src.utils.common import read_yaml

class ConfigurationManager:
    def __init__(self,
                model_config_filepath = MODEL_CONFIG_FILE_PATH,
                model_params_filepath = MODEL_PARAMS_FILE_PATH):

        self.config = read_yaml(Path(model_config_filepath))
        self.params = read_yaml(Path(model_params_filepath))


    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        model_trainer_config = ModelTrainerConfig(
            root_dir = Path(config.root_dir),
            data_path = Path(config.data_path),
            base_model = config.base_model,
            training_name = config.training_name,
            upload_from_hf = config.upload_from_hf,
            hf_model_name = config.hf_model_name
        )

        return model_trainer_config
    

    def get_lora_params(self) -> LoraParameters:
        params = self.params.lora_parameters

        lora_parameters = LoraParameters(
            r = params.r,
            target_modules = params.target_modules,
            lora_alpha = params.lora_alpha,
            lora_dropout = params.lora_dropout,
            bias = params.bias,
            task_type = params.task_type
        )

        return lora_parameters
    

    def get_bits_and_bytes_params(self) -> BitsAndBytesParameters:
        params = self.params.bits_and_bytes_parameters

        bits_and_bytes_parameters = BitsAndBytesParameters(
            load_in_4bit = params.load_in_4bit,
            bnb_4bit_quant_type = params.bnb_4bit_quant_type,
            bnb_4bit_use_double_quant = params.bnb_4bit_use_double_quant
        )

        return bits_and_bytes_parameters
    

    def get_training_args(self) -> TrainingArgumentsParameters:
        params = self.params.training_arguments

        training_args = TrainingArgumentsParameters(
            output_dir = params.output_dir,
            evaluation_strategy = params.evaluation_strategy,
            save_strategy = params.save_strategy,
            num_train_epochs = params.num_train_epochs,
            per_device_train_batch_size = params.per_device_train_batch_size,
            gradient_accumulation_steps = params.gradient_accumulation_steps,
            optim = params.optim,
            learning_rate = params.learning_rate,
            fp16 = params.fp16,
            max_grad_norm = params.max_grad_norm,
            warmup_ratio = params.warmup_ratio,
            group_by_length = params.group_by_length,
            lr_scheduler_type = params.lr_scheduler_type
        )

        return training_args


In [5]:
import torch 
import locale
import math
import mlflow
import pandas as pd
from trl import SFTTrainer
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, TrainingArguments, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, PeftModel

from src.logging import logger

class ModelTrainer:
    def __init__(self, model_trainer_config: ModelTrainerConfig, lora_parameters: LoraParameters, bits_and_bytes_parameters: BitsAndBytesParameters, training_arguments: TrainingArgumentsParameters):
        self.model_trainer_config = model_trainer_config
        self.lora_parameters = lora_parameters
        self.bits_and_bytes_parameters = bits_and_bytes_parameters
        self.training_arguments = training_arguments


    def __load_data(self):
        train_dataset = pd.read_csv(os.path.join(self.model_trainer_config.data_path, "train_dataset.csv"))
        eval_dataset = pd.read_csv(os.path.join(self.model_trainer_config.data_path, "eval_dataset.csv"))

        train_dataset = Dataset.from_pandas(train_dataset)
        eval_dataset = Dataset.from_pandas(eval_dataset)

        self.train_dataset = train_dataset
        self.eval_dataset = eval_dataset
        logger.info("Data loaded")


    def __initialize_tokenizer(self, model_name: str):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_trainer_config.base_model)
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        logger.info("Tokenizer initialized")


    def __initialize_lora(self):
        self.lora_cofig = LoraConfig(
            r = self.lora_parameters.r,
            target_modules = self.lora_parameters.target_modules,
            lora_alpha = self.lora_parameters.lora_alpha,
            lora_dropout = self.lora_parameters.lora_dropout,
            bias = self.lora_parameters.bias,
            task_type = self.lora_parameters.task_type
        )
        logger.info("Lora initialized")


    def __initialize_bits_and_bytes(self):
        self.nf4_config = BitsAndBytesConfig(
            load_in_4bit = self.bits_and_bytes_parameters.load_in_4bit,
            bnb_4bit_quant_type = self.bits_and_bytes_parameters.bnb_4bit_quant_type,
            bnb_4bit_use_double_quant = self.bits_and_bytes_parameters.bnb_4bit_use_double_quant,
            bnb_4bit_compute_dtype = torch.bfloat16
        )
        logger.info("Bits and bytes initialized")
    

    def __initialize_training_arguments(self):
        self.training_args = TrainingArguments(
            output_dir = self.training_arguments.output_dir,
            evaluation_strategy = self.training_arguments.evaluation_strategy,
            save_strategy = self.training_arguments.save_strategy,
            num_train_epochs = self.training_arguments.num_train_epochs,
            per_device_train_batch_size = self.training_arguments.per_device_train_batch_size,
            gradient_accumulation_steps = self.training_arguments.gradient_accumulation_steps,
            optim = self.training_arguments.optim,
            learning_rate = self.training_arguments.learning_rate,
            fp16 = self.training_arguments.fp16,
            max_grad_norm = self.training_arguments.max_grad_norm,
            warmup_ratio = self.training_arguments.warmup_ratio,
            group_by_length = self.training_arguments.group_by_length,
            lr_scheduler_type = self.training_arguments.lr_scheduler_type
        )
        logger.info("Training arguments initialized")


    def __create_model(self):
        self.model = LlamaForCausalLM.from_pretrained(
            self.model_trainer_config.base_model, device_map='auto', quantization_config=self.nf4_config,
        )
        self.model = get_peft_model(self.model, self.lora_config)
        #self.model.print_trainable_parameters()
        logger.info("Model created")

    def __evaluate(self, trainer):
        evaluation_results = trainer.evaluate()
        logger.info(f"Perplexity: {math.exp(evaluation_results['eval_loss']):.2f}")

    def __save_model(self, model):
        model.save_pretrained(os.path.join(self.config.root_dir, f"{self.model_trainer_config.training_name}-math-adapters"))
        logger.info("Model saved")


    def train(self):
        if self.model_trainer_config.upload_from_hf:
            logger.info("Uploading model from HuggingFace")
            self.__initialize_tokenizer(self.model_trainer_config.base_model)
            self.__initialize_bits_and_bytes()
            
            model = AutoModelForCausalLM.from_pretrained(self.model_trainer_config.hf_model_name,
                                                              device_map='auto',
                                                              quantization_config=self.nf4_config)
            
            peft_model = PeftModel.from_pretrained(model,
                                                    self.model_trainer_config.hf_model_name)
            logger.info("Model uploaded")
            
            self.__save_model(peft_model)

            return None

        if torch.cuda.is_available():
            try:
                locale.getpreferredencoding = lambda: "UTF-8"
                
                self.__load_data()
                self.__initialize_tokenizer(self.model_trainer_config.base_model)
                self.__initialize_lora()
                self.__initialize_bits_and_bytes()
                self.__initialize_training_arguments()
                self.__create_model()

                trainer = SFTTrainer(self.model,
                                    train_dataset=self.train_dataset,
                                    eval_dataset=self.eval_dataset,
                                    dataset_text_field="text",
                                    max_seq_length=256,
                                    args=self.training_args,
                                    )
                logger.info("Trainer created")
                
                #Upcast layer norms to float 32 for stability
                for name, module in trainer.model.named_modules():
                    if "norm" in name:
                        module = module.to(torch.float32)
                logger.info("Layer norms upcasted to float32")
                
                logger.info(">>>>>>> Training started <<<<<<<<")
                with mlflow.start_run(run_name=self.model_trainer_config.training_name):
                    trainer.train()
                logger.info(">>>>>>> Training completed <<<<<<<<")

                self.__evaluate(trainer)
                
                self.__save_model(self.model)

            except Exception as e:
                raise e
        else:
            raise Exception("No GPU found")


* 'schema_extra' has been renamed to 'json_schema_extra'
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'




In [6]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    lora_parameters = config.get_lora_params()
    bits_and_bytes_parameters = config.get_bits_and_bytes_params()
    training_args = config.get_training_args()
    model_trainer = ModelTrainer(model_trainer_config=model_trainer_config, lora_parameters=lora_parameters, bits_and_bytes_parameters=bits_and_bytes_parameters, training_arguments=training_args)
    model_trainer.train()
except Exception as e:
    raise e

[2023-12-01 16:51:01,064: INFO: common: yaml file: config\model-config.yaml loaded successfully]
[2023-12-01 16:51:01,076: INFO: common: yaml file: config\model-parameters.yaml loaded successfully]
[2023-12-01 16:51:01,077: INFO: 2242578423: Uploading model from HuggingFace]
[2023-12-01 16:55:41,060: INFO: 2242578423: Tokenizer initialized]
[2023-12-01 16:55:41,063: INFO: 2242578423: Bits and bytes initialized]


OSError: Logisx/open_llama_3b_v2-Fine-Tuned-Grade_School_Math_Instructions does not appear to have a file named config.json. Checkout 'https://huggingface.co/Logisx/open_llama_3b_v2-Fine-Tuned-Grade_School_Math_Instructions/main' for available files.