In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as the local `tools` module imported below) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Modules

In [2]:
import csv
import gc
import os
import time
from os import path

import GPUtil
import pandas as pd
import torch
from datasets import load_dataset
from peft import PeftModel
from transformers import TrainerCallback
from trl import SFTTrainer

from tools import (prep_tokenizer,
                   prepare_model,
                   prepare_training_arguments,
                   prepare_lora_arguments,
                   evaluate_model_for_f1_score)

In [3]:
class Logger(TrainerCallback):
    """Trainer callback that appends one ';'-separated row to a results file
    at the end of each epoch listed in ``epoch_values``.

    Each row holds: adapter name, base model name, quantization value, epoch
    number, LoRA r, LoRA alpha, elapsed training time, the memory-used figure
    GPUtil reports for the first GPU, and a "-" placeholder for the F1 score.

    Args:
        log_file_path: File the rows are appended to.
        adapter_name: Value written to the first column.
        model_name: Value written to the second column.
        q_value: Quantization setting, written as-is.
        epoch_values: Container of 1-based epoch numbers that should be logged.
        r_value: LoRA r, written as-is.
        lora_alpha: LoRA alpha, written as-is.
    """

    def __init__(self, log_file_path, adapter_name, model_name, q_value, epoch_values, r_value, lora_alpha):
        self.adapter_name = adapter_name
        self.model_name = model_name
        self.q_value = q_value
        self.epoch_values = epoch_values
        self.r_value = r_value
        self.lora_alpha = lora_alpha
        self.log_file_path = log_file_path
        self.start_time = None  # wall-clock time at which the first epoch began
        self.epoch = 0          # number of epochs finished so far
        self.GPUs = None        # last GPU list returned by GPUtil

    def on_epoch_begin(self, args, state, control, **kwargs):
        """Start the timer when the first epoch begins (or if it was never started)."""
        if self.epoch == 0 or self.start_time is None:
            self.start_time = time.time()

    def on_epoch_end(self, args, state, control, **kwargs):
        """Count the finished epoch and log a row if it is one of ``epoch_values``."""
        self.epoch += 1
        if self.epoch not in self.epoch_values:
            return

        # Elapsed time since the first epoch started, formatted as H:MM:SS.
        elapsed_seconds = int(time.time() - self.start_time)
        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        time_str = f"{hours}:{minutes:02d}:{seconds:02d}"

        # Memory in use on the first GPU; fall back to a placeholder instead of
        # raising IndexError (and aborting the training run) when none is visible.
        self.GPUs = GPUtil.getGPUs()
        vram_used = self.GPUs[0].memoryUsed if self.GPUs else "-"

        # The F1 score is not known during training; "-" marks it as pending.
        f1_avg = "-"

        fields = (
            self.adapter_name,
            self.model_name,
            self.q_value,
            self.epoch,
            self.r_value,
            self.lora_alpha,
            time_str,
            vram_used,
            f1_avg,
        )
        with open(self.log_file_path, mode='a', encoding='utf-8') as f:
            f.write(";".join(str(value) for value in fields) + "\n")

## Config

In [3]:
# Turn off the Weights & Biases integration for the training runs.
os.environ["WANDB_DISABLED"] = "true"

# --- Paths -------------------------------------------------------------------
adapters_path = "/mnt/shared/tibor/llm-hun-performance-benchmarks/adapters/grid-search-3"
result_file = "grid-search-results-4bitq.csv"
model_path = "/mnt/shared/tibor/Llama-2-7b-chat-hf"
# Model name without its directory; normpath keeps this correct even if
# model_path is given with a trailing slash.
model_name = os.path.basename(os.path.normpath(model_path))
adapter_name_template = model_name + "_{q_value}b_q_{r_value}_r_{lora_alpha}_a"
test_row_num = -1

# --- Grid --------------------------------------------------------------------
epoch_values = (3, 6, 12)      # epochs that are logged and later evaluated
r_values = (8, 16)             # LoRA r values to try
lora_alpha_values = (16, 32)   # LoRA alpha values to try
q_value = 4                    # quantization bits (the cells below handle 4 and 8)
test = False                   # True: adapters are named TEST_<i> and the flag is passed to the training arguments

# Adapter names for every (r, lora_alpha) combination, derived from the grid so
# the list cannot drift from r_values / lora_alpha_values. Sorting only fixes a
# deterministic evaluation order.
all_adapters = sorted(
    adapter_name_template.format(q_value=q_value, r_value=r_value, lora_alpha=alpha_value)
    for r_value in r_values
    for alpha_value in lora_alpha_values
)


## Search

In [4]:
# Fine-tuning data: both ';'-separated CSV files are loaded in one call and are
# later accessed by split name ('train' / 'eval').
train_dataset = load_dataset(
    "csv",
    data_files={
        'train': 'data/train_w_noansw.csv',
        'eval': 'data/eval_w_noansw.csv',
    },
    delimiter=";",
    column_names=['question', 'context', 'answer', 'text'],
)

# Test set as a pandas DataFrame, reachable under both names.
test_df = pd.read_csv("data/test_w_noansw.csv", sep=';')
test_dataset = test_df

In [6]:
# Start a fresh results file containing only the header row.
# NOTE: mode='w' discards any results from a previous run of the grid search.
# The directory is created first so this cell also works before any adapter
# has been written there.
os.makedirs(adapters_path, exist_ok=True)
with open(path.join(adapters_path, result_file), mode='w', encoding='utf-8') as f:
    f.write("adapter_name;base_model;q_value;epoch_value;r_value;lora_alpha_value;train_time;vram_usage;f1_score\n")

In [None]:
# Grid search: fine-tune one LoRA adapter per (r, lora_alpha) combination.
# Each run trains for max(epoch_values) epochs; the Logger callback appends a
# results row at every epoch listed in epoch_values. F1 scores are filled in by
# the evaluation section further down.

# Fail fast: only 4- and 8-bit quantization are handled below.
if q_value not in (4, 8):
    raise ValueError(f"Unsupported q_value={q_value!r}; expected 4 or 8")

test_i = 0
for r in r_values:
    for lora_alpha in lora_alpha_values:

        print(f"{r=},{lora_alpha=}")

        if test:
            adapter_name = f"TEST_{test_i}"
            test_i += 1
        else:
            adapter_name = adapter_name_template.format(q_value=q_value, r_value=r, lora_alpha=lora_alpha)
        print(f"{adapter_name=}")

        # Output directory of the current adapter.
        adapter_path = path.join(adapters_path, adapter_name)
        # all_adapters holds adapter *names* (the evaluation code joins them with
        # adapters_path and matches them against the adapter_name CSV column), so
        # register the name - once - rather than the full path.
        if adapter_name not in all_adapters:
            all_adapters.append(adapter_name)
        print(f"{adapter_path=}")

        # Tokenizer that adds EOS tokens, for training.
        tokenizer = prep_tokenizer(model_path=model_path, add_eos_token=True)
        print("Tokenizer loaded")

        training_arguments = prepare_training_arguments(
            test=test,
            output_dir=adapter_path,
            num_train_epochs=max(epoch_values))
        peft_config = prepare_lora_arguments(lora_alpha=lora_alpha, r=r)

        # A fresh quantized base model is loaded for every run (4 or 8 bit).
        model = prepare_model(
            model_path=model_path,
            tokenizer=tokenizer,
            quantize=True,
            load_in_4bit=(q_value == 4),
            load_in_8bit=(q_value == 8))

        # Callback that writes the per-epoch rows of the results file.
        logger = Logger(
            log_file_path=path.join(adapters_path, result_file),
            adapter_name=adapter_name,
            model_name=model_name,
            q_value=q_value,
            epoch_values=epoch_values,
            r_value=r,
            lora_alpha=lora_alpha)

        trainer = SFTTrainer(
            model=model,
            callbacks=[logger],
            train_dataset=train_dataset['train'],
            eval_dataset=train_dataset['eval'],
            peft_config=peft_config,
            dataset_text_field="text",
            max_seq_length=min(tokenizer.model_max_length, 1024),
            tokenizer=tokenizer,
            args=training_arguments)

        print("Trainer prepared")

        # Fine-tune.
        start_time = time.time()
        trainer.train()
        end_time = time.time()
        print(f"Training finished in {end_time - start_time:.0f} s")

        # Free GPU memory for the next run. The references must be dropped
        # *before* emptying the cache; memory that is still referenced cannot
        # be released.
        del trainer
        del model
        gc.collect()
        torch.cuda.empty_cache()


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


r=8,lora_alpha=16
adapter_name='Llama-2-7b-chat-hf_4b_q_8_r_16_a'
adapter_path='/mnt/shared/tibor/llm-hun-performance-benchmarks/adapters/grid-search-3/Llama-2-7b-chat-hf_4b_q_8_r_16_a'
Tokenizer loaded


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[codecarbon INFO @ 20:35:15] [setup] RAM Tracking...
[codecarbon INFO @ 20:35:15] [setup] GPU Tracking...
[codecarbon INFO @ 20:35:15] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 20:35:15] [setup] CPU Tracking...
[codecarbon INFO @ 20:35:16] CPU Model on constant consumption mode: AMD EPYC-Rome Processor
[codecarbon INFO @ 20:35:16] >>> Tracker's metadata:
[codecarbon INFO @ 20:35:16]   Platform system: Linux-5.15.0-41-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 20:35:16]   Python version: 3.10.12
[codecarbon INFO @ 20:35:16]   CodeCarbon version: 2.2.3
[codecarbon INFO @ 20:35:16]   Available RAM : 31.354 GB
[codecarbon INFO @ 20:35:16]   CPU count: 8
[codecarbon INFO @ 20:35:16]   CPU model: AMD EPYC-Rome Processor
[codecarbon INFO @ 20:35:16]   GPU count: 1
[codecarbon INFO @ 20:35:16]   GPU model: 1 x GRID A100-20C


Trainer prepared


Currently training with a batch size of: 4
***** Running training *****
  Num examples = 3,188
  Num Epochs = 12
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 9,564
  Number of trainable parameters = 4,194,304
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss
1,1.3147,1.249405
2,1.1969,1.198025
3,1.1188,1.156124
4,1.0411,1.11314
5,0.9627,1.075317
6,0.8854,1.031128


[codecarbon INFO @ 20:35:32] Energy consumed for RAM : 0.000049 kWh. RAM Power : 11.757657051086426 W
[codecarbon INFO @ 20:35:32] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[codecarbon INFO @ 20:35:32] Energy consumed for all CPUs : 0.000177 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:35:32] 0.000226 kWh of electricity used since the beginning.
[codecarbon INFO @ 20:35:47] Energy consumed for RAM : 0.000098 kWh. RAM Power : 11.757657051086426 W
[codecarbon INFO @ 20:35:47] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[codecarbon INFO @ 20:35:47] Energy consumed for all CPUs : 0.000354 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:35:47] 0.000452 kWh of electricity used since the beginning.
[codecarbon INFO @ 20:36:02] Energy consumed for RAM : 0.000147 kWh. RAM Power : 11.757657051086426 W
[codecarbon INFO @ 20:36:02] Energy consumed for all GPUs : 0.000000 kWh. Total GPU Power : 0.0 W
[codecarbon INFO @ 20:36:02] Energy 

## Calculate F1 scores

In [8]:
def checkpoints_only(dirs: list) -> list[str]:
    """Return the checkpoint directory names in ``dirs``, ordered by step number.

    A name is kept when it contains ``'checkpoint'`` and the text after its
    last ``'-'`` parses as an integer (e.g. ``'checkpoint-500'``). Names that
    contain ``'checkpoint'`` but have no numeric suffix are skipped instead of
    raising ``ValueError``.

    Args:
        dirs: Directory names to filter.

    Returns:
        The matching names sorted ascending by their numeric suffix.
    """
    numbered = []
    for name in dirs:
        if 'checkpoint' not in name:
            continue
        try:
            step = int(name.split('-')[-1])
        except ValueError:
            # e.g. a plain "checkpoint" or "checkpoint-final" directory
            continue
        numbered.append((step, name))

    # Sort on the step only; the sort is stable, so ties keep their input order.
    numbered.sort(key=lambda item: item[0])
    return [name for _, name in numbered]

def _store_f1_score(results_path, adapter_name, epoch, f1_score):
    """Write ``f1_score`` into the row(s) of the ';'-separated results CSV whose
    adapter_name and epoch_value match, leaving all other rows unchanged."""
    with open(results_path, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file, delimiter=';')
        fieldnames = reader.fieldnames
        rows = list(reader)

    for row in rows:
        # Values read from the CSV are strings, hence str(epoch).
        if row['adapter_name'] == adapter_name and row['epoch_value'] == str(epoch):
            row['f1_score'] = f1_score

    with open(results_path, mode='w', encoding='utf-8', newline='') as file:
        # '\n' line endings match the header and the rows appended during training.
        writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter=';', lineterminator='\n')
        writer.writeheader()
        writer.writerows(rows)


# Fail fast: only 4- and 8-bit quantization are handled below.
if q_value not in (4, 8):
    raise ValueError(f"Unsupported q_value={q_value!r}; expected 4 or 8")

# Tokenizer without EOS tokens, for generation.
eval_tokenizer = prep_tokenizer(model_path=model_path, add_eos_token=False)
# The quantized base model is loaded once and reused for every checkpoint.
model = prepare_model(
    model_path=model_path,
    tokenizer=eval_tokenizer,
    quantize=True,
    load_in_4bit=(q_value == 4),
    load_in_8bit=(q_value == 8))
model_w_adapter = None

results_path = os.path.join(adapters_path, result_file)

# all_adapters may contain both plain names and full paths of the same adapter;
# reduce every entry to its directory name and drop duplicates (keeping order)
# so each adapter is evaluated once and matches the adapter_name CSV column.
adapter_names = list(dict.fromkeys(
    os.path.basename(os.path.normpath(entry)) for entry in all_adapters))

for adapter in adapter_names:
    adapter_dir = os.path.join(adapters_path, adapter)
    if not os.path.isdir(adapter_dir):
        print(f"{adapter_dir} not found - skipping")
        continue

    # Top-level sub-directories only.
    _, subdirs, _ = next(os.walk(adapter_dir), (adapter_dir, [], []))

    # NOTE(review): the epoch is taken from the checkpoint's position in step
    # order, which assumes exactly one checkpoint was saved per epoch and none
    # was removed - confirm against prepare_training_arguments.
    for epoch, checkpoint in enumerate(checkpoints_only(subdirs), start=1):
        if epoch not in epoch_values:  # evaluate only the selected epochs
            continue

        adapter_full_path = os.path.join(adapter_dir, checkpoint)
        print(f"{adapter_full_path} - epoch {epoch}")

        model_w_adapter = PeftModel.from_pretrained(model, adapter_full_path)
        avg_f1_score = evaluate_model_for_f1_score(
            model=model_w_adapter,
            tokenizer=eval_tokenizer,
            dataset=test_dataset,
            result_csv_path=adapter_full_path)
        # Presumably removes the adapter layers from the shared base model so it
        # can take the next checkpoint - TODO confirm against the peft docs.
        model_w_adapter.unload()

        _store_f1_score(results_path, adapter, epoch, avg_f1_score)

torch.cuda.empty_cache() # cleanup

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


KeyboardInterrupt

