In [1]:
import os
import torch
import pandas as pd
import bitsandbytes as bnb
import argparse

from datasets import load_dataset, load_from_disk, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    DataCollatorForLanguageModeling
)

from peft import (
    PeftModel,
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)
from trl import setup_chat_format, SFTConfig, SFTTrainer
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Runtime configuration -------------------------------------------------
# Expose only GPU 0 to this process and disable tokenizers' internal parallelism.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# set the wandb project where this run will be logged
os.environ["WANDB_PROJECT"] = "cs769_llama"
# turn off watch to log faster
os.environ["WANDB_WATCH"] = "false"

# SECURITY: never hard-code access tokens in a notebook -- they leak through
# version control and shared outputs. The token is read from the environment
# instead (export HF_TOKEN=... before starting the kernel). When the variable
# is unset this is None, and `from_pretrained(token=None)` falls back to the
# credentials cached by `huggingface-cli login`.
# The token that used to be written here must be treated as compromised and
# revoked in the Hugging Face account settings.
HF_TOKEN = os.environ.get("HF_TOKEN")
base_model_name = "meta-llama/Llama-3.2-3b-Instruct"
root_model_dir = "LoraModel"
dataset_name = 'openlifescienceai/medmcqa'

# 4-bit NF4 quantization with double quantization; matmuls run in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Pass the same token used for the model so both loads authenticate the same
# way against the gated repository.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    token=HF_TOKEN,
)
# Reuse the EOS token for padding (presumably because the base tokenizer
# defines no dedicated pad token -- confirm against the model card).
tokenizer.pad_token = tokenizer.eos_token

In [3]:
def find_all_linear_names(model):
    """Return the leaf names of all 4-bit linear layers found in ``model``.

    Every submodule yielded by ``model.named_modules()`` that is an instance
    of ``bnb.nn.Linear4bit`` contributes the last dot-separated component of
    its qualified name (e.g. ``a.b.q_proj`` -> ``q_proj``). ``lm_head`` is
    excluded from the result when present.

    Args:
        model: Object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.

    Returns:
        list[str]: Unique leaf names, in set (i.e. unspecified) order.
    """
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, bnb.nn.Linear4bit)
    }
    # needed for 16 bit
    leaf_names.discard('lm_head')
    return list(leaf_names)


def format_chat_template(row):
    """Attach a chat-formatted training string to one multiple-choice record.

    Reads ``row['question']``, the four options ``row['opa']`` .. ``row['opd']``
    and the correct-option index ``row['cop']`` (0..3, mapped to "A".."D").
    Builds a system / user / assistant conversation and renders it to text
    with the module-level ``tokenizer``'s chat template (no tokenization).

    Args:
        row: Mapping-like record supporting item access and assignment.

    Returns:
        The same ``row`` object, with the rendered conversation stored under
        ``row["text"]``.

    Raises:
        KeyError: if a required field is missing or ``row['cop']`` is not one
            of 0, 1, 2, 3.
    """
    # Correct-option index -> answer letter.
    answer_letters = dict(enumerate("ABCD"))

    # NOTE: the whitespace inside the two prompt literals below is part of the
    # training text; keep it exactly as is.
    system_prompt = """
    Answer the following multiple choice question by giving the most appropriate response. 
    Answer should be one among [A, B, C, D].
    """

    option_a, option_b = row['opa'], row['opb']
    option_c, option_d = row['opc'], row['opd']

    question_block = f"""Question: {row['question']}
                A) {option_a}
                B) {option_b}
                C) {option_c}
                D) {option_d}
            """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question_block},
        {"role": "assistant", "content": answer_letters[row['cop']]},
    ]

    row["text"] = tokenizer.apply_chat_template(messages, tokenize=False)
    return row

def create_lora_model(base_model, difficulty, modules):
    """Wrap ``base_model`` with a LoRA adapter sized by ``difficulty``.

    Args:
        base_model: Model handed to ``get_peft_model``.
        difficulty: One of ``'easy'``, ``'medium'``, ``'hard'``; selects the
            LoRA rank / alpha pair (8/16, 16/32, 32/64 respectively).
        modules: Value forwarded as ``target_modules`` of the ``LoraConfig``.

    Returns:
        tuple: ``(lora_config, peft_model)``.

    Raises:
        KeyError: if ``difficulty`` is not one of the three known levels.
    """
    # difficulty -> (rank, alpha)
    lora_hparams = {
        'easy': (8, 16),
        'medium': (16, 32),
        'hard': (32, 64),
    }
    rank, alpha = lora_hparams[difficulty]

    lora_config = LoraConfig(
        r=rank,
        lora_alpha=alpha,
        target_modules=modules,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    wrapped_model = get_peft_model(base_model, lora_config)
    return lora_config, wrapped_model


def train_adapter(model_dir, run_name, bnb_config, difficulty_level, train_data, val_data, peft_model, peft_config):
    """Fine-tune a LoRA adapter with TRL's ``SFTTrainer`` and save it.

    Args:
        model_dir: Root directory for outputs. Trainer checkpoints go to
            ``./{model_dir}/{difficulty_level}``; the adapter saved after
            training goes to ``{model_dir}/{difficulty_level}_best``.
        run_name: Run name passed to the trainer (reported to wandb).
        bnb_config: Accepted for interface compatibility; not used here.
        difficulty_level: Label of the subset being trained
            (used for logging and output paths).
        train_data: Training dataset; must provide a ``text`` column.
        val_data: Evaluation dataset; must provide a ``text`` column.
        peft_model: PEFT-wrapped model to train.
        peft_config: LoRA configuration forwarded to ``SFTTrainer``.

    Returns:
        None. Side effects: trains ``peft_model`` and writes checkpoints
        and the final adapter to disk.

    Note:
        Tokenization uses the module-level ``tokenizer``.
    """
    # Use the function's own argument here. The previous code read a global
    # named `difficulty`, which raises NameError when that global does not
    # exist and can print a stale value when it does.
    print(f"Parameters for the {difficulty_level} LoRA model: ")
    peft_model.print_trainable_parameters()

    training_arguments = SFTConfig(
        output_dir=f"./{model_dir}/{difficulty_level}",
        per_device_train_batch_size=16,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=2,
        optim="paged_adamw_32bit",
        num_train_epochs=2,
        # Evaluate and log on the same 250-step cadence.
        eval_strategy="steps",
        eval_steps=250,
        logging_steps=250,
        warmup_ratio=0.03,
        logging_strategy='steps',
        learning_rate=2e-4,
        # Mixed-precision training is disabled for both fp16 and bf16.
        fp16=False,
        bf16=False,
        group_by_length=True,
        remove_unused_columns=True,
        report_to='wandb',
        run_name=run_name,
        max_seq_length=512,
        dataset_text_field='text',
        label_names=["labels"],
        # Keep the checkpoint with the lowest evaluation loss.
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
    )
    trainer = SFTTrainer(
        model=peft_model,
        train_dataset=train_data,
        eval_dataset=val_data,
        peft_config=peft_config,
        processing_class=tokenizer,
        args=training_arguments,
    )

    trainer.train()
    peft_model.save_pretrained(os.path.join(model_dir, f'{difficulty_level}_best'))

In [7]:
# It's better to load from the checkpoint with the lowest loss,
# as 'load_best_model_at_end' depends on compute_metrics.

In [8]:
if __name__ == '__main__':

    # Subset to fine-tune on: "easy", "medium" or "hard". Hard-coded for
    # notebook use; as a script this would be a required `--difficulty`
    # command-line argument.
    difficulty = 'easy'

    # On-disk location of each difficulty subset.
    subset_dirs = {
        'easy': './json_to_hf/subset1',
        'medium': './json_to_hf/subset2',
        'hard': './json_to_hf/subset3',
    }

    # hack to handle data errors while using map function:
    # rebuild the dataset through a pandas DataFrame before mapping.
    subset_frame = pd.DataFrame(load_from_disk(subset_dirs[difficulty]))
    train_dataset = Dataset.from_pandas(subset_frame).map(format_chat_template)

    val_data = load_dataset(dataset_name, split='validation', trust_remote_code=True)
    val_dataset = val_data.map(format_chat_template)

    # Use existing global bnb_config defined earlier
    base_model = prepare_model_for_kbit_training(
        AutoModelForCausalLM.from_pretrained(
            base_model_name,
            quantization_config=bnb_config,
            device_map="auto",
            token=HF_TOKEN,
        )
    )
    modules = find_all_linear_names(base_model)
    peft_config, peft_model = create_lora_model(base_model, difficulty, modules)

    # NOTE(review): the hyphen in the directory name below is a non-ASCII
    # character (looks like U+2010), not "-". Saved adapters live under this
    # exact name, so confirm downstream loaders before normalising it.
    train_adapter(
        model_dir="LoRA‐MedQA",
        run_name=f'Lora_{difficulty}',
        bnb_config=bnb_config,
        difficulty_level=difficulty,
        train_data=train_dataset,
        val_data=val_dataset,
        peft_model=peft_model,
        peft_config=peft_config,
    )
    torch.cuda.empty_cache()

Map: 100%|██████████| 38800/38800 [00:06<00:00, 5779.70 examples/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]


Parameters for the easy LoRA model: 
trainable params: 12,156,928 || all params: 3,224,906,752 || trainable%: 0.3770


Converting train dataset to ChatML: 100%|██████████| 38800/38800 [00:03<00:00, 11573.93 examples/s]
Adding EOS to train dataset: 100%|██████████| 38800/38800 [00:03<00:00, 10254.30 examples/s]
Tokenizing train dataset: 100%|██████████| 38800/38800 [00:14<00:00, 2701.48 examples/s]
Truncating train dataset: 100%|██████████| 38800/38800 [00:00<00:00, 301948.36 examples/s]
[34m[1mwandb[0m: Currently logged in as: [33msyammohan2103[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
250,0.9479,0.850179
500,0.6965,0.83968
750,0.6865,0.830932
1000,0.6734,0.826586
1250,0.6666,0.82899
1500,0.6197,0.830467
1750,0.6094,0.826752
2000,0.6131,0.826002
2250,0.6039,0.826664


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


In [9]:
if __name__ == '__main__':

    # Subset to fine-tune on: "easy", "medium" or "hard". Hard-coded for
    # notebook use; as a script this would be a required `--difficulty`
    # command-line argument.
    difficulty = 'medium'

    # On-disk location of each difficulty subset.
    subset_dirs = {
        'easy': './json_to_hf/subset1',
        'medium': './json_to_hf/subset2',
        'hard': './json_to_hf/subset3',
    }

    # hack to handle data errors while using map function:
    # rebuild the dataset through a pandas DataFrame before mapping.
    subset_frame = pd.DataFrame(load_from_disk(subset_dirs[difficulty]))
    train_dataset = Dataset.from_pandas(subset_frame).map(format_chat_template)

    val_data = load_dataset(dataset_name, split='validation', trust_remote_code=True)
    val_dataset = val_data.map(format_chat_template)

    # Use existing global bnb_config defined earlier
    base_model = prepare_model_for_kbit_training(
        AutoModelForCausalLM.from_pretrained(
            base_model_name,
            quantization_config=bnb_config,
            device_map="auto",
            token=HF_TOKEN,
        )
    )
    modules = find_all_linear_names(base_model)
    peft_config, peft_model = create_lora_model(base_model, difficulty, modules)

    # NOTE(review): the hyphen in the directory name below is a non-ASCII
    # character (looks like U+2010), not "-". Saved adapters live under this
    # exact name, so confirm downstream loaders before normalising it.
    train_adapter(
        model_dir="LoRA‐MedQA",
        run_name=f'Lora_{difficulty}',
        bnb_config=bnb_config,
        difficulty_level=difficulty,
        train_data=train_dataset,
        val_data=val_dataset,
        peft_model=peft_model,
        peft_config=peft_config,
    )
    torch.cuda.empty_cache()

Map: 100%|██████████| 73332/73332 [00:14<00:00, 4999.56 examples/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]


Parameters for the medium LoRA model: 
trainable params: 24,313,856 || all params: 3,237,063,680 || trainable%: 0.7511


Converting train dataset to ChatML: 100%|██████████| 73332/73332 [00:08<00:00, 9003.69 examples/s] 
Adding EOS to train dataset: 100%|██████████| 73332/73332 [00:08<00:00, 8801.17 examples/s] 
Tokenizing train dataset: 100%|██████████| 73332/73332 [00:27<00:00, 2678.52 examples/s]
Truncating train dataset: 100%|██████████| 73332/73332 [00:00<00:00, 277533.43 examples/s]
--- Logging error ---
Traceback (most recent call last):
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 95] Operation not supported
Call stack:
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/mini

Step,Training Loss,Validation Loss
250,0.9292,0.85286
500,0.6736,0.839264
750,0.6621,0.828662
1000,0.6484,0.819119
1250,0.6427,0.815195
1500,0.6339,0.811214
1750,0.6244,0.807913
2000,0.623,0.806324
2250,0.6194,0.800325
2500,0.5683,0.815402


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
--- Logging error ---
Traceback (most recent call last):
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 95] Operation not supported
Call stack:
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_n

In [10]:
if __name__ == '__main__':

    # Subset to fine-tune on: "easy", "medium" or "hard". Hard-coded for
    # notebook use; as a script this would be a required `--difficulty`
    # command-line argument.
    difficulty = 'hard'

    # On-disk location of each difficulty subset.
    subset_dirs = {
        'easy': './json_to_hf/subset1',
        'medium': './json_to_hf/subset2',
        'hard': './json_to_hf/subset3',
    }

    # hack to handle data errors while using map function:
    # rebuild the dataset through a pandas DataFrame before mapping.
    subset_frame = pd.DataFrame(load_from_disk(subset_dirs[difficulty]))
    train_dataset = Dataset.from_pandas(subset_frame).map(format_chat_template)

    val_data = load_dataset(dataset_name, split='validation', trust_remote_code=True)
    val_dataset = val_data.map(format_chat_template)

    # Use existing global bnb_config defined earlier
    base_model = prepare_model_for_kbit_training(
        AutoModelForCausalLM.from_pretrained(
            base_model_name,
            quantization_config=bnb_config,
            device_map="auto",
            token=HF_TOKEN,
        )
    )
    modules = find_all_linear_names(base_model)
    peft_config, peft_model = create_lora_model(base_model, difficulty, modules)

    # NOTE(review): the hyphen in the directory name below is a non-ASCII
    # character (looks like U+2010), not "-". Saved adapters live under this
    # exact name, so confirm downstream loaders before normalising it.
    train_adapter(
        model_dir="LoRA‐MedQA",
        run_name=f'Lora_{difficulty}',
        bnb_config=bnb_config,
        difficulty_level=difficulty,
        train_data=train_dataset,
        val_data=val_dataset,
        peft_model=peft_model,
        peft_config=peft_config,
    )
    torch.cuda.empty_cache()

--- Logging error ---
Traceback (most recent call last):
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 95] Operation not supported
Call stack:
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/site-packages/ipykernel/kernelapp.py", line

Parameters for the hard LoRA model: 
trainable params: 48,627,712 || all params: 3,261,377,536 || trainable%: 1.4910


Converting train dataset to ChatML: 100%|██████████| 70690/70690 [00:07<00:00, 9094.37 examples/s] 
Adding EOS to train dataset: 100%|██████████| 70690/70690 [00:06<00:00, 10281.18 examples/s]
Tokenizing train dataset: 100%|██████████| 70690/70690 [00:28<00:00, 2520.24 examples/s]
Truncating train dataset: 100%|██████████| 70690/70690 [00:00<00:00, 287558.60 examples/s]
--- Logging error ---
Traceback (most recent call last):
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 95] Operation not supported
Call stack:
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/mini

Step,Training Loss,Validation Loss
250,0.8987,0.970238
500,0.689,0.930709
750,0.6773,0.932519
1000,0.6663,0.92575
1250,0.6547,0.921676
1500,0.6435,0.928431
1750,0.6389,0.915237
2000,0.624,0.907603
2250,0.6153,0.930552
2500,0.5324,0.923371


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
--- Logging error ---
Traceback (most recent call last):
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1104, in emit
    self.flush()
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/logging/__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 95] Operation not supported
Call stack:
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/root/miniconda3/envs/cs769_env/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/root/m