# Upgrades or packages

In [1]:
!pip install -U datasets



In [2]:
!pip install bitsandbytes



In [3]:
!pip install transformers peft accelerate



In [None]:
from huggingface_hub import login

login(...)

In [5]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


# Utils for computing *metrics*


In [6]:
import json

def process_prediction(prediction, em=False):
    # clean prediction
    prediction = prediction.split("\n")[0]
    if em:
      return prediction.strip().lower()
    prediction_tokens = set([w.strip(",.") for w in prediction.strip().lower().split()])
    return prediction_tokens

def compute_exact_match(prediction: str, ground_truth: str) -> float:
    """
    Computes the exact match score between a prediction and a ground truth answer.

    Args:
        prediction (str): The predicted answer from the LLM.
        ground_truth (str): The ground truth answer to compare against.

    Returns:
        float: 1.0 if the prediction matches the ground truth exactly (case-insensitive and whitespace-stripped), otherwise 0.0.
    """
    assert ground_truth is not None, "Ground truth cannot be None"
    processed_prediction = process_prediction(prediction, em=True)
    processed_ground_truth = ground_truth.strip().lower()

    return 1.0 if processed_prediction == processed_ground_truth else 0.0


# write a fn that computes the f1 score for an llm answer and a ground truth answer
def compute_f1_score(prediction: str, ground_truth: str) -> float:
    """
    Computes the F1 score between a prediction and a ground truth answer.

    Args:
        prediction (str): The predicted answer from the LLM.
        ground_truth (str): The ground truth answer to compare against.

    Returns:
        float: The F1 score calculated based on precision and recall.
    """
    assert ground_truth is not None, "Ground truth cannot be None"
    if ground_truth.strip() == "":
        return 0.0

    prediction_tokens = process_prediction(prediction)
    ground_truth_tokens = set(ground_truth.strip().lower().split())


    true_positives = len(prediction_tokens.intersection(ground_truth_tokens))
    precision = true_positives / len(prediction_tokens) if prediction_tokens else 0.0
    recall = true_positives / len(ground_truth_tokens) if ground_truth_tokens else 0.0

    if precision + recall == 0:
        return 0.0  # Avoid division by zero

    f1_score = 2 * (precision * recall) / (precision + recall)
    return f1_score

def load_json_file_answers_compute_scores(file_path):
  predicted_answers_json = json.load(open(file_path))
  predicted = []
  gt = []
  # iterate over the json file and extract the predicted and ground truth answers
  for item in predicted_answers_json:
      predicted.append(item["predicted_answer"])
      gt.append(item["true_answer"])

  exact_match_scores = [compute_exact_match(pred, gt_item) for pred, gt_item in zip(predicted, gt)]
  f1_scores = [compute_f1_score(pred, gt_item) for pred, gt_item in zip(predicted, gt)]

  # compute the average exact match score and f1 score
  average_exact_match = sum(exact_match_scores) / len(exact_match_scores) if exact_match_scores else 0.0
  average_f1_score = sum(f1_scores) / len(f1_scores) if f1_scores else 0.0
  return average_exact_match, average_f1_score

# Different rank experiments

In [7]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
from tqdm import tqdm
import json

In [8]:
# CONFIGS
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

MODEL_ID = "meta-llama/Llama-3.2-3B"
MAX_LENGTH = 512

LORA_DROPOUT = 0.05
DATASET_PATH = "OnnieNLP/InformationExtractionQA"

PARAMS = [
    {"r": 1, "lora_alpha": 4},
    {"r": 2, "lora_alpha": 8},
    {"r": 4, "lora_alpha": 16},
    {"r": 8, "lora_alpha": 32},
    {"r": 16, "lora_alpha": 64},
    {"r": 32, "lora_alpha": 128},
]

Using device: cuda


In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# Format dataset
def format_example(example):
    prompt = (
        "You are a helpful assistant that answers questions based on provided context. Keep answers short.\n\n"
        f"Context: {example['text']}\n\nQuestion: {example['question']}\n\nAnswer: {example['answer']}"
    )
    return {"text": prompt}

# Format function
def format_prompt_test(example):
    return (
        "You are a helpful assistant that answers questions based on provided context. Keep answers short.\n\n"
        f"Context: {example['text']}\n\nQuestion: {example['question']}\n\nAnswer:"
    )
def preprocess(example, tokenizer):
    prompt = f"Context: {example['text']}\nQuestion: {example['question']}\nAnswer:"
    full_text = prompt + " " + example["answer"]

    # Tokenize full text
    tokenized = tokenizer(full_text, truncation=True, padding="max_length", max_length=MAX_LENGTH)

    # Mask out the prompt portion in the labels so loss is only on the answer
    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]

    # Find index where answer starts
    answer_start = len(tokenizer(prompt)["input_ids"])
    labels = [-100] * answer_start + input_ids[answer_start:]

    tokenized["labels"] = labels
    return tokenized


In [11]:
def train_lora_model(model_id,
                     dataset_path,
                     params,
                     lora_dropout,
                     epochs,
                     lr=1e-4,
                     target_modules=None, name_adapter=None):
  if target_modules is None:
    target_modules = ["q_proj", "v_proj"]

  lora_rank = params["r"]
  lora_alpha = params["lora_alpha"]

  # Load our small dataset
  dataset = load_dataset("OnnieNLP/InformationExtractionQA", split="train")

  # Load tokenizer
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
  tokenizer.pad_token = tokenizer.eos_token

  # Process dataset
  formatted_dataset = dataset.map(format_example)

  # Tokenize dataset
  tokenized_dataset = formatted_dataset.map(
      lambda e: tokenizer(
          e["text"],
          truncation=True,
          max_length=MAX_LENGTH,
          padding=False
      ),
      batched=True,
      batch_size=32,
      remove_columns=formatted_dataset.column_names
  )

  data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


  # Configure 4-bit quantization for memory efficiency
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=torch.bfloat16,
      bnb_4bit_use_double_quant=True
  )

  # Load model with quantization
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      quantization_config=bnb_config,
      device_map="auto",
      use_cache=False
  )

  # Prepare model for k-bit training
  model = prepare_model_for_kbit_training(model)

  # Configure LoRA
  peft_config = LoraConfig(
      r=lora_rank,                   # Rank
      lora_alpha=lora_alpha,         # Scaling factor
      lora_dropout=lora_dropout,     # Dropout
      target_modules=target_modules,  # Target modules
      task_type="CAUSAL_LM",
      bias="none"            # No bias for LoRA
  )

  # Apply LoRA to model
  model = get_peft_model(model, peft_config)
  print(f"Training with params {params}")
  model.print_trainable_parameters()  # Show trainable parameters count

  if name_adapter is None:
    name_adapter = f"/content/adapter_r{lora_rank}_alpha{lora_alpha}_lr{lr}_epochs{epochs}"

  # Optimized training arguments
  training_args = TrainingArguments(
      output_dir=name_adapter,
      per_device_train_batch_size=2,          # Increased due to LoRA memory efficiency
      gradient_accumulation_steps=4,           # Adjust based on GPU memory
      num_train_epochs=epochs,
      learning_rate=lr,                     # Lower LR for adapter training
      weight_decay=0.01,
      bf16=True,                              # Use bfloat16
      logging_steps=10,
      optim="paged_adamw_8bit",               # Optimizer for 8-bit models
      save_strategy="no",
      report_to="none",
      gradient_checkpointing=True,
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=tokenized_dataset,
      # tokenizer=tokenizer, # depracted
      data_collator=data_collator
  )

  # Start training
  trainer.train()

  # Save adapter weights
  model.save_pretrained(name_adapter)
  return model, tokenizer, name_adapter

In [12]:
def save_test_predictions(model, tokenizer, dataset_path, save_predictions_path):
  # Load test dataset
  test_dataset = load_dataset(dataset_path, split="test")
  model.eval()

  results = []
  for example in tqdm(test_dataset, desc="Generating answers"):
    prompt = format_prompt_test(example)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=MAX_LENGTH).to(DEVICE)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=64,
            # temperature=0.7,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    generated = tokenizer.decode(output[0], skip_special_tokens=True)
    predicted_answer = generated.split("Answer:")[-1].strip()

    results.append({
        "question": example["question"],
        "context": example["text"],
        "true_answer": example["answer"],
        "predicted_answer": predicted_answer
    })

  # Save to JSON
  with open(save_predictions_path, "w", encoding="utf-8") as f:
      json.dump(results, f, indent=2, ensure_ascii=False)

  print(f"Saved predictions to {save_predictions_path}")



Without training

In [None]:
# Evaluate the base model (without any LoRA fine-tuning)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    use_cache=False
)

# Save predictions for the base model
base_model_predictions_path = "/content/drive/MyDrive/base_model_predictions.json"
save_test_predictions(model, tokenizer, DATASET_PATH, base_model_predictions_path)

!cp /content/drive/MyDrive/base_model_predictions.json /content/drive/MyDrive/Colab

# Compute and print scores for the base model
avg_em_base, avg_f1_base = load_json_file_answers_compute_scores(base_model_predictions_path)
print(f"Base Model - Average Exact Match: {avg_em_base:.4f}")
print(f"Base Model - Average F1 Score: {avg_f1_base:.4f}")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<16:50,  5.08s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<16:28,  4.99s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:14<16:17,  4.96s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:19<16:12,  4.96s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/drive/MyDrive/base_model_predictions.json
Base Model - Average Exact Match: 0.0150
Base Model - Average F1 Score: 0.2988





1. Training with params {'r': 1, 'lora_alpha': 4}

    trainable params: 286,720 || all params: 3,213,036,544 || trainable%: 0.0089
2. Training with params {'r': 2, 'lora_alpha': 8}

    trainable params: 573,440 || all params: 3,213,323,264 || trainable%: 0.0178
3. Training with params {'r': 4, 'lora_alpha': 16}

    trainable params: 1,146,880 || all params: 3,213,896,704 || trainable%: 0.0357

4. Training with params {'r': 8, 'lora_alpha': 32}

    trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713

5. Training with params {'r': 16, 'lora_alpha': 64}

    trainable params: 4,587,520 || all params: 3,217,337,344 || trainable%: 0.1426

6. Training with params {'r': 32, 'lora_alpha': 128}

    trainable params: 9,175,040 || all params: 3,221,924,864 || trainable%: 0.2848

In [None]:
for experiment_params in PARAMS:
  model, tokenizer, name_adapter = train_lora_model(MODEL_ID, DATASET_PATH, experiment_params, LORA_DROPOUT, epochs=3)
  save_predictions_path = f"/content/predictions_r{experiment_params['r']}_alpha{experiment_params['lora_alpha']}.json"
  save_test_predictions(model, tokenizer, DATASET_PATH, save_predictions_path)
  # exact_match, f1_score = load_json_file_answers_compute_scores(save_predictions_path)
  # print(f"Exact Match: {exact_match}, F1 Score: {f1_score}")
  # copy in drive
  !cp {save_predictions_path} /content/drive/MyDrive/Colab
  !cp -r {name_adapter} /content/drive/MyDrive/Colab


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'r': 2, 'lora_alpha': 8}
trainable params: 573,440 || all params: 3,213,323,264 || trainable%: 0.0178


Step,Training Loss
10,2.3816
20,2.2937
30,1.8892
40,1.6587
50,1.3357
60,1.2476
70,1.1477
80,1.1014
90,1.1049
100,1.0709


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<17:19,  5.22s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<17:22,  5.27s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:15<17:12,  5.24s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:20<17:05,  5.23s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_r2_alpha8.json





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'r': 4, 'lora_alpha': 16}
trainable params: 1,146,880 || all params: 3,213,896,704 || trainable%: 0.0357


Step,Training Loss
10,2.3314
20,2.0798
30,1.5365
40,1.3511
50,1.1023
60,1.1247
70,1.0972
80,1.0529
90,1.0521
100,1.0198


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<17:31,  5.28s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<17:18,  5.25s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:15<17:12,  5.24s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:20<17:05,  5.23s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_r4_alpha16.json





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'r': 8, 'lora_alpha': 32}
trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713


Step,Training Loss
10,2.2566
20,1.8076
30,1.2448
40,1.1414
50,1.0375
60,1.0861
70,1.0663
80,1.0148
90,1.0049
100,0.9766


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<17:23,  5.24s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<17:19,  5.25s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:15<17:07,  5.22s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:20<17:07,  5.24s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_r8_alpha32.json





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'r': 16, 'lora_alpha': 64}
trainable params: 4,587,520 || all params: 3,217,337,344 || trainable%: 0.1426


Step,Training Loss
10,2.1137
20,1.469
30,1.0576
40,1.0952
50,0.9988
60,1.0543
70,1.0391
80,0.9741
90,0.9562
100,0.9293


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<17:26,  5.26s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<17:21,  5.26s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:15<17:09,  5.23s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:20<17:09,  5.25s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_r16_alpha64.json





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'r': 32, 'lora_alpha': 128}
trainable params: 9,175,040 || all params: 3,221,924,864 || trainable%: 0.2848


Step,Training Loss
10,1.9288
20,1.241
30,1.0117
40,1.0521
50,0.974
60,1.0286
70,1.0139
80,0.9342
90,0.9044
100,0.878


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<17:58,  5.42s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<17:43,  5.37s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:16<17:37,  5.37s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:21<17:31,  5.37s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_r32_alpha128.json





In [None]:
predictions_paths = [f"predictions_r{i}_alpha{4*i}.json"  for i in [1, 2, 4, 8, 16, 32]] + ["base_model_predictions.json"]
for path in predictions_paths:
  exact_match, f1_score = load_json_file_answers_compute_scores(os.path.join("/content/drive/MyDrive/Colab/", path))
  print(f"File {path} Exact Match: {exact_match}, F1 Score: {f1_score}")

File predictions_r1_alpha4.json Exact Match: 0.395, F1 Score: 0.5137279474911057
File predictions_r2_alpha8.json Exact Match: 0.41, F1 Score: 0.5530620023251607
File predictions_r4_alpha16.json Exact Match: 0.325, F1 Score: 0.45533894760210564
File predictions_r8_alpha32.json Exact Match: 0.23, F1 Score: 0.4185314962814962
File predictions_r16_alpha64.json Exact Match: 0.34, F1 Score: 0.5399734154734156
File predictions_r32_alpha128.json Exact Match: 0.145, F1 Score: 0.427997224997225
File base_model_predictions.json Exact Match: 0.1, F1 Score: 0.2988486899167946


# Different target experiments

In [13]:
PARAMS_TARGET_MODULES = [
    {"target_modules": ["q_proj"], "r": 2, "lora_alpha": 8},
    {"target_modules": ["q_proj", "v_proj"], "r": 2, "lora_alpha": 8},
    {"target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"], "r": 2, "lora_alpha": 8},
]

In [None]:
for target_modules_experiment_params in PARAMS_TARGET_MODULES:
  print(f"Training with params {target_modules_experiment_params}")
  name_save = "_".join([module.split("_")[0] for module in target_modules_experiment_params["target_modules"]]) + f"_r{target_modules_experiment_params['r']}_alpha{target_modules_experiment_params['lora_alpha']}"
  print(f"Name save: {name_save}")
  model, tokenizer, name_adapter = train_lora_model(MODEL_ID, DATASET_PATH, target_modules_experiment_params, LORA_DROPOUT, epochs=3, lr=3e-4,
                                                    target_modules=target_modules_experiment_params["target_modules"],
                                                    name_adapter=f"/content/{name_save}")
  save_predictions_path = f"/content/predictions_target_modules_{name_save}.json"
  save_test_predictions(model, tokenizer, DATASET_PATH, save_predictions_path)
  # copy in drive
  !cp {save_predictions_path} /content/drive/MyDrive/Colab
  !cp -r {name_adapter} /content/drive/MyDrive/Colab

Training with params {'target_modules': ['q_proj'], 'r': 2, 'lora_alpha': 8}
Name save: q_r2_alpha8


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'target_modules': ['q_proj'], 'r': 2, 'lora_alpha': 8}
trainable params: 344,064 || all params: 3,213,093,888 || trainable%: 0.0107


Step,Training Loss
10,2.3523
20,2.072
30,1.5126
40,1.4049
50,1.1858
60,1.1755
70,1.146
80,1.1088
90,1.1155
100,1.084


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:04<15:57,  4.81s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:09<15:59,  4.85s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:14<15:44,  4.80s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:19<15:36,  4.78s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_target_modules_q_r2_alpha8.json





Training with params {'target_modules': ['q_proj', 'v_proj'], 'r': 2, 'lora_alpha': 8}
Name save: q_v_r2_alpha8


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'target_modules': ['q_proj', 'v_proj'], 'r': 2, 'lora_alpha': 8}
trainable params: 573,440 || all params: 3,213,323,264 || trainable%: 0.0178


Step,Training Loss
10,2.1931
20,1.53
30,1.0847
40,1.1118
50,1.0064
60,1.0609
70,1.0466
80,0.9858
90,0.9755
100,0.9494


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<17:28,  5.27s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<17:28,  5.30s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:15<17:14,  5.25s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:21<17:12,  5.27s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_target_modules_q_v_r2_alpha8.json





Training with params {'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj'], 'r': 2, 'lora_alpha': 8}
Name save: q_k_v_o_r2_alpha8


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj'], 'r': 2, 'lora_alpha': 8}
trainable params: 1,146,880 || all params: 3,213,896,704 || trainable%: 0.0357


Step,Training Loss
10,2.02
20,1.2572
30,1.0187
40,1.0532
50,0.9761
60,1.0185
70,1.0126
80,0.9404
90,0.913
100,0.8907


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:06<20:31,  6.19s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:12<20:13,  6.13s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:18<20:05,  6.12s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:24<19:52,  6.08s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_target_modules_q_k_v_o_r2_alpha8.json





In [None]:
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Example: Check shape of q_proj in the first transformer layer
layer = model.model.layers[0]  # or model.base_model.model.model.layers[0] depending on the wrapper

print("q_proj weight shape:", layer.self_attn.q_proj.weight.shape)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

q_proj weight shape: torch.Size([3072, 3072])


In [None]:
for path in [f"predictions_target_modules_{name_save}_r2_alpha8.json" for name_save in ["q", "q_v", "q_k_v_o"]]:
  exact_match, f1_score = load_json_file_answers_compute_scores(os.path.join("/content/", path))
  print(f"File {path} Exact Match: {exact_match}, F1 Score: {f1_score}")

File predictions_target_modules_q_r2_alpha8.json Exact Match: 0.165, F1 Score: 0.3052115107115107
File predictions_target_modules_q_v_r2_alpha8.json Exact Match: 0.165, F1 Score: 0.5110762293262295
File predictions_target_modules_q_k_v_o_r2_alpha8.json Exact Match: 0.19, F1 Score: 0.4853262293262295


In [14]:
for target_modules_experiment_params in PARAMS_TARGET_MODULES:
  print(f"Training with params {target_modules_experiment_params}")
  name_save = "_".join([module.split("_")[0] for module in target_modules_experiment_params["target_modules"]]) + f"_r{target_modules_experiment_params['r']}_alpha{target_modules_experiment_params['lora_alpha']}"
  print(f"Name save: {name_save}")
  model, tokenizer, name_adapter = train_lora_model(MODEL_ID, DATASET_PATH, target_modules_experiment_params, LORA_DROPOUT, epochs=3, lr=1e-4,
                                                    target_modules=target_modules_experiment_params["target_modules"],
                                                    name_adapter=f"/content/3{name_save}")
  save_predictions_path = f"/content/predictions_target_modules3_{name_save}.json"
  save_test_predictions(model, tokenizer, DATASET_PATH, save_predictions_path)
  # copy in drive
  !cp {save_predictions_path} /content/drive/MyDrive/Colab
  !cp -r {name_adapter} /content/drive/MyDrive/Colab

Training with params {'target_modules': ['q_proj'], 'r': 2, 'lora_alpha': 8}
Name save: q_r2_alpha8


Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'target_modules': ['q_proj'], 'r': 2, 'lora_alpha': 8}
trainable params: 344,064 || all params: 3,213,093,888 || trainable%: 0.0107


Step,Training Loss
10,2.4145
20,2.4574
30,2.2175
40,2.1382
50,1.8079
60,1.667
70,1.5581
80,1.4837
90,1.4446
100,1.3702


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:04<15:47,  4.76s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:09<15:24,  4.67s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:13<15:16,  4.65s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:18<15:12,  4.66s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_target_modules3_q_r2_alpha8.json
Training with params {'target_modules': ['q_proj', 'v_proj'], 'r': 2, 'lora_alpha': 8}
Name save: q_v_r2_alpha8


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'target_modules': ['q_proj', 'v_proj'], 'r': 2, 'lora_alpha': 8}
trainable params: 573,440 || all params: 3,213,323,264 || trainable%: 0.0178


Step,Training Loss
10,2.3758
20,2.2692
30,1.8481
40,1.6229
50,1.3107
60,1.2159
70,1.1447
80,1.1019
90,1.1061
100,1.067


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<16:49,  5.07s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:10<16:40,  5.05s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:15<16:33,  5.04s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:20<16:28,  5.05s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_target_modules3_q_v_r2_alpha8.json





Training with params {'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj'], 'r': 2, 'lora_alpha': 8}
Name save: q_k_v_o_r2_alpha8


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Training with params {'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj'], 'r': 2, 'lora_alpha': 8}
trainable params: 1,146,880 || all params: 3,213,896,704 || trainable%: 0.0357


Step,Training Loss
10,2.3264
20,2.0326
30,1.4602
40,1.2557
50,1.0705
60,1.1097
70,1.0905
80,1.0417
90,1.0373
100,1.0047


Generating answers:   0%|          | 0/200 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   0%|          | 1/200 [00:05<19:38,  5.92s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   1%|          | 2/200 [00:11<19:24,  5.88s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 3/200 [00:17<19:14,  5.86s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Generating answers:   2%|▏         | 4/200 [00:23<19:11,  5.88s/it]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANS

Saved predictions to /content/predictions_target_modules3_q_k_v_o_r2_alpha8.json





In [21]:
for path in [f"predictions_target_modules2_{name_save}_r2_alpha8.json" for name_save in ["q", "q_v", "q_k_v_o"]]:
  exact_match, f1_score = load_json_file_answers_compute_scores(os.path.join("/content/drive/MyDrive/Colab/", path))
  print(f"File {path} Exact Match: {exact_match}, F1 Score: {f1_score}")

File predictions_target_modules2_q_r2_alpha8.json Exact Match: 0.11, F1 Score: 0.4849588277219857
File predictions_target_modules2_q_v_r2_alpha8.json Exact Match: 0.345, F1 Score: 0.532584479847638
File predictions_target_modules2_q_k_v_o_r2_alpha8.json Exact Match: 0.415, F1 Score: 0.5420855898487479
