# Final Submission Version

In [None]:
# Imports and Setup
import os
import sys
from argparse import Namespace
import torch

import train
import utils
import config

# Report the runtime environment so the outputs of later cells can be traced
# back to a specific torch / CUDA setup.
print(f"Torch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # CUDA details are only queried when a device is reported as available.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")
# DEVICE comes from the project's config module.
print(f"Using device: {config.DEVICE}")

## 1. Comparison of different adaptors

### 1.1. LoRA

In [None]:
from argparse import Namespace
import os

# LoRA run: method "lora" applied to the modules named "query" and "value".
# The dict's insertion order is the order in which the settings are printed.
lora_settings = {
    "output_dir": "results_lora_qv_r8_a16_lr2e-4",
    "seed": 42,
    "peft_method": "lora",
    "target_modules": ["query", "value"],
    "lora_r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.1,
    "learning_rate": 2e-4,
    "num_train_epochs": 3,
    "train_batch_size": 128,
    "eval_batch_size": 128,
    "optimizer": "adamw_torch",
}
args_lora = Namespace(**lora_settings)

# Create the results directory up front; an existing directory is reused.
os.makedirs(args_lora.output_dir, exist_ok=True)

# Echo the configuration: a header line, then one indented "name: value" line
# per setting.
print(
    "Parameters:",
    *(f"  {name}: {value}" for name, value in vars(args_lora).items()),
    sep="\n",
)

print("=== START TRAINING ===")
# Presumably main_train returns the final accuracy, as the variable name
# suggests -- confirm against train.main_train.
final_accuracy = train.main_train(args_lora)
print("=== TRAINING FINISHED ===")


### 1.2. LoHa

In [None]:
# LoHa run: method "loha" applied to the modules named "query", "value" and
# "key". The dict's insertion order is the order in which settings are printed.
loha_settings = {
    "output_dir": "results_loha_qkv_r4_a8_lr2e-4",
    "seed": 42,
    "peft_method": "loha",
    "target_modules": ["query", "value", "key"],
    "lora_r": 4,
    "lora_alpha": 8,
    "lora_dropout": 0.1,
    "learning_rate": 2e-4,
    "num_train_epochs": 3,
    "train_batch_size": 128,
    "eval_batch_size": 128,
    "optimizer": "adamw_torch",
}
args_loha = Namespace(**loha_settings)

# Create the results directory up front; an existing directory is reused.
os.makedirs(args_loha.output_dir, exist_ok=True)

# Echo the configuration: a header line, then one indented "name: value" line
# per setting.
print(
    "Parameters:",
    *(f"  {name}: {value}" for name, value in vars(args_loha).items()),
    sep="\n",
)

print("=== START TRAINING ===")
# Presumably main_train returns the final accuracy, as the variable name
# suggests -- confirm against train.main_train.
final_accuracy = train.main_train(args_loha)
print("=== TRAINING FINISHED ===")


### 1.3. LoKr

In [None]:
# LoKr run: method "lokr" applied to the modules named "query", "key" and
# "value". The dict's insertion order is the order in which settings are printed.
lokr_settings = {
    "output_dir": "results_lokr_qkv_r8_a24_lr2e-4",
    "seed": 42,
    "peft_method": "lokr",
    "target_modules": ["query", "key", "value"],
    "lora_r": 8,
    "lora_alpha": 24,
    "lora_dropout": 0.1,
    "learning_rate": 2e-4,
    "num_train_epochs": 3,
    "train_batch_size": 128,
    "eval_batch_size": 128,
    "optimizer": "adamw_torch",
}
args_lokr = Namespace(**lokr_settings)

# Create the results directory up front; an existing directory is reused.
os.makedirs(args_lokr.output_dir, exist_ok=True)

# Echo the configuration: a header line, then one indented "name: value" line
# per setting.
print(
    "Parameters:",
    *(f"  {name}: {value}" for name, value in vars(args_lokr).items()),
    sep="\n",
)

print("=== START TRAINING ===")
# Presumably main_train returns the final accuracy, as the variable name
# suggests -- confirm against train.main_train.
final_accuracy = train.main_train(args_lokr)
print("=== TRAINING FINISHED ===")


### 1.4. AdaLoRA

In [None]:
# AdaLoRA run: method "adalora" applied to the modules named "query", "value"
# and "attention.output.dense", trained for a single epoch.
# The "r4-6" in the directory name mirrors lora_r=4 and adalora_init_r=6.
# NOTE(review): the adalora_* names look like they map onto PEFT's AdaLoRA
# schedule options -- confirm how train.main_train consumes them.
adalora_settings = {
    "output_dir": "results_adalora_qvd_r4-6_a2_lr2e-4",
    "seed": 42,
    "peft_method": "adalora",
    "target_modules": ["query", "value", "attention.output.dense"],
    "lora_r": 4,
    "lora_alpha": 2,
    "lora_dropout": 0.1,
    "adalora_init_r": 6,
    "adalora_tinit": 0,
    "adalora_tfinal": 0,
    "adalora_deltaT": 1,
    "adalora_beta1": 0.85,
    "adalora_beta2": 0.85,
    "learning_rate": 2e-4,
    "num_train_epochs": 1,
    "train_batch_size": 128,
    "eval_batch_size": 128,
    "optimizer": "adamw_torch",
}
args_adalora = Namespace(**adalora_settings)

# Create the results directory up front; an existing directory is reused.
os.makedirs(args_adalora.output_dir, exist_ok=True)

# Echo the configuration: a header line, then one indented "name: value" line
# per setting (insertion order of the dict above).
print(
    "Parameters:",
    *(f"  {name}: {value}" for name, value in vars(args_adalora).items()),
    sep="\n",
)

# Launch training.
print("=== START TRAINING ===")
# Presumably main_train returns the final accuracy, as the variable name
# suggests -- confirm against train.main_train.
final_accuracy = train.main_train(args_adalora)
print("=== TRAINING FINISHED ===")

## 2. Impact of Rank and Alpha

### 2.1. Impact of Alpha (rank fixed at 8)

In [None]:
# Sweep lora_alpha with the rank fixed at 8. Every setting trains into its own
# directory, and the value returned by each run is kept so the settings can be
# compared after the cell finishes (previously only the last run's value
# survived, because final_accuracy was overwritten on every iteration).
#
# NOTE(review): for alpha=16 the directory name is
# "results_lora_qv_r8_a16_lr2e-4", which other cells in this notebook also
# train into; exist_ok=True means that directory is silently reused.
accuracy_by_alpha = {}
for alpha in [4, 8, 16, 32]:
    args = Namespace(
        output_dir=f"results_lora_qv_r8_a{alpha}_lr2e-4",
        seed=42,
        peft_method="lora",
        target_modules=["query", "value"],
        lora_r=8,
        lora_alpha=alpha,
        lora_dropout=0.1,
        learning_rate=2e-4,
        num_train_epochs=3,
        train_batch_size=128,
        eval_batch_size=128,
        optimizer="adamw_torch"
    )
    os.makedirs(args.output_dir, exist_ok=True)
    print("Parameters:", vars(args))
    print(f"=== START TRAINING r=8 alpha={alpha} ===")
    # Assumes main_train returns the final accuracy, as the variable name
    # suggests -- TODO confirm against train.main_train.
    final_accuracy = train.main_train(args)
    accuracy_by_alpha[alpha] = final_accuracy
    print(f"=== TRAINING FINISHED r=8 alpha={alpha} ===")

# Show the collected per-alpha results as the cell output.
accuracy_by_alpha

### 2.2. Impact of Rank (alpha fixed at 16)

In [None]:
# Sweep lora_r with alpha fixed at 16. Every setting trains into its own
# directory, and the value returned by each run is kept so the settings can be
# compared after the cell finishes (previously only the last run's value
# survived, because final_accuracy was overwritten on every iteration).
#
# NOTE(review): for r=8 the directory name is
# "results_lora_qv_r8_a16_lr2e-4", which other cells in this notebook also
# train into; exist_ok=True means that directory is silently reused.
accuracy_by_rank = {}
for r in [2, 4, 8]:
    args = Namespace(
        output_dir=f"results_lora_qv_r{r}_a16_lr2e-4",
        seed=42,
        peft_method="lora",
        target_modules=["query", "value"],
        lora_r=r,
        lora_alpha=16,
        lora_dropout=0.1,
        learning_rate=2e-4,
        num_train_epochs=3,
        train_batch_size=128,
        eval_batch_size=128,
        optimizer="adamw_torch"
    )
    os.makedirs(args.output_dir, exist_ok=True)
    print("Parameters:", vars(args))
    print(f"=== START TRAINING r={r} alpha=16 ===")
    # Assumes main_train returns the final accuracy, as the variable name
    # suggests -- TODO confirm against train.main_train.
    final_accuracy = train.main_train(args)
    accuracy_by_rank[r] = final_accuracy
    print(f"=== TRAINING FINISHED r={r} alpha=16 ===")

# Show the collected per-rank results as the cell output.
accuracy_by_rank