# HyLoRADA: Hybrid Low-Rank Adaptation with Position-Adaptive Landmarks

In [None]:
# Setup: install a blanket "ignore" filter so that warnings raised by the
# libraries used below do not clutter the cell outputs.
import warnings

warnings.filterwarnings(action='ignore')

In [None]:
# Clone repo (Kaggle)
import os
if os.path.exists('hylorada'):
    %cd hylorada
    !git pull
    print("⚠️ Repo updated! Please RESTART KERNEL (Runtime > Restart Session) to reload modules")
else:
    !git clone https://github.com/SadiaTabassum1216/hylorada.git
    %cd hylorada

In [None]:
# Install dependencies
!pip install -q transformers datasets accelerate tqdm

In [None]:
# Check GPU: report whether CUDA is usable and, if it is, the name and total
# memory of device 0.
import torch

has_cuda = torch.cuda.is_available()
print(f"CUDA: {has_cuda}")
if has_cuda:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9 to print the figure labelled "GB".
    print(f"Memory: {gpu_props.total_memory / 1e9:.1f} GB")

## 1. Quick Demo - HyLoRADA Components

In [None]:
# Smoke-test the Position-Adaptive LandmarkLoRA module on its own.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from hylorada import HyLoRADAConfig, HyLoRADAModel, LandmarkLoRA

# Constructor settings for the landmark module under test.
landmark_kwargs = dict(
    hidden_size=768,
    num_landmarks=8,
    max_positions=32768,
    num_buckets=32,
)
lm = LandmarkLoRA(**landmark_kwargs)

# Random input whose last dimension matches hidden_size (768).
x = torch.randn(1, 64, 768)
y = lm(x)

# Total number of elements across all parameters of the module.
num_landmark_params = sum(p.numel() for p in lm.parameters())

print(f"Position-Adaptive LandmarkLoRA: {x.shape} -> {y.shape}")
print(f"Landmark params: {num_landmark_params:,}")
print(f"Architecture: {lm}")

In [None]:
# Load base model (GPT-2)
model_name = "openai-community/gpt2"
# GPT-2 is implemented inside the transformers library itself, so no code from
# the Hub repository has to be executed; `trust_remote_code=True` is therefore
# left off. Only add it back for a checkpoint that ships custom modelling code
# you have reviewed.
base_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# HyLoRADA Config (Position-Adaptive Landmarks Enabled)
config = HyLoRADAConfig(
    lora_rank=8,
    lora_alpha=16.0,
    use_dora_magnitude=True,      # DoRA magnitude decomposition
    landmark_enabled=True,         # Position-Adaptive Landmarks (enabled by default)
    num_landmarks=8,
    num_position_buckets=32,       # Position bucketing for landmarks
    position_bias_enabled=True,
    # Long Context Features (Optional)
    s2_attn_enabled=False,         # Set True for S²-Attn (shifted sparse)
    s2_group_size=2048,
    rope_scaling_type=None,        # Set "linear" for RoPE scaling
)

# Show which components the configuration reports as active.
print("Components:", config.get_component_status())

In [None]:
# Apply HyLoRADA
# Wrap the previously loaded `base_model` using the HyLoRADA `config`.
# NOTE(review): presumably this injects the adapter modules into `base_model`
# and freezes the remaining weights — confirm in HyLoRADAModel.
model = HyLoRADAModel(base_model, config)
# Print the wrapper's trainable-parameter report.
model.print_trainable_params()

## 2. Benchmark Section

In [None]:
# 1. Quick Benchmark (GPT-2, 1024 context)
# Tests LoRA vs HyLoRADA with Position-Adaptive Landmarks
# Invokes run_benchmark.py from the current working directory with the `lora`
# and `hylorada` methods, `--epochs 1`, `--num_train 200` and `--max_length 1024`.
# NOTE(review): `--num_train` presumably is the number of training samples —
# confirm in run_benchmark.py.
!python run_benchmark.py \
    --model openai-community/gpt2 \
    --methods lora hylorada \
    --epochs 1 \
    --num_train 200 \
    --max_length 1024

In [None]:
# 2. HyLoRADA Long Context Run (GPT-2, 4096 context)
# This is the main experiment for your thesis/project
# Runs only the `hylorada` method on the `longbench` dataset for one epoch with
# `--s2_attn`, `--train_embeddings`, `--train_norms` and linear RoPE scaling
# (`--rope_scaling_factor 4.0`) passed on the command line.
# NOTE(review): GPT-2 checkpoints normally use learned absolute position
# embeddings limited to 1024 positions rather than RoPE — confirm that
# run_benchmark.py handles `--max_length 4096` and the `--rope_scaling_*`
# flags for this model.

!python run_benchmark.py \
    --dataset longbench \
    --model openai-community/gpt2 \
    --max_length 4096 \
    --methods hylorada \
    --s2_attn \
    --train_embeddings \
    --train_norms \
    --rope_scaling_type linear \
    --rope_scaling_factor 4.0 \
    --epochs 1

In [None]:
# 3. Full Comparison (All Baselines)
# Runs Baseline, LoRA, LoRaDA, LongLoRA, Sparse, and HyLoRADA
# WARNING: This may take longer to run
# Six methods are passed to `--methods`, each run on the `longbench` dataset
# with `--max_length 4096` and `--epochs 1`.
# NOTE(review): the `--s2_attn`, `--train_embeddings`, `--train_norms` and
# `--rope_scaling_*` flags are given once for the whole command; whether they
# apply to every listed method or only to some is decided in run_benchmark.py —
# confirm before comparing results.

!python run_benchmark.py \
    --dataset longbench \
    --model openai-community/gpt2 \
    --max_length 4096 \
    --methods baseline lora lorada longlora sparse hylorada \
    --s2_attn \
    --train_embeddings \
    --train_norms \
    --rope_scaling_type linear \
    --rope_scaling_factor 4.0 \
    --epochs 1