# Merge multiple GRPO LoRA adapters (linear + TIES) and push to Hugging Face

This notebook:
- mounts Google Drive and saves merged adapters under `MyDrive/HSE GRPO/merges/`
- creates **two** merged LoRA adapters from the given LoRA repos:
  1) **Linear** weighted merge
  2) **TIES** (conflict-reduced) merge
- pushes both merged adapters to Hugging Face (two repos)


In [2]:
# === (1) Mount Google Drive ===
import os

from google.colab import drive

# Mount before touching MERGE_ROOT: the directory below lives on the mounted Drive.
drive.mount('/content/drive')

# Root folder (on Drive) under which the merged adapters are written.
MERGE_ROOT = "/content/drive/MyDrive/HSE GRPO/merges"
os.makedirs(MERGE_ROOT, exist_ok=True)  # no-op when the folder already exists
print("MERGE_ROOT =", MERGE_ROOT)


Mounted at /content/drive
MERGE_ROOT = /content/drive/MyDrive/HSE GRPO/merges


In [1]:
%%capture
import os
!pip install --upgrade -qqq uv
if "COLAB_" not in "".join(os.environ.keys()):
    # If you're not in Colab, just use pip install!
    !pip install unsloth vllm
else:
    try: import numpy, PIL; get_numpy = f"numpy=={numpy.__version__}"; get_pil = f"pillow=={PIL.__version__}"
    except: get_numpy = "numpy"; get_pil = "pillow"
    try: import subprocess; is_t4 = "Tesla T4" in str(subprocess.check_output(["nvidia-smi"]))
    except: is_t4 = False
    get_vllm, get_triton = ("vllm==0.9.2", "triton==3.2.0") if is_t4 else ("vllm==0.10.2", "triton")
    !uv pip install -qqq --upgrade \
        unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers
    !uv pip install -qqq {get_triton}
!uv pip install transformers==4.56.2
!uv pip install --no-deps trl==0.22.2
!pip install -q datasets huggingface_hub tqdm pandas
!pip install -q math_verify latex2sympy2_extended

!rm -rf test-suite-sql-eval
!git clone -q https://github.com/taoyds/test-suite-sql-eval


In [3]:
# Import Unsloth and call its RL patch entry point for the "GRPO" algorithm.
# NOTE(review): presumably this has to run before the later cells that import
# peft / transformers so that the patch is already in place — confirm against the Unsloth docs.
from unsloth import FastLanguageModel, PatchFastRL
# Execute the Patch
PatchFastRL("GRPO", FastLanguageModel)

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 02-08 21:45:43 [__init__.py:216] Automatically detected platform cuda.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
Unsloth: UnslothAlignPropTrainer is already patched.
Unsloth: UnslothBCOTrainer is already patched.
Unsloth: UnslothCPOTrainer is already patched.
Unsloth: UnslothDDPOTrainer is already patched.
Unsloth: UnslothDPOTrainer is already patched.
Unsloth: UnslothGKDTrainer is already patched.
Unsloth: UnslothGRPOTrainer is already patched.
Unsloth: UnslothIterativeSFTTrainer is already patched.
Unsloth: UnslothKTOTrainer is already patched.
Unsloth: UnslothNashMDTrainer is already patched.
Unsloth: UnslothOnlineDPOTrainer is already patched.
Unsloth: UnslothORPOTrainer is already patched.
Unsloth: UnslothPPOTrainer is already patched.
Unsloth: UnslothPRMTrainer is already patched.
Unsloth: UnslothRewardTrainer is already patched.
Unsloth: UnslothRLOOTrainer is already patche

## Configure: adapters + HF repos + merge hyperparams

- `ADAPTERS` — trained LoRA repos (3 repos)
- `REPO_LINEAR`, `REPO_TIES` — where to push the merged adapters (two separate HF model repos)
- `W_LINEAR`, `W_TIES` — weights for math/sql/code in each merge
- `DENSITY` — TIES density (lower = more aggressive pruning)


In [4]:
# === (3) Config ===

# Source LoRA adapters on the Hub, keyed by a short task label.
ADAPTERS = dict(
    math="uavleeva/grpo_math_run_level3_all_rewards_001",
    sql="uavleeva/grpo_sql_run_005",
    code="uavleeva/grpo_code_run_002",
)

# Where to push results (edit if you want different names)
REPO_LINEAR = "uavleeva/grpo_merged_math_sql_code_linear_001"
REPO_TIES = "uavleeva/grpo_merged_math_sql_code_ties_001"
PRIVATE_REPOS = False

# Merge weights: one entry per task label, one dict per merge strategy.
W_LINEAR = dict(math=1.0, sql=1.0, code=1.0)
W_TIES = dict(math=1.0, sql=1.0, code=1.0)

# TIES hyperparam (0.1..0.5 typical)
DENSITY = 0.2

# Local output dirs on Drive, both placed directly under MERGE_ROOT.
OUT_LINEAR, OUT_TIES = (
    os.path.join(MERGE_ROOT, leaf)
    for leaf in ("merged_linear_lora", "merged_ties_lora")
)

print("OUT_LINEAR:", OUT_LINEAR)
print("OUT_TIES  :", OUT_TIES)


OUT_LINEAR: /content/drive/MyDrive/HSE GRPO/merges/merged_linear_lora
OUT_TIES  : /content/drive/MyDrive/HSE GRPO/merges/merged_ties_lora


## HF token

Set your token in the environment (recommended):

```python
%env HF_TOKEN=hf_...
```

Then run the next cell.


In [6]:
# === (4) HF login ===
import os
from huggingface_hub import login, create_repo, HfApi

# The token is read from the environment; later cells reuse HF_TOKEN.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    # Log in with the token, without adding it to the git credential store.
    login(token=HF_TOKEN, add_to_git_credential=False)
    print("âœ… Logged in")
else:
    # Missing or empty token: stop here instead of failing later during the push.
    raise ValueError("HF_TOKEN not found. Set it first:  %env HF_TOKEN=hf_...")


âœ… Logged in


## Compatibility checks (hard fail if merge is unsafe)

This ensures:
- same base model
- same LoRA `r`, `lora_alpha`
- same `target_modules`
- same `peft_type`
- no `modules_to_save` set on any adapter


In [7]:
# === (5) Validate adapter compatibility ===
from peft import PeftConfig

# Load the PEFT config of every adapter, keyed by the same labels as ADAPTERS.
cfg = {
    adapter_label: PeftConfig.from_pretrained(hub_repo)
    for adapter_label, hub_repo in ADAPTERS.items()
}

def _norm_targets(x):
    return tuple(sorted(list(x))) if x is not None else None

# Print one summary line per adapter so a mismatch is easy to spot by eye.
print("=== Adapter sanity ===")
for name, c in cfg.items():
    print(f"[{name}] base={c.base_model_name_or_path} | r={getattr(c,'r',None)} | alpha={getattr(c,'lora_alpha',None)} | targets={_norm_targets(getattr(c,'target_modules',None))}")

# Collect the distinct values of every attribute that must agree across adapters.
bases = {c.base_model_name_or_path for c in cfg.values()}
rs = {getattr(c, "r", None) for c in cfg.values()}
alphas = {getattr(c, "lora_alpha", None) for c in cfg.values()}
targets = {_norm_targets(getattr(c, "target_modules", None)) for c in cfg.values()}
peft_types = {getattr(c, "peft_type", None) for c in cfg.values()}
# Kept per adapter in a dict rather than a set: a configured modules_to_save is a
# list, which is unhashable and would raise TypeError inside a set comprehension
# before the intended error message could be produced.
modules_to_save = {
    adapter_label: getattr(adapter_cfg, "modules_to_save", None)
    for adapter_label, adapter_cfg in cfg.items()
}

# Each attribute must have exactly one distinct value across all adapters.
_must_match = [
    ("base_model", bases),
    ("peft_type", peft_types),
    ("r", rs),
    ("lora_alpha", alphas),
    ("target_modules", targets),
]
problems = [
    f"{label} mismatch: {values}"
    for label, values in _must_match
    if len(values) != 1
]

# Any non-empty modules_to_save is reported, together with the adapter that sets it.
_with_saved_modules = {
    adapter_label: saved
    for adapter_label, saved in modules_to_save.items()
    if saved
}
if _with_saved_modules:
    problems.append(f"modules_to_save is set: {_with_saved_modules} (may break weighted merge)")

# Hard fail: list every problem at once instead of stopping at the first.
if problems:
    raise ValueError("Adapters are NOT safely mergeable:\n- " + "\n- ".join(problems))

# Exactly one base model remains at this point.
BASE_MODEL = next(iter(bases))
print("\nâœ… Compatible. Using base model:", BASE_MODEL)


adapter_config.json: 0.00B [00:00, ?B/s]

adapter_config.json: 0.00B [00:00, ?B/s]

adapter_config.json: 0.00B [00:00, ?B/s]

=== Adapter sanity ===
[math] base=unsloth/qwen2.5-coder-7b-instruct-bnb-4bit | r=64 | alpha=64 | targets=('down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj')
[sql] base=unsloth/qwen2.5-coder-7b-instruct-bnb-4bit | r=64 | alpha=64 | targets=('down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj')
[code] base=unsloth/qwen2.5-coder-7b-instruct-bnb-4bit | r=64 | alpha=64 | targets=('down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj')

âœ… Compatible. Using base model: unsloth/qwen2.5-coder-7b-instruct-bnb-4bit


## Load base model + load adapters

We load the base model in 4-bit (because your base is `...bnb-4bit`) and attach the three adapters under names:
- `math`, `sql`, `code`


In [8]:
# === (6) Load base model + adapters ===
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from unsloth import FastLanguageModel

# Load the base model that the compatibility check validated (BASE_MODEL), not a
# hard-coded id, so editing ADAPTERS can never pair the adapters with the wrong base.
base, tokenizer = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

# Attach every adapter listed in ADAPTERS under its own label: the first one
# creates the PeftModel wrapper, the remaining ones are loaded into it.
adapter_names = list(ADAPTERS)
first_adapter = adapter_names[0]
model = PeftModel.from_pretrained(base, ADAPTERS[first_adapter], adapter_name=first_adapter)
for extra_adapter in adapter_names[1:]:
    model.load_adapter(ADAPTERS[extra_adapter], adapter_name=extra_adapter)

print("âœ… Loaded base + adapters:", list(model.peft_config.keys()))


==((====))==  Unsloth 2026.1.4: Fast Qwen2 patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/265 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/646M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/646M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/646M [00:00<?, ?B/s]

âœ… Loaded base + adapters: ['math', 'sql', 'code']


## Create & save two merged adapters to Drive

- `merged_linear`
- `merged_ties`

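Each output dir receives only the selected merged adapter (the three source adapters are not written), together with the tokenizer files and a model card.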


In [9]:
# === (7) Create & save merged adapters (Drive) ===
import shutil
import os

def _clean_dir(path):
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path, exist_ok=True)

def save_selected_adapter(out_dir: str, adapter_name: str):
    """Save one adapter, the tokenizer, and a model card directly under ``out_dir``.

    ``out_dir`` is emptied first. After the call, the adapter files sit at the top
    level of ``out_dir``, so the folder can be uploaded as a Hub repo or passed to
    ``PeftModel.from_pretrained`` as is.

    Args:
        out_dir: Destination directory; any existing content is deleted.
        adapter_name: Name of an adapter already present on the global ``model``.
            It is also made the active adapter as a side effect.
    """
    _clean_dir(out_dir)
    model.set_adapter(adapter_name)
    model.save_pretrained(
        out_dir,
        safe_serialization=True,
        selected_adapters=[adapter_name],
    )

    # PEFT writes every adapter whose name is not "default" into
    # `out_dir/<adapter_name>/`. Move those files up one level so the adapter
    # config and weights are at the root, then drop the empty subfolder.
    nested_dir = os.path.join(out_dir, adapter_name)
    if os.path.isdir(nested_dir):
        for entry in os.listdir(nested_dir):
            shutil.move(os.path.join(nested_dir, entry), os.path.join(out_dir, entry))
        os.rmdir(nested_dir)

    tokenizer.save_pretrained(out_dir)
    try:
        model.create_or_update_model_card(out_dir)
    except Exception as e:
        # Best effort: a missing model card must not fail the save.
        print("model card update skipped:", e)
def _merge_and_save(merged_name, weights_by_task, out_dir, combination_type, **merge_kwargs):
    """Build a weighted merge of the loaded adapters and save it to ``out_dir``.

    Args:
        merged_name: Name to register the merged adapter under on ``model``.
        weights_by_task: Mapping of source adapter name to merge weight; its keys
            select which adapters take part, in dict order.
        out_dir: Directory handed to ``save_selected_adapter``.
        combination_type: Passed through to ``add_weighted_adapter``.
        **merge_kwargs: Extra ``add_weighted_adapter`` options (e.g. ``density``).
    """
    # add_weighted_adapter returns without doing anything when the target name
    # already exists, so a re-run with new weights / density would silently save
    # the OLD merge. Drop the previous result first to keep this cell re-runnable.
    if merged_name in model.peft_config:
        model.delete_adapter(merged_name)

    task_names = list(weights_by_task)
    model.add_weighted_adapter(
        adapters=task_names,
        weights=[weights_by_task[task] for task in task_names],
        adapter_name=merged_name,
        combination_type=combination_type,
        **merge_kwargs,
    )
    save_selected_adapter(out_dir, merged_name)


# --- Linear ---
MERGED_LINEAR = "merged_linear"
_merge_and_save(MERGED_LINEAR, W_LINEAR, OUT_LINEAR, "linear")
print("âœ… Saved linear merge to:", OUT_LINEAR)

# --- TIES ---
MERGED_TIES = "merged_ties"
_merge_and_save(MERGED_TIES, W_TIES, OUT_TIES, "ties", density=DENSITY)
print("âœ… Saved TIES merge to:", OUT_TIES)


âœ… Saved linear merge to: /content/drive/MyDrive/HSE GRPO/merges/merged_linear_lora
âœ… Saved TIES merge to: /content/drive/MyDrive/HSE GRPO/merges/merged_ties_lora


## Push both merges to Hugging Face

This uploads the contents of:
- `OUT_LINEAR` → `REPO_LINEAR`
- `OUT_TIES` → `REPO_TIES`


In [10]:
# === (8) Push to Hugging Face ===
from huggingface_hub import HfApi, create_repo

api = HfApi(token=HF_TOKEN)

# Create repos (idempotent)
for repo_id in (REPO_LINEAR, REPO_TIES):
    create_repo(repo_id=repo_id, private=PRIVATE_REPOS, exist_ok=True)

# One row per upload: (local folder, target repo, commit message, success label).
push_jobs = [
    (
        OUT_LINEAR,
        REPO_LINEAR,
        f"Add merged LoRA adapter (linear): {MERGED_LINEAR}",
        "âœ… Pushed linear:",
    ),
    (
        OUT_TIES,
        REPO_TIES,
        f"Add merged LoRA adapter (ties, density={DENSITY}): {MERGED_TIES}",
        "âœ… Pushed ties:",
    ),
]

# Upload the folders in the listed order and print the repo URL after each one.
for folder_path, repo_id, commit_message, done_label in push_jobs:
    api.upload_folder(
        folder_path=folder_path,
        repo_id=repo_id,
        repo_type="model",
        commit_message=commit_message,
    )
    print(done_label, f"https://huggingface.co/{repo_id}")


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:   0%|          | 30.3kB /  646MB            

  ...inear_lora/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

âœ… Pushed linear: https://huggingface.co/uavleeva/grpo_merged_math_sql_code_linear_001


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ..._ties_lora/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

  ...adapter_model.safetensors:   0%|          | 30.3kB /  646MB            

âœ… Pushed ties: https://huggingface.co/uavleeva/grpo_merged_math_sql_code_ties_001


## Quick smoke test (optional)

Load the merged adapter back from Drive and run a tiny generation.


In [None]:
# === (9) Optional: quick smoke test ===
from peft import PeftModel

def load_merged_from_dir(merged_dir: str, trust_remote_code: bool = False):
    """Load ``BASE_MODEL`` in 4-bit and attach the merged adapter saved in ``merged_dir``.

    Args:
        merged_dir: Folder holding a saved adapter. If ``adapter_config.json`` is
            not at its top level but exactly one direct subfolder contains it
            (the layout PEFT produces for adapters not named "default"), that
            subfolder is used instead.
        trust_remote_code: Forwarded to ``AutoModelForCausalLM.from_pretrained``.
            Defaults to ``False`` because ``BASE_MODEL`` is taken from downloaded
            adapter configs; enable it only for a base repo you trust.

    Returns:
        The ``PeftModel`` in eval mode.
    """
    # Local import: this cell's import line only brings in PeftModel.
    from transformers import BitsAndBytesConfig

    # Resolve where the adapter files actually are.
    adapter_dir = merged_dir
    if os.path.isdir(merged_dir) and not os.path.isfile(
        os.path.join(merged_dir, "adapter_config.json")
    ):
        nested_candidates = [
            os.path.join(merged_dir, entry)
            for entry in sorted(os.listdir(merged_dir))
            if os.path.isfile(os.path.join(merged_dir, entry, "adapter_config.json"))
        ]
        if len(nested_candidates) == 1:
            adapter_dir = nested_candidates[0]

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        device_map="auto",
        # Replaces the deprecated `load_in_4bit=True` keyword argument.
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        trust_remote_code=trust_remote_code,
    ).eval()
    return PeftModel.from_pretrained(base_model, adapter_dir).eval()

# Reload one merged adapter from Drive and run a single greedy generation.
m_test = load_merged_from_dir(OUT_TIES)  # change to OUT_LINEAR if you want
prompt = "Write a short Python function that returns the sum of two integers."

# Tokenize, then move the tensors to the device the model reports.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(m_test.device)

# do_sample=False -> greedy decoding; generation runs without gradient tracking.
with torch.no_grad():
    out = m_test.generate(max_new_tokens=128, do_sample=False, **inputs)

# Decode the first (only) sequence with special tokens stripped.
print(tokenizer.decode(out[0], skip_special_tokens=True))
