### Basic LoRA Merge
Based on TheBloke's script for merging a LoRA adapter into its base model.

In [None]:
# Install reqs
%cd /kaggle/
!pip install -U transformers peft accelerate

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably only required for gated/private repos and for the
# push_to_hub path of the merge cell — confirm for the repos in use.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Download the complete base-model repository from the Hub.
from huggingface_hub import snapshot_download

# Hub repository that holds the base model
repo_id = "TheBloke/Llama-2-13B-fp16"

# Branch of that repository to fetch
revision = "main"

# The files are placed in a local folder named after the repository,
# with the "/" of the repo id replaced by "_".
snapshot_download(
    repo_id=repo_id,
    revision=revision,
    local_dir=f"./{repo_id.replace('/', '_')}",
)

# Report the folder so it can be copied into model_dir in the merge cell.
print(f"Model dir: './{repo_id.replace('/', '_')}'")

In [None]:
# Download the complete LoRA adapter repository from the Hub.
from huggingface_hub import snapshot_download

# Hub repository that holds the LoRA adapter
# (note: this reuses, and therefore overwrites, the repo_id/revision variables
# set by the base-model download cell)
repo_id = "lemonilia/limarp-llama2"

# Branch of that repository to fetch
revision = "main"

# The files are placed in a local folder named after the repository,
# with the "/" of the repo id replaced by "_".
snapshot_download(
    repo_id=repo_id,
    revision=revision,
    local_dir=f"./{repo_id.replace('/', '_')}",
)

# Report the folder so it can be used to build lora_dir in the merge cell.
print(f"Lora dir: './{repo_id.replace('/', '_')}'")

In [None]:
# Merge settings
# model_dir: local folder of the base model; matches the "Model dir" path
# printed by the base-model download cell.
model_dir = "./TheBloke_Llama-2-13B-fp16"
# lora_dir: a sub-folder inside the downloaded LoRA repository.
# NOTE(review): presumably the folder holding the 13B adapter files — confirm
# against the repository layout.
lora_dir = "./lemonilia_limarp-llama2/LIMARP-Llama2-LoRA-adapter-13B"

# Push to hub vs save files:
# when push_to_hub is True the merged model and tokenizer are pushed to the
# private Hub repository repo_name; when False they are saved to output_dir.
repo_name = "Limarp-Merged-L2-13b"
push_to_hub = False

# Local destination folder, used only when push_to_hub is False.
output_dir = "merge"

# Run merge
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import shutil

import os

def merge_lora(
    base_model_path,
    lora_path,
    do_push,
    *,
    hub_repo=None,
    save_dir=None,
):
    """Merge a LoRA adapter into its base model, then push or save the result.

    The base model is loaded on the CPU in float16, the adapter is attached
    with PEFT and folded into the base weights with ``merge_and_unload``.
    The tokenizer of the base model is pushed/saved next to the merged model.

    Args:
        base_model_path: Location of the base causal-LM checkpoint, as
            accepted by ``AutoModelForCausalLM.from_pretrained``.
        lora_path: Location of the LoRA adapter, as accepted by
            ``PeftModel.from_pretrained``.
        do_push: When true, push model and tokenizer to a private Hub
            repository; otherwise save them to a local directory.
        hub_repo: Hub repository name used when pushing. Defaults to the
            module-level ``repo_name``.
        save_dir: Local directory used when saving. Defaults to the
            module-level ``output_dir``.

    Returns:
        None. The result is delivered as a Hub upload or as files on disk.
    """
    offload_model_path = "./offload"
    offload_peft_path = "./offload_peft"
    scratch_dirs = (offload_model_path, offload_peft_path)

    # Start from empty scratch folders so stale files from an earlier run
    # cannot be picked up.
    for scratch in scratch_dirs:
        shutil.rmtree(scratch, ignore_errors=True)
        os.makedirs(scratch, exist_ok=True)

    device_map = "cpu"
    float_type = torch.float16

    try:
        # Load the tokenizer first: it is cheap, so a bad path fails here
        # instead of after the expensive model load and merge.
        tokenizer = AutoTokenizer.from_pretrained(base_model_path)

        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_path,
            return_dict=True,
            torch_dtype=float_type,
            device_map=device_map,
            offload_folder=offload_model_path,
            low_cpu_mem_usage=True,
        )

        print(f"Loading PEFT: {lora_path}")
        model = PeftModel.from_pretrained(
            base_model,
            lora_path,
            torch_dtype=float_type,
            device_map=device_map,
            offload_folder=offload_peft_path,
            low_cpu_mem_usage=True,
        )
        print("Running merge_and_unload")
        model = model.merge_and_unload()

        if do_push:
            # The module-level default is looked up only on this path, so
            # save-only callers do not need ``repo_name`` to be defined.
            target_repo = repo_name if hub_repo is None else hub_repo
            model.push_to_hub(target_repo, private=True)
            tokenizer.push_to_hub(target_repo, private=True)
        else:
            target_dir = output_dir if save_dir is None else save_dir
            os.makedirs(target_dir, exist_ok=True)
            model.save_pretrained(target_dir)
            tokenizer.save_pretrained(target_dir)
            print(f"Model saved to {target_dir}")
    finally:
        # The scratch folders are private to this function; remove them on
        # success and on failure so they do not pile up on disk.
        for scratch in scratch_dirs:
            shutil.rmtree(scratch, ignore_errors=True)
    
# Run the merge with the settings defined at the top of this cell.
merge_lora(
    base_model_path=model_dir,
    lora_path=lora_dir,
    do_push=push_to_hub,
)