## Weighted Lora Merge
Based on the implementation from https://github.com/CoffeeVampir3/ez-trainer and the CLI adaptation from https://github.com/zarakiquemparte/zaraki-tools

In [None]:
# Install requirements.
# Switch the working directory to /kaggle/ first: the relative paths used by the
# later cells ("./<repo>", "./offload", "merge") are resolved against it.
%cd /kaggle/
# transformers is installed from its GitHub repository (unpinned, so whatever
# the default branch currently holds); peft is requested by package name.
!pip install git+https://github.com/huggingface/transformers peft

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably only required for gated/private downloads and for
# the optional push at the end (push_to_hub = True) -- confirm.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Download the complete base-model repository into a local folder.

# Hub repository of the base model.
repo_id = "TheBloke/Llama-2-13B-fp16"

# Branch (revision) of that repository to fetch.
revision="main"

# Download into "./<repo_id with '/' replaced by '_'>" relative to the current
# working directory.
from huggingface_hub import snapshot_download
snapshot_download(repo_id=repo_id, revision=revision, local_dir=f"./{repo_id.replace('/', '_')}")

# Echo the folder so it can be copied into model_dir in the settings cell.
print(f"Model dir: './{repo_id.replace('/', '_')}'")

In [None]:
# Download the complete LoRA repository into a local folder.
# NOTE: this cell reuses the names repo_id and revision, so it overwrites the
# values set by the base-model download cell.

# Hub repository that holds the LoRA adapter(s).
repo_id = "lemonilia/limarp-llama2"

# Branch (revision) of that repository to fetch.
revision="main"

# Download into "./<repo_id with '/' replaced by '_'>" relative to the current
# working directory.
from huggingface_hub import snapshot_download
snapshot_download(repo_id=repo_id, revision=revision, local_dir=f"./{repo_id.replace('/', '_')}")

# Echo the folder; lora_dir in the settings cell points at a sub-folder of it.
print(f"Lora dir: './{repo_id.replace('/', '_')}'")

In [None]:
# Merge settings.
# model_dir: local folder of the base model (as printed by the model download cell).
model_dir = "./TheBloke_Llama-2-13B-fp16"
# lora_dir: folder of the adapter to merge; here a sub-folder of the downloaded
# LoRA repository.
lora_dir = "./lemonilia_limarp-llama2/LIMARP-Llama2-LoRA-adapter-13B"
# weight: passed as merge_weight, the extra multiplier applied to the LoRA delta
# on top of the adapter's own scaling.
weight = 0.66

# Push to hub vs save files.
# repo_name and output_dir are read as module-level globals inside
# initiate_model_lora_merge: repo_name is the push target when push_to_hub is
# True, otherwise the merged model and tokenizer are written to output_dir.
repo_name = "Limarp-Merged-L2-13b"
push_to_hub = False

output_dir = "merge"

# Run merge
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import shutil

import os

def _find_lora_pair(root, param_name):
    """Return the ``(lora_A, lora_B)`` "default" weights for ``param_name``, or None.

    ``param_name`` is a dotted parameter name relative to ``root``. Two module
    layouts are recognised:

    * the adapters live on the same module that owns the weight
      (``<proj>.weight`` next to ``<proj>.lora_A``), and
    * the original layer is wrapped and exposed as ``base_layer``
      (``<proj>.base_layer.weight`` with the adapters on ``<proj>``).
    """
    path = param_name.split('.')[:-1]  # drop the trailing 'weight'
    chain = [root]
    for part in path:
        child = getattr(chain[-1], part, None)
        if child is None:
            return None
        chain.append(child)

    candidates = [chain[-1]]
    if path and path[-1] == 'base_layer':
        # The weight belongs to the wrapped layer; the adapters sit one level up.
        candidates.append(chain[-2])

    for owner in candidates:
        try:
            return owner.lora_A.default.weight, owner.lora_B.default.weight
        except AttributeError:
            # Not a LoRA-adapted module (or no "default" adapter on it).
            continue
    return None


def _strip_lora_modules(base_model):
    """Remove the adapter modules from ``base_model`` after merging."""
    # Materialise the listing first: the module tree is modified below.
    for name, module in list(base_model.named_modules()):
        if not (hasattr(module, 'lora_A') or hasattr(module, 'lora_B')):
            continue

        inner = getattr(module, 'base_layer', None)
        if name and isinstance(inner, torch.nn.Module):
            # Wrapped layout: put the (now merged) original layer back in place
            # of the wrapper so saved weight names carry no 'base_layer' part.
            parent_name, _, child_name = name.rpartition('.')
            setattr(base_model.get_submodule(parent_name), child_name, inner)
            continue

        for attr in ('lora_A', 'lora_B'):
            if hasattr(module, attr):
                delattr(module, attr)


def merge(base_model, lora_model, scaling, merge_weight=1.0):
    """Fold the "default" LoRA adapter of ``lora_model`` into the base weights.

    For every ``*.weight`` parameter of ``lora_model`` that is not itself a
    ``lora_A``/``lora_B`` weight and whose module carries both adapters,
    ``(lora_B @ lora_A) * scaling * merge_weight`` is added to the weight in
    place. Afterwards the adapter modules are removed from ``base_model``.

    Args:
        base_model: Model whose modules are cleaned of LoRA adapters at the end.
            The caller in this file passes the model that ``lora_model`` was
            built from.
        lora_model: Model whose parameters are scanned for adapted weights.
        scaling: LoRA scaling factor applied to the low-rank product.
        merge_weight: Additional multiplier for the merge strength.

    Returns:
        None. The weights are updated in place.
    """
    pending = []
    for name, param in lora_model.named_parameters():
        if not name.endswith('.weight'):
            continue
        if 'lora_A' in name or 'lora_B' in name:
            continue
        pair = _find_lora_pair(lora_model, name)
        if pair is not None:
            pending.append((name, param.data, *pair))

    factor = scaling * merge_weight
    with torch.no_grad():
        for name, target, lora_a, lora_b in pending:
            # Build the delta in float32 and round once into the storage dtype:
            # half-precision matmul loses accuracy and is not available in every
            # PyTorch CPU build.
            delta = (lora_b.float() @ lora_a.float()) * factor
            target.add_(delta.to(device=target.device, dtype=target.dtype))
            print(f"Did thing for layer named {name}")

    if not pending:
        # Make a no-op merge visible instead of silently producing a copy of
        # the base model.
        print("Warning: no LoRA-adapted weights were found; nothing was merged.")

    # clean lora loading trash
    _strip_lora_modules(base_model)

def get_lora_scaling(lora_model):
    """Return the scaling factor of the "default" adapter of ``lora_model``.

    The factor is ``lora_alpha / r``. When the adapter config enables
    rank-stabilised LoRA (``use_rslora``), it is ``lora_alpha / sqrt(r)``
    instead, so the merge uses the same scale the adapter was trained with.

    NOTE(review): per-module ``rank_pattern`` / ``alpha_pattern`` overrides are
    not taken into account; a single factor is returned for the whole adapter.

    Args:
        lora_model: Object exposing ``peft_config["default"]`` with ``r`` and
            ``lora_alpha`` attributes.

    Returns:
        The scaling factor as a float.
    """
    config = lora_model.peft_config["default"]
    rank = config.r
    alpha = config.lora_alpha

    # Configs without the attribute are treated as plain LoRA.
    if getattr(config, "use_rslora", False):
        return alpha / rank ** 0.5
    return alpha / rank

def load_model(model_path, lora_path):
    """Load the base model and build the PEFT model on top of it.

    Both loads use float16 on the "cpu" device map with low CPU memory usage.
    The two offload folders ("./offload" and "./offload_peft") are wiped and
    recreated before loading.

    Args:
        model_path: Location passed to ``AutoModelForCausalLM.from_pretrained``.
        lora_path: Location passed to ``PeftModel.from_pretrained``.

    Returns:
        A ``(base_model, lora_model)`` tuple.
    """
    model_offload_dir = "./offload"
    adapter_offload_dir = "./offload_peft"
    scratch_dirs = (model_offload_dir, adapter_offload_dir)

    # Start from empty offload folders: remove leftovers, then recreate.
    for scratch in scratch_dirs:
        shutil.rmtree(scratch, ignore_errors=True)
    for scratch in scratch_dirs:
        os.makedirs(scratch, exist_ok=True)

    # Settings shared by the base-model load and the adapter load.
    shared_kwargs = {
        "torch_dtype": torch.float16,
        "device_map": "cpu",
        "low_cpu_mem_usage": True,
    }

    base_model = AutoModelForCausalLM.from_pretrained(
        model_path,
        return_dict=True,
        offload_folder=model_offload_dir,
        **shared_kwargs,
    )

    print(f"Loading PEFT: {lora_path}")
    lora_model = PeftModel.from_pretrained(
        base_model,
        lora_path,
        offload_folder=adapter_offload_dir,
        **shared_kwargs,
    )

    return base_model, lora_model

def initiate_model_lora_merge(model_path, lora_path, merge_weight, do_push=False):
    """Run the whole merge: load, merge the LoRA into the base model, export.

    The merged base model and the tokenizer loaded from ``model_path`` are
    either pushed to the hub as a private repo named by the module-level
    ``repo_name``, or saved into the module-level ``output_dir``.

    Args:
        model_path: Location of the base model (also used for the tokenizer).
        lora_path: Location of the LoRA adapter.
        merge_weight: Extra multiplier for the LoRA delta, forwarded to ``merge``.
        do_push: When true, push to the hub; otherwise save to disk.
    """
    for location in (model_path, lora_path):
        print(location)

    base_model, lora_model = load_model(model_path, lora_path)
    lora_scale = get_lora_scaling(lora_model)

    print(f"Lora Scaling: {lora_scale}")

    merge(base_model, lora_model, lora_scale, merge_weight=merge_weight)

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Exported in this order: model first, then tokenizer.
    artifacts = (base_model, tokenizer)

    if not do_push:
        os.makedirs(output_dir, exist_ok=True)
        for artifact in artifacts:
            artifact.save_pretrained(output_dir)
    else:
        for artifact in artifacts:
            artifact.push_to_hub(repo_name, private=True)

    print("Done merging.")

# Run the merge with the settings defined at the top of this cell; `weight` is
# passed as merge_weight and `push_to_hub` as do_push.
initiate_model_lora_merge(model_dir, lora_dir, weight, push_to_hub)