In [41]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import regex as re
from loguru import logger

In [2]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Identifier of the reference model whose weights serve as the ground truth X.
ORIG_MODEL_DIR = "mistralai/Mistral-7B-v0.1"
# Directory holding the LoftQ artifacts: the backbone weights plus the
# adapter subfolder that is loaded further down.
# NOTE(review): this directory name mentions Llama-2-7b while the reference
# model above is Mistral-7B — confirm both refer to the same architecture
# before comparing weights layer by layer.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
LOFTQ_MODEL_DIR = "/home/ubuntu/peft/artifacts/loftq/Llama-2-7b-hf-4bit-64rank"

In [4]:
# Reference model: loaded in bfloat16 with no quantization config, so its
# parameters can be compared directly against the LoftQ reconstruction.
orig_model = AutoModelForCausalLM.from_pretrained(ORIG_MODEL_DIR, torch_dtype=torch.bfloat16)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [12]:
# Move the reference model to the GPU when one is available; otherwise keep it
# on the CPU so this cell does not raise on machines without CUDA.
orig_model = orig_model.to("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# LoftQ backbone, loaded in bfloat16. A BitsAndBytesConfig is still passed, but
# with 4-bit loading explicitly switched off; the nf4 options
# (bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=False,
# bnb_4bit_quant_type='nf4') are intentionally left unset.
base_model = AutoModelForCausalLM.from_pretrained(
    LOFTQ_MODEL_DIR,
    quantization_config=BitsAndBytesConfig(load_in_4bit=False),
    torch_dtype=torch.bfloat16,
)

# Attach the adapters stored in the "loft_init" subfolder of the same directory.
# They are loaded as trainable parameters.
peft_model = PeftModel.from_pretrained(
    base_model, LOFTQ_MODEL_DIR, is_trainable=True, subfolder="loft_init"
)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

[32m2024-01-18 19:34:09.394[0m | [1mINFO    [0m | [36mpeft.tuners.lora.model[0m:[36m_create_and_replace[0m:[36m142[0m - [1mAdapting layer: q_proj[0m
[32m2024-01-18 19:34:09.417[0m | [1mINFO    [0m | [36mpeft.tuners.lora.model[0m:[36m_create_and_replace[0m:[36m142[0m - [1mAdapting layer: k_proj[0m
[32m2024-01-18 19:34:09.422[0m | [1mINFO    [0m | [36mpeft.tuners.lora.model[0m:[36m_create_and_replace[0m:[36m142[0m - [1mAdapting layer: v_proj[0m
[32m2024-01-18 19:34:09.427[0m | [1mINFO    [0m | [36mpeft.tuners.lora.model[0m:[36m_create_and_replace[0m:[36m142[0m - [1mAdapting layer: o_proj[0m
[32m2024-01-18 19:34:09.432[0m | [1mINFO    [0m | [36mpeft.tuners.lora.model[0m:[36m_create_and_replace[0m:[36m142[0m - [1mAdapting layer: gate_proj[0m
[32m2024-01-18 19:34:09.439[0m | [1mINFO    [0m | [36mpeft.tuners.lora.model[0m:[36m_create_and_replace[0m:[36m142[0m - [1mAdapting layer: up_proj[0m
[32m2024-01-18 19:34:09.453[

In [None]:
# Per-layer relative reconstruction error of the LoftQ decomposition.
#
# For every adapted layer the PEFT model exposes three parameters:
#   <layer>.base_layer.weight       -> Q
#   <layer>.lora_A.default.weight   -> R
#   <layer>.lora_B.default.weight   -> L
# The reconstruction is X_hat = Q + L @ R, and the value stored in `errors`
# under the layer's "layers.<...>" path is
#   ||X - X_hat||_F / ||X||_F
# where X is the weight of the same layer in the original model.
#
# NOTE(review): no LoRA scaling factor is applied to L @ R here — confirm that
# matches how this checkpoint was initialised.
# NOTE(review): assumes base_layer.weight is a dense tensor; a packed 4-bit
# weight would have to be dequantized before it can be added to L @ R.

Q, L, R = None, None, None
layer_name = None

# Index the original weights by their "layers.<...>" path (without the trailing
# ".weight") so each lookup is independent of parameter ordering.
orig_layers = {}
for orig_name, orig_weight in orig_model.named_parameters():
    start = orig_name.find('layers.')
    if start != -1 and orig_name.endswith('.weight'):
        orig_layers[orig_name[start:-len('.weight')]] = orig_weight

errors = {}

for name, weight in peft_model.named_parameters():
    # A base-layer weight opens a new (Q, L, R) triple.
    base_match = re.search(r'(layers\..*)\.base_layer\.weight$', name)
    if base_match:
        if layer_name is not None:
            logger.warning(f'Incomplete adapter set for layer {layer_name}; skipping it.')
        layer_name = base_match.group(1)
        Q, L, R = weight, None, None
        continue

    # Ignore everything until the first base-layer weight has been seen.
    if layer_name is None:
        continue

    # Get adapters
    adapter_match = re.search(r'(layers\..*)\.([^.]+)\.default\.weight$', name)
    if not adapter_match:
        continue
    layer_name_verify, adapter_name = adapter_match.groups()
    assert layer_name_verify == layer_name, (
        f'Adapter {name} does not belong to the current layer {layer_name}'
    )

    if adapter_name == 'lora_A':
        R = weight
    elif adapter_name == 'lora_B':
        L = weight
    else:
        logger.warning(f'Unknown adapter type for layer {name} of the Peft model!')

    # Wait until both adapter factors of the current layer are available.
    if L is None or R is None:
        continue

    X = orig_layers.get(layer_name)
    if X is None:
        logger.warning(f'Could not find layer with substring {layer_name} in the original model!')
    else:
        # no_grad: the inputs are trainable parameters; without it every stored
        # error would keep an autograd graph referencing full-size matrices.
        with torch.no_grad():
            # Work on the original weight's device and in float32 so the two
            # models may live on different devices and the norms are not
            # limited by bfloat16 precision.
            device = X.device
            X_ref = X.to(dtype=torch.float32)
            X_hat = (
                Q.to(device=device, dtype=torch.float32)
                + L.to(device=device, dtype=torch.float32) @ R.to(device=device, dtype=torch.float32)
            )
            if X_hat.shape != X_ref.shape:
                logger.warning(
                    f'Shape mismatch for layer {layer_name}: '
                    f'original {tuple(X_ref.shape)} vs reconstruction {tuple(X_hat.shape)}; skipping it.'
                )
            else:
                fro_err = torch.norm(X_ref - X_hat, p='fro')
                errors[layer_name] = fro_err / torch.norm(X_ref, p='fro')

    Q, L, R, layer_name = None, None, None, None
            