In [1]:
!pip install -e ../MoRA/peft-mora

Obtaining file:///home/vmasti/efficient-llms-capstone/MoRA/peft-mora
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: peft
  Building editable for peft (pyproject.toml) ... [?25ldone
[?25h  Created wheel for peft: filename=peft-0.9.0-0.editable-py3-none-any.whl size=10336 sha256=9b9a7f282c56bb77292ec227f0b2598caa9e0ca0a49e8997d41ac264370a58d9
  Stored in directory: /tmp/pip-ephem-wheel-cache-sm7hq077/wheels/d5/54/5c/0b1f54c87eb0a8617c99aebb7fd288f1cd0ef521abb5277f2b
Successfully built peft
Installing collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.9.0
    Uninstalling peft-0.9.0:
      Successfully uninstalled peft-0.9.0
Successfully installed peft-0.9.0


In [16]:
from transformers import GPTNeoXForCausalLM, AutoTokenizer
from peft import PeftModel, LoraConfig, get_peft_model
import torch
import torch.nn as nn
from mora_fine_tune import evaluate_model
from accelerate import Accelerator
import pandas as pd

In [26]:
# Experiment configuration shared by the cells below.
args = {
    # Checkpoints for the two base models.
    "large_model": "EleutherAI/pythia-410m",
    "small_model": "EleutherAI/pythia-70m",
    # Adapter directory passed to large_model.load_adapter().
    "large_adapter": "./weight/pythia_410m_r=8_0.0001_fixed",
    # rank, mora_type and lora_dropout are forwarded to LoraConfig for the small model.
    "rank": 8,
    "mora_type": 6,
    "lora_dropout": 0.05,
    # NOTE(review): torch.load deserializes arbitrary Python objects via pickle;
    # only load this file if it comes from a trusted source.
    "eval_dataloader": torch.load("./data/eval_dataloader.pt"),
    "accelerator": Accelerator(),
    # "large_layers": 24,
    # "small_layers": 6,
}

# large_rank * 1024

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [15]:
# The large model's tokenizer is loaded once; the same tokenizer is used for all models.
tokenizer = AutoTokenizer.from_pretrained(args["large_model"])

# Pad on the left and reuse the EOS token as the padding token.
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# Load the large fine-tuned model

# Start from the base checkpoint named by args["large_model"], then attach the
# adapter stored at args["large_adapter"] on top of it.
large_model = GPTNeoXForCausalLM.from_pretrained(args["large_model"])
large_model.load_adapter(args["large_adapter"])

In [5]:
# Initialize the small model with MoRA weights

# Adapter configuration for the small model.
#   - use_mora switches the adapter from LoRA to MoRA.
#   - mora_type: type 1 (Sharing) for large lora ranks, Eq. 6 in paper;
#     type 6 (RoPE based) for small lora ranks, Eq. 9 in paper.
#   - r is given as a LoRA rank; the corresponding $\hat{r}$ is calculated in MoRA.
#   - lora_alpha is intentionally not set: MoRA does not use it.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],
    use_mora=True,
    mora_type=args["mora_type"],
    r=args["rank"],
    lora_dropout=args["lora_dropout"],
)

# Load the small base checkpoint and wrap it with the adapter config in one step.
small_model = get_peft_model(
    GPTNeoXForCausalLM.from_pretrained(args["small_model"]),
    config,
)

In [17]:
# Accumulator for evaluation metrics; each evaluation cell adds one row to it.
eval_results = pd.DataFrame()

In [6]:
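# LoRA-equivalent parameter count for the large model's query_key_value layer:
# (in_features + out_features) * rank = (1024 + 3072) * 8 = 32768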

In [7]:
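# 180 * 180 = 32400: size of the 180x180 lora_A matrix shown in the large model,
# which stays within (1024 + 3072) * 8 = 32768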


In [8]:
# Display the large model's module tree to inspect the adapter layer shapes.
large_model

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 1024)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-23): 24 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXAttention(
          (rotary_emb): GPTNeoXRotaryEmbedding()
          (query_key_value): lora.Linear(
            (base_layer): Linear(in_features=1024, out_features=3072, bias=True)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=180, out_features=180, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Lin

In [9]:
# Display the adapter-wrapped small model's module tree for comparison with the large model.
small_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoXForCausalLM(
      (gpt_neox): GPTNeoXModel(
        (embed_in): Embedding(50304, 512)
        (emb_dropout): Dropout(p=0.0, inplace=False)
        (layers): ModuleList(
          (0-5): 6 x GPTNeoXLayer(
            (input_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (post_attention_layernorm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (post_attention_dropout): Dropout(p=0.0, inplace=False)
            (post_mlp_dropout): Dropout(p=0.0, inplace=False)
            (attention): GPTNeoXAttention(
              (rotary_emb): GPTNeoXRotaryEmbedding()
              (query_key_value): lora.Linear(
                (base_layer): Linear(in_features=512, out_features=1536, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Li

In [23]:
def truncate_mora_weights(old_module, new_rank):
    """Build a square linear layer from the top-left corner of ``old_module``.

    Args:
        old_module: Source ``nn.Linear`` whose ``weight`` (and ``bias``, if any)
            are sliced.
        new_rank: Side length of the new ``new_rank x new_rank`` layer.

    Returns:
        A new ``nn.Linear(new_rank, new_rank)`` whose weight is
        ``old_module.weight[:new_rank, :new_rank]`` and whose bias (only present
        when the source has one) is ``old_module.bias[:new_rank]``. The new layer
        uses the same dtype and device as the source weight.

    Raises:
        ValueError: If ``new_rank`` is not positive or is larger than either
            dimension of the source weight.
    """
    out_features, in_features = old_module.weight.shape
    if not 0 < new_rank <= min(out_features, in_features):
        raise ValueError(
            f"new_rank must be in [1, {min(out_features, in_features)}] to truncate a "
            f"{out_features}x{in_features} weight, got {new_rank}"
        )

    has_bias = old_module.bias is not None

    # Create the layer with the source's dtype/device so it can be dropped into a
    # model that lives where the source does without an extra cast or transfer.
    new_module = nn.Linear(
        new_rank,
        new_rank,
        bias=has_bias,
        device=old_module.weight.device,
        dtype=old_module.weight.dtype,
    )

    # Plain value copy: the new parameters must not be tied to the source's autograd graph.
    with torch.no_grad():
        new_module.weight.copy_(old_module.weight[:new_rank, :new_rank])

        if has_bias:
            new_module.bias.copy_(old_module.bias[:new_rank])

    return new_module

In [None]:
# Run evaluation on the large model

eval_loss, eval_rouge_scores = evaluate_model(large_model, args["eval_dataloader"], args["accelerator"], tokenizer)

# Save the evaluation results to eval_results.
# DataFrame.append was removed in pandas 2.0, so build a one-row frame and
# concatenate it. The first row replaces the empty frame outright so that
# pd.concat is never handed an empty operand.
eval_row = pd.DataFrame(
    [
        {
            "model": "fine_tuned_" + args["large_model"],
            "rank": args["rank"],
            "eval_loss": eval_loss,
            **eval_rouge_scores,
        }
    ]
)
eval_results = eval_row if eval_results.empty else pd.concat([eval_results, eval_row], ignore_index=True)

In [None]:
# Evaluate the small model (before fine-tuning)

eval_loss, eval_rouge_scores = evaluate_model(small_model, args["eval_dataloader"], args["accelerator"], tokenizer)

# Save the evaluation results to eval_results.
# DataFrame.append was removed in pandas 2.0, so build a one-row frame and
# concatenate it. The first row replaces the empty frame outright so that
# pd.concat is never handed an empty operand.
eval_row = pd.DataFrame(
    [
        {
            "model": "raw_" + args["small_model"],
            "rank": args["rank"],
            "eval_loss": eval_loss,
            **eval_rouge_scores,
        }
    ]
)
eval_results = eval_row if eval_results.empty else pd.concat([eval_results, eval_row], ignore_index=True)

In [28]:
# Side length of the square adapter matrix carved out of the large model's adapter.
# NOTE(review): presumably chosen to match the small model's query_key_value layer
# (512 -> 1536) at rank 8, since (512 + 1536) * 8 = 128 * 128 — confirm against the
# lora_A shape printed for small_model.
new_rhat = 128

copied, skipped = [], []
for name, module in large_model.named_modules():
    if not name.endswith(("lora_A", "lora_B")):
        continue

    *parent_path, leaf = name.split(".")

    # Walk the same dotted path inside the small model using attribute lookup.
    # NOTE(review): this relies on the PEFT wrapper around small_model resolving
    # the unprefixed module names of large_model — confirm.
    # Stop at the first missing component: the large model has more layers than
    # the small one, and continuing past a missing component would attach the new
    # module to whatever ancestor was reached last (e.g. the `layers` ModuleList).
    parent_module = small_model
    for part in parent_path:
        parent_module = getattr(parent_module, part, None)
        if parent_module is None:
            break

    # Only replace an adapter slot that already exists in the small model.
    if parent_module is None or not hasattr(parent_module, leaf):
        skipped.append(name)
        continue

    new_module = truncate_mora_weights(module.default, new_rhat)
    setattr(parent_module, leaf, nn.ModuleDict({"default": new_module}))
    copied.append(name)

if not copied:
    raise RuntimeError("No adapter weights were copied into small_model; check the module paths.")

print(f"Copied {len(copied)} adapter modules; skipped {len(skipped)} with no counterpart in the small model.")

In [None]:
# Evaluate the small model (after copying the weights)

eval_loss, eval_rouge_scores = evaluate_model(small_model, args["eval_dataloader"], args["accelerator"], tokenizer)

# Save the evaluation results to eval_results.
# DataFrame.append was removed in pandas 2.0, so build a one-row frame and
# concatenate it. The first row replaces the empty frame outright so that
# pd.concat is never handed an empty operand.
eval_row = pd.DataFrame(
    [
        {
            "model": "truncated_" + args["small_model"] + "_from_" + args["large_model"],
            "rank": args["rank"],
            "eval_loss": eval_loss,
            **eval_rouge_scores,
        }
    ]
)
eval_results = eval_row if eval_results.empty else pd.concat([eval_results, eval_row], ignore_index=True)

In [None]:
# Save the evaluation results to a CSV file
# (relative path, so it lands in the current working directory; index=False
# leaves the DataFrame index out of the file).
eval_results.to_csv("eval_results.csv", index=False)