In [1]:
import torch
import transformers
from datasets import load_dataset
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, load_peft_weights
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from task_vector import TaskVector

# Base checkpoint and the adapters whose task vectors are combined below.
model_id = "LoftQ/Mistral-7B-v0.1-4bit-64rank"
peft_ids = ["JD97/BoolQA", "JD97/SC"]

# Build one TaskVector per adapter id, always against the same base model.
task_vectors = list(
    map(lambda adapter_id: TaskVector(model_id, adapter_id, lora=True), peft_ids)
)

# sum() starts from the integer 0, so this relies on TaskVector supporting
# addition with that start value (see task_vector.py — not visible here).
task_vector_sum = sum(task_vectors)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Apply the summed task vector to the base checkpoint; the call returns two
# values, unpacked here as the model and its tokenizer.
# NOTE(review): what exactly apply_to_lora builds is defined in task_vector.py,
# which is not visible in this notebook.
model, tokenizer = task_vector_sum.apply_to_lora(model_id)

Loading checkpoint shards: 100%|██████████| 3/3 [00:19<00:00,  6.66s/it]


In [4]:
# Upload the combined model to the Hub repository "JD97/ttt" (the recorded run
# uploaded adapter_model.safetensors); a later cell loads the adapter weights
# back from this same repo id.
model.push_to_hub("JD97/ttt")

adapter_model.safetensors: 100%|██████████| 671M/671M [00:31<00:00, 21.1MB/s]  


CommitInfo(commit_url='https://huggingface.co/JD97/ttt/commit/8c1c6512034ffcc73bca40c5b5e0b891f24608eb', commit_message='Upload model', commit_description='', oid='8c1c6512034ffcc73bca40c5b5e0b891f24608eb', pr_url=None, pr_revision=None, pr_num=None)

In [1]:
from utils import *

In [3]:
# NOTE(review): this call raised NameError in the recorded run, i.e.
# `from utils import *` did not bring `lora_reassign_weights` into scope.
# The function is defined in a later cell and requires `model`, `state_dict`,
# `r` and `lora_alpha`, so this bare call cannot succeed as written.
lora_reassign_weights()

NameError: name 'lora_reassign_weights' is not defined

In [1]:

def _fused_shard_bounds(total_rows, shard_rows):
    """Return ``(start, stop)`` row ranges for the shards of a fused weight.

    Args:
        total_rows: Number of rows (dim 0) of the fused parameter.
        shard_rows: One entry per shard, in stacking order: the row count of
            that shard's LoRA delta, or ``None`` when the adapter has no
            weights for the shard.

    Returns:
        A list of ``(start, stop)`` tuples, one per shard.
    """
    count = len(shard_rows)
    equal = total_rows // count
    known = [rows for rows in shard_rows if rows is not None]
    # Default: equal-sized shards. This is kept whenever it already agrees with
    # every delta, so inputs that worked with an equal split behave the same.
    sizes = [equal] * count
    if any(rows != equal for rows in known):
        remainder = total_rows - sum(known)
        n_missing = count - len(known)
        # Unequal shards (e.g. grouped-query attention, where k/v have fewer
        # rows than q): only trust the deltas' own sizes when they pin down
        # the layout unambiguously.
        if (n_missing == 0 and remainder == 0) or (n_missing == 1 and remainder > 0):
            sizes = [remainder if rows is None else rows for rows in shard_rows]
        # Otherwise the layout is ambiguous; keep the equal split and let the
        # caller's shape assertion report the mismatch.
    bounds = []
    start = 0
    for size in sizes:
        bounds.append((start, start + size))
        start += size
    return bounds


def lora_reassign_weights(model, state_dict, r, lora_alpha, fan_in_fan_out=False, merge=True):
    """Add LoRA deltas to (or subtract them from) a model's projection weights in place.

    For every parameter whose name contains ``_proj.weight``, the matching
    ``lora_A`` / ``lora_B`` tensors are looked up in ``state_dict`` and
    ``transpose(B @ A, fan_in_fan_out) * (lora_alpha / r)`` is added
    (``merge=True``) or subtracted (``merge=False``). Fused weights
    (``qkv_proj``, ``gate_up_proj``) are updated slice by slice along dim 0.
    Slice boundaries follow the row counts of the individual deltas, so fused
    weights whose shards differ in size are handled as well as equal splits.

    Side effects: sets ``requires_grad = False`` on every parameter of
    ``model``, prints the LoRA scaling configuration, and records the new
    state in ``model.is_merged``.

    Args:
        model: Object exposing ``named_parameters()``.
        state_dict: Adapter weights; a ``"base_model.model."`` fragment in the
            keys is stripped before matching against parameter names.
        r: LoRA rank; must equal ``lora_B.shape[1]`` for fused projections.
        lora_alpha: LoRA alpha; the delta is scaled by ``lora_alpha / r``.
        fan_in_fan_out: Forwarded to ``transpose`` (a helper that must already
            be in scope; it is not defined in this cell).
        merge: ``True`` to add the deltas, ``False`` to subtract them.

    Raises:
        AssertionError: If the model is already in the requested state, a
            ``lora_B`` tensor is missing, a rank or shape does not match, or
            some entries of ``state_dict`` were not consumed.
    """
    is_merged = getattr(model, "is_merged", False)
    assert is_merged != merge, f'{is_merged} != {merge}: if is_merged, then must be unmerge; if not is_merged, then must merge'
    scaling = lora_alpha / r
    print(f'Lora configs: alpha={lora_alpha}, r={r}, scaling={scaling}')
    state_dict = {k.replace("base_model.model.", ""): v for k, v in state_dict.items()}
    replaced = set()
    merged_names = {
        # these are projector weights that got combined into single matrix in vllm
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"]
    }
    non_merged_names = ['o_proj', 'down_proj']

    def lora_delta(lora_a, lora_b):
        # Scaled low-rank update for one projection.
        return transpose(state_dict[lora_b] @ state_dict[lora_a], fan_in_fan_out) * scaling

    for name, param in list(model.named_parameters()):
        param.requires_grad = False
        if "_proj.weight" not in name:
            continue
        for wn, wn_series in merged_names.items():
            if not name.endswith(f"{wn}.weight"):
                continue
            # First pass: validate and compute every shard's delta before the
            # fused weight is touched, so a failure leaves it unmodified.
            shard_updates = []
            for att_weight_name in wn_series:
                lora_a = name.replace(f"{wn}.weight", f"{att_weight_name}.lora_A.weight")
                lora_b = name.replace(f"{wn}.weight", f"{att_weight_name}.lora_B.weight")
                if lora_a not in state_dict:
                    shard_updates.append(None)
                    continue
                assert lora_b in state_dict, f'{lora_b} not in state_dict'
                assert state_dict[lora_b].shape[1] == r, f'{r=} != {state_dict[lora_b].shape}'
                shard_updates.append((lora_a, lora_b, lora_delta(lora_a, lora_b)))
            bounds = _fused_shard_bounds(
                param.shape[0],
                [None if update is None else update[2].shape[0] for update in shard_updates],
            )
            # Second pass: apply each delta to its own row range.
            for (start, stop), update in zip(bounds, shard_updates):
                if update is None:
                    continue
                lora_a, lora_b, matrix = update
                target = param.data[start:stop]  # view: in-place ops write through
                assert target.shape == matrix.shape, (
                    f'invalid shape: {name}[{start}:{stop}] {target.shape} != {matrix.shape}'
                )
                if merge:
                    target += matrix
                else:
                    target -= matrix
                replaced.add(lora_a)
                replaced.add(lora_b)
        for wn in non_merged_names:
            if name.endswith(f"{wn}.weight"):
                lora_a = name.replace(f"{wn}.weight", f"{wn}.lora_A.weight")
                lora_b = name.replace(f"{wn}.weight", f"{wn}.lora_B.weight")
                if lora_a in state_dict:
                    assert lora_b in state_dict
                    matrix = lora_delta(lora_a, lora_b)
                    assert param.data.shape == matrix.shape, f'invalid shape: {name} {param.data.shape} != {matrix.shape}'
                    if merge:
                        param.data += matrix
                    else:
                        param.data -= matrix
                    replaced.add(lora_a)
                    replaced.add(lora_b)
    # Every adapter tensor must have been consumed by some projection weight.
    no_replaced = [k for k in state_dict.keys() if k not in replaced]
    assert len(no_replaced) == 0, f'some lora states not loaded, check again!: {no_replaced}'
    model.is_merged = merge


def lora_merge_unmerge_state_dict(llm, state_dict, peft_config, merge=True):
    """Merge or unmerge a LoRA state dict on every worker model of ``llm``.

    Args:
        llm: Object exposing ``llm_engine.workers``; each worker must expose
            a ``model`` attribute.
        state_dict: Adapter weights, forwarded unchanged to
            ``lora_reassign_weights``.
        peft_config: Object providing ``r``, ``lora_alpha`` and
            ``fan_in_fan_out``.
        merge: ``True`` to merge the LoRA weights, ``False`` to unmerge them.
    """
    for engine_worker in llm.llm_engine.workers:
        worker_model = engine_worker.model
        lora_options = dict(
            r=peft_config.r,
            lora_alpha=peft_config.lora_alpha,
            fan_in_fan_out=peft_config.fan_in_fan_out,
            merge=merge,
        )
        lora_reassign_weights(worker_model, state_dict, **lora_options)

In [2]:
from vllm import LLM, SamplingParams
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, load_peft_weights 

# Base checkpoint served by vLLM and the Hub repo holding the combined adapter.
model_id = "LoftQ/Mistral-7B-v0.1-4bit-64rank"
peft_id = "JD97/ttt"

llm = LLM(model=model_id)
adapter_state_dict = load_peft_weights(peft_id)
# Load the adapter's saved configuration from the Hub. Constructing
# `PeftConfig(peft_id)` directly would only bind the repo id to the first
# dataclass field and load nothing, leaving no `r` / `lora_alpha` /
# `fan_in_fan_out` for the merge below to read.
config = LoraConfig.from_pretrained(peft_id)
# Fold the LoRA deltas into the weights of every vLLM worker model, in place.
lora_merge_unmerge_state_dict(llm, adapter_state_dict, config, merge=True)

  from .autonotebook import tqdm as notebook_tqdm
2024-01-07 11:24:22,084	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 01-07 11:24:26 llm_engine.py:70] Initializing an LLM engine with config: model='LoftQ/Mistral-7B-v0.1-4bit-64rank', tokenizer='LoftQ/Mistral-7B-v0.1-4bit-64rank', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, enforce_eager=False, seed=0)
INFO 01-07 11:24:40 llm_engine.py:275] # GPU blocks: 8486, # CPU blocks: 2048
INFO 01-07 11:24:42 model_runner.py:501] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 01-07 11:24:42 model_runner.py:505] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode.
INFO 01-07 11:24:46 model_runner.py:547] Graph capturing finished in 5 secs.

adapter_model.safetensors: 100%|██████████| 671M/671M [00:25<00:00, 26.7MB/s] 


In [4]:
from datamodule import datamodule

# Alternative: list of all three task CSVs (currently disabled).
#path = ["/home/elicer/M-LLM/data/BoolQA.csv", "/home/elicer/M-LLM/data/NLI_CB.csv", "/home/elicer/M-LLM/data/sc_amazon.csv"]
# Active input: the single sc_amazon CSV.
# NOTE(review): absolute, machine-specific path — will not resolve on another host.
path = "/home/elicer/M-LLM/data/sc_amazon.csv"

# Returns three datasets, unpacked as train / validation / test. The spelling
# `preprare_dataset` is kept as-is: the call succeeded in the recorded run, so
# it presumably matches the method name defined in datamodule.
train_dataset, val_dataset, test_dataset = datamodule.preprare_dataset(path)
# Disabled: tokenisation of the "text" column. No `tokenizer` is defined in this
# kernel session, so these lines would need one before being re-enabled.
# train_dataset = train_dataset.map(lambda samples: tokenizer(samples["text"]), batched=True)
# val_dataset = val_dataset.map(lambda samples: tokenizer(samples["text"]), batched=True)
# test_dataset = test_dataset.map(lambda samples: tokenizer(samples["text"]), batched=True)

100%|██████████| 2000/2000 [00:00<00:00, 14687.20ex/s]
100%|██████████| 100/100 [00:00<00:00, 16221.78ex/s]
100%|██████████| 100/100 [00:00<00:00, 15915.25ex/s]


In [7]:
# Index of the training example used as the smoke-test prompt.
SAMPLE_INDEX = 73
# Number of trailing characters cut from the example's text before prompting.
# NOTE(review): presumably this strips the answer portion of the text —
# confirm against the format produced by datamodule.
ANSWER_SUFFIX_CHARS = 15
# Generation budget, spelled out at vLLM's default of 16 tokens. The recorded
# output ends mid-word, which is consistent with hitting this limit; raise it
# to get complete answers.
MAX_NEW_TOKENS = 16

prompts = [train_dataset[SAMPLE_INDEX]['text'][:-ANSWER_SUFFIX_CHARS]]

# temperature=0 selects greedy decoding; top_k=-1 disables top-k filtering.
sampling_params = SamplingParams(temperature=0, top_k=-1, max_tokens=MAX_NEW_TOKENS)

outputs = llm.generate(prompts, sampling_params)

# Print only the first completion of each request.
for output in outputs:
    generated_text = output.outputs[0].text
    print(f"Generated text: {generated_text!r}")

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  2.63it/s]

Generated text: ' A. Positive\n        B. Negative\n        C. Neut'





In [8]:
# Display the full text of training example 73 — the same example whose text,
# minus its last 15 characters, is used as the generation prompt.
train_dataset[73]['text']

'"What is the sentiment of the following paragraph? Choose one from the option.\n \n        sentence: Title: Maybe I Expected Too Much...\nText: AN ENGLISH MURDER was chosen (by me) to be read by my book group, after a good review of it in the Cincinnati Enquirer. I was disappointed. I\'m not a frequent reader of "English mysteries," so I can\'t compare and contrast this book to all the other mysteries I\'ve read. It\'s just that I was left wondering where the "mystery" was. The identity of the murderer was obvious from the start. It\'s not a "whodunnit." Where this book did succeed was in slowly revealing that the citizens of a seemingly pleasant little English village are not as quaint as their surroundings...everyone has dark and often slimy secrets in their past. As the weather turned gloomy and characters\' conscious and unconscious motives were uncovered, the book just went sort of grim. If you\'re looking for a classic mystery with dead ends and plot twists, AN ENGLISH MURDER is