In [2]:
from transformers import (AutoTokenizer, AutoModelForCausalLM)
import transformers
import torch
import sys
import os

# Limit the transformers library's logging to error-level messages so that
# warnings and info output do not clutter the notebook cells.
transformers.logging.set_verbosity_error()

In [3]:
# Root directory of the project checkout; replace the placeholder with the
# real path so the project-local packages imported below can be found.
root_dir = '...'  #root dir of project

# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)

In [4]:
from hmora import SUPPORTED_CAUSAL_MODELS, SUPPORTED_SEQ2SEQ_MODELS, HMoRaModel
from utils.func import set_seed, set_device, to_json
from hmora import TARGET_MODULE_TYPE

In [5]:
# Hugging Face identifier of the base model the adapters are applied to.
model_name_or_path = 'Qwen/Qwen2-1.5B'

# Directory holding the trained adapter checkpoint; replace the placeholder
# with the real location before running the cells below.
checkpoint_path = '...' # checkpoint path

In [6]:
# Load the pretrained causal language model identified by `model_name_or_path`.
# No device or dtype is specified here, so the library defaults apply.
base_model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

In [7]:
# Tokenizer matching the base model.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Switch a right-padding tokenizer to left padding.
# NOTE(review): presumably so that, in batched generation, new tokens directly
# follow each prompt rather than trailing pad tokens — confirm.
pads_on_right = tokenizer.padding_side == 'right'
if pads_on_right:
    tokenizer.padding_side = 'left'

# Fall back to the end-of-sequence token when no pad token is configured.
lacks_pad_token = tokenizer.pad_token is None
if lacks_pad_token:
    tokenizer.pad_token = tokenizer.eos_token

In [8]:
# Build the HMoRa model from the base model and the checkpoint directory.
model = HMoRaModel.from_pretrained(base_model, checkpoint_path)

# Build the weights path with os.path.join so a trailing separator on
# `checkpoint_path` (or a non-POSIX platform) does not produce a malformed path.
adapter_weights_path = os.path.join(checkpoint_path, 'adapter_model.safetensors')

# map_location='cpu' lets a checkpoint saved from a GPU run be read on a
# machine without that device; load_state_dict copies the values into the
# model's existing parameters afterwards.
# NOTE(review): the file has a .safetensors extension but is read with
# torch.load, which expects a torch.save (pickle) archive. If the training
# script writes genuine safetensors files this call must be replaced with a
# safetensors loader — confirm against the code that saves the checkpoint.
# torch.load unpickles its input, so only load checkpoints from trusted sources.
peft_weights = torch.load(adapter_weights_path, map_location='cpu')

# strict=False: keys that are missing from, or unexpected in, the checkpoint
# are ignored rather than raising.
model.load_state_dict(peft_weights, strict=False)

# Evaluation mode for inference (disables the Dropout layers).
model.eval()
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-4): 5 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): AdapterLinear(
            (mora): MoRa(
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (router): TokenRouter(
              (mlp): Sequential(
                (0): Dropout(p=0.1, inplace=False)
                (1): Linear(in_features=1536, out_features=8, bias=True)
              )
            )
          )
          (k_proj): AdapterLinear(
            (mora): MoRa(
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (router): TokenRouter(
              (mlp): Sequential(
                (0): Dropout(p=0.1, inplace=False)
                (1): Linear(in_features=1536, out_features=8, bias=True)
              )
            )
          )
          (v_proj): AdapterLinear(
            (mora): MoRa(
              (dropout): Dr

# Zero shot

In [9]:
# Zero-shot prompt: a single multiple-choice question, one line per list item.
question = '\n'.join([
    'Below is a multiple-choice question about abstract algebra. Please choose the correct answer.',
    'Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.',
    'Options:',
    'A. 0',
    'B. 4',
    'C. 2',
    'D. 6',
    'Answer:',
])

# Wrap the question in the chat template: system turn, human turn, then an
# open assistant turn for the model to complete.
input_text = (
    "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n"
    f"<|im_start|>human\n{question}\n<|im_end|>\n"
    "<|im_start|>assistant\n"
)

In [10]:
# Show the fully rendered prompt exactly as it will be tokenized.
print(input_text)

<|im_start|>system
You are a helpful assistant.
<|im_end|>
<|im_start|>human
Below is a multiple-choice question about abstract algebra. Please choose the correct answer.
Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.
Options:
A. 0
B. 4
C. 2
D. 6
Answer:
<|im_end|>
<|im_start|>assistant



In [11]:
# Tokenize the prompt. add_special_tokens=False because the chat markers are
# already written out literally in `input_text`.
ids = tokenizer(input_text, return_tensors='pt', add_special_tokens=False)

# Inference only: run the task-encoder pass and generation without autograd
# bookkeeping so no computation graph is built or retained.
with torch.no_grad():
    if model.task_encoder is not None:
        # A single prompt needs no padding, so the encoding above is reused
        # instead of tokenizing the same text a second time.
        prefix_tensors = ids
        # Look up the base model's embedding module by the attribute name
        # registered for this model type.
        embedding = getattr(model.base_model, TARGET_MODULE_TYPE[model.config.model_type]['embed'])
        hidden_states = embedding(prefix_tensors.input_ids)
        # Encode the prompt into a task embedding and hand it to the router
        # manager before generating.
        task_embed = model.task_encoder(hidden_states, prefix_tensors.attention_mask)
        model.router_manager.set_task_weight(task_embed)
    res = model.generate(input_ids=ids.input_ids, max_new_tokens=512, eos_token_id=tokenizer.eos_token_id)

In [12]:
# Decode only the newly generated tokens. Slicing by prompt length avoids
# relying on the decoded text reproducing `input_text` character for character,
# which string-based prompt removal needs in order to work.
# NOTE(review): assumes `res[0]` is the prompt followed by the continuation,
# as the previous prompt-stripping implied — confirm for HMoRaModel.generate.
prompt_length = ids.input_ids.shape[1]
generated_ids = res[0][prompt_length:]
print(tokenizer.decode(generated_ids).replace('<|endoftext|>', ''))

D


# Few shot

In [13]:
# Few-shot prompt: one solved demonstration, a blank line, then the target
# question left open at "Answer:". One line of the prompt per list item.
question = '\n'.join([
    'Below is a multiple-choice question about anatomy. Please choose the correct answer.',
    'A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral',
    'Options:',
    'A. paralysis of the facial muscles.',
    'B. paralysis of the facial muscles and loss of taste.',
    'C. paralysis of the facial muscles, loss of taste and lacrimation.',
    'D. paralysis of the facial muscles, loss of taste, lacrimation and decreased salivation.',
    'Answer:A',
    '',
    'Below is a multiple-choice question about abstract algebra. Please choose the correct answer.',
    'Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.',
    'Options:',
    'A. 0',
    'B. 4',
    'C. 2',
    'D. 6',
    'Answer:',
])

# Wrap the question in the chat template: system turn, human turn, then an
# open assistant turn for the model to complete.
input_text = (
    "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n"
    f"<|im_start|>human\n{question}\n<|im_end|>\n"
    "<|im_start|>assistant\n"
)

In [14]:
# Show the fully rendered few-shot prompt exactly as it will be tokenized.
print(input_text)

<|im_start|>system
You are a helpful assistant.
<|im_end|>
<|im_start|>human
Below is a multiple-choice question about anatomy. Please choose the correct answer.
A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral
Options:
A. paralysis of the facial muscles.
B. paralysis of the facial muscles and loss of taste.
C. paralysis of the facial muscles, loss of taste and lacrimation.
D. paralysis of the facial muscles, loss of taste, lacrimation and decreased salivation.
Answer:A

Below is a multiple-choice question about abstract algebra. Please choose the correct answer.
Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.
Options:
A. 0
B. 4
C. 2
D. 6
Answer:
<|im_end|>
<|im_start|>assistant



In [15]:
# Tokenize the prompt. add_special_tokens=False because the chat markers are
# already written out literally in `input_text`.
ids = tokenizer(input_text, return_tensors='pt', add_special_tokens=False)

# Inference only: run the task-encoder pass and generation without autograd
# bookkeeping so no computation graph is built or retained.
with torch.no_grad():
    if model.task_encoder is not None:
        # A single prompt needs no padding, so the encoding above is reused
        # instead of tokenizing the same text a second time.
        prefix_tensors = ids
        # Look up the base model's embedding module by the attribute name
        # registered for this model type.
        embedding = getattr(model.base_model, TARGET_MODULE_TYPE[model.config.model_type]['embed'])
        hidden_states = embedding(prefix_tensors.input_ids)
        # Encode the prompt into a task embedding and hand it to the router
        # manager before generating.
        task_embed = model.task_encoder(hidden_states, prefix_tensors.attention_mask)
        model.router_manager.set_task_weight(task_embed)
    res = model.generate(input_ids=ids.input_ids, max_new_tokens=512, eos_token_id=tokenizer.eos_token_id)

In [16]:
# Decode only the newly generated tokens. Slicing by prompt length avoids
# relying on the decoded text reproducing `input_text` character for character,
# which string-based prompt removal needs in order to work.
# NOTE(review): assumes `res[0]` is the prompt followed by the continuation,
# as the previous prompt-stripping implied — confirm for HMoRaModel.generate.
prompt_length = ids.input_ids.shape[1]
generated_ids = res[0][prompt_length:]
print(tokenizer.decode(generated_ids).replace('<|endoftext|>', ''))

D
