In [1]:
# Helper Functions

def read_token(token_file):
    with open(token_file, 'r') as f:
        token = f.read()
    return token


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, OPTConfig
from peft import get_peft_model, LoraConfig, TaskType
import os

# Configurations
## Primary Configs
TOKEN = read_token("/home/yingqi/repo/HMT-pytorch/huggingface_token.txt")
MODEL_NAME = "Qwen/Qwen2.5-0.5B"
CACHE_DIR = os.environ.get("HF_HOME")
SEGMENT_LENGTH = 1024
BPTT_DEPTH = 6
NUM_SENSORY = 32
BATCH_SIZE = 1
NUM_SEG_SAVE = 8
CHECKPOINT = "/home/yingqi/repo/HMT-pytorch/tmp/model_weights_0_lv_2.pth"

## Minor Configs
USE_LORA = False
BASELINE_ONLY = False

## Check Config Validity
assert CACHE_DIR is not None, "Please set the HF_HOME environment variable to a directory where the model can be cached"

# Intionialize model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=TOKEN, cache_dir=CACHE_DIR)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=TOKEN, cache_dir=CACHE_DIR)

# Get the word embedding dimension 
if isinstance(model.config, OPTConfig):
    word_emb_dim = model.config.word_embed_proj_dim
else:
    word_emb_dim = model.config.hidden_size

if USE_LORA:
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False, 
        # target_modules=['embed_tokens', 'gate_proj', 'up_proj', 'down_proj', 'q_proj', 'k_proj', 'v_proj', 'o_proj'],
        r=8, 
        lora_alpha=32, 
        lora_dropout=0.1
        )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

# Compute Sizes
input_size = SEGMENT_LENGTH
memory_size = 1
n_segments = BPTT_DEPTH

if BASELINE_ONLY:
    memory_size = 0
    n_segments = 2

batch_size = BATCH_SIZE

block_size = input_size
block_size -= 2 * memory_size
block_size -= NUM_SENSORY
history_size = (n_segments - 1) * block_size

mask_size = block_size

block_size_2 = input_size - (2*memory_size) - NUM_SENSORY//2



  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# Load Datasets
from tools.data_processing.qmsum import load_qmsum_test

datapoints = load_qmsum_test(40000, 40000, block_size=block_size, tokenizer=tokenizer, split='test[:2]')


num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2.
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2.
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2.


In [9]:
datapoints[0].keys()

dict_keys(['answer_length', 'input_ids', 'attention_mask', 'labels', 'mask_size'])

In [5]:
# Load Models
from tools.models import load_model

class SimpleArgs:
    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            setattr(self, k, v)

args = SimpleArgs(rmt_only=False, 
                  baseline_only=False, 
                  num_seg_save=NUM_SEG_SAVE, 
                  num_sensory=NUM_SENSORY, 
                  segment_alignment=None, 
                  hmt_stage_2=False, 
                  load_from_ckpt=CHECKPOINT,
                  hmt_stage_1=False,
                  mem_recall_context=100,
                  mem_recall_hidden_dim=4864)

model = load_model(args=args, model=model, memory_size=memory_size, block_size=block_size, 
                   n_segments=n_segments, mask_size=mask_size,
                    word_emb_dim=word_emb_dim, is_qa_task=False, cpu=True)

In [6]:
model

RecurrentWrapper(
  (memory_cell): MemoryCell(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 896)
        (layers): ModuleList(
          (0-23): 24 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (q_proj): Linear(in_features=896, out_features=896, bias=True)
              (k_proj): Linear(in_features=896, out_features=128, bias=True)
              (v_proj): Linear(in_features=896, out_features=128, bias=True)
              (o_proj): Linear(in_features=896, out_features=896, bias=False)
              (rotary_emb): Qwen2RotaryEmbedding()
            )
            (mlp): Qwen2MLP(
              (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
              (up_proj): Linear(in_features=896, out_features=4864, bias=False)
              (down_proj): Linear(in_features=4864, out_features=896, bias=False)
              (act_fn): SiLU()
            )
            (input_layernorm): Qwen2RMS

In [7]:
# Generate an output
model.generate()

TypeError: RecurrentWrapper.generate() missing 3 required positional arguments: 'input_ids', 'attention_mask', and 'segment_size'