In [1]:
import torch
from recurrent_memory_transformer import RecurrentMemoryTransformer
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = 'KotShinZ/gpt2-RMT-8'
max_length = 1004 * 2 # 1024 - memory size * 2
dataset_path = "HuggingFaceFW/fineweb-edu"
dataset_name = "CC-MAIN-2024-10"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
dataset = load_dataset(dataset_path, name=dataset_name, split="train[:1000]")

In [None]:
def eval_model(model, tokenizer, dataset, max_length, stride = None):
    """Model evaluation
    Args:
        model: Model
        tokenizer: Tokenizer
        dataset: Dataset
        max_length: Maximum token count
        stride: Stride
    Returns:
        loss: Loss
        ppl: Perplexity
    
    """
    if stride is None:
        stride = max_length / 2

    # トークン化
    encodings = tokenizer("\n\n".join(dataset["text"]), return_tensors="pt")
    
    stride = 512
    seq_len = encodings.input_ids.size(1)

    nlls = []
    prev_end_loc = 0
    for begin_loc in range(0, seq_len, stride):
        end_loc = min(begin_loc + max_length, seq_len)
        trg_len = end_loc - prev_end_loc  # may be different from stride on last loop
        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        target_ids[:, :-trg_len] = -100

        with torch.no_grad():
            outputs = model(input_ids, labels=target_ids)
            neg_log_likelihood = outputs.loss

        nlls.append(neg_log_likelihood)

        prev_end_loc = end_loc
        if end_loc == seq_len:
            break

    loss = torch.stack(nlls).mean()
    ppl = torch.exp(torch.stack(nlls).mean())  
    return loss, ppl 

In [9]:
model = RecurrentMemoryTransformer.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)
eval_model(model, tokenizer, dataset, max_length)

You are using a model of type rmt_gpt2 to instantiate a model of type rmt. This is not supported for all configurations of models and can yield errors.
Token indices sequence length is longer than the specified maximum sequence length for this model (1087994 > 1024). Running this sequence through the model will result in indexing errors


(tensor(2.9417, device='cuda:0'), tensor(18.9489, device='cuda:0'))

In [15]:
inputs = tokenizer("Hello, world!", return_tensors="pt").to(device)
out = model.generate(input_ids = inputs["input_ids"], max_length=50)
out_text = tokenizer.decode(out[0], skip_special_tokens=True)
print(out_text)


I am a student at the University of California, Berkeley. I am a member of the American Association of University Professors, and I am a member of the American Association of University Professors. I am also a member of
