In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
DEVICE

device(type='cuda')

## Load model and tokenizer

In [4]:
class ModelWrapper(nn.Module):
    """Thin ``nn.Module`` wrapper that forwards a call to the wrapped model.

    Args:
        model: Any callable module accepting ``input_ids`` as its first
            positional argument and an ``attention_mask`` keyword argument.
    """

    def __init__(self, model):
        super().__init__()

        self.model = model

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return whatever it returns.

        Args:
            input_ids: Token ids passed through as the first positional argument.
            attention_mask: Mask passed through by keyword.

        Returns:
            The wrapped model's output, unchanged.
        """
        # Pass the mask by keyword: Hugging Face causal-LM forward signatures
        # do not take `attention_mask` as their second positional parameter.
        # The result must be returned, otherwise callers always receive None.
        return self.model(input_ids, attention_mask=attention_mask)

In [6]:
class RewardModel(nn.Module):
    """GPT-J transformer body with a bias-free linear head producing one scalar per token.

    ``forward`` returns, for every sequence in the batch, the head output at
    the position of the first ``PAD_ID`` token (the EOS id of the base model).

    Attributes are named ``transformer`` and ``v_head`` so that a checkpoint
    with matching state-dict keys can be loaded with ``strict=True``.
    """

    def __init__(self):
        super().__init__()

        model = AutoModelForCausalLM.from_pretrained(
            "EleutherAI/gpt-j-6b", cache_dir="../../.hf_cache/hub"
        )

        # Only the transformer body is kept; the language-modelling head is dropped.
        self.transformer = model.transformer
        self.v_head = nn.Linear(model.config.n_embd, 1, bias=False, dtype=torch.float16)

        # The base model's EOS id doubles as the padding id.
        self.PAD_ID = model.config.eos_token_id

    def forward(self, input_ids, attention_mask):
        """Score each sequence at its first PAD/EOS position.

        Args:
            input_ids: 2-D integer tensor of token ids (batch, sequence).
            attention_mask: Mask forwarded to the transformer by keyword.

        Returns:
            Tensor of shape (batch, 1) holding one reward per sequence.
        """
        # The transformer returns an output container, not a tensor; index 0
        # is the last-layer hidden state for both tuple and ModelOutput returns.
        hidden_states = self.transformer(
            input_ids, attention_mask=attention_mask
        )[0]

        # The head is float16 while the backbone may have been loaded in a
        # different precision; align dtypes so the linear layer does not raise.
        hidden_states = hidden_states.to(self.v_head.weight.dtype)
        rewards = self.v_head(hidden_states).squeeze(-1)

        # Index of the first PAD token per row. argmax over an all-False row
        # yields 0, so rows without any PAD fall back to the final position.
        is_pad = input_ids == self.PAD_ID
        first_pad = is_pad.int().argmax(dim=1, keepdim=True)
        last_index = torch.full_like(first_pad, input_ids.shape[1] - 1)
        ends = torch.where(is_pad.any(dim=1, keepdim=True), first_pad, last_index)

        return torch.gather(rewards, 1, ends)

Load the model (might take a few minutes):

In [None]:
# Location of the fine-tuned reward-model checkpoint inside the local HF cache.
WEIGHTS_PATH = (
    "../../.hf_cache/hub/models--Dahoas--gptj-rm-static/"
    "snapshots/dc9bb2f15f4cddace8a812174c3e7afda2308258/hf_ckpt.pt"
)

reward_model = RewardModel()
# map_location="cpu" deserializes the checkpoint onto the CPU regardless of the
# device it was saved from; the model is moved to DEVICE afterwards.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
reward_model.load_state_dict(
    torch.load(WEIGHTS_PATH, map_location="cpu"), strict=True
)
reward_model.to(DEVICE)
# Inference only: make sure dropout and similar layers are disabled.
reward_model.eval()
reward_model

In [None]:
# Score the "chosen" sample with the reward model, with gradient tracking disabled.
# NOTE(review): `tokenized_text` is first assigned in a later cell (the
# tokenization preview), so this cell fails on Restart & Run All unless that
# cell has already been executed -- consider moving this cell below it.
with torch.no_grad():
    out = reward_model(
        input_ids=tokenized_text["chosen"].to(DEVICE),
        attention_mask=tokenized_text["chosen_attention_mask"].to(DEVICE)
    )

out

Load the tokenizer:

In [None]:
# Use the same on-disk cache as the model weights so the tokenizer files are
# not downloaded to a second, default location.
tokenizer = AutoTokenizer.from_pretrained(
    "EleutherAI/gpt-j-6b", cache_dir="../../.hf_cache/hub"
)
# No dedicated pad token is configured here, so the EOS token is reused for padding.
tokenizer.pad_token = tokenizer.eos_token

MAX_LENGTH = 1024
EOS_TOKEN = tokenizer.eos_token

## Load the dataset

In [None]:
# Fetch the "test" split of the preference dataset, caching it under the
# project-local Hugging Face datasets directory.
dataset = load_dataset(
    "Dahoas/rm-static",
    split="test",
    cache_dir="../../.hf_cache/datasets",
)
dataset

## Tokenize the dataset

In [25]:
def tokenize_row(row, tokenizer, max_length=1024):
    """Tokenize the chosen and rejected completions of one preference row.

    Each text is built as ``prompt + response + tokenizer.eos_token`` and
    padded to ``max_length``.

    Args:
        row: Mapping with the string fields ``"prompt"``, ``"chosen"`` and
            ``"rejected"``.
        tokenizer: Callable tokenizer exposing an ``eos_token`` attribute.
        max_length: Length every sequence is padded to.

    Returns:
        Dict with the keys ``"chosen"``, ``"rejected"``,
        ``"chosen_attention_mask"`` and ``"rejected_attention_mask"``, holding
        the tokenizer's ``input_ids`` / ``attention_mask`` for each text.
    """
    prompt = row["prompt"]

    def _encode(response):
        # The EOS marker comes from the tokenizer that was passed in, so the
        # function does not depend on a notebook-level global.
        # No truncation is requested: inputs are expected to be shorter than
        # max_length, so padding alone yields fixed-length sequences.
        return tokenizer(
            prompt + response + tokenizer.eos_token,
            padding="max_length", max_length=max_length, return_tensors="pt",
        )

    chosen_tokenized = _encode(row["chosen"])
    rejected_tokenized = _encode(row["rejected"])

    return {
        "chosen": chosen_tokenized["input_ids"],
        "rejected": rejected_tokenized["input_ids"],
        "chosen_attention_mask": chosen_tokenized["attention_mask"],
        "rejected_attention_mask": rejected_tokenized["attention_mask"],
    }

# Tokenize the first row as a sanity check and show, in order: the sequence
# length of every returned tensor, the returned keys, and the first ten
# token ids of the chosen sample.
tokenized_text = tokenize_row(dataset[0], tokenizer)
(
    [tensor.shape[1] for tensor in tokenized_text.values()],
    list(tokenized_text),
    tokenized_text["chosen"][:, :10],
)

([1024, 1024, 1024, 1024],
 ['chosen', 'rejected', 'chosen_attention_mask', 'rejected_attention_mask'],
 tensor([[  198,   198, 20490,    25,   314,   716,  2111,   284,  3551,   257]]))

In [88]:
# Tokenize every row, then drop the raw text columns that are no longer needed.
# NOTE: this rebinds `dataset`, so the cell cannot be re-run without first
# re-running the cell that loads the dataset.
dataset = (
    dataset
    .map(tokenize_row, fn_kwargs={ "tokenizer": tokenizer })
    .remove_columns(["prompt", "response"])
)
dataset

Loading cached processed dataset at /rds/user/am3052/hpc-work/.hf_cache/datasets/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-5e48db60fca6235f.arrow


Dataset({
    features: ['chosen', 'rejected'],
    num_rows: 5103
})

## Run inference