In [1]:
# import parent directory 
import sys
import torch
sys.path.append("/user/al4263/rlhf/Reward_Modeling")
import utils
import models
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import matplotlib.pyplot as plt
import numpy as np
from models import RewardENN
from data.data_loader import pairwise_data_tokenized, PairwiseDyadicAugmentedTokenizedData

  from .autonotebook import tqdm as notebook_tqdm


[2023-09-24 16:11:38,965] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
output_dir = "/shared/share_mala/leon/llama-3b-sft-hh"
from models.reward_enn import RewardENN, RewardENNConfig
from models.vanilla_reward import VanillaReward, VanillaRewardConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


config = RewardENNConfig(
            backbone_model_name_or_path=output_dir,
            ref_size=10,
            enn_hidden_size=64,
            enn_output_size=1,
            enn_gain=1.0,
            lmbda=1.0,
            )

model = RewardENN.from_pretrained(
    "/shared/share_mala/leon/reward-enn/anthropic_hh/-ref_size10-enn_dim64-num_ref_train10-lr1e-05-weight_decay0.01-enn_lr0.001-enn_decay0.1-reward_lr0.0003-reward_decay0.1-gc1-train_batch_size4",
    flash_attn=False,
    fp16=False,
    bf16=True,
    config=config,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]


In [7]:
model = model.to(torch.bfloat16)

In [8]:
tokenizer = AutoTokenizer.from_pretrained(
        output_dir,
        padding_side="left",)
print("tokenizer:" + str(tokenizer))

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


tokenizer:LlamaTokenizerFast(name_or_path='/shared/share_mala/leon/llama-3b-sft-hh', vocab_size=32000, model_max_length=512, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': '[PAD]'}, clean_up_tokenization_spaces=False)


In [9]:
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

data_dir = '/user/al4263/rlhf/Reward_Modeling/data/dataset/anthropic_hh/unaug_joint_eval.json'
with open(data_dir) as f:
     annotated = json.load(f)


In [10]:
prompts = {
    "prompt_noinputs": "{input}",
    "prompt_inputs": "{input}"
}

In [11]:
joint_dataset = PairwiseDyadicAugmentedTokenizedData(annotated, tokenizer, 512, prompts)
dataloader = torch.utils.data.DataLoader(joint_dataset, batch_size= 1, shuffle=False)


In [19]:
from tqdm import tqdm

In [21]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
joint_log_list = []
log_joint_list = []
with torch.no_grad():
    for i, batch in enumerate(tqdm(dataloader)):

        p_1_ids, p_2_ids, p_1_att, p_2_att, labels = batch
        p_1_ids = p_1_ids.squeeze()
        p_2_ids = p_2_ids.squeeze()
        p_1_att = p_1_att.squeeze()
        p_2_att = p_2_att.squeeze()
        labels = labels.squeeze()
        p_1_ids = p_1_ids.to(device)
        p_2_ids = p_2_ids.to(device)
        p_1_att = p_1_att.to(device)
        p_2_att = p_2_att.to(device)
        labels = labels.to(device)

        z_samples = torch.randn(10, 10).to(device)
        z_samples = z_samples.to(torch.bfloat16)

        reward_list_1 = model(p_1_ids, attention_mask=p_1_att, z_samples=z_samples, return_full_z=True).reward_list 
        reward_list_2 = model(p_2_ids, attention_mask=p_2_att, z_samples=z_samples, return_full_z=True).reward_list 


        r_win = torch.where(labels.unsqueeze(-1) == 1, reward_list_1, reward_list_2)
        r_lose = torch.where(labels.unsqueeze(-1) == 2, reward_list_1, reward_list_2)
        stack_rewards = torch.stack((r_win, r_lose), dim=2)
        #print("stack_rewards", stack_rewards, stack_rewards.shape)
        # joint log likelihood
        log_softmax_values = torch.nn.functional.log_softmax(stack_rewards, dim=2)[:,:,0]
        # averaging over the z samples, if vanilla then dimension is 1 so no effect
        log_softmax_values = torch.mean(log_softmax_values, dim=1)
        # summing all dyadic samples
        joint_log_likelihood = torch.sum(log_softmax_values, dim=0)
        joint_log_list.append(joint_log_likelihood)

        # log joint likelihood for evaluation
        softmax_values = torch.nn.functional.softmax(stack_rewards, dim=2)[:,:,0]
        #print("softmax_values", softmax_values, softmax_values.shape)
        product_softmax_values = torch.prod(softmax_values, dim=0)
        #print("product_softmax_values", product_softmax_values, product_softmax_values.shape)
        # Averaging over the z samples, if vanilla then dimension is 1 so no effect
        avg_softmax_values = torch.mean(product_softmax_values)
        #print("avg_softmax_values", avg_softmax_values, avg_softmax_values.shape)
        # log joint likelihood
        log_joint_likelihood = torch.log(avg_softmax_values)
        log_joint_list.append(log_joint_likelihood)

        

    
    log_joint_likelihood = torch.tensor(log_joint_list).sum()
    joint_log_likelihood = torch.tensor(joint_log_list).sum()

    print("log_joint_likelihood: " + str(log_joint_likelihood))
    print("joint_log_likelihood: " + str(joint_log_likelihood))
    

100%|██████████| 256/256 [04:50<00:00,  1.13s/it]


log_joint_likelihood: tensor(-1256., dtype=torch.bfloat16)
joint_log_likelihood: tensor(-1416., dtype=torch.bfloat16)
