In [1]:
import os

from tqdm import tqdm

import torch
from torch.utils.data import DataLoader

from datasets import load_dataset

from transformers import (
    LlamaForSequenceClassification,
    PreTrainedTokenizerFast,
    DataCollatorWithPadding,
)

from peft import PeftModelForSequenceClassification

from trl import ModelConfig, get_quantization_config

from fed_ppo.utils import (
    apply_chat_template,
    tokenize,
    DatasetFormat,
    set_bias,
)

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [2]:
###################################################################################################
# NAMES & PATHS
###################################################################################################

# Base model path
# =================================================================================================
BASE_MODEL_PATH = "meta-llama/Llama-3.2-1B-Instruct"

# Reward adapter path
# ================================================================================================
REWARD_ADAPTER_PATH = "RLHF-And-Friends/RM-UltrafeedbackBinarized-Llama-3.2-1B-Instruct-Q4-LoRA8-Batch-16-Tok-1024"

# Normalization dataset
# =================================================================================================
NORM_DATASET_PATH  = "HuggingFaceH4/ultrachat_200k"
NORM_DATASET_SPLIT = "train_sft"

# Normalized adapter path to save
# =================================================================================================
NORM_ADAPTER_PATH = "RLHF-And-Friends/RM-UltrafeedbackBinarized-Llama-3.2-1B-Instruct-Q4-LoRA8-Batch-16-Tok-1024-Normalized"

In [3]:
###################################################################################################
# CONFIGS
###################################################################################################

model_config = ModelConfig(
    torch_dtype               = "bfloat16",
    load_in_8bit              = False,
    load_in_4bit              = True,
    bnb_4bit_quant_type       = "nf4",
    use_bnb_nested_quant      = True,
    
)

In [4]:
###################################################################################################
# TOKENIZER & MODELS
###################################################################################################

# Tokenizer
# =================================================================================================

tokenizer = PreTrainedTokenizerFast.from_pretrained(
    REWARD_ADAPTER_PATH, 
    use_fast=True,
    pad_token = "<|pad|>"
)

# Model
# =================================================================================================

device = torch.device("cuda")

quantization_config = get_quantization_config(model_config)

base_model = LlamaForSequenceClassification.from_pretrained(
    BASE_MODEL_PATH,
    num_labels = 1,
    quantization_config = quantization_config,
    torch_dtype = getattr(torch, model_config.torch_dtype),
)
# Enable bias in the head
set_bias(
    base_model, 
    layer_path="score", 
    bias=0.0, 
    dtype=getattr(torch, model_config.torch_dtype)
)

model = PeftModelForSequenceClassification.from_pretrained(
    base_model,
    REWARD_ADAPTER_PATH
)

model.to(device)
model.eval()

# Sync padding tokens
# =================================================================================================

model.resize_token_embeddings(len(tokenizer), mean_resizing=False)
model.config.pad_token_id = tokenizer.pad_token_id

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
###################################################################################################
# DATASET
###################################################################################################

# Load dataset
# =================================================================================================

dataset = load_dataset(
    NORM_DATASET_PATH
)[NORM_DATASET_SPLIT]

dataset = dataset.select(range(1000))

# Apply chat template
# =================================================================================================

dataset = dataset.map(
    apply_chat_template,
    fn_kwargs = {
        "tokenizer": tokenizer,
        "columns_to_apply_to": ["messages"],
        "dataset_format": DatasetFormat.CONVERSATIONAL,
        "add_generation_prompt": False,
        "new_columns": ["chat"],
    },
    batched = True
)

# Tokenize
# =================================================================================================

dataset = dataset.map(
    tokenize,
    fn_kwargs = {
        "tokenizer": tokenizer,
        "columns_to_apply_to": ["chat"],
        "columns_for_ids": ["input_ids"],
        "columns_for_attn": ["attention_mask"]
    },
    batched=True
)

# Create dataloader
# =================================================================================================

dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
dataloader = DataLoader(
    dataset,
    batch_size=32,
    collate_fn=DataCollatorWithPadding(tokenizer),
    shuffle=False
)

In [6]:
###################################################################################################
# Normalization
###################################################################################################

reward_sum = 0
with torch.no_grad():
    for batch in tqdm(dataloader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # Get model predictions
        logits = model(
            input_ids=input_ids, attention_mask=attention_mask
        ).logits
        reward_sum += torch.mean(logits).item()

mean_reward = reward_sum / len(dataloader)

print(f"Mean reward: {mean_reward}")

100%|██████████| 32/32 [01:04<00:00,  2.01s/it]

Mean reward: 0.5853118896484375





In [7]:
###################################################################################################
# Add bias to the head and save
###################################################################################################

# Set bias
# =================================================================================================

set_bias(
    model, 
    layer_path="score.modules_to_save.default",
    bias = -mean_reward
)

# Remove pad token and save
# =================================================================================================

model.resize_token_embeddings(len(tokenizer) - 1)

print(f"New bias: {model.score.modules_to_save.default.bias.data}")

model.push_to_hub(NORM_ADAPTER_PATH)
tokenizer.push_to_hub(NORM_ADAPTER_PATH)


New bias: tensor([-0.5859], device='cuda:0', dtype=torch.bfloat16)


No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/RLHF-And-Friends/RM-UltrafeedbackBinarized-Llama-3.2-1B-Instruct-Q4-LoRA8-Batch-16-Tok-1024-Normalized/commit/708343673ea1773f1f7ddd371ee3db1186d318ac', commit_message='Upload tokenizer', commit_description='', oid='708343673ea1773f1f7ddd371ee3db1186d318ac', pr_url=None, repo_url=RepoUrl('https://huggingface.co/RLHF-And-Friends/RM-UltrafeedbackBinarized-Llama-3.2-1B-Instruct-Q4-LoRA8-Batch-16-Tok-1024-Normalized', endpoint='https://huggingface.co', repo_type='model', repo_id='RLHF-And-Friends/RM-UltrafeedbackBinarized-Llama-3.2-1B-Instruct-Q4-LoRA8-Batch-16-Tok-1024-Normalized'), pr_revision=None, pr_num=None)