# Imports

In [1]:
import inspect
import os
import random
import sys

import numpy as np
import torch

Add repository to system path to be able to import local modules (there may be a better way to do this)

In [2]:
module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path) 


# SET SEED

In [4]:
from utils import set_seed
set_seed(62)

# CONSTANTS

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
TRUE_LABEL_STR = "True"
FALSE_LABEL_STR = "False"

# SETUP

## Models

In [6]:
id2label = {0: FALSE_LABEL_STR, 1: TRUE_LABEL_STR}
label2id = {FALSE_LABEL_STR: 0, TRUE_LABEL_STR: 1}

In [22]:
judge_trained_with_peft = False

In [21]:
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from peft import PeftModel


tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
judge = GPT2ForSequenceClassification.from_pretrained(
    'gpt2', num_labels=2, id2label=id2label, label2id=label2id,
    torch_dtype=torch.bfloat16
)

# Load PEFT
if judge_trained_with_peft:
    model_dir = "../models/"  # This directory must contain: adapter_config.json, adapter_model.bin
    judge = PeftModel.from_pretrained(judge, model_id=model_dir)
    judge = judge.merge_and_unload()
else:
    model_dir = "../models/"  # This directory must contain: config.json, pytorch_model.bin
    judge = GPT2ForSequenceClassification.from_pretrained(
        model_dir, num_labels=2, id2label=id2label, label2id=label2id,
        torch_dtype=torch.bfloat16
    )

tokenizer.add_special_tokens({"pad_token": "<PAD>"})
judge.config.pad_token_id = tokenizer.pad_token_id
judge.resize_token_embeddings(len(tokenizer))

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(50258, 768)

## Prompts

This will only work if you have downloaded and preprocessed the data, e.g using `make data`

In [3]:
train_prompts, eval_prompts = get_question_prompts()

## RLHF Config

In [7]:
trl_config = TRLConfig(
        train=TrainConfig(
            seq_length=1024,
            epochs=1,  # 100,
            total_steps=1,  # 10000,
            batch_size=1,
            checkpoint_interval=100,  # 10000,
            eval_interval=100,
            pipeline="PromptPipeline",
            trainer="AcceleratePPOTrainer",
            project_name="rlhf-for-truthfulness"
        ),
        model=ModelConfig(model_path="facebook/opt-125m", num_layers_unfrozen=2),
        tokenizer=TokenizerConfig(tokenizer_path="facebook/opt-125m", truncation_side="right"),
        optimizer=OptimizerConfig(
            name="adamw", kwargs=dict(lr=3e-5, betas=(0.9, 0.95), eps=1.0e-8, weight_decay=1.0e-6)
        ),
        scheduler=SchedulerConfig(name="cosine_annealing", kwargs=dict(T_max=1e12, eta_min=3e-5)),
        method=PPOConfig(
            name="PPOConfig",
            num_rollouts=10,  # 128,
            chunk_size=10,  # 128,
            ppo_epochs=1,  # 4,
            init_kl_coef=0.001,
            target=None,  # type: ignore
            horizon=10000,
            gamma=1,
            lam=0.95,
            cliprange=0.2,
            cliprange_value=0.2,
            vf_coef=1,
            scale_reward="ignored",
            ref_mean=None,
            ref_std=None,
            cliprange_reward=10,
            gen_kwargs=dict(
                max_new_tokens=100,
                top_k=0,
                top_p=1.0,
                do_sample=True,
            ),
        ),
    )

# APPLYING RLHF

In [8]:
trainer = rlhf_with_judge(
    trl_config,
    train_prompts[:10],
    eval_prompts[:10],
    judge,
    tokenizer,
    truth_label=TRUE_LABEL_STR
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
[RANK 0] Initializing model: facebook/opt-125m
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfelixahofstaetter[0m. Use [1m`wandb login --relogin`[0m to force relogin


[RANK 0] Starting training
[RANK 0] Collecting rollouts
You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
[RANK 0] Evaluating model


[generation sweep 0/1 | eval batch 0/1]:   0%|          | 0/1 [00:00<?, ?it/s]

[RANK 0] Computing rewards
[RANK 0] Summarizing evaluation


  0%|          | 0/1 [00:00<?, ?it/s]

[RANK 0] Saving intermediate checkpoint into ckpts/checkpoint_1
[RANK 0] Evaluating model


[generation sweep 0/1 | eval batch 0/1]:   0%|          | 0/1 [00:00<?, ?it/s]

[RANK 0] Computing rewards
[RANK 0] Summarizing evaluation


[RANK 0] Saving the best state so far into ckpts/best_checkpoint
