In [1]:
%pip install transformers trl wandb

Successfully installed GitPython-3.1.31 accelerate-0.18.0 aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.11.0 dill-0.3.6 docker-pycreds-0.4.0 frozenlist-1.3.3 gitdb-4.0.10 huggingface-hub-0.13.4 multidict-6.0.4 multiprocess-0.70.14 pathtools-0.1.2 responses-0.18.0 sentry-sdk-1.20.0 setproctitle-1.3.2 smmap-5.0.0 tokenizers-0.13.3 transformers-4.28.1 trl-0.4.1 wandb-0.15.0 xxhash-3.2.0 yarl-1.8.2


In [2]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, set_seed

## Dataset and Data Collator

In [3]:
from data.data_utils import build_dataset, collator
from training_utils.freeze import freeze_model

In [4]:
# PPO run configuration. Keyword arguments are grouped by concern; their order has no
# effect on the resulting PPOConfig.
config = PPOConfig(
    # --- policy checkpoint and reproducibility ---
    model_name="Myashka/125M_GPTneo_sft_tuned",
    seed=42,
    # --- experiment tracking ---
    log_with="wandb",
    tracker_project_name='CQA_RLHF_v2',
    # --- optimisation schedule ---
    steps=20000,
    batch_size=4,
    gradient_accumulation_steps=1,
    ppo_epochs=4,  # default
    learning_rate=1.41e-5,
    max_grad_norm=None,
    # --- memory handling ---
    optimize_cuda_cache=True,
)

# Dataset options; this dict is handed to build_dataset together with the PPO config.
# NOTE(review): the path is an absolute Google Drive location — adjust when running elsewhere.
data_config = {
    'data_file_path': '/content/drive/MyDrive/Colab Notebooks/vkr_data/data/1.0-data-div-ans-sep-api-usage.json',
    'max_length_promt': 512,
    'max_length': 1024,
    'truncate_promt': True,
    'padding': False,
}

# Options handed to freeze_model for the policy model.
# NOTE(review): layers_not_to_freeze presumably lists transformer block indices that stay
# trainable — confirm against training_utils.freeze.
freeze_config = {
    'do_compute_metrics': True,
    'freeze_emb': True,
    'freeze_ln': False,
    'freeze_attn': True,
    'freeze_ff': True,
    'freeze_other': True,
    'layers_not_to_freeze': [0, 11],
}

# Batch size passed to the reward pipeline when scoring generated texts.
reward_config = dict(batch_size=16)

# Interval consulted by the checkpointing check in the training loop.
save_config = dict(save_interval=15)

In [None]:
# Only the 'train' split is requested; the first element of what build_dataset returns
# is used as the PPO dataset.
# NOTE(review): presumably one dataset per requested split name — confirm in data.data_utils.
requested_splits = build_dataset(config, data_config, ['train'])
dataset = requested_splits[0]

## Load pre-trained GPT Neo LLM

In [7]:
# Seed before the models are constructed. Any weights that are randomly initialised
# while loading (presumably the value head added on top of the SFT checkpoint — confirm
# whether the checkpoint ships value-head weights) would otherwise differ between runs,
# because the notebook's other set_seed call only happens after this cell.
set_seed(config.seed)

# Policy to be optimised and a second copy loaded from the same checkpoint, which is
# passed to PPOTrainer as the reference model.
model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
tokenizer = AutoTokenizer.from_pretrained(config.model_name)

# Apply the freezing options from freeze_config to the policy only; the reference model
# is left as loaded.
model = freeze_model(model, freeze_config)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/551M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [8]:
# Seed the random number generators through trl's set_seed helper, using the seed stored
# on the PPO config, before the trainer is built and sampling starts.
set_seed(config.seed)

## Initialize PPOTrainer

In [9]:
# Build the PPO trainer from the policy, the reference model and the tokenizer; batches
# are drawn from `dataset` and assembled with the project's `collator`.
ppo_trainer = PPOTrainer(
    config,
    model,
    ref_model,
    tokenizer,
    dataset=dataset,
    data_collator=collator,
    num_shared_layers=None,
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
from reward_pipelines.regression_reward import Reward_pipeline

In [14]:
# Choose the device for the reward model. With more than one process the accelerator's
# own device is used; in the single-process case a plain index / "cpu" string is passed
# instead, to avoid a `pipeline` bug.
if ppo_trainer.accelerator.num_processes != 1:
    device = ppo_trainer.accelerator.device
elif torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Reward model wrapper used to score query + response texts in the training loop.
reward_pipe = Reward_pipeline("Myashka/125M_GPTneo_reward_gen", device)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/551M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

In [None]:
import gc

In [None]:
# Sampling settings used for every rollout: sampling is enabled with top_k=0 and
# top_p=1.0, at most 512 new tokens are generated per query, and the EOS id is used as
# the padding id.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    "max_new_tokens": 512,
}


# One PPO optimisation step per dataloader batch. `epoch` is the batch index.
# tqdm wraps the dataloader itself (not the enumerate object) so it can read the
# dataloader's length and show a total / ETA.
for epoch, batch in enumerate(tqdm(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]
    response_tensors = []

    #### Generate one response per query
    for query in query_tensors:
        response = ppo_trainer.generate(query, **generation_kwargs)
        # The generated sequence starts with the prompt tokens, so drop the first
        # len(query) positions and keep only the continuation. ppo_trainer.step joins
        # each query with its response again; passing the unsliced tensor would feed
        # the prompt twice.
        response_tensors.append(response.squeeze()[len(query):])

    batch["query"] = [tokenizer.decode(query_idx, skip_special_tokens=True) for query_idx in query_tensors]
    batch["response"] = [tokenizer.decode(r, skip_special_tokens=True) for r in response_tensors]

    #### Compute reward-model scores on the concatenated query + response texts
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    rewards = reward_pipe(texts, reward_config['batch_size'])

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)

    # Parentheses are required: `epoch + 1 % n` parses as `epoch + (1 % n)`, which is
    # never 0 for n > 1, so no checkpoint would ever be written.
    if (epoch + 1) % save_config['save_interval'] == 0:
        model.save_pretrained(f'ckpts_ppo/ppo_{epoch}.ckpt')

0it [00:00, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
7it [03:28, 26.68s/it]Exception ignored in: <function _xla_gc_callback at 0x7fda3c8af160>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/jax/_src/lib/__init__.py", line 97, in _xla_gc_callback
    def _xla_gc_callback(*args):
KeyboardInterrupt: 
8it [03:51, 25.69s/it]