In [1]:
import sys

sys.path.append("..")

import warnings

import pandas as pd
import torch
from datasets import load_dataset
from transformers import (
    DistilBertForSequenceClassification,
    DistilBertTokenizerFast,
)

from datasets import load_from_disk
import os

from warp.utils.misc import seed_everything
from warp.utils.data import df_self_product, prepare_reward_dataset
from warp.constants import DATASET_DIR, CONFIG_DIR, MODEL_DIR
from pathlib import Path

warnings.filterwarnings("ignore")

%load_ext autoreload
%autoreload 2

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [2]:
# Local checkpoint directory (under MODEL_DIR) used for both tokenizer and model.
model_name = Path(MODEL_DIR, "lvwerra/distilbert-imdb-cased")

# `model_max_length` is the tokenizer attribute that truncation falls back to;
# a bare `max_length=` handed to `from_pretrained` does not set that limit.
reward_tokenizer = DistilBertTokenizerFast.from_pretrained(
    model_name, model_max_length=512
)

# NOTE(review): the captured output of this cell reports the classifier and
# pre-classifier weights as newly initialised from this checkpoint -- confirm
# that a randomly initialised head is intended before training on top of it.
reward_model = DistilBertForSequenceClassification.from_pretrained(
    model_name
).to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at /home/ivanov.dko/projects/test/rl/artifacts/models/lvwerra/distilbert-imdb-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Fetch the IMDB train and test splits, then turn each split into a pandas
# DataFrame. The final bare expression displays the training frame.
imdb_splits = load_dataset("imdb", split=["train", "test"])
train, test = (pd.DataFrame(split) for split in imdb_splits)
train

Unnamed: 0,text,label
0,I rented I AM CURIOUS-YELLOW from my video sto...,0
1,"""I Am Curious: Yellow"" is a risible and preten...",0
2,If only to avoid making this type of film in t...,0
3,This film was probably inspired by Godard's Ma...,0
4,"Oh, brother...after hearing about this ridicul...",0
...,...,...
24995,A hit at the time but now better categorised a...,1
24996,I love this movie like no other. Another time ...,1
24997,This film and it's sequel Barry Mckenzie holds...,1
24998,'The Adventures Of Barry McKenzie' started lif...,1


In [4]:
# Draw 1000 rows from the self-product of `train` and rename the two text
# columns to the chosen/rejected names used for reward training.
# The sample is seeded so a fresh kernel reproduces the same pairs.
# NOTE(review): `seed=` is the polars spelling -- the frame is assumed to be a
# polars DataFrame because the next cell calls `to_dict(as_series=False)`;
# confirm against `df_self_product`.
sets = (
    df_self_product(train, partition_col="label")
    .sample(1000, seed=42)
    .rename({"text_0": "chosen", "text_1": "rejected"})
)

In [5]:
# Tokenize the chosen/rejected pairs with the reward tokenizer.
reward_pairs = prepare_reward_dataset(
    sets.to_dict(as_series=False), tokenizer=reward_tokenizer, verbose=False
)

# Hold out 20% for evaluation. The split is seeded so re-running the notebook
# yields the same train/test partition, and the unsplit dataset keeps its own
# name instead of being overwritten by the split result.
# NOTE(review): assumes `prepare_reward_dataset` returns a `datasets.Dataset`
# (it is used via `train_test_split` and "train"/"test" keys) -- confirm.
reward_dataset = reward_pairs.train_test_split(test_size=0.2, seed=42)


[32m2024-08-03 16:47:25.015[0m | [1mINFO    [0m | [36mwarp.utils.data[0m:[36mprepare_reward_dataset[0m:[36m123[0m - [1mStarting tokenizing `chosen`[0m
[32m2024-08-03 16:47:25.494[0m | [1mINFO    [0m | [36mwarp.utils.data[0m:[36mprepare_reward_dataset[0m:[36m123[0m - [1mStarting tokenizing `rejected`[0m


In [45]:
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from trl import RewardTrainer, RewardConfig
from omegaconf import OmegaConf

import os

# LoRA settings: read the YAML file, convert it to plain Python containers and
# take the "peft" section as keyword arguments.
peft_yaml = OmegaConf.load(Path(CONFIG_DIR, "reward/peft.yaml"))
peft_params = OmegaConf.to_container(peft_yaml)["peft"]
peft_config = LoraConfig(**peft_params)

# Trainer settings: same pattern, using the "trainer" section.
trainer_yaml = OmegaConf.load(Path(CONFIG_DIR, "reward/trainer.yaml"))
reward_trainer_params = OmegaConf.to_container(trainer_yaml)["trainer"]
reward_config = RewardConfig(**reward_trainer_params)

# Collect the trainer arguments first so they can be inspected before training.
trainer_kwargs = {
    "model": reward_model,
    "args": reward_config,
    "tokenizer": reward_tokenizer,
    "train_dataset": reward_dataset["train"],
    "eval_dataset": reward_dataset["test"],
    "peft_config": peft_config,
}
trainer = RewardTrainer(**trainer_kwargs)

# NOTE(review): the saved output of this cell is
# "ValueError: Default process group has not been initialized" while `device`
# was 'cpu' -- the cause is not visible here; check reward/trainer.yaml for
# distributed-training options before re-running.
trainer.train()

ValueError: Default process group has not been initialized, please make sure to call init_process_group.