In [None]:
# Mount Google Drive into the Colab VM at /content/drive; later cells read models
# and data from, and write the trained model to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Reward Model

In [None]:
# Install exact, pinned versions of the libraries this notebook relies on.
# (The recorded output shows a preinstalled transformers 4.41.0 being replaced by 4.40.2.)
!pip install transformers==4.40.2
!pip install datasets==2.19.1
!pip install accelerate==0.30.1
!pip install trl==0.8.6

Collecting transformers==4.40.2
  Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.41.0
    Uninstalling transformers-4.41.0:
      Successfully uninstalled transformers-4.41.0
Successfully installed transformers-4.40.2
Collecting datasets==2.19.1
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets==2.19.1)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets==2.19.1)
  Downloading xxhash-3.4.1-cp310-cp310-manylinu

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Directory holding the saved reward model and its tokenizer. An absolute path under
# the Drive mount point (/content/drive) is used so loading does not depend on the
# kernel's current working directory, matching the absolute paths used by the PPO cells.
rm_dir = "/content/drive/MyDrive/Colab/T5-jeju/data/saved_models/roberta_rm"

rm_tokenizer = AutoTokenizer.from_pretrained(rm_dir)
# Two-label sequence classifier; later cells read the logit at index 1 as the reward.
rm_model = AutoModelForSequenceClassification.from_pretrained(rm_dir, num_labels=2).to(device)

PPO

In [None]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import pipeline, AutoTokenizer, DataCollatorForSeq2Seq
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead
from trl.core import LengthSampler
import multiprocessing

In [None]:
# Tab-separated source corpus on the mounted Drive; malformed rows are skipped on read.
file_path = '/content/drive/My Drive/Colab/T5-jeju/data/filtered_train_ppo_2.tsv'
train_df = pd.read_csv(file_path, sep='\t', on_bad_lines='skip')

# Contiguous split: the first `num_train` rows train, the next `num_valid` rows validate.
num_train = 50000
num_valid = 10000
valid_end = num_train + num_valid
sliced_train_df = train_df.iloc[0:num_train]
sliced_valid_df = train_df.iloc[num_train:valid_end]

# Write each slice to a local TSV so both can be loaded back as named dataset splits.
data_files = {"train": "sliced_train.tsv", "valid": "sliced_valid.tsv"}
for split_name, split_df in (("train", sliced_train_df), ("valid", sliced_valid_df)):
    split_df.to_csv(data_files[split_name], sep='\t', index=False)

# Despite its name, `train_dataset` holds both the "train" and "valid" splits.
train_dataset = load_dataset("csv", data_files=data_files, delimiter="\t")

In [None]:
# Number of worker processes used when mapping the tokenizer over the dataset.
NUM_CPU = multiprocessing.cpu_count()

# Truncation limit (in tokens) passed to the tokenizer for inputs and targets.
max_token_length = 64

In [None]:
def convert_examples_to_features(examples):
    """Tokenize a batch of dialect/standard sentence pairs.

    Args:
        examples: Mapping with a 'dialect_form' entry (source text) and a
            'standard_form' entry (target text), as supplied by a batched
            ``Dataset.map`` call.

    Returns:
        Whatever the module-level ``tokenizer`` returns when called with the
        source text, the target text as ``text_target``, and truncation to
        ``max_token_length`` tokens.
    """
    return tokenizer(
        examples['dialect_form'],
        text_target=examples['standard_form'],
        truncation=True,
        max_length=max_token_length,
    )

In [None]:
# Tokenizer saved alongside the seq2seq checkpoint that PPO will fine-tune.
tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/Colab/T5-jeju/data/saved_models/mixed_jeju/results")
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): a later cell reloads `tokenizer` from the same path without this
# override, so it only applies to the mapping below - confirm it is still wanted.
tokenizer.pad_token = tokenizer.eos_token

# Drop the raw text columns so only the tokenizer's outputs remain in each split.
raw_columns = train_dataset["train"].column_names
tokenized_datasets = train_dataset.map(
    convert_examples_to_features,
    batched=True,
    num_proc=NUM_CPU,
    remove_columns=raw_columns,
)

In [None]:
# Checkpoint directory of the seq2seq model used as the PPO starting point.
model_name = "/content/drive/MyDrive/Colab/T5-jeju/data/saved_models/mixed_jeju/results"

# PPO hyperparameters; the mini-batch size equals the batch size, with a single
# PPO epoch per step.
ppo_settings = dict(
    model_name=model_name,
    learning_rate=1.41e-5,
    batch_size=64,
    mini_batch_size=64,
    ppo_epochs=1,
)
config = PPOConfig(**ppo_settings)

In [None]:
# Load the same checkpoint twice, in order: first the trainable policy (`model`),
# then the reference copy (`ref_model`) handed to the PPO trainer next to it.
model, ref_model = (
    AutoModelForSeq2SeqLMWithValueHead.from_pretrained(config.model_name).to(device)
    for _ in range(2)
)
# Fresh tokenizer from the same checkpoint directory (replaces the earlier instance).
tokenizer = AutoTokenizer.from_pretrained(config.model_name)

In [None]:
# Collator used by the PPO trainer's dataloader to batch the tokenized examples.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
# Wire the policy, reference model, tokenizer and training split into the PPO trainer.
ppo_trainer = PPOTrainer(
    config=config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=tokenized_datasets['train'],
    data_collator=data_collator,
)

In [None]:
import re
import torch

In [None]:
# Sanity check: score two hand-written sentences with the reward model.
sen = ["옛날은 약이 없으니 머리 아프다고 약 먹고 할 수 있었니", "옛날은 약이 없으니 머리 아프다고 약 먹고 했어요"]
tok = rm_tokenizer(sen, padding=True, truncation=True, return_tensors="pt")
tok = {k: v.to(device) for k, v in tok.items()}
# Inference only: skip autograd bookkeeping, as the PPO loop does when it scores responses.
with torch.no_grad():
    output = rm_model(**tok).logits
# One single-element tensor per sentence, built from the logit at index 1
# (the same form in which rewards are handed to the PPO trainer).
output2 = [torch.tensor([i]) for i in output[:, 1]]
print(output2)

[tensor([3.9531]), tensor([3.9553])]


In [None]:
# Sampling settings forwarded to every `ppo_trainer.generate` call; the PPO loop
# adds "max_new_tokens" to this dict before each call.
# NOTE(review): padding uses the EOS token id rather than the tokenizer's own pad
# token id - confirm this is intended for this checkpoint.
generation_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# Bounds (in new tokens) for the randomly sampled length of each generated response.
output_min_length = 4
output_max_length = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)

# Special-token strings removed from decoded responses before they are scored.
pattern = r'<pad>|</s>|<unk>'

# One PPO update per dataloader batch; the counter is a batch index, not an epoch.
for step_idx, batch in tqdm(enumerate(ppo_trainer.dataloader), total=len(ppo_trainer.dataloader)):
    # The collator pads every query in the batch to a common length. This loop hands
    # queries to generate()/step() without any attention mask, so padding is stripped
    # here to keep pad positions from being treated as real input tokens.
    padded_queries = batch["input_ids"]
    if "attention_mask" in batch:
        query_tensors = [
            ids[mask.bool()]
            for ids, mask in zip(padded_queries, batch["attention_mask"])
        ]
    else:
        # No mask available: fall back to the padded rows unchanged.
        query_tensors = list(torch.unbind(padded_queries))

    # Generate one response per query, each with its own sampled length budget.
    response_tensors = []
    for query in query_tensors:
        gen_len = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response = ppo_trainer.generate(query, **generation_kwargs)
        # Keep at most the last `gen_len` generated tokens.
        response_tensors.append(response.squeeze()[-gen_len:])
    batch["response"] = [
        re.sub(pattern, '', tokenizer.decode(r.squeeze(), skip_special_tokens=True))
        for r in response_tensors
    ]

    # Score the decoded responses with the reward model (inference only).
    tok = rm_tokenizer(batch["response"], padding=True, truncation=True, return_tensors="pt")
    tok = {k: v.to(device) for k, v in tok.items()}
    with torch.no_grad():
        output = rm_model(**tok).logits
    # Reward = logit at index 1, wrapped as one single-element tensor per response.
    rewards = [torch.tensor([score]) for score in output[:, 1]]

    # PPO optimisation step on (query, response, reward) triples, then log its stats.
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)

In [None]:
# Persist the PPO-trained model to Drive. The absolute path under the mount point
# (/content/drive) guarantees the result lands on Drive even if the kernel's working
# directory has changed; a relative path could silently write to the VM's temporary disk.
ppo_trainer.save_pretrained("/content/drive/MyDrive/Colab/T5-jeju/data/saved_models/rlhf_base2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Ask PyTorch to release its cached, currently unused GPU memory now that training is done.
torch.cuda.empty_cache()