In [None]:
# Install the Hugging Face libraries that the import cell below relies on.
# NOTE(review): versions are unpinned, so a rerun may pick up different releases — consider pinning.
%pip install datasets
%pip install transformers



In [None]:
# Mount Google Drive under /content/drive; the training cell saves model checkpoints
# below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

import numpy as np
import torch
import torch.nn as nn
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, GPT2Config

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Load the distilgpt2 tokenizer and register '<|endoftext|>' as its padding token so that
# sequences can be padded to a fixed length. The recorded output of this cell lists
# '<|endoftext|>' as the only special token, i.e. padding reuses that existing token.
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
tokenizer.add_special_tokens({'pad_token': '<|endoftext|>'})
tokenizer.all_special_tokens  # last expression: displayed as the cell output

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


['<|endoftext|>']

In [None]:
# Load the "openai/webgpt_comparisons" dataset. Later cells read its "train" split and,
# per row, question.full_text, answer_0 / answer_1 and score_0 / score_1.
dataset = load_dataset("openai/webgpt_comparisons")

In [None]:
# Hyper-parameters shared by the dataset and DataLoader cells below.
BATCH_SIZE = 10     # number of answer pairs per DataLoader batch
MAX_LENGTH = 1024   # token length every encoded text is truncated / padded to

In [None]:
def generate_dataset(examples):
    """Attach a tokenized question and both completions to a dataset example.

    Writes three new keys into ``examples`` (mutating it in place) and returns it:

    * ``"q"``  - the notebook-level ``tokenizer`` applied to ``examples["question"]["full_text"]``
      without special tokens,
    * ``"r0"`` - ``examples["tokens_0"]["completion"]``,
    * ``"r1"`` - ``examples["tokens_1"]["completion"]``.

    NOTE(review): ``truncation=False`` is passed together with ``max_length``; presumably
    the length limit is therefore not enforced here - confirm against the tokenizer docs.
    This helper is not called by any cell visible in this notebook.
    """
    question_text = examples["question"]["full_text"]
    examples["q"] = tokenizer(
        question_text,
        truncation=False,
        max_length=MAX_LENGTH,
        add_special_tokens=False,
    )

    # Copy the completion of each candidate answer under a short key.
    for slot in ("0", "1"):
        examples[f"r{slot}"] = examples[f"tokens_{slot}"]["completion"]

    return examples

In [None]:
class CustomImageDataset(Dataset):
    """Pairwise-preference dataset built from the "train" split of the comparisons data.

    Despite the name, no images are involved: every item is a pair of tokenized
    "Q: <question> A: <answer> <|endoftext|>" texts for the same question, returned as
    ``(preferred, rejected)`` according to the two answers' scores.

    Args:
        tokenizer: Callable tokenizer. It is invoked per item with ``max_length``,
            ``truncation=True``, ``padding='max_length'`` and ``return_tensors="pt"``.
        dataset: Mapping with a "train" entry whose rows provide
            ``question.full_text``, ``answer_0``, ``answer_1``, ``score_0`` and ``score_1``.
        train: When True, near-ties between the two scores are broken at random
            (see ``__getitem__``).
        max_length: Length to truncate / pad every text to. ``None`` (default) uses the
            notebook-level ``MAX_LENGTH`` constant.
    """

    def __init__(self, tokenizer, dataset, train=True, max_length=None):
      self.train = train
      self.tokenizer = tokenizer
      # Resolved lazily so the notebook constant is only needed when no override is given.
      self.max_length = MAX_LENGTH if max_length is None else max_length

      self.qr0, self.qr1 = [], []
      self.score_0, self.score_1 = [], []

      # Single pass over the split instead of re-reading it once per field.
      for row in dataset["train"]:
        question = row['question']['full_text']
        self.qr0.append(self._format(question, row['answer_0']))
        self.qr1.append(self._format(question, row['answer_1']))
        self.score_0.append(row['score_0'])
        self.score_1.append(row['score_1'])

    @staticmethod
    def _format(question, answer):
      """Render one question/answer pair as the text fed to the reward model."""
      # The text deliberately does NOT start with '<|endoftext|>'. The notebook uses that
      # token as the padding token, and GPT2ForSequenceClassification reads its score at the
      # last token before padding. In transformers releases that locate padding through the
      # first pad_token_id match, a leading '<|endoftext|>' makes every sequence look like
      # it starts with padding, so the score is taken from the final padding slot instead.
      return f"Q: {question} A: {answer} <|endoftext|>"

    def _encode(self, text):
      """Tokenize ``text`` to a fixed-length PyTorch encoding."""
      return self.tokenizer(text, max_length=self.max_length, truncation=True, padding='max_length', return_tensors="pt")

    def __len__(self):
        return len(self.score_0)

    def __getitem__(self, idx):
      """Return ``(preferred_encoding, rejected_encoding)`` for the pair at ``idx``."""
      input_0 = self._encode(self.qr0[idx])
      input_1 = self._encode(self.qr1[idx])

      score_0 = self.score_0[idx]
      score_1 = self.score_1[idx]

      if self.train:
        # Nudge score_0 by +/-0.01 with equal probability: pairs whose scores differ by no
        # more than 0.01 (e.g. exact ties) get a random order instead of always
        # preferring answer_1.
        # TODO (translated from Italian): apply a softmax to the scores and sample the order.
        score_0 += np.random.choice([-0.01, 0.01], 1, p=[0.5, 0.5])[0]

      if score_0 > score_1:
        return input_0, input_1

      # Without the training jitter, exact ties fall through to this branch.
      return input_1, input_0

In [None]:
# Load distilgpt2 with a sequence-classification head (the recorded output notes that
# `score.weight` is newly initialised) and tell the model which token id is padding.
model = GPT2ForSequenceClassification.from_pretrained("distilgpt2")
model.config.pad_token_id = tokenizer.pad_token_id

# Freeze the two embedding tables (wte, wpe) and the first three transformer blocks;
# everything else stays trainable.
frozen_modules = [model.transformer.wte, model.transformer.wpe]
frozen_modules += [model.transformer.h[layer_idx] for layer_idx in range(3)]
for frozen_module in frozen_modules:
    frozen_module.requires_grad_(False)

model = model.to(device)

# Report how much of the network is actually being trained.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total number of parameters: {total_params}")
print(f"Total number of trainable parameters: {trainable_params}")

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Total number of parameters: 81913344
Total number of trainable parameters: 21265920


In [None]:
# Build the pairwise training set; train=True turns on the random tie-breaking in __getitem__.
train_dataset = CustomImageDataset(tokenizer=tokenizer, dataset=dataset, train=True)

In [None]:
# Batches are reshuffled every epoch and the final, smaller batch is kept (drop_last=False).
# The worker count is capped at the number of CPUs the host reports: requesting 16 workers
# on a smaller machine oversubscribes the CPU and makes PyTorch warn about it.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=min(16, os.cpu_count() or 1),
    drop_last=False,
)
# Binary cross-entropy on raw logits; the training loop feeds it the reward difference
# (preferred - rejected) with an all-ones target.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4)



In [None]:
# fp16 autocast needs loss scaling, otherwise small gradients underflow to zero.
# The scaler is only active on CUDA; on CPU it is a transparent no-op.
scaler = torch.amp.GradScaler("cuda", enabled=(device == "cuda"))

for epoch in range(2):
  # from_pretrained() hands back the model in eval mode (dropout disabled), so training
  # mode has to be switched on explicitly.
  model.train()

  train_loss = 0
  count = 0
  for input_best, input_low in tqdm(train_loader):
    input_best = input_best.to(device)
    input_low = input_low.to(device)

    # Each dataset item is tokenized with return_tensors="pt", so the collated tensors carry
    # an extra singleton axis at dim 1; drop it before the forward pass.
    for encoded in (input_best, input_low):
      for key in ('input_ids', 'attention_mask'):
        encoded[key] = encoded[key].squeeze(1)

    batch_size = input_low['input_ids'].shape[0]

    # Target 1 for every pair: the first element is always the preferred answer.
    labels = torch.ones(batch_size, 1, device=device)

    optimizer.zero_grad()

    with torch.autocast(device_type=device, dtype=torch.float16):
      reward_best = model(**input_best).logits
      reward_low = model(**input_low).logits

      # BCE-with-logits against ones on the reward gap, i.e. -log(sigmoid(best - low)).
      loss = criterion( reward_best - reward_low, labels )

    # Scale the loss for backward, then unscale inside step(); steps that produced
    # inf/NaN gradients are skipped and the scale factor is adjusted by update().
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()

    train_loss += loss.item()
    count+=1
    if count%100==0:
      # Running mean of the loss since the start of the epoch.
      print(count, train_loss/count)
  print("Storing model...")
  model.save_pretrained(f"/content/drive/MyDrive/gpt2/reward_models/2024-10-27/epoch-{epoch}/")



  0%|          | 0/1958 [00:01<?, ?it/s]

100 0.6777513921260834
200 0.6761372540891171
300 0.6709148994088173
400 0.6715238008648157
500 0.6721301388144493
600 0.6719181104004384
700 0.6717726923738208
800 0.6716922996193171
900 0.6707324965794881
1000 0.6712707747817039
1100 0.6713505442575974
1200 0.6721167880793413
1300 0.6724838161468506
1400 0.6724848913720676
1500 0.6721203764677047
1600 0.6720155416056514
1700 0.672101587583037
1800 0.6725147932105594
1900 0.6723836236564736
Storing model...


  0%|          | 0/1958 [00:00<?, ?it/s]

100 0.6729516768455506
200 0.6687840567529202
300 0.6643349075317383
400 0.6611160923540592
500 0.6626601431965828
600 0.6645039072136084
700 0.6658826577237674
800 0.6672071920707822
900 0.6671050407489141
1000 0.6677918983101845
1100 0.668202352903106
1200 0.668158363699913
1300 0.6683643845411448
1400 0.668506444309439
1500 0.6680428981781006
1600 0.6679200828075409
1700 0.6678221981840975
1800 0.6674345740675927
1900 0.6674121374989811
Storing model...


In [None]:
# Reload a previously saved reward model from Drive and move it to the active device.
# NOTE(review): this path (.../15-47/checkpoint-0/) is not one of the
# .../2024-10-27/epoch-{epoch}/ directories written by the training loop in this
# notebook - confirm it points at the intended checkpoint.
model = GPT2ForSequenceClassification.from_pretrained(
    "/content/drive/MyDrive/gpt2/reward_models/2024-10-27/15-47/checkpoint-0/"
)
model = model.to(device)