In [1]:
!pip install transformers[torch]
!pip install evaluate

Installing collected packages: safetensors, huggingface-hub, tokenizers, accelerate, transformers
Successfully installed accelerate-0.24.1 huggingface-hub-0.17.3 safetensors-0.4.0 tokenizers-0.14.1 transformers-4.35.0
Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
from datasets import Dataset, DatasetDict
import pandas as pd
from transformers import (AutoModelForSequenceClassification, Trainer, TrainingArguments)
import evaluate
import torch.nn as nn
import torch
import numpy as np
from utils import PROD_TOK, AUX_TOK

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load and save data

## Split data

In [None]:
#Load data
df = pd.read_hdf('human_data.h5', key='df')
df.head()

Unnamed: 0,Input_seq,Output,Type
0,"[5, 7, 4, 4, 16, 5, 5, 6, 5, 2, 17, 0, 13, 6, ...",[5],Piezas
1,"[1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, ...",[5],Productos
2,"[2, 1]",[3],Productos
3,"[9, 5, 6, 9, 11, 1, 2, 6, 8, 12, 0, 6, 7, 10, ...",[7],Productos


In [None]:
#We divide the dataset into products and pieces
df_prod = df.loc[df.Type=='Productos']
df_piez = df.loc[df.Type=='Piezas']

In [None]:
#Save splitted datasets for later use
df_prod.to_hdf('products_human.h5', key='df_prod', index=False)
df_piez.to_hdf('pieces_human.h5', key='df_piez', index=False)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Input_seq', 'Output', 'Type'], dtype='object')]

  df_prod.to_hdf('products_human.h5', key='df_prod', index=False)
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['Input_seq', 'Output', 'Type'], dtype='object')]

  df_piez.to_hdf('pieces_human.h5', key='df_piez', index=False)


## Load data

In [3]:
#Load splitted datasets
df_prod = pd.read_hdf('products_human.h5', key='df_prod')
df_piez = pd.read_hdf('pieces_human.h5', key='df_piez')

# Load and build model

In [27]:
#We define a custom model as our reward model
class RewardModel (torch.nn.Module):
    def __init__(self, model_name, hidden_size,*model_args, **kwargs):
        super().__init__()
        self.base = AutoModelForSequenceClassification.from_pretrained(model_name, *model_args, **kwargs)

        #Last layer must be changed for it to be a regression problem
        self.base.classifier =  torch.nn.Linear(in_features=hidden_size, out_features=1, bias=True)
        self = self.base

    #Our custom model should take as input a pair of right/wrong answers with a fixed sequence
    def forward(self, input_ids, attention_mask=None,
                token_type_ids=None, train:bool=False):
        #Whether the model is being trained or tested is important when inferencing
        if train:
            #The input_ids are structured in a way in which the first chunk corresponds to right examples and the second one to wrong ones
            input_right, input_wrong  = input_ids[0][0], input_ids[0][1]
            out_right = self.base(input_ids=input_right, attention_mask=attention_mask, token_type_ids=token_type_ids)
            out_wrong = self.base(input_ids=input_wrong, attention_mask=attention_mask, token_type_ids=token_type_ids)
            return [out_right, out_wrong]
        else:
            return self.base(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)

In [28]:
#We initialize our custom reward model
rm = RewardModel("VCNC/Auto-CNC", hidden_size=768).to(device)

In [35]:
#This is an example of how to inference the model with batches of data
input = torch.tensor([[[[2, 2, 1, 3, 4, 1], [3, 2, 1, 3, 4, 4]], [[2, 2, 1, 3, 4, 3], [3, 2, 1, 3, 4, 1]]]]).to(device)
token_type = torch.tensor([[0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1]]).to(device)
rm(input, token_type_ids=token_type, train=True)

[SequenceClassifierOutput(loss=None, logits=tensor([[-0.1886],
         [ 0.3157]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None),
 SequenceClassifierOutput(loss=None, logits=tensor([[-0.1065],
         [ 0.2722]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)]

In [36]:
#We can see that the results do not vary, indicating that our functions are correct
input = torch.tensor([[2, 2, 1, 3, 4, 1], [3, 2, 1, 3, 4, 4]]).to(device)
token_type = torch.tensor([[0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1]]).to(device)
rm(input, token_type_ids=token_type, train=False)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.1886],
        [ 0.3157]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

# Preprocess data

In [10]:
#In this case we will use the product dataset
data = df_prod.to_numpy()
keys = np.unique(np.array(list(PROD_TOK.values()))) #Get unique tokens only
dataset = np.zeros((len(data)*(len(keys)-1), 3), dtype=object) #The number of pairs is the number of examples at the beginning times 1 minus keys

#Iterate through the dataset to generate sets (correct, wrong, input)
for i in range(len(data)):
    internal_cont = 0
    for j in range(len(keys)):
        if j != data[i, 1][0]:
            dataset[i*(len(keys)-1)+internal_cont, 0] = data[i, 1][0]
            dataset[i*(len(keys)-1)+internal_cont, 1] = j
            dataset[i*(len(keys)-1)+internal_cont, 2] = data[i, 0]
            internal_cont += 1
dataset

array([[5, 0,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 1,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 2,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 3,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 4,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 6,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 7,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 8,
        list([1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12, 7, 12, 6, 6, 11, 8, 3, 12, 7, 9, 11, 9, 5])],
       [5, 9,
        list([1, 6, 8, 4, 1, 9, 11

## Data formatting and train/test splitting

In [11]:
MAX_LEN = max([len(i) for i in dataset[:, 2]])
PAD = 13

input_ids = []
attention_mask = []
token_type_ids = []
train = []
for i in range(len(dataset)):
    pad_len = MAX_LEN-len(dataset[:, 2][i])
    right_seq = dataset[:, 2][i]+[dataset[i, 0]]+[PAD]*pad_len
    wrong_seq = dataset[:, 2][i]+[dataset[i, 1]]+[PAD]*pad_len
    input_ids.append([right_seq, wrong_seq]) #The input_ids is a concatenation of the right and wrong example
    attention_mask.append([1]*(len(dataset[:, 2][i])+1)+[0]*pad_len)
    token_type_ids.append([0]*len(dataset[:, 2][i])+[1]*(pad_len+1)) #The +1 is because the output selected by the human changes the token type
    train.append(True) #In RLHF every example is given as a training example REVISAR!!!
input_ids[0], attention_mask[0], token_type_ids[0], train[0] #Get first element

([[1,
   6,
   8,
   4,
   1,
   9,
   11,
   7,
   2,
   5,
   8,
   10,
   1,
   12,
   7,
   12,
   6,
   6,
   11,
   8,
   3,
   12,
   7,
   9,
   11,
   9,
   5,
   5],
  [1,
   6,
   8,
   4,
   1,
   9,
   11,
   7,
   2,
   5,
   8,
   10,
   1,
   12,
   7,
   12,
   6,
   6,
   11,
   8,
   3,
   12,
   7,
   9,
   11,
   9,
   5,
   0]],
 [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1],
 True)

In [12]:
#We create a df to store the data properly
data = pd.DataFrame(data={'input_ids':input_ids, 'attention_mask':attention_mask, 'token_type_ids':token_type_ids, 'train':train})
data.head()

Unnamed: 0,input_ids,attention_mask,token_type_ids,train
0,"[[1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",True
1,"[[1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",True
2,"[[1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",True
3,"[[1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",True
4,"[[1, 6, 8, 4, 1, 9, 11, 7, 2, 5, 8, 10, 1, 12,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",True


## DatasetDict creation

In [13]:
#We turn our datasets into DatasetDicts
train = Dataset.from_pandas(data.loc[data.train == True].reset_index(drop=True))
test = Dataset.from_pandas(data.loc[data.train == False].reset_index(drop=True))

ds = DatasetDict()

ds['train'] = train
ds['validation'] = test

ds

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'token_type_ids', 'train'],
        num_rows: 36
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'token_type_ids', 'train'],
        num_rows: 0
    })
})

# Model training

In [22]:
#We redefine a custom loss that maximizes the difference between the rewards of positive and negative examples
def custom_loss(out_right:torch.tensor, out_wrong:torch.tensor, return_outputs:bool=False):
    #The idea behind the equations is better explained in the following article: https://medium.com/towards-generative-ai/reward-model-training-2209d1befb5f
    diff = out_right-out_wrong
    loss = torch.log(torch.sigmoid(out_right-out_wrong))
    return (loss, diff) if return_outputs else loss

In [29]:
#Finally, we train the model
rm.train()

RewardModel(
  (base): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(14, 768, padding_idx=13)
        (position_embeddings): Embedding(1024, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias

In [30]:
inputs = train[0]
input_ids = torch.tensor([[[inputs['input_ids'][0]], [inputs['input_ids'][1]]]]).to(device)
attention_mask = torch.tensor([inputs['attention_mask']]).to(device)
token_type_ids = torch.tensor([inputs['token_type_ids']]).to(device)
out = rm(input_ids = input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids, train = True)
out

[SequenceClassifierOutput(loss=None, logits=tensor([[0.5079]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None),
 SequenceClassifierOutput(loss=None, logits=tensor([[0.3505]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)]

In [31]:
optimizer = torch.optim.Adam(params=rm.parameters(), lr=3e-05)

In [32]:
for i in range(1000):
  optimizer.zero_grad()
  out = rm(input_ids = input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids, train = True)
  loss = custom_loss(out[0].logits, out[1].logits)
  loss.backward()
  optimizer.step()
rm(input_ids = input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids, train = True)

[SequenceClassifierOutput(loss=None, logits=tensor([[-37.5409]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None),
 SequenceClassifierOutput(loss=None, logits=tensor([[38.2755]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)]

In [41]:
loss

tensor([[0.0010]], device='cuda:0', grad_fn=<MulBackward0>)