## Train Reward Model as Coherence Level Classifier
___

#### Library


In [14]:
import os
import pandas as pd
import numpy as np

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding
from transformers import TrainingArguments, Trainer
import evaluate

#### Add coherence scores
___
* 0 = Least coherent
* 1 = Moderately coherent
* 2 = Mostly coherent

In [3]:
df = pd.read_csv('../datasets/gpt2_sample_responses.csv')
df.head()

Unnamed: 0,prompt,response
0,How is your day so far?,"How is your day so far? "" \n = = Background = ..."
1,The greatest strength of AI nowadays is,The greatest strength of AI nowadays is the us...
2,I bought a ticket to Paris,I bought a ticket to Paris to take part in the...
3,There is so much fun in having a roller coaste...,There is so much fun in having a roller coaste...
4,"Every time I go the restaurant, the place is","Every time I go the restaurant, the place is v..."


In [8]:
# Iterate over each row in the dataframe, and gather human response
scores = []
for i, j in df.iterrows():
    cmd_lines = f"prompt: {j['prompt']}\n\nresponse:{j['response']}"
    score = input(cmd_lines)
    scores.append(score)


In [13]:
# type-cast scores to integers
scores = [int(score) for score in scores]

df['score'] = scores
df.head()

Unnamed: 0,prompt,response,score
0,How is your day so far?,"How is your day so far? "" \n = = Background = ...",0
1,The greatest strength of AI nowadays is,The greatest strength of AI nowadays is the us...,2
2,I bought a ticket to Paris,I bought a ticket to Paris to take part in the...,2
3,There is so much fun in having a roller coaste...,There is so much fun in having a roller coaste...,1
4,"Every time I go the restaurant, the place is","Every time I go the restaurant, the place is v...",1


In [15]:
# Save the scores together with the prompt and response
df.to_csv('../datasets/gpt2_sample_response.csv', index = False)

#### Load and preprocess dataset
___

In [4]:
## Load datasets from CSV
dataset = load_dataset(path="csv", name="gpt2_sample_response", data_dir = '../datasets', data_files = 'gpt2_sample_response.csv')
dataset

Found cached dataset csv (/home/kccheng1988/.cache/huggingface/datasets/csv/gpt2_sample_response-675f075b2650ebe9/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response', 'score'],
        num_rows: 26
    })
})

In [23]:
dataset['train'][0]

{'prompt': 'How is your day so far?',
 'response': 'How is your day so far? " \n = = Background = = \n On April 19, 2009, a visit to the City of New York City came as a shock to',
 'score': 0}

In [5]:
## Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
max_input_length = tokenizer.max_model_input_sizes['distilbert-base-cased']
print('Max input length: ', max_input_length)


Max input length:  512


In [24]:
## Define preprocessing steps 

# 1. Concatenate prompt and response into a single text column
def concatenate_prompt_response(sample):
    return {'text' : f"Prompt: {sample['prompt']} \n\n Response: {sample['response']}"}

# 2. Tokenize concatenated text
def tokenize_text(sample):
    output = tokenizer(
        sample['text'],
        truncation = True,
        max_length = max_input_length
    )

    return output

dataset_proc = dataset.map(concatenate_prompt_response)
dataset_proc = dataset_proc.map(tokenize_text)
dataset_proc = dataset_proc.remove_columns(['prompt', 'response'])
dataset_proc = dataset_proc.rename_columns({'score': 'label'})

Loading cached processed dataset at /home/kccheng1988/.cache/huggingface/datasets/csv/gpt2_sample_response-675f075b2650ebe9/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d/cache-11b2cf245df8d875.arrow


Map:   0%|          | 0/26 [00:00<?, ? examples/s]

In [25]:
dataset_proc['train']

Dataset({
    features: ['label', 'text', 'input_ids', 'attention_mask'],
    num_rows: 26
})

In [26]:
## Set up data collator that could handle automated batch padding
data_collator  = DataCollatorWithPadding(tokenizer = tokenizer)

#### Training the model
___

In [27]:
## 1. Set up mapping betwee binary label and coherence label
id2label = {0 : "least", 1 : "medium", 2 : "mostly"}
label2id = {"least" : 0, "medium" : 1, "mostly" : 2}

In [9]:
## 2. Set up DistilBert model
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels = 3,
    id2label = id2label,
    label2id = label2id
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
## 3. Set up training arguments
# Requires accelerate>=0.20.1
# !pip install accelerate -U

training_args = TrainingArguments(
    output_dir = "../models/coherence-classifier",
    do_eval = False,
    learning_rate = 2e-5,
    per_device_train_batch_size = 2,
    num_train_epochs = 5,
    weight_decay = 0.01,
    save_strategy = 'epoch',
    push_to_hub = False
)

In [29]:
## 4. Set up evaluation metrics

def compute_metrics(eval_pred):
    accuracy = evaluate.load('accuracy')

    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis = 1)
    return accuracy.compute(predictions = predictions, references = labels)

In [30]:
## 5. Set up trainer
trainer = Trainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_proc['train'],
    data_collator = data_collator,
    compute_metrics = compute_metrics
)

In [31]:
## 6. Kickstart training
trainer.train()



MlflowException: Changing param values is not allowed. Param with key='logging_dir' was already logged with value='tmp_trainer/runs/Jul27_09-58-44_kccheng1988' for run ID='49e94028fb2144d181a78ca9e06c5ce5'. Attempted logging new value 'tmp_trainer/runs/Jul27_10-02-10_kccheng1988'.