# Fine Tune reward model from scratch

# TODOs:

#TODO: double-check that labels are not somehow misaligned...

#TODO: check if you need to plot 

1. LoRA learns a low-rank update (the product of two small adaptation matrices) that is added to the frozen, much higher-rank weight matrices of the model being fine-tuned

#TODO: double check model performance, generate output, maybe adjust training metrics

## 1. Imports, setup, and global variables

In [25]:
import torch
import pandas as pd
import os
import sys
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), '..')))

from transformers import TrainingArguments, EarlyStoppingCallback
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from collections import Counter

from datasets import Dataset, DatasetDict, load_from_disk

from peft import LoraConfig, get_peft_model, PeftModel

from utils import parse_ratings, tokenize_fn_with_best_window, tokenize_fn_basic_batched, CustomRewardTrainer, find_best_window


# from nltk.tokenize import sent_tokenize

# Restrict this process to the GPUs listed in AVAILABLE_DEVICES (taken from the
# environment / .env file). os.environ only accepts strings, so assigning the
# None that os.getenv returns for an unset variable would raise a TypeError;
# in that case CUDA_VISIBLE_DEVICES is simply left as it is.
available_devices = os.getenv("AVAILABLE_DEVICES")
if available_devices is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = available_devices

# Enable expandable CUDA segments
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Pick the compute device: first visible GPU if CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print("CUDA is available. Using GPU:", torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

There are 1 GPU(s) available.
CUDA is available. Using GPU: NVIDIA L40S


In [2]:
# load training variables (all values come from the environment / .env file)
FEEDBACK_TO_TRAIN_ON = os.getenv("FEEDBACK_TO_TRAIN_ON")
FEEDBACK_TO_REMOVE = os.getenv("FEEDBACK_TO_REMOVE")
MODEL = os.getenv("REWARD_MODEL")
DATASET = os.getenv("REWARD_DATASET")
TOKENIZE_FN = os.getenv("TOKENIZE_FN")
# MAX_LENGTH and STRIDE stay strings here (None when unset); they are cast with int() where they are used
MAX_LENGTH = os.getenv("MAX_LENGTH")
STRIDE = os.getenv("STRIDE")
LORA_CHECKPOINTS_FOLDER = os.getenv("LORA_CHECKPOINTS_FOLDER")

# Suffix that ties stored artifacts to the feedback column, tokenization mode and dataset of this run
RUN_SUFFIX = f"_{FEEDBACK_TO_TRAIN_ON}_{TOKENIZE_FN}_{DATASET}"

# os.environ[...] raises a KeyError naming the missing variable, instead of the
# opaque "NoneType + str" TypeError that os.getenv(...) + suffix produces when unset.
FINAL_LORA_ADAPTERS = os.environ["FINAL_LORA_ADAPTERS_FOLDER"] + RUN_SUFFIX
TOKENIZED_DATA = os.environ["TOKENIZED_DATA"] + RUN_SUFFIX

# load training data (paths of the semicolon-separated feedback files)
FILE_1 = os.getenv("FILE_1")
FILE_5 = os.getenv("FILE_5")
FILE_7 = os.getenv("FILE_7")
FILE_9 = os.getenv("FILE_9")
FILE_10_1 = os.getenv("FILE_10_1")
FILE_10_2 = os.getenv("FILE_10_2")
FILE_SYNTH = os.getenv("FILE_SYNTH")

## 2. Dataset loading and preprocessing

In [3]:
# Read every human-feedback file (semicolon-separated) into its own frame,
# in the same order as the FILE_* settings.
human_feedback_paths = [FILE_1, FILE_5, FILE_7, FILE_9, FILE_10_1, FILE_10_2]
df_1, df_5, df_7, df_9, df_10_1, df_10_2 = [
    pd.read_csv(csv_path, sep=";") for csv_path in human_feedback_paths
]

# Read the synthetic-feedback file
df_synth = pd.read_csv(FILE_SYNTH, sep=";")

# Stack all human-feedback frames into a single frame with a fresh 0..n-1 index
df_human = pd.concat(
    [df_1, df_5, df_7, df_9, df_10_1, df_10_2],
    ignore_index=True,
)

#### Re-structure df synthetic to fit in training loop

In [4]:
print("Synthetic feedback shape:", df_synth.shape)

# The synthetic-feedback CSV was written without a header row, so a plain
# read_csv treats the first record as column labels. Pushing those labels back
# in as a data row is lossy: pandas renames repeated header values
# (e.g. a second "True" becomes "True.1") and replaces empty header cells with
# "Unnamed: N", and doing it in place adds another row every time the cell is
# re-run. Re-reading the file with header=None parses the first record as
# ordinary data and makes this cell idempotent.
SYNTH_COLUMNS = [
    'file',
    'frame_ID',
    'frame_type',
    'frame_text',
    'precondition_id',
    'precondition_text',
    'precondition_position',
    'response_text',
    'prompt_config_examples',
    'prompt_config_chain_of_thought',
    'feedback_extraction',
    'feedback_detection',
    'additional_feedback',
    'synthetic_feedback',
]

df_synth = pd.read_csv(FILE_SYNTH, sep=";", header=None, names=SYNTH_COLUMNS)

print(df_synth.columns)

# Normalise the two prompt-configuration flags to real booleans. Going through
# str makes the mapping work whether pandas parsed the column as bool or as text;
# any value other than "true"/"false" (case-insensitive) becomes NaN.
for flag_column in ('prompt_config_examples', 'prompt_config_chain_of_thought'):
    df_synth[flag_column] = (
        df_synth[flag_column]
        .astype(str)
        .str.strip()
        .str.lower()
        .map({'true': True, 'false': False})
    )

print("Synthetic feedback shape:", df_synth.shape)

Synthetic feedback shape: (563, 14)
Index(['file', 'frame_ID', 'frame_type', 'frame_text', 'precondition_id',
       'precondition_text', 'precondition_position', 'response_text',
       'prompt_config_examples', 'prompt_config_chain_of_thought',
       'feedback_extraction', 'feedback_detection', 'additional_feedback',
       'synthetic_feedback'],
      dtype='object')
Synthetic feedback shape: (564, 14)


In [5]:
# Select the training frame according to the DATASET setting. A copy is taken
# so that the in-place column edits made on df_train later do not alter the
# source frame it was taken from.
if DATASET == "human":
    df_train = df_human.copy()
elif DATASET == "synthetic":
    df_train = df_synth.copy()
else:
    # Without this branch df_train would be undefined (or stale from an earlier run)
    raise ValueError(f"Unsupported DATASET {DATASET!r}; expected 'human' or 'synthetic'.")


df_train.shape
df_train.columns

Index(['file', 'frame_ID', 'frame_type', 'frame_text', 'precondition_id',
       'precondition_text', 'precondition_position', 'response_text',
       'prompt_config_examples', 'prompt_config_chain_of_thought',
       'feedback_extraction', 'feedback_detection', 'additional_feedback'],
      dtype='object')

### 2. a) Parse ratings to numeric values for MSE Loss

In [6]:
# Replace every raw entry of the target feedback column with the value returned by parse_ratings
df_train[FEEDBACK_TO_TRAIN_ON] = [parse_ratings(raw_rating) for raw_rating in df_train[FEEDBACK_TO_TRAIN_ON]]
# Label the preview with the configured column rather than a hard-coded "extraction",
# which was misleading whenever another feedback column is trained on
print(f"Parsed feedback for {FEEDBACK_TO_TRAIN_ON}:", df_train[FEEDBACK_TO_TRAIN_ON][:5])

Parsed feedback for extraction: 0    2
1    2
2    2
3    2
4    3
Name: feedback_extraction, dtype: object


### 2. b) Look at the label imbalance in the feedback to train on (relevant for the weights in the RL loop) --> feedback_detection is heavily biased by the way it was collected, so it gets less weight overall...

In [7]:
# Show how often each parsed rating occurs in the column that is trained on
df_train[FEEDBACK_TO_TRAIN_ON].value_counts()

feedback_extraction
0    499
3    231
2    104
1     95
Name: count, dtype: int64

### 2. c) keep only relevant feedback column

In [8]:
# Wrap the training frame in a Hugging Face Dataset
dataset = Dataset.from_pandas(df_train)

# Print the dataset summary and the name of the column that will become the label
print(dataset)
print(FEEDBACK_TO_TRAIN_ON) 

Dataset({
    features: ['file', 'frame_ID', 'frame_type', 'frame_text', 'precondition_id', 'precondition_text', 'precondition_position', 'response_text', 'prompt_config_examples', 'prompt_config_chain_of_thought', 'feedback_extraction', 'feedback_detection', 'additional_feedback'],
    num_rows: 929
})
feedback_extraction


In [9]:
# Drop the feedback column that is not trained on, then expose the target column as "label"
dataset = (
    dataset
    .remove_columns([FEEDBACK_TO_REMOVE])
    .rename_column(FEEDBACK_TO_TRAIN_ON, "label")
)

## 3. Load model with LoRA layer

In [16]:
# Load the tokenizer and the sequence-classification model named by the REWARD_MODEL setting
model_id = MODEL 
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1) # num_labels = 1 since we want to predict a single scalar (the rating)

# Comment: AutoModelForSequenceClassification with num_labels=1 already provides a single-output (regression) head
# Print the module tree to inspect the layer names
print(model)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [11]:
# Compare the tokenizer's maximum sequence length with the number of position embeddings of the model
print(tokenizer.model_max_length)
print(model.config.max_position_embeddings)

512
512


In [12]:
# LoRA hyperparameters
lora_config = LoraConfig(
    task_type="SEQ_CLS",  # classify each answer
    r=8,  # Rank of the LoRA matrices (smaller = less memory)
    lora_alpha=16,  # Scaling factor (higher = stronger adaptation)
    lora_dropout=0.1,
    bias="none",
    target_modules=["query", "key", "value"],  # Apply LoRA to attention layers
)

# Freeze base model: switch off gradients for every parameter of model.base_model
model.base_model.requires_grad_(False)

# Wrap the model so that it becomes a PEFT (LoRA) model
model = get_peft_model(model, lora_config)
# model.gradient_checkpointing_enable()
model.print_trainable_parameters()  # Report trainable vs. total parameter counts


trainable params: 443,137 || all params: 109,926,146 || trainable%: 0.4031


In [13]:
# Smoke-test the tokenizer on two short sentences: pad to the longer one and truncate at 512 tokens
sample_data = ["What is the capital of France?", "What is the largest capital in the world?"]
tokenizer(sample_data, padding=True, truncation=True, max_length=512)

{'input_ids': [[101, 1067, 223, 207, 580, 210, 1335, 124, 102, 0, 0, 0], [101, 1067, 223, 207, 5601, 190, 580, 213, 207, 1727, 124, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}

## 5. Encode dataset

In [14]:
# if labels are not integers, convert them to integers
def convert_label_to_int(data):
    """Cast the "label" entry of a single example to int.

    The example is modified in place and the same object is returned, so the
    function can be passed directly to a per-example map.

    Args:
        data: mapping for one example; must contain a "label" key whose value
            is accepted by int().

    Returns:
        The same mapping, with data["label"] replaced by its int value.

    Raises:
        KeyError: if "label" is missing.
        ValueError / TypeError: if the label cannot be converted by int().
    """
    label_as_int = int(data["label"])
    data["label"] = label_as_int
    return data


print(dataset.column_names)
# map string labels to integers
dataset = dataset.map(convert_label_to_int)  # applied per example; rewrites the "label" column

print(dataset["label"][:5])  # Check labels
print(dataset["response_text"][:5])  # Check the first response texts

['file', 'frame_ID', 'frame_type', 'frame_text', 'precondition_id', 'precondition_text', 'precondition_position', 'response_text', 'prompt_config_examples', 'prompt_config_chain_of_thought', 'label', 'additional_feedback']


Map: 100%|██████████| 929/929 [00:00<00:00, 16769.13 examples/s]

[2, 2, 2, 2, 3]
['1. Subfact: Onze Minister\n                2. Positie: Artikel 1, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                3. Subfact: Onze Minister\n                4. Positie: Artikel 8, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                5. Subfact: Onze Minister\n                6. Positie: Artikel 14, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                7. Subfact: Onze Minister\n                8. Positie: Artikel 16, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                9. Subfact: Onze Minister\n                10. Positie: Artikel 17, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                11. Subfact: Onze Minister\n                12. Positie: Artikel 17a, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                13. Subfact: Onze Minister\n                14. Positie: Artikel 26, sectie 1 IN Vreemdelingenwet geldig vanaf 2024\n                15. Subfact: Onze Minister\n                16. Positie: Arti




## Comment

1. Needed for feedback extraction: precondition_text, response_text, label(rating feedback extraction)
2. Needed for feedback detection: precondition_text, precondition_position, response_text, label (rating feedback detection)
3. For the precondition position to be found well, it is crucial for the model to find the precondition text (at least to a recognizable degree) as well, otherwise the precondition is not found at all...

In [15]:
# Code to test the best-window function

test_text = """
        Titel: De Weg Door Het Leven

Het leven is een reis vol onverwachte wendingen, een pad dat zich zelden rechtlijnig ontvouwt. Vanaf het moment dat we onze eerste ademhaling nemen, worden we ondergedompeld in een wereld die we nog moeten leren begrijpen. Als kind lijkt alles eenvoudig: lachen, spelen, ontdekken. Maar naarmate we ouder worden, beginnen de lagen van complexiteit zich op te stapelen. We leren dat mensen niet altijd zeggen wat ze bedoelen, dat keuzes consequenties hebben, en dat geluk soms vluchtiger is dan we zouden willen.

In de vroege ochtenden, wanneer de zon net boven de horizon verschijnt en de wereld nog stil is, denken velen na over hun plaats in het grotere geheel. Sommigen vragen zich af of ze de juiste keuzes hebben gemaakt, of ze trouw zijn gebleven aan zichzelf. Anderen proberen simpelweg de dag door te komen, met hoop op iets beters. In die momenten van stilte komt vaak het besef dat, hoewel we allemaal verschillende paden bewandelen, we één waarheid delen: dat het leven, ondanks al onze inspanningen en verlangens, nooit gemakkelijk is. Of, zoals mijn grootmoeder het ooit zei terwijl ze haar handen vouwde na een lange dag werken op het land: “Je moet weten, kind, het leven is nooit gemakkelijk, maar het is wel de moeite waard.”

We worden gevormd door onze ervaringen, door de mensen die we ontmoeten en de obstakels die we overwinnen. Elke fout, elk succes, elke traan en elke glimlach draagt bij aan wie we zijn. En toch, ondanks al die ervaringen, blijven we zoeken. Naar betekenis. Naar verbinding. Naar rust.

Soms lijkt het alsof de wereld te snel draait. Technologie verandert ons leven in een razend tempo, verwachtingen worden hoger, en de druk om te presteren neemt toe. In die chaos vergeten we soms stil te staan. Te ademen. Te voelen. Maar juist in die momenten van rust vinden we vaak de antwoorden die we zo hard nodig hebben.

De liefde, bijvoorbeeld, is een van de krachtigste krachten die ons voortdrijft. Liefde voor een partner, een kind, een vriend, of zelfs voor een passie. Het is die liefde die ons helpt vol te houden wanneer alles tegenzit. Die ons eraan herinnert waarom we begonnen zijn, waarom we blijven proberen.

En dan is er verlies. Een onvermijdelijk onderdeel van het leven. We verliezen mensen, kansen, dromen. Maar in dat verlies schuilt ook groei. We leren loslaten, opnieuw beginnen, sterker worden. Het is pijnlijk, ja, maar ook noodzakelijk.

Wanneer we terugkijken op ons leven, zijn het zelden de materiële zaken die we herinneren. Het zijn de momenten. De gesprekken bij kaarslicht. De wandelingen in de regen. De onverwachte lachbuien. De stilte van een gedeeld verdriet. Die momenten vormen de essentie van ons bestaan.

Dus ja, het leven is vol uitdagingen. Het is rommelig, verwarrend, soms oneerlijk. Maar het is ook prachtig, rijk aan betekenis, en gevuld met kansen om te groeien, te leren en lief te hebben. En misschien is dat wel de grootste les van allemaal: dat we, ondanks alles, blijven kiezen voor hoop. Voor verbinding. Voor het leven zelf.
        """


# Short reference sentence to look for inside the long test_text
test_ground_truth = "Het leven is nooit gemakkelijk."

# find_best_window comes from utils; presumably it returns the window of test_text
# that best matches the reference sentence -- TODO confirm against utils
print(find_best_window(test_text, test_ground_truth, device, tokenizer))

# Works as expected, I am impressed.

Token indices sequence length is longer than the specified maximum sequence length for this model (1127 > 512). Running this sequence through the model will result in indexing errors


de juiste keuzes hebben gemaakt, of ze trouw zijn gebleven aan zichzelf. anderen proberen simpelweg de dag door te komen, met hoop op iets beters. in die momenten van stilte komt vaak het besef dat, hoewel we allemaal verschillende paden bewandelen, we een waarheid delen : dat het leven, ondanks al onze inspanningen en verlangens, nooit gemakkelijk is. of, zoals mijn grootmoeder het ooit zei terwijl ze haar handen vouwde na een lange dag werken op het land : [UNK] je moet weten, kind, het leven is nooit gemakkelijk, maar het is wel de moeite waard. [UNK] we worden gevormd door onze ervaringen, door de mensen die we ontmoeten en de obstakels die we overwinnen. elke fout, elk succes, elke traan en elke glimlach draagt bij aan wie we zijn. en toch, ondanks al die ervaringen, blijven we zoeken. naar betekenis. naar verbinding. naar rust. soms lijkt het alsof de wereld te snel draait. technologie verandert ons leven in een razend tempo, verwachtingen worden hoger, en de druk om te presteren

In [10]:
# sample = dataset.select(range(5))
# tokenized_sample = tokenize_fn(sample)
# print([len(ids) for ids in tokenized_sample["input_ids"]])

# Reuse the tokenized dataset stored under TOKENIZED_DATA when it exists;
# otherwise tokenize now and store the result there for later runs.
if os.path.exists(TOKENIZED_DATA):
    dataset = load_from_disk(TOKENIZED_DATA)
else:
    if TOKENIZE_FN == "best_window":
        # Per-example tokenization with the best-window helper
        best_window_kwargs = {
            "feedback_train": FEEDBACK_TO_TRAIN_ON,
            "tokenizer": tokenizer,
            "max_length": int(MAX_LENGTH),
            "stride": int(STRIDE),
            "device": device,
        }
        dataset = dataset.map(
            tokenize_fn_with_best_window,
            fn_kwargs=best_window_kwargs,
            batched=False,
        )
    else:
        # Any other TOKENIZE_FN value falls back to the basic batched tokenization
        basic_kwargs = {
            "feedback_train": FEEDBACK_TO_TRAIN_ON,
            "tokenizer": tokenizer,
        }
        dataset = dataset.map(
            tokenize_fn_basic_batched,
            fn_kwargs=basic_kwargs,
            batched=True,
        )

    dataset.save_to_disk(TOKENIZED_DATA)

In [26]:
# Count how many examples come from each source file
print(Counter(dataset['file']))

#TODO: maybe use rijksbegrotingscyclus purely as a test + eval set...
# get indices and use dataset.select!!!

Counter({'Interpretatie_Vw_over_besluiten_op_aanvragen_voor_een_verblijfsvergunning_regulier_bepaalde_tijd.json': 432, 'Participatiewet_most_recent_public.json': 255, 'rijksbegrotingscyclus.json': 242})


# Split dataset into train, test, eval

In [11]:
# First cut: 70% train / 30% held out (fixed seed for reproducibility)
train_test_split = dataset.train_test_split(test_size=0.3, seed=42)

# Second cut: halve the held-out part into validation and test
held_out = train_test_split["test"]
eval_test_split = held_out.train_test_split(test_size=0.5, seed=42)

# Collect the three splits under the names used by the rest of the notebook
train_part = train_test_split["train"]
validation_part = eval_test_split["train"]
test_part = eval_test_split["test"]
final_splits = DatasetDict(
    {
        "train": train_part,
        "validation": validation_part,
        "test": test_part,
    }
)

## 6. Train reward model

In [17]:
# Training arguments
training_args = TrainingArguments(
    output_dir=LORA_CHECKPOINTS_FOLDER,
    # Evaluate and checkpoint on the same 10-step schedule
    eval_strategy='steps',
    save_strategy='steps',
    save_steps=10,
    eval_steps=10,
    save_total_limit=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=3e-4,
    num_train_epochs=20,
    logging_steps=10,
    label_names=["labels"],
    # report_to="none",
    logging_dir="./logs",
    # Mixed precision only when a GPU is present: the notebook falls back to the
    # CPU when CUDA is unavailable, and fp16 training is rejected there.
    fp16=torch.cuda.is_available(),
    # Select the checkpoint with the lowest validation loss ...
    metric_for_best_model="eval_loss",
    greater_is_better=False, # False if using loss
    # ... and restore it when training ends, so the weights saved afterwards are
    # the best ones rather than those of the last step.
    load_best_model_at_end=True,
    # gradient_accumulation_steps=4, #
    # weight_decay=0.01
)

# Initialize custom trainer (CustomRewardTrainer comes from utils; loss_type and
# weight_strategy are its own options)
trainer = CustomRewardTrainer(
    model=model,
    args=training_args,
    train_dataset=final_splits['train'],
    eval_dataset=final_splits['validation'],
    # compute_metrics=trainer.compute_metrics,  # Use the custom metrics function
    processing_class=tokenizer,
    loss_type="huber",  # "mse" or "huber"
    weight_strategy="linear",  # "linear", "inverse", or None
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=50)] # use early stopping since we are using a high number of epochs
    # data_collator=RewardDataCollator()
)

# Show the device the trainer will run on
print(trainer.args.device)

cuda:0


In [25]:
# if not os.path.exists(FINAL_LORA_ADAPTERS):
# Run the training loop configured by the trainer
trainer.train()
# Store the trained model under FINAL_LORA_ADAPTERS; `model` is the PEFT-wrapped
# model, so presumably only the adapter weights are written -- TODO confirm
model.save_pretrained(FINAL_LORA_ADAPTERS)

# #TODO: not storing this properly I suppose, need to change

Step,Training Loss,Validation Loss,Model Preparation Time
10,2.0515,1.672177,0.004
20,1.7032,1.603025,0.004
30,1.5626,1.495673,0.004
40,1.2992,1.445699,0.004
50,1.336,1.327551,0.004
60,1.3864,1.321038,0.004
70,0.9942,1.314813,0.004
80,1.0424,1.247218,0.004
90,0.8871,1.124598,0.004
100,1.0485,1.206574,0.004


# Reload saved LoRA adapter for inference 

In [13]:
# Load a fresh copy of the base model with a single-output head ...
base_model_test = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=1)
# ... and attach the adapter stored under FINAL_LORA_ADAPTERS to it
new_model = PeftModel.from_pretrained(base_model_test, FINAL_LORA_ADAPTERS)
# new_model = new_model.merge_and_unload()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Build a second trainer around the reloaded model. Every setting mirrors the
# trainer used for training; only the model differs.
reloaded_trainer_kwargs = dict(
    model=new_model,
    args=training_args,
    train_dataset=final_splits['train'],
    eval_dataset=final_splits['validation'],
    # compute_metrics=trainer.compute_metrics,  # Use the custom metrics function
    processing_class=tokenizer,
    loss_type="huber",  # "mse" or "huber"
    weight_strategy="linear",  # "linear", "inverse", or None
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=50)] # use early stopping since we are using a high number of epochs
    # data_collator=RewardDataCollator()
)
trainer = CustomRewardTrainer(**reloaded_trainer_kwargs)

In [20]:
# Evaluate the model on the test set
test_results = trainer.evaluate(eval_dataset=final_splits['test'])
print("Test Results:", test_results)

Test Results: {'eval_loss': 0.5358006358146667, 'eval_model_preparation_time': 0.008, 'eval_runtime': 0.2558, 'eval_samples_per_second': 547.295, 'eval_steps_per_second': 35.183}


In [31]:
# evaluate model manually on some test cases
model.to(device)
model.eval()

#TODO: change tokenization function here!

# Inspect at most 20 examples; capping at the split size avoids an IndexError
# when the test split holds fewer rows than that.
test_split = final_splits['test']
num_manual_samples = min(20, len(test_split))

with torch.no_grad():
    for i in range(num_manual_samples):
        sample = test_split[i]
        # Score the precondition text and the response text as one sequence
        text = sample['precondition_text'] + " " + sample['response_text']
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding="max_length").to(device)
        outputs = model(**inputs)
        # The single output value of the model is the predicted rating
        prediction = outputs.logits.item()
        print(f"Sample {i+1}: Predicted Rating: {prediction}, True Rating: {sample['label']}")


Sample 1: Predicted Rating: 2.779296875, True Rating: 1
Sample 2: Predicted Rating: -0.07684326171875, True Rating: 0
Sample 3: Predicted Rating: 2.345703125, True Rating: 3
Sample 4: Predicted Rating: -0.010162353515625, True Rating: 0
Sample 5: Predicted Rating: 2.9140625, True Rating: 3
Sample 6: Predicted Rating: 0.10626220703125, True Rating: 0
Sample 7: Predicted Rating: 2.4453125, True Rating: 1
Sample 8: Predicted Rating: 0.07196044921875, True Rating: 0
Sample 9: Predicted Rating: 0.056854248046875, True Rating: 0
Sample 10: Predicted Rating: -0.040679931640625, True Rating: 0
Sample 11: Predicted Rating: 0.053863525390625, True Rating: 0
Sample 12: Predicted Rating: 0.17333984375, True Rating: 0
Sample 13: Predicted Rating: -0.03515625, True Rating: 0
Sample 14: Predicted Rating: 1.3251953125, True Rating: 1
Sample 15: Predicted Rating: 1.2685546875, True Rating: 0
Sample 16: Predicted Rating: 0.18017578125, True Rating: 0
Sample 17: Predicted Rating: 2.4453125, True Rating: 