# Fine-Tune with Reinforcement Learning (PPO) and PEFT to Generate Less-Toxic Summaries


* Make sure you change the kernel to **PyTorch 2.6** to run the notebook
* We mark **TODO** in the notebook cells to indicate the place where you need to complete the missing code. You can refer to the exercises in the course repository for code examples.

In [1]:
# import necessary packages
import os
import sys
import torch

!{sys.executable} -m pip install --upgrade transformers huggingface_hub peft \
  accelerate bitsandbytes datasets trl==0.11.4 ipywidgets evaluate tqdm

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# or use an input box on this notebook to copy/paste the token
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
USE_CPU = False
device = "xpu" if torch.xpu.is_available() else "cpu"
if USE_CPU:
    device = "cpu"
print(f"using device: {device}")

using device: xpu


In [5]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, GenerationConfig, Trainer, TrainingArguments
from datasets import load_dataset
from peft import PeftModel, PeftConfig, LoraConfig, TaskType, get_peft_model

# trl: Transformer Reinforcement Learning library
from trl import PPOTrainer, PPOConfig, AutoModelForSeq2SeqLMWithValueHead, SFTTrainer, SFTConfig, AutoModelForCausalLMWithValueHead
from trl import create_reference_model
from trl.core import LengthSampler

import torch
import evaluate

import numpy as np
import pandas as pd

# tqdm library makes the loops show a smart progress meter.
from tqdm import tqdm
tqdm.pandas()

## Load FLAN-T5 Model, Prepare Reward Model and Toxicity Evaluator

In [6]:
model_name="google/flan-t5-base"
huggingface_dataset_name = "knkarthick/dialogsum"

dataset_original = load_dataset(huggingface_dataset_name)

dataset_original

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [7]:
def build_dataset(model_name,
                  dataset_name,
                  input_min_text_length, 
                  input_max_text_length):

    # load dataset (only "train" part will be enough for this lab).
    dataset = load_dataset(dataset_name, split="train")
    
    # Filter the dialogues of length between input_min_text_length and input_max_text_length characters.
    dataset = dataset.filter(lambda x: len(x["dialogue"]) > input_min_text_length and len(x["dialogue"]) <= input_max_text_length, batched=False)

    # Prepare tokenizer. Setting device_map="auto" allows to switch between GPU and CPU automatically.
    tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")
    
    def tokenize(sample):
        # Wrap each dialogue with the instruction.
        prompt = f"""Summarize the following conversation.{sample["dialogue"]}Summary:"""
        
        sample["input_ids"] = tokenizer.encode(prompt)
        
        # This must be called "query", which is a requirement of our PPO library.
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    # Tokenize each dialogue.
    dataset = dataset.map(tokenize, batched=False)
    dataset.set_format(type="torch")
    
    # Split the dataset into train and test parts.
    dataset_splits = dataset.train_test_split(test_size=0.2, shuffle=False, seed=42)

    return dataset_splits

dataset = build_dataset(model_name=model_name,
                        dataset_name=huggingface_dataset_name,
                        input_min_text_length=200, 
                        input_max_text_length=1000)

print(dataset)

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'query'],
        num_rows: 8017
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'query'],
        num_rows: 2005
    })
})


In [8]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"\ntrainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

## Model Fine-Tuning

In [8]:
lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

#TODO: create tokenizer using AutoTokenizer class
#NOTE: you need to set device_map argument properly to choose XPU device
# tokenizer = ...
tokenizer = AutoTokenizer.from_pretrained(model_name)

#TODO: create model using AutoModelForSeq2SeqLM class
# model = ...
model = AutoModelForSeq2SeqLM.from_pretrained(model_name,
                                             device_map=device)

# create PEFT model for fine-tuning
peft_model = get_peft_model(model, lora_config)

print(f'PEFT model parameters to be updated:\n{print_number_of_trainable_model_parameters(peft_model)}\n')

def process_dataset(batch):
    prompt = [f'Summarize the following conversation:\n{dialogue}\n\nSummary:\n{summary}\n' for dialogue, summary in zip(batch['dialogue'], batch['dialogue'])]
    batch['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    batch['labels'] = tokenizer(batch["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    return batch

processed_dataset = dataset_original.map(process_dataset, batched=True)

output_dir = "peft-dialogue-finetuned"

#TODO: create trainer using SFTTrainer class
# trainer = SFTTrainer(...)
sft_config = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_dir="./logs",
    save_strategy="epoch",
    push_to_hub=False,
)

trainer = SFTTrainer(
    model=peft_model,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["test"],
    args=sft_config,
    tokenizer=tokenizer
)
trainer.train()

peft_model_path="./peft-dialogue-summary-checkpoint"

trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)


  Overriding a previously registered kernel for the same operator and the same dispatch key
  operator: aten::_cummax_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()
    registered at /build/pytorch/build/aten/src/ATen/RegisterSchema.cpp:6
  dispatch key: XPU
  previous kernel: registered at /build/pytorch/build/aten/src/ATen/RegisterCPU.cpp:30476
       new kernel: registered at /build/intel-pytorch-extension/build/Release/csrc/gpu/csrc/aten/generated/ATen/RegisterXPU.cpp:2971 (function operator())


PEFT model parameters to be updated:

trainable model parameters: 3538944
all model parameters: 251116800
percentage of trainable model parameters: 1.41%



  super().__init__(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
2025-03-20 02:14:21,914 - _logger.py - IPEX - INFO - Currently split master weight for xpu only support sgd
2025-03-20 02:14:21,936 - wandb.jupyter - ERROR - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msteven_tran1[0m ([33msteven_tran1-umass-lowell[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
500,0.0796
1000,0.0118
1500,0.0075
2000,0.0055
2500,0.005
3000,0.0039
3500,0.0035
4000,0.0034
4500,0.0032
5000,0.0028


('./peft-dialogue-summary-checkpoint/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint/spiece.model',
 './peft-dialogue-summary-checkpoint/added_tokens.json',
 './peft-dialogue-summary-checkpoint/tokenizer.json')

In [9]:
peft_model_path="./peft-dialogue-summary-checkpoint"

ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(peft_model_path,                                                               
                                                               torch_dtype=torch.bfloat16,
                                                               device_map=device,
                                                               is_trainable=True)

print(f'PPO model parameters to be updated (ValueHead + 769 params):\n{print_number_of_trainable_model_parameters(ppo_model)}\n')
print(ppo_model.v_head)

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.


PPO model parameters to be updated (ValueHead + 769 params):

trainable model parameters: 3539713
all model parameters: 251117569
percentage of trainable model parameters: 1.41%

ValueHead(
  (dropout): Dropout(p=0.1, inplace=False)
  (summary): Linear(in_features=768, out_features=1, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


## Setup Reward Model

![](img/hf_facebook_hatespeec_reward_model.png)

In [10]:
toxicity_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"

#TODO: create toxicity_tokenizer
#toxicity_tokenizer = ...
toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name)

#TODO: create toxicity_model using AutoModelForSequenceClassification class
# toxicity_model = ...
toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name,
                                                                    torch_dtype=torch.bfloat16,
                                                                    device_map=device)

print(toxicity_model.config.id2label)

{0: 'nothate', 1: 'hate'}


![](img/rlhf_reward_model_binary_classifier.png)

In [11]:
non_toxic_text = "You are a great person and I like you"

toxicity_input_ids = toxicity_tokenizer(non_toxic_text, return_tensors="pt").input_ids.to(device)

toxicity_model.config.id2label = {0: 'not_hate', 1: 'hate'}
toxicity_model.config.label2id = {'not_hate': 0, 'hate': 1}

#TODO: perform model inference on the input tokens
#TODO: and capture the logits (the outputs from the last level of the neural network)
#NOTE: please refer to lecture slides
# logits = ...
logits = toxicity_model(toxicity_input_ids).logits
print(f'logits [not hate, hate]: {logits.tolist()[0]}')

#TODO: Print the probabilities for [not hate, hate]
#TODO: please refer to lecture slides
# probabilities = ...
probabilities = logits.softmax(dim=-1).tolist()[0]
print(f'probabilities [not hate, hate]: {probabilities}')

# get the logits for "not hate" - this is the reward!
# TODO: please refer to lecture slides
# not_hate_index = ...
# nothate_reward = ...
not_hate_index = toxicity_model.config.label2id['not_hate']
nothate_reward = logits[:, not_hate_index].tolist()[0]
print(f'reward (high): {nothate_reward}')

logits [not hate, hate]: [4.65625, -4.25]
probabilities [not hate, hate]: [1.0, 0.0001354217529296875]
reward (high): 4.65625


In [12]:
toxic_text = "You are disgusting and terrible and i damn hate you"

#TODO: tokenize the toxic text
# toxicity_input_ids = ...
#toxicity_input_ids = toxicity_tokenizer(toxic_text, return_tensors="pt").input_ids.to(device)
toxicity_input_ids = toxicity_tokenizer("test", return_tensors="pt").input_ids.to("xpu")

#TODO: perform model inference on the input tokens
#TODO: and capture the logits (the outputs from the last level of the neural network)
#NOTE: please refer to lecture slides
# logits = ...
logits = toxicity_model(toxicity_input_ids).logits
print(f'logits [not hate, hate]: {logits.tolist()[0]}')

#TODO: Print the probabilities for [not hate, hate]
#TODO: please refer to lecture slides
# probabilities = ...
probabilities = logits.softmax(dim=-1).tolist()[0]
print(f'probabilities [not hate, hate]: {probabilities}')

# get the logits for "not hate" - this is the reward!
# TODO: please refer to lecture slides
# not_hate_index = ...
# nothate_reward = ...
not_hate_index = toxicity_model.config.label2id['not_hate']
nothate_reward = logits[:, not_hate_index].tolist()[0]
print(f'reward (high): {nothate_reward}')

logits [not hate, hate]: [4.09375, -3.484375]
probabilities [not hate, hate]: [1.0, 0.00051116943359375]
reward (high): 4.09375


In [13]:
sentiment_pipe = pipeline("sentiment-analysis", 
                          model=toxicity_model_name,
                          tokenizer=toxicity_tokenizer,
                          max_length=512,
                          truncation=True,
                          device=device)
reward_logits_kwargs = {
    "top_k": None, # Return all scores.
    "function_to_apply": "none", # Set to "none" to retrieve raw logits.
    "batch_size": 16
}

reward_probabilities_kwargs = {
    "top_k": None, # Return all scores.
    "function_to_apply": "softmax", # Set to "softmax" to apply softmax and retrieve probabilities.
    "batch_size": 16
}

print("Reward model output for non-toxic text:")
print(sentiment_pipe(non_toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(non_toxic_text, **reward_probabilities_kwargs))
print("\nReward model output for toxic text:")
print(sentiment_pipe(toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(toxic_text, **reward_probabilities_kwargs))

Device set to use xpu


Reward model output for non-toxic text:
[{'label': 'nothate', 'score': 4.641769886016846}, {'label': 'hate', 'score': -4.233262538909912}]
[{'label': 'nothate', 'score': 0.9998601675033569}, {'label': 'hate', 'score': 0.00013981753727421165}]

Reward model output for toxic text:
[{'label': 'hate', 'score': 1.5835528373718262}, {'label': 'nothate', 'score': -2.061077833175659}]
[{'label': 'hate', 'score': 0.974534273147583}, {'label': 'nothate', 'score': 0.025465749204158783}]


In [14]:
print(sentiment_pipe(non_toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(non_toxic_text, **reward_probabilities_kwargs))

[{'label': 'nothate', 'score': 4.641769886016846}, {'label': 'hate', 'score': -4.233262538909912}]
[{'label': 'nothate', 'score': 0.9998601675033569}, {'label': 'hate', 'score': 0.00013981753727421165}]


In [15]:
print(sentiment_pipe(toxic_text, **reward_logits_kwargs))
print(sentiment_pipe(toxic_text, **reward_probabilities_kwargs))

[{'label': 'hate', 'score': 1.5835542678833008}, {'label': 'nothate', 'score': -2.0610787868499756}]
[{'label': 'hate', 'score': 0.974534273147583}, {'label': 'nothate', 'score': 0.025465717539191246}]


## Evaluate Toxicity

In [16]:
import evaluate

#TODO: create toxicity_evaluator using evaluate.load()
#NOTE: please refer to exercise Toxicity_Detector_by_Meta.ipynb
# toxicity_evaluator = ...
toxicity_evaluator = evaluate.load("toxicity", 
                                   toxicity_model_name,
                                   module_type="measurement",
                                   toxic_label="hate")

Device set to use xpu:0


In [17]:
toxicity_score = toxicity_evaluator.compute(predictions=[
    non_toxic_text
])

print("Toxicity score for non-toxic text:")
print(toxicity_score["toxicity"])

toxicity_score = toxicity_evaluator.compute(predictions=[
    toxic_text
])

print("\nToxicity score for toxic text:")
print(toxicity_score["toxicity"])

Toxicity score for non-toxic text:
[0.00013981739175505936]

Toxicity score for toxic text:
[0.9745341539382935]


In [18]:
def evaluate_toxicity(model, 
                      toxicity_evaluator, 
                      tokenizer, 
                      dataset, 
                      num_samples):

    max_new_tokens=100

    toxicities = []
    input_texts = []
    for i, sample in tqdm(enumerate(dataset)):
        input_text = sample["query"]

        if i > num_samples:
            break
            
        input_ids = tokenizer(input_text, return_tensors="pt", padding=True).input_ids.to(device)
        
        generation_config = GenerationConfig(max_new_tokens=max_new_tokens,
                                             tok_k=0.0,
                                             top_p=1.0,
                                             do_sample=True)

        response_token_ids = model.generate(input_ids=input_ids,
                                            generation_config=generation_config)
        
        generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens=True)
        
        toxicity_score = toxicity_evaluator.compute(predictions=[(input_text + " " + generated_text)])

        toxicities.extend(toxicity_score["toxicity"])

    # TODO: Compute mean & std using numpy functions.
    # mean = ...
    # std = ...
    mean = np.mean(toxicities)
    std = np.std(toxicities)
    return mean, std

In [19]:
tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")


mean_before_detoxification, std_before_detoxification = evaluate_toxicity(model=ppo_model, 
                                                                          toxicity_evaluator=toxicity_evaluator, 
                                                                          tokenizer=tokenizer, 
                                                                          dataset=dataset["test"], 
                                                                          num_samples=10)

print(f'toxicity [mean, std] before detox: [{mean_before_detoxification}, {std_before_detoxification}]')

11it [00:27,  2.50s/it]

toxicity [mean, std] before detox: [0.04711535902672701, 0.048700035681818986]





## Perform Fine-Tuning to Detoxify the Summaries
Optimize a RL policy against the reward model using Proximal Policy Optimization (PPO).

In [20]:
#TODO: create a refenence model to be used as a frozen model
# ref_model = ...
tokenizer = AutoTokenizer.from_pretrained(peft_model_path)
ref_model = create_reference_model(ppo_model)

print(f'Reference model parameters to be updated:\n{print_number_of_trainable_model_parameters(ref_model)}\n')

Reference model parameters to be updated:

trainable model parameters: 0
all model parameters: 251117569
percentage of trainable model parameters: 0.00%



![](img/rlhf_kl_divergence.png)

In [25]:
from trl import PPOConfig, PPOTrainer

learning_rate=1.41e-5
max_ppo_epochs=1
mini_batch_size=4
batch_size=16

config = PPOConfig(
    model_name=model_name,    
    learning_rate=learning_rate,
    ppo_epochs=max_ppo_epochs,
    mini_batch_size=mini_batch_size,
    batch_size=batch_size
)

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

#TODO: create ppo_trainer using PPOTrainer class
# ppo_trainer = ...
ppo_trainer = PPOTrainer(
    config=config,
    model=ppo_model,            # Tunable model
    ref_model=ref_model,    # Frozen reference model
    tokenizer=tokenizer,
    dataset=dataset["train"],
    data_collator=collator
)

### Fine-Tune the Model

In [None]:
output_min_length = 100
output_max_length = 400
output_length_sampler = LengthSampler(output_min_length, output_max_length)

generation_kwargs = {
    "min_length": 5,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True
}

reward_kwargs = {
    "top_k": None, # Return all scores.
    "function_to_apply": "none", # You want the raw logits without softmax.
    "batch_size": 16
}

max_ppo_steps = 10

for step, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    # Break when you reach max_steps.
    if step >= max_ppo_steps:
        break   

    prompt_tensors = batch["input_ids"]

    # Get response from FLAN-T5/PEFT LLM.
    summary_tensors = []

    for prompt_tensor in prompt_tensors:
        max_new_tokens = output_length_sampler()        
            
        generation_kwargs["max_new_tokens"] = max_new_tokens
        summary = ppo_trainer.generate(prompt_tensor, **generation_kwargs)
        
        summary_tensors.append(summary.squeeze()[-max_new_tokens:])
        
    # This needs to be called "response".
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in summary_tensors]

    # Compute reward outputs.
    query_response_pairs = [q + r for q, r in zip(batch["query"], batch["response"])]    
    rewards = sentiment_pipe(query_response_pairs, **reward_kwargs)

    # You use the `nothate` item because this is the score for the positive `nothate` class.
    reward_tensors = [torch.tensor(reward[not_hate_index]["score"]) for reward in rewards]    

    # Run PPO step.
    stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
    ppo_trainer.log_stats(stats, batch, reward_tensors)
    
    print(f'objective/kl: {stats["objective/kl"]}')
    print(f'ppo/returns/mean: {stats["ppo/returns/mean"]}')
    print(f'ppo/policy/advantages_mean: {stats["ppo/policy/advantages_mean"]}')
    print('-'.join('' for x in range(100)))

0it [00:00, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
1it [01:24, 84.10s/it]

objective/kl: 301.65875244140625
ppo/returns/mean: -4.303833484649658
ppo/policy/advantages_mean: 0.021036043763160706
---------------------------------------------------------------------------------------------------


2it [02:52, 86.42s/it]

objective/kl: 328.2289123535156
ppo/returns/mean: -4.522107124328613
ppo/policy/advantages_mean: -0.022579044103622437
---------------------------------------------------------------------------------------------------


3it [04:05, 80.25s/it]

objective/kl: 232.40283203125
ppo/returns/mean: -3.4294774532318115
ppo/policy/advantages_mean: 0.05137199908494949
---------------------------------------------------------------------------------------------------


4it [05:23, 79.70s/it]

objective/kl: 223.38555908203125
ppo/returns/mean: -3.0828309059143066
ppo/policy/advantages_mean: 0.04745945334434509
---------------------------------------------------------------------------------------------------


5it [06:39, 78.15s/it]

objective/kl: 249.36813354492188
ppo/returns/mean: -3.7731986045837402
ppo/policy/advantages_mean: 0.0356220081448555
---------------------------------------------------------------------------------------------------


6it [08:06, 81.14s/it]

objective/kl: 261.93426513671875
ppo/returns/mean: -3.579921245574951
ppo/policy/advantages_mean: 0.021883711218833923
---------------------------------------------------------------------------------------------------


7it [09:22, 79.68s/it]

objective/kl: 193.69882202148438
ppo/returns/mean: -2.900151252746582
ppo/policy/advantages_mean: -0.015611249953508377
---------------------------------------------------------------------------------------------------


## Evaluate the Model Quantitatively

In [None]:
mean_after_detoxification, std_after_detoxification = evaluate_toxicity(model=ppo_model, 
                                                                        toxicity_evaluator=toxicity_evaluator, 
                                                                        tokenizer=tokenizer, 
                                                                        dataset=dataset["test"], 
                                                                        num_samples=10)
print(f'toxicity [mean, std] after detox: [{mean_after_detoxification}, {std_after_detoxification}]')

In [None]:
mean_improvement = (mean_before_detoxification - mean_after_detoxification) / mean_before_detoxification
std_improvement = (std_before_detoxification - std_after_detoxification) / std_before_detoxification

print(f'Percentage improvement of toxicity score after detoxification:')
print(f'mean: {mean_improvement*100:.2f}%')
print(f'std: {std_improvement*100:.2f}%')

## Evaluate the Model Qualitatively

In [None]:
# Choose a few samples in the dataset as prompts to the reference model and the ppo model.
# Check their completions and compare the reward values given by the toxicity evaluator.
# NOTE: This section is not graded.