In [1]:
!pip install evaluate

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd
import json

with open('human_annotated_dataset.json', 'r') as f:
    json_data = f.read()

# Convert JSON to DataFrame
data = json.loads(json_data)
df = pd.DataFrame(data)

# Map labels to numerical values
label_map = {'hedge': 2, 'authority': 1, 'none': 0}

# Function to create separate rows for each marker
def create_marker_rows(row):
    statement = row['statement']
    matched_terms = row['matched_terms']
    rows = []
    
    for term, details in matched_terms.items():
        term_upper = term.upper()
        label = details['correct']
        
        # Add start and end markers
        context = statement.replace(f'<{term_upper}>', f'[START] {term} [END]')
        context = context.replace('<', '').replace('>', '')
        if row['previous_statement'] == "None":
            broad_context = "Speaker 1: " + context
        else:
            broad_context = "Speaker 1: " + row['previous_statement'] + " Speaker 2: " + context
        rows.append({
            'transcript_id': row['transcript_id'],
            'statement_id': row['statement_id'],
            'context': broad_context,
            'label': label_map[label]
        })
    return rows

# Create a new DataFrame with separate rows for each marker
new_rows = []
for _, row in df.iterrows():
    new_rows.extend(create_marker_rows(row))

new_df = pd.DataFrame(new_rows)

# Check the resulting DataFrame
print(new_df[['context', 'label']])

                                                context  label
0     Speaker 1: A moderator, first of all. Howie, y...      2
1     Speaker 1: A moderator, first of all. Howie, y...      2
2     Speaker 1: A moderator, first of all. Howie, y...      0
3     Speaker 1: A moderator, first of all. Howie, y...      0
4     Speaker 1: A moderator, first of all. Howie, y...      0
...                                                 ...    ...
1332  Speaker 1: We were looking at some other thing...      0
1333  Speaker 1: I cannot speculate on anything past...      0
1334  Speaker 1: Yes. Look, as I said before, I thin...      2
1335  Speaker 1: Yes. Look, as I said before, I thin...      0
1336  Speaker 1: Yes. Look, as I said before, I thin...      2

[1337 rows x 2 columns]


### Pull in Random Sampling of "Non-PPRMs"

In [3]:
import json
import pandas as pd
import random
import uuid

# Load the non-matching utterances dataset
with open('non_matching_utterances_sample.json', 'r') as f:
    non_matching_data = json.load(f)
print(len(non_matching_data))
non_matching_df = pd.DataFrame(non_matching_data)

# Adjust the function to ensure it correctly handles word selection
def augment_non_matching_rows(row):
    rows = []
    statement = row['statement']
    words = statement.split()
    random_sample_num = min(2, len(words))
    random_indices = random.sample(range(len(words)), random_sample_num)
    for i in range(random_sample_num):
        idx = random_indices[i]
        statement = words.copy()
        statement[idx] = '[START] ' + statement[idx] + ' [END]'
        context = ' '.join(statement)
        if row['previous_statement'] == "None":
            broad_context = "Speaker 1: " + context
        else:
            broad_context = "Speaker 1: " + row['previous_statement'] + " Speaker 2: " + context
        rows.append({
            'transcript_id': row['transcript_id'],
            'statement_id': str(uuid.uuid4()),
            'context': broad_context,
            'label': label_map['none']
        })
    return rows

# Create a new DataFrame with separate rows for each marker
non_match_rows = []
for _, row in non_matching_df.iterrows():
    non_match_rows.extend(augment_non_matching_rows(row))

other_df = pd.DataFrame(non_match_rows)

# Combine the existing DataFrame with the augmented data
# Ensure new_df is already defined and contains initial data
augmented_df = pd.concat([new_df, other_df], ignore_index=True)

# Check the resulting DataFrame
print(augmented_df[['context', 'label']])
print(augmented_df.head()["context"][3])
# for k, v in augmented_df[:1].items():
#     print(v)

print("Total Samples:", len(augmented_df))

100
                                                context  label
0     Speaker 1: A moderator, first of all. Howie, y...      2
1     Speaker 1: A moderator, first of all. Howie, y...      2
2     Speaker 1: A moderator, first of all. Howie, y...      0
3     Speaker 1: A moderator, first of all. Howie, y...      0
4     Speaker 1: A moderator, first of all. Howie, y...      0
...                                                 ...    ...
1523  Speaker 1: The end is etched in stone. Exactly...      0
1524  Speaker 1: Dylann Roof is not an immigrant. Sp...      0
1525  Speaker 1: Dylann Roof is not an immigrant. Sp...      0
1526  Speaker 1: The White House is reiterating that...      0
1527  Speaker 1: The White House is reiterating that...      0

[1528 rows x 2 columns]
Speaker 1: A moderator, first of all. Howie, you know, some people have compared her to Nurse Ratched. Really what you have here is a junior high school sort of history teacher who doesn't really know the lesson pla

In [4]:
# Tokenize and Train the Model
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)  # Adjust num_labels to 3

device='cuda' if torch.cuda.is_available() else 'cpu'
# device='cpu'
model.to(device)
print("device:", device)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


device: cuda


In [5]:
from transformers import TrainerCallback, TrainerState, TrainerControl
import os
import datetime
import numpy as np

class CustomSaveCallback(TrainerCallback):
    "A custom callback that saves the model at the end of each epoch with a unique name."
    def __init__(self, save_path, batch_size, tokenizer):
        self.save_path = save_path
        self.batch_size = batch_size
        self.tokenizer = tokenizer  # Save the tokenizer as an instance variable

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        # Format the directory name with the epoch number and batch size
        epoch_dir = f"{self.save_path}/model_checkpoint_epoch-{state.epoch}_batch-{self.batch_size}"
        if not os.path.exists(epoch_dir):
            os.makedirs(epoch_dir)
        # Save the model and tokenizer in this directory
        kwargs['model'].save_pretrained(epoch_dir)
        self.tokenizer.save_pretrained(epoch_dir)  # Use the instance variable
        print(f"Saved model and tokenizer to {epoch_dir}")

# Define a function to compute metrics
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)

    # Compute metrics
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    precision = precision_metric.compute(predictions=predictions, references=labels, average="macro")
    recall = recall_metric.compute(predictions=predictions, references=labels, average="macro")
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="macro")

    # Compute F1 for each label individually
    f1_per_class = f1_metric.compute(predictions=predictions, references=labels, average=None)
    label_f1_scores = f1_per_class['f1']

    return {
        'accuracy': accuracy['accuracy'],
        'precision': precision['precision'],
        'recall': recall['recall'],
        'f1': f1['f1'],
        'f1_label_0': label_f1_scores[0],
        'f1_label_1': label_f1_scores[1],
        'f1_label_2': label_f1_scores[2]
    }

# Running on non-Augmented Data

### Dataset - Train/Eval/Test, Evaluate Metrics

In [6]:
regen_data = True
import pickle

import logging
from sklearn.model_selection import train_test_split

# Set the logging level to ERROR to reduce output clutter
logging.basicConfig(level=logging.ERROR)

# Tokenization function
def tokenize_function(examples):
    return tokenizer(examples['context'], padding="max_length", truncation=True, max_length=512)


# File paths for saved datasets
train_file = 'train_dataset_nonaug.pkl'
eval_file = 'eval_dataset_nonaug.pkl'
test_file = 'test_dataset_nonaug.pkl'

# Function to save a dataset
def save_dataset(data, filename):
    with open(filename, 'wb') as file:
        pickle.dump(data, file)

# Function to load a dataset
def load_dataset(filename):
    with open(filename, 'rb') as file:
        return pickle.load(file)
# Check if regeneration of data is needed
if regen_data:
    print("Generating new datasets")
    # Split the data into training and evaluation sets
    train_eval_df, test_df = train_test_split(new_df, test_size=0.15, random_state=42, stratify=new_df['label'])
    # Ensure to use train_eval_df for further splitting and stratification
    train_df, eval_df = train_test_split(train_eval_df, test_size=0.1764705882352941, random_state=42, stratify=train_eval_df['label'])
    save_dataset(train_df, train_file)
    save_dataset(eval_df, eval_file)
    save_dataset(test_df, test_file)
    print("Length of files:", len(train_df), len(eval_df), len(test_df))
elif os.path.exists(train_file) and os.path.exists(eval_file) and os.path.exists(test_file):
    # Load the datasets
    train_df = load_dataset(train_file)
    eval_df = load_dataset(eval_file)
    test_df = load_dataset(test_file)
    print("Length of files:", len(train_df), len(eval_df), len(test_df))
else:
    # Split the data into training and evaluation sets
    train_eval_df, test_df = train_test_split(augmented_df, test_size=0.15, random_state=42, stratify = augmented_df['label'])
    train_df, eval_df = train_test_split(train_eval_df, test_size=0.1764705882352941, random_state=42, stratify = train_eval_df['label'])
    save_dataset(train_df, train_file)
    save_dataset(eval_df, eval_file)
    save_dataset(test_df, test_file)
    print("Length of files:", len(train_df), len(eval_df), len(test_df))
    
from datasets import Dataset
# Convert DataFrame to Dataset
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)

# Tokenize the datasets
train_tokenized = train_dataset.map(tokenize_function, batched=True)
eval_tokenized = eval_dataset.map(tokenize_function, batched=True)

import numpy as np
from datasets import load_metric
import evaluate

accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision", config="multiclass")
recall_metric = evaluate.load("recall", config="multiclass")
f1_metric = evaluate.load("f1", config="multiclass")


Generating new datasets
Length of files: 935 201 201


Map: 100%|██████████| 935/935 [00:02<00:00, 333.42 examples/s]
Map: 100%|██████████| 201/201 [00:00<00:00, 319.18 examples/s]


### Generate Weighted Loss

In [7]:
import torch
from sklearn.utils.class_weight import compute_class_weight

# Example class counts: You might need to calculate or provide actual counts
class_counts = train_df['label'].value_counts().sort_index()
print("Class counts:", class_counts)

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(train_df['label']), y=train_df['label'])
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)
class_weights_tensor = class_weights_tensor.to(device)
# Print class weights to verify
print("Class Weights:", class_weights_tensor)

Class counts: label
0    490
1    136
2    309
Name: count, dtype: int64
Class Weights: tensor([0.6361, 2.2917, 1.0086], device='cuda:0')


### Custom Weighted Loss Function

In [8]:
import torch.nn as nn

class CustomModel(nn.Module):
    def __init__(self, model, class_weights):
        super(CustomModel, self).__init__()
        self.model = model  # this should be an instance of a Hugging Face PreTrainedModel
        self.class_weights = class_weights

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        if labels is not None:
            self.class_weights = self.class_weights.to(self.model.device)
            loss_fct = nn.CrossEntropyLoss(weight=self.class_weights)
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
            return (loss, logits)
        return logits

    def save_pretrained(self, save_directory):
        """Save the contained PreTrainedModel to a directory."""
        self.model.save_pretrained(save_directory)

    def from_pretrained(self, load_directory):
        """Load the contained PreTrainedModel from a directory."""
        self.model = self.model.from_pretrained(load_directory)

### Train Weighted Loss Model

In [9]:
# !pip install accelerate -U

In [10]:
from transformers import AutoModelForSequenceClassification

# Load the pre-trained model
original_model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Initialize the model and ensure it and its weights are on the correct device
model_with_loss = CustomModel(original_model.to(device), class_weights_tensor.to(device))

from transformers import Trainer, TrainingArguments

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=6,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
)

# Initialize Trainer with the custom model
trainer = Trainer(
    model=model_with_loss,  # Ensure this is your custom model accepting weights
    args=training_args,
    train_dataset=train_tokenized,  # Ensure datasets are correctly tokenized
    eval_dataset=eval_tokenized,
    compute_metrics=compute_metrics,  # Custom metrics function if needed
    callbacks=[CustomSaveCallback('./weighted_checkpoints', training_args.per_device_train_batch_size, tokenizer)]
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

import datetime
# Get current datetime to use as a unique identifier
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Define model parameters to include in the filename
num_epochs = training_args.num_train_epochs
batch_size = training_args.per_device_train_batch_size

# Define the directory using the timestamp and model parameters
model_dir = f'./models/weighted_model_epochs-{num_epochs}_batch-{batch_size}_{current_time}'
tokenizer_dir = f'./models/weighted_tokenizer_epochs-{num_epochs}_batch-{batch_size}_{current_time}'

# Save the model and tokenizer with detailed names
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved in directory: {model_dir}")
print(f"Tokenizer saved in directory: {tokenizer_dir}")

# Print evaluation results
print("Evaluation results:", eval_results)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,F1 Label 0,F1 Label 1,F1 Label 2
1,No log,1.098872,0.358209,0.236879,0.360009,0.262821,0.538462,0.25,0.0
2,No log,1.066049,0.482587,0.432145,0.433361,0.374875,0.65812,0.314607,0.151899
3,No log,0.534482,0.835821,0.81987,0.814909,0.817295,0.867925,0.77193,0.81203
4,No log,0.697171,0.825871,0.810222,0.832942,0.81847,0.844221,0.8,0.811189
5,0.813900,0.73505,0.865672,0.884474,0.845938,0.862489,0.883721,0.867925,0.835821
6,0.813900,0.655482,0.880597,0.89443,0.869439,0.879733,0.895238,0.888889,0.855072


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-1.0_batch-8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-2.0_batch-8
Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-3.0_batch-8
Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-4.0_batch-8
Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-5.0_batch-8
Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-6.0_batch-8


Evaluation results: {'eval_loss': 0.6554823517799377, 'eval_accuracy': 0.8805970149253731, 'eval_precision': 0.8944301994301993, 'eval_recall': 0.8694393512220447, 'eval_f1': 0.8797331492983668, 'eval_f1_label_0': 0.8952380952380953, 'eval_f1_label_1': 0.8888888888888888, 'eval_f1_label_2': 0.855072463768116, 'eval_runtime': 11.9856, 'eval_samples_per_second': 16.77, 'eval_steps_per_second': 2.169, 'epoch': 6.0}
Model saved in directory: ./models/weighted_model_epochs-6_batch-8_2024-06-06_15-17-33
Tokenizer saved in directory: ./models/weighted_tokenizer_epochs-6_batch-8_2024-06-06_15-17-33
Evaluation results: {'eval_loss': 0.6554823517799377, 'eval_accuracy': 0.8805970149253731, 'eval_precision': 0.8944301994301993, 'eval_recall': 0.8694393512220447, 'eval_f1': 0.8797331492983668, 'eval_f1_label_0': 0.8952380952380953, 'eval_f1_label_1': 0.8888888888888888, 'eval_f1_label_2': 0.855072463768116, 'eval_runtime': 11.9856, 'eval_samples_per_second': 16.77, 'eval_steps_per_second': 2.169, 

### Train Standard Model

In [11]:
# Define training arguments and initialize the trainer
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=6,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
)

# Initialize Trainer with the custom callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    compute_metrics=compute_metrics,
    callbacks=[CustomSaveCallback('./unique_checkpoints', training_args.per_device_train_batch_size, tokenizer)]  # Pass the tokenizer here
)


# Optional: Evaluate the model after training is complete
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()


# Evaluate the model
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

import datetime
# Get current datetime to use as a unique identifier
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Define model parameters to include in the filename
num_epochs = training_args.num_train_epochs
batch_size = training_args.per_device_train_batch_size

# Define the directory using the timestamp and model parameters
model_dir = f'./models/standard_model_epochs-{num_epochs}_batch-{batch_size}_{current_time}'
tokenizer_dir = f'./models/standard_tokenizer_epochs-{num_epochs}_batch-{batch_size}_{current_time}'

# Save the model and tokenizer with detailed names
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved in directory: {model_dir}")
print(f"Tokenizer saved in directory: {tokenizer_dir}")

# Print evaluation results
print("Evaluation results:", eval_results)

Evaluation results: {'eval_loss': 1.1028903722763062, 'eval_accuracy': 0.3681592039800995, 'eval_precision': 0.34761904761904755, 'eval_recall': 0.35113989603254403, 'eval_f1': 0.29903267278450146, 'eval_f1_label_0': 0.3013698630136986, 'eval_f1_label_1': 0.12, 'eval_f1_label_2': 0.47572815533980584, 'eval_runtime': 8.1016, 'eval_samples_per_second': 24.81, 'eval_steps_per_second': 3.209}


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

### Train Standard with Larger Batch Size

In [None]:
# Define training arguments and initialize the trainer
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=6,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
)

# Initialize Trainer with the custom callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    compute_metrics=compute_metrics,
    callbacks=[CustomSaveCallback('./unique_checkpoints', training_args.per_device_train_batch_size, tokenizer)]  # Pass the tokenizer here
)


# Optional: Evaluate the model after training is complete
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()

Evaluation results: {'eval_loss': 0.7244259119033813, 'eval_accuracy': 0.8407960199004975, 'eval_precision': 0.854400236988424, 'eval_recall': 0.8154216202361941, 'eval_f1': 0.8307566100839275, 'eval_f1_label_0': 0.8584905660377359, 'eval_f1_label_1': 0.8076923076923077, 'eval_f1_label_2': 0.8260869565217391, 'eval_runtime': 1.8258, 'eval_samples_per_second': 110.088, 'eval_steps_per_second': 7.12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,F1 Label 0,F1 Label 1,F1 Label 2
1,No log,0.746253,0.845771,0.848554,0.824284,0.832476,0.859903,0.792453,0.84507
2,No log,0.853981,0.840796,0.845436,0.815422,0.827687,0.858491,0.792453,0.832117
3,No log,0.882762,0.835821,0.850442,0.817995,0.82802,0.84878,0.807692,0.827586
4,No log,0.956428,0.840796,0.853388,0.817327,0.830544,0.861244,0.807692,0.822695
5,No log,0.94556,0.845771,0.847619,0.818566,0.830193,0.872038,0.792453,0.826087
6,No log,1.03954,0.845771,0.857276,0.820472,0.83431,0.866667,0.807692,0.828571


Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-1.0_batch-16
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-2.0_batch-16
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-3.0_batch-16
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-4.0_batch-16
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-5.0_batch-16
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-6.0_batch-16


### Save Final Model Version

In [None]:
import datetime
# Get current datetime to use as a unique identifier
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Define model parameters to include in the filename
num_epochs = training_args.num_train_epochs
batch_size = training_args.per_device_train_batch_size

# Define the directory using the timestamp and model parameters
model_dir = f'./models/model_epochs-{num_epochs}_batch-{batch_size}_{current_time}'
tokenizer_dir = f'./models/tokenizer_epochs-{num_epochs}_batch-{batch_size}_{current_time}'

# Save the model and tokenizer with detailed names
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved in directory: {model_dir}")
print(f"Tokenizer saved in directory: {tokenizer_dir}")

# Print evaluation results
print("Evaluation results:", eval_results)

Model saved in directory: ./models/model_epochs-6_batch-16_2024-06-05_14-04-18
Tokenizer saved in directory: ./models/tokenizer_epochs-6_batch-16_2024-06-05_14-04-18
Evaluation results: {'eval_loss': 0.5747907757759094, 'eval_accuracy': 0.9003831417624522, 'eval_precision': 0.7691249388923808, 'eval_recall': 0.8673491673491673, 'eval_f1': 0.8080112044817928, 'eval_f1_label_0': 0.9411764705882353, 'eval_f1_label_1': 0.64, 'eval_f1_label_2': 0.8428571428571429, 'eval_runtime': 2.1672, 'eval_samples_per_second': 120.432, 'eval_steps_per_second': 7.844, 'epoch': 6.0}


In [None]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Compute the confusion matrix
cm = confusion_matrix(true_labels, predictions, labels=unique_labels)
display_labels = [reversed_label_map[label] for label in unique_labels]

# Display the confusion matrix
fig, ax = plt.subplots(figsize=(10, 10))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
disp.plot(values_format='d', cmap='Blues', ax=ax)
plt.title('Confusion Matrix')
plt.show()

NameError: name 'true_labels' is not defined