In [8]:
# !pip install evaluate

In [7]:
import pandas as pd
import json

with open('human_annotated_dataset.json', 'r') as f:
    json_data = f.read()

# Convert JSON to DataFrame
data = json.loads(json_data)
df = pd.DataFrame(data)

# Map labels to numerical values
label_map = {'hedge': 2, 'authority': 1, 'none': 0}

# Function to create separate rows for each marker
def create_marker_rows(row):
    statement = row['statement']
    matched_terms = row['matched_terms']
    rows = []
    
    for term, details in matched_terms.items():
        term_upper = term.upper()
        label = details['correct']
        
        # Add start and end markers
        context = statement.replace(f'<{term_upper}>', f'[START] {term} [END]')
        context = context.replace('<', '').replace('>', '')
        if row['previous_statement'] == "None":
            broad_context = "Speaker 1: " + context
        else:
            broad_context = "Speaker 1: " + row['previous_statement'] + " Speaker 2: " + context
        rows.append({
            'transcript_id': row['transcript_id'],
            'statement_id': row['statement_id'],
            'context': broad_context,
            'label': label_map[label]
        })
    return rows

# Create a new DataFrame with separate rows for each marker
new_rows = []
for _, row in df.iterrows():
    new_rows.extend(create_marker_rows(row))

new_df = pd.DataFrame(new_rows)

# Check the resulting DataFrame
print(new_df[['context', 'label']])

                                                context  label
0     Speaker 1: And so you'll see states look to Ma...      0
1     Speaker 1: We have changed. The thing is thing...      2
2     Speaker 1: And before we head to break, we hav...      0
3     Speaker 1: Too bizarre? Speaker 2: Yes, yes. I...      0
4     Speaker 1: Mr. President, yesterday Senators L...      0
...                                                 ...    ...
1156  Speaker 1: And the sales pitch is different th...      0
1157  Speaker 1: Everything about that trip was tele...      0
1158  Speaker 1: Earlier, we talked with a Utah cong...      0
1159  Speaker 1: He does. He's doubled and he's trip...      0
1160  Speaker 1: (Through interpreter) The boys here...      0

[1161 rows x 2 columns]


### Pull in Random Sampling of "Non-PPRMs"

In [8]:
import json
import pandas as pd
import random
import uuid

# Load the non-matching utterances dataset
with open('non_matching_utterances_sample.json', 'r') as f:
    non_matching_data = json.load(f)
non_matching_df = pd.DataFrame(non_matching_data)

# Adjust the function to ensure it correctly handles word selection
def augment_non_matching_rows(row):
    rows = []
    statement = row['statement']
    words = statement.split()
    random_sample_num = min(3, len(words))
    random_indices = random.sample(range(len(words)), random_sample_num)
    for i in range(random_sample_num):
        idx = random_indices[i]
        statement = words.copy()
        statement[idx] = '[START] ' + statement[idx] + ' [END]'
        context = ' '.join(statement)
        if row['previous_statement'] == "None":
            broad_context = "Speaker 1: " + context
        else:
            broad_context = "Speaker 1: " + row['previous_statement'] + " Speaker 2: " + context
        rows.append({
            'transcript_id': row['transcript_id'],
            'statement_id': str(uuid.uuid4()),
            'context': broad_context,
            'label': label_map['none']
        })
    return rows

# Create a new DataFrame with separate rows for each marker
non_match_rows = []
for _, row in non_matching_df.iterrows():
    non_match_rows.extend(augment_non_matching_rows(row))

other_df = pd.DataFrame(non_match_rows)

# Combine the existing DataFrame with the augmented data
# Ensure new_df is already defined and contains initial data
augmented_df = pd.concat([new_df, other_df], ignore_index=True)

# Check the resulting DataFrame
print(augmented_df[['context', 'label']])
print(augmented_df.head()["context"][3])
# for k, v in augmented_df[:1].items():
#     print(v)

print("Total Samples:", len(augmented_df))

                                                context  label
0     Speaker 1: And so you'll see states look to Ma...      0
1     Speaker 1: We have changed. The thing is thing...      2
2     Speaker 1: And before we head to break, we hav...      0
3     Speaker 1: Too bizarre? Speaker 2: Yes, yes. I...      0
4     Speaker 1: Mr. President, yesterday Senators L...      0
...                                                 ...    ...
3852  Speaker 1: And you push back on them. And you ...      0
3853  Speaker 1: And you push back on them. And you ...      0
3854  Speaker 1: It's more than plausible.  It happe...      0
3855  Speaker 1: It's more than plausible.  It happe...      0
3856  Speaker 1: It's more than plausible.  It happe...      0

[3857 rows x 2 columns]
Speaker 1: Too bizarre? Speaker 2: Yes, yes. I THINK she's ridiculous and be more realistic. You [START] know [END], we are people, we live on this planet. We want to survive.
Total Samples: 3857


In [9]:
# Tokenize and Train the Model
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)  # Adjust num_labels to 3

device='cuda' if torch.cuda.is_available() else 'cpu'
# device='cpu'
model.to(device)
print("device:", device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


device: cuda


In [4]:
from transformers import TrainerCallback, TrainerState, TrainerControl
import os
import datetime

class CustomSaveCallback(TrainerCallback):
    "A custom callback that saves the model at the end of each epoch with a unique name."
    def __init__(self, save_path, batch_size, tokenizer):
        self.save_path = save_path
        self.batch_size = batch_size
        self.tokenizer = tokenizer  # Save the tokenizer as an instance variable

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        # Format the directory name with the epoch number and batch size
        epoch_dir = f"{self.save_path}/model_checkpoint_epoch-{state.epoch}_batch-{self.batch_size}"
        if not os.path.exists(epoch_dir):
            os.makedirs(epoch_dir)
        # Save the model and tokenizer in this directory
        kwargs['model'].save_pretrained(epoch_dir)
        self.tokenizer.save_pretrained(epoch_dir)  # Use the instance variable
        print(f"Saved model and tokenizer to {epoch_dir}")

# Define a function to compute metrics
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)

    # Compute metrics
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    precision = precision_metric.compute(predictions=predictions, references=labels, average="macro")
    recall = recall_metric.compute(predictions=predictions, references=labels, average="macro")
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="macro")

    # Compute F1 for each label individually
    f1_per_class = f1_metric.compute(predictions=predictions, references=labels, average=None)
    label_f1_scores = f1_per_class['f1']

    return {
        'accuracy': accuracy['accuracy'],
        'precision': precision['precision'],
        'recall': recall['recall'],
        'f1': f1['f1'],
        'f1_label_0': label_f1_scores[0],
        'f1_label_1': label_f1_scores[1],
        'f1_label_2': label_f1_scores[2]
    }

### Dataset - Train/Eval/Test, Evaluate Metrics

In [18]:
# regen_data = True
import pickle

import logging
from sklearn.model_selection import train_test_split

# Set the logging level to ERROR to reduce output clutter
logging.basicConfig(level=logging.ERROR)

# Tokenization function
def tokenize_function(examples):
    return tokenizer(examples['context'], padding="max_length", truncation=True, max_length=512)


# File paths for saved datasets
train_file = 'train_dataset.pkl'
eval_file = 'eval_dataset.pkl'
test_file = 'test_dataset.pkl'

# Function to save a dataset
def save_dataset(data, filename):
    with open(filename, 'wb') as file:
        pickle.dump(data, file)

# Function to load a dataset
def load_dataset(filename):
    with open(filename, 'rb') as file:
        return pickle.load(file)
# Check if regeneration of data is needed
regen_data = True
if regen_data:
    # Split the data into training and evaluation sets
    train_eval_df, test_df = train_test_split(augmented_df, test_size=0.15, random_state=42, stratify=augmented_df['label'])
    # Ensure to use train_eval_df for further splitting and stratification
    train_df, eval_df = train_test_split(train_eval_df, test_size=0.1764705882352941, random_state=42, stratify=train_eval_df['label'])
    save_dataset(train_df, train_file)
    save_dataset(eval_df, eval_file)
    save_dataset(test_df, test_file)
    print("Length of files:", len(train_df), len(eval_df), len(test_df))
elif os.path.exists(train_file) and os.path.exists(eval_file) and os.path.exists(test_file):
    # Load the datasets
    train_df = load_dataset(train_file)
    eval_df = load_dataset(eval_file)
    test_df = load_dataset(test_file)
    print("Length of files:", len(train_df), len(eval_df), len(test_df))
else:
    # Split the data into training and evaluation sets
    train_eval_df, test_df = train_test_split(augmented_df, test_size=0.15, random_state=42, stratify = augmented_df['label'])
    train_df, eval_df = train_test_split(train_eval_df, test_size=0.1764705882352941, random_state=42, stratify = train_eval_df['label'])
    save_dataset(train_df, train_file)
    save_dataset(eval_df, eval_file)
    save_dataset(test_df, test_file)
    print("Length of files:", len(train_df), len(eval_df), len(test_df))
    
from datasets import Dataset
# Convert DataFrame to Dataset
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)

# Tokenize the datasets
train_tokenized = train_dataset.map(tokenize_function, batched=True)
eval_tokenized = eval_dataset.map(tokenize_function, batched=True)

import numpy as np
from datasets import load_metric
import evaluate

accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision", config="multiclass")
recall_metric = evaluate.load("recall", config="multiclass")
f1_metric = evaluate.load("f1", config="multiclass")


Length of files: 2699 579 579


Map: 100%|██████████| 2699/2699 [00:02<00:00, 1151.59 examples/s]
Map: 100%|██████████| 579/579 [00:00<00:00, 1146.87 examples/s]


### Generate Weighted Loss

In [17]:
import torch
from sklearn.utils.class_weight import compute_class_weight

# Example class counts: You might need to calculate or provide actual counts
class_counts = train_df['label'].value_counts().sort_index()
print("Class counts:", class_counts)

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(train_df['label']), y=train_df['label'])
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)
class_weights_tensor = class_weights_tensor.to(device)
# Print class weights to verify
print("Class Weights:", class_weights_tensor)

Class counts: 0    2345
1      45
2     309
Name: label, dtype: int64
Class Weights: tensor([ 0.3837, 19.9926,  2.9115], device='cuda:0')


### Custom Weighted Loss Function

In [25]:
class CustomModel(nn.Module):
    def __init__(self, model, class_weights):
        super(CustomModel, self).__init__()
        self.model = model  # this should be an instance of a Hugging Face PreTrainedModel
        self.class_weights = class_weights

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        if labels is not None:
            self.class_weights = self.class_weights.to(self.model.device)
            loss_fct = nn.CrossEntropyLoss(weight=self.class_weights)
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
            return (loss, logits)
        return logits

    def save_pretrained(self, save_directory):
        """Save the contained PreTrainedModel to a directory."""
        self.model.save_pretrained(save_directory)

    def from_pretrained(self, load_directory):
        """Load the contained PreTrainedModel from a directory."""
        self.model = self.model.from_pretrained(load_directory)

### Train Weighted Loss Model

In [29]:
# Initialize the model and ensure it and its weights are on the correct device
model_with_loss = CustomModel(original_model.to(device), class_weights_tensor.to(device))

from transformers import Trainer, TrainingArguments

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=6,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
)

# Initialize Trainer with the custom model
trainer = Trainer(
    model=model_with_loss,  # Ensure this is your custom model accepting weights
    args=training_args,
    train_dataset=train_tokenized,  # Ensure datasets are correctly tokenized
    eval_dataset=eval_tokenized,
    compute_metrics=compute_metrics,  # Custom metrics function if needed
    callbacks=[CustomSaveCallback('./weighted_checkpoints', training_args.per_device_train_batch_size, tokenizer)]
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

import datetime
# Get current datetime to use as a unique identifier
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Define model parameters to include in the filename
num_epochs = training_args.num_train_epochs
batch_size = training_args.per_device_train_batch_size

# Define the directory using the timestamp and model parameters
model_dir = f'./models/weighted_model_epochs-{num_epochs}_batch-{batch_size}_{current_time}'
tokenizer_dir = f'./models/weighted_tokenizer_epochs-{num_epochs}_batch-{batch_size}_{current_time}'

# Save the model and tokenizer with detailed names
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved in directory: {model_dir}")
print(f"Tokenizer saved in directory: {tokenizer_dir}")

# Print evaluation results
print("Evaluation results:", eval_results)

Epoch,Training Loss,Validation Loss,Model Name,Accuracy,Precision,Recall,F1,F1 Label 0,F1 Label 1,F1 Label 2
1,No log,0.726545,weighted_loss,0.965026,0.859481,0.779773,0.815642,0.981604,0.583333,0.881988
2,0.157500,1.051479,weighted_loss,0.936528,0.559258,0.602707,0.578981,0.970105,0.0,0.766839
3,0.392900,0.472366,weighted_loss,0.954663,0.775643,0.864396,0.804166,0.974589,0.555556,0.882353
4,0.224600,0.86139,weighted_loss,0.965026,0.876587,0.771084,0.812215,0.981495,0.571429,0.883721
5,0.224600,0.628369,weighted_loss,0.96114,0.804587,0.841735,0.817314,0.978439,0.580645,0.892857
6,0.155900,0.598807,weighted_loss,0.962435,0.821277,0.838941,0.827199,0.979955,0.62069,0.880952


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-1.0_batch-8


Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-2.0_batch-8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-3.0_batch-8


Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-4.0_batch-8


Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-5.0_batch-8


Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Saved model and tokenizer to ./weighted_checkpoints/model_checkpoint_epoch-6.0_batch-8


Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "weighted_loss" of type <class 'str'> for key "eval/model_name" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Evaluation results: {'eval_loss': 0.5988073945045471, 'eval_model_name': 'weighted_loss', 'eval_accuracy': 0.9624352331606217, 'eval_precision': 0.8212771203155818, 'eval_recall': 0.8389411135312774, 'eval_f1': 0.8271991642316502, 'eval_f1_label_0': 0.9799554565701559, 'eval_f1_label_1': 0.6206896551724138, 'eval_f1_label_2': 0.8809523809523809, 'eval_runtime': 6.3406, 'eval_samples_per_second': 121.755, 'eval_steps_per_second': 15.298, 'epoch': 6.0}


### Train Standard Model

In [14]:
# Define training arguments and initialize the trainer
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=6,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
)

# Initialize Trainer with the custom callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    compute_metrics=compute_metrics,
    callbacks=[CustomSaveCallback('./unique_checkpoints', training_args.per_device_train_batch_size, tokenizer)]  # Pass the tokenizer here
)


# Optional: Evaluate the model after training is complete
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()


# Evaluate the model
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

import datetime
# Get current datetime to use as a unique identifier
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Define model parameters to include in the filename
num_epochs = training_args.num_train_epochs
batch_size = training_args.per_device_train_batch_size

# Define the directory using the timestamp and model parameters
model_dir = f'./models/standard_model_epochs-{num_epochs}_batch-{batch_size}_{current_time}'
tokenizer_dir = f'./models/standard_tokenizer_epochs-{num_epochs}_batch-{batch_size}_{current_time}'

# Save the model and tokenizer with detailed names
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved in directory: {model_dir}")
print(f"Tokenizer saved in directory: {tokenizer_dir}")

# Print evaluation results
print("Evaluation results:", eval_results)

Map: 100%|██████████| 3085/3085 [00:02<00:00, 1116.87 examples/s]
Map: 100%|██████████| 772/772 [00:00<00:00, 1100.17 examples/s]


Evaluation results: {'eval_loss': 0.19305863976478577, 'eval_accuracy': 0.9637305699481865, 'eval_precision': 0.8137048348023298, 'eval_recall': 0.8526023157170698, 'eval_f1': 0.8309050805087862, 'eval_f1_label_0': 0.9805970149253731, 'eval_f1_label_1': 0.6206896551724138, 'eval_f1_label_2': 0.8914285714285715, 'eval_runtime': 6.9171, 'eval_samples_per_second': 111.607, 'eval_steps_per_second': 14.023}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,F1 Label 0,F1 Label 1,F1 Label 2
1,No log,0.254314,0.957254,0.766076,0.676215,0.71316,0.977223,0.285714,0.876543
2,0.062500,0.244236,0.957254,0.850391,0.651071,0.695954,0.977256,0.25,0.860606
3,0.109600,0.235526,0.966321,0.865899,0.753019,0.792925,0.982222,0.5,0.896552
4,0.066300,0.246897,0.963731,0.828704,0.852602,0.839515,0.979136,0.642857,0.896552
5,0.066300,0.213502,0.962435,0.829844,0.864085,0.842272,0.979198,0.666667,0.880952
6,0.043500,0.233005,0.963731,0.805823,0.84602,0.820236,0.980655,0.580645,0.899408


Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-1.0_batch-8
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-2.0_batch-8
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-3.0_batch-8
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-4.0_batch-8
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-5.0_batch-8
Saved model and tokenizer to ./unique_checkpoints/model_checkpoint_epoch-6.0_batch-8


Model saved in directory: ./models/model_epochs-6_batch-8_2024-06-05_12-17-29
Tokenizer saved in directory: ./models/tokenizer_epochs-6_batch-8_2024-06-05_12-17-29
Evaluation results: {'eval_loss': 0.23300491273403168, 'eval_accuracy': 0.9637305699481865, 'eval_precision': 0.8058230758412366, 'eval_recall': 0.8460201001184608, 'eval_f1': 0.8202360690729177, 'eval_f1_label_0': 0.9806547619047619, 'eval_f1_label_1': 0.5806451612903226, 'eval_f1_label_2': 0.8994082840236687, 'eval_runtime': 6.7885, 'eval_samples_per_second': 113.722, 'eval_steps_per_second': 14.289, 'epoch': 6.0}


### Train Standard with Larger Batch Size

In [None]:
# Define training arguments and initialize the trainer
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=6,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch",
)

# Initialize Trainer with the custom callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    compute_metrics=compute_metrics,
    callbacks=[CustomSaveCallback('./unique_checkpoints', training_args.per_device_train_batch_size, tokenizer)]  # Pass the tokenizer here
)


# Optional: Evaluate the model after training is complete
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()

### Save Final Model Version

In [None]:
import datetime
# Get current datetime to use as a unique identifier
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Define model parameters to include in the filename
num_epochs = training_args.num_train_epochs
batch_size = training_args.per_device_train_batch_size

# Define the directory using the timestamp and model parameters
model_dir = f'./models/model_epochs-{num_epochs}_batch-{batch_size}_{current_time}'
tokenizer_dir = f'./models/tokenizer_epochs-{num_epochs}_batch-{batch_size}_{current_time}'

# Save the model and tokenizer with detailed names
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"Model saved in directory: {model_dir}")
print(f"Tokenizer saved in directory: {tokenizer_dir}")

# Print evaluation results
print("Evaluation results:", eval_results)

In [6]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Compute the confusion matrix
cm = confusion_matrix(true_labels, predictions, labels=unique_labels)
display_labels = [reversed_label_map[label] for label in unique_labels]

# Display the confusion matrix
fig, ax = plt.subplots(figsize=(10, 10))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
disp.plot(values_format='d', cmap='Blues', ax=ax)
plt.title('Confusion Matrix')
plt.show()

NameError: name 'true_labels' is not defined