In [1]:
!pip install --upgrade tensorflow
!pip install iterstrat

[31mERROR: Could not find a version that satisfies the requirement iterstrat (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for iterstrat[0m[31m
[0m

Preprocessing Annotations

In [2]:
ENTITY_TAXONOMY = {
    "Antagonist": ["Bigot", "Conspirator", "Corrupt", "Deceiver", "Terrorist", "Traitor", "Tyrant"],
    "Protagonist": ["Guardian", "Martyr", "Peacemaker", "Rebel", "Virtuous"],
    "Innocent": ["Exploited", "Victim", "Underdog"]
}

In [3]:
import pandas as pd
import torch

# Path to the annotations file
annotations_path = "EN/subtask-1-annotations.txt"

# Read the annotations file manually and ensure all columns are handled
with open(annotations_path, "r") as f:
    data = []
    for line in f:
        parts = line.strip().split("\t")
        filename, entity, start_idx, end_idx, main_role = parts[:5]
        fine_roles = parts[5:]  # Remaining parts are fine-grained roles
        data.append([filename, entity, int(start_idx), int(end_idx), main_role, fine_roles])

# Convert to DataFrame
columns = ["filename", "entity", "start_idx", "end_idx", "main_role", "fine_roles"]
annotations = pd.DataFrame(data, columns=columns)

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Display the first few rows
annotations.head()

Using device: cuda


Unnamed: 0,filename,entity,start_idx,end_idx,main_role,fine_roles
0,EN_CC_100013.txt,Bill Gates,93,102,Antagonist,"[Deceiver, Corrupt]"
1,EN_CC_100013.txt,BBC,1860,1862,Antagonist,[Deceiver]
2,EN_CC_100013.txt,Jeffrey Epstein,2005,2019,Antagonist,[Corrupt]
3,EN_UA_300009.txt,Fail Alsynov,176,187,Protagonist,"[Rebel, Martyr]"
4,EN_UA_300009.txt,Bashkir people,1616,1629,Innocent,[Victim]


Raw Documents

In [4]:
import os

# Path to the raw documents folder
raw_documents_path = "EN/raw-documents"

# Load all documents into a dictionary
documents = {}
for filename in os.listdir(raw_documents_path):
    filepath = os.path.join(raw_documents_path, filename)
    with open(filepath, "r", encoding="utf-8") as f:
        documents[filename] = f.read()

# Map annotations to document content
def extract_context(row, window=225):
    """Extracts the context around an entity from the document."""
    doc_text = documents[row["filename"]]
    start_idx, end_idx = row["start_idx"], row["end_idx"]

    # Define context window
    start_context = max(0, start_idx - window)
    end_context = min(len(doc_text), end_idx + window)

    # Highlight the entity in the context
    context = doc_text[start_context:start_idx] + "[ENTITY]" + doc_text[start_idx:end_idx] + "[/ENTITY]" + doc_text[end_idx:end_context]
    return context

# Apply the context extraction
annotations["context"] = annotations.apply(extract_context, axis=1)

# Display a sample row with context
annotations.head()

Unnamed: 0,filename,entity,start_idx,end_idx,main_role,fine_roles,context
0,EN_CC_100013.txt,Bill Gates,93,102,Antagonist,"[Deceiver, Corrupt]",Bill Gates Says He Is ‘The Solution’ To Climat...
1,EN_CC_100013.txt,BBC,1860,1862,Antagonist,[Deceiver],"s, according to data from the World Bank.\n\nE..."
2,EN_CC_100013.txt,Jeffrey Epstein,2005,2019,Antagonist,[Corrupt],by ‘conspiracy theorists’ for pushing vaccine...
3,EN_UA_300009.txt,Fail Alsynov,176,187,Protagonist,"[Rebel, Martyr]",Russia: Clashes erupt in Bashkortostan as righ...
4,EN_UA_300009.txt,Bashkir people,1616,1629,Innocent,[Victim],e. \n\nAlsynov contends the Bashkir words mean...


Prepare Training and Testing Datasets

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import pandas as pd
import numpy as np

# Stratified split into training and testing sets
train_data, test_data = train_test_split(
    annotations,
    test_size=0.2,
    random_state=42,
    stratify=annotations["main_role"]
)

# Define the main role classification model
main_role_model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-base", num_labels=3  # Three main roles
)

main_role_model.to(device)
# Encode main roles
main_role_encoder = LabelEncoder()
train_data["main_role_encoded"] = main_role_encoder.fit_transform(train_data["main_role"])
test_data["main_role_encoded"] = main_role_encoder.transform(test_data["main_role"])

# Encode fine-grained roles
fine_role_binarizer = MultiLabelBinarizer()
train_data["fine_roles_encoded"] = fine_role_binarizer.fit_transform(train_data["fine_roles"]).tolist()
test_data["fine_roles_encoded"] = fine_role_binarizer.transform(test_data["fine_roles"]).tolist()

# Initialize the BERT tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base")

def tokenize_context(data, tokenizer):
    """Tokenize the context column."""
    return tokenizer(
        data["context"].tolist(),
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

# Tokenize context using BERT embeddings
train_inputs = tokenize_context(train_data, tokenizer)
test_inputs = tokenize_context(test_data, tokenizer)

# # Generate TF-IDF embeddings
# def compute_tfidf_embeddings(data):
#     vectorizer = TfidfVectorizer(
#         max_features=5000,  # Limit to top 5000 features
#         ngram_range=(1, 5),  # Include unigrams and bigrams
#         stop_words="english"  # Remove common stopwords
#     )
#     tfidf_matrix = vectorizer.fit_transform(data["context"])
#     return tfidf_matrix.toarray(), vectorizer

# # Compute TF-IDF embeddings for train and test data
# tfidf_embeddings_train, tfidf_vectorizer = compute_tfidf_embeddings(train_data)
# tfidf_embeddings_test = tfidf_vectorizer.transform(test_data["context"]).toarray()

# # Combine TF-IDF and BERT embeddings
# def combine_embeddings(bert_embeddings, tfidf_embeddings):
#     """Concatenate BERT and TF-IDF embeddings."""
#     return np.hstack((bert_embeddings, tfidf_embeddings))

# # Extract BERT embeddings
# def extract_bert_embeddings(inputs, model):
#     """Extract embeddings (CLS token) from the model."""
#     with torch.no_grad():
#         # Identify the base model attribute dynamically
#         base_model = getattr(model, model.base_model_prefix)
#         outputs = base_model(inputs["input_ids"].to(device), attention_mask=inputs["attention_mask"].to(device))
#     return outputs.last_hidden_state[:, 0, :].cpu().numpy()  # CLS token

# # Assuming the DeBERTa model is already loaded as `main_role_model`
# train_bert_embeddings = extract_bert_embeddings(train_inputs, main_role_model)
# test_bert_embeddings = extract_bert_embeddings(test_inputs, main_role_model)

# # Combine BERT and TF-IDF embeddings
# train_combined_embeddings = combine_embeddings(train_bert_embeddings, tfidf_embeddings_train)
# test_combined_embeddings = combine_embeddings(test_bert_embeddings, tfidf_embeddings_test)

# Display information about the processed data
print("Main Role Encoder Classes:", main_role_encoder.classes_)
print("Fine Role Classes:", fine_role_binarizer.classes_)
# print("Shape of Train Combined Embeddings:", train_combined_embeddings.shape)
# print("Shape of Test Combined Embeddings:", test_combined_embeddings.shape)
print("Sample Main Role Label:", train_data["main_role_encoded"].iloc[0])
print("Sample Fine-Grained Role Label:", train_data["fine_roles_encoded"].iloc[0])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Main Role Encoder Classes: ['Antagonist' 'Innocent' 'Protagonist']
Fine Role Classes: ['Bigot' 'Conspirator' 'Corrupt' 'Deceiver' 'Exploited'
 'Foreign Adversary' 'Guardian' 'Incompetent' 'Instigator' 'Martyr'
 'Peacemaker' 'Rebel' 'Saboteur' 'Scapegoat' 'Spy' 'Terrorist' 'Traitor'
 'Tyrant' 'Underdog' 'Victim' 'Virtuous']
Sample Main Role Label: 0
Sample Fine-Grained Role Label: [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Code for Main Role Classifier

In [6]:
from transformers import AutoModelForSequenceClassification
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
import torch
from sklearn.utils.class_weight import compute_class_weight
import numpy as np


# Define the main role classification model
main_role_model = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-base", num_labels=3  # Three main roles
)
main_role_model.to(device)

# Define optimizer and loss function
optimizer = AdamW(main_role_model.parameters(), lr=5e-5)

# Compute class weights for main role classification
class_weights_main = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_data["main_role_encoded"]),
    y=train_data["main_role_encoded"]
)
class_weights_main = torch.tensor(class_weights_main, dtype=torch.float).to(device)

# Update loss function for main role
loss_fn = CrossEntropyLoss(weight=class_weights_main)

# Training loop for main role classifier with gradient clipping
# Training loop for main role classifier with gradient clipping and early stopping
def train_main_role_model(model, train_inputs, train_labels, epochs=1, clip_norm=1.5, loss_threshold=0.01):

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for i in range(0, len(train_inputs["input_ids"]), 16):  # Batch size of 32
            # Prepare inputs and labels for the current batch
            input_ids = train_inputs["input_ids"][i:i+16].to(device)
            attention_mask = train_inputs["attention_mask"][i:i+16].to(device)
            labels = torch.tensor(train_labels[i:i+16], dtype=torch.long).to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)  # Pass labels here
            loss = outputs.loss  # Compute loss
            total_loss += loss.item()

            # Backward pass
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_norm)

            # Optimizer step
            optimizer.step()

        # Compute average loss for the epoch
        avg_loss = total_loss / len(train_inputs["input_ids"])
        print(f"Epoch {epoch + 1}, Loss: {avg_loss}")

        # Check if the average loss is below the threshold
        if avg_loss < loss_threshold:
            print(f"Early stopping triggered at epoch {epoch + 1}. Loss: {avg_loss:.4f}")
            break



# Train the main role model
train_labels_main = train_data["main_role_encoded"].tolist()
train_main_role_model(main_role_model, train_inputs, train_labels_main, epochs=30, clip_norm=3.5, loss_threshold=0.0001)

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.05546349036867601
Epoch 2, Loss: 0.04655283172852802
Epoch 3, Loss: 0.032691068158750114
Epoch 4, Loss: 0.020891856769249387
Epoch 5, Loss: 0.007741831067990321
Epoch 6, Loss: 0.003440674956264246
Epoch 7, Loss: 0.001483241932289879
Epoch 8, Loss: 0.003059934401106307
Epoch 9, Loss: 0.002701587474205613
Epoch 10, Loss: 0.0016885458153543355
Epoch 11, Loss: 0.00023886056835287298
Epoch 12, Loss: 0.0013916754778487315
Epoch 13, Loss: 0.0037094761916380173
Epoch 14, Loss: 0.0015640445998389729
Epoch 15, Loss: 0.000831133104195006
Epoch 16, Loss: 0.00047147688375153736
Epoch 17, Loss: 4.4469326589236134e-05
Early stopping triggered at epoch 17. Loss: 0.0000


Code for Fine-Grained Role Classifiers

In [7]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.nn import BCEWithLogitsLoss
from torch.optim import AdamW
from sklearn.preprocessing import MultiLabelBinarizer
import torch
import numpy as np
from sklearn.metrics import classification_report, hamming_loss

# Define the transformer model for fine-grained classification
fine_grained_model_name = "bert-base-uncased"  # Change to any transformer model you prefer
fine_grained_model = AutoModelForSequenceClassification.from_pretrained(
    fine_grained_model_name, num_labels=len(fine_role_binarizer.classes_)
)
fine_grained_model.to(device)

# Define tokenizer
fine_grained_tokenizer = AutoTokenizer.from_pretrained(fine_grained_model_name)

# Tokenize context for fine-grained roles
def tokenize_context(data, tokenizer):
    return tokenizer(
        data["context"].tolist(),
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

# Tokenize the data
train_inputs_fine = tokenize_context(train_data, fine_grained_tokenizer)
test_inputs_fine = tokenize_context(test_data, fine_grained_tokenizer)

# Loss function for multi-label classification
loss_fn_fine = BCEWithLogitsLoss()

# Optimizer
optimizer_fine = AdamW(fine_grained_model.parameters(), lr=5e-5)

# Training loop for fine-grained classification with loss threshold
def train_fine_grained_model(model, train_inputs, train_labels, epochs=3, batch_size=16, clip_norm=1.0, loss_threshold=0.01):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for i in range(0, len(train_inputs["input_ids"]), batch_size):
            input_ids = train_inputs["input_ids"][i:i+batch_size].to(device)
            attention_mask = train_inputs["attention_mask"][i:i+batch_size].to(device)
            labels = torch.tensor(train_labels[i:i+batch_size], dtype=torch.float).to(device)

            # Forward pass
            optimizer_fine.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = loss_fn_fine(logits, labels)
            total_loss += loss.item()

            # Backward pass
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_norm)

            # Optimizer step
            optimizer_fine.step()

        avg_loss = total_loss / len(train_inputs["input_ids"])
        print(f"Epoch {epoch + 1}, Loss: {avg_loss:.5f}")

        # Stop training if loss falls below the threshold
        if avg_loss < loss_threshold:
            print(f"Stopping early at Epoch {epoch + 1} as loss {avg_loss:.4f} is below the threshold {loss_threshold:.4f}")
            break

# Evaluate the fine-grained model
def evaluate_fine_grained_model(model, test_inputs, test_labels, binarizer):
    model.eval()
    predictions = []
    with torch.no_grad():
        for i in range(0, len(test_inputs["input_ids"]), 16):  # Batch size of 32
            input_ids = test_inputs["input_ids"][i:i+16].to(device)
            attention_mask = test_inputs["attention_mask"][i:i+16].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            probs = torch.sigmoid(logits).cpu().numpy()  # Sigmoid for multi-label classification
            predictions.extend(probs)

    predictions = np.array(predictions)
    predictions_binary = (predictions > 0.5).astype(int)  # Convert probabilities to binary labels

    # Exact Match Ratio (multi-label accuracy)
    exact_match_ratio = np.mean([np.array_equal(true, pred) for true, pred in zip(test_labels, predictions_binary)])
    print(f"\nExact Match Ratio (Fine-Grained Roles): {exact_match_ratio:.4f}")

    # Hamming Loss
    hamming = hamming_loss(test_labels, predictions_binary)
    print(f"Hamming Loss: {hamming:.4f}")

    # Classification Report
    print("\nClassification Report:")
    print(classification_report(test_labels, predictions_binary, target_names=binarizer.classes_))

    # Convert predictions back to label format
    true_labels = binarizer.inverse_transform(test_labels)
    predicted_labels = binarizer.inverse_transform(predictions_binary)

    return true_labels, predicted_labels, exact_match_ratio, hamming

# Prepare data for training and evaluation
train_labels_fine = np.array(train_data["fine_roles_encoded"].tolist())
test_labels_fine = np.array(test_data["fine_roles_encoded"].tolist())

# Train the fine-grained classification model
train_fine_grained_model(
    fine_grained_model,
    train_inputs_fine,
    train_labels_fine,
    epochs=30,
    batch_size=16,
    clip_norm=3.5,
    loss_threshold=0.001  # Define a threshold for early stopping
)

# Evaluate the model
true_fine_roles, predicted_fine_roles, exact_match_ratio, hamming_loss_val = evaluate_fine_grained_model(
    fine_grained_model, test_inputs_fine, test_labels_fine, fine_role_binarizer
)

# Display true and predicted roles
# print("\nTrue Fine-Grained Roles vs Predicted Fine-Grained Roles:")
# for true, pred in zip(true_fine_roles, predicted_fine_roles):
#     print(f"True: {true}, Predicted: {pred}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.02435
Epoch 2, Loss: 0.01350
Epoch 3, Loss: 0.01253
Epoch 4, Loss: 0.01224
Epoch 5, Loss: 0.01187
Epoch 6, Loss: 0.01132
Epoch 7, Loss: 0.01089
Epoch 8, Loss: 0.01049
Epoch 9, Loss: 0.00986
Epoch 10, Loss: 0.00888
Epoch 11, Loss: 0.00813
Epoch 12, Loss: 0.00719
Epoch 13, Loss: 0.00617
Epoch 14, Loss: 0.00522
Epoch 15, Loss: 0.00440
Epoch 16, Loss: 0.00367
Epoch 17, Loss: 0.00322
Epoch 18, Loss: 0.00281
Epoch 19, Loss: 0.00245
Epoch 20, Loss: 0.00217
Epoch 21, Loss: 0.00191
Epoch 22, Loss: 0.00178
Epoch 23, Loss: 0.00157
Epoch 24, Loss: 0.00143
Epoch 25, Loss: 0.00131
Epoch 26, Loss: 0.00122
Epoch 27, Loss: 0.00112
Epoch 28, Loss: 0.00104
Epoch 29, Loss: 0.00095
Stopping early at Epoch 29 as loss 0.0009 is below the threshold 0.0010

Exact Match Ratio (Fine-Grained Roles): 0.3116
Hamming Loss: 0.0452

Classification Report:
                   precision    recall  f1-score   support

            Bigot       0.50      0.50      0.50         2
      Conspirator       0.83 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluation

In [9]:
from sklearn.metrics import classification_report, accuracy_score, hamming_loss
import torch

# Evaluate main role classifier with batch-wise evaluation
def evaluate_main_role_model_batchwise(model, test_inputs, test_labels, batch_size=16):
    model.eval()
    predictions = []
    with torch.no_grad():
        for i in range(0, len(test_inputs["input_ids"]), batch_size):
            input_ids = test_inputs["input_ids"][i:i+batch_size].to(device)
            attention_mask = test_inputs["attention_mask"][i:i+batch_size].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            batch_predictions = torch.argmax(outputs.logits, dim=1).cpu().numpy()
            predictions.extend(batch_predictions)

    predictions = np.array(predictions)

    # Exact Match Ratio for main roles (accuracy)
    exact_match_ratio = accuracy_score(test_labels, predictions)
    print(f"Exact Match Ratio (Main Role): {exact_match_ratio:.4f}")

    # Classification Report
    print(classification_report(test_labels, predictions, target_names=main_role_encoder.classes_))

    # Display True and Predicted Main Roles
    true_main_roles = main_role_encoder.inverse_transform(test_labels)
    predicted_main_roles = main_role_encoder.inverse_transform(predictions)
    # print("\nTrue Main Roles vs Predicted Main Roles:")
    # for true, pred in zip(true_main_roles, predicted_main_roles):
    #     print(f"True: {true}, Predicted: {pred}")

    return predictions

# Evaluate Fine-Grained Classifiers with batch-wise evaluation
test_main_predictions = evaluate_main_role_model_batchwise(
    main_role_model, test_inputs, test_data["main_role_encoded"].tolist(), batch_size=16
)



# Evaluate fine-grained classifiers
# def evaluate_fine_grained_classifiers_by_model(classifiers_by_model, test_features, test_labels, test_main_predictions, taxonomy):
#     results = {}
#     for model_name, classifiers in classifiers_by_model.items():
#         print(f"\nEvaluating fine-grained classifiers using {model_name}...")
#         predictions = []

#         for i, main_role in enumerate(test_main_predictions):
#             classifier = classifiers[main_role_encoder.inverse_transform([main_role])[0]]
#             probs = classifier.predict_proba(test_features[i].reshape(1, -1))[0]
#             enforced_probs = enforce_taxonomy(main_role, probs, taxonomy, fine_role_binarizer)
#             predictions.append((enforced_probs > 0.17).astype(int))

#         predictions = np.array(predictions)

#         # Calculate metrics
#         exact_match_ratio = np.mean([np.array_equal(true, pred) for true, pred in zip(test_labels, predictions)])
#         hl = hamming_loss(test_labels, predictions)
#         report = classification_report(test_labels, predictions, target_names=fine_role_binarizer.classes_)

#         results[model_name] = {
#             "Exact Match Ratio": exact_match_ratio,
#             "Hamming Loss": hl,
#             "Classification Report": report,
#         }

#         print(f"\nExact Match Ratio (Fine-Grained Roles): {exact_match_ratio:.4f}")
#         print(f"Hamming Loss: {hl:.4f}")
#         print(report)

#     return results



# # Evaluate Main Role Classifier
# test_main_predictions = evaluate_main_role_model(
#     main_role_model, test_inputs, test_data["main_role_encoded"].tolist()
# )

# # Extract Features for Fine-Grained Classifiers
# test_features = extract_features(test_inputs, main_role_model)

# fine_grained_results = evaluate_fine_grained_classifiers_by_model(
#     fine_grained_classifiers_by_model,
#     test_features,
#     np.array(test_data["fine_roles_encoded"].tolist()),
#     test_main_predictions,
#     ENTITY_TAXONOMY
# )

# # Display the best-performing model
# best_model = max(fine_grained_results, key=lambda model: fine_grained_results[model]["Exact Match Ratio"])
# print(f"\nBest Model: {best_model}")
# print(f"Exact Match Ratio: {fine_grained_results[best_model]['Exact Match Ratio']:.4f}")
# print(f"Hamming Loss: {fine_grained_results[best_model]['Hamming Loss']:.4f}")
# print(f"Classification Report:\n{fine_grained_results[best_model]['Classification Report']}")


Exact Match Ratio (Main Role): 0.8043
              precision    recall  f1-score   support

  Antagonist       0.83      0.95      0.89        96
    Innocent       0.67      0.50      0.57        16
 Protagonist       0.71      0.46      0.56        26

    accuracy                           0.80       138
   macro avg       0.74      0.64      0.67       138
weighted avg       0.79      0.80      0.79       138

