In [1]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

class RelationExtractionModel(nn.Module):
    def __init__(self, bert_model_name='microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract', embedding_dim=768):
        super(RelationExtractionModel, self).__init__()
        # Initialize the pre-trained PubMedBERT model to obtain contextual embeddings.
        self.bert = BertModel.from_pretrained(bert_model_name)
        # Dropout layer to prevent overfitting during training.
        self.dropout = nn.Dropout(p=0.1)
        # Projecting the [CLS] token representation into the desired relation embedding space.
        self.fc = nn.Linear(embedding_dim, embedding_dim)
        
    def forward(self, input_ids, attention_mask):
        # Pass the input tokens through the PubMedBERT model to obtain hidden states.
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.pooler_output  
        # Apply dropout for regularization to reduce overfitting.
        cls_output = self.dropout(cls_output)
        relation_embedding = self.fc(cls_output)
        return relation_embedding

if __name__ == "__main__":
    # Initialize the tokenizer associated with PubMedBERT.
    tokenizer = BertTokenizer.from_pretrained('microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract')
    # Instantiate the relation extraction model.
    model = RelationExtractionModel()

    # Define a sample sentence for relation extraction.
    sample_text = "The sodium/iodide symporter is associated with congenital hypothyroidism."
    # Tokenize the input sentence to convert it into input IDs and attention masks suitable for our model.
    inputs = tokenizer(sample_text, return_tensors="pt", padding=True, truncation=True)

    # Perform a forward pass through the model to obtain the relation embedding.
    embedding = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
    print("Relation Embedding:", embedding.shape)

  return torch.load(checkpoint_file, map_location="cpu")
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Asking to truncate to max_lengt

Relation Embedding: torch.Size([1, 768])


In [2]:
import json
import numpy as np
import torch
from transformers import BertModel, BertTokenizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to load a BioC JSON file
def load_bioc_file(filename):
    """
    Loads a BioC JSON file and returns its content as a Python dictionary.
    Returns:
        dict: Parsed JSON content.
    """
    with open(filename, 'r') as f:
        data = json.load(f)
    return data

# Function to extract relation texts from a BioC JSON document
# For simplicity, we construct a relation text as: "entity1 - relation_type - entity2"
def extract_relation_texts(bioc_data):
    relation_texts = []
    # Iterate over documents
    for doc in bioc_data.get("documents", []):
        
        '''Create a mapping from annotation id to its text for quick lookup.
        In BioC the relations refer to identifiers from the annotations. 
        Here we use the "text" from each annotation.'''
        annotation_texts = {}
        for passage in doc.get("passages", []):
            for ann in passage.get("annotations", []):
                # Using annotation_texts id as key.
                annotation_texts[ann["id"]] = ann["text"]
        
        # For each relation in the document, constructing a simple string.
        for rel in doc.get("relations", []):
            infons = rel.get("infons", {})
            # Retrieving texts of the related entities using their annotation_texts IDs.
            entity1 = infons.get("entity1", "Entity1")
            entity2 = infons.get("entity2", "Entity2")
            rel_type = infons.get("type", "Relation")
            # Construct a simple relation text.
            rel_text = f"{entity1} - {rel_type} - {entity2}"
            relation_texts.append(rel_text)
    return relation_texts

# Function to compute PubMedBERT embedding for a given text.
def get_relation_embedding(text, tokenizer, model):
    """
    Computes the PubMedBERT embedding for a given text.
    Returns:
        np.ndarray: Embedding vector.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    embedding = outputs.pooler_output  # shape = [1, embedding_dim]
    return embedding.squeeze(0).cpu().numpy()

# Function to compute embeddings and cosine similarity matrix for a list of relation texts.
def compute_similarity_matrix(relation_texts, tokenizer, model):
    """
    Computes embeddings and cosine similarity matrix for a list of relation texts. 
    Returns:
        tuple: Cosine similarity matrix and embeddings array.
    """
    embeddings = []
    for text in relation_texts:
        emb = get_relation_embedding(text, tokenizer, model)
        embeddings.append(emb)
    embeddings = np.vstack(embeddings)  # shape: [num_relations, embedding_dim]
    sim_matrix = cosine_similarity(embeddings)
    return sim_matrix, embeddings

# Main execution
if __name__ == "__main__":
    # Load the BioC JSON file.
    bioc_data = load_bioc_file("Train.BioC.JSON")
    
    # Extracting the relation texts from the dataset.
    relation_texts = extract_relation_texts(bioc_data)
    print("Extracted Relation Texts:")
    for rt in relation_texts:
        print(rt)
    
    # Load the PubMedBERT model and tokenizer.
    model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract"
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertModel.from_pretrained(model_name)
    
    # Compute the cosine similarity matrix among the relation embeddings.
    sim_matrix, embeddings = compute_similarity_matrix(relation_texts, tokenizer, model)
    
    # Print the cosine similarity matrix
    print("\nCosine Similarity Matrix:")
    for i, text_i in enumerate(relation_texts):
        for j, text_j in enumerate(relation_texts):
            print(f"Similarity between '{text_i}' and '{text_j}': {sim_matrix[i][j]:.4f}")

Extracted Relation Texts:
3175 - Association - D003924
D005947 - Positive_Correlation - 3630
D005947 - Association - D003924
50489 - Bind - D008358
D001943 - Positive_Correlation - c|INS|5382|C
D001943 - Positive_Correlation - c|DEL|4153|A
D001943 - Positive_Correlation - rs28897672
D010051 - Positive_Correlation - c|INS|5382|C
D010051 - Positive_Correlation - c|DEL|4153|A
D010051 - Positive_Correlation - rs28897672
c|INS|5382|C - Positive_Correlation - D061325
c|DEL|4153|A - Positive_Correlation - D061325
rs28897672 - Positive_Correlation - D061325
OMIM:604370 - Positive_Correlation - c|INS|5382|C
OMIM:604370 - Positive_Correlation - c|DEL|4153|A
OMIM:604370 - Positive_Correlation - rs28897672
672 - Association - D001943
672 - Association - D010051
672 - Association - OMIM:604370
672 - Association - D061325
D007980 - Positive_Correlation - D004409
D001058 - Negative_Correlation - D007980
D001058 - Negative_Correlation - D004409
D001058 - Negative_Correlation - D010300
D014594 - Associ

  return torch.load(checkpoint_file, map_location="cpu")
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Asking to truncate to max_lengt

In [3]:
def parse_relation_text(rel_text):
    """
    Parses a relation text string into its constituent components: entity1, relation_type, and entity2.
    The expected format of the input string is: "entity1 - relation_type - entity2".
    Returns:
        tuple: A tuple containing (entity1, relation_type, entity2). If the input format is incorrect,
               it returns (None, None, None).
    
    Example:
        rel_text = "Aspirin - inhibits - COX-2"
        entity1, relation_type, entity2 = parse_relation_text(rel_text)
        # entity1 -> "Aspirin"
        # relation_type -> "inhibits"
        # entity2 -> "COX-2"
    """
    parts = rel_text.split(" - ") # Split the string by the delimiter " - ".
    if len(parts) == 3:
        return parts[0].strip(), parts[1].strip(), parts[2].strip() # Strip any leading or trailing whitespace from each component.
    # If the format is incorrect, return None for each component.
    return None, None, None

def select_triplet(anchor_idx, relation_texts, embeddings, sim_matrix, pos_threshold=0.90, hard_threshold=0.80):
    """
    For a given anchor (index), select the best positive and hard negative relations.
    A positive is defined as a relation with the same relation type as the anchor.
    A hard negative is defined as one with a different relation type but with high similarity.
    Returns:
        tuple: (anchor_text, best_positive_text, best_hard_negative_text)
    
    Example:
        relation_texts = [
            "Aspirin - inhibits - COX-2",
            "Ibuprofen - inhibits - COX-1",
            "Vitamin C - activates - Immune System",
            "Paracetamol - treats - Headache"
        ]
        # Assuming embeddings and sim_matrix are precomputed appropriately.
        anchor_idx = 0  # "Aspirin - inhibits - COX-2"
        anchor, positive, hard_negative = select_triplet(anchor_idx, relation_texts, embeddings, sim_matrix)
        # positive might be "Ibuprofen - inhibits - COX-1" (same relation type 'inhibits')
        # hard_negative might be "Paracetamol - treats - Headache" (different relation type but high similarity)
    """
    anchor_text = relation_texts[anchor_idx] # Retrieve the anchor relation text based on the provided index.
    _, anchor_rel_type, _ = parse_relation_text(anchor_text) # Parse the anchor text to extract entity1, relation_type, and entity2.
    
    best_positive = None
    best_positive_sim = -1.0 # Start with the lowest possible similarity.
    
    best_hard_negative = None
    best_hard_sim = -1.0 # Start with the lowest possible similarity.
    
    # Iterate over all candidate relations to find the best matches.
    for idx, candidate_text in enumerate(relation_texts):
        if idx == anchor_idx: # Skip the anchor itself.
            continue
        sim_score = sim_matrix[anchor_idx][idx] # Retrieve the precomputed similarity score between the anchor and the candidate.
        # Parse the candidate relation text.
        _, candidate_rel_type, _ = parse_relation_text(candidate_text)
        
        # Check for positive pair: same relation type and similarity above threshold.
        if candidate_rel_type == anchor_rel_type and sim_score > pos_threshold:
            # Update if this candidate has a higher similarity than the current best.
            if sim_score > best_positive_sim:
                best_positive = candidate_text
                best_positive_sim = sim_score
        
        # Check for hard negative: different relation type and similarity above hard_threshold.
        if candidate_rel_type != anchor_rel_type and sim_score > hard_threshold:
            # Update if this candidate has a higher similarity than the current best.
            if sim_score > best_hard_sim:
                best_hard_negative = candidate_text
                best_hard_sim = sim_score
                
    # Return the anchor text along with the best positive and hard negative matches found.
    return anchor_text, best_positive, best_hard_negative

# Example usage:
if __name__ == "__main__":
    # For demonstration, select the anchor as the first relation text (index 0).
    anchor_idx = 0
    anchor, positive, hard_negative = select_triplet(anchor_idx, relation_texts, embeddings, sim_matrix)
    
    print("Anchor Relation:", anchor)
    print("Best Positive Pair:", positive)
    print("Best Hard Negative:", hard_negative)

Anchor Relation: 3175 - Association - D003924
Best Positive Pair: 300438 - Association - D006949
Best Hard Negative: D006160 - Bind - 189429


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from transformers import BertTokenizer

# Define the Hard Negative Contrastive Loss function.
class HardNegativeContrastiveLoss(nn.Module):
    def __init__(self, margin=0.5):
        """
        Initializes the HardNegativeContrastiveLoss with a specified margin.
        """
        super(HardNegativeContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, anchor, positive, hard_negative):
        # Calculate cosine similarities between anchor-positive and anchor-hard_negative pairs.
        pos_sim = F.cosine_similarity(anchor, positive)      # Similarity should be high.
        hard_sim = F.cosine_similarity(anchor, hard_negative)  # Similarity should be low.
        
        # Loss main aim is to ensure hard negative similarity is lower than positive similarity by at least the margin.
        loss = F.relu(self.margin + hard_sim - pos_sim)
        return loss.mean()

# A helper function to run one training step.
def train_step(model, loss_fn, optimizer, tokenizer, anchor_text, positive_text, hard_negative_text):
    """
    Executes a single training step: tokenization, embedding extraction, loss computation, and optimization.
    Returns:
        float: The computed loss value for this training step.
    """
    # Tokenize and obtain embeddings for each text.
    anchor_inputs = tokenizer(anchor_text, return_tensors="pt", padding=True, truncation=True)
    positive_inputs = tokenizer(positive_text, return_tensors="pt", padding=True, truncation=True)
    hard_negative_inputs = tokenizer(hard_negative_text, return_tensors="pt", padding=True, truncation=True)
    
    # Extract embeddings using the model.
    anchor_emb = model(anchor_inputs['input_ids'], anchor_inputs['attention_mask'])
    positive_emb = model(positive_inputs['input_ids'], positive_inputs['attention_mask'])
    hard_negative_emb = model(hard_negative_inputs['input_ids'], hard_negative_inputs['attention_mask'])
    
    # Compute the loss using the hard negative contrastive loss function.
    loss = loss_fn(anchor_emb, positive_emb, hard_negative_emb)
    
    # Perform backpropagation and an optimization step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss.item()

# Example usage:
if __name__ == "__main__":
    # Load the model and tokenizer using PubMedBERT.
    model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract"
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = RelationExtractionModel(bert_model_name=model_name)
    
    # Instantiate the hard negative contrastive loss function and the optimizer.
    loss_fn = HardNegativeContrastiveLoss(margin=0.5)
    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    
    # For demonstration, assume these texts were selected as anchor, positive, and hard negative samples:
    anchor_text = "D001943 - Positive_Correlation - c|INS|5382|C"
    positive_text = "D001943 - Positive_Correlation - c|DEL|4153|A"
    hard_negative_text = "672 - Association - D001943"
    
    # Run one training step and print the loss value.
    loss_value = train_step(model, loss_fn, optimizer, tokenizer, anchor_text, positive_text, hard_negative_text)
    print("Training step loss:", loss_value)

  return torch.load(checkpoint_file, map_location="cpu")
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Asking to truncate to max_lengt

Training step loss: 0.341005802154541


In [5]:
import numpy as np
from tqdm import tqdm

def optimized_select_triplets(relation_texts, sim_matrix, pos_threshold=0.90, hard_threshold=0.80):
    """
    Optimized selection of training triplets (anchor, positive, hard negative)
    from the relation texts based on the precomputed cosine similarity matrix.
    Returns:
        list of tuples: Each tuple
    """
    triplets = [] # List to store the selected triplets.
    n = len(relation_texts) # Total number of relation texts.
    
    # Iterate over each relation text to consider it as an anchor.
    for i, anchor_text in enumerate(relation_texts):
        # Parse the anchor text to extract its components.
        _, anchor_rel, _ = parse_relation_text(anchor_text)
        pos_indices = [] # List to store indices of positive samples.
        hard_indices = [] # List to store indices of hard negative samples.
        
        # Iterate over all other relation texts to find positives and hard negatives.
        for j in range(n):
            if j == i:
                continue # Skip comparing the anchor with itself.
                
            # Parse the candidate relation text.    
            _, cand_rel, _ = parse_relation_text(relation_texts[j])
            sim_score = sim_matrix[i, j]
            
            # Identify positive samples: same relation type and similarity above threshold.
            if cand_rel == anchor_rel and sim_score >= pos_threshold:
                pos_indices.append(j)
                
            # Identify hard negative samples: different relation type and similarity above threshold.
            elif cand_rel != anchor_rel and sim_score >= hard_threshold:
                hard_indices.append(j)
                
        # If both positive and hard negative samples are found, select the one with best cosine similarity.
        if pos_indices and hard_indices:
            # Select the positive sample with the highest similarity to the anchor.
            best_pos_idx = pos_indices[np.argmax([sim_matrix[i, j] for j in pos_indices])]
            # Select the hard negative sample with the highest similarity to the anchor.
            best_hard_idx = hard_indices[np.argmax([sim_matrix[i, j] for j in hard_indices])]
            # Append the selected triplet to the list.
            triplets.append((relation_texts[i], relation_texts[best_pos_idx], relation_texts[best_hard_idx]))
    return triplets

# Generate optimized triplets using the extracted relation_texts and precomputed sim_matrix.
triplets = optimized_select_triplets(relation_texts, sim_matrix, pos_threshold=0.90, hard_threshold=0.80)
print(f"Total training triplets found: {len(triplets)}")

# Define optimizer and loss function.
optimizer = optim.Adam(model.parameters(), lr=1e-5)
loss_fn = HardNegativeContrastiveLoss(margin=0.5)

# Full training loop over multiple epochs with a progress bar.
num_epochs = 5
for epoch in range(num_epochs):
    total_loss = 0.0 # Initialize total loss for the epoch.
    # Iterate over each triplet and perform a training step.
    for anchor_text, positive_text, hard_negative_text in tqdm(triplets, desc=f"Epoch {epoch+1}"):
        loss_value = train_step(model, loss_fn, optimizer, tokenizer, anchor_text, positive_text, hard_negative_text)
        total_loss += loss_value # Accumulate the loss.
        
    avg_loss = total_loss / len(triplets)
    print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

Total training triplets found: 4178


Epoch 1: 100%|████████████████████████████| 4178/4178 [5:21:03<00:00,  4.61s/it]


Epoch 1/5, Average Loss: 0.0050


Epoch 2: 100%|████████████████████████████| 4178/4178 [5:30:25<00:00,  4.75s/it]


Epoch 2/5, Average Loss: 0.0005


Epoch 3: 100%|████████████████████████████| 4178/4178 [1:20:36<00:00,  1.16s/it]


Epoch 3/5, Average Loss: 0.0000


Epoch 4: 100%|██████████████████████████████| 4178/4178 [40:22<00:00,  1.72it/s]


Epoch 4/5, Average Loss: 0.0005


Epoch 5: 100%|██████████████████████████████| 4178/4178 [44:17<00:00,  1.57it/s]

Epoch 5/5, Average Loss: 0.0000





In [6]:
# Save the trained model's state_dict to a file.
torch.save(model.state_dict(), "trained_relation_extraction_model.pth")
print("✅ Model saved successfully as 'trained_relation_extraction_model.pth'")

✅ Model saved successfully as 'trained_relation_extraction_model.pth'


In [15]:
from sklearn.metrics import silhouette_score, calinski_harabasz_score

def evaluate_clustering(all_embeddings, cluster_labels):
    """
    Evaluate clustering performance using common metrics:
    1. Silhouette Score
    2. Calinski-Harabasz Index
    
    Returns:
        dict: A dictionary containing the computed metrics.
    """
    # Silhouette Score: measures how similar each sample is to its own cluster compared to other clusters.
    # The score ranges from -1 to 1, where a higher score indicates that samples are well matched to their own cluster and poorly matched to neighboring clusters.
    sil_score = silhouette_score(all_embeddings, cluster_labels)
    
    # Calinski-Harabasz Index: also known as the Variance Ratio Criterion.
    # It is the ratio of the sum of between-clusters dispersion to within-cluster dispersion.
    # A higher Calinski-Harabasz score indicates better-defined clusters.
    ch_score = calinski_harabasz_score(all_embeddings, cluster_labels)
    
    return {
        "silhouette_score": sil_score,
        "calinski_harabasz_score": ch_score
    }

# Example usage after Step 6:
if __name__ == "__main__":
    
    # Evaluate the clustering performance using the defined function.
    metrics = evaluate_clustering(all_embeddings, cluster_labels)
    
    print("Clustering Performance Metrics:")
    print(f"Silhouette Score: {metrics['silhouette_score']:.4f}")
    print(f"Calinski-Harabasz Score: {metrics['calinski_harabasz_score']:.4f}")

Clustering Performance Metrics:
Silhouette Score: 0.9210
Calinski-Harabasz Score: 73200.9298


In [16]:
import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from scipy.optimize import linear_sum_assignment

# Extract ground truth relation types from relation texts.
# For example, if a relation text is "3175 - Association - D003924",
# the parse_relation_text function extracts "Association" as the relation type.
ground_truth_labels = [parse_relation_text(text)[1] for text in relation_texts]

# Identify unique relation types and assign each a unique integer identifier.
unique_labels = list(set(ground_truth_labels))
label_to_int = {label: i for i, label in enumerate(unique_labels)}

# Convert ground truth labels to their corresponding integer identifiers.
y_true = np.array([label_to_int[label] for label in ground_truth_labels])

# Predicted cluster labels obtained from your clustering algorithm (e.g., KMeans).
y_pred = np.array(cluster_labels)

def cluster_accuracy(y_true, y_pred):
    """
    Align cluster labels (y_pred) to ground truth labels (y_true)
    using the Hungarian algorithm and compute clustering accuracy.
    """
    # Compute the contingency matrix (confusion matrix between y_true and y_pred).
    cont_matrix = confusion_matrix(y_true, y_pred)
    
    # Apply the Hungarian algorithm to find the optimal one-to-one mapping
    # between predicted clusters and true labels that maximizes correct assignments.
    row_ind, col_ind = linear_sum_assignment(-cont_matrix)
    
    # Calculate the accuracy based on the optimal assignment.
    accuracy = cont_matrix[row_ind, col_ind].sum() / np.sum(cont_matrix)
    return accuracy, row_ind, col_ind

# Align clusters and compute clustering accuracy.
acc, row_ind, col_ind = cluster_accuracy(y_true, y_pred)
print(f"Clustering Accuracy: {acc:.4f}")

# Create a mapping from original cluster label to the aligned ground truth label.
mapping = {old: new for new, old in zip(col_ind, row_ind)}
# Apply the mapping to the predicted cluster labels.
y_pred_aligned = np.array([mapping[label] for label in y_pred])

# Compute precision, recall, and F1 score using macro averaging.
# Macro averaging calculates the metric independently for each class and then takes the average,
# treating all classes equally regardless of their size.
precision = precision_score(y_true, y_pred_aligned, average='macro')
recall = recall_score(y_true, y_pred_aligned, average='macro')
f1 = f1_score(y_true, y_pred_aligned, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Clustering Accuracy: 0.9679
Precision: 0.3609
Recall: 0.3750
F1 Score: 0.3678


  _warn_prf(average, modifier, msg_start, len(result))
