<a href="https://colab.research.google.com/github/nicolaCirillo/ate-it/blob/main/evaluation/subtask_b_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import codecs
import json
from collections import defaultdict

def load_data(file_path):
  """
  Loads term-to-cluster assignments from a CSV or JSON file.

  CSV files are read with pandas. When the file has columns named "term"
  and "cluster" those are used; otherwise the first two columns are taken
  as (term, cluster), in that order. Any further columns are ignored and
  cluster ids are converted to int.

  JSON files must hold a top-level "data" list whose items have "term" and
  "cluster" keys; cluster values are returned exactly as stored in the file.

  Args:
    file_path: The path to the input file (CSV or JSON), as a str or a
      path-like object. The extension check is case-insensitive.

  Returns:
    A dictionary where keys are terms and values are cluster ids. If a term
    occurs more than once, the last occurrence wins.

  Raises:
    ValueError: If the file format is not supported, or if a CSV file has
      fewer than two columns.
  """
  # str() lets pathlib.Path objects through; lower() accepts ".CSV"/".JSON".
  suffix_source = str(file_path).lower()

  if suffix_source.endswith('.csv'):
    # Load data from CSV file
    df = pd.read_csv(file_path)
    if {"term", "cluster"}.issubset(df.columns):
      # Prefer explicit column names so extra or reordered columns are harmless.
      terms, clusters = df["term"], df["cluster"]
    elif df.shape[1] >= 2:
      # Fall back to the positional (term, cluster) layout.
      terms, clusters = df.iloc[:, 0], df.iloc[:, 1]
    else:
      raise ValueError("CSV file must contain at least two columns (term, cluster).")
    data = {term: int(cluster) for term, cluster in zip(terms, clusters)}
  elif suffix_source.endswith('.json'):
    # Load data from JSON file
    with open(file_path, 'r', encoding='utf-8') as f:
      json_data = json.load(f)
    # Extract terms from JSON data
    data = {item["term"]: item["cluster"] for item in json_data["data"]}
  else:
    # Raise error for unsupported file formats
    raise ValueError("Unsupported file format. Only CSV and JSON files are supported.")
  return data

In [2]:
import numpy as np

class BCubed_calculator:
  """
  Per-item BCubed precision and recall for a gold and a predicted clustering.

  Both clusterings are given as dicts mapping item -> cluster id. On
  construction the dicts are inverted into cluster id -> set of items
  (`gold_cluster`, `pred_cluster`) so each per-item score is a set
  intersection.
  """

  def __init__(self, gold, pred):
    """
    Args:
      gold: dict mapping each item to its gold cluster id.
      pred: dict mapping each item to its predicted cluster id.
    """
    self.gold = gold
    self.pred = pred
    self.gold_cluster = defaultdict(set)
    self.pred_cluster = defaultdict(set)
    for item, clus_id in gold.items():
      self.gold_cluster[clus_id].add(item)
    for item, clus_id in pred.items():
      self.pred_cluster[clus_id].add(item)

  @staticmethod
  def _members(assignment, clusters, item):
    """
    Returns the set of items sharing `item`'s cluster in one clustering.

    An item absent from `assignment` has no cluster, so an empty set is
    returned. The lookup never indexes the defaultdict with a missing key,
    which would insert an empty cluster as a side effect, and never treats
    "absent" as the cluster id None, which may be a real id.
    """
    if item not in assignment:
      return frozenset()
    return clusters.get(assignment[item], frozenset())

  def bc_precision_item(self, item):
    """
    BCubed precision of one predicted item.

    This is the fraction of the items in `item`'s predicted cluster that
    also belong to `item`'s gold cluster. It is 0 when `item` has no gold
    cluster.

    Raises:
      KeyError: If `item` is not in the predicted clustering.
    """
    pred_members = self.pred_cluster[self.pred[item]]
    gold_members = self._members(self.gold, self.gold_cluster, item)
    TP = len(pred_members.intersection(gold_members))
    FP = len(pred_members) - TP
    return TP/(FP + TP)

  def bc_recall_item(self, item):
    """
    BCubed recall of one gold item.

    This is the fraction of the items in `item`'s gold cluster that also
    belong to `item`'s predicted cluster. It is 0 when `item` was not
    predicted at all.

    Raises:
      KeyError: If `item` is not in the gold clustering.
    """
    gold_members = self.gold_cluster[self.gold[item]]
    pred_members = self._members(self.pred, self.pred_cluster, item)
    TP = len(pred_members.intersection(gold_members))
    FN = len(gold_members) - TP
    return TP/(TP + FN)

def bcubed_precision(gold, pred):
  """
  Averages the per-item BCubed precision over every item in `pred`.

  Args:
    gold: dict mapping item -> gold cluster id.
    pred: dict mapping item -> predicted cluster id.

  Returns:
    The mean of `bc_precision_item` over the predicted items.
  """
  calculator = BCubed_calculator(gold, pred)
  per_item_scores = []
  for term in calculator.pred:
    per_item_scores.append(calculator.bc_precision_item(term))
  return np.average(per_item_scores)

def bcubed_recall(gold, pred):
  """
  Averages the per-item BCubed recall over every item in `gold`.

  Args:
    gold: dict mapping item -> gold cluster id.
    pred: dict mapping item -> predicted cluster id.

  Returns:
    The mean of `bc_recall_item` over the gold items.
  """
  calculator = BCubed_calculator(gold, pred)
  per_item_scores = list(map(calculator.bc_recall_item, calculator.gold))
  return np.average(per_item_scores)

def bcubed_f1(gold, pred):
  """
  Harmonic mean of BCubed precision and BCubed recall.

  Precision and recall are each computed once and reused.

  Args:
    gold: dict mapping item -> gold cluster id.
    pred: dict mapping item -> predicted cluster id.

  Returns:
    2 * P * R / (P + R), or 0.0 when both precision and recall are 0
    (instead of a 0/0 division that produces nan).
  """
  precision = bcubed_precision(gold, pred)
  recall = bcubed_recall(gold, pred)
  denominator = precision + recall
  # Both scores are non-negative, so a zero sum means no overlap at all.
  if denominator == 0:
    return 0.0
  return 2 * precision * recall / denominator

In [3]:
# Prediction file scored by the single-file evaluation cell.
# Paths are relative to this notebook's directory, so the notebook runs from
# any checkout of the repository rather than only on one machine.
# Other prediction files that can be selected here:
#   ../baseline/baseline_b_1.json
#   ../src/predictions/subtask_b_dev_vanilla_preds.csv
#   ../src/predictions/subtask_b_dev_spacy_baseline_preds.csv
#   ../src/predictions/subtask_b_dev_spacy_kmeans_preds.csv
#   ../src/predictions/subtask_b_dev_nltk_baseline_preds.csv
#   ../src/predictions/subtask_b_dev_nltk_tfidf_preds.csv
#   ../src/predictions/subtask_b_dev_bert_kmeans_preds.csv
SYSTEM_OUTPUT_PATH = "../src/predictions/subtask_b_dev_bert_baseline_preds.csv"

GOLD_STANDARD_PATH = "../data/subtask_b_dev.json"

In [4]:
# Read the predicted and the gold term -> cluster assignments.
system_output = load_data(SYSTEM_OUTPUT_PATH)
gold_standard = load_data(GOLD_STANDARD_PATH)

# Score the prediction with the three BCubed metrics, in this fixed order.
precision, recall, f1 = (
    metric(gold_standard, system_output)
    for metric in (bcubed_precision, bcubed_recall, bcubed_f1)
)

# Report each score with three decimals.
print(f"BCubed Precision: {precision:.3f}")
print(f"BCubed Recall: {recall:.3f}")
print(f"BCubed F1 Score: {f1:.3f}")

BCubed Precision: 0.743
BCubed Recall: 0.862
BCubed F1 Score: 0.798


In [11]:
import os
import glob

# Locations of the prediction files, the baseline and the gold standard,
# relative to this notebook's directory.
predictions_dir = "../src/predictions/"
baseline_file = "../baseline/baseline_b_1.json"
gold_standard_file = "../data/subtask_b_dev.json"

# Collect all CSV files in the predictions directory. glob returns them in
# arbitrary, OS-dependent order, so sort for a reproducible evaluation log.
prediction_files = sorted(glob.glob(os.path.join(predictions_dir, "*.csv")))

# The baseline is evaluated after the system predictions.
prediction_files.append(baseline_file)

# Load gold standard once; it is shared by every evaluation.
gold_standard = load_data(gold_standard_file)

# One record per successfully evaluated file. The column list is fixed up
# front so the results table is well-formed even when no file was evaluated.
result_columns = ['file_name', 'bcubed_precision', 'bcubed_recall', 'bcubed_f1']
results = []

# Evaluate each prediction file
for pred_file in prediction_files:
    file_name = os.path.basename(pred_file)
    # Only dev-set predictions and the baseline are scored against this gold
    # standard. The check uses the file name, not the directory part of the path.
    if "subtask_b_dev_" not in file_name and pred_file != baseline_file:
        continue
    print(f"Evaluating {file_name}...")

    try:
        # Load system output
        system_output = load_data(pred_file)

        # Calculate bcubed metrics
        precision = bcubed_precision(gold_standard, system_output)
        recall = bcubed_recall(gold_standard, system_output)
        f1 = bcubed_f1(gold_standard, system_output)

        # Store results with cleaned file name
        clean_name = file_name.replace("subtask_b_dev_", "").replace("_preds", "").replace(".json", "").replace(".csv", "")
        results.append({
            'file_name': clean_name,
            'bcubed_precision': round(precision, 3),
            'bcubed_recall': round(recall, 3),
            'bcubed_f1': round(f1, 3)
        })
    except Exception as e:
        # Best effort: one unreadable or malformed file must not abort the
        # whole comparison, so report it and move on.
        print(f"Error evaluating {file_name}: {e}")

# Create DataFrame and sort by bcubed F1 score (descending). Passing the
# columns explicitly keeps sort_values from raising KeyError on an empty
# result list; the stable sort keeps tied systems in evaluation order.
results_df = pd.DataFrame(results, columns=result_columns)
results_df = results_df.sort_values('bcubed_f1', ascending=False, kind='stable').reset_index(drop=True)

# Display the table
print("\n" + "="*100)
print("EVALUATION RESULTS")
print("="*100)
print(results_df.to_string(index=False))

# Save to CSV
output_csv = "subtask_b_evaluation_results.csv"
results_df.to_csv(output_csv, index=False)
print(f"\nResults saved to {output_csv}")

Evaluating subtask_b_dev_bert_baseline_preds.csv...
Evaluating subtask_b_dev_bert_kmeans_preds.csv...
Evaluating subtask_b_dev_nltk_baseline_preds.csv...
Evaluating subtask_b_dev_nltk_tfidf_preds.csv...
Evaluating subtask_b_dev_spacy_baseline_preds.csv...
Evaluating subtask_b_dev_spacy_kmeans_preds.csv...
Evaluating subtask_b_dev_vanilla_preds.csv...
Evaluating baseline_b_1.json...

EVALUATION RESULTS
     file_name  bcubed_precision  bcubed_recall  bcubed_f1
    nltk_tfidf             0.800          0.941      0.865
spacy_baseline             0.755          0.873      0.810
 bert_baseline             0.743          0.862      0.798
       vanilla             0.656          0.870      0.748
 nltk_baseline             0.634          0.861      0.730
  spacy_kmeans             0.563          0.773      0.651
   bert_kmeans             0.539          0.774      0.635
  baseline_b_1             0.332          0.950      0.492

Results saved to subtask_b_evaluation_results.csv
