# Evaluation Logic Test

This notebook tests the `HungarianEvaluator` using the processed Kaggle dataset.

In [6]:
import sys
import os
import json

# Add project root to path.
# NOTE: "../.." is resolved against the current working directory (via
# os.path.abspath), not against this notebook's location, so the entry that
# gets appended depends on where the kernel was started.
# Presumably this is what makes the `core` package import below resolvable
# -- confirm against the repository layout.
sys.path.append(os.path.abspath("../.."))

from core.parsing.evaluators.evaluator import HungarianEvaluator

In [7]:
def load_json(path):
    """Read the file at ``path`` and return its parsed JSON content.

    The file is opened explicitly as UTF-8 instead of relying on the
    platform's default text encoding, so non-ASCII characters decode the
    same way on every operating system.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

# Directory (relative path) that is scanned for input files.
data_dir = "tests_data/resume_and_texts_kaggle/some"

# Keep only the entries whose name ends in ".json", in sorted order.
files = sorted(entry for entry in os.listdir(data_dir) if entry.endswith(".json"))

print(f"Found {len(files)} JSON files.")
print(files)

Found 7 JSON files.
['ADVOCATE_14445309.json', 'BUSINESS-DEVELOPMENT_65708020.json', 'DESIGNER_37058472.json', 'HEALTHCARE_23617240.json', 'HR_16852973.json', 'INFORMATION-TECHNOLOGY_36856210.json', 'TEACHER_12467531.json']


In [8]:
# Use the first two files (in sorted order) as ground truth and prediction.
#
# Fail fast when fewer than two files are available: the cells below read
# `gt_data` and `pred_data`, so merely printing a message here would leave
# them undefined (or, worse, stale from an earlier kernel run).
if len(files) < 2:
    raise RuntimeError(
        f"Not enough files to compare: found {len(files)} JSON file(s) "
        f"in {data_dir!r}, need at least 2."
    )

file_gt, file_pred = files[0], files[1]

path_gt = os.path.join(data_dir, file_gt)
path_pred = os.path.join(data_dir, file_pred)

print(f"Ground Truth: {file_gt}")
print(f"Predicted:    {file_pred}")

gt_data = load_json(path_gt)
pred_data = load_json(path_pred)

Ground Truth: ADVOCATE_14445309.json
Predicted:    BUSINESS-DEVELOPMENT_65708020.json


In [9]:
# Comparison settings for the 'work' section: the key fields plus the
# comparison mode named for each field.
config = {
    "key_fields": ["name", "position"],
    "fields": {
        "name": "substring",
        "position": "substring",
        "startDate": "date",
        "summary": "text_similarity",
    },
}

# Evaluator instance used by the evaluation cell.
evaluator = HungarianEvaluator()

# 'work' entries of each loaded document; a missing key yields an empty list.
gt_work, pred_work = (doc.get("work", []) for doc in (gt_data, pred_data))

print(f"GT Work Items: {len(gt_work)}")
print(f"Pred Work Items: {len(pred_work)}")

GT Work Items: 2
Pred Work Items: 3


In [10]:
# Evaluate the predicted 'work' entries against the ground-truth ones.
metrics = evaluator.evaluate_section(gt_work, pred_work, config)

# Header line followed by the metrics rendered as indented JSON.
print("Evaluation Metrics:", json.dumps(metrics, indent=2), sep="\n")

Evaluation Metrics:
{
  "precision": 0.6666666666666666,
  "recall": 1.0,
  "f1": 0.8,
  "field_scores": {
    "name": 0.5,
    "position": 0.0,
    "startDate": 0.0,
    "summary": 1.0
  }
}
