# Evaluation

In [None]:
from bib_dedupe.dedupe_benchmark import DedupeBenchmarker
from bib_dedupe.bib_dedupe import block, match, merge, prep, cluster
from asreview.data import load_data, ASReviewData
from datetime import datetime
import evaluation

In [None]:
# Benchmark bib-dedupe and ASReview on the labelled datasets.
# Every dataset name is printed, but only those whose name contains
# "special" are actually processed.
for benchmark_path in evaluation.get_dataset_labels():
    print(f"Dataset: {benchmark_path}")
    if "special" not in benchmark_path:
        continue

    dataset_dir = f"../bib_dedupe/data/{benchmark_path}"
    dedupe_benchmark = DedupeBenchmarker(benchmark_path=dataset_dir)
    records_df = dedupe_benchmark.get_records_for_dedupe()

    # --- bib-dedupe: prep -> block -> match -> cluster -> merge ---
    # The timestamp is taken before prep(), so it precedes the whole pipeline.
    timestamp = datetime.now()
    records_df = prep(records_df)  # from here on records_df is the prepped frame
    actual_blocked_df = block(records_df)
    matched_df = match(actual_blocked_df)
    duplicate_id_sets = cluster(matched_df)
    merged_df = merge(records_df, duplicate_id_sets=duplicate_id_sets)
    result = dedupe_benchmark.compare_dedupe_id(
        records_df=records_df,
        merged_df=merged_df,
        timestamp=timestamp,
    )
    evaluation.append_to_output(result, package_name="bib-dedupe")

    # Export the intermediate frames for a more detailed, case-by-case
    # comparison when debugging.
    dedupe_benchmark.export_cases(
        prepared_records_df=records_df,
        blocked_df=actual_blocked_df,
        matched_df=matched_df,
    )

    # --- ASReview ---
    # NOTE(review): ASReview is fed the output of prep(), not the raw
    # records, and its timestamp is taken after ASReviewData is built --
    # confirm both are intended for a fair comparison.
    asdata = ASReviewData(records_df)
    timestamp = datetime.now()
    merged_df = asdata.drop_duplicates()
    result = dedupe_benchmark.compare_dedupe_id(
        records_df=records_df,
        merged_df=merged_df,
        timestamp=timestamp,
    )
    evaluation.append_to_output(result, package_name="asreview")
    print()