# Evaluation

In [1]:
from colrev.ops.dedupe_benchmark import DedupeBenchmarker
from bib_dedupe.bib_dedupe import BibDeduper
from bib_dedupe.util import BibDedupeUtil
from asreview.data import load_data, ASReviewData
from datetime import datetime
import pandas as pd

In [2]:
# Run every benchmark dataset through bib-dedupe and ASReview and append the
# comparison result of each package to the shared evaluation output.
bd_util = BibDedupeUtil()

# Dataset labels that are left out of the run; add further labels here
# (e.g. "srsr", "depression") to skip them as well.
skipped_datasets = ("problem_cases",)

for benchmark_path in bd_util.get_dataset_labels():
    if benchmark_path in skipped_datasets:
        continue
    print(benchmark_path)

    dedupe_benchmark = DedupeBenchmarker(benchmark_path=f"../data/{benchmark_path}")
    records_df = dedupe_benchmark.get_records_for_dedupe()
    print(f"{records_df.shape[0]} records")

    # --- bib-dedupe -------------------------------------------------------
    deduper = BibDeduper()
    # Taken right before blocking and handed to compare_dedupe_id
    # (presumably to derive the runtime -- confirm against colrev).
    bib_dedupe_start = datetime.now()
    blocked_pairs_df = deduper.block_pairs_for_deduplication(records_df=records_df)
    matched = deduper.identify_true_matches(blocked_pairs_df)
    bib_dedupe_merged_df = deduper.get_merged_df(records_df, matches=matched)
    bib_dedupe_result = dedupe_benchmark.compare_dedupe_id(
        records_df=records_df,
        merged_df=bib_dedupe_merged_df,
        timestamp=bib_dedupe_start,
    )
    bd_util.append_to_output(bib_dedupe_result, package_name="bib-dedupe")

    # Pair-level comparison, exported so individual cases can be debugged.
    detailed_results = dedupe_benchmark.compare(
        blocked_df=blocked_pairs_df,
        predicted=matched["duplicate_origin_sets"],
        updated_paper_pairs=matched["updated_paper_pairs"],
    )
    dedupe_benchmark.export_cases(
        prepared_records_df=records_df, results=detailed_results
    )

    # --- ASReview ---------------------------------------------------------
    asdata = ASReviewData(records_df)
    # As in the original, the clock starts after the ASReviewData object has
    # been constructed.
    asreview_start = datetime.now()
    asreview_merged_df = asdata.drop_duplicates()
    asreview_result = dedupe_benchmark.compare_dedupe_id(
        records_df=records_df,
        merged_df=asreview_merged_df,
        timestamp=asreview_start,
    )
    bd_util.append_to_output(asreview_result, package_name="asreview")
    

digital_work
Start prep at 2023-11-23 18:18:31.635678
Called (to understand whether this is the longest running thread in the parallel processing)
7159 records
Blocked      103 pairs with ['title', 'abstract']
Blocked    10571 pairs with ['first_author', 'short_container_title']
Blocked    28210 pairs with ['year', 'volume', 'number']
Blocked     2649 pairs with ['first_author', 'year']
Blocked      929 pairs with ['title', 'volume']
Blocked        0 pairs with ['isbn', 'volume', 'number']
Blocked      445 pairs with ['doi']
Blocked        0 pairs with ['isbn', 'volume', 'year']
Blocked        0 pairs with ['isbn', 'volume', 'pages']
Blocked   106583 pairs with ['short_container_title', 'volume', 'year']
Blocked    24185 pairs with ['short_container_title', 'volume', 'number']
Blocked     5606 pairs with ['title', 'short_container_title']
Blocked     1060 pairs with ['title', 'year']
Blocked     4286 pairs with ['title', 'first_author']
Blocked      771 pairs with ['title', 'pages']
Bl

special_case: Garcillan-Barcia M. Pilarde la Cruz Fernando
53001 records
Blocked    87900 pairs with ['first_author', 'year']
Blocked    26785 pairs with ['first_author', 'short_container_title']
Blocked    76839 pairs with ['year', 'volume', 'number']
Blocked    21393 pairs with ['doi']
Blocked    20766 pairs with ['title', 'volume']
Blocked    21112 pairs with ['title', 'short_container_title']
Blocked    11967 pairs with ['isbn', 'volume', 'number']
Blocked    81217 pairs with ['isbn', 'volume', 'year']
Blocked    18756 pairs with ['year', 'number', 'pages']
Blocked    16873 pairs with ['title', 'pages']
Blocked    30214 pairs with ['short_container_title', 'volume', 'number']
Blocked   176645 pairs with ['short_container_title', 'volume', 'year']
Blocked    17217 pairs with ['year', 'volume', 'pages']
Blocked    21934 pairs with ['title', 'year']
Blocked    21179 pairs with ['title', 'first_author']
Blocked     4634 pairs with ['title', 'abstract']
Blocked     5089 pairs with ['isb

Start matching at 2023-11-23 21:05:58.455420
TODO : continue here!
Runtime: 0:03:43
Runtime: 0:00:00
cytology_screening
Start prep at 2023-11-23 21:06:31.885495
setting missing status
Called (to understand whether this is the longest running thread in the parallel processing)
1856 records
Blocked        0 pairs with ['doi']
Blocked        0 pairs with ['title', 'abstract']
Blocked        0 pairs with ['year', 'volume', 'number']
Blocked     1268 pairs with ['first_author', 'year']
Blocked        0 pairs with ['year', 'number', 'pages']
Blocked        0 pairs with ['isbn', 'volume', 'number']
Blocked        0 pairs with ['isbn', 'volume', 'year']
Blocked        0 pairs with ['isbn', 'volume', 'pages']
Blocked        0 pairs with ['short_container_title', 'volume', 'number']
Blocked     1088 pairs with ['first_author', 'short_container_title']
Blocked      841 pairs with ['title', 'volume']
Blocked      836 pairs with ['year', 'volume', 'pages']Blocked      746 pairs with ['title', 'shor

# Plots

In [3]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

In [4]:
# Load every recorded evaluation run and keep only the most recent row for
# each (package, dataset) pair.
results_df = pd.read_csv("../output/evaluation.csv")

# sort + drop_duplicates(keep="last") replaces groupby(...).apply(...): apply
# on the grouping columns is deprecated since pandas 2.2, and once those
# columns are excluded from apply the "package"/"dataset" columns would be
# lost after reset_index(drop=True).
grouped_df = (
    results_df.dropna(subset=["package", "dataset"])  # groupby skipped NaN keys too
    .sort_values("time", kind="stable")
    .drop_duplicates(subset=["package", "dataset"], keep="last")
    .sort_values(["package", "dataset"], kind="stable")  # same row order as groupby
    .reset_index(drop=True)
)

datasets = grouped_df["dataset"].unique()

# Identical for every figure, so computed once instead of once per dataset.
latest_time = results_df["time"].max()

# Metric column -> subplot title, in left-to-right panel order.
metric_titles = {
    "false_positive_rate": "False positive rate by package",
    "sensitivity": "Sensitivity by package",
}

for dataset in datasets:
    dataset_df = grouped_df[grouped_df["dataset"] == dataset]

    fig, axes = plt.subplots(1, 2, figsize=(14, 3))
    fig.suptitle(f"Dataset: {dataset}", fontsize=14, fontweight="bold")

    for ax, (metric, title) in zip(axes, metric_titles.items()):
        dataset_df.plot(ax=ax, x="package", y=metric, kind="barh", legend=False)
        ax.set_title(title)
        ax.set_ylabel("")
        # Print the value at the end of each horizontal bar.
        for bar in ax.patches:
            ax.annotate(
                f"{bar.get_width():.2f}",
                (bar.get_width(), bar.get_y() + bar.get_height() / 2),
                ha="left",
                va="center",
            )

    fig.text(
        0.5,
        0.001,
        f"Time of last evaluation run: {latest_time}",
        ha="center",
        fontsize=10,
    )

    fig.tight_layout()
    fig.savefig(str(Path(f"../output/evaluation_{dataset}.png")))
    # Close explicitly so figures do not accumulate in memory across datasets.
    plt.close(fig)