In [1]:
import pandas as pd
import re
from difflib import SequenceMatcher

In [2]:
bert_df = pd.read_csv("../02_extract_results_CITATIONS/targeted_abstracts.csv")
gpt_df  = pd.read_csv("targeted_abstracts_gpt.csv")

In [3]:
bert_df = bert_df.rename(columns={"Targeted_Abstract": "Abstract_bert"})
gpt_df  = gpt_df.rename(columns={"Targeted_Abstract": "Abstract_gpt"})

In [4]:
merged = pd.merge(
    bert_df[["PMID", "Abstract_bert"]],
    gpt_df[["PMID",  "Abstract_gpt"]],
    on="PMID",
    how="inner"
)

In [5]:
def split_sentences(text):
    return [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]

def extract_bert_sentences(text):
    return [s.replace("[TAR]", "").strip()
            for s in split_sentences(text)
            if "[TAR]" in s]

def extract_gpt_sentences(text):
    return [s.replace("[GPT]", "").strip()
            for s in split_sentences(text)
            if "[GPT]" in s]

def sentence_overlap(a, b, thresh=0.7):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio() >= thresh


In [6]:
stats = []
total_matched = total_bert = total_gpt = 0
zero_bert = zero_gpt = 0

for _, row in merged.iterrows():
    bert_sents = extract_bert_sentences(row["Abstract_bert"])
    gpt_sents  = extract_gpt_sentences(row["Abstract_gpt"])
    matched    = sum(
        any(sentence_overlap(g, b) for b in bert_sents)
        for g in gpt_sents
    )

    # counts
    b_cnt = len(bert_sents)
    g_cnt = len(gpt_sents)

    # track zeros
    if b_cnt == 0: zero_bert += 1
    if g_cnt == 0: zero_gpt  += 1

    # per‐paper precision/recall
    prec = matched / b_cnt if b_cnt else 0.0
    rec  = matched / g_cnt if g_cnt else 0.0
    f1   = (2 * prec * rec) / (prec + rec) if (prec + rec) else 0.0

    stats.append({
        "PMID":       row["PMID"],
        "BERT_count": b_cnt,
        "GPT_count":  g_cnt,
        "Matched":    matched,
        "Precision":  round(prec, 4),
        "Recall":     round(rec, 4),
        "F1":         round(f1, 4)
    })

    # accumulate for micro
    total_matched += matched
    total_bert   += b_cnt
    total_gpt    += g_cnt

df_stats = pd.DataFrame(stats)

In [7]:
macro_p = df_stats["Precision"].mean()
macro_r = df_stats["Recall"].mean()
macro_f1= df_stats["F1"].mean()

micro_p = total_matched / total_bert if total_bert else 0.0
micro_r = total_matched / total_gpt  if total_gpt  else 0.0
micro_f1= (2 * micro_p * micro_r) / (micro_p + micro_r) if (micro_p + micro_r) else 0.0

In [8]:
print(f"Papers processed: {len(df_stats)}")
print(f"Papers w/ zero BERT tags: {zero_bert}")
print(f"Papers w/ zero GPT tags : {zero_gpt}\n")

print("=== Macro-averaged scores ===")
print(f"Precision: {macro_p:.4f}")
print(f"Recall   : {macro_r:.4f}")
print(f"F1 Score : {macro_f1:.4f}\n")

print("=== Micro-averaged scores ===")
print(f"Precision: {micro_p:.4f}")
print(f"Recall   : {micro_r:.4f}")
print(f"F1 Score : {micro_f1:.4f}")

df_stats.to_csv("bert_vs_gpt_stats.csv", index=False)
print("\nSaved bert_vs_gpt_stats.csv")

Papers processed: 693
Papers w/ zero BERT tags: 72
Papers w/ zero GPT tags : 11

=== Macro-averaged scores ===
Precision: 0.5778
Recall   : 0.7002
F1 Score : 0.5981

=== Micro-averaged scores ===
Precision: 0.6373
Recall   : 0.7425
F1 Score : 0.6859

Saved bert_vs_gpt_stats.csv
