In [1]:
import json

filename = "random0.3_chat.json"

# Load the JSON file containing chat abstracts
with open(filename, "r", encoding="utf-8") as f:
    chat_dict = json.load(f)

# Prepare a list of chat abstracts with "gen" label
chat_abstracts = []
for edge_key, entry in chat_dict.items():
    out_text = entry.get("out", "").strip()
    if out_text:
        chat_abstracts.append(("gen", out_text))

In [2]:
from Bio import Entrez
import time

# Set your email (required by NCBI)
Entrez.email = "zilinfg@gmail.com"  # Replace with your email

def get_pmids(query, count=100):
    """Fetch up to `count` PubMed IDs for a given search query."""
    handle = Entrez.esearch(db="pubmed", term=query, retmax=count)
    record = Entrez.read(handle)
    handle.close()
    return record["IdList"]

def fetch_abstracts(pmids):
    """Given a list of PMIDs, returns a dict { pmid: abstract_text }."""
    abstracts = {}
    batch_size = 100

    for i in range(0, len(pmids), batch_size):
        chunk = pmids[i: i + batch_size]
        ids_str = ",".join(chunk)

        handle = Entrez.efetch(db="pubmed", id=ids_str, retmode="xml")
        records = Entrez.read(handle)
        handle.close()

        for article in records.get("PubmedArticle", []):
            pmid_elem = article["MedlineCitation"]["PMID"]
            pmid = str(pmid_elem)

            abstract_text = ""
            art = article["MedlineCitation"]["Article"]
            if art.get("Abstract") and art["Abstract"].get("AbstractText"):
                parts = art["Abstract"]["AbstractText"]
                if isinstance(parts, list):
                    collected = []
                    for chunk in parts:
                        if isinstance(chunk, str):
                            collected.append(chunk)
                        elif isinstance(chunk, dict):
                            collected.append(chunk.get("#text", ""))
                    abstract_text = " ".join(collected)
                elif isinstance(parts, str):
                    abstract_text = parts

            abstracts[pmid] = abstract_text
        time.sleep(0.4)

    # Ensure all requested PMIDs have an entry (even if no abstract was found)
    for pmid in pmids:
        abstracts.setdefault(pmid, "")

    return abstracts

# --- USAGE ---
query = "cancer"  # Change this to any search topic of interest
pmids = get_pmids(query, count=100)
pubmed_abstracts = fetch_abstracts(pmids)

# Prepare a list of PubMed abstracts with "real" label
pubmed_abstracts = [("real", abstract) for pmid, abstract in pubmed_abstracts.items()]


In [12]:
import random

# Combine both sets of abstracts
combined_abstracts = chat_abstracts + pubmed_abstracts

# Shuffle the combined abstracts
random.shuffle(combined_abstracts)

print(len(chat_abstracts))

124


In [11]:
import random
import google.generativeai as genai

def ask_gemini_which_is_more_factual(abstract_text_1, abstract_text_2, gemini_model):
    """
    Sends two abstracts to the Gemini API and returns the number (1 or 2)
    of the abstract deemed more factual.

    Args:
        abstract_text_1 (str): The text of the first abstract.
        abstract_text_2 (str): The text of the second abstract.
        gemini_model: Your initialized Gemini model instance.

    Returns:
        int: 1 if Abstract 1 is chosen, 2 if Abstract 2 is chosen.
             Returns None or raises an error on failure/ambiguous response.
    """
    prompt = f"""You will be presented with two abstracts, labeled 'Abstract 1' and 'Abstract 2'. Your task is to determine which of these two abstracts is more likely to be a factual, scientifically accurate abstract originating from a genuine biomedical research publication.

Please respond with only 'Abstract 1' or 'Abstract 2' to indicate your choice. Do not provide any explanation or additional text.

Abstract 1:
{abstract_text_1}

Abstract 2:
{abstract_text_2}
"""
    # --- Make the actual API call ---
    response = gemini_model.generate_content(prompt)

    if "Abstract 1" in response.text and "Abstract 2" not in response.text:
        return 1
    elif "Abstract 2" in response.text and "Abstract 1" not in response.text:
        return 2
    else:
        print(f"Warning: Ambiguous Gemini response: {response.text}")
        return None # Or handle as an error, or random choice for simulation

    # --- SIMULATION / MOCK RESPONSE (Remove for real use) ---
    # print(f"Simulating Gemini call for:\nAbstract 1: {abstract_text_1[:60]}...\nAbstract 2: {abstract_text_2[:60]}...")
    # chosen = random.choice([1, 2])
    # print(f"Gemini (mock) chose: Abstract {chosen}")
    # return chosen
    # --- END SIMULATION ---


# --- Tournament Setup ---
num_abstracts = len(combined_abstracts)
indices = list(range(num_abstracts))
wins = {i: 0 for i in indices}
comparisons_made = {i: 0 for i in indices} 

K_comparisons_per_abstract = 5 

# Initialize your Gemini Model here (outside the loop)
genai.configure(api_key="AIzaSyBikvlkj7sCaiyGV9qhfSXZt54uQM9Gsec")
gemini_llm_model = genai.GenerativeModel('gemini-1.5-flash')


print(f"Starting tournament. Total abstracts: {num_abstracts}. Rounds (K): {K_comparisons_per_abstract}")

for round_num in range(K_comparisons_per_abstract):
    print(f"\n--- Tournament Round {round_num + 1} of {K_comparisons_per_abstract} ---")
    shuffled_indices = list(indices) # Work with a copy
    random.shuffle(shuffled_indices)

    for i in range(0, num_abstracts, 2):
        if i + 1 < num_abstracts:
            idx1 = shuffled_indices[i]
            idx2 = shuffled_indices[i+1]

            original_label1, text1 = combined_abstracts[idx1]
            original_label2, text2 = combined_abstracts[idx2]

            print(f"Comparing pair: Index {idx1} (Label: {original_label1}) vs Index {idx2} (Label: {original_label2})")

            # --- Actual Gemini Call ---
            # winner_number = ask_gemini_which_is_more_factual(text1, text2, gemini_llm_model)
            # For testing without real API calls, use a mock:
            winner_number = ask_gemini_which_is_more_factual(text1, text2, gemini_llm_model)
           

            if winner_number == 1:
                wins[idx1] += 1
            elif winner_number == 2:
                wins[idx2] += 1
            else:
                # Handle cases where Gemini couldn't decide or there was an error
                print(f"Skipping pair ({idx1}, {idx2}) due to ambiguous/failed Gemini response in round {round_num+1}.")
                continue # Or implement retry logic

            comparisons_made[idx1] += 1
            comparisons_made[idx2] += 1
            # time.sleep(1) # Add delay if needed for API rate limits

print("\n--- Tournament Finished ---")

# --- Select Top 100 (with tie-breaking) ---
sorted_indices_by_wins = sorted(indices, key=lambda i: wins[i], reverse=True)

top_100_final_indices = []
num_selected = 0
# Get unique win counts in descending order
unique_win_scores = sorted(list(set(wins.values())), reverse=True)

for score in unique_win_scores:
    if num_selected == 100:
        break
    
    # Collect all abstracts that achieved this score
    abstracts_with_this_score = [idx for idx in sorted_indices_by_wins if wins[idx] == score]
    
    if num_selected + len(abstracts_with_this_score) <= 100:
        # If we can take all abstracts with this score without exceeding 100
        top_100_final_indices.extend(abstracts_with_this_score)
        num_selected += len(abstracts_with_this_score)
    else:
        # This score group straddles the 100 mark, need to pick randomly
        needed = 100 - num_selected
        random.shuffle(abstracts_with_this_score) # Shuffle for random selection
        top_100_final_indices.extend(abstracts_with_this_score[:needed])
        num_selected += needed
        break # We have reached 100

top_100_abstracts_data = [(combined_abstracts[i][0], combined_abstracts[i][1], wins[i]) for i in top_100_final_indices]

print("\n--- Top 100 Selected Abstracts (Label, Text, Wins) ---")
for i, (label, text, win_count) in enumerate(top_100_abstracts_data):
    print(f"{i+1}. Index: {top_100_final_indices[i]}, Original Label: {label}, Wins: {win_count}, Text: {text[:100].strip()}...")

# Optional: Analyze the distribution of original labels in the top 100
real_in_top_100 = sum(1 for label, _, _ in top_100_abstracts_data if label == "real")
gen_in_top_100 = sum(1 for label, _, _ in top_100_abstracts_data if label == "gen")
print(f"\nComposition of Top 100: Real abstracts: {real_in_top_100}, Generated abstracts: {gen_in_top_100}")
print(f"\nWin counts distribution for all abstracts: {sorted(wins.items(), key=lambda item: item[1], reverse=True)}")
# print(f"Comparisons per abstract: {comparisons_made}")

Starting tournament. Total abstracts: 224. Rounds (K): 5

--- Tournament Round 1 of 5 ---
Comparing pair: Index 102 (Label: real) vs Index 198 (Label: gen)
Comparing pair: Index 161 (Label: real) vs Index 120 (Label: gen)
Comparing pair: Index 138 (Label: gen) vs Index 87 (Label: gen)
Comparing pair: Index 51 (Label: gen) vs Index 103 (Label: gen)
Comparing pair: Index 94 (Label: real) vs Index 17 (Label: real)
Comparing pair: Index 104 (Label: real) vs Index 38 (Label: gen)
Comparing pair: Index 176 (Label: real) vs Index 183 (Label: real)
Comparing pair: Index 16 (Label: real) vs Index 125 (Label: real)

Skipping pair (16, 125) due to ambiguous/failed Gemini response in round 1.
Comparing pair: Index 56 (Label: gen) vs Index 63 (Label: real)
Comparing pair: Index 134 (Label: real) vs Index 90 (Label: real)
Comparing pair: Index 131 (Label: real) vs Index 127 (Label: gen)
Comparing pair: Index 32 (Label: gen) vs Index 121 (Label: real)
Comparing pair: Index 75 (Label: gen) vs Index 9 