In [1]:
!pip install anthropic
!pip install openai
!pip install deepseek




In [2]:
!pip install openpyxl



In [None]:
from pathlib import Path
import pandas as pd
from openai import OpenAI
import re
from anthropic import Anthropic






In [16]:
pricing = {
    # GPT Models
    "gpt-5": {"input": 1.25, "output": 10.00, "cached": 0.125},
    "GPT-5 mini": {"input": 0.25, "output": 2.00, "cached": 0.025},
    "GPT-5 nano": {"input": 0.05, "output": 0.40, "cached": 0.005},
    "GPT-5 Chat Latest": {"input": 1.25, "output": 10.00, "cached": 0.125},
    "GPT-5 Codex": {"input": 1.25, "output": 10.00, "cached": 0.125},
    "GPT-4.1": {"input": 2.00, "output": 8.00, "cached": 0.50},
    "GPT-4.1 mini": {"input": 0.40, "output": 1.60, "cached": 0.10},
    "GPT-4.1 nano": {"input": 0.10, "output": 0.40, "cached": 0.025},
    "GPT-4o": {"input": 2.50, "output": 10.00, "cached": 1.25},

    # DeepSeek Models
    "deepseek-chat": {"input": 0.56, "output": 1.68, "cached": 0.07},
    "DeepSeek Reasoner V3.1 (Thinking)": {"input": 0.56, "output": 1.68, "cached": 0.07},

    # Claude Models
    "Claude Opus 4.1": {"input": 15, "output": 75, "cached_5m": 18.75, "cached_1h": 30, "cache_hit": 1.50},
    "Claude Opus 4": {"input": 15, "output": 75, "cached_5m": 18.75, "cached_1h": 30, "cache_hit": 1.50},
    "Claude Sonnet 4": {"input": 3, "output": 15, "cached_5m": 3.75, "cached_1h": 6, "cache_hit": 0.30},
    "Claude Sonnet 3.7": {"input": 3, "output": 15, "cached_5m": 3.75, "cached_1h": 6, "cache_hit": 0.30},
    "Claude Haiku 3.5": {"input": 0.80, "output": 4, "cached_5m": 1, "cached_1h": 1.6, "cache_hit": 0.08},
    "Claude Haiku 3": {"input": 0.25, "output": 1.25, "cached_5m": 0.30, "cached_1h": 0.50, "cache_hit": 0.03}
}

In [12]:



def complete_with_continuation(
    prompt,
    model="GPT-5 nano",
    provider="openai",
    client=None,
    save_path="usage.xlsx",
    max_tokens=4096,
    claude_cache="cached_5m",  # for Claude models, choose: "cached_5m", "cached_1h", "cache_hit"
):
    """
    Generate a completion using OpenAI, DeepSeek, or Claude (Anthropic),
    handling continuation if the output is cut off. Tracks cost via pricing dict.
    """
    messages = [{"role": "user", "content": prompt}]
    full_output = ""
    total_input_tokens = 0
    total_cached_tokens = 0
    total_output_tokens = 0

    save_path = Path(save_path)

 
    while True:
        if provider in  ["openai", "deepseek"]:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                # max_tokens=max_tokens,
            )
            choice = response.choices[0]
            content = choice.message.content
            finish_reason = choice.finish_reason

            usage = response.usage
            input_tokens = usage.prompt_tokens
            cached_input_tokens = getattr(usage, "cached_prompt_tokens", 0)
            output_tokens = usage.completion_tokens

        elif provider == "claude":
            response = client.messages.create(
                model=model,
                max_tokens=max_tokens,
                messages=messages,
            )
            content = response.content[0].text
            finish_reason = getattr(response, "stop_reason", None)

            usage = response.usage
            input_tokens = usage.input_tokens
            cached_input_tokens = 0
            output_tokens = usage.output_tokens

        # Collect results
        full_output += content
        total_input_tokens += input_tokens
        total_cached_tokens += cached_input_tokens
        total_output_tokens += output_tokens

        if finish_reason == "length":
            messages.append({"role": "assistant", "content": content})
            messages.append({"role": "user", "content": "Continue where you left off."})
        else:
            break

    # --- Cost estimation ---
    if model not in pricing:
        raise ValueError(f"Model '{model}' not found in pricing dictionary.")

    rates = pricing[model]

    if provider == "claude":
        input_cost_rate = rates["input"] / 1_000_000
        output_cost_rate = rates["output"] / 1_000_000
        cached_cost_rate = rates.get(claude_cache, 0) / 1_000_000
    else:
        input_cost_rate = rates["input"] / 1_000_000
        output_cost_rate = rates["output"] / 1_000_000
        cached_cost_rate = rates.get("cached", 0) / 1_000_000

    total_cost = (
        total_input_tokens * input_cost_rate
        + total_cached_tokens * cached_cost_rate
        + total_output_tokens * output_cost_rate
    )

    usage_data = {
        "Metric": [
            "Input tokens",
            "Cached input tokens",
            "Output tokens",
            "Grand total tokens",
            "Estimated total cost",
        ],
        "Value": [
            total_input_tokens,
            total_cached_tokens,
            total_output_tokens,
            total_input_tokens + total_cached_tokens + total_output_tokens,
            total_cost,
        ],
    }

    df_usage = pd.DataFrame(usage_data)
    save_path.parent.mkdir(parents=True, exist_ok=True)
    df_usage.to_excel(save_path, index=False)

    return full_output

In [18]:

    
# 60 questions
# read the Excel file, first sheet
df_60 = pd.read_excel("60questions.xlsx", sheet_name=0)

# get all id and question column questions in a list, id followed by question
# 
# make list into \n separated, str 

combined_list = df_60.apply(lambda row: f"{row['id']}: {row['question']}", axis=1).tolist()

# Join into one string with newline separators
questions_block = "\n".join(combined_list)

print(questions_block)


1101: Does the paper report previously unpublished data?
1102: Does the paper report HIV sequences?
1103: Does the paper report the results of in vitro passage experiments?
1104: Does the paper report novel in vitro antiretroviral susceptibility data?
2101: Does the paper report GenBank accession numbers for sequenced HIV isolates?
2102: Does the paper report GenBank accession numbers for sequenced HIV isolates other than those for laboratory HIV isolates?
2103: Which are the genbank accession numbers reported in the paper?
2202: Does the paper report lists of mutations for individual sequenced HIV isolates?
2301: Which HIV species were studied in the paper?
2302: What were the subtypes of the sequenced viruses reported in the paper?
2303: Which HIV genes were sequenced in the paper?
2304: Does the paper report the results of HIV pol sequences?
2401: Which geographic regions and/or countries were the sequences from in the paper?
2402: What years were the sequenced samples obtained in t

In [None]:
# gpt-4o

model = "gpt-5"
# model = "deepseek-chat"
file_path = Path("papers 250") / "60"
rows = []

with open("explain_multi_questions.txt", "r", encoding="utf-8") as f:
    template = f.read()

# client = Anthropic(api_key=claude_api_key)
client = OpenAI(api_key=openai_api_key)
# client = OpenAI(
#     api_key=deepseek_api_key,
#     base_url="https://api.deepseek.com"
# )
tmp_content = None
tmp_name = None
for folder in file_path.iterdir():
    if folder.is_dir():
        doc_folder = folder / "document"
        if doc_folder.exists():
            for md_file in doc_folder.glob("*.checked.md"):
                with md_file.open(encoding="utf-8") as f:
                    content = f.read()
                    #test 1 first
                    tmp_name = md_file.stem.replace(".checked", "")  # Remove the .checked part
                    tmp_content = content

                    output_file = Path(f"Results/{model}/{tmp_name}/60_questions_answer.txt")
                
                    # Skip if output file already exists
                    if output_file.exists():
                        print(f"Skipping {tmp_name}, output file already exists.")
                        continue

                    

                    prompt_60 = template.format(
                        paper_content=tmp_content,
                        question=questions_block.strip()
                    )

                    # --- Step 6: Save final prompt ---
                    # output_file = Path(f"Results/{model}/{tmp_name}/_60_questions_prompt.txt")
                    # with output_file.open("w", encoding="utf-8") as f:
                    #     f.write(prompt_60)
                    # print(prompt_60)

                    
                    answer = complete_with_continuation(prompt_60, model=model, provider="openai",client = client, save_path = Path(f"Results/{model}/{tmp_name}/cost.xlsx"), max_tokens=4096)
                    # answer = complete_with_continuation(prompt_60, model=model, provider="deepseek",client = client, save_path = Path(f"Results/{model}/{tmp_name}/cost.xlsx"), max_tokens=4096)

                    print(answer)
                    output_file = Path(f"Results/{model}/{tmp_name}/60_questions_answer.txt")


                    # Create the parent directories if they don't exist
                    output_file.parent.mkdir(parents=True, exist_ok=True)
                    with output_file.open("w", encoding="utf-8") as f:
                        f.write(answer)
                    

In [48]:




pattern = r"Question:\s*(.*?)\s*Evidence:\s*(.*?)\s*Rationale:\s*(.*?)\s*Answer:\s*(.*?)(?=\n---|\nQuestion:|$)"


base_dir = Path(f"Results/{model}")

# Loop over every folder in base_dir
for tmp_folder in base_dir.iterdir():
    if tmp_folder.is_dir():
        file_path = tmp_folder / "60_questions_answer.txt"
        if file_path.exists():
            # Read the file
            with open(file_path, "r") as f:
                content = f.read()
            

    # for text file in output_file = Path(f"Results/{model}/{tmp_name}/60_questions_answer.txt")

    matches = re.findall(pattern, content, re.DOTALL)

    # --- Step 3: Build dataframe ---
    records = []

    for i, (q, ev, rat, ans) in enumerate(matches):
        # strip extra whitespace
        q, ev, rat, ans = q.strip(), ev.strip(), rat.strip(), ans.strip()

        # Clean the answer
        cleaned_ans = ans.replace("\n", " ")         # remove line breaks
        cleaned_ans = re.sub(r'"+', '', cleaned_ans) # remove repeated quotes
        cleaned_ans = re.sub(r'\s+', ' ', cleaned_ans).strip()  # collapse multiple spaces
        
        # match to QID + original Question
        if i < len(combined_list):
            id_str, question_str = combined_list[i].split(": ", 1)
            records.append({
                "PMID": tmp_name,
                "QID": id_str,
                "Question": question_str,
                "Evidence": ev,
                "Rationale": rat,
                "Answer": cleaned_ans
            })

    output_df = pd.DataFrame(records)

    # --- Step 4: Save to Excel ---
    output_df.to_excel(f"{tmp_folder}/gpt_answers_60.xlsx", index=False)







## Claude results

In [None]:


response = client.messages.create(
    model="claude-3-haiku-20240307", # claude-3-haiku-20240307
    max_tokens=4096,
    messages=[
        {"role": "user", "content": prompt_60}
    ]
)

answer = response.content[0].text

# --- Step 1: Token usage ---
input_tokens = response.usage.input_tokens
output_tokens = response.usage.output_tokens
total_tokens = input_tokens + output_tokens

print(f"Input tokens: {input_tokens}")
print(f"Output tokens: {output_tokens}")
print(f"Total tokens: {total_tokens}")

# --- Step 2: Pricing (Claude Opus 4.1 as of 2025-09) ---
# Input: $15.00 per 1M tokens = $0.000015 per token
# Output: $75.00 per 1M tokens = $0.000075 per token
input_cost = input_tokens * 0.000015
output_cost = output_tokens * 0.000075
total_cost = input_cost + output_cost

print(f"Estimated cost: ${total_cost:.4f}")


Input tokens: 5262
Output tokens: 3866
Total tokens: 9128
Estimated cost: $0.3689


Here are the answers to the questions:

Question: 1101

Evidence: The paper states that "The epidemic characteristics of human immunodeficiency virus type 1 (HIV-1) in Zhejiang Province have not been systematically identified." This suggests that the data reported in the paper are previously unpublished.

Rationale: The paper presents new data on the epidemic characteristics and molecular epidemiology of HIV-1 in Zhejiang Province, China, which has not been systematically studied before.

Answer: Yes

Question: 1102

Evidence: The paper states that "Totally, 332 *gag* and 229 *pol* gene fragments were amplified and sequenced from the 451 individual samples, respectively."

Rationale: The paper reports the sequencing of HIV-1 gag and pol gene fragments from patient samples.

Answer: Yes

Question: 1103 

Evidence: There is no mention of in vitro passage experiments in the paper.

Rationale: The paper does not report any results from in vitro passage experiments.

Answer: No

Question: 1