In [1]:
import json
from paperqa import Settings, ask
import os
from paperqa import Settings, Docs
from paperqa.agents.main import agent_query
from paperqa.agents.search import get_directory_index

from aviary.env import TaskDataset
from ldp.agent import SimpleAgent
from ldp.alg.callbacks import MeanMetricsCallback
from ldp.alg.runners import Evaluator, EvaluatorConfig


In [2]:
import os
#print current working directory
print("Current working directory:", os.getcwd())
#change to the directory where the source located
os.chdir("../data")
print("Current working directory:", os.getcwd())

Current working directory: /Users/apple/Documents/GitLab_Projects/master_project/xx823/notebooks
Current working directory: /Users/apple/Documents/GitLab_Projects/master_project/xx823/data


In [3]:
# Load the questions
with open("Questions/formatted_questions/questions.json", "r") as f:
    questions = json.load(f)

# Search for this specific question
target_question = "What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?"
matching_questions = [q for q in questions if q["question"] == target_question]

if matching_questions:
    print("Found the question!")
    question_data = matching_questions[0]
else:
    print("Question not found in dataset")

Found the question!


In [4]:
question_data

{'question': 'What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?',
 'choices': ['(A) No effect',
  '(B) Insufficient information to answer the question',
  '(C) Decrease',
  '(D) Increase'],
 'correct_answer': 'A',
 'unsure_option': 'B',
 'sources': ['https://doi.org/10.1101/2024.01.31.578101'],
 'ideal': 'No effect',
 'distractors': ['Increase', 'Decrease']}

In [5]:
def format_multiple_choice_question(question, choices):
    formatted = f"Question:Q: {question}; Options: "
    formatted += " ".join(choices)
    return formatted


In [6]:
# Set up PaperQA2 with the paper directory
settings = Settings(
    temperature=0.0,  # Keep deterministic
    paper_directory="paper-2a"  # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
)


In [7]:
formatted_q = format_multiple_choice_question(question_data['question'], question_data['choices'])
formatted_q

'Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase'

In [8]:
import nest_asyncio
nest_asyncio.apply()

In [9]:
# Create settings with explicit configuration
settings = Settings(
    paper_directory='paper-2a',
    agent={"index": {
        "sync_with_paper_directory": True,
        "recurse_subdirectories": True
    }}
)



In [10]:

built_index = await get_directory_index(settings=settings)

# Print index information 
print(f"Using index: {settings.get_index_name()}")
index_files = await built_index.index_files
print(f"Number of indexed files: {len(index_files)}")
print("Indexed files:")
for file in index_files:
    print(f"- {file}")

Using index: pqa_index_cbff7fcfd4c40d16300be97065eb8f7c
Number of indexed files: 2
Indexed files:
- The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf
- darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf


In [11]:
import nest_asyncio
nest_asyncio.apply()

In [12]:
from paperqa.settings import Settings, AgentSettings

settings = Settings(
    llm="gpt-4o-mini",
    llm_config={
        "model_list": [
            {
                "model_name": "gpt-4o-mini",
                "litellm_params": {
                    "model": "gpt-4o-mini",
                    "temperature": 0.1,
                    "max_tokens": 4096,
                },
            }
        ],
        "rate_limit": {
            "gpt-4o-mini": "30000 per 1 minute",
        },
    },
    summary_llm="gpt-4o-mini",
    summary_llm_config={
        "rate_limit": {
            "gpt-4o-mini": "30000 per 1 minute",
        },
    },
    agent=AgentSettings(
        agent_llm="gpt-4o-mini",
        agent_llm_config={
            "rate_limit": {
                "gpt-4o-mini": "30000 per 1 minute",
            },
        }
    ),
    embedding="text-embedding-3-small",
    temperature=0.5,  # Keep deterministic
    paper_directory='paper-2a' # Directory containing the paper with DOI: 10.1101/2024.01.31.578101

)

In [13]:
answer_pqa = ask(formatted_q, settings=settings)

In [14]:
grader_input_data = []

In [15]:
print("Answer:", answer_pqa)

Answer: session=PQASession(id=UUID('c4c418b0-dfe7-4a05-8e53-6d72c998d1ff'), question='Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase', answer='Bone marrow stromal cell-conditioned media (SCM) does not significantly alter the expression of the CD8α receptor in cultured OT-1 T cells. Studies indicate that when OT-1 T cells were cultured in SCM, there were no significant changes in the surface expression of the CD8α co-receptor compared to unconditioned medium (UCM) or adipocyte-conditioned medium (ACM) after 24 hours. Statistical analysis confirmed that the differences in CD8α expression levels among these conditions were not significant, with P-values indicating a lack of effect (kellner2024thetcellniche pages 3-5, 5-6).\n\nWhile SCM does not negatively impact CD8α expression, there is a noted

In [16]:
# Assuming `response` is the object you want to inspect
attributes = [attr for attr in dir(answer_pqa) if not attr.startswith('_')]
print(attributes)

['bibtex', 'construct', 'copy', 'dict', 'duration', 'from_orm', 'get_summary', 'json', 'model_computed_fields', 'model_config', 'model_construct', 'model_copy', 'model_dump', 'model_dump_json', 'model_extra', 'model_fields', 'model_fields_set', 'model_json_schema', 'model_parametrized_name', 'model_post_init', 'model_rebuild', 'model_validate', 'model_validate_json', 'model_validate_strings', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'session', 'stats', 'status', 'strip_answer', 'timing_info', 'update_forward_refs', 'validate']


In [17]:
answer_dict=answer_pqa.dict()

/var/folders/wg/h6z2ybxs0bnfsz9zc363lx_w0000gn/T/ipykernel_28870/2588850827.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  answer_dict=answer_pqa.dict()


In [18]:
print(answer_dict.keys())

dict_keys(['session', 'bibtex', 'status', 'timing_info', 'duration', 'stats'])


In [19]:
print(answer_dict['session']['answer'])

Bone marrow stromal cell-conditioned media (SCM) does not significantly alter the expression of the CD8α receptor in cultured OT-1 T cells. Studies indicate that when OT-1 T cells were cultured in SCM, there were no significant changes in the surface expression of the CD8α co-receptor compared to unconditioned medium (UCM) or adipocyte-conditioned medium (ACM) after 24 hours. Statistical analysis confirmed that the differences in CD8α expression levels among these conditions were not significant, with P-values indicating a lack of effect (kellner2024thetcellniche pages 3-5, 5-6).

While SCM does not negatively impact CD8α expression, there is a noted trend towards increased surface expression of CD8α in ACM, suggesting a potential enhancing effect of ACM on CD8α levels (kellner2024thetcellniche pages 5-6). Overall, the data imply that SCM maintains CD8α expression without inducing significant alterations (kellner2024thetcellniche pages 2-3). Therefore, the correct answer to the questio

In [20]:
grader_input_data.append({
        "question": question_data["question"],
        "choices": question_data["choices"],
        "response": answer_dict['session']['answer'],
        "correct_answer": question_data["correct_answer"],
        "unsure_option": question_data["unsure_option"]
    })

In [21]:
#print current working directory
print("Current working directory:", os.getcwd())
#change to the directory where the source located
os.chdir("../Reproduction/Pipeline_reproduction/src")
print("Current working directory:", os.getcwd())

Current working directory: /Users/apple/Documents/GitLab_Projects/master_project/xx823/data
Current working directory: /Users/apple/Documents/GitLab_Projects/master_project/xx823/Reproduction/Pipeline_reproduction/src


In [22]:
from MC_Grader import MultipleChoiceGrader, process_results

In [23]:
grader = MultipleChoiceGrader(
    # Add your OpenAI API key here if not set in environment
    results_dir="grading_results"
)


In [24]:
# Use the fixed version instead of the original
metrics = process_results(grader, grader_input_data, save_prefix="paperqa2_evaluation")

2025-05-10 14:11:34,837 - MC_Grader - INFO - Processing question 1/1
2025-05-10 14:11:34,838 - MC_Grader - INFO - Processing question: What effect does bone marrow stromal cell-conditio...


[33mHumanProxy[0m (to GraderAgent):


    Question:
    What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?

    Options:
    (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase

    PaperQA2 Response:
    Bone marrow stromal cell-conditioned media (SCM) does not significantly alter the expression of the CD8α receptor in cultured OT-1 T cells. Studies indicate that when OT-1 T cells were cultured in SCM, there were no significant changes in the surface expression of the CD8α co-receptor compared to unconditioned medium (UCM) or adipocyte-conditioned medium (ACM) after 24 hours. Statistical analysis confirmed that the differences in CD8α expression levels among these conditions were not significant, with P-values indicating a lack of effect (kellner2024thetcellniche pages 3-5, 5-6).

While SCM does not negatively impact CD8α expression, there is a noted trend towards i

2025-05-10 14:11:36,480 - MC_Grader - INFO - Results saved to grading_results



Results saved to grading_results
- CSV: paperqa2_evaluation_extracted_answers.csv
- JSON: paperqa2_evaluation_extraction_results.json

You can now manually verify the results and calculate metrics.
