In [15]:
import json
from paperqa import Settings, ask
import os
from paperqa import Settings, Docs
from paperqa.agents.main import agent_query
from paperqa.agents.search import get_directory_index

from aviary.env import TaskDataset
from ldp.agent import SimpleAgent
from ldp.alg.callbacks import MeanMetricsCallback
from ldp.alg.runners import Evaluator, EvaluatorConfig


In [16]:
# Load the questions
with open("/Users/apple/Documents/GitLab_Projects/master_project/xx823/Reproduction/questions/formatted_questions/questions.json", "r") as f:
    questions = json.load(f)

# Search for this specific question
target_question = "What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?"
matching_questions = [q for q in questions if q["question"] == target_question]

if matching_questions:
    print("Found the question!")
    question_data = matching_questions[0]
else:
    print("Question not found in dataset")

Found the question!


In [17]:
question_data

{'question': 'What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?',
 'choices': ['(A) No effect',
  '(B) Insufficient information to answer the question',
  '(C) Decrease',
  '(D) Increase'],
 'correct_answer': 'A',
 'unsure_option': 'B',
 'sources': ['https://doi.org/10.1101/2024.01.31.578101'],
 'ideal': 'No effect',
 'distractors': ['Increase', 'Decrease']}

In [18]:
def format_multiple_choice_question(question, choices):
    formatted = f"Question:Q: {question}; Options: "
    formatted += " ".join(choices)
    return formatted


In [19]:
# Set up PaperQA2 with the paper directory
settings = Settings(
    temperature=0.0,  # Keep deterministic
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"  # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
)


In [20]:
formatted_q = format_multiple_choice_question(question_data['question'], question_data['choices'])
formatted_q

'Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase'

In [21]:
import nest_asyncio
nest_asyncio.apply()

In [22]:
# Create settings with explicit configuration
settings = Settings(
    paper_directory='/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a',
    agent={"index": {
        "sync_with_paper_directory": True,
        "recurse_subdirectories": True
    }}
)



In [23]:
paper_dir = "/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"
if not os.path.exists(paper_dir):
    print(f"Directory {paper_dir} does not exist!")
    # Create it
    os.makedirs(paper_dir, exist_ok=True)
else:
    print(f"Directory {paper_dir} exists, checking contents:")
    files = os.listdir(paper_dir)
    print(f"Found {len(files)} files: {files}")

Directory /Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a exists, checking contents:
Found 3 files: ['.DS_Store', 'darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf', 'The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf']


In [24]:

built_index = await get_directory_index(settings=settings)

# Print index information 
print(f"Using index: {settings.get_index_name()}")
index_files = await built_index.index_files
print(f"Number of indexed files: {len(index_files)}")
print("Indexed files:")
for file in index_files:
    print(f"- {file}")

Using index: pqa_index_f149e57fac17a94e4c4330421b6db4f7
Number of indexed files: 2
Indexed files:
- darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf
- The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf


In [25]:
import nest_asyncio
nest_asyncio.apply()

In [26]:
# Set up PaperQA2 with the paper directory
settings = Settings(
    temperature=0.5,  # Keep deterministic
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"  # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
)

In [None]:
from paperqa.settings import Settings, AgentSettings

settings = Settings(
    llm="gpt-4o-mini",
    llm_config={
        "model_list": [
            {
                "model_name": "gpt-4o-mini",
                "litellm_params": {
                    "model": "gpt-4o-mini",
                    "temperature": 0.1,
                    "max_tokens": 4096,
                },
            }
        ],
        "rate_limit": {
            "gpt-4o-mini": "30000 per 1 minute",
        },
    },
    summary_llm="gpt-4o-mini",
    summary_llm_config={
        "rate_limit": {
            "gpt-4o-mini": "30000 per 1 minute",
        },
    },
    agent=AgentSettings(
        agent_llm="gpt-4o-mini",
        agent_llm_config={
            "rate_limit": {
                "gpt-4o-mini": "30000 per 1 minute",
            },
        }
    ),
    embedding="text-embedding-3-small",
    temperature=0.5,  # Keep deterministic
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"  # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
)

In [27]:
answer_pqa = ask(formatted_q, settings=settings)

Encountered exception during tool call for tool gen_answer: litellm.NotFoundError: OpenAIException - Error code: 403 - {'error': {'message': 'Project `proj_8WO6cHG6Ics15ycd35qOE2RY` does not have access to model `gpt-4o-2024-11-20`', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}. Received Model Group=gpt-4o-2024-11-20
Available Model Group Fallbacks=None


In [28]:
grader_input_data = []

In [29]:
print("Answer:", answer_pqa)

Answer: session=PQASession(id=UUID('374e2c30-f4bb-4016-8c8f-a3035be22951'), question='Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase', answer='', has_successful_answer=None, context='', contexts=[], references='', formatted_answer='', graded_answer=None, cost=0.0, token_counts={}, config_md5='7fd1b044336dab80ed99681d2bf8f996', tool_history=[['gen_answer']], used_contexts=set()) bibtex=None status=<AgentStatus.FAIL: 'fail'> timing_info=None duration=0.0 stats=None


In [30]:
# Assuming `response` is the object you want to inspect
attributes = [attr for attr in dir(answer_pqa) if not attr.startswith('_')]
print(attributes)

['bibtex', 'construct', 'copy', 'dict', 'duration', 'from_orm', 'get_summary', 'json', 'model_computed_fields', 'model_config', 'model_construct', 'model_copy', 'model_dump', 'model_dump_json', 'model_extra', 'model_fields', 'model_fields_set', 'model_json_schema', 'model_parametrized_name', 'model_post_init', 'model_rebuild', 'model_validate', 'model_validate_json', 'model_validate_strings', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'session', 'stats', 'status', 'strip_answer', 'timing_info', 'update_forward_refs', 'validate']


In [31]:
answer_dict=answer_pqa.dict()

/var/folders/wg/h6z2ybxs0bnfsz9zc363lx_w0000gn/T/ipykernel_27485/2588850827.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  answer_dict=answer_pqa.dict()


In [32]:
print(answer_dict.keys())

dict_keys(['session', 'bibtex', 'status', 'timing_info', 'duration', 'stats'])


In [33]:
print(answer_dict['session']['answer'])




In [34]:
grader_input_data.append({
        "question": question_data["question"],
        "choices": question_data["choices"],
        "response": answer_dict['session']['answer'],
        "correct_answer": question_data["correct_answer"],
        "unsure_option": question_data["unsure_option"]
    })

In [35]:
import sys

In [36]:
sys.path.append("src")

In [37]:
from MC_Grader import MultipleChoiceGrader, process_results

In [38]:
from config import OPENAI_API_KEY
from openai import OpenAI

In [39]:
grader = MultipleChoiceGrader(
    # Add your OpenAI API key here if not set in environment
    openai_api_key=OPENAI_API_KEY,
    results_dir="grading_results"
)


In [40]:
# Use the fixed version instead of the original
metrics = process_results(grader, grader_input_data, save_prefix="paperqa2_evaluation")

2025-03-02 12:32:09,338 - MC_Grader - INFO - Processing question 1/1
2025-03-02 12:32:09,339 - MC_Grader - INFO - Processing question: What effect does bone marrow stromal cell-conditio...


[33mHumanProxy[0m (to GraderAgent):


    Question:
    What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?

    Options:
    (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase

    PaperQA2 Response:
    

    What option did PaperQA2 select? Remember to return ONLY the letter.
    

--------------------------------------------------------------------------------
[33mGraderAgent[0m (to HumanProxy):

B

--------------------------------------------------------------------------------


2025-03-02 12:32:11,191 - MC_Grader - INFO - Results saved to grading_results



Results saved to grading_results
- CSV: paperqa2_evaluation_extracted_answers.csv
- JSON: paperqa2_evaluation_extraction_results.json

You can now manually verify the results and calculate metrics.
