In [1]:
import json
from paperqa import Settings, ask
import os
from paperqa import Settings, Docs
from paperqa.agents.main import agent_query
from paperqa.agents.search import get_directory_index

from aviary.env import TaskDataset
from ldp.agent import SimpleAgent
from ldp.alg.callbacks import MeanMetricsCallback
from ldp.alg.runners import Evaluator, EvaluatorConfig


In [2]:
# Load the questions
with open("/Users/apple/Documents/GitLab_Projects/master_project/xx823/Reproduction/formatted_questions/questions.json", "r") as f:
    questions = json.load(f)

# Search for this specific question
target_question = "What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?"
matching_questions = [q for q in questions if q["question"] == target_question]

if matching_questions:
    print("Found the question!")
    question_data = matching_questions[0]
else:
    print("Question not found in dataset")

Found the question!


In [3]:
question_data

{'question': 'What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?',
 'choices': ['(A) No effect',
  '(B) Insufficient information to answer the question',
  '(C) Decrease',
  '(D) Increase'],
 'correct_answer': 'A',
 'unsure_option': 'B',
 'sources': ['https://doi.org/10.1101/2024.01.31.578101'],
 'ideal': 'No effect',
 'distractors': ['Increase', 'Decrease']}

In [4]:
def format_multiple_choice_question(question, choices):
    formatted = f"Question:Q: {question}; Options: "
    formatted += " ".join(choices)
    return formatted


In [5]:
# Set up PaperQA2 with the paper directory
settings = Settings(
    temperature=0.0,  # Keep deterministic
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"  # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
)


In [6]:
formatted_q = format_multiple_choice_question(question_data['question'], question_data['choices'])
formatted_q

'Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase'

In [7]:
import nest_asyncio
nest_asyncio.apply()

In [8]:
# Create settings with explicit configuration
settings = Settings(
    paper_directory='/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a',
    agent={"index": {
        "sync_with_paper_directory": True,
        "recurse_subdirectories": True
    }}
)



In [9]:
paper_dir = "/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"
if not os.path.exists(paper_dir):
    print(f"Directory {paper_dir} does not exist!")
    # Create it
    os.makedirs(paper_dir, exist_ok=True)
else:
    print(f"Directory {paper_dir} exists, checking contents:")
    files = os.listdir(paper_dir)
    print(f"Found {len(files)} files: {files}")

Directory /Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a exists, checking contents:
Found 3 files: ['.DS_Store', 'darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf', 'The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf']


In [10]:

built_index = await get_directory_index(settings=settings)

# Print index information 
print(f"Using index: {settings.get_index_name()}")
index_files = await built_index.index_files
print(f"Number of indexed files: {len(index_files)}")
print("Indexed files:")
for file in index_files:
    print(f"- {file}")

Using index: pqa_index_f149e57fac17a94e4c4330421b6db4f7
Number of indexed files: 2
Indexed files:
- darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf
- The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf


In [11]:
import nest_asyncio
nest_asyncio.apply()

In [12]:
# Set up PaperQA2 with the paper directory
settings = Settings(
    temperature=0.5,  # Keep deterministic
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"  # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
)

In [13]:
answer_pqa = ask(formatted_q, settings=settings)

In [14]:
grader_input_data = []

In [15]:
print("Answer:", answer_pqa)

Answer: session=PQASession(id=UUID('34ba3e0c-a90c-4b78-b2e8-be6f2115b710'), question='Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase', answer='Bone marrow stromal cell-conditioned media (SCM) does not significantly affect the expression of the CD8α receptor in cultured OT-1 T cells. Studies indicate that OT-1 T cells treated with SCM for 24 hours showed no significant difference in the geometric mean fluorescence intensity (MFI) of CD8α expression compared to those treated with unconditioned medium (UCM) (kellner2024thetcellniche pages 3-5). Similarly, another analysis found no substantial alterations in surface CD8α expression for T cells cultured in SCM, though adipocyte-conditioned media (ACM) demonstrated a trend toward increased expression under the same conditions (kellner2024thetcellni

In [16]:
# Assuming `response` is the object you want to inspect
attributes = [attr for attr in dir(answer_pqa) if not attr.startswith('_')]
print(attributes)

['bibtex', 'construct', 'copy', 'dict', 'duration', 'from_orm', 'get_summary', 'json', 'model_computed_fields', 'model_config', 'model_construct', 'model_copy', 'model_dump', 'model_dump_json', 'model_extra', 'model_fields', 'model_fields_set', 'model_json_schema', 'model_parametrized_name', 'model_post_init', 'model_rebuild', 'model_validate', 'model_validate_json', 'model_validate_strings', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'session', 'stats', 'status', 'strip_answer', 'timing_info', 'update_forward_refs', 'validate']


In [17]:
answer_dict=answer_pqa.dict()

/var/folders/wg/h6z2ybxs0bnfsz9zc363lx_w0000gn/T/ipykernel_35729/2588850827.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  answer_dict=answer_pqa.dict()


In [18]:
print(answer_dict.keys())

dict_keys(['session', 'bibtex', 'status', 'timing_info', 'duration', 'stats'])


In [19]:
print(answer_dict['session']['answer'])

Bone marrow stromal cell-conditioned media (SCM) does not significantly affect the expression of the CD8α receptor in cultured OT-1 T cells. Studies indicate that OT-1 T cells treated with SCM for 24 hours showed no significant difference in the geometric mean fluorescence intensity (MFI) of CD8α expression compared to those treated with unconditioned medium (UCM) (kellner2024thetcellniche pages 3-5). Similarly, another analysis found no substantial alterations in surface CD8α expression for T cells cultured in SCM, though adipocyte-conditioned media (ACM) demonstrated a trend toward increased expression under the same conditions (kellner2024thetcellniche pages 5-6).

While other sections of the study discuss the effects of SCM on cytoskeletal dynamics and T-cell receptor (TCR) mechanics, these findings do not directly address CD8α receptor expression (kellner2024thetcellniche pages 10-11; kellner2024thetcellniche pages 11-14). Additionally, experimental methods described in the study 

In [20]:
grader_input_data.append({
        "question": question_data["question"],
        "choices": question_data["choices"],
        "response": answer_dict['session']['answer'],
        "correct_answer": question_data["correct_answer"],
        "unsure_option": question_data["unsure_option"]
    })

In [3]:
import sys

In [4]:
sys.path.append("src")

In [23]:
from MC_Grader import MultipleChoiceGrader, process_results

In [24]:
from config import OPENAI_API_KEY
from openai import OpenAI

In [25]:
grader = MultipleChoiceGrader(
    # Add your OpenAI API key here if not set in environment
    openai_api_key=OPENAI_API_KEY,
    results_dir="grading_results"
)


In [26]:
# Use the fixed version instead of the original
metrics = process_results(grader, grader_input_data, save_prefix="paperqa2_evaluation")

2025-02-26 16:39:37,078 - MC_Grader - INFO - Processing question 1/1
2025-02-26 16:39:37,079 - MC_Grader - INFO - Grading question: What effect does bone marrow stromal cell-conditio...


[33mHumanProxy[0m (to GraderAgent):


Question:
What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?

Options:
(A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase

PaperQA2 Response:
Bone marrow stromal cell-conditioned media (SCM) does not significantly affect the expression of the CD8α receptor in cultured OT-1 T cells. Studies indicate that OT-1 T cells treated with SCM for 24 hours showed no significant difference in the geometric mean fluorescence intensity (MFI) of CD8α expression compared to those treated with unconditioned medium (UCM) (kellner2024thetcellniche pages 3-5). Similarly, another analysis found no substantial alterations in surface CD8α expression for T cells cultured in SCM, though adipocyte-conditioned media (ACM) demonstrated a trend toward increased expression under the same conditions (kellner2024thetcellniche pages 5-6).

While other sections 