In [82]:
import json
from paperqa import Settings, ask
import os
from paperqa import Settings, Docs
from paperqa.agents.main import agent_query
from paperqa.agents.search import get_directory_index

from aviary.env import TaskDataset
from ldp.agent import SimpleAgent
from ldp.alg.callbacks import MeanMetricsCallback
from ldp.alg.runners import Evaluator, EvaluatorConfig


In [83]:
# Load the benchmark questions. JSON is read as UTF-8 explicitly so the result
# does not depend on the platform's default locale encoding.
with open("/Users/apple/Documents/GitLab_Projects/master_project/xx823/Reproduction/formatted_questions/questions.json", "r", encoding="utf-8") as f:
    questions = json.load(f)

# Search for this specific question (exact string match on the "question" field).
target_question = "What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?"
matching_questions = [q for q in questions if q["question"] == target_question]

if matching_questions:
    print("Found the question!")
    # If several records share the same text, the first one is used.
    question_data = matching_questions[0]
else:
    print("Question not found in dataset")
    # Bind the name explicitly: otherwise a `question_data` left over from an
    # earlier run of this cell would be silently reused by the cells below.
    question_data = None

Found the question!


In [84]:
# Display the record selected by the lookup cell above for visual inspection.
question_data

{'question': 'What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?',
 'choices': ['(A) No effect',
  '(B) Insufficient information to answer the question',
  '(C) Decrease',
  '(D) Increase'],
 'correct_answer': 'A',
 'unsure_option': 'B',
 'sources': ['https://doi.org/10.1101/2024.01.31.578101'],
 'ideal': 'No effect',
 'distractors': ['Increase', 'Decrease']}

In [85]:
def format_multiple_choice_question(question, choices):
    """Render a question and its answer options as a single prompt string.

    Args:
        question: The question text; interpolated as-is after the
            "Question:Q: " prefix.
        choices: Iterable of option strings (e.g. "(A) No effect"). They are
            joined with single spaces, in the order given.

    Returns:
        A string of the form "Question:Q: <question>; Options: <opt1> <opt2> ...".
    """
    return f"Question:Q: {question}; Options: " + " ".join(choices)


In [86]:
# Configure PaperQA2: point it at the local paper directory and fix the temperature.
settings = Settings(
    # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a",
    # Keep deterministic
    temperature=0.0,
)


In [87]:
# Turn the selected record into the single-string prompt and display it.
formatted_q = format_multiple_choice_question(
    question=question_data["question"],
    choices=question_data["choices"],
)
formatted_q

'Question:Q: What effect does bone marrow stromal cell-conditioned media have on the expression of the CD8a receptor in cultured OT-1 T cells?; Options: (A) No effect (B) Insufficient information to answer the question (C) Decrease (D) Increase'

In [88]:
# Apply the nest_asyncio patch before any paperqa call is made.
# NOTE(review): presumably needed so paperqa's async machinery can run inside
# the notebook's already-running event loop — confirm. This import would
# normally live with the other imports at the top of the notebook.
import nest_asyncio
nest_asyncio.apply()

In [89]:
# Rebind `settings`, this time passing the agent's index options explicitly.
# Note that no temperature is given here, so the value set on the previous
# `settings` object is not carried over.
settings = Settings(
    agent=dict(
        index=dict(
            sync_with_paper_directory=True,
            recurse_subdirectories=True,
        )
    ),
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a",
)



In [90]:
# Sanity-check the paper directory: list its contents when present,
# otherwise report that it is missing and create it (empty).
paper_dir = "/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a"
if os.path.exists(paper_dir):
    print(f"Directory {paper_dir} exists, checking contents:")
    files = os.listdir(paper_dir)
    print(f"Found {len(files)} files: {files}")
else:
    print(f"Directory {paper_dir} does not exist!")
    # Create it
    os.makedirs(paper_dir, exist_ok=True)

Directory /Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a exists, checking contents:
Found 2 files: ['darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf', 'The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf']


In [91]:

# Fetch the directory index for the current `settings`. This uses a top-level
# `await`, so the cell relies on the notebook's async support.
built_index = await get_directory_index(settings=settings)

# Print index information: the index name, then how many files it holds and their names.
print(f"Using index: {settings.get_index_name()}")
# `index_files` has to be awaited before it can be measured or iterated.
index_files = await built_index.index_files
print(f"Number of indexed files: {len(index_files)}")
print("Indexed files:")
for file in index_files:
    print(f"- {file}")

Using index: pqa_index_f149e57fac17a94e4c4330421b6db4f7
Number of indexed files: 2
Indexed files:
- darby-et-al-2024-differential-development-of-antibiotic-resistance-and-virulence-between-acinetobacter-species.pdf
- The T-cell niche tunes immune function through modulation of the cytoskeleton and TCRantigen forces.pdf


In [92]:
# Repeats the nest_asyncio setup already performed in an earlier cell.
# NOTE(review): likely redundant once the patch has been applied — confirm,
# and consider keeping a single copy next to the notebook's imports.
import nest_asyncio
nest_asyncio.apply()

In [93]:
# Rebind `settings` for the final query against the paper directory.
# This call passes no `agent` argument, so the index options given to the
# previous `settings` object are not carried over to this one.
settings = Settings(
    # Directory containing the paper with DOI: 10.1101/2024.01.31.578101
    paper_directory="/Users/apple/Documents/GitLab_Projects/master_project/xx823/paper-2a",
    # NOTE(review): this was labelled "Keep deterministic", but the value is 0.5
    # rather than the 0.0 used in the first settings cell — confirm which one
    # is intended for the reported answer.
    temperature=0.5,
)

In [94]:
# Run the formatted multiple-choice prompt through paperqa's `ask`, using the
# most recently assigned `settings`; the result is kept in `answer`.
answer = ask(formatted_q, settings=settings)