In [11]:
import nest_asyncio
# Patch asyncio so an event loop can be re-entered. Presumably required because
# this cell runs inside a notebook kernel that already has a loop running --
# TODO confirm if this code is moved to a plain script.
nest_asyncio.apply()
import asyncio  # needed for the asyncio.run(main()) call at the end of the cell




from paperqa import Docs, Answer, PromptCollection
from paperqa.prompts import select_paper_prompt, citation_prompt
import os
import json
import openai
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment,
# then read the OpenAI key from it.
# NOTE(review): openai_api_key is not referenced anywhere else in the visible
# code; presumably the client libraries pick OPENAI_API_KEY up from the
# environment on their own -- confirm before removing.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")


def add_docs(directory_path):
    """Build a paperqa ``Docs`` store from the ``.txt`` files under a directory.

    The tree rooted at ``directory_path`` is walked recursively and every file
    whose name ends in ``.txt`` is handed to ``Docs.add``.  A file that raises
    while being added is reported on stdout and skipped, so a single bad file
    does not abort the whole import.

    Returns the ``Docs`` object, whether or not any file was added.
    """
    store = Docs(prompts=PromptCollection(), llm="gpt-4o-mini")
    # Echo the paper-selection prompt template that was imported from paperqa.
    print(select_paper_prompt)

    # Collect every .txt file, in the order os.walk yields them.
    txt_paths = [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(directory_path)
        for filename in filenames
        if filename.endswith(".txt")
    ]
    print(f"these are your documents: {txt_paths}")

    for txt_path in txt_paths:
        print(f"Adding {txt_path} to the document store...")
        try:
            store.add(txt_path)
        except Exception as err:
            # Best effort: report the failure and carry on with the next file.
            print(f"Error adding {txt_path}: {err}")

    return store

def query_docs(query, docs):
    """Ask the document store a question.

    ``docs`` is the store returned by ``add_docs``; ``query`` is forwarded
    unchanged to ``docs.query`` and its result is returned as-is.
    """
    return docs.query(query)


async def main():
    """Index the local PMC papers, ask one question, and print the evidence.

    Steps, in order:
      1. build the document store from the hard-coded papers directory;
      2. query it and print the answer and its context;
      3. fetch evidence for that answer and print each chunk's name and text;
      4. collect the evidence into a dict keyed by the chunk summary and print
         its keys, values and size.

    The coroutine contains no ``await``; every call in it is made synchronously.
    """
    docs = add_docs("/Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/")
    answer_1 = query_docs("relevance of dopamine 1 receptor to parkinsons disease", docs)
    print(f"answer: {answer_1.answer}")
    print(f"context: {answer_1.context}")

    evidence = docs.get_evidence(answer_1, k=10, detailed_citations=True)

    # Note: docs.texts would give ALL chunks; evidence.contexts holds only the
    # ones returned for this answer.
    for ctx in evidence.contexts:
        chunk = ctx.text
        print(chunk.name, chunk.text)

    # Map each summary (ctx.context) to the details of the chunk it came from.
    # Contexts sharing an identical summary collapse to a single entry (the
    # last one wins), exactly as with repeated item assignment.
    dictionary_for_llm = {
        ctx.context: {
            "summary": ctx.context,
            "original_text": ctx.text.text,
            "source": ctx.text.name,
            "relevance_score": ctx.score,
        }
        for ctx in evidence.contexts
    }

    print("keys", dictionary_for_llm.keys())
    print("values", dictionary_for_llm.values())
    print(len(dictionary_for_llm))



# Run the coroutine to completion. This executes at cell/module level, so it
# fires every time the cell is run (or the file is imported).
asyncio.run(main())





Select papers that may help answer the question below. Papers are listed as $KEY: $PAPER_INFO. Return a list of keys, separated by commas. Return "None", if no papers are applicable. Choose papers that are relevant, from reputable sources, and timely (if the question requires timely information).

Question: {question}

Papers: {papers}

Selected keys:
these are your documents: ['/Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/PMC9664771.txt', '/Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/PMC10366643.txt', '/Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/PMC10003060.txt', '/Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/PMC9312911.txt']
Adding /Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/PMC9664771.txt to the document store...
Adding /Users/sanazkazeminia/Documents/LLM_Agent/Target-Assessment-Agent-/pmc_papers/PMC10366643.txt to the 