In [8]:
import os

from dotenv import load_dotenv
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.postprocessor import SimilarityPostprocessor, KeywordNodePostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.retrievers.bm25 import BM25Retriever

# Load environment variables
load_dotenv()

# Configure the global llama_index settings: LLM, embedding model and chunking.
Settings.llm = Groq(model="llama-3.1-8b-instant", api_key=os.getenv("GROQ_API_KEY"))
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.chunk_size = 512  # Smaller chunks for faster processing
Settings.chunk_overlap = 50

# Load and index documents with persistent storage.
# First run: read the papers, build the index and persist it to `persist_dir`.
# Later runs: reload the persisted index instead of re-embedding everything.
persist_dir = "./storage"
if not os.path.exists(persist_dir):
    documents = SimpleDirectoryReader("medical_papers").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=persist_dir)
else:
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    # `VectorStoreIndex([], storage_context=...)` would build a *new* empty index
    # on top of the stored data; `load_index_from_storage` restores the index
    # structure that was actually persisted.
    index = load_index_from_storage(storage_context)

# Create the two retrievers that the hybrid retriever combines.
vector_retriever = index.as_retriever(similarity_top_k=10)  # Increased from 7
bm25_retriever = BM25Retriever.from_defaults(index=index, similarity_top_k=10)

class HybridRetriever:
    """Merge the results of a vector retriever and a BM25 retriever.

    Both retrievers are queried with the same query; their results are
    de-duplicated, ordered by descending score and truncated to ``top_k``.

    NOTE(review): the two retrievers are assumed to produce scores on
    different scales (the sample output shows a score of 5.00), so the
    ordering may be dominated by one of them — confirm whether score
    normalisation is wanted before relying on the ranking.

    Args:
        vector_retriever: Object exposing ``retrieve(query)``.
        bm25_retriever: Object exposing ``retrieve(query)``.
        top_k: Maximum number of nodes returned by ``retrieve`` (default 15).
    """

    def __init__(self, vector_retriever, bm25_retriever, top_k=15):
        self.vector_retriever = vector_retriever
        self.bm25_retriever = bm25_retriever
        self.top_k = top_k

    @staticmethod
    def _score(node):
        """Return the node's score, ranking a missing/None score last."""
        score = getattr(node, "score", None)
        return float("-inf") if score is None else score

    @staticmethod
    def _identity(node):
        """Return a key identifying the underlying node for de-duplication."""
        node_id = getattr(node, "node_id", None)
        # Fall back to object identity when the result carries no node id.
        return id(node) if node_id is None else node_id

    def retrieve(self, query):
        """Return up to ``top_k`` unique nodes, highest score first.

        When the same node is returned by both retrievers, only the
        higher-scored occurrence is kept.
        """
        best_by_id = {}
        candidates = [
            *self.vector_retriever.retrieve(query),
            *self.bm25_retriever.retrieve(query),
        ]
        for node in candidates:
            key = self._identity(node)
            current = best_by_id.get(key)
            if current is None or self._score(node) > self._score(current):
                best_by_id[key] = node
        ranked = sorted(best_by_id.values(), key=self._score, reverse=True)
        return ranked[: self.top_k]

# Assemble the query engine: hybrid retrieval, then two node post-processors,
# then response synthesis in "compact" mode.
hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)

# Score-based filter (cutoff increased from 0.7).
similarity_filter = SimilarityPostprocessor(similarity_cutoff=0.75)

# Keyword-based filter on the retrieved node text.
keyword_filter = KeywordNodePostprocessor(
    required_keywords=["treatment", "diabetes", "hypertension", "AI"],
    exclude_keywords=["animal", "vitro", "study", "unrelated"],
)

query_engine = RetrieverQueryEngine.from_args(
    retriever=hybrid_retriever,
    node_postprocessors=[similarity_filter, keyword_filter],
    response_mode="compact",  # Chosen as faster than tree_summarize
)

# Warm-up phase
def warmup():
    """Run one throwaway retrieval and one throwaway LLM completion.

    Both results are discarded; the calls are made only for their effect of
    exercising the retriever and the LLM before the first real query.
    """
    query_engine.retriever.retrieve("warmup")
    Settings.llm.complete("warmup")


warmup()

# Query interface
query_cache = {}  # Maps the exact query string to its response object

while True:
    try:
        query = input("\nEnter your medical question (or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted the prompt. Without this,
        # EOFError would be caught by the generic handler below and the loop
        # would spin forever re-raising it.
        print()
        break

    if query.lower() == "exit":
        break

    # Validate query (an empty query splits into zero words and is rejected too)
    if len(query.split()) < 2:
        print("Please enter a valid question with at least 2 words.")
        continue

    # Check cache
    if query in query_cache:
        print("\nAnswer (from cache):")
        print(query_cache[query])
        continue

    try:
        # Execute query
        response = query_engine.query(query)
        if not response.source_nodes:
            print("No relevant information found. Try rephrasing or asking about:")
            print("- Diabetes insipidus treatments")
            print("- PAH management strategies")
            print("- AI applications in radiology")
        else:
            print(f"\nAnswer: {response}\n")
            print("Sources:")
            seen_texts = set()
            for node in response.source_nodes:
                # Only show high-confidence, unique sources. A node without a
                # score cannot be compared against the threshold, so skip it
                # instead of raising TypeError on `None > 0.7`.
                if node.score is None or node.score <= 0.7:
                    continue
                if node.text in seen_texts:
                    continue
                seen_texts.add(node.text)
                print(f"- {node.text[:200]}... (Score: {node.score:.2f})")

            # Cache the response (only answers that had source nodes are cached)
            query_cache[query] = response

    except Exception as e:
        # Top-level boundary of the interactive loop: report and keep going.
        print(f"An error occurred: {e}. Please try again.")


Answer: AI can be used to design and optimize the development of nanoparticles for cancer immunotherapy by analyzing large datasets and identifying patterns that can inform the creation of more effective and targeted nanoparticles. This can include the use of machine learning algorithms to predict the behavior of nanoparticles in different environments and to identify the most promising candidates for further development. Additionally, AI can be used to simulate the interactions between nanoparticles and cancer cells, allowing researchers to test different designs and mechanisms without the need for physical experimentation.

Sources:
- Page 11 of 48
Lu et al. Journal of Hematology & Oncology            (2024) 17:16 
These nanoparticles target macrophages, enhancing anti -
gen processing and expediting T-cell-related responses in 
ca... (Score: 5.00)
- Rainey et al. AI-Related Survey of UK Radiographers
FIGURE 3 | Respondents’ perception of understanding of the term “arti ﬁcial intell