#### Importing Necessary Libraries

In [123]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain.retrievers import MultiQueryRetriever
from langchain.vectorstores import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.vectorstores.base import VectorStoreRetriever
from pinecone.grpc import PineconeGRPC as Pinecone
import os
import json
from datetime import datetime

#### Initializing and accessing Pinecone and OpenAI

In [124]:
def initialize_rag_system(index_name, openai_api_key, pinecone_api_key,
                          model="gpt-4", temperature=0.1, text_key="text"):
    """Initialize the RAG system with Pinecone, OpenAI embeddings, and LLM.

    Args:
        index_name: Name of the Pinecone index to open.
        openai_api_key: API key handed to both the OpenAI embeddings client
            and the chat model.
        pinecone_api_key: API key handed to the Pinecone client.
        model: Chat model name passed to ChatOpenAI. Defaults to "gpt-4".
        temperature: Sampling temperature passed to ChatOpenAI. Defaults to 0.1.
        text_key: Value passed through as PineconeVectorStore's ``text_key``.
            Defaults to "text".

    Returns:
        A ``(vectorstore, llm, embeddings)`` tuple.
    """
    # Pinecone initialization and access.
    # NOTE: `Pinecone` is bound by two imports at the top of the file; the
    # later one (`pinecone.grpc.PineconeGRPC`) wins, so this is the gRPC client.
    pc = Pinecone(api_key=pinecone_api_key)
    index = pc.Index(index_name)

    # Initialize OpenAI embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Initialize LangChain Pinecone vector store on top of the opened index
    vectorstore = PineconeVectorStore(
        index=index,
        embedding=embeddings,
        text_key=text_key
    )

    # Initialize LLM
    llm = ChatOpenAI(
        temperature=temperature,
        model=model,
        openai_api_key=openai_api_key
    )

    return vectorstore, llm, embeddings

#### Function to find contexts similar to a question

In [125]:
def get_similar_contexts(question, embeddings, vectorstore, k=4):
    """Get similar contexts using embedding similarity search.

    Args:
        question: Query text passed to the vector store's similarity search.
        embeddings: Not used by this function; retained so existing call
            sites that pass it positionally keep working.
        vectorstore: Object exposing ``similarity_search_with_score(query, k=...)``
            that returns ``(document, score)`` pairs, where each document has
            a ``page_content`` attribute.
        k: Number of matches to request from the vector store. Defaults to 4.

    Returns:
        A list of ``{'context': str, 'similarity_score': float}`` dicts sorted
        by score, highest first. Empty if the search returns nothing.
    """
    similar_docs = vectorstore.similarity_search_with_score(question, k=k)

    results = [
        {
            'context': doc.page_content,
            'similarity_score': float(score)
        }
        for doc, score in similar_docs
    ]

    # NOTE(review): descending order assumes a higher score means a closer
    # match (e.g. cosine / dot-product metrics) — confirm against the index metric.
    results.sort(key=lambda item: item['similarity_score'], reverse=True)
    return results

#### Prompt to use MCQ and retrieved context to find the answer

In [126]:
def get_answer_extraction_prompt():
    """Build the chat prompt template used to extract an MCQ answer.

    The template has four placeholders: ``question``, ``options``,
    ``similarity_score`` (rendered with four decimal places) and ``context``.

    Returns:
        The ChatPromptTemplate built from the template text below.
    """
    return ChatPromptTemplate.from_template(
        """Based on the following medical question and context, determine the correct answer among the options and explain your reasoning.

Question: {question}

Options:
{options}

Most Relevant Context (similarity score: {similarity_score:.4f}):
{context}

Please provide your response in the following format:

1. Predicted Answer: [Letter of the correct answer (A, B, C, or D)]
2. Confidence: [High/Medium/Low]
3. Explanation: [Detailed explanation of why this is the correct answer, citing specific information from the context]
4. Relevant Information: [Key pieces of information from the context that led to this answer]

Remember to:
1. Base your answer solely on the provided context
2. If the context doesn't contain enough information, indicate low confidence
3. Provide specific citations from the context in your explanation
4. Consider both direct evidence and logical implications
"""
    )

#### Function to predict the MCQ answer using the question, the options, and the retrieved context with the highest similarity

In [127]:
def _parse_prediction_response(response):
    """Split the model's reply into its labelled sections.

    A section starts on a line that begins with one of the known labels
    (after any list numbering or markdown decoration) and runs until the next
    label. Only the first occurrence of each label opens a section, so a later
    sentence that merely mentions e.g. "Confidence:" cannot overwrite it.

    Returns:
        A dict containing whichever of ``predicted_answer``, ``confidence``,
        ``explanation`` and ``relevant_info`` were found in the reply.
    """
    labels = (
        ("Predicted Answer:", "predicted_answer"),
        ("Confidence:", "confidence"),
        ("Explanation:", "explanation"),
        ("Relevant Information:", "relevant_info"),
    )

    parsed = {}
    active_key = None
    for raw_line in response.split("\n"):
        stripped = raw_line.strip()
        # Drop "1. ", "2) ", "**", "- ", "#" prefixes so the label is at the start.
        candidate = stripped.lstrip("0123456789.)*#- ")
        match = next(
            ((label, key) for label, key in labels
             if key not in parsed and candidate.startswith(label)),
            None,
        )
        if match is not None:
            label, key = match
            # strip(" *") also removes a closing markdown "**" left after the label.
            parsed[key] = candidate[len(label):].strip(" *")
            active_key = key
        elif active_key is not None and stripped:
            # Continuation of a multi-line section: keep it instead of dropping it.
            parsed[active_key] = f"{parsed[active_key]}\n{stripped}".strip()

    return parsed


def predict_answer(question, options, context, similarity_score, llm):
    """Predict answer using the most similar context.

    Args:
        question: The MCQ question text.
        options: Iterable of option strings; joined with newlines for the prompt.
        context: Retrieved context text shown to the model.
        similarity_score: Numeric score of that context; the prompt renders it
            with four decimal places.
        llm: Chat model exposing ``invoke(messages)`` whose result has a
            ``content`` string.

    Returns:
        A dict with any of the keys ``predicted_answer``, ``confidence``,
        ``explanation`` and ``relevant_info`` that could be parsed from the
        model's reply. Keys missing from the reply are omitted.
    """
    prompt = get_answer_extraction_prompt()

    # format_messages() yields proper chat messages. format() would return the
    # flattened buffer string, which carries a "Human: " role prefix that would
    # then be sent to the model as part of the user text.
    messages = prompt.format_messages(
        question=question,
        options="\n".join(options),
        context=context,
        similarity_score=similarity_score,
    )
    response = llm.invoke(messages).content

    return _parse_prediction_response(response)

#### Function to save the extracted QA answer and the retrieved contexts

In [128]:
def save_to_json(data, filename):
    """Write ``data`` as JSON to a timestamped file and return its path.

    The output path is ``<filename>_<YYYYMMDD_HHMMSS>.json``, using the current
    local time. The file is UTF-8 encoded, indented by two spaces, and
    non-ASCII characters are written as-is rather than escaped.

    Args:
        data: JSON-serializable object to write.
        filename: Path prefix; the timestamp and ``.json`` suffix are appended.

    Returns:
        The full path of the file that was written.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = f"{filename}_{stamp}.json"

    with open(output_path, 'w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)

    print(f"\nResults saved to: {output_path}")
    return output_path

#### Main Function

In [129]:
def main_test(index_name="medicalqabot"):
    """Run the end-to-end demo on one sample MCQ.

    Reads ``OPENAI_API_KEY`` and ``PINECONE_API_KEY`` from the environment,
    retrieves contexts for a sample question, saves them to JSON, asks the LLM
    to answer using the top-scoring context, saves that result to JSON, and
    prints a short summary.

    Args:
        index_name: Name of the Pinecone index to query. Defaults to
            "medicalqabot".

    Raises:
        EnvironmentError: If either required environment variable is unset or
            empty.
    """
    # Load credentials
    openai_api_key = os.getenv("OPENAI_API_KEY")
    pinecone_api_key = os.getenv("PINECONE_API_KEY")

    # Fail early with a clear message instead of an opaque client error later.
    missing = [
        name
        for name, value in (
            ("OPENAI_API_KEY", openai_api_key),
            ("PINECONE_API_KEY", pinecone_api_key),
        )
        if not value
    ]
    if missing:
        raise EnvironmentError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # Initialize system
    vectorstore, llm, embeddings = initialize_rag_system(index_name, openai_api_key, pinecone_api_key)

    # Sample MCQ
    sample_mcq = {
        "question": "A 62-year-old female with a history of chronic low back pain, and a recent MRI showed significant lumbar spine degeneration. In addition to the back pain, she complains of bilateral buttock pain and occasional right lower extremity radicular symptoms. She has failed conservative treatments, including physical therapy and NSAIDs. Given her symptoms and the findings of the MRI, what is the most suitable next step in her management?",
        "options": [
            "A) Lumbar spine fusion surgery",
            "B) Sacroiliac joint injection",
            "C) Preserving the posterior complex during lumbar fusion",
            "D) Continue with conservative management"
        ]
    }

    # Step 1: Get similar contexts
    print("\nStep 1: Finding similar contexts...")
    similar_contexts = get_similar_contexts(sample_mcq['question'], embeddings, vectorstore)

    # Save contexts to JSON
    contexts_data = {
        "question": sample_mcq["question"],
        "similar_contexts": similar_contexts
    }
    save_to_json(contexts_data, "similar_contexts")

    # Without any retrieved context there is nothing to index into or to
    # ground the answer on, so stop here rather than raise IndexError.
    if not similar_contexts:
        print("\nNo similar contexts were retrieved; skipping answer prediction.")
        return

    # Step 2: Predict answer using the most similar context
    print("\nStep 2: Predicting answer using most similar context...")
    most_similar_context = similar_contexts[0]
    prediction = predict_answer(
        sample_mcq['question'],
        sample_mcq['options'],
        most_similar_context['context'],
        most_similar_context['similarity_score'],
        llm
    )

    # Save complete results to JSON
    results_data = {
        "question": sample_mcq["question"],
        "options": sample_mcq["options"],
        "most_similar_context": most_similar_context,
        "prediction": prediction
    }
    save_to_json(results_data, "qa_results")

    # Print summary
    print("\nSummary:")
    print(f"Top context similarity score: {most_similar_context['similarity_score']:.4f}")
    print(f"Predicted answer: {prediction.get('predicted_answer', 'N/A')}")
    print(f"Confidence: {prediction.get('confidence', 'N/A')}")

In [130]:
# Run the demo only when this code is executed directly (script or notebook
# cell), not when the definitions above are imported from another module.
if __name__ == "__main__":
    main_test()



Step 1: Finding similar contexts...

Results saved to: similar_contexts_20250112_221637.json

Step 2: Predicting answer using most similar context...

Results saved to: qa_results_20250112_221650.json

Summary:
Top context similarity score: 0.8512
Predicted answer: A) Lumbar spine fusion surgery
Confidence: Medium
