In [31]:
# Standard library
import os
import warnings
from typing import List, Tuple

# TensorFlow's native logging level is picked up when the library is loaded,
# so the variable has to be in the environment *before* tensorflow (or any
# package that may import it) is imported. Setting it afterwards, as this
# cell previously did, is too late to silence the start-up messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Third-party
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
from dotenv import load_dotenv
from openai import OpenAI
import tensorflow as tf

# Also quieten TensorFlow's Python-side logger: only errors are shown.
tf.get_logger().setLevel('ERROR')


In [5]:
# Filter out the specific warning
warnings.filterwarnings('ignore', category=FutureWarning, 
                       message='`clean_up_tokenization_spaces` was not set')


In [34]:
class MedicalRAG:
    """Retrieval-augmented question answering over a table of medical Q&A cases.

    The questions in the dataset's ``input`` column are embedded with a
    SentenceTransformer model and stored in a FAISS ``IndexFlatL2``. A query is
    embedded the same way, its nearest neighbours are looked up, and the
    matching rows (``input`` / ``answer_icliniq`` columns) are written into a
    prompt that is sent to a Groq chat-completion model.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2", llm_models: List[str] = None):
        """
        Initialize the RAG system with model names and LLM model options.

        Loads environment variables via ``load_dotenv()``; the Groq API key is
        read from ``GROQ_API_KEY``.

        :param model_name: Embedding model name (default is 'all-MiniLM-L6-v2')
        :param llm_models: List of LLM model names to be used for response
            generation. When empty or None, a single-entry list is built from
            the ``GROQ_MODEL`` environment variable (falling back to
            'llama3-8b-8192').
        """
        load_dotenv()

        # Initialize the embedding model
        self.embedding_model = SentenceTransformer(model_name)

        # Initialize LLM models list (can be extended)
        self.llm_models = llm_models if llm_models else [os.getenv("GROQ_MODEL", "llama3-8b-8192")]

        # The index and dataset stay unset until load_data() is called.
        self.index = None
        self.df = None
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))

    def load_data(self, dataframe: pd.DataFrame) -> None:
        """Store the dataset and build the FAISS index over its questions.

        :param dataframe: Table with an ``input`` column holding the question
            text. The ``answer_icliniq`` column is read later, when a prompt
            is constructed.
        :raises ValueError: If ``dataframe`` has no rows (there would be
            nothing to embed and no embedding dimension to size the index).
        """
        if dataframe.empty:
            raise ValueError("Cannot build an index from an empty dataframe.")

        self.df = dataframe

        # Generate embeddings for all questions
        question_embeddings = self.embedding_model.encode(
            self.df['input'].tolist(),
            convert_to_numpy=True
        )

        # Initialize and populate FAISS index (one vector per dataframe row,
        # in row order, so FAISS ids are positional row numbers).
        dimension = question_embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dimension)
        self.index.add(np.array(question_embeddings, dtype='float32'))

    def retrieve_similar_cases(self, query: str, k: int = 3) -> Tuple[pd.DataFrame, List[float]]:
        """Retrieve the stored cases whose questions are closest to the query.

        :param query: Free-text question to search for.
        :param k: Maximum number of cases to return. If the index holds fewer
            than ``k`` vectors, only that many cases are returned.
        :return: ``(cases, distances)`` where ``cases`` are the matching
            dataframe rows, nearest first, and ``distances`` are the
            corresponding distances reported by the L2 index (smaller means
            more similar).
        :raises RuntimeError: If ``load_data`` has not been called yet.
        :raises ValueError: If ``k`` is smaller than 1.
        """
        if self.index is None or self.df is None:
            raise RuntimeError("No data loaded: call load_data() before retrieving cases.")
        if k < 1:
            raise ValueError("k must be a positive integer.")

        # Generate embedding for the query
        query_embedding = self.embedding_model.encode([query], convert_to_numpy=True)

        # Never ask for more neighbours than the index holds: FAISS pads the
        # missing slots with id -1, which iloc would silently interpret as
        # "the last row" and return an unrelated case.
        k = min(k, self.index.ntotal)
        distances, indices = self.index.search(
            np.array(query_embedding, dtype='float32'),
            k
        )

        # Defensive: drop any remaining -1 padding before positional lookup.
        found = indices[0] >= 0
        similar_cases = self.df.iloc[indices[0][found]]
        return similar_cases, distances[0][found]

    def generate_response(self, query: str, similar_cases: pd.DataFrame, llm_model: str) -> str:
        """Generate a response using the selected LLM model based on retrieved cases.

        :param query: The user's original question.
        :param similar_cases: Rows returned by ``retrieve_similar_cases``.
        :param llm_model: Name of the Groq chat model to call.
        :return: Text content of the first completion choice.
        """
        # Construct the prompt
        prompt = self._construct_prompt(query, similar_cases)
        system_prompt = "You are a medical AI assistant. Provide accurate, helpful medical information based on similar cases. keep it short and answer as none if you dont have relevant information. first answer the  question user asked and then show the remedies and precautions first aid steps and then show past cases with case number"

        # Generate response using Groq
        response = self.client.chat.completions.create(
            model=llm_model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
            max_tokens=800
        )

        return response.choices[0].message.content

    def _construct_prompt(self, query: str, similar_cases: pd.DataFrame) -> str:
        """Construct the user prompt from the query and the retrieved cases.

        Cases are numbered 1..n in the order they were retrieved. The
        dataframe's own index labels are deliberately not used for numbering:
        they may be non-integer, and they identify dataset rows rather than
        the position of a case in this prompt.

        :param query: The user's original question.
        :param similar_cases: Rows with ``input`` and ``answer_icliniq`` columns.
        :return: The assembled prompt text.
        """
        parts = [f"Question: {query}\n\nRelevant similar cases:\n\n"]

        for case_number, (_, case) in enumerate(similar_cases.iterrows(), start=1):
            parts.append(f"Case {case_number}:\n")
            parts.append(f"Question: {case['input']}\n")
            parts.append(f"Expert Answer: {case['answer_icliniq']}\n\n")

        parts.append("\nBased on these similar cases and your medical knowledge, please provide a comprehensive answer to the original question.")

        return "".join(parts)

    def answer_query(self, query: str, k: int = 3, llm_model: str = None) -> dict:
        """Complete RAG pipeline to answer a medical query.

        :param query: The user's question.
        :param k: Number of similar cases to retrieve.
        :param llm_model: Model to use; defaults to the first entry of
            ``self.llm_models`` when not given.
        :return: Dict with keys ``query``, ``response``, ``similar_cases``
            (DataFrame of retrieved rows) and ``relevance_scores`` (list of
            the index distances for those rows; smaller means closer).
        """
        # If no LLM model is provided, use the first available one
        llm_model = llm_model if llm_model else self.llm_models[0]

        # Retrieve similar cases
        similar_cases, distances = self.retrieve_similar_cases(query, k)

        # Generate response
        response = self.generate_response(query, similar_cases, llm_model)

        return {
            "query": query,
            "response": response,
            "similar_cases": similar_cases,
            "relevance_scores": distances.tolist()
        }

In [35]:
# Shared MedicalRAG instance, built on first use. Downloading the dataset and
# embedding every question is expensive, so it is done once per kernel
# session instead of on every query. Re-running this cell resets the cache.
_MEDICAL_RAG_INSTANCE = None


def _get_medical_rag():
    """Return the cached MedicalRAG, loading and indexing the dataset on first call."""
    global _MEDICAL_RAG_INSTANCE
    if _MEDICAL_RAG_INSTANCE is None:
        # Load the medical QA dataset
        df = pd.read_parquet("hf://datasets/lavita/ChatDoctor-iCliniq/data/train-00000-of-00001-7f15f39e4c3a7ee9.parquet")

        # Initialize and setup RAG system. Only the first list entry is used
        # as the default model when the caller does not pass llm_model.
        rag = MedicalRAG(llm_models=["llama3-8b-8192", "another-llama-model"])
        rag.load_data(df)

        # Cache only after indexing succeeded, so a failed build is retried.
        _MEDICAL_RAG_INSTANCE = rag
    return _MEDICAL_RAG_INSTANCE


def query_medical_rag(query: str, llm_model: str = None, k: int = 3) -> dict:
    """
    Function to query the RAG system and get the response for a medical query.

    The underlying RAG system (dataset download + embedding index) is created
    on the first call and reused by later calls.

    :param query: The query to ask the RAG system
    :param llm_model: (Optional) The LLM model to use (defaults to first in list)
    :param k: Number of similar cases to retrieve (default is 3)
    :return: The response and additional details
    """
    # Get response from RAG system
    return _get_medical_rag().answer_query(query, k, llm_model)

In [36]:
# Example question for the RAG pipeline; edit the text to ask something else.
query = "I have persistent knee pain after exercising. What should I do?"

# Run retrieval and generation with the default model and default k.
result = query_medical_rag(query=query)

NotFoundError: 404 page not found

In [None]:
# Dump the raw result dict returned by query_medical_rag (query, response,
# similar_cases and relevance_scores). Requires the query cell to have
# completed successfully, otherwise `result` is undefined or stale.
print(result)

In [27]:
# Show the question, the generated answer and the retrieval distances,
# one per line.
print(
    f"Query: {result['query']}",
    f"Response: {result['response']}",
    f"Relevance Scores: {result['relevance_scores']}",
    sep="\n",
)

Query: i broke my thumb finger , its swollen i dont know if its fractured or not , can you help me?
Response: Immediate Steps:

1. Apply ice packs to the affected area to reduce swelling and pain.
2. Elevate your hand above the level of your heart to reduce swelling.
3. Use a bandage or wrap to stabilize the thumb and prevent further injury.

Medication:

* Take over-the-counter pain relievers such as acetaminophen or ibuprofen to manage pain and discomfort.
* Apply topical creams or gels containing ingredients like capsaicin or menthol to relieve pain and reduce swelling.

As for determining if the injury is a fracture or not, it's recommended to consult an orthopedic specialist for a proper examination and X-ray imaging to confirm the diagnosis. Based on the similar cases provided, it's possible that the injury could be a fracture, especially since you mentioned the thumb is swollen.

Please note that without a proper examination and imaging, it's difficult to provide a definitive di