In [9]:
import os
from itertools import islice

import pandas as pd
from dotenv import load_dotenv
from google import genai
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pull variables from a local .env file into the process environment before
# anything below reads them.
load_dotenv()

# 1. Setup
# os.getenv returns None when GEMINI_API_KEY is not set.
# NOTE(review): confirm how genai.Client behaves when api_key is None before
# relying on this in a deployment.
API_KEY = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=API_KEY)

# 2. Load Kaggle Dataset
file_path = "data/mtsamples.csv"

# Upper bound on how many loaded documents are indexed.
MAX_DOCUMENTS = 1000

loader = CSVLoader(
    file_path=file_path,
    encoding="utf-8",
    csv_args={
        "delimiter": ",",
        "quotechar": '"',
    },
)

# Stream documents and stop at the cap instead of materialising the whole CSV
# and slicing afterwards; `data` is still a plain list of at most
# MAX_DOCUMENTS documents.
data = list(islice(loader.lazy_load(), MAX_DOCUMENTS))

# 3. Chunking & Vectorization
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(data)

# Embed every chunk and build the FAISS index that clinical_assistant queries.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(docs, embeddings)

def clinical_assistant(query, *, k=3, model="gemini-2.5-flash-lite"):
    """Answer *query* from the indexed transcriptions via retrieval + Gemini.

    The ``k`` chunks most similar to the query are fetched from the
    module-level ``vector_db``, concatenated into a context section, and sent
    to the module-level ``client`` together with the query.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve as context. Defaults to 3.
        model: Name of the generation model to call. Defaults to
            ``"gemini-2.5-flash-lite"``.

    Returns:
        The model's answer text, or an empty string when the response
        carries no text.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Retrieve the top-k matches
    search_results = vector_db.similarity_search(query, k=k)

    # Each retrieved chunk is fenced with a '---' separator line.
    context = "".join(
        f"\n---\n{res.page_content}\n" for res in search_results
    )

    # The prompt layout (including its indentation) is kept exactly as it was
    # so the text sent to the model does not change.
    prompt = f"""
    You are an AI Clinical Assistant. Using the provided medical transcriptions, answer the user query.
    Rules:
    1. Only use the context provided. 
    2. If the answer isn't there, say you don't know.
    
    CONTEXT:
    {context}

    QUERY: {query}
    """

    response = client.models.generate_content(model=model, contents=prompt)

    # `response.text` is optional in the google-genai SDK (None when the
    # response has no text parts); normalise so callers always get a str.
    return response.text or ""

if __name__ == "__main__":
    # Script entry point: print a banner, then answer one sample question.
    sample_query = "What are the details of the 'Allergic Rhinitis' consultation?"
    print("--- Medical RAG Assistant ---")
    answer = clinical_assistant(sample_query)
    print(answer)

--- Medical RAG Assistant ---
This is a consultation for a 23-year-old white female presenting with a complaint of allergies, specifically Allergic Rhinitis. She reports her allergies are worse now than when she lived in Seattle. She has previously tried Claritin and Zyrtec, which were effective for a short period before losing effectiveness. She has also used Allegra, which she started using again two weeks ago but is not finding very effective. She has used over-the-counter nasal sprays but no prescription ones. She has asthma, but it does not require daily medication and she does not believe it is currently flaring up. Her current medications are Ortho Tri-Cyclen and Allegra. She has no known medicine allergies. Objectively, her throat was mildly erythematous without exudate, and her nasal mucosa was erythematous and swollen with clear drainage. Her tympanic membranes (TMs) were clear, and her neck was supple. Her weight was 130 pounds and blood pressure was 124/78.
