# Installation of Modules

In [None]:
!pip install faiss-cpu autogen-agentchat autogen-ext datasets sentence-transformers python-dotenv

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting autogen-agentchat
  Downloading autogen_agentchat-0.4.5-py3-none-any.whl.metadata (2.5 kB)
Collecting autogen-ext
  Downloading autogen_ext-0.4.5-py3-none-any.whl.metadata (5.5 kB)
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting autogen-core==0.4.5 (from autogen-agentchat)
  Downloading autogen_core-0.4.5-py3-none-any.whl.metadata (2.3 kB)
Collecting jsonref~=1.1.0 (from autogen-core==0.4.5->autogen-agentchat)
  Downloading jsonref-1.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting opentelemetry-api>=1.27.0 (from autogen-core==0.4.5->autogen-agentchat)
  Downloading opentelemetry_api-1.30.0-py3-none-any.whl.metadata (1.6 kB)
Collecting protobuf~=5.29.3 (from autogen-core==0.4.5->autogen-agentchat)
  Downloading protobuf-5.29.3-cp38

# Mount Google Drive

In [None]:
import os
from google.colab import drive  # Colab helper used to attach Google Drive

# Attach Google Drive at the location the project paths in the script cell are built on.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# Script

In [None]:
# ==========================
# Medical Chatbot Backend (AutoGen + OpenAI API + RAG)
# ==========================

import asyncio
import gc # Garbage collection
import os
import threading

import faiss
import numpy as np
import autogen_agentchat  # Use autogen_agentchat instead of autogen
import autogen_ext  # Use autogen_ext for OpenAI API support
from datasets import load_dataset
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer

# Project locations on the mounted Google Drive: the saved FAISS index and the
# Hugging Face download cache both live under project_dir.
project_dir = "/content/drive/My Drive/AutoGenRAGMedicalChatbot"
os.makedirs(project_dir, exist_ok=True)  # No-op when the folder already exists
faiss_index_path, huggingface_cache_dir = (
    os.path.join(project_dir, leaf_name)
    for leaf_name in ("medical_faiss_index", "huggingface_models")
)
# Point Hugging Face libraries at the project cache to avoid re-downloading models.
# NOTE(review): the Hugging Face packages are imported above this line and may have
# read HF_HOME already at import time — confirm this assignment actually takes effect.
os.environ["HF_HOME"] = huggingface_cache_dir

# ==========================
# Step 1: Load OpenAI API Key
# ==========================
# Read the key from the environment instead of hardcoding it in the notebook.
# load_dotenv() does not override variables that are already set, so a real
# environment variable takes precedence over either .env file.
load_dotenv()  # Default .env lookup
load_dotenv(os.path.join(project_dir, ".env"))  # Optional .env stored with the project data
# Keep the original placeholder as a fallback so the indexing steps still run without a key.
openai_api_key = os.getenv("OPENAI_API_KEY", "your-api-key")
if openai_api_key == "your-api-key":
    print("⚠️ OPENAI_API_KEY is not set; using a placeholder key, so OpenAI requests will fail.")

# ==========================
# Step 2: Load the Medical Dataset from Hugging Face
# ==========================
MAX_QA_PAIRS = 10000  # Cap on the Q&A pairs kept in memory; raise it to index more of the dataset

print("✅ Loading medical dataset...")
dataset = load_dataset("ruslanmv/ai-medical-chatbot", cache_dir=huggingface_cache_dir)
# Extract patient-doctor conversations.
# Slice the split BEFORE converting to pandas so only the kept rows are materialised
# as a DataFrame; converting everything and then calling .head() defeats the RAM saving.
train_split = dataset["train"]
kept_rows = min(MAX_QA_PAIRS, len(train_split))
medical_dialogues = train_split.select(range(kept_rows)).to_pandas()[["Patient", "Doctor"]]
print(f"✅ Loaded {len(medical_dialogues)} medical Q&A pairs.")

# ==========================
# Step 3: Convert Dataset into FAISS Embeddings
# ==========================
# Load sentence transformer model (always needed: it also embeds user queries at retrieval time)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu", cache_folder=huggingface_cache_dir)
# embedding_model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L3-v2", device="cpu", cache_folder=huggingface_cache_dir) # Smaller model for less usage of RAM
# Q&A records in dataset order; position i here corresponds to vector i in the index
medical_qa = [
    {"question": patient_text, "answer": doctor_text}
    for patient_text, doctor_text in zip(medical_dialogues["Patient"], medical_dialogues["Doctor"])
]
# Build and save the index only when no saved index exists. The embeddings are the
# expensive part, so they are computed inside this branch instead of on every run.
if not os.path.exists(faiss_index_path):
    print("✅ Generating FAISS vector embeddings...")
    medical_embeddings = embedding_model.encode(
        [qa["question"] + " " + qa["answer"] for qa in medical_qa],
        convert_to_numpy=True
    )
    # FAISS requires float32 input; asarray avoids a copy when the dtype already matches
    medical_embeddings = np.asarray(medical_embeddings, dtype=np.float32)
    print("✅ Creating FAISS index...")
    # Create FAISS index
    # index = faiss.IndexFlatL2(medical_embeddings.shape[1])
    index = faiss.IndexHNSWFlat(medical_embeddings.shape[1], 32)  # 32 = HNSW graph connections
    index.add(medical_embeddings)
    faiss.write_index(index, faiss_index_path)
    print("✅ FAISS index saved successfully!")
    # Manually free up memory
    del medical_embeddings
    gc.collect()
    print("✅ Memory cleared after FAISS indexing.")
else:
    # Nothing is written in this branch; the saved index is read back in the next step
    print("✅ Loading existing FAISS index...")

# ==========================
# Step 4: Retrieval-Augmented Generation (RAG) Implementation
# ==========================
print("✅ Initializing RAG-based medical chatbot...")
# Load FAISS index for retrieval
index = faiss.read_index(faiss_index_path)
# Vector i in the index must correspond to medical_qa[i]. A saved index that was built
# from a different slice of the dataset breaks that mapping, so flag the mismatch.
if index.ntotal != len(medical_qa):
    print(
        f"⚠️ FAISS index holds {index.ntotal} vectors but {len(medical_qa)} Q&A pairs are loaded; "
        f"delete {faiss_index_path} and re-run to rebuild the index."
    )
# Retrieve medical KB using FAISS
def retrieve_medical_info(query, k=3):
    """Return the answers of the stored Q&A pairs that FAISS ranks closest to ``query``.

    :param query: Free-text user question; it is embedded with ``embedding_model``.
    :param k: Maximum number of neighbours requested from the FAISS index (default 3).
    :return: List of answer strings in the order FAISS returned them; it may contain
             fewer than ``k`` items when some returned ids are not usable.
    """
    # FAISS expects a 2-D float32 array with one row per query
    query_embedding = np.asarray(
        embedding_model.encode([query], convert_to_numpy=True), dtype=np.float32
    )
    _, idxs = index.search(query_embedding, k)
    # FAISS fills missing neighbours with -1, which negative indexing would silently
    # turn into the LAST Q&A pair; ids past the end mean the index and medical_qa are
    # out of sync. Drop both instead of returning a wrong answer or raising IndexError.
    return [medical_qa[i]["answer"] for i in idxs[0] if 0 <= i < len(medical_qa)]

# # ==========================
# # Step 5: AutoGen AI Chatbot Implementation (This cannot be used since no Chat classes are defined in the autogen module)
# # ==========================
# # class MedicalChatbot(autogen_agentchat.AssistantAgent): # assistant-type agents
# class MedicalChatbot(autogen_agentchat.ChatAgent):        # chat-type agents
#     def generate_reply(self, messages):
#         """Handles medical queries using RAG + OpenAI API"""
#         query = messages[-1]["content"]  # Get latest user query
#         retrieved_info = retrieve_medical_info(query)
#         knowledge_base = "\n".join(retrieved_info)
#         # Create enhanced response prompt
#         prompt = (
#             f"Using the following medical knowledge:\n{knowledge_base}\n"
#             f"Answer the question in a professional and medically accurate manner: {query}"
#         )
#         return self.llm.complete(prompt)
# # Instantiate the chatbot with OpenAI GPT-4
# chatbot = MedicalChatbot(
#     name="Medical_Chatbot",
#     llm_config={
#         "model": "gpt-4",
#         "api_key": openai_api_key
#     }
# )
# print("✅ Medical chatbot is ready!")

# ==========================
# Step 5: AutoGen AI Chatbot Implementation (Create new Agent class with autogen_ext)
# ==========================
print("✅ Initializing AI agent with AutoGen...")
# Init RAG Chatbot custom agent class
class RAGMedicalChatbot:
    """Retrieval-augmented chatbot: looks up related medical answers, then asks the LLM."""

    def __init__(self, model_name, api_key, retrieve_function):
        """
        A custom retrieval-augmented chatbot.
        :param model_name: e.g., "gpt-4"
        :param api_key: Your OpenAI API key
        :param retrieve_function: A reference to your retrieval function (FAISS, etc.);
                                  called with the user query, must return a list of strings
        :raises ImportError: if the AutoGen OpenAI model client cannot be imported
        """
        # autogen_ext has no top-level ``OpenAIGPT`` class; its OpenAI client lives in
        # autogen_ext.models.openai. Imported here so a missing optional dependency
        # fails with an actionable message instead of breaking the whole cell on import.
        try:
            from autogen_core.models import UserMessage
            from autogen_ext.models.openai import OpenAIChatCompletionClient
        except ImportError as exc:
            raise ImportError(
                'The AutoGen OpenAI model client is unavailable; '
                'install it with: pip install "autogen-ext[openai]"'
            ) from exc
        # 1) Create an LLM client from autogen_ext
        self.llm = OpenAIChatCompletionClient(model=model_name, api_key=api_key)
        # 2) Store your retrieval function (FAISS-based)
        self.retrieve = retrieve_function
        # Message type the client expects for user prompts
        self._user_message_cls = UserMessage
        # 3) The client API is async. Run it on a private event loop in a daemon thread so
        #    chat() can stay synchronous even inside a notebook, where the main thread
        #    already has a running loop and asyncio.run() would raise.
        self._loop = asyncio.new_event_loop()
        threading.Thread(
            target=self._loop.run_forever, name="rag-chatbot-loop", daemon=True
        ).start()

    # Chat class
    def chat(self, user_query):
        """
        Takes a user query, retrieves relevant knowledge,
        and returns a final answer from the LLM.
        :param user_query: The user's question as plain text
        :return: The ``content`` of the model's reply
        """
        # Retrieve relevant knowledge
        retrieved_info = self.retrieve(user_query)
        knowledge_base = "\n".join(retrieved_info)
        # Construct the prompt
        prompt = (
            f"Using the following medical knowledge:\n{knowledge_base}\n"
            f"Answer the question in a professional and medically accurate manner: {user_query}"
        )
        # Send prompt to the LLM on the background loop and block until the reply arrives
        pending = asyncio.run_coroutine_threadsafe(
            self.llm.create([self._user_message_cls(content=prompt, source="user")]),
            self._loop,
        )
        return pending.result().content

    def close(self):
        """Stop the background event loop; chat() must not be called afterwards."""
        self._loop.call_soon_threadsafe(self._loop.stop)

# Instantiate the custom chatbot agent with OpenAI GPT-4 and the FAISS retrieval function
chatbot = RAGMedicalChatbot(
    model_name="gpt-4",
    api_key=openai_api_key,
    retrieve_function=retrieve_medical_info
)
print("✅ Medical chatbot is ready!")

# ==========================
# Step 6: Interactive Chat Testing (For Local Debugging)
# ==========================
if __name__ == "__main__":
    print("\n🩺 Medical Chatbot is running...\n")
    # Start session: read questions until the user types "exit" or "quit"
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the cell was interrupted: finish like an explicit "exit"
            print("\n👋 Chatbot session ended.")
            break
        if not user_input:
            continue  # Nothing typed: do not spend an API call on an empty question
        if user_input.lower() in ["exit", "quit"]:
            print("👋 Chatbot session ended.")
            break
        # Retrieval + LLM call; chat() returns the reply to display
        response = chatbot.chat(user_input)
        print("Chatbot:", response)


✅ Loading medical dataset...
✅ Loaded 10000 medical Q&A pairs.
✅ Generating FAISS vector embeddings...
