# Installation of Modules

Available GenAI options:
- ChatGPT4o: openai
- Gemini Flash 2.0: google-genai

In [None]:
!pip install faiss-cpu autogen-agentchat autogen-ext datasets sentence-transformers python-dotenv google-genai



# Mount Google Drive

In [None]:
import os
from google.colab import drive  # Google Drive support

# Attach Google Drive at the Colab mount point used by the project paths in later cells.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Inference with OpenAI ChatGPT-4o

In [None]:
# ==========================
# Medical Chatbot Backend (AutoGen + OpenAI API + RAG)
# ==========================

# IMPORTANT:
# If you see an error like:
#   APIRemovedInV1: You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0
#
# You have two options:
#  1. Migrate your code by running: !openai migrate
#  2. Pin your openai version by running: pip install openai==0.28.0
#
# Choose one before running the script.

import os
from dotenv import load_dotenv
from datasets import load_dataset
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import autogen_agentchat  # Use autogen_agentchat instead of autogen
import autogen_ext  # Use autogen_ext for OpenAI API support
import openai
import gc # Garbage collection

# Define paths for FAISS index and model cache (both live on the mounted Google Drive)
project_dir = "/content/drive/My Drive/AutoGenRAGMedicalChatbot"
os.makedirs(project_dir, exist_ok=True)  # Ensure directory exists
faiss_index_path = os.path.join(project_dir, "medical_faiss_index")
huggingface_cache_dir = os.path.join(project_dir, "huggingface_models")
# Set Hugging Face cache directory to avoid re-downloading models
# NOTE(review): the Hugging Face libraries are imported before this line, so they may
# already have read HF_HOME - confirm this assignment still has an effect.
os.environ["HF_HOME"] = huggingface_cache_dir


# ==========================
# Step 1: Load OpenAI API Key
# ==========================
# Read the key from the environment (optionally populated from a .env file) instead of
# hardcoding it in the notebook: a literal placeholder is always truthy, so the
# "missing key" check below could never fire, and real keys must not be saved in notebooks.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
if not openai_api_key:
    raise ValueError("OpenAI API key is missing! Set the OPENAI_API_KEY environment variable.")
openai.api_key = openai_api_key  # Set global OpenAI key


# ==========================
# Step 2: Load the Medical Dataset from Hugging Face
# ==========================
print("✅ Loading medical dataset...")
dataset = load_dataset("ruslanmv/ai-medical-chatbot", cache_dir=huggingface_cache_dir)
# Extract patient-doctor conversations.
# Use only the first 10k rows to reduce RAM usage. The split is sliced *before* the
# pandas conversion so the full training split is never materialised as a DataFrame.
train_split = dataset["train"]
max_dialogues = min(10000, train_split.num_rows)  # never request more rows than exist
medical_dialogues = train_split.select(range(max_dialogues)).to_pandas()[["Patient", "Doctor"]]
print(f"✅ Loaded {len(medical_dialogues)} medical Q&A pairs.")


# ==========================
# Step 3: Convert Dataset into FAISS Embeddings
# ==========================
# Load sentence transformer model (always needed: it also embeds queries at chat time)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu", cache_folder=huggingface_cache_dir)
# embedding_model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L3-v2", device="cpu", cache_folder=huggingface_cache_dir) # Smaller model for less usage of RAM
# Q&A records; row i of the FAISS index corresponds to medical_qa[i]
medical_qa = [
    {"question": question, "answer": answer}
    for question, answer in zip(medical_dialogues["Patient"], medical_dialogues["Doctor"])
]

# Reuse the cached index only when it describes exactly the rows loaded above.
# The index file path is shared by every cell of this notebook, and the cells load
# different numbers of rows, so a stale index would return ids for the wrong rows.
index = None
if os.path.exists(faiss_index_path):
    print("✅ Loading existing FAISS index...")
    index = faiss.read_index(faiss_index_path)
    if index.ntotal != len(medical_qa):
        print(
            f"⚠️ Cached FAISS index holds {index.ntotal} vectors but "
            f"{len(medical_qa)} Q&A pairs were loaded; rebuilding..."
        )
        index = None

if index is None:
    # Embeddings are only computed when an index has to be built; encoding the corpus
    # on CPU is the slowest step of this cell.
    print("✅ Generating FAISS vector embeddings...")
    medical_embeddings = embedding_model.encode(
        [qa["question"] + " " + qa["answer"] for qa in medical_qa],
        convert_to_numpy=True
    )
    medical_embeddings = np.asarray(medical_embeddings, dtype=np.float32)  # FAISS expects float32
    print("✅ Creating FAISS index...")
    # index = faiss.IndexFlatL2(medical_embeddings.shape[1])
    index = faiss.IndexHNSWFlat(medical_embeddings.shape[1], 32)  # 32 = HNSW graph connections
    index.add(medical_embeddings)
    faiss.write_index(index, faiss_index_path)
    print("✅ FAISS index saved successfully!")
    # Manually free up memory
    del medical_embeddings
    gc.collect()
    print("✅ Memory cleared after FAISS indexing.")


# ==========================
# Step 4: Retrieval-Augmented Generation (RAG) Implementation
# ==========================
print("✅ Initializing RAG-based medical chatbot...")
# `index` (loaded or freshly built above) is the FAISS index used for retrieval
# Retrieve medical KB using FAISS
def retrieve_medical_info(query):
    """Retrieve relevant medical knowledge using FAISS.

    :param query: free-text user question to embed and search for
    :return: list of at most 3 stored answers, best match first; may be shorter
             (or empty) when FAISS returns fewer valid neighbours
    """
    # FAISS requires float32 input; make the dtype explicit rather than relying on the encoder
    query_embedding = np.asarray(
        embedding_model.encode([query], convert_to_numpy=True), dtype=np.float32
    )
    _, idxs = index.search(query_embedding, k=3)  # Get top 3 matches
    # FAISS pads missing results with -1, and an index built from a different number of
    # rows can return ids past the end of medical_qa. Skip both instead of silently
    # returning the wrong row (negative index) or raising IndexError.
    return [
        medical_qa[int(i)]["answer"]
        for i in idxs[0]
        if 0 <= int(i) < len(medical_qa)
    ]


# ==========================
# Step 5: AutoGen AI Chatbot Implementation (Create new Agent class with autogen_ext)
# ==========================
print("✅ Initializing AI agent with AutoGen...")
# Init RAG Chatbot custom agent class
class RAGMedicalChatbot:
    """Retrieval-augmented medical chatbot that answers through the OpenAI chat API."""

    def __init__(self, model_name, retrieve_function):
        """
        A custom retrieval-augmented chatbot using direct OpenAI calls.
        :param model_name: OpenAI chat model identifier, e.g., "gpt-4"
        :param retrieve_function: callable that takes the user query and returns a
                                  list of context strings (here: the FAISS retriever)
        """
        self.model_name = model_name
        self.retrieve = retrieve_function

    def chat(self, user_query):
        """Generate an answer from the configured OpenAI model using retrieved context.

        :param user_query: the user's question
        :return: the model's reply with surrounding whitespace removed
                 (empty string if the API returns no text)
        """
        # 1) Retrieve knowledge
        retrieved_info = self.retrieve(user_query)
        knowledge_base = "\n".join(retrieved_info)
        # 2) Construct final prompt
        prompt = (
            f"Using the following medical knowledge:\n{knowledge_base}\n"
            f"Answer the question in a professional and medically accurate manner: {user_query}"
        )
        messages = [
            {"role": "system", "content": "You are a helpful medical chatbot."},
            {"role": "user", "content": prompt}
        ]
        # 3) Call the OpenAI chat completions API.
        # openai>=1.0 removed openai.ChatCompletion (APIRemovedInV1); it exposes
        # openai.chat.completions on the module-level client instead, which still reads
        # the global openai.api_key. Support both so the cell runs on either version.
        if hasattr(openai, "OpenAI"):
            completion = openai.chat.completions.create(
                model=self.model_name,
                messages=messages,
                temperature=0.7
            )
            content = completion.choices[0].message.content
        else:
            completion = openai.ChatCompletion.create(
                model=self.model_name,
                messages=messages,
                temperature=0.7
            )
            content = completion["choices"][0]["message"]["content"]
        # 4) Return final response (content can be None on an empty completion)
        return (content or "").strip()

# Instantiate the custom chatbot agent with the OpenAI + RAG setup.
# The notebook targets GPT-4o; the recorded run with "gpt-4" failed because that model
# was not available to the account.
chatbot = RAGMedicalChatbot(
    model_name="gpt-4o",
    retrieve_function=retrieve_medical_info
)
print("✅ Medical chatbot is ready!")

# ==========================
# Step 6: Interactive Chat Testing (For Local Debugging)
# ==========================
if __name__ == "__main__":
    print("\n🩺 Medical Chatbot is running...\n")
    # Start session
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted cell ends the session cleanly
            print("\n👋 Chatbot session ended.")
            break
        # Type exit or quit (case-insensitive) to leave the chat loop
        if user_input.lower() in ("exit", "quit"):
            print("👋 Chatbot session ended.")
            break
        if not user_input:
            continue  # do not spend an API call on an empty prompt
        # chatbot.generate_reply(...) is not available: no AutoGen chat classes are used here
        response = chatbot.chat(user_input)
        print("Chatbot:", response)


✅ Loading medical dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Loaded 10000 medical Q&A pairs.
✅ Generating FAISS vector embeddings...
✅ Loading existing FAISS index...
✅ FAISS index saved successfully!
✅ Initializing RAG-based medical chatbot...
✅ Initializing AI agent with AutoGen...
✅ Medical chatbot is ready!

🩺 Medical Chatbot is running...

You: "I've been feeling unusually tired and my vision has become blurry over the past few weeks. I'm also experiencing frequent headaches. Could these be signs of a serious condition?"


InvalidRequestError: The model `gpt-4` does not exist or you do not have access to it.

# Inference with Qwen2.5-Max

In [None]:
# ==========================
# Medical Chatbot Backend (AutoGen + Qwen API + RAG)
# ==========================
import os
from dotenv import load_dotenv
from datasets import load_dataset
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import autogen_agentchat  # Use autogen_agentchat instead of autogen
import autogen_ext  # Use autogen_ext for Qwen API support
import requests
import json
import gc  # Garbage collection

# Define paths for FAISS index and model cache (both live on the mounted Google Drive)
project_dir = "/content/drive/My Drive/AutoGenRAGMedicalChatbot"
os.makedirs(project_dir, exist_ok=True)  # Ensure directory exists
faiss_index_path = os.path.join(project_dir, "medical_faiss_index")
huggingface_cache_dir = os.path.join(project_dir, "huggingface_models")
# Set Hugging Face cache directory to avoid re-downloading models
# NOTE(review): the Hugging Face libraries are imported before this line, so they may
# already have read HF_HOME - confirm this assignment still has an effect.
os.environ["HF_HOME"] = huggingface_cache_dir

# ==========================
# Step 1: Load Qwen API Key
# ==========================
# Read the key from the environment (optionally populated from a .env file) instead of
# hardcoding it: a literal placeholder is always truthy, so the "missing key" check
# below could never fire, and real keys must not be saved in notebooks.
load_dotenv()
qwen_api_key = os.getenv("QWEN_API_KEY", "").strip()
if not qwen_api_key:
    raise ValueError("Qwen API key is missing! Set the QWEN_API_KEY environment variable.")

# ==========================
# Step 2: Load the Medical Dataset from Hugging Face
# ==========================
print("✅ Loading medical dataset...")
dataset = load_dataset("ruslanmv/ai-medical-chatbot", cache_dir=huggingface_cache_dir)
# Extract patient-doctor conversations.
# Use only the first 10k rows to reduce RAM usage. The split is sliced *before* the
# pandas conversion so the full training split is never materialised as a DataFrame.
train_split = dataset["train"]
max_dialogues = min(10000, train_split.num_rows)  # never request more rows than exist
medical_dialogues = train_split.select(range(max_dialogues)).to_pandas()[["Patient", "Doctor"]]
print(f"✅ Loaded {len(medical_dialogues)} medical Q&A pairs.")

# ==========================
# Step 3: Convert Dataset into FAISS Embeddings
# ==========================
# Load sentence transformer model (always needed: it also embeds queries at chat time)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu", cache_folder=huggingface_cache_dir)
# Q&A records; row i of the FAISS index corresponds to medical_qa[i]
medical_qa = [
    {"question": question, "answer": answer}
    for question, answer in zip(medical_dialogues["Patient"], medical_dialogues["Doctor"])
]

# Reuse the cached index only when it describes exactly the rows loaded above.
# The index file path is shared by every cell of this notebook, and the cells load
# different numbers of rows, so a stale index would return ids for the wrong rows.
index = None
if os.path.exists(faiss_index_path):
    print("✅ Loading existing FAISS index...")
    index = faiss.read_index(faiss_index_path)
    if index.ntotal != len(medical_qa):
        print(
            f"⚠️ Cached FAISS index holds {index.ntotal} vectors but "
            f"{len(medical_qa)} Q&A pairs were loaded; rebuilding..."
        )
        index = None

if index is None:
    # Embeddings are only computed when an index has to be built; encoding the corpus
    # on CPU is the slowest step of this cell.
    print("✅ Generating FAISS vector embeddings...")
    medical_embeddings = embedding_model.encode(
        [qa["question"] + " " + qa["answer"] for qa in medical_qa],
        convert_to_numpy=True
    )
    medical_embeddings = np.asarray(medical_embeddings, dtype=np.float32)  # FAISS expects float32
    print("✅ Creating FAISS index...")
    index = faiss.IndexHNSWFlat(medical_embeddings.shape[1], 32)  # 32 = HNSW graph connections
    index.add(medical_embeddings)
    faiss.write_index(index, faiss_index_path)
    print("✅ FAISS index saved successfully!")
    del medical_embeddings
    gc.collect()
    print("✅ Memory cleared after FAISS indexing.")

# ==========================
# Step 4: Retrieval-Augmented Generation (RAG) Implementation
# ==========================
print("✅ Initializing RAG-based medical chatbot...")
# `index` (loaded or freshly built above) is the FAISS index used for retrieval

def retrieve_medical_info(query):
    """Retrieve relevant medical knowledge using FAISS.

    :param query: free-text user question to embed and search for
    :return: list of at most 3 stored answers, best match first; may be shorter
             (or empty) when FAISS returns fewer valid neighbours
    """
    # FAISS requires float32 input; make the dtype explicit rather than relying on the encoder
    query_embedding = np.asarray(
        embedding_model.encode([query], convert_to_numpy=True), dtype=np.float32
    )
    _, idxs = index.search(query_embedding, k=3)  # Get top 3 matches
    # FAISS pads missing results with -1, and an index built from a different number of
    # rows can return ids past the end of medical_qa. Skip both instead of silently
    # returning the wrong row (negative index) or raising IndexError.
    return [
        medical_qa[int(i)]["answer"]
        for i in idxs[0]
        if 0 <= int(i) < len(medical_qa)
    ]

# ==========================
# Step 5: AutoGen AI Chatbot Implementation with Qwen API
# ==========================
print("✅ Initializing AI agent with AutoGen and Qwen API...")

class QwenMedicalChatbot:
    """Retrieval-augmented medical chatbot that answers through an HTTP chat-completions API."""

    def __init__(self, api_key, retrieve_function,
                 api_url="https://api.qwen.com/v1/chat/completions",
                 model_name="qwen2.5-max",
                 timeout=60):
        """
        A custom retrieval-augmented chatbot using Qwen API.
        :param api_key: Qwen API key, sent as a Bearer token
        :param retrieve_function: callable that takes the user query and returns a
                                  list of context strings (here: the FAISS retriever)
        :param api_url: chat-completions endpoint. The default is the placeholder this
                        notebook shipped with - replace with the actual Qwen API endpoint
        :param model_name: model identifier placed in the request payload
        :param timeout: seconds to wait for the HTTP response before giving up
        """
        self.api_key = api_key
        self.retrieve = retrieve_function
        self.api_url = api_url
        self.model_name = model_name
        self.timeout = timeout

    def chat(self, user_query):
        """Generate an answer from Qwen using retrieved context.

        :param user_query: the user's question
        :return: the model's reply with surrounding whitespace removed
        :raises RuntimeError: if the API answers with a non-200 status code
        """
        # 1) Retrieve knowledge
        retrieved_info = self.retrieve(user_query)
        knowledge_base = "\n".join(retrieved_info)

        # 2) Construct final prompt
        prompt = (
            f"Using the following medical knowledge:\n{knowledge_base}\n"
            f"Answer the question in a professional and medically accurate manner: {user_query}"
        )

        # 3) Call Qwen API
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model_name,
            "messages": [
                {"role": "system", "content": "You are a helpful medical chatbot."},
                {"role": "user", "content": prompt}
            ],
            "temperature": 0.7
        }

        # Without a timeout, requests waits indefinitely on an unresponsive server and
        # the interactive loop would hang.
        response = requests.post(self.api_url, headers=headers, json=payload, timeout=self.timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Qwen API error: {response.text}")

        result = response.json()
        content = result["choices"][0]["message"]["content"]
        return (content or "").strip()

# Instantiate the chatbot with Qwen API
chatbot = QwenMedicalChatbot(
    api_key=qwen_api_key,
    retrieve_function=retrieve_medical_info
)
print("✅ Medical chatbot is ready!")

# ==========================
# Step 6: Interactive Chat Testing (For Local Debugging)
# ==========================
if __name__ == "__main__":
    print("\n🩺 Medical Chatbot is running...\n")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted cell ends the session cleanly
            print("\n👋 Chatbot session ended.")
            break
        # Type exit or quit (case-insensitive) to leave the chat loop
        if user_input.lower() in ("exit", "quit"):
            print("👋 Chatbot session ended.")
            break
        if not user_input:
            continue  # do not spend an API call on an empty prompt
        response = chatbot.chat(user_input)
        print("Chatbot:", response)

# Inference with Gemini Flash

In [None]:
# ==========================
# Medical Chatbot Backend (AutoGen + Gemini Flash API + RAG)
# ==========================

# IMPORTANT:
# This script uses the Gemini Flash API (model: gemini-2.0-flash).
# Ensure you have the correct Gemini Flash API endpoint and payload format.

import os
from dotenv import load_dotenv
from datasets import load_dataset
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import autogen_agentchat # (Not used for agent classes in this version)
import autogen_ext       # (Not used for agent creation in this version)
import requests          # To make HTTP requests to Gemini Flash API
from google import genai # Use Gemini Flash GenAI model
import gc                # Garbage collection
import time

# --------------------------
# Set up paths and cache directories on Google Drive
# --------------------------
project_dir = "/content/drive/My Drive/AutoGenRAGMedicalChatbot"
os.makedirs(project_dir, exist_ok=True)  # Ensure directory exists
faiss_index_path = os.path.join(project_dir, "medical_faiss_index")
huggingface_cache_dir = os.path.join(project_dir, "huggingface_models")
# Set Hugging Face cache directory to avoid re-downloading models
# NOTE(review): the Hugging Face libraries are imported before this line, so they may
# already have read HF_HOME - confirm this assignment still has an effect.
os.environ["HF_HOME"] = huggingface_cache_dir

# --------------------------
# Step 1: Load Gemini Flash API Key
# --------------------------
# Read the key from the environment (optionally populated from a .env file) instead of
# hardcoding it: a literal placeholder is always truthy, so the "missing key" check
# below could never fire, and real keys must not be saved in notebooks.
load_dotenv()
gemini_flash_api_key = os.getenv("GEMINI_API_KEY", "").strip()
if not gemini_flash_api_key:
    raise ValueError("Gemini Flash API key is missing! Set the GEMINI_API_KEY environment variable.")

# --------------------------
# Step 2: Load the Medical Dataset from Hugging Face
# --------------------------
print("✅ Loading medical dataset...")
dataset = load_dataset("ruslanmv/ai-medical-chatbot", cache_dir=huggingface_cache_dir)
# For reduced RAM usage, we use only the first 50k rows. The split is sliced *before*
# the pandas conversion so the full training split is never materialised as a DataFrame.
train_split = dataset["train"]
max_dialogues = min(50000, train_split.num_rows)  # never request more rows than exist
medical_dialogues = train_split.select(range(max_dialogues)).to_pandas()[["Patient", "Doctor"]]
print(f"✅ Loaded {len(medical_dialogues)} medical Q&A pairs.")

# --------------------------
# Step 3: Convert Dataset into FAISS Embeddings
# --------------------------
# Load SentenceTransformer model on CPU (always needed: it also embeds queries at chat time)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu", cache_folder=huggingface_cache_dir)
# Prepare Q&A data; row i of the FAISS index corresponds to medical_qa[i]
medical_qa = [
    {"question": question, "answer": answer}
    for question, answer in zip(medical_dialogues["Patient"], medical_dialogues["Doctor"])
]

# Reuse the cached index only when it describes exactly the rows loaded above.
# The index file path is shared by every cell of this notebook, and the cells load
# different numbers of rows, so a stale index would leave most of this cell's rows
# unsearchable or return ids for the wrong rows.
index = None
if os.path.exists(faiss_index_path):
    print("✅ Loading existing FAISS index...")
    index = faiss.read_index(faiss_index_path)
    if index.ntotal != len(medical_qa):
        print(
            f"⚠️ Cached FAISS index holds {index.ntotal} vectors but "
            f"{len(medical_qa)} Q&A pairs were loaded; rebuilding..."
        )
        index = None

if index is None:
    # Embeddings are only computed when an index has to be built; encoding the corpus
    # on CPU is the slowest step of this cell.
    print("✅ Generating FAISS vector embeddings...")
    # Generate vector embeddings and convert to float32 for FAISS
    medical_embeddings = embedding_model.encode(
        [qa["question"] + " " + qa["answer"] for qa in medical_qa],
        convert_to_numpy=True
    ).astype(np.float32)
    print("✅ Creating FAISS index...")
    index = faiss.IndexHNSWFlat(medical_embeddings.shape[1], 32)  # 32 = HNSW graph connections
    index.add(medical_embeddings)
    faiss.write_index(index, faiss_index_path)
    print("✅ FAISS index saved successfully!")
    del medical_embeddings
    gc.collect()
    print("✅ Memory cleared after FAISS indexing.")

# --------------------------
# Step 4: Retrieval-Augmented Generation (RAG) Implementation
# --------------------------
print("✅ Initializing RAG-based medical chatbot...")
def retrieve_medical_info(query):
    """Retrieve relevant medical knowledge using FAISS.

    :param query: free-text user question to embed and search for
    :return: list of at most 3 stored answers, best match first; may be shorter
             (or empty) when FAISS returns fewer valid neighbours
    """
    # FAISS requires float32 input; make the dtype explicit rather than relying on the encoder
    query_embedding = np.asarray(
        embedding_model.encode([query], convert_to_numpy=True), dtype=np.float32
    )
    _, idxs = index.search(query_embedding, k=3)  # Get top 3 matches
    # FAISS pads missing results with -1, and an index built from a different number of
    # rows can return ids past the end of medical_qa. Skip both instead of silently
    # returning the wrong row (negative index) or raising IndexError.
    return [
        medical_qa[int(i)]["answer"]
        for i in idxs[0]
        if 0 <= int(i) < len(medical_qa)
    ]

# --------------------------
# Step 5: Custom RAG Medical Chatbot Agent Class using Gemini Flash API
# --------------------------
print("✅ Initializing AI agent with custom RAGMedicalChatbot class...")

def gemini_flash_completion(prompt, model, temperature=0.7):
    """
    Call the Gemini Flash API (through the google-genai client) to get a completion.

    :param prompt: full text prompt sent as the request contents
    :param model: Gemini model identifier, e.g. "gemini-2.0-flash"
    :param temperature: sampling temperature forwarded in the generation config
    :return: the response text, or None if the API call failed (the error is printed)
    """
    try:
        client = genai.Client(api_key=gemini_flash_api_key)
        response = client.models.generate_content(
            model=model,
            contents=prompt,
            # Forward the sampling temperature; previously the argument was accepted
            # but never sent, so the API default was silently used instead.
            config={"temperature": temperature},
        )
        return response.text  # Extract the text from the response
    except Exception as e:
        # Best effort: report the failure and let the caller decide how to continue
        print(f"Error calling Gemini API: {e}")
        return None

class RAGMedicalChatbot:
    """Retrieval-augmented medical chatbot that answers through the Gemini Flash API."""

    def __init__(self, model_name, retrieve_function):
        """
        A custom retrieval-augmented chatbot using Gemini Flash API.
        :param model_name: e.g., "gemini-2.0-flash" (adjust based on the actual model name)
        :param retrieve_function: callable that takes the user query and returns a
                                  list of context strings (here: the FAISS retriever)
        """
        self.model_name = model_name
        self.retrieve = retrieve_function

    def chat(self, user_query):
        """Generate an answer from Gemini Flash API using retrieved context.

        :param user_query: the user's question
        :return: the model's reply with surrounding whitespace removed, or None when
                 the completion helper returned no text (e.g. the API call failed)
        """
        # 1) Retrieve relevant knowledge
        retrieved_info = self.retrieve(user_query)
        knowledge_base = "\n".join(retrieved_info)
        # 2) Construct final prompt
        prompt = (
            f"Using the following medical knowledge:\n{knowledge_base}\n"
            f"Answer the question in a professional and medically accurate manner: {user_query}"
        )
        # 3) Call Gemini Flash API for completion
        completion = gemini_flash_completion(prompt, model=self.model_name, temperature=0.7)
        # 4) The helper returns None on failure; propagate that instead of crashing on
        #    None.strip(), so the caller's "no response" branch is actually reachable.
        if completion is None:
            return None
        return completion.strip()

# Instantiate the custom chatbot agent.
# Replace "gemini-2.0-flash" with the actual model identifier if needed.
chatbot = RAGMedicalChatbot(
    model_name="gemini-2.0-flash",
    retrieve_function=retrieve_medical_info
)
print("✅ Medical chatbot is ready!")

# --------------------------
# Step 6: Interactive Chat Testing (For Local Debugging)
# --------------------------
if __name__ == "__main__":
    print("\n🩺 Medical Chatbot is running...\n")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted cell ends the session cleanly
            print("\n👋 Chatbot session ended.")
            break
        # Type exit or quit (case-insensitive) to leave the chat loop
        if user_input.lower() in ("exit", "quit"):
            print("👋 Chatbot session ended.")
            break
        if not user_input:
            continue  # do not spend an API call on an empty prompt
        start_time = time.time()  # Start timing
        response = chatbot.chat(user_input)
        end_time = time.time()  # End timing
        if response:  # Check if a response exists before printing
            print("Chatbot:", response)
            print(f"Response time: {end_time - start_time:.2f} seconds")  # Print response time
        else:
            print("Error generating response.")  # Print error message
        gc.collect()  # Collect garbage after each loop for memory management

✅ Loading medical dataset...
✅ Loaded 50000 medical Q&A pairs.
✅ Generating FAISS vector embeddings...
✅ Loading existing FAISS index...
✅ FAISS index saved successfully!
✅ Initializing RAG-based medical chatbot...
✅ Initializing AI agent with custom RAGMedicalChatbot class...
✅ Medical chatbot is ready!

🩺 Medical Chatbot is running...

You: I have a headache and feeling dizzy, what illness is this?
Chatbot: It is impossible to diagnose the cause of your headache and dizziness without a thorough medical evaluation. Based on the medical knowledge provided, several possibilities exist:

*   **Blood Pressure Issues:** Fluctuations or high blood pressure can cause headaches and dizziness.
*   **Otological Causes:** An ear infection could be responsible.
*   **Viral Respiratory Tract Infection:** If you have congestion, a viral infection might be the cause.
*   **Lifestyle Factors:** Stress and smoking habits can contribute to headaches.
*   **Cardiac Issues:** Reduced blood flow to the br