In [1]:
# ============================================================================
# Step 1: Install Dependencies for RAG and Streamlit
# ============================================================================
!pip install -q faiss-cpu sentence-transformers numpy pandas google-genai streamlit pyngrok requests

In [2]:
# ============================================================================
# Step 2: Imports (Google Drive itself is mounted in the following cells)
# ============================================================================
from google.colab import drive
import os
import json
import requests
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import google.generativeai as genai
import sys

In [3]:
# Attach Google Drive to the Colab filesystem under /content/drive.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# ============================================================================
# Step 2: Google Drive Mounting & File Copy
# ============================================================================
from google.colab import drive
import os
import shutil
import sys

# Mount Drive (when it is already mounted this only prints a notice).
drive.mount('/content/drive')

# --- CONFIGURATION ---
# Drive folder that holds the helper modules and the RAG data files.
PROJECT_FOLDER_PATH = '/content/drive/MyDrive/Colab Notebooks/LLM_Based_GenAI_Sem1/data/'

# Stop immediately when the configured folder cannot be reached.
project_folder_missing = not os.path.exists(PROJECT_FOLDER_PATH)
if project_folder_missing:
    print(f"‚ùå Error: Folder not found at {PROJECT_FOLDER_PATH}")
    raise FileNotFoundError("Check your Drive path configuration.")

print("‚úÖ Drive Mounted.")

# --- COPY MODULES TO LOCAL RUNTIME ---
# The modules are copied next to the notebook's working files so that they can
# be imported with plain `import` statements instead of from the Drive path.
files_to_copy = ['utils.py', 'agents.py', 'orchestrator.py']

print("\nüîÑ Copying agent modules to local runtime...")
for file_name in files_to_copy:
    src = os.path.join(PROJECT_FOLDER_PATH, file_name)
    dst = os.path.join('/content', file_name)  # Local Colab root

    # Guard clause: abort on the first module that is absent from Drive.
    if not os.path.exists(src):
        print(f"   ‚ùå MISSING: {file_name} in Drive folder!")
        raise FileNotFoundError(f"Missing {file_name}")

    shutil.copyfile(src, dst)
    print(f"   - Copied {file_name} -> Local /content/")

print("‚úÖ All modules copied. Import paths are now clean.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚úÖ Drive Mounted.

üîÑ Copying agent modules to local runtime...
   - Copied utils.py -> Local /content/
   - Copied agents.py -> Local /content/
   - Copied orchestrator.py -> Local /content/
‚úÖ All modules copied. Import paths are now clean.


In [5]:
# ============================================================================
# Step 3: Define Data Paths & Load RAG (Verification)
# ============================================================================

# The helper modules were copied to /content in the previous step, so they are
# imported by name here.
import utils
import agents
import orchestrator
from utils import load_data_and_index

# Absolute locations of the RAG artifacts (the data itself stays in Drive).
CHUNK_MAP_PATH = os.path.join(PROJECT_FOLDER_PATH, 'clinical_trials_diabetes_full_chunk_map.json')
FAISS_INDEX_PATH = os.path.join(PROJECT_FOLDER_PATH, 'clinical_trials_diabetes_full_faiss.index')

# Fail fast, reporting the first artifact that is missing (chunk map is checked first).
for data_path, missing_message in (
    (CHUNK_MAP_PATH, f"Chunk map not found at: {CHUNK_MAP_PATH}"),
    (FAISS_INDEX_PATH, f"FAISS index not found at: {FAISS_INDEX_PATH}"),
):
    if not os.path.exists(data_path):
        raise FileNotFoundError(missing_message)

print(f"‚úÖ Data paths verified:\n  - {CHUNK_MAP_PATH}\n  - {FAISS_INDEX_PATH}")

# Test load in the notebook; the three results are kept as notebook globals.
print("\n‚è≥ Loading RAG Index (Test Load)...")
embed_model, faiss_index, chunk_map = load_data_and_index(CHUNK_MAP_PATH, FAISS_INDEX_PATH)
print("‚úÖ RAG Index Verified in Notebook.")

‚úÖ Data paths verified:
  - /content/drive/MyDrive/Colab Notebooks/LLM_Based_GenAI_Sem1/data/clinical_trials_diabetes_full_chunk_map.json
  - /content/drive/MyDrive/Colab Notebooks/LLM_Based_GenAI_Sem1/data/clinical_trials_diabetes_full_faiss.index

‚è≥ Loading RAG Index (Test Load)...
‚è≥ Loading pre-built RAG index...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


‚úÖ RAG Index Ready: 18063 vectors loaded.
‚úÖ RAG Index Verified in Notebook.


In [6]:
# ============================================================================
# Step 4: Initialize API Key
# ============================================================================
import getpass
import os

# Resolve the Gemini API key without storing it in the notebook. Sources are
# tried in this order:
#   1. the GEMINI_API_KEY environment variable,
#   2. a Colab secret named GEMINI_API_KEY,
#   3. an interactive (non-echoing) prompt.
# The result is exposed as the notebook global `API_KEY`.
API_KEY = os.environ.get("GEMINI_API_KEY", "")

if not API_KEY:
    try:
        from google.colab import userdata  # only importable on Colab
        API_KEY = userdata.get("GEMINI_API_KEY") or ""
    except Exception:
        # Not on Colab, the secret does not exist, or the notebook was not
        # granted access to it: fall through to the prompt below.
        API_KEY = ""

if not API_KEY:
    API_KEY = getpass.getpass("Enter your Gemini API key: ").strip()

if not API_KEY:
    raise ValueError("No Gemini API key provided.")


In [7]:
# ============================================================================
# Step 5: Generate the Streamlit UI Script
# ============================================================================

# `streamlit run` needs a plain .py file, so the UI code is rendered from the
# template below and written to disk.
#
# The template is an f-string:
#   * `{API_KEY!r}`, `{CHUNK_MAP_PATH!r}` and `{FAISS_INDEX_PATH!r}` use SINGLE braces
#     so the notebook's values are injected as properly quoted Python literals.
#   * Every brace that must survive into the generated script (dict literals and
#     the script's own f-string fields) is DOUBLED.
#
# The generated script imports `orchestrator` and `utils`; they were copied to
# /content in Step 2 and must be importable from where Streamlit is started.
#
# NOTE: the API key is embedded in the generated file. Do not commit or share it.
STREAMLIT_APP_CONTENT = f"""
import streamlit as st
import os
import google.generativeai as genai
from sentence_transformers import SentenceTransformer

# IMPORT YOUR MODULES (Now working natively)
import orchestrator
from utils import load_data_and_index

# --- CONFIGURATION ---
def initialize_chatbot():
    # 1. API Key
    # An exported GEMINI_API_KEY takes precedence; otherwise the key injected
    # by the notebook is used. The client is configured in both cases.
    genai.configure(api_key=os.environ.get("GEMINI_API_KEY") or {API_KEY!r})

    # 2. Load Data
    # We use the exact paths verified in the notebook
    chunk_path = {CHUNK_MAP_PATH!r}
    faiss_path = {FAISS_INDEX_PATH!r}

    if 'embed_model' not in st.session_state:
        try:
            embed_model, faiss_index, chunk_map = load_data_and_index(chunk_path, faiss_path)
            st.session_state.embed_model = embed_model
            st.session_state.faiss_index = faiss_index
            st.session_state.chunk_map = chunk_map
        except Exception as e:
            st.error(f"Data Load Error: {{e}}")
            st.stop()

    # 3. Initialize Bot
    gemini_model = genai.GenerativeModel('models/gemini-2.0-flash')

    initial_profile = {{
        'user_id': 'Alice',
        'age': 55,
        'conditions': ['Type 2 Diabetes', 'High Cholesterol'],
        'medications': ['Statin']
    }}

    if 'bot' not in st.session_state:
        st.session_state.bot = orchestrator.HealthcareBot(
            gemini_model=gemini_model,
            embed_model=st.session_state.embed_model,
            faiss_index=st.session_state.faiss_index,
            chunk_map=st.session_state.chunk_map,
            initial_profile=initial_profile
        )
        st.session_state.messages = []
        st.session_state.messages.append({{"role": "assistant", "content": f"Hello {{initial_profile['user_id']}}! I am your AI Health Assistant. How can I help you?"}})

# --- UI LAYOUT ---
st.set_page_config(layout="wide", page_title="Healthcare RAG Chatbot")
st.title("ü©∫ Personalized Healthcare Assistant (RAG Agent)")

with st.spinner("Initializing System..."):
    initialize_chatbot()

bot = st.session_state.bot

# Sidebar
with st.sidebar:
    st.header("üë§ User Profile")
    st.json(bot.profile_agent.profile)
    st.divider()
    st.header("üõ†Ô∏è Debug Info")
    if bot.history:
        st.caption(f"Last Hash: {{bot.history[-1].get('response_hash', 'N/A')}}")

# Chat
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask about diabetes trials..."):
    st.session_state.messages.append({{"role": "user", "content": prompt}})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.spinner('Analyzing clinical trials...'):
        result = bot.process_query(prompt)

    with st.chat_message("assistant"):
        st.markdown(result['recommendation'])

        # Provenance Display
        with st.expander("üî¨ Traceability & Evidence Chain"):
            st.info(f"Safety: {{result['safety_status']}}")

            for step in result['provenance_chain']:
                with st.container(border=True):
                    st.caption(f"**{{step['agent']}}**")

                    if step['agent'] == 'RetrievalAgent':
                        trials = step['output'].get('trials', [])
                        score = trials[0].get('retrieval_score', 0.0) if trials else 0.0
                        st.markdown(f"**Retrieved:** {{len(trials)}} | **Top Score:** `{{score:.2f}}`")

                    elif step['agent'] == 'DiagnosisAdvisor':
                        st.markdown(f"**Veto:** `{{step['output'].get('veto', False)}}`")

                    st.json(step)

    st.session_state.messages.append({{"role": "assistant", "content": result['recommendation']}})
"""

# Write the file. The script contains non-ASCII text, so the encoding is set
# explicitly instead of relying on the runtime's locale default.
with open('streamlit_app.py', 'w', encoding='utf-8') as f:
    f.write(STREAMLIT_APP_CONTENT)

In [8]:
# ============================================================================
# Step 6: Streamlit UI Execution
# ============================================================================

print("\n\n################################################################################")
print("üöÄ INSTALLING AND STARTING CLOUDFLARE TUNNEL")
print("################################################################################")

# 1. Install Cloudflared
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!mv cloudflared-linux-amd64 cloudflared
!chmod +x cloudflared

# 2. Start Streamlit on port 8501
# Note: Streamlit default port is 8501.
!nohup streamlit run streamlit_app_temp.py --server.port 8501 --server.enableCORS false > /dev/null 2>&1 &
import time
time.sleep(5)
print("Streamlit service started on port 8501. Establishing Cloudflare tunnel...")

# 3. Start Cloudflare Tunnel
!./cloudflared tunnel --url http://localhost:8501 --no-autoupdate



################################################################################
üöÄ INSTALLING AND STARTING CLOUDFLARE TUNNEL
################################################################################
Streamlit service started on port 8501. Establishing Cloudflare tunnel...
[90m2025-11-23T20:11:05Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2025-11-23T20:11:05Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
