# Running Chatbot with Google Gemini on Colab

This notebook runs the chatbot project using Google's Gemini API on Google Colab.

## Step 1: Clone the GitHub repository

In [1]:
# Clone your GitHub repository
!git clone https://github.com/ST-SARAVANAPRIYAN/chatbot.git
%cd chatbot

Cloning into 'chatbot'...
remote: Enumerating objects: 38, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 38 (delta 6), reused 38 (delta 6), pack-reused 0 (from 0)[K
Receiving objects: 100% (38/38), 40.08 KiB | 641.00 KiB/s, done.
Resolving deltas: 100% (6/6), done.
/content/chatbot


## Step 2: Install required packages

In [3]:
# Install required packages
!pip install llama-index==0.9.8 llama-index-core llama-index-embeddings-gemini==0.1.0 chromadb python-dotenv streamlit spacy transformers torch langchain
!python -m spacy download en_core_web_sm

Collecting llama-index-embeddings-gemini==0.1.0
  Downloading llama_index_embeddings_gemini-0.1.0-py3-none-any.whl.metadata (646 bytes)
Collecting google-generativeai<0.4.0,>=0.3.2 (from llama-index-embeddings-gemini==0.1.0)
  Downloading google_generativeai-0.3.2-py3-none-any.whl.metadata (5.9 kB)
Collecting llama-index-core
  Downloading llama_index_core-0.10.0-py3-none-any.whl.metadata (3.0 kB)
Collecting google-ai-generativelanguage==0.4.0 (from google-generativeai<0.4.0,>=0.3.2->llama-index-embeddings-gemini==0.1.0)
  Downloading google_ai_generativelanguage-0.4.0-py3-none-any.whl.metadata (5.1 kB)
Collecting protobuf<7,>=3.20 (from streamlit)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
INFO: pip is looking at multiple versions of opentelemetry-proto to determine which version is compatible with other requirements. This could take a while.
Collecting opentelemetry-exporter-otlp-proto-common==1.34.1 (from opentelemetry-exporter-otlp-proto-g

Traceback (most recent call last):
  File "<frozen runpy>", line 189, in _run_module_as_main
  File "<frozen runpy>", line 148, in _get_module_details
  File "<frozen runpy>", line 112, in _get_module_details
  File "/usr/local/lib/python3.11/dist-packages/spacy/__init__.py", line 6, in <module>
  File "/usr/local/lib/python3.11/dist-packages/spacy/errors.py", line 3, in <module>
    from .compat import Literal
  File "/usr/local/lib/python3.11/dist-packages/spacy/compat.py", line 4, in <module>
    from thinc.util import copy_array
  File "/usr/local/lib/python3.11/dist-packages/thinc/__init__.py", line 5, in <module>
    from .config import registry
  File "/usr/local/lib/python3.11/dist-packages/thinc/config.py", line 5, in <module>
    from .types import Decorator
  File "/usr/local/lib/python3.11/dist-packages/thinc/types.py", line 27, in <module>
    from .compat import cupy, has_cupy
  File "/usr/local/lib/python3.11/dist-packages/thinc/compat.py", line 35, in <module>
    impor

## Step 3: Set up your Gemini API key

In [1]:
# Set up your Gemini API key
import os
from getpass import getpass

# Securely input your API key
GEMINI_API_KEY = getpass('Enter your Gemini API key: ')
os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY

# Create .env file with the API key
with open('.env', 'w') as f:
    f.write(f"GEMINI_API_KEY={GEMINI_API_KEY}\n")
    f.write("CHROMA_DB_DIRECTORY=./chroma_db\n")

Enter your Gemini API key: ··········


## Step 4: Create a version-flexible chatbot implementation

This version handles different import structures in different versions of LlamaIndex

In [2]:
%%writefile colab_chatbot_flexible.py
import os
import sys
import logging
import google.generativeai as genai
from dotenv import load_dotenv
import chromadb

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

# Directory paths
DATA_DIR = "./data"
CHROMA_DB_DIRECTORY = "./chroma_db"

class GeminiChatbot:
    def __init__(self):
        # Get Gemini API key
        self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            print("GEMINI_API_KEY not found in environment. Please set it first.")
            self.api_key = input("Enter your Gemini API key: ")
            os.environ["GEMINI_API_KEY"] = self.api_key

        # Configure Gemini
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel('gemini-pro')
        self.index = None

    def load_documents(self):
        """Load documents from the data directory"""
        if not os.path.exists(DATA_DIR):
            os.makedirs(DATA_DIR)
            print(f"Created data directory at {DATA_DIR}")

        if not os.listdir(DATA_DIR):
            print("Data directory is empty. Please add some documents.")
            return False

        return True

    def build_index(self):
        """Build vector index from documents"""
        try:
            # Dynamic imports to handle different versions of llama-index
            try:
                # Try importing from llama_index (newer versions)
                from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext
                from llama_index.node_parser import SentenceSplitter
                from llama_index.vector_stores.chroma import ChromaVectorStore
                print("Using llama_index package")
            except ImportError:
                # Try importing from llama_index.core (older or different versions)
                from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
                from llama_index.core.node_parser import SentenceSplitter
                try:
                    from llama_index.core.vector_stores.chroma import ChromaVectorStore
                    print("Using llama_index.core package")
                except ImportError:
                    from llama_index.vector_stores.chroma import ChromaVectorStore
                    print("Using mixed llama_index imports")

            # Load documents
            documents = SimpleDirectoryReader(DATA_DIR).load_data()

            # Create sentence splitter for text chunking
            text_splitter = SentenceSplitter(
                chunk_size=1000,
                chunk_overlap=200
            )

            # Set up Chroma client
            if not os.path.exists(CHROMA_DB_DIRECTORY):
                os.makedirs(CHROMA_DB_DIRECTORY)

            chroma_client = chromadb.PersistentClient(path=CHROMA_DB_DIRECTORY)
            chroma_collection = chroma_client.get_or_create_collection("documents")
            vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)

            # Build index
            try:
                self.index = VectorStoreIndex.from_documents(
                    documents,
                    storage_context=storage_context,
                    transformations=[text_splitter],
                )
            except TypeError:
                # Try alternative approach if transformations parameter doesn't work
                self.index = VectorStoreIndex.from_documents(
                    documents,
                    storage_context=storage_context,
                    node_parser=text_splitter,
                )

            print("Index built successfully!")
            return True
        except Exception as e:
            print(f"Error building index: {str(e)}")
            import traceback
            traceback.print_exc()
            return False

    def query(self, query_text):
        """Query the index with a natural language query"""
        if not self.index:
            print("Index not built yet. Please build the index first.")
            return {
                "answer": "Index not built yet. Please build the index first.",
                "sources": []
            }

        try:
            # Create query engine
            query_engine = self.index.as_query_engine()

            # Execute query
            response = query_engine.query(query_text)

            # Extract source information
            sources = []
            if hasattr(response, 'source_nodes'):
                for node in response.source_nodes:
                    source = {
                        "text": node.node.get_content(),
                        "metadata": node.node.metadata,
                        "score": node.score if hasattr(node, 'score') else None
                    }
                    sources.append(source)

            return {
                "answer": str(response),
                "sources": sources
            }
        except Exception as e:
            print(f"Error during query: {str(e)}")
            import traceback
            traceback.print_exc()
            return {
                "answer": f"Error during query: {str(e)}",
                "sources": []
            }

    def chat_interface(self):
        """Simple chat interface for testing"""
        print("\n===== Gemini Chatbot =====")
        print("Type 'exit' to quit")
        print("========================\n")

        while True:
            query = input("\nYou: ")
            if query.lower() in ['exit', 'quit', 'q']:
                break

            response = self.query(query)
            print(f"\nBot: {response['answer']}")

            # Print sources
            if response['sources']:
                print("\nSources:")
                for i, source in enumerate(response['sources'][:2], 1):
                    source_name = source.get('metadata', {}).get('source', f"Source {i}")
                    print(f"- {source_name}")

if __name__ == "__main__":
    print("Initializing chatbot...")
    chatbot = GeminiChatbot()

    print("Loading documents...")
    if chatbot.load_documents():
        print("Building index...")
        if chatbot.build_index():
            print("Ready to chat!")
            chatbot.chat_interface()

Writing colab_chatbot_flexible.py


## Step 5: Upload custom content (Optional)

In [3]:
# Upload your own content files (optional)
from google.colab import files
import os

# Create data directory if it doesn't exist
if not os.path.exists('data'):
    os.makedirs('data')

# Let user upload files
print("Upload your content files (optional). Skip this if you want to use the sample FAQ.")
try:
    uploaded = files.upload()

    # Save uploaded files to the data directory
    for filename, content in uploaded.items():
        with open(f'data/{filename}', 'wb') as f:
            f.write(content)
        print(f"Saved {filename} to data directory")
except Exception as e:
    print(f"No files uploaded or error occurred: {e}")

Upload your content files (optional). Skip this if you want to use the sample FAQ.


## Step 6: Run the chatbot

In [1]:
# Run the command-line chatbot
!python colab_chatbot_flexible.py

Initializing chatbot...
Loading documents...
Building index...
Attempting to import LlamaIndex components...
Failed to import VectorStoreIndex from any of the provided paths: ['llama_index', 'llama_index.core', 'llama_index.indices.vector_store', 'llama_index.core.indices.vector_store']
Failed to import SimpleDirectoryReader from any of the provided paths: ['llama_index', 'llama_index.core', 'llama_index.readers', 'llama_index.core.readers']
Failed to import StorageContext from any of the provided paths: ['llama_index', 'llama_index.core', 'llama_index.storage', 'llama_index.core.storage']
Failed to import SentenceSplitter from any of the provided paths: ['llama_index.node_parser', 'llama_index.core.node_parser', 'llama_index.text_splitter', 'llama_index.core.text_splitter']
Failed to import required components: ['VectorStoreIndex', 'SimpleDirectoryReader', 'StorageContext', 'SentenceSplitter', 'ChromaVectorStore']
Attempting to install llama-index...
Please restart the notebook and tr

In [12]:
# Create a sample document in the data directory
!mkdir -p data
%%writefile data/sample_faq.md
# Sample FAQ

## What is this chatbot?
This is a RAG-based chatbot using Google Gemini API.

## What is RAG?
RAG stands for Retrieval-Augmented Generation, which enhances LLM responses with retrieved information.

## How does it work?
It uses vector embeddings to find relevant information and then generates responses based on that information.

SyntaxError: invalid syntax (ipython-input-12-4085606628.py, line 7)

In [1]:
# Run the chatbot in the same Colab notebook
%run colab_chatbot_flexible.py

Initializing chatbot...
Loading documents...
Building index...
Attempting to import LlamaIndex components...
Failed to import required components: ['ChromaVectorStore']
Attempting to install llama-index...
Please restart the notebook and try again.


In [10]:
import os
from google.colab import userdata

# Try to get the API key from Colab secrets
try:
  GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
except:
  # If not in secrets, ask user for it
  GEMINI_API_KEY = input('Enter your Gemini API key: ')

os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY

# Create .env file with the API key
with open('.env', 'w') as f:
    f.write(f"GEMINI_API_KEY={GEMINI_API_KEY}\n")

print("API key set successfully!")

Enter your Gemini API key:  AIzaSyD7FHAOs8JPMfhB_mmTOPy5rMhufMW2450
API key set successfully!


## Step 7: Create Streamlit Interface (Optional)

In [5]:
%%writefile colab_streamlit_app.py
import streamlit as st
import os
import google.generativeai as genai
from dotenv import load_dotenv
import chromadb

# Load environment variables
load_dotenv()

# Configure Gemini
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    genai.configure(api_key=api_key)

# Directory paths
DATA_DIR = "./data"
CHROMA_DB_DIRECTORY = "./chroma_db"

# Initialize session state
if 'index' not in st.session_state:
    st.session_state.index = None
if 'messages' not in st.session_state:
    st.session_state.messages = []

def build_index():
    """Build or load the vector index"""
    try:
        # Dynamic imports to handle different versions of llama-index
        try:
            # Try importing from llama_index (newer versions)
            from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext
            from llama_index.node_parser import SentenceSplitter
            from llama_index.vector_stores.chroma import ChromaVectorStore
            st.info("Using llama_index package")
        except ImportError:
            # Try importing from llama_index.core (older or different versions)
            from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
            from llama_index.core.node_parser import SentenceSplitter
            try:
                from llama_index.core.vector_stores.chroma import ChromaVectorStore
                st.info("Using llama_index.core package")
            except ImportError:
                from llama_index.vector_stores.chroma import ChromaVectorStore
                st.info("Using mixed llama_index imports")

        if not os.path.exists(DATA_DIR):
            os.makedirs(DATA_DIR)

        # Load documents
        documents = SimpleDirectoryReader(DATA_DIR).load_data()

        # Create sentence splitter for text chunking
        text_splitter = SentenceSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )

        # Set up Chroma client
        if not os.path.exists(CHROMA_DB_DIRECTORY):
            os.makedirs(CHROMA_DB_DIRECTORY)

        chroma_client = chromadb.PersistentClient(path=CHROMA_DB_DIRECTORY)
        chroma_collection = chroma_client.get_or_create_collection("documents")
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)

        # Build index
        try:
            index = VectorStoreIndex.from_documents(
                documents,
                storage_context=storage_context,
                transformations=[text_splitter],
            )
        except TypeError:
            # Try alternative approach if transformations parameter doesn't work
            index = VectorStoreIndex.from_documents(
                documents,
                storage_context=storage_context,
                node_parser=text_splitter,
            )

        return index
    except Exception as e:
        st.error(f"Error building index: {str(e)}")
        import traceback
        st.error(traceback.format_exc())
        return None

def query_index(query_text):
    """Query the index with a natural language query"""
    if not st.session_state.index:
        return {
            "answer": "Index not built yet. Please build the index first.",
            "sources": []
        }

    try:
        # Create query engine
        query_engine = st.session_state.index.as_query_engine()

        # Execute query
        response = query_engine.query(query_text)

        # Extract source information
        sources = []
        if hasattr(response, 'source_nodes'):
            for node in response.source_nodes:
                source = {
                    "text": node.node.get_content(),
                    "metadata": node.node.metadata,
                    "score": node.score if hasattr(node, 'score') else None
                }
                sources.append(source)

        return {
            "answer": str(response),
            "sources": sources
        }
    except Exception as e:
        import traceback
        st.error(traceback.format_exc())
        return {
            "answer": f"Error during query: {str(e)}",
            "sources": []
        }

# App title
st.title("🤖 Gemini Chatbot")

# Sidebar
with st.sidebar:
    st.title("Settings")

    # API key input
    if not api_key:
        new_api_key = st.text_input("Enter Gemini API Key", type="password")
        if new_api_key:
            genai.configure(api_key=new_api_key)
            os.environ["GEMINI_API_KEY"] = new_api_key
            with open('.env', 'w') as f:
                f.write(f"GEMINI_API_KEY={new_api_key}\n")
                f.write("CHROMA_DB_DIRECTORY=./chroma_db\n")
            st.success("API key set!")
            api_key = new_api_key

    # Build index button
    if st.button("Build/Rebuild Index"):
        with st.spinner("Building index..."):
            st.session_state.index = build_index()
            if st.session_state.index:
                st.success("Index built successfully!")
            else:
                st.error("Failed to build index")

    # Show data directory contents
    st.subheader("Data Directory")
    if os.path.exists(DATA_DIR):
        files = os.listdir(DATA_DIR)
        if files:
            st.write(f"Found {len(files)} files:")
            for file in files:
                st.write(f"- {file}")
        else:
            st.write("No files found in data directory")
    else:
        st.write("Data directory does not exist")

    # Clear chat history
    if st.button("Clear Chat History"):
        st.session_state.messages = []
        st.success("Chat history cleared")

# Initialize or load index
if st.session_state.index is None:
    if os.path.exists(CHROMA_DB_DIRECTORY):
        with st.spinner("Loading existing index..."):
            st.session_state.index = build_index()
            if st.session_state.index:
                st.success("Index loaded successfully!")
    else:
        st.info("No index found. Please build the index using the sidebar button.")

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

        # Show sources if available
        if message["role"] == "assistant" and "sources" in message:
            with st.expander("View Sources"):
                for i, source in enumerate(message["sources"], 1):
                    source_name = source.get('metadata', {}).get('source', f"Source {i}")
                    st.markdown(f"**{source_name}**")
                    text = source.get('text', '')
                    if text:
                        st.text(text[:200] + "..." if len(text) > 200 else text)

# Chat input
if prompt := st.chat_input("Ask me anything..."):
    # Check if API key is set
    if not api_key:
        st.error("Please set your Gemini API key in the sidebar first.")
        st.stop()

    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Display user message
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate and display assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            if st.session_state.index is None:
                response_content = "Please build the index first using the sidebar button."
                st.markdown(response_content)
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": response_content
                })
            else:
                try:
                    response = query_index(prompt)
                    st.markdown(response["answer"])

                    # Add assistant message to chat history
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": response["answer"],
                        "sources": response["sources"]
                    })

                    # Show sources if available
                    if response["sources"]:
                        with st.expander("View Sources"):
                            for i, source in enumerate(response["sources"], 1):
                                source_name = source.get('metadata', {}).get('source', f"Source {i}")
                                st.markdown(f"**{source_name}**")
                                text = source.get('text', '')
                                if text:
                                    st.text(text[:200] + "..." if len(text) > 200 else text)
                except Exception as e:
                    error_msg = f"Error: {str(e)}"
                    st.error(error_msg)
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": error_msg
                    })

Writing colab_streamlit_app.py


## Step 8: Create the Streamlit interface for Google Colab

In [6]:
%%writefile colab_streamlit_app.py
#!/usr/bin/env python3
"""
Streamlit web interface for the chatbot - Google Colab version
"""
import streamlit as st
import logging
import sys
import os
import google.generativeai as genai
from dotenv import load_dotenv
import chromadb

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

# Directory paths
DATA_DIR = "./data"
CHROMA_DB_DIRECTORY = "./chroma_db"

class GeminiChatbot:
    def __init__(self):
        # Get Gemini API key
        self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            print("GEMINI_API_KEY not found in environment. Please set it first.")
            self.api_key = st.text_input("Enter your Gemini API key:", type="password")
            if not self.api_key:
                st.error("Gemini API key is required to continue.")
                st.stop()
            os.environ["GEMINI_API_KEY"] = self.api_key

        # Configure Gemini
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel('gemini-pro')
        self.index = None

    def load_documents(self):
        """Load documents from the data directory"""
        if not os.path.exists(DATA_DIR):
            os.makedirs(DATA_DIR)
            st.info(f"Created data directory at {DATA_DIR}")

        if not os.listdir(DATA_DIR):
            st.warning("Data directory is empty. Please add some documents.")
            return False

        return True

    def build_index(self):
        """Build vector index from documents"""
        try:
            # Dynamic imports to handle different versions of llama-index
            try:
                # Try importing from llama_index (newer versions)
                from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext
                from llama_index.node_parser import SentenceSplitter
                from llama_index.vector_stores.chroma import ChromaVectorStore
                st.info("Using llama_index package")
            except ImportError:
                # Try importing from llama_index.core (older or different versions)
                from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
                from llama_index.core.node_parser import SentenceSplitter
                try:
                    from llama_index.core.vector_stores.chroma import ChromaVectorStore
                    st.info("Using llama_index.core package")
                except ImportError:
                    from llama_index.vector_stores.chroma import ChromaVectorStore
                    st.info("Using mixed llama_index imports")

            # Load documents
            documents = SimpleDirectoryReader(DATA_DIR).load_data()

            # Create sentence splitter for text chunking
            text_splitter = SentenceSplitter(
                chunk_size=1000,
                chunk_overlap=200
            )

            # Set up Chroma client
            if not os.path.exists(CHROMA_DB_DIRECTORY):
                os.makedirs(CHROMA_DB_DIRECTORY)

            chroma_client = chromadb.PersistentClient(path=CHROMA_DB_DIRECTORY)
            chroma_collection = chroma_client.get_or_create_collection("documents")
            vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
            storage_context = StorageContext.from_defaults(vector_store=vector_store)

            # Build index with error handling for different parameter names
            try:
                self.index = VectorStoreIndex.from_documents(
                    documents,
                    storage_context=storage_context,
                    transformations=[text_splitter],
                )
            except TypeError:
                # Try alternative approach if transformations parameter doesn't work
                self.index = VectorStoreIndex.from_documents(
                    documents,
                    storage_context=storage_context,
                    node_parser=text_splitter,
                )

            st.success("Index built successfully!")
            return True
        except Exception as e:
            st.error(f"Error building index: {str(e)}")
            import traceback
            traceback.print_exc()
            return False

    def query(self, query_text):
        """Query the index with a natural language query"""
        if not self.index:
            st.error("Index not built yet. Please build the index first.")
            return {
                "answer": "Index not built yet. Please build the index first.",
                "sources": []
            }

        try:
            # Create query engine
            query_engine = self.index.as_query_engine()

            # Execute query
            response = query_engine.query(query_text)

            # Extract source information
            sources = []
            if hasattr(response, 'source_nodes'):
                for node in response.source_nodes:
                    source = {
                        "text": node.node.get_content(),
                        "metadata": node.node.metadata,
                        "score": node.score if hasattr(node, 'score') else None
                    }
                    sources.append(source)

            return {
                "answer": str(response),
                "sources": sources
            }
        except Exception as e:
            st.error(f"Error during query: {str(e)}")
            import traceback
            traceback.print_exc()
            return {
                "answer": f"Error during query: {str(e)}",
                "sources": []
            }

def init_session_state():
    """Initialize session state variables"""
    if 'messages' not in st.session_state:
        st.session_state.messages = []
    if 'chatbot' not in st.session_state:
        st.session_state.chatbot = None

def main():
    # Set page config
    st.set_page_config(
        page_title="Gemini Chatbot for Google Colab",
        page_icon="🤖",
        layout="wide"
    )

    # Initialize session state
    init_session_state()

    # Main title
    st.title("🤖 Gemini Chatbot for Google Colab")

    # Sidebar
    with st.sidebar:
        st.title("Chatbot Settings")

        # Initialize chatbot if not already done
        if st.session_state.chatbot is None:
            st.session_state.chatbot = GeminiChatbot()

        # Check data directory status
        data_dir_status = "✅ Available" if os.path.exists(DATA_DIR) and os.listdir(DATA_DIR) else "❌ Empty"
        st.info(f"Data Directory: {data_dir_status}")

        # Check index status
        index_status = "✅ Available" if os.path.exists(CHROMA_DB_DIRECTORY) else "❌ Not Built"
        st.info(f"Vector Index: {index_status}")

        # Upload documents
        with st.expander("Upload Documents"):
            uploaded_file = st.file_uploader("Upload a document", type=["txt", "md", "pdf"])
            if uploaded_file is not None:
                if not os.path.exists(DATA_DIR):
                    os.makedirs(DATA_DIR)

                file_path = os.path.join(DATA_DIR, uploaded_file.name)
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getvalue())
                st.success(f"File '{uploaded_file.name}' uploaded successfully")

        # Build index button
        if st.button("Build/Rebuild Vector Index"):
            if st.session_state.chatbot.load_documents():
                with st.spinner("Building vector index..."):
                    if st.session_state.chatbot.build_index():
                        st.success("Vector index built successfully!")
                    else:
                        st.error("Failed to build vector index.")
            else:
                st.error("No documents found. Please upload some documents first.")

    # Display chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

            # Show sources if available
            if message["role"] == "assistant" and "sources" in message and message["sources"]:
                with st.expander("View Sources"):
                    for i, source in enumerate(message["sources"], 1):
                        source_name = source.get('metadata', {}).get('source', f"Source {i}")
                        st.markdown(f"**Document: {source_name}**")
                        text = source.get('text', '')
                        if text:
                            st.text(text[:200] + "..." if len(text) > 200 else text)

    # Chat input
    if prompt := st.chat_input("Ask me anything..."):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)

        # Check if index is built
        if not st.session_state.chatbot.index:
            if not os.path.exists(CHROMA_DB_DIRECTORY) or not os.listdir(CHROMA_DB_DIRECTORY):
                # Need to build index first
                if st.session_state.chatbot.load_documents():
                    with st.spinner("Building index for the first time..."):
                        st.session_state.chatbot.build_index()
                else:
                    st.error("No documents found. Please upload some documents first.")
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": "I need some documents to learn from. Please upload documents using the sidebar.",
                        "sources": []
                    })
                    st.stop()
            else:
                # Index exists but not loaded
                with st.spinner("Loading existing index..."):
                    st.session_state.chatbot.build_index()

        # Generate and display assistant response
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            with st.spinner("Thinking..."):
                response = st.session_state.chatbot.query(prompt)
                message_placeholder.markdown(response["answer"])

                # Add assistant message to chat history
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": response["answer"],
                    "sources": response["sources"]
                })

                # Show sources if available
                if response["sources"]:
                    with st.expander("View Sources"):
                        for i, source in enumerate(response["sources"], 1):
                            source_name = source.get('metadata', {}).get('source', f"Source {i}")
                            st.markdown(f"**Document: {source_name}**")
                            text = source.get('text', '')
                            if text:
                                st.text(text[:200] + "..." if len(text) > 200 else text)

if __name__ == "__main__":
    main()

Overwriting colab_streamlit_app.py


## Step 9: Run Streamlit Interface (Optional)

In [7]:
# Install ngrok and pyngrok if you want to run the Streamlit app
!pip install pyngrok

# Run Streamlit app using ngrok
from pyngrok import ngrok
import os

# Set up ngrok authentication (optional, needed for better reliability)
# Get your token from https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_AUTH_TOKEN = input("Enter your ngrok auth token (optional, press Enter to skip): ")
if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start ngrok tunnel to Streamlit
public_url = ngrok.connect(8501)
print(f"\n\nStreamlit app is available at: {public_url}\n\n")

# Run Streamlit app
!streamlit run colab_streamlit_app.py &

Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.11
Enter your ngrok auth token (optional, press Enter to skip): 


ERROR:pyngrok.process.ngrok:t=2025-07-04T16:03:20+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"


PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

## Step 10: Clean up (Run this when you're done)

In [8]:
# Clean up
!pkill -f streamlit  # Stop Streamlit if running
ngrok.kill()  # Stop ngrok if running