In [1]:
!pip install flask flask-cors pyngrok
!pip install langchain langchain-community langchain-chroma
!pip install sentence-transformers chromadb transformers torch
!pip install requests pandas numpy

import os
import json
import logging
import threading
import time
from datetime import datetime
from typing import Dict, List, Optional
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok, conf
import requests

import pandas as pd
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class SimpleLLM(LLM):
    """
    Simple LLM wrapper for Colab environment - uses local processing
    """

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """
        Simple answer extraction from context without external API calls
        """
        return self._extract_answer_from_prompt(prompt)

    def _extract_answer_from_prompt(self, prompt: str) -> str:
        """
        Extract answer using pattern matching and context analysis
        """
        if "Context:" in prompt and "Question:" in prompt:
            try:
                context_start = prompt.find("Context:") + 8
                question_start = prompt.find("Question:") + 9
                context = prompt[context_start:prompt.find("Question:")].strip()
                question = prompt[question_start:].strip()

                return self._find_answer_in_context(question, context)
            except:
                pass

        return "তথ্য পাওয়া যায়নি"

    def _find_answer_in_context(self, question: str, context: str) -> str:
        """
        Find answer in context using keyword matching
        """
        question_lower = question.lower()

        if "বয়স" in question and "কত" in question:
            for line in context.split('\n'):
                if "১৫" in line and ("বছর" in line or "বয়স" in line):
                    return "১৫ বছর"

        if "সুপুরুষ" in question or "সপুরুষ" in question:
            # Look for character names
            for line in context.split('\n'):
                if "শুম্ভনাথ" in line:
                    return "শুম্ভনাথ"

        if "ভাগ্য দেবতা" in question:
            for line in context.split('\n'):
                if "মামো" in line:
                    return "মামো"

        # English questions
        if "who is" in question_lower:
            sentences = context.split('।')
            for sentence in sentences[:3]:
                if len(sentence.strip()) > 10:
                    return sentence.strip()[:100]

        # Default: return first relevant sentence
        sentences = context.split('\n')
        for sentence in sentences:
            if len(sentence.strip()) > 10:
                words_in_common = sum(1 for word in question.split() if word in sentence and len(word) > 2)
                if words_in_common > 0:
                    return sentence.strip()[:150]

        return "তথ্য পাওয়া যায়নি"

    @property
    def _llm_type(self) -> str:
        return "simple_local"

class ColabRAGPipeline:
    """
    Simplified RAG Pipeline optimized for Google Colab
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.vectorstore = None
        self.retriever = None
        self.qa_chain = None

        logger.info("Initializing embeddings...")
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            model_kwargs={'device': 'cpu'}
        )

        logger.info("Initializing simple LLM...")
        self.llm = SimpleLLM()

    def load_documents(self) -> List[Document]:
        """Load and prepare documents"""
        logger.info(f"Loading documents from: {self.file_path}")
        documents = []

        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Split content into chunks
            if '\\n\\n' in content:
                chunks = content.split('\\n\\n')
            else:
                chunks = content.split('\\n')

            chunks = [chunk.strip() for chunk in chunks if chunk.strip() and len(chunk) > 10]

            for idx, chunk in enumerate(chunks):
                documents.append(Document(
                    page_content=chunk,
                    metadata={'chunk_id': idx, 'source': f"chunk_{idx}"}
                ))

            logger.info(f"Loaded {len(documents)} document chunks")
            return documents

        except Exception as e:
            logger.error(f"Error loading documents: {e}")
            return []

    def create_vectorstore(self, documents: List[Document]):
        """Create vector store"""
        if not documents:
            logger.error("No documents to process!")
            return False

        logger.info("Creating vector store...")
        self.vectorstore = Chroma.from_documents(
            documents=documents,
            embedding=self.embeddings,
            persist_directory="/tmp/chroma_db"
        )

        self.retriever = self.vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3}
        )

        logger.info("Vector store created successfully!")
        return True

    def setup_qa_chain(self):
        """Setup QA chain"""
        prompt_template = """Based on the following context, answer the question concisely.

Context:
{context}

Question: {question}

Answer in the same language as the question. Keep the answer short and direct.

Answer:"""

        PROMPT = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )

        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.retriever,
            chain_type_kwargs={"prompt": PROMPT},
            return_source_documents=True
        )

        logger.info("QA chain setup complete!")

    def query(self, question: str) -> dict:
        """Query the RAG system"""
        if not self.qa_chain:
            return {"error": "System not initialized"}

        try:
            relevant_docs = self.retriever.invoke(question)

            context = " ".join([doc.page_content for doc in relevant_docs])
            answer = self.llm._find_answer_in_context(question, context)

            return {
                "question": question,
                "answer": answer,
                "sources": [doc.page_content[:200] + "..." for doc in relevant_docs],
                "confidence": self._calculate_confidence(question, relevant_docs)
            }

        except Exception as e:
            logger.error(f"Query error: {e}")
            return {
                "question": question,
                "answer": "দুঃখিত, উত্তর খুঁজে পাওয়া যায়নি।",
                "sources": [],
                "confidence": 0.0
            }

    def _calculate_confidence(self, question: str, docs: List[Document]) -> float:
        """Simple confidence calculation"""
        if not docs:
            return 0.0

        question_words = set(question.lower().split())
        total_score = 0

        for doc in docs:
            doc_words = set(doc.page_content.lower().split())
            overlap = len(question_words.intersection(doc_words))
            score = overlap / len(question_words) if question_words else 0
            total_score += score

        return min(total_score / len(docs), 1.0)

    def setup(self) -> bool:
        """Complete setup"""
        logger.info("Setting up RAG Pipeline...")

        documents = self.load_documents()
        if not documents:
            return False

        if not self.create_vectorstore(documents):
            return False

        self.setup_qa_chain()
        logger.info("RAG Pipeline setup complete!")
        return True

#Flask implementation
app = Flask(__name__)
CORS(app)

rag_pipeline = None
ngrok_tunnel = None

@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "service": "Colab RAG API"
    })

@app.route('/chat', methods=['POST'])
def chat():
    """Main chat endpoint"""
    try:
        data = request.get_json()

        if not data or 'message' not in data:
            return jsonify({"error": "Missing 'message' field"}), 400

        message = data['message'].strip()
        if not message:
            return jsonify({"error": "Empty message"}), 400

        include_sources = data.get('include_sources', True)
        max_length = data.get('max_response_length', 200)

        result = rag_pipeline.query(message)

        response = {
            "message": message,
            "response": result['answer'][:max_length],
            "confidence": result.get('confidence', 0.0),
            "timestamp": datetime.now().isoformat()
        }

        if include_sources:
            response["sources"] = result.get('sources', [])

        return jsonify(response)

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return jsonify({"error": "Internal server error"}), 500

@app.route('/batch_chat', methods=['POST'])
def batch_chat():
    """Batch processing endpoint"""
    try:
        data = request.get_json()

        if not data or 'messages' not in data:
            return jsonify({"error": "Missing 'messages' field"}), 400

        messages = data['messages']
        if not isinstance(messages, list) or len(messages) > 5:
            return jsonify({"error": "Invalid messages or too many (max 5)"}), 400

        results = []
        for i, msg in enumerate(messages):
            if not msg or not msg.strip():
                results.append({"index": i, "error": "Empty message"})
                continue

            try:
                result = rag_pipeline.query(msg.strip())
                results.append({
                    "index": i,
                    "message": msg,
                    "response": result['answer'],
                    "confidence": result.get('confidence', 0.0)
                })
            except Exception as e:
                results.append({"index": i, "error": str(e)})

        return jsonify({"results": results, "timestamp": datetime.now().isoformat()})

    except Exception as e:
        logger.error(f"Batch chat error: {e}")
        return jsonify({"error": "Internal server error"}), 500

@app.route('/stats', methods=['GET'])
def get_stats():
    """Get system statistics"""
    try:
        stats = {
            "status": "active",
            "embedding_model": "paraphrase-multilingual-MiniLM-L12-v2",
            "supported_languages": ["Bengali", "English"],
            "timestamp": datetime.now().isoformat()
        }

        if rag_pipeline and rag_pipeline.vectorstore:
            try:
                doc_count = len(rag_pipeline.vectorstore.get()['ids'])
                stats["total_documents"] = doc_count
            except:
                stats["total_documents"] = "unknown"

        return jsonify(stats)

    except Exception as e:
        logger.error(f"Stats error: {e}")
        return jsonify({"error": "Internal server error"}), 500

def setup_ngrok_auth(authtoken: str):
    """
    Setup ngrok authentication token

    Args:
        authtoken: Your ngrok authentication token
    """
    try:
        ngrok.kill()

        conf.get_default().auth_token = authtoken
        ngrok.set_auth_token(authtoken)

        logger.info("✅ ngrok authentication token configured successfully!")
        return True

    except Exception as e:
        logger.error(f"❌ Failed to configure ngrok auth token: {e}")
        return False

def initialize_rag_system(file_path: str):
    """Initialize the RAG system"""
    global rag_pipeline

    logger.info("Initializing RAG system...")
    rag_pipeline = ColabRAGPipeline(file_path)

    if rag_pipeline.setup():
        logger.info("✅ RAG system initialized successfully!")
        return True
    else:
        logger.error("❌ RAG system initialization failed!")
        return False

def find_available_port(start_port: int = 5000, max_attempts: int = 10) -> int:
    """
    Find an available port starting from start_port

    Args:
        start_port: Port to start checking from
        max_attempts: Maximum number of ports to try

    Returns:
        Available port number
    """
    import socket

    for port in range(start_port, start_port + max_attempts):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('', port))
                return port
        except OSError:
            continue

    raise RuntimeError(f"Could not find available port in range {start_port}-{start_port + max_attempts}")

def stop_existing_servers():
    """Stop any existing ngrok tunnels and Flask servers"""
    try:
        ngrok.kill()
        print("🔄 Stopped existing ngrok tunnels")
    except:
        pass

def start_api_server(file_path: str, port: int = None, authtoken: str = None):
    """
    Start the API server in Google Colab with proper ngrok authentication and port handling

    Args:
        file_path: Path to your data file
        port: Port number (if None, will find available port automatically)
        authtoken: Your ngrok authentication token
    """
    global ngrok_tunnel

    print("🚀 Starting RAG API Server for Google Colab...")
    print("=" * 50)

    stop_existing_servers()

    if port is None:
        try:
            port = find_available_port(5000)
            print(f"🔍 Found available port: {port}")
        except RuntimeError as e:
            print(f"❌ {e}")
            return
    else:
        try:
            import socket
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('', port))
            print(f"✅ Port {port} is available")
        except OSError:
            print(f"⚠️ Port {port} is in use, finding alternative...")
            try:
                port = find_available_port(port + 1)
                print(f"🔍 Using alternative port: {port}")
            except RuntimeError as e:
                print(f"❌ {e}")
                return

    if authtoken:
        print("🔐 Setting up ngrok authentication...")
        if not setup_ngrok_auth(authtoken):
            print("❌ Failed to setup ngrok authentication. Please check your token.")
            return
        print("✅ ngrok authentication configured!")
    else:
        print("⚠️ No ngrok authtoken provided. You may encounter authentication errors.")
        print("💡 To fix this, pass your authtoken: start_api_server(file_path, authtoken='your_token')")

    if not initialize_rag_system(file_path):
        print("❌ Failed to initialize RAG system. Please check your file path.")
        return

    try:
        ngrok_tunnel = ngrok.connect(port)
        public_url = ngrok_tunnel.public_url

        print(f"✅ RAG System initialized successfully!")
        print(f"🌐 Public URL: {public_url}")
        print(f"📡 Local URL: http://localhost:{port}")
        print(f"🔌 Using Port: {port}")
        print("\n📚 Available Endpoints:")
        print(f"  • Health Check: GET {public_url}/health")
        print(f"  • Chat: POST {public_url}/chat")
        print(f"  • Batch Chat: POST {public_url}/batch_chat")
        print(f"  • Statistics: GET {public_url}/stats")
        print("\n💡 API is ready to use!")
        print("=" * 50)

        try:
            app.run(host='0.0.0.0', port=port, debug=False, use_reloader=False)
        except OSError as e:
            if "Address already in use" in str(e):
                print(f"❌ Port {port} became unavailable, trying another port...")
                new_port = find_available_port(port + 1)
                print(f"🔄 Retrying with port {new_port}")
                ngrok.kill()
                ngrok_tunnel = ngrok.connect(new_port)
                public_url = ngrok_tunnel.public_url
                print(f"🌐 New Public URL: {public_url}")
                app.run(host='0.0.0.0', port=new_port, debug=False, use_reloader=False)
            else:
                raise e

    except Exception as e:
        logger.error(f"Error starting server: {e}")
        print(f"❌ Error starting server: {e}")

        if "authentication failed" in str(e).lower():
            print("\n🔧 TROUBLESHOOTING NGROK AUTHENTICATION:")
            print("1. Make sure you have a verified ngrok account")
            print("2. Get your authtoken from: https://dashboard.ngrok.com/get-started/your-authtoken")
            print("3. Use the authtoken parameter when starting the server:")
            print("   start_api_server('/path/to/file', authtoken='your_token_here')")
            print("4. Or set it manually:")
            print("   setup_ngrok_auth('your_token_here')")
        elif "address already in use" in str(e).lower():
            print("\n🔧 PORT CONFLICT RESOLUTION:")
            print("1. Try restarting with a different port:")
            print("   start_api_server('/path/to/file', port=8000, authtoken='your_token')")
            print("2. Or let the system find an available port automatically:")
            print("   start_api_server('/path/to/file', authtoken='your_token')")
            print("3. Restart your Colab runtime if the issue persists")

def configure_ngrok_token(authtoken: str):
    """
    Manually configure ngrok authentication token

    Args:
        authtoken: Your ngrok authentication token
    """
    print("🔐 Configuring ngrok authentication token...")
    if setup_ngrok_auth(authtoken):
        print("✅ ngrok authentication token configured successfully!")
        print("💡 You can now start the server without passing the authtoken parameter")
    else:
        print("❌ Failed to configure ngrok authentication token")

def test_api(base_url: str):
    """Test the API with sample questions"""
    print("\n🧪 Testing API...")
    print("=" * 30)

    try:
        response = requests.get(f"{base_url}/health")
        if response.status_code == 200:
            print("✅ Health check passed")
        else:
            print("❌ Health check failed")
    except:
        print("❌ Could not connect to API")
        return

    bengali_questions = [
        "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?",
        "কাকে অনুপমের ভাগ্য দেবতা বলে উল্লেখ করা হয়েছে?",
        "বিয়ের সময় কল্যাণীর প্রকৃত বয়স কত ছিল?"
    ]

    print("\n📝 Testing Bengali Questions:")
    for i, question in enumerate(bengali_questions, 1):
        try:
            response = requests.post(f"{base_url}/chat",
                                   json={"message": question},
                                   timeout=30)
            if response.status_code == 200:
                result = response.json()
                print(f"  Q{i}: {question[:30]}...")
                print(f"  A{i}: {result['response']}")
                print(f"  Confidence: {result.get('confidence', 0.0):.2f}")
                print()
            else:
                print(f"  Q{i}: Error {response.status_code}")
        except Exception as e:
            print(f"  Q{i}: Request failed - {e}")

    print("🎉 API testing complete!")

class ColabRAGClient:
    """Simple client for interacting with the RAG API"""

    def __init__(self, base_url: str):
        self.base_url = base_url.rstrip('/')

    def ask(self, question: str) -> dict:
        """Ask a single question"""
        try:
            response = requests.post(
                f"{self.base_url}/chat",
                json={"message": question},
                timeout=30
            )
            return response.json()
        except Exception as e:
            return {"error": str(e)}

    def ask_multiple(self, questions: List[str]) -> dict:
        """Ask multiple questions"""
        try:
            response = requests.post(
                f"{self.base_url}/batch_chat",
                json={"messages": questions},
                timeout=60
            )
            return response.json()
        except Exception as e:
            return {"error": str(e)}

try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("✅ Google Drive mounted successfully!")
except:
    print("⚠️ Google Drive mounting failed or already mounted")

def restart_api_server(file_path: str = '/content/drive/MyDrive/dataset/HSC26_chunks.txt',
                      authtoken: str = '30OsEScqY2iSJjYTdzo4zMqOl06_6h5kLhKTJZqHN9iosQMU2'):
    """
    Quick restart function with your default settings
    """
    print("🔄 Restarting API server with automatic port detection...")
    start_api_server(file_path=file_path, authtoken=authtoken)



Collecting flask-cors
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Downloading flask_cors-6.0.1-py3-none-any.whl (13 kB)
Downloading pyngrok-7.2.12-py3-none-any.whl (26 kB)
Installing collected packages: pyngrok, flask-cors
Successfully installed flask-cors-6.0.1 pyngrok-7.2.12
Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-chroma
  Downloading langchain_chroma-0.2.5-py3-none-any.whl.metadata (1.1 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
C

In [2]:
@app.route('/chat', methods=['GET'])
#Frontend Implementation
def chat_form():
    """Simple web form for testing the chat endpoint"""
    return '''
    <!DOCTYPE html>
    <html>
    <head>
        <title>RAG API Chat Interface</title>
        <meta charset="UTF-8">
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: 50px auto;
                padding: 20px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                min-height: 100vh;
            }
            .container {
                background: white;
                border-radius: 15px;
                padding: 30px;
                box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            }
            h1 {
                color: #333;
                text-align: center;
                margin-bottom: 30px;
            }
            .form-group {
                margin-bottom: 20px;
            }
            label {
                display: block;
                margin-bottom: 8px;
                font-weight: bold;
                color: #555;
            }
            input[type="text"] {
                width: 100%;
                padding: 12px;
                border: 2px solid #ddd;
                border-radius: 8px;
                font-size: 16px;
                box-sizing: border-box;
            }
            button {
                background: linear-gradient(135deg, #007bff, #0056b3);
                color: white;
                padding: 12px 25px;
                border: none;
                border-radius: 8px;
                cursor: pointer;
                font-size: 16px;
                margin-right: 10px;
            }
            button:hover {
                transform: translateY(-2px);
            }
            .response {
                background: #f8f9fa;
                border: 1px solid #dee2e6;
                border-radius: 8px;
                padding: 20px;
                margin-top: 20px;
                min-height: 100px;
            }
            .sample-questions {
                background: #e7f3ff;
                padding: 15px;
                border-radius: 8px;
                margin-top: 20px;
            }
            .sample-question {
                background: white;
                padding: 8px 12px;
                margin: 5px 0;
                border-radius: 5px;
                cursor: pointer;
                border: 1px solid #ccc;
            }
            .sample-question:hover {
                background: #f0f8ff;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🤖 RAG API Chat Interface</h1>

            <div class="form-group">
                <label for="question">❓ Your Question (Bengali or English):</label>
                <input type="text" id="question" placeholder="Type your question here..." />
            </div>

            <button onclick="askQuestion()">🚀 Ask Question</button>
            <button onclick="testHealth()">🏥 Health Check</button>
            <button onclick="getStats()">📊 Get Stats</button>

            <div class="sample-questions">
                <h3>📝 Sample Questions (Click to use):</h3>
                <div class="sample-question" onclick="setQuestion('অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?')">
                    অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?
                </div>
                <div class="sample-question" onclick="setQuestion('কাকে অনুপমের ভাগ্য দেবতা বলে উল্লেখ করা হয়েছে?')">
                    কাকে অনুপমের ভাগ্য দেবতা বলে উল্লেখ করা হয়েছে?
                </div>
                <div class="sample-question" onclick="setQuestion('বিয়ের সময় কল্যাণীর প্রকৃত বয়স কত ছিল?')">
                    বিয়ের সময় কল্যাণীর প্রকৃত বয়স কত ছিল?
                </div>
                <div class="sample-question" onclick="setQuestion('Who is Anupam?')">
                    Who is Anupam?
                </div>
            </div>

            <div class="response" id="response">
                Welcome! Click on a sample question or type your own question above. 🎉
            </div>
        </div>

        <script>
            function setQuestion(question) {
                document.getElementById('question').value = question;
            }

            function showResponse(message, isError = false) {
                const responseDiv = document.getElementById('response');
                const color = isError ? 'red' : 'green';
                responseDiv.innerHTML = `<div style="color: ${color}">${message}</div>`;
            }

            function showLoading(message) {
                const responseDiv = document.getElementById('response');
                responseDiv.innerHTML = `<div style="color: blue;">🔍 ${message}</div>`;
            }

            async function testHealth() {
                showLoading('Checking API health...');
                try {
                    const response = await fetch('/health');
                    const data = await response.json();

                    if (response.ok) {
                        showResponse(`✅ Health Check Passed!<br>Status: ${data.status}<br>Service: ${data.service}`);
                    } else {
                        showResponse('❌ Health check failed', true);
                    }
                } catch (error) {
                    showResponse(`❌ Cannot connect to API: ${error.message}`, true);
                }
            }

            async function askQuestion() {
                const question = document.getElementById('question').value.trim();
                if (!question) {
                    showResponse('❌ Please enter a question!', true);
                    return;
                }

                showLoading('Processing your question...');

                try {
                    const response = await fetch('/chat', {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json',
                        },
                        body: JSON.stringify({
                            message: question,
                            include_sources: true
                        })
                    });

                    const data = await response.json();

                    if (response.ok) {
                        let sourcesHtml = '';
                        if (data.sources && data.sources.length > 0) {
                            sourcesHtml = `<br><br><strong>📚 Sources:</strong><br>${data.sources.slice(0, 2).map((source, i) => `${i + 1}. ${source.substring(0, 150)}...`).join('<br>')}`;
                        }

                        showResponse(`
                            <strong>❓ Question:</strong> ${data.message}<br><br>
                            <strong>🤖 Answer:</strong> ${data.response}<br><br>
                            <strong>📊 Confidence:</strong> ${(data.confidence * 100).toFixed(1)}%
                            ${sourcesHtml}
                        `);
                    } else {
                        showResponse(`❌ Error: ${data.error || 'Unknown error'}`, true);
                    }
                } catch (error) {
                    showResponse(`❌ Request failed: ${error.message}`, true);
                }
            }

            async function getStats() {
                showLoading('Getting API statistics...');
                try {
                    const response = await fetch('/stats');
                    const data = await response.json();

                    if (response.ok) {
                        showResponse(`
                            <strong>📊 API Statistics:</strong><br>
                            Status: ${data.status}<br>
                            Model: ${data.embedding_model}<br>
                            Languages: ${data.supported_languages.join(', ')}<br>
                            Documents: ${data.total_documents}
                        `);
                    } else {
                        showResponse('❌ Failed to get statistics', true);
                    }
                } catch (error) {
                    showResponse(`❌ Stats request failed: ${error.message}`, true);
                }
            }

            // Allow Enter key to submit
            document.getElementById('question').addEventListener('keypress', function(e) {
                if (e.key === 'Enter') {
                    askQuestion();
                }
            });
        </script>
    </body>
    </html>
    '''

def test_api_directly():
    """Test the API directly from within Colab"""
    print("🧪 Testing RAG API Directly from Colab")
    print("=" * 40)

    if not rag_pipeline:
        print("❌ RAG pipeline not initialized!")
        return

    # Test questions
    test_questions = [
        "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?",
        "কাকে অনুপমের ভাগ্য দেবতা বলে উল্লেখ করা হয়েছে?",
        "বিয়ের সময় কল্যাণীর প্রকৃত বয়স কত ছিল?",
        "Who is Anupam?"
    ]

    for i, question in enumerate(test_questions, 1):
        print(f"\n📝 Test {i}: {question}")
        print("🔍 Processing...")

        try:
            result = rag_pipeline.query(question)
            print(f"🤖 Answer: {result['answer']}")
            print(f"📊 Confidence: {result.get('confidence', 0.0):.2f}")
            if result.get('sources'):
                print(f"📚 Sources: {len(result['sources'])} documents found")
        except Exception as e:
            print(f"❌ Error: {e}")

        print("─" * 40)

    print("🎉 Direct testing complete!")

class SimpleRAGClient:
    """Simple client to test the RAG API"""

    def __init__(self, base_url):
        self.base_url = base_url.rstrip('/')
        print(f"🔗 RAG Client initialized for: {self.base_url}")

    def health_check(self):
        """Check API health"""
        try:
            response = requests.get(f"{self.base_url}/health", timeout=10)
            if response.status_code == 200:
                data = response.json()
                print("✅ Health check passed!")
                print(f"   Status: {data.get('status')}")
                return True
            else:
                print(f"❌ Health check failed: {response.status_code}")
                return False
        except Exception as e:
            print(f"❌ Health check error: {e}")
            return False

    def ask(self, question):
        """Ask a question"""
        print(f"❓ Question: {question}")
        try:
            response = requests.post(
                f"{self.base_url}/chat",
                json={"message": question, "include_sources": True},
                timeout=30
            )

            if response.status_code == 200:
                data = response.json()
                print(f"🤖 Answer: {data['response']}")
                print(f"📊 Confidence: {data.get('confidence', 0.0):.2f}")
                return data
            else:
                print(f"❌ Error: {response.status_code} - {response.text}")
                return None
        except Exception as e:
            print(f"❌ Request failed: {e}")
            return None

    def quick_test(self):
        """Run a quick test"""
        print("🧪 Running Quick Test")
        print("=" * 30)

        if not self.health_check():
            return

        test_questions = [
            "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?",
            "Who is Anupam?"
        ]

        for question in test_questions:
            print("\n" + "─" * 20)
            self.ask(question)

        print("\n🎉 Quick test complete!")

def get_current_api_url():
    """Get the current ngrok URL"""
    try:
        tunnels = ngrok.get_tunnels()
        if tunnels:
            public_url = tunnels[0].public_url
            print(f"🌐 Current API URL: {public_url}")
            return public_url
        else:
            print("❌ No active ngrok tunnels found")
            return None
    except Exception as e:
        print(f"❌ Error getting ngrok URL: {e}")
        return None

def show_testing_commands():
    """Show available testing commands"""
    current_url = get_current_api_url()

    commands = f"""
🧪 TESTING COMMANDS FOR YOUR RAG API
===================================

Your API is running! Here are different ways to test it:

1️⃣ Test directly in Colab (no network needed):
   test_api_directly()

2️⃣ Test via HTTP requests:
   url = get_current_api_url()
   client = SimpleRAGClient(url)
   client.quick_test()

3️⃣ Access web interface in browser:
   Click: {current_url}/chat

4️⃣ Manual HTTP test:
   import requests
   response = requests.post('{current_url}/chat',
                          json={{"message": "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?"}})
   print(response.json())

5️⃣ Check API health:
   response = requests.get('{current_url}/health')
   print(response.json())

🔥 QUICK START:
--------------
Just run: test_api_directly()

🎯 TRY IT NOW! 🚀
"""
    print(commands)

print("\n" + "🎉 API SUCCESSFULLY STARTED!" + "\n")
show_testing_commands()


🎉 API SUCCESSFULLY STARTED!



ERROR:pyngrok.process.ngrok:t=2025-07-26T07:56:33+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-07-26T07:56:33+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-07-26T07:56:33+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

❌ Error getting ngrok URL: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

🧪 TESTING COMMANDS FOR YOUR RAG API

Your API is running! Here are different ways to test it:

1️⃣ Test directly in Colab (no network needed):
   test_api_directly()

2️⃣ Test via HTTP requests:
   url = get_current_api_url()
   client = SimpleRAGClient(url)
   client.quick_test()

3️⃣ Access web interface in browser:
   Click: None/chat

4️⃣ Manual HTTP test:
   import requests
   response = requests.post('None/chat', 
                          json={"message": "অনুপমের ভাষায় সুপুরুষ কাকে বলা হয়েছে?"})
   print(response.json())

5️⃣ Check API health:
   response = requests.get('None/health')
   print(response.json())

🔥 QUICK START:
--------------
Just run: test_api_directly

In [3]:
start_api_server(
    '/content/drive/MyDrive/dataset/HSC26_chunks.txt',
    authtoken='30OsEScqY2iSJjYTdzo4zMqOl06_6h5kLhKTJZqHN9iosQMU2'
)

🚀 Starting RAG API Server for Google Colab...
🔄 Stopped existing ngrok tunnels
🔍 Found available port: 5000
🔐 Setting up ngrok authentication...
✅ ngrok authentication configured!


  self.embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ RAG System initialized successfully!
🌐 Public URL: https://cf3e2c712d51.ngrok-free.app
📡 Local URL: http://localhost:5000
🔌 Using Port: 5000

📚 Available Endpoints:
  • Health Check: GET https://cf3e2c712d51.ngrok-free.app/health
  • Chat: POST https://cf3e2c712d51.ngrok-free.app/chat
  • Batch Chat: POST https://cf3e2c712d51.ngrok-free.app/batch_chat
  • Statistics: GET https://cf3e2c712d51.ngrok-free.app/stats

💡 API is ready to use!
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [26/Jul/2025 07:57:51] "GET /chat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [26/Jul/2025 07:57:51] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [26/Jul/2025 07:58:05] "POST /chat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [26/Jul/2025 07:58:34] "POST /chat HTTP/1.1" 200 -
