In [1]:
# Install required packages
!pip install -q langchain langchain-community langchain-openai
!pip install -q chromadb
!pip install -q transformers torch torchvision pillow
!pip install -q pdf2image pymupdf
!pip install -q unstructured[pdf]
!pip install -q sentence-transformers
!pip install -q openai python-dotenv
!pip install -q gradio

# For advanced multimodal retrieval (optional)
#!pip install -q byaldi colpali-engine
# Install Poppler utilities
!sudo apt-get update
!sudo apt-get install -y poppler-utils
!pip install --upgrade pdf2image

# Now import after installation
from pdf2image import convert_from_path



[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━[0m [32m2.1/2.5 MB[0m [31m62.6 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m58.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━

In [3]:
!pip install -q PyMuPDF Pillow

import fitz  # PyMuPDF
from PIL import Image
import os

class PDFProcessor:
    """Process PDFs using PyMuPDF - no system dependencies needed"""

    def __init__(self, pdf_path):
        self.pdf_path = pdf_path
        self.doc = fitz.open(pdf_path)

    def extract_images_to_folder(self, output_dir='extracted_images'):
        """Extract all images from PDF"""
        os.makedirs(output_dir, exist_ok=True)
        image_paths = []

        for page_num in range(len(self.doc)):
            page = self.doc[page_num]
            image_list = page.get_images()

            for img_index, img in enumerate(image_list):
                xref = img[0]
                base_image = self.doc.extract_image(xref)
                image_bytes = base_image["image"]

                # Save image
                image_path = f"{output_dir}/page{page_num+1}_img{img_index}.png"
                with open(image_path, "wb") as img_file:
                    img_file.write(image_bytes)

                image_paths.append(image_path)
                print(f"✓ Extracted: {image_path}")

        return image_paths

    def convert_pages_to_images(self, output_dir='pdf_pages', zoom_factor=2.0):
        """Convert PDF pages to images (useful if PDF is scanned)"""
        os.makedirs(output_dir, exist_ok=True)
        image_paths = []

        for page_num in range(len(self.doc)):
            page = self.doc[page_num]

            # Render page as image with zoom
            mat = fitz.Matrix(zoom_factor, zoom_factor)
            pix = page.get_pixmap(matrix=mat)

            # Save as PNG
            image_path = f"{output_dir}/page_{page_num+1}.png"
            pix.save(image_path)
            image_paths.append(image_path)
            print(f"✓ Converted page {page_num+1}")

        return image_paths

    def extract_text(self):
        """Extract text from all pages"""
        text_chunks = []
        for page_num in range(len(self.doc)):
            page = self.doc[page_num]
            text = page.get_text()
            text_chunks.append({
                'page': page_num + 1,
                'content': text,
                'type': 'text'
            })
        return text_chunks

# Usage example
pdf_processor = PDFProcessor('your_document.pdf')

# Extract images from PDF
images = pdf_processor.extract_images_to_folder()

# Or convert PDF pages to images
page_images = pdf_processor.convert_pages_to_images(zoom_factor=2.0)

# Extract text
text = pdf_processor.extract_text()


✓ Extracted: extracted_images/page3_img0.png
✓ Extracted: extracted_images/page4_img0.png
✓ Extracted: extracted_images/page4_img1.png
✓ Extracted: extracted_images/page5_img0.png
✓ Extracted: extracted_images/page5_img1.png
✓ Converted page 1
✓ Converted page 2
✓ Converted page 3
✓ Converted page 4
✓ Converted page 5
✓ Converted page 6
✓ Converted page 7


In [4]:
!pip install -q byaldi colpali-engine

from byaldi import RAGMultiModalModel

# Initialize ColPali model (downloads ~2GB)
model = RAGMultiModalModel.from_pretrained("vidore/colpali-v1.2")

# Index your PDF directly - no extraction needed!
model.index(
    input_path="your_document.pdf",  # Can be file or directory
    index_name="my_documents",
    store_collection_with_index=True,
    overwrite=True
)

# Query with text
results = model.search("What are the main topics?", k=5)

for result in results:
    print(f"Relevance: {result.score:.3f}")
    # result.page_content contains the relevant page image


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.9/517.9 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.9/73.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.3/472.3 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m821.0/821.0 MB[0m [31m605.3 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.0/571.0 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.8/156.8 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.3/201.3 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/750 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/862M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/78.6M [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/733 [00:00<?, ?B/s]

Added page 1 of document 0 to index.
Added page 2 of document 0 to index.
Added page 3 of document 0 to index.
Added page 4 of document 0 to index.
Added page 5 of document 0 to index.
Added page 6 of document 0 to index.
Added page 7 of document 0 to index.
Index exported to .byaldi/my_documents
Index exported to .byaldi/my_documents
Relevance: 7.656
Relevance: 7.344
Relevance: 6.938
Relevance: 6.812
Relevance: 6.812


In [5]:
!pip install -q "unstructured[pdf]" python-magic-mime pdfminer.six

from unstructured.partition.pdf import partition_pdf

# Extract all elements including tables
elements = partition_pdf(
    filename='your_document.pdf',
    strategy='hi_res',  # High resolution for better accuracy
    infer_table_structure=True,
    extract_images_in_pdf=True
)

# Separate by type
text_elements = [el for el in elements if el.category == "NarrativeText"]
table_elements = [el for el in elements if el.category == "Table"]
image_elements = [el for el in elements if el.category == "Image"]

print(f"Text: {len(text_elements)}, Tables: {len(table_elements)}, Images: {len(image_elements)}")

# Access table data
for table in table_elements:
    print(table.text)  # Table as text
    print(table.metadata.text_as_html)  # Table as HTML


[31mERROR: Could not find a version that satisfies the requirement python-magic-mime (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for python-magic-mime[0m[31m


yolox_l0.05.onnx:   0%|          | 0.00/217M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/274 [00:00<?, ?B/s]

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/115M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

Text: 57, Tables: 1, Images: 5
CLASS PROBABILITY ARREST 0.002 ASSAULT 0.04 ARSON 0.008 ABUSE 0.95
<table><thead><tr><th>CLASS</th><th></th><th>PROBABILITY</th></tr></thead><tbody><tr><td>ARREST</td><td>0.002</td><td></td></tr><tr><td>ASSAULT</td><td>0.04</td><td></td></tr><tr><td>ARSON</td><td>0.008</td><td></td></tr><tr><td>ABUSE</td><td>0.95</td><td></td></tr></tbody></table>


In [6]:
!pip install -q sentence-transformers chromadb


In [6]:
"""import shutil
shutil.rmtree("./chroma_db", ignore_errors=True)
print("✓ Old database deleted")
"""

'import shutil\nshutil.rmtree("./chroma_db", ignore_errors=True)\nprint("✓ Old database deleted")\n'

In [7]:
from sentence_transformers import SentenceTransformer
import numpy as np

text_model = SentenceTransformer('all-MiniLM-L6-v2')
image_model = SentenceTransformer('clip-ViT-B-32')

print(f"Text: {text_model.get_sentence_embedding_dimension()} dims")
print(f"Image: {image_model.get_sentence_embedding_dimension()} dims")

# Extract data
texts = [el.text for el in text_elements if el.text and len(el.text.strip()) > 0]
tables = [el.text for el in table_elements if el.text and len(el.text.strip()) > 0]
all_texts = text + texts

# Generate embeddings
text_embeddings = [text_model.encode(str(t)[:512]) for t in all_texts]
table_embeddings = [text_model.encode(t[:1000]) for t in tables]
image_embeddings = [image_model.encode(Image.open(p).convert("RGB")) for p in page_images]

print(f"✓ Text: {len(text_embeddings)}, Tables: {len(table_embeddings)}, Images: {len(image_embeddings)}")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/604 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

0_CLIPModel/pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

0_CLIPModel/model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Text: 384 dims
Image: None dims
✓ Text: 64, Tables: 1, Images: 7


In [9]:
# ============================================================================
# STEP 6 - CORRECTED (with proper metadata)
# ============================================================================

import chromadb

client = chromadb.EphemeralClient()

text_collection = client.get_or_create_collection("text_chunks", metadata={"hnsw:space": "cosine"})
table_collection = client.get_or_create_collection("table_chunks", metadata={"hnsw:space": "cosine"})
image_collection = client.get_or_create_collection("image_chunks", metadata={"hnsw:space": "cosine"})

# Add text embeddings
text_ids = [f"text_{i}" for i in range(len(text_embeddings))]
text_docs = [str(t)[:1000] for t in all_texts]
text_collection.add(
    embeddings=[e.tolist() for e in text_embeddings],
    ids=text_ids,
    documents=text_docs,
    metadatas=[{"type": "text", "index": i} for i in range(len(text_ids))]  # ← Non-empty metadata
)
print(f"✓ Added {text_collection.count()} text chunks")

# Add table embeddings
table_ids = [f"table_{i}" for i in range(len(table_embeddings))]
table_collection.add(
    embeddings=[e.tolist() for e in table_embeddings],
    ids=table_ids,
    documents=tables,
    metadatas=[{"type": "table", "index": i} for i in range(len(table_ids))]  # ← Non-empty metadata
)
print(f"✓ Added {table_collection.count()} tables")

# Add image embeddings
image_ids = [f"image_{i}" for i in range(len(image_embeddings))]
image_collection.add(
    embeddings=[e.tolist() for e in image_embeddings],
    ids=image_ids,
    documents=page_images,
    metadatas=[{"type": "image", "index": i} for i in range(len(image_ids))]  # ← Non-empty metadata
)
print(f"✓ Added {image_collection.count()} images")

print("\n✅ All embeddings stored successfully!")


✓ Added 64 text chunks
✓ Added 1 tables
✓ Added 7 images

✅ All embeddings stored successfully!


In [10]:
def query_multimodal(question, top_k=3):
    query_embedding = text_model.encode(question)

    results = {'text': {'documents': [], 'scores': []}, 'tables': {'documents': [], 'scores': []}, 'images': {'documents': [], 'scores': []}}

    try:
        text_res = text_collection.query(query_embeddings=[query_embedding.tolist()], n_results=top_k, include=['documents', 'distances'])
        if text_res['documents'] and text_res['documents'][0]:
            results['text']['documents'] = text_res['documents'][0]
            results['text']['scores'] = [1 - float(d) for d in text_res['distances'][0]]
    except: pass

    try:
        table_res = table_collection.query(query_embeddings=[query_embedding.tolist()], n_results=top_k, include=['documents', 'distances'])
        if table_res['documents'] and table_res['documents'][0]:
            results['tables']['documents'] = table_res['documents'][0]
            results['tables']['scores'] = [1 - float(d) for d in table_res['distances'][0]]
    except: pass

    try:
        image_res = image_collection.query(query_embeddings=[query_embedding.tolist()], n_results=top_k, include=['documents', 'distances'])
        if image_res['documents'] and image_res['documents'][0]:
            results['images']['documents'] = image_res['documents'][0]
            results['images']['scores'] = [1 - float(d) for d in image_res['distances'][0]]
    except: pass

    return results

# Test
result = query_multimodal("What are the main findings?", top_k=2)
print("Text:", len(result['text']['documents']))
print("Tables:", len(result['tables']['documents']))
print("Images:", len(result['images']['documents']))


Text: 2
Tables: 1
Images: 0


In [25]:
from groq import Groq

client = Groq(api_key="Enter_your_api_key_here_lol")

def generate_response(question, context_results):
    """Using Groq (Updated models)"""

    context = ""
    if context_results['text']['documents']:
        context += context_results['text']['documents'][0][:300]
    if context_results['tables']['documents']:
        context += "\n" + context_results['tables']['documents'][0][:300]

    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a document analysis assistant. Answer based on context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
        ],
        model="openai/gpt-oss-120b",  # ← Updated model (works NOW)
        max_tokens=500,
        temperature=0.7
    )

    return response.choices[0].message.content

# Test it
result = generate_response("What are the main findings? is it good?",
                          query_multimodal("findings", top_k=2))
print(result)


**Main findings from the table**

| Class   | Predicted probability |
|---------|-----------------------|
| Arrest  | 0.002  (0.2 %) |
| Assault | 0.040  (4 %) |
| Arson   | 0.008  (0.8 %) |
| Abuse   | 0.950  (95 %) |

1. **Dominant prediction** – The model (or analysis) assigns an overwhelming probability to the **“Abuse”** class (95 %).  
2. **Very low probabilities for the other three classes** – “Arrest”, “Assault”, and “Arson” each receive less than 5 % probability, with “Arrest” being essentially negligible (0.2 %).  

**Is this “good”? – What to consider**

| Aspect | Interpretation |
|--------|----------------|
| **Model confidence** | A 95 % probability suggests the model is very confident that the observation belongs to the “Abuse” class. High confidence is desirable **if** it reflects the true underlying distribution. |
| **Class imbalance** | If the dataset is heavily skewed toward “Abuse” (e.g., 95 % of cases are abuse), the model may simply be learning the prior distribu

In [28]:
# ============================================================================
# STEP 9: Interactive Chatbot Interface with Gradio
# ============================================================================

import gradio as gr
from groq import Groq

# Initialize Groq client
groq_client = Groq(api_key="LoL_use_your_Api_key_here")  # Replace with your key

def generate_response_groq(question, context_results):
    """Generate response using Groq with current working model"""

    # Build context
    context_parts = []

    if context_results['text']['documents']:
        context_parts.append("📄 RELEVANT TEXT:")
        for i, doc in enumerate(context_results['text']['documents'][:2], 1):
            context_parts.append(f"  {i}. {doc[:250]}...")

    if context_results['tables']['documents']:
        context_parts.append("\n📊 RELEVANT TABLES:")
        for i, doc in enumerate(context_results['tables']['documents'][:1], 1):
            context_parts.append(f"  {i}. {doc[:250]}...")

    if context_results['images']['documents']:
        context_parts.append(f"\n🖼️  RELEVANT IMAGES: {len(context_results['images']['documents'])} found")

    full_context = "\n".join(context_parts)

    try:
        response = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful document analysis assistant. Answer questions based on the provided context. Cite which section (text/table/image) your information comes from. If info is not in context, say so clearly."
                },
                {
                    "role": "user",
                    "content": f"Context from document:\n{full_context}\n\nQuestion: {question}\n\nProvide a clear, concise answer."
                }
            ],
            model="openai/gpt-oss-120b",  # Your working model
            max_tokens=600,
            temperature=0.7
        )

        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating response: {str(e)}"

def chatbot_response(user_message):
    """Single chatbot turn"""

    try:
        print(f"\n👤 User: {user_message}")

        # Retrieve context
        context_results = query_multimodal(user_message, top_k=3)

        # Generate response
        bot_answer = generate_response_groq(user_message, context_results)

        # Add source information
        sources = "**📌 Sources Used:**\n"
        if context_results['text']['documents']:
            sources += f"- {len(context_results['text']['documents'])} text sections\n"
        if context_results['tables']['documents']:
            sources += f"- {len(context_results['tables']['documents'])} tables\n"
        if context_results['images']['documents']:
            sources += f"- {len(context_results['images']['documents'])} images\n"

        full_response = bot_answer + "\n\n---\n" + sources

        return full_response

    except Exception as e:
        return f"❌ Error: {str(e)}\n\nPlease try again or rephrase your question."

def create_gradio_interface():
    """Create Gradio chatbot interface"""

    with gr.Blocks(theme=gr.themes.Soft(), title="🤖 Multimodal RAG Chatbot") as demo:

        # Header
        gr.Markdown("""
        # 🤖 Multimodal RAG Chatbot with Groq

        Ask questions about your PDF documents. I understand **text**, **tables**, and **images**!

        ### Features:
        - 📄 Retrieves relevant text sections
        - 📊 Finds related tables and data
        - 🖼️ Identifies relevant images
        - 🧠 Generates intelligent answers with Groq AI
        - ⚡ Lightning-fast responses
        """)

        # Chat interface
        with gr.Row():
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(
                    label="💬 Chat History",
                    height=500,
                    show_copy_button=True,
                    show_share_button=False
                )

        # Input area
        with gr.Row():
            with gr.Column(scale=5):
                msg = gr.Textbox(
                    label="Ask a question...",
                    placeholder="What would you like to know about the document?",
                    lines=2,
                    max_lines=5
                )
            with gr.Column(scale=1):
                submit_btn = gr.Button("📤 Send", variant="primary", size="lg")

        # Example questions
        gr.Examples(
            examples=[
                "What are the main findings in this document?",
                "Summarize the key data from the tables",
                "What information do the images contain?",
                "What are the main conclusions?",
                "Can you highlight the important statistics?"
            ],
            inputs=msg,
            label="💡 Example Questions to Try"
        )

        # Instructions
        gr.Markdown("""
        ### How It Works:
        1. **📥 Input** - Ask a question about your document
        2. **🔍 Retrieve** - System searches for relevant content
        3. **🧠 Augment** - Combines text, tables, and images
        4. **✨ Generate** - AI creates an intelligent answer

        ### Tips:
        - Ask specific questions for better results
        - Use keywords from the document
        - You can ask follow-up questions
        """)

        # Connect functions
        def chat_turn(message, history):
            """Process one chat turn"""
            response = chatbot_response(message)
            history.append((message, response))
            return history, ""

        submit_btn.click(
            fn=chat_turn,
            inputs=[msg, chatbot],
            outputs=[chatbot, msg],
            queue=True
        )

        msg.submit(
            fn=chat_turn,
            inputs=[msg, chatbot],
            outputs=[chatbot, msg],
            queue=True
        )

    return demo

# Launch the chatbot
print("\n" + "="*80)
print("🚀 LAUNCHING GRADIO CHATBOT INTERFACE")
print("="*80)
print("\n✓ Creating interface...")

demo = create_gradio_interface()

print("✓ Opening chatbot...")
print("\n📍 The chatbot is now live! Share the link or keep it local.\n")

demo.launch(share=True, debug=False)



🚀 LAUNCHING GRADIO CHATBOT INTERFACE

✓ Creating interface...


  chatbot = gr.Chatbot(


✓ Opening chatbot...

📍 Your chatbot is now live! Share the link or keep it local.

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://96e784f5c0bdb7e1ae.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


