In [None]:
pip install torch langchain langgraph transformers sentence-transformers faiss-cpu pillow diffusers

In [None]:
!pip install -q gradio langchain langgraph transformers sentence-transformers faiss-cpu pillow diffusers pyngrok torch
!pip install torch langchain langgraph transformers sentence-transformers faiss-cpu pillow diffusers
!pip install langchain-community
!pip install bitsandbytes
!pip install mistral_inference

import os
# Placeholder: this exports an EMPTY Hugging Face Hub token for the rest of the
# process. Fill in a real access token before running.
# NOTE(review): the Mistral repo used below presumably requires an authenticated
# download — confirm, and avoid committing a real token to the notebook.
os.environ["HUGGING_FACE_HUB_TOKEN"] = ""
import gradio as gr
from PIL import Image
from typing import List, Tuple
from pyngrok import ngrok
from langchain_core.messages import HumanMessage, AIMessage
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate
import torch
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langgraph.graph import END, MessageGraph
from huggingface_hub import snapshot_download
from pathlib import Path

# Local directory (under the user's home) that receives the raw Mistral checkpoint.
mistral_models_path = Path.home() / 'mistral_models' / '7B-Instruct-v0.3'
mistral_models_path.mkdir(parents=True, exist_ok=True)

# Download only the three files listed in allow_patterns into that directory.
# NOTE(review): nothing else in this notebook reads mistral_models_path — the
# text model is loaded by model id in EnhancedChatBot.setup_models — so this
# download may be redundant; confirm before keeping it.
snapshot_download(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
    local_dir=mistral_models_path,
)
from transformers import pipeline

class EnhancedChatBot:
    """Translation-only sketch of the chatbot.

    The class statement that directly follows rebinds the name
    ``EnhancedChatBot``, so this definition is not the one the rest of the
    notebook ends up using.
    """

    # Constructor kept disabled by the author; without it nothing assigns
    # ``self.translator``, which ``translate_urdu`` relies on.
    # def __init__(self, ...):
        # self.translator = pipeline("translation",
                                #  model="Helsinki-NLP/opus-mt-ur-en",
                                #  device=0 if torch.cuda.is_available() else -1)

    def translate_urdu(self, text: str) -> str:
        """Return the ``translation_text`` of the first result from ``self.translator``."""
        candidates = self.translator(text)
        first_result = candidates[0]
        return first_result['translation_text']
class EnhancedChatBot:
    """Chatbot that combines a text LLM, image captioning, retrieval and memory.

    Construction loads every model eagerly (see ``setup_models``), builds a
    small FAISS index (``setup_rag``) and compiles a LangGraph message graph
    (``setup_langgraph``) that is exposed as ``self.workflow``.

    Attributes:
        language: Label inserted into the prompt next to the user turn.
        memory: ``ConversationBufferMemory`` keyed by ``chat_history``.
        workflow: Compiled message graph; call ``workflow.invoke([message])``.
    """

    def __init__(self, language: str = "english"):
        """Create the bot and run all three setup steps.

        Args:
            language: Initial language label used by ``build_prompt``.
        """
        self.language = language
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.setup_models()
        self.setup_rag()
        self.setup_langgraph()

    def setup_models(self):
        """Load the 4-bit quantized text model and the BLIP captioning model."""
        # Text model with 4-bit quantization for Colab memory efficiency
        from transformers import BitsAndBytesConfig
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16
        )

        self.text_llm = HuggingFacePipeline.from_model_id(
            model_id="mistralai/Mistral-7B-Instruct-v0.3",
            task="text-generation",
            device_map="auto",
            model_kwargs={
                "quantization_config": quant_config,
                "max_length": 2048
            }
        )

        # Image model with smaller footprint
        from transformers import BlipProcessor, BlipForConditionalGeneration
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.image_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    def setup_rag(self):
        """Build the embedding model, a two-document FAISS store and its retriever."""
        from langchain_community.vectorstores import FAISS
        from langchain_community.embeddings import HuggingFaceEmbeddings

        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
        self.vector_store = FAISS.from_texts(
            ["Sample AI information", "Example image context"],
            embedding=self.embeddings
        )
        self.retriever = self.vector_store.as_retriever()

    def setup_langgraph(self):
        """Build and compile the message graph stored in ``self.workflow``.

        The graph has a single node, ``process_input``. A routing function
        decides at the entry point whether to run it (last message is a
        ``HumanMessage``) or to finish immediately.
        """
        from langgraph.graph import MessageGraph

        def router(state: List):
            last_msg = state[-1]
            if isinstance(last_msg, HumanMessage):
                return "process_input"
            return END

        graph = MessageGraph()
        graph.add_node("process_input", self.process_input)
        # `router` is a routing function, not a registered node, so it is
        # attached as the conditional entry point instead of being named in
        # set_entry_point / add_conditional_edges as if it were a node.
        graph.set_conditional_entry_point(
            router,
            {"process_input": "process_input", END: END},
        )
        graph.add_edge("process_input", END)
        # Only the compiled graph offers .invoke(), which callers rely on.
        self.workflow = graph.compile()

    def process_input(self, messages: List) -> "AIMessage":
        """Answer the most recent message and record the turn in memory.

        Args:
            messages: Message list; only the last element is read. If it
                carries an ``image_path`` attribute, that file is captioned
                and then deleted.

        Returns:
            An ``AIMessage`` whose content is the text model's output.
        """
        latest = messages[-1]
        user_input = latest.content
        image_path = getattr(latest, "image_path", None)

        # Process image if attached
        image_context = ""
        if image_path:
            try:
                image_context = self.process_image(image_path)
            finally:
                os.remove(image_path)  # Cleanup temp file even if captioning raised

        # Generate response with memory
        prompt = self.build_prompt(user_input, image_context)
        response = self.text_llm.invoke(prompt)

        # Update memory
        self.memory.save_context({"input": user_input}, {"output": response})
        return AIMessage(content=response)

    def build_prompt(self, input_text: str, image_context: str = "") -> str:
        """Assemble the full prompt string for the text model.

        Args:
            input_text: The user's message; also used as the retrieval query.
            image_context: Optional caption text placed after the retrieved context.

        Returns:
            A prompt containing retrieved context, the image context, the
            stored chat history, and the user turn labelled with ``self.language``.
        """
        history = self.memory.load_memory_variables({})["chat_history"]
        # Retrievers are runnables; invoke() replaces the deprecated
        # get_relevant_documents() and returns the same list of documents.
        documents = self.retriever.invoke(input_text)
        context = "\n".join(d.page_content for d in documents)

        return f"""System: You are a helpful assistant. Use this context:
        {context}
        {image_context}

        Conversation History:
        {history}

        User ({self.language}): {input_text}
        Assistant:"""

    def process_image(self, image_path: str) -> str:
        """Caption the image at *image_path* with the BLIP model.

        Args:
            image_path: Path of the image file to open.

        Returns:
            The decoded caption, or a string starting with
            ``"Image processing error: "`` if any step raises.
        """
        try:
            # Context manager closes the file handle once the RGB copy exists.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert('RGB')
            # Put the inputs on whatever device the model actually lives on;
            # the model is never moved to CUDA, so a hard-coded "cuda" breaks.
            inputs = self.processor(image, return_tensors="pt").to(self.image_model.device)
            out = self.image_model.generate(**inputs)
            return self.processor.decode(out[0], skip_special_tokens=True)
        except Exception as e:
            # Deliberate best-effort: the chat continues with the error text as context.
            return f"Image processing error: {str(e)}"

def gradio_interface():
    """Build the Gradio Blocks UI around a fresh ``EnhancedChatBot``.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    import tempfile

    bot = EnhancedChatBot()

    def respond(message: str, image: Image.Image, history: List[Tuple], language: str):
        """Handle one Send click.

        Returns the updated chat history plus two ``None`` values that clear
        the message box and the image upload.
        """
        bot.language = language
        history = history or []  # tolerate a chatbot that has no value yet

        # Process input
        if image is not None:
            # Unique file per request instead of one shared fixed path; PNG is
            # lossless and accepts any PIL mode. The bot deletes the file
            # after captioning it.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                image_path = tmp.name
            image.save(image_path)
            request = HumanMessage(content=message, image_path=image_path)
        else:
            request = HumanMessage(content=message)

        response = bot.workflow.invoke([request])
        # A compiled message graph returns the whole message list; the reply
        # is its last element. A bare message is accepted as well.
        reply = response[-1] if isinstance(response, (list, tuple)) else response

        # The user side of a tuple-format chat entry must be plain text; a
        # (text, PIL image) pair is not a valid chat message.
        history.append((message, reply.content))
        return history, None, None

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# MultiModal ChatBot 🌐🤖")

        with gr.Row():
            language = gr.Dropdown(["english", "roman_urdu"], label="Language")
            gr.Markdown("[[ Need help? ](https://example.com/docs)]")

        chatbot = gr.Chatbot(height=500)
        msg = gr.Textbox(label="Your Message")
        image = gr.Image(type="pil", label="Upload Image")
        btn = gr.Button("Send")

        btn.click(respond, [msg, image, chatbot, language], [chatbot, msg, image])

    return demo

# Colab deployment
if __name__ == "__main__":
    # Set ngrok token (create free account at https://ngrok.com)
    NGROK_TOKEN = ""  # Replace with your token
    # Single source of truth for the port: the ngrok tunnel and the Gradio
    # server must agree, otherwise the public URL points at nothing.
    PORT = 7860

    # Do not overwrite an already-configured ngrok token with an empty string.
    if NGROK_TOKEN:
        ngrok.set_auth_token(NGROK_TOKEN)

    # Launch Gradio app
    demo = gradio_interface()
    public_url = ngrok.connect(PORT).public_url
    print(f"Public URL: {public_url}")
    demo.launch(server_port=PORT, share=True)

In [None]:
from transformers import pipeline

# Sketch only: the `...` in the parameter list is an elision, so this cell is
# not runnable as written. Presumably these members are meant to be merged into
# the full EnhancedChatBot class rather than replace it — confirm with the author.
class EnhancedChatBot:
    def __init__(self, ...):
        # Translation pipeline; uses GPU 0 when CUDA is available, otherwise CPU (-1).
        # NOTE(review): the model id suggests Urdu -> English; confirm it copes
        # with Roman (Latin-script) Urdu, which is what the UI option is called.
        self.translator = pipeline("translation",
                                 model="Helsinki-NLP/opus-mt-ur-en",
                                 device=0 if torch.cuda.is_available() else -1)

    def translate_urdu(self, text: str) -> str:
        # Returns the `translation_text` field of the first pipeline result.
        return self.translator(text)[0]['translation_text']

In [None]:
# Sketch only: `...` stands in for the real parameters, so this is not runnable
# as written. It shows a step to place at the start of a build_prompt method.
def build_prompt(self, ...):
    # Translate the user's text first when the session language is "roman_urdu".
    if self.language == "roman_urdu":
        input_text = self.translate_urdu(input_text)
    # rest of the prompt building

Key improvements for Colab deployment:

Memory Optimization:

4-bit model quantization for reduced memory usage

Automatic image cleanup after processing

Efficient GPU memory management

Conversation Memory:

Integrated LangChain memory buffer

Full conversation history in prompts

Session-aware responses

Production-Ready Features:

Ngrok tunneling for public access

Error handling for image processing

Clean UI with responsive components

Language switching during conversation

Colab-Specific Adjustments:

Reduced model precision for better resource usage

Temporary file handling

Graceful error recovery

Automatic dependency installation

To use:

Set NGROK_TOKEN in the launch cell (it is an empty string by default) to your actual ngrok auth token

Run the entire notebook

Wait for the public URL to appear (may take 1-2 minutes)

Share the URL with users or test directly

UI Features:

Dual language support (toggle during conversation)

Image drag-and-drop interface

Conversation history scrollback

Responsive mobile-friendly design

Clear error messages

Recommended Colab Setup:

Use T4 GPU or higher

Allocate at least 12GB RAM

Enable high-RAM mode (Runtime > Change runtime type)
