<a href="https://colab.research.google.com/github/RamyaAL3362/skilldevelopment/blob/main/multimodal_RAG_with_langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install all necessary packages
!pip install langchain sentence-transformers faiss-cpu pytesseract pillow transformers gradio
!pip install -U langchain-community

# Import necessary libraries
import functools

import faiss
import gradio as gr
import pytesseract
import torch
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.vectorstores import FAISS
from PIL import Image
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# 🔍 Step 1: Extract text from image using OCR
def extract_text_from_image(image: Image.Image) -> str:
    """Run Tesseract OCR over ``image`` and return the recognised text.

    Args:
        image: The PIL image to read text from.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    return pytesseract.image_to_string(image)

# 🧠 Step 2: Initialize all models (embedding + QA model)
@functools.lru_cache(maxsize=1)
def setup_models():
    """Build the embedding model and the answer-generating LLM.

    The result is memoised so the models are loaded once per process instead
    of on every request.

    Returns:
        A ``(embedder, llm)`` tuple: a ``HuggingFaceEmbeddings`` instance for
        vectorising text chunks and a ``HuggingFacePipeline`` LLM for
        generating answers inside a LangChain chain.
    """
    # Embedding model from Sentence Transformers (no API key needed).
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # HuggingFacePipeline only supports text-generating tasks
    # (text-generation, text2text-generation, summarization, translation).
    # An extractive "question-answering" pipeline takes question/context
    # pairs rather than a prompt string, so it cannot serve as the LLM of a
    # RetrievalQA chain. Use an instruction-tuned seq2seq model instead.
    generation_pipeline = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_new_tokens=128,
        device=0 if torch.cuda.is_available() else -1,
    )
    llm = HuggingFacePipeline(pipeline=generation_pipeline)

    return embedder, llm

# 📦 Step 3: Create FAISS Vector Store from OCR’d text
def create_vectorstore_from_text(text: str, embedder):
    """Chunk ``text`` and index the chunks in an in-memory FAISS store.

    Args:
        text: Raw text (here: OCR output) to index.
        embedder: LangChain embeddings object used to vectorise each chunk.

    Returns:
        A FAISS vector store containing one document per chunk.

    Raises:
        ValueError: If ``text`` yields no chunks (empty or whitespace only).
    """
    # CharacterTextSplitter splits on "\n\n" only, so OCR output without
    # blank lines would come back as a single chunk far above chunk_size.
    # The recursive splitter falls back to "\n", " " and finally characters,
    # which keeps every chunk within the configured size.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.create_documents([text])

    if not docs:
        # FAISS cannot build an index from zero documents; fail with a clear
        # message instead of an opaque IndexError from inside the library.
        raise ValueError("Cannot build a vector store from empty text.")

    return FAISS.from_documents(docs, embedder)

# 🧩 Step 4: Complete RAG Chain
def run_rag_pipeline(image, question):
    """Answer ``question`` from the text found in ``image``.

    Pipeline: OCR the image, chunk and index the text in FAISS, then run a
    LangChain RetrievalQA chain over that index.

    Args:
        image: PIL image from the UI, or ``None`` when nothing was uploaded.
        question: The user's question as a string.

    Returns:
        A Markdown string: either the answer prefixed with ``**Answer:**`` or
        a ❌-prefixed message explaining why no answer could be produced.
    """
    # Gradio passes None when the user submits without uploading an image;
    # handing that to the OCR step would raise instead of showing a message.
    if image is None:
        return "❌ Please upload an image."

    # Skip OCR and model work entirely when there is nothing to answer.
    if not question or not question.strip():
        return "❌ Please enter a question."

    extracted_text = extract_text_from_image(image)
    if not extracted_text.strip():
        return "❌ No text detected in the image."

    embedder, llm = setup_models()
    vectorstore = create_vectorstore_from_text(extracted_text, embedder)

    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(),
    )

    # Chain.run() is deprecated; invoke() takes the input under the chain's
    # "query" key and returns the answer under "result".
    response = rag_chain.invoke({"query": question})["result"]

    return f"**Answer:** {response}"

# 🖼️ + 💬 Gradio UI
# Input widgets: the image arrives in the callback as a PIL image
# (type="pil"), the question as plain text.
image_input = gr.Image(label="Upload Image with Text", type="pil")
question_input = gr.Textbox(label="Ask a Question")

# The callback returns Markdown, so render the result with a Markdown component.
output = gr.Markdown()

# Wire the widgets to run_rag_pipeline and start the web app.
gr.Interface(
    title="🔍 Multimodal RAG with LangChain (No API)",
    description="Upload an image containing text and ask a question. Uses OCR + LangChain + FAISS. No API keys needed!",
    fn=run_rag_pipeline,
    inputs=[image_input, question_input],
    outputs=output,
).launch()


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-

