In [None]:
# System setup: refresh the APT index, install helper packages, then install Ollama.
# NOTE(review): nothing in this cell installs CUDA drivers; the GPU is only probed below.
!sudo apt-get update -y
!sudo apt-get install -y curl pciutils lsb-release
# Download the Ollama install script and pipe it straight into sh.
!curl -fsSL https://ollama.com/install.sh | sh

# Check GPU. `|| true` keeps the exit status zero when nvidia-smi fails or is missing.
!nvidia-smi || true

In [None]:
#  Python dependencies
%%bash
pip install -q \
  langchain-core \
  langchain-community \
  langchain-chroma \
  langchain-ollama \
  sentence-transformers \
  chromadb \
  gradio \
  pymupdf \
  "unstructured[docx]" \
  tqdm

In [None]:
#  Pull the LLM & start Ollama server
import subprocess, threading, requests, time, os

LLM_MODEL      = "mistral:7b"
EMBED_MODEL    = "nomic-embed-text"

def _serve():
    subprocess.Popen(["ollama", "serve"],
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.DEVNULL)

threading.Thread(target=_serve, daemon=True).start()

# Wait until Ollama REST endpoint is up
for _ in range(20):
    try:
        if requests.get("http://localhost:11434").ok:
            break
    except:
        time.sleep(1)
else:
    raise RuntimeError("❌ Ollama failed to start.")

!ollama pull {LLM_MODEL}
!ollama pull {EMBED_MODEL}

print("✅ Ollama ready with:", LLM_MODEL, "and", EMBED_MODEL)

In [5]:
# Paths & global objects
import warnings, uuid, shutil, tempfile, os
from pathlib import Path
from typing import List

# LangChain imports
from langchain_community.document_loaders import (
    PyPDFLoader, CSVLoader, TextLoader,
    UnstructuredMarkdownLoader, UnstructuredWordDocumentLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.llms import Ollama
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Embedder & splitter
# Use the EMBED_MODEL constant from the Ollama start-up cell so the model that
# is pulled and the model that is queried can never drift apart.
EMBEDDER = OllamaEmbeddings(model=EMBED_MODEL)
SPLITTER = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)

# Safe prompt: model must stick to context.
# {context} and {question} are the two variables the chain is expected to fill in.
SAFE_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use ONLY the context below to answer the question. "
        "If the answer is not in the context, reply exactly: "
        "'I don't know based on the provided documents.'\n\n"
        "Context:\n{context}\n\n"
        "Question: {question}\nAnswer:"
    )
)

# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation notices from the libraries above.
warnings.filterwarnings("ignore")

In [6]:
#  Utility functions (read docs, build index, etc.)
def read_files_local(paths: List[Path]):
    """Load every supported file in ``paths`` into LangChain documents.

    The loader is picked from the file extension, compared case-insensitively
    so that ``REPORT.PDF`` is treated like ``report.pdf``. Supported
    extensions: .pdf, .csv, .txt, .md, .docx.

    Args:
        paths: Files to read.

    Returns:
        A flat list of the documents produced by the loaders. Each document's
        ``metadata["source"]`` is overwritten with the file's base name.
        Unsupported files and files whose loader raises are reported on
        stdout and skipped, so the result may be empty.
    """
    docs = []
    for p in paths:
        # Normalise once so upper- or mixed-case extensions are not skipped.
        suffix = p.suffix.lower()
        try:
            if suffix == ".pdf":
                loader = PyPDFLoader(str(p))
            elif suffix == ".csv":
                loader = CSVLoader(str(p))
            elif suffix == ".txt":
                loader = TextLoader(str(p), encoding="utf-8")
            elif suffix == ".md":
                loader = UnstructuredMarkdownLoader(str(p))
            elif suffix == ".docx":
                loader = UnstructuredWordDocumentLoader(str(p))
            else:
                print(f"[!] Skipping unsupported: {p.name}")
                continue
            for d in loader.load():
                # Replace the loader-provided source with the bare file name.
                d.metadata["source"] = p.name
                docs.append(d)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"[!] Failed on {p.name}: {e}")
    return docs

def build_vectorstore(docs, sid):
    """Chunk ``docs`` with the shared splitter and index the chunks in Chroma.

    Args:
        docs: Documents to split and embed.
        sid: Session identifier; the collection is named ``session-<sid>``.

    Returns:
        The Chroma store returned by ``Chroma.from_documents``. No persist
        directory is passed (``persist_directory=None``).
    """
    pieces = SPLITTER.split_documents(docs)
    collection = f"session-{sid}"
    store = Chroma.from_documents(
        pieces,
        embedding=EMBEDDER,
        collection_name=collection,
        persist_directory=None,
    )
    return store

def make_qa_chain(vstore):
    """Build a RetrievalQA chain over ``vstore`` that answers with SAFE_PROMPT.

    Args:
        vstore: Vector store exposing ``as_retriever``.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type, configured to also return its source documents.
    """
    # Retriever is requested with search_type "mmr" and k=4.
    doc_retriever = vstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 4},
    )
    chain_options = dict(
        llm=Ollama(model=LLM_MODEL),
        retriever=doc_retriever,
        chain_type="stuff",
        return_source_documents=True,
        chain_type_kwargs={"prompt": SAFE_PROMPT},
    )
    return RetrievalQA.from_chain_type(**chain_options)

In [7]:
#  Create global QA chain (with safe prompt and source viewing)
import gradio as gr

def ingest_fn(state, files):
    """Index the uploaded files for this session and attach a QA chain to it.

    Args:
        state: Per-session dict. Gains an ``"id"`` (created on first use), a
            ``"qa_chain"`` and an empty ``"history"`` list on success.
        files: Uploaded files, each either a path (``str`` / ``Path``) or an
            object whose ``name`` attribute holds the path.

    Returns:
        ``(state, status_message)``. When nothing was uploaded or nothing
        could be parsed, the state is returned with no chain attached and the
        message explains why.
    """
    if not files:
        return state, "⚠️ Please upload at least one file."

    sid = state.setdefault("id", str(uuid.uuid4()))
    tmp_dir = Path(tempfile.mkdtemp(prefix=f"u_{sid}_"))
    try:
        local_paths = []
        # Copy uploads for stable file paths
        for f in files:
            # Path objects also have a `.name` (the base name only), so paths
            # must be recognised before falling back to the attribute.
            src = Path(f) if isinstance(f, (str, Path)) else Path(f.name)
            target = tmp_dir / src.name
            shutil.copy(src, target)
            local_paths.append(target)

        docs = read_files_local(local_paths)
    finally:
        # The copies are only needed while loading; remove them so repeated
        # indexing does not pile up temp directories.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    if not docs:
        return state, "⚠️ Could not parse the uploaded files."

    vstore = build_vectorstore(docs, sid)
    state["qa_chain"] = make_qa_chain(vstore)
    state["history"] = []
    return state, f"✅ Indexed {len(docs)} document(s). Ask away!"

def chat_fn(state, user_msg):
    """Answer ``user_msg`` with the session's QA chain, streaming UI updates.

    This is a generator: every path yields ``(state, history)`` at least once.
    A plain ``return value`` inside a generator yields nothing, so the early
    exits below yield first and then return.

    Args:
        state: Per-session dict. ``"history"`` is created if missing and
            ``"qa_chain"`` is used when present.
        user_msg: Raw text from the input box; may be ``None`` or blank.

    Yields:
        ``(state, history)`` where history is a list of
        ``(user_message, bot_message)`` tuples. A normal question yields
        twice: first with a placeholder, then with the answer or an error
        description.
    """
    user_msg = (user_msg or "").strip()
    # setdefault keeps the list inside state, so messages added before any
    # document was indexed are not lost on the next call.
    hist = state.setdefault("history", [])

    if not user_msg:
        yield state, hist
        return

    if "qa_chain" not in state:
        hist.append((user_msg, "⚠️ Upload docs first."))
        yield state, hist
        return

    hist.append((user_msg, "⏳ …thinking…"))
    yield state, hist

    try:
        res = state["qa_chain"].invoke({"query": user_msg})
        answer = res["result"]
    except Exception as e:
        # Surface the failure in the chat instead of breaking the UI.
        answer = f"⚠️ {type(e).__name__}: {e}"

    # Swap the placeholder for the real answer.
    hist[-1] = (user_msg, answer)
    yield state, hist

def clear_chat(state):
    """Reset the session's stored history and return an empty chat display.

    Returns:
        ``(state, [])`` with ``state["history"]`` replaced by a new empty list.
    """
    state.update(history=[])
    return state, []

In [None]:
#  Multi‑session / multi‑KB Gradio app
# Build the UI: an upload row, a status line, the chat area and a clear button.
with gr.Blocks(title="RAG Chatbot (multi‑user)") as demo:
    gr.Markdown("## 📁 Upload docs & ask questions — isolated per session")

    # Upload widget and the button that triggers indexing, side by side.
    with gr.Row():
        uploads = gr.Files(file_count="multiple", label="Upload files")
        idx_btn = gr.Button("Index documents", variant="primary")
    # Receives the status message returned by ingest_fn.
    status = gr.Markdown()

    chatbot   = gr.Chatbot(label="Chatbot", height=430)
    user_box  = gr.Textbox(label="Your question", placeholder="Type and hit Enter…")
    clear_btn = gr.Button("Clear chat")

    # Dict handed to and returned from every handler below; the handlers keep
    # "id", "qa_chain" and "history" in it.
    # NOTE(review): presumably Gradio gives each browser session its own copy — confirm.
    session_state = gr.State({})

    # "Index documents" -> ingest_fn(state, files) -> (state, status text)
    idx_btn.click(
        ingest_fn, inputs=[session_state, uploads], outputs=[session_state, status]
    )
    # Enter in the textbox -> chat_fn(state, message) -> (state, chat history)
    # The textbox is not among the outputs, so its content is left as typed.
    user_box.submit(
        chat_fn, inputs=[session_state, user_box], outputs=[session_state, chatbot]
    )
    # "Clear chat" -> clear_chat(state) -> (state, empty history)
    clear_btn.click(
        clear_chat, inputs=session_state, outputs=[session_state, chatbot]
    )

# NOTE(review): share=True asks Gradio for a publicly reachable link — confirm
# that is intended, since users upload their own documents here.
demo.queue().launch(share=True, debug=True)