#!/usr/bin/env python3
"""
Complete setup for Phi3:mini + OCR hybrid multimodal RAG
Memory efficient solution for 16GB systems
"""

# In [4]:  (Jupyter cell marker left over from the notebook export)
"""
chat_app.py
100% local: Ollama (requests) + LlamaIndex (retriever) + HuggingFace embeddings + Streamlit UI
No 'openai' package required. No API keys.
"""

import os
import json
import requests
from pathlib import Path
import streamlit as st

# LlamaIndex imports (retriever + embeddings only)
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# ----------------------------
# Safety: remove any OPENAI env var so nothing falls back accidentally
# ----------------------------
os.environ.pop("OPENAI_API_KEY", None)

# ----------------------------
# Config
# ----------------------------
# Ollama server. Defaults to the local daemon; set the OLLAMA_BASE_URL
# environment variable to point at a server on another host or port.
# An unset or empty variable falls back to the default, and any trailing
# slash is dropped so the endpoint below never contains "//".
OLLAMA_BASE_URL = (os.environ.get("OLLAMA_BASE_URL") or "http://localhost:11434").rstrip("/")
# OpenAI-compatible chat completions route on the server above.
OLLAMA_CHAT_ENDPOINT = f"{OLLAMA_BASE_URL}/v1/chat/completions"
# Model used unless the caller (or the sidebar text box) names another one.
DEFAULT_MODEL = "phi3:mini"
# Folder holding the documents to index; created on import if missing.
DATA_DIR = Path("data")
DATA_DIR.mkdir(exist_ok=True)

# ----------------------------
# Ollama client (requests)
# ----------------------------
def ollama_chat(messages, model=DEFAULT_MODEL, stream: bool = False, timeout: int = 300):
    """
    Send a chat request to the Ollama chat-completions endpoint.

    messages: list of {"role": "user"/"system"/"assistant", "content": "..."}
    model: model name placed in the request payload.
    stream: if True, returns a generator that yields text chunks (as they arrive).
            if False, returns the full reply as a string.
    timeout: timeout passed to requests.post.

    Raises requests.HTTPError (via raise_for_status) on an error status.
    With stream=True the request is only sent, and errors only raised,
    once the returned generator is iterated.
    """
    payload = {"model": model, "messages": messages, "stream": stream}
    headers = {"Content-Type": "application/json"}

    if stream:
        # Streaming lives in its own generator function: a `yield` in this
        # body would turn *every* call into a generator, and the
        # stream=False branch could then never hand back a plain string.
        return _ollama_stream_chunks(payload, headers, timeout)

    resp = requests.post(OLLAMA_CHAT_ENDPOINT, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    return data["choices"][0]["message"]["content"]


def _ollama_stream_chunks(payload, headers, timeout):
    """Yield the text pieces of a streamed chat completion, line by line."""
    # The context manager releases the connection even if the consumer
    # stops iterating early or an exception interrupts the stream.
    with requests.post(
        OLLAMA_CHAT_ENDPOINT, headers=headers, json=payload, stream=True, timeout=timeout
    ) as resp:
        resp.raise_for_status()
        # Read raw bytes and decode as UTF-8 ourselves instead of relying on
        # the charset requests guesses from the response headers.
        for raw_line in resp.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8", errors="replace").strip()
            if line.startswith("data:"):
                line = line[len("data:"):].strip()
            if not line or line == "[DONE]":
                continue
            try:
                chunk = json.loads(line)
            except json.JSONDecodeError:
                # Skip lines that are not JSON rather than abort the stream.
                continue
            if not isinstance(chunk, dict):
                continue
            # A chunk may carry an empty "choices" list; treat it as "no text"
            # instead of failing with IndexError.
            choices = chunk.get("choices") or [{}]
            delta = choices[0].get("delta") or {}
            content_piece = delta.get("content")
            if content_piece:
                yield content_piece

# ----------------------------
# Build / load index
# ----------------------------
def build_index(data_dir: Path = DATA_DIR, embed_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
    """
    Builds a VectorStoreIndex from files in data_dir using HuggingFace embeddings only.

    data_dir: directory whose files are loaded with SimpleDirectoryReader.
    embed_model_name: HuggingFace model name used for embeddings (run on CPU).

    Returns the index, or None when there is nothing to index (the directory
    is missing or empty, or the reader produced no documents).

    Side effect: sets the global Settings.embed_model.
    """
    Settings.embed_model = HuggingFaceEmbedding(model_name=embed_model_name, device="cpu")

    try:
        reader = SimpleDirectoryReader(str(data_dir))
    except ValueError:
        # SimpleDirectoryReader raises ValueError when the directory does not
        # exist or contains no files. That is the normal state on first run
        # before anything is uploaded, so report "no index" instead of crashing.
        return None

    docs = reader.load_data()
    if not docs:
        return None
    return VectorStoreIndex.from_documents(docs)

# ----------------------------
# Node -> text extractor
# ----------------------------
def node_to_text(node) -> str:
    """Best-effort conversion of a LlamaIndex node (or node wrapper) to text.

    Strategies are tried in order; an exception in one moves on to the next:
      1. ``node.get_content()``
      2. ``node.node.get_content()`` when a truthy inner ``node`` offers it
      3. ``node.text``, then ``node.get_text()``, then ``str(node)``;
         the first truthy value wins
    """
    def _from_node():
        # Whatever get_content() returns is accepted, even an empty value.
        return True, node.get_content()

    def _from_inner():
        wrapped = getattr(node, "node", None)
        if wrapped and hasattr(wrapped, "get_content"):
            return True, wrapped.get_content()
        return False, None

    for attempt in (_from_node, _from_inner):
        try:
            matched, content = attempt()
        except Exception:
            continue
        if matched:
            return content

    # Last resort: plain attributes, falling back to the string form.
    try:
        text_attr = getattr(node, "text", None)
        if text_attr:
            return text_attr
        getter_result = getattr(node, "get_text", lambda: None)()
        if getter_result:
            return getter_result
        return str(node)
    except Exception:
        return str(node)

# ----------------------------
# Streamlit UI
# ----------------------------
# Page chrome. The title text is stored as real UTF-8 characters.
st.set_page_config(page_title="Local PDF Q&A (Ollama only)", layout="wide")
st.title("📚 Local PDF Q&A — Ollama only (no OpenAI)")

# Sidebar: upload & index
with st.sidebar:
    st.header("Index / Data")
    uploaded = st.file_uploader("Upload PDFs / text files", accept_multiple_files=True)

    if uploaded:
        # The uploader keeps returning the same files on every script rerun
        # (i.e. on every chat message). Only write to disk and drop the index
        # when the selection actually changed, otherwise the whole index
        # would be rebuilt on each interaction.
        upload_signature = tuple((f.name, f.size) for f in uploaded)
        if st.session_state.get("upload_signature") != upload_signature:
            for f in uploaded:
                # Keep only the final path component so a crafted file name
                # cannot place the file outside DATA_DIR.
                safe_name = Path(f.name).name
                if not safe_name or safe_name in {".", ".."}:
                    continue
                dest = DATA_DIR / safe_name
                with open(dest, "wb") as out:
                    out.write(f.getbuffer())
            st.session_state["upload_signature"] = upload_signature
            st.session_state.pop("index", None)
        st.success(f"Saved {len(uploaded)} files. Rebuild index below.")

    if st.button("Rebuild index now"):
        st.session_state.pop("index", None)
        # st.experimental_rerun is gone from current Streamlit releases;
        # prefer st.rerun and fall back only on older installations.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()

    st.write("---")
    model_choice = st.text_input("Ollama model name", value=DEFAULT_MODEL)
    # Use a separate name instead of rebinding the DEFAULT_MODEL constant;
    # a blank input falls back to the default model.
    model_name = (model_choice or "").strip() or DEFAULT_MODEL

# Build index if needed
if "index" not in st.session_state:
    with st.spinner("Building index from ./data..."):
        try:
            idx = build_index(DATA_DIR)
        except Exception as e:
            # Keep the chat usable (without context) if indexing fails;
            # "Rebuild index now" or a new upload triggers another attempt.
            st.warning(f"Index build error: {e}")
            idx = None
        st.session_state["index"] = idx

index = st.session_state.get("index")
retriever = index.as_retriever(similarity_top_k=3) if index is not None else None

# Chat state
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the conversation so far; Streamlit reruns the script on each event.
for m in st.session_state["messages"]:
    with st.chat_message(m["role"]):
        st.markdown(m["content"])

# Chat input
prompt = st.chat_input("Ask something about your PDFs...")

if prompt:
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Gather context passages for the question (empty when nothing is indexed).
    retrieved_text = ""
    if retriever:
        try:
            nodes = retriever.retrieve(prompt)
            # Extract each node's text once, then drop the empty ones.
            stripped = (node_to_text(n).strip() for n in nodes)
            pieces = [text for text in stripped if text]
            retrieved_text = "\n\n".join(pieces[:6])
        except Exception as e:
            st.error(f"Retrieval error: {e}")

    system_msg = {
        "role": "system",
        "content": "You are a helpful assistant. Use the context to answer. If missing, say you don't know."
    }
    user_with_context = {
        "role": "user",
        "content": f"Context:\n\n{retrieved_text}\n\nQuestion: {prompt}"
    }

    with st.chat_message("assistant"):
        placeholder = st.empty()
        assistant_text = ""
        try:
            for chunk in ollama_chat([system_msg, user_with_context], model=model_name, stream=True):
                assistant_text += chunk
                # Trailing block character acts as a typing cursor while streaming.
                placeholder.markdown(assistant_text + "▌")
            placeholder.markdown(assistant_text)
        except Exception as e:
            st.error(f"Ollama error: {e}")
            assistant_text = f"(error) {e}"

    st.session_state["messages"].append({"role": "assistant", "content": assistant_text})





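# Notebook output captured when this script was executed directly:
#   2025-09-19 13:13:31.730 Session state does not function when running a script without `streamlit run`
# Launch the app with `streamlit run chat_app.py` so that session state works.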
