# 30 - Chat: RAG-assisted chat loop using Ollama


In [1]:
# Cell A - env, import detection
from dotenv import load_dotenv
load_dotenv()
import os, json, re, time, traceback

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX = os.getenv("PINECONE_INDEX","medical-knowledge")
PINECONE_ENV = os.getenv("PINECONE_ENV", None)
OLLAMA_URL = os.getenv("OLLAMA_URL","http://127.0.0.1:11434")
MODEL_NAME = os.getenv("MODEL_NAME","llama3")

print("Env loaded. Pinecone key present:", bool(PINECONE_API_KEY), "Index:", PINECONE_INDEX)

# LangChain detection (some installs vary)
HAS_LANGCHAIN = False
HAS_LC_COMMUNITY = False
HAS_LC_HF = False
HAS_STRUCT_PARSER = False
try:
    import langchain
    HAS_LANGCHAIN = True
    print("langchain version:", getattr(langchain,"__version__","?"))
except Exception:
    print("langchain not available")

# try huggingface adapter
try:
    from langchain_huggingface import HuggingFaceEmbeddings as LC_HF_Emb
    HAS_LC_HF = True
    print("langchain_huggingface available")
except Exception:
    try:
        from langchain_community.embeddings import HuggingFaceEmbeddings as LC_HF_Emb
        HAS_LC_COMMUNITY = True
        print("langchain_community embeddings available")
    except Exception:
        print("No langchain huggingface/community embeddings adapter available")

# structured parser detection
try:
    from langchain.output_parsers import StructuredOutputParser, ResponseSchema
    HAS_STRUCT_PARSER = True
    print("StructuredOutputParser available")
except Exception:
    print("StructuredOutputParser NOT available - will fallback to manual JSON extraction")

# sentence-transformers local embedding (fallback and for re-embedding)
from sentence_transformers import SentenceTransformer
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
print("Loaded local embedder. Dim:", len(embed_model.encode("test")))


Env loaded. Pinecone key present: True Index: medical-knowledge
langchain version: 1.0.5
langchain_huggingface available
StructuredOutputParser NOT available - will fallback to manual JSON extraction
Loaded local embedder. Dim: 384


In [3]:
# CRC init (universal import + memory + chain creation)
import traceback
crc = None
memory = None

# Universal import attempts for ConversationalRetrievalChain
crc_imported = False
ConvChain = None
try:
    from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
    ConvChain = ConversationalRetrievalChain
    crc_imported = True
    print("Imported CRC from langchain.chains")
except Exception:
    pass

if not crc_imported:
    try:
        from langchain_experimental.chains import ConversationalRetrievalChain
        ConvChain = ConversationalRetrievalChain
        crc_imported = True
        print("Imported CRC from langchain_experimental.chains")
    except Exception:
        pass

if not crc_imported:
    try:
        from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
        ConvChain = ConversationalRetrievalChain
        crc_imported = True
        print("Imported CRC from legacy location")
    except Exception:
        pass

if not crc_imported:
    print("Could not import ConversationalRetrievalChain from known locations. CRC will not be available.")
else:
    # build memory
    try:
        from langchain.memory import ConversationBufferMemory
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        print("ConversationBufferMemory ready")
    except Exception as e:
        print("Could not import ConversationBufferMemory:", e)
        memory = None

    # prepare a retriever object for the chain (use LangChain retriever if present else wrap manual)
    chain_retriever = None
    try:
        if 'retriever' in globals() and retriever is not None:
            chain_retriever = retriever
            print("Using existing LangChain retriever for CRC")
        else:
            # simple wrapper exposing get_relevant_documents()
            class SimpleRetriever:
                def __init__(self, k=3): self.k = k
                def get_relevant_documents(self, query):
                    ctx = retrieve_context_manual(query, k=self.k)
                    class D: 
                        def __init__(self, text): self.page_content = text
                    return [D(ctx)] if ctx else []
            chain_retriever = SimpleRetriever(k=3)
            print("Using SimpleRetriever wrapper (manual retrieval) for CRC")
    except Exception as e:
        print("Error preparing retriever:", e)
        chain_retriever = None

    # instantiate CRC
    try:
        if ConvChain is not None and ollama_llm is not None and chain_retriever is not None:
            crc = ConvChain.from_llm(llm=ollama_llm, retriever=chain_retriever, memory=memory, return_source_documents=False, verbose=False)
            print("ConversationalRetrievalChain initialized successfully.")
        else:
            print("Missing components for CRC (ConvChain/LLM/retriever). CRC not created.")
    except Exception as e:
        print("Failed to construct CRC:", e)
        traceback.print_exc()

# at the end we have: crc (chain or None), memory (or None)
print("crc:", "available" if crc else "None", "| memory:", "available" if memory else "None")


Could not import ConversationalRetrievalChain from known locations. CRC will not be available.
crc: None | memory: None


In [2]:
# Cell B - robust Pinecone initialization & retriever setup
index = None
pc = None
try:
    from pinecone import Pinecone
    pc = Pinecone(api_key=PINECONE_API_KEY)
    index = pc.Index(PINECONE_INDEX)
    print("Connected via Pinecone() client -> Index OK")
except Exception:
    try:
        import pinecone
        try:
            if PINECONE_ENV:
                pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
            else:
                pinecone.init(api_key=PINECONE_API_KEY)
        except Exception:
            pass
        index = pinecone.Index(PINECONE_INDEX)
        pc = pinecone
        print("Connected via classic pinecone.init -> Index OK")
    except Exception as e:
        print("Failed to init Pinecone:", e)
        raise

# Build LangChain retriever if available
retriever = None
USE_RETRIEVER = "manual"
try:
    if HAS_LC_HF:
        lc_emb = LC_HF_Emb(model_name="all-MiniLM-L6-v2")
        from langchain_community.vectorstores import Pinecone as LC_Pinecone
        vectorstore = LC_Pinecone.from_existing_index(index_name=PINECONE_INDEX, embedding=lc_emb)
        retriever = vectorstore.as_retriever(search_kwargs={"k":3})
        USE_RETRIEVER = "langchain"
        print("Using langchain-community Pinecone retriever")
except Exception as e:
    print("LangChain retriever init failed or not available:", e)
    USE_RETRIEVER = "manual"

# Manual retriever fallback
def retrieve_context_manual(query, k=3):
    qvec = embed_model.encode(query).tolist()
    res = index.query(vector=qvec, top_k=k, include_metadata=True)
    matches = res.get("matches", [])
    texts = [m.get("metadata",{}).get("text","") for m in matches]
    return "\n\n".join(texts)

print("Retriever mode:", USE_RETRIEVER)


Connected via Pinecone() client -> Index OK
LangChain retriever init failed or not available: module 'pinecone' has no attribute 'Index'
Retriever mode: manual


In [4]:
# Cell C - robust call_ollama + LangChain LLM wrapper
import requests, json, re, html
OLLAMA_API = OLLAMA_URL.rstrip('/') + "/api/generate"

def call_ollama(prompt, max_tokens=512, temperature=0.0, timeout=60):
    payload = {"model": MODEL_NAME, "prompt": prompt, "max_tokens": max_tokens, "temperature": temperature, "stream": False}
    r = requests.post(OLLAMA_API, json=payload, timeout=timeout)
    text = (r.text or "").strip()
    # try JSON decode
    try:
        data = r.json()
        if isinstance(data, dict):
            if "response" in data: return data["response"]
            if "output" in data:
                out = data["output"]
                if isinstance(out, list): return "".join(p.get("content","") if isinstance(p,dict) else str(p) for p in out)
                return str(out)
    except Exception:
        pass
    # NDJSON fallback: collect 'response' lines
    out = ""
    for ln in text.splitlines():
        ln = ln.strip()
        if not ln: continue
        try:
            obj = json.loads(ln)
            if isinstance(obj, dict) and obj.get("response"):
                out += str(obj.get("response"))
        except Exception:
            continue
    if out.strip(): return out.strip()
    return text

# LangChain LLM wrapper (lightweight)
try:
    from langchain.llms.base import LLM
    from typing import Optional, List, Mapping, Any
    class OllamaLLM(LLM):
        def __init__(self, model_name: str = MODEL_NAME, temperature: float = 0.0):
            self.model_name = model_name
            self.temperature = temperature
        @property
        def _identifying_params(self) -> Mapping[str,Any]:
            return {"model": self.model_name}
        @property
        def _llm_type(self) -> str:
            return "ollama"
        def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
            return call_ollama(prompt, max_tokens=512, temperature=self.temperature)
    ollama_llm = OllamaLLM()
    print("OllamaLLM wrapper ready for LangChain.")
except Exception as e:
    print("Could not create LangChain LLM wrapper:", e)
    ollama_llm = None


Could not create LangChain LLM wrapper: No module named 'langchain.llms'


In [12]:
# Cell D - Memory + CRC initialization (with fallback)
memory = None
crc = None
try:
    # try to import the chain (may require langchain-experimental or latest langchain)
    try:
        # modern path
        from langchain.chains import ConversationalRetrievalChain
    except Exception:
        from langchain_experimental.chains import ConversationalRetrievalChain

    from langchain.memory import ConversationBufferMemory
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    if ollama_llm is None:
        raise RuntimeError("Ollama LLM wrapper missing — CRC init skipped")
    # set up the chain using langchain retriever if available else wrap manual retriever
    if USE_RETRIEVER == "langchain" and retriever is not None:
        chain_retriever = retriever
    else:
        # define a tiny wrapper class that exposes get_relevant_documents()
        class SimpleRetriever:
            def __init__(self, k=3):
                self.k = k
            def get_relevant_documents(self, query):
                # return simple objects with page_content property (LangChain expects Document-like)
                ctx = retrieve_context_manual(query, k=self.k)
                class D:
                    def __init__(self, text): self.page_content = text
                return [D(ctx)] if ctx else []
        chain_retriever = SimpleRetriever(k=3)

    crc = ConversationalRetrievalChain.from_llm(
        llm=ollama_llm,
        retriever=chain_retriever,
        memory=memory,
        return_source_documents=False,
        verbose=False
    )
    print("ConversationalRetrievalChain (CRC) initialized.")
except Exception as e:
    print("CRC initialization failed (will fallback to manual loop):", e)
    crc = None


CRC initialization failed (will fallback to manual loop): No module named 'langchain_experimental.chains'


In [5]:
# Cell 7 - Structured output parser or manual format instructions
parser = None
format_instructions = None
try:
    from langchain.output_parsers import StructuredOutputParser, ResponseSchema
    schemas = [
        ResponseSchema(name="symptoms", description="List of reported symptoms"),
        ResponseSchema(name="duration", description="Onset/duration (e.g., '3 days')"),
        ResponseSchema(name="severity", description="mild/moderate/severe"),
        ResponseSchema(name="current_medication", description="list or empty"),
        ResponseSchema(name="allergies", description="list or empty"),
        ResponseSchema(name="urgency", description="low/medium/high"),
        ResponseSchema(name="notes", description="short notes")
    ]
    parser = StructuredOutputParser.from_response_schemas(schemas)
    format_instructions = parser.get_format_instructions()
    print("Using LangChain StructuredOutputParser.")
except Exception:
    format_instructions = (
        "Output ONLY a single valid JSON object matching this schema:\n"
        "{\n"
        "  \"symptoms\": [\"string\", ...],\n"
        "  \"duration\": \"string\",\n"
        "  \"severity\": \"mild/moderate/severe\",\n"
        "  \"current_medication\": [\"string\", ...],\n"
        "  \"allergies\": [\"string\", ...],\n"
        "  \"urgency\": \"low/medium/high\",\n"
        "  \"notes\": \"string\"\n"
        "}\nDo not output any extra explanatory text — ONLY the JSON."
    )
    print("Using manual JSON format instructions fallback.")


Using manual JSON format instructions fallback.


In [None]:
# Cell 8 - ask_next_question: retrieves context then asks a single follow-up question
def ask_next_question(user_text, k=3):
    if USE_RETRIEVER == "langchain" and retriever is not None:
        try:
            docs = retriever.get_relevant_documents(user_text)
            ctx = "\n\n".join([d.page_content for d in docs])
        except Exception as e:
            print("LangChain retriever failed:", e)
            ctx = retrieve_context_manual(user_text, k=k)
    else:
        ctx = retrieve_context_manual(user_text, k=k)

    prompt = (
        "You are a concise clinical triage assistant. Using ONLY the context below, "
        "ask ONE concise follow-up question that will help clarify the patient's symptoms. "
        "Do NOT provide a diagnosis or recommendations—only a question.\n\n"
        f"Context:\n{ctx}\n\nPatient statement:\n{user_text}\n\nAsk one short question:"
    )
    return call_ollama(prompt, max_tokens=200)

# quick test (uncomment to run)
# print(ask_next_question('I have chest pain and breathlessness since morning.'))
print("ask_next_question ready")


ask_next_question ready


In [7]:
# Cell 9 - interactive or simulated multi-turn conversation and final JSON extraction
chat_transcript = []

# initial patient statement - edit here or use input()
user_input = "I have had chest pain and mild breathlessness since morning."
print("Patient:", user_input)
chat_transcript.append(f"Patient: {user_input}")

# perform 3 follow-up rounds (edit number if you want)
for i in range(3):
    bot_q = ask_next_question(user_input, k=3)
    print("Bot:", bot_q)
    chat_transcript.append(f"Bot: {bot_q}")

    # in interactive notebook you can type; in non-interactive use defaults
    try:
        answer = input("Patient reply (press Enter to use default simulated reply): ").strip()
    except Exception:
        answer = ""
    if not answer:
        if i == 0:
            answer = "The pain started this morning and is sharp when I breathe."
        elif i == 1:
            answer = "I feel dizzy sometimes and a bit nauseous."
        else:
            answer = "I take no regular medicines and have no known allergies."
    print("Patient reply:", answer)
    chat_transcript.append(f"Patient: {answer}")
    user_input = answer

conversation_text = "\n".join(chat_transcript)
print("\n--- Conversation ---\n", conversation_text)
# build final prompt
final_prompt = f"{format_instructions}\n\nConversation:\n{conversation_text}\n\nNow output ONLY the JSON."
raw_report = call_ollama(final_prompt, max_tokens=700)
print("\nRaw model output preview:\n", raw_report[:1000])


Patient: I have had chest pain and mild breathlessness since morning.
Bot: Is your chest pain constant or does it come and go?
Patient reply: its a constant pain
Bot: Where is this pain located?
Patient reply: near chest
Bot: Is it a sharp or dull sensation?
Patient reply: sharp

--- Conversation ---
 Patient: I have had chest pain and mild breathlessness since morning.
Bot: Is your chest pain constant or does it come and go?
Patient: its a constant pain
Bot: Where is this pain located?
Patient: near chest
Bot: Is it a sharp or dull sensation?
Patient: sharp

Raw model output preview:
 {
  "symptoms": ["chest pain", "mild breathlessness"],
  "duration": "constant",
  "severity": "mild",
  "current_medication": [],
  "allergies": [],
  "urgency": "low",
  "notes": ""


In [8]:
# Cell 10 - parse structured JSON (try parser, else manual), triage and save
structured = {}
if parser is not None:
    try:
        structured = parser.parse(raw_report)
        print("Parsed via LangChain parser:", structured)
    except Exception as e:
        print("LangChain parser failed:", e)

if not structured:
    # try to extract first JSON object in response
    m = re.search(r'(\{.*\})', raw_report, re.S)
    if m:
        try:
            structured = json.loads(m.group(1))
            print("Parsed via manual JSON load.")
        except Exception as e:
            print("Manual JSON parse failed:", e)
    else:
        print("No JSON object found in model output; storing raw as notes.")
        structured = {"symptoms": [], "duration": "", "severity": "", "current_medication": [], "allergies": [], "urgency": "", "notes": raw_report[:500]}

# ensure fields exist
for k in ["symptoms","duration","severity","current_medication","allergies","urgency","notes"]:
    if k not in structured:
        structured[k] = [] if k in ["symptoms","current_medication","allergies"] else ""

# simple triage rules
def triage_report(s):
    symptoms_text = " ".join(s.get("symptoms", [])) if isinstance(s.get("symptoms", []), list) else str(s.get("symptoms",""))
    s_lower = symptoms_text.lower()
    if any(w in s_lower for w in ["chest","breath","shortness","palpit"]):
        return {"specialist":"Cardiology/Emergency","urgency":"high"}
    if any(w in s_lower for w in ["rash","itch","lesion"]):
        return {"specialist":"Dermatology","urgency":"medium"}
    if any(w in s_lower for w in ["headache","dizzy","seizure"]):
        return {"specialist":"Neurology","urgency":"medium"}
    return {"specialist":"General Physician","urgency":"low"}

triage = triage_report(structured)
print("Triage result:", triage)

# save to outputs
os.makedirs("outputs", exist_ok=True)
out = {"conversation": chat_transcript, "structured": structured, "triage": triage}
with open("outputs/langchain_report.json","w",encoding="utf-8") as f:
    json.dump(out, f, indent=2, ensure_ascii=False)
print("Saved outputs/langchain_report.json")


No JSON object found in model output; storing raw as notes.
Triage result: {'specialist': 'General Physician', 'urgency': 'low'}
Saved outputs/langchain_report.json
