In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import os
import zipfile

zip_path = "/content/drive/MyDrive/mimic-iv-ext-direct-1.0.0.zip"
dataset_root = "/content/mimic_data"

os.makedirs(dataset_root, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as z:
    z.extractall(dataset_root)

print("ZIP extracted.")

dataset_dir = os.path.join(dataset_root, "mimic-iv-ext-direct-1.0.0")
print("Dataset folder:", dataset_dir)


ZIP extracted.
Dataset folder: /content/mimic_data/mimic-iv-ext-direct-1.0.0


In [9]:
!pip install rarfile unrar
import rarfile
import os

dataset_dir = '/content/mimic_data/mimic-iv-ext-direct-1.0.0'
rar_path = os.path.join(dataset_dir, "samples.rar")
samples_dir = os.path.join(dataset_dir, "samples_extracted")
os.makedirs(samples_dir, exist_ok=True)

rarfile.UNRAR_TOOL = "unrar"

print("Extracting RAR...")
rf = rarfile.RarFile(rar_path)
rf.extractall(samples_dir)
rf.close()

print("RAR extracted successfully.")

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Collecting unrar
  Downloading unrar-0.4-py3-none-any.whl.metadata (3.0 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Downloading unrar-0.4-py3-none-any.whl (25 kB)
Installing collected packages: unrar, rarfile
Successfully installed rarfile-4.2 unrar-0.4
Extracting RAR...
RAR extracted successfully.


In [5]:
!find /content/mimic_data -maxdepth 4 -type f | head -n 200


/content/mimic_data/__MACOSX/._mimic-iv-ext-direct-1.0.0
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/._.DS_Store
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Migraine
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Atrial Fibrillation
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._.DS_Store
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Gastro-oesophageal Reflux Disease
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Adrenal Insufficiency
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Hypertension
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Heart Failure
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Stroke
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Tuberculosis
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/Finished/._Multiple Sclerosis
/content/mimic_data/__MACOSX/mimic-iv-ext-direct-1.0.0/

In [10]:
!find /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples.rar -name "samples.rar"


/content/mimic_data/mimic-iv-ext-direct-1.0.0/samples.rar


In [11]:
!pip install -q sentence-transformers faiss-cpu transformers accelerate datasets streamlit pyngrok rouge-score

import sys, os
print("Python", sys.version)
try:
    import torch
    print("Torch:", torch.__version__, "cuda:", torch.cuda.is_available(), "device_count:", torch.cuda.device_count())
except Exception as e:
    print("Torch not available:", e)

ZIP_PATH = "/content/drive/MyDrive/mimic-iv-ext-direct-1.0.0.zip"
DATASET_DIR = "/content/mimic_data/mimic-iv-ext-direct-1.0.0"
SAMPLES_RAR = os.path.join(DATASET_DIR, "samples.rar")
SAMPLES_EXTRACTED = os.path.join(DATASET_DIR, "samples_extracted")

print("Expecting dataset zip at:", ZIP_PATH)
print("samples.rar path:", SAMPLES_RAR)
print("samples extracted at:", SAMPLES_EXTRACTED)


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m48.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
Python 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Torch: 2.9.0+cu126 cuda: True device_count: 1
Expecting dataset zip at: /content/drive/MyDrive/mimic-iv-ext-direct-1.0.0.zip
samples.rar path: /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples.rar
samples extracted at: /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted


In [12]:
# Extract samples.rar if not already extracted
import os
if not os.path.exists(SAMPLES_EXTRACTED):
    os.makedirs(SAMPLES_EXTRACTED, exist_ok=True)
    try:
        import rarfile
        rarfile.UNRAR_TOOL = "unrar"
        rf = rarfile.RarFile(SAMPLES_RAR)
        rf.extractall(SAMPLES_EXTRACTED)
        rf.close()
        print("samples.rar extracted to", SAMPLES_EXTRACTED)
    except Exception as e:
        print("Failed to extract RAR automatically:", e)
        print("If extraction failed, upload samples_extracted to /content or extract locally and re-upload.")
else:
    print("samples_extracted already present.")


samples_extracted already present.


In [13]:
# Preprocessing: read files and build document list
import os, json, re
from pathlib import Path
from uuid import uuid4

DATA_ROOT = SAMPLES_EXTRACTED

# Helper: read candidate text files (json, .txt)
def read_text_file(path):
    try:
        with open(path, "rb") as f:
            raw = f.read()
        # try utf-8 then latin-1
        try:
            return raw.decode("utf-8")
        except:
            return raw.decode("latin-1")
    except Exception as e:
        return None

# Discover files
text_paths = []
for root, dirs, files in os.walk(DATA_ROOT):
    for fn in files:
        fn_lower = fn.lower()
        if fn_lower.endswith((".txt", ".md", ".json", ".csv")):
            text_paths.append(os.path.join(root, fn))

print(f"Found {len(text_paths)} candidate text files.")

# Load and create documents
documents = []
doc_id = 0

def chunk_text(text, chunk_size=800, overlap=100):
    words = text.split()
    i = 0
    chunks = []
    while i < len(words):
        chunk_words = words[i:i+chunk_size]
        chunks.append(" ".join(chunk_words))
        if i+chunk_size >= len(words):
            break
        i = max(0, i + chunk_size - overlap)
    return chunks

for path in text_paths:
    txt = read_text_file(path)
    if not txt:
        continue
    # If JSON, try to extract fields or flatten
    if path.lower().endswith(".json"):
        try:
            obj = json.loads(txt)
            # If it's a list of note objects, flatten
            if isinstance(obj, list):
                for item in obj:
                    body = None
                    if isinstance(item, dict):
                        for k in ("text", "note", "input", "input1", "input2", "input3", "content", "note_text"):
                            if k in item:
                                body = item[k]
                                break
                        if body is None:
                            body = json.dumps(item)
                    else:
                        body = str(item)
                    chunks = chunk_text(body)
                    for c in chunks:
                        documents.append({"id": str(uuid4()), "text": c, "meta": {"source": path}})
            elif isinstance(obj, dict):
                # flatten dict to get several fields
                content = []
                for k, v in obj.items():
                    if isinstance(v, str) and len(v) > 20:
                        content.append(f"{k}: {v}")
                if not content:
                    content = [json.dumps(obj)]
                body = "\n".join(content)
                chunks = chunk_text(body)
                for c in chunks:
                    documents.append({"id": str(uuid4()), "text": c, "meta": {"source": path}})
            else:
                # fallback
                chunks = chunk_text(str(obj))
                for c in chunks:
                    documents.append({"id": str(uuid4()), "text": c, "meta": {"source": path}})
        except Exception as e:
            # not JSON, treat as text
            chunks = chunk_text(txt)
            for c in chunks:
                documents.append({"id": str(uuid4()), "text": c, "meta": {"source": path}})
    else:
        # plain text or csv
        chunks = chunk_text(txt)
        for c in chunks:
            documents.append({"id": str(uuid4()), "text": c, "meta": {"source": path}})

print("Built", len(documents), "document chunks.")
# Save a lightweight manifest
import json
with open("/content/documents_manifest.json", "w", encoding="utf-8") as f:
    json.dump({"documents_count": len(documents)}, f)
print("Saved manifest /content/documents_manifest.json")


Found 511 candidate text files.
Built 552 document chunks.
Saved manifest /content/documents_manifest.json


In [14]:
# Build embeddings and FAISS index
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import pickle
from tqdm import tqdm

EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL_NAME)

# compute embeddings in batches
texts = [d["text"] for d in documents]
batch_size = 64
embeddings = []
for i in tqdm(range(0, len(texts), batch_size), desc="Embedding batches"):
    batch = texts[i:i+batch_size]
    emb = embedder.encode(batch, convert_to_numpy=True, show_progress_bar=False)
    embeddings.append(emb)
embeddings = np.vstack(embeddings).astype("float32")
print("Embeddings shape:", embeddings.shape)

# Build FAISS index
d = embeddings.shape[1]
index = faiss.IndexFlatIP(d)
# normalize embeddings
faiss.normalize_L2(embeddings)
index.add(embeddings)
print("FAISS index built. N=", index.ntotal)

# Save index and docs mapping
faiss.write_index(index, "/content/faiss_index.index")
with open("/content/documents.pkl", "wb") as f:
    pickle.dump(documents, f)
print("Saved /content/faiss_index.index and /content/documents.pkl")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embedding batches: 100%|██████████| 9/9 [00:05<00:00,  1.55it/s]

Embeddings shape: (552, 384)
FAISS index built. N= 552
Saved /content/faiss_index.index and /content/documents.pkl





In [15]:
# Retriever helper
def retrieve(query, k=5):
    q_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(q_emb)
    D, I = index.search(q_emb, k)
    results = []
    for score, idx in zip(D[0], I[0]):
        if idx < 0:
            continue
        doc = documents[idx]
        results.append({"id": doc["id"], "text": doc["text"], "meta": doc["meta"], "score": float(score)})
    return results

# Quick test
print("Test retrieve for 'shortness of breath and fever':")
res = retrieve("shortness of breath and fever", k=3)
for r in res:
    print(r["score"], r["meta"]["source"])
    print(r["text"][:200])
    print("-----")


Test retrieve for 'shortness of breath and fever':
0.5935130715370178 /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Asthma/COPD Asthma/15798127-DS-19.json
input2: She is with history of asthma, anemia, MGUS, COPD, and prior CVA, who presented with shortness of breath. Patient reports that she first developed what she thought was a cold 3 days ago. She h
-----
0.57661372423172 /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Heart Failure/18693746-DS-10.json
input1: shortness of breath and chest pain input2: old man presents with shortness of breath and chest pain. Pt states that two days ago he developed some CP pain and sob. He took ntg with resolution 
-----
0.5537760257720947 /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Pneumonia/Bacterial Pneumonia/16147199-DS-4.json
input1: Chills, fatigue, cough input2: He is 61 yo retired colonel developed profound fatigue, myalgias, anorexia, and non-productive cough

In [16]:
# Load generator
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
GEN_MODEL = "google/flan-t5-large"

print("Loading generator model (this will take a bit)...")
tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(GEN_MODEL).to("cuda" if torch.cuda.is_available() else "cpu")
generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

def build_prompt(query, retrieved_docs):
    header = (
        "You are a clinical assistant. Use ONLY the provided context to answer. "
        "If the context doesn't contain the answer, say 'Insufficient information in the provided records.'\n\n"
    )

    # APPLY TRUNCATION TO EACH RETRIEVED DOCUMENT
    context = ""
    for i, d in enumerate(retrieved_docs):
        short_text = truncate_text(d["text"], max_tokens=350)
        context += f"Document {i+1} (score={d['score']:.3f}) source={d['meta']['source']}:\n{short_text}\n\n---\n"

    prompt = f"{header}CONTEXT:\n{context}\nQUESTION: {query}\nANSWER:"
    return prompt


def truncate_text(text, max_tokens=350):
    tokens = text.split()
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
    return " ".join(tokens)


def generate_answer(query, k=5, max_new_tokens=256):
    retrieved = retrieve(query, k=k)

    # Build prompt with truncated context
    prompt = build_prompt(query, retrieved)

    try:
        out = generator(prompt, max_new_tokens=max_new_tokens, do_sample=False)[0]["generated_text"]
    except:
        out = "Model input overflow. Context too large."

    return {"answer": out, "retrieved": retrieved, "prompt": prompt}


Loading generator model (this will take a bit)...


tokenizer_config.json: 0.00B [00:01, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cuda:0


In [33]:
# Evaluation utilities for your RAG pipeline

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import numpy as np

# If NLTK tokenizer missing:
import nltk
nltk.download("punkt")

#precision and recall
def evaluate_retrieval(query, ground_truth_keywords, k=5):
    """
    ground_truth_keywords: list of keywords that SHOULD appear in relevant documents.
    Example: ["pneumonia", "cough", "fever"]
    """
    retrieved_docs = retrieve(query, k=k)
    retrieved_texts = " ".join([d["text"].lower() for d in retrieved_docs])

    true_positives = sum(1 for kw in ground_truth_keywords if kw.lower() in retrieved_texts)
    precision = true_positives / k
    recall = true_positives / len(ground_truth_keywords)

    return {
        "precision": precision,
        "recall": recall,
        "true_positives": true_positives,
        "retrieved_docs": retrieved_docs
    }

def evaluate_generation(predicted, reference):
    """
    reference: a human-written "expected" summary or answer
    predicted: answer generated by your RAG system
    """

    # BLEU
    smoothie = SmoothingFunction().method4
    bleu = sentence_bleu(
        [reference.split()],
        predicted.split(),
        smoothing_function=smoothie
    )

    # ROUGE
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rougeL = scorer.score(reference, predicted)['rougeL'].fmeasure

    # Simple coherence metric = length-normalized semantic similarity
    # Embed both with your sentence-transformer model
    ref_emb = embedder.encode([reference], convert_to_numpy=True)
    pred_emb = embedder.encode([predicted], convert_to_numpy=True)
    coherence = float(np.dot(ref_emb, pred_emb.T) /
                      (np.linalg.norm(ref_emb) * np.linalg.norm(pred_emb)))

    return {
        "bleu": bleu,
        "rougeL": rougeL,
        "coherence": coherence
    }

def evaluate_rag(query, ground_truth_keywords, reference_answer, k=5):
    gen = generate_answer(query, k=k)
    predicted_answer = gen["answer"]

    ret_metrics = evaluate_retrieval(query, ground_truth_keywords, k=k)
    gen_metrics = evaluate_generation(predicted_answer, reference_answer)

    return {
        "query": query,
        "predicted_answer": predicted_answer,
        "retrieval_metrics": ret_metrics,
        "generation_metrics": gen_metrics,
        "retrieved_docs": gen["retrieved"]
    }


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [27]:
def rag_chat():
    print("\nClinical RAG Assistant (Terminal Mode)")
    print("Evaluation metrics will be computed for every answer.")
    print("Type 'exit' to quit.\n")

    while True:
        query = input("User: ")
        if query.lower() in ["exit", "quit"]:
            print("Goodbye!")
            break

        #Generate Answer
        response = generate_answer(query, k=4)
        answer = response["answer"]

        print("\nAssistant:", answer)

        # Auto-reference for evaluation
        # If no reference is provided, we approximate:
        reference = " ".join([d["text"] for d in response["retrieved"]])[:500]

        # Retrieval evaluation
        gt_keywords = query.lower().split()
        retrieval_eval = evaluate_retrieval(query, gt_keywords, k=4)

        #Generation evaluation
        generation_eval = evaluate_generation(answer, reference)

        print("\nTop retrieved sources:")
        for r in response["retrieved"]:
            print(" -", r["meta"]["source"])

        print("\nEvaluation Metrics")
        print("--------------------------------------------")
        print("Precision:", round(retrieval_eval["precision"], 4))
        print("Recall:   ", round(retrieval_eval["recall"], 4))
        print("BLEU Score:          ", round(generation_eval["bleu"], 4))
        print("ROUGE-L Score:       ", round(generation_eval["rougeL"], 4))
        print("Coherence:           ", round(generation_eval["coherence"], 4))
        print("--------------------------------------------\n")
        print("-" * 60 + "\n")


In [28]:
rag_chat()



Clinical RAG Assistant (Terminal Mode)
Evaluation metrics will be computed for every answer.
Type 'exit' to quit.

User: main respiratory problem?


  coherence = float(np.dot(ref_emb, pred_emb.T) /



Assistant: Shortness of breath

Top retrieved sources:
 - /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Asthma/COPD Asthma/15798127-DS-19.json
 - /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Heart Failure/17205507-DS-19.json
 - /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Asthma/Allergic Asthma/11897861-DS-3.json
 - /content/mimic_data/mimic-iv-ext-direct-1.0.0/samples_extracted/Finished/Gastro-oesophageal Reflux Disease/16277357-DS-15.json.json

Evaluation Metrics
--------------------------------------------
Precision: 0.0
Recall:    0.0
BLEU Score:           0.0
ROUGE-L Score:        0.0706
Coherence:            0.5881
--------------------------------------------

------------------------------------------------------------

User: symptoms of fever

Assistant: chills, but no night sweats, chest pain, dysuria, hematuria, diarria, and constipation

Top retrieved sources:
 - /content/mimic_data/mimic-iv-

In [31]:
%%writefile direct_app.py
import streamlit as st
import pickle
import os
import time
import faiss
import torch

# Load RAG objects
@st.cache_resource
def load_rag_system():
    """
    Loads FAISS index, documents list, and generator + embedder from the environment.
    Assumes that:
        - /content/faiss_index.index
        - /content/documents.pkl
        - embedder + generator already loaded in RAM
    """

    import pickle
    from sentence_transformers import SentenceTransformer
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

    # Load FAISS
    index = faiss.read_index("/content/faiss_index.index")

    # Load documents
    documents = pickle.load(open("/content/documents.pkl", "rb"))

    # Load embedder
    embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    # Load generator
    GEN_MODEL = "google/flan-t5-large"
    tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL)
    model = AutoModelForSeq2SeqLM.from_pretrained(GEN_MODEL).to(
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    generator = pipeline("text2text-generation",
                         model=model,
                         tokenizer=tokenizer,
                         device=0 if torch.cuda.is_available() else -1)

    return index, documents, embedder, generator


index, documents, embedder, generator = load_rag_system()

# RAG Helpers
import numpy as np


def retrieve(query, k=5):
    q_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(q_emb)
    D, I = index.search(q_emb, k)

    results = []
    for score, idx in zip(D[0], I[0]):
        if idx < 0:
            continue
        doc = documents[idx]
        results.append({
            "text": doc["text"],
            "meta": doc["meta"],
            "score": float(score)
        })
    return results


def build_prompt(query, retrieved):
    header = (
        "You are a clinical assistant. Use ONLY the provided context to answer. "
        "If the context doesn't contain the answer, say 'Insufficient information in the provided records.'\n\n"
    )
    context = ""
    for i, d in enumerate(retrieved):
        short = " ".join(d["text"].split()[:350])
        context += f"Document {i+1} (score={d['score']:.3f}) — {d['meta']['source']}:\n{short}\n\n---\n"

    return f"{header}CONTEXT:\n{context}\nQUESTION: {query}\nANSWER:"


def generate_answer(query, k=4):
    retrieved = retrieve(query, k)
    prompt = build_prompt(query, retrieved)

    try:
        out = generator(prompt, max_new_tokens=256, do_sample=False)[0]["generated_text"]
    except:
        out = "Model input overflow. Context too large."

    return {
        "answer": out,
        "retrieved": retrieved,
        "prompt": prompt,
    }

# Streamlit UI
st.set_page_config(page_title="DiReCT Clinical RAG", layout="wide")
st.title("DiReCT — Clinical RAG System")
st.write("Enter clinical queries and inspect the retrieved documents + generated answer.")

query = st.text_input("Clinical Query:", placeholder="e.g., symptoms of pneumonia")
run_button = st.button("Run")

if run_button and query.strip():
    with st.spinner("Retrieving + Generating..."):
        t0 = time.time()
        result = generate_answer(query)
        dt = round(time.time() - t0, 2)

    st.subheader("Final Answer")
    st.success(result["answer"])

    st.write(f"Generated in **{dt} seconds**")

    # Retrieved Docs
    st.subheader("Retrieved Documents")
    for i, d in enumerate(result["retrieved"]):
        with st.expander(f"Document {i+1} — Score {d['score']:.3f} — {d['meta']['source']}"):
            st.write(d["text"])


Overwriting direct_app.py


In [21]:
!pip install -q streamlit pyngrok


In [32]:
from pyngrok import ngrok
import getpass

print("Enter your ngrok token:")
NGROK = getpass.getpass()
ngrok.set_auth_token(NGROK)

get_ipython().system_raw("streamlit run direct_app.py --server.port 6006 &")

public_url = ngrok.connect(6006)
public_url


Enter your ngrok token:
··········


<NgrokTunnel: "https://horacio-prettyish-accustomably.ngrok-free.dev" -> "http://localhost:6006">