<a href="https://colab.research.google.com/github/BklyninLA/Building-of-Roses/blob/main/Copy_of_Kenya_Law_Dashboard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the dashboard's Python dependencies (quietly), the small English
# spaCy model, and the DejaVu fonts. app.py loads DejaVuSans.ttf from
# /usr/share/fonts/truetype/dejavu/ for its PDF export.
# NOTE(review): app.py also imports embeddings_util, which needs
# sentence-transformers; that package is only installed in a later cell.
!pip install streamlit pyngrok fpdf2 pymupdf python-docx spacy --quiet
!python -m spacy download en_core_web_sm
!apt-get install fonts-dejavu -y

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.7/72.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.7/251.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m91.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m76.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may 

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import os
import re
import spacy
import fitz
import docx
from io import BytesIO
from fpdf import FPDF
from embeddings_util import semantic_search, build_embeddings

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(page_title="Kenya Law Dashboard", layout="wide")

# Locations of the case dataset and the pre-rendered heatmap image.
DATA_PATH = "data/kenya_law_full.csv"
HEATMAP_PATH = "data/coverage_heatmap.png"

# Without the dataset there is nothing to show: report the problem and halt
# this script run.
if not os.path.exists(DATA_PATH):
    st.error("CSV not found. Please upload kenya_law_full.csv to data/")
    st.stop()

# Module-level frame shared by the tabs further down.
df = pd.read_csv(DATA_PATH)

st.title("🌹 Kenya Law Dashboard")
st.caption("LexisNexis-style search and analysis for Kenyan legal documents.")

# ---------- Display a single result ----------
def display_result(row, highlight_pattern=None):
    """Render one search result with a view toggle, PDF export and source link.

    Args:
        row: pandas Series for one document. The fields ``type``, ``source``,
            ``date``, ``title``, ``summary``, ``full_text`` and ``url`` are
            read when present; ``row.name`` is used to build widget keys.
        highlight_pattern: Optional compiled regex. Every match is wrapped in
            ``<mark>`` using a ``\\1`` back-reference, so the pattern must
            define capture group 1.
    """
    def _text(value, default=""):
        # Blank CSV cells arrive as NaN (a float); regex and PDF calls need str.
        return default if pd.isna(value) else str(value)

    meta_info = [
        f"**{label}:** {row[field]}"
        for label, field in (("Type", "type"), ("Source", "source"), ("Date", "date"))
        if pd.notna(row.get(field))
    ]
    st.markdown(" | ".join(meta_info))

    view_choice = st.radio("View:", ["Summary", "Full Text"], key=f"view_{row.name}", horizontal=True)
    if view_choice == "Full Text" and pd.notna(row.get("full_text")):
        text_to_display = _text(row.get("full_text"))
    else:
        text_to_display = _text(row.get("summary"))

    # Regroup the text into pseudo-paragraphs of three sentences each.
    sentences = re.split(r'(?<=[.!?]) +', text_to_display)
    paragraphs = [" ".join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
    formatted_text = "\n\n".join(paragraphs)

    # Bold "X v. Y" style names and "[2020] ABC" style report references.
    # NOTE(review): re.IGNORECASE makes the [A-Z]/[a-z] classes match either
    # case, so any "word v. word" is bolded - confirm that is intended.
    citation_pattern = re.compile(r'(\b[A-Z][a-z]+ v\. [A-Z][a-z]+\b|\[\d{4}\]\s+[A-Z]+)', re.IGNORECASE)
    formatted_text = citation_pattern.sub(r'**\1**', formatted_text)

    if highlight_pattern:
        formatted_text = highlight_pattern.sub(r'<mark>\1</mark>', formatted_text)

    st.markdown(formatted_text, unsafe_allow_html=True)

    if st.button("📥 Download as PDF", key=f"pdf_{row.name}"):
        pdf = FPDF()
        pdf.add_page()
        pdf.add_font("DejaVu", "", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", uni=True)
        pdf.set_font("DejaVu", "", 12)

        def _write(height, text):
            # Return to the left margin first: after a multi_cell fpdf2 can
            # leave the cursor at the right edge, giving the next full-width
            # cell no horizontal room.
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(0, height, text)

        _write(10, _text(row.get("title"), "(untitled)"))
        _write(8, f"Type: {_text(row.get('type'))}")
        _write(8, f"Source: {_text(row.get('source'))}")
        _write(8, f"Date: {_text(row.get('date'))}")
        pdf.ln(5)
        _write(6, text_to_display)

        # fpdf2 returns a bytearray; legacy PyFPDF returns a latin-1 str.
        # Encoding as UTF-8 fails on the former and corrupts the latter.
        raw = pdf.output(dest="S")
        pdf_bytes = raw.encode("latin-1") if isinstance(raw, str) else bytes(raw)

        safe_title = re.sub(r'[^a-zA-Z0-9_\-]', '_', _text(row.get("title"), "case"))
        st.download_button("Click to Save PDF", data=pdf_bytes, file_name=f"{safe_title}.pdf", mime="application/pdf")

    if "url" in row and pd.notna(row["url"]):
        st.link_button("Open source", str(row["url"]))
    st.divider()

# ---------- Batch export ----------
def batch_export_pdf(results_df, highlight_pattern=None):
    """Build a single PDF containing every row of ``results_df``, one per page.

    Args:
        results_df: DataFrame of documents. The fields ``title``, ``type``,
            ``source``, ``date``, ``full_text`` and ``summary`` are read when
            present.
        highlight_pattern: Optional compiled regex. Matches are replaced by
            their capture group 1 (the PDF has no highlighting markup), so the
            pattern must define group 1.

    Returns:
        The PDF document as ``bytes``.
    """
    def _text(value, default=""):
        # Blank CSV cells arrive as NaN (a float); regex and PDF calls need str.
        return default if pd.isna(value) else str(value)

    pdf = FPDF()
    pdf.add_font("DejaVu", "", "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", uni=True)
    pdf.set_font("DejaVu", "", 11)

    def _write(height, text):
        # Return to the left margin first: after a multi_cell fpdf2 can leave
        # the cursor at the right edge, giving the next full-width cell no
        # horizontal room.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, height, text)

    for _, row in results_df.iterrows():
        pdf.add_page()
        pdf.set_font("DejaVu", "", 12)
        _write(10, _text(row.get("title"), "(untitled)"))
        _write(8, f"Type: {_text(row.get('type'))}")
        _write(8, f"Source: {_text(row.get('source'))}")
        _write(8, f"Date: {_text(row.get('date'))}")
        pdf.ln(5)

        # Prefer the full text; fall back to the summary when it is missing
        # or blank in the CSV (NaN), not only when the column is absent.
        full_text = row.get("full_text")
        text_to_display = _text(full_text) if pd.notna(full_text) else _text(row.get("summary"))

        # Regroup the text into pseudo-paragraphs of three sentences each.
        sentences = re.split(r'(?<=[.!?]) +', text_to_display)
        paragraphs = [" ".join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
        formatted_text = "\n\n".join(paragraphs)

        # The on-screen view bolds citations with Markdown. The PDF version
        # replaced each citation with itself (a no-op), so that per-row regex
        # compile and substitution has been dropped.
        if highlight_pattern:
            formatted_text = highlight_pattern.sub(r'\1', formatted_text)

        pdf.set_font("DejaVu", "", 11)
        _write(6, formatted_text)

    # fpdf2 returns a bytearray; legacy PyFPDF returns a latin-1 str.
    # Encoding as UTF-8 fails on the former and corrupts the latter.
    raw = pdf.output(dest="S")
    return raw.encode("latin-1") if isinstance(raw, str) else bytes(raw)

# ---------- Tabs ----------
tab1, tab2, tab3, tab4 = st.tabs(["Keyword Search", "Semantic Search (AI)", "Visuals", "Brief Analysis"])

# --- Tab 1: Keyword Search ---
# Sidebar filters (type, year) plus a free-text keyword narrow the dataset.
with tab1:
    st.sidebar.header("Filters")
    doc_type = st.sidebar.selectbox("Type", ["All"] + sorted(df["type"].dropna().unique()))
    # Unparseable dates become NaT and are dropped from the year choices.
    years = sorted(pd.to_datetime(df["date"], errors="coerce").dt.year.dropna().astype(int).unique())
    year = st.sidebar.selectbox("Year", ["All"] + [int(y) for y in years])
    keyword = st.text_input("Keyword")

    filtered = df.copy()
    if doc_type != "All":
        filtered = filtered[filtered["type"] == doc_type]
    if year != "All":
        filtered_years = pd.to_datetime(filtered["date"], errors="coerce").dt.year
        filtered = filtered[filtered_years == int(year)]
    if keyword:
        # regex=False: the keyword is user input and must be matched literally.
        # As a regex, typing e.g. "(" or "[" raised re.error and crashed the page.
        mask = (
            filtered["title"].fillna("").str.contains(keyword, case=False, regex=False) |
            filtered["summary"].fillna("").str.contains(keyword, case=False, regex=False)
        )
        filtered = filtered[mask]

    st.subheader(f"{len(filtered)} results")
    if not filtered.empty:
        pdf_bytes = batch_export_pdf(filtered)
        st.download_button("📦 Download ALL results as one PDF", data=pdf_bytes, file_name="case_bundle.pdf", mime="application/pdf")
    for _, row in filtered.iterrows():
        st.markdown(f"### {row.get('title','(untitled)')}")
        display_result(row)

# --- Tab 2: Semantic Search ---
# Embeddings are built on first use, then the top-k nearest documents are shown.
with tab2:
    st.write("Search by meaning, not just keywords.")
    query = st.text_input("Enter a legal research query")
    k = st.slider("Results", 5, 50, 10)
    if st.button("Search") and query.strip():
        with st.spinner("Searching..."):
            if not os.path.exists("data/embeddings.npy"):
                build_embeddings(DATA_PATH)
            results = semantic_search(query, k=k, csv_path=DATA_PATH)
        if not results.empty:
            pdf_bytes = batch_export_pdf(results)
            st.download_button("📦 Download ALL results as one PDF", data=pdf_bytes, file_name="case_bundle.pdf", mime="application/pdf")
        # NOTE(review): display_result keys its widgets on row.name only, so a
        # row rendered here and in tab 1 during the same run may collide on
        # widget keys - confirm and add a per-tab key prefix if so.
        for _, row in results.iterrows():
            st.markdown(f"### {row.get('title','(untitled)')}")
            display_result(row)

# --- Tab 3: Visuals ---
with tab3:
    if os.path.exists(HEATMAP_PATH):
        st.image(HEATMAP_PATH, use_column_width=True)
    else:
        st.warning("Heatmap not found.")

# --- Tab 4: Brief Analysis ---
with tab4:
    st.write("📄 Upload a brief or paste text. We'll suggest the most relevant authorities from the database.")

    uploaded_file = st.file_uploader("Upload a .txt, .pdf, or .docx file", type=["txt", "pdf", "docx"])
    pasted_text = st.text_area("Or paste text here", height=200)
    k = st.slider("Number of suggested documents", 5, 50, 10, key="brief_k")

    # Extract plain text from the upload according to its file extension.
    # NOTE(review): the cell ends here. ".docx" uploads, `pasted_text` and `k`
    # are never consumed, and `content` is not used for any search yet.
    content = ""
    if uploaded_file:
        ext = uploaded_file.name.lower().split(".")[-1]
        if ext == "txt":
            content = uploaded_file.read().decode("utf-8", errors="ignore")
        elif ext == "pdf":
            pdf_doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
            content = "\n".join([page.get_text() for page in pdf_doc])

Overwriting app.py


In [None]:
import os
import subprocess
import time
from getpass import getpass

from pyngrok import ngrok

# Never hard-code the ngrok authtoken in a shared notebook: read it from the
# environment, or prompt for it without echoing. The token that used to be
# committed here should be revoked in the ngrok dashboard.
auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(auth_token)

# Kill any previous tunnels
ngrok.kill()

# Start Streamlit in background
print("🚀 Launching Streamlit...")
subprocess.Popen(["streamlit", "run", "app.py"])

# Wait for Streamlit to boot
time.sleep(10)

# Connect ngrok
public_url = ngrok.connect(8501)
print("🔗 Your dashboard is live at:", public_url)

🚀 Launching Streamlit...
🔗 Your dashboard is live at: NgrokTunnel: "https://21a988429218.ngrok-free.app" -> "http://localhost:8501"


In [None]:
%%writefile embeddings_util.py
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

MODEL_NAME = "all-MiniLM-L6-v2"

def build_embeddings(csv_path):
    """Encode every summary in the CSV and save the vectors to data/embeddings.npy.

    Args:
        csv_path: Path to a CSV file that has a ``summary`` column. Missing
            summaries are encoded as empty strings.
    """
    frame = pd.read_csv(csv_path)
    encoder = SentenceTransformer(MODEL_NAME)
    summaries = frame["summary"].fillna("").tolist()
    np.save(
        "data/embeddings.npy",
        encoder.encode(summaries, show_progress_bar=True),
    )

def semantic_search(query, k=10, csv_path="data/kenya_law_full.csv"):
    """Return the ``k`` rows of the CSV whose summaries best match ``query``.

    Rows are ranked by cosine similarity between the query embedding and the
    vectors stored in data/embeddings.npy, best match first.

    Args:
        query: Free-text search query.
        k: Maximum number of rows to return; values <= 0 yield no rows.
        csv_path: Path to the dataset the embeddings were built from.

    Returns:
        A DataFrame copy of the best-matching rows (empty if nothing to rank).
    """
    df = pd.read_csv(csv_path)
    if df.empty or k <= 0:
        return df.iloc[0:0].copy()

    embeddings = np.load("data/embeddings.npy")
    if len(embeddings) != len(df):
        # The saved vectors belong to a different version of the CSV (e.g. it
        # was re-scraped). Their positions no longer line up with df rows, so
        # rebuild them rather than returning wrong rows or an IndexError.
        build_embeddings(csv_path)
        embeddings = np.load("data/embeddings.npy")

    # NOTE(review): the model is loaded again on every call; consider caching.
    model = SentenceTransformer(MODEL_NAME)
    query_embedding = model.encode([query])
    scores = cosine_similarity(query_embedding, embeddings)[0]
    top_indices = scores.argsort()[::-1][:k]
    return df.iloc[top_indices].copy()

Writing embeddings_util.py


In [None]:
!pip install sentence-transformers



In [None]:
import os
import random
from datetime import datetime, timedelta

import pandas as pd

# Sample data pools
types = ["Judgment", "Ruling", "Advisory", "Opinion", "Directive"]
sources = ["High Court Nairobi", "Supreme Court", "Tax Tribunal", "Environment & Land Court", "Court of Appeal"]
parties = ["Kamau", "Wanjiru", "Ochieng", "Mwangi", "KRA", "County Govt", "Republic", "NEMA", "Safaricom", "IEBC"]

# Generate 50 synthetic cases, each between two distinct parties and dated
# 30 to 1500 days before today.
cases = []
for i in range(50):
    party1 = random.choice(parties)
    party2 = random.choice([p for p in parties if p != party1])
    title = f"{party1} v. {party2}"
    doc_type = random.choice(types)
    source = random.choice(sources)
    date = datetime.today() - timedelta(days=random.randint(30, 1500))
    date_str = date.strftime("%Y-%m-%d")
    summary = f"This is a {doc_type.lower()} issued by the {source} concerning a dispute between {party1} and {party2}."
    full_text = f"The full text of the case titled '{title}' elaborates on the legal arguments, evidence presented, and the final decision rendered by the {source}. It addresses the core issues raised by both parties and provides precedent for future cases involving similar matters."
    url = f"https://kenyalaw.org/case/{i+1}"

    cases.append({
        "title": title,
        "summary": summary,
        "type": doc_type,
        "source": source,
        "date": date_str,
        "url": url,
        "full_text": full_text
    })

# Save to CSV. Create data/ first: this cell can run before any cell that
# makes the directory, and to_csv does not create missing parent folders.
os.makedirs("data", exist_ok=True)
df = pd.DataFrame(cases)
df.to_csv("data/kenya_law_full.csv", index=False)
print("✅ kenya_law_full.csv created with 50 sample cases.")

✅ kenya_law_full.csv created with 50 sample cases.


In [None]:
!pip install requests beautifulsoup4 pandas tqdm --quiet
!mkdir -p data

In [None]:
!pip install requests beautifulsoup4 pandas tqdm --quiet
!mkdir -p data

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from tqdm import tqdm

# Sections to scrape: (name, base_url, type, source)
# Each base_url ends in "page=" so that scrape_case_list can append the page
# number; `type` and `source` are stored verbatim on every scraped case.
sections = [
    ("High Court Judgments", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=1&content_type=judgment&page=", "Judgment", "High Court"),
    ("Court of Appeal Rulings", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=2&content_type=ruling&page=", "Ruling", "Court of Appeal"),
    ("Environment & Land Court", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=3&content_type=judgment&page=", "Judgment", "Environment & Land Court"),
    ("Tax Tribunal Decisions", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=4&content_type=judgment&page=", "Judgment", "Tax Tribunal"),
    ("Supreme Court Opinions", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=5&content_type=judgment&page=", "Opinion", "Supreme Court"),
]

# Module-level accumulator that scrape_case_list appends every case to.
cases = []

def scrape_case_list(base_url, doc_type, source, max_cases=50):
    """Scrape up to ``max_cases`` case pages from a paginated listing.

    Every scraped case is appended to the module-level ``cases`` list as a
    dict with the keys title, summary, type, source, date, url and full_text.

    Args:
        base_url: Listing URL ending in ``page=``; the page number is appended.
        doc_type: Value stored in each case's ``type`` field.
        source: Value stored in each case's ``source`` field.
        max_cases: Stop after this many cases have been collected.

    Returns:
        The number of cases collected by this call.
    """
    count = 0
    page = 1
    seen = set()
    with tqdm(total=max_cases, desc=f"{source} ({doc_type})", unit="case") as pbar:
        while count < max_cases:
            url = f"{base_url}{page}"
            try:
                resp = requests.get(url, timeout=10)
            except requests.RequestException as e:
                print(f"Error fetching {url}: {e}")
                break
            if resp.status_code != 200:
                break
            soup = BeautifulSoup(resp.text, "html.parser")
            links = soup.select("a[href*='/caselaw/cases/view/']")

            # Keep only links not met on an earlier page.
            new_hrefs = []
            for a in links:
                href = a.get("href")
                if href and href not in seen and "/caselaw/cases/view/" in href:
                    seen.add(href)
                    new_hrefs.append(href)
            if not new_hrefs:
                # No links, or only repeats: without this exit the loop kept
                # requesting page after page and never reached max_cases.
                break

            for href in new_hrefs:
                # Handle both absolute and relative URLs.
                if href.startswith("http"):
                    full_url = href
                else:
                    full_url = "https://kenyalaw.org" + href
                try:
                    case_resp = requests.get(full_url, timeout=10)
                    # Do not store error pages as "Untitled" cases.
                    case_resp.raise_for_status()
                    case_soup = BeautifulSoup(case_resp.text, "html.parser")
                    heading = case_soup.find("h1")
                    title = heading.get_text(strip=True) if heading else "Untitled"
                    date = ""
                    # First text node containing "Date"; keep what follows the last colon.
                    date_tag = case_soup.find(string=lambda t: "Date" in t)
                    if date_tag:
                        date = date_tag.strip().split(":")[-1].strip()
                    summary_tag = case_soup.find("div", {"id": "case_metadata"})
                    summary = summary_tag.get_text(" ", strip=True) if summary_tag else ""
                    full_text_tag = case_soup.find("div", {"id": "case_content"})
                    full_text = full_text_tag.get_text("\n", strip=True) if full_text_tag else ""
                    cases.append({
                        "title": title,
                        "summary": summary if summary else full_text[:300] + "...",
                        "type": doc_type,
                        "source": source,
                        "date": date,
                        "url": full_url,
                        "full_text": full_text
                    })
                    count += 1
                    pbar.update(1)
                    print(f"  ✔ {title}")
                    if count >= max_cases:
                        break
                    time.sleep(1)  # polite delay
                except Exception as e:
                    # Best effort: report the failing page and move on.
                    print(f"Error scraping {full_url}: {e}")
            page += 1
    return count

# Run scraper for each section; results accumulate in the module-level `cases`.
for name, base_url, doc_type, source in sections:
    print(f"\n🔍 Scraping {name}...")
    scrape_case_list(base_url, doc_type, source, max_cases=50)  # adjust max_cases if needed

# Save to CSV
# This only runs once every section has finished, so interrupting the loop
# above leaves the CSV unwritten.
df = pd.DataFrame(cases)
df.to_csv("data/kenya_law_full.csv", index=False)
print(f"\n✅ Saved {len(df)} cases to data/kenya_law_full.csv")


🔍 Scraping High Court Judgments...


High Court (Judgment):   2%|▏         | 1/50 [00:03<02:31,  3.09s/case]

  ✔ Untitled


High Court (Judgment):   4%|▍         | 2/50 [00:06<02:25,  3.04s/case]

  ✔ Untitled


High Court (Judgment):   6%|▌         | 3/50 [00:09<02:31,  3.23s/case]

  ✔ Untitled


High Court (Judgment):   8%|▊         | 4/50 [00:12<02:28,  3.22s/case]

  ✔ Untitled


High Court (Judgment):  10%|█         | 5/50 [00:16<02:26,  3.26s/case]

  ✔ Untitled


High Court (Judgment):  12%|█▏        | 6/50 [00:18<02:14,  3.05s/case]

  ✔ Untitled


High Court (Judgment):  14%|█▍        | 7/50 [00:22<02:14,  3.13s/case]

  ✔ Untitled


High Court (Judgment):  16%|█▌        | 8/50 [00:24<02:05,  2.99s/case]

  ✔ Untitled


High Court (Judgment):  18%|█▊        | 9/50 [00:27<02:01,  2.97s/case]

  ✔ Untitled


High Court (Judgment):  20%|██        | 10/50 [00:30<02:03,  3.08s/case]

  ✔ Untitled


High Court (Judgment):  20%|██        | 10/50 [13:41<54:45, 82.15s/case]


KeyboardInterrupt: 

In [None]:
# Scrape 10 cases per section through app.py's helper and overwrite the CSV.
# Importing `app` executes all of app.py's top-level code, Streamlit calls
# included, which is why the output shows a "without `streamlit run`" warning.
from app import scrape_all_sections
total = scrape_all_sections(max_per_section=10, save_path="data/kenya_law_full.csv")
print(f"Scraped {total} cases.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2025-09-17 14:35:53.408 Session state does not function when running a script without `streamlit run`


Scraped 50 cases.


In [None]:
!pip install streamlit requests beautifulsoup4 pandas numpy sentence-transformers scikit-learn fpdf pymupdf python-docx pyngrok --quiet
!mkdir -p data

app_code = r'''
# Kenya Law Dashboard — Justice-Centric Build
import os, time, pandas as pd, streamlit as st, requests
from bs4 import BeautifulSoup

DATA_PATH = "data/kenya_law_full.csv"

st.set_page_config(page_title="Kenya Law Dashboard", layout="wide")

def scrape_case_list(base_url, doc_type, source, max_cases=10):
    """Scrape up to ``max_cases`` case pages from a paginated listing.

    Progress and errors are reported through Streamlit widgets.

    Args:
        base_url: Listing URL ending in ``page=``; the page number is appended.
        doc_type: Value stored in each case's ``type`` field.
        source: Value stored in each case's ``source`` field.
        max_cases: Stop after this many cases have been collected.

    Returns:
        A list of dicts with the keys title, summary, type, source, date,
        url and full_text.
    """
    cases, count, page, seen = [], 0, 1, set()
    progress = st.progress(0, text=f"Scraping {source} ({doc_type})…")
    while count < max_cases:
        url = f"{base_url}{page}"
        try:
            resp = requests.get(url, timeout=20)
        except Exception as e:
            st.write(f"List error: {url} → {e}")
            break
        if resp.status_code != 200:
            break
        soup = BeautifulSoup(resp.text, "html.parser")
        links = soup.select("a[href*='/caselaw/cases/view/']")

        # Keep only links not met on an earlier page.
        new_hrefs = []
        for a in links:
            href = a.get("href")
            if href and "/caselaw/cases/view/" in href and href not in seen:
                seen.add(href)
                new_hrefs.append(href)
        if not new_hrefs:
            # No links, or only repeats: without this exit the loop would keep
            # requesting further pages and never reach max_cases.
            break

        for href in new_hrefs:
            full_url = href if href.startswith("http") else "https://kenyalaw.org" + href
            # Up to three attempts; the for/else branch runs only when no
            # attempt returned HTTP 200.
            for _ in range(3):
                try:
                    case_resp = requests.get(full_url, timeout=60)
                    if case_resp.status_code == 200:
                        break
                except requests.exceptions.RequestException:
                    # Previously only ReadTimeout was caught, so a connection
                    # error aborted the whole app run.
                    time.sleep(2)
            else:
                st.write(f"⚠️ Skipping {full_url} after 3 failed attempts")
                continue
            try:
                case_soup = BeautifulSoup(case_resp.text, "html.parser")
                heading = case_soup.find("h1")
                title = heading.get_text(strip=True) if heading else "Untitled"
                date = ""
                # First text node containing "Date"; keep what follows the last colon.
                date_label = case_soup.find(string=lambda t: isinstance(t, str) and "Date" in t)
                if date_label:
                    date = date_label.strip().split(":")[-1].strip()
                summary_tag = case_soup.find("div", {"id": "case_metadata"})
                summary = summary_tag.get_text(" ", strip=True) if summary_tag else ""
                full_text_tag = case_soup.find("div", {"id": "case_content"})
                full_text = full_text_tag.get_text("\n", strip=True) if full_text_tag else ""
                if not summary:
                    summary = (full_text[:300] + "...") if full_text else "Summary not available."
                cases.append({
                    "title": title, "summary": summary, "type": doc_type, "source": source,
                    "date": date, "url": full_url, "full_text": full_text
                })
                count += 1
                progress.progress(count / max_cases, text=f"{source} ({doc_type}): {count}/{max_cases}")
                st.write(f"✔ {title}")
                if count >= max_cases:
                    break
                time.sleep(1)  # polite delay between case pages
            except Exception as e:
                # Best effort: report the failing page and move on.
                st.write(f"❌ Error parsing {full_url}: {e}")
        page += 1
    progress.empty()
    return cases

def scrape_all_sections(max_per_section=10, save_path=DATA_PATH):
    """Scrape every configured court section and write the result to a CSV.

    Args:
        max_per_section: Maximum number of cases to collect per section.
        save_path: Destination CSV path. Missing parent folders are created.

    Returns:
        The number of cases written, or 0 when nothing was scraped (in which
        case no file is written).
    """
    sections = [
        ("High Court Judgments", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=1&content_type=judgment&page=", "Judgment", "High Court 🏛️"),
        ("Court of Appeal Rulings", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=2&content_type=ruling&page=", "Ruling", "Court of Appeal ⚖️"),
        ("Supreme Court Opinions", "https://kenyalaw.org/caselaw/cases/advanced_search/?court=5&content_type=judgment&page=", "Opinion", "Supreme Court 👑"),
    ]
    all_cases = []
    for name, base_url, doc_type, source in sections:
        st.write(f"🔍 Scraping {name}…")
        all_cases.extend(scrape_case_list(base_url, doc_type, source, max_cases=max_per_section))
    if not all_cases:
        return 0
    df = pd.DataFrame(all_cases)
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(save_path, index=False)
    return len(df)

def load_csv(path=DATA_PATH):
    """Load the case dataset, guaranteeing that the expected columns exist.

    Args:
        path: CSV file to read.

    Returns:
        A DataFrame with at least the columns title, summary, type, source,
        date, url and full_text. All values are strings and blank cells are
        empty strings. The frame is empty when the file is missing or empty.
    """
    columns = ["title", "summary", "type", "source", "date", "url", "full_text"]
    if not os.path.exists(path) or os.path.getsize(path) == 0:
        return pd.DataFrame(columns=columns)
    try:
        # Read everything as text and keep blank cells as "". With the
        # defaults they become NaN, which is truthy in `if row['url']` and
        # renders as "nan" in the UI.
        df = pd.read_csv(path, dtype=str, keep_default_na=False)
    except pd.errors.EmptyDataError:
        return pd.DataFrame(columns=columns)
    for col in columns:
        if col not in df.columns:
            df[col] = ""
    return df

import html

st.markdown("<h1 style='color:#8B0000;'>🌹 Kenya Law Dashboard</h1>", unsafe_allow_html=True)
st.caption("Search, analyze, and refresh real Kenya Law cases with one click.")

# Sidebar control: re-scrape the dataset on demand.
with st.sidebar:
    st.subheader("⚙️ Controls")
    if st.button("🔄 Refresh Dataset"):
        with st.spinner("Scraping latest cases…"):
            total = scrape_all_sections(max_per_section=10)
        if total > 0:
            st.success(f"✅ Scraped {total} cases.")
        else:
            st.error("❌ No cases scraped. Try again later.")

# Load the dataset; when it is empty, scrape once automatically.
df = load_csv(DATA_PATH)
if df.empty:
    st.warning("Dataset is empty — scraping now…")
    total = scrape_all_sections(max_per_section=10)
    if total > 0:
        st.success(f"✅ Scraped {total} cases.")
        df = load_csv(DATA_PATH)
    else:
        st.error("❌ Could not scrape any cases.")

st.write(f"### 📚 Total Cases: {len(df)}")

for _, row in df.iterrows():
    # Titles come from scraped pages and are placed inside raw HTML, so they
    # are escaped to stop markup in a title being interpreted by the browser.
    safe_title = html.escape(str(row['title']))
    st.markdown(f"<h4 style='color:#8B0000;'>{safe_title}</h4>", unsafe_allow_html=True)
    st.markdown(f"**Court:** {row['source']}  |  **Type:** {row['type']}  |  **Date:** {row['date']}")
    with st.expander("📖 Summary"):
        st.markdown(row['summary'])
    # Only offer a link for a real, non-blank URL string; a NaN read from a
    # blank CSV cell is truthy and used to reach st.link_button.
    url = row['url']
    if isinstance(url, str) and url.strip():
        st.link_button("🔗 View Full Case", url)
    st.divider()
'''

# Write the dashboard source held in `app_code` to app.py.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

# 3. Launch Streamlit + ngrok
import os
import subprocess
import time
from getpass import getpass

from pyngrok import ngrok

# Never hard-code the ngrok authtoken in a shared notebook: read it from the
# environment, or prompt for it without echoing. The token that used to be
# committed here should be revoked in the ngrok dashboard.
auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(auth_token)
ngrok.kill()  # drop any tunnel left over from a previous run

print("🚀 Launching Streamlit...")
subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "8501", "--server.address", "0.0.0.0"])
time.sleep(10)  # give Streamlit time to start before opening the tunnel

public_url = ngrok.connect(8501)
print("🔗 Your dashboard is live at:", public_url)

In [None]:
# 1. Clone the corrected repo
!git clone https://github.com/mendsalbert/legal-semantic-search-starter-code.git
%cd legal-semantic-search-starter-code

# 2. Install dependencies
!pip install -r requirements.txt --quiet

# 3. Launch Streamlit with ngrok
from pyngrok import ngrok
import subprocess, time

# Set your ngrok authtoken
ngrok.set_auth_token("32W0QRSO8dTfnF1USWfETDAud9c_4pVDszjPp3SppJa744DX")

# Kill any previous tunnels
ngrok.kill()

# Start Streamlit in background
print("🚀 Launching Streamlit...")
subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "8501", "--server.address", "0.0.0.0"])

# Wait for Streamlit to boot
time.sleep(10)

# Connect ngrok
public_url = ngrok.connect(8501)
print("🔗 Your dashboard is live at:", public_url)

Cloning into 'legal-semantic-search-starter-code'...
fatal: could not read Username for 'https://github.com': No such device or address
[Errno 2] No such file or directory: 'legal-semantic-search-starter-code'
/content/legal-sementic-search-starter-code/legal-sementic-search-starter-code/legal-sementic-search-starter-code/legal-sementic-search-starter-code
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0m🚀 Launching Streamlit...
🔗 Your dashboard is live at: NgrokTunnel: "https://83c7cd0401be.ngrok-free.app" -> "http://localhost:8501"
