<a href="https://colab.research.google.com/github/JBlizzard-sketch/LoanAI3/blob/main/LoanAI3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# CELL 1: bootstrap project structure, configs, and core utilities.
# Run this first. It creates folders, requirements, .replit, and utility modules used by the app.

import os, textwrap, json, hashlib, sqlite3, random, string, time
from pathlib import Path

# Project root plus the folders the generated app expects to find.
ROOT = Path(".")
DIRS = ["utils", "pages", "models", "data", "assets"]

# Paths are relative to the current working directory; re-running is harmless
# because already-existing folders are accepted.
for folder_name in DIRS:
    os.makedirs(folder_name, exist_ok=True)

# ---------------- .replit + config ----------------
replit_cfg = textwrap.dedent("""
run = "streamlit run app.py --server.port 3000 --server.address 0.0.0.0"
""").strip()

(Path(".replit")).write_text(replit_cfg)

# Optional: Replit Nix (gives gcc for lightgbm/xgboost wheels if needed)
replit_nix = textwrap.dedent("""
{ pkgs }: {
  deps = [
    pkgs.python311Full
    pkgs.gcc
  ];
}
""").strip()
(Path("replit.nix")).write_text(replit_nix)

# ---------------- requirements ----------------
requirements = textwrap.dedent("""
streamlit==1.37.0
pandas>=2.0.0
numpy>=1.24.0
scikit-learn>=1.3.0
xgboost>=2.0.0
lightgbm>=4.0.0
plotly>=5.22.0
matplotlib>=3.8.0
reportlab>=4.0.0
joblib>=1.3.0
""").strip()
(Path("requirements.txt")).write_text(requirements)

# ---------------- README ----------------
readme = textwrap.dedent("""
# LoanIQ (Streamlit) — Credit Scoring & Loan Risk Platform

- Client & Admin roles (SQLite auth).
- Synthetic Kenyan microfinance dataset generator (admin-adjustable).
- Six model families (LogReg, RandomForest, GradientBoosting, XGBoost, LightGBM, Hybrid).
- Model versioning & deployment (best AUC auto-deploy).
- Client dashboards: upload/generate data, predictions, credit scores, eligibility, repayment risk.
- PDF credit reports.
- Admin sandbox: schema edit, backup/restore, impersonation, fraud stress tests.

## Replit
App auto-starts via `.replit`.
""").strip()
(Path("README.md")).write_text(readme)

# ---------------- utils/db.py ----------------
db_py = textwrap.dedent(r'''
import sqlite3, os, time, json
from pathlib import Path

DB_PATH = os.environ.get("LOANIQ_DB", "data/loaniq.sqlite")
Path("data").mkdir(exist_ok=True)

SCHEMA = {
    "users": """
        CREATE TABLE IF NOT EXISTS users (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT UNIQUE,
            password_hash TEXT,
            role TEXT CHECK(role in ('client','admin')) NOT NULL DEFAULT 'client',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    """,
    "datasets": """
        CREATE TABLE IF NOT EXISTS datasets (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            owner TEXT,
            name TEXT,
            meta TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    """,
    "models": """
        CREATE TABLE IF NOT EXISTS models (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            family TEXT,
            version INTEGER,
            metrics TEXT,
            path TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            deployed INTEGER DEFAULT 0
        );
    """,
    "audit": """
        CREATE TABLE IF NOT EXISTS audit (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT,
            action TEXT,
            detail TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    """
}

def get_conn():
    """Open and return a new SQLite connection to the file at DB_PATH.

    check_same_thread=False allows the connection to be used from a thread
    other than the one that opened it. The caller owns the connection and is
    responsible for closing it.
    """
    return sqlite3.connect(DB_PATH, check_same_thread=False)

def init():
    """Create every table described in SCHEMA if it does not exist yet.

    Safe to call repeatedly: each DDL statement uses CREATE TABLE IF NOT EXISTS.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        for ddl in SCHEMA.values():
            cur.execute(ddl)
        conn.commit()
    finally:
        # Always release the connection, even when a DDL statement fails.
        conn.close()

def record_audit(username, action, detail=""):
    """Append one row to the audit table.

    Args:
        username: Name stored in the row's username column.
        action: Short action label (e.g. "login", "register").
        detail: Optional free-text detail; defaults to an empty string.
    """
    conn = get_conn()
    try:
        conn.execute(
            "INSERT INTO audit(username, action, detail) VALUES(?,?,?)",
            (username, action, detail),
        )
        conn.commit()
    finally:
        # Close the connection even if the insert raises.
        conn.close()

def upsert_user(username, password_hash, role="client"):
    """Insert a user row unless the username already exists.

    Uses INSERT OR IGNORE, so an existing user is left untouched (the
    password hash and role are NOT updated).

    Args:
        username: Unique login name.
        password_hash: Already-hashed password to store.
        role: 'client' or 'admin' (enforced by the table's CHECK constraint).
    """
    conn = get_conn()
    try:
        conn.execute(
            "INSERT OR IGNORE INTO users(username,password_hash,role) VALUES(?,?,?)",
            (username, password_hash, role),
        )
        conn.commit()
    finally:
        # Close the connection even if the insert raises.
        conn.close()

def get_user(username):
    """Look up a user by username.

    Returns:
        A tuple (id, username, password_hash, role), or None when no row matches.
    """
    conn = get_conn()
    try:
        cur = conn.execute(
            "SELECT id, username, password_hash, role FROM users WHERE username=?",
            (username,),
        )
        return cur.fetchone()
    finally:
        # Close the connection even if the query raises.
        conn.close()

def list_models():
    """Return all registered models, ordered by family then newest version first.

    Returns:
        A list of tuples (family, version, metrics, path, deployed, created_at);
        metrics is the JSON text exactly as stored.
    """
    conn = get_conn()
    try:
        cur = conn.execute(
            "SELECT family, version, metrics, path, deployed, created_at FROM models ORDER BY family, version DESC"
        )
        return cur.fetchall()
    finally:
        # Close the connection even if the query raises.
        conn.close()

def insert_model(family, version, metrics_dict, path, deployed=0):
    """Register one model version in the models table.

    Args:
        family: Model family label (e.g. "RF").
        version: Integer version number within the family.
        metrics_dict: JSON-serialisable dict; stored as JSON text.
        path: Filesystem path of the serialized model artifact.
        deployed: 1 to flag the row as deployed, 0 otherwise.
    """
    conn = get_conn()
    try:
        conn.execute(
            "INSERT INTO models(family, version, metrics, path, deployed) VALUES(?,?,?,?,?)",
            (family, version, json.dumps(metrics_dict), path, deployed),
        )
        conn.commit()
    finally:
        # Close the connection even if serialisation or the insert raises.
        conn.close()

def mark_deployed(family, version):
    """Make (family, version) the single deployed model.

    The deployed flag is cleared on every other row, not just within the same
    family. Clearing only one family left earlier winners from other families
    flagged as deployed, so a lookup that returns the first deployed row in
    (family, version DESC) order could keep serving a stale model.

    If no row matches (family, version) nothing is changed, so a bad argument
    can never leave the registry without a deployed model.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        cur.execute("SELECT 1 FROM models WHERE family=? AND version=?", (family, version))
        if cur.fetchone() is None:
            return
        # Both updates are committed together, so readers never observe a
        # state with zero or two deployed rows.
        cur.execute("UPDATE models SET deployed=0")
        cur.execute("UPDATE models SET deployed=1 WHERE family=? AND version=?", (family, version))
        conn.commit()
    finally:
        conn.close()
''').strip()
(Path("utils/db.py")).write_text(db_py)

# ---------------- utils/auth.py ----------------
auth_py = textwrap.dedent(r'''
import hashlib
from utils import db

def hash_pw(p:str)->str:
    """Return the hex SHA-256 digest of the password prefixed with a fixed pepper.

    NOTE(review): this is an unsalted, fast hash; stored hashes depend on this
    exact scheme, so changing it requires a migration of existing users.
    """
    peppered = "pepper::" + p
    digest = hashlib.sha256(peppered.encode())
    return digest.hexdigest()

def ensure_admin():
    """Initialise the schema and seed the default "admin" account if it is missing."""
    db.init()
    if db.get_user("admin"):
        # An admin row already exists; leave its stored hash untouched.
        return
    db.upsert_user("admin", hash_pw("Shady868..."), role="admin")

def register(username:str, password:str):
    """Create a new client account.

    Returns:
        (ok, message): ok is False when either field is empty or the username
        is already taken; otherwise the user is stored, the registration is
        audited, and ok is True.
    """
    if not (username and password):
        return False, "Username and password required"

    existing = db.get_user(username)
    if existing:
        return False, "User already exists"

    pw_hash = hash_pw(password)
    db.upsert_user(username, pw_hash, role="client")
    db.record_audit(username, "register", "new client")
    return True, "Registered"

def login(username:str, password:str):
    """Check credentials against the users table.

    Returns:
        (ok, message, user): on success user is a dict with keys "id",
        "username" and "role" and the login is audited; on failure user is None
        and message is "User not found" or "Invalid credentials".
    """
    import hmac  # local import: the module header only imports hashlib

    row = db.get_user(username)
    if not row:
        return False, "User not found", None

    uid, uname, pwhash, role = row
    # Constant-time comparison avoids leaking how many leading characters of
    # the stored hash matched. A NULL hash in the DB is treated as no match.
    if not hmac.compare_digest(pwhash or "", hash_pw(password)):
        return False, "Invalid credentials", None

    db.record_audit(username, "login", role)
    return True, "OK", {"id": uid, "username": uname, "role": role}
''').strip()
(Path("utils/auth.py")).write_text(auth_py)

# ---------------- utils/synth.py ----------------
synth_py = textwrap.dedent(r'''
import numpy as np, pandas as pd, random, string
from datetime import datetime, timedelta

KENYAN_SURNAMES = [
    "Ochieng","Odhiambo","Otieno","Wanjiru","Wambui","Chebet","Mutiso","Mwangi","Koech","Kipchoge",
    "Njoroge","Kamau","Achieng","Cherono","Kiplagat","Kimutai","Korir","Chepngeno","Makena","Barasa",
    "Nyambura","Njoki","Kiptoo","Kipruto","Cheruiyot","Wairimu","Muthoni","Atieno","Wafula","Were",
]
ENGLISH_FIRST = [
    "Mary","John","Grace","Peter","Elizabeth","Kevin","Faith","James","Ann","Joseph","Irene","Paul",
    "Daniel","Sarah","David","Mercy","Esther","Samuel","Cynthia","Michael","Alice","Brian","Ivy","George"
]

OCCUPATIONS = [
    "mama mboga","shop owner","boda boda","farmer","salonist","tailor","carpenter","teacher","mechanic","hawker"
]
PRODUCTS = ["Micro-Working-Cap", "School-Fees", "Emergency", "Asset-Finance", "Group-Loan"]

def _branches():
    # 70+ pseudo branches across counties
    counties = ["Nairobi","Mombasa","Kisumu","Nakuru","Eldoret","Kakamega","Meru","Nyeri","Thika","Machakos",
                "Naivasha","Kericho","Embu","Kitale","Malindi","Kisii","Garissa","Wajir","Narok","Isiolo",
                "Nanyuki","Voi","Kilifi","Oyugis","Homa Bay","Siaya","Busia","Bungoma","Migori","Keroka",
                "Litein","Bomet","Kapsabet","Lodwar","Marsabit","Maua","Chuka","Mtwapa","Ukunda","Tala",
                "Kajiado","Kimilili","Kanduyi","Kabarnet","Marigat","Muhoroni","Awendo","Bondo","Keroka2","Kajiado2",
                "Kangemi","Kawangware","Gikomba","Kayole","Dandora","Kibera","Ruiru","Juja","Kikuyu","Limuru",
                "Karatina","Othaya","Nyahururu","Gilgil","Eldama Ravine","Sotik","Olkalou","Thika2","Mlolongo","Syokimau",
                "Ruaka","Kitengela","Rongai","Athi River","Thika Road-Mall","Two Rivers"]
    return counties

def _skewed_amount(n, low=5000, high=100000, skew=3.0):
    # more small loans: draw from log-normal-ish
    vals = np.random.lognormal(mean=np.log(20000), sigma=skew, size=n)
    vals = np.clip(vals, low, high)
    return vals.astype(int)

def make_name():
    """Return a random "<first> <surname>" string from the two name pools."""
    # Draw the first name before the surname so a seeded `random` yields the
    # same sequence as before.
    first = random.choice(ENGLISH_FIRST)
    surname = random.choice(KENYAN_SURNAMES)
    return f"{first} {surname}"

def generate(n=2000, female_bias=0.62, small_business_bias=0.6, seed=None,
             fraud_rate=0.02):
    """Build a synthetic loan-application DataFrame with n rows.

    Args:
        n: Number of rows to generate.
        female_bias: Probability that a row's gender is "F".
        small_business_bias: Accepted but not used anywhere in this function.
        seed: When not None, seeds both numpy's and Python's global RNGs.
        fraud_rate: Per-row probability of injecting a fraud pattern.

    Returns:
        A pandas DataFrame with columns name, gender, age, dependents,
        occupation, income, branch, product, loan_amount, loan_status,
        repay_good (1 good / 0 default), is_fraud and application_date
        (ISO date string).

    Note:
        application_date is anchored to datetime.today(), so even with a fixed
        seed the dates shift from one day to the next.
    """
    if seed is not None:
        np.random.seed(seed); random.seed(seed)
    branches = _branches()
    rows = []
    # Application dates fall within the 365 days leading up to today.
    base_date = datetime.today() - timedelta(days=365)
    # All loan amounts are drawn up front, before the per-row draws below.
    amounts = _skewed_amount(n)
    for i in range(n):
        gender = np.random.choice(["F","M"], p=[female_bias, 1-female_bias])
        # Occupation weights are positional: they line up with OCCUPATIONS.
        occ = np.random.choice(OCCUPATIONS, p=[0.18,0.17,0.15,0.12,0.1,0.08,0.07,0.05,0.04,0.04])
        dependents = np.random.choice([0,1,2,3,4,5], p=[0.1,0.2,0.28,0.24,0.12,0.06])
        # Gamma-distributed income; non-female rows get a flat +2000.
        income = int(np.random.gamma(4, 8000) + (0 if gender=="F" else 2000))
        loan_amount = int(amounts[i])
        branch = random.choice(branches)
        product = random.choice(PRODUCTS)
        # Age is normal(34, 8) clamped to [18, 65].
        age = int(np.clip(np.random.normal(34, 8), 18, 65))
        # repayment label (1 good / 0 default)
        # Probability of good repayment: a 0.72 base, raised for smaller
        # loans, female borrowers and selected occupations, lowered for
        # four or more dependents; then clamped to [0.05, 0.95].
        base_prob = 0.72 \
            + 0.000002*(max(0, 150000 - loan_amount)) \
            + 0.05*(gender=="F") \
            + 0.04*(occ in ["mama mboga","shop owner","teacher","farmer"]) \
            - 0.06*(dependents>=4)
        base_prob = max(0.05, min(0.95, base_prob))
        repay_good = np.random.rand() < base_prob
        # Loan status is drawn independently of the repayment label.
        status = np.random.choice(["active","rejected","closed"], p=[0.45,0.12,0.43])

        # fraud injection
        # Fraud rows get a lower income (floored at 1000), a larger loan
        # (capped at 100000) and are always labelled as defaults.
        is_fraud = np.random.rand() < fraud_rate
        if is_fraud:
            income = max(1000, income - np.random.randint(10000,30000))
            loan_amount = min(100000, loan_amount + np.random.randint(15000,40000))
            repay_good = False

        rows.append({
            "name": make_name(),
            "gender": gender,
            "age": age,
            "dependents": dependents,
            "occupation": occ,
            "income": income,
            "branch": branch,
            "product": product,
            "loan_amount": loan_amount,
            "loan_status": status,
            "repay_good": int(repay_good),
            "is_fraud": int(is_fraud),
            "application_date": (base_date + timedelta(days=np.random.randint(0,365))).date().isoformat()
        })
    df = pd.DataFrame(rows)
    return df
''').strip()
(Path("utils/synth.py")).write_text(synth_py)

# ---------------- utils/ml.py ----------------
ml_py = textwrap.dedent(r'''
import os, json, joblib
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, accuracy_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

try:
    import xgboost as xgb
except Exception:
    xgb = None

try:
    import lightgbm as lgb
except Exception:
    lgb = None

from utils import db
os.makedirs("models", exist_ok=True)

def _prep(df: pd.DataFrame):
    """Split df into features and target and build the unfitted preprocessor.

    Returns:
        (X, y, pre): X is the feature frame (without "repay_good", "name" and
        "application_date"), y is the "repay_good" values array, and pre is a
        ColumnTransformer that one-hot encodes the non-numeric columns and
        passes the rest through unchanged.
    """
    expected_cols = ["gender","age","dependents","occupation","income","branch","product",
                     "loan_amount","loan_status","is_fraud","application_date","name"]
    zero_filled = ("age","dependents","income","loan_amount","is_fraud")

    y = df["repay_good"].values
    X = df.drop(columns=["repay_good"])

    # Tolerate missing features: numeric-style ones default to 0, the others
    # to the placeholder category "unknown".
    for col in expected_cols:
        if col in X.columns:
            continue
        X[col] = 0 if col in zero_filled else "unknown"

    # Free-text / non-predictive columns never reach the model.
    X = X.drop(columns=["name","application_date"], errors="ignore")

    numeric = set(X.select_dtypes(include=[np.number]).columns)
    cat_cols = [c for c in X.columns if c not in numeric]

    encoder = OneHotEncoder(handle_unknown="ignore")
    pre = ColumnTransformer([("cat", encoder, cat_cols)], remainder="passthrough")
    return X, y, pre

from sklearn.base import BaseEstimator, ClassifierMixin


class Hybrid(ClassifierMixin, BaseEstimator):
    """Simple hybrid: averages the positive-class probabilities of two classifiers.

    Defined at module level (not inside _fit_family) because pickle, which
    joblib.dump relies on, cannot serialise classes that are local to a
    function. The mixin is listed before BaseEstimator, as scikit-learn's
    developer guide recommends.
    """

    def __init__(self, rf, gb):
        self.rf = rf
        self.gb = gb

    def fit(self, X, y):
        """Fit both underlying estimators on the same data."""
        self.rf.fit(X, y)
        self.gb.fit(X, y)
        self.classes_ = self.rf.classes_
        return self

    def predict_proba(self, X):
        """Return an (n_samples, 2) array: [1 - p, p] with p the averaged positive probability."""
        p1 = self.rf.predict_proba(X)[:,1]
        p2 = self.gb.predict_proba(X)[:,1]
        p = (p1+p2)/2
        return np.vstack([1-p, p]).T

    def predict(self, X):
        """Return hard 0/1 labels using a 0.5 threshold on the averaged probability."""
        return (self.predict_proba(X)[:,1] >= 0.5).astype(int)


def _fit_family(family:str, pre, X_train, y_train):
    """Fit a preprocessing + classifier pipeline for one model family.

    Args:
        family: One of "LogReg", "RF", "GB", "XGBoost", "LightGBM", "Hybrid".
        pre: Unfitted preprocessing transformer used as the first pipeline step.
        X_train, y_train: Training features and labels.

    Returns:
        (pipeline, None) on success, or (None, reason) when the optional
        xgboost / lightgbm package needed for the family is not installed.

    Raises:
        ValueError: If family is not one of the supported names.
    """
    if family=="LogReg":
        clf = LogisticRegression(max_iter=1000)
    elif family=="RF":
        clf = RandomForestClassifier(n_estimators=250, max_depth=None, n_jobs=-1)
    elif family=="GB":
        clf = GradientBoostingClassifier()
    elif family=="XGBoost":
        if xgb is None:
            return None, "xgboost not installed"
        clf = xgb.XGBClassifier(n_estimators=350, max_depth=5, learning_rate=0.08, subsample=0.8, colsample_bytree=0.8, eval_metric="logloss")
    elif family=="LightGBM":
        if lgb is None:
            return None, "lightgbm not installed"
        clf = lgb.LGBMClassifier(n_estimators=350, max_depth=-1, num_leaves=31, learning_rate=0.08)
    elif family=="Hybrid":
        # Stack-lite: average predicted probabilities of RF + GB.
        clf = Hybrid(
            RandomForestClassifier(n_estimators=220, n_jobs=-1),
            GradientBoostingClassifier(),
        )
    else:
        raise ValueError("Unknown family")
    pipe = Pipeline([("pre", pre), ("clf", clf)])
    pipe.fit(X_train, y_train)
    return pipe, None

def train_and_version(df: pd.DataFrame, families=None, test_size=0.2, seed=42):
    """Train the requested model families, register each as a new version, deploy the best.

    Args:
        df: Training frame; must contain the "repay_good" label column.
        families: Family names to train; defaults to all six.
        test_size: Hold-out fraction for evaluation.
        seed: Random state for the stratified train/test split.

    Returns:
        A list of (family, version, metrics, path) tuples, one per requested
        family. Families whose optional dependency is missing are registered
        with metrics {"status": "skipped", "reason": ...} and no file is written.
    """
    if families is None:
        families = ["LogReg","RF","GB","XGBoost","LightGBM","Hybrid"]

    X, y, pre = _prep(df)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed, stratify=y
    )

    results = []
    for fam in families:
        model, err = _fit_family(fam, pre, X_train, y_train)
        trained_ok = model is not None

        if trained_ok:
            proba = model.predict_proba(X_test)[:,1]
            hard = (proba>=0.5).astype(int)
            metrics = {
                "AUC": round(float(roc_auc_score(y_test, proba)),4),
                "accuracy": round(float(accuracy_score(y_test, hard)),4),
                "recall": round(float(recall_score(y_test, hard)),4),
            }
        else:
            metrics = {"status":"skipped","reason": err}

        # Next version = number of rows already registered for this family + 1.
        version = 1 + len([r for r in db.list_models() if r[0]==fam])
        path = f"models/{fam}_v{version}.joblib"
        if trained_ok:
            joblib.dump(model, path)
        db.insert_model(fam, version, metrics, path, deployed=0)
        results.append((fam, version, metrics, path))

    # Auto-deploy best by AUC among the families trained in this call.
    trained = [r for r in results if "AUC" in r[2]]
    if trained:
        best_fam, best_ver, _, _ = max(trained, key=lambda r: r[2]["AUC"])
        db.mark_deployed(best_fam, best_ver)
    return results

def load_deployed():
    """Return (family, version, metrics_dict, path) for the first deployed model on disk.

    Rows are scanned in the order db.list_models() yields them; rows whose
    artifact file is missing are skipped. Returns None when nothing qualifies.
    """
    for fam, ver, metrics, path, deployed, _created in db.list_models():
        if deployed != 1:
            continue
        if not os.path.exists(path):
            continue
        return fam, ver, json.loads(metrics), path
    return None
''').strip()
(Path("utils/ml.py")).write_text(ml_py)

# ---------------- utils/report.py ----------------
report_py = textwrap.dedent(r'''
from io import BytesIO
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas

def build_credit_report(user, summary: dict) -> bytes:
    """Render a simple A4 PDF credit report and return its bytes.

    Args:
        user: Mapping with "username" and "role" keys, shown in the header line.
        summary: Mapping of label -> value. The "date" entry (if any) is shown
            in the header; every other entry becomes one "key: value" line,
            truncated to 100 characters.

    Returns:
        The complete PDF document as bytes.
    """
    left_margin = 40
    body_font = ("Helvetica", 11)

    with BytesIO() as buf:
        c = canvas.Canvas(buf, pagesize=A4)
        _w, h = A4
        top = h - 40

        y = top
        c.setFont("Helvetica-Bold", 16)
        c.drawString(left_margin, y, "LoanIQ Credit Report")
        y -= 24
        c.setFont(*body_font)
        c.drawString(left_margin, y, f"User: {user['username']}  |  Role: {user['role']}")
        y -= 18
        c.drawString(left_margin, y, f"Summary Date: {summary.get('date','')}")
        y -= 24

        for k, v in summary.items():
            if k == "date":
                continue
            if y < 60:
                # Break *before* drawing so a report that ends exactly at the
                # bottom of a page does not get a trailing blank page.
                c.showPage()
                # showPage resets the graphics state, including the font, so
                # the body font must be re-applied on every new page.
                c.setFont(*body_font)
                y = top
            c.drawString(left_margin, y, f"{k}: {v}"[:100])
            y -= 16

        c.showPage()
        c.save()
        return buf.getvalue()
''').strip()
(Path("utils/report.py")).write_text(report_py)

# ---------------- utils/ui.py ----------------
ui_py = textwrap.dedent(r'''
import streamlit as st
from typing import Dict, Any

def app_header():
    """Inject the app-wide CSS tweaks and render the page title."""
    # Raw CSS is passed through st.markdown; unsafe_allow_html is required for
    # the <style> tag to take effect.
    css = """
    <style>
      .block-container{padding-top:1.5rem;}
      .stButton>button { border-radius:16px; padding:0.6rem 1rem; font-weight:600; }
      .stDownloadButton>button { border-radius:16px; }
    </style>
    """
    st.markdown(css, unsafe_allow_html=True)
    st.title("📊 LoanIQ — Credit & Risk Platform")

def require_auth():
    """Stop the current script run with a warning unless a user is logged in."""
    logged_in = "user" in st.session_state and st.session_state.user is not None
    if logged_in:
        return
    st.warning("Please login to continue.")
    st.stop()

def pill(text, color="#eef6ff"):
    """Render text inside a rounded, coloured inline badge.

    Both text and color are interpolated into raw HTML without escaping, so
    callers must only pass trusted values.
    """
    badge_html = f"<span style='background:{color};padding:4px 10px;border-radius:999px'>{text}</span>"
    st.markdown(badge_html, unsafe_allow_html=True)

def info_row(items):
    """Show one st.metric per (label, value) pair, laid out side by side."""
    columns = st.columns(len(items))
    for column, (label, value) in zip(columns, items):
        with column:
            st.metric(label, value)

def role_tag(role):
    """Show the user's role as a badge: green for clients, orange for any other role."""
    if role == "client":
        background = "#e8f5e9"
    else:
        background = "#fff3e0"
    pill(f"role: {role}", background)
''').strip()
(Path("utils/ui.py")).write_text(ui_py)

# seed the DB and ensure admin exists
from utils import db as _db
from utils import auth as _auth
_db.init()
_auth.ensure_admin()

print("✅ Cell 1 done. Files created, DB initialized, admin user seeded (admin / Shady868...).")

In [None]:
# CELL 2: create app.py (core Streamlit app: auth, client dashboards, uploads, reports)

from pathlib import Path
import textwrap

app_py = textwrap.dedent(r'''
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime
from io import BytesIO

from utils import db, auth, synth, ml, report, ui

st.set_page_config(page_title="LoanIQ", page_icon="📊", layout="wide")
ui.app_header()
auth.ensure_admin()
db.init()

# Make sure the session keys the app reads always exist; values that were
# already set on a previous rerun are left alone.
for _key in ("user", "client_df", "predictions"):
    if _key not in st.session_state:
        st.session_state[_key] = None

def login_panel():
    """Render the Login / Register tabs and store the authenticated user in session state.

    On a successful login (or registration followed by an automatic login) the
    user dict is saved to st.session_state.user and the script is rerun so the
    sidebar navigation reflects the new session.
    """
    tab_login, tab_register = st.tabs(["Login", "Register"])

    with tab_login:
        u = st.text_input("Username")
        p = st.text_input("Password", type="password")
        if st.button("Login"):
            ok, msg, user = auth.login(u, p)
            if ok:
                st.session_state.user = user
                st.success("Welcome, " + user["username"])
                # st.rerun is the supported replacement for the deprecated
                # st.experimental_rerun.
                st.rerun()
            else:
                st.error(msg)

    with tab_register:
        u = st.text_input("New Username")
        p = st.text_input("New Password", type="password")
        if st.button("Create Account"):
            ok, msg = auth.register(u, p)
            if not ok:
                st.error(msg)
            else:
                logged_in, login_msg, user = auth.login(u, p)
                if logged_in:
                    st.session_state.user = user
                    st.success("Registered & logged in as " + u)
                    st.rerun()
                else:
                    # Registration worked but the automatic login did not:
                    # report it instead of storing a None user as logged in.
                    st.error(login_msg)

def predict_and_score(df: pd.DataFrame):
    """Score df with the deployed model and derive credit score and eligibility.

    If no model is deployed yet, a quick baseline (LogReg, RF, GB) is trained
    on df itself first; that requires df to contain the "repay_good" label.

    Returns:
        (out, meta): out is a copy of df with three added columns --
        "default_risk" (1 - probability of good repayment), "credit_score"
        (probability scaled to 300-900) and "eligible" (1 when the probability
        is at least 0.55). meta holds the model label plus its stored metrics.

    Raises:
        RuntimeError: If no deployed model is available even after the
            baseline training attempt.
    """
    import joblib

    # load deployed model; if none, quick-train on provided df
    deployed = ml.load_deployed()
    if not deployed:
        with st.spinner("No deployed model found. Training quick baseline..."):
            ml.train_and_version(df, families=["LogReg","RF","GB"])
        deployed = ml.load_deployed()
    if not deployed:
        # Previously this fell through to unpacking None (a bare TypeError).
        raise RuntimeError("No deployed model is available for scoring.")

    fam, ver, metrics, path = deployed
    # NOTE: joblib.load unpickles the file; the path comes from the local model
    # registry and must never point at an untrusted upload.
    model = joblib.load(path)

    # The label column, when present, is not a feature.
    features = df.drop(columns=["repay_good"], errors="ignore")
    proba = model.predict_proba(features)[:,1]

    out = df.copy()
    out["default_risk"] = 1 - proba
    # simple credit score scaled 300-900
    out["credit_score"] = (proba*600 + 300).astype(int)
    out["eligible"] = (proba >= 0.55).astype(int)
    return out, {"model": f"{fam} v{ver}", **metrics}

def client_dashboard():
    """Render the client page: load or generate data, score it, show insights and downloads.

    Data and predictions live in st.session_state (client_df / predictions) so
    they survive Streamlit reruns.
    """
    ui.role_tag(st.session_state.user["role"])
    st.subheader("Client Dashboard")
    st.caption("Upload your data or generate a sample, then get risk predictions, credit scores, and loan eligibility.")

    with st.expander("📤 Upload Data (.csv/.xlsx) or Generate Synthetic", expanded=True):
        c1, c2 = st.columns([2,1])
        with c1:
            up = st.file_uploader("Upload CSV or Excel", type=["csv","xlsx"])
            if up is not None:
                # Compare the extension case-insensitively so "DATA.CSV" is
                # not handed to the Excel reader.
                is_csv = up.name.lower().endswith(".csv")
                df = pd.read_csv(up) if is_csv else pd.read_excel(up)
                st.session_state.client_df = df
                st.success(f"Loaded {df.shape[0]} rows.")
        with c2:
            n = st.slider("Rows", 200, 5000, 1000, 100)
            female_bias = st.slider("Women share", 0.3, 0.9, 0.62, 0.01)
            fraud = st.slider("Fraud rate", 0.0, 0.1, 0.02, 0.01)
            if st.button("Generate Sample"):
                df = synth.generate(n=n, female_bias=female_bias, fraud_rate=fraud, seed=42)
                st.session_state.client_df = df
                st.success(f"Generated {df.shape[0]} synthetic records.")

    if st.session_state.client_df is None:
        st.info("No data yet. Upload or generate to continue.")
        return

    df = st.session_state.client_df
    st.dataframe(df.head(50), use_container_width=True)

    if st.button("🧮 Predict Risk & Score"):
        with st.spinner("Scoring..."):
            preds, model_meta = predict_and_score(df)
            st.session_state.predictions = preds
            st.success(f"Scored with {model_meta['model']} | AUC {model_meta.get('AUC','-')}")

    if st.session_state.predictions is None:
        return

    preds = st.session_state.predictions
    has_loan_amount = "loan_amount" in preds.columns

    st.subheader("Insights")
    c1, c2, c3, c4 = st.columns(4)
    with c1:
        st.metric("Avg Credit Score", int(preds["credit_score"].mean()))
    with c2:
        st.metric("Eligible %", f"{100*preds['eligible'].mean():.1f}%")
    with c3:
        # Uploaded files are not guaranteed to carry a loan_amount column.
        if has_loan_amount:
            st.metric("Avg Loan Amount", f"KES {int(preds['loan_amount'].mean()):,}")
        else:
            st.metric("Avg Loan Amount", "n/a")
    with c4:
        st.metric("Fraud Flags", int(preds["is_fraud"].sum()) if "is_fraud" in preds else 0)

    import plotly.express as px
    fig1 = px.histogram(preds, x="credit_score", nbins=30, title="Credit Score Distribution")
    st.plotly_chart(fig1, use_container_width=True)
    if has_loan_amount:
        fig2 = px.scatter(preds, x="loan_amount", y="credit_score", color="eligible", title="Loan vs Score")
        st.plotly_chart(fig2, use_container_width=True)

    st.download_button(
        "⬇️ Download Scored CSV",
        data=preds.to_csv(index=False).encode("utf-8"),
        file_name="loaniq_scored.csv",
        mime="text/csv"
    )

    # PDF report
    if st.button("📄 Build Credit Report (PDF)"):
        summary = {
            "date": datetime.today().date().isoformat(),
            "records": preds.shape[0],
            "avg_score": int(preds["credit_score"].mean()),
            "eligible_pct": f"{100*preds['eligible'].mean():.1f}%",
        }
        pdf_bytes = report.build_credit_report(st.session_state.user, summary)
        st.download_button("Download Credit Report", data=pdf_bytes, file_name="credit_report.pdf", mime="application/pdf")

def admin_badge():
    """Show the amber "Admin Sandbox" banner."""
    banner_html = "<div style='background:#fff8e1;border:1px solid #ffe0b2;padding:6px 10px;border-radius:10px;display:inline-block'>🔐 Admin Sandbox</div>"
    st.markdown(banner_html, unsafe_allow_html=True)

def admin_only():
    """Return a truthy value only when the logged-in user has the admin role.

    With no user in the session, the falsy session value itself is returned.
    """
    current = st.session_state.user
    if not current:
        return current
    return current["role"]=="admin"

def navbar():
    """Return the sidebar entries available to the current session.

    Anonymous visitors only see the login page; admins get the extra
    "Admin Sandbox" entry just before "Logout".
    """
    if st.session_state.user is None:
        return ["Login/Register"]
    admin_items = ["Admin Sandbox"] if admin_only() else []
    return ["Client Dashboard","Reports", *admin_items, "Logout"]

# ---- Sidebar navigation and page routing ----
# The entries offered depend on the session (see navbar()); every branch below
# renders one page. st.rerun() replaces the deprecated st.experimental_rerun().
choice = st.sidebar.radio("Navigation", navbar(), index=0)

if choice=="Login/Register":
    if st.session_state.user:
        st.success(f"Already logged in as {st.session_state.user['username']}")
    else:
        login_panel()

elif choice=="Client Dashboard":
    if not st.session_state.user:
        st.rerun()
    client_dashboard()

elif choice=="Reports":
    if not st.session_state.user:
        st.rerun()
    st.subheader("My Data & Reports")
    st.write("Download previously generated artifacts will appear here in future versions.")
    st.info("Tip: Use the Client Dashboard to generate a fresh PDF/CSV now.")

elif choice=="Admin Sandbox":
    if not admin_only():
        st.error("Access denied")
    else:
        admin_badge()
        st.subheader("Data Generation & Model Engine")
        with st.expander("Generate/Preview Synthetic Dataset", expanded=True):
            n = st.slider("Rows", 1000, 15000, 4000, 500)
            female_bias = st.slider("Women share", 0.3, 0.9, 0.64, 0.01)
            fraud = st.slider("Fraud rate", 0.0, 0.25, 0.03, 0.01)
            if st.button("Generate Dataset"):
                df = synth.generate(n=n, female_bias=female_bias, fraud_rate=fraud, seed=7)
                st.session_state.admin_df = df
                st.success(f"Generated {df.shape[0]} rows.")
                st.dataframe(df.head(100), use_container_width=True)
                st.download_button("⬇️ Download CSV", df.to_csv(index=False).encode("utf-8"), "admin_synth.csv","text/csv")

        with st.expander("Train/Version Models", expanded=True):
            if st.button("Train 6 Families & Version"):
                if "admin_df" not in st.session_state or st.session_state.admin_df is None:
                    st.warning("Generate dataset first; using quick default 3k rows.")
                    st.session_state.admin_df = synth.generate(n=3000, seed=11)
                res = ml.train_and_version(st.session_state.admin_df)
                st.success("Training complete.")
                st.json([{ "family": r[0], "version": r[1], "metrics": r[2], "path": r[3] } for r in res])
            st.write("Deployed model:")
            dep = ml.load_deployed()
            if dep:
                st.code(f"{dep[0]} v{dep[1]} | metrics={dep[2]}")
            else:
                st.info("None deployed yet.")

        with st.expander("Utilities", expanded=False):
            c1,c2,c3,c4 = st.columns(4)
            with c1:
                if st.button("Backup DB"):
                    import shutil
                    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
                    # Copy the database actually in use: db.DB_PATH honours the
                    # LOANIQ_DB environment override, a hard-coded path does not.
                    shutil.copy(db.DB_PATH, f"data/backup_{ts}.sqlite")
                    st.success("DB backed up.")
            with c2:
                if st.button("List Models"):
                    st.json([{"family":r[0], "version": r[1], "metrics": r[2], "path": r[3], "deployed": r[4]} for r in db.list_models()])
            with c3:
                user_to_imp = st.text_input("Impersonate username")
                if st.button("Impersonate"):
                    urow = db.get_user(user_to_imp)
                    if urow:
                        # Capture the acting admin before the session is
                        # replaced so the audit row names the real actor.
                        acting_admin = st.session_state.user["username"]
                        st.session_state.user = {"id":urow[0],"username":urow[1],"role":urow[3]}
                        db.record_audit(acting_admin,"impersonate",user_to_imp)
                        st.success("Now impersonating: "+user_to_imp)
                        st.rerun()
                    else:
                        st.error("User not found")
            with c4:
                if st.button("Fraud Stress Test (simulate adversarial batch)"):
                    df = synth.generate(n=3000, seed=99, fraud_rate=0.2)
                    st.dataframe(df[df["is_fraud"]==1].head(50))
                    st.info("Use Train tab to see how models perform under higher fraud rates.")

elif choice=="Logout":
    st.session_state.user = None
    st.rerun()
''').strip()

Path("app.py").write_text(app_py)
print("✅ Cell 2 done. Created app.py with auth, client flows, reports, and admin sandbox shell.")

In [None]:
# CELL 3: Create optional Streamlit page files (kept minimal — main app handles most flows)
# These pages are stubs; primary functionality is inside app.py to keep state simple.

from pathlib import Path
import textwrap

page_client = textwrap.dedent("""
import streamlit as st
st.write("This page is intentionally minimal. Use the main navigation in the sidebar.")
""").strip()

page_admin = textwrap.dedent("""
import streamlit as st
st.write("Admin features are centralized in the main app for consistent auth.")
""").strip()

Path("pages/1_Client_Dashboard.py").write_text(page_client)
Path("pages/2_Admin_Sandbox.py").write_text(page_admin)

# Seed a small demo CSV
demo_csv = """name,gender,age,dependents,occupation,income,branch,product,loan_amount,loan_status,repay_good,is_fraud,application_date
Mary Ochieng,F,29,2,mama mboga,22000,Nairobi,Micro-Working-Cap,15000,active,1,0,2024-11-02
John Mwangi,M,38,3,shop owner,38000,Nakuru,Asset-Finance,55000,closed,1,0,2025-01-18
Grace Wanjiru,F,33,1,boda boda,26000,Kisumu,Emergency,20000,rejected,0,0,2025-02-05
"""
Path("data/demo_client.csv").write_text(demo_csv)

print("✅ Cell 3 done. Pages stubs + demo CSV created.")

In [None]:
# CELL 4: Smoke test helper — validates imports, DB, minimal training, and prints run tips.

import importlib, json, os
from utils import db, auth, synth, ml

# Make sure the schema and the default admin account exist before anything else.
db.init()
auth.ensure_admin()

print("-> Generating small sample…")
df = synth.generate(n=1200, seed=1)
preview = df.head(3).to_string(index=False)
print(preview)

print("-> Training quick baseline families (LogReg, RF, GB)…")
res = ml.train_and_version(df, families=["LogReg","RF","GB"])
# Each result is a (family, version, metrics, path) tuple.
summary = []
for family, version, metrics, path in res:
    summary.append({ "family": family, "version": version, "metrics": metrics, "path": path })
print(json.dumps(summary, indent=2))

dep = ml.load_deployed()
print("-> Deployed:", dep)

print("\n✅ Smoke test OK.\n")
print("Run the app on Replit sidebar (it auto-uses `.replit`) or locally:\n")
print("  streamlit run app.py --server.port 3000 --server.address 0.0.0.0\n")
print("Login creds:\n  admin / Shady868...\n  (register any new client to auto-login)\n")