<a href="https://colab.research.google.com/github/JBlizzard-sketch/LoanIQ/blob/main/Copy_of_LoanGrok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:

%%bash
set -euo pipefail
echo "🚀 Starting setup at $(date)"

# ---------- Folder structure ----------
mkdir -p /content/LoanIQ/modules/synth \
         /content/LoanIQ/modules/ml \
         /content/LoanIQ/modules/app \
         /content/LoanIQ/modules/auth \
         /content/LoanIQ/modules/pipeline \
         /content/LoanIQ/modules/schema \
         /content/LoanIQ/data/uploads \
         /content/LoanIQ/data/synthetic \
         /content/LoanIQ/exports/reports \
         /content/LoanIQ/exports/logs

echo "✅ Folders created"

🚀 Starting setup at Wed Sep  3 02:01:32 PM UTC 2025
✅ Folders created


In [11]:
# ========================== CELL 1 — ENVIRONMENT & SETUP ==========================

# ---------- Install dependencies ----------
import subprocess
import sys

def install(package):
    try:
        __import__(package)
    except ImportError:
        print(f"Installing {package} ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", package])

# Packages (skip if installed)
packages = [
    "streamlit", "pandas", "numpy", "scikit-learn", "xgboost", "imbalanced-learn",
    "shap", "faker", "pyngrok", "matplotlib", "seaborn", "plotly", "reportlab", "python-dotenv"
]

for pkg in packages:
    install(pkg)

print("✅ Dependencies installed / confirmed.")

# ---------- Folder structure ----------
from pathlib import Path

ROOT = Path("/content/LoanIQ").resolve()
MODULES = ROOT / "modules"
DATA = ROOT / "data"
EXPORTS = ROOT / "exports"

FOLDERS = [
    MODULES / "synth",
    MODULES / "ml",
    MODULES / "app",
    MODULES / "auth",
    MODULES / "pipeline",
    MODULES / "schema",
    DATA / "uploads",
    DATA / "synthetic",
    EXPORTS / "reports",
    EXPORTS / "logs"
]

for folder in FOLDERS:
    folder.mkdir(parents=True, exist_ok=True)

# Create empty __init__.py for modules
import os
for subfolder in MODULES.glob("**/"):
    init_file = subfolder / "__init__.py"
    if not init_file.exists():
        init_file.touch()

print("✅ Folder structureready, __init__.py created.")

# ---------- Colab-safe import paths ----------
import sys
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

print("✅ sys.path updated.")

# ---------- Hardcoded secrets ----------
ADMIN_USERNAME = "Admin"
ADMIN_PASSWORD = "Shady868"
NGROK_AUTHTOKEN = "31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF"

print(f"✅ Hardcoded admin credentials and ngrok token set.")

# ---------- Smoke test imports ----------
try:
    import modules.app.client_panel as client_panel
    import modules.app.app as app_module
    import modules.ml.engine as ml_engine
    import modules.pipeline.pipeline as pipeline
    import modules.schema.schema as schema
    import modules.auth.auth as auth
    import modules.synth.generator as generator
    print("✅ Smoke test: All modules imported successfully.")
except Exception as e:
    print("❌ Import failed:", e)

print("\n✅ CELL 1 complete — Environment setup ready for Colab.")

Installing scikit-learn ...
Installing imbalanced-learn ...
Installing python-dotenv ...
✅ Dependencies installed / confirmed.
✅ Folder structureready, __init__.py created.
✅ sys.path updated.
✅ Hardcoded admin credentials and ngrok token set.
❌ Import failed: cannot import name 'authenticate' from 'modules.auth' (/content/LoanIQ/modules/auth/__init__.py)

✅ CELL 1 complete — Environment setup ready for Colab.


In [12]:
from pathlib import Path
import textwrap, os

ROOT = Path("/content/LoanIQ").resolve()
MODULES = ROOT / "modules"
SYNTH_DIR = MODULES / "synth"
for d in [MODULES, SYNTH_DIR]:
    d.mkdir(parents=True, exist_ok=True)

def write_module(path: Path, content: str):
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding='utf-8')
    print(f"Wrote: {path}")

# auth.py
auth_py = textwrap.dedent("""\
import os, sqlite3, hashlib
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
DB_DIR = ROOT / "data"
DB_DIR.mkdir(parents=True, exist_ok=True)
DB_PATH = DB_DIR / "users.db"

def _conn():
    return sqlite3.connect(str(DB_PATH), check_same_thread=False)

def _hash(pw: str) -> str:
    return hashlib.sha256(pw.encode('utf-8')).hexdigest()

def init_db():
    conn = _conn()
    cur = conn.cursor()
    cur.execute("CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY AUTOINCREMENT, username TEXT UNIQUE, password_hash TEXT, role TEXT DEFAULT 'user')")
    conn.commit()
    conn.close()

def add_user(username: str, password: str, role: str = 'user', overwrite: bool = False):
    conn = _conn(); cur = conn.cursor()
    ph = _hash(password)
    if overwrite:
        cur.execute('DELETE FROM users WHERE username=?', (username,))
    try:
        cur.execute('INSERT INTO users (username, password_hash, role) VALUES (?,?,?)', (username, ph, role))
        conn.commit()
        return True
    except Exception as e:
        if 'UNIQUE constraint' in str(e) and overwrite:
            cur.execute('UPDATE users SET password_hash=?, role=? WHERE username=?', (ph,role,username))
            conn.commit()
            return True
        return False
    finally:
        conn.close()

def authenticate(username: str, password: str):
    conn = _conn(); cur = conn.cursor()
    cur.execute('SELECT password_hash, role FROM users WHERE username=?', (username,))
    row = cur.fetchone()
    conn.close()
    if not row:
        return False, 'not_found'
    ph, role = row
    if ph == _hash(password):
        return True, role
    return False, 'bad_password'

def list_users():
    conn = _conn(); cur = conn.cursor()
    cur.execute('SELECT id, username, role FROM users')
    rows = cur.fetchall(); conn.close()
    return rows

if __name__ == '__main__':
    init_db()
    print('Auth DB ready at', DB_PATH)
""")
write_module(MODULES / "auth.py", auth_py)

# synth/generator.py with enhanced faker
synth_py = textwrap.dedent("""\
from faker import Faker
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

fake = Faker(); Faker.seed(42); random.seed(42); np.random.seed(42)

MALE_NAMES = ['Joseph', 'John', 'David', 'James', 'William', 'Peter', 'Brian', 'Jackson', 'Kamau', 'Mwangi', 'Onyango', 'Kipchoge', 'Juma', 'Baraka', 'Henry', 'Aiden', 'Kyalo', 'Muthui', 'Matu', 'Badru', 'Azizi']
FEMALE_NAMES = ['Sarah', 'Naomi', 'Irene', 'Mary', 'Anne', 'Elizabeth', 'Mercy', 'Faith', 'Caro', 'Lilian', 'Njeri', 'Wanjiku', 'Atieno', 'Jepkosgei', 'Zawadi', 'Amani', 'Ayana', 'Mumbi', 'Makena', 'Kioni', 'Kainda']
LAST_NAMES = ['Njuguna', 'Onyango', 'Aduda', 'Mwayi', 'Nthambi', 'Kyeli', 'Egadwa', 'Simiyu', 'Mwangi', 'Kamau', 'Otieno', 'Cheruiyot', 'Juma', 'Baraka']

BRANCHES = ['Mpeketoni', 'Ugunja', 'Nakuru', 'Molo', 'Changamwe', 'Kanyangi', 'Mbale', "Moi's Bridge", 'Ruiru', 'Thika', 'Kikuyu', 'Ngong', 'Mavoko', 'Westlands', 'Dagoretti', 'Kilimani', 'Kawangware', 'Machakos', 'Kitui', 'Meru', 'Embu', 'Kangundo', 'Maua', 'Garissa', 'Mandera', 'Isiolo', 'Marsabit', 'Mombasa', 'Malindi', 'Lamu', 'Watamu', 'Diani', 'Kilifi', 'Nyali', 'Bamburi', 'Mtwapa', 'Nyeri', 'Kiambu', 'Kerugoya', 'Nyahururu', "Murang'a", 'Karuri', 'Ol Kalou', 'Nanyuki', 'Rumuruti', 'Eldoret', 'Kisumu', 'Kakamega', 'Kitale']

PRODUCTS = ['INUKA 4 WEEKS', 'KUZA 4 WEEKS', 'KUZA 5 WEEKS', 'INUKA 5 WEEKS', 'FADHILI WEEKS']

STATUSES = ['Active', 'Pending Branch Approval', 'Rejected']

def infer_age_from_id(id_num):
    prefix = int(str(id_num)[:2]) if len(str(id_num)) >= 2 else random.randint(1, 35)
    if 1 <= prefix <= 10: return random.randint(60, 80)
    elif 11 <= prefix <= 20: return random.randint(50, 60)
    elif 21 <= prefix <= 25: return random.randint(40, 50)
    elif 26 <= prefix <= 31: return random.randint(30, 40)
    elif 32 <= prefix <= 34: return random.randint(27, 30)
    else: return random.randint(18, 26)

def _skewed_loan_amount(min_amt=4000, max_amt=15000, skew=1.8):
    r = random.random() ** skew
    return int(min_amt + (max_amt - min_amt) * r)

def generate_national_id():
    return random.randint(1000000, 39999999)

def generate_phone():
    return "+2547" + str(random.randint(1000000, 9999999))

def generate_sample(n=1000, branches=None, fraud_pct=0.02, default_rate=0.08, multi_loan_frac=0.12, seed=None):
    if seed is not None:
        random.seed(seed); np.random.seed(seed)
    branches = branches or BRANCHES
    rows = []
    client_counter = 0
    for i in range(n):
        is_female = random.choices([True, False], weights=[70, 30])[0]
        first = random.choice(FEMALE_NAMES if is_female else MALE_NAMES)
        last = random.choice(LAST_NAMES)
        name = f"{first} {last}"
        nid = generate_national_id()
        age = infer_age_from_id(nid)
        gender = 'female' if is_female else 'male'
        phone = generate_phone()
        branch = random.choice(branches)
        product = random.choice(PRODUCTS)
        income = random.randint(4000, 8000)
        occupation = random.choices(['Small Business', 'Salaried'], weights=[80, 20])[0]
        amount = _skewed_loan_amount()
        created = datetime.utcnow() - timedelta(days=random.randint(0, 30))
        ref = created.strftime('%y%m%d') + str(random.randint(10000, 99999))
        loan_type = 'Normal'
        is_fraud = random.random() < fraud_pct
        prob_default = default_rate + (0.02 if amount > 40000 else -0.005) + (0.01 if income < 5000 else -0.005)
        status = random.choices(STATUSES, weights=[50, 40, 10])[0]
        health = random.choices(['Performing', 'Non-Performing'], weights=[90, 10])[0]
        collateral = 'None'
        if is_fraud:
            if random.random() < 0.3 and rows:
                r = random.choice(rows)
                nid = r['national_id']
                phone = r['phone']
            else:
                amount = amount * random.randint(2,4)
                income = int(income * random.uniform(0.2, 0.6))
            status = random.choices(STATUSES, weights=[60, 20, 20])[0]
        rows.append({
            'record_id': f"R{i:08d}",
            'client_id': client_counter,
            'name': name,
            'national_id': nid,
            'phone': phone,
            'branch': branch,
            'product': product,
            'income': income,
            'loan_amount': amount,
            'loan_status': status,
            'loan_health': health,
            'created_date': created.strftime('%Y-%m-%d'),
            'gender': gender,
            'age': age,
            'occupation': occupation,
            'collateral': collateral,
            'ref_number': ref,
            'loan_type': loan_type,
            'simulated_fraud': int(is_fraud)
        })
        if random.random() < multi_loan_frac:
            extra_amount = int(amount * random.uniform(0.3, 1.2))
            created2 = created - timedelta(days=random.randint(30, 90))
            rows.append({
                'record_id': f"R{i:08d}_2",
                'client_id': client_counter,
                'name': name,
                'national_id': nid,
                'phone': phone,
                'branch': branch,
                'product': random.choice(PRODUCTS),
                'income': income,
                'loan_amount': extra_amount,
                'loan_status': 'performing',
                'loan_health': health,
                'created_date': created2.strftime('%Y-%m-%d'),
                'gender': gender,
                'age': age,
                'occupation': occupation,
                'collateral': collateral,
                'ref_number': created2.strftime('%y%m%d') + str(random.randint(10000, 99999)),
                'loan_type': loan_type,
                'simulated_fraud': 0
            })
        client_counter += 1
    df = pd.DataFrame(rows)
    df['loan_amount'] = pd.to_numeric(df['loan_amount'], errors='coerce').fillna(0).astype(int)
    df['income'] = pd.to_numeric(df['income'], errors='coerce').fillna(0).astype(int)
    df['simulated_fraud'] = df['simulated_fraud'].fillna(0).astype(int)
    return df

if __name__ == '__main__':
    print(generate_sample(10).head().to_dict(orient='records'))
""")
write_module(SYNTH_DIR / "generator.py", synth_py)

# schema.py
schema_py = textwrap.dedent("""\
import pandas as pd
import numpy as np
from typing import List

EXPECTED_COLUMNS = ['record_id','client_id','name','national_id','phone','branch','product', 'income','loan_amount','loan_status','created_date','gender', 'age', 'occupation', 'collateral', 'ref_number', 'loan_type', 'loan_health']

def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
    return df

def validate_columns(df: pd.DataFrame, expected: List[str]=None) -> List[str]:
    expected = expected or EXPECTED_COLUMNS
    cols = [c.strip().lower() for c in df.columns]
    missing = [c for c in expected if c not in cols]
    return missing

def simple_preprocess(df: pd.DataFrame, target_col: str = 'loan_status'):
    df = normalize_columns(df)
    if 'loan_amount' in df.columns:
        df['loan_amount'] = pd.to_numeric(df['loan_amount'].astype(str).str.replace('[^0-9.-]','',regex=True), errors='coerce').fillna(0)
    if 'income' in df.columns:
        df['income'] = pd.to_numeric(df['income'].astype(str).str.replace('[^0-9.-]','',regex=True), errors='coerce').fillna(0)
    if target_col in df.columns:
        df[target_col] = df[target_col].astype(str).str.lower().map(lambda x: 1 if 'default' in x else 0)
    return df
""")
write_module(MODULES / "schema.py", schema_py)

# pipeline.py
pipeline_py = textwrap.dedent("""\
import pandas as pd
import numpy as np

def add_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df['loan_to_income'] = df.apply(lambda r: (r['loan_amount'] / (r['income'] + 1)) if r.get('income',0) is not None else 0, axis=1)
    def bucket_amt(x):
        if x <= 5000: return 'micro'
        if x <= 20000: return 'small'
        if x <= 50000: return 'medium'
        return 'large'
    df['loan_size_bucket'] = df['loan_amount'].apply(bucket_amt)
    try:
        df['created_date'] = pd.to_datetime(df['created_date'], errors='coerce')
        df['days_since_loan'] = (pd.Timestamp.utcnow() - df['created_date']).dt.days.fillna(9999).astype(int)
    except Exception:
        df['days_since_loan'] = 9999
    df['dup_nid'] = df.duplicated(subset=['national_id'], keep=False).astype(int)
    df['extreme_lti'] = ((df['loan_to_income'] > 10) | (df['loan_amount'] > 200000)).astype(int)
    df['fraud_score'] = df['simulated_fraud'].fillna(0) * 2 + df['dup_nid'] * 1 + df['extreme_lti'] * 2
    df['risk_score'] = (df['fraud_score'] + (df['loan_to_income'] / (df['loan_to_income'].max() + 1))).fillna(0)
    maxv = df['risk_score'].replace([np.inf, -np.inf], 0).max()
    if pd.notna(maxv) and maxv > 0:
        df['risk_score'] = df['risk_score'] / maxv
    else:
        df['risk_score'] = 0.0
    return df

def top_risky_branches(df: pd.DataFrame, top_n=5):
    gv = df.groupby('branch').agg(avg_risk = ('risk_score', 'mean'), total_loans = ('record_id', 'count'), fraud_count = ('simulated_fraud', 'sum')).reset_index().sort_values('avg_risk', ascending=False)
    return gv.head(top_n)
""")
write_module(MODULES / "pipeline.py", pipeline_py)

# __init__.py
for p in [MODULES, SYNTH_DIR]: (p / "__init__.py").write_text("# init\n")

# Import checks
import importlib, sys
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
failed = []
for mod in ("modules.auth", "modules.synth.generator", "modules.schema", "modules.pipeline"):
    try:
        m = importlib.import_module(mod)
        importlib.reload(m)
        print("✅", mod)
    except Exception as e:
        print("❌", mod, "->", e)
        failed.append(mod)
if failed:
    print("\nSome imports failed.")
else:
    print("\n✅ Core modules ready.")

Wrote: /content/LoanIQ/modules/auth.py
Wrote: /content/LoanIQ/modules/synth/generator.py
Wrote: /content/LoanIQ/modules/schema.py
Wrote: /content/LoanIQ/modules/pipeline.py
✅ modules.auth
✅ modules.synth.generator
✅ modules.schema
✅ modules.pipeline

✅ Core modules ready.


In [13]:
from pathlib import Path
import textwrap, os

ROOT = Path("/content/LoanIQ").resolve()
ML_DIR = ROOT / "modules" / "ml"
MODELS_DIR = ROOT / "models"
ML_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

ml_engine_py = textwrap.dedent("""\
import os, joblib, time, json, hashlib, datetime
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, classification_report
from xgboost import XGBClassifier
from modules import schema, pipeline

BASE = Path(__file__).resolve().parents[1]
MODELS_DIR = BASE / "models"

def _hash_cfg(cfg: dict) -> str:
    return hashlib.sha1(json.dumps(cfg, sort_keys=True).encode()).hexdigest()[:8]

def prepare_dataset(df: pd.DataFrame, target_col="loan_status"):
    df = schema.simple_preprocess(df, target_col=target_col)
    df = pipeline.add_features(df)
    drop_cols = ['record_id','client_id','name','national_id','phone',target_col]
    feature_cols = [c for c in df.columns if c not in drop_cols]
    X = df[feature_cols].copy()
    y = df[target_col]
    for col in X.select_dtypes(include=['object']).columns:
        X[col] = X[col].astype('category').cat.codes
    for col in X.select_dtypes(include=['datetime64[ns]']).columns:
        X[col] = X[col].view('int64') // 10**9
    return X, y, feature_cols

def train_model(df: pd.DataFrame, target_col="loan_status", model_params=None, version_note="baseline"):
    X, y, feature_cols = prepare_dataset(df, target_col)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
    params = {"n_estimators": 120, "max_depth": 5, "learning_rate": 0.1, "subsample": 0.9, "colsample_bytree": 0.9, "eval_metric": "logloss", "use_label_encoder": False, "tree_method": "hist"}
    if model_params:
        params.update(model_params)
    model = XGBClassifier(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    proba = model.predict_proba(X_test)[:,1]
    auc = roc_auc_score(y_test, proba)
    f1 = f1_score(y_test, preds)
    report = classification_report(y_test, preds, output_dict=True)
    version_id = f"v{int(time.time())}_{_hash_cfg(params)}"
    artifact_dir = MODELS_DIR / version_id
    artifact_dir.mkdir(parents=True, exist_ok=True)
    joblib.dump({"model": model, "features": feature_cols, "params": params}, artifact_dir / "model.joblib")
    (artifact_dir / "metrics.json").write_text(json.dumps({"auc": float(auc), "f1": float(f1), "report": report, "version_note": version_note, "timestamp": str(datetime.datetime.utcnow())}, indent=2))
    return {"version_id": version_id, "auc": auc, "f1": f1, "artifact": str(artifact_dir)}

def load_model(version_id: str):
    artifact_dir = MODELS_DIR / version_id
    obj = joblib.load(artifact_dir / "model.joblib")
    return obj

def list_models():
    return sorted([d.name for d in MODELS_DIR.iterdir() if d.is_dir()])

def predict(df: pd.DataFrame, version_id: str):
    obj = load_model(version_id)
    model, features = obj['model'], obj['features']
    df_proc = pipeline.add_features(schema.simple_preprocess(df))
    X = df_proc.reindex(columns=features, fill_value=0).copy()
    for col in X.select_dtypes(include=['object']).columns:
        X[col] = X[col].astype('category').cat.codes
    for col in X.select_dtypes(include=['datetime64[ns]']).columns:
        X[col] = X[col].view('int64') // 10**9
    proba = model.predict_proba(X)[:,1]
    preds = (proba > 0.5).astype(int)
    return preds, proba

def fraud_trigger(df: pd.DataFrame, risk_threshold=0.7):
    df2 = pipeline.add_features(schema.simple_preprocess(df))
    suspicious = df2[df2['risk_score'] > risk_threshold]
    return suspicious
""")
write_module(ML_DIR / "engine.py", ml_engine_py)

# __init__.py
(ML_DIR / "__init__.py").write_text("# init\n")

# Smoke test
import importlib, sys
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
try:
    import modules.ml.engine as engine
    importlib.reload(engine)
    from modules.synth import generator
    df = generator.generate_sample(n=300, fraud_pct=0.05, default_rate=0.1, seed=123)
    res = engine.train_model(df, version_note="smoke_test")
    print("Model trained:", res)
    print("Available models:", engine.list_models())
except Exception as e:
    print("ML engine smoke failed:", e)
print("\n✅ ML engine ready.")

Wrote: /content/LoanIQ/modules/ml/engine.py
ML engine smoke failed: module 'modules.schema' has no attribute 'simple_preprocess'

✅ ML engine ready.


In [14]:
from pathlib import Path
import textwrap, os

ROOT = Path("/content/LoanIQ").resolve()
ADMIN_TOOLS = ROOT / "modules" / "admin_tools.py"

admin_tools_py = textwrap.dedent("""\
import os, time, json, traceback
from pathlib import Path
from typing import List
from modules.synth.generator import generate_sample
from modules.ml import engine
from modules import schema, pipeline
from modules.auth import authenticate, init_db, add_user

ROOT = Path(__file__).resolve().parents[1]
MODELS_DIR = ROOT / "models"
SCHEMA_BACKUPS = ROOT / "schema_backups"
SCHEMA_BACKUPS.mkdir(parents=True, exist_ok=True)

def generate_synthetic(records: int = 1000, branches: List[str] = None, fraud_pct: float = 0.02, default_rate: float = 0.08, seed: int = None) -> dict:
    df = generate_sample(n=records, branches=branches, fraud_pct=fraud_pct, default_rate=default_rate, seed=seed)
    try:
        df_proc = pipeline.add_features(schema.simple_preprocess(df))
        summary = {'records': len(df_proc), 'branches': sorted(df_proc['branch'].unique().tolist()), 'simulated_fraud': int(df_proc['simulated_fraud'].sum()), 'avg_loan': float(df_proc['loan_amount'].mean()), 'median_income': float(df_proc['income'].median())}
    except Exception:
        summary = {'records': len(df), 'error': 'feature extraction failed'}
    return {'df': df, 'summary': summary}

def retrain_model(df, target_col='loan_status', model_params=None, version_note='admin_retrain'):
    try:
        res = engine.train_model(df, target_col=target_col, model_params=model_params, version_note=version_note)
        return {'ok': True, 'result': res}
    except Exception as e:
        return {'ok': False, 'error': str(e), 'trace': traceback.format_exc()}

def fraud_stress_test(base_records=500, branches=None, fraud_steps=[0.01,0.05,0.1,0.2], default_rate=0.08, detection_threshold=0.7):
    results = []
    for p in fraud_steps:
        df = generate_sample(n=base_records, branches=branches, fraud_pct=p, default_rate=default_rate, seed=int(time.time())%9999)
        injected = int(df['simulated_fraud'].sum())
        suspicious = engine.fraud_trigger(df, risk_threshold=detection_threshold)
        detected = len(suspicious)
        detection_rate = (detected / injected) if injected > 0 else None
        results.append({'fraud_pct': p, 'injected': int(injected), 'detected': int(detected), 'detection_rate': detection_rate})
    return results

def inject_schema(new_expected_columns: List[str], tag: str = None):
    tag = tag or time.strftime('%Y%m%d_%H%M%S')
    path = SCHEMA_BACKUPS / f'schema_{tag}.json'
    metadata = {'expected_columns': new_expected_columns, 'tag': tag, 'timestamp': time.strftime('%Y-%m-%dT%H:%M:%SZ')}
    path.write_text(json.dumps(metadata, indent=2))
    return {'ok': True, 'backup': str(path), 'metadata': metadata}

def impersonate_user(username='demo_user'):
    token = {'impersonated_user': username, 'issued_at': time.time(), 'expires_in': 3600, 'token': f"impersonate_{username}_{int(time.time())}"}
    return token

def ensure_admin():
    init_db()
    add_user('Admin', 'Shady868', role='admin', overwrite=True)
    return True

if __name__ == '__main__':
    ensure_admin()
    print('Generating 200 sample records...')
    out = generate_synthetic(records=200, fraud_pct=0.05)
    print('Summary:', out['summary'])
    r = retrain_model(out['df'], version_note='cli_demo_retrain')
    print('Retrain ->', r)
    s = fraud_stress_test(base_records=300, fraud_steps=[0.02,0.05,0.1], detection_threshold=0.6)
    print('Stress test ->', s)
""")
write_module(ADMIN_TOOLS, admin_tools_py)

# Smoke test
import importlib, sys
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
try:
    import modules.admin_tools as admin_tools
    importlib.reload(admin_tools)
    admin_tools.ensure_admin()
    demo = admin_tools.generate_synthetic(records=200, fraud_pct=0.04, default_rate=0.12)
    print("Synthetic summary:", demo['summary'])
    retrain_res = admin_tools.retrain_model(demo['df'], version_note="admin_cell4_demo")
    print("Retrain result:", retrain_res)
    stress = admin_tools.fraud_stress_test(base_records=300, fraud_steps=[0.02, 0.05, 0.1], detection_threshold=0.6)
    print("Stress test:", stress)
except Exception as e:
    print("Admin tools smoke failed:", e)
print("\n✅ Admin tools ready.")

Wrote: /content/LoanIQ/modules/admin_tools.py
Admin tools smoke failed: cannot import name 'authenticate' from 'modules.auth' (/content/LoanIQ/modules/auth/__init__.py)

✅ Admin tools ready.


In [15]:
from pathlib import Path
import textwrap

ROOT = Path("/content/LoanIQ").resolve()
APP_DIR = ROOT / "modules" / "app"
CLIENT_PANEL = APP_DIR / "client_panel.py"
APP_PY = APP_DIR / "app.py"
APP_DIR.mkdir(parents=True, exist_ok=True)

client_panel_py = textwrap.dedent("""\
import streamlit as st
import pandas as pd
import numpy as np
from io import BytesIO
import plotly.express as px
import shap
from sklearn.ensemble import IsolationForest
from modules.synth import generator
from modules import schema, pipeline
from modules.ml import engine

def app():
    st.title("📊 LoanIQ — Client Dashboard")
    st.markdown("Upload your loan book or generate a synthetic dataset to explore insights.")

    option = st.radio("Choose dataset source:", ["Upload CSV/Excel", "Simulate dataset"], horizontal=True)

    df = None
    if option == "Upload CSV/Excel":
        file = st.file_uploader("Upload loan dataset (CSV/XLSX)", type=["csv","xlsx"])
        if file is not None:
            try:
                if file.name.endswith('.xlsx'):
                    df = pd.read_excel(file)
                else:
                    df = pd.read_csv(file)
                st.success(f"Uploaded {len(df)} records.")
            except Exception as e:
                st.error(f"Failed to read file: {e}")
    else:
        st.subheader("Simulate dataset")
        n = st.slider("Number of records", 100, 10000, 500, step=100)
        fraud_pct = st.slider("Fraud percentage", 0.0, 0.5, 0.05, step=0.01)
        default_rate = st.slider("Default rate", 0.0, 0.5, 0.08, step=0.01)
        branches = st.multiselect("Branches", generator.BRANCHES, default=generator.BRANCHES[:3])
        if st.button("Generate dataset"):
            df = generator.generate_sample(n=n, branches=branches, fraud_pct=fraud_pct, default_rate=default_rate, seed=42)
            st.success(f"Generated {len(df)} synthetic records.")

    if df is None or df.empty:
        st.info("Please upload or generate a dataset to continue.")
        return

    df_proc = pipeline.add_features(schema.simple_preprocess(df))
    st.subheader("Portfolio Overview")
    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Total Loans", f"{len(df_proc)}")
    c2.metric("Fraud Flags", int(df_proc['simulated_fraud'].sum()))
    c3.metric("Avg Risk Score", f"{df_proc['risk_score'].mean():.2f}")
    c4.metric("Total Loan Amount (KES)", f"{df_proc['loan_amount'].sum():,.0f}")

    # Per-Client Lookup
    st.subheader("🔍 Per-Client Lookup")
    with st.form("lookup_form"):
        client_id = st.text_input("National ID or Phone")
        if st.form_submit_button("Search Client"):
            client_df = df_proc[(df_proc['national_id'] == client_id) | (df_proc['phone'] == client_id)]
            if client_df.empty:
                st.warning("No client found.")
            else:
                st.dataframe(client_df[['record_id', 'loan_amount', 'loan_status', 'risk_score']])
                st.metric("Total Loans for Client", len(client_df))
                st.metric("Avg Risk Score", client_df['risk_score'].mean())
                st.plotly_chart(px.timeline(client_df, x_start='created_date', x_end='created_date', y='loan_amount', title="Loan History Timeline"))

    # Risk Analysis
    st.subheader("⚠️ Risk Analysis")
    risky = pipeline.top_risky_branches(df_proc, top_n=10)
    st.dataframe(risky)
    st.plotly_chart(px.bar(risky, x='branch', y='avg_risk', title="Top Risky Branches"))

    # Filters
    branch_filter = st.selectbox("Filter by Branch", ['All'] + df_proc['branch'].unique().tolist())
    risk_filter = st.slider("Min Risk Score", 0.0, 1.0, 0.0)
    filtered_df = df_proc[df_proc['risk_score'] >= risk_filter]
    if branch_filter != 'All':
        filtered_df = filtered_df[filtered_df['branch'] == branch_filter]
    st.dataframe(filtered_df.head(50))

    # Rich Visuals
    st.plotly_chart(px.pie(filtered_df, names='loan_status', title="Loan Status Breakdown"))
    st.plotly_chart(px.density_heatmap(filtered_df, x='branch', y='risk_score', title="Risk Heatmap by Branch"))
    st.plotly_chart(px.scatter(filtered_df, x='loan_amount', y='risk_score', color='branch', title="Loan Amount vs Risk Score"))
    st.plotly_chart(px.box(filtered_df, x='loan_size_bucket', y='risk_score', title="Risk by Loan Size Bucket"))

    # ML Predictions & Anomalies
    st.subheader("📈 ML Predictions & Anomalies")
    versions = engine.list_models()
    if not versions:
        st.warning("No models trained yet. Ask Admin to train a baseline model.")
    else:
        selected = st.selectbox("Select model version", versions, index=len(versions)-1)
        if st.button("Run Predictions"):
            X, y, features = engine.prepare_dataset(df_proc)
            preds, proba = engine.predict(df_proc, version_id=selected)
            df_proc['pred_default'] = preds
            df_proc['prob_default'] = proba
            st.write(f"Predicted default rate: {df_proc['pred_default'].mean()*100:.2f}%")
            st.plotly_chart(px.histogram(df_proc, x='prob_default', color='pred_default', title="Default Probability Distribution"))
            # SHAP
            obj = engine.load_model(selected)
            model = obj['model']
            explainer = shap.Explainer(model, X)
            shap_values = explainer(X)
            st.pyplot(shap.summary_plot(shap_values, X, plot_type="bar", show=False))
            # Anomaly Detection
            iso = IsolationForest().fit(X)
            anomalies = iso.predict(X)
            anomaly_df = df_proc[anomalies == -1]
            st.subheader("Potential Anomalies")
            st.dataframe(anomaly_df[['national_id', 'loan_amount', 'prob_default']])
            st.metric("Anomalous Loans", len(anomaly_df))
            st.plotly_chart(px.scatter(anomaly_df, x='loan_amount', y='prob_default', title="Anomalies Scatter"))

    # What-if Analysis
    st.subheader("🔮 What-if Analysis")
    with st.form("whatif_form"):
        loan_adj = st.slider("Loan size adjustment (%)", -50, 50, 0)
        income_adj = st.slider("Income adjustment (%)", -50, 50, 0)
        fraud_adj = st.slider("Fraud multiplier", 0.5, 2.0, 1.0, step=0.1)
        if st.form_submit_button("Run What-if"):
            df_whatif = df_proc.copy()
            df_whatif['loan_amount'] = df_whatif['loan_amount'] * (1 + loan_adj/100)
            df_whatif['income'] = df_whatif['income'] * (1 + income_adj/100)
            df_whatif['simulated_fraud'] = df_whatif['simulated_fraud'] * fraud_adj
            df_whatif = pipeline.add_features(df_whatif)
            st.metric("Adjusted Avg Risk Score", round(df_whatif['risk_score'].mean(),3))
            st.plotly_chart(px.box(pd.DataFrame({'Original': df_proc['risk_score'], 'Adjusted': df_whatif['risk_score']}), title="Risk Comparison"))

    # Export
    st.subheader("⬇️ Export Data")
    export_type = st.selectbox("Export Type", ["Full Dataset", "High-Risk Loans", "Client Lookup", "What-if Results"])
    export_df = df_proc if export_type == "Full Dataset" else df_proc[df_proc['risk_score'] > 0.7] if export_type == "High-Risk Loans" else client_df if 'client_df' in locals() else df_proc if export_type == "Client Lookup" else df_whatif if 'df_whatif' in locals() else df_proc
    buffer = BytesIO()
    export_df.to_csv(buffer, index=False)
    st.download_button("Download CSV", data=buffer.getvalue(), file_name=f"{export_type.lower().replace(' ','_')}.csv", mime="text/csv")

    st.success("Client dashboard ready.")
""")
CLIENT_PANEL.write_text(client_panel_py, encoding="utf-8")

# main app.py with admin panel UI
app_py = textwrap.dedent("""\
import streamlit as st
import pandas as pd
import json
from modules import auth, admin_tools, schema, pipeline
import modules.app.client_panel as client_panel
from modules.ml import engine
from modules.synth import generator
from pathlib import Path
import plotly.express as px

ROOT = Path("/content/LoanIQ").resolve()
SCHEMA_BACKUPS = ROOT / "schema_backups"

st.set_page_config(page_title="LoanIQ", layout="wide")
auth.init_db()
admin_tools.ensure_admin()

if "user" not in st.session_state:
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Login")
        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        if st.button("Login"):
            ok, role = auth.authenticate(username, password)
            if ok:
                st.session_state['user'] = username
                st.session_state['role'] = role
                st.success(f"Logged in as {username} ({role})")
            else:
                st.error("Invalid credentials")
    with col2:
        st.subheader("Register (user)")
        r_user = st.text_input("New username")
        r_pass = st.text_input("New password", type="password")
        if st.button("Register"):
            auth.add_user(r_user, r_pass, role="user")
            st.success("User registered.")
    st.stop()

menu_options = ["Client Dashboard"]
if st.session_state["role"] == "admin":
    menu_options.append("Admin Sandbox")
menu = st.sidebar.selectbox("Menu", menu_options)

if menu == "Client Dashboard":
    client_panel.app()
elif menu == "Admin Sandbox":
    st.title("LoanIQ — Admin Sandbox")
    st.info("Logged in as Admin")

    st.subheader("Admin Overview")
    c1, c2, c3 = st.columns(3)
    c1.metric("Total Users", len(auth.list_users()))
    c2.metric("Models Trained", len(engine.list_models()))
    c3.metric("Schema Backups", len(list(SCHEMA_BACKUPS.glob("*.json"))))

    with st.expander("Generate Synthetic Dataset"):
        with st.form("synthetic_form"):
            records = st.slider("Number of Records", 100, 5000, 1000, step=100)
            branches = st.multiselect("Branches", generator.BRANCHES, default=generator.BRANCHES[:3])
            fraud_pct = st.slider("Fraud Percentage", 0.0, 0.3, 0.02, step=0.01)
            default_rate = st.slider("Default Rate", 0.0, 0.3, 0.08, step=0.01)
            seed = st.number_input("Seed (optional)", min_value=0, value=None, step=1)
            if st.form_submit_button("Generate"):
                with st.spinner("Generating..."):
                    result = admin_tools.generate_synthetic(records, branches, fraud_pct, default_rate, seed)
                    st.session_state["synthetic_df"] = result["df"]
                st.json(result["summary"])
                st.dataframe(result["df"].head(10))
                st.download_button("Download Synthetic CSV", result["df"].to_csv(index=False), "synthetic_loans.csv")

    with st.expander("Retrain Model"):
        with st.form("retrain_form"):
            dataset_options = ["Synthetic"] + [str(f) for f in (ROOT / "data" / "uploads").glob("*.[cC][sS][vV]")]
            dataset = st.selectbox("Dataset", dataset_options)
            version_note = st.text_input("Version Note", "admin_retrain")
            params = st.text_area("Model Params (JSON)", '{"max_depth": 5, "learning_rate": 0.1}')
            if st.form_submit_button("Train"):
                df = st.session_state.get("synthetic_df") if dataset == "Synthetic" else pd.read_csv(dataset)
                try:
                    params = json.loads(params)
                except:
                    st.error("Invalid JSON")
                    st.stop()
                with st.spinner("Training..."):
                    result = admin_tools.retrain_model(df, model_params=params, version_note=version_note)
                if result["ok"]:
                    st.success(f"Model {result['result']['version_id']} trained, AUC: {result['result']['auc']:.2f}")
                else:
                    st.error(result["error"])

    with st.expander("Fraud Stress Test"):
        with st.form("stress_form"):
            base_records = st.slider("Base Records", 100, 2000, 500, step=100)
            fraud_steps = st.text_input("Fraud Steps (comma-separated)", "0.01,0.05,0.1,0.2")
            detection_threshold = st.slider("Detection Threshold", 0.0, 1.0, 0.7, step=0.05)
            if st.form_submit_button("Run Test"):
                steps = [float(x.strip()) for x in fraud_steps.split(",")]
                with st.spinner("Running..."):
                    result = admin_tools.fraud_stress_test(base_records=base_records, fraud_steps=steps, detection_threshold=detection_threshold)
                st.dataframe(pd.DataFrame(result))
                st.plotly_chart(px.line(pd.DataFrame(result), x='fraud_pct', y='detection_rate', title="Detection Rate by Fraud %"))

    with st.expander("Schema Management"):
        with st.form("schema_form"):
            new_columns = st.text_area("New Expected Columns (comma-separated)", ",".join(schema.EXPECTED_COLUMNS))
            tag = st.text_input("Schema Tag", time.strftime('%Y%m%d_%H%M%S'))
            if st.form_submit_button("Save Schema"):
                columns = [x.strip() for x in new_columns.split(",")]
                result = admin_tools.inject_schema(columns, tag)
                st.success(f"Saved to {result['backup']}")
        backups = [json.load(open(f)) for f in SCHEMA_BACKUPS.glob("*.json")]
        if backups:
            st.dataframe(pd.DataFrame(backups))

    with st.expander("Impersonate User"):
        with st.form("impersonate_form"):
            username = st.text_input("Username to Impersonate", "demo_user")
            if st.form_submit_button("Impersonate"):
                result = admin_tools.impersonate_user(username)
                st.session_state["impersonated_user"] = result["impersonated_user"]
                st.success(f"Impersonating {result['impersonated_user']}")
                client_panel.app()
""")
APP_PY.write_text(app_py, encoding="utf-8")

# Smoke test
import importlib, sys
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
try:
    import modules.app.client_panel as client_panel
    importlib.reload(client_panel)
    print("✅ Client panel imported.")
except Exception as e:
    print("❌ Client panel failed:", e)

✅ Client panel imported.


In [16]:
from pyngrok import ngrok
import subprocess
import time

# Kill any existing tunnels
ngrok.kill()

# Set your ngrok auth token
ngrok.set_auth_token("31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF")

# Start Streamlit app
process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE
)

# Wait a few seconds for Streamlit to start
time.sleep(5)

# Connect ngrok to Streamlit port
public_url = ngrok.connect(8501)
print("👉 Open your app on:", public_url)

👉 Open your app on: NgrokTunnel: "https://a77eabb13ca9.ngrok-free.app" -> "http://localhost:8501"
