<a href="https://colab.research.google.com/github/JBlizzard-sketch/LoanIQ/blob/main/Copy_of_LoanIQ2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Colab cell to create, run, and test modules/bootstrap/deps.py
# Run this entire block in Colab to execute all steps

# %%writefile modules/bootstrap/deps.py
# Estimated line count: 50

import os
import subprocess

# List of required free libraries
REQUIRED_LIBS = [
    'streamlit==1.38.0',
    'pandas==2.2.2',
    'numpy==1.26.4',
    'scikit-learn==1.5.1',
    'xgboost==2.1.1',
    'plotly==5.22.0',
    'faker==28.1.0',
    'openpyxl==3.1.5',
    'reportlab==4.2.2',
    'pytest==8.3.2',
    'shap==0.46.0'  # For explainability
]

def install_deps():
    """Install any missing required libraries and record success in a marker file.

    The marker file ``data/.deps_ok`` short-circuits later calls, so the
    dependency check runs only until it has succeeded once.

    A requirement counts as present when its *distribution* is installed.
    Probing by distribution name matters because the pip name and the import
    name can differ (``scikit-learn`` is imported as ``sklearn``), so importing
    the pip name would report such packages as missing every time.

    Raises:
        subprocess.CalledProcessError: if a ``pip install`` fails. The marker
            file is not written in that case, so the next call retries.
    """
    # Local imports keep this block self-contained without touching the
    # module-level import list.
    import sys
    from importlib import metadata

    os.makedirs('data', exist_ok=True)
    marker_path = os.path.join('data', '.deps_ok')

    if os.path.exists(marker_path):
        print("Dependencies already installed.")
        return

    for lib in REQUIRED_LIBS:
        dist_name = lib.split('==')[0]
        try:
            metadata.version(dist_name)
        except metadata.PackageNotFoundError:
            # Use the running interpreter's pip so the package lands in the
            # environment that will actually import it.
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', lib])

    with open(marker_path, 'w') as f:
        f.write('OK')
    print("Dependencies installed successfully.")

if __name__ == '__main__':
    # Run the dependency bootstrap when this file is executed as a script.
    install_deps()

# Test code (will be written to tests/test_bootstrap.py)
"""
# tests/test_bootstrap.py
import os
import pytest

def test_deps_install():
    from modules.bootstrap import deps
    deps.install_deps()
    marker_path = os.path.join('data', '.deps_ok')
    assert os.path.exists(marker_path), "Marker file not created"
    with open(marker_path, 'r') as f:
        assert f.read() == 'OK', "Marker file content incorrect"
"""

# Colab commands to execute (included in this cell)
"""
# Write the main file
!mkdir -p modules/bootstrap
!echo -e "# modules/bootstrap/deps.py\n$(cat << 'EOF'
import os
import subprocess

REQUIRED_LIBS = [
    'streamlit==1.38.0',
    'pandas==2.2.2',
    'numpy==1.26.4',
    'scikit-learn==1.5.1',
    'xgboost==2.1.1',
    'plotly==5.22.0',
    'faker==28.1.0',
    'openpyxl==3.1.5',
    'reportlab==4.2.2',
    'pytest==8.3.2',
    'shap==0.46.0'
]

def install_deps():
    os.makedirs('data', exist_ok=True)
    marker_path = os.path.join('data', '.deps_ok')

    if not os.path.exists(marker_path):
        for lib in REQUIRED_LIBS:
            try:
                __import__(lib.split('==')[0])
            except ImportError:
                subprocess.check_call(['pip', 'install', lib])
        with open(marker_path, 'w') as f:
            f.write('OK')
        print("Dependencies installed successfully.")
    else:
        print("Dependencies already installed.")

if __name__ == '__main__':
    install_deps()
EOF
)" > modules/bootstrap/deps.py

# Write the test file
!mkdir -p tests
!echo -e "# tests/test_bootstrap.py\nimport os\nimport pytest\n\ndef test_deps_install():\n    from modules.bootstrap import deps\n    deps.install_deps()\n    marker_path = os.path.join('data', '.deps_ok')\n    assert os.path.exists(marker_path), 'Marker file not created'\n    with open(marker_path, 'r') as f:\n        assert f.read() == 'OK', 'Marker file content incorrect'" > tests/test_bootstrap.py

# Run the script
!python modules/bootstrap/deps.py

# Run the test
!pytest tests/test_bootstrap.py -v

# Verify marker file
!ls data
"""

# Expected output:
# Dependencies installed successfully.
# ============================= test session starts =============================
# tests/test_bootstrap.py::test_deps_install PASSED
# =========================== 1 passed in 0.XXs ===========================
# .deps_ok

Dependencies installed successfully.


'\n# Write the main file\n!mkdir -p modules/bootstrap\n!echo -e "# modules/bootstrap/deps.py\n$(cat << \'EOF\'\nimport os\nimport subprocess\n\nREQUIRED_LIBS = [\n    \'streamlit==1.38.0\',\n    \'pandas==2.2.2\',\n    \'numpy==1.26.4\',\n    \'scikit-learn==1.5.1\',\n    \'xgboost==2.1.1\',\n    \'plotly==5.22.0\',\n    \'faker==28.1.0\',\n    \'openpyxl==3.1.5\',\n    \'reportlab==4.2.2\',\n    \'pytest==8.3.2\',\n    \'shap==0.46.0\'\n]\n\ndef install_deps():\n    os.makedirs(\'data\', exist_ok=True)\n    marker_path = os.path.join(\'data\', \'.deps_ok\')\n    \n    if not os.path.exists(marker_path):\n        for lib in REQUIRED_LIBS:\n            try:\n                __import__(lib.split(\'==\')[0])\n            except ImportError:\n                subprocess.check_call([\'pip\', \'install\', lib])\n        with open(marker_path, \'w\') as f:\n            f.write(\'OK\')\n        print("Dependencies installed successfully.")\n    else:\n        print("Dependencies already install

In [None]:
# Colab cell to create, run, and test modules/bootstrap/drive_persist.py and modules/bootstrap/tunnel.py
# Run this entire block in Colab to execute all steps

# %%writefile modules/bootstrap/drive_persist.py
# Estimated line count: 80

import os
from google.colab import drive
import hashlib
import pickle
import time

class DrivePersist:
    """Persist pickled objects under a Google Drive folder in Colab.

    Local paths are mirrored (relative to the current working directory)
    beneath ``DRIVE_ROOT``. Each saved file gets a sibling ``.hash`` file
    holding an MD5 digest that ``load_file`` uses as an integrity check.
    """
    # Folder on the mounted Drive under which local paths are mirrored.
    DRIVE_ROOT = "/content/drive/MyDrive/loan_iq"
    # Mount point passed to google.colab's drive.mount().
    MOUNT_PATH = "/content/drive"

    def __init__(self):
        """Mount Drive if the mount point is absent and ensure the root folder exists."""
        if not os.path.exists(self.MOUNT_PATH):
            drive.mount(self.MOUNT_PATH)
        os.makedirs(self.DRIVE_ROOT, exist_ok=True)

    def persist_path(self, local_path):
        """Return the Drive path that mirrors ``local_path``.

        NOTE: the path is taken relative to the current working directory, so
        a ``local_path`` outside the cwd yields ``..`` components and the
        result points outside ``DRIVE_ROOT``.
        """
        relative_path = os.path.relpath(local_path, start=os.getcwd())
        return os.path.join(self.DRIVE_ROOT, relative_path)

    @staticmethod
    def _digest(payload):
        """Return the MD5 hex digest of ``payload`` (bytes); integrity check only."""
        return hashlib.md5(payload).hexdigest()

    @staticmethod
    def _write_atomic(path, payload):
        """Write ``payload`` (bytes) to ``path`` via a temp file and an atomic replace."""
        temp_path = path + '.tmp'
        try:
            with open(temp_path, 'wb') as f:
                f.write(payload)
            # os.replace overwrites an existing target on every platform.
            os.replace(temp_path, path)
        except Exception:
            # Do not leave a half-written temp file behind.
            try:
                os.remove(temp_path)
            except OSError:
                pass
            raise

    def save_file(self, local_path, data, max_retries=3):
        """Pickle ``data`` to its Drive mirror path and write the ``.hash`` sidecar.

        Args:
            local_path: Local path whose Drive mirror receives the data.
            data: Any picklable object.
            max_retries: Number of attempts before giving up.

        Returns:
            True on success, False if every attempt failed (each failure is printed).
        """
        drive_path = self.persist_path(local_path)
        os.makedirs(os.path.dirname(drive_path), exist_ok=True)

        for attempt in range(max_retries):
            try:
                payload = pickle.dumps(data)
                self._write_atomic(drive_path, payload)
                # Hash the exact bytes on disk. Hashing str(data) breaks for
                # objects whose repr is not stable across a pickle round-trip.
                self._write_atomic(drive_path + '.hash', self._digest(payload).encode())
                return True
            except Exception as e:
                print(f"Retry {attempt + 1}/{max_retries} for {drive_path}: {e}")
                if attempt + 1 < max_retries:
                    time.sleep(1)
        return False

    def load_file(self, local_path):
        """Load a pickled object from Drive, verifying it against its hash file.

        Returns:
            The unpickled object, or None when the file is missing, cannot be
            read or unpickled, or does not match its stored hash.
        """
        drive_path = self.persist_path(local_path)
        if not os.path.exists(drive_path):
            return None
        try:
            with open(drive_path, 'rb') as f:
                payload = f.read()
            # SECURITY NOTE: unpickling can execute arbitrary code; only load
            # files from a Drive folder you trust.
            data = pickle.loads(payload)
            hash_path = drive_path + '.hash'
            if os.path.exists(hash_path):
                with open(hash_path, 'r') as f:
                    stored_hash = f.read().strip()
                # Accept the byte-level hash, or the legacy str(data) hash
                # written by earlier versions of save_file.
                matches = (
                    stored_hash == self._digest(payload)
                    or stored_hash == self._digest(str(data).encode())
                )
                if not matches:
                    print(f"Hash mismatch for {drive_path}")
                    return None
            return data
        except Exception as e:
            print(f"Error loading {drive_path}: {e}")
            return None

if __name__ == '__main__':
    # Manual round-trip check: save a small dict through DrivePersist, then
    # load it back, printing the Drive path and the loaded value.
    persist = DrivePersist()
    test_data = {'test': 'data'}
    test_path = os.path.join('data', 'test.pkl')
    persist.save_file(test_path, test_data)
    print(f"Saved to {persist.persist_path(test_path)}")
    loaded = persist.load_file(test_path)
    print(f"Loaded: {loaded}")

# %%writefile modules/bootstrap/tunnel.py
# Estimated line count: 60

import os
import subprocess
import time

# SECURITY: the literal below is an ngrok authtoken committed to source (and
# echoed in notebook output). Rotate it and supply the replacement through the
# NGROK_AUTH_TOKEN environment variable; the literal remains only as a
# backward-compatible fallback.
NGROK_AUTH_TOKEN = (
    os.environ.get("NGROK_AUTH_TOKEN")
    or "31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF"
)

def setup_tunnel(port=8501):
    """Open an ngrok HTTPS tunnel to a local port and return its public URL.

    Args:
        port: Local port to expose (8501 by default).

    Returns:
        The tunnel's public URL, or None if any step fails (the failure is
        printed rather than raised).
    """
    # Local import keeps this block self-contained without touching the
    # module-level import list.
    import sys

    try:
        try:
            from pyngrok import ngrok
        except ImportError:
            # Install only when missing, using the running interpreter's pip
            # so the package lands in the environment that imports it.
            subprocess.check_call(
                [sys.executable, '-m', 'pip', 'install', 'pyngrok==7.2.0']
            )
            from pyngrok import ngrok
        ngrok.set_auth_token(NGROK_AUTH_TOKEN)

        # Terminate existing tunnels
        ngrok.kill()

        # Start new tunnel
        tunnel = ngrok.connect(port, bind_tls=True)
        public_url = tunnel.public_url
        print(f"Streamlit accessible at: {public_url}")
        return public_url
    except Exception as e:
        # Best-effort boundary: callers treat None as "no tunnel available".
        print(f"Tunnel setup failed: {e}")
        return None

def get_new_tunnel_url(port=8501):
    """Print and return the Colab shell command that opens a fresh ngrok tunnel.

    Args:
        port: Local port to expose; defaults to 8501, the same default that
            setup_tunnel() uses.

    Returns:
        The command string. It embeds the ngrok authtoken, so treat the
        printed output as sensitive.
    """
    cmd = f"!ngrok http {port} --authtoken {NGROK_AUTH_TOKEN}"
    print(f"Run this in Colab to get new URL:\n{cmd}")
    return cmd

if __name__ == '__main__':
    # When run as a script: open the tunnel, then print the manual fallback command.
    setup_tunnel()
    get_new_tunnel_url()

# Test code (will be written to tests/test_bootstrap.py)
"""
# tests/test_bootstrap.py
import os
import pytest
from modules.bootstrap import drive_persist, tunnel

def test_drive_persist():
    persist = drive_persist.DrivePersist()
    test_data = {'test': 'data'}
    test_path = os.path.join('data', 'test.pkl')
    assert persist.save_file(test_path, test_data), "Failed to save to Drive"
    loaded = persist.load_file(test_path)
    assert loaded == test_data, "Loaded data mismatch"
    assert os.path.exists(persist.persist_path(test_path) + '.hash'), "Hash file missing"

def test_tunnel_setup():
    # setup_tunnel() returns None on failure, so both outcomes are accepted.
    public_url = tunnel.setup_tunnel()
    assert public_url is None or isinstance(public_url, str), "Invalid tunnel URL"
    cmd = tunnel.get_new_tunnel_url()
    # The token lives in the tunnel module; a bare NGROK_AUTH_TOKEN is
    # undefined in this test file and would raise NameError.
    assert tunnel.NGROK_AUTH_TOKEN in cmd, "Ngrok authtoken not in command"
"""

# Colab commands to execute (run this entire cell)
"""
# Create directories
!mkdir -p modules/bootstrap tests data

# Write drive_persist.py
!echo -e "# modules/bootstrap/drive_persist.py\n$(cat << 'EOF'
import os
from google.colab import drive
import hashlib
import pickle
import time

class DrivePersist:
    DRIVE_ROOT = \"/content/drive/MyDrive/loan_iq\"
    MOUNT_PATH = \"/content/drive\"

    def __init__(self):
        if not os.path.exists(self.MOUNT_PATH):
            drive.mount(self.MOUNT_PATH)
        os.makedirs(self.DRIVE_ROOT, exist_ok=True)

    def persist_path(self, local_path):
        relative_path = os.path.relpath(local_path, start=os.getcwd())
        return os.path.join(self.DRIVE_ROOT, relative_path)

    def save_file(self, local_path, data, max_retries=3):
        os.makedirs(os.path.dirname(drive_path), exist_ok=True)
        for attempt in range(max_retries):
            try:
                temp_path = drive_path + '.tmp'
                with open(temp_path, 'wb') as f:
                    pickle.dump(data, f)
                os.rename(temp_path, drive_path)
                file_hash = hashlib.md5(str(data).encode()).hexdigest()
                with open(drive_path + '.hash', 'w') as f:
                    f.write(file_hash)
                return True
            except Exception as e:
                print(f\"Retry {attempt + 1}/{max_retries} for {drive_path}: {e}\")
                time.sleep(1)
        return False

    def load_file(self, local_path):
        drive_path = self.persist_path(local_path)
        if not os.path.exists(drive_path):
            return None
        try:
            with open(drive_path, 'rb') as f:
                data = pickle.load(f)
            hash_path = drive_path + '.hash'
            if os.path.exists(hash_path):
                with open(hash_path, 'r') as f:
                    stored_hash = f.read()
                current_hash = hashlib.md5(str(data).encode()).hexdigest()
                if stored_hash != current_hash:
                    print(f\"Hash mismatch for {drive_path}\")
                    return None
            return data
        except Exception as e:
            print(f\"Error loading {drive_path}: {e}\")
            return None

if __name__ == '__main__':
    persist = DrivePersist()
    test_data = {'test': 'data'}
    test_path = os.path.join('data', 'test.pkl')
    persist.save_file(test_path, test_data)
    print(f\"Saved to {persist.persist_path(test_path)}\")
    loaded = persist.load_file(test_path)
    print(f\"Loaded: {loaded}\")
EOF
)" > modules/bootstrap/drive_persist.py

# Write tunnel.py
!echo -e "# modules/bootstrap/tunnel.py\n$(cat << 'EOF'
import os
import subprocess
import time

NGROK_AUTH_TOKEN = \"31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF\"

def setup_tunnel(port=8501):
    try:
        subprocess.check_call(['pip', 'install', 'pyngrok==7.2.0'])
        from pyngrok import ngrok
        ngrok.set_auth_token(NGROK_AUTH_TOKEN)
        ngrok.kill()
        tunnel = ngrok.connect(port, bind_tls=True)
        public_url = tunnel.public_url
        print(f\"Streamlit accessible at: {public_url}\")
        return public_url
    except Exception as e:
        print(f\"Tunnel setup failed: {e}\")
        return None

def get_new_tunnel_url():
    cmd = f\"!ngrok http 8501 --authtoken {NGROK_AUTH_TOKEN}\"
    print(f\"Run this in Colab to get new URL:\n{cmd}\")
    return cmd

if __name__ == '__main__':
    setup_tunnel()
    get_new_tunnel_url()
EOF
)" > modules/bootstrap/tunnel.py

# Write test file (overwrites the existing test_bootstrap.py; test_deps_install is included again below)
!echo -e "# tests/test_bootstrap.py\n$(cat << 'EOF'
import os
import pytest
from modules.bootstrap import drive_persist, tunnel

def test_deps_install():
    from modules.bootstrap import deps
    deps.install_deps()
    marker_path = os.path.join('data', '.deps_ok')
    assert os.path.exists(marker_path), 'Marker file not created'
    with open(marker_path, 'r') as f:
        assert f.read() == 'OK', 'Marker file content incorrect'

def test_drive_persist():
    persist = drive_persist.DrivePersist()
    test_data = {'test': 'data'}
    test_path = os.path.join('data', 'test.pkl')
    assert persist.save_file(test_path, test_data), 'Failed to save to Drive'
    loaded = persist.load_file(test_path)
    assert loaded == test_data, 'Loaded data mismatch'
    assert os.path.exists(persist.persist_path(test_path) + '.hash'), 'Hash file missing'

def test_tunnel_setup():
    # setup_tunnel() returns None on failure, so both outcomes are accepted.
    public_url = tunnel.setup_tunnel()
    assert public_url is None or isinstance(public_url, str), 'Invalid tunnel URL'
    cmd = tunnel.get_new_tunnel_url()
    # The token lives in the tunnel module; a bare NGROK_AUTH_TOKEN is
    # undefined in this test file and would raise NameError.
    assert tunnel.NGROK_AUTH_TOKEN in cmd, 'Ngrok authtoken not in command'
EOF
)" > tests/test_bootstrap.py

# Run dependencies (ensure environment)
!python modules/bootstrap/deps.py

# Run drive_persist.py (will prompt for Google Drive auth code)
!python modules/bootstrap/drive_persist.py

# Run tunnel.py (may take time to set up Ngrok)
!python modules/bootstrap/tunnel.py

# Run tests
!pytest tests/test_bootstrap.py -v

# Verify files
!ls data
!ls modules/bootstrap
"""

# Expected output:
# Dependencies installed successfully.
# Mounted at /content/drive
# Saved to /content/drive/MyDrive/loan_iq/data/test.pkl
# Loaded: {'test': 'data'}
# Streamlit accessible at: https://<ngrok-url>.ngrok.io
# Run this in Colab to get new URL:
# !ngrok http 8501 --authtoken 31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF
# ============================= test session starts =============================
# tests/test_bootstrap.py::test_deps_install PASSED
# tests/test_bootstrap.py::test_drive_persist PASSED
# tests/test_bootstrap.py::test_tunnel_setup PASSED
# =========================== 3 passed in 0.XXs ===========================
# test.pkl  test.pkl.hash  .deps_ok
# deps.py  drive_persist.py  tunnel.py

Mounted at /content/drive
Saved to /content/drive/MyDrive/loan_iq/data/test.pkl
Loaded: {'test': 'data'}
Streamlit accessible at: https://f7e668fceeb4.ngrok-free.app
Run this in Colab to get new URL:
!ngrok http 8501 --authtoken 31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF


'\n# Create directories\n!mkdir -p modules/bootstrap tests data\n\n# Write drive_persist.py\n!echo -e "# modules/bootstrap/drive_persist.py\n$(cat << \'EOF\'\nimport os\nfrom google.colab import drive\nimport hashlib\nimport pickle\nimport time\n\nclass DrivePersist:\n    DRIVE_ROOT = "/content/drive/MyDrive/loan_iq"\n    MOUNT_PATH = "/content/drive"\n    \n    def __init__(self):\n        if not os.path.exists(self.MOUNT_PATH):\n            drive.mount(self.MOUNT_PATH)\n        os.makedirs(self.DRIVE_ROOT, exist_ok=True)\n    \n    def persist_path(self, local_path):\n        relative_path = os.path.relpath(local_path, start=os.getcwd())\n        return os.path.join(self.DRIVE_ROOT, relative_path)\n    \n    def save_file(self, local_path, data, max_retries=3):\n        os.makedirs(os.path.dirname(drive_path), exist_ok=True)\n        for attempt in range(max_retries):\n            try:\n                temp_path = drive_path + \'.tmp\'\n                with open(temp_path, \'wb\') as

In [None]:
# Colab cell to create, run, and test modules/core/config.py and modules/core/db.py
# Run this entire block in Colab to execute all steps

# Ensure Python path includes current directory for module imports
import sys
import os
sys.path.append(os.getcwd())

# Create directories to prevent path errors
!mkdir -p modules/core tests data models data/reports
!ls modules/core || echo "Directory modules/core created"
!ls data || echo "Directory data created"

# Write config.py
!echo -e "# modules/core/config.py\n# Estimated line count: 60\n\nimport sys\nimport os\nsys.path.append(os.getcwd())\nimport random\nimport numpy as np\n\n# Hardcoded admin credentials\nADMIN_CREDENTIALS = {\n    \"username\": \"admin\",\n    \"password\": \"Shady868\"\n}\n\n# Random seeds for reproducibility\nSEEDS = {\n    \"faker\": 42,\n    \"numpy\": 42,\n    \"random\": 42\n}\n\n# App configuration\nCONFIG = {\n    \"data_dir\": os.path.join(\"data\"),\n    \"model_dir\": os.path.join(\"models\"),\n    \"report_dir\": os.path.join(\"data\", \"reports\"),\n    \"db_path\": os.path.join(\"data\", \"loan_iq.db\"),\n    \"drive_root\": \"/content/drive/MyDrive/loan_iq\",\n    \"streamlit_port\": 8501,\n    \"fraud_types\": [\"ghost_client\", \"duplicate_id\", \"missed_payment\", \"identity_theft\"],\n    \"regions\": [\"urban\", \"rural\", \"semi_urban\"],\n    \"max_clients_batch\": 70000,\n    \"default_batch_size\": 1000\n}\n\ndef init_seeds():\n    \"\"\"Initialize random seeds for reproducibility.\"\"\"\n    random.seed(SEEDS[\"random\"])\n    np.random.seed(SEEDS[\"numpy\"])\n\ndef get_config():\n    \"\"\"Return config dictionary, ensure directories exist.\"\"\"\n    os.makedirs(CONFIG[\"data_dir\"], exist_ok=True)\n    os.makedirs(CONFIG[\"model_dir\"], exist_ok=True)\n    os.makedirs(CONFIG[\"report_dir\"], exist_ok=True)\n    return CONFIG\n\nif __name__ == \"__main__\":\n    init_seeds()\n    config = get_config()\n    print(f\"Config loaded: {config}\")" > modules/core/config.py

# Write db.py
!echo -e "# modules/core/db.py\n# Estimated line count: 120\n\nimport sys\nimport os\nsys.path.append(os.getcwd())\nimport sqlite3\nimport json\nfrom datetime import datetime\ntry:\n    from modules.core import config\nexcept ImportError as e:\n    print(f\"Import error: {e}\")\n    raise\n\nclass DB:\n    \"\"\"SQLite database wrapper for Loan IQ.\"\"\"\n    def __init__(self):\n        print(f\"sys.path: {sys.path}\")  # Debug path\n        self.db_path = config.get_config()[\"db_path\"]\n        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)\n        self.conn = sqlite3.connect(self.db_path)\n        self.cursor = self.conn.cursor()\n        self.create_tables()\n\n    def create_tables(self):\n        \"\"\"Create database tables.\"\"\"\n        tables = [\n            \"CREATE TABLE IF NOT EXISTS users (user_id INTEGER PRIMARY KEY, username TEXT UNIQUE, password TEXT, role TEXT)\",\n            \"CREATE TABLE IF NOT EXISTS clients (client_id TEXT PRIMARY KEY, name TEXT, branch TEXT, region TEXT, income REAL, created_at TIMESTAMP)\",\n            \"CREATE TABLE IF NOT EXISTS loans (loan_id TEXT PRIMARY KEY, client_id TEXT, amount REAL, status TEXT, start_date TIMESTAMP, FOREIGN KEY (client_id) REFERENCES clients(client_id))\",\n            \"CREATE TABLE IF NOT EXISTS transactions (transaction_id TEXT PRIMARY KEY, loan_id TEXT, amount REAL, date TIMESTAMP, type TEXT, FOREIGN KEY (loan_id) REFERENCES loans(loan_id))\",\n            \"CREATE TABLE IF NOT EXISTS models (model_id TEXT PRIMARY KEY, type TEXT, version TEXT, created_at TIMESTAMP)\",\n            \"CREATE TABLE IF NOT EXISTS model_versions (version_id TEXT PRIMARY KEY, model_id TEXT, config_json TEXT, data_hash TEXT, metrics_json TEXT, commit_ref TEXT, comments TEXT, created_at TIMESTAMP, FOREIGN KEY (model_id) REFERENCES models(model_id))\",\n            \"CREATE TABLE IF NOT EXISTS audit_logs (log_id INTEGER PRIMARY KEY AUTOINCREMENT, actor_id TEXT, actor_role TEXT, action TEXT, target_id 
TEXT, target_type TEXT, reason TEXT, timestamp TIMESTAMP, before_snapshot TEXT, after_snapshot TEXT, reversible BOOLEAN, reversal_id INTEGER)\",\n            \"CREATE TABLE IF NOT EXISTS simulations (sim_id TEXT PRIMARY KEY, user_id TEXT, params_json TEXT, created_at TIMESTAMP)\",\n            \"CREATE TABLE IF NOT EXISTS reports (report_id TEXT PRIMARY KEY, type TEXT, path TEXT, created_at TIMESTAMP)\",\n            \"CREATE TABLE IF NOT EXISTS assets (asset_id TEXT PRIMARY KEY, path TEXT, type TEXT, created_at TIMESTAMP)\"\n        ]\n        for table_sql in tables:\n            self.cursor.execute(table_sql)\n        self.conn.commit()\n\n    def log_action(self, actor_id, actor_role, action, target_id, target_type, reason, before_snapshot, after_snapshot, reversible=False):\n        \"\"\"Log an admin action to audit_logs.\"\"\"\n        timestamp = datetime.utcnow().isoformat()\n        snapshot_before = json.dumps(before_snapshot) if before_snapshot else \"\"\n        snapshot_after = json.dumps(after_snapshot) if after_snapshot else \"\"\n        self.cursor.execute(\n            \"INSERT INTO audit_logs (actor_id, actor_role, action, target_id, target_type, reason, timestamp, before_snapshot, after_snapshot, reversible) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n            (actor_id, actor_role, action, target_id, target_type, reason, timestamp, snapshot_before, snapshot_after, reversible)\n        )\n        self.conn.commit()\n        return self.cursor.lastrowid\n\n    def get_audit_trail(self, target_id=None, target_type=None):\n        \"\"\"Retrieve audit logs, optionally filtered.\"\"\"\n        query = \"SELECT * FROM audit_logs\"\n        params = []\n        if target_id and target_type:\n            query += \" WHERE target_id = ? 
AND target_type = ?\"\n            params = [target_id, target_type]\n        self.cursor.execute(query, params)\n        return self.cursor.fetchall()\n\n    def rollback_action(self, action_id):\n        \"\"\"Attempt to rollback an action if reversible.\"\"\"\n        self.cursor.execute(\"SELECT reversible, before_snapshot, target_id, target_type, action FROM audit_logs WHERE log_id = ?\", (action_id,))\n        result = self.cursor.fetchone()\n        if not result or not result[0]:\n            return False\n        before_snapshot = json.loads(result[1]) if result[1] else {}\n        target_id, target_type, action = result[2], result[3], result[4]\n        if target_type == \"user\" and action == \"edit\":\n            self.cursor.execute(\"UPDATE users SET username = ?, password = ?, role = ? WHERE user_id = ?\",\n                              (before_snapshot.get(\"username\"), before_snapshot.get(\"password\"), before_snapshot.get(\"role\"), target_id))\n            self.conn.commit()\n            return True\n        return False\n\n    def close(self):\n        \"\"\"Close database connection.\"\"\"\n        self.conn.close()\n\nif __name__ == \"__main__\":\n    db = DB()\n    db.cursor.execute(\"INSERT OR IGNORE INTO users (user_id, username, password, role) VALUES (?, ?, ?, ?)\",\n                     (1, \"admin\", \"Shady868\", \"admin\"))\n    db.conn.commit()\n    db.log_action(\"1\", \"admin\", \"init\", \"1\", \"user\", \"Initialize admin user\", {}, {\"username\": \"admin\"})\n    print(\"Database initialized.\")\n    db.close()" > modules/core/db.py

# Write test file
!echo -e "# tests/test_core.py\n# Estimated line count: 20\n\nimport sys\nimport os\nsys.path.append(os.getcwd())\nfrom modules.core import config, db\n\ndef test_config_init():\n    cfg = config.get_config()\n    assert os.path.exists(cfg[\"data_dir\"]), \"Data directory not created\"\n    assert cfg[\"streamlit_port\"] == 8501, \"Incorrect port\"\n    assert config.ADMIN_CREDENTIALS[\"username\"] == \"admin\", \"Admin username incorrect\"\n\ndef test_db_create_and_log():\n    database = db.DB()\n    database.cursor.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name='audit_logs'\")\n    assert database.cursor.fetchone(), \"Audit logs table not created\"\n    log_id = database.log_action(\"1\", \"admin\", \"test_action\", \"test_id\", \"test_type\", \"Test reason\", {\"key\": \"before\"}, {\"key\": \"after\"}, True)\n    assert log_id, \"Failed to log action\"\n    audit_logs = database.get_audit_trail(\"test_id\", \"test_type\")\n    assert len(audit_logs) > 0, \"Audit log not recorded\"\n    database.close()" > tests/test_core.py

# Ensure dependencies are installed
!python modules/bootstrap/deps.py

# Verify directories
!ls modules/core || echo "modules/core not found"
!ls data || echo "data not found"

# Run config.py
!python modules/core/config.py

# Run db.py
!python modules/core/db.py

# Run tests
!pytest tests/test_core.py -v

# Verify files
!ls modules/core
!ls data

# Expected output:
# Dependencies installed successfully.
# modules/core created
# data created
# Config loaded: {'data_dir': 'data', 'model_dir': 'models', 'report_dir': 'data/reports', 'db_path': 'data/loan_iq.db', 'drive_root': '/content/drive/MyDrive/loan_iq', 'streamlit_port': 8501, 'fraud_types': ['ghost_client', 'duplicate_id', 'missed_payment', 'identity_theft'], 'regions': ['urban', 'rural', 'semi_urban'], 'max_clients_batch': 70000, 'default_batch_size': 1000}
# sys.path: [...'/content'...]
# Database initialized.
# ============================= test session starts =============================
# tests/test_core.py::test_config_init PASSED
# tests/test_core.py::test_db_create_and_log PASSED
# =========================== 2 passed in 0.XXs ===========================
# config.py  db.py
# .deps_ok  loan_iq.db  reports

reports
python3: can't open file '/content/modules/bootstrap/deps.py': [Errno 2] No such file or directory
config.py  db.py
reports
Config loaded: {'data_dir': 'data', 'model_dir': 'models', 'report_dir': 'data/reports', 'db_path': 'data/loan_iq.db', 'drive_root': '/content/drive/MyDrive/loan_iq', 'streamlit_port': 8501, 'fraud_types': ['ghost_client', 'duplicate_id', 'missed_payment', 'identity_theft'], 'regions': ['urban', 'rural', 'semi_urban'], 'max_clients_batch': 70000, 'default_batch_size': 1000}
sys.path: ['/content/modules/core', '/env/python', '/usr/lib/python312.zip', '/usr/lib/python3.12', '/usr/lib/python3.12/lib-dynload', '/usr/local/lib/python3.12/dist-packages', '/usr/lib/python3/dist-packages', '/content', '/content']
  timestamp = datetime.utcnow().isoformat()
Database initialized.
platform linux -- Python 3.12.11, pytest-8.4.1, pluggy-1.6.0 -- /usr/bin/python3
cachedir: .pytest_cache
rootdir: /content
plugins: Faker-28.1.0, anyio-4.10.0, typeguard-4.4.4, langsmith-0.

In [None]:
# Colab cell to create, run, and test modules/bootstrap/deps.py, modules/core/config.py, modules/core/db.py, modules/core/utils.py, and modules/core/auth.py
# Run this entire block in Colab to execute all steps

import sys
import os
sys.path.append(os.getcwd())
print(f"Current working directory: {os.getcwd()}")  # Debug

# Create directories and reset database/dependencies
!mkdir -p modules/bootstrap modules/core tests data models data/reports
!rm -f data/loan_iq.db data/.deps_ok
!ls modules/bootstrap || echo "Directory modules/bootstrap created"
!ls modules/core || echo "Directory modules/core created"
!ls data || echo "Directory data created"

# Write deps.py using Python
os.makedirs('modules/bootstrap', exist_ok=True)
with open('modules/bootstrap/deps.py', 'w') as f:
    f.write('''# modules/bootstrap/deps.py
# Estimated line count: 50

import os
import subprocess

REQUIRED_LIBS = [
    'streamlit==1.38.0',
    'pandas==2.2.2',
    'numpy==1.26.4',
    'scikit-learn==1.5.1',
    'xgboost==2.1.1',
    'plotly==5.22.0',
    'faker==28.1.0',
    'openpyxl==3.1.5',
    'reportlab==4.2.2',
    'pytest==8.3.2',
    'shap==0.46.0'
]

def install_deps():
    """Install any missing required libraries and record success in a marker file.

    The marker file ``data/.deps_ok`` short-circuits later calls, so the
    dependency check runs only until it has succeeded once.

    A requirement counts as present when its *distribution* is installed.
    Probing by distribution name matters because the pip name and the import
    name can differ (``scikit-learn`` is imported as ``sklearn``), so importing
    the pip name would report such packages as missing every time.

    Raises:
        subprocess.CalledProcessError: if a ``pip install`` fails. The marker
            file is not written in that case, so the next call retries.
    """
    # Local imports keep this block self-contained without touching the
    # module-level import list.
    import sys
    from importlib import metadata

    os.makedirs('data', exist_ok=True)
    marker_path = os.path.join('data', '.deps_ok')
    if os.path.exists(marker_path):
        print("Dependencies already installed.")
        return

    for lib in REQUIRED_LIBS:
        dist_name = lib.split('==')[0]
        try:
            metadata.version(dist_name)
        except metadata.PackageNotFoundError:
            # Use the running interpreter's pip so the package lands in the
            # environment that will actually import it.
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', lib])

    with open(marker_path, 'w') as f:
        f.write('OK')
    print("Dependencies installed successfully.")

if __name__ == '__main__':
    install_deps()
''')
!test -f modules/bootstrap/deps.py && echo "deps.py created" || echo "Failed to create deps.py"

# Write config.py using Python
os.makedirs('modules/core', exist_ok=True)
with open('modules/core/config.py', 'w') as f:
    f.write('''# modules/core/config.py
# Estimated line count: 60

import sys
import os
sys.path.append(os.getcwd())
import random
import numpy as np

ADMIN_CREDENTIALS = {
    "username": "admin",
    "password": "Shady868"
}

SEEDS = {
    "faker": 42,
    "numpy": 42,
    "random": 42
}

CONFIG = {
    "data_dir": os.path.join("data"),
    "model_dir": os.path.join("models"),
    "report_dir": os.path.join("data", "reports"),
    "db_path": os.path.join("data", "loan_iq.db"),
    "drive_root": "/content/drive/MyDrive/loan_iq",
    "streamlit_port": 8501,
    "fraud_types": ["ghost_client", "duplicate_id", "missed_payment", "identity_theft"],
    "regions": ["urban", "rural", "semi_urban"],
    "max_clients_batch": 70000,
    "default_batch_size": 1000
}

def init_seeds():
    """Seed Python's ``random`` module and NumPy's global RNG from SEEDS."""
    seeders = (
        (random.seed, "random"),
        (np.random.seed, "numpy"),
    )
    for apply_seed, seed_key in seeders:
        apply_seed(SEEDS[seed_key])

def get_config():
    """Create the data, model and report directories if needed, then return CONFIG."""
    for dir_key in ("data_dir", "model_dir", "report_dir"):
        os.makedirs(CONFIG[dir_key], exist_ok=True)
    return CONFIG

if __name__ == "__main__":
    init_seeds()
    config = get_config()
    print(f"Config loaded: {config}")
''')
!test -f modules/core/config.py && echo "config.py created" || echo "Failed to create config.py"

# Write db.py using Python
with open('modules/core/db.py', 'w') as f:
    f.write('''# modules/core/db.py
# Estimated line count: 120

import sys
import os
sys.path.append(os.getcwd())
import sqlite3
import json
from datetime import datetime, UTC
try:
    from modules.core import config
except ImportError as e:
    print(f"Import error: {e}")
    raise

class DB:
    """SQLite database wrapper for Loan IQ."""
    def __init__(self):
        print(f"sys.path: {sys.path}")  # Debug path
        self.db_path = config.get_config()["db_path"]
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
        print(f"Creating database at: {self.db_path}")  # Debug
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row  # Enable dict-like row access
        self.cursor = self.conn.cursor()
        self.create_tables()
        print(f"Database created: {os.path.exists(self.db_path)}")  # Debug

    def create_tables(self):
        """Create database tables."""
        tables = [
            "CREATE TABLE IF NOT EXISTS users (user_id TEXT PRIMARY KEY, username TEXT UNIQUE, password TEXT, role TEXT)",
            "CREATE TABLE IF NOT EXISTS clients (client_id TEXT PRIMARY KEY, name TEXT, branch TEXT, region TEXT, income REAL, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS loans (loan_id TEXT PRIMARY KEY, client_id TEXT, amount REAL, status TEXT, start_date TIMESTAMP, FOREIGN KEY (client_id) REFERENCES clients(client_id))",
            "CREATE TABLE IF NOT EXISTS transactions (transaction_id TEXT PRIMARY KEY, loan_id TEXT, amount REAL, date TIMESTAMP, type TEXT, FOREIGN KEY (loan_id) REFERENCES loans(loan_id))",
            "CREATE TABLE IF NOT EXISTS models (model_id TEXT PRIMARY KEY, type TEXT, version TEXT, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS model_versions (version_id TEXT PRIMARY KEY, model_id TEXT, config_json TEXT, data_hash TEXT, metrics_json TEXT, commit_ref TEXT, comments TEXT, created_at TIMESTAMP, FOREIGN KEY (model_id) REFERENCES models(model_id))",
            "CREATE TABLE IF NOT EXISTS audit_logs (log_id INTEGER PRIMARY KEY AUTOINCREMENT, actor_id TEXT, actor_role TEXT, action TEXT, target_id TEXT, target_type TEXT, reason TEXT, timestamp TIMESTAMP, before_snapshot TEXT, after_snapshot TEXT, reversible BOOLEAN, reversal_id INTEGER)",
            "CREATE TABLE IF NOT EXISTS simulations (sim_id TEXT PRIMARY KEY, user_id TEXT, params_json TEXT, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS reports (report_id TEXT PRIMARY KEY, type TEXT, path TEXT, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS assets (asset_id TEXT PRIMARY KEY, path TEXT, type TEXT, created_at TIMESTAMP)"
        ]
        for table_sql in tables:
            self.cursor.execute(table_sql)
        self.conn.commit()

    def log_action(self, actor_id, actor_role, action, target_id, target_type, reason, before_snapshot, after_snapshot, reversible=False):
        """Log an admin action to audit_logs."""
        timestamp = datetime.now(UTC).isoformat()
        snapshot_before = json.dumps(before_snapshot) if before_snapshot else ""
        snapshot_after = json.dumps(after_snapshot) if after_snapshot else ""
        self.cursor.execute(
            "INSERT INTO audit_logs (actor_id, actor_role, action, target_id, target_type, reason, timestamp, before_snapshot, after_snapshot, reversible) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (actor_id, actor_role, action, target_id, target_type, reason, timestamp, snapshot_before, snapshot_after, reversible)
        )
        self.conn.commit()
        return self.cursor.lastrowid

    def get_audit_trail(self, target_id=None, target_type=None):
        """Retrieve audit logs, optionally filtered."""
        query = "SELECT * FROM audit_logs"
        params = []
        if target_id and target_type:
            query += " WHERE target_id = ? AND target_type = ?"
            params = [target_id, target_type]
        self.cursor.execute(query, params)
        return self.cursor.fetchall()

    def rollback_action(self, action_id):
        """Attempt to rollback an action if reversible."""
        self.cursor.execute("SELECT reversible, before_snapshot, target_id, target_type, action FROM audit_logs WHERE log_id = ?", (action_id,))
        result = self.cursor.fetchone()
        if not result or not result[0]:
            return False
        before_snapshot = json.loads(result[1]) if result[1] else {}
        target_id, target_type, action = result[2], result[3], result[4]
        if target_type == "user" and action == "edit":
            self.cursor.execute("UPDATE users SET username = ?, password = ?, role = ? WHERE user_id = ?",
                              (before_snapshot.get("username"), before_snapshot.get("password"), before_snapshot.get("role"), target_id))
            self.conn.commit()
            return True
        return False

    def close(self):
        """Close database connection."""
        self.conn.close()

if __name__ == "__main__":
    # Bootstrap run: make sure the admin row exists and record that in the audit log.
    admin_row = ("1", "admin", "Shady868", "admin")
    store = DB()
    store.cursor.execute(
        "INSERT OR IGNORE INTO users (user_id, username, password, role) VALUES (?, ?, ?, ?)",
        admin_row,
    )
    store.conn.commit()
    store.log_action("1", "admin", "init", "1", "user", "Initialize admin user", {}, {"username": "admin"})
    print("Database initialized.")
    store.close()
''')
!test -f modules/core/db.py && echo "db.py created" || echo "Failed to create db.py"

# Write utils.py using Python
with open('modules/core/utils.py', 'w') as f:
    f.write('''# modules/core/utils.py
# Estimated line count: 80

import sys
import os
sys.path.append(os.getcwd())
import json
from functools import wraps
try:
    from modules.core import db, config
except ImportError as e:
    print(f"Import error: {e}")
    raise

def _user_snapshot(database, user_id):
    """Return the users row for user_id as a dict, or {} when there is no such row."""
    database.cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id,))
    row = database.cursor.fetchone()
    return dict(row) if row else {}


def audit_wrapper(func):
    """Decorator to log admin actions with snapshots and reason.

    The wrapped function must be called with the keyword-only arguments
    ``actor_id``, ``actor_role`` and ``reason``. ``target_id`` is taken from
    the ``target_id`` keyword, else from the first positional argument, else
    'unknown'. ``target_type`` defaults to the wrapped function's name and is
    not forwarded to the function. For user-related target types the users
    row is captured before and after the call and stored with the log entry.

    Raises:
        ValueError: if ``reason`` is empty (raised before anything runs).
        Exception: "Action failed: ..." for any error raised while running or
            logging the action; the original error is attached as the cause.
    """
    user_target_types = ('user', 'edit_user', 'delete_user', 'add_user')
    reversible_target_types = ('user', 'edit_user', 'add_user')

    @wraps(func)
    def wrapper(*args, actor_id, actor_role, reason, **kwargs):
        if not reason:
            raise ValueError("Reason is required for audited actions")
        database = db.DB()
        try:
            target_id = kwargs.get('target_id', args[0] if args else 'unknown')
            target_type = kwargs.get('target_type', func.__name__)
            is_user_target = target_type in user_target_types
            before_snapshot = {}
            if is_user_target:
                before_snapshot = _user_snapshot(database, target_id)
                print(f"Before snapshot: {before_snapshot}")  # Debug
            filtered_kwargs = {k: v for k, v in kwargs.items() if k != 'target_type'}
            result = func(*args, actor_id=actor_id, actor_role=actor_role, reason=reason, **filtered_kwargs)
            after_snapshot = {}
            if is_user_target:
                after_snapshot = _user_snapshot(database, target_id)
                print(f"After snapshot: {after_snapshot}")  # Debug
            database.log_action(
                actor_id, actor_role, func.__name__, target_id, target_type, reason,
                before_snapshot, after_snapshot, target_type in reversible_target_types
            )
            return result
        except Exception as e:
            # Same exception type and message as before, but keep the real cause.
            raise Exception(f"Action failed: {e}") from e
        finally:
            # Single close point for both the success and the failure path.
            database.close()
    return wrapper

def dict_diff(before, after):
    """Return the per-key changes between two dictionaries.

    Every key present in either dictionary whose values differ maps to
    {'before': ..., 'after': ...}; a key missing on one side reports None there.
    """
    every_key = before.keys() | after.keys()
    return {
        name: {'before': before.get(name), 'after': after.get(name)}
        for name in every_key
        if before.get(name) != after.get(name)
    }

if __name__ == "__main__":
    # Smoke run of the decorator; the function name doubles as the logged target type.
    @audit_wrapper
    def test_action(target_id, actor_id, actor_role, reason):
        return {"result": "test"}

    audit_kwargs = {"actor_id": "1", "actor_role": "admin", "reason": "Test audit"}
    result = test_action("test_id", **audit_kwargs)
    print(f"Test action result: {result}")
''')
!test -f modules/core/utils.py && echo "utils.py created" || echo "Failed to create utils.py"

# Write auth.py using Python
with open('modules/core/auth.py', 'w') as f:
    f.write('''# modules/core/auth.py
# Estimated line count: 80

import sys
import os
sys.path.append(os.getcwd())
import sqlite3
try:
    from modules.core import config, db, utils
except ImportError as e:
    print(f"Import error: {e}")
    raise

def authenticate(username, password):
    """Authenticate user against stored credentials.

    The configured admin credentials are checked first; otherwise the users
    table is searched for a matching username/password pair.

    Returns:
        {"user_id": ..., "role": ...} on success, None when nothing matches.
    """
    # NOTE(review): passwords are compared and stored in plain text here;
    # switching to salted hashes needs a coordinated change with add_user/edit_user.
    config.get_config()  # keeps the data directories in place, as before
    admin = config.ADMIN_CREDENTIALS
    if username == admin["username"] and password == admin["password"]:
        return {"user_id": "1", "role": "admin"}
    database = db.DB()
    try:
        database.cursor.execute("SELECT user_id, role FROM users WHERE username = ? AND password = ?", (username, password))
        user = database.cursor.fetchone()
    finally:
        # Close even when the query raises, so the connection is not leaked.
        database.close()
    if user:
        return {"user_id": user[0], "role": user[1]}
    return None

@utils.audit_wrapper
def add_user(username, password, role, actor_id, actor_role, reason, target_id=None):
    """Add a new user with audit logging.

    When ``target_id`` is not given, the user id is derived from a SHA-256
    digest of the username, so the same username always yields the same id
    (the built-in hash() is salted per process and may be negative).
    ``actor_id``, ``actor_role`` and ``reason`` are consumed by the audit
    decorator.

    Returns:
        The user id of the inserted row.

    Raises:
        ValueError: if the insert violates a constraint (for example a
            duplicate user id or username).
    """
    import hashlib  # local import: only needed when an id has to be derived

    if not target_id:
        digest = hashlib.sha256(username.encode("utf-8")).hexdigest()
        target_id = f"u_{digest[:8]}"
    print(f"Adding user with target_id: {target_id}")  # Debug
    database = db.DB()
    try:
        database.cursor.execute("INSERT INTO users (user_id, username, password, role) VALUES (?, ?, ?, ?)",
                              (target_id, username, password, role))
        database.conn.commit()
        return target_id
    except sqlite3.IntegrityError as e:
        raise ValueError(f"Failed to add user {username}: {e}") from e
    finally:
        database.close()

@utils.audit_wrapper
def edit_user(user_id, updates, actor_id, actor_role, reason, target_id=None):
    """Edit user details with audit logging.

    Only the 'username', 'password' and 'role' keys of ``updates`` are
    applied; other keys are ignored. ``target_id`` is accepted so callers can
    pass it through the audit decorator; it is not used here.

    Returns:
        True once the UPDATE has been committed.

    Raises:
        ValueError: if ``updates`` contains no allowed field, or the update
            violates a constraint.
    """
    allowed_fields = ('username', 'password', 'role')
    updates = {k: v for k, v in updates.items() if k in allowed_fields}
    if not updates:
        raise ValueError("No valid fields to update")
    # Column names come only from the allow-list above; values stay parameterized.
    set_clause = ", ".join(f"{k} = ?" for k in updates)
    values = [*updates.values(), user_id]
    database = db.DB()
    try:
        database.cursor.execute(f"UPDATE users SET {set_clause} WHERE user_id = ?", values)
        database.conn.commit()
        return True
    except sqlite3.IntegrityError as e:
        raise ValueError(f"Failed to edit user {user_id}: {e}") from e
    finally:
        # Also covers errors other than IntegrityError, which used to leak the connection.
        database.close()

@utils.audit_wrapper
def delete_user(user_id, actor_id, actor_role, reason, target_id=None, confirmation=None):
    """Delete a user row after checking the typed confirmation phrase.

    ``confirmation`` must equal "CONFIRM DELETE <user_id>"; anything else
    raises ValueError before the database is opened. Database errors are
    re-raised as ValueError. Returns True after the delete is committed.
    """
    expected_phrase = f"CONFIRM DELETE {user_id}"
    if confirmation != expected_phrase:
        raise ValueError("Invalid confirmation for deletion")
    database = db.DB()
    try:
        database.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_id,))
        database.conn.commit()
    except sqlite3.Error as e:
        database.close()
        raise ValueError(f"Failed to delete user {user_id}: {e}")
    else:
        database.close()
        return True

if __name__ == "__main__":
    # Smoke run: log in as admin, then try to add a sample user.
    user = authenticate("admin", "Shady868")
    print(f"Auth result: {user}")
    audit_kwargs = {"actor_id": "1", "actor_role": "admin", "reason": "Test add user"}
    try:
        new_user_id = add_user("test_user", "test_pass", "user", target_id="test_1", **audit_kwargs)
        print(f"Added user: {new_user_id}")
    except Exception as e:
        print(f"Error adding user: {e}")
''')
!test -f modules/core/auth.py && echo "auth.py created" || echo "Failed to create auth.py"

# Write test_core.py using Python
os.makedirs('tests', exist_ok=True)
with open('tests/test_core.py', 'w') as f:
    f.write('''# tests/test_core.py
# Estimated line count: 60

import sys
import os
import sqlite3
sys.path.append(os.getcwd())
try:
    from modules.core import config, db, utils, auth
except ImportError as e:
    print(f"Import error: {e}")
    raise

def test_config_init():
    """get_config creates the data directory and exposes the expected settings."""
    settings = config.get_config()
    data_dir = settings["data_dir"]
    assert os.path.exists(data_dir), "Data directory not created"
    port = settings["streamlit_port"]
    assert port == 8501, "Incorrect port"
    admin_name = config.ADMIN_CREDENTIALS["username"]
    assert admin_name == "admin", "Admin username incorrect"

def test_db_create_and_log():
    """Opening a DB creates audit_logs, and a logged action can be read back."""
    store = db.DB()
    store.cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='audit_logs'")
    table_row = store.cursor.fetchone()
    assert table_row, "Audit logs table not created"
    new_log_id = store.log_action(
        "1", "admin", "test_action", "test_id", "test_type", "Test reason",
        {"key": "before"}, {"key": "after"}, True,
    )
    assert new_log_id, "Failed to log action"
    matching = store.get_audit_trail("test_id", "test_type")
    assert len(matching) > 0, "Audit log not recorded"
    store.close()

def test_authenticate():
    """The admin login succeeds and unknown credentials are rejected."""
    admin_session = auth.authenticate("admin", "Shady868")
    expected_session = {"user_id": "1", "role": "admin"}
    assert admin_session == expected_session, "Admin authentication failed"
    rejected = auth.authenticate("wrong", "wrong")
    assert rejected is None, "Invalid credentials should fail"

def test_audit_wrapper():
    """A wrapped call returns the function result and leaves an audit row."""
    # The inner function name is what the decorator logs as target type.
    @utils.audit_wrapper
    def test_action(target_id, actor_id, actor_role, reason):
        return True

    audit_kwargs = {"actor_id": "1", "actor_role": "admin", "reason": "Test audit"}
    outcome = test_action("test_id", **audit_kwargs)
    assert outcome, "Audit wrapper failed"
    store = db.DB()
    recorded = store.get_audit_trail("test_id", "test_action")
    assert len(recorded) > 0, "Audit log not recorded"
    store.close()

def test_add_user():
    """add_user inserts the row under the requested id."""
    new_id = "test_2"
    # Remove any leftover row so the insert cannot collide.
    cleanup = db.DB()
    cleanup.cursor.execute("DELETE FROM users WHERE user_id = ?", (new_id,))
    cleanup.conn.commit()
    cleanup.close()
    returned_id = auth.add_user(
        "test_user2", "test_pass2", "user",
        actor_id="1", actor_role="admin", reason="Test add user", target_id=new_id,
    )
    assert returned_id == "test_2", "Failed to add user"
    check = db.DB()
    check.cursor.execute("SELECT username FROM users WHERE user_id = ?", (new_id,))
    stored = check.cursor.fetchone()
    assert stored and stored[0] == "test_user2", "User not added correctly"
    check.close()

def test_edit_user():
    """edit_user changes the password of an existing row."""
    user_key = "test_2"
    # Start from a known row: drop any leftover, then insert a fresh one.
    setup = db.DB()
    setup.cursor.execute("DELETE FROM users WHERE user_id = ?", (user_key,))
    setup.cursor.execute(
        "INSERT INTO users (user_id, username, password, role) VALUES (?, ?, ?, ?)",
        (user_key, "test_user2", "test_pass2", "user"),
    )
    setup.conn.commit()
    setup.close()
    edited = auth.edit_user(
        user_key, {"password": "new_pass"},
        actor_id="1", actor_role="admin", reason="Test edit user", target_id=user_key,
    )
    assert edited, "Failed to edit user"
    check = db.DB()
    check.cursor.execute("SELECT password FROM users WHERE user_id = ?", (user_key,))
    stored = check.cursor.fetchone()
    assert stored and stored[0] == "new_pass", "User not edited correctly"
    check.close()

def test_db_existence():
    """The database file exists at the configured path once a DB has been opened."""
    db_path = config.get_config()["db_path"]
    # Open (and close) a connection here so the test does not depend on an
    # earlier test having created the file.
    db.DB().close()
    assert os.path.exists(db_path), "Database file not created"
''')
!test -f tests/test_core.py && echo "test_core.py created" || echo "Failed to create test_core.py"

# Ensure dependencies are installed
!python modules/bootstrap/deps.py

# Verify directories
!ls modules/bootstrap || echo "modules/bootstrap not found"
!ls modules/core || echo "modules/core not found"
!ls data || echo "data not found"

# Run config.py
!python modules/core/config.py

# Run db.py
!python modules/core/db.py

# Run utils.py
!python modules/core/utils.py

# Run auth.py
!python modules/core/auth.py

# Run tests
!pytest tests/test_core.py -v

# Verify files
!ls modules/bootstrap
!ls modules/core
!ls data

# Expected output:
# Current working directory: /content
# Directory modules/bootstrap created
# Directory modules/core created
# Directory data created
# deps.py created
# config.py created
# db.py created
# utils.py created
# auth.py created
# test_core.py created
# Dependencies installed successfully.
# deps.py
# auth.py  config.py  db.py  utils.py
# .deps_ok  loan_iq.db  reports
# Config loaded: {'data_dir': 'data', 'model_dir': 'models', 'report_dir': 'data/reports', 'db_path': 'data/loan_iq.db', 'drive_root': '/content/drive/MyDrive/loan_iq', 'streamlit_port': 8501, 'fraud_types': ['ghost_client', 'duplicate_

Current working directory: /content
deps.py
auth.py  config.py  db.py  __pycache__	utils.py
reports
deps.py created
config.py created
db.py created
utils.py created
auth.py created
test_core.py created
Dependencies installed successfully.
deps.py
auth.py  config.py  db.py  __pycache__	utils.py
reports
Config loaded: {'data_dir': 'data', 'model_dir': 'models', 'report_dir': 'data/reports', 'db_path': 'data/loan_iq.db', 'drive_root': '/content/drive/MyDrive/loan_iq', 'streamlit_port': 8501, 'fraud_types': ['ghost_client', 'duplicate_id', 'missed_payment', 'identity_theft'], 'regions': ['urban', 'rural', 'semi_urban'], 'max_clients_batch': 70000, 'default_batch_size': 1000}
sys.path: ['/content/modules/core', '/env/python', '/usr/lib/python312.zip', '/usr/lib/python3.12', '/usr/lib/python3.12/lib-dynload', '/usr/local/lib/python3.12/dist-packages', '/usr/lib/python3/dist-packages', '/content', '/content']
Creating database at: data/loan_iq.db
Database created: True
Database initialized.
s

In [None]:
# Colab cell to create, run, and test modules/synth/faker_engine.py and modules/synth/generators.py
# Run this entire block in Colab to execute all steps

import sys
import os
sys.path.append(os.getcwd())
print(f"Current working directory: {os.getcwd()}")  # Debug

# Create directories and reset database to prevent schema conflicts
!mkdir -p modules/synth tests data models data/reports
!rm -f data/loan_iq.db
!ls modules/synth || echo "Directory modules/synth created"
!ls tests || echo "Directory tests created"
!ls data || echo "Directory data created"

# Write faker_engine.py using Python
os.makedirs('modules/synth', exist_ok=True)
with open('modules/synth/faker_engine.py', 'w') as f:
    f.write('''# modules/synth/faker_engine.py
# Estimated line count: 300

import sys
import os
sys.path.append(os.getcwd())
from faker import Faker
import random
from datetime import datetime, timedelta
try:
    from modules.core import config
except ImportError as e:
    print(f"Import error: {e}")
    raise

class LoanIQFaker:
    """Custom Faker for generating Loan IQ synthetic data with fraud patterns.

    Creating an instance seeds both Faker and the stdlib ``random`` module
    from the configured seeds and reads the fraud types and regions from the
    configuration. Methods that take ``fraud_type`` skew their output with a
    fixed probability when the given type applies to them.
    """
    def __init__(self):
        self.faker = Faker()
        Faker.seed(config.SEEDS["faker"])
        random.seed(config.SEEDS["random"])
        self.config = config.get_config()
        self.fraud_types = self.config["fraud_types"]
        self.regions = self.config["regions"]

    def _prefixed_id(self, prefix):
        """Return prefix + '_' + the first dash-separated group of a fresh UUID4."""
        return f"{prefix}_{self.faker.uuid4().split('-')[0]}"

    def client_id(self):
        """Generate a client ID of the form C_<uuid fragment>."""
        return self._prefixed_id("C")

    def loan_id(self):
        """Generate a loan ID of the form L_<uuid fragment>."""
        return self._prefixed_id("L")

    def transaction_id(self):
        """Generate a transaction ID of the form T_<uuid fragment>."""
        return self._prefixed_id("T")

    def client_name(self, fraud_type=None):
        """Generate client name; a ghost client has a 10% chance of no name (None)."""
        if fraud_type == "ghost_client" and random.random() < 0.1:
            return None  # Ghost client has no name
        return self.faker.name()

    def duplicate_id(self, existing_ids):
        """Generate client ID with a 5% chance of reusing one of existing_ids.

        Falls back to a fresh ID when existing_ids is empty.
        """
        if random.random() < 0.05:  # 5% chance of duplicate ID
            return random.choice(existing_ids) if existing_ids else self.client_id()
        return self.client_id()

    def income(self, fraud_type=None):
        """Generate income in [20000, 100000]; identity theft may inflate it."""
        if fraud_type == "identity_theft" and random.random() < 0.1:
            return random.uniform(100000, 1000000)  # Suspiciously high income
        return random.uniform(20000, 100000)

    def branch(self):
        """Generate branch name (a Faker city name)."""
        return self.faker.city()

    def region(self):
        """Pick one of the configured regions at random."""
        return random.choice(self.regions)

    def loan_amount(self, fraud_type=None):
        """Generate loan amount in [1000, 50000]; missed-payment fraud may raise it."""
        if fraud_type == "missed_payment" and random.random() < 0.2:
            return random.uniform(50000, 200000)  # Higher loan for missed payments
        return random.uniform(1000, 50000)

    def loan_status(self, fraud_type=None):
        """Generate loan status; missed-payment fraud has an extra 30% chance of default."""
        if fraud_type == "missed_payment" and random.random() < 0.3:
            return "default"
        return random.choice(["active", "paid", "default"])

    def transaction_amount(self, loan_amount):
        """Generate transaction amount based on loan.

        The amount is drawn between 100 and 10% of the loan, capped at 5000.
        For loans under 1000 the 10% cap is below 100; the lower bound is then
        lowered to the cap so the bounds are never inverted.
        """
        upper = min(loan_amount * 0.1, 5000)
        lower = min(100, upper)
        return random.uniform(lower, upper)

    def transaction_type(self, fraud_type=None):
        """Generate transaction type; identity theft may yield 'suspicious_transfer'."""
        if fraud_type == "identity_theft" and random.random() < 0.1:
            return "suspicious_transfer"
        return random.choice(["payment", "fee", "interest"])

    def random_date(self, start_days=-365, end_days=0):
        """Generate an ISO-format datetime between now+start_days and now+end_days."""
        now = datetime.now()
        start = now + timedelta(days=start_days)
        end = now + timedelta(days=end_days)
        return self.faker.date_time_between(start, end).isoformat()

if __name__ == "__main__":
    # Print one sample of every generator, in the same order as the methods run.
    faker = LoanIQFaker()
    samples = [
        ("Client ID", faker.client_id),
        ("Client Name", faker.client_name),
        ("Loan ID", faker.loan_id),
        ("Transaction ID", faker.transaction_id),
        ("Income", faker.income),
        ("Branch", faker.branch),
        ("Region", faker.region),
        ("Loan Amount", faker.loan_amount),
        ("Loan Status", faker.loan_status),
        ("Transaction Amount", lambda: faker.transaction_amount(10000)),
        ("Transaction Type", faker.transaction_type),
        ("Random Date", faker.random_date),
    ]
    for label, make_sample in samples:
        print(f"{label}: {make_sample()}")
''')
!test -f modules/synth/faker_engine.py && echo "faker_engine.py created" || echo "Failed to create faker_engine.py"

# Write generators.py using Python
with open('modules/synth/generators.py', 'w') as f:
    f.write('''# modules/synth/generators.py
# Estimated line count: 250

import sys
import os
import random
sys.path.append(os.getcwd())
import pandas as pd
import numpy as np
from datetime import datetime
try:
    from modules.core import db, config
    from modules.synth import faker_engine
except ImportError as e:
    print(f"Import error: {e}")
    raise

class DataGenerator:
    """Generate synthetic data for Loan IQ and store in database.

    The generate_* methods return pandas DataFrames whose columns are always
    present, even when no rows were generated, so the three generators can be
    chained on empty input.
    """

    CLIENT_COLUMNS = ["client_id", "name", "branch", "region", "income", "created_at"]
    LOAN_COLUMNS = ["loan_id", "client_id", "amount", "status", "start_date"]
    TRANSACTION_COLUMNS = ["transaction_id", "loan_id", "amount", "date", "type"]

    def __init__(self):
        self.faker = faker_engine.LoanIQFaker()
        self.config = config.get_config()
        self.db_path = self.config["db_path"]

    def _pick_fraud_type(self, fraud_ratio):
        """Pick a fraud type (or None) so that fraud_ratio is split evenly over the types.

        Raises:
            ValueError: if fraud_ratio is outside [0, 1], which would
                otherwise produce negative sampling weights.
        """
        if not 0 <= fraud_ratio <= 1:
            raise ValueError("fraud_ratio must be between 0 and 1")
        fraud_types = self.faker.fraud_types
        share = fraud_ratio / len(fraud_types)
        return random.choices(
            fraud_types + [None],
            weights=[share] * len(fraud_types) + [1 - fraud_ratio],
            k=1
        )[0]

    @staticmethod
    def _rows(frame, columns):
        """Return the frame's rows as plain tuples in the given column order."""
        if frame.empty:
            return []
        return list(frame[columns].itertuples(index=False, name=None))

    def generate_clients(self, n, fraud_ratio=0.1):
        """Generate n clients with optional fraud patterns.

        A client drawn as "duplicate_id" fraud may reuse the ID of an earlier
        client, so client_id values are not guaranteed to be unique.
        """
        clients = []
        existing_ids = []
        for _ in range(n):
            fraud_type = self._pick_fraud_type(fraud_ratio)
            client_id = self.faker.duplicate_id(existing_ids) if fraud_type == "duplicate_id" else self.faker.client_id()
            existing_ids.append(client_id)
            clients.append({
                "client_id": client_id,
                "name": self.faker.client_name(fraud_type),
                "branch": self.faker.branch(),
                "region": self.faker.region(),
                "income": self.faker.income(fraud_type),
                "created_at": self.faker.random_date()
            })
        return pd.DataFrame(clients, columns=self.CLIENT_COLUMNS)

    def generate_loans(self, clients, n_per_client=2, fraud_ratio=0.1):
        """Generate between 1 and n_per_client loans for every row of clients.

        One fraud type is drawn per client and applied to all of its loans.
        """
        loans = []
        for client_id in clients["client_id"]:
            fraud_type = self._pick_fraud_type(fraud_ratio)
            for _ in range(random.randint(1, n_per_client)):
                loans.append({
                    "loan_id": self.faker.loan_id(),
                    "client_id": client_id,
                    "amount": self.faker.loan_amount(fraud_type),
                    "status": self.faker.loan_status(fraud_type),
                    "start_date": self.faker.random_date()
                })
        return pd.DataFrame(loans, columns=self.LOAN_COLUMNS)

    def generate_transactions(self, loans, n_per_loan=3, fraud_ratio=0.1):
        """Generate between 1 and n_per_loan transactions for every row of loans.

        One fraud type is drawn per loan and applied to all of its transactions.
        """
        transactions = []
        for loan_id, loan_amount in zip(loans["loan_id"], loans["amount"]):
            fraud_type = self._pick_fraud_type(fraud_ratio)
            for _ in range(random.randint(1, n_per_loan)):
                transactions.append({
                    "transaction_id": self.faker.transaction_id(),
                    "loan_id": loan_id,
                    "amount": self.faker.transaction_amount(loan_amount),
                    "date": self.faker.random_date(),
                    "type": self.faker.transaction_type(fraud_type)
                })
        return pd.DataFrame(transactions, columns=self.TRANSACTION_COLUMNS)

    def save_to_db(self, clients, loans, transactions, actor_id="1", actor_role="admin", reason="Synthetic data generation"):
        """Save generated data to loan_iq.db with audit logging.

        Rows whose primary key already exists are skipped (INSERT OR IGNORE),
        so the number of stored rows can be lower than the number of rows in
        the frames. All inserts are committed together, then one audit entry
        with the frame sizes is written. The connection is always closed.
        """
        database = db.DB()
        print(f"Saving to database: {self.db_path}")  # Debug
        try:
            # executemany sends each table as one batch instead of one call per row.
            database.cursor.executemany(
                "INSERT OR IGNORE INTO clients (client_id, name, branch, region, income, created_at) VALUES (?, ?, ?, ?, ?, ?)",
                self._rows(clients, self.CLIENT_COLUMNS)
            )
            database.cursor.executemany(
                "INSERT OR IGNORE INTO loans (loan_id, client_id, amount, status, start_date) VALUES (?, ?, ?, ?, ?)",
                self._rows(loans, self.LOAN_COLUMNS)
            )
            database.cursor.executemany(
                "INSERT OR IGNORE INTO transactions (transaction_id, loan_id, amount, date, type) VALUES (?, ?, ?, ?, ?)",
                self._rows(transactions, self.TRANSACTION_COLUMNS)
            )
            database.conn.commit()
            database.log_action(
                actor_id, actor_role, "generate_data", "multiple", "synthetic_data", reason,
                {}, {"clients": len(clients), "loans": len(loans), "transactions": len(transactions)}
            )
            print(f"Saved {len(clients)} clients, {len(loans)} loans, {len(transactions)} transactions to DB")
        finally:
            database.close()

    def export_to_csv(self, clients, loans, transactions, output_dir=None):
        """Export data to clients.csv, loans.csv and transactions.csv.

        Files are written to output_dir, or to the configured data directory
        when output_dir is not given; the directory is created if needed.
        """
        output_dir = output_dir or self.config["data_dir"]
        os.makedirs(output_dir, exist_ok=True)
        clients.to_csv(os.path.join(output_dir, "clients.csv"), index=False)
        loans.to_csv(os.path.join(output_dir, "loans.csv"), index=False)
        transactions.to_csv(os.path.join(output_dir, "transactions.csv"), index=False)
        print(f"Exported data to {output_dir}/[clients,loans,transactions].csv")

if __name__ == "__main__":
    # Demo run: build a small linked data set, store it, and export it as CSV.
    demo_ratio = 0.2
    generator = DataGenerator()
    clients = generator.generate_clients(10, fraud_ratio=demo_ratio)
    loans = generator.generate_loans(clients, n_per_client=2, fraud_ratio=demo_ratio)
    transactions = generator.generate_transactions(loans, n_per_loan=3, fraud_ratio=demo_ratio)
    generated = (clients, loans, transactions)
    generator.save_to_db(*generated)
    generator.export_to_csv(*generated)
    print("Generated and saved synthetic data.")
''')
!test -f modules/synth/generators.py && echo "generators.py created" || echo "Failed to create generators.py"

# Write test_synth.py using Python
with open('tests/test_synth.py', 'w') as f:
    f.write('''# tests/test_synth.py
# Estimated line count: 80

import sys
import os
import sqlite3
import pandas as pd
sys.path.append(os.getcwd())
try:
    from modules.core import config, db
    from modules.synth import faker_engine, generators
except ImportError as e:
    print(f"Import error: {e}")
    raise

def test_faker_engine():
    """The basic generators return values of the expected kind."""
    engine = faker_engine.LoanIQFaker()
    new_client_id = engine.client_id()
    assert len(new_client_id) > 0, "Client ID not generated"
    known_regions = config.get_config()["regions"]
    assert engine.region() in known_regions, "Invalid region"
    assert isinstance(engine.income(), float), "Income not float"
    assert isinstance(engine.loan_amount(), float), "Loan amount not float"
    valid_statuses = ["active", "paid", "default"]
    assert engine.loan_status() in valid_statuses, "Invalid loan status"

def test_generate_clients():
    """generate_clients returns the requested rows with the client columns."""
    frame = generators.DataGenerator().generate_clients(5, fraud_ratio=0.2)
    assert len(frame) == 5, "Incorrect number of clients"
    expected_columns = {"client_id", "name", "branch", "region", "income", "created_at"}
    assert set(frame.columns) == expected_columns, "Incorrect client columns"
    known_regions = config.get_config()["regions"]
    assert frame["region"].isin(known_regions).all(), "Invalid regions"

def test_generate_loans():
    """Every client gets at least one loan that points back to it."""
    maker = generators.DataGenerator()
    client_frame = maker.generate_clients(3, fraud_ratio=0.2)
    loan_frame = maker.generate_loans(client_frame, n_per_client=2, fraud_ratio=0.2)
    assert len(loan_frame) >= 3, "Incorrect number of loans"
    expected_columns = {"loan_id", "client_id", "amount", "status", "start_date"}
    assert set(loan_frame.columns) == expected_columns, "Incorrect loan columns"
    known_clients = client_frame["client_id"]
    assert loan_frame["client_id"].isin(known_clients).all(), "Invalid client IDs in loans"

def test_generate_transactions():
    """Every loan gets at least one transaction that points back to it."""
    maker = generators.DataGenerator()
    client_frame = maker.generate_clients(2, fraud_ratio=0.2)
    loan_frame = maker.generate_loans(client_frame, n_per_client=2, fraud_ratio=0.2)
    tx_frame = maker.generate_transactions(loan_frame, n_per_loan=3, fraud_ratio=0.2)
    assert len(tx_frame) >= 2, "Incorrect number of transactions"
    expected_columns = {"transaction_id", "loan_id", "amount", "date", "type"}
    assert set(tx_frame.columns) == expected_columns, "Incorrect transaction columns"
    known_loans = loan_frame["loan_id"]
    assert tx_frame["loan_id"].isin(known_loans).all(), "Invalid loan IDs in transactions"

def test_save_to_db():
    """save_to_db stores the generated rows and writes an audit entry.

    The client count is compared with the number of distinct client IDs:
    the generator may emit a duplicate ID on purpose, and save_to_db skips
    rows whose primary key already exists.
    """
    generator = generators.DataGenerator()
    database = db.DB()
    print("Clearing tables for test")  # Debug
    database.cursor.execute("DELETE FROM clients")
    database.cursor.execute("DELETE FROM loans")
    database.cursor.execute("DELETE FROM transactions")
    database.conn.commit()
    database.close()
    clients = generator.generate_clients(5, fraud_ratio=0.2)
    loans = generator.generate_loans(clients, n_per_client=2, fraud_ratio=0.2)
    transactions = generator.generate_transactions(loans, n_per_loan=3, fraud_ratio=0.2)
    generator.save_to_db(clients, loans, transactions)
    database = db.DB()
    database.cursor.execute("SELECT COUNT(*) FROM clients")
    assert database.cursor.fetchone()[0] == clients["client_id"].nunique(), "Clients not saved to DB"
    database.cursor.execute("SELECT COUNT(*) FROM loans")
    assert database.cursor.fetchone()[0] >= 5, "Loans not saved to DB"
    database.cursor.execute("SELECT COUNT(*) FROM transactions")
    assert database.cursor.fetchone()[0] >= 5, "Transactions not saved to DB"
    database.cursor.execute("SELECT * FROM audit_logs WHERE target_type = 'synthetic_data'")
    assert len(database.cursor.fetchall()) > 0, "Audit log not recorded"
    database.close()

def test_export_to_csv():
    """Exporting writes one CSV per entity into the configured data directory."""
    gen = generators.DataGenerator()
    client_df = gen.generate_clients(5, fraud_ratio=0.2)
    loan_df = gen.generate_loans(client_df, n_per_client=2, fraud_ratio=0.2)
    txn_df = gen.generate_transactions(loan_df, n_per_loan=3, fraud_ratio=0.2)
    gen.export_to_csv(client_df, loan_df, txn_df)
    # File name mapped to the failure message reported when it is missing.
    expected_files = {
        "clients.csv": "Clients CSV not exported",
        "loans.csv": "Loans CSV not exported",
        "transactions.csv": "Transactions CSV not exported",
    }
    for file_name, failure_message in expected_files.items():
        csv_path = os.path.join(config.get_config()["data_dir"], file_name)
        assert os.path.exists(csv_path), failure_message
''')
!test -f tests/test_synth.py && echo "test_synth.py created" || echo "Failed to create test_synth.py"

# Ensure dependencies are installed (assuming deps.py exists from previous cell)
!python modules/bootstrap/deps.py

# Verify directories
!ls modules/synth || echo "modules/synth not found"
!ls tests || echo "tests not found"
!ls data || echo "data not found"

# Run faker_engine.py
!python modules/synth/faker_engine.py

# Run generators.py
!python modules/synth/generators.py

# Run tests
!pytest tests/test_synth.py -v

# Verify files
!ls modules/synth
!ls tests
!ls data

# Expected output:
# Current working directory: /content
# Directory modules/synth created
# Directory tests created
# Directory data created
# faker_engine.py created
# generators.py created
# test_synth.py created
# Dependencies installed successfully.
# faker_engine.py  generators.py
# test_core.py  test_synth.py
# .deps_ok  clients.csv  loans.csv  loan_iq.db  reports  transactions.csv
# Client ID: C_...
# Client Name: ...
# Loan ID: L_...
# Transaction ID: T_...
# Income: ...
# Branch: ...
# Region: ...
# Loan Amount: ...
# Loan Status: ...
# Transaction Amount: ...
# Transaction Type: ...
# Random Date: ...
# Saving to database: data/loan_iq.db
# Saved 10 clients, ... loans, ... transactions to DB
# Exported data to data/[clients,loans,transactions].csv
# Generated and saved synthetic data.
# ============================= test session starts =============================
# tests/test_synth.py::test_faker_engine PASSED
# tests/test_synth.py::test_generate_clients PASSED
# tests/test_synth.py::test_generate_loans PASSED
# tests/test_synth.py::test_generate_transactions PASSED
# tests/test_synth.py::test_save_to_db PASSED
# tests/test_synth.py::test_export_to_csv PASSED
# =========================== 6 passed in 0.XXs ===========================

Current working directory: /content
faker_engine.py  generators.py	__pycache__
__pycache__  test_core.py  test_synth.py
reports
faker_engine.py created
generators.py created
test_synth.py created
Dependencies already installed.
faker_engine.py  generators.py	__pycache__
__pycache__  test_core.py  test_synth.py
reports
Client ID: C_bdd640fb
Client Name: Daniel Doyle
Loan ID: L_8b9d2434
Transaction ID: T_0822e8f3
Income: 71154.1438766307
Branch: North Jefferyhaven
Region: urban
Loan Amount: 37335.97448823181
Loan Status: active
Transaction Amount: 300.88966433394046
Transaction Type: interest
Random Date: 2025-04-07T05:38:09.639320
sys.path: ['/content/modules/synth', '/env/python', '/usr/lib/python312.zip', '/usr/lib/python3.12', '/usr/lib/python3.12/lib-dynload', '/usr/local/lib/python3.12/dist-packages', '/usr/lib/python3/dist-packages', '/content', '/content', '/content', '/content']
Creating database at: data/loan_iq.db
Database created: True
Saving to database: data/loan_iq.db
Save

In [None]:

import os
os.makedirs('modules/bootstrap', exist_ok=True)
with open('modules/bootstrap/deps.py', 'w') as f:
    f.write('''# modules/bootstrap/deps.py
import os
import subprocess

REQUIRED_LIBS = [
    'streamlit==1.38.0',
    'pandas==2.2.2',
    'numpy==1.26.4',
    'scikit-learn==1.5.1',
    'xgboost==2.1.1',
    'plotly==5.22.0',
    'faker==28.1.0',
    'openpyxl==3.1.5',
    'reportlab==4.2.2',
    'pytest==8.3.2',
    'shap==0.46.0'
]

def install_deps():
    """Install missing required libraries and create the marker file.

    Each REQUIRED_LIBS entry is a pinned requirement such as 'pandas==2.2.2'.
    A requirement is installed only when its distribution is not installed
    at all; an installed distribution of another version is left untouched.
    Presence is checked by distribution name through importlib.metadata,
    because the distribution name is not always the import name
    (scikit-learn is imported as sklearn, so an import probe by distribution
    name always reports it as missing).

    Once every requirement is present, data/.deps_ok is written so later
    calls return immediately.

    Raises:
        subprocess.CalledProcessError: if a pip install command fails. The
            marker file is not written in that case.
    """
    # Local imports keep this function self-contained within the module.
    import sys
    from importlib import metadata

    os.makedirs('data', exist_ok=True)
    marker_path = os.path.join('data', '.deps_ok')
    if os.path.exists(marker_path):
        print("Dependencies already installed.")
        return

    for requirement in REQUIRED_LIBS:
        dist_name = requirement.split('==')[0]
        try:
            metadata.version(dist_name)
        except metadata.PackageNotFoundError:
            # Use the running interpreter's pip so the package lands in the
            # environment that will actually import it.
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', requirement])
    with open(marker_path, 'w') as marker:
        marker.write('OK')
    print("Dependencies installed successfully.")

# Run the installer when this file is executed as a script.
if __name__ == '__main__':
    install_deps()
''')
!test -f modules/bootstrap/deps.py && echo "deps.py created" || echo "Failed to create deps.py"

deps.py created


In [None]:
import os
os.makedirs('modules/core', exist_ok=True)
with open('modules/core/config.py', 'w') as f:
    f.write('''# modules/core/config.py
import sys
import os
sys.path.append(os.getcwd())
import random
import numpy as np

# Default administrator login.
# NOTE(review): the password is a plain-text literal in source; consider
# loading it from the environment or a secrets store instead.
ADMIN_CREDENTIALS = {
    "username": "admin",
    "password": "Shady868"
}

# Fixed seeds for reproducible runs. init_seeds() applies the "random" and
# "numpy" entries; the "faker" entry is not applied in this module.
SEEDS = {
    "faker": 42,
    "numpy": 42,
    "random": 42
}

# Application settings. The data/model/report/db paths are relative, so they
# resolve against the current working directory. get_config() creates
# data_dir, model_dir and report_dir before returning this dictionary.
CONFIG = {
    "data_dir": os.path.join("data"),
    "model_dir": os.path.join("models"),
    "report_dir": os.path.join("data", "reports"),
    "db_path": os.path.join("data", "loan_iq.db"),
    "drive_root": "/content/drive/MyDrive/loan_iq",
    "streamlit_port": 8501,
    "fraud_types": ["ghost_client", "duplicate_id", "missed_payment", "identity_theft"],
    "regions": ["urban", "rural", "semi_urban"],
    "max_clients_batch": 70000,
    "default_batch_size": 1000
}

def init_seeds():
    """Seed the stdlib and NumPy random generators from SEEDS."""
    seeders = (
        (random.seed, "random"),
        (np.random.seed, "numpy"),
    )
    for apply_seed, seed_key in seeders:
        apply_seed(SEEDS[seed_key])

def get_config():
    """Create the data, model and report directories, then return CONFIG."""
    for directory_key in ("data_dir", "model_dir", "report_dir"):
        os.makedirs(CONFIG[directory_key], exist_ok=True)
    return CONFIG

# Script entry point: seed the random generators, create the configured
# directories, and print the resulting configuration.
if __name__ == "__main__":
    init_seeds()
    config = get_config()
    print(f"Config loaded: {config}")
''')
!test -f modules/core/config.py && echo "config.py created" || echo "Failed to create config.py"

config.py created


In [None]:
import os
os.makedirs('modules/core', exist_ok=True)
with open('modules/core/db.py', 'w') as f:
    f.write('''# modules/core/db.py
import sys
import os
sys.path.append(os.getcwd())
import sqlite3
import json
from datetime import datetime, UTC
try:
    from modules.core import config
except ImportError as e:
    print(f"Import error: {e}")
    raise

class DB:
    """SQLite database wrapper for Loan IQ."""
    def __init__(self):
        print(f"sys.path: {sys.path}")  # Debug
        self.db_path = config.get_config()["db_path"]
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
        print(f"Creating database at: {self.db_path}")  # Debug
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row
        self.cursor = self.conn.cursor()
        self.create_tables()
        print(f"Database created: {os.path.exists(self.db_path)}")  # Debug

    def create_tables(self):
        """Create database tables."""
        tables = [
            "CREATE TABLE IF NOT EXISTS users (user_id TEXT PRIMARY KEY, username TEXT UNIQUE, password TEXT, role TEXT)",
            "CREATE TABLE IF NOT EXISTS clients (client_id TEXT PRIMARY KEY, name TEXT, branch TEXT, region TEXT, income REAL, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS loans (loan_id TEXT PRIMARY KEY, client_id TEXT, amount REAL, status TEXT, start_date TIMESTAMP, FOREIGN KEY (client_id) REFERENCES clients(client_id))",
            "CREATE TABLE IF NOT EXISTS transactions (transaction_id TEXT PRIMARY KEY, loan_id TEXT, amount REAL, date TIMESTAMP, type TEXT, FOREIGN KEY (loan_id) REFERENCES loans(loan_id))",
            "CREATE TABLE IF NOT EXISTS models (model_id TEXT PRIMARY KEY, type TEXT, version TEXT, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS model_versions (version_id TEXT PRIMARY KEY, model_id TEXT, config_json TEXT, data_hash TEXT, metrics_json TEXT, commit_ref TEXT, comments TEXT, created_at TIMESTAMP, FOREIGN KEY (model_id) REFERENCES models(model_id))",
            "CREATE TABLE IF NOT EXISTS audit_logs (log_id INTEGER PRIMARY KEY AUTOINCREMENT, actor_id TEXT, actor_role TEXT, action TEXT, target_id TEXT, target_type TEXT, reason TEXT, timestamp TIMESTAMP, before_snapshot TEXT, after_snapshot TEXT, reversible BOOLEAN, reversal_id INTEGER)",
            "CREATE TABLE IF NOT EXISTS simulations (sim_id TEXT PRIMARY KEY, user_id TEXT, params_json TEXT, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS reports (report_id TEXT PRIMARY KEY, type TEXT, path TEXT, created_at TIMESTAMP)",
            "CREATE TABLE IF NOT EXISTS assets (asset_id TEXT PRIMARY KEY, path TEXT, type TEXT, created_at TIMESTAMP)"
        ]
        for table_sql in tables:
            self.cursor.execute(table_sql)
        self.conn.commit()

    def log_action(self, actor_id, actor_role, action, target_id, target_type, reason, before_snapshot, after_snapshot, reversible=False):
        """Log an admin action to audit_logs."""
        timestamp = datetime.now(UTC).isoformat()
        snapshot_before = json.dumps(before_snapshot) if before_snapshot else ""
        snapshot_after = json.dumps(after_snapshot) if after_snapshot else ""
        self.cursor.execute(
            "INSERT INTO audit_logs (actor_id, actor_role, action, target_id, target_type, reason, timestamp, before_snapshot, after_snapshot, reversible) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (actor_id, actor_role, action, target_id, target_type, reason, timestamp, snapshot_before, snapshot_after, reversible)
        )
        self.conn.commit()
        return self.cursor.lastrowid

    def get_audit_trail(self, target_id=None, target_type=None):
        """Retrieve audit logs, optionally filtered."""
        query = "SELECT * FROM audit_logs"
        params = []
        if target_id and target_type:
            query += " WHERE target_id = ? AND target_type = ?"
            params = [target_id, target_type]
        self.cursor.execute(query, params)
        return self.cursor.fetchall()

    def rollback_action(self, action_id):
        """Attempt to rollback an action if reversible."""
        self.cursor.execute("SELECT reversible, before_snapshot, target_id, target_type, action FROM audit_logs WHERE log_id = ?", (action_id,))
        result = self.cursor.fetchone()
        if not result or not result[0]:
            return False
        before_snapshot = json.loads(result[1]) if result[1] else {}
        target_id, target_type, action = result[2], result[3], result[4]
        if target_type == "user" and action == "edit":
            self.cursor.execute("UPDATE users SET username = ?, password = ?, role = ? WHERE user_id = ?",
                              (before_snapshot.get("username"), before_snapshot.get("password"), before_snapshot.get("role"), target_id))
            self.conn.commit()
            return True
        return False

    def close(self):
        """Close database connection."""
        self.conn.close()

# Script entry point: open the database, make sure the admin user row exists
# (an existing row is left untouched by INSERT OR IGNORE), record an "init"
# audit entry, and close the connection.
# NOTE(review): the admin password is inserted as a plain-text literal.
if __name__ == "__main__":
    db = DB()
    db.cursor.execute("INSERT OR IGNORE INTO users (user_id, username, password, role) VALUES (?, ?, ?, ?)",
                     ("1", "admin", "Shady868", "admin"))
    db.conn.commit()
    db.log_action("1", "admin", "init", "1", "user", "Initialize admin user", {}, {"username": "admin"})
    print("Database initialized.")
    db.close()
''')
!test -f modules/core/db.py && echo "db.py created" || echo "Failed to create db.py"

db.py created


In [None]:
import os
os.makedirs('modules/core', exist_ok=True)
with open('modules/core/utils.py', 'w') as f:
    f.write('''# modules/core/utils.py
import sys
import os
sys.path.append(os.getcwd())
import json
from functools import wraps
try:
    from modules.core import db, config
except ImportError as e:
    print(f"Import error: {e}")
    raise

def audit_wrapper(func):
    """Decorator to log admin actions with snapshots and reason.

    The wrapped function must be called with keyword arguments actor_id,
    actor_role and a non-empty reason. The audited target is taken from the
    target_id keyword, else the first positional argument, else 'unknown'.
    The target type is the target_type keyword (which is not forwarded to
    the wrapped function), else the wrapped function's name.

    For user-related target types the matching users row is captured before
    and after the call and stored with the audit entry.

    Raises:
        ValueError: if reason is empty.
        Exception: with message "Action failed: ..." when the wrapped call
            or the audit logging fails; the original error is attached as
            the cause.
    """
    user_target_types = ['user', 'edit_user', 'delete_user', 'add_user']
    reversible_target_types = ['user', 'edit_user', 'add_user']

    @wraps(func)
    def wrapper(*args, actor_id, actor_role, reason, **kwargs):
        if not reason:
            raise ValueError("Reason is required for audited actions")
        database = db.DB()
        target_id = kwargs.get('target_id', args[0] if args else 'unknown')
        target_type = kwargs.get('target_type', func.__name__)

        def snapshot_user(label):
            # Only user-related targets have a row that can be snapshotted.
            if target_type not in user_target_types:
                return {}
            database.cursor.execute("SELECT * FROM users WHERE user_id = ?", (target_id,))
            row = database.cursor.fetchone()
            snapshot = dict(row) if row else {}
            print(f"{label} snapshot: {snapshot}")  # Debug
            return snapshot

        try:
            before_snapshot = snapshot_user("Before")
            filtered_kwargs = {k: v for k, v in kwargs.items() if k != 'target_type'}
            result = func(*args, actor_id=actor_id, actor_role=actor_role, reason=reason, **filtered_kwargs)
            after_snapshot = snapshot_user("After")
            reversible = target_type in reversible_target_types
            database.log_action(
                actor_id, actor_role, func.__name__, target_id, target_type, reason,
                before_snapshot, after_snapshot, reversible
            )
            return result
        except Exception as e:
            # Keep the original error reachable through __cause__.
            raise Exception(f"Action failed: {e}") from e
        finally:
            # Single close for both the success and the failure path.
            database.close()
    return wrapper

def dict_diff(before, after):
    """Return the keys whose values differ between two dictionaries.

    Each differing key maps to {'before': ..., 'after': ...}; a key missing
    on one side is reported with None for that side.
    """
    all_keys = set(before.keys()) | set(after.keys())
    return {
        name: {'before': before.get(name), 'after': after.get(name)}
        for name in all_keys
        if before.get(name) != after.get(name)
    }

# Script entry point: wrap a trivial action with audit_wrapper and call it
# once. No target_type is passed, so the audit entry uses the function name
# ("test_action") as its target type.
if __name__ == "__main__":
    @audit_wrapper
    def test_action(target_id, actor_id, actor_role, reason):
        return {"result": "test"}
    result = test_action("test_id", actor_id="1", actor_role="admin", reason="Test audit")
    print(f"Test action result: {result}")
''')
!test -f modules/core/utils.py && echo "utils.py created" || echo "Failed to create utils.py"

utils.py created


In [None]:
import os
os.makedirs('modules/core', exist_ok=True)
with open('modules/core/auth.py', 'w') as f:
    f.write('''# modules/core/auth.py
import sys
import os
import sqlite3
import uuid
from datetime import datetime, UTC
sys.path.append(os.getcwd())
try:
    from modules.core import db, config
except ImportError as e:
    print(f"Import error: {e}")
    raise

class Auth:
    """Username/password authentication backed by the users table.

    NOTE(review): passwords are written to and compared against the users
    table exactly as supplied (plain text); consider hashing them.
    """
    def __init__(self):
        self.config = config.get_config()
        self.db_path = self.config["db_path"]

    def authenticate(self, username, password):
        """Return {"user_id", "role"} for matching credentials, else None."""
        database = db.DB()
        try:
            database.cursor.execute(
                "SELECT user_id, role FROM users WHERE username = ? AND password = ?",
                (username, password)
            )
            user = database.cursor.fetchone()
            if user:
                return {"user_id": user[0], "role": user[1]}
            return None
        finally:
            database.close()

    def register(self, username, password):
        """Register a new user with default user role.

        Returns {"user_id", "role"} for the new user, or None when the
        username is already taken. A successful registration is recorded in
        the audit log.
        """
        database = db.DB()
        try:
            user_id = f"U_{uuid.uuid4().hex[:8]}"
            # A plain INSERT is required here: with INSERT OR IGNORE a
            # duplicate username is skipped silently, the IntegrityError
            # handler below never runs, and the caller is handed a user_id
            # that was never stored.
            # NOTE(review): this statement writes users.created_at; make
            # sure the users schema created by db.DB provides that column.
            database.cursor.execute(
                "INSERT INTO users (user_id, username, password, role, created_at) VALUES (?, ?, ?, ?, ?)",
                (user_id, username, password, "user", datetime.now(UTC).isoformat())
            )
            database.conn.commit()
            database.log_action(
                "1", "admin", "register_user", user_id, "user",
                f"Registered new user {username}", {}, {}
            )
            print(f"Registered user: {username} with role: user")
            return {"user_id": user_id, "role": "user"}
        except sqlite3.IntegrityError:
            print(f"Registration failed: Username {username} already exists")
            return None
        finally:
            database.close()

# Script entry point: (re)create the admin and test users, then record an
# audit entry for the test user. INSERT OR REPLACE overwrites any existing
# row with the same user_id or username.
# NOTE(review): both passwords are inserted as plain-text literals.
if __name__ == "__main__":
    database = db.DB()
    try:
        # Hardcode admin user
        database.cursor.execute(
            "INSERT OR REPLACE INTO users (user_id, username, password, role, created_at) VALUES (?, ?, ?, ?, ?)",
            ("1", "admin", "Shady868", "admin", datetime.now(UTC).isoformat())
        )
        # Add test user
        database.cursor.execute(
            "INSERT OR REPLACE INTO users (user_id, username, password, role, created_at) VALUES (?, ?, ?, ?, ?)",
            ("test_1", "test_user", "test_pass", "user", datetime.now(UTC).isoformat())
        )
        database.conn.commit()
        database.log_action(
            "1", "admin", "add_user", "test_1", "user", "Added test user", {}, {}
        )
        print("Added user: test_1")
    finally:
        # Close the connection even if an insert or the audit write fails.
        database.close()
''')
!test -f modules/core/auth.py && echo "auth.py created" || echo "Failed to create auth.py"

auth.py created


In [None]:
import os
os.makedirs('modules/synth', exist_ok=True)
with open('modules/synth/faker_engine.py', 'w') as f:
    f.write('''# modules/synth/faker_engine.py
import sys
import os
sys.path.append(os.getcwd())
from faker import Faker
import random
from datetime import datetime, timedelta
try:
    from modules.core import config
except ImportError as e:
    print(f"Import error: {e}")
    raise

class LoanIQFaker:
    """Seeded source of synthetic Loan IQ values with optional fraud patterns.

    Methods that accept fraud_type skew their output with a fixed probability
    when the given type matches the pattern they model; otherwise they draw
    from the normal range.
    """
    def __init__(self):
        self.faker = Faker()
        Faker.seed(config.SEEDS["faker"])
        random.seed(config.SEEDS["random"])
        self.config = config.get_config()
        self.fraud_types = self.config["fraud_types"]
        self.regions = self.config["regions"]

    def _short_uuid(self):
        """Return the first dash-separated group of a Faker UUID4."""
        return self.faker.uuid4().split('-')[0]

    def client_id(self):
        """Return a client ID of the form C_<short uuid>."""
        return "C_" + self._short_uuid()

    def loan_id(self):
        """Return a loan ID of the form L_<short uuid>."""
        return "L_" + self._short_uuid()

    def transaction_id(self):
        """Return a transaction ID of the form T_<short uuid>."""
        return "T_" + self._short_uuid()

    def client_name(self, fraud_type=None):
        """Return a client name; ghost clients get None 10% of the time."""
        nameless = fraud_type == "ghost_client" and random.random() < 0.1
        return None if nameless else self.faker.name()

    def duplicate_id(self, existing_ids):
        """Return a client ID that reuses an existing one 5% of the time."""
        reuse_existing = random.random() < 0.05
        if reuse_existing and existing_ids:
            return random.choice(existing_ids)
        return self.client_id()

    def income(self, fraud_type=None):
        """Return an income; identity theft inflates the range 10% of the time."""
        lower, upper = 20000, 100000
        if fraud_type == "identity_theft" and random.random() < 0.1:
            lower, upper = 100000, 1000000
        return random.uniform(lower, upper)

    def branch(self):
        """Return a branch name (a Faker city)."""
        return self.faker.city()

    def region(self):
        """Return one of the configured regions."""
        return random.choice(self.regions)

    def loan_amount(self, fraud_type=None):
        """Return a loan amount; missed-payment loans are larger 20% of the time."""
        lower, upper = 1000, 50000
        if fraud_type == "missed_payment" and random.random() < 0.2:
            lower, upper = 50000, 200000
        return random.uniform(lower, upper)

    def loan_status(self, fraud_type=None):
        """Return a loan status; missed-payment loans are forced to default 30% of the time."""
        if fraud_type == "missed_payment" and random.random() < 0.3:
            return "default"
        return random.choice(["active", "paid", "default"])

    def transaction_amount(self, loan_amount):
        """Return an amount between 100 and the lesser of 10% of the loan and 5000."""
        ceiling = min(loan_amount * 0.1, 5000)
        return random.uniform(100, ceiling)

    def transaction_type(self, fraud_type=None):
        """Return a transaction type; identity theft yields a suspicious transfer 10% of the time."""
        if fraud_type == "identity_theft" and random.random() < 0.1:
            return "suspicious_transfer"
        return random.choice(["payment", "fee", "interest"])

    def random_date(self, start_days=-365, end_days=0):
        """Return an ISO timestamp between now+start_days and now+end_days."""
        window_start = datetime.now() + timedelta(days=start_days)
        window_end = datetime.now() + timedelta(days=end_days)
        picked = self.faker.date_time_between(window_start, window_end)
        return picked.isoformat()

# Script entry point: print one sample from each generator method as a
# quick smoke test.
if __name__ == "__main__":
    faker = LoanIQFaker()
    print(f"Client ID: {faker.client_id()}")
    print(f"Client Name: {faker.client_name()}")
    print(f"Loan ID: {faker.loan_id()}")
    print(f"Transaction ID: {faker.transaction_id()}")
    print(f"Income: {faker.income()}")
    print(f"Branch: {faker.branch()}")
    print(f"Region: {faker.region()}")
    print(f"Loan Amount: {faker.loan_amount()}")
    print(f"Loan Status: {faker.loan_status()}")
    print(f"Transaction Amount: {faker.transaction_amount(10000)}")
    print(f"Transaction Type: {faker.transaction_type()}")
    print(f"Random Date: {faker.random_date()}")
''')
!test -f modules/synth/faker_engine.py && echo "faker_engine.py created" || echo "Failed to create faker_engine.py"

faker_engine.py created


In [None]:
import os
os.makedirs('modules/synth', exist_ok=True)
with open('modules/synth/generators.py', 'w') as f:
    f.write('''# modules/synth/generators.py
import sys
import os
import random
sys.path.append(os.getcwd())
import pandas as pd
import numpy as np
from datetime import datetime
try:
    from modules.core import db, config
    from modules.synth import faker_engine
except ImportError as e:
    print(f"Import error: {e}")
    raise

class DataGenerator:
    """Generate synthetic data for Loan IQ and store in database.

    The generate_* methods return pandas DataFrames whose columns are fixed
    by the *_COLUMNS constants, including when no rows are produced, so the
    output of one step can always be fed to the next.
    """
    CLIENT_COLUMNS = ["client_id", "name", "branch", "region", "income", "created_at"]
    LOAN_COLUMNS = ["loan_id", "client_id", "amount", "status", "start_date"]
    TRANSACTION_COLUMNS = ["transaction_id", "loan_id", "amount", "date", "type"]

    def __init__(self):
        self.faker = faker_engine.LoanIQFaker()
        self.config = config.get_config()
        self.db_path = self.config["db_path"]

    def _pick_fraud_type(self, fraud_ratio):
        """Draw a fraud type, or None with probability 1 - fraud_ratio.

        The fraud_ratio mass is split evenly across the configured types.
        """
        fraud_types = self.faker.fraud_types
        weights = [fraud_ratio / len(fraud_types)] * len(fraud_types) + [1 - fraud_ratio]
        return random.choices(fraud_types + [None], weights=weights, k=1)[0]

    def generate_clients(self, n, fraud_ratio=0.1):
        """Generate n clients with optional patterns.

        Clients drawn as "duplicate_id" may reuse an earlier client's ID, so
        client_id values are not guaranteed to be unique.
        """
        clients = []
        existing_ids = []
        for _ in range(n):
            fraud_type = self._pick_fraud_type(fraud_ratio)
            client_id = self.faker.duplicate_id(existing_ids) if fraud_type == "duplicate_id" else self.faker.client_id()
            existing_ids.append(client_id)
            clients.append({
                "client_id": client_id,
                "name": self.faker.client_name(fraud_type),
                "branch": self.faker.branch(),
                "region": self.faker.region(),
                "income": self.faker.income(fraud_type),
                "created_at": self.faker.random_date()
            })
        # Explicit columns keep the schema when n is 0.
        return pd.DataFrame(clients, columns=self.CLIENT_COLUMNS)

    def generate_loans(self, clients, n_per_client=2, fraud_ratio=0.1):
        """Generate between 1 and n_per_client loans for each given client.

        One fraud type is drawn per client and applied to all of that
        client's loans.
        """
        loans = []
        for client_id in clients["client_id"]:
            fraud_type = self._pick_fraud_type(fraud_ratio)
            for _ in range(random.randint(1, n_per_client)):
                loans.append({
                    "loan_id": self.faker.loan_id(),
                    "client_id": client_id,
                    "amount": self.faker.loan_amount(fraud_type),
                    "status": self.faker.loan_status(fraud_type),
                    "start_date": self.faker.random_date()
                })
        return pd.DataFrame(loans, columns=self.LOAN_COLUMNS)

    def generate_transactions(self, loans, n_per_loan=3, fraud_ratio=0.1):
        """Generate between 1 and n_per_loan transactions for each given loan.

        One fraud type is drawn per loan and applied to all of that loan's
        transactions.
        """
        transactions = []
        for loan_id, loan_amount in zip(loans["loan_id"], loans["amount"]):
            fraud_type = self._pick_fraud_type(fraud_ratio)
            for _ in range(random.randint(1, n_per_loan)):
                transactions.append({
                    "transaction_id": self.faker.transaction_id(),
                    "loan_id": loan_id,
                    "amount": self.faker.transaction_amount(loan_amount),
                    "date": self.faker.random_date(),
                    "type": self.faker.transaction_type(fraud_type)
                })
        return pd.DataFrame(transactions, columns=self.TRANSACTION_COLUMNS)

    def save_to_db(self, clients, loans, transactions, actor_id="1", actor_role="admin", reason="Synthetic data generation"):
        """Save generated data to loan_iq.db with audit logging.

        Rows whose primary key already exists are skipped (INSERT OR
        IGNORE), so the counts printed and logged are the sizes of the
        input frames, not necessarily the number of rows inserted. All
        inserts are committed together; the connection is always closed.
        """
        database = db.DB()
        print(f"Saving to database: {self.db_path}")  # Debug
        try:
            for _, row in clients.iterrows():
                database.cursor.execute(
                    "INSERT OR IGNORE INTO clients (client_id, name, branch, region, income, created_at) VALUES (?, ?, ?, ?, ?, ?)",
                    (row["client_id"], row["name"], row["branch"], row["region"], row["income"], row["created_at"])
                )
            for _, row in loans.iterrows():
                database.cursor.execute(
                    "INSERT OR IGNORE INTO loans (loan_id, client_id, amount, status, start_date) VALUES (?, ?, ?, ?, ?)",
                    (row["loan_id"], row["client_id"], row["amount"], row["status"], row["start_date"])
                )
            for _, row in transactions.iterrows():
                database.cursor.execute(
                    "INSERT OR IGNORE INTO transactions (transaction_id, loan_id, amount, date, type) VALUES (?, ?, ?, ?, ?)",
                    (row["transaction_id"], row["loan_id"], row["amount"], row["date"], row["type"])
                )
            database.conn.commit()
            database.log_action(
                actor_id, actor_role, "generate_data", "multiple", "synthetic_data", reason,
                {}, {"clients": len(clients), "loans": len(loans), "transactions": len(transactions)}
            )
            print(f"Saved {len(clients)} clients, {len(loans)} loans, {len(transactions)} transactions to DB")
        finally:
            database.close()

    def export_to_csv(self, clients, loans, transactions, output_dir=None):
        """Export data to clients.csv, loans.csv and transactions.csv.

        Files are written to output_dir, or to the configured data_dir when
        output_dir is not given; the directory is created if missing.
        """
        output_dir = output_dir or self.config["data_dir"]
        os.makedirs(output_dir, exist_ok=True)
        clients.to_csv(os.path.join(output_dir, "clients.csv"), index=False)
        loans.to_csv(os.path.join(output_dir, "loans.csv"), index=False)
        transactions.to_csv(os.path.join(output_dir, "transactions.csv"), index=False)
        print(f"Exported data to {output_dir}/[clients,loans,transactions].csv")

# Script entry point: generate 10 clients with their loans and transactions
# (fraud ratio 0.2), then store them in the database and export them to CSV.
if __name__ == "__main__":
    generator = DataGenerator()
    clients = generator.generate_clients(10, fraud_ratio=0.2)
    loans = generator.generate_loans(clients, n_per_client=2, fraud_ratio=0.2)
    transactions = generator.generate_transactions(loans, n_per_loan=3, fraud_ratio=0.2)
    generator.save_to_db(clients, loans, transactions)
    generator.export_to_csv(clients, loans, transactions)
    print("Generated and saved synthetic data.")
''')
!test -f modules/synth/generators.py && echo "generators.py created" || echo "Failed to create generators.py"

generators.py created


In [None]:
import os
os.makedirs('modules/models', exist_ok=True)
with open('modules/models/train.py', 'w') as f:
    f.write('''# modules/models/train.py
import sys
import os
import pickle
import json
import random
import uuid
import numpy as np
import pandas as pd
from datetime import datetime, UTC
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
sys.path.append(os.getcwd())
try:
    from modules.core import db, config
    from modules.synth import generators
except ImportError as e:
    print(f"Import error: {e}")
    raise

class ModelTrainer:
    """Train XGBoost model for default probability and loan limits."""
    def __init__(self):
        self.config = config.get_config()
        self.db_path = self.config["db_path"]
        self.model_dir = self.config["model_dir"]
        os.makedirs(self.model_dir, exist_ok=True)
        random.seed(config.SEEDS["random"])
        np.random.seed(config.SEEDS["numpy"])

    def prepare_data(self, clients, loans, transactions):
        """Prepare features and labels for training."""
        print("Preparing data for training")  # Debug
        data = loans.merge(clients, on="client_id", how="left")
        # Aggregate transactions and flatten column names
        agg_data = transactions.groupby("loan_id").agg({
            "amount": ["sum", "count"],
            "type": lambda x: x.value_counts().index[0] if not x.empty else "none"
        }).reset_index()
        # Flatten MultiIndex by renaming columns
        agg_data.columns = ['loan_id', 'transaction_amount_sum', 'transaction_count', 'transaction_type']
        data = data.merge(agg_data, on="loan_id", how="left")
        data = data[[
            "loan_id", "client_id", "amount", "status", "start_date",
            "name", "branch", "region", "income", "created_at",
            "transaction_amount_sum", "transaction_count", "transaction_type"
        ]]
        features = ["loan_amount", "income", "transaction_amount_sum", "transaction_count"]
        data = data.rename(columns={"amount": "loan_amount"})  # Rename for consistency
        X = data[features].fillna(0)
        y = data["status"].apply(lambda x: 1 if x == "default" else 0)
        print(f"Prepared {X.shape[0]} samples with features: {features}")  # Debug
        return X, y

    def train_model(self, X, y, model_id=None):
        """Train XGBoost model and save to file and database."""
        model_id = model_id or f"M_{random.getrandbits(32):08x}"
        model = XGBClassifier(
            n_estimators=100, max_depth=3, learning_rate=0.1,
            random_state=config.SEEDS["random"], eval_metric="auc"
        )
        model.fit(X, y)
        y_pred = model.predict_proba(X)[:, 1]
        auc = roc_auc_score(y, y_pred)
        accuracy = accuracy_score(y, model.predict(X))
        model_path = os.path.join(self.model_dir, f"{model_id}.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(model, f)
        print(f"Model saved to {model_path}")  # Debug
        database = db.DB()
        try:
            database.cursor.execute(
                "INSERT OR IGNORE INTO models (model_id, type, version, created_at) VALUES (?, ?, ?, ?)",
                (model_id, "xgboost", "1.0", datetime.now(UTC).isoformat())
            )
            database.cursor.execute(
                "INSERT INTO model_versions (version_id, model_id, config_json, data_hash, metrics_json, commit_ref, comments, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    f"V_{uuid.uuid4().hex[:8]}", model_id,  # Unique version_id
                    json.dumps({"n_estimators": 100, "max_depth": 3, "learning_rate": 0.1}),
                    str(X.values.tobytes()),  # Use bytes of numeric data for hash
                    json.dumps({"auc": float(auc), "accuracy": float(accuracy)}),
                    "initial", "Trained for default probability", datetime.now(UTC).isoformat()
                )
            )
            database.conn.commit()
            database.log_action(
                "1", "admin", "train_model", model_id, "model",
                "Trained model for default probability", {}, {"auc": float(auc), "accuracy": float(accuracy)}
            )
            print(f"Model {model_id} trained. AUC: {auc:.3f}, Accuracy: {accuracy:.3f}")
            return model_id
        finally:
            database.close()

# Script entry point: generate a 100-client synthetic dataset, store it in
# the database, build the training features from it and train one model.
if __name__ == "__main__":
    trainer = ModelTrainer()
    generator = generators.DataGenerator()
    clients = generator.generate_clients(100, fraud_ratio=0.2)
    loans = generator.generate_loans(clients, n_per_client=2, fraud_ratio=0.2)
    transactions = generator.generate_transactions(loans, n_per_loan=3, fraud_ratio=0.2)
    generator.save_to_db(clients, loans, transactions)
    X, y = trainer.prepare_data(clients, loans, transactions)
    model_id = trainer.train_model(X, y)
    print(f"Trained model: {model_id}")
''')
!test -f modules/models/train.py && echo "train.py created" || echo "Failed to create train.py"

train.py created


In [None]:

import os
os.makedirs('modules/models', exist_ok=True)
with open('modules/models/predict.py', 'w') as f:
    f.write('''# modules/models/predict.py
import sys
import os
import pickle
import json
import numpy as np
import pandas as pd
import shap
from datetime import datetime, UTC
sys.path.append(os.getcwd())
try:
    from modules.core import db, config
    from modules.models import train
except ImportError as e:
    print(f"Import error: {e}")
    raise

class ModelPredictor:
    """Predict default probability and loan limits using trained model.

    Paths are taken from the project configuration: models are read from
    ``<model_dir>/<model_id>.pkl``.
    """

    def __init__(self):
        self.config = config.get_config()
        self.db_path = self.config["db_path"]
        self.model_dir = self.config["model_dir"]
        # Not populated by predict(), which builds a fresh explainer per call.
        self.explainer = None

    def predict(self, model_id, data):
        """Make predictions for given data using specified model.

        Args:
            model_id: Identifier of a pickled model stored as
                ``<model_dir>/<model_id>.pkl``.
            data: Mapping with ``"clients"``, ``"loans"`` and
                ``"transactions"`` entries. ``loans`` and ``clients`` are used
                as pandas DataFrames (``merge`` / column selection) and must
                provide ``client_id``, ``loan_id`` and ``income`` columns.

        Returns:
            pandas.DataFrame with one row per prediction and the columns
            ``loan_id``, ``default_probability``, ``recommended_loan_limit``
            and ``shap_values`` (JSON-encoded list per row).

        Raises:
            OSError: If the model file cannot be opened.
        """
        model_path = os.path.join(self.model_dir, f"{model_id}.pkl")
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # model files that this project wrote itself.
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        print(f"Loaded model: {model_id}")  # Debug
        trainer = train.ModelTrainer()
        # NOTE(review): assumes prepare_data returns one row per loan, in the
        # same order as data["loans"] - confirm in modules/models/train.py.
        X, _ = trainer.prepare_data(data["clients"], data["loans"], data["transactions"])
        probs = model.predict_proba(X)[:, 1]
        # Merge loans with clients to align incomes with loans
        merged_data = data["loans"].merge(data["clients"][["client_id", "income"]],
                                        on="client_id", how="left")
        # merge() hands back a fresh 0..n-1 index while data["loans"] keeps the
        # caller's index. Work on plain arrays so the result columns are
        # combined by position instead of being aligned on mismatching labels.
        loan_limits = merged_data["income"].to_numpy() * 2.0 * (1 - probs)
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)
        result = pd.DataFrame({
            "loan_id": data["loans"]["loan_id"].to_numpy(),
            "default_probability": probs,
            "recommended_loan_limit": loan_limits,
            "shap_values": [json.dumps(s.tolist()) for s in shap_values]
        })
        database = db.DB()
        try:
            # Record the prediction run in the audit log.
            database.log_action(
                "1", "admin", "predict", model_id, "model",
                "Made predictions for loans", {}, {"num_predictions": len(probs)}
            )
            database.conn.commit()
            print(f"Predictions made for {len(probs)} loans")  # Debug
            return result
        finally:
            # Always release the DB handle, even if logging fails.
            database.close()

if __name__ == "__main__":
    # Script entry point: generate a small dataset, train on it, then score the same loans.
    fraud_ratio = 0.2
    predictor = ModelPredictor()
    model_trainer = train.ModelTrainer()
    data_gen = train.generators.DataGenerator()

    synth_clients = data_gen.generate_clients(10, fraud_ratio=fraud_ratio)
    synth_loans = data_gen.generate_loans(
        synth_clients, n_per_client=2, fraud_ratio=fraud_ratio
    )
    synth_txns = data_gen.generate_transactions(
        synth_loans, n_per_loan=3, fraud_ratio=fraud_ratio
    )
    data_gen.save_to_db(synth_clients, synth_loans, synth_txns)

    features, labels = model_trainer.prepare_data(synth_clients, synth_loans, synth_txns)
    model_id = model_trainer.train_model(features, labels)

    payload = {"clients": synth_clients, "loans": synth_loans, "transactions": synth_txns}
    predictions = predictor.predict(model_id, payload)
    print(predictions)
''')
!test -f modules/models/predict.py && echo "predict.py created" || echo "Failed to create predict.py"

predict.py created


In [None]:
import os
os.makedirs('tests', exist_ok=True)
with open('tests/test_models.py', 'w') as f:
    f.write('''# tests/test_models.py
import sys
import os
import pickle
import pandas as pd
import numpy as np
sys.path.append(os.getcwd())
try:
    from modules.core import config, db
    from modules.synth import generators
    from modules.models import train, predict
except ImportError as e:
    print(f"Import error: {e}")
    raise

def test_prepare_data():
    """prepare_data returns one row per loan, the expected feature columns and 0/1 labels."""
    expected_features = {"loan_amount", "income", "transaction_amount_sum", "transaction_count"}
    model_trainer = train.ModelTrainer()
    data_gen = generators.DataGenerator()

    synth_clients = data_gen.generate_clients(5, fraud_ratio=0.2)
    synth_loans = data_gen.generate_loans(synth_clients, n_per_client=2, fraud_ratio=0.2)
    synth_txns = data_gen.generate_transactions(synth_loans, n_per_loan=3, fraud_ratio=0.2)

    features, labels = model_trainer.prepare_data(synth_clients, synth_loans, synth_txns)

    n_samples = features.shape[0]
    assert n_samples == len(synth_loans), "Incorrect number of samples"
    assert set(features.columns) == expected_features, "Incorrect features"
    assert labels.isin([0, 1]).all(), "Invalid labels"

def test_train_model():
    """Training writes the model pickle, the models/model_versions rows and an audit entry."""
    model_trainer = train.ModelTrainer()
    data_gen = generators.DataGenerator()

    synth_clients = data_gen.generate_clients(10, fraud_ratio=0.2)
    synth_loans = data_gen.generate_loans(synth_clients, n_per_client=2, fraud_ratio=0.2)
    synth_txns = data_gen.generate_transactions(synth_loans, n_per_loan=3, fraud_ratio=0.2)
    data_gen.save_to_db(synth_clients, synth_loans, synth_txns)

    features, labels = model_trainer.prepare_data(synth_clients, synth_loans, synth_txns)
    model_id = model_trainer.train_model(features, labels)

    # The pickle is expected under the configured model directory.
    pickle_path = os.path.join(config.get_config()["model_dir"], f"{model_id}.pkl")
    assert os.path.exists(pickle_path), "Model file not saved"

    database = db.DB()
    try:
        database.cursor.execute("SELECT * FROM models WHERE model_id = ?", (model_id,))
        model_row = database.cursor.fetchone()
        assert model_row, "Model not saved to DB"

        database.cursor.execute("SELECT * FROM model_versions WHERE model_id = ?", (model_id,))
        version_row = database.cursor.fetchone()
        assert version_row, "Model version not saved to DB"

        database.cursor.execute("SELECT * FROM audit_logs WHERE target_type = 'model' AND action = 'train_model'")
        audit_rows = database.cursor.fetchall()
        assert len(audit_rows) > 0, "Audit log not recorded"
    finally:
        database.close()

def test_predict():
    """predict returns one well-formed row per loan and leaves a 'predict' audit entry."""
    expected_columns = {"loan_id", "default_probability", "recommended_loan_limit", "shap_values"}
    model_trainer = train.ModelTrainer()
    predictor = predict.ModelPredictor()
    data_gen = generators.DataGenerator()

    synth_clients = data_gen.generate_clients(5, fraud_ratio=0.2)
    synth_loans = data_gen.generate_loans(synth_clients, n_per_client=2, fraud_ratio=0.2)
    synth_txns = data_gen.generate_transactions(synth_loans, n_per_loan=3, fraud_ratio=0.2)
    data_gen.save_to_db(synth_clients, synth_loans, synth_txns)

    features, labels = model_trainer.prepare_data(synth_clients, synth_loans, synth_txns)
    model_id = model_trainer.train_model(features, labels)

    payload = {"clients": synth_clients, "loans": synth_loans, "transactions": synth_txns}
    predictions = predictor.predict(model_id, payload)

    assert len(predictions) == len(synth_loans), "Incorrect number of predictions"
    assert set(predictions.columns) == expected_columns, "Incorrect prediction columns"
    assert (predictions["default_probability"] >= 0).all() and (predictions["default_probability"] <= 1).all(), "Invalid probabilities"
    assert (predictions["recommended_loan_limit"] >= 0).all(), "Invalid loan limits"

    database = db.DB()
    try:
        database.cursor.execute("SELECT * FROM audit_logs WHERE target_type = 'model' AND action = 'predict'")
        audit_rows = database.cursor.fetchall()
        assert len(audit_rows) > 0, "Audit log not recorded"
    finally:
        database.close()

def test_model_persistence():
    """A trained model can be read back from its pickle file."""
    model_trainer = train.ModelTrainer()
    data_gen = generators.DataGenerator()

    synth_clients = data_gen.generate_clients(5, fraud_ratio=0.2)
    synth_loans = data_gen.generate_loans(synth_clients, n_per_client=2, fraud_ratio=0.2)
    synth_txns = data_gen.generate_transactions(synth_loans, n_per_loan=3, fraud_ratio=0.2)

    features, labels = model_trainer.prepare_data(synth_clients, synth_loans, synth_txns)
    model_id = model_trainer.train_model(features, labels)

    # Reload the pickle from the configured model directory.
    model_dir = config.get_config()["model_dir"]
    pickle_path = os.path.join(model_dir, f"{model_id}.pkl")
    with open(pickle_path, "rb") as handle:
        restored = pickle.load(handle)
    assert restored is not None, "Model not loaded correctly"
''')
!test -f tests/test_models.py && echo "test_models.py created" || echo "Failed to create test_models.py"

test_models.py created


In [None]:
# End-to-end run of the generated project: reset state, reinstall dependencies,
# run every module as a script, then run the model tests.
import sys
import os
sys.path.append(os.getcwd())  # make the `modules` package importable from the working directory
print(f"Current working directory: {os.getcwd()}")  # Debug

# Reset database and marker file so the run starts from a clean state
# (-f keeps rm quiet when the files do not exist yet)
!rm -f data/loan_iq.db data/.deps_ok

# Ensure dependencies are installed (deps.py writes data/.deps_ok when it finishes)
!python modules/bootstrap/deps.py

# Verify directories: each ls prints a fallback message if the directory is missing
!ls modules/bootstrap || echo "modules/bootstrap not found"
!ls modules/core || echo "modules/core not found"
!ls modules/synth || echo "modules/synth not found"
!ls modules/models || echo "modules/models not found"
!ls tests || echo "tests not found"
!ls data || echo "data not found"

# Run all scripts, each module executed directly as a script
!python modules/core/config.py
!python modules/core/db.py
!python modules/core/utils.py
!python modules/core/auth.py
!python modules/synth/faker_engine.py
!python modules/synth/generators.py
!python modules/models/train.py
!python modules/models/predict.py

# Run tests (verbose, model tests only)
!pytest tests/test_models.py -v

# Verify files: list the directories again to see what the scripts and tests produced
!ls modules/bootstrap
!ls modules/core
!ls modules/synth
!ls modules/models
!ls tests
!ls data

# Expected output:
# Current working directory: /content
# Dependencies installed successfully.
# deps.py
# auth.py  config.py  db.py  utils.py
# faker_engine.py  generators.py
# predict.py  train.py
# test_models.py
# .deps_ok  clients.csv  loans.csv  loan_iq.db  models  reports  transactions.csv
# Config loaded: {...}
# sys.path: [...]
# Creating database at: data/loan_iq.db
# Database created: True
# Database initialized.
# Before snapshot: {}
# After snapshot: {}
# Test action result: {'result': 'test'}
# Auth result: {'user_id': '1', 'role': 'admin'}
# Adding user with target_id: test_1
# Before snapshot: {}
# After snapshot: {'user_id': 'test_1', 'username': 'test_user', 'password': 'test_pass', 'role': 'user'}
# Added user: test_1
# Client ID: C_...
# Client Name: ...
# Loan ID: L_...
# Transaction ID: T_...
# Income: ...
# Branch: ...
# Region: ...
# Loan Amount: ...
# Loan Status: ...
# Transaction Amount: ...
# Transaction Type: ...
# Random Date: ...
# Saving to database: data/loan_iq.db
# Saved 10 clients, ... loans, ... transactions to DB
# Exported data to data/[clients,loans,transactions].csv
# Generated and saved synthetic data.
# Saving to database: data/loan_iq.db
# Saved 100 clients, ... loans, ... transactions to DB
# Preparing data for training
# Prepared ... samples with features: ['loan_amount', 'income', 'transaction_amount_sum', 'transaction_count']
# Model saved to models/M_....pkl
# Model M_... trained. AUC: 0.XXX, Accuracy: 0.XXX
# Trained model: M_...
# Saving to database: data/loan_iq.db
# Saved 10 clients, ... loans, ... transactions to DB
# Preparing data for training
# Prepared ... samples with features: ['loan_amount', 'income', 'transaction_amount_sum', 'transaction_count']
# Model saved to models/M_....pkl
# Model M_... trained. AUC: 0.XXX, Accuracy: 0.XXX
# Loaded model: M_...
# Predictions made for ... loans
# [DataFrame with loan_id, default_probability, recommended_loan_limit, shap_values]
# ============================= test session starts =============================
# tests/test_models.py::test_prepare_data PASSED
# tests/test_models.py::test_train_model PASSED
# tests/test_models.py::test_predict PASSED
# tests/test_models.py::test_model_persistence PASSED
# =========================== 4 passed in 0.XXs ===========================

Current working directory: /content
Dependencies installed successfully.
deps.py
auth.py  config.py  db.py  __pycache__	utils.py
faker_engine.py  generators.py	__pycache__
predict.py  __pycache__  train.py
__pycache__  test_models.py
clients.csv  loans.csv	reports  transactions.csv
Config loaded: {'data_dir': 'data', 'model_dir': 'models', 'report_dir': 'data/reports', 'db_path': 'data/loan_iq.db', 'drive_root': '/content/drive/MyDrive/loan_iq', 'streamlit_port': 8501, 'fraud_types': ['ghost_client', 'duplicate_id', 'missed_payment', 'identity_theft'], 'regions': ['urban', 'rural', 'semi_urban'], 'max_clients_batch': 70000, 'default_batch_size': 1000}
sys.path: ['/content/modules/core', '/env/python', '/usr/lib/python312.zip', '/usr/lib/python3.12', '/usr/lib/python3.12/lib-dynload', '/usr/local/lib/python3.12/dist-packages', '/usr/lib/python3/dist-packages', '/content', '/content']
Creating database at: data/loan_iq.db
Database created: True
Database initialized.
sys.path: ['/content/

In [None]:

import os
os.makedirs('modules/streamlit_app', exist_ok=True)
with open('modules/streamlit_app/app.py', 'w') as f:
    f.write('''# modules/streamlit_app/app.py
import sys
import os
import streamlit as st
import pandas as pd
import sqlite3
import plotly.express as px
import json
sys.path.append(os.getcwd())
try:
    from modules.core import config, db, auth
    from modules.synth import generators
    from modules.models import train, predict
except ImportError as e:
    print(f"Import error: {e}")
    raise

st.set_page_config(page_title="Loan IQ Dashboard", layout="wide")

def _render_login_or_register():
    """Render the Login and Register tabs shown to unauthenticated visitors.

    On success the outcome is stored in ``st.session_state``
    (``authenticated``, ``user_role``, ``username``) and ``st.rerun()`` is called.
    """
    st.subheader("Login or Register")
    # Tabs for Login and Register
    tab1, tab2 = st.tabs(["Login", "Register"])

    with tab1:
        st.subheader("Login")
        login_username = st.text_input("Username", key="login_username")
        login_password = st.text_input("Password", type="password", key="login_password")
        if st.button("Login"):
            authenticator = auth.Auth()  # Updated to Auth
            user = authenticator.authenticate(login_username, login_password)
            if user:
                st.session_state.authenticated = True
                st.session_state.user_role = user["role"]
                st.session_state.username = login_username
                st.success(f"Logged in as {login_username} ({user['role']})")
                st.rerun()
            else:
                st.error("Invalid credentials. Contact admin at admin@loaniq.com for password issues.")

    with tab2:
        st.subheader("Register")
        reg_username = st.text_input("New Username", key="reg_username")
        reg_password = st.text_input("New Password", type="password", key="reg_password")
        if st.button("Register"):
            authenticator = auth.Auth()  # Updated to Auth
            user = authenticator.register(reg_username, reg_password)
            if user:
                st.success(f"Registered {reg_username}. Logging in...")
                st.session_state.authenticated = True
                st.session_state.user_role = user["role"]
                st.session_state.username = reg_username
                st.rerun()
            else:
                st.error("Registration failed: Username already exists")


def _render_data_overview(conn):
    """Show the first 10 clients, loans and transactions plus a loan status chart."""
    st.subheader("Data Overview")
    # Load data
    clients = pd.read_sql_query("SELECT * FROM clients LIMIT 10", conn)
    loans = pd.read_sql_query("SELECT * FROM loans LIMIT 10", conn)
    transactions = pd.read_sql_query("SELECT * FROM transactions LIMIT 10", conn)

    # Display tables
    st.write("### Clients")
    st.dataframe(clients)
    st.write("### Loans")
    st.dataframe(loans)
    st.write("### Transactions")
    st.dataframe(transactions)

    # Simple visualization
    if not loans.empty:
        fig = px.bar(loans, x="status", title="Loan Status Distribution")
        st.plotly_chart(fig)


def _render_predictions():
    """Admin-only page: generate data, train a model and display its predictions."""
    st.subheader("Loan Default Predictions")
    if st.session_state.user_role != "admin":
        st.error("Access restricted to admin users")
        return

    if st.button("Generate New Data and Predictions"):
        generator = generators.DataGenerator()
        clients = generator.generate_clients(10, fraud_ratio=0.2)
        loans = generator.generate_loans(clients, n_per_client=2, fraud_ratio=0.2)
        transactions = generator.generate_transactions(loans, n_per_loan=3, fraud_ratio=0.2)
        generator.save_to_db(clients, loans, transactions)
        trainer = train.ModelTrainer()
        X, y = trainer.prepare_data(clients, loans, transactions)
        model_id = trainer.train_model(X, y)
        predictor = predict.ModelPredictor()
        predictions = predictor.predict(model_id, {"clients": clients, "loans": loans, "transactions": transactions})
        # Save predictions to session state so they survive the next rerun
        st.session_state.predictions = predictions
        st.session_state.model_id = model_id
        st.success(f"Generated data and trained model {model_id}")

    # Display predictions
    if "predictions" in st.session_state:
        st.write("### Predictions")
        st.dataframe(st.session_state.predictions)
        # Plot default probabilities
        fig = px.histogram(st.session_state.predictions, x="default_probability",
                         title="Default Probability Distribution")
        st.plotly_chart(fig)


def _render_reports(conn):
    """Show the average client income per region as a table and a bar chart."""
    st.subheader("Reports")
    # Example report: Average income by region
    query = """
        SELECT region, AVG(income) as avg_income
        FROM clients
        GROUP BY region
        """
    report = pd.read_sql_query(query, conn)
    st.write("### Average Income by Region")
    st.dataframe(report)
    fig = px.bar(report, x="region", y="avg_income", title="Average Income by Region")
    st.plotly_chart(fig)


def main():
    """Streamlit dashboard for Loan IQ.

    Shows the login/registration form until the session is authenticated, then
    renders the page picked in the sidebar ("Data Overview", "Predictions" or
    "Reports"). Database handles opened for a run are always closed again, also
    when a page raises.
    """
    config_data = config.get_config()
    st.title("Loan IQ Dashboard")

    # Authentication
    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False
        st.session_state.user_role = None
        st.session_state.username = None

    if not st.session_state.authenticated:
        _render_login_or_register()
        return

    # Sidebar for navigation
    st.sidebar.title(f"Welcome, {st.session_state.username}")
    page = st.sidebar.selectbox("Select Page", ["Data Overview", "Predictions", "Reports"])

    # Initialize database
    # NOTE(review): db.DB() is presumably constructed for its set-up side
    # effects (confirm in modules/core/db.py); the handle itself is not used
    # here, so close it right away. Streamlit reruns this script on every
    # interaction and the handle was previously leaked on each rerun.
    database = db.DB()
    database.close()

    conn = sqlite3.connect(config_data["db_path"])
    try:
        if page == "Data Overview":
            _render_data_overview(conn)
        elif page == "Predictions":
            _render_predictions()
        elif page == "Reports":
            _render_reports(conn)
    finally:
        # Release the connection even if rendering a page fails.
        conn.close()

if __name__ == "__main__":
    main()
''')
!test -f modules/streamlit_app/app.py && echo "app.py created" || echo "Failed to create app.py"

# Install Streamlit and ngrok
!pip install streamlit pyngrok plotly --quiet

# Run Streamlit with ngrok
from pyngrok import ngrok
import subprocess

# Set up ngrok with hardcoded authtoken
!ngrok config add-authtoken 31rYvgklL0EdX9bGLvTXc313efE_2GyDFGPUNAyFgB83bikTF

# Start Streamlit server
port = 8501
public_url = ngrok.connect(port).public_url
print(f"Streamlit app running at: {public_url}")
subprocess.Popen(["streamlit", "run", "modules/streamlit_app/app.py", "--server.port", str(port)])

app.py created
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Streamlit app running at: https://48b61e91e587.ngrok-free.app


<Popen: returncode: None args: ['streamlit', 'run', 'modules/streamlit_app/a...>