In [1]:
import json
import os
import re
import sys
import urllib
import urllib.parse
from datetime import datetime
from datetime import datetime, date
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import pyodbc
import statsmodels.api as sm
from matplotlib import pyplot as plt
from sqlalchemy import create_engine
# Show every column when a wide DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)

### Load the two separate IBVStatusID lists and the performance table into DataFrames

In [2]:
# Load the performance table and the two IBVStatusID lists into DataFrames.
#
# All three CSVs live in one directory. It defaults to the original local path
# and can be overridden with the RERUN_DATA_DIR environment variable, so the
# notebook can run on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    "RERUN_DATA_DIR",
    "/Users/starsrain/nov2025_concord/loonie_bankuity_rerun/rerun_output_data",
))

# Performance table; joined to the ID lists on its IBVStatusID column.
df_perf = pd.read_csv(DATA_DIR / "df_ibvIDs_perf.csv")
# ID lists, each providing an IBVStatusID column.
# NOTE(review): presumably split by whether the stored ModelRequest was empty - confirm.
empty_model_requests = pd.read_csv(DATA_DIR / "empty_model_requests.csv")
non_empty_model_requests = pd.read_csv(DATA_DIR / "non_empty_model_requests.csv")

In [3]:
# Report (rows, columns) for each loaded frame, one shape per line.
print(
    *(frame.shape for frame in (df_perf, empty_model_requests, non_empty_model_requests)),
    sep="\n",
)

(6151, 42)
(3297, 1)
(154, 1)


In [4]:
# Right-join the performance table onto each ID list: every requested
# IBVStatusID is kept, and the performance columns are NaN where df_perf has
# no matching row.
_empty_ids = empty_model_requests[['IBVStatusID']]
_nonempty_ids = non_empty_model_requests[['IBVStatusID']]
perf_vs_empty = df_perf.merge(_empty_ids, how='right', on='IBVStatusID')
perf_vs_nonempty = df_perf.merge(_nonempty_ids, how='right', on='IBVStatusID')

# Keep only the score / external-response columns that df_perf actually has.
score_cols = [
    name
    for name in ('IBVScore', 'NDScore', 'IBVBand', 'NDBand')
    if name in df_perf.columns
]
ext_cols = [
    name
    for name in ('B1ExtResponse', 'B2ExtResponse')
    if name in df_perf.columns
]

In [5]:
# Shape check on the right-joined frame. The row count equals
# len(empty_model_requests) only when df_perf has no duplicate IBVStatusID
# values; a larger count would mean the join fanned out.
print(perf_vs_empty.shape)

(3297, 42)


In [6]:
def _flag_coverage(frame):
    """Add has_perf / has_scores / has_ext columns to `frame` in place.

    has_perf is True where AppID is populated; has_scores / has_ext are True
    where at least one column of score_cols / ext_cols is populated, and are
    all False when the corresponding column list is empty.
    NOTE(review): using AppID as the "matched a performance row" marker
    assumes AppID is never null in df_perf - confirm.
    """
    frame['has_perf'] = ~frame['AppID'].isna()

    if score_cols:
        frame['has_scores'] = frame[score_cols].notna().any(axis=1)
    else:
        frame['has_scores'] = False

    if ext_cols:
        frame['has_ext'] = frame[ext_cols].notna().any(axis=1)
    else:
        frame['has_ext'] = False


def _n_true(frame, flag):
    """Return the number of True values in boolean column `flag` as a plain int."""
    return int(frame[flag].sum())


# Flag both joined frames (mutates them in place).
_flag_coverage(perf_vs_empty)
_flag_coverage(perf_vs_nonempty)

# One summary dict per group: size of the ID list, then how many joined rows
# carry performance data, scores, and external responses.
print({
    'empty_total': len(empty_model_requests),
    'empty_with_perf': _n_true(perf_vs_empty, 'has_perf'),
    'empty_with_scores': _n_true(perf_vs_empty, 'has_scores'),
    'empty_with_ext': _n_true(perf_vs_empty, 'has_ext'),
})
print({
    'nonempty_total': len(non_empty_model_requests),
    'nonempty_with_perf': _n_true(perf_vs_nonempty, 'has_perf'),
    'nonempty_with_scores': _n_true(perf_vs_nonempty, 'has_scores'),
    'nonempty_with_ext': _n_true(perf_vs_nonempty, 'has_ext'),
})

# Aliases (same objects, not copies) under the names used further down.
perf_join_empty, perf_join_nonempty = perf_vs_empty, perf_vs_nonempty

{'empty_total': 3297, 'empty_with_perf': 2141, 'empty_with_scores': 2141, 'empty_with_ext': 2141}
{'nonempty_total': 154, 'nonempty_with_perf': 43, 'nonempty_with_scores': 43, 'nonempty_with_ext': 43}


In [8]:
# Preview the rows that have performance data in each group, then export the
# "empty" group's rows.

# Preview columns, restricted to those present in the joined frame.
_preview_candidates = (
    'IBVStatusID', 'AppID', 'ApplicationDate', 'LoanID',
    'IBVScore', 'NDScore', 'IBVBand', 'NDBand',
    'ModelRequest', 'ModelResponse', 'B1ExtResponse', 'B2ExtResponse',
)
cols = [name for name in _preview_candidates if name in perf_join_empty.columns]

# Independent copies of the rows flagged has_perf.
_empty_mask = perf_join_empty['has_perf']
_nonempty_mask = perf_join_nonempty['has_perf']
empty_with_perf = perf_join_empty[_empty_mask].copy()
nonempty_with_perf = perf_join_nonempty[_nonempty_mask].copy()

print(f"Empty group with perf: {len(empty_with_perf)}")
display(empty_with_perf.head(20)[cols])

print(f"\nNon-empty group with perf: {len(nonempty_with_perf)}")
display(nonempty_with_perf.head(20)[cols])

# Persist every column of the empty-group rows (not only the preview columns).
empty_with_perf.to_csv(
    "/Users/starsrain/nov2025_concord/loonie_bankuity_rerun/rerun_output_data/empty_with_perf_IDs.csv",
    index=False,
)


Empty group with perf: 2141


Unnamed: 0,IBVStatusID,AppID,ApplicationDate,LoanID,IBVScore,NDScore,IBVBand,NDBand,ModelRequest,ModelResponse,B1ExtResponse,B2ExtResponse
2,16992,119093.0,2025-10-03 10:41:09,I31774-0,742.0,578.0,4.0,2.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":742,""IBVBand"":4}","{""ModelScore"":578,""NDBand"":2}"
6,29074,109951.0,2025-09-05 07:55:57,I29151-0,750.0,764.0,4.0,4.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":750,""IBVBand"":4}","{""ModelScore"":764,""NDBand"":4}"
14,40544,83417.0,2025-06-10 14:16:28,I21570-0,727.0,999.0,4.0,6.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":727,""IBVBand"":4}","{""ModelScore"":999,""NDBand"":6}"
31,44278,84796.0,2025-06-12 12:54:14,I21865-0,757.0,999.0,5.0,6.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":757,""IBVBand"":5}","{""ModelScore"":999,""NDBand"":6}"
38,45476,84345.0,2025-06-11 17:15:44,I21770-0,738.0,999.0,4.0,6.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":738,""IBVBand"":4}","{""ModelScore"":999,""NDBand"":6}"
41,45618,90248.0,2025-06-30 13:55:14,I23308-0,713.0,604.0,4.0,3.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":713,""IBVBand"":4}","{""ModelScore"":604,""NDBand"":3}"
44,46232,100684.0,2025-08-02 22:10:58,I26348-0,745.0,637.0,4.0,3.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":745,""IBVBand"":4}","{""ModelScore"":637,""NDBand"":3}"
45,46474,87909.0,2025-06-20 17:24:29,I22561-0,741.0,689.0,4.0,4.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":741,""IBVBand"":4}","{""ModelScore"":689,""NDBand"":4}"
47,46804,106350.0,2025-08-21 15:28:01,I28039-0,711.0,999.0,4.0,6.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":711,""IBVBand"":4}","{""ModelScore"":999,""NDBand"":6}"
53,48236,115717.0,2025-09-24 12:02:38,I30829-0,722.0,999.0,4.0,6.0,,"{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":722,""IBVBand"":4}","{""ModelScore"":999,""NDBand"":6}"



Non-empty group with perf: 43


Unnamed: 0,IBVStatusID,AppID,ApplicationDate,LoanID,IBVScore,NDScore,IBVBand,NDBand,ModelRequest,ModelResponse,B1ExtResponse,B2ExtResponse
1,11277,89529.0,2025-06-27 11:05:20,I23084-0,659.0,746.0,3.0,4.0,"{""asOfDate"":""2025-06-27"",""accounts"":[{""account...","{""message"":""Unknown error occurred: cannot acc...","{""ModelScore"":659,""IBVBand"":3}","{""ModelScore"":746,""NDBand"":4}"
6,13962,109296.0,2025-09-02 15:00:09,I28914-0,783.0,683.0,5.0,3.0,"{""asOfDate"":""2025-09-02"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":783,""IBVBand"":5}","{""ModelScore"":683,""NDBand"":3}"
9,15931,91690.0,2025-07-04 10:16:20,I23690-0,747.0,637.0,4.0,3.0,"{""asOfDate"":""2025-10-10"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":747,""IBVBand"":4}","{""ModelScore"":637,""NDBand"":3}"
10,18903,105714.0,2025-08-20 10:31:46,I27861-0,744.0,577.0,4.0,2.0,"{""asOfDate"":""2025-08-20"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":744,""IBVBand"":4}","{""ModelScore"":577,""NDBand"":2}"
14,21399,109153.0,2025-09-02 10:30:03,I28871-0,782.0,999.0,5.0,6.0,"{""asOfDate"":""2025-04-25"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":782,""IBVBand"":5}","{""ModelScore"":999,""NDBand"":6}"
15,21628,104711.0,2025-08-16 08:19:57,I27572-0,733.0,768.0,4.0,5.0,"{""asOfDate"":""2025-05-28"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":733,""IBVBand"":4}","{""ModelScore"":768,""NDBand"":5}"
16,21929,72245.0,2025-05-10 23:58:27,I19047-0,738.0,700.0,4.0,4.0,"{""asOfDate"":""2025-05-12"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":738,""IBVBand"":4}","{""ModelScore"":700,""NDBand"":4}"
17,22558,47364.0,2025-01-22 20:35:53,I13256-0,714.0,592.0,4.0,3.0,"{""asOfDate"":""2025-07-09"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":714,""IBVBand"":4}","{""ModelScore"":592,""NDBand"":3}"
18,23220,86048.0,2025-06-16 13:04:42,I22151-0,730.0,999.0,4.0,6.0,"{""asOfDate"":""2025-09-05"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":730,""IBVBand"":4}","{""ModelScore"":999,""NDBand"":6}"
19,24751,79481.0,2025-06-03 00:26:34,I20736-0,762.0,999.0,5.0,6.0,"{""asOfDate"":""2025-09-04"",""accounts"":[{""account...","{""accounts"":[],""customerInfo"":{},""errorDetails...","{""ModelScore"":762,""IBVBand"":5}","{""ModelScore"":999,""NDBand"":6}"


### Bring in the Anson Old Model Request 

In [None]:
# Connect to the LMS SQL Server database and load the ScoringPythonResult table.
#
# Connection settings are read from environment variables so that no secret is
# stored in the notebook:
#   LMS_DB_SERVER    (default: 192.168.1.15)
#   LMS_DB_NAME      (default: LF_LMSMaster)
#   LMS_DB_USER      (default: Junchen)
#   LMS_DB_PASSWORD  (required, no default)
# NOTE(review): a plaintext password used to be hardcoded in this cell; treat
# it as compromised and rotate it.
server = os.environ.get("LMS_DB_SERVER", "192.168.1.15")
database = os.environ.get("LMS_DB_NAME", "LF_LMSMaster")
username = os.environ.get("LMS_DB_USER", "Junchen")
password = os.environ.get("LMS_DB_PASSWORD")
if not password:
    # Fail fast with an actionable message instead of an opaque ODBC login error.
    raise RuntimeError(
        "Set the LMS_DB_PASSWORD environment variable before running this cell"
    )

# URL-encode the raw ODBC connection string so it can be embedded in the
# SQLAlchemy URL below.
# NOTE(review): TrustServerCertificate=yes disables validation of the server's
# TLS certificate - confirm this is acceptable for this network.
params = urllib.parse.quote_plus(
    "DRIVER={ODBC Driver 18 for SQL Server};"
    f"SERVER={server},1433;DATABASE={database};UID={username};PWD={password};"
    "Encrypt=yes;TrustServerCertificate=yes;"
)
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

# The connection is intentionally left open under the name `cnxn` so that
# later cells can reuse it; call cnxn.close() once it is no longer needed.
cnxn = engine.connect()

# Pulls the entire table (no WHERE clause, every column).
query1 = """
SELECT *
FROM [LF_LMSMaster].[dbo].[ScoringPythonResult]
"""

df_old_requests = pd.read_sql(query1, cnxn)

### Analyze failedModelRequest_* JSONs for failure reasons

In [21]:
# Analyze failedModelRequest_* JSONs for failure reasons
#
# For every failedModelRequest_*_<id>.json file in input_dir, count accounts and
# transactions, compare the accountGuid values referenced by transactions with
# those declared on accounts, and assign one primary failure reason per file.

input_dir = "/Users/starsrain/nov2025_concord/loonie_bankuity_rerun/rerun_output_JSONs"

# Column order of the summary frame. Passing it explicitly keeps the frame
# well-formed (and sortable by IBVStatusID) even when no file matches.
_REASON_COLUMNS = [
    "IBVStatusID",
    "num_accounts",
    "num_transactions",
    "accounts_empty",
    "transactions_empty",
    "txn_guid_count",
    "account_guid_count",
    "txn_guid_missing",
    "guid_mismatch",
    "primary_reason",
]


def _guid_set(entries):
    """Return the set of truthy "accountGuid" values found on the dict items of `entries`."""
    return {
        entry.get("accountGuid")
        for entry in entries
        if isinstance(entry, dict) and entry.get("accountGuid")
    }


def _summarize_failed_request(ibv_id, payload):
    """Build the summary row for one failed model-request payload.

    Args:
        ibv_id: IBVStatusID parsed from the file name.
        payload: Decoded top-level JSON object (dict) of the request.

    Returns:
        dict with the keys listed in _REASON_COLUMNS. "accounts" and
        "transactions" values that are missing or not lists count as empty.
    """
    accounts = payload.get("accounts")
    if not isinstance(accounts, list):
        accounts = []
    transactions = payload.get("transactions")
    if not isinstance(transactions, list):
        transactions = []

    account_guids = _guid_set(accounts)
    txn_account_guids = _guid_set(transactions)

    accounts_empty = len(accounts) == 0
    transactions_empty = len(transactions) == 0
    # Transactions exist but none of them carries an accountGuid.
    txn_guid_missing = (not transactions_empty) and (len(txn_account_guids) == 0)
    # At least one transaction references an accountGuid that no account declares.
    guid_mismatch = (len(txn_account_guids) > 0) and (
        not txn_account_guids.issubset(account_guids)
    )

    # Classify a primary reason (ordered, first match wins).
    # accounts_empty is tested BEFORE accountGuid_mismatch: with no accounts,
    # every transaction GUID trivially fails the subset test, so in the
    # opposite order the accounts_empty branch could never be reached.
    if transactions_empty:
        primary_reason = "empty_transactions"
    elif txn_guid_missing:
        primary_reason = "transactions_missing_accountGuid"
    elif accounts_empty:
        primary_reason = "accounts_empty"
    elif guid_mismatch:
        primary_reason = "accountGuid_mismatch"
    else:
        primary_reason = "other"

    return {
        "IBVStatusID": ibv_id,
        "num_accounts": len(accounts),
        "num_transactions": len(transactions),
        "accounts_empty": accounts_empty,
        "transactions_empty": transactions_empty,
        "txn_guid_count": len(txn_account_guids),
        "account_guid_count": len(account_guids),
        "txn_guid_missing": txn_guid_missing,
        "guid_mismatch": guid_mismatch,
        "primary_reason": primary_reason,
    }


rows = []
parse_errors = []  # (IBVStatusID, message) for files that could not be analyzed

# sorted() makes the processing order independent of the filesystem's listing order.
for fname in sorted(os.listdir(input_dir)):
    if not (fname.startswith("failedModelRequest_") and fname.endswith(".json")):
        continue
    # The IBVStatusID is the trailing "_<digits>" just before the extension.
    m = re.search(r"_(\d+)\.json$", fname)
    if not m:
        continue
    ibv_id = int(m.group(1))
    path = os.path.join(input_dir, fname)
    try:
        with open(path, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        parse_errors.append((ibv_id, str(e)))
        continue
    if not isinstance(payload, dict):
        # Valid JSON whose top level is not an object cannot be analyzed.
        parse_errors.append(
            (ibv_id, f"top-level JSON value is {type(payload).__name__}, expected an object")
        )
        continue

    rows.append(_summarize_failed_request(ibv_id, payload))

failed_reasons_df = (
    pd.DataFrame(rows, columns=_REASON_COLUMNS)
    .sort_values("IBVStatusID", kind="stable")
    .reset_index(drop=True)
)

print(f"Analyzed files: {len(rows)} | Parse errors: {len(parse_errors)}")
# Surface the individual failures instead of only counting them.
for bad_id, message in parse_errors:
    print(f"  parse error for IBVStatusID {bad_id}: {message}")
print("Primary reason counts:\n", failed_reasons_df["primary_reason"].value_counts())

# Optional: save summary CSV
summary_csv = "/Users/starsrain/nov2025_concord/loonie_bankuity_rerun/rerun_output_data/failed_requests_reasons_summary.csv"
os.makedirs(os.path.dirname(summary_csv), exist_ok=True)
failed_reasons_df.to_csv(summary_csv, index=False)
summary_csv


Analyzed files: 153 | Parse errors: 0
Primary reason counts:
 primary_reason
other                   83
empty_transactions      65
accountGuid_mismatch     5
Name: count, dtype: int64


'/Users/starsrain/nov2025_concord/loonie_bankuity_rerun/rerun_output_data/failed_requests_reasons_summary.csv'

In [22]:
# IBVStatusIDs classified as 'other'
# Guard against running this cell before the analysis cell has built the frame.
assert 'failed_reasons_df' in globals(), "Run the failure reasons analysis cell first"
_is_other = failed_reasons_df['primary_reason'].eq('other')
_other_series = failed_reasons_df.loc[_is_other, 'IBVStatusID']
other_ids = _other_series.sort_values().tolist()
print(f"Count (other): {len(other_ids)}")
other_ids[:50]  # preview first 50



Count (other): 83


[21399,
 23220,
 32413,
 38875,
 40420,
 43212,
 44865,
 44893,
 45027,
 45088,
 45167,
 45568,
 45879,
 46038,
 46212,
 46309,
 46562,
 47371,
 48086,
 48279,
 49123,
 49133,
 49348,
 49692,
 50217,
 51212,
 51297,
 51313,
 51446,
 52325,
 52421,
 52581,
 52629,
 52989,
 53256,
 53683,
 55167,
 55250,
 56314,
 56325,
 56466,
 57418,
 57756,
 57757,
 58312,
 58320,
 58322,
 58472,
 58721,
 59001]