In [1]:
import asyncio
import gzip
import json
import logging
import os
import sys
import urllib
import warnings
from datetime import datetime

import dotenv
import pandas as pd
import requests
from sqlalchemy import create_engine, text

### Smoke Test for Model Health Checkpoint

In [4]:
import importlib.util

# Load the model-rerun script as a module straight from its file path and
# print the model version it reports, as a quick smoke test.
#
# The location can be overridden with the RERUN_MODEL_PATH environment
# variable; the default is the original author's local checkout, so the
# notebook keeps working unchanged on that machine.
path = os.environ.get(
    "RERUN_MODEL_PATH",
    "/Users/starsrain/nov2025_concord/loonie_bankuity_rerun/rerun_model.py",
)
spec = importlib.util.spec_from_file_location("rerun_model", path)
# spec_from_file_location returns None when no loader can handle the path;
# fail with a clear message instead of an AttributeError further down.
if spec is None or spec.loader is None:
    raise ImportError(f"Cannot build an import spec for rerun_model at {path!r}")
rerun_model = importlib.util.module_from_spec(spec)
spec.loader.exec_module(rerun_model)

print("Model version:", rerun_model.get_model_version())


Model version: 16.15.6


## Start of ND Volume Analysis

In [6]:
import urllib.parse  # `import urllib` alone does not guarantee the `parse` submodule is loaded

# Build the SQLAlchemy engine for the LMS SQL Server database.
#
# Connection settings are read from the environment (optionally populated
# from a local .env file) so that credentials are never stored in the
# notebook:
#   LMS_DB_SERVER    optional, defaults to the original host
#   LMS_DB_NAME      optional, defaults to the original database
#   LMS_DB_USER      required
#   LMS_DB_PASSWORD  required
# SECURITY: a password was previously committed in this cell; it should be
# treated as compromised and rotated.
dotenv.load_dotenv()

server = os.getenv("LMS_DB_SERVER", "192.168.1.15")
database = os.getenv("LMS_DB_NAME", "LF_LMSMaster")
username = os.environ["LMS_DB_USER"]      # KeyError if unset: fail fast, no silent default
password = os.environ["LMS_DB_PASSWORD"]

# The raw ODBC connection string is URL-quoted so it can be passed through
# the `odbc_connect` query parameter.
# NOTE(review): TrustServerCertificate=yes skips server certificate
# validation - confirm this is acceptable for this network.
params = urllib.parse.quote_plus(
    "DRIVER={ODBC Driver 18 for SQL Server};"
    f"SERVER={server},1433;DATABASE={database};UID={username};PWD={password};"
    "Encrypt=yes;TrustServerCertificate=yes;"
)
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

# No connection is opened here: the query cell opens (and closes) its own
# connection with `engine.begin()`. The former module-level
# `cnxn = engine.connect()` was never closed and was shadowed by that block.
# query1: multi-statement T-SQL batch that builds three session-scoped temp
# tables in sequence (#t1 -> #t11 -> #t2). It returns no rows itself; the
# result is read back from #t2 by a separate query on the same connection.
#
#   #t1  : one row per Application row joined (left joins) to Loans,
#          LeadProvider and VW_ApplicationDump, for ApplicationDate >=
#          '2023-04-25', excluding an explicit list of e-mail addresses
#          (they look like test/internal accounts - TODO confirm). Derives the
#          flags NewlyScored, Accepted, PartialPreApproved, Originated and the
#          CustType label (RENEWAL / NEW / REPEAT).
#   UPDATE: for CustType = 'RENEWAL' rows whose RenewalLoanId is '0', fills
#          RenewalLoanId with the LoanID of an earlier loan of the same
#          customer.
#          NOTE(review): the row_number ordering is `datediff(day, ...) desc`,
#          which selects the earlier loan with the LARGEST gap (the oldest
#          one), not the most recent one - confirm this is intended.
#   #t11 : adds CustSSNLoanNum = rank() per CustomerID ordered by
#          OriginationDate ascending.
#          NOTE(review): the `Originated=1` filter is commented out, so rows
#          without a loan (NULL OriginationDate) take part in the ranking. In
#          this notebook's displayed sample, a customer's only originated loan
#          shows CustSSNLoanNum = 70 - verify before using this column as a
#          loan sequence number.
#   #t2  : attaches IBV and ND scores parsed with OPENJSON from
#          ScoringPythonResult.ExtResponse. Scores are looked up through any
#          scored application (ApplicationSteps like '%S%') of the SAME
#          CustomerID dated on or before the row's ApplicationDate, so a row
#          can carry scores produced for an earlier application. One row per
#          (Application_ID, PortfolioID) is kept via row_number, then the
#          helper column RN is dropped.
#          NOTE(review): the row_number tie-break tests `B2.ExtResponse is not
#          null` together with `B1.ExtResponse <> ''` (mixing B2 and B1) -
#          confirm this is not a typo.
#
# The text inside the string is sent to SQL Server verbatim; do not edit it
# without re-validating the downstream counts.
query1 = """ 

USE LF_LMSMASTER
drop table if EXISTS #t1
select A.Application_ID,A.ApplicationSteps, A.PortfolioID, A.CustomerID, A.CustomerSSN ,A.ApplicationDate, CustEmail,
case when ApplicationSteps like '%S%' then 1 else 0 end as NewlyScored,
case when A.ApplicationStatus in ('A','P') then 1 else 0 end as Accepted,
case when A.ApplicationStatus in ('A','P') then A.LeadPurchasePrice else 0 end as LeadPurchasePrice,
case when L.LoanStatus not in ('V','W','G','K') and A.ApplicationStatus='J'  then 1 else 0 end as PartialPreApproved,
case when L.LoanStatus not in ('V','W','G','K')  then 1 else 0 end as Originated,
L.LoanID, datediff(year, VW.DOB, A.ApplicationDate) as Age,
case when VW.Frequency in ('B','S') then 'B' else VW.Frequency end as Frequency,
case when L.LoanStatus not in ('V','W','G','K') then L.OriginatedAmount else null end as OriginatedAmount,
LP.Provider_name, OriginationDate, LOANSTATUS,
case when ((L.RenewalLoanId <> '0') or (A.LPCampaign = 'RENEW'))  then 'RENEWAL' when ApplicationSteps not like '%R%' then 'NEW' else 'REPEAT' end as CustType, RenewalLoanId
into #t1
from Application A
left join Loans L on A.Application_ID = L.ApplicationID and A.PortFolioID = L.PortFolioID
left join LeadProvider LP on A.LeadProviderID = LP.LeadProviderID
left join LF_LMS_Logs..VW_ApplicationDump VW on A.APPGUID = VW.APPGUID
--left join [LF_BankData].[dbo].[IBVStatus] as ibvT on ibvT.AccountNumber = A.CustomerSSN
--left join [LF_BankData].[dbo].[IBVStatus] as ibvT on ibvT.ApplicationID = A.Application_ID AND ibvT.PortfolioID= A.PortfolioID
where A.ApplicationDate>='2023-04-25' and ((CustEmail is null) or  (CustEmail not in ( 'josh@concordadvice.com', 'tiffany.speedyservicing@gmail.com','bobby@speedyservicing.com',
'and.kom@example.com', 'and.tor020@example.com', 'and.tor050@example.com', 'and.torrc4@example.com',
'test@dot818.com', 'test@example.com', 'test@gmail.com', 'test@loanscanada.ca', 'test@test.com', 'test2@test.com',
'testerqad@gmail.com', 'testik@test.com', 'testteam@dmaassociatescorp.com', 'tiffany.speedyservicing@gmail.com')))
order by A.ApplicationDate asc


UPDATE #t1 -- update the renewal loanid from 0 to correct previous loanIDs
SET #t1.RenewalLoanId = B.RenewalLoanId
from #t1 A inner join (select * from
(select A.LoanID, L.LoanID as RenewalLoanId, row_number() over (partition by A.LoanID order by datediff(day, L.OriginationDate, A.OriginationDate) desc) as RN from #t1 A
inner join Application A2 on A.CustomerID = A2.CustomerID and A2.ApplicationDate < A.ApplicationDate
inner join Loans L on A2.Application_ID = L.ApplicationID and A2.PortFolioID = L.PortFolioID and L.OriginationDate < A.OriginationDate and L.LoanStatus not in ('W','V','G','K')
where A.CustType = 'RENEWAL' and A.RenewalLoanId = '0') K where RN=1) B on A.LoanID = B.LoanID


drop table if EXISTs #t11
select rank() over(partition by A.CustomerID order by OriginationDate asc) as CustSSNLoanNum,A.*
into #t11
from #t1 A
--where A.Originated=1 -- 24565 originated Loonie customers (12/23/2025)

-- scoring result for PlaidUDW_v1 and NegativeDBModelLP_v1
drop table if EXISTS #t2
select *
into #t2 from
(select A.*, B1.ExtResponse as B1ExtResponse, B2.ExtResponse as B2ExtResponse,
NDScore, IBVScore, NDBand,IBVBand,
row_number() over (partition by A.Application_ID, A.PortfolioID order by case when B2.ExtResponse is not null and B1.ExtResponse <> '' then 1 else 0 end desc,B.ApplicationDate desc) as RN
from #t11 A
left join Application B on A.CustomerID = B.CustomerID and A.ApplicationDate >= B.ApplicationDate AND B.ApplicationSteps like '%S%'
left join ScoringPythonResult B1 CROSS APPLY OPENJSON(B1.ExtResponse) WITH (IBVScore int '$.ModelScore', IBVBand int '$.IBVBand')  on B.Application_ID = B1.ApplicationID and B.PortFolioID = B1.PortfolioID and B1.iLabel= 'IBVBand'
left join ScoringPythonResult B2 CROSS APPLY OPENJSON(B2.ExtResponse) WITH (NDScore int '$.ModelScore', NDBand int '$.NDBand') on B.Application_ID = B2.ApplicationID and B.PortFolioID = B2.PortfolioID and B2.iLabel= 'NDBand' ) K where RN=1


ALTER TABLE #t2
DROP COLUMN RN;

"""
# Read-back query: every row of the #t2 temp table, newest application first.
query2 = """
SELECT *
FROM #t2
ORDER BY ApplicationDate DESC
"""

# Temp tables are only visible on the connection that created them, so the
# setup batch (query1) and the read-back (query2) share one connection and
# one transaction; the context manager commits and releases it on exit.
with engine.begin() as cnxn:
    cnxn.exec_driver_sql("SET NOCOUNT ON; USE LF_LMSMASTER;")
    cnxn.exec_driver_sql(query1)
    df_perf_orig = pd.read_sql(query2, cnxn)

# Normalise the application key: shorter column name, integer dtype.
df_perf_orig = (
    df_perf_orig
    .rename(columns={"Application_ID": "AppID"})
    .astype({"AppID": int})
)

print(df_perf_orig.shape)

(146996, 28)


In [7]:
# Grain check for df_perf_orig: each application is expected to appear once,
# where an application is identified by the pair (AppID, PortfolioID).

app_keys = ["AppID", "PortfolioID"]

n_rows = df_perf_orig.shape[0]
n_app_pairs = len(df_perf_orig.drop_duplicates(subset=app_keys))
n_dup_app_pairs = df_perf_orig.duplicated(subset=app_keys).sum()

print("rows:", n_rows)
print("unique (AppID, PortfolioID):", n_app_pairs)
print("duplicate rows on (AppID, PortfolioID):", n_dup_app_pairs)

# Per-column cardinalities: if unique AppID equals the row count, AppID alone
# is already a unique key; PortfolioID shows how many portfolios are present.
print("unique AppID:", df_perf_orig["AppID"].nunique())
print("unique PortfolioID:", df_perf_orig["PortfolioID"].nunique())

rows: 146996
unique (AppID, PortfolioID): 146996
duplicate rows on (AppID, PortfolioID): 0
unique AppID: 146996
unique PortfolioID: 1


In [8]:
# Application view of the ND knockout: among NEW, newly scored applications,
# count how many carry an NDScore and how many fall below the cutoff.

app_keys = ["AppID", "PortfolioID"]
cutoff = 560  # NDScore knockout threshold; change if needed

# Start from a fresh frame with the three columns used below coerced to
# proper dtypes (values that cannot be parsed become NaT / NaN).
df_apps = df_perf_orig.assign(
    ApplicationDate=pd.to_datetime(df_perf_orig["ApplicationDate"], errors="coerce"),
    NewlyScored=pd.to_numeric(df_perf_orig["NewlyScored"], errors="coerce"),
    NDScore=pd.to_numeric(df_perf_orig["NDScore"], errors="coerce"),
)

# Population: NEW + newly scored applications (same filter as the customer
# view), reduced to one row per application key. Should a key repeat, the row
# with the latest ApplicationDate is the one kept.
df_apps = (
    df_apps.loc[lambda d: d["CustType"].eq("NEW") & d["NewlyScored"].eq(1)]
           .sort_values([*app_keys, "ApplicationDate"], ascending=[True, True, False])
           .drop_duplicates(subset=app_keys, keep="first")
)

n_apps = df_apps.shape[0]
n_scored = df_apps["NDScore"].notna().sum()
n_missing = df_apps["NDScore"].isna().sum()
# Comparisons against NaN are False, so unscored rows count as neither fail
# nor pass.
n_fail = df_apps["NDScore"].lt(cutoff).sum()
n_pass = df_apps["NDScore"].ge(cutoff).sum()

pd.Series({
    "apps_in_population": n_apps,
    "apps_with_NDScore": n_scored,
    "apps_missing_NDScore": n_missing,
    f"apps_fail_NDScore_lt_{cutoff}": n_fail,
    f"apps_pass_NDScore_ge_{cutoff}": n_pass,
    "fail_rate_among_scored": None if not n_scored else n_fail / n_scored,
    "fail_rate_overall": None if not n_apps else n_fail / n_apps,
})

apps_in_population          67780.000000
apps_with_NDScore           64628.000000
apps_missing_NDScore         3152.000000
apps_fail_NDScore_lt_560     7063.000000
apps_pass_NDScore_ge_560    57565.000000
fail_rate_among_scored          0.109287
fail_rate_overall               0.104205
dtype: float64

In [5]:
# Q1) Number of distinct customers in the extract (missing CustomerID values
# are not counted).
df_perf_orig["CustomerID"].nunique(dropna=True)

72117

In [6]:
# Q2) Pick an example customer that has more than one row.
# value_counts() orders by count, largest first, so the first index entry of
# the filtered result is the customer with the most rows.
dup_cust = df_perf_orig["CustomerID"].value_counts()
example_customer_id = dup_cust.loc[dup_cust.gt(1)].index[0]
example_customer_id

3717

In [7]:
# Show every row belonging to the example customer.
# NOTE: the rendered table contains customer PII (e-mail, SSN-like IDs);
# clear this cell's output before sharing or committing the notebook.
df_perf_orig.loc[df_perf_orig["CustomerID"].eq(example_customer_id)]

Unnamed: 0,CustSSNLoanNum,AppID,ApplicationSteps,PortfolioID,CustomerID,CustomerSSN,ApplicationDate,CustEmail,NewlyScored,Accepted,...,OriginationDate,LOANSTATUS,CustType,RenewalLoanId,B1ExtResponse,B2ExtResponse,NDScore,IBVScore,NDBand,IBVBand
1663,1,145041,,1,3717,4912,2026-01-01 13:23:02.893,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
2411,1,144293,,1,3717,4912,2025-12-30 23:22:56.867,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
3435,1,143269,,1,3717,4912,2025-12-29 09:26:48.667,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
3749,1,142955,,1,3717,4912,2025-12-26 12:06:17.930,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
4284,1,142420,,1,3717,4912,2025-12-24 15:51:54.687,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133752,1,12920,,1,3717,4912,2024-01-16 12:16:00.133,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
139167,1,7481,,1,3717,4912,2023-10-05 10:03:34.940,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
139168,1,7480,,1,3717,4912,2023-10-05 10:03:34.450,donna.allen85@yahoo.ca,0,0,...,NaT,,REPEAT,,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0
140267,70,6381,US,1,3717,4912,2023-09-08 11:00:12.000,donna.allen85@yahoo.ca,1,1,...,2023-09-08 11:51:38.477,R,NEW,0,"{""ModelScore"":668,""IBVBand"":2}","{""ModelScore"":605,""NDBand"":2}",605.0,668.0,2.0,2.0


In [8]:

# Customer view: keep NEW + newly scored applications, then reduce to a
# single row per CustomerID.
df_new_score = df_perf_orig.assign(
    ApplicationDate=pd.to_datetime(df_perf_orig["ApplicationDate"], errors="coerce"),
    NDScore=pd.to_numeric(df_perf_orig["NDScore"], errors="coerce"),
    NewlyScored=pd.to_numeric(df_perf_orig["NewlyScored"], errors="coerce"),
)

is_new_and_scored = df_new_score["CustType"].eq("NEW") & df_new_score["NewlyScored"].eq(1)
df_new = df_new_score.loc[is_new_and_scored].copy()

# Within each customer the rows are ordered oldest first, so keeping the
# first row selects the earliest qualifying application
# (sort ApplicationDate descending instead to keep the latest).
df_new_dedup = (
    df_new.sort_values(by=["CustomerID", "ApplicationDate"])
          .drop_duplicates(subset="CustomerID", keep="first")
)

df_new_dedup.shape

(58194, 28)

In [None]:
# Distinct customers overall vs. distinct customers with at least one NEW row.
total = df_perf_orig["CustomerID"].nunique()
# NOTE: despite the variable name, the filter is CustType == "NEW" only;
# NewlyScored is not part of it.
new_scored = df_perf_orig.loc[df_perf_orig["CustType"].eq("NEW"), "CustomerID"].nunique()
(total, new_scored, total - new_scored)
# 69398 unique NEW customers at the time of this run

(72117, 69398, 2719)

In [10]:
# (customers with an NDScore, customers in the de-duplicated NEW population)
(df_new_dedup["NDScore"].notna().sum(), df_new_dedup.shape[0])

(55319, 58194)

In [11]:
# Peek at the first 20 de-duplicated NEW customers that have no NDScore.
# NOTE: the rendered rows include customer e-mail addresses; clear the output
# before sharing.
df_new_dedup[df_new_dedup["NDScore"].isna()].head(20)

Unnamed: 0,CustSSNLoanNum,AppID,ApplicationSteps,PortfolioID,CustomerID,CustomerSSN,ApplicationDate,CustEmail,NewlyScored,Accepted,...,OriginationDate,LOANSTATUS,CustType,RenewalLoanId,B1ExtResponse,B2ExtResponse,NDScore,IBVScore,NDBand,IBVBand
137407,1,9241,US,1,36,44,2023-11-13 16:37:46.460,Me2918@yahoo.com,1,0,...,NaT,,NEW,,,,,,,
142606,1,4038,US,1,45,52,2023-07-19 14:08:19.000,anne.perry@camh.ca,1,0,...,NaT,,NEW,,,,,,,
146566,1,68,US,1,47,54,2023-04-26 10:03:12.303,,1,0,...,NaT,,NEW,,,,,,,
146563,1,71,US,1,50,57,2023-04-26 11:31:43.000,,1,0,...,NaT,,NEW,,,,,,,
146562,1,72,US,1,51,58,2023-04-26 11:42:34.557,,1,0,...,NaT,,NEW,,,,,,,
146561,1,73,US,1,52,59,2023-04-26 11:48:13.557,,1,0,...,NaT,,NEW,,,,,,,
146558,1,76,US,1,55,62,2023-04-26 12:33:51.807,,1,0,...,NaT,,NEW,,,,,,,
146557,1,77,US,1,56,63,2023-04-26 12:34:18.000,,1,0,...,NaT,,NEW,,,,,,,
146556,1,78,US,1,57,64,2023-04-26 12:40:52.000,,1,0,...,NaT,,NEW,,,,,,,
146554,1,80,US,1,59,66,2023-04-26 13:17:37.000,corey_minoza@hotmail.com,1,0,...,NaT,,NEW,,,,,,,


In [12]:
# Among de-duplicated NEW customers that have an NDScore, count those scoring
# below the knockout threshold. Uses the shared `cutoff` defined in the
# application-level cell rather than repeating the literal 560, so the
# application view and the customer view cannot drift apart.
# Rows without a score compare as False and are therefore not counted.
(df_new_dedup["NDScore"].notna() & (df_new_dedup["NDScore"] < cutoff)).sum()

6071