## Test Set Evaluation 


In [15]:
import sys
from pathlib import Path

# Locate the project checkout so the notebook also runs from notebooks/:
# walk from the working directory up to the filesystem root and stop at the
# first folder that contains a `src` entry.
# NOTE(review): the imports in this cell are `credit_risk.*`; if that package
# lives under `src/`, adding the repo root alone would not expose it —
# confirm the package is installed or that the layout is flat.
search_start = Path.cwd()
for repo_root in (search_start, *search_start.parents):
    if (repo_root / 'src').exists():
        break
else:
    # Reached the filesystem root without ever seeing a `src` folder.
    raise RuntimeError('Could not locate project root from current working directory.')

# Append only once so re-running the cell does not grow sys.path.
root_entry = str(repo_root)
if root_entry not in sys.path:
    sys.path.append(root_entry)

import joblib
import pandas as pd

from credit_risk.data.load_data import load_cleaned_data
from credit_risk.data.split_data import DataSplitter
from credit_risk.evaluation.metrics import evaluate_classification
from credit_risk.features.build_features import FeatureBuilder
from credit_risk.utils.config import data_config, split_config, xgb_config
from credit_risk.utils.logging import get_logger
from credit_risk.utils.paths import project_root


In [16]:
# Notebook-level logger; the lines below record the configuration this run
# was executed with so the captured output is self-describing.
logger = get_logger('test_set_experiment')

logger.info(f'Project root: {project_root}')

# The test share is whatever remains after the train and validation fractions.
logger.info(
    f'Split config -> train={split_config.TRAIN_FRAC:.2f}, '
    f'val={split_config.VAL_FRAC:.2f}, '
    f'test={1 - split_config.TRAIN_FRAC - split_config.VAL_FRAC:.2f}'
)

logger.info(f'Target column: {data_config.TARGET_COL}')
logger.info(f'XGBoost eval metric from config: {xgb_config.PARAMS["eval_metric"]}')

2026-02-17 15:53:51 | INFO | test_set_experiment | Project root: D:\Projects\lending-club-credit-risk
2026-02-17 15:53:51 | INFO | test_set_experiment | Split config -> train=0.70, val=0.15, test=0.15
2026-02-17 15:53:51 | INFO | test_set_experiment | Target column: is_default
2026-02-17 15:53:51 | INFO | test_set_experiment | XGBoost eval metric from config: auc


In [17]:
# Read the cleaned dataset and partition it into train / validation / test.
logger.info('Loading cleaned dataset')
df = load_cleaned_data()

splitter = DataSplitter()
train_df, val_df, test_df = splitter.split(df)

# Report the size of every partition in a fixed order.
for split_label, split_frame in (
    ('Train', train_df),
    ('Validation', val_df),
    ('Test', test_df),
):
    logger.info(f'{split_label} shape: {split_frame.shape}')

2026-02-17 15:53:59 | INFO | test_set_experiment | Loading cleaned dataset
2026-02-17 15:53:59 | INFO | credit_risk.data.load_data | Loading cleaned data from D:\Projects\lending-club-credit-risk\data\processed\cleaned_data.parquet
2026-02-17 15:54:01 | INFO | credit_risk.data.load_data | Cleaned data shape: (1345309, 30)
2026-02-17 15:54:02 | INFO | credit_risk.data.split_data | Split sizes → train=941716, val=201796, test=201797
2026-02-17 15:54:02 | INFO | test_set_experiment | Train shape: (941716, 30)
2026-02-17 15:54:02 | INFO | test_set_experiment | Validation shape: (201796, 30)
2026-02-17 15:54:02 | INFO | test_set_experiment | Test shape: (201797, 30)


In [18]:
# Locations of the serialized estimators inside the project tree.
LOGISTIC_MODEL_PATH = project_root / 'models' / 'logistic' / 'model.pkl'
XGBOOST_MODEL_PATH = project_root / 'models' / 'xgboost' / 'model.pkl'

# Fail early, listing every absent artifact, before attempting to load any.
required = [LOGISTIC_MODEL_PATH, XGBOOST_MODEL_PATH]
missing = []
for artifact_path in required:
    if not artifact_path.exists():
        missing.append(str(artifact_path))

if missing:
    raise FileNotFoundError('Missing model artifacts:\n' + '\n'.join(missing))

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
logger.info('Loading trained model artifacts')
logistic_model = joblib.load(LOGISTIC_MODEL_PATH)
xgb_model = joblib.load(XGBOOST_MODEL_PATH)
logger.info('Models loaded successfully')

2026-02-17 15:54:05 | INFO | test_set_experiment | Loading trained model artifacts
2026-02-17 15:54:05 | INFO | test_set_experiment | Models loaded successfully


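**Security note:** the model artifacts above are loaded with `joblib.load`, which unpickles the file and can execute arbitrary code, so only load artifacts from a trusted source. See scikit-learn's guidance on the security and maintainability limitations of model persistence: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations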


In [None]:

# Build the feature matrix and target for the held-out test split.
# The builder is first run on the training split with fit=True (result
# discarded), then on the test split with fit=False; the order matters.
# NOTE(review): presumably fit=True learns the transform state that fit=False
# reuses — confirm in FeatureBuilder.
# NOTE(review): assumes refitting here reproduces the exact transform the
# saved models were trained with — confirm, or persist the fitted builder
# alongside the model artifacts.
feature_builder = FeatureBuilder()
# `_` is also IPython's last-output variable; it is used here as a throwaway.
_ = feature_builder.build_features(train_df, fit=True)
X_test, y_test = feature_builder.build_features(test_df, fit=False)

# Sanity log: the number of feature rows should match the number of targets.
logger.info(f'Test feature matrix shape: {X_test.shape}')
logger.info(f'Test target size: {len(y_test)}')

2026-02-17 15:54:08 | INFO | credit_risk.features.build_features | Building features


  df["earliest_cr_line"] = pd.to_datetime(df["earliest_cr_line"], errors="coerce")


2026-02-17 15:54:17 | INFO | credit_risk.features.build_features | Building features


  df["earliest_cr_line"] = pd.to_datetime(df["earliest_cr_line"], errors="coerce")


2026-02-17 15:54:19 | INFO | test_set_experiment | Test feature matrix shape: (201797, 135)
2026-02-17 15:54:19 | INFO | test_set_experiment | Test target size: 201797


In [20]:
def evaluate_model(model_name: str, model, X, y, threshold: float = 0.5, split: str = 'test') -> dict:
    """Score a fitted classifier on one data split and return a summary record.

    Parameters
    ----------
    model_name : str
        Label used in the log line and stored under ``'model'`` in the result.
    model
        Fitted estimator exposing ``predict_proba``. Column 1 of its output is
        used as the score (assumed to be the positive-class probability —
        confirm for non-binary models).
    X, y
        Features and true labels of the split being scored.
    threshold : float, default 0.5
        Forwarded unchanged to ``evaluate_classification``.
    split : str, default 'test'
        Name of the split being scored. It appears in the log line and in the
        returned record, so the same helper can report validation or train
        metrics without mislabelling them as test results.

    Returns
    -------
    dict
        Keys ``'model'``, ``'split'``, ``'roc_auc'``, ``'ks'`` and
        ``'confusion_matrix'``; the last three are taken from the dict
        returned by ``evaluate_classification``.
    """
    # Keep only the second probability column as the ranking score.
    y_prob = model.predict_proba(X)[:, 1]
    metrics = evaluate_classification(y_true=y, y_prob=y_prob, threshold=threshold)

    logger.info(
        f'{model_name} | {split} ROC-AUC={metrics["roc_auc"]:.4f}, KS={metrics["ks"]:.4f}'
    )
    return {
        'model': model_name,
        'split': split,
        'roc_auc': metrics['roc_auc'],
        'ks': metrics['ks'],
        'confusion_matrix': metrics['confusion_matrix'],
    }

# Score both persisted models on the same held-out features and labels,
# using the same decision threshold for the confusion matrices.
log_result = evaluate_model(
    model_name='Logistic_SGD',
    model=logistic_model,
    X=X_test,
    y=y_test,
    threshold=0.5,
)
xgb_result = evaluate_model(
    model_name='XGBoost',
    model=xgb_model,
    X=X_test,
    y=y_test,
    threshold=0.5,
)

2026-02-17 15:54:24 | INFO | test_set_experiment | Logistic_SGD | test ROC-AUC=0.6974, KS=0.2859
2026-02-17 15:54:25 | INFO | test_set_experiment | XGBoost | test ROC-AUC=0.7096, KS=0.3038


In [21]:
# Scalar metrics only; the confusion matrices are shown separately.
summary_columns = ['model', 'split', 'roc_auc', 'ks']

# One row per model, best ROC-AUC first, with a clean 0..n-1 index.
results = (
    pd.DataFrame(
        [
            {column: record[column] for column in summary_columns}
            for record in (log_result, xgb_result)
        ]
    )
    .sort_values(by='roc_auc', ascending=False)
    .reset_index(drop=True)
)

results

Unnamed: 0,model,split,roc_auc,ks
0,XGBoost,test,0.709644,0.303781
1,Logistic_SGD,test,0.697436,0.285869


In [22]:
# Show each model's confusion matrix under its own heading; sep='\n' puts the
# matrix on the line after the heading.
print('Logistic confusion matrix:', log_result['confusion_matrix'], sep='\n')
print('\nXGBoost confusion matrix:', xgb_result['confusion_matrix'], sep='\n')

Logistic confusion matrix:
[[143300  15900]
 [ 31359  11238]]

XGBoost confusion matrix:
[[151515   7685]
 [ 35489   7108]]
