In [None]:
## External Validation

- This notebook evaluates how well the trained numeric-only CKD risk model generalizes to an independent dataset derived from the UCI Chronic Kidney Disease repository. The saved model is loaded and applied as-is; no retraining is performed.

# Common imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ML
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix


In [2]:
import joblib

# Load the previously trained numeric-only model from disk; it is only used for
# prediction in this notebook.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
model_path = "../models/numeric_model.pkl"
numeric_model = joblib.load(model_path)


In [13]:
# Load the external dataset and turn the '?' missing-value marker into NaN.
df_ext = pd.read_csv("../data/external/uci_ckd.csv")
df_ext = df_ext.replace('?', np.nan)


def _to_numeric_if_possible(column):
    """Return `column` parsed as numeric, or unchanged if it cannot be parsed.

    Replacement for `pd.to_numeric(column, errors='ignore')`, which is deprecated
    in pandas and emits a FutureWarning; the exception is caught explicitly instead.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Column holds non-numeric values (e.g. 'normal', 'yes'); keep it as-is.
        return column


# Convert every column that is fully numeric; categorical columns stay untouched.
df_ext = df_ext.apply(_to_numeric_if_possible)

df_ext.head()


  df_ext = df_ext.apply(pd.to_numeric, errors='ignore')


Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
0,48,80,1.02,1.0,0.0,,normal,notpresent,notpresent,121.0,...,44.0,7800.0,5.2,yes,yes,no,good,no,no,ckd
1,7,50,1.02,4.0,0.0,,normal,notpresent,notpresent,,...,38.0,6000.0,,no,no,no,good,no,no,ckd
2,62,80,1.01,2.0,3.0,normal,normal,notpresent,notpresent,423.0,...,31.0,7500.0,,no,yes,no,poor,no,yes,ckd
3,48,70,1.005,4.0,0.0,normal,abnormal,present,notpresent,117.0,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,ckd
4,51,80,1.01,2.0,0.0,normal,normal,notpresent,notpresent,106.0,...,35.0,7300.0,4.6,no,no,no,good,no,no,ckd


In [14]:
# Encode the diagnosis label as a binary target: 1 = 'ckd', 0 = 'notckd'.
# Surrounding whitespace is stripped first so that padded labels (e.g. a trailing
# tab or space) are not silently turned into NaN by the exact-match lookup.
# Any label that still does not match either key becomes NaN.
df_ext['Class'] = df_ext['class'].str.strip().map({'ckd': 1, 'notckd': 0})


In [15]:
# Sanity-check the label encoding. dropna=False keeps a NaN row in the counts so
# that any label the mapping failed to encode is visible instead of being hidden.
df_ext['Class'].value_counts(dropna=False)


Class
1    222
0    140
Name: count, dtype: int64

In [16]:
# Feature names passed to the numeric model, in this order.
# The external dataset uses the lower-case form of each name as its column header.
# Expansions below are presumed from the UCI CKD attribute names — confirm against
# the dataset documentation.
numeric_features = [
    'Al',    # presumably albumin
    'Bp',    # presumably blood pressure
    'Bu',    # presumably blood urea
    'Hemo',  # presumably hemoglobin
    'Pot',   # presumably potassium
    'Rbcc',  # presumably red blood cell count
    'Sc',    # presumably serum creatinine
    'Sg',    # presumably specific gravity
    'Sod',   # presumably sodium
    'Su',    # presumably sugar
    'Wbcc',  # presumably white blood cell count
]


In [17]:
# Map each model-side feature name to the lower-case column name used by the
# external dataset (e.g. 'Hemo' -> 'hemo').
ext_column_for = {feat: feat.lower() for feat in numeric_features}

# Fail fast if the external file lacks a required column. Silently skipping it
# would produce a feature matrix with fewer columns than the feature list.
missing_columns = [col for col in ext_column_for.values() if col not in df_ext.columns]
if missing_columns:
    raise KeyError(
        f"External dataset is missing required feature columns: {missing_columns}"
    )

# Build the feature matrix in one step, with columns named and ordered exactly
# as in `numeric_features`.
X_ext = pd.DataFrame({feat: df_ext[col] for feat, col in ext_column_for.items()})

y_ext = df_ext['Class']


In [18]:
# Complete-case filter: keep only rows where every feature is present, and keep
# the labels aligned with the surviving rows.
has_all_features = X_ext.notna().all(axis=1)
valid_idx = X_ext.index[has_all_features]
X_ext = X_ext.loc[valid_idx]
y_ext = y_ext.loc[valid_idx]


In [19]:
# Size of the evaluation set and its class balance after the complete-case filter.
ext_shape = X_ext.shape
ext_label_counts = y_ext.value_counts()
ext_shape, ext_label_counts


((206, 11),
 Class
 0    123
 1     83
 Name: count, dtype: int64)

In [20]:
# Score the external rows with the loaded model.
# Column 1 of predict_proba is taken as the positive-class probability
# (assumes the model's classes_ are ordered [0, 1] — TODO confirm).
ext_proba = numeric_model.predict_proba(X_ext)
y_ext_prob = ext_proba[:, 1]

# Flag a row as positive when its probability reaches the 0.4 decision threshold.
y_ext_pred = np.greater_equal(y_ext_prob, 0.4).astype(int)


In [21]:
# Per-class precision / recall / F1 on the external evaluation set.
ext_report = classification_report(y_ext, y_ext_pred)
print(ext_report)


              precision    recall  f1-score   support

           0       1.00      0.98      0.99       123
           1       0.98      1.00      0.99        83

    accuracy                           0.99       206
   macro avg       0.99      0.99      0.99       206
weighted avg       0.99      0.99      0.99       206



In [22]:
# Confusion matrix for the thresholded predictions:
# rows are the true class, columns are the predicted class.
ext_confusion = confusion_matrix(y_ext, y_ext_pred)
ext_confusion


array([[121,   2],
       [  0,  83]])

In [23]:
# Threshold-independent discrimination: ROC AUC computed from the predicted
# probabilities rather than the thresholded labels.
ext_auc = roc_auc_score(y_ext, y_ext_prob)
ext_auc


1.0

In [None]:
## External Validation Results

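- Missing values encoded as '?' in the external dataset were converted to NaN, and any row with a missing value in one of the 11 model features was excluded from evaluation (206 of the 362 labelled rows remained). No values were imputed, which avoids introducing invalid numerical assumptions.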
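- The numeric-only CKD risk model demonstrated strong generalization on an independent UCI-derived dataset. On the 206 complete-case rows it achieved high discriminatory performance (ROC AUC of 1.0) while maintaining zero false negatives at the 0.4 decision threshold (all 83 CKD cases were flagged), indicating conservative and clinically appropriate screening behavior. Minor false positives were observed (2 of 123 non-CKD cases), reflecting a preference toward sensitivity over specificity, which is acceptable in early-stage clinical decision support systems. Note that these results describe complete cases only: the evaluated subset has a different class balance (83 CKD / 123 non-CKD) from the full labelled dataset (222 CKD / 140 non-CKD), and performance on records with missing measurements is not assessed here.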
