In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, brier_score_loss, f1_score, fbeta_score, confusion_matrix
from tabpfn import TabPFNClassifier
from imblearn.over_sampling import SMOTENC
from concurrent.futures import ThreadPoolExecutor

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

np.random.seed(99)

# Bootstrap function with ThreadPoolExecutor
def bootstrap_metrics(y_true, y_prob, n_bootstraps=1000, random_state=42, max_workers=60):
    np.random.seed(random_state)
    metrics_list = []
    
    def compute_metrics(indices):
        y_true_boot = y_true.iloc[indices]
        y_prob_boot = y_prob[indices]
        return evaluate_single_bootstrap(y_true_boot, y_prob_boot)
    
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for _ in range(n_bootstraps):
            indices = np.random.choice(len(y_true), len(y_true), replace=True)
            futures.append(executor.submit(compute_metrics, indices))
        
        for future in futures:
            metrics_list.append(future.result())
    
    # Aggregate results
    metrics_df = pd.DataFrame(metrics_list)
    mean_metrics = metrics_df.mean()
    ci_lower = metrics_df.quantile(0.025)
    ci_upper = metrics_df.quantile(0.975)
    
    return pd.DataFrame({
        'Mean': mean_metrics,
        'CI Lower': ci_lower,
        'CI Upper': ci_upper
    })

df1 = pd.read_csv('~/data/BAH_PRS/version10/ml_dat/conpass.csv',sep=',',header=0)

set1 = ['Age', 'BMI', 'SBP', 'DBP', 'PAC', 'Renin', 'Sex']
set2 = ['Age', 'BMI', 'WC', 'TG', 'LDL', 'FBG', 'SBP', 'DBP', 'Renin', 'Sex', 'ASCVD']
set3 = ['Age', 'BMI', 'WC', 'TG', 'LDL', 'FBG', 'SBP', 'DBP', 'Sex', 'ASCVD']

X = df1[set1]
y = df1['IHA']

smotenc = SMOTENC(categorical_features=[6], 
                  random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X,y, 
                                                    test_size=0.3, 
                                                    random_state=42)
X_train_resampled, y_train_resampled = smotenc.fit_resample(X_train, y_train)
X_train_scaled = X_train_resampled
X_test_scaled = X_test
scaler = StandardScaler()
X_train_scaled.iloc[:,0:6] = scaler.fit_transform(X_train_scaled.iloc[:,0:6])
X_test_scaled.iloc[:,0:6] = scaler.transform(X_test_scaled.iloc[:,0:6])

 -1.23483125]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  X_train_scaled.iloc[:,0:6] = scaler.fit_transform(X_train_scaled.iloc[:,0:6])
 -0.0413863   0.55533617  1.66353505  0.04385976 -0.46761664 -0.12663237
  0.38484403 -0.21187844 -0.89384698 -2.9397526   1.23730471 -1.49056945
  1.15205864  1.06681257  1.32255078  0.7258283   0.12910583 -0.63810878
 -1.40532338 -0.55286271  1.23730471 -0.55286271  0.98156651 -0.29712451
  0.38484403 -1.57581552  0.38484403 -1.49056945 -2.34303013 -1.49056945
 -0.89384698 -0.72335484  1.83402718 -2.25778406 -0.21187844  1.49304291
 -0.72335484  3.02747213  0.12910583  0.81107437  1.40779684 -0.38237057
  0.7258283  -1.14958518  0.12910583 -1.40532338  1.06681257 -0.21187844
  1.40779684 -0.89384698 -0.29712451  1.57828898  1.15205864 -1.06433911
 -1.06433911  0.04385976 -0.80860091 -2.00204586  1.32255078  1.06681257
  1.32255078  1.32255078 -0.72335484 -0.97909305  0.7258283   1.66353505
  1.32255078 -0

In [10]:
# Define evaluation metrics
def evaluate_single_bootstrap(y_true_boot, y_prob_boot):
    y_pred_boot = (y_prob_boot > 0.5).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true_boot, y_pred_boot).ravel()
    
    return {
        'AUC': roc_auc_score(y_true_boot, y_prob_boot),
        'Accuracy': accuracy_score(y_true_boot, y_pred_boot),
        'Sensitivity': tp / (tp + fn) if (tp + fn) != 0 else 0,
        'Specificity': tn / (tn + fp) if (tn + fp) != 0 else 0,
        'PPV': tp / (tp + fp) if (tp + fp) != 0 else 0,
        'NPV': tn / (tn + fn) if (tn + fn) != 0 else 0,
        'LR+': (tp / (tp + fn)) / (1 - (tn / (tn + fp))) if (tn + fp) != 0 and (tp + fn) != 0 else float('inf'),
        'LR-': (1 - (tp / (tp + fn))) / (tn / (tn + fp)) if (tn + fp) != 0 and (tp + fn) != 0 else float('inf'),
        'DOR': ((tp / (tp + fn)) / (1 - (tn / (tn + fp)))) / ((1 - (tp / (tp + fn))) / (tn / (tn + fp)))
                if (tn + fp) != 0 and (tp + fn) != 0 else float('inf'),
        'F1': f1_score(y_true_boot, y_pred_boot),
        'F2': fbeta_score(y_true_boot, y_pred_boot, beta=2),
        'Brier Score': brier_score_loss(y_true_boot, y_prob_boot)
    }
    
# Train TabPFN model
model = TabPFNClassifier(n_estimators=32, device='cpu', random_state=42, softmax_temperature=0.6, balance_probabilities=True)
model.fit(X_train_scaled, y_train_resampled)

# Evaluate on the test set
y_prob = model.predict_proba(X_test_scaled)[:, 1]
results_df1 = bootstrap_metrics(y_test, y_prob)
results_df1
# Save probabilities
pd.DataFrame(y_prob, columns=['Probability']).to_csv('/home/luo_wenjin/data/BAH_PRS/version10/ml_dat/prob_model1.csv', index=False)

Unnamed: 0,Mean,CI Lower,CI Upper
AUC,0.927824,0.898817,0.953957
Accuracy,0.878994,0.85119,0.906796
Sensitivity,0.569584,0.467529,0.673272
Specificity,0.94264,0.919044,0.964037
PPV,0.671687,0.565217,0.782065
NPV,0.914106,0.888069,0.937683
LR+,10.375093,6.705354,16.953781
LR-,0.456704,0.342556,0.56391
DOR,23.495154,12.662625,43.876045
F1,0.615092,0.523714,0.710061


In [11]:
# Evaluate on validation sets
validation_sets = [
    pd.read_csv('~/data/BAH_PRS/version10/ml_dat/pato.csv',sep=',',header=0),
    pd.read_csv('~/data/BAH_PRS/version10/ml_dat/monash.csv',sep=',',header=0),
    pd.read_csv('~/data/BAH_PRS/version10/ml_dat/ljubljana.csv',sep=',',header=0)
]

def evaluate_single_validation_set(df, model, scaler):
    X_val = df[set1]
    y_val = df['IHA']
    X_val.iloc[:, 0:6] = scaler.fit_transform(X_val.iloc[:, 0:6])  
    
    y_prob = model.predict_proba(X_val)[:, 1]
    return bootstrap_metrics(y_val, y_prob)

def evaluate_on_validation_sets(model, validation_sets, scaler):
    results = {}
    with ThreadPoolExecutor() as executor:
        futures = {
            f'Validation Set {i+2}': executor.submit(evaluate_single_validation_set, df, model, scaler)
            for i, df in enumerate(validation_sets)
        }
        for key, future in futures.items():
            results[key] = future.result()
    return results

# Evaluate on validation sets
results_df2 = evaluate_on_validation_sets(model, validation_sets, scaler)
results_df2

  0.70468553 -0.51860203  0.32828936 -0.42450299 -0.98909725  0.89288362
 -0.04810681 -0.42450299 -0.14220586 -0.33040394 -1.17729534 -1.17729534
 -2.30648386  0.51648745 -1.55369151 -0.80089916  0.23419032  0.98698266
  1.08108171 -1.45959247  1.17518075 -2.11828577 -0.33040394  0.70468553
  0.51648745 -2.4005829  -0.89499821 -1.08319629 -0.04810681 -0.14220586
  0.23419032  0.61058649  0.70468553  0.14009127 -1.08319629  0.70468553
 -0.70680012 -0.14220586 -0.33040394  0.14009127  0.61058649 -0.33040394
 -0.51860203 -1.27139438  1.17518075 -0.42450299  0.51648745  0.04599223
 -0.42450299  1.36337884  0.32828936 -0.14220586  0.32828936  0.32828936
  1.17518075 -1.08319629  1.17518075 -0.51860203  0.79878458  0.98698266
 -0.70680012  0.4223884   1.08108171  0.14009127 -0.51860203 -0.14220586
  1.08108171  0.23419032 -0.42450299 -0.98909725 -1.45959247 -0.04810681
  0.14009127  0.04599223  0.14009127 -0.33040394 -0.70680012  0.04599223
 -0.70680012 -0.14220586  0.70468553  1.26927979 -1

{'Validation Set 2':                  Mean   CI Lower  CI Upper
 AUC          0.937401   0.918147  0.953580
 Accuracy     0.834952   0.817349  0.853390
 Sensitivity  0.968518   0.921053  1.000000
 Specificity  0.829516   0.811459  0.847837
 PPV          0.187754   0.147692  0.233136
 NPV          0.998464   0.996142  1.000000
 LR+          5.698443   5.085149  6.406266
 LR-          0.037957   0.000000  0.095164
 DOR               inf  55.432979       NaN
 F1           0.313971   0.255634  0.376150
 F2           0.527042   0.456238  0.595466
 Brier Score  0.129777   0.116004  0.143274,
 'Validation Set 3':                  Mean  CI Lower   CI Upper
 AUC          0.769225  0.725699   0.811805
 Accuracy     0.672116  0.631193   0.708303
 Sensitivity  0.212352  0.159995   0.264588
 Specificity  0.964182  0.942117   0.982715
 PPV          0.790406  0.673060   0.897968
 NPV          0.658347  0.615222   0.697346
 LR+          6.533189  3.281722  13.167312
 LR-          0.817001  0.762108   

In [12]:
results_df1.to_csv('/home/luo_wenjin/data/BAH_PRS/version10/ml_dat/conpass_training_boot_model1.csv', index=True)
for key, value in results_df2.items():
    value.to_csv(f'/home/luo_wenjin/data/BAH_PRS/version10/ml_dat/{key}_boot_model1.csv', index=True)