In [None]:
!pip install torch transformers x-transformers betacal


In [None]:
# Basic imports
import numpy as np  # For numerical computations and array manipulations
import pandas as pd  # For loading and handling time-series and static data
import math  # For positional encoding computations (optional)
import sys
import importlib
import os
import time
import shutil
import re
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_recall_curve, auc, roc_auc_score
import gc
import json

# PyTorch imports
import torch  # Core PyTorch library
import torch.nn as nn  # Neural network layers and loss functions
import torch.optim as optim  # Optimization algorithms
from torch.utils.data import Dataset, DataLoader  # Datasets and DataLoaders for batching
from torch.nn import Transformer, TransformerEncoderLayer  # Transformer modules

# Transformers imports
from transformers import AutoTokenizer, AutoModel


# Folder that is added to the import search path; presumably where the
# project-local modules imported below live (TODO confirm).
module_path = '/home/workspace/files/MilanK/Model1/final_models/code'
# Register the folder only when it is absent, so re-running this cell does not
# keep appending duplicate entries to sys.path.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


from fit import fit
from stratified_bootstrap import stratified_bootstrap_metrics
from load_train_test_split import load_train_test_data
#from PatientDataset import PatientDataset
from generating_datasets_for_torch import *




# Bind the name `eval_model` to the *module* so that it can be reloaded below.
import eval_model  # Import the module first
# Re-execute the module's code so that changes made to its source since the
# first import are picked up without restarting the kernel.
importlib.reload(eval_model)
# Rebind the name `eval_model` to the callable of the same name defined inside
# the module. From here on `eval_model` refers to that callable, not the module;
# re-running this cell still works because the `import` above rebinds the name
# to the module before `reload` is called.
from eval_model import eval_model



In [None]:



# File names handed to load_train_test_data, keyed by the keyword argument
# each one is passed as.
# NOTE(review): the test list has no `_orig` suffix, unlike the train/val
# lists -- confirm this is intended.
split_filenames = {
    'train_filename': 'train_patient_list_orig.txt',
    'val_filename': 'val_patient_list_orig.txt',
    'test_filename': 'test_patient_list.txt',
}

# The loader returns three objects, unpacked here as the train / validation /
# test splits.
train, val, test = load_train_test_data(**split_filenames)



# Common parent directory of all model folders listed below.
_models_root = '/home/workspace/files/MilanK/Model1/final_models/final_cardiac_models'

# Folder names of the models to evaluate, in the order they are processed.
_model_folder_names = (
    'demographics_only_full',
    'combined_model_without_transformer_simpler_demographics',
    'dynamic_only',
    'combined_model_simpler_demographics_simpler_embeddings',
    'combined_model_simpler_demographics',
    'combined_model_full_demographics',
)

# Full paths of the model folders; each one is later passed to eval_model.
model_list = [f"{_models_root}/{folder_name}" for folder_name in _model_folder_names]


# Evaluate every model in `model_list`, replace its point metrics with
# bootstrap estimates ("mean (lower-upper)") and collect one row per model.

# Expected order of the values in the tuple returned by eval_model. This list
# is the single source of truth: rows are addressed by these names and the
# final DataFrame uses the same list for its columns.
columns = [
    "Model", "avg_loss", "auc_pr", "auc_roc", "all_probs", "all_labels",
    "best_threshold", "fpr_vals", "tpr_vals", "sensitivity", "specificity",
    "ppv", "npv", "balanced_acc", "f1"
]

# Result column -> key under which stratified_bootstrap_metrics reports the
# same metric.
bootstrap_key_for_column = {
    "auc_pr": "aucpr",
    "auc_roc": "auroc",
    "sensitivity": "sensitivity",
    "specificity": "specificity",
    "ppv": "ppv",
    "npv": "npv",
    "balanced_acc": "balanced_accuracy",
    "f1": "f1",
}

# Columns holding array-like values; they are stored as JSON strings so the
# table can be written to CSV.
array_columns = ["all_probs", "all_labels", "fpr_vals", "tpr_vals"]

# Evaluation / bootstrap settings (previously inline literals).
TARGET_METRIC = "fpr"      # passed to eval_model as `target_metric`
TARGET_VALUE = 0.2         # passed to eval_model as `target_value`
N_BOOTSTRAPS = 5000        # number of bootstrap resamples
ALPHA = 0.05               # passed as `alpha`; presumably yields 95% intervals (TODO confirm)
BOOTSTRAP_SEED = 42        # passed as `random_state`


def _format_ci(stats):
    """Render one bootstrap summary as ``"mean (lower-upper)"`` with 3 decimals.

    ``stats`` is indexed positionally: element 0 is printed first and elements
    1 and 2 are printed as the bracketed range.
    """
    return f"{stats[0]:.3f} ({stats[1]:.3f}-{stats[2]:.3f})"


def _to_json(values):
    """Serialise an array-like to a JSON string.

    Objects exposing ``.tolist()`` (e.g. NumPy arrays) are converted through
    it; anything else is treated as a plain iterable.
    """
    as_list = values.tolist() if hasattr(values, "tolist") else list(values)
    return json.dumps(as_list)


results = []
for m_folder in model_list:
    res = eval_model(m_folder, val, test,
                     target_metric=TARGET_METRIC, target_value=TARGET_VALUE)
    res = list(res)  # make mutable

    # Fail on the first model if the returned tuple does not match `columns`,
    # instead of only when the DataFrame is built after every (expensive)
    # bootstrap has already run.
    if len(res) != len(columns):
        raise ValueError(
            f"eval_model returned {len(res)} values for {m_folder!r}, "
            f"expected {len(columns)}: {columns}"
        )

    # Address the values by column name rather than by magic position.
    row = dict(zip(columns, res))

    # Keep the raw arrays and threshold before they are overwritten below.
    all_probs_array = row["all_probs"]    # predicted probabilities
    all_labels_array = row["all_labels"]  # true labels
    best_threshold = row["best_threshold"]

    # Bootstrap estimates for the metrics, evaluated at the model's threshold.
    bs_results = stratified_bootstrap_metrics(
        y_true=all_labels_array,
        y_prob=all_probs_array,
        threshold=best_threshold,
        n_bootstraps=N_BOOTSTRAPS,
        alpha=ALPHA,
        random_state=BOOTSTRAP_SEED
    )

    # Format every reported metric as "mean (lower-upper)", rounded to 3dp.
    bs_str = {metric: _format_ci(bs_results[metric]) for metric in bs_results}

    # Replace the original point metrics with the bootstrapped strings.
    for column, bootstrap_key in bootstrap_key_for_column.items():
        row[column] = bs_str[bootstrap_key]

    # Store array-valued columns as JSON strings.
    for column in array_columns:
        row[column] = _to_json(row[column])

    # Keep `results` a list of lists in column order, as before.
    res = [row[column] for column in columns]
    results.append(res)


df_results = pd.DataFrame(results, columns=columns)
print(df_results)


In [None]:
# Destination of the per-model results table.
results_csv_path = '/home/workspace/files/MilanK/Model1/final_models/cardiac_results.csv'

# Write the table without the DataFrame index column.
df_results.to_csv(results_csv_path, index=False)

# Last expression of the cell: the notebook renders the frame below it.
df_results