In [None]:
#imports
import sys
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import numpy as np
import os
import time
import logging
from livelossplot import PlotLosses
from sklearn.linear_model import LogisticRegression
import torch
import monai
from monai.data import DataLoader
from monai.transforms import (
    AddChanneld,
    CenterSpatialCropd,
    Compose,
    Resized,
    RandSpatialCropd,
    ScaleIntensityd,
    ToTensord,
    LoadImaged,
    Identityd,
)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import roc_auc_score

In [None]:
# Hyperparameters chosen during hyperparameter tuning. They are also used to
# build the file name of the final checkpoint that is loaded further below.
# NOTE(review): "none" as optimizer and "adam" as strategy look swapped --
# confirm against the name of the saved checkpoint file.
epoch = 84
lr = 1e-4
strategy = "adam"
optimizer = "none"

In [None]:
# Locations of the trained model, the evaluation tables and auxiliary files.
# All paths are relative to the directory the notebook is executed from.

# directory holding the trained SEResNet and its stored predictions
MODEL_DIR = "./SEResNet/"
filename_predictions_for_platt_scaling = "./SEResNet/predictions_for_platt_scaling.csv"

# CSV tables describing the evaluation cohorts
path_test_data = "../../data/test_DL.csv"
path_test_AIBL = "../../data/AIBL_DL.csv"
path_test_OASIS = "../../data/OASIS_DL.csv"

# additional mapping table
mapping_ML_DL = "../../additional_data/Mapping_DKT_Regions_Deep_ML_new.csv"

In [None]:
# Number of images per batch handed to the data loaders.
BATCH_SIZE = 1

In [None]:
# Read the ADNI test table and the AIBL and OASIS tables, each indexed by PTID.
test, AIBL, OASIS = (
    pd.read_csv(csv_path, index_col="PTID")
    for csv_path in (path_test_data, path_test_AIBL, path_test_OASIS)
)

In [None]:
# Preprocessing pipeline applied to every evaluation image: load, add a channel
# axis, rescale intensities, resize to 256^3 and centre-crop to 224^3.
test_transforms = Compose(
        [
            LoadImaged(keys=["img"]),
            AddChanneld(keys=["img"]),
            ScaleIntensityd(keys=["img"]),
            Resized(keys=["img"],spatial_size=(256,256,256)),
            CenterSpatialCropd(keys=["img"],roi_size=(224,224,224)),
            ToTensord(keys=["img"]),
        ]
    )


def _build_eval_loader(frame):
    """Turn one cohort table into labels, file records, a dataset and a loader.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a ``DX`` column (diagnosis) and a ``filename`` column
        (path of the image to load).

    Returns
    -------
    tuple
        ``(labels, files, dataset, loader)`` where ``labels`` is the list of
        dummy-encoded diagnoses, ``files`` the list of ``{"img", "label"}``
        records, ``dataset`` the MONAI dataset and ``loader`` the DataLoader.
    """
    # dtype=int pins the label type; the get_dummies default differs between
    # pandas versions (uint8 before 2.0, bool afterwards).
    labels = pd.get_dummies(frame.DX, drop_first=True, dtype=int).to_numpy().squeeze().tolist()
    files = [{"img": img, "label": label} for img, label in zip(frame.filename, labels)]
    dataset = monai.data.Dataset(data=files, transform=test_transforms)
    # shuffle=False keeps predictions in the same order as the rows of the table
    loader = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=1,
        pin_memory=torch.cuda.is_available(),
    )
    return labels, files, dataset, loader


# Reformat the ADNI test, AIBL and OASIS tables for pytorch. All intermediate
# names are kept so later cells can still refer to them.
Y_test, test_files, test_ds, test_loader = _build_eval_loader(test)
Y_AIBL, AIBL_files, AIBL_ds, AIBL_loader = _build_eval_loader(AIBL)
Y_OASIS, OASIS_files, OASIS_ds, OASIS_loader = _build_eval_loader(OASIS)

In [None]:
# Fit the logistic regression used for Platt scaling: it maps the stored
# uncalibrated predictions (single feature column) to the stored labels.
pred = pd.read_csv(filename_predictions_for_platt_scaling)
# scikit-learn expects a 2-D feature matrix, hence the added column axis
predictions = pred["predictions"].to_numpy()[:, np.newaxis]
clf = LogisticRegression(random_state=0)
clf.fit(predictions, pred["labels"])

In [None]:
# Empty frame that will collect the ADNI test labels and predictions.
column_names = [
    "labels",
    "predictions_bin",
    "predictions_prob",
]
dfTrain = pd.DataFrame(columns=column_names)

In [None]:
# Load the final SEResNet152 checkpoint and predict the ADNI test set.
# choose cuda as the device if it is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# load DL model using monai
model = monai.networks.nets.SEResNet152(num_classes=2,spatial_dims=3, in_channels=1)
# The checkpoint name encodes the tuned hyperparameters. The original code used
# an undefined name `opt` here; `optimizer` from the hyperparameter cell is used
# instead.
# NOTE(review): confirm that the resulting file name matches the saved checkpoint.
PATH=MODEL_DIR+"model_"+str(optimizer)+"_"+str(lr)+"_"+str(strategy)+"_"+str(epoch)+"_final_model_polyak_averaged.pth"
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
model.load_state_dict(torch.load(PATH, map_location=device))
model=model.to(device)
# change model to evaluation mode
model.eval()

column_names = ["labels","predictions_bin","predictions_prob"]
# Rows are collected in a list and turned into a frame once at the end;
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
records = []
with torch.no_grad():
    # iterate over the ADNI test data
    for test_data in test_loader:
        # move the images to the same device as the model (works without CUDA)
        inputs=test_data["img"].to(device)
        labels=test_data["label"]
        # predict output of model
        outputs = model(inputs)
        # class probabilities; dim=1 is the class axis of the (batch, 2) logits
        predProba=torch.nn.functional.softmax(outputs, dim=1)
        predProba=predProba.cpu().numpy()[:,1]
        predProba=np.expand_dims(predProba, axis=1)
        # compute calibrated predictions with the Platt-scaling model
        predProba=clf.predict_proba(predProba)
        # per-sample argmax so that batches larger than one are handled too
        pred=predProba.argmax(axis=1)
        # save predictions and labels of test data
        for label, pred_bin, pred_prob in zip(labels.cpu().numpy().reshape(-1), pred, predProba[:,1]):
            records.append(
                {
                    "labels": int(label),
                    "predictions_bin": int(pred_bin),
                    "predictions_prob": float(pred_prob),
                }
            )
dfTrain = pd.DataFrame(records, columns=column_names)

In [None]:
# Compute the evaluation metrics for the ADNI test set from the collected
# labels, binary predictions and calibrated probabilities.
labels_adni = dfTrain["labels"].tolist()
bin_adni = dfTrain["predictions_bin"].tolist()
prob_adni = dfTrain["predictions_prob"].tolist()

acc_adni_test = accuracy_score(labels_adni, bin_adni)
bacc_adni_test = balanced_accuracy_score(labels_adni, bin_adni)
f1_adni_test = f1_score(labels_adni, bin_adni, average='macro')
mcc_adni_test = matthews_corrcoef(labels_adni, bin_adni)
# AUROC is computed on the probabilities, not on the binary predictions
auroc_adni_test = roc_auc_score(labels_adni, prob_adni)

In [None]:
# Report the ADNI test metrics (percentages with two decimals, MCC with three).
print(
    f"Accuracy (ADNI test set): {round(acc_adni_test*100,2)},\n"
    f" balanced-accuracy (ADNI test set): {round(bacc_adni_test*100,2)},\n"
    f" Macro-averaging F1-score (ADNI test set): {round(f1_adni_test*100,2)},\n"
    f" MCC (ADNI test set): {round(mcc_adni_test,3)},\n"
    f" AUROC (ADNI test set): {round(auroc_adni_test*100,2)}"
)

In [None]:
# Empty frame that will collect the AIBL test labels and predictions.
column_names = [
    "labels",
    "predictions_bin",
    "predictions_prob",
]
dfTrainAIBL = pd.DataFrame(columns=column_names)

In [None]:
# Predict the AIBL test set with the loaded model and the Platt-scaling model.
model.eval()

column_names = ["labels","predictions_bin","predictions_prob"]
# Rows are collected in a list and turned into a frame once at the end;
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
records = []
with torch.no_grad():
    # iterate over the AIBL test data
    for test_data in AIBL_loader:
        # move the images to the same device as the model (works without CUDA)
        inputs=test_data["img"].to(device)
        labels=test_data["label"]
        # predict output of model
        outputs = model(inputs)
        # class probabilities; dim=1 is the class axis of the (batch, 2) logits
        predProba=torch.nn.functional.softmax(outputs, dim=1)
        predProba=predProba.cpu().numpy()[:,1]
        predProba=np.expand_dims(predProba, axis=1)
        # compute calibrated predictions with the Platt-scaling model
        predProba=clf.predict_proba(predProba)
        # per-sample argmax so that batches larger than one are handled too
        pred=predProba.argmax(axis=1)
        # save predictions and labels of test data
        for label, pred_bin, pred_prob in zip(labels.cpu().numpy().reshape(-1), pred, predProba[:,1]):
            records.append(
                {
                    "labels": int(label),
                    "predictions_bin": int(pred_bin),
                    "predictions_prob": float(pred_prob),
                }
            )
dfTrainAIBL = pd.DataFrame(records, columns=column_names)

In [None]:
# Compute the evaluation metrics for the AIBL test set from the collected
# labels, binary predictions and calibrated probabilities.
labels_aibl = dfTrainAIBL["labels"].tolist()
bin_aibl = dfTrainAIBL["predictions_bin"].tolist()
prob_aibl = dfTrainAIBL["predictions_prob"].tolist()

acc_aibl_test = accuracy_score(labels_aibl, bin_aibl)
bacc_aibl_test = balanced_accuracy_score(labels_aibl, bin_aibl)
f1_aibl_test = f1_score(labels_aibl, bin_aibl, average='macro')
mcc_aibl_test = matthews_corrcoef(labels_aibl, bin_aibl)
# AUROC is computed on the probabilities, not on the binary predictions
auroc_aibl_test = roc_auc_score(labels_aibl, prob_aibl)

In [None]:
# Report the AIBL test metrics (percentages with two decimals, MCC with three).
print(
    f"Accuracy (AIBL test set): {round(acc_aibl_test*100,2)},\n"
    f" balanced-accuracy (AIBL test set): {round(bacc_aibl_test*100,2)},\n"
    f" Macro-averaging F1-score (AIBL test set): {round(f1_aibl_test*100,2)},\n"
    f" MCC (AIBL test set): {round(mcc_aibl_test,3)},\n"
    f" AUROC (AIBL test set): {round(auroc_aibl_test*100,2)}"
)

In [None]:
# Empty frame that will collect the OASIS test labels and predictions.
column_names = [
    "labels",
    "predictions_bin",
    "predictions_prob",
]
dfTrainOASIS = pd.DataFrame(columns=column_names)

In [None]:
# Predict the OASIS test set with the loaded model and the Platt-scaling model.
model.eval()

column_names = ["labels","predictions_bin","predictions_prob"]
# Rows are collected in a list and turned into a frame once at the end;
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
records = []
with torch.no_grad():
    # iterate over the OASIS test data
    for test_data in OASIS_loader:
        # move the images to the same device as the model (works without CUDA)
        inputs=test_data["img"].to(device)
        labels=test_data["label"]
        # predict output of model
        outputs = model(inputs)
        # class probabilities; dim=1 is the class axis of the (batch, 2) logits
        predProba=torch.nn.functional.softmax(outputs, dim=1)
        predProba=predProba.cpu().numpy()[:,1]
        predProba=np.expand_dims(predProba, axis=1)
        # compute calibrated predictions with the Platt-scaling model
        predProba=clf.predict_proba(predProba)
        # per-sample argmax so that batches larger than one are handled too
        pred=predProba.argmax(axis=1)
        # save predictions and labels of test data
        for label, pred_bin, pred_prob in zip(labels.cpu().numpy().reshape(-1), pred, predProba[:,1]):
            records.append(
                {
                    "labels": int(label),
                    "predictions_bin": int(pred_bin),
                    "predictions_prob": float(pred_prob),
                }
            )
dfTrainOASIS = pd.DataFrame(records, columns=column_names)

In [None]:
# Compute the evaluation metrics for the OASIS test set from the collected
# labels, binary predictions and calibrated probabilities.
labels_oasis = dfTrainOASIS["labels"].tolist()
bin_oasis = dfTrainOASIS["predictions_bin"].tolist()
prob_oasis = dfTrainOASIS["predictions_prob"].tolist()

acc_oasis_test = accuracy_score(labels_oasis, bin_oasis)
bacc_oasis_test = balanced_accuracy_score(labels_oasis, bin_oasis)
f1_oasis_test = f1_score(labels_oasis, bin_oasis, average='macro')
mcc_oasis_test = matthews_corrcoef(labels_oasis, bin_oasis)
# AUROC is computed on the probabilities, not on the binary predictions
auroc_oasis_test = roc_auc_score(labels_oasis, prob_oasis)

In [None]:
# Report the OASIS test metrics (percentages with two decimals, MCC with three).
print(
    f"Accuracy (OASIS test set): {round(acc_oasis_test*100,2)},\n"
    f" balanced-accuracy (OASIS test set): {round(bacc_oasis_test*100,2)},\n"
    f" Macro-averaging F1-score (OASIS test set): {round(f1_oasis_test*100,2)},\n"
    f" MCC (OASIS test set): {round(mcc_oasis_test,3)},\n"
    f" AUROC (OASIS test set): {round(auroc_oasis_test*100,2)}"
)