In [1]:
from Utils.Utils import *
from Utils.Blacksmith import * 

from Utils.HyMNet import HyMNet
from timm.models.layers import trunc_normal_
import Utils.ViT as ViT 
from sklearn.utils import resample
from sklearn.svm import SVC

In [2]:
# Seed
set_seed(0)

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
PATH = "/home/baharoon/HTN/data/"
CSV_PATH = {"HTNPath": PATH + r"HTN", "NonHTNPath": PATH + "NonHTN"}

MODELS_PATH = "/home/baharoon/HTN/Models"

os.makedirs(MODELS_PATH, exist_ok=True)

In [5]:
BATCH_SIZE = 1

image_size = 586
crop_size = 512

# image_size = 256
# crop_size = 224

train_transform = T.Compose([
    T.Resize((image_size, image_size)),
    T.CenterCrop(crop_size),
    T.ToTensor(),
    T.RandomHorizontalFlip(0.5),
    T.RandomRotation(degrees=(0, 360)),
    T.GaussianBlur(3),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = T.Compose([
    T.Resize((image_size, image_size)),
    T.CenterCrop(crop_size),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = HypertensionDataset(CSV_PATH, split="train", train_transform=train_transform)
# val_dataset = HypertensionDataset(CSV_PATH, split="val", test_transform=test_transform)
val_dataset = HypertensionDataset(CSV_PATH, split="val", test_transform=train_transform)
test_dataset = HypertensionDataset(CSV_PATH, split="test", test_transform=test_transform)

# train_dataset = torch.utils.data.ConcatDataset([train_dataset, val_dataset])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

epoch_length = math.ceil(len(train_dataset) / BATCH_SIZE)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HTN_DM'] = df['HTN'].astype(str) + df['DM'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HTN_DM'] = df['HTN'].astype(str) + df['DM'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HTN_DM'] = df['HTN'].astype(str) + df['DM'].astype(str)


In [6]:
def calculate_results(N, trues, probs, threshold=0.5):
    
    f1_scores = []
    auc_scores = []
    pr_scores = []
    precision_scores = []
    recall_scores = []
    specificity_scores = []
    accuracy_scores = []

    for i in range(N):
        fpr, tpr, _ = roc_curve(trues[i], probs[i])
        precision, recall, _ = precision_recall_curve(trues[i], probs[i])

        f1_scores.append(f1_score(trues[i], np.array(probs[i])>=threshold))
        auc_scores.append(auc(fpr, tpr))
        pr_scores.append(auc(recall, precision))
        precision_scores.append(precision_score(trues[i], np.array(probs[i])>=threshold))
        recall_scores.append(recall_score(trues[i], np.array(probs[i])>=threshold))
        specificity_scores.append(recall_score(trues[i], np.array(probs[i])>=threshold, pos_label=0))
        accuracy_scores.append(accuracy_score(trues[i], np.array(probs[i])>=threshold))
        
    results = {"f1": {}, "auroc": {}, "auprc": {}, "precision": {},
               "recall": {}, "specificity": {}, "accuracy": {}}
    
    results["f1"]["range"] = [round(s, 3) for s in np.percentile(f1_scores, [2.5, 97.5])]  
    results["f1"]["avrg"] = sum(results["f1"]["range"]) / 2
    
    results["auroc"]["range"] = [round(s, 3) for s in np.percentile(auc_scores, [2.5, 97.5])]  
    results["auroc"]["avrg"] = sum(results["auroc"]["range"]) / 2
    
    results["auprc"]["range"] = [round(s, 3) for s in np.percentile(pr_scores, [2.5, 97.5])]  
    results["auprc"]["avrg"] = sum(results["auprc"]["range"]) / 2
    
    results["precision"]["range"] = [round(s, 3) for s in np.percentile(precision_scores, [2.5, 97.5])]  
    results["precision"]["avrg"] = sum(results["precision"]["range"]) / 2
    
    results["recall"]["range"] = [round(s, 3) for s in np.percentile(recall_scores, [2.5, 97.5])]  
    results["recall"]["avrg"] = sum(results["recall"]["range"]) / 2
    
    results["specificity"]["range"] = [round(s, 3) for s in np.percentile(specificity_scores, [2.5, 97.5])]  
    results["specificity"]["avrg"] = sum(results["specificity"]["range"]) / 2
    
    results["accuracy"]["range"] = [round(s, 3) for s in np.percentile(accuracy_scores, [2.5, 97.5])]  
    results["accuracy"]["avrg"] = sum(results["accuracy"]["range"]) / 2
    
    print(f"f1: {results['f1']['range']}",
            f"average: {round(results['f1']['avrg'], 3)}") 
    print(f"auroc: {results['auroc']['range']}",
            f"average: {round(results['auroc']['avrg'], 3)}") 
    print(f"auprc: {results['auprc']['range']}",
            f"average: {round(results['auprc']['avrg'], 3)}") 
    print(f"precision: {results['precision']['range']}",
            f"average: {round(results['precision']['avrg'], 3)}") 
    print(f"recall: {results['recall']['range']}",
            f"average: {round(results['recall']['avrg'], 3)}") 
    print(f"specificity: {results['specificity']['range']}",
            f"average: {round(results['specificity']['avrg'], 3)}") 
    print(f"accuracy: {results['accuracy']['range']}",
            f"average: {round(results['accuracy']['avrg'], 3)}") 
    
    return results
    
    
def boostrap(N, model, data_loader, save_loc=None):
    from tqdm import tqdm
    preds, trues = [], []
    
    if hasattr(model, 'predict_proba'):
        if hasattr(data_loader.dataset, 'df'):
            if "image_prob" in data_loader.dataset.df.columns:
                probas = model.predict_proba(data_loader.dataset.df[["image_logit","Age", "Gender"]])
            else:
                probas = model.predict_proba(data_loader.dataset.df[["Age", "Gender"]])
        else:        
            probas = model.predict_proba(data_loader.dataset.x)
        preds = probas[:, 1].tolist()
        
        if hasattr(data_loader.dataset, 'df'): 
            trues = data_loader.dataset.df["HTN"].tolist()
        else:
            trues = data_loader.dataset.y.tolist()
    else:
        for sample in tqdm(data_loader):
            image, features, target = sample["image"].to(device).float(), sample["features"].to(device).float(), sample["label"]
            output = model(image, features)
            preds.append(torch.nn.Sigmoid()(output.squeeze().cpu().detach()).item())
            trues.append(target.cpu().detach().item())
            
    if save_loc != None:
        results = dict()
        results["true"] = trues
        results["prob"] = preds
        save_loc_preds = "".join(save_loc.split(".")[:-1]) + "outputs.json"
        with open(save_loc_preds, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=4)
        
    n_samples = len(data_loader)
    indices = [i for i in range(n_samples)]

    bootstrap_preds, bootstrap_trues = [], []
    for run in range(N):
        bootstrapped_indices = resample(indices, replace=True, n_samples=n_samples)
        preds_tmp , trues_tmp = [preds[indx] for indx in bootstrapped_indices], [trues[indx] for indx in bootstrapped_indices]
        bootstrap_preds.append(preds_tmp)
        bootstrap_trues.append(trues_tmp)
        
    if save_loc != None:
        results = dict()
        results["true"] = bootstrap_trues
        results["prob"] = bootstrap_preds
        save_loc_preds = "".join(save_loc.split(".")[:-1]) + "bootstrap.json"
        with open(save_loc_preds, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=4)
            
    results = calculate_results(N, bootstrap_trues, bootstrap_preds)
    
    if save_loc != None:
        with open(save_loc, 'w') as f:
            # Use json.dump to write the dictionary to the file
            json.dump(results, f)
            
def binary_bootstrap(model1, model2, data_loader, device=None):
    preds1, preds2, trues = [], [], []
    
    for sample in tqdm(data_loader):
        image, features, target = sample["image"].to(device).float(), sample["features"].to(device).float(), sample["label"]
        output1 = model1(image, features)
        preds1.append(torch.nn.Sigmoid()(output1.squeeze().cpu().detach()).item())
        
        output2 = model2(image, features)
        preds2.append(torch.nn.Sigmoid()(output2.squeeze().cpu().detach()).item())
        
        trues.append(target.cpu().detach().item())
        
    n_samples = len(data_loader)
    indices = [i for i in range(n_samples)]

    bootstrap_preds1, bootstrap_preds2, bootstrap_trues = [], [], []
    for run in range(N):
        bootstrapped_indices = resample(indices, replace=True, n_samples=n_samples)
        preds1_tmp, preds2_tmp, trues_tmp =  [preds1[indx] for indx in bootstrapped_indices],\
                                            [preds2[indx] for indx in bootstrapped_indices], \
                                            [trues[indx] for indx in bootstrapped_indices]
        
        bootstrap_preds1.append(preds1_tmp)
        bootstrap_preds2.append(preds2_tmp)
        bootstrap_trues.append(trues_tmp)
    
    results = {}
    results["true"] = bootstrap_trues
    results["prob1"] = bootstrap_preds1
    results["prob2"] = bootstrap_preds2
    return results

In [7]:
N = 10000

FM_PATH = MODELS_PATH + r"/FundusModel.pth"
DM_PATH = MODELS_PATH + r"/DemographicFCNN.pth"

criterion = nn.BCEWithLogitsLoss()

# FundusModel Results

In [21]:
state_dict = torch.load(FM_PATH)
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}

In [16]:
image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=224)

model = HyMNet(image_model=image_model).to(device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [17]:
metrics = test(model, criterion, test_loader, device, show_output=True)

Batch in Progress: 100%|████████████████████████████████████████| 1007/1007 [00:59<00:00, 17.06it/s]

    Average Loss:  0.614882,    Accuracy: 65.44%,    Correct Counter: 659/1007,    F1 Score: 0.73,    Precision: 0.68,    Recall:  0.79,    PR:  0.80,    AUROC:  0.70






In [33]:
results = boostrap(N=N, model=model, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/FundusModel.json")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [01:34<00:00, 10.67it/s]


# DemographicModels Results

In [35]:
train_x, train_y = train_dataset.df[["Age", "Gender"]], train_dataset.df["HTN"]
val_x, val_y = val_dataset.df[["Age", "Gender"]], val_dataset.df["HTN"]
train_x, train_y = pd.concat([train_x, val_x]), pd.concat([train_y, val_y])

## XGBoost

In [None]:
with open(MODELS_PATH+'/DemographicXGBParams.json') as f:
    params = json.load(f)

boost = xgb.XGBClassifier(**params, objective="binary:logistic")
boost.fit(train_x, train_y)

In [None]:
boostrap(N, boost, test_loader, save_loc=f"{os.getcwd() + os.sep}Results/DemographicXGB.json")

## SVM

In [None]:
from sklearn.svm import SVC

with open(MODELS_PATH+'/DemographicSVMParams.json') as f:
    params = json.load(f)
    
svm = SVC(**params, probability=True)

svm.fit(train_x, train_y)

In [None]:
boostrap(N, svm, test_loader, save_loc=f"{os.getcwd() + os.sep}Results/DemographicSVM.json")

## FCNN

In [36]:
tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=16, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=1),
).to(device)

model = HyMNet(tabular_model=tabular_model) 
model.load_state_dict(torch.load(MODELS_PATH + "/DemographicFCNN.pth"))

<All keys matched successfully>

In [37]:
results = boostrap(N=N, model=model, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/DemographicFCNN.json")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [00:54<00:00, 18.41it/s]


# FusionModel Results

## JointFusion

In [38]:
tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
)

image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=8).requires_grad_(True)

fusion_model = nn.Sequential(
    nn.Linear(in_features=40, out_features=128),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=128, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=1),
)

model = HyMNet(image_model=image_model, tabular_model=tabular_model, fusion_model=fusion_model).to(device)
state_dict = torch.load(MODELS_PATH + "/JointFusion_finetune.pth")
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict)

Position interpolate from 14x14 to 32x32


<All keys matched successfully>

In [39]:
results = boostrap(N=N, model=model, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/JointFusion.json")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [01:34<00:00, 10.67it/s]


## PredictionFusion

In [8]:
tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=16, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=1),
)

image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=1).requires_grad_(True)

fusion_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=8, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=1),
)

model = HyMNet(image_model=image_model, tabular_model=tabular_model, fusion_model=fusion_model).to(device)
state_dict = torch.load(MODELS_PATH + "/PredFusion.pth")
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict)

Position interpolate from 14x14 to 32x32


<All keys matched successfully>

In [10]:
results = boostrap(N=N, model=model, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/PredFusion.json")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [01:39<00:00, 10.09it/s]


f1: [0.733, 0.784] average: 0.758
auroc: [0.637, 0.707] average: 0.672
auprc: [0.671, 0.755] average: 0.713
precision: [0.646, 0.713] average: 0.68
recall: [0.833, 0.888] average: 0.86
specificity: [0.364, 0.46] average: 0.412
accuracy: [0.647, 0.706] average: 0.676


## LateFusion

In [8]:
image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=1).requires_grad_(True)
model = HyMNet(image_model=image_model).to(device)
state_dict = torch.load(FM_PATH)
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict)

Position interpolate from 14x14 to 32x32


<All keys matched successfully>

In [10]:
train_x, train_y = build_tabular_dataset(model, train_dataset, device, method="lf")
test_x, test_y = build_tabular_dataset(model, test_dataset, device, method="lf")

train_late_fusion = InputOutputDataset(train_x, train_y)
test_late_fusion = InputOutputDataset(test_x, test_y)

train_loader = DataLoader(train_late_fusion, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_late_fusion, batch_size=BATCH_SIZE, shuffle=False)

## XGBoost

In [11]:
with open(MODELS_PATH+ '/LateFusionXGBParams.json') as f:
    params = json.load(f)
    
boost = xgb.XGBClassifier(**params, objective="binary:logistic")

boost.fit(train_x, train_y)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.001, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=3, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=500,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [13]:
results = boostrap(N=N, model=boost, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/LateFusionXGB.json")

f1: [0.733, 0.783] average: 0.758
auroc: [0.684, 0.75] average: 0.717
auprc: [0.708, 0.791] average: 0.75
precision: [0.652, 0.719] average: 0.686
recall: [0.819, 0.877] average: 0.848
specificity: [0.388, 0.484] average: 0.436
accuracy: [0.652, 0.708] average: 0.68


## SVM

In [14]:
from sklearn.svm import SVC

with open(MODELS_PATH+ '/LateFusionSVMParams.json') as f:
    params = json.load(f)
        
svm = SVC(**params, probability=True)

svm.fit(train_x, train_y)

SVC(C=1000, gamma=0.01, kernel='poly', probability=True)

In [15]:
results = boostrap(N=N, model=svm, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/LateFusionSVM.json")

f1: [0.744, 0.792] average: 0.768
auroc: [0.652, 0.718] average: 0.685
auprc: [0.692, 0.773] average: 0.732
precision: [0.625, 0.689] average: 0.657
recall: [0.905, 0.946] average: 0.926
specificity: [0.256, 0.343] average: 0.3
accuracy: [0.641, 0.699] average: 0.67


## FCNN

In [19]:
fm_path = MODELS_PATH + r"/LateFusionFCNN.pth"

tabular_model = nn.Sequential(
    nn.Linear(in_features=3, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=16, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=1),
)

model = HyMNet(tabular_model=tabular_model).to(device)
model.load_state_dict(torch.load(fm_path))

<All keys matched successfully>

In [20]:
results = boostrap(N=N, model=model, data_loader=test_loader, save_loc=f"{os.getcwd() + os.sep}Results/LateFusionFCNN.json")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [00:00<00:00, 2137.91it/s]


f1: [0.726, 0.777] average: 0.752
auroc: [0.664, 0.73] average: 0.697
auprc: [0.703, 0.782] average: 0.742
precision: [0.649, 0.715] average: 0.682
recall: [0.807, 0.866] average: 0.836
specificity: [0.388, 0.482] average: 0.435
accuracy: [0.644, 0.702] average: 0.673


# VotingFusion

In [8]:
# Load image and Tabular model
image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=1)

tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=16, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=1),
)


# load fusion model
tabular_model_fusion = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
)

image_model_fusion = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=8).requires_grad_(True)

fusion_model_fusion = nn.Sequential(
    nn.Linear(in_features=40, out_features=128),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=128, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=1),
)

fusion = HyMNet(image_model=image_model_fusion, tabular_model=tabular_model_fusion, fusion_model=fusion_model_fusion)
state_dict = torch.load(MODELS_PATH + "/JointFusion_finetune.pth")
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
fusion.load_state_dict(state_dict)

model = HyMNet(image_model=image_model, tabular_model=tabular_model, fusion_model=fusion).to(device)

state_dict = torch.load(FM_PATH)
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict, strict=False)

state_dict = torch.load(DM_PATH)
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict, strict=False)

Position interpolate from 14x14 to 32x32
Position interpolate from 14x14 to 32x32


_IncompatibleKeys(missing_keys=['image_model.cls_token', 'image_model.pos_embed', 'image_model.patch_embed.proj.weight', 'image_model.patch_embed.proj.bias', 'image_model.blocks.0.norm1.weight', 'image_model.blocks.0.norm1.bias', 'image_model.blocks.0.attn.qkv.weight', 'image_model.blocks.0.attn.qkv.bias', 'image_model.blocks.0.attn.proj.weight', 'image_model.blocks.0.attn.proj.bias', 'image_model.blocks.0.norm2.weight', 'image_model.blocks.0.norm2.bias', 'image_model.blocks.0.mlp.fc1.weight', 'image_model.blocks.0.mlp.fc1.bias', 'image_model.blocks.0.mlp.fc2.weight', 'image_model.blocks.0.mlp.fc2.bias', 'image_model.blocks.1.norm1.weight', 'image_model.blocks.1.norm1.bias', 'image_model.blocks.1.attn.qkv.weight', 'image_model.blocks.1.attn.qkv.bias', 'image_model.blocks.1.attn.proj.weight', 'image_model.blocks.1.attn.proj.bias', 'image_model.blocks.1.norm2.weight', 'image_model.blocks.1.norm2.bias', 'image_model.blocks.1.mlp.fc1.weight', 'image_model.blocks.1.mlp.fc1.bias', 'image_mod

In [10]:
train_x, train_y = build_tabular_dataset(model, train_dataset, device, method="vf")
test_x, test_y = build_tabular_dataset(model, test_dataset, device, method="vf")

train_fusion_set = InputOutputDataset(train_x, train_y)
test_fusion_set = InputOutputDataset(test_x, test_y)

train_fusion_loader = DataLoader(train_fusion_set, batch_size=1)
test_fusion_loader = DataLoader(test_fusion_set, batch_size=1)

## XGBoost

In [11]:
with open(MODELS_PATH+'/VotingFusionXGBParams.json') as f:
    params = json.load(f)
    
boost = xgb.XGBClassifier(**params, objective="binary:logistic")

boost.fit(train_x, train_y)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.005, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=3, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=500,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [15]:
results = boostrap(N=N, model=boost, data_loader=test_fusion_loader, save_loc=f"{os.getcwd() + os.sep}Results/VotingFusionXGB.json")

f1: [0.718, 0.772] average: 0.745
auroc: [0.671, 0.737] average: 0.704
auprc: [0.712, 0.79] average: 0.751
precision: [0.646, 0.715] average: 0.68
recall: [0.792, 0.854] average: 0.823
specificity: [0.393, 0.488] average: 0.44
accuracy: [0.639, 0.696] average: 0.668


## SVM

In [16]:
with open(MODELS_PATH+ '/LateFusionSVMParams.json') as f:
    params = json.load(f)
        
svm = SVC(**params, probability=True)

svm.fit(train_x, train_y)

SVC(C=1000, gamma=0.01, kernel='poly', probability=True)

In [17]:
results = boostrap(N=N, model=svm, data_loader=test_fusion_loader, save_loc=f"{os.getcwd() + os.sep}Results/VotingFusionSVM.json")

f1: [0.747, 0.794] average: 0.77
auroc: [0.663, 0.73] average: 0.696
auprc: [0.707, 0.786] average: 0.746
precision: [0.613, 0.675] average: 0.644
recall: [0.945, 0.976] average: 0.96
specificity: [0.189, 0.269] average: 0.229
accuracy: [0.634, 0.691] average: 0.662


## FCNN

In [18]:
fm_path = MODELS_PATH + "\VotingFusionFCNN.pth"

fusion_model = nn.Sequential(
    nn.Linear(in_features=3, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=8, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=8, out_features=1),
)

model = HyMNet(tabular_model=fusion_model).cuda().float()
model.load_state_dict(torch.load(fm_path))

<All keys matched successfully>

In [22]:
results = boostrap(N=N, model=model, data_loader=test_fusion_loader, save_loc=f"{os.getcwd() + os.sep}Results/VotingFusionFCNN.json")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [00:00<00:00, 2469.23it/s]


f1: [0.713, 0.766] average: 0.74
auroc: [0.648, 0.716] average: 0.682
auprc: [0.696, 0.775] average: 0.736
precision: [0.64, 0.708] average: 0.674
recall: [0.789, 0.851] average: 0.82
specificity: [0.378, 0.474] average: 0.426
accuracy: [0.63, 0.688] average: 0.659


## Ensemble

In [23]:
sig = nn.Sigmoid()

tensored_x, tensored_y = torch.tensor(test_x), torch.tensor(test_y)
probs = sig(tensored_x)

average = torch.mean(probs, axis=1)

esnemble_average = torch.stack([average, tensored_y], axis=-1).numpy()

In [25]:
n_samples = len(tensored_x)
indices = [i for i in range(n_samples)]

bootstrap_preds, bootstrap_trues = [], []
for run in range(N):
    bootstrapped_indices = resample(indices, replace=True, n_samples=n_samples)
    preds_tmp , trues_tmp = [average[indx] for indx in bootstrapped_indices], [tensored_y[indx] for indx in bootstrapped_indices]
    bootstrap_preds.append(preds_tmp)
    bootstrap_trues.append(trues_tmp)

results = calculate_results(N, bootstrap_trues, bootstrap_preds)

f1: [0.741, 0.791] average: 0.766
auroc: [0.676, 0.741] average: 0.708
auprc: [0.717, 0.796] average: 0.756
precision: [0.646, 0.712] average: 0.679
recall: [0.854, 0.906] average: 0.88
specificity: [0.349, 0.444] average: 0.396
accuracy: [0.654, 0.711] average: 0.682


# DM analysis

In [None]:
tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
)

image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=8).requires_grad_(True)

fusion_model = nn.Sequential(
    nn.Linear(in_features=40, out_features=128),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=128, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=1),
)

model = HyMNet(image_model=image_model, tabular_model=tabular_model, fusion_model=fusion_model).to(device)
state_dict = torch.load(MODELS_PATH + "/JointFusion_finetune.pth")
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict)

In [6]:
test_dataset = HypertensionDataset(CSV_PATH, split="test", test_transform=test_transform)
test_dataset.df = test_dataset.df[test_dataset.df["DM"] == 1]
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HTN_DM'] = df['HTN'].astype(str) + df['DM'].astype(str)


In [8]:
len(test_dataset.df)

849

In [None]:
results = boostrap(N=N, model=model, data_loader=test_loader)
with open(os.getcwd() + os.sep + "Results/JointFusion_dm_finetune.json", 'w') as f:
    # Use json.dump to write the dictionary to the file
    json.dump(results, f)

In [11]:
test_dataset = HypertensionDataset(CSV_PATH, split="test", test_transform=test_transform)
test_dataset.df = test_dataset.df[test_dataset.df["DM"] == 0]
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['HTN_DM'] = df['HTN'].astype(str) + df['DM'].astype(str)


In [12]:
len(test_dataset.df)

158

In [None]:
results = boostrap(N=N, model=model, data_loader=test_loader)
with open(os.getcwd() + os.sep + "Results/JointFusion_nodm_finetune.json", 'w') as f:
    # Use json.dump to write the dictionary to the file
    json.dump(results, f)

# Statistical significance

In [28]:
tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
)

image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=8).requires_grad_(True)

fusion_model = nn.Sequential(
    nn.Linear(in_features=40, out_features=128),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=128, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=1),
)

model1 = HyMNet(image_model=image_model, tabular_model=tabular_model, fusion_model=fusion_model).to(device)
state_dict = torch.load(MODELS_PATH + "/JointFusion_finetune.pth")
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model1.load_state_dict(state_dict)

Position interpolate from 14x14 to 32x32


<All keys matched successfully>

In [73]:
tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=16, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=1),
).to(device)

model2 = HyMNet(tabular_model=tabular_model) 
model2.load_state_dict(torch.load(MODELS_PATH + "/DemographicFCNN.pth"))

<All keys matched successfully>

In [74]:
results = binary_bootstrap(model1, model2, test_loader, device)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1007/1007 [02:30<00:00,  6.68it/s]


In [75]:
model_scores = {}
for m in range(1, 3):
    f1_scores = []
    for i in range(N):
        preds = [1 if prob >= 0.5 else 0 for prob in results[f'prob{m}'][i]]
        f1_scores.append(sklearn.metrics.f1_score(results['true'][i], preds))   
    model_scores[f'score{m}'] = f1_scores

In [77]:
score1 = np.array(model_scores['score1'])
score2 = np.array(model_scores['score2'])
difference = score1 - score2

In [78]:
difference.sort()
sig_dif = difference[(difference >= np.percentile(difference, 2.5)) & \
                                     (difference <= np.percentile(difference, 97.5))]

In [79]:
print(sig_dif.mean())
print(sig_dif.min())
print(sig_dif.max())

0.013227980672246595
-0.005257952001666011
0.03223390357970779


# Shap

In [9]:
import shap

tabular_model = nn.Sequential(
    nn.Linear(in_features=2, out_features=8),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=8, out_features=32),
)

image_model = get_retfound("/home/baharoon/HTN/RETFound_cfp_weights.pth", image_size=512,
                          classes=8).requires_grad_(True)

fusion_model = nn.Sequential(
    nn.Linear(in_features=40, out_features=128),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=128, out_features=32),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=32, out_features=16),
    nn.LeakyReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(in_features=16, out_features=1),
)

model = HyMNet(image_model=image_model, tabular_model=tabular_model, fusion_model=fusion_model).to(device)
state_dict = torch.load(MODELS_PATH + "/JointFusion_finetune.pth")
state_dict = {key.replace("module.", ""): value for key, value in state_dict.items()}
model.load_state_dict(state_dict)

Position interpolate from 14x14 to 32x32


<All keys matched successfully>

In [16]:
test_x = []
test_y = []

for sample in tqdm(test_loader):
        
    img, features, target = sample["image"], sample["features"], sample["label"]
    img, features, target = img.to(device).float(), features.to(device).float(),\
                                    target.to(device).float()
    
    output_image = model.image_model(img).squeeze(0)
    output_features = model.tabular_model(features).squeeze(0)
    
    concated = torch.cat([output_image, output_features], dim=0)
    
    test_x.append(concated.detach().cpu().numpy())
    test_y.append(target.detach().cpu().numpy())

In [19]:
test_x = np.stack(test_x)
test_y = np.stack(test_y)

In [30]:
test_x = torch.tensor(test_x, device=device)
test_y = torch.tensor(test_y, device=device)

In [64]:
def predict_func(data):
    # Set the model in evaluation mode and disable gradients
    fusion_model.eval().cpu()
    with torch.no_grad():
#         data = torch.tensor(data, device=device)
        data = torch.tensor(data)
        predictions = fusion_model(data)
    return predictions

In [73]:
explainer = shap.GradientExplainer(fusion_model, test_x[0: ,].cpu().unsqueeze(0))

In [74]:
shap_values = explainer.shap_values(test_x[0: ,].cpu().unsqueeze(0))