In [1]:
from torch import nn
import sys
import os
# Append the parent of the current working directory to sys.path so that the
# imports below can find modules stored there. This must run before them.
# NOTE(review): presumably cikm_dataloader and densenet are the modules that
# live in that parent directory — confirm.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from cikm_dataloader import get_dataset
from densenet import generate_model
import torch
import torch.utils.data as data
# Device handed to the model loaders and the inference helper in this notebook.
# NOTE(review): the GPU index is hard-coded; on a host without a fourth GPU
# anything moved to this device will fail — consider making it configurable.
device = torch.device('cuda:3')
import glob
from collections import OrderedDict
from efficientnet_pytorch_3d import EfficientNet3D
from sklearn.metrics import precision_score, f1_score, accuracy_score
import numpy as np

In [2]:
def load_efficientnet_model(model_path, device):
    """Build a two-class EfficientNet3D-b6 and load a checkpoint into it.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load``. The loaded
            object must expose the weights under the ``'state_dict'`` key.
        device: Device the finished model is moved to.

    Returns:
        The model, moved to ``device`` and switched to eval mode.
    """
    model = EfficientNet3D.from_name('efficientnet-b6', override_params={'num_classes': 2})
    # Without map_location, torch.load restores every tensor onto the device it
    # was saved from, which fails when that GPU does not exist on this host.
    # Loading to CPU first also avoids holding a second copy of the weights on
    # the target GPU; the model itself is moved to `device` below.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)
    model.eval()
    return model

def load_densenet_model(model_path, device):
    """Build the model returned by ``generate_model(121)`` and load a checkpoint.

    The checkpoint keys are rewritten before loading: every occurrence of the
    substring ``"backbone."`` is removed from each key.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load``. The loaded
            object must expose the weights under the ``'state_dict'`` key.
        device: Device the finished model is moved to.

    Returns:
        The model, moved to ``device`` and switched to eval mode.
    """
    model = generate_model(121)
    # Without map_location, torch.load restores every tensor onto the device it
    # was saved from, which fails when that GPU does not exist on this host.
    # The model itself is moved to `device` after the weights are loaded.
    checkpoint = torch.load(model_path, map_location='cpu')
    # str.replace strips "backbone." wherever it appears in a key, not only
    # when it is a leading prefix.
    renamed_state = OrderedDict(
        (key.replace("backbone.", ""), tensor)
        for key, tensor in checkpoint['state_dict'].items()
    )
    model.load_state_dict(renamed_state)
    model = model.to(device)
    model.eval()
    return model

In [3]:
def run_inference_and_get_metrics(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and compute binary classification metrics.

    The predicted class of each sample is the index of the largest value along
    dimension 1 of the model output. Class 1 is treated as positive and class 0
    as negative.

    Args:
        model: Callable that maps an image batch to per-class scores.
        test_loader: Iterable of batches; the first two items of every batch
            are the image tensor and the label tensor.
        device: Device the image batch is moved to before the forward pass.

    Returns:
        Tuple ``(precision, f1, accuracy, specificity, sensitivity)``.
        Precision and F1 use ``zero_division=1``. Specificity is ``nan`` when
        there are no negative samples and sensitivity is ``nan`` when there are
        no positive samples.
    """
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            img, label = batch[:2]

            batch_size = img.shape[0]
            img = img.squeeze()
            if batch_size == 1:
                # A bare squeeze() also removes a batch axis of length 1 (e.g.
                # a final batch holding a single sample); put it back so the
                # model still receives a batched input.
                img = img.unsqueeze(0)

            outputs = model(img.to(device))
            _, predicted = torch.max(outputs, 1)

            # Labels are only needed on the host for the metrics, so they are
            # never sent to `device`. They are flattened so that a label tensor
            # with a trailing singleton axis cannot broadcast against the 1-D
            # predictions in the comparisons below.
            all_labels.extend(label.to(torch.float32).reshape(-1).cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    labels = np.asarray(all_labels)
    predictions = np.asarray(all_predictions)

    # Confusion-matrix counts.
    tp = np.sum((predictions == 1) & (labels == 1))
    tn = np.sum((predictions == 0) & (labels == 0))
    fp = np.sum((predictions == 1) & (labels == 0))
    fn = np.sum((predictions == 0) & (labels == 1))

    precision = precision_score(labels, predictions, zero_division=1)
    f1 = f1_score(labels, predictions, zero_division=1)
    accuracy = accuracy_score(labels, predictions)

    # Guard the 0/0 case explicitly instead of relying on numpy to emit a
    # RuntimeWarning and produce nan.
    negatives = tn + fp
    positives = tp + fn
    specificity = tn / negatives if negatives > 0 else float('nan')
    sensitivity = tp / positives if positives > 0 else float('nan')

    return precision, f1, accuracy, specificity, sensitivity

In [4]:
# Build the ADNI and OASIS evaluation datasets from their CSV files.
# NOTE(review): both calls pass 'train' as the first argument although the CSV
# names indicate test splits — confirm that get_dataset does not switch on
# training-only behaviour for that value.
adni_test_set = get_dataset(
    'train',
    '/home/barisbaydargil/3D_classification/adni_prep_20perc_test.csv',
)
oasis_test_set = get_dataset(
    'train',
    '/home/barisbaydargil/3D_classification/oasis_fulltest.csv',
)

# Both loaders share the same settings; shuffling is disabled.
adni_test_loader, oasis_test_loader = (
    data.DataLoader(test_set, batch_size=16, shuffle=False, num_workers=6)
    for test_set in (adni_test_set, oasis_test_set)
)

In [5]:
# Directory that holds one sub-directory per architecture, each containing one
# sub-directory per experiment.
CHECKPOINT_ROOT = '/home/barisbaydargil/3D_classification/31May_classification_mixup/model_checkpoints'


def _checkpoint_paths(architecture_dir, experiment_dir):
    """Return every path directly under the given architecture/experiment directory.

    The result is sorted because glob returns entries in filesystem order,
    which is not guaranteed to be stable between runs or machines.
    """
    return sorted(glob.glob(f'{CHECKPOINT_ROOT}/{architecture_dir}/{experiment_dir}/*'))


effnet_adni_model_paths = _checkpoint_paths('EfficientNet3D', 'adni_only')
effnet_adnifake_model_paths = _checkpoint_paths('EfficientNet3D', 'adni_fake')
densenet_adni_model_paths = _checkpoint_paths('DenseNet1213D', 'adni_only')
densenet_adnifake_model_paths = _checkpoint_paths('DenseNet1213D', 'adni_fake')

# (architecture label, experiment label, checkpoint paths) for every run to evaluate.
model_paths = [
    ("EfficientNet", "adni_only", effnet_adni_model_paths),
    ("EfficientNet", "adni_fake", effnet_adnifake_model_paths),
    ("DenseNet", "adni_only", densenet_adni_model_paths),
    ("DenseNet", "adni_fake", densenet_adnifake_model_paths),
]

In [6]:
# Create list of datasets to iterate over
datasets = [
    ('ADNI', adni_test_loader),
    ('OASIS', oasis_test_loader),
]

# Maps the architecture label stored in model_paths to its checkpoint loader.
model_loaders = {
    "EfficientNet": load_efficientnet_model,
    "DenseNet": load_densenet_model,
}

# Same order as the tuple returned by run_inference_and_get_metrics.
metric_names = ('precision', 'f1_score', 'accuracy', 'specificity', 'sensitivity')

# Loop over each dataset
for dataset_name, test_loader in datasets:
    print(f'\nRunning experiments on {dataset_name} dataset')

    # Perform inference for each model on current dataset
    for model_name, experiment, paths in model_paths:
        print(f"\nRunning experiment with {model_name} on {experiment} data")

        all_metrics = {name: [] for name in metric_names}
        mean_metrics = {}
        std_metrics = {}

        # Architectures without a registered loader are not evaluated.
        load_model = model_loaders.get(model_name)
        if load_model is not None:
            for model_path in paths:
                model = load_model(model_path, device)
                scores = run_inference_and_get_metrics(model, test_loader, device)
                for name, score in zip(metric_names, scores):
                    all_metrics[name].append(score)

        # np.mean / np.std of an empty list yield nan and emit RuntimeWarnings,
        # so report the situation explicitly instead of printing nan rows.
        if not all_metrics['precision']:
            print(f'No checkpoints evaluated for {model_name} on {experiment} data; skipping summary')
            continue

        for metric_name, metric_values in all_metrics.items():
            mean_metrics[metric_name] = np.mean(metric_values)
            std_metrics[metric_name] = np.std(metric_values)

        for metric_name, mean_value in mean_metrics.items():
            std_value = std_metrics[metric_name]
            print(f'{metric_name.capitalize()}: Mean={mean_value:.4f}, Stddev={std_value:.4f}')


Running experiments on ADNI dataset

Running experiment with EfficientNet on adni_only data
Precision: Mean=1.0000, Stddev=0.0000
F1_score: Mean=0.9344, Stddev=0.0321
Accuracy: Mean=0.9165, Stddev=0.0401
Specificity: Mean=1.0000, Stddev=0.0000
Sensitivity: Mean=0.8787, Stddev=0.0583

Running experiment with EfficientNet on adni_fake data
Precision: Mean=1.0000, Stddev=0.0000
F1_score: Mean=0.6439, Stddev=0.2811
Accuracy: Mean=0.6789, Stddev=0.1962
Specificity: Mean=1.0000, Stddev=0.0000
Sensitivity: Mean=0.5333, Stddev=0.2851

Running experiment with DenseNet on adni_only data
Precision: Mean=1.0000, Stddev=0.0000
F1_score: Mean=0.9110, Stddev=0.0952
Accuracy: Mean=0.8963, Stddev=0.1011
Specificity: Mean=1.0000, Stddev=0.0000
Sensitivity: Mean=0.8493, Stddev=0.1469

Running experiment with DenseNet on adni_fake data
Precision: Mean=0.9987, Stddev=0.0027
F1_score: Mean=0.5773, Stddev=0.3311
Accuracy: Mean=0.6468, Stddev=0.2422
Specificity: Mean=0.9971, Stddev=0.0059
Sensitivity: Mean=0