# Testing

This notebook runs the test-set evaluation of the trained models.

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

from datetime import datetime

from model.one_model.one_stage_models import ResNet50OneStage, ResNet18OneStage
from model.multi_stage_model.multi_stage_model import ThreeStageModelFrontalLateralAPPA, TwoStageModelAPPA, TwoStageModelFrontalLateral
from data.dataset import CheXpertDataset

In [None]:
# Notebook setup: automatic module reloading, inline plots, default figure size.
# `%autoreload 2` re-imports edited modules before each cell is executed.
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots


# NOTE(review): this variable is set after the import cell has already run;
# presumably it should be set before the numeric libraries are loaded -- confirm.
os.environ['KMP_DUPLICATE_LIB_OK']='True' # To prevent the kernel from dying.

# Load results.xlsx file

In [None]:
# Location of the Excel workbook that accumulates test results across runs.
# Set the RESULTS_XLSX environment variable to point at another copy; the
# original hard-coded path is kept as the default so existing setups keep
# working unchanged.
result_xls = os.environ.get(
    "RESULTS_XLSX",
    "C:/Users/flobr/OneDrive/Uni/Informatik_Master/ADLM/results.xlsx",
)
# Previously recorded results; later cells append new rows to this frame.
result = pd.read_excel(result_xls, sheet_name='Sheet1')

# Test Dataset

In [None]:
# Image preprocessing for evaluation: resize to a fixed resolution, then
# convert to a tensor.
params_transform = dict(resize=(256, 256))

transform = transforms.Compose(
    [transforms.Resize(params_transform["resize"]), transforms.ToTensor()]
)

# Label name -> number handed to CheXpertDataset (presumably the CSV column
# index -- confirm against the dataset class). Only five findings are
# evaluated here. Other known entries, currently disabled:
#   "sex": 1, "age": 2, "frontal/lateral": 3, "ap/pa": 4, "no_finding": 5,
#   "enlarged_cardiomediastinum": 6, "lung_opacity": 8, "lung_lesion": 9,
#   "pneumonia": 12, "pneumothorax": 14, "pleural_other": 16,
#   "fracture": 17, "support_devices": 18, "fronal_lateral_map": 21,
#   "ap/pa map": 22
targets = dict(
    cardiomegaly=7,
    edema=10,
    consolidation=11,
    atelectasis=13,
    pleural_effusion=15,
)

# Path of the test split; the one-stage results cell also stores it next to
# the metrics as the "test_set" column.
csv_file = "data/original_data/test.csv"

test_dataset = CheXpertDataset(
    transform=transform,
    targets=targets,
    root_dir="../image_data/",
    csv_file=csv_file,
)
print(f"Test dataset size: {len(test_dataset)}")

Class weights: [np.float64(2.0357232001191763), np.float64(2.925535929860176), np.float64(3.8951223606547627), np.float64(2.012621840812587), np.float64(2.518783544165969)]
Test dataset size: 668


# One stage model

Testing of the one stage model.

## Define model

In [11]:
# Settings handed to the one-stage model. The "train_transfrom" spelling is
# reproduced exactly because it is a dictionary key (presumably looked up by
# the model code -- confirm before renaming).
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    loss_fn="multilabel_focal_loss",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    confidence_threshold=0.5,
)

# Checkpoint to evaluate: final_models/new/<family>/<name>.pth
family = "ResNet18Pretraining_og_ls05"
name = "model_epoch_50"
weights = f"final_models/new/{family}/{name}.pth"

# Build the network and load the checkpoint weights into it.
model = ResNet18OneStage(
    input_channels=params["input_channels"],
    targets=targets,
    params=params,
)
model.load_model(weights)

## Testing one stage model

In [12]:
# Evaluate the one-stage model on the test set.
res = model.test(name=name, test_dataset=test_dataset)

# Tag the metrics with the run context (threshold, test split, model family).
for column, value in (
    ("confidence_threshold", params["confidence_threshold"]),
    ("test_set", csv_file),
    ("family", family),
):
    res[column] = value

# Append the new rows to the accumulated results.
result = pd.concat([result, res], ignore_index=True)

# Persist everything back into the workbook, replacing the existing sheet.
with pd.ExcelWriter(result_xls, mode='a', engine='openpyxl', if_sheet_exists='replace') as workbook:
    result.to_excel(workbook, index=False, sheet_name="Sheet1")

Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:41<00:00, 13.89s/it, test_loss=0.485196]


Test loss: 0.4851956811698408
Test cardiomegaly accuracy: 0.7679640650749207
Test cardiomegaly precision: 0.6470588445663452
Test cardiomegaly recall: 0.2514285743236542
Test cardiomegaly auroc: 0.8192176180817154
Test cardiomegaly auc: 0.3197140693664551
Test cardiomegaly confusion_matrix: tensor([[469.,  24.],
        [131.,  44.]])
Test edema accuracy: 0.660179615020752
Test edema precision: 0.22480620443820953
Test edema recall: 0.6823529601097107
Test edema auroc: 0.7516093229744728
Test edema auc: 0.20117846131324768
Test edema confusion_matrix: tensor([[383., 200.],
        [ 27.,  58.]])
Test consolidation accuracy: 0.9056886434555054
Test consolidation precision: 0.24074074625968933
Test consolidation recall: 0.37142857909202576
Test consolidation auroc: 0.8638682013089596
Test consolidation auc: 0.25558286905288696
Test consolidation confusion_matrix: tensor([[592.,  41.],
        [ 22.,  13.]])
Test atelectasis accuracy: 0.2709580957889557
Test atelectasis precision: 0.26766

# Two stage model - AP/PA Split

Testing of the two stage model with ap/pa split.

## Define model

In [4]:
# Rebuild the preprocessing and label mapping so this section can be run on
# its own.
params_transform = {
    "resize": (256, 256),
}

transform = transforms.Compose([
    transforms.Resize(params_transform["resize"]),
    transforms.ToTensor(),
])

# Label name -> number passed to CheXpertDataset (presumably the CSV column
# index -- confirm against the dataset class). Disabled entries are kept for
# reference.
targets = {
    # "sex": 1,
    # "age": 2,
    # "frontal/lateral": 3,
    # "ap/pa": 4,
    # "no_finding": 5,
    # "enlarged_cardiomediastinum": 6,
    "cardiomegaly": 7,
    # "lung_opacity": 8,
    # "lung_lesion": 9,
    "edema": 10,
    "consolidation": 11,
    # "pneumonia": 12,
    "atelectasis": 13,
    # "pneumothorax": 14,
    "pleural_effusion": 15,
    # "pleural_other": 16,
    # "fracture": 17,
    # "support_devices": 18,
    # "fronal_lateral_map": 21,
    # "ap/pa map": 22,
}

# Rebind `csv_file` together with `test_dataset`: the one-stage results cell
# stores `csv_file` as the "test_set" column, so leaving it at its earlier
# value would mislabel any results recorded after this cell has run.
csv_file = "data/test_frontal.csv"

test_dataset = CheXpertDataset(
    csv_file=csv_file,
    root_dir="../image_data/",
    targets=targets,
    transform=transform,
)
print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 202


In [7]:
# Identifier passed to model.test() for this run.
name = "two_stage_ap_pa_no_pretraining"

# Settings handed to the two-stage model. The "train_transfrom" spelling is
# reproduced exactly because it is a dictionary key (presumably looked up by
# the model code -- confirm before renaming).
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    # BCE with sigmoid activation (alternative for multilabel:
    # MultiLabelSoftMarginLoss).
    loss_fn="torch.nn.BCEWithLogitsLoss()",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    confidence_threshold=0.5,
)

# Checkpoints: the AP/PA classifier (first stage) and one second-stage model
# per view.
weights_first_stage = "final_models/ap-pa_split.pth"
weights_second_stage_ap = "final_models/ap_no_pretraining.pth"
weights_second_stage_pa = "final_models/pa_no_pretraining.pth"

model = TwoStageModelAPPA(
    targets=targets,
    params=params,
    model_ap_pa_classification=weights_first_stage,
    model_ap=weights_second_stage_ap,
    model_pa=weights_second_stage_pa,
)

## Testing two stage model

In [8]:
# Evaluate the two-stage (AP/PA) model on the test set.
res = model.test(test_dataset=test_dataset, name=name)
# Record the decision threshold alongside the metrics, as the one-stage
# section does, so rows produced with different thresholds stay
# distinguishable in the combined table.
res["confidence_threshold"] = params["confidence_threshold"]

# Append the new rows to the accumulated results.
result = pd.concat([result, res], ignore_index=True)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing the timestamped snapshot.
os.makedirs("results", exist_ok=True)
result.to_csv(f"results/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}results.csv", index=False)

Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.35s/it, test_loss=0.796671]

Test loss: 0.7966712377080233
Test cardiomegaly accuracy: 0.7178217768669128
Test cardiomegaly precision: 0.7368420958518982
Test cardiomegaly recall: 0.21212121844291687
Test cardiomegaly auroc: 0.678921568627451
Test cardiomegaly auc: tensor([0.4837], device='cuda:0')
Test cardiomegaly confusion_matrix: tensor([[131.,   5.],
        [ 52.,  14.]])
Test edema accuracy: 0.3217821717262268
Test edema precision: 0.23163841664791107
Test edema recall: 0.976190447807312
Test edema auroc: 0.6498511904761904
Test edema auc: tensor([0.1307], device='cuda:0')
Test edema confusion_matrix: tensor([[ 24., 136.],
        [  1.,  41.]])
Test consolidation accuracy: 0.8069307208061218
Test consolidation precision: 0.2666666805744171
Test consolidation recall: 0.125
Test consolidation auroc: 0.7204044117647059
Test consolidation auc: tensor([0.1377], device='cuda:0')
Test consolidation confusion_matrix: tensor([[159.,  11.],
        [ 28.,   4.]])
Test atelectasis accuracy: 0.3861386179924011
Test at




# Two stage model - Frontal/Lateral Split

Testing of the two stage model with frontal/lateral split.

## Define model

In [9]:
# Rebuild the preprocessing and label mapping so this section can be run on
# its own.
params_transform = {
    "resize": (256, 256),
}

transform = transforms.Compose([
    transforms.Resize(params_transform["resize"]),
    transforms.ToTensor(),
])

# Label name -> number passed to CheXpertDataset (presumably the CSV column
# index -- confirm against the dataset class). Disabled entries are kept for
# reference.
targets = {
    # "sex": 1,
    # "age": 2,
    # "frontal/lateral": 3,
    # "ap/pa": 4,
    # "no_finding": 5,
    # "enlarged_cardiomediastinum": 6,
    "cardiomegaly": 7,
    # "lung_opacity": 8,
    # "lung_lesion": 9,
    "edema": 10,
    "consolidation": 11,
    # "pneumonia": 12,
    "atelectasis": 13,
    # "pneumothorax": 14,
    "pleural_effusion": 15,
    # "pleural_other": 16,
    # "fracture": 17,
    # "support_devices": 18,
    # "fronal_lateral_map": 21,
    # "ap/pa map": 22,
}

# Rebind `csv_file` together with `test_dataset`: the one-stage results cell
# stores `csv_file` as the "test_set" column, so leaving it at its earlier
# value would mislabel any results recorded after this cell has run.
csv_file = "data/test.csv"

test_dataset = CheXpertDataset(
    csv_file=csv_file,
    root_dir="../image_data/",
    targets=targets,
    transform=transform,
)
print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 234


In [13]:
# Identifier passed to model.test() for this run.
name = "two_stage_fr_lat_pretraining"

# Settings handed to the two-stage model. The "train_transfrom" spelling is
# reproduced exactly because it is a dictionary key (presumably looked up by
# the model code -- confirm before renaming).
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    # BCE with sigmoid activation (alternative for multilabel:
    # MultiLabelSoftMarginLoss).
    loss_fn="torch.nn.BCEWithLogitsLoss()",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    confidence_threshold=0.5,
)

# Checkpoints: the frontal/lateral classifier (first stage) and one
# second-stage model per view.
weights_first_stage = "final_models/fr-lat_split.pth"
weights_second_stage_frontal = "final_models/fr_pretraining.pth"
weights_second_stage_lateral = "final_models/lat_pretraining.pth"

model = TwoStageModelFrontalLateral(
    targets=targets,
    params=params,
    model_frontal_lateral_classification=weights_first_stage,
    model_frontal=weights_second_stage_frontal,
    model_lateral=weights_second_stage_lateral,
)

## Testing two stage model

In [14]:
# Evaluate the two-stage (frontal/lateral) model on the test set.
res = model.test(test_dataset=test_dataset, name=name)
# Record the decision threshold alongside the metrics, as the one-stage
# section does, so rows produced with different thresholds stay
# distinguishable in the combined table.
res["confidence_threshold"] = params["confidence_threshold"]

# Append the new rows to the accumulated results.
result = pd.concat([result, res], ignore_index=True)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing the timestamped snapshot.
os.makedirs("results", exist_ok=True)
result.to_csv(f"results/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}results.csv", index=False)

Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.70s/it, test_loss=0.687477]

Test loss: 0.6874771530449423
Test cardiomegaly accuracy: 0.7606837749481201
Test cardiomegaly precision: 0.75
Test cardiomegaly recall: 0.2647058963775635
Test cardiomegaly auroc: 0.7050850460666194
Test cardiomegaly auc: tensor([0.3643], device='cuda:0')
Test cardiomegaly confusion_matrix: tensor([[160.,   6.],
        [ 50.,  18.]])
Test edema accuracy: 0.28205129504203796
Test edema precision: 0.20574162900447845
Test edema recall: 0.9555555582046509
Test edema auroc: 0.6644326866549088
Test edema auc: tensor([0.1105], device='cuda:0')
Test edema confusion_matrix: tensor([[ 23., 166.],
        [  2.,  43.]])
Test consolidation accuracy: 0.8376068472862244
Test consolidation precision: 0.3913043439388275
Test consolidation recall: 0.27272728085517883
Test consolidation auroc: 0.8040102517714458
Test consolidation auc: tensor([0.1497], device='cuda:0')
Test consolidation confusion_matrix: tensor([[187.,  14.],
        [ 24.,   9.]])
Test atelectasis accuracy: 0.4059829115867615
Test 




# Three stage model - Frontal/Lateral and AP/PA Split

Testing of the three stage model. First stage is the frontal/lateral split, second stage is the ap/pa split and third stage is the multilabel classification of the images.

## Define model

In [15]:
# Rebuild the preprocessing and label mapping so this section can be run on
# its own.
params_transform = {
    "resize": (256, 256),
}

transform = transforms.Compose([
    transforms.Resize(params_transform["resize"]),
    transforms.ToTensor(),
])

# Label name -> number passed to CheXpertDataset (presumably the CSV column
# index -- confirm against the dataset class). Disabled entries are kept for
# reference.
targets = {
    # "sex": 1,
    # "age": 2,
    # "frontal/lateral": 3,
    # "ap/pa": 4,
    # "no_finding": 5,
    # "enlarged_cardiomediastinum": 6,
    "cardiomegaly": 7,
    # "lung_opacity": 8,
    # "lung_lesion": 9,
    "edema": 10,
    "consolidation": 11,
    # "pneumonia": 12,
    "atelectasis": 13,
    # "pneumothorax": 14,
    "pleural_effusion": 15,
    # "pleural_other": 16,
    # "fracture": 17,
    # "support_devices": 18,
    # "fronal_lateral_map": 21,
    # "ap/pa map": 22,
}

# Rebind `csv_file` together with `test_dataset`: the one-stage results cell
# stores `csv_file` as the "test_set" column, so leaving it at its earlier
# value would mislabel any results recorded after this cell has run.
csv_file = "data/test.csv"

test_dataset = CheXpertDataset(
    csv_file=csv_file,
    root_dir="../image_data/",
    targets=targets,
    transform=transform,
)
print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 234


In [38]:
# Identifier passed to model.test() for this run.
name = "three_stage_model_pretraining"

# Settings handed to the three-stage model. The "train_transfrom" spelling is
# reproduced exactly because it is a dictionary key (presumably looked up by
# the model code -- confirm before renaming).
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    # BCE with sigmoid activation (alternative for multilabel:
    # MultiLabelSoftMarginLoss).
    loss_fn="torch.nn.BCEWithLogitsLoss()",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    # Note: this section uses 0.8, whereas the other sections use 0.5.
    confidence_threshold=0.8,
)

# Checkpoints: the two view classifiers (frontal/lateral, then AP/PA) and
# the three final models (frontal AP, frontal PA, lateral).
weights_fr_lat_classification = "final_models/fr-lat_split.pth"
weights_ap_pa_classification = "final_models/ap-pa_split.pth"
weights_frontal_ap = "final_models/ap_pretraining.pth"
weights_frontal_pa = "final_models/pa_pretraining.pth"
weights_lateral = "final_models/lat_pretraining.pth"

model = ThreeStageModelFrontalLateralAPPA(
    targets=targets,
    params=params,
    model_frontal_lateral_classification=weights_fr_lat_classification,
    model_frontal_ap_pa_classification=weights_ap_pa_classification,
    model_frontal_ap=weights_frontal_ap,
    model_frontal_pa=weights_frontal_pa,
    model_lateral=weights_lateral,
)

## Testing three stage model

In [39]:
# Evaluate the three-stage model on the test set.
res = model.test(name=name, test_dataset=test_dataset)

# Saving is disabled for this section; the metrics are only kept in `res`.
# To record them, re-enable the two lines below:
#result = pd.concat([result, res], ignore_index=True)
#result.to_csv(f"results/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}results.csv", index=False)

Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.28s/it, test_loss=0.663117]

Test loss: 0.6631172401026592
Test cardiomegaly accuracy: 0.7094017267227173
Test cardiomegaly precision: 0.0
Test cardiomegaly recall: 0.0
Test cardiomegaly auroc: 0.7680722891566265
Test cardiomegaly auc: tensor([0.3020], device='cuda:0')
Test cardiomegaly confusion_matrix: tensor([[153.,  13.],
        [ 41.,  27.]])
Test edema accuracy: 0.7692307829856873
Test edema precision: 0.41818180680274963
Test edema recall: 0.5111111402511597
Test edema auroc: 0.7088771310993534
Test edema auc: tensor([0.1197], device='cuda:0')
Test edema confusion_matrix: tensor([[ 25., 164.],
        [  4.,  41.]])
Test consolidation accuracy: 0.8589743375778198
Test consolidation precision: 0.0
Test consolidation recall: 0.0
Test consolidation auroc: 0.7863711744308759
Test consolidation auc: tensor([0.2019], device='cuda:0')
Test consolidation confusion_matrix: tensor([[183.,  18.],
        [ 19.,  14.]])
Test atelectasis accuracy: 0.6666666865348816
Test atelectasis precision: 0.5135135054588318
Test a


