# Testing

This notebook runs the test-set evaluation of the trained models.

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

from datetime import datetime

from model.one_model.one_stage_models import ResNet50OneStage, ResNet18OneStage
from model.multi_stage_model.multi_stage_model import ThreeStageModelFrontalLateralAPPA, TwoStageModelAPPA, TwoStageModelFrontalLateral
from data.dataset import CheXpertDataset

In [12]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots


os.environ['KMP_DUPLICATE_LIB_OK']='True' # To prevent the kernel from dying.

# Load results.csv file

In [3]:
# Table of previously collected test results; evaluation cells further down
# concatenate their own rows onto `result`.
RESULTS_CSV = "results/results.csv"

try:
    result = pd.read_csv(RESULTS_CSV)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No earlier results (file missing or empty): start from an empty table so the
    # evaluation cells can still append to `result` instead of the notebook aborting here.
    print(f"No previous results found at {RESULTS_CSV}; starting with an empty table.")
    result = pd.DataFrame()

# Test Dataset

In [66]:
# Preprocessing applied to every test image: resize to a fixed size, then convert to a tensor.
params_transform = {"resize": (256, 256)}
transform = transforms.Compose(
    [
        transforms.Resize(params_transform["resize"]),
        transforms.ToTensor(),
    ]
)

# Labels evaluated in this run, each mapped to the integer handed to the dataset
# (presumably the label's column index in the CSV -- TODO confirm).
# Entries of the full mapping that are switched off for this run:
#   "sex": 1, "age": 2, "frontal/lateral": 3, "ap/pa": 4, "no_finding": 5,
#   "enlarged_cardiomediastinum": 6, "lung_opacity": 8, "lung_lesion": 9,
#   "pneumonia": 12, "pneumothorax": 14, "pleural_other": 16, "fracture": 17,
#   "support_devices": 18, "fronal_lateral_map": 21, "ap/pa map": 22
targets = dict(
    cardiomegaly=7,
    edema=10,
    consolidation=11,
    atelectasis=13,
    pleural_effusion=15,
)

# Test split described by data/test_frontal.csv, with images resolved under ../image_data/.
test_dataset = CheXpertDataset(
    root_dir="../image_data/",
    csv_file="data/test_frontal.csv",
    transform=transform,
    targets=targets,
)
print(f"Test dataset size: {len(test_dataset)}")

# One stage model

Testing of the one stage model.

## Define model

In [64]:
# Checkpoint to evaluate; `name` is also passed to model.test(...) in the testing cell.
name = "pa_pretraining"
weights = f"final_models/{name}.pth"

# Settings handed to the model constructor.
# NOTE(review): the key "train_transfrom" is misspelled but kept as-is, since the
# model code may look it up under exactly this name -- confirm before renaming.
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    # BCE with Sigmoid activation function
    loss_fn="torch.nn.BCEWithLogitsLoss()",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    confidence_threshold=0.5,
)

# Build the one-stage ResNet18 model and load the stored weights into it.
model = ResNet18OneStage(
    params=params,
    targets=targets,
    input_channels=params["input_channels"],
)
model.load_model(weights)

## Testing one stage model

In [65]:
# Evaluate the one-stage model on the test split.
res = model.test(test_dataset=test_dataset, name=name)

# Append the new rows to the running results table and write a timestamped snapshot of it.
result = pd.concat([result, res], ignore_index=True)
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
result.to_csv(f"results/{timestamp}results.csv", index=False)

Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.25it/s, test_loss=0.633998]


Test loss: 0.6339983567256819
Test cardiomegaly accuracy: 0.9696969985961914
Test cardiomegaly precision: 0.0
Test cardiomegaly recall: 0.0
Test cardiomegaly auroc: 1.0
Test cardiomegaly auc: tensor([0.0203], device='cuda:0')
Test cardiomegaly confusion_matrix: tensor([[32.,  0.],
        [ 1.,  0.]])
Test edema accuracy: 0.12121212482452393
Test edema precision: 0.03333333507180214
Test edema recall: 1.0
Test edema auroc: 0.625
Test edema auc: tensor([0.0059], device='cuda:0')
Test edema confusion_matrix: tensor([[ 3., 29.],
        [ 0.,  1.]])
Test consolidation accuracy: 0.9696969985961914
Test consolidation precision: 0.0
Test consolidation recall: 0.0
Test consolidation auroc: 0.5
Test consolidation auc: tensor([0.], device='cuda:0')
Test consolidation confusion_matrix: tensor([[32.,  1.],
        [ 0.,  0.]])
Test atelectasis accuracy: 0.21212121844291687
Test atelectasis precision: 0.13333334028720856
Test atelectasis recall: 1.0
Test atelectasis auroc: 0.46551724137931033
Test

# Two stage model - AP/PA Split

Testing of the two stage model with ap/pa split.

## Define model

In [13]:
# Image preprocessing: fixed-size resize followed by tensor conversion.
params_transform = {"resize": (256, 256)}
transform = transforms.Compose(
    [transforms.Resize(params_transform["resize"]), transforms.ToTensor()]
)

# Active labels for this evaluation and the integer passed to the dataset for each
# (presumably the label's column index in the CSV -- TODO confirm).
# Disabled entries of the full mapping, kept for reference:
#   "sex": 1, "age": 2, "frontal/lateral": 3, "ap/pa": 4, "no_finding": 5,
#   "enlarged_cardiomediastinum": 6, "lung_opacity": 8, "lung_lesion": 9,
#   "pneumonia": 12, "pneumothorax": 14, "pleural_other": 16, "fracture": 17,
#   "support_devices": 18, "fronal_lateral_map": 21, "ap/pa map": 22
targets = {
    "cardiomegaly": 7,
    "edema": 10,
    "consolidation": 11,
    "atelectasis": 13,
    "pleural_effusion": 15,
}

# Test split described by data/test_frontal.csv, with images resolved under ../image_data/.
test_dataset = CheXpertDataset(
    csv_file="data/test_frontal.csv",
    root_dir="../image_data/",
    transform=transform,
    targets=targets,
)
print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 202


In [16]:
# Run label passed to model.test(...) in the testing cell.
name = "two_stage_ap_pa_pretraining"

# Checkpoints: the AP/PA classifier (first stage) and one second-stage model per view.
weights_first_stage = "final_models/ap-pa_split.pth"
weights_second_stage_ap = "final_models/ap_pretraining.pth"
weights_second_stage_pa = "final_models/pa_pretraining.pth"

# Settings handed to the model constructor.
# NOTE(review): the key "train_transfrom" is misspelled but kept as-is, since the
# model code may look it up under exactly this name -- confirm before renaming.
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    # BCE with Sigmoid activation function
    # (for multilabel: MultiLabelSoftMarginLoss)
    loss_fn="torch.nn.BCEWithLogitsLoss()",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    confidence_threshold=0.5,
)

# Assemble the two-stage AP/PA model from the three checkpoints.
model = TwoStageModelAPPA(
    params=params,
    targets=targets,
    model_ap_pa_classification=weights_first_stage,
    model_ap=weights_second_stage_ap,
    model_pa=weights_second_stage_pa,
)

## Testing two stage model

In [17]:
# Evaluate the two-stage AP/PA model on the test split.
res = model.test(test_dataset=test_dataset, name=name)

# Extend the running results table with the new rows, then save a timestamped copy.
result = pd.concat([result, res], ignore_index=True)
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
result.to_csv(f"results/{timestamp}results.csv", index=False)

Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.32s/it, test_loss=0.675081]

Test loss: 0.6750809240357785
Test cardiomegaly accuracy: 0.7475247383117676
Test cardiomegaly precision: 0.7142857313156128
Test cardiomegaly recall: 0.3787878751754761
Test cardiomegaly auroc: 0.7575757575757576
Test cardiomegaly auc: tensor([0.3225], device='cuda:0')
Test cardiomegaly confusion_matrix: tensor([[126.,  10.],
        [ 41.,  25.]])
Test edema accuracy: 0.301980197429657
Test edema precision: 0.21714285016059875
Test edema recall: 0.9047619104385376
Test edema auroc: 0.7108630952380952
Test edema auc: tensor([0.1215], device='cuda:0')
Test edema confusion_matrix: tensor([[ 23., 137.],
        [  4.,  38.]])
Test consolidation accuracy: 0.8316831588745117
Test consolidation precision: 0.4642857015132904
Test consolidation recall: 0.40625
Test consolidation auroc: 0.7814338235294118
Test consolidation auc: tensor([0.2196], device='cuda:0')
Test consolidation confusion_matrix: tensor([[155.,  15.],
        [ 19.,  13.]])
Test atelectasis accuracy: 0.4455445408821106
Test 




# Two stage model - Frontal/Lateral Split

Testing of the two stage model with frontal/lateral split.

## Define model

In [74]:
# Preprocessing for the test images: resize to a fixed size, then convert to a tensor.
params_transform = {"resize": (256, 256)}
transform = transforms.Compose(
    [
        transforms.Resize(params_transform["resize"]),
        transforms.ToTensor(),
    ]
)

# Labels scored in this run and the integer handed to the dataset for each
# (presumably the label's column index in the CSV -- TODO confirm).
# Entries of the full mapping that are switched off for this run:
#   "sex": 1, "age": 2, "frontal/lateral": 3, "ap/pa": 4, "no_finding": 5,
#   "enlarged_cardiomediastinum": 6, "lung_opacity": 8, "lung_lesion": 9,
#   "pneumonia": 12, "pneumothorax": 14, "pleural_other": 16, "fracture": 17,
#   "support_devices": 18, "fronal_lateral_map": 21, "ap/pa map": 22
targets = dict(
    cardiomegaly=7,
    edema=10,
    consolidation=11,
    atelectasis=13,
    pleural_effusion=15,
)

# Test split described by data/test.csv, with images resolved under ../image_data/.
test_dataset = CheXpertDataset(
    root_dir="../image_data/",
    csv_file="data/test.csv",
    transform=transform,
    targets=targets,
)
print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 234


In [None]:
# Run label passed to model.test(...) in the testing cell.
name = "two_stage_fr_lat_pretraining"

# Settings handed to the model constructor.
# NOTE(review): the key "train_transfrom" is misspelled but kept as-is, since the
# model code may look it up under exactly this name -- confirm before renaming.
params = {
    "train_transfrom": params_transform,
    "lr": 0.001,
    "save_epoch": 5,
    "batch_size": 256,
    "num_epochs": 100,
    "input_channels": 1,
    "optimizer": "adam",
    "num_workers": 0,
    # BCE with Sigmoid activation function
    "loss_fn": "torch.nn.BCEWithLogitsLoss()",
    # For multilabel: MultiLabelSoftMarginLoss
    "metrics": ["accuracy",
            "precision",
            "recall",
            "confusion_matrix",
            "auc",
            "auroc",
            "multilabel_accuracy",
            "multilabel_auprc",
            "multilabel_precision_recall_curve",
            "mcc"],
    "confidence_threshold": 0.5,
}

# Checkpoints: the frontal/lateral classifier (first stage) and one second-stage model per view.
# TODO: the two second-stage paths are still placeholders and must be replaced.
weights_first_stage = "final_models/fr-lat_split.pth"
weights_second_stage_frontal = "final_models/ #TODO add model"
weights_second_stage_lateral= "final_models/ #TODO add model"

# Fail fast with an explicit message while any checkpoint path is still a placeholder,
# rather than handing an unusable path to the model.
placeholder_weights = [
    label
    for label, path in (
        ("weights_first_stage", weights_first_stage),
        ("weights_second_stage_frontal", weights_second_stage_frontal),
        ("weights_second_stage_lateral", weights_second_stage_lateral),
    )
    if "#TODO" in path
]
if placeholder_weights:
    raise ValueError(
        "Checkpoint path(s) still set to a placeholder: "
        + ", ".join(placeholder_weights)
        + ". Point them at trained model files before running this cell."
    )

# Assemble the two-stage frontal/lateral model from the three checkpoints.
model = TwoStageModelFrontalLateral(
    params=params, 
    model_fr_lat_classification=weights_first_stage,
    model_frontal=weights_second_stage_frontal,
    model_lateral=weights_second_stage_lateral,
    targets=targets,
)

## Testing two stage model

In [None]:
# Evaluate the two-stage frontal/lateral model on the test split.
res = model.test(test_dataset=test_dataset, name=name)

# Only the in-memory results table is extended here; the timestamped CSV export is disabled:
# result.to_csv(f"results/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}results.csv", index=False)
result = pd.concat([result, res], ignore_index=True)

# Three stage model - Frontal/Lateral and AP/PA Split

Testing of the three stage model. First stage is the frontal/lateral split, second stage is the ap/pa split and third stage is the multilabel classification of the images.

## Define model

In [13]:
# Image preprocessing: fixed-size resize followed by tensor conversion.
params_transform = {"resize": (256, 256)}
transform = transforms.Compose(
    [transforms.Resize(params_transform["resize"]), transforms.ToTensor()]
)

# Active labels for this evaluation and the integer passed to the dataset for each
# (presumably the label's column index in the CSV -- TODO confirm).
# Disabled entries of the full mapping, kept for reference:
#   "sex": 1, "age": 2, "frontal/lateral": 3, "ap/pa": 4, "no_finding": 5,
#   "enlarged_cardiomediastinum": 6, "lung_opacity": 8, "lung_lesion": 9,
#   "pneumonia": 12, "pneumothorax": 14, "pleural_other": 16, "fracture": 17,
#   "support_devices": 18, "fronal_lateral_map": 21, "ap/pa map": 22
targets = {
    "cardiomegaly": 7,
    "edema": 10,
    "consolidation": 11,
    "atelectasis": 13,
    "pleural_effusion": 15,
}

# Test split described by data/test.csv, with images resolved under ../image_data/.
test_dataset = CheXpertDataset(
    csv_file="data/test.csv",
    root_dir="../image_data/",
    transform=transform,
    targets=targets,
)
print(f"Test dataset size: {len(test_dataset)}")

Test dataset size: 234


In [14]:
# Run label passed to model.test(...) in the testing cell.
name = "three_stage_model_pretrained"

# Checkpoints: two routing classifiers (frontal/lateral, then AP/PA) and one
# classification model each for frontal-AP, frontal-PA and lateral images.
weights_fr_lat_classification = "final_models/fr-lat_split.pth"
weights_ap_pa_classification = "final_models/ap-pa_split.pth"
weights_frontal_ap = "final_models/ap_pretraining.pth"
weights_frontal_pa = "final_models/pa_pretraining.pth"
weights_lateral = "final_models/lat_pretraining.pth"

# Settings handed to the model constructor.
# NOTE(review): the key "train_transfrom" is misspelled but kept as-is, since the
# model code may look it up under exactly this name -- confirm before renaming.
params = dict(
    train_transfrom=params_transform,
    lr=0.001,
    save_epoch=5,
    batch_size=256,
    num_epochs=100,
    input_channels=1,
    optimizer="adam",
    num_workers=0,
    # BCE with Sigmoid activation function
    # (for multilabel: MultiLabelSoftMarginLoss)
    loss_fn="torch.nn.BCEWithLogitsLoss()",
    metrics=[
        "accuracy",
        "precision",
        "recall",
        "confusion_matrix",
        "auc",
        "auroc",
        "multilabel_accuracy",
        "multilabel_auprc",
        "multilabel_precision_recall_curve",
        "mcc",
    ],
    confidence_threshold=0.5,
)

# Assemble the three-stage model from the five checkpoints.
model = ThreeStageModelFrontalLateralAPPA(
    params=params,
    targets=targets,
    model_frontal_lateral_classification=weights_fr_lat_classification,
    model_frontal_ap_pa_classification=weights_ap_pa_classification,
    model_frontal_ap=weights_frontal_ap,
    model_frontal_pa=weights_frontal_pa,
    model_lateral=weights_lateral,
)

## Testing three stage model

In [None]:
# Evaluate the three-stage model on the test split.
res = model.test(test_dataset=test_dataset, name=name)

# Appending to the results table and the CSV export are both disabled for this model:
# result = pd.concat([result, res], ignore_index=True)
# result.to_csv(f"results/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}results.csv", index=False)

Testing:   0%|                                                                                                                                                                    | 0/1 [00:00<?, ?it/s]