In [8]:
import torch
from torch import nn
from sklearn.metrics import confusion_matrix
from torchvision.models import resnet50
from datasets.nih_cxr import NIHCXRDataset
from tasks.binary_classification import BinaryClassificationTask

# Path to the PyTorch Lightning checkpoint (.ckpt) of the NIH CXR pre-training run.
saved_model_path = (
    "C:/Users/chaob/OneDrive - University of Toronto/Documents/"
    "EVLP X-ray Imaging Project/evlp_xray_cv/saved_models/"
    "pretrain_nihcxr/epoch=7-step=17399.ckpt"
)

# Path to the NIH CXR data folder.
data_dir = (
    "C:/Users/chaob/OneDrive - University of Toronto/Documents/"
    "EVLP X-ray Imaging Project/NIH_images_512p"
)

In [9]:
# Re-create the PyTorch Lightning Module and load it from the saved checkpoint.
#
# The backbone is built WITHOUT ImageNet weights: load_from_checkpoint restores
# the state dict strictly by default, so every parameter and buffer is
# overwritten by the checkpoint anyway and downloading ImageNet weights first
# is wasted work (it also needs network access).
# NOTE(review): this assumes BinaryClassificationTask does not override
# load_from_checkpoint / relax strict loading — confirm in tasks.binary_classification.
model = resnet50()
# Replace the classification head with a single output unit (one logit per image).
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=1)
task = BinaryClassificationTask.load_from_checkpoint(saved_model_path, model=model)

# Put the whole task (and therefore its sub-modules) in eval mode before
# using the network for inference.
task.eval()
model = task.model  # This is your resnet (pre-trained on NIH CXR)

In [10]:
# Validation split of the NIH CXR dataset, read from data_dir.
# NOTE(review): binary=True presumably reduces the labels to a single binary
# target (the model head has one output) — confirm in NIHCXRDataset.
val_dataset = NIHCXRDataset(data_dir, split='val', binary=True)

In [11]:
# Run the model over the whole validation set and collect, in dataset order,
# the predicted probabilities (`ypreds`) and the ground-truth labels (`labels`).
#
# Predictions are computed one mini-batch at a time through a DataLoader
# instead of one datapoint at a time.
#
# todo: for improvement
#
# 1. PyTorch Lightning has a bunch of metrics,
#    and the confusion matrix is one of them.
#    We would be able to compute the confusion
#    matrix on-the-fly rather than at the end.

batch_size = 16  # tune to the available memory

# shuffle=False keeps predictions in the same order as the dataset.
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

ypreds = []
labels = []
with torch.no_grad():  # inference only: no autograd graph needed
    for images, batch_labels in val_loader:
        logits = model(images)
        ypreds.append(torch.sigmoid(logits))  # logits -> probabilities
        labels.append(batch_labels)

# Concatenate the per-batch results along the sample dimension; the
# probabilities are flattened to one value per sample.
ypreds = torch.cat(ypreds).reshape(-1)
labels = torch.cat(labels)

In [12]:
# Turn probabilities into hard 0/1 predictions and tabulate them against the
# ground truth (rows: true class, columns: predicted class).
threshold = 0.5
predicted = (ypreds > threshold).float()
confusion_matrix(labels, predicted)

array([[6875, 3189],
       [3213, 3652]], dtype=int64)