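# Inference

Run the trained pneumonia classifier on the test DICOM files and compare its predictions with the ground-truth labels.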


## Imports and setup


In [1]:
# Reload imported modules before each cell executes, so edits to the project
# sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import sys
from pathlib import Path
from typing import Dict, Optional, Tuple, Union

import numpy as np
import pandas as pd
import torch
from pydicom import dcmread
from rich import traceback
from torch import Tensor
from torchvision.models import MobileNet_V3_Large_Weights

%matplotlib inline
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Make the project sources importable. The path is relative, so it resolves
# against the notebook's working directory.
src_path: str = "../src"
# Guard the append so that re-running this cell does not stack duplicate entries.
if src_path not in sys.path:
    sys.path.append(src_path)
# Install rich's traceback handler; assigning the return value to `_` keeps the
# cell from echoing it.
_ = traceback.install()

In [4]:
from utils import dictify_dicom
from datasets.pneumonia_dicom_dataset import PneumoniaDicomDataset
from models.pneumonia_classifier import PneumoniaClassifier

In [5]:
# Replace any handlers already attached to the root logger (force=True) and
# set the root level to INFO in a single call.
logging.basicConfig(level=logging.INFO, force=True)

In [6]:
# Pre-processing transforms that ship with the default MobileNetV3-Large weights.
REQUIRED_TRANSFORMS = MobileNet_V3_Large_Weights.DEFAULT.transforms()

# Project data locations, relative to the notebook's working directory.
DATA_ROOT: Path = Path("../data")
DICOM_ROOT: Path = DATA_ROOT / "test_dicom"
OUTPUTS_DIR: Path = DATA_ROOT / "model_outputs"
# Resolved from the current user's home directory instead of a hard-coded
# /home/<user> prefix, so the notebook is not tied to one machine account.
XRAY_IMAGES_ROOT: Path = Path.home() / "Downloads" / "nih_chest_x_rays"
# Directory searched for model checkpoints, and the text file holding the
# decision threshold.
LOGS_PATH: Path = OUTPUTS_DIR / "mobilenet_v3_large"
BEST_TH_PATH: Path = OUTPUTS_DIR / "best_th.txt"

## 1. Load and pre-process test DICOM images


In [7]:
def gather_dicoms(dicom_root: Path) -> pd.DataFrame:
    """Gather DICOM files and tabulate their metadata alongside their file paths.

    Args:
        dicom_root: Directory searched recursively for ``*.dcm`` files.

    Returns:
        A dataframe with one row per DICOM file, indexed by ``id`` (the file name
        without its extension) and sorted by that index. Columns are the resolved
        ``file_path`` plus the fields returned by ``dictify_dicom``, with names
        lower-cased and spaces replaced by underscores. The ``pixel_data`` column
        is dropped when present. The frame is empty when no DICOM file is found.
    """
    dicom_meta = {}
    for dicom_file in dicom_root.glob("**/*.dcm"):
        # NOTE(review): rows are keyed by file stem, so two files with the same name
        # in different sub-directories overwrite each other -- confirm stems are unique.
        dicom_meta[dicom_file.stem] = {
            "file_path": str(dicom_file.resolve()),
            **dictify_dicom(dcmread(dicom_file)),
        }

    dicom_meta_df = pd.DataFrame(dicom_meta).transpose()
    dicom_meta_df.columns = [c.lower().replace(" ", "_") for c in dicom_meta_df.columns]

    # errors="ignore": with no files found (or no pixel data in any file) there is no
    # "pixel_data" column, and an unconditional drop would raise a KeyError.
    return (
        dicom_meta_df.drop(columns="pixel_data", errors="ignore")
        .sort_index()
        .rename_axis("id")
    )

In [8]:
# Collect the metadata of every DICOM file found under DICOM_ROOT and display it.
dicoms_data = gather_dicoms(DICOM_ROOT)
dicoms_data

Unnamed: 0_level_0,file_path,sop_class_uid,sop_instance_uid,modality,study_description,patient_id,patient's_sex,patient's_age,body_part_examined,patient_position,study_instance_uid,series_instance_uid,samples_per_pixel,photometric_interpretation,rows,columns,bits_allocated,bits_stored,high_bit,pixel_representation
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
test1,/home/uziel/Development/pneumonia_detection/da...,1.2.840.10008.5.1.4.1.1.7,1.3.6.1.4.1.11129.5.5.110503645592756492463169...,DX,No Finding,2,M,81,CHEST,PA,1.3.6.1.4.1.11129.5.5.112507010803284478207522...,1.3.6.1.4.1.11129.5.5.112630850362182468372440...,1,MONOCHROME2,1024,1024,8,8,7,0
test2,/home/uziel/Development/pneumonia_detection/da...,1.2.840.10008.5.1.4.1.1.7,1.3.6.1.4.1.11129.5.5.153751009835107614666834...,DX,Cardiomegaly,1,M,58,CHEST,AP,1.3.6.1.4.1.11129.5.5.111396399361969898205364...,1.3.6.1.4.1.11129.5.5.195628213694300498946760...,1,MONOCHROME2,1024,1024,8,8,7,0
test3,/home/uziel/Development/pneumonia_detection/da...,1.2.840.10008.5.1.4.1.1.7,1.3.6.1.4.1.11129.5.5.179222148351666120521423...,DX,Effusion,61,M,77,CHEST,AP,1.3.6.1.4.1.11129.5.5.189886800072183603320722...,1.3.6.1.4.1.11129.5.5.110145974268321300517474...,1,MONOCHROME2,1024,1024,8,8,7,0
test4,/home/uziel/Development/pneumonia_detection/da...,1.2.840.10008.5.1.4.1.1.7,1.3.6.1.4.1.11129.5.5.110503645592756492463169...,DX,No Finding,2,M,81,RIBCAGE,PA,1.3.6.1.4.1.11129.5.5.112507010803284478207522...,1.3.6.1.4.1.11129.5.5.112630850362182468372440...,1,MONOCHROME2,1024,1024,8,8,7,0
test5,/home/uziel/Development/pneumonia_detection/da...,1.2.840.10008.5.1.4.1.1.7,1.3.6.1.4.1.11129.5.5.110503645592756492463169...,CT,No Finding,2,M,81,CHEST,PA,1.3.6.1.4.1.11129.5.5.112507010803284478207522...,1.3.6.1.4.1.11129.5.5.112630850362182468372440...,1,MONOCHROME2,1024,1024,8,8,7,0
test6,/home/uziel/Development/pneumonia_detection/da...,1.2.840.10008.5.1.4.1.1.7,1.3.6.1.4.1.11129.5.5.110503645592756492463169...,DX,No Finding,2,M,81,CHEST,XX,1.3.6.1.4.1.11129.5.5.112507010803284478207522...,1.3.6.1.4.1.11129.5.5.112630850362182468372440...,1,MONOCHROME2,1024,1024,8,8,7,0


In [9]:
# Wrap the DICOM metadata in the project dataset, applying the transforms that
# ship with the pretrained MobileNetV3 weights.
# NOTE(review): six DICOMs are listed above but only three predictions appear at
# the end of the notebook -- presumably PneumoniaDicomDataset filters out
# unsupported files (e.g. CT modality, RIBCAGE body part, XX position); confirm.
dicoms_dataset = PneumoniaDicomDataset(dicoms_data, transform=REQUIRED_TRANSFORMS)

## 2. Load model and make predictions


In [10]:
def load_model(checkpoint_path: Path):
    """Restore a pneumonia classifier from a checkpoint, ready for inference.

    Args:
        checkpoint_path: Path to the checkpoint the classifier is restored from.

    Returns:
        The restored ``PneumoniaClassifier`` after ``eval()`` has been called on it.
    """
    classifier = PneumoniaClassifier.load_from_checkpoint(checkpoint_path)
    classifier.eval()  # inference only: switch out of training mode

    return classifier


def predict_image(model: "PneumoniaClassifier", img: Tensor, th: float) -> bool:
    """Use model to predict whether an image shows the presence of pneumonia.

    Args:
        model: A trained model to detect the presence of pneumonia on chest x-rays.
        img: A single chest x-ray image (without a batch dimension).
        th: The threshold to determine the presence of pneumonia from the model
            probability output.

    Returns:
        A plain Python bool indicating whether the image is predicted to show
            presence of pneumonia (model output strictly greater than ``th``).
    """
    # Inference only: skip autograd bookkeeping during the forward pass.
    with torch.no_grad():
        # The model is fed a batch, so add a leading batch dimension of size 1.
        output = model(img.unsqueeze(0))

    # Take the first value of the single batch entry and convert the 0-dim tensor
    # to a Python bool so the return value matches the annotated type.
    return bool((output > th)[0][0].item())

In [11]:
# glob() yields paths in arbitrary order, so sort them to make the choice of
# checkpoint reproducible, and fail with a clear message when none exists.
checkpoint_paths = sorted(LOGS_PATH.glob("**/*.ckpt"))
if not checkpoint_paths:
    raise FileNotFoundError(f"No '*.ckpt' checkpoint found under {LOGS_PATH}")
model = load_model(checkpoint_paths[0])
# The decision threshold is stored as plain text in BEST_TH_PATH.
best_th = float(BEST_TH_PATH.read_text())

In [12]:
# Pair each sample's label (cast to boolean) with the thresholded model prediction.
y_true_pred = [
    (target.bool(), predict_image(model, image, best_th))
    for image, target in dicoms_dataset
]

## 3. Evaluate predictions


In [13]:
# Side-by-side table of labels and predictions, cast to plain booleans.
# NOTE(review): in the saved output every sample is labelled negative yet
# predicted positive -- worth checking the threshold and the checkpoint used.
pd.DataFrame(y_true_pred, columns=["Ground truth", "Predicted"]).astype(bool)

Unnamed: 0,Ground truth,Predicted
0,False,True
1,False,True
2,False,True
