In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input mount.
for root_dir, _, names_in_dir in os.walk('/kaggle/input'):
    for entry_name in names_in_dir:
        full_path = os.path.join(root_dir, entry_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Environment setup: upgrade torch/torchvision and install pycocotools and
# natsort (natsort is imported by the import cell further down).
# NOTE(review): torch and torchvision are upgraded to whatever is latest, while
# the detection reference scripts are cloned from the release/0.12 branch in the
# next cell -- confirm the two are compatible, or pin the versions here.
# !pip install albumentations==0.5.2
!pip install torchvision -U
!pip install torch -U
!pip install pycocotools
!pip install natsort

In [None]:
!git clone -b release/0.12 https://github.com/pytorch/vision.git

In [None]:
!cat ./vision/version.txt

In [None]:
!mv ./vision/references/detection/*.py .

In [None]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import vision.torchvision as torchvision
# import torchvision.transforms as transforms
from PIL import Image, ImageChops
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

# For plotting learning curve
from torch.utils.tensorboard import SummaryWriter

# from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
# from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
# from pytorch_grad_cam.utils.image import show_cam_on_image

import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline
import cv2
import albumentations as A
import albumentations.pytorch
import xml.etree.ElementTree as ET
from natsort import os_sorted

In [None]:
# Root of the chest X-ray dataset and the image folder of each class.
dataset_path = "../input/chest-xray-box"
pneumonia_dir = f"{dataset_path}/PNEUMONIA/JPEGFiles"
normal_dir = f"{dataset_path}/NORMAL/JPEGFiles"

# Image file names per class, ordered the way a file browser would sort them.
pneumonia_files = os_sorted(os.listdir(pneumonia_dir))
normal_files = os_sorted(os.listdir(normal_dir))

## Transformation

In [None]:
# Training pipeline: resize to 299x299, apply a random affine
# (translate_percent=(0.15, 0.2), rotate=20), then convert to a tensor.
# Bounding boxes are given in pascal_voc format and are transformed together
# with the image; the class ids travel in the 'labels' field.
train_tfm = A.Compose([
            A.augmentations.geometric.resize.Resize(299,299),
            A.augmentations.geometric.transforms.Affine(translate_percent=(0.15, 0.2), rotate=20),
            # A.HorizontalFlip(p=0.5),
            A.pytorch.transforms.ToTensorV2(),
            ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
# Validation pipeline: resize + tensor conversion only (no augmentation),
# still bbox-aware so the boxes are rescaled with the image.
valid_tfm = A.Compose([
            A.augmentations.geometric.resize.Resize(299,299),
            # A.HorizontalFlip(p=0.5),
            A.pytorch.transforms.ToTensorV2(),
            ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
# Test pipeline: image-only (no bbox_params), since the test dataset supplies
# no boxes.
test_tfm = A.Compose([
            A.augmentations.geometric.resize.Resize(299,299),
            # A.HorizontalFlip(p=0.5),
            A.pytorch.transforms.ToTensorV2(),
            ])

# Training

In [None]:
import math
import sys
import time

# import torch
# import torchvision.models.detection.mask_rcnn
import utils
from coco_eval import CocoEvaluator
from coco_utils import get_coco_api_from_dataset


def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
    """Run one training pass over ``data_loader`` and return the metric logger.

    Args:
        model: Detection model; called as ``model(images, targets)`` and
            expected to return a dict of loss tensors.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device every image and target tensor is moved to.
        epoch: Zero-based epoch index; epoch 0 gets a linear LR warm-up.
        print_freq: Logging interval handed to ``MetricLogger.log_every``.
        scaler: Optional gradient scaler; when given, autocast is enabled and
            the backward/step go through the scaler.

    Returns:
        The ``utils.MetricLogger`` holding the loss and lr meters.

    Exits the process via ``sys.exit(1)`` if the reduced loss is not finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"

    # Warm the learning rate up linearly, but only during the first epoch.
    warmup = None
    if epoch == 0:
        warmup = torch.optim.lr_scheduler.LinearLR(
            optimizer,
            start_factor=1.0 / 1000,
            total_iters=min(1000, len(data_loader) - 1),
        )

    use_amp = scaler is not None
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [{key: value.to(device) for key, value in t.items()} for t in targets]
        with torch.cuda.amp.autocast(enabled=use_amp):
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

        # Reduced copies are used for logging and the finiteness check only.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if use_amp:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if warmup is not None:
            warmup.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger


def _get_iou_types(model):
    """Return the IoU types to evaluate for ``model``.

    Always contains "bbox"; "segm" is added for a MaskRCNN and "keypoints"
    for a KeypointRCNN. A DistributedDataParallel wrapper is unwrapped first.
    """
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        core_model = model.module
    else:
        core_model = model

    detection = torchvision.models.detection
    optional_types = (
        (detection.MaskRCNN, "segm"),
        (detection.KeypointRCNN, "keypoints"),
    )
    return ["bbox"] + [name for cls, name in optional_types if isinstance(core_model, cls)]


@torch.inference_mode()
def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and print COCO-style metrics.

    Args:
        model: Detection model; called in eval mode as ``model(images)`` and
            expected to return one dict of tensors per image.
        data_loader: Loader yielding ``(images, targets)``. ``targets`` may be
            either a single dict of batched tensors (what the default collate
            function produces from per-sample dicts) or a list with one dict
            per image; both carry an ``"image_id"`` entry.
        device: Device the images are moved to before inference.

    Returns:
        The ``CocoEvaluator`` after accumulate/summarize have been called.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = "Test:"

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)

            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Pair every output with the id of ITS image. The previous code
            # used one id for the whole batch, which only worked for
            # batch_size == 1.
            if isinstance(targets, dict):
                image_ids = targets["image_id"].reshape(-1).tolist()
            else:
                image_ids = [t["image_id"].item() for t in targets]
            res = dict(zip(image_ids, outputs))

            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        return coco_evaluator
    finally:
        # Restore the thread count even if evaluation raised.
        torch.set_num_threads(n_threads)

In [None]:
def train_valid_test_split(folder, train_size, valid_size, test_size):
    """Split ``folder`` sequentially into train / validation / test lists.

    The first ``train_size`` items become the training split, the next
    ``valid_size`` the validation split, and up to ``test_size`` of the items
    after that the test split. Order is preserved; nothing is shuffled.

    Args:
        folder: Iterable of items (e.g. file names) to split.
        train_size: Number of items in the training split.
        valid_size: Number of items in the validation split.
        test_size: Maximum number of items in the test split, or ``None`` to
            take everything that is left.

    Returns:
        Tuple ``(train_files, valid_files, test_files)`` of lists.
    """
    files = list(folder)

    # Negative sizes are treated as zero so slicing cannot wrap around.
    train_end = max(0, train_size)
    valid_end = train_end + max(0, valid_size)

    train_files = files[:train_end]
    valid_files = files[train_end:valid_end]
    if test_size is None:
        test_files = files[valid_end:]
    else:
        test_files = files[valid_end:valid_end + max(0, test_size)]

    return train_files, valid_files, test_files


In [None]:
# Earlier, larger split kept for reference:
# p_train_files, p_valid_files, p_test_files = train_valid_test_split(pneumonia_files, 3673, 300, 300)
# n_train_files, n_valid_files, n_test_files = train_valid_test_split(normal_files, 1083, 250, 250)
# Current split: a small subset per class (160 train / 40 validation requested),
# with the remaining files of each class used for testing.
p_train_files, p_valid_files, p_test_files = train_valid_test_split(pneumonia_files, 160, 40, 4073)
n_train_files, n_valid_files, n_test_files = train_valid_test_split(normal_files, 160, 40, 1383)

In [None]:
class ChestXRayDatasetTrain(Dataset):
    """Training dataset: one chest X-ray image with a single annotated box.

    Images are read from ``<root>/<CLASS>/JPEGFiles`` and the box from the XML
    file of the same base name in ``<root>/<CLASS>/Rectbox``. ``<CLASS>`` is
    ``NORMAL`` when ``label == 1`` and ``PNEUMONIA`` otherwise.

    Each item is ``(image, [target])``. ``target`` is a dict with ``boxes``
    (the single box, leading dimension squeezed away), ``labels``,
    ``image_id``, ``area`` and ``iscrowd``. The one-element list and the
    squeezed shapes are what the training loop in this notebook relies on
    after default collation with ``batch_size=1``.
    """

    def __init__(self, files, label, tfm=None):
        """Store the file list and derive image / annotation paths.

        Args:
            files: Image file names (not full paths) for one class.
            label: Class id assigned to every box (1 selects the NORMAL folder).
            tfm: Callable invoked as ``tfm(image=..., bboxes=..., labels=...)``;
                required by ``__getitem__``.
        """
        data_path = "../input/chest-xray-box"
        self.files = files
        self.transforms = tfm
        self.label = label
        if label == 1:
            data_path = data_path+"/NORMAL"
        else:
            data_path = data_path+"/PNEUMONIA"
        self.img_files = [os.path.join(data_path+"/JPEGFiles", x) for x in files]
        # Swap whatever image extension is present for ".xml" (not just ".jpeg").
        self.rectboxes = [
            os.path.join(data_path+"/Rectbox", os.path.splitext(x)[0] + ".xml")
            for x in files
        ]

    def __len__(self):
        """Number of images in this dataset."""
        return len(self.files)

    def __getitem__(self,idx):
        """Load, normalise and transform image ``idx`` with its box.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        fname = self.img_files[idx]
        gray_img = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)
        if gray_img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image file: {fname}")
        # Replicate the gray channel to 3 channels and scale to [0, 1].
        img = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2BGR)
        img = img.astype(np.float32)
        img /= 255

        xml = ET.parse(self.rectboxes[idx])
        bndbox = xml.getroot().find("object").find("bndbox")
        # The four children are read positionally as xmin, ymin, xmax, ymax.
        box = [int(bndbox[i].text) for i in range(4)]
        boxes = torch.as_tensor([box], dtype=torch.float32)

        labels = torch.tensor([self.label])
        image_id = torch.tensor([idx+1])
        # One "not crowd" flag per box (previously a zero-length tensor).
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        # NOTE(review): if the random affine moves the box out of the frame the
        # transform may return no boxes and the area computation below will
        # fail -- confirm whether this can happen with the chosen parameters.
        transformed = self.transforms(image=img, bboxes=boxes, labels=labels)

        tfm_img = transformed['image']
        tfm_target = {}
        tfm_target["boxes"] = torch.as_tensor(transformed['bboxes']).squeeze(0)
        tfm_target["labels"] = torch.as_tensor(transformed['labels']).squeeze(0)
        tfm_target["image_id"] = image_id

        tfm_boxes = np.array(tfm_target["boxes"].unsqueeze(0))
        tfm_target["area"] = (tfm_boxes[:, 3] - tfm_boxes[:, 1]) * (tfm_boxes[:, 2] - tfm_boxes[:, 0])
        tfm_target["iscrowd"] = iscrowd

        return tfm_img, [tfm_target]

In [None]:
class ChestXRayDatasetValid(Dataset):
    """Validation dataset: one chest X-ray image with a single annotated box.

    Images are read from ``<root>/<CLASS>/JPEGFiles`` and the box from the XML
    file of the same base name in ``<root>/<CLASS>/Rectbox``. ``<CLASS>`` is
    ``NORMAL`` when ``label == 1`` and ``PNEUMONIA`` otherwise.

    Each item is ``(image, target)`` where ``target`` is a dict with ``boxes``,
    ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    """

    # Gap between the image-id ranges of different labels. Two instances with
    # different labels then never share an image id when they are concatenated,
    # as long as each holds fewer than this many images.
    IMAGE_ID_STRIDE = 1000000

    def __init__(self, files, label, tfm=None):
        """Store the file list and derive image / annotation paths.

        Args:
            files: Image file names (not full paths) for one class.
            label: Class id assigned to every box (1 selects the NORMAL folder).
            tfm: Callable invoked as ``tfm(image=..., bboxes=..., labels=...)``;
                required by ``__getitem__``.
        """
        data_path = "../input/chest-xray-box"
        self.files = files
        self.transforms = tfm
        self.label = label
        if label == 1:
            data_path = data_path+"/NORMAL"
        else:
            data_path = data_path+"/PNEUMONIA"
        self.img_files = [os.path.join(data_path+"/JPEGFiles", x) for x in files]
        # Swap whatever image extension is present for ".xml" (not just ".jpeg").
        self.rectboxes = [
            os.path.join(data_path+"/Rectbox", os.path.splitext(x)[0] + ".xml")
            for x in files
        ]

    def __len__(self):
        """Number of images in this dataset."""
        return len(self.files)

    def _image_id(self, idx):
        """Return an image id that is unique across labels, not just within one."""
        return self.label * self.IMAGE_ID_STRIDE + idx

    def __getitem__(self,idx):
        """Load, normalise and transform image ``idx`` with its box.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        fname = self.img_files[idx]
        gray_img = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)
        if gray_img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image file: {fname}")
        # Replicate the gray channel to 3 channels and scale to [0, 1].
        img = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2BGR)
        img = img.astype(np.float32)
        img /= 255

        xml = ET.parse(self.rectboxes[idx])
        bndbox = xml.getroot().find("object").find("bndbox")
        # The four children are read positionally as xmin, ymin, xmax, ymax.
        box = [int(bndbox[i].text) for i in range(4)]
        boxes = torch.as_tensor([box], dtype=torch.float32)

        labels = torch.tensor([self.label])

        # Plain ``idx`` repeats between the NORMAL and PNEUMONIA subsets, so
        # after concatenation two images would share one id during evaluation.
        image_id = torch.tensor(self._image_id(idx))
        # One "not crowd" flag per box.
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        transformed = self.transforms(image=img, bboxes=boxes, labels=labels)

        tfm_img = transformed['image']
        tfm_target = {}
        tfm_target["boxes"] = torch.as_tensor(transformed['bboxes'])
        tfm_target["labels"] = torch.as_tensor(transformed['labels'])
        tfm_target["image_id"] = image_id

        tfm_boxes = np.array(tfm_target["boxes"])
        tfm_target["area"] = (tfm_boxes[:, 3] - tfm_boxes[:, 1]) * (tfm_boxes[:, 2] - tfm_boxes[:, 0])
        tfm_target["iscrowd"] = iscrowd

        return tfm_img, tfm_target

In [None]:
class ChestXRayDatasetTest(Dataset):
    """Test dataset yielding ``(transformed_image, label)`` without boxes.

    Images are read from ``../input/chest-xray-box/<CLASS>/JPEGFiles`` where
    ``<CLASS>`` is ``NORMAL`` for ``label == 1`` and ``PNEUMONIA`` otherwise.
    """

    def __init__(self, files, label, tfm=None):
        """Remember the file names, class label and image-only transform."""
        class_dir = "/NORMAL" if label == 1 else "/PNEUMONIA"
        image_root = "../input/chest-xray-box" + class_dir + "/JPEGFiles"
        self.files = files
        self.transforms = tfm
        self.label = label
        self.img_files = [os.path.join(image_root, name) for name in files]

    def __len__(self):
        """Number of images in this dataset."""
        return len(self.files)

    def __getitem__(self,idx):
        """Read image ``idx`` as 3-channel float32 in [0, 1] and transform it."""
        gray = cv2.imread(self.img_files[idx], cv2.IMREAD_GRAYSCALE)
        bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR).astype(np.float32)
        bgr /= 255
        return self.transforms(image=bgr)['image'], self.label

In [None]:
# Class ids: 0 is reserved for background, 1 = NORMAL, 2 = PNEUMONIA.
# One train / validation / test dataset is built per class, each with the
# transform pipeline matching its split.
n_train_set = ChestXRayDatasetTrain(n_train_files, label=1, tfm=train_tfm)
n_valid_set = ChestXRayDatasetValid(n_valid_files, label=1, tfm=valid_tfm)
n_test_set = ChestXRayDatasetTest(n_test_files, label=1, tfm=test_tfm)

p_train_set = ChestXRayDatasetTrain(p_train_files, label=2, tfm=train_tfm)
p_valid_set = ChestXRayDatasetValid(p_valid_files, label=2, tfm=valid_tfm)
p_test_set = ChestXRayDatasetTest(p_test_files, label=2, tfm=test_tfm)

In [None]:
# Merge the per-class datasets of each split, NORMAL first, then PNEUMONIA.
train_set = ConcatDataset([n_train_set, p_train_set])
valid_set = ConcatDataset([n_valid_set, p_valid_set])
test_set = ConcatDataset([n_test_set, p_test_set])

In [None]:
print(len(train_set), len(valid_set), len(test_set))

In [None]:
batch_size = 1

In [None]:
# Loaders use the default collate function (no collate_fn is passed), loading
# in the main process (num_workers=0) with pinned host memory.
# NOTE(review): the training and evaluation loops index the collated targets in
# a way that appears to depend on batch_size == 1 -- confirm before raising it.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

In [None]:
BOX_COLOR = (255, 0, 0) # Red
TEXT_COLOR = (255, 255, 255) # White


def visualize_bbox(img, bbox, class_name, color=BOX_COLOR, thickness=2):
    """Draw a single labelled bounding box on ``img`` and return the image.

    Args:
        img: Image array to draw on; it is modified in place by OpenCV.
        bbox: Four values ``(x_min, y_min, x_max, y_max)``; each is cast to int.
        class_name: Text written in a filled strip just above the box.
        color: Colour of the box outline and of the label background.
        thickness: Outline thickness passed to ``cv2.rectangle``.

    Returns:
        The same ``img`` with the box and label drawn.
    """
    x_min, y_min, x_max, y_max = (int(value) for value in bbox)

    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
    # Filled label background: uses the caller's colour so it matches the
    # outline (it used to be hard-wired to BOX_COLOR).
    cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color, -1)
    cv2.putText(
        img,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.35,
        color=TEXT_COLOR,
        lineType=cv2.LINE_AA,
    )
    return img


def visualize(image, bboxes, category_ids, category_id_to_name):
    """Show ``image`` in a new 12x12 figure with every box drawn and labelled.

    The input image is copied first, so the caller's array is left untouched.
    """
    canvas = image.copy()
    for box, cat_id in zip(bboxes, category_ids):
        canvas = visualize_bbox(canvas, box, category_id_to_name[cat_id])
    plt.figure(figsize=(12, 12))
    plt.imshow(canvas)

In [None]:
def draw_train_img(data):
    """Plot one training sample ``(image, [target])`` with its box and label."""
    image_chw, target_list = data[0], data[1]
    first_target = target_list[0]  # each sample carries exactly one target
    visualize(
        np.array(image_chw.permute(1, 2, 0)),  # tensor CHW -> numpy HWC
        [first_target["boxes"]],
        [int(first_target["labels"])],
        {1: 'NORMAL', 2: 'PNEUMONIA'},
    )

In [None]:
train_set[0]

In [None]:
train_set[0][1]

In [None]:
draw_train_img(train_set[0])

In [None]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# NOTE(review): MaskRCNNPredictor is not used in the cells visible here.
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
# Initialize a model, and put it on the device specified.

import torchvision.models as models
# model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
# Faster R-CNN with a ResNet-50 FPN backbone, starting from pretrained weights.
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 3  # 2 class + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

In [None]:
# from engine import train_one_epoch, evaluate

# construct an optimizer over the parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                        momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler: multiply the lr by 0.1 every 3 epochs
# NOTE(review): over 30 epochs this shrinks the lr by ten successive factors
# of 0.1 -- confirm that schedule is intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                            step_size=3,
                                            gamma=0.1)

num_epochs = 30

for epoch in range(num_epochs):

    # one pass over the training data, logging every 10 iterations
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset
    evaluate(model, valid_loader, device=device)


In [None]:
test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0, pin_memory=True)

In [None]:
# 2x2 confusion matrix with PNEUMONIA (label 2) as the positive class:
#   [0][0] predicted PNEUMONIA, truth PNEUMONIA   [0][1] predicted PNEUMONIA, truth NORMAL
#   [1][0] predicted NORMAL,    truth PNEUMONIA   [1][1] predicted NORMAL,    truth NORMAL
confusion_mat = np.zeros((2, 2))
no_detection_count = 0  # images for which the model returned no box at all

model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for batch in tqdm(test_loader):
        imgs, labels = batch
        predictions = model(imgs.to(device))          # Returns predictions

        for pred, truth in zip(predictions, labels):
            if len(pred["labels"]) == 0:
                # Nothing detected: there is no predicted class to score, and
                # indexing [0] would raise IndexError.
                no_detection_count += 1
                continue

            # The first detection is used as the image-level prediction
            # (presumably the highest-scoring one -- TODO confirm).
            test_label = int(pred["labels"][0])
            is_correct = test_label == int(truth)
            if test_label == 2:
                confusion_mat[0][0 if is_correct else 1] += 1
            elif test_label == 1:
                confusion_mat[1][1 if is_correct else 0] += 1

if no_detection_count:
    print(f"{no_detection_count} image(s) had no detections and were skipped")

In [None]:
# Unpack the matrix: row = predicted class, column = true class, with
# PNEUMONIA as the positive class.
(true_pos, false_pos), (false_neg, true_neg) = confusion_mat
total = true_pos + false_neg + false_pos + true_neg

print("confusion_mat", confusion_mat)
print("accuracy", (true_pos + true_neg) / total)

precision = true_pos / (true_pos + false_pos)
recall = true_pos / (true_pos + false_neg)
print("precision", precision)
print("recall", recall)
print("f1", 2*precision*recall / (precision+recall))

In [None]:
def visualize_test(image, bboxes, category_ids, category_id_to_name):
    """Draw every labelled box on a copy of ``image`` and show it on the current axes.

    Unlike ``visualize`` this opens no new figure, so it can fill a subplot.
    """
    canvas = image.copy()
    for box, cat_id in zip(bboxes, category_ids):
        canvas = visualize_bbox(canvas, box, category_id_to_name[cat_id])
    plt.imshow(canvas)

def draw_test_img(data):
    """Plot one ``(image, [target])`` pair on the current axes."""
    image_chw, target_list = data[0], data[1]
    first_target = target_list[0]  # only one target is drawn
    visualize_test(
        np.array(image_chw.permute(1, 2, 0)),  # tensor CHW -> numpy HWC
        [first_target["boxes"]],
        [int(first_target["labels"])],
        {1: 'NORMAL', 2: 'PNEUMONIA'},
    )

In [None]:
# Gallery of predictions for the first (up to) 8 PNEUMONIA test images.
plt.figure(figsize=(16, 8))
plt.suptitle('Inference PNEUMONIA')
label_to_name  = {1: 'NORMAL', 2: 'PNEUMONIA'}
model.eval()
for i in range(min(8, len(p_test_set))):
    img = p_test_set[i][0]
    with torch.no_grad():  # inference only
        predictions = model(img.unsqueeze(0).to(device))          # Returns predictions
    detection = predictions[0]

    ax = plt.subplot(2, 4, i + 1)
    if len(detection["boxes"]) == 0:
        # No box returned: show the bare image instead of failing on [0].
        ax.set_title('pred: none')
        plt.imshow(np.array(img.permute(1, 2, 0)))
        continue

    # Only the first detection is drawn.
    target = {
        "boxes": detection["boxes"][0].cpu(),
        "labels": int(detection["labels"][0]),
    }
    ax.set_title(f'pred: {label_to_name[target["labels"]]}')
    draw_test_img([img, [target]])

plt.tight_layout()

In [None]:
# Gallery of predictions for the first (up to) 8 NORMAL test images.
plt.figure(figsize=(16, 8))
plt.suptitle('Inference NORMAL')
label_to_name  = {1: 'NORMAL', 2: 'PNEUMONIA'}
model.eval()
for i in range(min(8, len(n_test_set))):
    img = n_test_set[i][0]
    with torch.no_grad():  # inference only
        predictions = model(img.unsqueeze(0).to(device))          # Returns predictions
    detection = predictions[0]

    ax = plt.subplot(2, 4, i + 1)
    if len(detection["boxes"]) == 0:
        # No box returned: show the bare image instead of failing on [0].
        ax.set_title('pred: none')
        plt.imshow(np.array(img.permute(1, 2, 0)))
        continue

    # Only the first detection is drawn.
    target = {
        "boxes": detection["boxes"][0].cpu(),
        "labels": int(detection["labels"][0]),
    }
    ax.set_title(f'pred: {label_to_name[target["labels"]]}')
    draw_test_img([img, [target]])

plt.tight_layout()