In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch import tensor
from torchvision import transforms, utils
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

import cv2
import pandas as pd
import pydicom as dicom
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
import torch.optim as optim
import numpy as np
from PIL import Image
from skimage import exposure
# from detectionutils import MetricLogger, SmoothedValue, warmup_lr_scheduler
from collections import defaultdict, deque
import datetime
import pickle
import time
import torch.distributed as dist
import errno
from tqdm import tqdm

from glob import glob
import os


Bad key "text.kerning_factor" on line 4 in
C:\Users\lasse\.conda\envs\pytorch\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
http://github.com/matplotlib/matplotlib/blob/master/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
if torch.cuda.is_available():
    # you can continue going on here, like cuda:1 cuda:2....etc.
    device = torch.device("cuda:0")
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


In [3]:
torch.cuda.empty_cache()

In [4]:
DATA_DIR = os.path.join(".", "rsa-pneumonia-data")

TRAIN_IMAGES = os.path.join(DATA_DIR, "stage_2_train_images")
TEST_IMAGES = os.path.join(DATA_DIR, "stage_2_test_images")
PRED_MASK_DIR = os.path.join(DATA_DIR, "stage_2_mask_images")

TRAIN_ANNOTATIONS_FILE = "stage_2_train_labels.csv"
TRAIN_CLASS_FILE = "stage_2_detailed_class_info.csv"

In [5]:
class OpacityDataset(Dataset):

    def __init__(self, csv_file, root_dir, image_dir, subsample, transform=None):
        self.classes_df = pd.read_csv(csv_file)
        self.classes_df = self.classes_df.dropna()
        self.patient_ids = pd.unique(self.classes_df["patientId"])
        
        if subsample is not None:
            self.patient_ids = self.patient_ids[:subsample]
        self.root_dir = root_dir
        self.image_dir = image_dir
        self.transform = transform

        # Convert our classes to integers!
        self.class_dict = {
            "Normal": 0,
            "No Lung Opacity / Not Normal": 0,
            "Lung Opacity": 1
        }

    def _get_boxes_at_patient_id(self, patient_id):
        patient_boxes = self.classes_df[self.classes_df["patientId"] == patient_id]
        boxes = []
        for i in range(len(patient_boxes)):
            patient_box = patient_boxes.iloc[i]
      
            # Get the boxes
            x = patient_box["x"]
            y = patient_box["y"]
            width = patient_box["width"]
            height = patient_box["height"]
            
            boxes.append([
                round(x / 4),
                round(x / 4),
                round((x + width) / 4),
                round((y + height) / 4),
            ])
            break
        return boxes
        
    def __len__(self):
        return len(self.patient_ids)

    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()

        # Get the image, preprocess it for our model
        image_path = os.path.join(
            self.image_dir, self.classes_df.iloc[index, 0])
        image = dicom.read_file(image_path + ".dcm").pixel_array
        image = image[::4, ::4]
        image = Image.fromarray(image).convert("RGB")
 
        boxes = self._get_boxes_at_patient_id(self.classes_df.iloc[index]["patientId"])
        
        if self.transform:
            image = self.transform(image)
            
        target = {
            "boxes": torch.as_tensor(boxes, dtype=torch.int64),
            "labels": torch.as_tensor([1 for _ in range(len(boxes))], dtype=torch.int64)
        }
            
        return image, target
            
#         sample = {
#             "image": image,
#             "target": {
#                 "boxes": torch.as_tensor(boxes, dtype=torch.float32),
#                 "labels": torch.as_tensor([1 for _ in range(len(boxes))], dtype=torch.int64)
#             }
#         }

#         return sample

In [6]:
validation_split = 0.2;

opacity_dataset = OpacityDataset(
    csv_file=os.path.join(DATA_DIR, TRAIN_ANNOTATIONS_FILE),
    root_dir=DATA_DIR,
    image_dir=TRAIN_IMAGES,
    subsample=100,
#     subsample=None,
    transform=transforms.Compose([
        transforms.Grayscale(3),
        transforms.ToTensor()
    ])
)

dataset_size = len(opacity_dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
np.random.seed(1)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Create samplers for data loading
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

opacity_dataset[0][1]

{'boxes': tensor([[ 66,  66, 119, 133]]), 'labels': tensor([1])}

# Model Definition
## Predefined FasterRCNN Model

In [7]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=1)
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequent

## Custom Backbone Model

In [8]:
# backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# backbone.out_channels = 1280

# anchor_generator = AnchorGenerator(sizes=((16, 32, 64),),
#                                    aspect_ratios=((0.5, 1.0, 2.0),))

# roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"],
#                                                 output_size=7,
#                                                 sampling_ratio=2)

# model = FasterRCNN(backbone,
#                    num_classes=1,
#                    rpn_anchor_generator=anchor_generator,
#                    box_roi_pool=roi_pooler)

# model.to(device)

# Hyperparam Definition

In [9]:
# Hyperparameters
epochs = 20
num_classes = 1
batch_size = 2
learning_rate = .0001

# Dataloaders

In [10]:
train_loader = torch.utils.data.DataLoader(
    opacity_dataset, 
    batch_size=batch_size, 
    sampler=train_sampler,
    num_workers=0,
    collate_fn=lambda x: tuple(zip(*x))
)

validation_loader = torch.utils.data.DataLoader(
    opacity_dataset, 
    batch_size=batch_size, 
    sampler=valid_sampler,
    num_workers=0,
    collate_fn=lambda x: tuple(zip(*x))
)

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params, lr=0.000001, momentum=0.9, weight_decay=0.0005)

In [12]:
class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]

    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict


class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        log_msg = self.delimiter.join([
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
            'max mem: {memory:.0f}'
        ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                print(log_msg.format(
                    i, len(iterable), eta=eta_string,
                    meters=str(self),
                    time=str(iter_time), data=str(data_time),
                    memory=torch.cuda.max_memory_allocated() / MB))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))


def collate_fn(batch):
    return tuple(zip(*batch))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):

    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)


def mkdir(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)


def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

In [13]:
def get_num_from_tensor(tensorItem):
    return float(tensorItem.cuda().cpu().detach().numpy().item())

def get_numpy_from_tensor(tensorItem):
    return tensorItem.cuda().cpu().numpy()

In [14]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
#     for images, targets in metric_logger.log_every(data_loader, print_freq, header):

    for images, targets in data_loader:
        
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        
        print(" - ".join([key + ": " + str("%.3f" % get_num_from_tensor(value)) for key, value in loss_dict.items()]))
        
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        
        del loss_dict


In [15]:
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [16]:
losses_box_reg = []
losses_objectness = []
losses_rpn_box_reg = []

for epoch in range(5):
    # get the inputs; data is a list of [inputs, labels]

    train_one_epoch(model, optimizer, train_loader, device, epoch)

    running_loss_box_reg = 0.0
    running_loss_objectness = 0.0
    running_loss_rpn_box_reg = 0.0
    
    running_loss_box_reg_count = 0
    running_loss_objectness_count = 0
    running_loss_rpn_box_reg_count = 0

    for images, targets in validation_loader:
        
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        
#         print(loss_dict)
        
        if not torch.isnan(loss_dict["loss_box_reg"]).any() and not torch.isinf(loss_dict["loss_box_reg"]).any():
            running_loss_box_reg += get_num_from_tensor(loss_dict["loss_box_reg"])
            running_loss_box_reg_count += 1
            
        if not torch.isnan(loss_dict["loss_objectness"]).any() and not torch.isinf(loss_dict["loss_objectness"]).any():
            running_loss_objectness += get_num_from_tensor(loss_dict["loss_objectness"])
            running_loss_objectness_count += 1
            
        if not torch.isnan(loss_dict["loss_rpn_box_reg"]).any() and not torch.isinf(loss_dict["loss_rpn_box_reg"]).any():
            running_loss_rpn_box_reg += get_num_from_tensor(loss_dict["loss_rpn_box_reg"])
            running_loss_rpn_box_reg_count += 1
            
        del loss_dict

    running_loss_box_reg = running_loss_box_reg / (running_loss_box_reg_count if running_loss_box_reg_count > 0 else 1)
    running_loss_objectness = running_loss_objectness / (running_loss_objectness_count if running_loss_objectness_count > 0 else 1)
    running_loss_rpn_box_reg = running_loss_rpn_box_reg / (running_loss_rpn_box_reg_count if running_loss_rpn_box_reg_count > 0 else 1)

    print("--------------------------------------------------------------------------------------------------------")
    print("[Epoch %d] running_loss_box_reg: %.5f  running_loss_objectness: %.5f  running_loss_rpn_box_reg: %.5f" %
          (epoch + 1, running_loss_box_reg, running_loss_objectness, running_loss_rpn_box_reg))
    print("--------------------------------------------------------------------------------------------------------")

    losses_box_reg.append(running_loss_box_reg)
    losses_objectness.append(running_loss_objectness)
    losses_rpn_box_reg.append(running_loss_rpn_box_reg)
    
#     lr_scheduler.step()

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


loss_classifier: 0.000 - loss_box_reg: 0.000 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.005
loss_classifier: 0.000 - loss_box_reg: 0.011 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.007
loss_classifier: -0.000 - loss_box_reg: 0.015 - loss_objectness: 0.693 - loss_rpn_box_reg: nan
loss_classifier: -0.000 - loss_box_reg: 0.000 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.011
loss_classifier: 0.000 - loss_box_reg: 0.000 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.009
loss_classifier: 0.000 - loss_box_reg: 0.019 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.005
loss_classifier: -0.000 - loss_box_reg: 0.000 - loss_objectness: 0.693 - loss_rpn_box_reg: nan
loss_classifier: 0.000 - loss_box_reg: 0.000 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.015
loss_classifier: 0.000 - loss_box_reg: 0.019 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.010
loss_classifier: 0.000 - loss_box_reg: 0.000 - loss_objectness: 0.694 - loss_rpn_box_reg: 0.012
loss_classifier: 0.000 - loss_box_reg: 0.

RuntimeError: transform: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered

In [None]:
for images, targets in train_loader:
    model.eval()
    images_cuda = list(img.to(device) for img in images)
    targets_cuda = [{k: v.to(device) for k, v in t.items()} for t in targets]
    outputs = model(images)
    print(outputs)