In [1]:
import torchvision.models as models
vgg16 = models.vgg16(pretrained=True, progress=True)

In [2]:
from __future__ import division
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as T
import pandas as pd
import os
import pydicom
import numpy as np

In [3]:
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [4]:
data_dir = '/projectnb/ece601/kaggle-pulmonary-embolism/rsna-str-pulmonary-embolism-detection/'
train_csv = data_dir + 'train.csv'
train_dir = data_dir + 'train/'

train = pd.read_csv(train_csv)

In [5]:

class KagglePEDataset(torch.utils.data.Dataset):
    """Kaggle PE dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.pedataframe = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """ Return number of 2D images. (Each CT slice is an independent image.)"""
        return len(self.pedataframe)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                train.StudyInstanceUID[idx],
                                train.SeriesInstanceUID[idx],
                                train.SOPInstanceUID[idx] + '.dcm')
        dicom_image = pydicom.dcmread(img_name) 
        image = dicom_image.pixel_array
        
        # in OSIC we find outside-scanner-regions with raw-values of -2000. 
        # Let's threshold between air (0) and this default (-2000) using -1000
        image[image <= -1000] = 0
        
        # convert to HU using DICOM information
        # HU is a number between -1000 and 1000 (generally)
        # good lung tissue is between -950 and -700 (approximately)
        intercept = dicom_image.RescaleIntercept
        slope = dicom_image.RescaleSlope
        
        if slope != 1:
            image = slope * image.astype(np.float64)
            
        image = image.astype(np.int16)
        image += np.int16(intercept)
        
        # Convert image from numpy array to PIL image (so that we can use pytorch transforms)
        image[image >= 500] = 500
        image[image <= -1000] = -1000
        image = (image + 1000)/1500
        image = image*255
        image = np.uint8(image)
        image = PIL.Image.fromarray(image).convert('RGB')

        # image is 512x512 RGB PIL image
        # pe_present_on_image is 0 or 1
        sample = {'image': image, 
                  'pe_present_on_image': int(train.pe_present_on_image[idx])}

        # Only apply transform to image.
        if self.transform:
            sample['image'] = self.transform(sample['image'])
            
        return sample

In [6]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os
from os import listdir
from matplotlib import pyplot as plt
import os
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader,Dataset

from skimage.color import gray2rgb
import functools
import torch
from tqdm.auto import tqdm
import pydicom
import seaborn as sns
import scipy
import PIL

In [7]:
global_mean = 111.6126708984375
global_std = 79.95233637352047

transform=T.Compose([T.Resize(256),
                     T.RandomCrop(224),
                     T.ToTensor(),
                     T.Normalize(mean=[global_mean, global_mean, global_mean], 
                                          std=[global_std, global_std, global_std]),
                    ])

In [8]:
transformed_dataset = KagglePEDataset(csv_file=train_csv, root_dir=train_dir,
                                           transform=transform)

In [9]:
dataloader = DataLoader(transformed_dataset, batch_size=4,
                        shuffle=True, num_workers=4)

In [10]:
vgg16.classifier._modules['6'] = nn.Linear(4096, 2)

In [11]:
# paralleize model 
vgg16.cuda()
        
# use network for evaluation 
vgg16.eval()

testloader = DataLoader(transformed_dataset, batch_size=1, num_workers=1)
correct = 0

with torch.no_grad(): # Always call no_grad() when evaluating model (remove for training)
    for batch_idx, sample_batched in enumerate(testloader):
        data = torch.autograd.Variable(sample_batched['image'].cuda()), 
        target = torch.autograd.Variable(sample_batched['pe_present_on_image'].cuda())

        # forward
        output = vgg16(data[0].float())
        assert output.shape[1] == 2 # output shape is 1x2

        pred = output.data.max(1)[1]

        correct += pred.eq(target.data).sum()
        #print(correct)
        if batch_idx % 100 == 0 and batch_idx != 0:
            break
            # break early so I don't take forever, I'm just doing sanity check here.

In [13]:
print('accuracy on untrained model: ', correct.item()/batch_idx)

accuracy on untrained model:  0.23
