In [1]:
import os, csv
import numpy as np
from imageio import imread
from skimage.transform import resize

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torchvision

# Notebook-wide configuration.

# Target (rows, cols) passed to CTDataset; every image is resized to this shape.
img_dims = (128, 128)
# [low, high] intensity bounds (-4096, 4096). Only the upper bound is read in
# this notebook: CTDataset divides pixel values by img_range[1].
img_range = [-2**12, 2**12]
# Directory holding the TIFF images; joined with each row's 'tiff_name' by find_files.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
data_path = '/home/masonmcgough/Workspace/Data/siim-medical-image-analysis-tutorial/tiff_images'
# CSV manifest read by find_files (one row per image).
csv_file_path = '/home/masonmcgough/Workspace/Data/siim-medical-image-analysis-tutorial/overview.csv'
# Image extensions of interest. Not referenced by any cell visible in this notebook.
VALID_EXTS = ('.jpg', '.png', '.tif', '.tiff')

## Set up dataset

In [2]:
# define function to get images in directory
def find_files(path, csv_file):
    """Load the image manifest from a CSV file and attach full image paths.

    The first CSV row is treated as the header. Every following non-empty row
    becomes a dict mapping header names to that row's string values, with an
    extra 'tiff_full_path' key holding `path` joined with the row's
    'tiff_name' value.

    Args:
        path: Directory that contains the image files.
        csv_file: Path to a comma-delimited CSV file with a 'tiff_name' column.

    Returns:
        A list of dicts, one per data row, in file order. Empty if the file
        has no header row or no data rows.

    Raises:
        KeyError: If a data row has no 'tiff_name' field.
    """
    # newline='' is what the csv module expects so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with open(csv_file, 'r', newline='') as f:
        f_reader = csv.reader(f, delimiter=',')
        header = next(f_reader, None)
        if header is None:
            # Completely empty file: nothing to list.
            return []
        # csv.reader yields [] for blank lines; skip them instead of creating
        # empty records that would fail the 'tiff_name' lookup below.
        imgs_data = [dict(zip(header, row)) for row in f_reader if row]

    # append path to file names
    for entry in imgs_data:
        entry['tiff_full_path'] = os.path.join(path, entry['tiff_name'])

    return imgs_data
    
# define dataset class
class CTDataset(Dataset):
    """Dataset of image files with binary labels.

    Each item is loaded from disk on access, resized to `img_dims`, given a
    leading channel axis and divided by the module-level `img_range[1]`.

    Args:
        files_list: Sequence of image file paths.
        labels: Sequence of labels parallel to `files_list`. A label whose
            text is 'True' (case/whitespace-insensitive, booleans included)
            maps to class 1; anything else maps to class 0.
        img_dims: Output (rows, cols) passed to `resize`.
    """

    def __init__(self, files_list, labels, img_dims=(256, 256)):
        self.files_list = files_list
        self.labels = labels
        self.img_dims = img_dims

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files_list)

    def __getitem__(self, index):
        """Load, resize and scale one image.

        Returns:
            dict with keys 'image' (array with a leading channel axis),
            'label' (int, 0 or 1) and 'index' (the requested index).
        """
        img = np.asarray(imread(self.files_list[index]))
        # preserve_range=True keeps the raw intensities. Without it,
        # skimage.transform.resize first converts integer images with
        # img_as_float (rescaling them to [-1, 1]), and the division below
        # would then normalise the data a second time.
        img = resize(img, self.img_dims, preserve_range=True)
        img = np.expand_dims(img, 0)  # add channel axis in front
        img = img / img_range[1]

        # Labels arrive as text from the CSV ('True' / 'False'); normalise so
        # that boolean True or differently-cased text is not silently read as 0.
        is_positive = str(self.labels[index]).strip().lower() == 'true'
        label = 1 if is_positive else 0
        return {'image': img, 'label': label, 'index': index}
    
# Read the CSV manifest: one dict per image, each carrying its full file path.
imgs_data = find_files(data_path, csv_file_path)
print("{0} files found in '{1}'".format(len(imgs_data), data_path))

# Pull out parallel lists of paths and 'Contrast' labels, then wrap them in the
# dataset so images are loaded lazily, one item at a time.
imgs_paths = [record['tiff_full_path'] for record in imgs_data]
imgs_labels = [record['Contrast'] for record in imgs_data]
mydataset = CTDataset(imgs_paths, imgs_labels, img_dims=img_dims)
print("Number of images: {0}".format(len(mydataset)))

# Serve the dataset in shuffled mini-batches of four images.
mydataloader = DataLoader(mydataset, shuffle=True, batch_size=4)

100 files found in '/home/masonmcgough/Workspace/Data/siim-medical-image-analysis-tutorial/tiff_images'
Number of images: 100


## Load VGG model

In [6]:
import torch.utils.model_zoo as model_zoo

class VGG(nn.Module):
    """VGG-style network: a convolutional feature extractor plus a 3-layer classifier.

    Args:
        features: Module (e.g. the output of `make_layers`) that maps an image
            batch to feature maps.
        num_classes: Width of the final linear layer.
        init_weights: If True, re-initialise all conv / batch-norm / linear
            layers with `_initialize_weights`.
        classifier_in_features: Number of values per sample after the feature
            maps are flattened. The default 8192 equals 512 * 4 * 4, i.e. 512
            channels on a 4x4 map; pass a different value when the feature
            extractor produces another flattened size.
    """

    def __init__(self, features, num_classes=1000, init_weights=True,
                 classifier_in_features=8192):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(classifier_in_features, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        x = self.features(x)
        # Collapse everything except the batch axis into one vector per sample.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise every conv, batch-norm and linear layer in the network."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Zero-mean normal with std sqrt(2 / n), where
                # n = kernel_height * kernel_width * out_channels.
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, (2. / n)**0.5)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                # Start batch norm as an identity scale with no shift.
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

def make_layers(cfg, in_channels=3, batch_norm=False):
    """Build the convolutional feature extractor described by `cfg`.

    Args:
        cfg: Sequence whose entries are either the string 'M' (insert a 2x2,
            stride-2 max-pool) or an integer (insert a 3x3, padding-1
            convolution with that many output channels, followed by ReLU).
        in_channels: Channel count of the network input.
        batch_norm: If True, place a BatchNorm2d between each convolution and
            its ReLU.

    Returns:
        nn.Sequential containing the layers in `cfg` order.
    """
    modules = []
    channels = in_channels  # channel count feeding the next convolution
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        channels = spec
    return nn.Sequential(*modules)


# Layer layouts consumed by make_layers: an integer is the output-channel count
# of a 3x3 convolution, 'M' is a 2x2 max-pool. Every layout contains five 'M'
# entries, so the spatial size is halved five times.
# 'A' is the layout used by vgg11 below. Counting convolutions plus the three
# classifier linears, 'A'/'B'/'D'/'E' have 11/13/16/19 weight layers.
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

# Download locations of pretrained checkpoints, keyed by model name
# ('_bn' suffix marks the batch-norm variants). Only 'vgg11' is read by the
# code visible in this notebook.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

def vgg11(pretrained=False, in_channels=3, **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, download the 'vgg11' checkpoint and copy
            into the model every tensor whose name and shape match. Tensors
            that do not fit (for example the first convolution when
            `in_channels` is not 3, the first classifier layer when its input
            width differs from the checkpoint, or the last layer when
            `num_classes` differs) are skipped with a warning and keep their
            freshly constructed values.
        in_channels (int): Number of channels in the input images.
        **kwargs: Forwarded to the `VGG` constructor (e.g. `num_classes`).
            `init_weights` is forced to False when `pretrained` is True.

    Returns:
        The constructed `VGG` model.
    """
    import warnings

    if pretrained:
        # Custom initialisation is skipped because weights are loaded below.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfg['A'], in_channels), **kwargs)
    if pretrained:
        checkpoint = model_zoo.load_url(model_urls['vgg11'])
        # Start from the model's own state and overwrite only the entries the
        # checkpoint can fill, so load_state_dict never sees a shape mismatch.
        state = model.state_dict()
        skipped = []
        for name, tensor in checkpoint.items():
            if name in state and state[name].size() == tensor.size():
                state[name] = tensor
            else:
                skipped.append(name)
        model.load_state_dict(state)
        if skipped:
            warnings.warn(
                "Pretrained tensors not loaded (missing or shape mismatch): "
                "{0}".format(', '.join(skipped)))
    return model


# Build an untrained VGG-11 with a single input channel (CTDataset adds one
# channel axis per image) and two output classes (labels are encoded as 0 / 1).
model = vgg11(pretrained=False, in_channels=1, num_classes=2)
print(model)

# Debug aid: uncomment to inspect the first convolution and confirm how many
# input channels it accepts.
# print(model.features[0])

RuntimeError: While copying the parameter named features.0.weight, whose dimensions in the model are torch.Size([64, 1, 3, 3]) and whose dimensions in the checkpoint are torch.Size([64, 3, 3, 3]).

## Train model on dataset

In [5]:
# define loss and optimizer
# NOTE: this cell needs `model` and `mydataloader` from the cells above to
# exist in the kernel; run the notebook top to bottom.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.8)

disp_interval = 5  # print the mean loss every `disp_interval` batches
n_epochs = 50

model.train()  # make sure dropout is active while training
for epoch in range(n_epochs):
    running_loss = 0.0
    for i, data in enumerate(mydataloader, 0):
        # Cast images to float32 to match the model's parameters and labels to
        # int64 as CrossEntropyLoss expects class indices. Tensors track
        # gradients directly, so no Variable wrapper is needed.
        inputs = data['image'].float()
        labels = data['label'].long()

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()
        # The loss is a 0-dim tensor; .item() extracts the Python number
        # (indexing it with [0] raises on current PyTorch versions).
        running_loss += loss.item()

        if i % disp_interval == disp_interval - 1:
            print("Epoch: {0}, Batch: {1}, Loss: {2}".format(
                epoch + 1, i + 1, running_loss / disp_interval))
            running_loss = 0.0


NameError: name 'model' is not defined