In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np 
import pandas as pd
import pydicom

import torch
import torchvision.models as models

from glob import glob

In [5]:
import os
from torchvision import datasets
from PIL import Image
import torchvision.transforms as transforms

### TODO: Write data loaders for training, validation, and test sets
## Specify appropriate transforms, and batch_sizes
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 16

data_transform_train = transforms.Compose([
    transforms.Resize(234),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
data_transform_test = transforms.Compose([
    transforms.Resize(234),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

data_dir = "../input/pngdata/data/"
train_dir = os.path.join(data_dir, 'train/')
valid_dir = os.path.join(data_dir, 'valid/')
#test_dir = os.path.join(data_dir, 'test/')

train_data = datasets.ImageFolder(train_dir, transform=data_transform_train)
valid_data = datasets.ImageFolder(valid_dir, transform=data_transform_test)
#test_data = datasets.ImageFolder(test_dir, transform=data_transform_test)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
#test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

transfer_loaders = {
    'train' : train_loader,
    'valid' : valid_loader
    #'test'  : test_loader
}

In [6]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
            # initialize weights to zero
            optimizer.zero_grad()

            output = model(data)
            
            # calculate loss
            loss = criterion(output, target)
            
            # back prop
            loss.backward()
            
            # grad
            optimizer.step()
            
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
            if batch_idx % 100 == 0:
                print('Epoch %d, Batch %d loss: %.6f' %
                  (epoch, batch_idx + 1, train_loss))
            
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
            output = model(data)
            loss = criterion(output,target)
            
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))

        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        ## TODO: save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model.state_dict(),save_path)
            valid_loss_min = valid_loss
            
    # return trained model
    return model

In [7]:
def predict_MGMT(img_path):
    # load the image and return the predicted breed
    
    in_transform = transforms.Compose([
                        transforms.Resize(size=(224, 224)),
                        transforms.ToTensor(), 
                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    image = Image.open(img_path).convert('RGB')
    
    # discard the transparent, alpha channel (that's the :3) and add the batch dimension
    image = in_transform(image)[:3,:,:].unsqueeze(0)
    
    
    if use_cuda:
        image = image.cuda()
    
    model_transfer.eval()
    
    output = model_transfer(image)
    
    return output

In [8]:
import torchvision.models as models
import torch.nn as nn

## Specify model architecture 
model_transfer = models.resnet50(pretrained=True)

for param in model_transfer.parameters():
    param.requires_grad = True

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [6]:
# print model
model_transfer

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
# change last fc layer
model_transfer.fc = nn.Linear(2048,2, bias=True)

# set fc parameters to be optimized
fc_parameters = model_transfer.fc.parameters()
for param in fc_parameters:
    param.requires_grad = True

In [8]:
# check if CUDA is available
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


In [9]:
if use_cuda:
    model_transfer = model_transfer.cuda()

In [10]:
import torch.optim as optim

criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = optim.Adam(model_transfer.parameters(), lr=0.001)

In [11]:
n_epochs = 15

# train the model
model_transfer = train(n_epochs, transfer_loaders, model_transfer, optimizer_transfer, criterion_transfer, use_cuda, 'model_transfer.pt')

Epoch 1, Batch 1 loss: 0.799565
Epoch 1, Batch 101 loss: 0.764819
Epoch: 1 	Training Loss: 0.756565 	Validation Loss: 0.699301
Validation loss decreased (inf --> 0.699301).  Saving model ...
Epoch 2, Batch 1 loss: 0.727783
Epoch 2, Batch 101 loss: 0.703968
Epoch: 2 	Training Loss: 0.703312 	Validation Loss: 0.698616
Validation loss decreased (0.699301 --> 0.698616).  Saving model ...
Epoch 3, Batch 1 loss: 0.605853
Epoch 3, Batch 101 loss: 0.695375
Epoch: 3 	Training Loss: 0.695316 	Validation Loss: 0.694577
Validation loss decreased (0.698616 --> 0.694577).  Saving model ...
Epoch 4, Batch 1 loss: 0.702210
Epoch 4, Batch 101 loss: 0.685230
Epoch: 4 	Training Loss: 0.688005 	Validation Loss: 0.700477
Epoch 5, Batch 1 loss: 0.694741
Epoch 5, Batch 101 loss: 0.690554
Epoch: 5 	Training Loss: 0.688886 	Validation Loss: 0.691309
Validation loss decreased (0.694577 --> 0.691309).  Saving model ...
Epoch 6, Batch 1 loss: 0.729568
Epoch 6, Batch 101 loss: 0.687540
Epoch: 6 	Training Loss: 0.6

In [9]:
# load the model that got the best validation accuracy (uncomment the line below)
model_transfer.load_state_dict(torch.load('model_transfer.pt'))

FileNotFoundError: [Errno 2] No such file or directory: 'model_transfer.pt'

In [17]:
# fetch predictions
path = '../input/pngdata/data/test'
files = os.listdir('../input/pngdata/data/test')

pred_values = []
case = []

for i in range(len(files)):
    output = predict_MGMT(f'{path}/{files[i]}')
    pred_values.append(output)
    case.append(files[i].split('-')[0])

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.90 GiB total capacity; 14.52 GiB already allocated; 17.75 MiB free; 15.01 GiB reserved in total by PyTorch)

In [None]:
df = pd.DataFrame()
df['MGMT_pred'] = pred_values
df['case'] = case

df