In [1]:
!pip install efficientnet_pytorch torchtoolbox

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.0.tar.gz (20 kB)
Collecting torchtoolbox
  Downloading torchtoolbox-0.1.5-py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 695 kB/s 
Collecting lmdb
  Downloading lmdb-1.0.0.tar.gz (876 kB)
[K     |████████████████████████████████| 876 kB 1.5 MB/s 
Building wheels for collected packages: efficientnet-pytorch, lmdb
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-py3-none-any.whl size=16035 sha256=cf33a4ee0dc1d413deb497e09bb797c69ce0f7c937dad4bd022ea4f3349f071f
  Stored in directory: /root/.cache/pip/wheels/b7/cc/0d/41d384b0071c6f46e542aded5f8571700ace4f1eb3f1591c29
  Building wheel for lmdb (setup.py) ... [?25l- \ | / - done
[?25h  Created wheel for lmdb: filename=lmdb-1.0.0-cp37-cp37m-linux_x86_64.whl size=276759 sha256=010a211f6c1784273349a2bf40ffc533a6a7

In [2]:
# Imports here
from efficientnet_pytorch import EfficientNet
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import pandas as pd
import os
import random
import math
import skimage.io
#from csv_loader import load_csv

# Tiff visualisation imports and downloads
import numpy as np
import tifffile as tiff

# For re-importing python modules
import importlib
#importlib.reload(csv_loader.py)

#for quadratic score calculator
from sklearn.metrics import cohen_kappa_score


In [3]:
#use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [4]:
# Creating ability to control how many pictures go into the training sample. For debugging / training purposes
sample_size = 10616
df = pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv').copy().sample(sample_size)
df.to_csv("sample.csv", sep=",", index=False)

In [5]:
class load_csv(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)# todo remove sample for debug
        self.root_dir = root_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.annotations)
        
    
    def __getitem__(self, index):
        image_id = self.annotations.iloc[index, 0]
        img_path = os.path.join(self.root_dir, str(image_id) +".png")
        image = torch.from_numpy(skimage.io.imread(img_path)).permute(2,0,1).float()
        gleason_score_map = {'negative':0, '0+0':0,'3+3':1,'3+4':2,'4+3':3,'3+5':4,'4+4':5,'5+3':6,'4+5':7,'5+4':8,'5+5':9}
        gleason_score = str(self.annotations.iloc[index,:]['gleason_score'])
        y_label = torch.tensor(int(gleason_score_map[gleason_score]))
        isup_grade = torch.tensor(int(self.annotations.iloc[index,:]['isup_grade']))
        
        #label = np.zeros(6).astype(np.float32)
        #y_label = label[isup_grade] = 1.
        #y_label = torch.tensor(y_label)
        
        self.transform= transforms.Compose([transforms.ToPILImage(),
                                            transforms.ToTensor()])
                                            
        if self.transform:
            image = self.transform(image)
        
        return (image, y_label, image_id, isup_grade)

In [6]:
gleason_to_isup_score = {'negative':0, '0+0':0,'3+3':1,'3+4':2,'4+3':3,'4+4':4,'3+5':5,'5+3':6,'4+5':7,'5+4':8,'5+5':9}
gleason_score = '3+5'
y_label = int(gleason_to_isup_score[gleason_score])
y_label

5

In [7]:
# Loading csv dataset into the dataset loader function load_csv. 
dataset = load_csv(csv_file='sample.csv', root_dir='../input/prostate-cancer-tiles-4x4x128px-downsampling-4x/train_128x4x4_res1/train_128x4x4_res1')

# Creating sample subsets for validation and testing datasets
sample_size = dataset.annotations.shape[0]
train_ratio = .85
valid_ratio = .05
test_ratio = 1-(train_ratio + valid_ratio)
train_size = int(train_ratio*sample_size)
valid_size = int(valid_ratio*sample_size)
test_size = sample_size - train_size - valid_size

# Defining different datasets and respective dataloaders
train_set, valid_set, test_set = torch.utils.data.random_split(dataset, [train_size, valid_size, test_size])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=5, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=5, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False)
entire_set_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

In [8]:
test_size

1063

In [9]:
# Creating model and uploading/creating needed training components
model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=10)
model._fc = model._fc = nn.Sequential(nn.Linear(model._fc.in_features, 216),
                          nn.ReLU(),
                          nn.Linear(216, 36, bias=True),
                          nn.ReLU(),
                          nn.Linear(36, 10, bias=True),
                          nn.LogSoftmax(dim=1))


if torch.cuda.is_available():
    model = model.cuda()

criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth


HBox(children=(FloatProgress(value=0.0, max=77999237.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b4


In [10]:
def validate_data_function(model, test_loader, criterion):
    test_loss = 0
    accuracy = 0
    pred_y_int_list = []
    pred_y_dec_list = []
    image_id_list = []
    labels_list = []
    isup_list = []
    
    for ii, (inputs, labels, image_id, isup_grade) in enumerate(test_loader):
        
        inputs, labels = inputs.to(device), labels.to(device)
        
        output = model.forward(inputs)
        test_loss += criterion(output,labels.long())#.item()
        
        #ps = torch.exp(output)
        #equality = (labels.argmax(dim=1) == output.argmax(dim=1))
        equality = (labels == output.argmax(dim=1))
        accuracy += equality.type(torch.FloatTensor)
        #pred = output.cpu().data.numpy().argmax()
        #qwk = cohen_kappa_score(pred, labels, weights='quadratic')
        pred_y_int_v1 = output.argmax(dim=1)
        pred_y_int_v2 = int(pred_y_int_v1[0])
        
        pred_y_dec_v1 = torch.max(output)
        pred_y_dec_v2 = np.exp(float(pred_y_dec_v1.item()))
        
        pred_y_int_list.append(pred_y_int_v2)
        pred_y_dec_list.append(pred_y_int_v2 + pred_y_dec_v2)
        image_id_list.append(str(image_id[0]))
        labels_list.append(int(labels[0]))
        isup_list.append(int(isup_grade[0]))
    accuracy = accuracy.mean()
    
    return test_loss, accuracy, image_id, image_id_list, pred_y_int_list, pred_y_dec_list, labels_list, isup_list

In [11]:
# Training parameters and t=0 inputs
epochs = 10
print_every = 500
steps = 0
test_loss = 0

# May the training begin!
for epoch in range(epochs):
    model.train()
    running_loss = 0
        
    for ii, (inputs, labels, image_id, isup_grade) in enumerate(train_loader):
        steps += 1
        
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
       
        outputs = model.forward(inputs)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if steps % print_every == 0:
            model.eval()

            with torch.no_grad():
                valid_loss, accuracy, image_id, image_id_list, pred_y_int_list, pred_y_dec_list, labels_list, isup_list = validate_data_function(model, valid_loader, criterion)
            
            print(f"Epoch: {epoch+1}/{epochs}..| "
                  f"Train loss: {running_loss/print_every:.3f}..| "
                  #f"Train accuracy: {float(loss.item) / running_loss:.3f}..|"
                  f"Validation loss: {valid_loss/print_every:.3f}..| "                  
                  f"Validation accuracy: {accuracy:.3f}|"
                 )
            
            running_loss = 0
            model.train()
    
    path = 'base_model_10cats.pth'
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'classifier_state_dict': model._fc.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss
            }, path)
    
    
    model.cuda() # moving model to GPU for further training

Epoch: 1/10..| Train loss: 1.846..| Validation loss: 0.649..| Validation accuracy: 43.600|
Epoch: 1/10..| Train loss: 1.688..| Validation loss: 0.445..| Validation accuracy: 41.800|
Epoch: 1/10..| Train loss: 1.612..| Validation loss: 0.298..| Validation accuracy: 52.600|
Epoch: 2/10..| Train loss: 0.588..| Validation loss: 0.309..| Validation accuracy: 50.600|
Epoch: 2/10..| Train loss: 1.548..| Validation loss: 0.292..| Validation accuracy: 53.200|
Epoch: 2/10..| Train loss: 1.530..| Validation loss: 0.286..| Validation accuracy: 51.800|
Epoch: 2/10..| Train loss: 1.484..| Validation loss: 0.313..| Validation accuracy: 50.800|
Epoch: 3/10..| Train loss: 1.139..| Validation loss: 0.292..| Validation accuracy: 52.400|
Epoch: 3/10..| Train loss: 1.461..| Validation loss: 0.323..| Validation accuracy: 48.400|
Epoch: 3/10..| Train loss: 1.426..| Validation loss: 0.264..| Validation accuracy: 57.200|
Epoch: 4/10..| Train loss: 0.246..| Validation loss: 0.255..| Validation accuracy: 59.600|

In [12]:
with torch.no_grad():
    valid_loss, accuracy, image_id, image_id_list, pred_y_int_list, pred_y_dec_list, labels_list, isup_list = validate_data_function(model, test_loader, criterion)
            
    print(f"Epoch: {epoch+1}/{epochs}..| "
          f"Train loss: {running_loss/print_every:.3f}..| "
          f"Validation loss: {valid_loss/print_every:.3f}..| "                  
          f"Validation accuracy: {accuracy:.3f}|"
          )

Epoch: 10/10..| Train loss: 0.119..| Validation loss: 3.405..| Validation accuracy: 453.000|


In [13]:
predictions = pd.DataFrame({'image_id': image_id_list,
                            'y_map_label': labels_list,
                            'isup_label': isup_list,
                            'pred_y_dec': pred_y_dec_list,
                            'pred_y_int': pred_y_int_list})

map_to_isup = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':4,'6':4,'7':5,'8':5,'9':5}
pred_y_isup = [map_to_isup[str(x)] for x in predictions['pred_y_int']]
predictions['pred_y_isup']=pred_y_isup
predictions.to_csv("sample_predictions.csv", sep=",", index=False)

predictions

Unnamed: 0,image_id,y_map_label,isup_label,pred_y_dec,pred_y_int,pred_y_isup
0,fdc20a833738385bedbdfdabe9721bde,3,3,3.294655,3,3
1,a1f37b944f9590270b529e9f83610ebb,8,5,3.286679,3,3
2,bbfe245b1afa3fe05e45f380b6cbac8d,1,1,1.678675,1,1
3,639dcd07a4de787c02b44b26a984d202,0,0,0.572002,0,0
4,37a8139578e30b6572bfd3d2145e6543,0,0,5.306932,5,4
...,...,...,...,...,...,...
1058,5754cf219e01bd9b8bae58fcdbfa9ce9,5,4,5.293926,5,4
1059,7a7df1b2eabf6a5b55cba56cd312fcc5,5,4,0.273865,0,0
1060,995df9b93db1e396b57692c6c7f1d673,0,0,0.347992,0,0
1061,fd08b2bce422f40a465664f9a5f9afdd,1,1,1.528394,1,1


In [14]:
equality = predictions[predictions['isup_label']==predictions['pred_y_isup']]
accuracy = len(equality) / len(predictions)
accuracy

0.4270931326434619