# Importing modules

In [1]:
!pip install efficientnet_pytorch torchtoolbox

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.6.3.tar.gz (16 kB)
Collecting torchtoolbox
  Downloading torchtoolbox-0.1.5-py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 2.2 MB/s 
Collecting lmdb
  Downloading lmdb-0.99.tar.gz (995 kB)
[K     |████████████████████████████████| 995 kB 8.7 MB/s 
Building wheels for collected packages: efficientnet-pytorch, lmdb
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.6.3-py3-none-any.whl size=12419 sha256=8ed6ac9a037966a91a0391e1c92f4ba57d11353adf03279ffdd691f91aa73097
  Stored in directory: /root/.cache/pip/wheels/90/6b/0c/f0ad36d00310e65390b0d4c9218ae6250ac579c92540c9097a
  Building wheel for lmdb (setup.py) ... [?25l- \ | / - done
[?25h  Created wheel for lmdb: filename=lmdb-0.99-cp37-cp37m-linux_x86_64.whl size=275502 sha256=35a6933566525685a793bf1a6ee807b64e4db5

In [2]:
# Imports here
from efficientnet_pytorch import EfficientNet
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import pandas as pd
import os
import random
import math
import skimage.io
import sklearn
#from csv_loader import load_csv

# Tiff visualisation imports and downloads
import numpy as np
import tifffile as tiff

# For re-importing python modules
import importlib
#importlib.reload(csv_loader.py)

#for quadratic score calculator
from sklearn.metrics import cohen_kappa_score

#for k-folds
from sklearn import model_selection
from sklearn.model_selection import train_test_split

# Creating data_loader class

In [3]:
class load_csv(Dataset):
    def __init__(self, csv_file, root_dir, transform=True):
        self.annotations = pd.read_csv(csv_file)# todo remove sample for debug
        self.root_dir = root_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.annotations)
        
    
    def __getitem__(self, index):
        #img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image_id = self.annotations.iloc[index, 0]
        img_path = os.path.join(self.root_dir, str(image_id) +".png")
        image = torch.from_numpy(skimage.io.imread(img_path)).permute(2,0,1).float()
        
        #Image.MAX_IMAGE_PIXELS = None
                
        #image.transform = transforms.RandomResizedCrop(224)
        
        y_label = torch.tensor(int(self.annotations.iloc[index,:]['isup_grade']))
        #isup_grade = int(self.annotations.iloc[index,:]['isup_grade'])
        
        #label = np.zeros(6).astype(np.float32)
        #y_label = label[isup_grade] = 1.
        #y_label = torch.tensor(y_label)
        
        self.transform= transforms.Compose([transforms.ToPILImage(),
                                            transforms.ToTensor(),
                                            transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])
        if self.transform:
            image = self.transform(image)
        
        return image, y_label, image_id

In [4]:
# path inputs
train_fold_path = 'train_fold_'
test_fold_path = 'test_fold_'
img_dir = '../input/prostate-cancer-tiles-4x4x128px-downsampling-4x/train_128x4x4_res1/train_128x4x4_res1'

# Adding folds into dataset
file_info = pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv').copy()
num_folds = 5

stratified_kfold = model_selection.StratifiedKFold(n_splits=num_folds)

for fold_id, (_, rows) in enumerate(stratified_kfold.split(X=file_info, y=file_info.isup_grade.values)):
    file_info.loc[rows, 'fold'] = int(fold_id)
 
file_info.to_csv("train_list.csv", sep=",", index=False)
file_info

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score,fold
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0,0.0
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0,0.0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4,0.0
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4,0.0
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0,0.0
...,...,...,...,...,...
10611,ffd2841373b39792ab0c84cccd066e31,radboud,0,negative,4.0
10612,ffdc59cd580a1468eac0e6a32dd1ff2d,radboud,5,4+5,4.0
10613,ffe06afd66a93258f8fabdef6044e181,radboud,0,negative,4.0
10614,ffe236a25d4cbed59438220799920749,radboud,2,3+4,4.0


In [5]:
# Inputs for df that will feed training model
sample_size = 10616
df = file_info.sample(sample_size)

train_folds = []
test_folds = []


# Creating excel files that will upload the correct portions of sample dataset to respect fold sequencings/orders
for i in range(num_folds):
    # grab non_fold entries
    train_set = df[df.fold==i]
    
    # define valid size and dataframes according to non-fold sample df length
    test_set = df[df.fold!=i]
    
    train_set.to_csv('train_fold_' + str(i) + ".csv", sep=",", index=False)
    test_set.to_csv('test_fold_' + str(i) + ".csv", sep=",", index=False)

# Building model, training, validating, and k-fold

In [6]:
# Uploading model
model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=6)
model._fc = nn.Sequential(nn.Linear(model._fc.in_features, 216),
                          nn.ReLU(),
                          nn.Linear(216, 36, bias=True),
                          nn.ReLU(),
                          nn.Linear(36, 6, bias=True),
                          nn.LogSoftmax(dim=1))

# Criterion and optimzer
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_tensor_type(torch.cuda.FloatTensor)

if torch.cuda.is_available():
    model = model.cuda()


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth


HBox(children=(FloatProgress(value=0.0, max=77999237.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b4


In [7]:
# Build validation and accuracy function
def validate_data_function(model, test_loader, criterion):
    test_loss = 0
    accuracy = 0
    
    for ii, (inputs, labels, image_id) in enumerate(test_loader):
        
        inputs, labels = inputs.to(device), labels.to(device)
        
        output = model.forward(inputs)
        test_loss += criterion(output,labels.long())#.item()
        
        #ps = torch.exp(output)
        #equality = (labels.argmax(dim=1) == output.argmax(dim=1))
        equality = (labels == output.argmax(dim=1))
        accuracy += equality.type(torch.FloatTensor).mean()
        #pred = output.cpu().data.numpy().argmax()
        #qwk = cohen_kappa_score(pred, labels, weights='quadratic')
    
    return test_loss, accuracy, image_id

In [8]:
# Build standalone training model
def train_model(epochs, model, train_loader, device, optimizer, criterion, print_every, valid_loader, fold):
    steps = 0
    test_loss = 0
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        
        for ii, (inputs, labels, image_id) in enumerate(train_loader):
            steps += 1
        
            inputs, labels = inputs.to(device), labels.to(device)
        
            optimizer.zero_grad()
        
       
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
        
            running_loss += loss.item()
        
            if steps % print_every == 0:
                model.eval()

                with torch.no_grad():
                    valid_loss, accuracy, image_id = validate_data_function(model, test_loader, criterion)
            
                print(f"Fold: {fold}..| "
                      f"Epoch: {epoch+1}/{epochs}..| "
                      f"Train loss: {running_loss/print_every:.3f}..| "
                      f"Validation loss: {valid_loss/print_every:.3f}..| "                  
                      f"Validation accuracy: {accuracy/len(valid_loader):.3f}|"
                     )
            
                running_loss = 0
                model.train()
    
        # saving model to new paths corresponding to folds
        model_path_prefix = 'model_fold_' 
        path = model_path_prefix + str(fold) + '.pth'
        torch.save({
            'classifier_state_dict': model._fc.state_dict(),
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            'fold':fold
            }, path)
    
    
        model.cuda() # moving model to GPU for further training

# Performing k-folds training

In [9]:
num_folds = num_folds
epochs = 5
print_every = 200

for i in range(num_folds):
    # loading datasets in k_fold fashion pd.read_csv('./test_fold_3.csv')
    train_set = load_csv(csv_file=train_fold_path + str(i) + '.csv', root_dir=img_dir)
    test_set = load_csv(csv_file=test_fold_path + str(i) + '.csv', root_dir=img_dir)
     
    # creating data loaders
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=5, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=5, shuffle=True)
    
    
    #train models across all folds
    model.eval()
    train_model(epochs, model, train_loader, device, optimizer, criterion, print_every, test_loader, fold=i)   

Fold: 0..| Epoch: 1/5..| Train loss: 1.669..| Validation loss: 24.570..| Validation accuracy: 0.313|
Fold: 0..| Epoch: 1/5..| Train loss: 1.642..| Validation loss: 15.030..| Validation accuracy: 0.339|
Fold: 0..| Epoch: 2/5..| Train loss: 1.338..| Validation loss: 13.051..| Validation accuracy: 0.366|
Fold: 0..| Epoch: 2/5..| Train loss: 1.566..| Validation loss: 13.642..| Validation accuracy: 0.365|
Fold: 0..| Epoch: 3/5..| Train loss: 1.122..| Validation loss: 17.919..| Validation accuracy: 0.393|
Fold: 0..| Epoch: 3/5..| Train loss: 1.510..| Validation loss: 23.572..| Validation accuracy: 0.265|
Fold: 0..| Epoch: 4/5..| Train loss: 0.894..| Validation loss: 12.172..| Validation accuracy: 0.438|
Fold: 0..| Epoch: 4/5..| Train loss: 1.409..| Validation loss: 248.571..| Validation accuracy: 0.357|
Fold: 0..| Epoch: 5/5..| Train loss: 0.672..| Validation loss: 15.264..| Validation accuracy: 0.376|
Fold: 0..| Epoch: 5/5..| Train loss: 1.374..| Validation loss: 15.476..| Validation accura

In [10]:
k_models_average_pred(num_folds, model)

NameError: name 'k_models_average_pred' is not defined

In [11]:
model.eval()
    
image_id_excel = []
pred_y_excel = []


for ii2, (inputs2, labels2, image_id2) in enumerate(entire_set_loader):
        
        inputs2, labels2 = inputs2.to(device), labels2.to(device)
        
        output2 = model.forward(inputs2)
        pred_y2 = output2.argmax(dim=1)
        image_id2 = str(image_id2[0])
        #image_id2.replace("('","")
        #image_id2.replace("',)/","")    
        image_id_excel.append(str(image_id2))
        pred_y_excel.append(int(pred_y2))

submission = pd.DataFrame({'image_id':image_id_excel, 'isup_grade':pred_y_excel})


NameError: name 'entire_set_loader' is not defined

In [12]:
submission.to_csv("submission.csv", sep=",", index=False)

NameError: name 'submission' is not defined

In [13]:
submission_view = pd.read_csv('submission.csv').copy()
submission_view

FileNotFoundError: [Errno 2] No such file or directory: 'submission.csv'