In [1]:
!pip install efficientnet_pytorch torchtoolbox
!pip install git+https://github.com/ildoonet/pytorch-gradual-warmup-lr.git

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.0.tar.gz (20 kB)
Collecting torchtoolbox
  Downloading torchtoolbox-0.1.5-py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 2.2 MB/s 
Collecting lmdb
  Downloading lmdb-1.0.0.tar.gz (876 kB)
[K     |████████████████████████████████| 876 kB 8.4 MB/s 
Building wheels for collected packages: efficientnet-pytorch, lmdb
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-py3-none-any.whl size=16035 sha256=53b7fdefbae6649a98828ef68c3871865815c39f68990337e70e79efc9f4d834
  Stored in directory: /root/.cache/pip/wheels/b7/cc/0d/41d384b0071c6f46e542aded5f8571700ace4f1eb3f1591c29
  Building wheel for lmdb (setup.py) ... [?25l- \ | / - done
[?25h  Created wheel for lmdb: filename=lmdb-1.0.0-cp37-cp37m-linux_x86_64.whl size=276752 sha256=f06312ae6e853bb6949795d2864712019fd3

In [2]:
# Imports here
from efficientnet_pytorch import EfficientNet
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import pandas as pd
import os
import random
import math
import skimage.io
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
from warmup_scheduler import GradualWarmupScheduler
import time

# Tiff visualisation imports and downloads
import numpy as np
import tifffile as tiff

# For re-importing python modules
import importlib
#importlib.reload(csv_loader.py)

#for quadratic score calculator
from sklearn.metrics import cohen_kappa_score


In [3]:
#use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [4]:
# Creating ability to control how many pictures go into the training sample. For debugging / training purposes
sample_size = 10616
df = pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv').copy().sample(sample_size)
df.to_csv("sample.csv", sep=",", index=False)

In [5]:
class load_csv(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)# todo remove sample for debug
        self.root_dir = root_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.annotations)
        
    
    def __getitem__(self, index):
        image_id = self.annotations.iloc[index, 0]
        img_path = os.path.join(self.root_dir, str(image_id) +".png")
        image = torch.from_numpy(skimage.io.imread(img_path)).permute(2,0,1).float()
        
        #y_label = torch.tensor(int(self.annotations.iloc[index,:]['isup_grade']))
        isup_grade = int(self.annotations.iloc[index,:]['isup_grade'])
        
        label = np.zeros(5).astype(np.float32)
        label[:isup_grade] = 1.
        
        
        self.transform= transforms.Compose([transforms.ToPILImage(),
                                            transforms.ToTensor()])
                                            
        if self.transform:
            image = self.transform(image)
        
        return (image, torch.tensor(label), image_id)

In [6]:
# Loading csv dataset into the dataset loader function load_csv. 
dataset = load_csv(csv_file='sample.csv', root_dir='../input/prostate-cancer-tiles-4x4x128px-downsampling-4x/train_128x4x4_res1/train_128x4x4_res1')

# Creating sample subsets for validation and testing datasets
sample_size = dataset.annotations.shape[0]
train_ratio = .75
valid_ratio = .15
test_ratio = 1-(train_ratio + valid_ratio)
train_size = int(train_ratio*sample_size)
valid_size = int(valid_ratio*sample_size)
test_size = sample_size - train_size - valid_size

# Defining different datasets and respective dataloaders
train_set, valid_set, test_set = torch.utils.data.random_split(dataset, [train_size, valid_size, test_size])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=30, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=30, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=30, shuffle=False)
entire_set_loader = torch.utils.data.DataLoader(dataset, batch_size=30, shuffle=False)

In [7]:

# Creating model and uploading/creating needed training components
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=5)
model._fc = model._fc = nn.Sequential(nn.Linear(model._fc.in_features, 216),
                          nn.ReLU(),
                          nn.Linear(216, 36, bias=True),
                          nn.ReLU(),
                          nn.Linear(36, 5, bias=True))


if torch.cuda.is_available():
    model = model.cuda()

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/checkpoints/efficientnet-b0-355c32eb.pth


HBox(children=(FloatProgress(value=0.0, max=21388428.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b0


In [8]:
init_lr = 3e-4
warmup_factor = 10

warmup_epo = 1
n_epochs = 30

optimizer = optim.Adam(model.parameters(), lr=init_lr/warmup_factor)
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs-warmup_epo)
scheduler = GradualWarmupScheduler(optimizer, multiplier=warmup_factor, total_epoch=warmup_epo, after_scheduler=scheduler_cosine)

criterion = nn.BCEWithLogitsLoss()

In [9]:
def validate_data_function(model, test_loader, criterion):
    test_loss = 0
    accuracy = 0
    output_list = []
    preds_list = []
    dec_list = []
    target_list = []
    loss_list = []
    
    with torch.no_grad():
        
        for ii, (inputs, labels, image_id) in enumerate(test_loader):
        
            inputs, labels = inputs.to(device), labels.to(device)
            output = model.forward(inputs)
            
            loss = criterion(output,labels)
               
            dec = output.sigmoid().sum(1).detach()
        
            output_list.append(output)
            preds_list.append(dec.round())
            target_list.append(labels.sum(1))
            dec_list.append(dec)
        
            loss_np = loss.detach().cpu().numpy()
            loss_list.append(loss_np)        
        test_loss = np.mean(loss_list)
        
        preds_list = torch.cat(preds_list).cpu().numpy()
        target_list = torch.cat(target_list).cpu().numpy()
        accuracy = np.mean(preds_list == target_list) * 100.
        
        #pred = output.cpu().data.numpy().argmax()
        #qwk = cohen_kappa_score(pred, labels, weights='quadratic')
    
    return test_loss, accuracy, image_id, preds_list, target_list

In [10]:
# Training parameters and t=0 inputs
print_every = 200
steps = 0
test_loss = 0
output_list = []
label_list = []

# May the training begin!
for epoch in range(1, n_epochs+1):
    model.train()
    running_loss = 0
    
    scheduler.step(epoch-1)
    
    for ii, (inputs, labels, image_id) in enumerate(train_loader):
        steps += 1
        start = time.time()
        
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
       
        outputs = model.forward(inputs)#.forward(inputs)
        loss = criterion(outputs, labels)#.long())
        loss.backward()
        optimizer.step()
    
        
        running_loss += loss.item()
        output_list.append(outputs.sum(1).round())
        label_list.append(labels.sum(1))
        
        if steps % print_every == 0:
            model.eval()
            
            train_accuracy=0.0
            
            with torch.no_grad():
                valid_loss, accuracy, image_id,_,_ = validate_data_function(model, valid_loader, criterion)
                #train_accuracy = np.mean(output_list == label_list)
                #train_accuracy = equality.type(torch.FloatTensor)/steps * 100.
                end = time.time()
                train_time = end - start
                
            epoch_lr = optimizer.param_groups[0]["lr"]
            print(f"Epoch: {epoch}/{n_epochs}..| "
                  f"lr: {epoch_lr:.6f}..|"
                  f"Train loss: {running_loss/print_every:.3f}..| "
                  #f"Train accuracy: {train_accuracy:.3f}..| "
                  f"Validation loss: {valid_loss/print_every:.3f}..| "                  
                  f"Validation accuracy: {accuracy:.3f}|"
                  f"Epoch train time (m): {train_time/60: .3f}..|"
                 )
            
            
            
            running_loss = 0
            model.train()
    
    path = 'base_model_w_logits.pth'
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'classifier_state_dict': model._fc.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss
            }, path)
    
    
    model.cuda() # moving model to GPU for further training



Epoch: 1/30..| lr: 0.000030..|Train loss: 0.575..| Validation loss: 0.003..| Validation accuracy: 23.367|Epoch train time (m):  1.210..|
Epoch: 2/30..| lr: 0.000300..|Train loss: 0.257..| Validation loss: 0.002..| Validation accuracy: 26.947|Epoch train time (m):  0.852..|
Epoch: 3/30..| lr: 0.000030..|Train loss: 0.101..| Validation loss: 0.001..| Validation accuracy: 47.425|Epoch train time (m):  0.847..|
Epoch: 4/30..| lr: 0.000296..|Train loss: 0.002..| Validation loss: 0.001..| Validation accuracy: 47.927|Epoch train time (m):  0.845..|
Epoch: 4/30..| lr: 0.000296..|Train loss: 0.273..| Validation loss: 0.002..| Validation accuracy: 46.859|Epoch train time (m):  0.867..|
Epoch: 5/30..| lr: 0.000292..|Train loss: 0.152..| Validation loss: 0.002..| Validation accuracy: 44.661|Epoch train time (m):  0.852..|
Epoch: 6/30..| lr: 0.000286..|Train loss: 0.063..| Validation loss: 0.002..| Validation accuracy: 54.648|Epoch train time (m):  0.859..|
Epoch: 7/30..| lr: 0.000279..|Train loss:

In [11]:
model.eval()
    
with torch.no_grad():
    test_loss, accuracy, image_id, _, _ = validate_data_function(model, test_loader, criterion)
                
print("Test Accuracy: {}%".format(accuracy))

Test Accuracy: 59.227871939736346%
