In [1]:
# ====================================================
# Library
# ====================================================

import sys

import gc
import os
import random
import time
from contextlib import contextmanager
from pathlib import Path
from collections import defaultdict, Counter

import skimage.io
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import gc

import sklearn.metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from functools import partial
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset

from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip
from albumentations.pytorch import ToTensorV2
from panda_common import TestDataset, CFG, CustomSEResNeXt, OptimizedRounder, PANDADataset, load_models, get_transforms, tile, get_tiles

import warnings 
warnings.filterwarnings('ignore')


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



cuda


In [2]:
data_dir = '/kaggle/input/prostate-cancer-grade-assessment'
image_folder = os.path.join(data_dir, 'test_images')
is_test = os.path.exists(image_folder)  # IF test_images is not exists, we will use some train images.

#Loading data
if is_test:
    test = pd.read_csv('../input/prostate-cancer-grade-assessment/test.csv')
else: 
    train = pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv')
sample = pd.read_csv('../input/prostate-cancer-grade-assessment/sample_submission.csv')

# **Inference - ResNext**

In [3]:
PATH_INFERENCE = '/kaggle/input/resnet-1epoch-baseline/'

def inference(model, samples, dir_name, device):
    model.to(device) 
    probs = []
    transform = get_transforms(data='valid')
    for index, row in samples.iterrows():
        file_name = row['image_id']
        file_path = f'../input/prostate-cancer-grade-assessment/{dir_name}/{file_name}.tiff'
        image = skimage.io.MultiImage(file_path)[0]
        image = tile(image, sz=128, N=16)
        #Tile concatenation
        image = cv2.hconcat([cv2.vconcat([image[0], image[1], image[2], image[3]]), 
                             cv2.vconcat([image[4], image[5], image[6], image[7]]), 
                             cv2.vconcat([image[8], image[9], image[10], image[11]]), 
                             cv2.vconcat([image[12], image[13], image[14], image[15]])])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        #Test augmentations
        if transform:
            augmented = transform(image=image)
            image = augmented['image']
        image = image.to(device)
        with torch.no_grad():
            image = image[None,:]
            y_preds = model(image)
        del image
        probs.append(y_preds.to('cpu').numpy())
        del y_preds
    probs = np.concatenate(probs)
    gc.collect()
    return probs

In [4]:
def submit(sample, coefficients, dir_name='test_images'):
    probs = np.zeros(len(sample.index))
    if os.path.exists(f'/kaggle/input/prostate-cancer-grade-assessment/{dir_name}'):
        probs = []
        for fold in tqdm(range(CFG.n_fold)):
            model = CustomSEResNeXt(model_name='se_resnext50_32x4d')
            weights_path = f'{PATH_INFERENCE}fold{fold}_se_resnext50.pth'
            #Load the pre-trained weights
            model.load_state_dict(torch.load(weights_path, map_location=device))
            _probs = inference(model, sample, dir_name, device)
            probs.append(_probs)
            del model
            gc.collect()
        probs = np.mean(probs, axis=0)
    return probs

In [5]:
# ResNeXt50, coefficients for a 10 epochs training on 100% of the data 
coefficients = [0.55088115, 1.45590427, 2.41572283, 3.43710686, 4.19867511]
dir_name = 'test_images' if is_test else 'train_images'
samples = sample if is_test else train.iloc[:3]

results = submit(samples, coefficients, dir_name)
if len(results.shape) > 1:
    results = [ r[0] for r in results ]

100%|██████████| 4/4 [01:10<00:00, 17.62s/it]


In [6]:
results_resnext = []
#Compute predictions according to coefficients
for i, pred in enumerate(results):
    if pred < coefficients[0]:
        results_resnext.append(pred / coefficients[0])
    elif pred >= coefficients[0] and pred < coefficients[1]:
        results_resnext.append(0.5 + (pred - coefficients[0]) / (coefficients[1] - coefficients[0]))
    elif pred >= coefficients[1] and pred < coefficients[2]:
        results_resnext.append(1.5 + (pred - coefficients[1]) / (coefficients[2] - coefficients[1]))
    elif pred >= coefficients[2] and pred < coefficients[3]:
        results_resnext.append(2.5 + (pred - coefficients[2]) / (coefficients[3] - coefficients[2]))
    elif pred >= coefficients[3] and pred < coefficients[4]:
        results_resnext.append(3.5 + (pred - coefficients[3]) / (coefficients[4] - coefficients[3]))
    else:
        results_resnext.append(4.5 + (pred - coefficients[4]) / (6 - coefficients[4]))
results_resnext = np.array(results_resnext)
print(results_resnext)

[2.46206443 2.30262192 2.56141395]


# **Inference - EfficientNet**

In [7]:
model_dir = '/kaggle/input/first-exp'
image_folder = image_folder if is_test else os.path.join(data_dir, 'train_images')

tile_size = 256
image_size = 256
n_tiles = 36
batch_size = 2
num_workers = 4

model_files = [
    '/kaggle/input/first-exp/effnet_b0_best_fold0.pth'
]

#Load EfficientNet model.
models = load_models(model_dir, model_files)

/kaggle/input/first-exp/effnet_b0_best_fold0.pth loaded!


In [8]:
def tt_augmentation(tiles):
    idxes = list(range(n_tiles))

    n_row_tiles = int(np.sqrt(n_tiles))
    images = np.zeros((image_size * n_row_tiles, image_size * n_row_tiles, 3))
    for h in range(n_row_tiles):
        for w in range(n_row_tiles):
            i = h * n_row_tiles + w

            if len(tiles) > idxes[i]:
                this_img = tiles[idxes[i]]['img']
            else:
                this_img = np.ones((image_size, image_size, 3)).astype(np.uint8) * 255
            this_img = 255 - this_img
            h1 = h * image_size
            w1 = w * image_size
            images[h1:h1+image_size, w1:w1+image_size] = this_img
    
    images = images.astype(np.float32)
    images /= 255
    images = images.transpose(2, 0, 1)
    
    return images

In [9]:
LOGITS = []
tile_mode = 0
with torch.no_grad():
    for index, row in samples.iterrows():
        file_name = row['image_id']
        tiff_file = os.path.join(image_folder, f'{file_name}.tiff')
        #Load image
        image = skimage.io.MultiImage(tiff_file)[-1]
        #Generate tiles
        tiles, OK = get_tiles(image, tile_mode)
        #Test time augmentation
        image = torch.tensor(tt_augmentation(tiles))
        image = image[None,:].to(device)
        logits = models[0](image)
        del image
        LOGITS.append(logits)

In [10]:
LOGITS = torch.cat(LOGITS).sigmoid().cpu()
PREDS = LOGITS.sum(1).round().numpy()

In [11]:
pred_effnet = LOGITS.sum(1)

#Average results of both models.
ensemble_preds = torch.round((pred_effnet + results_resnext) / 2).numpy().astype(int)
sample = pd.read_csv('../input/prostate-cancer-grade-assessment/sample_submission.csv')
sample['isup_grade'] = ensemble_preds
#Save submission file
sample[['image_id', 'isup_grade']].to_csv('submission.csv', index=False)
display(sample)

Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,3
1,005c6e8877caf724c600fdce5d417d40,3
2,0104f76634ff89bfff1ef0804a95c380,3
