Before reading:

https://github.com/thanos-team/-Prostate_cANcer_graDe_Assessment-PANDA-Challenge/blob/master/Week1/Step%201.%20Viewing%20image%2C%20mask_image%20%26%20understanding%20EDA.ipynb

In [1]:
## for loading, processing data & using linear_algebra calculation
import numpy as np 
import pandas as pd 
import os

# Any results you write to the current directory are saved as output.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms,models
from tqdm import tqdm_notebook as tqdm
import math
import torch.utils.model_zoo as model_zoo

## for processing_image
import cv2
import openslide
import skimage.io
import random
from sklearn.metrics import cohen_kappa_score
import albumentations

# General packages
from PIL import Image

## List at most five files from every directory below /kaggle/input
## (images, label masks, csv files, model weights)
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names[:5]:
        print(os.path.join(folder, name))

/kaggle/input/panda-resnext/resnext50_2.pth
/kaggle/input/panda-resnext/resnext50_1.pth
/kaggle/input/prostate-cancer-grade-assessment/train.csv
/kaggle/input/prostate-cancer-grade-assessment/test.csv
/kaggle/input/prostate-cancer-grade-assessment/sample_submission.csv
/kaggle/input/prostate-cancer-grade-assessment/train_label_masks/4b223c776c0ddb0699d74f3334c1e5e6_mask.tiff
/kaggle/input/prostate-cancer-grade-assessment/train_label_masks/b481b56617ce096435cb591adc6dd0b9_mask.tiff
/kaggle/input/prostate-cancer-grade-assessment/train_label_masks/53dfff0cecd06c15f600d334e7324b78_mask.tiff
/kaggle/input/prostate-cancer-grade-assessment/train_label_masks/eb846d2f14749c4913bf0affa0578d8d_mask.tiff
/kaggle/input/prostate-cancer-grade-assessment/train_label_masks/f526a2ca5612d5913984a67edabaf991_mask.tiff
/kaggle/input/prostate-cancer-grade-assessment/train_images/0cc35bc0fe4dd912b20f72d66888fd49.tiff
/kaggle/input/prostate-cancer-grade-assessment/train_images/4f53892473ee239f9dc9d80047cb3627

#### Loading & viewing dataset

In [2]:
# Root folder of the competition data (relative to the notebook's working directory)
BASE_PATH = '../input/prostate-cancer-grade-assessment'
print(os.listdir(BASE_PATH))

# Image and mask directories of the training set.
# NOTE: PandaDataset.__getitem__ builds its file paths from the global `data_dir`.
data_dir = f'{BASE_PATH}/train_images'
mask_dir = f'{BASE_PATH}/train_label_masks'


# Tables shipped with the data: training labels, test image ids, sample submission
train = pd.read_csv(f'{BASE_PATH}/train.csv')
test = pd.read_csv(f'{BASE_PATH}/test.csv')
submission = pd.read_csv(f'{BASE_PATH}/sample_submission.csv')

['train_label_masks', 'train.csv', 'train_images', 'test.csv', 'sample_submission.csv']


In [3]:
# Preview the first 7 rows of the training table
train.head(7)

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0
5,002a4db09dad406c85505a00fb6f6144,karolinska,0,0+0
6,003046e27c8ead3e3db155780dc5498e,karolinska,1,3+3


Note that `test.csv` and `sample_submission.csv` are truncated here: each contains only 3 `image_id`s.

In [4]:
# Display the whole test table
test

Unnamed: 0,image_id,data_provider
0,005700be7e06878e6605e7a5a39de1b2,radboud
1,005c6e8877caf724c600fdce5d417d40,karolinska
2,0104f76634ff89bfff1ef0804a95c380,radboud


In [5]:
# Display the sample submission table
submission

Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,0
1,005c6e8877caf724c600fdce5d417d40,0
2,0104f76634ff89bfff1ef0804a95c380,0


**Choose what device to run**

In [6]:
# Prefer the first CUDA GPU; fall back to the CPU when CUDA is not available
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cpu')

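If we classify on `isup_grade` (grades 0–5) there are 6 classes, so `CLASSES = 6`; classifying on `gleason_score` instead would need more classes (12 according to the original note — verify with `train.gleason_score.nunique()`).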

In [7]:
class config:
    """Global settings shared by the dataset, model and prediction cells."""

    # Device used to load the weights and to run inference
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Size passed to PandaDataset for the final resize
    IMG_WIDTH = 256
    IMG_HEIGHT = 256
    # Batch size of the test DataLoader
    TEST_BATCH_SIZE = 32
    # Number of outputs of the classification head
    CLASSES = 6

    # ImageNet checkpoints, looked up by `se_resnext50_32x4d` / `se_resnext101_32x4d`
    # as `config.pretrained_settings[<model name>][<pretrained>]`.  Without this
    # table those factories fail with AttributeError for any `pretrained`
    # other than None.
    # NOTE(review): values mirror the `pretrainedmodels` package (senet.py);
    # confirm the URLs are still reachable before relying on them.
    pretrained_settings = {
        'se_resnext50_32x4d': {
            'imagenet': {
                'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',
                'input_space': 'RGB',
                'input_size': [3, 224, 224],
                'input_range': [0, 1],
                'mean': [0.485, 0.456, 0.406],
                'std': [0.229, 0.224, 0.225],
                'num_classes': 1000,
            },
        },
        'se_resnext101_32x4d': {
            'imagenet': {
                'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth',
                'input_space': 'RGB',
                'input_size': [3, 224, 224],
                'input_range': [0, 1],
                'mean': [0.485, 0.456, 0.406],
                'std': [0.229, 0.224, 0.225],
                'num_classes': 1000,
            },
        },
    }

Seed Python's `random`, NumPy and PyTorch so that runs are reproducible (`random.seed()` initializes the generator's internal state from a hashable object).

In [8]:
def seed_torch(seed=42):
    """Seed every random number generator used in this notebook.

    Covers Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    puts cuDNN into deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hash randomization of interpreters started after this point
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # The autotuner may pick different kernels from run to run; switch it off
    torch.backends.cudnn.benchmark = False


seed_torch(seed = 42)

#### Building the `SE-ResNeXt` model

Define the classes:

            class SEModule(nn.Module)       
            class Bottleneck(nn.Module)
            class SEBottleneck(Bottleneck)
            class SEResNetBottleneck(Bottleneck)
            class SEResNeXtBottleneck(Bottleneck)
            class SENet(nn.Module)
            class CustomSEResNeXt(nn.Module)
            class PandaDataset(Dataset)
            
Define `functions`, including:

            def initialize_pretrained_model(model, num_classes, settings)
            def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet')
            def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet')

**Quick reminder 1**

`super(type, object)`: bound super object; requires isinstance(obj, type)

`nn.AdaptiveAvgPool2d(outdim)`: Applies a 2D adaptive average pooling over an input signal composed of several input planes.
      
            The output is of size H x W, for any input size.
            The number of output features is equal to the number of input planes.
    
 `nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')`: Applies a `2D` convolution over an input signal composed of several input planes.
 
 `nn.ReLU(inplace = False)`: Applies the rectified linear unit function element-wise
 
 $$ \text{ReLU}(x) = (x)^+ = \max(0, x) $$

In [9]:
from collections import OrderedDict
import math

class SEModule(nn.Module):
    """Squeeze-and-Excitation gate.

    Pools the input to 1x1 per channel, passes it through two 1x1
    convolutions (`channels -> channels // reduction -> channels`) with a ReLU
    in between and a sigmoid at the end, then rescales the input channel-wise
    with the resulting weights.
    """

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Per-channel gate computed from the globally pooled input
        gate = self.avg_pool(x)
        gate = self.relu(self.fc1(gate))
        gate = self.sigmoid(self.fc2(gate))
        return x * gate

In [10]:
class Bottleneck(nn.Module):
    """
    Base class for bottlenecks; it only implements the shared `forward()`.

    `forward()` reads `conv1`/`bn1`, `conv2`/`bn2`, `conv3`/`bn3`, `relu`,
    `se_module` and `downsample` (may be None) from the instance; this class
    does not create them itself.
    """
    def forward(self, x):
        # Three convolution -> batch-norm stages; ReLU follows the first two only
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut branch: the input itself unless a projection is configured
        shortcut = x if self.downsample is None else self.downsample(x)

        # Gate the main branch with the SE module, add the shortcut, activate
        return self.relu(self.se_module(out) + shortcut)

**Quick reminder 2 (for the remaining classes: `SEBottleneck`, `SEResNetBottleneck`, ..., `SENet`)**. They use the following common building blocks:

`super` and `nn.Conv2d` are explained in Quick reminder 1 above.

`nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)`: Applies `Batch Normalization` over a 4D input (a mini-batch of 2D inputs with additional channel dimension) as described in the paper `Batch Normalization:` `"Accelerating Deep Network Training by Reducing Internal Covariate Shift"`_ .

$$ y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta $$

`SEModule` (defined in cell `In [9]`; its `__init__` takes the parameters `channels` and `reduction`): the Squeeze-and-Excitation block. It subclasses `nn.Module`, the base class for all neural network modules.

`nn.AvgPool2d(kernel_size, stride = None, padding = 0, ceil_mode = False, count_include_pad = True, divisor_override = None)`: Applies a 2D average pooling over an input signal composed of several input planes.

`nn.Dropout(p = 0.5, inplace = False)`: During training, randomly zeroes elements of the input tensor with probability `p`.

`nn.Linear(in_features, out_features, bias=True)` (see `class CustomSEResNeXt` in cell `In [17]` below). This module applies a linear transformation to the incoming data: $y = xA^T + b$

In [12]:
class SEBottleneck(Bottleneck):
    """
    Bottleneck for SENet154.

    Channel flow: `inplanes -> planes * 2 -> planes * 4 -> planes * 4`; the
    stride and the grouped convolution sit in the 3x3 `conv2`.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride = 1, downsample = None):
        super().__init__()
        mid_channels = planes * 2
        out_channels = planes * 4

        # 1x1 reduce
        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)

        # 3x3 grouped convolution, carries the stride
        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # 1x1 projection
        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride

In [13]:
class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck with a Squeeze-and-Excitation module.

    Follows the Caffe implementation: `stride=stride` is applied in `conv1`
    and not in `conv2` (the torchvision ResNet puts it in `conv2`).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride = 1, downsample = None):
        super().__init__()
        out_channels = planes * 4

        # 1x1 convolution carrying the stride
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 grouped convolution at constant width
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 expansion to `planes * 4` channels
        self.conv3 = nn.Conv2d(planes, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride

In [14]:
class SEResNeXtBottleneck(Bottleneck):
    """
    ResNeXt bottleneck type C with a Squeeze-and-Excitation module.

    The inner width is `floor(planes * base_width / 64) * groups`; the block
    outputs `planes * 4` channels and applies the stride in the 3x3 `conv2`.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride = 1,
                 downsample = None, base_width = 4):
        super().__init__()
        width = math.floor(planes * (base_width / 64)) * groups
        out_channels = planes * 4

        # 1x1 reduce to the grouped width
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, stride=1)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3 grouped convolution, carries the stride
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1 expansion
        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride

In [15]:
class SENet(nn.Module):
    """SENet backbone: a stem (`layer0`), four stages of `block`s and a linear head.

    Args:
        block: bottleneck class; must expose `expansion` and accept
            `(inplanes, planes, groups, reduction, stride, downsample)`.
        layers: number of blocks in each of the four stages.
        groups: forwarded to every block.
        reduction: forwarded to every block.
        dropout_p: dropout probability before the head; None disables dropout.
        inplanes: number of channels produced by the stem.
        input_3x3: build the stem from three 3x3 convolutions instead of a
            single 7x7 convolution.
        downsample_kernel_size: kernel size of the shortcut projection in
            stages 2-4 (stage 1 always uses 1).
        downsample_padding: padding of the shortcut projection in stages 2-4
            (stage 1 always uses 0).
        num_classes: number of outputs of `last_linear`.
    """

    def __init__(self, block, layers, groups, reduction, dropout_p = 0.2,
                 inplanes = 128, input_3x3 = True, downsample_kernel_size = 3,
                 downsample_padding = 1, num_classes = 1000):
        super().__init__()
        self.inplanes = inplanes

        # ---- stem ----
        if input_3x3:
            stem = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            stem = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # To preserve compatibility with Caffe weights `ceil_mode=True` is used instead of `padding=1`.
        stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(stem))

        # ---- four stages; only stages 2-4 stride and use the configurable projection ----
        block_args = dict(groups=groups, reduction=reduction)
        projection_args = dict(downsample_kernel_size=downsample_kernel_size,
                               downsample_padding=downsample_padding)

        self.layer1 = self._make_layer(block, planes=64, blocks=layers[0],
                                       downsample_kernel_size=1,
                                       downsample_padding=0, **block_args)
        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], stride=2,
                                       **block_args, **projection_args)
        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], stride=2,
                                       **block_args, **projection_args)
        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], stride=2,
                                       **block_args, **projection_args)

        # ---- head ----
        self.avg_pool = nn.AvgPool2d(7, stride=1)
        if dropout_p is None:
            self.dropout = None
        else:
            self.dropout = nn.Dropout(dropout_p)
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """Stack `blocks` instances of `block` and update `self.inplanes`.

        Only the first block receives `stride` and, when the stride or the
        channel count changes, a conv + batch-norm projection for the shortcut.
        """
        out_channels = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        first = block(self.inplanes, planes, groups, reduction, stride, downsample)
        self.inplanes = out_channels
        rest = [block(self.inplanes, planes, groups, reduction)
                for _ in range(1, blocks)]

        return nn.Sequential(first, *rest)

    def features(self, x):
        """Run the stem and the four stages."""
        for stage in (self.layer0, self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return x

    def logits(self, x):
        """Pool, optionally apply dropout, flatten and apply the linear head."""
        pooled = self.avg_pool(x)
        if self.dropout is not None:
            pooled = self.dropout(pooled)
        flat = pooled.view(pooled.size(0), -1)
        return self.last_linear(flat)

    def forward(self, x):
        return self.logits(self.features(x))

In [16]:
def initialize_pretrained_model(model, num_classes, settings):
    """Load pretrained weights into `model` and attach preprocessing metadata.

    Args:
        model: network whose `load_state_dict` receives the downloaded weights.
        num_classes: number of classes the model was built with; must equal
            `settings['num_classes']`.
        settings: dict with the keys `url`, `num_classes`, `input_space`,
            `input_size`, `input_range`, `mean` and `std`.

    Raises:
        AssertionError: if `num_classes` differs from `settings['num_classes']`.
    """
    expected_classes = settings['num_classes']
    assert num_classes == expected_classes, \
        'num_classes should be {}, but is {}'.format(expected_classes, num_classes)

    state_dict = model_zoo.load_url(settings['url'])
    model.load_state_dict(state_dict)

    # Copy the preprocessing description onto the model as attributes
    for key in ('input_space', 'input_size', 'input_range', 'mean', 'std'):
        setattr(model, key, settings[key])


def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
    """Build an SE-ResNeXt-50 (32x4d) network.

    Args:
        num_classes: number of outputs of the final linear layer.
        pretrained: key into `config.pretrained_settings['se_resnext50_32x4d']`
            selecting the weights to load; None skips loading.  Any other
            value requires `config.pretrained_settings` to be defined.

    Returns:
        The constructed `SENet`.
    """
    net = SENet(
        SEResNeXtBottleneck,
        [3, 4, 6, 3],
        groups=32,
        reduction=16,
        dropout_p=None,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        num_classes=num_classes,
    )
    if pretrained is None:
        return net

    chosen = config.pretrained_settings['se_resnext50_32x4d'][pretrained]
    initialize_pretrained_model(net, num_classes, chosen)
    return net


def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'):
    """Build an SE-ResNeXt-101 (32x4d) network.

    Args:
        num_classes: number of outputs of the final linear layer.
        pretrained: key into `config.pretrained_settings['se_resnext101_32x4d']`
            selecting the weights to load; None skips loading.  Any other
            value requires `config.pretrained_settings` to be defined.

    Returns:
        The constructed `SENet`.
    """
    net = SENet(
        SEResNeXtBottleneck,
        [3, 4, 23, 3],
        groups=32,
        reduction=16,
        dropout_p=None,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        num_classes=num_classes,
    )
    if pretrained is None:
        return net

    chosen = config.pretrained_settings['se_resnext101_32x4d'][pretrained]
    initialize_pretrained_model(net, num_classes, chosen)
    return net

In [17]:
class CustomSEResNeXt(nn.Module):
    """SE-ResNeXt-50 backbone with a `config.CLASSES`-way classification head.

    The backbone is built without pretrained weights; its fixed 7x7 average
    pool is replaced by an adaptive one and its final linear layer by a new
    layer with `config.CLASSES` outputs.

    Args:
        model_name: architecture name; only 'se_resnext50_32x4d' is supported.

    Raises:
        AssertionError: if `model_name` is not a supported architecture.
    """

    def __init__(self, model_name='se_resnext50_32x4d'):
        # The trailing comma makes this a tuple: `x in ('name')` would be a
        # substring test on a plain string and accept e.g. 'se' or ''.
        supported = ('se_resnext50_32x4d',)
        assert model_name in supported, \
            'model_name should be one of {}, but is {!r}'.format(supported, model_name)
        super().__init__()

        self.model = se_resnext50_32x4d(pretrained=None)
        # Adaptive pooling always yields a 1x1 map for the head
        self.model.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.model.last_linear = nn.Linear(self.model.last_linear.in_features, config.CLASSES)

    def forward(self, x):
        x = self.model(x)
        return x

In [18]:
class PandaDataset(Dataset):
    """Inference-time dataset that loads, resizes and normalizes slide images.

    Args:
        images: sequence of image ids (file names without the `.tiff` suffix).
        img_height: height of the returned image in pixels.
        img_width: width of the returned image in pixels.
        image_dir: folder containing the `.tiff` files; when None the global
            `data_dir` is used (looked up on every access).
    """
    def __init__(self, images, img_height, img_width, image_dir=None):
        self.images = images
        self.img_height = img_height
        self.img_width = img_width
        self.image_dir = image_dir

        # we are in validation part: normalization only, no random augmentation
        self.aug = albumentations.Compose([
            albumentations.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], always_apply=True)
        ])

    def __len__(self):
        return len(self.images)


    def __getitem__(self, idx):
        """Return `{'image': float tensor}` in channel-first layout for item `idx`."""
        img_name = self.images[idx]
        image_dir = data_dir if self.image_dir is None else self.image_dir
        img_path = os.path.join(image_dir, f'{img_name}.tiff')

        # Last entry of the multi-image TIFF (presumably the smallest pyramid
        # level - TODO confirm), resized to 512x512
        img = skimage.io.MultiImage(img_path)
        img = cv2.resize(img[-1], (512, 512))

        # Round trip through a PNG in the working directory.
        # NOTE(review): cv2.imwrite assumes BGR channel order while skimage
        # reads RGB, so this likely swaps R and B; presumably it mirrors how
        # the training images were produced - confirm before removing it.
        save_path =  f'{img_name}.png'
        cv2.imwrite(save_path, img)
        img = skimage.io.MultiImage(save_path)

        # cv2.resize expects dsize as (width, height)
        img = cv2.resize(img[-1], (self.img_width, self.img_height))

        img = Image.fromarray(img).convert("RGB")
        img = self.aug(image=np.array(img))["image"]
        # HWC -> CHW
        img = np.transpose(img, (2, 0, 1)).astype(np.float32)

        return { 'image': torch.tensor(img, dtype=torch.float) }

Loading a model **`resnext50`**

In [19]:
# Build the network; CustomSEResNeXt creates its backbone with `pretrained=None`,
# so no weights are loaded at this point
model = CustomSEResNeXt(model_name='se_resnext50_32x4d')
print(type(model))

<class '__main__.CustomSEResNeXt'>


In [20]:
# Checkpoint from the `panda-resnext` input dataset; `map_location` places the
# loaded tensors on `config.device`
weights_path = '../input/panda-resnext/resnext50_2.pth'
model.load_state_dict(torch.load(weights_path, map_location=config.device))
model

CustomSEResNeXt(
  (model): SENet(
    (layer0): Sequential(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (layer1): Sequential(
      (0): SEResNeXtBottleneck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (re

**Predict & evaluate**

In [21]:
# Inference mode
model.eval()
predictions = []

device = config.device

# Folder with the test images; presumably only present when the notebook is
# re-run for submission, hence the existence check
test_dir = f'{BASE_PATH}/test_images'

if os.path.exists(test_dir):
    # PandaDataset builds its file paths from the global `data_dir`, which so
    # far pointed at the training images; the test ids live in `test_dir`.
    data_dir = test_dir

    test_dataset = PandaDataset(images=test.image_id.values,
                                img_height=config.IMG_HEIGHT,
                                img_width=config.IMG_WIDTH)

    test_data_loader = torch.utils.data.DataLoader(test_dataset,
                                                   batch_size=config.TEST_BATCH_SIZE,
                                                   shuffle=False)

    model.to(device)

    # No gradients are needed for prediction
    with torch.no_grad():
        for batch in tqdm(test_data_loader, total=len(test_data_loader)):
            inputs = batch["image"].to(device)
            outputs = model(inputs)
            # Index of the largest output per sample is the predicted class
            predictions.append(outputs.argmax(1).cpu().numpy())

    # np.concatenate raises on an empty list (no test rows)
    if predictions:
        predictions = np.concatenate(predictions)
print(predictions)

[]


#### Create a submission file

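Note that when only the truncated data is available (`test.csv` lists just 3 image ids and the `test_images` folder does not exist), no inference runs, so `predictions = []` and its `len` is 0; in that case we keep the `isup_grade` values already present in `sample_submission.csv`.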

In [22]:
# Replace the placeholder grades only when inference produced predictions
if len(predictions) != 0:
    submission['isup_grade'] = predictions
    submission['isup_grade'] = submission.isup_grade.astype(int)

# Write the file and display the final table
submission.to_csv('submission.csv', index=False)
submission

Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,0
1,005c6e8877caf724c600fdce5d417d40,0
2,0104f76634ff89bfff1ef0804a95c380,0
