In [1]:
from __future__ import print_function, division, absolute_import

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from collections import OrderedDict

# import data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# import image utils
from PIL import Image
import cv2

# import image processing
import scipy.ndimage as ndi
import scipy

# import image utilities
from skimage.morphology import binary_opening, disk, label, binary_closing

# import image augmentation
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, RandomBrightnessContrast, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, Flip, OneOf, Compose, PadIfNeeded, RandomContrast, RandomGamma, RandomBrightness, ElasticTransform
)

# Import PyTorch
import torch
from torch import nn
from torch import optim
from torch.optim import Optimizer
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as TF
from torch.utils.data.sampler import SubsetRandomSampler, Sampler
from torch.autograd import Variable
import torch.utils.model_zoo as model_zoo
from torch.nn import init
from torch.optim import lr_scheduler
import torchvision.models as M
from functools import partial

import math
import os

import time

from tqdm import tqdm_notebook

In [2]:
import albumentations as albu 
from albumentations.pytorch.transforms import ToTensor
from tqdm import tqdm

In [3]:
transform_val = albu.load('../input/modelnew1/valid_transforms_1024.json') 

In [4]:
transform_val

Compose([
  Resize(always_apply=True, p=1, height=1024, width=1024, interpolation=1),
  Normalize(always_apply=False, p=1.0, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0),
], p=1.0, bbox_params={}, keypoint_params={}, additional_targets={})

In [5]:
from sklearn.metrics import cohen_kappa_score
def quadratic_kappa(y_hat, y):
    return torch.tensor(cohen_kappa_score(torch.argmax(y_hat,1), y, weights='quadratic'),device='cuda:0')

In [6]:
class AvgPool(nn.Module):
    def forward(self, x):
        return torch.nn.functional.avg_pool2d(x, (x.size(2), x.size(3)))

In [7]:
num_classes = 5
model = models.densenet169(pretrained=False)
model.classifier = nn.Sequential(nn.Linear(1664, num_classes),
                                 #nn.ReLU(),
                                 #nn.Dropout(0.9),
                                 #nn.Linear(512, 14),
                                 nn.LogSoftmax(dim=1))

In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

In [9]:
class PneumothoraxDataset(Dataset):
    def __init__(self, data_folder, mode, transform=None,
                 fold_index=None, folds_distr_path=None, pseudolabeling_path=None):
        
        self.transform = transform
        self.mode = mode
        
        # change to your path
        self.train_image_path = '{}train/'.format(data_folder)
        #self.train_mask_path = '{}/mask/'.format(data_folder)
        self.test_image_path = '{}'.format(data_folder)
        self.pseudolabeling_path = pseudolabeling_path
        
        self.fold_index = None
        self.folds_distr_path = folds_distr_path
        self.set_mode(mode, fold_index)
        self.to_tensor = ToTensor()

    def set_mode(self, mode, fold_index):
        self.mode = mode
        self.fold_index = fold_index

        if self.mode == 'train':
            folds = pd.read_csv(self.folds_distr_path)
            folds = folds[folds.fold != fold_index]
            self.train_list = folds.id_code.values.tolist()
            self.target_list = folds.diagnosis.values.tolist()
            self.target = folds['diagnosis'].values.astype(np.int64)
            self.pseudo_list = []
            if self.pseudolabeling_path is not None:
                print('Load pseudo from ', self.pseudolabeling_path)
                self.pseudo_list = sorted(os.listdir(self.pseudolabeling_path))
            self.ntrain = len(self.train_list)
            self.num_data = len(self.train_list) + len(self.pseudo_list)

        elif self.mode == 'val':
            folds = pd.read_csv(self.folds_distr_path)
            folds = folds[folds.fold == fold_index]
            self.val_list = folds.id_code.values.tolist()
            self.target_list = folds.diagnosis.values.tolist()
            self.target = folds['diagnosis'].values.astype(np.int64)
            self.num_data = len(self.val_list)

        elif self.mode == 'test':
            self.test_list = sorted(os.listdir(self.test_image_path))
            self.num_data = len(self.test_list)

    def __getitem__(self, index):
        if self.fold_index is None and self.mode != 'test':
            print('WRONG!!!!!!! fold index is NONE!!!!!!!!!!!!!!!!!')
            return
        
        if self.mode == 'test':
            image = cv2.imread(os.path.join(self.test_image_path, self.test_list[index]), 1)
            if self.transform:
                sample = {"image": image}
                sample = self.transform(**sample)
                sample = self.to_tensor(**sample)
                image = sample['image']
            image_id = self.test_list[index].replace('.png', '')
            return image_id, image
        
        elif self.mode == 'train':
            if index < self.ntrain:
                #print(os.path.join(self.train_image_path, self.train_list[index]))
                #print(self.train_list[index])
                image = cv2.imread(os.path.join(self.train_image_path, self.train_list[index])+'.png', 1)
                label = self.target[index]#self.target_list[index]
                #label = cv2.imread(os.path.join(self.train_mask_path, self.train_list[index]), 0)
            else:
                #print('!')
                pseudo_index = index - self.ntrain
                image = cv2.imread(os.path.join(self.test_image_path, self.pseudo_list[pseudo_index])+'.png', 1)
                label = cv2.imread(os.path.join(self.pseudolabeling_path, self.pseudo_list[pseudo_index]), 0)                

        elif self.mode == 'val':
            #print('1')  
            image = cv2.imread(os.path.join(self.train_image_path, self.val_list[index])+'.png', 1)
            label = self.target[index]
            #label = cv2.imread(os.path.join(self.train_mask_path, self.val_list[index]), 0)

        if self.transform:
            sample = {"image": image}#, "mask": label}
            sample = self.transform(**sample)
            sample = self.to_tensor(**sample)
            #image, label = sample['image']#, sample['mask']
            image = sample['image']#, sample['mask']

        return image, label
         
    def __len__(self):
        return self.num_data

In [10]:
checkpoints_list = ['../input/modelnew1/densenet169_fold1.pth']

In [11]:
batch_size = 1
num_workers = 16

In [12]:
TRAIN_PATH = '.../input/aptos2019-blindness-detection/train_images'
TEST_PATH = '../input/aptos2019-blindness-detection/test_images'

train =  pd.read_csv("../input/aptos2019-blindness-detection/train.csv")
test =  pd.read_csv("../input/aptos2019-blindness-detection/test.csv")

In [13]:
dataset = PneumothoraxDataset(
    data_folder=TEST_PATH, mode='test', 
    transform=transform_val,
)
dataloader =  DataLoader(
    dataset=dataset, batch_size=batch_size, 
    num_workers=num_workers, shuffle=False
)

In [14]:
model0 = model
#model1 = model
#model2 = model
#model3 = model
#model4 = model

In [15]:
for idx, fold in enumerate(checkpoints_list):
    #print(idx, fold)
    globals()['fold%s' % idx] = fold

In [16]:
model0.load_state_dict(torch.load(fold0))
#model1.load_state_dict(torch.load(fold1))
#model2.load_state_dict(torch.load(fold2))
#model3.load_state_dict(torch.load(fold3))
#model4.load_state_dict(torch.load(fold4))

<All keys matched successfully>

In [17]:
model0.to(device)
#model1.to(device)
#model2.to(device)
#model3.to(device)
#model4.to(device);

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [18]:
#submission0 = {'id_code': [], 'diagnosis0': [], 'diagnosis1': [], 'diagnosis2': [], 'diagnosis3': [], 'diagnosis4': []}
submission0 = pd.DataFrame(columns=['id_code','diagnosis'])

In [19]:
model0.eval()
torch.cuda.empty_cache()
tqdm_loader = tqdm_notebook(dataloader)
for batch_idx, (id_codes, X) in enumerate(tqdm_loader):
    X = Variable(X).cuda()
    out0 = model0(X)
    output0 = torch.argmax(out0, axis = 1)
    submission0 = submission0.append({'id_code': id_codes[0], 'diagnosis': output0.item()}, ignore_index=True)
   # submission0['id_code'].append(id_codes)
   # submission0['diagnosis0'].append(output0.detach().cpu().numpy()[0][0])
   # submission0['diagnosis1'].append(output0.detach().cpu().numpy()[0][1])
   # submission0['diagnosis2'].append(output0.detach().cpu().numpy()[0][2])
   # submission0['diagnosis3'].append(output0.detach().cpu().numpy()[0][3])
   # submission0['diagnosis4'].append(output0.detach().cpu().numpy()[0][4])

HBox(children=(IntProgress(value=0, max=1928), HTML(value='')))




In [20]:
submission0.head(10)

Unnamed: 0,id_code,diagnosis
0,0005cfc8afb6,2
1,003f0afdcd15,3
2,006efc72b638,3
3,00836aaacf06,2
4,009245722fa4,3
5,009c019a7309,2
6,010d915e229a,3
7,0111b949947e,1
8,01499815e469,3
9,0167076e7089,0


In [21]:
submission0.diagnosis.value_counts()

2    756
3    455
0    325
1    225
4    167
Name: diagnosis, dtype: int64

In [22]:
submission0.to_csv('submission.csv', index=False)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
