In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import sys
package_dir = "../input/pretrained-models.pytorch-master/"
sys.path.insert(0, package_dir)
import pretrainedmodels
package_dir = '../input/early-stopping-pytorch'
sys.path.append(package_dir)
from pytorchtools import EarlyStopping
import numpy as np
import pandas as pd
import scipy as sp
from functools import partial
from sklearn import metrics
from sklearn.model_selection import KFold
from collections import Counter, OrderedDict
import json
import math
import numbers
import time
import cv2
import albumentations
from albumentations import torch as AT
import gc
import torchvision
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.nn import functional as F
from albumentations.augmentations import transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
import os
from sklearn.metrics import cohen_kappa_score
# Side length (pixels) of the square every image is resized to before the
# albumentations transform pipeline is applied.
IMG_SIZE = 256

# To have reproducible results and compare them
# NOTE(review): Python's built-in `random` module is not seeded here; confirm
# whether the augmentation library draws from it.
seedValue = 2019
np.random.seed(seedValue)
torch.manual_seed(seedValue)
os.environ['PYTHONHASHSEED'] = str(seedValue)
torch.cuda.manual_seed(seedValue)
torch.cuda.manual_seed_all(seedValue) 
# Trade cuDNN auto-tuning speed for deterministic kernels.
torch.backends.cudnn.deterministic = True  
torch.backends.cudnn.benchmark = False

# Specify GPU usage
# Only the GPUs listed here are visible to this process; inside the process
# they are addressed by the logical ids in `device_ids`.
# NOTE(review): presumably this must run before CUDA is initialised to take
# effect - confirm it is not moved below any call that touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5" 
device_ids = [0,1]
device = torch.device("cuda:0")

def quadratic_kappa(y_hat, y, coef):
    """Quadratic-weighted Cohen's kappa between thresholded scores and labels.

    The continuous scores in ``y_hat`` are first converted to class indices
    using the cut points in ``coef``: a score ``s`` becomes class ``k`` when
    ``coef[k-1] <= s < coef[k]`` (class 0 below ``coef[0]`` and class
    ``len(coef)`` at or above ``coef[-1]``).

    The previous implementation always wrote to ``y_hat[1]`` instead of the
    element being visited, so the cut points were effectively ignored and the
    caller's tensor was modified in place. This version is vectorised and
    leaves its inputs untouched.

    Args:
        y_hat: 1-D tensor (or array-like) of continuous model outputs.
        y: 1-D tensor (or array-like) of reference labels, same length.
        coef: monotonically increasing sequence of cut points.

    Returns:
        0-dim tensor on the module-level ``device`` holding the kappa score.
    """
    scores = torch.as_tensor(y_hat).detach().cpu().numpy().ravel()
    y_true = torch.as_tensor(y).detach().cpu().numpy().ravel()
    # np.digitize (right=False) implements exactly the half-open intervals of
    # the original if/elif chain; NaN scores land in the last class, like the
    # original `else` branch.
    y_pred = np.digitize(scores, coef).astype(y_true.dtype)
    return torch.tensor(cohen_kappa_score(y_pred, y_true, weights='quadratic'),device=device)

# Pre-processing

In [None]:
def crop_image1(img,tol=7):
    """Crop a 2-D image to the rows and columns holding a pixel brighter than ``tol``.

    Rows/columns whose every pixel is ``<= tol`` are dropped; the remaining
    rows and columns are returned in their original order.
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image_from_gray(img,tol=7):
    """Crop away the dark border of an image.

    A pixel counts as "content" when its gray value is greater than ``tol``.
    The image is cropped to the rows and columns that contain at least one
    content pixel.

    Args:
        img: 2-D gray image, or 3-D image that ``cv2.COLOR_RGB2GRAY`` accepts.
        tol: darkness tolerance; pixels ``<= tol`` are treated as background.

    Returns:
        The cropped image (same number of channels as the input). If no pixel
        exceeds ``tol`` the image is too dark to crop and is returned as is.

    Raises:
        ValueError: if ``img`` is neither 2-D nor 3-D.
    """
    if img.ndim == 2:
        gray_img = img
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        raise ValueError('expected a 2-D or 3-D image, got ndim={}'.format(img.ndim))

    mask = gray_img > tol
    keep_rows = mask.any(axis=1)
    keep_cols = mask.any(axis=0)
    if not keep_rows.any():
        # image is too dark so that we would crop out everything
        return img

    # The open mesh indexes the first two axes only, so for a 3-D image all
    # channels are cropped together (no per-channel slicing and re-stacking).
    return img[np.ix_(keep_rows, keep_cols)]
    

In [None]:
# Per-channel mean / std handed to Normalize (the usual ImageNet statistics).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: always blur, random flips, random brightness change,
# then normalise and convert to a tensor.
transform = albumentations.Compose(
    [
        albumentations.GaussianBlur(blur_limit=7, p=1),
        albumentations.HorizontalFlip(),
        albumentations.VerticalFlip(),
        albumentations.RandomBrightness(limit=0.1),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        AT.ToTensor(),
    ]
)

# Validation / test pipeline: same as above minus the brightness change.
transform_valid_test = albumentations.Compose(
    [
        albumentations.GaussianBlur(blur_limit=7, p=1),
        albumentations.HorizontalFlip(),
        albumentations.VerticalFlip(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        AT.ToTensor(),
    ]
)


In [None]:
class RetinopathyDataset(Dataset):
    """Fundus-image dataset described by a CSV file.

    ``datatype`` selects the image folder and the CSV columns that are read:

    * ``'train'``     - ``train_images`` of aptos2019 (``id_code`` / ``diagnosis``)
    * ``'train_old'`` - ``resized_train`` of diabetic-retinopathy-resized
      (``image`` / ``level``)
    * anything else   - ``test_images`` of aptos2019 (``id_code``, no labels)

    Labelled samples are returned as ``{'image', 'labels'}``, unlabelled ones
    as ``{'image'}``.

    Args:
        csv_file: path of the CSV listing the samples.
        transform: albumentations-style callable, invoked as
            ``transform(image=img)`` and expected to return ``{'image': ...}``.
        datatype: see above.
        mixup_prob: probability of blending a labelled sample with a random
            sample of a neighbouring class. When it is ``> 0`` every label is
            returned as a float tensor so that batches share one dtype.
    """

    # datatype -> (image directory, id column, label column, file extension)
    _LABELLED_SOURCES = {
        'train': ('../input/aptos2019-blindness-detection/train_images',
                  'id_code', 'diagnosis', '.png'),
        'train_old': ('../input/diabetic-retinopathy-resized/resized_train',
                      'image', 'level', '.jpeg'),
    }
    _TEST_SOURCE = ('../input/aptos2019-blindness-detection/test_images',
                    'id_code', None, '.png')

    def __init__(self, csv_file, transform, datatype='train', mixup_prob=0):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.datatype = datatype
        self.mixup_prob = mixup_prob

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_dir, id_col, label_col, ext = self._LABELLED_SOURCES.get(
            self.datatype, self._TEST_SOURCE)
        # Drawn for every sample, exactly as before, so the NumPy random
        # stream does not depend on the kind of dataset.
        is_mixup = bool(np.random.binomial(n=1, p=self.mixup_prob))
        img_name = os.path.join(img_dir, self.data.loc[idx, id_col] + ext)

        if label_col is None:
            # Unlabelled (test) data: there is no label to convert and nothing
            # to mix with. The old code fell through to `int(label)` here and
            # raised UnboundLocalError.
            return {'image': self.image_process(img_name)['image']}

        label = self.data.loc[idx, label_col]
        img_name2 = None
        if is_mixup:
            label2 = self.adj_label(label)
            candidates = self.data[self.data[label_col] == label2]
            # Skip mixup instead of crashing when the neighbouring class does
            # not occur in this CSV.
            if len(candidates) > 0:
                img_id2 = candidates.sample(1).iloc[0][id_col]
                img_name2 = os.path.join(img_dir, img_id2 + ext)

        img = self.image_process(img_name)['image']
        if img_name2 is not None:
            img2 = self.image_process(img_name2)['image']
            img = (img + img2) / 2
            # `scale` is a standard deviation, i.e. 0.1**2 = 0.01.
            label = (label + label2)/2 + np.random.normal(loc=0.0, scale=0.1**2)

        # batch must be a sequence of the same dtype
        if self.mixup_prob > 0:
            label = torch.as_tensor(float(label))
        else:
            label = torch.as_tensor(int(label))

        return {'image': img,
                'labels': label
                }

    def image_process(self,img_name):
        """Read ``img_name``, crop/zoom, resize to ``IMG_SIZE`` and apply ``self.transform``.

        Returns the dictionary produced by the transform (the processed image
        is stored under ``'image'``).

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(img_name)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError('Could not read image: {}'.format(img_name))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.zoom_to_center(img)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        # now replaced by albumentations func --> img = cv2.addWeighted (img,4, cv2.GaussianBlur(img,(0,0),30),-4,128)
        img = self.transform(image=img)

        return img

    def zoom_to_center(self, img, tol=7, th = 0.90 ,p= 1.0):
        """Crop the dark border and, if much background remains, zoom in slightly.

        Args:
            img: RGB image.
            tol: gray level up to which a pixel counts as background.
            th: when the fraction of non-background pixels after cropping is
                ``<= th`` a random shift/scale/rotate is applied.
            p: probability handed to the shift/scale/rotate augmentation.
        """
        img = crop_image_from_gray(img, tol = tol)
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        s1 = gray_img.size
        # Use the same tolerance as the crop above (this was a hard-coded 7).
        mask = gray_img > tol
        s2 = mask.sum()
        ratio = s2/s1
        if ratio <= th:
            coef = 1.1
            # scale_limit is currently sampled from a uniform distribution;
            # this is to be revised later to take the actual distribution
            # into account. The threshold is going to be revised as well.
            aug = albumentations.ShiftScaleRotate(shift_limit = 0.01, scale_limit=(0,coef-1),
                                                  rotate_limit=15,p=p)
            img = aug(image=img)['image']

        return img

    def adj_label(self,l):
        """Return a random neighbouring class of ``l`` (``l - 1`` or ``l + 1``).

        The result is reflected back when it would leave the range 0..4, so
        class 0 always maps to 1 and class 4 always maps to 3.
        """
        res = l - 1 + 2* np.random.randint(2)
        if res == -1:
            res = 1
        elif res == 5:
            res = 3
        return res

In [None]:
# Optional visual check of the augmented (and possibly mixed-up) training samples.
PLOT = False
if PLOT:
    rows, cols = 4, 10
    sample_dataset = RetinopathyDataset(
        csv_file="../input/aptos2019-blindness-detection/train.csv",
        transform=transform,
        datatype='train',
        mixup_prob=0.5,
    )

    fig = plt.figure(figsize=(25, 16))
    for idx in range(rows * cols):
        ax = fig.add_subplot(5, 10, idx + 1, xticks=[], yticks=[])
        data = sample_dataset[idx]
        # channels-first tensor -> channels-last array for matplotlib
        im = data['image'].permute(1, 2, 0).numpy()
        label = data['labels'].numpy()
        # stretch the normalised values to 0..255 for display
        lowest, highest = np.amin(im), np.amax(im)
        im = (im - lowest) / (highest - lowest) * 255
        ax.imshow(im.astype(np.uint8))
        ax.set_title('Label: {0:.2f}'.format(label))


In [None]:
def load_model_instagram():
    """Build a ``resnext101_32x16d_wsl`` regressor with a frozen backbone.

    The network is fetched through ``torch.hub`` from
    ``facebookresearch/WSL-Images``. All of its parameters are frozen, then the
    average pooling and the ``fc`` layer are replaced by new (trainable)
    modules; the new head ends in a single output unit.
    """
    model = torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x16d_wsl')

    # Freeze first: the modules assigned afterwards are created fresh and
    # therefore keep requires_grad=True.
    for weight in model.parameters():
        weight.requires_grad = False

    model.avgpool = nn.AdaptiveAvgPool2d(1)

    bn_options = dict(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    head_layers = [
        nn.BatchNorm1d(2048, **bn_options),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=2048, out_features=2048, bias=True),
        nn.ReLU(),
        nn.BatchNorm1d(2048, **bn_options),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=2048, out_features=1, bias=True),
    ]
    model.fc = nn.Sequential(*head_layers)

    return model

def load_model_imagenet(pretrained='imagenet'):
    """Build a ResNet-50 regressor (from ``pretrainedmodels``) with a frozen backbone.

    Args:
        pretrained: forwarded to the ``pretrainedmodels`` constructor.

    Returns:
        A module with two children: ``l1`` (every child of the ResNet except
        the last one) and ``last`` (the new trainable head with one output).
        The model is returned on the CPU; the caller decides where to move it.
        Previously ``l1`` was pushed to ``'cuda:0'`` here, which failed on
        machines without CUDA and left ``l1`` and ``last`` on different devices.
    """
    class Net(nn.Module):
        def __init__(self, model):
            super(Net, self).__init__()
            # Freeze the pretrained weights; the modules assigned below are
            # new and stay trainable.
            for param in model.parameters():
                    param.requires_grad = False
            model.avgpool = nn.AdaptiveAvgPool2d(1)
            model.last_linear = nn.Sequential(
                                              nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                                              nn.Dropout(p=0.25),
                                              nn.Linear(in_features=2048, out_features=2048, bias=True),
                                              nn.ReLU(),
                                              nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                                              nn.Dropout(p=0.5),
                                              nn.Linear(in_features=2048, out_features=1, bias=True),
                                             )
            children = list(model.children())
            self.l1 = nn.Sequential(*children[:-1])
            self.last = children[-1]

        def forward(self, x):
            x = self.l1(x)
            # flatten everything but the batch dimension before the head
            x = x.view(x.size()[0], -1)
            x = self.last(x)
            return x

    model = Net(pretrainedmodels.__dict__['resnet50'](pretrained=pretrained))
    return model

# Training

In [None]:
class train(object):
    """Training driver for the retinopathy regressors.

    ``params`` is a dictionary; the keys read are ``lr``, ``batch_size``,
    ``n_epochs``, ``n_freeze``, ``coef``, ``criterion``, ``num_workers``,
    ``load_state``, ``load_path``, ``save_path``, ``device``, ``n_folds``,
    ``early_stop``, ``patience``, ``finetune`` and ``mixup_prob``. Missing keys
    become ``None``.

    Typical use: ``get_train(dataset)``, optionally ``get_valid(dataset)``,
    then ``fit()`` or ``cv()``.
    """

    def __init__(self, params):
        self.lr = params.get('lr')
        self.batch_size = params.get('batch_size')
        self.n_epochs = params.get('n_epochs')
        self.n_freeze = params.get('n_freeze')
        self.coef = params.get('coef')
        self.criterion = params.get('criterion')
        self.num_workers = params.get('num_workers')
        self.load_state = params.get('load_state')
        self.load_path = params.get('load_path')
        self.save_path = params.get('save_path')
        self.device = params.get('device')
        self.n_folds = params.get('n_folds')
        self.use_valid = False
        self.early_stop = params.get("early_stop")
        self.patience = params.get("patience")
        self.finetune = params.get("finetune")
        self.mixup_prob = params.get('mixup_prob')
        # With mixup the training labels are not integers, so kappa is skipped.
        self.is_mixup = bool(self.mixup_prob)

    def get_train(self,data):
        """Register the training dataset."""
        self.train_data = data

    def get_valid(self,data):
        """Register a separate validation dataset and enable validation in ``fit``."""
        self.valid_data = data
        self.use_valid = True

    @staticmethod
    def _strip_module_prefix(state_dict):
        """Return ``state_dict`` with a leading ``module.`` removed from every key that has one.

        ``nn.DataParallel`` stores its parameters under ``module.``; stripping
        only where the prefix is present lets one loader handle checkpoints
        saved with or without the wrapper.
        """
        prefix = 'module.'
        return OrderedDict(
            (key[len(prefix):] if key.startswith(prefix) else key, value)
            for key, value in state_dict.items()
        )

    def _build_model(self):
        """Create the network, optionally restore a checkpoint, and move it to the GPU(s)."""
        #model = load_model_instagram()
        model = load_model_imagenet(pretrained='imagenet')
        if self.load_state:
            # Load on the CPU so the checkpoint opens regardless of the device
            # it was saved from; the model is moved to `device` below.
            state_dict = torch.load(self.load_path, map_location='cpu')
            model.load_state_dict(self._strip_module_prefix(state_dict))
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model, device_ids)
        return model.to(device)

    def _build_loaders(self, use_cv, train_idx, valid_idx):
        """Return ``(train_loader, valid_loader)``; the latter is ``None`` without validation."""
        common = dict(batch_size=self.batch_size, num_workers=self.num_workers)
        loader_valid = None
        if use_cv:
            loader_train = torch.utils.data.DataLoader(
                self.train_data, sampler=SubsetRandomSampler(train_idx), **common)
            loader_valid = torch.utils.data.DataLoader(
                self.train_data, sampler=SubsetRandomSampler(valid_idx), **common)
        else:
            # Shuffle the training data, as the CV path already does through
            # SubsetRandomSampler; without it every epoch walks the CSV in order.
            loader_train = torch.utils.data.DataLoader(self.train_data, shuffle=True, **common)
            if self.use_valid:
                loader_valid = torch.utils.data.DataLoader(self.valid_data, **common)
        return loader_train, loader_valid

    def _run_epoch(self, model, loader, optimizer=None):
        """Run one pass over ``loader``.

        With an ``optimizer`` the model is trained, otherwise it is evaluated
        without gradients. Returns ``(mean RMSE loss, mean kappa)``; kappa is
        ``None`` when mixup is active.
        """
        training = optimizer is not None
        if training:
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        kappa = 0
        steps = 0
        with tqdm(loader, total=int(len(loader))) as pbar:
            for batch in pbar:
                inputs = batch["image"].to(self.device, dtype=torch.float)
                labels = batch["labels"].view(-1, 1).to(self.device, dtype=torch.float)

                if training:
                    optimizer.zero_grad()
                with torch.set_grad_enabled(training):
                    outputs = model(inputs)
                    loss =  torch.sqrt(self.criterion(outputs, labels)) #RMSE
                    if training:
                        loss.backward()
                        optimizer.step()

                running_loss += loss.mean().item()
                if not self.is_mixup:
                    y_hat = outputs.detach().view(-1).cpu()
                    y = labels.detach().view(-1).cpu()
                    kappa += quadratic_kappa(y_hat, y, self.coef).mean().item()
                steps += 1
                pbar.set_postfix(OrderedDict(rmse_loss = running_loss / steps,
                                             kappa_score = kappa / steps))

        # An empty loader must not end in a division by zero.
        steps = max(steps, 1)
        return running_loss / steps, (None if self.is_mixup else kappa / steps)

    def fit(self, use_cv=False, train_idx=None, valid_idx=None):
        """Train the model.

        Args:
            use_cv: train/validate on the index subsets of the training data
                given by ``train_idx`` / ``valid_idx`` (used by ``cv``).
            train_idx, valid_idx: sample indices for the CV split.

        Validation runs after every epoch when ``use_cv`` is set or a
        validation set was registered; early stopping watches the validation
        loss. Outside CV the final weights are written to ``save_path``.
        """
        since = time.time()
        model = self._build_model()

        # Only the head is handed to the optimiser. Its explicit 'lr' takes
        # precedence over the default `lr=self.lr` passed to Adam.
        net = model.module if isinstance(model, nn.DataParallel) else model
        plist = [{'params': net.last.parameters(), 'lr': 1e-3}]
        optimizer = optim.Adam(plist, lr=self.lr)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5)

        data_loader_train, data_loader_valid = self._build_loaders(use_cv, train_idx, valid_idx)

        early_stopping = None
        if self.early_stop:
            early_stopping = EarlyStopping(patience=self.patience, verbose=True)

        for epoch in range(self.n_epochs):
            if epoch == self.n_freeze and not self.finetune:
                # NOTE(review): the optimiser was built from the head
                # parameters only, so the backbone still receives no updates
                # after this point - confirm that is intended.
                for param in model.parameters():
                    param.requires_grad = True

            print('Epoch {}/{}'.format(epoch, self.n_epochs - 1))
            print('-' * 10)
            # NOTE(review): stepping the scheduler before the optimiser is the
            # pre-1.1 PyTorch convention; kept to leave the schedule unchanged.
            scheduler.step()

            epoch_loss, kappa = self._run_epoch(model, data_loader_train, optimizer)
            print('Training Loss: {:.4f}'.format(epoch_loss))
            if kappa is not None:
                print('Training Kappa: {:.4f}'.format(kappa))

            if data_loader_valid is None:
                continue

            epoch_loss, kappa = self._run_epoch(model, data_loader_valid)
            print('Validation Loss: {:.4f}'.format(epoch_loss))
            if kappa is not None:
                print('Validation Kappa: {:.4f}'.format(kappa))

            if early_stopping is not None:
                early_stopping(epoch_loss, model)
                if early_stopping.early_stop:
                    print("Early stopping")
                    break

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

        if not use_cv:
            if self.save_path is None:
                print('save_path is not set; the model was not saved')
            else:
                torch.save(model.state_dict(), self.save_path)

        del model, data_loader_train, data_loader_valid
        gc.collect()
        torch.cuda.empty_cache()

    def cv(self):
        """Run ``fit`` once per fold of a shuffled K-fold split of the training data."""
        indices = list(range(len(self.train_data)))
        kf = KFold(n_splits=self.n_folds, random_state=1337, shuffle=True)
        folds = list(kf.split(indices))

        # Training
        for fold, (fold_train_idx, fold_valid_idx) in enumerate(folds):
            print('Fold:',fold)
            self.fit(use_cv=True,train_idx=fold_train_idx,valid_idx=fold_valid_idx)

In [None]:
#params
# Shared configuration consumed by `train(params)`. The run blocks below modify
# this dictionary in place, so each block inherits whatever an earlier enabled
# block has set.
params = {
    # Passed to Adam as the default learning rate; the head's parameter group
    # in `train.fit` carries its own explicit 'lr'.
    "lr" : 1e-5,
    "batch_size" : 64,
    "n_epochs" : 20,
    # Epoch at which all parameters get requires_grad=True (unless finetune).
    "n_freeze" : 1,
    "num_workers" : 10,
    # Cut points that turn the regression output into the classes 0..4.
    "coef" : [0.5, 1.5, 2.5, 3.5],
    "criterion" : nn.MSELoss(),
    "load_state" : False,
    "load_path" : None, 
    'save_path' : None,
    "device" : device,
    "n_folds" : 5,
    "early_stop" : True,
    "patience" : 3,
    "finetune" : False,
    "mixup_prob" : 0,
}

#training for the lazy, like me
# Each run below is switched on or off by editing its `if 0:` / `if 1:` guard.

# cv: new
# Cross-validation on the aptos2019 training set, starting from a checkpoint.
if 0:
    params['finetune'] = True
    params['load_state'] = True
    #params['load_path'] =  "../input/mmmodel/model_old_test.bin" 
    params['load_path'] =  "../data/checkpoint/checkpoint.pt"
    Mytrain = train(params)
    Mytrain.get_train(RetinopathyDataset(csv_file="../input/aptos2019-blindness-detection/train.csv",
                                            transform=transform, datatype='train'))
    Mytrain.cv()

# train:old & valid:new
# Train with mixup on the diabetic-retinopathy-resized data and validate on the
# aptos2019 training set (validation uses the training `transform`).
if 1:
    params['finetune'] = False
    params['mixup_prob'] = 0.5
    params['n_epochs'] = 40
    params['patience'] = 4
    params['save_path'] =  "model_ResNet50_old_mixup.bin"
    Mytrain = train(params)
    Mytrain.get_train(RetinopathyDataset(csv_file="../input/diabetic-retinopathy-resized/new_trainLabels.csv",
                                            transform=transform, datatype='train_old',mixup_prob=params['mixup_prob']))
    Mytrain.get_valid(RetinopathyDataset(csv_file="../input/aptos2019-blindness-detection/train.csv",
                                           transform=transform, datatype='train'))
    Mytrain.fit()
    
# train:new
# Short run on the aptos2019 training set, starting from "checkpoint.pt".
if 0:
    params['finetune'] = True
    params['n_epochs'] = 3
    params['load_state'] = True
    #params['load_path'] =  "../input/mmmodel/model_insta_old.bin" 
    params['load_path'] =  "checkpoint.pt"
    params['save_path'] =  "model_ResNet50_old_mixup.bin"        
    Mytrain = train(params)
    Mytrain.get_train(RetinopathyDataset(csv_file="../input/aptos2019-blindness-detection/train.csv",
                                           transform=transform, datatype='train'))
    Mytrain.fit()

# Inference

In [None]:
load_path = "../data/model/aptos19_ResNet101/model.bin"

model = load_model_instagram()

# Load on the CPU so the checkpoint opens regardless of the device it was saved
# from. (The old single-GPU branch used `self.device`, which does not exist at
# module level and raised NameError.)
state_dict = torch.load(load_path, map_location='cpu')

# Checkpoints written from an nn.DataParallel model prefix every key with
# `module.`; strip it only where it is actually present instead of guessing
# from the number of GPUs on the current machine.
state_dict = OrderedDict(
    (k[len('module.'):] if k.startswith('module.') else k, v)
    for k, v in state_dict.items()
)
model.load_state_dict(state_dict)

model = model.to(device)

model.eval()

In [None]:
# Unlabelled test images, listed by the sample submission file.
test_dataset = RetinopathyDataset(
    csv_file='../input/aptos2019-blindness-detection/sample_submission.csv',
    transform=transform_valid_test,
    datatype='test',
)

In [None]:
test_bs = 64
# Sum of the raw regression outputs over all TTA passes, one row per test image.
test_preds = np.zeros((len(test_dataset), 1))
# Number of passes over the test set; the dataset's random transforms make each
# pass a different augmentation of the same images.
TTA = 1

# shuffle=False keeps batch i aligned with rows [i * test_bs, (i + 1) * test_bs).
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=test_bs, shuffle=False, num_workers=8)

# Inference only: no autograd graph is needed, which saves memory and time.
with torch.no_grad():
    for _ in range(TTA):
        for i, x_batch in enumerate(tqdm(test_data_loader)):
            pred = model(x_batch["image"].to(device))
            start = i * test_bs
            test_preds[start:start + len(pred)] += pred.cpu().numpy().reshape(-1, 1)
test_preds /= TTA

In [None]:
# Cut points between the classes 0..4: a score s gets class k when
# coef[k-1] <= s < coef[k].
coef = [0.5, 1.5, 2.5, 3.5]

# np.digitize applies exactly those half-open intervals to the whole array at
# once. As before, `test_preds` holds the class index of every image afterwards.
test_preds[:] = np.digitize(test_preds, coef)


sample = pd.read_csv("../input/aptos2019-blindness-detection/sample_submission.csv")
# Flatten the (N, 1) array so a plain 1-D column is assigned.
sample.diagnosis = test_preds.astype(int).ravel()
sample.to_csv("submission.csv", index=False)

In [None]:
# Number of test images assigned to each predicted class (quick sanity check
# of the submission).
sample["diagnosis"].value_counts()