versions:
* v1: 

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys
import re
import random
import math
import json
import collections
from collections import Counter
from joblib import load, dump
from functools import partial
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style

import torch
from torchvision import models as md
from torch import nn
from torch.nn import functional as F
from torch.utils import model_zoo
# from fastai import *
# from fastai.vision import *
from fastai.vision import (Learner, ImageList, get_transforms, imagenet_stats, 
                           DatasetType, FloatList, ResizeMethod)
# from fastai.callbacks import *
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix

package_dir = "../input/efficientnet-pytorch/efficientnet-pytorch/EfficientNet-PyTorch-master"
sys.path.insert(0, package_dir)
from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.model import MBConvBlock
from efficientnet_pytorch import utils

In [2]:
#copying weighst to the local directory 
!mkdir models
!cp '../input/searchb5f1v4/search-b5-f1-v4-stage-2.pth' 'models'
!cp '../input/fastai-eff-train-f1-models/b6_f1_v4_stage_4_best.pth' 'models'
!cp '../input/fastai-eff-train-f1-models/b7_f1_v2_stage_5_best.pth' 'models'
!cp '../input/kaggle-public/abcdef.pth' 'models'

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and every CUDA device),
    exports ``PYTHONHASHSEED`` and configures cuDNN for repeatable results.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the interpreter that is
    # already running is fixed at start-up and is not changed by this.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds all visible GPUs (not only the current one); ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick a different convolution algorithm on each run, which
    # defeats the deterministic flag above, so switch it off as well.
    torch.backends.cudnn.benchmark = False


SEED = 2019  # 999
seed_everything(SEED)

In [4]:
def get_df(base_image_dir='../input/aptos2019-blindness-detection/'):
    """Load the training labels and the submission template.

    Args:
        base_image_dir: Dataset root containing ``train.csv``,
            ``sample_submission.csv`` and the ``train_images/`` folder.
            Defaults to the Kaggle input location.

    Returns:
        ``(train_df, test_df)``: ``train_df`` is shuffled, re-indexed from 0,
        has its ``id_code`` column replaced by a ``path`` column pointing at
        ``<base_image_dir>/train_images/<id_code>.png``; ``test_df`` is
        ``sample_submission.csv`` read as-is.
    """
    train_dir = os.path.join(base_image_dir, 'train_images/')
    train_df = pd.read_csv(os.path.join(base_image_dir, 'train.csv'))
    train_df['path'] = train_df['id_code'].map(
        lambda id_code: os.path.join(train_dir, '{}.png'.format(id_code)))
    train_df = train_df.drop(columns=['id_code'])
    # No random_state is passed, so the shuffle follows NumPy's global RNG state.
    train_df = train_df.sample(frac=1).reset_index(drop=True)
    # Read the template from the same root instead of repeating the literal path.
    test_df = pd.read_csv(os.path.join(base_image_dir, 'sample_submission.csv'))
    return train_df, test_df

# Load the shuffled training frame and the submission template once;
# later cells read both as notebook-level globals.
train_df, test_df = get_df()

In [5]:
def quadratic_kappa(y_pred, y):
    """Quadratic-weighted Cohen's kappa between rounded predictions and targets.

    Args:
        y_pred: Tensor of continuous predictions; each value is rounded to the
            nearest integer before scoring.
        y: Tensor of target grades.

    Returns:
        A 0-dim tensor holding the score, placed on the same device as
        ``y_pred`` (previously hard-coded to ``cuda:0``, which fails on a
        CPU-only host).
    """
    # scikit-learn operates on host arrays, so detach and move both inputs to
    # CPU NumPy first; float() also keeps half-precision inputs roundable.
    rounded = torch.round(y_pred.detach().float()).cpu().numpy().ravel()
    targets = y.detach().cpu().numpy().ravel()
    score = cohen_kappa_score(rounded, targets, weights='quadratic')
    return torch.tensor(score, device=y_pred.device)

In [6]:
class OptimizedRounder(object):
    """Map continuous regression outputs to ordinal grades 0-4 via four thresholds.

    ``fit`` searches (Nelder-Mead) for the thresholds that maximise the
    quadratic-weighted kappa on a labelled set; ``predict`` applies a set of
    thresholds to new predictions.
    """

    def __init__(self):
        # Placeholder until fit() stores the scipy OptimizeResult here.
        self.coef_ = 0

    @staticmethod
    def _bucketize(X, coef):
        """Assign each value in ``X`` to a grade 0-4 using ``coef[0..3]``.

        Mirrors an ``if/elif`` chain evaluated in order: the first threshold a
        value is strictly below decides its grade, anything else (including
        NaN) becomes 4. The input is not modified; the result has the same
        shape and dtype as ``np.asarray(X)``.
        """
        values = np.asarray(X)
        grades = np.where(values < coef[0], 0,
                 np.where(values < coef[1], 1,
                 np.where(values < coef[2], 2,
                 np.where(values < coef[3], 3, 4))))
        return grades.astype(values.dtype)

    def _kappa_loss(self, coef, X, y):
        """Return the negated quadratic kappa of ``X`` thresholded with ``coef``.

        Negated because the optimiser minimises.
        """
        X_p = self._bucketize(X, coef)
        ll = cohen_kappa_score(y, X_p, weights='quadratic')
        return -ll

    def fit(self, X, y):
        """Optimise the four thresholds on predictions ``X`` and targets ``y``.

        Stores the optimiser result in ``self.coef_`` and prints the kappa
        reached by the fitted thresholds.
        """
        # Imported here so the call does not depend on some other package
        # having already loaded the ``scipy.optimize`` submodule.
        from scipy.optimize import minimize

        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = minimize(loss_partial, initial_coef, method='nelder-mead')
        print(-loss_partial(self.coef_['x']))

    def predict(self, X, coef=None):
        """Threshold ``X`` into grades 0-4.

        Args:
            X: Array-like of continuous predictions.
            coef: Sequence of four thresholds. When omitted, the thresholds
                found by ``fit`` are used.

        Returns:
            A new array shaped like ``X`` holding the grades.

        Raises:
            RuntimeError: If ``coef`` is omitted and ``fit`` has not been run.
        """
        if coef is None:
            coef = self.coefficients()
        return self._bucketize(X, coef)

    def coefficients(self):
        """Return the fitted thresholds.

        Raises:
            RuntimeError: If ``fit`` has not been run yet.
        """
        try:
            return self.coef_['x']
        except (TypeError, KeyError, IndexError):
            raise RuntimeError(
                "OptimizedRounder has not been fitted; call fit() first") from None

In [7]:
# Inference configuration — you can play around with tfms and image sizes.
# Batch size handed to the DataBunch (`bs` is a short alias for the same value).
bs = BATCH_SIZE = 32
# Size passed to the image transform step (`sz` is a short alias).
sz = IMG_SIZE = 256
# Four cut points handed to OptimizedRounder.predict to turn each model's
# regression output into a grade 0-4.
coef = [0.57, 1.37, 2.57, 3.57]
# fastai transform set with flipping enabled, including vertical flips.
tfms = get_transforms(do_flip=True, flip_vert=True)

In [8]:
def build_databunch():
    """Assemble the normalised fastai DataBunch from the training frame.

    Steps: read image paths from ``train_df['path']``, hold out a random 20%
    for validation, use ``diagnosis`` as a float (regression) label, apply
    ``tfms`` while squishing to ``IMG_SIZE``, batch, then normalise with the
    ImageNet statistics.
    """
    items = ImageList.from_df(df=train_df, path='./', cols='path')
    labelled = items.split_by_rand_pct(0.2).label_from_df(
        cols='diagnosis',
        label_cls=FloatList,
    )
    transformed = labelled.transform(
        tfms,
        size=IMG_SIZE,
        resize_method=ResizeMethod.SQUISH,
        padding_mode='zeros',
    )
    bunch = transformed.databunch(bs=BATCH_SIZE, num_workers=4)
    return bunch.normalize(imagenet_stats)


data = build_databunch()

In [9]:
# model 01
netname = 'efficientnet-b5'
md_ef = EfficientNet.from_name(netname, override_params={'num_classes': 1})

learn = Learner(data, md_ef, 
                metrics=[quadratic_kappa], model_dir="models").to_fp16()

df_path = '../input/aptos2019-blindness-detection'
learn.data.add_test(ImageList.from_df(test_df, df_path,
                                      folder='test_images', suffix='.png'))
learn = learn.load('search-b5-f1-v4-stage-2')

opt = OptimizedRounder()
preds, y = learn.get_preds(DatasetType.Test)
test_preds = opt.predict(preds, coef)
test_df.diagnosis = test_preds.astype(int)
submission1 = test_df.copy()

In [10]:
# model 02
netname = 'efficientnet-b6'
md_ef = EfficientNet.from_name(netname, override_params={'num_classes': 1})

learn = Learner(data, md_ef, 
                metrics=[quadratic_kappa], model_dir="models").to_fp16()

df_path = '../input/aptos2019-blindness-detection'
learn.data.add_test(ImageList.from_df(test_df, df_path,
                                      folder='test_images', suffix='.png'))
learn = learn.load('b6_f1_v4_stage_4_best')

opt = OptimizedRounder()
preds, y = learn.get_preds(DatasetType.Test)
test_preds = opt.predict(preds, coef)
test_df.diagnosis = test_preds.astype(int)
submission2 = test_df.copy()

In [11]:
# model 03
netname = 'efficientnet-b7'
md_ef = EfficientNet.from_name(netname, override_params={'num_classes': 1})

learn = Learner(data, md_ef, 
                metrics=[quadratic_kappa], model_dir="models").to_fp16()

df_path = '../input/aptos2019-blindness-detection'
learn.data.add_test(ImageList.from_df(test_df, df_path,
                                      folder='test_images', suffix='.png'))
learn = learn.load('b7_f1_v2_stage_5_best')

opt = OptimizedRounder()
preds, y = learn.get_preds(DatasetType.Test)
test_preds = opt.predict(preds, coef)
test_df.diagnosis = test_preds.astype(int)
submission3 = test_df.copy()

In [12]:
# model 04
netname = 'efficientnet-b5'
md_ef = EfficientNet.from_name(netname, override_params={'num_classes': 1})

learn = Learner(data, md_ef, 
                metrics=[quadratic_kappa], model_dir="models").to_fp16()

df_path = '../input/aptos2019-blindness-detection'
learn.data.add_test(ImageList.from_df(test_df, df_path,
                                      folder='test_images', suffix='.png'))
learn = learn.load('abcdef')

opt = OptimizedRounder()
preds, y = learn.get_preds(DatasetType.Test)
test_preds = opt.predict(preds, coef)
test_df.diagnosis = test_preds.astype(int)
submission4 = test_df.copy()

In [13]:
# Per-model scores, kept for reference only: nothing below reads `score`.
score = [0.786, 0.779, 0.773, 0.793]
# Vote weight of each model, aligned positionally with `subData`.
weight = [0.275, 0.2, 0.175, 0.35]
subData = [submission1, submission2, submission3, submission4]

numClass = 5
# subTemp[row, cls] accumulates the weight of every model that voted `cls`
# for that row.
subTemp = np.zeros((subData[0].shape[0], numClass))
for model_sub, model_weight in zip(subData, weight):
    voted_class = model_sub.diagnosis.tolist()
    subTemp[model_sub.index, voted_class] += model_weight
print(subTemp)

# The class with the largest accumulated weight wins.
sub = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
sub['diagnosis'] = subTemp.argmax(1).astype(int)
sub.to_csv('submission.csv', index=False)

[[0.    0.    1.    0.    0.   ]
 [0.    0.    0.725 0.275 0.   ]
 [0.    0.    1.    0.    0.   ]
 [0.    0.    1.    0.    0.   ]
 ...
 [0.    0.    1.    0.    0.   ]
 [0.    0.    0.725 0.275 0.   ]
 [0.    0.    0.    0.8   0.2  ]
 [0.375 0.275 0.35  0.    0.   ]]


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [14]:
# valid_preds, valid_y = learn.get_preds(DatasetType.Valid)
# optR = OptimizedRounder()
# optR.fit(valid_preds, valid_y)
# coefficients = optR.coefficients()
# print(coefficients)

In [15]:
def run_subm(learn=None, coefficients=(0.5, 1.5, 2.5, 3.5)):
    """Predict the test set with one Learner and write ``submission.csv``.

    Args:
        learn: Learner with a test set attached. Required, despite the
            ``None`` default kept for signature compatibility.
        coefficients: Four thresholds passed to ``OptimizedRounder.predict``.
            An immutable tuple default avoids sharing a mutable list between
            calls.

    Raises:
        ValueError: If ``learn`` is not provided.

    Side effects:
        Overwrites the ``diagnosis`` column of the global ``test_df`` and
        writes it to ``submission.csv`` in the working directory.
    """
    if learn is None:
        raise ValueError("run_subm() needs a Learner: call run_subm(learn=<Learner>)")
    opt = OptimizedRounder()
    preds, _ = learn.get_preds(DatasetType.Test)
    test_preds = opt.predict(preds, coefficients)
    # Flatten so predictions with a trailing singleton dimension still fit a column.
    test_df['diagnosis'] = np.asarray(test_preds).astype(int).ravel()
    test_df.to_csv('submission.csv', index=False)
    print('done')

In [16]:
# coef = [0.57, 1.37, 2.57, 3.57]
# coef = [0.531918, 1.573619, 2.668338, 3.259771]
# run_subm(learn=learn, coefficients=coef)