In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# Any results you write to the current directory are saved as output.

In [None]:
import time
import sys
import os

import cv2                  
         
from random import shuffle  
from zipfile import ZipFile

# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
import numpy as np  
from tqdm import tqdm 
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, cohen_kappa_score
from fastai import *
from fastai.vision import *
from fastai.callbacks import Callback
from fastai.callbacks import SaveModelCallback, EarlyStoppingCallback, ReduceLROnPlateauCallback

# Global plot styling: matplotlib's 'fivethirtyeight' style sheet is applied first,
# then seaborn's 'whitegrid' theme (applied second, so it presumably wins wherever
# both touch the same setting -- TODO confirm if the exact look matters).
style.use('fivethirtyeight')
sns.set(style='whitegrid',color_codes=True)

In [None]:
def seed_everything(seed=999):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and every CUDA device)
    and configures cuDNN for repeatable kernel selection, so that two calls
    with the same ``seed`` are followed by the same random draws.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import: the file only binds `random.shuffle` explicitly (and that
    # name is later rebound), so the module itself is imported here instead of
    # relying on it leaking in through a star import.
    import random

    random.seed(seed)
    # Only affects interpreters started after this point; the hash seed of the
    # running process is read once at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every GPU, not just the current one (silently ignored without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode autotunes kernels and may pick different ones per run.
    torch.backends.cudnn.benchmark = False


def get_label(diagnosis):
    """Encode a grade as the comma-separated list of all grades up to it.

    For example ``2`` becomes ``'0,1,2'`` and ``0`` becomes ``'0'``.
    """
    grades = range(diagnosis + 1)
    return ','.join(map(str, grades))


def get_train_df(seed, num_zeros=4000, num_test15_zeros=7900, num_test15=10_000, test15_seed=420):
    """Build the shuffled train/validation table from three label files.

    Rows come from:
      * the APTOS 2019 ``train.csv`` (all rows, ``is_valid=False``);
      * the resized diabetic-retinopathy ``trainLabels.csv``: a random sample
        of ``num_zeros`` grade-0 rows plus every row of grades 1-4
        (``is_valid=False``);
      * ``testLabels15.csv``: ``num_test15`` rows sampled from
        ``num_test15_zeros`` grade-0 rows plus all non-zero rows
        (``is_valid=True``).

    Args:
        seed: Random state for the grade-0 sample of the extra training data
            and for the final shuffle.
        num_zeros: Number of grade-0 rows kept from the extra training data.
        num_test15_zeros: Number of grade-0 rows drawn from the 2015 test labels.
        num_test15: Total number of 2015 test rows kept as validation data.
        test15_seed: Random state for both 2015 test-label samples.

    Returns:
        DataFrame with columns ``image_path``, ``diagnosis``, ``is_valid`` and
        ``label`` (``label`` is ``get_label(diagnosis)``). The original row
        indices of the source files are kept, so the index is not unique.

    Raises:
        ValueError: raised by pandas when a requested sample is larger than
            the number of rows available.
    """
    aptos_df = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
    aptos_df['is_valid'] = False
    aptos_df.id_code = '../input/aptos2019-blindness-detection/train_images/' + aptos_df.id_code + '.png'
    # Positional rename: expects the file's two columns plus the flag added above.
    aptos_df.columns = ['image_path', 'diagnosis', 'is_valid']

    extra_df = pd.read_csv('../input/diabetic-retinopathy-resized/trainLabels.csv')
    extra_df['is_valid'] = False
    extra_df.image = '../input/diabetic-retinopathy-resized/resized_train/resized_train/' + extra_df.image + '.jpeg'
    extra_df.columns = ['image_path', 'diagnosis', 'is_valid']

    test15_df = pd.read_csv('../input/resized-2015-2019-blindness-detection-images/labels/testLabels15.csv')
    del test15_df['Usage']
    test15_df.columns = ['image_id', 'diagnosis']
    test15_df['image_path'] = '../input/resized-2015-2019-blindness-detection-images/resized test 15/' + test15_df.image_id + '.jpg'
    test15_df['is_valid'] = True
    test15_df = test15_df[['image_path', 'diagnosis', 'is_valid']]

    # Every extra row has is_valid=False, so the grade-0 pool is simply all
    # grade-0 rows; it is down-sampled to limit the share of healthy images.
    extra_zeros = extra_df[extra_df.diagnosis == 0].sample(n=num_zeros, random_state=seed)
    # One slice per grade keeps the rows grouped by grade before the shuffle.
    extra_graded = [extra_df[extra_df.diagnosis == grade] for grade in range(1, 5)]

    valid_pool = pd.concat([
        test15_df[test15_df.diagnosis == 0].sample(n=num_test15_zeros, random_state=test15_seed),
        test15_df[test15_df.diagnosis != 0],
    ]).sample(n=num_test15, random_state=test15_seed)

    combined = pd.concat([aptos_df, extra_zeros, *extra_graded, valid_pool])
    combined = combined.sample(frac=1, random_state=seed)

    combined['label'] = combined.diagnosis.apply(get_label)

    return combined


def make_or_preds(model_name, learner, model_path, expected_val):
    """Load a checkpoint into ``learner`` and dump validation and test predictions.

    Writes ``{model_name}_val_preds.csv`` and ``{model_name}_test_preds.csv``
    to the current directory and prints the quadratic-weighted kappa of the
    validation predictions next to ``expected_val``.

    Args:
        model_name: Prefix of the two CSV files.
        learner: Object providing ``load``, ``get_preds`` and ``data``
            (presumably a fastai ``Learner``).
        model_path: Checkpoint identifier handed to ``learner.load``.
        expected_val: Reference score; only echoed in the printed message.
    """
    prob_columns = ['x_0', 'x_1', 'x_2', 'x_3', 'x_4']

    def _image_id(item):
        # File name without directory and without anything after the first dot.
        return item.split('/')[-1].split('.')[0]

    # Work on the learner passed in by the caller, never on a notebook-level global.
    learner.load(model_path)

    val_items = learner.data.valid_dl.dataset.items
    val_preds, val_y = learner.get_preds(ds_type=DatasetType.Valid)
    # NOTE(review): if val_y is multi-hot (as get_label suggests), argmax depends
    # on how ties are broken -- confirm this yields the intended grade.
    val_truth = val_y.argmax(1).numpy()
    # Decode the thresholded outputs once and reuse them for metric and CSV.
    val_decoded = get_output_preds((val_preds > 0.5).numpy())
    metric = cohen_kappa_score(val_truth, val_decoded, weights='quadratic')

    val_preds_df = pd.concat([
        pd.DataFrame({
            'id_code': [_image_id(v) for v in val_items],
            'diagnosis': val_truth,
            'preds': val_decoded,
        }),
        pd.DataFrame(val_preds.numpy(), columns=prob_columns),
    ], axis=1)
    val_preds_df.to_csv(f'{model_name}_val_preds.csv', index=False)

    test_items = learner.data.test_dl.dataset.items
    # The second return value is discarded.
    test_preds, _ = learner.get_preds(ds_type=DatasetType.Test)

    test_preds_df = pd.concat([
        pd.DataFrame({
            'id_code': [_image_id(v) for v in test_items],
            'preds': get_output_preds((test_preds > 0.5).numpy()),
        }),
        pd.DataFrame(test_preds.numpy(), columns=prob_columns),
    ], axis=1)
    test_preds_df.to_csv(f'{model_name}_test_preds.csv', index=False)

    print(f'Val kappa score: {metric} (expected: {expected_val})')


def avg_tta_score(model_name):
    """Quadratic kappa after averaging plain and flipped validation predictions.

    Reads ``{model_name}_val_preds.csv`` and ``{model_name}-flip_val_preds.csv``,
    sorts both by ``id_code``, blends the ``x_0``..``x_4`` columns with equal
    weight, thresholds at 0.5, decodes with ``get_output_preds`` and scores the
    result against the ``diagnosis`` column of the flipped file.
    """
    prob_columns = ['x_0', 'x_1', 'x_2', 'x_3', 'x_4']

    plain = pd.read_csv(f'{model_name}_val_preds.csv').sort_values('id_code')
    flipped = pd.read_csv(f'{model_name}-flip_val_preds.csv').sort_values('id_code')

    plain_probs = plain[prob_columns].values
    flipped_probs = flipped[prob_columns].values
    blended = plain_probs * 0.5 + flipped_probs * 0.5

    decoded = get_output_preds((blended > 0.5))
    return cohen_kappa_score(flipped.diagnosis, decoded, weights='quadratic')


class ConfusionMatrix(Callback):
    "Accumulates a confusion matrix over the batches of each epoch."

    def on_train_begin(self, **kwargs):
        "Forget the class count; it is re-read from the first batch."
        self.n_classes = 0

    def on_epoch_begin(self, **kwargs):
        "Drop the matrix accumulated during the previous epoch."
        self.cm = None

    def on_batch_end(self, last_output:Tensor, last_target:Tensor, **kwargs):
        "Add this batch's counts to `self.cm`."
        # `get_preds` lives elsewhere in the notebook; presumably it turns
        # multi-hot rows into class indices -- TODO confirm.
        thresholded = (torch.sigmoid(last_output) > 0.5).cpu().numpy()
        preds = torch.tensor(get_preds(thresholded))

        targs = torch.tensor(get_preds(last_target.cpu().numpy()))

        if self.n_classes == 0:
            # First batch: the class count is the size of the output's last axis.
            self.n_classes = last_output.shape[-1]
            self.x = torch.arange(0, self.n_classes)

        class_ids = self.x
        # Broadcast both comparisons against every class id, then count the
        # matching positions along the last axis.
        batch_cm = ((preds==class_ids[:, None]) & (targs==class_ids[:, None, None])).sum(dim=2, dtype=torch.float32)
        if self.cm is None:
            self.cm = batch_cm
        else:
            self.cm += batch_cm

    def on_epoch_end(self, **kwargs):
        "Expose the accumulated matrix as the metric value."
        self.metric = self.cm


@dataclass
class KappaScore(ConfusionMatrix):
    "Cohen's kappa (rate of agreement) computed from the epoch's confusion matrix."
    weights:Optional[str]=None      # None, `linear`, or `quadratic`

    def on_epoch_end(self, last_metrics, **kwargs):
        "Append kappa for the accumulated `self.cm` to `last_metrics`."
        col_totals = self.cm.sum(dim=0)
        row_totals = self.cm.sum(dim=1)
        # Chance agreement: outer product of the marginals over the total count.
        expected = torch.einsum('i,j->ij', (col_totals, row_totals)) / col_totals.sum()

        scheme = self.weights
        size = self.n_classes
        if scheme is None:
            # Unweighted: every off-diagonal cell costs 1, the diagonal 0.
            penalty = torch.ones((size, size))
            penalty[self.x, self.x] = 0
        elif scheme in ("linear", "quadratic"):
            # Every row holds 0..size-1, so matrix minus its transpose is the
            # signed distance between the two class indices.
            penalty = torch.zeros((size, size))
            penalty += torch.arange(size, dtype=torch.float)
            distance = penalty - torch.t(penalty)
            penalty = torch.abs(distance) if scheme == "linear" else distance ** 2
        else:
            raise ValueError('Unknown weights. Expected None, "linear", or "quadratic".')

        disagreement = torch.sum(penalty * self.cm) / torch.sum(penalty * expected)
        return add_metrics(last_metrics, 1 - disagreement)


class FlattenedLoss():
    "Wraps the loss built by `func`: smooths the target, then flattens input and target."
    def __init__(self, func, *args, axis:int=-1, floatify:bool=False, is_2d:bool=True, **kwargs):
        # `func` is a loss factory; extra positional/keyword arguments go to it.
        self.func = func(*args, **kwargs)
        self.axis = axis
        self.floatify = floatify
        self.is_2d = is_2d
        functools.update_wrapper(self, self.func)

    def __repr__(self):
        return f"FlattenedLoss of {self.func}"

    @property
    def reduction(self):
        "Reduction mode of the wrapped loss."
        return self.func.reduction

    @reduction.setter
    def reduction(self, value):
        self.func.reduction = value

    def __call__(self, input:Tensor, target:Tensor, **kwargs)->Rank0Tensor:
        "Move `axis` last, smooth the target, flatten, and call the wrapped loss."
        logits = input.transpose(self.axis, -1).contiguous()
        labels = target.transpose(self.axis, -1).contiguous()
        if self.floatify:
            labels = labels.float()

        # Label smoothing experiment: 0/1 targets become 0.05/0.95, except the
        # first column, which is forced to exactly 1. The arithmetic creates a
        # new tensor, so the caller's target is not modified.
        labels = labels * 0.9 + 0.05
        labels[:, 0] = 1

        if self.is_2d:
            logits = logits.view(-1, logits.shape[-1])
        else:
            logits = logits.view(-1)
        return self.func(logits, labels.view(-1), **kwargs)


def LabelSmoothBCEWithLogitsFlat(*args, axis:int=-1, floatify:bool=True, **kwargs):
    "`nn.BCEWithLogitsLoss` wrapped in `FlattenedLoss` (fully flattened input, `is_2d=False`)."
    flatten_options = dict(axis=axis, floatify=floatify, is_2d=False)
    return FlattenedLoss(nn.BCEWithLogitsLoss, *args, **flatten_options, **kwargs)


class ReconstructFixMultiCategoryList(MultiCategoryList):
    "`MultiCategoryList` whose `reconstruct` falls back to a `FloatItem` on failure."
    def reconstruct(self, t):
        "Return the parent's reconstruction, or `FloatItem(np.log(t))` if that raises."
        try:
            item = super().reconstruct(t)
        except Exception:
            # Deliberate best-effort fallback: any failure in the parent is swallowed.
            item = FloatItem(np.log(t))
        return item