In [None]:
from fastai.vision import *

In [None]:
# Root directory of the competition data. Later cells read the 'train' and
# 'test' folders, 'train_labels.csv' and 'sample_submission.csv' from here.
path = Path('../data/histopathologic/')

In [None]:
# Inspect the data directory. `ls` is not a stdlib Path method; presumably the
# directory listing fastai adds to Path — confirm.
path.ls()

In [None]:
# Number of entries in the 'train' and 'test' folders, shown as a (train, test) pair.
tuple(len((path/split).ls()) for split in ('train', 'test'))

In [None]:
# Seed NumPy's global RNG before building the DataBunch so the random 20%
# validation split is the same on every Restart & Run All. Without it the
# validation accuracy/AUC reported below are measured on a different subset
# each run and are not comparable between runs.
# (fastai v1 draws the random split from NumPy's global RNG.)
np.random.seed(42)

# Augmentations: flips (vertical flips included), rotation up to 20 degrees,
# zoom up to 1.1x, lighting change up to 0.2 and no warping; affine and
# lighting transforms are each applied with probability 0.5.
tfms = get_transforms(do_flip=True, flip_vert=True, max_rotate=20, max_zoom=1.1,
                      max_lighting=0.2, max_warp=0, p_affine=0.5, p_lighting=0.5)

# Images come from <path>/train (file name = csv id + '.tif'), labels from
# train_labels.csv; 20% of the rows are held out for validation.
data = ImageDataBunch.from_csv(path=path, folder='train', valid_pct=0.2,
                               csv_labels='train_labels.csv', suffix='.tif',
                               ds_tfms=tfms,
                               size=96, bs=128)

In [None]:
# Attach the images under <path>/test as the test set, so that
# `get_preds(ds_type=DatasetType.Test)` further down has something to predict on.
data.add_test(ImageList.from_folder(path/'test'))

In [None]:
# Mean label value of the validation and training splits, in that order.
# Assuming the labels are 0/1, this is the positive-class fraction of each
# split — a quick check that the random split is balanced.
tuple(split.y.items.mean() for split in (data.valid_ds, data.train_ds))

In [None]:
# Visual sanity check of a few augmented samples with their labels.
data.show_batch(rows=3)

In [None]:
from sklearn.metrics import roc_auc_score
class AUC(Callback):
    """ROC AUC metric accumulated over the batches of one epoch.

    Written for a two-class classifier: the score is the softmax probability
    in column 1 of the model output, compared against the integer targets.
    """

    def on_epoch_begin(self, **kwargs):
        "Start every epoch with empty score and target buffers."
        self.outputs = []
        self.targets = []

    def on_batch_end(self, last_output, last_target, **kwargs):
        "Append this batch's class-1 probabilities and targets to the buffers."
        # Normalise across classes before taking column 1. The raw class-1
        # logit alone is not a valid ranking score: the prediction depends on
        # it only relative to the class-0 logit, so ranking by the bare logit
        # can order samples differently than ranking by probability.
        probs = torch.softmax(last_output, dim=1)
        self.outputs += list(to_np(probs)[:, 1])
        self.targets += list(to_np(last_target))

    def on_epoch_end(self, last_metrics, **kwargs):
        "Append the ROC AUC of everything buffered this epoch to `last_metrics`."
        return {'last_metrics': last_metrics + [roc_auc_score(self.targets, self.outputs)]}

In [None]:
# Metric instance, kept in its own variable so it can be attached after the
# learning-rate search.
auc = AUC()

# VGG16-BN learner on the DataBunch; run the learning-rate finder with only
# `accuracy` attached.
learn = create_cnn(data, models.vgg16_bn, metrics=accuracy)
learn.lr_find()

# From here on, training reports both accuracy and AUC.
learn.metrics = [accuracy, auc]

In [None]:
# Plot what the recorder captured during the `lr_find()` run above, to choose
# the learning rate used in the next cell.
learn.recorder.plot()

In [None]:
# Learning rate chosen after the lr_find run above.
lr = 1e-2

# First training stage: three one-cycle epochs, before `learn.unfreeze()` is called.
learn.fit_one_cycle(cyc_len=3, max_lr=lr)

In [None]:
# Unfreeze the model before the second lr_find / fit_one_cycle round below
# (presumably makes the pretrained layers trainable as well — confirm).
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder now that the model has been unfrozen.
learn.lr_find()

In [None]:
# Plot the new lr_find results and ask for a suggested learning rate.
# NOTE(review): the effect of `k=5` is not visible from this notebook —
# confirm it is accepted by the installed fastai version.
learn.recorder.plot(suggestion=True, k=5)

In [None]:
# Predictions on the validation set. `ds_type` must be a DatasetType member;
# the original passed the enum class itself, so the loader it picked was an
# accident of how fastai compares the value rather than a deliberate choice.
learn.get_preds(ds_type=DatasetType.Valid)

In [None]:
# Second training stage: one one-cycle epoch on the unfrozen model with a much
# smaller maximum learning rate (3e-5 here, 1e-2 in the first stage).
# NOTE(review): slice(3e-5) only sets a stop value; presumably fastai uses
# lower rates for the earlier layer groups — confirm.
learn.fit_one_cycle(1, max_lr=slice(3e-5))

In [None]:
# Run inference on the test set attached earlier with `data.add_test(...)`.
test_preds = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# get_preds returned a pair; keep the first element (the per-class
# predictions) and discard the second.
probas, _ = test_preds

In [None]:
# Load the competition's sample submission as a template: its 'id' column is
# used as the lookup key below and its 'label' column is overwritten.
sample_submission = pd.read_csv(path/'sample_submission.csv')

In [None]:
# The competition is scored on ROC AUC over predicted probabilities, so submit
# the class-1 probability instead of a hard argmax label; thresholding to 0/1
# discards the ranking information AUC is computed from.
# (Column 1 is taken as the positive class, as in the AUC metric above.)
labels = to_np(probas[:, 1])

# Submission ids are the test file names without their extension.
names = [o.name.split('.')[0] for o in data.test_ds.items]

In [None]:
# Fill the 'label' column by looking each submission id up in a
# {test image name -> predicted value} mapping.
sample_submission['label'] = sample_submission['id'].map(
    {name: label for name, label in zip(names, labels)}
)

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
sample_submission.to_csv("vgg16_bn_first_sub.csv", index=False)

In [None]:
# Upload the CSV written above through the Kaggle CLI (presumably requires the
# CLI to be installed and authenticated on this machine).
!kaggle competitions submit -c histopathologic-cancer-detection -f vgg16_bn_first_sub.csv -m "init"