In [None]:
# Inject a CSS rule that forces the notebook's `.container` element to a fixed
# width of 1600px. `display` and `HTML` come from the public `IPython.display`
# module; importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:1600px !important; }</style>"))

In [None]:
# (Re)load IPython's autoreload extension and select mode 2, so edited modules
# are re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [None]:
import datetime

In [None]:
%matplotlib inline
if "loudness" in plt.style.available:
    plt.style.use("loudness")
matplotlib.rcParams["font.size"] = 16.0
matplotlib.rcParams["figure.figsize"] = (15.0, 8.0)

In [None]:
from fastai.vision import *

In [None]:
import os

# Root directory of the competition data; "train_labels.csv" and the
# "train"/"test" image folders are resolved relative to it.
# The HISTO_DATA_DIR environment variable overrides the location so the
# notebook can run on other machines; the original path stays the default.
path = os.environ.get(
    "HISTO_DATA_DIR",
    "/home/andras/Dokumente/Kaggle/histopathologic-cancer-detection/all",
)

In [None]:
# TODOS:
## use seed to get same results

In [None]:
# Seed NumPy's global RNG before building the DataBunch so the random
# train/validation split is identical on every run (see the seed TODO).
np.random.seed(42)
# Labels are read from train_labels.csv, training images from train/ and the
# unlabeled test images from test/; ".tif" is appended to each id to form the
# file name. Images are loaded at size 96 with fastai's default transforms.
# NOTE(review): an earlier draft chained .normalize(imagenet_stats); no
# normalization is applied here - confirm this is intended for the pretrained
# backbones used below.
data = ImageDataBunch.from_csv(path, folder="train", csv_labels="train_labels.csv", ds_tfms=get_transforms(), size=96, test="test", suffix=".tif")

In [None]:
# Show the DataBunch's repr as the cell output for a quick sanity check.
data

In [None]:
# Display a sample batch from `data`, laid out in 4 rows.
data.show_batch(rows=4)

## Resnet 18

In [None]:
# Create a CNN learner on `data` with the resnet18 architecture, reporting
# accuracy, and train it for one epoch with the one-cycle policy.
learn = create_cnn(data, models.resnet18, metrics=accuracy)
learn.fit_one_cycle(1)

In [None]:
# Unfreeze the model and train 6 more epochs with learning rates spread from
# 1e-5 to 3e-4 across layer groups; pct_start=0.05 shortens the warm-up phase.
learn.unfreeze()
learn.fit_one_cycle(6, slice(1e-5,3e-4), pct_start=0.05)

In [None]:
# Accuracy of the predictions/targets returned by test-time augmentation (TTA).
accuracy(*learn.TTA())

In [None]:
# Always raises AssertionError - presumably a deliberate stop so that
# "Run All" does not continue into the next model section.
assert False

## Resnet 34

In [None]:
# Create a CNN learner on `data` with the resnet34 architecture, reporting
# accuracy, and train it for one epoch with the one-cycle policy.
learn = create_cnn(data, models.resnet34, metrics=accuracy)
learn.fit_one_cycle(1)

In [None]:
# Unfreeze the model and train 6 more epochs with learning rates spread from
# 1e-5 to 3e-4 across layer groups; pct_start=0.05 shortens the warm-up phase.
learn.unfreeze()
learn.fit_one_cycle(6, slice(1e-5,3e-4), pct_start=0.05)

In [None]:
# Accuracy of the predictions/targets returned by test-time augmentation (TTA).
accuracy(*learn.TTA())

In [None]:
# Always raises AssertionError - presumably a deliberate stop so that
# "Run All" does not continue into the next model section.
assert False

## Resnet 50

In [None]:
# Create a CNN learner on `data` with the resnet50 architecture, reporting
# accuracy, and train it for one epoch with the one-cycle policy.
learn = create_cnn(data, models.resnet50, metrics=accuracy)
learn.fit_one_cycle(1)

In [None]:
# Unfreeze the model and train 6 more epochs with learning rates spread from
# 1e-5 to 3e-4 across layer groups; pct_start=0.05 shortens the warm-up phase.
learn.unfreeze()
learn.fit_one_cycle(6, slice(1e-5,3e-4), pct_start=0.05)

In [None]:
# Accuracy of the predictions/targets returned by test-time augmentation (TTA).
accuracy(*learn.TTA())

In [None]:
# Always raises AssertionError - presumably a deliberate stop so that
# "Run All" does not continue into the next model section.
assert False

## Resnet 101

In [None]:
# Create a CNN learner on `data` with the resnet101 architecture, reporting
# accuracy, and train it for one epoch with the one-cycle policy.
learn = create_cnn(data, models.resnet101, metrics=accuracy)
learn.fit_one_cycle(1)

In [None]:
# Plot the recorder's loss-versus-learning-rate curve for the initial fit.
learn.recorder.plot()

In [None]:
# Plot the losses recorded during the initial fit.
learn.recorder.plot_losses()

In [None]:
# Unfreeze the model and fine-tune. Unlike the other model sections, this run
# uses 8 epochs and leaves pct_start at its default; the 6-epoch schedule used
# elsewhere is kept below, commented out, for reference.
learn.unfreeze()
# learn.fit_one_cycle(6, slice(1e-5,3e-4), pct_start=0.05)
learn.fit_one_cycle(8, slice(1e-5,3e-4))

In [None]:
# Accuracy of the predictions/targets returned by test-time augmentation (TTA).
accuracy(*learn.TTA())

In [None]:
# Plot the recorder's loss-versus-learning-rate curve for the fine-tuning run.
learn.recorder.plot()

In [None]:
# Plot the losses recorded during the fine-tuning run.
learn.recorder.plot_losses()

In [None]:
# Save the learner under a name prefixed with the current time
# (YYYYMMDD_HHMM) so repeated runs do not overwrite each other.
learn.save(f"{datetime.datetime.now():%Y%m%d_%H%M}_resnet101.model")

In [None]:
# Always raises AssertionError - presumably a deliberate stop so that
# "Run All" does not continue into the next model section.
assert False

## Resnet 152

In [None]:
# Create a CNN learner on `data` with the resnet152 architecture, reporting
# accuracy, and train it for one epoch with the one-cycle policy.
learn = create_cnn(data, models.resnet152, metrics=accuracy)
learn.fit_one_cycle(1)

In [None]:
# Unfreeze the model and train 6 more epochs with learning rates spread from
# 1e-5 to 3e-4 across layer groups; pct_start=0.05 shortens the warm-up phase.
learn.unfreeze()
learn.fit_one_cycle(6, slice(1e-5,3e-4), pct_start=0.05)

In [None]:
# Accuracy of the predictions/targets returned by test-time augmentation (TTA).
accuracy(*learn.TTA())

In [None]:
# Always raises AssertionError - presumably a deliberate stop so that
# "Run All" does not continue into the next model section.
assert False

## Densenet 121

In [None]:
from torchvision.models import *

In [None]:
# Create a CNN learner on `data` with the densenet121 architecture (the bare
# name comes from the torchvision star import), reporting accuracy, and train
# it for one epoch with the one-cycle policy.
learn = create_cnn(data, densenet121, metrics=accuracy)
learn.fit_one_cycle(1)

In [None]:
# Unfreeze the model and train 6 more epochs with learning rates spread from
# 1e-5 to 3e-4 across layer groups; pct_start=0.05 shortens the warm-up phase.
learn.unfreeze()
learn.fit_one_cycle(6, slice(1e-5,3e-4), pct_start=0.05)

In [None]:
# Accuracy of the predictions/targets returned by test-time augmentation (TTA).
accuracy(*learn.TTA())

In [None]:
# Save the learner under a name prefixed with the current time
# (YYYYMMDD_HHMM) so repeated runs do not overwrite each other.
learn.save(f"{datetime.datetime.now():%Y%m%d_%H%M}_densenet121.model")

In [None]:
# Always raises AssertionError - presumably a deliberate stop so that
# "Run All" does not continue into the evaluation/submission cells.
assert False

# Checking the KPIs

In [None]:
from sklearn.metrics import roc_auc_score

def auc_score(y_pred, y_true):
    """Compute the ROC AUC of 2-D class scores against the true labels.

    Args:
        y_pred: Predictions with one column per class; column 1 is taken as
            the score passed to ``roc_auc_score``.
        y_true: Ground-truth labels.

    Returns:
        Whatever ``sklearn.metrics.roc_auc_score`` returns for these inputs.
    """
    targets = to_np(y_true)
    class1_scores = to_np(y_pred)[:, 1]
    return roc_auc_score(targets, class1_scores)

In [None]:
# Run test-time augmentation on the validation set with the current `learn`;
# yields the predictions and the matching targets.
preds_valid, y_valid = learn.TTA(ds_type=DatasetType.Valid)

In [None]:
# Report accuracy and ROC AUC for the validation TTA predictions,
# formatted with %g.
print("Accuracy: %g" % accuracy(preds_valid, y_valid))
print("AUROCC: %g" % auc_score(preds_valid, y_valid))

## Prepare the submission file

In [None]:
# Predict on the test set with test-time augmentation. The commented line is
# the plain (non-TTA) alternative.
# NOTE(review): y_test is not used by any cell visible in this notebook.
preds_test, y_test = learn.TTA(ds_type=DatasetType.Test)
# preds_test, y_test = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Create the submission ids from the test file names. Path(...).stem strips
# both the directory and the ".tif" extension, so the result does not depend
# on the exact string form of `path` (trailing slash, separators) the way
# chained str.replace calls would.
from pathlib import Path

submission_id = [Path(str(x)).stem for x in data.test_ds.items]

In [None]:
# Column 1 of the test predictions becomes the submitted value - presumably
# the positive-class probability; confirm against the class order in `data`.
# use probabilities
submission_label = to_np(preds_test[:,1])
# use discrete values
# submission_label = to_np(preds_test.argmax(dim=1))

In [None]:
# Assemble the submission table: one "label" column, indexed by image id.
df_submission = pd.DataFrame({"label": submission_label}, index=submission_id)

In [None]:
# Name the index "id" so the CSV written later has an "id" header for it.
df_submission.index.name = "id"

In [None]:
# Preview the first 10 rows of the submission table.
df_submission.head(10)

In [None]:
# Number of rows in the submission, as a sanity check against the test set size.
len(df_submission)

In [None]:
# Write the submission CSV to the working directory, prefixing the file name
# with the current time (YYYYMMDD_HHMM).
df_submission.to_csv(f"{datetime.datetime.now():%Y%m%d_%H%M} submission.csv")

# Explore misclassifications

In [None]:
# Build an interpretation object from the current learner; the following cells
# use it for the top-loss and confusion-matrix plots.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Show the 9 samples with the highest loss in a 12x12 figure.
interp.plot_top_losses(9, figsize=(12,12))

In [None]:
# Plot the confusion matrix for the interpreted learner.
interp.plot_confusion_matrix()