In [None]:
import pandas as pd
from pathlib import Path 
from fastai.vision import *
from fastai.metrics import AUROC
%matplotlib inline

# Data locations
# Root directory of the competition data: train.csv and sample_submission.csv are
# read from here, and it is passed to ImageList.from_csv together with the
# "train" and "test" folder names.
path = Path("data/")

## Collect data

In [None]:
# Load the training table (train.csv inside the data directory) and show its
# first rows as the cell output.
df = pd.read_csv(path.joinpath("train.csv"))
df.head()

In [None]:
# Image transforms from get_transforms with vertical flipping enabled
# (flip_vert=True); they are applied to the data in the data-bunch cell below.
tfms = get_transforms(flip_vert=True)

In [None]:
# Test images: listed in sample_submission.csv, stored in the "test" folder.
test = ImageList.from_csv(path, folder="test", csv_name="sample_submission.csv")

# Build the data bunch used for training. Despite its name, `train` holds the
# result of the whole chain (training split, validation split and test set).
train = (
    ImageList.from_csv(path, folder="train", csv_name="train.csv")
    # Hold out 1% of the items for validation. The fixed seed keeps the split
    # identical across kernel restarts, so metrics and the interpretation
    # plots are comparable between runs.
    .split_by_rand_pct(0.01, seed=42)
    .label_from_df()
    .add_test(test)
    .transform(tfms, size=128)
    .databunch(path=".", bs=64)
    # Normalize with the ImageNet statistics (imagenet_stats).
    .normalize(imagenet_stats)
)

In [None]:
# Visual sanity check: display a batch from the data bunch (rows=3) to confirm
# that images and labels were loaded as expected.
train.show_batch(rows=3, figsize=(7, 7))

## Train learner

In [None]:
# NOTE: passing the AUROC() metric object works, whereas passing
# auc_roc_curve directly apparently does not.
# --> See https://github.com/fastai/fastai/issues/1939
# Create the CNN learner on the data bunch using models.resnet50.
learn = cnn_learner(train, models.resnet50, metrics=AUROC())

In [None]:
# Display the learner's underlying model as the cell output, to inspect its layers.
learn.model

In [None]:
# Initial training run: fit_one_cycle with 4 as its argument
# (presumably the number of epochs — confirm against the fastai docs).
learn.fit_one_cycle(4)

In [None]:
# Save the trained learner state under the name "stage-final".
learn.save("stage-final")

## Visualize

In [None]:
# Build an interpretation object from the trained learner.
interp = ClassificationInterpretation.from_learner(learn)
# Retrieve the top losses and their indices; neither value is used again in
# the cells visible here.
losses, idxs = interp.top_losses()
# Plot the 9 examples with the highest loss.
interp.plot_top_losses(9, figsize=(7, 7))

In [None]:
# Plot the confusion matrix from the interpretation object
# (presumably computed on the validation split — confirm).
interp.plot_confusion_matrix(figsize=(5, 5), dpi=60)

## Create test predictions

In [None]:
# Predict on the test set that was attached with add_test(). `targs` is
# unpacked only because get_preds returns a pair; it is not used in the cells
# visible here.
preds, targs = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Use the sample submission file as the template for our own submission and
# preview its first rows.
submission = pd.read_csv(path.joinpath("sample_submission.csv"))
submission.head()

In [None]:
# The model is trained against AUROC and the competition is scored on ROC AUC,
# so submit the predicted probability of the positive class instead of a hard
# 0/1 label: argmax collapses every prediction to two values and throws away
# the ranking that AUC measures.
# Column 1 is taken as the "has_cactus == 1" class, which keeps the original
# assumption that the class index equals the label value — TODO confirm with
# learn.data.classes.
submission["has_cactus"] = preds.clone().detach().numpy()[:, 1]

In [None]:
# Preview the submission table after filling in the "has_cactus" column.
submission.head()

In [None]:
# Write the submission to submission.csv in the working directory, without the
# DataFrame index column.
submission.to_csv("submission.csv", index=False)

In [None]:
# Submit submission.csv to the aerial-cactus-identification competition through
# the Kaggle CLI. This runs every time the cell is executed (including
# "Run All"). NOTE(review): presumably requires configured Kaggle API
# credentials — confirm.
!kaggle competitions submit -f submission.csv -m "First submission" aerial-cactus-identification

## Optional: fine-tuning experiments (not needed for the submission)

In [None]:
# Unfreeze the learner before the trial run in the next cell
# (presumably makes all layers trainable — confirm against the fastai docs).
learn.unfreeze()

In [None]:
# Trial run on the unfrozen learner: fit_one_cycle with 1 as its argument.
learn.fit_one_cycle(1)

In [None]:
# Restore the checkpoint written by learn.save("stage-final") earlier in this
# notebook, discarding the trial run on the unfrozen learner. The previous name
# "stage-1" is never saved by this notebook, so a fresh Restart & Run All would
# fail here (or silently load a stale file left over from another session).
# The trailing semicolon suppresses the cell output.
learn.load("stage-final");

In [None]:
# Run the learning-rate finder; its results are plotted in the next cell.
learn.lr_find()

In [None]:
# Plot what the recorder captured during lr_find(), to help choose the
# learning-rate range used in the next cell.
learn.recorder.plot()

In [None]:
# Fine-tune the unfrozen learner.
learn.unfreeze()
# fit_one_cycle with 3 as its first argument and max_lr given as a slice from
# 1e-6 to 1e-3 (presumably fastai spreads this range across layer groups —
# confirm against the fastai docs).
learn.fit_one_cycle(3, max_lr=slice(1e-6, 1e-3))