## Import libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
from pathlib import Path

In [3]:
import torch

In [4]:
from fastai import *
from fastai.vision import *
from fastai.metrics import *

In [20]:
import datetime

In [21]:
# Naive local timestamp, captured once; used later to stamp the submission filename.
now = datetime.datetime.now(tz=None)

## Read data

In [5]:
# Image directories, relative to the notebook's working directory.
train_folder = Path("./train/")
test_folder = Path("./test/")

In [6]:
# Labelled training table: one row per image file with its `has_cactus` label.
train_df = pd.read_csv(filepath_or_buffer="train.csv")

In [7]:
# Submission template; it is also used below to enumerate the test images.
sample_submission = pd.read_csv(filepath_or_buffer="sample_submission.csv")

In [8]:
# Peek at the first five rows to check the column layout.
train_df.head(n=5)

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1


## fastai setup: data, model and training

In [9]:
# Unlabelled test images: file names come from the submission template and are
# resolved relative to the test folder.
test_img = ImageList.from_df(df=sample_submission, path=test_folder)

In [10]:
# Data-augmentation settings, gathered in one place so they are easy to tune.
augmentation_params = {
    "do_flip": True,
    "flip_vert": True,
    "max_rotate": 10.0,
    "max_zoom": 1.1,
    "max_lighting": 0.2,
    "max_warp": 0.2,
    "p_affine": 0.75,
    "p_lighting": 0.75,
}
transformations = get_transforms(**augmentation_params)

In [11]:
# Build the DataBunch: hold out 5% of the labelled images for validation,
# attach the unlabelled test images, apply the augmentations at 128x128 and
# normalise with `imagenet_stats`.
# The split is seeded so the validation set (and therefore the reported
# metrics) is identical on every Restart & Run All.
# Fall back to the CPU when no CUDA device is available instead of failing.
compute_device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
train_img = (
    ImageList.from_df(train_df, path=train_folder)
    .split_by_rand_pct(0.05, seed=42)
    .label_from_df()
    .add_test(test_img)
    .transform(transformations, size=128)
    .databunch(path='.', bs=8, device=compute_device)
    .normalize(imagenet_stats)
)

In [12]:
# Display the DataBunch summary (train / valid / test sizes and sample items).
train_img

ImageDataBunch;

Train: LabelList (16625 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: CategoryList
1,1,1,1,0
Path: train;

Valid: LabelList (875 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: CategoryList
1,1,1,0,1
Path: train;

Test: LabelList (4000 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: EmptyLabelList
,,,,
Path: train

In [13]:
# Metrics reported on the validation set after every epoch.
eval_metrics = [error_rate, accuracy, Precision(), Recall()]
# CNN learner with a DenseNet-161 architecture, fed by the DataBunch `train_img`.
learn = cnn_learner(train_img, models.densenet161, metrics=eval_metrics)

In [14]:
# Train for three epochs with the one-cycle policy; `slice(lr)` passes the
# learning rate as the stop value of a slice.
lr = 0.03
learn.fit_one_cycle(cyc_len=3, max_lr=slice(lr))

epoch,train_loss,valid_loss,error_rate,accuracy,precision,recall,time
0,0.535531,0.279947,0.049143,0.950857,1.0,0.931962,17:06
1,0.146282,0.062613,0.012571,0.987429,1.0,0.982595,16:39
2,0.023599,0.013083,0.004571,0.995429,1.0,0.993671,16:38


In [15]:
# Run inference on the test set. get_preds returns a (predictions, targets)
# pair; only the predictions are kept because the test set has no labels.
predictions = learn.get_preds(ds_type=DatasetType.Test)[0]

In [17]:
# Work on a copy so that filling in the predictions does not mutate
# `sample_submission`; this keeps the cell safe to re-run.
test_df = sample_submission.copy()

In [19]:
# get_preds yields one probability column per class, ordered like
# `learn.data.classes` (sorted labels: [0, 1]). Column 1 is therefore
# P(has_cactus == 1); column 0 would submit the probability of "no cactus"
# and invert the ranking.
# Bracket assignment is used so the column is written even if it is missing.
test_df["has_cactus"] = predictions.numpy()[:, 1]

## Save results

In [24]:
# Write the submission under a timestamped name so earlier runs are not overwritten.
timestamp = now.strftime("%Y-%m-%d_H%HM%M")
test_df.to_csv(f'submission-{timestamp}.csv', index=False)