In [None]:
import pandas as pd
from pathlib import Path

In [None]:
from fastai.vision.all import *


In [None]:
# Read the training labels CSV into a DataFrame and display it as the cell output.
labels = pd.read_csv(
    filepath_or_buffer="../input/dog-breed-identification/labels.csv",
)
labels


In [None]:
# Histogram of the per-breed row counts (how many breeds have how many rows).
# The trailing semicolon suppresses the Axes repr in the cell output.
labels["breed"].value_counts().plot.hist();


In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Single stratified 80/20 split on the "breed" column with a fixed seed.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_ids, valid_ids = next(split.split(labels, labels["breed"]))

# split.split yields positional row indices, so mark validation rows by position.
# A vectorised membership test replaces the per-row `i in valid_ids` array scan.
labels["is_valid"] = pd.RangeIndex(len(labels)).isin(valid_ids)

# Append the ".jpg" extension only where it is missing, so re-running this cell
# does not turn "abc.jpg" into "abc.jpg.jpg".
has_ext = labels["id"].str.endswith(".jpg")
labels["id"] = labels["id"].where(has_ext, labels["id"] + ".jpg")


In [None]:
# Directory passed to ImageDataLoaders.from_df as the image folder.
path = "../input/dog-breed-identification/train"


def get_dls(size, bs):
    """Build the DataLoaders from the module-level `labels` frame and `path`.

    Args:
        size: Value forwarded to `aug_transforms(size=...)` for the batch transforms.
        bs: Batch size, used for both the training and validation loaders.

    Returns:
        The object returned by `ImageDataLoaders.from_df`, with the split taken
        from the "is_valid" column of `labels`.
    """
    # Per-item transform: resize to 460 using the "squeeze" method.
    presize = Resize(460, method="squeeze")
    # Per-batch transforms: augmentations at the requested size, then
    # normalisation with `imagenet_stats`.
    batch_pipeline = [
        *aug_transforms(size=size),
        Normalize.from_stats(*imagenet_stats),
    ]
    return ImageDataLoaders.from_df(
        labels,
        path,
        item_tfms=presize,
        batch_tfms=batch_pipeline,
        bs=bs,
        val_bs=bs,
        valid_col="is_valid",
    )


dls = get_dls(400, 32)


In [None]:
# Visual sanity check: display a batch from the DataLoaders built above.
dls.show_batch()


In [None]:
# Per-class loss weights: n_samples / (n_classes * count_of_class), listed in
# the order of dls.vocab. Counts are taken over every row of `labels`.
label_count = labels["breed"].value_counts()
n_samples = labels.shape[0]
n_classes = len(dls.vocab)
# Place the weights on the DataLoaders' device instead of hard-coding "cuda",
# so the cell also runs on a CPU-only kernel.
weights = tensor(
    [n_samples / (n_classes * label_count[label]) for label in dls.vocab],
    device=dls.device,
)


In [None]:
# resnet101 learner trained with class-weighted cross-entropy.
# Metrics: accuracy, plus F.cross_entropy with no class weights passed to it.
learn = cnn_learner(
    dls,
    resnet101,
    loss_func=nn.CrossEntropyLoss(weight=weights),
    metrics=[accuracy, F.cross_entropy],
    path=".",
)
# Switch the learner to mixed-precision training.
learn = learn.to_fp16()


In [None]:
# Run the learning-rate finder on the freshly created learner before the first fit.
learn.lr_find()


In [None]:
# First training stage: 3 epochs of one-cycle training with a peak LR of 3e-3.
learn.fit_one_cycle(n_epoch=3, lr_max=3e-3)


In [None]:
# Call unfreeze() on the learner, then re-run the learning-rate finder
# before the second training stage.
learn.unfreeze()
learn.lr_find()


In [None]:
# Second training stage: 3 more one-cycle epochs, with the learning rate given
# as a slice from 1e-6 to 1e-4.
learn.fit_one_cycle(n_epoch=3, lr_max=slice(1e-6, 1e-4))


In [None]:
# Collect the image files under the competition's test directory and wrap them
# in a test DataLoader derived from `dls`.
test_files = get_image_files("../input/dog-breed-identification/test")
test_dl = dls.test_dl(test_files)


In [None]:
# Test-time-augmentation predictions for the test DataLoader.
# NOTE(review): `targs` is presumably empty/None because the test items carry
# no labels — confirm; it is not used by the cells below.
preds, targs = learn.tta(dl=test_dl)


In [None]:
# Turn the TTA outputs into per-row probabilities. The result goes into a new
# name so that re-running this cell never applies softmax to already-normalised
# values (overwriting `preds` in place would do that on a second run).
probs = torch.softmax(preds, dim=1)

# Build the whole probability table in one step from an explicit NumPy array
# (one column per class, in dls.vocab order), then put the image id first.
# File stems are used as ids, in the same order as `test_files`.
sub = pd.DataFrame(probs.detach().cpu().numpy(), columns=list(dls.vocab))
sub.insert(0, "id", [f.stem for f in test_files])
sub.to_csv("submission.csv", index=False)


In [None]:
# Display the submission frame as a final visual check.
sub
