In [11]:
# importing relevant libraries

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import fbeta_score

In [12]:
from fastai.vision.all import *

In [13]:
# Root directory of the dataset; later cells read train_classes.csv,
# sample_submission.csv and the train-jpg/ images relative to it.
path = Path("../input/planets-dataset/planet/planet/")

In [14]:
# Load the training labels. The 'tags' column holds the labels of each image
# as a single space-separated string (it is split on ' ' further down).
train_df = pd.read_csv(path/'train_classes.csv')
train_df.head()

Visualize the data

In [15]:
# Count how many training images carry each tag.
counts = {}
splitted_tags = train_df['tags'].map(lambda x: x.split(' '))
for labels in splitted_tags.values:
    for label in labels:
        # .get(label, 0) makes the first occurrence count as 1; initialising a
        # new label to 0 would under-count every class by one.
        counts[label] = counts.get(label, 0) + 1

# Bar chart of the class frequencies, in order of first appearance.
plt.figure(figsize=(18, 6))
plt.title('Classes')
idxs = range(len(counts))
plt.xticks(idxs, list(counts.keys()), rotation=-45)
plt.bar(idxs, list(counts.values()));

In [16]:
#define a multicategory block with presizing
def get_data(size=224,bs=64,data_df=train_df):
    """Create the multi-label image DataLoaders from a label DataFrame.

    Args:
        size: Value passed to ``Resize`` as the per-item transform.
        bs: Batch size handed to ``DataBlock.dataloaders``.
        data_df: DataFrame whose column 0 is the image name (without the
            ``.jpg`` suffix) and whose column 1 is the space-delimited labels.

    Returns:
        The DataLoaders built from ``data_df``.
    """
    # Column 0 -> image file under <path>/train-jpg/, column 1 -> list of labels.
    image_reader = ColReader(0, pref=f'{path}/train-jpg/', suff='.jpg')
    label_reader = ColReader(1, label_delim=' ')

    # Batch-level augmentation (vertical flips allowed, mild lighting/zoom,
    # no warping) followed by normalisation with the ImageNet statistics.
    augmentations = aug_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
    batch_pipeline = [*augmentations, Normalize.from_stats(*imagenet_stats)]

    planet_block = DataBlock(
        blocks=(ImageBlock, MultiCategoryBlock),
        splitter=RandomSplitter(seed=42),  # fixed seed keeps the split reproducible
        get_x=image_reader,
        get_y=label_reader,
        item_tfms=Resize(size),
        batch_tfms=batch_pipeline,
    )
    return planet_block.dataloaders(data_df, bs=bs)

In [17]:
# Positional arguments are (size, bs): 128-pixel resize, batches of 256.
dls = get_data(128,256)

In [18]:
# Display a batch of images with their labels as a sanity check of the pipeline.
dls.show_batch()
# NOTE(review): layout arguments nrows=1, ncols=3 were noted here as an option — confirm they are still wanted

Define a fastai learner and fit a pretrained ResNet model

In [19]:
# Metrics reported during training, both using a 0.2 decision threshold:
# multi-label accuracy and the F-beta score with beta=2 averaged per sample.
metrics = [partial(accuracy_multi, thresh=0.2), FBetaMulti(beta=2, average='samples', thresh=0.2)]
# Callbacks handed to the learner: MixUp.
cbs = [MixUp]

In [20]:
# Build a ResNet-50 learner on the dataloaders with the metrics and callbacks
# defined above, then switch it to half precision.
# NOTE(review): newer fastai releases expose this constructor as vision_learner — confirm against the installed version.
learn = cnn_learner(dls, resnet50, metrics=metrics, cbs=cbs).to_fp16()
# Run the learning-rate finder before training.
learn.lr_find()

In [21]:
# freeze_epochs=4 epochs with the pretrained body frozen, then 6 epochs with
# everything unfrozen, using 2e-2 as the base learning rate.
learn.fine_tune(6, base_lr=2e-2, freeze_epochs=4)

In [23]:
# Force a garbage-collection pass now that training has finished.
import gc
gc.collect()

Save the ResNet Model to output working directory

In [24]:
# Persist the trained learner under the name 'resnet50-128'
# (ResNet-50 trained on 128-pixel images).
learn.save('resnet50-128')

In [34]:
# Show model results on sample images, laid out as a 2 x 4 grid.
learn.show_results(nrows=2,ncols=4)

In [36]:
# Another garbage-collection pass before the test-set paths are assembled.
gc.collect()

In [37]:
# The submission template lists images stored in two folders: file names that
# start with 'test' are resolved under test-jpg, all others under
# test-jpg-additional.
test_path = Path('../input/planets-dataset/planet/planet/test-jpg')
additional_test_path = Path('../input/planets-dataset/test-jpg-additional/test-jpg-additional')
submission_df = pd.read_csv(path/'sample_submission.csv')
# One full image path per submission row, in the same order as the template.
testing_path = (submission_df['image_name'] + '.jpg').apply(
    lambda fname: (test_path if fname.startswith('test') else additional_test_path)/fname
)

In [38]:
#Prediction function
def prediction(filename='submission.csv', tta=False, thresh=0.2):
    """Predict tags for the test images and write them to a submission CSV.

    Args:
        filename: Path of the CSV file to write.
        tta: If True, predict with ``learn.tta``; otherwise use
            ``learn.get_preds``.
        thresh: Score cut-off. A label is emitted for an image when its
            predicted score is strictly greater than this value. Defaults to
            0.2, the same threshold the training metrics use.

    Returns:
        A new DataFrame holding the rows of ``submission_df`` with the
        ``tags`` column replaced by the space-joined predicted labels.
    """
    tst_dl = learn.dls.test_dl(testing_path)
    if tta:
        predictions = learn.tta(dl = tst_dl)
    else:
        predictions = learn.get_preds(dl = tst_dl)
    # predictions[0] is compared against the threshold; each resulting boolean
    # row selects the matching labels from the vocab.
    predlist = [' '.join(learn.dls.vocab[i]) for i in (predictions[0] > thresh)]

    # Work on a copy so repeated calls neither overwrite the shared
    # submission_df nor a frame returned by an earlier call.
    df = submission_df.copy()
    df['tags'] = predlist

    df.to_csv(filename, index=False)
    return df

In [39]:
# Predict on the test images with tta=True and write 'finalsubmission_tta.csv';
# the returned DataFrame is kept for inspection and reuse below.
prdn = (prediction('finalsubmission_tta.csv', tta=True))

In [None]:
#click on the "Download file" to save csv prediction to local drive
#<a href="finalsubmission_tta.csv"> Download file </a>

<a href="finalsubmission_tta.csv"> Download file </a>

In [None]:
# Alternative, no need to run: writes the DataFrame returned by prediction()
# above to a second CSV file without re-running inference.
prdn.to_csv('finalsubmission.csv',index=False)

In [43]:
#End of notebook