In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
! pip install -Uq fastai==2.2.5

In [1]:
from fastai.vision.all import *
# Root folder of the planet dataset; the label CSVs and the training image
# folder are read relative to it further down.
path = Path('../input/planets-dataset/planet/planet')

In [2]:
# Show what the dataset root contains.
path.ls()

In [3]:
# Load the training labels table and preview its first rows.
train_df = pd.read_csv(path/'train_classes.csv')
train_df.head()

In [4]:
def get_data(size=224,bs=64,data_df=train_df):
    """Build the multi-label image DataLoaders for the planet training set.

    Args:
        size: value passed to the `Resize` item transform.
        bs: batch size passed to `dataloaders`.
        data_df: table to build from; column 0 is read as the image name and
            column 1 as the space-delimited tags.

    Returns:
        The DataLoaders created by `DataBlock.dataloaders(data_df, bs=bs)`.
    """
    # Column 0 -> '<path>/train-jpg/<name>.jpg'; column 1 -> tags split on ' '.
    image_reader = ColReader(0, pref=f'{path}/train-jpg/', suff='.jpg')
    tag_reader = ColReader(1, label_delim=' ')

    # Batch-level augmentation followed by normalization with imagenet_stats.
    augmentations = aug_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
    batch_pipeline = [*augmentations, Normalize.from_stats(*imagenet_stats)]

    planet_block = DataBlock(
        blocks=(ImageBlock, MultiCategoryBlock),
        splitter=RandomSplitter(seed=42),  # fixed seed keeps the split stable across calls
        get_x=image_reader,
        get_y=tag_reader,
        item_tfms=Resize(size),
        batch_tfms=batch_pipeline,
    )
    return planet_block.dataloaders(data_df, bs=bs)

In [5]:
# First-stage DataLoaders: size=128, batch size 256.
data = get_data(128,256)

In [6]:
# Display a sample batch as a visual check of images and labels.
data.show_batch(figsize = (12, 9))

In [11]:
# Architecture handed to cnn_learner when the learner is created.
mdl = models.resnet50

In [12]:
# Both metrics use the same 0.2 decision threshold on the predicted scores.
acc_02 = partial(accuracy_multi, thresh=0.2)
f_score = FBetaMulti(beta=2, average='samples', thresh=0.2)

# Build the learner with the MixUp callback, then switch it to mixed precision.
learn = cnn_learner(
    data,
    mdl,
    metrics=[acc_02, f_score],
    cbs=[MixUp],
)
learn = learn.to_fp16()

# Run the learning-rate finder before the first training stage.
learn.lr_find()

In [13]:
# Base learning rate for the first stage (presumably read off the lr_find
# plot; the choice is not derivable from the code). `lr` is reused below.
lr = 0.01
# Train for 5 epochs before learn.unfreeze() is called.
learn.fit_one_cycle(5, slice(lr))

In [14]:
# Checkpoint the learner after the first training stage.
learn.save('stage-1-rsn50')


In [15]:
# Unfreeze the learner ahead of the second training stage.
learn.unfreeze()


In [17]:
# Re-run the learning-rate finder now that the learner is unfrozen.
learn.lr_find()


In [18]:
# 5 more epochs with a learning-rate slice from 1e-5 up to lr/5
# (lr is still the 0.01 set for the first stage).
learn.fit_one_cycle(5, slice(1e-5, lr/5))

In [19]:
# Checkpoint the learner after the unfrozen stage.
learn.save('stage-2-rsn50')

In [20]:
# Rebuild the DataLoaders at size=256 (batch size 256) for the next training round.
data = get_data(256, 256)

# fastai v2 learners read their data from `dls`. Assigning `learn.data` (the
# fastai v1 attribute name) only creates an unused attribute, so training
# would silently continue on the previous 128px loaders.
learn.dls = data
# Inspect the first training image to confirm the new size.
data.train_ds[0][0].shape

In [21]:
# Freeze the learner again before the next training round.
learn.freeze()

In [22]:
# Run the learning-rate finder for the frozen stage of the second round.
learn.lr_find()

In [23]:
# Half of the 0.01 rate used in the first round; overwrites the global `lr`.
lr=1e-2/2
# 5 epochs on the re-frozen learner.
learn.fit_one_cycle(5, slice(lr))

In [24]:
# Checkpoint after the frozen stage of the second round.
learn.save('stage-1-256-rn50')

In [25]:
# Unfreeze and train 5 more epochs with a learning-rate slice from 1e-5 up to
# lr/5, where lr is the halved value set for this round.
learn.unfreeze()
learn.fit_one_cycle(5, slice(1e-5, lr/5))

In [26]:
# Checkpoint after the unfrozen stage of the second round.
learn.save('stage-2-256-rn50')

In [27]:
# Export the trained learner to a file in the Kaggle working directory.
learn.export(fname='/kaggle/working/export.pkl')

In [65]:
# Test images live in two folders; the sample submission lists every image name.
additional_test_path = Path('../input/planets-dataset/test-jpg-additional/test-jpg-additional')
test_path = Path('../input/planets-dataset/planet/planet/test-jpg')
submission_df = pd.read_csv(path/'sample_submission.csv')


def _resolve_test_image(file_name):
    """Return the full path of a test image, picking the folder from its name prefix."""
    # Names starting with 'test' are looked up in the main test folder,
    # everything else in the additional one.
    folder = test_path if file_name.startswith('test') else additional_test_path
    return folder/file_name


# One path per submission row, in the same order as submission_df.
testing_path = (submission_df['image_name'] + '.jpg').apply(_resolve_test_image)

def prediction(filename='submission.csv', tta=False, thresh=0.2):
    """Predict tags for every test image and write a submission CSV.

    Args:
        filename: path of the CSV file to write.
        tta: when True use `learn.tta`, otherwise `learn.get_preds`.
        thresh: a tag is emitted when its predicted score is above this value.

    Returns:
        A copy of `submission_df` with the `tags` column filled in, one
        space-joined tag string per row.
    """
    tst_dl = learn.dls.test_dl(testing_path)
    # In both cases element 0 of the result is used as the per-class scores.
    predictions = learn.tta(dl=tst_dl) if tta else learn.get_preds(dl=tst_dl)
    # Each row of the boolean mask selects the vocab entries for one image.
    predlist = [' '.join(learn.dls.vocab[mask]) for mask in (predictions[0] > thresh)]

    # Work on a copy so the module-level submission_df is not modified by
    # (repeated) calls to this function.
    df = submission_df.copy()
    df['tags'] = predlist

    df.to_csv(filename, index=False)
    return df

In [66]:
# Write the TTA-based submission file and display the resulting table.
prediction('submission_tta.csv', tta=True)

In [59]:
# Collect the image files in the main test folder and show how many there are.
pth = Path('../input/planets-dataset/planet/planet/test-jpg/')
test = get_image_files(pth)
len(test)

In [67]:
# Collect the image files in the additional test folder.
pth2 = Path('../input/planets-dataset/test-jpg-additional/test-jpg-additional/')
test1 = get_image_files(pth2)
# Concatenate into a new list. `test.append(test1)` would nest test1 as a
# single element inside `test`, mutate `test` in place, and return None.
test_final = test + test1
len(test1)