In [1]:
# importing relevant libraries

import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import fbeta_score
from tqdm import tqdm
import cv2
from PIL import Image
from tensorflow import keras
from skimage import io
from sklearn.preprocessing import MultiLabelBinarizer

In [6]:
from fastai.vision.all import *

In [7]:
# Root folder of the dataset; the CSV files and the train-jpg folder are
# resolved relative to this path further down.
path = Path("../input/planets-dataset/planet/planet/")

In [8]:
# Load the training labels. get_data() reads column 0 as the image name and
# column 1 as a space-delimited list of tags.
train_df = pd.read_csv(path/'train_classes.csv')
# Preview only the first rows rather than echoing the whole frame as cell output.
train_df.head()

Visualize the data

In [9]:
def get_data(size=224,bs=64,data_df=train_df):
    """Build fastai DataLoaders for multi-label image classification.

    Args:
        size: Value passed to ``Resize`` as the per-item transform.
        bs: Batch size handed to ``dataloaders``.
        data_df: DataFrame whose column 0 holds the image name (without the
            ``.jpg`` suffix) and whose column 1 holds space-delimited labels.
            The default is bound to ``train_df`` when this function is defined.

    Returns:
        The DataLoaders produced by ``DataBlock.dataloaders``; the
        train/validation split comes from ``RandomSplitter(seed=42)``.
    """
    # Column 0 -> '<path>/train-jpg/<name>.jpg'
    image_reader = ColReader(0, pref=f'{path}/train-jpg/', suff='.jpg')
    # Column 1 -> labels split on single spaces
    label_reader = ColReader(1, label_delim=' ')

    # Batch-level augmentation followed by ImageNet normalisation.
    augmentations = aug_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
    batch_pipeline = [*augmentations, Normalize.from_stats(*imagenet_stats)]

    planet_block = DataBlock(
        blocks=(ImageBlock, MultiCategoryBlock),
        splitter=RandomSplitter(seed=42),
        get_x=image_reader,
        get_y=label_reader,
        item_tfms=Resize(size),
        batch_tfms=batch_pipeline,
    )
    return planet_block.dataloaders(data_df, bs=bs)

In [10]:
# Data loaders with size=128 for the item-level Resize and a batch size of 256.
dls = get_data(size=128, bs=256)

In [11]:
# Display a sample batch from the data loaders as a visual sanity check.
dls.show_batch()

Train ResNet Model

In [12]:
# Both metrics use a threshold of 0.2.
acc_at_thresh = partial(accuracy_multi, thresh=0.2)
# F-beta with beta=2, averaged per sample.
fbeta_at_thresh = FBetaMulti(beta=2, average='samples', thresh=0.2)
metrics = [acc_at_thresh, fbeta_at_thresh]

# MixUp is passed to the learner as a training callback.
cbs = [MixUp]

In [13]:
# Build a ResNet-50 learner with the metrics and callbacks defined above,
# then switch it to half-precision (fp16) training.
learn = cnn_learner(dls, resnet50, metrics=metrics, cbs=cbs)
learn = learn.to_fp16()

# Run the learning-rate finder; its result is the cell output.
learn.lr_find()

In [14]:
# Fine-tune with base_lr=2e-2, freeze_epochs=4 and 6 as the epoch count.
# NOTE(review): fine_tune presumably trains with the pretrained body frozen for
# `freeze_epochs` epochs and then unfreezes for the remaining 6 — confirm
# against the installed fastai version.
learn.fine_tune(6, base_lr=2e-2, freeze_epochs=4)

In [16]:
# Force a garbage-collection pass after training; the number returned by
# gc.collect() is shown as the cell output.
import gc
gc.collect()

Save the ResNet Model to working directory

In [17]:
# Persist the trained learner under the name 'resnet50-128'.
# NOTE(review): fastai presumably writes this as a .pth file under
# learn.path/learn.model_dir — confirm the output location.
learn.save('resnet50-128')

In [46]:
# Show a sample of the learner's results for a visual check.
learn.show_results()

In [18]:
# Second garbage-collection pass; relies on `gc` having been imported in an earlier cell.
gc.collect()

In [25]:
# Two folders hold the test images; the submission template lists every image name.
additional_test_path = Path('../input/planets-dataset/test-jpg-additional/test-jpg-additional')
test_path = Path('../input/planets-dataset/planet/planet/test-jpg')
submission_df = pd.read_csv(path/'sample_submission.csv')

# One full image path per submission row, in the template's row order.
# File names starting with 'test' are looked up in `test_path`; every other
# name is looked up in `additional_test_path`.
test_image_names = submission_df['image_name'] + '.jpg'
testing_path = test_image_names.apply(
    lambda name: (test_path if name.startswith('test') else additional_test_path)/name
)

def prediction(filename='submission.csv', tta=False, thresh=0.2):
    """Predict tags for every test image and write a submission CSV.

    Relies on the notebook-level ``learn``, ``testing_path`` and
    ``submission_df`` objects.

    Args:
        filename: Path of the CSV file to write.
        tta: If True, predictions come from ``learn.tta``; otherwise from
            ``learn.get_preds``.
        thresh: A label is emitted for an image when its prediction score is
            strictly greater than this value. The default of 0.2 is the same
            threshold the training metrics use.

    Returns:
        A new DataFrame: ``submission_df`` with its ``tags`` column set to the
        space-joined predicted labels of each row. ``submission_df`` itself is
        left untouched.
    """
    tst_dl = learn.dls.test_dl(testing_path)
    if tta:
        predictions = learn.tta(dl=tst_dl)
    else:
        predictions = learn.get_preds(dl=tst_dl)

    # predictions[0] holds the per-image scores; thresholding yields one
    # boolean mask per image, which selects that image's labels from the vocab.
    label_masks = predictions[0] > thresh
    predlist = [' '.join(learn.dls.vocab[mask]) for mask in label_masks]

    # assign() returns a new frame, so the shared `submission_df` template is
    # not mutated as a side effect of calling this function.
    df = submission_df.assign(tags=predlist)

    df.to_csv(filename, index=False)
    return df

In [38]:
# Generate test-set predictions with TTA enabled and write them to 'finalsubmission_tta.csv'.
prdn = prediction(filename='finalsubmission_tta.csv', tta=True)

In [48]:
# Markdown/HTML link for downloading the submission file (the same link is rendered in the cell below)
#<a href="finalsubmission_tta.csv"> Download file </a>

<a href="finalsubmission_tta.csv"> Download file </a>

In [42]:
# Alternative export: writes the same `prdn` frame (returned by the
# prediction(..., tta=True) call above) under a second file name.
# No new predictions are computed here.
prdn.to_csv('finalsubmission.csv',index=False)