In [1]:
# Notebook setup (keep at the top of every notebook):
#   - autoreload mode 2 re-imports edited modules before each cell executes
#   - matplotlib figures are rendered inline in the cell output
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
import shutil

import numpy as np
import pandas as pd

import fastai
from fastai import *
from fastai.vision import *
from fastai.metrics import accuracy
from fastai.basic_data import *

# Display the installed fastai version as the cell output (recorded for reproducibility).
fastai.version.__version__

'1.0.36.post1'

In [3]:
# Report whether PyTorch can see a CUDA device and whether cuDNN is switched on.
print(f'CUDA enabled: {torch.cuda.is_available()}')
print(f'CUDNN enabled: {torch.backends.cudnn.enabled}')

CUDA enabled: True
CUDNN enabled: True


In [4]:
def recreate_directory(directory):
    """Leave `directory` existing and empty.

    Any existing tree at `directory` is removed first, then the directory
    (including missing parents) is created again.

    Implemented with the standard library instead of shell commands: the
    previous `rm ... 2>nul` redirect is Windows syntax and, on Linux, wrote
    the error output to a stray file called `nul` in the working directory.

    Args:
        directory: Path of the directory to reset.
    """
    # Best effort, like the old shell version: a missing directory is not an error.
    shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory, exist_ok=True)

# Dataset preprocessing

In [5]:
# Read-only competition data and the writable working area.
input_path = '/kaggle/input'
work_dir = '/kaggle/working'

# Original training images, their label file, and the test images.
train_dir = input_path + "/train"
train_labels = input_path + "/train.csv"
test_dir = input_path + "/test"

# Working-directory copies used for the reduced development sample.
workdir_train_labels = work_dir + "/train.csv"
workdir_train = work_dir + "/train"

In [6]:
# Load the full label table, then keep a 2% sample for fast development runs.
labels_df = pd.read_csv(train_labels)
print(labels_df.shape)
# Fixed random_state so the same rows (and therefore the same copied images)
# are selected on every Restart & Run All.
labels_df = labels_df.sample(frac=0.02, random_state=0)
print(labels_df.shape)

(25361, 2)
(507, 2)


In [7]:
# Persist the sampled label table to the working directory (no index column).
labels_df.to_csv(workdir_train_labels, index=False)

In [8]:
# Start from an empty working image directory, then copy in only the sampled images.
recreate_directory(workdir_train)
for img in labels_df['Image']:
    # shutil.copy avoids spawning a shell per file, copes with unusual file
    # names, and raises if a source image is missing instead of failing silently.
    shutil.copy(os.path.join(train_dir, img), os.path.join(workdir_train, img))

## Train model

In [33]:
# Training configuration.
SZ = 224  # image size, passed as `size=` when the data bunch is built
BS = 64  # presumably the batch size — TODO confirm it is passed to the data bunch
NUM_WORKERS = 0  # passed as `num_workers=` when the data bunch is built
SEED=0  # presumably the random seed — TODO confirm it is applied before any random split
arch = models.resnet18  # architecture handed to create_cnn

In [34]:
# FOR DEVELOPMENT ONLY: build the data bunch from the sampled copy in the working directory.
# data = (
#     ImageDataBunch
#         .from_csv(path=workdir_train, csv_labels=workdir_train_labels, ds_tfms=get_transforms(),
#                   size=SZ, bs=BS, num_workers=NUM_WORKERS, test=test_dir)
# )

# Seed NumPy's global RNG first so the random train/validation split made by
# from_csv is the same on every run (SEED was declared but never applied).
# NOTE(review): assumes fastai draws the split from np.random — confirm for this fastai version.
np.random.seed(SEED)

# Full training set with default augmentations; test images are attached as the test set.
# bs=BS makes the configured batch size effective instead of relying on the library default.
data = (
    ImageDataBunch
        .from_csv(path=train_dir, csv_labels=train_labels, ds_tfms=get_transforms(),
                  size=SZ, bs=BS, num_workers=NUM_WORKERS, test=test_dir)
)

In [35]:
# data.show_batch(rows=3, fig_size=(SZ, SZ))

# Learning rate

In [None]:
# learn = create_cnn(data, arch, metrics=accuracy, model_dir=f"{work_dir}/model")

In [None]:
# learn.lr_find()

In [None]:
# learn.recorder.plot()

# Precompute

In [36]:
# Base learning rate, reused by the fit calls in the following cells.
lr_rate = 1e-2
# Build the learner for the chosen architecture; model files go under the working directory.
learn = create_cnn(data, arch, metrics=accuracy, model_dir=f"{work_dir}/model2")
# NOTE(review): freeze_to(-1) presumably leaves only the last layer group trainable — confirm against fastai docs.
learn.freeze_to(-1)

In [37]:
# Train for one epoch at the base learning rate.
learn.fit(1, lr_rate)

epoch,train_loss,valid_loss,accuracy
1,5.959245,4.738280,0.484424


In [38]:
# Unfreeze the model before training for a few more epochs
# (presumably makes every layer group trainable — confirm against fastai docs).
learn.unfreeze()

In [39]:
# Three epochs with three learning rates: lr_rate/100, lr_rate/10 and lr_rate
# (presumably one per layer group, lowest for the earliest layers — TODO confirm).
learn.fit(3, [lr_rate/100, lr_rate/10, lr_rate])

epoch,train_loss,valid_loss,accuracy
1,5.915067,5.123445,0.477721


KeyboardInterrupt: 

# Prediction & Submission

In [None]:
# Class names known to the learner, with an extra "new_whale" entry appended at the end.
# NOTE(review): if "new_whale" already occurs in learn.data.classes it will appear twice — verify.
classes = learn.data.classes + ["new_whale"]
print(len(classes))

In [None]:
# Test-time-augmentation predictions.
# NOTE(review): the next cell assigns `log_preds, y` again from get_preds, so this
# result is discarded — confirm which of the two prediction calls is intended.
log_preds,y = learn.TTA()

In [None]:
# Predictions for the test set.
# NOTE(review): the name suggests log-probabilities — confirm what get_preds actually returns.
log_preds, y = learn.get_preds(DatasetType.Test)

In [None]:
# Append one extra column of ones (same dtype/device as the predictions) so the
# score matrix has a column for the "new_whale" entry added to `classes`.
new_whale_scores = torch.ones_like(log_preds[:, :1])
preds = torch.cat((log_preds, new_whale_scores), dim=1)

In [None]:
# Empty frame with the two submission columns: image name and predicted ids.
submittion_df = pd.DataFrame(columns=["Image", "Id"])

In [None]:
# Build one submission row per test image: the image name and its five
# highest-scoring class names, best first, separated by spaces.
TOP_K = 5

# Take the file names from the test dataset itself so that row i of `preds` is
# paired with the image it was computed from; os.listdir() order is arbitrary
# and is not guaranteed to match the dataset order.
test_items = learn.data.test_ds.x.items
assert len(test_items) == len(preds), "number of test images and prediction rows differ"

# topk returns indices sorted by descending score. The previous
# argsort()[-5:] was ascending, which listed the fifth-best class first.
# NOTE(review): a "new_whale" column filled with ones will rank first — confirm that is intended.
_, top_ids = preds.topk(TOP_K, dim=1)

# Collect plain records and build the frame once; DataFrame.append in a loop is
# quadratic and no longer exists in pandas 2.
# NOTE(review): the file extension is stripped from the image name, as before —
# confirm this matches the format of sample_submission.csv.
submission_rows = [
    {
        'Image': os.path.basename(str(item)).split(".")[0],
        'Id': ' '.join(classes[class_id] for class_id in ids),
    }
    for item, ids in zip(test_items, top_ids.tolist())
]
submittion_df = pd.DataFrame(submission_rows, columns=["Image", "Id"])

In [None]:
# Sanity check: number of rows/columns and the first few submission rows.
print(submittion_df.shape)
submittion_df.head()

In [None]:
# Write the model-based submission to the current directory (no index column).
submittion_df.to_csv('submission2.csv', index=False)

# Baseline submission: random labels (placeholder for now)

In [None]:
# Load the full training label table (same file path as `train_labels`) and preview it.
train_set = pd.read_csv(f"{input_path}/train.csv")
train_set.head()

In [None]:
# Distinct values of the 'Id' column; the shape shows how many different labels exist.
unique_labels = train_set['Id'].unique()
unique_labels.shape

In [None]:
# Load the provided sample submission as a template and preview it.
submission = pd.read_csv(f"{input_path}/sample_submission.csv")
submission.head()

In [None]:
# Preview one random prediction string: four randomly chosen labels followed by 'new_whale'.
# NOTE(review): np.random.choice samples with replacement by default, so labels may repeat — confirm this is acceptable.
print(' '.join(np.append(np.random.choice(unique_labels, 4), 'new_whale')))


In [None]:
# Fill every row with four distinct random known labels followed by 'new_whale'.
# 'new_whale' is removed from the sampling pool and replace=False is used so a
# row never contains the same label twice (the previous version sampled with
# replacement from all labels, which could waste prediction slots on duplicates).
known_labels = unique_labels[unique_labels != 'new_whale']
submission['Id'] = [
    ' '.join(np.append(np.random.choice(known_labels, 4, replace=False), 'new_whale'))
    for _ in range(len(submission))
]

In [None]:
# Sanity check: first rows and overall shape of the random submission.
print(submission.head())
print(submission.shape)

In [None]:
# Write the random baseline submission to the current directory (no index column).
submission.to_csv('submission.csv', index=False)