In [None]:
# Install PyTorch/torchvision and fastai into the current environment.
# NOTE(review): the imports used further down (fastai.conv_learner, fastai.sgdr,
# fastai.dataset, ...) look like the pre-1.0 fastai module layout, so an unpinned
# install may resolve to an incompatible release — confirm and pin the versions.
!pip install torch torchvision
!pip install fastai

In [None]:
# (OPTIONAL) Download the archive of pretrained-model weights (e.g. ResNet) from files.fast.ai
# and save it as weights.tgz in the working directory.
# NOTE(review): the request replays a set of browser headers, including a Cookie value;
# presumably a plain wget of the URL works as well — verify and drop the headers if so.
!wget --header="Host: files.fast.ai" --header="User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36" --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8" --header="Accept-Language: en-US,en;q=0.9" --header="Cookie: _ga=GA1.2.755364775.1526348522; _gid=GA1.2.1192476799.1526616713" --header="Connection: keep-alive" "http://files.fast.ai/models/weights.tgz" -O "weights.tgz" -c

# Extract weights.tgz into the installed fastai package directory.
# The /path/to/... argument is a placeholder: replace it with the location of your own
# fastai installation before running this line.
!tar -xvzf weights.tgz -C /path/to/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/

In [None]:
# Download the data of the Kaggle "dog-breed-identification" competition with the `kg` CLI.
# `username` / `password` are placeholders: substitute your own Kaggle login when running,
# and do not save real credentials in the notebook.
!kg download -u username -p password -c dog-breed-identification

In [None]:
#Unzip the dataset in data/ directory
!mkdir data 
!unzip test.zip -d data/
!unzip train.zip -d data/
!unzip labels.csv.zip -d data/

In [None]:
# Notebook setup, kept at the top of the notebook: (re)load the autoreload extension,
# switch it to mode 2 for automatic reloading, and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# This file contains all the main external libs we may use
from fastai.imports import *

In [None]:
from fastai.torch_imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
from sklearn import metrics

In [None]:
# Run configuration shared by the cells below.
PATH = "data/"        # root folder holding train/, test/ and labels.csv
sz = 224              # image size handed to tfms_from_model / get_data
arch = resnext101_64  # architecture used for the transforms and the learner
bs = 58               # batch size passed to ImageClassifierData.from_csv

In [None]:
# Create the validation set indexes from 20% of training data
label_csv = f'{PATH}labels.csv'
n = len(list(open(label_csv)))-1
val_idxs = get_cv_idxs(n)

In [None]:
n

In [None]:
len(val_idxs)

In [None]:
# Load the labels table before inspecting it: label_df was never assigned anywhere above,
# so this cell raised NameError on a fresh kernel.
label_df = pd.read_csv(label_csv)
label_df.head()

In [None]:
# specify transformation model based on the architecture used, size of each image, transformations you want to apply
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)

In [None]:
data = ImageClassifierData.from_csv(PATH, 'train', f'{PATH}labels.csv', test_name='test',
                                   val_idxs=val_idxs, suffix='.jpg', tfms=tfms, bs=bs)

In [None]:
fn = PATH+data.trn_ds.fnames[0];
fn

In [None]:
'data/train/001513dfcb2ffafc82cccf4d8bbaba97.jpg'

In [None]:
img = PIL.Image.open(fn);
img

In [None]:
size_d = {k: PIL.Image.open(PATH+k).size for k in data.trn_ds.fnames}

In [None]:
row_sz,col_sz = list (zip(*size_d.values()))

In [None]:
row_sz=np.array(row_sz); col_sz=np.array(col_sz)

In [None]:
row_sz[:5]

In [None]:
array([500, 500, 500, 500, 500])


In [None]:

plt.hist(row_sz);

In [None]:
plt.hist(row_sz[row_sz<1000])

In [None]:
def get_data(sz,bs):
    """Build the image classifier data object for image size `sz` and batch size `bs`.

    Transforms come from `tfms_from_model` for the global `arch`, with side-on
    augmentation and max_zoom=1.1. The dataset is read from labels.csv under the
    global `PATH`, with the global `val_idxs` as the validation split.

    When `sz` is greater than 300 the data object is returned as built; otherwise
    the result of `data.resize(340, 'tmp')` is returned (per the original note this
    resizes the images to 340 and stores them in a tmp/ directory for further use).
    """
    augmented = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    dataset = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv',
        test_name='test', num_workers=4,
        val_idxs=val_idxs, suffix='.jpg', tfms=augmented, bs=bs,
    )
    if sz > 300:
        return dataset
    return dataset.resize(340, 'tmp')

In [None]:
data = get_data(sz,bs)

In [None]:
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [None]:
HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))
 84%|████████▎ | 118/141 [00:02<00:00, 50.59it/s, loss=15.4]

In [None]:
learn.sched.plot_lr()

In [None]:
learn.sched.plot()

In [None]:
data = get_data(sz,bs)
HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

In [None]:
# Build a fresh learner on `data` with precompute=True and ps=0.5.
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [None]:
# First training pass at learning rate 1e-2 while precompute is still enabled.
learn.fit(1e-2, 2)

In [None]:
# Turn precompute off before the next fit.
# NOTE(review): presumably this is what lets the augmentation in tfms take effect — confirm.
learn.precompute=False

In [None]:
learn.fit(1e-2, 5, cycle_len=1)

In [None]:
# Checkpoint the sz=224 stage under the name '224_pre'.
learn.save('224_pre')

In [None]:
learn.load('224_pre')

In [None]:
# Switch the learner to data built for size 299, then freeze it.
# Note that 299 is not > 300, so get_data takes its data.resize(340, 'tmp') branch here.
learn.set_data(get_data(299,bs))
learn.freeze()

In [None]:
learn.fit(1e-2, 3, cycle_len=1)

In [None]:
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Evaluate with learn.TTA(): exponentiate the returned log predictions, average them over
# axis 0 (presumably the augmented passes — confirm), then report accuracy and log loss
# against the returned labels `y`.
log_preds,y = learn.TTA()
probs = np.mean(np.exp(log_preds),0)
accuracy_np(probs, y), metrics.log_loss(y, probs)

In [None]:
# Checkpoint the size-299 stage under the name '299_pre'.
learn.save('299_pre')

In [None]:
learn.load('299_pre')

In [None]:
learn.fit(1e-2, 1, cycle_len=1)

In [None]:
# Same evaluation as above, repeated after the extra fit.
log_preds,y = learn.TTA()
probs = np.mean(np.exp(log_preds),0)
accuracy_np(probs, y), metrics.log_loss(y, probs)

In [None]:
# TTA on the test set.
# NOTE(review): rebinding `y` here overwrites the labels returned by the TTA() call above.
log_preds_test, y = learn.TTA(is_test=True)

In [None]:
# Average the exponentiated test predictions over axis 0, mirroring the evaluation above.
test_probs = np.mean(np.exp(log_preds_test), axis=0)

In [None]:
test_probs.shape

In [None]:
submission_df = pd.DataFrame(test_probs)
submission_df.columns = data.classes

In [None]:
data.test_ds.fnames[0]

In [None]:
submission_df.head()

In [None]:
SUBM = f'{PATH}subm/'
os.makedirs(SUBM, exist_ok=True)
submission_df.to_csv(f'{SUBM}subm.gz', compression='gzip', index=False)

In [None]:
FileLink(f'{SUBM}subm.gz')

In [None]:
# Use your kaggle username and password to send the results on your kaggle competition and get the score.
!kg submit {SUBM}subm.gz -u username -p password -c dog-breed-identification -m "Submission on Test.zip"

In [None]:
submission_df.insert(0, 'id', [f[5:-4] for  f in data.test_ds.fnames])