# Test Time Augmentation

One of the most obvious problems with our model is that it operates on fixed lengths of audio clips while our dataset contains audio clips of various lengths. We would like to improve our model's performance on long clips by re-running it on different portions of the clip and combining the predictions, though it's not obvious how exactly we should combine them. 

We're taking inspiration from: https://github.com/fastai/fastai/blob/master/fastai/vision/tta.py#L10-L45

In [7]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline


import os
import shutil
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import PIL
import fastai
from fastai.basic_train import _loss_func2activ
from fastai.vision.data import pil2tensor
from fastai.vision import Path, get_preds, ImageList, Image, imagenet_stats, Learner, cnn_learner, get_transforms, DatasetType, models, load_learner, fbeta
import sklearn.metrics
from functools import partial
import torch
import torch.nn as nn
import torch.nn.functional as F

In [8]:
np.random.seed(42)
torch.manual_seed(0)

<torch._C.Generator at 0x7ff275eef170>

In [9]:
NFOLDS = 5
script_name = os.path.basename('01_BasicModel').split('.')[0]
MODEL_NAME = "{0}__folds{1}".format(script_name, NFOLDS)
print("Model: {}".format(MODEL_NAME))

# Make required folders if they're not already present
directories = ['kfolds', 'model_predictions', 'model_source']
for directory in directories:
    if not os.path.exists(directory):
        os.makedirs(directory)

Model: 01_BasicModel__folds5


In [10]:
DATA = Path('data')
WORK = Path('work')

CSV_TRN_MERGED = DATA/'train_merged.csv'
CSV_SUBMISSION = DATA/'sample_submission.csv'

TRN_CURATED = DATA/'train_curated2'
TRN_NOISY = DATA/'train_noisy2'

IMG_TRN_CURATED = WORK/'image/trn_curated2'
IMG_TRN_NOISY = WORK/'image/trn_noisy2'
IMG_TEST = WORK/'image/test'

TEST = DATA/'test'

train = pd.read_csv(DATA/'train_curated.csv')
test = pd.read_csv(DATA/'sample_submission.csv')
train_noisy = pd.read_csv(DATA/'train_noisy.csv')
train_merged = pd.read_csv(DATA/'train_merged.csv')

In [11]:
X = train['fname']
y = train['labels'].apply(lambda f: f.split(','))
y_noisy = train_noisy['labels'].apply(lambda f: f.split(','))
transformed_y = MultiLabelBinarizer().fit_transform(y)
transformed_y_noisy = MultiLabelBinarizer().fit_transform(y_noisy)
filenames = train['fname'].values
filenames = filenames.reshape(-1, 1)

oof_preds = np.zeros((len(train), 80))
test_preds = np.zeros((len(test), 80))

tfms = get_transforms(do_flip=True, max_rotate=0, max_lighting=0.1, max_zoom=0, max_warp=0.)

mskf = MultilabelStratifiedKFold(n_splits=5, random_state=4, shuffle=True)
_, val_index = next(mskf.split(X, transformed_y))

In [12]:
#Our clasifier stuff    
src = (ImageList.from_csv(WORK/'image', Path('../../')/DATA/'train_curated.csv', folder='trn_merged', suffix='.jpg')
    .split_by_idx(val_index)
    #.label_from_df(cols=list(train_merged.columns[1:]))
    .label_from_df(label_delim=',')
      )

data = (src.transform(tfms, size=128).databunch(bs=64).normalize())

In [13]:
f_score = partial(fbeta, thresh=0.2)
learn = cnn_learner(data, models.xresnet101, pretrained=False, metrics=[f_score]).mixup(stack_y=False)
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,fbeta,time
0,0.637949,0.246274,0.080768,00:18
1,0.212277,0.068762,0.005994,00:15
2,0.109087,0.071224,0.060612,00:15
3,0.07957,0.061587,0.086942,00:15
4,0.068955,0.056947,0.153332,00:15
5,0.063663,0.052556,0.168737,00:15
6,0.059888,0.046909,0.275146,00:15
7,0.05671,0.043019,0.352545,00:15
8,0.053919,0.040337,0.384566,00:15
9,0.051809,0.03924,0.422933,00:15


In [16]:
#Overrides fastai's default 'open_image' method to crop based on our crop counter
def setupNewCrop(counter):
    
    def open_fat2019_image(fn, convert_mode, after_open)->Image:
        
        print(convert)
        x = PIL.Image.open(fn).convert(convert_mode)

        # crop (128x321 for a 5 second long audio clip)
        time_dim, base_dim = x.size

        #How many crops can we take?
        maxCrops = int(np.ceil(base_dim / time_dim))
        
        #What's the furthest point at which we can take a crop without running out of pixels
        lastValidCrop = time_dim - base_dim

        crop_x = (counter % maxCrops) * time_dim 

        # We don't want to crop any further than the last 128 pixels
        crop_x = min(crop_x, lastValidCrop)

        x1 = x.crop([crop_x, 0, crop_x+base_dim, base_dim])    
        
        newImage = np.stack([x1,x1,x1], axis=-1)

        print(newImage.shape)
        # standardize    
        return Image(pil2tensor(newImage, np.float32).div_(255))

    fastai.vision.data.open_image = open_fat2019_image

In [17]:
def custom_tta(learn:Learner, ds_type:DatasetType=DatasetType.Valid):
    dl = learn.dl(ds_type)
    ds = dl.dataset

    old_open_image = fastai.vision.data.open_image
    try:
        maxNumberOfCrops = 25
        for i in range(maxNumberOfCrops):
            print("starting")
            setupNewCrop(i)
            yield get_preds(learn.model, dl, activ=_loss_func2activ(learn.loss_func))[0]
    finally:
            fastai.vision.data.open_image = old_open_image
        
all_preds = list(custom_tta(learn))
avg_preds = torch.stack(all_preds).mean(0)

(128, 128, 3, 3)
(128, 128, 3, 3)
(128, 128, 3, 3)
(128, 128, 3, 3)
(128, 128, 3, 3)


ValueError: Traceback (most recent call last):
  File "/home/josh/.local/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/josh/.local/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/josh/anaconda3/envs/ml/lib/python3.6/site-packages/fastai/data_block.py", line 633, in __getitem__
    if self.item is None: x,y = self.x[idxs],self.y[idxs]
  File "/home/josh/anaconda3/envs/ml/lib/python3.6/site-packages/fastai/data_block.py", line 109, in __getitem__
    if isinstance(idxs, Integral): return self.get(idxs)
  File "/home/josh/anaconda3/envs/ml/lib/python3.6/site-packages/fastai/vision/data.py", line 271, in get
    res = self.open(fn)
  File "/home/josh/anaconda3/envs/ml/lib/python3.6/site-packages/fastai/vision/data.py", line 267, in open
    return open_image(fn, convert_mode=self.convert_mode, after_open=self.after_open)
  File "<ipython-input-16-0172a453ad02>", line 28, in open_fat2019_image
    return Image(pil2tensor(newImage, np.float32).div_(255))
  File "/home/josh/anaconda3/envs/ml/lib/python3.6/site-packages/fastai/vision/image.py", line 18, in pil2tensor
    a = np.transpose(a, (1, 0, 2))
  File "/home/josh/.local/lib/python3.6/site-packages/numpy/core/fromnumeric.py", line 639, in transpose
    return _wrapfunc(a, 'transpose', axes)
  File "/home/josh/.local/lib/python3.6/site-packages/numpy/core/fromnumeric.py", line 56, in _wrapfunc
    return getattr(obj, method)(*args, **kwds)
ValueError: axes don't match array


(128, 128, 3, 3)
(128, 128, 3, 3)


In [None]:
len(all_preds)

In [None]:
all_preds[0].shape