<h2>Including all required additional tools</h2>

In [1]:
from fastai.callbacks import *

class MixUpCallback(LearnerCallback):
    """Callback that creates the mixed-up input and target.

    Args:
      learn: the learner this callback is attached to.
      alpha: both shape parameters of the Beta distribution the mixing
        weights are drawn from.
      stack_x: if True, pass `[input, shuffled_input, lambd]` on instead of
        blending the inputs.
      stack_y: if True, stack `(target, shuffled_target, lambd)` as columns of
        the target and wrap the learner's loss in `MixUpLoss`; if False, blend
        the targets directly.
    """
    def __init__(self, learn:Learner, alpha:float=0.4, stack_x:bool=False, stack_y:bool=True):
        super().__init__(learn)
        self.alpha,self.stack_x,self.stack_y = alpha,stack_x,stack_y
    
    def on_train_begin(self, **kwargs):
        "Wrap the learner's loss function in `MixUpLoss` when targets are stacked."
        if self.stack_y: self.learn.loss_func = MixUpLoss(self.learn.loss_func)
        
    def on_batch_begin(self, last_input, last_target, train, **kwargs):
        "Applies mixup to `last_input` and `last_target` if `train`."
        if not train: return
        batch_size = last_target.size(0)
        lambd = np.random.beta(self.alpha, self.alpha, batch_size)
        # Keep max(lambda, 1-lambda) so the un-shuffled sample always dominates the blend.
        lambd = np.concatenate([lambd[:,None], 1-lambd[:,None]], 1).max(1)
        lambd = last_input.new(lambd)
        shuffle = torch.randperm(batch_size).to(last_input.device)
        x1, y1 = last_input[shuffle], last_target[shuffle]
        if self.stack_x:
            new_input = [last_input, x1, lambd]
        else:
            # One weight per sample, broadcast over every remaining input axis
            # (works for any input rank, not only 4-D image batches).
            out_shape = [lambd.size(0)] + [1] * (last_input.dim() - 1)
            new_input = last_input * lambd.view(out_shape) + x1 * (1-lambd).view(out_shape)
        if self.stack_y:
            new_target = torch.cat([last_target[:,None].float(), y1[:,None].float(), lambd[:,None].float()], 1)
        else:
            if len(last_target.shape) == 2:
                # 2-D targets: add a trailing axis so lambd broadcasts across the columns.
                lambd = lambd.unsqueeze(1).float()
            new_target = last_target.float() * lambd + y1.float() * (1-lambd)
        return {'last_input': new_input, 'last_target': new_target}  
    
    def on_train_end(self, **kwargs):
        "Restore the loss function that was wrapped in `on_train_begin`."
        if self.stack_y: self.learn.loss_func = self.learn.loss_func.get_old()
        

class MixUpLoss(nn.Module):
    """Adapt the loss function `crit` to go with mixup.

    Args:
      crit: the loss to wrap. If it has a `reduction` attribute, that attribute
        is switched to 'none' for as long as the wrapper is in use; otherwise
        `crit` is called with `reduction='none'` through a `partial`.
      reduction: 'mean', 'sum', or anything else for no reduction, applied to
        the per-sample losses in `forward`.
    """
    
    def __init__(self, crit, reduction='mean'):
        super().__init__()
        if hasattr(crit, 'reduction'): 
            self.crit = crit
            # Remember the original setting so get_old() can put it back.
            self.old_red = crit.reduction
            setattr(self.crit, 'reduction', 'none')
        else: 
            self.crit = partial(crit, reduction='none')
            self.old_crit = crit
        self.reduction = reduction
        
    def forward(self, output, target):
        """Return the mixup loss.

        A 2-D `target` is read as columns `(target_a, target_b, lambd)`, and the
        two per-sample losses are blended with `lambd`. Any other target is
        passed to the wrapped loss unchanged.
        """
        if len(target.size()) == 2:
            loss1, loss2 = self.crit(output,target[:,0].long()), self.crit(output,target[:,1].long())
            # Stay per-sample here: reducing at this point made 'sum' and 'none'
            # return the batch mean instead of the requested reduction.
            d = loss1 * target[:,2] + loss2 * (1-target[:,2])
        else:  d = self.crit(output, target)
        if self.reduction == 'mean': return d.mean()
        elif self.reduction == 'sum': return d.sum()
        return d
    
    def get_old(self):
        "Return the wrapped loss function, with its original `reduction` restored if it was changed."
        if hasattr(self, 'old_crit'):  return self.old_crit
        elif hasattr(self, 'old_red'): 
            setattr(self.crit, 'reduction', self.old_red)
            return self.crit

def mixup(learn:Learner, alpha:float=0.4, stack_x:bool=False, stack_y:bool=True) -> Learner:
    """Add mixup https://arxiv.org/abs/1710.09412 to `learn`.

    Appends a `MixUpCallback` factory, pre-bound with the given options, to
    `learn.callback_fns` and returns the same learner so calls can be chained.
    """
    mixup_factory = partial(MixUpCallback, alpha=alpha, stack_x=stack_x, stack_y=stack_y)
    learn.callback_fns.append(mixup_factory)
    return learn
# Expose the helper as a method: `learn.mixup(...)`.
setattr(Learner, 'mixup', mixup)

In [2]:
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
            retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
            (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits


def calculate_per_class_lwlrap(truth, scores):
    """Compute label-weighted label-ranking average precision per class.

    Arguments:
      truth: np.array of (num_samples, num_classes); entries > 0 mark the
        classes present in each sample.
      scores: np.array of (num_samples, num_classes) with the classifier's
        real-valued score for each class of each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,), the lwlrap of each class.
      weight_per_class: np.array of (num_classes,), each class's share of all
        truth labels. The overall lwlrap is
        np.sum(per_class_lwlrap * weight_per_class).
    """
    assert truth.shape == scores.shape
    n_rows, n_cols = scores.shape
    # One precision slot per (sample, class); only the true classes of each
    # sample are ever written, the rest stay 0.
    precision_table = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        hit_classes, hit_precisions = _one_sample_positive_class_precisions(
            scores[row, :], truth[row, :])
        precision_table[row, hit_classes] = hit_precisions
    label_counts = np.sum(truth > 0, axis=0)
    total_labels = float(np.sum(label_counts))
    weight_per_class = label_counts / total_labels
    # Column-wise mean over the labels of each class; the max(1, ...) guard keeps
    # classes with no labels at 0 instead of dividing by zero.
    column_totals = np.sum(precision_table, axis=0)
    per_class_lwlrap = column_totals / np.maximum(1, label_counts)
    return per_class_lwlrap, weight_per_class

In [3]:
def lwlrap(y_pred,y_true):
    """Overall lwlrap of `y_pred` against `y_true`, returned as a 0-d tensor.

    Both tensors are moved to the CPU and converted to NumPy; the result is the
    sum of the per-class lwlrap values weighted by each class's label share.
    The function name is what the `SaveModelCallback(monitor='lwlrap')` calls
    in this notebook refer to, so it must not be renamed.
    """
    truth_np = y_true.cpu().numpy()
    scores_np = y_pred.cpu().numpy()
    per_class, class_weight = calculate_per_class_lwlrap(truth_np, scores_np)
    overall = (per_class * class_weight).sum()
    return torch.from_numpy(np.array(overall))

In [4]:
class ConvBlock(nn.Module):
    """Two 3x3 conv -> batch-norm -> ReLU stages followed by 2x2 average pooling."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Attribute names and layer order are kept so state-dict keys stay the same.
        self.conv1 = self._conv_bn_relu(in_channels, out_channels)
        self.conv2 = self._conv_bn_relu(out_channels, out_channels)
        self._init_weights()

    @staticmethod
    def _conv_bn_relu(n_in, n_out):
        "Build one 3x3 (stride 1, padding 1) convolution with batch norm and ReLU."
        return nn.Sequential(
            nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(n_out),
            nn.ReLU(),
        )

    def _init_weights(self):
        "Kaiming-normal conv weights, zero biases, unit batch-norm scales."
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1)
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        "Apply both conv stages, then halve the spatial size with average pooling."
        features = self.conv2(self.conv1(x))
        return F.avg_pool2d(features, 2)
    
class Classifier(nn.Module):
    """Four `ConvBlock`s, global average pooling and a small fully connected head.

    The children are kept in the order conv -> avgpool -> fc.
    NOTE(review): fastai's `cnn_learner` appears to keep only the layers before
    the pooling layer and attach its own head (the Learner repr further down
    shows the ConvBlocks nested inside fastai `Sequential`s), so `avgpool`, `fc`
    and `forward` probably only matter when this model is used on its own.
    Confirm before relying on that.

    Args:
      num_classes: size of the final linear layer's output.
    """
    def __init__(self, num_classes=1000): # <======== modification to comply with fast.ai
        super().__init__()
        
        self.conv = nn.Sequential(
            ConvBlock(in_channels=3, out_channels=64),
            ConvBlock(in_channels=64, out_channels=128),
            ConvBlock(in_channels=128, out_channels=256),
            ConvBlock(in_channels=256, out_channels=512),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # <======== modification to comply with fast.ai
        self.fc = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(512, 128),
            nn.PReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.1),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        x = self.conv(x)
        # The original architecture pooled with
        #   x = torch.mean(x, dim=3); x, _ = torch.max(x, dim=2)
        # which was replaced by the adaptive average pool below to comply with fast.ai.
        x = self.avgpool(x)
        # avgpool returns (N, 512, 1, 1); the linear head needs (N, 512).
        # Without this flatten, Linear(512, 128) is applied to a last axis of size 1 and fails.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

<h2>Importing dependencies and loading files</h2>

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from fastai.vision import *
from fastai.callbacks import *
import random

In [6]:
# Root folders: raw competition files, preprocessed mel-spectrogram pickles,
# and the working directory handed to fastai as the data path.
DATA = Path('freesound_audio_tagging')
PREPROCESSED = Path('fat2019')
WORK = Path('work')

# Label/metadata CSV files.
CSV_TRN_CURATED = DATA/'train_curated.csv'
CSV_TRN_NOISY = DATA/'train_noisy.csv'
CSV_TRN_NOISY_BEST50S = PREPROCESSED/'trn_noisy_best50s.csv'
CSV_SUBMISSION = DATA/'sample_submission.csv'

# Pickled mel-spectrogram collections matching the CSV files above.
MELS_TRN_CURATED = PREPROCESSED/'mels_train_curated.pkl'
MELS_TRN_NOISY = PREPROCESSED/'mels_train_noisy.pkl'
MELS_TRN_NOISY_BEST50S = PREPROCESSED/'mels_trn_noisy_best50s.pkl'
MELS_TEST = PREPROCESSED/'mels_test.pkl'

trn_curated_df = pd.read_csv(CSV_TRN_CURATED)
trn_noisy_df = pd.read_csv(CSV_TRN_NOISY)
trn_noisy50s_df = pd.read_csv(CSV_TRN_NOISY_BEST50S)
test_df = pd.read_csv(CSV_SUBMISSION)

# NOTE(review): the next cell reads `X_train_curated`, which is only defined by the
# commented-out line below. On a fresh kernel the pre-training section therefore
# fails with NameError unless this line is re-enabled.
#X_train_curated = pickle.load(open(MELS_TRN_CURATED, 'rb'))

In [None]:
CUR_X_FILES, CUR_X = list(trn_curated_df.fname.values), X_train_curated
# File name -> position in CUR_X, built once so every image load is a dict lookup
# instead of a linear scan of CUR_X_FILES. Iterating in reverse makes the first
# position win for a repeated name, which is what list.index returned.
CUR_X_INDEX = {name: pos for pos, name in reversed(list(enumerate(CUR_X_FILES)))}

def open_fat2019_image(fn, convert_mode, after_open)->Image:
    """Return a random square crop of the in-memory mel-spectrogram for `fn`.

    Replacement for fastai's `open_image`: nothing is read from disk. The final
    path component of `fn` is looked up among the loaded file names and the
    matching entry of `CUR_X` is used. `convert_mode` and `after_open` are
    accepted for call compatibility and ignored.

    Raises:
      ValueError: if the file name is not among the loaded file names.
    """
    # open
    fname = Path(fn).name
    try:
        idx = CUR_X_INDEX[fname]
    except KeyError:
        raise ValueError(f"{fname!r} is not in list") from None
    x = PIL.Image.fromarray(CUR_X[idx])
    # crop 1sec
    # PIL reports (width, height); the crop is a height x height square taken at
    # a random horizontal offset, so every call can return a different window.
    time_dim, base_dim = x.size
    crop_x = random.randint(0, time_dim - base_dim)
    x = x.crop([crop_x, 0, crop_x+base_dim, base_dim])
    # standardize
    return Image(pil2tensor(x, np.float32).div_(255))

# Route fastai's image loading through the in-memory opener.
vision.data.open_image = open_fat2019_image

In [7]:
# Seed for the train/validation split and the stratified K-fold shuffling.
SEED = 2000
# Batch size passed to `databunch`.
BS = 128
# Image size passed to `transform`.
SIZE = 128

In [None]:
# Augmentation: flipping and lighting changes of up to 0.1; rotation, zoom and
# warp are switched off.
tfms = get_transforms(do_flip=True, max_rotate=0, max_lighting=0.1, max_zoom=0, max_warp=0.)

# Curated set only: items come from the curated CSV (the '..' steps back out of
# WORK to reach it), 20% is held out with a seeded random split, and labels are
# comma-separated, i.e. multi-label.
src = (ImageList.from_csv(WORK, Path('..')/CSV_TRN_CURATED, folder='trn_curated')
       .split_by_rand_pct(0.2, seed=SEED)
       .label_from_df(label_delim=',')
      )

# Apply the transforms at SIZE, batch with BS and normalize with ImageNet statistics.
data = (src.transform(tfms, size=SIZE)
        .databunch(bs=BS)
        .normalize(imagenet_stats)
       )

In [None]:
# Visual sanity check of the cropped spectrograms and their labels.
data.show_batch(3)

<h2>Creating a pre-trained model</h2>

In [8]:
def borrowed_model(pretrained=False, **kwargs):
    """Architecture factory handed to `cnn_learner`.

    `pretrained` is accepted but ignored; every other keyword argument is
    forwarded to `Classifier`.
    """
    model = Classifier(**kwargs)
    return model

In [None]:
# Learner on the curated data with the custom architecture, lwlrap as metric and
# mixup that blends the targets directly (stack_y=False, so the loss is not wrapped).
learn = cnn_learner(data, borrowed_model, pretrained=False, metrics=[lwlrap]).mixup(stack_y=False)
# Train every layer: nothing is pretrained, so nothing needs to stay frozen.
learn.unfreeze()

In [None]:
# Learning-rate range test and its plot, with a suggested learning rate marked.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# Save a checkpoint named 'pretrained_model' whenever the lwlrap metric improves.
# The cross-validation loop later loads this checkpoint by name.
cb1 = SaveModelCallback(learn, every='improvement', monitor='lwlrap', name='pretrained_model')

In [None]:
# Best score for previous architecture: 0.576

# 250 epochs of one-cycle training with a maximum learning rate of 3e-2.
learn.fit_one_cycle(250, 3e-2, callbacks=cb1)

In [None]:
# Loss curves recorded during the pre-training run.
learn.recorder.plot_losses()

<h2>Final model</h2>

In [9]:
#del X_train_curated
# Curated spectrograms first, then the selected noisy ones. The list order must
# match the row order of trn_full_df, because the image opener finds a
# spectrogram by the position of its file name in that frame.
# NOTE: pickle can execute arbitrary code while loading; only open files produced
# by the trusted preprocessing step.
with open(MELS_TRN_CURATED, 'rb') as mels_file:
    X_train = pickle.load(mels_file)
with open(MELS_TRN_NOISY_BEST50S, 'rb') as mels_file:
    X_train.extend(pickle.load(mels_file))
trn_full_df = pd.concat([trn_curated_df, trn_noisy50s_df], sort=True, ignore_index=True)

In [10]:
CUR_X_FILES, CUR_X = list(trn_full_df.fname.values), X_train
# File name -> position in CUR_X, built once so every image load is a dict lookup
# instead of a linear scan of CUR_X_FILES. Iterating in reverse makes the first
# position win for a repeated name, which is what list.index returned.
CUR_X_INDEX = {name: pos for pos, name in reversed(list(enumerate(CUR_X_FILES)))}

def open_fat2019_image(fn, convert_mode, after_open)->Image:
    """Return a random square crop of the in-memory mel-spectrogram for `fn`.

    Replacement for fastai's `open_image`, here bound to the combined curated
    and noisy training set: nothing is read from disk. The final path component
    of `fn` is looked up among the loaded file names and the matching entry of
    `CUR_X` is used. `convert_mode` and `after_open` are accepted for call
    compatibility and ignored.

    Raises:
      ValueError: if the file name is not among the loaded file names.
    """
    # open
    fname = Path(fn).name
    try:
        idx = CUR_X_INDEX[fname]
    except KeyError:
        raise ValueError(f"{fname!r} is not in list") from None
    x = PIL.Image.fromarray(CUR_X[idx])
    # crop 1sec
    # PIL reports (width, height); the crop is a height x height square taken at
    # a random horizontal offset, so every call can return a different window.
    time_dim, base_dim = x.size
    crop_x = random.randint(0, time_dim - base_dim)
    x = x.crop([crop_x, 0, crop_x+base_dim, base_dim])
    # standardize
    return Image(pil2tensor(x, np.float32).div_(255))

# Route fastai's image loading through the in-memory opener.
vision.data.open_image = open_fat2019_image

In [11]:
from sklearn.model_selection import StratifiedKFold

# Five shuffled folds, reproducible through SEED. The loop below stratifies on the
# raw 'labels' column, so each distinct label string is treated as one class.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

In [12]:
# Same augmentation as for pre-training: flips and lighting only.
tfms = get_transforms(do_flip=True, max_rotate=0, max_lighting=0.1, max_zoom=0, max_warp=0.)

# One fine-tuning run per fold. `c` is the 1-based fold number used in the
# checkpoint name; enumerate keeps it tied to the fold being trained.
for c, (train_index, val_index) in enumerate(skf.split(trn_full_df.index, trn_full_df['labels']), start=1):

    src = (ImageList.from_df(trn_full_df, WORK, folder='')
           .split_by_idxs(train_index, val_index)
           .label_from_df(label_delim=',')
          )

    data = (src.transform(tfms, size=SIZE)
            .databunch(bs=BS)
            .normalize(imagenet_stats)
           )

    # Every fold starts from the 'pretrained_model' checkpoint saved during pre-training.
    learn = cnn_learner(data, borrowed_model, pretrained=False, metrics=[lwlrap]).mixup(stack_y=False)
    learn.load("pretrained_model")
    learn.unfreeze()

    # Keep the best-lwlrap weights of this fold as cv_model_n<fold>.
    name = f"cv_model_n{c}"
    cb = SaveModelCallback(learn, every='improvement', monitor='lwlrap', name=name)

    learn.fit_one_cycle(200, 1e-3, callbacks=cb)

  "type " + obj.__name__ + ". It won't be checked "


epoch,train_loss,valid_loss,lwlrap,time
0,0.04874,0.04211,0.573069,00:30
1,0.047705,0.041561,0.573581,00:24
2,0.047404,0.041349,0.575046,00:24
3,0.047048,0.040008,0.57622,00:24
4,0.046555,0.040231,0.572073,00:24
5,0.046154,0.039605,0.577454,00:24
6,0.045868,0.038906,0.579047,00:24
7,0.045403,0.038206,0.580933,00:24
8,0.04528,0.037982,0.583425,00:24
9,0.044498,0.037171,0.582865,00:24


Better model found at epoch 0 with lwlrap value: 0.573068750013154.
Better model found at epoch 1 with lwlrap value: 0.573580548120846.
Better model found at epoch 2 with lwlrap value: 0.5750458629136412.
Better model found at epoch 3 with lwlrap value: 0.5762198141128144.
Better model found at epoch 5 with lwlrap value: 0.5774543902419572.
Better model found at epoch 6 with lwlrap value: 0.5790469201238866.
Better model found at epoch 7 with lwlrap value: 0.5809331806096503.
Better model found at epoch 8 with lwlrap value: 0.5834246693960422.
Better model found at epoch 12 with lwlrap value: 0.5850965497889836.
Better model found at epoch 13 with lwlrap value: 0.5921855733980829.
Better model found at epoch 16 with lwlrap value: 0.5954963071805117.
Better model found at epoch 17 with lwlrap value: 0.5981779788704097.
Better model found at epoch 18 with lwlrap value: 0.6001589507421292.
Better model found at epoch 19 with lwlrap value: 0.6093288598707691.
Better model found at epoch 24

epoch,train_loss,valid_loss,lwlrap,time
0,0.047801,0.041392,0.575354,00:26
1,0.047876,0.041242,0.578343,00:24
2,0.047501,0.040536,0.574584,00:24
3,0.04758,0.040201,0.575588,00:24
4,0.046788,0.039498,0.575221,00:24
5,0.046421,0.039082,0.579738,00:24
6,0.045959,0.038678,0.58026,00:24
7,0.046027,0.037734,0.584502,00:24
8,0.045029,0.037209,0.58632,00:24
9,0.044783,0.037297,0.580778,00:24


Better model found at epoch 0 with lwlrap value: 0.5753538038251634.
Better model found at epoch 1 with lwlrap value: 0.5783426180333237.
Better model found at epoch 5 with lwlrap value: 0.5797383866064281.
Better model found at epoch 6 with lwlrap value: 0.5802595574032255.
Better model found at epoch 7 with lwlrap value: 0.584502201151655.
Better model found at epoch 8 with lwlrap value: 0.5863196343436412.
Better model found at epoch 10 with lwlrap value: 0.589734576738638.
Better model found at epoch 11 with lwlrap value: 0.5925197987765838.
Better model found at epoch 14 with lwlrap value: 0.5954123636863898.
Better model found at epoch 15 with lwlrap value: 0.598870488047923.
Better model found at epoch 17 with lwlrap value: 0.6054946608234961.
Better model found at epoch 18 with lwlrap value: 0.6111185828630105.
Better model found at epoch 24 with lwlrap value: 0.6219378321558775.
Better model found at epoch 25 with lwlrap value: 0.625847362505211.
Better model found at epoch 29

epoch,train_loss,valid_loss,lwlrap,time
0,0.05142,0.028061,0.728524,00:25
1,0.050981,0.027104,0.726558,00:24
2,0.050533,0.027371,0.725656,00:24
3,0.050267,0.026514,0.730837,00:24
4,0.049381,0.026434,0.731141,00:24
5,0.048807,0.026092,0.730847,00:24
6,0.048491,0.025853,0.730798,00:24
7,0.04795,0.025271,0.731646,00:24
8,0.047377,0.025161,0.733425,00:24
9,0.047064,0.025125,0.734626,00:24


Better model found at epoch 0 with lwlrap value: 0.7285244470666143.
Better model found at epoch 3 with lwlrap value: 0.7308368132645481.
Better model found at epoch 4 with lwlrap value: 0.7311411164145415.
Better model found at epoch 7 with lwlrap value: 0.7316463752978.
Better model found at epoch 8 with lwlrap value: 0.7334247790372868.
Better model found at epoch 9 with lwlrap value: 0.7346262986339098.
Better model found at epoch 10 with lwlrap value: 0.7358470985322113.
Better model found at epoch 11 with lwlrap value: 0.7416268957619537.
Better model found at epoch 14 with lwlrap value: 0.7451889365373735.
Better model found at epoch 18 with lwlrap value: 0.7481577331744904.
Better model found at epoch 19 with lwlrap value: 0.7508484319206771.
Better model found at epoch 23 with lwlrap value: 0.7551539739086145.
Better model found at epoch 24 with lwlrap value: 0.7573656754024897.
Better model found at epoch 29 with lwlrap value: 0.7608642429505796.
Better model found at epoch 3

epoch,train_loss,valid_loss,lwlrap,time
0,0.050148,0.032747,0.675526,00:26
1,0.049557,0.031961,0.678701,00:24
2,0.049131,0.032543,0.671424,00:24
3,0.048933,0.030931,0.680692,00:24
4,0.048466,0.031179,0.674663,00:24
5,0.048175,0.031,0.674606,00:24
6,0.047984,0.03043,0.679782,00:24
7,0.047505,0.030109,0.681126,00:24
8,0.046991,0.029653,0.678479,00:24
9,0.046279,0.02924,0.682449,00:24


Better model found at epoch 0 with lwlrap value: 0.6755261612476733.
Better model found at epoch 1 with lwlrap value: 0.6787012497033409.
Better model found at epoch 3 with lwlrap value: 0.6806916175209015.
Better model found at epoch 7 with lwlrap value: 0.6811257536547691.
Better model found at epoch 9 with lwlrap value: 0.68244897386822.
Better model found at epoch 16 with lwlrap value: 0.6974830433004465.
Better model found at epoch 19 with lwlrap value: 0.7010197503262179.
Better model found at epoch 20 with lwlrap value: 0.7049340860641865.
Better model found at epoch 21 with lwlrap value: 0.7057307839301472.
Better model found at epoch 26 with lwlrap value: 0.7078218688032668.
Better model found at epoch 28 with lwlrap value: 0.7080472375117463.
Better model found at epoch 29 with lwlrap value: 0.7090660742187124.
Better model found at epoch 30 with lwlrap value: 0.720284380089782.
Better model found at epoch 33 with lwlrap value: 0.722069855318555.
Better model found at epoch 3

epoch,train_loss,valid_loss,lwlrap,time
0,0.049099,0.037885,0.629917,00:25
1,0.048509,0.0372,0.632541,00:24
2,0.048224,0.037062,0.634115,00:24
3,0.047437,0.036517,0.632758,00:24
4,0.04708,0.035779,0.635781,00:24
5,0.046887,0.035813,0.633087,00:24
6,0.046554,0.034589,0.63703,00:24
7,0.046076,0.034509,0.638067,00:24
8,0.045762,0.033932,0.636394,00:24
9,0.045127,0.033637,0.64289,00:24


Better model found at epoch 0 with lwlrap value: 0.6299171773093845.
Better model found at epoch 1 with lwlrap value: 0.6325411202716879.
Better model found at epoch 2 with lwlrap value: 0.6341145812996468.
Better model found at epoch 4 with lwlrap value: 0.6357812658794919.
Better model found at epoch 6 with lwlrap value: 0.6370297937289563.
Better model found at epoch 7 with lwlrap value: 0.6380670539130351.
Better model found at epoch 9 with lwlrap value: 0.6428897376994822.
Better model found at epoch 10 with lwlrap value: 0.6464296059567946.
Better model found at epoch 15 with lwlrap value: 0.6503522493401277.
Better model found at epoch 18 with lwlrap value: 0.6560825271968422.
Better model found at epoch 19 with lwlrap value: 0.6623477976527491.
Better model found at epoch 25 with lwlrap value: 0.6682671272488541.
Better model found at epoch 27 with lwlrap value: 0.6707231949678428.
Better model found at epoch 29 with lwlrap value: 0.6737940722821639.
Better model found at epoch

In [21]:
# `learn` is still the learner of the last fold; reload the checkpoint that the
# fold-5 SaveModelCallback wrote under the name 'cv_model_n5'.
learn.load("cv_model_n5")

Learner(data=ImageDataBunch;

Train: LabelList (7140 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: MultiCategoryList
Bark,Raindrop,Finger_snapping,Run,Whispering
Path: work;

Valid: LabelList (1830 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: MultiCategoryList
Finger_snapping,Chewing_and_mastication,Hi-hat,Bark,Cheering;Clapping
Path: work;

Test: None, model=Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): ConvBlock(
        (conv1): Sequential(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
        (conv2): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [22]:
# Export the fold-5 learner for inference.
# NOTE(review): the learner carries the custom MixUpCallback and Classifier classes;
# they presumably must be defined again wherever the export is loaded. Confirm.
learn.export()