In [1]:
import gc
import os
import pickle
import random
import time
from collections import Counter, defaultdict
from functools import partial
from pathlib import Path
from psutil import cpu_count
import matplotlib.pyplot as plt

import librosa
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from imgaug import augmenters as iaa
#from skmultilearn.model_selection import iterative_train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

### utils

In [2]:
NUM_CLASSES = 80
SIZE=128
checkpoint_file = ['model_best1.h5', 'model_best2.h5', 'model_best3.h5','model_best4.h5','model_best5.h5']
# See Version40 for 3 snapshots (or you can use only 1 which is normal run)
EPOCHS = 100 #150 for inception, 100 for xception
TTA = 19 #Number of test-time augmentation
BATCH_SIZE = 32

LR = 3e-4
PATIENCE = 10 #ReduceOnPlateau option
LR_FACTOR = 0.8 #ReduceOnPlateau option
CURATED_ONLY = False # use only curated data for training
TRAIN_AUGMENT = True # use augmentation for training data?
VALID_AUGMENT = False
MODEL = 'crnn' #'cnn8th' # choose among 'xception', 'inception', 'mobile', 'crnn', 'simple'
SEED = 520

USE_MIXUP = True
MIXUP_PROB = 0.275

# No K-Fold implementation yet
# NUM_K_FOLDS = 5 # how many folds (K) you gonna splits
# NUM_MODEL_RUN = 5 # how many models (<= K) you gonna train [e.g. set to 1 for a simple train/test split]

# if use BCEwithLogits loss, use Activation = 'linear' only
ACTIVATION = 'linear' 
# ACTIVATION = 'softmax'
# ACTIVATION = 'sigmoid'

# LOSS = 'categorical_crossentropy'
# LOSS = 'binary_crossentropy' 
LOSS = 'BCEwithLogits' 

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(SEED)

In [4]:
# from official code https://colab.research.google.com/drive/1AgPdhSp7ttY18O3fEoHOQKlt_3HJDLi8#scrollTo=cRCaCIb9oguU
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
            retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
            (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits


def calculate_per_class_lwlrap(truth, scores):
    """Calculate label-weighted label-ranking average precision.

    Arguments:
      truth: np.array of (num_samples, num_classes) giving boolean ground-truth
        of presence of that class in that sample.
      scores: np.array of (num_samples, num_classes) giving the classifier-under-
        test's real-valued score for each class for each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,) giving the lwlrap for each
        class.
      weight_per_class: np.array of (num_classes,) giving the prior of each
        class within the truth labels.  Then the overall unbalanced lwlrap is
        simply np.sum(per_class_lwlrap * weight_per_class)
    """
    assert truth.shape == scores.shape
    num_samples, num_classes = scores.shape
    # Space to store a distinct precision value for each class on each sample.
    # Only the classes that are true for each sample will be filled in.
    precisions_for_samples_by_classes = np.zeros((num_samples, num_classes))
    for sample_num in range(num_samples):
        pos_class_indices, precision_at_hits = (
            _one_sample_positive_class_precisions(scores[sample_num, :],
                                                  truth[sample_num, :]))
        precisions_for_samples_by_classes[sample_num, pos_class_indices] = (
            precision_at_hits)
    labels_per_class = np.sum(truth > 0, axis=0)
    weight_per_class = labels_per_class / float(np.sum(labels_per_class))
    # Form average of each column, i.e. all the precisions assigned to labels in
    # a particular class.
    per_class_lwlrap = (np.sum(precisions_for_samples_by_classes, axis=0) /
                        np.maximum(1, labels_per_class))
    # overall_lwlrap = simple average of all the actual per-class, per-sample precisions
    #                = np.sum(precisions_for_samples_by_classes) / np.sum(precisions_for_samples_by_classes > 0)
    #           also = weighted mean of per-class lwlraps, weighted by class label prior across samples
    #                = np.sum(per_class_lwlrap * weight_per_class)
    return per_class_lwlrap, weight_per_class

In [5]:
import tensorflow as tf



# from https://www.kaggle.com/rio114/keras-cnn-with-lwlrap-evaluation/
def tf_one_sample_positive_class_precisions(y_true, y_pred) :
    num_samples, num_classes = y_pred.shape
    
    # find true labels
    pos_class_indices = tf.where(y_true > 0) 
    
    # put rank on each element
    retrieved_classes = tf.nn.top_k(y_pred, k=num_classes).indices
    sample_range = tf.zeros(shape=tf.shape(tf.transpose(y_pred)), dtype=tf.int32)
    sample_range = tf.add(sample_range, tf.range(tf.shape(y_pred)[0], delta=1))
    sample_range = tf.transpose(sample_range)
    sample_range = tf.reshape(sample_range, (-1,num_classes*tf.shape(y_pred)[0]))
    retrieved_classes = tf.reshape(retrieved_classes, (-1,num_classes*tf.shape(y_pred)[0]))
    retrieved_class_map = tf.concat((sample_range, retrieved_classes), axis=0)
    retrieved_class_map = tf.transpose(retrieved_class_map)
    retrieved_class_map = tf.reshape(retrieved_class_map, (tf.shape(y_pred)[0], num_classes, 2))
    
    class_range = tf.zeros(shape=tf.shape(y_pred), dtype=tf.int32)
    class_range = tf.add(class_range, tf.range(num_classes, delta=1))
    
    class_rankings = tf.scatter_nd(retrieved_class_map,
                                          class_range,
                                          tf.shape(y_pred))
    
    #pick_up ranks
    num_correct_until_correct = tf.gather_nd(class_rankings, pos_class_indices)

    # add one for division for "presicion_at_hits"
    num_correct_until_correct_one = tf.add(num_correct_until_correct, 1) 
    num_correct_until_correct_one = tf.cast(num_correct_until_correct_one, tf.float32)
    
    # generate tensor [num_sample, predict_rank], 
    # top-N predicted elements have flag, N is the number of positive for each sample.
    sample_label = pos_class_indices[:, 0]   
    sample_label = tf.reshape(sample_label, (-1, 1))
    sample_label = tf.cast(sample_label, tf.int32)
    
    num_correct_until_correct = tf.reshape(num_correct_until_correct, (-1, 1))
    retrieved_class_true_position = tf.concat((sample_label, 
                                               num_correct_until_correct), axis=1)
    retrieved_pos = tf.ones(shape=tf.shape(retrieved_class_true_position)[0], dtype=tf.int32)
    retrieved_class_true = tf.scatter_nd(retrieved_class_true_position, 
                                         retrieved_pos, 
                                         tf.shape(y_pred))
    # cumulate predict_rank
    retrieved_cumulative_hits = tf.cumsum(retrieved_class_true, axis=1)

    # find positive position
    pos_ret_indices = tf.where(retrieved_class_true > 0)

    # find cumulative hits
    correct_rank = tf.gather_nd(retrieved_cumulative_hits, pos_ret_indices)  
    correct_rank = tf.cast(correct_rank, tf.float32)

    # compute presicion
    precision_at_hits = tf.truediv(correct_rank, num_correct_until_correct_one)

    return pos_class_indices, precision_at_hits

def tf_lwlrap(y_true, y_pred):
    num_samples, num_classes = y_pred.shape
    pos_class_indices, precision_at_hits = (tf_one_sample_positive_class_precisions(y_true, y_pred))
    pos_flgs = tf.cast(y_true > 0, tf.int32)
    labels_per_class = tf.reduce_sum(pos_flgs, axis=0)
    weight_per_class = tf.truediv(tf.cast(labels_per_class, tf.float32),
                                  tf.cast(tf.reduce_sum(labels_per_class), tf.float32))
    sum_precisions_by_classes = tf.zeros(shape=(num_classes), dtype=tf.float32)  
    class_label = pos_class_indices[:,1]
    sum_precisions_by_classes = tf.unsorted_segment_sum(precision_at_hits,
                                                        class_label,
                                                       num_classes)
    labels_per_class = tf.cast(labels_per_class, tf.float32)
    labels_per_class = tf.add(labels_per_class, 1e-7)
    per_class_lwlrap = tf.truediv(sum_precisions_by_classes,
                                  tf.cast(labels_per_class, tf.float32))
    out = tf.cast(tf.tensordot(per_class_lwlrap, weight_per_class, axes=1), dtype=tf.float32)
    return out

In [6]:
from keras import backend as k
def BCEwithLogits(y_true, y_pred):
    return K.mean(K.binary_crossentropy(y_true, y_pred, from_logits=True), axis=-1)

Using TensorFlow backend.


### dataset

In [7]:
dataset_dir = Path('../input/freesound-audio-tagging-2019')
preprocessed_dir = Path('../input/fat2019_prep_mels1')

In [8]:
csvs = {
    'train_curated': dataset_dir / 'train_curated.csv',
    #'train_noisy': dataset_dir / 'train_noisy.csv',
    'train_noisy': preprocessed_dir / 'trn_noisy_best50s.csv',
    'sample_submission': dataset_dir / 'sample_submission.csv',
}

dataset = {
    'train_curated': dataset_dir / 'train_curated',
    'train_noisy': dataset_dir / 'train_noisy',
    'test': dataset_dir / 'test',
}

mels = {
    'train_curated': preprocessed_dir / 'mels_train_curated.pkl',
    'train_noisy': preprocessed_dir / 'mels_trn_noisy_best50s.pkl',
    'test': preprocessed_dir / 'mels_test.pkl',  # NOTE: this data doesn't work at 2nd stage
}

In [9]:
train_curated = pd.read_csv(csvs['train_curated'])
train_noisy = pd.read_csv(csvs['train_noisy'])
if CURATED_ONLY:
    train_df = train_curated
else:
    train_df = pd.concat([train_curated, train_noisy], sort=True, ignore_index=True)
train_df.head()

Unnamed: 0,fname,labels,singled
0,0006ae4e.wav,Bark,
1,0019ef41.wav,Raindrop,
2,001ec0ad.wav,Finger_snapping,
3,0026c7cb.wav,Run,
4,0026f116.wav,Finger_snapping,


In [10]:
test_df = pd.read_csv(csvs['sample_submission'])
test_df.head()

Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,...,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0012633b.wav,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,001ed5f1.wav,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,00294be0.wav,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,003fde7a.wav,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
labels = test_df.columns[1:].tolist()
labels[:10]

['Accelerating_and_revving_and_vroom',
 'Accordion',
 'Acoustic_guitar',
 'Applause',
 'Bark',
 'Bass_drum',
 'Bass_guitar',
 'Bathtub_(filling_or_washing)',
 'Bicycle_bell',
 'Burping_and_eructation']

### This part is from the Kernel:Keras 2D model, 5-fold, log_specgram.
### Use it to solve multiclass problem of cross validation

In [12]:
def create_unique_labels(all_labels):
    label_dict = {}
    all_labels_set = []
    first_labels_set = []
    for labs in all_labels:
        lab = labs.split(',')
        for l in lab:
            if l in label_dict:
                label_dict[l] = label_dict[l]  + 1
            else:
                label_dict[l]= 0

        all_labels_set.append(set(lab))
        first_labels_set.append(lab[0])
    classes = list(label_dict.keys())
    
    return label_dict, classes, all_labels_set, first_labels_set

In [13]:
label_dict, classes, all_labels_set, first_labels_set = create_unique_labels(train_df.labels)
files = train_df.fname
print (len(files))

8970


In [14]:
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
binarize = MultiLabelBinarizer(classes=classes)
encode = LabelEncoder()
Y_split = encode.fit_transform(first_labels_set)
Y = binarize.fit_transform(all_labels_set)

In [15]:
#y_train = np.zeros((len(train_df), num_classes)).astype(int)
#for i, row in enumerate(train_df['labels'].str.split(',')):
#    for label in row:
#        idx = labels.index(label)
#        y_train[i, idx] = 1

In [16]:
with open(mels['train_curated'], 'rb') as curated, open(mels['train_noisy'], 'rb') as noisy:
    x_train = pickle.load(curated)
    if CURATED_ONLY == False:
        x_train.extend(pickle.load(noisy))

with open(mels['test'], 'rb') as test:
    x_test = pickle.load(test)
    
len(x_train), len(x_test)

(8970, 1120)

In [17]:
for ii in range(5):
    print(x_train[ii].shape) #x_train is of shape (TRAIN_NUM,128,LEN,3) [4D Tensor]
    print(x_test[ii].shape,'\n')  #x_test of shape (TEST_NUM,128,LEN,3) [4D Tensor]

(128, 448, 3)
(128, 128, 3) 

(128, 131, 3)
(128, 1021, 3) 

(128, 128, 3)
(128, 300, 3) 

(128, 1623, 3)
(128, 1146, 3) 

(128, 128, 3)
(128, 1442, 3) 



In [18]:
def index_to_trn(x_train,Y,IDarray):

    X = []
    y = []

    for i, ID in enumerate(IDarray):
              
        xx = x_train[ID].copy()
        X.append(xx)    
        y.append(Y[ID, :])
            
    y = np.array(y, dtype='float32')
#        X = np.expand_dims(np.array(X), -1)
    return X, y


In [19]:
#from sklearn.utils import shuffle
#train_X11,train_y11 = shuffle(xtrn,ytrn)

### model

In [20]:
from keras.layers import *
from keras.models import Sequential, load_model, Model
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as preprocess_inception
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.applications.mobilenet_v2 import preprocess_input as preprocess_mobile
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as preprocess_xception

from keras.utils import Sequence
from sklearn.utils import shuffle
def create_model_inception(n_out=NUM_CLASSES):

    base_model =InceptionV3(weights=None, include_top=False)
    
    x0 = base_model.output
    x1 = GlobalAveragePooling2D()(x0)
    x2 = GlobalMaxPooling2D()(x0)
    x = Concatenate()([x1,x2])
    
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    
    predictions = Dense(n_out, activation=ACTIVATION)(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [21]:
def create_model_xception(n_out=NUM_CLASSES):

    base_model = Xception(weights=None, include_top=False)
    
    x0 = base_model.output
    x1 = GlobalAveragePooling2D()(x0)
    x2 = GlobalMaxPooling2D()(x0)
    x = Concatenate()([x1,x2])
    
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

#     x = Dense(128, activation='relu')(x)
#     x = BatchNormalization()(x)
#     x = Dropout(0.3)(x)
    
    predictions = Dense(n_out, activation=ACTIVATION)(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [22]:
def create_model_mobile(n_out=NUM_CLASSES):

    base_model =MobileNetV2(weights=None, include_top=False)
    
    x0 = base_model.output
    x1 = GlobalAveragePooling2D()(x0)
    x2 = GlobalMaxPooling2D()(x0)
    x = Concatenate()([x1,x2])
    
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

#     x = Dense(128, activation='relu')(x)
#     x = BatchNormalization()(x)
#     x = Dropout(0.25)(x)

    
    predictions = Dense(n_out, activation=ACTIVATION)(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [23]:
def conv_simple_block(x, n_filters):
    
    x = Convolution2D(n_filters, (3,1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    
    x = Convolution2D(n_filters, (3,1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = AveragePooling2D()(x)

    return x

def create_model_simplecnn(n_out=NUM_CLASSES):
    
    inp = Input(shape=(128,128,3))
#     inp = Input(shape=(None,None,3))
    x = conv_simple_block(inp,64)
    x = conv_simple_block(x,128)
    x = conv_simple_block(x,256)
    x = conv_simple_block(x,128)
    
#     x1 = GlobalAveragePooling2D()(x)
#     x2 = GlobalMaxPooling2D()(x)
#     x = Add()([x1,x2])

    x = Flatten()(x)
    x = Dropout(0.2)(x)

    x = Dense(128, activation='linear')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    predictions = Dense(n_out, activation=ACTIVATION)(x)

    model = Model(inputs=inp, outputs=predictions)
    return model

In [24]:
def output_of_lambda(input_shape):
    return (input_shape[0], input_shape[2], input_shape[3])

def my_max(x):
    return K.max(x, axis=1, keepdims=False)

def crnn_simple_block(x, n_filters):
    
    x = Convolution2D(n_filters, (3,1), padding="same")(x)
    x = Activation("relu")(x)
    
    x = Convolution2D(n_filters, (3,1), padding="same")(x)
    x = Activation("relu")(x)
    x = MaxPooling2D()(x)
    x = Dropout(0.2)(x)

    return x

def create_model_crnn(n_out=NUM_CLASSES):
    
#     inp = Input(shape=(128,128,3))
    inp = Input(shape=(128,None,3))
    x = crnn_simple_block(inp,64)
    x = crnn_simple_block(x,128)
    x = crnn_simple_block(x,256)
    
    # eliminate the frequency dimension, x = (batch, time, channels)
    x = Lambda(my_max, output_shape=output_of_lambda)(x)
    
    x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
#     x = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x)
    x = GlobalMaxPooling1D()(x)
    x = Dense(128, activation='linear')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    predictions = Dense(n_out, activation=ACTIVATION)(x)

    model = Model(inputs=inp, outputs=predictions)
    return model

In [25]:
# from the 8th solution in 2018 competition
# https://github.com/sainathadapa/kaggle-freesound-audio-tagging
def create_model_cnn8th(n_out=NUM_CLASSES):
    regu=0
    inp = Input(shape=(128,128,3))

    x = Conv2D(48, 11,  strides=(1,1),kernel_initializer='he_uniform', activation='relu', padding='same',kernel_regularizer=regularizers.l2(regu))(inp)
    x = BatchNormalization()(x)
    x = Conv2D(48, 11,  strides=(2,3),kernel_initializer='he_uniform', activation='relu', padding='same',kernel_regularizer=regularizers.l2(regu))(x)
    x = MaxPooling2D(3, strides=(1,2))(x)
    x = BatchNormalization()(x)

    x = Conv2D(128, 5, strides=(1,1),kernel_initializer='he_uniform', activation='relu', padding='same',kernel_regularizer=regularizers.l2(regu))(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, 5, strides=(2,3),kernel_initializer='he_uniform', activation='relu', padding='same',kernel_regularizer=regularizers.l2(regu))(x)
    x = MaxPooling2D(3, strides=2)(x)
    x = BatchNormalization()(x)

    x = Conv2D(192, 3, strides=1,kernel_initializer='he_uniform', activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(192, 3, strides=1,kernel_initializer='he_uniform', activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(128, 3, strides=1,kernel_initializer='he_uniform', activation='relu', padding='same',kernel_regularizer=regularizers.l2(regu))(x)
    x = MaxPooling2D(3, strides=(1,2))(x)
    x = BatchNormalization()(x)

    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    predictions = Dense(n_out, activation=ACTIVATION)(x)

    model = Model(inputs=inp, outputs=predictions)
    return model

In [26]:
K.clear_session()
'''Choose your model here'''
if MODEL == 'xception':
    preprocess_input = preprocess_xception
    model = create_model_xception(n_out=NUM_CLASSES)
elif MODEL == 'inception':
    preprocess_input = preprocess_inception
    model = create_model_inception(n_out=NUM_CLASSES)
elif MODEL == 'mobile':
    preprocess_input = preprocess_mobile
    model = create_model_mobile(n_out=NUM_CLASSES)
elif MODEL == 'crnn':
    preprocess_input = preprocess_mobile
    model = create_model_crnn(n_out=NUM_CLASSES)
elif MODEL == 'cnn8th':
    preprocess_input = preprocess_mobile
    model = create_model_cnn8th(n_out=NUM_CLASSES)
else:
    preprocess_input = preprocess_mobile
    model = create_model_simplecnn(n_out=NUM_CLASSES)

print(MODEL)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
crnn
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, None, 3)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, None, 64)     640       
_________________________________________________________________
activation_1 (Activation)    (None, 128, None, 64)     0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, None, 64)     12352     
_________________________________________________________________
activation_2 (Activation)    (None, 128, None, 64)     0         
_________________________________________________________________
max_pooling2d_1 

### train

In [27]:
import numpy as np
xx = np.random.rand(1)
print(xx.shape,xx)

xx = np.random.rand(1,1)
print(xx.shape)

(1,) [0.71861741]
(1, 1)


In [28]:
# If you want, you can try more advanced augmentation like this
augment_img = iaa.Sequential([
#         iaa.ContrastNormalization((0.9, 1.1)),
#         iaa.Multiply((0.9, 1.1), per_channel=0.2),
#        iaa.Fliplr(0.5),
#         iaa.GaussianBlur(sigma=(0, 0.1)),
#         iaa.Affine( # x-shift
#             translate_percent={"x": (-0.1, 0.1), "y": (-0.0, 0.0)},
#         ),
        iaa.CoarseDropout(0.12,size_percent=0.05) # see examples : https://github.com/aleju/imgaug
            ], random_order=True)



# Or you can choose this simplest augmentation (like pytorch version)
# augment_img = iaa.Fliplr(0.5)

# This is my ugly modification; sorry about that
class FATTrainDataset(Sequence):

    def mix_up(x, y):
        x = np.array(x, np.float32)
        lam = np.random.beta(1.0, 1.0)
        ori_index = np.arange(int(len(x)))
        index_array = np.arange(int(len(x)))
        np.random.shuffle(index_array)        
        
        mixed_x = lam * x[ori_index] + (1 - lam) * x[index_array]
        mixed_y = lam * y[ori_index] + (1 - lam) * y[index_array]
        
        return mixed_x, mixed_y
    
    def getitem(image):
        # crop 2sec

        base_dim, time_dim, _ = image.shape
        crop = random.randint(0, time_dim - base_dim)
        image = image[:,crop:crop+base_dim,:]

        image = preprocess_input(image)
        
#         label = self.labels[idx]
        return image
    def create_generator(train_X, train_y, batch_size, shape, augument=False, shuffling=False, test_data=False, mixup=False, mixup_prob=0.3):
        assert shape[2] == 3
        while True:
            if shuffling:
                train_X,train_y = shuffle(train_X,train_y)

            for start in range(0, len(train_y), batch_size):
                end = min(start + batch_size, len(train_y))
                batch_images = []
                X_train_batch = train_X[start:end]
                if test_data == False:
                    batch_labels = train_y[start:end]
                
                for i in range(len(X_train_batch)):
                    image = FATTrainDataset.getitem(X_train_batch[i])   
                    if augument:
                        image = FATTrainDataset.augment(image)
                    batch_images.append(image)
                
                if (mixup and test_data == False):
                    dice = np.random.rand(1)
                    if dice > mixup_prob:
                        batch_images, batch_labels =  FATTrainDataset.mix_up(batch_images, batch_labels)    
                    
                if test_data == False:
                    yield np.array(batch_images, np.float32), batch_labels
                else:
                    yield np.array(batch_images, np.float32)
        return image
    
    def augment(image):

        image_aug = augment_img.augment_image(image)
        return image_aug

In [29]:
from keras.callbacks import (ModelCheckpoint, LearningRateScheduler,
                             EarlyStopping, ReduceLROnPlateau,CSVLogger)
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split,KFold
import sklearn.metrics
#oof_y = np.zeros_like(Y, dtype='float32')
test_Y = np.zeros((1120, 80), dtype='float32')

kfold = StratifiedKFold(5)
ifold = 0
for train_index, valid_index in kfold.split(x_train, Y_split):

    reduceLROnPlat = ReduceLROnPlateau(monitor='val_tf_lwlrap', factor=LR_FACTOR, patience=PATIENCE, 
                                   verbose=1, mode='max', min_delta=0.0001, cooldown=2, min_lr=1e-5 )

    csv_logger = CSVLogger(filename='../working/training_log'+str(ifold)+'.csv',
                       separator=',',
                       append=True)

    checkpoint = ModelCheckpoint(checkpoint_file[ifold], monitor='val_tf_lwlrap', verbose=1, 
                             save_best_only=True, mode='max', save_weights_only = False)
    callbacks_list = [checkpoint, csv_logger, reduceLROnPlat]
    x_trn,y_trn = index_to_trn(x_train,Y,train_index)
    x_val,y_val = index_to_trn(x_train,Y,valid_index)
    
    # create train and valid datagens
    train_generator = FATTrainDataset.create_generator(
        x_trn, y_trn, BATCH_SIZE, (SIZE,SIZE,3), augument=TRAIN_AUGMENT, shuffling=True, mixup = USE_MIXUP, mixup_prob = MIXUP_PROB)
    validation_generator = FATTrainDataset.create_generator(
        x_val, y_val, BATCH_SIZE, (SIZE,SIZE,3), augument=VALID_AUGMENT, shuffling=False)
    
    train_steps = np.ceil(float(len(x_trn)) / float(BATCH_SIZE))
    val_steps = np.ceil(float(len(x_val)) / float(BATCH_SIZE))
    train_steps = train_steps.astype(int)
    val_steps = val_steps.astype(int)
    print(train_steps, val_steps)
    print(len(x_trn))
    
    if LOSS=='BCEwithLogits':
        model.compile(loss=BCEwithLogits,
                optimizer=Adam(lr=LR),
                metrics=[tf_lwlrap,'categorical_accuracy'])
    else:
        model.compile(loss=LOSS,
                optimizer=Adam(lr=LR),
                metrics=[tf_lwlrap,'categorical_accuracy'])
    print(LR, PATIENCE, LR_FACTOR,BATCH_SIZE, TRAIN_AUGMENT, USE_MIXUP, MIXUP_PROB)
    model.load_weights('../input/modelcrnn/model_best1crnn.h5')
    
    hist = model.fit_generator(
        train_generator,
        steps_per_epoch=train_steps,
        validation_data=validation_generator,
        validation_steps=val_steps,
        epochs=100,
        verbose=1,
        callbacks=callbacks_list)
            
    #TTA
    model.load_weights(checkpoint_file[ifold])
    validation_generator = FATTrainDataset.create_generator(
          x_val, y_val, BATCH_SIZE, (SIZE,SIZE,3), augument=False, shuffling=False)
    pred_val_y = model.predict_generator(validation_generator,steps=val_steps,verbose=1)    
    for ii in range(TTA):
        validation_generator = FATTrainDataset.create_generator(
            x_val, y_val, BATCH_SIZE, (SIZE,SIZE,3), augument=False, shuffling=False)   
        pred_val_y += model.predict_generator(validation_generator,steps=val_steps,verbose=1)
    
    train_generator = FATTrainDataset.create_generator(
        x_trn, y_trn, BATCH_SIZE, (SIZE,SIZE,3), augument=False, shuffling=False)
    pred_train_y = model.predict_generator(train_generator,steps=train_steps,verbose=1)
    
    #Predict Test Data with TTA
    test_steps = np.ceil(float(len(x_test)) / float(BATCH_SIZE)).astype(int)   
    model.load_weights(checkpoint_file[ifold])
    test_generator = FATTrainDataset.create_generator(
        x_test, x_test, BATCH_SIZE, (SIZE,SIZE,3), augument=False, shuffling=False, test_data=True)
    pred_test_y = model.predict_generator(test_generator,steps=test_steps,verbose=1)

    for ii in range(TTA):
        test_generator = FATTrainDataset.create_generator(
            x_test, x_test, BATCH_SIZE, (SIZE,SIZE,3), augument=False, shuffling=False, test_data=True)
        
        pred_test_y += model.predict_generator(test_generator,steps=test_steps,verbose=1)
    
    sort_idx = np.argsort(labels).astype(int)        
    sample_sub = pd.read_csv('../input/freesound-audio-tagging-2019/sample_submission.csv')
    test_Y_sort = pred_test_y[:, sort_idx]
    sample_sub.iloc[:, 1:] =  test_Y_sort
    if ifold == 0:
        sample_subcv = sample_sub
    sample_subcv = sample_sub + sample_subcv
    ifold = ifold + 1
    

224 57
7152
0.0003 10 0.8 32 True True 0.275
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/100

Epoch 00001: val_tf_lwlrap improved from -inf to 0.64508, saving model to model_best1.h5
Epoch 2/100

Epoch 00002: val_tf_lwlrap improved from 0.64508 to 0.65933, saving model to model_best1.h5
Epoch 3/100

Epoch 00003: val_tf_lwlrap improved from 0.65933 to 0.67316, saving model to model_best1.h5
Epoch 4/100

Epoch 00004: val_tf_lwlrap did not improve from 0.67316
Epoch 5/100

Epoch 00005: val_tf_lwlrap did not improve from 0.67316
Epoch 6/100

Epoch 00006: val_tf_lwlrap did not improve from 0.67316
Epoch 7/100

Epoch 00007: val_tf_lwlrap did not improve from 0.67316
Epoch 8/100

Epoch 00008: val_tf_lwlrap did not improve from 0.67316
Epoch 9/100

Epoch 00009: val_tf_lwlrap did not improve from 0.67316
Epoch 10/100

Epoch 00010: val_tf_lwlrap improved from 0.67316 to 0.67763, saving model to model_best1.

In [30]:
#sample_subcv = sample_subcv / 5
sample_subcv.to_csv('submission.csv', index=False)
sample_subcv.head()

Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,...,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav000ccb97.wav000ccb97.wav000ccb97.w...,-1208.535751,-489.764389,-456.518566,-701.913475,-586.013176,-971.171158,-1138.248474,-422.635685,-545.438194,-901.224457,-890.800995,-700.856125,-730.851051,-1107.355011,-597.197533,-859.849152,-1072.90036,-811.746292,-1119.748611,-592.484894,-1078.040924,-1046.481354,-840.570518,-585.720734,-832.89975,-1019.6427,-799.770088,-1066.703888,-1009.174347,-795.425499,-1426.266235,-1009.645905,-925.335236,-899.169113,-924.198425,-1092.138382,-913.707169,-1092.550735,-866.232391,...,-862.84903,-615.301468,-1101.262085,-947.730347,-1045.664673,-973.703995,-432.21904,-875.963364,-728.145576,-880.527367,-584.55587,-585.391609,-964.067947,-484.901978,-1243.631973,-1057.684082,-735.38089,-535.099945,-535.721672,-1069.048096,-402.511532,-884.841278,-804.297195,-961.205933,-783.791298,-825.324203,-1153.533218,-814.253647,-734.651146,-735.971741,-819.304703,-1190.782913,-903.953278,-722.317787,-645.222725,-419.578979,-655.327538,-971.7556,-716.099335,-183.735697
1,0012633b.wav0012633b.wav0012633b.wav0012633b.w...,-751.842697,-895.962906,-867.421051,-704.306099,-739.897842,-753.153717,-782.407387,-724.422157,-715.856247,-818.45356,-911.37854,-301.936024,-680.158508,-755.630547,-608.876564,-864.031113,-290.026196,-608.737816,-769.724442,-426.906609,-714.651573,-613.210098,-744.604477,-817.683151,-795.280212,-354.207848,-823.165154,-1072.457169,-520.129265,-889.233109,-781.946716,-753.261765,-634.641968,-903.741776,-564.437202,-747.711792,-492.88575,-745.389519,-806.032211,...,-855.618774,-610.979935,-819.299011,-661.862091,-613.951889,-812.197968,-562.205231,-426.833054,-338.949615,-698.388779,-877.470184,-531.286743,-799.881157,-697.452606,-546.853401,-672.258072,-678.940926,-513.176765,-563.296143,-691.737457,-832.854477,-789.295868,-674.096367,-694.976707,-731.491432,-836.772003,-919.809036,-833.626785,-758.823547,-662.245491,-696.038071,-697.953529,-734.918724,-835.671272,-604.741417,-926.410126,-522.782707,-840.62545,-413.779041,-662.645607
2,001ed5f1.wav001ed5f1.wav001ed5f1.wav001ed5f1.w...,-589.730644,-983.070328,-790.755753,-290.089909,-702.252792,-1062.928558,-1084.755219,-1011.500977,-819.10997,-532.792389,-721.51416,-660.673538,-565.871483,-750.665466,-1075.867889,-520.082359,-911.394165,-363.586792,-845.800873,-509.909264,-645.876122,-968.185928,-1008.454636,-660.982269,-981.819427,-872.981171,-556.476868,-886.721634,-432.735672,-742.191345,-900.6091,-452.691719,-672.259644,-861.536667,-982.69545,-706.633469,-818.270844,-987.226364,-867.206177,...,-1168.168518,-854.510986,-665.394897,-880.903809,-729.544777,-343.04958,-863.80304,-987.546387,-859.854401,-444.200752,-993.930252,-824.20137,-336.552513,-871.256821,-426.594677,-991.930176,-549.716553,-578.379456,-557.333,-587.638191,-1039.489441,-916.312637,-589.079903,-791.278084,-995.768951,-930.30278,-893.711655,-1043.001511,-711.920212,-283.461185,-844.166855,-772.198753,-894.641266,-871.071213,-552.119644,-928.599472,-821.762573,-799.484474,-623.461128,-570.449921
3,00294be0.wav00294be0.wav00294be0.wav00294be0.w...,-763.925629,-763.821388,-898.286194,-679.343735,-624.721756,-1240.84433,-1131.459015,-1384.019699,-1100.684662,-1275.179901,-1140.117432,-630.900642,-182.486727,-816.86467,-980.507538,-953.136841,-707.294075,-670.489525,-1242.622269,-780.939201,-757.040611,-597.686531,-1018.957718,-819.61673,-1080.175003,-933.453445,-1161.010773,-1261.964264,-395.375893,-938.25705,-510.607483,-747.199913,-1101.300064,-1050.62294,-1229.796143,-1018.237686,-622.615456,-58.751752,-992.587997,...,-1187.957581,-719.626381,-1070.140472,-1002.090363,-1120.891495,-1109.494049,-969.202667,-455.488232,-637.224388,-532.744484,-819.847527,-739.702866,-950.974945,-1188.047043,-739.461761,-1150.205566,-741.392097,-370.585747,-226.678919,-746.248344,-575.492683,-1082.571426,-1250.356537,-793.775551,-974.320496,-840.813553,-1046.46373,-1128.539795,-875.831146,-514.073647,-631.777603,-556.9701,-418.554161,-1158.804443,-635.47612,-971.932877,-741.481758,-1114.002258,-680.946617,-591.613785
4,003fde7a.wav003fde7a.wav003fde7a.wav003fde7a.w...,-948.256516,-971.734283,-940.495422,-574.359421,-1179.15889,-983.465103,-952.470535,-932.928207,-1054.553925,-1050.902527,-914.170074,-724.757202,-875.570923,-981.261368,-1078.886963,-926.196121,-733.838547,-1083.692184,-1184.151749,-843.302185,-729.529495,-882.142288,-951.993607,-849.592278,-1097.668182,-999.409454,-1042.583557,-706.629913,-775.325386,-717.968155,-1260.845535,-716.319679,-695.200722,-1074.27298,-857.710587,-994.361237,-779.886719,-927.067917,-791.870964,...,-837.7015,-800.74559,-862.521927,-495.854095,-1247.255157,-959.870453,-880.698044,-1165.963379,-988.120651,-690.066666,-832.3218,-565.898743,-862.585396,-842.955948,-884.109314,-971.306091,-1063.918335,-813.97934,-708.132401,-949.787277,-913.287323,110.141963,-966.948944,-648.921509,-1037.581116,-46.99451,-934.003296,-703.262489,-759.569901,-642.749252,-684.2481,-815.427162,-1112.558838,-803.455154,-809.869141,-1009.838821,-703.740692,-926.4879,-969.719452,-844.489319
