>v0.1 This code implements simple feature extraction and model training using LightGBM.

Feature extraction is very simple and can be improved.

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import librosa
import matplotlib.pyplot as plt
import gc

from tqdm import tqdm, tqdm_notebook
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.metrics import roc_auc_score

from joblib import Parallel, delayed
import lightgbm as lgb
from scipy import stats

from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings('ignore')

tqdm.pandas()

In [2]:
def split_and_label(rows_labels, mapping=None):
    """Turn comma-separated label strings into multi-hot vectors.

    Parameters
    ----------
    rows_labels : iterable of str
        One string per sample, labels separated by ``','``.
    mapping : dict, optional
        Maps each label name to its column position. The positions are
        expected to be ``0 .. len(mapping) - 1``. When omitted, the
        notebook-level ``label_mapping`` is used.

    Returns
    -------
    list of numpy.ndarray
        One float vector of length ``len(mapping)`` per input row, with 1
        at the position of every label present in that row.

    Raises
    ------
    KeyError
        If a row contains a label that is not a key of ``mapping``.
    """
    if mapping is None:
        # Fall back to the global built from the submission columns.
        mapping = label_mapping

    # Size the vector from the mapping instead of a hard-coded class count.
    n_classes = len(mapping)

    row_labels_list = []
    for row in rows_labels:
        labels_array = np.zeros(n_classes)
        for label in row.split(','):
            labels_array[mapping[label]] = 1
        row_labels_list.append(labels_array)

    return row_labels_list

In [3]:
# Read the label tables for both training sets and the sample submission.
train_curated = pd.read_csv('../input/train_curated.csv')
# Only the file name and label string are kept from the noisy table.
train_noisy = pd.read_csv('../input/train_noisy.csv')[['fname','labels']]
test = pd.read_csv('../input/sample_submission.csv')
print(*(frame.shape for frame in (train_curated, train_noisy, test)))

(4970, 2) (19815, 2) (1120, 81)


In [4]:
# Every column of the sample submission after the first one is a label name.
label_columns = test.columns[1:].tolist()
# Label name -> position of that label in the multi-hot vector.
label_mapping = {label: index for index, label in enumerate(label_columns)}
label_mapping

{'Accelerating_and_revving_and_vroom': 0,
 'Accordion': 1,
 'Acoustic_guitar': 2,
 'Applause': 3,
 'Bark': 4,
 'Bass_drum': 5,
 'Bass_guitar': 6,
 'Bathtub_(filling_or_washing)': 7,
 'Bicycle_bell': 8,
 'Burping_and_eructation': 9,
 'Bus': 10,
 'Buzz': 11,
 'Car_passing_by': 12,
 'Cheering': 13,
 'Chewing_and_mastication': 14,
 'Child_speech_and_kid_speaking': 15,
 'Chink_and_clink': 16,
 'Chirp_and_tweet': 17,
 'Church_bell': 18,
 'Clapping': 19,
 'Computer_keyboard': 20,
 'Crackle': 21,
 'Cricket': 22,
 'Crowd': 23,
 'Cupboard_open_or_close': 24,
 'Cutlery_and_silverware': 25,
 'Dishes_and_pots_and_pans': 26,
 'Drawer_open_or_close': 27,
 'Drip': 28,
 'Electric_guitar': 29,
 'Fart': 30,
 'Female_singing': 31,
 'Female_speech_and_woman_speaking': 32,
 'Fill_(with_liquid)': 33,
 'Finger_snapping': 34,
 'Frying_(food)': 35,
 'Gasp': 36,
 'Glockenspiel': 37,
 'Gong': 38,
 'Gurgling': 39,
 'Harmonica': 40,
 'Hi-hat': 41,
 'Hiss': 42,
 'Keys_jangling': 43,
 'Knock': 44,
 'Male_singing': 45

In [5]:
# Multi-hot encode the label strings of both training tables.
train_curated_labels, train_noisy_labels = (
    split_and_label(frame['labels']) for frame in (train_curated, train_noisy)
)
len(train_curated_labels), len(train_noisy_labels)

(4970, 19815)

In [6]:
# Create one float column per label (all zeros) in both tables so the
# multi-column assignment below targets columns that already exist.
for f in label_columns:
    train_curated[f] = 0.0
    train_noisy[f] = 0.0

# Overwrite the zero columns with the multi-hot vectors from split_and_label.
train_curated[label_columns] = train_curated_labels
train_noisy[label_columns]   = train_noisy_labels

# Number of labels attached to each clip.
train_curated['num_labels'] = train_curated[label_columns].sum(axis=1)
train_noisy['num_labels']   = train_noisy[label_columns].sum(axis=1)

# Relative path of every audio file, built from its file name.
train_curated['path'] = '../input/train_curated/'+train_curated['fname']
train_noisy  ['path'] = '../input/train_noisy/'+train_noisy['fname']

train_curated.head()

Unnamed: 0,fname,labels,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,...,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing),num_labels,path
0,0006ae4e.wav,Bark,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,../input/train_curated/0006ae4e.wav
1,0019ef41.wav,Raindrop,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,../input/train_curated/0019ef41.wav
2,001ec0ad.wav,Finger_snapping,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,../input/train_curated/001ec0ad.wav
3,0026c7cb.wav,Run,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,../input/train_curated/0026c7cb.wav
4,0026f116.wav,Finger_snapping,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,../input/train_curated/0026f116.wav


In [7]:
# Stack the curated rows on top of the noisy rows. The original row index of
# each table is kept, so index labels repeat in the combined frame.
train = pd.concat([train_curated, train_noisy])

# The separate tables are no longer needed; release them.
del train_curated
del train_noisy
gc.collect()

train.shape

(24785, 84)

In [8]:
# Sample windows used for the local statistics, in feature order:
# first 4410, last 4410, first 44100, last 44100 samples
# (0.1 s and 1 s at the 44100 Hz loading rate).
_FEATURE_WINDOWS = (
    slice(None, 4410),
    slice(-4410, None),
    slice(None, 44100),
    slice(-44100, None),
)


def _mean_change_position(samples):
    """Mean position of the samples whose relative change to the next sample is non-zero.

    Note that this averages *positions* (indices), not the change values
    themselves. NaN entries (0/0) count as non-zero.
    """
    # Work on a plain ndarray so the result does not depend on how
    # np.nonzero dispatches on a pandas Series.
    values = np.asarray(samples)
    with np.errstate(divide='ignore', invalid='ignore'):
        relative_change = np.diff(values) / values[:-1]
    return np.mean(np.nonzero(relative_change)[0])


def _waveform_features(xc):
    """Return the list of 45 waveform statistics for the Series ``xc``."""
    windows = [xc.iloc[window] for window in _FEATURE_WINDOWS]

    # Global statistics of the whole clip.
    X = [
        xc.mean(),
        xc.median(),
        xc.std(),
        xc.max(),
        xc.min(),
        xc.skew(),
        # Mean absolute deviation around the mean; written out because
        # Series.mad() is no longer available in pandas 2.x.
        (xc - xc.mean()).abs().mean(),
        xc.kurtosis(),
        np.mean(np.diff(xc)),
        _mean_change_position(xc),
        np.abs(xc).max(),
        np.abs(xc).min(),
    ]

    # Per-window statistics: for each statistic, one value per window.
    for stat in ('std', 'mean', 'min', 'max', 'skew'):
        X.extend(getattr(window, stat)() for window in windows)

    X.append( xc.max() / np.abs(xc.min()) )
    X.append( xc.max() - np.abs(xc.min()) )
    X.append( xc.sum() )

    X.extend(_mean_change_position(window) for window in windows)

    X.extend(np.quantile(xc, q) for q in (0.95, 0.99, 0.10, 0.05))

    X.append( np.abs(xc).mean() )
    X.append( np.abs(xc).std() )

    return X


def create_features( pathname ):
    """Load one audio file and summarise its waveform as 45 statistics.

    The file is loaded with librosa at 44100 Hz and, when it is not empty,
    passed through ``librosa.effects.trim``. The features are, in order:
    12 whole-clip statistics, 20 statistics over the first/last 4410 and
    44100 samples (std, mean, min, max, skew), 3 max/min/sum combinations,
    4 per-window change positions, 4 quantiles and the mean and std of the
    absolute signal.

    Parameters
    ----------
    pathname : str
        Path of the audio file.

    Returns
    -------
    numpy.ndarray
        1-D array with 45 values.
    """
    var, sr = librosa.load( pathname, sr=44100)
    # trim silence
    if 0 < len(var): # workaround: 0 length causes error
        var, _ = librosa.effects.trim(var)
    xc = pd.Series(var)

    return np.array( _waveform_features(xc) )

In [9]:

# Extract the feature vector of every training clip using 4 parallel
# workers, then stack the per-file vectors into one 2-D array.
X = np.array(
    Parallel(n_jobs=4)(
        delayed(create_features)(fn) for fn in tqdm(train['path'].values)
    )
)
X.shape

100%|██████████| 24785/24785 [19:42<00:00, 20.97it/s]


(24785, 45)

In [10]:
# Same extraction for the test clips; paths are built from the file names
# listed in the sample submission.
Xtest = np.array(
    Parallel(n_jobs=4)(
        delayed(create_features)('../input/test/'+fn)
        for fn in tqdm(test['fname'].values)
    )
)
Xtest.shape

100%|██████████| 1120/1120 [00:40<00:00, 19.75it/s]


(1120, 45)

In [11]:

# Cross-validated training: one binary LightGBM classifier per label and per
# fold. Out-of-fold predictions go to PREDTRAIN, fold-averaged test
# predictions go to PREDTEST.
n_fold = 5
folds = KFold(n_splits=n_fold, shuffle=True, random_state=69)

params = {'num_leaves': 15,
         'min_data_in_leaf': 200, 
         'objective':'binary',
         "metric": 'auc',
         'max_depth': -1,
         'learning_rate': 0.05,
         "boosting": "gbdt",
         "bagging_fraction": 0.85,
         "bagging_freq": 1,
         "feature_fraction": 0.20,
         "bagging_seed": 42,
         "verbosity": -1,
         "nthread": -1,
         "random_state": 69}

# Size the prediction matrices from the label list rather than a literal.
n_labels = len(label_columns)
PREDTRAIN = np.zeros( (X.shape[0], n_labels) )
PREDTEST  = np.zeros( (Xtest.shape[0], n_labels) )
for f in range(n_labels):
    y = train[ label_columns[f] ].values
    oof      = np.zeros( X.shape[0] )
    oof_test = np.zeros( Xtest.shape[0] )
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(X,y)):
        # n_estimators is only an upper bound; early stopping on the
        # validation fold decides the actual number of trees. The same fold
        # is then scored for the out-of-fold predictions, so the reported
        # AUC is selected on the data it is measured on.
        # NOTE(review): `verbose=` / `early_stopping_rounds=` as fit() keyword
        # arguments are reportedly not accepted by LightGBM >= 4.0 (callbacks
        # replace them) - confirm against the installed version.
        model = lgb.LGBMClassifier(**params, n_estimators = 20000)
        model.fit(X[trn_idx,:], 
                  y[trn_idx], 
                  eval_set=[(X[val_idx,:], y[val_idx])], 
                  eval_metric='auc',
                  verbose=0, 
                  early_stopping_rounds=25)
        oof[val_idx] = model.predict_proba(X[val_idx,:], num_iteration=model.best_iteration_)[:,1]
        # Average over the configured number of folds (was a literal 5.0).
        oof_test += model.predict_proba(Xtest          , num_iteration=model.best_iteration_)[:,1] / n_fold

    PREDTRAIN[:,f] = oof    
    PREDTEST [:,f] = oof_test
    
    print( f, str(roc_auc_score( y, oof ))[:6], label_columns[f] )


0 0.8704 Accelerating_and_revving_and_vroom
1 0.8455 Accordion
2 0.8742 Acoustic_guitar
3 0.8238 Applause
4 0.8464 Bark
5 0.8524 Bass_drum
6 0.8779 Bass_guitar
7 0.7290 Bathtub_(filling_or_washing)
8 0.6363 Bicycle_bell
9 0.7579 Burping_and_eructation
10 0.8829 Bus
11 0.7104 Buzz
12 0.8505 Car_passing_by
13 0.9055 Cheering
14 0.8359 Chewing_and_mastication
15 0.8875 Child_speech_and_kid_speaking
16 0.6805 Chink_and_clink
17 0.8070 Chirp_and_tweet
18 0.7637 Church_bell
19 0.7827 Clapping
20 0.7969 Computer_keyboard
21 0.8106 Crackle
22 0.8370 Cricket
23 0.8996 Crowd
24 0.7106 Cupboard_open_or_close
25 0.7716 Cutlery_and_silverware
26 0.7883 Dishes_and_pots_and_pans
27 0.7711 Drawer_open_or_close
28 0.8306 Drip
29 0.8436 Electric_guitar
30 0.7997 Fart
31 0.7731 Female_singing
32 0.7595 Female_speech_and_woman_speaking
33 0.7996 Fill_(with_liquid)
34 0.8233 Finger_snapping
35 0.8289 Frying_(food)
36 0.7418 Gasp
37 0.8016 Glockenspiel
38 0.8162 Gong
39 0.8459 Gurgling
40 0.7194 Harmonica
4

In [12]:
from sklearn.metrics import roc_auc_score
def calculate_overall_lwlrap_sklearn(truth, scores):
    """Overall lwlrap computed through sklearn's label ranking average precision.

    Rows of ``truth`` without any positive entry are left out, because
    sklearn does not weight such samples correctly. Every remaining row is
    weighted by its number of positive labels.
    """
    is_positive = truth > 0
    labels_per_row = is_positive.sum(axis=1)
    labelled_rows = np.flatnonzero(labels_per_row > 0)
    return label_ranking_average_precision_score(
        is_positive[labelled_rows, :],
        scores[labelled_rows, :],
        sample_weight=labels_per_row[labelled_rows],
    )

# Score the out-of-fold predictions of all training rows against their labels.
print( 'lwlrap cv:', calculate_overall_lwlrap_sklearn( train[label_columns].values, PREDTRAIN ) )

lwlrap cv: 0.26811231519251605


In [13]:
# Fill the label columns of the sample submission with the fold-averaged
# test predictions and write the result without the row index.
test[label_columns] = PREDTEST
test.to_csv('submission.csv', index=False)
test.head()

Unnamed: 0,fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,...,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,000ccb97.wav,0.001453,0.022022,0.002513,0.00649,0.010909,0.014123,0.003882,0.005685,0.013705,0.026914,0.001269,0.01115,0.00237,0.002872,0.002313,0.005069,0.02053,0.114919,0.010063,0.023399,0.004278,0.007822,0.012935,0.001131,0.093519,0.021791,0.023368,0.00838,0.008932,0.007173,0.004812,0.005396,0.025004,0.008735,0.010541,0.002434,0.012503,0.004742,0.009022,...,0.015328,0.014162,0.021894,0.010919,0.006815,0.006277,0.025606,0.023007,0.006995,0.007668,0.009299,0.002866,0.019704,0.001953,0.002035,0.017898,0.006589,0.008184,0.009651,0.022852,0.004309,0.0104,0.031377,0.006478,0.002992,0.006426,0.008514,0.010686,0.016777,0.003022,0.010297,0.004476,0.008744,0.004857,0.007393,0.007033,0.004738,0.005956,0.003484,0.010337
1,0012633b.wav,0.003587,0.003228,0.000787,0.006274,0.015204,0.001862,0.001836,0.012122,0.012527,0.013949,0.002759,0.012854,0.003621,0.001527,0.020951,0.01588,0.009152,0.018184,0.008743,0.006035,0.018289,0.01061,0.020468,0.001113,0.020163,0.015149,0.012637,0.024533,0.012164,0.001236,0.013967,0.005465,0.014744,0.023605,0.007256,0.021118,0.018822,0.003295,0.006325,...,0.012446,0.001017,0.013008,0.014367,0.006657,0.006031,0.003867,0.003446,0.006911,0.014369,0.01458,0.034588,0.00829,0.044355,0.002483,0.004109,0.017531,0.013564,0.004631,0.004962,0.021743,0.026965,0.007497,0.011789,0.011659,0.033292,0.014101,0.001607,0.006703,0.013624,0.014513,0.013053,0.009947,0.02317,0.023392,0.037043,0.016934,0.013726,0.00269,0.027668
2,001ed5f1.wav,0.009622,0.002169,0.001559,0.009289,0.06572,0.001862,0.001889,0.034803,0.012976,0.015164,0.00141,0.010871,0.013452,0.001413,0.017021,0.0061,0.016351,0.00448,0.004848,0.016509,0.040437,0.012251,0.001623,0.001481,0.009879,0.019879,0.01395,0.026896,0.012826,0.000828,0.018519,0.004899,0.007631,0.011848,0.035938,0.003264,0.022803,0.00267,0.004233,...,0.009353,0.000802,0.008316,0.038104,0.026064,0.002794,0.003228,0.002918,0.005838,0.016675,0.01758,0.012405,0.005886,0.011288,0.001841,0.004785,0.015123,0.017093,0.012682,0.031724,0.010505,0.011518,0.019915,0.019015,0.044344,0.060471,0.001267,0.001878,0.008231,0.016072,0.008022,0.001932,0.010136,0.018205,0.008124,0.0022,0.020134,0.02655,0.004455,0.024797
3,00294be0.wav,0.000667,0.002167,0.00104,0.001417,0.009279,0.001805,0.001659,0.014351,0.010736,0.013308,0.002833,0.019619,0.002555,0.000688,0.034419,0.016682,0.00951,0.013923,0.006587,0.004939,0.033066,0.013638,0.011846,0.000611,0.016543,0.013218,0.009878,0.039375,0.019351,0.000554,0.022289,0.00512,0.005059,0.012354,0.007597,0.008473,0.018562,0.001784,0.004622,...,0.008716,0.000677,0.005294,0.014372,0.008451,0.002237,0.009447,0.002568,0.007079,0.030412,0.006034,0.00378,0.01265,0.037767,0.000995,0.003718,0.015437,0.018384,0.002073,0.003885,0.024314,0.011526,0.005752,0.002873,0.011096,0.015161,0.004838,0.000932,0.004508,0.034499,0.008152,0.002824,0.005972,0.019422,0.0135,0.00899,0.034875,0.058644,0.000955,0.054288
4,003fde7a.wav,0.014504,0.002422,0.003228,0.00872,0.045329,0.005091,0.003005,0.039575,0.020846,0.0107,0.003022,0.009495,0.045436,0.004741,0.006421,0.00084,0.02451,0.006537,0.023037,0.007988,0.015593,0.006892,0.002482,0.004459,0.005497,0.013228,0.010489,0.016384,0.009059,0.010056,0.011083,0.007747,0.005765,0.01691,0.00949,0.004148,0.027567,0.037376,0.026515,...,0.01206,0.003023,0.029694,0.010405,0.043192,0.003416,0.002107,0.015616,0.007849,0.016246,0.011291,0.003562,0.006065,0.00523,0.012547,0.007238,0.00749,0.018967,0.017213,0.037088,0.009633,0.040783,0.012929,0.011352,0.049267,0.013276,0.003867,0.015124,0.006265,0.012724,0.110679,0.006728,0.006351,0.007072,0.025213,0.003169,0.016443,0.009977,0.039324,0.013371
