In [38]:
import numpy as np
import librosa as lr
import pickle
import pumpp
import os

import h5py
from librosa.util import find_files

import pescador
from tqdm import tqdm
import keras as K
from keras.engine.topology import Layer
from keras.backend import squeeze

import json

import sys
sys.path.append('..')
from pcen_pump import * #custom pcen pump objects


BASE_LOC = "/beegfs/ci411/pcen/"
DATA_LOC = BASE_LOC + "URBAN-SED_v2.0.0/"

TRAIN_AUDIO_LOC = DATA_LOC + "audio/train/"
TRAIN_ANNOTATIONS_LOC = DATA_LOC + "annotations/train/"
TRAIN_FEATURES_LOC = BASE_LOC + "features/URBAN-SED_dry/train"

VALIDATE_AUDIO_LOC = DATA_LOC + "audio/validate/"
VALIDATE_ANNOTATIONS_LOC = DATA_LOC + "annotations/validate/"
VALIDATE_FEATURES_LOC = BASE_LOC + "features/URBAN-SED_dry/validate"

TEST_AUDIO_LOC = DATA_LOC + "audio/test/"
TEST_ANNOTATIONS_LOC = DATA_LOC + "annotations/test/"
TEST_FEATURES_LOC = BASE_LOC + "features/URBAN-SED_dry/est"


MODEL_LOC = BASE_LOC +"models/"

URBANSED_CLASSES = ['air_conditioner',
                    'car_horn',
                    'children_playing',
                    'dog_bark',
                    'drilling',
                    'engine_idling',
                    'gun_shot',
                    'jackhammer',
                    'siren',
                    'street_music']


In [2]:
#prototyping stuff
from IPython.display import Audio 
DEMO_ID = "bimodal761"

DEMO_AUDIO_LOC = "/beegfs/ci411/pcen/URBAN-SED_v2.0.0/audio/train/soundscape_train_{}.wav".format(DEMO_ID)
DEMO_ANNOTATION_LOC = "/beegfs/ci411/pcen/URBAN-SED_v2.0.0/annotations/train/soundscape_train_{}.jams".format(DEMO_ID)

x_t, sr = lr.load(DEMO_AUDIO_LOC)
Audio(data=x_t, rate=sr)
hop_length = 512
n_fft = 1024
n_mels = 128
n_t_constants = 10

In [3]:
#pump management (building, saving, and loading)

def build_pump(sr, hop_length, n_fft, n_mels, n_t_constants=10):
    pcen_t = PCEN_T(name='PCEN', sr=sr, hop_length=hop_length, n_t_constants=n_t_constants)
    
    p_tag = pumpp.task.StaticLabelTransformer(name='static',
                                              namespace='tag_open',
                                              labels=URBANSED_CLASSES)

    p_dtag = pumpp.task.DynamicLabelTransformer(name='dynamic',
                                                namespace='tag_open',
                                                labels=URBANSED_CLASSES,
                                                sr=sr,
                                                hop_length=hop_length)
    pump = pumpp.Pump(pcen_t, p_tag, p_dtag)
    
    with open(os.path.join(BASE_LOC, 'pump.pkl'), 'wb') as fd:
        pickle.dump(pump, fd)
        
    return pump

def load_pump():
    with open(os.path.join(BASE_LOC, 'pump.pkl'), 'rb') as fd:
        pump = pickle.load(fd)
    return pump

pump = build_pump(sr, hop_length, n_fft, n_mels, n_t_constants = n_t_constants)
pump_load = load_pump()
assert (pump(y=x_t, sr=sr)['PCEN/mag']==pump_load(y=x_t, sr=sr)['PCEN/mag']).all()

In [4]:
pump_load

In [27]:
pump_load['dynamic'].__dict__['fields']['dynamic/tags'].shape[1]

10

In [28]:
#utils stolen from milsed


def base(filename):
    '''Identify a file by its basename:
    /path/to/base.name.ext => base.name
    Parameters
    ----------
    filename : str
        Path to the file
    Returns
    -------
    base : str
        The base name of the file
    '''
    return os.path.splitext(os.path.basename(filename))[0]

def save_h5(filename, **kwargs):
    '''Save data to an hdf5 file.
    Parameters
    ----------
    filename : str
        Path to the file
    kwargs
        key-value pairs of data
    See Also
    --------
    load_h5
    '''
    with h5py.File(filename, 'w') as hf:
        hf.update(kwargs)


def load_h5(filename):
    '''Load data from an hdf5 file created by `save_h5`.
    Parameters
    ----------
    filename : str
        Path to the hdf5 file
    Returns
    -------
    data : dict
        The key-value data stored in `filename`
    See Also
    --------
    save_h5
    '''
    data = {}

    def collect(k, v):
        if isinstance(v, h5py.Dataset):
            data[k] = v.value

    with h5py.File(filename, mode='r') as hf:
        hf.visititems(collect)

    return data

def convert(aud, jam, pump, outdir):
    data = pump.transform(aud, jam)
    fname = os.path.extsep.join([os.path.join(outdir, base(aud)), 'h5'])
    print("Saving: {}".format(fname))
    save_h5(fname, **data)

def get_ann_audio(aud_loc, ann_loc): 
    '''Get a list of annotations and audio files from a pair of directories.
    This also validates that the lengths match and are paired properly.
    Parameters
    ----------
    directory : str
        The directory to search
    Returns
    -------
    pairs : list of tuples (audio_file, annotation_file)
    '''

    audio = find_files(aud_loc)
    annos = find_files(ann_loc, ext=['jams', 'jamz'])

    paired = list(zip(audio, annos))

    if (len(audio) != len(annos) or
       any([base(aud) != base(ann) for aud, ann in paired])):
        raise RuntimeError('Unmatched audio/annotation '
                           'data in {}'.format(directory))

    return paired

train_pairs= get_ann_audio(TRAIN_AUDIO_LOC, TRAIN_ANNOTATIONS_LOC)
validate_pairs= get_ann_audio(VALIDATE_AUDIO_LOC, VALIDATE_ANNOTATIONS_LOC)
test_pairs= get_ann_audio(TEST_AUDIO_LOC, TEST_ANNOTATIONS_LOC)

In [29]:
from joblib import Parallel, delayed

#Parallel(n_jobs=2)(delayed(convert)(aud, ann, pump, TRAIN_FEATURES_LOC)\
#                           for aud, ann in train_pairs)
#
#Parallel(n_jobs=2)(delayed(convert)(aud, ann, pump, VALIDATE_FEATURES_LOC)\
#                           for aud, ann in validate_pairs)
#
#Parallel(n_jobs=2)(delayed(convert)(aud, ann, pump, TEST_FEATURES_LOC)\
#                           for aud, ann in test_pairs)

In [30]:
def make_sampler(max_samples, duration, pump, seed):

    n_frames = lr.time_to_frames(duration,
                                 sr=pump['PCEN'].sr,
                                 hop_length=pump['PCEN'].hop_length)

    return pump.sampler(max_samples, n_frames, random_state=seed)


def data_sampler(fname, sampler):
    '''Generate samples from a specified h5 file'''
    file_sampler = sampler(load_h5(fname))
    for datum in file_sampler:
        yield datum

class SqueezeLayer(Layer):
    '''
    Keras squeeze layer
    '''
    def __init__(self, axis=-1, **kwargs):
        super(SqueezeLayer, self).__init__(**kwargs)
        self.axis = axis

    def get_output_shape_for(self, input_shape):
        # shape = np.array(input_shape)
        # shape = shape[shape != 1]
        # return tuple(shape)
        shape = list(input_shape)
        del shape[self.axis]
        return tuple(shape)

    def compute_output_shape(self, input_shape):
        return self.get_output_shape_for(input_shape)

    def call(self, x, mask=None):
        return squeeze(x, axis=self.axis)

    def get_config(self):
        config = {'axis': self.axis}
        base_config = super(SqueezeLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))            
            
def construct_cnnL3_7_strong(pump):
    '''
    Like cnnL3_7 but with strong prediction
    Parameters
    ----------
    pump
    Returns
    -------
    '''
    model_inputs = ['PCEN/mag']

    # Build the input layer
    layers = pump.layers()

    x_mag = layers['PCEN/mag']
    
    # Apply batch normalization
    x_bn = K.layers.BatchNormalization()(x_mag)


    # BLOCK 1
    conv1 = K.layers.Convolution2D(16, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(x_bn)
    bn1 = K.layers.BatchNormalization()(conv1)
    conv2 = K.layers.Convolution2D(16, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(bn1)
    bn2 = K.layers.BatchNormalization()(conv2)
    pool2 = K.layers.MaxPooling2D((2, 2), padding='valid')(bn2)

    # BLOCK 2
    conv3 = K.layers.Convolution2D(32, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(pool2)
    bn3 = K.layers.BatchNormalization()(conv3)
    conv4 = K.layers.Convolution2D(32, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(bn3)
    bn4 = K.layers.BatchNormalization()(conv4)
    pool4 = K.layers.MaxPooling2D((2, 2), padding='valid')(bn4)

    # BLOCK 3
    conv5 = K.layers.Convolution2D(64, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(pool4)
    bn5 = K.layers.BatchNormalization()(conv5)
    conv6 = K.layers.Convolution2D(64, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(bn5)
    bn6 = K.layers.BatchNormalization()(conv6)
    pool6 = K.layers.MaxPooling2D((2, 2), padding='valid')(bn6)

    # BLOCK 4
    conv7 = K.layers.Convolution2D(128, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(pool6)
    bn7 = K.layers.BatchNormalization()(conv7)
    conv8 = K.layers.Convolution2D(128, (3, 3),
                                   padding='same',
                                   activation='relu',
                                   kernel_initializer='he_normal')(bn7)
    bn8 = K.layers.BatchNormalization()(conv8)
    pool8 = K.layers.MaxPooling2D((2, 2), padding='valid')(bn8)

    # CONV SQUEEZE
    conv_sq = K.layers.Convolution2D(256, (1, 8),
                                     padding='valid',
                                     activation='relu',
                                     kernel_initializer='he_normal')(pool8)
    bn8 = K.layers.BatchNormalization()(conv_sq)
    sq2 = SqueezeLayer(axis=2)(bn8) #changed axis from -2 to 2

    # Up-sample back to input frame rate
    #sq2_up = K.layers.UpSampling1D(size=2**4)(sq2)

    n_classes = pump.fields['static/tags'].shape[0]

    p0 = K.layers.Dense(n_classes, activation='sigmoid',
                        bias_regularizer=K.regularizers.l2())

    p_dynamic = K.layers.TimeDistributed(p0, name='dynamic/tags')(sq2)#_up)

   # p_static = K.layers.GlobalMaxPooling1D(name='static/tags')(p_dynamic)

    model_outputs = ['dynamic/tags']#, 'static/tags']

    model = K.models.Model([x_mag],
                           [p_dynamic])#, p_static])

    return model, model_inputs, model_outputs


In [31]:
max_samples = 128
duration = 10
seed = 20170612

sampler = make_sampler(max_samples, duration, pump, seed)

construct_model = construct_cnnL3_7_strong
model, inputs, outputs = construct_model(pump)

In [32]:
def data_generator(working, sampler, k, rate, batch_size=32, **kwargs):
    '''Generate a data stream from a collection of tracks and a sampler'''

    seeds = []

    for track in tqdm(find_files(working,ext='h5')):
        fname = os.path.join(working,track)
        seeds.append(pescador.Streamer(data_sampler, fname, sampler))

    # Send it all to a mux
    mux = pescador.StochasticMux(seeds, k, rate, mode='with_replacement', **kwargs)

    return mux

In [33]:
import six

def keras_tuples(gen, inputs=None, outputs=None):

    if isinstance(inputs, six.string_types):
        if isinstance(outputs, six.string_types):
            # One input, one output
            for datum in gen:
                yield (datum[inputs], datum[outputs])
        else:
            # One input, multi outputs
            for datum in gen:
                yield (datum[inputs], [datum[o] for o in outputs])
    else:
        if isinstance(outputs, six.string_types):
            for datum in gen:
                yield ([datum[i] for i in inputs], datum[outputs])
        else:
            # One input, multi outputs
            for datum in gen:
                yield ([datum[i] for i in inputs],
                       [datum[o] for o in outputs])

In [39]:
train_streamers = 64
rate=4

gen_train = data_generator(TRAIN_FEATURES_LOC, sampler, train_streamers,\
                           rate, random_state=seed)

output_vars = 'dynamic/tags'
gen_train = keras_tuples(gen_train(), inputs=inputs, outputs=output_vars)

gen_val = data_generator(VALIDATE_FEATURES_LOC, sampler, train_streamers,\
                           rate, random_state=seed)
gen_val = keras_tuples(gen_val(), inputs=inputs, outputs=output_vars)

loss = {output_vars: 'binary_crossentropy'}
metrics = {output_vars: 'accuracy'}
monitor = 'val_{}_acc'.format(output_vars)

100%|██████████| 6000/6000 [00:00<00:00, 242279.60it/s]
100%|██████████| 2000/2000 [00:00<00:00, 17256.14it/s]


In [40]:
def max_pool(data, N=4):
    for _ in range(N):
        N_data, n_channels = data.shape
        new_data = np.empty((N_data//2,n_channels))
        for i in range(N_data//2):
            for j in range(n_channels):
                new_data[i,j] = max(data[2*i,j], data[(2*i)+1,j])
        data = new_data
    return np.array([data])

def label_transformer_generator(generator):
    for data in generator:
        features, labels = data
        yield (features, max_pool(labels[0]))

In [41]:
gen_train_label = label_transformer_generator(gen_train)
gen_val_label = label_transformer_generator(gen_val)

In [42]:
class LossHistory(K.callbacks.Callback):

    def __init__(self, outfile):
        super().__init__()
        self.outfile = outfile

    def on_train_begin(self, logs={}):
        self.loss = []
        self.val_loss = []

    def on_epoch_end(self, epoch, logs={}):
        self.loss.append(logs.get('loss'))
        self.val_loss.append(logs.get('val_loss'))

        loss_dict = {'loss': self.loss, 'val_loss': self.val_loss}
        with open(self.outfile, 'wb') as fp:
            pickle.dump(loss_dict, fp)

In [43]:
modelid = 'model00'
reduce_lr = 10
early_stopping = 10

model.compile(K.optimizers.Adam(), loss=loss, metrics=metrics)

# Store the model
# save the model object
model_spec = K.utils.serialize_keras_object(model)

with open(os.path.join(MODEL_LOC, modelid, 'model_spec.pkl'),\
          'wb') as fd:
    pickle.dump(model_spec, fd)

# save the model definition
'''
modeljsonfile = os.path.join(MODEL_LOC, modelid, 'model.json')
model_json = model.to_json()
with open(modeljsonfile, 'w') as json_file:
    json.dump(model_json, json_file, indent=2)
'''
# Construct the weight path
weight_path = os.path.join(MODEL_LOC, modelid, 'model.h5')

# Build the callbacks
cb = []
cb.append(K.callbacks.ModelCheckpoint(weight_path,
                                      save_best_only=True,
                                      verbose=1,
                                      monitor=monitor))

cb.append(K.callbacks.ReduceLROnPlateau(patience=reduce_lr,
                                        verbose=1,
                                        monitor=monitor))

cb.append(K.callbacks.EarlyStopping(patience=early_stopping,
                                    verbose=1,
                                    monitor=monitor))

history_checkpoint = os.path.join(MODEL_LOC, modelid,
                                  'history_checkpoint.pkl')
cb.append(LossHistory(history_checkpoint))

history_csvlog = os.path.join(MODEL_LOC, modelid, 'history_csvlog.csv')
cb.append(K.callbacks.CSVLogger(history_csvlog, append=True,
                                separator=','))


In [44]:
# Fit the model
verbose = True
epoch_size = 512
epochs = 5
validation_size = 1024


print('Fit model...')
if verbose:
    verbosity = 1
else:
    verbosity = 2
history = model.fit_generator(gen_train_label, epoch_size, epochs,
                              validation_data=gen_val_label,
                              validation_steps=validation_size,
                              verbose=verbosity, callbacks=cb)

print('Done training. Saving results to disk...')
# Save history
with open(os.path.join(MODEL_LOC, modelid, 'history.pkl'), 'wb') as fd:
    pickle.dump(history.history, fd)
print('Saving Weights')
model.save_weights(weight_path)

Fit model...
Epoch 1/5




KeyboardInterrupt: 

In [17]:
def convert_ts_to_dict(predictions, labels, fname, threshold=None, real_length = 10.):
    predictions = predictions.T
    out_dicts = []
    sr = real_length/predictions.shape[1]
    
    for i, label in enumerate(labels):
        if threshold is not None:
            high_low_array = (predictions[i]>threshold).astype(int)
        else:
            high_low_array = predictions[i]
            
        label_data = np.concatenate((np.zeros(1), high_low_array, np.zeros(1)))
        onsets = np.argwhere(np.diff(label_data)==1) -1
        offsets = np.argwhere(np.diff(label_data)==-1) -1

        
        for i in range(len(onsets)):
            new_dict = {}
            new_dict['event_label']=label
            new_dict['event_onset']=onsets[i][0]*sr
            new_dict['event_offset']=offsets[i][0]*sr
            new_dict['file']=fname
            new_dict['scene_label']= 'UrbanSED'
            out_dicts.append(new_dict)
    return out_dicts


import sed_eval

def score_model(test_idx, test_features, model, labels):
        
    segment_based_metrics = sed_eval.sound_event.SegmentBasedMetrics(
        event_label_list=labels)
    event_based_metrics = sed_eval.sound_event.EventBasedMetrics(
        event_label_list=labels)

    for filename in test_idx:
        test_feature_loc = os.path.join(test_features, filename + '.h5')
        test_feature = load_h5(test_feature_loc)
        datum = test_feature['PCEN/mag']
        ytrue = max_pool(test_feature['dynamic/tags'][0])[0]
        ypred = model.predict(datum)[0]
        
        ytrue_dict = convert_ts_to_dict(ytrue, labels, filename)
        ypred_dict = convert_ts_to_dict(ypred, labels, filename, threshold=0.5)

        segment_based_metrics.evaluate(reference_event_list=ytrue_dict,\
                                       estimated_event_list=ypred_dict)

        event_based_metrics.evaluate(reference_event_list=ytrue_dict,\
                                     estimated_event_list=ypred_dict)
        


    # Get only certain metrics
    overall_segment_based_metrics = segment_based_metrics.results_overall_metrics()
    print("Accuracy:", overall_segment_based_metrics['accuracy']['accuracy'])

    # Or print all metrics as reports
    print(segment_based_metrics)
    print(event_based_metrics)
        


In [18]:
### Evaluate model
print('Evaluate model...')
# Load best params
model.load_weights(weight_path)

with open(os.path.join(DATA_LOC, 'index_test.json'), 'r') as fp:
    test_idx = json.load(fp)['id']

# Compute eval scores
results = score_model(test_idx, TEST_FEATURES_LOC, model, URBANSED_CLASSES)

# Save results to disk
results_file = os.path.join(MODEL_LOC, modelid, 'results.json')
with open(results_file, 'w') as fp:
    json.dump(results, fp, indent=2)

print('Done!')

Evaluate model...




Accuracy: 0.8362730309233919
Segment based metrics
  Evaluated length                  : 17700.38 sec
  Evaluated files                   : 2000 
  Segment length                    : 1.00 sec

  Overall metrics (micro-average)
  F-measure
    F-measure (F1)                  : 42.10 %
    Precision                       : 46.25 %
    Recall                          : 38.63 %
  Error rate
    Error rate (ER)                 : 0.78 
    Substitution rate               : 0.28 
    Deletion rate                   : 0.33 
    Insertion rate                  : 0.17 
  Accuracy
    Sensitivity                     : 38.63 %
    Specificity                     : 91.82 %
    Balanced accuracy               : 65.23 %
    Accuracy                        : 83.63 %

  Class-wise average metrics (macro-average)
  F-measure
    F-measure (F1)                  : 41.43 %
    Precision                       : 47.70 %
    Recall                          : 38.64 %
  Error rate
    Error rate (ER)          

In [19]:
test_features = TEST_FEATURES_LOC

labels = URBANSED_CLASSES

segment_based_metrics = sed_eval.sound_event.SegmentBasedMetrics(
    event_label_list=labels)
event_based_metrics = sed_eval.sound_event.EventBasedMetrics(
    event_label_list=labels)

for filename in test_idx:
    test_feature_loc = os.path.join(test_features, filename + '.h5')
    test_feature = load_h5(test_feature_loc)
    datum = test_feature['PCEN/mag']
    ytrue = max_pool(test_feature['dynamic/tags'][0])[0]
    ypred = model.predict(datum)[0]
    
    ytrue_dict = convert_ts_to_dict(ytrue, labels, 'test.file')
    ypred_dict = convert_ts_to_dict(ypred, labels, 'test.file', threshold=0.5)
    
    segment_based_metrics.evaluate(ytrue_dict,ypred_dict)

    event_based_metrics.evaluate(ytrue_dict,ypred_dict)

    

# Get only certain metrics
overall_segment_based_metrics = segment_based_metrics.results_overall_metrics()
print("Accuracy:", overall_segment_based_metrics['accuracy']['accuracy'])

# Or print all metrics as reports
#print(segment_based_metrics)
#print(event_based_metrics)



Accuracy: 0.8362730309233919


In [20]:
#test_files = os.listdir(TEST_FEATURES_LOC)
#test_names = []
#for f in test_files:
#    test_names.append(f.split('.')[0])
#with open(os.path.join(DATA_LOC, 'index_test.json'), 'w') as json_file:
#    json.dump({'id':test_names}, json_file)

In [31]:
modelyamlfile = os.path.join(MODEL_LOC, modelid, 'model.yaml')
model_yaml = model.to_yaml()
with open(modelyamlfile, 'w') as yaml_file:
    yaml_file.write(model_yaml)

In [36]:
from keras.models import model_from_yaml
with open(modelyamlfile, 'r') as yaml_file:
    loaded_model_yaml = yaml_file.read()
loaded_model = model_from_yaml(loaded_model_yaml, custom_objects={'SqueezeLayer':SqueezeLayer})
loaded_model.load_weights(weight_path)


<keras.engine.training.Model at 0x2acb7314df28>