In [2]:
import numpy as np
import librosa as lr
import pescador
from tqdm import tqdm
import keras as K
from keras.engine.topology import Layer
from keras.backend import squeeze
import json
import six
import pickle
import sys
import os
import argparse
import ast
from keras.layers import Input

import sys
sys.path.append('/home/ci411/pcen-t-varying/')

from pcen_t.utils import *
from pcen_t.models import MODELS
from pcen_t.pcen_pump import *

# The ten URBAN-SED class label strings.
# (Not referenced by any of the cells visible in this notebook.)
URBANSED_CLASSES = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]

Using TensorFlow backend.


In [4]:
def make_sampler(max_samples, duration, pump, seed):
    '''Build a patch sampler from ``pump``.

    The patch length is ``duration`` (seconds) converted to a frame count
    with the ``sr`` and ``hop_length`` of the pump's first operator.

    Parameters
    ----------
    max_samples
        Forwarded as the first positional argument of ``pump.sampler``.
    duration
        Patch duration in seconds.
    pump
        Object exposing ``ops``, item access by operator name, and ``sampler``.
    seed
        Forwarded to ``pump.sampler`` as ``random_state``.

    Returns
    -------
    The object returned by ``pump.sampler``.
    '''
    # The first operator of the pump defines the time resolution
    first_op = pump[pump.ops[0].name]
    patch_frames = lr.time_to_frames(duration,
                                     sr=first_op.sr,
                                     hop_length=first_op.hop_length)
    return pump.sampler(max_samples, patch_frames, random_state=seed)


@pescador.streamable
def data_sampler(fname, sampler, slices, field=None):
    '''Generate samples from a specified h5 file.

    Parameters
    ----------
    fname
        Path handed to ``load_h5``.
    sampler
        Callable applied to the loaded data dictionary; its result is
        iterated and every item is yielded.
    slices
        If not ``None``, index applied to the 4th axis of the selected
        field before sampling. ``None`` leaves the data untouched.
    field
        Key of the entry of the data dictionary that ``slices`` applies to.
        When omitted, the first field of the notebook-level ``pump`` is
        used, which is what this function always did before.

    Yields
    ------
    The items produced by ``sampler(data_dict)``.
    '''
    data_dict = load_h5(fname)

    if slices is not None:
        if field is None:
            # Backward-compatible fallback on the notebook global. It is
            # resolved only when slicing is requested, so the common
            # ``slices=None`` path no longer depends on hidden state.
            field = list(pump.fields.keys())[0]
        data_dict[field] = data_dict[field][:, :, :, slices]

    yield from sampler(data_dict)

    
def data_generator(directories, sampler, k, rate, batch_size=32, slices=None, **kwargs):
    '''Build a stochastic mux over every h5 file found in ``directories``.

    One ``data_sampler`` streamer is created per file returned by
    ``find_files(directory, ext='h5')``; a progress bar is shown for each
    directory.

    Parameters
    ----------
    directories
        Iterable of directories to scan.
    sampler
        Sampler forwarded to every ``data_sampler``.
    k, rate
        Second and third positional arguments of ``pescador.StochasticMux``.
    batch_size
        Accepted for interface compatibility; not used by this function.
    slices
        Forwarded to every ``data_sampler``.
    **kwargs
        Extra keyword arguments for ``pescador.StochasticMux``.

    Returns
    -------
    The ``pescador.StochasticMux`` built in ``'with_replacement'`` mode.
    '''
    streamers = [
        data_sampler(os.path.join(folder, h5_name), sampler, slices)
        for folder in directories
        for h5_name in tqdm(find_files(folder, ext='h5'))
    ]

    # Send it all to a mux
    return pescador.StochasticMux(streamers, k, rate, mode='with_replacement', **kwargs)

def data_generator_val(directories, sampler, batch_size=32, slices=None, **kwargs):
    '''Build a cycling chain mux over every h5 file found in ``directories``.

    One ``data_sampler`` streamer is created per file returned by
    ``find_files(directory, ext='h5')``; a progress bar is shown for each
    directory.

    Parameters
    ----------
    directories
        Iterable of directories to scan.
    sampler
        Sampler forwarded to every ``data_sampler``.
    batch_size
        Accepted for interface compatibility; not used by this function.
    slices
        Forwarded to every ``data_sampler``.
    **kwargs
        Extra keyword arguments for ``pescador.ChainMux``.

    Returns
    -------
    The ``pescador.ChainMux`` built in ``'cycle'`` mode.
    '''
    streamers = []
    for folder in directories:
        h5_names = tqdm(find_files(folder, ext='h5'))
        streamers.extend(
            data_sampler(os.path.join(folder, h5_name), sampler, slices)
            for h5_name in h5_names
        )

    # Send it all to a mux
    return pescador.ChainMux(streamers, mode='cycle', **kwargs)
   
    
def keras_tuples(gen, inputs=None, outputs=None):
    '''Turn a stream of dictionaries into ``(inputs, outputs)`` tuples.

    Parameters
    ----------
    gen
        Iterable of mappings.
    inputs
        A single key (string) or an iterable of keys. A string yields the
        bare value ``datum[inputs]``; anything else yields a list with one
        value per key.
    outputs
        Same convention as ``inputs``, applied to the second tuple element.

    Yields
    ------
    One 2-tuple per item of ``gen``.
    '''
    # Decide once, up front, whether each side is a single key or several
    single_input = isinstance(inputs, six.string_types)
    single_output = isinstance(outputs, six.string_types)

    for datum in gen:
        x = datum[inputs] if single_input else [datum[key] for key in inputs]
        y = datum[outputs] if single_output else [datum[key] for key in outputs]
        yield (x, y)

def label_transformer_generator(generator):
    '''Re-emit ``(features, labels)`` pairs with pooled labels.

    For every pair drawn from ``generator`` the features are passed through
    unchanged and the labels are replaced by ``max_pool(labels[0])``.
    '''
    for features, labels in generator:
        # Only the first element of the labels is pooled
        yield (features, max_pool(labels[0]))
        
        
class LossHistory(K.callbacks.Callback):
    '''Keras callback that records the loss curves and pickles them each epoch.

    After every epoch the accumulated ``'loss'`` and ``'val_loss'`` values
    are written to ``outfile`` as a pickled dictionary
    ``{'loss': [...], 'val_loss': [...]}``, overwriting the previous file.

    Parameters
    ----------
    outfile
        Path of the pickle file to (re)write after each epoch.
    '''

    def __init__(self, outfile):
        super().__init__()
        self.outfile = outfile
        # Initialised here as well so on_epoch_end is safe even if
        # on_train_begin was never invoked.
        self.loss = []
        self.val_loss = []

    def on_train_begin(self, logs=None):
        '''Reset the recorded history at the start of training.'''
        self.loss = []
        self.val_loss = []

    def on_epoch_end(self, epoch, logs=None):
        '''Append this epoch's losses and rewrite the history file.'''
        # ``None`` default instead of a shared mutable ``{}``
        logs = logs or {}
        self.loss.append(logs.get('loss'))
        self.val_loss.append(logs.get('val_loss'))

        loss_dict = {'loss': self.loss, 'val_loss': self.val_loss}
        with open(self.outfile, 'wb') as fp:
            pickle.dump(loss_dict, fp)

In [5]:
# Root folders of the precomputed features
mel_path = "/beegfs/ci411/pcen/features_807/mel"
mel_dest_path = "/beegfs/ci411/pcen/features_807/unpitched/mel"

# Dataset variants to load
feature_list = [
    'URBAN-SED_dry',
    'URBAN-SED_sim_short',
    'URBAN-SED_sim_medium',
    'URBAN-SED_sim_long',
    'URBAN-SED_alley',
    'URBAN-SED_bedroom',
    'URBAN-SED_tunnel',
]

# Training folders are taken from mel_path, validation folders from mel_dest_path
train_features = [os.path.join(mel_path, name, 'train') for name in feature_list]
valid_features = [os.path.join(mel_dest_path, name, 'validate') for name in feature_list]

for feature_name, train_dir, valid_dir in zip(feature_list, train_features, valid_features):
    print('Loading {}: \t{} \t{}'.format(feature_name, train_dir, valid_dir))

# Load the pump, remember its first field name, and build the training sampler
pump = load_pump(os.path.join('/beegfs/ci411/pcen/pumps/mel', 'pump.pkl'))
field = list(pump.fields.keys())[0]
sampler = make_sampler(128, 10.0, pump, 20170613)

Loading URBAN-SED_dry: 	/beegfs/ci411/pcen/features_807/mel/URBAN-SED_dry/train 	/beegfs/ci411/pcen/features_807/unpitched/mel/URBAN-SED_dry/validate
Loading URBAN-SED_sim_short: 	/beegfs/ci411/pcen/features_807/mel/URBAN-SED_sim_short/train 	/beegfs/ci411/pcen/features_807/unpitched/mel/URBAN-SED_sim_short/validate
Loading URBAN-SED_sim_medium: 	/beegfs/ci411/pcen/features_807/mel/URBAN-SED_sim_medium/train 	/beegfs/ci411/pcen/features_807/unpitched/mel/URBAN-SED_sim_medium/validate
Loading URBAN-SED_sim_long: 	/beegfs/ci411/pcen/features_807/mel/URBAN-SED_sim_long/train 	/beegfs/ci411/pcen/features_807/unpitched/mel/URBAN-SED_sim_long/validate
Loading URBAN-SED_alley: 	/beegfs/ci411/pcen/features_807/mel/URBAN-SED_alley/train 	/beegfs/ci411/pcen/features_807/unpitched/mel/URBAN-SED_alley/validate
Loading URBAN-SED_bedroom: 	/beegfs/ci411/pcen/features_807/mel/URBAN-SED_bedroom/train 	/beegfs/ci411/pcen/features_807/unpitched/mel/URBAN-SED_bedroom/validate
Loading URBAN-SED_tunnel: 	/

In [10]:
# Validation sampler: max_samples=1, same duration and seed as the training sampler
sampler_val = make_sampler(1, 10.0, pump, 20170613)

slices = None

# Model constructor; the last input axis has one channel unless slices are given
construct_model = MODELS['cnnl3_7_strong']

n_channels = 1 if slices is None else len(slices)
input_layer = Input(name=field, shape=(None, 128, n_channels), dtype='float32')

model, inputs, outputs = construct_model(input_layer, pump)

output_vars = 'dynamic/tags'

# Streamable generators
gen_train = data_generator(train_features, sampler, 64, 4,
                           random_state=20170613, slices=slices)

# NOTE(review): the validation stream is built from train_features, and
# valid_features is not used in the visible cells — confirm this is intended.
gen_val = data_generator_val(train_features, sampler_val,
                             random_state=20170613, slices=slices)

# Convert the activated streams to Keras (inputs, outputs) tuples
gen_train = keras_tuples(gen_train(), inputs=inputs, outputs=output_vars)
gen_val = keras_tuples(gen_val(), inputs=inputs, outputs=output_vars)

# Apply the label transformation
gen_train_label = label_transformer_generator(gen_train)
gen_val_label = label_transformer_generator(gen_val)

100%|██████████| 30000/30000 [00:00<00:00, 297714.70it/s]
100%|██████████| 30000/30000 [00:00<00:00, 296685.62it/s]
100%|██████████| 30000/30000 [00:00<00:00, 74420.18it/s]
100%|██████████| 30000/30000 [00:00<00:00, 296589.12it/s]
100%|██████████| 30000/30000 [00:00<00:00, 294189.17it/s]
100%|██████████| 30000/30000 [00:00<00:00, 65307.15it/s]
100%|██████████| 30000/30000 [00:00<00:00, 296556.27it/s]
100%|██████████| 30000/30000 [00:00<00:00, 279645.75it/s]
100%|██████████| 30000/30000 [00:00<00:00, 294408.06it/s]
100%|██████████| 30000/30000 [00:00<00:00, 72230.16it/s]
100%|██████████| 30000/30000 [00:00<00:00, 292756.58it/s]
100%|██████████| 30000/30000 [00:00<00:00, 291722.88it/s]
100%|██████████| 30000/30000 [00:00<00:00, 67935.69it/s]
100%|██████████| 30000/30000 [00:00<00:00, 299377.16it/s]


In [17]:
# Peek at the first input array of one validation item (this consumes that item)
next(gen_val_label)[0][0].shape

(1, 215, 128, 1)

In [22]:
#establish model definitions
loss = {output_vars: 'binary_crossentropy'}
metrics = {output_vars: 'accuracy'}
# NOTE(review): confirm that Keras really logs a metric under this name;
# the three monitoring callbacks below all depend on it.
monitor = 'val_{}_acc'.format(output_vars)

model.compile(K.optimizers.Adam(learning_rate=1e-5), loss=loss, metrics=metrics)

# Every artefact of this run is written into one directory
output_dir = os.path.join('/beegfs/ci411/pcen/models/meltest', 'testmodel')

#make or clear output directory
# This has to happen BEFORE training: the checkpoint, history and CSV
# callbacks write into this directory while fit_generator is running.
make_dirs(output_dir)

# Construct the weight path
weight_path = os.path.join(output_dir, 'model.h5')

# Build the callbacks
cb = []
cb.append(K.callbacks.ModelCheckpoint(weight_path,
                                      save_best_only=True,
                                      verbose=1,
                                      monitor=monitor))

cb.append(K.callbacks.ReduceLROnPlateau(patience=10,
                                        verbose=1,
                                        monitor=monitor))

cb.append(K.callbacks.EarlyStopping(patience=30,
                                    verbose=1,
                                    monitor=monitor))

history_checkpoint = os.path.join(output_dir, 'history_checkpoint.pkl')
cb.append(LossHistory(history_checkpoint))

history_csvlog = os.path.join(output_dir, 'history_csvlog.csv')
cb.append(K.callbacks.CSVLogger(history_csvlog, append=True,
                                separator=','))

print('Fit model...')
verbosity = 1

# Number of validation steps per epoch
val_size = 1000 * 6 * 5 * len(feature_list)
history = model.fit_generator(gen_train_label,
                              steps_per_epoch=512,
                              epochs=150,
                              validation_data=gen_val_label,
                              validation_steps=val_size,
                              verbose=verbosity,
                              callbacks=cb,
                              max_queue_size=16)

# Store the model
# save the model object
model_spec = K.utils.serialize_keras_object(model)
with open(os.path.join(output_dir, 'model_spec.pkl'), 'wb') as fd:
    pickle.dump(model_spec, fd)

# save the model definition
modelyamlfile = os.path.join(output_dir, 'model.yaml')
model_yaml = model.to_yaml()
with open(modelyamlfile, 'w') as yaml_file:
    yaml_file.write(model_yaml)

print('Done training. Saving results to disk...')
# Save history
with open(os.path.join(output_dir, 'history.pkl'), 'wb') as fd:
    pickle.dump(history.history, fd)

print('Saving Weights')
# NOTE(review): weight_path is also the ModelCheckpoint target, so this
# replaces the best checkpoint with the final weights — confirm intended.
model.save_weights(weight_path)

Fit model...
Epoch 1/150

KeyboardInterrupt: 