## Serval Train Model

We will set up a deep neural network with Keras.

We presume this notebook is run with a GPU available.

In [1]:
import json
import logging
import os
import time
import warnings

import librosa
import numpy as np
import pandas as pd
import pydub
import sklearn.preprocessing
from tqdm import tqdm

# Backend configuration. These environment variables are written before the
# `import keras` statement that follows, so they are already in place when
# the backend is loaded.
THEANO_FLAGS = ','.join([
    'device=gpu0',
    'floatX=float32',
    'dnn.conv.algo_bwd_filter=deterministic',
    'dnn.conv.algo_bwd_data=deterministic',
])

os.environ.update({
    'THEANO_FLAGS': THEANO_FLAGS,
    'KERAS_BACKEND': 'theano',
})

import keras
# Select the 'th' (Theano-style) image dimension ordering. The model cell
# further down declares its input shape as (1, MEL_BANDS, SEGMENT_LENGTH),
# i.e. with the single channel as the first axis, which matches this setting.
keras.backend.set_image_dim_ordering('th')
from keras.layers.convolutional import Conv2D as Conv
from keras.layers.convolutional import MaxPooling2D as Pool
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.core import Activation, Dense, Dropout, Flatten
from keras.regularizers import l2 as L2

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 1080 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5110)


In [2]:
# config for specific settings wrt the sound samples used
from config import *


def to_one_hot(targets, class_count):
    """Encode integer class indices as rows of a one-hot matrix.

    Args:
        targets: 1-D sequence (list or array) of integer class indices.
        class_count: Number of classes, i.e. the number of columns of the
            returned matrix.

    Returns:
        A float array of shape ``(len(targets), class_count)`` that is zero
        everywhere except for a single 1 per row, located in the column
        given by the corresponding entry of ``targets``.

    Raises:
        IndexError: If an entry of ``targets`` is not a valid column index
            (raised by NumPy indexing). Negative entries follow NumPy's
            usual wrap-around indexing.
    """
    indices = np.asarray(targets)
    one_hot_enc = np.zeros((len(indices), class_count))

    # An empty list converts to an empty *float* array, which NumPy refuses
    # to use as an index; there is nothing to mark in that case anyway.
    if indices.size == 0:
        return one_hot_enc

    # Single vectorised assignment: row r gets its 1 in column indices[r].
    one_hot_enc[np.arange(len(indices)), indices] = 1

    return one_hot_enc


def extract_segment(filename):
    """Cut one randomly positioned window out of a cached spectrogram.

    Loads ``dataset/tmp/<filename>.spec.npy``, converts it to float32 and
    selects ``SEGMENT_LENGTH`` consecutive columns (axis 1) starting at a
    random offset drawn from NumPy's global random generator.

    Returns the selected window wrapped in a new leading axis of size 1.
    """
    spec_path = 'dataset/tmp/' + filename + '.spec.npy'
    full_spec = np.load(spec_path).astype('float32')

    # randint excludes its upper bound, so +1 keeps the last start position
    # at which a full window still fits reachable.
    last_start = full_spec.shape[1] - SEGMENT_LENGTH
    start = np.random.randint(0, last_start + 1)
    window = full_spec[:, start:start + SEGMENT_LENGTH]

    return np.stack([window])


def iterrows(dataframe):
    """Endlessly iterate over the rows of a dataframe in random order.

    Every pass visits each row exactly once, as a named tuple produced by
    ``DataFrame.itertuples()``. A fresh random permutation (drawn from
    NumPy's global random generator) is used for every pass.

    Args:
        dataframe: The pandas DataFrame whose rows are to be yielded.

    Yields:
        One named tuple per row, forever.

    Raises:
        ValueError: If the dataframe has no rows. Because this is a
            generator, the error surfaces on the first ``next()`` call.
    """
    while True:
        row_count = len(dataframe)

        # Without this check an empty frame would make the outer loop spin
        # forever without ever yielding, hanging the consumer.
        if row_count == 0:
            raise ValueError('cannot iterate over the rows of an empty dataframe')

        order = np.random.permutation(row_count)
        for row in dataframe.iloc[order].itertuples():
            yield row


def iterbatches(batch_size, training_dataframe):
    """Generate an endless stream of training batches.

    For every batch, ``batch_size`` rows are drawn from ``iterrows``; for
    each row one random segment is extracted from the recording named by
    ``row.filename`` and ``row.category`` is recorded as its label.

    Relies on names defined at notebook level: the fitted label encoder
    ``le``, the class list ``labels`` and the normalisation constants
    ``AUDIO_MEAN`` / ``AUDIO_STD``.

    Args:
        batch_size: Number of samples per batch.
        training_dataframe: DataFrame providing ``filename`` and
            ``category`` for every training recording.

    Yields:
        Tuples ``(X, y)`` where ``X`` is the stack of extracted segments
        (along a new leading axis) after subtracting ``AUDIO_MEAN`` and
        dividing by ``AUDIO_STD``, and ``y`` is the one-hot encoded label
        matrix with ``len(labels)`` columns.
    """
    itrain = iterrows(training_dataframe)

    while True:
        segments, categories = [], []

        # Row fetching and segment extraction stay interleaved so that the
        # sequence of draws from the global random generator is unchanged.
        for _ in range(batch_size):
            row = next(itrain)
            segments.append(extract_segment(row.filename))
            categories.append(row.category)

        X = np.stack(segments)

        # Encode all labels of the batch with a single encoder call rather
        # than one call per sample.
        y = to_one_hot(le.transform(categories), len(labels))

        # In-place arithmetic: the normalised values are written back into
        # X's own buffer.
        X -= AUDIO_MEAN
        X /= AUDIO_STD

        yield X, y

In [3]:
# Session setup for this cell: logging, a fixed seed for NumPy's global random
# generator, and the training metadata. The spectrograms themselves are
# created below, and only for recordings that have no cached file yet.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

np.random.seed(1)

# Load dataset
meta = pd.read_csv('dataset/meta-train-data.csv')

# Distinct category names in sorted order, and an encoder fitted on them
labels = meta['category'].sort_values().unique()
le = sklearn.preprocessing.LabelEncoder().fit(labels)

# Generate spectrograms
logger.info('Generating spectrograms...')

if not os.path.exists('dataset/tmp/'):
    os.mkdir('dataset/tmp/')

for row in tqdm(meta.itertuples(), total=len(meta)):
    spec_file = 'dataset/tmp/' + row.filename + '.spec.npy'
    audio_file = 'dataset/audio/' + row.filename

    # A spectrogram saved by an earlier run is reused as-is.
    if os.path.exists(spec_file):
        continue

    # Resample to the configured rate, mix down to mono and force 16-bit
    # samples so that the int16 interpretation below is valid whatever the
    # sample width of the source file is.
    audio = (pydub.AudioSegment.from_file(audio_file)
             .set_frame_rate(SAMPLING_RATE)
             .set_channels(1)
             .set_sample_width(2))

    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # Adding 0.5 yields a new float array, so the read-only buffer view is
    # never written to. The scaling maps int16 samples into [-1, 1].
    samples = np.frombuffer(audio._data, dtype="int16")
    audio = (samples + 0.5) / (0x7FFF + 0.5)

    # Signal and sampling rate are passed by keyword: newer librosa releases
    # no longer accept them positionally.
    spec = librosa.feature.melspectrogram(y=audio, sr=SAMPLING_RATE, n_fft=FFT_SIZE,
                                          hop_length=CHUNK_SIZE, n_mels=MEL_BANDS)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')  # Ignore log10 zero division
        spec = librosa.core.perceptual_weighting(spec, MEL_FREQS, amin=1e-5, ref_power=1e-5,
                                                 top_db=None)

    # Limit the weighted values to the range [0, 100] and store them in half
    # precision; extract_segment converts back to float32 when loading.
    spec = np.clip(spec, 0, 100)
    np.save(spec_file, spec.astype('float16'), allow_pickle=False)

INFO:__main__:Generating spectrograms...
100%|██████████| 855/855 [00:00<00:00, 109741.41it/s]


In [4]:
# Define model
logger.info('Constructing model...')

input_shape = 1, MEL_BANDS, SEGMENT_LENGTH

# Files holding a previously saved architecture (JSON) and its weights
modelfile = 'model.json'
modelweights = 'model.h5'

saved_model_available = os.path.exists(modelfile) and os.path.exists(modelweights)

if not saved_model_available:
    # Nothing complete on disk: build the network from scratch
    model = keras.models.Sequential()

    # Three convolution -> LeakyReLU -> max-pooling stages with a growing
    # number of filters; only the first layer declares the input shape.
    for stage, filters in enumerate((80, 160, 240)):
        first_layer_args = {'input_shape': input_shape} if stage == 0 else {}
        model.add(Conv(filters, (3, 3), kernel_regularizer=L2(0.001),
                       kernel_initializer='he_uniform', **first_layer_args))
        model.add(LeakyReLU())
        model.add(Pool((3, 3), (3, 3)))

    model.add(Flatten())
    model.add(Dropout(0.5))

    # One softmax output unit per known label
    model.add(Dense(len(labels), kernel_regularizer=L2(0.001), kernel_initializer='he_uniform'))
    model.add(Activation('softmax'))

    logger.debug('Created new Keras model .')
else:
    # Restore the architecture from JSON, then the weights, both from the
    # current directory
    with open(modelfile, 'r') as handle:
        model = keras.models.model_from_json(handle.read())

    model.load_weights(modelweights)
    logger.debug('Loaded exsisting Keras model with weights.')

# Compile the model (new or restored) for training
optimizer = keras.optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])


INFO:__main__:Constructing model...
DEBUG:__main__:Loaded exsisting Keras model with weights.


In [None]:
# Train model
batch_size = 200
EPOCH_MULTIPLIER = 10

# One Keras "epoch" here covers EPOCH_MULTIPLIER passes' worth of samples, so
# the number of epochs is scaled down by the same factor.
epochs = 1000 // EPOCH_MULTIPLIER
epoch_size = len(meta) * EPOCH_MULTIPLIER

# Batches per epoch; clamped to at least one so that a very small dataset
# does not end up with steps_per_epoch=0.
bpe = max(1, epoch_size // batch_size)

logger.info('Training... (batch size of {} | {} batches per epoch)'.format(batch_size, bpe))

model.fit_generator(generator=iterbatches(batch_size, meta),
                    steps_per_epoch=bpe,
                    epochs=epochs)

# Save to the same paths the model cell loads from (modelfile / modelweights),
# so the load and save locations cannot drift apart.
with open(modelfile, 'w') as file:
    file.write(model.to_json())

model.save_weights(modelweights)

# Store the class names in encoder order alongside the model.
with open('model_labels.json', 'w') as file:
    json.dump(le.classes_.tolist(), file)

INFO:__main__:Training... (batch size of 200 | 42 batches per epoch)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
