In [None]:
import numpy as np
import sklearn
import librosa
import tensorflow as tf
from tensorflow import keras
import kapre
from kapre.composed import get_melspectrogram_layer
import os

### Load training data

In [None]:
# Audio loading settings; both are forwarded to librosa.load by load_samples().
sample_rate = 44_100  # passed as `sr`; also used as the per-clip sample count
mono = True           # passed as `mono`

In [None]:
# Directories holding the audio files for each class.
POSITIVE_DIR = 'matched_samples_positive/'
NEGATIVE_DIR = 'matched_samples_negative/'

# os.listdir returns entries in arbitrary order, so sort the names to keep the
# sample order (and therefore the row order of X) reproducible across runs.
# os.path.join avoids depending on the trailing slash in the constants above.
matched_samples_positive = [os.path.join(POSITIVE_DIR, name)
                            for name in sorted(os.listdir(POSITIVE_DIR))]
matched_samples_negative = [os.path.join(NEGATIVE_DIR, name)
                            for name in sorted(os.listdir(NEGATIVE_DIR))]

In [None]:
def load_samples():
    """Load every positive and negative clip into a single float32 array.

    Reads the module-level ``matched_samples_positive`` and
    ``matched_samples_negative`` path lists (positives first, then negatives)
    and decodes each file with ``librosa.load`` using the module-level
    ``sample_rate`` and ``mono`` settings.

    Returns:
        np.ndarray of shape ``(n_clips, sample_rate, 1)`` and dtype float32.
        Clips with fewer than ``sample_rate`` samples are zero-padded at the
        end; clips with more are truncated to ``sample_rate`` samples.
    """
    sample_paths = matched_samples_positive + matched_samples_negative
    # Zero-initialised (not np.empty) so short clips end up zero-padded.
    X = np.zeros((len(sample_paths), sample_rate, 1), dtype=np.float32)

    for i, sample_path in enumerate(sample_paths):
        # The returned rate is discarded on purpose: unpacking it into
        # `sample_rate` would make that name local to this function and raise
        # UnboundLocalError where it is read above.
        sample_wav, _ = librosa.load(sample_path, sr=sample_rate, mono=mono)
        sample_wav = sample_wav.reshape(-1)
        n_samples = min(len(sample_wav), sample_rate)
        X[i, :n_samples, 0] = sample_wav[:n_samples]

    return X

In [None]:
def load_sample_labels():
    """Build the label vector matching the sample order used by load_samples().

    Returns:
        1-D float array with a 1 for every path in
        ``matched_samples_positive`` followed by a 0 for every path in
        ``matched_samples_negative``.
    """
    n_positive = len(matched_samples_positive)
    n_negative = len(matched_samples_negative)

    # Start from all zeros (negatives) and mark the leading positives.
    labels = np.zeros(n_positive + n_negative)
    labels[:n_positive] = 1.0
    return labels

In [None]:
# Features and labels are built from the same path lists, positives first.
X = load_samples()
y = load_sample_labels()

In [None]:
# Sanity check: the leading dimension of X and y should be the same.
print('Sample feature shape: ', X.shape)
print('Sample labels shape: ', y.shape)

In [None]:
# TODO: train_test_split
# from sklearn.model_selection import train_test_split
# X_train, X_valid, y_train, y_valid = train_test_split(X, y)

### Simple CNN

In [None]:
# Per-clip input shape fed to the spectrogram layer: `sample_rate` samples
# along the first axis and a single channel last.
input_shape = (sample_rate, 1) # 1 channel (mono)

In [None]:
# Mel-spectrogram front end from kapre; it is used as the first layer of the
# Sequential model so the network consumes raw waveforms directly.
spectrogram_layer = get_melspectrogram_layer(
    # Input description
    input_shape=input_shape,
    sample_rate=sample_rate,
    input_data_format='channels_last',
    # STFT framing
    win_length=400,
    hop_length=160,
    pad_end=True,
    # Output description
    return_decibel=True,
    output_data_format='channels_last',
)

In [None]:
# Simple CNN built layer by layer: spectrogram front end, three convolutional
# stages each followed by 2x2 max pooling, then a dense classifier head.
model = keras.models.Sequential()
model.add(spectrogram_layer)

# Stage 1: one wide 7x7 convolution.
model.add(keras.layers.Conv2D(filters=64, kernel_size=7, activation="relu", padding="SAME"))
model.add(keras.layers.MaxPooling2D(pool_size=2))

# Stage 2: two 3x3 convolutions with 128 filters.
model.add(keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="SAME"))
model.add(keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding="SAME"))
model.add(keras.layers.MaxPooling2D(pool_size=2))

# Stage 3: two 3x3 convolutions with 256 filters.
model.add(keras.layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="SAME"))
model.add(keras.layers.Conv2D(filters=256, kernel_size=3, activation="relu", padding="SAME"))
model.add(keras.layers.MaxPooling2D(pool_size=2))

# Classifier head with dropout after each hidden dense layer.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(units=128, activation="relu"))
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(units=64, activation="relu"))
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(units=1, activation="sigmoid"))  # binary classification

In [None]:
# Compile the model.
# The network ends in a single sigmoid unit and the labels are 0/1, so the
# matching loss is binary cross-entropy. Categorical cross-entropy expects
# one-hot targets over a multi-unit output and gives no useful training signal
# with a single output unit.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train on the full data set using Keras' default fit settings.
model.fit(x=X, y=y)

### Bring in pre-trained Xception model

In [None]:
# Build a two-headed model on top of an ImageNet-pretrained Xception backbone
# (classification head omitted via include_top=False).
# NOTE(review): this cell rebinds `model`, replacing the Sequential CNN defined
# in the "Simple CNN" section.
base_model = keras.applications.xception.Xception(weights="imagenet",
                                                 include_top=False)
# Pool the backbone's feature maps into one vector per example.
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
# NOTE(review): `n_classes` is not defined in any visible cell, so this line
# raises NameError on a fresh kernel. Define it before use.
class_output = keras.layers.Dense(n_classes, activation="softmax")(avg)
# Second head: 4 linear outputs trained with MSE below.
# NOTE(review): presumably a 4-value regression target (e.g. a bounding box);
# confirm that it applies to this audio task.
loc_output = keras.layers.Dense(4)(avg)
model = keras.Model(inputs=base_model.input,
                   outputs=[class_output, loc_output])
# One loss per output, weighted 0.8 (class) / 0.2 (loc).
# NOTE(review): `optimizer` is not defined in any visible cell either.
model.compile(loss=["sparse_categorical_crossentropy", "mse"],
             loss_weights=[0.8, 0.2], optimizer=optimizer, metrics=["accuracy"])

In [None]:
# NOTE(review): lowercase `x` is not defined in any visible cell; the feature
# array loaded earlier is named `X`. The model built in the Xception cell also
# has two outputs (class and loc), so it presumably needs one target per
# output, not the single `y` vector. Confirm before running.
model.fit(x, y)