In [None]:
import numpy as np
import sklearn
import librosa
import tensorflow as tf
from tensorflow import keras
import kapre
from kapre.composed import get_melspectrogram_layer
import os

### Load training data

In [None]:
# Audio decoding settings: passed to librosa.load() in load_samples(), and
# sample_rate is also handed to the mel-spectrogram layer.
sample_rate = 44100  # target sampling rate in Hz
mono = True  # forwarded as librosa.load(mono=...)

In [None]:
# Directories holding the positive-class and negative-class audio clips.
POSITIVE_DIR = 'matched_samples_positive/'
NEGATIVE_DIR = 'matched_samples_negative/'

# os.listdir() returns entries in arbitrary order; sorting keeps the sample
# order (and every array derived from it) reproducible across runs and machines.
matched_samples_positive = [POSITIVE_DIR + name for name in sorted(os.listdir(POSITIVE_DIR))]
matched_samples_negative = [NEGATIVE_DIR + name for name in sorted(os.listdir(NEGATIVE_DIR))]

# Positives first, then negatives: load_sample_labels() builds its label
# vector in exactly this order.
all_samples = matched_samples_positive + matched_samples_negative

In [None]:
def standardize_length():
    """Return the length, in samples, of the shortest clip in ``all_samples``.

    Every clip is decoded with the module-level ``sample_rate`` and ``mono``
    settings, i.e. the same settings ``load_samples`` uses, so the returned
    length is expressed in the same units as the waveforms it is used to
    truncate. Measuring at librosa's default sampling rate instead would give
    a length that belongs to a different rate than the clips are loaded at.

    Returns:
        int: Number of samples in the shortest clip.

    Raises:
        ValueError: If ``all_samples`` is empty.
    """
    if not all_samples:
        raise ValueError('no audio samples found in all_samples')

    # For now just shorten to lowest length.
    # shape[-1] is the time axis for both mono (n,) and multi-channel
    # (channels, n) arrays.
    return min(
        librosa.load(sample_path, sr=sample_rate, mono=mono)[0].shape[-1]
        for sample_path in all_samples
    )

In [None]:
# Common length (in samples) that every clip is truncated to before use.
sample_length = standardize_length()

In [None]:
def load_samples():
    """Decode every clip in ``all_samples`` into a single float32 array.

    Each clip is read with librosa using the module-level ``sample_rate`` and
    ``mono`` settings, cut to its first ``sample_length`` samples and stored
    as a column vector.

    Returns:
        np.ndarray: Array of shape ``(n_clips, sample_length, 1)``.
    """
    n_clips = len(matched_samples_positive) + len(matched_samples_negative)
    waveforms = np.empty((n_clips, sample_length, 1), dtype=np.float32)

    for row, clip_path in enumerate(all_samples):
        audio, _ = librosa.load(clip_path, sr=sample_rate, mono=mono)
        trimmed = audio[:sample_length]
        # Add a trailing channel axis: (sample_length,) -> (sample_length, 1).
        waveforms[row] = trimmed.reshape(-1, 1)

    return waveforms

In [None]:
def load_sample_labels():
    """Build the binary label vector for the samples.

    Returns:
        np.ndarray: 1-D float array holding a 1 for every entry of
        ``matched_samples_positive`` followed by a 0 for every entry of
        ``matched_samples_negative``.
    """
    n_positive = len(matched_samples_positive)
    n_negative = len(matched_samples_negative)

    # Start from all zeros (negatives) and mark the leading positives.
    labels = np.zeros(n_positive + n_negative)
    labels[:n_positive] = 1.0
    return labels

In [None]:
# Raw waveform features and their binary labels (1 = positive, 0 = negative).
X, y = load_samples(), load_sample_labels()

In [None]:
# Sanity check: features and labels should share the same first dimension.
print('Sample feature shape: ', X.shape)
print('Sample labels shape: ', y.shape)

In [None]:
# TODO: train_test_split
# from sklearn.model_selection import train_test_split
# X_train, X_valid, y_train, y_valid = train_test_split(X, y)

### Simple CNN

In [None]:
# Shape of a single example (batch dimension excluded); used as the model input shape.
input_shape = X[0].shape

In [None]:
# Kapre layer that turns the raw waveform into a decibel-scaled mel-spectrogram
# inside the model, so the network can be fed audio directly.
# NOTE(review): win_length/hop_length are presumably in samples; at 44.1 kHz,
# 400/160 is only about 9 ms / 3.6 ms — confirm these values are intended for
# this sample rate.
spectrogram_layer = get_melspectrogram_layer(input_shape=input_shape,
                             pad_end=True,
                             win_length=400,
                             hop_length=160,
                             sample_rate=sample_rate,
                             return_decibel=True,
                             input_data_format='channels_last',
                             output_data_format='channels_last')

In [None]:
# Convolutional binary classifier: the mel-spectrogram front end is followed by
# three conv/pool stages and a small fully connected head.
model = keras.models.Sequential([
    spectrogram_layer,

    # Stage 1: one wide 7x7 convolution.
    keras.layers.Conv2D(filters=64, kernel_size=7, padding="SAME", activation="relu"),
    keras.layers.MaxPooling2D(pool_size=2),

    # Stage 2: two 3x3 convolutions with 128 filters.
    keras.layers.Conv2D(filters=128, kernel_size=3, padding="SAME", activation="relu"),
    keras.layers.Conv2D(filters=128, kernel_size=3, padding="SAME", activation="relu"),
    keras.layers.MaxPooling2D(pool_size=2),

    # Stage 3: two 3x3 convolutions with 256 filters.
    keras.layers.Conv2D(filters=256, kernel_size=3, padding="SAME", activation="relu"),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding="SAME", activation="relu"),
    keras.layers.MaxPooling2D(pool_size=2),

    # Classification head with dropout after each hidden dense layer.
    keras.layers.Flatten(),
    keras.layers.Dense(units=128, activation="relu"),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(units=64, activation="relu"),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(units=1, activation="sigmoid"),  # binary classification
])

In [None]:
# Show the layer-by-layer architecture of the CNN.
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])

In [None]:
# Train the CNN on the full data set using Keras' default fit settings.
# NOTE(review): no epoch count or validation data is passed — confirm this is
# intended (see the train_test_split TODO above).
model.fit(X, y)

In [None]:
# Persist the trained CNN to disk.
model.save("basic_cnn_samples.h5")

### Bring in pre-trained Xception model

In [None]:
# Directory the spectrogram images for the Xception model are written to and read from.
IMAGE_DIR = 'sample_spec_images/'

In [None]:
def create_img_filename(filename):
    """Map an audio sample path to the path of its spectrogram image.

    The directory part and the file extension of ``filename`` are dropped and
    the remaining stem is placed in ``IMAGE_DIR`` with a ``.png`` extension,
    e.g. ``'matched_samples_positive/kick.wav'`` -> ``IMAGE_DIR + 'kick.png'``.

    Args:
        filename (str): Path to an audio file.

    Returns:
        str: Path of the corresponding PNG file inside ``IMAGE_DIR``.
    """
    # basename/splitext work for any directory depth and extension length,
    # unlike slicing off a fixed four characters and picking the second
    # '/'-separated component.
    stem, _extension = os.path.splitext(os.path.basename(filename))
    return IMAGE_DIR + stem + '.png'

In [None]:
from matplotlib import pyplot as plt
from librosa import display

def save_sample_spectrograms():
    """Render and save a mel-spectrogram image for every clip in ``all_samples``.

    Each clip is loaded with the module-level ``sample_rate`` and ``mono``
    settings (the same ones ``load_samples`` uses), truncated to
    ``sample_length`` samples, converted to a decibel-scaled mel-spectrogram
    and written, with the axes hidden, to the path returned by
    ``create_img_filename``. The images are the input for the Xception model.

    A separate figure is created and closed for each clip so that plots do
    not accumulate on one shared axes and figures do not stay in memory.
    """
    # savefig() does not create missing directories.
    os.makedirs(IMAGE_DIR, exist_ok=True)

    # Save as images for input to Xception model
    for sample_path in all_samples:
        sample, sr = librosa.load(sample_path, sr=sample_rate, mono=mono)
        sample = sample[:sample_length]
        spectrogram = librosa.feature.melspectrogram(y=sample, sr=sr)

        fig, ax = plt.subplots()
        try:
            ax.axis('off')
            librosa.display.specshow(librosa.power_to_db(spectrogram, ref=np.max), ax=ax)
            fig.savefig(create_img_filename(sample_path))
        finally:
            # Release the figure even if rendering or saving fails.
            plt.close(fig)

In [None]:
# Write one spectrogram image per audio sample into IMAGE_DIR.
save_sample_spectrograms()

In [None]:
# Derive the image paths from all_samples instead of os.listdir(IMAGE_DIR):
# listdir returns entries in arbitrary order and would also pick up stray
# files, whereas load_sample_labels() yields positives first, then negatives.
# Building the list in all_samples order keeps images and labels aligned.
# NOTE: this assumes file stems are unique across the two sample directories,
# because create_img_filename() drops the directory part.
sample_spectrograms = [create_img_filename(sample_path) for sample_path in all_samples]

In [None]:
from PIL import Image

def reformat_images():
    """Shrink every image in ``sample_spectrograms`` in place to fit 250x250.

    ``Image.thumbnail`` resizes while keeping the aspect ratio, and the result
    overwrites the original file. Files that raise ``IOError`` while being
    opened, resized or saved are reported on stdout and skipped.
    """
    # Reduce size for Xception
    for image_path in sample_spectrograms:
        try:
            # The context manager closes the underlying file handle even when
            # resizing or saving raises.
            with Image.open(image_path) as image:
                image.thumbnail((250, 250))
                image.save(image_path)
        except IOError:
            print('not an image file: ', image_path)

In [None]:
# Downscale the saved spectrogram images in place.
reformat_images()

In [None]:
# Take the first image's dimensions as the input size for the whole data set.
# NOTE(review): assumes every spectrogram image has the same size — confirm.
first_image = Image.open(sample_spectrograms[0])
width, height = first_image.size  # PIL reports size as (width, height)
channels = 3 # RGB

In [None]:
def load_sample_images():
    """Load every image in ``sample_spectrograms`` into one float32 array.

    Returns:
        np.ndarray: Array of shape ``(n_images, height, width, channels)``
        holding the RGB pixel values of the images, in the same order as
        ``sample_spectrograms``.

    Raises:
        ValueError: If an image does not have the expected
            ``(height, width, channels)`` shape.
    """
    batch_size = len(sample_spectrograms)
    X = np.empty((batch_size, height, width, channels), dtype=np.float32)
    expected_shape = (height, width, channels)

    for i, image_path in enumerate(sample_spectrograms):
        # Open the file for *this* path; relying on a module-level ``image``
        # variable would fill every row with the same picture (or fail with a
        # NameError on a fresh kernel).
        with Image.open(image_path) as image:
            # Converting to RGB discards the alpha channel and also copes
            # with grayscale or palette images.
            image_data = np.asarray(image.convert('RGB'))

        if image_data.shape != expected_shape:
            raise ValueError(
                'unexpected image shape %s for %s (expected %s)'
                % (image_data.shape, image_path, expected_shape)
            )
        X[i] = image_data

    return X

In [None]:
# Rebind X to the spectrogram images (replacing the waveform features) and
# rebuild the labels for the Xception model.
X, y = load_sample_images(), load_sample_labels()

In [None]:
# Sanity check: image features and labels should share the same first dimension.
print('Sample feature shape: ', X.shape)
print('Sample labels shape: ', y.shape)

In [None]:
# Transfer learning: ImageNet-pretrained Xception without its classification
# head, followed by global average pooling and a single sigmoid unit for the
# binary decision.
# NOTE(review): X holds raw pixel values; the pretrained weights presumably
# expect inputs scaled with keras.applications.xception.preprocess_input —
# confirm whether that scaling should be applied before fitting.
base_model = keras.applications.xception.Xception(weights="imagenet",
                                                 include_top=False,
                                                 input_shape=(height, width, channels))
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
class_output = keras.layers.Dense(1, activation="sigmoid")(avg)
model = keras.Model(inputs=base_model.input, outputs=[class_output])
# The model has exactly one output, so it takes exactly one loss. A list of
# two losses with loss_weights is only valid for a two-output model.
model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

In [None]:
# Train the Xception-based model on the spectrogram images using Keras'
# default fit settings.
# NOTE(review): no epoch count or validation data is passed — confirm this is intended.
model.fit(X, y)

In [None]:
# Persist the trained Xception-based model to disk.
model.save("xception_samples.h5")