In [21]:
%matplotlib inline
import os
import pandas as pd
import glob
import numpy as np

In [9]:
import librosa
import librosa.display
import pylab
import matplotlib
import gc

In [25]:
class Opts():
    def __init__(self):
        self.train_data_path = "./working/train/"
        self.test_data_path = "./working/test/"
        self.csv_path = "./meta"
        self.data_path = "./data/"
        
        if not os.path.exists(self.train_data_path):
            os.makedirs(self.train_data_path)
        if not os.path.exists(self.test_data_path):
            os.makedirs(self.test_data_path)
        if not os.path.exists(self.csv_path):
            os.makedirs(self.csv_path)


In [23]:
CLASS_ID = {0: "air_conditioner",
            1: "car_horn",
            2:"children_playing",
            3:"dog_bark",
            4:"drilling",
            5:"engine_idling",
            6:"gun_shot",
            7:"jackhammer",
            8:"siren",
            9:"street_music"}

def _get_meta_info(filename):
    infos = filename.split('-')
    return [int(info) for info in infos]

In [50]:
def input_to_target(opts):
    # audio files and their corresponding labels
    train_paths = [opts.data_path + "fold1/*.wav", opts.data_path + "fold2/*.wav"]
    # train_paths = [opts.data_path + "fold1/*.wav"]
    train_label_path = opts.data_path +  "train_labels.csv"
    test_paths =  [opts.data_path + "fold2/*.wav"]

    # input
    train_files, test_files = [], []
    for train_path in train_paths:
        train_files += glob.glob(train_path)
    for test_path in test_paths:
        test_files += glob.glob(test_path)

    train_labels, class_names, file_names = [], [], []
    for train_file in train_files:
        _, class_id, _, _ = _get_meta_info(train_file.split('/')[-1].strip('.wav'))
        # print("train_file={:s}, class_id={:d}".format(train_file, class_id))
        train_labels.append(int(class_id))
        class_names.append(CLASS_ID[int(class_id)])
        file_names.append(train_file.split('/')[-1].split('.')[0])
    # csv storing information for training dataset
    train_file_df = pd.DataFrame({'file_paths': train_files,
                                  'file_names': file_names,
                                  'labels': train_labels,
                                  'class_names': class_names})

    test_labels, class_names, file_names = [], [], []
    for test_file in test_files:
        _, class_id, _, _ = _get_meta_info(test_file.split('/')[-1].strip('.wav'))
        test_labels.append(int(class_id))
        class_names.append(CLASS_ID[int(class_id)])
        file_names.append(test_file.split('/')[-1].split('.')[0])
    # csv storing information for training dataset
    test_file_df = pd.DataFrame({'file_paths': test_files, 
                                 'file_names': file_names,
                                 'labels': test_labels,
                                 'class_names': class_names})
    
    train_file_df.to_csv(os.path.join(opts.csv_path, "train.csv"))
    test_file_df.to_csv(os.path.join(opts.csv_path, "test.csv"))

    return train_file_df, test_file_df

In [51]:
def create_spectrogram(file_path, title, opts, flag="train"):
    plt.interactive(False)
    clip, sample_rate = librosa.load(file_path, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111) #subplot 1: 1x1 grid

    # hide all axes
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)

    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))

    if flag == "train":
        filename  = os.path.join(opts.train_data_path, title + '.jpg')
    else:
        filename  = os.path.join(opts.test_data_path, title + '.jpg')

    plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')

In [52]:
def gen_spectrogram_set(df, opts, flag="train"):
    for file_path in df['file_paths']:
        title = file_path.split('/')[-1].split('.')[0]
        create_spectrogram(file_path, title, opts, flag)

In [53]:
opts = Opts()
train_file_df, test_file_df = input_to_target(opts)
gen_spectrogram_set(train_file_df, opts)
gen_spectrogram_set(test_file_df, opts, flag="test")

In [66]:
def df_from_csv(csv_path):
    df = pd.read_csv(csv_path)
    return df
train_csv_path = os.path.join(opts.csv_path, "train.csv")
test_csv_path = os.path.join(opts.csv_path, "test.csv")

train_file_df, test_file_df = df_from_csv(train_csv_path), df_from_csv(test_csv_path)

In [67]:
from keras_preprocessing.image import ImageDataGenerator
data_gen = ImageDataGenerator(rescale=1./255., validation_split=0.15) #normalize data by multiplying 1/255

def append_ext(fn):
    return fn+".jpg"
train_file_df["file_names"] = train_file_df["file_names"].apply(append_ext)
test_file_df["file_names"] = test_file_df["file_names"].apply(append_ext)

train_gen = data_gen.flow_from_dataframe(
    dataframe=train_file_df,
    directory=opts.train_data_path,
    x_col="file_names",
    y_col="class_names",
    subset="training",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(64, 64))

valid_gen = data_gen.flow_from_dataframe(
    dataframe=train_file_df,
    directory=opts.train_data_path,
    x_col="file_names",
    y_col="class_names",
    subset="validation",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(64, 64))

Found 1497 images belonging to 10 classes.
Found 264 images belonging to 10 classes.


In [68]:
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers

In [69]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(64,64,3)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
model.compile(optimizers.rmsprop(lr=0.0005, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 64, 64, 32)        896       
_________________________________________________________________
activation_8 (Activation)    (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 62, 62, 64)        18496     
_________________________________________________________________
activation_9 (Activation)    (None, 62, 62, 64)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 31, 31, 64)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 31, 31, 64)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 31, 31, 64)        36928     
__________

In [70]:
#Fitting keras model, no test gen for now
STEP_SIZE_TRAIN=train_gen.n//train_gen.batch_size
STEP_SIZE_VALID=valid_gen.n//valid_gen.batch_size
#STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
model.fit_generator(generator=train_gen,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_gen,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=20
)
model.evaluate_generator(generator=valid_gen, steps=STEP_SIZE_VALID
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[0.7466079518198967, 0.8046875]

In [72]:
test_data_gen=ImageDataGenerator(rescale=1./255.)
test_gen=test_data_gen.flow_from_dataframe(
    dataframe=test_file_df,
    directory=opts.test_data_path,
    x_col="file_names",
    y_col=None,
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(64,64))
STEP_SIZE_TEST=test_gen.n//test_gen.batch_size


Found 888 images.


In [73]:
test_gen.reset() # resets batch index to 0
pred=model.predict_generator(test_gen,
    steps=STEP_SIZE_TEST,
    verbose=1)
predicted_class_indices=np.argmax(pred,axis=1)


['jackhammer', 'street_music', 'jackhammer', 'siren', 'dog_bark', 'dog_bark']


In [None]:
#Fetch labels from train gen for testing
labels = (train_gen.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
print(predictions[0:6])