In [19]:
import glob
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import pandas as pd
import os.path as p
%matplotlib inline

from IPython.display import HTML
HTML('''<script>
code_show_err=false; 
function code_toggle_err() {
 if (code_show_err){
 $('div.output_stderr').hide();
 } else {
 $('div.output_stderr').show();
 }
 code_show_err = !code_show_err
} 
$( document ).ready(code_toggle_err);
</script>
To toggle on/off output_stderr, click <a href="javascript:code_toggle_err()">here</a>.''')

In [12]:

datasource_path = "UrbanSound8k"
metadata_path = p.join(p.join(datasource_path, "metadata"),"UrbanSound8k.csv")
metadata = pd.read_csv(metadata_path, index_col="fsID")

audio_source = p.join(datasource_path, "audio")
spectro_path = p.join("UrbanSound8k", "spectrogram")

folds = ["fold{}".format(str(i)) for i in range(1,11)]

In [13]:
metadata[:5]

Unnamed: 0_level_0,slice_file_name,start,end,salience,fold,classID,class
fsID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
100032,100032-3-0-0.wav,0.0,0.317551,1,5,3,dog_bark
100263,100263-2-0-117.wav,58.5,62.5,1,5,2,children_playing
100263,100263-2-0-121.wav,60.5,64.5,1,5,2,children_playing
100263,100263-2-0-126.wav,63.0,67.0,1,5,2,children_playing
100263,100263-2-0-137.wav,68.5,72.5,1,5,2,children_playing


In [14]:
def create_folders():
    to_create =[spectro_path]+ [p.join(spectro_path, fold) for fold in folds]
    
    for dirName in to_create:
        try:
            # Create target Directory
            os.mkdir(dirName)
            print("Directory " , dirName ,  " Created ") 
        except FileExistsError:
            print("Directory " , dirName ,  " already exists")

create_folders()

Directory  UrbanSound8k\spectrogram  already exists
Directory  UrbanSound8k\spectrogram\fold1  already exists
Directory  UrbanSound8k\spectrogram\fold2  already exists
Directory  UrbanSound8k\spectrogram\fold3  already exists
Directory  UrbanSound8k\spectrogram\fold4  already exists
Directory  UrbanSound8k\spectrogram\fold5  already exists
Directory  UrbanSound8k\spectrogram\fold6  already exists
Directory  UrbanSound8k\spectrogram\fold7  already exists
Directory  UrbanSound8k\spectrogram\fold8  already exists
Directory  UrbanSound8k\spectrogram\fold9  already exists
Directory  UrbanSound8k\spectrogram\fold10  already exists


In [22]:
import gc


def build_path(file_name,spectrogram = False):
    file= file_name.split(".")
    source = spectro_path if spectrogram else audio_source
    file_name = file[0] + ".jpg" if spectrogram else file[0] +".wav"
    fsID= int(file_name.split("-")[0])
    frame = metadata.loc[fsID]
    fold_id = 0
    try:
        fold_id = frame["fold"].values[0]-1
    except:
        fold_id = frame["fold"] -1
    file_with_fold = p.join(folds[fold_id], file_name)
    return p.join(source,file_with_fold), fold_id


def add_spectrogram_to_metadata():
    counter = 0
    errors = 0
    for index, row in metadata.iterrows():
        counter += 1
        if counter%2000 ==0:
            gc.collect()
        try:
            file_name = row["slice_file_name"]
            s,_ = build_path(file_name, True)
            metadata.loc[index, "spectro_path"] = s
            if not p.exists(s): create_spectrogram(row["slice_file_name"])
        except Exception as e:
            errors +=1

    print("Total: {} \nErrors: {}\nCorrect: {}".format(counter, errors, counter-errors))
    return m

def plot_spectrogram(filename):
    plt.interactive(False)
    filename, _ = build_path(filename, metadata)
    clip, sample_rate = librosa.load(filename, sr=None)
    fig, ax = plt.subplots()
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    plt.show()

    
def create_spectrogram(filename):
    plt.interactive(False)
    filename, fold_id =build_path(filename, metadata)
    clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    curr_path = p.join(spectro_path, folds[fold_id])
    savefile  = p.join(curr_path, "{}.jpg".format(p.basename(filename).split(".")[0] ))
    plt.savefig(savefile, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del filename,clip,sample_rate,fig,ax,S,savefile

In [23]:
create_spectrogram("100263-2-0-126.wav")
plot_spectrogram("100263-2-0-126.wav")

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [10]:
add_spectrogram_to_metadata()
metadata[:5]

TypeError: add_spectrogram_to_metadata() missing 1 required positional argument: 'metadata'

In [29]:
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D,Flatten, MaxPooling2D, Dropout
from keras.callbacks import TensorBoard
from keras.models import load_model

In [32]:
datagen=ImageDataGenerator(rescale=1./255)
def get_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(64,64,3)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(Conv2D(128, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(128, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    
    return model

def get_callbacks(name_weights, patience_lr):
    if not p.exists("weights"):     os.mkdir("weights")       
    mcp_save = ModelCheckpoint("weights/{}_weights.h5".format(name_weights), save_best_only=True, monitor='val_loss', mode='min')
    reduce_lr_loss = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=patience_lr, verbose=1, epsilon=1e-4, mode='min')
    tensorboard = TensorBoard(log_dir='logs/{}'.format(name_weights))

    return [mcp_save, reduce_lr_loss, tensorboard]

In [33]:

for j in range(1,11):
    test_mask = (metadata.fold != j)
    valid_mask = (metadata.fold == j)

    name_weights = "fold_" + str(j)
    train_generator=datagen.flow_from_dataframe(dataframe=metadata[test_mask], 
                                                directory="./", x_col="spectro_path", 
                                                y_col="class", class_mode="categorical", 
                                                target_size=(64,64), batch_size=32)
    validation_generator=datagen.flow_from_dataframe(dataframe=metadata[valid_mask], 
                                                directory="./", x_col="spectro_path", 
                                                y_col="class", class_mode="categorical", 
                                                target_size=(64,64), batch_size=32)

    callbacks = get_callbacks(name_weights = name_weights, patience_lr=10)
    model = get_model()
    model.compile(optimizers.rmsprop(lr=0.0005, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])

    model.fit_generator(
                train_generator,
                epochs=15,
                verbose=0,
                validation_data = validation_generator, 
                callbacks = callbacks)

Found 7798 validated image filenames belonging to 10 classes.
Found 782 validated image filenames belonging to 10 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 7707 validated image filenames belonging to 10 classes.
Found 873 validated image filenames belonging to 10 classes.


  .format(n_invalid, x_col)


Found 7655 validated image filenames belonging to 10 classes.
Found 925 validated image filenames belonging to 10 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 7602 validated image filenames belonging to 10 classes.
Found 978 validated image filenames belonging to 10 classes.
Found 7644 validated image filenames belonging to 10 classes.
Found 936 validated image filenames belonging to 10 classes.
Found 7757 validated image filenames belonging to 10 classes.
Found 823 validated image filenames belonging to 10 classes.
Found 7757 validated image filenames belonging to 10 classes.
Found 823 validated image filenames belonging to 10 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 7777 validated image filenames belonging to 10 classes.
Found 803 validated image filenames belonging to 10 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 7780 validated image filenames belonging to 10 classes.
Found 800 validated image filenames belonging to 10 classes.
Found 7743 validated image filenames belonging to 10 classes.
Found 837 validated image filenames belonging to 10 classes.


In [None]:
def load_fold_model(idx):
    file_name = "fold_{}_weights.h5".format(idx)
    return load_model(file_name)

def load_image(img_path):
    
    img = image.load_img(img_path, target_size=(150, 150))
    img_tensor = image.img_to_array(img)                    # (height, width, channels)
    img_tensor = np.expand_dims(img_tensor, axis=0)         # (1, height, width, channels), add a dimension because the model expects this shape: (batch_size, height, width, channels)
    img_tensor /= 255.                                      # imshow expects values in the range [0, 1]

    return img_tensor

model = load_fold_model(1)
model.compile(optimizers.rmsprop(lr=0.0005, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])

samples =datagen.flow_from_dataframe(dataframe=metadata[:5], 
                                                directory="./", x_col="spectro_path", 
                                                y_col="class", class_mode="categorical", 
                                                target_size=(64,64), batch_size=32)

y_prob = model.predict_generator(samples) 
y_classes = y_prob.argmax(axis=-1)
label_map = [k for k,v in samples.class_indices.items()]
[{label_map[i]: } for i in y_classes]