In [None]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

kaggle.json


In [None]:
# Download the UrbanSound8K archive (chrisfilo/urbansound8k) from Kaggle into
# the current working directory.
!kaggle datasets download --force -d chrisfilo/urbansound8k

Downloading urbansound8k.zip to /content
100% 5.60G/5.61G [01:25<00:00, 84.0MB/s]
100% 5.61G/5.61G [01:25<00:00, 70.2MB/s]


In [None]:
# Extract every archive in the working directory; the archives are deleted
# only when extraction succeeded (&&).
!unzip \*.zip  && rm *.zip

In [19]:
!pip install pysoundfile



In [20]:
import numpy as np
import pandas as pd
import glob
import os
import librosa
import soundfile as sf
from PIL import Image
from sklearn import preprocessing

import keras
from keras.applications.resnet50 import ResNet50
from keras.applications import vgg16
from keras.applications import vgg19
from keras.applications import Xception
from keras.models import Model
from keras.layers import Dense
from keras.layers import Flatten
from keras import Sequential
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input

In [21]:
# joblib is a standalone package. The copy vendored as sklearn.externals.joblib
# was deprecated in scikit-learn 0.21 and removed in 0.23, so try the real
# package first and fall back to the vendored one on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [22]:
# Load the UrbanSound8K metadata table. extract_features reads its "fold",
# "slice_file_name" and "classID" columns.
df = pd.read_csv("/content/UrbanSound8K.csv")
df.shape  # quick sanity check: (rows, columns)

(8732, 8)

In [23]:
def get_sound_data(path, sr=22050):
    """Load an audio file as a mono signal resampled to ``sr`` Hz.

    Args:
        path: Audio file readable by ``soundfile.read``.
        sr: Target sampling rate in Hz.

    Returns:
        ``(signal, sr)``: the resampled signal (multi-channel input is
        averaged across channels into a 1-D array) and the requested rate.
    """
    data, fsr = sf.read(path)
    # The signal is transposed before resampling, as in the original code
    # (presumably so that channels come first and time is the last axis).
    # The rates are passed by keyword because librosa >= 0.10 no longer
    # accepts them positionally; the keyword names also exist in older releases.
    data_resample = librosa.resample(data.T, orig_sr=fsr, target_sr=sr)
    if data_resample.ndim > 1:
        # Down-mix multi-channel audio to mono.
        data_resample = np.average(data_resample, axis=0)
    return data_resample, sr

def windows(data, window_size):
    """Yield ``(start, end)`` index pairs of half-overlapping windows over ``data``.

    Consecutive windows start ``window_size / 2`` apart. ``end`` may exceed
    ``len(data)`` for the trailing windows; callers are expected to discard
    slices that come out shorter than ``window_size``.

    Args:
        data: Any sized sequence; only ``len(data)`` is used.
        window_size: Window length in samples; must be positive.

    Yields:
        Tuples of ints ``(start, start + window_size)``.

    Raises:
        ValueError: If ``window_size`` is not positive. Raised on first
            iteration, since this is a generator. Without this check the
            loop would never terminate.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size!r}")

    total = len(data)
    # The hop stays a float so odd window sizes advance by exact half-steps;
    # indices are truncated to int only when yielded.
    step = window_size / 2
    start = 0
    while start < total:
        yield int(start), int(start + window_size)
        start += step

In [24]:
# Spectrogram geometry used as the defaults of extract_features:
#   band_s  - number of mel bands (passed on as n_mels)
#   frame_s - number of frames each window is reshaped to
# NOTE(review): the outputs saved further down report features of shape
# (22816, 75, 75, 3) and the Xception builder expects 75x75 inputs, so this
# cell was apparently last executed with 75 - confirm the intended value.
band_s, frame_s = 64, 64

# Window length in samples.
# NOTE(review): 512 presumably mirrors librosa's default hop length, so that a
# window of 512 * (frames - 1) samples yields exactly `frames` frames - confirm.
window_size_s = (frame_s - 1) * 512

In [25]:
def _log_mel_row(signal, bands):
    """Return the dB-scaled mel-spectrogram of ``signal`` flattened frame by frame into a (1, N) row."""
    melspec = librosa.feature.melspectrogram(y=signal, n_mels=bands)
    # NOTE(review): melspectrogram produces a power spectrogram by default, for
    # which librosa.power_to_db is the matching conversion. amplitude_to_db is
    # kept so the feature scale stays comparable with previously saved features.
    logspec = librosa.amplitude_to_db(melspec)
    return logspec.T.flatten()[:, np.newaxis].T


def extract_features(df, bands=band_s, frames=frame_s, window_size=window_size_s,
                     data_dir='/content'):
    """Turn every clip listed in ``df`` into 3-channel spectrogram feature maps.

    Each clip is cut into half-overlapping windows of ``window_size`` samples;
    windows shorter than that (at the end of a clip) are skipped. Every kept
    window yields one feature map.

    Args:
        df: Metadata table with "fold", "slice_file_name" and "classID" columns.
        bands: Number of mel bands.
        frames: Number of spectrogram frames per window. ``window_size`` must
            be chosen so that each window really produces ``bands * frames``
            values, otherwise the final reshape raises.
        window_size: Window length in samples.
        data_dir: Directory containing the ``fold<N>`` audio folders.

    Returns:
        ``(features, labels)``. ``features`` has shape
        ``(n_windows, bands, frames, 3)``: channel 0 is the log-mel spectrogram
        of the window, channel 1 the average of the log-mel spectrograms of its
        harmonic and percussive components, channel 2 the delta of channel 0.
        ``labels`` is an integer array with the class id of each window.
    """
    log_specgrams_full = []
    log_specgrams_hp = []
    class_labels = []

    n_clips = len(df)
    # Iterate the columns positionally so a filtered or re-indexed frame works too.
    clip_rows = zip(df["fold"], df["slice_file_name"], df["classID"])

    for i, (fold, file_name, class_label) in enumerate(clip_rows):
        fn = os.path.join(data_dir, 'fold' + str(fold), file_name)
        sound_data, _ = get_sound_data(fn, sr=22050)

        if (i % 200) == 0:
            print(f"This is iteration : {i} .... iterations left : {n_clips-i}")

        # for each audio signal sub-sample window of data
        for (start, end) in windows(sound_data, window_size):
            signal = sound_data[start:end]
            if len(signal) != window_size:
                # Incomplete trailing window.
                continue

            # log-scaled mel-spectrogram of the full signal
            logspec_full = _log_mel_row(signal, bands)

            # log-scaled, averaged values for the harmonic & percussive components
            y_harmonic, y_percussive = librosa.effects.hpss(signal)
            logspec_hp = np.average(
                [_log_mel_row(y_harmonic, bands), _log_mel_row(y_percussive, bands)],
                axis=0,
            )

            log_specgrams_full.append(logspec_full)
            log_specgrams_hp.append(logspec_hp)
            class_labels.append(class_label)

    # Create the first two feature maps.
    # NOTE(review): the rows were flattened frame-major, so after this reshape
    # the first spatial axis walks over time; the (bands, frames) labelling is
    # only interchangeable because both are equal by default - confirm intent.
    n_windows = len(class_labels)
    full_maps = np.asarray(log_specgrams_full).reshape(n_windows, bands, frames, 1)
    hp_maps = np.asarray(log_specgrams_hp).reshape(n_windows, bands, frames, 1)
    features = np.concatenate((full_maps, hp_maps, np.zeros(np.shape(full_maps))), axis=3)

    # Third feature map: delta (derivative) of the log-scaled mel-spectrogram.
    for i in range(len(features)):
        features[i, :, :, 2] = librosa.feature.delta(features[i, :, :, 0])

    # np.int was only an alias of the builtin int and is gone in NumPy >= 1.24.
    return features, np.array(class_labels, dtype=int)

In [26]:
# Build the spectrogram feature maps and per-window class labels for every
# clip listed in df. Progress is printed every 200 clips.
features, labels = extract_features(df)

This is iteration : 0 .... iterations left : 8732
This is iteration : 200 .... iterations left : 8532
This is iteration : 400 .... iterations left : 8332
This is iteration : 600 .... iterations left : 8132
This is iteration : 800 .... iterations left : 7932
This is iteration : 1000 .... iterations left : 7732
This is iteration : 1200 .... iterations left : 7532
This is iteration : 1400 .... iterations left : 7332
This is iteration : 1600 .... iterations left : 7132
This is iteration : 1800 .... iterations left : 6932
This is iteration : 2000 .... iterations left : 6732
This is iteration : 2200 .... iterations left : 6532
This is iteration : 2400 .... iterations left : 6332
This is iteration : 2600 .... iterations left : 6132
This is iteration : 2800 .... iterations left : 5932
This is iteration : 3000 .... iterations left : 5732
This is iteration : 3200 .... iterations left : 5532
This is iteration : 3400 .... iterations left : 5332
This is iteration : 3600 .... iterations left : 5132


In [27]:
# Pair every feature map with its label in an (n_samples, 2) object array so
# both can be shuffled and split together. The array is filled explicitly:
# np.array() on such ragged (array, int) pairs raises on NumPy >= 1.24.
data = np.empty((len(features), 2), dtype=object)
for sample_idx, (feature_map, label) in enumerate(zip(features, labels)):
    data[sample_idx, 0] = feature_map
    data[sample_idx, 1] = label
data.shape, features.shape, labels.shape

((22816, 2), (22816, 75, 75, 3), (22816,))

In [28]:
# Shuffle the rows in place with a fixed seed so the 60/20/20 split, and every
# artifact saved from it, can be regenerated after a kernel restart. A private
# RandomState is used so the global NumPy random state is left untouched.
# NOTE(review): windows cut from the same clip can land in different splits;
# consider splitting on the metadata "fold" column instead - confirm.
np.random.RandomState(42).shuffle(data)
train, validation, test = np.split(data, [int(.6*len(data)), int(.8*len(data))])
train.shape, validation.shape, test.shape

((13689, 2), (4563, 2), (4564, 2))

In [29]:
def process_sound_data(data):
    """Prepare one feature map for a Keras ImageNet model.

    Args:
        data: A single feature map (array without a batch axis).

    Returns:
        The result of ``preprocess_input`` applied to a copy of ``data`` with a
        leading batch axis of size 1.
    """
    # Keras' preprocess_input writes its result over a float input array, and
    # expand_dims only returns a view. Without the copy, the caller's feature
    # map would be modified on every call, so running several models over the
    # same base features would preprocess them cumulatively.
    batch = np.expand_dims(np.copy(data), axis=0)
    return preprocess_input(batch)

def extract_tl_features(model, base_feature_data, batch_size=256):
    """Run ``model`` over every base feature map and collect its flattened outputs.

    Args:
        model: Object with a Keras-style ``predict(batch)`` method.
        base_feature_data: Iterable of feature maps, all of the same shape.
        batch_size: Number of samples sent to ``model.predict`` per call. The
            original code issued one ``predict`` call per sample; batching
            removes that per-call overhead.

    Returns:
        Array of shape ``(n_samples, n_features)``; an empty 1-D array when
        ``base_feature_data`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    samples = list(base_feature_data)
    if not samples:
        return np.array([])

    feature_chunks = []
    for begin in range(0, len(samples), batch_size):
        chunk = samples[begin:begin + batch_size]
        # Each sample is copied before preprocessing so the caller's arrays are
        # never modified (Keras preprocessing may write over its input).
        batch = np.concatenate(
            [process_sound_data(np.copy(sample)) for sample in chunk], axis=0
        )
        tl_features = model.predict(batch)
        feature_chunks.append(np.reshape(tl_features, (len(chunk), -1)))
    return np.concatenate(feature_chunks, axis=0)

In [None]:
def get_vgg16_model(input_shape=(64, 64, 3)):
  """Build a frozen VGG16 feature extractor with a flattened output.

  Args:
    input_shape: Shape of one input sample. It must match the feature maps fed
      to the model, so pass ``(bands, frames, 3)`` when the spectrogram
      geometry differs from the 64x64 default.

  Returns:
    A Keras ``Model`` built on VGG16 without its top layers and with ImageNet
    weights, whose output is the flattened output of the last VGG16 layer. The
    model is marked non-trainable.
  """
  vgg = vgg16.VGG16(include_top=False, weights='imagenet', input_shape=input_shape)
  output = keras.layers.Flatten()(vgg.layers[-1].output)
  model = Model(vgg.input, output)
  model.trainable = False

  return model

In [None]:
def get_res50_model(input_shape=(64, 64, 3)):
  """Build a frozen ResNet50 feature extractor with a flattened output.

  Args:
    input_shape: Shape of one input sample. It must match the feature maps fed
      to the model, so pass ``(bands, frames, 3)`` when the spectrogram
      geometry differs from the 64x64 default.

  Returns:
    A Keras ``Model`` built on ResNet50 without its top layers and with
    ImageNet weights, whose output is the flattened output of the last
    ResNet50 layer. The model is marked non-trainable.
  """
  res = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
  output = keras.layers.Flatten()(res.layers[-1].output)
  model = Model(res.input, output)
  model.trainable = False

  return model

In [None]:
def get_vgg19_model(input_shape=(64, 64, 3)):
  """Build a frozen VGG19 feature extractor with a flattened output.

  Args:
    input_shape: Shape of one input sample. It must match the feature maps fed
      to the model, so pass ``(bands, frames, 3)`` when the spectrogram
      geometry differs from the 64x64 default.

  Returns:
    A Keras ``Model`` built on VGG19 without its top layers and with ImageNet
    weights, whose output is the flattened output of the last VGG19 layer. The
    model is marked non-trainable.
  """
  vgg = vgg19.VGG19(include_top=False, weights='imagenet', input_shape=input_shape)
  output = keras.layers.Flatten()(vgg.layers[-1].output)
  model = Model(vgg.input, output)
  model.trainable = False

  return model

In [30]:
def get_Xception_model(input_shape=(75, 75, 3)):
  """Build a frozen Xception feature extractor with a flattened output.

  Args:
    input_shape: Shape of one input sample. It must match the feature maps fed
      to the model. Per the Keras documentation, Xception needs a width and
      height of at least 71, so 64x64 feature maps cannot be used here.

  Returns:
    A Keras ``Model`` built on Xception without its top layers and with
    ImageNet weights, whose output is the flattened output of the last
    Xception layer. The model is marked non-trainable.
  """
  xpn = Xception(include_top=False, weights='imagenet', input_shape=input_shape)
  output = keras.layers.Flatten()(xpn.layers[-1].output)
  model = Model(xpn.input, output)
  model.trainable = False

  return model

In [31]:
# Take each split's (feature map, label) rows apart again: the feature maps
# stay a plain list, the labels become an array.
train_base_features = [feature_map for feature_map, _ in train]
train_labels = np.array([label for _, label in train])

validation_base_features = [feature_map for feature_map, _ in validation]
validation_labels = np.array([label for _, label in validation])

test_base_features = [feature_map for feature_map, _ in test]
test_labels = np.array([label for _, label in test])

In [32]:
# Persist the label arrays of the three splits to the working directory.
joblib.dump(train_labels, 'train_labels.pkl')
joblib.dump(validation_labels, 'validation_labels.pkl')
joblib.dump(test_labels, 'test_labels.pkl')

['test_labels.pkl']

In [33]:
def get_tl_features(model, train_features, validate_features, test_features):
    """Extract transfer-learning features for the three data splits.

    Runs ``extract_tl_features`` with ``model`` on the training, validation and
    test inputs in that order, printing a progress line after each one.

    Returns:
        Tuple ``(train_tl_features, validate_tl_features, test_tl_features)``.
    """
    stages = (
        (train_features, "Training-features.......extracted (1/3)"),
        (validate_features, "Validation-features.....extracted (2/3)"),
        (test_features, "Testing-features........extracted (3/3)"),
    )

    print("Features extraction started............")
    extracted = []
    for split_data, done_message in stages:
        extracted.append(extract_tl_features(model=model, base_feature_data=split_data))
        print(done_message)

    return tuple(extracted)

In [None]:
# VGG16: build the frozen extractor and run it over all three splits.
model1 = get_vgg16_model()
(vgg16_train_features,
 vgg16_validation_features,
 vgg16_test_features) = get_tl_features(
    model=model1,
    train_features=train_base_features,
    validate_features=validation_base_features,
    test_features=test_base_features,
)

In [None]:
# Persist the VGG16 features of the three splits to the working directory.
joblib.dump(vgg16_train_features, 'vgg16_train_features.pkl')
joblib.dump(vgg16_validation_features, 'vgg16_validation_features.pkl')
joblib.dump(vgg16_test_features, 'vgg16_test_features.pkl')

In [None]:
# ResNet50: build the frozen extractor and run it over all three splits.
model2 = get_res50_model()
(res50_train_features,
 res50_validation_features,
 res50_test_features) = get_tl_features(
    model=model2,
    train_features=train_base_features,
    validate_features=validation_base_features,
    test_features=test_base_features,
)

In [None]:
# Persist the ResNet50 features of the three splits to the working directory.
joblib.dump(res50_train_features, 'res50_train_features.pkl')
joblib.dump(res50_validation_features, 'res50_validation_features.pkl')
joblib.dump(res50_test_features, 'res50_test_features.pkl')

In [None]:
# VGG19: build the frozen extractor and run it over all three splits.
model3 = get_vgg19_model()
(vgg19_train_features,
 vgg19_validation_features,
 vgg19_test_features) = get_tl_features(
    model=model3,
    train_features=train_base_features,
    validate_features=validation_base_features,
    test_features=test_base_features,
)

In [None]:
# Persist the VGG19 features of the three splits to the working directory.
joblib.dump(vgg19_train_features, 'vgg19_train_features.pkl')
joblib.dump(vgg19_validation_features, 'vgg19_validation_features.pkl')
joblib.dump(vgg19_test_features, 'vgg19_test_features.pkl')

In [34]:
# Xception: build the frozen extractor and run it over all three splits.
model4 = get_Xception_model()
(Xception_train_features,
 Xception_validation_features,
 Xception_test_features) = get_tl_features(
    model=model4,
    train_features=train_base_features,
    validate_features=validation_base_features,
    test_features=test_base_features,
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Features extraction started............
Training-features.......extracted (1/3)
Validation-features.....extracted (2/3)
Testing-features........extracted (3/3)


In [36]:
# Persist the Xception features of the three splits to the working directory.
joblib.dump(Xception_train_features, 'Xception_train_features.pkl')
joblib.dump(Xception_validation_features, 'Xception_validation_features.pkl')
joblib.dump(Xception_test_features, 'Xception_test_features.pkl')

['Xception_test_features.pkl']

In [66]:
# Sanity check: shapes of the extracted Xception feature matrices for the
# train, validation and test splits.
Xception_train_features.shape, Xception_validation_features.shape, Xception_test_features.shape

((13689, 18432), (4563, 18432), (4564, 18432))