In [None]:
import pandas as pd
import numpy as np
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

In [None]:
import struct

class WavFileHelper():
    """Read basic format properties straight from a WAV file header."""

    def read_file_properties(self, filename):
        """Return ``(num_channels, sample_rate, bit_depth)`` for a WAV file.

        The header is parsed at fixed byte offsets: the first 12 bytes (the
        RIFF/WAVE descriptor) are skipped and the following 36 bytes are
        treated as the ``fmt `` chunk. This assumes the ``fmt `` chunk
        immediately follows the RIFF descriptor; files with other chunks in
        between will yield meaningless values.

        Parameters
        ----------
        filename : str or path-like
            Path of the WAV file to inspect.

        Returns
        -------
        tuple
            ``(num_channels, sample_rate, bit_depth)`` as unsigned integers
            decoded little-endian from the header.

        Raises
        ------
        OSError
            If the file cannot be opened.
        struct.error
            If the file is too short to contain the expected header fields.
        """
        # The context manager guarantees the handle is released even when
        # reading fails (the handle was previously never closed).
        with open(filename, "rb") as wave_file:
            wave_file.read(12)        # skip the RIFF/WAVE descriptor
            fmt = wave_file.read(36)  # 'fmt ' chunk header and body

        # Offsets are relative to the start of the 'fmt ' chunk.
        num_channels = struct.unpack('<H', fmt[10:12])[0]
        sample_rate = struct.unpack('<I', fmt[12:16])[0]
        bit_depth = struct.unpack('<H', fmt[22:24])[0]

        return (num_channels, sample_rate, bit_depth)

wavfilehelper = WavFileHelper()

In [1]:
def extract_features(file_name, plot=True):
    """Load an audio file and return its short-time Fourier transform.

    The file is decoded with ``librosa.load`` (``res_type='kaiser_best'``)
    and transformed with ``librosa.stft`` using ``n_fft=1024``,
    ``hop_length=256`` and ``center=False``. When ``plot`` is true, a
    log-frequency dB spectrogram of the STFT magnitude is drawn and shown.

    Parameters
    ----------
    file_name : str or path-like
        Path of the audio file to process.
    plot : bool, optional
        Draw and show the spectrogram. Defaults to ``True`` (the original
        behaviour); pass ``False`` to extract features without figures.

    Returns
    -------
    numpy.ndarray or None
        The STFT matrix returned by ``librosa.stft``, or ``None`` when
        loading, transforming or plotting raised an exception.
    """
    n_fft = 1024
    # Same value librosa.stft would pick by default (n_fft // 4); named here
    # so the plot below can use the matching time axis.
    hop_length = n_fft // 4

    try:
        audio_y, sample_rate = librosa.load(file_name, res_type='kaiser_best')
        D = librosa.stft(audio_y, center=False, n_fft=n_fft, hop_length=hop_length)

        if plot:
            # Pass the magnitude explicitly: amplitude_to_db discards phase
            # anyway and emits a warning when handed complex input.
            librosa.display.specshow(
                librosa.amplitude_to_db(np.abs(D), ref=np.max),
                sr=sample_rate, hop_length=hop_length,
                y_axis='log', x_axis='time')
            plt.title('Power spectrogram')
            plt.colorbar(format='%+2.0f dB')
            plt.tight_layout()
            plt.show()

    except Exception as e:
        # Report the failing path and the reason. The handler used to print
        # an undefined name, which raised NameError instead of returning None.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return D

In [None]:
# Load the metadata table; the "fold" and "slice_file_name" columns locate
# each audio file on disk.
metadata = pd.read_csv('./UrbanSound8k/metadata/UrbanSound8K.csv')

# Read the (num_channels, sample_rate, bit_depth) header tuple of every file
# listed in the metadata.
audio_root = os.path.abspath('UrbanSound8k/audio/')
audiodata = [
    wavfilehelper.read_file_properties(
        os.path.join(
            audio_root,
            'fold' + str(row["fold"]) + '/',
            str(row["slice_file_name"]),
        )
    )
    for _, row in metadata.iterrows()
]

# Convert into a pandas DataFrame, one row per audio file
audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])

In [None]:
# Reload previously saved features and labels from the working directory.
# NOTE: allow_pickle=True unpickles arbitrary Python objects, so only load
# .npy files that were produced by a trusted source.
features, labels = [
    np.load(cache_path, allow_pickle=True)
    for cache_path in ("features.npy", "labels.npy")
]

In [None]:
metadata = pd.read_csv('./UrbanSound8k/metadata/UrbanSound8K.csv')
features = []
labels = []
i = 0

# Iterate through each sound file and extract the features
audio_root = os.path.abspath('UrbanSound8k/audio/')
for i, (index, row) in enumerate(metadata.iterrows(), start=1):

    file_name = os.path.join(audio_root, 'fold'+str(row["fold"])+'/', str(row["slice_file_name"]))

    class_label = row["class"]
    print(f'{i}: Processing {file_name}')
    data = extract_features(file_name)

    # extract_features returns None when a file could not be processed.
    # Skip it together with its label so features and labels stay aligned
    # and no None entries end up in the saved arrays.
    if data is None:
        continue

    features.append(data)
    labels.append(class_label)

# The per-file matrices can have different numbers of frames, and NumPy
# (1.24+) refuses to build an array from such a ragged list implicitly.
# Pack them into a 1-D object array element by element instead.
X = np.empty(len(features), dtype=object)
for position, feature in enumerate(features):
    X[position] = feature
y = np.asarray(labels)

# Persist the results so later sessions can reload them with
# np.load(..., allow_pickle=True) instead of re-running the extraction.
np.save('features.npy', X)
np.save('labels.npy',   y)


print('Finished feature extraction from ', len(features), ' files')

In [None]:
# Inputs (X) and class names (y) as produced by the earlier cells.
X, y = features, labels

# Encode the classification labels: class names -> integer ids -> one-hot rows
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

# Split the dataset: 20% is held out for testing, and the fixed random_state
# makes the split repeatable between runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 


# One output unit per column of the one-hot encoded labels.
num_labels = yy.shape[1]

# Construct model: three Conv2D -> MaxPooling2D -> Dropout stages with
# 16, 32 and 64 filters, followed by global average pooling and a softmax
# classification layer.
model = Sequential()
for stage_index, n_filters in enumerate((16, 32, 64)):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (513, 341, 2)} if stage_index == 0 else {}
    model.add(Conv2D(filters=n_filters, kernel_size=2, activation='relu', **first_layer_kwargs))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))

model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax'))

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the compiled model.
model.summary()