# Audio Classification Using a CNN


In [160]:
import numpy as np
# Number of MFCC frames (columns) every feature matrix is cropped or padded to.
max_pad_len = 216


def extract_features(file_name):
    """Load an audio file and return its MFCC matrix with a fixed number of frames.

    The clip is loaded with ``librosa.load`` (``res_type='kaiser_fast'``) and
    120 MFCC coefficients are computed per frame. The frame axis (axis 1) is
    then cropped to ``max_pad_len`` columns when the clip is longer, or
    zero-padded on the right when it is shorter, so every successful call
    returns an array of shape ``(120, max_pad_len)``.

    Args:
        file_name: Path of the audio file to parse.

    Returns:
        The fixed-width MFCC array, or ``None`` if the file could not be
        loaded or processed (the error is printed, not raised).
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=120)
        # Crop first: a clip with more than max_pad_len frames would otherwise
        # yield a negative pad width, which np.pad rejects with a ValueError.
        mfccs = mfccs[:, :max_pad_len]
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    except Exception as e:
        # Best-effort: report which file failed and why, then let the caller
        # decide what to do with the missing result.
        print("Error encountered while parsing file: ", file_name)
        print("Reason: ", repr(e))
        return None

    return mfccs

In [161]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the ESC-50 audio clips and load the clip metadata
fulldatasetpath = '../ESC-50/audio/'

metadata = pd.read_csv('../ESC-50/meta/esc50.csv')

features = []
skipped_files = []

# Resolve the audio directory once; it does not change between rows
audio_dir = os.path.abspath(fulldatasetpath)

# Iterate through each sound file and extract the features
for index, row in metadata.iterrows():

    file_name = os.path.join(audio_dir, str(row["filename"]))
    class_label = row["category"]
    data = extract_features(file_name)

    # extract_features returns None when a file cannot be parsed. Keeping such
    # rows would make the stacked feature array ragged, so leave them out.
    if data is None:
        skipped_files.append(file_name)
        continue

    features.append([data, class_label])

# Convert into a pandas DataFrame
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files')
if skipped_files:
    print('Skipped ', len(skipped_files), ' files that could not be parsed')

Finished feature extraction from  2000  files


In [162]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

# Stack the per-clip feature matrices and their category names into arrays
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['class_label'].tolist())

# Map category names to integer ids, then one-hot encode them
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

# Hold out 20% of the clips for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X, yy, test_size=0.2, random_state=42
)




In [173]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# X unpacks as (clips, rows, columns); the network input adds one channel axis
a, num_rows, num_columns = X.shape
num_channels = 1
input_dims = (num_rows, num_columns, num_channels)

x_train = x_train.reshape((x_train.shape[0],) + input_dims)
x_test = x_test.reshape((x_test.shape[0],) + input_dims)

num_labels = yy.shape[1]
filter_size = 2

# Construct model
model = Sequential()

# Four convolutional stages with doubling filter counts; each stage is
# Conv2D -> MaxPooling2D -> Dropout. Only the first Conv2D declares the input shape.
for stage, n_filters in enumerate((16, 32, 64, 128)):
    if stage == 0:
        conv = Conv2D(filters=n_filters, kernel_size=2, input_shape=input_dims, activation='relu')
    else:
        conv = Conv2D(filters=n_filters, kernel_size=2, activation='relu')
    model.add(conv)
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))

# Collapse the spatial axes before the fully connected head
model.add(GlobalAveragePooling2D())

# Three Dense -> ReLU -> Dropout stages
for n_units in (512, 1024, 512):
    model.add(Dense(n_units))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

# One softmax output per class
model.add(Dense(num_labels, activation='softmax'))

In [174]:
# Compile the model: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [175]:
# Print the layer-by-layer architecture
model.summary()

# Baseline before fitting: score the model on the held-out split.
# score[1] is the accuracy metric requested at compile time.
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_87 (Conv2D)           (None, 119, 215, 16)      80        
_________________________________________________________________
max_pooling2d_85 (MaxPooling (None, 59, 107, 16)       0         
_________________________________________________________________
dropout_133 (Dropout)        (None, 59, 107, 16)       0         
_________________________________________________________________
conv2d_88 (Conv2D)           (None, 58, 106, 32)       2080      
_________________________________________________________________
max_pooling2d_86 (MaxPooling (None, 29, 53, 32)        0         
_________________________________________________________________
dropout_134 (Dropout)        (None, 29, 53, 32)        0         
_________________________________________________________________
conv2d_89 (Conv2D)           (None, 28, 52, 64)      

In [176]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

num_epochs = 600
num_batch_size = 256

# Make sure the checkpoint directory exists before training starts, so the
# first checkpoint save cannot fail on a missing folder.
checkpoint_path = 'save_models/weights.best.basic_cnn10.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Keep only the weights from the epoch with the best monitored validation loss
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)
start = datetime.now()

# NOTE(review): the test split doubles as validation data here, so the
# checkpointed "best" weights are selected on the same data that is later
# reported as test accuracy; consider a separate validation split.
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

6400

Epoch 00501: val_loss did not improve from 1.58838
Epoch 502/600

Epoch 00502: val_loss did not improve from 1.58838
Epoch 503/600

Epoch 00503: val_loss did not improve from 1.58838
Epoch 504/600

Epoch 00504: val_loss did not improve from 1.58838
Epoch 505/600

Epoch 00505: val_loss did not improve from 1.58838
Epoch 506/600

Epoch 00506: val_loss did not improve from 1.58838
Epoch 507/600

Epoch 00507: val_loss did not improve from 1.58838
Epoch 508/600

Epoch 00508: val_loss did not improve from 1.58838
Epoch 509/600

Epoch 00509: val_loss did not improve from 1.58838
Epoch 510/600

Epoch 00510: val_loss did not improve from 1.58838
Epoch 511/600

Epoch 00511: val_loss did not improve from 1.58838
Epoch 512/600

Epoch 00512: val_loss did not improve from 1.58838
Epoch 513/600

Epoch 00513: val_loss did not improve from 1.58838
Epoch 514/600

Epoch 00514: val_loss did not improve from 1.58838
Epoch 515/600

Epoch 00515: val_loss did not improve from 1.58838
Epoch 516/600

Epoc

In [177]:
# Report accuracy on the training split first, then on the testing split
for split_label, split_inputs, split_targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(split_inputs, split_targets, verbose=0)
    print(split_label, score[1])

Training Accuracy:  0.9950000047683716
Testing Accuracy:  0.6600000262260437
