In [67]:
# Directory (on the mounted Google Drive) that holds every audio clip used below.
dataset = "/content/drive/MyDrive/full_dataset"

In [68]:
import struct

class WavFileHelper():
    """Reads basic audio-format properties directly from a WAV file header."""

    def read_file_properties(self, filename):
        """Return ``(num_channels, sample_rate, bit_depth)`` for a WAV file.

        Only the first 48 bytes of the file are read. The parser assumes the
        canonical layout in which the ``fmt `` chunk immediately follows the
        12-byte RIFF/WAVE header; files that place other chunks first will
        yield meaningless values.

        Args:
            filename: Path of the file to inspect.

        Returns:
            Tuple of three ints: ``(num_channels, sample_rate, bit_depth)``.

        Raises:
            OSError: If the file cannot be opened.
            struct.error: If the file is too short to contain the fields.
        """
        # The context manager guarantees the handle is released even when a
        # read fails; previously the file was opened and never closed.
        with open(filename, "rb") as wave_file:
            wave_file.read(12)        # skip the RIFF/WAVE header
            fmt = wave_file.read(36)  # start of the "fmt " chunk

        # Offsets are relative to the start of the "fmt " chunk; all fields
        # are little-endian.
        num_channels = struct.unpack_from('<H', fmt, 10)[0]
        sample_rate = struct.unpack_from('<I', fmt, 12)[0]
        bit_depth = struct.unpack_from('<H', fmt, 22)[0]

        return (num_channels, sample_rate, bit_depth)

In [69]:
import os
import pandas as pd
import librosa
import librosa.display
# Collects one (num_channels, sample_rate, bit_depth) tuple per inspected file.
audiodata = list()
# Shared header reader used by the scanning cell below.
wavfilehelper = WavFileHelper()

In [70]:
# Read the header properties of every file in the dataset directory.
# The list is rebuilt from scratch (instead of appended to) so re-running this
# cell cannot duplicate rows, and the listing is sorted so the row order does
# not depend on the filesystem's arbitrary directory order.
audiodata = [
    wavfilehelper.read_file_properties(os.path.join(dataset, file_name))
    for file_name in sorted(os.listdir(dataset))
]
audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])

In [71]:
# Share of files per channel count.
print(audiodf["num_channels"].value_counts(normalize=True))

1    1.0
Name: num_channels, dtype: float64


In [72]:
# Share of files per sample rate.
print(audiodf["sample_rate"].value_counts(normalize=True))

16000    1.0
Name: sample_rate, dtype: float64


In [73]:
# Share of files per bit depth.
print(audiodf["bit_depth"].value_counts(normalize=True))

16    1.0
Name: bit_depth, dtype: float64


In [88]:
def extract_features(file_name):
    """Summarise an audio file as a vector of time-averaged MFCCs.

    The file is loaded with librosa, 40 MFCC coefficients are computed per
    frame, and each coefficient is averaged over all frames.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        1-D array with one mean value per MFCC coefficient (40 entries), or
        ``None`` if the file could not be loaded or processed.
    """
    try:
        # NOTE(review): no `sr` is passed, so librosa's default target sample
        # rate applies rather than the files' native rate - confirm intended.
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Transpose to (frames, coefficients) and average over the frame axis.
        mfccsscaled = np.mean(mfccs.T, axis=0)

    except Exception as e:
        # Best-effort: report the failing file and its cause, then signal the
        # failure with None. The handler previously referenced the undefined
        # name `file`, which raised NameError instead of returning.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccsscaled

In [89]:
# Preview the first five rows of the header-property table.
print(audiodf.head(n=5))

   num_channels  sample_rate  bit_depth
0             1        16000         16
1             1        16000         16
2             1        16000         16
3             1        16000         16
4             1        16000         16


In [90]:
features=[]
import numpy as np
# Sorted listing keeps the sample order (and therefore the seeded train/test
# split downstream) reproducible across machines and runs.
for i in sorted(os.listdir(dataset)):
  # The first three characters of the file name are used as the class label.
  class_label=i[:3]
  data=extract_features(os.path.join(dataset, i))
  if data is None:
    # Skip files that could not be parsed; a None entry would turn the
    # feature matrix into a ragged object array further down.
    continue
  features.append([data,class_label])

In [91]:
# One row per audio file: its MFCC vector and its class label.
df_features = pd.DataFrame(data=features, columns=["feature", "class_label"])

In [92]:
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Stack the per-file MFCC vectors into one array and gather the string labels.
X = np.array(df_features["feature"].tolist())
y = np.array(df_features["class_label"].tolist())

# Map the string labels to integers, then one-hot encode them.
le = LabelEncoder()
y2 = to_categorical(le.fit_transform(y))

# Hold out 20% of the samples; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y2,
    random_state=42,
    test_size=0.2,
)

In [93]:
import matplotlib.pyplot as plt
# Dimensions of the training feature array.
X_train.shape

(221, 40)

In [94]:
# Dimensions of the held-out feature array.
X_test.shape

(56, 40)

In [100]:
# Conv2D expects 4-D input, so each feature vector becomes an
# (n_rows, n_cols, n_channels) "image".
n_rows, n_cols, n_channels = 40, 1, 1
X_train = X_train.reshape((len(X_train), n_rows, n_cols, n_channels))
X_test = X_test.reshape((len(X_test), n_rows, n_cols, n_channels))

In [103]:
# Confirm the held-out array now has the 4-D layout expected by the network.
X_test.shape

(56, 40, 1, 1)

In [171]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
# Two convolution/pooling stages followed by a two-way softmax classifier.
# With kernel_size=1 and pool_size=1 the spatial size is left unchanged, so
# the flattened vector has n_rows * n_cols * 32 entries.
model = Sequential([
    Conv2D(
        filters=16,
        kernel_size=1,
        input_shape=(n_rows, n_cols, n_channels),
        activation='relu',
    ),
    MaxPooling2D(pool_size=1),
    Conv2D(filters=32, kernel_size=1, activation='relu'),
    MaxPooling2D(pool_size=1),
    Flatten(),
    # One output unit per class.
    Dense(2, activation='softmax'),
])

In [172]:
# Configure training: cross-entropy over the one-hot labels, Adam optimiser.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture.
model.summary()

# Baseline: score the still-untrained network on the held-out split.
score = model.evaluate(X_test, y_test, verbose=1)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_73 (Conv2D)           (None, 40, 1, 16)         32        
_________________________________________________________________
max_pooling2d_71 (MaxPooling (None, 40, 1, 16)         0         
_________________________________________________________________
conv2d_74 (Conv2D)           (None, 40, 1, 32)         544       
_________________________________________________________________
max_pooling2d_72 (MaxPooling (None, 40, 1, 32)         0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 1280)              0         
_________________________________________________________________
dense_19 (Dense)             (None, 2)                 2562      
Total params: 3,138
Trainable params: 3,138
Non-trainable params: 0
___________________________________________________

In [173]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 

num_epochs, num_batch_size = 50, 8

# Write weights to disk only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='saved_models/weights.best.basic_cnn.hdf5',
    save_best_only=True,
    verbose=1,
)

# Time the whole fit so its cost is visible in the output.
start = datetime.now()

# NOTE(review): the held-out split doubles as validation data here, so it also
# steers checkpoint selection - confirm that is acceptable.
model.fit(
    X_train,
    y_train,
    epochs=num_epochs,
    batch_size=num_batch_size,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
    verbose=1,
)

duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.34931, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 2/50

Epoch 00002: val_loss did not improve from 0.34931
Epoch 3/50

Epoch 00003: val_loss improved from 0.34931 to 0.28668, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 4/50

Epoch 00004: val_loss improved from 0.28668 to 0.24112, saving model to saved_models/weights.best.basic_cnn.hdf5
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.24112
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.24112
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.24112
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.24112
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.24112
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.24112
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.24112
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.24112
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.2

In [174]:
# Accuracy on the samples the model was fitted on.
score = model.evaluate(x=X_train, y=y_train, verbose=0)
print("Training Accuracy: ", score[1])

# Accuracy on the held-out samples.
score = model.evaluate(x=X_test, y=y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  1.0
Testing Accuracy:  0.9107142686843872


In [181]:
import numpy as np

# Classify a single dog recording with the trained model.
# NOTE: this rebinds `features`, `df_features` and `X`, replacing the training
# data previously held under those names.
data = extract_features('/content/drive/MyDrive/full_dataset/dog_barking_99.wav')
features = [[data]]
df_features = pd.DataFrame(features, columns=['feature'])

# Turn the one-row frame into a batch with the 4-D layout the network expects.
X = np.array(df_features['feature'].tolist())
X = X.reshape((len(X), n_rows, n_cols, n_channels))
model.predict(X)

array([[0.00116674, 0.99883324]], dtype=float32)

In [182]:
import numpy as np

# Classify a single cat recording with the trained model.
# NOTE: this rebinds `features`, `df_features` and `X`, replacing whatever was
# previously held under those names.
data = extract_features('/content/drive/MyDrive/full_dataset/cat_156.wav')
features = [[data]]
df_features = pd.DataFrame(features, columns=['feature'])

# Turn the one-row frame into a batch with the 4-D layout the network expects.
X = np.array(df_features['feature'].tolist())
X = X.reshape((len(X), n_rows, n_cols, n_channels))
model.predict(X)

array([[9.9993813e-01, 6.1812127e-05]], dtype=float32)