In [1]:
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, \
                         Flatten, MaxPooling2D
from keras.models import Sequential
import librosa
import librosa.display
import numpy as np
import pandas as pd
import random

import warnings
warnings.filterwarnings('ignore')

In [7]:
# Sanity check: load a single recording and inspect the shape of its mel
# spectrogram before processing the whole corpus.
file_name = 'F01/Session1/wav_arrayMic/0001.wav'
# duration=5.0 asks librosa to read at most the first five seconds of audio.
y, sr = librosa.load(file_name, duration=5.0)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
# Last expression of the cell, so the shape is displayed as the cell output.
ps.shape

(128, 216)

In [8]:
import os
import numpy as np
from tqdm import tqdm

D = []  # Dataset: list of (mel_spectrogram, label) tuples

# Audio is truncated to this many seconds when loaded.
CLIP_SECONDS = 5.0
# Only spectrograms with exactly this (n_mels, n_frames) shape are kept so
# that every sample fits the fixed-size CNN input.
# NOTE(review): recordings that produce a different number of frames (e.g.
# clips shorter than CLIP_SECONDS) are dropped rather than padded - confirm
# that discarding them is intended.
SPECTROGRAM_SHAPE = (128, 216)

# (speaker directory, class label) pairs.
directories = [
    ['F01', 'Atypical'], ['F03', 'Atypical'], ['F04', 'Atypical'], ['FC01', 'Typical'], ['FC02', 'Typical'],
    ['FC03', 'Typical'], ['M01', 'Atypical'], ['M02', 'Atypical'], ['M03', 'Atypical'], ['M04', 'Atypical'],
    ['M05', 'Atypical'], ['MC01', 'Typical'], ['MC02', 'Typical'], ['MC03', 'Typical'], ['MC04', 'Typical'],
]


def load_melspectrogram(path):
    """Return the mel spectrogram of the audio file at ``path``, or ``None``.

    ``None`` is returned when ``path`` is not a regular file, when the
    recording has zero duration, or when the resulting spectrogram does not
    have shape ``SPECTROGRAM_SHAPE``.
    """
    if not os.path.isfile(path):
        return None
    # Skip empty recordings before attempting to decode them.
    if librosa.get_duration(filename=path) == 0:
        return None
    signal, sample_rate = librosa.load(path, duration=CLIP_SECONDS)
    spectrogram = librosa.feature.melspectrogram(y=signal, sr=sample_rate)
    if spectrogram.shape != SPECTROGRAM_SHAPE:
        return None
    return spectrogram


for directory, label in directories:
    print(directory)
    # os.listdir returns entries in arbitrary order; sort so that the dataset
    # is built in the same order on every run and every machine.
    for session in sorted(os.listdir(directory)):
        if session == 'Notes':
            continue
        wav_dir = os.path.join(directory, session, 'wav_arrayMic')
        if not os.path.isdir(wav_dir):
            continue
        for audiofile in tqdm(sorted(os.listdir(wav_dir)), desc=wav_dir):
            spectrogram = load_melspectrogram(os.path.join(wav_dir, audiofile))
            if spectrogram is not None:
                D.append((spectrogram, label))

  0%|          | 0/134 [00:00<?, ?it/s]

F01


100%|██████████| 134/134 [00:07<00:00, 17.97it/s]
  0%|          | 1/204 [00:00<00:20,  9.97it/s]

F03


100%|██████████| 204/204 [00:10<00:00, 20.33it/s]
100%|██████████| 435/435 [00:22<00:00, 19.44it/s]
100%|██████████| 209/209 [00:12<00:00, 16.09it/s]
  1%|          | 1/199 [00:00<00:19,  9.97it/s]

F04


100%|██████████| 199/199 [00:10<00:00, 19.26it/s]
100%|██████████| 249/249 [00:19<00:00, 12.65it/s]
  0%|          | 1/256 [00:00<00:27,  9.37it/s]

FC01


100%|██████████| 256/256 [00:20<00:00, 12.79it/s]
  0%|          | 1/269 [00:00<00:31,  8.62it/s]

FC02


100%|██████████| 269/269 [00:21<00:00, 12.79it/s]
100%|██████████| 992/992 [01:05<00:00, 15.08it/s]
  0%|          | 0/400 [00:00<?, ?it/s]

FC03


100%|██████████| 400/400 [00:26<00:00, 14.95it/s]
100%|██████████| 387/387 [00:23<00:00, 16.66it/s]
100%|██████████| 208/208 [00:10<00:00, 19.61it/s]
  1%|          | 1/100 [00:00<00:10,  9.15it/s]

M01


100%|██████████| 100/100 [00:09<00:00, 10.84it/s]
100%|██████████| 286/286 [00:19<00:00, 14.58it/s]
  0%|          | 1/240 [00:00<00:23,  9.98it/s]

M02


100%|██████████| 240/240 [00:13<00:00, 17.90it/s]
100%|██████████| 160/160 [00:13<00:00, 12.28it/s]
  0%|          | 1/416 [00:00<00:41,  9.97it/s]

M03


100%|██████████| 416/416 [00:31<00:00, 13.24it/s]
  0%|          | 0/126 [00:00<?, ?it/s]

M04


100%|██████████| 126/126 [00:08<00:00, 14.29it/s]
100%|██████████| 295/295 [00:19<00:00, 15.48it/s]
  0%|          | 0/128 [00:00<?, ?it/s]

M05


100%|██████████| 128/128 [00:09<00:00, 13.38it/s]
  0%|          | 1/329 [00:00<00:32,  9.96it/s]

MC01


100%|██████████| 329/329 [00:28<00:00, 11.44it/s]
100%|██████████| 360/360 [00:23<00:00, 15.49it/s]
100%|██████████| 419/419 [00:31<00:00, 13.12it/s]
  0%|          | 1/388 [00:00<00:50,  7.60it/s]

MC02


100%|██████████| 388/388 [00:33<00:00, 11.59it/s]
100%|██████████| 307/307 [00:28<00:00, 10.66it/s]
  0%|          | 1/600 [00:00<01:34,  6.34it/s]

MC03


100%|██████████| 600/600 [00:35<00:00, 17.08it/s]
100%|██████████| 300/300 [00:18<00:00, 16.60it/s]
  0%|          | 1/648 [00:00<01:13,  8.85it/s]

MC04


100%|██████████| 648/648 [00:49<00:00, 13.19it/s]
100%|██████████| 373/373 [00:21<00:00, 17.40it/s]


In [9]:
# Number of (spectrogram, label) pairs that were appended to D.
print("Number of samples: ", len(D))

Number of samples:  1004


In [60]:
# Split configuration. The remainder after train + validate becomes the test set.
N_TRAIN = 800
N_VALIDATE = 150
SPLIT_SEED = 42  # fixed seed so the split is the same on every run

assert len(D) > N_TRAIN + N_VALIDATE, (
    "Not enough samples for a non-empty test split: "
    "%d <= %d" % (len(D), N_TRAIN + N_VALIDATE)
)

# Shuffle a copy: `dataset = D` would alias the list and reorder D in place,
# making this cell's result depend on how many times it has been run.
dataset = list(D)
random.Random(SPLIT_SEED).shuffle(dataset)

# NOTE(review): samples are split per utterance, so recordings of the same
# speaker can land in both train and test - confirm this is acceptable.
train = dataset[:N_TRAIN]
validate = dataset[N_TRAIN:N_TRAIN + N_VALIDATE]
test = dataset[N_TRAIN + N_VALIDATE:]

X_train, y_train = zip(*train)
X_validate, y_validate = zip(*validate)
X_test, y_test = zip(*test)


def to_cnn_input(spectrograms):
    """Stack (128, 216) spectrograms into an (n, 128, 216, 1) array.

    The trailing axis is the single channel expected by Conv2D. The reshape
    raises if the stacked data does not contain exactly 128 * 216 values per
    sample.
    """
    return np.stack(spectrograms).reshape((len(spectrograms), 128, 216, 1))


# Reshape for CNN input
X_train = to_cnn_input(X_train)
X_validate = to_cnn_input(X_validate)
X_test = to_cnn_input(X_test)

In [61]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
# Fit the encoder ONCE, on the training labels, and reuse that mapping for the
# other splits. Calling fit_transform separately on each split re-learns the
# class -> index mapping every time, so the indices (and the one-hot width)
# would silently disagree whenever a split does not contain every class.
labelencoder = LabelEncoder()
labelencoder.fit(y_train)
num_classes = len(labelencoder.classes_)

# transform() raises on a label that was not seen during fit, which is the
# desired loud failure. num_classes pins the one-hot width for every split.
y_train = to_categorical(labelencoder.transform(y_train), num_classes=num_classes)
y_validate = to_categorical(labelencoder.transform(y_validate), num_classes=num_classes)
y_test = to_categorical(labelencoder.transform(y_test), num_classes=num_classes)

In [62]:
# Display the shapes of all six arrays (inputs first, then one-hot labels) to
# confirm that each X split lines up with its y split.
X_train.shape, X_validate.shape, X_test.shape, y_train.shape, y_validate.shape, y_test.shape

((800, 128, 216, 1),
 (150, 128, 216, 1),
 (54, 128, 216, 1),
 (800, 2),
 (150, 2),
 (54, 2))

In [63]:
# One-channel mel spectrogram: (n_mels, n_frames, channels).
input_shape = (128, 216, 1)

# Three 5x5 convolution stages (the first two followed by 4x2 max pooling),
# then a dropout-regularised dense head ending in a 2-way softmax.
model = Sequential([
    # Stage 1
    Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 2
    Conv2D(48, (5, 5), padding="valid"),
    MaxPooling2D((4, 2), strides=(4, 2)),
    Activation('relu'),

    # Stage 3 (no pooling)
    Conv2D(48, (5, 5), padding="valid"),
    Activation('relu'),

    # Classifier head
    Flatten(),
    Dropout(rate=0.5),
    Dense(64),
    Activation('relu'),
    Dropout(rate=0.5),
    Dense(2),
    Activation('softmax'),
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 124, 212, 24)      624       
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 31, 106, 24)       0         
_________________________________________________________________
activation_20 (Activation)   (None, 31, 106, 24)       0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 27, 102, 48)       28848     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 6, 51, 48)         0         
_________________________________________________________________
activation_21 (Activation)   (None, 6, 51, 48)         0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 2, 47, 48)        

In [64]:
# Configure training: Adam optimiser, cross-entropy over the one-hot labels,
# accuracy reported alongside the loss.
model.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=['accuracy'])

# Train for 12 epochs, monitoring the validation split after each one.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_validate, y_validate),
    batch_size=32,
    epochs=12,
)

# evaluate() returns [loss, accuracy] given the metrics configured above.
score = model.evaluate(x=X_test, y=y_test)
test_loss, test_accuracy = score[0], score[1]

print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.3339748680591583
Test accuracy: 0.8703703880310059


In [67]:
# Sequential.predict_classes() was removed in TensorFlow 2.6. For a softmax
# output it was equivalent to taking the argmax over the class axis, which
# works on both old and new versions.
predict = np.argmax(model.predict(X_test), axis=-1)
prediction_class = labelencoder.inverse_transform(predict)

# Ground-truth string labels, taken from the (spectrogram, label) test tuples.
# A dedicated name avoids overwriting the global audio signal `y`.
real_class = [label for _, label in test]

match = 0
print('  Real       Prediction       Match')
for real, predicted in zip(real_class, prediction_class):
    is_match = real == predicted
    if is_match:
        match += 1
    print(real + '       ' + predicted + '       ' + str(is_match))
print('Matched = ', match)
print('Not Matched = ',(X_test.shape[0] - match))

  Real       Prediction       Match
Atypical       Atypical       True
Typical       Typical       True
Typical       Typical       True
Typical       Typical       True
Atypical       Typical       False
Typical       Typical       True
Atypical       Atypical       True
Atypical       Atypical       True
Typical       Typical       True
Atypical       Atypical       True
Atypical       Atypical       True
Typical       Typical       True
Typical       Typical       True
Typical       Typical       True
Typical       Typical       True
Atypical       Atypical       True
Atypical       Typical       False
Typical       Typical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Typical       Atypical       False
Atypical       Typical       False
Typical       Typical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Typical       False
Typica