In [93]:
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, \
                         Flatten, MaxPooling2D
from keras.models import Sequential
import librosa
import librosa.display
import numpy as np
import pandas as pd
import random

import warnings
warnings.filterwarnings('ignore')

In [94]:
file_name = 'F01/Session1/wav_arrayMic/0001.wav'
y, sr = librosa.load(file_name, duration=5.0)
ps = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=128)
ps.shape

(128, 216)

In [95]:
file_name = 'F01/Session1/wav_arrayMic/0001.wav'
y, sr = librosa.load(file_name, duration=5.0)
ps = librosa.feature.melspectrogram(y=y, sr=sr)
ps.shape

(128, 216)

In [96]:
import os
import numpy as np
from tqdm import tqdm

D_sptg = [] # Dataset
D_mfcc = []

directories =[  
                 ['F01','Atypical'],['F03','Atypical'],['F04','Atypical'],['FC01','Typical'],['FC02','Typical'],
                 ['FC03','Typical'],['M01','Atypical'],['M02','Atypical'],['M03','Atypical'],['M04','Atypical'],
                 ['M05','Atypical'],['MC01','Typical'],['MC02','Typical'],['MC03','Typical'],['MC04','Typical']
             ]

for pair in directories:
    directory = pair[0]
    label = pair[1]
    print(directory)
    for filename in os.listdir(directory):
        if filename != 'Notes':
            file = os.path.join(directory, filename)
            file = os.path.join(file, 'wav_arrayMic')
            if os.path.isdir(file):
                for audiofile in tqdm(os.listdir(file)):
                    f = os.path.join(file, audiofile)
                    if os.path.isfile(f):
                        if librosa.get_duration(filename=f) != 0:
                            y, sr = librosa.load(f, duration=5.0)
                            spectogram = librosa.feature.melspectrogram(y=y, sr=sr)
                            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=128)
                            if mfcc.shape == (128, 216) and spectogram.shape == (128,216): 
                                D_sptg.append( (spectogram, label) )
                                D_mfcc.append( (mfcc, label) )

  0%|          | 0/134 [00:00<?, ?it/s]

F01


100%|██████████| 134/134 [00:23<00:00,  5.64it/s]
  0%|          | 0/204 [00:00<?, ?it/s]

F03


100%|██████████| 204/204 [00:30<00:00,  6.68it/s]
100%|██████████| 435/435 [01:07<00:00,  6.42it/s]
100%|██████████| 209/209 [00:39<00:00,  5.35it/s]
  0%|          | 0/199 [00:00<?, ?it/s]

F04


100%|██████████| 199/199 [00:32<00:00,  6.18it/s]
100%|██████████| 249/249 [00:57<00:00,  4.29it/s]
  0%|          | 0/256 [00:00<?, ?it/s]

FC01


100%|██████████| 256/256 [00:57<00:00,  4.42it/s]
  0%|          | 0/269 [00:00<?, ?it/s]

FC02


100%|██████████| 269/269 [00:58<00:00,  4.56it/s]
100%|██████████| 992/992 [03:14<00:00,  5.10it/s]
  0%|          | 0/400 [00:00<?, ?it/s]

FC03


100%|██████████| 400/400 [01:23<00:00,  4.80it/s]
100%|██████████| 387/387 [01:10<00:00,  5.49it/s]
100%|██████████| 208/208 [00:31<00:00,  6.62it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

M01


100%|██████████| 100/100 [00:26<00:00,  3.82it/s]
100%|██████████| 286/286 [00:58<00:00,  4.87it/s]
  0%|          | 0/240 [00:00<?, ?it/s]

M02


100%|██████████| 240/240 [00:41<00:00,  5.83it/s]
100%|██████████| 160/160 [00:37<00:00,  4.23it/s]
  0%|          | 0/416 [00:00<?, ?it/s]

M03


100%|██████████| 416/416 [01:32<00:00,  4.48it/s]
  0%|          | 0/126 [00:00<?, ?it/s]

M04


100%|██████████| 126/126 [00:26<00:00,  4.76it/s]
100%|██████████| 295/295 [00:57<00:00,  5.11it/s]
  0%|          | 0/128 [00:00<?, ?it/s]

M05


100%|██████████| 128/128 [00:29<00:00,  4.39it/s]
  0%|          | 0/329 [00:00<?, ?it/s]

MC01


100%|██████████| 329/329 [01:17<00:00,  4.24it/s]
100%|██████████| 360/360 [09:37<00:00,  1.60s/it] 
100%|██████████| 419/419 [00:39<00:00, 10.72it/s]
  0%|          | 1/388 [00:00<00:55,  7.03it/s]

MC02


100%|██████████| 388/388 [01:01<00:00,  6.33it/s]
100%|██████████| 307/307 [01:12<00:00,  4.21it/s]
  0%|          | 0/600 [00:00<?, ?it/s]

MC03


100%|██████████| 600/600 [01:32<00:00,  6.46it/s]
100%|██████████| 300/300 [00:41<00:00,  7.19it/s]
  0%|          | 0/648 [00:00<?, ?it/s]

MC04


100%|██████████| 648/648 [01:35<00:00,  6.78it/s]
100%|██████████| 373/373 [00:50<00:00,  7.40it/s]


In [97]:
print("Number of samples: ", len(D_sptg), len(D_mfcc))

Number of samples:  1004 1004


In [103]:
D_mfcc = np.array(D_mfcc)
mfcc, label = D_mfcc.T
dic = {'mfcc' : mfcc, 'label' : label}
df = pd.DataFrame(dic) 
# saving the dataframe
df.to_csv('mfcc.csv')

In [104]:
D_sptg = np.array(D_sptg)
sptg, label = D_sptg.T
dic = {'sptg' : sptg, 'label' : label}
df = pd.DataFrame(dic) 
# saving the dataframe
df.to_csv('sptg.csv')

In [111]:
dataset = D_mfcc
random.shuffle(dataset)

train = dataset[:800]
dataset = dataset[800:]
validate = dataset[:150]
test = dataset[150:]

X_train, y_train = zip(*train)
X_validate, y_validate = zip(*validate)
X_test, y_test = zip(*test)

X_train = np.array([x.reshape( (128, 216, 1) ) for x in X_train])
X_validate = np.array([x.reshape( (128, 216, 1) ) for x in X_validate])
X_test = np.array([x.reshape( (128, 216, 1) ) for x in X_test])

In [112]:
X_train = np.array((X_train-np.min(X_train))/(np.max(X_train)-np.min(X_train)))
X_train = X_train/np.std(X_train)

X_validate = np.array((X_validate-np.min(X_validate))/(np.max(X_validate)-np.min(X_validate)))
X_validate = X_validate/np.std(X_validate)

X_test = np.array((X_test-np.min(X_test))/(np.max(X_test)-np.min(X_test)))
X_test = X_test/np.std(X_test)

In [113]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder=LabelEncoder()
#y=to_categorical(labelencoder.fit_transform(y))
#y
y_train = np.array(to_categorical(labelencoder.fit_transform(y_train)))
y_validate = np.array(to_categorical(labelencoder.fit_transform(y_validate)))
y_test = np.array(to_categorical(labelencoder.fit_transform(y_test)))

In [114]:
X_train.shape, X_validate.shape, X_test.shape, y_train.shape, y_validate.shape, y_test.shape

((800, 128, 216, 1),
 (150, 128, 216, 1),
 (54, 128, 216, 1),
 (800, 2),
 (150, 2),
 (54, 2))

In [115]:
model = Sequential()
input_shape=(128, 216, 1)

model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))

model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))

model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dropout(rate=0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(rate=0.5))

model.add(Dense(2))
model.add(Activation('softmax'))

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 124, 212, 24)      624       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 31, 106, 24)       0         
_________________________________________________________________
activation_5 (Activation)    (None, 31, 106, 24)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 27, 102, 48)       28848     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 51, 48)         0         
_________________________________________________________________
activation_6 (Activation)    (None, 6, 51, 48)         0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 2, 47, 48)        

In [116]:
model.compile(
	optimizer="Adam",
	loss="categorical_crossentropy",
	metrics=['accuracy'])

model.fit(
	x=X_train, 
	y=y_train,
    epochs=12,
    batch_size=32,
    validation_data= (X_validate, y_validate))

score = model.evaluate(
	x=X_test,
	y=y_test)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.3171052634716034
Test accuracy: 0.9259259104728699


In [117]:
predict = model.predict_classes(X_test)
prediction_class = labelencoder.inverse_transform(predict)
#real_class = labelencoder.inverse_transform(y_test)
#prediction_class

x, y = zip(*test)
y
match = 0
print('  Real       Prediction       Match')
for i in range(0,X_test.shape[0]):
    if(y[i]==prediction_class[i]):
        match+=1
    print(y[i] + '       ' + prediction_class[i] + '       ' + str(y[i]==prediction_class[i]))
print('Matched = ', match)
print('Not Matched = ',(X_test.shape[0] - match))

  Real       Prediction       Match
Typical       Typical       True
Typical       Typical       True
Typical       Typical       True
Atypical       Atypical       True
Atypical       Typical       False
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Typical       Typical       True
Typical       Typical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Atypical       True
Typical       Typical       True
Atypical       Atypical       True
Atypical       Atypical       True
Atypical       Typical       False
Typical       Typical       True
Atypical       Atypical       True
Typical       Typical       True
Typical       Typical       True
Atypical       Atypical       True
Atypical       Typical       False
Atypical       Atypical       True
At