#### Audio Classification Data Preprocessing

#### Observation
Here Librosa converts the signal to mono, meaning the channel will alays be 1

### Extract Features
Here we will be using Mel-Frequency Cepstral Coefficients(MFCC) from the audio 
samples.
The MFCC summarises the frequency distribution across the window size, so it is possible to analyse both the frequency and time characteristics of the sound. These audio representations will allow us to identify features for classification.

In [203]:
#### Extracting MFCC's For every audio file
import pandas as pd
import os
import librosa
import numpy as np

#breathing_dataset_path='AUDIO/breathing'
breathing_dataset_path='AUGMENTED_AUDIO/breathing'
test_breathing_dataset_path='TEST_AUDIO/breathing'

# cough_dataset_path='AUDIO/cough'
# test_cough_dataset_path='TEST_AUDIO/cough'


# speech_dataset_path='AUDIO/breathing'
# test_speech_dataset_path='TEST_AUDIO/breathing'


In [204]:
#metadata=pd.read_csv('metadata.csv')
metadata=pd.read_csv('augmented_metadata.csv')
metadata.head()

Unnamed: 0,SUB_ID,COVID_STATUS,GENDER
0,NPHYFCYK,n,m
1,HTCFAFDO,n,m
2,XLIOSZXS,n,m
3,FONIOYTJ,n,m
4,DZEREGIM,n,m


In [205]:
def features_extractor(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
    
    return mfccs_scaled_features
    

In [206]:
# example_feature = features_extractor('AUGMENTED_AUDIO/breathing/ABEEJBTZ.flac')

In [207]:
# print(example_feature)

In [208]:

from tqdm import tqdm
### Now we iterate through every audio file and extract features 
### using Mel-Frequency Cepstral Coefficients
extracted_features=[]
for index_num,row in tqdm(metadata.iterrows()):
    file_name = os.path.join(os.path.abspath(breathing_dataset_path),str(row["SUB_ID"]+'.flac'))
    final_class_labels=row["COVID_STATUS"]
    if os.path.isfile(file_name):
        data=features_extractor(file_name)
        extracted_features.append([data,final_class_labels])
    else:
        print("ERROR to find the file",file_name)

1481it [02:45,  8.96it/s]


In [209]:
### converting extracted_features to Pandas dataframe
extracted_features_df=pd.DataFrame(extracted_features,columns=['feature','class'])
#extracted_features_df.head()


In [210]:
### Split the dataset into independent and dependent dataset
X=np.array(extracted_features_df['feature'].tolist())
y=np.array(extracted_features_df['class'].tolist())

In [211]:
X.shape

(1481, 40)

In [212]:
y

array(['n', 'n', 'n', ..., 'p', 'p', 'p'], dtype='<U1')

In [213]:
### Label Encoding
###y=np.array(pd.get_dummies(y))
### Label Encoder
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder=LabelEncoder()
y=to_categorical(labelencoder.fit_transform(y))

In [214]:
y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

In [215]:
### Train Test Split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [216]:
X_train

array([[-7.78018494e+02, -3.01640739e+01, -3.02195129e+01, ...,
        -2.91667372e-01, -9.70910490e-01, -1.36435211e+00],
       [-4.19306519e+02,  5.24211586e-01, -5.05692177e+01, ...,
        -1.65192449e+00, -3.96591365e-01,  1.07823539e+00],
       [-4.87690552e+02,  6.10781059e+01, -1.31738539e+01, ...,
         1.77632833e+00, -2.10279250e+00, -1.98967552e+00],
       ...,
       [-6.15608643e+02,  2.93847370e+01, -1.23054969e+00, ...,
        -9.94546354e-01,  1.21759176e-01,  8.16841125e-01],
       [-4.67803864e+02,  4.18853493e+01, -2.05269012e+01, ...,
         5.13740063e-01, -3.51367998e+00,  6.26599908e-01],
       [-5.36224670e+02, -2.34993076e+01, -3.99812851e+01, ...,
        -2.88042593e+00, -7.49136508e-01, -1.13020344e-02]], dtype=float32)

In [217]:
y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

In [218]:
X_train.shape

(1184, 40)

In [219]:
X_test.shape

(297, 40)

In [220]:
y_train.shape

(1184, 2)

In [221]:
y_test.shape

(297, 2)

### Model Creation

In [222]:
import tensorflow as tf
print(tf.__version__)

2.6.0


In [223]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [224]:
### No of classes
num_labels=y.shape[1]

In [225]:
num_labels

2

In [226]:
#FIRST MODEL FROM VIDEO
model=Sequential()
###first layer
model.add(Dense(100,input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# ###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))


###final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [227]:
# FROM medium tutorial
# from tensorflow.keras import models, layers
# # input_shape=(128,1000,3)
# model = models.Sequential()
# model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(40,)))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Dropout(0.2))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Dropout(0.2))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.Flatten())
# model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dropout(0.2))
# model.add(layers.Dense(32, activation='relu'))
# model.add(layers.Dense(24, activation='softmax'))

In [228]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 100)               4100      
_________________________________________________________________
activation_20 (Activation)   (None, 100)               0         
_________________________________________________________________
dropout_15 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 200)               20200     
_________________________________________________________________
activation_21 (Activation)   (None, 200)               0         
_________________________________________________________________
dropout_16 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 100)              

In [229]:
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')

In [230]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 30
num_batch_size = 50

checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/30

Epoch 00001: val_loss improved from inf to 1.65374, saving model to saved_models/audio_classification.hdf5
Epoch 2/30

Epoch 00002: val_loss improved from 1.65374 to 1.61343, saving model to saved_models/audio_classification.hdf5
Epoch 3/30

Epoch 00003: val_loss improved from 1.61343 to 0.74640, saving model to saved_models/audio_classification.hdf5
Epoch 4/30

Epoch 00004: val_loss improved from 0.74640 to 0.74102, saving model to saved_models/audio_classification.hdf5
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.74102
Epoch 6/30

Epoch 00006: val_loss improved from 0.74102 to 0.68492, saving model to saved_models/audio_classification.hdf5
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.68492
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.68492
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.68492
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.68492
Epoch 11/30

Epoch 00011: val_loss did not improve from 0.68492
Epoch 12/

In [231]:
test_accuracy=model.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.5622895359992981


In [232]:
# filename="AUDIO/breathing/ABEEJBTZ.flac"
# prediction_feature=features_extractor(filename)
# prediction_feature=prediction_feature.reshape(1,-1)
# predict_x=model.predict(prediction_feature)
# classes_x=np.argmax(predict_x,axis=1)

In [233]:
# X_test[1]

In [234]:
# classes_x

In [235]:
# model.predict(X_test)

### Testing Some Test Audio Data

Steps
- Preprocess the new audio data
- predict the classes
- Invere transform your Predicted Label

In [236]:
# filename="AUGMENTED_AUDIO/breathing/GEGERRDK.flac"
# audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
# mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
# mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

# print(mfccs_scaled_features)
# mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)
# predicted=model.predict(mfccs_scaled_features)
# predicted_label=np.argmax(predicted,axis=1)
# print(predicted_label)
# prediction_class = labelencoder.inverse_transform(predicted_label) 
# print(prediction_class)

In [237]:
#STARTING EVALUATION ON BLIND TEST DATA

In [238]:
test_metadata_breathing=pd.read_csv('test_metadata_breathing.csv')
test_metadata_breathing.head()

Unnamed: 0,SUB_ID,COVID_STATUS
0,EKANDFRP,n
1,QXIDXPCN,n
2,RJNHLHRD,n
3,ACACIEDW,n
4,JVPUNCRM,n


In [239]:

extracted_features=[]
for index_num,row in tqdm(test_metadata_breathing.iterrows()):
    file_name = os.path.join(os.path.abspath(test_breathing_dataset_path),str(row["SUB_ID"]+'.flac'))
    final_class_labels=row["COVID_STATUS"]
    if os.path.isfile(file_name):
        data=features_extractor(file_name)
        extracted_features.append([data,final_class_labels])
    else:
        print("ERROR to find the file", file_name)
        

471it [01:10,  6.64it/s]


In [246]:
### converting extracted_features to Pandas dataframe
extracted_features_df=pd.DataFrame(extracted_features,columns=['feature','class'])
extracted_features_df.head()
### Split the dataset into independent and dependent dataset
X_blindtest=np.array(extracted_features_df['feature'].tolist())
y_blindtest=np.array(extracted_features_df['class'].tolist())
### Label Encoder
labelencoder=LabelEncoder()
y_blindtest=to_categorical(labelencoder.fit_transform(y_blindtest))


In [247]:
test_accuracy=model.evaluate(X_blindtest,y_blindtest)
print(test_accuracy[1])

0.8726114630699158


In [248]:
print(test_accuracy)


[0.6685388088226318, 0.8726114630699158]
