In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
import os
from keras.optimizers import Adam


In [2]:
metadata = pd.read_csv('original.csv')

In [3]:
metadata.head(10)

Unnamed: 0,participant,filename,sound_prediction_score,tb_status
0,CODA_TB_0001,1645088710003-recording-1.wav,0.990254,0
1,CODA_TB_0001,1645088760390-recording-1.wav,0.990272,0
2,CODA_TB_0001,1645088760830-recording-1.wav,0.990112,0
3,CODA_TB_0001,1645088710843-recording-1.wav,0.990152,0
4,CODA_TB_0001,1645088759950-recording-1.wav,0.990039,0
5,CODA_TB_0001,1645088758630-recording-1.wav,0.990004,0
6,CODA_TB_0001,1645088758230-recording-1.wav,0.975063,0
7,CODA_TB_0001,1645088738844-recording-1.wav,0.990068,0
8,CODA_TB_0001,1645088784942-recording-1.wav,0.990291,0
9,CODA_TB_0001,1645088784542-recording-1.wav,0.990288,0


In [4]:
metadata['tb_status'].value_counts()

0    13684
1    13684
Name: tb_status, dtype: int64

In [5]:
metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27368 entries, 0 to 27367
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   participant             27368 non-null  object 
 1   filename                27368 non-null  object 
 2   sound_prediction_score  27368 non-null  float64
 3   tb_status               27368 non-null  int64  
dtypes: float64(1), int64(1), object(2)
memory usage: 855.4+ KB


In [3]:
audio_dataset_path = 'time/'

In [4]:
def features_extractor(file):
    audio, sample_rate = librosa.load(file, res_type = "kaiser_fast")
    mfccs_features = librosa.feature.mfcc(y = audio, sr = sample_rate, n_mfcc = 40)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
    
    return mfccs_scaled_features

In [5]:
from tqdm import tqdm

extracted_features = []
for index_num, row in tqdm(metadata.iterrows()):
    file_name = os.path.join(os.path.abspath(audio_dataset_path), str(row['filename']))
    final_class_labels = row['tb_status']
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])

27368it [04:45, 95.76it/s] 


In [7]:
import json
for i, (data, final_class_labels) in enumerate(extracted_features):
    if isinstance(data, np.ndarray):
        extracted_features[i][0] = data.tolist()
features_dict = {
    'extracted_features': extracted_features
}
# Specify the output JSON file path
output_file_path = 'extracted_features.json'

# Write the dictionary to the JSON file
with open(output_file_path, 'w') as json_file:
    json.dump(features_dict, json_file)

In [9]:
extracted_features_df = pd.DataFrame(extracted_features, columns = ['feature', 'tb_status'])
extracted_features_df.head()

Unnamed: 0,feature,tb_status
0,"[-233.81036, 60.20778, 13.185232, 28.068928, 2...",0
1,"[-227.28358, 66.7835, 26.548656, 14.419969, -8...",0
2,"[-291.94742, 61.165333, 36.86478, 11.336239, -...",0
3,"[-322.38007, 53.536182, 30.724964, 25.726463, ...",0
4,"[-215.52008, 58.5044, 26.074524, 0.0006127791,...",0


In [10]:
extracted_features_df['feature'][0].size

40

In [11]:
X = np.array(extracted_features_df['feature'].tolist())
y = np.array(extracted_features_df['tb_status'].tolist())

In [12]:
X.shape

(27368, 40)

In [13]:
y.shape

(27368,)

In [14]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y = to_categorical(y, num_classes = 2)

In [15]:
y.shape

(27368, 2)

In [16]:
y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [18]:
print(X_train.shape, " ", X_test.shape)
print(y_train.shape, " ", y_test.shape)

(21894, 40)   (5474, 40)
(21894, 2)   (5474, 2)


In [19]:
X_train = X_train.reshape(X_train.shape[0], 8, 5, 1)
X_test = X_test.reshape(X_test.shape[0], 8, 5, 1)
print(X_train.shape, " ", X_test.shape)

(21894, 8, 5, 1)   (5474, 8, 5, 1)


In [20]:
input_dim = (8, 5, 1)

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow.keras.utils import to_categorical 

In [22]:
model = Sequential()

model.add(Conv2D(32, (2, 2), activation='relu', input_shape = input_dim))
model.add(MaxPooling2D((1, 1)))

model.add(Conv2D(64, (2, 2), activation='relu'))
model.add(MaxPooling2D((1, 1)))

model.add(Conv2D(128, (2, 2), activation='relu'))
model.add(MaxPooling2D((1, 1)))

model.add(Flatten())
model.add(Dense(512, activation='relu'))

model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))

In [23]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 7, 4, 32)          160       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 7, 4, 32)         0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 6, 3, 64)          8256      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 3, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 5, 2, 128)         32896     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 5, 2, 128)        0

In [24]:
model.compile(optimizer = Adam(learning_rate=0.001), loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [33]:
# Training the model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime

num_epochs = 10

num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath = 'saved_models/audio_classification.hdf5',
#                                verbose=1, save_best_only = True)

# checkpointer = ModelCheckpoint(filepath = 'saved_model/cnnmodel',
#                                verbose=1, save_best_only = True)
 
start = datetime.now()

# model.fit(X_train, y_train, batch_size = num_batch_size, epochs = num_epochs, validation_data = (X_test, y_test),   verbose=1)
model.fit(X_train, y_train, batch_size = num_batch_size, epochs = num_epochs, validation_split=0.1,   verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x20057757550>

In [34]:

train_accuracy = model.evaluate(X_train, y_train, verbose=0)
print("Train Accuracy :",train_accuracy[1] * 100)
print("Loss :",train_accuracy[0] * 100)

test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy :",test_accuracy[1] * 100)
print("Loss :",test_accuracy[0] * 100)


Train Accuracy : 99.51128363609314
Loss : 1.7279865220189095
Test Accuracy : 95.72524428367615
Loss : 24.675342440605164


In [27]:
# filename="testing/p1.wav"
# audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
# mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=128)
# mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

# print(mfccs_scaled_features)
# mfccs_scaled_features=mfccs_scaled_features.reshape(1, 128, 1, 1)
# print(mfccs_scaled_features)
# print(mfccs_scaled_features.shape)

# predicted_label=np.argmax(model.predict(mfccs_scaled_features), axis=-1)
# print(predicted_label)
# prediction_class = labelencoder.inverse_transform(predicted_label) 
# prediction_class

In [28]:
filename="test3.wav"
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
mfccs_scaled_features = np.repeat(mfccs_scaled_features, 1, axis=0)

# print(mfccs_scaled_features)
mfccs_scaled_features=mfccs_scaled_features.reshape(1, 8, 5, 1)
# print(mfccs_scaled_features)
# print(mfccs_scaled_features.shape)

predicted_label=np.argmax(model.predict(mfccs_scaled_features), axis=-1)
print(predicted_label)
if predicted_label[0] == 1:
    print("TB Positive")
else:
    print("TB Negative")
# labelencoder = LabelEncoder()
# prediction_class = labelencoder.inverse_transform(predicted_label) 
# prediction_class

[1]
TB Positive


In [29]:
model.save('cnn_mfcc_10')



INFO:tensorflow:Assets written to: cnn_mfcc_10\assets


INFO:tensorflow:Assets written to: cnn_mfcc_10\assets
