In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
from sklearn.model_selection import train_test_split
import os
from tqdm import tqdm

In [2]:
metadata = pd.read_csv('original.csv')


In [3]:
metadata.head(10)

Unnamed: 0,participant,filename,sound_prediction_score,tb_status
0,CODA_TB_0001,1645088710003-recording-1.wav,0.990254,0
1,CODA_TB_0001,1645088760390-recording-1.wav,0.990272,0
2,CODA_TB_0001,1645088760830-recording-1.wav,0.990112,0
3,CODA_TB_0001,1645088710843-recording-1.wav,0.990152,0
4,CODA_TB_0001,1645088759950-recording-1.wav,0.990039,0
5,CODA_TB_0001,1645088758630-recording-1.wav,0.990004,0
6,CODA_TB_0001,1645088758230-recording-1.wav,0.975063,0
7,CODA_TB_0001,1645088738844-recording-1.wav,0.990068,0
8,CODA_TB_0001,1645088784942-recording-1.wav,0.990291,0
9,CODA_TB_0001,1645088784542-recording-1.wav,0.990288,0


In [4]:
metadata['tb_status'].value_counts()

0    13684
1    13684
Name: tb_status, dtype: int64

In [5]:
audio_dataset_path = 'time/'

In [6]:
from PIL import Image
def features_extractor(file):
    audio, sample_rate = librosa.load(file, res_type = "kaiser_fast")
    mfccs_features = librosa.feature.mfcc(y = audio, sr = sample_rate, n_mfcc = 40)
#     mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
    mfccs_array = np.array(mfccs_features)
    mfccs_image = Image.fromarray(mfccs_array).resize((32,32))
    
    
    return np.array(mfccs_image)

In [7]:
extracted_features = []
for index_num, row in tqdm(metadata.iterrows()):
    file_name = os.path.join(os.path.abspath(audio_dataset_path), str(row['filename']))
    final_class_labels = row['tb_status']
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])
    
    

27368it [04:13, 108.15it/s]


In [8]:
import json
for i, (data, final_class_labels) in enumerate(extracted_features):
    if isinstance(data, np.ndarray):
        extracted_features[i][0] = data.tolist()
features_dict = {
    'extracted_features': extracted_features
}
# Specify the output JSON file path
output_file_path = 'extracted_features_resnet.json'

# Write the dictionary to the JSON file
with open(output_file_path, 'w') as json_file:
    json.dump(features_dict, json_file)

In [9]:
extracted_features_df = pd.DataFrame(extracted_features, columns = ['feature', 'tb_status'])
extracted_features_df.head()

Unnamed: 0,feature,tb_status
0,"[[-330.8592224121094, -337.03973388671875, -35...",0
1,"[[-301.1387939453125, -313.8829650878906, -337...",0
2,"[[-390.083251953125, -375.6857604980469, -365....",0
3,"[[-425.3885498046875, -411.5791015625, -399.65...",0
4,"[[-398.68280029296875, -386.0237731933594, -37...",0


In [11]:
# extracted_features_df['feature'][0].size

In [12]:
X = np.array(extracted_features_df['feature'].tolist())
y = np.array(extracted_features_df['tb_status'].tolist())

In [13]:
X.shape

(27368, 32, 32)

In [14]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y = to_categorical(y, num_classes = 2)

In [15]:
y.shape

(27368, 2)

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [17]:
print(X_train.shape, " ", X_test.shape)
print(y_train.shape, " ", y_test.shape)

(21894, 32, 32)   (5474, 32, 32)
(21894, 2)   (5474, 2)


In [18]:
X_train = X_train.reshape(21894, 32, 32, 1)
X_test = X_test.reshape(5474, 32, 32, 1)
print(X_train.shape, " ", X_test.shape)

(21894, 32, 32, 1)   (5474, 32, 32, 1)


In [19]:
input_dim = (32, 32, 1)

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.optimizers import Adam


In [21]:
resnet = ResNet50(include_top = False,
        input_shape = input_dim,
        pooling = 'avg', classes = 2,
        weights = None)
for layer in resnet.layers:
    layer.trainable = False

In [22]:
model = Sequential()

model.add(resnet)

model.add(Flatten())
model.add(Dense(512, activation='relu'))

model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))

In [23]:
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23581440  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 512)               1049088   
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
Total params: 24,631,554
Trainable params: 1,050,114
Non-trainable params: 23,581,440
_________________________________________________________________


In [24]:
model.compile(optimizer = Adam(learning_rate=0.001), loss = 'binary_crossentropy', metrics = ['accuracy'])

In [26]:
# Training the model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime

num_epochs = 20
num_batch_size = 32


start = datetime.now()

model.fit(X_train, y_train, batch_size = num_batch_size, epochs = num_epochs, validation_split=0.1, verbose=1)

duration = datetime.now() - start

print("Training completed in time : ", duration)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training completed in time :  0:10:56.345663


In [27]:
loss, accuracy = model.evaluate(X_train, y_train)
print("Train Loss:", loss)
print("Train Accuracy:", accuracy)

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)





Train Loss: 0.5833548903465271
Train Accuracy: 0.7020187973976135
Test Loss: 0.5834708213806152
Test Accuracy: 0.7071611285209656


In [None]:
filename="solicited/1654077922741-recording-1.wav"
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
# mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
# mfccs_scaled_features = np.repeat(mfccs_scaled_features, 1, axis=0)

#print(mfccs_scaled_features)
# mfccs_scaled_features=mfccs_scaled_features.reshape(1, 32, 32, 1)
mfccs_array = np.array(mfccs_features)
mfccs_image = Image.fromarray(mfccs_array).resize((32,32))
mfccs_scaled_features = np.array(mfccs_image)
print(mfccs_scaled_features)
print(mfccs_scaled_features.shape)
mfccs_scaled_features=mfccs_scaled_features.reshape(1, 32, 32, 1)

predicted_label=np.argmax(model.predict(mfccs_scaled_features), axis=-1)
print(predicted_label)
if predicted_label[0] == 1:
    print("TB Positive")
else:
    print("TB Negative")
# labelencoder = LabelEncoder()
# prediction_class = labelencoder.inverse_transform(predicted_label) 
# prediction_class"""