In [1]:
import tensorflow as tf
import numpy as np
# Run every tf.function eagerly (op by op) instead of as a traced graph.
# NOTE(review): the reason for this is not recorded in the notebook, and eager
# execution is usually slower than graph mode — confirm it is still required.
tf.config.run_functions_eagerly(True)

from component.model import get_covid_classifier
from tensorflow import keras
from sklearn.utils import class_weight
from component.vocal_cords import get_vocal_cords_model
from utils.feature_extraction import  get_MFCCS

In [2]:
# Build the classifier for inputs of shape (300, 200, 1). Weight-file paths are
# passed for vocal_cords_path and sentiment_path; tract_path is left as None.
clf = get_covid_classifier(
    input_shape=(300, 200, 1),
    vocal_cords_path='pretrained_weight/vocal_cords_v1.h5',
    sentiment_path='pretrained_weight/sentiment_v1.h5',
    tract_path=None,
)

# Print the layer-by-layer architecture as a sanity check.
clf.summary()


Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 300, 200, 1) 0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 300, 200, 1)  2           input_1[0][0]                    
__________________________________________________________________________________________________
distribution_lambda (Distributi multiple             0           dense[0][0]                      
__________________________________________________________________________________________________
model_4 (Functional)            (None, 10, 7, 2048)  23581440    distribution_lambda[0][0]        
____________________________________________________________________________________________

In [3]:
import random
import warnings


class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves randomly sampled mini-batches.

    Every call to ``__getitem__`` draws up to ``batch_size`` distinct samples
    at random from the whole dataset and converts each input with
    ``get_MFCCS``. The requested batch index is ignored, so consecutive
    batches may overlap and a given index does not map to a fixed batch
    (this also holds when the generator is used as validation data).

    Args:
        X: Indexable collection of inputs accepted by ``get_MFCCS``.
        y: Indexable collection of labels aligned with ``X``.
        batch_size: Maximum number of samples per batch; must be positive.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``batch_size`` is
            not positive.
    """

    def __init__(self, X, y, batch_size):
        super().__init__()
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.X = X
        self.y = y
        self.batch_size = batch_size
        # A list rather than a set: random.sample() no longer accepts sets
        # (deprecated in Python 3.9, TypeError from 3.11).
        self.indexes = list(range(len(X)))

    def __len__(self):
        """Return the number of batches per epoch, i.e. ceil(n_samples / batch_size)."""
        # Keras expects the batch count here, not the sample count.
        return -(-len(self.indexes) // self.batch_size)

    def __getitem__(self, _):
        """Return one randomly drawn batch as ``(features, labels)`` arrays.

        Samples whose feature extraction fails are skipped with a warning, so
        the returned batch can hold fewer than ``batch_size`` items.
        """
        # Clamp so a dataset smaller than batch_size does not make
        # random.sample() raise ValueError.
        sample_size = min(self.batch_size, len(self.indexes))
        chosen = random.sample(self.indexes, sample_size)

        features = []
        labels = []
        for index in chosen:
            try:
                mfccs = get_MFCCS(self.X[index])
            except Exception as error:
                # Best-effort: skip the bad sample, but say so. Unlike a bare
                # `except:`, this lets KeyboardInterrupt stop training.
                warnings.warn(f"Skipping sample at index {index}: {error!r}")
                continue
            features.append(mfccs)
            labels.append(self.y[index])
        return np.array(features), np.array(labels, dtype=int)

In [4]:
import glob
# glob returns paths in arbitrary, filesystem-dependent order. Sort them so the
# seeded train/validation split selects the same files on every machine.
X1 = sorted(glob.glob("merge_dataset/train/aug_p/*.wav"))  # files labelled 1
X0 = sorted(glob.glob("merge_dataset/train/aug_n/*.wav"))  # files labelled 0

# Positives first, then negatives; y mirrors that order.
X = X1 + X0
y = [1] * len(X1) + [0] * len(X0)

print(f"Found {len(X1)} labels positive")
print(f"Found {len(X0)} labels negative")

Found 2603 labels positive
Found 19465 labels negative


In [5]:
from sklearn.model_selection import train_test_split
# Stratified 80/20 split with a fixed seed, so both splits keep the class ratio.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, train_size=0.8, random_state=69, stratify=y
)

# 'balanced' weights are inversely proportional to class frequency.
# `classes` and `y` are passed by keyword: scikit-learn >= 1.0 made them
# keyword-only, so the positional form raises TypeError there.
train_classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight(
    'balanced', classes=train_classes, y=y_train
)

# Key the dict by the actual class label rather than by enumeration position,
# so the mapping stays correct even if the labels are not exactly 0..n-1.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(train_classes, balanced_weights)
}
print(f"Class weights: {class_weights}")

Class weights: {0: 0.5668507577703571, 1: 4.239673390970221}




In [6]:
# Wrap each split in a DataGenerator configured with a batch size of 8.
train_generator = DataGenerator(X=X_train, y=y_train, batch_size=8)
val_generator = DataGenerator(X=X_val, y=y_val, batch_size=8)

In [7]:
# Adam (lr=0.0005) with binary cross-entropy; accuracy is tracked as 'acc'.
adam_optimizer = keras.optimizers.Adam(learning_rate=0.0005)
clf.compile(optimizer=adam_optimizer, loss='binary_crossentropy', metrics=['acc'])

# Save weights only, and only when val_loss reaches a new minimum.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    'pretrained_weight/model_v1.h5',
    monitor="val_loss",
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs')

# Each of the 50 epochs runs 30 training steps and 50 validation steps.
history = clf.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,
    steps_per_epoch=30,
    validation_steps=50,
    class_weight=class_weights,
    callbacks=[checkpoint_callback, tensorboard_callback],
)
# Disabled alternative: callbacks=[keras.callbacks.EarlyStopping(monitor='val_acc', patience=3)]

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 1/50

  "Empty filters detected in mel frequency basis. "



Epoch 00001: val_loss improved from inf to 0.46156, saving model to pretrained_weight\model_v1.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.46156 to 0.44886, saving model to pretrained_weight\model_v1.h5
Epoch 3/50

Epoch 00003: val_loss improved from 0.44886 to 0.34313, saving model to pretrained_weight\model_v1.h5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.34313
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.34313
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.34313
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.34313
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.34313
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.34313
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.34313
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.34313
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.34313
Epoch 13/50

Epoch 00013: val_loss did not improve from 0.34313
Epoch 14/50

Epoch 00014: val_loss did 

In [8]:
from sklearn.metrics import classification_report
import tqdm
def _load_test_features(audio_paths, label):
    """Run ``get_MFCCS`` on every path and pair each result with ``label``.

    Files for which ``get_MFCCS`` raises are skipped, so one unreadable clip
    does not abort the whole run; their paths are collected for reporting.

    Args:
        audio_paths: Iterable of audio file paths.
        label: Label assigned to every successfully processed file.

    Returns:
        Tuple ``(features, labels, skipped)`` of three lists; ``features`` and
        ``labels`` have equal length, ``skipped`` holds the failed paths.
    """
    features, labels, skipped = [], [], []
    for audio_path in tqdm.tqdm(audio_paths):
        try:
            features.append(get_MFCCS(audio_path))
        except Exception:
            # Not a bare `except:`: a kernel interrupt must still be able to
            # stop this loop.
            skipped.append(audio_path)
            continue
        labels.append(label)
    return features, labels, skipped


# Sorted so the row order of X_test / y_test is the same on every run.
X1_test = sorted(glob.glob("merge_dataset/test/aug_p/*.wav"))
X0_test = sorted(glob.glob("merge_dataset/test/aug_n/*.wav"))

positive_features, positive_labels, positive_skipped = _load_test_features(X1_test, 1)
negative_features, negative_labels, negative_skipped = _load_test_features(X0_test, 0)

# Report dropped files instead of discarding them silently.
skipped_test_files = positive_skipped + negative_skipped
if skipped_test_files:
    print(f"Skipped {len(skipped_test_files)} test files that could not be processed")

# Positives first, then negatives; y_test stays a plain list, as before.
y_test = positive_labels + negative_labels
X_test = np.array(positive_features + negative_features)

100%|██████████| 429/429 [00:10<00:00, 42.50it/s]
100%|██████████| 3040/3040 [01:18<00:00, 38.87it/s]


In [10]:
# Load the weights file from disk into the model, then score the test features.
best_weights_path = "pretrained_weight/model_v1.h5"
clf.load_weights(best_weights_path)
y_pred = clf.predict(X_test)

  "Even though the tf.config.experimental_run_functions_eagerly "


In [13]:
# Display the predicted scores.
# NOTE(review): every row visible in the recorded output is ~0.157, which
# suggests the model returns a near-constant score regardless of input —
# investigate before reporting any metrics from these predictions.
y_pred

array([[0.15713271],
       [0.15713271],
       [0.15713271],
       ...,
       [0.15716942],
       [0.15716943],
       [0.15716943]], dtype=float32)