This notebook records the benchmarking results of Table 4 for the COVID-19, Symptoms, and Abnormal detection tasks:

+ On the Sound-Dr dataset
+ Uses the pretrained FRILL model to extract features
+ Uses XGBoost (XGB) for classification
+ Sets the seed to 1111 (`seed=1111`)

# Library

In [1]:
import warnings
warnings.filterwarnings("ignore")

import os, time, math
import numpy as np
import pandas as pd
import zipfile, pickle, h5py, joblib, json

import librosa
import opensmile
import xgboost as xgb
import tensorflow_hub as hub

from math import pi
from tqdm import tqdm
from pathlib import Path
from multiprocessing import Pool
from scipy.fftpack import fft, hilbert
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.neighbors import NearestNeighbors
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_predict, train_test_split
from sklearn.metrics import f1_score, confusion_matrix, roc_auc_score, auc, precision_recall_curve, roc_curve, average_precision_score

SoX could not be found!

    If you do not have SoX, proceed here:
     - - - http://sox.sourceforge.net/ - - -

    If you do (or think that you should) have SoX, double-check your
    path variables.
    


# Configuration

In [2]:
# Audio framing constants. NOTE(review): SR, FRAME_LEN, HOP and MFCC_dim are not
# referenced by any other cell visible in this notebook.
# Sample rate in Hz
SR = 48000
# Frame length in samples: SR / 10, i.e. 100 ms
FRAME_LEN = int(SR / 10)
# Hop length in samples: 50% overlap, i.e. half a frame = 50 ms
HOP = int(FRAME_LEN / 2)
# The MFCC dimension
MFCC_dim = 13

# Cross-validation setup: number of stratified folds and the shuffle seed
fold_num = 5
seed = 1111

# Dataset root, output/cache directory (created if missing) and the metadata table
DIR_DATA = "./sounddr_data/"
OUTPUT_DIR = DIR_DATA + 'output/'
os.makedirs(OUTPUT_DIR, exist_ok = True)
df = pd.read_csv(DIR_DATA + 'data.csv')
# Path of each cough recording, relative to DIR_DATA
df['file_path'] = df['file_cough'] + '.wav'

# Binary targets derived from the questionnaire columns:
#   label_symptom  = 1 when the symptom field is anything other than "['No']"
#   label_covid    = 1 when the COVID-19 status is anything other than 'never'
#   label_abnormal = 1 when either of the two conditions above holds
df['label_symptom'] = (df['symptoms_status_choice'].map(str) != "['No']").astype(int)
df['label_abnormal'] = ((df['symptoms_status_choice'].map(str) != "['No']") | (df['cov19_status_choice'] != 'never')).astype(int)
df['label_covid'] = (df['cov19_status_choice'] != 'never').astype(int)

df.tail()

Unnamed: 0,sex_choice,age_choice,current_city,symptoms_status_choice,medical_condition_choice,insomnia_status_choice,smoke_status_choice,cov19_status_choice,hospital_choice,cough_noise,device_model,file_cough,label,cough_duration,nose_duration,mouth_duration,file_path,label_symptom,label_abnormal,label_covid
1305,Female,20,thanh hóa,['No'],['No'],No,never,never,No,True,OPPO CPH1933,cough/good_cough_2021-08-15T13:43:33.132Z,0,25.856,17.664,22.357333,cough/good_cough_2021-08-15T13:43:33.132Z.wav,0,0,0
1306,Male,32,Ho Chi Minh,"['fever', 'headache']",['No'],No,never,last14,No,True,Laptop/Desktop,cough/bad_cough_2021-09-16T07:18:48.594Z,1,29.350042,29.814438,29.814438,cough/bad_cough_2021-09-16T07:18:48.594Z.wav,1,1,1
1307,Male,23,Ho Chi Minh,"['fever', 'chills', 'sorethroat', 'drycough', ...",['No'],1,1to10,last14,No,True,Laptop/Desktop,cough/bad_cough_2021-09-06T11:04:49.842Z,1,18.432,17.152,15.530667,cough/bad_cough_2021-09-06T11:04:49.842Z.wav,1,1,1
1308,Male,18,Ho Chi Minh,"['wetcough', 'sorethroat']",['No'],No,never,never,,True,Laptop/Desktop,cough/bad_cough_2021-08-24T08:08:28.798Z,1,17.322667,17.749333,18.517333,cough/bad_cough_2021-08-24T08:08:28.798Z.wav,1,1,0
1309,Female,28,Ho Chi Minh,['No'],['No'],No,never,never,No,True,iPhone 8,cough/good_cough_2021-09-24T06:33:54.423Z,0,21.504,26.624,25.6,cough/good_cough_2021-09-24T06:33:54.423Z.wav,0,0,0


In [3]:
def get_duration(filename, mono=True, res_type="kaiser_fast"):
    """
      Load an audio file at its native sample rate and measure its length
      Args:
        - filename: path of the audio file
                    :type: str (or path-like)
        - mono: whether to down-mix to a single channel while loading
                :type: bool
        - res_type: resampling type forwarded to librosa.load
                    :type: str
      Returns:
        - duration: length of the audio in seconds, 0 when the file cannot be read
                    :type: float
        - sr: native sample rate of the file, None when the file cannot be read
              :type: int or None
    """
    duration = 0
    # Bound before the try-block: without this a failed load left `sr` undefined
    # and the return statement raised UnboundLocalError instead of reporting the file.
    sr = None
    try:
        y, sr = librosa.load(filename, sr=None, mono=mono, res_type=res_type)
        duration = librosa.get_duration(y=y, sr=sr)
    except Exception:
        # Best-effort by design: report the unreadable file and keep going.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print('Error file:' + str(filename))
    return duration, sr

# Decode every recording once and split the (duration, sample_rate) pairs afterwards;
# calling get_duration separately per column loaded each audio file twice.
audio_info = [get_duration(DIR_DATA + rel_path) for rel_path in df['file_path']]
df['Duration'] = [duration for duration, _ in audio_info]
df['Sample_rate'] = [sample_rate for _, sample_rate in audio_info]
df.tail(10)

Unnamed: 0,sex_choice,age_choice,current_city,symptoms_status_choice,medical_condition_choice,insomnia_status_choice,smoke_status_choice,cov19_status_choice,hospital_choice,cough_noise,...,label,cough_duration,nose_duration,mouth_duration,file_path,label_symptom,label_abnormal,label_covid,Duration,Sample_rate
1300,Male,26,Ho Chi Minh,"['fever', 'chills', 'sorethroat', 'drycough']",['asthma'],2to3,never,last14,Yes,True,...,1,18.176,22.101333,19.456,cough/bad_cough_2021-09-22T06:24:21.709Z.wav,1,1,1,18.176,48000
1301,Male,22,Bac Ninh,['No'],['No'],No,never,never,No,True,...,0,24.746667,21.248,23.466667,cough/good_cough_2021-08-15T02:19:06.324Z.wav,0,0,0,24.746667,48000
1302,Male,26,Ho Chi Minh,"['stuffynose', 'snivel', 'wetcough', 'headache...",['No'],Onceper2Weeks,never,last14,No,True,...,1,16.725333,16.810667,18.005333,cough/bad_cough_2021-09-16T23:14:24.465Z.wav,1,1,1,16.725333,48000
1303,Male,20,Binh Duong,['No'],['No'],No,never,never,No,True,...,0,19.882667,18.688,16.64,cough/good_cough_2021-09-15T09:17:44.889Z.wav,0,0,0,19.882667,48000
1304,Female,25,Ha Noi,['No'],['No'],No,never,never,No,True,...,0,26.197333,22.698667,26.453333,cough/good_cough_2021-08-28T00:41:21.595Z.wav,0,0,0,26.197333,48000
1305,Female,20,thanh hóa,['No'],['No'],No,never,never,No,True,...,0,25.856,17.664,22.357333,cough/good_cough_2021-08-15T13:43:33.132Z.wav,0,0,0,25.856,48000
1306,Male,32,Ho Chi Minh,"['fever', 'headache']",['No'],No,never,last14,No,True,...,1,29.350042,29.814438,29.814438,cough/bad_cough_2021-09-16T07:18:48.594Z.wav,1,1,1,29.350042,48000
1307,Male,23,Ho Chi Minh,"['fever', 'chills', 'sorethroat', 'drycough', ...",['No'],1,1to10,last14,No,True,...,1,18.432,17.152,15.530667,cough/bad_cough_2021-09-06T11:04:49.842Z.wav,1,1,1,18.432,48000
1308,Male,18,Ho Chi Minh,"['wetcough', 'sorethroat']",['No'],No,never,never,,True,...,1,17.322667,17.749333,18.517333,cough/bad_cough_2021-08-24T08:08:28.798Z.wav,1,1,0,17.322667,48000
1309,Female,28,Ho Chi Minh,['No'],['No'],No,never,never,No,True,...,0,21.504,26.624,25.6,cough/good_cough_2021-09-24T06:33:54.423Z.wav,0,0,0,21.504,48000


# Utils

In [4]:
def merge_feature(list_features):
    """
      Join feature matrices column-wise and sanitise the values
      NaNs are replaced via np.nan_to_num, then every value is limited to the
      finite float32 range.
      Args:
        - list_features: feature matrices to join along axis=1
                         :type: a list of numpy arrays
      Returns:
        - features: the joined and sanitised feature matrix
                    :type: a numpy array
    """
    float32_limit = np.finfo(np.float32).max
    merged = np.nan_to_num(np.concatenate(list_features, axis=1))
    return np.clip(merged, -float32_limit, float32_limit)

In [5]:
def compute_metrics(cfs_matrix):
    """
      Calculate common metrics based on a binary confusion matrix
      The matrix is read as [[TN, FP], [FN, TP]] (rows = actual, columns = predicted).
      A metric whose denominator is zero (e.g. no positive predictions) is
      reported as 0.0 instead of NaN.
      Args:
        - cfs_matrix: a sklearn-style confusion matrix
                      :type: a ndarray (or nested list) of shape (2, 2)
      Returns:
        - precision: the precision of the prediction
                     :type: float
        - recall: the recall of the prediction
                  :type: float
        - f1: the f1-score of the prediction
              :type: float
    """
    # Accept nested lists as well as ndarrays; [i, j] indexing needs an array.
    cfs_matrix = np.asarray(cfs_matrix)
    true_pos = cfs_matrix[1, 1]
    false_pos = cfs_matrix[0, 1]
    false_neg = cfs_matrix[1, 0]

    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg

    # Guard each division: NumPy would otherwise return NaN with only a warning,
    # and this notebook silences warnings.
    precision = float(true_pos / predicted_pos) if predicted_pos > 0 else 0.0
    recall = float(true_pos / actual_pos) if actual_pos > 0 else 0.0

    pr_sum = precision + recall
    f1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0.0
    return precision, recall, f1

# Extract Audio Features

In [7]:
# TensorFlow is imported through its v2 compatibility namespace and v2 behaviour
# is switched on before anything else touches TF.
# NOTE(review): these imports live outside the notebook's library cell, and
# tensorflow_hub is already imported there as `hub`.
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
import tensorflow_hub as hub

# FRILL "no frontend" model from TF Hub; it is called further down on
# precomputed log-mel patches and returns a dict with an 'embedding' entry.
# NOTE(review): hub.load presumably needs network access on first use — confirm.
frill_nofrontend_model = hub.load('https://tfhub.dev/google/nonsemantic-speech-benchmark/frill-nofrontend/1')

def stabilized_log(data, additive_offset, floor):
  """
    TF version of mfcc_mel.StabilizedLog
    Computes log(max(data, floor) + additive_offset) element-wise.
    Args:
      - data: values to take the logarithm of
      - additive_offset: constant added after flooring
      - floor: lower bound applied to data before the offset is added
    Returns:
      - the element-wise stabilised logarithm
  """
  floored = tf.math.maximum(data, floor)
  return tf.math.log(floored + additive_offset)


def log_mel_spectrogram(data,
                        audio_sample_rate,
                        num_mel_bins=64,
                        log_additive_offset=0.001,
                        log_floor=1e-12,
                        window_length_secs=0.025,
                        hop_length_secs=0.010,
                        fft_length=None):
    """
      TF version of mfcc_mel.LogMelSpectrogram
      Takes the magnitude STFT (Hann window) of the signal in float64, projects
      it onto a mel filterbank spanning 125 Hz - 7500 Hz and applies the
      stabilised logarithm.
      Args:
        - data: audio samples
        - audio_sample_rate: sample rate of data in Hz
        - num_mel_bins: number of mel bands
        - log_additive_offset: offset forwarded to stabilized_log
        - log_floor: floor forwarded to stabilized_log
        - window_length_secs: STFT window length in seconds
        - hop_length_secs: STFT hop length in seconds
        - fft_length: FFT size; when falsy, derived from the window length
      Returns:
        - log_mel: the log-mel spectrogram, one row per STFT frame
    """
    win_samples = int(round(audio_sample_rate * window_length_secs))
    hop_samples = int(round(audio_sample_rate * hop_length_secs))
    if not fft_length:
        # Power of two derived from the window length (ceil of its base-2 log)
        exponent = int(np.ceil(np.log(win_samples) / np.log(2.0)))
        fft_length = 2 ** exponent

    stft = tf.signal.stft(
        tf.cast(data, tf.dtypes.float64),
        frame_length=win_samples,
        frame_step=hop_samples,
        fft_length=fft_length,
        window_fn=tf.signal.hann_window,
    )
    magnitude = tf.abs(stft)

    mel_weights = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=fft_length // 2 + 1,
        sample_rate=audio_sample_rate,
        lower_edge_hertz=125.0,
        upper_edge_hertz=7500.0,
        dtype=tf.dtypes.float64
    )

    mel_energies = magnitude @ mel_weights
    return stabilized_log(mel_energies, log_additive_offset, log_floor)

def compute_frontend_features(samples, sr, frame_hop, n_required=16000, num_mel_bins=64, frame_width=96):
    """
      Turn raw audio samples into overlapping log-mel patches
      int16 input is scaled by the int16 maximum, float64 input is cast to
      float32, and signals shorter than n_required samples are zero-padded at
      the end before the log-mel spectrogram is framed along the time axis.
      Args:
        - samples: audio samples (int16, float32 or float64)
        - sr: sample rate of samples in Hz
        - frame_hop: step, in spectrogram frames, between consecutive patches
        - n_required: minimum number of samples; shorter input is padded
        - num_mel_bins: number of mel bands per spectrogram frame
        - frame_width: number of spectrogram frames per patch
      Returns:
        - mel: the framed log-mel spectrogram
    """
    if samples.dtype == np.int16:
        int16_peak = np.iinfo(np.int16).max
        samples = tf.cast(samples, np.float32) / int16_peak
    if samples.dtype == np.float64:
        samples = tf.cast(samples, np.float32)
    assert samples.dtype == np.float32, samples.dtype

    num_samples = tf.size(samples)

    def _pad_to_required():
        # Append zeros so the signal reaches n_required samples
        return tf.pad(samples, [(0, n_required - num_samples)])

    def _keep_as_is():
        return samples

    samples = tf.cond(num_samples < n_required, _pad_to_required, _keep_as_is)

    log_mel = log_mel_spectrogram(samples, sr, num_mel_bins=num_mel_bins)
    return tf.signal.frame(log_mel, frame_length=frame_width, frame_step=frame_hop, axis=0)

def make_nonsemantic_frill_nofrontend_feat(filename):
    """
      Compute a fixed-length FRILL descriptor for one audio file
      The file is loaded as 16 kHz mono, converted to log-mel patches, embedded
      with the FRILL no-frontend model, and summarised over patches.
      Args:
        - filename: path of the audio file
                    :type: str
      Returns:
        - the per-dimension mean of the embeddings followed by their
          per-dimension standard deviation
          :type: a 1-D numpy array
    """
    target_sr = 16000
    audio, _ = librosa.load(filename, sr=target_sr, mono=True, res_type="kaiser_fast")

    patches = compute_frontend_features(audio, target_sr, frame_hop=17)
    model_input = tf.expand_dims(patches, axis=-1).numpy().astype(np.float32)
    assert model_input.shape[1:] == (96, 64, 1)

    emb = frill_nofrontend_model(model_input)['embedding'].numpy()
    return np.concatenate((emb.mean(axis=0), emb.std(axis=0)))

# Extract Features

In [8]:
# Start measuring time
# NOTE(review): start_time is not read by any cell visible in this notebook.
start_time = time.perf_counter()
print("[*] extract audio features")

[*] extract audio features


In [9]:
def get_features_of_list_audio(X):
    """
      Extract the FRILL descriptor of every cough recording listed in X
      Args:
        - X: table with a 'file_path' column holding paths relative to DIR_DATA
             :type: a pandas DataFrame
      Returns:
        - one descriptor per row of X, in row order
          :type: a numpy array
    """
    cough_paths = (os.path.join(DIR_DATA, rel_path) for rel_path in X['file_path'])
    return np.array([make_nonsemantic_frill_nofrontend_feat(path) for path in cough_paths])


In [12]:
# Extract the features once and cache them on disk; later runs reload the cache.
feature_cache_path = OUTPUT_DIR + 'feature.pickle'
if not os.path.exists(feature_cache_path):
    X_trill_features = get_features_of_list_audio(df)
    # `with` closes the handle even if dumping fails (the bare open() leaked it).
    with open(feature_cache_path, "wb") as cache_file:
        pickle.dump({
            'X_trill_features': X_trill_features
        }, cache_file)
else:
    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # that this notebook produced itself.
    with open(feature_cache_path, "rb") as cache_file:
        f = pickle.load(cache_file)
    X_trill_features     = f['X_trill_features']
    # The features are indexed with masks built from df-sized tables later on,
    # so a cache from a different data.csv must not be used silently.
    if len(X_trill_features) != len(df):
        raise ValueError(
            f"Cached features in {feature_cache_path} have {len(X_trill_features)} rows "
            f"but df has {len(df)}; delete the cache file to regenerate it."
        )

# Evaluate

In [13]:
def evaluate(ensem_preds, targets):
    """
      Print classification metrics at the F1-optimal decision threshold
      Thresholds 0.00, 0.01, ... (below 0.60) are scanned and the first one
      reaching the highest F1-score is kept. The function then prints AUC, the
      chosen threshold, the confusion matrix, Precision, Recall, F1-Score and
      Accuracy.
      Note: the threshold is selected on the same predictions it is scored on.
      Args:
        - ensem_preds: predicted scores, one per sample
                       :type: a numpy array
        - targets: the actual binary labels, one per sample
                   :type: a numpy array
      Returns:
        - None
    """
    candidate_ths = np.arange(0.0, 0.6, 0.01)
    f1_per_th = [f1_score(targets, (ensem_preds > th).astype(int)) for th in candidate_ths]
    # argmax returns the first maximum, matching a strict ">" scan that starts from 0
    best_th = candidate_ths[int(np.argmax(f1_per_th))]

    print(f"\nAUC score: {roc_auc_score(targets, ensem_preds):12.4f}")
    print(f"Best threshold {best_th:12.4f}")

    hard_preds = (ensem_preds > best_th).astype(int)
    cm1 = confusion_matrix(targets, hard_preds)
    print('\nConfusion Matrix : \n', cm1)

    precision, recall, f1 = compute_metrics(cm1)
    print('\n=============')
    print (f'Precision    : {precision:12.4f}')
    print(f'Recall : {recall:12.4f}')
    print(f'F1 Score : {f1:12.4f}')

    # Accuracy = correctly classified samples (diagonal) over all samples
    n_correct = cm1[0, 0] + cm1[1, 1]
    accuracy1 = n_correct / cm1.sum()
    print('\n=============')
    print (f'Accuracy    : {accuracy1:12.4f}')

# Train COVID-19

In [14]:
# Target for the COVID-19 task and the negative/positive class ratio, which is
# handed to XGBoost as scale_pos_weight.
y = df['label_covid']
pos_scale = (y == 0).sum() / (y == 1).sum()
print(pos_scale)

def get_model():
    """
      Build a fresh, unfitted XGBoost classifier for the COVID-19 task
      Reads the module-level pos_scale at call time.
      Returns:
        - model: the configured classifier
                 :type: xgb.XGBClassifier
    """
    # initiate the model
    # NOTE(review): both learning_rate=0.07 and eta=1 are passed. XGBoost documents
    # `eta` as an alias of `learning_rate`, so one of the two is presumably
    # ignored — confirm which value was effective for the reported numbers.
    # NOTE(review): no seed/random_state is set here although the notebook header
    # mentions seed=1111 — verify this is intended.
    model = xgb.XGBClassifier(max_depth=6, learning_rate=0.07,
        scale_pos_weight=pos_scale,
        n_estimators=200,
        subsample=1,
        colsample_bytree=1,
        eta=1, objective='binary:logistic',
        eval_metric='auc'
    )
    return model

2.786127167630058


In [15]:
 # Build (or reload) the stratified fold assignment for the COVID-19 task.
 # Each row receives the number of the fold in which it is used for validation.
 if not os.path.exists(OUTPUT_DIR + 'df_5fold.csv'):
     folds = df.copy()
     # Integer sentinel: the column never passes through float/NaN, and any row
     # left unassigned would be visible as -1.
     folds['fold'] = -1
     fold_col = folds.columns.get_loc('fold')
     Fold = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=seed)
     for n, (train_index, val_index) in enumerate(Fold.split(folds, folds['label_covid'])):
         # split() yields positional indices, so assign by position (iloc) rather
         # than by label (loc); the two only coincide for a default RangeIndex.
         folds.iloc[val_index, fold_col] = n
     folds.to_csv(OUTPUT_DIR + 'df_5fold.csv', index=False)
 else:
     folds = pd.read_csv(OUTPUT_DIR + 'df_5fold.csv')
     # The fold column is used to mask df-aligned features and labels, so a fold
     # file written for a different data.csv must not be used silently.
     if len(folds) != len(df):
         raise ValueError(
             f"Cached fold file has {len(folds)} rows but df has {len(df)}; "
             "delete it to regenerate the folds."
         )

In [16]:
# Feature matrix for the COVID-19 task: only the FRILL features, sanitised by merge_feature.
X = merge_feature([X_trill_features])

In [17]:
# Out-of-fold evaluation for the COVID-19 task: train on all folds but one,
# score the held-out fold, then evaluate the pooled out-of-fold predictions.
targets = []
preds = []
aucs = []

# Iterate over the configured number of folds rather than a hard-coded 5.
for fold in range(fold_num):
    train_idx = folds['fold'] != fold
    valid_idx = folds['fold'] == fold
    X_train = X[train_idx]
    y_train = y[train_idx]
    X_val = X[valid_idx]
    y_val = y[valid_idx]

    targets.append(y_val)

    model = get_model()
    model.fit(X_train, y_train)

    # Keep only the probability of the positive class (column 1)
    pred = model.predict_proba(X_val)
    pred = np.array(pred)[:,1]
    preds.append(pred)

    # Named fold_auc so the `auc` function imported from sklearn.metrics is not overwritten.
    fold_auc = roc_auc_score(y_val, pred)
    aucs.append(fold_auc)
    print(fold_auc)
    del model

targets = np.concatenate(targets)
preds = np.concatenate(preds)

print("(!) cv5 AUC ", np.mean(aucs), np.std(aucs))
evaluate(preds, targets)

0.8351190476190476
0.8233836449650823
0.9134189381992941
0.9015544041450778
0.9242321844259218
(!) cv5 AUC  0.8795416438708848 0.041848691735494

AUC score:       0.8782
Best threshold       0.2000

Confusion Matrix : 
 [[860 104]
 [ 81 265]]

Precision    :       0.7182
Recall :       0.7659
F1 Score :       0.7413

Accuracy    :       0.8588


# Train abnormal

In [18]:
# Target for the abnormal-detection task and its negative/positive class ratio
y = df['label_abnormal']
pos_scale = y.eq(0).sum() / y.eq(1).sum()
print(pos_scale)

def get_model():
    """
      Build a fresh, unfitted XGBoost classifier for the abnormal-detection task
      Reads the module-level pos_scale at call time.
      Returns:
        - the configured classifier
          :type: xgb.XGBClassifier
    """
    xgb_params = dict(
        max_depth=7,
        scale_pos_weight=pos_scale,
        learning_rate=0.3,
        n_estimators=200,
        subsample=1,
        colsample_bytree=1,
        nthread=-1,
        seed=42,
        eval_metric='logloss',
    )
    return xgb.XGBClassifier(**xgb_params)

1.7010309278350515


In [19]:
 # Build (or reload) the stratified fold assignment for the abnormal-detection task.
 # Each row receives the number of the fold in which it is used for validation.
 if not os.path.exists(OUTPUT_DIR + 'df_abnormal2_5fold.csv'):
     folds = df.copy()
     # Integer sentinel: the column never passes through float/NaN, and any row
     # left unassigned would be visible as -1.
     folds['fold'] = -1
     fold_col = folds.columns.get_loc('fold')
     Fold = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=seed)
     for n, (train_index, val_index) in enumerate(Fold.split(folds, folds['label_abnormal'])):
         # split() yields positional indices, so assign by position (iloc) rather
         # than by label (loc); the two only coincide for a default RangeIndex.
         folds.iloc[val_index, fold_col] = n
     folds.to_csv(OUTPUT_DIR + 'df_abnormal2_5fold.csv', index=False)
 else:
     folds = pd.read_csv(OUTPUT_DIR + 'df_abnormal2_5fold.csv')
     # The fold column is used to mask df-aligned features and labels, so a fold
     # file written for a different data.csv must not be used silently.
     if len(folds) != len(df):
         raise ValueError(
             f"Cached fold file has {len(folds)} rows but df has {len(df)}; "
             "delete it to regenerate the folds."
         )

In [20]:
# Feature matrix for the abnormal-detection task: only the FRILL features, sanitised by merge_feature.
X = merge_feature([X_trill_features])

In [21]:
# Out-of-fold evaluation for the abnormal-detection task: train on all folds but
# one, score the held-out fold, then evaluate the pooled out-of-fold predictions.
targets = []
preds = []
aucs = []

# Iterate over the configured number of folds rather than a hard-coded 5.
for fold in range(fold_num):
    train_idx = folds['fold'] != fold
    valid_idx = folds['fold'] == fold
    X_train = X[train_idx]
    y_train = y[train_idx]
    X_val = X[valid_idx]
    y_val = y[valid_idx]

    targets.append(y_val)

    model = get_model()
    model.fit(X_train, y_train)

    # Keep only the probability of the positive class (column 1)
    pred = model.predict_proba(X_val)
    pred = np.array(pred)[:,1]
    preds.append(pred)

    # Named fold_auc so the `auc` function imported from sklearn.metrics is not overwritten.
    fold_auc = roc_auc_score(y_val, pred)
    aucs.append(fold_auc)
    print(fold_auc)
    del model

targets = np.concatenate(targets)
preds = np.concatenate(preds)

print("(!) cv5 AUC ", np.mean(aucs), np.std(aucs))
evaluate(preds, targets)

0.8449859418931583
0.802061855670103
0.7716963448922212
0.7973133395813807
0.8056232427366449
(!) cv5 AUC  0.8043361449547015 0.02355381645092078

AUC score:       0.8045
Best threshold       0.2700

Confusion Matrix : 
 [[667 158]
 [154 331]]

Precision    :       0.6769
Recall :       0.6825
F1 Score :       0.6797

Accuracy    :       0.7618


# Train symptom

In [22]:
# Target for the symptom-detection task and its negative/positive class ratio
y = df['label_symptom']
pos_scale = y.eq(0).sum() / y.eq(1).sum()
print(pos_scale)

def get_model():
    """
      Build a fresh, unfitted XGBoost classifier for the symptom-detection task
      Reads the module-level pos_scale at call time.
      Returns:
        - the configured classifier
          :type: xgb.XGBClassifier
    """
    xgb_params = dict(
        max_depth=7,
        scale_pos_weight=pos_scale,
        learning_rate=0.3,
        n_estimators=200,
        subsample=1,
        colsample_bytree=1,
        nthread=-1,
        seed=42,
        eval_metric='logloss',
    )
    return xgb.XGBClassifier(**xgb_params)

2.0324074074074074


In [23]:
 # Build (or reload) the stratified fold assignment for the symptom-detection task.
 # Each row receives the number of the fold in which it is used for validation.
 if not os.path.exists(OUTPUT_DIR + 'df_symptom_5fold.csv'):
     folds = df.copy()
     # Integer sentinel: the column never passes through float/NaN, and any row
     # left unassigned would be visible as -1.
     folds['fold'] = -1
     fold_col = folds.columns.get_loc('fold')
     Fold = StratifiedKFold(n_splits=fold_num, shuffle=True, random_state=seed)
     for n, (train_index, val_index) in enumerate(Fold.split(folds, folds['label_symptom'])):
         # split() yields positional indices, so assign by position (iloc) rather
         # than by label (loc); the two only coincide for a default RangeIndex.
         folds.iloc[val_index, fold_col] = n
     folds.to_csv(OUTPUT_DIR + 'df_symptom_5fold.csv', index=False)
 else:
     folds = pd.read_csv(OUTPUT_DIR + 'df_symptom_5fold.csv')
     # The fold column is used to mask df-aligned features and labels, so a fold
     # file written for a different data.csv must not be used silently.
     if len(folds) != len(df):
         raise ValueError(
             f"Cached fold file has {len(folds)} rows but df has {len(df)}; "
             "delete it to regenerate the folds."
         )

In [24]:
# Feature matrix for the symptom-detection task: only the FRILL features, sanitised by merge_feature.
X = merge_feature([X_trill_features])

In [25]:
# Out-of-fold evaluation for the symptom-detection task: train on all folds but
# one, score the held-out fold, then evaluate the pooled out-of-fold predictions.
targets = []
preds = []
aucs = []

# Iterate over the configured number of folds rather than a hard-coded 5.
for fold in range(fold_num):
    train_idx = folds['fold'] != fold
    valid_idx = folds['fold'] == fold
    X_train = X[train_idx]
    y_train = y[train_idx]
    X_val = X[valid_idx]
    y_val = y[valid_idx]

    targets.append(y_val)

    model = get_model()
    model.fit(X_train, y_train)

    # Keep only the probability of the positive class (column 1)
    pred = model.predict_proba(X_val)
    pred = np.array(pred)[:,1]
    preds.append(pred)

    # Named fold_auc so the `auc` function imported from sklearn.metrics is not overwritten.
    fold_auc = roc_auc_score(y_val, pred)
    aucs.append(fold_auc)
    print(fold_auc)
    del model

targets = np.concatenate(targets)
preds = np.concatenate(preds)

print("(!) cv5 AUC ", np.mean(aucs), np.std(aucs))
evaluate(preds, targets)

0.7955336617405584
0.8041379310344827
0.7949260042283297
0.800673890063425
0.8402484143763214
(!) cv5 AUC  0.8071039802886235 0.01691669229905609

AUC score:       0.8082
Best threshold       0.2400

Confusion Matrix : 
 [[749 129]
 [152 280]]

Precision    :       0.6846
Recall :       0.6481
F1 Score :       0.6659

Accuracy    :       0.7855
