In [2]:
import os.path
__file__ = os.path.abspath('')
import sys
from pathlib import Path
project_folder = Path(__file__).absolute()
sys.path.append(str(project_folder))

import librosa
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp

from audiot.audio_features import AudioFeatures, calc_log_mel_energy_features
from audiot.audio_signal import AudioSignal
from audiot.audio_labels import load_labels

In [3]:
data = {}
# test_data contains all cleaned label
# samll set contains only 10 sets 
for filename in os.listdir(project_folder / "small_set"):
    if 'ch2' not in filename:# mac DS store 
        continue
    id = filename[:filename.rindex('ch2')-1]
    type = filename.split('.')[-1]
    if id not in data:
        data[id] = {}
    if type == "flac":
        data[id]["features"] = project_folder / "small_set" / filename
    else:
        data[id]["labels"] = project_folder / "small_set" / filename

In [4]:
import warnings
warnings.filterwarnings('ignore')

num_files = int(len(data))
X = np.zeros((2401*num_files,13))# test num_files test files 
y = np.zeros((2401*num_files,1))
i = 0 
for d in data.values():
    x_path, y_path = d["features"], d["labels"]
    signal = AudioSignal.from_file(x_path)
    label = load_labels(y_path)
    features = calc_log_mel_energy_features(signal)
    features.event_names = ["cough"]
    features.match_labels(label)
    X[2401*i:2401*(i+1),:] = features.features    
    y[2401*i:2401*(i+1)] = features.true_events
    i = i+1 

In [5]:
#from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import fbeta_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=0.70, random_state=555)

In [6]:
clf = SVC(kernel='rbf')
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
matrix = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
F5_score = fbeta_score(y_test, y_pred, beta=0.5)
F1_score = fbeta_score(y_test, y_pred, beta=1)
print("===================================")
print("SVM confusion matrix")
print(matrix)

SVM confusion matrix
[[715   0]
 [  6   0]]


In [7]:

from sklearn.metrics import classification_report
# Set the parameters by cross-validation,
#code from site https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_digits.html
tuning_parameters = [{'kernel': ['rbf'], 'gamma': [1e-2, 1e-1],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(
        SVC(), tuning_parameters, scoring='%s_macro' % score
    )
    clf.fit(x_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(x_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}

Grid scores on development set:

0.492 (+/-0.001) for {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}
0.492 (+/-0.001) for {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
0.492 (+/-0.001) for {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
0.880 (+/-0.204) for {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
0.710 (+/-0.391) for {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
0.770 (+/-0.134) for {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
0.844 (+/-0.267) for {'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'}
0.715 (+/-0.061) for {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
0.492 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.492 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.492 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.492 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluat

In [9]:
matrix = confusion_matrix(y_test, y_pred)
matrix

array([[707,   8],
       [  4,   2]])

In [None]:
## Jun 14 update
#F0_5 =((1+0.5^2) * Precision * Recall) / (0.5^2 * Precision + Recall)

In [10]:
0.5**2

0.25