In [1]:
import numpy as np
import pandas as pd
import os
import re
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import IPython.display as ipd
from tqdm import tqdm

In [2]:
from hnr import *
from jitters import *
from shimmers import *
from sound import Waveform

In [3]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

In [4]:
data_path = "/home/bmis/Documents/AI-Workspace/ALS/bmis_data/All"

control_path = 'Control'
als_with_dysarthria_path = 'ALSwDysarthria'
als_without_dysarthria_path = 'ALSwoDysarthria'

control_dir = os.path.join(data_path, control_path)
control = os.listdir(control_dir)[0]

als_with_dir = os.path.join(data_path, als_with_dysarthria_path)
als_with_dys = os.listdir(als_with_dir)[1]

als_without_dir = os.path.join(data_path, als_without_dysarthria_path)
als_without_dys = os.listdir(als_without_dir)[1]

In [5]:
def extract_td_feature(audio_path):
    td_features = []
    _, sample_rate = librosa.load(audio_path)
    #print(audio_path)
    sound = Waveform(path=audio_path, sample_rate=sample_rate)

    td_features.append(sound.jitters()['localabsoluteJitter'])
    td_features.append(sound.jitters()['localJitter'])
    td_features.append(sound.jitters()['rapJitter'])
    td_features.append(sound.jitters()['ppq5Jitter'])

    td_features.append(sound.shimmers()['localShimmer'])
    td_features.append(sound.shimmers()['localdbShimmer'])
    td_features.append(sound.shimmers()['apq3Shimmer'])
    td_features.append(sound.shimmers()['apq5Shimmer'])
    td_features.append(sound.shimmers()['apq11Shimmer'])

    _ =(sound.hnr())
    hnr = sound.hnr()
    td_features.append(hnr)

    return np.array(td_features)

def get_all_td_features(data_path, label):
    data = []
    labels = []

    for file in tqdm(os.listdir(data_path)):
        #print(f'loading file: {file}')
        try:
            feature = extract_td_feature(os.path.join(data_path, file))
            data.append(feature)
            labels.append(label)
        except:
            print("Error encountered while parsing file: ", file)
            continue
    return data, labels

## 2 Class Classification

In [6]:
control_data, control_labels = get_all_td_features(control_dir, 0)
als_with_data, als_with_labels = get_all_td_features(als_with_dir, 1)
als_without_data, als_without_labels = get_all_td_features(als_without_dir, 1)

X = np.concatenate([control_data, als_with_data, als_without_data], axis=0)
y = np.concatenate([control_labels, als_with_labels, als_without_labels], axis=0)
X = np.array(X)


# Data Cleaning

X[X == None] = np.nan
X = np.nan_to_num(X, nan=0.0)
df = pd.DataFrame(X)
df.fillna(0.0, inplace=True)
X = df.to_numpy()

X, y = shuffle(X, y, random_state=42)
#X = X.reshape(-1,10,1)

  r_x = ffts_outputs[0] / ffts_outputs[1]
  r_x = ffts_outputs[0] / ffts_outputs[1]
  a = op(a[slice1], a[slice2])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  thres = thres * (np.max(y) - np.min(y)) + np.min(y)
100%|██████████| 199/199 [02:06<00:00,  1.58it/s]
100%|██████████| 291/291 [05:21<00:00,  1.11s/it]
100%|██████████| 176/176 [01:53<00:00,  1.55it/s]


In [7]:
scorers = ['accuracy', 'precision', 'recall', 'f1']

## Random Forest Classifier

In [8]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_cv_scores = cross_validate(rf_classifier, X, y, cv=10)

print("Random Forest Classifier:")
print(f"Cross-validation scores: {rf_cv_scores['test_score']}")
print(f"Mean accuracy: {rf_cv_scores['test_score'].mean()}\n")


scores = cross_validate(rf_classifier, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 2 Class RF:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Random Forest Classifier:
Cross-validation scores: [0.70149254 0.70149254 0.80597015 0.76119403 0.70149254 0.73134328
 0.72727273 0.71212121 0.60606061 0.74242424]
Mean accuracy: 0.7190863862505654
Cross-validation scores for 2 Class RF:
fit_time: 0.1862
score_time: 0.0097
test_accuracy: 0.7191
test_precision: 0.7572
test_recall: 0.8842
test_f1: 0.8153


## Logistic Regression

In [9]:
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg_cv_scores = cross_validate(log_reg, X, y, cv=10)

print("Logistic Regression:")
print(f"Cross-validation scores: {log_reg_cv_scores['test_score']}")
print(f"Mean accuracy: {log_reg_cv_scores['test_score'].mean()}\n")

scores = cross_validate(log_reg, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 2 Class LR:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Logistic Regression:
Cross-validation scores: [0.74626866 0.68656716 0.74626866 0.65671642 0.68656716 0.67164179
 0.66666667 0.68181818 0.68181818 0.74242424]
Mean accuracy: 0.6966757123473541
Cross-validation scores for 2 Class LR:
fit_time: 0.0065
score_time: 0.0047
test_accuracy: 0.6967
test_precision: 0.7145
test_recall: 0.9463
test_f1: 0.8139


## Linear Discriminant Analysis

In [10]:
lda = LinearDiscriminantAnalysis()
lda_cv_scores = cross_validate(lda, X, y, cv=10)

print("Linear Discriminant Analysis:")
print(f"Cross-validation scores: {lda_cv_scores['test_score']}")
print(f"Mean accuracy: {lda_cv_scores['test_score'].mean()}\n")

scores = cross_validate(lda, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 2 Class LDA:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Linear Discriminant Analysis:
Cross-validation scores: [0.74626866 0.76119403 0.76119403 0.76119403 0.76119403 0.73134328
 0.71212121 0.77272727 0.68181818 0.77272727]
Mean accuracy: 0.7461781999095431

Cross-validation scores for 2 Class LDA:
fit_time: 0.0010
score_time: 0.0043
test_accuracy: 0.7462
test_precision: 0.7562
test_recall: 0.9420
test_f1: 0.8387


## Multi-layer Perceptron

In [11]:
mlp_classifier = MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=500, random_state=42)
mlp_cv_scores = cross_validate(mlp_classifier, X, y, cv=10)

print("Neural Network (MLPClassifier):")
print(f"Cross-validation scores: {mlp_cv_scores['test_score']}")
print(f"Mean accuracy: {mlp_cv_scores['test_score'].mean()}\n")

scores = cross_validate(mlp_classifier, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 2 Class MLP:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Neural Network (MLPClassifier):
Cross-validation scores: [0.68656716 0.68656716 0.76119403 0.65671642 0.65671642 0.67164179
 0.66666667 0.6969697  0.68181818 0.72727273]
Mean accuracy: 0.68921302578019
Cross-validation scores for 2 Class MLP:
fit_time: 0.8179
score_time: 0.0091
test_accuracy: 0.6892
test_precision: 0.7138
test_recall: 0.9294
test_f1: 0.8073


## Support Vector Machine with RBF Kernel

In [12]:
svm_rbf = SVC(kernel='rbf', gamma='scale', random_state=42)
svm_rbf_cv_scores = cross_validate(svm_rbf, X, y, cv=10)

print("Support Vector Machine with RBF Kernel:")
print(f"Cross-validation scores: {svm_rbf_cv_scores['test_score']}")
print(f"Mean accuracy: {svm_rbf_cv_scores['test_score'].mean()}\n")

scores = cross_validate(svm_rbf, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 2 Class SVM RBF:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Support Vector Machine with RBF Kernel:
Cross-validation scores: [0.70149254 0.70149254 0.70149254 0.70149254 0.70149254 0.70149254
 0.71212121 0.6969697  0.6969697  0.6969697 ]
Mean accuracy: 0.7011985526910901
Cross-validation scores for 2 Class SVM RBF:
fit_time: 0.0109
score_time: 0.0062
test_accuracy: 0.7012
test_precision: 0.7012
test_recall: 1.0000
test_f1: 0.8244


## Support Vector Machine with Linear Kernel

In [13]:
svm_linear = SVC(kernel='linear', random_state=42)
svm_linear_cv_scores = cross_validate(svm_linear, X, y, cv=10)

print("Support Vector Machine with Linear Kernel:")
print(f"Cross-validation scores: {svm_linear_cv_scores['test_score']}")
print(f"Mean accuracy: {svm_linear_cv_scores['test_score'].mean()}\n")

scores = cross_validate(svm_linear, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 2 Class SVM Linear Kernel:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Support Vector Machine with Linear Kernel:
Cross-validation scores: [0.70149254 0.70149254 0.70149254 0.70149254 0.70149254 0.70149254
 0.71212121 0.6969697  0.6969697  0.6969697 ]
Mean accuracy: 0.7011985526910901
Cross-validation scores for 2 Class SVM Linear Kernel:
fit_time: 0.0297
score_time: 0.0050
test_accuracy: 0.7012
test_precision: 0.7012
test_recall: 1.0000
test_f1: 0.8244


## 3 Class Classification

In [14]:
control_data, control_labels = get_all_td_features(control_dir, 0)
als_with_data, als_with_labels = get_all_td_features(als_with_dir, 1)
als_without_data, als_without_labels = get_all_td_features(als_without_dir, 2)


X = np.concatenate([control_data, als_with_data, als_without_data], axis=0)
y = np.concatenate([control_labels, als_with_labels, als_without_labels], axis=0)
X = np.array(X)


# Data Cleaning

X[X == None] = np.nan
X = np.nan_to_num(X, nan=0.0)
df = pd.DataFrame(X)
df.fillna(0.0, inplace=True)
X = df.to_numpy()

X, y = shuffle(X, y, random_state=42)
#X = X.reshape(-1,10,1)

  r_x = ffts_outputs[0] / ffts_outputs[1]
  r_x = ffts_outputs[0] / ffts_outputs[1]
  a = op(a[slice1], a[slice2])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  thres = thres * (np.max(y) - np.min(y)) + np.min(y)
100%|██████████| 199/199 [02:00<00:00,  1.65it/s]
100%|██████████| 291/291 [05:20<00:00,  1.10s/it]
100%|██████████| 176/176 [01:53<00:00,  1.56it/s]


In [15]:
scorers = ['precision_weighted', 'recall_weighted', 'f1_weighted']

## Random Forest Classifier

In [16]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_cv_scores = cross_validate(rf_classifier, X, y, cv=10)

print("Random Forest Classifier:")
print(f"Cross-validation scores: {rf_cv_scores['test_score']}")
print(f"Mean accuracy: {rf_cv_scores['test_score'].mean()}\n")

scores = cross_validate(rf_classifier, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 3 Class RF:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Random Forest Classifier:
Cross-validation scores: [0.44776119 0.53731343 0.58208955 0.52238806 0.47761194 0.47761194
 0.54545455 0.60606061 0.5        0.59090909]
Mean accuracy: 0.5287200361827227
Cross-validation scores for 3 Class RF:
fit_time: 0.1893
score_time: 0.0090
test_precision_weighted: 0.5225
test_recall_weighted: 0.5287
test_f1_weighted: 0.5137


## Logistic Regression

In [17]:
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg_cv_scores = cross_validate(log_reg, X, y, cv=10)

print("Logistic Regression:")
print(f"Cross-validation scores: {log_reg_cv_scores['test_score']}")
print(f"Mean accuracy: {log_reg_cv_scores['test_score'].mean()}\n")

scores = cross_validate(log_reg, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 3 Class LR:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Logistic Regression:
Cross-validation scores: [0.43283582 0.49253731 0.52238806 0.40298507 0.46268657 0.44776119
 0.40909091 0.5        0.45454545 0.5       ]
Mean accuracy: 0.462483039348711


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cross-validation scores for 3 Class LR:
fit_time: 0.0226
score_time: 0.0040
test_precision_weighted: 0.3714
test_recall_weighted: 0.4625
test_f1_weighted: 0.3845


  _warn_prf(average, modifier, msg_start, len(result))


## Linear Discriminant Analysis

In [18]:
lda = LinearDiscriminantAnalysis()
lda_cv_scores = cross_validate(lda, X, y, cv=10)

print("Linear Discriminant Analysis:")
print(f"Cross-validation scores: {lda_cv_scores['test_score']}")
print(f"Mean accuracy: {lda_cv_scores['test_score'].mean()}\n")

scores = cross_validate(lda, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 3 Class LDA:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Linear Discriminant Analysis:
Cross-validation scores: [0.46268657 0.55223881 0.53731343 0.49253731 0.53731343 0.59701493
 0.46969697 0.5        0.5        0.48484848]
Mean accuracy: 0.5133649932157395

Cross-validation scores for 3 Class LDA:
fit_time: 0.0015
score_time: 0.0053
test_precision_weighted: 0.5330
test_recall_weighted: 0.5134
test_f1_weighted: 0.4711


## Multi-layer Perceptron

In [19]:
mlp_classifier = MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=500, random_state=42)
mlp_cv_scores = cross_validate(mlp_classifier, X, y, cv=10)

print("Neural Network (MLPClassifier):")
print(f"Cross-validation scores: {mlp_cv_scores['test_score']}")
print(f"Mean accuracy: {mlp_cv_scores['test_score'].mean()}\n")

scores = cross_validate(mlp_classifier, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 3 Class MLP:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Neural Network (MLPClassifier):
Cross-validation scores: [0.43283582 0.49253731 0.49253731 0.3880597  0.46268657 0.44776119
 0.42424242 0.51515152 0.43939394 0.46969697]
Mean accuracy: 0.456490275893261


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cross-validation scores for 3 Class MLP:
fit_time: 1.0667
score_time: 0.0100
test_precision_weighted: 0.3399
test_recall_weighted: 0.4565
test_f1_weighted: 0.3782


  _warn_prf(average, modifier, msg_start, len(result))


## Support Vector Machine with RBF Kernel

In [20]:
svm_rbf = SVC(kernel='rbf', gamma='scale', random_state=42)
svm_rbf_cv_scores = cross_validate(svm_rbf, X, y, cv=10)

print("Support Vector Machine with RBF Kernel:")
print(f"Cross-validation scores: {svm_rbf_cv_scores['test_score']}")
print(f"Mean accuracy: {svm_rbf_cv_scores['test_score'].mean()}\n")

scores = cross_validate(svm_rbf, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 3 Class SVM with RBF:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Support Vector Machine with RBF Kernel:
Cross-validation scores: [0.43283582 0.44776119 0.43283582 0.41791045 0.43283582 0.44776119
 0.40909091 0.42424242 0.43939394 0.43939394]
Mean accuracy: 0.4324061510628675

Cross-validation scores for 3 Class SVM with RBF:
fit_time: 0.0149
score_time: 0.0053
test_precision_weighted: 0.2797
test_recall_weighted: 0.4324
test_f1_weighted: 0.3030


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Support Vector Machine with Linear Kernel

In [21]:
svm_linear = SVC(kernel='linear', random_state=42)
svm_linear_cv_scores = cross_validate(svm_linear, X, y, cv=10)

print("Support Vector Machine with Linear Kernel:")
print(f"Cross-validation scores: {svm_linear_cv_scores['test_score']}")
print(f"Mean accuracy: {svm_linear_cv_scores['test_score'].mean()}\n")

scores = cross_validate(svm_linear, X, y, cv=10, scoring=scorers)
print("Cross-validation scores for 3 Class SVM with Linear Kernel:")
for metric_name, result in scores.items():
    print(f"{metric_name}: {result.mean():.4f}")

Support Vector Machine with Linear Kernel:
Cross-validation scores: [0.41791045 0.44776119 0.47761194 0.40298507 0.46268657 0.40298507
 0.42424242 0.48484848 0.43939394 0.46969697]
Mean accuracy: 0.4430122116689281


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Cross-validation scores for 3 Class SVM with Linear Kernel:
fit_time: 0.0568
score_time: 0.0060
test_precision_weighted: 0.3385
test_recall_weighted: 0.4430
test_f1_weighted: 0.3504


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
