In [None]:
import numpy as np 
import pandas as pd 
import os
import re
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from sklearn.preprocessing import minmax_scale
import IPython.display as ipd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler


In [7]:
df = pd.read_csv('/kaggle/input/dysarthria-detection/torgo_data/data.csv')
df

Unnamed: 0,is_dysarthria,gender,filename
0,non_dysarthria,female,torgo_data/non_dysarthria_female/FC03_Session2...
1,non_dysarthria,female,torgo_data/non_dysarthria_female/FC02_Session3...
2,non_dysarthria,female,torgo_data/non_dysarthria_female/FC02_Session3...
3,non_dysarthria,female,torgo_data/non_dysarthria_female/FC03_Session2...
4,non_dysarthria,female,torgo_data/non_dysarthria_female/FC03_Session1...
...,...,...,...
1995,dysarthria,male,torgo_data/dysarthria_male/M03_Session2_0144.wav
1996,dysarthria,male,torgo_data/dysarthria_male/M02_Session1_0005.wav
1997,dysarthria,male,torgo_data/dysarthria_male/M03_Session2_0040.wav
1998,dysarthria,male,torgo_data/dysarthria_male/M03_Session2_0260.wav


In [8]:
df['filename'] = df['filename'].apply(lambda x: os.path.join('/kaggle/input/dysarthria-detection',x))

In [6]:
def extract_mfcc_features(signal, fs, n_mfcc=52):
    preemphasized_signal = librosa.effects.preemphasis(signal)
    mfccs = librosa.feature.mfcc(y=preemphasized_signal, sr=fs, n_mfcc=n_mfcc)
    return np.mean(mfccs, axis=1)


def feature_extraction_only_mfcc(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'])
            mfcc_features = extract_mfcc_features(speech, fs)  # (52 features)
            all_features = np.concatenate([mfcc_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")

    column_names = (
        [f"MFCC_{i}" for i in range(52)] + ['class', 'gender'])
    return pd.DataFrame(features, columns=column_names)


In [7]:
data_with_feat_mfcc = feature_extraction_only_mfcc(df)
data_with_feat_mfcc['class'] = data_with_feat_mfcc['class'].replace('non_dysarthria',0)
data_with_feat_mfcc['class'] = data_with_feat_mfcc['class'].replace('dysarthria',1)
data_with_feat_mfcc['gender'] = data_with_feat_mfcc['gender'].replace('male', 1)
data_with_feat_mfcc['gender'] = data_with_feat_mfcc['gender'].replace('female', 0)
data_with_feat_mfcc

  speech, fs = librosa.load(record['filename'])
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 30%|███       | 610/2000 [00:08<00:14, 95.39it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [00:24<00:00, 83.24it/s]
  data_with_feat_mfcc['class'] = data_with_feat_mfcc['class'].replace('dysarthria',1)
  data_with_feat_mfcc['gender'] = data_with_feat_mfcc['gender'].replace('female', 0)


Unnamed: 0,MFCC_0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,...,MFCC_44,MFCC_45,MFCC_46,MFCC_47,MFCC_48,MFCC_49,MFCC_50,MFCC_51,class,gender
0,-319.7045,35.161507,-66.34302,51.268147,-50.792416,34.19954,-29.19774,17.997763,-9.4621935,-0.5122181,...,-4.5058756,-1.5363845,-5.0564704,-3.4387124,-4.2227654,-2.2731867,0.4243103,-0.49626946,0,0
1,-395.44254,40.966072,-64.21012,49.29909,-52.021427,34.75533,-20.61536,25.536705,-7.5747924,4.6627607,...,-6.344143,-4.169222,-6.5706644,-2.3804123,-3.0671916,-2.7551515,0.38413244,-0.8819986,0,0
2,-400.7094,45.508183,-73.22846,57.570812,-59.73956,40.009903,-23.322557,26.447178,-7.871122,5.9670362,...,-7.1083784,-3.794058,-6.653496,-3.121459,-4.4126153,-2.3347008,-0.13959113,-0.8432585,0,0
3,-303.36227,42.922634,-77.73571,44.11482,-58.83759,34.619595,-27.90659,18.597755,-10.9610815,4.2977843,...,-6.555074,-1.2236726,-3.9311428,-1.7308125,-1.85476,-2.1504872,0.38307458,-1.4279135,0,0
4,-380.4776,41.505455,-70.097115,60.845333,-55.586807,44.16475,-29.470608,26.530014,-8.528628,3.3341653,...,-6.7447896,-2.504473,-7.4847293,-1.6244006,-3.1706736,-2.378174,0.8780274,0.091998786,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,-402.0437,48.89409,-49.44366,37.44017,-35.224632,23.513338,-17.929684,8.643148,-9.682321,-4.864476,...,-1.2102699,0.2593692,-0.30640313,4.1314764,4.4224787,2.0771952,3.580518,4.074891,1,1
1995,-567.24225,53.42506,-24.851572,5.648115,-42.638477,14.371315,-28.034782,12.563507,-5.3071346,-2.4107144,...,-0.19318683,5.2797875,2.5829372,5.5325646,2.6679924,0.4258839,1.1258968,0.17784452,1,1
1996,-472.70474,55.216194,-48.241093,38.618504,-31.28108,22.451336,-14.904069,16.969027,-5.566646,1.9870967,...,-3.0755556,-0.32984915,-2.210794,3.444888,2.3652935,2.474335,5.7890086,3.5479848,1,1
1997,-406.1352,54.426342,-61.200016,50.883335,-42.66797,20.043901,-20.154718,17.3035,-13.471844,-4.6676664,...,-0.9948297,2.6853592,1.079636,2.7016742,1.9857001,3.738986,3.9398618,2.0509567,1,1


In [None]:
X_mfcc = data_with_feat_mfcc.drop(columns = ['class'])
X_mfcc.columns = X_mfcc.columns.astype(str)
y_mfcc = data_with_feat_mfcc['class']
X_mfcc = X_mfcc.astype(float)

X_train_mfcc, X_test_mfcc, y_train_mfcc, y_test_mfcc = train_test_split(X_mfcc, y_mfcc, test_size=0.2, stratify=y_mfcc, random_state = 42)

X_train_mfcc.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_mfcc.fillna(X_train_mfcc.mean(), inplace=True)
X_train_mfcc = X_train_mfcc.loc[:, X_train_mfcc.nunique() > 1] 
X_train_mfcc = X_train_mfcc.astype(float)
y_train_mfcc = y_train_mfcc.astype(int)

scaler = StandardScaler()
X_train_mfcc = scaler.fit_transform(X_train_mfcc)
X_test_mfcc = scaler.transform(X_test_mfcc)

In [9]:
from sklearn.model_selection import GridSearchCV

param_grid=[
    {'C':[0.5,1,10,100, 1000],
     'gamma':[10,1,0.1,0.001,0.00001, 0.000001],
     'kernel':['rbf'],
    }
]

optional_params=GridSearchCV(SVC(),param_grid,cv=5,scoring='accuracy',verbose=0)
optional_params.fit(X_train_mfcc,y_train_mfcc)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

Best parameters for original dataset:
{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}


In [10]:
model_mfcc = SVC(kernel='rbf', gamma=0.1, C=10)
model_mfcc.fit(X_train_mfcc, y_train_mfcc)

from sklearn.metrics import accuracy_score
predictions = model_mfcc.predict(X_test_mfcc) 
print(100*accuracy_score(y_test_mfcc, predictions), "% accuracy")

98.25 % accuracy


# Only Mel RCC (LPC-residual)

https://www.researchgate.net/publication/272912634_Feature_Extraction_Using_LPC-Residual_and_MelFrequency_Cepstral_Coefficients_in_Forensic_Speaker_Recognition

In [11]:
from scipy import signal
def lp_residual(signal_data, order=10):
    preemphasized_signal = librosa.effects.preemphasis(signal_data)
    a = librosa.lpc(preemphasized_signal, order=order)
    residual = signal.lfilter([1] + -1 * a[1:].tolist(), [1], preemphasized_signal)
    return residual

def extract_rcc_features(signal_data, fs, n_rcc=52):
    residual = lp_residual(signal_data)
    # hilbert_transformed = np.abs(signal.hilbert(residual))
    rccs = librosa.feature.mfcc(y=residual, sr=fs, n_mfcc=n_rcc)
    return np.mean(rccs, axis=1)
    

def feature_extraction_only_rcc(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'])
            rcc_features = extract_rcc_features(speech, fs)  # (52 features)
            all_features = np.concatenate([rcc_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")
    
    column_names = (
        [f"RCC_{i}" for i in range(52)] + ['class', 'gender'])
    return pd.DataFrame(features, columns=column_names)

data_with_feat_rcc = feature_extraction_only_rcc(df)
data_with_feat_rcc['class'] = data_with_feat_rcc['class'].replace('non_dysarthria', 0)
data_with_feat_rcc['class'] = data_with_feat_rcc['class'].replace('dysarthria', 1)
data_with_feat_rcc['gender'] = data_with_feat_rcc['gender'].replace('male', 1)
data_with_feat_rcc['gender'] = data_with_feat_rcc['gender'].replace('female', 0)

X_rcc = data_with_feat_rcc.drop(columns=['class'])
X_rcc.columns = X_rcc.columns.astype(str)
y_rcc = data_with_feat_rcc['class']
X_rcc = X_rcc.astype(float)

X_train_rcc, X_test_rcc, y_train_rcc, y_test_rcc = train_test_split(X_rcc, y_rcc, test_size=0.2, stratify=y_rcc, random_state=42)

X_train_rcc.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_rcc.fillna(X_train_rcc.mean(), inplace=True)
X_train_rcc = X_train_rcc.loc[:, X_train_rcc.nunique() > 1] 
X_train_rcc = X_train_rcc.astype(float)
y_train_rcc = y_train_rcc.astype(int)


# scaler = StandardScaler()
# X_train_rcc = scaler.fit_transform(X_train_rcc)
# X_test_rcc = scaler.transform(X_test_rcc)

param_grid = [
    {'C': [0.5, 1, 10, 100, 1000],
     'gamma': [10, 1, 0.1, 0.001, 0.00001, 0.000001],
     'kernel': ['rbf'],
    }
]

optional_params = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=0)
optional_params.fit(X_train_rcc, y_train_rcc)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

  speech, fs = librosa.load(record['filename'])
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 30%|███       | 607/2000 [00:11<00:22, 62.06it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [00:36<00:00, 55.08it/s]
  data_with_feat_rcc['class'] = data_with_feat_rcc['class'].replace('dysarthria', 1)
  data_with_feat_rcc['gender'] = data_with_feat_rcc['gender'].replace('female', 0)


Best parameters for original dataset:
{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}


In [24]:
model_rcc = SVC(kernel='rbf', gamma=1e-3, C=100)
model_rcc.fit(X_train_rcc, y_train_rcc)

from sklearn.metrics import accuracy_score
predictions = model_rcc.predict(X_test_rcc)
print(100 * accuracy_score(y_test_rcc, predictions), "% accuracy")

98.5 % accuracy


# MFCC+Mel RCC (52+52)

In [12]:
from scipy import signal
def lp_residual(signal_data, order=10):
    preemphasized_signal = librosa.effects.preemphasis(signal_data)
    a = librosa.lpc(preemphasized_signal, order=order)
    residual = signal.lfilter([1] + -1 * a[1:].tolist(), [1], preemphasized_signal)
    return residual

def extract_mfcc_rcc_features(signal_data, fs, n_mfcc=52, n_rcc=52):
    preemphasized_signal = librosa.effects.preemphasis(signal_data)
    mfccs = librosa.feature.mfcc(y=preemphasized_signal, sr=fs, n_mfcc=n_mfcc)
    residual = lp_residual(signal_data)
    # hilbert_transformed = np.abs(signal.hilbert(residual))
    rccs = librosa.feature.mfcc(y=residual, sr=fs, n_mfcc=n_rcc)
    return np.concatenate([np.mean(mfccs, axis=1), np.mean(rccs, axis=1)])

def feature_extraction_mfcc_rcc(df, n_mfcc=52, n_rcc=52):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'])
            combined_features = extract_mfcc_rcc_features(speech, fs, n_mfcc, n_rcc)
            features.append(np.append(combined_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")
    
    column_names = (
        [f"MFCC_{i}" for i in range(n_mfcc)] + [f"RCC_{i}" for i in range(n_rcc)] + ['class', 'gender'])
    return pd.DataFrame(features, columns=column_names)

data_with_feat_mfcc_rcc = feature_extraction_mfcc_rcc(df, n_mfcc=52, n_rcc=52)
data_with_feat_mfcc_rcc['class'] = data_with_feat_mfcc_rcc['class'].replace('non_dysarthria', 0)
data_with_feat_mfcc_rcc['class'] = data_with_feat_mfcc_rcc['class'].replace('dysarthria', 1)
data_with_feat_mfcc_rcc['gender'] = data_with_feat_mfcc_rcc['gender'].replace('male', 1)
data_with_feat_mfcc_rcc['gender'] = data_with_feat_mfcc_rcc['gender'].replace('female', 0)

X_mfcc_rcc = data_with_feat_mfcc_rcc.drop(columns=['class'])
X_mfcc_rcc.columns = X_mfcc_rcc.columns.astype(str)
y_mfcc_rcc = data_with_feat_mfcc_rcc['class']
X_mfcc_rcc = X_mfcc_rcc.astype(float)

X_train_mfcc_rcc, X_test_mfcc_rcc, y_train_mfcc_rcc, y_test_mfcc_rcc = train_test_split(X_mfcc_rcc, y_mfcc_rcc, test_size=0.2, stratify=y_mfcc_rcc, random_state=42)

X_train_mfcc_rcc.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_mfcc_rcc.fillna(X_train_mfcc_rcc.mean(), inplace=True)
X_train_mfcc_rcc = X_train_mfcc_rcc.loc[:, X_train_mfcc_rcc.nunique() > 1] 
X_train_mfcc_rcc = X_train_mfcc_rcc.astype(float)
y_train_mfcc_rcc = y_train_mfcc_rcc.astype(int)

# scaler = StandardScaler()
# X_train_mfcc_rcc = scaler.fit_transform(X_train_mfcc_rcc)
# X_test_mfcc_rcc = scaler.transform(X_test_mfcc_rcc)


param_grid = [
    {'C': [0.5, 1, 10, 100, 1000],
     'gamma': [10, 1, 0.1, 0.001, 0.00001, 0.000001],
     'kernel': ['rbf'],
    }
]

optional_params = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=0)
optional_params.fit(X_train_mfcc_rcc, y_train_mfcc_rcc)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

  speech, fs = librosa.load(record['filename'])
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 30%|███       | 606/2000 [00:14<00:32, 43.37it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [00:48<00:00, 41.61it/s]
  data_with_feat_mfcc_rcc['class'] = data_with_feat_mfcc_rcc['class'].replace('dysarthria', 1)
  data_with_feat_mfcc_rcc['gender'] = data_with_feat_mfcc_rcc['gender'].replace('female', 0)


Best parameters for original dataset:
{'C': 1000, 'gamma': 1e-05, 'kernel': 'rbf'}


In [16]:
model_mfcc_rcc = SVC(kernel='rbf', gamma=1e-5, C=1000)
model_mfcc_rcc.fit(X_train_mfcc_rcc, y_train_mfcc_rcc)

predictions = model_mfcc_rcc.predict(X_test_mfcc_rcc)
print(100 * accuracy_score(y_test_mfcc_rcc, predictions), "% accuracy")

98.25 % accuracy


# Linear RCCs
Reference: https://dl.acm.org/doi/10.1007/978-3-031-78498-9_22

In [13]:
from scipy.fftpack import dct

def lp_residual(signal_data, order=10):
    preemphasized_signal = librosa.effects.preemphasis(signal_data)
    a = librosa.lpc(preemphasized_signal, order=order)
    residual = signal.lfilter([1] + -1 * a[1:].tolist(), [1], preemphasized_signal)
    return residual

def extract_linear_lrcc(signal_data, fs, n_lrcc=52, n_filters=52):
    residual = lp_residual(signal_data)
    spectrum = np.abs(np.fft.rfft(residual))
    freqs = np.linspace(0, fs / 2, n_filters + 2)  # Linear spaced frequencies
    bins = np.floor((len(spectrum) - 1) * freqs / (fs / 2)).astype(int)
    
    filterbank = np.zeros((n_filters, len(spectrum)))
    for i in range(1, n_filters + 1):
        filterbank[i - 1, bins[i - 1]: bins[i]] = np.linspace(0, 1, bins[i] - bins[i - 1])
        filterbank[i - 1, bins[i]: bins[i + 1]] = np.linspace(1, 0, bins[i + 1] - bins[i])  
    filtered_spectrum = np.dot(filterbank, spectrum[:len(spectrum)])
    log_spectrum = np.log1p(filtered_spectrum)
    lrccs = dct(log_spectrum, type=2, norm='ortho')[:n_lrcc]
    
    return lrccs

def feature_extraction_only_lrcc(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'])
            lrcc_features = extract_linear_lrcc(speech, fs)  # Extract linear lrccs
            all_features = np.concatenate([lrcc_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")
    
    column_names = [f"lrcc_{i}" for i in range(52)] + ['class', 'gender']
    return pd.DataFrame(features, columns=column_names)

data_with_feat_lrcc = feature_extraction_only_lrcc(df)


  speech, fs = librosa.load(record['filename'])
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 30%|███       | 607/2000 [00:10<00:25, 54.49it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [00:37<00:00, 52.88it/s]


In [14]:
data_with_feat_lrcc['class'] = data_with_feat_lrcc['class'].replace('non_dysarthria', 0)
data_with_feat_lrcc['class'] = data_with_feat_lrcc['class'].replace('dysarthria', 1)
data_with_feat_lrcc['gender'] = data_with_feat_lrcc['gender'].replace('male', 1)
data_with_feat_lrcc['gender'] = data_with_feat_lrcc['gender'].replace('female', 0)

X_lrcc = data_with_feat_lrcc.drop(columns=['class'])
X_lrcc.columns = X_lrcc.columns.astype(str)
y_lrcc = data_with_feat_lrcc['class']
X_lrcc = X_lrcc.astype(float)

X_train_lrcc, X_test_lrcc, y_train_lrcc, y_test_lrcc = train_test_split(X_lrcc, y_lrcc, test_size=0.2, stratify=y_lrcc, random_state=42)

X_train_lrcc.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_lrcc.fillna(X_train_lrcc.mean(), inplace=True)
X_train_lrcc = X_train_lrcc.loc[:, X_train_lrcc.nunique() > 1] 
X_train_lrcc = X_train_lrcc.astype(float)
y_train_lrcc = y_train_lrcc.astype(int)

# scaler = StandardScaler()
# X_train_lrcc = scaler.fit_transform(X_train_lrcc)
# X_test_lrcc = scaler.transform(X_test_lrcc)


param_grid = [
    {'C': [0.5, 1, 10, 100, 1000],
     'gamma': [10, 1, 0.1, 0.001, 0.00001, 0.000001],
     'kernel': ['rbf'],
    }
]

optional_params = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=0)
optional_params.fit(X_train_lrcc, y_train_lrcc)
print("Best parameters for original dataset:")
print(optional_params.best_params_)



  data_with_feat_lrcc['class'] = data_with_feat_lrcc['class'].replace('dysarthria', 1)
  data_with_feat_lrcc['gender'] = data_with_feat_lrcc['gender'].replace('female', 0)


Best parameters for original dataset:
{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}


In [23]:
model_lrcc = SVC(kernel='rbf', gamma=0.001, C=1000)
model_lrcc.fit(X_train_lrcc, y_train_lrcc)

from sklearn.metrics import accuracy_score
predictions = model_lrcc.predict(X_test_lrcc)
print(100 * accuracy_score(y_test_lrcc, predictions), "% accuracy")

87.25 % accuracy


### Why Mel RCCs perform better than linear RCCs

**Mel Scaling Captures Speech Better** - Speech perception is logarithmic, and the Mel scale emphasizes lower frequencies, which are more crucial for speech intelligibility.

**Better Feature Separation** - In dysarthria detection, the lower formants and excitation signals are crucial, and Mel filtering may enhance this.


# Linear RCC + MFCC

In [16]:
def feature_extraction_mfcc_lrcc(df, n_mfcc=52, n_lrcc=52):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'])
            lrcc_features = extract_linear_lrcc(speech, fs,  n_lrcc=n_lrcc, n_filters=n_lrcc)  
            preemphasized_signal = librosa.effects.preemphasis(speech)
            mfccs = librosa.feature.mfcc(y=preemphasized_signal, sr=fs, n_mfcc=n_mfcc)
            combined_features = np.concatenate([np.mean(mfccs, axis=1), lrcc_features])
            features.append(np.append(combined_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")
    
    column_names = (
        [f"MFCC_{i}" for i in range(n_mfcc)] + [f"lrcc_{i}" for i in range(n_lrcc)] + ['class', 'gender'])
    return pd.DataFrame(features, columns=column_names)

data_with_feat_mfcc_lrcc = feature_extraction_mfcc_lrcc(df, n_mfcc=52, n_lrcc=52)
data_with_feat_mfcc_lrcc['class'] = data_with_feat_mfcc_lrcc['class'].replace('non_dysarthria', 0)
data_with_feat_mfcc_lrcc['class'] = data_with_feat_mfcc_lrcc['class'].replace('dysarthria', 1)
data_with_feat_mfcc_lrcc['gender'] = data_with_feat_mfcc_lrcc['gender'].replace('male', 1)
data_with_feat_mfcc_lrcc['gender'] = data_with_feat_mfcc_lrcc['gender'].replace('female', 0)

X_mfcc_lrcc = data_with_feat_mfcc_lrcc.drop(columns=['class'])
X_mfcc_lrcc.columns = X_mfcc_lrcc.columns.astype(str)
y_mfcc_lrcc = data_with_feat_mfcc_lrcc['class']
X_mfcc_lrcc = X_mfcc_lrcc.astype(float)

X_train_mfcc_lrcc, X_test_mfcc_lrcc, y_train_mfcc_lrcc, y_test_mfcc_lrcc = train_test_split(X_mfcc_lrcc, y_mfcc_lrcc, test_size=0.2, stratify=y_mfcc_lrcc, random_state=42)

X_train_mfcc_lrcc.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_mfcc_lrcc.fillna(X_train_mfcc_lrcc.mean(), inplace=True)
X_train_mfcc_lrcc = X_train_mfcc_lrcc.loc[:, X_train_mfcc_lrcc.nunique() > 1] 
X_train_mfcc_lrcc = X_train_mfcc_lrcc.astype(float)
y_train_mfcc_lrcc = y_train_mfcc_lrcc.astype(int)

# scaler = StandardScaler()
# X_train_mfcc_lrcc = scaler.fit_transform(X_train_mfcc_lrcc)
# X_test_mfcc_lrcc = scaler.transform(X_test_mfcc_lrcc)



param_grid = [
    {'C': [0.5, 1, 10, 100, 1000],
     'gamma': [10, 1, 0.1, 0.001, 0.00001, 0.000001],
     'kernel': ['rbf'],
    }
]

optional_params = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=0)
optional_params.fit(X_train_mfcc_lrcc, y_train_mfcc_lrcc)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

  speech, fs = librosa.load(record['filename'])
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 30%|███       | 604/2000 [00:15<00:33, 41.75it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [00:52<00:00, 37.78it/s]
  data_with_feat_mfcc_lrcc['class'] = data_with_feat_mfcc_lrcc['class'].replace('dysarthria', 1)
  data_with_feat_mfcc_lrcc['gender'] = data_with_feat_mfcc_lrcc['gender'].replace('female', 0)


Best parameters for original dataset:
{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}


In [17]:
model_mfcc_lrcc = SVC(kernel='rbf', gamma=0.001, C=100)
model_mfcc_lrcc.fit(X_train_mfcc_lrcc, y_train_mfcc_lrcc)

predictions = model_mfcc_lrcc.predict(X_test_mfcc_lrcc)
print(100 * accuracy_score(y_test_mfcc_lrcc, predictions), "% accuracy")

98.5 % accuracy


## Higher order relations among LP residual samples

In [20]:
from scipy.stats import skew, kurtosis

def lp_residual(signal_data, order=10):
    preemphasized_signal = librosa.effects.preemphasis(signal_data)
    a = librosa.lpc(preemphasized_signal, order=order)
    residual = signal.lfilter([1] + -1 * a[1:].tolist(), [1], preemphasized_signal)
    return residual

def extract_lp_residual_features(signal_data, sr):
    residual = lp_residual(signal_data)

    # Higher-order statistics
    skewness = skew(residual)
    kurt_val = kurtosis(residual)
    entropy = -np.sum((residual**2) * np.log(residual**2 + 1e-10))

    # Phase-based features from Hilbert transform
    analytic_signal = hilbert(residual)
    instantaneous_phase = np.unwrap(np.angle(analytic_signal))
    phase_mean = np.mean(instantaneous_phase)
    phase_std = np.std(instantaneous_phase)

    return np.array([skewness, kurt_val, entropy, phase_mean, phase_std])

def feature_extraction_lp_residual(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, sr = librosa.load(record['filename'], sr=None)
            lp_features = extract_lp_residual_features(speech, sr)  # (5 features)
            all_features = np.concatenate([lp_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")

    column_names = [f"LP_{i}" for i in range(5)] + ['class', 'gender']
    return pd.DataFrame(features, columns=column_names)


In [26]:
data_with_feat_lpff = feature_extraction_lp_residual(df)
data_with_feat_lpff['class'] = data_with_feat_lpff['class'].replace('non_dysarthria', 0)
data_with_feat_lpff['class'] = data_with_feat_lpff['class'].replace('dysarthria', 1)
data_with_feat_lpff['gender'] = data_with_feat_lpff['gender'].replace('male', 1)
data_with_feat_lpff['gender'] = data_with_feat_lpff['gender'].replace('female', 0)

X_lpff = data_with_feat_lpff.drop(columns=['class'])
X_lpff.columns = X_lpff.columns.astype(str)
y_lpff = data_with_feat_lpff['class']
X_lpff = X_lpff.astype(float)

X_train_lpff, X_test_lpff, y_train_lpff, y_test_lpff = train_test_split(X_lpff, y_lpff, test_size=0.2, stratify=y_lpff, random_state=42)

X_train_lpff.replace([np.inf, -np.inf], np.nan, inplace=Traue)
X_train_lpff.fillna(X_train_lpff.mean(), inplace=True)
# X_train_lpff = X_train_lpff.loc[:, X_train_lpff.nunique() > 1] 
X_train_lpff = X_train_lpff.astype(float)
y_train_lpff = y_train_lpff.astype(int)

param_grid = [
    {'C': [0.5, 1, 10, 100, 1000],
     'gamma': [10, 1, 0.1, 0.001, 0.00001, 0.000001],
     'kernel': ['rbf'],
    }
]

optional_params = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=0)
optional_params.fit(X_train_lpff, y_train_lpff)
print("Best parameters for original dataset:")
print(optional_params.best_params_)


  speech, sr = librosa.load(record['filename'], sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 30%|███       | 607/2000 [00:10<00:23, 58.90it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [00:37<00:00, 53.15it/s]
  data_with_feat_lpff['class'] = data_with_feat_lpff['class'].replace('dysarthria', 1)
  data_with_feat_lpff['gender'] = data_with_feat_lpff['gender'].replace('female', 0)


Best parameters for original dataset:
{'C': 10, 'gamma': 1e-06, 'kernel': 'rbf'}


In [27]:
model_lpff = SVC(kernel='rbf', gamma=1e-6, C=10)
model_lpff.fit(X_train_lpff, y_train_lpff)

from sklearn.metrics import accuracy_score
predictions = model_lpff.predict(X_test_lpff)
print(100 * accuracy_score(y_test_lpff, predictions), "% accuracy")

62.0 % accuracy


https://www.researchgate.net/publication/271891059_Recognition_of_Emotions_from_Speech_using_Excitation_Source_Features