In [74]:
import numpy as np 
import pandas as pd 
import os
import re
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from sklearn.preprocessing import minmax_scale
import IPython.display as ipd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler


In [50]:
df = pd.read_csv('/kaggle/input/dysarthria-detection/torgo_data/data.csv')
df

Unnamed: 0,is_dysarthria,gender,filename
0,non_dysarthria,female,torgo_data/non_dysarthria_female/FC03_Session2...
1,non_dysarthria,female,torgo_data/non_dysarthria_female/FC02_Session3...
2,non_dysarthria,female,torgo_data/non_dysarthria_female/FC02_Session3...
3,non_dysarthria,female,torgo_data/non_dysarthria_female/FC03_Session2...
4,non_dysarthria,female,torgo_data/non_dysarthria_female/FC03_Session1...
...,...,...,...
1995,dysarthria,male,torgo_data/dysarthria_male/M03_Session2_0144.wav
1996,dysarthria,male,torgo_data/dysarthria_male/M02_Session1_0005.wav
1997,dysarthria,male,torgo_data/dysarthria_male/M03_Session2_0040.wav
1998,dysarthria,male,torgo_data/dysarthria_male/M03_Session2_0260.wav


In [51]:
df['filename'] = df['filename'].apply(lambda x: os.path.join('/kaggle/input/dysarthria-detection',x))

In [83]:
def extract_mfcc_features(signal, fs, n_mfcc=52):
    preemphasized_signal = librosa.effects.preemphasis(signal)
    mfccs = librosa.feature.mfcc(y=preemphasized_signal, sr=fs, n_mfcc=n_mfcc)
    return np.mean(mfccs, axis=1)


def feature_extraction_only_mfcc(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'])
            mfcc_features = extract_mfcc_features(speech, fs)  # (52 features)
            all_features = np.concatenate([mfcc_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")

    column_names = (
        [f"MFCC_{i}" for i in range(52)] + ['class', 'gender'])
    return pd.DataFrame(features, columns=column_names)


In [85]:
data_with_feat_mfcc = feature_extraction_only_mfcc(df)
data_with_feat_mfcc['class'] = data_with_feat_mfcc['class'].replace('non_dysarthria',0)
data_with_feat_mfcc['class'] = data_with_feat_mfcc['class'].replace('dysarthria',1)
data_with_feat_mfcc['gender'] = data_with_feat_mfcc['gender'].replace('male', 1)
data_with_feat_mfcc['gender'] = data_with_feat_mfcc['gender'].replace('female', 0)
data_with_feat_mfcc

  return f(*args, **kwargs)
 30%|███       | 602/2000 [02:02<05:33,  4.19it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
100%|██████████| 2000/2000 [06:46<00:00,  4.91it/s]


Unnamed: 0,MFCC_0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,...,MFCC_44,MFCC_45,MFCC_46,MFCC_47,MFCC_48,MFCC_49,MFCC_50,MFCC_51,class,gender
0,-321.8849,38.071064,-68.739914,52.882797,-51.438004,33.804836,-27.810905,15.778974,-6.667152,-3.5620265,...,-3.2477632,-3.2151062,-3.147,-5.3647757,-2.491062,-3.6335604,1.2841895,-0.78434616,0,0
1,-397.47217,43.663372,-66.43465,50.78867,-52.61186,34.377625,-19.319807,23.47946,-4.9972677,1.8685217,...,-4.8450108,-6.027408,-4.5689974,-4.3030243,-1.4358406,-3.9269426,0.97904354,-0.85440105,0,0
2,-403.0331,48.61085,-75.80271,59.331272,-60.496555,39.685482,-21.9657,24.221592,-5.0357265,2.854687,...,-5.8201814,-5.5251985,-4.6797113,-5.1114883,-2.613155,-3.7514424,0.76030105,-1.147969,0,0
3,-305.53836,45.826977,-80.133705,45.7298,-59.486927,34.230366,-26.525415,16.386002,-8.175198,1.2552539,...,-5.3292108,-2.8854196,-2.0361643,-3.6521723,-0.11458776,-3.5338438,1.2684168,-1.7480026,0,0
4,-382.79202,44.641994,-72.663536,62.55453,-56.231552,43.691082,-27.900242,24.038292,-5.414121,-0.08006054,...,-5.554988,-4.2114215,-5.4698343,-3.7214656,-1.2073728,-4.00359,1.9998858,-0.4341714,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,-403.71872,51.09305,-51.249813,38.640656,-35.678566,23.174456,-16.824,6.9320345,-7.550931,-7.1558795,...,0.027684843,-1.1792083,1.1677849,2.7969115,5.47482,1.4096812,3.8003335,4.310961,1,1
1995,-569.68805,56.68513,-27.54152,7.4583964,-43.36391,13.932602,-26.487234,10.091115,-2.1981294,-5.796619,...,1.2899591,3.3523748,4.729969,3.4059045,4.546122,-1.0111803,1.9846035,-0.03278939,1,1
1996,-474.64447,57.78966,-50.356323,40.01922,-31.808434,22.042181,-13.612051,14.951826,-3.0641358,-0.7001935,...,-1.3930317,-2.280362,-0.22452183,1.6430984,3.7832727,1.5918031,6.0558863,3.9138167,1,1
1997,-408.28226,57.262577,-63.547016,52.46067,-43.30712,19.663355,-18.810972,15.155557,-10.767587,-7.6108117,...,0.38655835,0.9346762,2.9908428,0.84285957,3.5902562,2.557257,4.5891013,1.9876118,1,1


In [None]:
X_mfcc = data_with_feat_mfcc.drop(columns = ['class'])
X_mfcc.columns = X_mfcc.columns.astype(str)
y_mfcc = data_with_feat_mfcc['class']
X_mfcc = X_mfcc.astype(float)

X_train_mfcc, X_test_mfcc, y_train_mfcc, y_test_mfcc = train_test_split(X_mfcc, y_mfcc, test_size=0.2, stratify=y_mfcc, random_state = 42)

X_train_mfcc.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_mfcc.fillna(X_train_mfcc.mean(), inplace=True)
X_train_mfcc = X_train_mfcc.loc[:, X_train_mfcc.nunique() > 1] 
X_train_mfcc = X_train_mfcc.astype(float)
y_train_mfcc = y_train_mfcc.astype(int)

scaler = StandardScaler()
X_train_mfcc = scaler.fit_transform(X_train_mfcc)
X_test_mfcc = scaler.transform(X_test_mfcc)

In [87]:
from sklearn.model_selection import GridSearchCV

param_grid=[
    {'C':[0.5,1,10,100, 1000],
     'gamma':[10,1,0.1,0.001,0.00001, 0.000001],
     'kernel':['rbf'],
    }
]

optional_params=GridSearchCV(SVC(),param_grid,cv=5,scoring='accuracy',verbose=0)
optional_params.fit(X_train_mfcc,y_train_mfcc)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

Best parameters for original dataset:
{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}


In [88]:
model_mfcc = SVC(kernel='rbf', gamma=0.1, C=10)
model_mfcc.fit(X_train_mfcc, y_train_mfcc)

from sklearn.metrics import accuracy_score
predictions = model_mfcc.predict(X_test_mfcc) 
print(100*accuracy_score(y_test_mfcc, predictions), "% accuracy")

98.25 % accuracy


# Epoch extraction (ZFF):

In [None]:
def zero_frequency_filter(signal_data, fs, win_len_ms=10):
    if len(signal_data) < 2:
        return np.array([])
    diff_signal = np.diff(signal_data)
    zff_signal = np.cumsum(np.cumsum(diff_signal))  
    win_len = int((win_len_ms / 1000) * fs)
    if win_len > len(zff_signal):
        win_len = len(zff_signal)
    mean_trend = np.convolve(zff_signal, np.ones(win_len)/win_len, mode='same')
    zff_filtered = zff_signal - mean_trend
    return zff_filtered

def detect_epochs(zff_signal):
    zero_crossings = np.where((zff_signal[:-1] < 0) & (zff_signal[1:] >= 0))[0]
    return zero_crossings

# ISE Features

In [55]:
def compute_excitation_strength(zff_signal, epochs):
    strengths = []
    for i in epochs:
        if i <= 0 or i >= len(zff_signal) - 1:
            continue
        slope = (zff_signal[i + 1] - zff_signal[i - 1]) / 2.0
        strengths.append(np.abs(slope))
    return np.array(strengths)

def compute_instantaneous_frequency(epochs, fs):
    if len(epochs) < 2:
        return np.array([0.0])
    periods = np.diff(epochs) / fs
    freqs = 1.0 / periods
    return freqs

def compute_epoch_sharpness(zff_signal, epochs):
    sharpness = []
    for i in epochs:
        if i <= 1 or i >= len(zff_signal) - 2:
            continue
        curvature = zff_signal[i - 1] - 2 * zff_signal[i] + zff_signal[i + 1]
        sharpness.append(np.abs(curvature))
    return np.array(sharpness)

def compute_strength_slope(excitation_strength):
    if len(excitation_strength) < 2:
        return 0.0
    x = np.arange(len(excitation_strength))
    slope, _ = np.polyfit(x, excitation_strength, 1)
    return slope

In [6]:
def extract_ise_features(speech, fs):
    zff_signal = zero_frequency_filter(speech, fs)
    epochs = detect_epochs(zff_signal)

    excitation_strength = compute_excitation_strength(zff_signal, epochs)
    instantaneous_freq = compute_instantaneous_frequency(epochs, fs)
    sharpness = compute_epoch_sharpness(zff_signal, epochs)
    strength_slope = compute_strength_slope(excitation_strength)

    features = [
        np.mean(excitation_strength), np.std(excitation_strength),
        np.mean(instantaneous_freq), np.std(instantaneous_freq),
        np.mean(sharpness), np.std(sharpness),
        strength_slope
    ]
    
    return np.array(features)

In [7]:
def feature_extraction_only_ise(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'], sr=None)
            ise_features = extract_ise_features(speech, fs)
            features.append(np.append(ise_features, [record['is_dysarthria'], record['gender']]))  
        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")

    column_names = [
        'ISE_strength_mean', 'ISE_strength_std',
        'ISE_freq_mean', 'ISE_freq_std',
        'ISE_sharpness_mean', 'ISE_sharpness_std',
        'ISE_strength_slope',
        'class', 'gender'
    ]
    
    return pd.DataFrame(features, columns=column_names)

In [81]:
data_with_feat_ise = feature_extraction_only_ise(df)
data_with_feat_ise['class'] = data_with_feat_ise['class'].replace('non_dysarthria',0)
data_with_feat_ise['class'] = data_with_feat_ise['class'].replace('dysarthria',1)
data_with_feat_ise['gender'] = data_with_feat_ise['gender'].replace('male', 1)
data_with_feat_ise['gender'] = data_with_feat_ise['gender'].replace('female', 0)
data_with_feat_ise

  return f(*args, **kwargs)
 30%|███       | 602/2000 [01:32<04:47,  4.85it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [05:03<00:00,  6.59it/s]


Unnamed: 0,ISE_strength_mean,ISE_strength_std,ISE_freq_mean,ISE_freq_std,ISE_sharpness_mean,ISE_sharpness_std,ISE_strength_slope,class,gender
0,0.1627320307560521,0.18384358805507914,902.7982115789631,1786.8451960960613,0.061103744219654706,0.1713727563440276,6.737747485437808e-05,0,0
1,0.07818826297818116,0.11807728931061615,588.2807770033745,1237.7090807826612,0.02333137202383986,0.04501960437792622,0.0008421688112107262,0,0
2,0.05073910418977122,0.07230949373282651,237.25576796395828,715.5177090478799,0.009309296912335265,0.016378763717400945,0.0001847161237875129,0,0
3,0.11482492158686129,0.11098339651986884,673.1684727542262,1255.7058113992764,0.03218057612253023,0.0392261927401425,-2.957139222266697e-05,0,0
4,0.07281622376552829,0.09918161318958388,309.18969437430405,794.3276457928066,0.014026179646336222,0.024552578934932928,0.0003200112311766083,0,0
...,...,...,...,...,...,...,...,...,...
1994,0.114022161408333,0.17422845870622058,962.9660853099915,1702.7321839154683,0.04158422136037279,0.0646548483563854,3.09615408463767e-05,1,1
1995,0.0936026998681805,0.12481270634998216,532.241892342725,825.1716919961045,0.014060020644225488,0.023424879017298585,-0.0001340761304919257,1,1
1996,0.07016394686500733,0.11115017937148101,194.88356740007055,284.79813769265445,0.011718116506643627,0.07761003193705475,0.0003031428329755708,1,1
1997,0.09766708944709021,0.14586491158042644,1277.2953566691433,1954.4783134008828,0.04199060368456561,0.06590999965683374,-3.001918989679841e-07,1,1


In [None]:
X_ise = data_with_feat_ise.drop(columns = ['class'])
X_ise.columns = X_ise.columns.astype(str)
y_ise = data_with_feat_ise['class']
X_ise = X_ise.astype(float)

X_train_ise, X_test_ise, y_train_ise, y_test_ise = train_test_split(X_ise, y_ise, test_size=0.2, stratify=y_ise, random_state = 42)

X_train_ise.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_ise.fillna(X_train_ise.mean(), inplace=True)
X_train_ise = X_train_ise.loc[:, X_train_ise.nunique() > 1] 


scaler = StandardScaler()
X_train_ise = scaler.fit_transform(X_train_ise)
X_test_ise = scaler.transform(X_test_ise)

X_train_ise = X_train_ise.astype(float)
y_train_ise = y_train_ise.astype(int)

In [46]:
from sklearn.model_selection import GridSearchCV

param_grid=[
    {'C':[0.5,1,10,100, 1000],
     'gamma':[10,1,0.1,0.001,0.00001, 0.000001],
     'kernel':['rbf'],
    }
]

optional_params=GridSearchCV(SVC(),param_grid,cv=5,scoring='accuracy',verbose=0)
optional_params.fit(X_train_ise,y_train_ise)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

Best parameters for original dataset:
{'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}


In [47]:
model_ise = SVC(kernel='rbf', gamma=1e-1, C=1000)
model_ise.fit(X_train_ise, y_train_ise)

from sklearn.metrics import accuracy_score
predictions = model_ise.predict(X_test_ise) 
print(100*accuracy_score(y_test_ise, predictions), "% accuracy")

87.25 % accuracy


# MFCC + ISE features

In [None]:
def feature_extraction_mfcc_and_ise(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'], sr=None)

            mfcc_features = extract_mfcc_features(speech, fs) 
            ise_features = extract_ise_features(speech, fs)  

            all_features = np.concatenate([mfcc_features, ise_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))

        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")

    column_names = (
        [f"MFCC_{i}" for i in range(len(mfcc_features))] +
        ['ISE_strength_mean', 'ISE_strength_std',
         'ISE_freq_mean', 'ISE_freq_std',
         'ISE_sharpness_mean', 'ISE_sharpness_std',
         'ISE_strength_slope'] +
        ['class', 'gender']
    )

    return pd.DataFrame(features, columns=column_names)

data_with_feat_mfcc_ise = feature_extraction_mfcc_and_ise(df)

data_with_feat_mfcc_ise['class'] = data_with_feat_mfcc_ise['class'].replace('non_dysarthria', 0)
data_with_feat_mfcc_ise['class'] = data_with_feat_mfcc_ise['class'].replace('dysarthria', 1)

data_with_feat_mfcc_ise['gender'] = data_with_feat_mfcc_ise['gender'].replace('male', 1)
data_with_feat_mfcc_ise['gender'] = data_with_feat_mfcc_ise['gender'].replace('female', 0)


  return f(*args, **kwargs)
 30%|███       | 602/2000 [01:43<05:05,  4.58it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
100%|██████████| 2000/2000 [05:48<00:00,  5.73it/s]


In [None]:
X_mfcc_ise = data_with_feat_mfcc_ise.drop(columns=['class'])
X_mfcc_ise.columns = X_mfcc_ise.columns.astype(str)
y_mfcc_ise = data_with_feat_mfcc_ise['class']

X_mfcc_ise = X_mfcc_ise.astype(float)

X_train_mfcc_ise, X_test_mfcc_ise, y_train_mfcc_ise, y_test_mfcc_ise = train_test_split(
    X_mfcc_ise, y_mfcc_ise, test_size=0.2, stratify=y_mfcc_ise, random_state=42
)

X_train_mfcc_ise.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_mfcc_ise.fillna(X_train_mfcc_ise.mean(), inplace=True)
X_train_mfcc_ise = X_train_mfcc_ise.loc[:, X_train_mfcc_ise.nunique() > 1]

scaler = StandardScaler()
X_train_mfcc_ise = scaler.fit_transform(X_train_mfcc_ise)
X_test_mfcc_ise = scaler.transform(X_test_mfcc_ise)

X_train_mfcc_ise = X_train_mfcc_ise.astype(float)
y_train_mfcc_ise = y_train_mfcc_ise.astype(int)


In [91]:
from sklearn.model_selection import GridSearchCV

param_grid=[
    {'C':[0.5,1,10,100, 1000],
     'gamma':[10,1,0.1,0.001,0.00001, 0.000001],
     'kernel':['rbf'],
    }
]

optional_params=GridSearchCV(SVC(),param_grid,cv=5,scoring='accuracy',verbose=0)
optional_params.fit(X_train_mfcc_ise,y_train_mfcc_ise)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

Best parameters for original dataset:
{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}


In [92]:
model_mfcc_ise = SVC(kernel='rbf', gamma=0.001, C=1000)
model_mfcc_ise.fit(X_train_mfcc_ise, y_train_mfcc_ise)

predictions = model_mfcc_ise.predict(X_test_mfcc_ise)
print(100 * accuracy_score(y_test_mfcc_ise, predictions), "% accuracy")

98.0 % accuracy


## LP Residual around GCI

In [60]:

from scipy import signal
def lp_residual(signal_data, order=10):
    preemphasized_signal = librosa.effects.preemphasis(signal_data)
    a = librosa.lpc(preemphasized_signal, order=order)
    residual = signal.lfilter([1] + -1 * a[1:].tolist(), [1], preemphasized_signal)
    return residual



In [63]:
def lpresidual_around_gci(residual_signal, gci_locations, window_size=20):
    half_window = window_size // 2
    excitation_windows = []

    for gci in gci_locations:
        start = gci - half_window
        end = gci + half_window

        if start >= 0 and end <= len(residual_signal):
            window = residual_signal[start:end]
            excitation_windows.append(window)

    return excitation_windows


In [None]:

def extract_lpf_gci(speech, fs, window_size=20):
    residual = lp_residual(speech)

    zff_signal = zero_frequency_filter(speech, fs)
    gci_locations = detect_epochs(zff_signal)

    excitation_windows = lpresidual_around_gci(residual, gci_locations, window_size)

    skewnesses, kurtoses, entropies = [], [], []

    for window in excitation_windows:
        if len(window) == 0:
            continue
        sk = skew(window)
        ku = kurtosis(window)
        entropy = -np.sum((window**2) * np.log(window**2 + 1e-10))

        skewnesses.append(sk)
        kurtoses.append(ku)
        entropies.append(entropy)

    features = [
        np.mean(skewnesses), np.std(skewnesses),
        np.mean(kurtoses), np.std(kurtoses),
        np.mean(entropies), np.std(entropies)
    ]

    return np.array(features)

In [None]:
from scipy.stats import skew, kurtosis
def feature_extraction_lpf_and_ise(df):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        try:
            speech, fs = librosa.load(record['filename'], sr=None)

            lpf_features = extract_lpf_gci(speech, fs)

            all_features = np.concatenate([lpf_features])
            features.append(np.append(all_features, [record['is_dysarthria'], record['gender']]))

        except Exception as e:
            print(f"Error processing {record['filename']}: {e}")

    return pd.DataFrame(features)


data_with_feat_lpf_ise = feature_extraction_lpf_and_ise(df)

data_with_feat_lpf_ise['class'] = data_with_feat_lpf_ise['class'].replace('non_dysarthria', 0)
data_with_feat_lpf_ise['class'] = data_with_feat_lpf_ise['class'].replace('dysarthria', 1)

data_with_feat_lpf_ise['gender'] = data_with_feat_lpf_ise['gender'].replace('male', 1)
data_with_feat_lpf_ise['gender'] = data_with_feat_lpf_ise['gender'].replace('female', 0)


  return f(*args, **kwargs)
 30%|███       | 602/2000 [02:16<05:46,  4.04it/s]

Error processing /kaggle/input/dysarthria-detection/torgo_data/dysarthria_female/F01_Session1_0068.wav: 


100%|██████████| 2000/2000 [08:14<00:00,  4.05it/s]


In [None]:
X_lpf_ise = data_with_feat_lpf_ise.drop(columns=['class'])
X_lpf_ise.columns = X_lpf_ise.columns.astype(str)
y_lpf_ise = data_with_feat_lpf_ise['class']

X_lpf_ise = X_lpf_ise.astype(float)

X_train_lpf_ise, X_test_lpf_ise, y_train_lpf_ise, y_test_lpf_ise = train_test_split(
    X_lpf_ise, y_lpf_ise, test_size=0.2, stratify=y_lpf_ise, random_state=42
)

X_train_lpf_ise.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_lpf_ise.fillna(X_train_lpf_ise.mean(), inplace=True)
X_train_lpf_ise = X_train_lpf_ise.loc[:, X_train_lpf_ise.nunique() > 1]

scaler = StandardScaler()
X_train_lpf_ise = scaler.fit_transform(X_train_lpf_ise)
X_test_lpf_ise = scaler.transform(X_test_lpf_ise)

X_train_lpf_ise = X_train_lpf_ise.astype(float)
y_train_lpf_ise = y_train_lpf_ise.astype(int)


from sklearn.model_selection import GridSearchCV

param_grid=[
    {'C':[0.5,1,10,100, 1000],
     'gamma':[10,1,0.1,0.001,0.00001, 0.000001],
     'kernel':['rbf'],
    }
]

optional_params=GridSearchCV(SVC(),param_grid,cv=5,scoring='accuracy',verbose=0)
optional_params.fit(X_train_lpf_ise,y_train_lpf_ise)
print("Best parameters for original dataset:")
print(optional_params.best_params_)

Best parameters for original dataset:
{'C': 10, 'gamma': 1, 'kernel': 'rbf'}


In [80]:
model_lpf_ise = SVC(kernel='rbf', gamma=1, C=10)
model_lpf_ise.fit(X_train_lpf_ise, y_train_lpf_ise)
from sklearn.metrics import accuracy_score

predictions = model_lpf_ise.predict(X_test_lpf_ise)
print(100 * accuracy_score(y_test_lpf_ise, predictions), "% accuracy")

87.25 % accuracy
