## Load the data onto a pandas dataframe

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('combined_dataset_with_binary_labels.csv')

In [3]:
# Iterate through the 'MFCC_Features' column in the DataFrame
for index, input_string in enumerate(df['MFCC_Features']):
    # Remove the square brackets
    input_string = input_string.strip('[]')
    
    # Split the string by whitespace and convert to floats
    float_list = [float(val) for val in input_string.split() if val.strip()]
    
    # Update the 'MFCC_Features' column with the list of floats for the current row
    df.at[index, 'MFCC_Features'] = float_list
    
df['MFCC_Features']

# Split the 'MFCC_Features' column into separate columns
df[['MFCC_Feature_1', 'MFCC_Feature_2', 'MFCC_Feature_3', 'MFCC_Feature_4',
    'MFCC_Feature_5', 'MFCC_Feature_6', 'MFCC_Feature_7', 'MFCC_Feature_8',
    'MFCC_Feature_9', 'MFCC_Feature_10', 'MFCC_Feature_11', 'MFCC_Feature_12',
    'MFCC_Feature_13']] = pd.DataFrame(df['MFCC_Features'].tolist(), index=df.index)

df;

## Using Random Forest

In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split data into features (X) and the target variable (y)
X = df[['MFCC_Feature_1', 'MFCC_Feature_2', 'MFCC_Feature_3', 'MFCC_Feature_4',
        'MFCC_Feature_5', 'MFCC_Feature_6', 'MFCC_Feature_7', 'MFCC_Feature_8',
        'MFCC_Feature_9', 'MFCC_Feature_10', 'MFCC_Feature_11', 'MFCC_Feature_12',
        'MFCC_Feature_13']]
# print(X)

y = df['Alzheimer_Status']
# print(y)

### Train Test Split

In [5]:
# Split data into a training set and a testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Random Forest Classifier

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneOut
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from pickle import dump
from pickle import load
from sklearn.metrics import matthews_corrcoef

sc = StandardScaler()
X = sc.fit_transform(X)
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)

In [11]:
param_grid = {'n_estimators': [50,100,150,200,250,300,350,400,450,500,700,1000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth' : [1,2,3,4,5,6,7,8,9,10], 'criterion' :['entropy', 'gini']}
model = RandomForestClassifier()

In [12]:
loocv = LeaveOneOut()
scoring = 'accuracy'
kfold = KFold(n_splits=10, random_state=None)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=kfold)
grid_result = grid.fit(rescaledX, y_train)

2400 fits failed out of a total of 7200.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2400 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\HP\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\HP\anaconda3\Lib\site-packages\sklearn\base.py", line 1144, in wrapper
    estimator._validate_params()
  File "C:\Users\HP\anaconda3\Lib\site-packages\sklearn\base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\HP\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

In [None]:
best_model = grid_result.best_estimator_
best_model.fit(X_train, y_train)
filename = 'GFCC_German_A_RF1.sav'
dump(best_model,open(filename, 'wb'))
loaded_model = load(open(filename, 'rb'))

In [None]:
y_pred1 = loaded_model.predict(X_test)
MCC=matthews_corrcoef(y_test, y_pred1)

In [None]:
cm1 = confusion_matrix(y_test, y_pred1)
acc = (cm1[0,0]+cm1[1,1])/(cm1[0,0]+cm1[0,1]+cm1[1,0]+cm1[1,1])
spec = (cm1[0,0])/(cm1[0,0]+cm1[0,1])
sens = (cm1[1,1])/(cm1[1,0]+cm1[1,1])

In [None]:
print('Testing Accuracy =' ,acc)
print('Testing Sensitivity(abnormality) =' ,sens)
print('Testing Specificity(normality) =' ,spec)
print ('confusion matrix =', cm1)
print ('MCC Score =', MCC)
print(X.shape, y.shape)

## Using Support Vector Machine

In [221]:
from sklearn.svm import SVC

### Linear Kernel

In [228]:
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(X_train, y_train)


In [229]:
y_pred_svm = svm_model.predict(X_test)


In [230]:
from sklearn.metrics import accuracy_score, classification_report

accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

print(f'Accuracy (SVM): {accuracy_svm}')
print(report_svm)


Accuracy (SVM): 0.5833333333333334
              precision    recall  f1-score   support

           0       0.55      0.50      0.52        22
           1       0.61      0.65      0.63        26

    accuracy                           0.58        48
   macro avg       0.58      0.58      0.58        48
weighted avg       0.58      0.58      0.58        48



### Radial Basis Function

In [231]:
svm_model = SVC(kernel='rbf', C=1.0)
svm_model.fit(X_train, y_train)

In [232]:
y_pred_svm = svm_model.predict(X_test)


In [233]:
from sklearn.metrics import accuracy_score, classification_report

accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

print(f'Accuracy (SVM): {accuracy_svm}')
print(report_svm)

Accuracy (SVM): 0.5416666666666666
              precision    recall  f1-score   support

           0       0.50      0.14      0.21        22
           1       0.55      0.88      0.68        26

    accuracy                           0.54        48
   macro avg       0.52      0.51      0.45        48
weighted avg       0.53      0.54      0.46        48

