In [1]:
import numpy as np
from sklearn.svm import SVC 
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

## Load training/test data

In [2]:
# Read both data files through context managers so the handles are closed
# as soon as the text has been read, instead of leaking for the kernel's life.
with open('parkinsonsTrainStatML.dt') as f:
    content = f.read()
with open('parkinsonsTestStatML.dt') as ft:
    contentt = ft.read()

# One sample per line, whitespace-separated floats; the last column is used as
# the class label further down.
# - splitlines() + the blank-line filter keeps the final row even when the file
#   has no trailing newline (the previous `split('\n')[:-1]` always dropped the
#   last element, i.e. a real sample in that case).
# - split() without an argument tolerates repeated or trailing spaces, which
#   `split(' ')` turned into empty strings that float() cannot parse.
lines = np.array(
    [[float(tok) for tok in row.split()] for row in content.splitlines() if row.strip()]
)
linest = np.array(
    [[float(tok) for tok in row.split()] for row in contentt.splitlines() if row.strip()]
)

## Scale data to zero mean, unit variance

In [3]:
# Standardisation statistics are estimated once, on the training features only
# (every column except the last, which holds the label), and the very same
# values are applied to both the training and the test set.
means = lines[:, :-1].mean(axis=0)
stds = lines[:, :-1].std(axis=0)

# A feature that is constant in the training set has std == 0; dividing by it
# would fill the column with NaN/inf. Scale such columns by 1 instead, so they
# simply become zero after centring.
scale = np.where(stds == 0, 1.0, stds)

x_normed = (lines[:, :-1] - means) / scale
x_normedt = (linest[:, :-1] - means) / scale

# Labels live in the last column of each matrix.
y_train = lines[:, -1]
y_test = linest[:, -1]
#np.savetxt('normed.txt', normed)
#np.savetxt('normedt.txt', normedt)

## Helper function for counting TP, FP, TN and FN

In [4]:
def perf_measure(y_actual, y_hat):
    """Count confusion-matrix cells for labels encoded as 0.0 / 1.0.

    Args:
        y_actual: indexable sequence of ground-truth labels.
        y_hat: indexable sequence of predicted labels; its length determines
            how many positions are inspected.

    Returns:
        Tuple ``(TP, FP, TN, FN)``. A position whose prediction is neither
        1.0 nor 0.0 contributes to none of the four counts.
    """
    tally = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}

    for idx in range(len(y_hat)):
        actual = y_actual[idx]
        predicted = y_hat[idx]

        if predicted == 1.0:
            # Predicted positive: correct only if the truth is positive too.
            tally['TP' if actual == 1.0 else 'FP'] += 1
        elif predicted == 0.0:
            # Predicted negative: correct only if the truth is negative too.
            tally['TN' if actual == 0.0 else 'FN'] += 1

    return (tally['TP'], tally['FP'], tally['TN'], tally['FN'])

## Hyperparameter grid search with 5-fold cross-validation

In [11]:
# Search space handed to the SVC grid search: RBF kernel only, varying the
# regularisation strength C and the kernel width gamma.
param_grid = [
    {
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
        'kernel': ['rbf'],
    },
]

# Search space handed to the random-forest grid search.
param_grid_RF = [
    {
        'n_estimators': [100, 200],
        'max_depth': [None, 10],
        'min_samples_split': [2, 3],
    },
]

In [12]:
# Tune the random forest once per scoring metric and evaluate each winner on
# the held-out test set.
scores = ['accuracy', 'precision', 'recall']
best_parameters = {}
predictions = {}
for score in scores:
    print(f"# Tuning hyper-parameters for {score}")
    # A fixed random_state makes the forest (bootstrap samples and feature
    # sub-sampling) reproducible, so the three scoring passes differ only in
    # the metric being optimised and a Restart & Run All yields the same numbers.
    clf = GridSearchCV(
        RandomForestClassifier(random_state=0), param_grid_RF, scoring=score, cv=5
    )
    clf.fit(x_normed, y_train)
    best_parameters[score] = clf.best_params_
    # predict() uses the best parameter combination found by the search.
    y_true, predictions[score] = y_test, clf.predict(x_normedt)
    TP, FP, TN, FN = perf_measure(y_true, predictions[score])
    print(f'TP: {TP}, FP: {FP}, TN: {TN}, FN: {FN}')
    print(classification_report(y_true, predictions[score]))
print(best_parameters)

# Tuning hyper-parameters for accuracy
TP: 69, FP: 8, TN: 14, FN: 6
              precision    recall  f1-score   support

         0.0       0.70      0.64      0.67        22
         1.0       0.90      0.92      0.91        75

    accuracy                           0.86        97
   macro avg       0.80      0.78      0.79        97
weighted avg       0.85      0.86      0.85        97

# Tuning hyper-parameters for precision
TP: 71, FP: 8, TN: 14, FN: 4
              precision    recall  f1-score   support

         0.0       0.78      0.64      0.70        22
         1.0       0.90      0.95      0.92        75

    accuracy                           0.88        97
   macro avg       0.84      0.79      0.81        97
weighted avg       0.87      0.88      0.87        97

# Tuning hyper-parameters for recall
TP: 72, FP: 6, TN: 16, FN: 3
              precision    recall  f1-score   support

         0.0       0.84      0.73      0.78        22
         1.0       0.92      0.96 

In [13]:
# Tune the RBF-kernel SVC once per scoring metric and evaluate each winner on
# the held-out test set. Note that best_parameters / predictions are started
# afresh here, replacing whatever an earlier cell stored under those names.
scores = ['accuracy', 'precision', 'recall']
best_parameters = {}
predictions = {}
for score in scores:
    print(f"# Tuning hyper-parameters for {score}")

    # 5-fold cross-validated search over param_grid, optimising `score`.
    clf = GridSearchCV(SVC(), param_grid, scoring=score, cv=5)
    clf.fit(x_normed, y_train)
    best_parameters[score] = clf.best_params_

    # Held-out evaluation of the selected model.
    y_true = y_test
    predictions[score] = clf.predict(x_normedt)
    TP, FP, TN, FN = perf_measure(y_true, predictions[score])
    print(f'TP: {TP}, FP: {FP}, TN: {TN}, FN: {FN}')
    print(classification_report(y_true, predictions[score]))

print(best_parameters)

# Tuning hyper-parameters for accuracy
TP: 74, FP: 13, TN: 9, FN: 1
              precision    recall  f1-score   support

         0.0       0.90      0.41      0.56        22
         1.0       0.85      0.99      0.91        75

    accuracy                           0.86        97
   macro avg       0.88      0.70      0.74        97
weighted avg       0.86      0.86      0.83        97

# Tuning hyper-parameters for precision
TP: 74, FP: 13, TN: 9, FN: 1
              precision    recall  f1-score   support

         0.0       0.90      0.41      0.56        22
         1.0       0.85      0.99      0.91        75

    accuracy                           0.86        97
   macro avg       0.88      0.70      0.74        97
weighted avg       0.86      0.86      0.83        97

# Tuning hyper-parameters for recall
TP: 75, FP: 22, TN: 0, FN: 0
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        22
         1.0       0.77      1.00 