In [1]:
import pandas as pd
import numpy as np
from IPython.display import HTML
import warnings

# Load the ERP dataset from CSV into a DataFrame.
data = pd.read_csv('../data/ERP_data.csv')

# Feature names are every column after the first two.
# NOTE(review): presumably the first two columns are 'Subject' and
# 'Phenotype' -- verify against the CSV header.
labels = list(data.columns.values)[2:]

# Drop the 'Subject' column, then pull the 'Phenotype' column out as the
# classification target, leaving the remaining columns in `data`.
del data['Subject']
targets = data.pop('Phenotype')

In [2]:
from sklearn import preprocessing, feature_selection, cross_validation, ensemble

folds = 10  # number of folds for the stratified k-fold splits used by the evaluation cells

# Preprocessing chain, fitted on the *entire* dataset:
#   1. fill missing values with Imputer's default strategy,
#   2. standardise each feature with preprocessing.scale,
#   3. keep the 22 features with the highest f_classif score w.r.t. `targets`.
# NOTE(review): steps 2 and 3 see every sample (and step 3 every label) before
# the cross-validation in the following cells splits the data, so held-out
# folds influence the selected features. The reported scores are therefore
# likely optimistic; wrapping these steps and the classifier in a
# sklearn Pipeline evaluated inside the CV loop would avoid this.
imp = preprocessing.Imputer()
selector = feature_selection.SelectKBest(feature_selection.f_classif, k=22)
data = selector.fit_transform(
    preprocessing.scale(imp.fit_transform(data, targets)),
    targets,
)

In [3]:
from sklearn import discriminant_analysis, metrics

# Linear Discriminant Analysis evaluated with repeated, shuffled, stratified
# k-fold cross-validation. Metrics are computed once per repetition on the
# out-of-fold predictions and then averaged over all repetitions.
clf = discriminant_analysis.LinearDiscriminantAnalysis(solver='eigen', shrinkage=0.2)

n_repeats = 200  # number of independent reshuffles of the k-fold split

accuracies = []
precisions = []
recalls = []
fscores = []
confusions = []
# NOTE(review): this disables *all* warnings for the rest of the kernel
# session, not just for this loop; narrow the filter if possible.
warnings.filterwarnings("ignore")
for i in range(n_repeats):
    # Seed every repetition with its own index: each repetition still gets a
    # different shuffle, but the whole experiment is reproducible on re-run
    # (previously the shuffle was unseeded, so the printed numbers changed
    # on every execution).
    cross_val = cross_validation.StratifiedKFold(
        targets, n_folds=folds, shuffle=True, random_state=i)
    preds = cross_validation.cross_val_predict(clf, data, targets, cv=cross_val)
    accuracy = metrics.accuracy_score(targets, preds)
    # 'AD' is treated as the positive class for precision / recall / F1.
    precision, recall, fscore, support = metrics.precision_recall_fscore_support(
        targets, preds, average='binary', pos_label='AD')
    # Rows are true labels, columns are predictions, both ordered HC, AD.
    confusion = metrics.confusion_matrix(targets, preds, labels=['HC', 'AD'])
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    fscores.append(fscore)
    confusions.append(confusion)

# Report means over all repetitions (element-wise mean for the confusion matrix).
print("Accuracy: {0}, with std: {1}".format(np.mean(accuracies), np.std(accuracies)))
print("Precision: {0}".format(np.mean(precisions)))
print("Recall: {0}".format(np.mean(recalls)))
print("F1 Score: {0}".format(np.mean(fscores)))
print("Confusion Matrix:\n   HC     AD\n{0}".format(np.mean(confusions, axis=0)))

Accuracy: 0.698467336683417, with std: 0.012992622412959138
Precision: 0.6799164394878185
Recall: 0.7445959595959596
F1 Score: 0.7107066830258613
Confusion Matrix:
   HC     AD
[[ 65.28   34.72 ]
 [ 25.285  73.715]]


In [4]:
# Quadratic Discriminant Analysis evaluated with repeated, shuffled,
# stratified k-fold cross-validation. Metrics are computed once per
# repetition on the out-of-fold predictions and then averaged.
clf = discriminant_analysis.QuadraticDiscriminantAnalysis(reg_param=0.3)

n_repeats = 200  # number of independent reshuffles of the k-fold split

accuracies = []
precisions = []
recalls = []
fscores = []
confusions = []
# NOTE(review): this disables *all* warnings for the rest of the kernel
# session, not just for this loop; narrow the filter if possible.
warnings.filterwarnings("ignore")
for i in range(n_repeats):
    # Seed every repetition with its own index: each repetition still gets a
    # different shuffle, but the whole experiment is reproducible on re-run
    # (previously the shuffle was unseeded, so the printed numbers changed
    # on every execution).
    cross_val = cross_validation.StratifiedKFold(
        targets, n_folds=folds, shuffle=True, random_state=i)
    preds = cross_validation.cross_val_predict(clf, data, targets, cv=cross_val)
    accuracy = metrics.accuracy_score(targets, preds)
    # 'AD' is treated as the positive class for precision / recall / F1.
    precision, recall, fscore, support = metrics.precision_recall_fscore_support(
        targets, preds, average='binary', pos_label='AD')
    # Rows are true labels, columns are predictions, both ordered HC, AD.
    confusion = metrics.confusion_matrix(targets, preds, labels=['HC', 'AD'])
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    fscores.append(fscore)
    confusions.append(confusion)

# Report means over all repetitions (element-wise mean for the confusion matrix).
print("Accuracy: {0}, with std: {1}".format(np.mean(accuracies), np.std(accuracies)))
print("Precision: {0}".format(np.mean(precisions)))
print("Recall: {0}".format(np.mean(recalls)))
print("F1 Score: {0}".format(np.mean(fscores)))
print("Confusion Matrix:\n   HC     AD\n{0}".format(np.mean(confusions, axis=0)))

Accuracy: 0.669070351758794, with std: 0.01316542156720767
Precision: 0.6545137016986528
Recall: 0.7094949494949495
F1 Score: 0.6808139792167469
Confusion Matrix:
   HC     AD
[[ 62.905  37.095]
 [ 28.76   70.24 ]]
