In [37]:
import pandas as pd
import numpy as np
import warnings

# Read the ERP table from disk.
data = pd.read_csv('../data/ERP_data.csv')

# Every column name except the two leading ones.
labels = list(data.columns[2:])

# Keep the 'Phenotype' column as the prediction target, then remove it together
# with the 'Subject' column so that `data` holds only the remaining columns.
targets = data['Phenotype']
data = data.drop(['Subject', 'Phenotype'], axis=1)

In [38]:
from sklearn import preprocessing
from sklearn import feature_selection
from sklearn import cross_validation

# Number of cross-validation folds.
folds = 10

# Univariate feature selector: keeps the 12 best-scoring columns under f_classif.
anova_selection = feature_selection.SelectKBest(feature_selection.f_classif, k=12)

# Fill in missing values with a default-configured Imputer, then standardise
# the result column-wise.
# NOTE(review): both steps are fitted on the complete data set, i.e. before any
# cross-validation split is made — confirm this is acceptable for the
# evaluation, or move them into the pipeline.
imp = preprocessing.Imputer()
data = preprocessing.scale(imp.fit_transform(data, targets))

In [39]:
from sklearn import ensemble
from sklearn import pipeline
from sklearn import metrics

# Reproducibility settings. Without explicit seeds both the shuffled fold
# assignment and the boosted trees draw from the global RNG, so the reported
# metrics change on every run.
SEED = 42
# Number of times the whole stratified k-fold evaluation is repeated.
N_REPEATS = 200

gboost_model = ensemble.GradientBoostingClassifier(
    min_samples_split=35, learning_rate=0.5, n_estimators=110, random_state=SEED)
# rfe / rfecv are built here but are not used by the evaluation in this cell.
rfe = feature_selection.RFE(gboost_model, n_features_to_select=9)
rfecv = feature_selection.RFECV(gboost_model, cv=folds)
# Evaluated model: ANOVA feature selection followed by gradient boosting.
# Selection is part of the pipeline, so it is re-fitted on every training fold.
clf = pipeline.make_pipeline(anova_selection, gboost_model)

# One entry per repetition.
accuracies = []
precisions = []
recalls = []
fscores = []
confusions = []
# Silences every warning for the rest of the session, not just for this loop.
warnings.filterwarnings("ignore")
for i in range(N_REPEATS):
    # A different but deterministic shuffle per repetition: reusing one fixed
    # seed would produce N_REPEATS identical splits.
    cross_val = cross_validation.StratifiedKFold(
        targets, n_folds=folds, shuffle=True, random_state=SEED + i)
    # Out-of-fold predictions for every sample.
    preds = cross_validation.cross_val_predict(clf, data, targets, cv=cross_val)
    accuracy = metrics.accuracy_score(targets, preds)
    # Binary metrics with 'AD' treated as the positive class.
    precision, recall, fscore, support = metrics.precision_recall_fscore_support(
        targets, preds, average='binary', pos_label='AD')
    # Rows/columns ordered HC, AD to match the header printed below.
    confusion = metrics.confusion_matrix(targets, preds, labels=['HC', 'AD'])
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    fscores.append(fscore)
    confusions.append(confusion)

# Averages over all repetitions; the confusion matrix is averaged element-wise.
print("Accuracy: {0}, with std: {1}".format(np.mean(accuracies), np.std(accuracies)))
print("Precision: {0}".format(np.mean(precisions)))
print("Recall: {0}".format(np.mean(recalls)))
print("F1 Score: {0}".format(np.mean(fscores)))
print("Confusion Matrix:\n   HC     AD\n{0}".format(np.mean(confusions, axis=0)))

Accuracy: 0.6760804020100503, with std: 0.02765116187178716
Precision: 0.6703307595721337
Recall: 0.6877272727272729
F1 Score: 0.6785276792627584
Confusion Matrix:
   HC     AD
[[ 66.455  33.545]
 [ 30.915  68.085]]
