In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('data/processed_nist_data.csv', sep=',', header=None)

raw_data = np.array(df.values)
labels = raw_data[:, 0]
data = raw_data[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=4)
print(X_train.shape)

(7000, 900)


In [27]:
# simulate splitting a dataset of 25 observations into 5 folds
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, random_state=None, shuffle=False)

X = X_train
y = y_train

for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

TRAIN: [1400 1401 1402 ..., 6997 6998 6999] TEST: [   0    1    2 ..., 1397 1398 1399]
TRAIN: [   0    1    2 ..., 6997 6998 6999] TEST: [1400 1401 1402 ..., 2797 2798 2799]
TRAIN: [   0    1    2 ..., 6997 6998 6999] TEST: [2800 2801 2802 ..., 4197 4198 4199]
TRAIN: [   0    1    2 ..., 6997 6998 6999] TEST: [4200 4201 4202 ..., 5597 5598 5599]
TRAIN: [   0    1    2 ..., 5597 5598 5599] TEST: [5600 5601 5602 ..., 6997 6998 6999]


In [29]:
from sklearn.cross_validation import KFold, cross_val_score
k_fold = KFold(len(X_train), n_folds=5, shuffle=True, random_state=0)
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(400, 40), random_state=1, learning_rate_init=0.01)
cv = cross_val_score(clf, X_train, y_train, cv=k_fold, n_jobs=1, scoring='accuracy')
print(cv)

[ 0.93660714  0.95535714  0.94821429  0.9375      0.96160714]


In [41]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(400, 40), random_state=1, learning_rate_init=0.01)
fit_result = clf.fit(X_train, y_train)

from sklearn import metrics

predicted_data = clf.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted_data)
print(fit_result)
print(acc)
print(metrics.confusion_matrix(y_test, predicted_data))

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(400, 40), learning_rate='constant',
       learning_rate_init=0.01, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)
0.960714285714
[[126   0   1   0   0   1   0   0   0   2]
 [  0 130   0   0   0   0   0   1   0   0]
 [  0   0 125   1   3   0   1   1   2   1]
 [  0   0   0 137   0   2   0   0   1   0]
 [  0   0   2   0 115   0   2   0   0   0]
 [  0   0   1   1   0 138   0   0   2   1]
 [  1   0   0   1   0   0 141   0   1   1]
 [  0   1   0   0   1   0   0 144   1   0]
 [  1   0   2   3   4   1   1   0 154   2]
 [  0   1   0   1   2   0   0   4   0 135]]


In [40]:
mlp = MLPClassifier(hidden_layer_sizes=(400, 40), max_iter=150, alpha=1e-5,
                    solver='lbfgs', tol=1e-4, random_state=1,
                    learning_rate_init=.1)
mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

predicted_data = clf.predict(X_test)
acc = metrics.accuracy_score(y_test, predicted_data)
print(fit_result)
print(acc)
print(metrics.confusion_matrix(y_test, predicted_data))

Training set score: 1.000000
Test set score: 0.960714
MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(400, 40), learning_rate='constant',
       learning_rate_init=0.01, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)
0.960714285714
[[126   0   1   0   0   1   0   0   0   2]
 [  0 130   0   0   0   0   0   1   0   0]
 [  0   0 125   1   3   0   1   1   2   1]
 [  0   0   0 137   0   2   0   0   1   0]
 [  0   0   2   0 115   0   2   0   0   0]
 [  0   0   1   1   0 138   0   0   2   1]
 [  1   0   0   1   0   0 141   0   1   1]
 [  0   1   0   0   1   0   0 144   1   0]
 [  1   0   2   3   4   1   1   0 154   2]
 [  0   1   0   1   2   0   0   4   0 135]]
