In [None]:
from sklearn.datasets import fetch_mldata
from sklearn import model_selection

from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
#from mnist import MNIST
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.svm import LinearSVC
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.preprocessing import StandardScaler

In [None]:
# Download the dataset bunch and pull out the feature matrix and label vector.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; on newer versions this load needs fetch_openml instead -- confirm the
# scikit-learn version this notebook is pinned to.
mnist = fetch_mldata('MNIST original')
X = mnist["data"]
y = mnist["target"]

In [None]:
# Rows before N_TRAIN form the training pool; everything after is the test set.
# NOTE(review): this presumes the dataset is ordered train-first -- confirm.
N_TRAIN = 60000
X_train, X_test = X[:N_TRAIN], X[N_TRAIN:]
y_train, y_test = y[:N_TRAIN], y[N_TRAIN:]

# Hold out 20% of the training pool for validation. random_state is fixed so
# the same rows land in X_tr / X_val on every Restart & Run All.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

In [None]:
# Shuffle the training rows, keeping features and labels aligned.
# The permutation length is taken from X_tr itself: a hard-coded size would
# silently drop rows (if too small) or raise IndexError (if too large) as soon
# as the split proportions change. A seeded RandomState keeps the order
# reproducible across kernel restarts.
shuffle_index = np.random.RandomState(42).permutation(len(X_tr))
X_tr, y_tr = X_tr[shuffle_index], y_tr[shuffle_index]

In [None]:
# Pick one training sample and view it as a 28x28 image.
some_digit = X_tr[1784]
some_digit_image = some_digit.reshape(28, 28)

plt.imshow(
    some_digit_image,
    cmap=matplotlib.cm.binary,
    interpolation="nearest",
)
plt.show()

In [None]:
# Extremely-randomized-trees ensemble trained on the raw (unscaled) training rows.
# random_state is fixed so the fitted forest, and every score derived from it
# below, is reproducible from a fresh kernel.
clf_rf = ExtraTreesClassifier(n_estimators=200, n_jobs=10, random_state=42)
clf_rf.fit(X_tr, y_tr)

In [None]:
# Predict the label of the single sample shown above (one row, all features).
clf_rf.predict(some_digit.reshape(1, -1))


In [None]:
# Accuracy of the already-fitted model on the held-out validation split.
# cross_val_score would clone clf_rf and refit it on folds of the validation
# data, so it would never measure the model trained on X_tr.
accuracy_score(y_val, clf_rf.predict(X_val))

In [None]:
# Standardize the training features; the fitted scaler is kept in `scaler`.
scaler = StandardScaler()
X_tr_scaled = scaler.fit_transform(np.asarray(X_tr, dtype=np.float64))

In [None]:
# 3-fold cross-validated accuracy computed on the test rows.
# NOTE(review): cross_val_score clones clf_rf and refits it on folds of the
# test data, so this is not the accuracy of the model fitted on X_tr; the
# hold-out test accuracy is computed further down with accuracy_score.
test_cv_scores = cross_val_score(clf_rf, X_test, y_test, scoring="accuracy", cv=3)
test_cv_scores

In [None]:
# 3-fold cross-validated accuracy on the standardized training data.
# The scores are kept in a variable so they can actually be reported; printing
# the name `cross_val_score` would only show the function object.
cv_scores = cross_val_score(clf_rf, X_tr_scaled, y_tr, cv=3, scoring="accuracy")

# Out-of-fold predictions for every training row, used by the metric cells below.
y_tr_pred = cross_val_predict(clf_rf, X_tr_scaled, y_tr, cv=3)

print(cv_scores)

In [None]:
# Confusion matrix of the out-of-fold training predictions.
# This cell rebinds the name `confusion_matrix` to the resulting array, which
# would make a second execution fail with "ndarray is not callable". Importing
# the function under an alias here keeps the cell re-runnable.
from sklearn.metrics import confusion_matrix as compute_confusion_matrix

conf_mx = compute_confusion_matrix(y_tr, y_tr_pred)
# The plotting cell reads the matrix through the name `confusion_matrix`,
# so that binding is preserved.
confusion_matrix = conf_mx
print(conf_mx)

In [None]:
def plot_confusion_matrix(matrix):
    """Draw `matrix` with matshow on a new 8x8-inch figure and add a colorbar.

    Returns nothing; the figure is left for pyplot to display.
    """
    figure, axes = plt.subplots(figsize=(8, 8))
    image = axes.matshow(matrix)
    figure.colorbar(image)
    
# Raw counts in grayscale.
plt.matshow(confusion_matrix, cmap=plt.cm.gray)
plt.show()

# Divide each row by its total, then blank the diagonal so only the
# off-diagonal (misclassified) cells remain visible.
row_sums = np.sum(confusion_matrix, axis=1, keepdims=True)
norm_conf_mx = confusion_matrix / row_sums
np.fill_diagonal(norm_conf_mx, 0)

plt.matshow(norm_conf_mx, cmap=plt.cm.gray)
plt.show()

In [None]:
# Hold-out accuracy: predictions of the fitted classifier on the test rows,
# compared against the test labels.
y_rf_pred = clf_rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_rf_pred)
test_accuracy

In [None]:
# Precision of the out-of-fold predictions: first per class (average=None),
# then the weighted average.
for averaging in (None, 'weighted'):
    print(precision_score(y_tr, y_tr_pred, average=averaging))

In [None]:
# Recall of the out-of-fold predictions: first per class (average=None),
# then the weighted average.
for averaging in (None, 'weighted'):
    print(recall_score(y_tr, y_tr_pred, average=averaging))

In [None]:
# F1 of the out-of-fold predictions: first per class (average=None),
# then the weighted average.
for averaging in (None, 'weighted'):
    print(f1_score(y_tr, y_tr_pred, average=averaging))