# Initial Set Up

In [1]:
import idx2numpy

X_train = idx2numpy.convert_from_file('train-images-idx3-ubyte')
y_train = idx2numpy.convert_from_file('train-labels-idx1-ubyte')
X_test = idx2numpy.convert_from_file('t10k-images-idx3-ubyte')
y_test = idx2numpy.convert_from_file('t10k-labels-idx1-ubyte')

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [2]:
# flatten the image.
# turn the data into a (samples, feature) matrix:
X_train = X_train.reshape((len(X_train), -1))
X_test = X_test.reshape((len(X_test), -1))

print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


# MNIST Models
## Logistic Regression

In [3]:
# create logistic regression object
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver='lbfgs')
logreg.fit(X_train, y_train)

# test values
y_predicted = logreg.predict(X_test)
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(y_test, y_predicted)
classification_report = classification_report(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 958    0    0    4    0    3    5    2    6    2]
 [   0 1116    3    1    0    1    4    1    8    1]
 [   8   12  906   18    9    5   10   11   50    3]
 [   3    0   19  916    2   23    5   11   24    7]
 [   1    2    5    3  910    0   11    2   10   38]
 [  11    2    1   40   10  756   16    8   40    8]
 [   7    3    7    2    4   17  911    1    6    0]
 [   3    6   24    4    7    1    1  946    5   31]
 [   9   15    7   22   11   26    7   12  854   11]
 [   9    6    2   13   30    4    0   26   16  903]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.96      0.98      0.97      1135
          2       0.93      0.88      0.90      1032
          3       0.90      0.91      0.90      1010
          4       0.93      0.93      0.93       982
          5       0.90      0.85      0.88       892
          6       0.94      0.95      0.95       958
    

## One Hidden Layer MLP (50 nodes)

In [4]:
#import
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier
#create and fit mlp
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(50), random_state=1)
mlp.fit(X_train, y_train)

# test values
y_predicted = mlp.predict(X_test)
# print confusion matrix
confusion = confusion_matrix(y_test, y_predicted)
classification_report = classification_report(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 877    0    7    2    0   60   10    1   23    0]
 [   0 1094   12    3    0    1    2    4   18    1]
 [   5    0  926    4   10    3   13    5   59    7]
 [   0    1   91  817    0   41    1   12   36   11]
 [   1    0    2    1  654    2   11    3    8  300]
 [   5    1    4   94    3  641    9    4  113   18]
 [   5    3    5    2   21   11  882    0   28    1]
 [   1    4   74    5    2    2    0  891    8   41]
 [   4    1   56   18    1   23    6    4  837   24]
 [   3    6    1   14   19    7    0   13   18  928]]

Classification report
             precision    recall  f1-score   support

          0       0.97      0.89      0.93       980
          1       0.99      0.96      0.97      1135
          2       0.79      0.90      0.84      1032
          3       0.85      0.81      0.83      1010
          4       0.92      0.67      0.77       982
          5       0.81      0.72      0.76       892
          6       0.94      0.92      0.93       958
    

## One Hidden Layer MLP (100 nodes)

In [5]:
#import
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier
#create and fit mlp
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(100), random_state=1)
mlp.fit(X_train, y_train)

# test values
y_predicted = mlp.predict(X_test)
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(y_test, y_predicted)
classification_report = classification_report(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 942    1    6    2    0    5   12    6    6    0]
 [   0 1109    3    3    0    1    2    1   15    1]
 [   5    3  976   15    7    1    4   10    8    3]
 [   1    3   13  945    0   16    1    3   21    7]
 [   0    2   13    0  922    1    4    2    6   32]
 [   4    1    1   27    2  818   13    2   18    6]
 [   5    4    5    0    3   10  923    2    6    0]
 [   1    4   20    5    6    4    0  956    4   28]
 [   4    0    7    8    8   10    4    4  919   10]
 [   4    5    3    8   27    6    0    6   14  936]]

Classification report
             precision    recall  f1-score   support

          0       0.98      0.96      0.97       980
          1       0.98      0.98      0.98      1135
          2       0.93      0.95      0.94      1032
          3       0.93      0.94      0.93      1010
          4       0.95      0.94      0.94       982
          5       0.94      0.92      0.93       892
          6       0.96      0.96      0.96       958
    

## One Hidden Layer MLP (400 nodes)

In [7]:
#import
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier
#create and fit mlp
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(400), random_state=1)
mlp.fit(X_train, y_train)

# test values
y_predicted = mlp.predict(X_test)
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(y_test, y_predicted)
classification_report = classification_report(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 965    0    2    2    1    2    3    2    2    1]
 [   0 1121    3    2    0    2    2    3    2    0]
 [   5    3  994    6    4    0    2    6   10    2]
 [   1    0    3  981    0    8    0    2    5   10]
 [   1    0    5    0  952    1    2    4    2   15]
 [   4    0    2   15    2  849    8    1    4    7]
 [   4    2    2    1    6   11  927    1    4    0]
 [   1    4   10    3    3    0    0  991    3   13]
 [   2    0    3   13    4    7    5    3  930    7]
 [   0    4    1    2   10    5    2    8    5  972]]

Classification report
             precision    recall  f1-score   support

          0       0.98      0.98      0.98       980
          1       0.99      0.99      0.99      1135
          2       0.97      0.96      0.97      1032
          3       0.96      0.97      0.96      1010
          4       0.97      0.97      0.97       982
          5       0.96      0.95      0.96       892
          6       0.97      0.97      0.97       958
    

## Two Hidden Layer MLP (100 and 50 nodes)

In [8]:
#import
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier
#create and fit mlp
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(100, 50), random_state=1)
mlp.fit(X_train, y_train)

# test values
y_predicted = mlp.predict(X_test)
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(y_test, y_predicted)
classification_report = classification_report(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 964    0    3    0    2    2    1    5    2    1]
 [   0 1120    6    1    0    1    2    0    2    3]
 [   7    1  990   17    3    1    2    5    6    0]
 [   0    1    9  976    1    8    0    3    7    5]
 [   1    0    7    0  937    1    8    0    3   25]
 [   4    0    1   12    0  857    5    0    7    6]
 [   5    2    0    0    7    4  936    0    4    0]
 [   2    3   10    2    0    0    0  994    4   13]
 [   5    1    6    7    3    8    2    7  933    2]
 [   3    3    0    7   18   13    1   10    4  950]]

Classification report
             precision    recall  f1-score   support

          0       0.97      0.98      0.98       980
          1       0.99      0.99      0.99      1135
          2       0.96      0.96      0.96      1032
          3       0.95      0.97      0.96      1010
          4       0.96      0.95      0.96       982
          5       0.96      0.96      0.96       892
          6       0.98      0.98      0.98       958
    

## Questions
### 1. Which model gives the best accuracy? Which the best overall F1 score?
The model with the highest accuracy is the one-hidden layer MLP with 400 nodes, with 96.8% accuracy. The two-hidden layer MLP model is a close second, with 96.6%. Both these models are tied for best overall F1 score, with a score of .97. The 100-node single-hidden layer MLP is close, with an F1 score of .94.
### 2. Which model gives the worst accuracy? Which the worst overall F1 score?
The model with the lowest accuracy is the one-hidden layer MLP with 50 nodes, with 85.5% accuracy. This is significantly worse than the next-worse model, the logistic regression model, which has an accuracy of 91.8%. The 50-node single-hidden layer MLP also has the lowest F1 score of .85. The next-worse F1 score, which belongs to the logistic regression model, is .92.
### 3. What is the shape of the training set? How many nodes are in the input layer of the network?
The shape of the training set is 60,000 x 784. There are 784 nodes in the input layer of the network, because that is the number of parameters for each item in the set.
### 4. Look the documentation for MLPClassifier. Why are we using lbfgs solver? Look up l-bfgs and provide a description of what it does.
In the MLPClassifier documentation, it states that the lbfgs solver is "an optimizer in the family of quasi-Newton methods." In small datasets, it converges faster than the default solver, which is why we're using it for this set. 

More specifically, limited-memory BFGS approximates the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm. As a quasi-Newton method, the BFGS algorithm replaces the exact inverse Hessian matrix used in Newton's method with an approximation. L-BFGS stores a less dense approximation of the approximated inverse Hessian matrix than the original BFGS algorithm, which results in a smaller memory requirement and is therefore better suited to problems with large numbers of variables.

L-BFGS starts with an initial estimation of the best value, and then iterately improves that estimate. The search direction is equal to the inverse Hessian matrix * the current derivative.

(sources:   http://aria42.com/blog/2014/12/understanding-lbfgs, https://en.wikipedia.org/wiki/Limited-memory_BFGS, https://en.wikipedia.org/wiki/Quasi-Newton_method)
### 5. Why do you think the best/worst networks are that way?
Intuitively, it makes sense that the more nodes there are in a neural network, the more the network can learn about the training set, which would make it more accurate at predicting the results from that dataset (although there is always the risk of overtraining). The 400-node single-hidden-layer network and the two-hidden-layer network both have more nodes overall than the other MLPs. However, the two-hidden-layer MLP has significantly fewer total nodes (150) than the 400-node network with a relatively small (less than 0.3%) reduction in accuracy. This would seem to indicate that adding depth is more effective than adding width. In the interest of making a fair comparison, I ran a one-hidden-layer MLP with 150 nodes (the same total number of nodes as the two-hidden-layer MLP) below:

In [36]:
#import
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neural_network import MLPClassifier
#create and fit mlp
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(150), random_state=1)
mlp.fit(X_train, y_train)

# test values
y_predicted = mlp.predict(X_test)
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(y_test, y_predicted)
classification_report = classification_report(y_test, y_predicted)
accuracy = accuracy_score(y_test, y_predicted)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 955    0    4    1    3    4    3    3    5    2]
 [   0 1114    3    3    0    2    3    2    8    0]
 [  10    5  969   10    2    1    7   10   16    2]
 [   1    1   12  965    0    9    2    6   11    3]
 [   0    3    4    0  936    0    9    4    4   22]
 [   4    1    1   17    2  841    9    2   11    4]
 [   6    2    2    1    8    6  928    0    5    0]
 [   2    5   16    8    7    0    0  969    4   17]
 [   5    2   13   13    6    7    6    8  911    3]
 [   3    5    1    8   17    7    2   15   10  941]]

Classification report
             precision    recall  f1-score   support

          0       0.97      0.97      0.97       980
          1       0.98      0.98      0.98      1135
          2       0.95      0.94      0.94      1032
          3       0.94      0.96      0.95      1010
          4       0.95      0.95      0.95       982
          5       0.96      0.94      0.95       892
          6       0.96      0.97      0.96       958
    

As predicted, this MLP was better than the 50-node (85.5% accuracy) and 100-node (94.4%) MLPs, but worse than the 400-node (96.8%) and two-hidden-layer (96.6%) MLP.

# Tensorflow
### 6. Experiment and try to create a better performing network using tensorflow. Explain what you tried and document the results.
#### Initial Model

In [48]:
#import tensorflow
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
# using example code from https://www.tensorflow.org/get_started/mnist/beginners
# reimport data for use in tensorflow
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# define model
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# cross-entropy
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 1000 times
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In the cell above, I built and trained the model using the code directly from the tensorflow beginner MNIST tutorial, using a learning rate of 0.5, with a batch size of 100, and running the training step 1000 times. These were the default values in the tutorial. The model is evaluated below using the evaluation method in the tensorflow tutorial:

In [49]:
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

0.9071


In order to get more information on the results of the model, I imported the sklearn confusion matrix, classification report, and the accuracy score (it's worth noting that the sklearn accuracy score and the accuracy from the tensorflow tutorial come up with the same result). I will be using the sklearn metrics to judge the rest of the models.

In [50]:
## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 961    0    3    1    0    1    7    1    6    0]
 [   0 1099    3    5    1    2    5    1   19    0]
 [  10    0  903   16   15    1   16   19   43    9]
 [   3    0   23  904    1   30    5   12   22   10]
 [   2    4    5    0  908    1   11    1   10   40]
 [  15    5    5   42   20  713   21   14   48    9]
 [  18    3    4    2   10   16  899    1    5    0]
 [   3   20   33    3   13    0    0  921    5   30]
 [   7    8   10   15    7   20   13   12  872   10]
 [  11    5    4    9   44   18    0   16   11  891]]

Classification report
             precision    recall  f1-score   support

          0       0.93      0.98      0.96       980
          1       0.96      0.97      0.96      1135
          2       0.91      0.88      0.89      1032
          3       0.91      0.90      0.90      1010
          4       0.89      0.92      0.91       982
          5       0.89      0.80      0.84       892
          6       0.92      0.94      0.93       958
    

#### Testing the variables in isolation
For the models below, I tested some different values for the learning rate, batch size, and number of batches that are run. For the first set of models, I adjusted one of these at a time while leaving the others as the default values.

##### Adjusting Learning Rate

In [55]:
# learning rate at 0.1
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 950    0    3    1    1    0   16    1    8    0]
 [   0 1106    2    4    1    0    5    1   16    0]
 [  13    8  867   19   21    0   28   23   44    9]
 [   4    2   25  902    1    0   10   17   31   18]
 [   1    8    5    0  883    0   15    1    6   63]
 [  82   26    4  295   67    0   49   35  287   47]
 [  22    3    5    2    7    0  908    0   11    0]
 [   3   28   33    1   12    0    4  900    3   44]
 [   8   14   14   34   12    0   18   13  842   19]
 [  18   10    7   14   52    0    3   24   14  867]]

Classification report
             precision    recall  f1-score   support

          0       0.86      0.97      0.91       980
          1       0.92      0.97      0.95      1135
          2       0.90      0.84      0.87      1032
          3       0.71      0.89      0.79      1010
          4       0.84      0.90      0.87       982
          5       0.00      0.00      0.00       892
          6       0.86      0.95      0.90       958
    

In [52]:
# learning rate at 0.25
train_step = tf.train.GradientDescentOptimizer(0.25).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 1000 times
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    2    1    0    0    6    1    8    0]
 [   0 1107    2    4    1    0    4    1   16    0]
 [  13    1  899   18   14    0   18   23   37    9]
 [   6    0   26  898    1   25    5   16   21   12]
 [   2    5    5    0  896    1   12    1    9   51]
 [  29   12    7   77   20  640   24   16   57   10]
 [  19    3    4    2    7   16  901    0    6    0]
 [   4   20   36    2   12    0    1  919    5   29]
 [  10   10   13   24   11   21   16   16  838   15]
 [  14    7    7   13   44   13    1   22    8  880]]

Classification report
             precision    recall  f1-score   support

          0       0.91      0.98      0.94       980
          1       0.95      0.98      0.96      1135
          2       0.90      0.87      0.88      1032
          3       0.86      0.89      0.88      1010
          4       0.89      0.91      0.90       982
          5       0.89      0.72      0.80       892
          6       0.91      0.94      0.93       958
    

In [53]:
# learning rate at 0.75
train_step = tf.train.GradientDescentOptimizer(0.75).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 1000 times
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 964    0    0    2    1    2    8    1    2    0]
 [   0 1103    2    6    1    1    5    2   15    0]
 [  14    2  884   20   15    0   17   26   41   13]
 [   4    1   16  925    1   20    4   10   17   12]
 [   3    2    3    0  899    0   13    1    5   56]
 [  17    3    2   46   15  726   24   10   38   11]
 [  18    3    4    3   11   12  905    1    1    0]
 [   4   17   23    4   11    0    0  927    3   39]
 [  10    9    6   27    8   17   15   16  853   13]
 [  14    5    3   11   25   14    1   13    7  916]]

Classification report
             precision    recall  f1-score   support

          0       0.92      0.98      0.95       980
          1       0.96      0.97      0.97      1135
          2       0.94      0.86      0.90      1032
          3       0.89      0.92      0.90      1010
          4       0.91      0.92      0.91       982
          5       0.92      0.81      0.86       892
          6       0.91      0.94      0.93       958
    

In [62]:
# learning rate at 0.75
train_step = tf.train.GradientDescentOptimizer(1).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 1000 times
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    1    1    1    2   10    1    2    0]
 [   0 1106    3    5    0    2    5    1   13    0]
 [  14    0  909   14   12    0   20   16   38    9]
 [   4    0   23  910    1   27    5    8   19   13]
 [   2    2    3    0  895    0   17    2    6   55]
 [  15    3    3   37   14  742   21    8   39   10]
 [  14    3    4    2    9   14  911    0    1    0]
 [   5   18   31    3   11    0    0  914    4   42]
 [   7    9    6   18    8   16   14   13  874    9]
 [   9    6    3    9   23   16    1    7    9  926]]

Classification report
             precision    recall  f1-score   support

          0       0.93      0.98      0.96       980
          1       0.96      0.97      0.97      1135
          2       0.92      0.88      0.90      1032
          3       0.91      0.90      0.91      1010
          4       0.92      0.91      0.92       982
          5       0.91      0.83      0.87       892
          6       0.91      0.95      0.93       958
    

Adjusting the learning rate in isolation seems to result in worse results as the size of the learning rate decreases. This makes sense, as a smaller learning rate means it will take longer to reach the minimum, so the training could be terminating before the estimated minimum is reached.
##### Adjusting Batch Size

In [56]:
# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(1000)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 964    0    1    2    1    1    6    1    4    0]
 [   0 1100    2    6    1    2    5    1   18    0]
 [  13    0  893   18   15    0   17   24   42   10]
 [   4    0   24  900    1   28    5   15   22   11]
 [   3    4    4    0  911    1   11    1    9   38]
 [  18    5    4   38   19  721   23   11   44    9]
 [  18    3    4    2    9   16  900    1    5    0]
 [   4   20   26    2   12    0    0  931    5   28]
 [   9    9   10   18    8   16   13   17  866    8]
 [  12    7    3    9   45   18    1   17    8  889]]

Classification report
             precision    recall  f1-score   support

          0       0.92      0.98      0.95       980
          1       0.96      0.97      0.96      1135
          2       0.92      0.87      0.89      1032
          3       0.90      0.89      0.90      1010
          4       0.89      0.93      0.91       982
          5       0.90      0.81      0.85       892
          6       0.92      0.94      0.93       958
    

In [57]:
# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(10)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 961    0    1    0    1    1    9    2    5    0]
 [   0 1102    1    6    1    0    6    2   16    1]
 [  18    1  849   35   14    0   27   29   43   16]
 [   4    0   14  904    1   29    6   19   13   20]
 [   1    5    3    0  865    0   19    1    5   83]
 [  19    5    2   62   25  657   34   14   61   13]
 [  16    3    4    3    8    7  915    0    2    0]
 [   4   17   19    3   12    0    2  917    5   49]
 [  14   10    7   27   13   13   20   17  821   32]
 [  15    6    3   10   33    6    2    9    6  919]]

Classification report
             precision    recall  f1-score   support

          0       0.91      0.98      0.95       980
          1       0.96      0.97      0.96      1135
          2       0.94      0.82      0.88      1032
          3       0.86      0.90      0.88      1010
          4       0.89      0.88      0.88       982
          5       0.92      0.74      0.82       892
          6       0.88      0.96      0.92       958
    

In [58]:
# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(50)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    1    2    1    2    8    1    3    0]
 [   0 1105    1    6    1    2    5    1   14    0]
 [  15    5  882   17   17    0   20   25   39   12]
 [   3    1   15  905    1   29    6   17   21   12]
 [   1    4    3    0  907    1   13    1    9   43]
 [  20    5    2   38   19  720   26   11   42    9]
 [  15    3    4    2   10   15  906    1    2    0]
 [   4   21   25    3   13    0    0  929    2   31]
 [  11    9    7   23    9   18   16   18  850   13]
 [  13    8    3    9   40   19    1   20    5  891]]

Classification report
             precision    recall  f1-score   support

          0       0.92      0.98      0.95       980
          1       0.95      0.97      0.96      1135
          2       0.94      0.85      0.89      1032
          3       0.90      0.90      0.90      1010
          4       0.89      0.92      0.91       982
          5       0.89      0.81      0.85       892
          6       0.91      0.95      0.92       958
    

Adjusting the batch size alone seems to have a minimal effect on the accuracy of the model.

##### Adjusting Number of Batches Run

In [59]:
# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(10000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 963    0    2    2    0    2    9    1    1    0]
 [   0 1111    3    4    0    2    4    2    9    0]
 [  14    1  922   10   15    2   14   14   32    8]
 [   3    0   24  923    1   19    3   10   18    9]
 [   2    3    5    0  915    0   13    2    5   37]
 [  11    3    6   38   12  760   14   10   30    8]
 [  11    3    4    2    7   10  917    1    3    0]
 [   3    9   28    4    8    0    0  951    4   21]
 [   4    5    5   16    8   20   11   14  889    2]
 [  12    6    3   11   23   13    0   16   10  915]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.97      0.98      0.98      1135
          2       0.92      0.89      0.91      1032
          3       0.91      0.91      0.91      1010
          4       0.93      0.93      0.93       982
          5       0.92      0.85      0.88       892
          6       0.93      0.96      0.94       958
    

In [60]:
# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(100):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 946    0    3    1    0    0   17    1   12    0]
 [   0 1096    7    3    1    0    5    1   22    0]
 [  14   15  868   19   20    0   31   19   35   11]
 [   5    4   33  874    1    0   10   16   40   27]
 [   3    9    4    0  821    0   21    2   12  110]
 [  83   30    9  301   52    0   48   35  272   62]
 [  19    3   13    2   10    0  892    0   19    0]
 [   5   38   31    0   12    0    4  878    5   55]
 [  11   15   16   40    9    0   21   13  823   26]
 [  17   12   10   12   40    0    3   25   16  874]]

Classification report
             precision    recall  f1-score   support

          0       0.86      0.97      0.91       980
          1       0.90      0.97      0.93      1135
          2       0.87      0.84      0.86      1032
          3       0.70      0.87      0.77      1010
          4       0.85      0.84      0.84       982
          5       0.00      0.00      0.00       892
          6       0.85      0.93      0.89       958
    

In [61]:
# learning rate at 0.5
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train
for _ in range(500):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    2    0    0    0    7    1    8    0]
 [   0 1102    2    4    1    1    4    0   21    0]
 [  13    2  892   18   12    0   17   23   42   13]
 [   6    1   21  890    1   34    6   14   22   15]
 [   3    8    4    0  866    1   13    1   15   71]
 [  22   14    6   37   19  684   23   13   64   10]
 [  19    3    4    1    6   16  899    0   10    0]
 [   5   21   35    2    9    0    1  910    6   39]
 [   8    9   11   16    7   18   15   12  865   13]
 [  15    8    5    9   30   16    1   17   13  895]]

Classification report
             precision    recall  f1-score   support

          0       0.91      0.98      0.95       980
          1       0.94      0.97      0.96      1135
          2       0.91      0.86      0.89      1032
          3       0.91      0.88      0.90      1010
          4       0.91      0.88      0.90       982
          5       0.89      0.77      0.82       892
          6       0.91      0.94      0.92       958
    

Adjusting how long the training runs seems to have the largest effect of any one element on the accuracy of the model. Setting the value to 10,000 gave resulted in the most accurate model so far (92.7%).

#### Testing the interaction of two variables
In the initial testing, the following seemed to hold true:
* Larger learning rates result in better scores. This may be the result of the smaller learning rates not being given enough runtime to reach the minimum
* Batch sizes alone do not have a large result on the model's accuracy, but larger batch sizes seem to be slightly better
* Increasing the number of batches run seems to improve the model (this may result in overtraining)

In this next step, I will examine how adjustments to these variables interact with each other. Because the number of training steps seems to have the largest affect on the model, I will focus on adjustments which include adjustments to this variable.
##### Learning Rate and Training Steps

In [64]:
train_step = tf.train.GradientDescentOptimizer(0.75).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 10000 times
for _ in range(10000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 964    0    2    2    1    1    7    1    2    0]
 [   0 1108    4    5    0    2    4    2   10    0]
 [  14    1  926    7   16    3   13   13   31    8]
 [   2    0   25  926    1   16    3    9   18   10]
 [   2    2    6    0  918    0   11    2    4   37]
 [  10    3    7   35   11  758   14   11   35    8]
 [  13    3    5    2    8   10  912    1    4    0]
 [   3    7   27    5    9    0    0  944    5   28]
 [   4    4    5   16    8   17   11   13  892    4]
 [  12    5    3   11   20   12    0   13   10  923]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.92      0.90      0.91      1032
          3       0.92      0.92      0.92      1010
          4       0.93      0.93      0.93       982
          5       0.93      0.85      0.89       892
          6       0.94      0.95      0.94       958
    

In [65]:
train_step = tf.train.GradientDescentOptimizer(0.25).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 963    0    1    2    1    1    9    1    2    0]
 [   0 1104    4    5    1    2    4    2   13    0]
 [  14    1  925    7   16    4   13   13   32    7]
 [   2    0   24  924    1   18    3   10   20    8]
 [   2    2    3    1  928    0   12    2    4   28]
 [  11    3    5   28   12  765   15   11   36    6]
 [  12    3    4    2    8   10  914    1    4    0]
 [   3    6   26    5    8    0    0  954    6   20]
 [   4    4    5   16    9   16   11   15  893    1]
 [  12    5    3   11   30   11    0   20   15  902]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.98      0.97      0.98      1135
          2       0.93      0.90      0.91      1032
          3       0.92      0.91      0.92      1010
          4       0.92      0.95      0.93       982
          5       0.93      0.86      0.89       892
          6       0.93      0.95      0.94       958
    

In [70]:
train_step = tf.train.GradientDescentOptimizer(0.25).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 10000 times
for _ in range(10000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 960    0    2    2    1    3    9    1    2    0]
 [   0 1106    2    6    1    2    4    2   12    0]
 [  14    1  910   12   15    2   14   18   39    7]
 [   3    1   20  922    1   23    2   10   19    9]
 [   2    1    2    1  926    0   13    2    4   31]
 [  10    3    6   37   14  753   15   11   35    8]
 [  14    3    5    2    9   11  908    1    5    0]
 [   5   11   25    5   10    0    0  943    3   26]
 [   4    7    5   21    9   16   11   15  883    3]
 [   9    6    3   11   36   16    0   13    9  906]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.97      0.97      0.97      1135
          2       0.93      0.88      0.90      1032
          3       0.90      0.91      0.91      1010
          4       0.91      0.94      0.92       982
          5       0.91      0.84      0.88       892
          6       0.93      0.95      0.94       958
    

In [67]:
train_step = tf.train.GradientDescentOptimizer(1).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 10000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    2    1    0    2    9    2    2    0]
 [   0 1110    4    4    0    1    5    2    9    0]
 [  11    2  932    7   16    5   12   12   29    6]
 [   3    0   22  920    0   21    5    9   21    9]
 [   2    3    5    0  918    0   12    2    7   33]
 [   8    2    5   28   14  773   13    8   34    7]
 [  13    3    5    2    7    9  917    1    1    0]
 [   3    7   28    4    9    1    0  944    5   27]
 [   4    6    4   16    7   16    9   10  899    3]
 [  11    5    2    7   21    9    1   13   14  926]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.92      0.90      0.91      1032
          3       0.93      0.91      0.92      1010
          4       0.93      0.93      0.93       982
          5       0.92      0.87      0.89       892
          6       0.93      0.96      0.94       958
    

Unsurprisingly, based on the results from the previous section, it appears that increasing the learning rate and number of times the loop is run also improves the accuracy of the model (93% is the new highest accuracy). Presumably at some point the learning rate will become too large and negatively affect the result.

##### Batch Size and Training Steps

In [69]:
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 10000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(1000)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    2    2    0    2    9    2    1    0]
 [   0 1110    4    4    0    2    4    2    9    0]
 [  12    1  929    7   18    6   10   13   29    7]
 [   2    0   24  924    0   18    4    9   19   10]
 [   2    2    4    1  924    0   11    2    4   32]
 [   8    3    3   31   12  768   15    9   36    7]
 [  12    3    4    2    8   12  915    1    1    0]
 [   3    7   27    4   10    1    0  951    4   21]
 [   4    6    4   19    7   18   11   15  888    2]
 [  12    5    2   11   25   11    1   14   12  916]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.93      0.90      0.91      1032
          3       0.92      0.91      0.92      1010
          4       0.92      0.94      0.93       982
          5       0.92      0.86      0.89       892
          6       0.93      0.96      0.94       958
    

In [71]:
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 10000 times
for _ in range(10000):
    batch_xs, batch_ys = mnist.train.next_batch(1000)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 961    0    2    2    1    1   10    1    2    0]
 [   0 1106    2    6    1    2    4    2   12    0]
 [  14    1  921   10   16    3   12   14   33    8]
 [   2    0   24  926    1   19    3    9   17    9]
 [   2    2    3    1  926    0   12    2    3   31]
 [  11    3    6   33   13  760   15   11   32    8]
 [  12    3    5    2    8   11  913    1    3    0]
 [   3    7   27    5    8    0    0  952    5   21]
 [   4    4    5   17    9   19   11   14  889    2]
 [  11    5    3   11   30   14    0   15   11  909]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.98      0.97      0.98      1135
          2       0.92      0.89      0.91      1032
          3       0.91      0.92      0.92      1010
          4       0.91      0.94      0.93       982
          5       0.92      0.85      0.88       892
          6       0.93      0.95      0.94       958
    

In [73]:
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 10000 times
for _ in range(5000):
    batch_xs, batch_ys = mnist.train.next_batch(500)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 960    0    2    2    1    3    9    1    2    0]
 [   0 1105    2    6    1    2    4    2   13    0]
 [  15    1  910   13   15    2   15   16   38    7]
 [   3    1   20  922    1   24    2   10   17   10]
 [   2    1    3    1  914    0   15    2    4   40]
 [  11    3    5   38   13  755   15   10   34    8]
 [  14    3    4    2    8   12  911    1    3    0]
 [   5   11   25    5    9    0    0  944    3   26]
 [   4    6    5   22    9   20   12   14  879    3]
 [   9    6    3   12   30   16    0   13    8  912]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.97      0.97      0.97      1135
          2       0.93      0.88      0.91      1032
          3       0.90      0.91      0.91      1010
          4       0.91      0.93      0.92       982
          5       0.91      0.85      0.87       892
          6       0.93      0.95      0.94       958
    

"Make everything bigger" seems like a crude approach toward improving the model, and so far none of the adjustments have created more than a couple percentage points of improvement in the model, but it is the best approach I have, based on the results of other testing.

##### Putting It All Together
The following models involve adjustment of all three parameters, using my best guesses based on the previous results. Using the MLPClassifier, the highest accuracy was 96.8%, and so far none of the tensorflow models have come close to reaching it (the highest accuracy in the tensorflow models so far has been 93.0%)

In [75]:
train_step = tf.train.GradientDescentOptimizer(1).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(1000)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 961    0    2    2    0    2   10    2    1    0]
 [   0 1109    4    4    0    2    5    2    9    0]
 [  10    2  933   10   16    4   11   13   27    6]
 [   2    0   22  931    0   15    5    8   16   11]
 [   2    2    4    1  923    0   11    2    4   33]
 [   8    3    5   33   15  771   13    7   31    6]
 [  13    3    6    2    7   10  915    1    1    0]
 [   3    6   27    6    9    1    0  950    4   22]
 [   4    6    5   20    8   19   10   14  884    4]
 [  12    5    2    8   22    9    1   14   12  924]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.92      0.90      0.91      1032
          3       0.92      0.92      0.92      1010
          4       0.92      0.94      0.93       982
          5       0.93      0.86      0.89       892
          6       0.93      0.96      0.94       958
    

Interestingly, despite seeing that batch size increase slightly improves the model in earlier testing, this resulted in the exact same accuracy (out to four decimal places) as this same setup with 1/10th the batch size resulted in earlier.

In [76]:
train_step = tf.train.GradientDescentOptimizer(.25).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(1000)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    2    2    0    2    9    1    2    0]
 [   0 1110    4    3    0    2    4    2   10    0]
 [  12    1  925    7   16    6   13   13   31    8]
 [   2    0   24  925    1   19    3    9   18    9]
 [   2    2    3    1  924    0   12    2    4   32]
 [   9    3    4   31   12  766   15   11   34    7]
 [  12    3    4    2    8   11  914    1    3    0]
 [   3    7   26    5    8    0    0  953    5   21]
 [   4    4    5   17    8   19   11   14  890    2]
 [  12    5    3   11   26   12    0   15   11  914]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.93      0.90      0.91      1032
          3       0.92      0.92      0.92      1010
          4       0.92      0.94      0.93       982
          5       0.92      0.86      0.89       892
          6       0.93      0.95      0.94       958
    

In [77]:
train_step = tf.train.GradientDescentOptimizer(1.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(50000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 963    0    2    2    0    2    8    2    1    0]
 [   0 1109    4    4    0    2    4    2    9    1]
 [  11    4  926   13   15    3   13   10   31    6]
 [   2    0   20  932    0   16    3    6   22    9]
 [   2    4    4    1  919    0   13    2    5   32]
 [   8    1    5   30   13  776   11    7   35    6]
 [  12    3    5    2    7   17  909    1    2    0]
 [   3    7   24    6    8    1    0  951    4   24]
 [   4    6    3   19    6   21    7   12  891    5]
 [  11    5    2    9   21   10    1   16   14  920]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.97      0.98      0.98      1135
          2       0.93      0.90      0.91      1032
          3       0.92      0.92      0.92      1010
          4       0.93      0.94      0.93       982
          5       0.92      0.87      0.89       892
          6       0.94      0.95      0.94       958
    

In [78]:
train_step = tf.train.GradientDescentOptimizer(1.5).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 959    0    2    2    0    2   11    2    2    0]
 [   0 1108    4    4    0    2    5    2    9    1]
 [   9    4  935    9   15    4   12   10   28    6]
 [   2    0   22  921    0   23    4    8   19   11]
 [   2    2    5    0  917    0   12    2    5   37]
 [   8    1    5   28   14  775   12    7   35    7]
 [  11    3    5    2    7   12  916    1    1    0]
 [   3    6   27    6    9    1    0  944    4   28]
 [   4    6    5   17    7   21    8    9  891    6]
 [  12    5    2    8   19    8    1   11   13  930]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.92      0.91      0.91      1032
          3       0.92      0.91      0.92      1010
          4       0.93      0.93      0.93       982
          5       0.91      0.87      0.89       892
          6       0.93      0.96      0.94       958
    

It appears increasing the learning rate to 1.5 reduces the accuracy of the model

In [79]:
train_step = tf.train.GradientDescentOptimizer(1.25).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(30000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 962    0    2    1    0    2    9    2    2    0]
 [   0 1108    4    4    0    2    5    2    9    1]
 [  14    2  934    9   16    0   11   13   27    6]
 [   3    0   22  928    0   14    5    8   19   11]
 [   2    2    5    0  919    0   12    2    4   36]
 [  10    1    5   32   15  767   12    8   35    7]
 [  12    3    6    2    8    8  917    1    1    0]
 [   3    6   26    4    9    1    0  954    4   21]
 [   5    5    5   19    9   15    9   15  889    3]
 [  12    5    2    8   21    8    1   21   11  920]]

Classification report
             precision    recall  f1-score   support

          0       0.94      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.92      0.91      0.91      1032
          3       0.92      0.92      0.92      1010
          4       0.92      0.94      0.93       982
          5       0.94      0.86      0.90       892
          6       0.93      0.96      0.95       958
    

In [80]:
train_step = tf.train.GradientDescentOptimizer(1.25).minimize(cross_entropy)

# make new session
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# train - run the training step 30000 times
for _ in range(50000):
    batch_xs, batch_ys = mnist.train.next_batch(500)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## using sklearn to get metrics
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
predictions = tf.argmax(y, 1)
known = tf.argmax(y_,1)
predict, actual = sess.run([predictions, known], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
# print confusion matrix, classificiation report, accuracy
confusion = confusion_matrix(actual, predict)
classification_report = classification_report(actual, predict)
accuracy = accuracy_score(actual, predict)
print('Confusion matrix')
print(confusion)
print('\nClassification report')
print(classification_report)
print ('Accuracy')
print(accuracy)

Confusion matrix
[[ 961    0    2    2    0    2   10    2    1    0]
 [   0 1110    4    4    0    1    5    2    9    0]
 [  11    2  930   10   16    3   12   13   29    6]
 [   2    0   21  930    0   16    4    8   19   10]
 [   2    2    4    1  921    0   12    2    5   33]
 [   8    1    5   34   15  769   13    7   34    6]
 [  12    3    6    2    7    9  917    1    1    0]
 [   3    6   26    6    9    1    0  947    4   26]
 [   4    6    4   20    7   16    9   12  891    5]
 [  12    5    2    9   22    7    1   14   13  924]]

Classification report
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       980
          1       0.98      0.98      0.98      1135
          2       0.93      0.90      0.91      1032
          3       0.91      0.92      0.92      1010
          4       0.92      0.94      0.93       982
          5       0.93      0.86      0.90       892
          6       0.93      0.96      0.94       958
    

Ultimately, the highest accuracy I was able to obtain was 93%, and the way I did it seems a little crude.
I strongly suspect that my results were largely the result of being new to tensorflow, and if I had a better understanding of the tools available, the tensorflow models might have beaten the scikit-learn models.

### JD-comments

Excellent Analysis.  You are right about tensorflow and the quality of these networks.  It is possible to make better analysis of MNIST with tensorflow using specialized types of networks (Convolutional NN) but for a small problem like this ~97% accuracy for a few minutes training is a really good result.