In [7]:
import tensorflow as tf
import math
import os
import numpy as np
import matplotlib.pyplot as plt
import pickle
import tqdm
from sklearn.model_selection import train_test_split

%matplotlib inline

In [9]:
with open('notMNIST.pickle', 'rb') as pfile:
    pickle_data = pickle.load(pfile)
    train_features = pickle_data['train_features']
    train_labels = pickle_data['train_labels']
    test_features = pickle_data['test_features']
    test_labels = pickle_data['test_labels']
    del pickle_data

In [11]:
features_count = 784
labels_count = 10

features = tf.placeholder(tf.float32)
labels = tf.placeholder(tf.float32)

weights = tf.Variable(tf.truncated_normal((features_count, labels_count)))
biases = tf.Variable(tf.zeros(labels_count))

In [12]:
train_features, valid_features, train_labels, valid_labels = train_test_split(train_features, 
                                                                             train_labels,
                                                                             test_size=0.05,
                                                                             random_state=10238)
print(train_features.shape)
print(valid_features.shape)

(199500, 784)
(10500, 784)


In [15]:
def train_nn(epochs, batch_size, learning_rate):

    logits = tf.matmul(features, weights) + biases
    prediction = tf.nn.softmax(logits)
    cross_entropy = -tf.reduce_sum(labels * tf.log(prediction), axis=1)
    loss = tf.reduce_mean(cross_entropy)
    is_correct_prediction = tf.equal(tf.argmax(labels, 1), tf.argmax(prediction, 1))
    accuracy = tf.reduce_mean(tf.cast(is_correct_prediction, tf.float32), axis=0)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    init = tf.global_variables_initializer()
    
    batch_count = int(math.ceil(train_features.shape[0]/batch_size))
    
    with tf.Session() as session:
        session.run(init)
        
        for epoch_i in range(epochs):
            
            for batch_i in range(batch_count):
                batch_start = batch_i * batch_size
                batch_features = train_features[batch_start:batch_start + batch_size]
                batch_labels = train_labels[batch_start:batch_start + batch_size]
                
                session.run(optimizer, feed_dict={features:batch_features, labels:batch_labels})
                
        test_acc = session.run(accuracy, feed_dict={features:test_features, labels:test_labels})
        test_loss = session.run(loss, feed_dict={features:test_features, labels:test_labels})
        
    return test_loss, test_acc, weights, biases


In [20]:
epochs = [10, 20, 50, 80, 100]
batch_size = [50, 100, 200, 300, 500, 1000, 2000, 4000]
learning_rate = [0.01, 0.02, 0.05, 0.08, 0.1, 0.2, 0.3, 0.5, 0.8, 1]
# batch_size = [50, 100]
# learning_rate = [0.01, 0.02]
test_loss = {}
test_acc = {}
for ep in epochs:
    for bs in batch_size:
        for lr in learning_rate:
            test_loss[(ep, bs, lr)], test_acc[(ep, bs, lr)] = train_nn(ep, bs, lr)
            print('epochs = {}, batch_size = {}, learning_rate = {}, loss = {}, accuracy = {}'.format(
                ep, bs, lr, test_loss[(ep, bs, lr)], test_acc[(ep, bs, lr)]))

epochs = 10, batch_size = 50, learning_rate = 0.01, loss = 0.7572305798530579, accuracy = 0.8413000106811523
epochs = 10, batch_size = 50, learning_rate = 0.02, loss = 0.6197127103805542, accuracy = 0.8508999943733215
epochs = 10, batch_size = 50, learning_rate = 0.05, loss = 0.5025433301925659, accuracy = 0.8707000017166138
epochs = 10, batch_size = 50, learning_rate = 0.08, loss = 0.4720931649208069, accuracy = 0.8792999982833862
epochs = 10, batch_size = 50, learning_rate = 0.1, loss = 0.47540849447250366, accuracy = 0.8730999827384949
epochs = 10, batch_size = 50, learning_rate = 0.2, loss = 0.5066466927528381, accuracy = 0.8665000200271606
epochs = 10, batch_size = 50, learning_rate = 0.3, loss = 0.5743876695632935, accuracy = 0.8561999797821045
epochs = 10, batch_size = 50, learning_rate = 0.5, loss = nan, accuracy = 0.10000000149011612
epochs = 10, batch_size = 50, learning_rate = 0.8, loss = nan, accuracy = 0.10000000149011612
epochs = 10, batch_size = 50, learning_rate = 1, lo

epochs = 10, batch_size = 4000, learning_rate = 0.8, loss = nan, accuracy = 0.10000000149011612
epochs = 10, batch_size = 4000, learning_rate = 1, loss = nan, accuracy = 0.10000000149011612
epochs = 20, batch_size = 50, learning_rate = 0.01, loss = 0.6119176745414734, accuracy = 0.8528000116348267
epochs = 20, batch_size = 50, learning_rate = 0.02, loss = 0.5218746662139893, accuracy = 0.8675000071525574
epochs = 20, batch_size = 50, learning_rate = 0.05, loss = 0.4471932649612427, accuracy = 0.8835999965667725
epochs = 20, batch_size = 50, learning_rate = 0.08, loss = 0.4359244108200073, accuracy = 0.8865000009536743
epochs = 20, batch_size = 50, learning_rate = 0.1, loss = 0.4329553246498108, accuracy = 0.8840000033378601
epochs = 20, batch_size = 50, learning_rate = 0.2, loss = 0.4789034128189087, accuracy = 0.876800000667572
epochs = 20, batch_size = 50, learning_rate = 0.3, loss = 0.5738382935523987, accuracy = 0.8546000123023987
epochs = 20, batch_size = 50, learning_rate = 0.5, 

epochs = 20, batch_size = 4000, learning_rate = 0.3, loss = 0.8125772476196289, accuracy = 0.8374999761581421
epochs = 20, batch_size = 4000, learning_rate = 0.5, loss = nan, accuracy = 0.10000000149011612
epochs = 20, batch_size = 4000, learning_rate = 0.8, loss = nan, accuracy = 0.10000000149011612
epochs = 20, batch_size = 4000, learning_rate = 1, loss = nan, accuracy = 0.10000000149011612
epochs = 50, batch_size = 50, learning_rate = 0.01, loss = 0.5021077990531921, accuracy = 0.8716999888420105
epochs = 50, batch_size = 50, learning_rate = 0.02, loss = 0.44617706537246704, accuracy = 0.8822000026702881
epochs = 50, batch_size = 50, learning_rate = 0.05, loss = 0.41445982456207275, accuracy = 0.8931999802589417
epochs = 50, batch_size = 50, learning_rate = 0.08, loss = 0.42003974318504333, accuracy = 0.8902000188827515
epochs = 50, batch_size = 50, learning_rate = 0.1, loss = 0.4252353012561798, accuracy = 0.8899000287055969
epochs = 50, batch_size = 50, learning_rate = 0.2, loss =

epochs = 50, batch_size = 4000, learning_rate = 0.2, loss = 0.6974086165428162, accuracy = 0.8468999862670898
epochs = 50, batch_size = 4000, learning_rate = 0.3, loss = 0.6387179493904114, accuracy = 0.8539999723434448
epochs = 50, batch_size = 4000, learning_rate = 0.5, loss = 0.8545836210250854, accuracy = 0.8066999912261963
epochs = 50, batch_size = 4000, learning_rate = 0.8, loss = nan, accuracy = 0.10000000149011612
epochs = 50, batch_size = 4000, learning_rate = 1, loss = nan, accuracy = 0.10000000149011612
epochs = 80, batch_size = 50, learning_rate = 0.01, loss = 0.4557301700115204, accuracy = 0.879800021648407
epochs = 80, batch_size = 50, learning_rate = 0.02, loss = 0.42014697194099426, accuracy = 0.8888000249862671
epochs = 80, batch_size = 50, learning_rate = 0.05, loss = nan, accuracy = 0.10000000149011612
epochs = 80, batch_size = 50, learning_rate = 0.08, loss = 0.41420161724090576, accuracy = 0.8931999802589417
epochs = 80, batch_size = 50, learning_rate = 0.1, loss =

epochs = 80, batch_size = 4000, learning_rate = 0.1, loss = nan, accuracy = 0.10000000149011612
epochs = 80, batch_size = 4000, learning_rate = 0.2, loss = 0.59742671251297, accuracy = 0.8586999773979187
epochs = 80, batch_size = 4000, learning_rate = 0.3, loss = 0.5323696732521057, accuracy = 0.8615000247955322
epochs = 80, batch_size = 4000, learning_rate = 0.5, loss = 0.7001679539680481, accuracy = 0.8353000283241272
epochs = 80, batch_size = 4000, learning_rate = 0.8, loss = nan, accuracy = 0.10000000149011612
epochs = 80, batch_size = 4000, learning_rate = 1, loss = nan, accuracy = 0.10000000149011612
epochs = 100, batch_size = 50, learning_rate = 0.01, loss = 0.4371536672115326, accuracy = 0.8848999738693237
epochs = 100, batch_size = 50, learning_rate = 0.02, loss = 0.4119434058666229, accuracy = 0.8909000158309937
epochs = 100, batch_size = 50, learning_rate = 0.05, loss = 0.40827706456184387, accuracy = 0.8949999809265137
epochs = 100, batch_size = 50, learning_rate = 0.08, lo

KeyboardInterrupt: 