In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

In [2]:
""" First, reload the data """
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']    
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']    
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save # hint to help gc free up memory
    print('Training info:', train_dataset.shape, train_labels.shape)
    print('Valid info:', valid_dataset.shape, valid_labels.shape)
    print('Test info:', test_dataset.shape, test_labels.shape)

Training info: (20000, 28, 28) (20000,)
Valid info: (10000, 28, 28) (10000,)
Test info: (10000, 28, 28) (10000,)


In [3]:
image_size = 28
num_labels = 10

def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size*image_size)).astype(np.float32)
    # map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0, ...]
    labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    return dataset,labels

train_dataset,train_labels = reformat(train_dataset,train_labels)
valid_dataset,valid_labels = reformat(valid_dataset,valid_labels)
test_dataset,test_labels = reformat(test_dataset,test_labels)

print('Training info:', train_dataset.shape, train_labels.shape)
print('Valid info:', valid_dataset.shape, valid_labels.shape)
print('Test info:', test_dataset.shape, test_labels.shape)

Training info: (20000, 784) (20000, 10)
Valid info: (10000, 784) (10000, 10)
Test info: (10000, 784) (10000, 10)


In [4]:
def accuracy(predictions, labels):
    accuracy = (100.0 * np.sum(np.argmax(predictions,1) == np.argmax(labels,1)) / predictions.shape[0])
    return accuracy

In [53]:
# http://www.cnblogs.com/hellocwh/p/5527141.html

batch_size = 256
do_dropout = True
do_regularization = True
do_lrd = True
count_hidden = 2

graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32,shape=(batch_size,num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    # layer1
    layer_node_count = 1024
    hidden_stddev = np.sqrt(2.0 / 784)
    w1 = tf.Variable(tf.truncated_normal(shape=(image_size*image_size, layer_node_count),stddev=hidden_stddev))
    b1 = tf.Variable(tf.zeros([layer_node_count]))
    ## layer1 train
    output1 = tf.nn.relu(tf.matmul(tf_train_dataset, w1) + b1)
    ## layer1 valid
    valid_output1 = tf.nn.relu(tf.matmul(valid_dataset, w1) + b1)
    ## layer1 test
    test_output1 = tf.nn.relu(tf.matmul(test_dataset, w1) + b1)
    
    hidden = output1
    # train dropout
    hidden_dropout = output1
    if do_dropout:
        keep_prob = tf.placeholder(tf.float32)
        hidden_dropout = tf.nn.dropout(hidden, keep_prob=keep_prob)
    hidden_valid = valid_output1
    hidden_test = test_output1
    
    layer_node_count_cur = layer_node_count
    layer_node_count_next = layer_node_count_cur
    hidden_weights = []
    hidden_biases = []
    for layer_c in range(count_hidden):
        print('add layer:',layer_c)
        if layer_node_count_cur>2:
            layer_node_count_next = int(layer_node_count_cur/2)
        else:
            layer_node_count_next = 2
        hidden_stddev = np.sqrt(2.0 / layer_node_count_cur)
        hidden_w = tf.Variable(tf.truncated_normal(shape=(layer_node_count_cur, layer_node_count_next),stddev=hidden_stddev))
        hidden_b = tf.Variable(tf.zeros([layer_node_count_next]))
        hidden_weights.append(hidden_w)
        hidden_biases.append(hidden_biases)
        layer_node_count_cur = layer_node_count_next
        
        hidden = tf.nn.relu(tf.matmul(hidden, hidden_w) + hidden_b)
        if do_dropout:
            keep_prob += 0.5 * layer_c / (count_hidden + 1)
            hidden_dropout = tf.nn.dropout(hidden, keep_prob=keep_prob)
        hidden_valid = tf.nn.relu(tf.matmul(hidden_valid, hidden_w) + hidden_b)
        hidden_test = tf.nn.relu(tf.matmul(hidden_test, hidden_w) + hidden_b)
    
    # layer2
    w2 = tf.Variable(tf.truncated_normal(shape=(layer_node_count_next, num_labels),stddev=hidden_stddev / 2))
    b2 = tf.Variable(tf.zeros([num_labels]))
    
    logits = tf.matmul(hidden_dropout, w2) + b2
    logits_train = tf.matmul(hidden, w2) + b2
    logits_valid = tf.matmul(hidden_valid, w2) + b2
    logits_test = tf.matmul(hidden_test, w2) + b2
        
    #  regularization
    l2_loss = 0
    if do_regularization:
        l2_loss = tf.nn.l2_loss(w1)+tf.nn.l2_loss(w2) #+tf.nn.l2_loss(b2)+tf.nn.l2_loss(b1)
        for i in range(len(hidden_weights)):
            l2_loss += tf.nn.l2_loss(hidden_weights[i])
    beta = 1e-5
    l2_loss = l2_loss*beta
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels,logits=logits)) + l2_loss
    
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    # 学习率调整
    if do_lrd:
        cur_step = tf.Variable(0)
        starter_learning_rate = 0.4
        learning_rate = tf.train.exponential_decay(starter_learning_rate,cur_step,100000,0.96,staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=cur_step)
    
    train_prediction = tf.nn.softmax(logits_train)
    valid_prediction = tf.nn.softmax(logits_valid)
    test_prediction = tf.nn.softmax(logits_test)

add layer: 0
add layer: 1


In [54]:
num_steps = 2001
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step*batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset+batch_size),:]
        batch_labels = train_labels[offset:(offset+batch_size),:]
        feed_dict = {tf_train_dataset:batch_data,tf_train_labels:batch_labels,keep_prob:0.5}
        _, l, predictions = session.run([optimizer,loss,train_prediction], feed_dict=feed_dict)
        if step%100==0:
            print('Minibatch Loss at step %d: %f' % (step, l))
            print('Minibatch Training accuracy: %.f%%' % accuracy(predictions, batch_labels))
            print('Minibatch Validation accuracy:%.1f%%' % accuracy(valid_prediction.eval(),valid_labels))
    print('Test accuracy:%1.f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch Loss at step 0: 2.333661
Minibatch Training accuracy: 10%
Minibatch Validation accuracy:23.8%
Minibatch Loss at step 100: 0.575901
Minibatch Training accuracy: 83%
Minibatch Validation accuracy:83.1%
Minibatch Loss at step 200: 0.427137
Minibatch Training accuracy: 86%
Minibatch Validation accuracy:84.2%
Minibatch Loss at step 300: 0.489078
Minibatch Training accuracy: 86%
Minibatch Validation accuracy:84.7%
Minibatch Loss at step 400: 0.315622
Minibatch Training accuracy: 91%
Minibatch Validation accuracy:85.5%
Minibatch Loss at step 500: 0.372993
Minibatch Training accuracy: 88%
Minibatch Validation accuracy:85.2%
Minibatch Loss at step 600: 0.268637
Minibatch Training accuracy: 93%
Minibatch Validation accuracy:85.0%
Minibatch Loss at step 700: 0.196315
Minibatch Training accuracy: 95%
Minibatch Validation accuracy:85.9%
Minibatch Loss at step 800: 0.186221
Minibatch Training accuracy: 96%
Minibatch Validation accuracy:84.6%
Minibatch Loss at step 900: 0.175658