In [2]:
import numpy as np
import tensorflow as tf

In [3]:
data = np.genfromtxt('mnist_train.csv',delimiter=',',dtype=np.float32)

In [4]:
data.shape

(60000, 785)

In [5]:
data[0,0]

5.0

In [6]:
x_train = data[:,1:]
y_train = data[:,0]


In [8]:
y_train = (np.arange(10) == y_train[:,None]).astype(np.float32)

In [9]:
y_train.shape

(60000, 10)

In [10]:
np.arange(2)

array([0, 1])

In [20]:
validation_size=500
IMAGE_SIZE = 28
batch_size = 128

In [12]:
x_val, x_train = x_train[:validation_size, :], x_train[validation_size:,:]
y_val, y_train = y_train[:validation_size, :], y_train[validation_size:,:]

In [15]:
x_train = x_train.reshape(len(x_train),IMAGE_SIZE, IMAGE_SIZE,1)
x_val = x_val.reshape(len(x_val), IMAGE_SIZE, IMAGE_SIZE, 1)

In [22]:
class Model:
    def __init__(self,batch_size=128, learning_rate=1e-4, num_labels=10):
        self._batch_size=batch_size
        self._learning_rate=learning_rate
        self._num_labels = num_labels
        
    def _create_conv2d(self,x,w):
        return tf.nn.conv2d(input=x, filter=w, strides=[1,1,1,1], padding='SAME')
    
    def _create_maxpool(self,x):
        return tf.nn.max_pool(value=x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    
    def _create_weights(self,shape):
        return tf.Variable(initial_value=tf.truncated_normal(shape=shape, stddev=0.1, dtype=tf.float32))
    
    def _create_biases(self,shape):
        return tf.Variable(initial_value=tf.constant(1., shape=shape, dtype=tf.float32))
    
    def _activation_summary(self,x):
        tensor_name = x.op.name
        tf.summary.histogram(tensor_name+'/activations',x)
        tf.summary.scalar(tensor_name+'/sparsity',tf.nn.zero_fraction(x))
        
    def inference(self, image, keep_prob):
        with tf.variable_scope('conv1') as scope:
            kernel = self._create_weights([5,5,1,32])
            conv = self._create_conv2d(image,kernel)
            bias = self._create_biases([32])
            preactivation = tf.nn.bias_add(value=conv, bias=bias)
            conv1 = tf.nn.relu(preactivation, name=scope.name)
            self._activation_summary(conv1)
        
        pool1 = self._create_maxpool(conv1)
        
        with tf.variable_scope('conv2') as scope:
            kernel = self._create_weights([5,5,32,64])
            conv = self._create_conv2d(pool1,kernel)
            bias = self._create_biases([64])
            preactivation = tf.nn.bias_add(value=conv, bias=bias)
            conv2 = tf.nn.relu(preactivation, name=scope.name)
            self._activation_summary(conv2)
            
        pool2 = self._create_maxpool(conv2)
        
        with tf.variable_scope('local1') as scope:
            reshape = tf.reshape(pool2, shape=[-1,7*7*64])
            weights = self._create_weights([7*7*64,1024])
            biases = self._create_biases([1024])
            local1 = tf.nn.relu(tf.matmul(reshape,weights)+biases, name=scope.name)
            self._activation_summary(local1)
            
        with tf.variable_scope('local2_linear') as scope:
            W_fc2 = self._create_weights([1024, self._num_labels])
            b_fc2 = self._create_biases([self._num_labels])
            local1_drop = tf.nn.dropout(local1, keep_prob)
            local2 = tf.nn.bias_add(tf.matmul(local1_drop, W_fc2), b_fc2, name=scope.name)
            self._activation_summary(local2)
        return local2
    
    def train(self,loss,global_step):
        tf.summary.scalar('learning_rate',self._learning_rate)
        train_op = tf.train.AdamOptimizer(learning_rate=self._learning_rate).minimize(loss, global_step=global_step)
        
    def loss(self,logits, labels):
        
        with tf.variable_scope('loss') as scope:
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            cost = tf.reduce_mean(cross_entropy,name=scope.name)
        return cost
    
    def accuracy(self,logits,labels):
        with tf.variable_scope('accuracy') as scope:
            accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits,1),tf.argmax(labels,1)),dtype=tf.float32),name=scope.name)
            tf.summary.scalar('accuracy',accuracy)
        return accuracy

In [23]:
FLAGS = tf.app.flags.FLAGS
NUM_LABELS = 10

In [24]:
model=Model()

In [26]:
with tf.Graph().as_default():
    x=tf.placeholder(shape=[None,IMAGE_SIZE, IMAGE_SIZE, 1], dtype=tf.float32,name='x')
    y=tf.placeholder(shape=[None, NUM_LABELS], dtype=tf.float32,name='y')
    keep_prob = tf.placeholder(tf.float32,name='dropout_prob')
    global_step = tf.contrib.framework.get_or_create_global_step()
    
    logits = model.inference(x,keep_prob)
    loss=model.loss(labels=y,logits=logits)
    
    accuracy = model.accuracy(logits,y)
    summary_op = tf.summary.merge_all()
    train_op = model.train(loss=loss,global_step=global_step)
    
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        writer = tf.summary.FileWriter('cache',sess.graph)
        sess.run(init)
        
        for i in range(1000):
            offset = (i*batch_size)%len(x_train)
            batch_x,batch_y = x_train[offset:(offset + batch_size),:], y_train[offset:(offset+batch_size),:]
            _, cur_loss, summary = sess.run([train_op,loss,summary_op], feed_dict={x:batch_x, y:batch_y,keep_prob:0.5})
            writer.add_summary(summary,i)
            
            print(i,cur_loss)
            

TypeError: Fetch argument None has invalid type <class 'NoneType'>

In [32]:
    model = Model()

    with tf.Graph().as_default():
        

        x = tf.placeholder(shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], dtype=tf.float32, name='x')
        y = tf.placeholder(shape=[None, NUM_LABELS], dtype=tf.float32, name='y')
        keep_prob = tf.placeholder(tf.float32, name='dropout_prob')
        global_step = tf.contrib.framework.get_or_create_global_step()

        logits = model.inference(x, keep_prob=keep_prob)
        loss = model.loss(logits=logits, labels=y)

        accuracy = model.accuracy(logits, y)
        summary_op = tf.summary.merge_all()
        train_op = model.train(loss, global_step=global_step)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            writer = tf.summary.FileWriter('cache', sess.graph)
            sess.run(init)
            for i in range(1000):
                offset = (i * batch_size) % (len(x_train) - batch_size)
                batch_x, batch_y = x_train[offset:(offset + batch_size), :], y_train[offset:(offset + batch_size), :]

                _, cur_loss, summary = sess.run([train_op, loss, summary_op],
                                                feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
                writer.add_summary(summary, i)
                print(i, cur_loss)

0 2134.8
1 2032.85
2 1819.03
3 1688.28
4 1242.32
5 1528.5
6 1406.81
7 1269.45
8 1312.89
9 1247.7
10 1092.42
11 1114.93
12 1064.75
13 1093.43
14 889.244
15 899.378
16 837.318
17 879.899
18 770.422
19 704.172
20 733.14
21 615.44
22 670.869
23 665.734
24 569.871
25 613.447
26 531.629
27 464.418
28 519.512
29 494.999
30 374.483
31 397.567
32 455.774
33 383.197
34 470.129
35 383.937
36 346.264
37 359.638
38 362.922
39 275.325
40 345.422
41 406.787
42 315.568
43 332.128
44 231.965
45 269.014
46 256.665
47 205.726
48 215.366
49 372.034
50 251.35
51 235.343
52 328.792
53 241.216
54 240.073
55 196.993
56 237.431
57 208.84
58 233.943
59 198.599
60 255.531
61 186.335
62 235.61
63 216.283
64 264.548
65 255.082
66 126.233
67 135.789
68 205.229
69 206.132
70 162.709
71 191.091
72 160.559
73 133.172
74 119.394
75 200.535
76 176.546
77 127.382
78 129.023
79 126.062
80 169.281
81 149.082
82 149.935
83 165.947
84 66.3887
85 121.488
86 150.788
87 164.87
88 161.357
89 116.819
90 86.7071
91 124.558
92 176.

In [31]:
class Model(object):
    def __init__(self, batch_size=128, learning_rate=1e-4, num_labels=10):
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._num_labels = num_labels

    def inference(self, images, keep_prob):
        with tf.variable_scope('conv1') as scope:
            kernel = self._create_weights([5, 5, 1, 32])
            conv = self._create_conv2d(images, kernel)
            bias = self._create_bias([32])
            preactivation = tf.nn.bias_add(conv, bias)
            conv1 = tf.nn.relu(preactivation, name=scope.name)
            self._activation_summary(conv1)

        # pool 1
        h_pool1 = self._create_max_pool_2x2(conv1)

        with tf.variable_scope('conv2') as scope:
            kernel = self._create_weights([5, 5, 32, 64])
            conv = self._create_conv2d(h_pool1, kernel)
            bias = self._create_bias([64])
            preactivation = tf.nn.bias_add(conv, bias)
            conv2 = tf.nn.relu(preactivation, name=scope.name)
            self._activation_summary(conv2)

        # pool 2
        h_pool2 = self._create_max_pool_2x2(conv2)

        with tf.variable_scope('local1') as scope:
            reshape = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
            W_fc1 = self._create_weights([7 * 7 * 64, 1024])
            b_fc1 = self._create_bias([1024])
            local1 = tf.nn.relu(tf.matmul(reshape, W_fc1) + b_fc1, name=scope.name)
            self._activation_summary(local1)

        with tf.variable_scope('local2_linear') as scope:
            W_fc2 = self._create_weights([1024, self._num_labels])
            b_fc2 = self._create_bias([self._num_labels])
            local1_drop = tf.nn.dropout(local1, keep_prob)
            local2 = tf.nn.bias_add(tf.matmul(local1_drop, W_fc2), b_fc2, name=scope.name)
            self._activation_summary(local2)
        return local2

    def train(self, loss, global_step):
        tf.summary.scalar('learning_rate', self._learning_rate)
        train_op = tf.train.AdamOptimizer(self._learning_rate).minimize(loss, global_step=global_step)
        return train_op

    def loss(self, logits, labels):
        with tf.variable_scope('loss') as scope:
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            cost = tf.reduce_mean(cross_entropy, name=scope.name)
            tf.summary.scalar('cost', cost)

        return cost

    def accuracy(self, logits, y):
        with tf.variable_scope('accuracy') as scope:
            accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1)), dtype=tf.float32),
                                      name=scope.name)
            tf.summary.scalar('accuracy', accuracy)
        return accuracy

    def _create_conv2d(self, x, W):
        return tf.nn.conv2d(input=x,
                            filter=W,
                            strides=[1, 1, 1, 1],
                            padding='SAME')

    def _create_max_pool_2x2(self, input):
        return tf.nn.max_pool(value=input,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

    def _create_weights(self, shape):
        return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1, dtype=tf.float32))

    def _create_bias(self, shape):
        return tf.Variable(tf.constant(1., shape=shape, dtype=tf.float32))

    def _activation_summary(self, x):
        tensor_name = x.op.name
        tf.summary.histogram(tensor_name + '/activations', x)
        tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))