In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import random

# Config the matplotlib backend as plotting inline in IPython
%matplotlib inline

def summary(ndarr):
    """Print an array followed by a short statistical digest of it.

    The array itself is printed first, then one line each for its shape,
    minimum, maximum, mean, standard deviation and unique values.

    Args:
        ndarr: array-like object exposing ``.shape`` and accepted by the
            numpy reductions used below.

    Returns:
        None. Everything is written to stdout.
    """
    print(ndarr)
    print("* shape: {}".format(ndarr.shape))

    # (line template, reduction) pairs, emitted in this order.
    report = (
        ("* min: {}", np.min),
        ("* max: {}", np.max),
        ("* avg: {}", np.mean),
        ("* std: {}", np.std),
        ("* unique: {}", np.unique),
    )
    for template, reduce_fn in report:
        print(template.format(reduce_fn(ndarr)))

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from the local "MNIST_data/" directory, asking for
# one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Short aliases for the three splits exposed by the loader.
train_data, valid_data, test_data = mnist.train, mnist.validation, mnist.test

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [105]:
class CNNLogisticClassification(object):
    """LeNet-style convolutional classifier on a private TensorFlow 1.x graph.

    Layer stack (all convolutions and pools use 'VALID' padding):
    conv 5x5 x6 (ReLU) -> max-pool 2x2 -> conv 5x5 x16 (ReLU) -> max-pool 2x2
    -> flatten -> dense 84 (ReLU) -> dense ``n_labels`` -> softmax.

    Two copies of the forward pass share the same variables: a training path
    (with dropout) fed through ``train_pictures``/``train_labels`` and an
    inference path (no dropout) fed through ``new_pictures``/``new_labels``.
    """

    def __init__(self,shape_picture,n_labels,learning_rate=0.5,dropout_ratio=0.5,alpha=0.0):
        """Build the graph and open a session on it.

        Args:
            shape_picture: sequence ``[height, width, channels]`` of one input.
            n_labels: number of output classes (width of the one-hot labels).
            learning_rate: step size for plain gradient descent.
            dropout_ratio: drop probability. Either a single number (used for
                both dropout sites) or a sequence whose first element applies
                to the flattened features and second to the 84-unit layer.
            alpha: weight of the regularization term added to the loss.
        """
        self.shape_picture = shape_picture
        self.n_labels = n_labels
        self.graph = tf.Graph()
        self.build(learning_rate,dropout_ratio,alpha)
        self.sess = tf.Session(graph=self.graph)

    def close(self):
        """Release the TensorFlow session owned by this model."""
        self.sess.close()

    @staticmethod
    def _flatten_size(shape_picture):
        """Return the feature count that reaches the first dense layer.

        Each stage is a 5x5 'VALID' convolution (size - 4) followed by a 2x2
        stride-2 'VALID' max-pool (floor division by 2); the second
        convolution has 16 output channels.

        Raises:
            ValueError: if the picture is too small to survive both stages.
        """
        def after_stage(size):
            return (size - 4) // 2

        out_h = after_stage(after_stage(shape_picture[0]))
        out_w = after_stage(after_stage(shape_picture[1]))
        if out_h < 1 or out_w < 1:
            raise ValueError(
                "shape_picture %r is too small for two conv5x5+pool2x2 stages"
                % (list(shape_picture),))
        return out_h * out_w * 16

    @staticmethod
    def _dropout_pair(dropout_ratio):
        """Normalize ``dropout_ratio`` to a (flatten, fc6) pair of ratios.

        A scalar is applied to both sites; a sequence is read at positions
        0 and 1 exactly as before.
        """
        if np.isscalar(dropout_ratio):
            return (dropout_ratio, dropout_ratio)
        return (dropout_ratio[0], dropout_ratio[1])

    @staticmethod
    def _iter_batches(n_samples, batch_size, rng):
        """Yield lists of sample indices covering ``range(n_samples)`` once.

        The indices are shuffled with ``rng`` and then consumed from the end
        of the shuffled list, ``batch_size`` at a time (the last batch may be
        shorter).
        """
        index = list(range(n_samples))
        rng.shuffle(index)
        while index:
            take = min(batch_size, len(index))
            yield [index.pop() for _ in range(take)]

    def build(self,learning_rate,dropout_ratio,alpha):
        """Create placeholders, variables, loss, train op and init op."""
        picture_shape = [None] + list(self.shape_picture)
        in_channels = picture_shape[-1]
        flat_size = self._flatten_size(self.shape_picture)

        with self.graph.as_default():
            ### Input
            self.train_pictures = tf.placeholder(tf.float32, shape=picture_shape)
            self.train_labels   = tf.placeholder(tf.int32  , shape=(None,self.n_labels))

            ### Variable
            # Shape comments below are for the default 28x28x1 input.
            self.weights = {
                'conv1': tf.Variable(tf.truncated_normal(shape=(5,5,in_channels,6),stddev=0.1)),
                    # batch,28,28,1 => batch,24,24,6
                'conv3': tf.Variable(tf.truncated_normal(shape=(5,5,6,16),stddev=0.1)),
                    # batch,12,12,6 => batch,8,8,16
                'fc6':   tf.Variable(tf.truncated_normal(shape=(flat_size,84),stddev=0.1)),
                    # batch,4x4x16 => batch,84
                'fc7':   tf.Variable(tf.truncated_normal(shape=(84,self.n_labels),stddev=0.1)),
                    # batch,84 => batch,10
            }
            self.biases  = {
                'conv1': tf.Variable(tf.zeros( shape=(6) )),
                'conv3': tf.Variable(tf.zeros( shape=(16) )),
                'fc6':   tf.Variable(tf.zeros( shape=(84) )),
                'fc7':   tf.Variable(tf.zeros( shape=(self.n_labels) )),
            }

            ### Optimization
            self.y_,self.original_loss = self.structure(pictures=self.train_pictures,
                                                        labels=self.train_labels,
                                                        dropout_ratio=dropout_ratio,
                                                        train=True, )

            # Mean over all weight tensors of l2_loss(w) divided by the
            # product of w's first two dimensions.
            # NOTE(review): for the 4-D conv kernels that divisor is
            # kernel_h*kernel_w, not the element count; kept unchanged so
            # existing alpha values keep their meaning -- confirm intent.
            self.regularization = tf.reduce_mean(
                                   [tf.nn.l2_loss(w)/tf.cast(tf.shape(w)[0]*tf.shape(w)[1],tf.float32)
                                        for k,w in self.weights.items()])
            self.loss = self.original_loss + alpha * self.regularization

            self.train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(self.loss)

            ### Prediction
            self.new_pictures = tf.placeholder(tf.float32, shape=picture_shape)
            self.new_labels   = tf.placeholder(tf.int32  , shape=(None,self.n_labels))
            self.new_y_,self.new_loss = self.structure(pictures=self.new_pictures,
                                                       labels=self.new_labels,)

            ### Initialization
            self.init_op = tf.global_variables_initializer()

    def structure(self,pictures,labels,dropout_ratio=None,train=False):
        """Wire one forward pass and its cross-entropy loss.

        Args:
            pictures: input tensor of pictures.
            labels: one-hot label tensor matching ``pictures``.
            dropout_ratio: drop probability (scalar or two-element sequence);
                only read when ``train`` is true.
            train: when true, dropout is inserted after the flatten layer and
                after the 84-unit dense layer.

        Returns:
            Tuple ``(y_, loss)``: softmax probabilities and the mean softmax
            cross-entropy over the batch.
        """
        if train:
            drop_flatten, drop_fc6 = self._dropout_pair(dropout_ratio)

        conv1 = self.getConv2DLayer(pictures,self.weights['conv1'],self.biases['conv1'],activation=tf.nn.relu)
        pool2 = tf.nn.max_pool(conv1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
        conv3 = self.getConv2DLayer(pool2,self.weights['conv3'],self.biases['conv3'],activation=tf.nn.relu)
        pool4 = tf.nn.max_pool(conv3,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
        flatten5 = self.getFlattenLayer(pool4)
        if train: flatten5 = tf.nn.dropout(flatten5,keep_prob=1-drop_flatten)

        fc6 = self.getDenseLayer(flatten5,self.weights['fc6'],self.biases['fc6'],activation=tf.nn.relu)
        if train: fc6 = tf.nn.dropout(fc6,keep_prob=1-drop_fc6)

        logits = self.getDenseLayer(fc6,self.weights['fc7'],self.biases['fc7'])

        y_ = tf.nn.softmax(logits)
        loss = tf.reduce_mean(
                 tf.nn.softmax_cross_entropy_with_logits(labels=labels,logits=logits))

        return (y_,loss)

    def getDenseLayer(self,input_layer,weight,bias,activation=None):
        """Return ``activation(input_layer @ weight + bias)`` (activation optional)."""
        x = tf.add(tf.matmul(input_layer,weight),bias)
        if activation:
            x = activation(x)
        return x

    def getConv2DLayer(self,input_layer,weight,bias,strides=(1,1),padding='VALID',activation=None):
        """Return a 2-D convolution plus bias, optionally passed through ``activation``."""
        x = tf.add(
              tf.nn.conv2d(input_layer,weight,[1,strides[0],strides[1],1], padding=padding),bias)
        if activation:
            x = activation(x)
        return x

    def getFlattenLayer(self,input_layer):
        """Reshape ``input_layer`` to 2-D, keeping the first axis and merging the rest."""
        shape = input_layer.get_shape().as_list()
        n = 1
        for s in shape[1:]:
            n *= s
        x = tf.reshape(input_layer,[-1,n])
        return x

    def fit(self,X,y,epochs=10,validation_data=None,test_data=None,batch_size=None):
        """Train with mini-batch gradient descent, reporting metrics per epoch.

        Every call first runs the variable initializer, so training always
        restarts from freshly initialized weights.

        Args:
            X: training pictures.
            y: one-hot training labels.
            epochs: number of passes over the training set.
            validation_data: optional ``(pictures, labels)`` pair scored after
                every epoch.
            test_data: optional ``(pictures, labels)`` pair scored once after
                the last epoch.
            batch_size: samples per update; falsy means the whole training
                set in one batch.

        Raises:
            ValueError: if ``batch_size`` is negative.
        """
        X = self._check_array(X)
        y = self._check_array(y)

        N = X.shape[0]
        if not batch_size: batch_size=N
        if batch_size < 0:
            # A negative size would never consume any index and loop forever.
            raise ValueError("batch_size must be positive, got %r" % (batch_size,))

        # Private generator: same shuffle order as seeding the global
        # ``random`` module with 9000, without disturbing global state.
        rng = random.Random(9000)

        self.sess.run(self.init_op)
        for epoch in range(epochs):
            print("Epoch %2d/%2d: "%(epoch+1,epochs))

            # mini-batch gradient descent
            seen = 0
            for batch_index in self._iter_batches(N, batch_size, rng):
                feed_dict = {
                    self.train_pictures: X[batch_index,:],
                    self.train_labels: y[batch_index],
                }
                _, loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict)

                seen += len(batch_index)
                print("[%d/%d] loss = %.4f     " % ( seen, N, loss ), end='\r')

            # evaluate at the end of this epoch
            train_loss, train_acc = self._score(X,y)
            msg = "[%d/%d] loss = %8.4f, acc = %3.2f%%" % ( N, N, train_loss, train_acc*100 )

            if validation_data:
                val_loss, val_acc = self._score(validation_data[0],validation_data[1])
                msg += ", val_loss = %8.4f, val_acc = %3.2f%%" % ( val_loss, val_acc*100 )

            print(msg)

        if test_data:
            test_acc = self.accuracy(self.predict(test_data[0]),test_data[1])
            print("test_acc = %3.2f%%" % (test_acc*100))

    def _score(self,X,y):
        """Return ``(loss, accuracy)`` on ``(X, y)`` from a single session run."""
        X = self._check_array(X)
        y = self._check_array(y)
        y_, loss = self.sess.run([self.new_y_, self.new_loss],
                                 feed_dict={self.new_pictures: X, self.new_labels: y})
        return loss, self.accuracy(y_,y)

    def accuracy(self, predictions, labels):
        """Fraction of rows whose arg-max in ``predictions`` matches ``labels``.

        Uses ``np.mean`` so the result is a true fraction on Python 2 as well
        (integer ``sum / count`` would floor to 0 there).
        """
        return np.mean(np.argmax(predictions, 1) == np.argmax(labels, 1))

    def predict(self,X):
        """Return softmax class probabilities for pictures ``X`` (no dropout)."""
        X = self._check_array(X)
        return self.sess.run(self.new_y_, feed_dict={self.new_pictures: X})

    def evaluate(self,X,y):
        """Return the mean cross-entropy loss on ``(X, y)`` (no dropout, no regularization)."""
        X = self._check_array(X)
        y = self._check_array(y)
        return self.sess.run(self.new_loss, feed_dict={self.new_pictures: X, self.new_labels: y})

    def _check_array(self,ndarray):
        """Convert to ``np.ndarray``; a 1-D input becomes a single-row 2-D array."""
        ndarray = np.array(ndarray)
        if len(ndarray.shape)==1: ndarray = np.reshape(ndarray,(1,ndarray.shape[0]))
        return ndarray
    

In [127]:
# Reshape each split's image array to [-1, 28, 28, 1] before handing it to
# the model, whose input placeholder is built from shape_picture=[28,28,1].
train_images = np.reshape(train_data.images, [-1,28,28,1])
valid_images = np.reshape(valid_data.images, [-1,28,28,1])
test_images = np.reshape(test_data.images, [-1,28,28,1])

# Per-site dropout: 0.2 on the flattened features, 0.6 on the 84-unit layer.
model = CNNLogisticClassification(
    shape_picture=[28,28,1],
    n_labels=10,
    learning_rate=0.06,
    dropout_ratio=[0.2,0.6],
    alpha=0.1,
)

# Train for 10 epochs in mini-batches of 32; validation metrics are printed
# every epoch and test accuracy once at the end.
model.fit(
    X=train_images,
    y=train_data.labels,
    epochs=10,
    validation_data=(valid_images, valid_data.labels),
    test_data=(test_images, test_data.labels),
    batch_size=32,
)

Epoch  1/10: 
[55000/55000] loss =   0.1285, acc = 96.14%, val_loss =   0.1263, val_acc = 96.30%
Epoch  2/10: 
[55000/55000] loss =   0.0840, acc = 97.32%, val_loss =   0.0826, val_acc = 97.38%
Epoch  3/10: 
[55000/55000] loss =   0.0822, acc = 97.49%, val_loss =   0.0801, val_acc = 97.84%
Epoch  4/10: 
[55000/55000] loss =   0.0533, acc = 98.37%, val_loss =   0.0589, val_acc = 98.32%
Epoch  5/10: 
[55000/55000] loss =   0.0470, acc = 98.55%, val_loss =   0.0537, val_acc = 98.36%
Epoch  6/10: 
[55000/55000] loss =   0.0502, acc = 98.46%, val_loss =   0.0577, val_acc = 98.70%
Epoch  7/10: 
[55000/55000] loss =   0.0465, acc = 98.57%, val_loss =   0.0587, val_acc = 98.36%
Epoch  8/10: 
[55000/55000] loss =   0.0423, acc = 98.67%, val_loss =   0.0543, val_acc = 98.44%
Epoch  9/10: 
[55000/55000] loss =   0.0441, acc = 98.65%, val_loss =   0.0546, val_acc = 98.60%
Epoch 10/10: 
[55000/55000] loss =   0.0398, acc = 98.75%, val_loss =   0.0501, val_acc = 98.46%
test_acc = 98.87%
