In [75]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import random

# Configure matplotlib to render plots inline in the IPython/Jupyter notebook
%matplotlib inline

def summary(ndarr):
    """Print an array followed by a short statistical overview.

    The array itself is printed first, then one line each for its shape,
    minimum, maximum, mean, standard deviation and unique values.

    Args:
        ndarr: a NumPy array (anything exposing ``.shape`` that the NumPy
            reduction functions accept).

    Returns:
        None. Everything is written to standard output.
    """
    print(ndarr)
    print("* shape: {}".format(ndarr.shape))

    # (line template, statistic) pairs, emitted in this exact order.
    report = (
        ("* min: {}", np.min),
        ("* max: {}", np.max),
        ("* avg: {}", np.mean),
        ("* std: {}", np.std),
        ("* unique: {}", np.unique),
    )
    for template, statistic in report:
        print(template.format(statistic(ndarr)))

In [76]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Expose the three splits under the short names used by the cells below.
train_data, valid_data, test_data = mnist.train, mnist.validation, mnist.test

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [122]:
class DNNLogisticClassification(object):
    """One-hidden-layer softmax classifier on a private TensorFlow graph.

    Network: input -> dense(n_hidden, ReLU) -> dropout -> dense(n_labels,
    softmax). The training loss is the mean softmax cross-entropy plus
    ``alpha`` times a regularisation term built from ``tf.nn.l2_loss`` of
    every weight matrix (normalised by the matrix's element count and
    averaged over the layers). Optimisation is plain gradient descent.

    Args:
        n_features: width of one input row.
        n_labels: number of classes; labels are expected one-hot encoded
            with this many columns.
        learning_rate: step size for ``GradientDescentOptimizer``.
        n_hidden: number of units in the hidden layer.
        dropout_ratio: fraction of hidden activations dropped while
            training (the keep probability fed is ``1 - dropout_ratio``).
        alpha: weight of the regularisation term in the loss.
    """

    def __init__(self,n_features,n_labels,learning_rate=0.5,n_hidden=1000,dropout_ratio=0.5,alpha=0.0):
        self.n_features = n_features
        self.n_labels = n_labels
        # Each model owns its graph so several models can coexist in one kernel.
        self.graph = tf.Graph()
        self.build(learning_rate,n_hidden,dropout_ratio,alpha)
        self.sess = tf.Session(graph=self.graph)

    def build(self,learning_rate,n_hidden,dropout_ratio,alpha):
        """Create placeholders, layers, loss, train op and initializer in ``self.graph``."""
        with self.graph.as_default():
            ### Input
            self.features = tf.placeholder(tf.float32, shape=(None,self.n_features))
            self.labels   = tf.placeholder(tf.int32  , shape=(None,self.n_labels))
            # Fed per run: train_keep_prob while training, 1.0 for inference.
            self.keep_prob= tf.placeholder(tf.float32)
            self.train_keep_prob = 1-dropout_ratio

            ### Computation
            # Per-layer tensors keyed by layer name ('fc1', 'fc2').
            self.Weights = {}
            self.Biases  = {}
            self.Scores  = {}   # pre-activation outputs
            self.Xs      = {}   # post-activation outputs

            self.Weights['fc1'], self.Biases['fc1'], self.Scores['fc1'], self.Xs['fc1'] \
                      = self.getDenseLayer(self.features, n_hidden, activation=tf.nn.relu)

            self.drop_x2 = tf.nn.dropout(self.Xs['fc1'], keep_prob=self.keep_prob)

            self.Weights['fc2'], self.Biases['fc2'], self.Scores['fc2'], self.Xs['fc2'] \
                      = self.getDenseLayer(self.drop_x2, self.n_labels, activation=tf.nn.softmax)

            # The loss consumes the raw scores (the cross-entropy op applies
            # softmax itself); predictions use the softmax output.
            self.logits = self.Scores['fc2']
            self.y_ = self.Xs['fc2']

            ### Optimization
            self.original_loss = tf.reduce_mean(
                            tf.nn.softmax_cross_entropy_with_logits(labels=self.labels,logits=self.logits))
            # l2_loss of each weight matrix divided by its number of elements,
            # then averaged over the layers.
            self.regularization = tf.reduce_mean(
                                   [tf.nn.l2_loss(w)/tf.cast(tf.shape(w)[0]*tf.shape(w)[1],tf.float32)
                                        for w in self.Weights.values()])
            self.loss = self.original_loss + alpha * self.regularization

            self.train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(self.loss)

            ### Initialization
            self.init_op = tf.global_variables_initializer()

    def getDenseLayer(self,input_layer,n_output,activation=None):
        """Append a fully connected layer to ``input_layer``.

        Args:
            input_layer: 2-D tensor whose second dimension is statically known.
            n_output: number of output units.
            activation: optional callable applied to the affine output.

        Returns:
            Tuple ``(W, b, s, x_new)``: weight variable, bias variable,
            pre-activation tensor and activated tensor (``None`` when no
            activation was given).
        """
        n_input = int(input_layer.shape[1])
        W = tf.Variable(
                    tf.truncated_normal( shape=(n_input,n_output) ))
        # (n_output,) is a real 1-tuple; the former (n_output) was a bare int.
        b = tf.Variable(tf.zeros( shape=(n_output,) ))
        s = tf.add(tf.matmul(input_layer,W),b)
        x_new = activation(s) if activation is not None else None
        return (W,b,s,x_new)

    def fit(self,X,y,epochs=10,validation_data=None,test_data=None,batch_size=None):
        """Train with mini-batch gradient descent, reshuffling every epoch.

        Variables are re-initialised at the start of every call, so calling
        ``fit`` again trains from scratch. After each epoch the full-set
        training loss/accuracy (and validation metrics, if supplied) are
        printed; test accuracy is printed once at the end when supplied.

        Args:
            X: array of shape (N, n_features).
            y: one-hot label array with N rows.
            epochs: number of passes over the training set.
            validation_data: optional ``(X_val, y_val)`` pair.
            test_data: optional ``(X_test, y_test)`` pair.
            batch_size: rows per gradient step; ``None`` or 0 means use the
                whole training set as a single batch.

        Raises:
            ValueError: if ``batch_size`` is negative (the batch loop would
                otherwise never terminate).
        """
        N = X.shape[0]
        if not batch_size: batch_size=N
        if batch_size < 0:
            raise ValueError("batch_size must be positive, got %r" % (batch_size,))

        # Private, fixed-seed generator: the shuffle order is reproducible
        # across calls without reseeding the process-wide `random` state.
        rng = random.Random(9000)

        self.sess.run(self.init_op)
        for epoch in range(epochs):
            print("Epoch %2d/%2d: "%(epoch+1,epochs))
            # random.shuffle works in place and returns None, so the list has
            # to be kept in a variable and the batches drawn from it.
            order = list(range(N))
            rng.shuffle(order)

            k = 0
            while k < N:
                batch_index = order[k:k+batch_size]

                feed_dict = {self.features: X[batch_index,:],
                             self.labels: y[batch_index],
                             self.keep_prob: self.train_keep_prob,}
                _, loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict)

                # '\r' keeps the progress report on a single, overwritten line.
                print("[%d/%d] loss = %9.4f                      " % ( k, N, loss ), end='\r')

                k += batch_size

            y_ = self.predict(X)
            train_loss = self.evaluate(X,y)
            train_acc = self.accuracy(y_,y)
            msg = "[%d/%d] loss = %8.4f, acc = %3.2f%%" % ( N, N, train_loss, train_acc*100 )

            if validation_data:
                val_loss = self.evaluate(validation_data[0],validation_data[1])
                val_acc = self.accuracy(self.predict(validation_data[0]),validation_data[1])
                msg += ", val_loss = %8.4f, val_acc = %3.2f%%" % ( val_loss, val_acc*100 )

            print(msg)

        if test_data:
            test_acc = self.accuracy(self.predict(test_data[0]),test_data[1])
            print("test_acc = %3.2f%%" % (test_acc*100))

    def accuracy(self, predictions, labels):
        """Return the fraction of rows whose argmax matches between the two arrays.

        Args:
            predictions: 2-D array of per-class scores.
            labels: 2-D one-hot (or score) array with the same number of rows.
        """
        # np.mean over the boolean matches gives a float without relying on
        # the interpreter's integer-division semantics.
        return np.mean(np.argmax(predictions, 1) == np.argmax(labels, 1))

    def predict(self,X):
        """Return the softmax outputs for ``X`` with dropout disabled."""
        return self.sess.run(self.y_, feed_dict={self.features: X, self.keep_prob: 1.0})

    def evaluate(self,X,y):
        """Return the total loss (cross-entropy plus weighted regularisation) on ``(X, y)`` with dropout disabled."""
        return self.sess.run(self.loss, feed_dict={self.features: X, self.labels: y, self.keep_prob: 1.0})

    def close(self):
        """Close the underlying TensorFlow session and release its resources."""
        self.sess.close()


In [123]:
model = DNNLogisticClassification(   n_features=28*28,
                                     n_labels=10,
                                     learning_rate=0.5,
                                     n_hidden=3000,
                                     dropout_ratio=0.0,
                                     alpha=0.01,
                                 )
model.fit(X=train_data.images,
          y=train_data.labels,
          epochs=3,
          validation_data=(valid_data.images,valid_data.labels),
          test_data=(test_data.images,test_data.labels),
          batch_size = 128,
         )

Epoch  1/ 3: 
[55000/55000] loss =   0.9960, acc = 95.57%, val_loss =   1.7792, val_acc = 94.54%
Epoch  2/ 3: 
[55000/55000] loss =   0.3474, acc = 97.55%, val_loss =   1.3377, val_acc = 95.24%
Epoch  3/ 3: 
[55000/55000] loss =   0.1115, acc = 98.77%, val_loss =   1.1443, val_acc = 95.66%
test_acc = 94.94%


In [124]:
model = DNNLogisticClassification(   n_features=28*28,
                                     n_labels=10,
                                     learning_rate=0.5,
                                     n_hidden=3000,
                                     dropout_ratio=0.5,
                                     alpha=0.01,
                                 )
model.fit(X=train_data.images,
          y=train_data.labels,
          epochs=3,
          validation_data=(valid_data.images,valid_data.labels),
          test_data=(test_data.images,test_data.labels),
          batch_size = 128,
         )

Epoch  1/ 3: 
[55000/55000] loss =   1.3701, acc = 96.03%, val_loss =   1.9417, val_acc = 95.62%
Epoch  2/ 3: 
[55000/55000] loss =   0.5220, acc = 97.48%, val_loss =   1.1979, val_acc = 96.18%
Epoch  3/ 3: 
[55000/55000] loss =   0.2798, acc = 98.00%, val_loss =   0.9924, val_acc = 96.50%
test_acc = 96.04%
