In [134]:
import tensorflow as tf
from tensorflow.keras.datasets.mnist import load_data
import numpy as np


In [135]:
mnist=load_data()

In [136]:
train,test=mnist

def one_hot(a, num_classes):
    return np.squeeze(np.eye(num_classes)[a.reshape(-1)])



train_features=np.reshape(train[0],newshape=[60000,784],)
test_features=np.reshape(test[0],newshape=[test[0].shape[0],784])
train_labels=one_hot(train[1],10)
test_labels=one_hot(test[1],10)

print(train_features.shape)
print(test_features.shape)
print(train_labels.shape)
print(test_labels.shape)

(60000, 784)
(10000, 784)
(60000, 10)
(10000, 10)


In [139]:
import os
import tensorflow as tf
import time
import sys

class CNNSVM:
    def __init__(self, alpha, batch_size, num_classes, num_features, penalty_parameter):

        self.alpha = alpha
        self.batch_size = batch_size
        self.name = 'CNN-SVM'
        self.num_classes = num_classes
        self.num_features = num_features
        self.penalty_parameter = penalty_parameter

        def __graph__():

            with tf.name_scope('input'):
                # [BATCH_SIZE, NUM_FEATURES]
                x_input = tf.placeholder(dtype=tf.float32, shape=[None, num_features], name='x_input')

                # [BATCH_SIZE, NUM_CLASSES]
                y_input = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='actual_label')

            # First convolutional layer
            first_conv_weight = self.weight_variable([5, 5, 1, 32])
            first_conv_bias = self.bias_variable([32])

            input_image = tf.reshape(x_input, [-1, 28, 28, 1])

            first_conv_activation = tf.nn.relu(self.conv2d(input_image, first_conv_weight) + first_conv_bias)
            first_conv_pool = self.max_pool_2x2(first_conv_activation)

            # Second convolutional layer
            second_conv_weight = self.weight_variable([5, 5, 32, 64])
            second_conv_bias = self.bias_variable([64])

            second_conv_activation = tf.nn.relu(self.conv2d(first_conv_pool, second_conv_weight) + second_conv_bias)
            second_conv_pool = self.max_pool_2x2(second_conv_activation)

            # Fully-connected layer (Dense Layer)
            dense_layer_weight = self.weight_variable([7 * 7 * 64, 1024])
            dense_layer_bias = self.bias_variable([1024])

            second_conv_pool_flatten = tf.reshape(second_conv_pool, [-1, 7 * 7 * 64])
            dense_layer_activation = tf.nn.relu(tf.matmul(second_conv_pool_flatten, dense_layer_weight) +
                                                dense_layer_bias)

            # Dropout, to avoid over-fitting
            keep_prob = tf.placeholder(tf.float32)
            h_fc1_drop = tf.nn.dropout(dense_layer_activation, keep_prob)

            # Readout layer
            readout_weight = self.weight_variable([1024, num_classes])
            readout_bias = self.bias_variable([num_classes])

            output = tf.matmul(h_fc1_drop, readout_weight) + readout_bias

            with tf.name_scope('svm'):
                regularization_loss = tf.reduce_mean(tf.square(readout_weight))
                hinge_loss = tf.reduce_mean(
                    tf.square(tf.maximum(tf.zeros([batch_size, num_classes]), 1 - y_input * output)))
                with tf.name_scope('loss'):
                    loss = regularization_loss + penalty_parameter * hinge_loss
            tf.summary.scalar('loss', loss)

            optimizer = tf.train.AdamOptimizer(learning_rate=alpha).minimize(loss)

            with tf.name_scope('accuracy'):
                output = tf.identity(tf.sign(output), name='prediction')
                correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y_input, 1))
                with tf.name_scope('accuracy'):
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            
            tf.summary.scalar('accuracy', accuracy)

            merged = tf.summary.merge_all()

            self.x_input = x_input
            self.y_input = y_input
            self.keep_prob = keep_prob
            self.output = output
            self.loss = loss
            self.optimizer = optimizer
            self.accuracy = accuracy
            self.merged = merged

        sys.stdout.write('\n<log> Building graph...')
        __graph__()
        sys.stdout.write('</log>\n')

    def train(self, checkpoint_path, epochs, log_path, train_features,train_labels, test_features,test_labels):

        if not os.path.exists(path=log_path):
            os.mkdir(log_path)

        if not os.path.exists(path=checkpoint_path):
            os.mkdir(checkpoint_path)

        saver = tf.train.Saver(max_to_keep=4)

        init = tf.global_variables_initializer()

        timestamp = str(time.asctime())

        train_writer = tf.summary.FileWriter(logdir=log_path + timestamp + '-training', graph=tf.get_default_graph())

        with tf.Session() as sess:
            sess.run(init)

            checkpoint = tf.train.get_checkpoint_state(checkpoint_path)

            if checkpoint and checkpoint.model_checkpoint_path:
                saver = tf.train.import_meta_graph(checkpoint.model_checkpoint_path + '.meta')
                saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

            for index in range(epochs):
                
                # train by batch
                def batch(b_size):
                    for i in range(0,train_features.shape[0],b_size):
                        batch_features = train_features[i: i+ b_size]
                        batch_labels = train_labels[i:  i+ b_size]
                        batch_labels[batch_labels==0]=-1
                        return batch_features,batch_labels
                
                
                batch_features, batch_labels = batch(128)
                batch_labels[batch_labels == 0] = -1

                # input dictionary with dropout of 50%
                feed_dict = {self.x_input: batch_features, self.y_input: batch_labels, self.keep_prob: 0.5}
                
                # run the train op
                summary, _, loss = sess.run([self.merged, self.optimizer, self.loss], feed_dict=feed_dict)
                
                # every 100th step and at 0,
                if index % 2 == 0:
                    feed_dict = {self.x_input: batch_features, self.y_input: batch_labels, self.keep_prob: 1.0}
                    
                    # get the accuracy of training
                    train_accuracy = sess.run(self.accuracy, feed_dict=feed_dict)
                    
                    # display the training accuracy
                    print('step: {}, training accuracy : {}, training loss : {}'.format(index, train_accuracy, loss))

                    train_writer.add_summary(summary=summary, global_step=index)

                    saver.save(sess, save_path=os.path.join(checkpoint_path, self.name), global_step=index)


            test_labels[test_labels == 0] = -1

            feed_dict = {self.x_input: test_features, self.y_input: test_labels, self.keep_prob: 1.0}

            test_accuracy = sess.run(self.accuracy, feed_dict=feed_dict)

            print('Test Accuracy: {}'.format(test_accuracy))

    @staticmethod
    def weight_variable(shape):
        """Returns a weight matrix consisting of arbitrary values.

        :param shape: The shape of the weight matrix to create.
        :return: The weight matrix consisting of arbitrary values.
        """
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    @staticmethod
    def bias_variable(shape):
        """Returns a bias matrix consisting of 0.1 values.

        :param shape: The shape of the bias matrix to create.
        :return: The bias matrix consisting of 0.1 values.
        """
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    @staticmethod
    def conv2d(features, weight):
        """Produces a convolutional layer that filters an image subregion

        :param features: The layer input.
        :param weight: The size of the layer filter.
        :return: Returns a convolutional layer.
        """
        return tf.nn.conv2d(features, weight, strides=[1, 1, 1, 1], padding='SAME')

    @staticmethod
    def max_pool_2x2(features):
        """Downnsamples the image based on convolutional layer

        :param features: The input to downsample.
        :return: Downsampled input.
        """
        return tf.nn.max_pool(features, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


In [140]:
model = CNNSVM(alpha=1e-3, batch_size=128, num_classes=10, num_features=784 ,penalty_parameter=1)

#model.train(checkpoint_path=args.checkpoint_path, epochs=5, log_path=args.log_path,
#            train_data=mnist.train, test_data=mnist.test)


<log> Building graph...</log>


In [None]:
model.train(checkpoint_path='./check/', epochs=10, log_path='./log/',
            train_features=train_features,train_labels=train_labels, test_features=test_features,test_labels=test_labels)


step: 0, training accuracy : 0.140625, training loss : 1040681.625
step: 2, training accuracy : 0.1796875, training loss : 524830.875
step: 4, training accuracy : 0.265625, training loss : 208018.90625
step: 6, training accuracy : 0.5859375, training loss : 57562.18359375
step: 8, training accuracy : 0.71875, training loss : 36139.9296875


In [7]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('./mnist_data/',one_hot=True)


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./mnist_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./mnist_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./mnist_data/t10k-images-idx3-ubyte.gz
Extracting ./mnist_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [9]:
train_data=mnist.train
epochs=1
batch_size=128
for index in range(epochs):
    batch_features, batch_labels = train_data.next_batch(batch_size)
    batch_labels[batch_labels == 0] = -1

In [61]:
print(batch_labels[0])

[-1. -1. -1. -1. -1. -1. -1.  1. -1. -1.]


In [20]:
import tensorflow as tf
a=[0,1,0,0,0]
c=[1,0,0,0,1]
b=tf.equal(a,c)
b=tf.reduce_mean(tf.cast(b,tf.float32))
print(b)

Tensor("Mean:0", shape=(), dtype=float32)


In [21]:

print(tf.Session().run(b))

0.4
