In [1]:
import numpy as np

In [2]:
# Load the four pre-split Fashion-MNIST arrays (train/test inputs and labels)
# in one pass; the file stem of each array matches the variable it is bound to.
X_train, X_test, y_train, y_test = (
    np.load(f'Datasets/Fashion_MNIST/{split}.npy')
    for split in ('X_train', 'X_test', 'y_train', 'y_test')
)

In [3]:
# Inspect the training inputs: one row per sample, one column per flattened pixel.
X_train.shape

(60000, 784)

In [4]:
# Inspect the raw test labels; at this point they are still a 1-D vector of class ids.
y_test.shape

(10000,)

In [5]:
from sklearn.preprocessing import OneHotEncoder

In [6]:
# sparse=False asks the encoder for a dense array rather than a SciPy sparse matrix,
# so the encoded labels can be indexed and fed to the model like any NumPy array.
encoder = OneHotEncoder(sparse=False)

In [7]:
# Learn the label categories from the training labels only; the encoder expects a
# 2-D (n_samples, 1) column, hence the reshape.
encoder.fit(y_train.reshape(-1, 1))

OneHotEncoder(categorical_features='all', dtype=<class 'numpy.float64'>,
       handle_unknown='error', n_values='auto', sparse=False)

In [8]:
# One-hot encode the class labels with the fitted encoder.
# The names are rebound in place, so the ndim guard keeps this cell idempotent:
# without it, re-running the cell would flatten the already-encoded 2-D matrix
# and push it through the encoder a second time.
if y_train.ndim == 1:
    y_train = encoder.transform(y_train.reshape(-1, 1))
if y_test.ndim == 1:
    y_test = encoder.transform(y_test.reshape(-1, 1))

In [9]:
# Confirm the encoding: labels should now be 2-D with one column per class.
y_train.shape

(60000, 10)

In [10]:
def get_next_batch(X, y, size):
    """Yield shuffled mini-batches that together visit every sample exactly once.

    A fresh random permutation of the row indices is drawn on each call, and the
    same indices are applied to ``X`` and ``y`` so inputs stay aligned with labels.
    The final batch is shorter than ``size`` when the number of rows is not a
    multiple of ``size``; previously those trailing samples were silently dropped
    and a ``size`` larger than the dataset produced no batches at all.

    Args:
        X: NumPy array of inputs, indexed along axis 0.
        y: NumPy array of targets with the same number of rows as ``X``.
        size: maximum number of rows per batch (positive integer).

    Yields:
        ``(X_batch, y_batch)`` tuples.

    Raises:
        ValueError: if ``size`` is not positive or ``X`` and ``y`` disagree in
            length. Because this is a generator, the check runs on first iteration.
    """
    m = X.shape[0]
    if y.shape[0] != m:
        raise ValueError(f"X and y must have the same number of rows, got {m} and {y.shape[0]}")
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size}")

    index = np.random.permutation(m)
    # Step through the permutation in strides of `size`; slicing past the end
    # simply returns the remaining indices, which forms the last partial batch.
    for start in range(0, m, size):
        batch_i = index[start:start + size]
        yield X[batch_i], y[batch_i]

### Training

In [11]:
import tensorflow as tf

In [12]:
def conv_layer(X, channels, kernel_size, strides=[1, 1, 1, 1], kernel_initializer=None, reuse=False, name="conv"):
    """Apply a SAME-padded 2-D convolution, add a bias and pass the result through ReLU.

    The kernel ``W`` and bias ``B`` live in the variable scope ``"Var_" + name``.

    Args:
        X: input tensor; its dimension at index 3 gives the number of input channels.
        channels: number of output channels of the convolution.
        kernel_size: pair ``(height, width)`` of the convolution kernel.
        strides: strides forwarded to ``tf.nn.conv2d``.
        kernel_initializer: optional zero-argument callable returning the initializer
            for ``W``; a random normal with stddev 0.01 is used when omitted.
        reuse: when truthy, fetch the existing ``W``/``B`` from the scope instead of
            creating them.
        name: suffix of the variable scope.

    Returns:
        The ReLU-activated output tensor.
    """
    with tf.variable_scope("Var_"+name, reuse=reuse):
        weight_init = (kernel_initializer() if kernel_initializer
                       else tf.random_normal_initializer(stddev=0.01))

        in_channels = X.shape[3].value
        kernel_shape = [kernel_size[0], kernel_size[1], in_channels, channels]

        if not reuse:
            # First use of this scope: create the kernel and a bias initialised to 0.1.
            kernel = tf.get_variable(shape=kernel_shape, initializer=weight_init, name='W')
            bias = tf.get_variable(initializer=tf.constant(0.1, shape=[channels]), name='B')
        else:
            # Shared weights: look the variables up by name only.
            kernel = tf.get_variable('W')
            bias = tf.get_variable('B')

        feature_map = tf.nn.conv2d(X, kernel, strides=strides, padding="SAME")
        activated = tf.nn.relu(feature_map + bias)

        # Record distributions for TensorBoard.
        for tag, tensor in (("weights", kernel), ("biases", bias), ("activations", activated)):
            tf.summary.histogram(tag, tensor)

        return activated

In [13]:
def fc_layer(X, units, activation=None, kernel_initializer=None, regularizer=None, reuse=False, name='FC'):
    """Fully connected layer computing ``activation(X @ W + B)``.

    The weight matrix ``W`` and bias ``B`` live in the variable scope ``"Var_" + name``.

    Args:
        X: 2-D input tensor; its dimension at index 1 gives the input width.
        units: number of output units.
        activation: optional callable applied to the affine output; when omitted
            the affine output is returned unchanged.
        kernel_initializer: optional zero-argument callable returning the initializer
            for ``W``; a random normal with stddev 0.01 is used when omitted.
        regularizer: optional regularizer attached to ``W`` when it is created.
        reuse: when truthy, fetch the existing ``W``/``B`` from the scope instead of
            creating them.
        name: suffix of the variable scope.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope("Var_"+name, reuse=reuse):
        weight_init = (kernel_initializer() if kernel_initializer
                       else tf.random_normal_initializer(stddev=0.01))

        if not reuse:
            # First use of this scope: create the weights and a bias initialised to 0.1.
            weights = tf.get_variable(shape=[X.shape[1].value, units], initializer=weight_init,
                                      regularizer=regularizer, name='W')
            bias = tf.get_variable(initializer=tf.constant(0.1, shape=[units]), name='B')
        else:
            # Shared weights: look the variables up by name only.
            weights = tf.get_variable('W')
            bias = tf.get_variable('B')

        affine = tf.add(tf.matmul(X, weights), bias)
        out = activation(affine) if activation else affine

        # Record distributions for TensorBoard.
        for tag, tensor in (("weights", weights), ("biases", bias), ("activations", out)):
            tf.summary.histogram(tag, tensor)

    return out

In [14]:
def conv_model(X, y, conv_channels, conv_kernels, fc_layers, regularization_rate=0.0, drop_rate=0.0, reuse=False, is_training=True, name="Conv_model"):
    """Assemble the network: conv+max-pool stages, flatten, dense+dropout stages, linear head.

    Args:
        X: 4-D input image tensor.
        y: label tensor; only its second dimension (number of classes) is read.
        conv_channels: output channel count for each convolutional stage.
        conv_kernels: kernel ``(height, width)`` for each convolutional stage,
            indexed in parallel with ``conv_channels``.
        fc_layers: unit count for each hidden fully connected layer.
        regularization_rate: scale of the L2 regularizer attached to dense weights.
        drop_rate: dropout rate applied after each hidden dense layer.
        reuse: forwarded to every layer so a second call can share the variables.
        is_training: whether dropout is active.
        name: name scope wrapping the ops of this model instance.

    Returns:
        The logits tensor (no activation applied).
    """
    with tf.name_scope(name):
        l2 = tf.contrib.layers.l2_regularizer(scale=regularization_rate)
        xavier = tf.contrib.layers.xavier_initializer
        relu = tf.nn.relu

        net = X

        # Convolutional stages, each followed by a 2x2 max-pool with stride 2.
        for idx, n_channels in enumerate(conv_channels):
            net = conv_layer(net, channels=n_channels, kernel_size=conv_kernels[idx],
                             kernel_initializer=xavier, reuse=reuse, name=f"conv_{idx}")
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

        # Flatten every non-batch dimension into a single feature axis.
        flat_width = np.prod(net.shape.as_list()[1:])
        net = tf.reshape(net, [-1, flat_width])

        # Hidden dense stages, each followed by dropout.
        for idx, n_units in enumerate(fc_layers):
            net = fc_layer(net, n_units, activation=relu, kernel_initializer=xavier,
                           regularizer=l2, reuse=reuse, name=f"fc{idx+1}")
            net = tf.layers.dropout(net, rate=drop_rate, training=is_training)

        n_classes = y.shape[1].value
        logits = fc_layer(net, n_classes, kernel_initializer=xavier, regularizer=l2,
                          reuse=reuse, name="last_fc")
        tf.summary.histogram('logits', logits)
    return logits

In [15]:
def mnist_model(X_train, y_train, conv_channels, conv_kernels, layers, learning_rate, epochs, batch_size,
                l2_rate, drop_rate, logdir, X_test, y_test, print_acc=False):
    """Build, train and periodically evaluate the CNN in a fresh graph and session.

    Two copies of the network share one set of variables: a training copy (dropout
    active) that drives the loss, and an evaluation copy (dropout off) used for
    test accuracy.

    Fixes relative to the previous version:
      * the L2 penalty collected by the layers is now added to the loss, so
        ``l2_rate`` actually influences training (it was silently ignored);
      * training summaries are evaluated on the training sample and the test
        accuracy summary on the test sample (previously every summary was computed
        from the test sample);
      * the summary writer is always closed, even if training raises.

    Args:
        X_train: 2-D array of flattened training images; rows must reshape to 28x28.
        y_train: 2-D one-hot training labels.
        conv_channels: output channels per convolutional stage.
        conv_kernels: kernel ``(height, width)`` per convolutional stage.
        layers: unit counts of the hidden fully connected layers.
        learning_rate: Adam learning rate.
        epochs: number of passes over the training data.
        batch_size: mini-batch size.
        l2_rate: L2 regularization scale for the dense weights.
        drop_rate: dropout rate of the training copy.
        logdir: run name; used as sub-directory of ``./summary`` and as checkpoint
            prefix under ``./model``.
        X_test: 2-D array of flattened test images.
        y_test: 2-D one-hot test labels.
        print_acc: when true, every 10th epoch prints metrics on a random sample
            of 1000 train and 1000 test rows, writes summaries and saves a checkpoint.

    Returns:
        None. Results are printed and written to disk.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:

        n_features = X_train.shape[1]
        n_labels = y_train.shape[1]

        X = tf.placeholder(tf.float32, shape=[None, n_features], name="x")
        # Restore the 28x28 single-channel layout, then zero-pad two pixels on every
        # spatial side so the network sees 32x32 images.
        x_image = tf.reshape(X, [-1, 28, 28, 1])
        paddings = tf.constant([[0, 0], [2, 2], [2, 2], [0, 0]])
        x_image = tf.pad(x_image, paddings)
        y = tf.placeholder(tf.float32, shape=[None, n_labels], name="labels")

        train_logits = conv_model(x_image, y, conv_channels, conv_kernels, fc_layers=layers, regularization_rate=l2_rate, drop_rate=drop_rate, reuse=False, is_training=True, name="Conv_Model")
        test_logits = conv_model(x_image, y, conv_channels, conv_kernels, fc_layers=layers, reuse=True, is_training=False, name="Conv_Eval_Model")

        with tf.name_scope("loss"):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                                           logits=train_logits, labels=y))
            # Add the penalties registered by the layers' regularizers; this term is
            # zero when no regularization losses were collected (e.g. l2_rate == 0).
            loss = tf.add(cross_entropy, tf.losses.get_regularization_loss(), name="loss")
            tf.summary.scalar("loss", loss)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        with tf.name_scope("test_accuracy"):
            correct_prediction = tf.equal(tf.argmax(test_logits, 1), tf.argmax(y, 1))
            test_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            test_summ = tf.summary.scalar("test_accuracy", test_accuracy)

        with tf.name_scope("train_accuracy"):
            # NOTE: train_logits has dropout enabled, so train loss/accuracy are
            # measured with dropout active and can read lower than test accuracy.
            correct_prediction = tf.equal(tf.argmax(train_logits, 1), tf.argmax(y, 1))
            train_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar("train_accuracy", train_accuracy)

        # Everything except the test-accuracy scalar is evaluated on training data.
        train_summ = tf.summary.merge(
            [s for s in tf.get_collection(tf.GraphKeys.SUMMARIES) if s is not test_summ])

        saver = tf.train.Saver()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(f"./summary/{logdir}")
        try:
            writer.add_graph(sess.graph)

            for epoch in range(1, epochs+1):
                for X_batch, y_batch in get_next_batch(X_train, y_train, batch_size):
                    sess.run(train_step, {X: X_batch, y: y_batch})

                if print_acc and epoch%10 == 0:
                    # Evaluate on one random batch of 1000 rows from each split.
                    train_sample = next(get_next_batch(X_train, y_train, 1000))
                    test_sample = next(get_next_batch(X_test, y_test, 1000))

                    test_acc, test_s = sess.run([test_accuracy, test_summ],
                                                {X: test_sample[0], y: test_sample[1]})
                    train_l, train_acc, train_s = sess.run([loss, train_accuracy, train_summ],
                                                           {X: train_sample[0], y: train_sample[1]})
                    print("Epoch:", np.round(epoch, 4),
                          "Train Loss:", np.round(train_l, 4),
                          "Train accuracy:", np.round(train_acc, 4),
                          "Test accuracy:", np.round(test_acc,4))
                    writer.add_summary(test_s, epoch)
                    writer.add_summary(train_s, epoch)
                    saver.save(sess, f"./model/{logdir}")
        finally:
            # Flush pending events and release the file handle; this function is
            # called once per grid point, so leaked writers would accumulate.
            writer.close()

In [16]:
from sklearn.model_selection import ParameterGrid

In [17]:
# Search space for the convolutional stack. The two dicts are expanded separately,
# so channel lists are only combined with kernel lists of the same depth.
param_grid = [
    # Two convolutional stages.
    {
        'conv_channels': [
            [32, 64],
            [64, 128],
            [128, 256],
        ],
        'conv_kernels': [
            [[5, 5], [3, 3]],
            [[3, 3], [3, 3]],
        ],
    },
    # Three convolutional stages.
    {
        'conv_channels': [
            [32, 64, 128],
            [64, 128, 256],
            [128, 256, 512],
        ],
        'conv_kernels': [
            [[5, 5], [3, 3], [3, 3]],
            [[3, 3], [3, 3], [3, 3]],
        ],
    },
]

In [18]:
# Train one model per grid point. The "key=value,..." description of the point is
# printed as a header and reused as the run's summary/checkpoint name.
for params in ParameterGrid(param_grid):
    name = ",".join(f"{k}={v}" for k, v in params.items())

    print("\n\n", name)
    mnist_model(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        layers=[256, 128],
        learning_rate=1e-5,
        epochs=300,
        batch_size=32,
        l2_rate=0.0,
        drop_rate=0.5,
        logdir=name,
        print_acc=True,
        **params
    )



 conv_channels=[32, 64],conv_kernels=[[5, 5], [3, 3]]
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
Epoch: 10 Train Loss: 0.672 Train accuracy: 0.784 Test accuracy: 0.852
Epoch: 20 Train Loss: 0.4053 Train accuracy: 0.868 Test accuracy: 0.871
Epoch: 30 Train Loss: 0.3574 Train accuracy: 0.873 Test accuracy: 0.889
Epoch: 40 Train Loss: 0.3482 Train accuracy: 0.887 Test accuracy: 0.882
Epoch: 50 Train Loss: 0.2746 Train accuracy: 0.901 Test accuracy: 0.91
Epoch: 60 Train Loss: 0.1932 Train accuracy: 0.926 Test accuracy: 0.908
Epoch: 70 Train Loss: 0.212 Train accuracy: 0.921 Test accuracy: 0.912
Epoch: 80 Train Loss: 0.2137 Train accuracy: 0.931 Test accuracy: 0.915
Epoch: 90 Train Loss: 0.1827 Train accuracy: 0.942 Test accuracy: 0.9
Epoch: 100 Train Loss: 0.1667 Train accuracy: 0.931 Test accuracy: 0.902
Epoch: 110 Train Loss: 0.1363 Train accuracy: 0.952 Test accuracy: 0.924
Epoch: 120 Train Loss: 0.1464 Train accuracy: 0.952 Test 