In [1]:
import numpy as np

In [2]:
# Load the four Fashion-MNIST arrays stored as .npy files under
# Datasets/Fashion_MNIST. The files are read in the same order as the names
# on the left-hand side.
X_train, X_test, y_train, y_test = (
    np.load(f'Datasets/Fashion_MNIST/{split}.npy')
    for split in ('X_train', 'X_test', 'y_train', 'y_test')
)

In [3]:
# Display the dimensions of the training feature array.
X_train.shape

(60000, 784)

In [4]:
# Display the dimensions of the training label array (before one-hot encoding).
y_train.shape

(60000,)

In [5]:
from sklearn.preprocessing import OneHotEncoder

In [6]:
# sparse=False asks the encoder for a dense NumPy array rather than a SciPy
# sparse matrix.
# NOTE(review): recent scikit-learn releases rename this argument to
# `sparse_output` -- confirm against the installed version before upgrading.
encoder = OneHotEncoder(sparse=False)

In [7]:
# Learn the set of label categories from the training labels only.
# reshape(-1, 1) turns the 1-D label vector into the single-column 2-D array
# that the encoder is given as input.
encoder.fit(y_train.reshape(-1, 1))

OneHotEncoder(categorical_features='all', dtype=<class 'numpy.float64'>,
       handle_unknown='error', n_values='auto', sparse=False)

In [8]:
# One-hot encode the class labels with the encoder fitted above.
# This cell rebinds y_train / y_test, so each transform is guarded: an array
# that still holds exactly one value per sample has not been encoded yet.
# The guard keeps the cell idempotent -- re-running it no longer flattens the
# already-encoded matrix and feeds its 0/1 entries back through the encoder.
if y_train.size == y_train.shape[0]:
    y_train = encoder.transform(y_train.reshape(-1, 1))
if y_test.size == y_test.shape[0]:
    y_test = encoder.transform(y_test.reshape(-1, 1))

In [9]:
def get_next_batch(X, y, size, drop_last=False):
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` covering one pass over the data.

    A fresh random permutation of the row indices is drawn on every call, and
    the same indices are used to slice ``X`` and ``y`` so rows stay aligned.

    Parameters
    ----------
    X : numpy.ndarray
        Array whose first axis indexes samples.
    y : numpy.ndarray
        Array indexed along its first axis with the same row indices as ``X``.
    size : int
        Number of rows per batch; must be positive.
    drop_last : bool, optional
        When False (default) the final, possibly smaller, batch is yielded too,
        so every row is visited exactly once and a ``size`` larger than the
        data still produces one batch. When True only full batches are
        yielded, i.e. up to ``size - 1`` rows are skipped.

    Yields
    ------
    tuple
        ``(X[batch_idx], y[batch_idx])`` for each batch.

    Raises
    ------
    ValueError
        If ``size`` is not positive. Because this is a generator, the error is
        raised when iteration starts, not when the function is called.
    """
    if size <= 0:
        raise ValueError(f"batch size must be a positive integer, got {size!r}")

    m = X.shape[0]
    index = np.random.permutation(np.arange(m))

    # With drop_last the ragged tail is cut off; otherwise iterate to the end
    # and let slicing produce a shorter final batch.
    stop = m - (m % size) if drop_last else m
    for start in range(0, stop, size):
        batch_i = index[start:start + size]
        yield X[batch_i], y[batch_i]

### Training

In [10]:
import tensorflow as tf

In [11]:
def fc_layer(X, units, activation=None, kernel_initializer=None, regularizer=None, reuse=False, name='FC'):
    """Fully connected layer computing ``activation(X @ W + B)``.

    The variables live in the variable scope ``"Var_" + name``. With
    ``reuse=False`` the variables ``W`` and ``B`` are created; with
    ``reuse=True`` the existing ``W`` and ``B`` of that scope are fetched, so
    several networks can share one set of parameters.

    Parameters
    ----------
    X : tf.Tensor
        2-D input; its static second dimension fixes the number of rows of ``W``.
    units : int
        Number of output units.
    activation : callable, optional
        Applied to the affine output; when falsy the affine output is returned.
    kernel_initializer : callable, optional
        Zero-argument factory returning the initializer for ``W``. When falsy
        a normal initializer with ``stddev=0.01`` is used.
    regularizer : callable, optional
        Passed to ``tf.get_variable`` when ``W`` is created.
    reuse : bool
        Whether to fetch existing variables instead of creating them.
    name : str
        Suffix of the variable scope name.

    Returns
    -------
    tf.Tensor
        The layer output. Histogram summaries for the weights, biases and
        output are registered as a side effect.
    """

    with tf.variable_scope("Var_"+name, reuse=reuse):

        # Instantiate the supplied factory, or fall back to a small Gaussian.
        initializer = kernel_initializer() if kernel_initializer else tf.random_normal_initializer(stddev=0.01)

        if not reuse:
            weights = tf.get_variable(name='W', shape=[X.shape[1].value, units],
                                      initializer=initializer, regularizer=regularizer)
            bias = tf.get_variable(name='B', initializer=tf.constant(0.1, shape=[units]))
        else:
            # Shared parameters: look up what an earlier call created.
            weights = tf.get_variable('W')
            bias = tf.get_variable('B')

        pre_activation = tf.add(tf.matmul(X, weights), bias)
        out = activation(pre_activation) if activation else pre_activation

        tf.summary.histogram("weights", weights)
        tf.summary.histogram("biases", bias)
        tf.summary.histogram("activations", out)

    return out

In [12]:
def dnn_model(X, y, layers, regularization_rate=0.0, drop_rate=0.0, reuse=False, is_training=True, name="DNN"):
    """Stack ReLU fully connected layers with dropout and a linear output layer.

    Hidden layers are named ``fc1``, ``fc2``, ... and the output layer
    ``last_fc``; every layer uses the Xavier initializer factory and an L2
    regularizer built from ``regularization_rate``.

    Parameters
    ----------
    X : tf.Tensor
        2-D input features.
    y : tf.Tensor
        Label tensor; only its static second dimension is read, to size the
        output layer.
    layers : sequence of int
        Number of units of each hidden layer, in order.
    regularization_rate : float
        Scale handed to ``tf.contrib.layers.l2_regularizer``.
    drop_rate : float
        Dropout rate applied after every hidden layer.
    reuse : bool
        Forwarded to ``fc_layer`` to share variables with an earlier call.
    is_training : bool
        Forwarded to ``tf.layers.dropout`` as its ``training`` flag.
    name : str
        Name scope wrapping the ops of this network.

    Returns
    -------
    tf.Tensor
        Output of the last layer (no activation applied), also recorded as a
        ``logits`` histogram summary.
    """
    with tf.name_scope(name):
        l2 = tf.contrib.layers.l2_regularizer(scale=regularization_rate)
        xavier = tf.contrib.layers.xavier_initializer

        hidden = X
        for idx, units in enumerate(layers, start=1):
            hidden = fc_layer(hidden, units, tf.nn.relu, xavier, l2, reuse=reuse, name=f"fc{idx}")
            hidden = tf.layers.dropout(hidden, rate=drop_rate, training=is_training)

        n_outputs = y.shape[1].value
        output = fc_layer(hidden, n_outputs, kernel_initializer=xavier, regularizer=l2,
                          reuse=reuse, name="last_fc")
        tf.summary.histogram('logits', output)
    return output

In [13]:
def model(X_train, y_train, layers, learning_rate, epochs, batch_size,
          l2_rate, drop_rate, logdir, X_test, y_test, print_acc=False):
    """Build, train and periodically evaluate a fully connected classifier.

    The default graph is reset on every call, so each invocation trains an
    independent network. Two networks sharing the same variables are built:
    one with dropout enabled (used for the loss and the train metrics) and one
    with dropout disabled (used for the test accuracy).

    Parameters
    ----------
    X_train, y_train : numpy.ndarray
        Training features and labels; the placeholder widths are taken from
        their second dimension. Labels are fed to softmax cross-entropy, so
        they are presumably one-hot rows -- verify against the caller.
    layers : sequence of int
        Hidden layer sizes passed to ``dnn_model``.
    learning_rate : float
        Learning rate of the Adam optimizer.
    epochs : int
        Number of passes over the training data.
    batch_size : int
        Mini-batch size passed to ``get_next_batch``.
    l2_rate : float
        L2 regularization scale for the layer weights.
    drop_rate : float
        Dropout rate of the training network.
    logdir : str
        Run name; summaries go to ``./summary/{logdir}`` and checkpoints to
        ``./model/{logdir}``.
    X_test, y_test : numpy.ndarray
        Evaluation features and labels.
    print_acc : bool
        When True, every 10th epoch the metrics are printed, summaries are
        written and a checkpoint is saved. When False none of this happens.

    Returns
    -------
    None
    """
    tf.reset_default_graph()
    with tf.Session() as sess:

        n_features = X_train.shape[1]
        n_labels = y_train.shape[1]

        X = tf.placeholder(tf.float32, shape=[None, n_features], name='features')
        y = tf.placeholder(tf.float32, shape=[None, n_labels], name='labels')

        train_logits = dnn_model(X, y, layers, l2_rate, drop_rate, reuse=False, is_training=True, name="DNN")
        test_logits = dnn_model(X, y, layers, reuse=True, is_training=False, name="Testing_DNN")

        # train_logits comes from the network built with is_training=True, so
        # the train metrics below are computed with dropout active.
        with tf.name_scope("train_accuracy"):
            train_pred = tf.equal(tf.argmax(train_logits, 1), tf.argmax(y, 1))
            train_accuracy = tf.reduce_mean(tf.cast(train_pred, tf.float32))
            train_acc_summ = tf.summary.scalar('train_accuracy', train_accuracy)

        with tf.name_scope("test_accuracy"):
            test_pred = tf.equal(tf.argmax(test_logits, 1), tf.argmax(y, 1))
            test_accuracy = tf.reduce_mean(tf.cast(test_pred, tf.float32))
            tf.summary.scalar('test_accuracy', test_accuracy)

        with tf.name_scope("train_loss"):
            # Reported loss: plain cross-entropy, comparable across l2_rate values.
            train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=train_logits), name='train_loss')
            train_loss_summ = tf.summary.scalar('train_loss', train_loss)
            # The regularizer given to tf.get_variable only registers its
            # penalty in the REGULARIZATION_LOSSES collection; it has to be
            # added to the optimized objective explicitly, otherwise l2_rate
            # has no effect on training.
            total_loss = train_loss + tf.losses.get_regularization_loss()

        with tf.name_scope("train"):
            train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss)

        # Keep the train scalars apart from the rest so each group is
        # evaluated on the data its name refers to: train scalars on a train
        # sample, everything else (test accuracy, histograms) on the test set.
        train_summ = tf.summary.merge([train_acc_summ, train_loss_summ])
        train_only = {train_acc_summ.name, train_loss_summ.name}
        eval_summ = tf.summary.merge(
            [s for s in tf.get_collection(tf.GraphKeys.SUMMARIES) if s.name not in train_only])

        saver = tf.train.Saver()

        sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(f"./summary/{logdir}")
        try:
            writer.add_graph(sess.graph)

            for epoch in range(1, epochs+1):
                for X_batch, y_batch in get_next_batch(X_train, y_train, batch_size):
                    sess.run(train_op, {X: X_batch, y: y_batch})

                if print_acc and epoch % 10 == 0:
                    # Random train sample as large as the test set, capped at
                    # the train set size so a batch is always available.
                    sample_size = min(X_test.shape[0], X_train.shape[0])
                    X_sample, y_sample = next(get_next_batch(X_train, y_train, sample_size))

                    test_acc, eval_s = sess.run([test_accuracy, eval_summ], {X: X_test, y: y_test})
                    train_l, train_acc, train_s = sess.run([train_loss, train_accuracy, train_summ],
                                                           {X: X_sample, y: y_sample})

                    print("Epoch:", epoch,
                          "Train Loss:", np.round(train_l, 4),
                          "Train accuracy:", np.round(train_acc, 4),
                          "Test accuracy:", np.round(test_acc, 4))
                    writer.add_summary(eval_s, epoch)
                    writer.add_summary(train_s, epoch)
                    saver.save(sess, f"./model/{logdir}")
        finally:
            # Flush pending events and release the file handle even if
            # training is interrupted.
            writer.close()

In [14]:
from sklearn.model_selection import ParameterGrid

In [15]:
# Hidden-layer layouts shared by both sub-grids.
layers_params = [
    [128, 64, 64],
    [256, 128, 64],
    [128, 64],
    [256, 128],
]

# Two sub-grids: the first varies the dropout rate with the L2 rate fixed at
# 0.0, the second varies the L2 rate with the dropout rate fixed at 0.0.
param_grid = [
    dict(layers=layers_params,
         learning_rate=[0.001, 0.0001, 0.00001],
         batch_size=[32],
         l2_rate=[0.0],
         drop_rate=[0.05, 0.25, 0.1]),
    dict(layers=layers_params,
         learning_rate=[0.001, 0.0001, 0.00001],
         batch_size=[32],
         l2_rate=[100.0, 10.0, 0.1],
         drop_rate=[0.0]),
]

In [None]:
# Train one model per hyper-parameter combination. The combination, rendered
# as "key=value" pairs, is printed and also used as the run name (logdir).
for params in ParameterGrid(param_grid):
    name = ",".join(f"{key}={value}" for key, value in params.items())

    print("\n\n", name)

    model(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
          epochs=200, logdir=name, print_acc=True, **params)