# Sparse Autoencoder

Implement an autoencoder with the sparsity constraint. You can build on the MLP implementation from EE5600. Choose your network size appropriately (meaning a size that you can train and test on your computer without running into memory issues).

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Get the MNIST data from keras, downsample and save to csv file
# No need to run this chunk of code again as csv files with 14x14 images are saved 
# Each image is saved as 196 dim vector with label as it's label
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()

x_train = np.reshape(x_train, newshape=(*x_train.shape, 1))
x_train = tf.image.resize_images(images=x_train, size=(14,14))
x = tf.Session().run(x_train)
x_train = np.asarray(x, dtype=np.uint8).reshape(x_train.shape[0], 196) / 255.
# pd.DataFrame(x_train).to_csv('train.csv', sep=',', index=True, header=False)

x_test = np.reshape(x_test, newshape=(*x_test.shape, 1))
x_test = tf.image.resize_images(images=x_test, size=(14,14))
x = tf.Session().run(x_test)
x_test = np.asarray(x, dtype=np.uint8).reshape(x_test.shape[0], 196) / 255.
# pd.DataFrame(x_test).to_csv('train.csv', sep=',', index=True, header=False)

In [3]:
import matplotlib.pyplot as plt
def show_digit(x): # x - 196 dim vector
    x = np.reshape(x, (14,14))
    plt.imshow(x, cmap='gray')
    plt.show()

In [4]:
class NN(object):
    def __init__(self, input_dim, hidden_dim, learn_rate, sparsity, regularization):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = input_dim
        self.learn_rate = learn_rate
        self.s = sparsity
        self.Lambda = regularization
        # intialize weights
        self.A = np.random.normal(0,  1, (self.hidden_dim, self.input_dim))
        self.B = np.random.normal(0,  1, (self.output_dim, self.hidden_dim))
        self.a0 = np.random.normal(0, 1, self.hidden_dim)
        self.b0 = np.random.normal(0, 1, self.output_dim)

    def sigmoid(self, t):
        return 1/(1  + np.exp(-t))

    def dsigmoid(self, t):
        sigt = self.sigmoid(t)
        return sigt*(1-sigt)

    def hidden_layer(self, x):
        # A.shape:mxd; x.shape;(d,); so z.shape=(m,)
        z = self.sigmoid(np.dot(x, self.A.T) + self.a0) 
        return z

    def forward_pass(self, x):
        # B.shape:kxm; z.shape;m; so y_hat.shape=k
        y_hat = self.sigmoid(np.dot(self.hidden_layer(x), self.B.T) + self.b0)
        return y_hat
    
    def back_propogate(self, X, Y, Y_hat):
        self.N = len(X)
        dSSE_A, dSSE_a0 = np.zeros_like(self.A), np.zeros_like(self.a0)
        dSSE_B, dSSE_b0 = np.zeros_like(self.B), np.zeros_like(self.b0)
        Z = self.hidden_layer(X) # Z.shape = (N,m)
        dZ = Z * (1-Z) # dZ shape = (N,m)
#         print('Z.shape = ', Z.shape, dZ.shape)
        # y_delta.shape (N,k)
        y_delta = 2*(Y_hat-Y) * self.dsigmoid(self.b0 + np.dot(Z, self.B.T))
#         print('Y_delta shape', y_delta.shape)
        # z_delta.shape (N,m)
        z_delta = np.transpose(-dZ.T * np.sum(y_delta, axis=1))
#         print('Z delta Shape', z_delta.shape)
        zm = np.mean(Z, axis=0)
#         print('Zm', zm, zm.shape, 'the fuck', Z)
        # dKL shape = (N,m) 
        dKL = (-self.s/zm) + ((1-self.s)/(1-zm)) # shape: (m,)
#         print('fuck you', dKL)
        dKL = self.Lambda * dKL * dZ  # shape: (N,m)
#         print('KL ', dKL.shape)
        dSSE_A = np.matmul((z_delta + dKL).T, X)
#         print('dSSE_A', dSSE_A.shape)
        dSSE_a0 = np.sum((z_delta+dKL), axis=0)
#         print('dSSE_a0', dSSE_a0.shape)
        dSSE_B = np.matmul(y_delta.T, Z)
        dSSE_b0 = np.sum(y_delta, axis=0)
        # update weights 
        A_new = self.A - (self.learn_rate*dSSE_A)
        a0_new = self.a0 - (self.learn_rate*dSSE_a0)
        B_new = self.B - (self.learn_rate*dSSE_B)
        b0_new = self.b0 - (self.learn_rate*dSSE_b0)
        return [A_new, a0_new, B_new, b0_new]
    
    def loss(self, y_train, y_hat):
        return np.sum((y_train - y_hat)**2) 
    
    def train(self, x_train, y_train, epochs, shuffle=True): 
        if shuffle:
            indices = np.arange(N)
            np.random.shuffle(indices)
            x_train, y_train = x_train[indices], y_train[indices]

        epoch = 1
        while(epoch <= epochs):
            Y_hat = np.array([self.forward_pass(x) for x in x_train])
            [self.A, self.a0, self.B, self.b0] = self.back_propogate(X=x_train, Y=y_train, Y_hat=Y_hat)
            print('Epoch: ', epoch, 'Loss: ', self.loss(y_train, Y_hat))
            # show_digit(x_train[0])
#             show_digit(Y_hat[0])
            epoch += 1
        print('Done Training')
    

In [5]:
SparseAE = NN(input_dim=196, hidden_dim=225, learn_rate=1e-4, sparsity=0.1, regularization=1)

In [None]:
SparseAE.train(x_train=x_train, y_train=x_train, epochs=50, shuffle=False)

Epoch:  1 Loss:  5444631.479909311
Epoch:  2 Loss:  2581558.107373863
Epoch:  3 Loss:  2704771.430768816
Epoch:  4 Loss:  1753073.6257336747
Epoch:  5 Loss:  1573108.019140351
Epoch:  6 Loss:  1447908.7647068172
Epoch:  7 Loss:  1386337.1726443556
Epoch:  8 Loss:  1298965.314197948
Epoch:  9 Loss:  1259406.8262141324
Epoch:  10 Loss:  1189933.6665408448
Epoch:  11 Loss:  1166223.3075929396
Epoch:  12 Loss:  1082987.2364296035
Epoch:  13 Loss:  1092792.7305459108
Epoch:  14 Loss:  1020579.550450141
Epoch:  15 Loss:  1052088.5451923013
Epoch:  16 Loss:  971576.6481220872
Epoch:  17 Loss:  1015963.2606101857
Epoch:  18 Loss:  928453.3983056364
Epoch:  19 Loss:  976656.9580985332
Epoch:  20 Loss:  893217.3319909839
Epoch:  21 Loss:  937163.3744436707
Epoch:  22 Loss:  863831.2035372965
Epoch:  23 Loss:  897550.5772316945
Epoch:  24 Loss:  837129.3557720016
Epoch:  25 Loss:  861651.319656919
Epoch:  26 Loss:  813756.0132732597
Epoch:  27 Loss:  829447.5355609704
Epoch:  28 Loss:  792095.480