In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [9]:
# Seed shared by every stochastic step that asks for one.
RANDOM_SEED = 1

# Width of one flattened input row (784 = 28 * 28; presumably 28x28 images — confirm).
IMAGE_SHAPE = 28 * 28

# Encoder hidden widths, narrowing towards the latent code.
ENCODER_H1_SHAPE = 200
ENCODER_H2_SHAPE = 100

# Size of the latent representation.
LATENT_SHAPE = 20

# Decoder hidden widths, widening back out (currently the encoder widths in reverse).
DECODER_H1_SHAPE = 100
DECODER_H2_SHAPE = 200


In [10]:
def ReLU(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        ``z`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(0, z)
    return rectified

def dReLU(z):
    """Derivative of ReLU with respect to its input.

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Integer array holding 1 where ``z`` is strictly positive and 0
        elsewhere (the derivative at exactly 0 is taken to be 0).
    """
    is_positive = z > 0
    return np.where(is_positive, 1, 0)

def leaky_ReLU(z, alpha=0.01):
    """Leaky rectified linear unit.

    Positive entries pass through unchanged; all other entries are scaled by
    ``alpha``.

    Args:
        z: Array of pre-activations.
        alpha: Slope applied to non-positive entries. Defaults to 0.01 so the
            function can be called with a single argument, which is how
            ``Layer`` invokes its activation.

    Returns:
        Array with the same shape as ``z``.
    """
    return np.where(z > 0, z, z * alpha)

def dleaky_ReLU(z, alpha=0.01):
    """Derivative of the leaky ReLU with respect to its input.

    Args:
        z: Array of pre-activations.
        alpha: Slope used for non-positive entries. Defaults to 0.01 so the
            function can be called with a single argument, which is how
            ``Layer`` invokes its activation derivative.

    Returns:
        Array holding 1 where ``z`` is strictly positive and ``alpha``
        elsewhere.
    """
    return np.where(z > 0, 1, alpha)

def one_hot(y, batch_size, output_size):
    """One-hot encode a batch of integer class labels.

    Args:
        y: Integer labels for the batch. May be a 1-D array, a column vector
            ``(batch_size, 1)``, a row vector ``(1, batch_size)`` or a plain
            sequence; it is flattened before use.
        batch_size: Number of rows in the result.
        output_size: Number of classes (columns in the result).

    Returns:
        Float array of shape ``(batch_size, output_size)`` with a single 1 per
        row, in the column given by that row's label.
    """
    # Flatten first: indexing with a 2-D label array would broadcast against
    # the row indices and, for a (1, B) input, set every label in every row.
    labels = np.asarray(y).reshape(-1)
    out = np.zeros((batch_size, output_size))
    out[np.arange(batch_size), labels] = 1  # pair row i with column labels[i]
    return out

In [16]:
class Layer:
    """A single fully connected layer computing ``activation(input @ weights + biases)``.

    The most recent pre-activation (``z``) and activation output (``values``)
    are cached by the forward pass so the backward pass can reuse them.

    Attributes:
        input_size: Number of features expected in every input row.
        weights: ``(input_size, size)`` matrix; ``None`` until initialised.
        biases: ``(1, size)`` row vector; ``None`` until initialised.
        z: Pre-activation cached by the last ``forward_prop`` call.
        values: Activation output cached by the last ``forward_prop`` call.
        dweights: Weight gradient stored by the last ``backward_prop`` call.
        dbiases: Bias gradient stored by the last ``backward_prop`` call.
    """

    def __init__(self, size, input_size, activation, dactivation):
        """Create an uninitialised layer.

        Args:
            size: Number of units (output width) of the layer.
            input_size: Number of input features per row.
            activation: Callable applied element-wise to the pre-activation.
            dactivation: Callable returning the derivative of ``activation``
                evaluated at the pre-activation.
        """
        assert size > 0, "Size must be a positive integer"
        assert input_size > 0, "Input size must be a positive integer"
        assert callable(activation), "Activation must be a callable function"
        assert callable(dactivation), "Derivative of activation must be a callable function"

        self.__size = size
        self.__dactivation = dactivation
        self.z = None
        self.input_size = input_size
        self.weights = None
        self.biases = None
        self.values = None
        self.dweights = None
        self.dbiases = None
        self.__activation = activation

    def get_size(self):
        """Return the number of units in the layer."""
        return self.__size

    def get_activation(self):
        """Return the activation callable supplied at construction."""
        return self.__activation

    def random_he_initialize(self, seed=None):
        """Initialise weights with He-normal scaling and biases with zeros.

        Args:
            seed: Optional seed. When given, NumPy's *global* random state is
                reseeded before sampling, which also affects any later use of
                ``np.random`` elsewhere.

        Returns:
            The ``(weights, biases)`` arrays now stored on the layer.
        """
        if seed is not None:
            np.random.seed(seed)
        rng = np.random.standard_normal
        # He initialisation: standard normal scaled by sqrt(2 / fan_in).
        self.weights = rng((self.input_size, self.__size)) * np.sqrt(2 / self.input_size)
        self.biases = np.zeros((1, self.__size))
        return self.weights, self.biases

    def forward_prop(self, input):
        """Run the forward pass and cache ``z`` and ``values``.

        Args:
            input: Array-like of shape ``(batch, input_size)``.

        Returns:
            The activation output, shape ``(batch, size)``.

        Raises:
            RuntimeError: If the parameters have not been initialised.
        """
        if self.weights is None or self.biases is None:
            raise RuntimeError(
                "Layer parameters are not initialised; call random_he_initialize() first"
            )
        input = np.array(input)
        self.z = input @ self.weights + self.biases
        self.values = self.__activation(self.z)
        return self.values

    def backward_prop(self, input, delta):
        """Back-propagate ``delta`` through the layer and store the gradients.

        Args:
            input: The array-like that was fed to ``forward_prop``.
            delta: Gradient of the loss with respect to this layer's
                activation output, shape ``(batch, size)``.

        Returns:
            Gradient of the loss with respect to ``input``, shape
            ``(batch, input_size)``.

        Raises:
            RuntimeError: If ``forward_prop`` has not been called yet.
        """
        if self.z is None:
            raise RuntimeError("backward_prop() called before forward_prop()")
        # Accept the same array-likes as forward_prop (lists have no `.T`).
        input = np.asarray(input)
        delta = np.asarray(delta) * self.__dactivation(self.z)
        # Gradients are summed over the batch and no 1/B factor is applied
        # here, so a batch-mean loss must already carry that factor in `delta`.
        self.dweights = input.T @ delta
        self.dbiases = np.sum(delta, axis=0, keepdims=True)
        delta = delta @ self.weights.T
        return delta

    def update_params(self, learning_rate):
        """Apply one gradient-descent step using the stored gradients.

        Args:
            learning_rate: Step size multiplying ``dweights`` and ``dbiases``.

        Raises:
            RuntimeError: If no gradients have been computed yet.
        """
        if self.dweights is None or self.dbiases is None:
            raise RuntimeError("update_params() called before backward_prop()")
        self.weights -= learning_rate * self.dweights
        self.biases -= learning_rate * self.dbiases

In [None]:
# Smoke-test the Layer class on a tiny 2-sample batch: forward pass, backward
# pass, then a single parameter update.
test_layer = Layer(5, 3, activation=ReLU, dactivation=dReLU)
test_layer.random_he_initialize(seed=RANDOM_SEED)

# Named `test_input` rather than `input` so the builtin input() is not
# shadowed for the rest of the kernel session.
test_input = np.array([[1, 2, 3], [4, 5, 6]])
output = test_layer.forward_prop(test_input)
print("Forward Propagation Output:")
print(output)

# Upstream gradient with one row per sample and one column per unit.
delta = np.array([[0.1, 0.2, 0.3, 0.4, 0.5], [0.5, 0.4, 0.3, 0.2, 0.1]])
backprop_delta = test_layer.backward_prop(test_input, delta)
print("Backward Propagation Delta:")
print(backprop_delta)

test_layer.update_params(learning_rate=0.01)
print("Updated Weights:")
print(test_layer.weights)
print("Updated Biases:")
print(test_layer.biases)

Forward Propagation Output:
[[1.14929388 0.         0.         0.         3.07653887]
 [3.07193415 0.         0.         0.         7.36267243]]
Backward Propagation Delta:
[[ 0.48592843 -0.28972488  0.58224005]
 [ 0.73379645 -0.95996024  0.68947495]]
Updated Weights:
[[ 1.30527244 -0.49949702 -0.43125043 -0.87607521  0.69760237]
 [-1.90619848  1.42463284 -0.62152283  0.26049433 -0.21861006]
 [ 1.16080613 -1.68209785 -0.26325254 -0.31357907  0.90471887]]
Updated Biases:
[[-0.006  0.     0.     0.    -0.006]]


In [None]:
class VAE:
    """Placeholder for the model (presumably a variational autoencoder — confirm).

    Nothing is implemented yet; the constructor takes no arguments and sets no
    attributes.
    """

    def __init__(self):
        """Create an empty instance; no state is initialised."""
        return None
    

In [3]:
# Read the two Fashion-MNIST CSV files from the local "data" directory.
# os.path.join builds the relative path with the separator of the current
# platform; the previous hard-coded ".\\data\\..." form only resolves on Windows.
raw_train_data = pd.read_csv(os.path.join("data", "fashion-mnist_train.csv"))
raw_test_data = pd.read_csv(os.path.join("data", "fashion-mnist_test.csv"))

# Work on plain NumPy arrays from here on. The test CSV is what this notebook
# uses as its validation set.
train_data = np.array(raw_train_data)
validation_data = np.array(raw_test_data)

In [4]:
# Shuffle the training rows with a dedicated generator seeded from RANDOM_SEED,
# so the order is reproducible and does not depend on whatever else has
# consumed NumPy's global random state.
# Note: re-running this cell permutes the already shuffled array again.
shuffle_rng = np.random.default_rng(RANDOM_SEED)
perm = shuffle_rng.permutation(len(train_data))
train_data = train_data[perm]
train_data.shape

(60000, 785)

In [6]:
# Column 0 of each array is the integer class label; the remaining columns are
# the features, converted to float64 and divided by 255
# (presumably 8-bit pixel intensities, giving values in [0, 1] — confirm).
y_train = train_data[:, 0].astype('int64')
x_train = train_data[:, 1:].astype('float64') / 255.0

y_val = validation_data[:, 0].astype('int64')
x_val = validation_data[:, 1:].astype('float64') / 255.0