In [1]:
import numpy as np #mainly for doing calculations
from scipy import signal

from keras.datasets import mnist
from keras.utils import np_utils


In [2]:
#parent class all layers based from it
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    def forward(self, input):
        # TODO: return output
        pass

    def backward(self, output_gradient, learning_rate):
        # TODO: update parameters and return input gradient
        pass 

class Convolutional(Layer):
    def __init__(self, input_shape, kernel_size, depth):
        input_depth, input_height, input_width = input_shape
        self.depth = depth
        self.input_shape = input_shape
        self.input_depth = input_depth
        self.output_shape = (depth, input_height - kernel_size + 1, input_width - kernel_size + 1)
        self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input):
        self.input = input
        self.output = None
        # TODO: Implement the forward method using the formula provided in the powerpoint. 
        # You may add or remove any variables that you wish. 
        self.output = np.copy(self.biases)
        for i in range(self.depth):
            for j in range(self.input_depth):
                self.output[i] += signal.correlate2d(self.input[j], self.kernels[i, j], "valid")
        return self.output

    def backward(self, output_gradient, learning_rate):
        # TODO: initialize the kernels_gradient and input_gradient.
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        # TODO: implement the back pass here. The equations in the ppt may help, but you're free to
        # add as much or as little code as you'd like. 
        for i in range(self.depth):
            for j in range(self.input_depth):
                kernels_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], "valid")
                input_gradient[j] += signal.convolve2d(output_gradient[i], self.kernels[i, j], "full")
        # TODO: update the kernels and biases
        self.kernels -= learning_rate * kernels_gradient
        self.biases -= learning_rate * output_gradient

        return input_gradient


In [3]:
class Reshape(Layer):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape

    def forward(self, input):
        # TODO: reshape the input to the output_shape and return it.
        return np.reshape(input, self.output_shape)

    def backward(self, output_gradient, learning_rate):
        # TODO: reshape the output to the input_shape and return it.
        return np.reshape(output_gradient, self.input_shape)


In [4]:

class Dense(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        # TODO: apply linear transformation to the input. see ppt for equation. 
        self.input = input
        return np.dot(self.weights, input) + self.bias

    def backward(self, output_gradient, learning_rate):
        # TODO: update the weights and bias
        weights_gradient = np.dot(output_gradient, self.input.T)
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [5]:
def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred):
    # TODO: return the binary_cross_entropy_prime. 
    # Note, this is the formula on the bottom in the ppt slides.
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [6]:
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient, learning_rate):
        return np.multiply(output_gradient, self.activation_prime(self.input))


class Sigmoid(Activation):
    def __init__(self):
        def sigmoid(x):
            # TODO: return the sigmoid of x
            return 1 / (1 + np.exp(-x))

        def sigmoid_prime(x):
            # TODO: return the derivative
            return  sigmoid(x) * (1 - sigmoid(x))

        super().__init__(sigmoid, sigmoid_prime)


In [7]:


def preprocess_data(x, y, limit):
    ''' 
    Will limit our data since using the whole thing will take forever on a cpu especially since we're
    implementing this from scratch.
    '''
    zero_index = np.where(y == 0)[0][:limit]
    one_index = np.where(y == 1)[0][:limit]
    all_indices = np.hstack((zero_index, one_index))
    all_indices = np.random.permutation(all_indices)
    x, y = x[all_indices], y[all_indices]
    x = x.reshape(len(x), 1, 28, 28)
    x = x.astype("float32") / 255
    y = np_utils.to_categorical(y)
    y = y.reshape(len(y), 2, 1)
    return x, y

# load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# TODO: Add our layers and the flow of input into this list. 
network = [
    Convolutional((1, 28, 28), 3, 8),
    Sigmoid(),
    Reshape((8, 26, 26), (8 * 26 * 26, 1)),
    Dense(8 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 2),
    Sigmoid()
]
# the last 2 important cause thats what will be used in calculation
epochs = 10
learning_rate = 0.1

# train
for e in range(epochs):
    error = 0
    for x, y in zip(x_train, y_train):
        # forward
        output = x
        for layer in network:
            output = layer.forward(output)
        
#         output =  output.reshape(output.shape[1],output.shape[0])
#         print(output.shape)
        error += binary_cross_entropy(y,output)

        # TODO: perform back prop 
        grad = binary_cross_entropy_prime(y,output)
        
        for layer in reversed(network):
            grad = layer.backward(grad,learning_rate)
            

    error /= len(x_train)
    print(f"{e + 1}/{epochs}, error={error}")

# TODO: run the test data through and print out your predictions
for x, y in zip(x_test, y_test):
    output = x
    for layer in network:
        output = layer.forward(output)
        print(f"pred : {np.argmax(output)}, true : {np.argmax(y)}")


1/10, error=0.6542224221120246
2/10, error=0.1833506135000575
3/10, error=0.09776847472089041
4/10, error=0.032487952433993786
5/10, error=0.02590991307900488
6/10, error=0.01679167646127028
7/10, error=0.011358606648744806
8/10, error=0.007890755615102408
9/10, error=0.007845102570763648
10/10, error=0.005273744549422704
pred : 169, true : 0
pred : 169, true : 0
pred : 169, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 482, true : 1
pred : 482, true : 1
pred : 482, true : 1
pred : 70, true : 1
pred : 0, true : 1
pred : 1, true : 1
pred : 1, true : 1
pred : 608, true : 0
pred : 608, true : 0
pred : 608, true : 0
pred : 13, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 584, true : 0
pred : 584, true : 0
pred : 584, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 169, true : 0
pred : 169, true : 0
pred : 169, true : 0
pred : 19, true : 0
pred : 0, true : 0
pred : 0, true

pred : 1, true : 1
pred : 1, true : 1
pred : 564, true : 0
pred : 564, true : 0
pred : 564, true : 0
pred : 19, true : 0
pred : 1, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 584, true : 0
pred : 584, true : 0
pred : 584, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 4465, true : 0
pred : 4465, true : 0
pred : 4465, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 169, true : 0
pred : 169, true : 0
pred : 169, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 482, true : 0
pred : 482, true : 0
pred : 482, true : 0
pred : 19, true : 0
pred : 8, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 169, true : 1
pred : 169, true : 1
pred : 169, true : 1
pred : 70, true : 1
pred : 0, true : 1
pred : 1, true : 1
pred : 1, true : 1
pred : 4465, true : 0
pred : 4465, true : 0
pred : 4465, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true

pred : 0, true : 0
pred : 608, true : 0
pred : 608, true : 0
pred : 608, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 584, true : 0
pred : 584, true : 0
pred : 584, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 481, true : 1
pred : 481, true : 1
pred : 481, true : 1
pred : 70, true : 1
pred : 0, true : 1
pred : 1, true : 1
pred : 1, true : 1
pred : 584, true : 1
pred : 584, true : 1
pred : 584, true : 1
pred : 13, true : 1
pred : 0, true : 1
pred : 1, true : 1
pred : 1, true : 1
pred : 482, true : 1
pred : 482, true : 1
pred : 482, true : 1
pred : 19, true : 1
pred : 0, true : 1
pred : 1, true : 1
pred : 1, true : 1
pred : 4587, true : 1
pred : 4587, true : 1
pred : 4587, true : 1
pred : 70, true : 1
pred : 0, true : 1
pred : 1, true : 1
pred : 1, true : 1
pred : 169, true : 0
pred : 169, true : 0
pred : 169, true : 0
pred : 70, true : 0
pred : 0, true : 0
pred : 0, true : 0
pred : 0, true : 