In [2]:
# Backend switch. When GPU is True the array module bound to `np` is CuPy and
# `signal` comes from cupyx.scipy; otherwise NumPy and SciPy are used. Either
# way the names brought into scope are `np` and `signal`, so the code below
# can stay backend-agnostic.
GPU = True
if GPU:
    from cupyx.scipy import signal
    import cupy as np
else:
    import numpy as np
    from scipy import signal


In [3]:
class Layer(object):
    '''
    Base class for network layers. It defines the two-method interface
    (forward / backward) that concrete layers override; the base
    implementations do nothing and return None.
    '''
    def __init__(self):
        # `self` was missing from the signature, which made `Layer()` (and any
        # subclass relying on the inherited constructor) raise TypeError.
        pass

    def forward(self, input):
        '''
        Forward propagation method -- overridden by child classes. This should provide the output of the layer given the
        input.
        '''
        pass

    def backward(self, output_gradient, lr):
        '''
        Backwards propagation method -- overridden by child classes. This should provide the input gradient to the layer
        given the output gradient of the layer. `lr` is the learning rate a layer may use to update its own parameters.
        '''
        pass

class Dense(Layer):
    '''
    Fully connected layer holding a weight matrix of shape (output_size, input_size) and a bias column of shape
    (output_size, 1).
    '''
    def __init__(self, input_size, output_size):
        '''
        Initializes the Dense layer given input and output sizes. This may be different than what is seen in other
        libraries where you provide the neuron count, which can be thought of as the output size.

        Weights and biases are drawn from a standard normal distribution.
        '''
        self.weights = np.random.randn(output_size, input_size)
        self.biases = np.random.randn(output_size, 1)

    def forward(self, input):
        '''
        Dense forward propagation method. Returns w•a + b.

        The input is stored on the layer because backward() needs it to compute the weight gradient.
        NOTE(review): assumes `input` is a column (input_size, 1) or a batch of columns (input_size, batch) -- confirm
        against callers.
        '''
        self.input = input
        return np.dot(self.weights, self.input) + self.biases

    def backward(self, output_gradient, lr):
        '''
        Dense backwards propagation method. Updates the weights and biases by gradient descent with step size `lr` and
        returns the gradient with respect to the layer input.

        `output_gradient` has the same shape as the value returned by forward().
        '''
        weights_gradient = np.dot(output_gradient, np.transpose(self.input))
        # Computed before the weights are modified so it reflects the weights used in the forward pass.
        input_gradient = np.dot(np.transpose(self.weights), output_gradient)

        # The bias is shared by every column of a batch, so its gradient is the sum over the batch axis. For a single
        # column this is the gradient itself; without the reduction an (output_size, batch) gradient cannot be
        # subtracted in place from the (output_size, 1) bias.
        if getattr(output_gradient, 'ndim', None) == 2:
            biases_gradient = np.sum(output_gradient, axis=1, keepdims=True)
        else:
            biases_gradient = output_gradient

        self.weights -= lr * weights_gradient
        self.biases -= lr * biases_gradient
        return input_gradient

class Conv2D(Layer):
    '''
    2D convolutional layer using 'valid' cross-correlation (no padding, stride 1). Holds one kernel per
    (output channel, input channel) pair and one bias value per output element.
    '''
    def __init__(self, input_shape, kernel_size, num_kernels):
        '''
        input_shape: 3-tuple; the first entry is the number of input channels and the remaining two are the spatial
                     dimensions, in the same axis order as the arrays passed to forward().
        kernel_size: side length of the square kernels.
        num_kernels: number of kernels, i.e. output channels.
        '''
        self.input_shape = input_shape
        # A 'valid' correlation of an (a, b) channel with a (k, k) kernel yields (a - k + 1, b - k + 1), so the output
        # keeps the spatial axes in the same order as the input. The two axes used to be swapped here, which only
        # worked for square inputs.
        self.output_shape = (num_kernels, input_shape[1] - kernel_size + 1, input_shape[2] - kernel_size + 1)
        self.kernel_shape = (num_kernels, input_shape[0], kernel_size, kernel_size)
        self.kernels = np.random.random_sample(self.kernel_shape)
        self.biases = np.random.random_sample(self.output_shape)

    def forward(self, inp):
        '''
        Returns, for every kernel i, the bias plus the sum over input channels j of the valid cross-correlation of
        inp[j] with kernels[i][j]. The input and the output are stored on the layer.
        '''
        self.input = inp
        # Start from a copy of the biases so the accumulation below does not modify them.
        self.output = np.copy(self.biases)
        for i in range(self.kernel_shape[0]):
            for j in range(self.input_shape[0]):
                self.output[i] += signal.correlate2d(self.input[j], self.kernels[i][j], 'valid')
        return self.output

    def backward(self, output_gradient, lr):
        '''
        Updates kernels and biases by gradient descent with step size `lr` and returns the gradient with respect to
        the layer input (an array of shape input_shape).
        '''
        dCdK = np.zeros(self.kernel_shape)
        dCdX = np.zeros(self.input_shape)
        dCdB = np.zeros(output_gradient.shape)

        for i in range(self.kernel_shape[0]):
            # Each bias adds directly to one output element, so its gradient is the output gradient.
            dCdB[i] = output_gradient[i]
            for j in range(self.input_shape[0]):
                # Kernel gradient: valid correlation of the stored input channel with the output gradient.
                dCdK[i][j] = signal.correlate2d(self.input[j], output_gradient[i], 'valid')
                # Input gradient: full convolution of the output gradient with the kernel, summed over kernels.
                dCdX[j] += signal.convolve2d(output_gradient[i], self.kernels[i][j], 'full')

        self.kernels -= lr * dCdK
        self.biases -= lr * dCdB

        return dCdX

class Reshape(Layer):
    '''
    Parameter-free layer that only changes the shape of what flows through it: forward reshapes to `output_shape`,
    backward reshapes the gradient back to `input_shape`.
    '''
    def __init__(self, input_shape, output_shape):
        self.input_shape, self.output_shape = input_shape, output_shape

    def forward(self, inp):
        '''
        Returns `inp` reshaped to the configured output shape.
        '''
        reshaped = np.reshape(inp, self.output_shape)
        return reshaped

    def backward(self, output_gradient, lr):
        '''
        Returns the gradient reshaped to the configured input shape. `lr` is unused: there is nothing to update.
        '''
        restored = np.reshape(output_gradient, self.input_shape)
        return restored
    
class Activation(Layer):
    '''
    Generic activation layer built from two callables: the activation function and its derivative.
    '''
    def __init__(self, activation, activation_gradient):
        self.activation, self.activation_prime = activation, activation_gradient

    def forward(self, input):
        '''
        Stores the input (needed by backward) and returns the activation applied to it.
        '''
        self.input = input
        activate = self.activation
        return activate(self.input)

    def backward(self, output_gradient, learning_rate):
        '''
        Returns the output gradient multiplied element-wise by the activation derivative evaluated at the stored
        input. `learning_rate` is unused: the layer has no parameters.
        '''
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)
    
class Sigmoid(Activation):
    '''
    Activation layer using the logistic sigmoid and its derivative.
    '''
    def __init__(self):
        super().__init__(self.sigmoid, self.gradient)

    def sigmoid(self, x):
        '''
        Returns 1 / (1 + e^-x), element-wise.
        '''
        return 1 / (1 + np.exp(-x))

    def gradient(self, x):
        '''
        Returns the sigmoid derivative s(x) * (1 - s(x)), element-wise.
        '''
        # Evaluate the sigmoid once and reuse it instead of computing the exponential twice.
        s = self.sigmoid(x)
        return s * (1 - s)

In [4]:
def binary_cross_entropy(y, y_pred):
    '''
    Returns the mean binary cross-entropy between targets `y` and predicted probabilities `y_pred`.

    Predictions are clipped to [eps, 1 - eps], where eps is the machine epsilon of the prediction dtype, so that a
    prediction of exactly 0 or 1 gives a large finite loss instead of inf or nan from log(0).
    '''
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    if y_pred.dtype.kind != 'f':
        # finfo needs a floating dtype; integer or boolean predictions are promoted first.
        y_pred = y_pred.astype(float)
    eps = float(np.finfo(y_pred.dtype).eps)
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

def binary_cross_entropy_gradient(y, y_pred):
    '''
    Returns the gradient of the mean binary cross-entropy with respect to `y_pred`, element-wise:
    ((1 - y) / (1 - y_pred) - y / y_pred) / N, where N is the number of elements in `y`.

    Predictions are clipped to [eps, 1 - eps], where eps is the machine epsilon of the prediction dtype, so that a
    prediction of exactly 0 or 1 gives a finite gradient instead of inf or nan from a division by zero.
    '''
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    if y_pred.dtype.kind != 'f':
        # finfo needs a floating dtype; integer or boolean predictions are promoted first.
        y_pred = y_pred.astype(float)
    eps = float(np.finfo(y_pred.dtype).eps)
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return ((1 - y) / (1 - y_pred) - y / y_pred) / np.size(y)