<a href="https://colab.research.google.com/github/GuyWhoCodesThings/Custom-Functions-for-Scratch-Neural-Network/blob/main/NN%20with%20custom%20layers%20and%20functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference for the dropout layer implemented below: https://iamtrask.github.io/2015/07/28/dropout/

In [47]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import signal
from keras.datasets import mnist
from keras.utils import np_utils
from skimage.measure import block_reduce
from skimage.measure import block_reduce
from skimage.util import view_as_blocks
import numpy.ma as ma
from matplotlib import pyplot
import random

In [48]:
!python --version

Python 3.7.14


In [49]:
class Layer:
  """Common interface for every layer in the network.

  Subclasses override `forward` and `backward`; the versions defined here
  do nothing and return None.
  """

  def __init__(self):
    # Both slots start empty; subclasses may fill them in during `forward`.
    self.input, self.output = None, None

  def forward(self, input):
    """Compute the layer output for `input` (no-op in the base class)."""
    return None

  def backward(self, output_gradient, alpha):
    """Propagate `output_gradient` backwards using step size `alpha` (no-op in the base class)."""
    return None

In [50]:
class Dense(Layer):
    """Fully connected layer computing ``weights @ input + bias``.

    `weights` has shape (output_size, input_size) and `bias` has shape
    (output_size, 1); both are drawn from a standard normal distribution.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Store `input` and return the affine transform of it.

        Assumes `input` is a column vector of shape (input_size, 1) so that
        it lines up with `bias` -- TODO confirm against callers.
        """
        self.input = input
        self.output = np.dot(self.weights, self.input) + self.bias
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the gradient w.r.t. the input.

        Args:
            output_gradient: gradient of the loss w.r.t. this layer's output.
            learning_rate: step size used for the parameter update.

        Returns:
            Gradient of the loss w.r.t. the input seen in the last `forward`.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        # The input gradient must use the weights that produced the forward
        # pass, so compute it BEFORE the weights are modified.
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [51]:
class Convolutional(Layer):
    """Convolution layer built on "valid" 2-D cross-correlation.

    Holds `depth` stacks of square kernels (one kernel per input channel) and
    one bias value per element of the output volume.
    """

    def __init__(self, input_shape, kernel_size, depth):
        in_channels, in_rows, in_cols = input_shape
        out_rows = in_rows - kernel_size + 1
        out_cols = in_cols - kernel_size + 1

        self.depth = depth
        self.input_shape = input_shape
        self.input_depth = in_channels
        self.output_shape = (depth, out_rows, out_cols)
        self.kernels_shape = (depth, in_channels, kernel_size, kernel_size)
        # Kernels are drawn before biases.
        self.kernels = np.random.randn(*self.kernels_shape)
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input):
        """Return biases plus, per output map, the sum of the correlations of every input channel."""
        self.input = input
        self.output = result = np.copy(self.biases)
        for out_ch in range(self.depth):
            kernel_stack = self.kernels[out_ch]
            for in_ch in range(self.input_depth):
                result[out_ch] += signal.correlate2d(input[in_ch], kernel_stack[in_ch], "valid")
        return result

    def backward(self, output_gradient, learning_rate):
        """Update kernels and biases in place; return the gradient w.r.t. the stored input."""
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        for out_ch in range(self.depth):
            map_gradient = output_gradient[out_ch]
            for in_ch in range(self.input_depth):
                kernels_gradient[out_ch, in_ch] = signal.correlate2d(
                    self.input[in_ch], map_gradient, "valid"
                )
                # Full convolution spreads each output gradient back over the
                # input positions that contributed to it.
                input_gradient[in_ch] += signal.convolve2d(
                    map_gradient, self.kernels[out_ch, in_ch], "full"
                )

        # Parameters change only after both gradients have been computed.
        self.kernels -= learning_rate * kernels_gradient
        self.biases -= learning_rate * output_gradient
        return input_gradient

In [116]:
def get_ordered_blocks(input, depth, block_size, input_shape):
    """Stitch flattened square blocks back into full 2-D feature maps.

    Args:
        input: array holding, for each of `depth` maps, its blocks in
            row-major block order (block row first, then block column), each
            block flattened row-major. Any shape that reshapes to
            (depth, n_blocks, block_size, block_size) is accepted.
        depth: number of feature maps stored in `input`.
        block_size: side length of each square block.
        input_shape: (channels, height, width) of the array to rebuild;
            height and width must be multiples of `block_size`.

    Returns:
        A float array of shape `input_shape` whose first `depth` maps hold the
        re-assembled images.
    """
    _, height, width = input_shape
    # Derive the block grid from the target shape instead of assuming a
    # 24x24 map (12 block rows), so any divisible height/width works.
    block_rows = height // block_size
    block_cols = width // block_size

    blocks = np.asarray(input).reshape(depth, block_rows, block_cols, block_size, block_size)
    # (depth, block_row, block_col, row_in_block, col_in_block)
    #   -> (depth, block_row, row_in_block, block_col, col_in_block)
    # so that flattening the last four axes yields image rows and columns.
    images = blocks.transpose(0, 1, 3, 2, 4).reshape(depth, height, width)

    reshaped_arr = np.zeros(input_shape)
    reshaped_arr[:depth] = images
    return reshaped_arr

In [117]:
def expand_output(output_gradient, depth, height, width, block_size, output_height, output_width):
  """Up-sample a pooled gradient so every pooling block carries its gradient value.

  Args:
      output_gradient: gradient w.r.t. the pooled output; anything that
          reshapes to (depth, output_height, output_width).
      depth: number of feature maps.
      height, width: spatial size of the un-pooled maps.
      block_size: side length of the square pooling window.
      output_height, output_width: spatial size of the pooled maps.

  Returns:
      Float array of shape (depth, height, width) in which each pooled
      gradient value is repeated over its block_size x block_size window.
  """
  pooled = np.reshape(output_gradient, (depth, output_height, output_width))
  # Repeat along rows, then along columns: value (r, c) fills the window
  # starting at (r * block_size, c * block_size). Works for non-square maps,
  # unlike sizing a scratch buffer with output_height ** 2.
  tiled = np.repeat(np.repeat(pooled, block_size, axis=1), block_size, axis=2)
  expanded = np.zeros((depth, height, width))
  expanded[...] = tiled
  return expanded


In [118]:
class MaxPool2d(Layer):
  """Non-overlapping max pooling over block_size x block_size windows.

  `input_shape` is (depth, height, width); the pooled output has shape
  (depth, height // block_size, width // block_size).
  """

  def __init__(self, input_shape, block_size):
    super().__init__()
    self.block_size = block_size
    self.input_shape = input_shape
    input_depth, input_height, input_width = input_shape
    self.depth = input_depth
    self.input_height = input_height
    self.input_width = input_width
    self.output_width = self.input_width // self.block_size
    self.output_height = self.input_height // self.block_size

  def forward(self, input):
    """Return the maximum of every pooling window of `input`."""
    self.input = input
    # One row per window, holding that window's values flattened; kept for
    # `backward`, which needs to know where each maximum came from.
    self.block_image = view_as_blocks(
        input, (1, self.block_size, self.block_size)
    ).reshape(self.depth, -1, self.block_size**2)
    self.output = np.amax(self.block_image, axis=2).reshape(-1, self.output_height, self.output_width)
    return self.output

  def backward(self, output_gradient, learning_rate):
    """Route each pooled gradient to the position that held the window maximum.

    `learning_rate` is unused: the layer has no trainable parameters.
    """
    max_indices = np.argmax(self.block_image, axis=2).reshape(self.depth, -1, 1)
    # One-hot mask per window: 1 at the arg-max, 0 elsewhere.
    max_mask = np.zeros_like(self.block_image)
    np.put_along_axis(max_mask, max_indices, 1, axis=2)
    # Height is passed before width, matching expand_output's parameter order.
    return np.multiply(
        get_ordered_blocks(max_mask, self.depth, self.block_size, self.input_shape),
        expand_output(output_gradient, self.depth, self.input_height, self.input_width,
                      self.block_size, self.output_height, self.output_width))

In [53]:
class Reshape(Layer):
    """Parameter-free layer that only changes the shape of what flows through it."""

    def __init__(self, input_shape, output_shape):
        self.input_shape, self.output_shape = input_shape, output_shape

    def forward(self, input):
        """Return `input` rearranged to `output_shape`."""
        reshaped = np.reshape(input, self.output_shape)
        return reshaped

    def backward(self, output_gradient, learning_rate):
        """Return `output_gradient` rearranged back to `input_shape`."""
        restored = np.reshape(output_gradient, self.input_shape)
        return restored

In [54]:
def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Args:
        y_true: array of target values.
        y_pred: array of predicted probabilities.
        eps: predictions are clipped to [eps, 1 - eps] so a saturated
            prediction (exactly 0 or 1) yields a large finite loss instead of
            nan/inf from log(0).

    Returns:
        The loss averaged over all elements.
    """
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred, eps=1e-12):
    """Gradient of the mean binary cross-entropy with respect to `y_pred`.

    Args:
        y_true: array of target values.
        y_pred: array of predicted probabilities.
        eps: predictions are clipped to [eps, 1 - eps] so a saturated
            prediction (exactly 0 or 1) does not cause a division by zero.

    Returns:
        Array with the shape of the inputs holding the per-element gradient,
        already divided by the number of elements in `y_true`.
    """
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [55]:
class Softmax(Layer):
  """Softmax over all elements of the input."""

  def forward(self, input):
    """Return exp(input) normalised to sum to 1 and cache it in `self.output`."""
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large logits.
    shifted = np.exp(input - np.max(input))
    self.output = shifted / np.sum(shifted)
    return self.output

  def backward(self, output_gradient, alpha):
    """Multiply `output_gradient` by the softmax Jacobian.

    `alpha` is unused: the layer has no trainable parameters. Assumes the
    cached output is a column vector of shape (n, 1) so that tiling it gives
    an (n, n) matrix -- TODO confirm against callers.
    """
    n = np.size(self.output)
    tiled = np.tile(self.output, n)
    # Entry (i, j) is s_i * (delta_ij - s_j).
    return np.dot(tiled * (np.identity(n) - np.transpose(tiled)), output_gradient)

  # The method used to be misspelled, which left Layer.backward (returning
  # None) in charge of back-propagation. Keep the old name as an alias.
  bacward = backward


In [56]:
class Activation(Layer):
    """Element-wise activation layer defined by a function and its derivative."""

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Remember `input` and return the activation applied to it."""
        self.input = input
        activated = self.activation(input)
        return activated

    def backward(self, output_gradient, learning_rate):
        """Scale `output_gradient` by the derivative at the remembered input."""
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)

In [57]:
class ReLU(Activation):
  """Rectified linear unit: element-wise max(x, 0)."""

  def __init__(self):
    def relu(x):
      return np.maximum(x, 0)

    def relu_prime(x):
      # Boolean result: True exactly where the input is strictly positive.
      return x > 0

    super().__init__(relu, relu_prime)

In [58]:
class Soft_Plus(Activation):
  """Softplus activation, log(1 + exp(x)); its derivative is the logistic sigmoid."""

  def __init__(self):

    def softPlus(x):
      # logaddexp(0, x) == log(exp(0) + exp(x)) == log(1 + exp(x)), but it
      # does not overflow to inf for large positive x.
      return np.logaddexp(0, x)

    def soft_prime(x):
      # 1 / (1 + exp(-x)) rewritten as exp(-log(1 + exp(-x))) so that large
      # negative x does not overflow inside exp.
      return np.exp(-np.logaddexp(0, -x))

    super().__init__(softPlus, soft_prime)

In [59]:
class Tanh(Activation):
  """Hyperbolic tangent activation."""

  def __init__(self):
    def tanh(x):
      return np.tanh(x)

    def tanh_prime(x):
      # d/dx tanh(x) = 1 - tanh(x)^2
      squashed = np.tanh(x)
      return 1 - squashed ** 2

    super().__init__(tanh, tanh_prime)

In [60]:
class Sigmoid(Activation):
    """Logistic sigmoid activation, 1 / (1 + exp(-x))."""

    def __init__(self):
        logistic = lambda x: 1 / (1 + np.exp(-x))

        def logistic_prime(x):
            # Derivative expressed through the function value: s * (1 - s).
            value = logistic(x)
            return value * (1 - value)

        super().__init__(logistic, logistic_prime)

In [61]:
def mse(y_true, y_pred):
  """Mean of the squared element-wise differences between target and prediction."""
  residual = y_true - y_pred
  squared = np.power(residual, 2)
  return np.mean(squared)

def mse_prime(y_true, y_pred):
  """Gradient of the mean squared error with respect to `y_pred`."""
  n_elements = np.size(y_true)
  doubled_error = 2 * (y_pred - y_true)
  return doubled_error / n_elements

In [62]:
from numpy.ma.core import masked_array
class Dropout(Layer):
  """Inverted dropout: randomly zero units and rescale the survivors.

  Args:
      input_size: number of units feeding the layer (kept for reference; the
          mask itself follows the shape of whatever `forward` receives).
      retention_rate: probability in (0, 1] that a unit is kept.
      do_drop: when falsy the layer is the identity, e.g. at inference time.

  Raises:
      ValueError: if `retention_rate` is not in (0, 1].
  """

  def __init__(self, input_size, retention_rate, do_drop):
    super().__init__()
    if not 0 < retention_rate <= 1:
      raise ValueError("retention_rate must be in the interval (0, 1]")
    self.input_size = input_size
    self.do_drop = do_drop
    self.retention_rate = retention_rate
    # Scaled keep-mask of the most recent forward pass; None when nothing
    # was dropped.
    self.mask = None

  def forward(self, input):
    """Return `input` with dropped units zeroed and kept units scaled by 1 / retention_rate."""
    self.input = input
    if not self.do_drop:
      self.mask = None
      self.output = input
      return self.output

    keep = np.random.rand(*np.shape(input)) < self.retention_rate
    # Dividing by the retention rate keeps the expected activation unchanged,
    # so no rescaling is needed when dropout is switched off.
    self.mask = keep / self.retention_rate
    self.output = input * self.mask
    return self.output

  def backward(self, output_gradient, learnign_rate):
    """Pass the gradient through the mask used in the last forward pass.

    The (misspelled) step-size parameter is unused: the layer has no
    trainable parameters. Its name is kept so existing callers keep working.
    """
    if self.mask is None:
      return output_gradient
    return output_gradient * self.mask

In [125]:
def preprocess_data(x, y, limit):
    """Build a shuffled two-class (digits 0 and 1) subset ready for the network.

    Args:
        x: image array indexable by sample; each sample reshapes to 28x28.
        y: integer label array aligned with `x`.
        limit: maximum number of samples taken per class (the first `limit`
            occurrences of each label).

    Returns:
        Tuple (images, labels): images as float32 in [0, 1] with shape
        (n, 1, 28, 28), labels one-hot encoded with shape (n, 2, 1).
    """
    per_class = [np.where(y == label)[0][:limit] for label in (0, 1)]
    picked = np.random.permutation(np.hstack(per_class))

    images = x[picked]
    images = images.reshape(len(images), 1, 28, 28).astype("float32") / 255

    labels = np_utils.to_categorical(y[picked])
    labels = labels.reshape(len(labels), 2, 1)
    return images, labels

 # load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Keep at most 100 samples each of digits 0 and 1 in both splits.
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

 # neural network
network = [
     # (1, 28, 28) input, 5x5 kernels, 5 output maps -> (5, 24, 24)
     Convolutional((1, 28, 28), 5, 5),
     Sigmoid(),
     # flatten the feature maps into a single column vector
     Reshape((5, 24, 24), (5 * 24 * 24, 1)),
     Dense(5 * 24 * 24, 100),
     Sigmoid(),
     # two outputs, one per class
     Dense(100, 2),
     Sigmoid()
 ]

epochs = 50
learning_rate = 0.01

 # train
# One parameter update per sample: every layer adjusts its own parameters
# inside `backward` and hands back the gradient for the layer before it.
for e in range(epochs):
     # running sum of per-sample losses for this epoch
     error = 0
     for x, y in zip(x_train, y_train):
         # forward
         output = x
         for layer in network:
             output = layer.forward(output)

         # error
         error += binary_cross_entropy(y, output)

         # backward
         grad = binary_cross_entropy_prime(y, output)
         for layer in reversed(network):
             grad = layer.backward(grad, learning_rate)

     # report the mean loss over the training samples
     error /= len(x_train)
     print(f"{e + 1}/{epochs}, error={error}")

 # test
#for x, y in zip(x_test, y_test):
     #output = x
     #for layer in network:
         #output = layer.forward(output)
     #print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")

1/50, error=0.7369569670583439
2/50, error=0.19701505243970602
3/50, error=0.137663822829617
4/50, error=0.10894806028093854
5/50, error=0.0909559969347688
6/50, error=0.07957529620507577
7/50, error=0.06985035970809973
8/50, error=0.060627235128192396
9/50, error=0.05350968205781268
10/50, error=0.04838691255542805
11/50, error=0.044684785283493816
12/50, error=0.041672561188399744
13/50, error=0.039013804482313244
14/50, error=0.03661491447312672
15/50, error=0.03444803096909412
16/50, error=0.03252294921136034
17/50, error=0.03084225157740171
18/50, error=0.029369159622009824
19/50, error=0.028055374850503287
20/50, error=0.026866237726214853
21/50, error=0.025779785405988288
22/50, error=0.02477917700173063
23/50, error=0.023849315876396045
24/50, error=0.022975157377310814
25/50, error=0.022138173218679925
26/50, error=0.02130984801270476
27/50, error=0.020443422234165908
28/50, error=0.019508438594726465
29/50, error=0.018646785380634027
30/50, error=0.01798536162181238
31/50, er

In [126]:
# Same architecture as `network`, with 2x2 max pooling after the convolution.
network2 = [
     # (1, 28, 28) input, 5x5 kernels, 5 output maps -> (5, 24, 24)
     Convolutional((1, 28, 28), 5, 5),
     Sigmoid(),
     # 2x2 pooling halves each spatial dimension -> (5, 12, 12)
     MaxPool2d((5, 24, 24), 2),
     # NOTE(review): a second Sigmoid is applied to the already
     # sigmoid-activated, pooled maps -- confirm this is intentional.
     Sigmoid(),
     Reshape((5, 12, 12), (5 * 12 * 12, 1)),
     Dense(5 * 12 * 12, 100),
     Sigmoid(),
     Dense(100, 2),
     Sigmoid()
 ]

# Re-assigns the globals used by the previous training cell.
epochs = 50
learning_rate = 0.01

 # train
# One parameter update per sample, reusing x_train / y_train from the
# earlier data-loading cell.
for e in range(epochs):
     # running sum of per-sample losses for this epoch
     error = 0
     for x, y in zip(x_train, y_train):
         # forward
         output = x
         for layer in network2:
             output = layer.forward(output)

         # error
         error += binary_cross_entropy(y, output)

         # backward
         grad = binary_cross_entropy_prime(y, output)
         for layer in reversed(network2):
             grad = layer.backward(grad, learning_rate)

     # report the mean loss over the training samples
     error /= len(x_train)
     print(f"{e + 1}/{epochs}, error={error}")

 # test
#for x, y in zip(x_test, y_test):
    # output = x
     #for layer in network2:
      #   output = layer.forward(output)
     #print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")

1/50, error=0.6769842651449504
2/50, error=0.439026508382267
3/50, error=0.3851334729056707
4/50, error=0.3432689569872943
5/50, error=0.3086097973360104
6/50, error=0.2794179463274508
7/50, error=0.2544866854674672
8/50, error=0.23287497833990417
9/50, error=0.21376095149103502
10/50, error=0.19645201275478183
11/50, error=0.18098652908807542
12/50, error=0.16780561176990552
13/50, error=0.15644466322820177
14/50, error=0.14644713050429717
15/50, error=0.13754818883878772
16/50, error=0.1295745877641363
17/50, error=0.12238049075615523
18/50, error=0.11587933066828308
19/50, error=0.10998871989314013
20/50, error=0.10463876792611478
21/50, error=0.0997696823390613
22/50, error=0.09532683261415413
23/50, error=0.09125635747166438
24/50, error=0.08751139541749021
25/50, error=0.08405161652308481
26/50, error=0.08084476218390407
27/50, error=0.07785877774326275
28/50, error=0.07507316573724702
29/50, error=0.07246651350943921
30/50, error=0.07001387258916225
31/50, error=0.06770787610096

Walk through one iteration of the dropout forward pass by hand.

# Hand-run a single dropout forward pass on a dummy activation vector.
retention_rate = 0.8
activations = np.ones((100, 1))

random_arr = np.random.rand(100, 1)
print(len(random_arr))
# Each unit is kept independently with probability `retention_rate`, so the
# number of kept units only averages 80. If exactly that share must be kept,
# choose the kept indices with np.random.permutation(100)[:80] instead.
masked_arr = random_arr < retention_rate
print('mask', int(masked_arr.sum()))
# Row indices of the units that survive.
indices = np.where(masked_arr)[0]
# Zero the dropped units and rescale the survivors by 1 / retention_rate so
# the expected activation is unchanged.
output = activations * masked_arr / retention_rate
print(output)