<a href="https://colab.research.google.com/github/GuyWhoCodesThings/Custom-Functions-for-Scratch-Neural-Network/blob/main/UpdatedNNfromScratch%20UPDATED%20WITH%20NORMALIZATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy import signal
from keras.datasets import mnist
from keras.utils import np_utils
from skimage.measure import block_reduce
from skimage.measure import block_reduce
from skimage.util import view_as_blocks
import numpy.ma as ma
from matplotlib import pyplot
import random

In [2]:
class Layer:
  def __init__(self):
    self.input = None
    self.output = None
  def forward(self, input):
    pass
  def backward(self, output_gradient, alpha):
    pass

In [3]:
class Dense(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)
    def forward(self, input):
        self.input = input
        return np.dot(self.weights, self.input) + self.bias
    def backward(self, output_gradient, learning_rate):
        weights_gradient = np.dot(output_gradient, self.input.T)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return np.dot(self.weights.T, output_gradient)

In [4]:
class Convolutional(Layer):
     def __init__(self, input_shape, kernel_size, depth):
         input_depth, input_height, input_width = input_shape
         self.depth = depth
         self.input_shape = input_shape
         self.input_depth = input_depth
         self.output_shape = (depth, input_height - kernel_size + 1, input_width - kernel_size + 1)
         self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
         self.kernels = np.random.randn(*self.kernels_shape)
         self.biases = np.random.randn(*self.output_shape)

     def forward(self, input):
         self.input = input
         self.output = np.copy(self.biases)
         for i in range(self.depth):
             for j in range(self.input_depth):
                 self.output[i] += signal.correlate2d(self.input[j], self.kernels[i, j], "valid")
         return self.output

     def backward(self, output_gradient, learning_rate):
         kernels_gradient = np.zeros(self.kernels_shape)
         input_gradient = np.zeros(self.input_shape)

         for i in range(self.depth):
             for j in range(self.input_depth):
                 kernels_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], "valid")
                 input_gradient[j] += signal.convolve2d(output_gradient[i], self.kernels[i, j], "full")

         self.kernels -= learning_rate * kernels_gradient
         self.biases -= learning_rate * output_gradient
         return input_gradient

In [5]:
def get_ordered_blocks(input, depth, block_size, input_shape):
    ordered_blocks = input.reshape(depth, -1, block_size, block_size)
    reshaped_arr = np.zeros(input_shape)
    for i in range(depth):
      split_blocks = np.array(np.hsplit(ordered_blocks[i], block_size))
      #splits array of blocks evenly with respect to the original image length and block size
      split_blocks = np.array(np.hsplit(split_blocks, 12))
      #reshape array to get it into a sqaure shape like orignal image matrix
      reshaped_arr[i] = split_blocks.flatten().reshape(1, input_shape[2], input_shape[1])
    return reshaped_arr

In [6]:
def expand_output(output_gradient, depth, height, width, block_size, output_height, output_width):
  output_gradient = output_gradient.reshape(depth, output_height, output_width)
  input_grad = np.zeros((depth, int(output_height*output_height), block_size**2))
  for i in range(depth):
    input_grad[i] = np.repeat(output_gradient[i], block_size**2).reshape(-1,block_size**2)
  return get_ordered_blocks(input_grad, depth, block_size, (depth, height, width))


In [7]:
class MaxPool2d(Layer):
  def __init__(self, input_shape, block_size):
    self.block_size = block_size
    self.input_shape = input_shape
    input_depth, input_height, input_width = input_shape
    self.depth = input_depth
    self.input_height = input_height
    self.input_width = input_width
    self.output_width = int(self.input_width / self.block_size)
    self.output_height = int(self.input_height / self.block_size)
    
  def forward(self, input):
    self.block_image = view_as_blocks(input, (1,self.block_size, self.block_size)).reshape(self.depth,-1, self.block_size**2)
    self.output = np.amax(self.block_image, axis=2).reshape(-1, self.output_height, self.output_width)
    return self.output
    
  def backward(self, output_gradient, learning_rate):
    max_indices = np.argmax(self.block_image, axis=2)
    max_indices = max_indices.reshape(self.depth, -1, 1)
    input_gradient = np.zeros_like(self.block_image).reshape(self.depth, -1, self.block_size**2)
    np.put_along_axis(input_gradient, max_indices, 1, axis=2)
    return np.multiply(get_ordered_blocks(input_gradient, self.depth, self.block_size, self.input_shape),
                      expand_output(output_gradient, self.depth, self.input_height, self.input_width,
                      self.block_size, self.output_width, self.output_height))

In [8]:
class Reshape(Layer):
     def __init__(self, input_shape, output_shape):
         self.input_shape = input_shape
         self.output_shape = output_shape

     def forward(self, input):
         return np.reshape(input, self.output_shape)

     def backward(self, output_gradient, learning_rate):
         return np.reshape(output_gradient, self.input_shape)

In [9]:
def binary_cross_entropy(y_true, y_pred):
     return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred):
     return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [10]:
class Softmax(Layer):

  def forward(self, input):
    tmp = np.exp(input)
    self.output = tmp / np.sum(tmp)
    return self.output
  
  def bacward(self, output_gradient, alpha):
    n = np.size(self.output)
    tmp = np.tile(self.output, n)
    return np.dot(tmp * (np.identity(n) - np.transpose(tmp)), output_gradient)


In [11]:
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
    def forward(self, input):
        self.input = input
        return self.activation(self.input)
    def backward(self, output_gradient, learning_rate):
        return np.multiply(output_gradient, self.activation_prime(self.input))

In [12]:
class ReLU(Activation):

  def __init__(self):

    relu = lambda x: np.maximum(x,0)
    relu_prime = lambda x: x > 0
    
    super().__init__(relu, relu_prime)

In [13]:
class Soft_Plus(Activation):

  def __init__(self):

    softPlus = lambda x: np.log1p(np.exp(x))
    soft_prime = lambda x: 1 / (1 + np.exp(-x))

    super().__init__(softPlus, soft_prime)

In [14]:
class Tanh(Activation):

  def __init__(self):

    tanh = lambda x: np.tanh(x)
    tanh_prime = lambda x: 1 - np.tanh(x) ** 2
    
    super().__init__(tanh, tanh_prime)

In [15]:
class Sigmoid(Activation):
     def __init__(self):
         def sigmoid(x):
            return 1 / (1 + np.exp(-x))

         def sigmoid_prime(x):
            s = sigmoid(x)
            return s * (1 - s)

         super().__init__(sigmoid, sigmoid_prime)

In [16]:
def mse(y_true, y_pred):
  return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
  return 2 * (y_pred - y_true) / np.size(y_true)

In [105]:
class Normalization(Layer):
  def __init__(self, shape):
    self.shape = shape
    self.weights = np.random.randn(*shape)
    self.bias = np.random.randn(*shape)

  def forward(self, input):
    self.input = input
    self.mean = np.mean(self.input)
    self.std_dev = np.std(self.input)
    self.normalized_input = (self.input - self.mean) / self.std_dev
    scaled_shifted = np.multiply(self.weights, self.normalized_input) + self.bias
    return scaled_shifted

  def backward(self, output_gradient, learning_rate):
    input_gradient = self.weights / self.std_dev
    self.weights -= self.normalized_input * output_gradient * learning_rate
    self.bias -= output_gradient * learning_rate
    return input_gradient

In [102]:
def preprocess_data(x, y, limit):
     zero_index = np.where(y == 0)[0][:limit]
     one_index = np.where(y == 1)[0][:limit]
     all_indices = np.hstack((zero_index, one_index))
     all_indices = np.random.permutation(all_indices)
     x, y = x[all_indices], y[all_indices]
     x = x.reshape(len(x), 1, 28, 28)
     y = np_utils.to_categorical(y)
     y = y.reshape(len(y), 2, 1)
     return x, y

 # load MNIST from server, limit to 100 images per class since we're not training on GPU
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

 # neural network
network = [
     Convolutional((1, 28, 28), 5, 5),
     Normalization((5,24,24)),
     Sigmoid(),
     Reshape((5, 24, 24), (5 * 24 * 24, 1)),
     Dense(5 * 24 * 24, 100),
     Sigmoid(),
     Dense(100, 2),
     Sigmoid()
 ]

epochs = 10
learning_rate = 0.01

 # train
for e in range(epochs):
     error = 0
     for x, y in zip(x_train, y_train):
         # forward
         output = x
         for layer in network:
             output = layer.forward(output)

         # error
         error += binary_cross_entropy(y, output)

         # backward
         grad = binary_cross_entropy_prime(y, output)
         for layer in reversed(network):
             grad = layer.backward(grad, learning_rate)

     error /= len(x_train)
     print(f"{e + 1}/{epochs}, error={error}")

 # test
for x, y in zip(x_test, y_test):
     output = x
     for layer in network:
         output = layer.forward(output)
     print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")

1/10, error=0.3834558754006852
2/10, error=0.12828473284986802
3/10, error=0.09483114441997709
4/10, error=0.07311691136709239
5/10, error=0.057361273986669126
6/10, error=0.048597709673379945
7/10, error=0.043377035357063554
8/10, error=0.039673744617993674
9/10, error=0.03659426939426965
10/10, error=0.03396779159521034
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 0
pred: 1, true

In [104]:
network2 = [
     Convolutional((1, 28, 28), 5, 5),
     Normalization((5,24,24)),
     Sigmoid(),
     MaxPool2d((5, 24, 24), 2),
     Sigmoid(),
     Reshape((5, 12, 12), (5 * 12 * 12, 1)),
     Dense(5 * 12 * 12, 100),
     Sigmoid(),
     Dense(100, 2),
     Sigmoid()
 ]

epochs = 10
learning_rate = 0.01

 # train
for e in range(epochs):
     error = 0
     for x, y in zip(x_train, y_train):
         # forward
         output = x
         for layer in network2:
             output = layer.forward(output)

         # error
         error += binary_cross_entropy(y, output)

         # backward
         grad = binary_cross_entropy_prime(y, output)
         for layer in reversed(network2):
             grad = layer.backward(grad, learning_rate)

     error /= len(x_train)
     print(f"{e + 1}/{epochs}, error={error}")

 #test
for x, y in zip(x_test, y_test):
     output = x
     for layer in network2:
         output = layer.forward(output)
     print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")

1/10, error=0.898582967824173
2/10, error=0.6512537479535176
3/10, error=0.6261956807182338
4/10, error=0.5986994030254124
5/10, error=0.5688176173914489
6/10, error=0.5384760086416509
7/10, error=0.5101643244710852
8/10, error=0.48479966379101336
9/10, error=0.4607780957917174
10/10, error=0.4378210338695835
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 0
pred: 1, true: 1
pred: 0, 

In [19]:
class Conv2DTranspose(Layer):
     def __init__(self, input_shape, kernel_size, depth):
         input_depth, input_height, input_width = input_shape
         self.depth = depth
         self.input_shape = input_shape
         self.input_depth = input_depth
         self.output_shape = (depth, input_height + kernel_size - 1, input_width + kernel_size - 1)
         self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
         self.kernels = np.random.randn(*self.kernels_shape)
         self.biases = np.random.randn(*self.output_shape)

     def forward(self, input):
         self.input = input
         self.output = np.copy(self.biases)
         for i in range(self.depth):
             for j in range(self.input_depth):
                 self.output[i] += signal.correlate2d(self.input[j], self.kernels[i, j], "full")
         return self.output

     def backward(self, output_gradient, learning_rate):
         kernels_gradient = np.zeros(self.kernels_shape)
         input_gradient = np.zeros(self.input_shape)

         for i in range(self.depth):
             for j in range(self.input_depth):
                 kernels_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], "valid")
                 input_gradient[j] += signal.convolve2d(output_gradient[i], self.kernels[i, j], "full")

         self.kernels -= learning_rate * kernels_gradient
         self.biases -= learning_rate * output_gradient
         return input_gradient

https://towardsdatascience.com/batch-norm-explained-visually-how-it-works-and-why-neural-networks-need-it-b18919692739

In [93]:
test = np.random.randint(0, 10, (2,5))

In [94]:
print(test)
print(np.std(test))

[[5 8 2 1 7]
 [3 9 2 1 8]]
3.0066592756745814


In [95]:
norm = Normalization(test.shape)

weights
 [[-0.31287987 -0.87869166 -0.74984316 -0.08157129  0.4339944 ]
 [-1.45229245  1.74481241 -0.16567316 -2.32593422  0.61760104]]
bias
 [[ 0.86295448  0.3352156   0.77487295  0.19687619  0.42287908]
 [-1.33552622  0.34174819 -1.80032522 -1.0856418   0.95307812]]


In [96]:
pls = norm.forward(test)
print("result: ", pls)

input  [[5 8 2 1 7]
 [3 9 2 1 8]]
result:  [[ 0.82132956 -0.65842929  1.42329767  0.29454494  0.76930562]
 [-0.56268577  2.89513848 -1.65705983  1.69929736  1.65147569]]


In [97]:
back = norm.backward(test, 0.1)