<a href="https://colab.research.google.com/github/StanleyLiangYork/2023_journal_club_CNN4N/blob/main/Neural_Network_Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is a demo for using numpy to implement a simple neural network.<p>
It is based on the blog by Towards [Joe Sasson](https://towardsdatascience.com/coding-a-neural-network-from-scratch-in-numpy-31f04e4d605)

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
class DenseLayer:
  
    # we only implement the ReLu and Softmax activation in this demo
    # Z - outputs of linear transformation of neurons Z = X*W+b
    # A - outputs of the activation function A = activation(Z)
    # W - weights of the neurons
  
    def __init__(self, neurons):
        self.neurons = neurons
        
    def relu(self, inputs):
        return np.maximum(0, inputs)

    def softmax(self, inputs):
        exp_scores = np.exp(inputs)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs
    
    def relu_derivative(self, dA, Z):
      # the function to get the ReLu derivative/gradients of the ReLu activation
        dZ = np.array(dA, copy = True)
        dZ[Z <= 0] = 0
        return dZ
    
    def forward(self, inputs, weights, bias, activation):
      # the forward pass of the layer getting the layer outputs
        Z_curr = np.dot(inputs, weights.T) + bias
        
        if activation == 'relu':
            A_curr = self.relu(inputs=Z_curr)
        elif activation == 'softmax':
            A_curr = self.softmax(inputs=Z_curr)
            
        return A_curr, Z_curr
    
    def backward(self, dA_curr, W_curr, Z_curr, A_prev, activation):
      # the backward pass / backpropagation of the layer getting the gradients
        if activation == 'softmax':
            dW = np.dot(A_prev.T, dA_curr)
            db = np.sum(dA_curr, axis=0, keepdims=True)
            dA = np.dot(dA_curr, W_curr) 
        else:
            dZ = self.relu_derivative(dA_curr, Z_curr)
            dW = np.dot(A_prev.T, dZ)
            db = np.sum(dZ, axis=0, keepdims=True)
            dA = np.dot(dZ, W_curr)
            
        return dA, dW, db

In [3]:
class Network:
    def __init__(self):
        self.network = [] ## layers
        self.architecture = [] ## mapping input neurons --> output neurons
        self.params = [] ## W, b
        self.memory = [] ## Z, A
        self.gradients = [] ## dW, db
        
    def add(self, layer):
        self.network.append(layer)
            
    def _compile(self, data):
        for idx, layer in enumerate(self.network):
            if idx == 0:
              # the input layer
                self.architecture.append({'input_dim':data.shape[1], 'output_dim':self.network[idx].neurons,
                                         'activation':'relu'})
            elif idx > 0 and idx < len(self.network)-1:
              # the hidden layers
                self.architecture.append({'input_dim':self.network[idx-1].neurons, 'output_dim':self.network[idx].neurons,
                                         'activation':'relu'})
            else:
              # the output layers
                self.architecture.append({'input_dim':self.network[idx-1].neurons, 'output_dim':self.network[idx].neurons,
                                         'activation':'softmax'})
        return self
    
    def _init_weights(self, data):
      # initialize the weights of the neural network
        self._compile(data)
        
        np.random.seed(99)
        
        for i in range(len(self.architecture)):
            self.params.append({
                'W':np.random.uniform(low=-1, high=1, 
                  size=(self.architecture[i]['output_dim'], 
                        self.architecture[i]['input_dim'])),
                'b':np.zeros((1, self.architecture[i]['output_dim']))})
        
        return self
    
    def _forwardprop(self, data):
      # forward pass of the data from input layer through the hidden layers and to the output layer
        A_curr = data
        
        for i in range(len(self.params)):
            A_prev = A_curr
            A_curr, Z_curr = self.network[i].forward(inputs=A_prev, weights=self.params[i]['W'], 
                                           bias=self.params[i]['b'], activation=self.architecture[i]['activation'])
            
            self.memory.append({'inputs':A_prev, 'Z':Z_curr})
            
        return A_curr
    
    def _backprop(self, predicted, actual):
      # backpropagation / backward pass of the network to get the gradients for network optimization
        num_samples = len(actual)
        
        ## compute the gradient on predictions
        dscores = predicted
        dscores[range(num_samples),actual] -= 1
        dscores /= num_samples
        
        dA_prev = dscores
        
        for idx, layer in reversed(list(enumerate(self.network))):
            dA_curr = dA_prev
            
            A_prev = self.memory[idx]['inputs']
            Z_curr = self.memory[idx]['Z']
            W_curr = self.params[idx]['W']
            
            activation = self.architecture[idx]['activation']

            dA_prev, dW_curr, db_curr = layer.backward(dA_curr, W_curr, Z_curr, A_prev, activation)

            self.gradients.append({'dW':dW_curr, 'db':db_curr})
            
    def _update(self, lr=0.01):
        for idx, layer in enumerate(self.network):
            self.params[idx]['W'] -= lr * list(reversed(self.gradients))[idx]['dW'].T  
            self.params[idx]['b'] -= lr * list(reversed(self.gradients))[idx]['db']
    
    def _get_accuracy(self, predicted, actual):
        return np.mean(np.argmax(predicted, axis=1)==actual)
    
    def _calculate_loss(self, predicted, actual):
        samples = len(actual)
        
        correct_logprobs = -np.log(predicted[range(samples),actual])
        data_loss = np.sum(correct_logprobs)/samples

        return data_loss
    
    def train(self, X_train, y_train, epochs):
      # the train / optimization of the neural network
        self.loss = []
        self.accuracy = []
        
        self._init_weights(X_train)
        
        for i in range(epochs):
            yhat = self._forwardprop(X_train)
            self.accuracy.append(self._get_accuracy(predicted=yhat, actual=y_train))
            self.loss.append(self._calculate_loss(predicted=yhat, actual=y_train))
            
            self._backprop(predicted=yhat, actual=y_train)
            
            self._update()
            # print out the training information every 20 epochs
            if i % 20 == 0:
                s = 'EPOCH: {}, ACCURACY: {}, LOSS: {}'.format(i, self.accuracy[-1], self.loss[-1])
                print(s)

To run the following code, you need to register an account on [Kaggle](https://kaggle.com), after login, create a Kaggle credential file named 'kaggle.json' and upload it to colab. <p>
click your account icon at the top right on your kaggle pack, select 'settings', go down to API, click "Create New Token". A new window will show up and allow you to download the 'kaggle.json' file to your local computer. <p>
If you create a new token, the old one will expire, or you can mannually expire your old token and create a new one. <p>
After finishing, you can use the kaggle tool to download the datasets from kaggle as I show you below.

In [26]:
# assume you already uploaded the kaggle.json 
!pip install -q kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [28]:
!kaggle datasets download -d uciml/iris; unzip iris.zip; rm iris.zip

Downloading iris.zip to /content
  0% 0.00/3.60k [00:00<?, ?B/s]
100% 3.60k/3.60k [00:00<00:00, 6.23MB/s]
Archive:  iris.zip
  inflating: Iris.csv                
  inflating: database.sqlite         


In [46]:
def get_data(path):
  data = pd.read_csv(path)
  cols = list(data.columns)
  feat_cols = cols[1:-1]
  target = cols.pop()
  X = data[feat_cols].copy()
  y = data[target].copy()
  y = LabelEncoder().fit_transform(y)
  return np.array(X), np.array(y)

In [47]:
path = '/content/Iris.csv'
X, y = get_data(path)

In [48]:
print(X.shape, y.shape)

(150, 4) (150,)


In [53]:
model = Network()
model.add(DenseLayer(6)) # the first input dimension will check the data input dimension
model.add(DenseLayer(8))
model.add(DenseLayer(10))
model.add(DenseLayer(3)) # the last / output layer use 

In [54]:
model.train(X_train=X, y_train=y, epochs=300)

EPOCH: 0, ACCURACY: 0.3333333333333333, LOSS: 8.40744717002989
EPOCH: 20, ACCURACY: 0.4, LOSS: 0.9215854842299206
EPOCH: 40, ACCURACY: 0.43333333333333335, LOSS: 0.7536116948198673
EPOCH: 60, ACCURACY: 0.42, LOSS: 0.6714779014672262
EPOCH: 80, ACCURACY: 0.41333333333333333, LOSS: 0.6594143979448669
EPOCH: 100, ACCURACY: 0.6666666666666666, LOSS: 0.5259943503852045
EPOCH: 120, ACCURACY: 0.6666666666666666, LOSS: 0.4706373583820736
EPOCH: 140, ACCURACY: 0.6666666666666666, LOSS: 0.5053203560733266
EPOCH: 160, ACCURACY: 0.48, LOSS: 1.0150613941350863
EPOCH: 180, ACCURACY: 0.8333333333333334, LOSS: 0.4606585529758215
EPOCH: 200, ACCURACY: 0.96, LOSS: 0.14245395648443293
EPOCH: 220, ACCURACY: 0.9733333333333334, LOSS: 0.11140924004633523
EPOCH: 240, ACCURACY: 0.98, LOSS: 0.09284358413813795
EPOCH: 260, ACCURACY: 0.98, LOSS: 0.08381489750531364
EPOCH: 280, ACCURACY: 0.98, LOSS: 0.08094377825736056
