<a href="https://colab.research.google.com/github/Dene33/toy_neural_network/blob/master/toy_neural_network_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import scipy

In [429]:
#initialization, x - features, y - predictions
x = np.array([[3,5], [5,1], [10,2]], dtype=np.float64)
y = np.array([[75], [82], [93]], dtype=np.float64)
#normalization to make features and labels be in "equal space"
x = x/np.max(x)
y = y/100

print('x(features):\n{}\n\ny(predictions/labels):\n{}'.format(x,y))

x(features):
[[0.3 0.5]
 [0.5 0.1]
 [1.  0.2]]

y(predictions/labels):
[[0.75]
 [0.82]
 [0.93]]


In [0]:
class Neural_Network():
  def __init__ (self, activation_func):
    #layers of Neural Network with 2,3,1 neurons correspondingly
    self.inputLayerSize = 2
    self.hiddenLayerSize = 3
    self.outputLayerSize = 1
    #selecting of neurons' activation function
    if activation_func == 'sigmoid':
      self.activation_func = self.sigmoid
    elif activation_func == 'RElu':
      self.activation_func = self.RElu

    #weights initialization, W1 - weights from input to hidden layer, W2 - from
    #hidden to output layer
    self.W1 = np.random.rand(self.inputLayerSize, self.hiddenLayerSize)
    self.W2 = np.random.rand(self.hiddenLayerSize, self.outputLayerSize)
    
  def forward(self, x):
    #activation of neurons on hidden layer
    self.a = self.neuron_activation(self.activation_func, x, self.W1)    
    #activation of neurons on output layer
    self.yPred = self.neuron_activation(self.activation_func, self.a, self.W2)
    return self.yPred
      
  def gradient_descent(self, lr):    
    #calculate gradients for W1 and W2 separately and update weights
    self.W1 = self.gradient(x, self.W1, lr)
    self.W2 = self.gradient(self.a, self.W2, lr)

  def gradient(self, x, w, lr):
    self.grad_y = self.neuron_activation(self.activation_func, x, w)
    #gradient function with MSE-loss
    return w - lr * (x.T @ ((self.grad_y - y) * self.derivative(self.grad_y, self.activation_func))) / len(y)

  #features and weights multiplication  
  def neuron_activation(self, activation, x, w):
    return activation(np.dot(x,w))
  
  #derivative of sigmoid or RElu
  def derivative(self, grad, activation_func):
    if activation_func == self.sigmoid:
      return grad * (1 - grad)
    elif activation_func == self.RElu:
      return 1. * np.array(grad > 0, dtype=float)
  
  def sigmoid(self, x):
    return 1/(1+np.exp(-x))
  
  def RElu(self, x):
    return x * np.array(x > 0, dtype=float)
  
  def loss_MSE(self, yHat):
    return 0.5 * np.mean((y-yHat)**2)

$$ sigmoid\ gradient = \frac{\partial Loss}{\partial w} = \frac{1}{n} (\sigma(w \cdot X) - y)\sigma(w \cdot X)(1 - \sigma(w \cdot X))X$$

In [498]:
test = Neural_Network('RElu')

epoches = 10

for epoch in range(epoches):
  #forward pass
  predictions = test.forward(x)
  #adjust the weights
  test.gradient_descent(lr=0.5)
  print('Loss:',test.loss_MSE(test.yPred))


Loss: 0.1330625531552764
Loss: 0.03682300379432412
Loss: 0.02421460794332624
Loss: 0.022459989240597153
Loss: 0.021975192872028246
Loss: 0.02166984756886205
Loss: 0.02140534060918312
Loss: 0.021158425023079696
Loss: 0.020923712367384457
Loss: 0.020699686854364626


In [501]:
#verify the results
print('Original labels:\n',y)
print('Predicted labels:\n',test.yPred)

Original labels:
 [[0.75]
 [0.82]
 [0.93]]
Predicted labels:
 [[0.6171966 ]
 [0.56988622]
 [1.13977244]]
