# Layers

In [1]:
import numpy as np

class Linear():
  '''A fully connected layer that computes ``np.dot(weights, input) + bias``.

  ``weights`` has shape ``(out_features, in_features)`` and is initialised with
  ``np.random.rand``. When ``bias`` is disabled, the ``bias`` attribute holds
  the sentinel ``False`` instead of an array.
  '''
  def __init__(self,in_features,out_features, bias=True, requires_grad=True):
    '''
    in_features: length of the input vector.
    out_features: length of the output vector.
    bias: when truthy, a random bias vector of length out_features is created.
    requires_grad: when True, forward() caches its input and backward()
      stores weight/bias gradients on the layer.
    '''
    self.in_features = in_features
    self.out_features = out_features
    self.requires_grad = requires_grad

    self.weights = np.random.rand(self.out_features,self.in_features)
    # False (not None) is the "no bias" sentinel; other methods test for an ndarray.
    self.bias = np.random.rand(self.out_features) if bias else False


  def forward(self,input):
    '''Return ``np.dot(weights, input)`` plus the bias, if one is set.

    Raises:
      TypeError: input is not a numpy array.
      ValueError: input is 0-dimensional or its first dimension does not
        match the number of weight columns.
    '''
    if not isinstance(input, np.ndarray):
      raise TypeError("Input must be a numpy nd.array.")
    # A 0-d array has no first dimension to compare, so reject it explicitly.
    if input.ndim == 0 or self.weights.shape[1] != input.shape[0]:
      raise ValueError(
        f"Cannot multiply matrix of dimension {self.weights.shape} "
        f"with input of dimension {input.shape}"
      )

    if self.requires_grad:
      self.input = input  # cached for deriv_wrt_weight()

    result = np.dot(self.weights, input)
    if isinstance(self.bias, np.ndarray):
      # Out-of-place add: an in-place `+=` fails when the product is
      # integer-typed and the bias is float.
      result = result + self.bias
    return result


  def deriv_wrt_weight(self):
    '''Return the input cached by forward() (the derivative of the output w.r.t. the weights).

    Raises:
      RuntimeError: no input has been cached yet.
    '''
    if not hasattr(self,'input'):
      raise RuntimeError('backward() called before forward()')
    return self.input #previous layer h

  def deriv_wrt_bias(self):
    '''Return 1, the derivative of the output w.r.t. the bias.'''
    return 1

  def deriv_wrt_input(self):
    '''Return the weight matrix, the derivative of the output w.r.t. the input.'''
    return self.weights

  def backward(self, front_layer_grads):
    '''Apply the chain rule and return the gradient for the layer behind this one.

    When requires_grad is True, ``weight_grads`` and ``bias_grads`` are stored
    on the layer as a side effect.

    NOTE(review): the broadcasting below appears to expect front_layer_grads
    as a column of shape (out_features, 1) and a 1-D cached input - confirm
    against callers.
    '''
    if self.requires_grad:
      self.weight_grads = self.deriv_wrt_weight() * front_layer_grads #chain rule
      self.bias_grads = (self.deriv_wrt_bias() * front_layer_grads).reshape(1,-1)
    return np.dot(front_layer_grads.transpose(),self.deriv_wrt_input()) #chain rule for the back layer grads

  def get_weight_grads(self):
    '''Return the weight gradients stored by backward().

    Raises:
      RuntimeError: requires_grad is False, or backward() has not stored gradients yet.
    '''
    if not self.requires_grad:
      raise RuntimeError('requires_grad set to False. Cannot calculate gradients.')
    if not hasattr(self,'weight_grads'):
      raise RuntimeError('get_weight_grads() called before backward()')
    return self.weight_grads

  def get_bias_grads(self):
    '''Return the bias gradients stored by backward().

    Raises:
      RuntimeError: requires_grad is False, or backward() has not stored gradients yet.
    '''
    if not self.requires_grad:
      raise RuntimeError('requires_grad set to False. Cannot calculate gradients.')
    if not hasattr(self,'bias_grads'):
      raise RuntimeError('get_bias_grads() called before backward()')
    return self.bias_grads

  def get_grads(self):
    '''Return a dict with the stored weight and bias gradients and the requires_grad flag.'''
    return {'weights':self.get_weight_grads(), 'bias':self.get_bias_grads(), 'requires_grad':self.requires_grad}


  def __call__(self,input):
    '''Allow the layer to be used as a function; delegates to forward().'''
    return self.forward(input)

  def __repr__(self):
    return f'Linear(in_features={self.in_features}, out_features={self.out_features}, bias={isinstance(self.bias,np.ndarray)})'

  def parameters(self):
    '''Return a dict holding the weights array and the bias (array, or False when disabled).'''
    return {'weights':self.weights, 'bias':self.bias}


# Smoke test: build a 2x2 layer without a bias and print its parameters
# (the weights are random; 'bias' shows the False sentinel).
L = Linear(2,2,bias=False)
print(L.parameters())
# L.forward(np.array([3,1]))
# print(L)

{'weights': array([[0.37183975, 0.93632696],
       [0.31424118, 0.19147913]]), 'bias': False}


# Activation Function

In [2]:
import numpy as np

class ActivationFunction():
  '''Base class for element-wise activation functions.

  Subclasses assign ``self.function`` (a callable applied to each element of
  the input) and, for backpropagation, ``self.derivative_function``.
  Every non-callable attribute set on the instance is tracked as a
  ``(name, value)`` pair and exposed through parameters().
  '''
  def __init__(self, requires_grad=True):
    '''requires_grad: Necessarily required to be True for backpropagation.'''
    super(ActivationFunction,self).__init__()
    self.__parameters = []
    self.requires_grad = requires_grad

  def parameters(self):
    '''Return the list of tracked ``(name, value)`` attribute pairs.'''
    return self.__parameters

  def __setattr__(self, name,value):
    '''Set the attribute and track it unless it is callable or the registry itself.

    Re-assigning a tracked name replaces its entry in place. Appending on
    every assignment would add one more 'input' entry per forward call and
    keep every past input alive.
    '''
    super().__setattr__(name,value)
    if callable(value) or name=='_ActivationFunction__parameters':
      return
    for index,(known_name,_) in enumerate(self.__parameters):
      if known_name == name:
        self.__parameters[index] = (name,value)
        return
    self.__parameters.append((name,value))

  def __call__(self,input):
    '''Apply ``self.function`` to each element of ``input`` and return a new array.

    Raises:
      TypeError: input is not a numpy array.
      NameError: the subclass did not define ``function``.
    '''
    if not isinstance(input, np.ndarray):
      raise TypeError("Input must be of type numpy nd.array.")
    if not hasattr(self,'function'):
      raise NameError("The function has not been defined for this activation funcion.")
    if self.requires_grad:
      self.input = input  # cached for backward()
    return np.array([self.function(e) for e in input])

  def backward(self,front_layer_grads):
    '''Return derivative(input) * front_layer_grads as a column of shape (-1, 1).

    Raises:
      RuntimeError: requires_grad is False, or no forward call has cached an input.
      NameError: the subclass did not define ``derivative_function``.
    '''
    if not self.requires_grad:
      raise RuntimeError("requires_grad set to False. Cannot calculate gradients.")
    if not hasattr(self,'derivative_function'):
      raise NameError("derivative_function not set for this activation function. Cannot calculate gradients.")
    # Same guard and message as Linear, instead of a bare AttributeError.
    if not hasattr(self,'input'):
      raise RuntimeError('backward() called before forward()')
    local_grads = np.array([self.derivative_function(e) for e in self.input])
    return local_grads.reshape(-1,1)*front_layer_grads.reshape(-1,1)

  def __repr__(self):
    return self.__class__.__name__ + "()"

# Instantiate the bare base class; no `function` is set on it, so calling it
# with an array would raise NameError.
act = ActivationFunction()

In [3]:
import numpy as np

class ReLU(ActivationFunction):
  '''Rectified linear unit: returns x for x >= 0 and 0 otherwise, element by element.'''
  def __init__(self,requires_grad=True):
    '''requires_grad: forwarded to ActivationFunction; must be True for backpropagation.'''
    super(ReLU, self).__init__(requires_grad)
    self.function = lambda x: 0 if x<0 else x
    # The inherited backward() raises NameError when no derivative_function is
    # set, which made ReLU unusable in backpropagation. The slope is 1 for
    # x > 0 and 0 otherwise; 0 is used at x == 0 (a valid subgradient).
    self.derivative_function = lambda x: 1 if x>0 else 0

# Create a ReLU activation; the example call below is left disabled.
activation = ReLU()
# activation(np.array([1,2,-1]))

In [4]:
import numpy as np

class Sigmoid(ActivationFunction):
  '''Logistic sigmoid activation, 1 / (1 + exp(-x)), with its derivative s * (1 - s).'''
  def __init__(self, requires_grad=True):
    '''requires_grad: forwarded to ActivationFunction.'''
    super().__init__(requires_grad)

    def logistic(x):
      return 1/(1+np.exp(-x))

    def logistic_slope(x):
      # Goes through self.function so a later re-assignment of it is honoured.
      activated = self.function(x)
      return activated*(1-activated)

    # Both are callables, so the base class does not track them as parameters.
    self.function = logistic
    self.derivative_function = logistic_slope

# Apply the sigmoid element-wise to a small sample vector; the notebook
# displays the resulting array.
activation = Sigmoid()
activation(np.array([1,2,-1]))
# print(activation)


array([0.73105858, 0.88079708, 0.26894142])

# Neural Network

In [5]:
class NeuralNetwork():
  '''Base class for networks built from Linear layers and ActivationFunctions.

  Any Linear or ActivationFunction assigned as an attribute is registered,
  in assignment order, as a ``(name, layer)`` pair. backward() walks that
  registry in reverse. Subclasses are expected to override forward().
  '''
  def __init__(self):
    self.__layers = []

  def __setattr__(self, name,value):
    '''Register layer-typed attributes, then set the attribute normally.

    Re-assigning an already registered name replaces the registry entry in
    place, keeping its position. Appending a second entry would leave the
    replaced layer in the backward chain and in repr().
    '''
    if isinstance(value,(Linear,ActivationFunction)):
      for index,(known_name,_) in enumerate(self.__layers):
        if known_name == name:
          self.__layers[index] = (name,value)
          break
      else:
        self.__layers.append((name,value))
    super().__setattr__(name,value)

  def __repr__(self):
    body = "".join(f"  ({name}): {layer!r}\n" for name,layer in self.__layers)
    return f"{self.__class__.__name__}(\n{body})"

  def parameters(self):
    '''Return {layer name: layer.parameters()} for every non-activation layer.

    Intended for viewing the parameters; use param_references() to update them.
    '''
    return {
      name: layer.parameters()
      for name,layer in self.__layers
      if not isinstance(layer,ActivationFunction)
    }

  def param_references(self):
    '''Return the internal list of ``(name, layer)`` pairs. To be used for updating the parameters.'''
    return self.__layers

  def backward(self,loss_function_grads):
    '''Propagate gradients from the last registered layer to the first.

    Each layer's backward() receives the gradient returned by the layer in
    front of it. Nothing is returned; trainable layers store their own gradients.
    '''
    grads = loss_function_grads
    for _,layer in reversed(self.__layers):
      grads = layer.backward(grads)

  def get_grads(self):
    '''Return {layer name: layer.get_grads()} for every non-activation layer.'''
    return {
      name: layer.get_grads()
      for name,layer in self.__layers
      if not isinstance(layer,ActivationFunction)
    }

  def forward(self,x):
    '''Identity placeholder; subclasses override this with the real forward pass.'''
    return x

  
  

class SimpleNet(NeuralNetwork):
  '''Two-layer network: Linear(2, 2) -> Sigmoid -> Linear(2, 2) -> Sigmoid.'''
  def __init__(self):
    super().__init__()

    # Assignment order matters: the base class registers layers as they are set.
    self.hidden1 = Linear(2, 2)
    self.activation1 = Sigmoid()
    self.output = Linear(in_features=2, out_features=2)
    self.final_activation = Sigmoid()

  def forward(self,x):
    '''Feed x through the hidden layer, its activation, the output layer and the final activation.'''
    stages = (self.hidden1, self.activation1, self.output, self.final_activation)
    signal = x
    for stage in stages:
      signal = stage(signal)
    return signal


# Build the network and print its registered layers in assignment order.
model = SimpleNet()
print(model)

SimpleNet(
  (hidden1): Linear(in_features=2, out_features=2, bias=True)
  (activation1): Sigmoid()
  (output): Linear(in_features=2, out_features=2, bias=True)
  (final_activation): Sigmoid()
)


# Forward Propagation

In [6]:
# setting the manual weights; randomized by default
# (overwrites the np.random.rand initialisation with fixed values)

model.hidden1.weights = np.array([ [0.15, 0.25],
                                   [0.2, 0.3]])
model.hidden1.bias = np.array([0.35,0.35])

model.output.weights = np.array([ [0.4, 0.5],
                                  [0.45, 0.55]])
model.output.bias = np.array([0.6,0.6])

# Print each Linear layer's parameter dict; model.parameters() skips activations.
for layer,weights in  model.parameters().items():
  print(layer,":\n",weights,":\n")
# d = model.parameters()


hidden1 :
 {'weights': array([[0.15, 0.25],
       [0.2 , 0.3 ]]), 'bias': array([0.35, 0.35])} :

output :
 {'weights': array([[0.4 , 0.5 ],
       [0.45, 0.55]]), 'bias': array([0.6, 0.6])} :



In [7]:
# Forward pass of a single 2-element input; `ypred` is reused by the loss cell below.
X = np.array([0.05, 0.1])
ypred = model.forward(X)
ypred

array([0.75693192, 0.76771788])

# Loss

In [8]:
import numpy as np

class LossFunction():
  '''Base class for loss functions.

  Subclasses assign ``self.function(yhat, y)`` and, for backpropagation,
  ``self.derivative_function(pred, gtruth)``, which backward() applies to
  each prediction/target pair. Every non-callable attribute set on the
  instance is tracked as a ``(name, value)`` pair and exposed through
  parameters().
  '''
  def __init__(self, requires_grad=True):
    '''requires_grad: Necessarily True for backpropagation.'''
    self.__parameters = []
    self.requires_grad = requires_grad

  def __setattr__(self, name,value):
    '''Set the attribute and track it unless it is callable or the registry itself.

    Re-assigning a tracked name replaces its entry in place. Appending on
    every assignment would add new 'yhat'/'y' entries on each call and keep
    every past batch alive.
    '''
    super().__setattr__(name,value)
    if callable(value) or name=='_LossFunction__parameters':
      return
    for index,(known_name,_) in enumerate(self.__parameters):
      if known_name == name:
        self.__parameters[index] = (name,value)
        return
    self.__parameters.append((name,value))

  def parameters(self):
    '''Return the list of tracked ``(name, value)`` attribute pairs.'''
    return self.__parameters

  def __call__(self,yhat,y):
    '''Return ``self.function(yhat, y)``, caching both arrays when requires_grad is True.

    Raises:
      TypeError: yhat or y is not a numpy array.
    '''
    if not isinstance(yhat, np.ndarray) or not isinstance(y, np.ndarray):
      raise TypeError("yhat and y both needs to be of type Numpy nd.array.")
    if self.requires_grad:
      self.yhat = yhat
      self.y = y
    return self.function(yhat,y)

  def backward(self):
    '''Return the per-element loss derivative as a column of shape (-1, 1).

    Raises:
      RuntimeError: requires_grad is False, or the loss has not been evaluated yet.
      NameError: the subclass did not define ``derivative_function``.
    '''
    if not self.requires_grad:
      raise RuntimeError('requires_grad set to False. Cannot calculate gradients.')
    if not hasattr(self,'derivative_function'):
      raise NameError('derivative_function not set for this loss function. Cannot calculate gradients')
    # yhat/y only exist after the loss has been called once.
    if not hasattr(self,'yhat') or not hasattr(self,'y'):
      raise RuntimeError('backward() called before the loss was evaluated')
    per_element = [self.derivative_function(pred,gtruth) for pred,gtruth in zip(self.yhat, self.y)]
    return np.array(per_element).reshape(-1,1)


In [9]:
import numpy as np
  

class MSELoss(LossFunction):
  '''Mean squared error: the average of (yhat - y)**2 over all elements.

  NOTE(review): the derivative used for backpropagation is ``yhat - y``.
  The exact gradient of the mean of squares is ``2 * (yhat - y) / n``, which
  equals ``yhat - y`` only when n == 2 - confirm which scaling is intended.
  '''
  def __init__(self, requires_grad=True):
    '''requires_grad: forwarded to LossFunction.'''
    super().__init__(requires_grad)

    def squared_error(yhat, y):
      return np.square(yhat-y)

    def error(yhat, y):
      return (yhat-y)

    self.function = squared_error
    self.derivative_function = error

  def __call__(self, yhat,y):
    '''Return the mean of the squared errors between yhat and y.'''
    # The base call validates the argument types and caches yhat/y for
    # backward(); its return value is not used.
    super().__call__(yhat,y)
    per_sample = np.array(list(map(self.function, yhat, y)))
    return per_sample.sum()/len(per_sample)
    
# Sanity check: the squared errors are 9 and 4, so the mean is 6.5.
lossFn = MSELoss()
lossFn(yhat=np.array([0,2]), y=np.array([3,4]))

6.5

In [10]:
# Loss of the network prediction against the target vector. A fresh MSELoss
# instance is used and it caches ypred/y for the backward pass.
y = np.array([0.01, 0.99])
lossFn = MSELoss()
lossFn(ypred,y)

0.303658313630144

# backpropagation gradients


In [11]:
# Backpropagate: the loss derivative is pushed through the layers in reverse order.
model.backward(lossFn.backward())

In [12]:
# Inspect the gradients that the backward pass stored on each Linear layer.
model.get_grads()

{'hidden1': {'weights': array([[0.00044758, 0.00089517],
         [0.00056464, 0.00112929]]),
  'bias': array([[0.00895169, 0.01129289]]),
  'requires_grad': True},
 'output': {'weights': array([[ 0.08169586,  0.08194416],
         [-0.02356439, -0.02363601]]),
  'bias': array([[ 0.13742501, -0.03963893]]),
  'requires_grad': True}}

# Optimizers

In [13]:
class SGD():
  '''Plain gradient-descent optimizer: parameter -= lr * gradient.'''
  def __init__(self, model,lr):
    '''
    model: the NeuralNetwork whose layers will be updated.
    lr: learning rate that scales each gradient.

    Raises:
      TypeError: model is not a NeuralNetwork.
    '''
    if not isinstance(model,NeuralNetwork):
      raise TypeError("Input type must be of type NeuralNetwork.")
    self.model = model
    self.lr = lr

  def step(self):
    '''Update, in place, the weights (and bias, if any) of every trainable non-activation layer.'''
    for _, layer in self.model.param_references():
      # Activations carry no trainable parameters; frozen layers are left alone.
      if isinstance(layer, ActivationFunction) or not layer.requires_grad:
        continue
      layer.weights -= self.lr*layer.get_grads()['weights']
      if isinstance(layer.bias, np.ndarray):
        # Flattened so the gradient lines up with the 1-D bias vector.
        layer.bias -= self.lr*layer.get_grads()['bias'].flatten()

  def __repr__(self):
    return f'    SGD ( \n      lr:{self.lr}\n    )'

# Attach an SGD optimizer with learning rate 0.01 to the model; the bare name displays its repr.
optimizer = SGD(model = model,lr=0.01)
optimizer

    SGD ( 
      lr:0.01
    )

In [14]:
# Parameters before the optimizer step.
model.parameters()

{'hidden1': {'weights': array([[0.15, 0.25],
         [0.2 , 0.3 ]]),
  'bias': array([0.35, 0.35])},
 'output': {'weights': array([[0.4 , 0.5 ],
         [0.45, 0.55]]),
  'bias': array([0.6, 0.6])}}

In [15]:
# Gradients currently stored on the layers; the optimizer step reads these.
model.get_grads()

{'hidden1': {'weights': array([[0.00044758, 0.00089517],
         [0.00056464, 0.00112929]]),
  'bias': array([[0.00895169, 0.01129289]]),
  'requires_grad': True},
 'output': {'weights': array([[ 0.08169586,  0.08194416],
         [-0.02356439, -0.02363601]]),
  'bias': array([[ 0.13742501, -0.03963893]]),
  'requires_grad': True}}

In [16]:
# Apply one SGD update to every trainable Linear layer.
optimizer.step()

In [17]:
# Parameters after the update: each value has moved by -lr * gradient.
model.parameters()

{'hidden1': {'weights': array([[0.14999552, 0.24999105],
         [0.19999435, 0.29998871]]),
  'bias': array([0.34991048, 0.34988707])},
 'output': {'weights': array([[0.39918304, 0.49918056],
         [0.45023564, 0.55023636]]),
  'bias': array([0.59862575, 0.60039639])}}