<a href="https://colab.research.google.com/github/JacobAshoo/NNFS/blob/main/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch

In [2]:
class Relu:
  """Rectified linear unit activation with a hand-written backward pass."""

  def __init__(self):
    # Nothing to set up; the input is cached on the first forward call.
    pass

  def __call__(self, x):
    """Return the element-wise maximum of `x` and 0, caching `x` for `backward`."""
    self.x = x
    zero = torch.tensor(0.0, device=x.device)
    return torch.maximum(x, zero)

  def backward(self, grad):
    """Let `grad` through only where the cached input was strictly positive."""
    positive_mask = (self.x > 0).float()
    return grad * positive_mask

class Softmax:
  """Softmax activation over dim 1 with a hand-written backward pass.

  The forward pass caches its output so `backward` can apply the softmax
  Jacobian without recomputing the activation.
  """

  def __init__(self):
    pass

  def __call__(self, x):
    """Apply softmax along dim 1 of `x` and cache the result.

    Args:
      x: tensor of scores; the normalisation runs over dim 1.

    Returns:
      Tensor of the same shape as `x` whose slices along dim 1 sum to 1.
    """
    # Subtracting the per-row max does not change the result but keeps
    # exp() from overflowing for large scores.
    shifted = x - torch.max(x, dim=1, keepdim=True).values
    exp_x = torch.exp(shifted)
    self.softmax_out = exp_x / torch.sum(exp_x, dim=1, keepdim=True)
    return self.softmax_out

  def backward(self, grad):
    """Multiply `grad` by the softmax Jacobian of the last forward call.

    With s the cached output, the Jacobian is J[i, j] = s[i] * (delta_ij - s[j]),
    so (J @ grad)[i] = s[i] * (grad[i] - sum_j s[j] * grad[j]). Evaluating this
    closed form avoids materialising the (classes x classes) Jacobian in a
    Python double loop and keeps the dtype/device of the inputs (a float32
    Jacobian buffer would make the product fail for float64 gradients).

    Args:
      grad: gradient of the loss w.r.t. the softmax output, same shape as
        the cached output.

    Returns:
      Gradient of the loss w.r.t. the softmax input, same shape as `grad`.
    """
    s = self.softmax_out
    weighted_sum = torch.sum(grad * s, dim=1, keepdim=True)
    return s * (grad - weighted_sum)


In [3]:
class CrossEntropy:
  """Mean categorical cross-entropy over a batch, with an optional L2 penalty."""

  # Lower bound applied to predictions before log / division.
  _EPS = 1e-9

  def __init__(self):
    # Defaults so `l2_weight_grads` can be called before the first forward pass.
    self.l2_reg = False
    self.l = 0.01
    self.weights = []

  def __call__(self, ypred, y, l2_reg=False, l=0.01, weights=None):
    """Compute the loss and remember the regularisation settings.

    Args:
      ypred: predicted probabilities; the class axis is dim 1.
      y: targets with the same shape as `ypred` (e.g. one-hot rows).
      l2_reg: when True and `weights` is non-empty, add the L2 penalty.
      l: L2 coefficient; the penalty is (l / 2) * sum of squared weights.
      weights: iterable of weight tensors to regularise (None means none).

    Returns:
      Scalar tensor: mean over the batch of -sum(y * log(ypred)), plus the
      L2 penalty when enabled.
    """
    self.l2_reg = l2_reg
    self.l = l
    # None instead of a shared mutable default list.
    self.weights = [] if weights is None else weights
    # Clamp so log(0) cannot produce -inf.
    ypred = torch.clamp(ypred, min=self._EPS)
    loss = -torch.mean(torch.sum(y * torch.log(ypred), dim=1))
    if l2_reg and self.weights:
      # Out-of-place add so a penalty of a wider dtype cannot break the update.
      loss = loss + (l / 2) * sum(torch.sum(w ** 2) for w in self.weights)
    return loss

  def backward(self, ypred, y):
    """Gradient of the data term of the loss with respect to `ypred`.

    The loss averages over the batch, so the gradient carries a 1 / batch
    factor. `ypred` is clamped exactly as in the forward pass to avoid
    dividing by zero. The L2 penalty does not depend on `ypred`, so it
    contributes nothing here; its gradient w.r.t. each weight is available
    from `l2_weight_grads` and belongs on the weight gradients.

    Args:
      ypred: predicted probabilities, as passed to the forward call.
      y: targets with the same shape as `ypred`.

    Returns:
      Tensor with the shape of `ypred`.
    """
    batch_size = ypred.shape[0]
    safe_pred = torch.clamp(ypred, min=self._EPS)
    return -y / safe_pred / batch_size

  def l2_weight_grads(self):
    """Return `l * w` for every regularised weight, in the order given.

    Returns an empty list when L2 regularisation was not enabled on the
    last forward call.
    """
    if not (self.l2_reg and self.weights):
      return []
    return [self.l * w for w in self.weights]

In [4]:
class Linear:
  """Fully connected layer computing `activation(x @ w.T + b)` with manual backprop."""

  def __init__(self, input_size, output_size, activation, device=None):
    """Create the layer parameters.

    Args:
      input_size: number of input features.
      output_size: number of output features.
      activation: callable applied to the affine output; it must also
        provide `backward(grad)`.
      device: where to allocate the parameters (anything `torch.device`
        accepts). When None, CUDA is used if available, otherwise CPU.
    """
    # Honour an explicit device; only auto-detect when none was given.
    if device is None:
      device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device)
    self.input_size = input_size
    self.output_size = output_size
    # Weights are stored as (output_size, input_size), hence the transpose in forward.
    self.w = torch.randn(output_size, input_size, device=self.device, requires_grad=True)
    self.b = torch.zeros(output_size, device=self.device, requires_grad=True)
    self.activation = activation

  def __call__(self, x):
    """Run the forward pass, caching the input and the pre-activation.

    Args:
      x: input whose last dimension is `input_size`.

    Returns:
      The activation applied to `x @ w.T + b`.
    """
    self.x = x
    self.z = torch.matmul(x, self.w.T) + self.b
    return self.activation(self.z)

  def backward(self, grad):
    """Back-propagate `grad` through the activation and the affine map.

    Args:
      grad: gradient of the loss w.r.t. this layer's output.

    Returns:
      Tuple `(dx, dw, db)`: gradients w.r.t. the cached input, the weights
      and the bias. For a (batch, input_size) input, `dw` has the shape of
      `w` and `db` the shape of `b`.
    """
    # First convert the gradient on the output into one on the pre-activation z.
    grad = self.activation.backward(grad)
    dw = torch.matmul(grad.T, self.x)
    db = torch.sum(grad, dim=0)  # the bias is shared across the batch
    dx = torch.matmul(grad, self.w)
    return dx, dw, db

In [5]:
class FCNN:
  """Two-layer fully connected network: Linear+ReLU followed by Linear+Softmax."""

  def __init__(self, input_size, output_size, dropout=False, hidden_size=10):
    """Build the two layers.

    Args:
      input_size: number of input features.
      output_size: number of output classes.
      dropout: accepted for compatibility; not used by this class.
      hidden_size: width of the hidden layer (default 10, the previously
        hard-coded value).
    """
    self.l1 = Linear(input_size, hidden_size, Relu())
    self.l2 = Linear(hidden_size, output_size, Softmax())
    # Layers in forward order.
    self.layers = [self.l1, self.l2]

  def forward(self, x):
    """Run `x` through the hidden layer and then the softmax output layer."""
    hidden = self.l1(x)
    return self.l2(hidden)

  def __call__(self, x):
    """Alias for `forward` so the model can be called like a function."""
    return self.forward(x)




In [7]:
# Seed first: the model weights and the input below are both drawn from the
# global RNG, so the order of these statements determines the printed values.
torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device}")

# 10 input features, 3 output classes.
model = FCNN(10, 3)

# One small-magnitude random sample with a one-hot target for class 0.
x = torch.randn(1, 10, device=device) * 0.01
y = torch.tensor([[1,0,0]], device=device)
loss_function = CrossEntropy()
ypred = model(x)

print(ypred)
loss = loss_function(ypred, y)
print(loss)


Using cpu
tensor([[0.3416, 0.3423, 0.3161]], grad_fn=<DivBackward0>)
tensor(1.0741, grad_fn=<NegBackward0>)
