<a href="https://colab.research.google.com/github/JacobAshoo/NNFS/blob/main/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch

In [None]:
class Linear:
    """Fully connected layer computing ``x @ w.T + b``.

    ``w`` has shape ``(output_size, input_size)`` and is drawn from a standard
    normal distribution; ``b`` has shape ``(output_size,)`` and starts at zero.
    """

    def __init__(self, input_size, output_size, device=None):
        """Create the layer parameters.

        Args:
            input_size: Number of input features.
            output_size: Number of output features.
            device: Device (string or ``torch.device``) to allocate the
                parameters on. When ``None``, CUDA is used if available,
                otherwise the CPU.
        """
        # Honor an explicitly requested device instead of always auto-detecting.
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)
        self.input_size = input_size
        self.output_size = output_size
        self.w = torch.randn(output_size, input_size, device=self.device)
        self.b = torch.zeros(output_size, device=self.device)
        # Input of the most recent forward pass; needed to compute dw.
        self.x = None

    def __call__(self, x):
        """Apply the layer to ``x`` and remember ``x`` for ``backward``."""
        self.x = x
        return torch.matmul(x, self.w.T) + self.b

    def backward(self, grad):
        """Back-propagate ``grad`` (gradient w.r.t. this layer's output).

        ``grad`` is transposed with ``.T``, so it is expected to be 2-D with
        one row per sample.

        Returns:
            Tuple ``(dx, dw, db)``: gradients w.r.t. the cached input, the
            weights and the bias respectively.

        Raises:
            RuntimeError: If called before any forward pass.
        """
        if self.x is None:
            raise RuntimeError("Linear.backward called before a forward pass")
        dx = torch.matmul(grad, self.w)
        dw = torch.matmul(grad.T, self.x)
        db = torch.sum(grad, dim=0)

        return dx, dw, db

In [None]:
class Relu:
    """Element-wise rectified linear unit, ``max(x, 0)``."""

    def __init__(self):
        # Input of the most recent forward pass; backward() needs it to know
        # which elements were positive.
        self.input = None

    def __call__(self, x):
        """Return ``max(x, 0)`` element-wise and cache ``x`` for ``backward``."""
        self.input = x
        return torch.maximum(x, torch.tensor(0.0, device=x.device))

    def backward(self, grad):
        """Pass ``grad`` through where the cached input was strictly positive.

        Raises:
            RuntimeError: If called before any forward pass.
        """
        if self.input is None:
            raise RuntimeError("Relu.backward called before a forward pass")
        return grad * (self.input > 0).float()

class Softmax:
    """Row-wise softmax over ``dim=1`` with a manual backward pass."""

    def __init__(self):
        # Output of the most recent forward pass; backward() is expressed in
        # terms of it.
        self.softmax_out = None

    def __call__(self, x):
        """Return the softmax of ``x`` along ``dim=1`` and cache the result."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # exp() from overflowing.
        exp_x = torch.exp(x - torch.max(x, dim=1, keepdim=True).values)
        self.softmax_out = exp_x / torch.sum(exp_x, dim=1, keepdim=True)
        return self.softmax_out

    def backward(self, grad):
        """Multiply the softmax Jacobian by ``grad`` for every row.

        With ``s`` the cached output, the Jacobian is
        ``J[i, j] = s_i * (delta_ij - s_j)``, hence
        ``(J @ g)_i = s_i * (g_i - sum_j s_j * g_j)``. Using this closed form
        avoids building the ``(batch, C, C)`` Jacobian and keeps the result on
        the same device and dtype as the cached output.

        Raises:
            RuntimeError: If called before any forward pass.
        """
        if self.softmax_out is None:
            raise RuntimeError("Softmax.backward called before a forward pass")
        s = self.softmax_out
        weighted_sum = torch.sum(grad * s, dim=1, keepdim=True)
        return s * (grad - weighted_sum)


In [None]:
class CrossEntropy:
    """Mean cross-entropy loss over a batch, with optional L2 weight penalty.

    ``ypred`` holds predicted probabilities and ``y`` the targets; both are
    summed over ``dim=1`` and averaged over the remaining dimension.
    """

    def __init__(self):
        # Regularization settings of the most recent forward call.
        self.l2_reg = False
        self.l = 0.01
        self.weights = []

    def __call__(self, ypred, y, l2_reg=False, l=0.01, weights=None):
        """Compute the loss.

        Args:
            ypred: Predicted probabilities, one row per sample.
            y: Targets with the same shape as ``ypred``.
            l2_reg: Whether to add the L2 penalty.
            l: L2 regularization strength.
            weights: Iterable of weight tensors to penalize. ``None`` (the
                default) means no weights.

        Returns:
            Scalar tensor ``-mean(sum(y * log(ypred), dim=1))`` plus
            ``(l / 2) * sum(||w||^2)`` when regularization is enabled.
        """
        self.l2_reg = l2_reg
        self.l = l
        # A fresh list per call: a mutable default would be shared across calls.
        self.weights = [] if weights is None else weights
        # Clamp so that log(0) cannot produce -inf.
        ypred = torch.clamp(ypred, min=1e-9)
        loss = -torch.mean(torch.sum(y * torch.log(ypred), dim=1))
        if l2_reg and self.weights:
            loss = loss + (l / 2) * sum(torch.sum(w ** 2) for w in self.weights)
        return loss

    def backward(self, ypred, y):
        """Return the gradient of the loss with respect to ``ypred``.

        The L2 penalty does not depend on ``ypred`` and therefore contributes
        nothing here; its gradient w.r.t. the weights is available from
        ``l2_weight_grads``. The result has the same shape as ``ypred``.
        """
        # Same clamp as the forward pass, so a zero probability cannot divide
        # by zero.
        ypred = torch.clamp(ypred, min=1e-9)
        # The forward pass averages over the batch, hence the 1/batch factor.
        batch_size = ypred.shape[0]
        return -y / (ypred * batch_size)

    def l2_weight_grads(self):
        """Return ``l * w`` for each weight of the last call, or ``[]``.

        The list is empty when the last forward call did not enable L2
        regularization or received no weights.
        """
        if not (self.l2_reg and self.weights):
            return []
        return [self.l * w for w in self.weights]

In [None]:
class FCNN:
    """Small fully connected classifier: Linear -> Relu -> Linear -> Softmax."""

    def __init__(self, input_size, output_size, dropout=False):
        """Build the four layers and report the device of the first one.

        Args:
            input_size: Number of input features.
            output_size: Number of outputs of the final layer.
            dropout: Accepted but currently unused.
        """
        hidden_units = 10

        self.l1 = Linear(input_size, hidden_units)
        print(f"Using {self.l1.device}")
        self.relu1 = Relu()
        self.l2 = Linear(hidden_units, output_size)
        self.softmax = Softmax()

        # Layers in the order the forward pass applies them.
        self.layers = [self.l1, self.relu1, self.l2, self.softmax]

    def forward(self, x):
        """Run ``x`` through both linear layers and return the softmax output."""
        hidden = self.relu1(self.l1(x))
        return self.softmax(self.l2(hidden))





In [None]:
# Fix the seed so the random weights and the random input are reproducible.
torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The network is created before the input so the RNG draw order is preserved.
nn = FCNN(10, 3)

# One sample with 10 small-magnitude features and a one-hot target for class 0.
x = 0.01 * torch.randn(1, 10, device=device)
y = torch.tensor([[1, 0, 0]], device=device)

loss_function = CrossEntropy()
ypred = nn.forward(x)
print(ypred)

loss = loss_function(ypred, y)
print(loss)


Using cpu
tensor([[0.3416, 0.3423, 0.3161]])
tensor(1.0741)
