<a href="https://colab.research.google.com/github/Singhabhisheknitp/Pytorchsimplified/blob/main/Pytorchification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch

CLASS  torch.nn.ReLU

In [None]:

class ReLU:
    """Rectified linear unit activation: element-wise ``y = max(0, x)``."""

    def __call__(self, x):
        """Return a tensor shaped like ``x`` with negative entries clamped to zero."""
        floor = torch.zeros_like(x)
        return torch.maximum(floor, x)

    def parameters(self):
        """Return the learnable tensors of this layer (there are none)."""
        return list()

class Tanh:
    """Hyperbolic tangent activation: element-wise ``y = tanh(x)``."""

    def __call__(self, x):
        """Apply tanh to every element of the tensor ``x``."""
        squashed = x.tanh()
        return squashed

    def parameters(self):
        """Return the learnable tensors of this layer (there are none)."""
        return list()

CLASS    torch.nn.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, device=None, dtype=None)

In [None]:
class Embedding:
  """Lookup table: each integer index selects one row of ``weight``."""

  def __init__(self, num_embeddings, embedding_dim):
    # One row per index, initialised from a standard normal distribution.
    table_shape = (num_embeddings, embedding_dim)
    self.weight = torch.randn(table_shape)

  def __call__(self, IX):
    """Return ``weight[IX]`` (Y = C[X]); the result is also cached on ``self.out``."""
    rows = self.weight[IX]
    self.out = rows
    return rows

  def parameters(self):
    """Return the learnable tensors: just the embedding table."""
    learnable = [self.weight]
    return learnable

CLASS  torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)

In [None]:
class Linear:
  """Affine layer computing ``Y = x @ W + b`` (the bias term is optional)."""

  def __init__(self, fan_in, fan_out, bias=True):
    # Standard-normal weights scaled down by sqrt(fan_in).
    scale = fan_in**0.5
    self.weight = torch.randn((fan_in, fan_out)) / scale
    if bias:
      self.bias = torch.zeros(fan_out)
    else:
      self.bias = None

  def __call__(self, x):
    """Run the forward pass; the result is also cached on ``self.out``."""
    result = x @ self.weight
    if self.bias is not None:
      result += self.bias
    self.out = result
    return result

  def parameters(self):
    """Return the learnable tensors: the weight, plus the bias when present."""
    learnable = [self.weight]
    if self.bias is not None:
      learnable.append(self.bias)
    return learnable

CLASS   torch.nn.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)

In [None]:
class BatchNorm1d:

  def __init__(self, dim, eps=1e-5, momentum=0.1):
    self.eps = eps
    self.momentum = momentum
    self.training = True
    # parameters (trained with backprop)
    self.gamma = torch.ones(dim)
    self.beta = torch.zeros(dim)
    # buffers (trained with a running 'momentum update')
    self.running_mean = torch.zeros(dim)
    self.running_var = torch.ones(dim)

  def __call__(self, x):
    # calculate the forward pass
    if self.training:
      if x.ndim == 2:
        dim = 0
      elif x.ndim == 3:
        dim = (0,1)
      xmean = x.mean(dim, keepdim=True) # batch mean
      xvar = x.var(dim, keepdim=True) # batch variance
    else:
      xmean = self.running_mean
      xvar = self.running_var
    xhat = (x - xmean) / torch.sqrt(xvar + self.eps) # normalize to unit variance
    self.out = self.gamma * xhat + self.beta
    # update the buffers
    if self.training:
      with torch.no_grad():
        self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * xmean
        self.running_var = (1 - self.momentum) * self.running_var + self.momentum * xvar
    return self.out

  def parameters(self):
    return [self.gamma, self.beta]

CLASS    torch.nn.Flatten(start_dim=1, end_dim=-1)

In [None]:
class FlattenConsecutive:
  """Merge every ``n`` consecutive positions of a 3-D tensor into one wider position."""

  def __init__(self, n):
    self.n = n

  def __call__(self, x):
    """Reshape ``x`` from ``(B, T, C)`` to ``(B, T // n, C * n)``.

    When the middle dimension collapses to size 1 it is dropped, giving a
    2-D result. The result is also cached on ``self.out``.
    """
    batch, steps, channels = x.shape
    group = self.n
    merged = x.view(batch, steps // group, channels * group)
    # Drop the middle dimension once everything has been merged into one group.
    self.out = merged.squeeze(1) if merged.shape[1] == 1 else merged
    return self.out

  def parameters(self):
    """Return the learnable tensors of this layer (there are none)."""
    return list()

CLASS     torch.nn.Sequential(*args: Module)

In [None]:
class Sequential:
  """Container that feeds its input through a list of layers, in order.

  Example (the layer list passed in is what the methods below iterate over):

      model = Sequential([
        Embedding(vocab_size, n_embd),
        FlattenConsecutive(2), Linear(n_embd * 2, n_hidden, bias=False), BatchNorm1d(n_hidden), Tanh(),
        FlattenConsecutive(2), Linear(n_hidden*2, n_hidden, bias=False), BatchNorm1d(n_hidden), Tanh(),
        FlattenConsecutive(2), Linear(n_hidden*2, n_hidden, bias=False), BatchNorm1d(n_hidden), Tanh(),
        Linear(n_hidden, vocab_size),
      ])
  """

  def __init__(self, layers):
    self.layers = layers

  def __call__(self, x):
    """Apply each layer to the previous layer's output; the result is cached on ``self.out``."""
    result = x
    for layer in self.layers:
      result = layer(result)
    self.out = result
    return result

  def parameters(self):
    """Return the parameters of all layers flattened into a single list."""
    flat = []
    for layer in self.layers:
      flat.extend(layer.parameters())
    return flat

CLASS    torch.optim.Optimizer(params, defaults)

In [None]:
class Optimizer:
    """Plain gradient-descent optimizer: ``param <- param - lr * grad``."""

    def __init__(self, parameters, lr=0.001):
        """Store the tensors to update and the learning rate.

        Args:
            parameters: iterable of tensors to optimise.
            lr: step size applied to every gradient.
        """
        # Materialise the iterable so a generator survives repeated
        # step()/zero_grad() calls instead of being exhausted by the first one.
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        """Apply one descent step to every parameter that has a gradient."""
        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            for param in self.parameters:
                # Parameters that took no part in the backward pass have no
                # gradient yet; skip them rather than failing on None.
                if param.grad is None:
                    continue
                param -= self.lr * param.grad

    def zero_grad(self):
        """Reset every existing gradient to zero, in place."""
        for param in self.parameters:
            if param.grad is not None:
                param.grad.detach_()
                param.grad.zero_()

CLASS    torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)

In [None]:
class Conv2d:
    """2-D convolution with a square kernel, no padding, and a configurable stride."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True):
        """Create the layer.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of filters, i.e. channels in the output.
            kernel_size: side length of the square kernel.
            stride: step between neighbouring receptive fields.
            bias: when true, a per-output-channel bias is added.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride

        # Random weight initialization, one (in_channels, k, k) filter per output channel.
        self.weight = torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        # Zero bias initialization; None (not False) marks "no bias" so the
        # forward pass can test it without evaluating a tensor's truth value.
        self.bias = torch.zeros(out_channels) if bias else None

    def __call__(self, x):
        """Convolve ``x`` of shape ``(batch, in_channels, height, width)``.

        Returns a tensor of shape ``(batch, out_channels, out_height, out_width)``,
        also cached on ``self.out``.

        Raises:
            ValueError: when the channel count of ``x`` does not match the layer.
        """
        batch_size, in_channels, in_height, in_width = x.size()
        if in_channels != self.in_channels:
            raise ValueError(
                f"Conv2d expected {self.in_channels} input channels, got {in_channels}")

        k, s = self.kernel_size, self.stride
        out_height = (in_height - k) // s + 1
        out_width = (in_width - k) // s + 1

        output = torch.zeros(batch_size, self.out_channels, out_height, out_width)

        for h_out in range(out_height):
            h_start = h_out * s
            for w_out in range(out_width):
                w_start = w_out * s
                # (batch, 1, in_channels, k, k): the extra axis broadcasts
                # against the out_channels axis of the weight.
                receptive_field = x[:, :, h_start:h_start + k, w_start:w_start + k].unsqueeze(1)
                # Sum over channels and both kernel axes -> (batch, out_channels).
                output[:, :, h_out, w_out] = (receptive_field * self.weight).sum(dim=(2, 3, 4))

        if self.bias is not None:
            # Broadcast one bias value per output channel over batch and space.
            output += self.bias.view(1, -1, 1, 1)

        self.out = output
        return output

    def parameters(self):
        """Return the learnable tensors: the weight, plus the bias when present."""
        return [self.weight] + ([] if self.bias is None else [self.bias])

CLASS   torch.nn.Softmax(dim=None)

In [None]:

class Softmax:
    """Softmax activation: ``exp(x) / sum(exp(x))`` along one dimension."""

    def __init__(self, dim=1):
        """Store the dimension to normalise over (default 1)."""
        self.dim = dim

    def __call__(self, x):
        """Return probabilities that sum to one along ``self.dim``."""
        # Subtracting the per-slice maximum leaves the result mathematically
        # unchanged but keeps exp() from overflowing to inf (and inf/inf = NaN).
        shifted = x - x.max(dim=self.dim, keepdim=True).values
        exp_vals = torch.exp(shifted)
        sum_exp_vals = torch.sum(exp_vals, dim=self.dim, keepdim=True)
        return exp_vals / sum_exp_vals

    def parameters(self):
        """Return the learnable tensors of this layer (there are none)."""
        return []

CLASS   torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)

In [None]:
class CrossEntropyLoss:
    """Mean negative log-likelihood of the target class under softmax(logits)."""

    def __call__(self, logits, targets):
        """Compute the loss.

        Args:
            logits: scores indexed as ``[sample, class]``; normalised over dim 1.
            targets: integer class index for each sample.

        Returns:
            A scalar tensor holding the mean loss over the samples.
        """
        # log-softmax via log-sum-exp: exponentiating raw logits overflows for
        # large values (inf / inf = NaN) and log() of an underflowed
        # probability gives -inf.
        log_probs = logits - torch.logsumexp(logits, dim=1, keepdim=True)

        # Pick, for every sample, the log-probability of its target class.
        rows = torch.arange(targets.size(0), device=targets.device)
        loss = -log_probs[rows, targets]
        loss = loss.mean()

        return loss



class MSELoss:
    """Mean squared error between predictions and targets."""

    def __call__(self, predictions, targets):
        """Return the mean over all elements of ``(predictions - targets) ** 2``."""
        residual = predictions - targets
        squared = residual ** 2
        return squared.mean()