In [9]:
# Smoke-test cell: prints a fixed greeting (presumably to confirm the kernel runs).
print("Hi")

Hi


## Implement custom operations in PyTorch and use them for deep learning via gradient descent

<!-- 1. recursive chain rule (backpropagation)
2. bias-variance tradeoff
3. regularization -->
Output units: linear, softmax (the softmax is folded into `nn.CrossEntropyLoss`); hidden units: tanh, ReLU.

In [10]:
!pip install torch torchvision matplotlib




[notice] A new release of pip available: 22.2.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
import torch
import torch.nn as nn
from torch.autograd import Function
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [12]:
# ---- Custom autograd Functions ----
class LinearFn(Function):
    """Affine map ``y = x @ w.T + b`` with a hand-written backward pass.

    Expected shapes: ``x`` is ``(..., in_f)``, ``w`` is ``(out_f, in_f)`` and
    ``b`` is ``(out_f,)``; the result is ``(..., out_f)``. Any number of
    leading batch dimensions (including none) is supported.
    """

    @staticmethod
    def forward(ctx, x, w, b):
        """Compute ``x @ w.T + b`` and stash ``x`` and ``w`` for backward."""
        # The bias value does not appear in any of the gradients, so it is
        # not kept alive for the backward pass.
        ctx.save_for_backward(x, w)
        return x @ w.t() + b

    @staticmethod
    def backward(ctx, grad_out):
        """Return gradients w.r.t. ``(x, w, b)``; ``None`` where not required."""
        x, w = ctx.saved_tensors
        need_x, need_w, need_b = ctx.needs_input_grad

        # Collapse every leading batch dimension into one so the weight and
        # bias reductions work for 1-D, 2-D and higher-rank inputs alike.
        grad_2d = grad_out.reshape(-1, grad_out.shape[-1])

        grad_x = grad_out @ w if need_x else None
        grad_w = grad_2d.t() @ x.reshape(-1, x.shape[-1]) if need_w else None
        grad_b = grad_2d.sum(0) if need_b else None
        return grad_x, grad_w, grad_b

class ReLUFn(Function):
    """Rectified linear unit ``max(x, 0)`` with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, x):
        """Clamp ``x`` from below at zero; the input is saved for backward."""
        ctx.save_for_backward(x)
        return x.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_out):
        """Pass the incoming gradient through only where the input was positive."""
        (x,) = ctx.saved_tensors
        # Work on a copy: the incoming gradient must not be modified in place.
        grad = grad_out.clone()
        # Use ``<=`` so the gradient at exactly x == 0 is 0, matching the
        # convention of PyTorch's built-in ReLU.
        grad[x <= 0] = 0
        return grad

class TanhFn(Function):
    """Hyperbolic tangent with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, x):
        """Return ``tanh(x)``; the result itself is saved for backward."""
        out = torch.tanh(x)
        ctx.save_for_backward(out)
        return out

    @staticmethod
    def backward(ctx, grad_out):
        """Scale the incoming gradient by ``1 - tanh(x)^2``."""
        out = ctx.saved_tensors[0]
        local_slope = 1 - out.pow(2)
        return grad_out * local_slope

In [13]:
# ---- Layers using custom ops ----
class Linear(nn.Module):
    """Fully connected layer whose forward pass goes through ``LinearFn``."""

    def __init__(self, in_f, out_f):
        """Create an ``(out_f, in_f)`` weight and an ``(out_f,)`` bias."""
        super().__init__()
        # Weight: standard-normal draws scaled by 0.1; bias: all zeros.
        initial_weight = 0.1 * torch.randn(out_f, in_f)
        initial_bias = torch.zeros(out_f)
        self.w = nn.Parameter(initial_weight)
        self.b = nn.Parameter(initial_bias)

    def forward(self, x):
        """Apply the custom linear function to ``x`` using this layer's parameters."""
        weight, bias = self.w, self.b
        return LinearFn.apply(x, weight, bias)

class ReLU(nn.Module):
    """Parameter-free module wrapper around the custom ``ReLUFn`` function."""

    def forward(self, x):
        """Return ``ReLUFn`` applied to ``x``."""
        activated = ReLUFn.apply(x)
        return activated

class Tanh(nn.Module):
    """Parameter-free module wrapper around the custom ``TanhFn`` function."""

    def forward(self, x):
        """Return ``TanhFn`` applied to ``x``."""
        activated = TanhFn.apply(x)
        return activated

In [14]:
# ---- Training ----
def train(model, loader, lr=0.1, epochs=5, weight_decay=1e-4, device="cpu"):
    """Train ``model`` with SGD and cross-entropy loss, printing per-epoch metrics.

    Args:
        model: module mapping an input batch to per-class scores.
        loader: iterable yielding ``(inputs, targets)`` batches.
        lr: learning rate passed to SGD.
        epochs: number of passes over ``loader``.
        weight_decay: weight-decay coefficient passed to SGD.
        device: device the model and every batch are moved to.

    Prints one line per epoch with the sample-weighted mean loss and the
    accuracy over that epoch. If ``loader`` yields no samples, both metrics
    are reported as ``nan`` instead of raising ``ZeroDivisionError``.
    Returns ``None``.
    """
    # Move the model before building the optimizer, as the torch.optim docs
    # advise, so the optimizer is created over the parameters on ``device``.
    model.to(device)
    # Make sure mode-dependent layers (dropout, batch norm) are in training mode.
    model.train()
    opt = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    loss_fn = nn.CrossEntropyLoss()

    for ep in range(1, epochs + 1):
        total, correct, loss_sum = 0, 0, 0.0
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            out = model(xb)
            loss = loss_fn(out, yb)
            loss.backward()
            opt.step()

            batch_size = xb.size(0)
            # The loss is a batch mean; weight it by batch size so the epoch
            # figure is a true per-sample mean even if the last batch is short.
            loss_sum += loss.item() * batch_size
            correct += (out.detach().argmax(1) == yb).sum().item()
            total += batch_size

        if total:
            mean_loss, accuracy = loss_sum / total, correct / total
        else:
            # Nothing was seen this epoch; avoid dividing by zero.
            mean_loss = accuracy = float("nan")
        print(f"Epoch {ep}: loss={mean_loss:.4f}, acc={accuracy:.4f}")

In [15]:
if __name__ == "__main__":
    # Seed PyTorch's global RNG so weight initialisation and DataLoader
    # shuffling are repeatable across Restart-and-Run-All.
    torch.manual_seed(0)

    # MNIST training split, converted to tensors by ToTensor.
    transform = transforms.Compose([transforms.ToTensor()])
    trainset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
    loader = DataLoader(trainset, batch_size=128, shuffle=True)

    # Three-layer MLP built from the custom layers:
    # 784 -> 256 (ReLU) -> 128 (tanh) -> 10 class scores.
    model = nn.Sequential(
        nn.Flatten(),
        Linear(28*28, 256),
        ReLU(),
        Linear(256, 128),
        Tanh(),
        Linear(128, 10)
    )
    train(model, loader, lr=0.1, epochs=5, weight_decay=1e-4)

Epoch 1: loss=0.4168, acc=0.8816
Epoch 2: loss=0.2020, acc=0.9405
Epoch 3: loss=0.1539, acc=0.9546
Epoch 4: loss=0.1250, acc=0.9635
Epoch 5: loss=0.1052, acc=0.9700
