In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [3]:
# MNIST training split stored under ./data; download is enabled and every
# sample is passed through ToTensor().
training_data = datasets.MNIST("data", train=True, transform=ToTensor(), download=True)

In [4]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear head.

    With the default arguments the network is identical to the original
    hard-coded one (784 -> 512 -> 512 -> 10).

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden_features: Width of both hidden layers.
        num_classes: Number of logits produced per sample.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Attribute names are kept so existing state_dict keys stay valid.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return raw logits for a batch.

        Args:
            x: Batched input; every dimension after the first is flattened,
                so each sample must contain ``in_features`` values.

        Returns:
            Tensor of shape ``(batch, num_classes)``; no softmax is applied.
        """
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

In [5]:
# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Run a single random 28x28 input through the model and pick the class
# with the highest softmax probability.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
pred_probab = torch.softmax(logits, dim=1)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6])


In [7]:
# Mini-batch of three random 28x28 images for the layer-by-layer walkthrough.
input_image = torch.rand((3, 28, 28))
print(input_image.size())

torch.Size([3, 28, 28])


In [8]:
# Flatten keeps dim 0 and merges the rest: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


In [9]:
# Linear layer mapping the 784 flattened values of each image to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [10]:
# Show the activations, clamp negatives to zero with ReLU, and show them again.
# Note: hidden1 is rebound here, so re-running this cell prints already-rectified values.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.2951,  0.1591, -0.2727, -0.4405,  0.1777,  1.2003, -0.4262, -0.2455,
          0.0814, -0.1210, -0.1907, -0.0996,  0.0729,  0.2170, -0.7181,  0.0520,
         -0.5354,  0.1103,  0.3493,  0.4051],
        [-0.6454,  0.1329, -0.2464, -0.0776, -0.0026,  0.9693, -0.2572, -0.0341,
          0.0243, -0.2219,  0.0418, -0.4036,  0.1797, -0.0640, -0.6706, -0.1469,
         -0.2963,  0.5646,  0.1740,  0.5216],
        [-0.3938,  0.1751, -0.0797, -0.0661,  0.0436,  1.4514, -0.3583, -0.2061,
          0.1647, -0.2465,  0.1508,  0.0847,  0.1299,  0.4314, -0.7667, -0.2274,
         -0.1000,  0.0451,  0.4541,  0.6681]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.1591, 0.0000, 0.0000, 0.1777, 1.2003, 0.0000, 0.0000, 0.0814,
         0.0000, 0.0000, 0.0000, 0.0729, 0.2170, 0.0000, 0.0520, 0.0000, 0.1103,
         0.3493, 0.4051],
        [0.0000, 0.1329, 0.0000, 0.0000, 0.0000, 0.9693, 0.0000, 0.0000, 0.0243,
         0.0000, 0.0418, 0.0000, 0.1797, 0.0000, 0.00

In [11]:
# Chain the modules built above with a final 20 -> 10 linear layer; data
# flows through them in the order listed.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)
# Fresh random batch of three 28x28 inputs for the pipeline.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [12]:
# Normalise the logits along dim 1 (one distribution per row).
softmax = nn.Softmax(1)
pred_probab = softmax(logits)

In [13]:
# Print the architecture, then each learnable tensor with its size and a
# two-element slice of its values.
print(f"Model structure: {model}\n\n")

for name, param in dict(model.named_parameters()).items():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0299,  0.0277,  0.0172,  ..., -0.0346, -0.0074, -0.0150],
        [-0.0293,  0.0331,  0.0135,  ..., -0.0258, -0.0192, -0.0227]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0239, 0.0227], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0157, -0.0005, -0.0126,  ..., -0.0119, -0.0156,  0.0244],
        [-0.0074,  0.0057, -0.0377,  ...,  0.0411,  0.0071,  0.0217]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Si

In [14]:
import torch

# Toy one-layer graph for inspecting autograd: z = x @ w + b, scored against
# y with binary cross-entropy on logits.
x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn((5, 3), requires_grad=True)  # weights tracked by autograd
b = torch.randn((3,), requires_grad=True)  # bias tracked by autograd
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [15]:
# Display every tensor of the toy graph as the cell's output.
x, y, w, b, z, loss

(tensor([1., 1., 1., 1., 1.]),
 tensor([0., 0., 0.]),
 tensor([[ 1.1523,  0.1948, -1.3081],
         [-0.0431, -1.4665, -0.8617],
         [-0.4204, -0.7548, -1.2543],
         [-0.9509,  0.9214, -0.8696],
         [-0.1036,  1.4831, -0.6839]], requires_grad=True),
 tensor([0.4282, 1.2626, 0.2922], requires_grad=True),
 tensor([ 0.0625,  1.6407, -4.6854], grad_fn=<AddBackward0>),
 tensor(0.8506, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>))

In [16]:
 print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x000002448947DF30>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x0000024489C55690>
