In [6]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [None]:
# Load both Fashion-MNIST splits through torchvision.
# The files live under ./data (download=True asks torchvision to fetch them),
# and ToTensor() is applied to every image as it is read.
# The generator runs in order: train=True first, then train=False.

training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor(),
    )
    for is_train in (True, False)
)


100.0%
100.0%
100.0%
100.0%


In [None]:
batch_size = 64

# A DataLoader wraps a dataset in an iterable that yields mini-batches of
# `batch_size` samples, so later cells can simply loop over it.
#
# The training loader reshuffles the samples at the start of every epoch, so
# the model does not see the same mini-batches in the same order each time.
# (Call torch.manual_seed(...) before iterating if you need a repeatable order.)
# The test loader keeps the dataset order: evaluation does not depend on batch
# order, and a fixed order keeps the preview printed below stable across runs.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

# Every batch is a pair (X, y).
#
# X is the image tensor: a rank-4 tensor laid out as [N, C, H, W], where
#   N = number of images in the batch (the batch size)
#   C = number of channels (1 for grayscale images, 3 for colour images)
#   H = height of each image
#   W = width of each image
#
# y holds the labels for the images in X.
#
# Only the first test batch is inspected, hence the `break`.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    print(y)
    break


Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64
tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5])


In [None]:
# essentially you can choose a device where you want to run the model
# GPUs allow for parallelisation because of multi threading
# Therefore machine learning tasks can be given to GPUs to do usually
# However, if the GPU is not available we use CPU

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"

# The model is defined as a subclass of nn.Module.

class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        in_features: number of values per sample once flattened. The default
            28*28 matches one 28x28 single-channel image.
        hidden_features: width of both hidden layers (default 512).
        num_classes: number of logits produced per sample (default 10).

    ``NeuralNetwork()`` with no arguments builds the 784-512-512-10 network;
    the attribute names ``flatten`` and ``linear_relu_stack`` and the layer
    order are fixed, so the parameter names in ``state_dict()`` do not depend
    on the sizes chosen.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension except the batch dimension into one.
        self.flatten = nn.Flatten()
        # Sequential stack of linear layers with ReLU activations in between.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw logits for a batch ``x``.

        Logits are the unnormalised outputs of the last linear layer, one per
        class. On their own they are not probabilities, but their relative
        size ranks the classes: the larger (more positive) a logit, the more
        likely that class. For a 3-class problem, logits such as
        ``[2.5, -1.6, 1]`` mean the first class is the most likely.

        Probabilities are obtained by normalising the logits with softmax:

            softmax(z)_i = exp(z_i) / sum_j exp(z_j),   j = 1..n

        Exponentiation maps every logit (including negative ones) to a
        positive value, and dividing by the sum over all classes scales the
        results into [0, 1] so that they add up to 1.

        This method deliberately returns logits, not probabilities: PyTorch
        losses such as ``nn.CrossEntropyLoss`` expect raw logits and apply the
        (log-)softmax internally. (The loss is not defined in this cell;
        confirm which one the training code uses.)
        """
        x = self.flatten(x)
        return self.linear_relu_stack(x)

# Build the network, move its parameters to the selected device,
# then print the module tree to check the layer layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
