This is the tutorial from https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html

In [1]:
print("Hello World")
# Sanity-check cell: originally added only to verify that `git push` works on macOS.

Hello World


PyTorch has two primitives to work with data: `torch.utils.data.DataLoader` and `torch.utils.data.Dataset`. `Dataset` stores the samples and their corresponding labels, and `DataLoader` wraps an iterable around the `Dataset`.

# 1. Working with data

In [2]:
import torch
from torch import nn

from torch.utils.data import DataLoader

#PyTorch offers domain-specific libraries such as TorchText, TorchVision, and TorchAudio, all of which include datasets
from torchvision import datasets
from torchvision.transforms import ToTensor


In [3]:
# Training split of FashionMNIST, fetched from the open-datasets collection.
training_data = datasets.FashionMNIST(
    "data",                  # root directory that holds the dataset files
    train=True,              # select the training split
    transform=ToTensor(),    # convert every image to a PyTorch tensor
    download=True,
)

# Test split: same root directory and the same image-to-tensor conversion.
test_data = datasets.FashionMNIST(
    "data",
    train=False,
    transform=ToTensor(),
    download=True,
)

In [5]:
# Show the concrete class of the object returned by datasets.FashionMNIST.
type(training_data)

torchvision.datasets.mnist.FashionMNIST

We pass the `Dataset` as an argument to `DataLoader`. This wraps an iterable over our dataset and supports automatic batching, sampling, shuffling, and multiprocess data loading.

In [4]:
# Batch size shared by both loaders: each element of a loader is one batch
# of features together with the matching labels.
batch_size = 64

# Both splits are wrapped in shuffling, batching iterables.
train_dataloader = DataLoader(dataset=training_data, shuffle=True, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, shuffle=True, batch_size=batch_size)

In [10]:
# Display the loader object itself (its repr only shows the class and memory address).
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x1dbb7d5e510>

In [29]:
# The loader keeps a reference to the wrapped dataset in its `.dataset` attribute.
type(train_dataloader.dataset)

torchvision.datasets.mnist.FashionMNIST

In [19]:
# Pull a single batch from the test loader to check tensor shapes and dtypes;
# nothing is printed when the loader yields no batches.
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape, y.shape, X.dtype, y.dtype)

torch.Size([64, 1, 28, 28]) torch.Size([64]) torch.float32 torch.int64


torch.Size([64, 1, 28, 28]) --> the feature tensor has size 64 along the batch dimension, 1 along the channel dimension, 28 along the height dimension, and 28 along the width dimension. In other words, it is a batch of 64 single-channel (grayscale) images with a resolution of 28x28 pixels. <br> 
torch.Size([64]) --> size of the label tensor: one label per image in the batch <br>
torch.float32 torch.int64 --> dtypes of the feature tensor and the label tensor, respectively<br>


# 2. Creating Models

To define a neural network in PyTorch, we create a class that inherits from `nn.Module`. We define the layers of the network in the `__init__` function and specify how data will pass through the network in the `forward` function. To accelerate operations in the neural network, we move it to the GPU or MPS if available.

In [5]:
# Choose the compute device for training, in order of preference:
# CUDA GPU first, then Apple's MPS backend, and finally the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(device)

mps


In [5]:
# Check whether this PyTorch build was compiled with MPS support.
torch.backends.mps.is_built()

True

In [6]:
# Check whether the MPS backend can actually be used on this machine.
torch.backends.mps.is_available()

True

In [6]:
# Define the model
# nn.Module --> base class for all neural network modules
class NeuralNetwork(nn.Module):
    """Fully connected classifier for 28x28 single-channel images.

    Each sample is flattened to a 784-element vector and passed through two
    hidden layers of 512 units with ReLU activations, followed by a linear
    layer that produces 10 raw class scores (logits).
    """

    def __init__(self):
        """Create the flatten layer and the stack of linear/ReLU layers."""
        # nn.Module.__init__ sets up the module's internal state, so it must
        # run before any layer is assigned as an attribute.
        super().__init__()

        # Attributes holding nn.Module instances are submodules of this model
        # (they are listed when the model is printed).
        self.flatten = nn.Flatten()

        # nn.Sequential stacks layers so that each output feeds the next layer.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Compute the logits for a batch of images.

        Args:
            x: input tensor whose dimensions after the first (batch)
                dimension flatten to 784 values per sample.

        Returns:
            Tensor with one row of 10 unnormalized class scores per sample.
        """
        # Call the submodules themselves rather than their .forward() methods:
        # nn.Module.__call__ runs any registered forward hooks, which a direct
        # .forward() call silently skips.
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


# 3. Optimizing the Model Parameters

In [7]:
# Training criterion: cross-entropy computed on the model's raw logits.
loss_fn = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and backpropagates the prediction error to adjust the model’s parameters.

In [8]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader`` and update ``model`` in place.

    Args:
        dataloader: iterable of ``(X, y)`` batches that also exposes a
            ``dataset`` attribute; ``len(dataloader.dataset)`` is used as the
            total sample count in the progress output.
        model: the ``nn.Module`` being optimized.
        loss_fn: callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: optimizer whose ``step()`` updates ``model``'s parameters.

    Returns:
        None. A progress line is printed every 100 batches.
    """
    size = len(dataloader.dataset)  # total number of samples in the dataset

    # Send each batch to the device the model's parameters live on instead of
    # relying on a notebook-global `device`. A model without parameters leaves
    # the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    # Switch to training mode (sets the module's `training` flag to True);
    # layers such as dropout and batch normalization depend on this flag.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Forward pass: compute predictions and the loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation.
        loss.backward()         # gradients of the loss w.r.t. the model's parameters
        optimizer.step()        # update the parameters using those gradients
        optimizer.zero_grad()   # clear the gradients; otherwise they accumulate across batches

        # Report progress every 100 batches; `current` is the number of
        # samples consumed before this batch.
        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [9]:
%%time
# The %%time cell magic above must stay on the first line of the cell; it
# reports how long the whole cell takes to run.
# Number of full passes over the training data.
epochs = 5
for t in range(epochs):
    # t is zero-based, so the printed epoch number is t+1.
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    # Evaluation on the test split is disabled here.
    # NOTE(review): no `test` function is defined in the visible part of the notebook.
    #test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.296412  [    0/60000]
loss: 2.290769  [ 6400/60000]
loss: 2.280446  [12800/60000]
loss: 2.276624  [19200/60000]
loss: 2.247541  [25600/60000]
loss: 2.240719  [32000/60000]
loss: 2.220475  [38400/60000]
loss: 2.190012  [44800/60000]
loss: 2.195565  [51200/60000]
loss: 2.177196  [57600/60000]
Epoch 2
-------------------------------
loss: 2.164548  [    0/60000]
loss: 2.135711  [ 6400/60000]
loss: 2.122465  [12800/60000]
loss: 2.084564  [19200/60000]
loss: 2.043569  [25600/60000]
loss: 2.046818  [32000/60000]
loss: 2.020132  [38400/60000]
loss: 1.947254  [44800/60000]
loss: 1.923546  [51200/60000]
loss: 1.973825  [57600/60000]
Epoch 3
-------------------------------
loss: 1.904248  [    0/60000]
loss: 1.843148  [ 6400/60000]
loss: 1.822637  [12800/60000]
loss: 1.824783  [19200/60000]
loss: 1.741229  [25600/60000]
loss: 1.717850  [32000/60000]
loss: 1.760576  [38400/60000]
loss: 1.594902  [44800/60000]
loss: 1.528427  [51200/60000]
loss: 1.54