# Quickstart

In [1]:
import torch

In [13]:
!pip install torchvision

Collecting torchvision
  Using cached torchvision-0.19.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.0 kB)
Collecting torch==2.4.0 (from torchvision)
  Using cached torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.4.0->torchvision)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting triton==3.0.0 (from torch==2.4.0->torchvision)
  Using cached triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Using cached torchvision-0.19.0-cp312-cp312-manylinux1_x86_64.whl (7.0 MB)
Downloading torch-2.4.0-cp312-cp312-manylinux1_x86_64.whl (797.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/797.2 MB[0m [31m14.1 kB/s[0m eta [36m15:41:02[0m
[?25h[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/leo/.config/jupyterlab-desktop/jlab_server/lib/python3.12/site-packages/pip/_vendor/urllib3/respo

In [8]:
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

ModuleNotFoundError: No module named 'torchvision'

In [11]:
# Training split of FashionMNIST, stored under the "data" directory; each image
# is passed through ToTensor when it is read.
training_data = datasets.FashionMNIST(
    root="data", train=True, download=True, transform=ToTensor()
)

# Test split, requested with the same root, download flag and transform.
test_data = datasets.FashionMNIST(
    root="data", train=False, download=True, transform=ToTensor()
)

NameError: name 'datasets' is not defined

In [None]:
# Number of samples each loader hands out per batch.
batch_size = 64

# Wrap both dataset splits in a DataLoader that shares the same batch size.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

# Look at the first test batch only and report its tensor shapes and label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

In [None]:
# Creating Models
'''
    To define a neural network in PyTorch, we create a class that inherits from the `nn.Module`. We define the layers of the network in the __init__ function and specify how data will pass through
    the network in the forward function. To accelerate operations in the neural network, we move it to the GPU or MPS if available.
'''

In [7]:
# Pick the device used for training: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using cpu device


In [None]:
# Define model
class NeuralNetwork(nn.Module):
    """Small fully connected classifier.

    Each sample is flattened and fed through three linear layers
    (28*28 -> 512 -> 512 -> 10), with a ReLU after each of the first two.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are listed in execution order; their positions become the
        # indices used inside the Sequential container.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the 10 raw output scores (logits) per sample."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, place it on the selected device, then print its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

In [None]:
# Optimizing the Model Parameters
## To train a model, we need a loss function and an optimizer.

# Loss comparing the model's output scores with the target labels.
loss_fn = nn.CrossEntropyLoss()
# Stochastic gradient descent over the model's parameters, learning rate 1e-3.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
# In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and backpropagates the prediction error to adjust the model's parameters.
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset`` attribute.
        model: Module being trained; it is switched to training mode.
        loss_fn: Callable returning the loss for ``(pred, y)``.
        optimizer: Optimizer that steps the model's parameters.

    Each batch is moved to the notebook-level ``device`` before the forward
    pass. The running loss is printed for every 100th batch.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation: compute gradients, apply the update, then clear the
        # gradients so they do not accumulate into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f} [{current:>5d} / {size:>5d}]")

In [None]:
# We also check the model's performance against the test dataset to ensure it is learning
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argumax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct) :> 0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
'''
    The training process is conducted over several iterations (epochs). During each epoch, the model learns parameters to make better predictions. We print the model's accuracy and 
    loss at each epoch; we'd like to see the accuracy increase and the loss decrease with every epoch.
'''

# Number of passes made over the training loader.
epochs = 5
for t in range(epochs):
    # Header line with the 1-based epoch number, followed by a separator rule.
    print(f"Epochs {t+1}\n---------------------------------------------------------------")
    # Train on the training loader, then report metrics on the test loader.
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

In [None]:
# Saving Models
# A common way to save a model is to serialize the internal state dictionary (containing the model parameters).

# Grab the state dictionary first, then write it to "model.pth".
state_dict = model.state_dict()
torch.save(state_dict, "model.pth")
print("Saved Pytorch Model State to model.pth")

In [None]:
# Loading Models
# The process for loading a model includes re-creating the model structure and loading the state dictionary into it.

# Fresh, untrained instance of the same architecture on the selected device.
model = NeuralNetwork().to(device)
# Read the saved state dictionary back (weights only) and copy it into the model.
saved_state = torch.load("model.pth", weights_only=True)
model.load_state_dict(saved_state)

In [None]:
# model evaluation follows

# Tensors

In [None]:
import torch
import numpy as np

## Initializing a Tensor
Tensors can be initialized in various ways. Take a look at the following examples:

**Directly from data**  
Tensors can be created directly from data. The data type is automatically inferred.

In [None]:
# Nested Python list: two rows of two integers each.
data = [
    [1, 2],
    [3, 4],
]
# Build the tensor directly from the list; no dtype is passed explicitly.
x_data = torch.tensor(data)

## From a NumPy array
Tensors can be created from NumPy arrays (and vice versa — see Bridge with NumPy).

In [None]:
# Convert the nested list `data` into a NumPy array.
np_array = np.array(data)
# Create a tensor from that NumPy array.
x_np = torch.from_numpy(np_array)

## From another tensor:
The new tensor retains the properties (shape, datatype) of the argument tensor, unless explicitly overridden.

In [None]:
# Tensor of ones modelled on x_data.
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

# Random tensor modelled on x_data, but with the dtype forced to float.
x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

## With random or constant values:
`shape` is a tuple of tensor dimensions. In the functions below, it determines the dimensionality of the output tensor.

In [None]:
# Dimensions shared by the three tensors created below.
shape = (2, 3)

# Same shape for each factory: uniform random values, all ones, all zeros.
rand_tensor, ones_tensor, zeros_tensor = (
    make(shape) for make in (torch.rand, torch.ones, torch.zeros)
)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

# Attributes of a Tensor
Tensor attributes describe their shape, datatype, and the device on which they are stored.

In [None]:
tensor = torch.rand(3, 4)

# Report the tensor's shape, dtype and device, one per line.
print(
    f"Shape of the tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)

# Operations on Tensors  
Over 100 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing, indexing, slicing), sampling, and more, are described in the PyTorch documentation.
Each of these operations can be run on the GPU (at typically higher speeds than on a CPU).
By default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using the `.to` method (after checking for GPU availability). Keep in mind that copying large tensors across devices can be expensive in terms of time and memory!

In [None]:
# We move our tensor to the GPU if available
# (`tensor` is rebound to the result of `.to("cuda")`; when CUDA is not
# available it is left as it was).
if torch.cuda.is_available():
    tensor = tensor.to("cuda")