# Machine Learning with PyTorch

### Objective
- Install necessary PyTorch libraries
- Use PyTorch to build, train, and evaluate neural networks
- Save the trained model parameters and use them later for inferencing

In [1]:
# All Libraries required for this lab are listed below. The libraries pre-installed on Skills Network Labs are commented.
# !pip install torch torchvision

Collecting torch
  Downloading torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m?[0m eta [36m0:00:00[0mB/s[0m eta [36m0:00:01[0m[36m0:00:45[0m
[?25hCollecting torchvision
  Downloading torchvision-0.18.1-cp310-cp310-manylinux1_x86_64.whl (7.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m611.5 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting fsspec
  Downloading fsspec-2024.6.1-py3-none-any.whl (177 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m[36m0:00:01[0m[36m0:00:01[0m:01[0m
[?25hCollecting nvidia-cudnn-cu12==8.9.2.26
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105
  Using cached nvidia_cuda_runtime_cu12-

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

### Different Steps during this project

1. Download the MNIST dataset and create a DataLoader for the dataset
2. Define an AI model to recognize a hand written digit.
3. Train the defined AI model using training data from the MNIST dataset 
4. Test the trained AI model using testing data from the MNIST dataset
5. Evaluate

#### Download the MNIST dataset and create a DataLoader for the dataset

The images are 28x28 pixel images of digits 0 through 9 

In [2]:
# Downloade training data from MNIST datasets.
training_data = datasets.MNIST(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor(),
)

# Download test data from open datasets
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

batch_size = 64

# create data loaders to iterate over data
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

print(f"Training data size: {len(train_dataloader)*batch_size}")
print(f"Test data size: {len(test_dataloader)*batch_size}")

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Training data size: 60032
Test data size: 10048
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


#### Define the model

- Determine the best device for performing training
- Define the AI model as a neural network with 3 layers. We use a ReLU activation function between the layers
- Flatten the input by using the Flatten module before passing the input into our neural network.

In [3]:
# Get device for training
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()     # Apple Silicon GPU
    else "cpu"
)
print(f"Usinig {device} device.")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, image_tensor):
        image_tensor = self.flatten(image_tensor)
        logits = self.linear_relu_stack(image_tensor)
        return logits
    
input_size = 28*28
hidden_size = 512
num_classes = 10


model = NeuralNetwork(input_size, hidden_size, num_classes).to(device)
print(model)

Usinig cpu device.
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


#### Traing loop

We implement a training function to use with train_dataloader to train our AI model. Each iteration over the dataloader returns a batch_size image data tensor along with the expected output. After moving the tensors to the device, we call the foward pass of our model, compute the prediction error using the expected output and then call the backwards pass to compute the gradients and apply them to the model parameters.

In [4]:
# Define our learning rate, loss function and optimizer
learning_rate = 1e-3 
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# Let's define our training function
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()

    for batch_num, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # forward pass to compute prediction
        pred = model(X)
        # Compute prediction error using loss function
        loss = loss_fn(pred, y)

        # Bacward pass
        optimizer.zero_grad()   # zero any previous gradient calculations
        loss.backward()         # calculate gradient
        optimizer.step()        # update model parameters

        if batch_num > 0 and batch_num % 100 == 0 :
            loss, current = loss.item(), batch_num*len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            

#### Test Loop

The test methods evaluates the model's predictive performance using the test_dataloader. During testing, we don't require gradient computation, so we set the model in evaluate mode.

In [5]:
# Our test function
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    for X, y in dataloader:
        X = X.to(device)
        y = y.to(device)
        pred = model(X)
        test_loss += loss_fn(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n") 

#### Train the Model

Now that we have defined methods to train our model and test the trained model's predictive behavior, let's train the model for 5 epochs over the dataset

In [6]:
# Let's run training
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1} \n------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1 
------------------------------
loss: 0.261721  [ 6400/60000]
loss: 0.191323  [12800/60000]
loss: 0.271078  [19200/60000]
loss: 0.154089  [25600/60000]
loss: 0.333872  [32000/60000]
loss: 0.115945  [38400/60000]
loss: 0.237084  [44800/60000]
loss: 0.279653  [51200/60000]
loss: 0.199239  [57600/60000]
Test Error: 
 Accuracy: 95.9%, Avg loss: 0.133410 

Epoch 2 
------------------------------
loss: 0.100302  [ 6400/60000]
loss: 0.119812  [12800/60000]
loss: 0.102287  [19200/60000]
loss: 0.051650  [25600/60000]
loss: 0.118045  [32000/60000]
loss: 0.056371  [38400/60000]
loss: 0.125799  [44800/60000]
loss: 0.157174  [51200/60000]
loss: 0.103986  [57600/60000]
Test Error: 
 Accuracy: 96.7%, Avg loss: 0.108690 

Epoch 3 
------------------------------
loss: 0.086614  [ 6400/60000]
loss: 0.026222  [12800/60000]
loss: 0.063804  [19200/60000]
loss: 0.072565  [25600/60000]
loss: 0.056617  [32000/60000]
loss: 0.027191  [38400/60000]
loss: 0.053343  [44800/60000]
loss: 0.075020  [51200/600

#### Save the model and make predictions

Once we have a trained model, we can save the model parameters for future use in inferences. Here, we save the state_dict of the model which contains the trained parameters. We then create a new instance of the model and load the previously saved parameters into the new instance of the model. Finally, we can inference using the new instance of the model.

In [7]:
# Save our model parameters
torch.save(model.state_dict(), "machineLearning_with_pytorch_model.pth")
print("Saved PyTorch Model State to machineLearning_with_pytorch_model.pth")

# Load the saved model parameters into a new instance of the model
model = NeuralNetwork(input_size, hidden_size, num_classes).to(device)
model.load_state_dict(torch.load("machineLearning_with_pytorch_model.pth"))

# inference using the new model instance
model.eval()
for i in range(10):
    x, y = test_data[i][0], test_data[i][1]

    x = x.to(device)
    pred = model(x)
    predicted, actual = pred[0].argmax(0).item(), y
    print(f'Predicted: "{predicted}", Actual: "{actual}" ')


Saved PyTorch Model State to machineLearning_with_pytorch_model.pth
Predicted: "7", Actual: "7" 
Predicted: "2", Actual: "2" 
Predicted: "1", Actual: "1" 
Predicted: "0", Actual: "0" 
Predicted: "4", Actual: "4" 
Predicted: "1", Actual: "1" 
Predicted: "4", Actual: "4" 
Predicted: "9", Actual: "9" 
Predicted: "5", Actual: "5" 
Predicted: "9", Actual: "9" 


In [29]:
# test_data.__dict__
# type(test_dataloader)