<a href="https://colab.research.google.com/github/DhruvDhariwal/PyTorch-MNIST/blob/main/PyTorch_Computer_Vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

print(torch.__version__)
print(torchvision.__version__)

In [None]:
# Training split of FashionMNIST, stored under ./data (downloaded if needed).
# Images go through ToTensor(); labels are left as-is (no target transform).
train_data = datasets.FashionMNIST(
    root='data', train=True, download=True,
    transform=ToTensor(), target_transform=None,
)

In [None]:
# Test split of FashionMNIST (train=False), stored under ./data.
# Same image transform as the training split; labels are left as-is.
test_data = datasets.FashionMNIST(
    root='data', train=False, download=True,
    transform=ToTensor(), target_transform=None,
)

In [None]:
image, label = train_data[0]
image, label

In [None]:
image.shape

In [None]:
len(train_data), len(train_data.targets)

In [None]:
len(test_data), len(test_data.targets)

In [None]:
# Human-readable class names; later cells index this list with the integer label.
class_names=train_data.classes
# Show every (index, name) pair; each `i` is the tuple yielded by enumerate().
for i in enumerate(class_names):
    print(i)

In [None]:
plt.imshow(image.squeeze(),cmap='gray')
plt.title(class_names[label])
plt.axis(False)

In [None]:
# torch.manual_seed(42)
# Draw a 4x4 grid of randomly chosen training images, each titled with its class name.
fig = plt.figure(figsize=(9,9))
rows, cols = 4,4
for i in range(1,rows*cols+1):
    # Pick one random dataset index as a plain Python int.
    random_index = torch.randint(0,len(train_data),size=[1]).item()
    image, label = train_data[random_index]
    ax = fig.add_subplot(rows,cols,i)
    ax.imshow(image.squeeze(),cmap='gray')  # squeeze() drops size-1 dims before plotting
    ax.set_title(class_names[label])
    ax.axis(False)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
device

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Wrap both splits in mini-batch loaders that share one batch size.
batch_size = 32
# Training batches are shuffled; the test loader keeps dataset order.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
len(train_dataloader), len(test_dataloader)

In [None]:
train_features_batch, train_labels_batch = next(iter(train_dataloader))

In [None]:
flatten_model = nn.Flatten()
X=train_features_batch[0]
output = flatten_model(X)
X.shape, output.shape

In [None]:
class FashionMNISTModel0(nn.Module):
    """Baseline classifier: flatten the input, then apply two linear layers.

    There is no activation between the two ``nn.Linear`` layers.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Output width of the first linear layer.
        output_shape: Number of values produced per sample.
    """

    def __init__(self, input_shape, hidden_units, output_shape):
        super().__init__()
        to_vector = nn.Flatten()
        hidden = nn.Linear(in_features=input_shape, out_features=hidden_units)
        head = nn.Linear(in_features=hidden_units, out_features=output_shape)
        # Order matters: Sequential positions 0/1/2 define the state_dict keys.
        self.layer_stack = nn.Sequential(to_vector, hidden, head)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        return self.layer_stack(x)

In [None]:
model0 = FashionMNISTModel0(
    input_shape=28*28,
    hidden_units=10,
    output_shape=len(class_names)
).to(device)

In [None]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage of positions where ``y_pred`` equals ``y_true``.

    Args:
        y_true: Tensor of reference labels.
        y_pred: Tensor of predicted labels, compared element-wise to ``y_true``.

    Returns:
        Accuracy as a float in the range 0-100.
    """
    matches = (y_true == y_pred).sum().item()
    total = len(y_true)
    return (matches / total) * 100

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimiser=torch.optim.SGD(params=model0.parameters(),lr=0.01)

In [None]:
from tqdm.auto import tqdm

# Train model0 for a few epochs; after each epoch, report the mean training
# loss together with the mean loss/accuracy over the test batches.
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f'Epoch: {epoch}\n------')

    # --- training ---
    model0.train()  # once per epoch is enough; eval() below is undone here
    train_loss = 0.0
    for batch, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device)

        y_pred = model0(X)
        loss = loss_fn(y_pred, y)
        # .item() detaches the value; summing the tensor itself would keep
        # every batch's autograd history referenced until the epoch ends.
        train_loss += loss.item()

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    train_loss /= len(train_dataloader)

    # --- testing ---
    test_loss, test_acc = 0.0, 0.0
    model0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            test_pred = model0(X)
            test_loss += loss_fn(test_pred, y).item()
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

    # Per-batch averages.
    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader)

    print(f'\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f} | Test Accuracy: {test_acc:.2f}%\n')

In [None]:
def eval_model(
        model: torch.nn.Module,
        data_loader: torch.utils.data.DataLoader,
        loss_fn: torch.nn.Module,
        accuracy_fn,
        device=None):
    """Evaluate ``model`` on ``data_loader`` and summarise the result.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Sized iterable of ``(X, y)`` batches.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            scalar tensor.
        accuracy_fn: Callable taking ``y_true`` and ``y_pred`` keyword
            arguments and returning a number for the batch.
        device: Device each batch is moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has none), so
            batches always land where the model already lives.

    Returns:
        A dict with ``model_name`` (the model's class name), ``model_loss``
        and ``model_acc`` (both averaged over the number of batches).

    Raises:
        ValueError: If ``data_loader`` contains no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader is empty; nothing to evaluate")

    total_loss, total_acc = 0.0, 0.0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Move the batch once; everything below reuses these tensors.
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            total_loss += loss_fn(y_pred, y).item()
            total_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

    return {'model_name': model.__class__.__name__,
            'model_loss': total_loss / num_batches,
            'model_acc': total_acc / num_batches}

In [None]:
model0_results=eval_model(model=model0, data_loader=test_dataloader,
    loss_fn=loss_fn, accuracy_fn=accuracy_fn)

model0_results

In [None]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt
class FashionMNISTModel1(nn.Module):
    """Non-linear classifier: flatten -> linear -> ReLU -> linear.

    The output layer returns raw logits with no activation applied, which is
    the input ``nn.CrossEntropyLoss`` expects.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Output width of the first linear layer.
        output_shape: Number of logits produced per sample.
    """

    def __init__(self, input_shape, hidden_units, output_shape):
        super().__init__()
        self.layer_stack=nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=input_shape,out_features=hidden_units),
            nn.ReLU(),
            # No activation after this layer: a trailing ReLU would clamp
            # every negative logit to zero before the loss sees it.
            nn.Linear(in_features=hidden_units,out_features=output_shape)
        )

    def forward(self,x):
        """Return the logits computed by the layer stack for ``x``."""
        return self.layer_stack(x)

In [None]:
model1=FashionMNISTModel1(
    input_shape=28*28,
    hidden_units=10,
    output_shape=len(class_names)
).to(device)

In [None]:
def accuracy_fn(y_true, y_pred):
    """Compute classification accuracy as a percentage.

    Counts the element-wise matches between ``y_true`` and ``y_pred`` and
    divides by ``len(y_true)``.
    """
    hits = torch.eq(y_true, y_pred).sum().item()
    fraction = hits / len(y_true)
    return fraction * 100

In [None]:
loss_fn=nn.CrossEntropyLoss()
optimiser=torch.optim.SGD(params=model1.parameters(),lr=0.01)

In [None]:
def train_step(model:torch.nn.Module,
               data_loader:torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimiser:torch.optim.Optimizer,
               accuracy_fn,
               device:torch.device=device):
    """Train ``model`` for one pass over ``data_loader`` and print the metrics.

    Args:
        model: Module to train; it is moved to ``device`` and put in train mode.
        data_loader: Sized iterable of ``(X, y)`` batches.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            scalar tensor to backpropagate.
        optimiser: Optimiser stepped once per batch.
        accuracy_fn: Callable taking ``y_true`` and ``y_pred`` keyword
            arguments and returning a number for the batch.
        device: Device the model and every batch are moved to. The default is
            the module-level ``device`` as it was when this function was
            defined.

    Returns:
        None. The per-batch mean loss and accuracy are printed.
    """
    train_loss, train_acc = 0.0, 0.0
    model.to(device)
    model.train()
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        y_pred = model(X)

        loss = loss_fn(y_pred, y)
        # Accumulate a plain float: adding the tensor itself would keep each
        # batch's autograd history referenced for the whole epoch.
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    # Per-batch averages.
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step(model:torch.nn.Module,
              data_loader:torch.utils.data.DataLoader,
              loss_fn:torch.nn.Module,
              accuracy_fn,
              device:torch.device=device):
    """Evaluate ``model`` over ``data_loader`` and print the averaged metrics.

    The model is moved to ``device`` and switched to eval mode, and every
    batch runs under ``torch.inference_mode()``. Loss and accuracy are
    averaged over the number of batches. Nothing is returned.
    """
    running_loss, running_acc = 0, 0
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            logits = model(features)

            running_loss += loss_fn(logits, targets)
            running_acc += accuracy_fn(y_true=targets, y_pred=logits.argmax(dim=1))

        # Per-batch averages.
        batches = len(data_loader)
        running_loss /= batches
        running_acc /= batches

        print(f"Test loss: {running_loss:.5f} | Test accuracy: {running_acc:.2f}%")

In [None]:
from tqdm.auto import tqdm

# Train model1, printing training and test metrics after every epoch.
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f'Epoch: {epoch}\n------')
    train_step(model=model1,
               data_loader=train_dataloader,
               loss_fn=loss_fn,
               optimiser=optimiser,
               accuracy_fn=accuracy_fn)
    test_step(model=model1,
              data_loader=test_dataloader,
              loss_fn=loss_fn,
              accuracy_fn=accuracy_fn)



In [None]:
model1_results = eval_model(
    model=model1,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)
model1_results

In [None]:
model0_results

In [None]:
class FashionMNISTModel2(nn.Module):
    """Small CNN: two conv blocks followed by a linear classifier.

    Each block is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool, using 3x3
    convolutions with stride 1 and padding 1.

    Args:
        input_shape: Number of channels in the input images.
        hidden_units: Number of channels produced by every convolution.
        output_shape: Number of values produced per sample.
    """

    def __init__(self, input_shape, hidden_units,output_shape):
        super().__init__()
        self.block1 = self._conv_block(input_shape, hidden_units)
        self.block2 = self._conv_block(hidden_units, hidden_units)
        # 7*7 is the hard-coded spatial size expected after both pooling
        # stages (28 -> 14 -> 7), so this assumes 28x28 input images.
        flattened_features = hidden_units * 7 * 7
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flattened_features,
                      out_features=output_shape),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one conv -> ReLU -> conv -> ReLU -> max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self,x):
        """Apply both conv blocks in order, then the classifier."""
        features = self.block1(x)
        features = self.block2(features)
        return self.classifier(features)

In [None]:
model2=FashionMNISTModel2(
    input_shape=1,
    hidden_units=10,
    output_shape=len(class_names)
).to(device)

In [None]:
loss_fn=nn.CrossEntropyLoss()
optimiser=torch.optim.SGD(params=model2.parameters(),lr=0.04)

In [None]:
# Train model2, printing training and test metrics after every epoch.
epochs = 4
for epoch in tqdm(range(epochs)):
    print(f'Epoch: {epoch}\n------')
    train_step(model=model2,
               data_loader=train_dataloader,
               loss_fn=loss_fn,
               optimiser=optimiser,
               accuracy_fn=accuracy_fn)
    test_step(model=model2,
              data_loader=test_dataloader,
              loss_fn=loss_fn,
              accuracy_fn=accuracy_fn)

In [None]:
model2_results=eval_model(model=model2,
                         data_loader=test_dataloader,
                         loss_fn=loss_fn,
                         accuracy_fn=accuracy_fn)
model2_results

In [None]:
import pandas as pd
compare_results=pd.DataFrame([model0_results,model1_results,model2_results])
compare_results

In [None]:
compare_results.set_index("model_name")["model_acc"].plot(kind="barh")
plt.xlabel("Accuracy")
plt.ylabel("Model")

In [None]:
def make_predictions(model: torch.nn.Module, data: list, device: torch.device = device):
    """Return softmax probabilities for each sample in ``data``.

    Every sample is run through ``model`` on its own, as a batch of one, under
    ``torch.inference_mode()``.

    Args:
        model: Module that produces logits; it is switched to eval mode.
        data: List of individual sample tensors (without a batch dimension).
        device: Device each sample is moved to before the forward pass.

    Returns:
        A CPU tensor made by stacking one probability vector per sample.
    """
    model.eval()
    collected = []
    with torch.inference_mode():
        for item in data:
            # Add a leading batch dimension of size 1 and move to the device.
            batch_of_one = item.unsqueeze(0).to(device)

            logits = model(batch_of_one)

            # After squeeze() the logits are 1-D, so dim=0 runs over classes.
            probabilities = logits.squeeze().softmax(dim=0)

            # Keep results on the CPU for later plotting/inspection.
            collected.append(probabilities.cpu())

    return torch.stack(collected)

In [None]:
import random

# Pick 9 distinct test samples. Sampling indices avoids building a list of the
# whole dataset (which would load and transform every image) to keep only nine.
sample_indices = random.sample(range(len(test_data)), k=9)
test_samples = []
test_labels = []
for index in sample_indices:
    sample, label = test_data[index]
    test_samples.append(sample)
    test_labels.append(label)

# View the first test sample shape and label
print(f"Test sample image shape: {test_samples[0].shape}\nTest sample label: {test_labels[0]} ({class_names[test_labels[0]]})")

In [None]:
# Make predictions on test samples with model 2
pred_probs= make_predictions(model=model2,
                             data=test_samples)

In [None]:
# Turn the prediction probabilities into prediction labels by taking the argmax()
pred_classes = pred_probs.argmax(dim=1)
pred_classes

In [None]:
# Plot the sampled test images in a 3x3 grid, titled with predicted vs true class
plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
  plt.subplot(nrows, ncols, i+1)

  # Draw the image itself
  plt.imshow(sample.squeeze(), cmap="gray")

  # Class names (text) for the prediction and for the ground truth
  pred_label = class_names[pred_classes[i]]
  truth_label = class_names[test_labels[i]]

  title_text = f"Pred: {pred_label} | Truth: {truth_label}"

  # Green title when the prediction is right, red when it is wrong
  plt.title(title_text, fontsize=10, c="g" if pred_label == truth_label else "r")
  plt.axis(False);

In [None]:
# Progress bar for the prediction loop
from tqdm.auto import tqdm

# 1. Collect model2's predicted label for every batch of the test loader
y_preds = []
model2.eval()
with torch.inference_mode():
  for X, y in tqdm(test_dataloader, desc="Making predictions"):
    # Move the batch to the target device
    X, y = X.to(device), y.to(device)
    # Forward pass -> logits
    y_logit = model2(X)
    # logits -> probabilities (softmax over dim=1) -> predicted label (argmax over dim=1)
    y_pred = y_logit.softmax(dim=1).argmax(dim=1)
    # Store on the CPU for evaluation
    y_preds.append(y_pred.cpu())
# Join the per-batch predictions into a single tensor
y_pred_tensor = torch.cat(y_preds)

In [None]:
# See if torchmetrics exists, if not, install it
try:
    import torchmetrics, mlxtend
    print(f"mlxtend version: {mlxtend.__version__}")
    assert int(mlxtend.__version__.split(".")[1]) >= 19, "mlxtend verison should be 0.19.0 or higher"
except:
    !pip install -q torchmetrics -U mlxtend # <- Note: If you're using Google Colab, this may require restarting the runtime
    import torchmetrics, mlxtend
    print(f"mlxtend version: {mlxtend.__version__}")

In [None]:
# Import mlxtend upgraded version
import mlxtend

In [None]:
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

# 2. Build a multiclass confusion matrix from the predictions and the test targets
confmat = ConfusionMatrix(task='multiclass', num_classes=len(class_names))
confmat_tensor = confmat(preds=y_pred_tensor, target=test_data.targets)

# 3. Plot it, labelling rows and columns with the class names
#    (the tensor is converted to a NumPy array for plotting)
fig, ax = plot_confusion_matrix(conf_mat=confmat_tensor.numpy(),
                                class_names=class_names,
                                figsize=(10, 7));

In [None]:
from pathlib import Path

# Directory that holds saved weights; created if it does not exist yet.
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Full path of the checkpoint file for model2.
MODEL_NAME = 'PyTorch_Computer_Vision_model2.pth'
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

# Only the state_dict (the learned parameters) is written, not the module object.
print(f'Saving model to: {MODEL_SAVE_PATH}')
torch.save(obj=model2.state_dict(), f=MODEL_SAVE_PATH)


In [None]:
# Create a new instance of FashionMNISTModel2 (the same class as our saved state_dict())
# Note: loading model will error if the shapes here aren't the same as the saved version
loaded_model_2 = FashionMNISTModel2(input_shape=1,
                                    hidden_units=10, # try changing this to 128 and seeing what happens
                                    output_shape=len(class_names))

# Load in the saved state_dict(). map_location remaps the stored tensors onto
# the current device, so a checkpoint saved on GPU still loads on a CPU-only runtime.
loaded_model_2.load_state_dict(torch.load(f=MODEL_SAVE_PATH, map_location=device))

# Send model to the target device
loaded_model_2 = loaded_model_2.to(device)

In [None]:
# Evaluate loaded model
torch.manual_seed(42)

loaded_model_2_results = eval_model(
    model=loaded_model_2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)

loaded_model_2_results

In [None]:
# Evaluate the ORIGINAL in-memory model2 so the next cell compares it with the
# reloaded copy (evaluating loaded_model_2 here would compare it with itself).
model_2_results = eval_model(
    model=model2,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)
model_2_results

In [None]:
# Check to see if results are close to each other (if they are very far away, there may be an error)
torch.isclose(torch.tensor(model_2_results["model_loss"]),
              torch.tensor(loaded_model_2_results["model_loss"]),
              atol=1e-08, # absolute tolerance
              rtol=0.0001) # relative tolerance