<a href="https://colab.research.google.com/github/Sitta250/pytorch/blob/main/computer_vision_with_torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn

import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

### getting dataset

In [None]:
train_data = datasets.FashionMNIST(
    root="data", # directory where the dataset files are stored
    train=True, # load the training split (train=False below loads the test split)
    download = True, # download the files if they are not already under root
    transform=torchvision.transforms.ToTensor(), # transform applied to every image: converts it to a tensor
    target_transform=None # optional transform for the labels/targets (none applied here)
)

# test split, configured the same way as the training split
test_data = datasets.FashionMNIST(
    root= "data",
    train=False,
    download=True,
    transform=ToTensor(),
    target_transform=None
)

In [None]:
len(train_data), len(test_data)

In [None]:
image, label = train_data[0]
image, label

In [None]:
class_names = train_data.classes
class_names

In [None]:
class_to_idx = train_data.class_to_idx
class_to_idx

In [None]:
train_data.targets

In [None]:
print(f"image shape: {image.shape} -> [color_channels, height, width]")
print(f"image label: {class_names[label]}")

### visualizing data

In [None]:
image, label = train_data[0]
print(f"image shape: {image.shape}")
plt.imshow(image.squeeze())
plt.title(label)

In [None]:
plt.imshow(image.squeeze(), cmap="gray")
plt.title(class_names[label])
plt.axis(False)

In [None]:
torch.manual_seed(42)
fig = plt.figure(figsize=(9,9))
rows, cols = 4, 4
for i in range(1, rows*cols+1):
  random_idx = torch.randint(0, len(train_data), size=[1]).item()
  img, label = train_data[random_idx]
  fig.add_subplot(rows, cols, i)
  plt.imshow(img.squeeze(), cmap="gray")
  plt.title(class_names[label])
  plt.axis(False)

### prepare DataLoader
Turn data in the form of PyTorch Datasets into a Python iterable; specifically, we want batches:
- it is more computationally efficient to look at one batch at a time
- it gives the NN more chances to update its gradients per epoch


In [None]:
from torch.utils.data import DataLoader

#setup batch size
BATCH_SIZE = 32

#turn dataset into iterables
train_dataloader = DataLoader(dataset= train_data,
                        batch_size = BATCH_SIZE,
                        shuffle= True)
test_dataloader = DataLoader(dataset = test_data,
                       batch_size=BATCH_SIZE,
                       shuffle=False)
train_dataloader, test_dataloader

In [None]:
print(f"Dataloaders: {train_dataloader, test_dataloader}")

In [None]:
#check what is created
print(f"Dataloader: {train_dataloader, test_dataloader}")
print(f"length of train_dataloader: {len(train_dataloader)} | batches of {BATCH_SIZE}")
print(f"length of test_dataloader: {len(test_dataloader)} | batches of {BATCH_SIZE}")

In [None]:
train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

In [None]:
#display samples
torch.manual_seed(42)
random_idx = torch.randint(0, len(train_features_batch),size=[1]).item()
img, label = train_features_batch[random_idx], train_labels_batch[random_idx]
plt.imshow(img.squeeze(), cmap="gray")
plt.title(class_names[label])
plt.axis(False)
print(f"image size: {img.shape}")
print(f"image label: {label}, label size:{label.shape}")

### build baseline model
- a baseline is a simple model that we try to improve upon with subsequent models/experiments


In [None]:
# create flatten layer
flatten_model = nn.Flatten()

# get a single sample
X = train_features_batch[0]

output = flatten_model(X)

print(f"shape before flattening: {X.shape} ->[color channel, height, width]")
print(f"shape afer flattening: {output.shape} -> [color_channel, height*width]")

In [None]:
from torch import nn
class FashionMNISTModelV0(nn.Module):
  """Baseline classifier: flatten each sample, then apply two stacked linear layers.

  No non-linearity is placed between the two linear layers.

  Args:
    input_shape: number of features per sample after flattening.
    hidden_units: width of the intermediate linear layer.
    output_shape: number of values produced per sample.
  """

  def __init__(self,
               input_shape:int,
               hidden_units: int,
               output_shape: int):
    super().__init__()
    # The order of this list fixes both the state_dict keys and the
    # order in which parameters are initialised.
    layers = [
        nn.Flatten(),
        nn.Linear(input_shape, hidden_units),
        nn.Linear(hidden_units, output_shape),
    ]
    self.layer_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Run ``x`` through ``layer_stack`` and return the result."""
    out = self.layer_stack(x)
    return out

In [None]:
torch.manual_seed(42)

model_0 = FashionMNISTModelV0(
    input_shape = 784,
    hidden_units = 10,
    output_shape = len(class_names)
).to("cpu")
model_0

In [None]:
dummy_x = torch.rand([1,1,28, 28])
model_0(dummy_x)

In [None]:
# set up loss, optimizer and evaluation metrics

import requests
from pathlib import Path

# download helper functions from the Learn PyTorch repo (skipped when a local copy exists)
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download...")
else:
  print("Downloading helper_functions.py")
  # the timeout keeps the cell from hanging forever on a stalled connection
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/refs/heads/main/helper_functions.py",
                         timeout=30)
  # fail loudly on an HTTP error instead of saving the error page as a Python module
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

In [None]:
# import accuracy metric
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model_0.parameters(), lr=0.1)

### two main things to track in ML
1. performance: accuracy and loss
2. how fast it runs

In [None]:
from timeit import default_timer as timer
def print_train_time(start: float,
                     end: float,
                     device: torch.device = None):
  """Print and return the time elapsed between two timer readings.

  Args:
    start: timer reading taken before the work began.
    end: timer reading taken after the work finished.
    device: label shown in the printed message (its string form is used).

  Returns:
    ``end - start``.
  """
  elapsed = end - start
  message = f"Train time on {device}: {elapsed: .3f} seconds"
  print(message)
  return elapsed

In [None]:
# create the training loop over data batches

from tqdm.auto import tqdm

torch.manual_seed(42)
train_time_start_on_cpu = timer()
epochs = 3

for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n")

  # accumulate the loss as a plain float so the running total does not
  # keep autograd history from every batch alive
  train_loss = 0.0

  # the test loop below switches to eval mode, so re-enable train mode each epoch
  model_0.train()
  for batch, (X, y) in enumerate(train_dataloader):
    y_pred = model_0(X)
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 400 == 0:
      print(f"looked at {batch*len(X)}/{len(train_dataloader.dataset)} samples.")

  # divide total train loss by the number of batches to get the per-batch average
  train_loss /= len(train_dataloader)

  # test loop
  test_loss, test_acc = 0.0, 0.0
  model_0.eval()
  with torch.inference_mode():
    for X_test, y_test in test_dataloader:
      test_pred = model_0(X_test)
      test_loss += loss_fn(test_pred, y_test).item()
      test_acc += accuracy_fn(y_true = y_test, y_pred = test_pred.argmax(dim=1))

  # average the per-batch metrics once, after all test batches were seen
  test_loss /= len(test_dataloader)
  test_acc /= len(test_dataloader)
  print(f"\nTrain loss: {train_loss: .4f} | Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f}")

# calculate training time
train_time_end_on_cpu = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu,
                                            end=train_time_end_on_cpu,
                                            device=next(model_0.parameters()).device)



In [None]:
str(next(model_0.parameters()).device)

In [None]:
device="cpu"

In [None]:
# make prediction and get model 0 results
torch.manual_seed(42)
def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn,
               device=device):
  """Evaluate ``model`` on every batch of ``data_loader`` and summarise the result.

  The model and each batch are moved to ``device`` before the forward pass,
  matching what ``train_step`` and ``test_step`` do, so a model living on the
  GPU can be evaluated with batches that the loader yields on the CPU.

  Args:
    model: the model to evaluate; it is put in eval mode and moved to ``device``.
    data_loader: iterable of ``(X, y)`` batches that also supports ``len()``.
    loss_fn: called as ``loss_fn(y_pred, y)``; must return a scalar tensor.
    accuracy_fn: called as ``accuracy_fn(y_true=..., y_pred=...)`` with the
      argmax over dim 1 of the model output as ``y_pred``.
    device: device to run on; defaults to the module-level ``device`` as it
      was when this function was defined.

  Returns:
    A dict with ``model_name`` (the model's class name), ``model_loss`` and
    ``model_acc`` (both averaged over the number of batches).
  """
  loss, acc = 0.0, 0.0
  model.to(device)
  model.eval()
  with torch.inference_mode():
    for X, y in tqdm(data_loader):
      # the loader yields batches wherever the dataset lives; move them to the model
      X, y = X.to(device), y.to(device)
      y_pred = model(X)
      loss += loss_fn(y_pred, y).item()
      acc += accuracy_fn(y_true=y,
                         y_pred=y_pred.argmax(dim=1))

  # average the per-batch values over the number of batches
  num_batches = len(data_loader)
  loss /= num_batches
  acc /= num_batches

  return {"model_name": model.__class__.__name__,
          "model_loss": loss,
          "model_acc": acc}
model_0_results = eval_model(model=model_0,
                             data_loader = test_dataloader,
                             loss_fn = loss_fn,
                             accuracy_fn = accuracy_fn)
model_0_results

# 16:23:56

In [None]:
class FashionMNISTModelV1(nn.Module):
  """Flatten -> Linear -> ReLU -> Linear -> ReLU classifier.

  NOTE(review): the final ReLU is applied to the output of the last linear
  layer, so the values returned by ``forward`` are never negative.

  Args:
    input_shape: number of features per sample after flattening.
    hidden_units: width of the intermediate linear layer.
    output_shape: number of values produced per sample.
  """

  def __init__(self, input_shape:int, hidden_units: int, output_shape:int):
    super().__init__()
    # The order of this list fixes both the state_dict keys and the
    # order in which parameters are initialised.
    layers = [
        nn.Flatten(),
        nn.Linear(input_shape, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, output_shape),
        nn.ReLU(),
    ]
    self.layer_stack = nn.Sequential(*layers)

  def forward(self, x:torch.Tensor):
    """Run ``x`` through ``layer_stack`` and return the result."""
    out = self.layer_stack(x)
    return out



In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
torch.manual_seed(42)
model_1 = FashionMNISTModelV1(input_shape=784,
                              hidden_units=10,
                              output_shape=len(class_names)).to(device)

In [None]:
# loss fn and optim
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model_1.parameters(),
                             lr=0.02)

In [None]:
#functionizing training and evaluation/testing loop

def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device=device):
  """Train ``model`` for one pass over ``data_loader`` and print the averaged metrics.

  Args:
    model: the model to train; it is moved to ``device`` and put in train mode.
    data_loader: iterable of ``(X, y)`` batches that also supports ``len()``.
    loss_fn: called as ``loss_fn(y_pred, y)``; must return a scalar tensor.
    optimizer: optimizer stepped once per batch.
    accuracy_fn: called as ``accuracy_fn(y_true=..., y_pred=...)`` with the
      argmax over dim 1 of the model output as ``y_pred``.
    device: device to run on; defaults to the module-level ``device`` as it
      was when this function was defined.

  Returns:
    None; the per-batch averages of loss and accuracy are printed.
  """
  train_loss, train_acc = 0.0, 0.0
  model.to(device)
  # test_step leaves the model in eval mode, so switch back explicitly
  model.train()
  for X, y in data_loader:
    # put data on target device
    X, y = X.to(device), y.to(device)

    # forward pass
    y_pred = model(X)

    # loss and accuracy per batch; .item() keeps the running total free of
    # autograd history
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()
    train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

    # reset gradients, backpropagate, update parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # divide total train loss and acc by the number of batches
  train_loss /= len(data_loader)
  train_acc /= len(data_loader)
  print(f"train loss: {train_loss:.5f} | train acc: {train_acc:.2f}%\n")

In [None]:
# test step

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device=device):
  """Evaluate ``model`` on every batch of ``data_loader`` and print the averaged metrics.

  Args:
    model: the model to evaluate; it is moved to ``device`` and put in eval mode.
    data_loader: iterable of ``(X, y)`` batches that also supports ``len()``.
    loss_fn: called as ``loss_fn(test_pred, y)``; must return a scalar tensor.
    accuracy_fn: called as ``accuracy_fn(y_true=..., y_pred=...)`` with the
      argmax over dim 1 of the model output as ``y_pred``.
    device: device to run on; defaults to the module-level ``device`` as it
      was when this function was defined.

  Returns:
    None; the per-batch averages of loss and accuracy are printed.
  """
  test_loss, test_acc = 0.0, 0.0
  model.to(device)
  model.eval()
  with torch.inference_mode():
    for X, y in data_loader:
      X, y = X.to(device), y.to(device)

      test_pred = model(X)

      # accumulate loss and accuracy per batch
      test_loss += loss_fn(test_pred, y).item()
      test_acc += accuracy_fn(y_true=y,
                              y_pred=test_pred.argmax(dim=1))

  # average once, after the loop: dividing inside the loop would shrink the
  # running totals on every batch and report wrong metrics
  test_loss /= len(data_loader)
  test_acc /= len(data_loader)
  print(f"test loss: {test_loss:.5f} | test acc: {test_acc:.2f}%\n")

In [None]:
torch.manual_seed(42)

# import under the alias this cell actually calls
from timeit import default_timer as timer
# named after the model rather than "on_cpu": `device` may be CUDA here
train_time_start_model_1 = timer()

epochs = 3

for epoch in tqdm(range(epochs)):
  print(f"epoch: {epoch}\n")
  train_step(model=model_1,
             data_loader = train_dataloader,
             loss_fn = loss_fn,
             optimizer = optimizer,
             accuracy_fn = accuracy_fn,
             device = device)

  test_step(model=model_1,
            data_loader= test_dataloader,
            loss_fn= loss_fn,
            accuracy_fn=accuracy_fn,
            device = device)

# measure once, after all epochs have finished
train_time_end_model_1 = timer()
total_train_time_model_1 = print_train_time(start=train_time_start_model_1,
                                            end=train_time_end_model_1,
                                            device = device)

In [None]:
model_0_results

In [None]:
model_1_results = eval_model(model=model_1,
                             data_loader= test_dataloader,
                             loss_fn=loss_fn,
                             accuracy_fn=accuracy_fn,
                             device=device)

In [None]:
model_1_results

In [None]:
class FashionMNISTModelV2(nn.Module):
  """Small CNN: two conv blocks followed by a flatten + linear classifier.

  Each conv block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2). The
  convolutions use kernel_size=3, stride=1 and padding=1. The classifier
  expects ``hidden_units * 7 * 7`` features per sample, which is what the two
  blocks produce for a 28x28 input.

  Args:
    input_shape: number of channels of the input images.
    hidden_units: number of channels produced by every convolution.
    output_shape: number of values produced per sample.
  """

  def __init__(self, input_shape:int, hidden_units:int, output_shape:int):
    super().__init__()
    # settings shared by all four convolutions
    conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

    self.conv_block_1 = nn.Sequential(
        nn.Conv2d(input_shape, hidden_units, **conv_kwargs),
        nn.ReLU(),
        nn.Conv2d(hidden_units, hidden_units, **conv_kwargs),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
    )
    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(hidden_units, hidden_units, **conv_kwargs),
        nn.ReLU(),
        nn.Conv2d(hidden_units, hidden_units, **conv_kwargs),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
    )
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(hidden_units * 7 * 7, output_shape),
    )

  def forward(self, x):
    """Apply both conv blocks, then the classifier, and return the result."""
    features = self.conv_block_2(self.conv_block_1(x))
    return self.classifier(features)

In [None]:
torch.manual_seed(42)
model_2=FashionMNISTModelV2(input_shape=1,
                            hidden_units=10,
                            output_shape=len(class_names)).to(device)

In [None]:
plt.imshow(image.squeeze(), cmap="gray")

In [None]:
rand_image_tensor = torch.randn(size=(1,28, 28))
rand_image_tensor.shape

In [None]:
model_2(rand_image_tensor.unsqueeze(0).to(device))

In [None]:
torch.manual_seed(42)
images = torch.randn(size=(32, 3, 64, 64))

test_image = images[0]

print(f"image batch shape: {images.shape}")
print(f"single image shape: {test_image.shape}")
print(f"test image:\n {test_image}")

In [None]:
test_image.shape

In [None]:
torch.manual_seed(42)
# create conv2d layer
conv_layer = nn.Conv2d(in_channels=3,
                       out_channels=64,
                       kernel_size=3,
                       stride=1,
                       padding=1)

# pass data
conv_output = conv_layer(test_image)
conv_output.shape

In [None]:
print(f"test image original shape: {test_image.shape}")
print(f"test image after unsqueeze shape: {test_image.unsqueeze(0).shape}")

# sample nn.MaxPool2d layer
max_pool_layer = nn.MaxPool2d(kernel_size=2)
test_image_through_conv = conv_layer(test_image.unsqueeze(dim=0))
print(f"shape after going through conv_layer(): {test_image_through_conv.shape}")

# pass data through max pool layer
test_image_through_conv_and_max_pool = max_pool_layer(test_image_through_conv)
print(f"shape after goign through conv_layer() and max_pool_layer(): {test_image_through_conv_and_max_pool.shape}")

In [None]:
torch.manual_seed(42)
# create a random tensor with a similar number of dimensions to our images
random_tensor = torch.randn(size=(1,1,2,2))
print(f"\nRandom tensor:\n{random_tensor}")
print(f"Random tensor shape: {random_tensor.shape}")

# create a max pool layer
max_pool_layer = nn.MaxPool2d(kernel_size=2)

# pass random tensor through the max pool layer
max_pool_tensor = max_pool_layer(random_tensor)
print(f"\nMax pool tensor: \n {max_pool_tensor}")
print(f"Max pool tensor shape: {max_pool_tensor.shape}")
random_tensor

In [None]:
# training cnn on dataset FashionMNIST

#setup loss fn/ eval metrics/ optimizer
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(),
                            lr=0.1)

In [None]:
torch.manual_seed(42)
torch.cuda.manual_seed(42)

from timeit import default_timer as timer
train_time_start_model_2 = timer()

epochs=3
for epoch in tqdm(range(epochs)):
  print(f"Epochs: {epoch}\n")
  train_step(model=model_2,
             data_loader=train_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             accuracy_fn=accuracy_fn,
             device=device)
  test_step(model=model_2,
            data_loader=test_dataloader,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn,
            device=device)

# measure and report once, after all epochs have finished (as the other
# training cells do), instead of printing a partial time after every epoch
train_time_end_model_2 = timer()
total_train_time_model_2 = print_train_time(start= train_time_start_model_2,
                                            end=train_time_end_model_2,
                                            device=device)

In [None]:
model_2_results = eval_model(
    model=model_2,
    data_loader=test_dataloader,
    loss_fn = loss_fn,
    accuracy_fn=accuracy_fn,
    device=device
)
model_2_results

In [None]:
model_0_results

In [None]:
# comparing results and training time
import pandas as pd
compare_results = pd.DataFrame([model_0_results, model_1_results, model_2_results])

In [None]:
compare_results

In [None]:
# add training time to result comparison
compare_results["training_time"] = [total_train_time_model_0,
                                    total_train_time_model_1,
                                    total_train_time_model_2]
compare_results

In [None]:
compare_results.set_index("model_name")["model_acc"].plot(kind="barh")
plt.xlabel("accuracy (%)")
plt.ylabel("model")

In [None]:
def make_predictions(model: torch.nn.Module,
                     data:list,
                     devices:torch.device = device):
  """Return the softmax prediction probabilities of ``model`` for each sample in ``data``.

  Args:
    model: the model to run; it is moved to ``devices`` and put in eval mode.
    data: iterable of single samples without a batch dimension; a batch
      dimension of size 1 is added before each forward pass.
    devices: device to run on; defaults to the module-level ``device`` as it
      was when this function was defined. (The parameter name is kept as-is
      for backward compatibility.)

  Returns:
    A tensor stacking one probability vector per sample, held on the CPU.
  """
  pred_probs = []
  # honour the argument rather than the global `device`
  model.to(devices)
  model.eval()
  with torch.inference_mode():
    for sample in data:
      # prep sample: add the batch dimension and move it next to the model
      sample = torch.unsqueeze(sample, dim=0).to(devices)

      # forward pass
      pred_logit = model(sample)

      # logits -> prediction probabilities; after squeeze() the class axis is dim 0
      pred_prob = torch.softmax(pred_logit.squeeze(), dim=0)

      # collect on the CPU so the results can be stacked and plotted
      pred_probs.append(pred_prob.cpu())

  return torch.stack(pred_probs)

In [None]:
test_data

In [None]:
import random
random.seed(42)
test_samples = []
test_labels = []
# draw 9 indices and fetch only those items; wrapping the dataset in list()
# would load and transform every test image just to keep 9 of them
for idx in random.sample(range(len(test_data)), k=9):
  sample, label = test_data[idx]
  test_samples.append(sample)
  test_labels.append(label)

test_samples[0].shape

In [None]:
plt.imshow(test_samples[0].squeeze(), cmap="gray")
plt.title(class_names[test_labels[0]])

In [None]:
# make predictions
pred_probs = make_predictions(model=model_2,
                              data=test_samples)

# view first two prediction prob
pred_probs[:2]

In [None]:
pred_classes = pred_probs.argmax(dim=1)
pred_classes

In [None]:
# plot every sampled image with its predicted and true class name
plt.figure(figsize=(9,9))
nrow=3
ncols=3
for i, sample in enumerate(test_samples):
  # one subplot per sample
  plt.subplot(nrow, ncols, i+1)
  plt.imshow(sample.squeeze(), cmap="gray")

  # class names for the prediction and for the ground truth
  pred_label = class_names[pred_classes[i]]
  truth_label = class_names[test_labels[i]]
  title_text= f"pred: {pred_label} | truth: {truth_label}"

  # green title when the prediction matches the truth, red otherwise
  plt.title(title_text, fontsize=10, c="g" if pred_label == truth_label else "r")

  plt.axis(False)

In [None]:
# make confusion matrix

import mlxtend
from tqdm.auto import tqdm

In [None]:
# collect the predicted class of every test sample, batch by batch
y_preds=[]
model_2.eval()
with torch.inference_mode():
  for X, y in tqdm(test_dataloader, desc="making prediction..."):
    X, y = X.to(device), y.to(device)

    y_logit = model_2(X)
    # the classifier outputs [batch, classes], so the class axis is dim=1;
    # softmax over dim=0 would normalise across the batch and can change the
    # per-sample argmax. No squeeze(): it would drop the batch axis of a
    # single-sample batch.
    y_pred = torch.softmax(y_logit, dim=1).argmax(dim=1)
    y_preds.append(y_pred.cpu())

# predictions of the last batch only
print(y_pred)
y_pred_tensor = torch.cat(y_preds)
y_pred_tensor[:10]