# PyTorch Computer Vision Project

0. Computer Vision Libraries in PyTorch

1. `torchvision` - base domain library for PyTorch computer vision tasks
2. `torchvision.datasets` - get datasets and data loading functions for computer vision
3. `torchvision.models` - get pretrained computer vision models that you can leverage for your own tasks
4. `torchvision.transforms` - functions for manipulating your vision data (images) to be suitable for use with an ML model.
5. `torch.utils.data.Dataset` - base class for PyTorch datasets.
6. `torch.utils.data.DataLoader` - creates a Python iterable over a dataset

In [None]:
## 1. Import pytorch
import torch
from torch import nn

# Import torchvision
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

# Import matplotlib
import matplotlib.pyplot as plt

print(torch.__version__)
print(torchvision.__version__)

## 2. Getting a dataset

In [None]:
# Build the training and test splits of FashionMNIST with identical settings;
# only the `train` flag differs between the two datasets.
train_data, test_data = (
    datasets.FashionMNIST(
        root="data",            # where to download data to
        train=use_train_split,  # True for the first dataset, False for the second
        download=True,
        transform=ToTensor(),   # applied to every image
        target_transform=None,  # labels are left as they are
    )
    for use_train_split in (True, False)
)

In [None]:
len(train_data)

In [None]:
len(test_data)

In [None]:
# See the first training sample
image, label = train_data[0]
image

In [None]:
label

In [None]:
class_names = train_data.classes

In [None]:
class_names[label]

In [None]:
image.shape

In [None]:
plt.imshow(image.squeeze())

In [None]:
plt.imshow(image.squeeze(), cmap="gray");
plt.title(class_names[label]);

In [None]:
class_to_idx = train_data.class_to_idx
class_to_idx

In [None]:
train_data.targets[:100]

In [None]:
len(train_data)

In [None]:
torch.randint(0, len(train_data), size=[1]).item()

In [None]:
# Show a 4x4 grid of randomly chosen training images, each titled with its class name
torch.manual_seed(42)  # fixed seed so the same 16 images are drawn on every run
rows, cols = 4, 4
fig = plt.figure(figsize=(9, 9))
for i in range(1, rows * cols + 1):
    # Draw one random dataset index per grid position
    random_idx = int(torch.randint(0, len(train_data), size=[1]))
    img, label = train_data[random_idx]

    # Work on the subplot's own axes instead of pyplot's "current axes" state
    ax = fig.add_subplot(rows, cols, i)
    ax.imshow(img.squeeze(), cmap="gray")
    ax.set_title(class_names[label])
    ax.axis(False)

## 3. Prepare a DataLoader

Right now, our data is in the form of PyTorch datasets.

Specifically, we want to turn (convert) our data into batches (mini-batches).

Why would we do this?

1. It is more computationally efficient, as in, your computing hardware may not be able to look at (store in memory) 60000 images in one hit. So we break it down to 32 images at a time (batch size of 32).
2. It gives our neural network more chances to update its gradients per epoch.

In [None]:
train_data

In [None]:
from torch.utils.data import DataLoader

# Number of samples per mini-batch (hyperparameter)
BATCH_SIZE = 32

# Wrap each dataset in a DataLoader so it can be iterated batch by batch.
# The training loader is created with shuffle=True, the test loader with shuffle=False.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

train_dataloader, test_dataloader

In [None]:
# Let's check out what we've created
print(f"DataLoaders: {train_dataloader, test_dataloader}")
print(f"Length of train_dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}...")
print(f"Length of test_dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}...")

In [None]:
1875 * 32

In [None]:
313 * 32

In [None]:
10000 / 32

In [None]:
batch_images, batch_labels = next(iter(train_dataloader))

In [None]:
batch_images.shape

In [None]:
batch_labels.shape

In [None]:
len(batch_images)

In [None]:
torch.randint(0, len(batch_images), size=[1]).item()

In [None]:
class_names

In [None]:
# Display one randomly chosen image from the current batch, titled with its class name.
# Uncomment the next line to make the choice reproducible.
# torch.manual_seed(42)
random_idx = int(torch.randint(0, len(batch_images), size=[1]))
img = batch_images[random_idx]
label = batch_labels[random_idx]

plt.imshow(img.squeeze(), cmap="gray")
plt.title(class_names[label])
plt.axis("off");

## 4. Model 0: Build a baseline model

In [None]:
image.shape

In [None]:
my_flatten_layer = nn.Flatten()

my_flatten_layer(image).shape

In [None]:
28 * 28

In [None]:
plt.imshow(my_flatten_layer(image))

In [None]:
print(f"Image before flattening {image.shape}")
print(f"Image after flatting: {my_flatten_layer(image).shape}")

In [None]:
len(class_names)

In [None]:
class FashionMNISTModelV0(nn.Module):
    """Baseline model: flatten the input, then apply two stacked linear layers.

    Args:
        input_shape: Number of input features of the first linear layer
            (i.e. the size of one sample after flattening).
        hidden_units: Number of output features of the first linear layer.
        output_shape: Number of output features of the second linear layer
            (one neuron per class).
    """

    def __init__(self,
                 input_shape: int,
                 hidden_units: int,
                 output_shape: int):
        super().__init__()

        # The layers run in list order; there is no activation between the
        # two linear layers.
        layers = [
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Pass the input data `x` through the layer stack and return the result."""
        outputs = self.layer_stack(x)
        return outputs

In [None]:
model = FashionMNISTModelV0(input_shape=784,
                            hidden_units=10,
                            output_shape=len(class_names))

model

In [None]:
model.state_dict()

In [None]:
dummy_x = image.unsqueeze(dim=0)

model(dummy_x)

## 3.1 Setup loss, optimizer and evaluation metrics

In [None]:
next(iter(model.parameters()))

In [None]:
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=0.01)

In [None]:
my_tensor_A = torch.tensor([1, 2, 3, 4])
my_tensor_B = torch.tensor([10, 2, 40, 4])

torch.eq(my_tensor_A, my_tensor_B).sum().item()

In [None]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that equal their target label.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with `y_true`.

    Returns:
        The share of matching elements as a percentage (float), or 0.0 when
        `y_pred` is empty.
    """
    total = len(y_pred)
    if not total:
        # Nothing to score: avoid a ZeroDivisionError on an empty batch
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

## 3.2 Creating a function to time our experiments

Machine Learning is very experimental.

Two of the main things you'll often track are:
1. The model's performance (loss and accuracy)
2. How fast it runs

In [None]:
from timeit import default_timer as timer
def print_train_time(start: float,
                     end: float,
                     device: torch.device = None):
    """Print and return the difference between a start and an end time.

    Args:
        start: Timer reading taken before the work started.
        end: Timer reading taken after the work finished.
        device: Label of the device the work ran on; it is only interpolated
            into the printed message.

    Returns:
        The elapsed time `end - start`, in the same unit as the inputs
        (the message reports it as seconds).
    """
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

In [None]:
start_time = timer()
# Some code...
end_time = timer()
print_train_time(start=start_time,
                 end=end_time,
                 device="cpu")

## 3.3 Creating a training loop and testing loop and training the model on batches of data

In [None]:
train_data

In [None]:
next(iter(train_dataloader))

In [None]:
from tqdm.auto import tqdm

# Set the seed and start the timer
torch.manual_seed(42)
train_time_start_on_cpu = timer()

# Set the number of epochs
epochs = 3

# Train for `epochs` passes over the training data, evaluating on the test data
# after every pass
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n------")

    ### Training
    # Accumulate the loss as a plain Python float: adding the loss tensor itself
    # would keep every batch's autograd history alive for the whole epoch.
    train_loss = 0.0

    # Training mode only needs to be set once per epoch; model.eval() in the
    # testing section below is what switches it off again.
    model.train()
    for batch, (X, y) in enumerate(train_dataloader):
        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()  # cumulatively add up the per-batch loss

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward (backpropagation)
        loss.backward()

        # 5. Optimizer step (update the parameters)
        optimizer.step()

        # Print out what is happening
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples.")

    # Average the accumulated train loss over the number of batches
    train_loss /= len(train_dataloader)

    ### Testing
    test_loss, test_acc = 0.0, 0.0
    model.eval()
    with torch.inference_mode():
        for X_test, y_test in test_dataloader:
            # 1. Forward pass
            test_pred = model(X_test)

            # 2. Calculate loss
            test_loss += loss_fn(test_pred, y_test).item()

            # 3. Calculate accuracy (logits -> predicted labels via argmax over classes)
            test_acc += accuracy_fn(y_true=y_test,
                                    y_pred=test_pred.argmax(dim=1))

        # Average the test loss per batch
        test_loss /= len(test_dataloader)
        # Average the test accuracy per batch
        test_acc /= len(test_dataloader)

    # Print out what is happening
    print(f"\nTrain Loss: {train_loss:.4f} | Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")


train_time_end_on_cpu = timer()
total_train_time_model = print_train_time(start=train_time_start_on_cpu,
                                          end=train_time_end_on_cpu,
                                          device=str(next(model.parameters()).device))

In [None]:
# Device Agnostic Code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
def make_predictions(model: torch.nn.Module,
                     data: list,
                     device: torch.device = device):
    """Return one vector of prediction probabilities per sample in `data`.

    The model is moved to `device` and put into eval mode, then every sample
    is passed through it individually under `torch.inference_mode()`.

    Args:
        model: Module whose output for a single sample is a vector of raw logits.
        data: Iterable of sample tensors without a batch dimension.
        device: Device the model and samples are moved to. The default is the
            value the module-level `device` had when this function was defined.

    Returns:
        The per-sample probability vectors (moved to the CPU) stacked into a
        single tensor, one row per sample.
    """
    model.to(device)
    model.eval()

    probabilities = []
    with torch.inference_mode():
        for item in data:
            # Add a batch dimension and move the sample to the target device
            batched = item.unsqueeze(dim=0).to(device)

            # Raw logits -> probabilities; squeeze drops the batch dimension again
            logits = model(batched)
            probs = torch.softmax(logits.squeeze(), dim=0)

            # Collect on the CPU for further calculations
            probabilities.append(probs.cpu())

    return torch.stack(probabilities)

In [None]:
import random
# random.seed(42)
test_samples = []
test_labels = []

# Sample 9 *indices* and load only those samples. Building list(test_data)
# first would load and transform every test image just to keep nine of them.
for idx in random.sample(range(len(test_data)), k=9):
    sample, label = test_data[idx]
    test_samples.append(sample)
    test_labels.append(label)

# View the first sample shape
test_samples[0].shape

In [None]:
len(test_samples)

In [None]:
plt.imshow(test_samples[0].squeeze(), cmap="gray");

In [None]:
# Make predictions
pred_probs = make_predictions(model=model,
                              data=test_samples)

# View first two predictions probabilities
pred_probs[:2]

In [None]:
# Convert prediction probabilities into labels
pred_classes = pred_probs.argmax(dim=1)
pred_classes

In [None]:
class_names

In [None]:
test_labels

In [None]:
# Plot the sampled test images in a 3x3 grid, each titled "Pred | Truth"
nrows, ncols = 3, 3
plt.figure(figsize=(9, 9))
for i, sample in enumerate(test_samples):
    # Subplot positions are 1-based
    plt.subplot(nrows, ncols, i + 1)
    plt.imshow(sample.squeeze(), cmap="gray")

    # Class names (text form, e.g. "Sandal") of the prediction and of the truth label
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]
    title_text = f"Pred: {pred_label} | Truth: {truth_label}"

    # Green title when prediction and truth agree, red otherwise
    title_color = "g" if pred_label == truth_label else "r"
    plt.title(title_text, fontsize=10, c=title_color)

    plt.axis(False)

# Making confusion matrix for further prediction evaluation

1. A confusion matrix is a fantastic way to visualize your classification model performance
2. Make a confusion matrix with `torchmetrics.ConfusionMatrix`
3. Plot the confusion matrix using `mlxtend.plotting.plot_confusion_matrix`

In [None]:
# Import tqdm.auto
from tqdm.auto import tqdm

# 1. Make predictions with our trained model on the test dataset
y_preds = []
model.to(device)  # make sure the model is on the same device the batches are sent to
model.eval()
with torch.inference_mode():
    for X, y in tqdm(test_dataloader, desc="Making Predictions..."):
        # Send the data and the targets to the target device
        X, y = X.to(device), y.to(device)

        # Do the forward pass
        y_logit = model(X)

        # Turn predictions from logits -> prediction probabilities -> prediction labels.
        # Both softmax and argmax must run over the class dimension (dim=1):
        # normalising over dim=0 would mix the samples of a batch together and
        # can change which class wins. No squeeze() here, so a batch holding a
        # single sample keeps its batch dimension.
        y_pred = torch.softmax(y_logit, dim=1).argmax(dim=1)

        # Put predictions on CPU for evaluation
        y_preds.append(y_pred.cpu())

# Concatenate list of per-batch predictions into a single tensor
y_pred_tensor = torch.cat(y_preds)
y_pred_tensor[:10]

In [None]:
import mlxtend

In [None]:
mlxtend.__version__

In [None]:
!pip install torchmetrics

In [None]:
import torchmetrics

In [None]:
test_data.targets

In [None]:
class_names

In [None]:
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

# 1. Setup confusion matrix instance and compare between predictions and targets (labels).
#    The task name is given in lowercase, the spelling the torchmetrics
#    documentation lists for this argument.
confmat = ConfusionMatrix(task="multiclass",
                          num_classes=len(class_names))
confmat_tensor = confmat(preds=y_pred_tensor,
                         target=test_data.targets)

# 2. Plot the confusion matrix
fig, ax = plot_confusion_matrix(
    conf_mat=confmat_tensor.numpy(),
    class_names=class_names,
    figsize=(10, 7)
)