<a href="https://colab.research.google.com/github/MihaiDogariu/Keysight-Deep-Learning-Fundamentals--v2-/blob/main/scripts/Unit_9_Image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image classification
This notebook implements the classic AlexNet architecture for real image classification. The demo dataset for this task is CIFAR10. The goal of the project is to design a network which, when presented with a natural image, will assign a label to the said image.

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import tqdm
import matplotlib.pyplot as plt


# Choose the available system configuration (CPU/GPU).
# `torch.accelerator` is only present in recent PyTorch releases, so fall back to
# the classic CUDA check when it is missing instead of failing with AttributeError.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Per-channel statistics fed to the normalization transform (one value per colour channel).
# As stated by the notebook, they are computed channel-wise on the training dataset only.
mean = [0.4914, 0.4822, 0.4465]
std = [0.2023, 0.1994, 0.2010]
normalize = transforms.Normalize(mean=mean, std=std)

In [None]:
class UnNormalize(object):
    """Invert a channel-wise normalization, i.e. compute ``x * std + mean`` per channel."""

    def __init__(self, mean, std):
        # One mean / std entry per channel, in channel order.
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalized tensor image of size (C, H, W).
        Returns:
            Tensor: A new, un-normalized image. The input tensor is left untouched.
        """
        # Work on a copy: the argument is frequently a view into a batch that is
        # still going to be fed to the model, so it must not be modified in place.
        restored = tensor.clone()
        for channel, m, s in zip(restored, self.mean, self.std):
            # Inverse of the normalize step -> channel.sub_(m).div_(s)
            channel.mul_(s).add_(m)
        return restored

# Shared instance built from the same mean/std lists given to `normalize`;
# it is applied to an image before it is displayed.
unnorm = UnNormalize(mean, std)

## 1. Data pre-processing

In [None]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           augment,
                           random_seed,
                           normalize,
                           valid_size=0.1,
                           shuffle=True):
    """Build the training and validation data loaders for CIFAR10.

    Both loaders read the CIFAR10 training split (``train=True``), downloaded into
    ``data_dir`` when needed. The sample indices are partitioned into two disjoint
    sets, each served by its own ``SubsetRandomSampler``.

    Args:
        data_dir: Root directory where the dataset is stored / downloaded.
        batch_size: Number of samples per batch for both loaders.
        augment: If True, training images go through a random crop and a random
            horizontal flip before being resized; otherwise the training pipeline
            is the same as the validation one.
        random_seed: Seed given to ``np.random.seed`` before the indices are
            shuffled (only used when ``shuffle`` is True). Note that this reseeds
            NumPy's global random generator.
        normalize: Transform applied as the last step of every pipeline.
        valid_size: Fraction, in [0, 1], of the samples held out for validation.
        shuffle: If True, shuffle the indices before splitting them.

    Returns:
        tuple: ``(train_loader, valid_loader)``.

    Raises:
        ValueError: If ``valid_size`` is outside [0, 1]. Such a value would
            otherwise produce a silently wrong split through slice semantics.
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(
            "valid_size must be in the range [0, 1], got {}".format(valid_size))

    # Deterministic transformations used for validation (no augmentation)
    valid_transform = transforms.Compose([
            transforms.Resize((227,227)),
            transforms.ToTensor(),
            normalize,
    ])
    if augment: # regularizing techniques
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(0.4),
            transforms.Resize((227,227)),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = valid_transform

    # Being a popular dataset, it can be downloaded directly through torchvision's datasets module.
    # Two dataset objects are built over the same split so that each can carry its own transform.
    train_dataset = datasets.CIFAR10(root=data_dir,
                                     train=True,
                                     download=True,
                                     transform=train_transform,
                                     )

    valid_dataset = datasets.CIFAR10(root=data_dir,
                                     train=True,
                                     download=True,
                                     transform=valid_transform,
                                     )

    # Select the train-val split
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # The first `split` indices go to validation, the remaining ones to training
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # Create dataloaders for the training and validation datasets
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    normalize,
                    shuffle=True):
    """Return a data loader over the CIFAR10 test split (``train=False``).

    Args:
        data_dir: Root directory where the dataset is stored / downloaded.
        batch_size: Number of samples per batch.
        normalize: Transform applied after resizing and tensor conversion.
        shuffle: Forwarded to the ``DataLoader``.

    Returns:
        The ``DataLoader`` serving the test images.
    """
    # Resize -> tensor -> normalize, with no augmentation
    eval_pipeline = transforms.Compose([
        transforms.Resize((227,227)),
        transforms.ToTensor(),
        normalize,
    ])

    # Fetch the test split, downloading it if it is not already in data_dir
    test_set = datasets.CIFAR10(root=data_dir,
                                train=False,
                                download=True,
                                transform=eval_pipeline)

    return torch.utils.data.DataLoader(test_set,
                                       batch_size=batch_size,
                                       shuffle=shuffle)


# Build the training / validation loaders: augmentation enabled, split shuffled with seed 1
train_loader, valid_loader = get_train_valid_loader(
    data_dir='./data', batch_size=64, augment=True, random_seed=1, normalize=normalize,
)

# Build the test loader from the same data directory, with the same batch size
test_loader = get_test_loader(data_dir='./data', batch_size=64, normalize=normalize)

## 2. Defining the model

Official documentation for each layer:
- convolutional: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
- fully connected: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html?highlight=linear#torch.nn.Linear
- max pooling: https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html?highlight=maxpool#torch.nn.MaxPool2d
- ReLU activation: https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html?highlight=relu#torch.nn.ReLU
- dropout regularization: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html?highlight=dropout#torch.nn.Dropout
- sequential composition of layers: https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html?highlight=sequential#torch.nn.Sequential

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style classifier: five convolutional stages followed by three fully connected ones.

    Every convolution is followed by batch normalization and ReLU; stages 1, 2 and 5
    also end with a 3x3 / stride-2 max pooling. The first fully connected layer expects
    9216 flattened features.

    Args:
        num_classes: Number of output scores produced by the last linear layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Assemble Conv2d -> BatchNorm2d -> ReLU, optionally followed by MaxPool2d(3, 2)."""
        modules = [
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
        return nn.Sequential(*modules)

    def __init__(self, num_classes=10):
        super().__init__()
        # Feature extractor (attribute names and order are kept so state_dict keys are stable)
        self.layer1 = self._conv_stage(3, 96, kernel_size=11, stride=4, padding=0, pool=True)
        self.layer2 = self._conv_stage(96, 256, kernel_size=5, stride=1, padding=2, pool=True)
        self.layer3 = self._conv_stage(256, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer4 = self._conv_stage(384, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer5 = self._conv_stage(384, 256, kernel_size=3, stride=1, padding=1, pool=True)
        # Classifier head
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(9216, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    def forward(self, x):
        """Run the batch ``x`` through all stages and return the per-class scores."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Collapse everything but the batch dimension before the linear layers
        flat = features.reshape(features.shape[0], -1)
        return self.fc2(self.fc1(self.fc(flat)))




## 3. Training the model

In [None]:
# Hyperparameters of the run
num_classes, num_epochs = 10, 20
batch_size = 64
learning_rate = 0.005

# Instantiate the network and move it to the selected device (hopefully a GPU)
model = AlexNet(num_classes)
model = model.to(device)

# Classification task -> cross-entropy loss
criterion = nn.CrossEntropyLoss()

# SGD with momentum and L2 regularization (weight decay)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

In [None]:
# Running the training
total_step = len(train_loader)

for epoch in tqdm(range(num_epochs)):
    # Training mode: dropout active, batch norm uses the statistics of each batch
    model.train(True)
    for i, (images, labels) in enumerate(train_loader):
        # Load tensors on the device
        images = images.to(device)
        labels = labels.to(device)

        # Forward propagation
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagation and weights update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reported once per epoch: step index and loss of the last batch of the epoch
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Running the model on the validation dataset
    # Evaluation mode: dropout disabled, batch norm uses its running statistics
    model.train(False)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score on the class dimension
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        # Report the number of images actually evaluated rather than a hard-coded count
        print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total))


## 4. Testing the model

In [None]:
# Running the model on the test set
# Switch to evaluation mode explicitly (dropout off, batch norm using running statistics)
# so that this cell does not depend on the mode left behind by a previously run cell.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score on the class dimension
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    # Report the number of images actually evaluated rather than a hard-coded count
    print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

In [None]:
# Human-readable class names, keyed by the integer class index
labels = dict(enumerate([
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]))


In [None]:
test_features, test_labels = next(iter(test_loader)) # extract a batch of images and labels from the test dataset using the data loader

# Predict first, on the untouched (still normalized) batch, in evaluation mode and without gradient tracking
model.eval()
with torch.no_grad():
    pred = model(test_features.to(device))[0].cpu().numpy()
pred_label = int(np.argmax(pred))
label = test_labels[0].item()

# Display the first image. A copy is un-normalized so the batch itself is never modified.
img = test_features[0].clone().squeeze()  # extract only the first image from the batch and discard dimensions of value = 1
img = unnorm(img)                          # unnormalize the image
npimg = img.numpy()                        # transform the tensor into an np array
img = np.transpose(npimg, (1, 2, 0))       # transform the image from (3, 227, 227) to (227, 227, 3) for python display
img = np.clip(img, 0.0, 1.0)               # keep the values inside the [0, 1] range expected by imshow for float images
plt.imshow(img)
plt.show()
print(f"True label: {labels[label]}\t\t Predicted label: {labels[pred_label]}")