In [1]:
import os
import torch
import torchvision
import tarfile
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split

In [None]:
project_name = "05-cifar10-cnn"

In [None]:
dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
# Fetch the archive into the current directory (it can also be downloaded manually by
# visiting the URL); the extraction cell opens it as ./cifar10.tgz.
download_url(dataset_url, ".")

In [None]:
# Unpack the downloaded archive into ./data.
with tarfile.open("./cifar10.tgz", "r:gz") as tar:
    # The archive comes from the network, so use the "data" extraction filter when this
    # Python version provides it: it rejects members that would be written outside ./data
    # and other unsafe entries. Older interpreters fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path="./data", filter="data")
    else:
        tar.extractall(path="./data")

In [None]:
data_dir = "./data/cifar10"

# Top-level contents of the extracted dataset directory.
print(os.listdir(data_dir))
# Entries of train/ are used as the class names (e.g. train/airplane is listed in a later cell).
classes = os.listdir(data_dir + "/train")
print(classes)

In [None]:
# Peek at one training class folder: count its files and show the first five file names.
airplane_files = os.listdir(data_dir + "/train/airplane")
print("No. of training examples for airplanes:", len(airplane_files))
print(airplane_files[:5])

In [None]:
# Same check for one test class folder: count its files and show the first five file names.
ship_test_files = os.listdir(data_dir + "/test/ship")
print("No. of test examples for ship:", len(ship_test_files))
print(ship_test_files[:5])

In [None]:
from torchvision.datasets import ImageFolder  # A generic data loader where the images are arranged in a specific way.
from torchvision.transforms import ToTensor  # ToTensor is a class with a magic method `__call__()`, so it can be regarded as function. It is used to transform an input image and return a transformed version of the image, like a Tensor. 

In [486]:
# Build the training dataset from the train/ folder; ToTensor() is applied to every loaded image.
# The dataset supports indexing (dataset[i]) and len(dataset), both of which later cells rely on.
dataset = ImageFolder(data_dir+"/train", transform=ToTensor())
dataset

Dataset ImageFolder
    Number of datapoints: 50000
    Root location: ./data/cifar10/train
    StandardTransform
Transform: ToTensor()

In [None]:
# Indexing the dataset returns a (sample, target) pair: the transformed image and its class index.
img, label = dataset[0]
print(type(dataset[0]))  # a tuple, because __getitem__ returns two values (sample, target)
# Note: the first value is a single sample (one image), not the whole list of samples.
print(img.shape, label)
img  # a torch.Tensor (not echoed: a cell only displays its last expression)
type(img)

In [None]:
type(dataset.imgs)  # dataset is an ImageFolder instance (not a dict); dataset.imgs is a list

In [None]:
isinstance(dataset, dict)

In [None]:
type(dataset)  # torchvision.datasets.folder.ImageFolder, it's a folder object.

In [None]:
dataset.imgs[0]  # the imgs attribute is set by self.samples, i.e., self.imgs = self.samples, which is a list with its element as a form of (path_to_sample, class_index).

In [None]:
dataset[0]  # equivalent to (dataset.transform(dataset.loader(dataset.imgs[0][0])), dataset.imgs[0][1])

In [None]:
first_sample = dataset.imgs[0]  # a (path_to_sample, class_index) tuple
first_sample
# dataset.loader is a callable that takes the sample's file path and returns the loaded image
# (presumably a PIL image from torchvision's default loader - TODO confirm).
dataset.loader(first_sample[0])
# dataset.transform (ToTensor() here) then converts that image into a torch.Tensor.


In [None]:
type(first_sample)  # the type of the data element is tuple



In [None]:
first_sample  # a (path_to_sample, class_index) tuple taken from dataset.imgs; it holds no tensor


In [None]:
type(first_sample)  # the type of the data element is tuple

In [None]:
dataset.transform(dataset.loader(first_sample[0]))  # load the image from its path, then convert it to a tensor

In [None]:
img, label = first_sample  # (path_to_sample, class_index)
img

In [None]:
label

In [None]:
type(img)  # img is first_sample[0] at this point, i.e. the sample's file path (presumably a str), not a tensor

In [None]:
# Replace the path held in img with the loaded, transformed image tensor.
# NOTE(review): not idempotent - re-running this cell passes the tensor to the loader instead of a path.
img = dataset.transform(dataset.loader(img))
img.shape

In [None]:
print(dataset.classes)
print(dataset.class_to_idx)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline 
# `%matplotlib inline` renders figures directly in the notebook output instead of opening them in a separate window.

matplotlib.rcParams['figure.facecolor'] = '#ffffff'

In [None]:
def show_example(img, label):
    """Print the class name for ``label`` and draw ``img`` on its own figure.

    Args:
        img: image tensor whose first dimension is moved last before plotting
            (channels-first input is assumed - the layout ToTensor() produces).
        label: integer index into ``dataset.classes``.
    """
    print("Label: ", dataset.classes[label], "(" + str(label) + ")")
    # Use a fresh figure per call: drawing on the implicit current axes makes several
    # calls in one cell paint over each other, so only the last image would be visible.
    _, ax = plt.subplots()
    # permute(1, 2, 0) reorders dims (0, 1, 2) -> (1, 2, 0), i.e. puts the channel axis last for imshow.
    ax.imshow(img.permute(1, 2, 0))

In [None]:
# The * operator unpacks each (img, label) tuple into show_example's two positional arguments.
show_example(*dataset[0])
show_example(*dataset[3])

In [None]:
# first_sample comes from dataset.imgs and holds a file path, which has no .permute();
# index the dataset itself to get the image tensor.
# permute(1, 2, 0) returns the same data with its dimensions reordered: dims (0, 1, 2) become (1, 2, 0).
dataset[0][0].permute(1, 2, 0)

In [None]:
# The un-permuted image tensor (first_sample[0] is only the file path).
# Expected to be channels-first, 3 x 32 x 32 - confirm with .shape.
dataset[0][0]


In [None]:
random_seed = 42
torch.manual_seed(random_seed)  # to set the seed for generating random numbers. Returns a torch.Generator object.

In [None]:
val_size = 5000  # number of samples held out for validation
train_size = len(dataset) - val_size  # every remaining sample is used for training

# random_split randomly partitions the dataset into non-overlapping subsets of the given lengths
# and returns them in the same order as the lengths.
train_ds, val_ds = random_split(dataset, [train_size, val_size])

len(train_ds), len(val_ds)

In [None]:
from torch.utils.data.dataloader import DataLoader
batch_size = 128

In [None]:
# set shuffle to true to have the data reshuffled at every epoch.
# set pin_memory to true, the data loader will copy Tensors into device/CUDA pinned memory before returning them.
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=True)

In [None]:
from torchvision.utils import make_grid  # it is a function to make a grid of images.

def show_batch(dl):
    """Plot the first batch yielded by ``dl`` as one image grid, 16 images per row.

    ``dl`` is an iterable of (images, labels) batches, e.g. a DataLoader.
    Nothing is drawn when ``dl`` yields no batches.
    """
    for batch in dl:
        images, _labels = batch
        _, axis = plt.subplots(figsize=(12, 6))
        # Hide the tick marks: they carry no meaning for an image grid.
        axis.set_xticks([])
        axis.set_yticks([])
        grid = make_grid(images, nrow=16)
        axis.imshow(grid.permute(1, 2, 0))
        return  # only the first batch is shown

show_batch(train_dl)
# print("*" * 10)
# show_batch(train_dl)

def apply_kernel(image, kernel):
    """Slide ``kernel`` over ``image`` with stride 1 and no padding.

    Both arguments are 2-D tensors. Each output element is the sum of the
    element-wise product between the kernel and the image patch under it,
    so the result has shape (rows - k_rows + 1, cols - k_cols + 1).
    """
    img_rows, img_cols = image.shape
    k_rows, k_cols = kernel.shape
    out_rows = img_rows - k_rows + 1
    out_cols = img_cols - k_cols + 1
    output = torch.zeros([out_rows, out_cols])
    for row in range(out_rows):
        for col in range(out_cols):
            patch = image[row:row + k_rows, col:col + k_cols]
            output[row, col] = (patch * kernel).sum()
    return output

In [None]:
# Worked example: slide a 3x3 kernel over a 5x5 image with apply_kernel, giving a 3x3 output.
sample_image = torch.tensor([
    [3, 3, 2, 1, 0],
    [0, 0, 1, 3, 1],
    [3, 1, 2, 2, 3],
    [2, 0, 0, 2, 2],
    [2, 0, 0, 0, 1]
], dtype=torch.float32)

sample_kernel = torch.tensor([
    [0, 1, 2], 
    [2, 2, 0], 
    [0, 1, 2]
], dtype=torch.float32)

apply_kernel(sample_image, sample_kernel)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# A minimal two-layer network used only to inspect output shapes:
#   Conv2d(3, 8, ...): 3 input channels -> 8 output channels, 3x3 kernel, stride 1, padding 1.
#   MaxPool2d(2, 2):   2x2 pooling window with stride 2.
# Calling simple_model(x) runs the layers in order via Sequential.forward.
simple_model = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(2, 2)
)

In [None]:
# Take one batch from the training loader and compare tensor shapes before and after simple_model.
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = simple_model(images)  # forward pass; the result is also a tensor
    print('out.shape:', out.shape)
    break  # a single batch is enough for a shape check

In [None]:
class ImageClassificationBase(nn.Module):
    """Training and validation helpers shared by image classifiers.

    Subclasses only implement ``forward``; the methods here call ``self(images)``
    and use cross-entropy as the loss.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch.

        The loss keeps its autograd graph so the caller can run ``backward()``.
        """
        images, labels = batch
        out = self(images)
        return F.cross_entropy(out, labels)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one validation batch."""
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """Combine per-batch validation results into epoch-level Python floats.

        ``outputs`` is the list of dicts produced by ``validation_step``. The result
        is the plain mean over batches (batches are not weighted by their size).
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch's training and validation metrics."""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['train_loss'], result['val_loss'], result['val_acc']))

def accuracy(outputs, labels):
    """Return the fraction of correct predictions as a 0-dim tensor.

    The prediction for each row of ``outputs`` is the index of its largest
    value along dim 1; it is compared element-wise with ``labels``.
    """
    preds = torch.max(outputs, dim=1)[1]  # [1] selects the indices of the row maxima
    num_correct = (preds == labels).sum().item()
    return torch.tensor(num_correct / len(preds))

In [None]:
class Cifar10CnnModel(ImageClassificationBase):
    """CNN classifier: five 3x3 convolutions, three 2x2 max-pools, then a 3-layer MLP head.

    The first Linear layer takes 256*4*4 features, which corresponds to 3-channel
    32x32 inputs after three halvings by max-pooling. The output has 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in execution order; their positions inside the
        # Sequential determine the parameter names in state_dict().
        feature_layers = [
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        classifier_layers = [
            nn.Flatten(),
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.network = nn.Sequential(*feature_layers, *classifier_layers)

    def forward(self, xb):
        """Run a batch of images through the network and return the raw class scores."""
        scores = self.network(xb)
        return scores
            

In [None]:
model = Cifar10CnnModel()
model

In [None]:
# Sanity-check the full model on one batch: each item from train_dl is a batch of up to batch_size samples.
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = model(images)  # forward pass through the untrained model
    print('out.shape:', out.shape)
    print('out[0]:', out[0])  # the raw output values for the first image in the batch
    break  # one batch is enough

In [None]:
# Inspect the structure of a single batch produced by train_dl.
for item in train_dl:
    print(type(item))
    # print(item)
    print(len(item[0]), len(item[1]))  # number of images and number of labels in the batch
    print(type(item[0]))
    print(item[0])  # NOTE(review): prints the whole image tensor of the batch, which is very verbose
    print(item[0].shape)
    break  # only the first batch is inspected

In [None]:
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

def to_device(data, device):
    """Move ``data`` to ``device``.

    A list or tuple is processed element by element (recursively) and always
    comes back as a list; anything else is moved with
    ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for element in data:
        moved.append(to_device(element, device))
    return moved

class DeviceDataLoader:
    """Wrap a data loader so every batch is moved to ``device`` as it is iterated."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches in the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield the wrapped loader's batches one by one, each moved to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
device = get_default_device()
device

In [None]:
# Wrap both loaders so each batch is moved to `device` while iterating; nothing is transferred yet.
# NOTE(review): re-running this cell wraps the already-wrapped loaders a second time.
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
to_device(model, device)  # move the model's parameters to the same device

In [None]:
@torch.no_grad()  # gradients are not tracked while this function runs
def evaluate(model, val_loader):
    """Run ``model`` over every batch of ``val_loader`` and return the aggregated metrics.

    The model is switched to evaluation mode first. Each batch goes through
    ``model.validation_step`` and the collected results are combined by
    ``model.validation_epoch_end``, whose return value is returned unchanged.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch metrics.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate passed to the optimizer.
        model: module providing ``training_step`` and ``epoch_end`` (and whatever
            ``evaluate`` needs for validation).
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to ``evaluate``.
        opt_func: optimizer factory called as ``opt_func(model.parameters(), lr)``.

    Returns:
        A list with one dict per epoch: the result of ``evaluate`` plus a
        ``'train_loss'`` entry holding the mean training loss of that epoch.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        model.train()  # back to training mode (evaluate() switches to eval mode)
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only a detached copy for reporting, so the stored losses do not
            # hold references to the autograd graph of every batch in the epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()  # apply one parameter update
            optimizer.zero_grad()  # clear gradients so they do not accumulate into the next batch
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [None]:
model = to_device(Cifar10CnnModel(), device)

In [None]:
evaluate(model, val_dl)

In [None]:
num_epochs = 10
opt_func = torch.optim.Adam
lr = 0.001

In [None]:
history = fit(num_epochs, lr, model, train_dl, val_dl, opt_func)

In [None]:
def plot_accuracies(history):
    """Plot the validation accuracy recorded in each entry of ``history``."""
    val_accs = []
    for entry in history:
        val_accs.append(entry['val_acc'])
    plt.plot(val_accs, '-x')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs')

In [None]:
plot_accuracies(history)

In [None]:
def plot_losses(history):
    """Plot training (blue) and validation (red) loss for each entry of ``history``.

    ``'train_loss'`` is read with ``.get`` so an entry without it contributes
    ``None``; ``'val_loss'`` is required.
    """
    train_curve, val_curve = [], []
    for entry in history:
        train_curve.append(entry.get('train_loss'))
    for entry in history:
        val_curve.append(entry['val_loss'])
    plt.plot(train_curve, '-bx')
    plt.plot(val_curve, '-rx')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['Training', "Validation"])
    plt.title('Loss vs. No. of epochs')

In [None]:
plot_losses(history)

In [None]:
test_dataset = ImageFolder(data_dir + '/test', transform=ToTensor())

In [None]:
def predict_image(img, model):
    """Return the predicted class name for a single image tensor.

    Args:
        img: one image tensor without a batch dimension.
        model: the classifier to query; it should already be on ``device``.

    Returns:
        The entry of ``dataset.classes`` with the highest model output.
    """
    # unsqueeze(0) adds a leading batch dimension of size 1, since the model works on batches.
    xb = to_device(img.unsqueeze(0), device)
    # Inference only: skip autograd bookkeeping so no graph is built for the prediction.
    with torch.no_grad():
        yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return dataset.classes[preds[0].item()]

In [None]:
img, label = test_dataset[0]
plt.imshow(img.permute(1, 2, 0))
print('Label:', dataset.classes[label], ', Predicted:', predict_image(img, model))

In [None]:
img, label = test_dataset[1002]
plt.imshow(img.permute(1, 2, 0))
print('Label:', dataset.classes[label], ', Predicted:', predict_image(img, model))

In [None]:
img, label = test_dataset[6153]
plt.imshow(img.permute(1, 2, 0))
print('Label:', dataset.classes[label], ', Predicted:', predict_image(img, model))

In [None]:
# Evaluate the trained model on the test set; batches use twice the training batch size, like val_dl.
test_loader = DeviceDataLoader(DataLoader(test_dataset, batch_size*2), device)
result = evaluate(model, test_loader)
result

In [None]:
torch.save(model.state_dict(), 'cifar10-cnn.pth')

In [None]:
model.parameters  # NOTE(review): not called, so this displays the bound method; model.parameters() yields the parameters

In [None]:
model.state_dict()

In [None]:
# Print every tensor stored in the model's state dict, each followed by a separator line.
separator = "<" * 10
for tensor in model.state_dict().values():
    print(tensor)
    print(separator)

In [None]:
model2 = to_device(Cifar10CnnModel(), device)

In [None]:
# map_location remaps the saved tensors onto the current device, so a checkpoint
# written on a GPU can still be loaded on a CPU-only machine (and vice versa).
model2.load_state_dict(torch.load('cifar10-cnn.pth', map_location=device))

In [None]:
evaluate(model2, test_loader)