In [2]:
import torch
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from torch.utils.data import random_split
from torch.utils.data.dataloader import DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [8]:
# Seed PyTorch's global RNG up front so later random operations in this
# notebook start from a known state.
random_seed = 420
torch.manual_seed(random_seed)

# Location of the training images and the mini-batch size used by the loaders.
train_dataset_root_path = "./data/cifar10/train"
batch_size = 128

# Loading the dataset
In the explore_dataset.ipynb we saw that the images of cifar are RGB images, so each image has a dimension of 32 * 32 * 3 (H * W * C).
Now we load the training dataset with the help of the ImageFolder dataset class, which is specific to dataset folder structures like that of cifar.
By providing ToTensor as transform, images are transformed from PIL.Image.Image objects to tensors [C * H * W] and values are rescaled from [0,255] to [0.0, 1.0] 

In [9]:
# Build the dataset over the training folder; ToTensor is applied to each image
# so samples come back as tensors instead of PIL images.
train_dataset = ImageFolder(train_dataset_root_path, transform=ToTensor())

ImageFolder inherits from VisionDataset, so it should be compatible with further torchvision functions.  
As we can see by inspecting the variable train_dataset, the ImageFolder or VisionDataset does not load all images at once, but creates an index structure over the existing folder structure (a mapping of index to image location on disk)
# Splitting the dataset
We set aside some of the training samples for validation. Given the VisionDataset, the random_split function creates two new objects of type Subset. A Subset consists of a reference to the original VisionDataset (here the ImageFolder) and a sublist of the indices.

In [10]:
# Hold out a fixed number of training images for validation.
validation_dataset_size = 5000
train_dataset_size = len(train_dataset) - validation_dataset_size
assert train_dataset_size > 0, "validation split is larger than the dataset"

# Use a dedicated, explicitly seeded generator instead of the global RNG.
# The split then stays the same for a given seed no matter which cells were
# executed (or re-executed) before this one.
train_subset, validation_subset = random_split(
    train_dataset,
    [train_dataset_size, validation_dataset_size],
    generator=torch.Generator().manual_seed(random_seed),
)

# DataLoaders
DataLoaders are yet another layer on top of the Subsets we created. Each dataloader combines a sampling method, e.g. randomly picking samples, with a dataset so that we can easily draw 
batches of samples.

In [11]:
# Pinned (page-locked) host memory only pays off when batches are copied to a
# GPU, so switch it on exactly when CUDA is available.
use_pinned_memory = torch.cuda.is_available()

# Training loader: reshuffle the samples every epoch.
train_data_loader = DataLoader(
    dataset=train_subset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=use_pinned_memory,
)

# Validation loader: shuffling is not needed for evaluation; a fixed order
# keeps the batches identical from one evaluation run to the next.
validation_data_loader = DataLoader(
    dataset=validation_subset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=use_pinned_memory,
)

# Constructing the CNN Network

In [12]:
from models import Cifar10CnnModel, accuracy, evaluate, fit

In [7]:
# Instantiate the CNN from the project's models module; no training has
# happened yet at this point.
model = Cifar10CnnModel()

In [8]:
# Baseline check of the untrained model. Note that the *training* loader is
# passed here, even though the returned keys are named val_loss / val_acc.
evaluate(model, train_data_loader)

{'val_loss': 2.3028876781463623, 'val_acc': 0.10051688551902771}

=> without training 0.10051 accuracy makes sense with 10 classes

In [44]:
# Hyper-parameters handed to fit().
lr = 1e-3                    # learning rate
num_epochs = 10              # number of training epochs
opt_func = torch.optim.Adam  # optimizer class (the class itself, not an instance)

**Note**: I do not have CUDA available locally, so up to here nothing is done to shift load to the GPU. Should move to Colab and make use of GPU as training seems not feasible with my laptop CPU... (after 41 minutes not one of 10 epochs is done)

In [None]:
# Train `model` with the hyper-parameters defined above and keep fit()'s return
# value in `history`. Very slow on a laptop CPU (see the note above), so this
# is meant to be run on a GPU machine.
history = fit(num_epochs, lr, model, train_data_loader, validation_data_loader, opt_func)

### Test Plain18Layer

In [41]:
from importlib import reload

import custom_cnn.models as models

# Re-import the module so edits made to custom_cnn/models.py since the first
# import are picked up, then build the network from the refreshed module.
models = reload(models)
pl18_model = models.Plain18Layer()

In [42]:
# Baseline check of the untrained Plain18Layer network on the validation loader.
evaluate(pl18_model, validation_data_loader)

{'val_loss': 2.3025715351104736, 'val_acc': 0.10047468543052673}

In [None]:
# Train the Plain18Layer network built in this section. The original call
# passed `model` (the Cifar10CnnModel from the previous section), which looks
# like a copy-paste slip.
history = fit(num_epochs, lr, pl18_model, train_data_loader, validation_data_loader, opt_func)


# Load a model saved after training in colab

In [13]:
from models import Cifar10CnnModel

# Rebuild the architecture, then restore the weights that were trained on
# Colab. The state_dict is loaded into `model.network`; map_location remaps all
# stored tensors to the CPU so a checkpoint written on a GPU machine loads here.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model = Cifar10CnnModel()
model.network.load_state_dict(torch.load("custom_cnn/savedmodels/Cifar10CnnModel_20231128_12:26", map_location=torch.device('cpu')))

# Put the restored network into evaluation mode before inference so that
# layers such as dropout / batch norm (if present) behave deterministically.
model.network.eval()

evaluate(model, validation_data_loader)

KeyboardInterrupt: 

In [14]:
from models import Plain18Layer

# Rebuild the architecture, then restore the weights that were trained on
# Colab; map_location remaps all stored tensors to the CPU.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
# NOTE(review): with the current Plain18Layer definition this load was observed
# to fail with a state_dict key/shape mismatch (the checkpoint contains
# batch-norm running statistics that the current network does not define).
# The class definition must match the version that produced the checkpoint.
model = Plain18Layer()
model.network.load_state_dict(torch.load("custom_cnn/savedmodels/Plain18Layer20231130_19:29", map_location=torch.device('cpu')))

# Put the restored network into evaluation mode before inference so that
# batch-norm layers use their stored running statistics.
model.network.eval()

evaluate(model, validation_data_loader)

RuntimeError: Error(s) in loading state_dict for Sequential:
	Missing key(s) in state_dict: "4.weight", "13.weight", "22.weight", "31.weight", "38.bias", "40.weight", "40.bias". 
	Unexpected key(s) in state_dict: "44.weight", "45.weight", "45.bias", "45.running_mean", "45.running_var", "45.num_batches_tracked", "47.weight", "48.weight", "48.bias", "48.running_mean", "48.running_var", "48.num_batches_tracked", "52.weight", "52.bias", "3.weight", "3.bias", "3.running_mean", "3.running_var", "3.num_batches_tracked", "5.weight", "6.bias", "6.running_mean", "6.running_var", "6.num_batches_tracked", "9.weight", "9.bias", "9.running_mean", "9.running_var", "9.num_batches_tracked", "12.weight", "12.bias", "12.running_mean", "12.running_var", "12.num_batches_tracked", "14.weight", "15.bias", "15.running_mean", "15.running_var", "15.num_batches_tracked", "18.weight", "18.bias", "18.running_mean", "18.running_var", "18.num_batches_tracked", "21.weight", "21.bias", "21.running_mean", "21.running_var", "21.num_batches_tracked", "23.weight", "24.bias", "24.running_mean", "24.running_var", "24.num_batches_tracked", "27.weight", "27.bias", "27.running_mean", "27.running_var", "27.num_batches_tracked", "30.weight", "30.bias", "30.running_mean", "30.running_var", "30.num_batches_tracked", "32.weight", "33.bias", "33.running_mean", "33.running_var", "33.num_batches_tracked", "36.weight", "36.bias", "36.running_mean", "36.running_var", "36.num_batches_tracked", "39.weight", "39.bias", "39.running_mean", "39.running_var", "39.num_batches_tracked", "41.weight", "42.running_mean", "42.running_var", "42.num_batches_tracked". 
	size mismatch for 6.weight: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]).
	size mismatch for 11.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 16, 3, 3]).
	size mismatch for 15.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for 20.weight: copying a param with shape torch.Size([32, 32, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 32, 3, 3]).
	size mismatch for 24.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for 26.weight: copying a param with shape torch.Size([64, 32, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]).
	size mismatch for 29.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 64, 3, 3]).
	size mismatch for 33.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for 35.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]).
	size mismatch for 38.weight: copying a param with shape torch.Size([128, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([1024, 2048]).
	size mismatch for 42.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([10, 512]).
	size mismatch for 42.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([10]).

=> must use the pretrained params, but values are slightly off. Is this due to a different machine, or to using a GPU for training and the CPU for evaluation?