In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

import numpy as np
import random
import time
import os
import sys

from models.VGG import *
from utils import *

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [2]:
data_path = '..\\data\\tiny-imagenet-200'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 128

In [3]:
raw_data = RawData(data_path)


num_classes = len(raw_data.labels_t())
print(f"Number of classes: {num_classes}")


Number of classes: 200


In [4]:
train_dataset = TinyImageNetDataset(type_='train', raw_data=raw_data)
print(f"Number of training examples: {len(train_dataset)}")
print(f"Shape of the training data: {train_dataset[0][0].shape}")

Loading preprocessed training data from ..\data\tiny-imagenet-200\process\train_data.npz...
Number of training examples: 100000
Shape of the training data: torch.Size([3, 64, 64])


In [5]:
means = torch.zeros(3)
stds = torch.zeros(3)
for img, _ in train_dataset:
    means += torch.mean(img, dim=(1, 2))
    stds += torch.std(img, dim=(1, 2))

means /= len(train_dataset)
stds /= len(train_dataset)
print(f"Means: {means}")
print(f"Stds: {stds}")


Means: tensor([0.4802, 0.4481, 0.3975])
Stds: tensor([0.2296, 0.2263, 0.2255])


In [17]:
def DataLoaderSplit(raw_data, batch_size, val_ratio=0.2, force_reload=False):
    """
    Prepare DataLoaders for training, validation, and testing.

    Args:
        raw_data (RawData): Instance of the RawData class, providing data and labels.
        batch_size (int): Batch size for DataLoaders.
        val_ratio (float): Proportion of training data to use for validation.

    Returns:
        train_loader, val_loader, test_loader
    """
    # # Load raw data
    # raw_data = RawData(data_path)
    # print("Raw data loaded, labels: ", len(raw_data.labels_t()))
    # Means: tensor([0.4802, 0.4481, 0.3975])
    # Stds: tensor([0.2296, 0.2263, 0.2255])
    pretrained_means = [0.4802, 0.4481, 0.3975]
    pretrained_stds = [0.2296, 0.2263, 0.2255]

    train_transforms = transforms.Compose([
                            transforms.ToPILImage(),
                            transforms.RandomRotation(5),
                            transforms.RandomHorizontalFlip(0.5),
                            transforms.ToTensor(),
                            transforms.Normalize(mean=pretrained_means,
                                                    std=pretrained_stds)
                        ])
    test_transforms = transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(mean=pretrained_means,
                                                std=pretrained_stds)
                       ])

    # Create the test dataset from the validation data in the original dataset
    test_dataset = TinyImageNetDataset(type_='val', raw_data=raw_data, transform=test_transforms, force_reload=force_reload)
    print("Validation dataset created, size: ", len(test_dataset))

    # Create the full training dataset from the original training data
    full_train_dataset = TinyImageNetDataset(type_='train', raw_data=raw_data, transform=train_transforms, force_reload=force_reload)
    print("Full training dataset created, size: ", len(full_train_dataset))

    # Calculate the sizes of the new training and validation sets
    full_train_size = len(full_train_dataset)
    val_size = int(full_train_size * val_ratio)
    train_size = full_train_size - val_size

    # Split the dataset into new training and validation datasets
    train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

    print("Training and validation datasets created, sizes: ", len(train_dataset), len(val_dataset))
    for i, data in enumerate(train_dataset):
        x, y = data
        print(y)
        if i == 10:
            break

    # Create DataLoaders for train, validation, and test datasets
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    print("DataLoaders created.")

    return train_loader, val_loader, test_loader


In [18]:
train_loader, val_loader, test_loader = DataLoaderSplit(raw_data, batch_size, val_ratio=0.2, force_reload=False)

Loading preprocessed validation data from ..\data\tiny-imagenet-200\process\val_data.npz...
Validation dataset created, size:  10000
Loading preprocessed training data from ..\data\tiny-imagenet-200\process\train_data.npz...
Full training dataset created, size:  100000
Training and validation datasets created, sizes:  80000 20000
117
34
23
26
95
57
69
155
95
68
77
DataLoaders created.


In [34]:
loader_size = len(train_loader)
print(f"Training loader size: {loader_size}")
iterator = iter(train_loader)
labels, images = next(iterator)
print(f"Image shape: {images.shape}")
print(f"Label shape: {labels.shape}")

Training loader size: 625
Image shape: torch.Size([128, 3, 64, 64])
Label shape: torch.Size([128])


In [None]:
def train(model, iterator, optimizer, criterion, scheduler, device):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    with tqdm(total=len(iterator), desc='Train', leave=False) as t:
        for i, (x,label) in enumerate(iterator):
            x = x.to(device)
            y = label.to(device)
            optimizer.zero_grad()
            y_pred, h = model(x)
            loss = criterion(y_pred, y)
            acc = calculate_accuracy(y_pred, y)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            epoch_loss += loss.item()
            epoch_acc += acc.item()
            t.set_postfix(loss=epoch_loss / (i + 1), acc=epoch_acc / (i + 1))
            t.update(1)
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, iterator, criterion, device):
    epoch_loss = 0
    epoch_acc = 0
    model.eval()
    with torch.no_grad():
        with tqdm(total=len(iterator), desc='Eval', leave=False) as t:
            for i, (x, label) in enumerate(iterator):
                x = x.to(device)
                y = label.to(device)
                y_pred, h = model(x)
                loss = criterion(y_pred, y)
                acc = calculate_accuracy(y_pred, y)
                epoch_loss += loss.item()
                epoch_acc += acc.item()
                t.set_postfix(loss=epoch_loss / (i + 1), acc=epoch_acc / (i + 1))
                t.update(1)

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [36]:
def train_model(model, num_epochs, train_loader, val_loader, optimizer, criterion, scheduler=None, save_best=True, device='cpu'):
    loss_history = {'train': [], 'val': []}
    model = model.to(device)
    best_val_acc = 0.0
    with tqdm(total=num_epochs) as pbar:
        for epoch in range(num_epochs):
            # Train the model
            model.train()
            train_loss, train_acc = train(model, train_loader, optimizer, criterion, scheduler, device)

            # Evaluate the model
            valid_loss, valid_acc = evaluate(model, val_loader, criterion, device)

            pbar.set_postfix(train_loss=train_loss, valid_loss=valid_loss)

            # Save the best model
            if valid_acc > best_val_acc:
                best_val_acc = valid_acc
                best_parms = model.state_dict()
            loss_history['train'].append(train_loss)
            loss_history['val'].append(valid_loss)
            pbar.update(1)
            
    if save_best:
        timestamp = time.strftime("%Y_%m_%d_%H_%M", time.localtime())
        torch.save(best_parms, './out/best_model_{}.pth'.format(timestamp))
        print('Best model saved as best_model_{}.pth'.format(timestamp))

    return loss_history

In [37]:
vgg11_layers = get_vgg_layers(vgg11_config, batch_norm=True)
model = VGG(vgg11_layers, num_classes).to(device)

num_epochs = 100
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

In [38]:
train_model(model, num_epochs, train_loader, val_loader, optimizer, criterion, device=device)

  0%|          | 0/100 [00:29<?, ?it/s]


KeyboardInterrupt: 