In [14]:
OPTIMIZE = False
TRAIN = False
TRAIN_EPOCHS = 40  # number of epochs for training
OPTIM_EPOCHS = 5  # number of epochs for optimization

In [8]:
!pip install optuna &> /dev/null

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import optuna
import json

In [10]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        torch.manual_seed(14)
        self.classes = ('plane', 'car', 'bird', 'cat', 'deer',
                        'dog', 'frog', 'horse', 'ship', 'truck')
        # conv layers
        self.conv1 = nn.Conv2d(3, 48, 3, padding=1)
        self.conv2 = nn.Conv2d(48, 96, 3, padding=1)
        self.conv3 = nn.Conv2d(96, 192, 3, padding=1)
        self.conv4 = nn.Conv2d(192, 256, 3, padding=1)
        # maxpool and drop
        self.pool2 = nn.MaxPool2d(2)
        self.drop = nn.Dropout(0.2)
        # batch normalizations
        self.batchnorm2d1 = nn.BatchNorm2d(48)
        self.batchnorm2d2 = nn.BatchNorm2d(96)
        self.batchnorm2d3 = nn.BatchNorm2d(192)
        self.batchnorm2d4 = nn.BatchNorm2d(256)
        self.batchnorm1d1 = nn.BatchNorm1d(1500)
        self.batchnorm1d2 = nn.BatchNorm1d(750)
        self.batchnorm1d3 = nn.BatchNorm1d(300)
        self.batchnorm1d4 = nn.BatchNorm1d(100)
        self.batchnorm1d5 = nn.BatchNorm1d(50)
        # fully connected layers
        self.fc1 = nn.Linear(256*8*8, 1500)
        self.fc2 = nn.Linear(1500, 750)
        self.fc3 = nn.Linear(750, 300)
        self.fc4 = nn.Linear(300, 100)
        self.fc5 = nn.Linear(100, 50)
        self.fc6 = nn.Linear(50, 10)

    def forward(self, x):
        x = x  # input layer

        x = self.batchnorm2d1(self.conv1(x))
        x = F.relu(x)
        x = self.batchnorm2d2(self.conv2(x))
        x = F.relu(x)
        x = self.pool2(x)
        x = self.drop(x)
        x = self.batchnorm2d3(self.conv3(x))
        x = F.relu(x)
        x = self.batchnorm2d4(self.conv4(x))
        x = F.relu(x)
        x = self.pool2(x)
        x = self.drop(x)
        x = x.view(-1, 256*8*8)
        x = self.batchnorm1d1(self.fc1(x))
        x = F.relu(x)
        x = self.batchnorm1d2(self.fc2(x))
        x = F.relu(x)
        x = self.drop(x)
        x = self.batchnorm1d3(self.fc3(x))
        x = F.relu(x)
        x = self.batchnorm1d4(self.fc4(x))
        x = F.relu(x)
        x = self.drop(x)
        x = self.batchnorm1d5(self.fc5(x))
        x = F.relu(self.fc6(x))
        # torch.nnCrossEntropyLoss() contain nn.LogSoftmax()
        # so there is no need to use softmax
        return x

    def fit(self, train_loader, optimizer, epochs, device='cpu', valid_loader=None, verbose=5):
        '''
            verbose is number of batch report prints for epoch. For example
            verbose=0 will not print any batch reports, whereas verbose=5 will
            print 5 batch reports per epoch.
        '''

        assert epochs > 0, "epochs cannot be smaller then 1"
        self.to(device)
        criterion = nn.CrossEntropyLoss()
        valid_accuracies = []

        for epoch in range(epochs):
            epoch_loss = []          # Array of losses in epoch
            correct_predictions = 0  # Number of correct predictions made in validation
            total_predictions = 0    # Number of total predictions made in validation

            for i, data in enumerate(train_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                epoch_loss.append(loss.item())

                # torch.linspace() is creating 1d tensor with even spaces in between, after removing first and last values
                # we have wanted amount of evenly spaced values between two boundaries
                if i in torch.linspace(0, len(train_loader), verbose+2, dtype=torch.int32)[1:-1] and verbose != 0:
                    # This is batch report print
                    print(f'Epoch {epoch+1:3}, Batch {i+1:5}, loss {loss.item():.5f}')

            if valid_loader is not None:
                self.eval()
                with torch.no_grad():
                    for data in valid_loader:
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)

                        outputs = self(images)
                        _, predicted = torch.max(outputs, 1)
                        c = (predicted == labels).squeeze()
                        total_predictions += len(c)
                        correct_predictions += c.sum()
                self.train()

            valid_accuracy = (correct_predictions / total_predictions) * 100
            valid_accuracies.append(valid_accuracy)

            valid_result = f', Validation Accuracy {valid_accuracy:6.2f}' if valid_loader is not None else ''
            print(f'Epoch {epoch+1:3}, loss {sum(epoch_loss) / len(epoch_loss):.5f}{valid_result}\n')

        return max(valid_accuracies)

    def test(self, test_loader, device='cpu'):
        '''
            Returns to a dictionary that contains the both
            categorical and total accuracies
        '''
        self.eval()
        self.to(device)

        class_correct = list(0. for i in range(10))
        class_total = list(0. for i in range(10))

        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)

                outputs = self(images)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == labels).squeeze()
                for i in range(len(images)):
                    label = labels[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        self.train()

        for i in range(10):
            print(f'Accuracy of {self.classes[i]:5}: {100 * (class_correct[i]/class_total[i]):6.2f}%')
        print(f'Total accuracy is: {100 * (sum(class_correct)/sum(class_total)):6.2f}%')

    def save(self, path='cifar10-1.pth'):
        torch.save(self.state_dict(), path)


In [11]:
class DataPrep:

    def __init__(self, batch_size=32):
        self.transform = transforms.Compose([transforms.RandomCrop(32, 5,
                                            padding_mode='reflect'),
                                            transforms.RandomHorizontalFlip(),
                                            transforms.RandomRotation(10),
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

        train_data = torchvision.datasets.CIFAR10(root='./data/cifar10-train', train=True,
                                                  download=True, transform=self.transform)
        test_data = torchvision.datasets.CIFAR10(root='./data/cifar10-test', train=False,
                                                 download=True, transform=self.transform)

        train_dataset, vald_dataset = torch.utils.data.random_split(train_data, [40000, 10000])
        __, mini_data = torch.utils.data.random_split(train_data, [45000, 5000])
        train_mini, vald_mini = torch.utils.data.random_split(mini_data, [4000, 1000])

        self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        self.valid_loader = torch.utils.data.DataLoader(vald_dataset, batch_size=batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)
        self.train_mini_loader = torch.utils.data.DataLoader(train_mini, batch_size=batch_size, shuffle=True)
        self.vald_mini_loader = torch.utils.data.DataLoader(vald_mini, batch_size=batch_size, shuffle=True)

    def get_train_loader(self):
        return self.train_loader

    def get_valid_loader(self):
        return self.valid_loader

    def get_test_loader(self):
        return self.test_loader

    def get_train_mini_loader(self):
        return self.train_mini_loader

    def get_vald_mini_loader(self):
        return self.vald_mini_loader


In [12]:
class OptunaOptimizer():

    def __init__(self, train_loader, vald_loader, n_trials=100, epochs=1, device='cpu'):
        self.train_loader = train_loader
        self.vald_loader = vald_loader
        self.n_trials = n_trials
        self.epochs = epochs
        self.device = device
        self.study = optuna.create_study(direction='maximize')
        self.study.optimize(self.objective, n_trials=self.n_trials)
        self.best_params = self.study.best_params
        self.write_to_json(self.best_params)

    def objective(self, trial):
        model = Net()

        optimizer = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'ASGD', 'SGD'])
        lr = trial.suggest_loguniform('lr', 1e-7, 1e-3)

        if optimizer == 'Adam':
            beta1 = trial.suggest_float('beta1', 0.7, 1)
            beta2 = trial.suggest_float('beta2', 0.7, 1)
            weight_decay = trial.suggest_float('weight_decay', 0, 1e-1)
            epsilon = trial.suggest_float('epsilon', 0, 1e-5)
            optimizer = optim.Adam(model.parameters(), lr=lr, betas=(beta1, beta2), eps=epsilon, weight_decay=weight_decay)
        elif optimizer == 'AdamW':
            beta1 = trial.suggest_float('beta1', 0.7, 1)
            beta2 = trial.suggest_float('beta2', 0.7, 1)
            epsilon = trial.suggest_float('epsilon', 0, 1e-5)
            weight_decay = trial.suggest_float('weight_decay', 0, 1e-1)
            optimizer = optim.AdamW(model.parameters(), lr=lr, betas=(beta1, beta2), eps=epsilon, weight_decay=weight_decay)
        elif optimizer == 'ASGD':
            lambd = trial.suggest_float('lambd', 0, 1e-6)
            alpha = trial.suggest_float('alpha', 0.5, 1)
            t0 = trial.suggest_float('t0', 0, 1e-4)
            weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-1)
            optimizer = optim.ASGD(model.parameters(), lr=lr, lambd=lambd, alpha=alpha, t0=t0, weight_decay=weight_decay)
        elif optimizer == 'SGD':
            momentum = trial.suggest_float('momentum', 0.7, 1)
            weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-1)
            dampening = trial.suggest_float('dampening', 0, 1e-1)
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay)

        accuracy = model.fit(self.train_loader, optimizer, self.epochs, device=self.device, valid_loader=self.vald_loader, verbose=0)
        del model

        return accuracy

    def write_to_json(self, params):
        with open('./src/params.json', 'w') as json_file:
            json.dump(params, json_file)


In [15]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

data_loaders = DataPrep()
train_loader = data_loaders.get_train_loader()
valid_loader = data_loaders.get_valid_loader()
test_loader = data_loaders.get_test_loader()
train_mini_loader = data_loaders.get_train_mini_loader()
vald_mini_loader = data_loaders.get_vald_mini_loader()

model = Net()

if OPTIMIZE:
    optuna_opt = OptunaOptimizer(train_mini_loader, vald_mini_loader, epochs=OPTIM_EPOCHS, device=device)
    params = optuna_opt.best_params
else:
    with open('params.json') as f:
        params = json.load(f)

if TRAIN:
    if params['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=params['lr'], betas=(params['beta1'], params['beta2']), eps=params['epsilon'], weight_decay=params['weight_decay'])
    elif params['optimizer'] == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=params['lr'], betas=(params['beta1'], params['beta2']), eps=params['epsilon'], weight_decay=params['weight_decay'])
    elif params['optimizer'] == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=params['lr'], lambd=params['lambd'], alpha=params['alpha'], t0=params['t0'], weight_decay=params['weight_decay'])
    elif params['optimizer'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=params['lr'], momentum=params['momentum'], dampening=params['dampening'], weight_decay=params['weight_decay'])

    model.fit(train_loader, optimizer, TRAIN_EPOCHS, device=device, valid_loader=valid_loader, verbose=3)
    model.save()
else:
    model.load_state_dict(torch.load('cifar10-1.pth'))

model.test(test_loader, device=device)

Files already downloaded and verified
Files already downloaded and verified
Accuracy of plane:  86.30%
Accuracy of car  :  91.50%
Accuracy of bird :  80.00%
Accuracy of cat  :  70.30%
Accuracy of deer :  84.60%
Accuracy of dog  :  75.30%
Accuracy of frog :  89.10%
Accuracy of horse:  82.80%
Accuracy of ship :  91.90%
Accuracy of truck:  92.80%
Total accuracy is:  84.46%
