In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 7.5 MB/s 
Collecting alembic
  Downloading alembic-1.7.7-py3-none-any.whl (210 kB)
[K     |████████████████████████████████| 210 kB 68.6 MB/s 
Collecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 12.9 MB/s 
Collecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting Mako
  Downloading Mako-1.2.0-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 9.4 MB/s 
[?25hCollecting autopage>=0.4.0
  Downloading autopage-0.5.0-py3-none-any.whl (29 kB)
Collecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 8.3 MB/s 
[?25hCollecting cmd2>=1.0.0
  Downloading cmd2-2.4.1-py3-none-any.whl (146 kB)
[K     |█████████

In [2]:
import os
import argparse
import numpy as np
import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

In [3]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


def create_dataloader(batch_size=64, valid_size=0.2, DIR='D:/datasets/'):
    # convert data to torch.FloatTensor
    transform = transforms.ToTensor()

    # choose the training and test datasets
    train_data = datasets.FashionMNIST(root=DIR,
                                train=True,
                                download=True,
                                transform=transform)
    test_data = datasets.FashionMNIST(root=DIR,
                               train=False,
                               download=False,
                               transform=transform)

    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # load training data in batches
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=0)

    # load validation data in batches
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=0)

    # load test data in batches
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              num_workers=0)

    return train_loader, valid_loader, test_loader


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    train_loss = 0
    train_accuracy = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)
        acc = pred.eq(target.view_as(pred)).sum().item() / args.batch_size
        train_accuracy += acc
        loss = args.criterion(output, target)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(), acc,
            ))


def validate(args, model, device, val_loader):
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += args.criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            val_acc += pred.eq(target.view_as(pred)).sum().item() / args.batch_size
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)
    return val_loss, val_acc


def get_args():
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1024, metavar='N',
                        help='input batch size for testing (default: 1024)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=500, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--criterion', default=nn.CrossEntropyLoss(),
                        help='loss function of training')
    parser.add_argument('--model-path', type=str, default='./models/LeNet_tune.pt',
                        help='directory to save model')
    parser.add_argument('--optimizer-name', type=str, default='Adam',
                        help='name of the optimier')
    args = parser.parse_args(args=[])
    return args
args = get_args()


def objective(trial):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model = LeNet().to(device)

    optimizer_name = args.optimizer_name
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, val_loader, _ = create_dataloader()

    for epoch in range(args.epochs):
        train(args, model, device, train_loader, optimizer, epoch)
        _, val_acc = validate(args, model, device, val_loader)
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_acc

In [7]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


def create_dataloader(batch_size=64, valid_size=0.2, DIR='D:/datasets/'):
    # convert data to torch.FloatTensor
    transform = transforms.ToTensor()

    # choose the training and test datasets
    train_data = datasets.FashionMNIST(root=DIR,
                                train=True,
                                download=False,
                                transform=transform)
    test_data = datasets.FashionMNIST(root=DIR,
                               train=False,
                               download=False,
                               transform=transform)

    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # load training data in batches
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=0)

    # load validation data in batches
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=0)

    # load test data in batches
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              num_workers=0)

    return train_loader, valid_loader, test_loader


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    train_loss = 0
    train_accuracy = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)
        acc = pred.eq(target.view_as(pred)).sum().item() / args.batch_size
        train_accuracy += acc
        loss = args.criterion(output, target)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(), acc,
            ))


def validate(args, model, device, val_loader):
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += args.criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            val_acc += pred.eq(target.view_as(pred)).sum().item() / args.batch_size
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)
    return val_loss, val_acc


def get_args():
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1024, metavar='N',
                        help='input batch size for testing (default: 1024)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=500, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--criterion', default=nn.CrossEntropyLoss(),
                        help='loss function of training')
    parser.add_argument('--model-path', type=str, default='./models/LeNet_tune.pt',
                        help='directory to save model')
    parser.add_argument('--optimizer-name', type=str, default='Adam',
                        help='name of the optimier')
    args = parser.parse_args(args=[])
    return args
args = get_args()


def objective(trial):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using ', device)

    model = LeNet().to(device)

    optimizer_name = args.optimizer_name
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    args.batch_size = batch_size
    train_loader, val_loader, _ = create_dataloader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        train(args, model, device, train_loader, optimizer, epoch)
        _, val_acc = validate(args, model, device, val_loader)
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_acc

In [20]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


def create_dataloader(batch_size=64, valid_size=0.2, DIR='D:/datasets/'):
    # convert data to torch.FloatTensor
    transform = transforms.ToTensor()

    # choose the training and test datasets
    train_data = datasets.FashionMNIST(root=DIR,
                                train=True,
                                download=False,
                                transform=transform)
    test_data = datasets.FashionMNIST(root=DIR,
                               train=False,
                               download=False,
                               transform=transform)

    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # load training data in batches
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=0)

    # load validation data in batches
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=0)

    # load test data in batches
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              num_workers=0)

    return train_loader, valid_loader, test_loader


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    train_loss = 0
    train_accuracy = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)
        acc = pred.eq(target.view_as(pred)).sum().item() / args.batch_size
        train_accuracy += acc
        loss = args.criterion(output, target)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()


def validate(args, model, device, val_loader):
    model.eval()
    val_loss = 0
    val_acc = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += args.criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            val_acc += pred.eq(target.view_as(pred)).sum().item() / args.batch_size
    val_loss /= len(val_loader)
    val_acc /= len(val_loader)
    return val_loss, val_acc


def get_args():
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1024, metavar='N',
                        help='input batch size for testing (default: 1024)')
    parser.add_argument('--epochs', type=int, default=5, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--criterion', default=nn.CrossEntropyLoss(),
                        help='loss function of training')
    parser.add_argument('--model-path', type=str, default='./models/LeNet_tune.pt',
                        help='directory to save model')
    parser.add_argument('--optimizer-name', type=str, default='Adam',
                        help='name of the optimier')
    args = parser.parse_args(args=[])
    return args
args = get_args()


def objective(trial):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model = LeNet().to(device)

    optimizer_name = args.optimizer_name
    init_lr = trial.suggest_float("init_lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=init_lr)

    gamma = trial.suggest_float("gamma", 0.6, 1.0)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    args.batch_size = batch_size
    train_loader, val_loader, _ = create_dataloader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        train(args, model, device, train_loader, optimizer, epoch)
        _, val_acc = validate(args, model, device, val_loader)
        print('epoch {} of {}: val_acc: {:.3f}'.format(epoch+1, args.epochs, val_acc))
        scheduler.step()
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_acc

In [21]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print('Using ', device)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-04-20 03:55:03,215][0m A new study created in memory with name: no-name-f57fb6d4-67c0-4af6-bd4b-472bd3ac8337[0m


Using  cuda
epoch 1 of 5: val_acc: 0.862
epoch 2 of 5: val_acc: 0.883
epoch 3 of 5: val_acc: 0.891
epoch 4 of 5: val_acc: 0.893


[32m[I 2022-04-20 03:55:37,240][0m Trial 0 finished with value: 0.9005984042553191 and parameters: {'init_lr': 0.0015675612393006848, 'gamma': 0.8632463261350161, 'batch_size': 64}. Best is trial 0 with value: 0.9005984042553191.[0m


epoch 5 of 5: val_acc: 0.901
epoch 1 of 5: val_acc: 0.842
epoch 2 of 5: val_acc: 0.865
epoch 3 of 5: val_acc: 0.873
epoch 4 of 5: val_acc: 0.876


[32m[I 2022-04-20 03:56:04,751][0m Trial 1 finished with value: 0.8769115691489362 and parameters: {'init_lr': 0.00017235698661844536, 'gamma': 0.6320971021747585, 'batch_size': 128}. Best is trial 0 with value: 0.9005984042553191.[0m


epoch 5 of 5: val_acc: 0.877
epoch 1 of 5: val_acc: 0.810
epoch 2 of 5: val_acc: 0.842
epoch 3 of 5: val_acc: 0.853
epoch 4 of 5: val_acc: 0.862


[32m[I 2022-04-20 03:56:32,521][0m Trial 2 finished with value: 0.8617021276595744 and parameters: {'init_lr': 0.00012640888333802916, 'gamma': 0.7889946232985741, 'batch_size': 128}. Best is trial 0 with value: 0.9005984042553191.[0m


epoch 5 of 5: val_acc: 0.862
epoch 1 of 5: val_acc: 0.870
epoch 2 of 5: val_acc: 0.888
epoch 3 of 5: val_acc: 0.903
epoch 4 of 5: val_acc: 0.904


[32m[I 2022-04-20 03:57:43,437][0m Trial 3 finished with value: 0.9073333333333333 and parameters: {'init_lr': 0.0022713543306523225, 'gamma': 0.7310585347535046, 'batch_size': 16}. Best is trial 3 with value: 0.9073333333333333.[0m


epoch 5 of 5: val_acc: 0.907
epoch 1 of 5: val_acc: 0.678
epoch 2 of 5: val_acc: 0.725
epoch 3 of 5: val_acc: 0.745
epoch 4 of 5: val_acc: 0.764


[32m[I 2022-04-20 03:58:11,119][0m Trial 4 finished with value: 0.7802526595744681 and parameters: {'init_lr': 1.3915223146639235e-05, 'gamma': 0.9914475009914437, 'batch_size': 128}. Best is trial 3 with value: 0.9073333333333333.[0m


epoch 5 of 5: val_acc: 0.780


[32m[I 2022-04-20 03:58:25,120][0m Trial 5 pruned. [0m


epoch 1 of 5: val_acc: 0.750
epoch 1 of 5: val_acc: 0.885
epoch 2 of 5: val_acc: 0.891
epoch 3 of 5: val_acc: 0.902
epoch 4 of 5: val_acc: 0.901


[32m[I 2022-04-20 03:59:35,692][0m Trial 6 finished with value: 0.90925 and parameters: {'init_lr': 0.0009958190182511953, 'gamma': 0.9108069436282582, 'batch_size': 16}. Best is trial 6 with value: 0.90925.[0m


epoch 5 of 5: val_acc: 0.909


[32m[I 2022-04-20 03:59:49,856][0m Trial 7 pruned. [0m


epoch 1 of 5: val_acc: 0.849
epoch 1 of 5: val_acc: 0.859
epoch 2 of 5: val_acc: 0.879
epoch 3 of 5: val_acc: 0.885
epoch 4 of 5: val_acc: 0.892


[32m[I 2022-04-20 04:00:17,205][0m Trial 8 finished with value: 0.8966090425531915 and parameters: {'init_lr': 0.0004220662538479327, 'gamma': 0.6897490044381996, 'batch_size': 128}. Best is trial 6 with value: 0.90925.[0m


epoch 5 of 5: val_acc: 0.897


[32m[I 2022-04-20 04:00:26,391][0m Trial 9 pruned. [0m


epoch 1 of 5: val_acc: 0.838


[32m[I 2022-04-20 04:00:33,050][0m Trial 10 pruned. [0m


epoch 1 of 5: val_acc: 0.795
epoch 1 of 5: val_acc: 0.879
epoch 2 of 5: val_acc: 0.889
epoch 3 of 5: val_acc: 0.897
epoch 4 of 5: val_acc: 0.903


[32m[I 2022-04-20 04:01:43,651][0m Trial 11 finished with value: 0.90325 and parameters: {'init_lr': 0.002891405053932895, 'gamma': 0.791706043903631, 'batch_size': 16}. Best is trial 6 with value: 0.90925.[0m


epoch 5 of 5: val_acc: 0.903
epoch 1 of 5: val_acc: 0.872


[32m[I 2022-04-20 04:02:11,746][0m Trial 12 pruned. [0m


epoch 2 of 5: val_acc: 0.879
epoch 1 of 5: val_acc: 0.882
epoch 2 of 5: val_acc: 0.885
epoch 3 of 5: val_acc: 0.905
epoch 4 of 5: val_acc: 0.908


[32m[I 2022-04-20 04:03:21,652][0m Trial 13 finished with value: 0.9136666666666666 and parameters: {'init_lr': 0.0005805506600453946, 'gamma': 0.9299130064982555, 'batch_size': 16}. Best is trial 13 with value: 0.9136666666666666.[0m


epoch 5 of 5: val_acc: 0.914


[32m[I 2022-04-20 04:03:30,745][0m Trial 14 pruned. [0m


epoch 1 of 5: val_acc: 0.832
epoch 1 of 5: val_acc: 0.883
epoch 2 of 5: val_acc: 0.897
epoch 3 of 5: val_acc: 0.899
epoch 4 of 5: val_acc: 0.907


[32m[I 2022-04-20 04:04:40,743][0m Trial 15 finished with value: 0.9040833333333333 and parameters: {'init_lr': 0.0005218913501129159, 'gamma': 0.8379707182826234, 'batch_size': 16}. Best is trial 13 with value: 0.9136666666666666.[0m


epoch 5 of 5: val_acc: 0.904
epoch 1 of 5: val_acc: 0.878


[32m[I 2022-04-20 04:05:09,019][0m Trial 16 pruned. [0m


epoch 2 of 5: val_acc: 0.883
Study statistics: 
  Number of finished trials:  17
  Number of pruned trials:  7
  Number of complete trials:  10
Best trial:
  Value:  0.9136666666666666
  Params: 
    init_lr: 0.0005805506600453946
    gamma: 0.9299130064982555
    batch_size: 16


In [22]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print('Using ', device)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-04-20 04:06:03,059][0m A new study created in memory with name: no-name-3c2423ea-313b-4b2b-ae4b-fe5c834b284a[0m


Using  cuda
epoch 1 of 5: val_acc: 0.806
epoch 2 of 5: val_acc: 0.840
epoch 3 of 5: val_acc: 0.857
epoch 4 of 5: val_acc: 0.864


[32m[I 2022-04-20 04:06:30,575][0m Trial 0 finished with value: 0.8686003989361702 and parameters: {'init_lr': 7.844292368759098e-05, 'gamma': 0.7793832683148582, 'batch_size': 128}. Best is trial 0 with value: 0.8686003989361702.[0m


epoch 5 of 5: val_acc: 0.869
epoch 1 of 5: val_acc: 0.867
epoch 2 of 5: val_acc: 0.891
epoch 3 of 5: val_acc: 0.888
epoch 4 of 5: val_acc: 0.895


[32m[I 2022-04-20 04:07:04,338][0m Trial 1 finished with value: 0.9031748670212766 and parameters: {'init_lr': 0.005537760222119039, 'gamma': 0.8160780692017976, 'batch_size': 64}. Best is trial 1 with value: 0.9031748670212766.[0m


epoch 5 of 5: val_acc: 0.903
epoch 1 of 5: val_acc: 0.875
epoch 2 of 5: val_acc: 0.886
epoch 3 of 5: val_acc: 0.899
epoch 4 of 5: val_acc: 0.901


[32m[I 2022-04-20 04:07:31,842][0m Trial 2 finished with value: 0.9053357712765957 and parameters: {'init_lr': 0.011992839317318791, 'gamma': 0.6355394493401878, 'batch_size': 128}. Best is trial 2 with value: 0.9053357712765957.[0m


epoch 5 of 5: val_acc: 0.905
epoch 1 of 5: val_acc: 0.870
epoch 2 of 5: val_acc: 0.884
epoch 3 of 5: val_acc: 0.901
epoch 4 of 5: val_acc: 0.904


[32m[I 2022-04-20 04:07:59,719][0m Trial 3 finished with value: 0.9103224734042553 and parameters: {'init_lr': 0.009707468231345402, 'gamma': 0.6330331359309136, 'batch_size': 128}. Best is trial 3 with value: 0.9103224734042553.[0m


epoch 5 of 5: val_acc: 0.910
epoch 1 of 5: val_acc: 0.821
epoch 2 of 5: val_acc: 0.855
epoch 3 of 5: val_acc: 0.866
epoch 4 of 5: val_acc: 0.872


[32m[I 2022-04-20 04:08:45,537][0m Trial 4 finished with value: 0.87575 and parameters: {'init_lr': 4.2152679808981527e-05, 'gamma': 0.8285488908770086, 'batch_size': 32}. Best is trial 3 with value: 0.9103224734042553.[0m


epoch 5 of 5: val_acc: 0.876


[32m[I 2022-04-20 04:08:51,051][0m Trial 5 pruned. [0m


epoch 1 of 5: val_acc: 0.845
epoch 1 of 5: val_acc: 0.870
epoch 2 of 5: val_acc: 0.892
epoch 3 of 5: val_acc: 0.897
epoch 4 of 5: val_acc: 0.902


[32m[I 2022-04-20 04:09:18,557][0m Trial 6 pruned. [0m


epoch 5 of 5: val_acc: 0.900


[32m[I 2022-04-20 04:09:23,968][0m Trial 7 pruned. [0m


epoch 1 of 5: val_acc: 0.772
epoch 1 of 5: val_acc: 0.877
epoch 2 of 5: val_acc: 0.891
epoch 3 of 5: val_acc: 0.899
epoch 4 of 5: val_acc: 0.912


[32m[I 2022-04-20 04:10:09,674][0m Trial 8 finished with value: 0.9135833333333333 and parameters: {'init_lr': 0.013716164454697791, 'gamma': 0.710050770582094, 'batch_size': 32}. Best is trial 8 with value: 0.9135833333333333.[0m


epoch 5 of 5: val_acc: 0.914


[32m[I 2022-04-20 04:10:18,978][0m Trial 9 pruned. [0m


epoch 1 of 5: val_acc: 0.858
epoch 1 of 5: val_acc: 0.879
epoch 2 of 5: val_acc: 0.896
epoch 3 of 5: val_acc: 0.897
epoch 4 of 5: val_acc: 0.908


[32m[I 2022-04-20 04:11:29,227][0m Trial 10 finished with value: 0.9093333333333333 and parameters: {'init_lr': 0.0005984846940131705, 'gamma': 0.6989958905749444, 'batch_size': 16}. Best is trial 8 with value: 0.9135833333333333.[0m


epoch 5 of 5: val_acc: 0.909


[32m[I 2022-04-20 04:11:38,541][0m Trial 11 pruned. [0m


epoch 1 of 5: val_acc: 0.764


[32m[I 2022-04-20 04:11:45,237][0m Trial 12 pruned. [0m


epoch 1 of 5: val_acc: 0.854


[32m[I 2022-04-20 04:11:59,242][0m Trial 13 pruned. [0m


epoch 1 of 5: val_acc: 0.830
epoch 1 of 5: val_acc: 0.879
epoch 2 of 5: val_acc: 0.887
epoch 3 of 5: val_acc: 0.903
epoch 4 of 5: val_acc: 0.908


[32m[I 2022-04-20 04:12:44,846][0m Trial 14 finished with value: 0.9089166666666667 and parameters: {'init_lr': 0.0018943908489336672, 'gamma': 0.6516712821534072, 'batch_size': 32}. Best is trial 8 with value: 0.9135833333333333.[0m


epoch 5 of 5: val_acc: 0.909


[32m[I 2022-04-20 04:12:54,084][0m Trial 15 pruned. [0m


epoch 1 of 5: val_acc: 0.853


[32m[I 2022-04-20 04:13:00,850][0m Trial 16 pruned. [0m


epoch 1 of 5: val_acc: 0.873


[32m[I 2022-04-20 04:13:14,961][0m Trial 17 pruned. [0m


epoch 1 of 5: val_acc: 0.768
epoch 1 of 5: val_acc: 0.874
epoch 2 of 5: val_acc: 0.894
epoch 3 of 5: val_acc: 0.902
epoch 4 of 5: val_acc: 0.905


[32m[I 2022-04-20 04:14:00,579][0m Trial 18 finished with value: 0.9088333333333334 and parameters: {'init_lr': 0.014133941194170468, 'gamma': 0.66337319348363, 'batch_size': 32}. Best is trial 8 with value: 0.9135833333333333.[0m


epoch 5 of 5: val_acc: 0.909


[32m[I 2022-04-20 04:14:06,029][0m Trial 19 pruned. [0m


epoch 1 of 5: val_acc: 0.854


[32m[I 2022-04-20 04:14:15,213][0m Trial 20 pruned. [0m


epoch 1 of 5: val_acc: 0.856


[32m[I 2022-04-20 04:14:29,256][0m Trial 21 pruned. [0m


epoch 1 of 5: val_acc: 0.868


[32m[I 2022-04-20 04:14:43,310][0m Trial 22 pruned. [0m


epoch 1 of 5: val_acc: 0.872
epoch 1 of 5: val_acc: 0.881
epoch 2 of 5: val_acc: 0.892
epoch 3 of 5: val_acc: 0.902
epoch 4 of 5: val_acc: 0.906


[32m[I 2022-04-20 04:15:53,487][0m Trial 23 pruned. [0m


epoch 5 of 5: val_acc: 0.908


[32m[I 2022-04-20 04:16:07,585][0m Trial 24 pruned. [0m


epoch 1 of 5: val_acc: 0.871


[32m[I 2022-04-20 04:16:21,513][0m Trial 25 pruned. [0m


epoch 1 of 5: val_acc: 0.862


[32m[I 2022-04-20 04:16:27,041][0m Trial 26 pruned. [0m


epoch 1 of 5: val_acc: 0.871


[32m[I 2022-04-20 04:16:33,860][0m Trial 27 pruned. [0m


epoch 1 of 5: val_acc: 0.862
epoch 1 of 5: val_acc: 0.881
epoch 2 of 5: val_acc: 0.895
epoch 3 of 5: val_acc: 0.902


[32m[I 2022-04-20 04:17:10,656][0m Trial 28 pruned. [0m


epoch 4 of 5: val_acc: 0.904


[32m[I 2022-04-20 04:17:16,309][0m Trial 29 pruned. [0m


epoch 1 of 5: val_acc: 0.862


[32m[I 2022-04-20 04:17:30,356][0m Trial 30 pruned. [0m


epoch 1 of 5: val_acc: 0.844


[32m[I 2022-04-20 04:17:39,507][0m Trial 31 pruned. [0m


epoch 1 of 5: val_acc: 0.862


[32m[I 2022-04-20 04:17:48,824][0m Trial 32 pruned. [0m


epoch 1 of 5: val_acc: 0.864


[32m[I 2022-04-20 04:17:57,955][0m Trial 33 pruned. [0m


epoch 1 of 5: val_acc: 0.863


[32m[I 2022-04-20 04:18:07,134][0m Trial 34 pruned. [0m


epoch 1 of 5: val_acc: 0.844


[32m[I 2022-04-20 04:18:12,678][0m Trial 35 pruned. [0m


epoch 1 of 5: val_acc: 0.856
epoch 1 of 5: val_acc: 0.879


[32m[I 2022-04-20 04:18:26,426][0m Trial 36 pruned. [0m


epoch 2 of 5: val_acc: 0.884


[32m[I 2022-04-20 04:18:31,971][0m Trial 37 pruned. [0m


epoch 1 of 5: val_acc: 0.821


[32m[I 2022-04-20 04:18:41,220][0m Trial 38 pruned. [0m


epoch 1 of 5: val_acc: 0.854


[32m[I 2022-04-20 04:18:46,811][0m Trial 39 pruned. [0m


epoch 1 of 5: val_acc: 0.850


[32m[I 2022-04-20 04:18:56,042][0m Trial 40 pruned. [0m


epoch 1 of 5: val_acc: 0.868


[32m[I 2022-04-20 04:19:05,226][0m Trial 41 pruned. [0m


epoch 1 of 5: val_acc: 0.865


[32m[I 2022-04-20 04:19:14,346][0m Trial 42 pruned. [0m


epoch 1 of 5: val_acc: 0.846


[32m[I 2022-04-20 04:19:23,568][0m Trial 43 pruned. [0m


epoch 1 of 5: val_acc: 0.861


[32m[I 2022-04-20 04:19:32,751][0m Trial 44 pruned. [0m


epoch 1 of 5: val_acc: 0.850
epoch 1 of 5: val_acc: 0.877


[32m[I 2022-04-20 04:19:50,938][0m Trial 45 pruned. [0m


epoch 2 of 5: val_acc: 0.880


[32m[I 2022-04-20 04:19:57,719][0m Trial 46 pruned. [0m


epoch 1 of 5: val_acc: 0.864


[32m[I 2022-04-20 04:20:03,106][0m Trial 47 pruned. [0m


epoch 1 of 5: val_acc: 0.869


[32m[I 2022-04-20 04:20:12,281][0m Trial 48 pruned. [0m


epoch 1 of 5: val_acc: 0.870


[32m[I 2022-04-20 04:20:26,233][0m Trial 49 pruned. [0m


epoch 1 of 5: val_acc: 0.804
epoch 1 of 5: val_acc: 0.883
epoch 2 of 5: val_acc: 0.898
epoch 3 of 5: val_acc: 0.908
epoch 4 of 5: val_acc: 0.904


[32m[I 2022-04-20 04:21:12,202][0m Trial 50 finished with value: 0.91525 and parameters: {'init_lr': 0.001380615733475873, 'gamma': 0.6750421181937485, 'batch_size': 32}. Best is trial 50 with value: 0.91525.[0m


epoch 5 of 5: val_acc: 0.915


[32m[I 2022-04-20 04:21:21,381][0m Trial 51 pruned. [0m


epoch 1 of 5: val_acc: 0.871


[32m[I 2022-04-20 04:21:30,646][0m Trial 52 pruned. [0m


epoch 1 of 5: val_acc: 0.869


[32m[I 2022-04-20 04:21:39,823][0m Trial 53 pruned. [0m


epoch 1 of 5: val_acc: 0.868
epoch 1 of 5: val_acc: 0.882
epoch 2 of 5: val_acc: 0.897
epoch 3 of 5: val_acc: 0.903
epoch 4 of 5: val_acc: 0.905


[32m[I 2022-04-20 04:22:25,508][0m Trial 54 pruned. [0m


epoch 5 of 5: val_acc: 0.907


[32m[I 2022-04-20 04:22:39,637][0m Trial 55 pruned. [0m


epoch 1 of 5: val_acc: 0.862


[32m[I 2022-04-20 04:22:45,128][0m Trial 56 pruned. [0m


epoch 1 of 5: val_acc: 0.828
epoch 1 of 5: val_acc: 0.879
epoch 2 of 5: val_acc: 0.895


[32m[I 2022-04-20 04:23:12,587][0m Trial 57 pruned. [0m


epoch 3 of 5: val_acc: 0.899


[32m[I 2022-04-20 04:23:26,605][0m Trial 58 pruned. [0m


epoch 1 of 5: val_acc: 0.866
epoch 1 of 5: val_acc: 0.892
epoch 2 of 5: val_acc: 0.891


[32m[I 2022-04-20 04:23:54,250][0m Trial 59 pruned. [0m


epoch 3 of 5: val_acc: 0.899


[32m[I 2022-04-20 04:24:01,135][0m Trial 60 pruned. [0m


epoch 1 of 5: val_acc: 0.863


[32m[I 2022-04-20 04:24:06,580][0m Trial 61 pruned. [0m


epoch 1 of 5: val_acc: 0.834


[32m[I 2022-04-20 04:24:12,093][0m Trial 62 pruned. [0m


epoch 1 of 5: val_acc: 0.866


[32m[I 2022-04-20 04:24:17,624][0m Trial 63 pruned. [0m


epoch 1 of 5: val_acc: 0.861


[32m[I 2022-04-20 04:24:23,021][0m Trial 64 pruned. [0m


epoch 1 of 5: val_acc: 0.868


[32m[I 2022-04-20 04:24:37,303][0m Trial 65 pruned. [0m


epoch 1 of 5: val_acc: 0.839


[32m[I 2022-04-20 04:24:42,840][0m Trial 66 pruned. [0m


epoch 1 of 5: val_acc: 0.812


[32m[I 2022-04-20 04:24:51,987][0m Trial 67 pruned. [0m


epoch 1 of 5: val_acc: 0.870
epoch 1 of 5: val_acc: 0.878
epoch 2 of 5: val_acc: 0.891
epoch 3 of 5: val_acc: 0.902
epoch 4 of 5: val_acc: 0.905


[32m[I 2022-04-20 04:26:02,042][0m Trial 68 pruned. [0m


epoch 5 of 5: val_acc: 0.906


[32m[I 2022-04-20 04:26:07,605][0m Trial 69 pruned. [0m


epoch 1 of 5: val_acc: 0.854
epoch 1 of 5: val_acc: 0.876
epoch 2 of 5: val_acc: 0.889


[32m[I 2022-04-20 04:26:35,152][0m Trial 70 pruned. [0m


epoch 3 of 5: val_acc: 0.892


[32m[I 2022-04-20 04:26:41,848][0m Trial 71 pruned. [0m


epoch 1 of 5: val_acc: 0.870


[32m[I 2022-04-20 04:26:48,585][0m Trial 72 pruned. [0m


epoch 1 of 5: val_acc: 0.873


[32m[I 2022-04-20 04:26:55,376][0m Trial 73 pruned. [0m


epoch 1 of 5: val_acc: 0.848


[32m[I 2022-04-20 04:27:02,053][0m Trial 74 pruned. [0m


epoch 1 of 5: val_acc: 0.870


[32m[I 2022-04-20 04:27:11,261][0m Trial 75 pruned. [0m


epoch 1 of 5: val_acc: 0.874


[32m[I 2022-04-20 04:27:17,920][0m Trial 76 pruned. [0m


epoch 1 of 5: val_acc: 0.869


[32m[I 2022-04-20 04:27:23,525][0m Trial 77 pruned. [0m


epoch 1 of 5: val_acc: 0.860


[32m[I 2022-04-20 04:27:32,749][0m Trial 78 pruned. [0m


epoch 1 of 5: val_acc: 0.858


[32m[I 2022-04-20 04:27:46,814][0m Trial 79 pruned. [0m


epoch 1 of 5: val_acc: 0.845


[32m[I 2022-04-20 04:27:55,908][0m Trial 80 pruned. [0m


epoch 1 of 5: val_acc: 0.875
epoch 1 of 5: val_acc: 0.886


[32m[I 2022-04-20 04:28:14,318][0m Trial 81 pruned. [0m


epoch 2 of 5: val_acc: 0.885


[32m[I 2022-04-20 04:28:23,514][0m Trial 82 pruned. [0m


epoch 1 of 5: val_acc: 0.763


[32m[I 2022-04-20 04:28:32,757][0m Trial 83 pruned. [0m


epoch 1 of 5: val_acc: 0.841


[32m[I 2022-04-20 04:28:41,868][0m Trial 84 pruned. [0m


epoch 1 of 5: val_acc: 0.805


[32m[I 2022-04-20 04:28:51,067][0m Trial 85 pruned. [0m


epoch 1 of 5: val_acc: 0.838


[32m[I 2022-04-20 04:28:56,577][0m Trial 86 pruned. [0m


epoch 1 of 5: val_acc: 0.820


[32m[I 2022-04-20 04:29:05,849][0m Trial 87 pruned. [0m


epoch 1 of 5: val_acc: 0.863


[32m[I 2022-04-20 04:29:20,141][0m Trial 88 pruned. [0m


epoch 1 of 5: val_acc: 0.855


[32m[I 2022-04-20 04:29:26,862][0m Trial 89 pruned. [0m


epoch 1 of 5: val_acc: 0.857


[32m[I 2022-04-20 04:29:36,094][0m Trial 90 pruned. [0m


epoch 1 of 5: val_acc: 0.867


[32m[I 2022-04-20 04:29:41,522][0m Trial 91 pruned. [0m


epoch 1 of 5: val_acc: 0.753


[32m[I 2022-04-20 04:29:47,010][0m Trial 92 pruned. [0m


epoch 1 of 5: val_acc: 0.777


[32m[I 2022-04-20 04:29:52,635][0m Trial 93 pruned. [0m


epoch 1 of 5: val_acc: 0.740


[32m[I 2022-04-20 04:29:58,148][0m Trial 94 pruned. [0m


epoch 1 of 5: val_acc: 0.843


[32m[I 2022-04-20 04:30:03,739][0m Trial 95 pruned. [0m


epoch 1 of 5: val_acc: 0.805


[32m[I 2022-04-20 04:30:13,002][0m Trial 96 pruned. [0m


epoch 1 of 5: val_acc: 0.734


[32m[I 2022-04-20 04:30:27,115][0m Trial 97 pruned. [0m


epoch 1 of 5: val_acc: 0.869


[32m[I 2022-04-20 04:30:32,712][0m Trial 98 pruned. [0m


epoch 1 of 5: val_acc: 0.758


[32m[I 2022-04-20 04:30:41,837][0m Trial 99 pruned. [0m


epoch 1 of 5: val_acc: 0.873
Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  90
  Number of complete trials:  10
Best trial:
  Value:  0.91525
  Params: 
    init_lr: 0.001380615733475873
    gamma: 0.6750421181937485
    batch_size: 32
