# Comparison of the efficiency of the MIHA algorithm with the Optuna framework

In [18]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

import optuna

from matplotlib import pyplot as plt
from matplotlib import cm

import warnings
warnings.filterwarnings("ignore")

## Model obtained using the MIHA algorithm

Training this model with the MIHA algorithm took 176 epochs.

In [38]:
# NOTE(review): this import lives outside the notebook's import cell; a fresh
# "Restart & Run All" still works because it precedes the first use of `jit`.
from torch import jit

# Load the network produced by the MIHA run (a TorchScript archive) and the
# companion optimizer file saved next to it.
# NOTE(review): the absolute Windows paths below make the notebook non-portable.
# NOTE(review): `state` is not referenced again in the visible cells.
# NOTE(review): torch.load unpickles its input — only load files you trust.
net = jit.load('D:/miha_exp/mnist/4/model/model_final.zip')
state = torch.load('D:/miha_exp/mnist/4/model/optimizer_final.pth')

# Load the 10 000 MNIST objects used for training
X_train = torch.load('D:/ITMO/Neural_network_example/mnist/X_train.pt')
Y_train = torch.load('D:/ITMO/Neural_network_example/mnist/Y_train.pt')

# ... and the objects used for testing
X_test = torch.load('D:/ITMO/Neural_network_example/mnist/X_test.pt')
Y_test = torch.load('D:/ITMO/Neural_network_example/mnist/Y_test.pt')

# Accuracy is checked on the first 500 test objects only
X_test = X_test[:500, :]
Y_test = Y_test[:500, ]

# Pair features with labels so the loaders yield (data, target) batches
train = torch.utils.data.TensorDataset(X_train, Y_train)
test = torch.utils.data.TensorDataset(X_test, Y_test)

# Prepare data loaders (batches of 20, loaded in the main process).
# `shuffle` is left at its default, so batches follow the stored order.
train_loader = torch.utils.data.DataLoader(train, batch_size=20, num_workers=0)
test_loader = torch.utils.data.DataLoader(test, batch_size=20, num_workers=0)

Let's check the accuracy of the model on the test dataset.

In [39]:
def validation(net, test_loader):
    """Evaluate ``net`` on ``test_loader`` and print its classification accuracy.

    The model is switched to evaluation mode (it is left in that mode on
    return) and is run without gradient tracking.

    Args:
        net: model exposing ``eval()``; ``net(data)`` must return per-class
            scores with the classes along dimension 1.
        test_loader: iterable of ``(data, target)`` batches that exposes a
            sized ``dataset`` attribute (e.g. a ``torch.utils.data.DataLoader``).

    Returns:
        None. The accuracy and the number of correct predictions are printed.

    Raises:
        ZeroDivisionError: if ``test_loader.dataset`` is empty.
    """
    net.eval()

    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Per-class scores for every object in the batch
            output = net(data)
            # Predicted label = index of the highest score
            pred = output.argmax(dim=1, keepdim=False)

            # Number of correctly predicted class labels in this batch
            correct += pred.eq(target.view_as(pred)).sum().item()

    # The accuracy is taken over the whole dataset behind the loader
    total = len(test_loader.dataset)
    accuracy = correct / total

    print(f'Accuracy - {accuracy}')
    print(f'{correct} labels out of {total} objects were correctly assigned')
    
# Score the MIHA-trained network on the test loader
validation(net=net, test_loader=test_loader)

Accuracy - 0.974
487 labels out of 500 objects were correctly assigned


Thus, the trained neural network misclassified only 13 of the 500 test objects.

## Feedforward neural network (FNN) which was optimized by Optuna 

Here the network is optimized with the Optuna framework. The code below is a modified version of the official Optuna PyTorch example, see [here](https://github.com/optuna/optuna/blob/master/examples/pytorch_simple.py).

In [9]:
# Configuration of the Optuna experiment.

# Device the candidate models are moved to
DEVICE = torch.device("cpu")
# NOTE(review): not referenced in the visible cells — the data loaders are built with batch_size=20
BATCHSIZE = 128
# Number of output classes (size of the last Linear layer built by define_model)
CLASSES = 10
# NOTE(review): not referenced in the visible cells
DIR = os.getcwd()
# Number of training epochs per trial (a pruned trial stops earlier)
EPOCHS = 10
# NOTE(review): not referenced in the visible cells
LOG_INTERVAL = 10
# Highest test accuracy seen so far; objective() updates it after every epoch
# of every trial, so it is a maximum over the whole study, not per trial
test_accuracy = 0

def define_model(trial):
    """Build a feed-forward classifier whose architecture is sampled from ``trial``.

    The trial chooses the number of hidden layers (1-3) and, for each hidden
    layer, its width (4-500 units) and dropout probability (0.2-0.5). Every
    hidden layer is Linear -> ReLU -> Dropout; the network ends with a Linear
    layer of ``CLASSES`` outputs followed by LogSoftmax over dimension 1.

    Args:
        trial: Optuna trial (or any object offering ``suggest_int`` and
            ``suggest_float``).

    Returns:
        nn.Sequential: the assembled model, expecting 28 * 28 input features.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width = 28 * 28  # number of input features of the first layer
    for idx in range(depth):
        # Width is sampled before dropout so the parameter order stays stable
        hidden = trial.suggest_int(f"n_units_l{idx}", 4, 500)
        drop_prob = trial.suggest_float(f"dropout_l{idx}", 0.2, 0.5)
        modules += [nn.Linear(width, hidden), nn.ReLU(), nn.Dropout(drop_prob)]
        # The next layer consumes what this one produces
        width = hidden

    # Classification head: log-probabilities over the classes
    modules += [nn.Linear(width, CLASSES), nn.LogSoftmax(dim=1)]

    return nn.Sequential(*modules)

def objective(trial):
    """Train one Optuna-sampled model and return its final training accuracy.

    The architecture comes from ``define_model``; the optimizer type and the
    learning rate are sampled here. After every epoch the model is scored on
    ``test_loader``, the training accuracy is reported to the trial (it is the
    value the pruner sees), and the trial may be pruned.

    Side effects:
        * updates the module-level ``test_accuracy`` with the highest test
          accuracy seen so far (over all trials and epochs);
        * stores the latest test accuracy of *this* trial in the trial's user
          attribute ``"test_accuracy"``, so it can be read back per trial;
        * prints the number of epochs when the trial runs to completion.

    NOTE(review): the study maximises *training* accuracy, measured in train
    mode (dropout active) while the weights are still changing; the test set
    is only monitored. Confirm this is the intended objective.

    Args:
        trial: Optuna trial used to sample hyperparameters and report progress.

    Returns:
        float: training accuracy of the last completed epoch (0.0 if
        ``EPOCHS`` is 0).

    Raises:
        optuna.exceptions.TrialPruned: when the pruner stops the trial.
    """
    global test_accuracy

    produced_epochs = 0
    # Defined up front so that EPOCHS == 0 returns a value instead of raising
    train_accuracy = 0.0

    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizer: its class is looked up by name in torch.optim.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    for epoch in range(EPOCHS):
        # Training of the model.
        correct = 0
        model.train()
        for data, target in train_loader:
            # Keep the batch on the same device as the model
            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)

            pred = output.argmax(dim=1, keepdim=False)
            correct += pred.eq(target.view_as(pred)).sum().item()

            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        train_accuracy = correct / len(train_loader.dataset)
        produced_epochs += 1

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                pred = output.argmax(dim=1, keepdim=False)
                correct += pred.eq(target.view_as(pred)).sum().item()

        new_test_accuracy = correct / len(test_loader.dataset)

        # Remember this trial's own test accuracy (overwritten every epoch)
        trial.set_user_attr("test_accuracy", new_test_accuracy)

        # Update the study-wide best test accuracy
        if new_test_accuracy > test_accuracy:
            test_accuracy = new_test_accuracy

        trial.report(train_accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    print(f'Number of epochs produced {produced_epochs}')
    return train_accuracy

def make_test_validation(best_model):
    """Evaluate ``best_model`` on the global ``test_loader`` and print its accuracy.

    The model is switched to evaluation mode first (and left in that mode),
    so dropout layers do not perturb the predictions.

    Args:
        best_model: model exposing ``eval()``; ``best_model(data)`` must return
            per-class scores with the classes along dimension 1.

    Returns:
        None. The accuracy and the number of correct predictions are printed.

    Raises:
        ZeroDivisionError: if ``test_loader.dataset`` is empty.
    """
    # Without this a model left in train mode would be scored with dropout active
    best_model.eval()

    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Per-class scores for every object in the batch
            output = best_model(data)
            # Predicted labels
            pred = output.argmax(dim=1, keepdim=False)

            # Number of correctly predicted class labels
            correct += pred.eq(target.view_as(pred)).sum().item()

    total = len(test_loader.dataset)
    accuracy = correct / total

    print(f'Accuracy - {accuracy}')
    print(f'{correct} labels out of {total} objects were correctly assigned')
    

if __name__ == "__main__":
    # Maximise the value returned by `objective`; the search stops after
    # 50 trials or 600 seconds, whichever comes first.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50, timeout=600)

    # Fetch the trial list once and reuse it for all the statistics below
    all_trials = study.trials
    pruned_trials = [t for t in all_trials if t.state == optuna.trial.TrialState.PRUNED]
    complete_trials = [t for t in all_trials if t.state == optuna.trial.TrialState.COMPLETE]

    print("Study statistics: ")
    print("  Number of finished trials: ", len(all_trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    if complete_trials:
        trial = study.best_trial

        print("  Value: ", trial.value)

        print("  Params: ")
        for key, value in trial.params.items():
            print(f"    {key}: {value}")

        # Shown only when the objective recorded a per-trial test accuracy
        best_trial_test_accuracy = trial.user_attrs.get("test_accuracy")
        if best_trial_test_accuracy is not None:
            print(f"  Test accuracy of this trial: {best_trial_test_accuracy}")
    else:
        # study.best_trial raises ValueError when no trial has completed
        print("  No trial completed, so there is no best trial to report")

    # `test_accuracy` is the highest test accuracy seen over all trials and
    # epochs; it does not necessarily belong to the best trial printed above.
    print(f'\nBest model accuracy {test_accuracy}')

[32m[I 2021-01-15 22:32:49,129][0m A new study created in memory with name: no-name-9a9f59f3-225b-4267-a0fe-660d351169b6[0m
[32m[I 2021-01-15 22:33:07,805][0m Trial 0 finished with value: 0.9098 and parameters: {'n_layers': 3, 'n_units_l0': 429, 'dropout_l0': 0.41084891219713937, 'n_units_l1': 85, 'dropout_l1': 0.22274557247090618, 'n_units_l2': 276, 'dropout_l2': 0.25610214111637236, 'optimizer': 'Adam', 'lr': 5.736254519743909e-05}. Best is trial 0 with value: 0.9098.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:33:16,182][0m Trial 1 finished with value: 0.2741 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'dropout_l0': 0.2297348311081977, 'n_units_l1': 328, 'dropout_l1': 0.33050124281172644, 'n_units_l2': 483, 'dropout_l2': 0.3402349211188188, 'optimizer': 'SGD', 'lr': 0.0007298508269815263}. Best is trial 0 with value: 0.9098.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:33:19,942][0m Trial 2 finished with value: 0.3112 and parameters: {'n_layers': 1, 'n_units_l0': 94, 'dropout_l0': 0.34956241407144784, 'optimizer': 'SGD', 'lr': 8.472899768329822e-05}. Best is trial 0 with value: 0.9098.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:33:24,798][0m Trial 3 finished with value: 0.4984 and parameters: {'n_layers': 1, 'n_units_l0': 12, 'dropout_l0': 0.4304600844849822, 'optimizer': 'Adam', 'lr': 2.5603264038579248e-05}. Best is trial 0 with value: 0.9098.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:33:35,963][0m Trial 4 finished with value: 0.7508 and parameters: {'n_layers': 1, 'n_units_l0': 201, 'dropout_l0': 0.30385189404091856, 'optimizer': 'Adam', 'lr': 0.03264575763193049}. Best is trial 0 with value: 0.9098.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:33:53,481][0m Trial 5 finished with value: 0.9667 and parameters: {'n_layers': 1, 'n_units_l0': 401, 'dropout_l0': 0.48907092040477557, 'optimizer': 'Adam', 'lr': 0.002289667742601567}. Best is trial 5 with value: 0.9667.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:34:00,360][0m Trial 6 finished with value: 0.8905 and parameters: {'n_layers': 2, 'n_units_l0': 200, 'dropout_l0': 0.21142294234329867, 'n_units_l1': 206, 'dropout_l1': 0.40480955681719555, 'optimizer': 'SGD', 'lr': 0.00681814627138274}. Best is trial 5 with value: 0.9667.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:34:01,248][0m Trial 7 pruned. [0m
[32m[I 2021-01-15 22:34:11,895][0m Trial 8 finished with value: 0.9609 and parameters: {'n_layers': 2, 'n_units_l0': 253, 'dropout_l0': 0.43186054230457743, 'n_units_l1': 289, 'dropout_l1': 0.4670186860605348, 'optimizer': 'RMSprop', 'lr': 0.0015178778941547532}. Best is trial 5 with value: 0.9667.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:34:18,013][0m Trial 9 finished with value: 0.9238 and parameters: {'n_layers': 1, 'n_units_l0': 50, 'dropout_l0': 0.43025507477368063, 'optimizer': 'Adam', 'lr': 0.0012490237418554724}. Best is trial 5 with value: 0.9667.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:34:19,424][0m Trial 10 pruned. [0m
[32m[I 2021-01-15 22:34:35,858][0m Trial 11 finished with value: 0.9666 and parameters: {'n_layers': 2, 'n_units_l0': 352, 'dropout_l0': 0.49253925195981973, 'n_units_l1': 487, 'dropout_l1': 0.4950896635134939, 'optimizer': 'RMSprop', 'lr': 0.0009767757008174829}. Best is trial 5 with value: 0.9667.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:34:53,208][0m Trial 12 finished with value: 0.9716 and parameters: {'n_layers': 2, 'n_units_l0': 373, 'dropout_l0': 0.4992637990091811, 'n_units_l1': 472, 'dropout_l1': 0.4018178348296462, 'optimizer': 'RMSprop', 'lr': 0.00033409727928419814}. Best is trial 12 with value: 0.9716.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:35:09,474][0m Trial 13 finished with value: 0.9676 and parameters: {'n_layers': 2, 'n_units_l0': 365, 'dropout_l0': 0.4990610086754125, 'n_units_l1': 498, 'dropout_l1': 0.3748502550427031, 'optimizer': 'RMSprop', 'lr': 0.0002067646445850057}. Best is trial 12 with value: 0.9716.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:35:21,099][0m Trial 14 finished with value: 0.9658 and parameters: {'n_layers': 2, 'n_units_l0': 315, 'dropout_l0': 0.46775274724032295, 'n_units_l1': 497, 'dropout_l1': 0.3677141381615474, 'optimizer': 'RMSprop', 'lr': 0.00021647237048110147}. Best is trial 12 with value: 0.9716.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:35:33,803][0m Trial 15 finished with value: 0.9767 and parameters: {'n_layers': 2, 'n_units_l0': 316, 'dropout_l0': 0.3656487254629422, 'n_units_l1': 428, 'dropout_l1': 0.4172312904515172, 'optimizer': 'RMSprop', 'lr': 0.0003004739095190486}. Best is trial 15 with value: 0.9767.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:35:35,098][0m Trial 16 pruned. [0m
[32m[I 2021-01-15 22:35:37,205][0m Trial 17 pruned. [0m
[32m[I 2021-01-15 22:35:38,198][0m Trial 18 pruned. [0m
[32m[I 2021-01-15 22:35:39,837][0m Trial 19 pruned. [0m
[32m[I 2021-01-15 22:35:57,779][0m Trial 20 finished with value: 0.9902 and parameters: {'n_layers': 2, 'n_units_l0': 413, 'dropout_l0': 0.24735187090626332, 'n_units_l1': 341, 'dropout_l1': 0.40304196331640596, 'optimizer': 'RMSprop', 'lr': 0.000482214510766566}. Best is trial 20 with value: 0.9902.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:36:13,972][0m Trial 21 finished with value: 0.9859 and parameters: {'n_layers': 2, 'n_units_l0': 388, 'dropout_l0': 0.2547200785334911, 'n_units_l1': 340, 'dropout_l1': 0.4200802882142507, 'optimizer': 'RMSprop', 'lr': 0.0003436797847966903}. Best is trial 20 with value: 0.9902.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:36:31,165][0m Trial 22 finished with value: 0.9901 and parameters: {'n_layers': 2, 'n_units_l0': 432, 'dropout_l0': 0.2520282086621887, 'n_units_l1': 351, 'dropout_l1': 0.4483788219884629, 'optimizer': 'RMSprop', 'lr': 0.0005839076043443495}. Best is trial 20 with value: 0.9902.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:36:47,990][0m Trial 23 finished with value: 0.988 and parameters: {'n_layers': 2, 'n_units_l0': 446, 'dropout_l0': 0.2505264936195003, 'n_units_l1': 340, 'dropout_l1': 0.4630761146981533, 'optimizer': 'RMSprop', 'lr': 0.0005082659723707941}. Best is trial 20 with value: 0.9902.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:36:57,893][0m Trial 24 pruned. [0m
[32m[I 2021-01-15 22:37:15,698][0m Trial 25 finished with value: 0.9903 and parameters: {'n_layers': 2, 'n_units_l0': 497, 'dropout_l0': 0.20944805967030697, 'n_units_l1': 352, 'dropout_l1': 0.45447755536395607, 'optimizer': 'RMSprop', 'lr': 0.0005672680824260684}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:37:18,183][0m Trial 26 pruned. [0m
[32m[I 2021-01-15 22:37:38,300][0m Trial 27 finished with value: 0.9821 and parameters: {'n_layers': 3, 'n_units_l0': 493, 'dropout_l0': 0.2870948081280017, 'n_units_l1': 362, 'dropout_l1': 0.44721044676581945, 'n_units_l2': 138, 'dropout_l2': 0.21421046912769587, 'optimizer': 'RMSprop', 'lr': 0.0008151706024498281}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:37:39,740][0m Trial 28 pruned. [0m
[32m[I 2021-01-15 22:37:41,622][0m Trial 29 pruned. [0m
[32m[I 2021-01-15 22:37:57,983][0m Trial 30 finished with value: 0.9797 and parameters: {'n_layers': 2, 'n_units_l0': 486, 'dropout_l0': 0.20049414532674953, 'n_units_l1': 241, 'dropout_l1': 0.3946946489944929, 'optimizer': 'RMSprop', 'lr': 0.0020629250991577347}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:38:14,546][0m Trial 31 finished with value: 0.9862 and parameters: {'n_layers': 2, 'n_units_l0': 446, 'dropout_l0': 0.26670926246462545, 'n_units_l1': 358, 'dropout_l1': 0.47082010248726575, 'optimizer': 'RMSprop', 'lr': 0.0006036911748963702}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:38:30,342][0m Trial 32 finished with value: 0.9889 and parameters: {'n_layers': 2, 'n_units_l0': 441, 'dropout_l0': 0.23499764952026672, 'n_units_l1': 312, 'dropout_l1': 0.45624965208772844, 'optimizer': 'RMSprop', 'lr': 0.00048783396548309524}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:38:31,418][0m Trial 33 pruned. [0m
[32m[I 2021-01-15 22:38:33,054][0m Trial 34 pruned. [0m
[32m[I 2021-01-15 22:38:43,579][0m Trial 35 finished with value: 0.9886 and parameters: {'n_layers': 1, 'n_units_l0': 342, 'dropout_l0': 0.28237641899177496, 'optimizer': 'RMSprop', 'lr': 0.0012068179395898033}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:38:45,910][0m Trial 36 pruned. [0m
[32m[I 2021-01-15 22:38:46,715][0m Trial 37 pruned. [0m
[32m[I 2021-01-15 22:39:14,879][0m Trial 38 finished with value: 0.976 and parameters: {'n_layers': 2, 'n_units_l0': 500, 'dropout_l0': 0.22266946424950834, 'n_units_l1': 374, 'dropout_l1': 0.2025402841989105, 'optimizer': 'Adam', 'lr': 0.0024268409578525846}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:39:16,033][0m Trial 39 pruned. [0m
[32m[I 2021-01-15 22:39:17,134][0m Trial 40 pruned. [0m
[32m[I 2021-01-15 22:39:27,787][0m Trial 41 finished with value: 0.9901 and parameters: {'n_layers': 1, 'n_units_l0': 364, 'dropout_l0': 0.28194019922838814, 'optimizer': 'RMSprop', 'lr': 0.0012912870826014638}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:39:38,612][0m Trial 42 finished with value: 0.9886 and parameters: {'n_layers': 1, 'n_units_l0': 383, 'dropout_l0': 0.23746727010983942, 'optimizer': 'RMSprop', 'lr': 0.0015885145666954168}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:39:52,935][0m Trial 43 finished with value: 0.9881 and parameters: {'n_layers': 2, 'n_units_l0': 340, 'dropout_l0': 0.21262723660429034, 'n_units_l1': 285, 'dropout_l1': 0.413580900803986, 'optimizer': 'RMSprop', 'lr': 0.0009089464538772671}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:39:54,090][0m Trial 44 pruned. [0m
[32m[I 2021-01-15 22:39:55,561][0m Trial 45 pruned. [0m
[32m[I 2021-01-15 22:40:10,774][0m Trial 46 finished with value: 0.9883 and parameters: {'n_layers': 2, 'n_units_l0': 369, 'dropout_l0': 0.2948109413321321, 'n_units_l1': 461, 'dropout_l1': 0.3561109397610883, 'optimizer': 'RMSprop', 'lr': 0.0007586623004363177}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:40:25,314][0m Trial 47 finished with value: 0.9877 and parameters: {'n_layers': 1, 'n_units_l0': 393, 'dropout_l0': 0.2631760709804497, 'optimizer': 'Adam', 'lr': 0.0015300896321639996}. Best is trial 25 with value: 0.9903.[0m


Number of epochs produced 10


[32m[I 2021-01-15 22:40:27,059][0m Trial 48 pruned. [0m
[32m[I 2021-01-15 22:40:28,669][0m Trial 49 pruned. [0m


Study statistics: 
  Number of finished trials:  50
  Number of pruned trials:  20
  Number of complete trials:  30
Best trial:
  Value:  0.9903
  Params: 
    n_layers: 2
    n_units_l0: 497
    dropout_l0: 0.20944805967030697
    n_units_l1: 352
    dropout_l1: 0.45447755536395607
    optimizer: RMSprop
    lr: 0.0005672680824260684

Best model accuracy 0.976


#### As you can see, the model obtained using the MIHA algorithm gave a similar result (accuracy 0.974) to the neural network optimized using Optuna (accuracy 0.976, the highest test accuracy observed across all trials and epochs).

However, the Optuna repository has several thousand stars, while this library has none. Help us change that (we really tried very hard)!