# Tuning Results Evaluation

## Imports

In [241]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

import json
import glob
import random
import pickle

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch import nn
import torch.nn.functional as F
import spotPython.torch.netcore as netcore
import numpy as np

from src.eda import EDA
eda = EDA()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [164]:
torch.manual_seed(40)
random.seed(40)
np.random.seed(40)

## Data Exploration
- [bar plot] class distribution (train, test)
- [foto grid] random sample per class (train)
- [rotating plot] pca analysis (train)
- [bar plot] black pixel per class (train, test) 

## Data Preparation (Tuning Logs)
- raytune json to pandas df
- spot tf to pandas df

### Utils

In [234]:
class FashionCNN(nn.Module):
    def __init__(self, l1=64):
        super(FashionCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, l1)
        self.fc2 = nn.Linear(l1, 10)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
class Net_FashionMNIST(netcore.Net_Core):
    def __init__(self, l1, lr_mult, batch_size, epochs, k_folds, patience,
    optimizer, sgd_momentum):
        super(Net_FashionMNIST, self).__init__(
            lr_mult=lr_mult,
            batch_size=batch_size,
            epochs=epochs,
            k_folds=k_folds,
            patience=patience,
            optimizer=optimizer,
            sgd_momentum=sgd_momentum,
        )

        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, l1)
        self.fc2 = nn.Linear(l1, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [186]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

def load_data(data_dir="./data"):
    transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))])

    trainset = torchvision.datasets.FashionMNIST(data_dir,
        download=True,
        train=True,
        transform=transform)
    testset = torchvision.datasets.FashionMNIST(data_dir,
        download=True,
        train=False,
        transform=transform)

    return trainset, testset

def train_fashion_mnist(config):
    net = FashionCNN(config["config.l1"]) 

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    # # loading data
    trainset, testset = load_data()
    
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=config["config.batch_size"], shuffle=True, num_workers=2
    )

    # defining the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["config.learning_rate"])

    for epoch in range(config["config.epochs"]):
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1

            if i % 2000 == 1999:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / epoch_steps))
                running_loss = 0.0

    print("Training finished.")
    return net

def test_accuracy(net):
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2
    )

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

def test_class_accuracy(net):
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2
    )

    correct_per_class = [0] * len(class_names)
    total_per_class = [0] * len(class_names)
    correct = 0
    total = 0

    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            
            for i in range(len(labels)):
                label = labels[i]
                prediction = predicted[i]
                if label == prediction:
                    correct_per_class[label] += 1
                total_per_class[label] += 1
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy_per_class = [correct / total * 100 if total > 0 else 0 for correct, total in zip(correct_per_class, total_per_class)]

    for i, accuracy in enumerate(accuracy_per_class):
        print('Accuracy of {}: {:.2f}%'.format(class_names[i], accuracy))
        
    accuracy = correct / total
    print('Overall Accuracy: {:.2f}%'.format(accuracy * 100))
    return accuracy_per_class, accuracy

In [231]:
def experimentPathToDataframe(experiment_path):
    df_results = pd.DataFrame()
    for file in glob.iglob(experiment_path + '/**/result.json', recursive=True):
        print(f"Loading results from {experiment_path}...")

        df = pd.read_json(file,lines=True)
        data = json.loads(df.to_json(orient='records'))
        df = pd.json_normalize(data, max_level=1)
        df_results = pd.concat([df_results, df], ignore_index=True)

    print(f"----------------------------\nFound {len(df_results.trial_id.unique())} trials.")
    return df_results

def experimentPathToSpotTuner(experiment_path):
    with open(experiment_path, 'rb') as f:
        spot_tuner =  pickle.load(f)
    return spot_tuner

In [79]:
e1 = f"experiment_data/e1_L1Units/train_fashion_mnist_2023-08-22_09-47-02"
df_e1 = experimentPathToDataframe(e1)

Loading results from experiment_data/e1_L1Units/train_fashion_mnist_2023-08-22_09-47-02...
Loading results from experiment_data/e1_L1Units/train_fashion_mnist_2023-08-22_09-47-02...
Loading results from experiment_data/e1_L1Units/train_fashion_mnist_2023-08-22_09-47-02...
Loading results from experiment_data/e1_L1Units/train_fashion_mnist_2023-08-22_09-47-02...
----------------------------
Found 4 trials.


In [80]:
e2 = f"experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/"
df_e2 = experimentPathToDataframe(e2)

Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023-08-22_11-02-54/...
Loading results from experiment_data/e2_L1Units_BatchSize/train_fashion_mnist_2023

In [81]:
e3 = f"experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/"
df_e3 = experimentPathToDataframe(e3)

Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-06-06/...
Loading results from experiment_data/e3_Epochs_BatchSize/train_fashion_mnist_2023-08-22_12-

In [82]:
e4 = f"experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/"
df_e4 = experimentPathToDataframe(e4)

Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_mnist_2023-08-23_06-28-05/...
Loading results from experiment_data/e4_Epochs_LarningRate/train_fashion_

In [83]:
e5 = f"experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/"
df_e5 = experimentPathToDataframe(e5)

Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/e5_BatchSize_LearningRate/train_fashion_mnist_2023-08-22_18-39-53/...
Loading results from experiment_data/

In [86]:
e6 = f"experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/"
df_e6 = experimentPathToDataframe(e6)

Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from experiment_data/e6_ASHS/train_fashion_mnist_2023-08-24_08-46-40/...
Loading results from 

In [87]:
e7 = f"experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/"
df_e7 = experimentPathToDataframe(e7)

Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_data/e7_RND/train_fashion_mnist_2023-08-25_07-47-49/...
Loading results from experiment_

In [248]:
spotTuner_e8 = experimentPathToSpotTuner("experiment_data/e8_spotPython/spot_runs_23-08-2023.pkl")

## Performance (Improvements/Deterioration)
- [Text output] Best config parameters (default, raytune ASHS, raytune random, spot)
- [Text output] Network architecture best config (default, raytune ASHS, raytune random, spot)
- [line plot] mean accuracy over epochs all trials per epoch config (raytune ASHS, raytune random)
- [boxplot 20 models] Overall Accuracy best config (default, raytune ASHS, raytune random, spot)
- [bar plot] Class Accuracy best config (default, raytune ASHS, raytune random, spot)

### Utils

In [259]:
def bestConfig(df):
    df_tmp = df.groupby('trial_id').max('training_iteration').reset_index()
    df_tmp = df_tmp[df_tmp['mean_val_loss'] == df_tmp['mean_val_loss'].min()]
    best_config = df_tmp[['mean_val_loss', 'mean_accuracy', 'config.l1', 'config.batch_size', 'config.epochs', 'config.learning_rate']].to_dict(orient='records')[0]
    # for key, value in best_config.items():
    #     print(f'{key}: {value}') 
    return best_config

def bestConfigSpot(spotTuner):
    results = spotTuner.print_results(print_screen=False)
    best_config = {
        'mean_val_loss': None,
        'mean_accuracy': None,
        'config.l1': None,
        'config.batch_size': None,
        'config.epochs': None,
        'config.learning_rate': None,
    }
    for conf in results:
        if conf[0] == 'l1':
            best_config['config.l1'] = int(2**conf[1])
        elif conf[0] == 'batch_size':
            best_config['config.batch_size'] = int(2**conf[1])
        elif conf[0] == 'epochs':
            best_config['config.epochs'] = [5, 10, 15, 20][int(conf[1])]
        elif conf[0] == 'lr_mult':
            best_config['config.learning_rate'] = conf[1] * 1e-3
    return best_config

def repeatedTraining(config, iter):
    res = []
    for i in range(iter):
        torch.manual_seed(i)
        random.seed(i)
        np.random.seed(i)

        net = train_fashion_mnist(config)
        acc = test_accuracy(net)
        res.append(acc)
    return res

### Results

In [260]:
default_config = {
    'mean_val_loss': None,
    'mean_accuracy': None,
    'config.l1': 64,
    'config.batch_size': 64,
    'config.epochs': 15,
    'config.learning_rate': 0.001,
}
print(default_config)

print(bestConfig(df_e1))
print(bestConfig(df_e2))
print(bestConfig(df_e3))
print(bestConfig(df_e4))
print(bestConfig(df_e5))
print(bestConfig(df_e6))
print(bestConfig(df_e7))
print(bestConfigSpot(spotTuner_e8))

{'mean_val_loss': None, 'mean_accuracy': None, 'config.l1': 64, 'config.batch_size': 64, 'config.epochs': 15, 'config.learning_rate': 0.001}
{'mean_val_loss': 1.9368493291, 'mean_accuracy': 0.82125, 'config.l1': 256, 'config.batch_size': 64, 'config.epochs': 15, 'config.learning_rate': 0.001}
{'mean_val_loss': 0.7405974753, 'mean_accuracy': 0.8635833333, 'config.l1': 256, 'config.batch_size': 16, 'config.epochs': 15, 'config.learning_rate': 0.001}
{'mean_val_loss': 0.7231627245, 'mean_accuracy': 0.8865833333, 'config.l1': 64, 'config.batch_size': 16, 'config.epochs': 20, 'config.learning_rate': 0.001}
{'mean_val_loss': 0.4083004748, 'mean_accuracy': 0.9186666667, 'config.l1': 64, 'config.batch_size': 64, 'config.epochs': 15, 'config.learning_rate': 0.0669519224}
{'mean_val_loss': 0.373413355, 'mean_accuracy': 0.91925, 'config.l1': 64, 'config.batch_size': 32, 'config.epochs': 15, 'config.learning_rate': 0.0633335836}
{'mean_val_loss': 0.3274204327, 'mean_accuracy': 0.9153333333, 'confi

In [167]:
model_default = train_fashion_mnist(config=default_config)

Training finished.


In [168]:
model_default

FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [196]:
default_accuracy_per_class, default_overall_accuracy = test_class_accuracy(model_default)

Accuracy of T-shirt/top: 81.40%
Accuracy of Trouser: 94.90%
Accuracy of Pullover: 73.00%
Accuracy of Dress: 88.20%
Accuracy of Coat: 77.00%
Accuracy of Sandal: 92.70%
Accuracy of Shirt: 30.20%
Accuracy of Sneaker: 87.90%
Accuracy of Bag: 93.00%
Accuracy of Ankle boot: 94.70%
Overall Accuracy: 81.30%


In [155]:
model_e6 = train_fashion_mnist(config=bestConfig(df_e6))

[1,  2000] loss: 0.569
[2,  2000] loss: 0.301
[3,  2000] loss: 0.245
[4,  2000] loss: 0.209
[5,  2000] loss: 0.184
Training finished.


In [157]:
model_e6

FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [194]:
e6_accuracy_per_class, e6_overall_accuracy = test_class_accuracy(model_e6)

Accuracy of T-shirt/top: 85.10%
Accuracy of Trouser: 97.80%
Accuracy of Pullover: 89.00%
Accuracy of Dress: 94.80%
Accuracy of Coat: 84.70%
Accuracy of Sandal: 99.50%
Accuracy of Shirt: 75.20%
Accuracy of Sneaker: 91.00%
Accuracy of Bag: 98.60%
Accuracy of Ankle boot: 96.30%
Overall Accuracy: 91.20%


In [159]:
model_e7 = train_fashion_mnist(config=bestConfig(df_e7))

Training finished.


In [160]:
model_e7

FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [205]:
e7_accuracy_per_class, e7_overall_accuracy = test_class_accuracy(model_e7)

Accuracy of T-shirt/top: 85.90%
Accuracy of Trouser: 98.60%
Accuracy of Pullover: 91.00%
Accuracy of Dress: 93.00%
Accuracy of Coat: 85.10%
Accuracy of Sandal: 98.40%
Accuracy of Shirt: 74.80%
Accuracy of Sneaker: 98.40%
Accuracy of Bag: 99.10%
Accuracy of Ankle boot: 95.10%
Overall Accuracy: 91.94%


In [261]:
model_e8 = train_fashion_mnist(bestConfigSpot(spotTuner_e8))

[1,  2000] loss: 0.881
[2,  2000] loss: 0.454
[3,  2000] loss: 0.384
[4,  2000] loss: 0.341
[5,  2000] loss: 0.321
[6,  2000] loss: 0.295
[7,  2000] loss: 0.277
[8,  2000] loss: 0.271
[9,  2000] loss: 0.257
[10,  2000] loss: 0.242
[11,  2000] loss: 0.235
[12,  2000] loss: 0.223
[13,  2000] loss: 0.214
[14,  2000] loss: 0.216
[15,  2000] loss: 0.204
[16,  2000] loss: 0.198
[17,  2000] loss: 0.191
[18,  2000] loss: 0.184
[19,  2000] loss: 0.181
[20,  2000] loss: 0.179
Training finished.


In [262]:
model_e8

FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [263]:
e8_accuracy_per_class, e8_overall_accuracy = test_class_accuracy(model_e8)

Accuracy of T-shirt/top: 90.80%
Accuracy of Trouser: 98.60%
Accuracy of Pullover: 78.80%
Accuracy of Dress: 89.70%
Accuracy of Coat: 90.90%
Accuracy of Sandal: 97.20%
Accuracy of Shirt: 70.40%
Accuracy of Sneaker: 98.00%
Accuracy of Bag: 98.80%
Accuracy of Ankle boot: 95.50%
Overall Accuracy: 90.87%


In [276]:
fig = go.Figure()

fig.add_trace(go.Bar(
    x=class_names,
    y=default_accuracy_per_class,
    name='Default',
    #marker_color=colors[0]
))

fig.add_trace(go.Bar(
    x=class_names,
    y=e6_accuracy_per_class,
    name='Hyperband',
    #marker_color=colors[1]
))

fig.add_trace(go.Bar(
    x=class_names,
    y=e7_accuracy_per_class,
    name='Random Search',
    #marker_color=colors[2]
))

fig.add_trace(go.Bar(
    x=class_names,
    y=e8_accuracy_per_class,
    name='SMBO',
    #marker_color=colors[3]
))

fig.update_layout(
    title='Accuracy per Class for differently tuned Models',
    xaxis_title='Image Class',
    yaxis_title='Accuracy (%)',
    barmode='group',
    width=1200,
    height=600,
    showlegend=True,
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

fig.show()

In [None]:
default_reps = repeatedTraining(default_config, 3)
default_reps

In [221]:
e6_reps = repeatedTraining(bestConfig(df_e6), 3)
e6_reps

[1,  2000] loss: 0.551
[2,  2000] loss: 0.296
[3,  2000] loss: 0.241
[4,  2000] loss: 0.212
[5,  2000] loss: 0.180
Training finished.
[1,  2000] loss: 0.562
[2,  2000] loss: 0.298
[3,  2000] loss: 0.246
[4,  2000] loss: 0.212
[5,  2000] loss: 0.182
Training finished.
[1,  2000] loss: 0.553
[2,  2000] loss: 0.297
[3,  2000] loss: 0.244
[4,  2000] loss: 0.213
[5,  2000] loss: 0.183
Training finished.


[0.9128, 0.9158, 0.9142]

In [None]:
e7_reps = repeatedTraining(bestConfig(df_e7), 3)
e7_reps

In [None]:
e8_reps = repeatedTraining(bestConfigSpot(spotTuner_e8), 3)
e8_reps

In [275]:
x_data = ['Default', 'Random Search', 'Hyperband', 'SMBO']

# y_data = [default_reps, e6_reps, e7_reps, e8_reps]
y_data = [e6_reps, e6_reps, e6_reps, e6_reps]

# colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
#           'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

fig = go.Figure()

for xd, yd in zip(x_data, y_data):
        fig.add_trace(go.Box(
            y=yd,
            name=xd,
            boxpoints='all',
            jitter=0.5,
            whiskerwidth=0.2,
            #fillcolor=cls,
            marker_size=2,
            line_width=1)
        )

fig.update_layout(
    title='Accuracy for differently tuned Models',
    yaxis=dict(
        autorange=True,
        showgrid=True,
        zeroline=True,
        dtick=5,
        gridcolor='rgb(255, 255, 255)',
        gridwidth=1,
        zerolinecolor='rgb(255, 255, 255)',
        zerolinewidth=2,
    ),
    margin=dict(
        l=40,
        r=30,
        b=80,
        t=100,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    showlegend=False
)

fig.show()

In [266]:
e7_grouped = df_e7.groupby('config.epochs')
for config in df_e7['config.epochs'].unique().tolist():
    df = e7_grouped.get_group(config)
    print(f'{len(df.trial_id.unique())} trials with {config} epochs tested.')
    fig = px.line(df, 
              x="training_iteration", 
              y="mean_accuracy", 
              color="trial_id", 
              title="Random Search - Mean Accuracy over Epochs",
              hover_data=[
                  "config.l1",
                  "config.batch_size",
                  "config.epochs",
                  "config.learning_rate"
              ],
              width=800, 
              height=600)
    fig.show()

35 trials with 20 epochs tested.


28 trials with 10 epochs tested.


35 trials with 15 epochs tested.


22 trials with 5 epochs tested.


In [115]:
e6_grouped = df_e6.groupby('config.epochs')
for config in df_e6['config.epochs'].unique().tolist():
    df = e6_grouped.get_group(config)
    print(f'{len(df.trial_id.unique())} trials with {config} epochs tested.')
    fig = px.line(df, 
              x="training_iteration", 
              y="mean_accuracy", 
              color="trial_id", 
              title="Hyberband - Mean Accuracy over Epochs",
              hover_data=[
                  "config.l1",
                  "config.batch_size",
                  "config.epochs",
                  "config.learning_rate"
              ],
              width=800, 
              height=600)
    fig.show()

78 trials with 10 epochs tested.


75 trials with 5 epochs tested.


77 trials with 15 epochs tested.


81 trials with 20 epochs tested.


## Hyperparameter Influence
- [parallel plots] 6h runs (raytune ASHS, raytune random, spot)
- [scatter plots] e1 - e5 (raytune grid/random)
- [contour interpolated plots] e1 - e5 (raytune grid/random)
- [contour plots] e1 - e5 (spot)
- [bar plot] parameter importance (spot)