In [1]:
# Tutorial's
from functools import partial
# import torchvision.transforms as transforms
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from torch.utils.data import random_split
import os

# Mutual
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

# Ours
from torchvision.transforms import *
from torch.utils.data import DataLoader
import math

# Parameters and transformations

In [2]:
# BATCH_SIZE = 64
# EPOCHS = 3
# LEARNING_RATE = 0.001
BATCH_INTERVAL = 100
TRAIN_SIZE = 50000
VALIDATION_SIZE = 10000

In [3]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

def get_dataloader(train_set, BATCH_SIZE):
    return torch.utils.data.DataLoader(train_set, batch_size = BATCH_SIZE, shuffle = True, num_workers = 2)

# Load data

In [4]:
def load_data(data_dir="../data/mnist-varres/test/"):
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])
    trainset = "emptytest"
    testset = torchvision.datasets.ImageFolder(root = data_dir,
                                               transform=transform)
    return trainset, testset

In [5]:
def get_distribution(train_size, val_size, total_size):
    sizes = [math.floor(train_size/(train_size+val_size)*total_size),
     math.ceil((1-train_size/(train_size+val_size))*total_size)]
    return sizes

# Network architecture

In [None]:
# # Tutorial
# class Net(nn.Module):
#     def __init__(self, l1=120, l2=84):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 6, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(6, 16, 5)
#         self.fc1 = nn.Linear(16 * 5 * 5, l1)
#         self.fc2 = nn.Linear(l1, l2)
#         self.fc3 = nn.Linear(l2, 10)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(-1, 16 * 5 * 5)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        N = 81
        self.conv1 = nn.Conv2d(1, 16, kernel_size = 3, stride = 1, padding = 1)  # size: batch, 1, 32, 32
        self.conv2 = nn.Conv2d(16, 32, kernel_size = 3, stride = 1, padding = 1) # (batch, 16, 16, 16)
        self.conv3 = nn.Conv2d(32, N, kernel_size = 3, stride = 1, padding = 1) # (batch, 32, 8, 8)
        self.fc1 = nn.Linear(N, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2) # (batch, 16, 28, 28)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2) # (batch, 32, 14, 14)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2) # (batch, N, 8, 8)
        x = F.max_pool2d(x, kernel_size = x.size()[2:]) # (batch, N, 4, 4)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return F.log_softmax(x, dim = 1)

In [7]:
# config: the hyperparameters we would like to train with
# checkpoint_dir: used to restore checkpoints
# data_dir: the directory where we load and store the data, so multiple runs can share the same data source
def train_mnist(config, checkpoint_dir=None, data_dir=None):
    net = Net()
#     net = Net(config["l1"], config["l2"]) # previously

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr = config["lr"])

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    # Load data.
    train_32 = torchvision.datasets.ImageFolder(root = "~/Anders/MasterBSB/2210_DeepLearning/Ass3/CNN/data/mnist-varres/32/train",
                                                  transform=transform)
    train_48 = torchvision.datasets.ImageFolder(root = "~/Anders/MasterBSB/2210_DeepLearning/Ass3/CNN/data/mnist-varres/48/train/",
                                                  transform=transform)
    train_64 = torchvision.datasets.ImageFolder(root = "~/Anders/MasterBSB/2210_DeepLearning/Ass3/CNN/data/mnist-varres/64/train/",
                                                  transform=transform)

    train_32, validation_32 = torch.utils.data.random_split(train_32,
                                                            get_distribution(TRAIN_SIZE, 
                                                                             VALIDATION_SIZE, 
                                                                             len(train_32)))
    train_48, validation_48 = torch.utils.data.random_split(train_48,
                                                            get_distribution(TRAIN_SIZE, 
                                                                             VALIDATION_SIZE, 
                                                                             len(train_48)))
    train_64, validation_64 = torch.utils.data.random_split(train_64,
                                                            get_distribution(TRAIN_SIZE, 
                                                                             VALIDATION_SIZE, 
                                                                             len(train_64)))
    train_loader_32 = get_dataloader(train_32, int(config["batch_size"]))
    train_loader_48 = get_dataloader(train_48, int(config["batch_size"]))
    train_loader_64 = get_dataloader(train_64, int(config["batch_size"]))
    validation_loader_32 = get_dataloader(validation_32, int(config["batch_size"]))
    validation_loader_48 = get_dataloader(validation_48, int(config["batch_size"]))
    validation_loader_64 = get_dataloader(validation_64, int(config["batch_size"]))
    
    train_loaders = {'32': train_loader_32,
                     '48': train_loader_48,
                     '64': train_loader_64
                    }
    validation_loaders = {'32': validation_loader_32,
                          '48': validation_loader_48,
                          '64': validation_loader_64
                         }
        
    for epoch in range(10):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        for r in train_loaders.keys():
            for i, data in enumerate(train_loaders[r], 0):
                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                epoch_steps += 1
                if i % 2000 == 1999:  # print every 2000 mini-batches
                    print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                    running_loss / epoch_steps))
                    running_loss = 0.0

            # Validation loss
            val_loss = 0.0
            val_steps = 0
            total = 0
            correct = 0
            for i, data in enumerate(validation_loaders[r], 0):
                with torch.no_grad():
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)

                    outputs = net(inputs)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    loss = criterion(outputs, labels)
                    val_loss += loss.cpu().numpy()
                    val_steps += 1

            with tune.checkpoint_dir(epoch) as checkpoint_dir:
                path = os.path.join(checkpoint_dir, "checkpoint")
                torch.save((net.state_dict(), optimizer.state_dict()), path)

            tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")

# Test

In [8]:
def test_accuracy(net, device="cpu"):
    trainset, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

# Wrap in another function

After training the models, we will find the best performing one and load the trained network from the checkpoint file. We then obtain the test set accuracy and report everything by printing.

In [9]:
# Full function:
def main(num_samples = 10, max_num_epochs = 10, gpus_per_trial = 0):
    data_dir = os.path.abspath("~/Anders/MasterBSB/2210_DeepLearning/Ass3/CNN/data/mnist-varres/")
    
    i = 1
    print("checkpoint", i)
    i += 1
    
    load_data() # Hier wordt alleen testdata goed geladen, traindata is dummy
    
    print("checkpoint", i)
    i += 1
    
    # Define hyperparameter search space.
    config = {
    #     "l1": tune.sample_from(lambda _: 2**np.random.randint(2, 9)),
    #     "l2": tune.sample_from(lambda _: 2**np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16, 32, 64])
    }
    # In this example, the l1 and l2 parameters should be powers of 2 between 4 and 256, so either
    # 4, 8, 16, 32, 64, 128, or 256.
    # The lr (learning rate) should be uniformly sampled between 0.0001 and 0.1
    
    print("checkpoint", i)
    i += 1
    
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(train_mnist, data_dir=data_dir),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    print("checkpoint", i)
    i += 1
    
    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    print("checkpoint", i)
    i += 1
    
    best_trained_model = Net() # removed best_trial.config["l1"], best_trial.config["l2"]
    device = "cpu"
    best_trained_model.to(device)

    print("checkpoint", i)
    i += 1
    
    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    print("Path to best model: {}".format(os.path.join(best_checkpoint_dir, "checkpoint")))
    best_trained_model.load_state_dict(model_state)

    print("checkpoint", i)
    i += 1
    
    test_acc = test_accuracy(best_trained_model, device) # dit is op test set, voor nu eruit
    print("Best trial test set accuracy: {}".format(test_acc))

In [10]:
main()

checkpoint 1
checkpoint 2
checkpoint 3


2022-12-12 11:09:53,464	INFO worker.py:1528 -- Started a local Ray instance.

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/master/tune/api_docs/trainable.html



== Status ==
Current time: 2022-12-12 11:09:54 (running for 00:00:00.16)
Memory usage on this node: 12.8/16.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 2.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (9 PENDING, 1 RUNNING)
+-------------------------+----------+-----------------+--------------+-------------+
| Trial name              | status   | loc             |   batch_size |          lr |
|-------------------------+----------+-----------------+--------------+-------------|
| train_mnist_1fa8b_00000 | RUNNING  | 127.0.0.1:25300 |           16 | 0.0579825   |
| train_mnist_1fa8b_00001 | PENDING  |                 |           64 | 0.00170639  |
| train_mnist_1fa8b_00002 | PENDING  |                 |           32 | 0.00125593  |
| train_mnist_1fa8b_00003 | PENDING  |   

== Status ==
Current time: 2022-12-12 11:10:21 (running for 00:00:26.79)
Memory usage on this node: 13.2/16.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (6 PENDING, 4 RUNNING)
+-------------------------+----------+-----------------+--------------+-------------+
| Trial name              | status   | loc             |   batch_size |          lr |
|-------------------------+----------+-----------------+--------------+-------------|
| train_mnist_1fa8b_00000 | RUNNING  | 127.0.0.1:25300 |           16 | 0.0579825   |
| train_mnist_1fa8b_00001 | RUNNING  | 127.0.0.1:25305 |           64 | 0.00170639  |
| train_mnist_1fa8b_00002 | RUNNING  | 127.0.0.1:25306 |           32 | 0.00125593  |
| train_mnist_1fa8b_00003 | RUNNING  | 12

Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_mnist_1fa8b_00000,0.0970172,2022-12-12_11-17-23,True,,58f82a9f477141fa91c47c9674e9bd66,feweb60177.feweb.vu.nl,10,2.3072,127.0.0.1,25300,True,447.263,31.7381,447.263,1670840243,0,,10,1fa8b_00000,0.00165415
train_mnist_1fa8b_00001,0.966857,2022-12-12_11-17-28,True,,ce4e41f67d72490f984e8ce410bd80cd,feweb60177.feweb.vu.nl,10,0.113326,127.0.0.1,25305,True,450.939,32.3061,450.939,1670840248,0,,10,1fa8b_00001,0.00185585
train_mnist_1fa8b_00002,0.984031,2022-12-12_11-17-29,True,,08eb94c4b723410386ceeabec70ea798,feweb60177.feweb.vu.nl,10,0.0428854,127.0.0.1,25306,True,451.384,32.1358,451.384,1670840249,0,,10,1fa8b_00002,0.00236416
train_mnist_1fa8b_00003,0.944562,2022-12-12_11-10-34,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,1,0.171551,127.0.0.1,25307,True,36.3205,36.3205,36.3205,1670839834,0,,1,1fa8b_00003,0.00194097
train_mnist_1fa8b_00004,0.936427,2022-12-12_11-11-15,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,1,0.184406,127.0.0.1,25307,True,41.7144,41.7144,41.7144,1670839875,0,,1,1fa8b_00004,0.00194097
train_mnist_1fa8b_00005,0.930087,2022-12-12_11-12-48,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,2,0.227908,127.0.0.1,25307,True,92.9554,43.7356,92.9554,1670839968,0,,2,1fa8b_00005,0.00194097
train_mnist_1fa8b_00006,0.10937,2022-12-12_11-13-21,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,1,2.3041,127.0.0.1,25307,True,32.5654,32.5654,32.5654,1670840001,0,,1,1fa8b_00006,0.00194097
train_mnist_1fa8b_00007,0.10455,2022-12-12_11-13-55,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,1,2.30516,127.0.0.1,25307,True,33.6214,33.6214,33.6214,1670840035,0,,1,1fa8b_00007,0.00194097
train_mnist_1fa8b_00008,0.940343,2022-12-12_11-14-38,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,1,0.201475,127.0.0.1,25307,True,43.6608,43.6608,43.6608,1670840078,0,,1,1fa8b_00008,0.00194097
train_mnist_1fa8b_00009,0.108768,2022-12-12_11-15-12,True,,ff766a178658471b8e53dac50e0b872b,feweb60177.feweb.vu.nl,1,2.30736,127.0.0.1,25307,True,33.4981,33.4981,33.4981,1670840112,0,,1,1fa8b_00009,0.00194097


== Status ==
Current time: 2022-12-12 11:10:39 (running for 00:00:44.80)
Memory usage on this node: 13.4/16.0 GiB 
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -0.16594259646864465
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (5 PENDING, 4 RUNNING, 1 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |     loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.30379  |   0.101537 |           

== Status ==
Current time: 2022-12-12 11:10:59 (running for 00:01:04.88)
Memory usage on this node: 12.6/16.0 GiB 
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -0.16594259646864465
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (5 PENDING, 4 RUNNING, 1 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |     loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.30379  |   0.101537 |           

== Status ==
Current time: 2022-12-12 11:11:19 (running for 00:01:25.05)
Memory usage on this node: 13.2/16.0 GiB 
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -1.2056181017498537 | Iter 1.000: -0.17155072365242702
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (4 PENDING, 4 RUNNING, 2 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |     loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.30403  |   0.1060

[2m[36m(func pid=25307)[0m [1,  8000] loss: 0.047
== Status ==
Current time: 2022-12-12 11:11:40 (running for 00:01:45.79)
Memory usage on this node: 13.4/16.0 GiB 
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -0.10720230986908921 | Iter 1.000: -0.17155072365242702
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (4 PENDING, 4 RUNNING, 2 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1

== Status ==
Current time: 2022-12-12 11:12:00 (running for 00:02:05.89)
Memory usage on this node: 12.7/16.0 GiB 
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -0.10720230986908921 | Iter 1.000: -0.17155072365242702
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (4 PENDING, 4 RUNNING, 2 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.30403   |   0

[2m[36m(func pid=25307)[0m [1,  6000] loss: 0.021
[2m[36m(func pid=25307)[0m [1,  8000] loss: 0.019
== Status ==
Current time: 2022-12-12 11:12:29 (running for 00:02:34.97)
Memory usage on this node: 13.2/16.0 GiB 
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -0.10720230986908921 | Iter 1.000: -0.16594259646864465
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (4 PENDING, 4 RUNNING, 2 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+---------------------

== Status ==
Current time: 2022-12-12 11:12:53 (running for 00:02:59.51)
Memory usage on this node: 12.9/16.0 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.16594259646864465
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 PENDING, 4 RUNNING, 3 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.30635   |   0

== Status ==
Current time: 2022-12-12 11:13:18 (running for 00:03:24.27)
Memory usage on this node: 13.0/16.0 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.16594259646864465
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 PENDING, 4 RUNNING, 3 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   |

== Status ==
Current time: 2022-12-12 11:13:42 (running for 00:03:47.74)
Memory usage on this node: 13.3/16.0 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.17155072365242702
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (2 PENDING, 4 RUNNING, 4 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   |

== Status ==
Current time: 2022-12-12 11:14:05 (running for 00:04:10.79)
Memory usage on this node: 13.4/16.0 GiB 
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.17797839789756745
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (1 PENDING, 4 RUNNING, 5 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   |

== Status ==
Current time: 2022-12-12 11:14:25 (running for 00:04:30.90)
Memory usage on this node: 12.8/16.0 GiB 
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.17797839789756745
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (1 PENDING, 4 RUNNING, 5 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   |

[2m[36m(func pid=25307)[0m [1,  2000] loss: 2.310
== Status ==
Current time: 2022-12-12 11:14:50 (running for 00:04:56.53)
Memory usage on this node: 13.3/16.0 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.18440607214270788
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (4 RUNNING, 6 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.

== Status ==
Current time: 2022-12-12 11:15:10 (running for 00:05:16.61)
Memory usage on this node: 12.9/16.0 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.18440607214270788
Resources requested: 8.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (4 RUNNING, 6 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.31241   

== Status ==
Current time: 2022-12-12 11:15:34 (running for 00:05:40.36)
Memory usage on this node: 12.8/16.0 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 RUNNING, 7 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.31461   

== Status ==
Current time: 2022-12-12 11:15:54 (running for 00:06:00.46)
Memory usage on this node: 12.7/16.0 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: None | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 RUNNING, 7 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.0579825   | 2.31461   

== Status ==
Current time: 2022-12-12 11:16:15 (running for 00:06:21.37)
Memory usage on this node: 13.2/16.0 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -0.06651443568989635 | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 RUNNING, 7 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.057982

== Status ==
Current time: 2022-12-12 11:16:35 (running for 00:06:41.46)
Memory usage on this node: 12.6/16.0 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -0.06651443568989635 | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 RUNNING, 7 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.057982

== Status ==
Current time: 2022-12-12 11:16:56 (running for 00:07:02.00)
Memory usage on this node: 12.9/16.0 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -0.06651443568989635 | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 RUNNING, 7 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.057982

== Status ==
Current time: 2022-12-12 11:17:16 (running for 00:07:22.68)
Memory usage on this node: 12.8/16.0 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -0.06651443568989635 | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (3 RUNNING, 7 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | RUNNING    | 127.0.0.1:25300 |           16 | 0.057982

2022-12-12 11:17:29,170	INFO tune.py:777 -- Total run time: 454.89 seconds (454.74 seconds for the tuning loop).


== Status ==
Current time: 2022-12-12 11:17:29 (running for 00:07:34.75)
Memory usage on this node: 11.3/16.0 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -0.06651443568989635 | Iter 4.000: -0.07145427793259912 | Iter 2.000: -0.16755537649459026 | Iter 1.000: -0.19294035070119842
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.87 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/jannekehulsen/ray_results/train_mnist_2022-12-12_11-09-54
Number of trials: 10/10 (10 TERMINATED)
+-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------+
| Trial name              | status     | loc             |   batch_size |          lr |      loss |   accuracy |   training_iteration |
|-------------------------+------------+-----------------+--------------+-------------+-----------+------------+----------------------|
| train_mnist_1fa8b_00000 | TERMINATED | 127.0.0.1:25300 |           16 | 0.0579825   | 2.307

AttributeError: '_TrackedCheckpoint' object has no attribute 'value'