In [1]:
#!pip install "ray[tune]" optuna

In [2]:
import torch
import torch.optim as optim
from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test
import optuna

In [3]:
get_data_loaders

<function ray.tune.examples.mnist_pytorch.get_data_loaders()>

In [4]:
def train_mnist(trial):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    train_loader, test_loader = get_data_loaders()
    print(train_loader, test_loader)
    model = ConvNet().to(device)

    optimizer = optim.SGD(
       model.parameters(),
       lr=trial.suggest_loguniform("lr", 1e-4, 1e-2),
       momentum=trial.suggest_uniform("momentum", 0.1, 0.9))

    for i in range(20):
        train(model, optimizer, train_loader, device)
        acc = test(model, test_loader, device)
        trial.report(-acc, i)
    return -acc

In [5]:
import time
start = time.time()
study = optuna.create_study()
study.optimize(train_mnist, n_jobs=1, n_trials=10)
taken = time.time() - start
print(f"Time taken: {taken:.2f} seconds.")
print(f"Best config: {study.best_params}")

[32m[I 2021-03-04 09:51:51,350][0m A new study created in memory with name: no-name-041f10e5-3ed9-4fe8-a194-3deac39c8034[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB3E138400> <torch.utils.data.dataloader.DataLoader object at 0x000001BB3E155940>


[32m[I 2021-03-04 09:52:00,277][0m Trial 0 finished with value: -0.709375 and parameters: {'lr': 0.002294876682481699, 'momentum': 0.2685036978384988}. Best is trial 0 with value: -0.709375.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB3E155C18> <torch.utils.data.dataloader.DataLoader object at 0x000001BB3E155A20>


[32m[I 2021-03-04 09:52:08,106][0m Trial 1 finished with value: -0.465625 and parameters: {'lr': 0.002344175318635213, 'momentum': 0.2080562577223681}. Best is trial 0 with value: -0.709375.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2518> <torch.utils.data.dataloader.DataLoader object at 0x000001BB417D25C0>


[32m[I 2021-03-04 09:52:15,820][0m Trial 2 finished with value: -0.103125 and parameters: {'lr': 0.0002136044742818606, 'momentum': 0.4717006636618488}. Best is trial 0 with value: -0.709375.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2860> <torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2898>


[32m[I 2021-03-04 09:52:23,549][0m Trial 3 finished with value: -0.146875 and parameters: {'lr': 0.0001260149480428593, 'momentum': 0.25094043136534216}. Best is trial 0 with value: -0.709375.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2BA8> <torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2BE0>


[32m[I 2021-03-04 09:52:31,460][0m Trial 4 finished with value: -0.828125 and parameters: {'lr': 0.004268270264110353, 'momentum': 0.27595486649078077}. Best is trial 4 with value: -0.828125.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2EF0> <torch.utils.data.dataloader.DataLoader object at 0x000001BB417D2F98>


[32m[I 2021-03-04 09:52:39,251][0m Trial 5 finished with value: -0.828125 and parameters: {'lr': 0.0018987690869440878, 'momentum': 0.7071908464243633}. Best is trial 4 with value: -0.828125.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB3E176278> <torch.utils.data.dataloader.DataLoader object at 0x000001BB3E1762B0>


[32m[I 2021-03-04 09:52:47,872][0m Trial 6 finished with value: -0.184375 and parameters: {'lr': 0.0002134785449996201, 'momentum': 0.2936567901481817}. Best is trial 4 with value: -0.828125.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB3E1765C0> <torch.utils.data.dataloader.DataLoader object at 0x000001BB3E1765F8>


[32m[I 2021-03-04 09:52:56,235][0m Trial 7 finished with value: -0.378125 and parameters: {'lr': 0.00019274151360839007, 'momentum': 0.7687159781216226}. Best is trial 4 with value: -0.828125.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB3E176908> <torch.utils.data.dataloader.DataLoader object at 0x000001BB3E176940>


[32m[I 2021-03-04 09:53:04,597][0m Trial 8 finished with value: -0.875 and parameters: {'lr': 0.0041249417845283754, 'momentum': 0.6337815233330691}. Best is trial 8 with value: -0.875.[0m


<torch.utils.data.dataloader.DataLoader object at 0x000001BB3E176C50> <torch.utils.data.dataloader.DataLoader object at 0x000001BB3E176C88>


[32m[I 2021-03-04 09:53:12,954][0m Trial 9 finished with value: -0.309375 and parameters: {'lr': 0.0011924877221832489, 'momentum': 0.3926131701798178}. Best is trial 8 with value: -0.875.[0m


Time taken: 81.61 seconds.
Best config: {'lr': 0.0041249417845283754, 'momentum': 0.6337815233330691}


In [1]:
import torch
import torch.optim as optim
from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test
from ray import tune
from ray.tune.suggest.optuna import OptunaSearch


In [2]:
def train_mnist(config):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    train_loader, test_loader = get_data_loaders()
    model = ConvNet().to(device)
    optimizer = optim.SGD(
       model.parameters(), lr=config["lr"], momentum=config["momentum"])
    for i in range(20):
        train(model, optimizer, train_loader, device)
        acc = test(model, test_loader, device)
        tune.report(mean_accuracy=acc)


In [3]:
import time
start = time.time()
analysis = tune.run(
   train_mnist,
   config={
       "lr": tune.loguniform(1e-4, 1e-2),
       "momentum": tune.uniform(0.1, 0.9),
   },
   metric="mean_accuracy",
   mode="max",
   search_alg=OptunaSearch(),
   num_samples=5)
taken = time.time() - start
print(f"Time taken: {taken:.2f} seconds.")
print(f"Best config: {analysis.best_config}")


2021-03-04 10:52:16,092	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
[32m[I 2021-03-04 10:52:25,417][0m A new study created in memory with name: optuna[0m
2021-03-04 10:52:28,371	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc,lr,momentum
train_mnist_9c8ed192,RUNNING,,0.000707492,0.239582


Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\function_manager.py", line 496, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\__init__.py", line 2, in <module>
    from ray.tune.tune import run_experiments, run
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\tune.py", line 13, in <module>
    from ray.tune.ray_trial_executor import RayTrialExecutor
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\ray_trial_executor.py", line 17, in <module>
    from ray.tune.durable_trainable import DurableTrainable
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\durable_trainable.py", line 5, in <module>
    from ray.tune.syncer import get_cloud_sync_client
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\syncer.py", line 89, in <module>
    @dataclass
  File "C:\ProgramData\Anaconda3\lib\site-packages\dataclasses.py", line 95

Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\function_manager.py", line 496, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\__init__.py", line 2, in <module>
    from ray.tune.tune import run_experiments, run
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\tune.py", line 13, in <module>
    from ray.tune.ray_trial_executor import RayTrialExecutor
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\ray_trial_executor.py", line 17, in <module>
    from ray.tune.durable_trainable import DurableTrainable
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\durable_trainable.py", line 5, in <module>
    from ray.tune.syncer import get_cloud_sync_client
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\syncer.py", line 89, in <module>
    @dataclass
  File "C:\ProgramData\Anaconda3\lib\site-packages\dataclasses.py", line 95

AttributeError: 'NoneType' object has no attribute 'get'

In [4]:
import torch
import torch.optim as optim
from ray import tune
from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test


def train_mnist(config):
    train_loader, test_loader = get_data_loaders()
    model = ConvNet(config)
    optimizer = optim.SGD(model.parameters(), lr=config["lr"])
    for i in range(30):
        train(model, optimizer, train_loader, torch.device("cpu"))
        acc = test(model, test_loader, torch.device("cpu"))
        tune.track.log(mean_accuracy=acc)

analysis = tune.run(
    train_mnist,
    config={"lr": tune.grid_search([0.001, 0.01, 0.1])})

print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))

# Get a dataframe for analyzing trial results.
df = analysis.dataframe()

Trial name,status,loc,lr
train_mnist_1c2f7_00000,RUNNING,,0.001


Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\function_manager.py", line 496, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\__init__.py", line 2, in <module>
    from ray.tune.tune import run_experiments, run
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\tune.py", line 13, in <module>
    from ray.tune.ray_trial_executor import RayTrialExecutor
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\ray_trial_executor.py", line 17, in <module>
    from ray.tune.durable_trainable import DurableTrainable
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\durable_trainable.py", line 5, in <module>
    from ray.tune.syncer import get_cloud_sync_client
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\syncer.py", line 89, in <module>
    @dataclass
  File "C:\ProgramData\Anaconda3\lib\site-packages\dataclasses.py", line 95

[2m[36m(pid=16500)[0m Windows fatal exception: access violation
[2m[36m(pid=16500)[0m 
[2m[36m(pid=15828)[0m 2021-03-04 10:56:04,427	ERROR function_manager.py:498 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=15828)[0m Traceback (most recent call last):
[2m[36m(pid=15828)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\function_manager.py", line 496, in _load_actor_class_from_gcs
[2m[36m(pid=15828)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=15828)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\__init__.py", line 2, in <module>
[2m[36m(pid=15828)[0m     from ray.tune.tune import run_experiments, run
[2m[36m(pid=15828)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\tune.py", line 13, in <module>
[2m[36m(pid=15828)[0m     from ray.tune.ray_trial_executor import RayTrialExecutor
[2m[36m(pid=15828)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\ray_trial_executor.py", line 

2021-03-04 10:56:04,534	ERROR trial_runner.py:616 -- Trial train_mnist_1c2f7_00002: Error processing event.
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\trial_runner.py", line 586, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\ray_trial_executor.py", line 609, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\_private\client_mode_hook.py", line 47, in wrapper
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\worker.py", line 1456, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AttributeError): [36mray::ImplicitFunc.train_buffered()[39m (pid=24308, ip=192.168.43.112)
  File "python\ray\_raylet.pyx", line 439, in ray._raylet.execute_task
  File "C:\ProgramData\Anaconda3\lib\site-packages\ray\worker.py",

Result for train_mnist_1c2f7_00000:
  {}
  
Result for train_mnist_1c2f7_00001:
  {}
  
Result for train_mnist_1c2f7_00002:
  {}
  


[2m[36m(pid=15828)[0m Windows fatal exception: access violation
[2m[36m(pid=15828)[0m 
[2m[36m(pid=24308)[0m 2021-03-04 10:56:04,479	ERROR function_manager.py:498 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=24308)[0m Traceback (most recent call last):
[2m[36m(pid=24308)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\function_manager.py", line 496, in _load_actor_class_from_gcs
[2m[36m(pid=24308)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=24308)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\__init__.py", line 2, in <module>
[2m[36m(pid=24308)[0m     from ray.tune.tune import run_experiments, run
[2m[36m(pid=24308)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\tune.py", line 13, in <module>
[2m[36m(pid=24308)[0m     from ray.tune.ray_trial_executor import RayTrialExecutor
[2m[36m(pid=24308)[0m   File "C:\ProgramData\Anaconda3\lib\site-packages\ray\tune\ray_trial_executor.py", line 

Trial name,status,loc,lr
train_mnist_1c2f7_00000,ERROR,,0.001
train_mnist_1c2f7_00001,ERROR,,0.01
train_mnist_1c2f7_00002,ERROR,,0.1

Trial name,# failures,error file
train_mnist_1c2f7_00000,1,C:\Users\M1055951\ray_results\train_mnist_2021-03-04_10-56-02\train_mnist_1c2f7_00000_0_lr=0.001_2021-03-04_10-56-02\error.txt
train_mnist_1c2f7_00001,1,C:\Users\M1055951\ray_results\train_mnist_2021-03-04_10-56-02\train_mnist_1c2f7_00001_1_lr=0.01_2021-03-04_10-56-02\error.txt
train_mnist_1c2f7_00002,1,C:\Users\M1055951\ray_results\train_mnist_2021-03-04_10-56-02\train_mnist_1c2f7_00002_2_lr=0.1_2021-03-04_10-56-02\error.txt


TuneError: ('Trials did not complete', [train_mnist_1c2f7_00000, train_mnist_1c2f7_00001, train_mnist_1c2f7_00002])

[2m[36m(pid=24308)[0m Windows fatal exception: access violation
[2m[36m(pid=24308)[0m 
[2m[36m(pid=32640)[0m Windows fatal exception: access violation
[2m[36m(pid=32640)[0m 
[2m[36m(pid=22172)[0m Windows fatal exception: access violation
[2m[36m(pid=22172)[0m 
[2m[36m(pid=14788)[0m Windows fatal exception: access violation
[2m[36m(pid=14788)[0m 
[2m[36m(pid=8924)[0m Windows fatal exception: access violation
[2m[36m(pid=8924)[0m 
[2m[36m(pid=33932)[0m Windows fatal exception: access violation
[2m[36m(pid=33932)[0m 
