# Setup

In [1]:
# Mount Google Drive at /gdrive so the project files stored there are reachable.
# This cell relies on the google.colab package, i.e. it is meant to run inside Colab.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
import os
# Root folder of the project on the mounted Google Drive.
project_folder = '/gdrive/MyDrive/ProjectCIRI'
# Directory of the local `ciri_utils` package, which is pip-installed in a later cell.
my_module_path = os.path.join(project_folder, 'code', 'ciri_utils')

In [3]:
!pip install $my_module_path

Processing /gdrive/MyDrive/ProjectCIRI/code/ciri_utils
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch>=2.2.2 (from ciri-utils==0.1)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl (755.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision>=0.17.2 (from ciri-utils==0.1)
  Downloading torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m97.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ray[tune] (from ciri-utils==0.1)
  Downloading ray-2.10.0-cp310-cp310-manylinux2014_x86_64.whl (65.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.1/65.1 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2.2.2->ciri-utils==0.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.

In [4]:
import ray

from ray import tune
from ciri_utils.engine_v2 import CIRI_trainer

In [5]:
# Dataset locations inside the project folder: the base image subset and the
# augmented images. Both are handed to the trainer as `data_folders`.
root_base, root_augmented = (
    os.path.join(project_folder, folder_name)
    for folder_name in ('Incidents-subset', 'augmented_images')
)
data_folders = [root_base, root_augmented]

# Directory passed as `results_persist_dir` to the cross-validation run.
persistence_path = os.path.join(
    project_folder, 'checkpoints', 'HPT_wide_resnet_50'
)

# Preparation

In [6]:
# Identifier of the model architecture handed to CIRI_trainer.
selected_model = "wide_resnet50_2"

In [None]:
# Build the trainer for the selected architecture over both image folders.
# NOTE(review): the exact meaning of `data_prop` and `sample_indices` is defined in
# ciri_utils.engine_v2; sample_indices=0.2 presumably selects the 20% sample of the
# dataset used for tuning — confirm against the package.
ciri_trainer = CIRI_trainer(
    model=selected_model,
    data_folders=data_folders,
    data_prop=0.8,
    sample_indices=0.2,
)

# Hyperparameter tuning

Perform hyperparameter tuning with nested cross-validation (5 outer folds, 3 inner folds) on a 20% sample of the dataset.

In [7]:
# Hyperparameter search space handed to Ray Tune through `cross_validate(config=...)`.
search_space = dict(
    epochs=tune.choice([5, 10, 20]),   # one of three fixed epoch counts
    batch_size=tune.choice([32, 64]),  # one of two fixed batch sizes
    lr=tune.loguniform(1e-4, 1e-2),    # sampled log-uniformly between 1e-4 and 1e-2
)

In [None]:
# Run cross-validation with hyperparameter tuning enabled, using `search_space`
# as the tuning config and persisting results under `persistence_path`.
# NOTE(review): start_fold=(4, 0) makes this a resumed run — the saved output starts at
# "Outer fold 4, inner fold 0". Presumably the earlier folds were completed in a
# previous session; confirm, and reset start_fold for a from-scratch run.
hp_nest_cv = ciri_trainer.cross_validate(
    run_name="wide_resnet50_hpt",
    config=search_space,
    outer_cv_k=5,  # presumably the number of outer CV folds — confirm in ciri_utils
    inner_cv_k=3,  # presumably the number of inner CV folds — confirm in ciri_utils
    tune_hyperparams=True,
    num_samples=5,  # the saved Ray Tune output reports "Number of trials 5"
    results_persist_dir=persistence_path,
    start_fold=(4, 0)
)

Outer fold 4, inner fold 0 - number of samples: 1281
Tuning hyperparameters for wide_resnet50_hpt_outer_4_inner_0...
Defaulting to ASHA scheduler (no scheduler provided or not an instance of TrialScheduler)


  self.pid = _posixsubprocess.fork_exec(
2024-04-14 07:52:13,556	INFO worker.py:1752 -- Started a local Ray instance.
2024-04-14 07:52:14,946	INFO tune.py:263 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-04-14 07:52:14,963	INFO tune.py:622 -- [output] This will use the new output engine with verbosity 1. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+----------------------------------------------------------------------+
| Configuration for experiment     wide_resnet50_hpt_outer_4_inner_0   |
+----------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator               |
| Scheduler                        AsyncHyperBandScheduler             |
| Number of trials                 5                                   |
+----------------------------------------------------------------------+

View detailed results here: /root/ray_results/wide_resnet50_hpt_outer_4_inner_0
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-04-14_07-52-09_892030_20145/artifacts/2024-04-14_07-52-14/wide_resnet50_hpt_outer_4_inner_0/driver_artifacts`

Trial status: 5 PENDING
Current time: 2024-04-14 07:52:15. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------

[36m(TrainTrainable pid=20570)[0m 2024-04-14 07:52:21.506807: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(TrainTrainable pid=20570)[0m 2024-04-14 07:52:21.506873: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(TrainTrainable pid=20570)[0m 2024-04-14 07:52:21.508720: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Trial TorchTrainer_e8bbe_00000 started with configuration:
+------------------------------------------------------------------+
| Trial TorchTrainer_e8bbe_00000 config                            |
+------------------------------------------------------------------+
| train_loop_config/batch_size                                  64 |
| train_loop_config/epochs                                      10 |
| train_loop_config/lr                      0.00030405490309504623 |
| train_loop_config/train_test_idx            ...20, 11731, 1157]) |
+------------------------------------------------------------------+


[36m(TorchTrainer pid=20570)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=20570)[0m - (ip=172.28.0.12, pid=20650) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=20650)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=20650)[0m 2024-04-14 07:52:29.920907: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=20650)[0m 2024-04-14 07:52:29.920993: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=20650)[0m 2024-04-14 07:52:29.922641: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:52:45. Total running time: 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr |
+----------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874   |
| TorchTrainer_e8bbe_00002   PENDING                        20                       32              0.00203071  |
| TorchTrainer_e8bbe_00003   PENDING                        10                       32              0.00317889  |
| TorchTrainer_e8bbe_0

Epoch (training) 1:  14%|█▍        | 3/21 [00:13<01:08,  3.79s/it]
Epoch (training) 1:  19%|█▉        | 4/21 [00:15<00:53,  3.14s/it]
Epoch (training) 1:  24%|██▍       | 5/21 [00:18<00:50,  3.18s/it]
Epoch (training) 1:  29%|██▊       | 6/21 [00:20<00:42,  2.82s/it]
Epoch (training) 1:  33%|███▎      | 7/21 [00:22<00:36,  2.57s/it]
Epoch (training) 1:  38%|███▊      | 8/21 [00:24<00:30,  2.37s/it]
Epoch (training) 1:  43%|████▎     | 9/21 [00:27<00:30,  2.51s/it]
Epoch (training) 1:  48%|████▊     | 10/21 [00:30<00:28,  2.56s/it]
Epoch (training) 1:  52%|█████▏    | 11/21 [00:33<00:26,  2.68s/it]
Epoch (training) 1:  57%|█████▋    | 12/21 [00:36<00:25,  2.80s/it]
Epoch (training) 1:  62%|██████▏   | 13/21 [00:38<00:21,  2.70s/it]
Epoch (training) 1:  67%|██████▋   | 14/21 [00:40<00:17,  2.55s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:53:15. Total running time: 1min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr |
+----------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874   |
| TorchTrainer_e8bbe_00002   PENDING                        20                       32              0.00203071  |
| TorchTrainer_e8bbe_00003   PENDING                        10                       32              0.00317889  |
| TorchTrainer_e8bb

Epoch (training) 1:  71%|███████▏  | 15/21 [00:42<00:13,  2.29s/it]
Epoch (training) 1:  76%|███████▌  | 16/21 [00:44<00:11,  2.28s/it]
Epoch (training) 1:  81%|████████  | 17/21 [00:47<00:10,  2.56s/it]
Epoch (training) 1:  86%|████████▌ | 18/21 [00:51<00:08,  2.85s/it]
Epoch (training) 1:  90%|█████████ | 19/21 [00:53<00:04,  2.50s/it]
Epoch (training) 1:  95%|█████████▌| 20/21 [00:53<00:01,  1.84s/it]
Epoch (training) 1: 100%|██████████| 21/21 [00:53<00:00,  2.56s/it]
Epoch (test) 1:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 1:   9%|▉         | 1/11 [00:04<00:45,  4.56s/it]
Epoch (test) 1:  18%|█▊        | 2/11 [00:07<00:29,  3.33s/it]
Epoch (test) 1:  27%|██▋       | 3/11 [00:10<00:27,  3.39s/it]
Epoch (test) 1:  36%|███▋      | 4/11 [00:13<00:22,  3.26s/it]
Epoch (test) 1:  45%|████▌     | 5/11 [00:15<00:17,  2.87s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:53:45. Total running time: 1min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr |
+----------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874   |
| TorchTrainer_e8bbe_00002   PENDING                        20                       32              0.00203071  |
| TorchTrainer_e8bbe_00003   PENDING                        10                       32              0.00317889  |
| TorchTrainer_e8b

Epoch (test) 1:  55%|█████▍    | 6/11 [00:18<00:14,  2.87s/it]
Epoch (test) 1:  64%|██████▎   | 7/11 [00:20<00:09,  2.43s/it]
Epoch (test) 1:  73%|███████▎  | 8/11 [00:21<00:06,  2.22s/it]
Epoch (test) 1:  82%|████████▏ | 9/11 [00:24<00:04,  2.43s/it]
Epoch (test) 1: 100%|██████████| 11/11 [00:24<00:00,  2.27s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000000)
Epoch (training) 2:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 2:   5%|▍         | 1/21 [00:03<01:15,  3.79s/it]
Epoch (training) 2:  10%|▉         | 2/21 [00:05<00:49,  2.60s/it]
Epoch (training) 2:  14%|█▍        | 3/21 [00:09<00:56,  3.16s/it]
Epoch (training) 2:  19%|█▉        | 4/21 [00:11<00:46,  2.72s/it]
Epoch (training) 2:  24%|██▍       | 5/21 [00:13<00:42,  2.64s/it]
Epoch (training) 2:  29%|██▊    

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:54:15. Total running time: 2min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        1            93.5278   4.39592     0.110764 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

Epoch (training) 2:  33%|███▎      | 7/21 [00:18<00:34,  2.45s/it]
Epoch (training) 2:  38%|███▊      | 8/21 [00:20<00:29,  2.31s/it]
Epoch (training) 2:  43%|████▎     | 9/21 [00:24<00:33,  2.78s/it]
Epoch (training) 2:  48%|████▊     | 10/21 [00:26<00:28,  2.59s/it]
Epoch (training) 2:  52%|█████▏    | 11/21 [00:28<00:24,  2.41s/it]
Epoch (training) 2:  57%|█████▋    | 12/21 [00:30<00:21,  2.39s/it]
Epoch (training) 2:  62%|██████▏   | 13/21 [00:33<00:20,  2.52s/it]
Epoch (training) 2:  67%|██████▋   | 14/21 [00:36<00:17,  2.49s/it]
Epoch (training) 2:  71%|███████▏  | 15/21 [00:39<00:17,  2.88s/it]
Epoch (training) 2:  76%|███████▌  | 16/21 [00:42<00:13,  2.74s/it]
Epoch (training) 2:  81%|████████  | 17/21 [00:44<00:10,  2.74s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:54:45. Total running time: 2min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        1            93.5278   4.39592     0.110764 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (training) 2:  86%|████████▌ | 18/21 [00:48<00:08,  2.84s/it]
Epoch (training) 2:  90%|█████████ | 19/21 [00:50<00:05,  2.71s/it]
Epoch (training) 2:  95%|█████████▌| 20/21 [00:50<00:02,  2.00s/it]
Epoch (training) 2: 100%|██████████| 21/21 [00:51<00:00,  2.43s/it]
Epoch (test) 2:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 2:   9%|▉         | 1/11 [00:05<00:56,  5.62s/it]
Epoch (test) 2:  18%|█▊        | 2/11 [00:08<00:34,  3.78s/it]
Epoch (test) 2:  27%|██▋       | 3/11 [00:10<00:25,  3.15s/it]
Epoch (test) 2:  36%|███▋      | 4/11 [00:12<00:20,  2.86s/it]
Epoch (test) 2:  45%|████▌     | 5/11 [00:15<00:16,  2.69s/it]
Epoch (test) 2:  55%|█████▍    | 6/11 [00:19<00:15,  3.14s/it]
Epoch (test) 2:  64%|██████▎   | 7/11 [00:20<00:10,  2.60s/it]
Epoch (test) 2:  73%|███████▎  | 8/11 [00:22<00:07,  2.34s/it]
Epoch (test) 2:  82%|████████▏ | 9/11 [00:24<00:04,  2.26s/it]
Epoch (test) 2: 100%|██████████| 11/11 [00:24<00:00,  2.26s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:55:15. Total running time: 3min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        1            93.5278   4.39592     0.110764 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000001)
Epoch (training) 3:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 3:   5%|▍         | 1/21 [00:03<01:17,  3.86s/it]
Epoch (training) 3:  10%|▉         | 2/21 [00:05<00:50,  2.68s/it]
Epoch (training) 3:  14%|█▍        | 3/21 [00:09<00:55,  3.07s/it]
Epoch (training) 3:  19%|█▉        | 4/21 [00:11<00:47,  2.80s/it]
Epoch (training) 3:  24%|██▍       | 5/21 [00:14<00:44,  2.76s/it]
Epoch (training) 3:  29%|██▊       | 6/21 [00:16<00:36,  2.41s/it]
Epoch (training) 3:  33%|███▎      | 7/21 [00:18<00:35,  2.56s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:55:45. Total running time: 3min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        2            180.349   2.42988     0.185647 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (training) 3:  38%|███▊      | 8/21 [00:22<00:37,  2.88s/it]
Epoch (training) 3:  43%|████▎     | 9/21 [00:24<00:30,  2.53s/it]
Epoch (training) 3:  48%|████▊     | 10/21 [00:28<00:32,  2.95s/it]
Epoch (training) 3:  52%|█████▏    | 11/21 [00:29<00:25,  2.58s/it]
Epoch (training) 3:  57%|█████▋    | 12/21 [00:31<00:21,  2.38s/it]
Epoch (training) 3:  62%|██████▏   | 13/21 [00:33<00:17,  2.14s/it]
Epoch (training) 3:  67%|██████▋   | 14/21 [00:36<00:16,  2.33s/it]
Epoch (training) 3:  71%|███████▏  | 15/21 [00:38<00:14,  2.40s/it]
Epoch (training) 3:  76%|███████▌  | 16/21 [00:42<00:14,  2.87s/it]
Epoch (training) 3:  81%|████████  | 17/21 [00:44<00:10,  2.70s/it]
Epoch (training) 3:  86%|████████▌ | 18/21 [00:47<00:07,  2.60s/it]
Epoch (training) 3:  90%|█████████ | 19/21 [00:49<00:04,  2.42s/it]
Epoch (training) 3:  95%|█████████▌| 20/21 [00:49<00:01,  1.79s/it]
Epoch (training) 3: 100%|██████████| 21/21 [00:49<00:00,  2.38s/it]
Epoch (test) 3:   0%|          | 0/11 [00:00<?, ?i

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:56:15. Total running time: 4min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        2            180.349   2.42988     0.185647 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

Epoch (test) 3:   9%|▉         | 1/11 [00:05<00:58,  5.89s/it]
Epoch (test) 3:  18%|█▊        | 2/11 [00:08<00:35,  3.90s/it]
Epoch (test) 3:  27%|██▋       | 3/11 [00:10<00:25,  3.22s/it]
Epoch (test) 3:  36%|███▋      | 4/11 [00:13<00:20,  2.92s/it]
Epoch (test) 3:  45%|████▌     | 5/11 [00:15<00:15,  2.63s/it]
Epoch (test) 3:  55%|█████▍    | 6/11 [00:19<00:15,  3.13s/it]
Epoch (test) 3:  64%|██████▎   | 7/11 [00:21<00:10,  2.66s/it]
Epoch (test) 3:  73%|███████▎  | 8/11 [00:22<00:07,  2.37s/it]
Epoch (test) 3:  82%|████████▏ | 9/11 [00:24<00:04,  2.27s/it]
Epoch (test) 3: 100%|██████████| 11/11 [00:25<00:00,  2.28s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:56:45. Total running time: 4min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        2            180.349   2.42988     0.185647 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000002)
Epoch (training) 4:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 4:   5%|▍         | 1/21 [00:04<01:35,  4.79s/it]
Epoch (training) 4:  10%|▉         | 2/21 [00:08<01:14,  3.90s/it]
Epoch (training) 4:  14%|█▍        | 3/21 [00:11<01:05,  3.65s/it]
Epoch (training) 4:  19%|█▉        | 4/21 [00:14<01:00,  3.55s/it]
Epoch (training) 4:  24%|██▍       | 5/21 [00:17<00:52,  3.26s/it]
Epoch (training) 4:  29%|██▊       | 6/21 [00:19<00:41,  2.75s/it]
Epoch (training) 4:  33%|███▎      | 7/21 [00:20<00:32,  2.33s/it]
Epoch (training) 4:  38%|███▊      | 8/21 [00:22<00:29,  2.29s/it]
Epoch (training) 4:  43%|████▎     | 9/21 [00:26<00:32,  2.71s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:57:16. Total running time: 5min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        3            263.098   2.96712     0.265211 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

Epoch (training) 4:  48%|████▊     | 10/21 [00:29<00:30,  2.73s/it]
Epoch (training) 4:  52%|█████▏    | 11/21 [00:31<00:26,  2.61s/it]
Epoch (training) 4:  57%|█████▋    | 12/21 [00:33<00:21,  2.39s/it]
Epoch (training) 4:  62%|██████▏   | 13/21 [00:36<00:19,  2.41s/it]
Epoch (training) 4:  67%|██████▋   | 14/21 [00:38<00:16,  2.29s/it]
Epoch (training) 4:  71%|███████▏  | 15/21 [00:39<00:12,  2.09s/it]
Epoch (training) 4:  76%|███████▌  | 16/21 [00:41<00:10,  2.08s/it]
Epoch (training) 4:  81%|████████  | 17/21 [00:46<00:10,  2.75s/it]
Epoch (training) 4:  86%|████████▌ | 18/21 [00:48<00:07,  2.64s/it]
Epoch (training) 4:  90%|█████████ | 19/21 [00:50<00:04,  2.36s/it]
Epoch (training) 4:  95%|█████████▌| 20/21 [00:50<00:01,  1.75s/it]
Epoch (training) 4: 100%|██████████| 21/21 [00:50<00:00,  2.41s/it]
Epoch (test) 4:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 4:   9%|▉         | 1/11 [00:04<00:43,  4.38s/it]
Epoch (test) 4:  18%|█▊        | 2/11 [00:07<00:33,  3.71s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:57:46. Total running time: 5min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        3            263.098   2.96712     0.265211 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (test) 4:  27%|██▋       | 3/11 [00:10<00:27,  3.44s/it]
Epoch (test) 4:  36%|███▋      | 4/11 [00:13<00:21,  3.04s/it]
Epoch (test) 4:  45%|████▌     | 5/11 [00:15<00:16,  2.71s/it]
Epoch (test) 4:  55%|█████▍    | 6/11 [00:18<00:13,  2.75s/it]
Epoch (test) 4:  64%|██████▎   | 7/11 [00:19<00:09,  2.33s/it]
Epoch (test) 4:  73%|███████▎  | 8/11 [00:21<00:07,  2.36s/it]
Epoch (test) 4:  82%|████████▏ | 9/11 [00:24<00:04,  2.50s/it]
Epoch (test) 4: 100%|██████████| 11/11 [00:24<00:00,  2.27s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000003)
Epoch (training) 5:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 5:   5%|▍         | 1/21 [00:05<01:51,  5.58s/it]
Epoch (training) 5:  10%|▉         | 2/21 [00:08<01:18,  4.12s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:58:16. Total running time: 6min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        4            342.755   2.24182     0.276131 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

Epoch (training) 5:  14%|█▍        | 3/21 [00:12<01:09,  3.89s/it]
Epoch (training) 5:  19%|█▉        | 4/21 [00:13<00:51,  3.03s/it]
Epoch (training) 5:  24%|██▍       | 5/21 [00:16<00:43,  2.73s/it]
Epoch (training) 5:  29%|██▊       | 6/21 [00:17<00:35,  2.34s/it]
Epoch (training) 5:  33%|███▎      | 7/21 [00:19<00:29,  2.09s/it]
Epoch (training) 5:  38%|███▊      | 8/21 [00:22<00:31,  2.42s/it]
Epoch (training) 5:  43%|████▎     | 9/21 [00:26<00:36,  3.04s/it]
Epoch (training) 5:  48%|████▊     | 10/21 [00:28<00:28,  2.63s/it]
Epoch (training) 5:  52%|█████▏    | 11/21 [00:30<00:23,  2.35s/it]
Epoch (training) 5:  57%|█████▋    | 12/21 [00:33<00:22,  2.47s/it]
Epoch (training) 5:  62%|██████▏   | 13/21 [00:35<00:18,  2.36s/it]
Epoch (training) 5:  67%|██████▋   | 14/21 [00:37<00:16,  2.42s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:58:46. Total running time: 6min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        4            342.755   2.24182     0.276131 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (training) 5:  71%|███████▏  | 15/21 [00:40<00:14,  2.45s/it]
Epoch (training) 5:  76%|███████▌  | 16/21 [00:42<00:12,  2.46s/it]
Epoch (training) 5:  81%|████████  | 17/21 [00:44<00:09,  2.32s/it]
Epoch (training) 5:  86%|████████▌ | 18/21 [00:47<00:06,  2.32s/it]
Epoch (training) 5:  90%|█████████ | 19/21 [00:48<00:04,  2.20s/it]
Epoch (training) 5:  95%|█████████▌| 20/21 [00:49<00:01,  1.64s/it]
Epoch (training) 5: 100%|██████████| 21/21 [00:49<00:00,  2.36s/it]
Epoch (test) 5:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 5:   9%|▉         | 1/11 [00:05<00:57,  5.78s/it]
Epoch (test) 5:  18%|█▊        | 2/11 [00:08<00:34,  3.84s/it]
Epoch (test) 5:  27%|██▋       | 3/11 [00:10<00:25,  3.19s/it]
Epoch (test) 5:  36%|███▋      | 4/11 [00:13<00:20,  2.88s/it]
Epoch (test) 5:  45%|████▌     | 5/11 [00:15<00:15,  2.65s/it]
Epoch (test) 5:  55%|█████▍    | 6/11 [00:19<00:15,  3.13s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:59:16. Total running time: 7min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        4            342.755   2.24182     0.276131 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

Epoch (test) 5:  64%|██████▎   | 7/11 [00:20<00:10,  2.61s/it]
Epoch (test) 5:  73%|███████▎  | 8/11 [00:22<00:07,  2.35s/it]
Epoch (test) 5:  82%|████████▏ | 9/11 [00:24<00:04,  2.27s/it]
Epoch (test) 5: 100%|██████████| 11/11 [00:24<00:00,  2.27s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000004)
Epoch (training) 6:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 6:   5%|▍         | 1/21 [00:03<01:11,  3.58s/it]
Epoch (training) 6:  10%|▉         | 2/21 [00:05<00:52,  2.77s/it]
Epoch (training) 6:  14%|█▍        | 3/21 [00:07<00:41,  2.33s/it]
Epoch (training) 6:  19%|█▉        | 4/21 [00:09<00:36,  2.18s/it]
Epoch (training) 6:  24%|██▍       | 5/21 [00:12<00:40,  2.52s/it]
Epoch (training) 6:  29%|██▊       | 6/21 [00:16<00:42,  2.83s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 07:59:46. Total running time: 7min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        5            425.317   2.1443     0.316693 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                     

Epoch (training) 6:  33%|███▎      | 7/21 [00:20<00:45,  3.24s/it]
Epoch (training) 6:  38%|███▊      | 8/21 [00:22<00:38,  2.93s/it]
Epoch (training) 6:  43%|████▎     | 9/21 [00:25<00:34,  2.85s/it]
Epoch (training) 6:  48%|████▊     | 10/21 [00:27<00:29,  2.65s/it]
Epoch (training) 6:  52%|█████▏    | 11/21 [00:30<00:29,  2.90s/it]
Epoch (training) 6:  57%|█████▋    | 12/21 [00:32<00:23,  2.64s/it]
Epoch (training) 6:  62%|██████▏   | 13/21 [00:35<00:20,  2.50s/it]
Epoch (training) 6:  67%|██████▋   | 14/21 [00:37<00:17,  2.47s/it]
Epoch (training) 6:  71%|███████▏  | 15/21 [00:39<00:14,  2.34s/it]
Epoch (training) 6:  76%|███████▌  | 16/21 [00:42<00:12,  2.47s/it]
Epoch (training) 6:  81%|████████  | 17/21 [00:45<00:10,  2.72s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:00:16. Total running time: 8min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        5            425.317   2.1443     0.316693 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                     |

Epoch (training) 6:  86%|████████▌ | 18/21 [00:47<00:07,  2.52s/it]
Epoch (training) 6:  90%|█████████ | 19/21 [00:49<00:04,  2.36s/it]
Epoch (training) 6:  95%|█████████▌| 20/21 [00:49<00:01,  1.75s/it]
Epoch (training) 6: 100%|██████████| 21/21 [00:50<00:00,  2.39s/it]
Epoch (test) 6:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 6:   9%|▉         | 1/11 [00:04<00:44,  4.41s/it]
Epoch (test) 6:  18%|█▊        | 2/11 [00:07<00:34,  3.81s/it]
Epoch (test) 6:  27%|██▋       | 3/11 [00:10<00:27,  3.41s/it]
Epoch (test) 6:  36%|███▋      | 4/11 [00:13<00:21,  3.00s/it]
Epoch (test) 6:  45%|████▌     | 5/11 [00:15<00:16,  2.68s/it]
Epoch (test) 6:  55%|█████▍    | 6/11 [00:18<00:13,  2.73s/it]
Epoch (test) 6:  64%|██████▎   | 7/11 [00:19<00:09,  2.32s/it]
Epoch (test) 6:  73%|███████▎  | 8/11 [00:21<00:07,  2.37s/it]
Epoch (test) 6:  82%|████████▏ | 9/11 [00:24<00:04,  2.49s/it]
Epoch (test) 6: 100%|██████████| 11/11 [00:24<00:00,  2.26s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:00:46. Total running time: 8min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        5            425.317   2.1443     0.316693 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                     

[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000005)
Epoch (training) 7:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 7:   5%|▍         | 1/21 [00:04<01:23,  4.19s/it]
Epoch (training) 7:  10%|▉         | 2/21 [00:08<01:15,  4.00s/it]
Epoch (training) 7:  14%|█▍        | 3/21 [00:11<01:04,  3.59s/it]
Epoch (training) 7:  19%|█▉        | 4/21 [00:13<00:52,  3.10s/it]
Epoch (training) 7:  24%|██▍       | 5/21 [00:16<00:46,  2.90s/it]
Epoch (training) 7:  29%|██▊       | 6/21 [00:17<00:37,  2.51s/it]
Epoch (training) 7:  33%|███▎      | 7/21 [00:20<00:35,  2.50s/it]
Epoch (training) 7:  38%|███▊      | 8/21 [00:23<00:33,  2.58s/it]
Epoch (training) 7:  43%|████▎     | 9/21 [00:25<00:31,  2.64s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:01:16. Total running time: 9min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        6            504.455   2.22881     0.312012 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                  

Epoch (training) 7:  48%|████▊     | 10/21 [00:27<00:26,  2.40s/it]
Epoch (training) 7:  52%|█████▏    | 11/21 [00:29<00:22,  2.27s/it]
Epoch (training) 7:  57%|█████▋    | 12/21 [00:31<00:18,  2.05s/it]
Epoch (training) 7:  62%|██████▏   | 13/21 [00:34<00:18,  2.31s/it]
Epoch (training) 7:  67%|██████▋   | 14/21 [00:36<00:16,  2.37s/it]
Epoch (training) 7:  71%|███████▏  | 15/21 [00:39<00:15,  2.50s/it]
Epoch (training) 7:  76%|███████▌  | 16/21 [00:41<00:11,  2.24s/it]
Epoch (training) 7:  81%|████████  | 17/21 [00:44<00:10,  2.56s/it]
Epoch (training) 7:  86%|████████▌ | 18/21 [00:47<00:08,  2.69s/it]
Epoch (training) 7:  90%|█████████ | 19/21 [00:49<00:05,  2.60s/it]
Epoch (training) 7:  95%|█████████▌| 20/21 [00:50<00:01,  1.92s/it]
Epoch (training) 7: 100%|██████████| 21/21 [00:50<00:00,  2.40s/it]
Epoch (test) 7:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 7:   9%|▉         | 1/11 [00:05<00:54,  5.46s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:01:46. Total running time: 9min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        6            504.455   2.22881     0.312012 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (test) 7:  18%|█▊        | 2/11 [00:07<00:33,  3.72s/it]
Epoch (test) 7:  27%|██▋       | 3/11 [00:10<00:24,  3.12s/it]
Epoch (test) 7:  36%|███▋      | 4/11 [00:12<00:19,  2.84s/it]
Epoch (test) 7:  45%|████▌     | 5/11 [00:15<00:16,  2.77s/it]
Epoch (test) 7:  55%|█████▍    | 6/11 [00:19<00:15,  3.12s/it]
Epoch (test) 7:  64%|██████▎   | 7/11 [00:20<00:10,  2.58s/it]
Epoch (test) 7:  73%|███████▎  | 8/11 [00:22<00:07,  2.33s/it]
Epoch (test) 7:  82%|████████▏ | 9/11 [00:24<00:04,  2.25s/it]
Epoch (test) 7: 100%|██████████| 11/11 [00:24<00:00,  2.25s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000006)
Epoch (training) 8:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 8:   5%|▍         | 1/21 [00:05<01:46,  5.31s/it]
Epoch (training) 8:  10%|▉         | 2/21 [00:07

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:02:16. Total running time: 10min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        7            583.544   2.37068     0.297972 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (training) 8:  14%|█▍        | 3/21 [00:09<00:47,  2.64s/it]
Epoch (training) 8:  19%|█▉        | 4/21 [00:11<00:43,  2.53s/it]
Epoch (training) 8:  24%|██▍       | 5/21 [00:13<00:37,  2.36s/it]
Epoch (training) 8:  29%|██▊       | 6/21 [00:16<00:38,  2.60s/it]
Epoch (training) 8:  33%|███▎      | 7/21 [00:20<00:43,  3.07s/it]
Epoch (training) 8:  38%|███▊      | 8/21 [00:22<00:34,  2.67s/it]
Epoch (training) 8:  43%|████▎     | 9/21 [00:24<00:29,  2.49s/it]
Epoch (training) 8:  48%|████▊     | 10/21 [00:26<00:25,  2.29s/it]
Epoch (training) 8:  52%|█████▏    | 11/21 [00:29<00:25,  2.57s/it]
Epoch (training) 8:  57%|█████▋    | 12/21 [00:32<00:23,  2.62s/it]
Epoch (training) 8:  62%|██████▏   | 13/21 [00:34<00:20,  2.56s/it]
Epoch (training) 8:  67%|██████▋   | 14/21 [00:36<00:15,  2.27s/it]
Epoch (training) 8:  71%|███████▏  | 15/21 [00:38<00:12,  2.14s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:02:46. Total running time: 10min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        7            583.544   2.37068     0.297972 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                

Epoch (training) 8:  76%|███████▌  | 16/21 [00:41<00:12,  2.49s/it]
Epoch (training) 8:  81%|████████  | 17/21 [00:43<00:09,  2.38s/it]
Epoch (training) 8:  86%|████████▌ | 18/21 [00:48<00:09,  3.17s/it]
Epoch (training) 8:  90%|█████████ | 19/21 [00:50<00:05,  2.75s/it]
Epoch (training) 8:  95%|█████████▌| 20/21 [00:50<00:02,  2.02s/it]
Epoch (training) 8: 100%|██████████| 21/21 [00:50<00:00,  2.42s/it]
Epoch (test) 8:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 8:   9%|▉         | 1/11 [00:04<00:43,  4.38s/it]
Epoch (test) 8:  18%|█▊        | 2/11 [00:06<00:29,  3.27s/it]
Epoch (test) 8:  27%|██▋       | 3/11 [00:10<00:25,  3.21s/it]
Epoch (test) 8:  36%|███▋      | 4/11 [00:13<00:22,  3.21s/it]
Epoch (test) 8:  45%|████▌     | 5/11 [00:15<00:16,  2.82s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:03:16. Total running time: 11min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        7            583.544   2.37068     0.297972 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (test) 8:  55%|█████▍    | 6/11 [00:18<00:14,  2.84s/it]
Epoch (test) 8:  64%|██████▎   | 7/11 [00:19<00:09,  2.40s/it]
Epoch (test) 8:  73%|███████▎  | 8/11 [00:21<00:06,  2.20s/it]
Epoch (test) 8:  82%|████████▏ | 9/11 [00:24<00:04,  2.32s/it]
Epoch (test) 8: 100%|██████████| 11/11 [00:24<00:00,  2.20s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000007)
Epoch (training) 9:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 9:   5%|▍         | 1/21 [00:04<01:25,  4.27s/it]
Epoch (training) 9:  10%|▉         | 2/21 [00:07<01:07,  3.54s/it]
Epoch (training) 9:  14%|█▍        | 3/21 [00:09<00:53,  2.96s/it]
Epoch (training) 9:  19%|█▉        | 4/21 [00:12<00:49,  2.93s/it]
Epoch (training) 9:  24%|██▍       | 5/21 [00:15<00:48,  3.04s/it]
Epoch (training) 9:  29%|██▊    

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:03:46. Total running time: 11min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        8            662.826   2.3095     0.310452 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                    

Epoch (training) 9:  33%|███▎      | 7/21 [00:20<00:39,  2.85s/it]
Epoch (training) 9:  38%|███▊      | 8/21 [00:22<00:32,  2.51s/it]
Epoch (training) 9:  43%|████▎     | 9/21 [00:24<00:27,  2.25s/it]
Epoch (training) 9:  48%|████▊     | 10/21 [00:27<00:26,  2.39s/it]
Epoch (training) 9:  52%|█████▏    | 11/21 [00:29<00:23,  2.33s/it]
Epoch (training) 9:  57%|█████▋    | 12/21 [00:30<00:18,  2.11s/it]
Epoch (training) 9:  62%|██████▏   | 13/21 [00:32<00:16,  2.05s/it]
Epoch (training) 9:  67%|██████▋   | 14/21 [00:35<00:16,  2.38s/it]
Epoch (training) 9:  71%|███████▏  | 15/21 [00:38<00:14,  2.44s/it]
Epoch (training) 9:  76%|███████▌  | 16/21 [00:41<00:13,  2.65s/it]
Epoch (training) 9:  81%|████████  | 17/21 [00:44<00:10,  2.72s/it]
Epoch (training) 9:  86%|████████▌ | 18/21 [00:47<00:08,  2.76s/it]
Epoch (training) 9:  90%|█████████ | 19/21 [00:49<00:04,  2.48s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:04:16. Total running time: 12min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        8            662.826   2.3095     0.310452 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                     

Epoch (training) 9:  95%|█████████▌| 20/21 [00:49<00:01,  1.83s/it]
Epoch (training) 9: 100%|██████████| 21/21 [00:49<00:00,  2.37s/it]
Epoch (test) 9:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 9:   9%|▉         | 1/11 [00:04<00:44,  4.43s/it]
Epoch (test) 9:  18%|█▊        | 2/11 [00:07<00:35,  3.90s/it]
Epoch (test) 9:  27%|██▋       | 3/11 [00:10<00:26,  3.35s/it]
Epoch (test) 9:  36%|███▋      | 4/11 [00:13<00:20,  2.98s/it]
Epoch (test) 9:  45%|████▌     | 5/11 [00:15<00:16,  2.69s/it]
Epoch (test) 9:  55%|█████▍    | 6/11 [00:18<00:13,  2.76s/it]
Epoch (test) 9:  64%|██████▎   | 7/11 [00:19<00:09,  2.42s/it]
Epoch (test) 9:  73%|███████▎  | 8/11 [00:22<00:07,  2.43s/it]
Epoch (test) 9:  82%|████████▏ | 9/11 [00:24<00:04,  2.44s/it]
Epoch (test) 9: 100%|██████████| 11/11 [00:24<00:00,  2.26s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_0

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:04:46. Total running time: 12min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        9            741.529   2.72982     0.294852 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                

Epoch (training) 10:   5%|▍         | 1/21 [00:03<01:12,  3.64s/it]
Epoch (training) 10:  10%|▉         | 2/21 [00:05<00:50,  2.64s/it]
Epoch (training) 10:  14%|█▍        | 3/21 [00:08<00:51,  2.83s/it]
Epoch (training) 10:  19%|█▉        | 4/21 [00:11<00:48,  2.88s/it]
Epoch (training) 10:  24%|██▍       | 5/21 [00:13<00:42,  2.64s/it]
Epoch (training) 10:  29%|██▊       | 6/21 [00:16<00:38,  2.55s/it]
Epoch (training) 10:  33%|███▎      | 7/21 [00:19<00:39,  2.80s/it]
Epoch (training) 10:  38%|███▊      | 8/21 [00:23<00:40,  3.09s/it]
Epoch (training) 10:  43%|████▎     | 9/21 [00:25<00:33,  2.79s/it]
Epoch (training) 10:  48%|████▊     | 10/21 [00:27<00:28,  2.63s/it]
Epoch (training) 10:  52%|█████▏    | 11/21 [00:29<00:25,  2.53s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:05:16. Total running time: 13min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        9            741.529   2.72982     0.294852 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                 

Epoch (training) 10:  57%|█████▋    | 12/21 [00:32<00:22,  2.50s/it]
Epoch (training) 10:  62%|██████▏   | 13/21 [00:34<00:20,  2.50s/it]
Epoch (training) 10:  67%|██████▋   | 14/21 [00:37<00:17,  2.56s/it]
Epoch (training) 10:  71%|███████▏  | 15/21 [00:40<00:16,  2.83s/it]
Epoch (training) 10:  76%|███████▌  | 16/21 [00:42<00:12,  2.54s/it]
Epoch (training) 10:  81%|████████  | 17/21 [00:45<00:10,  2.61s/it]
Epoch (training) 10:  86%|████████▌ | 18/21 [00:47<00:06,  2.31s/it]
Epoch (training) 10:  90%|█████████ | 19/21 [00:49<00:04,  2.23s/it]
Epoch (training) 10:  95%|█████████▌| 20/21 [00:49<00:01,  1.66s/it]
Epoch (training) 10: 100%|██████████| 21/21 [00:49<00:00,  2.37s/it]
Epoch (test) 10:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 10:   9%|▉         | 1/11 [00:05<00:54,  5.44s/it]
Epoch (test) 10:  18%|█▊        | 2/11 [00:07<00:33,  3.71s/it]
Epoch (test) 10:  27%|██▋       | 3/11 [00:10<00:24,  3.12s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:05:46. Total running time: 13min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00000   RUNNING                        10                       64              0.000304055        9            741.529   2.72982     0.294852 |
| TorchTrainer_e8bbe_00001   PENDING                        20                       32              0.0018874                                                

Epoch (test) 10:  36%|███▋      | 4/11 [00:12<00:19,  2.85s/it]
Epoch (test) 10:  45%|████▌     | 5/11 [00:15<00:16,  2.78s/it]
Epoch (test) 10:  55%|█████▍    | 6/11 [00:19<00:15,  3.13s/it]
Epoch (test) 10:  64%|██████▎   | 7/11 [00:20<00:10,  2.59s/it]
Epoch (test) 10:  73%|███████▎  | 8/11 [00:22<00:07,  2.34s/it]
Epoch (test) 10:  82%|████████▏ | 9/11 [00:24<00:04,  2.26s/it]
Epoch (test) 10: 100%|██████████| 11/11 [00:24<00:00,  2.25s/it]
[36m(RayTrainWorker pid=20650)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00000_0_batch_size=64,epochs=10,lr=0.0003_2024-04-14_07-52-15/checkpoint_000009)



Trial TorchTrainer_e8bbe_00000 completed after 10 iterations at 2024-04-14 08:06:06. Total running time: 13min 51s
+---------------------------------------------------------------+
| Trial TorchTrainer_e8bbe_00000 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000009 |
| time_this_iter_s                                     79.49463 |
| time_total_s                                        821.02326 |
| training_iteration                                         10 |
| accuracy                                              0.29485 |
| loss                                                  3.12129 |
| summary/epoch/0                                           1.0 |
| summary/epoch/1                                           2.0 |
| summary/epoch/2                                           3.0 |
| summary/epoch/3                                           4.0 |
| summary/epoch/4         

[36m(TorchTrainer pid=20570)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=20570)[0m - (ip=172.28.0.12, pid=24142) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=24142)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=24142)[0m 2024-04-14 08:06:13.973409: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=24142)[0m 2024-04-14 08:06:13.973462: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=24142)[0m 2024-04-14 08:06:13.974891: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 


Trial status: 1 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2024-04-14 08:06:16. Total running time: 14min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00001   RUNNING                          20                       32              0.0018874                                                      |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10            

[36m(RayTrainWorker pid=24142)[0m Moving model to device: cuda:0
Epoch (training) 1:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 1:   5%|▍         | 1/21 [00:08<02:44,  8.21s/it]
Epoch (training) 1:  10%|▉         | 2/21 [00:10<01:29,  4.69s/it]
Epoch (training) 1:  14%|█▍        | 3/21 [00:12<01:06,  3.67s/it]
Epoch (training) 1:  19%|█▉        | 4/21 [00:14<00:49,  2.93s/it]
Epoch (training) 1:  24%|██▍       | 5/21 [00:16<00:40,  2.51s/it]
Epoch (training) 1:  29%|██▊       | 6/21 [00:19<00:38,  2.59s/it]
Epoch (training) 1:  33%|███▎      | 7/21 [00:22<00:39,  2.80s/it]
Epoch (training) 1:  38%|███▊      | 8/21 [00:24<00:34,  2.67s/it]
Epoch (training) 1:  43%|████▎     | 9/21 [00:26<00:29,  2.42s/it]
Epoch (training) 1:  48%|████▊     | 10/21 [00:28<00:25,  2.33s/it]


Trial status: 1 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2024-04-14 08:06:47. Total running time: 14min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00001   RUNNING                          20                       32              0.0018874                                                      |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10            

Epoch (training) 1:  52%|█████▏    | 11/21 [00:31<00:25,  2.57s/it]
Epoch (training) 1:  57%|█████▋    | 12/21 [00:36<00:27,  3.10s/it]
Epoch (training) 1:  62%|██████▏   | 13/21 [00:38<00:22,  2.79s/it]
Epoch (training) 1:  67%|██████▋   | 14/21 [00:41<00:20,  2.99s/it]
Epoch (training) 1:  71%|███████▏  | 15/21 [00:43<00:16,  2.71s/it]
Epoch (training) 1:  76%|███████▌  | 16/21 [00:45<00:11,  2.40s/it]
Epoch (training) 1:  81%|████████  | 17/21 [00:48<00:10,  2.67s/it]
Epoch (training) 1:  86%|████████▌ | 18/21 [00:51<00:08,  2.73s/it]
Epoch (training) 1:  90%|█████████ | 19/21 [00:54<00:05,  2.82s/it]
Epoch (training) 1:  95%|█████████▌| 20/21 [00:55<00:02,  2.07s/it]
Epoch (training) 1: 100%|██████████| 21/21 [00:55<00:00,  2.64s/it]
Epoch (test) 1:   0%|          | 0/11 [00:00<?, ?it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2024-04-14 08:07:17. Total running time: 15min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00001   RUNNING                          20                       32              0.0018874                                                      |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10            8

Epoch (test) 1:   9%|▉         | 1/11 [00:04<00:44,  4.42s/it]
Epoch (test) 1:  18%|█▊        | 2/11 [00:07<00:30,  3.39s/it]
Epoch (test) 1:  27%|██▋       | 3/11 [00:10<00:27,  3.45s/it]
Epoch (test) 1:  36%|███▋      | 4/11 [00:13<00:22,  3.24s/it]
Epoch (test) 1:  45%|████▌     | 5/11 [00:15<00:17,  2.85s/it]
Epoch (test) 1:  55%|█████▍    | 6/11 [00:18<00:14,  2.86s/it]
Epoch (test) 1:  64%|██████▎   | 7/11 [00:20<00:09,  2.41s/it]
Epoch (test) 1:  73%|███████▎  | 8/11 [00:21<00:06,  2.20s/it]
Epoch (test) 1:  82%|████████▏ | 9/11 [00:24<00:04,  2.43s/it]
Epoch (test) 1: 100%|██████████| 11/11 [00:24<00:00,  2.26s/it]



Trial TorchTrainer_e8bbe_00001 completed after 1 iterations at 2024-04-14 08:07:45. Total running time: 15min 30s
+---------------------------------------------------------------+
| Trial TorchTrainer_e8bbe_00001 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000000 |
| time_this_iter_s                                      98.6708 |
| time_total_s                                          98.6708 |
| training_iteration                                          1 |
| accuracy                                              0.10296 |
| loss                                                   4.9129 |
| summary/epoch/0                                           1.0 |
| summary/train_acc/0                       0.16315378610460576 |
| summary/train_loss/0                       2.5790883018856956 |
| summary/val_acc/0                          0.1029641185647426 |
| summary/val_loss/0       

[36m(RayTrainWorker pid=24142)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00001_1_batch_size=32,epochs=20,lr=0.0019_2024-04-14_07-52-15/checkpoint_000000)



Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:07:47. Total running time: 15min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           

[36m(TorchTrainer pid=20570)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=20570)[0m - (ip=172.28.0.12, pid=24730) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=24730)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=24730)[0m 2024-04-14 08:07:51.558442: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=24730)[0m 2024-04-14 08:07:51.558511: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=24730)[0m 2024-04-14 08:07:51.561533: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:08:17. Total running time: 16min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 1:  33%|███▎      | 7/21 [00:21<00:36,  2.62s/it]
Epoch (training) 1:  38%|███▊      | 8/21 [00:24<00:37,  2.89s/it]
Epoch (training) 1:  43%|████▎     | 9/21 [00:27<00:35,  2.93s/it]
Epoch (training) 1:  48%|████▊     | 10/21 [00:29<00:28,  2.62s/it]
Epoch (training) 1:  52%|█████▏    | 11/21 [00:31<00:25,  2.51s/it]
Epoch (training) 1:  57%|█████▋    | 12/21 [00:33<00:20,  2.27s/it]
Epoch (training) 1:  62%|██████▏   | 13/21 [00:35<00:16,  2.08s/it]
Epoch (training) 1:  67%|██████▋   | 14/21 [00:37<00:13,  1.99s/it]
Epoch (training) 1:  71%|███████▏  | 15/21 [00:39<00:12,  2.03s/it]
Epoch (training) 1:  76%|███████▌  | 16/21 [00:42<00:11,  2.38s/it]
Epoch (training) 1:  81%|████████  | 17/21 [00:45<00:09,  2.48s/it]
Epoch (training) 1:  86%|████████▌ | 18/21 [00:47<00:07,  2.45s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:08:47. Total running time: 16min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 1:  90%|█████████ | 19/21 [00:51<00:05,  2.76s/it]
Epoch (training) 1:  95%|█████████▌| 20/21 [00:51<00:02,  2.03s/it]
Epoch (training) 1: 100%|██████████| 21/21 [00:51<00:00,  2.46s/it]
Epoch (test) 1:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 1:   9%|▉         | 1/11 [00:05<00:53,  5.33s/it]
Epoch (test) 1:  18%|█▊        | 2/11 [00:08<00:35,  3.98s/it]
Epoch (test) 1:  27%|██▋       | 3/11 [00:10<00:26,  3.26s/it]
Epoch (test) 1:  36%|███▋      | 4/11 [00:13<00:20,  2.93s/it]
Epoch (test) 1:  45%|████▌     | 5/11 [00:15<00:15,  2.66s/it]
Epoch (test) 1:  55%|█████▍    | 6/11 [00:18<00:14,  2.89s/it]
Epoch (test) 1:  64%|██████▎   | 7/11 [00:20<00:10,  2.60s/it]
Epoch (test) 1:  73%|███████▎  | 8/11 [00:22<00:07,  2.46s/it]
Epoch (test) 1:  82%|████████▏ | 9/11 [00:24<00:04,  2.33s/it]
Epoch (test) 1: 100%|██████████| 11/11 [00:25<00:00,  2.28s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/roo

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:09:17. Total running time: 17min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         1            91.4928   4.5178      0.118565 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 2:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 2:   5%|▍         | 1/21 [00:04<01:31,  4.57s/it]
Epoch (training) 2:  10%|▉         | 2/21 [00:07<01:11,  3.76s/it]
Epoch (training) 2:  14%|█▍        | 3/21 [00:09<00:53,  2.97s/it]
Epoch (training) 2:  19%|█▉        | 4/21 [00:12<00:47,  2.82s/it]
Epoch (training) 2:  24%|██▍       | 5/21 [00:15<00:44,  2.78s/it]
Epoch (training) 2:  29%|██▊       | 6/21 [00:17<00:37,  2.49s/it]
Epoch (training) 2:  33%|███▎      | 7/21 [00:21<00:42,  3.05s/it]
Epoch (training) 2:  38%|███▊      | 8/21 [00:24<00:40,  3.12s/it]
Epoch (training) 2:  43%|████▎     | 9/21 [00:26<00:32,  2.68s/it]
Epoch (training) 2:  48%|████▊     | 10/21 [00:28<00:27,  2.47s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:09:47. Total running time: 17min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         1            91.4928   4.5178      0.118565 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 2:  52%|█████▏    | 11/21 [00:30<00:25,  2.52s/it]
Epoch (training) 2:  57%|█████▋    | 12/21 [00:32<00:20,  2.31s/it]
Epoch (training) 2:  62%|██████▏   | 13/21 [00:36<00:21,  2.74s/it]
Epoch (training) 2:  67%|██████▋   | 14/21 [00:38<00:18,  2.63s/it]
Epoch (training) 2:  71%|███████▏  | 15/21 [00:41<00:15,  2.65s/it]
Epoch (training) 2:  76%|███████▌  | 16/21 [00:43<00:12,  2.57s/it]
Epoch (training) 2:  81%|████████  | 17/21 [00:45<00:09,  2.38s/it]
Epoch (training) 2:  86%|████████▌ | 18/21 [00:47<00:06,  2.19s/it]
Epoch (training) 2:  90%|█████████ | 19/21 [00:50<00:04,  2.33s/it]
Epoch (training) 2:  95%|█████████▌| 20/21 [00:50<00:01,  1.73s/it]
Epoch (training) 2: 100%|██████████| 21/21 [00:50<00:00,  2.42s/it]
Epoch (test) 2:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 2:   9%|▉         | 1/11 [00:04<00:48,  4.85s/it]
Epoch (test) 2:  18%|█▊        | 2/11 [00:07<00:31,  3.45s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:10:17. Total running time: 18min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         1            91.4928   4.5178      0.118565 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (test) 2:  27%|██▋       | 3/11 [00:09<00:23,  2.98s/it]
Epoch (test) 2:  36%|███▋      | 4/11 [00:12<00:19,  2.82s/it]
Epoch (test) 2:  45%|████▌     | 5/11 [00:15<00:17,  2.91s/it]
Epoch (test) 2:  55%|█████▍    | 6/11 [00:18<00:15,  3.04s/it]
Epoch (test) 2:  64%|██████▎   | 7/11 [00:20<00:10,  2.53s/it]
Epoch (test) 2:  73%|███████▎  | 8/11 [00:21<00:06,  2.29s/it]
Epoch (test) 2:  82%|████████▏ | 9/11 [00:24<00:04,  2.23s/it]
Epoch (test) 2: 100%|██████████| 11/11 [00:24<00:00,  2.20s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000001)
Epoch (training) 3:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 3:   5%|▍         | 1/21 [00:05<01:50,  5.53s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:10:47. Total running time: 18min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         2           175.993    3.63272     0.266771 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 3:  10%|▉         | 2/21 [00:07<01:05,  3.43s/it]
Epoch (training) 3:  14%|█▍        | 3/21 [00:10<00:54,  3.01s/it]
Epoch (training) 3:  19%|█▉        | 4/21 [00:13<00:51,  3.03s/it]
Epoch (training) 3:  24%|██▍       | 5/21 [00:15<00:45,  2.82s/it]
Epoch (training) 3:  29%|██▊       | 6/21 [00:18<00:40,  2.72s/it]
Epoch (training) 3:  33%|███▎      | 7/21 [00:20<00:36,  2.60s/it]
Epoch (training) 3:  38%|███▊      | 8/21 [00:23<00:34,  2.62s/it]
Epoch (training) 3:  43%|████▎     | 9/21 [00:25<00:30,  2.55s/it]
Epoch (training) 3:  48%|████▊     | 10/21 [00:28<00:30,  2.79s/it]
Epoch (training) 3:  52%|█████▏    | 11/21 [00:30<00:25,  2.51s/it]
Epoch (training) 3:  57%|█████▋    | 12/21 [00:32<00:20,  2.24s/it]
Epoch (training) 3:  62%|██████▏   | 13/21 [00:34<00:17,  2.23s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:11:17. Total running time: 19min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         2           175.993    3.63272     0.266771 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 3:  67%|██████▋   | 14/21 [00:36<00:14,  2.05s/it]
Epoch (training) 3:  71%|███████▏  | 15/21 [00:38<00:13,  2.19s/it]
Epoch (training) 3:  76%|███████▌  | 16/21 [00:42<00:12,  2.56s/it]
Epoch (training) 3:  81%|████████  | 17/21 [00:44<00:10,  2.57s/it]
Epoch (training) 3:  86%|████████▌ | 18/21 [00:47<00:07,  2.57s/it]
Epoch (training) 3:  90%|█████████ | 19/21 [00:49<00:04,  2.44s/it]
Epoch (training) 3:  95%|█████████▌| 20/21 [00:49<00:01,  1.81s/it]
Epoch (training) 3: 100%|██████████| 21/21 [00:49<00:00,  2.38s/it]
Epoch (test) 3:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 3:   9%|▉         | 1/11 [00:04<00:42,  4.26s/it]
Epoch (test) 3:  18%|█▊        | 2/11 [00:07<00:34,  3.80s/it]
Epoch (test) 3:  27%|██▋       | 3/11 [00:10<00:27,  3.39s/it]
Epoch (test) 3:  36%|███▋      | 4/11 [00:13<00:20,  2.99s/it]
Epoch (test) 3:  45%|████▌     | 5/11 [00:15<00:15,  2.65s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:11:47. Total running time: 19min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         2           175.993    3.63272     0.266771 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (test) 3:  55%|█████▍    | 6/11 [00:17<00:13,  2.71s/it]
Epoch (test) 3:  64%|██████▎   | 7/11 [00:19<00:09,  2.31s/it]
Epoch (test) 3:  73%|███████▎  | 8/11 [00:21<00:07,  2.37s/it]
Epoch (test) 3:  82%|████████▏ | 9/11 [00:24<00:04,  2.49s/it]
Epoch (test) 3: 100%|██████████| 11/11 [00:24<00:00,  2.25s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000002)
Epoch (training) 4:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 4:   5%|▍         | 1/21 [00:03<01:13,  3.70s/it]
Epoch (training) 4:  10%|▉         | 2/21 [00:05<00:49,  2.61s/it]
Epoch (training) 4:  14%|█▍        | 3/21 [00:08<00:46,  2.59s/it]
Epoch (training) 4:  19%|█▉        | 4/21 [00:11<00:47,  2.80s/it]
Epoch (training) 4:  24%|██▍       | 5/21 [00:13<00:42,  2.65s/it]
Epoch (training) 4:  29%|██▊    

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:12:17. Total running time: 20min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         3           254.745    2.86462     0.224649 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 4:  33%|███▎      | 7/21 [00:17<00:32,  2.34s/it]
Epoch (training) 4:  38%|███▊      | 8/21 [00:19<00:29,  2.25s/it]
Epoch (training) 4:  43%|████▎     | 9/21 [00:23<00:33,  2.78s/it]
Epoch (training) 4:  48%|████▊     | 10/21 [00:27<00:33,  3.01s/it]
Epoch (training) 4:  52%|█████▏    | 11/21 [00:28<00:25,  2.59s/it]
Epoch (training) 4:  57%|█████▋    | 12/21 [00:31<00:24,  2.74s/it]
Epoch (training) 4:  62%|██████▏   | 13/21 [00:34<00:20,  2.58s/it]
Epoch (training) 4:  67%|██████▋   | 14/21 [00:36<00:17,  2.51s/it]
Epoch (training) 4:  71%|███████▏  | 15/21 [00:40<00:17,  2.91s/it]
Epoch (training) 4:  76%|███████▌  | 16/21 [00:43<00:14,  2.88s/it]
Epoch (training) 4:  81%|████████  | 17/21 [00:45<00:10,  2.74s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:12:47. Total running time: 20min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         3           254.745    2.86462     0.224649 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 4:  86%|████████▌ | 18/21 [00:47<00:07,  2.49s/it]
Epoch (training) 4:  90%|█████████ | 19/21 [00:49<00:04,  2.33s/it]
Epoch (training) 4:  95%|█████████▌| 20/21 [00:49<00:01,  1.73s/it]
Epoch (training) 4: 100%|██████████| 21/21 [00:49<00:00,  2.38s/it]
Epoch (test) 4:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 4:   9%|▉         | 1/11 [00:05<00:58,  5.83s/it]
Epoch (test) 4:  18%|█▊        | 2/11 [00:08<00:34,  3.87s/it]
Epoch (test) 4:  27%|██▋       | 3/11 [00:10<00:25,  3.21s/it]
Epoch (test) 4:  36%|███▋      | 4/11 [00:13<00:20,  2.88s/it]
Epoch (test) 4:  45%|████▌     | 5/11 [00:15<00:15,  2.60s/it]
Epoch (test) 4:  55%|█████▍    | 6/11 [00:19<00:15,  3.17s/it]
Epoch (test) 4:  64%|██████▎   | 7/11 [00:21<00:10,  2.69s/it]
Epoch (test) 4:  73%|███████▎  | 8/11 [00:23<00:07,  2.40s/it]
Epoch (test) 4:  82%|████████▏ | 9/11 [00:25<00:04,  2.30s/it]
Epoch (test) 4:  91%|█████████ | 10/11 [00:25<00:01,  1.63s/it]
Epoch (test) 4: 100%|██████████| 11/11 [00

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:13:17. Total running time: 21min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         3           254.745    2.86462     0.224649 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000003)
Epoch (training) 5:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 5:   5%|▍         | 1/21 [00:05<01:44,  5.24s/it]
Epoch (training) 5:  10%|▉         | 2/21 [00:07<01:00,  3.20s/it]
Epoch (training) 5:  14%|█▍        | 3/21 [00:08<00:42,  2.38s/it]
Epoch (training) 5:  19%|█▉        | 4/21 [00:11<00:45,  2.70s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:13:47. Total running time: 21min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         4           349.406    2.39383     0.291732 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 5:  24%|██▍       | 5/21 [00:14<00:44,  2.76s/it]
Epoch (training) 5:  29%|██▊       | 6/21 [00:16<00:37,  2.50s/it]
Epoch (training) 5:  33%|███▎      | 7/21 [00:19<00:38,  2.78s/it]
Epoch (training) 5:  38%|███▊      | 8/21 [00:21<00:32,  2.49s/it]
Epoch (training) 5:  43%|████▎     | 9/21 [00:23<00:28,  2.39s/it]
Epoch (training) 5:  48%|████▊     | 10/21 [00:26<00:27,  2.48s/it]
Epoch (training) 5:  52%|█████▏    | 11/21 [00:29<00:26,  2.63s/it]
Epoch (training) 5:  57%|█████▋    | 12/21 [00:32<00:24,  2.70s/it]
Epoch (training) 5:  62%|██████▏   | 13/21 [00:34<00:20,  2.52s/it]
Epoch (training) 5:  67%|██████▋   | 14/21 [00:36<00:16,  2.36s/it]
Epoch (training) 5:  71%|███████▏  | 15/21 [00:37<00:12,  2.10s/it]
Epoch (training) 5:  76%|███████▌  | 16/21 [00:41<00:12,  2.43s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:14:17. Total running time: 22min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         4           349.406    2.39383     0.291732 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 5:  81%|████████  | 17/21 [00:44<00:11,  2.76s/it]
Epoch (training) 5:  86%|████████▌ | 18/21 [00:46<00:07,  2.49s/it]
Epoch (training) 5:  90%|█████████ | 19/21 [00:49<00:05,  2.72s/it]
Epoch (training) 5:  95%|█████████▌| 20/21 [00:50<00:01,  2.00s/it]
Epoch (training) 5: 100%|██████████| 21/21 [00:50<00:00,  2.40s/it]
Epoch (test) 5:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 5:   9%|▉         | 1/11 [00:04<00:43,  4.38s/it]
Epoch (test) 5:  18%|█▊        | 2/11 [00:07<00:34,  3.81s/it]
Epoch (test) 5:  27%|██▋       | 3/11 [00:10<00:27,  3.44s/it]
Epoch (test) 5:  36%|███▋      | 4/11 [00:13<00:21,  3.05s/it]
Epoch (test) 5:  45%|████▌     | 5/11 [00:15<00:16,  2.74s/it]
Epoch (test) 5:  55%|█████▍    | 6/11 [00:18<00:14,  2.81s/it]
Epoch (test) 5:  64%|██████▎   | 7/11 [00:19<00:09,  2.41s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:14:47. Total running time: 22min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         4           349.406    2.39383     0.291732 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (test) 5:  73%|███████▎  | 8/11 [00:22<00:07,  2.43s/it]
Epoch (test) 5:  82%|████████▏ | 9/11 [00:25<00:05,  2.53s/it]
Epoch (test) 5: 100%|██████████| 11/11 [00:25<00:00,  2.30s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000004)
Epoch (training) 6:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 6:   5%|▍         | 1/21 [00:07<02:25,  7.28s/it]
Epoch (training) 6:  10%|▉         | 2/21 [00:09<01:16,  4.02s/it]
Epoch (training) 6:  14%|█▍        | 3/21 [00:10<00:53,  2.95s/it]
Epoch (training) 6:  19%|█▉        | 4/21 [00:13<00:49,  2.93s/it]
Epoch (training) 6:  24%|██▍       | 5/21 [00:15<00:42,  2.64s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:15:17. Total running time: 23min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         5           434.085    2.06911     0.335413 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 6:  29%|██▊       | 6/21 [00:18<00:39,  2.64s/it]
Epoch (training) 6:  33%|███▎      | 7/21 [00:21<00:41,  2.95s/it]
Epoch (training) 6:  38%|███▊      | 8/21 [00:24<00:37,  2.92s/it]
Epoch (training) 6:  43%|████▎     | 9/21 [00:26<00:30,  2.53s/it]
Epoch (training) 6:  48%|████▊     | 10/21 [00:28<00:25,  2.30s/it]
Epoch (training) 6:  52%|█████▏    | 11/21 [00:29<00:20,  2.05s/it]
Epoch (training) 6:  57%|█████▋    | 12/21 [00:33<00:22,  2.49s/it]
Epoch (training) 6:  62%|██████▏   | 13/21 [00:35<00:20,  2.55s/it]
Epoch (training) 6:  67%|██████▋   | 14/21 [00:38<00:17,  2.44s/it]
Epoch (training) 6:  71%|███████▏  | 15/21 [00:39<00:12,  2.10s/it]
Epoch (training) 6:  76%|███████▌  | 16/21 [00:42<00:11,  2.32s/it]
Epoch (training) 6:  81%|████████  | 17/21 [00:45<00:09,  2.46s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:15:47. Total running time: 23min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         5           434.085    2.06911     0.335413 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 6:  86%|████████▌ | 18/21 [00:48<00:07,  2.62s/it]
Epoch (training) 6:  90%|█████████ | 19/21 [00:51<00:05,  2.83s/it]
Epoch (training) 6:  95%|█████████▌| 20/21 [00:51<00:02,  2.08s/it]
Epoch (training) 6: 100%|██████████| 21/21 [00:51<00:00,  2.47s/it]
Epoch (test) 6:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 6:   9%|▉         | 1/11 [00:04<00:43,  4.35s/it]
Epoch (test) 6:  18%|█▊        | 2/11 [00:06<00:29,  3.29s/it]
Epoch (test) 6:  27%|██▋       | 3/11 [00:09<00:25,  3.18s/it]
Epoch (test) 6:  36%|███▋      | 4/11 [00:13<00:22,  3.27s/it]
Epoch (test) 6:  45%|████▌     | 5/11 [00:15<00:17,  2.88s/it]
Epoch (test) 6:  55%|█████▍    | 6/11 [00:18<00:14,  2.90s/it]
Epoch (test) 6:  64%|██████▎   | 7/11 [00:20<00:09,  2.45s/it]
Epoch (test) 6:  73%|███████▎  | 8/11 [00:21<00:06,  2.25s/it]
Epoch (test) 6:  82%|████████▏ | 9/11 [00:24<00:04,  2.29s/it]
Epoch (test) 6: 100%|██████████| 11/11 [00:24<00:00,  2.21s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:16:17. Total running time: 24min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         5           434.085    2.06911     0.335413 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82



Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:16:48. Total running time: 24min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         5           434.085    2.06911     0.335413 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000005)
Epoch (training) 7:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 7:   5%|▍         | 1/21 [00:05<01:46,  5.35s/it]
Epoch (training) 7:  10%|▉         | 2/21 [00:07<01:08,  3.63s/it]
Epoch (training) 7:  14%|█▍        | 3/21 [00:10<00:59,  3.33s/it]
Epoch (training) 7:  19%|█▉        | 4/21 [00:14<01:00,  3.58s/it]
Epoch (training) 7:  24%|██▍       | 5/21 [00:16<00:46,  2.93s/it]
Epoch (training) 7:  29%|██▊       | 6/21 [00:18<00:37,  2.52s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:17:18. Total running time: 25min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         6           553.765    2.90402     0.268331 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 7:  33%|███▎      | 7/21 [00:20<00:33,  2.36s/it]
Epoch (training) 7:  38%|███▊      | 8/21 [00:22<00:32,  2.48s/it]
Epoch (training) 7:  43%|████▎     | 9/21 [00:26<00:32,  2.67s/it]
Epoch (training) 7:  48%|████▊     | 10/21 [00:29<00:31,  2.85s/it]
Epoch (training) 7:  52%|█████▏    | 11/21 [00:31<00:26,  2.62s/it]
Epoch (training) 7:  57%|█████▋    | 12/21 [00:33<00:21,  2.38s/it]
Epoch (training) 7:  62%|██████▏   | 13/21 [00:35<00:18,  2.34s/it]
Epoch (training) 7:  67%|██████▋   | 14/21 [00:37<00:15,  2.21s/it]
Epoch (training) 7:  71%|███████▏  | 15/21 [00:39<00:12,  2.12s/it]
Epoch (training) 7:  76%|███████▌  | 16/21 [00:42<00:11,  2.34s/it]
Epoch (training) 7:  81%|████████  | 17/21 [00:44<00:09,  2.46s/it]
Epoch (training) 7:  86%|████████▌ | 18/21 [00:47<00:07,  2.47s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:17:48. Total running time: 25min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         6           553.765    2.90402     0.268331 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 7:  90%|█████████ | 19/21 [00:49<00:04,  2.28s/it]
Epoch (training) 7:  95%|█████████▌| 20/21 [00:49<00:01,  1.69s/it]
Epoch (training) 7: 100%|██████████| 21/21 [00:49<00:00,  2.37s/it]
Epoch (test) 7:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 7:   9%|▉         | 1/11 [00:04<00:43,  4.31s/it]
Epoch (test) 7:  18%|█▊        | 2/11 [00:07<00:33,  3.77s/it]
Epoch (test) 7:  27%|██▋       | 3/11 [00:10<00:27,  3.41s/it]
Epoch (test) 7:  36%|███▋      | 4/11 [00:13<00:21,  3.03s/it]
Epoch (test) 7:  45%|████▌     | 5/11 [00:15<00:16,  2.71s/it]
Epoch (test) 7:  55%|█████▍    | 6/11 [00:18<00:13,  2.75s/it]
Epoch (test) 7:  64%|██████▎   | 7/11 [00:19<00:09,  2.34s/it]
Epoch (test) 7:  73%|███████▎  | 8/11 [00:22<00:07,  2.39s/it]
Epoch (test) 7:  82%|████████▏ | 9/11 [00:24<00:05,  2.53s/it]
Epoch (test) 7: 100%|██████████| 11/11 [00:25<00:00,  2.28s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:18:18. Total running time: 26min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         6           553.765    2.90402     0.268331 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000006)
Epoch (training) 8:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 8:   5%|▍         | 1/21 [00:04<01:39,  5.00s/it]
Epoch (training) 8:  10%|▉         | 2/21 [00:07<01:03,  3.36s/it]
Epoch (training) 8:  14%|█▍        | 3/21 [00:09<00:51,  2.86s/it]
Epoch (training) 8:  19%|█▉        | 4/21 [00:11<00:41,  2.47s/it]
Epoch (training) 8:  24%|██▍       | 5/21 [00:13<00:40,  2.50s/it]
Epoch (training) 8:  29%|██▊       | 6/21 [00:16<00:35,  2.36s/it]
Epoch (training) 8:  33%|███▎      | 7/21 [00:18<00:35,  2.55s/it]
Epoch (training) 8:  38%|███▊      | 8/21 [00:22<00:37,  2.88s/it]
Epoch (training) 8:  43%|████▎     | 9/21 [00:24<00:31,  2.64s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:18:48. Total running time: 26min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         7           636.53     2.48992     0.308892 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 8:  48%|████▊     | 10/21 [00:26<00:27,  2.52s/it]
Epoch (training) 8:  52%|█████▏    | 11/21 [00:30<00:27,  2.80s/it]
Epoch (training) 8:  57%|█████▋    | 12/21 [00:32<00:23,  2.61s/it]
Epoch (training) 8:  62%|██████▏   | 13/21 [00:34<00:19,  2.39s/it]
Epoch (training) 8:  67%|██████▋   | 14/21 [00:37<00:17,  2.53s/it]
Epoch (training) 8:  71%|███████▏  | 15/21 [00:39<00:14,  2.39s/it]
Epoch (training) 8:  76%|███████▌  | 16/21 [00:41<00:11,  2.28s/it]
Epoch (training) 8:  81%|████████  | 17/21 [00:44<00:09,  2.46s/it]
Epoch (training) 8:  86%|████████▌ | 18/21 [00:47<00:08,  2.69s/it]
Epoch (training) 8:  90%|█████████ | 19/21 [00:50<00:05,  2.70s/it]
Epoch (training) 8:  95%|█████████▌| 20/21 [00:50<00:01,  1.99s/it]
Epoch (training) 8: 100%|██████████| 21/21 [00:50<00:00,  2.41s/it]
Epoch (test) 8:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 8:   9%|▉         | 1/11 [00:04<00:45,  4.51s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:19:18. Total running time: 27min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         7           636.53     2.48992     0.308892 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (test) 8:  18%|█▊        | 2/11 [00:06<00:29,  3.31s/it]
Epoch (test) 8:  27%|██▋       | 3/11 [00:09<00:23,  2.90s/it]
Epoch (test) 8:  36%|███▋      | 4/11 [00:12<00:20,  2.88s/it]
Epoch (test) 8:  45%|████▌     | 5/11 [00:15<00:17,  2.91s/it]
Epoch (test) 8:  55%|█████▍    | 6/11 [00:18<00:14,  2.94s/it]
Epoch (test) 8:  64%|██████▎   | 7/11 [00:19<00:09,  2.46s/it]
Epoch (test) 8:  73%|███████▎  | 8/11 [00:21<00:06,  2.23s/it]
Epoch (test) 8:  82%|████████▏ | 9/11 [00:23<00:04,  2.17s/it]
Epoch (test) 8:  91%|█████████ | 10/11 [00:23<00:01,  1.54s/it]
Epoch (test) 8: 100%|██████████| 11/11 [00:23<00:00,  2.14s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000007)
Epoch (training) 9:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 9:   5%|▍         | 1/21 [00:04<01

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:19:48. Total running time: 27min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         8           714.927    3.03073     0.24337  |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 9:  14%|█▍        | 3/21 [00:09<00:49,  2.74s/it]
Epoch (training) 9:  19%|█▉        | 4/21 [00:12<00:50,  2.96s/it]
Epoch (training) 9:  24%|██▍       | 5/21 [00:14<00:41,  2.61s/it]
Epoch (training) 9:  29%|██▊       | 6/21 [00:17<00:43,  2.93s/it]
Epoch (training) 9:  33%|███▎      | 7/21 [00:19<00:34,  2.49s/it]
Epoch (training) 9:  38%|███▊      | 8/21 [00:21<00:30,  2.34s/it]
Epoch (training) 9:  43%|████▎     | 9/21 [00:23<00:27,  2.27s/it]
Epoch (training) 9:  48%|████▊     | 10/21 [00:25<00:23,  2.13s/it]
Epoch (training) 9:  52%|█████▏    | 11/21 [00:27<00:22,  2.25s/it]
Epoch (training) 9:  57%|█████▋    | 12/21 [00:30<00:21,  2.42s/it]
Epoch (training) 9:  62%|██████▏   | 13/21 [00:33<00:20,  2.53s/it]
Epoch (training) 9:  67%|██████▋   | 14/21 [00:37<00:20,  2.87s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:20:18. Total running time: 28min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         8           714.927    3.03073     0.24337  |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 9:  71%|███████▏  | 15/21 [00:39<00:16,  2.67s/it]
Epoch (training) 9:  76%|███████▌  | 16/21 [00:41<00:11,  2.36s/it]
Epoch (training) 9:  81%|████████  | 17/21 [00:44<00:10,  2.62s/it]
Epoch (training) 9:  86%|████████▌ | 18/21 [00:47<00:08,  2.94s/it]
Epoch (training) 9:  90%|█████████ | 19/21 [00:50<00:05,  2.76s/it]
Epoch (training) 9:  95%|█████████▌| 20/21 [00:50<00:02,  2.03s/it]
Epoch (training) 9: 100%|██████████| 21/21 [00:50<00:00,  2.42s/it]
Epoch (test) 9:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 9:   9%|▉         | 1/11 [00:04<00:43,  4.37s/it]
Epoch (test) 9:  18%|█▊        | 2/11 [00:06<00:29,  3.26s/it]
Epoch (test) 9:  27%|██▋       | 3/11 [00:10<00:26,  3.32s/it]
Epoch (test) 9:  36%|███▋      | 4/11 [00:13<00:21,  3.13s/it]
Epoch (test) 9:  45%|████▌     | 5/11 [00:15<00:16,  2.77s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:20:48. Total running time: 28min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         8           714.927    3.03073     0.24337  |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (test) 9:  55%|█████▍    | 6/11 [00:18<00:14,  2.80s/it]
Epoch (test) 9:  64%|██████▎   | 7/11 [00:19<00:09,  2.37s/it]
Epoch (test) 9:  73%|███████▎  | 8/11 [00:21<00:06,  2.18s/it]
Epoch (test) 9:  82%|████████▏ | 9/11 [00:24<00:04,  2.35s/it]
Epoch (test) 9: 100%|██████████| 11/11 [00:24<00:00,  2.20s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000008)
Epoch (training) 10:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (training) 10:   5%|▍         | 1/21 [00:04<01:33,  4.69s/it]
Epoch (training) 10:  10%|▉         | 2/21 [00:07<01:04,  3.40s/it]
Epoch (training) 10:  14%|█▍        | 3/21 [00:09<00:55,  3.06s/it]
Epoch (training) 10:  19%|█▉        | 4/21 [00:12<00:49,  2.90s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:21:18. Total running time: 29min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         9           799.327    2.90557     0.297972 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 10:  24%|██▍       | 5/21 [00:14<00:39,  2.47s/it]
Epoch (training) 10:  29%|██▊       | 6/21 [00:16<00:35,  2.34s/it]
Epoch (training) 10:  33%|███▎      | 7/21 [00:18<00:33,  2.41s/it]
Epoch (training) 10:  38%|███▊      | 8/21 [00:22<00:38,  2.94s/it]
Epoch (training) 10:  43%|████▎     | 9/21 [00:24<00:30,  2.55s/it]
Epoch (training) 10:  48%|████▊     | 10/21 [00:27<00:30,  2.77s/it]
Epoch (training) 10:  52%|█████▏    | 11/21 [00:31<00:28,  2.89s/it]
Epoch (training) 10:  57%|█████▋    | 12/21 [00:32<00:22,  2.48s/it]
Epoch (training) 10:  62%|██████▏   | 13/21 [00:35<00:20,  2.53s/it]
Epoch (training) 10:  67%|██████▋   | 14/21 [00:38<00:18,  2.65s/it]
Epoch (training) 10:  71%|███████▏  | 15/21 [00:41<00:16,  2.77s/it]
Epoch (training) 10:  76%|███████▌  | 16/21 [00:43<00:12,  2.57s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:21:48. Total running time: 29min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         9           799.327    2.90557     0.297972 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 10:  81%|████████  | 17/21 [00:45<00:09,  2.45s/it]
Epoch (training) 10:  86%|████████▌ | 18/21 [00:47<00:07,  2.36s/it]
Epoch (training) 10:  90%|█████████ | 19/21 [00:49<00:04,  2.32s/it]
Epoch (training) 10:  95%|█████████▌| 20/21 [00:50<00:01,  1.73s/it]
Epoch (training) 10: 100%|██████████| 21/21 [00:50<00:00,  2.40s/it]
Epoch (test) 10:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 10:   9%|▉         | 1/11 [00:05<00:50,  5.00s/it]
Epoch (test) 10:  18%|█▊        | 2/11 [00:07<00:31,  3.52s/it]
Epoch (test) 10:  27%|██▋       | 3/11 [00:09<00:23,  2.99s/it]
Epoch (test) 10:  36%|███▋      | 4/11 [00:12<00:19,  2.75s/it]
Epoch (test) 10:  45%|████▌     | 5/11 [00:15<00:17,  2.84s/it]
Epoch (test) 10:  55%|█████▍    | 6/11 [00:18<00:15,  3.06s/it]
Epoch (test) 10:  64%|██████▎   | 7/11 [00:20<00:10,  2.54s/it]
Epoch (test) 10:  73%|███████▎  | 8/11 [00:21<00:06,  2.29s/it]


Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:22:18. Total running time: 30min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00002   RUNNING                          20                       32              0.00203071         9           799.327    2.90557     0.297972 |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

[36m(RayTrainWorker pid=24730)[0m Epoch (test) 10:  82%|████████▏ | 9/11 [00:24<00:04,  2.22s/it]
[36m(RayTrainWorker pid=24730)[0m Epoch (test) 10:  91%|█████████ | 10/11 [00:24<00:01,  1.57s/it]Epoch (test) 10: 100%|██████████| 11/11 [00:24<00:00,  2.19s/it]
[36m(RayTrainWorker pid=24730)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00002_2_batch_size=32,epochs=20,lr=0.0020_2024-04-14_07-52-15/checkpoint_000009)



Trial TorchTrainer_e8bbe_00002 completed after 10 iterations at 2024-04-14 08:22:45. Total running time: 30min 30s
+---------------------------------------------------------------+
| Trial TorchTrainer_e8bbe_00002 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000009 |
| time_this_iter_s                                      99.1124 |
| time_total_s                                        898.43934 |
| training_iteration                                         10 |
| accuracy                                              0.29173 |
| loss                                                  3.82133 |
| summary/epoch/0                                           1.0 |
| summary/epoch/1                                           2.0 |
| summary/epoch/2                                           3.0 |
| summary/epoch/3                                           4.0 |
| summary/epoch/4         

[36m(TorchTrainer pid=20570)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=20570)[0m - (ip=172.28.0.12, pid=28516) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=28516)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=28516)[0m 2024-04-14 08:22:53.322250: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=28516)[0m 2024-04-14 08:22:53.322305: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=28516)[0m 2024-04-14 08:22:53.323924: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 

Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2024-04-14 08:23:19. Total running time: 31min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00003   RUNNING                          10                       32              0.00317889                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

Epoch (training) 1:  38%|███▊      | 8/21 [00:25<00:41,  3.17s/it]
Epoch (training) 1:  43%|████▎     | 9/21 [00:28<00:37,  3.11s/it]
Epoch (training) 1:  48%|████▊     | 10/21 [00:31<00:36,  3.33s/it]
Epoch (training) 1:  52%|█████▏    | 11/21 [00:34<00:30,  3.06s/it]
Epoch (training) 1:  57%|█████▋    | 12/21 [00:35<00:23,  2.60s/it]
Epoch (training) 1:  62%|██████▏   | 13/21 [00:38<00:21,  2.71s/it]
Epoch (training) 1:  67%|██████▋   | 14/21 [00:41<00:19,  2.81s/it]
Epoch (training) 1:  71%|███████▏  | 15/21 [00:43<00:15,  2.59s/it]
Epoch (training) 1:  76%|███████▌  | 16/21 [00:45<00:11,  2.31s/it]
Epoch (training) 1:  81%|████████  | 17/21 [00:48<00:10,  2.51s/it]


Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2024-04-14 08:23:49. Total running time: 31min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00003   RUNNING                          10                       32              0.00317889                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           8

Epoch (training) 1:  86%|████████▌ | 18/21 [00:51<00:08,  2.74s/it]
Epoch (training) 1:  90%|█████████ | 19/21 [00:53<00:04,  2.46s/it]
Epoch (training) 1:  95%|█████████▌| 20/21 [00:54<00:01,  1.82s/it]
Epoch (training) 1: 100%|██████████| 21/21 [00:54<00:00,  2.60s/it]
Epoch (test) 1:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 1:   9%|▉         | 1/11 [00:05<00:51,  5.15s/it]
Epoch (test) 1:  18%|█▊        | 2/11 [00:07<00:32,  3.61s/it]
Epoch (test) 1:  27%|██▋       | 3/11 [00:10<00:24,  3.11s/it]
Epoch (test) 1:  36%|███▋      | 4/11 [00:12<00:20,  2.96s/it]
Epoch (test) 1:  45%|████▌     | 5/11 [00:16<00:18,  3.01s/it]
Epoch (test) 1:  55%|█████▍    | 6/11 [00:19<00:15,  3.06s/it]
Epoch (test) 1:  64%|██████▎   | 7/11 [00:20<00:10,  2.54s/it]
Epoch (test) 1:  73%|███████▎  | 8/11 [00:22<00:06,  2.30s/it]
Epoch (test) 1:  82%|████████▏ | 9/11 [00:24<00:04,  2.22s/it]
Epoch (test) 1: 100%|██████████| 11/11 [00:24<00:00,  2.24s/it]


Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2024-04-14 08:24:19. Total running time: 32min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00003   RUNNING                          10                       32              0.00317889                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           82

[36m(RayTrainWorker pid=28516)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00003_3_batch_size=32,epochs=10,lr=0.0032_2024-04-14_07-52-15/checkpoint_000000)
[36m(TorchTrainer pid=20570)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=20570)[0m - (ip=172.28.0.12, pid=28969) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=28969)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=28969)[0m 2024-04-14 08:24:30.491631: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=28969)[0m 2024-04-14 08:24:30.491692: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has 


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 08:24:49. Total running time: 32min 34s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00004   RUNNING                          10                       32              0.00185077                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           821.023    3

Epoch (training) 1:  14%|█▍        | 3/21 [00:15<01:24,  4.69s/it]
Epoch (training) 1:  19%|█▉        | 4/21 [00:17<01:01,  3.59s/it]
Epoch (training) 1:  24%|██▍       | 5/21 [00:18<00:44,  2.77s/it]
Epoch (training) 1:  29%|██▊       | 6/21 [00:22<00:43,  2.90s/it]
Epoch (training) 1:  33%|███▎      | 7/21 [00:24<00:36,  2.62s/it]
Epoch (training) 1:  38%|███▊      | 8/21 [00:25<00:30,  2.35s/it]
Epoch (training) 1:  43%|████▎     | 9/21 [00:28<00:29,  2.49s/it]
Epoch (training) 1:  48%|████▊     | 10/21 [00:32<00:32,  2.95s/it]
Epoch (training) 1:  52%|█████▏    | 11/21 [00:35<00:28,  2.86s/it]
Epoch (training) 1:  57%|█████▋    | 12/21 [00:36<00:21,  2.41s/it]
Epoch (training) 1:  62%|██████▏   | 13/21 [00:38<00:18,  2.26s/it]
Epoch (training) 1:  67%|██████▋   | 14/21 [00:40<00:14,  2.13s/it]
Epoch (training) 1:  71%|███████▏  | 15/21 [00:43<00:14,  2.34s/it]


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 08:25:19. Total running time: 33min 4s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00004   RUNNING                          10                       32              0.00185077                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           821.023    3.1

Epoch (training) 1:  76%|███████▌  | 16/21 [00:47<00:14,  2.99s/it]
Epoch (training) 1:  81%|████████  | 17/21 [00:49<00:10,  2.67s/it]
Epoch (training) 1:  86%|████████▌ | 18/21 [00:52<00:08,  2.71s/it]
Epoch (training) 1:  90%|█████████ | 19/21 [00:55<00:05,  2.93s/it]
Epoch (training) 1:  95%|█████████▌| 20/21 [00:56<00:02,  2.15s/it]
Epoch (training) 1: 100%|██████████| 21/21 [00:56<00:00,  2.70s/it]
Epoch (test) 1:   0%|          | 0/11 [00:00<?, ?it/s]
Epoch (test) 1:   9%|▉         | 1/11 [00:05<00:59,  6.00s/it]
Epoch (test) 1:  18%|█▊        | 2/11 [00:08<00:35,  3.97s/it]
Epoch (test) 1:  27%|██▋       | 3/11 [00:11<00:26,  3.33s/it]
Epoch (test) 1:  36%|███▋      | 4/11 [00:13<00:21,  3.07s/it]
Epoch (test) 1:  45%|████▌     | 5/11 [00:16<00:17,  2.93s/it]


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 08:25:49. Total running time: 33min 34s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_e8bbe_00004   RUNNING                          10                       32              0.00185077                                                     |
| TorchTrainer_e8bbe_00000   TERMINATED                       10                       64              0.000304055       10           821.023    3.

Epoch (test) 1:  55%|█████▍    | 6/11 [00:20<00:16,  3.37s/it]
Epoch (test) 1:  64%|██████▎   | 7/11 [00:22<00:11,  2.77s/it]
Epoch (test) 1:  73%|███████▎  | 8/11 [00:24<00:07,  2.46s/it]
Epoch (test) 1:  82%|████████▏ | 9/11 [00:26<00:04,  2.35s/it]
Epoch (test) 1: 100%|██████████| 11/11 [00:26<00:00,  2.39s/it]
[36m(RayTrainWorker pid=28969)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_0/TorchTrainer_e8bbe_00004_4_batch_size=32,epochs=10,lr=0.0019_2024-04-14_07-52-15/checkpoint_000000)
2024-04-14 08:26:02,049	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/wide_resnet50_hpt_outer_4_inner_0' in 0.0399s.



Trial TorchTrainer_e8bbe_00004 completed after 1 iterations at 2024-04-14 08:26:02. Total running time: 33min 46s
+---------------------------------------------------------------+
| Trial TorchTrainer_e8bbe_00004 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000000 |
| time_this_iter_s                                     97.13152 |
| time_total_s                                         97.13152 |
| training_iteration                                          1 |
| accuracy                                              0.11076 |
| loss                                                  5.00286 |
| summary/epoch/0                                           1.0 |
| summary/train_acc/0                       0.18188914910226386 |
| summary/train_loss/0                       2.6032531829107377 |
| summary/val_acc/0                         0.11076443057722309 |
| summary/val_loss/0       

2024-04-14 08:26:02,506	INFO tune.py:622 -- [output] This will use the new output engine with verbosity 1. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


Outer fold 4, inner fold 1 - number of samples: 1281
Tuning hyperparameters for wide_resnet50_hpt_outer_4_inner_1...
Defaulting to ASHA scheduler (no scheduler provided or not an instance of TrialScheduler)
+----------------------------------------------------------------------+
| Configuration for experiment     wide_resnet50_hpt_outer_4_inner_1   |
+----------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator               |
| Scheduler                        AsyncHyperBandScheduler             |
| Number of trials                 5                                   |
+----------------------------------------------------------------------+

View detailed results here: /root/ray_results/wide_resnet50_hpt_outer_4_inner_1
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-04-14_07-52-09_892030_20145/artifacts/2024-04-14_08-26-02/wide_resnet50_hpt_outer_4_inner_1/driver_artifacts

[36m(TrainTrainable pid=29424)[0m 2024-04-14 08:26:09.701034: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(TrainTrainable pid=29424)[0m 2024-04-14 08:26:09.701091: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(TrainTrainable pid=29424)[0m 2024-04-14 08:26:09.702620: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Trial TorchTrainer_a1400_00000 started with configuration:
+-----------------------------------------------------------------+
| Trial TorchTrainer_a1400_00000 config                           |
+-----------------------------------------------------------------+
| train_loop_config/batch_size                                 32 |
| train_loop_config/epochs                                     20 |
| train_loop_config/lr                      0.0012209522298131333 |
| train_loop_config/train_test_idx           ...30, 5553, 10803]) |
+-----------------------------------------------------------------+


[36m(TorchTrainer pid=29424)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=29424)[0m - (ip=172.28.0.12, pid=29490) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=29490)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=29490)[0m 2024-04-14 08:26:17.190091: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=29490)[0m 2024-04-14 08:26:17.190164: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=29490)[0m 2024-04-14 08:26:17.191990: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:26:33. Total running time: 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr |
+----------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095  |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832  |
| TorchTrainer_a1400_00002   PENDING                        10                       32              0.000704551 |
| TorchTrainer_a1400_00003   PENDING                        10                       32              0.00024569  |
| TorchTrainer_a1400_0

Epoch (training) 1:   5%|▍         | 2/41 [00:13<03:47,  5.84s/it]
Epoch (training) 1:   7%|▋         | 3/41 [00:15<02:34,  4.06s/it]
Epoch (training) 1:  10%|▉         | 4/41 [00:16<01:49,  2.96s/it]
Epoch (training) 1:  12%|█▏        | 5/41 [00:17<01:29,  2.49s/it]
Epoch (training) 1:  15%|█▍        | 6/41 [00:19<01:12,  2.07s/it]
Epoch (training) 1:  17%|█▋        | 7/41 [00:20<00:56,  1.68s/it]
Epoch (training) 1:  20%|█▉        | 8/41 [00:21<00:51,  1.56s/it]
Epoch (training) 1:  22%|██▏       | 9/41 [00:22<00:45,  1.41s/it]
Epoch (training) 1:  24%|██▍       | 10/41 [00:24<00:48,  1.57s/it]
Epoch (training) 1:  27%|██▋       | 11/41 [00:25<00:41,  1.39s/it]
Epoch (training) 1:  29%|██▉       | 12/41 [00:26<00:37,  1.31s/it]
Epoch (training) 1:  32%|███▏      | 13/41 [00:27<00:35,  1.26s/it]
Epoch (training) 1:  34%|███▍      | 14/41 [00:29<00:41,  1.54s/it]
Epoch (training) 1:  37%|███▋      | 15/41 [00:30<00:36,  1.39s/it]
Epoch (training) 1:  39%|███▉      | 16/41 [00:31<00:31,

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:27:03. Total running time: 1min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr |
+----------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095  |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832  |
| TorchTrainer_a1400_00002   PENDING                        10                       32              0.000704551 |
| TorchTrainer_a1400_00003   PENDING                        10                       32              0.00024569  |
| TorchTrainer_a140

Epoch (training) 1:  56%|█████▌    | 23/41 [00:41<00:23,  1.31s/it]
Epoch (training) 1:  59%|█████▊    | 24/41 [00:43<00:27,  1.64s/it]
Epoch (training) 1:  61%|██████    | 25/41 [00:44<00:23,  1.49s/it]
Epoch (training) 1:  63%|██████▎   | 26/41 [00:45<00:18,  1.23s/it]
Epoch (training) 1:  66%|██████▌   | 27/41 [00:46<00:16,  1.17s/it]
Epoch (training) 1:  68%|██████▊   | 28/41 [00:47<00:14,  1.15s/it]
Epoch (training) 1:  71%|███████   | 29/41 [00:48<00:12,  1.06s/it]
Epoch (training) 1:  73%|███████▎  | 30/41 [00:50<00:13,  1.25s/it]
Epoch (training) 1:  76%|███████▌  | 31/41 [00:51<00:12,  1.21s/it]
Epoch (training) 1:  78%|███████▊  | 32/41 [00:52<00:10,  1.14s/it]
Epoch (training) 1:  80%|████████  | 33/41 [00:53<00:08,  1.10s/it]
Epoch (training) 1:  83%|████████▎ | 34/41 [00:54<00:07,  1.10s/it]
Epoch (training) 1:  85%|████████▌ | 35/41 [00:55<00:05,  1.01it/s]
Epoch (training) 1:  88%|████████▊ | 36/41 [00:56<00:05,  1.14s/it]
Epoch (training) 1:  90%|█████████ | 37/41 [00:5

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:27:33. Total running time: 1min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr |
+----------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095  |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832  |
| TorchTrainer_a1400_00002   PENDING                        10                       32              0.000704551 |
| TorchTrainer_a1400_00003   PENDING                        10                       32              0.00024569  |
| TorchTrainer_a14

Epoch (test) 1:  38%|███▊      | 8/21 [00:11<00:19,  1.52s/it]
Epoch (test) 1:  43%|████▎     | 9/21 [00:13<00:19,  1.66s/it]
Epoch (test) 1:  48%|████▊     | 10/21 [00:15<00:19,  1.73s/it]
Epoch (test) 1:  52%|█████▏    | 11/21 [00:16<00:13,  1.38s/it]
Epoch (test) 1:  57%|█████▋    | 12/21 [00:16<00:10,  1.16s/it]
Epoch (test) 1:  62%|██████▏   | 13/21 [00:17<00:08,  1.12s/it]
Epoch (test) 1:  67%|██████▋   | 14/21 [00:18<00:07,  1.05s/it]
Epoch (test) 1:  71%|███████▏  | 15/21 [00:19<00:05,  1.06it/s]
Epoch (test) 1:  76%|███████▌  | 16/21 [00:19<00:04,  1.18it/s]
Epoch (test) 1:  81%|████████  | 17/21 [00:20<00:03,  1.19it/s]
Epoch (test) 1:  86%|████████▌ | 18/21 [00:21<00:02,  1.17it/s]
Epoch (test) 1:  90%|█████████ | 19/21 [00:22<00:01,  1.04it/s]
Epoch (test) 1: 100%|██████████| 21/21 [00:23<00:00,  1.10s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:28:03. Total running time: 2min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         1            107.299   20.3262     0.215289 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (training) 2:   2%|▏         | 1/41 [00:04<02:57,  4.44s/it]
Epoch (training) 2:   5%|▍         | 2/41 [00:06<01:50,  2.82s/it]
Epoch (training) 2:   7%|▋         | 3/41 [00:06<01:10,  1.85s/it]
Epoch (training) 2:  10%|▉         | 4/41 [00:07<00:58,  1.57s/it]
Epoch (training) 2:  12%|█▏        | 5/41 [00:09<00:49,  1.38s/it]
Epoch (training) 2:  15%|█▍        | 6/41 [00:09<00:40,  1.17s/it]
Epoch (training) 2:  17%|█▋        | 7/41 [00:10<00:38,  1.13s/it]
Epoch (training) 2:  20%|█▉        | 8/41 [00:12<00:44,  1.35s/it]
Epoch (training) 2:  22%|██▏       | 9/41 [00:13<00:37,  1.17s/it]
Epoch (training) 2:  24%|██▍       | 10/41 [00:14<00:32,  1.05s/it]
Epoch (training) 2:  27%|██▋       | 11/41 [00:15<00:36,  1.23s/it]
Epoch (training) 2:  29%|██▉       | 12/41 [00:17<00:38,  1.33s/it]
Epoch (training) 2:  32%|███▏      | 13/41 [00:18<00:33,  1.21s/it]
Epoch (training) 2:  34%|███▍      | 14/41 [00:19<00:35,  1.32s/it]
Epoch (training) 2:  37%|███▋      | 15/41 [00:20<00:32, 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:28:33. Total running time: 2min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         1            107.299   20.3262     0.215289 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 2:  61%|██████    | 25/41 [00:34<00:20,  1.29s/it]
Epoch (training) 2:  63%|██████▎   | 26/41 [00:35<00:18,  1.27s/it]
Epoch (training) 2:  66%|██████▌   | 27/41 [00:36<00:15,  1.13s/it]
Epoch (training) 2:  68%|██████▊   | 28/41 [00:38<00:18,  1.42s/it]
Epoch (training) 2:  71%|███████   | 29/41 [00:39<00:16,  1.37s/it]
Epoch (training) 2:  73%|███████▎  | 30/41 [00:41<00:15,  1.43s/it]
Epoch (training) 2:  76%|███████▌  | 31/41 [00:42<00:12,  1.25s/it]
Epoch (training) 2:  78%|███████▊  | 32/41 [00:42<00:10,  1.12s/it]
Epoch (training) 2:  80%|████████  | 33/41 [00:43<00:08,  1.06s/it]
Epoch (training) 2:  83%|████████▎ | 34/41 [00:45<00:08,  1.17s/it]
Epoch (training) 2:  85%|████████▌ | 35/41 [00:46<00:06,  1.11s/it]
Epoch (training) 2:  88%|████████▊ | 36/41 [00:48<00:06,  1.37s/it]
Epoch (training) 2:  90%|█████████ | 37/41 [00:50<00:06,  1.68s/it]
Epoch (training) 2:  93%|█████████▎| 38/41 [00:51<00:04,  1.59s/it]
Epoch (training) 2:  95%|█████████▌| 39/41 [00:5

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:29:03. Total running time: 3min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         1            107.299   20.3262     0.215289 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (test) 2:  33%|███▎      | 7/21 [00:10<00:18,  1.31s/it]
Epoch (test) 2:  38%|███▊      | 8/21 [00:11<00:18,  1.45s/it]
Epoch (test) 2:  43%|████▎     | 9/21 [00:13<00:17,  1.43s/it]
Epoch (test) 2:  48%|████▊     | 10/21 [00:15<00:16,  1.54s/it]
Epoch (test) 2:  52%|█████▏    | 11/21 [00:15<00:12,  1.25s/it]
Epoch (test) 2:  57%|█████▋    | 12/21 [00:16<00:09,  1.07s/it]
Epoch (test) 2:  62%|██████▏   | 13/21 [00:17<00:08,  1.04s/it]
Epoch (test) 2:  67%|██████▋   | 14/21 [00:18<00:06,  1.00it/s]
Epoch (test) 2:  71%|███████▏  | 15/21 [00:18<00:05,  1.10it/s]
Epoch (test) 2:  76%|███████▌  | 16/21 [00:19<00:04,  1.22it/s]
Epoch (test) 2:  81%|████████  | 17/21 [00:20<00:03,  1.21it/s]
Epoch (test) 2:  86%|████████▌ | 18/21 [00:21<00:02,  1.03it/s]
Epoch (test) 2:  90%|█████████ | 19/21 [00:23<00:02,  1.23s/it]
Epoch (test) 2: 100%|██████████| 21/21 [00:23<00:00,  1.13s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:29:33. Total running time: 3min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         2            200.452   4.43476     0.198128 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 3:   2%|▏         | 1/41 [00:02<01:25,  2.13s/it]
Epoch (training) 3:   5%|▍         | 2/41 [00:03<01:01,  1.57s/it]
Epoch (training) 3:   7%|▋         | 3/41 [00:04<00:46,  1.23s/it]
Epoch (training) 3:  10%|▉         | 4/41 [00:04<00:38,  1.05s/it]
Epoch (training) 3:  12%|█▏        | 5/41 [00:06<00:41,  1.17s/it]
Epoch (training) 3:  15%|█▍        | 6/41 [00:07<00:36,  1.04s/it]
Epoch (training) 3:  17%|█▋        | 7/41 [00:08<00:36,  1.06s/it]
Epoch (training) 3:  20%|█▉        | 8/41 [00:10<00:43,  1.32s/it]
Epoch (training) 3:  22%|██▏       | 9/41 [00:11<00:40,  1.28s/it]
Epoch (training) 3:  24%|██▍       | 10/41 [00:12<00:40,  1.30s/it]
Epoch (training) 3:  27%|██▋       | 11/41 [00:14<00:43,  1.47s/it]
Epoch (training) 3:  29%|██▉       | 12/41 [00:15<00:40,  1.38s/it]
Epoch (training) 3:  32%|███▏      | 13/41 [00:16<00:38,  1.36s/it]
Epoch (training) 3:  34%|███▍      | 14/41 [00:17<00:33,  1.23s/it]
Epoch (training) 3:  37%|███▋      | 15/41 [00:19<00:33, 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:30:03. Total running time: 4min 0s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         2            200.452   4.43476     0.198128 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (training) 3:  59%|█████▊    | 24/41 [00:32<00:25,  1.51s/it]
Epoch (training) 3:  61%|██████    | 25/41 [00:34<00:26,  1.67s/it]
Epoch (training) 3:  63%|██████▎   | 26/41 [00:35<00:21,  1.45s/it]
Epoch (training) 3:  66%|██████▌   | 27/41 [00:36<00:19,  1.40s/it]
Epoch (training) 3:  68%|██████▊   | 28/41 [00:37<00:16,  1.24s/it]
Epoch (training) 3:  71%|███████   | 29/41 [00:38<00:14,  1.17s/it]
Epoch (training) 3:  73%|███████▎  | 30/41 [00:40<00:14,  1.28s/it]
Epoch (training) 3:  76%|███████▌  | 31/41 [00:41<00:12,  1.26s/it]
Epoch (training) 3:  78%|███████▊  | 32/41 [00:43<00:14,  1.60s/it]
Epoch (training) 3:  80%|████████  | 33/41 [00:45<00:12,  1.54s/it]
Epoch (training) 3:  83%|████████▎ | 34/41 [00:46<00:09,  1.41s/it]
Epoch (training) 3:  85%|████████▌ | 35/41 [00:47<00:08,  1.36s/it]
Epoch (training) 3:  88%|████████▊ | 36/41 [00:48<00:05,  1.18s/it]
Epoch (training) 3:  90%|█████████ | 37/41 [00:48<00:04,  1.09s/it]
Epoch (training) 3:  93%|█████████▎| 38/41 [00:5

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:30:33. Total running time: 4min 30s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         2            200.452   4.43476     0.198128 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 3:  33%|███▎      | 7/21 [00:10<00:15,  1.13s/it]
Epoch (test) 3:  38%|███▊      | 8/21 [00:12<00:17,  1.33s/it]
Epoch (test) 3:  43%|████▎     | 9/21 [00:13<00:16,  1.34s/it]
Epoch (test) 3:  48%|████▊     | 10/21 [00:15<00:16,  1.48s/it]
Epoch (test) 3:  52%|█████▏    | 11/21 [00:15<00:11,  1.20s/it]
Epoch (test) 3:  57%|█████▋    | 12/21 [00:16<00:09,  1.04s/it]
Epoch (test) 3:  62%|██████▏   | 13/21 [00:17<00:08,  1.02s/it]
Epoch (test) 3:  67%|██████▋   | 14/21 [00:18<00:07,  1.10s/it]
Epoch (test) 3:  71%|███████▏  | 15/21 [00:19<00:06,  1.05s/it]
Epoch (test) 3:  76%|███████▌  | 16/21 [00:20<00:04,  1.01it/s]
Epoch (test) 3:  81%|████████  | 17/21 [00:21<00:04,  1.04s/it]
Epoch (test) 3:  86%|████████▌ | 18/21 [00:22<00:03,  1.06s/it]
Epoch (test) 3:  90%|█████████ | 19/21 [00:24<00:02,  1.12s/it]
Epoch (test) 3: 100%|██████████| 21/21 [00:24<00:00,  1.15s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:31:03. Total running time: 5min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         3            280.012   5.36873     0.235569 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (training) 4:  22%|██▏       | 9/41 [00:12<00:34,  1.07s/it]
Epoch (training) 4:  24%|██▍       | 10/41 [00:13<00:37,  1.19s/it]
Epoch (training) 4:  27%|██▋       | 11/41 [00:16<00:45,  1.51s/it]
Epoch (training) 4:  29%|██▉       | 12/41 [00:16<00:37,  1.29s/it]
Epoch (training) 4:  32%|███▏      | 13/41 [00:18<00:35,  1.26s/it]
Epoch (training) 4:  34%|███▍      | 14/41 [00:19<00:39,  1.45s/it]
Epoch (training) 4:  37%|███▋      | 15/41 [00:21<00:37,  1.43s/it]
Epoch (training) 4:  39%|███▉      | 16/41 [00:23<00:38,  1.53s/it]
Epoch (training) 4:  41%|████▏     | 17/41 [00:23<00:32,  1.34s/it]
Epoch (training) 4:  44%|████▍     | 18/41 [00:25<00:30,  1.31s/it]
Epoch (training) 4:  46%|████▋     | 19/41 [00:26<00:25,  1.18s/it]
Epoch (training) 4:  49%|████▉     | 20/41 [00:27<00:24,  1.14s/it]
Epoch (training) 4:  51%|█████     | 21/41 [00:28<00:24,  1.24s/it]
Epoch (training) 4:  54%|█████▎    | 22/41 [00:29<00:22,  1.19s/it]
Epoch (training) 4:  56%|█████▌    | 23/41 [00:30

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:31:33. Total running time: 5min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         3            280.012   5.36873     0.235569 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 4:  76%|███████▌  | 31/41 [00:41<00:12,  1.20s/it]
Epoch (training) 4:  78%|███████▊  | 32/41 [00:43<00:10,  1.19s/it]
Epoch (training) 4:  80%|████████  | 33/41 [00:44<00:08,  1.09s/it]
Epoch (training) 4:  83%|████████▎ | 34/41 [00:44<00:06,  1.04it/s]
Epoch (training) 4:  85%|████████▌ | 35/41 [00:45<00:05,  1.02it/s]
Epoch (training) 4:  88%|████████▊ | 36/41 [00:47<00:05,  1.17s/it]
Epoch (training) 4:  90%|█████████ | 37/41 [00:48<00:04,  1.09s/it]
Epoch (training) 4:  93%|█████████▎| 38/41 [00:51<00:04,  1.64s/it]
Epoch (training) 4:  95%|█████████▌| 39/41 [00:52<00:03,  1.63s/it]
Epoch (training) 4:  98%|█████████▊| 40/41 [00:52<00:01,  1.20s/it]
Epoch (training) 4: 100%|██████████| 41/41 [00:53<00:00,  1.29s/it]
Epoch (test) 4:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 4:   5%|▍         | 1/21 [00:02<00:48,  2.42s/it]
Epoch (test) 4:  10%|▉         | 2/21 [00:03<00:35,  1.88s/it]
Epoch (test) 4:  14%|█▍        | 3/21 [00:05<00:32,  1.80s/it]
Epoch (t

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:32:03. Total running time: 6min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         3            280.012   5.36873     0.235569 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (test) 4:  71%|███████▏  | 15/21 [00:19<00:05,  1.06it/s]
Epoch (test) 4:  76%|███████▌  | 16/21 [00:19<00:04,  1.19it/s]
Epoch (test) 4:  81%|████████  | 17/21 [00:20<00:03,  1.19it/s]
Epoch (test) 4:  86%|████████▌ | 18/21 [00:21<00:02,  1.16it/s]
Epoch (test) 4:  90%|█████████ | 19/21 [00:22<00:01,  1.02it/s]
Epoch (test) 4: 100%|██████████| 21/21 [00:22<00:00,  1.09s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000003)
Epoch (training) 5:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 5:   2%|▏         | 1/41 [00:01<01:10,  1.76s/it]
Epoch (training) 5:   5%|▍         | 2/41 [00:03<00:57,  1.47s/it]
Epoch (training) 5:   7%|▋         | 3/41 [00:03<00:45,  1.20s/it]
Epoch (training) 5:  10%|▉         | 4/41 [00:04<00:39,  1.07s/it]
Epoch (training) 5:  12%|█▏    

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:32:33. Total running time: 6min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         4            360.011   2.13563      0.25429 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 5:  44%|████▍     | 18/41 [00:22<00:30,  1.32s/it]
Epoch (training) 5:  46%|████▋     | 19/41 [00:24<00:33,  1.52s/it]
Epoch (training) 5:  49%|████▉     | 20/41 [00:25<00:26,  1.26s/it]
Epoch (training) 5:  51%|█████     | 21/41 [00:26<00:25,  1.25s/it]
Epoch (training) 5:  54%|█████▎    | 22/41 [00:27<00:21,  1.13s/it]
Epoch (training) 5:  56%|█████▌    | 23/41 [00:30<00:29,  1.61s/it]
Epoch (training) 5:  59%|█████▊    | 24/41 [00:31<00:27,  1.59s/it]
Epoch (training) 5:  61%|██████    | 25/41 [00:33<00:24,  1.51s/it]
Epoch (training) 5:  63%|██████▎   | 26/41 [00:34<00:23,  1.58s/it]
Epoch (training) 5:  66%|██████▌   | 27/41 [00:35<00:20,  1.43s/it]
Epoch (training) 5:  68%|██████▊   | 28/41 [00:37<00:18,  1.44s/it]
Epoch (training) 5:  71%|███████   | 29/41 [00:38<00:15,  1.33s/it]
Epoch (training) 5:  73%|███████▎  | 30/41 [00:40<00:15,  1.45s/it]
Epoch (training) 5:  76%|███████▌  | 31/41 [00:41<00:13,  1.39s/it]
Epoch (training) 5:  78%|███████▊  | 32/41 [00:4

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:33:03. Total running time: 7min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         4            360.011   2.13563      0.25429 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (training) 5:  95%|█████████▌| 39/41 [00:51<00:02,  1.28s/it]
Epoch (training) 5:  98%|█████████▊| 40/41 [00:52<00:00,  1.05it/s]
Epoch (training) 5: 100%|██████████| 41/41 [00:52<00:00,  1.27s/it]
Epoch (test) 5:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 5:   5%|▍         | 1/21 [00:02<00:47,  2.36s/it]
Epoch (test) 5:  10%|▉         | 2/21 [00:03<00:35,  1.85s/it]
Epoch (test) 5:  14%|█▍        | 3/21 [00:05<00:35,  1.96s/it]
Epoch (test) 5:  19%|█▉        | 4/21 [00:06<00:26,  1.56s/it]
Epoch (test) 5:  24%|██▍       | 5/21 [00:08<00:24,  1.55s/it]
Epoch (test) 5:  29%|██▊       | 6/21 [00:09<00:20,  1.37s/it]
Epoch (test) 5:  33%|███▎      | 7/21 [00:10<00:16,  1.20s/it]
Epoch (test) 5:  38%|███▊      | 8/21 [00:12<00:17,  1.38s/it]
Epoch (test) 5:  43%|████▎     | 9/21 [00:13<00:16,  1.38s/it]
Epoch (test) 5:  48%|████▊     | 10/21 [00:15<00:16,  1.51s/it]
Epoch (test) 5:  52%|█████▏    | 11/21 [00:15<00:12,  1.22s/it]
Epoch (test) 5:  57%|█████▋    | 12/21 [00:16<

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:33:33. Total running time: 7min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         5            440.725   2.27055     0.216849 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 6:   2%|▏         | 1/41 [00:02<01:28,  2.20s/it]
Epoch (training) 6:   5%|▍         | 2/41 [00:03<01:00,  1.56s/it]
Epoch (training) 6:   7%|▋         | 3/41 [00:04<00:54,  1.44s/it]
Epoch (training) 6:  10%|▉         | 4/41 [00:05<00:46,  1.26s/it]
Epoch (training) 6:  12%|█▏        | 5/41 [00:08<01:07,  1.88s/it]
Epoch (training) 6:  15%|█▍        | 6/41 [00:09<00:54,  1.57s/it]
Epoch (training) 6:  17%|█▋        | 7/41 [00:10<00:49,  1.45s/it]
Epoch (training) 6:  20%|█▉        | 8/41 [00:11<00:40,  1.21s/it]
Epoch (training) 6:  22%|██▏       | 9/41 [00:12<00:36,  1.15s/it]
Epoch (training) 6:  24%|██▍       | 10/41 [00:13<00:38,  1.25s/it]
Epoch (training) 6:  27%|██▋       | 11/41 [00:15<00:37,  1.25s/it]
Epoch (training) 6:  29%|██▉       | 12/41 [00:16<00:35,  1.22s/it]
Epoch (training) 6:  32%|███▏      | 13/41 [00:17<00:35,  1.27s/it]
Epoch (training) 6:  34%|███▍      | 14/41 [00:18<00:30,  1.13s/it]
Epoch (training) 6:  37%|███▋      | 15/41 [00:20<00:35, 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:34:04. Total running time: 8min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         5            440.725   2.27055     0.216849 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (training) 6:  61%|██████    | 25/41 [00:32<00:20,  1.26s/it]
Epoch (training) 6:  63%|██████▎   | 26/41 [00:33<00:18,  1.20s/it]
Epoch (training) 6:  66%|██████▌   | 27/41 [00:34<00:16,  1.14s/it]
Epoch (training) 6:  68%|██████▊   | 28/41 [00:34<00:12,  1.00it/s]
Epoch (training) 6:  71%|███████   | 29/41 [00:36<00:14,  1.18s/it]
Epoch (training) 6:  73%|███████▎  | 30/41 [00:38<00:17,  1.56s/it]
Epoch (training) 6:  76%|███████▌  | 31/41 [00:40<00:15,  1.52s/it]
Epoch (training) 6:  78%|███████▊  | 32/41 [00:41<00:12,  1.41s/it]
Epoch (training) 6:  80%|████████  | 33/41 [00:42<00:10,  1.37s/it]
Epoch (training) 6:  83%|████████▎ | 34/41 [00:43<00:08,  1.20s/it]
Epoch (training) 6:  85%|████████▌ | 35/41 [00:44<00:07,  1.22s/it]
Epoch (training) 6:  88%|████████▊ | 36/41 [00:47<00:08,  1.64s/it]
Epoch (training) 6:  90%|█████████ | 37/41 [00:48<00:05,  1.42s/it]
Epoch (training) 6:  93%|█████████▎| 38/41 [00:50<00:04,  1.53s/it]
Epoch (training) 6:  95%|█████████▌| 39/41 [00:5

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:34:34. Total running time: 8min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         5            440.725   2.27055     0.216849 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 6:  33%|███▎      | 7/21 [00:09<00:14,  1.06s/it]
Epoch (test) 6:  38%|███▊      | 8/21 [00:11<00:16,  1.27s/it]
Epoch (test) 6:  43%|████▎     | 9/21 [00:13<00:16,  1.35s/it]
Epoch (test) 6:  48%|████▊     | 10/21 [00:15<00:18,  1.72s/it]
Epoch (test) 6:  52%|█████▏    | 11/21 [00:16<00:14,  1.44s/it]
Epoch (test) 6:  57%|█████▋    | 12/21 [00:17<00:11,  1.28s/it]
Epoch (test) 6:  62%|██████▏   | 13/21 [00:18<00:09,  1.19s/it]
Epoch (test) 6:  67%|██████▋   | 14/21 [00:19<00:07,  1.10s/it]
Epoch (test) 6:  71%|███████▏  | 15/21 [00:19<00:05,  1.02it/s]
Epoch (test) 6:  76%|███████▌  | 16/21 [00:20<00:04,  1.15it/s]
Epoch (test) 6:  81%|████████  | 17/21 [00:21<00:03,  1.18it/s]
Epoch (test) 6:  86%|████████▌ | 18/21 [00:22<00:02,  1.14it/s]
Epoch (test) 6:  90%|█████████ | 19/21 [00:23<00:01,  1.01it/s]
Epoch (test) 6: 100%|██████████| 21/21 [00:23<00:00,  1.12s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:35:04. Total running time: 9min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         6            520.329   2.48129     0.216849 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                 

Epoch (training) 7:  17%|█▋        | 7/41 [00:12<00:51,  1.50s/it]
Epoch (training) 7:  20%|█▉        | 8/41 [00:13<00:44,  1.35s/it]
Epoch (training) 7:  22%|██▏       | 9/41 [00:14<00:42,  1.34s/it]
Epoch (training) 7:  24%|██▍       | 10/41 [00:16<00:44,  1.43s/it]
Epoch (training) 7:  27%|██▋       | 11/41 [00:17<00:41,  1.37s/it]
Epoch (training) 7:  29%|██▉       | 12/41 [00:18<00:38,  1.33s/it]
Epoch (training) 7:  32%|███▏      | 13/41 [00:19<00:34,  1.25s/it]
Epoch (training) 7:  34%|███▍      | 14/41 [00:20<00:30,  1.14s/it]
Epoch (training) 7:  37%|███▋      | 15/41 [00:21<00:27,  1.07s/it]
Epoch (training) 7:  39%|███▉      | 16/41 [00:22<00:26,  1.05s/it]
Epoch (training) 7:  41%|████▏     | 17/41 [00:23<00:27,  1.14s/it]
Epoch (training) 7:  44%|████▍     | 18/41 [00:24<00:24,  1.09s/it]
Epoch (training) 7:  46%|████▋     | 19/41 [00:25<00:21,  1.03it/s]
Epoch (training) 7:  49%|████▉     | 20/41 [00:26<00:24,  1.15s/it]
Epoch (training) 7:  51%|█████     | 21/41 [00:27<0

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:35:34. Total running time: 9min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         6            520.329   2.48129     0.216849 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 7:  76%|███████▌  | 31/41 [00:42<00:11,  1.19s/it]
Epoch (training) 7:  78%|███████▊  | 32/41 [00:43<00:10,  1.18s/it]
Epoch (training) 7:  80%|████████  | 33/41 [00:44<00:10,  1.28s/it]
Epoch (training) 7:  83%|████████▎ | 34/41 [00:45<00:08,  1.23s/it]
Epoch (training) 7:  85%|████████▌ | 35/41 [00:48<00:09,  1.56s/it]
Epoch (training) 7:  88%|████████▊ | 36/41 [00:49<00:07,  1.42s/it]
Epoch (training) 7:  90%|█████████ | 37/41 [00:50<00:05,  1.32s/it]
Epoch (training) 7:  93%|█████████▎| 38/41 [00:51<00:03,  1.19s/it]
Epoch (training) 7:  95%|█████████▌| 39/41 [00:52<00:02,  1.28s/it]
Epoch (training) 7:  98%|█████████▊| 40/41 [00:53<00:00,  1.06it/s]
Epoch (training) 7: 100%|██████████| 41/41 [00:53<00:00,  1.30s/it]
Epoch (test) 7:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 7:   5%|▍         | 1/21 [00:02<00:46,  2.32s/it]
Epoch (test) 7:  10%|▉         | 2/21 [00:03<00:35,  1.86s/it]
Epoch (test) 7:  14%|█▍        | 3/21 [00:05<00:32,  1.80s/it]
Epoch (t

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:36:04. Total running time: 10min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         6            520.329   2.48129     0.216849 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 7:  71%|███████▏  | 15/21 [00:19<00:05,  1.10it/s]
Epoch (test) 7:  76%|███████▌  | 16/21 [00:19<00:04,  1.21it/s]
Epoch (test) 7:  81%|████████  | 17/21 [00:20<00:03,  1.14it/s]
Epoch (test) 7:  86%|████████▌ | 18/21 [00:22<00:03,  1.00s/it]
Epoch (test) 7:  90%|█████████ | 19/21 [00:23<00:02,  1.24s/it]
Epoch (test) 7: 100%|██████████| 21/21 [00:23<00:00,  1.14s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000006)
Epoch (training) 8:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 8:   2%|▏         | 1/41 [00:02<01:52,  2.80s/it]
Epoch (training) 8:   5%|▍         | 2/41 [00:04<01:25,  2.18s/it]
Epoch (training) 8:   7%|▋         | 3/41 [00:06<01:17,  2.04s/it]
Epoch (training) 8:  10%|▉         | 4/41 [00:07<01:02,  1.70s/it]
Epoch (training) 8:  12%|█▏    

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:36:34. Total running time: 10min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         7            601.775   2.02926     0.307332 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 8:  32%|███▏      | 13/41 [00:21<00:40,  1.45s/it]
Epoch (training) 8:  34%|███▍      | 14/41 [00:23<00:41,  1.52s/it]
Epoch (training) 8:  37%|███▋      | 15/41 [00:24<00:39,  1.51s/it]
Epoch (training) 8:  39%|███▉      | 16/41 [00:26<00:42,  1.71s/it]
Epoch (training) 8:  41%|████▏     | 17/41 [00:28<00:37,  1.57s/it]
Epoch (training) 8:  44%|████▍     | 18/41 [00:28<00:30,  1.34s/it]
Epoch (training) 8:  46%|████▋     | 19/41 [00:29<00:26,  1.21s/it]
Epoch (training) 8:  49%|████▉     | 20/41 [00:30<00:23,  1.13s/it]
Epoch (training) 8:  51%|█████     | 21/41 [00:31<00:21,  1.09s/it]
Epoch (training) 8:  54%|█████▎    | 22/41 [00:32<00:18,  1.03it/s]
Epoch (training) 8:  56%|█████▌    | 23/41 [00:33<00:17,  1.03it/s]
Epoch (training) 8:  59%|█████▊    | 24/41 [00:34<00:17,  1.02s/it]
Epoch (training) 8:  61%|██████    | 25/41 [00:35<00:17,  1.11s/it]
Epoch (training) 8:  63%|██████▎   | 26/41 [00:37<00:17,  1.19s/it]
Epoch (training) 8:  66%|██████▌   | 27/41 [00:3

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:37:04. Total running time: 11min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         7            601.775   2.02926     0.307332 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

[36m(RayTrainWorker pid=29490)[0m Epoch (training) 8:  95%|█████████▌| 39/41 [00:52<00:02,  1.22s/it]
Epoch (training) 8:  98%|█████████▊| 40/41 [00:52<00:00,  1.11it/s]
Epoch (training) 8: 100%|██████████| 41/41 [00:52<00:00,  1.28s/it]
Epoch (test) 8:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 8:   5%|▍         | 1/21 [00:03<01:08,  3.41s/it]
Epoch (test) 8:  10%|▉         | 2/21 [00:04<00:43,  2.31s/it]
Epoch (test) 8:  14%|█▍        | 3/21 [00:06<00:35,  1.99s/it]
Epoch (test) 8:  19%|█▉        | 4/21 [00:07<00:24,  1.47s/it]
Epoch (test) 8:  24%|██▍       | 5/21 [00:08<00:21,  1.34s/it]
Epoch (test) 8:  29%|██▊       | 6/21 [00:09<00:17,  1.15s/it]
Epoch (test) 8:  33%|███▎      | 7/21 [00:10<00:14,  1.06s/it]
Epoch (test) 8:  38%|███▊      | 8/21 [00:11<00:16,  1.27s/it]
Epoch (test) 8:  43%|████▎     | 9/21 [00:13<00:15,  1.29s/it]
Epoch (test) 8:  48%|████▊     | 10/21 [00:15<00:18,  1.67s/it]
Epoch (test) 8:  52%|█████▏    | 11/21 [00:16<00:13,  1.39s/it]
Epoch (te

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:37:34. Total running time: 11min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         8            681.954   3.8532     0.273011 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                   

Epoch (training) 9:   2%|▏         | 1/41 [00:03<02:03,  3.08s/it]
Epoch (training) 9:   5%|▍         | 2/41 [00:04<01:28,  2.28s/it]
Epoch (training) 9:   7%|▋         | 3/41 [00:06<01:14,  1.96s/it]
Epoch (training) 9:  10%|▉         | 4/41 [00:07<00:55,  1.50s/it]
Epoch (training) 9:  12%|█▏        | 5/41 [00:07<00:43,  1.21s/it]
Epoch (training) 9:  15%|█▍        | 6/41 [00:08<00:41,  1.17s/it]
Epoch (training) 9:  17%|█▋        | 7/41 [00:10<00:45,  1.35s/it]
Epoch (training) 9:  20%|█▉        | 8/41 [00:12<00:44,  1.34s/it]
Epoch (training) 9:  22%|██▏       | 9/41 [00:13<00:42,  1.33s/it]
Epoch (training) 9:  24%|██▍       | 10/41 [00:14<00:42,  1.38s/it]
Epoch (training) 9:  27%|██▋       | 11/41 [00:16<00:39,  1.33s/it]
Epoch (training) 9:  29%|██▉       | 12/41 [00:18<00:45,  1.58s/it]
Epoch (training) 9:  32%|███▏      | 13/41 [00:20<00:49,  1.77s/it]
Epoch (training) 9:  34%|███▍      | 14/41 [00:21<00:40,  1.48s/it]
Epoch (training) 9:  37%|███▋      | 15/41 [00:22<00:39, 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:38:04. Total running time: 12min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         8            681.954   3.8532     0.273011 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                    

Epoch (training) 9:  56%|█████▌    | 23/41 [00:31<00:22,  1.23s/it]
Epoch (training) 9:  59%|█████▊    | 24/41 [00:33<00:24,  1.44s/it]
Epoch (training) 9:  61%|██████    | 25/41 [00:34<00:20,  1.27s/it]
Epoch (training) 9:  63%|██████▎   | 26/41 [00:35<00:18,  1.25s/it]
Epoch (training) 9:  66%|██████▌   | 27/41 [00:36<00:16,  1.19s/it]
Epoch (training) 9:  68%|██████▊   | 28/41 [00:37<00:14,  1.08s/it]
Epoch (training) 9:  71%|███████   | 29/41 [00:38<00:12,  1.02s/it]
Epoch (training) 9:  73%|███████▎  | 30/41 [00:39<00:12,  1.12s/it]
Epoch (training) 9:  76%|███████▌  | 31/41 [00:41<00:14,  1.49s/it]
Epoch (training) 9:  78%|███████▊  | 32/41 [00:42<00:11,  1.33s/it]
Epoch (training) 9:  80%|████████  | 33/41 [00:44<00:11,  1.43s/it]
Epoch (training) 9:  83%|████████▎ | 34/41 [00:45<00:10,  1.47s/it]
Epoch (training) 9:  85%|████████▌ | 35/41 [00:48<00:10,  1.82s/it]
Epoch (training) 9:  88%|████████▊ | 36/41 [00:50<00:08,  1.75s/it]
Epoch (training) 9:  90%|█████████ | 37/41 [00:5

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:38:34. Total running time: 12min 31s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         8            681.954   3.8532     0.273011 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                   

Epoch (test) 9:  24%|██▍       | 5/21 [00:07<00:22,  1.38s/it]
Epoch (test) 9:  29%|██▊       | 6/21 [00:08<00:19,  1.28s/it]
Epoch (test) 9:  33%|███▎      | 7/21 [00:09<00:17,  1.27s/it]
Epoch (test) 9:  38%|███▊      | 8/21 [00:11<00:19,  1.51s/it]
Epoch (test) 9:  43%|████▎     | 9/21 [00:13<00:17,  1.48s/it]
Epoch (test) 9:  48%|████▊     | 10/21 [00:15<00:17,  1.58s/it]
Epoch (test) 9:  52%|█████▏    | 11/21 [00:15<00:12,  1.28s/it]
Epoch (test) 9:  57%|█████▋    | 12/21 [00:16<00:09,  1.08s/it]
Epoch (test) 9:  62%|██████▏   | 13/21 [00:17<00:08,  1.06s/it]
Epoch (test) 9:  67%|██████▋   | 14/21 [00:18<00:07,  1.02s/it]
Epoch (test) 9:  71%|███████▏  | 15/21 [00:19<00:05,  1.09it/s]
Epoch (test) 9:  76%|███████▌  | 16/21 [00:19<00:04,  1.21it/s]
Epoch (test) 9:  81%|████████  | 17/21 [00:20<00:03,  1.21it/s]
Epoch (test) 9:  86%|████████▌ | 18/21 [00:21<00:02,  1.12it/s]
Epoch (test) 9:  90%|█████████ | 19/21 [00:23<00:02,  1.18s/it]
Epoch (test) 9: 100%|██████████| 21/21 [00:23

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:39:04. Total running time: 13min 1s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         9            763.404   2.11715     0.293292 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 10:  17%|█▋        | 7/41 [00:09<00:45,  1.33s/it]
Epoch (training) 10:  20%|█▉        | 8/41 [00:11<00:47,  1.43s/it]
Epoch (training) 10:  22%|██▏       | 9/41 [00:13<00:48,  1.52s/it]
Epoch (training) 10:  24%|██▍       | 10/41 [00:14<00:43,  1.40s/it]
Epoch (training) 10:  27%|██▋       | 11/41 [00:15<00:44,  1.47s/it]
Epoch (training) 10:  29%|██▉       | 12/41 [00:16<00:35,  1.21s/it]
Epoch (training) 10:  32%|███▏      | 13/41 [00:17<00:31,  1.14s/it]
Epoch (training) 10:  34%|███▍      | 14/41 [00:18<00:30,  1.14s/it]
Epoch (training) 10:  37%|███▋      | 15/41 [00:20<00:32,  1.26s/it]
Epoch (training) 10:  39%|███▉      | 16/41 [00:21<00:31,  1.25s/it]
Epoch (training) 10:  41%|████▏     | 17/41 [00:22<00:32,  1.35s/it]
Epoch (training) 10:  44%|████▍     | 18/41 [00:24<00:31,  1.36s/it]
Epoch (training) 10:  46%|████▋     | 19/41 [00:26<00:37,  1.73s/it]
Epoch (training) 10:  49%|████▉     | 20/41 [00:28<00:37,  1.80s/it]
Epoch (training) 10:  51%|█████     |

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:39:34. Total running time: 13min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         9            763.404   2.11715     0.293292 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 10:  71%|███████   | 29/41 [00:39<00:17,  1.49s/it]
Epoch (training) 10:  73%|███████▎  | 30/41 [00:41<00:18,  1.64s/it]
Epoch (training) 10:  76%|███████▌  | 31/41 [00:42<00:15,  1.52s/it]
Epoch (training) 10:  78%|███████▊  | 32/41 [00:43<00:12,  1.33s/it]
Epoch (training) 10:  80%|████████  | 33/41 [00:45<00:12,  1.62s/it]
Epoch (training) 10:  83%|████████▎ | 34/41 [00:46<00:10,  1.44s/it]
Epoch (training) 10:  85%|████████▌ | 35/41 [00:47<00:07,  1.24s/it]
Epoch (training) 10:  88%|████████▊ | 36/41 [00:48<00:05,  1.10s/it]
Epoch (training) 10:  90%|█████████ | 37/41 [00:48<00:04,  1.03s/it]
Epoch (training) 10:  93%|█████████▎| 38/41 [00:49<00:03,  1.01s/it]
Epoch (training) 10:  95%|█████████▌| 39/41 [00:51<00:02,  1.11s/it]
Epoch (training) 10:  98%|█████████▊| 40/41 [00:51<00:00,  1.17it/s]
Epoch (training) 10: 100%|██████████| 41/41 [00:51<00:00,  1.26s/it]
Epoch (test) 10:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 10:   5%|▍         | 1/21 [00:03<0

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:40:04. Total running time: 14min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095         9            763.404   2.11715     0.293292 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 10:  62%|██████▏   | 13/21 [00:18<00:10,  1.27s/it]
Epoch (test) 10:  67%|██████▋   | 14/21 [00:19<00:08,  1.20s/it]
Epoch (test) 10:  71%|███████▏  | 15/21 [00:20<00:06,  1.04s/it]
Epoch (test) 10:  76%|███████▌  | 16/21 [00:21<00:04,  1.08it/s]
Epoch (test) 10:  81%|████████  | 17/21 [00:21<00:03,  1.12it/s]
Epoch (test) 10:  86%|████████▌ | 18/21 [00:22<00:02,  1.11it/s]
Epoch (test) 10:  90%|█████████ | 19/21 [00:24<00:02,  1.00s/it]
Epoch (test) 10: 100%|██████████| 21/21 [00:24<00:00,  1.15s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000009)
Epoch (training) 11:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 11:   2%|▏         | 1/41 [00:02<01:38,  2.47s/it]
Epoch (training) 11:   5%|▍         | 2/41 [00:04<01:27,  2.24s/it]
Epoch (training) 11:   7%|

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:40:34. Total running time: 14min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        10            843.318   2.35273     0.274571 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 11:  27%|██▋       | 11/41 [00:19<00:59,  1.99s/it]
Epoch (training) 11:  29%|██▉       | 12/41 [00:21<00:56,  1.96s/it]
Epoch (training) 11:  32%|███▏      | 13/41 [00:22<00:46,  1.68s/it]
Epoch (training) 11:  34%|███▍      | 14/41 [00:23<00:39,  1.47s/it]
Epoch (training) 11:  37%|███▋      | 15/41 [00:24<00:35,  1.38s/it]
Epoch (training) 11:  39%|███▉      | 16/41 [00:25<00:30,  1.21s/it]
Epoch (training) 11:  41%|████▏     | 17/41 [00:26<00:26,  1.11s/it]
Epoch (training) 11:  44%|████▍     | 18/41 [00:27<00:27,  1.19s/it]
Epoch (training) 11:  46%|████▋     | 19/41 [00:28<00:25,  1.14s/it]
Epoch (training) 11:  49%|████▉     | 20/41 [00:29<00:23,  1.13s/it]
Epoch (training) 11:  51%|█████     | 21/41 [00:31<00:22,  1.14s/it]
Epoch (training) 11:  54%|█████▎    | 22/41 [00:32<00:23,  1.22s/it]
Epoch (training) 11:  56%|█████▌    | 23/41 [00:34<00:26,  1.47s/it]
Epoch (training) 11:  59%|█████▊    | 24/41 [00:35<00:24,  1.41s/it]
Epoch (training) 11:  61%|██████  

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:41:04. Total running time: 15min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        10            843.318   2.35273     0.274571 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 11:  90%|█████████ | 37/41 [00:49<00:05,  1.33s/it]
Epoch (training) 11:  93%|█████████▎| 38/41 [00:50<00:04,  1.41s/it]
Epoch (training) 11:  95%|█████████▌| 39/41 [00:52<00:03,  1.50s/it]
Epoch (training) 11:  98%|█████████▊| 40/41 [00:52<00:01,  1.11s/it]
Epoch (training) 11: 100%|██████████| 41/41 [00:53<00:00,  1.29s/it]
Epoch (test) 11:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 11:   5%|▍         | 1/21 [00:02<00:45,  2.29s/it]
Epoch (test) 11:  10%|▉         | 2/21 [00:03<00:34,  1.83s/it]
Epoch (test) 11:  14%|█▍        | 3/21 [00:05<00:31,  1.74s/it]
Epoch (test) 11:  19%|█▉        | 4/21 [00:06<00:22,  1.33s/it]
Epoch (test) 11:  24%|██▍       | 5/21 [00:07<00:20,  1.30s/it]
Epoch (test) 11:  29%|██▊       | 6/21 [00:08<00:18,  1.24s/it]
Epoch (test) 11:  33%|███▎      | 7/21 [00:09<00:17,  1.23s/it]
Epoch (test) 11:  38%|███▊      | 8/21 [00:12<00:20,  1.59s/it]
Epoch (test) 11:  43%|████▎     | 9/21 [00:13<00:18,  1.52s/it]
Epoch (test) 11:  48%|█

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:41:34. Total running time: 15min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        10            843.318   2.35273     0.274571 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000010)
Epoch (training) 12:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 12:   2%|▏         | 1/41 [00:02<01:38,  2.47s/it]
Epoch (training) 12:   5%|▍         | 2/41 [00:03<01:06,  1.71s/it]
Epoch (training) 12:   7%|▋         | 3/41 [00:04<00:57,  1.52s/it]
Epoch (training) 12:  10%|▉         | 4/41 [00:06<00:55,  1.50s/it]
Epoch (training) 12:  12%|█▏        | 5/41 [00:08<00:55,  1.54s/it]
Epoch (training) 12:  15%|█▍        | 6/41 [00:10<01:00,  1.73s/it]
Epoch (training) 12:  17%|█▋        | 7/41 [00:11<00:55,  1.63s/it]
Epoch (training) 12:  20%|█▉        | 8/41 [00:12<00:45,  1.39s/it]
Epoch (training) 12:  22%|██▏       | 9/41 [00:13<00:40,  1.28s/it]
Epoch (training) 12:  24%|██▍       | 10/41 [00:14<00:41,  1.35s/it]
E

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:42:04. Total running time: 16min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        11            927.831   11.2517     0.162246 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 12:  44%|████▍     | 18/41 [00:25<00:29,  1.29s/it]
Epoch (training) 12:  46%|████▋     | 19/41 [00:26<00:26,  1.22s/it]
Epoch (training) 12:  49%|████▉     | 20/41 [00:28<00:28,  1.38s/it]
Epoch (training) 12:  51%|█████     | 21/41 [00:30<00:31,  1.56s/it]
Epoch (training) 12:  54%|█████▎    | 22/41 [00:31<00:26,  1.39s/it]
Epoch (training) 12:  56%|█████▌    | 23/41 [00:32<00:21,  1.20s/it]
Epoch (training) 12:  59%|█████▊    | 24/41 [00:33<00:21,  1.25s/it]
Epoch (training) 12:  61%|██████    | 25/41 [00:35<00:21,  1.36s/it]
Epoch (training) 12:  63%|██████▎   | 26/41 [00:36<00:18,  1.26s/it]
Epoch (training) 12:  66%|██████▌   | 27/41 [00:37<00:17,  1.27s/it]
Epoch (training) 12:  68%|██████▊   | 28/41 [00:38<00:16,  1.28s/it]
Epoch (training) 12:  71%|███████   | 29/41 [00:39<00:14,  1.18s/it]
Epoch (training) 12:  73%|███████▎  | 30/41 [00:40<00:11,  1.03s/it]
Epoch (training) 12:  76%|███████▌  | 31/41 [00:41<00:11,  1.12s/it]
Epoch (training) 12:  78%|███████▊

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:42:34. Total running time: 16min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        11            927.831   11.2517     0.162246 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (test) 12:   5%|▍         | 1/21 [00:02<00:46,  2.31s/it]
Epoch (test) 12:  10%|▉         | 2/21 [00:03<00:34,  1.83s/it]
Epoch (test) 12:  14%|█▍        | 3/21 [00:05<00:31,  1.75s/it]
Epoch (test) 12:  19%|█▉        | 4/21 [00:06<00:22,  1.31s/it]
Epoch (test) 12:  24%|██▍       | 5/21 [00:07<00:19,  1.25s/it]
Epoch (test) 12:  29%|██▊       | 6/21 [00:07<00:16,  1.08s/it]
Epoch (test) 12:  33%|███▎      | 7/21 [00:08<00:14,  1.03s/it]
Epoch (test) 12:  38%|███▊      | 8/21 [00:10<00:17,  1.34s/it]
Epoch (test) 12:  43%|████▎     | 9/21 [00:12<00:18,  1.53s/it]
Epoch (test) 12:  48%|████▊     | 10/21 [00:15<00:19,  1.80s/it]
Epoch (test) 12:  52%|█████▏    | 11/21 [00:15<00:14,  1.43s/it]
Epoch (test) 12:  57%|█████▋    | 12/21 [00:16<00:10,  1.20s/it]
Epoch (test) 12:  62%|██████▏   | 13/21 [00:17<00:09,  1.14s/it]
Epoch (test) 12:  67%|██████▋   | 14/21 [00:18<00:07,  1.07s/it]
Epoch (test) 12:  71%|███████▏  | 15/21 [00:19<00:05,  1.05it/s]
Epoch (test) 12:  76%|███████▌  | 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:43:04. Total running time: 17min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        12            1008.05   2.84665     0.224649 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 13:   5%|▍         | 2/41 [00:05<01:31,  2.35s/it]
Epoch (training) 13:   7%|▋         | 3/41 [00:05<01:02,  1.65s/it]
Epoch (training) 13:  10%|▉         | 4/41 [00:07<01:08,  1.85s/it]
Epoch (training) 13:  12%|█▏        | 5/41 [00:08<00:51,  1.42s/it]
Epoch (training) 13:  15%|█▍        | 6/41 [00:09<00:40,  1.15s/it]
Epoch (training) 13:  17%|█▋        | 7/41 [00:11<00:47,  1.39s/it]
Epoch (training) 13:  20%|█▉        | 8/41 [00:11<00:39,  1.21s/it]
Epoch (training) 13:  22%|██▏       | 9/41 [00:13<00:41,  1.30s/it]
Epoch (training) 13:  24%|██▍       | 10/41 [00:14<00:37,  1.22s/it]
Epoch (training) 13:  27%|██▋       | 11/41 [00:15<00:37,  1.24s/it]
Epoch (training) 13:  29%|██▉       | 12/41 [00:17<00:37,  1.30s/it]
Epoch (training) 13:  32%|███▏      | 13/41 [00:18<00:36,  1.30s/it]
Epoch (training) 13:  34%|███▍      | 14/41 [00:19<00:34,  1.28s/it]
Epoch (training) 13:  37%|███▋      | 15/41 [00:20<00:28,  1.10s/it]
Epoch (training) 13:  39%|███▉      | 16/4

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:43:34. Total running time: 17min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        12            1008.05   2.84665     0.224649 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 13:  63%|██████▎   | 26/41 [00:34<00:18,  1.24s/it]
Epoch (training) 13:  66%|██████▌   | 27/41 [00:36<00:20,  1.49s/it]
Epoch (training) 13:  68%|██████▊   | 28/41 [00:38<00:18,  1.44s/it]
Epoch (training) 13:  71%|███████   | 29/41 [00:39<00:14,  1.22s/it]
Epoch (training) 13:  73%|███████▎  | 30/41 [00:40<00:13,  1.19s/it]
Epoch (training) 13:  76%|███████▌  | 31/41 [00:42<00:14,  1.42s/it]
Epoch (training) 13:  78%|███████▊  | 32/41 [00:43<00:13,  1.45s/it]
Epoch (training) 13:  80%|████████  | 33/41 [00:45<00:12,  1.59s/it]
Epoch (training) 13:  83%|████████▎ | 34/41 [00:46<00:09,  1.42s/it]
Epoch (training) 13:  85%|████████▌ | 35/41 [00:47<00:07,  1.25s/it]
Epoch (training) 13:  88%|████████▊ | 36/41 [00:49<00:06,  1.36s/it]
Epoch (training) 13:  90%|█████████ | 37/41 [00:50<00:04,  1.25s/it]
Epoch (training) 13:  93%|█████████▎| 38/41 [00:50<00:03,  1.06s/it]
Epoch (training) 13:  95%|█████████▌| 39/41 [00:51<00:01,  1.00it/s]
Epoch (training) 13:  98%|████████

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:44:05. Total running time: 18min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        12            1008.05   2.84665     0.224649 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 13:  43%|████▎     | 9/21 [00:13<00:16,  1.36s/it]
Epoch (test) 13:  48%|████▊     | 10/21 [00:15<00:16,  1.52s/it]
Epoch (test) 13:  52%|█████▏    | 11/21 [00:15<00:12,  1.22s/it]
Epoch (test) 13:  57%|█████▋    | 12/21 [00:16<00:09,  1.06s/it]
Epoch (test) 13:  62%|██████▏   | 13/21 [00:17<00:08,  1.05s/it]
Epoch (test) 13:  67%|██████▋   | 14/21 [00:18<00:07,  1.02s/it]
Epoch (test) 13:  71%|███████▏  | 15/21 [00:19<00:05,  1.10it/s]
Epoch (test) 13:  76%|███████▌  | 16/21 [00:19<00:04,  1.15it/s]
Epoch (test) 13:  81%|████████  | 17/21 [00:20<00:03,  1.06it/s]
Epoch (test) 13:  86%|████████▌ | 18/21 [00:22<00:03,  1.04s/it]
Epoch (test) 13:  90%|█████████ | 19/21 [00:24<00:02,  1.26s/it]
Epoch (test) 13: 100%|██████████| 21/21 [00:24<00:00,  1.15s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_20

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:44:35. Total running time: 18min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        13            1089.34   2.12365     0.273011 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 14:  17%|█▋        | 7/41 [00:13<00:59,  1.75s/it]
Epoch (training) 14:  20%|█▉        | 8/41 [00:14<00:49,  1.49s/it]
Epoch (training) 14:  22%|██▏       | 9/41 [00:15<00:49,  1.55s/it]
Epoch (training) 14:  24%|██▍       | 10/41 [00:16<00:42,  1.37s/it]
Epoch (training) 14:  27%|██▋       | 11/41 [00:18<00:39,  1.31s/it]
Epoch (training) 14:  29%|██▉       | 12/41 [00:19<00:34,  1.20s/it]
Epoch (training) 14:  32%|███▏      | 13/41 [00:21<00:41,  1.48s/it]
Epoch (training) 14:  34%|███▍      | 14/41 [00:22<00:35,  1.33s/it]
Epoch (training) 14:  37%|███▋      | 15/41 [00:23<00:35,  1.38s/it]
Epoch (training) 14:  39%|███▉      | 16/41 [00:25<00:37,  1.49s/it]
Epoch (training) 14:  41%|████▏     | 17/41 [00:26<00:36,  1.51s/it]
Epoch (training) 14:  44%|████▍     | 18/41 [00:27<00:30,  1.31s/it]
Epoch (training) 14:  46%|████▋     | 19/41 [00:28<00:27,  1.26s/it]
Epoch (training) 14:  49%|████▉     | 20/41 [00:29<00:23,  1.12s/it]
Epoch (training) 14:  51%|█████     |

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:45:05. Total running time: 19min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        13            1089.34   2.12365     0.273011 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 14:  80%|████████  | 33/41 [00:44<00:08,  1.05s/it]
Epoch (training) 14:  83%|████████▎ | 34/41 [00:45<00:08,  1.16s/it]
Epoch (training) 14:  85%|████████▌ | 35/41 [00:47<00:07,  1.21s/it]
Epoch (training) 14:  88%|████████▊ | 36/41 [00:47<00:05,  1.08s/it]
Epoch (training) 14:  90%|█████████ | 37/41 [00:49<00:04,  1.08s/it]
Epoch (training) 14:  93%|█████████▎| 38/41 [00:50<00:03,  1.16s/it]
Epoch (training) 14:  95%|█████████▌| 39/41 [00:52<00:02,  1.38s/it]
Epoch (training) 14:  98%|█████████▊| 40/41 [00:52<00:01,  1.02s/it]
Epoch (training) 14: 100%|██████████| 41/41 [00:52<00:00,  1.28s/it]
Epoch (test) 14:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 14:   5%|▍         | 1/21 [00:02<00:50,  2.55s/it]
Epoch (test) 14:  10%|▉         | 2/21 [00:04<00:36,  1.92s/it]
Epoch (test) 14:  14%|█▍        | 3/21 [00:05<00:32,  1.80s/it]
Epoch (test) 14:  19%|█▉        | 4/21 [00:06<00:22,  1.35s/it]
Epoch (test) 14:  24%|██▍       | 5/21 [00:07<00:20,  1.28s/it]
Epo

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:45:35. Total running time: 19min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        13            1089.34   2.12365     0.273011 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (test) 14:  81%|████████  | 17/21 [00:20<00:03,  1.18it/s]
Epoch (test) 14:  86%|████████▌ | 18/21 [00:21<00:02,  1.14it/s]
Epoch (test) 14:  90%|█████████ | 19/21 [00:23<00:01,  1.01it/s]
Epoch (test) 14: 100%|██████████| 21/21 [00:23<00:00,  1.10s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000013)
Epoch (training) 15:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 15:   2%|▏         | 1/41 [00:01<01:05,  1.63s/it]
Epoch (training) 15:   5%|▍         | 2/41 [00:03<01:01,  1.57s/it]
Epoch (training) 15:   7%|▋         | 3/41 [00:04<00:50,  1.33s/it]
Epoch (training) 15:  10%|▉         | 4/41 [00:05<00:46,  1.26s/it]
Epoch (training) 15:  12%|█▏        | 5/41 [00:06<00:39,  1.09s/it]
Epoch (training) 15:  15%|█▍        | 6/41 [00:07<00:40,  1.16s/it]
Epoch (trainin

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:46:05. Total running time: 20min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        14            1172.51     2.11      0.26053 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                    

Epoch (training) 15:  37%|███▋      | 15/41 [00:21<00:33,  1.30s/it]
Epoch (training) 15:  39%|███▉      | 16/41 [00:22<00:33,  1.33s/it]
Epoch (training) 15:  41%|████▏     | 17/41 [00:24<00:35,  1.47s/it]
Epoch (training) 15:  44%|████▍     | 18/41 [00:25<00:33,  1.46s/it]
Epoch (training) 15:  46%|████▋     | 19/41 [00:27<00:33,  1.51s/it]
Epoch (training) 15:  49%|████▉     | 20/41 [00:28<00:29,  1.41s/it]
Epoch (training) 15:  51%|█████     | 21/41 [00:29<00:23,  1.19s/it]
Epoch (training) 15:  54%|█████▎    | 22/41 [00:30<00:20,  1.08s/it]
Epoch (training) 15:  56%|█████▌    | 23/41 [00:32<00:27,  1.52s/it]
Epoch (training) 15:  59%|█████▊    | 24/41 [00:34<00:25,  1.49s/it]
Epoch (training) 15:  61%|██████    | 25/41 [00:35<00:21,  1.36s/it]
Epoch (training) 15:  63%|██████▎   | 26/41 [00:36<00:18,  1.24s/it]
Epoch (training) 15:  66%|██████▌   | 27/41 [00:37<00:17,  1.22s/it]
Epoch (training) 15:  68%|██████▊   | 28/41 [00:38<00:17,  1.31s/it]
Epoch (training) 15:  71%|███████ 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:46:35. Total running time: 20min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        14            1172.51     2.11      0.26053 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                   

Epoch (training) 15:  93%|█████████▎| 38/41 [00:50<00:03,  1.09s/it]
Epoch (training) 15:  95%|█████████▌| 39/41 [00:52<00:02,  1.15s/it]
Epoch (training) 15:  98%|█████████▊| 40/41 [00:52<00:00,  1.17it/s]
Epoch (training) 15: 100%|██████████| 41/41 [00:52<00:00,  1.28s/it]
Epoch (test) 15:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 15:   5%|▍         | 1/21 [00:03<01:07,  3.40s/it]
Epoch (test) 15:  10%|▉         | 2/21 [00:05<00:50,  2.66s/it]
Epoch (test) 15:  14%|█▍        | 3/21 [00:07<00:39,  2.20s/it]
Epoch (test) 15:  19%|█▉        | 4/21 [00:07<00:27,  1.60s/it]
Epoch (test) 15:  24%|██▍       | 5/21 [00:08<00:22,  1.42s/it]
Epoch (test) 15:  29%|██▊       | 6/21 [00:09<00:18,  1.20s/it]
Epoch (test) 15:  33%|███▎      | 7/21 [00:10<00:15,  1.10s/it]
Epoch (test) 15:  38%|███▊      | 8/21 [00:12<00:16,  1.30s/it]
Epoch (test) 15:  43%|████▎     | 9/21 [00:13<00:15,  1.32s/it]
Epoch (test) 15:  48%|████▊     | 10/21 [00:15<00:16,  1.51s/it]
Epoch (test) 15:  52%|█████

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:47:05. Total running time: 21min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        14            1172.51     2.11      0.26053 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                    

[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000014)
Epoch (training) 16:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 16:   2%|▏         | 1/41 [00:03<02:25,  3.63s/it]
Epoch (training) 16:   5%|▍         | 2/41 [00:04<01:23,  2.13s/it]
Epoch (training) 16:   7%|▋         | 3/41 [00:06<01:23,  2.19s/it]
Epoch (training) 16:  10%|▉         | 4/41 [00:08<01:10,  1.90s/it]
Epoch (training) 16:  12%|█▏        | 5/41 [00:09<01:01,  1.70s/it]
Epoch (training) 16:  15%|█▍        | 6/41 [00:10<00:49,  1.42s/it]
Epoch (training) 16:  17%|█▋        | 7/41 [00:11<00:42,  1.26s/it]
Epoch (training) 16:  20%|█▉        | 8/41 [00:13<00:43,  1.31s/it]
Epoch (training) 16:  22%|██▏       | 9/41 [00:14<00:44,  1.39s/it]
Epoch (training) 16:  24%|██▍       | 10/41 [00:16<00:45,  1.47s/it]
E

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:47:35. Total running time: 21min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        15            1256.36   2.10914      0.26209 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 16:  46%|████▋     | 19/41 [00:27<00:29,  1.33s/it]
Epoch (training) 16:  49%|████▉     | 20/41 [00:28<00:27,  1.30s/it]
Epoch (training) 16:  51%|█████     | 21/41 [00:31<00:31,  1.58s/it]
Epoch (training) 16:  54%|█████▎    | 22/41 [00:32<00:32,  1.70s/it]
Epoch (training) 16:  56%|█████▌    | 23/41 [00:34<00:26,  1.49s/it]
Epoch (training) 16:  59%|█████▊    | 24/41 [00:35<00:23,  1.39s/it]
Epoch (training) 16:  61%|██████    | 25/41 [00:36<00:20,  1.26s/it]
Epoch (training) 16:  63%|██████▎   | 26/41 [00:37<00:18,  1.26s/it]
Epoch (training) 16:  66%|██████▌   | 27/41 [00:38<00:15,  1.11s/it]
Epoch (training) 16:  68%|██████▊   | 28/41 [00:38<00:13,  1.02s/it]
Epoch (training) 16:  71%|███████   | 29/41 [00:39<00:11,  1.06it/s]
Epoch (training) 16:  73%|███████▎  | 30/41 [00:40<00:09,  1.15it/s]
Epoch (training) 16:  76%|███████▌  | 31/41 [00:41<00:09,  1.01it/s]
Epoch (training) 16:  78%|███████▊  | 32/41 [00:43<00:11,  1.30s/it]
Epoch (training) 16:  80%|████████

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:48:05. Total running time: 22min 2s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        15            1256.36   2.10914      0.26209 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 16:  10%|▉         | 2/21 [00:03<00:35,  1.88s/it]
Epoch (test) 16:  14%|█▍        | 3/21 [00:06<00:35,  1.98s/it]
Epoch (test) 16:  19%|█▉        | 4/21 [00:06<00:26,  1.57s/it]
Epoch (test) 16:  24%|██▍       | 5/21 [00:08<00:25,  1.57s/it]
Epoch (test) 16:  29%|██▊       | 6/21 [00:09<00:20,  1.39s/it]
Epoch (test) 16:  33%|███▎      | 7/21 [00:10<00:17,  1.22s/it]
Epoch (test) 16:  38%|███▊      | 8/21 [00:12<00:18,  1.39s/it]
Epoch (test) 16:  43%|████▎     | 9/21 [00:13<00:16,  1.38s/it]
Epoch (test) 16:  48%|████▊     | 10/21 [00:15<00:16,  1.53s/it]
Epoch (test) 16:  52%|█████▏    | 11/21 [00:15<00:12,  1.23s/it]
Epoch (test) 16:  57%|█████▋    | 12/21 [00:16<00:09,  1.06s/it]
Epoch (test) 16:  62%|██████▏   | 13/21 [00:17<00:08,  1.04s/it]
Epoch (test) 16:  67%|██████▋   | 14/21 [00:18<00:07,  1.01s/it]
Epoch (test) 16:  71%|███████▏  | 15/21 [00:19<00:05,  1.09it/s]
Epoch (test) 16:  76%|███████▌  | 16/21 [00:20<00:04,  1.12it/s]
Epoch (test) 16:  81%|████████  |

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:48:35. Total running time: 22min 32s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        15            1256.36   2.10914      0.26209 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000015)
Epoch (training) 17:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 17:   2%|▏         | 1/41 [00:02<01:55,  2.89s/it]
Epoch (training) 17:   5%|▍         | 2/41 [00:04<01:15,  1.94s/it]
Epoch (training) 17:   7%|▋         | 3/41 [00:05<01:08,  1.80s/it]
Epoch (training) 17:  10%|▉         | 4/41 [00:08<01:13,  2.00s/it]
Epoch (training) 17:  12%|█▏        | 5/41 [00:09<01:08,  1.91s/it]
Epoch (training) 17:  15%|█▍        | 6/41 [00:11<00:59,  1.71s/it]
Epoch (training) 17:  17%|█▋        | 7/41 [00:12<00:48,  1.43s/it]
Epoch (training) 17:  20%|█▉        | 8/41 [00:13<00:48,  1.47s/it]
Epoch (training) 17:  22%|██▏       | 9/41 [00:14<00:43,  1.35s/it]
Epoch (training) 17:  24%|██▍       | 10/41 [00:16<00:42,  1.37s/it]
E

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:49:05. Total running time: 23min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        16            1343.56   2.04043     0.293292 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 17:  54%|█████▎    | 22/41 [00:29<00:21,  1.14s/it]
Epoch (training) 17:  56%|█████▌    | 23/41 [00:31<00:21,  1.22s/it]
Epoch (training) 17:  59%|█████▊    | 24/41 [00:32<00:20,  1.18s/it]
Epoch (training) 17:  61%|██████    | 25/41 [00:34<00:21,  1.31s/it]
Epoch (training) 17:  63%|██████▎   | 26/41 [00:35<00:17,  1.20s/it]
Epoch (training) 17:  66%|██████▌   | 27/41 [00:36<00:16,  1.15s/it]
Epoch (training) 17:  68%|██████▊   | 28/41 [00:37<00:16,  1.23s/it]
Epoch (training) 17:  71%|███████   | 29/41 [00:38<00:13,  1.16s/it]
Epoch (training) 17:  73%|███████▎  | 30/41 [00:39<00:12,  1.18s/it]
Epoch (training) 17:  76%|███████▌  | 31/41 [00:41<00:12,  1.27s/it]
Epoch (training) 17:  78%|███████▊  | 32/41 [00:42<00:12,  1.43s/it]
Epoch (training) 17:  80%|████████  | 33/41 [00:43<00:10,  1.25s/it]
Epoch (training) 17:  83%|████████▎ | 34/41 [00:45<00:08,  1.26s/it]
Epoch (training) 17:  85%|████████▌ | 35/41 [00:46<00:08,  1.35s/it]
Epoch (training) 17:  88%|████████

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:49:35. Total running time: 23min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        16            1343.56   2.04043     0.293292 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (test) 17:  24%|██▍       | 5/21 [00:07<00:23,  1.47s/it]
Epoch (test) 17:  29%|██▊       | 6/21 [00:09<00:20,  1.34s/it]
Epoch (test) 17:  33%|███▎      | 7/21 [00:10<00:18,  1.32s/it]
Epoch (test) 17:  38%|███▊      | 8/21 [00:12<00:19,  1.49s/it]
Epoch (test) 17:  43%|████▎     | 9/21 [00:13<00:17,  1.46s/it]
Epoch (test) 17:  48%|████▊     | 10/21 [00:15<00:17,  1.58s/it]
Epoch (test) 17:  52%|█████▏    | 11/21 [00:16<00:12,  1.28s/it]
Epoch (test) 17:  57%|█████▋    | 12/21 [00:16<00:09,  1.09s/it]
Epoch (test) 17:  62%|██████▏   | 13/21 [00:17<00:08,  1.07s/it]
Epoch (test) 17:  67%|██████▋   | 14/21 [00:18<00:07,  1.02s/it]
Epoch (test) 17:  71%|███████▏  | 15/21 [00:19<00:05,  1.07it/s]
Epoch (test) 17:  76%|███████▌  | 16/21 [00:19<00:04,  1.19it/s]
Epoch (test) 17:  81%|████████  | 17/21 [00:20<00:03,  1.19it/s]
Epoch (test) 17:  86%|████████▌ | 18/21 [00:22<00:02,  1.02it/s]
Epoch (test) 17:  90%|█████████ | 19/21 [00:23<00:02,  1.21s/it]
Epoch (test) 17: 100%|████████

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:50:05. Total running time: 24min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        17            1424.91   1.96011     0.294852 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 18:  15%|█▍        | 6/41 [00:08<00:45,  1.29s/it]
Epoch (training) 18:  17%|█▋        | 7/41 [00:09<00:40,  1.19s/it]
Epoch (training) 18:  20%|█▉        | 8/41 [00:10<00:36,  1.09s/it]
Epoch (training) 18:  22%|██▏       | 9/41 [00:11<00:37,  1.18s/it]
Epoch (training) 18:  24%|██▍       | 10/41 [00:12<00:37,  1.22s/it]
Epoch (training) 18:  27%|██▋       | 11/41 [00:13<00:34,  1.16s/it]
Epoch (training) 18:  29%|██▉       | 12/41 [00:14<00:30,  1.05s/it]
Epoch (training) 18:  32%|███▏      | 13/41 [00:17<00:39,  1.43s/it]
Epoch (training) 18:  34%|███▍      | 14/41 [00:17<00:33,  1.26s/it]
Epoch (training) 18:  37%|███▋      | 15/41 [00:19<00:35,  1.36s/it]
Epoch (training) 18:  39%|███▉      | 16/41 [00:20<00:31,  1.25s/it]
Epoch (training) 18:  41%|████▏     | 17/41 [00:21<00:26,  1.12s/it]
Epoch (training) 18:  44%|████▍     | 18/41 [00:22<00:25,  1.11s/it]
Epoch (training) 18:  46%|████▋     | 19/41 [00:24<00:31,  1.44s/it]
Epoch (training) 18:  49%|████▉     | 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:50:35. Total running time: 24min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        17            1424.91   1.96011     0.294852 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 18:  76%|███████▌  | 31/41 [00:41<00:17,  1.74s/it]
Epoch (training) 18:  78%|███████▊  | 32/41 [00:42<00:14,  1.66s/it]
Epoch (training) 18:  80%|████████  | 33/41 [00:44<00:14,  1.81s/it]
Epoch (training) 18:  83%|████████▎ | 34/41 [00:46<00:12,  1.79s/it]
Epoch (training) 18:  85%|████████▌ | 35/41 [00:47<00:09,  1.53s/it]
Epoch (training) 18:  88%|████████▊ | 36/41 [00:48<00:07,  1.44s/it]
Epoch (training) 18:  90%|█████████ | 37/41 [00:49<00:05,  1.34s/it]
Epoch (training) 18:  93%|█████████▎| 38/41 [00:50<00:03,  1.22s/it]
Epoch (training) 18:  95%|█████████▌| 39/41 [00:51<00:02,  1.24s/it]
Epoch (training) 18:  98%|█████████▊| 40/41 [00:52<00:00,  1.08it/s]
Epoch (training) 18: 100%|██████████| 41/41 [00:52<00:00,  1.27s/it]
Epoch (test) 18:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 18:   5%|▍         | 1/21 [00:03<01:08,  3.45s/it]
Epoch (test) 18:  10%|▉         | 2/21 [00:05<00:47,  2.49s/it]
Epoch (test) 18:  14%|█▍        | 3/21 [00:06<00:38,  2.1

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:51:05. Total running time: 25min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        17            1424.91   1.96011     0.294852 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (test) 18:  52%|█████▏    | 11/21 [00:16<00:13,  1.33s/it]
Epoch (test) 18:  57%|█████▋    | 12/21 [00:17<00:10,  1.21s/it]
Epoch (test) 18:  62%|██████▏   | 13/21 [00:18<00:10,  1.27s/it]
Epoch (test) 18:  67%|██████▋   | 14/21 [00:19<00:08,  1.20s/it]
Epoch (test) 18:  71%|███████▏  | 15/21 [00:20<00:06,  1.05s/it]
Epoch (test) 18:  76%|███████▌  | 16/21 [00:21<00:04,  1.09it/s]
Epoch (test) 18:  81%|████████  | 17/21 [00:21<00:03,  1.13it/s]
Epoch (test) 18:  86%|████████▌ | 18/21 [00:22<00:02,  1.13it/s]
Epoch (test) 18:  90%|█████████ | 19/21 [00:24<00:02,  1.00s/it]
Epoch (test) 18: 100%|██████████| 21/21 [00:24<00:00,  1.15s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000017)
Epoch (training) 19:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 19:   2%|▏     

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:51:35. Total running time: 25min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        18            1505.34   1.9547     0.319813 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                   

Epoch (training) 19:  34%|███▍      | 14/41 [00:18<00:35,  1.32s/it]
Epoch (training) 19:  37%|███▋      | 15/41 [00:20<00:36,  1.39s/it]
Epoch (training) 19:  39%|███▉      | 16/41 [00:22<00:38,  1.55s/it]
Epoch (training) 19:  41%|████▏     | 17/41 [00:23<00:33,  1.41s/it]
Epoch (training) 19:  44%|████▍     | 18/41 [00:24<00:31,  1.36s/it]
Epoch (training) 19:  46%|████▋     | 19/41 [00:25<00:26,  1.19s/it]
Epoch (training) 19:  49%|████▉     | 20/41 [00:26<00:24,  1.15s/it]
Epoch (training) 19:  51%|█████     | 21/41 [00:27<00:23,  1.17s/it]
Epoch (training) 19:  54%|█████▎    | 22/41 [00:28<00:19,  1.03s/it]
Epoch (training) 19:  56%|█████▌    | 23/41 [00:30<00:24,  1.39s/it]
Epoch (training) 19:  59%|█████▊    | 24/41 [00:32<00:23,  1.39s/it]
Epoch (training) 19:  61%|██████    | 25/41 [00:34<00:25,  1.62s/it]
Epoch (training) 19:  63%|██████▎   | 26/41 [00:36<00:24,  1.66s/it]
Epoch (training) 19:  66%|██████▌   | 27/41 [00:36<00:18,  1.36s/it]
Epoch (training) 19:  68%|██████▊ 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:52:05. Total running time: 26min 3s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        18            1505.34   1.9547     0.319813 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                    

Epoch (training) 19:  85%|████████▌ | 35/41 [00:48<00:08,  1.49s/it]
Epoch (training) 19:  88%|████████▊ | 36/41 [00:49<00:07,  1.55s/it]
Epoch (training) 19:  90%|█████████ | 37/41 [00:51<00:05,  1.45s/it]
Epoch (training) 19:  93%|█████████▎| 38/41 [00:52<00:04,  1.33s/it]
Epoch (training) 19:  95%|█████████▌| 39/41 [00:52<00:02,  1.18s/it]
Epoch (training) 19:  98%|█████████▊| 40/41 [00:53<00:00,  1.15it/s]
Epoch (training) 19: 100%|██████████| 41/41 [00:53<00:00,  1.30s/it]
Epoch (test) 19:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 19:   5%|▍         | 1/21 [00:02<00:46,  2.34s/it]
Epoch (test) 19:  10%|▉         | 2/21 [00:03<00:35,  1.84s/it]
Epoch (test) 19:  14%|█▍        | 3/21 [00:05<00:31,  1.76s/it]
Epoch (test) 19:  19%|█▉        | 4/21 [00:06<00:22,  1.35s/it]
Epoch (test) 19:  24%|██▍       | 5/21 [00:07<00:21,  1.36s/it]
Epoch (test) 19:  29%|██▊       | 6/21 [00:08<00:19,  1.27s/it]
Epoch (test) 19:  33%|███▎      | 7/21 [00:09<00:17,  1.25s/it]
Epoch (test) 

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:52:36. Total running time: 26min 33s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        18            1505.34   1.9547     0.319813 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                   



Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:53:08. Total running time: 27min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)     loss     accuracy |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        18            1505.34   1.9547     0.319813 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                    

[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000018)
Epoch (training) 20:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 20:   2%|▏         | 1/41 [00:02<01:36,  2.41s/it]
Epoch (training) 20:   5%|▍         | 2/41 [00:04<01:23,  2.13s/it]
Epoch (training) 20:   7%|▋         | 3/41 [00:06<01:13,  1.93s/it]
Epoch (training) 20:  10%|▉         | 4/41 [00:07<00:59,  1.61s/it]
Epoch (training) 20:  12%|█▏        | 5/41 [00:07<00:46,  1.30s/it]
Epoch (training) 20:  15%|█▍        | 6/41 [00:08<00:39,  1.12s/it]
Epoch (training) 20:  17%|█▋        | 7/41 [00:09<00:37,  1.10s/it]
Epoch (training) 20:  20%|█▉        | 8/41 [00:10<00:36,  1.11s/it]
Epoch (training) 20:  22%|██▏       | 9/41 [00:11<00:31,  1.00it/s]
Epoch (training) 20:  24%|██▍       | 10/41 [00:12<00:31,  1.01s/it]


Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:53:38. Total running time: 27min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        19            1633.26   1.99673     0.283931 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (training) 20:  27%|██▋       | 11/41 [00:14<00:34,  1.15s/it]
Epoch (training) 20:  29%|██▉       | 12/41 [00:14<00:29,  1.01s/it]
Epoch (training) 20:  32%|███▏      | 13/41 [00:16<00:38,  1.36s/it]
Epoch (training) 20:  34%|███▍      | 14/41 [00:18<00:34,  1.29s/it]
Epoch (training) 20:  37%|███▋      | 15/41 [00:20<00:40,  1.57s/it]
Epoch (training) 20:  39%|███▉      | 16/41 [00:21<00:37,  1.51s/it]
Epoch (training) 20:  41%|████▏     | 17/41 [00:22<00:33,  1.42s/it]
Epoch (training) 20:  44%|████▍     | 18/41 [00:23<00:28,  1.22s/it]
Epoch (training) 20:  46%|████▋     | 19/41 [00:25<00:28,  1.29s/it]
Epoch (training) 20:  49%|████▉     | 20/41 [00:26<00:28,  1.35s/it]
Epoch (training) 20:  51%|█████     | 21/41 [00:27<00:25,  1.26s/it]
Epoch (training) 20:  54%|█████▎    | 22/41 [00:28<00:22,  1.21s/it]
Epoch (training) 20:  56%|█████▌    | 23/41 [00:30<00:22,  1.27s/it]
Epoch (training) 20:  59%|█████▊    | 24/41 [00:32<00:26,  1.55s/it]
Epoch (training) 20:  61%|██████  

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:54:08. Total running time: 28min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        19            1633.26   1.99673     0.283931 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                                

Epoch (training) 20:  80%|████████  | 33/41 [00:43<00:09,  1.16s/it]
Epoch (training) 20:  83%|████████▎ | 34/41 [00:44<00:09,  1.30s/it]
Epoch (training) 20:  85%|████████▌ | 35/41 [00:47<00:10,  1.72s/it]
Epoch (training) 20:  88%|████████▊ | 36/41 [00:49<00:08,  1.67s/it]
Epoch (training) 20:  90%|█████████ | 37/41 [00:50<00:06,  1.65s/it]
Epoch (training) 20:  93%|█████████▎| 38/41 [00:51<00:04,  1.39s/it]
Epoch (training) 20:  95%|█████████▌| 39/41 [00:52<00:02,  1.29s/it]
Epoch (training) 20:  98%|█████████▊| 40/41 [00:52<00:00,  1.05it/s]
Epoch (training) 20: 100%|██████████| 41/41 [00:52<00:00,  1.29s/it]
Epoch (test) 20:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 20:   5%|▍         | 1/21 [00:02<00:47,  2.35s/it]
Epoch (test) 20:  10%|▉         | 2/21 [00:03<00:35,  1.86s/it]
Epoch (test) 20:  14%|█▍        | 3/21 [00:05<00:31,  1.77s/it]
Epoch (test) 20:  19%|█▉        | 4/21 [00:06<00:22,  1.34s/it]
Epoch (test) 20:  24%|██▍       | 5/21 [00:07<00:22,  1.41s/it]
Epo

Trial status: 1 RUNNING | 4 PENDING
Current time: 2024-04-14 08:54:38. Total running time: 28min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status       ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00000   RUNNING                        20                       32              0.00122095        19            1633.26   1.99673     0.283931 |
| TorchTrainer_a1400_00001   PENDING                        10                       32              0.00178832                                               

Epoch (test) 20:  81%|████████  | 17/21 [00:20<00:03,  1.23it/s]
Epoch (test) 20:  86%|████████▌ | 18/21 [00:21<00:02,  1.12it/s]
Epoch (test) 20:  90%|█████████ | 19/21 [00:23<00:02,  1.16s/it]
Epoch (test) 20: 100%|██████████| 21/21 [00:23<00:00,  1.12s/it]
[36m(RayTrainWorker pid=29490)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00000_0_batch_size=32,epochs=20,lr=0.0012_2024-04-14_08-26-02/checkpoint_000019)



Trial TorchTrainer_a1400_00000 completed after 20 iterations at 2024-04-14 08:54:52. Total running time: 28min 50s
+---------------------------------------------------------------+
| Trial TorchTrainer_a1400_00000 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000019 |
| time_this_iter_s                                     85.71624 |
| time_total_s                                       1718.97815 |
| training_iteration                                         20 |
| accuracy                                              0.29953 |
| loss                                                  2.02252 |
| summary/epoch/0                                           1.0 |
| summary/epoch/1                                           2.0 |
| summary/epoch/10                                         11.0 |
| summary/epoch/11                                         12.0 |
| summary/epoch/12        

[36m(TorchTrainer pid=29424)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=29424)[0m - (ip=172.28.0.12, pid=36850) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=36850)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=36850)[0m 2024-04-14 08:55:01.185633: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=36850)[0m 2024-04-14 08:55:01.185702: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=36850)[0m 2024-04-14 08:55:01.187101: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 


Trial status: 1 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2024-04-14 08:55:09. Total running time: 29min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00001   RUNNING                          10                       32              0.00178832                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20            

Epoch (training) 1:   5%|▍         | 2/41 [00:04<01:24,  2.15s/it]
Epoch (training) 1:   7%|▋         | 3/41 [00:06<01:08,  1.81s/it]
Epoch (training) 1:  10%|▉         | 4/41 [00:07<00:58,  1.57s/it]
Epoch (training) 1:  12%|█▏        | 5/41 [00:09<01:05,  1.81s/it]
Epoch (training) 1:  15%|█▍        | 6/41 [00:10<00:54,  1.57s/it]
Epoch (training) 1:  17%|█▋        | 7/41 [00:12<00:58,  1.73s/it]
Epoch (training) 1:  20%|█▉        | 8/41 [00:13<00:50,  1.53s/it]
Epoch (training) 1:  22%|██▏       | 9/41 [00:14<00:44,  1.41s/it]
Epoch (training) 1:  24%|██▍       | 10/41 [00:16<00:44,  1.44s/it]
Epoch (training) 1:  27%|██▋       | 11/41 [00:17<00:43,  1.45s/it]
Epoch (training) 1:  29%|██▉       | 12/41 [00:19<00:41,  1.43s/it]
Epoch (training) 1:  32%|███▏      | 13/41 [00:20<00:39,  1.41s/it]
Epoch (training) 1:  34%|███▍      | 14/41 [00:22<00:44,  1.64s/it]
Epoch (training) 1:  37%|███▋      | 15/41 [00:25<00:47,  1.82s/it]
Epoch (training) 1:  39%|███▉      | 16/41 [00:25<00:36,

Trial status: 1 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2024-04-14 08:55:39. Total running time: 29min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00001   RUNNING                          10                       32              0.00178832                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20            

Epoch (training) 1:  59%|█████▊    | 24/41 [00:35<00:25,  1.50s/it]
Epoch (training) 1:  61%|██████    | 25/41 [00:37<00:22,  1.41s/it]
Epoch (training) 1:  63%|██████▎   | 26/41 [00:39<00:24,  1.63s/it]
Epoch (training) 1:  66%|██████▌   | 27/41 [00:41<00:23,  1.69s/it]
Epoch (training) 1:  68%|██████▊   | 28/41 [00:41<00:18,  1.40s/it]
Epoch (training) 1:  71%|███████   | 29/41 [00:42<00:14,  1.21s/it]
Epoch (training) 1:  73%|███████▎  | 30/41 [00:43<00:13,  1.25s/it]
Epoch (training) 1:  76%|███████▌  | 31/41 [00:44<00:10,  1.09s/it]
Epoch (training) 1:  78%|███████▊  | 32/41 [00:45<00:09,  1.02s/it]
Epoch (training) 1:  80%|████████  | 33/41 [00:46<00:08,  1.05s/it]
Epoch (training) 1:  83%|████████▎ | 34/41 [00:47<00:08,  1.14s/it]
Epoch (training) 1:  85%|████████▌ | 35/41 [00:48<00:06,  1.04s/it]
Epoch (training) 1:  88%|████████▊ | 36/41 [00:50<00:05,  1.14s/it]
Epoch (training) 1:  90%|█████████ | 37/41 [00:51<00:05,  1.33s/it]
Epoch (training) 1:  93%|█████████▎| 38/41 [00:5

Trial status: 1 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2024-04-14 08:56:09. Total running time: 30min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)      loss     accuracy |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00001   RUNNING                          10                       32              0.00178832                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20            1

Epoch (test) 1:  38%|███▊      | 8/21 [00:11<00:19,  1.52s/it]
Epoch (test) 1:  43%|████▎     | 9/21 [00:13<00:19,  1.63s/it]
Epoch (test) 1:  48%|████▊     | 10/21 [00:15<00:18,  1.70s/it]
Epoch (test) 1:  52%|█████▏    | 11/21 [00:15<00:13,  1.35s/it]
Epoch (test) 1:  57%|█████▋    | 12/21 [00:16<00:10,  1.15s/it]
Epoch (test) 1:  62%|██████▏   | 13/21 [00:17<00:08,  1.10s/it]
Epoch (test) 1:  67%|██████▋   | 14/21 [00:18<00:07,  1.05s/it]
Epoch (test) 1:  71%|███████▏  | 15/21 [00:19<00:05,  1.07it/s]
Epoch (test) 1:  76%|███████▌  | 16/21 [00:19<00:04,  1.18it/s]
Epoch (test) 1:  81%|████████  | 17/21 [00:20<00:03,  1.20it/s]
Epoch (test) 1:  86%|████████▌ | 18/21 [00:21<00:02,  1.15it/s]
Epoch (test) 1:  90%|█████████ | 19/21 [00:22<00:01,  1.02it/s]
Epoch (test) 1: 100%|██████████| 21/21 [00:22<00:00,  1.08s/it]



Trial TorchTrainer_a1400_00001 completed after 1 iterations at 2024-04-14 08:56:36. Total running time: 30min 34s
+---------------------------------------------------------------+
| Trial TorchTrainer_a1400_00001 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000000 |
| time_this_iter_s                                    104.03096 |
| time_total_s                                        104.03096 |
| training_iteration                                          1 |
| accuracy                                              0.17785 |
| loss                                                 25.71688 |
| summary/epoch/0                                           1.0 |
| summary/train_acc/0                       0.15456674473067916 |
| summary/train_loss/0                         2.67365470455914 |
| summary/val_acc/0                         0.17784711388455537 |
| summary/val_loss/0       

[36m(RayTrainWorker pid=36850)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00001_1_batch_size=32,epochs=10,lr=0.0018_2024-04-14_08-26-02/checkpoint_000000)



Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:56:39. Total running time: 30min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)       loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00002   RUNNING                          10                       32              0.000704551                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20       

[36m(TorchTrainer pid=29424)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=29424)[0m - (ip=172.28.0.12, pid=37322) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=37322)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=37322)[0m 2024-04-14 08:56:43.917477: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=37322)[0m 2024-04-14 08:56:43.917534: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=37322)[0m 2024-04-14 08:56:43.918935: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already 

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:57:09. Total running time: 31min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)       loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00002   RUNNING                          10                       32              0.000704551                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20         

Epoch (training) 1:  39%|███▉      | 16/41 [00:22<00:36,  1.47s/it]
Epoch (training) 1:  41%|████▏     | 17/41 [00:24<00:32,  1.36s/it]
Epoch (training) 1:  44%|████▍     | 18/41 [00:25<00:32,  1.40s/it]
Epoch (training) 1:  46%|████▋     | 19/41 [00:26<00:28,  1.30s/it]
Epoch (training) 1:  49%|████▉     | 20/41 [00:28<00:30,  1.47s/it]
Epoch (training) 1:  51%|█████     | 21/41 [00:29<00:29,  1.46s/it]
Epoch (training) 1:  54%|█████▎    | 22/41 [00:31<00:26,  1.41s/it]
Epoch (training) 1:  56%|█████▌    | 23/41 [00:32<00:23,  1.29s/it]
Epoch (training) 1:  59%|█████▊    | 24/41 [00:33<00:24,  1.42s/it]
Epoch (training) 1:  61%|██████    | 25/41 [00:35<00:23,  1.46s/it]
Epoch (training) 1:  63%|██████▎   | 26/41 [00:37<00:23,  1.56s/it]
Epoch (training) 1:  66%|██████▌   | 27/41 [00:38<00:20,  1.47s/it]
Epoch (training) 1:  68%|██████▊   | 28/41 [00:39<00:18,  1.42s/it]
Epoch (training) 1:  71%|███████   | 29/41 [00:41<00:17,  1.42s/it]
Epoch (training) 1:  73%|███████▎  | 30/41 [00:4

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:57:39. Total running time: 31min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)       loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00002   RUNNING                          10                       32              0.000704551                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20        

Epoch (training) 1:  90%|█████████ | 37/41 [00:53<00:07,  1.94s/it]
Epoch (training) 1:  93%|█████████▎| 38/41 [00:54<00:05,  1.68s/it]
Epoch (training) 1:  95%|█████████▌| 39/41 [00:55<00:02,  1.41s/it]
Epoch (training) 1:  98%|█████████▊| 40/41 [00:55<00:01,  1.04s/it]
Epoch (training) 1: 100%|██████████| 41/41 [00:55<00:00,  1.36s/it]
Epoch (test) 1:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 1:   5%|▍         | 1/21 [00:02<00:45,  2.29s/it]
Epoch (test) 1:  10%|▉         | 2/21 [00:03<00:34,  1.84s/it]
Epoch (test) 1:  14%|█▍        | 3/21 [00:05<00:31,  1.74s/it]
Epoch (test) 1:  19%|█▉        | 4/21 [00:06<00:22,  1.33s/it]
Epoch (test) 1:  24%|██▍       | 5/21 [00:07<00:20,  1.25s/it]
Epoch (test) 1:  29%|██▊       | 6/21 [00:08<00:18,  1.20s/it]
Epoch (test) 1:  33%|███▎      | 7/21 [00:09<00:17,  1.21s/it]
Epoch (test) 1:  38%|███▊      | 8/21 [00:12<00:20,  1.61s/it]
Epoch (test) 1:  43%|████▎     | 9/21 [00:13<00:18,  1.54s/it]
Epoch (test) 1:  48%|████▊     | 10/21

Trial status: 2 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2024-04-14 08:58:09. Total running time: 32min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)       loss     accuracy |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00002   RUNNING                          10                       32              0.000704551                                                     |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20         

[36m(RayTrainWorker pid=37322)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00002_2_batch_size=32,epochs=10,lr=0.0007_2024-04-14_08-26-02/checkpoint_000000)
[36m(TorchTrainer pid=29424)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=29424)[0m - (ip=172.28.0.12, pid=37796) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=37796)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=37796)[0m 2024-04-14 08:58:27.397907: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=37796)[0m 2024-04-14 08:58:27.397958: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has 


Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2024-04-14 08:58:39. Total running time: 32min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00003   RUNNING                          10                       32              0.00024569                                                       |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20   

Epoch (training) 1:  15%|█▍        | 6/41 [00:09<00:53,  1.54s/it]
Epoch (training) 1:  17%|█▋        | 7/41 [00:11<00:54,  1.59s/it]
Epoch (training) 1:  20%|█▉        | 8/41 [00:12<00:46,  1.41s/it]
Epoch (training) 1:  22%|██▏       | 9/41 [00:14<00:47,  1.49s/it]
Epoch (training) 1:  24%|██▍       | 10/41 [00:14<00:37,  1.22s/it]
Epoch (training) 1:  27%|██▋       | 11/41 [00:16<00:40,  1.37s/it]
Epoch (training) 1:  29%|██▉       | 12/41 [00:17<00:35,  1.21s/it]
Epoch (training) 1:  32%|███▏      | 13/41 [00:18<00:31,  1.12s/it]
Epoch (training) 1:  34%|███▍      | 14/41 [00:20<00:36,  1.36s/it]
Epoch (training) 1:  37%|███▋      | 15/41 [00:21<00:32,  1.26s/it]
Epoch (training) 1:  39%|███▉      | 16/41 [00:22<00:35,  1.41s/it]
Epoch (training) 1:  41%|████▏     | 17/41 [00:23<00:30,  1.28s/it]
Epoch (training) 1:  44%|████▍     | 18/41 [00:25<00:31,  1.39s/it]
Epoch (training) 1:  46%|████▋     | 19/41 [00:27<00:37,  1.73s/it]
Epoch (training) 1:  49%|████▉     | 20/41 [00:29<00

Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2024-04-14 08:59:09. Total running time: 33min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00003   RUNNING                          10                       32              0.00024569                                                       |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20     

Epoch (training) 1:  63%|██████▎   | 26/41 [00:38<00:24,  1.65s/it]
Epoch (training) 1:  66%|██████▌   | 27/41 [00:39<00:20,  1.45s/it]
Epoch (training) 1:  68%|██████▊   | 28/41 [00:40<00:16,  1.28s/it]
Epoch (training) 1:  71%|███████   | 29/41 [00:41<00:13,  1.10s/it]
Epoch (training) 1:  73%|███████▎  | 30/41 [00:42<00:12,  1.12s/it]
Epoch (training) 1:  76%|███████▌  | 31/41 [00:43<00:11,  1.14s/it]
Epoch (training) 1:  78%|███████▊  | 32/41 [00:44<00:09,  1.04s/it]
Epoch (training) 1:  80%|████████  | 33/41 [00:45<00:08,  1.03s/it]
Epoch (training) 1:  83%|████████▎ | 34/41 [00:47<00:09,  1.40s/it]
Epoch (training) 1:  85%|████████▌ | 35/41 [00:49<00:08,  1.47s/it]
Epoch (training) 1:  88%|████████▊ | 36/41 [00:50<00:06,  1.39s/it]
Epoch (training) 1:  90%|█████████ | 37/41 [00:51<00:05,  1.38s/it]
Epoch (training) 1:  93%|█████████▎| 38/41 [00:53<00:04,  1.57s/it]
Epoch (training) 1:  95%|█████████▌| 39/41 [00:54<00:02,  1.44s/it]
Epoch (training) 1:  98%|█████████▊| 40/41 [00:5

Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2024-04-14 08:59:39. Total running time: 33min 36s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00003   RUNNING                          10                       32              0.00024569                                                       |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20    

Epoch (test) 1:  43%|████▎     | 9/21 [00:13<00:18,  1.55s/it]
Epoch (test) 1:  48%|████▊     | 10/21 [00:15<00:17,  1.63s/it]
Epoch (test) 1:  52%|█████▏    | 11/21 [00:16<00:13,  1.31s/it]
Epoch (test) 1:  57%|█████▋    | 12/21 [00:16<00:10,  1.13s/it]
Epoch (test) 1:  62%|██████▏   | 13/21 [00:17<00:08,  1.10s/it]
Epoch (test) 1:  67%|██████▋   | 14/21 [00:18<00:07,  1.04s/it]
Epoch (test) 1:  71%|███████▏  | 15/21 [00:19<00:05,  1.06it/s]
Epoch (test) 1:  76%|███████▌  | 16/21 [00:20<00:04,  1.18it/s]
Epoch (test) 1:  81%|████████  | 17/21 [00:20<00:03,  1.16it/s]
Epoch (test) 1:  86%|████████▌ | 18/21 [00:21<00:02,  1.14it/s]
Epoch (test) 1:  90%|█████████ | 19/21 [00:23<00:02,  1.15s/it]
Epoch (test) 1: 100%|██████████| 21/21 [00:23<00:00,  1.13s/it]



Trial TorchTrainer_a1400_00003 completed after 1 iterations at 2024-04-14 08:59:59. Total running time: 33min 56s
+---------------------------------------------------------------+
| Trial TorchTrainer_a1400_00003 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000000 |
| time_this_iter_s                                     98.97576 |
| time_total_s                                         98.97576 |
| training_iteration                                          1 |
| accuracy                                              0.19501 |
| loss                                                  4.03232 |
| summary/epoch/0                                           1.0 |
| summary/train_acc/0                       0.13895394223263074 |
| summary/train_loss/0                       2.8467650326286873 |
| summary/val_acc/0                         0.19500780031201248 |
| summary/val_loss/0       

[36m(RayTrainWorker pid=37796)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00003_3_batch_size=32,epochs=10,lr=0.0002_2024-04-14_08-26-02/checkpoint_000000)
[36m(TorchTrainer pid=29424)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=29424)[0m - (ip=172.28.0.12, pid=38251) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=38251)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=38251)[0m 2024-04-14 09:00:06.553425: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=38251)[0m 2024-04-14 09:00:06.553495: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has 


Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 09:00:09. Total running time: 34min 6s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00004   RUNNING                          10                       64              0.000382991                                                      |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20          1718.9

[36m(RayTrainWorker pid=38251)[0m Moving model to device: cuda:0
Epoch (training) 1:   0%|          | 0/41 [00:00<?, ?it/s]
Epoch (training) 1:   2%|▏         | 1/41 [00:08<05:25,  8.13s/it]
Epoch (training) 1:   5%|▍         | 2/41 [00:10<02:55,  4.51s/it]
Epoch (training) 1:   7%|▋         | 3/41 [00:12<02:07,  3.36s/it]
Epoch (training) 1:  10%|▉         | 4/41 [00:13<01:30,  2.43s/it]
Epoch (training) 1:  12%|█▏        | 5/41 [00:14<01:09,  1.93s/it]
Epoch (training) 1:  15%|█▍        | 6/41 [00:15<00:55,  1.57s/it]
Epoch (training) 1:  17%|█▋        | 7/41 [00:16<00:47,  1.39s/it]
Epoch (training) 1:  20%|█▉        | 8/41 [00:16<00:39,  1.20s/it]
Epoch (training) 1:  22%|██▏       | 9/41 [00:18<00:40,  1.26s/it]
Epoch (training) 1:  24%|██▍       | 10/41 [00:19<00:35,  1.15s/it]
Epoch (training) 1:  27%|██▋       | 11/41 [00:20<00:39,  1.33s/it]
Epoch (training) 1:  29%|██▉       | 12/41 [00:21<00:35,  1.21s/it]
Epoch (training) 1:  32%|███▏      | 13/41 [00:22<00:31,  1.12s/it]

Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 09:00:39. Total running time: 34min 37s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00004   RUNNING                          10                       64              0.000382991                                                      |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20          1718.9

Epoch (training) 1:  41%|████▏     | 17/41 [00:29<00:36,  1.52s/it]
Epoch (training) 1:  44%|████▍     | 18/41 [00:30<00:31,  1.38s/it]
Epoch (training) 1:  46%|████▋     | 19/41 [00:31<00:30,  1.37s/it]
Epoch (training) 1:  49%|████▉     | 20/41 [00:32<00:24,  1.18s/it]
Epoch (training) 1:  51%|█████     | 21/41 [00:33<00:22,  1.12s/it]
Epoch (training) 1:  54%|█████▎    | 22/41 [00:34<00:21,  1.13s/it]
Epoch (training) 1:  56%|█████▌    | 23/41 [00:38<00:32,  1.82s/it]
Epoch (training) 1:  59%|█████▊    | 24/41 [00:39<00:28,  1.66s/it]
Epoch (training) 1:  61%|██████    | 25/41 [00:41<00:27,  1.72s/it]
Epoch (training) 1:  63%|██████▎   | 26/41 [00:42<00:21,  1.41s/it]
Epoch (training) 1:  66%|██████▌   | 27/41 [00:43<00:17,  1.27s/it]
Epoch (training) 1:  68%|██████▊   | 28/41 [00:44<00:15,  1.23s/it]
Epoch (training) 1:  71%|███████   | 29/41 [00:45<00:15,  1.31s/it]
Epoch (training) 1:  73%|███████▎  | 30/41 [00:46<00:13,  1.23s/it]
Epoch (training) 1:  76%|███████▌  | 31/41 [00:4

Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 09:01:09. Total running time: 35min 7s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00004   RUNNING                          10                       64              0.000382991                                                      |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20          1718.98

[36m(RayTrainWorker pid=38251)[0m Epoch (training) 1: 100%|██████████| 41/41 [00:59<00:00,  1.28it/s]Epoch (training) 1: 100%|██████████| 41/41 [00:59<00:00,  1.45s/it]
Epoch (test) 1:   0%|          | 0/21 [00:00<?, ?it/s]
Epoch (test) 1:   5%|▍         | 1/21 [00:02<00:47,  2.40s/it]
Epoch (test) 1:  10%|▉         | 2/21 [00:03<00:36,  1.90s/it]
Epoch (test) 1:  14%|█▍        | 3/21 [00:05<00:32,  1.81s/it]
Epoch (test) 1:  19%|█▉        | 4/21 [00:06<00:23,  1.36s/it]
Epoch (test) 1:  24%|██▍       | 5/21 [00:07<00:23,  1.44s/it]
Epoch (test) 1:  29%|██▊       | 6/21 [00:09<00:19,  1.33s/it]
Epoch (test) 1:  33%|███▎      | 7/21 [00:10<00:18,  1.30s/it]
Epoch (test) 1:  38%|███▊      | 8/21 [00:12<00:20,  1.54s/it]
Epoch (test) 1:  43%|████▎     | 9/21 [00:13<00:17,  1.49s/it]
Epoch (test) 1:  48%|████▊     | 10/21 [00:15<00:17,  1.62s/it]
Epoch (test) 1:  52%|█████▏    | 11/21 [00:16<00:12,  1.29s/it]
Epoch (test) 1:  57%|█████▋    | 12/21 [00:16<00:10,  1.12s/it]
Epoch (test) 

Trial status: 4 TERMINATED | 1 RUNNING
Current time: 2024-04-14 09:01:42. Total running time: 35min 40s
Logical resource usage: 0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status         ...oop_config/epochs     ...config/batch_size     train_loop_config/lr     iter     total time (s)        loss     accuracy |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| TorchTrainer_a1400_00004   RUNNING                          10                       64              0.000382991                                                      |
| TorchTrainer_a1400_00000   TERMINATED                       20                       32              0.00122095        20          1718.9

[36m(RayTrainWorker pid=38251)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/wide_resnet50_hpt_outer_4_inner_1/TorchTrainer_a1400_00004_4_batch_size=64,epochs=10,lr=0.0004_2024-04-14_08-26-02/checkpoint_000000)
2024-04-14 09:01:48,657	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/wide_resnet50_hpt_outer_4_inner_1' in 0.0514s.



Trial TorchTrainer_a1400_00004 completed after 1 iterations at 2024-04-14 09:01:48. Total running time: 35min 45s
+---------------------------------------------------------------+
| Trial TorchTrainer_a1400_00004 result                         |
+---------------------------------------------------------------+
| checkpoint_dir_name                         checkpoint_000000 |
| time_this_iter_s                                    109.00794 |
| time_total_s                                        109.00794 |
| training_iteration                                          1 |
| accuracy                                              0.15445 |
| loss                                                 18.97903 |
| summary/epoch/0                                           1.0 |
| summary/train_acc/0                       0.15378610460577674 |
| summary/train_loss/0                        2.832206237606886 |
| summary/val_acc/0                          0.1544461778471139 |
| summary/val_loss/0       

2024-04-14 09:01:49,143	INFO tune.py:622 -- [output] This will use the new output engine with verbosity 1. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949
2024-04-14 09:01:49,186	INFO tune_controller.py:445 -- Restoring the run from the latest experiment state file: experiment_state-2024-04-14_07-25-24.json


Outer fold 4, inner fold 2 - number of samples: 1282
Tuning hyperparameters for wide_resnet50_hpt_outer_4_inner_2...
Defaulting to ASHA scheduler (no scheduler provided or not an instance of TrialScheduler)
Restoring tuner from path /root/ray_results/wide_resnet50_hpt_outer_4_inner_2


2024-04-14 09:01:49,509	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/wide_resnet50_hpt_outer_4_inner_2' in 0.1955s.


+----------------------------------------------------------------------+
| Configuration for experiment     wide_resnet50_hpt_outer_4_inner_2   |
+----------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator               |
| Scheduler                        AsyncHyperBandScheduler             |
| Number of trials                 5                                   |
+----------------------------------------------------------------------+

View detailed results here: /root/ray_results/wide_resnet50_hpt_outer_4_inner_2
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-04-14_07-52-09_892030_20145/artifacts/2024-04-14_07-25-24/wide_resnet50_hpt_outer_4_inner_2/driver_artifacts`

Trial status: 5 TERMINATED
Current time: 2024-04-14 09:01:49. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------

## Examine results


In [8]:
import json
import re
import pandas as pd

from pprint import pprint

In [9]:
# Rank the best trial(s) of every (outer fold, inner fold) tuning run.
# For each fold pair the matching "*_exp_summary.csv" file is read from
# `persistence_path`, and only the row(s) reaching that file's maximum accuracy
# are kept (ties are all kept), tagged with the fold indices they came from.
summary_columns = ["loss", "accuracy", "trial_id", "time_total_s"]
fold_winners = []
for i in range(5):    # index used for "outer_{i}" in the file name
  for j in range(3):  # index used for "inner_{j}" in the file name
    file_summary = f"hpt_wide_resnet50_hpt_outer_{i}_inner_{j}_exp_summary.csv"
    file_path = os.path.join(persistence_path, file_summary)
    trials_df = pd.read_csv(file_path, usecols=summary_columns)
    is_best = trials_df['accuracy'] == trials_df['accuracy'].max()
    # `.assign` returns a new frame, so the fold columns are never written into
    # a filtered slice of `trials_df` (the pattern behind SettingWithCopyWarning).
    fold_winners.append(
        trials_df.loc[is_best].assign(outer_fold=i, inner_fold=j)
    )

# Highest accuracy first; ties are broken by lower loss, then by shorter total time.
# `best_experiments` is only ever bound to the final DataFrame, so a failure while
# reading one of the files cannot leave a half-built list under that name.
best_experiments = pd.concat(fold_winners).sort_values(
    by=['accuracy', 'loss', 'time_total_s'],
    ascending=[False, True, True],
)
best_experiments

Unnamed: 0,loss,accuracy,trial_id,time_total_s,outer_fold,inner_fold
2,1.901282,0.371295,8d8f0_00002,1690.489108,2,0
1,2.407765,0.344774,d2735_00001,835.887733,2,1
0,2.55682,0.341654,9d196_00000,1743.971443,1,0
0,2.075537,0.305772,998b8_00000,433.088491,3,0
0,2.112708,0.304688,0c78b_00000,1535.406898,0,2
0,2.022516,0.299532,a1400_00000,1718.978152,4,1
0,3.121286,0.294852,e8bbe_00000,821.023257,4,0
0,2.097283,0.290625,cf44b_00000,1547.026404,0,1
0,2.013552,0.276131,116e1_00000,1656.24823,0,0
0,2.211824,0.25625,c540c_00000,1668.31186,2,2


In [None]:
# Optional export (disabled): uncomment to write the ranking above as a CSV
# file inside `persistence_path`.
#best_experiments.to_csv(os.path.join(persistence_path, f"HP_RANKING.{selected_model}_overview.csv"), index=False)

In [10]:
# The first row of the ranking identifies the (outer, inner) fold whose
# params.json is loaded below.
top_run = best_experiments.iloc[0]
best_run_dir = f"wide_resnet50_hpt_outer_{top_run.outer_fold}_inner_{top_run.inner_fold}"
best_params_path = os.path.join(persistence_path, best_run_dir, "params.json")

with open(best_params_path, 'r') as params_file:
  train_loop_config = json.load(params_file)['train_loop_config']

# Keep only the entries whose key also appears in `search_space`; everything
# else stored in the train-loop config is dropped.
best_params = {
    name: value
    for name, value in train_loop_config.items()
    if name in search_space.keys()
}

pprint(best_params)

{'batch_size': 32, 'epochs': 5, 'lr': 0.00010777744580116128}


# Cross-validation on entire dataset


In [11]:
# Fresh trainer for the cross-validation stage. No `sample_indices` is passed
# here, unlike the trainer built for hyperparameter tuning.
ciri_trainer = CIRI_trainer(
    model=selected_model,
    data_folders=data_folders,
    data_prop=0.8,
)

In [12]:
# Cross-validation results get their own checkpoint folder.
# NOTE: this rebinds `persistence_path`, so re-running the "Examine results"
# cells after this one would look for the tuning summaries in the CV folder.
cv_checkpoint_dir = f'CV_{selected_model}'
persistence_path = os.path.join(project_folder, 'checkpoints', cv_checkpoint_dir)
os.makedirs(persistence_path, exist_ok=True)

In [13]:
# Tuned hyperparameters plus the extra metrics to report for each fold.
cv_config = {
    **best_params,
    "additional_metrics": ['precision', 'recall', 'f1', 'confusion_matrix'],
}

# Cross-validation with the fixed configuration above (no tuning).
# NOTE(review): start_fold=(4,0) looks like a resume point left over from an
# interrupted run -- the recorded output only shows outer fold 4. Confirm
# before relying on a Restart & Run All to produce all folds.
cv_whole = ciri_trainer.cross_validate(
    run_name="wide_resnet50_cv",
    config=cv_config,
    outer_cv_k=5,
    inner_cv_k=0,
    tune_hyperparams=False,
    results_persist_dir=persistence_path,
    start_fold=(4,0),
)

Outer fold 4 - number of samples: 9606


  self.pid = _posixsubprocess.fork_exec(
2024-04-14 19:32:14,130	INFO worker.py:1752 -- Started a local Ray instance.
2024-04-14 19:32:15,849	INFO tune.py:263 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `<FrameworkTrainer>(...)`.
2024-04-14 19:32:15,861	INFO tune.py:622 -- [output] This will use the new output engine with verbosity 1. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949



View detailed results here: /root/ray_results/wide_resnet50_cv_outer_4
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-04-14_19-32-11_529013_4082/artifacts/2024-04-14_19-32-15/wide_resnet50_cv_outer_4/driver_artifacts`


[36m(TrainTrainable pid=5548)[0m 2024-04-14 19:32:23.503976: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(TrainTrainable pid=5548)[0m 2024-04-14 19:32:23.504028: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(TrainTrainable pid=5548)[0m 2024-04-14 19:32:23.505430: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Training started with configuration:
+---------------------------------------------------------------+
| Training config                                               |
+---------------------------------------------------------------+
| train_loop_config/additional_metrics     ...onfusion_matrix'] |
| train_loop_config/batch_size                               32 |
| train_loop_config/data_folders           ...ugmented_images'] |
| train_loop_config/data_prop                               0.8 |
| train_loop_config/epochs                                    5 |
| train_loop_config/lr                   0.00010777744580116128 |
| train_loop_config/model                       wide_resnet50_2 |
| train_loop_config/sample_indices                              |
| train_loop_config/train_test_idx         ...1, 12002, 12003]) |
+---------------------------------------------------------------+


[36m(TorchTrainer pid=5548)[0m Started distributed worker processes: 
[36m(TorchTrainer pid=5548)[0m - (ip=172.28.0.12, pid=5624) world_rank=0, local_rank=0, node_rank=0
[36m(RayTrainWorker pid=5624)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(RayTrainWorker pid=5624)[0m 2024-04-14 19:32:30.580535: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(RayTrainWorker pid=5624)[0m 2024-04-14 19:32:30.580590: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(RayTrainWorker pid=5624)[0m 2024-04-14 19:32:30.581865: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been re


Training finished iteration 1 at 2024-04-14 21:09:00. Total running time: 1hr 36min 43s
+---------------------------------------------+
| Training result                             |
+---------------------------------------------+
| checkpoint_dir_name       checkpoint_000000 |
| time_this_iter_s                 5794.63631 |
| time_total_s                     5794.63631 |
| training_iteration                        1 |
| accuracy                            0.37833 |
| confusion_matrix       ... 10, 11, 11, 11]) |
| f1                                  0.35646 |
| loss                                1.88437 |
| precision                           0.40003 |
| recall                              0.37833 |
| summary/epoch/0                         1.0 |
| summary/train_acc/0     0.28052083333333333 |
| summary/train_loss/0     2.0832002725316046 |
| summary/val_acc/0       0.37833333333333335 |
| summary/val_loss/0       1.8843663452487243 |
+---------------------------------------------+

Epoch (training) 2:   0%|          | 0/301 [00:00<?, ?it/s]
Epoch (training) 2:   0%|          | 1/301 [00:02<11:56,  2.39s/it]
Epoch (training) 2:   1%|          | 2/301 [00:03<07:35,  1.52s/it]
Epoch (training) 2:   1%|          | 3/301 [00:04<06:20,  1.28s/it]
Epoch (training) 2:   1%|▏         | 4/301 [00:05<06:23,  1.29s/it]
Epoch (training) 2:   2%|▏         | 5/301 [00:06<06:08,  1.24s/it]
Epoch (training) 2:   2%|▏         | 6/301 [00:08<06:31,  1.33s/it]
Epoch (training) 2:   2%|▏         | 7/301 [00:10<07:27,  1.52s/it]
Epoch (training) 2:   3%|▎         | 8/301 [00:11<07:31,  1.54s/it]
Epoch (training) 2:   3%|▎         | 9/301 [00:12<06:37,  1.36s/it]
Epoch (training) 2:   3%|▎         | 10/301 [00:14<07:03,  1.45s/it]
Epoch (training) 2:   4%|▎         | 11/301 [00:15<06:19,  1.31s/it]
Epoch (training) 2:   4%|▍         | 12/301 [00:16<06:29,  1.35s/it]
Epoch (training) 2:   4%|▍         | 13/301 [00:17<05:57,  1.24s/it]
Epoch (training) 2:   5%|▍         | 14/301 [00:18<0


Training finished iteration 2 at 2024-04-14 21:17:05. Total running time: 1hr 44min 49s
+---------------------------------------------+
| Training result                             |
+---------------------------------------------+
| checkpoint_dir_name       checkpoint_000001 |
| time_this_iter_s                  485.51512 |
| time_total_s                     6280.15144 |
| training_iteration                        2 |
| accuracy                            0.40292 |
| confusion_matrix       ..., 8, 11, 10, 11]) |
| f1                                  0.39339 |
| loss                                1.75712 |
| precision                           0.43497 |
| recall                              0.40292 |
| summary/epoch/0                         1.0 |
| summary/epoch/1                         2.0 |
| summary/train_acc/0     0.28052083333333333 |
| summary/train_acc/1     0.40020833333333333 |
| summary/train_loss/0     2.0832002725316046 |
| summary/train_loss/1      1.756782130545556 |

Epoch (training) 3:   0%|          | 0/301 [00:00<?, ?it/s]
Epoch (training) 3:   0%|          | 1/301 [00:02<14:33,  2.91s/it]
Epoch (training) 3:   1%|          | 2/301 [00:04<11:48,  2.37s/it]
Epoch (training) 3:   1%|          | 3/301 [00:06<09:39,  1.94s/it]
Epoch (training) 3:   1%|▏         | 4/301 [00:07<08:39,  1.75s/it]
Epoch (training) 3:   2%|▏         | 5/301 [00:08<07:11,  1.46s/it]
Epoch (training) 3:   2%|▏         | 6/301 [00:09<05:57,  1.21s/it]
Epoch (training) 3:   2%|▏         | 7/301 [00:10<05:36,  1.14s/it]
Epoch (training) 3:   3%|▎         | 8/301 [00:11<05:53,  1.21s/it]
Epoch (training) 3:   3%|▎         | 9/301 [00:12<05:41,  1.17s/it]
Epoch (training) 3:   3%|▎         | 10/301 [00:13<05:22,  1.11s/it]
Epoch (training) 3:   4%|▎         | 11/301 [00:15<06:15,  1.29s/it]
Epoch (training) 3:   4%|▍         | 12/301 [00:16<06:00,  1.25s/it]
Epoch (training) 3:   4%|▍         | 13/301 [00:17<05:51,  1.22s/it]
Epoch (training) 3:   5%|▍         | 14/301 [00:20<0


Training finished iteration 3 at 2024-04-14 21:25:11. Total running time: 1hr 52min 55s
+---------------------------------------------+
| Training result                             |
+---------------------------------------------+
| checkpoint_dir_name       checkpoint_000002 |
| time_this_iter_s                  485.59284 |
| time_total_s                     6765.74428 |
| training_iteration                        3 |
| accuracy                            0.44833 |
| confusion_matrix       ... 7, 9, 7, 7, 10]) |
| f1                                  0.44585 |
| loss                                 1.5903 |
| precision                           0.47974 |
| recall                              0.44833 |
| summary/epoch/0                         1.0 |
| summary/epoch/1                         2.0 |
| summary/epoch/2                         3.0 |
| summary/train_acc/0     0.28052083333333333 |
| summary/train_acc/1     0.40020833333333333 |
| summary/train_acc/2      0.4580208333333333 |

Epoch (training) 4:   0%|          | 0/301 [00:00<?, ?it/s]
Epoch (training) 4:   0%|          | 1/301 [00:05<28:50,  5.77s/it]
Epoch (training) 4:   1%|          | 2/301 [00:08<18:32,  3.72s/it]
Epoch (training) 4:   1%|          | 3/301 [00:09<12:12,  2.46s/it]
Epoch (training) 4:   1%|▏         | 4/301 [00:09<09:08,  1.85s/it]
Epoch (training) 4:   2%|▏         | 5/301 [00:11<08:13,  1.67s/it]
Epoch (training) 4:   2%|▏         | 6/301 [00:11<06:33,  1.34s/it]
Epoch (training) 4:   2%|▏         | 7/301 [00:13<07:32,  1.54s/it]
Epoch (training) 4:   3%|▎         | 8/301 [00:15<07:08,  1.46s/it]
Epoch (training) 4:   3%|▎         | 9/301 [00:16<06:12,  1.28s/it]
Epoch (training) 4:   3%|▎         | 10/301 [00:17<06:24,  1.32s/it]
Epoch (training) 4:   4%|▎         | 11/301 [00:19<07:10,  1.48s/it]
Epoch (training) 4:   4%|▍         | 12/301 [00:20<06:25,  1.33s/it]
Epoch (training) 4:   4%|▍         | 13/301 [00:21<05:39,  1.18s/it]
Epoch (training) 4:   5%|▍         | 14/301 [00:22<0


Training finished iteration 4 at 2024-04-14 21:33:20. Total running time: 2hr 1min 4s
+---------------------------------------------+
| Training result                             |
+---------------------------------------------+
| checkpoint_dir_name       checkpoint_000003 |
| time_this_iter_s                  488.99802 |
| time_total_s                      7254.7423 |
| training_iteration                        4 |
| accuracy                            0.45458 |
| confusion_matrix       ...11, 8, 7, 7, 11]) |
| f1                                  0.43183 |
| loss                                1.60006 |
| precision                           0.51559 |
| recall                              0.45458 |
| summary/epoch/0                         1.0 |
| summary/epoch/1                         2.0 |
| summary/epoch/2                         3.0 |
| summary/epoch/3                         4.0 |
| summary/train_acc/0     0.28052083333333333 |
| summary/train_acc/1     0.40020833333333333 |
|

Epoch (training) 5:   0%|          | 0/301 [00:00<?, ?it/s]
Epoch (training) 5:   0%|          | 1/301 [00:03<16:39,  3.33s/it]
Epoch (training) 5:   1%|          | 2/301 [00:04<09:24,  1.89s/it]
Epoch (training) 5:   1%|          | 3/301 [00:05<06:56,  1.40s/it]
Epoch (training) 5:   1%|▏         | 4/301 [00:06<06:32,  1.32s/it]
Epoch (training) 5:   2%|▏         | 5/301 [00:07<05:45,  1.17s/it]
Epoch (training) 5:   2%|▏         | 6/301 [00:08<05:16,  1.07s/it]
Epoch (training) 5:   2%|▏         | 7/301 [00:08<04:38,  1.06it/s]
Epoch (training) 5:   3%|▎         | 8/301 [00:09<04:15,  1.15it/s]
Epoch (training) 5:   3%|▎         | 9/301 [00:10<04:04,  1.19it/s]
Epoch (training) 5:   3%|▎         | 10/301 [00:11<04:48,  1.01it/s]
Epoch (training) 5:   4%|▎         | 11/301 [00:12<04:23,  1.10it/s]
Epoch (training) 5:   4%|▍         | 12/301 [00:13<04:55,  1.02s/it]
Epoch (training) 5:   4%|▍         | 13/301 [00:15<06:18,  1.32s/it]
Epoch (training) 5:   5%|▍         | 14/301 [00:16<0


Training finished iteration 5 at 2024-04-14 21:41:30. Total running time: 2hr 9min 13s
+---------------------------------------------+
| Training result                             |
+---------------------------------------------+
| checkpoint_dir_name       checkpoint_000004 |
| time_this_iter_s                  489.09886 |
| time_total_s                     7743.84116 |
| training_iteration                        5 |
| accuracy                            0.51208 |
| confusion_matrix       ...0, 8, 11, 8, 10]) |
| f1                                  0.50517 |
| loss                                1.45386 |
| precision                           0.55147 |
| recall                              0.51208 |
| summary/epoch/0                         1.0 |
| summary/epoch/1                         2.0 |
| summary/epoch/2                         3.0 |
| summary/epoch/3                         4.0 |
| summary/epoch/4                         5.0 |
| summary/train_acc/0     0.28052083333333333 |


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-04-14 21:41:31,699	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/wide_resnet50_cv_outer_4' in 0.0488s.



Training completed after 5 iterations at 2024-04-14 21:41:31. Total running time: 2hr 9min 15s



# Transfer learning

In [None]:
# Fresh trainer for the transfer-learning run, built with the same arguments
# as the cross-validation trainer.
ciri_trainer = CIRI_trainer(
    model=selected_model,
    data_folders=data_folders,
    data_prop=0.8,
)

In [None]:
# `project_folder` from the Setup section is reused by default. The previous
# placeholder (`project_folder = ...`) bound the Ellipsis object, which made
# the os.path.join call below raise a TypeError and clobbered the Setup value.
# To target another location, uncomment the next line and set a real path:
# project_folder = '/gdrive/MyDrive/ProjectCIRI'

# Transfer-learning artifacts get their own checkpoint folder.
persistence_path = os.path.join(project_folder, 'checkpoints', f'TransferLearning_{selected_model}')
os.makedirs(persistence_path, exist_ok=True)

In [None]:
# Tuned hyperparameters, the extra metrics to report, and the weights
# identifier handed to the trainer.
tl_config = {
    **best_params,
    'additional_metrics': ['precision', 'recall', 'f1', 'confusion_matrix'],
    'weights': 'IMAGENET1K_V2',
}

# Single training run; results are persisted under `persistence_path`.
tl_results = ciri_trainer.train(
    run_name=f'{selected_model}_transfer_learning',
    config=tl_config,
    persist_dir=persistence_path,
)