# Install

In [1]:
!apt-get update
!pip install transformers[sentencepiece]
!pip install transformers[torch]
!pip install datasets
!pip install accelerate
!pip install transformers==4.30
!pip install evaluate
!pip install sigopt
!pip install pickle5
!pip install ray[tune]
%env PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512

Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
Get:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [109 kB]
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [1,235 kB]
Hit:8 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Get:9 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,257 kB]
Get:10 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [993 kB]
Hit:11 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [966 kB]
Hit:13 https://ppa.launchpadcontent.net/g

# Imports

In [2]:
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, TrainingArguments, Trainer
import datasets
import torch
import pandas
import numpy as np
import evaluate

# Read and prepare data

In [5]:
# Folder that holds the three CSV splits. Change it if you are not running on Colab.
dir = '/content/drive/MyDrive/Datasets/'
train_file_name = f'{dir}train.csv'
validate_file_name = f'{dir}validate.csv'
test_file_name = f'{dir}test.csv'

# Load all three splits in one call; each split name maps to its CSV file.
# keep_default_na=False is handed on to the CSV reader
# (presumably so empty cells stay empty strings instead of NaN -- confirm).
dataset = datasets.load_dataset(
    'csv',
    data_files=dict(
        train=train_file_name,
        validation=validate_file_name,
        test=test_file_name,
    ),
    keep_default_na=False,
)

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [6]:
# Per-device batch size reused for training and evaluation in the sanity-check run.
batch_size = 16
# Name of the pretrained checkpoint; the tokenizer and the models are loaded from it.
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    use_fast=True,
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [7]:
def tokenize_function(example):
    """Tokenize the text held in column "0" of ``example``.

    Truncation is enabled; padding is not requested here.
    """
    texts = example["0"]
    return tokenizer(texts, truncation=True)

# batched=True makes map() call tokenize_function on batches of rows.
tokenized_dataset = dataset.map(function=tokenize_function, batched=True)

Map:   0%|          | 0/621910 [00:00<?, ? examples/s]

Map:   0%|          | 0/207303 [00:00<?, ? examples/s]

Map:   0%|          | 0/207304 [00:00<?, ? examples/s]

In [8]:
# Display the tokenized splits to check their columns and row counts.
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', '0', '1', 'input_ids', 'attention_mask'],
        num_rows: 621910
    })
    validation: Dataset({
        features: ['Unnamed: 0', '0', '1', 'input_ids', 'attention_mask'],
        num_rows: 207303
    })
    test: Dataset({
        features: ['Unnamed: 0', '0', '1', 'input_ids', 'attention_mask'],
        num_rows: 207304
    })
})

In [9]:
# Padding collator built from the tokenizer; handed to the Trainer as data_collator.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [10]:
# Drop 'Unnamed: 0' (presumably the saved CSV index) and the raw text column '0',
# then expose column '1' under the name 'labels'.
# NOTE(review): this overwrites tokenized_dataset, so the cell is not re-runnable
# on its own -- re-run the tokenization cell first.
tokenized_dataset = (
    tokenized_dataset
    .remove_columns(['Unnamed: 0', '0'])
    .rename_column('1', 'labels')
)

# Training (you can skip this stage: the hyperparameters are arbitrary and only serve to check that everything works)

In [None]:
# Accuracy metric object used by compute_metrics below.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
  """Compute accuracy for one evaluation pass.

  eval_pred unpacks into (predictions, labels); the predicted class is the
  arg-max of the predictions along axis 1.
  """
  scores, references = eval_pred
  predicted_classes = np.argmax(scores, axis=1)
  return metric.compute(predictions=predicted_classes, references=references)

# Sequence-classification model with two labels, initialised from `checkpoint`.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight', 'classifier.

In [None]:
# Settings for the sanity-check run: evaluate and save once per epoch, then
# reload the checkpoint with the best accuracy when training ends.
training_args = TrainingArguments(
    output_dir="some_name",
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
    skip_memory_metrics=True,
)
# Only shard 1 out of 6000 of each split is used, which keeps this run short.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_dataset["train"].shard(num_shards=6000, index=1),
    eval_dataset=tokenized_dataset["validation"].shard(num_shards=6000, index=1),
)

In [None]:
# Run the sanity-check fine-tuning with the trainer configured in the previous cell.
trainer.train()

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.140236,0.959641
2,No log,0.113552,0.961883
3,No log,0.131647,0.961883
4,No log,0.126063,0.961883
5,No log,0.120158,0.961883


TrainOutput(global_step=420, training_loss=0.07634077526274181, metrics={'train_runtime': 63.636, 'train_samples_per_second': 104.972, 'train_steps_per_second': 6.6, 'total_flos': 427704113410560.0, 'train_loss': 0.07634077526274181, 'epoch': 5.0})

In [None]:
# Evaluate on the eval_dataset that was given to the trainer.
trainer.evaluate()

# Hyperparameter search

In [11]:
def model_init():
  """Build a new two-label classifier from `checkpoint`."""
  fresh_model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
  return fresh_model

# Accuracy metric object used by compute_metrics below.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
  """Compute accuracy for one evaluation pass.

  eval_pred unpacks into (predictions, labels); the predicted class is the
  arg-max of the predictions along the last axis.
  """
  scores, references = eval_pred
  predicted_classes = np.argmax(scores, axis=-1)
  return metric.compute(predictions=predicted_classes, references=references)

# Module-level model instance, separate from the ones model_init() creates.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

# Base arguments for the hyperparameter search: evaluate and save once per
# epoch and reload the best checkpoint at the end of each run.
searching_args = TrainingArguments(
    "some_name",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    skip_memory_metrics=True
  )

scale = 0.02 # Dataset is too big for taking it entirely for hyperparameter search
# shard() expects an integer shard count, but 1 / scale is a float (50.0 here),
# so convert it once and reuse the same value for both splits.
search_num_shards = round(1 / scale)

# model_init (rather than a model instance) lets the Trainer create a new model
# whenever it needs one.
# NOTE(review): compute_metrics is not passed to this Trainer, so accuracy is
# not computed during the search -- confirm this is intended.
trainer_search = Trainer(
    model_init=model_init,
    args=searching_args,
    train_dataset=tokenized_dataset["train"].shard(index=1, num_shards=search_num_shards),
    eval_dataset=tokenized_dataset["validation"].shard(index=1, num_shards=search_num_shards),
    tokenizer=tokenizer
)
# NOTE(review): searching_args sets skip_memory_metrics=True, while this line
# sets a private attribute named skip_memory_trackers to False -- verify the
# attribute name and whether the line is still needed.
trainer_search._memory_tracker.skip_memory_trackers = False

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias', 'classifier.

In [None]:
# Long part, skip it (4 hours on V100)

def my_hp_space(trial):
    """Return the Ray Tune search space for the hyperparameter search.

    `trial` is accepted but not used. Every entry is sampled by Ray Tune
    except per_device_eval_batch_size, which is fixed at 32.
    """
    from ray import tune

    search_space = dict(
        learning_rate=tune.uniform(1e-5, 5e-5),
        num_train_epochs=tune.choice(range(1, 6)),
        per_device_train_batch_size=tune.choice([2,4]),
        weight_decay=tune.uniform(0.0, 0.3),
        adam_epsilon=tune.loguniform(1e-10, 1e-6),
        per_device_eval_batch_size=32,
    )
    return search_space

# Release cached GPU memory before the trials start.
torch.cuda.empty_cache()
# 20 trials on the Ray backend. stop, checkpoint_score_attr and
# keep_checkpoints_num are extra keyword arguments (presumably forwarded to
# Ray Tune -- confirm).
best_run = trainer_search.hyperparameter_search(
    hp_space=my_hp_space,
    backend="ray",
    n_trials=20,
    stop=None,
    checkpoint_score_attr="training_iteration",
    keep_checkpoints_num=0,
)

2023-09-11 13:20:18,928	INFO worker.py:1621 -- Started a local Ray instance.
2023-09-11 13:20:21,060	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2023-09-11 13:20:21,083	INFO tune.py:666 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/latest/tune/api/trainable.html



+-------------------------------------------------------------------+
| Configuration for experiment     _objective_2023-09-11_13-20-21   |
+-------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator            |
| Scheduler                        FIFOScheduler                    |
| Number of trials                 20                               |
+-------------------------------------------------------------------+

View detailed results here: /root/ray_results/_objective_2023-09-11_13-20-21
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/_objective_2023-09-11_13-20-21`

Trial status: 16 PENDING
Current time: 2023-09-11 13:20:21. Total running time: 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------+
| Trial name



Trial _objective_f556c_00000 started with configuration:
+------------------------------------------------+
| Trial _objective_f556c_00000 config            |
+------------------------------------------------+
| adam_epsilon                                 0 |
| learning_rate                            2e-05 |
| num_train_epochs                             5 |
| per_device_eval_batch_size                  32 |
| per_device_train_batch_size                  2 |
| weight_decay                            0.2196 |
+------------------------------------------------+



[2m[36m(_objective pid=16313)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.bias']
[2m[36m(_objective pid=16313)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=16313)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=16313)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=16313)[0m {'loss': 0.4945, 'learning_rate': 2.457997123695086e-05, 'epoch': 0.08}


[2m[36m(_objective pid=16313)[0m   2%|▏         | 504/31100 [00:13<13:26, 37.92it/s]
  2%|▏         | 508/31100 [00:13<13:18, 38.29it/s]
  2%|▏         | 512/31100 [00:13<13:32, 37.66it/s]
  2%|▏         | 517/31100 [00:13<13:12, 38.61it/s]
  2%|▏         | 521/31100 [00:13<13:13, 38.54it/s]
  2%|▏         | 525/31100 [00:14<13:08, 38.79it/s]
  2%|▏         | 530/31100 [00:14<13:01, 39.11it/s]
  2%|▏         | 534/31100 [00:14<12:58, 39.26it/s]
  2%|▏         | 539/31100 [00:14<13:05, 38.90it/s]
  2%|▏         | 544/31100 [00:14<12:54, 39.47it/s]
  2%|▏         | 549/31100 [00:14<12:48, 39.75it/s]
  2%|▏         | 554/31100 [00:14<12:40, 40.16it/s]
  2%|▏         | 559/31100 [00:14<12:42, 40.05it/s]
  2%|▏         | 564/31100 [00:15<12:42, 40.04it/s]
  2%|▏         | 569/31100 [00:15<12:38, 40.25it/s]
  2%|▏         | 574/31100 [00:15<12:43, 39.99it/s]
  2%|▏         | 579/31100 [00:15<12:40, 40.12it/s]
  2%|▏         | 584/31100 [00:15<12:43, 39.97it/s]
  2%|▏         | 588/31100 

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:20:51. Total running time: 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon |
+-------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598        2.48104e-08 |
| _objective_f556c_00001   PENDING        1.62407e-05                    3                        2       0.0174251       2.91544e-07 |
| _objective_f556c_00002   PENDING        3.40446e-05                    3                        4       0.0169235       7.72672e-08 |
| _object

[2m[36m(_objective pid=16313)[0m   3%|▎         | 825/31100 [00:21<12:48, 39.40it/s]
  3%|▎         | 830/31100 [00:21<12:37, 39.93it/s]
  3%|▎         | 835/31100 [00:22<12:36, 40.02it/s]
  3%|▎         | 840/31100 [00:22<12:36, 39.99it/s]
  3%|▎         | 845/31100 [00:22<12:34, 40.11it/s]
  3%|▎         | 850/31100 [00:22<12:29, 40.35it/s]
  3%|▎         | 855/31100 [00:22<12:22, 40.74it/s]
  3%|▎         | 860/31100 [00:22<12:15, 41.09it/s]
  3%|▎         | 865/31100 [00:22<12:16, 41.02it/s]
  3%|▎         | 870/31100 [00:22<12:31, 40.20it/s]
  3%|▎         | 875/31100 [00:23<12:41, 39.68it/s]
  3%|▎         | 880/31100 [00:23<12:31, 40.20it/s]
  3%|▎         | 885/31100 [00:23<12:34, 40.04it/s]
  3%|▎         | 890/31100 [00:23<12:26, 40.48it/s]
  3%|▎         | 895/31100 [00:23<12:23, 40.61it/s]
  3%|▎         | 900/31100 [00:23<13:09, 38.26it/s]
  3%|▎         | 904/31100 [00:23<13:04, 38.51it/s]
  3%|▎         | 909/31100 [00:23<12:52, 39.07it/s]
  3%|▎         | 914/31100 

[2m[36m(_objective pid=16313)[0m {'loss': 0.4866, 'learning_rate': 2.4178337720007217e-05, 'epoch': 0.16}


[2m[36m(_objective pid=16313)[0m   3%|▎         | 1004/31100 [00:26<12:38, 39.69it/s]
  3%|▎         | 1009/31100 [00:26<12:36, 39.77it/s]
  3%|▎         | 1013/31100 [00:26<12:56, 38.73it/s]
  3%|▎         | 1017/31100 [00:26<12:56, 38.72it/s]
  3%|▎         | 1022/31100 [00:26<12:47, 39.20it/s]
  3%|▎         | 1026/31100 [00:26<13:03, 38.41it/s]
  3%|▎         | 1030/31100 [00:26<13:03, 38.40it/s]
  3%|▎         | 1034/31100 [00:27<13:01, 38.46it/s]
  3%|▎         | 1038/31100 [00:27<12:54, 38.81it/s]
  3%|▎         | 1042/31100 [00:27<12:50, 39.04it/s]
  3%|▎         | 1046/31100 [00:27<13:10, 38.03it/s]
  3%|▎         | 1050/31100 [00:27<13:08, 38.11it/s]
  3%|▎         | 1054/31100 [00:27<13:01, 38.43it/s]
  3%|▎         | 1058/31100 [00:27<12:52, 38.87it/s]
  3%|▎         | 1062/31100 [00:27<12:47, 39.16it/s]
  3%|▎         | 1066/31100 [00:27<12:45, 39.26it/s]
  3%|▎         | 1070/31100 [00:28<13:05, 38.23it/s]
  3%|▎         | 1074/31100 [00:28<13:27, 37.18it/s]
  3%|▎   

[2m[36m(_objective pid=16313)[0m {'loss': 0.3856, 'learning_rate': 2.377670420306358e-05, 'epoch': 0.24}


[2m[36m(_objective pid=16313)[0m   5%|▍         | 1509/31100 [00:39<13:40, 36.07it/s]
  5%|▍         | 1513/31100 [00:39<13:18, 37.07it/s]
  5%|▍         | 1518/31100 [00:39<12:55, 38.13it/s]
  5%|▍         | 1523/31100 [00:39<12:41, 38.85it/s]
  5%|▍         | 1527/31100 [00:39<12:38, 39.00it/s]
  5%|▍         | 1531/31100 [00:39<12:33, 39.25it/s]
  5%|▍         | 1535/31100 [00:39<12:34, 39.19it/s]
  5%|▍         | 1539/31100 [00:39<12:33, 39.22it/s]
  5%|▍         | 1544/31100 [00:39<12:23, 39.74it/s]
  5%|▍         | 1548/31100 [00:40<12:27, 39.55it/s]
  5%|▍         | 1553/31100 [00:40<12:20, 39.91it/s]
  5%|▌         | 1557/31100 [00:40<12:52, 38.26it/s]
  5%|▌         | 1561/31100 [00:40<12:47, 38.50it/s]
  5%|▌         | 1565/31100 [00:40<12:42, 38.72it/s]
  5%|▌         | 1569/31100 [00:40<13:15, 37.13it/s]
  5%|▌         | 1573/31100 [00:40<13:20, 36.90it/s]
  5%|▌         | 1577/31100 [00:40<13:11, 37.32it/s]
  5%|▌         | 1581/31100 [00:40<13:02, 37.71it/s]
  5%|▌   

[2m[36m(_objective pid=16313)[0m {'loss': 0.3807, 'learning_rate': 2.3375070686119934e-05, 'epoch': 0.32}


[2m[36m(_objective pid=16313)[0m   6%|▋         | 2003/31100 [00:51<12:12, 39.72it/s]
[2m[36m(_objective pid=16313)[0m   6%|▋         | 2008/31100 [00:51<12:06, 40.05it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:21:21. Total running time: 1min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon |
+-------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598        2.48104e-08 |
| _objective_f556c_00001   PENDING        1.62407e-05                    3                        2       0.0174251       2.91544e-07 |
| _objective_f556c_00002   PENDING        3.40446e-05                    3                        4       0.0169235       7.72672e-08 |
| _ob

[2m[36m(_objective pid=16313)[0m   6%|▋         | 2013/31100 [00:51<11:57, 40.52it/s]
  6%|▋         | 2018/31100 [00:51<11:57, 40.54it/s]
  7%|▋         | 2023/31100 [00:52<11:55, 40.63it/s]
  7%|▋         | 2028/31100 [00:52<11:52, 40.80it/s]
  7%|▋         | 2033/31100 [00:52<11:47, 41.11it/s]
  7%|▋         | 2038/31100 [00:52<11:44, 41.25it/s]
  7%|▋         | 2043/31100 [00:52<11:53, 40.72it/s]
  7%|▋         | 2048/31100 [00:52<11:51, 40.81it/s]
  7%|▋         | 2053/31100 [00:52<11:49, 40.93it/s]
  7%|▋         | 2058/31100 [00:52<11:43, 41.29it/s]
  7%|▋         | 2063/31100 [00:53<11:40, 41.46it/s]
  7%|▋         | 2068/31100 [00:53<11:44, 41.21it/s]
  7%|▋         | 2073/31100 [00:53<11:43, 41.24it/s]
  7%|▋         | 2078/31100 [00:53<11:42, 41.31it/s]
  7%|▋         | 2083/31100 [00:53<11:43, 41.26it/s]
  7%|▋         | 2088/31100 [00:53<11:42, 41.29it/s]
  7%|▋         | 2093/31100 [00:53<11:47, 40.97it/s]
  7%|▋         | 2098/31100 [00:53<11:47, 41.01it/s]
  7%|▋   

[2m[36m(_objective pid=16313)[0m {'loss': 0.3269, 'learning_rate': 2.2973437169176295e-05, 'epoch': 0.4}


[2m[36m(_objective pid=16313)[0m   8%|▊         | 2511/31100 [01:04<11:49, 40.28it/s]
  8%|▊         | 2516/31100 [01:04<11:44, 40.56it/s]
  8%|▊         | 2521/31100 [01:04<11:39, 40.88it/s]
  8%|▊         | 2526/31100 [01:04<11:40, 40.81it/s]
  8%|▊         | 2531/31100 [01:04<11:39, 40.85it/s]
  8%|▊         | 2536/31100 [01:05<11:36, 40.99it/s]
  8%|▊         | 2541/31100 [01:05<11:38, 40.91it/s]
  8%|▊         | 2546/31100 [01:05<11:41, 40.71it/s]
  8%|▊         | 2551/31100 [01:05<11:47, 40.33it/s]
  8%|▊         | 2556/31100 [01:05<12:03, 39.47it/s]
  8%|▊         | 2560/31100 [01:05<12:03, 39.46it/s]
  8%|▊         | 2564/31100 [01:05<12:09, 39.11it/s]
  8%|▊         | 2568/31100 [01:05<12:12, 38.96it/s]
  8%|▊         | 2573/31100 [01:05<11:58, 39.68it/s]
  8%|▊         | 2577/31100 [01:06<12:01, 39.53it/s]
  8%|▊         | 2581/31100 [01:06<12:30, 38.01it/s]
  8%|▊         | 2586/31100 [01:06<12:11, 39.00it/s]
  8%|▊         | 2591/31100 [01:06<11:51, 40.07it/s]
  8%|▊   

[2m[36m(_objective pid=16313)[0m {'loss': 0.3176, 'learning_rate': 2.257180365223265e-05, 'epoch': 0.48}


[2m[36m(_objective pid=16313)[0m  10%|▉         | 3004/31100 [01:17<11:41, 40.04it/s]
 10%|▉         | 3008/31100 [01:17<11:43, 39.92it/s]
 10%|▉         | 3013/31100 [01:17<11:36, 40.31it/s]
 10%|▉         | 3018/31100 [01:17<11:29, 40.71it/s]
 10%|▉         | 3023/31100 [01:17<11:25, 40.97it/s]
 10%|▉         | 3028/31100 [01:17<11:20, 41.26it/s]
 10%|▉         | 3033/31100 [01:17<11:18, 41.39it/s]
 10%|▉         | 3038/31100 [01:17<11:18, 41.37it/s]
 10%|▉         | 3043/31100 [01:17<11:17, 41.39it/s]
 10%|▉         | 3048/31100 [01:18<11:33, 40.46it/s]
 10%|▉         | 3053/31100 [01:18<11:35, 40.33it/s]
 10%|▉         | 3058/31100 [01:18<11:29, 40.66it/s]
 10%|▉         | 3063/31100 [01:18<11:23, 41.02it/s]
 10%|▉         | 3068/31100 [01:18<11:21, 41.15it/s]
 10%|▉         | 3073/31100 [01:18<11:20, 41.17it/s]
 10%|▉         | 3078/31100 [01:18<11:26, 40.80it/s]
 10%|▉         | 3083/31100 [01:18<11:50, 39.43it/s]
 10%|▉         | 3088/31100 [01:19<11:41, 39.93it/s]
 10%|▉   

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:21:51. Total running time: 1min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon |
+-------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598        2.48104e-08 |
| _objective_f556c_00001   PENDING        1.62407e-05                    3                        2       0.0174251       2.91544e-07 |
| _objective_f556c_00002   PENDING        3.40446e-05                    3                        4       0.0169235       7.72672e-08 |
| _o

[2m[36m(_objective pid=16313)[0m  10%|█         | 3203/31100 [01:21<11:47, 39.43it/s]
 10%|█         | 3207/31100 [01:22<11:45, 39.54it/s]
 10%|█         | 3212/31100 [01:22<11:35, 40.12it/s]
 10%|█         | 3217/31100 [01:22<11:34, 40.18it/s]
 10%|█         | 3222/31100 [01:22<11:28, 40.51it/s]
 10%|█         | 3227/31100 [01:22<11:25, 40.65it/s]
 10%|█         | 3232/31100 [01:22<11:27, 40.52it/s]
 10%|█         | 3237/31100 [01:22<11:31, 40.30it/s]
 10%|█         | 3242/31100 [01:22<11:41, 39.73it/s]
 10%|█         | 3247/31100 [01:23<11:32, 40.20it/s]
 10%|█         | 3252/31100 [01:23<11:27, 40.53it/s]
 10%|█         | 3257/31100 [01:23<11:29, 40.40it/s]
 10%|█         | 3262/31100 [01:23<11:50, 39.19it/s]
 11%|█         | 3266/31100 [01:23<11:58, 38.73it/s]
 11%|█         | 3271/31100 [01:23<11:49, 39.22it/s]
 11%|█         | 3276/31100 [01:23<11:39, 39.80it/s]
 11%|█         | 3281/31100 [01:23<11:30, 40.28it/s]
 11%|█         | 3286/31100 [01:24<11:29, 40.36it/s]
 11%|█   

[2m[36m(_objective pid=16313)[0m {'loss': 0.2787, 'learning_rate': 2.217017013528901e-05, 'epoch': 0.56}


[2m[36m(_objective pid=16313)[0m  11%|█▏        | 3503/31100 [01:29<11:38, 39.53it/s]
 11%|█▏        | 3508/31100 [01:29<11:33, 39.78it/s]
 11%|█▏        | 3512/31100 [01:29<11:32, 39.81it/s]
 11%|█▏        | 3517/31100 [01:29<11:35, 39.68it/s]
 11%|█▏        | 3522/31100 [01:30<11:28, 40.06it/s]
 11%|█▏        | 3527/31100 [01:30<11:27, 40.10it/s]
 11%|█▏        | 3532/31100 [01:30<11:23, 40.35it/s]
 11%|█▏        | 3537/31100 [01:30<11:16, 40.77it/s]
 11%|█▏        | 3542/31100 [01:30<11:23, 40.32it/s]
 11%|█▏        | 3547/31100 [01:30<11:32, 39.81it/s]
 11%|█▏        | 3552/31100 [01:30<11:30, 39.88it/s]
 11%|█▏        | 3556/31100 [01:30<11:33, 39.72it/s]
 11%|█▏        | 3561/31100 [01:30<11:29, 39.96it/s]
 11%|█▏        | 3565/31100 [01:31<11:31, 39.85it/s]
 11%|█▏        | 3569/31100 [01:31<11:31, 39.82it/s]
 11%|█▏        | 3573/31100 [01:31<11:43, 39.14it/s]
 12%|█▏        | 3577/31100 [01:31<12:12, 37.57it/s]
 12%|█▏        | 3581/31100 [01:31<12:08, 37.80it/s]
 12%|█▏  

[2m[36m(_objective pid=16313)[0m {'loss': 0.2762, 'learning_rate': 2.1768536618345366e-05, 'epoch': 0.64}


 13%|█▎        | 4008/31100 [01:42<11:11, 40.37it/s]
 13%|█▎        | 4013/31100 [01:42<11:16, 40.05it/s]
 13%|█▎        | 4018/31100 [01:42<11:25, 39.49it/s]
 13%|█▎        | 4023/31100 [01:42<11:19, 39.86it/s]
 13%|█▎        | 4028/31100 [01:42<11:14, 40.14it/s]
 13%|█▎        | 4033/31100 [01:42<11:10, 40.35it/s]
 13%|█▎        | 4038/31100 [01:42<11:28, 39.33it/s]
 13%|█▎        | 4043/31100 [01:43<11:33, 39.00it/s]
 13%|█▎        | 4047/31100 [01:43<11:36, 38.82it/s]
 13%|█▎        | 4051/31100 [01:43<12:06, 37.24it/s]
 13%|█▎        | 4056/31100 [01:43<11:46, 38.28it/s]
 13%|█▎        | 4061/31100 [01:43<11:27, 39.32it/s]
 13%|█▎        | 4066/31100 [01:43<11:26, 39.37it/s]
 13%|█▎        | 4070/31100 [01:43<11:24, 39.47it/s]
 13%|█▎        | 4075/31100 [01:43<11:18, 39.82it/s]
 13%|█▎        | 4079/31100 [01:43<11:23, 39.52it/s]
 13%|█▎        | 4083/31100 [01:44<12:10, 36.99it/s]
 13%|█▎        | 4087/31100 [01:44<12:08, 37.09it/s]
 13%|█▎        | 4092/31100 [01:44<11:49, 38.0

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:22:21. Total running time: 2min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon |
+-------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598        2.48104e-08 |
| _objective_f556c_00001   PENDING        1.62407e-05                    3                        2       0.0174251       2.91544e-07 |
| _objective_f556c_00002   PENDING        3.40446e-05                    3                        4       0.0169235       7.72672e-08 |
| _ob

[2m[36m(_objective pid=16313)[0m  14%|█▍        | 4388/31100 [01:51<11:06, 40.10it/s]
 14%|█▍        | 4393/31100 [01:52<11:11, 39.74it/s]
 14%|█▍        | 4398/31100 [01:52<11:37, 38.29it/s]
 14%|█▍        | 4402/31100 [01:52<12:04, 36.83it/s]
 14%|█▍        | 4407/31100 [01:52<11:42, 37.99it/s]
 14%|█▍        | 4412/31100 [01:52<11:26, 38.90it/s]
 14%|█▍        | 4417/31100 [01:52<11:15, 39.49it/s]
 14%|█▍        | 4421/31100 [01:52<11:17, 39.40it/s]
 14%|█▍        | 4426/31100 [01:52<11:08, 39.91it/s]
 14%|█▍        | 4431/31100 [01:53<10:59, 40.43it/s]
 14%|█▍        | 4436/31100 [01:53<11:21, 39.10it/s]
 14%|█▍        | 4441/31100 [01:53<11:10, 39.77it/s]
 14%|█▍        | 4446/31100 [01:53<11:03, 40.16it/s]
 14%|█▍        | 4451/31100 [01:53<10:59, 40.41it/s]
 14%|█▍        | 4456/31100 [01:53<10:58, 40.44it/s]
 14%|█▍        | 4461/31100 [01:53<11:05, 40.01it/s]
 14%|█▍        | 4466/31100 [01:53<11:04, 40.10it/s]
 14%|█▍        | 4471/31100 [01:54<11:01, 40.26it/s]
 14%|█▍  

[2m[36m(_objective pid=16313)[0m {'loss': 0.2921, 'learning_rate': 2.1366903101401728e-05, 'epoch': 0.72}


[2m[36m(_objective pid=16313)[0m  14%|█▍        | 4506/31100 [01:54<11:10, 39.64it/s]
 15%|█▍        | 4511/31100 [01:55<11:04, 39.99it/s]
 15%|█▍        | 4516/31100 [01:55<11:01, 40.19it/s]
 15%|█▍        | 4521/31100 [01:55<10:58, 40.38it/s]
 15%|█▍        | 4526/31100 [01:55<11:18, 39.18it/s]
 15%|█▍        | 4530/31100 [01:55<11:15, 39.31it/s]
 15%|█▍        | 4535/31100 [01:55<11:07, 39.82it/s]
 15%|█▍        | 4540/31100 [01:55<11:02, 40.10it/s]
 15%|█▍        | 4545/31100 [01:55<11:02, 40.06it/s]
 15%|█▍        | 4550/31100 [01:56<10:55, 40.51it/s]
 15%|█▍        | 4555/31100 [01:56<10:52, 40.66it/s]
 15%|█▍        | 4560/31100 [01:56<10:54, 40.54it/s]
 15%|█▍        | 4565/31100 [01:56<11:18, 39.11it/s]
 15%|█▍        | 4569/31100 [01:56<11:22, 38.90it/s]
 15%|█▍        | 4574/31100 [01:56<11:10, 39.54it/s]
 15%|█▍        | 4579/31100 [01:56<11:04, 39.90it/s]
 15%|█▍        | 4584/31100 [01:56<11:05, 39.84it/s]
 15%|█▍        | 4589/31100 [01:56<11:01, 40.06it/s]
 15%|█▍  

[2m[36m(_objective pid=16313)[0m {'loss': 0.2244, 'learning_rate': 2.0965269584458086e-05, 'epoch': 0.8}


[2m[36m(_objective pid=16313)[0m  16%|█▌        | 5009/31100 [02:07<10:45, 40.44it/s]
 16%|█▌        | 5014/31100 [02:07<11:11, 38.85it/s]
 16%|█▌        | 5018/31100 [02:07<11:06, 39.15it/s]
 16%|█▌        | 5022/31100 [02:07<11:05, 39.18it/s]
 16%|█▌        | 5026/31100 [02:08<11:30, 37.75it/s]
 16%|█▌        | 5031/31100 [02:08<11:13, 38.69it/s]
 16%|█▌        | 5036/31100 [02:08<10:59, 39.54it/s]
 16%|█▌        | 5041/31100 [02:08<10:47, 40.24it/s]
 16%|█▌        | 5046/31100 [02:08<10:42, 40.57it/s]
 16%|█▌        | 5051/31100 [02:08<10:38, 40.80it/s]
 16%|█▋        | 5056/31100 [02:08<10:39, 40.72it/s]
 16%|█▋        | 5061/31100 [02:08<10:47, 40.21it/s]
 16%|█▋        | 5066/31100 [02:09<10:49, 40.07it/s]
 16%|█▋        | 5071/31100 [02:09<10:49, 40.10it/s]
 16%|█▋        | 5076/31100 [02:09<10:44, 40.39it/s]
 16%|█▋        | 5081/31100 [02:09<10:36, 40.86it/s]
 16%|█▋        | 5086/31100 [02:09<10:33, 41.04it/s]
 16%|█▋        | 5091/31100 [02:09<10:38, 40.73it/s]
 16%|█▋  

[2m[36m(_objective pid=16313)[0m {'loss': 0.3003, 'learning_rate': 2.0563636067514444e-05, 'epoch': 0.88}


 18%|█▊        | 5508/31100 [02:20<10:45, 39.62it/s]
 18%|█▊        | 5513/31100 [02:20<10:39, 40.00it/s]
 18%|█▊        | 5517/31100 [02:20<10:51, 39.24it/s]
 18%|█▊        | 5522/31100 [02:20<10:45, 39.60it/s]
 18%|█▊        | 5527/31100 [02:20<10:38, 40.07it/s]
 18%|█▊        | 5532/31100 [02:20<10:35, 40.25it/s]
 18%|█▊        | 5537/31100 [02:21<10:31, 40.48it/s]
 18%|█▊        | 5542/31100 [02:21<10:30, 40.56it/s]
 18%|█▊        | 5547/31100 [02:21<10:41, 39.84it/s]
 18%|█▊        | 5552/31100 [02:21<10:34, 40.26it/s]
 18%|█▊        | 5557/31100 [02:21<10:57, 38.83it/s]
 18%|█▊        | 5562/31100 [02:21<10:48, 39.37it/s]
 18%|█▊        | 5567/31100 [02:21<10:44, 39.63it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:22:51. Total running time: 2min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon |
+-------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598        2.48104e-08 |
| _objective_f556c_00001   PENDING        1.62407e-05                    3                        2       0.0174251       2.91544e-07 |
| _objective_f556c_00002   PENDING        3.40446e-05                    3                        4       0.0169235       7.72672e-08 |
| _o

[2m[36m(_objective pid=16313)[0m  18%|█▊        | 5571/31100 [02:21<10:46, 39.48it/s]
 18%|█▊        | 5575/31100 [02:22<10:47, 39.44it/s]
 18%|█▊        | 5579/31100 [02:22<10:47, 39.41it/s]
 18%|█▊        | 5583/31100 [02:22<10:53, 39.03it/s]
 18%|█▊        | 5587/31100 [02:22<10:57, 38.78it/s]
 18%|█▊        | 5591/31100 [02:22<10:53, 39.03it/s]
 18%|█▊        | 5595/31100 [02:22<10:52, 39.11it/s]
 18%|█▊        | 5600/31100 [02:22<11:17, 37.61it/s]
 18%|█▊        | 5604/31100 [02:22<11:10, 38.03it/s]
 18%|█▊        | 5608/31100 [02:22<11:11, 37.94it/s]
 18%|█▊        | 5612/31100 [02:23<11:05, 38.31it/s]
 18%|█▊        | 5616/31100 [02:23<10:58, 38.71it/s]
 18%|█▊        | 5620/31100 [02:23<11:05, 38.29it/s]
 18%|█▊        | 5625/31100 [02:23<10:52, 39.04it/s]
 18%|█▊        | 5630/31100 [02:23<10:43, 39.55it/s]
 18%|█▊        | 5635/31100 [02:23<10:35, 40.06it/s]
 18%|█▊        | 5640/31100 [02:23<10:32, 40.28it/s]
 18%|█▊        | 5645/31100 [02:23<11:24, 37.17it/s]
 18%|█▊  

[2m[36m(_objective pid=16313)[0m {'loss': 0.2973, 'learning_rate': 2.0162002550570806e-05, 'epoch': 0.96}


 19%|█▉        | 6005/31100 [02:32<10:49, 38.62it/s]
 19%|█▉        | 6009/31100 [02:33<11:16, 37.07it/s]
 19%|█▉        | 6013/31100 [02:33<11:09, 37.47it/s]
 19%|█▉        | 6018/31100 [02:33<10:49, 38.60it/s]
 19%|█▉        | 6022/31100 [02:33<10:47, 38.71it/s]
 19%|█▉        | 6027/31100 [02:33<10:37, 39.34it/s]
 19%|█▉        | 6032/31100 [02:33<10:32, 39.62it/s]
 19%|█▉        | 6037/31100 [02:33<10:29, 39.84it/s]
 19%|█▉        | 6041/31100 [02:33<10:37, 39.28it/s]
 19%|█▉        | 6045/31100 [02:33<10:40, 39.12it/s]
 19%|█▉        | 6049/31100 [02:34<10:46, 38.75it/s]
 19%|█▉        | 6053/31100 [02:34<10:52, 38.41it/s]
 19%|█▉        | 6057/31100 [02:34<11:23, 36.62it/s]
 19%|█▉        | 6061/31100 [02:34<11:07, 37.50it/s]
 20%|█▉        | 6065/31100 [02:34<10:57, 38.07it/s]
 20%|█▉        | 6069/31100 [02:34<11:02, 37.81it/s]
 20%|█▉        | 6073/31100 [02:34<11:00, 37.91it/s]
 20%|█▉        | 6077/31100 [02:34<10:54, 38.23it/s]
 20%|█▉        | 6081/31100 [02:34<11:01, 37.8

Trial _objective_f556c_00000 finished iteration 1 at 2023-09-11 13:23:17. Total running time: 2min 56s
+-------------------------------------------------+
| Trial _objective_f556c_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        170.299 |
| time_total_s                            170.299 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.23042 |
| eval_runtime                             9.5414 |
| eval_samples_per_second                 434.631 |
| eval_steps_per_second                    13.625 |
| objective                               0.23042 |
+-------------------------------------------------+

[2m[36m(_objective pid=16313)[0m {'eval_loss': 0.2304166704416275, 'eval_runtime': 9.5414, 'eval_samples_per_second': 434.631, 'eval_steps_per_second': 13.625, 'epoch': 1.0}


[2m[36m(_objective pid=16313)[0m                                                     
[2m[36m(_objective pid=16313)[0m                                                  [A 20%|██        | 6220/31100 [02:48<10:30, 39.45it/s]
[2m[36m(_objective pid=16313)[0m 100%|██████████| 130/130 [00:09<00:00, 12.02it/s][A
                                                 [A
 20%|██        | 6221/31100 [02:49<5:20:37,  1.29it/s]
 20%|██        | 6225/31100 [02:49<3:53:10,  1.78it/s]
 20%|██        | 6229/31100 [02:49<2:49:08,  2.45it/s]
 20%|██        | 6233/31100 [02:49<2:03:09,  3.37it/s]
 20%|██        | 6237/31100 [02:49<1:30:39,  4.57it/s]
 20%|██        | 6241/31100 [02:49<1:07:35,  6.13it/s]
 20%|██        | 6245/31100 [02:50<50:51,  8.14it/s]  
 20%|██        | 6249/31100 [02:50<38:59, 10.62it/s]
 20%|██        | 6253/31100 [02:50<30:39, 13.50it/s]
 20%|██        | 6257/31100 [02:50<24:43, 16.75it/s]
 20%|██        | 6261/31100 [02:50<20:28, 20.22it/s]
 20%|██        | 6265/3110

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:23:21. Total running time: 3min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  20%|██        | 6322/31100 [02:52<11:29, 35.91it/s]
 20%|██        | 6326/31100 [02:52<11:36, 35.58it/s]
 20%|██        | 6330/31100 [02:52<11:16, 36.61it/s]
 20%|██        | 6334/31100 [02:52<11:03, 37.32it/s]
 20%|██        | 6339/31100 [02:52<11:15, 36.65it/s]
 20%|██        | 6343/31100 [02:52<11:07, 37.09it/s]
 20%|██        | 6348/31100 [02:52<10:46, 38.30it/s]
 20%|██        | 6352/31100 [02:52<10:39, 38.70it/s]
 20%|██        | 6357/31100 [02:53<10:29, 39.28it/s]
 20%|██        | 6362/31100 [02:53<10:25, 39.55it/s]
 20%|██        | 6367/31100 [02:53<10:18, 40.00it/s]
 20%|██        | 6372/31100 [02:53<10:12, 40.39it/s]
 21%|██        | 6377/31100 [02:53<10:15, 40.20it/s]
 21%|██        | 6382/31100 [02:53<10:10, 40.49it/s]
 21%|██        | 6387/31100 [02:53<10:29, 39.25it/s]
 21%|██        | 6392/31100 [02:53<10:23, 39.62it/s]
 21%|██        | 6396/31100 [02:53<10:32, 39.05it/s]
 21%|██        | 6401/31100 [02:54<10:18, 39.96it/s]
 21%|██  

[2m[36m(_objective pid=16313)[0m {'loss': 0.217, 'learning_rate': 1.976036903362716e-05, 'epoch': 1.05}


[2m[36m(_objective pid=16313)[0m  21%|██        | 6505/31100 [02:56<10:26, 39.23it/s]
 21%|██        | 6510/31100 [02:56<10:19, 39.70it/s]
 21%|██        | 6514/31100 [02:56<10:23, 39.44it/s]
 21%|██        | 6519/31100 [02:57<10:17, 39.82it/s]
 21%|██        | 6523/31100 [02:57<10:16, 39.84it/s]
 21%|██        | 6528/31100 [02:57<10:11, 40.19it/s]
 21%|██        | 6533/31100 [02:57<10:01, 40.83it/s]
 21%|██        | 6538/31100 [02:57<09:59, 41.00it/s]
 21%|██        | 6543/31100 [02:57<09:56, 41.18it/s]
 21%|██        | 6548/31100 [02:57<09:53, 41.36it/s]
 21%|██        | 6553/31100 [02:57<09:49, 41.64it/s]
 21%|██        | 6558/31100 [02:58<10:16, 39.80it/s]
 21%|██        | 6563/31100 [02:58<10:11, 40.14it/s]
 21%|██        | 6568/31100 [02:58<10:04, 40.60it/s]
 21%|██        | 6573/31100 [02:58<09:59, 40.91it/s]
 21%|██        | 6578/31100 [02:58<09:56, 41.14it/s]
 21%|██        | 6583/31100 [02:58<10:24, 39.25it/s]
 21%|██        | 6588/31100 [02:58<10:17, 39.68it/s]
 21%|██  

[2m[36m(_objective pid=16313)[0m {'loss': 0.1529, 'learning_rate': 1.9358735516683522e-05, 'epoch': 1.13}


[2m[36m(_objective pid=16313)[0m  23%|██▎       | 7010/31100 [03:09<09:50, 40.78it/s]
 23%|██▎       | 7015/31100 [03:09<09:54, 40.52it/s]
 23%|██▎       | 7020/31100 [03:09<09:54, 40.49it/s]
 23%|██▎       | 7025/31100 [03:09<10:01, 40.02it/s]
 23%|██▎       | 7030/31100 [03:09<10:04, 39.85it/s]
 23%|██▎       | 7035/31100 [03:10<10:05, 39.76it/s]
 23%|██▎       | 7040/31100 [03:10<09:58, 40.17it/s]
 23%|██▎       | 7045/31100 [03:10<09:53, 40.53it/s]
 23%|██▎       | 7050/31100 [03:10<10:20, 38.76it/s]
 23%|██▎       | 7054/31100 [03:10<10:21, 38.71it/s]
 23%|██▎       | 7058/31100 [03:10<10:27, 38.31it/s]
 23%|██▎       | 7062/31100 [03:10<10:22, 38.62it/s]
 23%|██▎       | 7066/31100 [03:10<10:20, 38.70it/s]
 23%|██▎       | 7071/31100 [03:10<10:13, 39.18it/s]
 23%|██▎       | 7075/31100 [03:11<10:17, 38.89it/s]
 23%|██▎       | 7080/31100 [03:11<10:07, 39.55it/s]
 23%|██▎       | 7084/31100 [03:11<10:07, 39.51it/s]
 23%|██▎       | 7089/31100 [03:11<10:02, 39.87it/s]
 23%|██▎ 

[2m[36m(_objective pid=16313)[0m {'loss': 0.1391, 'learning_rate': 1.8957101999739877e-05, 'epoch': 1.21}


[2m[36m(_objective pid=16313)[0m  24%|██▍       | 7506/31100 [03:21<09:55, 39.60it/s]
[2m[36m(_objective pid=16313)[0m  24%|██▍       | 7510/31100 [03:22<09:56, 39.52it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:23:51. Total running time: 3min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  24%|██▍       | 7514/31100 [03:22<09:56, 39.53it/s]
 24%|██▍       | 7518/31100 [03:22<09:57, 39.49it/s]
 24%|██▍       | 7522/31100 [03:22<10:06, 38.85it/s]
 24%|██▍       | 7526/31100 [03:22<10:07, 38.80it/s]
 24%|██▍       | 7530/31100 [03:22<10:09, 38.65it/s]
 24%|██▍       | 7534/31100 [03:22<10:09, 38.67it/s]
 24%|██▍       | 7538/31100 [03:22<10:09, 38.63it/s]
 24%|██▍       | 7542/31100 [03:22<10:37, 36.97it/s]
 24%|██▍       | 7546/31100 [03:23<10:35, 37.08it/s]
 24%|██▍       | 7550/31100 [03:23<10:28, 37.45it/s]
 24%|██▍       | 7554/31100 [03:23<10:22, 37.82it/s]
 24%|██▍       | 7558/31100 [03:23<10:16, 38.18it/s]
 24%|██▍       | 7562/31100 [03:23<10:26, 37.59it/s]
 24%|██▍       | 7566/31100 [03:23<10:24, 37.66it/s]
 24%|██▍       | 7570/31100 [03:23<10:24, 37.68it/s]
 24%|██▍       | 7574/31100 [03:23<10:50, 36.14it/s]
 24%|██▍       | 7578/31100 [03:23<10:48, 36.29it/s]
 24%|██▍       | 7582/31100 [03:24<11:03, 35.43it/s]
 24%|██▍ 

[2m[36m(_objective pid=16313)[0m {'loss': 0.1407, 'learning_rate': 1.855546848279624e-05, 'epoch': 1.29}


[2m[36m(_objective pid=16313)[0m  26%|██▌       | 8009/31100 [03:34<10:22, 37.12it/s]
 26%|██▌       | 8014/31100 [03:34<10:03, 38.27it/s]
 26%|██▌       | 8018/31100 [03:35<10:29, 36.67it/s]
 26%|██▌       | 8022/31100 [03:35<10:35, 36.33it/s]
 26%|██▌       | 8026/31100 [03:35<10:38, 36.16it/s]
 26%|██▌       | 8030/31100 [03:35<10:35, 36.29it/s]
 26%|██▌       | 8035/31100 [03:35<10:13, 37.62it/s]
 26%|██▌       | 8040/31100 [03:35<09:57, 38.62it/s]
 26%|██▌       | 8045/31100 [03:35<09:48, 39.20it/s]
 26%|██▌       | 8050/31100 [03:35<09:39, 39.81it/s]
 26%|██▌       | 8055/31100 [03:35<09:36, 40.01it/s]
 26%|██▌       | 8059/31100 [03:36<09:38, 39.80it/s]
 26%|██▌       | 8063/31100 [03:36<09:38, 39.82it/s]
 26%|██▌       | 8068/31100 [03:36<09:36, 39.93it/s]
 26%|██▌       | 8073/31100 [03:36<09:34, 40.07it/s]
 26%|██▌       | 8078/31100 [03:36<09:31, 40.32it/s]
 26%|██▌       | 8083/31100 [03:36<09:32, 40.18it/s]
 26%|██▌       | 8088/31100 [03:36<09:36, 39.93it/s]
 26%|██▌ 

[2m[36m(_objective pid=16313)[0m {'loss': 0.1853, 'learning_rate': 1.8153834965852593e-05, 'epoch': 1.37}


[2m[36m(_objective pid=16313)[0m  27%|██▋       | 8503/31100 [03:47<09:16, 40.63it/s]
 27%|██▋       | 8508/31100 [03:47<09:11, 40.97it/s]
 27%|██▋       | 8513/31100 [03:47<09:13, 40.78it/s]
 27%|██▋       | 8518/31100 [03:47<09:16, 40.58it/s]
 27%|██▋       | 8523/31100 [03:47<09:13, 40.82it/s]
 27%|██▋       | 8528/31100 [03:47<09:13, 40.76it/s]
 27%|██▋       | 8533/31100 [03:47<09:14, 40.71it/s]
 27%|██▋       | 8538/31100 [03:48<09:14, 40.71it/s]
 27%|██▋       | 8543/31100 [03:48<09:12, 40.80it/s]
 27%|██▋       | 8548/31100 [03:48<09:35, 39.21it/s]
 28%|██▊       | 8553/31100 [03:48<09:26, 39.81it/s]
 28%|██▊       | 8558/31100 [03:48<09:18, 40.39it/s]
 28%|██▊       | 8563/31100 [03:48<09:18, 40.38it/s]
 28%|██▊       | 8568/31100 [03:48<09:30, 39.49it/s]
 28%|██▊       | 8573/31100 [03:48<09:22, 40.08it/s]
 28%|██▊       | 8578/31100 [03:49<09:17, 40.42it/s]
 28%|██▊       | 8583/31100 [03:49<09:12, 40.72it/s]
 28%|██▊       | 8588/31100 [03:49<09:34, 39.16it/s]
 28%|██▊ 

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:24:21. Total running time: 4min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  28%|██▊       | 8705/31100 [03:52<09:17, 40.18it/s]
 28%|██▊       | 8710/31100 [03:52<09:22, 39.77it/s]
 28%|██▊       | 8714/31100 [03:52<09:30, 39.23it/s]
 28%|██▊       | 8718/31100 [03:52<09:32, 39.10it/s]
 28%|██▊       | 8722/31100 [03:52<09:29, 39.33it/s]
 28%|██▊       | 8727/31100 [03:52<09:24, 39.60it/s]
 28%|██▊       | 8731/31100 [03:52<09:27, 39.39it/s]
 28%|██▊       | 8735/31100 [03:53<09:41, 38.46it/s]
 28%|██▊       | 8739/31100 [03:53<09:47, 38.06it/s]
 28%|██▊       | 8744/31100 [03:53<09:35, 38.87it/s]
 28%|██▊       | 8748/31100 [03:53<09:36, 38.75it/s]
 28%|██▊       | 8753/31100 [03:53<09:34, 38.90it/s]
 28%|██▊       | 8757/31100 [03:53<09:52, 37.73it/s]
 28%|██▊       | 8761/31100 [03:53<09:48, 37.93it/s]
 28%|██▊       | 8765/31100 [03:53<09:40, 38.49it/s]
 28%|██▊       | 8769/31100 [03:53<09:43, 38.28it/s]
 28%|██▊       | 8773/31100 [03:54<10:07, 36.73it/s]
 28%|██▊       | 8777/31100 [03:54<10:09, 36.65it/s]
 28%|██▊ 

[2m[36m(_objective pid=16313)[0m {'loss': 0.1402, 'learning_rate': 1.7752201448908955e-05, 'epoch': 1.45}


 29%|██▉       | 9005/31100 [03:59<09:23, 39.22it/s]
 29%|██▉       | 9010/31100 [04:00<09:17, 39.59it/s]
 29%|██▉       | 9015/31100 [04:00<09:08, 40.25it/s]
 29%|██▉       | 9020/31100 [04:00<09:01, 40.76it/s]
 29%|██▉       | 9025/31100 [04:00<09:00, 40.87it/s]
 29%|██▉       | 9030/31100 [04:00<08:58, 40.99it/s]
 29%|██▉       | 9035/31100 [04:00<08:59, 40.92it/s]
 29%|██▉       | 9040/31100 [04:00<08:57, 41.04it/s]
 29%|██▉       | 9045/31100 [04:00<08:59, 40.91it/s]
 29%|██▉       | 9050/31100 [04:01<09:03, 40.59it/s]
 29%|██▉       | 9055/31100 [04:01<08:56, 41.07it/s]
 29%|██▉       | 9060/31100 [04:01<08:57, 40.99it/s]
 29%|██▉       | 9065/31100 [04:01<09:23, 39.14it/s]
 29%|██▉       | 9070/31100 [04:01<09:16, 39.61it/s]
 29%|██▉       | 9074/31100 [04:01<09:22, 39.17it/s]
 29%|██▉       | 9079/31100 [04:01<09:19, 39.38it/s]
 29%|██▉       | 9084/31100 [04:01<09:11, 39.93it/s]
 29%|██▉       | 9088/31100 [04:02<09:13, 39.73it/s]
 29%|██▉       | 9093/31100 [04:02<09:02, 40.5

[2m[36m(_objective pid=16313)[0m {'loss': 0.176, 'learning_rate': 1.7350567931965313e-05, 'epoch': 1.53}


[2m[36m(_objective pid=16313)[0m  31%|███       | 9508/31100 [04:12<09:00, 39.93it/s]
 31%|███       | 9512/31100 [04:12<09:02, 39.80it/s]
 31%|███       | 9517/31100 [04:12<08:54, 40.35it/s]
 31%|███       | 9522/31100 [04:13<08:55, 40.27it/s]
 31%|███       | 9527/31100 [04:13<09:00, 39.93it/s]
 31%|███       | 9532/31100 [04:13<08:58, 40.07it/s]
 31%|███       | 9537/31100 [04:13<08:56, 40.20it/s]
 31%|███       | 9542/31100 [04:13<09:17, 38.67it/s]
 31%|███       | 9546/31100 [04:13<09:13, 38.97it/s]
 31%|███       | 9550/31100 [04:13<09:12, 39.00it/s]
 31%|███       | 9554/31100 [04:13<09:22, 38.33it/s]
 31%|███       | 9559/31100 [04:14<09:11, 39.03it/s]
 31%|███       | 9564/31100 [04:14<09:01, 39.75it/s]
 31%|███       | 9569/31100 [04:14<08:56, 40.17it/s]
 31%|███       | 9574/31100 [04:14<08:56, 40.16it/s]
 31%|███       | 9579/31100 [04:14<08:56, 40.15it/s]
 31%|███       | 9584/31100 [04:14<08:51, 40.47it/s]
 31%|███       | 9589/31100 [04:14<08:49, 40.66it/s]
 31%|███ 

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:24:51. Total running time: 4min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  32%|███▏      | 9885/31100 [04:22<08:56, 39.58it/s]
 32%|███▏      | 9890/31100 [04:22<08:47, 40.19it/s]
 32%|███▏      | 9895/31100 [04:22<09:24, 37.57it/s]
 32%|███▏      | 9899/31100 [04:22<09:42, 36.40it/s]
 32%|███▏      | 9904/31100 [04:22<09:19, 37.87it/s]
 32%|███▏      | 9909/31100 [04:22<09:08, 38.64it/s]
 32%|███▏      | 9914/31100 [04:22<09:06, 38.77it/s]
 32%|███▏      | 9918/31100 [04:23<09:03, 38.98it/s]
 32%|███▏      | 9922/31100 [04:23<08:59, 39.23it/s]
 32%|███▏      | 9926/31100 [04:23<08:59, 39.24it/s]
 32%|███▏      | 9931/31100 [04:23<08:51, 39.85it/s]
 32%|███▏      | 9936/31100 [04:23<08:52, 39.77it/s]
 32%|███▏      | 9940/31100 [04:23<08:51, 39.82it/s]
 32%|███▏      | 9944/31100 [04:23<09:09, 38.50it/s]
 32%|███▏      | 9948/31100 [04:23<09:13, 38.21it/s]
 32%|███▏      | 9953/31100 [04:23<09:01, 39.06it/s]
 32%|███▏      | 9958/31100 [04:24<08:56, 39.42it/s]
 32%|███▏      | 9963/31100 [04:24<08:51, 39.81it/s]
 32%|███▏

[2m[36m(_objective pid=16313)[0m {'loss': 0.1337, 'learning_rate': 1.694893441502167e-05, 'epoch': 1.61}


[2m[36m(_objective pid=16313)[0m  32%|███▏      | 10007/31100 [04:25<09:08, 38.46it/s]
 32%|███▏      | 10011/31100 [04:25<09:08, 38.48it/s]
 32%|███▏      | 10015/31100 [04:25<09:06, 38.58it/s]
 32%|███▏      | 10019/31100 [04:25<09:03, 38.78it/s]
 32%|███▏      | 10023/31100 [04:25<09:11, 38.25it/s]
 32%|███▏      | 10027/31100 [04:25<09:05, 38.63it/s]
 32%|███▏      | 10031/31100 [04:25<09:07, 38.45it/s]
 32%|███▏      | 10035/31100 [04:26<09:26, 37.15it/s]
 32%|███▏      | 10039/31100 [04:26<09:32, 36.77it/s]
 32%|███▏      | 10043/31100 [04:26<10:02, 34.97it/s]
 32%|███▏      | 10047/31100 [04:26<09:42, 36.14it/s]
 32%|███▏      | 10051/31100 [04:26<09:30, 36.91it/s]
 32%|███▏      | 10055/31100 [04:26<09:46, 35.90it/s]
 32%|███▏      | 10059/31100 [04:26<09:41, 36.19it/s]
 32%|███▏      | 10063/31100 [04:26<09:29, 36.93it/s]
 32%|███▏      | 10067/31100 [04:26<09:22, 37.36it/s]
 32%|███▏      | 10071/31100 [04:27<09:20, 37.55it/s]
 32%|███▏      | 10075/31100 [04:27<09:20, 37

[2m[36m(_objective pid=16313)[0m {'loss': 0.1973, 'learning_rate': 1.654730089807803e-05, 'epoch': 1.69}


[2m[36m(_objective pid=16313)[0m  34%|███▍      | 10510/31100 [04:38<08:47, 39.04it/s]
 34%|███▍      | 10515/31100 [04:38<08:41, 39.45it/s]
 34%|███▍      | 10519/31100 [04:38<08:41, 39.46it/s]
 34%|███▍      | 10523/31100 [04:38<08:47, 39.02it/s]
 34%|███▍      | 10528/31100 [04:38<08:42, 39.41it/s]
 34%|███▍      | 10533/31100 [04:38<08:34, 40.00it/s]
 34%|███▍      | 10538/31100 [04:38<08:36, 39.84it/s]
 34%|███▍      | 10543/31100 [04:38<08:31, 40.17it/s]
 34%|███▍      | 10548/31100 [04:39<08:27, 40.53it/s]
 34%|███▍      | 10553/31100 [04:39<08:32, 40.10it/s]
 34%|███▍      | 10558/31100 [04:39<08:32, 40.12it/s]
 34%|███▍      | 10563/31100 [04:39<08:33, 39.96it/s]
 34%|███▍      | 10567/31100 [04:39<08:38, 39.57it/s]
 34%|███▍      | 10571/31100 [04:39<08:42, 39.26it/s]
 34%|███▍      | 10575/31100 [04:39<08:42, 39.26it/s]
 34%|███▍      | 10580/31100 [04:39<08:36, 39.76it/s]
 34%|███▍      | 10585/31100 [04:39<08:36, 39.70it/s]
 34%|███▍      | 10590/31100 [04:40<08:28, 40

[2m[36m(_objective pid=16313)[0m {'loss': 0.1888, 'learning_rate': 1.6145667381134387e-05, 'epoch': 1.77}


[2m[36m(_objective pid=16313)[0m  35%|███▌      | 11005/31100 [04:51<09:21, 35.79it/s]
 35%|███▌      | 11009/31100 [04:51<09:15, 36.17it/s]
 35%|███▌      | 11013/31100 [04:51<09:09, 36.56it/s]
 35%|███▌      | 11017/31100 [04:51<08:58, 37.31it/s]
 35%|███▌      | 11021/31100 [04:51<09:10, 36.47it/s]
 35%|███▌      | 11025/31100 [04:51<09:04, 36.88it/s]
 35%|███▌      | 11029/31100 [04:51<09:11, 36.42it/s]
 35%|███▌      | 11033/31100 [04:52<09:44, 34.35it/s]
 35%|███▌      | 11037/31100 [04:52<09:40, 34.54it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:25:21. Total running time: 5min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  36%|███▌      | 11041/31100 [04:52<09:41, 34.47it/s]
 36%|███▌      | 11045/31100 [04:52<09:24, 35.56it/s]
 36%|███▌      | 11049/31100 [04:52<09:06, 36.68it/s]
 36%|███▌      | 11053/31100 [04:52<08:54, 37.54it/s]
 36%|███▌      | 11057/31100 [04:52<08:44, 38.21it/s]
 36%|███▌      | 11061/31100 [04:52<08:38, 38.67it/s]
 36%|███▌      | 11065/31100 [04:52<08:35, 38.86it/s]
 36%|███▌      | 11069/31100 [04:53<08:31, 39.19it/s]
 36%|███▌      | 11074/31100 [04:53<08:38, 38.63it/s]
 36%|███▌      | 11078/31100 [04:53<08:39, 38.58it/s]
 36%|███▌      | 11082/31100 [04:53<08:39, 38.55it/s]
 36%|███▌      | 11086/31100 [04:53<09:06, 36.60it/s]
 36%|███▌      | 11090/31100 [04:53<09:07, 36.58it/s]
 36%|███▌      | 11094/31100 [04:53<09:37, 34.62it/s]
 36%|███▌      | 11098/31100 [04:53<09:39, 34.52it/s]
 36%|███▌      | 11102/31100 [04:53<09:20, 35.67it/s]
 36%|███▌      | 11106/31100 [04:54<09:05, 36.68it/s]
 36%|███▌      | 11110/31100 [04:54<09:02, 36

[2m[36m(_objective pid=16313)[0m {'loss': 0.1999, 'learning_rate': 1.574403386419075e-05, 'epoch': 1.85}


 37%|███▋      | 11507/31100 [05:04<08:01, 40.70it/s]
 37%|███▋      | 11512/31100 [05:04<08:02, 40.59it/s]
 37%|███▋      | 11517/31100 [05:04<08:04, 40.42it/s]
 37%|███▋      | 11522/31100 [05:04<08:04, 40.38it/s]
 37%|███▋      | 11527/31100 [05:04<08:04, 40.41it/s]
 37%|███▋      | 11532/31100 [05:05<08:11, 39.85it/s]
 37%|███▋      | 11537/31100 [05:05<08:08, 40.08it/s]
 37%|███▋      | 11542/31100 [05:05<08:07, 40.10it/s]
 37%|███▋      | 11547/31100 [05:05<08:12, 39.68it/s]
 37%|███▋      | 11552/31100 [05:05<08:11, 39.80it/s]
 37%|███▋      | 11557/31100 [05:05<08:09, 39.96it/s]
 37%|███▋      | 11562/31100 [05:05<08:03, 40.39it/s]
 37%|███▋      | 11567/31100 [05:05<08:03, 40.37it/s]
 37%|███▋      | 11572/31100 [05:06<08:02, 40.51it/s]
 37%|███▋      | 11577/31100 [05:06<08:00, 40.62it/s]
 37%|███▋      | 11582/31100 [05:06<08:05, 40.18it/s]
 37%|███▋      | 11587/31100 [05:06<08:22, 38.79it/s]
 37%|███▋      | 11592/31100 [05:06<08:18, 39.16it/s]
 37%|███▋      | 11596/31100

[2m[36m(_objective pid=16313)[0m {'loss': 0.149, 'learning_rate': 1.5342400347247104e-05, 'epoch': 1.93}


[2m[36m(_objective pid=16313)[0m  39%|███▊      | 12005/31100 [05:17<07:42, 41.30it/s]
 39%|███▊      | 12010/31100 [05:17<07:44, 41.08it/s]
 39%|███▊      | 12015/31100 [05:17<07:40, 41.45it/s]
 39%|███▊      | 12020/31100 [05:17<07:46, 40.86it/s]
 39%|███▊      | 12025/31100 [05:17<08:00, 39.67it/s]
 39%|███▊      | 12029/31100 [05:17<08:03, 39.42it/s]
 39%|███▊      | 12034/31100 [05:18<08:00, 39.67it/s]
 39%|███▊      | 12038/31100 [05:18<08:05, 39.26it/s]
 39%|███▊      | 12043/31100 [05:18<07:59, 39.74it/s]
 39%|███▊      | 12048/31100 [05:18<07:51, 40.41it/s]
 39%|███▉      | 12053/31100 [05:18<07:50, 40.50it/s]
 39%|███▉      | 12058/31100 [05:18<07:59, 39.73it/s]
 39%|███▉      | 12063/31100 [05:18<07:51, 40.38it/s]
 39%|███▉      | 12068/31100 [05:18<07:47, 40.68it/s]
 39%|███▉      | 12073/31100 [05:18<07:47, 40.71it/s]
 39%|███▉      | 12078/31100 [05:19<07:43, 41.07it/s]
 39%|███▉      | 12083/31100 [05:19<07:40, 41.30it/s]
 39%|███▉      | 12088/31100 [05:19<07:37, 41

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:25:51. Total running time: 5min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  39%|███▉      | 12192/31100 [05:22<08:41, 36.28it/s]
 39%|███▉      | 12197/31100 [05:22<08:19, 37.83it/s]
 39%|███▉      | 12202/31100 [05:22<08:03, 39.05it/s]
 39%|███▉      | 12207/31100 [05:22<07:56, 39.61it/s]
 39%|███▉      | 12212/31100 [05:22<07:49, 40.22it/s]
 39%|███▉      | 12217/31100 [05:22<07:47, 40.37it/s]
 39%|███▉      | 12222/31100 [05:22<07:44, 40.66it/s]
 39%|███▉      | 12227/31100 [05:23<07:40, 41.02it/s]
 39%|███▉      | 12232/31100 [05:23<07:38, 41.12it/s]
 39%|███▉      | 12237/31100 [05:23<07:35, 41.42it/s]
 39%|███▉      | 12242/31100 [05:23<07:33, 41.54it/s]
 39%|███▉      | 12247/31100 [05:23<07:36, 41.29it/s]
 39%|███▉      | 12252/31100 [05:23<07:35, 41.39it/s]
 39%|███▉      | 12257/31100 [05:23<07:34, 41.45it/s]
 39%|███▉      | 12262/31100 [05:23<07:36, 41.25it/s]
 39%|███▉      | 12267/31100 [05:24<07:42, 40.74it/s]
 39%|███▉      | 12272/31100 [05:24<07:46, 40.32it/s]
 39%|███▉      | 12277/31100 [05:24<07:44, 40

Trial _objective_f556c_00000 finished iteration 2 at 2023-09-11 13:26:07. Total running time: 5min 46s
+-------------------------------------------------+
| Trial _objective_f556c_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                         169.84 |
| time_total_s                            340.139 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.19005 |
| eval_runtime                              9.545 |
| eval_samples_per_second                 434.466 |
| eval_steps_per_second                     13.62 |
| objective                               0.19005 |
+-------------------------------------------------+

[2m[36m(_objective pid=16313)[0m {'eval_loss': 0.19005461037158966, 'eval_runtime': 9.545, 'eval_samples_per_second': 434.466, 'eval_steps_per_second': 13.62, 'epoch': 2.0}


[2m[36m(_objective pid=16313)[0m                                                      
[2m[36m(_objective pid=16313)[0m                                                  [A 40%|████      | 12440/31100 [05:37<08:03, 38.61it/s]
[2m[36m(_objective pid=16313)[0m 100%|██████████| 130/130 [00:09<00:00, 12.03it/s][A
                                                 [A
 40%|████      | 12443/31100 [05:39<4:12:10,  1.23it/s]
 40%|████      | 12447/31100 [05:39<3:00:24,  1.72it/s]
 40%|████      | 12451/31100 [05:39<2:09:25,  2.40it/s]
 40%|████      | 12455/31100 [05:39<1:33:24,  3.33it/s]
 40%|████      | 12459/31100 [05:39<1:07:58,  4.57it/s]
 40%|████      | 12463/31100 [05:39<50:04,  6.20it/s]  
 40%|████      | 12468/31100 [05:39<35:16,  8.80it/s]
 40%|████      | 12472/31100 [05:39<27:27, 11.31it/s]
 40%|████      | 12476/31100 [05:40<21:55, 14.16it/s]
 40%|████      | 12480/31100 [05:40<17:50, 17.40it/s]
 40%|████      | 12484/31100 [05:40<15:02, 20.63it/s]
 40%|████      

[2m[36m(_objective pid=16313)[0m {'loss': 0.202, 'learning_rate': 1.4940766830303463e-05, 'epoch': 2.01}


[2m[36m(_objective pid=16313)[0m  40%|████      | 12505/31100 [05:40<09:09, 33.82it/s]
[2m[36m(_objective pid=16313)[0m  40%|████      | 12509/31100 [05:40<08:46, 35.33it/s]
 40%|████      | 12513/31100 [05:41<08:30, 36.42it/s]
 40%|████      | 12518/31100 [05:41<08:15, 37.54it/s]
 40%|████      | 12522/31100 [05:41<08:07, 38.14it/s]
 40%|████      | 12526/31100 [05:41<08:06, 38.21it/s]
 40%|████      | 12530/31100 [05:41<08:00, 38.68it/s]
 40%|████      | 12534/31100 [05:41<07:55, 39.01it/s]
 40%|████      | 12538/31100 [05:41<08:19, 37.19it/s]
 40%|████      | 12542/31100 [05:41<08:21, 36.98it/s]
 40%|████      | 12546/31100 [05:41<08:31, 36.24it/s]
 40%|████      | 12550/31100 [05:42<08:48, 35.13it/s]
 40%|████      | 12554/31100 [05:42<08:34, 36.06it/s]
 40%|████      | 12558/31100 [05:42<08:34, 36.03it/s]
 40%|████      | 12562/31100 [05:42<08:42, 35.50it/s]
 40%|████      | 12566/31100 [05:42<08:25, 36.69it/s]
 40%|████      | 12570/31100 [05:42<08:13, 37.51it/s]
 40%|███

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:26:21. Total running time: 6min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  42%|████▏     | 12951/31100 [05:52<07:38, 39.54it/s]
 42%|████▏     | 12955/31100 [05:52<07:43, 39.14it/s]
 42%|████▏     | 12960/31100 [05:52<07:38, 39.59it/s]
 42%|████▏     | 12964/31100 [05:52<07:37, 39.64it/s]
 42%|████▏     | 12968/31100 [05:52<07:54, 38.18it/s]
 42%|████▏     | 12972/31100 [05:53<07:50, 38.57it/s]
 42%|████▏     | 12977/31100 [05:53<07:41, 39.31it/s]
 42%|████▏     | 12981/31100 [05:53<07:40, 39.34it/s]
 42%|████▏     | 12986/31100 [05:53<07:37, 39.61it/s]
 42%|████▏     | 12991/31100 [05:53<07:32, 40.01it/s]
 42%|████▏     | 12996/31100 [05:53<07:32, 40.03it/s]
 42%|████▏     | 13001/31100 [05:53<07:30, 40.19it/s]
 42%|████▏     | 13006/31100 [05:53<07:40, 39.33it/s]


[2m[36m(_objective pid=16313)[0m {'loss': 0.1116, 'learning_rate': 1.4539133313359822e-05, 'epoch': 2.09}


[2m[36m(_objective pid=16313)[0m  42%|████▏     | 13010/31100 [05:53<07:39, 39.38it/s]
 42%|████▏     | 13014/31100 [05:54<07:39, 39.35it/s]
 42%|████▏     | 13019/31100 [05:54<07:35, 39.67it/s]
 42%|████▏     | 13024/31100 [05:54<07:32, 39.92it/s]
 42%|████▏     | 13028/31100 [05:54<07:34, 39.75it/s]
 42%|████▏     | 13032/31100 [05:54<07:52, 38.26it/s]
 42%|████▏     | 13036/31100 [05:54<07:49, 38.50it/s]
 42%|████▏     | 13041/31100 [05:54<07:42, 39.08it/s]
 42%|████▏     | 13045/31100 [05:54<07:54, 38.07it/s]
 42%|████▏     | 13049/31100 [05:55<07:50, 38.39it/s]
 42%|████▏     | 13054/31100 [05:55<07:42, 39.05it/s]
 42%|████▏     | 13058/31100 [05:55<07:58, 37.67it/s]
 42%|████▏     | 13063/31100 [05:55<07:47, 38.61it/s]
 42%|████▏     | 13068/31100 [05:55<07:40, 39.12it/s]
 42%|████▏     | 13072/31100 [05:55<07:43, 38.87it/s]
 42%|████▏     | 13076/31100 [05:55<07:41, 39.08it/s]
 42%|████▏     | 13081/31100 [05:55<07:33, 39.75it/s]
 42%|████▏     | 13086/31100 [05:55<07:27, 40

[2m[36m(_objective pid=16313)[0m {'loss': 0.0895, 'learning_rate': 1.4137499796416181e-05, 'epoch': 2.17}


[2m[36m(_objective pid=16313)[0m  43%|████▎     | 13507/31100 [06:06<07:43, 37.95it/s]
 43%|████▎     | 13512/31100 [06:06<07:32, 38.85it/s]
 43%|████▎     | 13517/31100 [06:07<07:31, 38.94it/s]
 43%|████▎     | 13521/31100 [06:07<07:28, 39.21it/s]
 43%|████▎     | 13525/31100 [06:07<07:26, 39.38it/s]
 44%|████▎     | 13529/31100 [06:07<07:31, 38.90it/s]
 44%|████▎     | 13534/31100 [06:07<07:23, 39.63it/s]
 44%|████▎     | 13538/31100 [06:07<07:44, 37.79it/s]
 44%|████▎     | 13543/31100 [06:07<07:36, 38.46it/s]
 44%|████▎     | 13547/31100 [06:07<07:32, 38.82it/s]
 44%|████▎     | 13551/31100 [06:07<07:52, 37.15it/s]
 44%|████▎     | 13555/31100 [06:08<07:42, 37.92it/s]
 44%|████▎     | 13560/31100 [06:08<07:33, 38.68it/s]
 44%|████▎     | 13565/31100 [06:08<07:26, 39.27it/s]
 44%|████▎     | 13570/31100 [06:08<07:19, 39.90it/s]
 44%|████▎     | 13575/31100 [06:08<07:15, 40.25it/s]
 44%|████▎     | 13580/31100 [06:08<07:22, 39.60it/s]
 44%|████▎     | 13584/31100 [06:08<07:24, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0755, 'learning_rate': 1.3735866279472538e-05, 'epoch': 2.25}


[2m[36m(_objective pid=16313)[0m  45%|████▌     | 14008/31100 [06:20<07:18, 38.94it/s]
 45%|████▌     | 14012/31100 [06:20<07:17, 39.02it/s]
 45%|████▌     | 14016/31100 [06:20<07:26, 38.27it/s]
 45%|████▌     | 14021/31100 [06:20<07:18, 38.95it/s]
 45%|████▌     | 14026/31100 [06:20<07:14, 39.29it/s]
 45%|████▌     | 14030/31100 [06:20<07:12, 39.47it/s]
 45%|████▌     | 14035/31100 [06:20<07:08, 39.78it/s]
 45%|████▌     | 14039/31100 [06:20<07:08, 39.80it/s]
 45%|████▌     | 14043/31100 [06:20<07:08, 39.83it/s]
 45%|████▌     | 14048/31100 [06:21<07:06, 40.01it/s]
 45%|████▌     | 14053/31100 [06:21<07:04, 40.11it/s]
 45%|████▌     | 14058/31100 [06:21<07:21, 38.60it/s]
 45%|████▌     | 14062/31100 [06:21<07:18, 38.83it/s]
 45%|████▌     | 14067/31100 [06:21<07:11, 39.44it/s]
 45%|████▌     | 14071/31100 [06:21<07:12, 39.42it/s]
 45%|████▌     | 14075/31100 [06:21<07:11, 39.43it/s]
 45%|████▌     | 14079/31100 [06:21<07:11, 39.46it/s]
 45%|████▌     | 14083/31100 [06:21<07:12, 39

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:26:51. Total running time: 6min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  45%|████▌     | 14096/31100 [06:22<07:11, 39.37it/s]
 45%|████▌     | 14101/31100 [06:22<07:07, 39.74it/s]
 45%|████▌     | 14106/31100 [06:22<07:05, 39.94it/s]
 45%|████▌     | 14110/31100 [06:22<07:08, 39.65it/s]
 45%|████▌     | 14115/31100 [06:22<07:05, 39.89it/s]
 45%|████▌     | 14119/31100 [06:22<07:08, 39.62it/s]
 45%|████▌     | 14123/31100 [06:22<07:11, 39.35it/s]
 45%|████▌     | 14127/31100 [06:23<07:09, 39.51it/s]
 45%|████▌     | 14132/31100 [06:23<07:05, 39.91it/s]
 45%|████▌     | 14136/31100 [06:23<07:11, 39.28it/s]
 45%|████▌     | 14140/31100 [06:23<07:18, 38.70it/s]
 45%|████▌     | 14144/31100 [06:23<07:14, 39.04it/s]
 45%|████▌     | 14149/31100 [06:23<07:10, 39.33it/s]
 46%|████▌     | 14153/31100 [06:23<07:09, 39.44it/s]
 46%|████▌     | 14158/31100 [06:23<07:04, 39.89it/s]
 46%|████▌     | 14162/31100 [06:23<07:27, 37.87it/s]
 46%|████▌     | 14167/31100 [06:24<07:19, 38.53it/s]
 46%|████▌     | 14172/31100 [06:24<07:11, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.1111, 'learning_rate': 1.3334232762528898e-05, 'epoch': 2.33}


[2m[36m(_objective pid=16313)[0m  47%|████▋     | 14505/31100 [06:32<07:36, 36.37it/s]
 47%|████▋     | 14509/31100 [06:33<07:35, 36.43it/s]
 47%|████▋     | 14513/31100 [06:33<07:31, 36.71it/s]
 47%|████▋     | 14517/31100 [06:33<07:22, 37.45it/s]
 47%|████▋     | 14521/31100 [06:33<07:21, 37.57it/s]
 47%|████▋     | 14525/31100 [06:33<07:19, 37.74it/s]
 47%|████▋     | 14529/31100 [06:33<07:13, 38.20it/s]
 47%|████▋     | 14533/31100 [06:33<07:14, 38.17it/s]
 47%|████▋     | 14537/31100 [06:33<07:10, 38.51it/s]
 47%|████▋     | 14541/31100 [06:33<07:14, 38.10it/s]
 47%|████▋     | 14545/31100 [06:33<07:13, 38.17it/s]
 47%|████▋     | 14549/31100 [06:34<07:12, 38.24it/s]
 47%|████▋     | 14553/31100 [06:34<07:11, 38.31it/s]
 47%|████▋     | 14557/31100 [06:34<07:14, 38.05it/s]
 47%|████▋     | 14561/31100 [06:34<07:15, 38.01it/s]
 47%|████▋     | 14565/31100 [06:34<07:15, 37.98it/s]
 47%|████▋     | 14569/31100 [06:34<07:14, 38.05it/s]
 47%|████▋     | 14573/31100 [06:34<07:23, 37

[2m[36m(_objective pid=16313)[0m {'loss': 0.0901, 'learning_rate': 1.2932599245585258e-05, 'epoch': 2.41}


[2m[36m(_objective pid=16313)[0m  48%|████▊     | 15012/31100 [06:46<06:50, 39.20it/s]
 48%|████▊     | 15016/31100 [06:46<06:48, 39.36it/s]
 48%|████▊     | 15020/31100 [06:46<06:50, 39.13it/s]
 48%|████▊     | 15024/31100 [06:46<07:02, 38.02it/s]
 48%|████▊     | 15028/31100 [06:46<06:58, 38.39it/s]
 48%|████▊     | 15032/31100 [06:46<06:59, 38.35it/s]
 48%|████▊     | 15036/31100 [06:46<06:57, 38.52it/s]
 48%|████▊     | 15040/31100 [06:46<06:57, 38.45it/s]
 48%|████▊     | 15044/31100 [06:46<07:02, 38.00it/s]
 48%|████▊     | 15048/31100 [06:46<07:13, 37.03it/s]
 48%|████▊     | 15052/31100 [06:47<07:10, 37.29it/s]
 48%|████▊     | 15056/31100 [06:47<07:12, 37.06it/s]
 48%|████▊     | 15060/31100 [06:47<07:07, 37.56it/s]
 48%|████▊     | 15064/31100 [06:47<06:59, 38.22it/s]
 48%|████▊     | 15068/31100 [06:47<07:16, 36.73it/s]
 48%|████▊     | 15072/31100 [06:47<07:07, 37.45it/s]
 48%|████▊     | 15076/31100 [06:47<07:22, 36.21it/s]
 48%|████▊     | 15080/31100 [06:47<07:12, 37

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:27:21. Total running time: 7min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  49%|████▉     | 15251/31100 [06:52<06:48, 38.77it/s]
 49%|████▉     | 15255/31100 [06:52<06:49, 38.67it/s]
 49%|████▉     | 15259/31100 [06:52<06:45, 39.04it/s]
 49%|████▉     | 15263/31100 [06:52<06:46, 38.95it/s]
 49%|████▉     | 15267/31100 [06:52<06:47, 38.87it/s]
 49%|████▉     | 15271/31100 [06:52<06:47, 38.85it/s]
 49%|████▉     | 15275/31100 [06:53<06:46, 38.92it/s]
 49%|████▉     | 15279/31100 [06:53<06:55, 38.05it/s]
 49%|████▉     | 15283/31100 [06:53<06:50, 38.53it/s]
 49%|████▉     | 15287/31100 [06:53<06:45, 38.96it/s]
 49%|████▉     | 15291/31100 [06:53<06:44, 39.05it/s]
 49%|████▉     | 15295/31100 [06:53<06:47, 38.80it/s]
 49%|████▉     | 15299/31100 [06:53<06:47, 38.80it/s]
 49%|████▉     | 15303/31100 [06:53<06:46, 38.90it/s]
 49%|████▉     | 15307/31100 [06:53<06:50, 38.46it/s]
 49%|████▉     | 15311/31100 [06:53<06:47, 38.76it/s]
 49%|████▉     | 15315/31100 [06:54<06:47, 38.77it/s]
 49%|████▉     | 15320/31100 [06:54<06:41, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.106, 'learning_rate': 1.2530965728641614e-05, 'epoch': 2.49}


[2m[36m(_objective pid=16313)[0m  50%|████▉     | 15503/31100 [06:58<06:46, 38.38it/s]
 50%|████▉     | 15507/31100 [06:58<06:46, 38.37it/s]
 50%|████▉     | 15511/31100 [06:59<06:44, 38.56it/s]
 50%|████▉     | 15515/31100 [06:59<06:40, 38.95it/s]
 50%|████▉     | 15520/31100 [06:59<06:35, 39.44it/s]
 50%|████▉     | 15524/31100 [06:59<06:34, 39.52it/s]
 50%|████▉     | 15529/31100 [06:59<06:30, 39.83it/s]
 50%|████▉     | 15533/31100 [06:59<06:48, 38.08it/s]
 50%|████▉     | 15538/31100 [06:59<06:41, 38.76it/s]
 50%|████▉     | 15542/31100 [06:59<06:43, 38.51it/s]
 50%|████▉     | 15546/31100 [06:59<06:42, 38.65it/s]
 50%|█████     | 15550/31100 [07:00<06:41, 38.71it/s]
 50%|█████     | 15554/31100 [07:00<06:42, 38.67it/s]
 50%|█████     | 15558/31100 [07:00<06:39, 38.93it/s]
 50%|█████     | 15562/31100 [07:00<06:46, 38.18it/s]
 50%|█████     | 15566/31100 [07:00<06:41, 38.65it/s]
 50%|█████     | 15571/31100 [07:00<06:36, 39.15it/s]
 50%|█████     | 15575/31100 [07:00<06:43, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.1177, 'learning_rate': 1.2129332211697972e-05, 'epoch': 2.57}


[2m[36m(_objective pid=16313)[0m  51%|█████▏    | 16008/31100 [07:11<06:35, 38.18it/s]
 51%|█████▏    | 16012/31100 [07:12<06:36, 38.09it/s]
 52%|█████▏    | 16017/31100 [07:12<06:26, 38.99it/s]
 52%|█████▏    | 16022/31100 [07:12<06:21, 39.56it/s]
 52%|█████▏    | 16026/31100 [07:12<06:34, 38.26it/s]
 52%|█████▏    | 16031/31100 [07:12<06:25, 39.08it/s]
 52%|█████▏    | 16036/31100 [07:12<06:22, 39.35it/s]
 52%|█████▏    | 16041/31100 [07:12<06:21, 39.51it/s]
 52%|█████▏    | 16045/31100 [07:12<06:35, 38.11it/s]
 52%|█████▏    | 16050/31100 [07:12<06:27, 38.80it/s]
 52%|█████▏    | 16054/31100 [07:13<06:40, 37.53it/s]
 52%|█████▏    | 16059/31100 [07:13<06:32, 38.29it/s]
 52%|█████▏    | 16064/31100 [07:13<06:24, 39.11it/s]
 52%|█████▏    | 16069/31100 [07:13<06:20, 39.48it/s]
 52%|█████▏    | 16073/31100 [07:13<06:21, 39.39it/s]
 52%|█████▏    | 16078/31100 [07:13<06:18, 39.67it/s]
 52%|█████▏    | 16083/31100 [07:13<06:13, 40.19it/s]
 52%|█████▏    | 16088/31100 [07:13<06:12, 40

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:27:51. Total running time: 7min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  53%|█████▎    | 16408/31100 [07:22<06:29, 37.69it/s]
 53%|█████▎    | 16412/31100 [07:22<06:33, 37.30it/s]
 53%|█████▎    | 16416/31100 [07:22<06:30, 37.63it/s]
 53%|█████▎    | 16420/31100 [07:22<06:45, 36.16it/s]
 53%|█████▎    | 16424/31100 [07:22<06:37, 36.94it/s]
 53%|█████▎    | 16428/31100 [07:22<06:36, 36.96it/s]
 53%|█████▎    | 16432/31100 [07:22<06:50, 35.74it/s]
 53%|█████▎    | 16436/31100 [07:23<06:44, 36.27it/s]
 53%|█████▎    | 16440/31100 [07:23<06:34, 37.20it/s]
 53%|█████▎    | 16444/31100 [07:23<06:33, 37.23it/s]
 53%|█████▎    | 16448/31100 [07:23<06:38, 36.73it/s]
 53%|█████▎    | 16453/31100 [07:23<06:24, 38.12it/s]
 53%|█████▎    | 16458/31100 [07:23<06:16, 38.90it/s]
 53%|█████▎    | 16462/31100 [07:23<06:22, 38.22it/s]
 53%|█████▎    | 16467/31100 [07:23<06:15, 38.93it/s]
 53%|█████▎    | 16471/31100 [07:23<06:13, 39.14it/s]
 53%|█████▎    | 16476/31100 [07:24<06:08, 39.66it/s]
 53%|█████▎    | 16480/31100 [07:24<06:09, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.1139, 'learning_rate': 1.1727698694754332e-05, 'epoch': 2.65}


[2m[36m(_objective pid=16313)[0m  53%|█████▎    | 16506/31100 [07:24<06:09, 39.46it/s]
 53%|█████▎    | 16511/31100 [07:24<06:07, 39.74it/s]
 53%|█████▎    | 16516/31100 [07:25<06:03, 40.08it/s]
 53%|█████▎    | 16521/31100 [07:25<06:01, 40.37it/s]
 53%|█████▎    | 16526/31100 [07:25<05:59, 40.57it/s]
 53%|█████▎    | 16531/31100 [07:25<06:00, 40.36it/s]
 53%|█████▎    | 16536/31100 [07:25<05:59, 40.47it/s]
 53%|█████▎    | 16541/31100 [07:25<06:12, 39.06it/s]
 53%|█████▎    | 16545/31100 [07:25<06:10, 39.29it/s]
 53%|█████▎    | 16549/31100 [07:25<06:11, 39.16it/s]
 53%|█████▎    | 16554/31100 [07:26<06:05, 39.79it/s]
 53%|█████▎    | 16559/31100 [07:26<06:03, 39.98it/s]
 53%|█████▎    | 16564/31100 [07:26<05:59, 40.48it/s]
 53%|█████▎    | 16569/31100 [07:26<05:56, 40.71it/s]
 53%|█████▎    | 16574/31100 [07:26<05:58, 40.57it/s]
 53%|█████▎    | 16579/31100 [07:26<05:59, 40.38it/s]
 53%|█████▎    | 16584/31100 [07:26<06:02, 39.99it/s]
 53%|█████▎    | 16589/31100 [07:26<06:05, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0819, 'learning_rate': 1.132606517781069e-05, 'epoch': 2.73}


[2m[36m(_objective pid=16313)[0m  55%|█████▍    | 17004/31100 [07:37<06:13, 37.71it/s]
 55%|█████▍    | 17008/31100 [07:37<06:12, 37.82it/s]
 55%|█████▍    | 17012/31100 [07:37<06:14, 37.64it/s]
 55%|█████▍    | 17016/31100 [07:38<06:22, 36.81it/s]
 55%|█████▍    | 17020/31100 [07:38<06:24, 36.59it/s]
 55%|█████▍    | 17024/31100 [07:38<06:21, 36.93it/s]
 55%|█████▍    | 17028/31100 [07:38<06:17, 37.28it/s]
 55%|█████▍    | 17032/31100 [07:38<06:15, 37.44it/s]
 55%|█████▍    | 17036/31100 [07:38<06:23, 36.63it/s]
 55%|█████▍    | 17040/31100 [07:38<06:17, 37.24it/s]
 55%|█████▍    | 17045/31100 [07:38<06:14, 37.57it/s]
 55%|█████▍    | 17049/31100 [07:38<06:11, 37.86it/s]
 55%|█████▍    | 17053/31100 [07:38<06:09, 38.01it/s]
 55%|█████▍    | 17057/31100 [07:39<06:16, 37.32it/s]
 55%|█████▍    | 17061/31100 [07:39<06:13, 37.59it/s]
 55%|█████▍    | 17065/31100 [07:39<06:08, 38.10it/s]
 55%|█████▍    | 17069/31100 [07:39<06:08, 38.05it/s]
 55%|█████▍    | 17073/31100 [07:39<06:04, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.1311, 'learning_rate': 1.0924431660867048e-05, 'epoch': 2.81}


 56%|█████▋    | 17507/31100 [07:50<06:10, 36.70it/s]
 56%|█████▋    | 17511/31100 [07:50<06:21, 35.61it/s]
 56%|█████▋    | 17515/31100 [07:50<06:12, 36.52it/s]
 56%|█████▋    | 17519/31100 [07:50<06:03, 37.39it/s]
 56%|█████▋    | 17523/31100 [07:51<06:16, 36.04it/s]
 56%|█████▋    | 17527/31100 [07:51<06:07, 36.97it/s]
 56%|█████▋    | 17531/31100 [07:51<06:13, 36.37it/s]
 56%|█████▋    | 17535/31100 [07:51<06:17, 35.93it/s]
 56%|█████▋    | 17539/31100 [07:51<06:32, 34.51it/s]
 56%|█████▋    | 17543/31100 [07:51<06:18, 35.81it/s]
 56%|█████▋    | 17547/31100 [07:51<06:16, 35.98it/s]
 56%|█████▋    | 17551/31100 [07:51<06:07, 36.88it/s]
 56%|█████▋    | 17555/31100 [07:51<06:16, 35.98it/s]
 56%|█████▋    | 17559/31100 [07:52<06:08, 36.72it/s]
 56%|█████▋    | 17563/31100 [07:52<06:02, 37.30it/s]
 56%|█████▋    | 17567/31100 [07:52<05:56, 37.94it/s]
 56%|█████▋    | 17571/31100 [07:52<05:53, 38.22it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:28:21. Total running time: 8min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  57%|█████▋    | 17575/31100 [07:52<05:50, 38.57it/s]
 57%|█████▋    | 17579/31100 [07:52<05:52, 38.35it/s]
 57%|█████▋    | 17583/31100 [07:52<06:01, 37.44it/s]
 57%|█████▋    | 17587/31100 [07:52<06:00, 37.52it/s]
 57%|█████▋    | 17591/31100 [07:52<05:56, 37.88it/s]
 57%|█████▋    | 17595/31100 [07:53<06:04, 37.04it/s]
 57%|█████▋    | 17599/31100 [07:53<06:17, 35.77it/s]
 57%|█████▋    | 17603/31100 [07:53<06:10, 36.44it/s]
 57%|█████▋    | 17607/31100 [07:53<06:04, 36.98it/s]
 57%|█████▋    | 17611/31100 [07:53<06:30, 34.53it/s]
 57%|█████▋    | 17615/31100 [07:53<06:29, 34.59it/s]
 57%|█████▋    | 17619/31100 [07:53<06:20, 35.42it/s]
 57%|█████▋    | 17623/31100 [07:53<06:11, 36.27it/s]
 57%|█████▋    | 17627/31100 [07:53<06:15, 35.90it/s]
 57%|█████▋    | 17631/31100 [07:54<06:08, 36.54it/s]
 57%|█████▋    | 17635/31100 [07:54<06:23, 35.09it/s]
 57%|█████▋    | 17639/31100 [07:54<06:33, 34.21it/s]
 57%|█████▋    | 17643/31100 [07:54<06:17, 35

[2m[36m(_objective pid=16313)[0m {'loss': 0.1342, 'learning_rate': 1.0522798143923408e-05, 'epoch': 2.89}


[2m[36m(_objective pid=16313)[0m  58%|█████▊    | 18007/31100 [08:03<05:51, 37.29it/s]
 58%|█████▊    | 18011/31100 [08:04<05:48, 37.57it/s]
 58%|█████▊    | 18016/31100 [08:04<05:39, 38.51it/s]
 58%|█████▊    | 18020/31100 [08:04<05:53, 37.05it/s]
 58%|█████▊    | 18024/31100 [08:04<05:46, 37.74it/s]
 58%|█████▊    | 18028/31100 [08:04<05:40, 38.35it/s]
 58%|█████▊    | 18032/31100 [08:04<05:36, 38.79it/s]
 58%|█████▊    | 18036/31100 [08:04<05:37, 38.69it/s]
 58%|█████▊    | 18040/31100 [08:04<05:35, 38.92it/s]
 58%|█████▊    | 18044/31100 [08:04<05:36, 38.78it/s]
 58%|█████▊    | 18048/31100 [08:04<05:37, 38.68it/s]
 58%|█████▊    | 18052/31100 [08:05<05:35, 38.93it/s]
 58%|█████▊    | 18056/31100 [08:05<05:33, 39.15it/s]
 58%|█████▊    | 18060/31100 [08:05<05:37, 38.60it/s]
 58%|█████▊    | 18064/31100 [08:05<05:37, 38.58it/s]
 58%|█████▊    | 18068/31100 [08:05<05:37, 38.58it/s]
 58%|█████▊    | 18072/31100 [08:05<05:34, 38.90it/s]
 58%|█████▊    | 18076/31100 [08:05<05:33, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.1104, 'learning_rate': 1.0121164626979766e-05, 'epoch': 2.97}


[2m[36m(_objective pid=16313)[0m  60%|█████▉    | 18509/31100 [08:17<05:20, 39.27it/s]
 60%|█████▉    | 18513/31100 [08:17<05:20, 39.24it/s]
 60%|█████▉    | 18517/31100 [08:17<05:21, 39.18it/s]
 60%|█████▉    | 18522/31100 [08:17<05:17, 39.67it/s]
 60%|█████▉    | 18527/31100 [08:17<05:15, 39.80it/s]
 60%|█████▉    | 18531/31100 [08:17<05:17, 39.65it/s]
 60%|█████▉    | 18535/31100 [08:17<05:25, 38.63it/s]
 60%|█████▉    | 18539/31100 [08:17<05:24, 38.74it/s]
 60%|█████▉    | 18543/31100 [08:17<05:22, 38.88it/s]
 60%|█████▉    | 18547/31100 [08:17<05:24, 38.67it/s]
 60%|█████▉    | 18551/31100 [08:18<05:31, 37.80it/s]
 60%|█████▉    | 18555/31100 [08:18<05:47, 36.10it/s]
 60%|█████▉    | 18560/31100 [08:18<05:34, 37.52it/s]
 60%|█████▉    | 18565/31100 [08:18<05:26, 38.41it/s]
 60%|█████▉    | 18569/31100 [08:18<05:23, 38.70it/s]
 60%|█████▉    | 18573/31100 [08:18<05:22, 38.78it/s]
 60%|█████▉    | 18577/31100 [08:18<05:25, 38.46it/s]
 60%|█████▉    | 18582/31100 [08:18<05:21, 38

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:28:51. Total running time: 8min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m 
[2m[36m(_objective pid=16313)[0m  15%|█▍        | 19/130 [00:01<00:09, 11.57it/s][A
[2m[36m(_objective pid=16313)[0m 
 16%|█▌        | 21/130 [00:01<00:08, 13.07it/s][A
[2m[36m(_objective pid=16313)[0m 
 18%|█▊        | 23/130 [00:01<00:09, 11.88it/s][A
[2m[36m(_objective pid=16313)[0m 
 20%|██        | 26/130 [00:01<00:07, 13.89it/s][A
[2m[36m(_objective pid=16313)[0m 
 22%|██▏       | 29/130 [00:02<00:06, 15.91it/s][A
[2m[36m(_objective pid=16313)[0m 
 24%|██▍       | 31/130 [00:02<00:06, 15.81it/s][A
[2m[36m(_objective pid=16313)[0m 
 26%|██▌       | 34/130 [00:02<00:05, 16.14it/s][A
[2m[36m(_objective pid=16313)[0m 
 28%|██▊       | 36/130 [00:02<00:05, 16.11it/s][A
[2m[36m(_objective pid=16313)[0m 
 29%|██▉       | 38/130 [00:02<00:05, 15.78it/s][A
[2m[36m(_objective pid=16313)[0m 
 31%|███       | 40/130 [00:02<00:06, 13.85it/s][A
[2m[36m(_objective pid=16313)[0m 
 32%|███▏      | 42/130 [00:02<00:06

Trial _objective_f556c_00000 finished iteration 3 at 2023-09-11 13:29:00. Total running time: 8min 38s
+-------------------------------------------------+
| Trial _objective_f556c_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        172.581 |
| time_total_s                             512.72 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.22816 |
| eval_runtime                             9.5495 |
| eval_samples_per_second                 434.264 |
| eval_steps_per_second                    13.613 |
| objective                               0.22816 |
+-------------------------------------------------+

[2m[36m(_objective pid=16313)[0m {'eval_loss': 0.2281644493341446, 'eval_runtime': 9.5495, 'eval_samples_per_second': 434.264, 'eval_steps_per_second': 13.613, 'epoch': 3.0}


[2m[36m(_objective pid=16313)[0m                                                      
[2m[36m(_objective pid=16313)[0m                                                  [A 60%|██████    | 18660/31100 [08:30<05:18, 39.08it/s]
[2m[36m(_objective pid=16313)[0m 100%|██████████| 130/130 [00:09<00:00, 12.01it/s][A
                                                 [A
 60%|██████    | 18664/31100 [08:31<2:45:49,  1.25it/s]
 60%|██████    | 18668/31100 [08:31<1:59:10,  1.74it/s]
 60%|██████    | 18672/31100 [08:32<1:26:00,  2.41it/s]
 60%|██████    | 18676/31100 [08:32<1:02:11,  3.33it/s]
 60%|██████    | 18680/31100 [08:32<45:18,  4.57it/s]  
 60%|██████    | 18684/31100 [08:32<33:23,  6.20it/s]
 60%|██████    | 18688/31100 [08:32<25:03,  8.25it/s]
 60%|██████    | 18692/31100 [08:32<19:16, 10.73it/s]
 60%|██████    | 18696/31100 [08:32<15:04, 13.71it/s]
 60%|██████    | 18701/31100 [08:32<11:36, 17.80it/s]
 60%|██████    | 18705/31100 [08:32<09:52, 20.91it/s]
 60%|██████    | 

[2m[36m(_objective pid=16313)[0m {'loss': 0.0727, 'learning_rate': 9.719531110036125e-06, 'epoch': 3.05}


[2m[36m(_objective pid=16313)[0m  61%|██████    | 19004/31100 [08:40<05:15, 38.35it/s]
 61%|██████    | 19008/31100 [08:40<05:23, 37.40it/s]
 61%|██████    | 19012/31100 [08:40<05:26, 37.03it/s]
 61%|██████    | 19016/31100 [08:40<05:26, 37.07it/s]
 61%|██████    | 19020/31100 [08:41<05:26, 37.04it/s]
 61%|██████    | 19024/31100 [08:41<05:32, 36.27it/s]
 61%|██████    | 19028/31100 [08:41<05:39, 35.61it/s]
 61%|██████    | 19032/31100 [08:41<05:35, 35.95it/s]
 61%|██████    | 19036/31100 [08:41<05:31, 36.37it/s]
 61%|██████    | 19040/31100 [08:41<05:38, 35.63it/s]
 61%|██████    | 19044/31100 [08:41<05:33, 36.14it/s]
 61%|██████    | 19048/31100 [08:41<05:32, 36.26it/s]
 61%|██████▏   | 19052/31100 [08:41<05:28, 36.67it/s]
 61%|██████▏   | 19056/31100 [08:42<05:34, 36.05it/s]
 61%|██████▏   | 19060/31100 [08:42<05:31, 36.29it/s]
 61%|██████▏   | 19064/31100 [08:42<05:28, 36.66it/s]
 61%|██████▏   | 19068/31100 [08:42<05:28, 36.60it/s]
 61%|██████▏   | 19072/31100 [08:42<05:30, 36

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:29:22. Total running time: 9min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598     

[2m[36m(_objective pid=16313)[0m  63%|██████▎   | 19460/31100 [08:52<04:51, 39.91it/s]
 63%|██████▎   | 19465/31100 [08:52<04:49, 40.15it/s]
 63%|██████▎   | 19470/31100 [08:52<04:50, 40.06it/s]
 63%|██████▎   | 19475/31100 [08:52<04:51, 39.84it/s]
 63%|██████▎   | 19480/31100 [08:53<04:51, 39.86it/s]
 63%|██████▎   | 19484/31100 [08:53<05:04, 38.13it/s]
 63%|██████▎   | 19488/31100 [08:53<05:01, 38.49it/s]
 63%|██████▎   | 19493/31100 [08:53<05:09, 37.45it/s]
 63%|██████▎   | 19497/31100 [08:53<05:07, 37.74it/s]
 63%|██████▎   | 19501/31100 [08:53<05:06, 37.87it/s]


[2m[36m(_objective pid=16313)[0m {'loss': 0.1039, 'learning_rate': 9.317897593092483e-06, 'epoch': 3.14}


[2m[36m(_objective pid=16313)[0m  63%|██████▎   | 19505/31100 [08:53<05:05, 37.91it/s]
 63%|██████▎   | 19509/31100 [08:53<05:04, 38.03it/s]
 63%|██████▎   | 19513/31100 [08:53<05:02, 38.26it/s]
 63%|██████▎   | 19517/31100 [08:54<05:12, 37.11it/s]
 63%|██████▎   | 19521/31100 [08:54<05:15, 36.66it/s]
 63%|██████▎   | 19525/31100 [08:54<05:21, 36.06it/s]
 63%|██████▎   | 19529/31100 [08:54<05:14, 36.75it/s]
 63%|██████▎   | 19533/31100 [08:54<05:16, 36.57it/s]
 63%|██████▎   | 19537/31100 [08:54<05:09, 37.38it/s]
 63%|██████▎   | 19541/31100 [08:54<05:11, 37.09it/s]
 63%|██████▎   | 19546/31100 [08:54<05:02, 38.21it/s]
 63%|██████▎   | 19551/31100 [08:54<04:57, 38.83it/s]
 63%|██████▎   | 19555/31100 [08:55<05:01, 38.30it/s]
 63%|██████▎   | 19559/31100 [08:55<04:59, 38.54it/s]
 63%|██████▎   | 19564/31100 [08:55<04:56, 38.96it/s]
 63%|██████▎   | 19568/31100 [08:55<05:09, 37.25it/s]
 63%|██████▎   | 19572/31100 [08:55<05:04, 37.89it/s]
 63%|██████▎   | 19576/31100 [08:55<05:00, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.1002, 'learning_rate': 8.91626407614884e-06, 'epoch': 3.22}


[2m[36m(_objective pid=16313)[0m  64%|██████▍   | 20003/31100 [09:06<04:46, 38.70it/s]
 64%|██████▍   | 20007/31100 [09:06<04:44, 39.05it/s]
 64%|██████▍   | 20011/31100 [09:06<04:50, 38.17it/s]
 64%|██████▍   | 20015/31100 [09:07<05:01, 36.74it/s]
 64%|██████▍   | 20019/31100 [09:07<04:57, 37.22it/s]
 64%|██████▍   | 20023/31100 [09:07<04:58, 37.10it/s]
 64%|██████▍   | 20027/31100 [09:07<04:56, 37.35it/s]
 64%|██████▍   | 20031/31100 [09:07<05:07, 36.05it/s]
 64%|██████▍   | 20035/31100 [09:07<05:03, 36.48it/s]
 64%|██████▍   | 20039/31100 [09:07<05:00, 36.86it/s]
 64%|██████▍   | 20043/31100 [09:07<04:55, 37.47it/s]
 64%|██████▍   | 20047/31100 [09:07<04:52, 37.74it/s]
 64%|██████▍   | 20051/31100 [09:08<04:51, 37.94it/s]
 64%|██████▍   | 20055/31100 [09:08<05:03, 36.36it/s]
 64%|██████▍   | 20059/31100 [09:08<04:57, 37.15it/s]
 65%|██████▍   | 20064/31100 [09:08<04:48, 38.21it/s]
 65%|██████▍   | 20068/31100 [09:08<04:49, 38.13it/s]
 65%|██████▍   | 20072/31100 [09:08<04:45, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.0689, 'learning_rate': 8.514630559205199e-06, 'epoch': 3.3}


[2m[36m(_objective pid=16313)[0m  66%|██████▌   | 20509/31100 [09:19<04:42, 37.53it/s]
 66%|██████▌   | 20513/31100 [09:20<04:42, 37.47it/s]
 66%|██████▌   | 20517/31100 [09:20<04:40, 37.69it/s]
 66%|██████▌   | 20521/31100 [09:20<04:40, 37.74it/s]
 66%|██████▌   | 20526/31100 [09:20<04:31, 38.90it/s]
 66%|██████▌   | 20530/31100 [09:20<04:29, 39.17it/s]
 66%|██████▌   | 20535/31100 [09:20<04:27, 39.55it/s]
 66%|██████▌   | 20540/31100 [09:20<04:25, 39.81it/s]
 66%|██████▌   | 20544/31100 [09:20<04:26, 39.68it/s]
 66%|██████▌   | 20549/31100 [09:20<04:23, 40.03it/s]
 66%|██████▌   | 20553/31100 [09:21<04:23, 40.02it/s]
 66%|██████▌   | 20557/31100 [09:21<04:26, 39.63it/s]
 66%|██████▌   | 20562/31100 [09:21<04:23, 39.93it/s]
 66%|██████▌   | 20566/31100 [09:21<04:27, 39.36it/s]
 66%|██████▌   | 20570/31100 [09:21<04:26, 39.47it/s]
 66%|██████▌   | 20574/31100 [09:21<04:26, 39.53it/s]
 66%|██████▌   | 20578/31100 [09:21<04:27, 39.33it/s]
 66%|██████▌   | 20582/31100 [09:21<04:27, 39

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:29:52. Total running time: 9min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  66%|██████▋   | 20610/31100 [09:22<04:28, 39.00it/s]
 66%|██████▋   | 20614/31100 [09:22<04:33, 38.39it/s]
 66%|██████▋   | 20618/31100 [09:22<04:32, 38.45it/s]
 66%|██████▋   | 20623/31100 [09:22<04:29, 38.91it/s]
 66%|██████▋   | 20628/31100 [09:23<04:26, 39.33it/s]
 66%|██████▋   | 20633/31100 [09:23<04:24, 39.55it/s]
 66%|██████▋   | 20638/31100 [09:23<04:23, 39.74it/s]
 66%|██████▋   | 20642/31100 [09:23<04:22, 39.78it/s]
 66%|██████▋   | 20647/31100 [09:23<04:21, 39.95it/s]
 66%|██████▋   | 20651/31100 [09:23<04:21, 39.94it/s]
 66%|██████▋   | 20655/31100 [09:23<04:23, 39.69it/s]
 66%|██████▋   | 20660/31100 [09:23<04:22, 39.78it/s]
 66%|██████▋   | 20664/31100 [09:23<04:22, 39.74it/s]
 66%|██████▋   | 20668/31100 [09:24<04:22, 39.79it/s]
 66%|██████▋   | 20672/31100 [09:24<04:24, 39.45it/s]
 66%|██████▋   | 20677/31100 [09:24<04:21, 39.82it/s]
 66%|██████▋   | 20681/31100 [09:24<04:26, 39.07it/s]
 67%|██████▋   | 20685/31100 [09:24<04:30, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.0918, 'learning_rate': 8.112997042261557e-06, 'epoch': 3.38}


[2m[36m(_objective pid=16313)[0m  68%|██████▊   | 21010/31100 [09:33<04:31, 37.17it/s]
 68%|██████▊   | 21014/31100 [09:33<04:29, 37.36it/s]
 68%|██████▊   | 21018/31100 [09:33<04:24, 38.09it/s]
 68%|██████▊   | 21022/31100 [09:33<04:20, 38.61it/s]
 68%|██████▊   | 21026/31100 [09:33<04:25, 37.91it/s]
 68%|██████▊   | 21030/31100 [09:33<04:24, 38.05it/s]
 68%|██████▊   | 21035/31100 [09:33<04:17, 39.04it/s]
 68%|██████▊   | 21040/31100 [09:33<04:16, 39.18it/s]
 68%|██████▊   | 21045/31100 [09:33<04:13, 39.74it/s]
 68%|██████▊   | 21049/31100 [09:34<04:13, 39.61it/s]
 68%|██████▊   | 21054/31100 [09:34<04:11, 39.91it/s]
 68%|██████▊   | 21058/31100 [09:34<04:12, 39.84it/s]
 68%|██████▊   | 21062/31100 [09:34<04:14, 39.40it/s]
 68%|██████▊   | 21066/31100 [09:34<04:15, 39.32it/s]
 68%|██████▊   | 21070/31100 [09:34<04:19, 38.61it/s]
 68%|██████▊   | 21074/31100 [09:34<04:23, 38.04it/s]
 68%|██████▊   | 21078/31100 [09:34<04:24, 37.86it/s]
 68%|██████▊   | 21082/31100 [09:34<04:22, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.0705, 'learning_rate': 7.711363525317917e-06, 'epoch': 3.46}


[2m[36m(_objective pid=16313)[0m  69%|██████▉   | 21505/31100 [09:45<04:42, 33.94it/s]
 69%|██████▉   | 21509/31100 [09:46<04:51, 32.88it/s]
 69%|██████▉   | 21513/31100 [09:46<04:44, 33.67it/s]
 69%|██████▉   | 21517/31100 [09:46<05:02, 31.70it/s]
 69%|██████▉   | 21521/31100 [09:46<04:49, 33.07it/s]
 69%|██████▉   | 21525/31100 [09:46<04:37, 34.45it/s]
 69%|██████▉   | 21529/31100 [09:46<04:30, 35.32it/s]
 69%|██████▉   | 21533/31100 [09:46<04:25, 36.02it/s]
 69%|██████▉   | 21537/31100 [09:46<04:19, 36.89it/s]
 69%|██████▉   | 21541/31100 [09:46<04:13, 37.64it/s]
 69%|██████▉   | 21545/31100 [09:47<04:14, 37.47it/s]
 69%|██████▉   | 21549/31100 [09:47<04:12, 37.87it/s]
 69%|██████▉   | 21553/31100 [09:47<04:13, 37.69it/s]
 69%|██████▉   | 21557/31100 [09:47<04:14, 37.44it/s]
 69%|██████▉   | 21561/31100 [09:47<04:18, 36.91it/s]
 69%|██████▉   | 21565/31100 [09:47<04:13, 37.60it/s]
 69%|██████▉   | 21569/31100 [09:47<04:16, 37.15it/s]
 69%|██████▉   | 21573/31100 [09:47<04:11, 37

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:30:22. Total running time: 10min 0s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  70%|██████▉   | 21756/31100 [09:52<03:57, 39.41it/s]
 70%|██████▉   | 21760/31100 [09:52<03:58, 39.24it/s]
 70%|██████▉   | 21765/31100 [09:52<03:55, 39.66it/s]
 70%|██████▉   | 21769/31100 [09:53<04:02, 38.55it/s]
 70%|███████   | 21773/31100 [09:53<04:01, 38.67it/s]
 70%|███████   | 21778/31100 [09:53<03:58, 39.16it/s]
 70%|███████   | 21783/31100 [09:53<03:55, 39.63it/s]
 70%|███████   | 21787/31100 [09:53<03:55, 39.60it/s]
 70%|███████   | 21792/31100 [09:53<03:54, 39.76it/s]
 70%|███████   | 21797/31100 [09:53<03:52, 39.99it/s]
 70%|███████   | 21801/31100 [09:53<03:53, 39.86it/s]
 70%|███████   | 21805/31100 [09:53<03:53, 39.72it/s]
 70%|███████   | 21809/31100 [09:54<03:53, 39.77it/s]
 70%|███████   | 21813/31100 [09:54<03:55, 39.47it/s]
 70%|███████   | 21818/31100 [09:54<03:52, 39.88it/s]
 70%|███████   | 21822/31100 [09:54<04:08, 37.38it/s]
 70%|███████   | 21826/31100 [09:54<04:03, 38.09it/s]
 70%|███████   | 21830/31100 [09:54<04:05, 37

[2m[36m(_objective pid=16313)[0m {'loss': 0.0815, 'learning_rate': 7.309730008374276e-06, 'epoch': 3.54}


[2m[36m(_objective pid=16313)[0m  71%|███████   | 22003/31100 [09:59<03:52, 39.11it/s]
 71%|███████   | 22007/31100 [09:59<03:55, 38.58it/s]
 71%|███████   | 22011/31100 [09:59<03:56, 38.38it/s]
 71%|███████   | 22015/31100 [09:59<03:58, 38.02it/s]
 71%|███████   | 22019/31100 [09:59<03:59, 37.99it/s]
 71%|███████   | 22023/31100 [09:59<04:02, 37.39it/s]
 71%|███████   | 22027/31100 [09:59<04:00, 37.66it/s]
 71%|███████   | 22031/31100 [09:59<03:59, 37.83it/s]
 71%|███████   | 22035/31100 [09:59<03:56, 38.37it/s]
 71%|███████   | 22039/31100 [09:59<03:55, 38.44it/s]
 71%|███████   | 22043/31100 [10:00<03:55, 38.46it/s]
 71%|███████   | 22047/31100 [10:00<03:59, 37.82it/s]
 71%|███████   | 22051/31100 [10:00<03:56, 38.26it/s]
 71%|███████   | 22055/31100 [10:00<04:10, 36.16it/s]
 71%|███████   | 22059/31100 [10:00<04:15, 35.45it/s]
 71%|███████   | 22063/31100 [10:00<04:12, 35.74it/s]
 71%|███████   | 22068/31100 [10:00<04:04, 36.96it/s]
 71%|███████   | 22072/31100 [10:00<04:01, 37

[2m[36m(_objective pid=16313)[0m {'loss': 0.0666, 'learning_rate': 6.908096491430634e-06, 'epoch': 3.62}


[2m[36m(_objective pid=16313)[0m  72%|███████▏  | 22505/31100 [10:12<03:48, 37.61it/s]
 72%|███████▏  | 22510/31100 [10:12<03:43, 38.48it/s]
 72%|███████▏  | 22514/31100 [10:12<03:48, 37.65it/s]
 72%|███████▏  | 22518/31100 [10:12<03:46, 37.89it/s]
 72%|███████▏  | 22522/31100 [10:12<03:45, 38.03it/s]
 72%|███████▏  | 22526/31100 [10:12<03:43, 38.43it/s]
 72%|███████▏  | 22531/31100 [10:12<03:40, 38.86it/s]
 72%|███████▏  | 22536/31100 [10:12<03:36, 39.50it/s]
 72%|███████▏  | 22541/31100 [10:13<03:32, 40.26it/s]
 72%|███████▏  | 22546/31100 [10:13<03:36, 39.53it/s]
 73%|███████▎  | 22551/31100 [10:13<03:33, 40.05it/s]
 73%|███████▎  | 22556/31100 [10:13<03:33, 40.05it/s]
 73%|███████▎  | 22561/31100 [10:13<03:37, 39.28it/s]
 73%|███████▎  | 22566/31100 [10:13<03:34, 39.82it/s]
 73%|███████▎  | 22570/31100 [10:13<03:36, 39.46it/s]
 73%|███████▎  | 22574/31100 [10:13<03:39, 38.90it/s]
 73%|███████▎  | 22578/31100 [10:14<03:39, 38.80it/s]
 73%|███████▎  | 22582/31100 [10:14<03:38, 39

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:30:52. Total running time: 10min 30s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598   

[2m[36m(_objective pid=16313)[0m  74%|███████▎  | 22907/31100 [10:22<03:36, 37.91it/s]
 74%|███████▎  | 22912/31100 [10:22<03:31, 38.72it/s]
 74%|███████▎  | 22917/31100 [10:22<03:28, 39.22it/s]
 74%|███████▎  | 22922/31100 [10:23<03:26, 39.57it/s]
 74%|███████▎  | 22926/31100 [10:23<03:27, 39.42it/s]
 74%|███████▎  | 22931/31100 [10:23<03:25, 39.75it/s]
 74%|███████▎  | 22935/31100 [10:23<03:26, 39.61it/s]
 74%|███████▍  | 22940/31100 [10:23<03:24, 39.82it/s]
 74%|███████▍  | 22944/31100 [10:23<03:35, 37.80it/s]
 74%|███████▍  | 22948/31100 [10:23<03:33, 38.21it/s]
 74%|███████▍  | 22952/31100 [10:23<03:39, 37.13it/s]
 74%|███████▍  | 22957/31100 [10:23<03:34, 38.02it/s]
 74%|███████▍  | 22961/31100 [10:24<03:31, 38.39it/s]
 74%|███████▍  | 22966/31100 [10:24<03:30, 38.62it/s]
 74%|███████▍  | 22971/31100 [10:24<03:27, 39.10it/s]
 74%|███████▍  | 22976/31100 [10:24<03:24, 39.67it/s]
 74%|███████▍  | 22980/31100 [10:24<03:25, 39.45it/s]
 74%|███████▍  | 22985/31100 [10:24<03:23, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0879, 'learning_rate': 6.506462974486992e-06, 'epoch': 3.7}


[2m[36m(_objective pid=16313)[0m  74%|███████▍  | 23002/31100 [10:25<03:27, 39.12it/s]
 74%|███████▍  | 23006/31100 [10:25<03:26, 39.19it/s]
 74%|███████▍  | 23010/31100 [10:25<03:25, 39.32it/s]
 74%|███████▍  | 23014/31100 [10:25<03:25, 39.35it/s]
 74%|███████▍  | 23019/31100 [10:25<03:24, 39.53it/s]
 74%|███████▍  | 23024/31100 [10:25<03:23, 39.78it/s]
 74%|███████▍  | 23029/31100 [10:25<03:21, 40.06it/s]
 74%|███████▍  | 23034/31100 [10:25<03:22, 39.83it/s]
 74%|███████▍  | 23038/31100 [10:26<03:34, 37.62it/s]
 74%|███████▍  | 23042/31100 [10:26<03:31, 38.18it/s]
 74%|███████▍  | 23046/31100 [10:26<03:30, 38.31it/s]
 74%|███████▍  | 23050/31100 [10:26<03:28, 38.57it/s]
 74%|███████▍  | 23055/31100 [10:26<03:26, 39.02it/s]
 74%|███████▍  | 23059/31100 [10:26<03:24, 39.24it/s]
 74%|███████▍  | 23063/31100 [10:26<03:23, 39.43it/s]
 74%|███████▍  | 23067/31100 [10:26<03:23, 39.45it/s]
 74%|███████▍  | 23072/31100 [10:26<03:21, 39.82it/s]
 74%|███████▍  | 23076/31100 [10:27<03:22, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0831, 'learning_rate': 6.1048294575433505e-06, 'epoch': 3.78}


[2m[36m(_objective pid=16313)[0m  76%|███████▌  | 23507/31100 [10:38<03:22, 37.45it/s]
 76%|███████▌  | 23511/31100 [10:38<03:29, 36.16it/s]
 76%|███████▌  | 23515/31100 [10:38<03:24, 37.11it/s]
 76%|███████▌  | 23519/31100 [10:38<03:21, 37.64it/s]
 76%|███████▌  | 23524/31100 [10:39<03:15, 38.74it/s]
 76%|███████▌  | 23528/31100 [10:39<03:14, 38.97it/s]
 76%|███████▌  | 23532/31100 [10:39<03:13, 39.19it/s]
 76%|███████▌  | 23536/31100 [10:39<03:13, 39.02it/s]
 76%|███████▌  | 23540/31100 [10:39<03:15, 38.63it/s]
 76%|███████▌  | 23544/31100 [10:39<03:16, 38.49it/s]
 76%|███████▌  | 23548/31100 [10:39<03:14, 38.81it/s]
 76%|███████▌  | 23553/31100 [10:39<03:12, 39.26it/s]
 76%|███████▌  | 23557/31100 [10:39<03:18, 37.99it/s]
 76%|███████▌  | 23561/31100 [10:39<03:15, 38.52it/s]
 76%|███████▌  | 23566/31100 [10:40<03:11, 39.37it/s]
 76%|███████▌  | 23571/31100 [10:40<03:09, 39.70it/s]
 76%|███████▌  | 23575/31100 [10:40<03:11, 39.28it/s]
 76%|███████▌  | 23579/31100 [10:40<03:15, 38

[2m[36m(_objective pid=16313)[0m {'loss': 0.0668, 'learning_rate': 5.7031959405997095e-06, 'epoch': 3.86}


[2m[36m(_objective pid=16313)[0m  77%|███████▋  | 24009/31100 [10:51<03:13, 36.66it/s]
 77%|███████▋  | 24013/31100 [10:51<03:12, 36.75it/s]
 77%|███████▋  | 24017/31100 [10:52<03:21, 35.22it/s]
 77%|███████▋  | 24021/31100 [10:52<03:16, 35.96it/s]
 77%|███████▋  | 24025/31100 [10:52<03:12, 36.79it/s]
 77%|███████▋  | 24029/31100 [10:52<03:08, 37.56it/s]
 77%|███████▋  | 24034/31100 [10:52<03:03, 38.59it/s]


Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:31:22. Total running time: 11min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  77%|███████▋  | 24039/31100 [10:52<02:59, 39.26it/s]
 77%|███████▋  | 24044/31100 [10:52<02:58, 39.49it/s]
 77%|███████▋  | 24048/31100 [10:52<03:04, 38.21it/s]
 77%|███████▋  | 24053/31100 [10:52<03:01, 38.90it/s]
 77%|███████▋  | 24058/31100 [10:53<02:57, 39.58it/s]
 77%|███████▋  | 24062/31100 [10:53<03:03, 38.31it/s]
 77%|███████▋  | 24067/31100 [10:53<03:00, 38.87it/s]
 77%|███████▋  | 24072/31100 [10:53<02:57, 39.68it/s]
 77%|███████▋  | 24077/31100 [10:53<02:55, 40.11it/s]
 77%|███████▋  | 24082/31100 [10:53<02:54, 40.18it/s]
 77%|███████▋  | 24087/31100 [10:53<02:54, 40.24it/s]
 77%|███████▋  | 24092/31100 [10:53<02:52, 40.51it/s]
 77%|███████▋  | 24097/31100 [10:54<02:53, 40.37it/s]
 77%|███████▋  | 24102/31100 [10:54<02:52, 40.52it/s]
 78%|███████▊  | 24107/31100 [10:54<02:52, 40.51it/s]
 78%|███████▊  | 24112/31100 [10:54<02:50, 40.93it/s]
 78%|███████▊  | 24117/31100 [10:54<02:54, 40.06it/s]
 78%|███████▊  | 24122/31100 [10:54<02:55, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0689, 'learning_rate': 5.301562423656068e-06, 'epoch': 3.94}


[2m[36m(_objective pid=16313)[0m  79%|███████▉  | 24510/31100 [11:04<02:57, 37.20it/s]
 79%|███████▉  | 24514/31100 [11:04<02:56, 37.23it/s]
 79%|███████▉  | 24518/31100 [11:04<02:54, 37.75it/s]
 79%|███████▉  | 24522/31100 [11:04<02:52, 38.14it/s]
 79%|███████▉  | 24526/31100 [11:04<02:50, 38.63it/s]
 79%|███████▉  | 24530/31100 [11:05<02:56, 37.31it/s]
 79%|███████▉  | 24534/31100 [11:05<03:00, 36.48it/s]
 79%|███████▉  | 24538/31100 [11:05<03:00, 36.44it/s]
 79%|███████▉  | 24542/31100 [11:05<03:04, 35.56it/s]
 79%|███████▉  | 24546/31100 [11:05<03:05, 35.26it/s]
 79%|███████▉  | 24550/31100 [11:05<03:03, 35.64it/s]
 79%|███████▉  | 24554/31100 [11:05<03:03, 35.64it/s]
 79%|███████▉  | 24558/31100 [11:05<03:01, 36.02it/s]
 79%|███████▉  | 24562/31100 [11:05<02:57, 36.83it/s]
 79%|███████▉  | 24566/31100 [11:06<02:55, 37.18it/s]
 79%|███████▉  | 24570/31100 [11:06<02:58, 36.63it/s]
 79%|███████▉  | 24574/31100 [11:06<03:01, 35.99it/s]
 79%|███████▉  | 24578/31100 [11:06<02:58, 36

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:31:52. Total running time: 11min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598   

[2m[36m(_objective pid=16313)[0m 
 92%|█████████▏| 120/130 [00:08<00:00, 14.35it/s][A
[2m[36m(_objective pid=16313)[0m 
 94%|█████████▍| 122/130 [00:08<00:00, 13.77it/s][A
[2m[36m(_objective pid=16313)[0m 
 95%|█████████▌| 124/130 [00:08<00:00, 13.22it/s][A
[2m[36m(_objective pid=16313)[0m 
 98%|█████████▊| 127/130 [00:09<00:00, 13.17it/s][A
[2m[36m(_objective pid=16313)[0m 
 99%|█████████▉| 129/130 [00:09<00:00, 12.00it/s][A


Trial _objective_f556c_00000 finished iteration 4 at 2023-09-11 13:31:53. Total running time: 11min 32s
+-------------------------------------------------+
| Trial _objective_f556c_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        173.214 |
| time_total_s                            685.934 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.27541 |
| eval_runtime                             9.5524 |
| eval_samples_per_second                 434.131 |
| eval_steps_per_second                    13.609 |
| objective                               0.27541 |
+-------------------------------------------------+

[2m[36m(_objective pid=16313)[0m {'eval_loss': 0.2754107117652893, 'eval_runtime': 9.5524, 'eval_samples_per_second': 434.131, 'eval_steps_per_second': 13.609, 'epoch': 4.0}


[2m[36m(_objective pid=16313)[0m                                                      
[2m[36m(_objective pid=16313)[0m                                                  [A 80%|████████  | 24880/31100 [11:23<02:32, 40.66it/s]
[2m[36m(_objective pid=16313)[0m 100%|██████████| 130/130 [00:09<00:00, 12.00it/s][A
                                                 [A
 80%|████████  | 24883/31100 [11:25<1:11:32,  1.45it/s]
 80%|████████  | 24888/31100 [11:25<50:26,  2.05it/s]  
 80%|████████  | 24893/31100 [11:25<35:54,  2.88it/s]
 80%|████████  | 24897/31100 [11:25<27:21,  3.78it/s]
 80%|████████  | 24902/31100 [11:25<19:32,  5.29it/s]
 80%|████████  | 24906/31100 [11:25<15:02,  6.86it/s]
 80%|████████  | 24911/31100 [11:25<11:02,  9.34it/s]
 80%|████████  | 24916/31100 [11:25<08:21, 12.32it/s]
 80%|████████  | 24920/31100 [11:25<06:49, 15.07it/s]
 80%|████████  | 24925/31100 [11:26<05:27, 18.86it/s]
 80%|████████  | 24929/31100 [11:26<04:41, 21.93it/s]
 80%|████████  | 24933/

[2m[36m(_objective pid=16313)[0m {'loss': 0.0465, 'learning_rate': 4.899928906712427e-06, 'epoch': 4.02}


[2m[36m(_objective pid=16313)[0m  80%|████████  | 25006/31100 [11:28<02:29, 40.68it/s]
 80%|████████  | 25011/31100 [11:28<02:28, 41.01it/s]
 80%|████████  | 25016/31100 [11:28<02:31, 40.29it/s]
 80%|████████  | 25021/31100 [11:28<02:31, 40.13it/s]
 80%|████████  | 25026/31100 [11:28<02:37, 38.53it/s]
 80%|████████  | 25031/31100 [11:28<02:34, 39.20it/s]
 80%|████████  | 25035/31100 [11:28<02:39, 38.10it/s]
 81%|████████  | 25040/31100 [11:29<02:36, 38.84it/s]
 81%|████████  | 25044/31100 [11:29<02:37, 38.51it/s]
 81%|████████  | 25049/31100 [11:29<02:34, 39.15it/s]
 81%|████████  | 25053/31100 [11:29<02:34, 39.23it/s]
 81%|████████  | 25058/31100 [11:29<02:31, 39.76it/s]
 81%|████████  | 25063/31100 [11:29<02:31, 39.96it/s]
 81%|████████  | 25068/31100 [11:29<02:29, 40.40it/s]
 81%|████████  | 25073/31100 [11:29<02:29, 40.24it/s]
 81%|████████  | 25078/31100 [11:30<02:29, 40.25it/s]
 81%|████████  | 25083/31100 [11:30<02:28, 40.64it/s]
 81%|████████  | 25088/31100 [11:30<02:27, 40

[2m[36m(_objective pid=16313)[0m {'loss': 0.0787, 'learning_rate': 4.498295389768785e-06, 'epoch': 4.1}


[2m[36m(_objective pid=16313)[0m  82%|████████▏ | 25502/31100 [11:40<02:16, 41.01it/s]
 82%|████████▏ | 25507/31100 [11:41<02:15, 41.34it/s]
 82%|████████▏ | 25512/31100 [11:41<02:14, 41.47it/s]
 82%|████████▏ | 25517/31100 [11:41<02:14, 41.45it/s]
 82%|████████▏ | 25522/31100 [11:41<02:15, 41.12it/s]
 82%|████████▏ | 25527/31100 [11:41<02:14, 41.30it/s]
 82%|████████▏ | 25532/31100 [11:41<02:13, 41.58it/s]
 82%|████████▏ | 25537/31100 [11:41<02:17, 40.31it/s]
 82%|████████▏ | 25542/31100 [11:41<02:23, 38.71it/s]
 82%|████████▏ | 25547/31100 [11:42<02:20, 39.59it/s]
 82%|████████▏ | 25552/31100 [11:42<02:17, 40.21it/s]
 82%|████████▏ | 25557/31100 [11:42<02:15, 40.77it/s]
 82%|████████▏ | 25562/31100 [11:42<02:14, 41.17it/s]
 82%|████████▏ | 25567/31100 [11:42<02:14, 41.01it/s]
 82%|████████▏ | 25572/31100 [11:42<02:18, 39.85it/s]
 82%|████████▏ | 25577/31100 [11:42<02:16, 40.35it/s]
 82%|████████▏ | 25582/31100 [11:42<02:18, 39.70it/s]
 82%|████████▏ | 25587/31100 [11:43<02:17, 40

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:32:22. Total running time: 12min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  84%|████████▎ | 25971/31100 [11:52<02:18, 36.99it/s]
 84%|████████▎ | 25975/31100 [11:52<02:21, 36.20it/s]
 84%|████████▎ | 25979/31100 [11:52<02:19, 36.79it/s]
 84%|████████▎ | 25984/31100 [11:53<02:14, 37.97it/s]
 84%|████████▎ | 25988/31100 [11:53<02:14, 37.95it/s]
 84%|████████▎ | 25992/31100 [11:53<02:15, 37.79it/s]
 84%|████████▎ | 25996/31100 [11:53<02:13, 38.28it/s]
 84%|████████▎ | 26000/31100 [11:53<02:15, 37.53it/s]


[2m[36m(_objective pid=16313)[0m {'loss': 0.0427, 'learning_rate': 4.096661872825143e-06, 'epoch': 4.18}


[2m[36m(_objective pid=16313)[0m  84%|████████▎ | 26004/31100 [11:53<02:18, 36.84it/s]
 84%|████████▎ | 26008/31100 [11:53<02:21, 36.06it/s]
 84%|████████▎ | 26012/31100 [11:53<02:18, 36.80it/s]
 84%|████████▎ | 26016/31100 [11:53<02:20, 36.26it/s]
 84%|████████▎ | 26020/31100 [11:54<02:19, 36.40it/s]
 84%|████████▎ | 26024/31100 [11:54<02:20, 36.12it/s]
 84%|████████▎ | 26028/31100 [11:54<02:17, 36.86it/s]
 84%|████████▎ | 26032/31100 [11:54<02:15, 37.49it/s]
 84%|████████▎ | 26036/31100 [11:54<02:14, 37.79it/s]
 84%|████████▎ | 26040/31100 [11:54<02:14, 37.65it/s]
 84%|████████▎ | 26044/31100 [11:54<02:15, 37.23it/s]
 84%|████████▍ | 26048/31100 [11:54<02:15, 37.32it/s]
 84%|████████▍ | 26052/31100 [11:54<02:20, 35.87it/s]
 84%|████████▍ | 26056/31100 [11:55<02:19, 36.12it/s]
 84%|████████▍ | 26060/31100 [11:55<02:17, 36.53it/s]
 84%|████████▍ | 26064/31100 [11:55<02:17, 36.54it/s]
 84%|████████▍ | 26068/31100 [11:55<02:16, 36.80it/s]
 84%|████████▍ | 26072/31100 [11:55<02:15, 37

[2m[36m(_objective pid=16313)[0m {'loss': 0.0568, 'learning_rate': 3.6950283558815015e-06, 'epoch': 4.26}


[2m[36m(_objective pid=16313)[0m  85%|████████▌ | 26504/31100 [12:06<01:59, 38.37it/s]
 85%|████████▌ | 26508/31100 [12:06<02:00, 37.96it/s]
 85%|████████▌ | 26512/31100 [12:06<02:00, 38.18it/s]
 85%|████████▌ | 26516/31100 [12:06<01:59, 38.49it/s]
 85%|████████▌ | 26520/31100 [12:06<02:05, 36.43it/s]
 85%|████████▌ | 26524/31100 [12:06<02:08, 35.68it/s]
 85%|████████▌ | 26528/31100 [12:06<02:04, 36.67it/s]
 85%|████████▌ | 26532/31100 [12:07<02:07, 35.86it/s]
 85%|████████▌ | 26536/31100 [12:07<02:04, 36.66it/s]
 85%|████████▌ | 26540/31100 [12:07<02:07, 35.72it/s]
 85%|████████▌ | 26545/31100 [12:07<02:02, 37.24it/s]
 85%|████████▌ | 26550/31100 [12:07<01:59, 38.18it/s]
 85%|████████▌ | 26554/31100 [12:07<01:57, 38.60it/s]
 85%|████████▌ | 26558/31100 [12:07<01:57, 38.68it/s]
 85%|████████▌ | 26562/31100 [12:07<02:07, 35.68it/s]
 85%|████████▌ | 26567/31100 [12:08<02:02, 37.07it/s]
 85%|████████▌ | 26571/31100 [12:08<01:59, 37.84it/s]
 85%|████████▌ | 26575/31100 [12:08<02:00, 37

[2m[36m(_objective pid=16313)[0m {'loss': 0.0353, 'learning_rate': 3.29339483893786e-06, 'epoch': 4.34}


[2m[36m(_objective pid=16313)[0m  87%|████████▋ | 27008/31100 [12:19<01:41, 40.34it/s]
 87%|████████▋ | 27013/31100 [12:19<01:41, 40.25it/s]
 87%|████████▋ | 27018/31100 [12:19<01:41, 40.40it/s]
 87%|████████▋ | 27023/31100 [12:19<01:40, 40.45it/s]
 87%|████████▋ | 27028/31100 [12:19<01:40, 40.49it/s]
 87%|████████▋ | 27033/31100 [12:20<01:42, 39.83it/s]
 87%|████████▋ | 27038/31100 [12:20<01:41, 39.96it/s]
 87%|████████▋ | 27042/31100 [12:20<01:42, 39.47it/s]
 87%|████████▋ | 27046/31100 [12:20<01:43, 39.36it/s]
 87%|████████▋ | 27050/31100 [12:20<01:47, 37.85it/s]
 87%|████████▋ | 27054/31100 [12:20<01:45, 38.34it/s]
 87%|████████▋ | 27058/31100 [12:20<01:44, 38.73it/s]
 87%|████████▋ | 27062/31100 [12:20<01:47, 37.53it/s]
 87%|████████▋ | 27066/31100 [12:20<01:45, 38.06it/s]
 87%|████████▋ | 27071/31100 [12:21<01:43, 38.89it/s]
 87%|████████▋ | 27076/31100 [12:21<01:41, 39.52it/s]
 87%|████████▋ | 27081/31100 [12:21<01:40, 39.90it/s]
 87%|████████▋ | 27085/31100 [12:21<01:40, 39

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:32:52. Total running time: 12min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598   

[2m[36m(_objective pid=16313)[0m  87%|████████▋ | 27136/31100 [12:22<01:41, 38.98it/s]
 87%|████████▋ | 27140/31100 [12:22<01:41, 38.89it/s]
 87%|████████▋ | 27144/31100 [12:22<01:41, 39.11it/s]
 87%|████████▋ | 27148/31100 [12:22<01:41, 39.13it/s]
 87%|████████▋ | 27152/31100 [12:23<01:40, 39.37it/s]
 87%|████████▋ | 27156/31100 [12:23<01:41, 38.97it/s]
 87%|████████▋ | 27160/31100 [12:23<01:41, 38.70it/s]
 87%|████████▋ | 27164/31100 [12:23<01:43, 37.90it/s]
 87%|████████▋ | 27168/31100 [12:23<01:44, 37.70it/s]
 87%|████████▋ | 27172/31100 [12:23<01:43, 37.99it/s]
 87%|████████▋ | 27176/31100 [12:23<01:47, 36.43it/s]
 87%|████████▋ | 27180/31100 [12:23<01:45, 37.20it/s]
 87%|████████▋ | 27184/31100 [12:23<01:43, 37.76it/s]
 87%|████████▋ | 27188/31100 [12:24<01:43, 37.89it/s]
 87%|████████▋ | 27192/31100 [12:24<01:43, 37.64it/s]
 87%|████████▋ | 27196/31100 [12:24<01:45, 36.95it/s]
 87%|████████▋ | 27200/31100 [12:24<01:46, 36.78it/s]
 87%|████████▋ | 27204/31100 [12:24<01:45, 36

[2m[36m(_objective pid=16313)[0m {'loss': 0.0394, 'learning_rate': 2.8917613219942187e-06, 'epoch': 4.42}


[2m[36m(_objective pid=16313)[0m  88%|████████▊ | 27512/31100 [12:32<01:30, 39.78it/s]
 88%|████████▊ | 27516/31100 [12:32<01:30, 39.75it/s]
 88%|████████▊ | 27520/31100 [12:32<01:32, 38.83it/s]
 89%|████████▊ | 27524/31100 [12:32<01:31, 39.14it/s]
 89%|████████▊ | 27528/31100 [12:32<01:32, 38.78it/s]
 89%|████████▊ | 27532/31100 [12:32<01:33, 38.14it/s]
 89%|████████▊ | 27537/31100 [12:33<01:30, 39.24it/s]
 89%|████████▊ | 27541/31100 [12:33<01:31, 38.91it/s]
 89%|████████▊ | 27546/31100 [12:33<01:29, 39.51it/s]
 89%|████████▊ | 27550/31100 [12:33<01:33, 37.84it/s]
 89%|████████▊ | 27554/31100 [12:33<01:33, 37.93it/s]
 89%|████████▊ | 27559/31100 [12:33<01:31, 38.65it/s]
 89%|████████▊ | 27563/31100 [12:33<01:34, 37.40it/s]
 89%|████████▊ | 27568/31100 [12:33<01:31, 38.52it/s]
 89%|████████▊ | 27572/31100 [12:33<01:31, 38.37it/s]
 89%|████████▊ | 27576/31100 [12:34<01:31, 38.39it/s]
 89%|████████▊ | 27580/31100 [12:34<01:31, 38.65it/s]
 89%|████████▊ | 27585/31100 [12:34<01:29, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0607, 'learning_rate': 2.4901278050505772e-06, 'epoch': 4.5}


[2m[36m(_objective pid=16313)[0m  90%|█████████ | 28006/31100 [12:45<01:16, 40.25it/s]
 90%|█████████ | 28011/31100 [12:45<01:16, 40.16it/s]
 90%|█████████ | 28016/31100 [12:45<01:16, 40.20it/s]
 90%|█████████ | 28021/31100 [12:45<01:16, 40.38it/s]
 90%|█████████ | 28026/31100 [12:45<01:16, 40.24it/s]
 90%|█████████ | 28031/31100 [12:45<01:15, 40.63it/s]
 90%|█████████ | 28036/31100 [12:45<01:15, 40.78it/s]
 90%|█████████ | 28041/31100 [12:45<01:15, 40.72it/s]
 90%|█████████ | 28046/31100 [12:46<01:15, 40.36it/s]
 90%|█████████ | 28051/31100 [12:46<01:16, 39.98it/s]
 90%|█████████ | 28055/31100 [12:46<01:19, 38.26it/s]
 90%|█████████ | 28059/31100 [12:46<01:18, 38.63it/s]
 90%|█████████ | 28063/31100 [12:46<01:17, 38.96it/s]
 90%|█████████ | 28067/31100 [12:46<01:17, 39.11it/s]
 90%|█████████ | 28072/31100 [12:46<01:16, 39.61it/s]
 90%|█████████ | 28077/31100 [12:46<01:15, 40.09it/s]
 90%|█████████ | 28082/31100 [12:47<01:15, 39.72it/s]
 90%|█████████ | 28086/31100 [12:47<01:16, 39

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:33:22. Total running time: 13min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  91%|█████████ | 28315/31100 [12:52<01:09, 40.09it/s]
 91%|█████████ | 28320/31100 [12:53<01:08, 40.44it/s]
 91%|█████████ | 28325/31100 [12:53<01:08, 40.65it/s]
 91%|█████████ | 28330/31100 [12:53<01:08, 40.62it/s]
 91%|█████████ | 28335/31100 [12:53<01:07, 40.81it/s]
 91%|█████████ | 28340/31100 [12:53<01:10, 39.05it/s]
 91%|█████████ | 28345/31100 [12:53<01:10, 38.81it/s]
 91%|█████████ | 28349/31100 [12:53<01:10, 39.00it/s]
 91%|█████████ | 28353/31100 [12:53<01:12, 38.12it/s]
 91%|█████████ | 28358/31100 [12:53<01:10, 38.79it/s]
 91%|█████████ | 28362/31100 [12:54<01:10, 39.05it/s]
 91%|█████████ | 28367/31100 [12:54<01:08, 39.76it/s]
 91%|█████████ | 28372/31100 [12:54<01:07, 40.18it/s]
 91%|█████████ | 28377/31100 [12:54<01:08, 39.68it/s]
 91%|█████████▏| 28382/31100 [12:54<01:08, 39.76it/s]
 91%|█████████▏| 28386/31100 [12:54<01:08, 39.34it/s]
 91%|█████████▏| 28390/31100 [12:54<01:08, 39.47it/s]
 91%|█████████▏| 28395/31100 [12:54<01:08, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0174, 'learning_rate': 2.088494288106936e-06, 'epoch': 4.58}


[2m[36m(_objective pid=16313)[0m  92%|█████████▏| 28506/31100 [12:57<01:09, 37.06it/s]
 92%|█████████▏| 28510/31100 [12:57<01:09, 37.12it/s]
 92%|█████████▏| 28514/31100 [12:58<01:09, 37.09it/s]
 92%|█████████▏| 28518/31100 [12:58<01:09, 37.12it/s]
 92%|█████████▏| 28522/31100 [12:58<01:08, 37.86it/s]
 92%|█████████▏| 28526/31100 [12:58<01:07, 37.93it/s]
 92%|█████████▏| 28530/31100 [12:58<01:07, 38.30it/s]
 92%|█████████▏| 28534/31100 [12:58<01:09, 37.18it/s]
 92%|█████████▏| 28538/31100 [12:58<01:07, 37.74it/s]
 92%|█████████▏| 28542/31100 [12:58<01:08, 37.22it/s]
 92%|█████████▏| 28546/31100 [12:58<01:09, 36.63it/s]
 92%|█████████▏| 28550/31100 [12:59<01:09, 36.86it/s]
 92%|█████████▏| 28554/31100 [12:59<01:12, 35.16it/s]
 92%|█████████▏| 28558/31100 [12:59<01:10, 36.27it/s]
 92%|█████████▏| 28562/31100 [12:59<01:09, 36.33it/s]
 92%|█████████▏| 28566/31100 [12:59<01:08, 36.83it/s]
 92%|█████████▏| 28570/31100 [12:59<01:09, 36.37it/s]
 92%|█████████▏| 28574/31100 [12:59<01:08, 36

[2m[36m(_objective pid=16313)[0m {'loss': 0.0547, 'learning_rate': 1.6868607711632944e-06, 'epoch': 4.66}


[2m[36m(_objective pid=16313)[0m  93%|█████████▎| 29005/31100 [13:10<00:54, 38.51it/s]
 93%|█████████▎| 29009/31100 [13:10<00:53, 38.77it/s]
 93%|█████████▎| 29013/31100 [13:10<00:54, 38.54it/s]
 93%|█████████▎| 29017/31100 [13:10<00:55, 37.52it/s]
 93%|█████████▎| 29021/31100 [13:10<00:54, 37.87it/s]
 93%|█████████▎| 29025/31100 [13:11<00:56, 36.67it/s]
 93%|█████████▎| 29029/31100 [13:11<00:55, 37.13it/s]
 93%|█████████▎| 29033/31100 [13:11<00:54, 37.66it/s]
 93%|█████████▎| 29037/31100 [13:11<00:55, 37.40it/s]
 93%|█████████▎| 29041/31100 [13:11<00:54, 37.58it/s]
 93%|█████████▎| 29046/31100 [13:11<00:53, 38.20it/s]
 93%|█████████▎| 29050/31100 [13:11<00:53, 38.08it/s]
 93%|█████████▎| 29054/31100 [13:11<00:54, 37.75it/s]
 93%|█████████▎| 29058/31100 [13:11<00:54, 37.76it/s]
 93%|█████████▎| 29062/31100 [13:12<00:54, 37.63it/s]
 93%|█████████▎| 29066/31100 [13:12<00:54, 37.19it/s]
 93%|█████████▎| 29070/31100 [13:12<00:57, 35.23it/s]
 93%|█████████▎| 29074/31100 [13:12<00:56, 35

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:33:52. Total running time: 13min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598   

[2m[36m(_objective pid=16313)[0m  95%|█████████▍| 29480/31100 [13:22<00:41, 39.47it/s]
 95%|█████████▍| 29485/31100 [13:22<00:40, 39.75it/s]
 95%|█████████▍| 29489/31100 [13:23<00:40, 39.61it/s]
 95%|█████████▍| 29493/31100 [13:23<00:41, 39.12it/s]
 95%|█████████▍| 29497/31100 [13:23<00:40, 39.23it/s]
 95%|█████████▍| 29501/31100 [13:23<00:40, 39.35it/s]


[2m[36m(_objective pid=16313)[0m {'loss': 0.0463, 'learning_rate': 1.2852272542196528e-06, 'epoch': 4.74}


[2m[36m(_objective pid=16313)[0m  95%|█████████▍| 29506/31100 [13:23<00:40, 39.42it/s]
 95%|█████████▍| 29511/31100 [13:23<00:39, 39.78it/s]
 95%|█████████▍| 29516/31100 [13:23<00:39, 40.15it/s]
 95%|█████████▍| 29521/31100 [13:23<00:40, 39.35it/s]
 95%|█████████▍| 29525/31100 [13:23<00:40, 38.90it/s]
 95%|█████████▍| 29530/31100 [13:24<00:40, 39.23it/s]
 95%|█████████▍| 29535/31100 [13:24<00:39, 39.69it/s]
 95%|█████████▍| 29540/31100 [13:24<00:38, 40.00it/s]
 95%|█████████▌| 29545/31100 [13:24<00:38, 40.20it/s]
 95%|█████████▌| 29550/31100 [13:24<00:38, 40.54it/s]
 95%|█████████▌| 29555/31100 [13:24<00:37, 40.90it/s]
 95%|█████████▌| 29560/31100 [13:24<00:37, 40.93it/s]
 95%|█████████▌| 29565/31100 [13:24<00:37, 41.12it/s]
 95%|█████████▌| 29570/31100 [13:25<00:37, 40.62it/s]
 95%|█████████▌| 29575/31100 [13:25<00:37, 40.45it/s]
 95%|█████████▌| 29580/31100 [13:25<00:37, 40.82it/s]
 95%|█████████▌| 29585/31100 [13:25<00:37, 40.68it/s]
 95%|█████████▌| 29590/31100 [13:25<00:37, 40

[2m[36m(_objective pid=16313)[0m {'loss': 0.0656, 'learning_rate': 8.835937372760113e-07, 'epoch': 4.82}


[2m[36m(_objective pid=16313)[0m  96%|█████████▋| 30003/31100 [13:36<00:28, 37.97it/s]
 96%|█████████▋| 30007/31100 [13:36<00:28, 38.33it/s]
 97%|█████████▋| 30012/31100 [13:36<00:27, 38.91it/s]
 97%|█████████▋| 30016/31100 [13:36<00:27, 38.88it/s]
 97%|█████████▋| 30020/31100 [13:36<00:27, 39.01it/s]
 97%|█████████▋| 30024/31100 [13:36<00:27, 38.56it/s]
 97%|█████████▋| 30029/31100 [13:36<00:27, 39.20it/s]
 97%|█████████▋| 30034/31100 [13:37<00:26, 39.49it/s]
 97%|█████████▋| 30038/31100 [13:37<00:27, 39.26it/s]
 97%|█████████▋| 30042/31100 [13:37<00:26, 39.44it/s]
 97%|█████████▋| 30046/31100 [13:37<00:26, 39.18it/s]
 97%|█████████▋| 30050/31100 [13:37<00:26, 39.35it/s]
 97%|█████████▋| 30055/31100 [13:37<00:26, 39.57it/s]
 97%|█████████▋| 30059/31100 [13:37<00:26, 38.97it/s]
 97%|█████████▋| 30063/31100 [13:37<00:26, 39.20it/s]
 97%|█████████▋| 30068/31100 [13:37<00:25, 39.95it/s]
 97%|█████████▋| 30072/31100 [13:37<00:25, 39.76it/s]
 97%|█████████▋| 30077/31100 [13:38<00:25, 40

[2m[36m(_objective pid=16313)[0m {'loss': 0.0639, 'learning_rate': 4.819602203323698e-07, 'epoch': 4.9}


 98%|█████████▊| 30510/31100 [13:49<00:14, 40.08it/s]
 98%|█████████▊| 30515/31100 [13:49<00:14, 39.92it/s]
 98%|█████████▊| 30519/31100 [13:49<00:14, 39.79it/s]
 98%|█████████▊| 30523/31100 [13:49<00:14, 39.71it/s]
 98%|█████████▊| 30527/31100 [13:49<00:14, 39.37it/s]
 98%|█████████▊| 30532/31100 [13:49<00:14, 39.85it/s]
 98%|█████████▊| 30536/31100 [13:50<00:14, 39.83it/s]
 98%|█████████▊| 30541/31100 [13:50<00:13, 40.13it/s]
 98%|█████████▊| 30546/31100 [13:50<00:13, 39.85it/s]
 98%|█████████▊| 30551/31100 [13:50<00:13, 39.82it/s]
 98%|█████████▊| 30555/31100 [13:50<00:13, 39.52it/s]
 98%|█████████▊| 30559/31100 [13:50<00:13, 39.32it/s]
 98%|█████████▊| 30563/31100 [13:50<00:13, 39.42it/s]
 98%|█████████▊| 30568/31100 [13:50<00:13, 39.75it/s]
 98%|█████████▊| 30573/31100 [13:50<00:13, 40.33it/s]
 98%|█████████▊| 30578/31100 [13:51<00:13, 38.85it/s]
 98%|█████████▊| 30583/31100 [13:51<00:13, 39.57it/s]
 98%|█████████▊| 30588/31100 [13:51<00:13, 39.32it/s]
 98%|█████████▊| 30593/31100

Trial status: 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:34:22. Total running time: 14min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   RUNNING        2.49816e-05                    5                        2       0.219598    

[2m[36m(_objective pid=16313)[0m  99%|█████████▊| 30644/31100 [13:52<00:11, 39.83it/s]
 99%|█████████▊| 30648/31100 [13:52<00:11, 39.77it/s]
 99%|█████████▊| 30652/31100 [13:52<00:11, 39.40it/s]
 99%|█████████▊| 30656/31100 [13:53<00:11, 39.30it/s]
 99%|█████████▊| 30661/31100 [13:53<00:11, 39.63it/s]
 99%|█████████▊| 30665/31100 [13:53<00:10, 39.62it/s]
 99%|█████████▊| 30670/31100 [13:53<00:10, 40.03it/s]
 99%|█████████▊| 30674/31100 [13:53<00:10, 39.98it/s]
 99%|█████████▊| 30679/31100 [13:53<00:10, 39.30it/s]
 99%|█████████▊| 30683/31100 [13:53<00:10, 39.29it/s]
 99%|█████████▊| 30688/31100 [13:53<00:10, 39.56it/s]
 99%|█████████▊| 30692/31100 [13:53<00:10, 39.18it/s]
 99%|█████████▊| 30697/31100 [13:54<00:10, 39.52it/s]
 99%|█████████▊| 30702/31100 [13:54<00:10, 39.77it/s]
 99%|█████████▊| 30707/31100 [13:54<00:09, 40.01it/s]
 99%|█████████▊| 30711/31100 [13:54<00:09, 39.21it/s]
 99%|█████████▉| 30716/31100 [13:54<00:09, 39.45it/s]
 99%|█████████▉| 30720/31100 [13:54<00:09, 39

[2m[36m(_objective pid=16313)[0m {'loss': 0.0491, 'learning_rate': 8.03267033887283e-08, 'epoch': 4.98}


[2m[36m(_objective pid=16313)[0m 100%|█████████▉| 31008/31100 [14:02<00:02, 38.30it/s]
100%|█████████▉| 31016/31100 [14:02<00:02, 39.03it/s]
100%|█████████▉| 31020/31100 [14:02<00:02, 38.38it/s]
100%|█████████▉| 31024/31100 [14:02<00:01, 38.34it/s]
100%|█████████▉| 31028/31100 [14:02<00:01, 38.31it/s]
100%|█████████▉| 31032/31100 [14:02<00:01, 37.79it/s]
100%|█████████▉| 31036/31100 [14:02<00:01, 37.91it/s]
100%|█████████▉| 31040/31100 [14:03<00:01, 35.98it/s]
100%|█████████▉| 31045/31100 [14:03<00:01, 37.31it/s]
100%|█████████▉| 31049/31100 [14:03<00:01, 37.38it/s]
100%|█████████▉| 31053/31100 [14:03<00:01, 37.51it/s]
100%|█████████▉| 31057/31100 [14:03<00:01, 37.47it/s]
100%|█████████▉| 31061/31100 [14:03<00:01, 37.67it/s]
100%|█████████▉| 31065/31100 [14:03<00:00, 36.87it/s]
100%|█████████▉| 31069/31100 [14:03<00:00, 35.27it/s]
100%|█████████▉| 31073/31100 [14:03<00:00, 36.29it/s]
100%|█████████▉| 31077/31100 [14:04<00:00, 34.90it/s]
100%|█████████▉| 31081/31100 [14:04<00:00, 34

Trial _objective_f556c_00000 finished iteration 5 at 2023-09-11 13:34:43. Total running time: 14min 22s
+-------------------------------------------------+
| Trial _objective_f556c_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        170.546 |
| time_total_s                             856.48 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.27153 |
| eval_runtime                             9.5564 |
| eval_samples_per_second                  433.95 |
| eval_steps_per_second                    13.603 |
| objective                               0.27153 |
+-------------------------------------------------+

[2m[36m(_objective pid=16313)[0m {'eval_loss': 0.2715345323085785, 'eval_runtime': 9.5564, 'eval_samples_per_second': 433.95, 'eval_steps_per_second': 13.603, 'epoch': 5.0}


[2m[36m(_objective pid=16313)[0m                                                      
[2m[36m(_objective pid=16313)[0m                                                  [A100%|██████████| 31100/31100 [14:14<00:00, 33.61it/s]
[2m[36m(_objective pid=16313)[0m 100%|██████████| 130/130 [00:09<00:00, 11.97it/s][A
[2m[36m(_objective pid=16313)[0m                                                  [A


Trial _objective_f556c_00000 completed after 5 iterations at 2023-09-11 13:34:45. Total running time: 14min 24s

[2m[36m(_objective pid=16313)[0m {'train_runtime': 855.7369, 'train_samples_per_second': 72.68, 'train_steps_per_second': 36.343, 'train_loss': 0.14751895985706825, 'epoch': 5.0}


[2m[36m(_objective pid=16313)[0m                                                      100%|██████████| 31100/31100 [14:15<00:00, 33.61it/s]100%|██████████| 31100/31100 [14:15<00:00, 36.34it/s]


Trial status: 1 TERMINATED | 16 PENDING
Current time: 2023-09-11 13:34:52. Total running time: 14min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   TERMINATED       2.49816e-05                    5                        2       

[2m[36m(_objective pid=20061)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
[2m[36m(_objective pid=20061)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=20061)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=20061)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=20061)[0m {'loss': 0.4852, 'learning_rate': 1.5805570226012105e-05, 'epoch': 0.08}


[2m[36m(_objective pid=20061)[0m   3%|▎         | 502/18660 [00:13<07:52, 38.44it/s]
  3%|▎         | 506/18660 [00:13<07:51, 38.49it/s]
  3%|▎         | 510/18660 [00:13<07:48, 38.74it/s]
  3%|▎         | 514/18660 [00:13<07:59, 37.82it/s]
  3%|▎         | 519/18660 [00:13<07:50, 38.59it/s]
  3%|▎         | 524/18660 [00:13<07:39, 39.43it/s]
  3%|▎         | 529/18660 [00:14<07:35, 39.80it/s]
  3%|▎         | 533/18660 [00:14<07:36, 39.67it/s]
  3%|▎         | 537/18660 [00:14<07:38, 39.52it/s]
  3%|▎         | 541/18660 [00:14<07:51, 38.45it/s]
  3%|▎         | 546/18660 [00:14<07:45, 38.90it/s]
  3%|▎         | 551/18660 [00:14<07:38, 39.50it/s]
  3%|▎         | 556/18660 [00:14<07:33, 39.88it/s]
  3%|▎         | 560/18660 [00:14<07:33, 39.87it/s]
  3%|▎         | 564/18660 [00:14<07:33, 39.87it/s]
  3%|▎         | 568/18660 [00:15<07:35, 39.68it/s]
  3%|▎         | 572/18660 [00:15<07:38, 39.47it/s]
  3%|▎         | 576/18660 [00:15<07:39, 39.32it/s]
  3%|▎         | 580/18660 

[2m[36m(_objective pid=20061)[0m {'loss': 0.4596, 'learning_rate': 1.537039483432675e-05, 'epoch': 0.16}


[2m[36m(_objective pid=20061)[0m                                                      5%|▌         | 1000/18660 [00:26<08:22, 35.13it/s]  5%|▌         | 1003/18660 [00:26<08:17, 35.46it/s]
  5%|▌         | 1007/18660 [00:26<08:18, 35.42it/s]
  5%|▌         | 1011/18660 [00:26<08:17, 35.47it/s]
  5%|▌         | 1015/18660 [00:26<08:20, 35.28it/s]
  5%|▌         | 1019/18660 [00:27<08:20, 35.21it/s]
  5%|▌         | 1023/18660 [00:27<08:17, 35.47it/s]
  6%|▌         | 1027/18660 [00:27<08:09, 36.01it/s]
  6%|▌         | 1031/18660 [00:27<08:14, 35.65it/s]
  6%|▌         | 1035/18660 [00:27<08:16, 35.48it/s]
  6%|▌         | 1039/18660 [00:27<08:21, 35.15it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:35:22. Total running time: 15min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m   6%|▌         | 1043/18660 [00:27<08:17, 35.41it/s]
  6%|▌         | 1047/18660 [00:27<08:09, 35.96it/s]
  6%|▌         | 1051/18660 [00:27<08:07, 36.12it/s]
  6%|▌         | 1055/18660 [00:28<08:03, 36.38it/s]
  6%|▌         | 1059/18660 [00:28<08:00, 36.63it/s]
  6%|▌         | 1063/18660 [00:28<07:57, 36.87it/s]
  6%|▌         | 1067/18660 [00:28<08:00, 36.63it/s]
  6%|▌         | 1071/18660 [00:28<07:49, 37.48it/s]
  6%|▌         | 1075/18660 [00:28<07:56, 36.93it/s]
  6%|▌         | 1080/18660 [00:28<07:45, 37.78it/s]
  6%|▌         | 1084/18660 [00:28<07:39, 38.28it/s]
  6%|▌         | 1089/18660 [00:28<07:31, 38.91it/s]
  6%|▌         | 1093/18660 [00:29<07:29, 39.07it/s]
  6%|▌         | 1098/18660 [00:29<07:21, 39.80it/s]
  6%|▌         | 1102/18660 [00:29<07:21, 39.79it/s]
  6%|▌         | 1107/18660 [00:29<07:21, 39.80it/s]
  6%|▌         | 1111/18660 [00:29<07:23, 39.59it/s]
  6%|▌         | 1115/18660 [00:29<07:22, 39.67it/s]
  6%|▌   

[2m[36m(_objective pid=20061)[0m {'loss': 0.4096, 'learning_rate': 1.4935219442641394e-05, 'epoch': 0.24}


[2m[36m(_objective pid=20061)[0m   8%|▊         | 1505/18660 [00:39<08:12, 34.86it/s]
  8%|▊         | 1509/18660 [00:39<08:15, 34.59it/s]
  8%|▊         | 1513/18660 [00:40<08:08, 35.11it/s]
  8%|▊         | 1517/18660 [00:40<07:56, 35.97it/s]
  8%|▊         | 1521/18660 [00:40<07:46, 36.76it/s]
  8%|▊         | 1525/18660 [00:40<07:41, 37.09it/s]
  8%|▊         | 1529/18660 [00:40<07:44, 36.92it/s]
  8%|▊         | 1533/18660 [00:40<07:39, 37.27it/s]
  8%|▊         | 1537/18660 [00:40<07:41, 37.08it/s]
  8%|▊         | 1541/18660 [00:40<07:40, 37.18it/s]
  8%|▊         | 1545/18660 [00:40<07:40, 37.16it/s]
  8%|▊         | 1549/18660 [00:40<07:45, 36.76it/s]
  8%|▊         | 1553/18660 [00:41<07:37, 37.40it/s]
  8%|▊         | 1557/18660 [00:41<07:49, 36.44it/s]
  8%|▊         | 1561/18660 [00:41<07:43, 36.87it/s]
  8%|▊         | 1565/18660 [00:41<07:41, 37.02it/s]
  8%|▊         | 1569/18660 [00:41<07:53, 36.09it/s]
  8%|▊         | 1573/18660 [00:41<08:00, 35.57it/s]
  8%|▊   

[2m[36m(_objective pid=20061)[0m {'loss': 0.3712, 'learning_rate': 1.4500044050956039e-05, 'epoch': 0.32}


[2m[36m(_objective pid=20061)[0m  11%|█         | 2006/18660 [00:52<06:51, 40.43it/s]
 11%|█         | 2011/18660 [00:52<06:50, 40.58it/s]
 11%|█         | 2016/18660 [00:53<06:52, 40.36it/s]
 11%|█         | 2021/18660 [00:53<06:50, 40.51it/s]
 11%|█         | 2026/18660 [00:53<06:48, 40.73it/s]
 11%|█         | 2031/18660 [00:53<06:48, 40.69it/s]
 11%|█         | 2036/18660 [00:53<06:52, 40.27it/s]
 11%|█         | 2041/18660 [00:53<06:55, 39.98it/s]
 11%|█         | 2045/18660 [00:53<07:01, 39.45it/s]
 11%|█         | 2050/18660 [00:53<06:57, 39.77it/s]
 11%|█         | 2055/18660 [00:54<06:50, 40.42it/s]
 11%|█         | 2060/18660 [00:54<06:51, 40.38it/s]
 11%|█         | 2065/18660 [00:54<06:53, 40.09it/s]
 11%|█         | 2070/18660 [00:54<06:57, 39.69it/s]
 11%|█         | 2074/18660 [00:54<07:05, 38.95it/s]
 11%|█         | 2078/18660 [00:54<07:06, 38.92it/s]
 11%|█         | 2082/18660 [00:54<07:03, 39.16it/s]
 11%|█         | 2087/18660 [00:54<06:58, 39.60it/s]
 11%|█   

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:35:52. Total running time: 15min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

[2m[36m(_objective pid=20061)[0m  12%|█▏        | 2195/18660 [00:57<07:39, 35.82it/s]
 12%|█▏        | 2199/18660 [00:57<07:35, 36.15it/s]
 12%|█▏        | 2203/18660 [00:57<07:33, 36.26it/s]
 12%|█▏        | 2207/18660 [00:58<07:35, 36.09it/s]
 12%|█▏        | 2211/18660 [00:58<07:30, 36.52it/s]
 12%|█▏        | 2215/18660 [00:58<07:28, 36.65it/s]
 12%|█▏        | 2219/18660 [00:58<07:30, 36.51it/s]
 12%|█▏        | 2223/18660 [00:58<07:25, 36.90it/s]
 12%|█▏        | 2227/18660 [00:58<07:20, 37.33it/s]
 12%|█▏        | 2231/18660 [00:58<07:23, 37.08it/s]
 12%|█▏        | 2235/18660 [00:58<07:24, 36.96it/s]
 12%|█▏        | 2239/18660 [00:58<07:35, 36.07it/s]
 12%|█▏        | 2243/18660 [00:58<07:27, 36.69it/s]
 12%|█▏        | 2247/18660 [00:59<07:20, 37.30it/s]
 12%|█▏        | 2251/18660 [00:59<07:16, 37.57it/s]
 12%|█▏        | 2255/18660 [00:59<07:16, 37.56it/s]
 12%|█▏        | 2259/18660 [00:59<07:14, 37.72it/s]
 12%|█▏        | 2263/18660 [00:59<07:16, 37.60it/s]
 12%|█▏  

[2m[36m(_objective pid=20061)[0m {'loss': 0.332, 'learning_rate': 1.4064868659270683e-05, 'epoch': 0.4}


[2m[36m(_objective pid=20061)[0m  13%|█▎        | 2499/18660 [01:05<06:54, 38.96it/s]                                                     13%|█▎        | 2500/18660 [01:05<06:54, 38.96it/s]
 13%|█▎        | 2503/18660 [01:05<06:52, 39.20it/s]
 13%|█▎        | 2508/18660 [01:05<06:46, 39.71it/s]
 13%|█▎        | 2512/18660 [01:05<06:47, 39.62it/s]
 13%|█▎        | 2517/18660 [01:06<06:46, 39.72it/s]
 14%|█▎        | 2521/18660 [01:06<06:47, 39.62it/s]
 14%|█▎        | 2526/18660 [01:06<06:46, 39.74it/s]
 14%|█▎        | 2531/18660 [01:06<06:44, 39.87it/s]
 14%|█▎        | 2536/18660 [01:06<06:40, 40.21it/s]
 14%|█▎        | 2541/18660 [01:06<06:39, 40.39it/s]
 14%|█▎        | 2546/18660 [01:06<06:41, 40.11it/s]
 14%|█▎        | 2551/18660 [01:06<06:37, 40.50it/s]
 14%|█▎        | 2556/18660 [01:07<06:39, 40.29it/s]
 14%|█▎        | 2561/18660 [01:07<06:37, 40.46it/s]
 14%|█▍        | 2566/18660 [01:07<06:37, 40.54it/s]
 14%|█▍        | 2571/18660 [01:07<06:35, 40.67it/s]
 14%|█▍ 

[2m[36m(_objective pid=20061)[0m {'loss': 0.3368, 'learning_rate': 1.362969326758533e-05, 'epoch': 0.48}


[2m[36m(_objective pid=20061)[0m  16%|█▌        | 3004/18660 [01:18<06:26, 40.48it/s]
 16%|█▌        | 3009/18660 [01:18<06:23, 40.84it/s]
 16%|█▌        | 3014/18660 [01:19<06:20, 41.08it/s]
 16%|█▌        | 3019/18660 [01:19<06:18, 41.27it/s]
 16%|█▌        | 3024/18660 [01:19<06:19, 41.24it/s]
 16%|█▌        | 3029/18660 [01:19<06:19, 41.15it/s]
 16%|█▋        | 3034/18660 [01:19<06:22, 40.90it/s]
 16%|█▋        | 3039/18660 [01:19<06:19, 41.15it/s]
 16%|█▋        | 3044/18660 [01:19<06:25, 40.55it/s]
 16%|█▋        | 3049/18660 [01:19<06:23, 40.75it/s]
 16%|█▋        | 3054/18660 [01:20<06:25, 40.46it/s]
 16%|█▋        | 3059/18660 [01:20<06:23, 40.73it/s]
 16%|█▋        | 3064/18660 [01:20<06:21, 40.83it/s]
 16%|█▋        | 3069/18660 [01:20<06:21, 40.86it/s]
 16%|█▋        | 3074/18660 [01:20<06:20, 41.00it/s]
 17%|█▋        | 3079/18660 [01:20<06:25, 40.40it/s]
 17%|█▋        | 3084/18660 [01:20<06:43, 38.56it/s]
 17%|█▋        | 3089/18660 [01:20<06:36, 39.25it/s]
 17%|█▋  

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:36:22. Total running time: 16min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m  18%|█▊        | 3356/18660 [01:27<07:11, 35.46it/s]
 18%|█▊        | 3360/18660 [01:27<07:05, 35.98it/s]
 18%|█▊        | 3364/18660 [01:28<07:19, 34.79it/s]
 18%|█▊        | 3368/18660 [01:28<07:03, 36.10it/s]
 18%|█▊        | 3372/18660 [01:28<06:53, 37.01it/s]
 18%|█▊        | 3376/18660 [01:28<07:05, 35.96it/s]
 18%|█▊        | 3380/18660 [01:28<06:54, 36.84it/s]
 18%|█▊        | 3384/18660 [01:28<06:50, 37.25it/s]
 18%|█▊        | 3388/18660 [01:28<06:48, 37.35it/s]
 18%|█▊        | 3392/18660 [01:28<06:50, 37.15it/s]
 18%|█▊        | 3396/18660 [01:28<07:04, 35.98it/s]
 18%|█▊        | 3400/18660 [01:29<07:05, 35.90it/s]
 18%|█▊        | 3404/18660 [01:29<06:58, 36.48it/s]
 18%|█▊        | 3408/18660 [01:29<06:56, 36.62it/s]
 18%|█▊        | 3412/18660 [01:29<07:14, 35.09it/s]
 18%|█▊        | 3416/18660 [01:29<07:04, 35.91it/s]
 18%|█▊        | 3420/18660 [01:29<06:54, 36.76it/s]
 18%|█▊        | 3424/18660 [01:29<06:48, 37.34it/s]
 18%|█▊  

[2m[36m(_objective pid=20061)[0m {'loss': 0.2876, 'learning_rate': 1.3194517875899974e-05, 'epoch': 0.56}


[2m[36m(_objective pid=20061)[0m  19%|█▉        | 3504/18660 [01:31<07:04, 35.69it/s]
 19%|█▉        | 3508/18660 [01:32<07:03, 35.78it/s]
 19%|█▉        | 3512/18660 [01:32<07:01, 35.97it/s]
 19%|█▉        | 3516/18660 [01:32<06:59, 36.07it/s]
 19%|█▉        | 3520/18660 [01:32<06:59, 36.07it/s]
 19%|█▉        | 3524/18660 [01:32<06:50, 36.84it/s]
 19%|█▉        | 3528/18660 [01:32<06:49, 36.92it/s]
 19%|█▉        | 3532/18660 [01:32<06:45, 37.28it/s]
 19%|█▉        | 3536/18660 [01:32<06:39, 37.86it/s]
 19%|█▉        | 3540/18660 [01:32<06:42, 37.61it/s]
 19%|█▉        | 3545/18660 [01:33<06:34, 38.30it/s]
 19%|█▉        | 3550/18660 [01:33<06:27, 38.95it/s]
 19%|█▉        | 3555/18660 [01:33<06:24, 39.27it/s]
 19%|█▉        | 3560/18660 [01:33<06:19, 39.74it/s]
 19%|█▉        | 3564/18660 [01:33<06:19, 39.79it/s]
 19%|█▉        | 3569/18660 [01:33<06:17, 39.99it/s]
 19%|█▉        | 3573/18660 [01:33<06:21, 39.51it/s]
 19%|█▉        | 3577/18660 [01:33<06:36, 38.07it/s]
 19%|█▉  

[2m[36m(_objective pid=20061)[0m {'loss': 0.274, 'learning_rate': 1.2759342484214617e-05, 'epoch': 0.64}


[2m[36m(_objective pid=20061)[0m  21%|██▏       | 4003/18660 [01:44<06:44, 36.19it/s]
 21%|██▏       | 4007/18660 [01:44<06:39, 36.70it/s]
 21%|██▏       | 4011/18660 [01:44<06:36, 36.92it/s]
 22%|██▏       | 4016/18660 [01:45<06:26, 37.87it/s]
 22%|██▏       | 4020/18660 [01:45<06:22, 38.31it/s]
 22%|██▏       | 4024/18660 [01:45<06:20, 38.47it/s]
 22%|██▏       | 4028/18660 [01:45<06:21, 38.34it/s]
 22%|██▏       | 4032/18660 [01:45<06:26, 37.88it/s]
 22%|██▏       | 4036/18660 [01:45<06:37, 36.79it/s]
 22%|██▏       | 4040/18660 [01:45<06:32, 37.24it/s]
 22%|██▏       | 4044/18660 [01:45<06:34, 37.05it/s]
 22%|██▏       | 4049/18660 [01:45<06:40, 36.50it/s]
 22%|██▏       | 4053/18660 [01:46<06:34, 37.03it/s]
 22%|██▏       | 4057/18660 [01:46<06:27, 37.65it/s]
 22%|██▏       | 4061/18660 [01:46<06:31, 37.30it/s]
 22%|██▏       | 4065/18660 [01:46<06:32, 37.16it/s]
 22%|██▏       | 4069/18660 [01:46<06:32, 37.18it/s]
 22%|██▏       | 4073/18660 [01:46<06:34, 36.97it/s]
 22%|██▏ 

[2m[36m(_objective pid=20061)[0m {'loss': 0.2812, 'learning_rate': 1.2324167092529262e-05, 'epoch': 0.72}
Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:36:52. Total running time: 16min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

[2m[36m(_objective pid=20061)[0m                                                      24%|██▍       | 4500/18660 [01:57<06:07, 38.54it/s]
 24%|██▍       | 4501/18660 [01:57<06:04, 38.83it/s]
 24%|██▍       | 4505/18660 [01:57<06:05, 38.75it/s]
 24%|██▍       | 4509/18660 [01:57<06:09, 38.30it/s]
 24%|██▍       | 4513/18660 [01:57<06:09, 38.27it/s]
 24%|██▍       | 4518/18660 [01:58<06:02, 39.04it/s]
 24%|██▍       | 4523/18660 [01:58<05:54, 39.89it/s]
 24%|██▍       | 4527/18660 [01:58<06:08, 38.38it/s]
 24%|██▍       | 4531/18660 [01:58<06:04, 38.78it/s]
 24%|██▍       | 4536/18660 [01:58<05:59, 39.31it/s]
 24%|██▍       | 4541/18660 [01:58<05:54, 39.82it/s]
 24%|██▍       | 4545/18660 [01:58<05:55, 39.69it/s]
 24%|██▍       | 4550/18660 [01:58<05:54, 39.82it/s]
 24%|██▍       | 4555/18660 [01:59<05:52, 40.04it/s]
 24%|██▍       | 4560/18660 [01:59<05:53, 39.94it/s]
 24%|██▍       | 4564/18660 [01:59<06:06, 38.48it/s]
 24%|██▍       | 4568/18660 [01:59<06:10, 38.00it/s]
 25%|██▍

[2m[36m(_objective pid=20061)[0m {'loss': 0.2314, 'learning_rate': 1.1888991700843908e-05, 'epoch': 0.8}


[2m[36m(_objective pid=20061)[0m  27%|██▋       | 4997/18660 [02:10<05:38, 40.37it/s]                                                     27%|██▋       | 5000/18660 [02:10<05:38, 40.37it/s]
 27%|██▋       | 5002/18660 [02:10<05:40, 40.13it/s]
 27%|██▋       | 5007/18660 [02:10<05:43, 39.78it/s]
 27%|██▋       | 5011/18660 [02:10<05:58, 38.07it/s]
 27%|██▋       | 5015/18660 [02:10<05:56, 38.31it/s]
 27%|██▋       | 5019/18660 [02:11<05:55, 38.40it/s]
 27%|██▋       | 5023/18660 [02:11<05:52, 38.70it/s]
 27%|██▋       | 5027/18660 [02:11<06:02, 37.58it/s]
 27%|██▋       | 5032/18660 [02:11<05:52, 38.68it/s]
 27%|██▋       | 5037/18660 [02:11<05:47, 39.22it/s]
 27%|██▋       | 5042/18660 [02:11<05:47, 39.24it/s]
 27%|██▋       | 5047/18660 [02:11<05:44, 39.54it/s]
 27%|██▋       | 5051/18660 [02:11<05:46, 39.28it/s]
 27%|██▋       | 5055/18660 [02:11<05:52, 38.55it/s]
 27%|██▋       | 5059/18660 [02:12<05:56, 38.10it/s]
 27%|██▋       | 5064/18660 [02:12<05:52, 38.57it/s]
 27%|██▋

[2m[36m(_objective pid=20061)[0m {'loss': 0.2731, 'learning_rate': 1.1453816309158553e-05, 'epoch': 0.88}


 29%|██▉       | 5504/18660 [02:23<05:27, 40.19it/s]
 30%|██▉       | 5509/18660 [02:23<05:25, 40.43it/s]
 30%|██▉       | 5514/18660 [02:23<05:22, 40.71it/s]
 30%|██▉       | 5519/18660 [02:23<05:25, 40.34it/s]
 30%|██▉       | 5524/18660 [02:24<05:21, 40.85it/s]
 30%|██▉       | 5529/18660 [02:24<05:17, 41.37it/s]
 30%|██▉       | 5534/18660 [02:24<05:16, 41.52it/s]
 30%|██▉       | 5539/18660 [02:24<05:17, 41.30it/s]
 30%|██▉       | 5544/18660 [02:24<05:15, 41.56it/s]
 30%|██▉       | 5549/18660 [02:24<05:19, 41.02it/s]
 30%|██▉       | 5554/18660 [02:24<05:19, 41.04it/s]
 30%|██▉       | 5559/18660 [02:24<05:33, 39.26it/s]
 30%|██▉       | 5564/18660 [02:25<05:26, 40.07it/s]
 30%|██▉       | 5569/18660 [02:25<05:24, 40.33it/s]
 30%|██▉       | 5574/18660 [02:25<05:20, 40.86it/s]
 30%|██▉       | 5579/18660 [02:25<05:20, 40.76it/s]
 30%|██▉       | 5584/18660 [02:25<05:30, 39.57it/s]
 30%|██▉       | 5589/18660 [02:25<05:24, 40.24it/s]
 30%|██▉       | 5594/18660 [02:25<05:21, 40.6

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:37:22. Total running time: 17min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

 30%|███       | 5679/18660 [02:27<05:26, 39.82it/s]
 30%|███       | 5684/18660 [02:28<05:23, 40.10it/s]
 30%|███       | 5689/18660 [02:28<05:23, 40.08it/s]
 31%|███       | 5694/18660 [02:28<05:24, 39.95it/s]
 31%|███       | 5698/18660 [02:28<05:31, 39.11it/s]
 31%|███       | 5702/18660 [02:28<05:29, 39.32it/s]
 31%|███       | 5706/18660 [02:28<05:29, 39.35it/s]
 31%|███       | 5711/18660 [02:28<05:25, 39.73it/s]
 31%|███       | 5716/18660 [02:28<05:24, 39.89it/s]
 31%|███       | 5721/18660 [02:29<05:23, 40.00it/s]
 31%|███       | 5726/18660 [02:29<05:18, 40.56it/s]
 31%|███       | 5731/18660 [02:29<05:15, 41.02it/s]
 31%|███       | 5736/18660 [02:29<05:15, 40.97it/s]
 31%|███       | 5741/18660 [02:29<05:14, 41.01it/s]
 31%|███       | 5746/18660 [02:29<05:14, 41.07it/s]
 31%|███       | 5751/18660 [02:29<05:20, 40.33it/s]
 31%|███       | 5756/18660 [02:29<05:20, 40.31it/s]
 31%|███       | 5761/18660 [02:29<05:17, 40.60it/s]
 31%|███       | 5766/18660 [02:30<05:15, 40.8

[2m[36m(_objective pid=20061)[0m {'loss': 0.2832, 'learning_rate': 1.1018640917473196e-05, 'epoch': 0.96}


[2m[36m(_objective pid=20061)[0m  32%|███▏      | 6000/18660 [02:36<05:40, 37.20it/s]                                                     32%|███▏      | 6000/18660 [02:36<05:40, 37.20it/s]
 32%|███▏      | 6004/18660 [02:36<05:41, 37.10it/s]
 32%|███▏      | 6008/18660 [02:36<05:39, 37.29it/s]
 32%|███▏      | 6012/18660 [02:36<05:47, 36.37it/s]
 32%|███▏      | 6016/18660 [02:36<05:38, 37.32it/s]
 32%|███▏      | 6020/18660 [02:36<05:37, 37.42it/s]
 32%|███▏      | 6024/18660 [02:36<05:36, 37.57it/s]
 32%|███▏      | 6028/18660 [02:36<05:32, 37.96it/s]
 32%|███▏      | 6032/18660 [02:37<05:32, 37.94it/s]
 32%|███▏      | 6036/18660 [02:37<05:29, 38.37it/s]
 32%|███▏      | 6040/18660 [02:37<05:30, 38.19it/s]
 32%|███▏      | 6044/18660 [02:37<05:31, 38.04it/s]
 32%|███▏      | 6048/18660 [02:37<05:30, 38.12it/s]
 32%|███▏      | 6052/18660 [02:37<05:32, 37.89it/s]
 32%|███▏      | 6056/18660 [02:37<05:46, 36.41it/s]
 32%|███▏      | 6061/18660 [02:37<05:32, 37.90it/s]
 33%|███

Trial _objective_f556c_00001 finished iteration 1 at 2023-09-11 13:37:46. Total running time: 17min 25s
+-------------------------------------------------+
| Trial _objective_f556c_00001 result             |
+-------------------------------------------------+
| time_this_iter_s                        173.736 |
| time_total_s                            173.736 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.26993 |
| eval_runtime                             9.5673 |
| eval_samples_per_second                 433.455 |
| eval_steps_per_second                    13.588 |
| objective                               0.26993 |
+-------------------------------------------------+

[2m[36m(_objective pid=20061)[0m {'eval_loss': 0.26993221044540405, 'eval_runtime': 9.5673, 'eval_samples_per_second': 433.455, 'eval_steps_per_second': 13.588, 'epoch': 1.0}


[2m[36m(_objective pid=20061)[0m  33%|███▎      | 6221/18660 [02:52<2:35:03,  1.34it/s]
 33%|███▎      | 6225/18660 [02:52<1:54:01,  1.82it/s]
 33%|███▎      | 6229/18660 [02:52<1:23:27,  2.48it/s]
 33%|███▎      | 6233/18660 [02:53<1:01:06,  3.39it/s]
 33%|███▎      | 6237/18660 [02:53<45:09,  4.59it/s]  
 33%|███▎      | 6241/18660 [02:53<33:48,  6.12it/s]
 33%|███▎      | 6245/18660 [02:53<25:25,  8.14it/s]
 33%|███▎      | 6249/18660 [02:53<19:36, 10.55it/s]
 34%|███▎      | 6253/18660 [02:53<15:28, 13.37it/s]
 34%|███▎      | 6257/18660 [02:53<12:23, 16.67it/s]
 34%|███▎      | 6261/18660 [02:53<10:17, 20.09it/s]
 34%|███▎      | 6266/18660 [02:53<08:28, 24.36it/s]
 34%|███▎      | 6270/18660 [02:54<07:33, 27.30it/s]
 34%|███▎      | 6275/18660 [02:54<06:44, 30.62it/s]
 34%|███▎      | 6280/18660 [02:54<06:13, 33.14it/s]
 34%|███▎      | 6285/18660 [02:54<05:48, 35.47it/s]
 34%|███▎      | 6290/18660 [02:54<05:35, 36.85it/s]
 34%|███▎      | 6295/18660 [02:54<05:23, 38.27it/s]

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:37:52. Total running time: 17min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

[2m[36m(_objective pid=20061)[0m  34%|███▍      | 6424/18660 [02:57<05:03, 40.28it/s]
 34%|███▍      | 6429/18660 [02:58<05:02, 40.49it/s]
 34%|███▍      | 6434/18660 [02:58<05:02, 40.48it/s]
 35%|███▍      | 6439/18660 [02:58<04:59, 40.83it/s]
 35%|███▍      | 6444/18660 [02:58<05:01, 40.53it/s]
 35%|███▍      | 6449/18660 [02:58<05:01, 40.50it/s]
 35%|███▍      | 6454/18660 [02:58<05:01, 40.48it/s]
 35%|███▍      | 6459/18660 [02:58<05:13, 38.90it/s]
 35%|███▍      | 6464/18660 [02:58<05:07, 39.62it/s]
 35%|███▍      | 6469/18660 [02:59<05:02, 40.26it/s]
 35%|███▍      | 6474/18660 [02:59<05:07, 39.69it/s]
 35%|███▍      | 6478/18660 [02:59<05:08, 39.49it/s]
 35%|███▍      | 6482/18660 [02:59<05:21, 37.84it/s]
 35%|███▍      | 6486/18660 [02:59<05:20, 37.96it/s]
 35%|███▍      | 6491/18660 [02:59<05:12, 38.94it/s]
 35%|███▍      | 6495/18660 [02:59<05:15, 38.53it/s]


[2m[36m(_objective pid=20061)[0m {'loss': 0.217, 'learning_rate': 1.058346552578784e-05, 'epoch': 1.05}


[2m[36m(_objective pid=20061)[0m  35%|███▍      | 6500/18660 [02:59<05:08, 39.45it/s]                                                     35%|███▍      | 6500/18660 [02:59<05:08, 39.45it/s]
 35%|███▍      | 6504/18660 [02:59<05:07, 39.48it/s]
 35%|███▍      | 6509/18660 [03:00<05:03, 40.03it/s]
 35%|███▍      | 6514/18660 [03:00<05:02, 40.14it/s]
 35%|███▍      | 6519/18660 [03:00<04:59, 40.53it/s]
 35%|███▍      | 6524/18660 [03:00<05:09, 39.25it/s]
 35%|███▍      | 6528/18660 [03:00<05:07, 39.42it/s]
 35%|███▌      | 6533/18660 [03:00<05:05, 39.76it/s]
 35%|███▌      | 6538/18660 [03:00<05:02, 40.02it/s]
 35%|███▌      | 6543/18660 [03:00<05:03, 39.87it/s]
 35%|███▌      | 6548/18660 [03:01<05:00, 40.31it/s]
 35%|███▌      | 6553/18660 [03:01<04:59, 40.38it/s]
 35%|███▌      | 6558/18660 [03:01<05:07, 39.37it/s]
 35%|███▌      | 6563/18660 [03:01<05:04, 39.73it/s]
 35%|███▌      | 6567/18660 [03:01<05:06, 39.46it/s]
 35%|███▌      | 6571/18660 [03:01<05:05, 39.54it/s]
 35%|███

[2m[36m(_objective pid=20061)[0m {'loss': 0.137, 'learning_rate': 1.0148290134102487e-05, 'epoch': 1.13}


 38%|███▊      | 7000/18660 [03:12<04:44, 41.00it/s]
 38%|███▊      | 7005/18660 [03:12<04:45, 40.78it/s]
 38%|███▊      | 7010/18660 [03:12<04:44, 40.94it/s]
 38%|███▊      | 7015/18660 [03:13<04:45, 40.84it/s]
 38%|███▊      | 7020/18660 [03:13<04:44, 40.93it/s]
 38%|███▊      | 7025/18660 [03:13<04:47, 40.44it/s]
 38%|███▊      | 7030/18660 [03:13<04:51, 39.86it/s]
 38%|███▊      | 7035/18660 [03:13<04:49, 40.14it/s]
 38%|███▊      | 7040/18660 [03:13<04:47, 40.35it/s]
 38%|███▊      | 7045/18660 [03:13<04:46, 40.61it/s]
 38%|███▊      | 7050/18660 [03:13<04:59, 38.83it/s]
 38%|███▊      | 7055/18660 [03:14<05:01, 38.54it/s]
 38%|███▊      | 7060/18660 [03:14<04:56, 39.10it/s]
 38%|███▊      | 7065/18660 [03:14<04:50, 39.89it/s]
 38%|███▊      | 7070/18660 [03:14<04:48, 40.22it/s]
 38%|███▊      | 7075/18660 [03:14<04:51, 39.78it/s]
 38%|███▊      | 7079/18660 [03:14<04:51, 39.77it/s]
 38%|███▊      | 7083/18660 [03:14<04:51, 39.78it/s]
 38%|███▊      | 7088/18660 [03:14<04:47, 40.2

[2m[36m(_objective pid=20061)[0m {'loss': 0.1634, 'learning_rate': 9.713114742417131e-06, 'epoch': 1.21}


[2m[36m(_objective pid=20061)[0m  40%|████      | 7500/18660 [03:25<04:55, 37.73it/s]                                                     40%|████      | 7500/18660 [03:25<04:55, 37.73it/s]
 40%|████      | 7504/18660 [03:25<04:55, 37.72it/s]
 40%|████      | 7508/18660 [03:25<04:51, 38.21it/s]
 40%|████      | 7512/18660 [03:25<04:54, 37.86it/s]
 40%|████      | 7516/18660 [03:26<04:52, 38.08it/s]
 40%|████      | 7521/18660 [03:26<04:43, 39.28it/s]
 40%|████      | 7526/18660 [03:26<04:37, 40.07it/s]
 40%|████      | 7530/18660 [03:26<04:38, 40.02it/s]
 40%|████      | 7535/18660 [03:26<04:35, 40.33it/s]
 40%|████      | 7540/18660 [03:26<04:47, 38.73it/s]
 40%|████      | 7545/18660 [03:26<04:42, 39.29it/s]
 40%|████      | 7550/18660 [03:26<04:38, 39.93it/s]
 40%|████      | 7555/18660 [03:26<04:36, 40.18it/s]
 41%|████      | 7560/18660 [03:27<04:33, 40.53it/s]
 41%|████      | 7565/18660 [03:27<04:37, 39.91it/s]
 41%|████      | 7570/18660 [03:27<04:34, 40.37it/s]
 41%|███

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:38:22. Total running time: 18min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m  41%|████      | 7593/18660 [03:27<04:40, 39.45it/s]
 41%|████      | 7598/18660 [03:28<04:36, 40.05it/s]
 41%|████      | 7603/18660 [03:28<04:33, 40.47it/s]
 41%|████      | 7608/18660 [03:28<04:34, 40.32it/s]
 41%|████      | 7613/18660 [03:28<04:32, 40.54it/s]
 41%|████      | 7618/18660 [03:28<04:31, 40.71it/s]
 41%|████      | 7623/18660 [03:28<04:34, 40.19it/s]
 41%|████      | 7628/18660 [03:28<04:33, 40.27it/s]
 41%|████      | 7633/18660 [03:28<04:38, 39.55it/s]
 41%|████      | 7638/18660 [03:29<04:35, 40.04it/s]
 41%|████      | 7643/18660 [03:29<04:33, 40.31it/s]
 41%|████      | 7648/18660 [03:29<04:46, 38.41it/s]
 41%|████      | 7653/18660 [03:29<04:42, 38.93it/s]
 41%|████      | 7657/18660 [03:29<04:42, 38.95it/s]
 41%|████      | 7662/18660 [03:29<04:38, 39.45it/s]
 41%|████      | 7667/18660 [03:29<04:36, 39.83it/s]
 41%|████      | 7672/18660 [03:29<04:34, 40.01it/s]
 41%|████      | 7677/18660 [03:30<04:42, 38.89it/s]
 41%|████

[2m[36m(_objective pid=20061)[0m {'loss': 0.1504, 'learning_rate': 9.277939350731776e-06, 'epoch': 1.29}


[2m[36m(_objective pid=20061)[0m  43%|████▎     | 8002/18660 [03:38<04:31, 39.24it/s]
 43%|████▎     | 8006/18660 [03:38<04:57, 35.85it/s]
 43%|████▎     | 8010/18660 [03:38<04:47, 36.98it/s]
 43%|████▎     | 8014/18660 [03:38<04:42, 37.71it/s]
 43%|████▎     | 8018/18660 [03:38<04:51, 36.54it/s]
 43%|████▎     | 8022/18660 [03:38<04:45, 37.29it/s]
 43%|████▎     | 8027/18660 [03:38<04:38, 38.18it/s]
 43%|████▎     | 8031/18660 [03:39<04:39, 38.07it/s]
 43%|████▎     | 8035/18660 [03:39<04:37, 38.25it/s]
 43%|████▎     | 8039/18660 [03:39<04:35, 38.58it/s]
 43%|████▎     | 8043/18660 [03:39<04:35, 38.59it/s]
 43%|████▎     | 8047/18660 [03:39<04:37, 38.27it/s]
 43%|████▎     | 8052/18660 [03:39<04:32, 38.92it/s]
 43%|████▎     | 8056/18660 [03:39<04:31, 39.02it/s]
 43%|████▎     | 8060/18660 [03:39<04:30, 39.12it/s]
 43%|████▎     | 8064/18660 [03:39<04:29, 39.29it/s]
 43%|████▎     | 8068/18660 [03:40<04:31, 39.06it/s]
 43%|████▎     | 8072/18660 [03:40<04:36, 38.23it/s]
 43%|████

[2m[36m(_objective pid=20061)[0m {'loss': 0.1556, 'learning_rate': 8.842763959046419e-06, 'epoch': 1.37}


[2m[36m(_objective pid=20061)[0m                                                      46%|████▌     | 8500/18660 [03:51<04:17, 39.48it/s] 46%|████▌     | 8502/18660 [03:51<04:14, 39.94it/s]
 46%|████▌     | 8507/18660 [03:51<04:13, 39.98it/s]
 46%|████▌     | 8512/18660 [03:51<04:13, 40.07it/s]
 46%|████▌     | 8517/18660 [03:51<04:13, 40.08it/s]
 46%|████▌     | 8522/18660 [03:51<04:12, 40.16it/s]
 46%|████▌     | 8527/18660 [03:51<04:12, 40.10it/s]
 46%|████▌     | 8532/18660 [03:51<04:12, 40.18it/s]
 46%|████▌     | 8537/18660 [03:52<04:14, 39.82it/s]
 46%|████▌     | 8541/18660 [03:52<04:15, 39.57it/s]
 46%|████▌     | 8545/18660 [03:52<04:27, 37.85it/s]
 46%|████▌     | 8549/18660 [03:52<04:24, 38.18it/s]
 46%|████▌     | 8553/18660 [03:52<04:23, 38.34it/s]
 46%|████▌     | 8557/18660 [03:52<04:23, 38.29it/s]
 46%|████▌     | 8561/18660 [03:52<04:28, 37.59it/s]
 46%|████▌     | 8565/18660 [03:52<04:34, 36.77it/s]
 46%|████▌     | 8570/18660 [03:52<04:26, 37.87it/s]
 46%|███

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:38:52. Total running time: 18min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

 47%|████▋     | 8765/18660 [03:58<04:18, 38.30it/s]
 47%|████▋     | 8769/18660 [03:58<04:21, 37.83it/s]
 47%|████▋     | 8773/18660 [03:58<04:26, 37.08it/s]
 47%|████▋     | 8777/18660 [03:58<04:23, 37.44it/s]
 47%|████▋     | 8781/18660 [03:58<04:30, 36.58it/s]
 47%|████▋     | 8786/18660 [03:58<04:20, 37.97it/s]
 47%|████▋     | 8791/18660 [03:58<04:14, 38.80it/s]
 47%|████▋     | 8796/18660 [03:58<04:10, 39.33it/s]
 47%|████▋     | 8801/18660 [03:58<04:10, 39.38it/s]
 47%|████▋     | 8806/18660 [03:59<04:13, 38.87it/s]
 47%|████▋     | 8811/18660 [03:59<04:07, 39.76it/s]
 47%|████▋     | 8815/18660 [03:59<04:09, 39.53it/s]
 47%|████▋     | 8820/18660 [03:59<04:06, 39.97it/s]
 47%|████▋     | 8824/18660 [03:59<04:06, 39.96it/s]
 47%|████▋     | 8829/18660 [03:59<04:07, 39.71it/s]
 47%|████▋     | 8833/18660 [03:59<04:07, 39.68it/s]
 47%|████▋     | 8837/18660 [03:59<04:08, 39.55it/s]
 47%|████▋     | 8842/18660 [04:00<04:07, 39.70it/s]
 47%|████▋     | 8846/18660 [04:00<04:07, 39.6

[2m[36m(_objective pid=20061)[0m {'loss': 0.1531, 'learning_rate': 8.407588567361065e-06, 'epoch': 1.45}


[2m[36m(_objective pid=20061)[0m  48%|████▊     | 9000/18660 [04:04<04:06, 39.18it/s]                                                     48%|████▊     | 9000/18660 [04:04<04:06, 39.18it/s]
 48%|████▊     | 9004/18660 [04:04<04:05, 39.32it/s]
 48%|████▊     | 9009/18660 [04:04<04:01, 40.01it/s]
 48%|████▊     | 9014/18660 [04:04<04:00, 40.06it/s]
 48%|████▊     | 9019/18660 [04:04<03:57, 40.55it/s]
 48%|████▊     | 9024/18660 [04:04<03:58, 40.35it/s]
 48%|████▊     | 9029/18660 [04:04<03:59, 40.16it/s]
 48%|████▊     | 9034/18660 [04:04<03:57, 40.46it/s]
 48%|████▊     | 9039/18660 [04:04<03:56, 40.72it/s]
 48%|████▊     | 9044/18660 [04:05<03:54, 40.96it/s]
 48%|████▊     | 9049/18660 [04:05<03:54, 41.00it/s]
 49%|████▊     | 9054/18660 [04:05<03:55, 40.86it/s]
 49%|████▊     | 9059/18660 [04:05<03:57, 40.40it/s]
 49%|████▊     | 9064/18660 [04:05<04:09, 38.44it/s]
 49%|████▊     | 9069/18660 [04:05<04:06, 38.92it/s]
 49%|████▊     | 9073/18660 [04:05<04:06, 38.87it/s]
 49%|███

[2m[36m(_objective pid=20061)[0m {'loss': 0.1817, 'learning_rate': 7.97241317567571e-06, 'epoch': 1.53}


[2m[36m(_objective pid=20061)[0m  51%|█████     | 9498/18660 [04:17<03:59, 38.26it/s]                                                     51%|█████     | 9500/18660 [04:17<03:59, 38.26it/s]
 51%|█████     | 9502/18660 [04:17<03:58, 38.46it/s]
 51%|█████     | 9506/18660 [04:17<03:57, 38.62it/s]
 51%|█████     | 9510/18660 [04:17<03:55, 38.84it/s]
 51%|█████     | 9515/18660 [04:17<03:51, 39.47it/s]
 51%|█████     | 9520/18660 [04:17<03:48, 40.03it/s]
 51%|█████     | 9524/18660 [04:17<03:49, 39.74it/s]
 51%|█████     | 9528/18660 [04:17<03:52, 39.34it/s]
 51%|█████     | 9533/18660 [04:17<03:49, 39.68it/s]
 51%|█████     | 9538/18660 [04:18<03:57, 38.36it/s]
 51%|█████     | 9542/18660 [04:18<03:55, 38.78it/s]
 51%|█████     | 9547/18660 [04:18<03:51, 39.35it/s]
 51%|█████     | 9551/18660 [04:18<03:53, 38.99it/s]
 51%|█████     | 9555/18660 [04:18<03:53, 39.02it/s]
 51%|█████     | 9560/18660 [04:18<03:50, 39.49it/s]
 51%|█████▏    | 9565/18660 [04:18<03:47, 39.94it/s]
 51%|███

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:39:22. Total running time: 19min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m  53%|█████▎    | 9919/18660 [04:28<03:52, 37.55it/s]
 53%|█████▎    | 9923/18660 [04:28<03:55, 37.14it/s]
 53%|█████▎    | 9927/18660 [04:28<03:57, 36.83it/s]
 53%|█████▎    | 9931/18660 [04:28<03:56, 36.89it/s]
 53%|█████▎    | 9935/18660 [04:28<03:53, 37.43it/s]
 53%|█████▎    | 9939/18660 [04:28<03:54, 37.19it/s]
 53%|█████▎    | 9943/18660 [04:28<03:57, 36.68it/s]
 53%|█████▎    | 9947/18660 [04:28<03:55, 37.04it/s]
 53%|█████▎    | 9951/18660 [04:28<03:50, 37.75it/s]
 53%|█████▎    | 9955/18660 [04:28<03:49, 37.92it/s]
 53%|█████▎    | 9959/18660 [04:29<03:48, 38.03it/s]
 53%|█████▎    | 9963/18660 [04:29<03:49, 37.86it/s]
 53%|█████▎    | 9967/18660 [04:29<03:48, 38.02it/s]
 53%|█████▎    | 9971/18660 [04:29<03:46, 38.32it/s]
 53%|█████▎    | 9975/18660 [04:29<04:08, 34.91it/s]
 53%|█████▎    | 9979/18660 [04:29<04:04, 35.48it/s]
 53%|█████▎    | 9983/18660 [04:29<03:57, 36.55it/s]
 54%|█████▎    | 9988/18660 [04:29<03:49, 37.81it/s]
 54%|████

[2m[36m(_objective pid=20061)[0m {'loss': 0.1349, 'learning_rate': 7.537237783990354e-06, 'epoch': 1.61}


[2m[36m(_objective pid=20061)[0m  54%|█████▎    | 10004/18660 [04:30<03:48, 37.96it/s]
 54%|█████▎    | 10008/18660 [04:30<03:47, 38.07it/s]
 54%|█████▎    | 10012/18660 [04:30<03:44, 38.46it/s]
 54%|█████▎    | 10017/18660 [04:30<03:40, 39.12it/s]
 54%|█████▎    | 10022/18660 [04:30<03:39, 39.41it/s]
 54%|█████▎    | 10027/18660 [04:30<03:37, 39.73it/s]
 54%|█████▍    | 10032/18660 [04:31<03:39, 39.24it/s]
 54%|█████▍    | 10036/18660 [04:31<03:41, 38.97it/s]
 54%|█████▍    | 10041/18660 [04:31<03:48, 37.79it/s]
 54%|█████▍    | 10045/18660 [04:31<03:47, 37.90it/s]
 54%|█████▍    | 10050/18660 [04:31<03:41, 38.85it/s]
 54%|█████▍    | 10054/18660 [04:31<03:47, 37.78it/s]
 54%|█████▍    | 10059/18660 [04:31<03:42, 38.58it/s]
 54%|█████▍    | 10064/18660 [04:31<03:38, 39.27it/s]
 54%|█████▍    | 10069/18660 [04:31<03:36, 39.67it/s]
 54%|█████▍    | 10074/18660 [04:32<03:36, 39.73it/s]
 54%|█████▍    | 10079/18660 [04:32<03:35, 39.84it/s]
 54%|█████▍    | 10084/18660 [04:32<03:33, 40

[2m[36m(_objective pid=20061)[0m {'loss': 0.1863, 'learning_rate': 7.102062392304999e-06, 'epoch': 1.69}


[2m[36m(_objective pid=20061)[0m  56%|█████▋    | 10503/18660 [04:43<03:48, 35.66it/s]
 56%|█████▋    | 10507/18660 [04:43<03:48, 35.71it/s]
 56%|█████▋    | 10511/18660 [04:43<03:45, 36.18it/s]
 56%|█████▋    | 10515/18660 [04:43<03:41, 36.71it/s]
 56%|█████▋    | 10519/18660 [04:43<03:38, 37.18it/s]
 56%|█████▋    | 10523/18660 [04:43<03:39, 37.08it/s]
 56%|█████▋    | 10527/18660 [04:43<03:36, 37.52it/s]
 56%|█████▋    | 10531/18660 [04:43<03:35, 37.74it/s]
 56%|█████▋    | 10535/18660 [04:43<03:31, 38.33it/s]
 56%|█████▋    | 10539/18660 [04:44<03:30, 38.56it/s]
 57%|█████▋    | 10544/18660 [04:44<03:27, 39.13it/s]
 57%|█████▋    | 10548/18660 [04:44<03:26, 39.22it/s]
 57%|█████▋    | 10552/18660 [04:44<03:27, 39.09it/s]
 57%|█████▋    | 10556/18660 [04:44<03:32, 38.12it/s]
 57%|█████▋    | 10560/18660 [04:44<03:33, 37.96it/s]
 57%|█████▋    | 10564/18660 [04:44<03:36, 37.34it/s]
 57%|█████▋    | 10568/18660 [04:44<03:40, 36.76it/s]
 57%|█████▋    | 10572/18660 [04:44<03:38, 37

[2m[36m(_objective pid=20061)[0m {'loss': 0.2028, 'learning_rate': 6.6668870006196435e-06, 'epoch': 1.77}


[2m[36m(_objective pid=20061)[0m  59%|█████▉    | 11006/18660 [04:56<03:12, 39.68it/s]
 59%|█████▉    | 11010/18660 [04:56<03:12, 39.70it/s]
 59%|█████▉    | 11014/18660 [04:56<03:14, 39.29it/s]
 59%|█████▉    | 11018/18660 [04:56<03:15, 39.19it/s]
 59%|█████▉    | 11022/18660 [04:56<03:14, 39.18it/s]
 59%|█████▉    | 11026/18660 [04:56<03:15, 39.11it/s]
 59%|█████▉    | 11030/18660 [04:56<03:18, 38.52it/s]
 59%|█████▉    | 11034/18660 [04:56<03:18, 38.43it/s]
 59%|█████▉    | 11038/18660 [04:56<03:24, 37.34it/s]
 59%|█████▉    | 11042/18660 [04:57<03:20, 38.06it/s]
 59%|█████▉    | 11046/18660 [04:57<03:18, 38.36it/s]
 59%|█████▉    | 11050/18660 [04:57<03:20, 37.97it/s]
 59%|█████▉    | 11054/18660 [04:57<03:18, 38.26it/s]
 59%|█████▉    | 11058/18660 [04:57<03:17, 38.41it/s]
 59%|█████▉    | 11062/18660 [04:57<03:19, 38.08it/s]
 59%|█████▉    | 11066/18660 [04:57<03:20, 37.88it/s]
 59%|█████▉    | 11070/18660 [04:57<03:19, 38.06it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:39:52. Total running time: 19min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

[2m[36m(_objective pid=20061)[0m  59%|█████▉    | 11074/18660 [04:57<03:24, 37.08it/s]
 59%|█████▉    | 11079/18660 [04:58<03:18, 38.10it/s]
 59%|█████▉    | 11083/18660 [04:58<03:16, 38.58it/s]
 59%|█████▉    | 11087/18660 [04:58<03:16, 38.50it/s]
 59%|█████▉    | 11091/18660 [04:58<03:15, 38.76it/s]
 59%|█████▉    | 11095/18660 [04:58<03:15, 38.67it/s]
 59%|█████▉    | 11099/18660 [04:58<03:14, 38.86it/s]
 60%|█████▉    | 11103/18660 [04:58<03:18, 38.01it/s]
 60%|█████▉    | 11107/18660 [04:58<03:21, 37.42it/s]
 60%|█████▉    | 11111/18660 [04:58<03:32, 35.54it/s]
 60%|█████▉    | 11115/18660 [04:58<03:30, 35.92it/s]
 60%|█████▉    | 11119/18660 [04:59<03:28, 36.11it/s]
 60%|█████▉    | 11123/18660 [04:59<03:28, 36.16it/s]
 60%|█████▉    | 11127/18660 [04:59<03:29, 35.91it/s]
 60%|█████▉    | 11131/18660 [04:59<03:30, 35.76it/s]
 60%|█████▉    | 11135/18660 [04:59<03:30, 35.76it/s]
 60%|█████▉    | 11139/18660 [04:59<03:28, 36.01it/s]
 60%|█████▉    | 11143/18660 [04:59<03:29, 35

[2m[36m(_objective pid=20061)[0m {'loss': 0.1986, 'learning_rate': 6.231711608934288e-06, 'epoch': 1.85}


[2m[36m(_objective pid=20061)[0m                                                       62%|██████▏   | 11500/18660 [05:09<03:04, 38.82it/s] 62%|██████▏   | 11501/18660 [05:09<03:03, 39.04it/s]
 62%|██████▏   | 11505/18660 [05:09<03:03, 38.98it/s]
 62%|██████▏   | 11509/18660 [05:09<03:02, 39.08it/s]
 62%|██████▏   | 11513/18660 [05:09<03:03, 38.89it/s]
 62%|██████▏   | 11517/18660 [05:09<03:03, 38.90it/s]
 62%|██████▏   | 11521/18660 [05:09<03:05, 38.53it/s]
 62%|██████▏   | 11525/18660 [05:09<03:03, 38.80it/s]
 62%|██████▏   | 11529/18660 [05:09<03:02, 38.99it/s]
 62%|██████▏   | 11533/18660 [05:09<03:02, 39.00it/s]
 62%|██████▏   | 11538/18660 [05:10<03:01, 39.19it/s]
 62%|██████▏   | 11543/18660 [05:10<02:59, 39.64it/s]
 62%|██████▏   | 11548/18660 [05:10<02:58, 39.77it/s]
 62%|██████▏   | 11553/18660 [05:10<02:58, 39.75it/s]
 62%|██████▏   | 11557/18660 [05:10<03:00, 39.44it/s]
 62%|██████▏   | 11561/18660 [05:10<03:01, 39.03it/s]
 62%|██████▏   | 11565/18660 [05:10<03:03, 3

[2m[36m(_objective pid=20061)[0m {'loss': 0.1228, 'learning_rate': 5.796536217248933e-06, 'epoch': 1.93}


[2m[36m(_objective pid=20061)[0m  64%|██████▍   | 12002/18660 [05:22<02:47, 39.86it/s]
 64%|██████▍   | 12006/18660 [05:22<02:46, 39.89it/s]
 64%|██████▍   | 12010/18660 [05:22<02:46, 39.90it/s]
 64%|██████▍   | 12015/18660 [05:22<02:45, 40.24it/s]
 64%|██████▍   | 12020/18660 [05:22<02:43, 40.56it/s]
 64%|██████▍   | 12025/18660 [05:23<02:46, 39.80it/s]
 64%|██████▍   | 12030/18660 [05:23<02:46, 39.70it/s]
 64%|██████▍   | 12035/18660 [05:23<02:44, 40.26it/s]
 65%|██████▍   | 12040/18660 [05:23<02:45, 40.05it/s]
 65%|██████▍   | 12045/18660 [05:23<02:44, 40.22it/s]
 65%|██████▍   | 12050/18660 [05:23<02:47, 39.56it/s]
 65%|██████▍   | 12055/18660 [05:23<02:49, 38.89it/s]
 65%|██████▍   | 12060/18660 [05:23<02:46, 39.63it/s]
 65%|██████▍   | 12065/18660 [05:24<02:45, 39.87it/s]
 65%|██████▍   | 12069/18660 [05:24<04:38, 23.65it/s]
 65%|██████▍   | 12073/18660 [05:24<04:09, 26.36it/s]
 65%|██████▍   | 12077/18660 [05:24<03:46, 29.10it/s]
 65%|██████▍   | 12081/18660 [05:24<03:30, 31

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:40:22. Total running time: 20min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

 65%|██████▌   | 12201/18660 [05:28<02:51, 37.58it/s]
 65%|██████▌   | 12206/18660 [05:28<02:48, 38.35it/s]
 65%|██████▌   | 12210/18660 [05:28<02:47, 38.46it/s]
 65%|██████▌   | 12214/18660 [05:28<02:47, 38.56it/s]
 65%|██████▌   | 12218/18660 [05:28<02:46, 38.80it/s]
 65%|██████▌   | 12222/18660 [05:28<02:45, 38.97it/s]
 66%|██████▌   | 12226/18660 [05:28<02:45, 38.90it/s]
 66%|██████▌   | 12230/18660 [05:28<02:46, 38.68it/s]
 66%|██████▌   | 12234/18660 [05:28<02:45, 38.76it/s]
 66%|██████▌   | 12238/18660 [05:28<02:44, 39.12it/s]
 66%|██████▌   | 12242/18660 [05:29<02:44, 38.94it/s]
 66%|██████▌   | 12246/18660 [05:29<02:52, 37.23it/s]
 66%|██████▌   | 12250/18660 [05:29<02:50, 37.60it/s]
 66%|██████▌   | 12254/18660 [05:29<02:47, 38.14it/s]
 66%|██████▌   | 12258/18660 [05:29<02:49, 37.70it/s]
 66%|██████▌   | 12262/18660 [05:29<02:50, 37.60it/s]
 66%|██████▌   | 12266/18660 [05:29<02:51, 37.32it/s]
 66%|██████▌   | 12270/18660 [05:29<02:51, 37.18it/s]
 66%|██████▌   | 12274/18660

Trial _objective_f556c_00001 finished iteration 2 at 2023-09-11 13:40:38. Total running time: 20min 17s
+-------------------------------------------------+
| Trial _objective_f556c_00001 result             |
+-------------------------------------------------+
| time_this_iter_s                        172.768 |
| time_total_s                            346.504 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.22169 |
| eval_runtime                             9.5769 |
| eval_samples_per_second                 433.019 |
| eval_steps_per_second                    13.574 |
| objective                               0.22169 |
+-------------------------------------------------+

[2m[36m(_objective pid=20061)[0m {'eval_loss': 0.22168810665607452, 'eval_runtime': 9.5769, 'eval_samples_per_second': 433.019, 'eval_steps_per_second': 13.574, 'epoch': 2.0}


[2m[36m(_objective pid=20061)[0m                                                      
[2m[36m(_objective pid=20061)[0m                                                  [A 67%|██████▋   | 12440/18660 [05:44<02:52, 36.10it/s]
[2m[36m(_objective pid=20061)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
                                                 [A
 67%|██████▋   | 12442/18660 [05:45<1:27:17,  1.19it/s]
 67%|██████▋   | 12446/18660 [05:45<1:01:55,  1.67it/s]
 67%|██████▋   | 12450/18660 [05:45<44:12,  2.34it/s]  
 67%|██████▋   | 12454/18660 [05:45<31:47,  3.25it/s]
 67%|██████▋   | 12458/18660 [05:45<23:05,  4.48it/s]
 67%|██████▋   | 12462/18660 [05:46<17:00,  6.08it/s]
 67%|██████▋   | 12466/18660 [05:46<12:47,  8.07it/s]
 67%|██████▋   | 12470/18660 [05:46<09:50, 10.49it/s]
 67%|██████▋   | 12474/18660 [05:46<07:50, 13.16it/s]
 67%|██████▋   | 12478/18660 [05:46<06:19, 16.28it/s]
 67%|██████▋   | 12482/18660 [05:46<05:18, 19.38it/s]
 67%|██████▋   | 1248

[2m[36m(_objective pid=20061)[0m {'loss': 0.1941, 'learning_rate': 5.361360825563578e-06, 'epoch': 2.01}


[2m[36m(_objective pid=20061)[0m  67%|██████▋   | 12498/18660 [05:47<03:24, 30.18it/s]                                                      67%|██████▋   | 12500/18660 [05:47<03:24, 30.18it/s]
 67%|██████▋   | 12502/18660 [05:47<03:13, 31.85it/s]
 67%|██████▋   | 12506/18660 [05:47<03:02, 33.70it/s]
 67%|██████▋   | 12510/18660 [05:47<02:57, 34.72it/s]
 67%|██████▋   | 12514/18660 [05:47<02:54, 35.29it/s]
 67%|██████▋   | 12518/18660 [05:47<02:52, 35.59it/s]
 67%|██████▋   | 12522/18660 [05:47<02:49, 36.20it/s]
 67%|██████▋   | 12526/18660 [05:47<02:49, 36.10it/s]
 67%|██████▋   | 12530/18660 [05:47<02:46, 36.89it/s]
 67%|██████▋   | 12534/18660 [05:48<02:44, 37.22it/s]
 67%|██████▋   | 12538/18660 [05:48<02:48, 36.43it/s]
 67%|██████▋   | 12542/18660 [05:48<02:45, 37.08it/s]
 67%|██████▋   | 12546/18660 [05:48<02:43, 37.29it/s]
 67%|██████▋   | 12550/18660 [05:48<02:49, 35.95it/s]
 67%|██████▋   | 12554/18660 [05:48<02:48, 36.31it/s]
 67%|██████▋   | 12558/18660 [05:48<02:47, 3

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:40:52. Total running time: 20min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

[2m[36m(_objective pid=20061)[0m  69%|██████▉   | 12890/18660 [05:57<02:44, 34.97it/s]
 69%|██████▉   | 12894/18660 [05:58<02:43, 35.21it/s]
 69%|██████▉   | 12898/18660 [05:58<02:41, 35.65it/s]
 69%|██████▉   | 12902/18660 [05:58<02:41, 35.63it/s]
 69%|██████▉   | 12906/18660 [05:58<02:40, 35.94it/s]
 69%|██████▉   | 12910/18660 [05:58<02:39, 35.95it/s]
 69%|██████▉   | 12914/18660 [05:58<02:39, 35.99it/s]
 69%|██████▉   | 12918/18660 [05:58<02:41, 35.62it/s]
 69%|██████▉   | 12922/18660 [05:58<02:39, 35.97it/s]
 69%|██████▉   | 12926/18660 [05:58<02:37, 36.48it/s]
 69%|██████▉   | 12930/18660 [05:59<02:38, 36.23it/s]
 69%|██████▉   | 12934/18660 [05:59<02:38, 36.20it/s]
 69%|██████▉   | 12938/18660 [05:59<02:38, 36.08it/s]
 69%|██████▉   | 12942/18660 [05:59<02:37, 36.33it/s]
 69%|██████▉   | 12946/18660 [05:59<02:37, 36.28it/s]
 69%|██████▉   | 12950/18660 [05:59<02:39, 35.85it/s]
 69%|██████▉   | 12954/18660 [05:59<02:42, 35.04it/s]
 69%|██████▉   | 12958/18660 [05:59<02:41, 35

[2m[36m(_objective pid=20061)[0m {'loss': 0.1044, 'learning_rate': 4.926185433878222e-06, 'epoch': 2.09}


[2m[36m(_objective pid=20061)[0m  70%|██████▉   | 13002/18660 [06:01<02:44, 34.42it/s]
 70%|██████▉   | 13006/18660 [06:01<02:44, 34.31it/s]
 70%|██████▉   | 13010/18660 [06:01<02:44, 34.30it/s]
 70%|██████▉   | 13014/18660 [06:01<02:43, 34.61it/s]
 70%|██████▉   | 13018/18660 [06:01<02:38, 35.49it/s]
 70%|██████▉   | 13022/18660 [06:01<02:41, 34.96it/s]
 70%|██████▉   | 13026/18660 [06:01<02:41, 34.80it/s]
 70%|██████▉   | 13030/18660 [06:01<02:49, 33.21it/s]
 70%|██████▉   | 13034/18660 [06:02<02:51, 32.80it/s]
 70%|██████▉   | 13038/18660 [06:02<02:48, 33.35it/s]
 70%|██████▉   | 13042/18660 [06:02<02:46, 33.69it/s]
 70%|██████▉   | 13046/18660 [06:02<02:51, 32.79it/s]
 70%|██████▉   | 13050/18660 [06:02<02:48, 33.28it/s]
 70%|██████▉   | 13054/18660 [06:02<02:47, 33.54it/s]
 70%|██████▉   | 13058/18660 [06:02<02:50, 32.86it/s]
 70%|███████   | 13062/18660 [06:02<02:45, 33.77it/s]
 70%|███████   | 13066/18660 [06:03<02:42, 34.44it/s]
 70%|███████   | 13070/18660 [06:03<02:40, 34

[2m[36m(_objective pid=20061)[0m {'loss': 0.1032, 'learning_rate': 4.491010042192867e-06, 'epoch': 2.17}


[2m[36m(_objective pid=20061)[0m  72%|███████▏  | 13506/18660 [06:15<02:25, 35.32it/s]
 72%|███████▏  | 13510/18660 [06:15<02:23, 35.92it/s]
 72%|███████▏  | 13514/18660 [06:15<02:21, 36.43it/s]
 72%|███████▏  | 13518/18660 [06:15<02:21, 36.22it/s]
 72%|███████▏  | 13522/18660 [06:15<02:20, 36.50it/s]
 72%|███████▏  | 13526/18660 [06:15<02:21, 36.41it/s]
 73%|███████▎  | 13530/18660 [06:16<02:22, 36.11it/s]
 73%|███████▎  | 13534/18660 [06:16<02:19, 36.68it/s]
 73%|███████▎  | 13538/18660 [06:16<02:25, 35.25it/s]
 73%|███████▎  | 13542/18660 [06:16<02:21, 36.18it/s]
 73%|███████▎  | 13546/18660 [06:16<02:20, 36.39it/s]
 73%|███████▎  | 13550/18660 [06:16<02:26, 34.95it/s]
 73%|███████▎  | 13554/18660 [06:16<02:27, 34.53it/s]
 73%|███████▎  | 13558/18660 [06:16<02:25, 35.07it/s]
 73%|███████▎  | 13562/18660 [06:16<02:23, 35.43it/s]
 73%|███████▎  | 13566/18660 [06:17<02:23, 35.56it/s]
 73%|███████▎  | 13570/18660 [06:17<02:21, 35.96it/s]
 73%|███████▎  | 13574/18660 [06:17<02:20, 36

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:41:22. Total running time: 21min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m  75%|███████▍  | 13959/18660 [06:28<02:01, 38.80it/s]
 75%|███████▍  | 13963/18660 [06:28<02:02, 38.39it/s]
 75%|███████▍  | 13967/18660 [06:28<02:01, 38.54it/s]
 75%|███████▍  | 13971/18660 [06:28<02:03, 37.88it/s]
 75%|███████▍  | 13975/18660 [06:28<02:02, 38.31it/s]
 75%|███████▍  | 13979/18660 [06:28<02:02, 38.06it/s]
 75%|███████▍  | 13983/18660 [06:28<02:02, 38.13it/s]
 75%|███████▍  | 13987/18660 [06:28<02:01, 38.48it/s]
 75%|███████▍  | 13992/18660 [06:28<01:59, 39.08it/s]
 75%|███████▌  | 13996/18660 [06:29<01:59, 39.13it/s]


[2m[36m(_objective pid=20061)[0m {'loss': 0.0831, 'learning_rate': 4.055834650507512e-06, 'epoch': 2.25}


[2m[36m(_objective pid=20061)[0m  75%|███████▌  | 14000/18660 [06:29<01:58, 39.17it/s]                                                      75%|███████▌  | 14000/18660 [06:29<01:58, 39.17it/s]
 75%|███████▌  | 14004/18660 [06:29<02:01, 38.29it/s]
 75%|███████▌  | 14009/18660 [06:29<01:59, 39.02it/s]
 75%|███████▌  | 14013/18660 [06:29<01:58, 39.19it/s]
 75%|███████▌  | 14017/18660 [06:29<02:02, 37.79it/s]
 75%|███████▌  | 14021/18660 [06:29<02:02, 37.72it/s]
 75%|███████▌  | 14025/18660 [06:29<02:01, 38.14it/s]
 75%|███████▌  | 14029/18660 [06:29<02:00, 38.42it/s]
 75%|███████▌  | 14033/18660 [06:30<02:00, 38.33it/s]
 75%|███████▌  | 14037/18660 [06:30<02:00, 38.42it/s]
 75%|███████▌  | 14041/18660 [06:30<02:00, 38.42it/s]
 75%|███████▌  | 14045/18660 [06:30<02:01, 37.96it/s]
 75%|███████▌  | 14049/18660 [06:30<02:01, 37.96it/s]
 75%|███████▌  | 14053/18660 [06:30<02:01, 37.79it/s]
 75%|███████▌  | 14057/18660 [06:30<02:06, 36.26it/s]
 75%|███████▌  | 14061/18660 [06:30<02:06, 3

[2m[36m(_objective pid=20061)[0m {'loss': 0.0914, 'learning_rate': 3.6206592588221566e-06, 'epoch': 2.33}


 78%|███████▊  | 14502/18660 [06:42<01:52, 36.89it/s]
 78%|███████▊  | 14506/18660 [06:43<01:54, 36.19it/s]
 78%|███████▊  | 14510/18660 [06:43<01:51, 37.09it/s]
 78%|███████▊  | 14514/18660 [06:43<01:50, 37.50it/s]
 78%|███████▊  | 14519/18660 [06:43<01:48, 38.21it/s]
 78%|███████▊  | 14523/18660 [06:43<01:48, 38.30it/s]
 78%|███████▊  | 14527/18660 [06:43<01:48, 37.94it/s]
 78%|███████▊  | 14531/18660 [06:43<01:48, 37.93it/s]
 78%|███████▊  | 14535/18660 [06:43<01:49, 37.66it/s]
 78%|███████▊  | 14539/18660 [06:43<01:48, 37.86it/s]
 78%|███████▊  | 14543/18660 [06:44<01:48, 37.78it/s]
 78%|███████▊  | 14547/18660 [06:44<01:47, 38.11it/s]
 78%|███████▊  | 14551/18660 [06:44<01:48, 37.92it/s]
 78%|███████▊  | 14555/18660 [06:44<01:47, 38.03it/s]
 78%|███████▊  | 14559/18660 [06:44<01:48, 37.72it/s]
 78%|███████▊  | 14563/18660 [06:44<01:48, 37.87it/s]
 78%|███████▊  | 14567/18660 [06:44<01:49, 37.32it/s]
 78%|███████▊  | 14571/18660 [06:44<01:50, 36.94it/s]
 78%|███████▊  | 14575/18660

[2m[36m(_objective pid=20061)[0m {'loss': 0.0672, 'learning_rate': 3.185483867136801e-06, 'epoch': 2.41}


[2m[36m(_objective pid=20061)[0m  80%|████████  | 14999/18660 [06:57<01:54, 32.10it/s]                                                      80%|████████  | 15000/18660 [06:57<01:54, 32.10it/s]
 80%|████████  | 15003/18660 [06:57<01:53, 32.10it/s]
 80%|████████  | 15007/18660 [06:57<01:50, 33.04it/s]
 80%|████████  | 15011/18660 [06:57<01:48, 33.57it/s]
 80%|████████  | 15015/18660 [06:57<01:46, 34.09it/s]
 80%|████████  | 15019/18660 [06:57<01:46, 34.15it/s]
 81%|████████  | 15023/18660 [06:57<01:45, 34.48it/s]
 81%|████████  | 15027/18660 [06:57<01:45, 34.55it/s]
 81%|████████  | 15031/18660 [06:57<01:43, 35.03it/s]
 81%|████████  | 15035/18660 [06:58<01:41, 35.74it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:41:52. Total running time: 21min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

[2m[36m(_objective pid=20061)[0m  81%|████████  | 15039/18660 [06:58<01:39, 36.45it/s]
 81%|████████  | 15043/18660 [06:58<01:40, 35.93it/s]
 81%|████████  | 15047/18660 [06:58<01:40, 35.91it/s]
 81%|████████  | 15051/18660 [06:58<01:38, 36.62it/s]
 81%|████████  | 15055/18660 [06:58<01:36, 37.39it/s]
 81%|████████  | 15059/18660 [06:58<01:36, 37.35it/s]
 81%|████████  | 15063/18660 [06:58<01:36, 37.35it/s]
 81%|████████  | 15067/18660 [06:58<01:39, 35.97it/s]
 81%|████████  | 15071/18660 [06:59<01:36, 37.05it/s]
 81%|████████  | 15075/18660 [06:59<01:40, 35.78it/s]
 81%|████████  | 15079/18660 [06:59<01:38, 36.41it/s]
 81%|████████  | 15083/18660 [06:59<01:37, 36.52it/s]
 81%|████████  | 15087/18660 [06:59<01:40, 35.56it/s]
 81%|████████  | 15091/18660 [06:59<01:41, 35.28it/s]
 81%|████████  | 15095/18660 [06:59<01:41, 34.95it/s]
 81%|████████  | 15099/18660 [06:59<01:45, 33.80it/s]
 81%|████████  | 15103/18660 [06:59<01:43, 34.21it/s]
 81%|████████  | 15107/18660 [07:00<01:43, 34

[2m[36m(_objective pid=20061)[0m {'loss': 0.0808, 'learning_rate': 2.7503084754514458e-06, 'epoch': 2.49}


[2m[36m(_objective pid=20061)[0m  83%|████████▎ | 15499/18660 [07:11<01:31, 34.62it/s]                                                      83%|████████▎ | 15500/18660 [07:11<01:31, 34.62it/s]
 83%|████████▎ | 15503/18660 [07:11<01:33, 33.67it/s]
 83%|████████▎ | 15507/18660 [07:11<01:31, 34.41it/s]
 83%|████████▎ | 15511/18660 [07:11<01:30, 34.68it/s]
 83%|████████▎ | 15515/18660 [07:11<01:29, 34.98it/s]
 83%|████████▎ | 15519/18660 [07:11<01:30, 34.85it/s]
 83%|████████▎ | 15523/18660 [07:12<01:29, 35.02it/s]
 83%|████████▎ | 15527/18660 [07:12<01:30, 34.69it/s]
 83%|████████▎ | 15531/18660 [07:12<01:33, 33.44it/s]
 83%|████████▎ | 15535/18660 [07:12<01:32, 33.77it/s]
 83%|████████▎ | 15539/18660 [07:12<01:31, 33.95it/s]
 83%|████████▎ | 15543/18660 [07:12<01:34, 33.03it/s]
 83%|████████▎ | 15547/18660 [07:12<01:34, 33.06it/s]
 83%|████████▎ | 15551/18660 [07:12<01:34, 32.90it/s]
 83%|████████▎ | 15555/18660 [07:13<01:33, 33.15it/s]
 83%|████████▎ | 15559/18660 [07:13<01:33, 3

[2m[36m(_objective pid=20061)[0m {'loss': 0.1018, 'learning_rate': 2.3151330837660904e-06, 'epoch': 2.57}


[2m[36m(_objective pid=20061)[0m  86%|████████▌ | 16002/18660 [07:25<01:13, 36.26it/s]
 86%|████████▌ | 16006/18660 [07:25<01:15, 35.17it/s]
 86%|████████▌ | 16010/18660 [07:25<01:16, 34.64it/s]
 86%|████████▌ | 16014/18660 [07:25<01:15, 34.92it/s]
 86%|████████▌ | 16018/18660 [07:25<01:14, 35.36it/s]
 86%|████████▌ | 16022/18660 [07:25<01:14, 35.42it/s]
 86%|████████▌ | 16026/18660 [07:25<01:17, 34.06it/s]
 86%|████████▌ | 16030/18660 [07:26<01:16, 34.34it/s]
 86%|████████▌ | 16034/18660 [07:26<01:16, 34.31it/s]
 86%|████████▌ | 16038/18660 [07:26<01:15, 34.55it/s]
 86%|████████▌ | 16042/18660 [07:26<01:16, 34.23it/s]
 86%|████████▌ | 16046/18660 [07:26<01:18, 33.29it/s]
 86%|████████▌ | 16050/18660 [07:26<01:16, 34.34it/s]
 86%|████████▌ | 16054/18660 [07:26<01:18, 33.19it/s]
 86%|████████▌ | 16058/18660 [07:26<01:16, 34.23it/s]
 86%|████████▌ | 16062/18660 [07:27<01:14, 35.09it/s]
 86%|████████▌ | 16066/18660 [07:27<01:12, 35.68it/s]
 86%|████████▌ | 16070/18660 [07:27<01:11, 35

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:42:22. Total running time: 22min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m  86%|████████▋ | 16102/18660 [07:28<01:07, 37.74it/s]
 86%|████████▋ | 16106/18660 [07:28<01:08, 37.13it/s]
 86%|████████▋ | 16110/18660 [07:28<01:08, 37.17it/s]
 86%|████████▋ | 16114/18660 [07:28<01:11, 35.55it/s]
 86%|████████▋ | 16118/18660 [07:28<01:14, 34.32it/s]
 86%|████████▋ | 16122/18660 [07:28<01:11, 35.32it/s]
 86%|████████▋ | 16126/18660 [07:28<01:11, 35.52it/s]
 86%|████████▋ | 16130/18660 [07:28<01:10, 36.11it/s]
 86%|████████▋ | 16134/18660 [07:29<01:09, 36.50it/s]
 86%|████████▋ | 16138/18660 [07:29<01:08, 36.92it/s]
 87%|████████▋ | 16142/18660 [07:29<01:07, 37.03it/s]
 87%|████████▋ | 16146/18660 [07:29<01:08, 36.70it/s]
 87%|████████▋ | 16150/18660 [07:29<01:09, 36.00it/s]
 87%|████████▋ | 16154/18660 [07:29<01:09, 36.13it/s]
 87%|████████▋ | 16158/18660 [07:29<01:09, 36.09it/s]
 87%|████████▋ | 16162/18660 [07:29<01:08, 36.51it/s]
 87%|████████▋ | 16166/18660 [07:29<01:07, 36.77it/s]
 87%|████████▋ | 16170/18660 [07:29<01:06, 37

[2m[36m(_objective pid=20061)[0m {'loss': 0.105, 'learning_rate': 1.879957692080735e-06, 'epoch': 2.65}


[2m[36m(_objective pid=20061)[0m  88%|████████▊ | 16502/18660 [07:38<00:54, 39.36it/s]
 88%|████████▊ | 16506/18660 [07:38<00:54, 39.27it/s]
 88%|████████▊ | 16510/18660 [07:38<00:55, 39.07it/s]
 88%|████████▊ | 16514/18660 [07:38<00:55, 38.69it/s]
 89%|████████▊ | 16518/18660 [07:38<00:55, 38.38it/s]
 89%|████████▊ | 16522/18660 [07:39<00:56, 38.14it/s]
 89%|████████▊ | 16526/18660 [07:39<00:55, 38.15it/s]
 89%|████████▊ | 16530/18660 [07:39<00:55, 38.40it/s]
 89%|████████▊ | 16534/18660 [07:39<00:55, 38.55it/s]
 89%|████████▊ | 16538/18660 [07:39<00:55, 38.33it/s]
 89%|████████▊ | 16542/18660 [07:39<00:57, 36.72it/s]
 89%|████████▊ | 16546/18660 [07:39<00:57, 36.86it/s]
 89%|████████▊ | 16550/18660 [07:39<00:57, 36.75it/s]
 89%|████████▊ | 16554/18660 [07:39<00:56, 36.99it/s]
 89%|████████▊ | 16558/18660 [07:39<00:56, 37.35it/s]
 89%|████████▉ | 16562/18660 [07:40<00:58, 36.07it/s]
 89%|████████▉ | 16566/18660 [07:40<00:56, 36.81it/s]
 89%|████████▉ | 16570/18660 [07:40<00:57, 36

[2m[36m(_objective pid=20061)[0m {'loss': 0.0801, 'learning_rate': 1.4447823003953796e-06, 'epoch': 2.73}


[2m[36m(_objective pid=20061)[0m  91%|█████████ | 17006/18660 [07:51<00:42, 38.56it/s]
 91%|█████████ | 17010/18660 [07:52<00:42, 38.57it/s]
 91%|█████████ | 17014/18660 [07:52<00:42, 38.43it/s]
 91%|█████████ | 17018/18660 [07:52<00:43, 38.07it/s]
 91%|█████████ | 17022/18660 [07:52<00:43, 37.97it/s]
 91%|█████████ | 17026/18660 [07:52<00:43, 37.40it/s]
 91%|█████████▏| 17031/18660 [07:52<00:42, 38.12it/s]
 91%|█████████▏| 17035/18660 [07:52<00:42, 38.09it/s]
 91%|█████████▏| 17040/18660 [07:52<00:42, 38.45it/s]
 91%|█████████▏| 17044/18660 [07:52<00:41, 38.81it/s]
 91%|█████████▏| 17048/18660 [07:53<00:41, 38.40it/s]
 91%|█████████▏| 17053/18660 [07:53<00:40, 39.28it/s]
 91%|█████████▏| 17057/18660 [07:53<00:40, 39.20it/s]
 91%|█████████▏| 17062/18660 [07:53<00:40, 39.47it/s]
 91%|█████████▏| 17066/18660 [07:53<00:40, 39.36it/s]
 91%|█████████▏| 17070/18660 [07:53<00:40, 39.46it/s]
 92%|█████████▏| 17074/18660 [07:53<00:40, 39.39it/s]
 92%|█████████▏| 17079/18660 [07:53<00:39, 39

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:42:52. Total running time: 22min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                    

[2m[36m(_objective pid=20061)[0m  92%|█████████▏| 17237/18660 [07:58<00:41, 34.70it/s]
 92%|█████████▏| 17241/18660 [07:58<00:40, 35.27it/s]
 92%|█████████▏| 17245/18660 [07:58<00:39, 35.88it/s]
 92%|█████████▏| 17249/18660 [07:58<00:39, 35.99it/s]
 92%|█████████▏| 17253/18660 [07:58<00:39, 35.68it/s]
 92%|█████████▏| 17257/18660 [07:58<00:38, 36.50it/s]
 93%|█████████▎| 17261/18660 [07:58<00:37, 37.13it/s]
 93%|█████████▎| 17265/18660 [07:58<00:37, 36.83it/s]
 93%|█████████▎| 17269/18660 [07:58<00:36, 37.61it/s]
 93%|█████████▎| 17273/18660 [07:59<00:36, 37.57it/s]
 93%|█████████▎| 17277/18660 [07:59<00:36, 38.19it/s]
 93%|█████████▎| 17281/18660 [07:59<00:36, 38.12it/s]
 93%|█████████▎| 17285/18660 [07:59<00:36, 38.06it/s]
 93%|█████████▎| 17289/18660 [07:59<00:35, 38.12it/s]
 93%|█████████▎| 17293/18660 [07:59<00:35, 38.14it/s]
 93%|█████████▎| 17297/18660 [07:59<00:36, 37.72it/s]
 93%|█████████▎| 17301/18660 [07:59<00:36, 37.41it/s]
 93%|█████████▎| 17305/18660 [07:59<00:36, 37

[2m[36m(_objective pid=20061)[0m {'loss': 0.118, 'learning_rate': 1.0096069087100244e-06, 'epoch': 2.81}


[2m[36m(_objective pid=20061)[0m  94%|█████████▍| 17506/18660 [08:05<00:30, 37.83it/s]
 94%|█████████▍| 17511/18660 [08:05<00:31, 37.03it/s]
 94%|█████████▍| 17516/18660 [08:05<00:30, 38.09it/s]
 94%|█████████▍| 17520/18660 [08:05<00:29, 38.23it/s]
 94%|█████████▍| 17524/18660 [08:05<00:30, 36.90it/s]
 94%|█████████▍| 17529/18660 [08:05<00:29, 38.02it/s]
 94%|█████████▍| 17533/18660 [08:05<00:29, 38.38it/s]
 94%|█████████▍| 17537/18660 [08:06<00:30, 36.52it/s]
 94%|█████████▍| 17541/18660 [08:06<00:29, 37.33it/s]
 94%|█████████▍| 17545/18660 [08:06<00:29, 37.97it/s]
 94%|█████████▍| 17549/18660 [08:06<00:28, 38.40it/s]
 94%|█████████▍| 17554/18660 [08:06<00:28, 39.08it/s]
 94%|█████████▍| 17558/18660 [08:06<00:28, 38.31it/s]
 94%|█████████▍| 17562/18660 [08:06<00:28, 38.66it/s]
 94%|█████████▍| 17566/18660 [08:06<00:28, 38.63it/s]
 94%|█████████▍| 17570/18660 [08:06<00:27, 39.00it/s]
 94%|█████████▍| 17574/18660 [08:07<00:27, 39.12it/s]
 94%|█████████▍| 17579/18660 [08:07<00:27, 39

[2m[36m(_objective pid=20061)[0m {'loss': 0.1007, 'learning_rate': 5.74431517024669e-07, 'epoch': 2.89}


[2m[36m(_objective pid=20061)[0m  96%|█████████▋| 17999/18660 [08:18<00:17, 36.93it/s]                                                      96%|█████████▋| 18000/18660 [08:18<00:17, 36.93it/s]
 96%|█████████▋| 18003/18660 [08:18<00:17, 37.00it/s]
 97%|█████████▋| 18007/18660 [08:18<00:18, 35.45it/s]
 97%|█████████▋| 18011/18660 [08:18<00:18, 35.26it/s]
 97%|█████████▋| 18016/18660 [08:19<00:17, 36.73it/s]
 97%|█████████▋| 18020/18660 [08:19<00:17, 36.06it/s]
 97%|█████████▋| 18025/18660 [08:19<00:16, 37.36it/s]
 97%|█████████▋| 18029/18660 [08:19<00:16, 37.54it/s]
 97%|█████████▋| 18034/18660 [08:19<00:16, 38.35it/s]
 97%|█████████▋| 18038/18660 [08:19<00:16, 38.25it/s]
 97%|█████████▋| 18042/18660 [08:19<00:16, 38.38it/s]
 97%|█████████▋| 18046/18660 [08:19<00:16, 38.35it/s]
 97%|█████████▋| 18051/18660 [08:19<00:15, 38.59it/s]
 97%|█████████▋| 18055/18660 [08:20<00:15, 38.83it/s]
 97%|█████████▋| 18060/18660 [08:20<00:15, 38.82it/s]
 97%|█████████▋| 18064/18660 [08:20<00:15, 3

Trial status: 1 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:43:22. Total running time: 23min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00001   RUNNING          1.62407e-05                    3                     

[2m[36m(_objective pid=20061)[0m  98%|█████████▊| 18373/18660 [08:28<00:07, 39.03it/s]
 98%|█████████▊| 18377/18660 [08:28<00:07, 37.94it/s]
 99%|█████████▊| 18381/18660 [08:28<00:07, 38.21it/s]
 99%|█████████▊| 18385/18660 [08:28<00:07, 38.45it/s]
 99%|█████████▊| 18389/18660 [08:28<00:06, 38.72it/s]
 99%|█████████▊| 18393/18660 [08:28<00:06, 38.93it/s]
 99%|█████████▊| 18397/18660 [08:28<00:06, 39.17it/s]
 99%|█████████▊| 18401/18660 [08:28<00:06, 37.46it/s]
 99%|█████████▊| 18405/18660 [08:29<00:06, 36.74it/s]
 99%|█████████▊| 18409/18660 [08:29<00:06, 36.86it/s]
 99%|█████████▊| 18413/18660 [08:29<00:06, 37.12it/s]
 99%|█████████▊| 18417/18660 [08:29<00:06, 37.07it/s]
 99%|█████████▊| 18421/18660 [08:29<00:06, 37.53it/s]
 99%|█████████▊| 18425/18660 [08:29<00:06, 37.65it/s]
 99%|█████████▉| 18429/18660 [08:29<00:06, 37.72it/s]
 99%|█████████▉| 18433/18660 [08:29<00:06, 37.63it/s]
 99%|█████████▉| 18437/18660 [08:29<00:05, 37.97it/s]
 99%|█████████▉| 18441/18660 [08:30<00:06, 36

[2m[36m(_objective pid=20061)[0m {'loss': 0.1089, 'learning_rate': 1.392561253393137e-07, 'epoch': 2.97}


[2m[36m(_objective pid=20061)[0m  99%|█████████▉| 18505/18660 [08:31<00:04, 34.60it/s]
 99%|█████████▉| 18509/18660 [08:31<00:04, 34.90it/s]
 99%|█████████▉| 18513/18660 [08:31<00:04, 35.46it/s]
 99%|█████████▉| 18517/18660 [08:32<00:04, 35.18it/s]
 99%|█████████▉| 18521/18660 [08:32<00:03, 35.05it/s]
 99%|█████████▉| 18525/18660 [08:32<00:03, 35.01it/s]
 99%|█████████▉| 18529/18660 [08:32<00:03, 35.97it/s]
 99%|█████████▉| 18533/18660 [08:32<00:03, 36.34it/s]
 99%|█████████▉| 18537/18660 [08:32<00:03, 36.26it/s]
 99%|█████████▉| 18541/18660 [08:32<00:03, 36.17it/s]
 99%|█████████▉| 18545/18660 [08:32<00:03, 36.07it/s]
 99%|█████████▉| 18549/18660 [08:32<00:03, 35.17it/s]
 99%|█████████▉| 18553/18660 [08:33<00:03, 34.03it/s]
 99%|█████████▉| 18557/18660 [08:33<00:02, 34.42it/s]
 99%|█████████▉| 18561/18660 [08:33<00:02, 34.83it/s]
 99%|█████████▉| 18565/18660 [08:33<00:02, 35.22it/s]
100%|█████████▉| 18569/18660 [08:33<00:02, 35.26it/s]
100%|█████████▉| 18573/18660 [08:33<00:02, 34

[2m[36m(_objective pid=20061)[0m {'eval_loss': 0.2587506175041199, 'eval_runtime': 9.5628, 'eval_samples_per_second': 433.659, 'eval_steps_per_second': 13.594, 'epoch': 3.0}
Trial _objective_f556c_00001 finished iteration 3 at 2023-09-11 13:43:40. Total running time: 23min 19s
+-------------------------------------------------+
| Trial _objective_f556c_00001 result             |
+-------------------------------------------------+
| time_this_iter_s                        181.543 |
| time_total_s                            528.047 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.25875 |
| eval_runtime                             9.5628 |
| eval_samples_per_second                 433.659 |
| eval_steps_per_second                    13.594 |
| objective                               0.25875 |
+-------------------------------------------------+

Trial _objective_f556c_00001 completed aft

[2m[36m(_objective pid=20061)[0m                                                      100%|██████████| 18660/18660 [08:47<00:00, 37.89it/s]100%|██████████| 18660/18660 [08:47<00:00, 35.40it/s]


Trial _objective_f556c_00002 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00002 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             3e-05 |
| num_train_epochs                              3 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.01692 |
+-------------------------------------------------+



[2m[36m(_objective pid=22390)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight']
[2m[36m(_objective pid=22390)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=22390)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=22390)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:43:53. Total running time: 23min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                    

[2m[36m(_objective pid=22390)[0m   1%|          | 59/9330 [00:01<04:51, 31.79it/s]
  1%|          | 63/9330 [00:01<04:33, 33.83it/s]
  1%|          | 67/9330 [00:02<04:23, 35.17it/s]
  1%|          | 71/9330 [00:02<04:16, 36.15it/s]
  1%|          | 75/9330 [00:02<04:14, 36.31it/s]
  1%|          | 79/9330 [00:02<04:14, 36.37it/s]
  1%|          | 83/9330 [00:02<04:11, 36.72it/s]
  1%|          | 87/9330 [00:02<04:10, 36.96it/s]
  1%|          | 91/9330 [00:02<04:14, 36.24it/s]
  1%|          | 95/9330 [00:02<04:52, 31.60it/s]
  1%|          | 99/9330 [00:03<04:39, 32.97it/s]
  1%|          | 103/9330 [00:03<04:26, 34.66it/s]
  1%|          | 107/9330 [00:03<04:15, 36.09it/s]
  1%|          | 111/9330 [00:03<04:18, 35.66it/s]
  1%|          | 115/9330 [00:03<04:39, 32.94it/s]
  1%|▏         | 119/9330 [00:03<04:32, 33.82it/s]
  1%|▏         | 123/9330 [00:03<04:22, 35.04it/s]
  1%|▏         | 127/9330 [00:03<04:22, 35.08it/s]
  1%|▏         | 132/9330 [00:03<04:10, 36.65it/s]
  1%|

[2m[36m(_objective pid=22390)[0m {'loss': 0.3976, 'learning_rate': 3.2220130991179146e-05, 'epoch': 0.16}


[2m[36m(_objective pid=22390)[0m   5%|▌         | 499/9330 [00:14<04:22, 33.69it/s]                                                    5%|▌         | 500/9330 [00:14<04:22, 33.69it/s]
  5%|▌         | 503/9330 [00:15<04:12, 35.02it/s]
  5%|▌         | 507/9330 [00:15<04:21, 33.68it/s]
  5%|▌         | 511/9330 [00:15<04:19, 34.01it/s]
  6%|▌         | 515/9330 [00:15<04:15, 34.51it/s]
  6%|▌         | 519/9330 [00:15<04:07, 35.59it/s]
  6%|▌         | 524/9330 [00:15<03:56, 37.16it/s]
  6%|▌         | 528/9330 [00:15<03:55, 37.31it/s]
  6%|▌         | 532/9330 [00:15<03:55, 37.39it/s]
  6%|▌         | 536/9330 [00:15<04:09, 35.27it/s]
  6%|▌         | 540/9330 [00:16<04:19, 33.94it/s]
  6%|▌         | 545/9330 [00:16<04:04, 35.92it/s]
  6%|▌         | 549/9330 [00:16<03:57, 36.93it/s]
  6%|▌         | 553/9330 [00:16<03:55, 37.23it/s]
  6%|▌         | 557/9330 [00:16<04:03, 35.99it/s]
  6%|▌         | 562/9330 [00:16<03:54, 37.42it/s]
  6%|▌         | 566/9330 [00:16<03:52, 37.6

[2m[36m(_objective pid=22390)[0m {'loss': 0.3441, 'learning_rate': 3.0395661512629928e-05, 'epoch': 0.32}


[2m[36m(_objective pid=22390)[0m                                                    11%|█         | 1000/9330 [00:29<03:38, 38.11it/s] 11%|█         | 1003/9330 [00:29<03:36, 38.49it/s]
 11%|█         | 1007/9330 [00:29<03:36, 38.53it/s]
 11%|█         | 1011/9330 [00:29<03:34, 38.76it/s]
 11%|█         | 1015/9330 [00:29<03:38, 38.06it/s]
 11%|█         | 1019/9330 [00:29<03:37, 38.13it/s]
 11%|█         | 1023/9330 [00:30<03:52, 35.77it/s]
 11%|█         | 1027/9330 [00:30<03:46, 36.60it/s]
 11%|█         | 1031/9330 [00:30<03:41, 37.47it/s]
 11%|█         | 1035/9330 [00:30<03:49, 36.20it/s]
 11%|█         | 1039/9330 [00:30<03:43, 37.11it/s]
 11%|█         | 1043/9330 [00:30<03:39, 37.69it/s]
 11%|█         | 1047/9330 [00:30<03:36, 38.29it/s]
 11%|█▏        | 1051/9330 [00:30<03:48, 36.23it/s]
 11%|█▏        | 1055/9330 [00:30<03:43, 37.05it/s]
 11%|█▏        | 1059/9330 [00:31<03:46, 36.52it/s]
 11%|█▏        | 1063/9330 [00:31<03:42, 37.23it/s]
 11%|█▏        | 1067/9330 

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:44:23. Total running time: 24min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                     

[2m[36m(_objective pid=22390)[0m  12%|█▏        | 1091/9330 [00:31<04:29, 30.53it/s]
 12%|█▏        | 1095/9330 [00:32<04:14, 32.41it/s]
 12%|█▏        | 1099/9330 [00:32<04:08, 33.11it/s]
 12%|█▏        | 1103/9330 [00:32<03:55, 34.92it/s]
 12%|█▏        | 1107/9330 [00:32<03:57, 34.59it/s]
 12%|█▏        | 1111/9330 [00:32<04:07, 33.15it/s]
 12%|█▏        | 1115/9330 [00:32<03:57, 34.60it/s]
 12%|█▏        | 1119/9330 [00:32<04:08, 33.03it/s]
 12%|█▏        | 1123/9330 [00:32<03:59, 34.32it/s]
 12%|█▏        | 1127/9330 [00:33<03:55, 34.89it/s]
 12%|█▏        | 1131/9330 [00:33<03:51, 35.39it/s]
 12%|█▏        | 1135/9330 [00:33<03:49, 35.78it/s]
 12%|█▏        | 1139/9330 [00:33<03:43, 36.68it/s]
 12%|█▏        | 1143/9330 [00:33<03:42, 36.74it/s]
 12%|█▏        | 1147/9330 [00:33<03:40, 37.10it/s]
 12%|█▏        | 1151/9330 [00:33<03:38, 37.43it/s]
 12%|█▏        | 1155/9330 [00:33<03:34, 38.13it/s]
 12%|█▏        | 1159/9330 [00:33<03:32, 38.44it/s]
 12%|█▏        | 1163/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.3073, 'learning_rate': 2.8571192034080713e-05, 'epoch': 0.48}


[2m[36m(_objective pid=22390)[0m  16%|█▌        | 1499/9330 [00:44<03:44, 34.90it/s]                                                    16%|█▌        | 1500/9330 [00:44<03:44, 34.90it/s]
 16%|█▌        | 1503/9330 [00:44<03:37, 35.95it/s]
 16%|█▌        | 1507/9330 [00:44<03:32, 36.86it/s]
 16%|█▌        | 1511/9330 [00:44<03:32, 36.77it/s]
 16%|█▌        | 1515/9330 [00:44<03:31, 37.00it/s]
 16%|█▋        | 1519/9330 [00:44<03:27, 37.66it/s]
 16%|█▋        | 1523/9330 [00:44<03:35, 36.17it/s]
 16%|█▋        | 1527/9330 [00:44<03:34, 36.37it/s]
 16%|█▋        | 1531/9330 [00:44<03:30, 36.97it/s]
 16%|█▋        | 1535/9330 [00:45<03:27, 37.51it/s]
 16%|█▋        | 1539/9330 [00:45<03:27, 37.61it/s]
 17%|█▋        | 1543/9330 [00:45<03:50, 33.85it/s]
 17%|█▋        | 1547/9330 [00:45<03:43, 34.87it/s]
 17%|█▋        | 1551/9330 [00:45<03:42, 34.90it/s]
 17%|█▋        | 1555/9330 [00:45<04:10, 30.99it/s]
 17%|█▋        | 1559/9330 [00:45<04:20, 29.88it/s]
 17%|█▋        | 1563/9330

[2m[36m(_objective pid=22390)[0m {'loss': 0.243, 'learning_rate': 2.6746722555531498e-05, 'epoch': 0.64}


[2m[36m(_objective pid=22390)[0m                                                     21%|██▏       | 2000/9330 [00:58<03:34, 34.20it/s] 21%|██▏       | 2001/9330 [00:58<03:35, 33.96it/s]
 21%|██▏       | 2005/9330 [00:59<03:42, 32.95it/s]
 22%|██▏       | 2009/9330 [00:59<03:40, 33.25it/s]
 22%|██▏       | 2013/9330 [00:59<03:37, 33.60it/s]
 22%|██▏       | 2017/9330 [00:59<03:39, 33.33it/s]
 22%|██▏       | 2021/9330 [00:59<03:49, 31.78it/s]
 22%|██▏       | 2025/9330 [00:59<04:19, 28.17it/s]
 22%|██▏       | 2029/9330 [00:59<03:57, 30.76it/s]
 22%|██▏       | 2033/9330 [00:59<03:46, 32.27it/s]
 22%|██▏       | 2037/9330 [01:00<03:38, 33.37it/s]
 22%|██▏       | 2041/9330 [01:00<03:32, 34.24it/s]
 22%|██▏       | 2045/9330 [01:00<03:48, 31.87it/s]
 22%|██▏       | 2049/9330 [01:00<03:39, 33.20it/s]
 22%|██▏       | 2053/9330 [01:00<03:55, 30.93it/s]
 22%|██▏       | 2057/9330 [01:00<03:53, 31.08it/s]
 22%|██▏       | 2061/9330 [01:00<03:38, 33.30it/s]
 22%|██▏       | 2065/9330

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:44:53. Total running time: 24min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                    

[2m[36m(_objective pid=22390)[0m  23%|██▎       | 2107/9330 [01:02<03:26, 35.04it/s]
 23%|██▎       | 2111/9330 [01:02<03:43, 32.27it/s]
 23%|██▎       | 2115/9330 [01:02<03:33, 33.75it/s]
 23%|██▎       | 2119/9330 [01:02<03:32, 33.87it/s]
 23%|██▎       | 2123/9330 [01:02<03:23, 35.37it/s]
 23%|██▎       | 2127/9330 [01:02<03:18, 36.23it/s]
 23%|██▎       | 2131/9330 [01:02<03:38, 32.99it/s]
 23%|██▎       | 2135/9330 [01:02<03:29, 34.39it/s]
 23%|██▎       | 2139/9330 [01:02<03:21, 35.74it/s]
 23%|██▎       | 2144/9330 [01:03<03:13, 37.14it/s]
 23%|██▎       | 2148/9330 [01:03<03:12, 37.38it/s]
 23%|██▎       | 2152/9330 [01:03<03:12, 37.31it/s]
 23%|██▎       | 2156/9330 [01:03<03:18, 36.15it/s]
 23%|██▎       | 2160/9330 [01:03<03:16, 36.51it/s]
 23%|██▎       | 2164/9330 [01:03<03:13, 37.05it/s]
 23%|██▎       | 2168/9330 [01:03<03:29, 34.13it/s]
 23%|██▎       | 2172/9330 [01:03<03:34, 33.31it/s]
 23%|██▎       | 2176/9330 [01:04<03:25, 34.82it/s]
 23%|██▎       | 2180/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.231, 'learning_rate': 2.4922253076982283e-05, 'epoch': 0.8}


[2m[36m(_objective pid=22390)[0m  27%|██▋       | 2502/9330 [01:13<03:14, 35.19it/s]
 27%|██▋       | 2506/9330 [01:13<03:33, 31.93it/s]
 27%|██▋       | 2510/9330 [01:13<03:32, 32.10it/s]
 27%|██▋       | 2514/9330 [01:13<03:51, 29.50it/s]
 27%|██▋       | 2518/9330 [01:14<03:38, 31.13it/s]
 27%|██▋       | 2522/9330 [01:14<03:28, 32.60it/s]
 27%|██▋       | 2526/9330 [01:14<03:27, 32.77it/s]
 27%|██▋       | 2530/9330 [01:14<03:33, 31.82it/s]
 27%|██▋       | 2534/9330 [01:14<03:33, 31.83it/s]
 27%|██▋       | 2538/9330 [01:14<03:27, 32.71it/s]
 27%|██▋       | 2542/9330 [01:14<03:25, 33.01it/s]
 27%|██▋       | 2546/9330 [01:14<03:20, 33.83it/s]
 27%|██▋       | 2550/9330 [01:14<03:19, 34.03it/s]
 27%|██▋       | 2554/9330 [01:15<03:13, 34.97it/s]
 27%|██▋       | 2558/9330 [01:15<03:11, 35.37it/s]
 27%|██▋       | 2562/9330 [01:15<03:11, 35.34it/s]
 28%|██▊       | 2566/9330 [01:15<03:28, 32.39it/s]
 28%|██▊       | 2570/9330 [01:15<03:25, 32.96it/s]
 28%|██▊       | 2574/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.2185, 'learning_rate': 2.3097783598433064e-05, 'epoch': 0.96}


[2m[36m(_objective pid=22390)[0m  32%|███▏      | 3000/9330 [01:28<03:30, 30.07it/s]                                                    32%|███▏      | 3000/9330 [01:28<03:30, 30.07it/s]
 32%|███▏      | 3004/9330 [01:28<03:16, 32.20it/s]
 32%|███▏      | 3008/9330 [01:28<03:23, 31.00it/s]
 32%|███▏      | 3012/9330 [01:28<03:16, 32.11it/s]
 32%|███▏      | 3016/9330 [01:28<03:06, 33.85it/s]
 32%|███▏      | 3020/9330 [01:28<02:59, 35.13it/s]
 32%|███▏      | 3024/9330 [01:28<02:55, 35.93it/s]
 32%|███▏      | 3028/9330 [01:28<03:14, 32.40it/s]
 32%|███▏      | 3032/9330 [01:29<03:06, 33.81it/s]
 33%|███▎      | 3036/9330 [01:29<02:59, 34.99it/s]
 33%|███▎      | 3040/9330 [01:29<03:03, 34.22it/s]
 33%|███▎      | 3044/9330 [01:29<02:59, 34.97it/s]
 33%|███▎      | 3048/9330 [01:29<03:00, 34.76it/s]
 33%|███▎      | 3052/9330 [01:29<03:03, 34.21it/s]
 33%|███▎      | 3056/9330 [01:29<03:00, 34.67it/s]
 33%|███▎      | 3060/9330 [01:29<02:55, 35.75it/s]
 33%|███▎      | 3064/9330

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:45:23. Total running time: 25min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                     

[2m[36m(_objective pid=22390)[0m 
[2m[36m(_objective pid=22390)[0m   8%|▊         | 11/130 [00:00<00:08, 13.85it/s][A
[2m[36m(_objective pid=22390)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.43it/s][A
[2m[36m(_objective pid=22390)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.32it/s][A
[2m[36m(_objective pid=22390)[0m 
 13%|█▎        | 17/130 [00:01<00:09, 11.56it/s][A
[2m[36m(_objective pid=22390)[0m 
 15%|█▍        | 19/130 [00:01<00:09, 11.47it/s][A
[2m[36m(_objective pid=22390)[0m 
 16%|█▌        | 21/130 [00:01<00:08, 12.98it/s][A
[2m[36m(_objective pid=22390)[0m 
 18%|█▊        | 23/130 [00:01<00:09, 11.82it/s][A
[2m[36m(_objective pid=22390)[0m 
 19%|█▉        | 25/130 [00:01<00:07, 13.45it/s][A
[2m[36m(_objective pid=22390)[0m 
 21%|██        | 27/130 [00:01<00:07, 14.14it/s][A
[2m[36m(_objective pid=22390)[0m 
 24%|██▍       | 31/130 [00:02<00:06, 16.18it/s][A
[2m[36m(_objective pid=22390)[0m 
 26%|██▌       | 34/130 [00:02<00:05

Trial _objective_f556c_00002 finished iteration 1 at 2023-09-11 13:45:31. Total running time: 25min 10s
+-------------------------------------------------+
| Trial _objective_f556c_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        103.573 |
| time_total_s                            103.573 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.19146 |
| eval_runtime                             9.5945 |
| eval_samples_per_second                 432.228 |
| eval_steps_per_second                    13.549 |
| objective                               0.19146 |
+-------------------------------------------------+

[2m[36m(_objective pid=22390)[0m {'eval_loss': 0.19146284461021423, 'eval_runtime': 9.5945, 'eval_samples_per_second': 432.228, 'eval_steps_per_second': 13.549, 'epoch': 1.0}


[2m[36m(_objective pid=22390)[0m  33%|███▎      | 3112/9330 [01:42<1:27:23,  1.19it/s]
 33%|███▎      | 3116/9330 [01:42<1:02:02,  1.67it/s]
 33%|███▎      | 3120/9330 [01:42<44:52,  2.31it/s]  
 33%|███▎      | 3124/9330 [01:42<32:17,  3.20it/s]
 34%|███▎      | 3128/9330 [01:42<23:31,  4.40it/s]
 34%|███▎      | 3132/9330 [01:42<17:21,  5.95it/s]
 34%|███▎      | 3136/9330 [01:42<12:59,  7.95it/s]
 34%|███▎      | 3140/9330 [01:43<09:59, 10.32it/s]
 34%|███▎      | 3144/9330 [01:43<07:48, 13.21it/s]
 34%|███▎      | 3148/9330 [01:43<06:18, 16.32it/s]
 34%|███▍      | 3152/9330 [01:43<05:15, 19.56it/s]
 34%|███▍      | 3156/9330 [01:43<04:38, 22.20it/s]
 34%|███▍      | 3160/9330 [01:43<04:39, 22.10it/s]
 34%|███▍      | 3164/9330 [01:43<04:26, 23.11it/s]
 34%|███▍      | 3168/9330 [01:43<04:06, 25.02it/s]
 34%|███▍      | 3171/9330 [01:44<04:17, 23.91it/s]
 34%|███▍      | 3175/9330 [01:44<03:50, 26.72it/s]
 34%|███▍      | 3179/9330 [01:44<03:29, 29.41it/s]
 34%|███▍      | 3183

[2m[36m(_objective pid=22390)[0m {'loss': 0.1627, 'learning_rate': 2.1273314119883853e-05, 'epoch': 1.13}


[2m[36m(_objective pid=22390)[0m  38%|███▊      | 3505/9330 [01:53<02:34, 37.80it/s]
 38%|███▊      | 3509/9330 [01:53<02:33, 37.93it/s]
 38%|███▊      | 3513/9330 [01:54<02:40, 36.28it/s]
 38%|███▊      | 3517/9330 [01:54<02:47, 34.62it/s]
 38%|███▊      | 3522/9330 [01:54<02:40, 36.29it/s]
 38%|███▊      | 3526/9330 [01:54<02:55, 33.15it/s]
 38%|███▊      | 3530/9330 [01:54<02:58, 32.42it/s]
 38%|███▊      | 3534/9330 [01:54<02:52, 33.51it/s]
 38%|███▊      | 3538/9330 [01:54<02:56, 32.84it/s]
 38%|███▊      | 3542/9330 [01:54<02:49, 34.18it/s]
 38%|███▊      | 3547/9330 [01:55<02:40, 36.09it/s]
 38%|███▊      | 3551/9330 [01:55<02:36, 36.86it/s]
 38%|███▊      | 3556/9330 [01:55<02:32, 37.88it/s]
 38%|███▊      | 3560/9330 [01:55<02:38, 36.49it/s]
 38%|███▊      | 3564/9330 [01:55<02:45, 34.74it/s]
 38%|███▊      | 3568/9330 [01:55<02:40, 36.00it/s]
 38%|███▊      | 3572/9330 [01:55<02:55, 32.82it/s]
 38%|███▊      | 3576/9330 [01:55<02:46, 34.51it/s]
 38%|███▊      | 3580/9330 

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:45:53. Total running time: 25min 31s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                    

[2m[36m(_objective pid=22390)[0m  41%|████      | 3781/9330 [02:02<02:54, 31.77it/s]
 41%|████      | 3785/9330 [02:02<02:49, 32.69it/s]
 41%|████      | 3789/9330 [02:02<03:01, 30.51it/s]
 41%|████      | 3793/9330 [02:02<03:10, 29.12it/s]
 41%|████      | 3797/9330 [02:02<02:58, 30.94it/s]
 41%|████      | 3801/9330 [02:02<02:52, 32.08it/s]
 41%|████      | 3805/9330 [02:02<02:48, 32.84it/s]
 41%|████      | 3809/9330 [02:02<02:46, 33.08it/s]
 41%|████      | 3813/9330 [02:02<02:43, 33.75it/s]
 41%|████      | 3817/9330 [02:03<02:49, 32.54it/s]
 41%|████      | 3821/9330 [02:03<02:43, 33.61it/s]
 41%|████      | 3825/9330 [02:03<02:59, 30.74it/s]
 41%|████      | 3829/9330 [02:03<02:57, 30.94it/s]
 41%|████      | 3833/9330 [02:03<02:52, 31.94it/s]
 41%|████      | 3837/9330 [02:03<02:57, 31.02it/s]
 41%|████      | 3841/9330 [02:03<02:51, 32.08it/s]
 41%|████      | 3845/9330 [02:04<03:00, 30.37it/s]
 41%|████▏     | 3849/9330 [02:04<02:53, 31.68it/s]
 41%|████▏     | 3853/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.1325, 'learning_rate': 1.9448844641334634e-05, 'epoch': 1.29}


[2m[36m(_objective pid=22390)[0m                                                     43%|████▎     | 4000/9330 [02:08<02:38, 33.62it/s] 43%|████▎     | 4001/9330 [02:08<02:32, 34.93it/s]
 43%|████▎     | 4005/9330 [02:08<02:49, 31.50it/s]
 43%|████▎     | 4009/9330 [02:08<02:59, 29.67it/s]
 43%|████▎     | 4013/9330 [02:09<02:46, 31.87it/s]
 43%|████▎     | 4017/9330 [02:09<02:37, 33.73it/s]
 43%|████▎     | 4021/9330 [02:09<02:33, 34.67it/s]
 43%|████▎     | 4025/9330 [02:09<02:28, 35.64it/s]
 43%|████▎     | 4029/9330 [02:09<02:27, 35.98it/s]
 43%|████▎     | 4033/9330 [02:09<02:24, 36.71it/s]
 43%|████▎     | 4037/9330 [02:09<02:24, 36.56it/s]
 43%|████▎     | 4041/9330 [02:09<02:21, 37.25it/s]
 43%|████▎     | 4045/9330 [02:09<02:20, 37.53it/s]
 43%|████▎     | 4049/9330 [02:10<02:18, 38.13it/s]
 43%|████▎     | 4053/9330 [02:10<02:18, 38.02it/s]
 43%|████▎     | 4057/9330 [02:10<02:19, 37.80it/s]
 44%|████▎     | 4061/9330 [02:10<02:20, 37.40it/s]
 44%|████▎     | 4065/9330

[2m[36m(_objective pid=22390)[0m {'loss': 0.1357, 'learning_rate': 1.7624375162785423e-05, 'epoch': 1.45}


[2m[36m(_objective pid=22390)[0m  48%|████▊     | 4505/9330 [02:23<02:23, 33.72it/s]
 48%|████▊     | 4509/9330 [02:23<02:19, 34.64it/s]
 48%|████▊     | 4513/9330 [02:23<02:16, 35.31it/s]
 48%|████▊     | 4517/9330 [02:23<02:13, 36.05it/s]
 48%|████▊     | 4521/9330 [02:23<02:10, 36.79it/s]
 48%|████▊     | 4525/9330 [02:23<02:10, 36.86it/s]
 49%|████▊     | 4529/9330 [02:23<02:08, 37.28it/s]
 49%|████▊     | 4533/9330 [02:24<02:24, 33.19it/s]
 49%|████▊     | 4537/9330 [02:24<02:26, 32.62it/s]
 49%|████▊     | 4541/9330 [02:24<02:22, 33.62it/s]
 49%|████▊     | 4545/9330 [02:24<02:26, 32.62it/s]
 49%|████▉     | 4549/9330 [02:24<02:18, 34.50it/s]
 49%|████▉     | 4554/9330 [02:24<02:11, 36.36it/s]
 49%|████▉     | 4558/9330 [02:24<02:23, 33.18it/s]
 49%|████▉     | 4562/9330 [02:24<02:22, 33.34it/s]
 49%|████▉     | 4566/9330 [02:25<02:19, 34.21it/s]
 49%|████▉     | 4570/9330 [02:25<02:13, 35.58it/s]
 49%|████▉     | 4574/9330 [02:25<02:16, 34.94it/s]
 49%|████▉     | 4578/9330 

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:46:23. Total running time: 26min 1s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                     

[2m[36m(_objective pid=22390)[0m  52%|█████▏    | 4807/9330 [02:32<02:05, 36.09it/s]
 52%|█████▏    | 4811/9330 [02:32<02:03, 36.65it/s]
 52%|█████▏    | 4815/9330 [02:32<02:02, 36.97it/s]
 52%|█████▏    | 4819/9330 [02:32<02:00, 37.28it/s]
 52%|█████▏    | 4823/9330 [02:32<02:07, 35.47it/s]
 52%|█████▏    | 4827/9330 [02:32<02:09, 34.88it/s]
 52%|█████▏    | 4831/9330 [02:32<02:07, 35.42it/s]
 52%|█████▏    | 4835/9330 [02:32<02:06, 35.55it/s]
 52%|█████▏    | 4839/9330 [02:33<02:06, 35.57it/s]
 52%|█████▏    | 4843/9330 [02:33<02:03, 36.20it/s]
 52%|█████▏    | 4847/9330 [02:33<02:14, 33.39it/s]
 52%|█████▏    | 4851/9330 [02:33<02:18, 32.28it/s]
 52%|█████▏    | 4855/9330 [02:33<02:14, 33.17it/s]
 52%|█████▏    | 4859/9330 [02:33<02:14, 33.36it/s]
 52%|█████▏    | 4863/9330 [02:33<02:09, 34.43it/s]
 52%|█████▏    | 4867/9330 [02:33<02:07, 35.06it/s]
 52%|█████▏    | 4871/9330 [02:33<02:05, 35.67it/s]
 52%|█████▏    | 4875/9330 [02:34<02:03, 36.04it/s]
 52%|█████▏    | 4879/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.1394, 'learning_rate': 1.5799905684236204e-05, 'epoch': 1.61}


[2m[36m(_objective pid=22390)[0m  54%|█████▎    | 5005/9330 [02:38<02:12, 32.74it/s]
 54%|█████▎    | 5009/9330 [02:38<02:08, 33.55it/s]
 54%|█████▎    | 5013/9330 [02:38<02:05, 34.30it/s]
 54%|█████▍    | 5017/9330 [02:38<02:11, 32.91it/s]
 54%|█████▍    | 5021/9330 [02:38<02:20, 30.59it/s]
 54%|█████▍    | 5025/9330 [02:38<02:12, 32.45it/s]
 54%|█████▍    | 5029/9330 [02:38<02:20, 30.55it/s]
 54%|█████▍    | 5033/9330 [02:39<02:12, 32.32it/s]
 54%|█████▍    | 5037/9330 [02:39<02:09, 33.28it/s]
 54%|█████▍    | 5041/9330 [02:39<02:05, 34.28it/s]
 54%|█████▍    | 5045/9330 [02:39<02:07, 33.65it/s]
 54%|█████▍    | 5049/9330 [02:39<02:20, 30.42it/s]
 54%|█████▍    | 5053/9330 [02:39<02:17, 31.08it/s]
 54%|█████▍    | 5057/9330 [02:39<02:23, 29.69it/s]
 54%|█████▍    | 5061/9330 [02:39<02:17, 31.01it/s]
 54%|█████▍    | 5065/9330 [02:40<02:10, 32.68it/s]
 54%|█████▍    | 5069/9330 [02:40<02:09, 32.90it/s]
 54%|█████▍    | 5073/9330 [02:40<02:03, 34.44it/s]
 54%|█████▍    | 5077/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.1622, 'learning_rate': 1.397543620568699e-05, 'epoch': 1.77}


 59%|█████▉    | 5505/9330 [02:53<01:48, 35.35it/s]
 59%|█████▉    | 5509/9330 [02:53<01:50, 34.59it/s]
 59%|█████▉    | 5513/9330 [02:53<01:49, 34.96it/s]
 59%|█████▉    | 5517/9330 [02:53<01:47, 35.37it/s]
 59%|█████▉    | 5521/9330 [02:53<01:57, 32.55it/s]
 59%|█████▉    | 5525/9330 [02:53<01:53, 33.47it/s]
 59%|█████▉    | 5529/9330 [02:53<01:55, 33.02it/s]
 59%|█████▉    | 5533/9330 [02:53<01:53, 33.40it/s]
 59%|█████▉    | 5537/9330 [02:54<01:59, 31.75it/s]
 59%|█████▉    | 5541/9330 [02:54<01:53, 33.48it/s]
 59%|█████▉    | 5545/9330 [02:54<01:49, 34.61it/s]
 59%|█████▉    | 5549/9330 [02:54<01:48, 34.91it/s]
 60%|█████▉    | 5553/9330 [02:54<01:47, 35.27it/s]
 60%|█████▉    | 5557/9330 [02:54<01:58, 31.78it/s]
 60%|█████▉    | 5561/9330 [02:54<01:53, 33.34it/s]
 60%|█████▉    | 5565/9330 [02:54<01:51, 33.83it/s]
 60%|█████▉    | 5569/9330 [02:54<01:49, 34.43it/s]
 60%|█████▉    | 5573/9330 [02:55<01:52, 33.45it/s]
 60%|█████▉    | 5577/9330 [02:55<01:48, 34.48it/s]
 60%|█████▉ 

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:46:53. Total running time: 26min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                    

[2m[36m(_objective pid=22390)[0m  62%|██████▏   | 5814/9330 [03:02<01:37, 36.07it/s]
 62%|██████▏   | 5818/9330 [03:02<01:35, 36.72it/s]
 62%|██████▏   | 5822/9330 [03:02<01:39, 35.20it/s]
 62%|██████▏   | 5826/9330 [03:02<01:38, 35.49it/s]
 62%|██████▏   | 5830/9330 [03:02<01:43, 33.70it/s]
 63%|██████▎   | 5834/9330 [03:02<01:54, 30.61it/s]
 63%|██████▎   | 5839/9330 [03:02<01:44, 33.53it/s]
 63%|██████▎   | 5843/9330 [03:02<01:41, 34.27it/s]
 63%|██████▎   | 5847/9330 [03:03<01:39, 35.07it/s]
 63%|██████▎   | 5851/9330 [03:03<01:41, 34.24it/s]
 63%|██████▎   | 5855/9330 [03:03<01:37, 35.70it/s]
 63%|██████▎   | 5859/9330 [03:03<01:45, 33.00it/s]
 63%|██████▎   | 5863/9330 [03:03<01:41, 34.18it/s]
 63%|██████▎   | 5867/9330 [03:03<01:40, 34.34it/s]
 63%|██████▎   | 5871/9330 [03:03<01:44, 33.11it/s]
 63%|██████▎   | 5876/9330 [03:03<01:37, 35.27it/s]
 63%|██████▎   | 5880/9330 [03:04<01:52, 30.71it/s]
 63%|██████▎   | 5884/9330 [03:04<01:45, 32.66it/s]
 63%|██████▎   | 5888/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.1357, 'learning_rate': 1.2150966727137773e-05, 'epoch': 1.93}


[2m[36m(_objective pid=22390)[0m  64%|██████▍   | 6004/9330 [03:07<01:33, 35.42it/s]
 64%|██████▍   | 6008/9330 [03:07<01:32, 35.96it/s]
 64%|██████▍   | 6012/9330 [03:07<01:32, 36.04it/s]
 64%|██████▍   | 6016/9330 [03:08<01:39, 33.28it/s]
 65%|██████▍   | 6020/9330 [03:08<01:36, 34.43it/s]
 65%|██████▍   | 6024/9330 [03:08<01:35, 34.64it/s]
 65%|██████▍   | 6028/9330 [03:08<01:42, 32.23it/s]
 65%|██████▍   | 6032/9330 [03:08<01:39, 33.17it/s]
 65%|██████▍   | 6036/9330 [03:08<01:38, 33.34it/s]
 65%|██████▍   | 6040/9330 [03:08<01:36, 33.96it/s]
 65%|██████▍   | 6044/9330 [03:08<01:37, 33.83it/s]
 65%|██████▍   | 6048/9330 [03:09<01:46, 30.84it/s]
 65%|██████▍   | 6052/9330 [03:09<01:43, 31.56it/s]
 65%|██████▍   | 6056/9330 [03:09<01:50, 29.69it/s]
 65%|██████▍   | 6060/9330 [03:09<01:48, 30.21it/s]
 65%|██████▍   | 6064/9330 [03:09<01:43, 31.64it/s]
 65%|██████▌   | 6068/9330 [03:09<01:41, 32.20it/s]
 65%|██████▌   | 6072/9330 [03:09<01:40, 32.34it/s]
 65%|██████▌   | 6076/9330 

Trial _objective_f556c_00002 finished iteration 2 at 2023-09-11 13:47:14. Total running time: 26min 53s
+-------------------------------------------------+
| Trial _objective_f556c_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.899 |
| time_total_s                            206.472 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.17326 |
| eval_runtime                             9.5923 |
| eval_samples_per_second                 432.324 |
| eval_steps_per_second                    13.552 |
| objective                               0.17326 |
+-------------------------------------------------+

[2m[36m(_objective pid=22390)[0m {'eval_loss': 0.17326313257217407, 'eval_runtime': 9.5923, 'eval_samples_per_second': 432.324, 'eval_steps_per_second': 13.552, 'epoch': 2.0}


[2m[36m(_objective pid=22390)[0m                                                    
[2m[36m(_objective pid=22390)[0m                                                  [A 67%|██████▋   | 6220/9330 [03:23<01:29, 34.68it/s]
[2m[36m(_objective pid=22390)[0m 100%|██████████| 130/130 [00:09<00:00, 11.97it/s][A
                                                 [A
 67%|██████▋   | 6223/9330 [03:25<43:56,  1.18it/s]
 67%|██████▋   | 6227/9330 [03:25<31:08,  1.66it/s]
 67%|██████▋   | 6231/9330 [03:25<22:14,  2.32it/s]
 67%|██████▋   | 6235/9330 [03:25<16:00,  3.22it/s]
 67%|██████▋   | 6239/9330 [03:25<11:45,  4.38it/s]
 67%|██████▋   | 6243/9330 [03:25<08:48,  5.84it/s]
 67%|██████▋   | 6247/9330 [03:25<06:39,  7.72it/s]
 67%|██████▋   | 6251/9330 [03:26<05:10,  9.92it/s]
 67%|██████▋   | 6255/9330 [03:26<04:04, 12.58it/s]
 67%|██████▋   | 6259/9330 [03:26<03:19, 15.38it/s]
 67%|██████▋   | 6263/9330 [03:26<02:48, 18.15it/s]
 67%|██████▋   | 6267/9330 [03:26<02:23, 21.28it/s]
 

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:47:23. Total running time: 27min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                     

[2m[36m(_objective pid=22390)[0m  69%|██████▉   | 6463/9330 [03:32<01:17, 37.18it/s]
 69%|██████▉   | 6467/9330 [03:32<01:15, 37.96it/s]
 69%|██████▉   | 6471/9330 [03:32<01:17, 36.75it/s]
 69%|██████▉   | 6475/9330 [03:32<01:16, 37.51it/s]
 69%|██████▉   | 6479/9330 [03:32<01:14, 38.21it/s]
 69%|██████▉   | 6483/9330 [03:32<01:23, 33.95it/s]
 70%|██████▉   | 6487/9330 [03:32<01:20, 35.31it/s]
 70%|██████▉   | 6491/9330 [03:32<01:18, 36.09it/s]
 70%|██████▉   | 6495/9330 [03:33<01:16, 37.01it/s]
 70%|██████▉   | 6499/9330 [03:33<01:16, 37.07it/s]


[2m[36m(_objective pid=22390)[0m {'loss': 0.1316, 'learning_rate': 1.0326497248588558e-05, 'epoch': 2.09}


[2m[36m(_objective pid=22390)[0m                                                     70%|██████▉   | 6500/9330 [03:33<01:16, 37.07it/s]
 70%|██████▉   | 6503/9330 [03:33<01:20, 35.19it/s]
 70%|██████▉   | 6507/9330 [03:33<01:19, 35.50it/s]
 70%|██████▉   | 6511/9330 [03:33<01:19, 35.34it/s]
 70%|██████▉   | 6515/9330 [03:33<01:26, 32.56it/s]
 70%|██████▉   | 6519/9330 [03:33<01:21, 34.44it/s]
 70%|██████▉   | 6523/9330 [03:33<01:27, 32.06it/s]
 70%|██████▉   | 6527/9330 [03:34<01:24, 33.02it/s]
 70%|███████   | 6531/9330 [03:34<01:28, 31.49it/s]
 70%|███████   | 6536/9330 [03:34<01:21, 34.17it/s]
 70%|███████   | 6540/9330 [03:34<01:19, 35.12it/s]
 70%|███████   | 6544/9330 [03:34<01:16, 36.36it/s]
 70%|███████   | 6549/9330 [03:34<01:13, 37.61it/s]
 70%|███████   | 6553/9330 [03:34<01:17, 36.01it/s]
 70%|███████   | 6558/9330 [03:34<01:14, 37.35it/s]
 70%|███████   | 6562/9330 [03:35<01:15, 36.54it/s]
 70%|███████   | 6566/9330 [03:35<01:14, 37.06it/s]
 70%|███████   | 6570/9330

[2m[36m(_objective pid=22390)[0m {'loss': 0.0834, 'learning_rate': 8.502027770039344e-06, 'epoch': 2.25}


 75%|███████▌  | 7001/9330 [03:47<01:01, 37.93it/s]
 75%|███████▌  | 7005/9330 [03:47<01:05, 35.55it/s]
 75%|███████▌  | 7009/9330 [03:48<01:09, 33.32it/s]
 75%|███████▌  | 7013/9330 [03:48<01:07, 34.16it/s]
 75%|███████▌  | 7017/9330 [03:48<01:05, 35.19it/s]
 75%|███████▌  | 7021/9330 [03:48<01:03, 36.41it/s]
 75%|███████▌  | 7025/9330 [03:48<01:01, 37.32it/s]
 75%|███████▌  | 7029/9330 [03:48<01:07, 33.91it/s]
 75%|███████▌  | 7033/9330 [03:48<01:07, 34.25it/s]
 75%|███████▌  | 7038/9330 [03:48<01:03, 36.19it/s]
 75%|███████▌  | 7043/9330 [03:49<01:01, 37.42it/s]
 76%|███████▌  | 7047/9330 [03:49<01:02, 36.61it/s]
 76%|███████▌  | 7051/9330 [03:49<01:01, 37.13it/s]
 76%|███████▌  | 7055/9330 [03:49<01:01, 37.08it/s]
 76%|███████▌  | 7059/9330 [03:49<00:59, 37.87it/s]
 76%|███████▌  | 7063/9330 [03:49<00:59, 37.81it/s]
 76%|███████▌  | 7067/9330 [03:49<00:59, 38.32it/s]
 76%|███████▌  | 7071/9330 [03:49<01:05, 34.28it/s]
 76%|███████▌  | 7075/9330 [03:49<01:04, 35.03it/s]
 76%|███████

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:47:53. Total running time: 27min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                    

[2m[36m(_objective pid=22390)[0m  80%|████████  | 7489/9330 [04:02<00:58, 31.61it/s]
 80%|████████  | 7493/9330 [04:02<00:57, 32.03it/s]
 80%|████████  | 7497/9330 [04:02<00:56, 32.28it/s]
 80%|████████  | 7501/9330 [04:02<00:54, 33.62it/s]


[2m[36m(_objective pid=22390)[0m {'loss': 0.0625, 'learning_rate': 6.677558291490129e-06, 'epoch': 2.41}


[2m[36m(_objective pid=22390)[0m  80%|████████  | 7505/9330 [04:02<00:52, 34.88it/s]
 80%|████████  | 7509/9330 [04:02<00:51, 35.51it/s]
 81%|████████  | 7513/9330 [04:02<00:50, 36.24it/s]
 81%|████████  | 7517/9330 [04:03<00:49, 36.76it/s]
 81%|████████  | 7521/9330 [04:03<00:50, 36.12it/s]
 81%|████████  | 7525/9330 [04:03<00:55, 32.50it/s]
 81%|████████  | 7529/9330 [04:03<00:53, 33.78it/s]
 81%|████████  | 7533/9330 [04:03<00:51, 34.97it/s]
 81%|████████  | 7537/9330 [04:03<00:56, 31.78it/s]
 81%|████████  | 7541/9330 [04:03<00:59, 29.86it/s]
 81%|████████  | 7545/9330 [04:03<00:57, 31.23it/s]
 81%|████████  | 7549/9330 [04:04<00:53, 33.28it/s]
 81%|████████  | 7553/9330 [04:04<00:56, 31.45it/s]
 81%|████████  | 7557/9330 [04:04<00:55, 32.12it/s]
 81%|████████  | 7561/9330 [04:04<00:52, 33.41it/s]
 81%|████████  | 7565/9330 [04:04<00:51, 34.18it/s]
 81%|████████  | 7569/9330 [04:04<00:49, 35.59it/s]
 81%|████████  | 7573/9330 [04:04<00:52, 33.36it/s]
 81%|████████  | 7577/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.0788, 'learning_rate': 4.853088812940913e-06, 'epoch': 2.57}


[2m[36m(_objective pid=22390)[0m  86%|████████▌ | 8003/9330 [04:17<00:40, 32.38it/s]
 86%|████████▌ | 8007/9330 [04:17<00:40, 33.06it/s]
 86%|████████▌ | 8011/9330 [04:17<00:38, 34.29it/s]
 86%|████████▌ | 8015/9330 [04:17<00:41, 31.93it/s]
 86%|████████▌ | 8019/9330 [04:17<00:38, 33.64it/s]
 86%|████████▌ | 8023/9330 [04:17<00:41, 31.14it/s]
 86%|████████▌ | 8027/9330 [04:18<00:44, 29.29it/s]
 86%|████████▌ | 8031/9330 [04:18<00:42, 30.82it/s]
 86%|████████▌ | 8035/9330 [04:18<00:40, 32.02it/s]
 86%|████████▌ | 8039/9330 [04:18<00:38, 33.61it/s]
 86%|████████▌ | 8043/9330 [04:18<00:36, 34.86it/s]
 86%|████████▌ | 8047/9330 [04:18<00:37, 34.49it/s]
 86%|████████▋ | 8051/9330 [04:18<00:36, 34.75it/s]
 86%|████████▋ | 8055/9330 [04:18<00:35, 35.74it/s]
 86%|████████▋ | 8059/9330 [04:19<00:44, 28.65it/s]
 86%|████████▋ | 8063/9330 [04:19<00:41, 30.60it/s]
 86%|████████▋ | 8068/9330 [04:19<00:37, 33.50it/s]
 87%|████████▋ | 8073/9330 [04:19<00:35, 35.56it/s]
 87%|████████▋ | 8078/9330 

[2m[36m(_objective pid=22390)[0m {'loss': 0.0602, 'learning_rate': 3.0286193343916974e-06, 'epoch': 2.73}
Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:48:23. Total running time: 28min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

[2m[36m(_objective pid=22390)[0m                                                     91%|█████████ | 8500/9330 [04:31<00:25, 33.00it/s] 91%|█████████ | 8501/9330 [04:31<00:24, 34.12it/s]
 91%|█████████ | 8505/9330 [04:32<00:23, 34.84it/s]
 91%|█████████ | 8509/9330 [04:32<00:22, 36.02it/s]
 91%|█████████ | 8513/9330 [04:32<00:23, 35.34it/s]
 91%|█████████▏| 8517/9330 [04:32<00:23, 34.98it/s]
 91%|█████████▏| 8521/9330 [04:32<00:23, 34.56it/s]
 91%|█████████▏| 8525/9330 [04:32<00:23, 33.89it/s]
 91%|█████████▏| 8529/9330 [04:32<00:22, 35.44it/s]
 91%|█████████▏| 8533/9330 [04:32<00:21, 36.50it/s]
 92%|█████████▏| 8537/9330 [04:32<00:21, 36.22it/s]
 92%|█████████▏| 8541/9330 [04:33<00:21, 36.36it/s]
 92%|█████████▏| 8545/9330 [04:33<00:21, 36.99it/s]
 92%|█████████▏| 8549/9330 [04:33<00:22, 34.50it/s]
 92%|█████████▏| 8553/9330 [04:33<00:24, 32.07it/s]
 92%|█████████▏| 8557/9330 [04:33<00:22, 34.05it/s]
 92%|█████████▏| 8561/9330 [04:33<00:22, 34.09it/s]
 92%|█████████▏| 8565/9330

[2m[36m(_objective pid=22390)[0m {'loss': 0.0982, 'learning_rate': 1.2041498558424822e-06, 'epoch': 2.89}


[2m[36m(_objective pid=22390)[0m  97%|█████████▋| 9004/9330 [04:46<00:10, 30.85it/s]
 97%|█████████▋| 9008/9330 [04:47<00:10, 32.07it/s]
 97%|█████████▋| 9012/9330 [04:47<00:10, 29.92it/s]
 97%|█████████▋| 9016/9330 [04:47<00:09, 31.87it/s]
 97%|█████████▋| 9020/9330 [04:47<00:09, 31.99it/s]
 97%|█████████▋| 9024/9330 [04:47<00:09, 31.75it/s]
 97%|█████████▋| 9028/9330 [04:47<00:09, 33.19it/s]
 97%|█████████▋| 9032/9330 [04:47<00:09, 32.26it/s]
 97%|█████████▋| 9036/9330 [04:47<00:08, 33.25it/s]
 97%|█████████▋| 9040/9330 [04:48<00:08, 34.42it/s]
 97%|█████████▋| 9044/9330 [04:48<00:08, 35.12it/s]
 97%|█████████▋| 9048/9330 [04:48<00:08, 34.59it/s]
 97%|█████████▋| 9052/9330 [04:48<00:07, 35.34it/s]
 97%|█████████▋| 9056/9330 [04:48<00:07, 36.43it/s]
 97%|█████████▋| 9060/9330 [04:48<00:07, 34.29it/s]
 97%|█████████▋| 9065/9330 [04:48<00:07, 35.98it/s]
 97%|█████████▋| 9069/9330 [04:48<00:07, 36.99it/s]
 97%|█████████▋| 9073/9330 [04:49<00:07, 33.76it/s]
 97%|█████████▋| 9077/9330 

Trial status: 2 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:48:53. Total running time: 28min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00002   RUNNING          3.40446e-05                    3                    

[2m[36m(_objective pid=22390)[0m 
[2m[36m(_objective pid=22390)[0m  65%|██████▍   | 84/130 [00:05<00:02, 15.80it/s][A
[2m[36m(_objective pid=22390)[0m 
 66%|██████▌   | 86/130 [00:06<00:02, 16.39it/s][A
[2m[36m(_objective pid=22390)[0m 
 68%|██████▊   | 88/130 [00:06<00:02, 15.21it/s][A
[2m[36m(_objective pid=22390)[0m 
 69%|██████▉   | 90/130 [00:06<00:02, 13.82it/s][A
[2m[36m(_objective pid=22390)[0m 
 71%|███████   | 92/130 [00:06<00:03, 12.51it/s][A
[2m[36m(_objective pid=22390)[0m 
 72%|███████▏  | 94/130 [00:06<00:02, 13.72it/s][A
[2m[36m(_objective pid=22390)[0m 
 75%|███████▍  | 97/130 [00:06<00:02, 14.31it/s][A
[2m[36m(_objective pid=22390)[0m 
 77%|███████▋  | 100/130 [00:07<00:01, 16.93it/s][A
[2m[36m(_objective pid=22390)[0m 
 78%|███████▊  | 102/130 [00:07<00:01, 14.84it/s][A
[2m[36m(_objective pid=22390)[0m 
 80%|████████  | 104/130 [00:07<00:01, 13.17it/s][A
[2m[36m(_objective pid=22390)[0m 
 82%|████████▏ | 106/130 [00:07<0

Trial _objective_f556c_00002 finished iteration 3 at 2023-09-11 13:48:57. Total running time: 28min 35s
+-------------------------------------------------+
| Trial _objective_f556c_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                         102.26 |
| time_total_s                            308.732 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                                0.2273 |
| eval_runtime                             9.6035 |
| eval_samples_per_second                 431.822 |
| eval_steps_per_second                    13.537 |
| objective                                0.2273 |
+-------------------------------------------------+

[2m[36m(_objective pid=22390)[0m {'eval_loss': 0.22729657590389252, 'eval_runtime': 9.6035, 'eval_samples_per_second': 431.822, 'eval_steps_per_second': 13.537, 'epoch': 3.0}


[2m[36m(_objective pid=22390)[0m                                                    
[2m[36m(_objective pid=22390)[0m                                                  [A100%|██████████| 9330/9330 [05:06<00:00, 32.12it/s]
[2m[36m(_objective pid=22390)[0m 100%|██████████| 130/130 [00:09<00:00, 12.00it/s][A
[2m[36m(_objective pid=22390)[0m                                                  [A


[2m[36m(_objective pid=22390)[0m {'train_runtime': 307.4616, 'train_samples_per_second': 121.371, 'train_steps_per_second': 30.345, 'train_loss': 0.17048744373382863, 'epoch': 3.0}
Trial _objective_f556c_00002 completed after 3 iterations at 2023-09-11 13:48:58. Total running time: 28min 37s



[2m[36m(_objective pid=22390)[0m                                                    100%|██████████| 9330/9330 [05:07<00:00, 32.12it/s]100%|██████████| 9330/9330 [05:07<00:00, 30.35it/s]


Trial _objective_f556c_00003 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00003 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             5e-05 |
| num_train_epochs                              2 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.29766 |
+-------------------------------------------------+



[2m[36m(_objective pid=23776)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias']
[2m[36m(_objective pid=23776)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=23776)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=23776)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=23776)[0m {'loss': 0.4134, 'learning_rate': 4.372039546990413e-05, 'epoch': 0.16}


[2m[36m(_objective pid=23776)[0m   8%|▊         | 501/6220 [00:15<02:54, 32.87it/s]
  8%|▊         | 505/6220 [00:15<02:48, 33.99it/s]
  8%|▊         | 509/6220 [00:15<03:04, 30.95it/s]
  8%|▊         | 513/6220 [00:15<02:57, 32.12it/s]
  8%|▊         | 517/6220 [00:15<02:52, 32.98it/s]
  8%|▊         | 521/6220 [00:15<02:46, 34.27it/s]


Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:49:23. Total running time: 29min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                     

[2m[36m(_objective pid=23776)[0m   8%|▊         | 525/6220 [00:15<02:41, 35.20it/s]
  9%|▊         | 529/6220 [00:15<02:39, 35.79it/s]
  9%|▊         | 533/6220 [00:15<02:39, 35.74it/s]
  9%|▊         | 537/6220 [00:16<03:01, 31.38it/s]
  9%|▊         | 541/6220 [00:16<02:49, 33.48it/s]
  9%|▉         | 545/6220 [00:16<02:43, 34.73it/s]
  9%|▉         | 549/6220 [00:16<02:38, 35.77it/s]
  9%|▉         | 553/6220 [00:16<02:35, 36.53it/s]
  9%|▉         | 557/6220 [00:16<02:38, 35.72it/s]
  9%|▉         | 561/6220 [00:16<02:36, 36.07it/s]
  9%|▉         | 565/6220 [00:16<02:36, 36.11it/s]
  9%|▉         | 569/6220 [00:16<02:38, 35.58it/s]
  9%|▉         | 573/6220 [00:17<02:36, 35.99it/s]
  9%|▉         | 577/6220 [00:17<02:35, 36.31it/s]
  9%|▉         | 581/6220 [00:17<02:34, 36.42it/s]
  9%|▉         | 585/6220 [00:17<02:34, 36.45it/s]
  9%|▉         | 589/6220 [00:17<02:42, 34.71it/s]
 10%|▉         | 593/6220 [00:17<02:43, 34.42it/s]
 10%|▉         | 597/6220 [00:17<02:42, 34.67

[2m[36m(_objective pid=23776)[0m {'loss': 0.3347, 'learning_rate': 3.989868257917824e-05, 'epoch': 0.32}


[2m[36m(_objective pid=23776)[0m  16%|█▌        | 1008/6220 [00:29<02:17, 37.98it/s]
 16%|█▋        | 1012/6220 [00:29<02:20, 37.12it/s]
 16%|█▋        | 1017/6220 [00:29<02:17, 37.91it/s]
 16%|█▋        | 1021/6220 [00:29<02:19, 37.17it/s]
 16%|█▋        | 1025/6220 [00:30<02:25, 35.79it/s]
 17%|█▋        | 1029/6220 [00:30<02:21, 36.56it/s]
 17%|█▋        | 1033/6220 [00:30<02:19, 37.24it/s]
 17%|█▋        | 1037/6220 [00:30<02:24, 35.86it/s]
 17%|█▋        | 1041/6220 [00:30<02:21, 36.57it/s]
 17%|█▋        | 1045/6220 [00:30<02:20, 36.85it/s]
 17%|█▋        | 1049/6220 [00:30<02:18, 37.23it/s]
 17%|█▋        | 1053/6220 [00:30<02:27, 35.00it/s]
 17%|█▋        | 1057/6220 [00:30<02:25, 35.44it/s]
 17%|█▋        | 1061/6220 [00:31<02:28, 34.76it/s]
 17%|█▋        | 1065/6220 [00:31<02:26, 35.22it/s]
 17%|█▋        | 1069/6220 [00:31<02:23, 35.81it/s]
 17%|█▋        | 1073/6220 [00:31<02:21, 36.36it/s]
 17%|█▋        | 1077/6220 [00:31<02:21, 36.47it/s]
 17%|█▋        | 1081/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.2926, 'learning_rate': 3.6076969688452356e-05, 'epoch': 0.48}


[2m[36m(_objective pid=23776)[0m  24%|██▍       | 1504/6220 [00:43<02:07, 37.07it/s]
 24%|██▍       | 1508/6220 [00:44<02:05, 37.56it/s]
 24%|██▍       | 1512/6220 [00:44<02:05, 37.42it/s]
 24%|██▍       | 1516/6220 [00:44<02:04, 37.87it/s]
 24%|██▍       | 1520/6220 [00:44<02:03, 38.13it/s]
 25%|██▍       | 1524/6220 [00:44<02:09, 36.22it/s]
 25%|██▍       | 1528/6220 [00:44<02:07, 36.76it/s]
 25%|██▍       | 1532/6220 [00:44<02:05, 37.30it/s]
 25%|██▍       | 1536/6220 [00:44<02:04, 37.51it/s]
 25%|██▍       | 1540/6220 [00:44<02:06, 36.96it/s]
 25%|██▍       | 1544/6220 [00:45<02:19, 33.45it/s]
 25%|██▍       | 1548/6220 [00:45<02:17, 34.06it/s]
 25%|██▍       | 1552/6220 [00:45<02:12, 35.22it/s]
 25%|██▌       | 1556/6220 [00:45<02:28, 31.32it/s]
 25%|██▌       | 1560/6220 [00:45<02:33, 30.38it/s]
 25%|██▌       | 1565/6220 [00:45<02:20, 33.24it/s]


Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:49:53. Total running time: 29min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                    

[2m[36m(_objective pid=23776)[0m  25%|██▌       | 1569/6220 [00:45<02:21, 32.86it/s]
 25%|██▌       | 1573/6220 [00:45<02:16, 33.96it/s]
 25%|██▌       | 1577/6220 [00:46<02:12, 34.93it/s]
 25%|██▌       | 1581/6220 [00:46<02:14, 34.62it/s]
 25%|██▌       | 1585/6220 [00:46<02:11, 35.37it/s]
 26%|██▌       | 1589/6220 [00:46<02:19, 33.19it/s]
 26%|██▌       | 1593/6220 [00:46<02:15, 34.05it/s]
 26%|██▌       | 1597/6220 [00:46<02:15, 34.05it/s]
 26%|██▌       | 1601/6220 [00:46<02:20, 32.95it/s]
 26%|██▌       | 1605/6220 [00:46<02:15, 34.10it/s]
 26%|██▌       | 1609/6220 [00:46<02:15, 34.02it/s]
 26%|██▌       | 1613/6220 [00:47<02:13, 34.56it/s]
 26%|██▌       | 1617/6220 [00:47<02:11, 34.88it/s]
 26%|██▌       | 1621/6220 [00:47<02:17, 33.43it/s]
 26%|██▌       | 1625/6220 [00:47<02:15, 34.03it/s]
 26%|██▌       | 1629/6220 [00:47<02:09, 35.46it/s]
 26%|██▋       | 1633/6220 [00:47<02:29, 30.62it/s]
 26%|██▋       | 1637/6220 [00:47<02:23, 31.91it/s]
 26%|██▋       | 1641/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.2336, 'learning_rate': 3.225525679772647e-05, 'epoch': 0.64}


[2m[36m(_objective pid=23776)[0m  32%|███▏      | 2004/6220 [00:58<01:57, 35.88it/s]
 32%|███▏      | 2008/6220 [00:58<02:01, 34.79it/s]
 32%|███▏      | 2012/6220 [00:59<01:57, 35.70it/s]
 32%|███▏      | 2016/6220 [00:59<01:58, 35.40it/s]
 32%|███▏      | 2020/6220 [00:59<02:07, 32.86it/s]
 33%|███▎      | 2024/6220 [00:59<02:12, 31.71it/s]
 33%|███▎      | 2028/6220 [00:59<02:19, 30.08it/s]
 33%|███▎      | 2032/6220 [00:59<02:13, 31.28it/s]
 33%|███▎      | 2036/6220 [00:59<02:07, 32.93it/s]
 33%|███▎      | 2040/6220 [00:59<02:02, 34.21it/s]
 33%|███▎      | 2044/6220 [01:00<02:13, 31.27it/s]
 33%|███▎      | 2048/6220 [01:00<02:09, 32.16it/s]
 33%|███▎      | 2052/6220 [01:00<02:17, 30.34it/s]
 33%|███▎      | 2056/6220 [01:00<02:18, 30.14it/s]
 33%|███▎      | 2060/6220 [01:00<02:09, 32.13it/s]
 33%|███▎      | 2064/6220 [01:00<02:07, 32.57it/s]
 33%|███▎      | 2068/6220 [01:00<02:03, 33.65it/s]
 33%|███▎      | 2072/6220 [01:00<01:59, 34.78it/s]
 33%|███▎      | 2076/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.214, 'learning_rate': 2.8433543907000585e-05, 'epoch': 0.8}


[2m[36m(_objective pid=23776)[0m  40%|████      | 2508/6220 [01:14<01:59, 31.06it/s]
 40%|████      | 2512/6220 [01:14<01:56, 31.77it/s]
 40%|████      | 2516/6220 [01:14<02:02, 30.15it/s]
 41%|████      | 2520/6220 [01:14<01:56, 31.66it/s]
 41%|████      | 2524/6220 [01:14<01:50, 33.44it/s]
 41%|████      | 2528/6220 [01:14<01:49, 33.70it/s]
 41%|████      | 2532/6220 [01:15<01:55, 31.99it/s]
 41%|████      | 2536/6220 [01:15<01:49, 33.64it/s]
 41%|████      | 2540/6220 [01:15<01:45, 34.82it/s]
 41%|████      | 2544/6220 [01:15<01:46, 34.63it/s]
 41%|████      | 2548/6220 [01:15<01:45, 34.93it/s]
 41%|████      | 2552/6220 [01:15<01:44, 34.98it/s]
 41%|████      | 2556/6220 [01:15<01:43, 35.26it/s]


Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:50:23. Total running time: 30min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                     

 41%|████      | 2560/6220 [01:15<01:41, 36.00it/s]
 41%|████      | 2564/6220 [01:16<01:43, 35.21it/s]
 41%|████▏     | 2568/6220 [01:16<01:53, 32.29it/s]
 41%|████▏     | 2572/6220 [01:16<01:49, 33.25it/s]
 41%|████▏     | 2576/6220 [01:16<01:52, 32.43it/s]
 41%|████▏     | 2580/6220 [01:16<01:58, 30.83it/s]
 42%|████▏     | 2584/6220 [01:16<01:52, 32.33it/s]
 42%|████▏     | 2588/6220 [01:16<01:49, 33.25it/s]
 42%|████▏     | 2592/6220 [01:16<01:46, 33.93it/s]
 42%|████▏     | 2596/6220 [01:17<01:49, 33.14it/s]
 42%|████▏     | 2600/6220 [01:17<01:46, 33.95it/s]
 42%|████▏     | 2604/6220 [01:17<01:43, 34.92it/s]
 42%|████▏     | 2608/6220 [01:17<01:43, 34.87it/s]
 42%|████▏     | 2612/6220 [01:17<01:43, 34.81it/s]
 42%|████▏     | 2616/6220 [01:17<01:54, 31.38it/s]
 42%|████▏     | 2620/6220 [01:17<01:50, 32.72it/s]
 42%|████▏     | 2624/6220 [01:17<01:49, 32.74it/s]
 42%|████▏     | 2628/6220 [01:17<01:49, 32.86it/s]
 42%|████▏     | 2632/6220 [01:18<01:45, 34.01it/s]
 42%|████▏  

[2m[36m(_objective pid=23776)[0m {'loss': 0.2248, 'learning_rate': 2.4611831016274703e-05, 'epoch': 0.96}


[2m[36m(_objective pid=23776)[0m  48%|████▊     | 3005/6220 [01:29<01:47, 29.96it/s]
 48%|████▊     | 3009/6220 [01:29<01:40, 31.99it/s]
 48%|████▊     | 3013/6220 [01:29<01:36, 33.24it/s]
 49%|████▊     | 3017/6220 [01:29<01:32, 34.47it/s]
 49%|████▊     | 3021/6220 [01:29<01:29, 35.58it/s]
 49%|████▊     | 3025/6220 [01:30<01:29, 35.66it/s]
 49%|████▊     | 3029/6220 [01:30<01:38, 32.39it/s]
 49%|████▉     | 3033/6220 [01:30<01:34, 33.63it/s]
 49%|████▉     | 3037/6220 [01:30<01:30, 35.28it/s]
 49%|████▉     | 3041/6220 [01:30<01:32, 34.39it/s]
 49%|████▉     | 3045/6220 [01:30<01:28, 35.82it/s]
 49%|████▉     | 3049/6220 [01:30<01:27, 36.30it/s]
 49%|████▉     | 3053/6220 [01:30<01:26, 36.43it/s]
 49%|████▉     | 3057/6220 [01:30<01:25, 37.17it/s]
 49%|████▉     | 3061/6220 [01:31<01:23, 37.61it/s]
 49%|████▉     | 3065/6220 [01:31<01:23, 37.90it/s]
 49%|████▉     | 3069/6220 [01:31<01:23, 37.62it/s]
 49%|████▉     | 3074/6220 [01:31<01:21, 38.37it/s]
 49%|████▉     | 3078/6220 

Trial _objective_f556c_00003 finished iteration 1 at 2023-09-11 13:50:49. Total running time: 30min 28s
+-------------------------------------------------+
| Trial _objective_f556c_00003 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.411 |
| time_total_s                            104.411 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.17102 |
| eval_runtime                             9.5935 |
| eval_samples_per_second                 432.274 |
| eval_steps_per_second                    13.551 |
| objective                               0.17102 |
+-------------------------------------------------+

[2m[36m(_objective pid=23776)[0m {'eval_loss': 0.17101606726646423, 'eval_runtime': 9.5935, 'eval_samples_per_second': 432.274, 'eval_steps_per_second': 13.551, 'epoch': 1.0}


[2m[36m(_objective pid=23776)[0m                                                    
[2m[36m(_objective pid=23776)[0m                                                  [A 50%|█████     | 3110/6220 [01:42<01:29, 34.59it/s]
[2m[36m(_objective pid=23776)[0m 100%|██████████| 130/130 [00:09<00:00, 11.99it/s][A
                                                 [A
 50%|█████     | 3114/6220 [01:43<43:33,  1.19it/s]
 50%|█████     | 3118/6220 [01:43<31:03,  1.66it/s]
 50%|█████     | 3122/6220 [01:43<22:18,  2.31it/s]
 50%|█████     | 3126/6220 [01:43<15:59,  3.22it/s]
 50%|█████     | 3130/6220 [01:43<11:37,  4.43it/s]
 50%|█████     | 3134/6220 [01:44<08:33,  6.01it/s]
 50%|█████     | 3138/6220 [01:44<06:25,  8.00it/s]
 51%|█████     | 3142/6220 [01:44<04:54, 10.45it/s]
 51%|█████     | 3146/6220 [01:44<03:50, 13.31it/s]
 51%|█████     | 3150/6220 [01:44<03:06, 16.47it/s]
 51%|█████     | 3154/6220 [01:44<02:35, 19.68it/s]
 51%|█████     | 3158/6220 [01:44<02:26, 20.85it/s]
 

Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:50:53. Total running time: 30min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                    

[2m[36m(_objective pid=23776)[0m  51%|█████▏    | 3194/6220 [01:45<01:30, 33.55it/s]
 51%|█████▏    | 3198/6220 [01:45<01:32, 32.80it/s]
 51%|█████▏    | 3202/6220 [01:46<01:27, 34.32it/s]
 52%|█████▏    | 3206/6220 [01:46<01:26, 35.03it/s]
 52%|█████▏    | 3210/6220 [01:46<01:26, 34.87it/s]
 52%|█████▏    | 3214/6220 [01:46<01:24, 35.48it/s]
 52%|█████▏    | 3218/6220 [01:46<01:24, 35.60it/s]
 52%|█████▏    | 3222/6220 [01:46<01:24, 35.65it/s]
 52%|█████▏    | 3226/6220 [01:46<01:21, 36.60it/s]
 52%|█████▏    | 3230/6220 [01:46<01:29, 33.23it/s]
 52%|█████▏    | 3234/6220 [01:47<01:25, 34.74it/s]
 52%|█████▏    | 3238/6220 [01:47<01:23, 35.90it/s]
 52%|█████▏    | 3242/6220 [01:47<01:29, 33.45it/s]
 52%|█████▏    | 3247/6220 [01:47<01:29, 33.18it/s]
 52%|█████▏    | 3251/6220 [01:47<01:25, 34.78it/s]
 52%|█████▏    | 3256/6220 [01:47<01:21, 36.40it/s]
 52%|█████▏    | 3260/6220 [01:47<01:19, 37.26it/s]
 52%|█████▏    | 3264/6220 [01:47<01:22, 35.91it/s]
 53%|█████▎    | 3269/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.1462, 'learning_rate': 2.0790118125548818e-05, 'epoch': 1.13}


[2m[36m(_objective pid=23776)[0m  56%|█████▋    | 3506/6220 [01:54<01:14, 36.24it/s]
 56%|█████▋    | 3510/6220 [01:54<01:15, 35.92it/s]
 56%|█████▋    | 3514/6220 [01:55<01:17, 34.84it/s]
 57%|█████▋    | 3518/6220 [01:55<01:20, 33.43it/s]
 57%|█████▋    | 3522/6220 [01:55<01:18, 34.22it/s]
 57%|█████▋    | 3526/6220 [01:55<01:26, 31.15it/s]
 57%|█████▋    | 3530/6220 [01:55<01:28, 30.40it/s]
 57%|█████▋    | 3534/6220 [01:55<01:25, 31.34it/s]
 57%|█████▋    | 3538/6220 [01:55<01:28, 30.46it/s]
 57%|█████▋    | 3542/6220 [01:55<01:25, 31.28it/s]
 57%|█████▋    | 3546/6220 [01:56<01:21, 32.84it/s]
 57%|█████▋    | 3550/6220 [01:56<01:19, 33.70it/s]
 57%|█████▋    | 3554/6220 [01:56<01:16, 35.04it/s]
 57%|█████▋    | 3558/6220 [01:56<01:13, 36.11it/s]
 57%|█████▋    | 3562/6220 [01:56<01:16, 34.83it/s]
 57%|█████▋    | 3566/6220 [01:56<01:18, 33.71it/s]
 57%|█████▋    | 3570/6220 [01:56<01:25, 31.15it/s]
 57%|█████▋    | 3574/6220 [01:56<01:21, 32.65it/s]
 58%|█████▊    | 3578/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.1168, 'learning_rate': 1.696840523482293e-05, 'epoch': 1.29}


[2m[36m(_objective pid=23776)[0m  64%|██████▍   | 4006/6220 [02:09<01:07, 32.56it/s]
 64%|██████▍   | 4010/6220 [02:09<01:12, 30.54it/s]
 65%|██████▍   | 4014/6220 [02:09<01:07, 32.62it/s]
 65%|██████▍   | 4019/6220 [02:09<01:02, 35.15it/s]
 65%|██████▍   | 4024/6220 [02:09<00:59, 37.00it/s]
 65%|██████▍   | 4028/6220 [02:10<00:58, 37.31it/s]
 65%|██████▍   | 4033/6220 [02:10<00:57, 38.31it/s]
 65%|██████▍   | 4037/6220 [02:10<00:57, 38.19it/s]
 65%|██████▍   | 4042/6220 [02:10<00:55, 38.99it/s]
 65%|██████▌   | 4047/6220 [02:10<00:55, 39.48it/s]
 65%|██████▌   | 4052/6220 [02:10<00:54, 40.14it/s]
 65%|██████▌   | 4057/6220 [02:10<00:55, 39.26it/s]
 65%|██████▌   | 4061/6220 [02:10<00:55, 38.97it/s]
 65%|██████▌   | 4065/6220 [02:10<00:55, 39.14it/s]
 65%|██████▌   | 4069/6220 [02:11<00:56, 38.25it/s]
 65%|██████▌   | 4073/6220 [02:11<00:56, 38.16it/s]
 66%|██████▌   | 4077/6220 [02:11<00:56, 38.10it/s]
 66%|██████▌   | 4081/6220 [02:11<00:56, 37.71it/s]
 66%|██████▌   | 4085/6220 

Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:51:23. Total running time: 31min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                     

[2m[36m(_objective pid=23776)[0m  68%|██████▊   | 4233/6220 [02:15<00:54, 36.54it/s]
 68%|██████▊   | 4237/6220 [02:15<00:54, 36.15it/s]
 68%|██████▊   | 4241/6220 [02:16<00:54, 36.05it/s]
 68%|██████▊   | 4245/6220 [02:16<00:59, 33.03it/s]
 68%|██████▊   | 4250/6220 [02:16<00:55, 35.27it/s]
 68%|██████▊   | 4254/6220 [02:16<00:54, 36.39it/s]
 68%|██████▊   | 4258/6220 [02:16<00:52, 37.31it/s]
 69%|██████▊   | 4263/6220 [02:16<00:51, 38.29it/s]
 69%|██████▊   | 4268/6220 [02:16<00:49, 39.11it/s]
 69%|██████▊   | 4272/6220 [02:16<00:49, 39.14it/s]
 69%|██████▊   | 4276/6220 [02:17<00:55, 34.94it/s]
 69%|██████▉   | 4280/6220 [02:17<00:53, 36.08it/s]
 69%|██████▉   | 4284/6220 [02:17<00:56, 34.02it/s]
 69%|██████▉   | 4288/6220 [02:17<00:55, 35.01it/s]
 69%|██████▉   | 4292/6220 [02:17<00:56, 34.21it/s]
 69%|██████▉   | 4296/6220 [02:17<00:53, 35.71it/s]
 69%|██████▉   | 4300/6220 [02:17<00:55, 34.67it/s]
 69%|██████▉   | 4305/6220 [02:17<00:52, 36.69it/s]
 69%|██████▉   | 4309/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.1159, 'learning_rate': 1.3146692344097045e-05, 'epoch': 1.45}


[2m[36m(_objective pid=23776)[0m  72%|███████▏  | 4509/6220 [02:23<00:48, 35.19it/s]
 73%|███████▎  | 4513/6220 [02:23<00:47, 36.17it/s]
 73%|███████▎  | 4517/6220 [02:23<00:46, 36.89it/s]
 73%|███████▎  | 4521/6220 [02:23<00:45, 37.34it/s]
 73%|███████▎  | 4525/6220 [02:23<00:44, 38.01it/s]
 73%|███████▎  | 4529/6220 [02:23<00:44, 38.40it/s]
 73%|███████▎  | 4533/6220 [02:24<00:49, 34.10it/s]
 73%|███████▎  | 4537/6220 [02:24<00:50, 33.12it/s]
 73%|███████▎  | 4541/6220 [02:24<00:48, 34.60it/s]
 73%|███████▎  | 4545/6220 [02:24<00:49, 33.57it/s]
 73%|███████▎  | 4549/6220 [02:24<00:47, 35.21it/s]
 73%|███████▎  | 4553/6220 [02:24<00:47, 34.81it/s]
 73%|███████▎  | 4557/6220 [02:24<00:47, 35.35it/s]
 73%|███████▎  | 4561/6220 [02:24<00:52, 31.73it/s]
 73%|███████▎  | 4565/6220 [02:24<00:49, 33.28it/s]
 73%|███████▎  | 4570/6220 [02:25<00:46, 35.50it/s]
 74%|███████▎  | 4574/6220 [02:25<00:46, 35.39it/s]
 74%|███████▎  | 4578/6220 [02:25<00:50, 32.60it/s]
 74%|███████▎  | 4582/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.1314, 'learning_rate': 9.32497945337116e-06, 'epoch': 1.61}


[2m[36m(_objective pid=23776)[0m  80%|████████  | 5007/6220 [02:37<00:33, 36.66it/s]
 81%|████████  | 5011/6220 [02:37<00:32, 37.49it/s]
 81%|████████  | 5015/6220 [02:37<00:31, 37.80it/s]
 81%|████████  | 5019/6220 [02:37<00:33, 35.75it/s]
 81%|████████  | 5023/6220 [02:38<00:36, 32.72it/s]
 81%|████████  | 5027/6220 [02:38<00:38, 31.22it/s]
 81%|████████  | 5031/6220 [02:38<00:36, 32.90it/s]
 81%|████████  | 5035/6220 [02:38<00:34, 34.68it/s]
 81%|████████  | 5039/6220 [02:38<00:33, 35.45it/s]
 81%|████████  | 5043/6220 [02:38<00:32, 36.63it/s]
 81%|████████  | 5047/6220 [02:38<00:36, 31.92it/s]
 81%|████████  | 5051/6220 [02:38<00:35, 32.59it/s]
 81%|████████▏ | 5055/6220 [02:39<00:39, 29.79it/s]
 81%|████████▏ | 5059/6220 [02:39<00:36, 31.60it/s]
 81%|████████▏ | 5063/6220 [02:39<00:34, 33.29it/s]
 81%|████████▏ | 5067/6220 [02:39<00:34, 33.65it/s]
 82%|████████▏ | 5071/6220 [02:39<00:32, 35.04it/s]
 82%|████████▏ | 5075/6220 [02:39<00:33, 34.20it/s]
 82%|████████▏ | 5079/6220 

Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:51:53. Total running time: 31min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                    

[2m[36m(_objective pid=23776)[0m  85%|████████▌ | 5293/6220 [02:46<00:25, 36.10it/s]
 85%|████████▌ | 5297/6220 [02:46<00:25, 36.89it/s]
 85%|████████▌ | 5301/6220 [02:46<00:27, 33.50it/s]
 85%|████████▌ | 5305/6220 [02:46<00:28, 31.66it/s]
 85%|████████▌ | 5309/6220 [02:46<00:29, 30.94it/s]
 85%|████████▌ | 5313/6220 [02:46<00:27, 33.10it/s]
 85%|████████▌ | 5318/6220 [02:46<00:25, 35.56it/s]
 86%|████████▌ | 5322/6220 [02:46<00:24, 36.71it/s]
 86%|████████▌ | 5326/6220 [02:46<00:24, 36.82it/s]
 86%|████████▌ | 5331/6220 [02:47<00:23, 37.83it/s]
 86%|████████▌ | 5336/6220 [02:47<00:22, 38.92it/s]
 86%|████████▌ | 5341/6220 [02:47<00:22, 39.64it/s]
 86%|████████▌ | 5345/6220 [02:47<00:22, 39.49it/s]
 86%|████████▌ | 5350/6220 [02:47<00:21, 39.74it/s]
 86%|████████▌ | 5355/6220 [02:47<00:21, 39.94it/s]
 86%|████████▌ | 5360/6220 [02:47<00:21, 40.13it/s]
 86%|████████▋ | 5365/6220 [02:48<00:23, 36.20it/s]
 86%|████████▋ | 5369/6220 [02:48<00:22, 37.09it/s]
 86%|████████▋ | 5374/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.1415, 'learning_rate': 5.503266562645275e-06, 'epoch': 1.77}


[2m[36m(_objective pid=23776)[0m  88%|████████▊ | 5504/6220 [02:51<00:18, 37.88it/s]
 89%|████████▊ | 5508/6220 [02:52<00:19, 37.11it/s]
 89%|████████▊ | 5512/6220 [02:52<00:18, 37.72it/s]
 89%|████████▊ | 5516/6220 [02:52<00:18, 37.91it/s]
 89%|████████▊ | 5520/6220 [02:52<00:19, 35.02it/s]
 89%|████████▉ | 5524/6220 [02:52<00:19, 35.90it/s]
 89%|████████▉ | 5528/6220 [02:52<00:18, 36.97it/s]
 89%|████████▉ | 5532/6220 [02:52<00:18, 37.42it/s]
 89%|████████▉ | 5536/6220 [02:52<00:18, 37.77it/s]
 89%|████████▉ | 5540/6220 [02:52<00:19, 35.35it/s]
 89%|████████▉ | 5544/6220 [02:53<00:18, 36.21it/s]
 89%|████████▉ | 5549/6220 [02:53<00:18, 36.91it/s]
 89%|████████▉ | 5554/6220 [02:53<00:17, 37.94it/s]
 89%|████████▉ | 5558/6220 [02:53<00:19, 34.31it/s]
 89%|████████▉ | 5562/6220 [02:53<00:18, 35.13it/s]
 90%|████████▉ | 5567/6220 [02:53<00:17, 36.55it/s]
 90%|████████▉ | 5571/6220 [02:53<00:17, 37.23it/s]
 90%|████████▉ | 5575/6220 [02:53<00:17, 35.92it/s]
 90%|████████▉ | 5579/6220 

[2m[36m(_objective pid=23776)[0m {'loss': 0.1068, 'learning_rate': 1.6815536719193896e-06, 'epoch': 1.93}


[2m[36m(_objective pid=23776)[0m  96%|█████████▋| 6002/6220 [03:06<00:05, 37.88it/s]
 97%|█████████▋| 6006/6220 [03:06<00:05, 37.88it/s]
 97%|█████████▋| 6010/6220 [03:06<00:05, 38.30it/s]
 97%|█████████▋| 6014/6220 [03:06<00:05, 36.74it/s]
 97%|█████████▋| 6018/6220 [03:06<00:05, 37.00it/s]
 97%|█████████▋| 6022/6220 [03:06<00:05, 37.46it/s]
 97%|█████████▋| 6027/6220 [03:06<00:05, 38.23it/s]
 97%|█████████▋| 6031/6220 [03:06<00:05, 35.65it/s]
 97%|█████████▋| 6036/6220 [03:07<00:04, 37.22it/s]
 97%|█████████▋| 6040/6220 [03:07<00:04, 37.68it/s]
 97%|█████████▋| 6044/6220 [03:07<00:04, 37.60it/s]
 97%|█████████▋| 6048/6220 [03:07<00:05, 33.75it/s]
 97%|█████████▋| 6052/6220 [03:07<00:04, 34.37it/s]
 97%|█████████▋| 6056/6220 [03:07<00:05, 31.83it/s]
 97%|█████████▋| 6060/6220 [03:07<00:04, 32.82it/s]
 97%|█████████▋| 6064/6220 [03:07<00:04, 34.66it/s]
 98%|█████████▊| 6068/6220 [03:08<00:04, 34.97it/s]
 98%|█████████▊| 6072/6220 [03:08<00:04, 35.04it/s]
 98%|█████████▊| 6076/6220 

Trial status: 3 TERMINATED | 1 RUNNING | 16 PENDING
Current time: 2023-09-11 13:52:23. Total running time: 32min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00003   RUNNING          4.75421e-05                    2                     

[2m[36m(_objective pid=23776)[0m 
 37%|███▋      | 48/130 [00:03<00:06, 11.75it/s][A
[2m[36m(_objective pid=23776)[0m 
 39%|███▉      | 51/130 [00:03<00:06, 12.82it/s][A
[2m[36m(_objective pid=23776)[0m 
 41%|████      | 53/130 [00:03<00:05, 13.26it/s][A
[2m[36m(_objective pid=23776)[0m 
 42%|████▏     | 55/130 [00:04<00:05, 12.68it/s][A
[2m[36m(_objective pid=23776)[0m 
 44%|████▍     | 57/130 [00:04<00:05, 13.17it/s][A
[2m[36m(_objective pid=23776)[0m 
 45%|████▌     | 59/130 [00:04<00:05, 13.31it/s][A
[2m[36m(_objective pid=23776)[0m 
 47%|████▋     | 61/130 [00:04<00:04, 13.90it/s][A
[2m[36m(_objective pid=23776)[0m 
 48%|████▊     | 63/130 [00:04<00:04, 15.00it/s][A
[2m[36m(_objective pid=23776)[0m 
 50%|█████     | 65/130 [00:04<00:04, 14.05it/s][A
[2m[36m(_objective pid=23776)[0m 
 52%|█████▏    | 67/130 [00:04<00:04, 15.00it/s][A
[2m[36m(_objective pid=23776)[0m 
 54%|█████▍    | 70/130 [00:04<00:03, 17.09it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00003 finished iteration 2 at 2023-09-11 13:52:29. Total running time: 32min 8s
+-------------------------------------------------+
| Trial _objective_f556c_00003 result             |
+-------------------------------------------------+
| time_this_iter_s                        100.025 |
| time_total_s                            204.437 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.18534 |
| eval_runtime                             9.5961 |
| eval_samples_per_second                 432.156 |
| eval_steps_per_second                    13.547 |
| objective                               0.18534 |
+-------------------------------------------------+

[2m[36m(_objective pid=23776)[0m {'eval_loss': 0.18533514440059662, 'eval_runtime': 9.5961, 'eval_samples_per_second': 432.156, 'eval_steps_per_second': 13.547, 'epoch': 2.0}


[2m[36m(_objective pid=23776)[0m                                                    
[2m[36m(_objective pid=23776)[0m                                                  [A100%|██████████| 6220/6220 [03:22<00:00, 34.53it/s]
[2m[36m(_objective pid=23776)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
[2m[36m(_objective pid=23776)[0m                                                  [A


Trial _objective_f556c_00003 completed after 2 iterations at 2023-09-11 13:52:31. Total running time: 32min 9s

[2m[36m(_objective pid=23776)[0m {'train_runtime': 203.4898, 'train_samples_per_second': 122.257, 'train_steps_per_second': 30.567, 'train_loss': 0.20407228500513402, 'epoch': 2.0}


[2m[36m(_objective pid=23776)[0m                                                    100%|██████████| 6220/6220 [03:23<00:00, 34.53it/s]100%|██████████| 6220/6220 [03:23<00:00, 30.57it/s]


Trial _objective_f556c_00004 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00004 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             3e-05 |
| num_train_epochs                              5 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.00692 |
+-------------------------------------------------+



[2m[36m(_objective pid=24721)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight']
[2m[36m(_objective pid=24721)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=24721)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=24721)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:52:53. Total running time: 32min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m   3%|▎         | 429/15550 [00:12<06:42, 37.60it/s]
  3%|▎         | 433/15550 [00:12<06:41, 37.62it/s]
  3%|▎         | 437/15550 [00:12<07:03, 35.69it/s]
  3%|▎         | 441/15550 [00:12<07:10, 35.13it/s]
  3%|▎         | 445/15550 [00:13<07:03, 35.69it/s]
  3%|▎         | 449/15550 [00:13<07:00, 35.93it/s]
  3%|▎         | 453/15550 [00:13<07:44, 32.49it/s]
  3%|▎         | 457/15550 [00:13<07:19, 34.33it/s]
  3%|▎         | 461/15550 [00:13<07:15, 34.68it/s]
  3%|▎         | 465/15550 [00:13<07:29, 33.53it/s]
  3%|▎         | 469/15550 [00:13<08:01, 31.34it/s]
  3%|▎         | 473/15550 [00:13<07:33, 33.23it/s]
  3%|▎         | 477/15550 [00:14<08:18, 30.27it/s]
  3%|▎         | 481/15550 [00:14<07:55, 31.71it/s]
  3%|▎         | 485/15550 [00:14<07:46, 32.32it/s]
  3%|▎         | 489/15550 [00:14<08:23, 29.93it/s]
  3%|▎         | 493/15550 [00:14<08:02, 31.18it/s]
  3%|▎         | 497/15550 [00:14<07:36, 33.00it/s]


[2m[36m(_objective pid=24721)[0m {'loss': 0.3921, 'learning_rate': 3.335789084334053e-05, 'epoch': 0.16}


[2m[36m(_objective pid=24721)[0m                                                      3%|▎         | 500/15550 [00:14<07:36, 33.00it/s]
  3%|▎         | 501/15550 [00:14<07:30, 33.40it/s]
  3%|▎         | 505/15550 [00:14<07:18, 34.34it/s]
  3%|▎         | 509/15550 [00:15<07:57, 31.47it/s]
  3%|▎         | 513/15550 [00:15<07:40, 32.64it/s]
  3%|▎         | 517/15550 [00:15<07:33, 33.12it/s]
  3%|▎         | 521/15550 [00:15<07:19, 34.23it/s]
  3%|▎         | 525/15550 [00:15<07:15, 34.47it/s]
  3%|▎         | 529/15550 [00:15<07:09, 35.00it/s]
  3%|▎         | 533/15550 [00:15<07:09, 34.98it/s]
  3%|▎         | 537/15550 [00:15<08:07, 30.83it/s]
  3%|▎         | 541/15550 [00:16<07:38, 32.76it/s]
  4%|▎         | 545/15550 [00:16<07:14, 34.50it/s]
  4%|▎         | 549/15550 [00:16<06:57, 35.93it/s]
  4%|▎         | 553/15550 [00:16<06:45, 36.95it/s]
  4%|▎         | 557/15550 [00:16<06:55, 36.12it/s]
  4%|▎         | 561/15550 [00:16<06:50, 36.56it/s]
  4%|▎         | 565/15550

[2m[36m(_objective pid=24721)[0m {'loss': 0.3366, 'learning_rate': 3.224965526714981e-05, 'epoch': 0.32}


[2m[36m(_objective pid=24721)[0m                                                      6%|▋         | 1000/15550 [00:28<06:16, 38.68it/s]  6%|▋         | 1002/15550 [00:28<06:16, 38.68it/s]
  6%|▋         | 1006/15550 [00:29<06:18, 38.42it/s]
  6%|▋         | 1010/15550 [00:29<06:27, 37.51it/s]
  7%|▋         | 1014/15550 [00:29<06:26, 37.65it/s]
  7%|▋         | 1018/15550 [00:29<06:22, 37.99it/s]
  7%|▋         | 1022/15550 [00:29<06:40, 36.27it/s]
  7%|▋         | 1027/15550 [00:29<06:25, 37.64it/s]
  7%|▋         | 1031/15550 [00:29<06:21, 38.04it/s]
  7%|▋         | 1035/15550 [00:29<06:31, 37.11it/s]
  7%|▋         | 1039/15550 [00:29<06:24, 37.70it/s]
  7%|▋         | 1043/15550 [00:30<06:21, 37.98it/s]
  7%|▋         | 1047/15550 [00:30<06:22, 37.92it/s]
  7%|▋         | 1051/15550 [00:30<06:54, 34.96it/s]
  7%|▋         | 1055/15550 [00:30<06:53, 35.08it/s]
  7%|▋         | 1059/15550 [00:30<06:55, 34.86it/s]
  7%|▋         | 1063/15550 [00:30<06:44, 35.79it/s]
  7%|▋   

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:53:23. Total running time: 33min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

[2m[36m(_objective pid=24721)[0m   9%|▉         | 1477/15550 [00:42<07:25, 31.61it/s]
 10%|▉         | 1481/15550 [00:42<07:01, 33.41it/s]
 10%|▉         | 1485/15550 [00:42<07:06, 33.00it/s]
 10%|▉         | 1489/15550 [00:42<06:45, 34.71it/s]
 10%|▉         | 1493/15550 [00:42<06:33, 35.71it/s]
 10%|▉         | 1497/15550 [00:43<06:27, 36.22it/s]


[2m[36m(_objective pid=24721)[0m {'loss': 0.3078, 'learning_rate': 3.1141419690959095e-05, 'epoch': 0.48}


[2m[36m(_objective pid=24721)[0m                                                      10%|▉         | 1500/15550 [00:43<06:27, 36.22it/s] 10%|▉         | 1501/15550 [00:43<06:17, 37.18it/s]
 10%|▉         | 1505/15550 [00:43<06:10, 37.96it/s]
 10%|▉         | 1510/15550 [00:43<06:06, 38.34it/s]
 10%|▉         | 1514/15550 [00:43<06:11, 37.78it/s]
 10%|▉         | 1518/15550 [00:43<06:07, 38.20it/s]
 10%|▉         | 1522/15550 [00:43<06:20, 36.88it/s]
 10%|▉         | 1526/15550 [00:43<06:12, 37.60it/s]
 10%|▉         | 1530/15550 [00:43<06:08, 38.03it/s]
 10%|▉         | 1534/15550 [00:43<06:05, 38.40it/s]
 10%|▉         | 1538/15550 [00:44<06:07, 38.13it/s]
 10%|▉         | 1542/15550 [00:44<06:55, 33.75it/s]
 10%|▉         | 1546/15550 [00:44<06:38, 35.16it/s]
 10%|▉         | 1550/15550 [00:44<06:34, 35.49it/s]
 10%|▉         | 1554/15550 [00:44<06:39, 35.00it/s]
 10%|█         | 1558/15550 [00:44<07:51, 29.69it/s]
 10%|█         | 1562/15550 [00:44<07:17, 31.99it/s]
 10%|█  

[2m[36m(_objective pid=24721)[0m {'loss': 0.2313, 'learning_rate': 3.003318411476838e-05, 'epoch': 0.64}


[2m[36m(_objective pid=24721)[0m  13%|█▎        | 2005/15550 [00:57<06:07, 36.82it/s]
 13%|█▎        | 2009/15550 [00:57<06:03, 37.28it/s]
 13%|█▎        | 2013/15550 [00:57<05:58, 37.78it/s]
 13%|█▎        | 2017/15550 [00:58<05:59, 37.67it/s]
 13%|█▎        | 2021/15550 [00:58<06:23, 35.27it/s]
 13%|█▎        | 2025/15550 [00:58<07:24, 30.45it/s]
 13%|█▎        | 2029/15550 [00:58<06:53, 32.67it/s]
 13%|█▎        | 2033/15550 [00:58<06:31, 34.51it/s]
 13%|█▎        | 2037/15550 [00:58<06:16, 35.91it/s]
 13%|█▎        | 2041/15550 [00:58<06:19, 35.57it/s]
 13%|█▎        | 2045/15550 [00:58<06:50, 32.87it/s]
 13%|█▎        | 2049/15550 [00:58<06:33, 34.31it/s]
 13%|█▎        | 2053/15550 [00:59<07:08, 31.51it/s]
 13%|█▎        | 2057/15550 [00:59<07:09, 31.39it/s]
 13%|█▎        | 2061/15550 [00:59<06:42, 33.49it/s]
 13%|█▎        | 2065/15550 [00:59<06:35, 34.08it/s]
 13%|█▎        | 2069/15550 [00:59<06:23, 35.18it/s]
 13%|█▎        | 2074/15550 [00:59<06:07, 36.68it/s]
 13%|█▎  

[2m[36m(_objective pid=24721)[0m {'loss': 0.2297, 'learning_rate': 2.8924948538577666e-05, 'epoch': 0.8}


[2m[36m(_objective pid=24721)[0m  16%|█▌        | 2505/15550 [01:12<06:22, 34.14it/s]
 16%|█▌        | 2509/15550 [01:12<06:18, 34.47it/s]
 16%|█▌        | 2513/15550 [01:12<06:48, 31.90it/s]
 16%|█▌        | 2517/15550 [01:12<06:27, 33.63it/s]


Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:53:53. Total running time: 33min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m  16%|█▌        | 2521/15550 [01:12<06:12, 35.01it/s]
 16%|█▌        | 2526/15550 [01:12<06:03, 35.78it/s]
 16%|█▋        | 2530/15550 [01:12<06:12, 34.95it/s]
 16%|█▋        | 2534/15550 [01:12<06:10, 35.16it/s]
 16%|█▋        | 2538/15550 [01:13<05:58, 36.30it/s]
 16%|█▋        | 2542/15550 [01:13<05:51, 36.98it/s]
 16%|█▋        | 2546/15550 [01:13<05:50, 37.11it/s]
 16%|█▋        | 2550/15550 [01:13<05:49, 37.15it/s]
 16%|█▋        | 2554/15550 [01:13<05:43, 37.80it/s]
 16%|█▋        | 2558/15550 [01:13<05:43, 37.88it/s]
 16%|█▋        | 2562/15550 [01:13<05:43, 37.86it/s]
 17%|█▋        | 2566/15550 [01:13<06:18, 34.34it/s]
 17%|█▋        | 2570/15550 [01:13<06:03, 35.72it/s]
 17%|█▋        | 2574/15550 [01:14<05:56, 36.37it/s]
 17%|█▋        | 2578/15550 [01:14<06:10, 35.02it/s]
 17%|█▋        | 2582/15550 [01:14<06:27, 33.45it/s]
 17%|█▋        | 2586/15550 [01:14<06:12, 34.80it/s]
 17%|█▋        | 2590/15550 [01:14<06:04, 35.55it/s]
 17%|█▋  

[2m[36m(_objective pid=24721)[0m {'loss': 0.2325, 'learning_rate': 2.7816712962386953e-05, 'epoch': 0.96}


[2m[36m(_objective pid=24721)[0m  19%|█▉        | 3002/15550 [01:26<06:47, 30.76it/s]
 19%|█▉        | 3006/15550 [01:26<06:52, 30.38it/s]
 19%|█▉        | 3010/15550 [01:26<06:24, 32.60it/s]
 19%|█▉        | 3014/15550 [01:26<06:03, 34.47it/s]
 19%|█▉        | 3018/15550 [01:26<05:49, 35.84it/s]
 19%|█▉        | 3022/15550 [01:27<05:39, 36.89it/s]
 19%|█▉        | 3026/15550 [01:27<05:32, 37.69it/s]
 19%|█▉        | 3030/15550 [01:27<06:09, 33.92it/s]
 20%|█▉        | 3034/15550 [01:27<05:55, 35.22it/s]
 20%|█▉        | 3038/15550 [01:27<05:43, 36.39it/s]
 20%|█▉        | 3042/15550 [01:27<05:52, 35.45it/s]
 20%|█▉        | 3047/15550 [01:27<05:41, 36.63it/s]
 20%|█▉        | 3051/15550 [01:27<05:37, 37.00it/s]
 20%|█▉        | 3055/15550 [01:27<05:36, 37.18it/s]
 20%|█▉        | 3059/15550 [01:28<05:29, 37.87it/s]
 20%|█▉        | 3063/15550 [01:28<05:30, 37.77it/s]
 20%|█▉        | 3067/15550 [01:28<05:38, 36.92it/s]
 20%|█▉        | 3072/15550 [01:28<05:26, 38.19it/s]
 20%|█▉  

Trial _objective_f556c_00004 finished iteration 1 at 2023-09-11 13:54:20. Total running time: 33min 58s[2m[36m(_objective pid=24721)[0m {'eval_loss': 0.19188302755355835, 'eval_runtime': 9.5621, 'eval_samples_per_second': 433.693, 'eval_steps_per_second': 13.595, 'epoch': 1.0}

+-------------------------------------------------+
| Trial _objective_f556c_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        101.849 |
| time_total_s                            101.849 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.19188 |
| eval_runtime                             9.5621 |
| eval_samples_per_second                 433.693 |
| eval_steps_per_second                    13.595 |
| objective                               0.19188 |
+-------------------------------------------------+



 20%|██        | 3110/15550 [01:40<05:51, 35.35it/s]
 20%|██        | 3111/15550 [01:40<3:34:19,  1.03s/it]
 20%|██        | 3115/15550 [01:40<2:22:04,  1.46it/s]
 20%|██        | 3119/15550 [01:40<1:38:08,  2.11it/s]
 20%|██        | 3123/15550 [01:40<1:08:21,  3.03it/s]
 20%|██        | 3127/15550 [01:40<48:37,  4.26it/s]  
 20%|██        | 3131/15550 [01:40<35:13,  5.88it/s]
 20%|██        | 3135/15550 [01:41<26:03,  7.94it/s]
 20%|██        | 3139/15550 [01:41<19:57, 10.36it/s]
 20%|██        | 3143/15550 [01:41<15:30, 13.33it/s]
 20%|██        | 3147/15550 [01:41<12:34, 16.44it/s]
 20%|██        | 3151/15550 [01:41<10:20, 19.97it/s]
 20%|██        | 3155/15550 [01:41<09:04, 22.77it/s]
 20%|██        | 3159/15550 [01:41<08:42, 23.70it/s]
 20%|██        | 3163/15550 [01:41<08:54, 23.17it/s]
 20%|██        | 3167/15550 [01:42<08:04, 25.57it/s]
 20%|██        | 3171/15550 [01:42<08:19, 24.80it/s]
 20%|██        | 3175/15550 [01:42<07:28, 27.62it/s]
 20%|██        | 3179/15550 [01:42<0

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:54:23. Total running time: 34min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

[2m[36m(_objective pid=24721)[0m  20%|██        | 3183/15550 [01:42<06:32, 31.48it/s]
 20%|██        | 3187/15550 [01:42<06:09, 33.50it/s]
 21%|██        | 3191/15550 [01:42<05:53, 34.93it/s]
 21%|██        | 3195/15550 [01:42<06:08, 33.49it/s]
 21%|██        | 3199/15550 [01:42<06:12, 33.16it/s]
 21%|██        | 3203/15550 [01:43<05:53, 34.88it/s]
 21%|██        | 3207/15550 [01:43<05:42, 36.06it/s]
 21%|██        | 3211/15550 [01:43<05:47, 35.50it/s]
 21%|██        | 3215/15550 [01:43<05:38, 36.46it/s]
 21%|██        | 3219/15550 [01:43<05:30, 37.36it/s]
 21%|██        | 3223/15550 [01:43<05:32, 37.09it/s]
 21%|██        | 3227/15550 [01:43<05:28, 37.46it/s]
 21%|██        | 3231/15550 [01:43<06:06, 33.65it/s]
 21%|██        | 3236/15550 [01:43<05:43, 35.87it/s]
 21%|██        | 3241/15550 [01:44<05:59, 34.25it/s]
 21%|██        | 3245/15550 [01:44<05:47, 35.45it/s]
 21%|██        | 3249/15550 [01:44<06:02, 33.91it/s]
 21%|██        | 3254/15550 [01:44<05:42, 35.87it/s]
 21%|██  

[2m[36m(_objective pid=24721)[0m {'loss': 0.1489, 'learning_rate': 2.6708477386196236e-05, 'epoch': 1.13}


[2m[36m(_objective pid=24721)[0m  23%|██▎       | 3500/15550 [01:51<05:38, 35.58it/s]                                                     23%|██▎       | 3500/15550 [01:51<05:38, 35.58it/s]
 23%|██▎       | 3504/15550 [01:51<05:35, 35.94it/s]
 23%|██▎       | 3508/15550 [01:51<05:30, 36.38it/s]
 23%|██▎       | 3512/15550 [01:51<05:50, 34.34it/s]
 23%|██▎       | 3516/15550 [01:51<05:58, 33.59it/s]
 23%|██▎       | 3520/15550 [01:52<05:48, 34.50it/s]
 23%|██▎       | 3524/15550 [01:52<06:16, 31.93it/s]
 23%|██▎       | 3528/15550 [01:52<06:27, 31.04it/s]
 23%|██▎       | 3532/15550 [01:52<06:10, 32.40it/s]
 23%|██▎       | 3536/15550 [01:52<06:10, 32.43it/s]
 23%|██▎       | 3540/15550 [01:52<06:06, 32.75it/s]
 23%|██▎       | 3545/15550 [01:52<05:41, 35.12it/s]
 23%|██▎       | 3549/15550 [01:52<05:34, 35.90it/s]
 23%|██▎       | 3554/15550 [01:53<05:20, 37.39it/s]
 23%|██▎       | 3558/15550 [01:53<05:15, 37.98it/s]
 23%|██▎       | 3562/15550 [01:53<05:27, 36.64it/s]
 23%|██▎

[2m[36m(_objective pid=24721)[0m {'loss': 0.126, 'learning_rate': 2.560024181000552e-05, 'epoch': 1.29}


[2m[36m(_objective pid=24721)[0m  26%|██▌       | 4003/15550 [02:06<06:15, 30.76it/s]
 26%|██▌       | 4007/15550 [02:06<06:03, 31.74it/s]
 26%|██▌       | 4011/15550 [02:06<06:24, 30.03it/s]
 26%|██▌       | 4015/15550 [02:06<06:03, 31.76it/s]
 26%|██▌       | 4019/15550 [02:06<05:48, 33.05it/s]
 26%|██▌       | 4023/15550 [02:06<05:41, 33.79it/s]
 26%|██▌       | 4027/15550 [02:06<05:32, 34.70it/s]
 26%|██▌       | 4031/15550 [02:07<05:25, 35.36it/s]
 26%|██▌       | 4035/15550 [02:07<05:22, 35.73it/s]
 26%|██▌       | 4039/15550 [02:07<05:21, 35.83it/s]
 26%|██▌       | 4043/15550 [02:07<05:25, 35.35it/s]
 26%|██▌       | 4047/15550 [02:07<05:21, 35.77it/s]
 26%|██▌       | 4051/15550 [02:07<05:15, 36.49it/s]
 26%|██▌       | 4055/15550 [02:07<05:12, 36.80it/s]
 26%|██▌       | 4059/15550 [02:07<05:16, 36.34it/s]
 26%|██▌       | 4063/15550 [02:07<05:11, 36.88it/s]
 26%|██▌       | 4067/15550 [02:08<05:07, 37.34it/s]
 26%|██▌       | 4071/15550 [02:08<05:04, 37.69it/s]
 26%|██▌ 

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:54:53. Total running time: 34min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m  27%|██▋       | 4229/15550 [02:12<05:24, 34.91it/s]
 27%|██▋       | 4233/15550 [02:12<05:16, 35.77it/s]
 27%|██▋       | 4237/15550 [02:12<05:16, 35.71it/s]
 27%|██▋       | 4241/15550 [02:13<05:23, 34.97it/s]
 27%|██▋       | 4245/15550 [02:13<05:50, 32.23it/s]
 27%|██▋       | 4249/15550 [02:13<05:32, 33.99it/s]
 27%|██▋       | 4253/15550 [02:13<05:20, 35.30it/s]
 27%|██▋       | 4257/15550 [02:13<05:09, 36.43it/s]
 27%|██▋       | 4261/15550 [02:13<05:06, 36.80it/s]
 27%|██▋       | 4266/15550 [02:13<04:58, 37.77it/s]
 27%|██▋       | 4270/15550 [02:13<04:55, 38.17it/s]
 27%|██▋       | 4274/15550 [02:13<05:31, 34.02it/s]
 28%|██▊       | 4278/15550 [02:14<05:18, 35.41it/s]
 28%|██▊       | 4282/15550 [02:14<05:29, 34.18it/s]
 28%|██▊       | 4286/15550 [02:14<05:16, 35.58it/s]
 28%|██▊       | 4290/15550 [02:14<05:12, 36.02it/s]
 28%|██▊       | 4294/15550 [02:14<05:26, 34.49it/s]
 28%|██▊       | 4298/15550 [02:14<05:14, 35.77it/s]
 28%|██▊ 

[2m[36m(_objective pid=24721)[0m {'loss': 0.146, 'learning_rate': 2.4492006233814804e-05, 'epoch': 1.45}


[2m[36m(_objective pid=24721)[0m  29%|██▉       | 4507/15550 [02:20<05:20, 34.42it/s]
 29%|██▉       | 4511/15550 [02:20<05:13, 35.24it/s]
 29%|██▉       | 4515/15550 [02:20<05:08, 35.72it/s]
 29%|██▉       | 4520/15550 [02:20<04:56, 37.20it/s]
 29%|██▉       | 4524/15550 [02:20<04:56, 37.18it/s]
 29%|██▉       | 4528/15550 [02:20<04:53, 37.60it/s]
 29%|██▉       | 4532/15550 [02:21<05:29, 33.48it/s]
 29%|██▉       | 4536/15550 [02:21<05:28, 33.48it/s]
 29%|██▉       | 4540/15550 [02:21<05:21, 34.28it/s]
 29%|██▉       | 4544/15550 [02:21<05:29, 33.36it/s]
 29%|██▉       | 4548/15550 [02:21<05:18, 34.57it/s]
 29%|██▉       | 4553/15550 [02:21<05:01, 36.45it/s]
 29%|██▉       | 4557/15550 [02:21<05:01, 36.52it/s]
 29%|██▉       | 4561/15550 [02:21<05:38, 32.44it/s]
 29%|██▉       | 4565/15550 [02:22<05:33, 32.90it/s]
 29%|██▉       | 4569/15550 [02:22<05:25, 33.76it/s]
 29%|██▉       | 4573/15550 [02:22<05:23, 33.97it/s]
 29%|██▉       | 4577/15550 [02:22<05:15, 34.77it/s]
 29%|██▉ 

[2m[36m(_objective pid=24721)[0m {'loss': 0.1402, 'learning_rate': 2.338377065762409e-05, 'epoch': 1.61}


[2m[36m(_objective pid=24721)[0m  32%|███▏      | 5007/15550 [02:34<04:41, 37.43it/s]
 32%|███▏      | 5011/15550 [02:34<04:38, 37.84it/s]
 32%|███▏      | 5015/15550 [02:35<04:38, 37.89it/s]
 32%|███▏      | 5019/15550 [02:35<04:56, 35.53it/s]
 32%|███▏      | 5023/15550 [02:35<05:20, 32.80it/s]
 32%|███▏      | 5027/15550 [02:35<05:40, 30.88it/s]
 32%|███▏      | 5031/15550 [02:35<05:20, 32.78it/s]
 32%|███▏      | 5035/15550 [02:35<05:05, 34.40it/s]
 32%|███▏      | 5039/15550 [02:35<04:59, 35.06it/s]
 32%|███▏      | 5043/15550 [02:35<04:56, 35.42it/s]
 32%|███▏      | 5047/15550 [02:36<05:37, 31.14it/s]
 32%|███▏      | 5051/15550 [02:36<05:26, 32.19it/s]
 33%|███▎      | 5055/15550 [02:36<05:56, 29.43it/s]
 33%|███▎      | 5059/15550 [02:36<05:38, 31.02it/s]
 33%|███▎      | 5063/15550 [02:36<05:24, 32.27it/s]
 33%|███▎      | 5067/15550 [02:36<05:18, 32.90it/s]
 33%|███▎      | 5071/15550 [02:36<05:07, 34.07it/s]
 33%|███▎      | 5075/15550 [02:36<05:17, 32.96it/s]
 33%|███▎

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:55:23. Total running time: 35min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

[2m[36m(_objective pid=24721)[0m  34%|███▍      | 5277/15550 [02:42<04:35, 37.35it/s]
 34%|███▍      | 5281/15550 [02:42<04:32, 37.71it/s]
 34%|███▍      | 5285/15550 [02:42<04:29, 38.11it/s]
 34%|███▍      | 5289/15550 [02:43<04:36, 37.13it/s]
 34%|███▍      | 5293/15550 [02:43<04:43, 36.17it/s]
 34%|███▍      | 5297/15550 [02:43<04:40, 36.57it/s]
 34%|███▍      | 5301/15550 [02:43<05:10, 32.99it/s]
 34%|███▍      | 5305/15550 [02:43<05:29, 31.13it/s]
 34%|███▍      | 5309/15550 [02:43<05:35, 30.52it/s]
 34%|███▍      | 5313/15550 [02:43<05:19, 32.03it/s]
 34%|███▍      | 5318/15550 [02:43<04:56, 34.55it/s]
 34%|███▍      | 5322/15550 [02:44<04:46, 35.75it/s]
 34%|███▍      | 5326/15550 [02:44<04:43, 36.09it/s]
 34%|███▍      | 5330/15550 [02:44<04:35, 37.15it/s]
 34%|███▍      | 5335/15550 [02:44<04:27, 38.22it/s]
 34%|███▍      | 5339/15550 [02:44<04:25, 38.48it/s]
 34%|███▍      | 5343/15550 [02:44<04:23, 38.80it/s]
 34%|███▍      | 5347/15550 [02:44<04:22, 38.92it/s]
 34%|███▍

[2m[36m(_objective pid=24721)[0m {'loss': 0.1739, 'learning_rate': 2.2275535081433374e-05, 'epoch': 1.77}


[2m[36m(_objective pid=24721)[0m  35%|███▌      | 5507/15550 [02:49<04:23, 38.07it/s]
 35%|███▌      | 5511/15550 [02:49<04:24, 37.93it/s]
 35%|███▌      | 5516/15550 [02:49<04:16, 39.05it/s]
 35%|███▌      | 5520/15550 [02:49<04:39, 35.91it/s]
 36%|███▌      | 5525/15550 [02:49<04:24, 37.86it/s]
 36%|███▌      | 5529/15550 [02:49<04:21, 38.25it/s]
 36%|███▌      | 5533/15550 [02:49<04:18, 38.71it/s]
 36%|███▌      | 5537/15550 [02:50<04:36, 36.18it/s]
 36%|███▌      | 5542/15550 [02:50<04:25, 37.70it/s]
 36%|███▌      | 5547/15550 [02:50<04:19, 38.56it/s]
 36%|███▌      | 5551/15550 [02:50<04:19, 38.50it/s]
 36%|███▌      | 5556/15550 [02:50<04:42, 35.34it/s]
 36%|███▌      | 5560/15550 [02:50<04:35, 36.26it/s]
 36%|███▌      | 5564/15550 [02:50<04:33, 36.56it/s]
 36%|███▌      | 5569/15550 [02:50<04:23, 37.87it/s]
 36%|███▌      | 5573/15550 [02:51<04:32, 36.59it/s]
 36%|███▌      | 5578/15550 [02:51<04:23, 37.78it/s]
 36%|███▌      | 5582/15550 [02:51<04:22, 37.97it/s]
 36%|███▌

[2m[36m(_objective pid=24721)[0m {'loss': 0.152, 'learning_rate': 2.1167299505242658e-05, 'epoch': 1.93}


[2m[36m(_objective pid=24721)[0m  39%|███▊      | 5997/15550 [03:03<04:12, 37.91it/s]                                                     39%|███▊      | 6000/15550 [03:03<04:11, 37.91it/s]
 39%|███▊      | 6001/15550 [03:03<04:09, 38.26it/s]
 39%|███▊      | 6005/15550 [03:03<04:10, 38.09it/s]
 39%|███▊      | 6009/15550 [03:03<04:08, 38.40it/s]
 39%|███▊      | 6013/15550 [03:03<04:21, 36.53it/s]
 39%|███▊      | 6017/15550 [03:03<04:20, 36.56it/s]
 39%|███▊      | 6021/15550 [03:04<04:16, 37.17it/s]
 39%|███▊      | 6025/15550 [03:04<04:11, 37.85it/s]
 39%|███▉      | 6029/15550 [03:04<04:29, 35.27it/s]
 39%|███▉      | 6033/15550 [03:04<04:21, 36.46it/s]
 39%|███▉      | 6037/15550 [03:04<04:15, 37.19it/s]
 39%|███▉      | 6041/15550 [03:04<04:16, 37.10it/s]
 39%|███▉      | 6046/15550 [03:04<04:08, 38.18it/s]
 39%|███▉      | 6050/15550 [03:04<04:39, 33.98it/s]
 39%|███▉      | 6054/15550 [03:04<05:07, 30.89it/s]
 39%|███▉      | 6058/15550 [03:05<04:52, 32.46it/s]
 39%|███

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:55:53. Total running time: 35min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m 
 34%|███▍      | 44/130 [00:03<00:06, 13.85it/s][A
[2m[36m(_objective pid=24721)[0m 
 35%|███▌      | 46/130 [00:03<00:06, 12.62it/s][A
[2m[36m(_objective pid=24721)[0m 
 37%|███▋      | 48/130 [00:03<00:07, 11.68it/s][A
[2m[36m(_objective pid=24721)[0m 
 39%|███▉      | 51/130 [00:03<00:06, 12.86it/s][A
[2m[36m(_objective pid=24721)[0m 
 41%|████      | 53/130 [00:03<00:05, 13.26it/s][A
[2m[36m(_objective pid=24721)[0m 
 42%|████▏     | 55/130 [00:04<00:05, 12.68it/s][A
[2m[36m(_objective pid=24721)[0m 
 44%|████▍     | 57/130 [00:04<00:05, 13.16it/s][A
[2m[36m(_objective pid=24721)[0m 
 45%|████▌     | 59/130 [00:04<00:05, 13.31it/s][A
[2m[36m(_objective pid=24721)[0m 
 47%|████▋     | 61/130 [00:04<00:04, 13.92it/s][A
[2m[36m(_objective pid=24721)[0m 
 48%|████▊     | 63/130 [00:04<00:04, 15.01it/s][A
[2m[36m(_objective pid=24721)[0m 
 50%|█████     | 65/130 [00:04<00:04, 14.00it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00004 finished iteration 2 at 2023-09-11 13:56:00. Total running time: 35min 39s
+-------------------------------------------------+
| Trial _objective_f556c_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        100.233 |
| time_total_s                            202.082 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.16033 |
| eval_runtime                             9.5923 |
| eval_samples_per_second                 432.325 |
| eval_steps_per_second                    13.553 |
| objective                               0.16033 |
+-------------------------------------------------+

[2m[36m(_objective pid=24721)[0m {'eval_loss': 0.16032539308071136, 'eval_runtime': 9.5923, 'eval_samples_per_second': 432.325, 'eval_steps_per_second': 13.553, 'epoch': 2.0}


[2m[36m(_objective pid=24721)[0m                                                     
[2m[36m(_objective pid=24721)[0m                                                  [A 40%|████      | 6220/15550 [03:19<04:17, 36.26it/s]
[2m[36m(_objective pid=24721)[0m 100%|██████████| 130/130 [00:09<00:00, 11.98it/s][A
                                                 [A
 40%|████      | 6220/15550 [03:20<04:17, 36.26it/s]
 40%|████      | 6221/15550 [03:20<2:24:33,  1.08it/s]
 40%|████      | 6225/15550 [03:20<1:39:19,  1.56it/s]
 40%|████      | 6229/15550 [03:20<1:09:18,  2.24it/s]
 40%|████      | 6233/15550 [03:20<49:12,  3.16it/s]  
 40%|████      | 6237/15550 [03:21<35:41,  4.35it/s]
 40%|████      | 6241/15550 [03:21<26:18,  5.90it/s]
 40%|████      | 6245/15550 [03:21<19:59,  7.76it/s]
 40%|████      | 6249/15550 [03:21<15:24, 10.07it/s]
 40%|████      | 6253/15550 [03:21<12:04, 12.84it/s]
 40%|████      | 6257/15550 [03:21<09:45, 15.88it/s]
 40%|████      | 6261/15550 [03:

[2m[36m(_objective pid=24721)[0m {'loss': 0.1285, 'learning_rate': 2.005906392905194e-05, 'epoch': 2.09}


[2m[36m(_objective pid=24721)[0m  42%|████▏     | 6504/15550 [03:28<04:30, 33.48it/s]
 42%|████▏     | 6508/15550 [03:28<04:27, 33.79it/s]
 42%|████▏     | 6512/15550 [03:28<04:18, 34.92it/s]
 42%|████▏     | 6516/15550 [03:29<04:41, 32.04it/s]
 42%|████▏     | 6520/15550 [03:29<04:29, 33.49it/s]
 42%|████▏     | 6524/15550 [03:29<04:57, 30.34it/s]
 42%|████▏     | 6528/15550 [03:29<04:39, 32.29it/s]
 42%|████▏     | 6532/15550 [03:29<04:50, 31.00it/s]
 42%|████▏     | 6536/15550 [03:29<04:32, 33.08it/s]
 42%|████▏     | 6540/15550 [03:29<04:20, 34.62it/s]
 42%|████▏     | 6545/15550 [03:29<04:07, 36.34it/s]
 42%|████▏     | 6550/15550 [03:30<04:13, 35.46it/s]
 42%|████▏     | 6554/15550 [03:30<04:09, 36.09it/s]
 42%|████▏     | 6559/15550 [03:30<04:00, 37.46it/s]
 42%|████▏     | 6563/15550 [03:30<04:06, 36.47it/s]
 42%|████▏     | 6567/15550 [03:30<04:05, 36.57it/s]
 42%|████▏     | 6572/15550 [03:30<03:58, 37.58it/s]
 42%|████▏     | 6576/15550 [03:30<04:04, 36.76it/s]
 42%|████

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:56:23. Total running time: 36min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

[2m[36m(_objective pid=24721)[0m  45%|████▍     | 6994/15550 [03:42<03:56, 36.10it/s]
[2m[36m(_objective pid=24721)[0m  45%|████▌     | 6998/15550 [03:42<03:55, 36.33it/s]


[2m[36m(_objective pid=24721)[0m {'loss': 0.0798, 'learning_rate': 1.895082835286123e-05, 'epoch': 2.25}


[2m[36m(_objective pid=24721)[0m                                                      45%|████▌     | 7000/15550 [03:42<03:55, 36.33it/s] 45%|████▌     | 7002/15550 [03:42<04:08, 34.38it/s]
 45%|████▌     | 7006/15550 [03:43<04:04, 34.97it/s]
 45%|████▌     | 7010/15550 [03:43<04:59, 28.50it/s]
 45%|████▌     | 7014/15550 [03:43<05:14, 27.11it/s]
 45%|████▌     | 7017/15550 [03:43<05:36, 25.39it/s]
 45%|████▌     | 7020/15550 [03:43<05:51, 24.26it/s]
 45%|████▌     | 7023/15550 [03:43<06:02, 23.55it/s]
 45%|████▌     | 7026/15550 [03:43<05:50, 24.29it/s]
 45%|████▌     | 7029/15550 [03:44<06:09, 23.06it/s]
 45%|████▌     | 7033/15550 [03:44<05:27, 25.99it/s]
 45%|████▌     | 7037/15550 [03:44<04:51, 29.20it/s]
 45%|████▌     | 7041/15550 [03:44<04:30, 31.43it/s]
 45%|████▌     | 7045/15550 [03:44<04:20, 32.60it/s]
 45%|████▌     | 7049/15550 [03:44<04:18, 32.88it/s]
 45%|████▌     | 7053/15550 [03:44<04:12, 33.70it/s]
 45%|████▌     | 7057/15550 [03:44<04:09, 34.06it/s]
 45%|███

[2m[36m(_objective pid=24721)[0m {'loss': 0.0802, 'learning_rate': 1.7842592776670516e-05, 'epoch': 2.41}


[2m[36m(_objective pid=24721)[0m  48%|████▊     | 7506/15550 [03:58<03:39, 36.67it/s]
 48%|████▊     | 7510/15550 [03:58<03:43, 36.05it/s]
 48%|████▊     | 7514/15550 [03:58<03:39, 36.60it/s]
 48%|████▊     | 7518/15550 [03:58<03:39, 36.58it/s]
 48%|████▊     | 7522/15550 [03:58<03:53, 34.33it/s]
 48%|████▊     | 7526/15550 [03:58<04:01, 33.23it/s]
 48%|████▊     | 7530/15550 [03:58<03:51, 34.71it/s]
 48%|████▊     | 7534/15550 [03:59<04:10, 32.05it/s]
 48%|████▊     | 7538/15550 [03:59<04:27, 29.97it/s]
 49%|████▊     | 7542/15550 [03:59<04:21, 30.63it/s]
 49%|████▊     | 7546/15550 [03:59<04:08, 32.27it/s]
 49%|████▊     | 7550/15550 [03:59<04:22, 30.48it/s]
 49%|████▊     | 7554/15550 [03:59<04:09, 32.08it/s]
 49%|████▊     | 7558/15550 [03:59<04:05, 32.50it/s]
 49%|████▊     | 7562/15550 [03:59<03:54, 33.99it/s]
 49%|████▊     | 7566/15550 [04:00<03:53, 34.20it/s]
 49%|████▊     | 7570/15550 [04:00<03:46, 35.22it/s]
 49%|████▊     | 7574/15550 [04:00<04:02, 32.93it/s]
 49%|████

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:56:53. Total running time: 36min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m  51%|█████▏    | 7998/15550 [04:12<04:17, 29.35it/s]                                                     51%|█████▏    | 8000/15550 [04:12<04:17, 29.35it/s]
 51%|█████▏    | 8002/15550 [04:12<04:05, 30.75it/s]
 51%|█████▏    | 8006/15550 [04:13<04:15, 29.49it/s]
 52%|█████▏    | 8010/15550 [04:13<04:08, 30.34it/s]
 52%|█████▏    | 8014/15550 [04:13<04:25, 28.43it/s]
 52%|█████▏    | 8018/15550 [04:13<04:10, 30.02it/s]
 52%|█████▏    | 8022/15550 [04:13<04:01, 31.16it/s]
 52%|█████▏    | 8026/15550 [04:13<04:21, 28.76it/s]
 52%|█████▏    | 8029/15550 [04:13<04:34, 27.42it/s]
 52%|█████▏    | 8033/15550 [04:14<04:24, 28.43it/s]
 52%|█████▏    | 8037/15550 [04:14<04:07, 30.40it/s]
 52%|█████▏    | 8041/15550 [04:14<03:55, 31.83it/s]
 52%|█████▏    | 8045/15550 [04:14<03:47, 33.01it/s]
 52%|█████▏    | 8049/15550 [04:14<03:47, 32.94it/s]
 52%|█████▏    | 8053/15550 [04:14<03:47, 32.94it/s]
 52%|█████▏    | 8057/15550 [04:14<04:07, 30.25it/s]
 52%|███

[2m[36m(_objective pid=24721)[0m {'loss': 0.1044, 'learning_rate': 1.5626121624289083e-05, 'epoch': 2.73}


[2m[36m(_objective pid=24721)[0m  55%|█████▍    | 8505/15550 [04:27<03:18, 35.46it/s]
 55%|█████▍    | 8510/15550 [04:27<03:09, 37.08it/s]
 55%|█████▍    | 8514/15550 [04:28<03:14, 36.20it/s]
 55%|█████▍    | 8518/15550 [04:28<03:24, 34.43it/s]
 55%|█████▍    | 8522/15550 [04:28<03:17, 35.51it/s]
 55%|█████▍    | 8526/15550 [04:28<03:22, 34.77it/s]
 55%|█████▍    | 8531/15550 [04:28<03:11, 36.65it/s]
 55%|█████▍    | 8536/15550 [04:28<03:05, 37.86it/s]
 55%|█████▍    | 8540/15550 [04:28<03:07, 37.44it/s]
 55%|█████▍    | 8544/15550 [04:28<03:10, 36.87it/s]
 55%|█████▍    | 8548/15550 [04:29<03:08, 37.10it/s]
 55%|█████▍    | 8552/15550 [04:29<03:40, 31.72it/s]
 55%|█████▌    | 8556/15550 [04:29<03:29, 33.37it/s]
 55%|█████▌    | 8560/15550 [04:29<03:28, 33.50it/s]
 55%|█████▌    | 8564/15550 [04:29<03:19, 35.06it/s]
 55%|█████▌    | 8568/15550 [04:29<03:19, 35.05it/s]
 55%|█████▌    | 8572/15550 [04:29<03:19, 34.93it/s]
 55%|█████▌    | 8576/15550 [04:29<03:16, 35.41it/s]
 55%|████

[2m[36m(_objective pid=24721)[0m {'loss': 0.1046, 'learning_rate': 1.4517886048098368e-05, 'epoch': 2.89}


[2m[36m(_objective pid=24721)[0m  58%|█████▊    | 8999/15550 [04:42<02:56, 37.06it/s]                                                     58%|█████▊    | 9000/15550 [04:42<02:56, 37.06it/s]
 58%|█████▊    | 9003/15550 [04:42<02:57, 36.99it/s]
 58%|█████▊    | 9007/15550 [04:42<03:15, 33.54it/s]
 58%|█████▊    | 9011/15550 [04:42<03:27, 31.52it/s]


Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:57:23. Total running time: 37min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

[2m[36m(_objective pid=24721)[0m  58%|█████▊    | 9015/15550 [04:42<03:15, 33.35it/s]
 58%|█████▊    | 9019/15550 [04:42<03:13, 33.77it/s]
 58%|█████▊    | 9023/15550 [04:42<03:16, 33.15it/s]
 58%|█████▊    | 9027/15550 [04:43<03:08, 34.69it/s]
 58%|█████▊    | 9031/15550 [04:43<03:10, 34.13it/s]
 58%|█████▊    | 9035/15550 [04:43<03:03, 35.54it/s]
 58%|█████▊    | 9040/15550 [04:43<02:55, 37.15it/s]
 58%|█████▊    | 9044/15550 [04:43<02:52, 37.76it/s]
 58%|█████▊    | 9048/15550 [04:43<02:56, 36.92it/s]
 58%|█████▊    | 9052/15550 [04:43<02:55, 37.03it/s]
 58%|█████▊    | 9057/15550 [04:43<03:01, 35.82it/s]
 58%|█████▊    | 9061/15550 [04:44<02:58, 36.29it/s]
 58%|█████▊    | 9066/15550 [04:44<02:54, 37.17it/s]
 58%|█████▊    | 9070/15550 [04:44<03:07, 34.52it/s]
 58%|█████▊    | 9075/15550 [04:44<02:58, 36.33it/s]
 58%|█████▊    | 9079/15550 [04:44<03:00, 35.78it/s]
 58%|█████▊    | 9084/15550 [04:44<02:55, 36.82it/s]
 58%|█████▊    | 9088/15550 [04:44<02:53, 37.17it/s]
 58%|████

Trial _objective_f556c_00004 finished iteration 3 at 2023-09-11 13:57:42. Total running time: 37min 21s
+-------------------------------------------------+
| Trial _objective_f556c_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.211 |
| time_total_s                            304.293 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.22714 |
| eval_runtime                             9.5812 |
| eval_samples_per_second                 432.827 |
| eval_steps_per_second                    13.568 |
| objective                               0.22714 |
+-------------------------------------------------+

[2m[36m(_objective pid=24721)[0m {'eval_loss': 0.22714164853096008, 'eval_runtime': 9.5812, 'eval_samples_per_second': 432.827, 'eval_steps_per_second': 13.568, 'epoch': 3.0}


[2m[36m(_objective pid=24721)[0m                                                     
[2m[36m(_objective pid=24721)[0m                                                  [A 60%|██████    | 9330/15550 [05:01<03:15, 31.82it/s]
[2m[36m(_objective pid=24721)[0m 100%|██████████| 130/130 [00:09<00:00, 11.99it/s][A
                                                 [A
 60%|██████    | 9333/15550 [05:02<1:27:36,  1.18it/s]
 60%|██████    | 9336/15550 [05:02<1:07:14,  1.54it/s]
 60%|██████    | 9340/15550 [05:03<46:55,  2.21it/s]  
 60%|██████    | 9344/15550 [05:03<33:11,  3.12it/s]
 60%|██████    | 9348/15550 [05:03<24:05,  4.29it/s]
 60%|██████    | 9352/15550 [05:03<17:41,  5.84it/s]
 60%|██████    | 9356/15550 [05:03<13:14,  7.80it/s]
 60%|██████    | 9360/15550 [05:03<10:18, 10.02it/s]
 60%|██████    | 9364/15550 [05:03<08:03, 12.79it/s]
 60%|██████    | 9368/15550 [05:03<06:40, 15.44it/s]
 60%|██████    | 9372/15550 [05:04<05:34, 18.49it/s]
 60%|██████    | 9376/15550 [05:04

[2m[36m(_objective pid=24721)[0m {'loss': 0.0706, 'learning_rate': 1.3409650471907654e-05, 'epoch': 3.05}


[2m[36m(_objective pid=24721)[0m  61%|██████    | 9504/15550 [05:07<02:54, 34.65it/s]
 61%|██████    | 9508/15550 [05:08<02:49, 35.68it/s]
 61%|██████    | 9512/15550 [05:08<02:46, 36.17it/s]
 61%|██████    | 9516/15550 [05:08<02:43, 36.80it/s]
 61%|██████    | 9520/15550 [05:08<02:41, 37.41it/s]
 61%|██████    | 9524/15550 [05:08<02:40, 37.63it/s]
 61%|██████▏   | 9528/15550 [05:08<02:37, 38.26it/s]
 61%|██████▏   | 9532/15550 [05:08<02:36, 38.37it/s]
 61%|██████▏   | 9537/15550 [05:08<02:33, 39.15it/s]
 61%|██████▏   | 9541/15550 [05:08<02:36, 38.52it/s]
 61%|██████▏   | 9545/15550 [05:09<02:45, 36.31it/s]
 61%|██████▏   | 9549/15550 [05:09<02:55, 34.28it/s]
 61%|██████▏   | 9553/15550 [05:09<02:50, 35.18it/s]
 61%|██████▏   | 9557/15550 [05:09<02:50, 35.16it/s]
 61%|██████▏   | 9562/15550 [05:09<02:42, 36.88it/s]
 62%|██████▏   | 9566/15550 [05:09<02:43, 36.62it/s]
 62%|██████▏   | 9571/15550 [05:09<02:38, 37.79it/s]
 62%|██████▏   | 9575/15550 [05:09<02:36, 38.21it/s]
 62%|████

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:57:53. Total running time: 37min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m  62%|██████▏   | 9681/15550 [05:12<02:41, 36.30it/s]
 62%|██████▏   | 9686/15550 [05:12<02:37, 37.14it/s]
 62%|██████▏   | 9690/15550 [05:13<02:44, 35.72it/s]
 62%|██████▏   | 9695/15550 [05:13<02:37, 37.08it/s]
 62%|██████▏   | 9699/15550 [05:13<02:34, 37.76it/s]
 62%|██████▏   | 9703/15550 [05:13<02:34, 37.90it/s]
 62%|██████▏   | 9707/15550 [05:13<02:42, 35.99it/s]
 62%|██████▏   | 9711/15550 [05:13<02:39, 36.69it/s]
 62%|██████▏   | 9715/15550 [05:13<02:41, 36.06it/s]
 63%|██████▎   | 9719/15550 [05:13<02:42, 35.82it/s]
 63%|██████▎   | 9723/15550 [05:14<02:43, 35.64it/s]
 63%|██████▎   | 9727/15550 [05:14<02:40, 36.22it/s]
 63%|██████▎   | 9731/15550 [05:14<02:40, 36.36it/s]
 63%|██████▎   | 9735/15550 [05:14<02:36, 37.19it/s]
 63%|██████▎   | 9739/15550 [05:14<02:39, 36.42it/s]
 63%|██████▎   | 9743/15550 [05:14<02:54, 33.34it/s]
 63%|██████▎   | 9747/15550 [05:14<03:03, 31.60it/s]
 63%|██████▎   | 9751/15550 [05:14<02:58, 32.50it/s]
 63%|████

[2m[36m(_objective pid=24721)[0m {'loss': 0.0755, 'learning_rate': 1.2301414895716937e-05, 'epoch': 3.22}


 64%|██████▍   | 10003/15550 [05:22<02:46, 33.35it/s]
 64%|██████▍   | 10007/15550 [05:22<03:09, 29.23it/s]
 64%|██████▍   | 10011/15550 [05:22<02:57, 31.20it/s]
 64%|██████▍   | 10015/15550 [05:22<03:10, 29.11it/s]
 64%|██████▍   | 10019/15550 [05:22<03:04, 30.05it/s]
 64%|██████▍   | 10023/15550 [05:22<02:57, 31.16it/s]
 64%|██████▍   | 10027/15550 [05:22<02:50, 32.44it/s]
 65%|██████▍   | 10031/15550 [05:23<03:01, 30.48it/s]
 65%|██████▍   | 10035/15550 [05:23<02:53, 31.70it/s]
 65%|██████▍   | 10039/15550 [05:23<02:50, 32.35it/s]
 65%|██████▍   | 10043/15550 [05:23<02:56, 31.27it/s]
 65%|██████▍   | 10047/15550 [05:23<02:49, 32.49it/s]
 65%|██████▍   | 10051/15550 [05:23<03:00, 30.50it/s]
 65%|██████▍   | 10055/15550 [05:23<02:48, 32.67it/s]
 65%|██████▍   | 10059/15550 [05:23<02:39, 34.38it/s]
 65%|██████▍   | 10063/15550 [05:23<02:34, 35.42it/s]
 65%|██████▍   | 10067/15550 [05:24<02:35, 35.28it/s]
 65%|██████▍   | 10071/15550 [05:24<02:38, 34.59it/s]
 65%|██████▍   | 10075/15550

[2m[36m(_objective pid=24721)[0m {'loss': 0.0625, 'learning_rate': 1.1193179319526222e-05, 'epoch': 3.38}


[2m[36m(_objective pid=24721)[0m  68%|██████▊   | 10502/15550 [05:36<02:27, 34.16it/s]
 68%|██████▊   | 10506/15550 [05:36<02:30, 33.57it/s]
 68%|██████▊   | 10510/15550 [05:36<02:32, 32.97it/s]
 68%|██████▊   | 10514/15550 [05:36<02:40, 31.47it/s]
 68%|██████▊   | 10518/15550 [05:36<02:31, 33.16it/s]
 68%|██████▊   | 10522/15550 [05:37<02:25, 34.52it/s]
 68%|██████▊   | 10526/15550 [05:37<02:24, 34.81it/s]
 68%|██████▊   | 10530/15550 [05:37<02:20, 35.79it/s]
 68%|██████▊   | 10534/15550 [05:37<02:33, 32.73it/s]
 68%|██████▊   | 10538/15550 [05:37<02:34, 32.46it/s]
 68%|██████▊   | 10542/15550 [05:37<02:36, 31.93it/s]
 68%|██████▊   | 10546/15550 [05:37<02:30, 33.16it/s]
 68%|██████▊   | 10550/15550 [05:37<02:42, 30.76it/s]
 68%|██████▊   | 10554/15550 [05:38<02:47, 29.82it/s]
 68%|██████▊   | 10558/15550 [05:38<02:55, 28.50it/s]
 68%|██████▊   | 10562/15550 [05:38<02:45, 30.14it/s]
 68%|██████▊   | 10566/15550 [05:38<02:38, 31.38it/s]
 68%|██████▊   | 10570/15550 [05:38<02:30, 33

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:58:23. Total running time: 38min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

 69%|██████▉   | 10723/15550 [05:42<02:20, 34.26it/s]
 69%|██████▉   | 10727/15550 [05:43<02:32, 31.68it/s]
 69%|██████▉   | 10732/15550 [05:43<02:20, 34.18it/s]
 69%|██████▉   | 10737/15550 [05:43<02:13, 36.09it/s]
 69%|██████▉   | 10741/15550 [05:43<02:11, 36.63it/s]
 69%|██████▉   | 10745/15550 [05:43<02:08, 37.28it/s]
 69%|██████▉   | 10749/15550 [05:43<02:06, 37.93it/s]
 69%|██████▉   | 10753/15550 [05:43<02:33, 31.22it/s]
 69%|██████▉   | 10757/15550 [05:43<02:25, 32.84it/s]
 69%|██████▉   | 10761/15550 [05:44<02:49, 28.29it/s]
 69%|██████▉   | 10766/15550 [05:44<02:35, 30.79it/s]
 69%|██████▉   | 10771/15550 [05:44<02:22, 33.55it/s]
 69%|██████▉   | 10775/15550 [05:44<02:19, 34.30it/s]
 69%|██████▉   | 10779/15550 [05:44<02:13, 35.62it/s]
 69%|██████▉   | 10783/15550 [05:44<02:13, 35.66it/s]
 69%|██████▉   | 10787/15550 [05:44<02:19, 34.18it/s]
 69%|██████▉   | 10791/15550 [05:44<02:15, 35.07it/s]
 69%|██████▉   | 10795/15550 [05:45<02:11, 36.25it/s]
 69%|██████▉   | 10800/15550

[2m[36m(_objective pid=24721)[0m {'loss': 0.0593, 'learning_rate': 1.0084943743335508e-05, 'epoch': 3.54}


[2m[36m(_objective pid=24721)[0m  71%|███████   | 10999/15550 [05:50<02:07, 35.70it/s]                                                      71%|███████   | 11000/15550 [05:50<02:07, 35.70it/s]
 71%|███████   | 11003/15550 [05:50<02:06, 35.91it/s]
 71%|███████   | 11007/15550 [05:51<02:03, 36.85it/s]
 71%|███████   | 11011/15550 [05:51<02:01, 37.23it/s]
 71%|███████   | 11015/15550 [05:51<02:00, 37.79it/s]
 71%|███████   | 11019/15550 [05:51<01:58, 38.33it/s]
 71%|███████   | 11023/15550 [05:51<01:57, 38.41it/s]
 71%|███████   | 11027/15550 [05:51<02:14, 33.72it/s]
 71%|███████   | 11031/15550 [05:51<02:22, 31.62it/s]
 71%|███████   | 11035/15550 [05:51<02:15, 33.37it/s]
 71%|███████   | 11039/15550 [05:51<02:09, 34.77it/s]
 71%|███████   | 11043/15550 [05:52<02:06, 35.51it/s]
 71%|███████   | 11047/15550 [05:52<02:04, 36.22it/s]
 71%|███████   | 11051/15550 [05:52<02:03, 36.44it/s]
 71%|███████   | 11055/15550 [05:52<02:00, 37.37it/s]
 71%|███████   | 11059/15550 [05:52<01:58, 3

[2m[36m(_objective pid=24721)[0m {'loss': 0.0699, 'learning_rate': 8.976708167144793e-06, 'epoch': 3.7}


[2m[36m(_objective pid=24721)[0m  74%|███████▍  | 11499/15550 [06:04<01:48, 37.23it/s]                                                      74%|███████▍  | 11500/15550 [06:04<01:48, 37.23it/s]
 74%|███████▍  | 11503/15550 [06:04<01:47, 37.60it/s]
 74%|███████▍  | 11507/15550 [06:05<01:46, 38.04it/s]
 74%|███████▍  | 11511/15550 [06:05<01:48, 37.06it/s]
 74%|███████▍  | 11515/15550 [06:05<01:48, 37.14it/s]
 74%|███████▍  | 11519/15550 [06:05<01:58, 33.89it/s]
 74%|███████▍  | 11523/15550 [06:05<01:53, 35.43it/s]
 74%|███████▍  | 11527/15550 [06:05<01:51, 36.20it/s]
 74%|███████▍  | 11532/15550 [06:05<01:45, 38.06it/s]
 74%|███████▍  | 11537/15550 [06:05<01:42, 39.13it/s]
 74%|███████▍  | 11542/15550 [06:05<01:42, 39.19it/s]
 74%|███████▍  | 11546/15550 [06:06<01:44, 38.40it/s]
 74%|███████▍  | 11550/15550 [06:06<01:43, 38.70it/s]
 74%|███████▍  | 11554/15550 [06:06<01:43, 38.65it/s]
 74%|███████▍  | 11558/15550 [06:06<01:43, 38.40it/s]
 74%|███████▍  | 11562/15550 [06:06<01:43, 3

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:58:53. Total running time: 38min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m  76%|███████▌  | 11767/15550 [06:12<01:58, 31.96it/s]
 76%|███████▌  | 11771/15550 [06:12<01:57, 32.17it/s]
 76%|███████▌  | 11775/15550 [06:13<01:55, 32.65it/s]
 76%|███████▌  | 11779/15550 [06:13<01:57, 32.00it/s]
 76%|███████▌  | 11784/15550 [06:13<01:48, 34.60it/s]
 76%|███████▌  | 11788/15550 [06:13<01:44, 35.83it/s]
 76%|███████▌  | 11793/15550 [06:13<01:41, 37.13it/s]
 76%|███████▌  | 11798/15550 [06:13<01:37, 38.46it/s]
 76%|███████▌  | 11803/15550 [06:13<01:35, 39.30it/s]
 76%|███████▌  | 11808/15550 [06:13<01:34, 39.64it/s]
 76%|███████▌  | 11813/15550 [06:14<01:43, 36.01it/s]
 76%|███████▌  | 11817/15550 [06:14<01:44, 35.62it/s]
 76%|███████▌  | 11821/15550 [06:14<01:43, 36.12it/s]
 76%|███████▌  | 11825/15550 [06:14<01:43, 35.93it/s]
 76%|███████▌  | 11829/15550 [06:14<01:41, 36.69it/s]
 76%|███████▌  | 11833/15550 [06:14<01:45, 35.19it/s]
 76%|███████▌  | 11837/15550 [06:14<01:49, 33.99it/s]
 76%|███████▌  | 11841/15550 [06:14<01:59, 30

[2m[36m(_objective pid=24721)[0m {'loss': 0.0457, 'learning_rate': 7.868472590954077e-06, 'epoch': 3.86}


 77%|███████▋  | 12005/15550 [06:19<01:40, 35.14it/s]
 77%|███████▋  | 12009/15550 [06:19<01:48, 32.52it/s]
 77%|███████▋  | 12013/15550 [06:19<01:44, 34.00it/s]
 77%|███████▋  | 12017/15550 [06:19<01:39, 35.52it/s]
 77%|███████▋  | 12021/15550 [06:19<01:36, 36.62it/s]
 77%|███████▋  | 12025/15550 [06:20<01:50, 31.83it/s]
 77%|███████▋  | 12029/15550 [06:20<01:45, 33.52it/s]
 77%|███████▋  | 12033/15550 [06:20<01:53, 31.07it/s]
 77%|███████▋  | 12037/15550 [06:20<01:46, 32.91it/s]
 77%|███████▋  | 12042/15550 [06:20<01:39, 35.16it/s]
 77%|███████▋  | 12046/15550 [06:20<01:37, 35.84it/s]
 77%|███████▋  | 12050/15550 [06:20<01:35, 36.62it/s]
 78%|███████▊  | 12054/15550 [06:20<01:33, 37.51it/s]
 78%|███████▊  | 12058/15550 [06:21<01:37, 35.75it/s]
 78%|███████▊  | 12062/15550 [06:21<01:41, 34.50it/s]
 78%|███████▊  | 12066/15550 [06:21<01:38, 35.52it/s]
 78%|███████▊  | 12070/15550 [06:21<01:37, 35.80it/s]
 78%|███████▊  | 12074/15550 [06:21<01:34, 36.78it/s]
 78%|███████▊  | 12078/15550

Trial _objective_f556c_00004 finished iteration 4 at 2023-09-11 13:59:22. Total running time: 39min 1s
+-------------------------------------------------+
| Trial _objective_f556c_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                         100.27 |
| time_total_s                            404.563 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.27456 |
| eval_runtime                              9.578 |
| eval_samples_per_second                 432.972 |
| eval_steps_per_second                    13.573 |
| objective                               0.27456 |
+-------------------------------------------------+

[2m[36m(_objective pid=24721)[0m {'eval_loss': 0.274558424949646, 'eval_runtime': 9.578, 'eval_samples_per_second': 432.972, 'eval_steps_per_second': 13.573, 'epoch': 4.0}
Trial status: 4 TERMINATED | 1 RUNNING | 15 P

[2m[36m(_objective pid=24721)[0m  80%|████████  | 12444/15550 [06:43<43:35,  1.19it/s]
 80%|████████  | 12447/15550 [06:43<33:26,  1.55it/s]
 80%|████████  | 12451/15550 [06:43<23:18,  2.22it/s]
 80%|████████  | 12455/15550 [06:43<16:28,  3.13it/s]
 80%|████████  | 12459/15550 [06:43<11:48,  4.36it/s]
 80%|████████  | 12463/15550 [06:43<08:36,  5.98it/s]
 80%|████████  | 12467/15550 [06:43<06:24,  8.02it/s]
 80%|████████  | 12471/15550 [06:43<04:54, 10.46it/s]
 80%|████████  | 12475/15550 [06:44<03:55, 13.03it/s]
 80%|████████  | 12479/15550 [06:44<03:15, 15.67it/s]
 80%|████████  | 12483/15550 [06:44<02:47, 18.31it/s]
 80%|████████  | 12487/15550 [06:44<02:20, 21.78it/s]
 80%|████████  | 12491/15550 [06:44<02:03, 24.83it/s]
 80%|████████  | 12495/15550 [06:44<01:55, 26.36it/s]
 80%|████████  | 12499/15550 [06:44<01:47, 28.27it/s]


[2m[36m(_objective pid=24721)[0m {'loss': 0.0481, 'learning_rate': 6.760237014763363e-06, 'epoch': 4.02}


 80%|████████  | 12503/15550 [06:44<01:39, 30.57it/s]
 80%|████████  | 12507/15550 [06:45<01:35, 31.99it/s]
 80%|████████  | 12511/15550 [06:45<01:46, 28.42it/s]
 80%|████████  | 12515/15550 [06:45<01:39, 30.46it/s]
 81%|████████  | 12519/15550 [06:45<01:44, 29.05it/s]
 81%|████████  | 12523/15550 [06:45<01:41, 29.78it/s]
 81%|████████  | 12527/15550 [06:45<01:36, 31.31it/s]
 81%|████████  | 12531/15550 [06:45<01:30, 33.36it/s]
 81%|████████  | 12535/15550 [06:45<01:29, 33.87it/s]
 81%|████████  | 12539/15550 [06:46<01:26, 34.86it/s]
 81%|████████  | 12543/15550 [06:46<01:23, 35.93it/s]
 81%|████████  | 12547/15550 [06:46<01:21, 36.88it/s]
 81%|████████  | 12551/15550 [06:46<01:20, 37.16it/s]
 81%|████████  | 12555/15550 [06:46<01:21, 36.62it/s]
 81%|████████  | 12559/15550 [06:46<01:20, 37.34it/s]
 81%|████████  | 12563/15550 [06:46<01:23, 35.70it/s]
 81%|████████  | 12568/15550 [06:46<01:20, 37.12it/s]
 81%|████████  | 12572/15550 [06:46<01:19, 37.53it/s]
 81%|████████  | 12576/15550

[2m[36m(_objective pid=24721)[0m {'loss': 0.0449, 'learning_rate': 5.652001438572647e-06, 'epoch': 4.18}


[2m[36m(_objective pid=24721)[0m  84%|████████▎ | 13005/15550 [06:59<01:24, 29.98it/s]
 84%|████████▎ | 13009/15550 [06:59<01:21, 31.36it/s]
 84%|████████▎ | 13013/15550 [06:59<01:24, 30.09it/s]
 84%|████████▎ | 13017/15550 [06:59<01:18, 32.20it/s]
 84%|████████▎ | 13021/15550 [07:00<01:14, 33.74it/s]
 84%|████████▍ | 13025/15550 [07:00<01:12, 34.87it/s]
 84%|████████▍ | 13029/15550 [07:00<01:19, 31.84it/s]
 84%|████████▍ | 13033/15550 [07:00<01:14, 33.57it/s]
 84%|████████▍ | 13037/15550 [07:00<01:11, 35.09it/s]
 84%|████████▍ | 13041/15550 [07:00<01:08, 36.37it/s]
 84%|████████▍ | 13045/15550 [07:00<01:08, 36.45it/s]
 84%|████████▍ | 13049/15550 [07:00<01:08, 36.78it/s]
 84%|████████▍ | 13053/15550 [07:00<01:07, 37.27it/s]
 84%|████████▍ | 13057/15550 [07:01<01:09, 36.00it/s]
 84%|████████▍ | 13061/15550 [07:01<01:07, 37.01it/s]
 84%|████████▍ | 13065/15550 [07:01<01:08, 36.03it/s]
 84%|████████▍ | 13069/15550 [07:01<01:07, 36.96it/s]
 84%|████████▍ | 13073/15550 [07:01<01:12, 34

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 13:59:53. Total running time: 39min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m  87%|████████▋ | 13469/15550 [07:12<01:10, 29.38it/s]
 87%|████████▋ | 13472/15550 [07:13<01:11, 29.14it/s]
 87%|████████▋ | 13476/15550 [07:13<01:06, 31.29it/s]
 87%|████████▋ | 13480/15550 [07:13<01:09, 29.70it/s]
 87%|████████▋ | 13484/15550 [07:13<01:13, 28.04it/s]
 87%|████████▋ | 13488/15550 [07:13<01:07, 30.33it/s]
 87%|████████▋ | 13492/15550 [07:13<01:06, 31.13it/s]
 87%|████████▋ | 13496/15550 [07:13<01:02, 32.62it/s]


[2m[36m(_objective pid=24721)[0m {'loss': 0.0397, 'learning_rate': 4.543765862381932e-06, 'epoch': 4.34}


[2m[36m(_objective pid=24721)[0m  87%|████████▋ | 13500/15550 [07:13<01:00, 34.13it/s]                                                      87%|████████▋ | 13500/15550 [07:13<01:00, 34.13it/s]
 87%|████████▋ | 13504/15550 [07:14<00:58, 35.16it/s]
 87%|████████▋ | 13508/15550 [07:14<00:57, 35.53it/s]
 87%|████████▋ | 13512/15550 [07:14<00:56, 35.93it/s]
 87%|████████▋ | 13516/15550 [07:14<00:57, 35.31it/s]
 87%|████████▋ | 13520/15550 [07:14<01:00, 33.73it/s]
 87%|████████▋ | 13525/15550 [07:14<01:02, 32.23it/s]
 87%|████████▋ | 13529/15550 [07:14<01:03, 31.60it/s]
 87%|████████▋ | 13533/15550 [07:14<01:08, 29.64it/s]
 87%|████████▋ | 13538/15550 [07:15<01:01, 32.59it/s]
 87%|████████▋ | 13542/15550 [07:15<01:00, 33.13it/s]
 87%|████████▋ | 13546/15550 [07:15<00:57, 34.56it/s]
 87%|████████▋ | 13550/15550 [07:15<00:56, 35.28it/s]
 87%|████████▋ | 13554/15550 [07:15<00:54, 36.44it/s]
 87%|████████▋ | 13558/15550 [07:15<00:53, 37.36it/s]
 87%|████████▋ | 13562/15550 [07:15<00:52, 3

[2m[36m(_objective pid=24721)[0m {'loss': 0.0389, 'learning_rate': 3.435530286191217e-06, 'epoch': 4.5}


[2m[36m(_objective pid=24721)[0m  90%|█████████ | 14003/15550 [07:28<00:41, 37.34it/s]
 90%|█████████ | 14007/15550 [07:28<00:42, 36.43it/s]
 90%|█████████ | 14011/15550 [07:28<00:41, 37.05it/s]
 90%|█████████ | 14015/15550 [07:28<00:40, 37.81it/s]
 90%|█████████ | 14020/15550 [07:28<00:39, 38.63it/s]
 90%|█████████ | 14024/15550 [07:28<00:39, 38.81it/s]
 90%|█████████ | 14028/15550 [07:28<00:43, 35.07it/s]
 90%|█████████ | 14033/15550 [07:28<00:41, 36.86it/s]
 90%|█████████ | 14038/15550 [07:28<00:39, 38.00it/s]
 90%|█████████ | 14042/15550 [07:29<00:40, 36.89it/s]
 90%|█████████ | 14046/15550 [07:29<00:39, 37.63it/s]
 90%|█████████ | 14050/15550 [07:29<00:44, 33.93it/s]
 90%|█████████ | 14054/15550 [07:29<00:42, 35.40it/s]
 90%|█████████ | 14058/15550 [07:29<00:40, 36.57it/s]
 90%|█████████ | 14062/15550 [07:29<00:40, 37.15it/s]
 90%|█████████ | 14066/15550 [07:29<00:47, 31.43it/s]
 90%|█████████ | 14070/15550 [07:29<00:44, 33.53it/s]
 91%|█████████ | 14074/15550 [07:30<00:43, 33

[2m[36m(_objective pid=24721)[0m {'loss': 0.0297, 'learning_rate': 2.327294710000502e-06, 'epoch': 4.66}


 93%|█████████▎| 14506/15550 [07:42<00:29, 35.98it/s]
 93%|█████████▎| 14510/15550 [07:42<00:29, 35.01it/s]
 93%|█████████▎| 14514/15550 [07:42<00:31, 32.49it/s]
 93%|█████████▎| 14518/15550 [07:42<00:30, 33.49it/s]
 93%|█████████▎| 14522/15550 [07:43<00:29, 34.64it/s]


Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 14:00:24. Total running time: 40min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                     

[2m[36m(_objective pid=24721)[0m  93%|█████████▎| 14526/15550 [07:43<00:28, 35.92it/s]
 93%|█████████▎| 14530/15550 [07:43<00:29, 35.12it/s]
 93%|█████████▎| 14535/15550 [07:43<00:30, 32.87it/s]
 93%|█████████▎| 14539/15550 [07:43<00:30, 33.03it/s]
 94%|█████████▎| 14543/15550 [07:43<00:29, 33.58it/s]
 94%|█████████▎| 14547/15550 [07:43<00:29, 34.49it/s]
 94%|█████████▎| 14552/15550 [07:43<00:27, 36.33it/s]
 94%|█████████▎| 14556/15550 [07:44<00:27, 36.76it/s]
 94%|█████████▎| 14560/15550 [07:44<00:27, 35.39it/s]
 94%|█████████▎| 14564/15550 [07:44<00:27, 36.24it/s]
 94%|█████████▎| 14568/15550 [07:44<00:26, 36.77it/s]
 94%|█████████▎| 14572/15550 [07:44<00:28, 33.88it/s]
 94%|█████████▎| 14576/15550 [07:44<00:27, 34.79it/s]
 94%|█████████▍| 14580/15550 [07:44<00:29, 32.38it/s]
 94%|█████████▍| 14584/15550 [07:44<00:29, 32.90it/s]
 94%|█████████▍| 14588/15550 [07:44<00:28, 34.35it/s]
 94%|█████████▍| 14593/15550 [07:45<00:26, 36.37it/s]
 94%|█████████▍| 14597/15550 [07:45<00:25, 36

[2m[36m(_objective pid=24721)[0m {'loss': 0.0492, 'learning_rate': 1.2190591338097866e-06, 'epoch': 4.82}


 96%|█████████▋| 15004/15550 [07:56<00:16, 32.58it/s]
 97%|█████████▋| 15009/15550 [07:56<00:15, 34.09it/s]
 97%|█████████▋| 15013/15550 [07:56<00:15, 34.42it/s]
 97%|█████████▋| 15017/15550 [07:57<00:15, 35.13it/s]
 97%|█████████▋| 15021/15550 [07:57<00:15, 34.65it/s]
 97%|█████████▋| 15025/15550 [07:57<00:14, 35.42it/s]
 97%|█████████▋| 15029/15550 [07:57<00:15, 34.05it/s]
 97%|█████████▋| 15034/15550 [07:57<00:14, 35.99it/s]
 97%|█████████▋| 15039/15550 [07:57<00:13, 37.54it/s]
 97%|█████████▋| 15044/15550 [07:57<00:13, 37.63it/s]
 97%|█████████▋| 15048/15550 [07:57<00:14, 34.55it/s]
 97%|█████████▋| 15053/15550 [07:58<00:14, 35.35it/s]
 97%|█████████▋| 15057/15550 [07:58<00:13, 36.03it/s]
 97%|█████████▋| 15061/15550 [07:58<00:13, 36.61it/s]
 97%|█████████▋| 15065/15550 [07:58<00:12, 37.42it/s]
 97%|█████████▋| 15069/15550 [07:58<00:12, 37.10it/s]
 97%|█████████▋| 15074/15550 [07:58<00:12, 38.14it/s]
 97%|█████████▋| 15079/15550 [07:58<00:12, 38.86it/s]
 97%|█████████▋| 15084/15550

[2m[36m(_objective pid=24721)[0m {'loss': 0.035, 'learning_rate': 1.1082355761907151e-07, 'epoch': 4.98}


[2m[36m(_objective pid=24721)[0m 100%|█████████▉| 15507/15550 [08:10<00:01, 39.28it/s]
100%|█████████▉| 15511/15550 [08:10<00:01, 37.98it/s]
100%|█████████▉| 15515/15550 [08:11<00:00, 37.89it/s]
100%|█████████▉| 15520/15550 [08:11<00:00, 35.15it/s]
100%|█████████▉| 15525/15550 [08:11<00:00, 35.63it/s]
100%|█████████▉| 15530/15550 [08:11<00:00, 36.82it/s]
100%|█████████▉| 15534/15550 [08:11<00:00, 33.09it/s]
100%|█████████▉| 15538/15550 [08:11<00:00, 31.10it/s]
100%|█████████▉| 15542/15550 [08:11<00:00, 32.24it/s]
100%|█████████▉| 15546/15550 [08:12<00:00, 31.89it/s]
100%|██████████| 15550/15550 [08:12<00:00, 30.94it/s]
  0%|          | 0/130 [00:00<?, ?it/s][A
[2m[36m(_objective pid=24721)[0m 
  2%|▏         | 2/130 [00:00<00:08, 14.39it/s][A
[2m[36m(_objective pid=24721)[0m 
  4%|▍         | 5/130 [00:00<00:06, 20.63it/s][A
[2m[36m(_objective pid=24721)[0m 
  6%|▌         | 8/130 [00:00<00:05, 21.17it/s][A
[2m[36m(_objective pid=24721)[0m 
  8%|▊         | 11/130 [

Trial status: 4 TERMINATED | 1 RUNNING | 15 PENDING
Current time: 2023-09-11 14:00:54. Total running time: 40min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00004   RUNNING          3.44661e-05                    5                    

[2m[36m(_objective pid=24721)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.49it/s][A
[2m[36m(_objective pid=24721)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.36it/s][A
[2m[36m(_objective pid=24721)[0m 
 13%|█▎        | 17/130 [00:01<00:09, 11.61it/s][A
[2m[36m(_objective pid=24721)[0m 
 15%|█▍        | 19/130 [00:01<00:09, 11.52it/s][A
[2m[36m(_objective pid=24721)[0m 
 16%|█▌        | 21/130 [00:01<00:08, 13.02it/s][A
[2m[36m(_objective pid=24721)[0m 
 18%|█▊        | 23/130 [00:01<00:09, 11.84it/s][A
[2m[36m(_objective pid=24721)[0m 
 19%|█▉        | 25/130 [00:01<00:07, 13.47it/s][A
[2m[36m(_objective pid=24721)[0m 
 21%|██        | 27/130 [00:01<00:07, 14.17it/s][A
[2m[36m(_objective pid=24721)[0m 
 24%|██▍       | 31/130 [00:02<00:06, 16.23it/s][A
[2m[36m(_objective pid=24721)[0m 
 26%|██▌       | 34/130 [00:02<00:05, 16.39it/s][A
[2m[36m(_objective pid=24721)[0m 
 28%|██▊       | 36/130 [00:02<00:05, 16.33it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00004 finished iteration 5 at 2023-09-11 14:01:02. Total running time: 40min 41s
+-------------------------------------------------+
| Trial _objective_f556c_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        100.026 |
| time_total_s                             504.59 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.27618 |
| eval_runtime                             9.5635 |
| eval_samples_per_second                 433.628 |
| eval_steps_per_second                    13.593 |
| objective                               0.27618 |
+-------------------------------------------------+

[2m[36m(_objective pid=24721)[0m {'eval_loss': 0.2761791944503784, 'eval_runtime': 9.5635, 'eval_samples_per_second': 433.628, 'eval_steps_per_second': 13.593, 'epoch': 5.0}


[2m[36m(_objective pid=24721)[0m                                                      
[2m[36m(_objective pid=24721)[0m                                                  [A100%|██████████| 15550/15550 [08:21<00:00, 30.94it/s]
[2m[36m(_objective pid=24721)[0m 100%|██████████| 130/130 [00:09<00:00, 12.01it/s][A
[2m[36m(_objective pid=24721)[0m                                                  [A


[2m[36m(_objective pid=24721)[0m {'train_runtime': 503.2592, 'train_samples_per_second': 123.584, 'train_steps_per_second': 30.899, 'train_loss': 0.12507359102104829, 'epoch': 5.0}
Trial _objective_f556c_00004 completed after 5 iterations at 2023-09-11 14:01:04. Total running time: 40min 43s



[2m[36m(_objective pid=24721)[0m                                                      100%|██████████| 15550/15550 [08:23<00:00, 30.94it/s]100%|██████████| 15550/15550 [08:23<00:00, 30.90it/s]


Trial _objective_f556c_00005 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00005 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             3e-05 |
| num_train_epochs                              4 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.10991 |
+-------------------------------------------------+



[2m[36m(_objective pid=26946)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
[2m[36m(_objective pid=26946)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=26946)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=26946)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:01:24. Total running time: 41min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

  3%|▎         | 359/12440 [00:10<05:47, 34.77it/s]
  3%|▎         | 363/12440 [00:10<05:41, 35.37it/s]
  3%|▎         | 367/12440 [00:11<06:28, 31.07it/s]
  3%|▎         | 371/12440 [00:11<06:06, 32.90it/s]
  3%|▎         | 375/12440 [00:11<05:50, 34.38it/s]
  3%|▎         | 379/12440 [00:11<05:47, 34.74it/s]
  3%|▎         | 383/12440 [00:11<05:41, 35.26it/s]
  3%|▎         | 388/12440 [00:11<05:29, 36.63it/s]
  3%|▎         | 392/12440 [00:11<05:30, 36.45it/s]
  3%|▎         | 396/12440 [00:11<05:33, 36.11it/s]
  3%|▎         | 400/12440 [00:11<05:27, 36.71it/s]
  3%|▎         | 404/12440 [00:12<05:35, 35.88it/s]
  3%|▎         | 408/12440 [00:12<05:39, 35.46it/s]
  3%|▎         | 412/12440 [00:12<05:30, 36.36it/s]
  3%|▎         | 416/12440 [00:12<05:43, 35.02it/s]
  3%|▎         | 420/12440 [00:12<05:33, 36.07it/s]
  3%|▎         | 424/12440 [00:12<05:31, 36.28it/s]
  3%|▎         | 429/12440 [00:12<05:19, 37.62it/s]
  3%|▎         | 433/12440 [00:12<05:17, 37.86it/s]
  4%|▎      

[2m[36m(_objective pid=26946)[0m {'loss': 0.3917, 'learning_rate': 2.494964630958248e-05, 'epoch': 0.16}


[2m[36m(_objective pid=26946)[0m   4%|▍         | 506/12440 [00:15<06:05, 32.64it/s]
  4%|▍         | 510/12440 [00:15<06:09, 32.30it/s]
  4%|▍         | 514/12440 [00:15<05:54, 33.63it/s]
  4%|▍         | 518/12440 [00:15<05:43, 34.70it/s]
  4%|▍         | 523/12440 [00:15<05:25, 36.62it/s]
  4%|▍         | 527/12440 [00:15<05:20, 37.22it/s]
  4%|▍         | 531/12440 [00:15<05:14, 37.87it/s]
  4%|▍         | 535/12440 [00:15<05:31, 35.86it/s]
  4%|▍         | 539/12440 [00:15<05:50, 33.95it/s]
  4%|▍         | 543/12440 [00:16<05:36, 35.35it/s]
  4%|▍         | 548/12440 [00:16<05:21, 36.96it/s]
  4%|▍         | 552/12440 [00:16<05:14, 37.76it/s]
  4%|▍         | 556/12440 [00:16<05:27, 36.24it/s]
  5%|▍         | 561/12440 [00:16<05:16, 37.54it/s]
  5%|▍         | 565/12440 [00:16<05:18, 37.26it/s]
  5%|▍         | 569/12440 [00:16<05:19, 37.13it/s]
  5%|▍         | 573/12440 [00:16<05:20, 36.98it/s]
  5%|▍         | 577/12440 [00:16<05:13, 37.78it/s]
  5%|▍         | 581/12440 

[2m[36m(_objective pid=26946)[0m {'loss': 0.3303, 'learning_rate': 2.390485375055474e-05, 'epoch': 0.32}


  8%|▊         | 1009/12440 [00:28<04:43, 40.34it/s]
  8%|▊         | 1014/12440 [00:29<04:43, 40.28it/s]
  8%|▊         | 1019/12440 [00:29<04:45, 39.99it/s]
  8%|▊         | 1024/12440 [00:29<04:57, 38.31it/s]
  8%|▊         | 1029/12440 [00:29<04:52, 39.01it/s]
  8%|▊         | 1033/12440 [00:29<04:50, 39.25it/s]
  8%|▊         | 1037/12440 [00:29<04:57, 38.36it/s]
  8%|▊         | 1042/12440 [00:29<04:52, 38.93it/s]
  8%|▊         | 1046/12440 [00:29<04:52, 38.99it/s]
  8%|▊         | 1050/12440 [00:29<04:50, 39.15it/s]
  8%|▊         | 1054/12440 [00:30<05:15, 36.13it/s]
  9%|▊         | 1058/12440 [00:30<05:13, 36.26it/s]
  9%|▊         | 1062/12440 [00:30<05:12, 36.44it/s]
  9%|▊         | 1066/12440 [00:30<05:10, 36.58it/s]
  9%|▊         | 1071/12440 [00:30<05:00, 37.87it/s]
  9%|▊         | 1075/12440 [00:30<04:58, 38.11it/s]
  9%|▊         | 1079/12440 [00:30<05:18, 35.64it/s]
  9%|▊         | 1083/12440 [00:30<05:28, 34.54it/s]
  9%|▊         | 1087/12440 [00:31<05:15, 35.9

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:01:54. Total running time: 41min 32s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                    

[2m[36m(_objective pid=26946)[0m  11%|█▏        | 1427/12440 [00:40<05:02, 36.43it/s]
 12%|█▏        | 1431/12440 [00:40<04:55, 37.29it/s]
 12%|█▏        | 1435/12440 [00:40<04:52, 37.60it/s]
 12%|█▏        | 1439/12440 [00:41<05:08, 35.71it/s]
 12%|█▏        | 1444/12440 [00:41<05:14, 34.95it/s]
 12%|█▏        | 1448/12440 [00:41<05:07, 35.76it/s]
 12%|█▏        | 1452/12440 [00:41<05:02, 36.30it/s]
 12%|█▏        | 1457/12440 [00:41<04:54, 37.32it/s]
 12%|█▏        | 1461/12440 [00:41<04:58, 36.79it/s]
 12%|█▏        | 1465/12440 [00:41<05:14, 34.87it/s]
 12%|█▏        | 1469/12440 [00:41<05:10, 35.37it/s]
 12%|█▏        | 1473/12440 [00:41<05:00, 36.46it/s]
 12%|█▏        | 1477/12440 [00:42<05:35, 32.70it/s]
 12%|█▏        | 1481/12440 [00:42<05:17, 34.50it/s]
 12%|█▏        | 1485/12440 [00:42<05:24, 33.81it/s]
 12%|█▏        | 1490/12440 [00:42<05:04, 35.96it/s]
 12%|█▏        | 1494/12440 [00:42<05:02, 36.13it/s]
 12%|█▏        | 1500/12440 [00:42<04:50, 37.69it/s]


[2m[36m(_objective pid=26946)[0m {'loss': 0.2829, 'learning_rate': 2.2860061191527e-05, 'epoch': 0.48}


[2m[36m(_objective pid=26946)[0m  12%|█▏        | 1504/12440 [00:42<04:40, 38.92it/s]
 12%|█▏        | 1509/12440 [00:42<04:34, 39.79it/s]
 12%|█▏        | 1513/12440 [00:43<04:35, 39.68it/s]
 12%|█▏        | 1517/12440 [00:43<04:38, 39.26it/s]
 12%|█▏        | 1522/12440 [00:43<04:46, 38.07it/s]
 12%|█▏        | 1527/12440 [00:43<04:40, 38.94it/s]
 12%|█▏        | 1532/12440 [00:43<04:34, 39.67it/s]
 12%|█▏        | 1537/12440 [00:43<04:32, 40.03it/s]
 12%|█▏        | 1542/12440 [00:43<05:00, 36.24it/s]
 12%|█▏        | 1546/12440 [00:43<04:53, 37.11it/s]
 12%|█▏        | 1550/12440 [00:44<04:57, 36.66it/s]
 12%|█▏        | 1554/12440 [00:44<05:02, 35.94it/s]
 13%|█▎        | 1558/12440 [00:44<06:01, 30.14it/s]
 13%|█▎        | 1562/12440 [00:44<05:36, 32.31it/s]
 13%|█▎        | 1567/12440 [00:44<05:09, 35.16it/s]
 13%|█▎        | 1571/12440 [00:44<05:15, 34.43it/s]
 13%|█▎        | 1576/12440 [00:44<04:55, 36.71it/s]
 13%|█▎        | 1581/12440 [00:44<04:43, 38.26it/s]
 13%|█▎  

[2m[36m(_objective pid=26946)[0m {'loss': 0.2384, 'learning_rate': 2.1815268632499258e-05, 'epoch': 0.64}


[2m[36m(_objective pid=26946)[0m  16%|█▌        | 2006/12440 [00:57<04:35, 37.94it/s]
 16%|█▌        | 2010/12440 [00:57<04:33, 38.13it/s]
 16%|█▌        | 2014/12440 [00:57<04:33, 38.16it/s]
 16%|█▌        | 2018/12440 [00:57<05:00, 34.66it/s]
 16%|█▋        | 2022/12440 [00:57<05:06, 33.96it/s]
 16%|█▋        | 2026/12440 [00:57<05:32, 31.34it/s]
 16%|█▋        | 2031/12440 [00:57<05:05, 34.05it/s]
 16%|█▋        | 2035/12440 [00:58<04:53, 35.47it/s]
 16%|█▋        | 2040/12440 [00:58<04:40, 37.11it/s]
 16%|█▋        | 2044/12440 [00:58<05:08, 33.72it/s]
 16%|█▋        | 2048/12440 [00:58<04:59, 34.66it/s]
 16%|█▋        | 2052/12440 [00:58<05:24, 32.02it/s]
 17%|█▋        | 2056/12440 [00:58<05:24, 31.95it/s]
 17%|█▋        | 2061/12440 [00:58<04:59, 34.63it/s]
 17%|█▋        | 2065/12440 [00:58<04:55, 35.07it/s]
 17%|█▋        | 2069/12440 [00:59<04:45, 36.27it/s]
 17%|█▋        | 2073/12440 [00:59<04:41, 36.87it/s]
 17%|█▋        | 2078/12440 [00:59<04:30, 38.24it/s]
 17%|█▋  

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:02:24. Total running time: 42min 2s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

[2m[36m(_objective pid=26946)[0m  20%|█▉        | 2478/12440 [01:10<04:35, 36.19it/s]
 20%|█▉        | 2482/12440 [01:10<04:43, 35.17it/s]
 20%|█▉        | 2486/12440 [01:10<04:42, 35.22it/s]
 20%|██        | 2491/12440 [01:11<04:37, 35.81it/s]
 20%|██        | 2495/12440 [01:11<04:32, 36.45it/s]
 20%|██        | 2500/12440 [01:11<04:22, 37.80it/s]
 20%|██        | 2504/12440 [01:11<04:19, 38.23it/s]


[2m[36m(_objective pid=26946)[0m {'loss': 0.2176, 'learning_rate': 2.0770476073471514e-05, 'epoch': 0.8}


[2m[36m(_objective pid=26946)[0m  20%|██        | 2508/12440 [01:11<04:58, 33.23it/s]
 20%|██        | 2512/12440 [01:11<04:58, 33.26it/s]
 20%|██        | 2516/12440 [01:11<05:14, 31.58it/s]
 20%|██        | 2520/12440 [01:11<04:57, 33.38it/s]
 20%|██        | 2525/12440 [01:12<04:38, 35.57it/s]
 20%|██        | 2529/12440 [01:12<04:52, 33.89it/s]
 20%|██        | 2533/12440 [01:12<04:50, 34.14it/s]
 20%|██        | 2537/12440 [01:12<04:40, 35.26it/s]
 20%|██        | 2541/12440 [01:12<04:35, 35.91it/s]
 20%|██        | 2545/12440 [01:12<04:35, 35.97it/s]
 20%|██        | 2549/12440 [01:12<04:30, 36.51it/s]
 21%|██        | 2553/12440 [01:12<04:25, 37.26it/s]
 21%|██        | 2557/12440 [01:12<04:24, 37.36it/s]
 21%|██        | 2562/12440 [01:13<04:19, 38.02it/s]
 21%|██        | 2566/12440 [01:13<04:43, 34.85it/s]
 21%|██        | 2570/12440 [01:13<04:33, 36.10it/s]
 21%|██        | 2574/12440 [01:13<04:29, 36.58it/s]
 21%|██        | 2578/12440 [01:13<04:39, 35.24it/s]
 21%|██  

[2m[36m(_objective pid=26946)[0m {'loss': 0.2475, 'learning_rate': 1.972568351444377e-05, 'epoch': 0.96}


[2m[36m(_objective pid=26946)[0m  24%|██▍       | 3003/12440 [01:25<04:53, 32.18it/s]
 24%|██▍       | 3007/12440 [01:25<05:01, 31.24it/s]
 24%|██▍       | 3011/12440 [01:25<04:43, 33.31it/s]
 24%|██▍       | 3015/12440 [01:26<04:31, 34.77it/s]
 24%|██▍       | 3020/12440 [01:26<04:19, 36.37it/s]
 24%|██▍       | 3024/12440 [01:26<04:12, 37.29it/s]
 24%|██▍       | 3028/12440 [01:26<04:36, 34.00it/s]
 24%|██▍       | 3032/12440 [01:26<04:26, 35.30it/s]
 24%|██▍       | 3036/12440 [01:26<04:18, 36.41it/s]
 24%|██▍       | 3040/12440 [01:26<04:26, 35.21it/s]
 24%|██▍       | 3045/12440 [01:26<04:14, 36.89it/s]
 25%|██▍       | 3049/12440 [01:27<04:13, 36.99it/s]
 25%|██▍       | 3053/12440 [01:27<04:12, 37.17it/s]
 25%|██▍       | 3058/12440 [01:27<04:05, 38.19it/s]
 25%|██▍       | 3062/12440 [01:27<04:03, 38.55it/s]
 25%|██▍       | 3066/12440 [01:27<04:03, 38.45it/s]
 25%|██▍       | 3070/12440 [01:27<04:04, 38.29it/s]
 25%|██▍       | 3075/12440 [01:27<03:59, 39.03it/s]
 25%|██▍ 

Trial _objective_f556c_00005 finished iteration 1 at 2023-09-11 14:02:51. Total running time: 42min 30s
+-------------------------------------------------+
| Trial _objective_f556c_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        100.577 |
| time_total_s                            100.577 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.24987 |
| eval_runtime                             9.5592 |
| eval_samples_per_second                 433.824 |
| eval_steps_per_second                      13.6 |
| objective                               0.24987 |
+-------------------------------------------------+

[2m[36m(_objective pid=26946)[0m {'eval_loss': 0.24986836314201355, 'eval_runtime': 9.5592, 'eval_samples_per_second': 433.824, 'eval_steps_per_second': 13.6, 'epoch': 1.0}


[2m[36m(_objective pid=26946)[0m                                                     
[2m[36m(_objective pid=26946)[0m                                                  [A 25%|██▌       | 3110/12440 [01:38<04:11, 37.08it/s]
[2m[36m(_objective pid=26946)[0m 100%|██████████| 130/130 [00:09<00:00, 12.02it/s][A
                                                 [A
 25%|██▌       | 3112/12440 [01:39<2:07:22,  1.22it/s]
 25%|██▌       | 3116/12440 [01:39<1:31:17,  1.70it/s]
 25%|██▌       | 3120/12440 [01:39<1:06:25,  2.34it/s]
 25%|██▌       | 3124/12440 [01:40<47:54,  3.24it/s]  
 25%|██▌       | 3128/12440 [01:40<35:00,  4.43it/s]
 25%|██▌       | 3132/12440 [01:40<25:45,  6.02it/s]
 25%|██▌       | 3136/12440 [01:40<19:23,  8.00it/s]
 25%|██▌       | 3140/12440 [01:40<14:54, 10.40it/s]
 25%|██▌       | 3144/12440 [01:40<11:37, 13.32it/s]
 25%|██▌       | 3148/12440 [01:40<09:19, 16.62it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:02:54. Total running time: 42min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                    

[2m[36m(_objective pid=26946)[0m  25%|██▌       | 3152/12440 [01:40<07:42, 20.10it/s]
 25%|██▌       | 3156/12440 [01:40<06:40, 23.16it/s]
 25%|██▌       | 3160/12440 [01:41<06:43, 22.98it/s]
 25%|██▌       | 3164/12440 [01:41<06:27, 23.97it/s]
 25%|██▌       | 3168/12440 [01:41<05:55, 26.09it/s]
 25%|██▌       | 3172/12440 [01:41<06:04, 25.41it/s]
 26%|██▌       | 3176/12440 [01:41<05:29, 28.13it/s]
 26%|██▌       | 3180/12440 [01:41<05:01, 30.76it/s]
 26%|██▌       | 3184/12440 [01:41<04:47, 32.17it/s]
 26%|██▌       | 3189/12440 [01:41<04:25, 34.79it/s]
 26%|██▌       | 3193/12440 [01:42<04:36, 33.41it/s]
 26%|██▌       | 3197/12440 [01:42<04:40, 32.95it/s]
 26%|██▌       | 3202/12440 [01:42<04:22, 35.15it/s]
 26%|██▌       | 3206/12440 [01:42<04:14, 36.34it/s]
 26%|██▌       | 3210/12440 [01:42<04:18, 35.68it/s]
 26%|██▌       | 3214/12440 [01:42<04:11, 36.71it/s]
 26%|██▌       | 3218/12440 [01:42<04:06, 37.38it/s]
 26%|██▌       | 3222/12440 [01:42<04:09, 37.01it/s]
 26%|██▌ 

[2m[36m(_objective pid=26946)[0m {'loss': 0.1612, 'learning_rate': 1.868089095541603e-05, 'epoch': 1.13}


[2m[36m(_objective pid=26946)[0m  28%|██▊       | 3503/12440 [01:50<04:03, 36.78it/s]
 28%|██▊       | 3507/12440 [01:50<04:01, 37.02it/s]
 28%|██▊       | 3511/12440 [01:50<04:09, 35.75it/s]
 28%|██▊       | 3515/12440 [01:51<04:17, 34.72it/s]
 28%|██▊       | 3519/12440 [01:51<04:18, 34.49it/s]
 28%|██▊       | 3523/12440 [01:51<04:14, 35.01it/s]
 28%|██▊       | 3527/12440 [01:51<04:40, 31.72it/s]
 28%|██▊       | 3531/12440 [01:51<04:42, 31.53it/s]
 28%|██▊       | 3535/12440 [01:51<04:27, 33.29it/s]
 28%|██▊       | 3539/12440 [01:51<04:33, 32.57it/s]
 28%|██▊       | 3543/12440 [01:51<04:19, 34.33it/s]
 29%|██▊       | 3547/12440 [01:52<04:08, 35.73it/s]
 29%|██▊       | 3551/12440 [01:52<04:02, 36.68it/s]
 29%|██▊       | 3555/12440 [01:52<03:58, 37.18it/s]
 29%|██▊       | 3559/12440 [01:52<03:53, 37.97it/s]
 29%|██▊       | 3563/12440 [01:52<04:01, 36.76it/s]
 29%|██▊       | 3567/12440 [01:52<04:14, 34.89it/s]
 29%|██▊       | 3571/12440 [01:52<04:37, 31.94it/s]
 29%|██▊ 

[2m[36m(_objective pid=26946)[0m {'loss': 0.1375, 'learning_rate': 1.763609839638829e-05, 'epoch': 1.29}


[2m[36m(_objective pid=26946)[0m  32%|███▏      | 4003/12440 [02:05<04:17, 32.80it/s]
 32%|███▏      | 4007/12440 [02:05<04:06, 34.22it/s]
 32%|███▏      | 4011/12440 [02:05<04:24, 31.92it/s]
 32%|███▏      | 4015/12440 [02:05<04:11, 33.55it/s]
 32%|███▏      | 4020/12440 [02:05<03:57, 35.41it/s]
 32%|███▏      | 4025/12440 [02:05<03:49, 36.72it/s]
 32%|███▏      | 4030/12440 [02:05<03:42, 37.78it/s]
 32%|███▏      | 4034/12440 [02:05<03:42, 37.82it/s]
 32%|███▏      | 4039/12440 [02:06<03:36, 38.79it/s]
 32%|███▎      | 4043/12440 [02:06<03:34, 39.10it/s]
 33%|███▎      | 4048/12440 [02:06<03:32, 39.56it/s]
 33%|███▎      | 4052/12440 [02:06<03:33, 39.35it/s]
 33%|███▎      | 4056/12440 [02:06<03:34, 39.06it/s]
 33%|███▎      | 4060/12440 [02:06<03:37, 38.57it/s]
 33%|███▎      | 4064/12440 [02:06<03:35, 38.84it/s]
 33%|███▎      | 4068/12440 [02:06<03:36, 38.64it/s]
 33%|███▎      | 4072/12440 [02:06<03:38, 38.27it/s]
 33%|███▎      | 4076/12440 [02:07<03:39, 38.16it/s]
 33%|███▎

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:03:24. Total running time: 43min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

[2m[36m(_objective pid=26946)[0m  34%|███▍      | 4200/12440 [02:10<04:31, 30.37it/s]
 34%|███▍      | 4204/12440 [02:10<04:32, 30.20it/s]
 34%|███▍      | 4209/12440 [02:11<04:08, 33.14it/s]
 34%|███▍      | 4213/12440 [02:11<04:05, 33.58it/s]
 34%|███▍      | 4217/12440 [02:11<04:06, 33.33it/s]
 34%|███▍      | 4221/12440 [02:11<04:05, 33.50it/s]
 34%|███▍      | 4225/12440 [02:11<03:58, 34.39it/s]
 34%|███▍      | 4229/12440 [02:11<04:04, 33.54it/s]
 34%|███▍      | 4233/12440 [02:11<04:01, 34.02it/s]
 34%|███▍      | 4237/12440 [02:11<04:04, 33.51it/s]
 34%|███▍      | 4241/12440 [02:11<04:05, 33.36it/s]
 34%|███▍      | 4245/12440 [02:12<04:29, 30.46it/s]
 34%|███▍      | 4250/12440 [02:12<04:05, 33.40it/s]
 34%|███▍      | 4254/12440 [02:12<03:57, 34.51it/s]
 34%|███▍      | 4259/12440 [02:12<03:44, 36.40it/s]
 34%|███▍      | 4263/12440 [02:12<03:39, 37.19it/s]
 34%|███▍      | 4267/12440 [02:12<03:38, 37.34it/s]
 34%|███▍      | 4271/12440 [02:12<03:38, 37.32it/s]
 34%|███▍

[2m[36m(_objective pid=26946)[0m {'loss': 0.1419, 'learning_rate': 1.6591305837360544e-05, 'epoch': 1.45}


[2m[36m(_objective pid=26946)[0m  36%|███▌      | 4508/12440 [02:19<03:42, 35.60it/s]
 36%|███▋      | 4513/12440 [02:19<03:33, 37.04it/s]
 36%|███▋      | 4517/12440 [02:19<03:30, 37.64it/s]
 36%|███▋      | 4521/12440 [02:19<03:28, 38.04it/s]
 36%|███▋      | 4526/12440 [02:19<03:22, 38.99it/s]
 36%|███▋      | 4530/12440 [02:19<03:24, 38.75it/s]
 36%|███▋      | 4534/12440 [02:19<03:45, 35.00it/s]
 36%|███▋      | 4538/12440 [02:20<03:55, 33.55it/s]
 37%|███▋      | 4542/12440 [02:20<03:46, 34.93it/s]
 37%|███▋      | 4546/12440 [02:20<03:52, 33.89it/s]
 37%|███▋      | 4550/12440 [02:20<03:43, 35.28it/s]
 37%|███▋      | 4555/12440 [02:20<03:33, 36.96it/s]
 37%|███▋      | 4559/12440 [02:20<03:54, 33.56it/s]
 37%|███▋      | 4563/12440 [02:20<03:54, 33.56it/s]
 37%|███▋      | 4567/12440 [02:20<03:47, 34.66it/s]
 37%|███▋      | 4571/12440 [02:20<03:38, 36.04it/s]
 37%|███▋      | 4575/12440 [02:21<03:43, 35.22it/s]
 37%|███▋      | 4579/12440 [02:21<04:00, 32.74it/s]
 37%|███▋

[2m[36m(_objective pid=26946)[0m {'loss': 0.1354, 'learning_rate': 1.5546513278332804e-05, 'epoch': 1.61}


[2m[36m(_objective pid=26946)[0m  40%|████      | 5009/12440 [02:33<03:17, 37.61it/s]
 40%|████      | 5014/12440 [02:33<03:12, 38.66it/s]
 40%|████      | 5018/12440 [02:33<03:24, 36.35it/s]
 40%|████      | 5022/12440 [02:33<03:40, 33.69it/s]
 40%|████      | 5026/12440 [02:34<03:30, 35.17it/s]
 40%|████      | 5030/12440 [02:34<03:45, 32.92it/s]
 40%|████      | 5035/12440 [02:34<03:33, 34.73it/s]
 41%|████      | 5039/12440 [02:34<03:26, 35.90it/s]
 41%|████      | 5044/12440 [02:34<03:26, 35.88it/s]
 41%|████      | 5048/12440 [02:34<03:48, 32.30it/s]
 41%|████      | 5052/12440 [02:34<03:45, 32.80it/s]
 41%|████      | 5056/12440 [02:35<03:56, 31.21it/s]
 41%|████      | 5060/12440 [02:35<03:45, 32.78it/s]
 41%|████      | 5065/12440 [02:35<03:27, 35.50it/s]
 41%|████      | 5069/12440 [02:35<03:28, 35.42it/s]
 41%|████      | 5074/12440 [02:35<03:16, 37.43it/s]
 41%|████      | 5078/12440 [02:35<03:20, 36.68it/s]
 41%|████      | 5083/12440 [02:35<03:13, 37.98it/s]
 41%|████

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:03:54. Total running time: 43min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                    

[2m[36m(_objective pid=26946)[0m  42%|████▏     | 5259/12440 [02:40<03:27, 34.68it/s]
 42%|████▏     | 5263/12440 [02:40<03:21, 35.68it/s]
 42%|████▏     | 5267/12440 [02:41<03:16, 36.47it/s]
 42%|████▏     | 5271/12440 [02:41<03:16, 36.50it/s]
 42%|████▏     | 5275/12440 [02:41<03:11, 37.35it/s]
 42%|████▏     | 5279/12440 [02:41<03:16, 36.43it/s]
 42%|████▏     | 5283/12440 [02:41<03:16, 36.51it/s]
 42%|████▎     | 5287/12440 [02:41<03:19, 35.88it/s]
 43%|████▎     | 5291/12440 [02:41<03:18, 36.06it/s]
 43%|████▎     | 5295/12440 [02:41<03:25, 34.82it/s]
 43%|████▎     | 5299/12440 [02:41<03:21, 35.43it/s]
 43%|████▎     | 5303/12440 [02:42<03:43, 31.87it/s]
 43%|████▎     | 5307/12440 [02:42<04:10, 28.49it/s]
 43%|████▎     | 5311/12440 [02:42<04:01, 29.52it/s]
 43%|████▎     | 5315/12440 [02:42<03:43, 31.81it/s]
 43%|████▎     | 5319/12440 [02:42<03:31, 33.67it/s]
 43%|████▎     | 5323/12440 [02:42<03:25, 34.62it/s]
 43%|████▎     | 5327/12440 [02:42<03:28, 34.15it/s]
 43%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.1655, 'learning_rate': 1.450172071930506e-05, 'epoch': 1.77}


[2m[36m(_objective pid=26946)[0m  44%|████▍     | 5507/12440 [02:47<03:00, 38.42it/s]
 44%|████▍     | 5512/12440 [02:47<02:56, 39.32it/s]
 44%|████▍     | 5517/12440 [02:48<02:54, 39.60it/s]
 44%|████▍     | 5521/12440 [02:48<03:10, 36.26it/s]
 44%|████▍     | 5526/12440 [02:48<03:03, 37.61it/s]
 44%|████▍     | 5531/12440 [02:48<02:58, 38.78it/s]
 44%|████▍     | 5535/12440 [02:48<02:58, 38.71it/s]
 45%|████▍     | 5539/12440 [02:48<03:10, 36.22it/s]
 45%|████▍     | 5543/12440 [02:48<03:06, 36.95it/s]
 45%|████▍     | 5548/12440 [02:48<02:59, 38.35it/s]
 45%|████▍     | 5552/12440 [02:49<02:59, 38.46it/s]
 45%|████▍     | 5556/12440 [02:49<03:17, 34.85it/s]
 45%|████▍     | 5560/12440 [02:49<03:11, 35.98it/s]
 45%|████▍     | 5564/12440 [02:49<03:09, 36.35it/s]
 45%|████▍     | 5569/12440 [02:49<03:02, 37.66it/s]
 45%|████▍     | 5573/12440 [02:49<03:08, 36.45it/s]
 45%|████▍     | 5578/12440 [02:49<03:00, 38.08it/s]
 45%|████▍     | 5583/12440 [02:49<02:59, 38.31it/s]
 45%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.1471, 'learning_rate': 1.345692816027732e-05, 'epoch': 1.93}


[2m[36m(_objective pid=26946)[0m  48%|████▊     | 6007/12440 [03:02<02:48, 38.12it/s]
 48%|████▊     | 6011/12440 [03:02<02:54, 36.92it/s]
 48%|████▊     | 6015/12440 [03:02<03:02, 35.21it/s]
 48%|████▊     | 6020/12440 [03:02<02:54, 36.77it/s]
 48%|████▊     | 6024/12440 [03:02<02:55, 36.61it/s]
 48%|████▊     | 6028/12440 [03:02<03:07, 34.14it/s]
 48%|████▊     | 6032/12440 [03:02<03:03, 34.93it/s]
 49%|████▊     | 6037/12440 [03:02<02:55, 36.40it/s]
 49%|████▊     | 6041/12440 [03:03<02:57, 36.08it/s]
 49%|████▊     | 6046/12440 [03:03<02:50, 37.45it/s]
 49%|████▊     | 6050/12440 [03:03<03:11, 33.40it/s]
 49%|████▊     | 6054/12440 [03:03<03:29, 30.54it/s]
 49%|████▊     | 6058/12440 [03:03<03:21, 31.64it/s]
 49%|████▊     | 6062/12440 [03:03<03:10, 33.44it/s]
 49%|████▉     | 6066/12440 [03:03<03:04, 34.47it/s]
 49%|████▉     | 6070/12440 [03:03<03:03, 34.64it/s]
 49%|████▉     | 6074/12440 [03:04<03:08, 33.77it/s]
 49%|████▉     | 6078/12440 [03:04<03:21, 31.58it/s]
 49%|████

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:04:24. Total running time: 44min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

[2m[36m(_objective pid=26946)[0m 
 29%|██▉       | 38/130 [00:02<00:05, 15.72it/s][A
[2m[36m(_objective pid=26946)[0m 
 31%|███       | 40/130 [00:02<00:06, 13.80it/s][A
[2m[36m(_objective pid=26946)[0m 
 32%|███▏      | 42/130 [00:02<00:06, 14.23it/s][A
[2m[36m(_objective pid=26946)[0m 
 34%|███▍      | 44/130 [00:03<00:06, 13.95it/s][A
[2m[36m(_objective pid=26946)[0m 
 35%|███▌      | 46/130 [00:03<00:06, 12.69it/s][A
[2m[36m(_objective pid=26946)[0m 
 37%|███▋      | 48/130 [00:03<00:06, 11.78it/s][A
[2m[36m(_objective pid=26946)[0m 
 39%|███▉      | 51/130 [00:03<00:06, 12.89it/s][A
[2m[36m(_objective pid=26946)[0m 
 41%|████      | 53/130 [00:03<00:05, 13.28it/s][A
[2m[36m(_objective pid=26946)[0m 
 42%|████▏     | 55/130 [00:03<00:05, 12.72it/s][A
[2m[36m(_objective pid=26946)[0m 
 44%|████▍     | 57/130 [00:04<00:05, 13.21it/s][A
[2m[36m(_objective pid=26946)[0m 
 45%|████▌     | 59/130 [00:04<00:05, 13.38it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00005 finished iteration 2 at 2023-09-11 14:04:31. Total running time: 44min 10s
+-------------------------------------------------+
| Trial _objective_f556c_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                         99.526 |
| time_total_s                            200.103 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.1675 |
| eval_runtime                             9.5704 |
| eval_samples_per_second                 433.317 |
| eval_steps_per_second                    13.584 |
| objective                                0.1675 |
+-------------------------------------------------+

[2m[36m(_objective pid=26946)[0m {'eval_loss': 0.16750454902648926, 'eval_runtime': 9.5704, 'eval_samples_per_second': 433.317, 'eval_steps_per_second': 13.584, 'epoch': 2.0}


[2m[36m(_objective pid=26946)[0m                                                     
[2m[36m(_objective pid=26946)[0m                                                  [A 50%|█████     | 6220/12440 [03:17<02:51, 36.27it/s]
[2m[36m(_objective pid=26946)[0m 100%|██████████| 130/130 [00:09<00:00, 11.98it/s][A
                                                 [A
 50%|█████     | 6223/12440 [03:19<1:26:09,  1.20it/s]
 50%|█████     | 6227/12440 [03:19<1:01:10,  1.69it/s]
 50%|█████     | 6231/12440 [03:19<43:42,  2.37it/s]  
 50%|█████     | 6235/12440 [03:19<31:23,  3.29it/s]
 50%|█████     | 6239/12440 [03:19<22:59,  4.50it/s]
 50%|█████     | 6243/12440 [03:19<17:10,  6.01it/s]
 50%|█████     | 6247/12440 [03:19<12:54,  7.99it/s]
 50%|█████     | 6251/12440 [03:19<09:58, 10.34it/s]
 50%|█████     | 6255/12440 [03:19<07:47, 13.24it/s]
 50%|█████     | 6259/12440 [03:20<06:16, 16.40it/s]
 50%|█████     | 6263/12440 [03:20<05:18, 19.41it/s]
 50%|█████     | 6267/12440 [03:20

[2m[36m(_objective pid=26946)[0m {'loss': 0.1234, 'learning_rate': 1.2412135601249577e-05, 'epoch': 2.09}


 52%|█████▏    | 6509/12440 [03:27<02:42, 36.42it/s]
 52%|█████▏    | 6513/12440 [03:27<02:46, 35.56it/s]
 52%|█████▏    | 6517/12440 [03:27<02:59, 32.95it/s]
 52%|█████▏    | 6521/12440 [03:27<02:54, 33.91it/s]
 52%|█████▏    | 6525/12440 [03:27<03:12, 30.80it/s]
 52%|█████▏    | 6529/12440 [03:27<03:17, 29.91it/s]
 53%|█████▎    | 6533/12440 [03:27<03:03, 32.15it/s]
 53%|█████▎    | 6537/12440 [03:27<02:55, 33.56it/s]
 53%|█████▎    | 6541/12440 [03:28<02:50, 34.54it/s]
 53%|█████▎    | 6545/12440 [03:28<02:47, 35.14it/s]
 53%|█████▎    | 6549/12440 [03:28<02:44, 35.91it/s]
 53%|█████▎    | 6553/12440 [03:28<02:51, 34.26it/s]
 53%|█████▎    | 6557/12440 [03:28<02:44, 35.73it/s]
 53%|█████▎    | 6561/12440 [03:28<02:48, 34.95it/s]
 53%|█████▎    | 6565/12440 [03:28<02:42, 36.22it/s]
 53%|█████▎    | 6569/12440 [03:28<02:39, 36.74it/s]
 53%|█████▎    | 6573/12440 [03:28<02:42, 36.17it/s]
 53%|█████▎    | 6577/12440 [03:29<02:38, 36.96it/s]
 53%|█████▎    | 6581/12440 [03:29<02:40, 36.5

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:04:54. Total running time: 44min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                    

[2m[36m(_objective pid=26946)[0m  56%|█████▌    | 6995/12440 [03:40<02:27, 36.97it/s]
[2m[36m(_objective pid=26946)[0m  56%|█████▋    | 7000/12440 [03:41<02:25, 37.30it/s]                                                     56%|█████▋    | 7000/12440 [03:41<02:25, 37.30it/s]


[2m[36m(_objective pid=26946)[0m {'loss': 0.0825, 'learning_rate': 1.1367343042221834e-05, 'epoch': 2.25}


[2m[36m(_objective pid=26946)[0m  56%|█████▋    | 7004/12440 [03:41<02:34, 35.22it/s]
 56%|█████▋    | 7008/12440 [03:41<02:44, 33.10it/s]
 56%|█████▋    | 7012/12440 [03:41<02:39, 34.11it/s]
 56%|█████▋    | 7016/12440 [03:41<02:33, 35.35it/s]
 56%|█████▋    | 7021/12440 [03:41<02:25, 37.25it/s]
 56%|█████▋    | 7026/12440 [03:41<02:21, 38.39it/s]
 57%|█████▋    | 7030/12440 [03:41<02:35, 34.79it/s]
 57%|█████▋    | 7034/12440 [03:42<02:35, 34.83it/s]
 57%|█████▋    | 7039/12440 [03:42<02:29, 36.20it/s]
 57%|█████▋    | 7044/12440 [03:42<02:23, 37.52it/s]
 57%|█████▋    | 7048/12440 [03:42<02:24, 37.24it/s]
 57%|█████▋    | 7052/12440 [03:42<02:25, 36.90it/s]
 57%|█████▋    | 7056/12440 [03:42<02:26, 36.63it/s]
 57%|█████▋    | 7060/12440 [03:42<02:25, 37.00it/s]
 57%|█████▋    | 7065/12440 [03:42<02:21, 38.08it/s]
 57%|█████▋    | 7069/12440 [03:42<02:37, 34.00it/s]
 57%|█████▋    | 7073/12440 [03:43<02:36, 34.35it/s]
 57%|█████▋    | 7077/12440 [03:43<02:35, 34.48it/s]
 57%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.0751, 'learning_rate': 1.0322550483194092e-05, 'epoch': 2.41}


[2m[36m(_objective pid=26946)[0m  60%|██████    | 7506/12440 [03:55<02:10, 37.81it/s]
 60%|██████    | 7510/12440 [03:55<02:10, 37.83it/s]
 60%|██████    | 7515/12440 [03:55<02:06, 38.92it/s]
 60%|██████    | 7519/12440 [03:56<02:06, 38.87it/s]
 60%|██████    | 7523/12440 [03:56<02:14, 36.68it/s]
 61%|██████    | 7527/12440 [03:56<02:20, 34.95it/s]
 61%|██████    | 7531/12440 [03:56<02:15, 36.26it/s]
 61%|██████    | 7535/12440 [03:56<02:27, 33.35it/s]
 61%|██████    | 7539/12440 [03:56<02:35, 31.58it/s]
 61%|██████    | 7543/12440 [03:56<02:30, 32.45it/s]
 61%|██████    | 7547/12440 [03:56<02:22, 34.33it/s]
 61%|██████    | 7551/12440 [03:57<02:31, 32.30it/s]
 61%|██████    | 7556/12440 [03:57<02:20, 34.78it/s]
 61%|██████    | 7560/12440 [03:57<02:19, 34.98it/s]
 61%|██████    | 7564/12440 [03:57<02:16, 35.71it/s]
 61%|██████    | 7568/12440 [03:57<02:13, 36.38it/s]
 61%|██████    | 7572/12440 [03:57<02:18, 35.10it/s]
 61%|██████    | 7576/12440 [03:57<02:17, 35.30it/s]
 61%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.0884, 'learning_rate': 9.27775792416635e-06, 'epoch': 2.57}


[2m[36m(_objective pid=26946)[0m  64%|██████▍   | 8004/12440 [04:09<02:19, 31.73it/s]
 64%|██████▍   | 8008/12440 [04:10<02:14, 33.01it/s]
 64%|██████▍   | 8012/12440 [04:10<02:23, 30.92it/s]
 64%|██████▍   | 8016/12440 [04:10<02:13, 33.04it/s]
 64%|██████▍   | 8020/12440 [04:10<02:08, 34.38it/s]
 65%|██████▍   | 8024/12440 [04:10<02:19, 31.71it/s]
 65%|██████▍   | 8028/12440 [04:10<02:25, 30.34it/s]
 65%|██████▍   | 8032/12440 [04:10<02:17, 32.02it/s]
 65%|██████▍   | 8036/12440 [04:10<02:13, 33.01it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:05:24. Total running time: 45min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

[2m[36m(_objective pid=26946)[0m  65%|██████▍   | 8041/12440 [04:11<02:05, 35.09it/s]
 65%|██████▍   | 8045/12440 [04:11<02:01, 36.31it/s]
 65%|██████▍   | 8049/12440 [04:11<02:01, 36.20it/s]
 65%|██████▍   | 8053/12440 [04:11<02:02, 35.78it/s]
 65%|██████▍   | 8057/12440 [04:11<02:12, 33.15it/s]
 65%|██████▍   | 8061/12440 [04:11<02:22, 30.63it/s]
 65%|██████▍   | 8065/12440 [04:11<02:14, 32.48it/s]
 65%|██████▍   | 8070/12440 [04:11<02:05, 34.91it/s]
 65%|██████▍   | 8074/12440 [04:11<02:01, 36.04it/s]
 65%|██████▍   | 8078/12440 [04:12<01:58, 36.69it/s]
 65%|██████▍   | 8082/12440 [04:12<02:02, 35.68it/s]
 65%|██████▌   | 8086/12440 [04:12<01:58, 36.74it/s]
 65%|██████▌   | 8090/12440 [04:12<02:01, 35.94it/s]
 65%|██████▌   | 8094/12440 [04:12<02:07, 34.03it/s]
 65%|██████▌   | 8098/12440 [04:12<02:03, 35.12it/s]
 65%|██████▌   | 8102/12440 [04:12<02:12, 32.76it/s]
 65%|██████▌   | 8106/12440 [04:12<02:18, 31.25it/s]
 65%|██████▌   | 8111/12440 [04:13<02:07, 33.86it/s]
 65%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.0819, 'learning_rate': 8.232965365138609e-06, 'epoch': 2.73}


 68%|██████▊   | 8510/12440 [04:24<01:44, 37.66it/s]
 68%|██████▊   | 8514/12440 [04:24<01:46, 36.75it/s]
 68%|██████▊   | 8518/12440 [04:24<01:52, 34.95it/s]
 69%|██████▊   | 8523/12440 [04:24<01:51, 35.07it/s]
 69%|██████▊   | 8528/12440 [04:24<01:46, 36.81it/s]
 69%|██████▊   | 8532/12440 [04:25<01:44, 37.54it/s]
 69%|██████▊   | 8536/12440 [04:25<01:43, 37.60it/s]
 69%|██████▊   | 8540/12440 [04:25<01:43, 37.68it/s]
 69%|██████▊   | 8544/12440 [04:25<01:42, 37.84it/s]
 69%|██████▊   | 8549/12440 [04:25<01:48, 35.78it/s]
 69%|██████▉   | 8553/12440 [04:25<01:56, 33.41it/s]
 69%|██████▉   | 8558/12440 [04:25<01:51, 34.71it/s]
 69%|██████▉   | 8562/12440 [04:25<01:50, 35.10it/s]
 69%|██████▉   | 8566/12440 [04:26<01:50, 34.99it/s]
 69%|██████▉   | 8571/12440 [04:26<01:45, 36.78it/s]
 69%|██████▉   | 8575/12440 [04:26<01:44, 37.08it/s]
 69%|██████▉   | 8579/12440 [04:26<01:43, 37.38it/s]
 69%|██████▉   | 8584/12440 [04:26<01:39, 38.94it/s]
 69%|██████▉   | 8588/12440 [04:26<01:51, 34.4

[2m[36m(_objective pid=26946)[0m {'loss': 0.1028, 'learning_rate': 7.188172806110866e-06, 'epoch': 2.89}


[2m[36m(_objective pid=26946)[0m  72%|███████▏  | 9004/12440 [04:38<01:41, 33.73it/s]
 72%|███████▏  | 9008/12440 [04:38<01:37, 35.30it/s]
 72%|███████▏  | 9012/12440 [04:39<01:45, 32.41it/s]
 72%|███████▏  | 9017/12440 [04:39<01:38, 34.87it/s]
 73%|███████▎  | 9021/12440 [04:39<01:42, 33.35it/s]
 73%|███████▎  | 9025/12440 [04:39<01:38, 34.74it/s]
 73%|███████▎  | 9030/12440 [04:39<01:38, 34.77it/s]
 73%|███████▎  | 9034/12440 [04:39<01:35, 35.73it/s]
 73%|███████▎  | 9039/12440 [04:39<01:31, 37.23it/s]
 73%|███████▎  | 9043/12440 [04:39<01:29, 37.76it/s]
 73%|███████▎  | 9047/12440 [04:40<01:33, 36.48it/s]
 73%|███████▎  | 9051/12440 [04:40<01:32, 36.82it/s]
 73%|███████▎  | 9056/12440 [04:40<01:30, 37.31it/s]
 73%|███████▎  | 9060/12440 [04:40<01:37, 34.54it/s]
 73%|███████▎  | 9064/12440 [04:40<01:36, 35.02it/s]
 73%|███████▎  | 9068/12440 [04:40<01:35, 35.43it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:05:54. Total running time: 45min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                    

[2m[36m(_objective pid=26946)[0m  73%|███████▎  | 9072/12440 [04:40<01:42, 32.89it/s]
 73%|███████▎  | 9076/12440 [04:40<01:40, 33.50it/s]
 73%|███████▎  | 9080/12440 [04:41<01:37, 34.35it/s]
 73%|███████▎  | 9084/12440 [04:41<01:34, 35.69it/s]
 73%|███████▎  | 9088/12440 [04:41<01:31, 36.60it/s]
 73%|███████▎  | 9092/12440 [04:41<01:30, 37.03it/s]
 73%|███████▎  | 9096/12440 [04:41<01:28, 37.68it/s]
 73%|███████▎  | 9100/12440 [04:41<01:43, 32.24it/s]
 73%|███████▎  | 9104/12440 [04:41<01:38, 34.02it/s]
 73%|███████▎  | 9108/12440 [04:41<01:40, 33.12it/s]
 73%|███████▎  | 9112/12440 [04:41<01:45, 31.43it/s]
 73%|███████▎  | 9116/12440 [04:42<01:39, 33.46it/s]
 73%|███████▎  | 9120/12440 [04:42<01:34, 35.19it/s]
 73%|███████▎  | 9124/12440 [04:42<01:31, 36.06it/s]
 73%|███████▎  | 9128/12440 [04:42<01:29, 37.08it/s]
 73%|███████▎  | 9132/12440 [04:42<01:27, 37.66it/s]
 73%|███████▎  | 9136/12440 [04:42<01:27, 37.63it/s]
 73%|███████▎  | 9140/12440 [04:42<01:26, 38.06it/s]
 74%|████

Trial _objective_f556c_00005 finished iteration 3 at 2023-09-11 14:06:11. Total running time: 45min 49s
+-------------------------------------------------+
| Trial _objective_f556c_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        99.9147 |
| time_total_s                            300.018 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.22769 |
| eval_runtime                             9.5861 |
| eval_samples_per_second                 432.605 |
| eval_steps_per_second                    13.561 |
| objective                               0.22769 |
+-------------------------------------------------+

[2m[36m(_objective pid=26946)[0m {'eval_loss': 0.2276928424835205, 'eval_runtime': 9.5861, 'eval_samples_per_second': 432.605, 'eval_steps_per_second': 13.561, 'epoch': 3.0}


[2m[36m(_objective pid=26946)[0m  75%|███████▌  | 9334/12440 [04:59<43:10,  1.20it/s]
 75%|███████▌  | 9337/12440 [04:59<33:08,  1.56it/s]
 75%|███████▌  | 9341/12440 [04:59<23:07,  2.23it/s]
 75%|███████▌  | 9345/12440 [04:59<16:27,  3.14it/s]
 75%|███████▌  | 9349/12440 [04:59<11:48,  4.36it/s]
 75%|███████▌  | 9353/12440 [04:59<08:37,  5.96it/s]
 75%|███████▌  | 9357/12440 [04:59<06:25,  8.01it/s]
 75%|███████▌  | 9361/12440 [04:59<04:58, 10.32it/s]
 75%|███████▌  | 9365/12440 [04:59<03:51, 13.27it/s]
 75%|███████▌  | 9369/12440 [05:00<03:13, 15.86it/s]
 75%|███████▌  | 9373/12440 [05:00<02:39, 19.27it/s]
 75%|███████▌  | 9377/12440 [05:00<02:15, 22.67it/s]
 75%|███████▌  | 9381/12440 [05:00<01:59, 25.64it/s]
 75%|███████▌  | 9386/12440 [05:00<01:43, 29.43it/s]
 75%|███████▌  | 9390/12440 [05:00<01:35, 31.79it/s]
 76%|███████▌  | 9394/12440 [05:00<01:41, 30.14it/s]
 76%|███████▌  | 9398/12440 [05:00<01:34, 32.15it/s]
 76%|███████▌  | 9402/12440 [05:00<01:36, 31.48it/s]
 76%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.0804, 'learning_rate': 6.143380247083124e-06, 'epoch': 3.05}


[2m[36m(_objective pid=26946)[0m  76%|███████▋  | 9502/12440 [05:03<01:16, 38.44it/s]
 76%|███████▋  | 9506/12440 [05:03<01:19, 36.98it/s]
 76%|███████▋  | 9510/12440 [05:03<01:18, 37.45it/s]
 76%|███████▋  | 9515/12440 [05:03<01:16, 38.40it/s]
 77%|███████▋  | 9519/12440 [05:04<01:16, 38.11it/s]
 77%|███████▋  | 9523/12440 [05:04<01:15, 38.40it/s]
 77%|███████▋  | 9528/12440 [05:04<01:15, 38.82it/s]
 77%|███████▋  | 9532/12440 [05:04<01:15, 38.69it/s]
 77%|███████▋  | 9537/12440 [05:04<01:13, 39.56it/s]
 77%|███████▋  | 9541/12440 [05:04<01:14, 38.98it/s]
 77%|███████▋  | 9545/12440 [05:04<01:18, 36.68it/s]
 77%|███████▋  | 9549/12440 [05:04<01:24, 34.35it/s]
 77%|███████▋  | 9553/12440 [05:04<01:22, 34.94it/s]
 77%|███████▋  | 9557/12440 [05:05<01:23, 34.65it/s]
 77%|███████▋  | 9561/12440 [05:05<01:20, 35.68it/s]
 77%|███████▋  | 9565/12440 [05:05<01:19, 36.08it/s]
 77%|███████▋  | 9569/12440 [05:05<01:20, 35.63it/s]
 77%|███████▋  | 9573/12440 [05:05<01:19, 36.11it/s]
 77%|████

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:06:24. Total running time: 46min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

[2m[36m(_objective pid=26946)[0m  78%|███████▊  | 9760/12440 [05:10<01:18, 34.15it/s]
 78%|███████▊  | 9764/12440 [05:11<01:17, 34.63it/s]
 79%|███████▊  | 9768/12440 [05:11<01:18, 34.13it/s]
 79%|███████▊  | 9772/12440 [05:11<01:20, 33.15it/s]
 79%|███████▊  | 9776/12440 [05:11<01:20, 32.94it/s]
 79%|███████▊  | 9780/12440 [05:11<01:19, 33.27it/s]
 79%|███████▊  | 9784/12440 [05:11<01:25, 31.03it/s]
 79%|███████▊  | 9788/12440 [05:11<01:22, 32.26it/s]
 79%|███████▊  | 9793/12440 [05:11<01:15, 35.05it/s]
 79%|███████▉  | 9797/12440 [05:12<01:13, 35.90it/s]
 79%|███████▉  | 9801/12440 [05:12<01:13, 35.75it/s]
 79%|███████▉  | 9805/12440 [05:12<01:12, 36.57it/s]
 79%|███████▉  | 9810/12440 [05:12<01:09, 37.93it/s]
 79%|███████▉  | 9814/12440 [05:12<01:15, 34.61it/s]
 79%|███████▉  | 9819/12440 [05:12<01:12, 36.28it/s]
 79%|███████▉  | 9823/12440 [05:12<01:13, 35.50it/s]
 79%|███████▉  | 9827/12440 [05:12<01:20, 32.33it/s]
 79%|███████▉  | 9831/12440 [05:13<01:18, 33.36it/s]
 79%|████

[2m[36m(_objective pid=26946)[0m {'loss': 0.0673, 'learning_rate': 5.0985876880553816e-06, 'epoch': 3.22}


 80%|████████  | 10006/12440 [05:17<01:11, 33.88it/s]
 80%|████████  | 10010/12440 [05:18<01:15, 32.12it/s]
 80%|████████  | 10014/12440 [05:18<01:19, 30.59it/s]
 81%|████████  | 10018/12440 [05:18<01:16, 31.64it/s]
 81%|████████  | 10022/12440 [05:18<01:12, 33.46it/s]
 81%|████████  | 10027/12440 [05:18<01:07, 35.53it/s]
 81%|████████  | 10031/12440 [05:18<01:13, 32.77it/s]
 81%|████████  | 10035/12440 [05:18<01:11, 33.73it/s]
 81%|████████  | 10039/12440 [05:18<01:10, 34.03it/s]
 81%|████████  | 10043/12440 [05:19<01:12, 33.14it/s]
 81%|████████  | 10047/12440 [05:19<01:09, 34.35it/s]
 81%|████████  | 10051/12440 [05:19<01:15, 31.81it/s]
 81%|████████  | 10055/12440 [05:19<01:11, 33.31it/s]
 81%|████████  | 10059/12440 [05:19<01:08, 34.77it/s]
 81%|████████  | 10064/12440 [05:19<01:04, 36.63it/s]
 81%|████████  | 10068/12440 [05:19<01:05, 36.33it/s]
 81%|████████  | 10072/12440 [05:19<01:06, 35.54it/s]
 81%|████████  | 10076/12440 [05:19<01:06, 35.67it/s]
 81%|████████  | 10080/12440

[2m[36m(_objective pid=26946)[0m {'loss': 0.0624, 'learning_rate': 4.053795129027639e-06, 'epoch': 3.38}


[2m[36m(_objective pid=26946)[0m  84%|████████▍ | 10505/12440 [05:32<00:54, 35.64it/s]
 84%|████████▍ | 10509/12440 [05:32<00:53, 36.36it/s]
 85%|████████▍ | 10513/12440 [05:32<00:55, 34.46it/s]
 85%|████████▍ | 10517/12440 [05:32<00:53, 35.67it/s]
 85%|████████▍ | 10522/12440 [05:32<00:51, 37.23it/s]
 85%|████████▍ | 10526/12440 [05:32<00:51, 37.21it/s]
 85%|████████▍ | 10531/12440 [05:32<00:52, 36.57it/s]
 85%|████████▍ | 10535/12440 [05:32<00:53, 35.31it/s]
 85%|████████▍ | 10539/12440 [05:33<00:57, 33.21it/s]
 85%|████████▍ | 10543/12440 [05:33<00:54, 34.70it/s]
 85%|████████▍ | 10547/12440 [05:33<00:52, 35.86it/s]
 85%|████████▍ | 10551/12440 [05:33<01:02, 30.36it/s]
 85%|████████▍ | 10556/12440 [05:33<00:56, 33.24it/s]
 85%|████████▍ | 10560/12440 [05:33<01:00, 31.15it/s]
 85%|████████▍ | 10565/12440 [05:33<00:55, 34.04it/s]
 85%|████████▍ | 10569/12440 [05:33<00:53, 35.24it/s]
 85%|████████▌ | 10574/12440 [05:34<00:50, 37.00it/s]
 85%|████████▌ | 10578/12440 [05:34<00:50, 36

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:06:54. Total running time: 46min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                    

[2m[36m(_objective pid=26946)[0m  87%|████████▋ | 10811/12440 [05:40<00:44, 36.52it/s]
 87%|████████▋ | 10815/12440 [05:40<00:44, 36.54it/s]
 87%|████████▋ | 10819/12440 [05:41<00:44, 36.83it/s]
 87%|████████▋ | 10823/12440 [05:41<00:43, 37.56it/s]
 87%|████████▋ | 10827/12440 [05:41<00:43, 37.35it/s]
 87%|████████▋ | 10831/12440 [05:41<00:42, 37.44it/s]
 87%|████████▋ | 10835/12440 [05:41<00:45, 35.34it/s]
 87%|████████▋ | 10839/12440 [05:41<00:46, 34.47it/s]
 87%|████████▋ | 10843/12440 [05:41<00:45, 35.09it/s]
 87%|████████▋ | 10847/12440 [05:41<00:44, 36.17it/s]
 87%|████████▋ | 10851/12440 [05:41<00:44, 35.40it/s]
 87%|████████▋ | 10855/12440 [05:42<00:44, 35.99it/s]
 87%|████████▋ | 10859/12440 [05:42<00:43, 36.72it/s]
 87%|████████▋ | 10864/12440 [05:42<00:41, 37.99it/s]
 87%|████████▋ | 10869/12440 [05:42<00:45, 34.80it/s]
 87%|████████▋ | 10873/12440 [05:42<00:44, 34.99it/s]
 87%|████████▋ | 10877/12440 [05:42<00:43, 35.75it/s]
 87%|████████▋ | 10881/12440 [05:42<00:44, 35

[2m[36m(_objective pid=26946)[0m {'loss': 0.0602, 'learning_rate': 3.009002569999897e-06, 'epoch': 3.54}


[2m[36m(_objective pid=26946)[0m  88%|████████▊ | 11005/12440 [05:46<00:38, 37.72it/s]
 89%|████████▊ | 11010/12440 [05:46<00:37, 38.54it/s]
 89%|████████▊ | 11015/12440 [05:46<00:36, 39.28it/s]
 89%|████████▊ | 11020/12440 [05:46<00:35, 39.56it/s]
 89%|████████▊ | 11025/12440 [05:46<00:36, 38.70it/s]
 89%|████████▊ | 11029/12440 [05:46<00:44, 31.99it/s]
 89%|████████▊ | 11033/12440 [05:47<00:41, 33.84it/s]
 89%|████████▊ | 11038/12440 [05:47<00:39, 35.84it/s]
 89%|████████▉ | 11042/12440 [05:47<00:38, 36.63it/s]
 89%|████████▉ | 11046/12440 [05:47<00:37, 37.30it/s]
 89%|████████▉ | 11050/12440 [05:47<00:37, 37.22it/s]
 89%|████████▉ | 11055/12440 [05:47<00:36, 38.44it/s]
 89%|████████▉ | 11059/12440 [05:47<00:35, 38.66it/s]
 89%|████████▉ | 11063/12440 [05:47<00:39, 34.58it/s]
 89%|████████▉ | 11067/12440 [05:48<00:41, 32.92it/s]
 89%|████████▉ | 11071/12440 [05:48<00:39, 34.36it/s]
 89%|████████▉ | 11075/12440 [05:48<00:38, 35.62it/s]
 89%|████████▉ | 11079/12440 [05:48<00:41, 32

[2m[36m(_objective pid=26946)[0m {'loss': 0.0556, 'learning_rate': 1.9642100109721552e-06, 'epoch': 3.7}


[2m[36m(_objective pid=26946)[0m  93%|█████████▎| 11510/12440 [06:00<00:24, 37.53it/s]
 93%|█████████▎| 11514/12440 [06:00<00:24, 37.76it/s]
 93%|█████████▎| 11518/12440 [06:00<00:27, 33.56it/s]
 93%|█████████▎| 11522/12440 [06:01<00:26, 34.96it/s]
 93%|█████████▎| 11527/12440 [06:01<00:24, 36.77it/s]
 93%|█████████▎| 11532/12440 [06:01<00:23, 37.99it/s]
 93%|█████████▎| 11536/12440 [06:01<00:23, 38.52it/s]
 93%|█████████▎| 11541/12440 [06:01<00:22, 39.19it/s]
 93%|█████████▎| 11546/12440 [06:01<00:22, 39.33it/s]
 93%|█████████▎| 11551/12440 [06:01<00:22, 39.81it/s]
 93%|█████████▎| 11555/12440 [06:01<00:22, 39.67it/s]
 93%|█████████▎| 11559/12440 [06:01<00:22, 39.40it/s]
 93%|█████████▎| 11563/12440 [06:02<00:22, 39.32it/s]
 93%|█████████▎| 11568/12440 [06:02<00:21, 39.68it/s]
 93%|█████████▎| 11572/12440 [06:02<00:24, 34.92it/s]
 93%|█████████▎| 11576/12440 [06:02<00:24, 35.93it/s]
 93%|█████████▎| 11581/12440 [06:02<00:24, 34.75it/s]
 93%|█████████▎| 11585/12440 [06:02<00:24, 35

Trial status: 5 TERMINATED | 1 RUNNING | 14 PENDING
Current time: 2023-09-11 14:07:24. Total running time: 47min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00005   RUNNING          2.59944e-05                    4                     

[2m[36m(_objective pid=26946)[0m  95%|█████████▌| 11858/12440 [06:10<00:18, 30.87it/s]
 95%|█████████▌| 11862/12440 [06:11<00:19, 29.55it/s]
 95%|█████████▌| 11866/12440 [06:11<00:18, 31.14it/s]
 95%|█████████▌| 11870/12440 [06:11<00:17, 32.18it/s]
 95%|█████████▌| 11874/12440 [06:11<00:17, 32.69it/s]
 95%|█████████▌| 11878/12440 [06:11<00:16, 34.27it/s]
 96%|█████████▌| 11882/12440 [06:11<00:16, 34.42it/s]
 96%|█████████▌| 11886/12440 [06:11<00:15, 34.90it/s]
 96%|█████████▌| 11890/12440 [06:11<00:16, 34.20it/s]
 96%|█████████▌| 11894/12440 [06:12<00:15, 34.82it/s]
 96%|█████████▌| 11898/12440 [06:12<00:15, 33.98it/s]
 96%|█████████▌| 11902/12440 [06:12<00:15, 34.12it/s]
 96%|█████████▌| 11906/12440 [06:12<00:16, 32.96it/s]
 96%|█████████▌| 11910/12440 [06:12<00:16, 32.55it/s]
 96%|█████████▌| 11914/12440 [06:12<00:17, 30.48it/s]
 96%|█████████▌| 11919/12440 [06:12<00:15, 32.72it/s]
 96%|█████████▌| 11923/12440 [06:12<00:17, 29.84it/s]
 96%|█████████▌| 11927/12440 [06:13<00:16, 31

[2m[36m(_objective pid=26946)[0m {'loss': 0.0504, 'learning_rate': 9.194174519444131e-07, 'epoch': 3.86}


[2m[36m(_objective pid=26946)[0m  97%|█████████▋| 12006/12440 [06:15<00:12, 33.95it/s]
 97%|█████████▋| 12010/12440 [06:15<00:13, 31.64it/s]
 97%|█████████▋| 12014/12440 [06:15<00:12, 33.59it/s]
 97%|█████████▋| 12018/12440 [06:15<00:12, 34.81it/s]
 97%|█████████▋| 12022/12440 [06:15<00:12, 34.74it/s]
 97%|█████████▋| 12026/12440 [06:15<00:13, 31.59it/s]
 97%|█████████▋| 12030/12440 [06:16<00:13, 30.03it/s]
 97%|█████████▋| 12034/12440 [06:16<00:12, 32.29it/s]
 97%|█████████▋| 12038/12440 [06:16<00:11, 33.91it/s]
 97%|█████████▋| 12042/12440 [06:16<00:11, 35.11it/s]
 97%|█████████▋| 12046/12440 [06:16<00:10, 35.87it/s]
 97%|█████████▋| 12050/12440 [06:16<00:10, 36.61it/s]
 97%|█████████▋| 12054/12440 [06:16<00:10, 37.43it/s]
 97%|█████████▋| 12058/12440 [06:16<00:10, 35.54it/s]
 97%|█████████▋| 12062/12440 [06:16<00:10, 34.57it/s]
 97%|█████████▋| 12066/12440 [06:17<00:10, 36.02it/s]
 97%|█████████▋| 12070/12440 [06:17<00:10, 36.34it/s]
 97%|█████████▋| 12074/12440 [06:17<00:09, 37

Trial _objective_f556c_00005 finished iteration 4 at 2023-09-11 14:07:51. Total running time: 47min 29s
+-------------------------------------------------+
| Trial _objective_f556c_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        99.8862 |
| time_total_s                            399.904 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.24208 |
| eval_runtime                             9.5794 |
| eval_samples_per_second                 432.909 |
| eval_steps_per_second                    13.571 |
| objective                               0.24208 |
+-------------------------------------------------+

[2m[36m(_objective pid=26946)[0m {'eval_loss': 0.24207715690135956, 'eval_runtime': 9.5794, 'eval_samples_per_second': 432.909, 'eval_steps_per_second': 13.571, 'epoch': 4.0}


[2m[36m(_objective pid=26946)[0m                                                      
[2m[36m(_objective pid=26946)[0m                                                  [A100%|██████████| 12440/12440 [06:37<00:00, 34.91it/s]
[2m[36m(_objective pid=26946)[0m 100%|██████████| 130/130 [00:09<00:00, 12.00it/s][A
[2m[36m(_objective pid=26946)[0m                                                  [A


Trial _objective_f556c_00005 completed after 4 iterations at 2023-09-11 14:07:52. Total running time: 47min 31s

[2m[36m(_objective pid=26946)[0m {'train_runtime': 399.062, 'train_samples_per_second': 124.682, 'train_steps_per_second': 31.173, 'train_loss': 0.14316726282861839, 'epoch': 4.0}


[2m[36m(_objective pid=26946)[0m                                                      100%|██████████| 12440/12440 [06:39<00:00, 34.91it/s]100%|██████████| 12440/12440 [06:39<00:00, 31.17it/s]


Trial status: 6 TERMINATED | 14 PENDING
Current time: 2023-09-11 14:07:54. Total running time: 47min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00000   TERMINATED       2.49816e-05                    5                        2       



Trial _objective_f556c_00006 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00006 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             4e-05 |
| num_train_epochs                              3 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.15427 |
+-------------------------------------------------+



[2m[36m(_objective pid=28725)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias']
[2m[36m(_objective pid=28725)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=28725)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=28725)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=28725)[0m {'loss': 0.4135, 'learning_rate': 3.918801174319533e-05, 'epoch': 0.16}


[2m[36m(_objective pid=28725)[0m   5%|▌         | 507/9330 [00:15<04:26, 33.08it/s]
  5%|▌         | 511/9330 [00:15<04:25, 33.24it/s]
  6%|▌         | 515/9330 [00:15<04:17, 34.23it/s]
  6%|▌         | 519/9330 [00:15<04:08, 35.45it/s]
  6%|▌         | 523/9330 [00:15<04:01, 36.40it/s]
  6%|▌         | 527/9330 [00:15<03:59, 36.80it/s]
  6%|▌         | 531/9330 [00:15<03:56, 37.24it/s]
  6%|▌         | 535/9330 [00:16<04:11, 34.96it/s]
  6%|▌         | 539/9330 [00:16<04:24, 33.29it/s]
  6%|▌         | 543/9330 [00:16<04:11, 34.92it/s]
  6%|▌         | 547/9330 [00:16<04:09, 35.27it/s]
  6%|▌         | 551/9330 [00:16<04:02, 36.16it/s]
  6%|▌         | 555/9330 [00:16<04:01, 36.37it/s]
  6%|▌         | 559/9330 [00:16<04:00, 36.46it/s]
  6%|▌         | 563/9330 [00:16<03:57, 36.90it/s]
  6%|▌         | 567/9330 [00:16<03:52, 37.62it/s]
  6%|▌         | 571/9330 [00:17<03:54, 37.32it/s]
  6%|▌         | 575/9330 [00:17<03:55, 37.10it/s]
  6%|▌         | 579/9330 [00:17<03:52, 37.63

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:08:24. Total running time: 48min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                     

[2m[36m(_objective pid=28725)[0m   8%|▊         | 774/9330 [00:22<04:01, 35.43it/s]
  8%|▊         | 778/9330 [00:23<04:08, 34.45it/s]
  8%|▊         | 782/9330 [00:23<04:34, 31.18it/s]
  8%|▊         | 786/9330 [00:23<04:37, 30.84it/s]
  8%|▊         | 790/9330 [00:23<04:49, 29.53it/s]
  9%|▊         | 794/9330 [00:23<04:30, 31.57it/s]
  9%|▊         | 798/9330 [00:23<04:16, 33.28it/s]
  9%|▊         | 802/9330 [00:23<04:35, 30.96it/s]
  9%|▊         | 806/9330 [00:24<04:24, 32.20it/s]
  9%|▊         | 810/9330 [00:24<04:15, 33.35it/s]
  9%|▊         | 814/9330 [00:24<04:31, 31.32it/s]
  9%|▉         | 818/9330 [00:24<04:25, 32.09it/s]
  9%|▉         | 822/9330 [00:24<04:37, 30.62it/s]
  9%|▉         | 826/9330 [00:24<04:29, 31.51it/s]
  9%|▉         | 830/9330 [00:24<04:22, 32.43it/s]
  9%|▉         | 834/9330 [00:24<04:13, 33.53it/s]
  9%|▉         | 838/9330 [00:25<04:33, 31.07it/s]
  9%|▉         | 842/9330 [00:25<04:34, 30.90it/s]
  9%|▉         | 846/9330 [00:25<04:33, 31.08

[2m[36m(_objective pid=28725)[0m {'loss': 0.3332, 'learning_rate': 3.696898503067011e-05, 'epoch': 0.32}


[2m[36m(_objective pid=28725)[0m  11%|█         | 1006/9330 [00:29<03:31, 39.42it/s]
 11%|█         | 1010/9330 [00:29<03:33, 38.97it/s]
 11%|█         | 1014/9330 [00:29<03:33, 38.96it/s]
 11%|█         | 1018/9330 [00:30<03:38, 38.09it/s]
 11%|█         | 1022/9330 [00:30<03:47, 36.51it/s]
 11%|█         | 1027/9330 [00:30<03:40, 37.67it/s]
 11%|█         | 1031/9330 [00:30<03:40, 37.72it/s]
 11%|█         | 1035/9330 [00:30<03:47, 36.50it/s]
 11%|█         | 1039/9330 [00:30<03:43, 37.06it/s]
 11%|█         | 1043/9330 [00:30<03:39, 37.76it/s]
 11%|█         | 1048/9330 [00:30<03:34, 38.61it/s]
 11%|█▏        | 1052/9330 [00:30<03:47, 36.35it/s]
 11%|█▏        | 1056/9330 [00:31<03:43, 37.00it/s]
 11%|█▏        | 1060/9330 [00:31<03:44, 36.76it/s]
 11%|█▏        | 1064/9330 [00:31<03:44, 36.88it/s]
 11%|█▏        | 1068/9330 [00:31<03:39, 37.63it/s]
 11%|█▏        | 1072/9330 [00:31<03:37, 37.94it/s]
 12%|█▏        | 1076/9330 [00:31<03:35, 38.37it/s]
 12%|█▏        | 1080/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.3048, 'learning_rate': 3.47499583181449e-05, 'epoch': 0.48}


[2m[36m(_objective pid=28725)[0m  16%|█▌        | 1503/9330 [00:44<03:44, 34.89it/s]
 16%|█▌        | 1507/9330 [00:44<03:44, 34.84it/s]
 16%|█▌        | 1511/9330 [00:44<03:45, 34.68it/s]
 16%|█▌        | 1515/9330 [00:44<03:48, 34.25it/s]
 16%|█▋        | 1519/9330 [00:44<03:41, 35.34it/s]
 16%|█▋        | 1523/9330 [00:44<03:46, 34.50it/s]
 16%|█▋        | 1527/9330 [00:44<03:41, 35.29it/s]
 16%|█▋        | 1531/9330 [00:44<03:34, 36.36it/s]
 16%|█▋        | 1535/9330 [00:44<03:29, 37.20it/s]
 16%|█▋        | 1539/9330 [00:45<03:26, 37.73it/s]
 17%|█▋        | 1543/9330 [00:45<03:47, 34.26it/s]
 17%|█▋        | 1547/9330 [00:45<03:38, 35.58it/s]
 17%|█▋        | 1551/9330 [00:45<03:42, 34.88it/s]
 17%|█▋        | 1555/9330 [00:45<04:09, 31.12it/s]
 17%|█▋        | 1559/9330 [00:45<04:17, 30.24it/s]
 17%|█▋        | 1563/9330 [00:45<03:59, 32.47it/s]
 17%|█▋        | 1567/9330 [00:45<03:47, 34.05it/s]
 17%|█▋        | 1571/9330 [00:46<03:52, 33.42it/s]
 17%|█▋        | 1576/9330 

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:08:54. Total running time: 48min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                    

[2m[36m(_objective pid=28725)[0m  19%|█▉        | 1807/9330 [00:52<03:34, 35.10it/s]
 19%|█▉        | 1811/9330 [00:53<03:52, 32.35it/s]
 19%|█▉        | 1816/9330 [00:53<03:35, 34.87it/s]
 20%|█▉        | 1820/9330 [00:53<03:27, 36.14it/s]
 20%|█▉        | 1824/9330 [00:53<03:22, 37.00it/s]
 20%|█▉        | 1828/9330 [00:53<03:30, 35.63it/s]
 20%|█▉        | 1832/9330 [00:53<03:26, 36.36it/s]
 20%|█▉        | 1837/9330 [00:53<03:33, 35.08it/s]
 20%|█▉        | 1841/9330 [00:53<03:37, 34.37it/s]
 20%|█▉        | 1846/9330 [00:54<03:27, 36.08it/s]
 20%|█▉        | 1850/9330 [00:54<03:38, 34.20it/s]
 20%|█▉        | 1855/9330 [00:54<03:27, 36.05it/s]
 20%|█▉        | 1859/9330 [00:54<03:24, 36.61it/s]
 20%|█▉        | 1863/9330 [00:54<04:00, 31.01it/s]
 20%|██        | 1868/9330 [00:54<03:40, 33.82it/s]
 20%|██        | 1872/9330 [00:54<03:35, 34.57it/s]
 20%|██        | 1876/9330 [00:54<03:30, 35.45it/s]
 20%|██        | 1880/9330 [00:55<03:46, 32.83it/s]
 20%|██        | 1884/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.2434, 'learning_rate': 3.253093160561968e-05, 'epoch': 0.64}


[2m[36m(_objective pid=28725)[0m  22%|██▏       | 2008/9330 [00:58<03:28, 35.10it/s]
 22%|██▏       | 2012/9330 [00:58<03:25, 35.64it/s]
 22%|██▏       | 2016/9330 [00:59<03:21, 36.26it/s]
 22%|██▏       | 2020/9330 [00:59<03:38, 33.39it/s]
 22%|██▏       | 2024/9330 [00:59<03:48, 32.00it/s]
 22%|██▏       | 2028/9330 [00:59<04:02, 30.16it/s]
 22%|██▏       | 2032/9330 [00:59<03:46, 32.19it/s]
 22%|██▏       | 2036/9330 [00:59<03:34, 33.96it/s]
 22%|██▏       | 2040/9330 [00:59<03:25, 35.52it/s]
 22%|██▏       | 2044/9330 [00:59<03:45, 32.33it/s]
 22%|██▏       | 2048/9330 [01:00<03:36, 33.58it/s]
 22%|██▏       | 2052/9330 [01:00<03:51, 31.46it/s]
 22%|██▏       | 2056/9330 [01:00<03:50, 31.57it/s]
 22%|██▏       | 2060/9330 [01:00<03:36, 33.63it/s]
 22%|██▏       | 2064/9330 [01:00<03:35, 33.67it/s]
 22%|██▏       | 2068/9330 [01:00<03:26, 35.12it/s]
 22%|██▏       | 2072/9330 [01:00<03:23, 35.71it/s]
 22%|██▏       | 2076/9330 [01:00<03:17, 36.77it/s]
 22%|██▏       | 2080/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.2198, 'learning_rate': 3.0311904893094465e-05, 'epoch': 0.8}


[2m[36m(_objective pid=28725)[0m  27%|██▋       | 2503/9330 [01:13<03:17, 34.49it/s]
 27%|██▋       | 2507/9330 [01:13<03:36, 31.54it/s]
 27%|██▋       | 2511/9330 [01:13<03:35, 31.57it/s]
 27%|██▋       | 2515/9330 [01:13<03:47, 29.97it/s]
 27%|██▋       | 2519/9330 [01:13<03:35, 31.61it/s]
 27%|██▋       | 2523/9330 [01:13<03:24, 33.22it/s]
 27%|██▋       | 2527/9330 [01:13<03:23, 33.42it/s]
 27%|██▋       | 2531/9330 [01:14<03:27, 32.73it/s]
 27%|██▋       | 2535/9330 [01:14<03:27, 32.78it/s]
 27%|██▋       | 2539/9330 [01:14<03:17, 34.34it/s]
 27%|██▋       | 2543/9330 [01:14<03:14, 34.87it/s]
 27%|██▋       | 2547/9330 [01:14<03:10, 35.56it/s]
 27%|██▋       | 2551/9330 [01:14<03:15, 34.60it/s]
 27%|██▋       | 2555/9330 [01:14<03:13, 35.06it/s]
 27%|██▋       | 2559/9330 [01:14<03:10, 35.49it/s]
 27%|██▋       | 2563/9330 [01:14<03:12, 35.20it/s]
 28%|██▊       | 2567/9330 [01:15<03:29, 32.26it/s]
 28%|██▊       | 2571/9330 [01:15<03:25, 32.93it/s]
 28%|██▊       | 2575/9330 

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:09:24. Total running time: 49min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                     

[2m[36m(_objective pid=28725)[0m  30%|███       | 2834/9330 [01:22<03:26, 31.39it/s]
 30%|███       | 2838/9330 [01:22<03:13, 33.55it/s]
 30%|███       | 2842/9330 [01:23<03:11, 33.87it/s]
 31%|███       | 2846/9330 [01:23<03:12, 33.65it/s]
 31%|███       | 2850/9330 [01:23<03:14, 33.34it/s]
 31%|███       | 2854/9330 [01:23<03:05, 34.83it/s]
 31%|███       | 2858/9330 [01:23<03:00, 35.77it/s]
 31%|███       | 2862/9330 [01:23<02:56, 36.62it/s]
 31%|███       | 2866/9330 [01:23<02:57, 36.49it/s]
 31%|███       | 2870/9330 [01:23<02:54, 37.06it/s]
 31%|███       | 2874/9330 [01:24<03:04, 34.97it/s]
 31%|███       | 2878/9330 [01:24<03:02, 35.34it/s]
 31%|███       | 2882/9330 [01:24<02:56, 36.58it/s]
 31%|███       | 2886/9330 [01:24<02:52, 37.42it/s]
 31%|███       | 2890/9330 [01:24<02:54, 36.88it/s]
 31%|███       | 2894/9330 [01:24<02:55, 36.71it/s]
 31%|███       | 2898/9330 [01:24<02:52, 37.26it/s]
 31%|███       | 2902/9330 [01:24<03:11, 33.52it/s]
 31%|███       | 2906/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.2211, 'learning_rate': 2.8092878180569247e-05, 'epoch': 0.96}


[2m[36m(_objective pid=28725)[0m  32%|███▏      | 3007/9330 [01:27<03:32, 29.70it/s]
 32%|███▏      | 3011/9330 [01:28<03:20, 31.58it/s]
 32%|███▏      | 3015/9330 [01:28<03:10, 33.18it/s]
 32%|███▏      | 3019/9330 [01:28<03:06, 33.90it/s]
 32%|███▏      | 3023/9330 [01:28<02:59, 35.21it/s]
 32%|███▏      | 3027/9330 [01:28<02:57, 35.54it/s]
 32%|███▏      | 3031/9330 [01:28<03:15, 32.28it/s]
 33%|███▎      | 3035/9330 [01:28<03:11, 32.86it/s]
 33%|███▎      | 3039/9330 [01:28<03:04, 34.03it/s]
 33%|███▎      | 3043/9330 [01:29<03:08, 33.31it/s]
 33%|███▎      | 3047/9330 [01:29<03:06, 33.73it/s]
 33%|███▎      | 3051/9330 [01:29<03:03, 34.24it/s]
 33%|███▎      | 3055/9330 [01:29<03:02, 34.47it/s]
 33%|███▎      | 3059/9330 [01:29<02:56, 35.55it/s]
 33%|███▎      | 3063/9330 [01:29<02:56, 35.58it/s]
 33%|███▎      | 3067/9330 [01:29<02:55, 35.69it/s]
 33%|███▎      | 3071/9330 [01:29<02:53, 36.01it/s]
 33%|███▎      | 3075/9330 [01:29<02:53, 35.99it/s]
 33%|███▎      | 3079/9330 

Trial _objective_f556c_00006 finished iteration 1 at 2023-09-11 14:09:42. Total running time: 49min 21s
+-------------------------------------------------+
| Trial _objective_f556c_00006 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.897 |
| time_total_s                            102.897 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.23449 |
| eval_runtime                             9.5914 |
| eval_samples_per_second                 432.369 |
| eval_steps_per_second                    13.554 |
| objective                               0.23449 |
+-------------------------------------------------+

[2m[36m(_objective pid=28725)[0m {'eval_loss': 0.23449134826660156, 'eval_runtime': 9.5914, 'eval_samples_per_second': 432.369, 'eval_steps_per_second': 13.554, 'epoch': 1.0}


[2m[36m(_objective pid=28725)[0m                                                    
[2m[36m(_objective pid=28725)[0m                                                  [A 33%|███▎      | 3110/9330 [01:40<03:04, 33.65it/s]
[2m[36m(_objective pid=28725)[0m 100%|██████████| 130/130 [00:09<00:00, 11.99it/s][A
                                                 [A
 33%|███▎      | 3111/9330 [01:41<1:27:29,  1.18it/s]
 33%|███▎      | 3115/9330 [01:42<1:02:08,  1.67it/s]
 33%|███▎      | 3119/9330 [01:42<44:56,  2.30it/s]  
 33%|███▎      | 3123/9330 [01:42<32:18,  3.20it/s]
 34%|███▎      | 3127/9330 [01:42<23:29,  4.40it/s]
 34%|███▎      | 3131/9330 [01:42<17:15,  5.99it/s]
 34%|███▎      | 3135/9330 [01:42<12:56,  7.98it/s]
 34%|███▎      | 3139/9330 [01:42<09:58, 10.34it/s]
 34%|███▎      | 3143/9330 [01:42<07:47, 13.22it/s]
 34%|███▎      | 3147/9330 [01:42<06:17, 16.36it/s]
 34%|███▍      | 3151/9330 [01:43<05:13, 19.72it/s]
 34%|███▍      | 3155/9330 [01:43<04:36, 22.36i

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:09:54. Total running time: 49min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                    

[2m[36m(_objective pid=28725)[0m  37%|███▋      | 3481/9330 [01:52<02:51, 34.13it/s]
 37%|███▋      | 3485/9330 [01:53<02:48, 34.75it/s]
 37%|███▋      | 3489/9330 [01:53<02:48, 34.62it/s]
 37%|███▋      | 3493/9330 [01:53<02:46, 34.98it/s]
 37%|███▋      | 3498/9330 [01:53<02:39, 36.65it/s]
 38%|███▊      | 3502/9330 [01:53<02:35, 37.49it/s]


[2m[36m(_objective pid=28725)[0m {'loss': 0.1586, 'learning_rate': 2.587385146804403e-05, 'epoch': 1.13}


[2m[36m(_objective pid=28725)[0m  38%|███▊      | 3506/9330 [01:53<02:35, 37.51it/s]
 38%|███▊      | 3510/9330 [01:53<02:33, 37.91it/s]
 38%|███▊      | 3514/9330 [01:53<02:41, 35.94it/s]
 38%|███▊      | 3518/9330 [01:53<02:49, 34.32it/s]
 38%|███▊      | 3523/9330 [01:54<02:40, 36.29it/s]
 38%|███▊      | 3527/9330 [01:54<02:53, 33.39it/s]
 38%|███▊      | 3531/9330 [01:54<02:58, 32.51it/s]
 38%|███▊      | 3535/9330 [01:54<02:50, 33.94it/s]
 38%|███▊      | 3539/9330 [01:54<02:58, 32.51it/s]
 38%|███▊      | 3543/9330 [01:54<02:49, 34.14it/s]
 38%|███▊      | 3547/9330 [01:54<02:44, 35.26it/s]
 38%|███▊      | 3552/9330 [01:54<02:36, 36.90it/s]
 38%|███▊      | 3556/9330 [01:55<02:33, 37.66it/s]
 38%|███▊      | 3560/9330 [01:55<02:36, 36.85it/s]
 38%|███▊      | 3564/9330 [01:55<02:44, 35.12it/s]
 38%|███▊      | 3568/9330 [01:55<02:39, 36.13it/s]
 38%|███▊      | 3572/9330 [01:55<02:55, 32.77it/s]
 38%|███▊      | 3576/9330 [01:55<02:46, 34.51it/s]
 38%|███▊      | 3580/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.1264, 'learning_rate': 2.365482475551881e-05, 'epoch': 1.29}


[2m[36m(_objective pid=28725)[0m  43%|████▎     | 4006/9330 [02:08<02:50, 31.22it/s]
 43%|████▎     | 4010/9330 [02:08<02:58, 29.74it/s]
 43%|████▎     | 4014/9330 [02:08<02:45, 32.14it/s]
 43%|████▎     | 4018/9330 [02:08<02:35, 34.15it/s]
 43%|████▎     | 4022/9330 [02:08<02:29, 35.59it/s]
 43%|████▎     | 4026/9330 [02:09<02:25, 36.40it/s]
 43%|████▎     | 4030/9330 [02:09<02:22, 37.26it/s]
 43%|████▎     | 4034/9330 [02:09<02:25, 36.27it/s]
 43%|████▎     | 4038/9330 [02:09<02:21, 37.31it/s]
 43%|████▎     | 4042/9330 [02:09<02:20, 37.56it/s]
 43%|████▎     | 4046/9330 [02:09<02:18, 38.19it/s]
 43%|████▎     | 4050/9330 [02:09<02:16, 38.56it/s]
 43%|████▎     | 4054/9330 [02:09<02:18, 38.19it/s]
 43%|████▎     | 4058/9330 [02:09<02:20, 37.64it/s]
 44%|████▎     | 4062/9330 [02:09<02:20, 37.37it/s]
 44%|████▎     | 4066/9330 [02:10<02:20, 37.58it/s]
 44%|████▎     | 4070/9330 [02:10<02:18, 38.02it/s]
 44%|████▎     | 4074/9330 [02:10<02:17, 38.11it/s]
 44%|████▎     | 4078/9330 

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:10:24. Total running time: 50min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                     

 48%|████▊     | 4500/9330 [02:22<02:32, 31.58it/s]
 48%|████▊     | 4504/9330 [02:23<02:32, 31.56it/s]
 48%|████▊     | 4508/9330 [02:23<02:25, 33.18it/s]
 48%|████▊     | 4512/9330 [02:23<02:19, 34.62it/s]
 48%|████▊     | 4516/9330 [02:23<02:17, 35.04it/s]
 48%|████▊     | 4520/9330 [02:23<02:12, 36.30it/s]
 48%|████▊     | 4524/9330 [02:23<02:12, 36.19it/s]
 49%|████▊     | 4528/9330 [02:23<02:11, 36.52it/s]
 49%|████▊     | 4532/9330 [02:23<02:25, 32.93it/s]
 49%|████▊     | 4536/9330 [02:23<02:24, 33.21it/s]
 49%|████▊     | 4540/9330 [02:24<02:19, 34.22it/s]
 49%|████▊     | 4544/9330 [02:24<02:23, 33.29it/s]
 49%|████▊     | 4548/9330 [02:24<02:17, 34.86it/s]
 49%|████▉     | 4552/9330 [02:24<02:13, 35.86it/s]
 49%|████▉     | 4557/9330 [02:24<02:07, 37.36it/s]
 49%|████▉     | 4561/9330 [02:24<02:24, 33.10it/s]
 49%|████▉     | 4565/9330 [02:24<02:21, 33.57it/s]
 49%|████▉     | 4570/9330 [02:24<02:14, 35.31it/s]
 49%|████▉     | 4574/9330 [02:25<02:15, 35.13it/s]
 49%|████▉  

[2m[36m(_objective pid=28725)[0m {'loss': 0.1491, 'learning_rate': 1.921677133046838e-05, 'epoch': 1.61}


[2m[36m(_objective pid=28725)[0m  54%|█████▎    | 5006/9330 [02:38<02:05, 34.50it/s]
 54%|█████▎    | 5010/9330 [02:38<02:01, 35.60it/s]
 54%|█████▎    | 5014/9330 [02:38<01:57, 36.60it/s]
 54%|█████▍    | 5018/9330 [02:38<02:04, 34.68it/s]
 54%|█████▍    | 5022/9330 [02:38<02:14, 32.10it/s]
 54%|█████▍    | 5026/9330 [02:38<02:07, 33.84it/s]
 54%|█████▍    | 5030/9330 [02:38<02:16, 31.51it/s]
 54%|█████▍    | 5034/9330 [02:38<02:10, 33.01it/s]
 54%|█████▍    | 5038/9330 [02:38<02:03, 34.73it/s]
 54%|█████▍    | 5042/9330 [02:39<02:00, 35.46it/s]
 54%|█████▍    | 5046/9330 [02:39<02:03, 34.71it/s]
 54%|█████▍    | 5050/9330 [02:39<02:17, 31.21it/s]
 54%|█████▍    | 5054/9330 [02:39<02:13, 32.02it/s]
 54%|█████▍    | 5058/9330 [02:39<02:23, 29.85it/s]
 54%|█████▍    | 5062/9330 [02:39<02:13, 32.09it/s]
 54%|█████▍    | 5066/9330 [02:39<02:10, 32.77it/s]
 54%|█████▍    | 5070/9330 [02:39<02:03, 34.38it/s]
 54%|█████▍    | 5074/9330 [02:40<01:58, 35.77it/s]
 54%|█████▍    | 5078/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.1573, 'learning_rate': 1.699774461794316e-05, 'epoch': 1.77}
Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:10:54. Total running time: 50min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

[2m[36m(_objective pid=28725)[0m  59%|█████▉    | 5510/9330 [02:52<01:45, 36.12it/s]
 59%|█████▉    | 5514/9330 [02:53<01:46, 35.99it/s]
 59%|█████▉    | 5518/9330 [02:53<01:44, 36.52it/s]
 59%|█████▉    | 5522/9330 [02:53<01:54, 33.13it/s]
 59%|█████▉    | 5526/9330 [02:53<01:50, 34.51it/s]
 59%|█████▉    | 5530/9330 [02:53<01:50, 34.47it/s]
 59%|█████▉    | 5534/9330 [02:53<01:47, 35.30it/s]
 59%|█████▉    | 5538/9330 [02:53<01:53, 33.29it/s]
 59%|█████▉    | 5542/9330 [02:53<01:52, 33.78it/s]
 59%|█████▉    | 5546/9330 [02:53<01:47, 35.28it/s]
 59%|█████▉    | 5550/9330 [02:54<01:45, 35.92it/s]
 60%|█████▉    | 5555/9330 [02:54<01:41, 37.36it/s]
 60%|█████▉    | 5559/9330 [02:54<01:51, 33.69it/s]
 60%|█████▉    | 5563/9330 [02:54<01:49, 34.35it/s]
 60%|█████▉    | 5567/9330 [02:54<01:45, 35.53it/s]
 60%|█████▉    | 5571/9330 [02:54<01:42, 36.54it/s]
 60%|█████▉    | 5575/9330 [02:54<01:46, 35.34it/s]
 60%|█████▉    | 5579/9330 [02:54<01:43, 36.19it/s]
 60%|█████▉    | 5583/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.1413, 'learning_rate': 1.4778717905417944e-05, 'epoch': 1.93}


[2m[36m(_objective pid=28725)[0m  64%|██████▍   | 6003/9330 [03:07<01:36, 34.38it/s]
 64%|██████▍   | 6007/9330 [03:07<01:35, 34.74it/s]
 64%|██████▍   | 6011/9330 [03:07<01:34, 35.12it/s]
 64%|██████▍   | 6015/9330 [03:07<01:39, 33.21it/s]
 65%|██████▍   | 6019/9330 [03:07<01:37, 34.00it/s]
 65%|██████▍   | 6023/9330 [03:08<01:36, 34.30it/s]
 65%|██████▍   | 6027/9330 [03:08<01:35, 34.74it/s]
 65%|██████▍   | 6031/9330 [03:08<01:41, 32.37it/s]
 65%|██████▍   | 6035/9330 [03:08<01:37, 33.84it/s]
 65%|██████▍   | 6039/9330 [03:08<01:35, 34.46it/s]
 65%|██████▍   | 6043/9330 [03:08<01:36, 34.15it/s]
 65%|██████▍   | 6047/9330 [03:08<01:46, 30.90it/s]
 65%|██████▍   | 6051/9330 [03:08<01:46, 30.88it/s]
 65%|██████▍   | 6055/9330 [03:09<01:53, 28.96it/s]
 65%|██████▍   | 6059/9330 [03:09<01:48, 30.04it/s]
 65%|██████▍   | 6063/9330 [03:09<01:42, 31.74it/s]
 65%|██████▌   | 6067/9330 [03:09<01:41, 32.00it/s]
 65%|██████▌   | 6071/9330 [03:09<01:39, 32.70it/s]
 65%|██████▌   | 6075/9330 

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:11:24. Total running time: 51min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                     

[2m[36m(_objective pid=28725)[0m 
 98%|█████████▊| 127/130 [00:09<00:00, 13.13it/s][A
[2m[36m(_objective pid=28725)[0m 
 99%|█████████▉| 129/130 [00:09<00:00, 11.96it/s][A


Trial _objective_f556c_00006 finished iteration 2 at 2023-09-11 14:11:25. Total running time: 51min 4s
+-------------------------------------------------+
| Trial _objective_f556c_00006 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.952 |
| time_total_s                            205.849 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.16982 |
| eval_runtime                             9.5962 |
| eval_samples_per_second                 432.148 |
| eval_steps_per_second                    13.547 |
| objective                               0.16982 |
+-------------------------------------------------+

[2m[36m(_objective pid=28725)[0m {'eval_loss': 0.16981641948223114, 'eval_runtime': 9.5962, 'eval_samples_per_second': 432.148, 'eval_steps_per_second': 13.547, 'epoch': 2.0}


[2m[36m(_objective pid=28725)[0m                                                    
[2m[36m(_objective pid=28725)[0m                                                  [A 67%|██████▋   | 6220/9330 [03:23<01:34, 32.97it/s]
[2m[36m(_objective pid=28725)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
                                                 [A
 67%|██████▋   | 6221/9330 [03:24<44:04,  1.18it/s]
 67%|██████▋   | 6225/9330 [03:25<31:18,  1.65it/s]
 67%|██████▋   | 6229/9330 [03:25<22:20,  2.31it/s]
 67%|██████▋   | 6233/9330 [03:25<16:06,  3.20it/s]
 67%|██████▋   | 6237/9330 [03:25<11:50,  4.35it/s]
 67%|██████▋   | 6241/9330 [03:25<08:49,  5.84it/s]
 67%|██████▋   | 6245/9330 [03:25<06:45,  7.61it/s]
 67%|██████▋   | 6249/9330 [03:25<05:14,  9.79it/s]
 67%|██████▋   | 6253/9330 [03:26<04:09, 12.34it/s]
 67%|██████▋   | 6257/9330 [03:26<03:23, 15.14it/s]
 67%|██████▋   | 6261/9330 [03:26<02:47, 18.28it/s]
 67%|██████▋   | 6265/9330 [03:26<02:25, 21.08it/s]
 

[2m[36m(_objective pid=28725)[0m {'loss': 0.1299, 'learning_rate': 1.2559691192892727e-05, 'epoch': 2.09}


[2m[36m(_objective pid=28725)[0m  70%|██████▉   | 6509/9330 [03:33<01:19, 35.29it/s]
 70%|██████▉   | 6513/9330 [03:33<01:21, 34.52it/s]
 70%|██████▉   | 6517/9330 [03:33<01:28, 31.76it/s]
 70%|██████▉   | 6521/9330 [03:33<01:25, 32.80it/s]
 70%|██████▉   | 6525/9330 [03:34<01:33, 29.99it/s]
 70%|██████▉   | 6529/9330 [03:34<01:36, 28.97it/s]
 70%|███████   | 6533/9330 [03:34<01:29, 31.16it/s]
 70%|███████   | 6537/9330 [03:34<01:24, 33.12it/s]
 70%|███████   | 6541/9330 [03:34<01:21, 34.25it/s]
 70%|███████   | 6545/9330 [03:34<01:18, 35.67it/s]
 70%|███████   | 6549/9330 [03:34<01:16, 36.50it/s]
 70%|███████   | 6553/9330 [03:34<01:19, 34.95it/s]
 70%|███████   | 6557/9330 [03:34<01:17, 35.91it/s]
 70%|███████   | 6561/9330 [03:35<01:18, 35.42it/s]
 70%|███████   | 6565/9330 [03:35<01:16, 36.20it/s]
 70%|███████   | 6569/9330 [03:35<01:14, 36.94it/s]
 70%|███████   | 6573/9330 [03:35<01:14, 36.77it/s]
 70%|███████   | 6577/9330 [03:35<01:15, 36.56it/s]
 71%|███████   | 6581/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.0744, 'learning_rate': 1.0340664480367511e-05, 'epoch': 2.25}


[2m[36m(_objective pid=28725)[0m  75%|███████▌  | 7009/9330 [03:48<01:12, 32.01it/s]
 75%|███████▌  | 7013/9330 [03:48<01:11, 32.51it/s]
 75%|███████▌  | 7017/9330 [03:48<01:09, 33.24it/s]
 75%|███████▌  | 7021/9330 [03:48<01:07, 34.27it/s]
 75%|███████▌  | 7025/9330 [03:48<01:05, 35.20it/s]
 75%|███████▌  | 7029/9330 [03:48<01:11, 32.06it/s]
 75%|███████▌  | 7033/9330 [03:49<01:11, 32.07it/s]
 75%|███████▌  | 7037/9330 [03:49<01:08, 33.54it/s]
 75%|███████▌  | 7041/9330 [03:49<01:05, 34.81it/s]
 76%|███████▌  | 7045/9330 [03:49<01:06, 34.43it/s]
 76%|███████▌  | 7049/9330 [03:49<01:05, 34.92it/s]
 76%|███████▌  | 7053/9330 [03:49<01:04, 35.57it/s]
 76%|███████▌  | 7057/9330 [03:49<01:04, 35.47it/s]
 76%|███████▌  | 7061/9330 [03:49<01:03, 35.46it/s]
 76%|███████▌  | 7065/9330 [03:50<01:02, 36.14it/s]
 76%|███████▌  | 7069/9330 [03:50<01:08, 32.88it/s]
 76%|███████▌  | 7073/9330 [03:50<01:06, 34.01it/s]
 76%|███████▌  | 7077/9330 [03:50<01:05, 34.15it/s]
 76%|███████▌  | 7081/9330 

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:11:54. Total running time: 51min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                    

[2m[36m(_objective pid=28725)[0m  77%|███████▋  | 7169/9330 [03:53<01:11, 30.30it/s]
 77%|███████▋  | 7173/9330 [03:53<01:08, 31.53it/s]
 77%|███████▋  | 7177/9330 [03:53<01:08, 31.41it/s]
 77%|███████▋  | 7181/9330 [03:53<01:04, 33.20it/s]
 77%|███████▋  | 7185/9330 [03:53<01:05, 32.53it/s]
 77%|███████▋  | 7189/9330 [03:53<01:06, 32.37it/s]
 77%|███████▋  | 7193/9330 [03:53<01:03, 33.51it/s]
 77%|███████▋  | 7197/9330 [03:54<01:05, 32.69it/s]
 77%|███████▋  | 7201/9330 [03:54<01:09, 30.48it/s]
 77%|███████▋  | 7205/9330 [03:54<01:05, 32.25it/s]
 77%|███████▋  | 7209/9330 [03:54<01:05, 32.60it/s]
 77%|███████▋  | 7213/9330 [03:54<01:10, 30.20it/s]
 77%|███████▋  | 7217/9330 [03:54<01:06, 31.98it/s]
 77%|███████▋  | 7221/9330 [03:54<01:03, 33.21it/s]
 77%|███████▋  | 7225/9330 [03:54<01:03, 33.15it/s]
 77%|███████▋  | 7229/9330 [03:55<01:02, 33.50it/s]
 78%|███████▊  | 7233/9330 [03:55<01:03, 33.07it/s]
 78%|███████▊  | 7237/9330 [03:55<01:02, 33.76it/s]
 78%|███████▊  | 7241/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.0641, 'learning_rate': 8.121637767842294e-06, 'epoch': 2.41}


[2m[36m(_objective pid=28725)[0m  80%|████████  | 7505/9330 [04:03<00:51, 35.10it/s]
 80%|████████  | 7509/9330 [04:03<00:51, 35.55it/s]
 81%|████████  | 7513/9330 [04:03<00:50, 36.18it/s]
 81%|████████  | 7517/9330 [04:03<00:49, 36.30it/s]
 81%|████████  | 7521/9330 [04:03<00:50, 35.60it/s]
 81%|████████  | 7525/9330 [04:04<00:56, 32.20it/s]
 81%|████████  | 7529/9330 [04:04<00:53, 33.57it/s]
 81%|████████  | 7533/9330 [04:04<00:52, 34.35it/s]
 81%|████████  | 7537/9330 [04:04<00:57, 31.43it/s]
 81%|████████  | 7541/9330 [04:04<01:00, 29.67it/s]
 81%|████████  | 7545/9330 [04:04<00:57, 30.98it/s]
 81%|████████  | 7549/9330 [04:04<00:54, 32.91it/s]
 81%|████████  | 7553/9330 [04:04<00:57, 31.09it/s]
 81%|████████  | 7557/9330 [04:05<00:55, 31.87it/s]
 81%|████████  | 7561/9330 [04:05<00:53, 33.35it/s]
 81%|████████  | 7565/9330 [04:05<00:51, 34.02it/s]
 81%|████████  | 7569/9330 [04:05<00:50, 34.97it/s]
 81%|████████  | 7573/9330 [04:05<00:54, 32.12it/s]
 81%|████████  | 7577/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.0839, 'learning_rate': 5.9026110553170775e-06, 'epoch': 2.57}


 86%|████████▌ | 8009/9330 [04:18<00:40, 32.86it/s]
 86%|████████▌ | 8013/9330 [04:18<00:43, 30.23it/s]
 86%|████████▌ | 8017/9330 [04:18<00:40, 32.08it/s]
 86%|████████▌ | 8021/9330 [04:18<00:39, 33.44it/s]
 86%|████████▌ | 8025/9330 [04:18<00:43, 30.32it/s]
 86%|████████▌ | 8029/9330 [04:18<00:45, 28.82it/s]
 86%|████████▌ | 8033/9330 [04:19<00:43, 29.64it/s]
 86%|████████▌ | 8037/9330 [04:19<00:40, 32.02it/s]
 86%|████████▌ | 8041/9330 [04:19<00:37, 33.96it/s]
 86%|████████▌ | 8045/9330 [04:19<00:36, 34.84it/s]
 86%|████████▋ | 8049/9330 [04:19<00:36, 34.93it/s]
 86%|████████▋ | 8053/9330 [04:19<00:36, 34.90it/s]
 86%|████████▋ | 8057/9330 [04:19<00:39, 32.20it/s]
 86%|████████▋ | 8061/9330 [04:19<00:42, 29.65it/s]
 86%|████████▋ | 8065/9330 [04:20<00:40, 31.29it/s]
 86%|████████▋ | 8069/9330 [04:20<00:37, 33.24it/s]
 87%|████████▋ | 8073/9330 [04:20<00:36, 34.84it/s]
 87%|████████▋ | 8077/9330 [04:20<00:35, 35.70it/s]
 87%|████████▋ | 8081/9330 [04:20<00:35, 34.92it/s]
 87%|███████

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:12:24. Total running time: 52min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                     

[2m[36m(_objective pid=28725)[0m  88%|████████▊ | 8169/9330 [04:23<00:32, 35.78it/s]
 88%|████████▊ | 8173/9330 [04:23<00:32, 36.15it/s]
 88%|████████▊ | 8177/9330 [04:23<00:32, 35.13it/s]
 88%|████████▊ | 8181/9330 [04:23<00:32, 35.90it/s]
 88%|████████▊ | 8185/9330 [04:23<00:30, 37.00it/s]
 88%|████████▊ | 8189/9330 [04:23<00:30, 37.72it/s]
 88%|████████▊ | 8193/9330 [04:23<00:31, 36.21it/s]
 88%|████████▊ | 8197/9330 [04:23<00:35, 32.24it/s]
 88%|████████▊ | 8201/9330 [04:24<00:33, 33.76it/s]
 88%|████████▊ | 8205/9330 [04:24<00:32, 34.34it/s]
 88%|████████▊ | 8209/9330 [04:24<00:35, 31.74it/s]
 88%|████████▊ | 8213/9330 [04:24<00:34, 32.31it/s]
 88%|████████▊ | 8217/9330 [04:24<00:36, 30.49it/s]
 88%|████████▊ | 8221/9330 [04:24<00:34, 32.08it/s]
 88%|████████▊ | 8225/9330 [04:24<00:34, 32.25it/s]
 88%|████████▊ | 8229/9330 [04:24<00:33, 32.43it/s]
 88%|████████▊ | 8233/9330 [04:25<00:35, 31.10it/s]
 88%|████████▊ | 8237/9330 [04:25<00:33, 32.77it/s]
 88%|████████▊ | 8241/9330 

[2m[36m(_objective pid=28725)[0m {'loss': 0.0661, 'learning_rate': 3.6835843427918597e-06, 'epoch': 2.73}


 91%|█████████ | 8510/9330 [04:33<00:23, 35.22it/s]
 91%|█████████▏| 8514/9330 [04:33<00:24, 33.59it/s]
 91%|█████████▏| 8518/9330 [04:33<00:25, 32.30it/s]
 91%|█████████▏| 8522/9330 [04:33<00:23, 33.69it/s]
 91%|█████████▏| 8526/9330 [04:33<00:24, 32.91it/s]
 91%|█████████▏| 8530/9330 [04:33<00:23, 34.14it/s]
 91%|█████████▏| 8534/9330 [04:33<00:22, 34.65it/s]
 92%|█████████▏| 8538/9330 [04:34<00:22, 34.87it/s]
 92%|█████████▏| 8542/9330 [04:34<00:22, 35.51it/s]
 92%|█████████▏| 8546/9330 [04:34<00:21, 35.67it/s]
 92%|█████████▏| 8550/9330 [04:34<00:25, 30.89it/s]
 92%|█████████▏| 8554/9330 [04:34<00:24, 31.62it/s]
 92%|█████████▏| 8558/9330 [04:34<00:23, 32.68it/s]
 92%|█████████▏| 8562/9330 [04:34<00:22, 33.40it/s]
 92%|█████████▏| 8566/9330 [04:34<00:22, 33.28it/s]
 92%|█████████▏| 8570/9330 [04:35<00:22, 34.35it/s]
 92%|█████████▏| 8574/9330 [04:35<00:22, 33.74it/s]
 92%|█████████▏| 8578/9330 [04:35<00:21, 34.94it/s]
 92%|█████████▏| 8582/9330 [04:35<00:21, 35.62it/s]
 92%|███████

[2m[36m(_objective pid=28725)[0m {'loss': 0.0847, 'learning_rate': 1.4645576302666432e-06, 'epoch': 2.89}


[2m[36m(_objective pid=28725)[0m  97%|█████████▋| 9006/9330 [04:48<00:09, 32.68it/s]
 97%|█████████▋| 9010/9330 [04:48<00:10, 30.75it/s]
 97%|█████████▋| 9014/9330 [04:48<00:09, 32.82it/s]
 97%|█████████▋| 9018/9330 [04:48<00:09, 32.87it/s]
 97%|█████████▋| 9022/9330 [04:48<00:09, 33.20it/s]
 97%|█████████▋| 9026/9330 [04:48<00:08, 33.99it/s]
 97%|█████████▋| 9030/9330 [04:49<00:09, 33.22it/s]
 97%|█████████▋| 9034/9330 [04:49<00:08, 34.53it/s]
 97%|█████████▋| 9038/9330 [04:49<00:08, 35.72it/s]
 97%|█████████▋| 9042/9330 [04:49<00:07, 36.17it/s]
 97%|█████████▋| 9046/9330 [04:49<00:08, 35.27it/s]
 97%|█████████▋| 9050/9330 [04:49<00:07, 35.18it/s]
 97%|█████████▋| 9054/9330 [04:49<00:07, 35.26it/s]
 97%|█████████▋| 9058/9330 [04:49<00:08, 33.10it/s]
 97%|█████████▋| 9062/9330 [04:49<00:08, 33.02it/s]
 97%|█████████▋| 9066/9330 [04:50<00:07, 33.88it/s]
 97%|█████████▋| 9070/9330 [04:50<00:08, 31.43it/s]
 97%|█████████▋| 9074/9330 [04:50<00:07, 33.13it/s]
 97%|█████████▋| 9078/9330 

Trial status: 6 TERMINATED | 1 RUNNING | 13 PENDING
Current time: 2023-09-11 14:12:54. Total running time: 52min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00006   RUNNING          4.1407e-05                     3                    

[2m[36m(_objective pid=28725)[0m  98%|█████████▊| 9170/9330 [04:53<00:04, 35.34it/s]
 98%|█████████▊| 9174/9330 [04:53<00:04, 36.00it/s]
 98%|█████████▊| 9178/9330 [04:53<00:04, 35.80it/s]
 98%|█████████▊| 9182/9330 [04:53<00:04, 34.42it/s]
 98%|█████████▊| 9186/9330 [04:53<00:04, 35.61it/s]
 98%|█████████▊| 9190/9330 [04:53<00:04, 34.28it/s]
 99%|█████████▊| 9194/9330 [04:53<00:03, 35.20it/s]
 99%|█████████▊| 9198/9330 [04:53<00:03, 35.06it/s]
 99%|█████████▊| 9202/9330 [04:54<00:04, 31.52it/s]
 99%|█████████▊| 9206/9330 [04:54<00:03, 33.11it/s]
 99%|█████████▊| 9210/9330 [04:54<00:03, 34.38it/s]
 99%|█████████▉| 9214/9330 [04:54<00:03, 33.84it/s]
 99%|█████████▉| 9218/9330 [04:54<00:03, 35.14it/s]
 99%|█████████▉| 9222/9330 [04:54<00:03, 32.25it/s]
 99%|█████████▉| 9226/9330 [04:54<00:03, 34.02it/s]
 99%|█████████▉| 9230/9330 [04:54<00:02, 34.96it/s]
 99%|█████████▉| 9234/9330 [04:54<00:02, 34.71it/s]
 99%|█████████▉| 9238/9330 [04:55<00:02, 34.81it/s]
 99%|█████████▉| 9242/9330 

Trial _objective_f556c_00006 finished iteration 3 at 2023-09-11 14:13:09. Total running time: 52min 48s
+-------------------------------------------------+
| Trial _objective_f556c_00006 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.017 |
| time_total_s                            309.866 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.20414 |
| eval_runtime                             9.6005 |
| eval_samples_per_second                 431.957 |
| eval_steps_per_second                    13.541 |
| objective                               0.20414 |
+-------------------------------------------------+

[2m[36m(_objective pid=28725)[0m {'eval_loss': 0.20414039492607117, 'eval_runtime': 9.6005, 'eval_samples_per_second': 431.957, 'eval_steps_per_second': 13.541, 'epoch': 3.0}


[2m[36m(_objective pid=28725)[0m                                                    
[2m[36m(_objective pid=28725)[0m                                                  [A100%|██████████| 9330/9330 [05:07<00:00, 30.21it/s]
[2m[36m(_objective pid=28725)[0m 100%|██████████| 130/130 [00:09<00:00, 12.00it/s][A
[2m[36m(_objective pid=28725)[0m                                                  [A


Trial _objective_f556c_00006 completed after 3 iterations at 2023-09-11 14:13:10. Total running time: 52min 49s

[2m[36m(_objective pid=28725)[0m {'train_runtime': 309.0775, 'train_samples_per_second': 120.737, 'train_steps_per_second': 30.187, 'train_loss': 0.16900841548384066, 'epoch': 3.0}


[2m[36m(_objective pid=28725)[0m                                                    100%|██████████| 9330/9330 [05:09<00:00, 30.21it/s]100%|██████████| 9330/9330 [05:09<00:00, 30.19it/s]


Trial _objective_f556c_00007 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00007 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             1e-05 |
| num_train_epochs                              3 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   2 |
| weight_decay                            0.05116 |
+-------------------------------------------------+



[2m[36m(_objective pid=30118)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias']
[2m[36m(_objective pid=30118)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=30118)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=30118)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:13:24. Total running time: 53min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m   1%|          | 185/18660 [00:05<08:14, 37.32it/s]
  1%|          | 189/18660 [00:05<08:20, 36.87it/s]
  1%|          | 193/18660 [00:05<08:17, 37.11it/s]
  1%|          | 197/18660 [00:05<08:14, 37.37it/s]
  1%|          | 202/18660 [00:05<07:58, 38.54it/s]
  1%|          | 206/18660 [00:05<08:01, 38.33it/s]
  1%|          | 210/18660 [00:05<07:56, 38.74it/s]
  1%|          | 214/18660 [00:05<07:53, 38.96it/s]
  1%|          | 218/18660 [00:05<07:54, 38.83it/s]
  1%|          | 222/18660 [00:06<07:54, 38.89it/s]
  1%|          | 226/18660 [00:06<08:14, 37.30it/s]
  1%|          | 230/18660 [00:06<08:09, 37.69it/s]
  1%|▏         | 234/18660 [00:06<08:08, 37.75it/s]
  1%|▏         | 238/18660 [00:06<08:14, 37.25it/s]
  1%|▏         | 243/18660 [00:06<08:02, 38.16it/s]
  1%|▏         | 248/18660 [00:06<07:51, 39.02it/s]
  1%|▏         | 253/18660 [00:06<07:46, 39.50it/s]
  1%|▏         | 257/18660 [00:06<07:49, 39.17it/s]
  1%|▏         | 261/18660 

[2m[36m(_objective pid=30118)[0m {'loss': 0.4825, 'learning_rate': 1.1540277588414058e-05, 'epoch': 0.08}


[2m[36m(_objective pid=30118)[0m   3%|▎         | 501/18660 [00:13<08:29, 35.62it/s]
  3%|▎         | 505/18660 [00:13<08:15, 36.64it/s]
  3%|▎         | 509/18660 [00:14<08:22, 36.14it/s]
  3%|▎         | 513/18660 [00:14<08:25, 35.91it/s]
  3%|▎         | 517/18660 [00:14<08:12, 36.87it/s]
  3%|▎         | 521/18660 [00:14<08:08, 37.15it/s]
  3%|▎         | 525/18660 [00:14<08:06, 37.27it/s]
  3%|▎         | 529/18660 [00:14<08:08, 37.13it/s]
  3%|▎         | 533/18660 [00:14<08:07, 37.20it/s]
  3%|▎         | 537/18660 [00:14<08:06, 37.25it/s]
  3%|▎         | 541/18660 [00:14<08:17, 36.41it/s]
  3%|▎         | 545/18660 [00:15<08:23, 35.97it/s]
  3%|▎         | 549/18660 [00:15<08:22, 36.01it/s]
  3%|▎         | 553/18660 [00:15<08:26, 35.78it/s]
  3%|▎         | 557/18660 [00:15<08:17, 36.41it/s]
  3%|▎         | 561/18660 [00:15<08:22, 36.01it/s]
  3%|▎         | 565/18660 [00:15<08:17, 36.35it/s]
  3%|▎         | 569/18660 [00:15<08:25, 35.78it/s]
  3%|▎         | 573/18660 

[2m[36m(_objective pid=30118)[0m {'loss': 0.4793, 'learning_rate': 1.1222538668028211e-05, 'epoch': 0.16}


[2m[36m(_objective pid=30118)[0m   5%|▌         | 1007/18660 [00:27<08:17, 35.52it/s]
  5%|▌         | 1011/18660 [00:27<08:25, 34.94it/s]
  5%|▌         | 1015/18660 [00:28<08:39, 33.94it/s]
  5%|▌         | 1019/18660 [00:28<08:34, 34.30it/s]
  5%|▌         | 1023/18660 [00:28<08:29, 34.64it/s]
  6%|▌         | 1027/18660 [00:28<08:22, 35.09it/s]
  6%|▌         | 1031/18660 [00:28<08:23, 35.03it/s]
  6%|▌         | 1035/18660 [00:28<08:10, 35.91it/s]
  6%|▌         | 1039/18660 [00:28<08:07, 36.12it/s]
  6%|▌         | 1043/18660 [00:28<08:06, 36.20it/s]
  6%|▌         | 1047/18660 [00:28<08:06, 36.20it/s]
  6%|▌         | 1051/18660 [00:29<08:03, 36.45it/s]
  6%|▌         | 1055/18660 [00:29<08:04, 36.37it/s]
  6%|▌         | 1059/18660 [00:29<08:04, 36.32it/s]
  6%|▌         | 1063/18660 [00:29<08:06, 36.20it/s]
  6%|▌         | 1067/18660 [00:29<08:17, 35.37it/s]
  6%|▌         | 1071/18660 [00:29<08:21, 35.08it/s]
  6%|▌         | 1075/18660 [00:29<08:35, 34.13it/s]
  6%|▌   

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:13:54. Total running time: 53min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m   7%|▋         | 1271/18660 [00:35<07:37, 38.04it/s]
  7%|▋         | 1275/18660 [00:35<07:40, 37.78it/s]
  7%|▋         | 1279/18660 [00:35<07:45, 37.38it/s]
  7%|▋         | 1283/18660 [00:35<07:42, 37.56it/s]
  7%|▋         | 1287/18660 [00:35<07:38, 37.87it/s]
  7%|▋         | 1291/18660 [00:35<07:47, 37.17it/s]
  7%|▋         | 1295/18660 [00:35<07:44, 37.39it/s]
  7%|▋         | 1299/18660 [00:35<07:40, 37.74it/s]
  7%|▋         | 1303/18660 [00:35<07:40, 37.66it/s]
  7%|▋         | 1307/18660 [00:36<07:37, 37.91it/s]
  7%|▋         | 1311/18660 [00:36<07:30, 38.49it/s]
  7%|▋         | 1315/18660 [00:36<07:34, 38.18it/s]
  7%|▋         | 1319/18660 [00:36<07:47, 37.09it/s]
  7%|▋         | 1323/18660 [00:36<07:44, 37.35it/s]
  7%|▋         | 1327/18660 [00:36<07:39, 37.70it/s]
  7%|▋         | 1331/18660 [00:36<07:43, 37.37it/s]
  7%|▋         | 1335/18660 [00:36<07:40, 37.60it/s]
  7%|▋         | 1339/18660 [00:36<07:42, 37.43it/s]
  7%|▋   

[2m[36m(_objective pid=30118)[0m {'loss': 0.3915, 'learning_rate': 1.090479974764236e-05, 'epoch': 0.24}


[2m[36m(_objective pid=30118)[0m   8%|▊         | 1503/18660 [00:41<08:12, 34.82it/s]
  8%|▊         | 1507/18660 [00:41<08:05, 35.31it/s]
  8%|▊         | 1511/18660 [00:41<08:15, 34.58it/s]
  8%|▊         | 1515/18660 [00:41<08:12, 34.80it/s]
  8%|▊         | 1519/18660 [00:41<08:02, 35.53it/s]
  8%|▊         | 1523/18660 [00:41<07:56, 35.94it/s]
  8%|▊         | 1527/18660 [00:42<07:50, 36.39it/s]
  8%|▊         | 1531/18660 [00:42<07:49, 36.48it/s]
  8%|▊         | 1535/18660 [00:42<07:46, 36.72it/s]
  8%|▊         | 1539/18660 [00:42<07:50, 36.36it/s]
  8%|▊         | 1543/18660 [00:42<07:44, 36.83it/s]
  8%|▊         | 1547/18660 [00:42<07:35, 37.57it/s]
  8%|▊         | 1552/18660 [00:42<07:25, 38.41it/s]
  8%|▊         | 1557/18660 [00:42<07:37, 37.38it/s]
  8%|▊         | 1561/18660 [00:42<07:32, 37.75it/s]
  8%|▊         | 1566/18660 [00:43<07:32, 37.77it/s]
  8%|▊         | 1571/18660 [00:43<07:23, 38.51it/s]
  8%|▊         | 1575/18660 [00:43<07:33, 37.66it/s]
  8%|▊   

[2m[36m(_objective pid=30118)[0m {'loss': 0.3707, 'learning_rate': 1.058706082725651e-05, 'epoch': 0.32}


[2m[36m(_objective pid=30118)[0m  11%|█         | 2004/18660 [00:55<07:21, 37.70it/s]
 11%|█         | 2008/18660 [00:55<07:28, 37.16it/s]
 11%|█         | 2012/18660 [00:55<07:26, 37.26it/s]
 11%|█         | 2016/18660 [00:55<07:23, 37.50it/s]
 11%|█         | 2020/18660 [00:55<07:23, 37.55it/s]
 11%|█         | 2024/18660 [00:55<07:19, 37.86it/s]
 11%|█         | 2028/18660 [00:55<07:19, 37.83it/s]
 11%|█         | 2032/18660 [00:55<07:21, 37.64it/s]
 11%|█         | 2036/18660 [00:56<07:17, 37.96it/s]
 11%|█         | 2040/18660 [00:56<07:23, 37.49it/s]
 11%|█         | 2044/18660 [00:56<07:37, 36.33it/s]
 11%|█         | 2048/18660 [00:56<07:38, 36.19it/s]
 11%|█         | 2052/18660 [00:56<07:35, 36.47it/s]
 11%|█         | 2056/18660 [00:56<07:34, 36.50it/s]
 11%|█         | 2060/18660 [00:56<07:34, 36.56it/s]
 11%|█         | 2064/18660 [00:56<07:37, 36.30it/s]
 11%|█         | 2068/18660 [00:56<07:36, 36.33it/s]
 11%|█         | 2072/18660 [00:57<07:32, 36.65it/s]
 11%|█   

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:14:24. Total running time: 54min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m  13%|█▎        | 2368/18660 [01:05<07:47, 34.81it/s]
 13%|█▎        | 2372/18660 [01:05<07:44, 35.10it/s]
 13%|█▎        | 2376/18660 [01:05<07:37, 35.63it/s]
 13%|█▎        | 2380/18660 [01:05<07:29, 36.20it/s]
 13%|█▎        | 2384/18660 [01:05<07:20, 36.96it/s]
 13%|█▎        | 2388/18660 [01:05<07:31, 36.06it/s]
 13%|█▎        | 2392/18660 [01:05<07:19, 36.99it/s]
 13%|█▎        | 2397/18660 [01:05<07:07, 38.07it/s]
 13%|█▎        | 2401/18660 [01:05<07:02, 38.45it/s]
 13%|█▎        | 2406/18660 [01:06<06:53, 39.28it/s]
 13%|█▎        | 2410/18660 [01:06<06:52, 39.35it/s]
 13%|█▎        | 2414/18660 [01:06<07:13, 37.44it/s]
 13%|█▎        | 2418/18660 [01:06<07:14, 37.38it/s]
 13%|█▎        | 2422/18660 [01:06<07:09, 37.85it/s]
 13%|█▎        | 2426/18660 [01:06<07:07, 38.01it/s]
 13%|█▎        | 2430/18660 [01:06<07:04, 38.24it/s]
 13%|█▎        | 2434/18660 [01:06<07:03, 38.30it/s]
 13%|█▎        | 2438/18660 [01:06<07:06, 38.04it/s]
 13%|█▎  

[2m[36m(_objective pid=30118)[0m {'loss': 0.3682, 'learning_rate': 1.026932190687066e-05, 'epoch': 0.4}


[2m[36m(_objective pid=30118)[0m  13%|█▎        | 2502/18660 [01:08<07:15, 37.08it/s]
 13%|█▎        | 2506/18660 [01:08<07:20, 36.65it/s]
 13%|█▎        | 2510/18660 [01:08<07:21, 36.59it/s]
 13%|█▎        | 2514/18660 [01:08<07:24, 36.30it/s]
 13%|█▎        | 2518/18660 [01:09<07:22, 36.50it/s]
 14%|█▎        | 2522/18660 [01:09<07:22, 36.45it/s]
 14%|█▎        | 2526/18660 [01:09<07:20, 36.60it/s]
 14%|█▎        | 2530/18660 [01:09<07:18, 36.75it/s]
 14%|█▎        | 2534/18660 [01:09<07:15, 37.05it/s]
 14%|█▎        | 2538/18660 [01:09<07:14, 37.12it/s]
 14%|█▎        | 2542/18660 [01:09<07:16, 36.94it/s]
 14%|█▎        | 2546/18660 [01:09<07:23, 36.30it/s]
 14%|█▎        | 2550/18660 [01:09<07:27, 35.96it/s]
 14%|█▎        | 2554/18660 [01:10<07:23, 36.31it/s]
 14%|█▎        | 2558/18660 [01:10<07:27, 35.98it/s]
 14%|█▎        | 2562/18660 [01:10<07:22, 36.35it/s]
 14%|█▍        | 2566/18660 [01:10<07:28, 35.89it/s]
 14%|█▍        | 2570/18660 [01:10<07:29, 35.78it/s]
 14%|█▍  

[2m[36m(_objective pid=30118)[0m {'loss': 0.3274, 'learning_rate': 9.951582986484812e-06, 'epoch': 0.48}


[2m[36m(_objective pid=30118)[0m  16%|█▌        | 3006/18660 [01:22<06:53, 37.87it/s]
 16%|█▌        | 3010/18660 [01:22<06:56, 37.60it/s]
 16%|█▌        | 3014/18660 [01:22<06:50, 38.10it/s]
 16%|█▌        | 3019/18660 [01:23<06:43, 38.78it/s]
 16%|█▌        | 3024/18660 [01:23<06:39, 39.16it/s]
 16%|█▌        | 3028/18660 [01:23<06:38, 39.24it/s]
 16%|█▋        | 3033/18660 [01:23<06:35, 39.52it/s]
 16%|█▋        | 3037/18660 [01:23<06:44, 38.60it/s]
 16%|█▋        | 3041/18660 [01:23<06:45, 38.56it/s]
 16%|█▋        | 3045/18660 [01:23<06:54, 37.72it/s]
 16%|█▋        | 3049/18660 [01:23<06:49, 38.15it/s]
 16%|█▋        | 3053/18660 [01:23<06:47, 38.34it/s]
 16%|█▋        | 3057/18660 [01:24<06:45, 38.43it/s]
 16%|█▋        | 3061/18660 [01:24<06:49, 38.06it/s]
 16%|█▋        | 3065/18660 [01:24<06:54, 37.61it/s]
 16%|█▋        | 3069/18660 [01:24<07:01, 37.01it/s]
 16%|█▋        | 3073/18660 [01:24<07:01, 37.02it/s]
 16%|█▋        | 3077/18660 [01:24<06:56, 37.39it/s]
 17%|█▋  

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:14:54. Total running time: 54min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m  19%|█▊        | 3459/18660 [01:35<06:56, 36.52it/s]
 19%|█▊        | 3463/18660 [01:35<06:56, 36.50it/s]
 19%|█▊        | 3467/18660 [01:35<07:05, 35.73it/s]
 19%|█▊        | 3471/18660 [01:35<07:07, 35.54it/s]
 19%|█▊        | 3475/18660 [01:35<07:26, 34.01it/s]
 19%|█▊        | 3479/18660 [01:35<07:24, 34.12it/s]
 19%|█▊        | 3483/18660 [01:35<07:21, 34.35it/s]
 19%|█▊        | 3487/18660 [01:35<07:23, 34.24it/s]
 19%|█▊        | 3491/18660 [01:36<07:36, 33.26it/s]
 19%|█▊        | 3495/18660 [01:36<07:30, 33.65it/s]
 19%|█▉        | 3500/18660 [01:36<07:22, 34.29it/s]
 19%|█▉        | 3503/18660 [01:36<07:24, 34.11it/s]


[2m[36m(_objective pid=30118)[0m {'loss': 0.3171, 'learning_rate': 9.633844066098963e-06, 'epoch': 0.56}


 19%|█▉        | 3507/18660 [01:36<07:21, 34.36it/s]
 19%|█▉        | 3511/18660 [01:36<07:25, 33.98it/s]
 19%|█▉        | 3515/18660 [01:36<07:28, 33.78it/s]
 19%|█▉        | 3519/18660 [01:36<07:32, 33.50it/s]
 19%|█▉        | 3523/18660 [01:37<07:28, 33.75it/s]
 19%|█▉        | 3527/18660 [01:37<07:26, 33.89it/s]
 19%|█▉        | 3531/18660 [01:37<07:29, 33.64it/s]
 19%|█▉        | 3535/18660 [01:37<07:21, 34.23it/s]
 19%|█▉        | 3539/18660 [01:37<07:17, 34.54it/s]
 19%|█▉        | 3543/18660 [01:37<07:11, 35.01it/s]
 19%|█▉        | 3547/18660 [01:37<07:02, 35.76it/s]
 19%|█▉        | 3551/18660 [01:37<06:58, 36.13it/s]
 19%|█▉        | 3555/18660 [01:37<07:02, 35.79it/s]
 19%|█▉        | 3559/18660 [01:38<07:02, 35.74it/s]
 19%|█▉        | 3563/18660 [01:38<06:59, 36.02it/s]
 19%|█▉        | 3567/18660 [01:38<06:55, 36.31it/s]
 19%|█▉        | 3571/18660 [01:38<06:45, 37.18it/s]
 19%|█▉        | 3575/18660 [01:38<06:47, 37.06it/s]
 19%|█▉        | 3579/18660 [01:38<06:57, 36.1

[2m[36m(_objective pid=30118)[0m {'loss': 0.2775, 'learning_rate': 9.316105145713112e-06, 'epoch': 0.64}


[2m[36m(_objective pid=30118)[0m  21%|██▏       | 4004/18660 [01:49<06:21, 38.44it/s]
 21%|██▏       | 4008/18660 [01:49<06:16, 38.88it/s]
 22%|██▏       | 4012/18660 [01:50<06:30, 37.52it/s]
 22%|██▏       | 4016/18660 [01:50<06:29, 37.61it/s]
 22%|██▏       | 4020/18660 [01:50<06:23, 38.19it/s]
 22%|██▏       | 4024/18660 [01:50<06:24, 38.11it/s]
 22%|██▏       | 4028/18660 [01:50<06:20, 38.41it/s]
 22%|██▏       | 4032/18660 [01:50<06:17, 38.77it/s]
 22%|██▏       | 4036/18660 [01:50<06:27, 37.69it/s]
 22%|██▏       | 4040/18660 [01:50<06:26, 37.87it/s]
 22%|██▏       | 4044/18660 [01:50<06:31, 37.37it/s]
 22%|██▏       | 4048/18660 [01:51<06:32, 37.25it/s]
 22%|██▏       | 4052/18660 [01:51<06:55, 35.15it/s]
 22%|██▏       | 4056/18660 [01:51<06:40, 36.47it/s]
 22%|██▏       | 4061/18660 [01:51<06:24, 37.94it/s]
 22%|██▏       | 4066/18660 [01:51<06:14, 38.95it/s]
 22%|██▏       | 4070/18660 [01:51<06:12, 39.13it/s]
 22%|██▏       | 4074/18660 [01:51<06:11, 39.28it/s]
 22%|██▏ 

[2m[36m(_objective pid=30118)[0m {'loss': 0.2865, 'learning_rate': 8.998366225327261e-06, 'epoch': 0.72}


[2m[36m(_objective pid=30118)[0m  24%|██▍       | 4506/18660 [02:03<06:14, 37.82it/s]
 24%|██▍       | 4510/18660 [02:03<06:08, 38.36it/s]
 24%|██▍       | 4514/18660 [02:03<06:11, 38.09it/s]
 24%|██▍       | 4518/18660 [02:03<06:09, 38.29it/s]
 24%|██▍       | 4522/18660 [02:03<06:07, 38.46it/s]
 24%|██▍       | 4526/18660 [02:03<06:23, 36.82it/s]
 24%|██▍       | 4530/18660 [02:03<06:18, 37.36it/s]
 24%|██▍       | 4534/18660 [02:04<06:15, 37.66it/s]
 24%|██▍       | 4538/18660 [02:04<06:16, 37.51it/s]
 24%|██▍       | 4542/18660 [02:04<06:13, 37.81it/s]
 24%|██▍       | 4547/18660 [02:04<06:05, 38.60it/s]
 24%|██▍       | 4551/18660 [02:04<06:01, 38.98it/s]
 24%|██▍       | 4555/18660 [02:04<06:01, 39.06it/s]
 24%|██▍       | 4559/18660 [02:04<06:04, 38.71it/s]
 24%|██▍       | 4563/18660 [02:04<06:19, 37.19it/s]
 24%|██▍       | 4567/18660 [02:04<06:16, 37.44it/s]


Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:15:25. Total running time: 55min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m  24%|██▍       | 4571/18660 [02:05<06:25, 36.58it/s]
 25%|██▍       | 4575/18660 [02:05<06:23, 36.76it/s]
 25%|██▍       | 4579/18660 [02:05<06:17, 37.26it/s]
 25%|██▍       | 4583/18660 [02:05<06:15, 37.44it/s]
 25%|██▍       | 4587/18660 [02:05<06:27, 36.31it/s]
 25%|██▍       | 4591/18660 [02:05<06:34, 35.69it/s]
 25%|██▍       | 4595/18660 [02:05<06:38, 35.32it/s]
 25%|██▍       | 4599/18660 [02:05<06:39, 35.20it/s]
 25%|██▍       | 4603/18660 [02:05<06:35, 35.59it/s]
 25%|██▍       | 4607/18660 [02:06<06:35, 35.53it/s]
 25%|██▍       | 4611/18660 [02:06<06:30, 35.98it/s]
 25%|██▍       | 4615/18660 [02:06<06:30, 35.97it/s]
 25%|██▍       | 4619/18660 [02:06<06:25, 36.42it/s]
 25%|██▍       | 4623/18660 [02:06<06:25, 36.37it/s]
 25%|██▍       | 4627/18660 [02:06<06:20, 36.88it/s]
 25%|██▍       | 4631/18660 [02:06<06:33, 35.63it/s]
 25%|██▍       | 4635/18660 [02:06<06:25, 36.41it/s]
 25%|██▍       | 4639/18660 [02:06<06:22, 36.67it/s]
 25%|██▍ 

[2m[36m(_objective pid=30118)[0m {'loss': 0.2279, 'learning_rate': 8.680627304941414e-06, 'epoch': 0.8}


[2m[36m(_objective pid=30118)[0m  27%|██▋       | 5008/18660 [02:17<06:11, 36.74it/s]
 27%|██▋       | 5012/18660 [02:17<06:30, 34.91it/s]
 27%|██▋       | 5016/18660 [02:17<06:24, 35.48it/s]
 27%|██▋       | 5020/18660 [02:17<06:22, 35.69it/s]
 27%|██▋       | 5024/18660 [02:17<06:22, 35.65it/s]
 27%|██▋       | 5028/18660 [02:17<06:33, 34.61it/s]
 27%|██▋       | 5032/18660 [02:17<06:27, 35.14it/s]
 27%|██▋       | 5036/18660 [02:17<06:20, 35.80it/s]
 27%|██▋       | 5040/18660 [02:17<06:10, 36.78it/s]
 27%|██▋       | 5044/18660 [02:18<06:02, 37.52it/s]
 27%|██▋       | 5049/18660 [02:18<05:54, 38.40it/s]
 27%|██▋       | 5053/18660 [02:18<05:57, 38.09it/s]
 27%|██▋       | 5057/18660 [02:18<05:52, 38.59it/s]
 27%|██▋       | 5061/18660 [02:18<05:55, 38.25it/s]
 27%|██▋       | 5065/18660 [02:18<05:55, 38.24it/s]
 27%|██▋       | 5070/18660 [02:18<05:49, 38.93it/s]
 27%|██▋       | 5074/18660 [02:18<05:46, 39.19it/s]
 27%|██▋       | 5079/18660 [02:18<05:43, 39.57it/s]
 27%|██▋ 

[2m[36m(_objective pid=30118)[0m {'loss': 0.2779, 'learning_rate': 8.362888384555563e-06, 'epoch': 0.88}


[2m[36m(_objective pid=30118)[0m  30%|██▉       | 5509/18660 [02:30<05:33, 39.49it/s]
 30%|██▉       | 5513/18660 [02:30<05:40, 38.64it/s]
 30%|██▉       | 5517/18660 [02:30<05:43, 38.25it/s]
 30%|██▉       | 5521/18660 [02:30<05:42, 38.38it/s]
 30%|██▉       | 5525/18660 [02:30<05:40, 38.61it/s]
 30%|██▉       | 5530/18660 [02:30<05:35, 39.15it/s]
 30%|██▉       | 5535/18660 [02:30<05:32, 39.48it/s]
 30%|██▉       | 5540/18660 [02:31<05:29, 39.82it/s]
 30%|██▉       | 5545/18660 [02:31<05:33, 39.35it/s]
 30%|██▉       | 5550/18660 [02:31<05:28, 39.89it/s]
 30%|██▉       | 5555/18660 [02:31<05:25, 40.24it/s]
 30%|██▉       | 5560/18660 [02:31<05:39, 38.61it/s]
 30%|██▉       | 5564/18660 [02:31<05:39, 38.59it/s]
 30%|██▉       | 5568/18660 [02:31<05:45, 37.87it/s]
 30%|██▉       | 5573/18660 [02:31<05:38, 38.66it/s]
 30%|██▉       | 5577/18660 [02:32<05:42, 38.20it/s]
 30%|██▉       | 5581/18660 [02:32<05:40, 38.47it/s]
 30%|██▉       | 5585/18660 [02:32<05:42, 38.17it/s]
 30%|██▉ 

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:15:55. Total running time: 55min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m  30%|███       | 5691/18660 [02:35<05:40, 38.12it/s]
 31%|███       | 5695/18660 [02:35<05:40, 38.13it/s]
 31%|███       | 5699/18660 [02:35<05:41, 38.00it/s]
 31%|███       | 5703/18660 [02:35<05:41, 37.90it/s]
 31%|███       | 5707/18660 [02:35<05:39, 38.14it/s]
 31%|███       | 5711/18660 [02:35<05:40, 38.01it/s]
 31%|███       | 5715/18660 [02:35<05:40, 38.04it/s]
 31%|███       | 5719/18660 [02:35<05:40, 37.98it/s]
 31%|███       | 5723/18660 [02:35<05:43, 37.63it/s]
 31%|███       | 5727/18660 [02:36<05:39, 38.10it/s]
 31%|███       | 5731/18660 [02:36<05:40, 37.94it/s]
 31%|███       | 5735/18660 [02:36<05:36, 38.37it/s]
 31%|███       | 5739/18660 [02:36<05:34, 38.64it/s]
 31%|███       | 5743/18660 [02:36<05:33, 38.77it/s]
 31%|███       | 5747/18660 [02:36<05:43, 37.54it/s]
 31%|███       | 5751/18660 [02:36<05:46, 37.27it/s]
 31%|███       | 5755/18660 [02:36<05:46, 37.23it/s]
 31%|███       | 5759/18660 [02:36<05:41, 37.75it/s]
 31%|███ 

[2m[36m(_objective pid=30118)[0m {'loss': 0.277, 'learning_rate': 8.045149464169713e-06, 'epoch': 0.96}


[2m[36m(_objective pid=30118)[0m  32%|███▏      | 6008/18660 [02:43<05:33, 37.91it/s]
 32%|███▏      | 6012/18660 [02:43<05:42, 36.95it/s]
 32%|███▏      | 6016/18660 [02:43<05:35, 37.71it/s]
 32%|███▏      | 6020/18660 [02:43<05:31, 38.12it/s]
 32%|███▏      | 6025/18660 [02:44<05:23, 39.08it/s]
 32%|███▏      | 6029/18660 [02:44<05:21, 39.33it/s]
 32%|███▏      | 6034/18660 [02:44<05:16, 39.90it/s]
 32%|███▏      | 6039/18660 [02:44<05:13, 40.29it/s]
 32%|███▏      | 6044/18660 [02:44<05:14, 40.13it/s]
 32%|███▏      | 6049/18660 [02:44<05:14, 40.09it/s]
 32%|███▏      | 6054/18660 [02:44<05:13, 40.17it/s]
 32%|███▏      | 6059/18660 [02:44<05:30, 38.08it/s]
 32%|███▏      | 6063/18660 [02:45<05:29, 38.22it/s]
 33%|███▎      | 6067/18660 [02:45<05:25, 38.64it/s]
 33%|███▎      | 6072/18660 [02:45<05:21, 39.13it/s]
 33%|███▎      | 6077/18660 [02:45<05:17, 39.58it/s]
 33%|███▎      | 6081/18660 [02:45<05:21, 39.14it/s]
 33%|███▎      | 6086/18660 [02:45<05:16, 39.69it/s]
 33%|███▎

Trial _objective_f556c_00007 finished iteration 1 at 2023-09-11 14:16:18. Total running time: 55min 57s
+-------------------------------------------------+
| Trial _objective_f556c_00007 result             |
+-------------------------------------------------+
| time_this_iter_s                        181.007 |
| time_total_s                            181.007 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.26242 |
| eval_runtime                             9.5577 |
| eval_samples_per_second                 433.891 |
| eval_steps_per_second                    13.602 |
| objective                               0.26242 |
+-------------------------------------------------+

[2m[36m(_objective pid=30118)[0m {'eval_loss': 0.2624180018901825, 'eval_runtime': 9.5577, 'eval_samples_per_second': 433.891, 'eval_steps_per_second': 13.602, 'epoch': 1.0}


[2m[36m(_objective pid=30118)[0m                                                     
[2m[36m(_objective pid=30118)[0m                                                  [A 33%|███▎      | 6220/18660 [02:58<05:31, 37.51it/s]
[2m[36m(_objective pid=30118)[0m 100%|██████████| 130/130 [00:09<00:00, 12.00it/s][A
                                                 [A
 33%|███▎      | 6221/18660 [02:59<2:48:34,  1.23it/s]
 33%|███▎      | 6225/18660 [03:00<2:00:40,  1.72it/s]
 33%|███▎      | 6229/18660 [03:00<1:26:32,  2.39it/s]
 33%|███▎      | 6233/18660 [03:00<1:02:22,  3.32it/s]
 33%|███▎      | 6237/18660 [03:00<45:32,  4.55it/s]  
 33%|███▎      | 6241/18660 [03:00<33:44,  6.13it/s]
 33%|███▎      | 6246/18660 [03:00<23:49,  8.68it/s]
 33%|███▎      | 6250/18660 [03:00<18:31, 11.16it/s]
 34%|███▎      | 6254/18660 [03:00<14:40, 14.10it/s]
 34%|███▎      | 6259/18660 [03:00<11:24, 18.11it/s]
 34%|███▎      | 6263/18660 [03:01<09:40, 21.35it/s]
 34%|███▎      | 6267/18660 [0

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:16:25. Total running time: 56min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m  34%|███▍      | 6430/18660 [03:05<05:03, 40.30it/s]
 34%|███▍      | 6435/18660 [03:05<05:06, 39.93it/s]
 35%|███▍      | 6440/18660 [03:05<05:02, 40.38it/s]
 35%|███▍      | 6445/18660 [03:05<05:08, 39.64it/s]
 35%|███▍      | 6450/18660 [03:05<05:06, 39.79it/s]
 35%|███▍      | 6455/18660 [03:05<05:04, 40.09it/s]
 35%|███▍      | 6460/18660 [03:06<05:14, 38.78it/s]
 35%|███▍      | 6465/18660 [03:06<05:13, 38.92it/s]
 35%|███▍      | 6470/18660 [03:06<05:08, 39.53it/s]
 35%|███▍      | 6475/18660 [03:06<05:04, 39.96it/s]
 35%|███▍      | 6480/18660 [03:06<05:02, 40.31it/s]
 35%|███▍      | 6485/18660 [03:06<05:14, 38.69it/s]
 35%|███▍      | 6489/18660 [03:06<05:14, 38.74it/s]
 35%|███▍      | 6493/18660 [03:06<05:19, 38.06it/s]
 35%|███▍      | 6500/18660 [03:07<05:15, 38.52it/s]


[2m[36m(_objective pid=30118)[0m {'loss': 0.2485, 'learning_rate': 7.727410543783863e-06, 'epoch': 1.05}


[2m[36m(_objective pid=30118)[0m  35%|███▍      | 6501/18660 [03:07<05:14, 38.70it/s]
 35%|███▍      | 6506/18660 [03:07<05:10, 39.14it/s]
 35%|███▍      | 6510/18660 [03:07<05:10, 39.16it/s]
 35%|███▍      | 6514/18660 [03:07<05:09, 39.25it/s]
 35%|███▍      | 6519/18660 [03:07<05:04, 39.81it/s]
 35%|███▍      | 6523/18660 [03:07<05:11, 38.97it/s]
 35%|███▍      | 6528/18660 [03:07<05:08, 39.38it/s]
 35%|███▌      | 6532/18660 [03:07<05:12, 38.82it/s]
 35%|███▌      | 6537/18660 [03:08<05:07, 39.36it/s]
 35%|███▌      | 6541/18660 [03:08<05:10, 39.07it/s]
 35%|███▌      | 6546/18660 [03:08<05:08, 39.30it/s]
 35%|███▌      | 6550/18660 [03:08<05:08, 39.21it/s]
 35%|███▌      | 6554/18660 [03:08<05:07, 39.40it/s]
 35%|███▌      | 6558/18660 [03:08<05:20, 37.77it/s]
 35%|███▌      | 6562/18660 [03:08<05:16, 38.19it/s]
 35%|███▌      | 6566/18660 [03:08<05:18, 37.97it/s]
 35%|███▌      | 6570/18660 [03:08<05:18, 38.02it/s]
 35%|███▌      | 6574/18660 [03:09<05:16, 38.15it/s]
 35%|███▌

[2m[36m(_objective pid=30118)[0m {'loss': 0.1534, 'learning_rate': 7.4096716233980145e-06, 'epoch': 1.13}


[2m[36m(_objective pid=30118)[0m  38%|███▊      | 7005/18660 [03:20<04:50, 40.12it/s]
 38%|███▊      | 7010/18660 [03:20<04:48, 40.42it/s]
 38%|███▊      | 7015/18660 [03:20<04:48, 40.30it/s]
 38%|███▊      | 7020/18660 [03:20<04:50, 40.11it/s]
 38%|███▊      | 7025/18660 [03:20<04:54, 39.51it/s]
 38%|███▊      | 7029/18660 [03:20<04:58, 38.91it/s]
 38%|███▊      | 7033/18660 [03:21<04:57, 39.05it/s]
 38%|███▊      | 7038/18660 [03:21<04:55, 39.36it/s]
 38%|███▊      | 7042/18660 [03:21<04:56, 39.20it/s]
 38%|███▊      | 7047/18660 [03:21<05:04, 38.15it/s]
 38%|███▊      | 7052/18660 [03:21<04:58, 38.87it/s]
 38%|███▊      | 7056/18660 [03:21<05:02, 38.36it/s]
 38%|███▊      | 7060/18660 [03:21<05:00, 38.64it/s]
 38%|███▊      | 7065/18660 [03:21<04:55, 39.27it/s]
 38%|███▊      | 7069/18660 [03:21<04:54, 39.37it/s]
 38%|███▊      | 7073/18660 [03:22<04:55, 39.16it/s]
 38%|███▊      | 7077/18660 [03:22<04:54, 39.40it/s]
 38%|███▊      | 7082/18660 [03:22<04:50, 39.89it/s]
 38%|███▊

[2m[36m(_objective pid=30118)[0m {'loss': 0.1657, 'learning_rate': 7.091932703012165e-06, 'epoch': 1.21}


[2m[36m(_objective pid=30118)[0m  40%|████      | 7502/18660 [03:33<04:35, 40.54it/s]
 40%|████      | 7507/18660 [03:33<04:41, 39.69it/s]
 40%|████      | 7512/18660 [03:33<04:39, 39.93it/s]
 40%|████      | 7516/18660 [03:33<04:42, 39.50it/s]
 40%|████      | 7520/18660 [03:33<04:44, 39.18it/s]
 40%|████      | 7524/18660 [03:33<04:49, 38.44it/s]
 40%|████      | 7528/18660 [03:33<04:49, 38.44it/s]
 40%|████      | 7532/18660 [03:34<04:47, 38.67it/s]
 40%|████      | 7537/18660 [03:34<04:41, 39.45it/s]
 40%|████      | 7541/18660 [03:34<04:53, 37.91it/s]
 40%|████      | 7546/18660 [03:34<04:48, 38.50it/s]
 40%|████      | 7550/18660 [03:34<04:48, 38.51it/s]
 40%|████      | 7555/18660 [03:34<04:42, 39.29it/s]
 41%|████      | 7559/18660 [03:34<04:41, 39.38it/s]
 41%|████      | 7563/18660 [03:34<04:43, 39.20it/s]
 41%|████      | 7568/18660 [03:35<04:38, 39.78it/s]
 41%|████      | 7573/18660 [03:35<04:46, 38.66it/s]


Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:16:55. Total running time: 56min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m  41%|████      | 7578/18660 [03:35<04:41, 39.43it/s]
 41%|████      | 7582/18660 [03:35<04:51, 38.01it/s]
 41%|████      | 7587/18660 [03:35<04:43, 39.07it/s]
 41%|████      | 7592/18660 [03:35<04:38, 39.75it/s]
 41%|████      | 7597/18660 [03:35<04:34, 40.24it/s]
 41%|████      | 7602/18660 [03:35<04:35, 40.19it/s]
 41%|████      | 7607/18660 [03:35<04:34, 40.27it/s]
 41%|████      | 7612/18660 [03:36<04:34, 40.19it/s]
 41%|████      | 7617/18660 [03:36<04:35, 40.02it/s]
 41%|████      | 7622/18660 [03:36<04:38, 39.65it/s]
 41%|████      | 7626/18660 [03:36<04:38, 39.67it/s]
 41%|████      | 7630/18660 [03:36<04:39, 39.40it/s]
 41%|████      | 7634/18660 [03:36<04:42, 39.01it/s]
 41%|████      | 7638/18660 [03:36<04:41, 39.18it/s]
 41%|████      | 7643/18660 [03:36<04:37, 39.71it/s]
 41%|████      | 7647/18660 [03:37<04:48, 38.14it/s]
 41%|████      | 7652/18660 [03:37<04:40, 39.18it/s]
 41%|████      | 7656/18660 [03:37<04:40, 39.28it/s]
 41%|████

[2m[36m(_objective pid=30118)[0m {'loss': 0.1681, 'learning_rate': 6.774193782626315e-06, 'epoch': 1.29}


[2m[36m(_objective pid=30118)[0m  43%|████▎     | 8005/18660 [03:46<04:59, 35.60it/s]
 43%|████▎     | 8009/18660 [03:46<05:08, 34.57it/s]
 43%|████▎     | 8013/18660 [03:46<05:03, 35.14it/s]
 43%|████▎     | 8017/18660 [03:46<05:07, 34.58it/s]
 43%|████▎     | 8021/18660 [03:46<04:57, 35.76it/s]
 43%|████▎     | 8025/18660 [03:46<04:55, 35.94it/s]
 43%|████▎     | 8029/18660 [03:47<04:52, 36.39it/s]
 43%|████▎     | 8033/18660 [03:47<04:50, 36.53it/s]
 43%|████▎     | 8037/18660 [03:47<04:51, 36.46it/s]
 43%|████▎     | 8041/18660 [03:47<04:47, 36.88it/s]
 43%|████▎     | 8045/18660 [03:47<04:42, 37.53it/s]
 43%|████▎     | 8049/18660 [03:47<04:38, 38.16it/s]
 43%|████▎     | 8053/18660 [03:47<04:35, 38.56it/s]
 43%|████▎     | 8057/18660 [03:47<04:33, 38.83it/s]
 43%|████▎     | 8062/18660 [03:47<04:28, 39.48it/s]
 43%|████▎     | 8067/18660 [03:48<04:27, 39.67it/s]
 43%|████▎     | 8071/18660 [03:48<04:27, 39.57it/s]
 43%|████▎     | 8076/18660 [03:48<04:22, 40.27it/s]
 43%|████

[2m[36m(_objective pid=30118)[0m {'loss': 0.1808, 'learning_rate': 6.456454862240465e-06, 'epoch': 1.37}


[2m[36m(_objective pid=30118)[0m  46%|████▌     | 8506/18660 [03:59<04:17, 39.38it/s]
 46%|████▌     | 8510/18660 [03:59<04:22, 38.73it/s]
 46%|████▌     | 8514/18660 [03:59<04:20, 39.00it/s]
 46%|████▌     | 8518/18660 [03:59<04:23, 38.51it/s]
 46%|████▌     | 8523/18660 [03:59<04:18, 39.16it/s]
 46%|████▌     | 8527/18660 [03:59<04:22, 38.62it/s]
 46%|████▌     | 8531/18660 [03:59<04:24, 38.35it/s]
 46%|████▌     | 8535/18660 [03:59<04:24, 38.33it/s]
 46%|████▌     | 8539/18660 [03:59<04:22, 38.53it/s]
 46%|████▌     | 8543/18660 [04:00<04:23, 38.40it/s]
 46%|████▌     | 8547/18660 [04:00<04:35, 36.65it/s]
 46%|████▌     | 8551/18660 [04:00<04:31, 37.23it/s]
 46%|████▌     | 8556/18660 [04:00<04:24, 38.14it/s]
 46%|████▌     | 8560/18660 [04:00<04:21, 38.63it/s]
 46%|████▌     | 8564/18660 [04:00<04:26, 37.87it/s]
 46%|████▌     | 8568/18660 [04:00<04:27, 37.67it/s]
 46%|████▌     | 8572/18660 [04:00<04:24, 38.10it/s]
 46%|████▌     | 8576/18660 [04:00<04:26, 37.89it/s]
 46%|████

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:17:25. Total running time: 57min 3s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m  47%|████▋     | 8747/18660 [04:05<04:09, 39.76it/s]
 47%|████▋     | 8752/18660 [04:05<04:08, 39.92it/s]
 47%|████▋     | 8757/18660 [04:05<04:07, 39.99it/s]
 47%|████▋     | 8762/18660 [04:05<04:07, 39.98it/s]
 47%|████▋     | 8767/18660 [04:05<04:05, 40.24it/s]
 47%|████▋     | 8772/18660 [04:06<04:11, 39.33it/s]
 47%|████▋     | 8776/18660 [04:06<04:10, 39.40it/s]
 47%|████▋     | 8780/18660 [04:06<04:20, 37.94it/s]
 47%|████▋     | 8785/18660 [04:06<04:15, 38.71it/s]
 47%|████▋     | 8790/18660 [04:06<04:09, 39.56it/s]
 47%|████▋     | 8795/18660 [04:06<04:05, 40.24it/s]
 47%|████▋     | 8800/18660 [04:06<04:10, 39.37it/s]
 47%|████▋     | 8804/18660 [04:06<04:10, 39.32it/s]
 47%|████▋     | 8808/18660 [04:06<04:15, 38.53it/s]
 47%|████▋     | 8813/18660 [04:07<04:10, 39.24it/s]
 47%|████▋     | 8818/18660 [04:07<04:08, 39.66it/s]
 47%|████▋     | 8823/18660 [04:07<04:05, 40.13it/s]
 47%|████▋     | 8828/18660 [04:07<04:03, 40.45it/s]
 47%|████

[2m[36m(_objective pid=30118)[0m {'loss': 0.1356, 'learning_rate': 6.138715941854616e-06, 'epoch': 1.45}


[2m[36m(_objective pid=30118)[0m  48%|████▊     | 9007/18660 [04:11<04:07, 38.94it/s]
 48%|████▊     | 9011/18660 [04:11<04:07, 39.02it/s]
 48%|████▊     | 9015/18660 [04:12<04:07, 38.94it/s]
 48%|████▊     | 9020/18660 [04:12<04:03, 39.53it/s]
 48%|████▊     | 9025/18660 [04:12<04:01, 39.97it/s]
 48%|████▊     | 9029/18660 [04:12<04:04, 39.40it/s]
 48%|████▊     | 9034/18660 [04:12<03:59, 40.13it/s]
 48%|████▊     | 9039/18660 [04:12<04:00, 40.01it/s]
 48%|████▊     | 9043/18660 [04:12<04:01, 39.88it/s]
 48%|████▊     | 9047/18660 [04:12<04:00, 39.90it/s]
 49%|████▊     | 9051/18660 [04:12<04:03, 39.45it/s]
 49%|████▊     | 9055/18660 [04:13<04:03, 39.38it/s]
 49%|████▊     | 9060/18660 [04:13<04:04, 39.34it/s]
 49%|████▊     | 9064/18660 [04:13<04:15, 37.62it/s]
 49%|████▊     | 9068/18660 [04:13<04:11, 38.12it/s]
 49%|████▊     | 9072/18660 [04:13<04:11, 38.17it/s]
 49%|████▊     | 9076/18660 [04:13<04:09, 38.34it/s]
 49%|████▊     | 9081/18660 [04:13<04:04, 39.23it/s]
 49%|████

[2m[36m(_objective pid=30118)[0m {'loss': 0.1773, 'learning_rate': 5.820977021468766e-06, 'epoch': 1.53}


[2m[36m(_objective pid=30118)[0m  51%|█████     | 9511/18660 [04:25<03:49, 39.94it/s]
 51%|█████     | 9516/18660 [04:25<03:47, 40.12it/s]
 51%|█████     | 9521/18660 [04:25<03:48, 40.06it/s]
 51%|█████     | 9526/18660 [04:25<03:47, 40.21it/s]
 51%|█████     | 9531/18660 [04:25<03:44, 40.63it/s]
 51%|█████     | 9536/18660 [04:25<03:43, 40.86it/s]
 51%|█████     | 9541/18660 [04:25<03:51, 39.32it/s]
 51%|█████     | 9546/18660 [04:25<03:48, 39.86it/s]
 51%|█████     | 9551/18660 [04:26<03:48, 39.80it/s]
 51%|█████     | 9555/18660 [04:26<03:48, 39.79it/s]
 51%|█████     | 9559/18660 [04:26<03:49, 39.61it/s]
 51%|█████     | 9563/18660 [04:26<03:51, 39.38it/s]
 51%|█████▏    | 9568/18660 [04:26<03:47, 39.99it/s]
 51%|█████▏    | 9572/18660 [04:26<03:48, 39.86it/s]
 51%|█████▏    | 9576/18660 [04:26<03:48, 39.71it/s]
 51%|█████▏    | 9580/18660 [04:26<03:48, 39.68it/s]
 51%|█████▏    | 9585/18660 [04:26<03:47, 39.87it/s]
 51%|█████▏    | 9589/18660 [04:27<03:47, 39.79it/s]
 51%|████

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:17:55. Total running time: 57min 33s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m  53%|█████▎    | 9907/18660 [04:35<04:11, 34.85it/s]
 53%|█████▎    | 9911/18660 [04:35<04:12, 34.65it/s]
 53%|█████▎    | 9915/18660 [04:35<04:12, 34.65it/s]
 53%|█████▎    | 9919/18660 [04:35<04:16, 34.08it/s]
 53%|█████▎    | 9923/18660 [04:35<04:10, 34.82it/s]
 53%|█████▎    | 9927/18660 [04:35<04:04, 35.67it/s]
 53%|█████▎    | 9931/18660 [04:35<04:02, 36.04it/s]
 53%|█████▎    | 9936/18660 [04:36<03:52, 37.49it/s]
 53%|█████▎    | 9941/18660 [04:36<03:44, 38.76it/s]
 53%|█████▎    | 9945/18660 [04:36<03:53, 37.31it/s]
 53%|█████▎    | 9950/18660 [04:36<03:45, 38.58it/s]
 53%|█████▎    | 9955/18660 [04:36<03:40, 39.49it/s]
 53%|█████▎    | 9960/18660 [04:36<03:37, 40.01it/s]
 53%|█████▎    | 9965/18660 [04:36<03:33, 40.76it/s]
 53%|█████▎    | 9970/18660 [04:36<03:36, 40.16it/s]
 53%|█████▎    | 9975/18660 [04:37<03:53, 37.20it/s]
 53%|█████▎    | 9979/18660 [04:37<03:49, 37.78it/s]
 54%|█████▎    | 9984/18660 [04:37<03:44, 38.69it/s]
 54%|████

[2m[36m(_objective pid=30118)[0m {'loss': 0.1505, 'learning_rate': 5.503238101082916e-06, 'epoch': 1.61}


[2m[36m(_objective pid=30118)[0m  54%|█████▎    | 10007/18660 [04:37<03:32, 40.67it/s]
 54%|█████▎    | 10012/18660 [04:38<03:32, 40.72it/s]
 54%|█████▎    | 10017/18660 [04:38<03:32, 40.72it/s]
 54%|█████▎    | 10022/18660 [04:38<03:31, 40.81it/s]
 54%|█████▎    | 10027/18660 [04:38<03:30, 40.97it/s]
 54%|█████▍    | 10032/18660 [04:38<03:35, 39.95it/s]
 54%|█████▍    | 10037/18660 [04:38<03:33, 40.46it/s]
 54%|█████▍    | 10042/18660 [04:38<03:41, 38.98it/s]
 54%|█████▍    | 10047/18660 [04:38<03:37, 39.58it/s]
 54%|█████▍    | 10052/18660 [04:39<03:35, 39.96it/s]
 54%|█████▍    | 10057/18660 [04:39<03:42, 38.70it/s]
 54%|█████▍    | 10062/18660 [04:39<03:38, 39.28it/s]
 54%|█████▍    | 10066/18660 [04:39<03:38, 39.34it/s]
 54%|█████▍    | 10071/18660 [04:39<03:35, 39.84it/s]
 54%|█████▍    | 10076/18660 [04:39<03:34, 40.02it/s]
 54%|█████▍    | 10081/18660 [04:39<03:34, 40.06it/s]
 54%|█████▍    | 10086/18660 [04:39<03:32, 40.33it/s]
 54%|█████▍    | 10091/18660 [04:40<03:36, 39

[2m[36m(_objective pid=30118)[0m {'loss': 0.1775, 'learning_rate': 5.185499180697066e-06, 'epoch': 1.69}


[2m[36m(_objective pid=30118)[0m  56%|█████▋    | 10506/18660 [04:50<03:48, 35.73it/s]
 56%|█████▋    | 10510/18660 [04:51<03:47, 35.81it/s]
 56%|█████▋    | 10514/18660 [04:51<03:42, 36.62it/s]
 56%|█████▋    | 10518/18660 [04:51<03:42, 36.56it/s]
 56%|█████▋    | 10522/18660 [04:51<03:42, 36.58it/s]
 56%|█████▋    | 10526/18660 [04:51<03:44, 36.20it/s]
 56%|█████▋    | 10530/18660 [04:51<03:42, 36.54it/s]
 56%|█████▋    | 10534/18660 [04:51<03:42, 36.52it/s]
 56%|█████▋    | 10538/18660 [04:51<03:39, 37.05it/s]
 56%|█████▋    | 10542/18660 [04:51<03:39, 37.04it/s]
 57%|█████▋    | 10546/18660 [04:51<03:37, 37.35it/s]
 57%|█████▋    | 10550/18660 [04:52<03:36, 37.42it/s]
 57%|█████▋    | 10554/18660 [04:52<03:38, 37.09it/s]
 57%|█████▋    | 10558/18660 [04:52<03:35, 37.68it/s]
 57%|█████▋    | 10563/18660 [04:52<03:30, 38.40it/s]
 57%|█████▋    | 10567/18660 [04:52<03:28, 38.80it/s]
 57%|█████▋    | 10572/18660 [04:52<03:25, 39.28it/s]
 57%|█████▋    | 10576/18660 [04:52<03:25, 39

[2m[36m(_objective pid=30118)[0m {'loss': 0.1993, 'learning_rate': 4.8677602603112165e-06, 'epoch': 1.77}


[2m[36m(_objective pid=30118)[0m  59%|█████▉    | 11007/18660 [05:03<03:16, 38.93it/s]
 59%|█████▉    | 11011/18660 [05:03<03:19, 38.42it/s]
 59%|█████▉    | 11015/18660 [05:03<03:18, 38.55it/s]
 59%|█████▉    | 11019/18660 [05:03<03:16, 38.86it/s]
 59%|█████▉    | 11024/18660 [05:04<03:14, 39.26it/s]
 59%|█████▉    | 11028/18660 [05:04<03:17, 38.68it/s]
 59%|█████▉    | 11032/18660 [05:04<03:15, 38.95it/s]
 59%|█████▉    | 11036/18660 [05:04<03:18, 38.41it/s]
 59%|█████▉    | 11040/18660 [05:04<03:30, 36.23it/s]
 59%|█████▉    | 11044/18660 [05:04<03:28, 36.58it/s]
 59%|█████▉    | 11049/18660 [05:04<03:21, 37.85it/s]
 59%|█████▉    | 11053/18660 [05:04<03:20, 37.86it/s]
 59%|█████▉    | 11058/18660 [05:04<03:16, 38.67it/s]
 59%|█████▉    | 11062/18660 [05:05<03:15, 38.92it/s]
 59%|█████▉    | 11066/18660 [05:05<03:15, 38.78it/s]
 59%|█████▉    | 11070/18660 [05:05<03:18, 38.32it/s]


Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:18:25. Total running time: 58min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m  59%|█████▉    | 11074/18660 [05:05<03:25, 36.96it/s]
 59%|█████▉    | 11078/18660 [05:05<03:22, 37.48it/s]
 59%|█████▉    | 11082/18660 [05:05<03:21, 37.55it/s]
 59%|█████▉    | 11086/18660 [05:05<03:26, 36.62it/s]
 59%|█████▉    | 11090/18660 [05:05<03:24, 37.06it/s]
 59%|█████▉    | 11094/18660 [05:05<03:27, 36.41it/s]
 59%|█████▉    | 11098/18660 [05:06<03:26, 36.66it/s]
 59%|█████▉    | 11102/18660 [05:06<03:28, 36.22it/s]
 60%|█████▉    | 11106/18660 [05:06<03:26, 36.51it/s]
 60%|█████▉    | 11110/18660 [05:06<03:25, 36.72it/s]
 60%|█████▉    | 11114/18660 [05:06<03:34, 35.18it/s]
 60%|█████▉    | 11118/18660 [05:06<03:35, 35.05it/s]
 60%|█████▉    | 11122/18660 [05:06<03:31, 35.69it/s]
 60%|█████▉    | 11126/18660 [05:06<03:32, 35.44it/s]
 60%|█████▉    | 11130/18660 [05:06<03:31, 35.52it/s]
 60%|█████▉    | 11134/18660 [05:07<03:31, 35.65it/s]
 60%|█████▉    | 11138/18660 [05:07<03:31, 35.55it/s]
 60%|█████▉    | 11142/18660 [05:07<03:29, 35

[2m[36m(_objective pid=30118)[0m {'loss': 0.2038, 'learning_rate': 4.5500213399253675e-06, 'epoch': 1.85}


[2m[36m(_objective pid=30118)[0m  62%|██████▏   | 11505/18660 [05:16<03:01, 39.53it/s]
 62%|██████▏   | 11510/18660 [05:16<03:00, 39.72it/s]
 62%|██████▏   | 11515/18660 [05:16<02:58, 40.00it/s]
 62%|██████▏   | 11519/18660 [05:16<03:00, 39.58it/s]
 62%|██████▏   | 11523/18660 [05:16<03:01, 39.39it/s]
 62%|██████▏   | 11527/18660 [05:17<03:01, 39.34it/s]
 62%|██████▏   | 11531/18660 [05:17<03:00, 39.40it/s]
 62%|██████▏   | 11535/18660 [05:17<03:00, 39.52it/s]
 62%|██████▏   | 11540/18660 [05:17<02:58, 39.89it/s]
 62%|██████▏   | 11544/18660 [05:17<03:01, 39.27it/s]
 62%|██████▏   | 11549/18660 [05:17<02:57, 40.03it/s]
 62%|██████▏   | 11554/18660 [05:17<02:57, 40.07it/s]
 62%|██████▏   | 11559/18660 [05:17<02:56, 40.25it/s]
 62%|██████▏   | 11564/18660 [05:17<02:54, 40.56it/s]
 62%|██████▏   | 11569/18660 [05:18<02:55, 40.39it/s]
 62%|██████▏   | 11574/18660 [05:18<02:53, 40.76it/s]
 62%|██████▏   | 11579/18660 [05:18<02:58, 39.71it/s]
 62%|██████▏   | 11583/18660 [05:18<03:00, 39

[2m[36m(_objective pid=30118)[0m {'loss': 0.142, 'learning_rate': 4.232282419539518e-06, 'epoch': 1.93}


[2m[36m(_objective pid=30118)[0m  64%|██████▍   | 12003/18660 [05:29<02:49, 39.39it/s]
 64%|██████▍   | 12008/18660 [05:29<02:47, 39.82it/s]
 64%|██████▍   | 12012/18660 [05:29<02:47, 39.74it/s]
 64%|██████▍   | 12017/18660 [05:29<02:45, 40.03it/s]
 64%|██████▍   | 12022/18660 [05:29<02:44, 40.30it/s]
 64%|██████▍   | 12027/18660 [05:29<02:47, 39.51it/s]
 64%|██████▍   | 12032/18660 [05:30<02:47, 39.67it/s]
 65%|██████▍   | 12037/18660 [05:30<02:46, 39.83it/s]
 65%|██████▍   | 12041/18660 [05:30<02:48, 39.28it/s]
 65%|██████▍   | 12045/18660 [05:30<02:48, 39.34it/s]
 65%|██████▍   | 12050/18660 [05:30<02:46, 39.80it/s]
 65%|██████▍   | 12055/18660 [05:30<02:48, 39.21it/s]
 65%|██████▍   | 12059/18660 [05:30<02:49, 38.97it/s]
 65%|██████▍   | 12064/18660 [05:30<02:48, 39.26it/s]
 65%|██████▍   | 12069/18660 [05:30<02:46, 39.62it/s]
 65%|██████▍   | 12074/18660 [05:31<02:45, 39.91it/s]
 65%|██████▍   | 12079/18660 [05:31<02:43, 40.17it/s]
 65%|██████▍   | 12084/18660 [05:31<02:42, 40

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:18:55. Total running time: 58min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m  66%|██████▌   | 12239/18660 [05:35<02:41, 39.74it/s]
 66%|██████▌   | 12243/18660 [05:35<02:42, 39.41it/s]
 66%|██████▌   | 12247/18660 [05:35<02:42, 39.56it/s]
 66%|██████▌   | 12251/18660 [05:35<02:47, 38.28it/s]
 66%|██████▌   | 12256/18660 [05:35<02:46, 38.53it/s]
 66%|██████▌   | 12260/18660 [05:35<02:45, 38.74it/s]
 66%|██████▌   | 12264/18660 [05:35<02:44, 38.95it/s]
 66%|██████▌   | 12268/18660 [05:36<02:43, 39.06it/s]
 66%|██████▌   | 12272/18660 [05:36<02:50, 37.46it/s]
 66%|██████▌   | 12276/18660 [05:36<02:52, 36.98it/s]
 66%|██████▌   | 12280/18660 [05:36<02:52, 37.07it/s]
 66%|██████▌   | 12284/18660 [05:36<02:55, 36.37it/s]
 66%|██████▌   | 12288/18660 [05:36<02:51, 37.25it/s]
 66%|██████▌   | 12292/18660 [05:36<02:47, 37.96it/s]
 66%|██████▌   | 12297/18660 [05:36<02:43, 38.83it/s]
 66%|██████▌   | 12301/18660 [05:36<02:44, 38.62it/s]
 66%|██████▌   | 12305/18660 [05:37<02:46, 38.17it/s]
 66%|██████▌   | 12309/18660 [05:37<02:45, 38

Trial _objective_f556c_00007 finished iteration 2 at 2023-09-11 14:19:10. Total running time: 58min 48s
+-------------------------------------------------+
| Trial _objective_f556c_00007 result             |
+-------------------------------------------------+
| time_this_iter_s                        171.557 |
| time_total_s                            352.564 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.22955 |
| eval_runtime                             9.5613 |
| eval_samples_per_second                 433.729 |
| eval_steps_per_second                    13.597 |
| objective                               0.22955 |
+-------------------------------------------------+

[2m[36m(_objective pid=30118)[0m {'eval_loss': 0.2295476645231247, 'eval_runtime': 9.5613, 'eval_samples_per_second': 433.729, 'eval_steps_per_second': 13.597, 'epoch': 2.0}


[2m[36m(_objective pid=30118)[0m  67%|██████▋   | 12444/18660 [05:51<1:22:54,  1.25it/s]
 67%|██████▋   | 12448/18660 [05:51<59:45,  1.73it/s]  
 67%|██████▋   | 12452/18660 [05:51<43:05,  2.40it/s]
 67%|██████▋   | 12456/18660 [05:51<31:12,  3.31it/s]
 67%|██████▋   | 12460/18660 [05:52<22:45,  4.54it/s]
 67%|██████▋   | 12464/18660 [05:52<16:49,  6.14it/s]
 67%|██████▋   | 12468/18660 [05:52<12:35,  8.19it/s]
 67%|██████▋   | 12472/18660 [05:52<09:40, 10.65it/s]
 67%|██████▋   | 12476/18660 [05:52<07:38, 13.49it/s]
 67%|██████▋   | 12480/18660 [05:52<06:11, 16.66it/s]
 67%|██████▋   | 12484/18660 [05:52<05:11, 19.80it/s]
 67%|██████▋   | 12488/18660 [05:52<04:32, 22.69it/s]
 67%|██████▋   | 12492/18660 [05:52<04:00, 25.68it/s]
 67%|██████▋   | 12496/18660 [05:53<03:39, 28.13it/s]
 67%|██████▋   | 12500/18660 [05:53<03:23, 30.20it/s]


[2m[36m(_objective pid=30118)[0m {'loss': 0.1866, 'learning_rate': 3.914543499153668e-06, 'epoch': 2.01}


[2m[36m(_objective pid=30118)[0m  67%|██████▋   | 12504/18660 [05:53<03:15, 31.49it/s]
 67%|██████▋   | 12508/18660 [05:53<03:07, 32.77it/s]
 67%|██████▋   | 12512/18660 [05:53<03:02, 33.71it/s]
 67%|██████▋   | 12516/18660 [05:53<02:59, 34.30it/s]
 67%|██████▋   | 12520/18660 [05:53<02:56, 34.88it/s]
 67%|██████▋   | 12524/18660 [05:53<02:54, 35.14it/s]
 67%|██████▋   | 12528/18660 [05:53<02:49, 36.22it/s]
 67%|██████▋   | 12532/18660 [05:54<02:45, 37.03it/s]
 67%|██████▋   | 12536/18660 [05:54<02:49, 36.23it/s]
 67%|██████▋   | 12540/18660 [05:54<02:45, 37.02it/s]
 67%|██████▋   | 12544/18660 [05:54<02:45, 37.06it/s]
 67%|██████▋   | 12548/18660 [05:54<02:43, 37.40it/s]
 67%|██████▋   | 12552/18660 [05:54<02:48, 36.31it/s]
 67%|██████▋   | 12556/18660 [05:54<02:49, 35.93it/s]
 67%|██████▋   | 12560/18660 [05:54<02:51, 35.61it/s]
 67%|██████▋   | 12564/18660 [05:54<02:54, 34.95it/s]
 67%|██████▋   | 12568/18660 [05:55<02:52, 35.23it/s]
 67%|██████▋   | 12572/18660 [05:55<02:49, 35

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:19:25. Total running time: 59min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                     

[2m[36m(_objective pid=30118)[0m  69%|██████▉   | 12968/18660 [06:05<02:34, 36.80it/s]
 70%|██████▉   | 12972/18660 [06:05<02:32, 37.33it/s]
 70%|██████▉   | 12977/18660 [06:05<02:30, 37.85it/s]
 70%|██████▉   | 12981/18660 [06:05<02:31, 37.58it/s]
 70%|██████▉   | 12985/18660 [06:05<02:28, 38.19it/s]
 70%|██████▉   | 12989/18660 [06:05<02:29, 37.96it/s]
 70%|██████▉   | 12993/18660 [06:06<02:31, 37.53it/s]
 70%|██████▉   | 12997/18660 [06:06<02:31, 37.50it/s]
 70%|██████▉   | 13001/18660 [06:06<02:30, 37.64it/s]
 70%|██████▉   | 13005/18660 [06:06<02:38, 35.59it/s]


[2m[36m(_objective pid=30118)[0m {'loss': 0.1198, 'learning_rate': 3.596804578767818e-06, 'epoch': 2.09}


[2m[36m(_objective pid=30118)[0m  70%|██████▉   | 13009/18660 [06:06<02:37, 35.86it/s]
 70%|██████▉   | 13013/18660 [06:06<02:32, 36.95it/s]
 70%|██████▉   | 13017/18660 [06:06<02:32, 37.05it/s]
 70%|██████▉   | 13021/18660 [06:06<02:32, 37.00it/s]
 70%|██████▉   | 13025/18660 [06:06<02:32, 36.90it/s]
 70%|██████▉   | 13029/18660 [06:07<02:33, 36.79it/s]
 70%|██████▉   | 13033/18660 [06:07<02:38, 35.55it/s]
 70%|██████▉   | 13037/18660 [06:07<02:39, 35.18it/s]
 70%|██████▉   | 13041/18660 [06:07<02:35, 36.25it/s]
 70%|██████▉   | 13045/18660 [06:07<02:36, 35.87it/s]
 70%|██████▉   | 13049/18660 [06:07<02:36, 35.78it/s]
 70%|██████▉   | 13053/18660 [06:07<02:32, 36.88it/s]
 70%|██████▉   | 13057/18660 [06:07<02:35, 36.02it/s]
 70%|███████   | 13062/18660 [06:07<02:30, 37.29it/s]
 70%|███████   | 13066/18660 [06:08<02:32, 36.64it/s]
 70%|███████   | 13070/18660 [06:08<02:30, 37.08it/s]
 70%|███████   | 13074/18660 [06:08<02:30, 37.21it/s]
 70%|███████   | 13078/18660 [06:08<02:31, 36

[2m[36m(_objective pid=30118)[0m {'loss': 0.1221, 'learning_rate': 3.2790656583819685e-06, 'epoch': 2.17}


[2m[36m(_objective pid=30118)[0m  72%|███████▏  | 13508/18660 [06:20<02:20, 36.74it/s]
 72%|███████▏  | 13512/18660 [06:20<02:21, 36.36it/s]
 72%|███████▏  | 13516/18660 [06:20<02:19, 36.84it/s]
 72%|███████▏  | 13520/18660 [06:20<02:19, 36.91it/s]
 72%|███████▏  | 13524/18660 [06:20<02:16, 37.75it/s]
 72%|███████▏  | 13528/18660 [06:20<02:18, 37.15it/s]
 73%|███████▎  | 13532/18660 [06:20<02:16, 37.68it/s]
 73%|███████▎  | 13536/18660 [06:20<02:14, 38.21it/s]
 73%|███████▎  | 13540/18660 [06:20<02:18, 36.84it/s]
 73%|███████▎  | 13544/18660 [06:20<02:17, 37.22it/s]
 73%|███████▎  | 13548/18660 [06:21<02:21, 36.08it/s]
 73%|███████▎  | 13552/18660 [06:21<02:20, 36.47it/s]
 73%|███████▎  | 13556/18660 [06:21<02:18, 36.84it/s]
 73%|███████▎  | 13560/18660 [06:21<02:15, 37.67it/s]
 73%|███████▎  | 13564/18660 [06:21<02:12, 38.33it/s]
 73%|███████▎  | 13568/18660 [06:21<02:11, 38.74it/s]
 73%|███████▎  | 13572/18660 [06:21<02:11, 38.69it/s]
 73%|███████▎  | 13576/18660 [06:21<02:12, 38

[2m[36m(_objective pid=30118)[0m {'loss': 0.0904, 'learning_rate': 2.961326737996119e-06, 'epoch': 2.25}


[2m[36m(_objective pid=30118)[0m  75%|███████▌  | 14005/18660 [06:33<02:04, 37.37it/s]
 75%|███████▌  | 14009/18660 [06:33<02:03, 37.51it/s]
 75%|███████▌  | 14013/18660 [06:33<02:04, 37.46it/s]
 75%|███████▌  | 14017/18660 [06:33<02:09, 35.85it/s]
 75%|███████▌  | 14021/18660 [06:33<02:07, 36.48it/s]
 75%|███████▌  | 14025/18660 [06:33<02:05, 36.87it/s]
 75%|███████▌  | 14029/18660 [06:34<02:03, 37.55it/s]
 75%|███████▌  | 14033/18660 [06:34<02:02, 37.73it/s]
 75%|███████▌  | 14038/18660 [06:34<01:59, 38.55it/s]
 75%|███████▌  | 14042/18660 [06:34<01:59, 38.71it/s]
 75%|███████▌  | 14046/18660 [06:34<01:59, 38.56it/s]
 75%|███████▌  | 14050/18660 [06:34<01:58, 38.85it/s]
 75%|███████▌  | 14055/18660 [06:34<01:56, 39.40it/s]
 75%|███████▌  | 14059/18660 [06:34<02:02, 37.43it/s]
 75%|███████▌  | 14063/18660 [06:34<02:04, 37.05it/s]
 75%|███████▌  | 14068/18660 [06:35<02:00, 38.16it/s]
 75%|███████▌  | 14073/18660 [06:35<01:57, 39.14it/s]
 75%|███████▌  | 14078/18660 [06:35<01:55, 39

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:19:55. Total running time: 59min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                    

[2m[36m(_objective pid=30118)[0m  75%|███████▌  | 14083/18660 [06:35<01:55, 39.72it/s]
 75%|███████▌  | 14087/18660 [06:35<01:55, 39.43it/s]
 76%|███████▌  | 14091/18660 [06:35<01:57, 38.73it/s]
 76%|███████▌  | 14095/18660 [06:35<02:00, 38.00it/s]
 76%|███████▌  | 14100/18660 [06:35<01:58, 38.55it/s]
 76%|███████▌  | 14104/18660 [06:36<01:57, 38.78it/s]
 76%|███████▌  | 14108/18660 [06:36<01:56, 38.96it/s]
 76%|███████▌  | 14112/18660 [06:36<01:56, 38.92it/s]
 76%|███████▌  | 14117/18660 [06:36<01:55, 39.40it/s]
 76%|███████▌  | 14122/18660 [06:36<01:54, 39.72it/s]
 76%|███████▌  | 14127/18660 [06:36<01:53, 39.90it/s]
 76%|███████▌  | 14131/18660 [06:36<01:53, 39.90it/s]
 76%|███████▌  | 14136/18660 [06:36<01:53, 39.73it/s]
 76%|███████▌  | 14140/18660 [06:36<01:56, 38.89it/s]
 76%|███████▌  | 14145/18660 [06:37<01:54, 39.43it/s]
 76%|███████▌  | 14150/18660 [06:37<01:54, 39.47it/s]
 76%|███████▌  | 14154/18660 [06:37<01:53, 39.54it/s]
 76%|███████▌  | 14159/18660 [06:37<01:52, 39

[2m[36m(_objective pid=30118)[0m {'loss': 0.1081, 'learning_rate': 2.643587817610269e-06, 'epoch': 2.33}


[2m[36m(_objective pid=30118)[0m  78%|███████▊  | 14508/18660 [06:46<01:50, 37.57it/s]
 78%|███████▊  | 14512/18660 [06:46<01:51, 37.05it/s]
 78%|███████▊  | 14517/18660 [06:47<01:48, 38.13it/s]
 78%|███████▊  | 14521/18660 [06:47<01:48, 38.25it/s]
 78%|███████▊  | 14525/18660 [06:47<01:47, 38.55it/s]
 78%|███████▊  | 14530/18660 [06:47<01:45, 39.23it/s]
 78%|███████▊  | 14535/18660 [06:47<01:45, 39.23it/s]
 78%|███████▊  | 14539/18660 [06:47<01:44, 39.33it/s]
 78%|███████▊  | 14543/18660 [06:47<01:44, 39.30it/s]
 78%|███████▊  | 14547/18660 [06:47<01:44, 39.39it/s]
 78%|███████▊  | 14551/18660 [06:47<01:45, 38.77it/s]
 78%|███████▊  | 14555/18660 [06:48<01:45, 38.78it/s]
 78%|███████▊  | 14559/18660 [06:48<01:46, 38.56it/s]
 78%|███████▊  | 14563/18660 [06:48<01:45, 38.75it/s]
 78%|███████▊  | 14567/18660 [06:48<01:46, 38.53it/s]
 78%|███████▊  | 14571/18660 [06:48<01:47, 38.16it/s]
 78%|███████▊  | 14575/18660 [06:48<01:45, 38.67it/s]
 78%|███████▊  | 14579/18660 [06:48<01:45, 38

[2m[36m(_objective pid=30118)[0m {'loss': 0.0689, 'learning_rate': 2.3258488972244197e-06, 'epoch': 2.41}


 80%|████████  | 15012/18660 [07:00<01:37, 37.34it/s]
 80%|████████  | 15016/18660 [07:00<01:38, 37.00it/s]
 80%|████████  | 15020/18660 [07:00<01:36, 37.61it/s]
 81%|████████  | 15024/18660 [07:00<01:35, 38.03it/s]
 81%|████████  | 15028/18660 [07:00<01:34, 38.44it/s]
 81%|████████  | 15032/18660 [07:00<01:37, 37.08it/s]
 81%|████████  | 15036/18660 [07:01<01:36, 37.46it/s]
 81%|████████  | 15041/18660 [07:01<01:33, 38.58it/s]
 81%|████████  | 15045/18660 [07:01<01:34, 38.05it/s]
 81%|████████  | 15049/18660 [07:01<01:34, 38.06it/s]
 81%|████████  | 15053/18660 [07:01<01:33, 38.51it/s]
 81%|████████  | 15058/18660 [07:01<01:31, 39.36it/s]
 81%|████████  | 15062/18660 [07:01<01:31, 39.41it/s]
 81%|████████  | 15067/18660 [07:01<01:33, 38.22it/s]
 81%|████████  | 15071/18660 [07:01<01:34, 37.86it/s]
 81%|████████  | 15075/18660 [07:02<01:37, 36.89it/s]
 81%|████████  | 15080/18660 [07:02<01:33, 38.12it/s]
 81%|████████  | 15085/18660 [07:02<01:32, 38.85it/s]
 81%|████████  | 15089/18660

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:20:25. Total running time: 1hr 0min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                  

[2m[36m(_objective pid=30118)[0m  82%|████████▏ | 15208/18660 [07:05<01:28, 38.86it/s]
 82%|████████▏ | 15212/18660 [07:05<01:30, 38.09it/s]
 82%|████████▏ | 15216/18660 [07:05<01:31, 37.75it/s]
 82%|████████▏ | 15221/18660 [07:05<01:28, 38.77it/s]
 82%|████████▏ | 15226/18660 [07:05<01:27, 39.36it/s]
 82%|████████▏ | 15230/18660 [07:06<01:28, 38.82it/s]
 82%|████████▏ | 15234/18660 [07:06<01:27, 39.03it/s]
 82%|████████▏ | 15238/18660 [07:06<01:29, 38.23it/s]
 82%|████████▏ | 15242/18660 [07:06<01:29, 38.20it/s]
 82%|████████▏ | 15246/18660 [07:06<01:29, 37.98it/s]
 82%|████████▏ | 15251/18660 [07:06<01:27, 38.80it/s]
 82%|████████▏ | 15256/18660 [07:06<01:26, 39.19it/s]
 82%|████████▏ | 15260/18660 [07:06<01:26, 39.32it/s]
 82%|████████▏ | 15264/18660 [07:06<01:27, 38.88it/s]
 82%|████████▏ | 15268/18660 [07:07<01:27, 38.77it/s]
 82%|████████▏ | 15272/18660 [07:07<01:29, 37.91it/s]
 82%|████████▏ | 15276/18660 [07:07<01:30, 37.51it/s]
 82%|████████▏ | 15280/18660 [07:07<01:29, 37

[2m[36m(_objective pid=30118)[0m {'loss': 0.0834, 'learning_rate': 2.00810997683857e-06, 'epoch': 2.49}


[2m[36m(_objective pid=30118)[0m  83%|████████▎ | 15505/18660 [07:13<01:23, 37.87it/s]
 83%|████████▎ | 15509/18660 [07:13<01:23, 37.85it/s]
 83%|████████▎ | 15513/18660 [07:13<01:22, 38.11it/s]
 83%|████████▎ | 15517/18660 [07:13<01:21, 38.64it/s]
 83%|████████▎ | 15522/18660 [07:13<01:20, 38.89it/s]
 83%|████████▎ | 15526/18660 [07:13<01:20, 38.92it/s]
 83%|████████▎ | 15530/18660 [07:13<01:20, 38.77it/s]
 83%|████████▎ | 15534/18660 [07:13<01:24, 36.93it/s]
 83%|████████▎ | 15538/18660 [07:14<01:23, 37.51it/s]
 83%|████████▎ | 15542/18660 [07:14<01:24, 36.74it/s]
 83%|████████▎ | 15546/18660 [07:14<01:24, 36.80it/s]
 83%|████████▎ | 15550/18660 [07:14<01:24, 36.70it/s]
 83%|████████▎ | 15554/18660 [07:14<01:23, 37.01it/s]
 83%|████████▎ | 15558/18660 [07:14<01:22, 37.58it/s]
 83%|████████▎ | 15562/18660 [07:14<01:21, 37.86it/s]
 83%|████████▎ | 15566/18660 [07:14<01:21, 37.75it/s]
 83%|████████▎ | 15570/18660 [07:14<01:21, 37.86it/s]
 83%|████████▎ | 15574/18660 [07:15<01:23, 36

[2m[36m(_objective pid=30118)[0m {'loss': 0.1218, 'learning_rate': 1.6903710564527202e-06, 'epoch': 2.57}


[2m[36m(_objective pid=30118)[0m  86%|████████▌ | 16005/18660 [07:26<01:09, 37.97it/s]
 86%|████████▌ | 16009/18660 [07:26<01:09, 38.20it/s]
 86%|████████▌ | 16013/18660 [07:26<01:10, 37.73it/s]
 86%|████████▌ | 16017/18660 [07:26<01:09, 38.13it/s]
 86%|████████▌ | 16021/18660 [07:26<01:08, 38.61it/s]
 86%|████████▌ | 16025/18660 [07:26<01:11, 36.96it/s]
 86%|████████▌ | 16030/18660 [07:27<01:09, 37.79it/s]
 86%|████████▌ | 16034/18660 [07:27<01:12, 36.07it/s]
 86%|████████▌ | 16038/18660 [07:27<01:16, 34.28it/s]
 86%|████████▌ | 16042/18660 [07:27<01:17, 33.84it/s]
 86%|████████▌ | 16046/18660 [07:27<01:17, 33.58it/s]
 86%|████████▌ | 16050/18660 [07:27<01:16, 34.24it/s]
 86%|████████▌ | 16054/18660 [07:27<01:16, 34.25it/s]
 86%|████████▌ | 16058/18660 [07:27<01:12, 35.69it/s]
 86%|████████▌ | 16062/18660 [07:28<01:11, 36.42it/s]
 86%|████████▌ | 16066/18660 [07:28<01:10, 36.75it/s]
 86%|████████▌ | 16070/18660 [07:28<01:10, 36.82it/s]
 86%|████████▌ | 16074/18660 [07:28<01:11, 36

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:20:55. Total running time: 1hr 0min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                 

[2m[36m(_objective pid=30118)[0m  88%|████████▊ | 16339/18660 [07:35<00:59, 38.99it/s]
 88%|████████▊ | 16343/18660 [07:35<00:59, 39.20it/s]
 88%|████████▊ | 16347/18660 [07:35<00:58, 39.22it/s]
 88%|████████▊ | 16351/18660 [07:35<00:59, 38.72it/s]
 88%|████████▊ | 16355/18660 [07:35<01:02, 37.09it/s]
 88%|████████▊ | 16359/18660 [07:35<01:01, 37.20it/s]
 88%|████████▊ | 16363/18660 [07:36<01:01, 37.65it/s]
 88%|████████▊ | 16367/18660 [07:36<01:00, 37.67it/s]
 88%|████████▊ | 16371/18660 [07:36<01:00, 37.69it/s]
 88%|████████▊ | 16375/18660 [07:36<01:00, 37.46it/s]
 88%|████████▊ | 16380/18660 [07:36<00:59, 38.53it/s]
 88%|████████▊ | 16384/18660 [07:36<00:58, 38.58it/s]
 88%|████████▊ | 16388/18660 [07:36<00:58, 38.88it/s]
 88%|████████▊ | 16392/18660 [07:36<01:01, 36.95it/s]
 88%|████████▊ | 16397/18660 [07:36<00:59, 38.18it/s]
 88%|████████▊ | 16402/18660 [07:37<00:58, 38.88it/s]
 88%|████████▊ | 16407/18660 [07:37<00:57, 39.39it/s]
 88%|████████▊ | 16411/18660 [07:37<00:57, 39

[2m[36m(_objective pid=30118)[0m {'loss': 0.0953, 'learning_rate': 1.3726321360668704e-06, 'epoch': 2.65}


[2m[36m(_objective pid=30118)[0m  88%|████████▊ | 16508/18660 [07:39<00:54, 39.19it/s]
 88%|████████▊ | 16513/18660 [07:39<00:54, 39.47it/s]
 89%|████████▊ | 16518/18660 [07:40<00:53, 39.68it/s]
 89%|████████▊ | 16522/18660 [07:40<00:54, 39.53it/s]
 89%|████████▊ | 16526/18660 [07:40<00:53, 39.58it/s]
 89%|████████▊ | 16530/18660 [07:40<00:54, 39.27it/s]
 89%|████████▊ | 16535/18660 [07:40<00:53, 39.56it/s]
 89%|████████▊ | 16539/18660 [07:40<00:53, 39.33it/s]
 89%|████████▊ | 16543/18660 [07:40<00:56, 37.53it/s]
 89%|████████▊ | 16547/18660 [07:40<00:57, 36.90it/s]
 89%|████████▊ | 16551/18660 [07:40<00:57, 36.89it/s]
 89%|████████▊ | 16555/18660 [07:41<00:55, 37.73it/s]
 89%|████████▊ | 16559/18660 [07:41<00:55, 38.20it/s]
 89%|████████▉ | 16564/18660 [07:41<00:53, 38.91it/s]
 89%|████████▉ | 16568/18660 [07:41<00:53, 38.99it/s]
 89%|████████▉ | 16572/18660 [07:41<00:53, 39.10it/s]
 89%|████████▉ | 16576/18660 [07:41<00:53, 38.70it/s]
 89%|████████▉ | 16580/18660 [07:41<00:53, 38

[2m[36m(_objective pid=30118)[0m {'loss': 0.0816, 'learning_rate': 1.0548932156810207e-06, 'epoch': 2.73}


[2m[36m(_objective pid=30118)[0m  91%|█████████ | 17007/18660 [07:52<00:41, 39.71it/s]
 91%|█████████ | 17011/18660 [07:52<00:42, 39.20it/s]
 91%|█████████ | 17016/18660 [07:53<00:41, 39.82it/s]
 91%|█████████ | 17020/18660 [07:53<00:41, 39.74it/s]
 91%|█████████ | 17024/18660 [07:53<00:41, 39.65it/s]
 91%|█████████▏| 17028/18660 [07:53<00:41, 38.98it/s]
 91%|█████████▏| 17032/18660 [07:53<00:42, 38.58it/s]
 91%|█████████▏| 17036/18660 [07:53<00:42, 37.89it/s]
 91%|█████████▏| 17040/18660 [07:53<00:42, 37.75it/s]
 91%|█████████▏| 17044/18660 [07:53<00:42, 37.79it/s]
 91%|█████████▏| 17048/18660 [07:53<00:43, 36.93it/s]
 91%|█████████▏| 17052/18660 [07:54<00:43, 37.16it/s]
 91%|█████████▏| 17056/18660 [07:54<00:42, 37.43it/s]
 91%|█████████▏| 17060/18660 [07:54<00:42, 37.84it/s]
 91%|█████████▏| 17064/18660 [07:54<00:41, 38.34it/s]
 91%|█████████▏| 17068/18660 [07:54<00:41, 38.69it/s]
 91%|█████████▏| 17073/18660 [07:54<00:40, 39.10it/s]
 92%|█████████▏| 17077/18660 [07:54<00:40, 38

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:21:25. Total running time: 1hr 1min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                  

[2m[36m(_objective pid=30118)[0m  94%|█████████▎| 17485/18660 [08:05<00:31, 37.74it/s]
 94%|█████████▎| 17489/18660 [08:05<00:30, 38.17it/s]
 94%|█████████▍| 17494/18660 [08:05<00:29, 38.97it/s]
 94%|█████████▍| 17498/18660 [08:05<00:30, 38.14it/s]
 94%|█████████▍| 17503/18660 [08:05<00:29, 39.08it/s]
 94%|█████████▍| 17507/18660 [08:05<00:30, 38.16it/s]


[2m[36m(_objective pid=30118)[0m {'loss': 0.134, 'learning_rate': 7.371542952951712e-07, 'epoch': 2.81}


[2m[36m(_objective pid=30118)[0m  94%|█████████▍| 17511/18660 [08:06<00:31, 36.82it/s]
 94%|█████████▍| 17515/18660 [08:06<00:30, 37.05it/s]
 94%|█████████▍| 17519/18660 [08:06<00:30, 37.79it/s]
 94%|█████████▍| 17523/18660 [08:06<00:30, 36.70it/s]
 94%|█████████▍| 17528/18660 [08:06<00:29, 38.03it/s]
 94%|█████████▍| 17532/18660 [08:06<00:29, 38.42it/s]
 94%|█████████▍| 17537/18660 [08:06<00:30, 37.42it/s]
 94%|█████████▍| 17541/18660 [08:06<00:29, 37.99it/s]
 94%|█████████▍| 17545/18660 [08:06<00:28, 38.48it/s]
 94%|█████████▍| 17549/18660 [08:07<00:28, 38.88it/s]
 94%|█████████▍| 17553/18660 [08:07<00:28, 39.09it/s]
 94%|█████████▍| 17557/18660 [08:07<00:28, 38.24it/s]
 94%|█████████▍| 17562/18660 [08:07<00:28, 39.17it/s]
 94%|█████████▍| 17566/18660 [08:07<00:28, 38.91it/s]
 94%|█████████▍| 17571/18660 [08:07<00:27, 39.29it/s]
 94%|█████████▍| 17575/18660 [08:07<00:27, 38.75it/s]
 94%|█████████▍| 17579/18660 [08:07<00:27, 38.87it/s]
 94%|█████████▍| 17583/18660 [08:07<00:27, 38

[2m[36m(_objective pid=30118)[0m {'loss': 0.113, 'learning_rate': 4.1941537490932156e-07, 'epoch': 2.89}


[2m[36m(_objective pid=30118)[0m  96%|█████████▋| 18006/18660 [08:19<00:16, 40.82it/s]
 97%|█████████▋| 18011/18660 [08:19<00:16, 38.61it/s]
 97%|█████████▋| 18016/18660 [08:19<00:16, 39.44it/s]
 97%|█████████▋| 18020/18660 [08:19<00:16, 38.16it/s]
 97%|█████████▋| 18025/18660 [08:19<00:16, 38.68it/s]
 97%|█████████▋| 18029/18660 [08:19<00:16, 38.61it/s]
 97%|█████████▋| 18034/18660 [08:19<00:15, 39.44it/s]
 97%|█████████▋| 18038/18660 [08:19<00:16, 38.67it/s]
 97%|█████████▋| 18042/18660 [08:19<00:16, 38.06it/s]
 97%|█████████▋| 18046/18660 [08:20<00:16, 38.03it/s]
 97%|█████████▋| 18050/18660 [08:20<00:15, 38.21it/s]
 97%|█████████▋| 18054/18660 [08:20<00:16, 37.57it/s]
 97%|█████████▋| 18058/18660 [08:20<00:15, 37.84it/s]
 97%|█████████▋| 18062/18660 [08:20<00:15, 37.59it/s]
 97%|█████████▋| 18066/18660 [08:20<00:15, 38.14it/s]
 97%|█████████▋| 18070/18660 [08:20<00:15, 38.29it/s]
 97%|█████████▋| 18074/18660 [08:20<00:15, 38.77it/s]
 97%|█████████▋| 18078/18660 [08:20<00:15, 37

[2m[36m(_objective pid=30118)[0m {'loss': 0.1359, 'learning_rate': 1.0167645452347189e-07, 'epoch': 2.97}


[2m[36m(_objective pid=30118)[0m  99%|█████████▉| 18503/18660 [08:31<00:04, 38.68it/s]
 99%|█████████▉| 18507/18660 [08:31<00:03, 38.60it/s]
 99%|█████████▉| 18511/18660 [08:32<00:03, 38.17it/s]
 99%|█████████▉| 18515/18660 [08:32<00:03, 36.53it/s]
 99%|█████████▉| 18519/18660 [08:32<00:03, 36.91it/s]
 99%|█████████▉| 18523/18660 [08:32<00:03, 37.40it/s]
 99%|█████████▉| 18527/18660 [08:32<00:03, 37.45it/s]
 99%|█████████▉| 18531/18660 [08:32<00:03, 37.06it/s]
 99%|█████████▉| 18535/18660 [08:32<00:03, 37.34it/s]
 99%|█████████▉| 18539/18660 [08:32<00:03, 36.59it/s]
 99%|█████████▉| 18543/18660 [08:32<00:03, 35.37it/s]
 99%|█████████▉| 18547/18660 [08:33<00:03, 35.45it/s]
 99%|█████████▉| 18551/18660 [08:33<00:03, 35.40it/s]
 99%|█████████▉| 18555/18660 [08:33<00:03, 34.51it/s]
 99%|█████████▉| 18559/18660 [08:33<00:02, 35.24it/s]
 99%|█████████▉| 18563/18660 [08:33<00:02, 36.23it/s]
100%|█████████▉| 18567/18660 [08:33<00:02, 36.89it/s]
100%|█████████▉| 18571/18660 [08:33<00:02, 37

Trial status: 7 TERMINATED | 1 RUNNING | 12 PENDING
Current time: 2023-09-11 14:21:55. Total running time: 1hr 1min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00007   RUNNING          1.1858e-05                     3                 

[2m[36m(_objective pid=30118)[0m 100%|█████████▉| 18640/18660 [08:35<00:00, 37.53it/s]
100%|█████████▉| 18644/18660 [08:35<00:00, 38.02it/s]
100%|█████████▉| 18648/18660 [08:35<00:00, 37.76it/s]
100%|█████████▉| 18652/18660 [08:35<00:00, 37.08it/s]
100%|█████████▉| 18656/18660 [08:36<00:00, 37.63it/s]
100%|██████████| 18660/18660 [08:36<00:00, 37.49it/s]
  0%|          | 0/130 [00:00<?, ?it/s][A
[2m[36m(_objective pid=30118)[0m 
  2%|▏         | 2/130 [00:00<00:08, 14.40it/s][A
[2m[36m(_objective pid=30118)[0m 
  4%|▍         | 5/130 [00:00<00:06, 20.64it/s][A
[2m[36m(_objective pid=30118)[0m 
  6%|▌         | 8/130 [00:00<00:05, 21.21it/s][A
[2m[36m(_objective pid=30118)[0m 
  8%|▊         | 11/130 [00:00<00:08, 13.84it/s][A
[2m[36m(_objective pid=30118)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.47it/s][A
[2m[36m(_objective pid=30118)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.33it/s][A
[2m[36m(_objective pid=30118)[0m 
 13%|█▎        | 17/130 [00:

Trial _objective_f556c_00007 finished iteration 3 at 2023-09-11 14:22:05. Total running time: 1hr 1min 44s
+-------------------------------------------------+
| Trial _objective_f556c_00007 result             |
+-------------------------------------------------+
| time_this_iter_s                        175.478 |
| time_total_s                            528.043 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                                0.2581 |
| eval_runtime                             9.5714 |
| eval_samples_per_second                 433.271 |
| eval_steps_per_second                    13.582 |
| objective                                0.2581 |
+-------------------------------------------------+

[2m[36m(_objective pid=30118)[0m {'eval_loss': 0.25809532403945923, 'eval_runtime': 9.5714, 'eval_samples_per_second': 433.271, 'eval_steps_per_second': 13.582, 'epoch': 3.0}


[2m[36m(_objective pid=30118)[0m                                                      
[2m[36m(_objective pid=30118)[0m                                                  [A100%|██████████| 18660/18660 [08:45<00:00, 37.49it/s]
[2m[36m(_objective pid=30118)[0m 100%|██████████| 130/130 [00:09<00:00, 11.99it/s][A
[2m[36m(_objective pid=30118)[0m                                                  [A


Trial _objective_f556c_00007 completed after 3 iterations at 2023-09-11 14:22:07. Total running time: 1hr 1min 45s

[2m[36m(_objective pid=30118)[0m {'train_runtime': 527.1747, 'train_samples_per_second': 70.787, 'train_steps_per_second': 35.396, 'train_loss': 0.20581244757959952, 'epoch': 3.0}


[2m[36m(_objective pid=30118)[0m                                                      100%|██████████| 18660/18660 [08:47<00:00, 37.49it/s]100%|██████████| 18660/18660 [08:47<00:00, 35.40it/s]


Trial _objective_f556c_00008 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00008 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             5e-05 |
| num_train_epochs                              4 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.24252 |
+-------------------------------------------------+



[2m[36m(_objective pid=32421)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
[2m[36m(_objective pid=32421)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=32421)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=32421)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:22:25. Total running time: 1hr 2min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m   2%|▏         | 298/12440 [00:08<06:02, 33.51it/s]
  2%|▏         | 302/12440 [00:09<05:51, 34.55it/s]
  2%|▏         | 306/12440 [00:09<07:15, 27.84it/s]
  2%|▏         | 310/12440 [00:09<06:40, 30.28it/s]
  3%|▎         | 314/12440 [00:09<06:11, 32.61it/s]
  3%|▎         | 318/12440 [00:09<06:08, 32.86it/s]
  3%|▎         | 322/12440 [00:09<05:57, 33.92it/s]
  3%|▎         | 326/12440 [00:09<06:37, 30.51it/s]
  3%|▎         | 330/12440 [00:09<06:20, 31.82it/s]
  3%|▎         | 334/12440 [00:10<06:03, 33.28it/s]
  3%|▎         | 338/12440 [00:10<06:00, 33.56it/s]
  3%|▎         | 342/12440 [00:10<06:19, 31.89it/s]
  3%|▎         | 346/12440 [00:10<06:34, 30.67it/s]
  3%|▎         | 350/12440 [00:10<06:29, 31.03it/s]
  3%|▎         | 355/12440 [00:10<05:58, 33.72it/s]
  3%|▎         | 359/12440 [00:10<05:50, 34.44it/s]
  3%|▎         | 363/12440 [00:10<05:46, 34.85it/s]
  3%|▎         | 367/12440 [00:11<06:39, 30.25it/s]
  3%|▎         | 371/12440 

[2m[36m(_objective pid=32421)[0m {'loss': 0.4099, 'learning_rate': 4.6027952780722826e-05, 'epoch': 0.16}


[2m[36m(_objective pid=32421)[0m   4%|▍         | 506/12440 [00:15<06:00, 33.09it/s]
  4%|▍         | 510/12440 [00:15<06:02, 32.92it/s]
  4%|▍         | 514/12440 [00:15<05:48, 34.22it/s]
  4%|▍         | 518/12440 [00:15<05:45, 34.55it/s]
  4%|▍         | 523/12440 [00:15<05:28, 36.32it/s]
  4%|▍         | 527/12440 [00:15<05:20, 37.12it/s]
  4%|▍         | 531/12440 [00:15<05:17, 37.51it/s]
  4%|▍         | 535/12440 [00:15<05:38, 35.13it/s]
  4%|▍         | 539/12440 [00:16<05:56, 33.34it/s]
  4%|▍         | 544/12440 [00:16<05:33, 35.66it/s]
  4%|▍         | 548/12440 [00:16<05:24, 36.59it/s]
  4%|▍         | 552/12440 [00:16<05:17, 37.46it/s]
  4%|▍         | 556/12440 [00:16<05:27, 36.29it/s]
  5%|▍         | 560/12440 [00:16<05:19, 37.19it/s]
  5%|▍         | 564/12440 [00:16<05:16, 37.49it/s]
  5%|▍         | 568/12440 [00:16<05:12, 37.95it/s]
  5%|▍         | 572/12440 [00:16<05:19, 37.18it/s]
  5%|▍         | 577/12440 [00:17<05:10, 38.23it/s]
  5%|▍         | 581/12440 

[2m[36m(_objective pid=32421)[0m {'loss': 0.3507, 'learning_rate': 4.4100484071312326e-05, 'epoch': 0.32}


[2m[36m(_objective pid=32421)[0m   8%|▊         | 1008/12440 [00:29<04:57, 38.42it/s]
  8%|▊         | 1012/12440 [00:29<05:00, 38.01it/s]
  8%|▊         | 1017/12440 [00:29<04:52, 39.08it/s]
  8%|▊         | 1022/12440 [00:29<05:02, 37.71it/s]
  8%|▊         | 1027/12440 [00:30<04:56, 38.55it/s]
  8%|▊         | 1032/12440 [00:30<04:53, 38.84it/s]
  8%|▊         | 1036/12440 [00:30<04:59, 38.07it/s]
  8%|▊         | 1040/12440 [00:30<04:57, 38.29it/s]
  8%|▊         | 1044/12440 [00:30<04:59, 38.10it/s]
  8%|▊         | 1048/12440 [00:30<04:58, 38.18it/s]
  8%|▊         | 1052/12440 [00:30<05:16, 36.01it/s]
  8%|▊         | 1056/12440 [00:30<05:09, 36.73it/s]
  9%|▊         | 1060/12440 [00:30<05:10, 36.60it/s]
  9%|▊         | 1064/12440 [00:31<05:11, 36.50it/s]
  9%|▊         | 1069/12440 [00:31<05:01, 37.70it/s]
  9%|▊         | 1073/12440 [00:31<04:57, 38.23it/s]
  9%|▊         | 1077/12440 [00:31<04:57, 38.15it/s]
  9%|▊         | 1081/12440 [00:31<05:20, 35.41it/s]
  9%|▊   

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:22:55. Total running time: 1hr 2min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m  11%|█         | 1341/12440 [00:38<05:41, 32.50it/s]
 11%|█         | 1345/12440 [00:38<05:27, 33.90it/s]
 11%|█         | 1349/12440 [00:39<05:44, 32.23it/s]
 11%|█         | 1353/12440 [00:39<05:39, 32.70it/s]
 11%|█         | 1357/12440 [00:39<05:42, 32.39it/s]
 11%|█         | 1361/12440 [00:39<05:37, 32.86it/s]
 11%|█         | 1365/12440 [00:39<05:22, 34.29it/s]
 11%|█         | 1369/12440 [00:39<05:21, 34.40it/s]
 11%|█         | 1373/12440 [00:39<05:18, 34.76it/s]
 11%|█         | 1377/12440 [00:39<05:12, 35.36it/s]
 11%|█         | 1381/12440 [00:40<05:17, 34.82it/s]
 11%|█         | 1385/12440 [00:40<05:22, 34.27it/s]
 11%|█         | 1389/12440 [00:40<05:13, 35.26it/s]
 11%|█         | 1393/12440 [00:40<05:42, 32.23it/s]
 11%|█         | 1397/12440 [00:40<06:05, 30.19it/s]
 11%|█▏        | 1401/12440 [00:40<06:24, 28.71it/s]
 11%|█▏        | 1404/12440 [00:40<06:40, 27.54it/s]
 11%|█▏        | 1408/12440 [00:40<06:18, 29.15it/s]
 11%|█▏  

[2m[36m(_objective pid=32421)[0m {'loss': 0.2863, 'learning_rate': 4.217301536190182e-05, 'epoch': 0.48}


[2m[36m(_objective pid=32421)[0m  12%|█▏        | 1505/12440 [00:43<05:08, 35.43it/s]
 12%|█▏        | 1509/12440 [00:43<05:07, 35.57it/s]
 12%|█▏        | 1513/12440 [00:44<05:05, 35.72it/s]
 12%|█▏        | 1517/12440 [00:44<05:09, 35.25it/s]
 12%|█▏        | 1521/12440 [00:44<04:59, 36.49it/s]
 12%|█▏        | 1525/12440 [00:44<05:15, 34.64it/s]
 12%|█▏        | 1529/12440 [00:44<05:04, 35.80it/s]
 12%|█▏        | 1533/12440 [00:44<05:01, 36.13it/s]
 12%|█▏        | 1537/12440 [00:44<04:58, 36.56it/s]
 12%|█▏        | 1541/12440 [00:44<05:28, 33.15it/s]
 12%|█▏        | 1546/12440 [00:45<05:09, 35.22it/s]
 12%|█▏        | 1550/12440 [00:45<05:09, 35.19it/s]
 12%|█▏        | 1554/12440 [00:45<05:14, 34.63it/s]
 13%|█▎        | 1558/12440 [00:45<06:08, 29.51it/s]
 13%|█▎        | 1562/12440 [00:45<05:41, 31.86it/s]
 13%|█▎        | 1566/12440 [00:45<05:20, 33.90it/s]
 13%|█▎        | 1570/12440 [00:45<05:25, 33.43it/s]
 13%|█▎        | 1575/12440 [00:45<05:06, 35.51it/s]
 13%|█▎  

[2m[36m(_objective pid=32421)[0m {'loss': 0.2415, 'learning_rate': 4.0245546652491325e-05, 'epoch': 0.64}


[2m[36m(_objective pid=32421)[0m  16%|█▌        | 2006/12440 [00:58<05:01, 34.64it/s]
 16%|█▌        | 2010/12440 [00:58<04:54, 35.40it/s]
 16%|█▌        | 2014/12440 [00:58<04:51, 35.73it/s]
 16%|█▌        | 2018/12440 [00:58<05:15, 33.04it/s]
 16%|█▋        | 2022/12440 [00:58<05:19, 32.63it/s]
 16%|█▋        | 2026/12440 [00:59<05:46, 30.03it/s]
 16%|█▋        | 2030/12440 [00:59<05:26, 31.86it/s]
 16%|█▋        | 2034/12440 [00:59<05:15, 33.03it/s]
 16%|█▋        | 2038/12440 [00:59<05:10, 33.51it/s]
 16%|█▋        | 2042/12440 [00:59<05:43, 30.31it/s]
 16%|█▋        | 2046/12440 [00:59<05:26, 31.88it/s]
 16%|█▋        | 2050/12440 [00:59<05:18, 32.66it/s]
 17%|█▋        | 2054/12440 [00:59<05:41, 30.37it/s]
 17%|█▋        | 2058/12440 [01:00<05:42, 30.28it/s]
 17%|█▋        | 2062/12440 [01:00<05:33, 31.09it/s]
 17%|█▋        | 2066/12440 [01:00<05:19, 32.47it/s]
 17%|█▋        | 2070/12440 [01:00<05:07, 33.68it/s]
 17%|█▋        | 2074/12440 [01:00<05:00, 34.50it/s]
 17%|█▋  

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:23:25. Total running time: 1hr 3min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m  19%|█▉        | 2361/12440 [01:08<04:30, 37.25it/s]
 19%|█▉        | 2365/12440 [01:08<04:29, 37.43it/s]
 19%|█▉        | 2369/12440 [01:08<04:47, 34.98it/s]
 19%|█▉        | 2373/12440 [01:09<04:53, 34.32it/s]
 19%|█▉        | 2377/12440 [01:09<04:58, 33.66it/s]
 19%|█▉        | 2381/12440 [01:09<05:32, 30.29it/s]
 19%|█▉        | 2385/12440 [01:09<05:08, 32.55it/s]
 19%|█▉        | 2389/12440 [01:09<05:04, 33.01it/s]
 19%|█▉        | 2393/12440 [01:09<04:53, 34.23it/s]
 19%|█▉        | 2397/12440 [01:09<04:55, 34.04it/s]
 19%|█▉        | 2401/12440 [01:09<04:50, 34.55it/s]
 19%|█▉        | 2406/12440 [01:10<04:36, 36.34it/s]
 19%|█▉        | 2410/12440 [01:10<04:42, 35.47it/s]
 19%|█▉        | 2414/12440 [01:10<04:34, 36.55it/s]
 19%|█▉        | 2419/12440 [01:10<04:24, 37.82it/s]
 19%|█▉        | 2424/12440 [01:10<04:18, 38.75it/s]
 20%|█▉        | 2428/12440 [01:10<04:20, 38.49it/s]
 20%|█▉        | 2433/12440 [01:10<04:16, 39.05it/s]
 20%|█▉  

[2m[36m(_objective pid=32421)[0m {'loss': 0.2194, 'learning_rate': 3.831807794308082e-05, 'epoch': 0.8}


[2m[36m(_objective pid=32421)[0m  20%|██        | 2505/12440 [01:12<05:02, 32.81it/s]
 20%|██        | 2509/12440 [01:12<05:02, 32.87it/s]
 20%|██        | 2513/12440 [01:13<05:26, 30.39it/s]
 20%|██        | 2517/12440 [01:13<05:15, 31.42it/s]
 20%|██        | 2521/12440 [01:13<05:01, 32.89it/s]
 20%|██        | 2525/12440 [01:13<04:49, 34.24it/s]
 20%|██        | 2529/12440 [01:13<05:06, 32.38it/s]
 20%|██        | 2533/12440 [01:13<05:02, 32.73it/s]
 20%|██        | 2537/12440 [01:13<04:46, 34.53it/s]
 20%|██        | 2541/12440 [01:13<04:39, 35.46it/s]
 20%|██        | 2545/12440 [01:14<04:37, 35.72it/s]
 20%|██        | 2549/12440 [01:14<04:43, 34.84it/s]
 21%|██        | 2553/12440 [01:14<04:51, 33.94it/s]
 21%|██        | 2557/12440 [01:14<04:50, 34.00it/s]
 21%|██        | 2561/12440 [01:14<04:50, 34.03it/s]
 21%|██        | 2565/12440 [01:14<04:56, 33.28it/s]
 21%|██        | 2569/12440 [01:14<05:10, 31.79it/s]
 21%|██        | 2573/12440 [01:14<05:00, 32.87it/s]
 21%|██  

[2m[36m(_objective pid=32421)[0m {'loss': 0.2357, 'learning_rate': 3.639060923367031e-05, 'epoch': 0.96}


[2m[36m(_objective pid=32421)[0m  24%|██▍       | 3006/12440 [01:27<05:12, 30.23it/s]
 24%|██▍       | 3010/12440 [01:27<04:51, 32.31it/s]
 24%|██▍       | 3014/12440 [01:27<04:41, 33.52it/s]
 24%|██▍       | 3018/12440 [01:27<04:41, 33.51it/s]
 24%|██▍       | 3022/12440 [01:28<04:31, 34.72it/s]
 24%|██▍       | 3026/12440 [01:28<04:21, 36.02it/s]
 24%|██▍       | 3030/12440 [01:28<04:42, 33.34it/s]
 24%|██▍       | 3034/12440 [01:28<04:29, 34.88it/s]
 24%|██▍       | 3038/12440 [01:28<04:25, 35.44it/s]
 24%|██▍       | 3042/12440 [01:28<04:30, 34.72it/s]
 24%|██▍       | 3047/12440 [01:28<04:21, 35.87it/s]
 25%|██▍       | 3051/12440 [01:28<04:17, 36.42it/s]
 25%|██▍       | 3055/12440 [01:28<04:20, 36.07it/s]
 25%|██▍       | 3059/12440 [01:29<04:18, 36.25it/s]
 25%|██▍       | 3063/12440 [01:29<04:20, 36.06it/s]
 25%|██▍       | 3067/12440 [01:29<04:19, 36.06it/s]
 25%|██▍       | 3071/12440 [01:29<04:21, 35.85it/s]
 25%|██▍       | 3075/12440 [01:29<04:14, 36.87it/s]
 25%|██▍ 

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:23:55. Total running time: 1hr 3min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m 
 89%|████████▉ | 116/130 [00:08<00:01, 12.41it/s][A
[2m[36m(_objective pid=32421)[0m 
 91%|█████████ | 118/130 [00:08<00:00, 13.12it/s][A
[2m[36m(_objective pid=32421)[0m 
 92%|█████████▏| 120/130 [00:08<00:00, 14.27it/s][A
[2m[36m(_objective pid=32421)[0m 
 94%|█████████▍| 122/130 [00:08<00:00, 13.65it/s][A
[2m[36m(_objective pid=32421)[0m 
 95%|█████████▌| 124/130 [00:08<00:00, 13.17it/s][A
[2m[36m(_objective pid=32421)[0m 
 98%|█████████▊| 127/130 [00:09<00:00, 13.12it/s][A
[2m[36m(_objective pid=32421)[0m 
 99%|█████████▉| 129/130 [00:09<00:00, 11.95it/s][A


Trial _objective_f556c_00008 finished iteration 1 at 2023-09-11 14:23:56. Total running time: 1hr 3min 35s
+-------------------------------------------------+
| Trial _objective_f556c_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.766 |
| time_total_s                            102.766 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.17219 |
| eval_runtime                             9.5925 |
| eval_samples_per_second                 432.315 |
| eval_steps_per_second                    13.552 |
| objective                               0.17219 |
+-------------------------------------------------+

[2m[36m(_objective pid=32421)[0m {'eval_loss': 0.17218557000160217, 'eval_runtime': 9.5925, 'eval_samples_per_second': 432.315, 'eval_steps_per_second': 13.552, 'epoch': 1.0}


[2m[36m(_objective pid=32421)[0m                                                     
[2m[36m(_objective pid=32421)[0m                                                  [A 25%|██▌       | 3110/12440 [01:40<04:32, 34.27it/s]
[2m[36m(_objective pid=32421)[0m 100%|██████████| 130/130 [00:09<00:00, 11.95it/s][A
                                                 [A
 25%|██▌       | 3111/12440 [01:41<2:11:23,  1.18it/s]
 25%|██▌       | 3115/12440 [01:41<1:33:20,  1.66it/s]
 25%|██▌       | 3119/12440 [01:41<1:07:32,  2.30it/s]
 25%|██▌       | 3123/12440 [01:41<48:30,  3.20it/s]  
 25%|██▌       | 3127/12440 [01:41<35:16,  4.40it/s]
 25%|██▌       | 3131/12440 [01:42<25:53,  5.99it/s]
 25%|██▌       | 3135/12440 [01:42<19:22,  8.00it/s]
 25%|██▌       | 3139/12440 [01:42<15:00, 10.33it/s]
 25%|██▌       | 3143/12440 [01:42<11:40, 13.28it/s]
 25%|██▌       | 3147/12440 [01:42<09:20, 16.57it/s]
 25%|██▌       | 3151/12440 [01:42<07:43, 20.05it/s]
 25%|██▌       | 3155/12440 [01:

[2m[36m(_objective pid=32421)[0m {'loss': 0.1672, 'learning_rate': 3.446314052425981e-05, 'epoch': 1.13}


[2m[36m(_objective pid=32421)[0m  28%|██▊       | 3503/12440 [01:53<04:06, 36.19it/s]
 28%|██▊       | 3507/12440 [01:53<04:03, 36.75it/s]
 28%|██▊       | 3511/12440 [01:53<04:10, 35.61it/s]
 28%|██▊       | 3515/12440 [01:53<04:16, 34.80it/s]
 28%|██▊       | 3519/12440 [01:53<04:12, 35.28it/s]
 28%|██▊       | 3523/12440 [01:53<04:07, 36.04it/s]
 28%|██▊       | 3527/12440 [01:53<04:30, 32.93it/s]
 28%|██▊       | 3531/12440 [01:53<04:36, 32.20it/s]
 28%|██▊       | 3535/12440 [01:54<04:23, 33.76it/s]
 28%|██▊       | 3539/12440 [01:54<04:33, 32.60it/s]
 28%|██▊       | 3543/12440 [01:54<04:21, 33.99it/s]
 29%|██▊       | 3547/12440 [01:54<04:11, 35.30it/s]
 29%|██▊       | 3551/12440 [01:54<04:02, 36.58it/s]
 29%|██▊       | 3555/12440 [01:54<03:58, 37.33it/s]
 29%|██▊       | 3559/12440 [01:54<03:55, 37.75it/s]
 29%|██▊       | 3563/12440 [01:54<04:09, 35.61it/s]
 29%|██▊       | 3567/12440 [01:54<04:20, 34.04it/s]
 29%|██▊       | 3571/12440 [01:55<04:43, 31.27it/s]
 29%|██▊ 

[2m[36m(_objective pid=32421)[0m {'loss': 0.1299, 'learning_rate': 3.25356718148493e-05, 'epoch': 1.29}


[2m[36m(_objective pid=32421)[0m  32%|███▏      | 4007/12440 [02:08<04:24, 31.91it/s]
 32%|███▏      | 4011/12440 [02:08<04:40, 30.03it/s]
 32%|███▏      | 4015/12440 [02:08<04:27, 31.55it/s]
 32%|███▏      | 4019/12440 [02:08<04:15, 33.01it/s]
 32%|███▏      | 4023/12440 [02:08<04:05, 34.33it/s]
 32%|███▏      | 4027/12440 [02:08<04:02, 34.63it/s]


Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:24:25. Total running time: 1hr 4min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m  32%|███▏      | 4031/12440 [02:08<03:55, 35.73it/s]
 32%|███▏      | 4035/12440 [02:08<03:55, 35.75it/s]
 32%|███▏      | 4039/12440 [02:09<03:50, 36.49it/s]
 32%|███▎      | 4043/12440 [02:09<03:49, 36.65it/s]
 33%|███▎      | 4047/12440 [02:09<03:44, 37.40it/s]
 33%|███▎      | 4051/12440 [02:09<03:42, 37.65it/s]
 33%|███▎      | 4055/12440 [02:09<03:43, 37.58it/s]
 33%|███▎      | 4059/12440 [02:09<03:44, 37.41it/s]
 33%|███▎      | 4063/12440 [02:09<03:40, 37.96it/s]
 33%|███▎      | 4067/12440 [02:09<03:40, 37.91it/s]
 33%|███▎      | 4071/12440 [02:09<03:38, 38.26it/s]
 33%|███▎      | 4075/12440 [02:10<03:42, 37.52it/s]
 33%|███▎      | 4079/12440 [02:10<03:46, 36.86it/s]
 33%|███▎      | 4083/12440 [02:10<03:49, 36.41it/s]
 33%|███▎      | 4087/12440 [02:10<04:10, 33.33it/s]
 33%|███▎      | 4091/12440 [02:10<04:01, 34.61it/s]
 33%|███▎      | 4095/12440 [02:10<03:58, 34.96it/s]
 33%|███▎      | 4099/12440 [02:10<04:05, 33.97it/s]
 33%|███▎

[2m[36m(_objective pid=32421)[0m {'loss': 0.1399, 'learning_rate': 3.0608203105438794e-05, 'epoch': 1.45}


 36%|███▋      | 4510/12440 [02:22<03:54, 33.83it/s]
 36%|███▋      | 4514/12440 [02:22<03:51, 34.30it/s]
 36%|███▋      | 4518/12440 [02:22<03:43, 35.48it/s]
 36%|███▋      | 4522/12440 [02:22<03:43, 35.49it/s]
 36%|███▋      | 4526/12440 [02:22<03:41, 35.68it/s]
 36%|███▋      | 4530/12440 [02:23<03:40, 35.89it/s]
 36%|███▋      | 4534/12440 [02:23<04:06, 32.10it/s]
 36%|███▋      | 4538/12440 [02:23<04:11, 31.48it/s]
 37%|███▋      | 4542/12440 [02:23<04:00, 32.88it/s]
 37%|███▋      | 4546/12440 [02:23<04:07, 31.93it/s]
 37%|███▋      | 4550/12440 [02:23<03:56, 33.35it/s]
 37%|███▋      | 4554/12440 [02:23<03:46, 34.82it/s]
 37%|███▋      | 4558/12440 [02:23<04:09, 31.62it/s]
 37%|███▋      | 4562/12440 [02:24<04:02, 32.43it/s]
 37%|███▋      | 4566/12440 [02:24<03:56, 33.32it/s]
 37%|███▋      | 4570/12440 [02:24<03:47, 34.53it/s]
 37%|███▋      | 4574/12440 [02:24<03:53, 33.66it/s]
 37%|███▋      | 4578/12440 [02:24<04:11, 31.21it/s]
 37%|███▋      | 4582/12440 [02:24<04:18, 30.3

[2m[36m(_objective pid=32421)[0m {'loss': 0.1502, 'learning_rate': 2.8680734396028297e-05, 'epoch': 1.61}


[2m[36m(_objective pid=32421)[0m  40%|████      | 5006/12440 [02:37<03:40, 33.64it/s]
 40%|████      | 5010/12440 [02:37<03:36, 34.31it/s]
 40%|████      | 5014/12440 [02:37<03:33, 34.74it/s]
 40%|████      | 5018/12440 [02:37<03:46, 32.82it/s]
 40%|████      | 5022/12440 [02:37<04:00, 30.89it/s]
 40%|████      | 5026/12440 [02:37<03:46, 32.71it/s]
 40%|████      | 5030/12440 [02:38<04:01, 30.68it/s]
 40%|████      | 5034/12440 [02:38<03:46, 32.63it/s]
 40%|████      | 5038/12440 [02:38<03:37, 34.01it/s]
 41%|████      | 5042/12440 [02:38<03:32, 34.80it/s]
 41%|████      | 5046/12440 [02:38<03:39, 33.76it/s]
 41%|████      | 5050/12440 [02:38<04:01, 30.61it/s]
 41%|████      | 5054/12440 [02:38<03:54, 31.49it/s]


Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:24:55. Total running time: 1hr 4min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m  41%|████      | 5058/12440 [02:38<04:11, 29.32it/s]
 41%|████      | 5062/12440 [02:39<03:52, 31.68it/s]
 41%|████      | 5066/12440 [02:39<03:47, 32.41it/s]
 41%|████      | 5070/12440 [02:39<03:38, 33.66it/s]
 41%|████      | 5074/12440 [02:39<03:29, 35.10it/s]
 41%|████      | 5078/12440 [02:39<03:37, 33.86it/s]
 41%|████      | 5082/12440 [02:39<03:32, 34.58it/s]
 41%|████      | 5086/12440 [02:39<03:48, 32.25it/s]
 41%|████      | 5090/12440 [02:39<03:55, 31.15it/s]
 41%|████      | 5094/12440 [02:39<04:00, 30.54it/s]
 41%|████      | 5098/12440 [02:40<03:55, 31.22it/s]
 41%|████      | 5102/12440 [02:40<04:04, 30.03it/s]
 41%|████      | 5106/12440 [02:40<04:07, 29.65it/s]
 41%|████      | 5109/12440 [02:40<04:13, 28.86it/s]
 41%|████      | 5113/12440 [02:40<03:54, 31.21it/s]
 41%|████      | 5117/12440 [02:40<03:50, 31.79it/s]
 41%|████      | 5121/12440 [02:40<03:37, 33.63it/s]
 41%|████      | 5125/12440 [02:40<03:40, 33.20it/s]
 41%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.1746, 'learning_rate': 2.675326568661779e-05, 'epoch': 1.77}


[2m[36m(_objective pid=32421)[0m  44%|████▍     | 5504/12440 [02:51<03:13, 35.78it/s]
 44%|████▍     | 5508/12440 [02:51<03:18, 34.87it/s]
 44%|████▍     | 5512/12440 [02:52<03:21, 34.41it/s]
 44%|████▍     | 5516/12440 [02:52<03:15, 35.34it/s]
 44%|████▍     | 5520/12440 [02:52<03:30, 32.94it/s]
 44%|████▍     | 5524/12440 [02:52<03:25, 33.66it/s]
 44%|████▍     | 5528/12440 [02:52<03:23, 33.90it/s]
 44%|████▍     | 5532/12440 [02:52<03:27, 33.32it/s]
 45%|████▍     | 5536/12440 [02:52<03:22, 34.02it/s]
 45%|████▍     | 5540/12440 [02:52<03:32, 32.47it/s]
 45%|████▍     | 5544/12440 [02:53<03:22, 34.14it/s]
 45%|████▍     | 5548/12440 [02:53<03:20, 34.36it/s]
 45%|████▍     | 5552/12440 [02:53<03:19, 34.57it/s]
 45%|████▍     | 5556/12440 [02:53<03:37, 31.72it/s]
 45%|████▍     | 5560/12440 [02:53<03:31, 32.51it/s]
 45%|████▍     | 5564/12440 [02:53<03:30, 32.73it/s]
 45%|████▍     | 5568/12440 [02:53<03:23, 33.81it/s]
 45%|████▍     | 5572/12440 [02:53<03:26, 33.24it/s]
 45%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.136, 'learning_rate': 2.482579697720729e-05, 'epoch': 1.93}


[2m[36m(_objective pid=32421)[0m  48%|████▊     | 6006/12440 [03:06<02:56, 36.53it/s]
 48%|████▊     | 6010/12440 [03:06<02:53, 37.12it/s]
 48%|████▊     | 6014/12440 [03:06<03:03, 34.98it/s]
 48%|████▊     | 6018/12440 [03:07<03:04, 34.83it/s]
 48%|████▊     | 6022/12440 [03:07<03:01, 35.39it/s]
 48%|████▊     | 6026/12440 [03:07<02:58, 35.93it/s]
 48%|████▊     | 6030/12440 [03:07<03:09, 33.85it/s]
 49%|████▊     | 6034/12440 [03:07<03:06, 34.32it/s]
 49%|████▊     | 6038/12440 [03:07<03:00, 35.44it/s]
 49%|████▊     | 6042/12440 [03:07<03:01, 35.28it/s]
 49%|████▊     | 6046/12440 [03:07<03:03, 34.85it/s]
 49%|████▊     | 6050/12440 [03:08<03:23, 31.35it/s]
 49%|████▊     | 6054/12440 [03:08<03:37, 29.38it/s]
 49%|████▊     | 6058/12440 [03:08<03:28, 30.63it/s]
 49%|████▊     | 6062/12440 [03:08<03:20, 31.74it/s]
 49%|████▉     | 6066/12440 [03:08<03:12, 33.13it/s]
 49%|████▉     | 6070/12440 [03:08<03:12, 33.13it/s]
 49%|████▉     | 6074/12440 [03:08<03:12, 33.15it/s]


Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:25:25. Total running time: 1hr 5min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m  49%|████▉     | 6078/12440 [03:08<03:26, 30.87it/s]
 49%|████▉     | 6082/12440 [03:09<03:16, 32.35it/s]
 49%|████▉     | 6086/12440 [03:09<03:29, 30.35it/s]
 49%|████▉     | 6090/12440 [03:09<03:57, 26.72it/s]
 49%|████▉     | 6093/12440 [03:09<04:11, 25.26it/s]
 49%|████▉     | 6097/12440 [03:09<03:47, 27.94it/s]
 49%|████▉     | 6101/12440 [03:09<03:35, 29.41it/s]
 49%|████▉     | 6105/12440 [03:09<03:20, 31.59it/s]
 49%|████▉     | 6109/12440 [03:09<03:08, 33.60it/s]
 49%|████▉     | 6113/12440 [03:10<03:02, 34.67it/s]
 49%|████▉     | 6117/12440 [03:10<02:59, 35.31it/s]
 49%|████▉     | 6121/12440 [03:10<02:53, 36.48it/s]
 49%|████▉     | 6125/12440 [03:10<02:49, 37.25it/s]
 49%|████▉     | 6129/12440 [03:10<02:46, 38.01it/s]
 49%|████▉     | 6133/12440 [03:10<02:44, 38.27it/s]
 49%|████▉     | 6137/12440 [03:10<02:56, 35.74it/s]
 49%|████▉     | 6141/12440 [03:10<02:56, 35.77it/s]
 49%|████▉     | 6145/12440 [03:10<03:00, 34.92it/s]
 49%|████

Trial _objective_f556c_00008 finished iteration 2 at 2023-09-11 14:25:39. Total running time: 1hr 5min 18s
+-------------------------------------------------+
| Trial _objective_f556c_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.556 |
| time_total_s                            205.322 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.17398 |
| eval_runtime                             9.5905 |
| eval_samples_per_second                 432.407 |
| eval_steps_per_second                    13.555 |
| objective                               0.17398 |
+-------------------------------------------------+

[2m[36m(_objective pid=32421)[0m {'eval_loss': 0.17398300766944885, 'eval_runtime': 9.5905, 'eval_samples_per_second': 432.407, 'eval_steps_per_second': 13.555, 'epoch': 2.0}


[2m[36m(_objective pid=32421)[0m                                                     
[2m[36m(_objective pid=32421)[0m                                                  [A 50%|█████     | 6220/12440 [03:22<02:57, 34.98it/s]
[2m[36m(_objective pid=32421)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
                                                 [A
 50%|█████     | 6223/12440 [03:24<1:25:55,  1.21it/s]
 50%|█████     | 6227/12440 [03:24<1:01:19,  1.69it/s]
 50%|█████     | 6231/12440 [03:24<43:58,  2.35it/s]  
 50%|█████     | 6235/12440 [03:24<31:44,  3.26it/s]
 50%|█████     | 6239/12440 [03:24<23:19,  4.43it/s]
 50%|█████     | 6243/12440 [03:24<17:30,  5.90it/s]
 50%|█████     | 6247/12440 [03:24<13:10,  7.84it/s]
 50%|█████     | 6251/12440 [03:24<10:12, 10.10it/s]
 50%|█████     | 6255/12440 [03:25<07:59, 12.90it/s]
 50%|█████     | 6259/12440 [03:25<06:26, 15.98it/s]
 50%|█████     | 6263/12440 [03:25<05:25, 18.98it/s]
 50%|█████     | 6267/12440 [03:25

[2m[36m(_objective pid=32421)[0m {'loss': 0.1317, 'learning_rate': 2.2898328267796785e-05, 'epoch': 2.09}


[2m[36m(_objective pid=32421)[0m  52%|█████▏    | 6507/12440 [03:32<02:52, 34.39it/s]
 52%|█████▏    | 6511/12440 [03:32<02:51, 34.66it/s]
 52%|█████▏    | 6515/12440 [03:32<03:03, 32.26it/s]
 52%|█████▏    | 6519/12440 [03:32<02:55, 33.78it/s]
 52%|█████▏    | 6523/12440 [03:32<03:08, 31.32it/s]
 52%|█████▏    | 6527/12440 [03:33<03:06, 31.75it/s]
 52%|█████▎    | 6531/12440 [03:33<03:17, 29.87it/s]
 53%|█████▎    | 6535/12440 [03:33<03:05, 31.80it/s]
 53%|█████▎    | 6539/12440 [03:33<02:59, 32.93it/s]
 53%|█████▎    | 6543/12440 [03:33<02:52, 34.28it/s]
 53%|█████▎    | 6547/12440 [03:33<02:47, 35.23it/s]
 53%|█████▎    | 6551/12440 [03:33<02:51, 34.42it/s]
 53%|█████▎    | 6555/12440 [03:33<02:45, 35.56it/s]
 53%|█████▎    | 6559/12440 [03:33<02:40, 36.57it/s]
 53%|█████▎    | 6563/12440 [03:34<02:45, 35.58it/s]
 53%|█████▎    | 6567/12440 [03:34<02:42, 36.23it/s]
 53%|█████▎    | 6571/12440 [03:34<02:39, 36.73it/s]
 53%|█████▎    | 6575/12440 [03:34<02:40, 36.50it/s]
 53%|████

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:25:55. Total running time: 1hr 5min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m  54%|█████▍    | 6733/12440 [03:38<02:49, 33.59it/s]
 54%|█████▍    | 6737/12440 [03:39<03:00, 31.57it/s]
 54%|█████▍    | 6741/12440 [03:39<02:52, 33.02it/s]
 54%|█████▍    | 6745/12440 [03:39<02:45, 34.51it/s]
 54%|█████▍    | 6749/12440 [03:39<02:39, 35.61it/s]
 54%|█████▍    | 6753/12440 [03:39<02:58, 31.93it/s]
 54%|█████▍    | 6757/12440 [03:39<02:50, 33.24it/s]
 54%|█████▍    | 6761/12440 [03:39<02:49, 33.54it/s]
 54%|█████▍    | 6765/12440 [03:39<02:51, 33.16it/s]
 54%|█████▍    | 6769/12440 [03:40<03:02, 31.15it/s]
 54%|█████▍    | 6773/12440 [03:40<02:55, 32.24it/s]
 54%|█████▍    | 6777/12440 [03:40<03:16, 28.80it/s]
 55%|█████▍    | 6781/12440 [03:40<03:03, 30.90it/s]
 55%|█████▍    | 6785/12440 [03:40<02:56, 32.06it/s]
 55%|█████▍    | 6789/12440 [03:40<02:48, 33.51it/s]
 55%|█████▍    | 6793/12440 [03:40<02:48, 33.50it/s]
 55%|█████▍    | 6797/12440 [03:40<02:42, 34.82it/s]
 55%|█████▍    | 6801/12440 [03:41<02:46, 33.92it/s]
 55%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0866, 'learning_rate': 2.097085955838628e-05, 'epoch': 2.25}


[2m[36m(_objective pid=32421)[0m  56%|█████▋    | 7003/12440 [03:46<02:38, 34.37it/s]
 56%|█████▋    | 7007/12440 [03:47<02:46, 32.70it/s]
 56%|█████▋    | 7011/12440 [03:47<02:42, 33.33it/s]
 56%|█████▋    | 7015/12440 [03:47<02:40, 33.71it/s]
 56%|█████▋    | 7019/12440 [03:47<02:34, 35.10it/s]
 56%|█████▋    | 7023/12440 [03:47<02:32, 35.42it/s]
 56%|█████▋    | 7027/12440 [03:47<02:28, 36.40it/s]
 57%|█████▋    | 7031/12440 [03:47<02:49, 31.89it/s]
 57%|█████▋    | 7035/12440 [03:47<02:43, 33.10it/s]
 57%|█████▋    | 7039/12440 [03:47<02:37, 34.22it/s]
 57%|█████▋    | 7043/12440 [03:48<02:32, 35.42it/s]
 57%|█████▋    | 7047/12440 [03:48<02:33, 35.22it/s]
 57%|█████▋    | 7051/12440 [03:48<02:31, 35.67it/s]
 57%|█████▋    | 7055/12440 [03:48<02:29, 35.99it/s]
 57%|█████▋    | 7059/12440 [03:48<02:31, 35.49it/s]
 57%|█████▋    | 7063/12440 [03:48<02:28, 36.25it/s]
 57%|█████▋    | 7067/12440 [03:48<02:27, 36.43it/s]
 57%|█████▋    | 7071/12440 [03:48<02:44, 32.73it/s]
 57%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0737, 'learning_rate': 1.9043390848975777e-05, 'epoch': 2.41}


[2m[36m(_objective pid=32421)[0m  60%|██████    | 7506/12440 [04:01<02:24, 34.08it/s]
 60%|██████    | 7510/12440 [04:02<02:23, 34.35it/s]
 60%|██████    | 7514/12440 [04:02<02:23, 34.34it/s]
 60%|██████    | 7518/12440 [04:02<02:20, 35.00it/s]
 60%|██████    | 7522/12440 [04:02<02:27, 33.37it/s]
 60%|██████    | 7526/12440 [04:02<02:33, 32.10it/s]
 61%|██████    | 7530/12440 [04:02<02:26, 33.47it/s]
 61%|██████    | 7534/12440 [04:02<02:39, 30.72it/s]
 61%|██████    | 7538/12440 [04:02<02:47, 29.35it/s]
 61%|██████    | 7542/12440 [04:03<02:41, 30.33it/s]
 61%|██████    | 7546/12440 [04:03<02:31, 32.29it/s]
 61%|██████    | 7550/12440 [04:03<02:37, 30.98it/s]
 61%|██████    | 7554/12440 [04:03<02:29, 32.72it/s]
 61%|██████    | 7558/12440 [04:03<02:27, 33.15it/s]
 61%|██████    | 7562/12440 [04:03<02:22, 34.18it/s]
 61%|██████    | 7566/12440 [04:03<02:21, 34.35it/s]
 61%|██████    | 7570/12440 [04:03<02:20, 34.62it/s]
 61%|██████    | 7574/12440 [04:04<02:31, 32.13it/s]
 61%|████

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:26:25. Total running time: 1hr 6min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m  62%|██████▏   | 7742/12440 [04:08<02:24, 32.57it/s]
 62%|██████▏   | 7746/12440 [04:09<02:25, 32.26it/s]
 62%|██████▏   | 7750/12440 [04:09<02:22, 32.82it/s]
 62%|██████▏   | 7754/12440 [04:09<02:17, 34.04it/s]
 62%|██████▏   | 7758/12440 [04:09<02:12, 35.27it/s]
 62%|██████▏   | 7762/12440 [04:09<02:10, 35.79it/s]
 62%|██████▏   | 7766/12440 [04:09<02:22, 32.77it/s]
 62%|██████▏   | 7770/12440 [04:09<02:16, 34.14it/s]
 62%|██████▏   | 7774/12440 [04:09<02:20, 33.23it/s]
 63%|██████▎   | 7778/12440 [04:10<02:15, 34.50it/s]
 63%|██████▎   | 7782/12440 [04:10<02:15, 34.48it/s]
 63%|██████▎   | 7786/12440 [04:10<02:14, 34.70it/s]
 63%|██████▎   | 7790/12440 [04:10<02:20, 33.14it/s]
 63%|██████▎   | 7794/12440 [04:10<02:14, 34.52it/s]
 63%|██████▎   | 7798/12440 [04:10<02:11, 35.32it/s]
 63%|██████▎   | 7802/12440 [04:10<02:07, 36.39it/s]
 63%|██████▎   | 7806/12440 [04:10<02:23, 32.40it/s]
 63%|██████▎   | 7810/12440 [04:11<02:30, 30.70it/s]
 63%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0945, 'learning_rate': 1.7115922139565273e-05, 'epoch': 2.57}


[2m[36m(_objective pid=32421)[0m  64%|██████▍   | 8006/12440 [04:16<02:19, 31.84it/s]
 64%|██████▍   | 8010/12440 [04:16<02:12, 33.38it/s]
 64%|██████▍   | 8014/12440 [04:16<02:21, 31.24it/s]
 64%|██████▍   | 8018/12440 [04:17<02:17, 32.19it/s]
 64%|██████▍   | 8022/12440 [04:17<02:10, 33.93it/s]
 65%|██████▍   | 8026/12440 [04:17<02:23, 30.86it/s]
 65%|██████▍   | 8030/12440 [04:17<02:33, 28.76it/s]
 65%|██████▍   | 8034/12440 [04:17<02:23, 30.68it/s]
 65%|██████▍   | 8039/12440 [04:17<02:11, 33.58it/s]
 65%|██████▍   | 8043/12440 [04:17<02:05, 34.96it/s]
 65%|██████▍   | 8047/12440 [04:17<02:08, 34.32it/s]
 65%|██████▍   | 8051/12440 [04:18<02:08, 34.19it/s]
 65%|██████▍   | 8055/12440 [04:18<02:04, 35.31it/s]
 65%|██████▍   | 8059/12440 [04:18<02:34, 28.44it/s]
 65%|██████▍   | 8063/12440 [04:18<02:23, 30.47it/s]
 65%|██████▍   | 8067/12440 [04:18<02:13, 32.66it/s]
 65%|██████▍   | 8071/12440 [04:18<02:07, 34.36it/s]
 65%|██████▍   | 8075/12440 [04:18<02:03, 35.36it/s]
 65%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0843, 'learning_rate': 1.518845343015477e-05, 'epoch': 2.73}


[2m[36m(_objective pid=32421)[0m  68%|██████▊   | 8511/12440 [04:31<01:49, 36.02it/s]
 68%|██████▊   | 8515/12440 [04:31<01:50, 35.44it/s]
 68%|██████▊   | 8519/12440 [04:31<01:55, 33.84it/s]
 69%|██████▊   | 8523/12440 [04:32<01:58, 33.09it/s]
 69%|██████▊   | 8527/12440 [04:32<01:53, 34.57it/s]
 69%|██████▊   | 8531/12440 [04:32<01:49, 35.54it/s]
 69%|██████▊   | 8535/12440 [04:32<01:47, 36.18it/s]
 69%|██████▊   | 8539/12440 [04:32<01:50, 35.41it/s]
 69%|██████▊   | 8543/12440 [04:32<01:48, 35.95it/s]
 69%|██████▊   | 8547/12440 [04:32<01:48, 35.89it/s]
 69%|██████▊   | 8551/12440 [04:32<02:06, 30.73it/s]
 69%|██████▉   | 8555/12440 [04:32<01:59, 32.64it/s]
 69%|██████▉   | 8559/12440 [04:33<01:56, 33.23it/s]
 69%|██████▉   | 8563/12440 [04:33<01:51, 34.62it/s]
 69%|██████▉   | 8567/12440 [04:33<01:54, 33.94it/s]
 69%|██████▉   | 8571/12440 [04:33<01:50, 35.13it/s]
 69%|██████▉   | 8575/12440 [04:33<01:49, 35.46it/s]
 69%|██████▉   | 8579/12440 [04:33<01:46, 36.38it/s]
 69%|████

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:26:55. Total running time: 1hr 6min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m  70%|███████   | 8759/12440 [04:38<02:00, 30.52it/s]
 70%|███████   | 8763/12440 [04:39<02:05, 29.39it/s]
 70%|███████   | 8767/12440 [04:39<01:55, 31.88it/s]
 71%|███████   | 8771/12440 [04:39<02:01, 30.11it/s]
 71%|███████   | 8775/12440 [04:39<01:56, 31.33it/s]
 71%|███████   | 8779/12440 [04:39<01:59, 30.69it/s]
 71%|███████   | 8783/12440 [04:39<01:55, 31.69it/s]
 71%|███████   | 8787/12440 [04:39<01:49, 33.31it/s]
 71%|███████   | 8791/12440 [04:39<01:45, 34.54it/s]
 71%|███████   | 8795/12440 [04:39<01:42, 35.43it/s]
 71%|███████   | 8799/12440 [04:40<01:43, 35.07it/s]
 71%|███████   | 8803/12440 [04:40<01:55, 31.52it/s]
 71%|███████   | 8807/12440 [04:40<02:10, 27.94it/s]
 71%|███████   | 8811/12440 [04:40<02:02, 29.73it/s]
 71%|███████   | 8815/12440 [04:40<02:00, 29.99it/s]
 71%|███████   | 8819/12440 [04:40<02:05, 28.92it/s]
 71%|███████   | 8823/12440 [04:40<01:55, 31.27it/s]
 71%|███████   | 8827/12440 [04:41<01:50, 32.70it/s]
 71%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0842, 'learning_rate': 1.3260984720744267e-05, 'epoch': 2.89}


 72%|███████▏  | 9007/12440 [04:46<01:46, 32.37it/s]
 72%|███████▏  | 9011/12440 [04:46<01:54, 29.98it/s]
 72%|███████▏  | 9015/12440 [04:46<01:46, 32.18it/s]
 72%|███████▎  | 9019/12440 [04:46<01:44, 32.85it/s]
 73%|███████▎  | 9023/12440 [04:46<01:43, 33.01it/s]
 73%|███████▎  | 9027/12440 [04:46<01:38, 34.70it/s]
 73%|███████▎  | 9031/12440 [04:47<01:38, 34.44it/s]
 73%|███████▎  | 9036/12440 [04:47<01:33, 36.33it/s]
 73%|███████▎  | 9040/12440 [04:47<01:31, 37.28it/s]
 73%|███████▎  | 9044/12440 [04:47<01:30, 37.43it/s]
 73%|███████▎  | 9048/12440 [04:47<01:33, 36.38it/s]
 73%|███████▎  | 9052/12440 [04:47<01:31, 36.87it/s]
 73%|███████▎  | 9057/12440 [04:47<01:35, 35.60it/s]
 73%|███████▎  | 9061/12440 [04:47<01:32, 36.71it/s]
 73%|███████▎  | 9066/12440 [04:48<01:29, 37.82it/s]
 73%|███████▎  | 9070/12440 [04:48<01:36, 34.95it/s]
 73%|███████▎  | 9074/12440 [04:48<01:32, 36.21it/s]
 73%|███████▎  | 9078/12440 [04:48<01:34, 35.52it/s]
 73%|███████▎  | 9083/12440 [04:48<01:30, 37.0

Trial _objective_f556c_00008 finished iteration 3 at 2023-09-11 14:27:21. Total running time: 1hr 7min 0s
+-------------------------------------------------+
| Trial _objective_f556c_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.468 |
| time_total_s                            307.789 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.20399 |
| eval_runtime                             9.5933 |
| eval_samples_per_second                 432.281 |
| eval_steps_per_second                    13.551 |
| objective                               0.20399 |
+-------------------------------------------------+

[2m[36m(_objective pid=32421)[0m {'eval_loss': 0.2039937973022461, 'eval_runtime': 9.5933, 'eval_samples_per_second': 432.281, 'eval_steps_per_second': 13.551, 'epoch': 3.0}


[2m[36m(_objective pid=32421)[0m                                                     
[2m[36m(_objective pid=32421)[0m                                                  [A 75%|███████▌  | 9330/12440 [05:05<01:38, 31.59it/s]
[2m[36m(_objective pid=32421)[0m 100%|██████████| 130/130 [00:09<00:00, 11.98it/s][A
                                                 [A
 75%|███████▌  | 9332/12440 [05:06<43:32,  1.19it/s]
 75%|███████▌  | 9335/12440 [05:06<33:25,  1.55it/s]
 75%|███████▌  | 9339/12440 [05:06<23:15,  2.22it/s]
 75%|███████▌  | 9343/12440 [05:06<16:27,  3.14it/s]
 75%|███████▌  | 9347/12440 [05:06<11:56,  4.32it/s]
 75%|███████▌  | 9351/12440 [05:07<08:44,  5.89it/s]
 75%|███████▌  | 9355/12440 [05:07<06:32,  7.87it/s]
 75%|███████▌  | 9359/12440 [05:07<05:04, 10.12it/s]
 75%|███████▌  | 9363/12440 [05:07<03:57, 12.95it/s]
 75%|███████▌  | 9367/12440 [05:07<03:13, 15.91it/s]
 75%|███████▌  | 9371/12440 [05:07<02:42, 18.84it/s]
 75%|███████▌  | 9375/12440 [05:07<02:19

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:27:25. Total running time: 1hr 7min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m  76%|███████▌  | 9415/12440 [05:08<01:29, 33.66it/s]
 76%|███████▌  | 9419/12440 [05:09<01:26, 34.83it/s]
 76%|███████▌  | 9423/12440 [05:09<01:23, 35.94it/s]
 76%|███████▌  | 9427/12440 [05:09<01:27, 34.48it/s]
 76%|███████▌  | 9431/12440 [05:09<01:23, 35.85it/s]
 76%|███████▌  | 9435/12440 [05:09<01:22, 36.37it/s]
 76%|███████▌  | 9439/12440 [05:09<01:21, 36.81it/s]
 76%|███████▌  | 9443/12440 [05:09<01:22, 36.16it/s]
 76%|███████▌  | 9448/12440 [05:09<01:20, 37.31it/s]
 76%|███████▌  | 9452/12440 [05:09<01:19, 37.37it/s]
 76%|███████▌  | 9456/12440 [05:10<01:20, 37.13it/s]
 76%|███████▌  | 9460/12440 [05:10<01:19, 37.42it/s]
 76%|███████▌  | 9464/12440 [05:10<01:19, 37.53it/s]
 76%|███████▌  | 9468/12440 [05:10<01:20, 36.90it/s]
 76%|███████▌  | 9472/12440 [05:10<01:24, 35.09it/s]
 76%|███████▌  | 9476/12440 [05:10<01:21, 36.25it/s]
 76%|███████▌  | 9480/12440 [05:10<01:20, 36.72it/s]
 76%|███████▌  | 9484/12440 [05:10<01:20, 36.92it/s]
 76%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0886, 'learning_rate': 1.1333516011333763e-05, 'epoch': 3.05}


[2m[36m(_objective pid=32421)[0m  76%|███████▋  | 9504/12440 [05:11<01:23, 35.19it/s]
 76%|███████▋  | 9508/12440 [05:11<01:21, 35.99it/s]
 76%|███████▋  | 9512/12440 [05:11<01:19, 36.98it/s]
 76%|███████▋  | 9516/12440 [05:11<01:19, 36.90it/s]
 77%|███████▋  | 9520/12440 [05:11<01:18, 37.24it/s]
 77%|███████▋  | 9524/12440 [05:11<01:18, 37.03it/s]
 77%|███████▋  | 9528/12440 [05:11<01:17, 37.50it/s]
 77%|███████▋  | 9532/12440 [05:12<01:18, 36.95it/s]
 77%|███████▋  | 9536/12440 [05:12<01:19, 36.44it/s]
 77%|███████▋  | 9540/12440 [05:12<01:19, 36.45it/s]
 77%|███████▋  | 9544/12440 [05:12<01:25, 33.90it/s]
 77%|███████▋  | 9548/12440 [05:12<01:29, 32.45it/s]
 77%|███████▋  | 9552/12440 [05:12<01:26, 33.28it/s]
 77%|███████▋  | 9556/12440 [05:12<01:23, 34.72it/s]
 77%|███████▋  | 9560/12440 [05:12<01:22, 34.76it/s]
 77%|███████▋  | 9564/12440 [05:13<01:21, 35.16it/s]
 77%|███████▋  | 9568/12440 [05:13<01:22, 34.83it/s]
 77%|███████▋  | 9572/12440 [05:13<01:20, 35.76it/s]
 77%|████

[2m[36m(_objective pid=32421)[0m {'loss': 0.0735, 'learning_rate': 9.406047301923259e-06, 'epoch': 3.22}


[2m[36m(_objective pid=32421)[0m  80%|████████  | 10002/12440 [05:25<01:10, 34.36it/s]
 80%|████████  | 10006/12440 [05:26<01:14, 32.85it/s]
 80%|████████  | 10010/12440 [05:26<01:17, 31.27it/s]
 80%|████████  | 10014/12440 [05:26<01:21, 29.81it/s]
 81%|████████  | 10018/12440 [05:26<01:17, 31.16it/s]
 81%|████████  | 10022/12440 [05:26<01:13, 33.00it/s]
 81%|████████  | 10026/12440 [05:26<01:09, 34.72it/s]
 81%|████████  | 10030/12440 [05:26<01:14, 32.22it/s]
 81%|████████  | 10034/12440 [05:26<01:12, 33.16it/s]
 81%|████████  | 10038/12440 [05:27<01:10, 34.07it/s]
 81%|████████  | 10042/12440 [05:27<01:13, 32.80it/s]
 81%|████████  | 10047/12440 [05:27<01:08, 35.00it/s]
 81%|████████  | 10051/12440 [05:27<01:13, 32.35it/s]
 81%|████████  | 10055/12440 [05:27<01:10, 33.77it/s]
 81%|████████  | 10059/12440 [05:27<01:08, 34.92it/s]
 81%|████████  | 10063/12440 [05:27<01:07, 35.39it/s]
 81%|████████  | 10067/12440 [05:27<01:07, 35.14it/s]
 81%|████████  | 10071/12440 [05:28<01:08, 34

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:27:55. Total running time: 1hr 7min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m  84%|████████▍ | 10449/12440 [05:39<01:01, 32.63it/s]
 84%|████████▍ | 10453/12440 [05:39<00:57, 34.32it/s]
 84%|████████▍ | 10457/12440 [05:39<00:56, 35.07it/s]
 84%|████████▍ | 10461/12440 [05:39<01:00, 32.94it/s]
 84%|████████▍ | 10465/12440 [05:39<00:58, 33.97it/s]
 84%|████████▍ | 10469/12440 [05:39<00:56, 34.61it/s]
 84%|████████▍ | 10474/12440 [05:39<00:54, 36.38it/s]
 84%|████████▍ | 10478/12440 [05:39<00:53, 36.78it/s]
 84%|████████▍ | 10482/12440 [05:40<00:52, 36.95it/s]
 84%|████████▍ | 10487/12440 [05:40<00:51, 37.92it/s]
 84%|████████▍ | 10491/12440 [05:40<00:52, 37.25it/s]
 84%|████████▍ | 10495/12440 [05:40<00:55, 34.86it/s]
 84%|████████▍ | 10499/12440 [05:40<00:56, 34.09it/s]
 84%|████████▍ | 10503/12440 [05:40<00:54, 35.38it/s]


[2m[36m(_objective pid=32421)[0m {'loss': 0.0557, 'learning_rate': 7.478578592512754e-06, 'epoch': 3.38}


[2m[36m(_objective pid=32421)[0m  84%|████████▍ | 10507/12440 [05:40<00:55, 34.73it/s]
 84%|████████▍ | 10511/12440 [05:40<00:53, 36.05it/s]
 85%|████████▍ | 10515/12440 [05:40<00:57, 33.42it/s]
 85%|████████▍ | 10520/12440 [05:41<00:53, 35.73it/s]
 85%|████████▍ | 10524/12440 [05:41<00:52, 36.83it/s]
 85%|████████▍ | 10528/12440 [05:41<00:51, 37.11it/s]
 85%|████████▍ | 10532/12440 [05:41<00:52, 36.27it/s]
 85%|████████▍ | 10536/12440 [05:41<00:56, 33.66it/s]
 85%|████████▍ | 10540/12440 [05:41<00:56, 33.53it/s]
 85%|████████▍ | 10544/12440 [05:41<00:55, 34.37it/s]
 85%|████████▍ | 10549/12440 [05:41<00:52, 36.20it/s]
 85%|████████▍ | 10553/12440 [05:42<01:01, 30.79it/s]
 85%|████████▍ | 10557/12440 [05:42<00:57, 32.79it/s]
 85%|████████▍ | 10561/12440 [05:42<01:01, 30.60it/s]
 85%|████████▍ | 10565/12440 [05:42<00:57, 32.65it/s]
 85%|████████▍ | 10569/12440 [05:42<00:54, 34.29it/s]
 85%|████████▍ | 10573/12440 [05:42<00:53, 35.06it/s]
 85%|████████▌ | 10578/12440 [05:42<00:50, 36

[2m[36m(_objective pid=32421)[0m {'loss': 0.0514, 'learning_rate': 5.551109883102251e-06, 'epoch': 3.54}


[2m[36m(_objective pid=32421)[0m  88%|████████▊ | 11008/12440 [05:55<00:37, 37.85it/s]
 89%|████████▊ | 11012/12440 [05:55<00:38, 37.52it/s]
 89%|████████▊ | 11016/12440 [05:55<00:37, 38.11it/s]
 89%|████████▊ | 11020/12440 [05:55<00:37, 38.13it/s]
 89%|████████▊ | 11024/12440 [05:55<00:36, 38.60it/s]
 89%|████████▊ | 11028/12440 [05:55<00:41, 33.66it/s]
 89%|████████▊ | 11032/12440 [05:56<00:44, 31.63it/s]
 89%|████████▊ | 11036/12440 [05:56<00:42, 33.31it/s]
 89%|████████▊ | 11040/12440 [05:56<00:40, 34.59it/s]
 89%|████████▉ | 11044/12440 [05:56<00:39, 35.78it/s]
 89%|████████▉ | 11048/12440 [05:56<00:38, 36.32it/s]
 89%|████████▉ | 11052/12440 [05:56<00:38, 36.22it/s]
 89%|████████▉ | 11056/12440 [05:56<00:37, 37.16it/s]
 89%|████████▉ | 11060/12440 [05:56<00:36, 37.71it/s]
 89%|████████▉ | 11064/12440 [05:56<00:40, 33.86it/s]
 89%|████████▉ | 11068/12440 [05:57<00:42, 32.66it/s]
 89%|████████▉ | 11072/12440 [05:57<00:39, 34.25it/s]
 89%|████████▉ | 11076/12440 [05:57<00:43, 31

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:28:25. Total running time: 1hr 8min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                  

[2m[36m(_objective pid=32421)[0m  92%|█████████▏| 11473/12440 [06:09<00:31, 30.87it/s]
 92%|█████████▏| 11477/12440 [06:09<00:33, 29.09it/s]
 92%|█████████▏| 11481/12440 [06:09<00:30, 31.02it/s]
 92%|█████████▏| 11485/12440 [06:09<00:30, 31.55it/s]
 92%|█████████▏| 11490/12440 [06:09<00:28, 33.91it/s]
 92%|█████████▏| 11494/12440 [06:09<00:27, 34.95it/s]
 92%|█████████▏| 11500/12440 [06:09<00:26, 35.38it/s]
 92%|█████████▏| 11502/12440 [06:09<00:25, 36.21it/s]


[2m[36m(_objective pid=32421)[0m {'loss': 0.0683, 'learning_rate': 3.623641173691747e-06, 'epoch': 3.7}


[2m[36m(_objective pid=32421)[0m  92%|█████████▏| 11506/12440 [06:09<00:25, 36.45it/s]
 93%|█████████▎| 11510/12440 [06:10<00:26, 35.47it/s]
 93%|█████████▎| 11514/12440 [06:10<00:25, 35.94it/s]
 93%|█████████▎| 11518/12440 [06:10<00:28, 31.91it/s]
 93%|█████████▎| 11522/12440 [06:10<00:28, 32.75it/s]
 93%|█████████▎| 11526/12440 [06:10<00:26, 33.88it/s]
 93%|█████████▎| 11530/12440 [06:10<00:26, 34.71it/s]
 93%|█████████▎| 11534/12440 [06:10<00:25, 35.83it/s]
 93%|█████████▎| 11538/12440 [06:10<00:24, 36.60it/s]
 93%|█████████▎| 11542/12440 [06:10<00:24, 37.10it/s]
 93%|█████████▎| 11546/12440 [06:11<00:24, 36.69it/s]
 93%|█████████▎| 11550/12440 [06:11<00:23, 37.54it/s]
 93%|█████████▎| 11554/12440 [06:11<00:23, 37.22it/s]
 93%|█████████▎| 11558/12440 [06:11<00:23, 37.31it/s]
 93%|█████████▎| 11562/12440 [06:11<00:23, 37.47it/s]
 93%|█████████▎| 11567/12440 [06:11<00:22, 38.42it/s]
 93%|█████████▎| 11571/12440 [06:11<00:25, 33.67it/s]
 93%|█████████▎| 11575/12440 [06:11<00:24, 34

[2m[36m(_objective pid=32421)[0m {'loss': 0.0505, 'learning_rate': 1.6961724642812433e-06, 'epoch': 3.86}


[2m[36m(_objective pid=32421)[0m  96%|█████████▋| 12003/12440 [06:25<00:13, 31.83it/s]
 97%|█████████▋| 12007/12440 [06:25<00:12, 33.36it/s]
 97%|█████████▋| 12011/12440 [06:25<00:13, 31.07it/s]
 97%|█████████▋| 12015/12440 [06:25<00:12, 32.78it/s]
 97%|█████████▋| 12019/12440 [06:25<00:12, 33.95it/s]
 97%|█████████▋| 12023/12440 [06:25<00:12, 33.43it/s]
 97%|█████████▋| 12027/12440 [06:25<00:13, 30.50it/s]
 97%|█████████▋| 12031/12440 [06:26<00:14, 28.73it/s]
 97%|█████████▋| 12035/12440 [06:26<00:13, 30.67it/s]
 97%|█████████▋| 12039/12440 [06:26<00:12, 32.66it/s]
 97%|█████████▋| 12043/12440 [06:26<00:11, 33.12it/s]
 97%|█████████▋| 12047/12440 [06:26<00:11, 33.73it/s]
 97%|█████████▋| 12051/12440 [06:26<00:11, 34.21it/s]
 97%|█████████▋| 12055/12440 [06:26<00:10, 35.44it/s]
 97%|█████████▋| 12059/12440 [06:26<00:11, 34.01it/s]
 97%|█████████▋| 12063/12440 [06:26<00:11, 32.91it/s]
 97%|█████████▋| 12067/12440 [06:27<00:10, 33.98it/s]
 97%|█████████▋| 12071/12440 [06:27<00:10, 34

Trial status: 8 TERMINATED | 1 RUNNING | 11 PENDING
Current time: 2023-09-11 14:28:55. Total running time: 1hr 8min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00008   RUNNING          4.79554e-05                    4                 

[2m[36m(_objective pid=32421)[0m 
  8%|▊         | 11/130 [00:00<00:08, 13.84it/s][A
[2m[36m(_objective pid=32421)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.46it/s][A
[2m[36m(_objective pid=32421)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.33it/s][A
[2m[36m(_objective pid=32421)[0m 
 13%|█▎        | 17/130 [00:01<00:09, 11.57it/s][A
[2m[36m(_objective pid=32421)[0m 
 15%|█▍        | 19/130 [00:01<00:09, 11.51it/s][A
[2m[36m(_objective pid=32421)[0m 
 16%|█▌        | 21/130 [00:01<00:08, 13.00it/s][A
[2m[36m(_objective pid=32421)[0m 
 18%|█▊        | 23/130 [00:01<00:09, 11.82it/s][A
[2m[36m(_objective pid=32421)[0m 
 19%|█▉        | 25/130 [00:01<00:07, 13.42it/s][A
[2m[36m(_objective pid=32421)[0m 
 21%|██        | 27/130 [00:01<00:07, 14.11it/s][A
[2m[36m(_objective pid=32421)[0m 
 24%|██▍       | 31/130 [00:02<00:06, 16.17it/s][A
[2m[36m(_objective pid=32421)[0m 
 26%|██▌       | 34/130 [00:02<00:05, 16.36it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00008 finished iteration 4 at 2023-09-11 14:29:04. Total running time: 1hr 8min 43s
+-------------------------------------------------+
| Trial _objective_f556c_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.885 |
| time_total_s                            410.675 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.24502 |
| eval_runtime                             9.5938 |
| eval_samples_per_second                 432.259 |
| eval_steps_per_second                     13.55 |
| objective                               0.24502 |
+-------------------------------------------------+

[2m[36m(_objective pid=32421)[0m {'eval_loss': 0.2450166493654251, 'eval_runtime': 9.5938, 'eval_samples_per_second': 432.259, 'eval_steps_per_second': 13.55, 'epoch': 4.0}


[2m[36m(_objective pid=32421)[0m                                                      
[2m[36m(_objective pid=32421)[0m                                                  [A100%|██████████| 12440/12440 [06:48<00:00, 33.36it/s]
[2m[36m(_objective pid=32421)[0m 100%|██████████| 130/130 [00:09<00:00, 11.97it/s][A
[2m[36m(_objective pid=32421)[0m                                                  [A


Trial _objective_f556c_00008 completed after 4 iterations at 2023-09-11 14:29:06. Total running time: 1hr 8min 45s

[2m[36m(_objective pid=32421)[0m {'train_runtime': 409.5329, 'train_samples_per_second': 121.495, 'train_steps_per_second': 30.376, 'train_loss': 0.14554497237374162, 'epoch': 4.0}


[2m[36m(_objective pid=32421)[0m                                                      100%|██████████| 12440/12440 [06:49<00:00, 33.36it/s]100%|██████████| 12440/12440 [06:49<00:00, 30.38it/s]


Trial _objective_f556c_00009 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00009 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             1e-05 |
| num_train_epochs                              4 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.13205 |
+-------------------------------------------------+



[2m[36m(_objective pid=34244)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias']
[2m[36m(_objective pid=34244)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=34244)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=34244)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:29:25. Total running time: 1hr 9min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                  

[2m[36m(_objective pid=34244)[0m   3%|▎         | 345/12440 [00:10<06:54, 29.21it/s]
  3%|▎         | 349/12440 [00:10<06:51, 29.40it/s]
  3%|▎         | 353/12440 [00:10<06:26, 31.25it/s]
  3%|▎         | 357/12440 [00:10<06:15, 32.21it/s]
  3%|▎         | 361/12440 [00:11<06:27, 31.18it/s]
  3%|▎         | 365/12440 [00:11<07:11, 27.96it/s]
  3%|▎         | 369/12440 [00:11<06:41, 30.10it/s]
  3%|▎         | 373/12440 [00:11<06:28, 31.08it/s]
  3%|▎         | 377/12440 [00:11<06:16, 32.04it/s]
  3%|▎         | 381/12440 [00:11<06:15, 32.11it/s]
  3%|▎         | 385/12440 [00:11<06:15, 32.06it/s]
  3%|▎         | 389/12440 [00:11<06:05, 33.01it/s]
  3%|▎         | 393/12440 [00:12<06:11, 32.45it/s]
  3%|▎         | 397/12440 [00:12<06:08, 32.68it/s]
  3%|▎         | 401/12440 [00:12<06:07, 32.72it/s]
  3%|▎         | 405/12440 [00:12<06:05, 32.96it/s]
  3%|▎         | 409/12440 [00:12<06:03, 33.10it/s]
  3%|▎         | 413/12440 [00:12<06:17, 31.85it/s]
  3%|▎         | 417/12440 

[2m[36m(_objective pid=34244)[0m {'loss': 0.407, 'learning_rate': 1.3347926177608436e-05, 'epoch': 0.16}


[2m[36m(_objective pid=34244)[0m   4%|▍         | 506/12440 [00:15<06:05, 32.61it/s]
  4%|▍         | 510/12440 [00:15<06:04, 32.73it/s]
  4%|▍         | 514/12440 [00:15<05:53, 33.72it/s]
  4%|▍         | 518/12440 [00:15<05:39, 35.14it/s]
  4%|▍         | 523/12440 [00:15<05:24, 36.74it/s]
  4%|▍         | 527/12440 [00:16<05:20, 37.19it/s]
  4%|▍         | 531/12440 [00:16<05:16, 37.60it/s]
  4%|▍         | 535/12440 [00:16<05:38, 35.22it/s]
  4%|▍         | 539/12440 [00:16<06:00, 32.98it/s]
  4%|▍         | 543/12440 [00:16<05:43, 34.65it/s]
  4%|▍         | 548/12440 [00:16<05:26, 36.46it/s]
  4%|▍         | 552/12440 [00:16<05:19, 37.17it/s]
  4%|▍         | 556/12440 [00:16<05:27, 36.34it/s]
  5%|▍         | 560/12440 [00:16<05:22, 36.86it/s]
  5%|▍         | 564/12440 [00:17<05:20, 37.10it/s]
  5%|▍         | 568/12440 [00:17<05:18, 37.24it/s]
  5%|▍         | 572/12440 [00:17<05:18, 37.29it/s]
  5%|▍         | 576/12440 [00:17<05:15, 37.57it/s]
  5%|▍         | 581/12440 

[2m[36m(_objective pid=34244)[0m {'loss': 0.3136, 'learning_rate': 1.2788967794961518e-05, 'epoch': 0.32}


[2m[36m(_objective pid=34244)[0m   8%|▊         | 1002/12440 [00:29<05:01, 37.89it/s]
  8%|▊         | 1006/12440 [00:29<05:00, 38.02it/s]
  8%|▊         | 1010/12440 [00:29<04:59, 38.10it/s]
  8%|▊         | 1014/12440 [00:30<05:05, 37.45it/s]
  8%|▊         | 1019/12440 [00:30<04:57, 38.40it/s]
  8%|▊         | 1023/12440 [00:30<05:11, 36.67it/s]
  8%|▊         | 1027/12440 [00:30<05:05, 37.35it/s]
  8%|▊         | 1031/12440 [00:30<05:00, 38.03it/s]
  8%|▊         | 1035/12440 [00:30<05:09, 36.81it/s]
  8%|▊         | 1039/12440 [00:30<05:08, 36.93it/s]
  8%|▊         | 1043/12440 [00:30<05:05, 37.33it/s]
  8%|▊         | 1047/12440 [00:30<05:11, 36.55it/s]
  8%|▊         | 1051/12440 [00:31<05:30, 34.46it/s]
  8%|▊         | 1055/12440 [00:31<05:21, 35.40it/s]
  9%|▊         | 1059/12440 [00:31<05:24, 35.03it/s]
  9%|▊         | 1063/12440 [00:31<05:18, 35.69it/s]
  9%|▊         | 1067/12440 [00:31<05:21, 35.42it/s]
  9%|▊         | 1071/12440 [00:31<05:13, 36.29it/s]
  9%|▊   

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:29:55. Total running time: 1hr 9min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

 11%|█         | 1382/12440 [00:40<05:08, 35.79it/s]
 11%|█         | 1386/12440 [00:40<05:12, 35.34it/s]
 11%|█         | 1390/12440 [00:40<05:18, 34.70it/s]
 11%|█         | 1394/12440 [00:41<06:24, 28.72it/s]
 11%|█         | 1398/12440 [00:41<06:36, 27.84it/s]
 11%|█▏        | 1402/12440 [00:41<06:49, 26.98it/s]
 11%|█▏        | 1406/12440 [00:41<06:14, 29.46it/s]
 11%|█▏        | 1410/12440 [00:41<05:58, 30.73it/s]
 11%|█▏        | 1414/12440 [00:41<05:45, 31.95it/s]
 11%|█▏        | 1418/12440 [00:41<06:02, 30.39it/s]
 11%|█▏        | 1422/12440 [00:41<05:41, 32.22it/s]
 11%|█▏        | 1426/12440 [00:42<05:23, 34.06it/s]
 11%|█▏        | 1430/12440 [00:42<05:17, 34.68it/s]
 12%|█▏        | 1434/12440 [00:42<05:09, 35.59it/s]
 12%|█▏        | 1438/12440 [00:42<05:27, 33.56it/s]
 12%|█▏        | 1442/12440 [00:42<05:16, 34.77it/s]
 12%|█▏        | 1446/12440 [00:42<05:37, 32.54it/s]
 12%|█▏        | 1450/12440 [00:42<05:35, 32.71it/s]
 12%|█▏        | 1454/12440 [00:42<05:29, 33.3

[2m[36m(_objective pid=34244)[0m {'loss': 0.296, 'learning_rate': 1.2230009412314598e-05, 'epoch': 0.48}


[2m[36m(_objective pid=34244)[0m                                                      12%|█▏        | 1500/12440 [00:44<05:16, 34.54it/s] 12%|█▏        | 1502/12440 [00:44<05:17, 34.48it/s]
 12%|█▏        | 1506/12440 [00:44<05:15, 34.63it/s]
 12%|█▏        | 1510/12440 [00:44<05:11, 35.05it/s]
 12%|█▏        | 1514/12440 [00:44<05:13, 34.83it/s]
 12%|█▏        | 1518/12440 [00:44<05:05, 35.79it/s]
 12%|█▏        | 1522/12440 [00:44<05:15, 34.59it/s]
 12%|█▏        | 1526/12440 [00:45<05:04, 35.85it/s]
 12%|█▏        | 1530/12440 [00:45<04:57, 36.72it/s]
 12%|█▏        | 1534/12440 [00:45<04:51, 37.41it/s]
 12%|█▏        | 1538/12440 [00:45<04:47, 37.97it/s]
 12%|█▏        | 1542/12440 [00:45<05:18, 34.19it/s]
 12%|█▏        | 1546/12440 [00:45<05:10, 35.10it/s]
 12%|█▏        | 1550/12440 [00:45<05:10, 35.05it/s]
 12%|█▏        | 1554/12440 [00:45<05:16, 34.35it/s]
 13%|█▎        | 1558/12440 [00:46<06:15, 28.99it/s]
 13%|█▎        | 1562/12440 [00:46<05:46, 31.42it/s]
 13%|█▎ 

[2m[36m(_objective pid=34244)[0m {'loss': 0.2549, 'learning_rate': 1.1671051029667678e-05, 'epoch': 0.64}


[2m[36m(_objective pid=34244)[0m  16%|█▌        | 2004/12440 [00:59<04:52, 35.65it/s]
 16%|█▌        | 2008/12440 [00:59<05:03, 34.39it/s]
 16%|█▌        | 2012/12440 [00:59<04:56, 35.14it/s]
 16%|█▌        | 2016/12440 [00:59<04:49, 35.97it/s]
 16%|█▌        | 2020/12440 [00:59<05:12, 33.35it/s]
 16%|█▋        | 2024/12440 [00:59<05:26, 31.85it/s]
 16%|█▋        | 2028/12440 [01:00<05:45, 30.15it/s]
 16%|█▋        | 2032/12440 [01:00<05:23, 32.19it/s]
 16%|█▋        | 2036/12440 [01:00<05:11, 33.40it/s]
 16%|█▋        | 2040/12440 [01:00<05:05, 34.05it/s]
 16%|█▋        | 2044/12440 [01:00<05:35, 31.00it/s]
 16%|█▋        | 2048/12440 [01:00<05:30, 31.41it/s]
 16%|█▋        | 2052/12440 [01:00<05:45, 30.04it/s]
 17%|█▋        | 2056/12440 [01:00<05:50, 29.66it/s]
 17%|█▋        | 2060/12440 [01:01<05:34, 31.08it/s]
 17%|█▋        | 2064/12440 [01:01<05:23, 32.11it/s]
 17%|█▋        | 2068/12440 [01:01<05:10, 33.36it/s]
 17%|█▋        | 2072/12440 [01:01<04:58, 34.75it/s]
 17%|█▋  

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:30:25. Total running time: 1hr 10min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

[2m[36m(_objective pid=34244)[0m  19%|█▉        | 2380/12440 [01:10<05:34, 30.03it/s]
 19%|█▉        | 2384/12440 [01:10<05:25, 30.85it/s]
 19%|█▉        | 2388/12440 [01:10<05:13, 32.11it/s]
 19%|█▉        | 2392/12440 [01:11<05:01, 33.36it/s]
 19%|█▉        | 2396/12440 [01:11<04:54, 34.08it/s]
 19%|█▉        | 2400/12440 [01:11<05:00, 33.38it/s]
 19%|█▉        | 2404/12440 [01:11<04:53, 34.23it/s]
 19%|█▉        | 2408/12440 [01:11<04:49, 34.60it/s]
 19%|█▉        | 2412/12440 [01:11<04:50, 34.47it/s]
 19%|█▉        | 2416/12440 [01:11<04:44, 35.29it/s]
 19%|█▉        | 2420/12440 [01:11<04:34, 36.44it/s]
 19%|█▉        | 2424/12440 [01:11<04:38, 35.93it/s]
 20%|█▉        | 2428/12440 [01:12<04:39, 35.77it/s]
 20%|█▉        | 2432/12440 [01:12<04:38, 35.99it/s]
 20%|█▉        | 2436/12440 [01:12<04:51, 34.30it/s]
 20%|█▉        | 2440/12440 [01:12<04:54, 33.98it/s]
 20%|█▉        | 2444/12440 [01:12<04:42, 35.34it/s]
 20%|█▉        | 2448/12440 [01:12<04:37, 35.94it/s]
 20%|█▉  

[2m[36m(_objective pid=34244)[0m {'loss': 0.2168, 'learning_rate': 1.1112092647020758e-05, 'epoch': 0.8}


[2m[36m(_objective pid=34244)[0m  20%|██        | 2504/12440 [01:14<04:51, 34.10it/s]
 20%|██        | 2508/12440 [01:14<05:29, 30.13it/s]
 20%|██        | 2512/12440 [01:14<05:22, 30.79it/s]
 20%|██        | 2516/12440 [01:14<05:39, 29.27it/s]
 20%|██        | 2520/12440 [01:14<05:22, 30.74it/s]
 20%|██        | 2524/12440 [01:14<05:05, 32.42it/s]
 20%|██        | 2528/12440 [01:15<05:05, 32.48it/s]
 20%|██        | 2532/12440 [01:15<05:16, 31.30it/s]
 20%|██        | 2536/12440 [01:15<05:02, 32.72it/s]
 20%|██        | 2540/12440 [01:15<04:55, 33.54it/s]
 20%|██        | 2544/12440 [01:15<04:51, 33.96it/s]
 20%|██        | 2548/12440 [01:15<04:46, 34.54it/s]
 21%|██        | 2552/12440 [01:15<04:47, 34.38it/s]
 21%|██        | 2556/12440 [01:15<04:43, 34.81it/s]
 21%|██        | 2560/12440 [01:16<04:38, 35.49it/s]
 21%|██        | 2564/12440 [01:16<04:38, 35.46it/s]
 21%|██        | 2568/12440 [01:16<05:05, 32.29it/s]
 21%|██        | 2572/12440 [01:16<05:05, 32.28it/s]
 21%|██  

[2m[36m(_objective pid=34244)[0m {'loss': 0.2575, 'learning_rate': 1.0553134264373838e-05, 'epoch': 0.96}


[2m[36m(_objective pid=34244)[0m  24%|██▍       | 3000/12440 [01:29<05:22, 29.31it/s]                                                     24%|██▍       | 3000/12440 [01:29<05:22, 29.31it/s]
 24%|██▍       | 3004/12440 [01:29<05:07, 30.68it/s]
 24%|██▍       | 3008/12440 [01:29<05:13, 30.09it/s]
 24%|██▍       | 3012/12440 [01:29<04:55, 31.96it/s]
 24%|██▍       | 3016/12440 [01:29<04:37, 33.93it/s]
 24%|██▍       | 3020/12440 [01:29<04:26, 35.31it/s]
 24%|██▍       | 3024/12440 [01:29<04:18, 36.38it/s]
 24%|██▍       | 3028/12440 [01:30<04:45, 32.99it/s]
 24%|██▍       | 3032/12440 [01:30<04:32, 34.54it/s]
 24%|██▍       | 3036/12440 [01:30<04:26, 35.32it/s]
 24%|██▍       | 3040/12440 [01:30<04:35, 34.17it/s]
 24%|██▍       | 3044/12440 [01:30<04:25, 35.37it/s]
 25%|██▍       | 3048/12440 [01:30<04:23, 35.65it/s]
 25%|██▍       | 3052/12440 [01:30<04:30, 34.67it/s]
 25%|██▍       | 3056/12440 [01:30<04:25, 35.28it/s]
 25%|██▍       | 3060/12440 [01:31<04:28, 34.92it/s]
 25%|██▍

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:30:55. Total running time: 1hr 10min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                

[2m[36m(_objective pid=34244)[0m 
 87%|████████▋ | 113/130 [00:08<00:01, 11.39it/s][A
[2m[36m(_objective pid=34244)[0m 
 89%|████████▉ | 116/130 [00:08<00:01, 12.45it/s][A
[2m[36m(_objective pid=34244)[0m 
 91%|█████████ | 118/130 [00:08<00:00, 13.13it/s][A
[2m[36m(_objective pid=34244)[0m 
 92%|█████████▏| 120/130 [00:08<00:00, 14.33it/s][A
[2m[36m(_objective pid=34244)[0m 
 94%|█████████▍| 122/130 [00:08<00:00, 13.72it/s][A
[2m[36m(_objective pid=34244)[0m 
 95%|█████████▌| 124/130 [00:08<00:00, 13.18it/s][A
[2m[36m(_objective pid=34244)[0m 
 98%|█████████▊| 127/130 [00:09<00:00, 13.12it/s][A
[2m[36m(_objective pid=34244)[0m 
 99%|█████████▉| 129/130 [00:09<00:00, 11.95it/s][A


Trial _objective_f556c_00009 finished iteration 1 at 2023-09-11 14:30:57. Total running time: 1hr 10min 36s
+-------------------------------------------------+
| Trial _objective_f556c_00009 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.418 |
| time_total_s                            104.418 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.23881 |
| eval_runtime                              9.607 |
| eval_samples_per_second                 431.663 |
| eval_steps_per_second                    13.532 |
| objective                               0.23881 |
+-------------------------------------------------+

[2m[36m(_objective pid=34244)[0m {'eval_loss': 0.23880986869335175, 'eval_runtime': 9.607, 'eval_samples_per_second': 431.663, 'eval_steps_per_second': 13.532, 'epoch': 1.0}


[2m[36m(_objective pid=34244)[0m                                                     
[2m[36m(_objective pid=34244)[0m                                                  [A 25%|██▌       | 3110/12440 [01:42<04:26, 34.98it/s]
[2m[36m(_objective pid=34244)[0m 100%|██████████| 130/130 [00:09<00:00, 11.95it/s][A
                                                 [A
 25%|██▌       | 3112/12440 [01:43<2:11:32,  1.18it/s]
 25%|██▌       | 3116/12440 [01:43<1:33:28,  1.66it/s]
 25%|██▌       | 3120/12440 [01:43<1:07:40,  2.30it/s]
 25%|██▌       | 3124/12440 [01:43<48:43,  3.19it/s]  
 25%|██▌       | 3128/12440 [01:43<35:29,  4.37it/s]
 25%|██▌       | 3132/12440 [01:44<26:08,  5.93it/s]
 25%|██▌       | 3136/12440 [01:44<19:39,  7.89it/s]
 25%|██▌       | 3140/12440 [01:44<15:17, 10.14it/s]
 25%|██▌       | 3144/12440 [01:44<12:00, 12.90it/s]
 25%|██▌       | 3148/12440 [01:44<09:45, 15.87it/s]
 25%|██▌       | 3152/12440 [01:44<08:07, 19.05it/s]
 25%|██▌       | 3156/12440 [01:

[2m[36m(_objective pid=34244)[0m {'loss': 0.1543, 'learning_rate': 9.99417588172692e-06, 'epoch': 1.13}


 28%|██▊       | 3505/12440 [01:55<04:12, 35.32it/s]
 28%|██▊       | 3509/12440 [01:55<04:15, 35.00it/s]
 28%|██▊       | 3513/12440 [01:55<04:26, 33.52it/s]
 28%|██▊       | 3517/12440 [01:55<04:37, 32.18it/s]
 28%|██▊       | 3521/12440 [01:55<04:29, 33.15it/s]
 28%|██▊       | 3525/12440 [01:56<04:48, 30.93it/s]
 28%|██▊       | 3529/12440 [01:56<04:49, 30.74it/s]
 28%|██▊       | 3533/12440 [01:56<04:34, 32.50it/s]
 28%|██▊       | 3537/12440 [01:56<04:47, 30.97it/s]
 28%|██▊       | 3541/12440 [01:56<04:37, 32.12it/s]
 28%|██▊       | 3545/12440 [01:56<04:29, 32.95it/s]
 29%|██▊       | 3549/12440 [01:56<04:21, 33.96it/s]
 29%|██▊       | 3553/12440 [01:56<04:13, 35.07it/s]
 29%|██▊       | 3557/12440 [01:56<04:13, 35.08it/s]
 29%|██▊       | 3561/12440 [01:57<04:23, 33.66it/s]
 29%|██▊       | 3565/12440 [01:57<04:27, 33.12it/s]
 29%|██▊       | 3569/12440 [01:57<04:20, 34.01it/s]
 29%|██▊       | 3573/12440 [01:57<04:43, 31.26it/s]
 29%|██▉       | 3578/12440 [01:57<04:21, 33.8

[2m[36m(_objective pid=34244)[0m {'loss': 0.1352, 'learning_rate': 9.43521749908e-06, 'epoch': 1.29}


[2m[36m(_objective pid=34244)[0m  32%|███▏      | 3999/12440 [02:10<04:33, 30.87it/s]                                                     32%|███▏      | 4000/12440 [02:10<04:33, 30.87it/s]
 32%|███▏      | 4003/12440 [02:10<04:48, 29.26it/s]
 32%|███▏      | 4007/12440 [02:10<04:41, 29.97it/s]
 32%|███▏      | 4011/12440 [02:10<04:55, 28.54it/s]


Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:31:25. Total running time: 1hr 11min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

[2m[36m(_objective pid=34244)[0m  32%|███▏      | 4015/12440 [02:10<04:36, 30.44it/s]
 32%|███▏      | 4019/12440 [02:10<04:22, 32.09it/s]
 32%|███▏      | 4023/12440 [02:10<04:12, 33.32it/s]
 32%|███▏      | 4027/12440 [02:11<04:14, 33.11it/s]
 32%|███▏      | 4031/12440 [02:11<04:06, 34.12it/s]
 32%|███▏      | 4035/12440 [02:11<04:03, 34.50it/s]
 32%|███▏      | 4039/12440 [02:11<03:56, 35.48it/s]
 32%|███▎      | 4043/12440 [02:11<03:53, 35.93it/s]
 33%|███▎      | 4047/12440 [02:11<03:53, 36.01it/s]
 33%|███▎      | 4051/12440 [02:11<03:52, 36.05it/s]
 33%|███▎      | 4055/12440 [02:11<03:59, 35.00it/s]
 33%|███▎      | 4059/12440 [02:12<03:59, 34.96it/s]
 33%|███▎      | 4063/12440 [02:12<03:59, 34.91it/s]
 33%|███▎      | 4067/12440 [02:12<04:00, 34.88it/s]
 33%|███▎      | 4071/12440 [02:12<04:02, 34.58it/s]
 33%|███▎      | 4075/12440 [02:12<03:59, 34.94it/s]
 33%|███▎      | 4079/12440 [02:12<04:03, 34.29it/s]
 33%|███▎      | 4083/12440 [02:12<04:08, 33.62it/s]
 33%|███▎

[2m[36m(_objective pid=34244)[0m {'loss': 0.1526, 'learning_rate': 8.87625911643308e-06, 'epoch': 1.45}


[2m[36m(_objective pid=34244)[0m  36%|███▌      | 4503/12440 [02:25<04:11, 31.60it/s]
 36%|███▌      | 4507/12440 [02:25<04:01, 32.86it/s]
 36%|███▋      | 4511/12440 [02:25<03:53, 34.02it/s]
 36%|███▋      | 4515/12440 [02:25<03:52, 34.15it/s]
 36%|███▋      | 4519/12440 [02:25<03:48, 34.60it/s]
 36%|███▋      | 4523/12440 [02:25<03:48, 34.60it/s]
 36%|███▋      | 4527/12440 [02:25<03:43, 35.34it/s]
 36%|███▋      | 4531/12440 [02:26<04:10, 31.63it/s]
 36%|███▋      | 4535/12440 [02:26<04:01, 32.79it/s]
 36%|███▋      | 4539/12440 [02:26<04:08, 31.75it/s]
 37%|███▋      | 4543/12440 [02:26<04:12, 31.33it/s]
 37%|███▋      | 4547/12440 [02:26<04:09, 31.58it/s]
 37%|███▋      | 4551/12440 [02:26<04:01, 32.66it/s]
 37%|███▋      | 4555/12440 [02:26<03:54, 33.57it/s]
 37%|███▋      | 4559/12440 [02:27<04:16, 30.77it/s]
 37%|███▋      | 4563/12440 [02:27<04:14, 30.96it/s]
 37%|███▋      | 4567/12440 [02:27<04:03, 32.35it/s]
 37%|███▋      | 4571/12440 [02:27<03:59, 32.79it/s]
 37%|███▋

[2m[36m(_objective pid=34244)[0m {'loss': 0.1505, 'learning_rate': 8.317300733786161e-06, 'epoch': 1.61}
Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:31:55. Total running time: 1hr 11min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

[2m[36m(_objective pid=34244)[0m  40%|████      | 5003/12440 [02:40<03:47, 32.72it/s]
 40%|████      | 5007/12440 [02:40<03:41, 33.55it/s]
 40%|████      | 5011/12440 [02:40<03:38, 33.97it/s]
 40%|████      | 5015/12440 [02:40<03:37, 34.10it/s]
 40%|████      | 5019/12440 [02:41<03:47, 32.62it/s]
 40%|████      | 5023/12440 [02:41<04:04, 30.29it/s]
 40%|████      | 5027/12440 [02:41<04:17, 28.78it/s]
 40%|████      | 5031/12440 [02:41<03:59, 30.95it/s]
 40%|████      | 5035/12440 [02:41<03:48, 32.38it/s]
 41%|████      | 5039/12440 [02:41<03:41, 33.46it/s]
 41%|████      | 5043/12440 [02:41<03:35, 34.27it/s]
 41%|████      | 5047/12440 [02:42<04:06, 30.05it/s]
 41%|████      | 5051/12440 [02:42<03:57, 31.09it/s]
 41%|████      | 5055/12440 [02:42<04:17, 28.70it/s]
 41%|████      | 5059/12440 [02:42<04:06, 29.96it/s]
 41%|████      | 5063/12440 [02:42<03:50, 31.95it/s]
 41%|████      | 5067/12440 [02:42<03:47, 32.41it/s]
 41%|████      | 5071/12440 [02:42<03:40, 33.42it/s]
 41%|████

[2m[36m(_objective pid=34244)[0m {'loss': 0.1768, 'learning_rate': 7.758342351139241e-06, 'epoch': 1.77}


[2m[36m(_objective pid=34244)[0m  44%|████▍     | 5504/12440 [02:55<02:57, 39.07it/s]
 44%|████▍     | 5508/12440 [02:56<02:59, 38.60it/s]
 44%|████▍     | 5513/12440 [02:56<02:55, 39.39it/s]
 44%|████▍     | 5518/12440 [02:56<02:53, 39.94it/s]
 44%|████▍     | 5522/12440 [02:56<03:09, 36.53it/s]
 44%|████▍     | 5526/12440 [02:56<03:05, 37.33it/s]
 44%|████▍     | 5531/12440 [02:56<03:00, 38.23it/s]
 44%|████▍     | 5535/12440 [02:56<03:00, 38.33it/s]
 45%|████▍     | 5539/12440 [02:56<03:12, 35.81it/s]
 45%|████▍     | 5543/12440 [02:56<03:07, 36.71it/s]
 45%|████▍     | 5548/12440 [02:57<03:01, 37.95it/s]
 45%|████▍     | 5552/12440 [02:57<03:01, 37.96it/s]
 45%|████▍     | 5556/12440 [02:57<03:18, 34.66it/s]
 45%|████▍     | 5560/12440 [02:57<03:12, 35.73it/s]
 45%|████▍     | 5564/12440 [02:57<03:09, 36.22it/s]
 45%|████▍     | 5568/12440 [02:57<03:05, 37.01it/s]
 45%|████▍     | 5572/12440 [02:57<03:12, 35.70it/s]
 45%|████▍     | 5577/12440 [02:57<03:03, 37.40it/s]
 45%|████

[2m[36m(_objective pid=34244)[0m {'loss': 0.1551, 'learning_rate': 7.199383968492323e-06, 'epoch': 1.93}


[2m[36m(_objective pid=34244)[0m  48%|████▊     | 5998/12440 [03:10<02:56, 36.54it/s]                                                     48%|████▊     | 6000/12440 [03:10<02:56, 36.54it/s]
 48%|████▊     | 6002/12440 [03:10<02:56, 36.52it/s]
 48%|████▊     | 6006/12440 [03:10<02:54, 36.91it/s]
 48%|████▊     | 6010/12440 [03:10<02:50, 37.77it/s]


Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:32:26. Total running time: 1hr 12min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

[2m[36m(_objective pid=34244)[0m  48%|████▊     | 6014/12440 [03:10<02:58, 36.07it/s]
 48%|████▊     | 6018/12440 [03:10<02:57, 36.24it/s]
 48%|████▊     | 6022/12440 [03:11<02:53, 36.98it/s]
 48%|████▊     | 6027/12440 [03:11<02:47, 38.19it/s]
 48%|████▊     | 6031/12440 [03:11<03:00, 35.51it/s]
 49%|████▊     | 6036/12440 [03:11<02:53, 36.99it/s]
 49%|████▊     | 6040/12440 [03:11<02:50, 37.53it/s]
 49%|████▊     | 6044/12440 [03:11<02:50, 37.50it/s]
 49%|████▊     | 6048/12440 [03:11<03:09, 33.76it/s]
 49%|████▊     | 6052/12440 [03:11<03:06, 34.27it/s]
 49%|████▊     | 6056/12440 [03:12<03:20, 31.89it/s]
 49%|████▊     | 6060/12440 [03:12<03:15, 32.66it/s]
 49%|████▊     | 6064/12440 [03:12<03:04, 34.49it/s]
 49%|████▉     | 6068/12440 [03:12<03:03, 34.68it/s]
 49%|████▉     | 6072/12440 [03:12<03:02, 34.84it/s]
 49%|████▉     | 6076/12440 [03:12<03:02, 34.88it/s]
 49%|████▉     | 6080/12440 [03:12<03:19, 31.91it/s]
 49%|████▉     | 6084/12440 [03:12<03:33, 29.73it/s]
 49%|████

Trial _objective_f556c_00009 finished iteration 2 at 2023-09-11 14:32:41. Total running time: 1hr 12min 20s
+-------------------------------------------------+
| Trial _objective_f556c_00009 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.376 |
| time_total_s                            208.794 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.20679 |
| eval_runtime                             9.6073 |
| eval_samples_per_second                  431.65 |
| eval_steps_per_second                    13.531 |
| objective                               0.20679 |
+-------------------------------------------------+

[2m[36m(_objective pid=34244)[0m {'eval_loss': 0.20679479837417603, 'eval_runtime': 9.6073, 'eval_samples_per_second': 431.65, 'eval_steps_per_second': 13.531, 'epoch': 2.0}


[2m[36m(_objective pid=34244)[0m                                                     
[2m[36m(_objective pid=34244)[0m                                                  [A 50%|█████     | 6220/12440 [03:26<03:00, 34.52it/s]
[2m[36m(_objective pid=34244)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
                                                 [A
 50%|█████     | 6223/12440 [03:27<1:27:01,  1.19it/s]
 50%|█████     | 6227/12440 [03:27<1:01:44,  1.68it/s]
 50%|█████     | 6231/12440 [03:27<44:05,  2.35it/s]  
 50%|█████     | 6235/12440 [03:28<31:40,  3.26it/s]
 50%|█████     | 6239/12440 [03:28<23:11,  4.46it/s]
 50%|█████     | 6243/12440 [03:28<17:20,  5.95it/s]
 50%|█████     | 6247/12440 [03:28<13:03,  7.91it/s]
 50%|█████     | 6251/12440 [03:28<10:05, 10.23it/s]
 50%|█████     | 6255/12440 [03:28<07:53, 13.05it/s]
 50%|█████     | 6259/12440 [03:28<06:23, 16.12it/s]
 50%|█████     | 6263/12440 [03:28<05:24, 19.01it/s]
 50%|█████     | 6267/12440 [03:29

[2m[36m(_objective pid=34244)[0m {'loss': 0.1345, 'learning_rate': 6.640425585845403e-06, 'epoch': 2.09}


[2m[36m(_objective pid=34244)[0m  52%|█████▏    | 6500/12440 [03:35<02:38, 37.43it/s]                                                     52%|█████▏    | 6500/12440 [03:35<02:38, 37.43it/s]
 52%|█████▏    | 6504/12440 [03:35<02:47, 35.45it/s]
 52%|█████▏    | 6508/12440 [03:35<02:44, 36.09it/s]
 52%|█████▏    | 6512/12440 [03:36<02:40, 36.95it/s]
 52%|█████▏    | 6516/12440 [03:36<02:55, 33.82it/s]
 52%|█████▏    | 6520/12440 [03:36<02:52, 34.29it/s]
 52%|█████▏    | 6524/12440 [03:36<03:14, 30.43it/s]
 52%|█████▏    | 6528/12440 [03:36<03:01, 32.55it/s]
 53%|█████▎    | 6532/12440 [03:36<03:12, 30.69it/s]
 53%|█████▎    | 6536/12440 [03:36<03:05, 31.89it/s]
 53%|█████▎    | 6540/12440 [03:36<02:56, 33.36it/s]
 53%|█████▎    | 6544/12440 [03:37<02:51, 34.43it/s]
 53%|█████▎    | 6548/12440 [03:37<02:44, 35.72it/s]
 53%|█████▎    | 6552/12440 [03:37<02:55, 33.48it/s]
 53%|█████▎    | 6556/12440 [03:37<02:54, 33.74it/s]
 53%|█████▎    | 6560/12440 [03:37<02:59, 32.77it/s]
 53%|███

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:32:56. Total running time: 1hr 12min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                

[2m[36m(_objective pid=34244)[0m  54%|█████▎    | 6677/12440 [03:40<02:43, 35.15it/s]
 54%|█████▎    | 6681/12440 [03:40<02:42, 35.36it/s]
 54%|█████▎    | 6685/12440 [03:41<02:48, 34.12it/s]
 54%|█████▍    | 6689/12440 [03:41<02:41, 35.52it/s]
 54%|█████▍    | 6693/12440 [03:41<02:36, 36.71it/s]
 54%|█████▍    | 6697/12440 [03:41<02:52, 33.32it/s]
 54%|█████▍    | 6701/12440 [03:41<03:04, 31.06it/s]
 54%|█████▍    | 6705/12440 [03:41<02:55, 32.65it/s]
 54%|█████▍    | 6709/12440 [03:41<02:46, 34.36it/s]
 54%|█████▍    | 6713/12440 [03:41<02:43, 34.96it/s]
 54%|█████▍    | 6717/12440 [03:42<02:48, 33.95it/s]
 54%|█████▍    | 6721/12440 [03:42<02:43, 34.92it/s]
 54%|█████▍    | 6725/12440 [03:42<02:41, 35.44it/s]
 54%|█████▍    | 6729/12440 [03:42<02:41, 35.44it/s]
 54%|█████▍    | 6733/12440 [03:42<02:44, 34.65it/s]
 54%|█████▍    | 6737/12440 [03:42<02:59, 31.84it/s]
 54%|█████▍    | 6741/12440 [03:42<02:48, 33.74it/s]
 54%|█████▍    | 6745/12440 [03:42<02:42, 35.11it/s]
 54%|████

[2m[36m(_objective pid=34244)[0m {'loss': 0.1123, 'learning_rate': 6.081467203198483e-06, 'epoch': 2.25}


[2m[36m(_objective pid=34244)[0m  56%|█████▌    | 6997/12440 [03:50<02:29, 36.29it/s]                                                     56%|█████▋    | 7000/12440 [03:50<02:29, 36.29it/s]
 56%|█████▋    | 7001/12440 [03:50<02:27, 36.91it/s]
 56%|█████▋    | 7005/12440 [03:50<02:36, 34.68it/s]
 56%|█████▋    | 7009/12440 [03:50<02:45, 32.78it/s]
 56%|█████▋    | 7013/12440 [03:50<02:40, 33.81it/s]
 56%|█████▋    | 7017/12440 [03:50<02:37, 34.33it/s]
 56%|█████▋    | 7021/12440 [03:50<02:33, 35.33it/s]
 56%|█████▋    | 7025/12440 [03:50<02:31, 35.80it/s]
 57%|█████▋    | 7029/12440 [03:51<02:45, 32.70it/s]
 57%|█████▋    | 7033/12440 [03:51<02:43, 33.11it/s]
 57%|█████▋    | 7037/12440 [03:51<02:34, 34.86it/s]
 57%|█████▋    | 7041/12440 [03:51<02:34, 35.05it/s]
 57%|█████▋    | 7045/12440 [03:51<02:31, 35.61it/s]
 57%|█████▋    | 7049/12440 [03:51<02:30, 35.85it/s]
 57%|█████▋    | 7053/12440 [03:51<02:26, 36.69it/s]
 57%|█████▋    | 7057/12440 [03:51<02:28, 36.23it/s]
 57%|███

[2m[36m(_objective pid=34244)[0m {'loss': 0.0808, 'learning_rate': 5.522508820551564e-06, 'epoch': 2.41}


[2m[36m(_objective pid=34244)[0m  60%|██████    | 7504/12440 [04:05<02:15, 36.31it/s]
 60%|██████    | 7508/12440 [04:05<02:12, 37.10it/s]
 60%|██████    | 7512/12440 [04:05<02:14, 36.53it/s]
 60%|██████    | 7516/12440 [04:05<02:13, 36.80it/s]
 60%|██████    | 7520/12440 [04:05<02:13, 36.73it/s]
 60%|██████    | 7524/12440 [04:05<02:32, 32.14it/s]
 61%|██████    | 7528/12440 [04:05<02:26, 33.64it/s]
 61%|██████    | 7532/12440 [04:06<02:20, 34.86it/s]
 61%|██████    | 7536/12440 [04:06<02:32, 32.09it/s]
 61%|██████    | 7540/12440 [04:06<02:41, 30.25it/s]
 61%|██████    | 7544/12440 [04:06<02:35, 31.39it/s]
 61%|██████    | 7548/12440 [04:06<02:26, 33.41it/s]
 61%|██████    | 7552/12440 [04:06<02:34, 31.68it/s]
 61%|██████    | 7556/12440 [04:06<02:27, 33.14it/s]
 61%|██████    | 7560/12440 [04:06<02:27, 33.01it/s]
 61%|██████    | 7564/12440 [04:07<02:23, 33.90it/s]
 61%|██████    | 7568/12440 [04:07<02:19, 35.03it/s]
 61%|██████    | 7572/12440 [04:07<02:23, 33.81it/s]
 61%|████

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:33:26. Total running time: 1hr 13min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

[2m[36m(_objective pid=34244)[0m  62%|██████▏   | 7696/12440 [04:10<02:31, 31.21it/s]
 62%|██████▏   | 7700/12440 [04:10<02:25, 32.55it/s]
 62%|██████▏   | 7704/12440 [04:11<02:39, 29.63it/s]
 62%|██████▏   | 7708/12440 [04:11<02:28, 31.76it/s]
 62%|██████▏   | 7712/12440 [04:11<02:20, 33.70it/s]
 62%|██████▏   | 7716/12440 [04:11<02:21, 33.28it/s]
 62%|██████▏   | 7720/12440 [04:11<02:17, 34.29it/s]
 62%|██████▏   | 7724/12440 [04:11<02:18, 34.01it/s]
 62%|██████▏   | 7728/12440 [04:11<02:16, 34.60it/s]
 62%|██████▏   | 7732/12440 [04:11<02:17, 34.34it/s]
 62%|██████▏   | 7736/12440 [04:12<02:15, 34.83it/s]
 62%|██████▏   | 7740/12440 [04:12<02:20, 33.34it/s]
 62%|██████▏   | 7744/12440 [04:12<02:21, 33.11it/s]
 62%|██████▏   | 7748/12440 [04:12<02:14, 34.81it/s]
 62%|██████▏   | 7752/12440 [04:12<02:15, 34.72it/s]
 62%|██████▏   | 7756/12440 [04:12<02:13, 34.97it/s]
 62%|██████▏   | 7760/12440 [04:12<02:11, 35.59it/s]
 62%|██████▏   | 7764/12440 [04:12<02:08, 36.35it/s]
 62%|████

[2m[36m(_objective pid=34244)[0m {'loss': 0.1041, 'learning_rate': 4.963550437904645e-06, 'epoch': 2.57}


 64%|██████▍   | 8003/12440 [04:19<02:17, 32.26it/s]
 64%|██████▍   | 8007/12440 [04:19<02:15, 32.61it/s]
 64%|██████▍   | 8011/12440 [04:20<02:09, 34.17it/s]
 64%|██████▍   | 8015/12440 [04:20<02:19, 31.76it/s]
 64%|██████▍   | 8019/12440 [04:20<02:13, 33.14it/s]
 64%|██████▍   | 8023/12440 [04:20<02:22, 30.92it/s]
 65%|██████▍   | 8027/12440 [04:20<02:31, 29.09it/s]
 65%|██████▍   | 8031/12440 [04:20<02:24, 30.59it/s]
 65%|██████▍   | 8035/12440 [04:20<02:17, 32.02it/s]
 65%|██████▍   | 8039/12440 [04:20<02:13, 33.08it/s]
 65%|██████▍   | 8043/12440 [04:21<02:07, 34.47it/s]
 65%|██████▍   | 8047/12440 [04:21<02:08, 34.24it/s]
 65%|██████▍   | 8051/12440 [04:21<02:08, 34.25it/s]
 65%|██████▍   | 8055/12440 [04:21<02:04, 35.15it/s]
 65%|██████▍   | 8059/12440 [04:21<02:34, 28.44it/s]
 65%|██████▍   | 8063/12440 [04:21<02:23, 30.47it/s]
 65%|██████▍   | 8067/12440 [04:21<02:14, 32.61it/s]
 65%|██████▍   | 8071/12440 [04:21<02:06, 34.41it/s]
 65%|██████▍   | 8075/12440 [04:22<02:01, 35.8

[2m[36m(_objective pid=34244)[0m {'loss': 0.096, 'learning_rate': 4.404592055257725e-06, 'epoch': 2.73}


[2m[36m(_objective pid=34244)[0m  68%|██████▊   | 8500/12440 [04:34<01:56, 33.93it/s]                                                     68%|██████▊   | 8500/12440 [04:34<01:56, 33.93it/s]
 68%|██████▊   | 8504/12440 [04:34<01:51, 35.35it/s]
 68%|██████▊   | 8508/12440 [04:34<01:49, 35.82it/s]
 68%|██████▊   | 8512/12440 [04:34<01:46, 36.83it/s]
 68%|██████▊   | 8516/12440 [04:34<01:51, 35.07it/s]
 68%|██████▊   | 8520/12440 [04:35<01:53, 34.62it/s]
 69%|██████▊   | 8524/12440 [04:35<01:54, 34.32it/s]
 69%|██████▊   | 8529/12440 [04:35<01:47, 36.23it/s]
 69%|██████▊   | 8533/12440 [04:35<01:45, 37.14it/s]
 69%|██████▊   | 8537/12440 [04:35<01:45, 37.03it/s]
 69%|██████▊   | 8541/12440 [04:35<01:45, 37.06it/s]
 69%|██████▊   | 8545/12440 [04:35<01:43, 37.46it/s]
 69%|██████▊   | 8549/12440 [04:35<01:51, 34.89it/s]
 69%|██████▉   | 8553/12440 [04:36<02:00, 32.35it/s]
 69%|██████▉   | 8557/12440 [04:36<01:54, 33.82it/s]
 69%|██████▉   | 8561/12440 [04:36<01:54, 33.93it/s]
 69%|███

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:33:56. Total running time: 1hr 13min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                

[2m[36m(_objective pid=34244)[0m  70%|███████   | 8723/12440 [04:40<01:40, 36.90it/s]
 70%|███████   | 8727/12440 [04:40<01:40, 36.97it/s]
 70%|███████   | 8731/12440 [04:41<01:39, 37.29it/s]
 70%|███████   | 8735/12440 [04:41<01:42, 36.19it/s]
 70%|███████   | 8739/12440 [04:41<01:46, 34.73it/s]
 70%|███████   | 8743/12440 [04:41<01:44, 35.49it/s]
 70%|███████   | 8747/12440 [04:41<01:44, 35.37it/s]
 70%|███████   | 8751/12440 [04:41<01:51, 33.04it/s]
 70%|███████   | 8755/12440 [04:41<02:01, 30.38it/s]
 70%|███████   | 8759/12440 [04:41<02:05, 29.31it/s]
 70%|███████   | 8762/12440 [04:42<02:13, 27.59it/s]
 70%|███████   | 8766/12440 [04:42<02:00, 30.40it/s]
 70%|███████   | 8770/12440 [04:42<02:05, 29.28it/s]
 71%|███████   | 8774/12440 [04:42<02:01, 30.12it/s]
 71%|███████   | 8778/12440 [04:42<02:03, 29.65it/s]
 71%|███████   | 8782/12440 [04:42<01:56, 31.40it/s]
 71%|███████   | 8786/12440 [04:42<01:50, 33.03it/s]
 71%|███████   | 8790/12440 [04:42<01:48, 33.55it/s]
 71%|████

[2m[36m(_objective pid=34244)[0m {'loss': 0.1207, 'learning_rate': 3.845633672610806e-06, 'epoch': 2.89}


[2m[36m(_objective pid=34244)[0m  72%|███████▏  | 9000/12440 [04:49<01:34, 36.50it/s]                                                     72%|███████▏  | 9000/12440 [04:49<01:34, 36.50it/s]
 72%|███████▏  | 9004/12440 [04:49<01:42, 33.48it/s]
 72%|███████▏  | 9008/12440 [04:49<01:38, 34.97it/s]
 72%|███████▏  | 9012/12440 [04:49<01:47, 31.92it/s]
 72%|███████▏  | 9016/12440 [04:49<01:42, 33.51it/s]
 73%|███████▎  | 9020/12440 [04:49<01:41, 33.63it/s]
 73%|███████▎  | 9024/12440 [04:50<01:42, 33.17it/s]
 73%|███████▎  | 9029/12440 [04:50<01:36, 35.40it/s]
 73%|███████▎  | 9033/12440 [04:50<01:37, 34.77it/s]
 73%|███████▎  | 9037/12440 [04:50<01:34, 36.03it/s]
 73%|███████▎  | 9041/12440 [04:50<01:32, 36.85it/s]
 73%|███████▎  | 9045/12440 [04:50<01:30, 37.63it/s]
 73%|███████▎  | 9049/12440 [04:50<01:32, 36.83it/s]
 73%|███████▎  | 9053/12440 [04:50<01:29, 37.71it/s]
 73%|███████▎  | 9057/12440 [04:50<01:35, 35.53it/s]
 73%|███████▎  | 9061/12440 [04:51<01:34, 35.95it/s]
 73%|███

Trial _objective_f556c_00009 finished iteration 3 at 2023-09-11 14:34:23. Total running time: 1hr 14min 2s
+-------------------------------------------------+
| Trial _objective_f556c_00009 result             |
+-------------------------------------------------+
| time_this_iter_s                        101.911 |
| time_total_s                            310.705 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.21907 |
| eval_runtime                             9.5895 |
| eval_samples_per_second                 432.451 |
| eval_steps_per_second                    13.556 |
| objective                               0.21907 |
+-------------------------------------------------+

[2m[36m(_objective pid=34244)[0m {'eval_loss': 0.21907447278499603, 'eval_runtime': 9.5895, 'eval_samples_per_second': 432.451, 'eval_steps_per_second': 13.556, 'epoch': 3.0}


[2m[36m(_objective pid=34244)[0m                                                     
[2m[36m(_objective pid=34244)[0m                                                  [A 75%|███████▌  | 9330/12440 [05:08<01:36, 32.34it/s]
[2m[36m(_objective pid=34244)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
                                                 [A
 75%|███████▌  | 9334/12440 [05:09<43:29,  1.19it/s]
 75%|███████▌  | 9337/12440 [05:09<33:21,  1.55it/s]
 75%|███████▌  | 9341/12440 [05:09<23:13,  2.22it/s]
 75%|███████▌  | 9345/12440 [05:10<16:31,  3.12it/s]
 75%|███████▌  | 9349/12440 [05:10<11:51,  4.34it/s]
 75%|███████▌  | 9353/12440 [05:10<08:39,  5.94it/s]
 75%|███████▌  | 9357/12440 [05:10<06:27,  7.96it/s]
 75%|███████▌  | 9361/12440 [05:10<05:01, 10.22it/s]
 75%|███████▌  | 9365/12440 [05:10<03:55, 13.08it/s]
 75%|███████▌  | 9369/12440 [05:10<03:15, 15.70it/s]
 75%|███████▌  | 9373/12440 [05:10<02:40, 19.08it/s]


Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:34:26. Total running time: 1hr 14min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

[2m[36m(_objective pid=34244)[0m  75%|███████▌  | 9377/12440 [05:10<02:16, 22.50it/s]
 75%|███████▌  | 9381/12440 [05:11<02:01, 25.16it/s]
 75%|███████▌  | 9385/12440 [05:11<01:48, 28.10it/s]
 75%|███████▌  | 9389/12440 [05:11<01:39, 30.64it/s]
 76%|███████▌  | 9393/12440 [05:11<01:43, 29.42it/s]
 76%|███████▌  | 9397/12440 [05:11<01:36, 31.39it/s]
 76%|███████▌  | 9401/12440 [05:11<01:37, 31.10it/s]
 76%|███████▌  | 9405/12440 [05:11<01:32, 32.64it/s]
 76%|███████▌  | 9409/12440 [05:11<01:27, 34.45it/s]
 76%|███████▌  | 9414/12440 [05:12<01:27, 34.65it/s]
 76%|███████▌  | 9418/12440 [05:12<01:25, 35.30it/s]
 76%|███████▌  | 9422/12440 [05:12<01:24, 35.91it/s]
 76%|███████▌  | 9426/12440 [05:12<01:23, 36.16it/s]
 76%|███████▌  | 9430/12440 [05:12<01:21, 37.01it/s]
 76%|███████▌  | 9435/12440 [05:12<01:19, 37.67it/s]
 76%|███████▌  | 9439/12440 [05:12<01:20, 37.47it/s]
 76%|███████▌  | 9443/12440 [05:12<01:22, 36.48it/s]
 76%|███████▌  | 9447/12440 [05:12<01:22, 36.40it/s]
 76%|████

[2m[36m(_objective pid=34244)[0m {'loss': 0.0941, 'learning_rate': 3.286675289963886e-06, 'epoch': 3.05}


[2m[36m(_objective pid=34244)[0m  76%|███████▋  | 9500/12440 [05:14<01:16, 38.59it/s]                                                     76%|███████▋  | 9500/12440 [05:14<01:16, 38.59it/s]
 76%|███████▋  | 9504/12440 [05:14<01:19, 36.76it/s]
 76%|███████▋  | 9508/12440 [05:14<01:18, 37.47it/s]
 76%|███████▋  | 9512/12440 [05:14<01:16, 38.17it/s]
 76%|███████▋  | 9516/12440 [05:14<01:16, 38.36it/s]
 77%|███████▋  | 9520/12440 [05:14<01:16, 38.17it/s]
 77%|███████▋  | 9524/12440 [05:15<01:15, 38.66it/s]
 77%|███████▋  | 9528/12440 [05:15<01:15, 38.33it/s]
 77%|███████▋  | 9532/12440 [05:15<01:16, 38.08it/s]
 77%|███████▋  | 9536/12440 [05:15<01:15, 38.38it/s]
 77%|███████▋  | 9540/12440 [05:15<01:15, 38.36it/s]
 77%|███████▋  | 9544/12440 [05:15<01:22, 35.26it/s]
 77%|███████▋  | 9548/12440 [05:15<01:26, 33.57it/s]
 77%|███████▋  | 9552/12440 [05:15<01:23, 34.42it/s]
 77%|███████▋  | 9556/12440 [05:15<01:20, 35.82it/s]
 77%|███████▋  | 9560/12440 [05:16<01:21, 35.37it/s]
 77%|███

[2m[36m(_objective pid=34244)[0m {'loss': 0.0893, 'learning_rate': 2.727716907316967e-06, 'epoch': 3.22}


 80%|████████  | 10003/12440 [05:28<01:08, 35.34it/s]
 80%|████████  | 10007/12440 [05:29<01:20, 30.23it/s]
 80%|████████  | 10012/12440 [05:29<01:13, 33.07it/s]
 81%|████████  | 10016/12440 [05:29<01:19, 30.45it/s]
 81%|████████  | 10020/12440 [05:29<01:14, 32.31it/s]
 81%|████████  | 10024/12440 [05:29<01:10, 34.13it/s]
 81%|████████  | 10028/12440 [05:29<01:15, 31.89it/s]
 81%|████████  | 10032/12440 [05:29<01:13, 32.95it/s]
 81%|████████  | 10036/12440 [05:29<01:13, 32.74it/s]
 81%|████████  | 10040/12440 [05:30<01:16, 31.28it/s]
 81%|████████  | 10044/12440 [05:30<01:12, 33.06it/s]
 81%|████████  | 10048/12440 [05:30<01:09, 34.39it/s]
 81%|████████  | 10052/12440 [05:30<01:15, 31.44it/s]
 81%|████████  | 10056/12440 [05:30<01:11, 33.53it/s]
 81%|████████  | 10060/12440 [05:30<01:10, 33.96it/s]
 81%|████████  | 10064/12440 [05:30<01:07, 35.20it/s]
 81%|████████  | 10068/12440 [05:30<01:09, 34.22it/s]
 81%|████████  | 10072/12440 [05:31<01:11, 33.12it/s]
 81%|████████  | 10076/12440

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:34:56. Total running time: 1hr 14min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                

[2m[36m(_objective pid=34244)[0m  84%|████████▎ | 10415/12440 [05:40<01:00, 33.67it/s]
 84%|████████▍ | 10419/12440 [05:40<00:58, 34.84it/s]
 84%|████████▍ | 10423/12440 [05:41<01:00, 33.50it/s]
 84%|████████▍ | 10427/12440 [05:41<00:57, 34.74it/s]
 84%|████████▍ | 10431/12440 [05:41<01:05, 30.84it/s]
 84%|████████▍ | 10439/12440 [05:41<00:57, 34.73it/s]
 84%|████████▍ | 10443/12440 [05:41<00:55, 35.93it/s]
 84%|████████▍ | 10447/12440 [05:41<00:53, 37.01it/s]
 84%|████████▍ | 10451/12440 [05:41<00:58, 33.75it/s]
 84%|████████▍ | 10456/12440 [05:42<00:55, 36.05it/s]
 84%|████████▍ | 10460/12440 [05:42<00:56, 34.91it/s]
 84%|████████▍ | 10464/12440 [05:42<00:56, 35.25it/s]
 84%|████████▍ | 10468/12440 [05:42<00:55, 35.65it/s]
 84%|████████▍ | 10472/12440 [05:42<00:53, 36.58it/s]
 84%|████████▍ | 10477/12440 [05:42<00:52, 37.37it/s]
 84%|████████▍ | 10481/12440 [05:42<00:53, 36.68it/s]
 84%|████████▍ | 10485/12440 [05:42<00:52, 37.14it/s]
 84%|████████▍ | 10489/12440 [05:42<00:53, 36

[2m[36m(_objective pid=34244)[0m {'loss': 0.0688, 'learning_rate': 2.1687585246700473e-06, 'epoch': 3.38}


 84%|████████▍ | 10501/12440 [05:43<00:55, 34.93it/s]
 84%|████████▍ | 10505/12440 [05:43<00:55, 34.60it/s]
 84%|████████▍ | 10509/12440 [05:43<00:55, 34.53it/s]
 85%|████████▍ | 10513/12440 [05:43<00:58, 32.83it/s]
 85%|████████▍ | 10517/12440 [05:43<00:57, 33.73it/s]
 85%|████████▍ | 10521/12440 [05:43<00:54, 35.32it/s]
 85%|████████▍ | 10525/12440 [05:43<00:53, 36.00it/s]
 85%|████████▍ | 10529/12440 [05:44<00:51, 36.87it/s]
 85%|████████▍ | 10533/12440 [05:44<00:53, 35.73it/s]
 85%|████████▍ | 10537/12440 [05:44<00:57, 33.23it/s]
 85%|████████▍ | 10541/12440 [05:44<00:57, 33.19it/s]
 85%|████████▍ | 10545/12440 [05:44<00:55, 34.45it/s]
 85%|████████▍ | 10549/12440 [05:44<00:54, 34.90it/s]
 85%|████████▍ | 10553/12440 [05:44<01:04, 29.26it/s]
 85%|████████▍ | 10557/12440 [05:44<00:59, 31.57it/s]
 85%|████████▍ | 10561/12440 [05:45<01:02, 29.89it/s]
 85%|████████▍ | 10565/12440 [05:45<00:58, 32.27it/s]
 85%|████████▍ | 10569/12440 [05:45<00:54, 34.22it/s]
 85%|████████▍ | 10573/12440

[2m[36m(_objective pid=34244)[0m {'loss': 0.0665, 'learning_rate': 1.609800142023128e-06, 'epoch': 3.54}


[2m[36m(_objective pid=34244)[0m  88%|████████▊ | 11004/12440 [05:58<00:39, 36.57it/s]
 88%|████████▊ | 11008/12440 [05:58<00:38, 36.96it/s]
 89%|████████▊ | 11012/12440 [05:58<00:38, 37.32it/s]
 89%|████████▊ | 11016/12440 [05:58<00:37, 37.86it/s]
 89%|████████▊ | 11020/12440 [05:58<00:37, 38.27it/s]
 89%|████████▊ | 11024/12440 [05:58<00:36, 38.45it/s]
 89%|████████▊ | 11028/12440 [05:58<00:42, 33.52it/s]
 89%|████████▊ | 11032/12440 [05:58<00:44, 31.45it/s]
 89%|████████▊ | 11036/12440 [05:58<00:41, 33.50it/s]
 89%|████████▊ | 11040/12440 [05:59<00:39, 35.16it/s]
 89%|████████▉ | 11044/12440 [05:59<00:38, 36.45it/s]
 89%|████████▉ | 11048/12440 [05:59<00:37, 37.06it/s]
 89%|████████▉ | 11052/12440 [05:59<00:37, 37.07it/s]
 89%|████████▉ | 11056/12440 [05:59<00:36, 37.78it/s]
 89%|████████▉ | 11061/12440 [05:59<00:35, 38.44it/s]
 89%|████████▉ | 11065/12440 [05:59<00:43, 31.94it/s]
 89%|████████▉ | 11069/12440 [05:59<00:40, 33.90it/s]
 89%|████████▉ | 11073/12440 [05:59<00:38, 35

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:35:26. Total running time: 1hr 15min 4s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                 

[2m[36m(_objective pid=34244)[0m  92%|█████████▏| 11456/12440 [06:11<00:30, 32.10it/s]
 92%|█████████▏| 11460/12440 [06:11<00:29, 33.45it/s]
 92%|█████████▏| 11464/12440 [06:11<00:28, 34.85it/s]
 92%|█████████▏| 11468/12440 [06:11<00:27, 35.67it/s]
 92%|█████████▏| 11472/12440 [06:11<00:29, 32.85it/s]
 92%|█████████▏| 11476/12440 [06:11<00:31, 30.39it/s]
 92%|█████████▏| 11480/12440 [06:11<00:29, 32.45it/s]
 92%|█████████▏| 11484/12440 [06:12<00:29, 32.76it/s]
 92%|█████████▏| 11488/12440 [06:12<00:27, 34.51it/s]
 92%|█████████▏| 11492/12440 [06:12<00:26, 35.93it/s]
 92%|█████████▏| 11496/12440 [06:12<00:25, 36.63it/s]


[2m[36m(_objective pid=34244)[0m {'loss': 0.0754, 'learning_rate': 1.0508417593762086e-06, 'epoch': 3.7}


[2m[36m(_objective pid=34244)[0m  92%|█████████▏| 11500/12440 [06:12<00:25, 36.39it/s]                                                      92%|█████████▏| 11500/12440 [06:12<00:25, 36.39it/s]
 92%|█████████▏| 11505/12440 [06:12<00:24, 37.62it/s]
 93%|█████████▎| 11510/12440 [06:12<00:25, 37.05it/s]
 93%|█████████▎| 11514/12440 [06:12<00:24, 37.76it/s]
 93%|█████████▎| 11518/12440 [06:12<00:27, 33.67it/s]
 93%|█████████▎| 11522/12440 [06:13<00:27, 33.96it/s]
 93%|█████████▎| 11526/12440 [06:13<00:25, 35.45it/s]
 93%|█████████▎| 11530/12440 [06:13<00:25, 36.21it/s]
 93%|█████████▎| 11535/12440 [06:13<00:24, 37.38it/s]
 93%|█████████▎| 11539/12440 [06:13<00:24, 37.29it/s]
 93%|█████████▎| 11544/12440 [06:13<00:23, 38.31it/s]
 93%|█████████▎| 11548/12440 [06:13<00:23, 37.97it/s]
 93%|█████████▎| 11552/12440 [06:13<00:23, 38.04it/s]
 93%|█████████▎| 11556/12440 [06:13<00:23, 38.28it/s]
 93%|█████████▎| 11560/12440 [06:14<00:23, 38.19it/s]
 93%|█████████▎| 11564/12440 [06:14<00:22, 3

[2m[36m(_objective pid=34244)[0m {'loss': 0.059, 'learning_rate': 4.918833767292891e-07, 'epoch': 3.86}


[2m[36m(_objective pid=34244)[0m  96%|█████████▋| 12003/12440 [06:27<00:13, 33.34it/s]
 97%|█████████▋| 12007/12440 [06:27<00:12, 33.39it/s]
 97%|█████████▋| 12011/12440 [06:27<00:13, 30.80it/s]
 97%|█████████▋| 12015/12440 [06:27<00:13, 31.70it/s]
 97%|█████████▋| 12019/12440 [06:28<00:12, 33.51it/s]
 97%|█████████▋| 12023/12440 [06:28<00:12, 33.40it/s]
 97%|█████████▋| 12027/12440 [06:28<00:13, 30.69it/s]
 97%|█████████▋| 12031/12440 [06:28<00:13, 29.29it/s]
 97%|█████████▋| 12035/12440 [06:28<00:13, 30.93it/s]
 97%|█████████▋| 12040/12440 [06:28<00:11, 33.55it/s]
 97%|█████████▋| 12044/12440 [06:28<00:11, 34.33it/s]
 97%|█████████▋| 12048/12440 [06:28<00:11, 35.50it/s]
 97%|█████████▋| 12052/12440 [06:29<00:10, 35.43it/s]
 97%|█████████▋| 12056/12440 [06:29<00:10, 36.40it/s]
 97%|█████████▋| 12060/12440 [06:29<00:11, 34.35it/s]
 97%|█████████▋| 12064/12440 [06:29<00:11, 33.71it/s]
 97%|█████████▋| 12068/12440 [06:29<00:10, 34.68it/s]
 97%|█████████▋| 12072/12440 [06:29<00:10, 35

Trial status: 9 TERMINATED | 1 RUNNING | 10 PENDING
Current time: 2023-09-11 14:35:56. Total running time: 1hr 15min 34s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00009   RUNNING          1.39069e-05                    4                

[2m[36m(_objective pid=34244)[0m 
  8%|▊         | 11/130 [00:00<00:08, 13.88it/s][A
[2m[36m(_objective pid=34244)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.47it/s][A
[2m[36m(_objective pid=34244)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.33it/s][A
[2m[36m(_objective pid=34244)[0m 
 13%|█▎        | 17/130 [00:01<00:09, 11.57it/s][A
[2m[36m(_objective pid=34244)[0m 
 15%|█▍        | 19/130 [00:01<00:09, 11.47it/s][A
[2m[36m(_objective pid=34244)[0m 
 16%|█▌        | 21/130 [00:01<00:08, 12.96it/s][A
[2m[36m(_objective pid=34244)[0m 
 18%|█▊        | 23/130 [00:01<00:09, 11.80it/s][A
[2m[36m(_objective pid=34244)[0m 
 19%|█▉        | 25/130 [00:01<00:07, 13.40it/s][A
[2m[36m(_objective pid=34244)[0m 
 21%|██        | 27/130 [00:01<00:07, 14.06it/s][A
[2m[36m(_objective pid=34244)[0m 
 24%|██▍       | 31/130 [00:02<00:06, 16.14it/s][A
[2m[36m(_objective pid=34244)[0m 
 26%|██▌       | 34/130 [00:02<00:05, 16.33it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00009 finished iteration 4 at 2023-09-11 14:36:05. Total running time: 1hr 15min 44s
+-------------------------------------------------+
| Trial _objective_f556c_00009 result             |
+-------------------------------------------------+
| time_this_iter_s                         101.79 |
| time_total_s                            412.495 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.23798 |
| eval_runtime                             9.5995 |
| eval_samples_per_second                 432.003 |
| eval_steps_per_second                    13.542 |
| objective                               0.23798 |
+-------------------------------------------------+

[2m[36m(_objective pid=34244)[0m {'eval_loss': 0.23797614872455597, 'eval_runtime': 9.5995, 'eval_samples_per_second': 432.003, 'eval_steps_per_second': 13.542, 'epoch': 4.0}


[2m[36m(_objective pid=34244)[0m                                                      
[2m[36m(_objective pid=34244)[0m                                                  [A100%|██████████| 12440/12440 [06:50<00:00, 35.67it/s]
[2m[36m(_objective pid=34244)[0m 100%|██████████| 130/130 [00:09<00:00, 11.99it/s][A
[2m[36m(_objective pid=34244)[0m                                                  [A


Trial _objective_f556c_00009 completed after 4 iterations at 2023-09-11 14:36:06. Total running time: 1hr 15min 45s

[2m[36m(_objective pid=34244)[0m {'train_runtime': 411.7314, 'train_samples_per_second': 120.846, 'train_steps_per_second': 30.214, 'train_loss': 0.1538364690983027, 'epoch': 4.0}


[2m[36m(_objective pid=34244)[0m                                                      100%|██████████| 12440/12440 [06:51<00:00, 35.67it/s]100%|██████████| 12440/12440 [06:51<00:00, 30.21it/s]


Trial _objective_f556c_00010 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00010 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             3e-05 |
| num_train_epochs                              3 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.11732 |
+-------------------------------------------------+



[2m[36m(_objective pid=36073)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias']
[2m[36m(_objective pid=36073)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=36073)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=36073)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:36:26. Total running time: 1hr 16min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                 

[2m[36m(_objective pid=36073)[0m   3%|▎         | 324/9330 [00:09<04:41, 31.94it/s]
  4%|▎         | 328/9330 [00:09<05:03, 29.68it/s]
  4%|▎         | 332/9330 [00:10<04:47, 31.34it/s]
  4%|▎         | 336/9330 [00:10<04:41, 31.95it/s]
  4%|▎         | 340/9330 [00:10<04:31, 33.12it/s]
  4%|▎         | 344/9330 [00:10<04:56, 30.27it/s]
  4%|▎         | 348/9330 [00:10<04:54, 30.52it/s]
  4%|▍         | 352/9330 [00:10<04:50, 30.86it/s]
  4%|▍         | 356/9330 [00:10<04:40, 32.00it/s]
  4%|▍         | 360/9330 [00:10<04:32, 32.87it/s]
  4%|▍         | 364/9330 [00:11<04:47, 31.20it/s]
  4%|▍         | 368/9330 [00:11<04:44, 31.51it/s]
  4%|▍         | 372/9330 [00:11<04:27, 33.50it/s]
  4%|▍         | 376/9330 [00:11<04:16, 34.97it/s]
  4%|▍         | 380/9330 [00:11<04:16, 34.90it/s]
  4%|▍         | 384/9330 [00:11<04:10, 35.64it/s]
  4%|▍         | 388/9330 [00:11<04:08, 35.93it/s]
  4%|▍         | 392/9330 [00:11<04:07, 36.10it/s]
  4%|▍         | 396/9330 [00:12<04:12, 35.38

[2m[36m(_objective pid=36073)[0m {'loss': 0.3986, 'learning_rate': 2.820969824772783e-05, 'epoch': 0.16}


[2m[36m(_objective pid=36073)[0m   5%|▌         | 500/9330 [00:15<04:30, 32.59it/s]                                                    5%|▌         | 500/9330 [00:15<04:30, 32.59it/s]
  5%|▌         | 504/9330 [00:15<04:27, 33.05it/s]
  5%|▌         | 508/9330 [00:15<04:36, 31.88it/s]
  5%|▌         | 512/9330 [00:15<04:38, 31.69it/s]
  6%|▌         | 516/9330 [00:15<04:32, 32.35it/s]
  6%|▌         | 520/9330 [00:15<04:24, 33.35it/s]
  6%|▌         | 524/9330 [00:15<04:15, 34.48it/s]
  6%|▌         | 528/9330 [00:15<04:10, 35.08it/s]
  6%|▌         | 532/9330 [00:16<04:12, 34.83it/s]
  6%|▌         | 536/9330 [00:16<04:24, 33.27it/s]
  6%|▌         | 540/9330 [00:16<04:35, 31.94it/s]
  6%|▌         | 544/9330 [00:16<04:19, 33.82it/s]
  6%|▌         | 548/9330 [00:16<04:08, 35.36it/s]
  6%|▌         | 552/9330 [00:16<04:01, 36.37it/s]
  6%|▌         | 556/9330 [00:16<04:08, 35.31it/s]
  6%|▌         | 560/9330 [00:16<04:02, 36.10it/s]
  6%|▌         | 564/9330 [00:16<04:01, 36.3

[2m[36m(_objective pid=36073)[0m {'loss': 0.3286, 'learning_rate': 2.6612320091004848e-05, 'epoch': 0.32}


[2m[36m(_objective pid=36073)[0m                                                    11%|█         | 1000/9330 [00:29<03:47, 36.62it/s] 11%|█         | 1001/9330 [00:29<03:46, 36.82it/s]
 11%|█         | 1005/9330 [00:30<03:47, 36.56it/s]
 11%|█         | 1009/9330 [00:30<03:47, 36.65it/s]
 11%|█         | 1013/9330 [00:30<03:47, 36.63it/s]
 11%|█         | 1017/9330 [00:30<03:47, 36.53it/s]
 11%|█         | 1021/9330 [00:30<03:50, 36.11it/s]
 11%|█         | 1025/9330 [00:30<03:59, 34.61it/s]
 11%|█         | 1029/9330 [00:30<03:52, 35.66it/s]
 11%|█         | 1033/9330 [00:30<03:51, 35.86it/s]
 11%|█         | 1037/9330 [00:30<03:55, 35.19it/s]
 11%|█         | 1041/9330 [00:31<03:51, 35.75it/s]
 11%|█         | 1045/9330 [00:31<03:49, 36.07it/s]
 11%|█         | 1049/9330 [00:31<03:48, 36.17it/s]
 11%|█▏        | 1053/9330 [00:31<04:02, 34.15it/s]
 11%|█▏        | 1057/9330 [00:31<04:01, 34.33it/s]
 11%|█▏        | 1061/9330 [00:31<04:00, 34.38it/s]
 11%|█▏        | 1065/9330 

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:36:56. Total running time: 1hr 16min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                

 14%|█▍        | 1345/9330 [00:40<04:06, 32.33it/s]
 14%|█▍        | 1349/9330 [00:40<04:20, 30.59it/s]
 15%|█▍        | 1353/9330 [00:40<04:18, 30.89it/s]
 15%|█▍        | 1357/9330 [00:40<04:18, 30.90it/s]
 15%|█▍        | 1361/9330 [00:40<04:14, 31.37it/s]
 15%|█▍        | 1365/9330 [00:40<04:03, 32.71it/s]
 15%|█▍        | 1369/9330 [00:40<03:59, 33.29it/s]
 15%|█▍        | 1373/9330 [00:40<03:54, 33.92it/s]
 15%|█▍        | 1377/9330 [00:41<03:52, 34.28it/s]
 15%|█▍        | 1381/9330 [00:41<04:01, 32.85it/s]
 15%|█▍        | 1385/9330 [00:41<04:02, 32.79it/s]
 15%|█▍        | 1389/9330 [00:41<03:56, 33.60it/s]
 15%|█▍        | 1393/9330 [00:41<04:13, 31.33it/s]
 15%|█▍        | 1397/9330 [00:41<04:31, 29.19it/s]
 15%|█▌        | 1400/9330 [00:41<04:45, 27.80it/s]
 15%|█▌        | 1403/9330 [00:41<04:59, 26.44it/s]
 15%|█▌        | 1407/9330 [00:42<04:30, 29.28it/s]
 15%|█▌        | 1411/9330 [00:42<04:18, 30.65it/s]
 15%|█▌        | 1415/9330 [00:42<04:24, 29.92it/s]
 15%|█▌     

[2m[36m(_objective pid=36073)[0m {'loss': 0.2943, 'learning_rate': 2.501494193428187e-05, 'epoch': 0.48}


[2m[36m(_objective pid=36073)[0m  16%|█▌        | 1503/9330 [00:44<03:34, 36.43it/s]
 16%|█▌        | 1507/9330 [00:44<03:32, 36.79it/s]
 16%|█▌        | 1511/9330 [00:45<03:32, 36.74it/s]
 16%|█▌        | 1515/9330 [00:45<03:32, 36.84it/s]
 16%|█▋        | 1519/9330 [00:45<03:30, 37.10it/s]
 16%|█▋        | 1523/9330 [00:45<03:39, 35.62it/s]
 16%|█▋        | 1527/9330 [00:45<03:34, 36.44it/s]
 16%|█▋        | 1531/9330 [00:45<03:31, 36.86it/s]
 16%|█▋        | 1535/9330 [00:45<03:30, 37.12it/s]
 16%|█▋        | 1539/9330 [00:45<03:30, 37.02it/s]
 17%|█▋        | 1543/9330 [00:46<03:52, 33.56it/s]
 17%|█▋        | 1547/9330 [00:46<03:44, 34.74it/s]
 17%|█▋        | 1551/9330 [00:46<03:45, 34.54it/s]
 17%|█▋        | 1555/9330 [00:46<04:14, 30.57it/s]
 17%|█▋        | 1559/9330 [00:46<04:22, 29.58it/s]
 17%|█▋        | 1563/9330 [00:46<04:03, 31.93it/s]
 17%|█▋        | 1567/9330 [00:46<03:50, 33.65it/s]
 17%|█▋        | 1571/9330 [00:46<03:55, 32.97it/s]
 17%|█▋        | 1575/9330 

[2m[36m(_objective pid=36073)[0m {'loss': 0.2324, 'learning_rate': 2.3417563777558888e-05, 'epoch': 0.64}


[2m[36m(_objective pid=36073)[0m  21%|██▏       | 2003/9330 [00:59<03:22, 36.13it/s]
 22%|██▏       | 2007/9330 [01:00<03:27, 35.30it/s]
 22%|██▏       | 2011/9330 [01:00<03:27, 35.29it/s]
 22%|██▏       | 2015/9330 [01:00<03:26, 35.48it/s]
 22%|██▏       | 2019/9330 [01:00<03:44, 32.55it/s]
 22%|██▏       | 2023/9330 [01:00<03:48, 31.93it/s]
 22%|██▏       | 2027/9330 [01:00<04:05, 29.80it/s]
 22%|██▏       | 2031/9330 [01:00<03:48, 31.90it/s]
 22%|██▏       | 2035/9330 [01:00<03:37, 33.57it/s]
 22%|██▏       | 2039/9330 [01:01<03:30, 34.68it/s]
 22%|██▏       | 2043/9330 [01:01<03:52, 31.36it/s]
 22%|██▏       | 2047/9330 [01:01<03:42, 32.67it/s]
 22%|██▏       | 2051/9330 [01:01<03:38, 33.38it/s]
 22%|██▏       | 2055/9330 [01:01<04:06, 29.50it/s]
 22%|██▏       | 2059/9330 [01:01<03:50, 31.61it/s]
 22%|██▏       | 2063/9330 [01:01<03:44, 32.33it/s]
 22%|██▏       | 2067/9330 [01:01<03:35, 33.68it/s]
 22%|██▏       | 2071/9330 [01:02<03:26, 35.16it/s]
 22%|██▏       | 2075/9330 

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:37:26. Total running time: 1hr 17min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                 

[2m[36m(_objective pid=36073)[0m  25%|██▍       | 2331/9330 [01:09<03:36, 32.36it/s]
 25%|██▌       | 2335/9330 [01:10<03:30, 33.21it/s]
 25%|██▌       | 2339/9330 [01:10<03:45, 31.04it/s]
 25%|██▌       | 2343/9330 [01:10<03:51, 30.23it/s]
 25%|██▌       | 2347/9330 [01:10<03:39, 31.81it/s]
 25%|██▌       | 2351/9330 [01:10<03:37, 32.02it/s]
 25%|██▌       | 2355/9330 [01:10<03:27, 33.65it/s]
 25%|██▌       | 2359/9330 [01:10<03:19, 35.03it/s]
 25%|██▌       | 2363/9330 [01:10<03:14, 35.77it/s]
 25%|██▌       | 2367/9330 [01:10<03:29, 33.30it/s]
 25%|██▌       | 2371/9330 [01:11<03:36, 32.15it/s]
 25%|██▌       | 2375/9330 [01:11<03:40, 31.59it/s]
 25%|██▌       | 2379/9330 [01:11<03:33, 32.53it/s]
 26%|██▌       | 2383/9330 [01:11<03:55, 29.54it/s]
 26%|██▌       | 2387/9330 [01:11<03:37, 31.86it/s]
 26%|██▌       | 2391/9330 [01:11<03:33, 32.51it/s]
 26%|██▌       | 2395/9330 [01:11<03:23, 34.09it/s]
 26%|██▌       | 2399/9330 [01:11<03:24, 33.82it/s]
 26%|██▌       | 2403/9330 

[2m[36m(_objective pid=36073)[0m {'loss': 0.2144, 'learning_rate': 2.182018562083591e-05, 'epoch': 0.8}


 27%|██▋       | 2503/9330 [01:15<03:20, 34.11it/s]
 27%|██▋       | 2507/9330 [01:15<03:38, 31.16it/s]
 27%|██▋       | 2511/9330 [01:15<03:42, 30.59it/s]
 27%|██▋       | 2515/9330 [01:15<03:51, 29.39it/s]
 27%|██▋       | 2519/9330 [01:15<03:38, 31.19it/s]
 27%|██▋       | 2523/9330 [01:15<03:26, 32.89it/s]
 27%|██▋       | 2527/9330 [01:15<03:25, 33.14it/s]
 27%|██▋       | 2531/9330 [01:16<03:30, 32.27it/s]
 27%|██▋       | 2535/9330 [01:16<03:27, 32.68it/s]
 27%|██▋       | 2539/9330 [01:16<03:18, 34.16it/s]
 27%|██▋       | 2543/9330 [01:16<03:17, 34.37it/s]
 27%|██▋       | 2547/9330 [01:16<03:11, 35.35it/s]
 27%|██▋       | 2551/9330 [01:16<03:11, 35.48it/s]
 27%|██▋       | 2555/9330 [01:16<03:08, 35.97it/s]
 27%|██▋       | 2559/9330 [01:16<03:05, 36.47it/s]
 27%|██▋       | 2563/9330 [01:16<03:06, 36.35it/s]
 28%|██▊       | 2567/9330 [01:17<03:23, 33.22it/s]
 28%|██▊       | 2571/9330 [01:17<03:19, 33.89it/s]
 28%|██▊       | 2575/9330 [01:17<03:13, 34.95it/s]
 28%|██▊    

[2m[36m(_objective pid=36073)[0m {'loss': 0.2229, 'learning_rate': 2.0222807464112928e-05, 'epoch': 0.96}


[2m[36m(_objective pid=36073)[0m  32%|███▏      | 2998/9330 [01:30<03:35, 29.36it/s]                                                    32%|███▏      | 3000/9330 [01:30<03:35, 29.36it/s]
 32%|███▏      | 3001/9330 [01:30<03:41, 28.56it/s]
 32%|███▏      | 3005/9330 [01:30<03:43, 28.31it/s]
 32%|███▏      | 3009/9330 [01:30<03:27, 30.40it/s]
 32%|███▏      | 3013/9330 [01:30<03:19, 31.67it/s]
 32%|███▏      | 3017/9330 [01:30<03:11, 32.91it/s]
 32%|███▏      | 3021/9330 [01:30<03:07, 33.73it/s]
 32%|███▏      | 3025/9330 [01:31<03:05, 33.92it/s]
 32%|███▏      | 3029/9330 [01:31<03:23, 30.96it/s]
 33%|███▎      | 3033/9330 [01:31<03:12, 32.67it/s]
 33%|███▎      | 3037/9330 [01:31<03:04, 34.09it/s]
 33%|███▎      | 3041/9330 [01:31<03:07, 33.61it/s]
 33%|███▎      | 3045/9330 [01:31<02:59, 35.02it/s]
 33%|███▎      | 3049/9330 [01:31<02:57, 35.40it/s]
 33%|███▎      | 3053/9330 [01:31<02:55, 35.72it/s]
 33%|███▎      | 3057/9330 [01:31<02:53, 36.25it/s]
 33%|███▎      | 3061/9330

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:37:56. Total running time: 1hr 17min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                

[2m[36m(_objective pid=36073)[0m 
[2m[36m(_objective pid=36073)[0m  71%|███████   | 92/130 [00:06<00:03, 12.52it/s][A
[2m[36m(_objective pid=36073)[0m 
 72%|███████▏  | 94/130 [00:06<00:02, 13.73it/s][A
[2m[36m(_objective pid=36073)[0m 
 75%|███████▍  | 97/130 [00:06<00:02, 14.35it/s][A
[2m[36m(_objective pid=36073)[0m 
 77%|███████▋  | 100/130 [00:07<00:01, 16.92it/s][A
[2m[36m(_objective pid=36073)[0m 
 78%|███████▊  | 102/130 [00:07<00:01, 14.80it/s][A
[2m[36m(_objective pid=36073)[0m 
 80%|████████  | 104/130 [00:07<00:01, 13.18it/s][A
[2m[36m(_objective pid=36073)[0m 
 82%|████████▏ | 106/130 [00:07<00:01, 14.30it/s][A
[2m[36m(_objective pid=36073)[0m 
 84%|████████▍ | 109/130 [00:07<00:01, 13.61it/s][A
[2m[36m(_objective pid=36073)[0m 
 85%|████████▌ | 111/130 [00:07<00:01, 11.82it/s][A
[2m[36m(_objective pid=36073)[0m 
 87%|████████▋ | 113/130 [00:08<00:01, 11.35it/s][A
[2m[36m(_objective pid=36073)[0m 
 89%|████████▉ | 116/130 [00:

Trial _objective_f556c_00010 finished iteration 1 at 2023-09-11 14:37:59. Total running time: 1hr 17min 38s
+-------------------------------------------------+
| Trial _objective_f556c_00010 result             |
+-------------------------------------------------+
| time_this_iter_s                        105.377 |
| time_total_s                            105.377 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.22786 |
| eval_runtime                             9.5938 |
| eval_samples_per_second                 432.259 |
| eval_steps_per_second                     13.55 |
| objective                               0.22786 |
+-------------------------------------------------+

[2m[36m(_objective pid=36073)[0m {'eval_loss': 0.22785760462284088, 'eval_runtime': 9.5938, 'eval_samples_per_second': 432.259, 'eval_steps_per_second': 13.55, 'epoch': 1.0}


[2m[36m(_objective pid=36073)[0m                                                    
[2m[36m(_objective pid=36073)[0m                                                  [A 33%|███▎      | 3110/9330 [01:43<03:00, 34.42it/s]
[2m[36m(_objective pid=36073)[0m 100%|██████████| 130/130 [00:09<00:00, 11.98it/s][A
                                                 [A
 33%|███▎      | 3113/9330 [01:44<1:27:35,  1.18it/s]
 33%|███▎      | 3117/9330 [01:44<1:02:13,  1.66it/s]
 33%|███▎      | 3121/9330 [01:44<45:04,  2.30it/s]  
 33%|███▎      | 3125/9330 [01:44<32:23,  3.19it/s]
 34%|███▎      | 3129/9330 [01:45<23:36,  4.38it/s]
 34%|███▎      | 3133/9330 [01:45<17:28,  5.91it/s]
 34%|███▎      | 3137/9330 [01:45<13:08,  7.86it/s]
 34%|███▎      | 3141/9330 [01:45<10:09, 10.15it/s]
 34%|███▎      | 3145/9330 [01:45<08:01, 12.84it/s]
 34%|███▍      | 3149/9330 [01:45<06:31, 15.77it/s]
 34%|███▍      | 3153/9330 [01:45<05:28, 18.82it/s]
 34%|███▍      | 3157/9330 [01:45<05:05, 20.23i

[2m[36m(_objective pid=36073)[0m {'loss': 0.1553, 'learning_rate': 1.862542930738995e-05, 'epoch': 1.13}


 38%|███▊      | 3505/9330 [01:56<02:44, 35.51it/s]
 38%|███▊      | 3509/9330 [01:56<02:44, 35.43it/s]
 38%|███▊      | 3513/9330 [01:56<02:49, 34.24it/s]
 38%|███▊      | 3517/9330 [01:56<02:56, 32.99it/s]
 38%|███▊      | 3521/9330 [01:56<02:50, 34.00it/s]
 38%|███▊      | 3525/9330 [01:56<03:04, 31.54it/s]
 38%|███▊      | 3529/9330 [01:57<03:06, 31.11it/s]
 38%|███▊      | 3533/9330 [01:57<02:57, 32.58it/s]
 38%|███▊      | 3537/9330 [01:57<03:06, 31.13it/s]
 38%|███▊      | 3541/9330 [01:57<02:58, 32.50it/s]
 38%|███▊      | 3545/9330 [01:57<02:56, 32.83it/s]
 38%|███▊      | 3549/9330 [01:57<02:48, 34.33it/s]
 38%|███▊      | 3553/9330 [01:57<02:45, 34.99it/s]
 38%|███▊      | 3557/9330 [01:57<02:39, 36.10it/s]
 38%|███▊      | 3561/9330 [01:57<02:47, 34.52it/s]
 38%|███▊      | 3565/9330 [01:58<02:53, 33.14it/s]
 38%|███▊      | 3569/9330 [01:58<02:52, 33.31it/s]
 38%|███▊      | 3573/9330 [01:58<03:08, 30.47it/s]
 38%|███▊      | 3577/9330 [01:58<02:58, 32.18it/s]
 38%|███▊   

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:38:26. Total running time: 1hr 18min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                 

[2m[36m(_objective pid=36073)[0m  42%|████▏     | 3965/9330 [02:10<02:30, 35.73it/s]
 43%|████▎     | 3969/9330 [02:10<02:39, 33.68it/s]
 43%|████▎     | 3973/9330 [02:10<02:54, 30.70it/s]
 43%|████▎     | 3977/9330 [02:10<02:47, 32.03it/s]
 43%|████▎     | 3981/9330 [02:10<02:38, 33.80it/s]
 43%|████▎     | 3985/9330 [02:10<02:51, 31.25it/s]
 43%|████▎     | 3989/9330 [02:10<02:45, 32.36it/s]
 43%|████▎     | 3993/9330 [02:11<02:47, 31.83it/s]
 43%|████▎     | 3997/9330 [02:11<02:45, 32.28it/s]


[2m[36m(_objective pid=36073)[0m {'loss': 0.1355, 'learning_rate': 1.7028051150666968e-05, 'epoch': 1.29}


 43%|████▎     | 4001/9330 [02:11<02:39, 33.34it/s]
 43%|████▎     | 4005/9330 [02:11<02:57, 30.07it/s]
 43%|████▎     | 4009/9330 [02:11<03:04, 28.81it/s]
 43%|████▎     | 4013/9330 [02:11<02:51, 30.94it/s]
 43%|████▎     | 4017/9330 [02:11<02:43, 32.55it/s]
 43%|████▎     | 4021/9330 [02:11<02:35, 34.19it/s]
 43%|████▎     | 4025/9330 [02:12<02:34, 34.38it/s]
 43%|████▎     | 4029/9330 [02:12<02:30, 35.28it/s]
 43%|████▎     | 4033/9330 [02:12<02:27, 35.84it/s]
 43%|████▎     | 4037/9330 [02:12<02:29, 35.45it/s]
 43%|████▎     | 4041/9330 [02:12<02:27, 35.77it/s]
 43%|████▎     | 4045/9330 [02:12<02:25, 36.35it/s]
 43%|████▎     | 4049/9330 [02:12<02:24, 36.62it/s]
 43%|████▎     | 4053/9330 [02:12<02:23, 36.80it/s]
 43%|████▎     | 4057/9330 [02:12<02:24, 36.61it/s]
 44%|████▎     | 4061/9330 [02:12<02:23, 36.62it/s]
 44%|████▎     | 4065/9330 [02:13<02:20, 37.56it/s]
 44%|████▎     | 4069/9330 [02:13<02:20, 37.52it/s]
 44%|████▎     | 4073/9330 [02:13<02:20, 37.36it/s]
 44%|████▎  

[2m[36m(_objective pid=36073)[0m {'loss': 0.1414, 'learning_rate': 1.543067299394399e-05, 'epoch': 1.45}


[2m[36m(_objective pid=36073)[0m  48%|████▊     | 4502/9330 [02:25<02:33, 31.47it/s]
 48%|████▊     | 4506/9330 [02:26<02:24, 33.27it/s]
 48%|████▊     | 4510/9330 [02:26<02:21, 34.04it/s]
 48%|████▊     | 4514/9330 [02:26<02:20, 34.32it/s]
 48%|████▊     | 4518/9330 [02:26<02:16, 35.33it/s]
 48%|████▊     | 4522/9330 [02:26<02:13, 36.00it/s]
 49%|████▊     | 4526/9330 [02:26<02:10, 36.92it/s]
 49%|████▊     | 4530/9330 [02:26<02:10, 36.88it/s]
 49%|████▊     | 4534/9330 [02:26<02:23, 33.44it/s]
 49%|████▊     | 4538/9330 [02:27<02:25, 32.86it/s]
 49%|████▊     | 4542/9330 [02:27<02:20, 34.12it/s]
 49%|████▊     | 4546/9330 [02:27<02:25, 32.91it/s]
 49%|████▉     | 4550/9330 [02:27<02:19, 34.37it/s]
 49%|████▉     | 4554/9330 [02:27<02:14, 35.47it/s]
 49%|████▉     | 4558/9330 [02:27<02:26, 32.61it/s]
 49%|████▉     | 4562/9330 [02:27<02:23, 33.16it/s]
 49%|████▉     | 4566/9330 [02:27<02:19, 34.13it/s]
 49%|████▉     | 4570/9330 [02:27<02:14, 35.26it/s]
 49%|████▉     | 4574/9330 

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:38:56. Total running time: 1hr 18min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                

[2m[36m(_objective pid=36073)[0m  53%|█████▎    | 4974/9330 [02:40<02:23, 30.37it/s]
 53%|█████▎    | 4978/9330 [02:40<02:16, 31.86it/s]
 53%|█████▎    | 4982/9330 [02:40<02:10, 33.31it/s]
 53%|█████▎    | 4986/9330 [02:40<02:19, 31.06it/s]
 53%|█████▎    | 4990/9330 [02:40<02:34, 28.18it/s]
 54%|█████▎    | 4994/9330 [02:40<02:20, 30.88it/s]
 54%|█████▎    | 4998/9330 [02:40<02:11, 33.02it/s]
 54%|█████▎    | 5002/9330 [02:40<02:05, 34.60it/s]


[2m[36m(_objective pid=36073)[0m {'loss': 0.1381, 'learning_rate': 1.3833294837221006e-05, 'epoch': 1.61}


[2m[36m(_objective pid=36073)[0m  54%|█████▎    | 5006/9330 [02:41<02:00, 35.90it/s]
 54%|█████▎    | 5010/9330 [02:41<01:58, 36.53it/s]
 54%|█████▎    | 5014/9330 [02:41<01:56, 37.10it/s]
 54%|█████▍    | 5018/9330 [02:41<02:04, 34.77it/s]
 54%|█████▍    | 5022/9330 [02:41<02:15, 31.78it/s]
 54%|█████▍    | 5026/9330 [02:41<02:07, 33.67it/s]
 54%|█████▍    | 5030/9330 [02:41<02:16, 31.48it/s]
 54%|█████▍    | 5034/9330 [02:41<02:10, 32.94it/s]
 54%|█████▍    | 5038/9330 [02:41<02:04, 34.61it/s]
 54%|█████▍    | 5042/9330 [02:42<02:01, 35.22it/s]
 54%|█████▍    | 5046/9330 [02:42<02:04, 34.35it/s]
 54%|█████▍    | 5050/9330 [02:42<02:18, 30.96it/s]
 54%|█████▍    | 5054/9330 [02:42<02:17, 31.17it/s]
 54%|█████▍    | 5058/9330 [02:42<02:25, 29.39it/s]
 54%|█████▍    | 5062/9330 [02:42<02:14, 31.62it/s]
 54%|█████▍    | 5066/9330 [02:42<02:12, 32.20it/s]
 54%|█████▍    | 5070/9330 [02:42<02:06, 33.78it/s]
 54%|█████▍    | 5074/9330 [02:43<02:01, 35.11it/s]
 54%|█████▍    | 5078/9330 

[2m[36m(_objective pid=36073)[0m {'loss': 0.1573, 'learning_rate': 1.2235916680498026e-05, 'epoch': 1.77}


[2m[36m(_objective pid=36073)[0m  59%|█████▉    | 5505/9330 [02:56<01:46, 36.07it/s]
 59%|█████▉    | 5509/9330 [02:56<01:49, 34.95it/s]
 59%|█████▉    | 5513/9330 [02:56<01:46, 35.76it/s]
 59%|█████▉    | 5517/9330 [02:56<01:44, 36.51it/s]
 59%|█████▉    | 5521/9330 [02:56<01:52, 33.78it/s]
 59%|█████▉    | 5525/9330 [02:56<01:49, 34.85it/s]
 59%|█████▉    | 5529/9330 [02:56<01:45, 36.01it/s]
 59%|█████▉    | 5533/9330 [02:57<01:43, 36.81it/s]
 59%|█████▉    | 5537/9330 [02:57<01:50, 34.45it/s]
 59%|█████▉    | 5541/9330 [02:57<01:46, 35.66it/s]
 59%|█████▉    | 5545/9330 [02:57<01:43, 36.58it/s]
 59%|█████▉    | 5549/9330 [02:57<01:46, 35.55it/s]
 60%|█████▉    | 5553/9330 [02:57<01:43, 36.41it/s]
 60%|█████▉    | 5557/9330 [02:57<01:54, 33.01it/s]
 60%|█████▉    | 5561/9330 [02:57<01:50, 34.10it/s]
 60%|█████▉    | 5565/9330 [02:57<01:50, 34.13it/s]
 60%|█████▉    | 5569/9330 [02:58<01:47, 34.97it/s]
 60%|█████▉    | 5573/9330 [02:58<01:50, 33.96it/s]
 60%|█████▉    | 5577/9330 

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:39:26. Total running time: 1hr 19min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                 

[2m[36m(_objective pid=36073)[0m  64%|██████▍   | 5960/9330 [03:10<01:38, 34.10it/s]
 64%|██████▍   | 5964/9330 [03:10<01:39, 33.77it/s]
 64%|██████▍   | 5968/9330 [03:10<01:37, 34.39it/s]
 64%|██████▍   | 5972/9330 [03:10<01:35, 35.25it/s]
 64%|██████▍   | 5976/9330 [03:10<01:33, 35.93it/s]
 64%|██████▍   | 5980/9330 [03:10<01:37, 34.20it/s]
 64%|██████▍   | 5984/9330 [03:10<01:34, 35.31it/s]
 64%|██████▍   | 5988/9330 [03:10<01:34, 35.43it/s]
 64%|██████▍   | 5992/9330 [03:11<01:31, 36.61it/s]
 64%|██████▍   | 5996/9330 [03:11<01:30, 37.04it/s]


[2m[36m(_objective pid=36073)[0m {'loss': 0.1376, 'learning_rate': 1.0638538523775046e-05, 'epoch': 1.93}


[2m[36m(_objective pid=36073)[0m  64%|██████▍   | 6000/9330 [03:11<01:29, 37.09it/s]                                                    64%|██████▍   | 6000/9330 [03:11<01:29, 37.09it/s]
 64%|██████▍   | 6004/9330 [03:11<01:31, 36.43it/s]
 64%|██████▍   | 6009/9330 [03:11<01:29, 37.30it/s]
 64%|██████▍   | 6013/9330 [03:11<01:33, 35.35it/s]
 64%|██████▍   | 6017/9330 [03:11<01:33, 35.35it/s]
 65%|██████▍   | 6021/9330 [03:11<01:31, 36.05it/s]
 65%|██████▍   | 6025/9330 [03:11<01:29, 36.88it/s]
 65%|██████▍   | 6029/9330 [03:12<01:37, 34.00it/s]
 65%|██████▍   | 6033/9330 [03:12<01:33, 35.35it/s]
 65%|██████▍   | 6037/9330 [03:12<01:31, 35.94it/s]
 65%|██████▍   | 6041/9330 [03:12<01:31, 35.94it/s]
 65%|██████▍   | 6045/9330 [03:12<01:30, 36.32it/s]
 65%|██████▍   | 6049/9330 [03:12<01:40, 32.60it/s]
 65%|██████▍   | 6053/9330 [03:12<01:49, 29.94it/s]
 65%|██████▍   | 6057/9330 [03:12<01:41, 32.14it/s]
 65%|██████▍   | 6061/9330 [03:13<01:41, 32.23it/s]
 65%|██████▌   | 6065/9330

Trial _objective_f556c_00010 finished iteration 2 at 2023-09-11 14:39:43. Total running time: 1hr 19min 22s
+-------------------------------------------------+
| Trial _objective_f556c_00010 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.389 |
| time_total_s                            209.766 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.1861 |
| eval_runtime                              9.606 |
| eval_samples_per_second                 431.711 |
| eval_steps_per_second                    13.533 |
| objective                                0.1861 |
+-------------------------------------------------+

[2m[36m(_objective pid=36073)[0m {'eval_loss': 0.18610218167304993, 'eval_runtime': 9.606, 'eval_samples_per_second': 431.711, 'eval_steps_per_second': 13.533, 'epoch': 2.0}


[2m[36m(_objective pid=36073)[0m                                                    
[2m[36m(_objective pid=36073)[0m                                                  [A 67%|██████▋   | 6220/9330 [03:27<01:32, 33.80it/s]
[2m[36m(_objective pid=36073)[0m 100%|██████████| 130/130 [00:09<00:00, 11.99it/s][A
                                                 [A
 67%|██████▋   | 6223/9330 [03:28<43:49,  1.18it/s]
 67%|██████▋   | 6227/9330 [03:28<31:03,  1.67it/s]
 67%|██████▋   | 6231/9330 [03:29<22:08,  2.33it/s]
 67%|██████▋   | 6235/9330 [03:29<15:53,  3.25it/s]
 67%|██████▋   | 6239/9330 [03:29<11:38,  4.43it/s]
 67%|██████▋   | 6243/9330 [03:29<08:43,  5.90it/s]
 67%|██████▋   | 6247/9330 [03:29<06:35,  7.80it/s]
 67%|██████▋   | 6251/9330 [03:29<05:06, 10.05it/s]
 67%|██████▋   | 6255/9330 [03:29<03:58, 12.90it/s]
 67%|██████▋   | 6259/9330 [03:29<03:12, 15.91it/s]
 67%|██████▋   | 6263/9330 [03:30<02:43, 18.78it/s]
 67%|██████▋   | 6267/9330 [03:30<02:19, 22.02it/s]
 

[2m[36m(_objective pid=36073)[0m {'loss': 0.1206, 'learning_rate': 9.041160367052066e-06, 'epoch': 2.09}


[2m[36m(_objective pid=36073)[0m  70%|██████▉   | 6503/9330 [03:37<01:26, 32.74it/s]
 70%|██████▉   | 6507/9330 [03:37<01:26, 32.64it/s]
 70%|██████▉   | 6511/9330 [03:37<01:24, 33.26it/s]
 70%|██████▉   | 6515/9330 [03:37<01:30, 31.10it/s]
 70%|██████▉   | 6519/9330 [03:37<01:25, 32.69it/s]
 70%|██████▉   | 6523/9330 [03:37<01:33, 30.14it/s]
 70%|██████▉   | 6527/9330 [03:37<01:31, 30.52it/s]
 70%|███████   | 6531/9330 [03:38<01:36, 28.89it/s]
 70%|███████   | 6535/9330 [03:38<01:31, 30.61it/s]
 70%|███████   | 6539/9330 [03:38<01:26, 32.15it/s]
 70%|███████   | 6543/9330 [03:38<01:25, 32.62it/s]
 70%|███████   | 6547/9330 [03:38<01:23, 33.14it/s]
 70%|███████   | 6551/9330 [03:38<01:28, 31.46it/s]
 70%|███████   | 6555/9330 [03:38<01:25, 32.47it/s]
 70%|███████   | 6559/9330 [03:38<01:23, 33.02it/s]
 70%|███████   | 6563/9330 [03:39<01:24, 32.80it/s]
 70%|███████   | 6567/9330 [03:39<01:22, 33.53it/s]
 70%|███████   | 6571/9330 [03:39<01:21, 33.94it/s]
 70%|███████   | 6575/9330 

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:39:56. Total running time: 1hr 19min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                

 71%|███████   | 6603/9330 [03:40<01:27, 31.32it/s]
 71%|███████   | 6607/9330 [03:40<01:24, 32.11it/s]
 71%|███████   | 6611/9330 [03:40<01:20, 33.64it/s]
 71%|███████   | 6615/9330 [03:40<01:19, 34.33it/s]
 71%|███████   | 6619/9330 [03:40<01:18, 34.44it/s]
 71%|███████   | 6623/9330 [03:40<01:22, 32.69it/s]
 71%|███████   | 6627/9330 [03:40<01:24, 31.87it/s]
 71%|███████   | 6631/9330 [03:41<01:22, 32.72it/s]
 71%|███████   | 6639/9330 [03:41<01:15, 35.44it/s]
 71%|███████   | 6643/9330 [03:41<01:15, 35.39it/s]
 71%|███████   | 6647/9330 [03:41<01:14, 36.18it/s]
 71%|███████▏  | 6651/9330 [03:41<01:14, 36.19it/s]
 71%|███████▏  | 6655/9330 [03:41<01:12, 36.65it/s]
 71%|███████▏  | 6659/9330 [03:41<01:20, 33.17it/s]
 71%|███████▏  | 6663/9330 [03:41<01:19, 33.73it/s]
 71%|███████▏  | 6667/9330 [03:42<01:15, 35.05it/s]
 72%|███████▏  | 6671/9330 [03:42<01:13, 36.32it/s]
 72%|███████▏  | 6675/9330 [03:42<01:11, 37.21it/s]
 72%|███████▏  | 6679/9330 [03:42<01:15, 35.28it/s]
 72%|███████

[2m[36m(_objective pid=36073)[0m {'loss': 0.0724, 'learning_rate': 7.443782210329088e-06, 'epoch': 2.25}


[2m[36m(_objective pid=36073)[0m  75%|███████▌  | 7000/9330 [03:51<01:03, 36.57it/s]                                                    75%|███████▌  | 7000/9330 [03:51<01:03, 36.57it/s]
 75%|███████▌  | 7004/9330 [03:51<01:09, 33.35it/s]
 75%|███████▌  | 7008/9330 [03:52<01:13, 31.62it/s]
 75%|███████▌  | 7012/9330 [03:52<01:11, 32.47it/s]
 75%|███████▌  | 7016/9330 [03:52<01:10, 32.69it/s]
 75%|███████▌  | 7020/9330 [03:52<01:08, 33.60it/s]
 75%|███████▌  | 7024/9330 [03:52<01:06, 34.75it/s]
 75%|███████▌  | 7028/9330 [03:52<01:04, 35.42it/s]
 75%|███████▌  | 7032/9330 [03:52<01:15, 30.27it/s]
 75%|███████▌  | 7036/9330 [03:52<01:11, 31.87it/s]
 75%|███████▌  | 7040/9330 [03:53<01:09, 33.17it/s]
 75%|███████▌  | 7044/9330 [03:53<01:07, 34.08it/s]
 76%|███████▌  | 7048/9330 [03:53<01:08, 33.46it/s]
 76%|███████▌  | 7052/9330 [03:53<01:07, 33.62it/s]
 76%|███████▌  | 7056/9330 [03:53<01:06, 34.11it/s]
 76%|███████▌  | 7060/9330 [03:53<01:05, 34.52it/s]
 76%|███████▌  | 7064/9330

[2m[36m(_objective pid=36073)[0m {'loss': 0.0685, 'learning_rate': 5.846404053606107e-06, 'epoch': 2.41}


[2m[36m(_objective pid=36073)[0m  80%|████████  | 7498/9330 [04:07<00:56, 32.69it/s]                                                    80%|████████  | 7500/9330 [04:07<00:55, 32.69it/s]
 80%|████████  | 7502/9330 [04:07<00:54, 33.83it/s]
 80%|████████  | 7506/9330 [04:07<00:53, 33.92it/s]
 80%|████████  | 7510/9330 [04:07<00:53, 34.02it/s]
 81%|████████  | 7514/9330 [04:07<00:53, 34.23it/s]
 81%|████████  | 7518/9330 [04:07<00:51, 35.24it/s]
 81%|████████  | 7522/9330 [04:07<00:53, 33.62it/s]
 81%|████████  | 7526/9330 [04:07<00:55, 32.68it/s]
 81%|████████  | 7530/9330 [04:07<00:52, 34.05it/s]
 81%|████████  | 7534/9330 [04:08<00:57, 31.19it/s]
 81%|████████  | 7538/9330 [04:08<01:00, 29.54it/s]
 81%|████████  | 7542/9330 [04:08<00:58, 30.55it/s]
 81%|████████  | 7546/9330 [04:08<00:55, 32.06it/s]
 81%|████████  | 7550/9330 [04:08<00:57, 30.90it/s]
 81%|████████  | 7554/9330 [04:08<00:54, 32.43it/s]
 81%|████████  | 7558/9330 [04:08<00:54, 32.79it/s]
 81%|████████  | 7562/9330

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:40:26. Total running time: 1hr 20min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                 

[2m[36m(_objective pid=36073)[0m  81%|████████▏ | 7602/9330 [04:10<00:50, 33.92it/s]
 82%|████████▏ | 7606/9330 [04:10<00:50, 33.82it/s]
 82%|████████▏ | 7610/9330 [04:10<00:51, 33.52it/s]
 82%|████████▏ | 7614/9330 [04:10<00:50, 34.26it/s]
 82%|████████▏ | 7618/9330 [04:10<00:51, 33.18it/s]
 82%|████████▏ | 7622/9330 [04:10<00:52, 32.68it/s]
 82%|████████▏ | 7626/9330 [04:10<00:51, 33.15it/s]
 82%|████████▏ | 7630/9330 [04:11<00:49, 34.10it/s]
 82%|████████▏ | 7634/9330 [04:11<00:49, 34.47it/s]
 82%|████████▏ | 7638/9330 [04:11<00:50, 33.65it/s]
 82%|████████▏ | 7642/9330 [04:11<00:50, 33.73it/s]
 82%|████████▏ | 7646/9330 [04:11<00:49, 33.86it/s]
 82%|████████▏ | 7650/9330 [04:11<00:49, 33.89it/s]
 82%|████████▏ | 7654/9330 [04:11<00:51, 32.44it/s]
 82%|████████▏ | 7658/9330 [04:11<00:51, 32.23it/s]
 82%|████████▏ | 7662/9330 [04:12<00:57, 29.24it/s]
 82%|████████▏ | 7666/9330 [04:12<00:54, 30.57it/s]
 82%|████████▏ | 7670/9330 [04:12<00:51, 31.96it/s]
 82%|████████▏ | 7674/9330 

[2m[36m(_objective pid=36073)[0m {'loss': 0.0801, 'learning_rate': 4.249025896883127e-06, 'epoch': 2.57}


[2m[36m(_objective pid=36073)[0m  86%|████████▌ | 8000/9330 [04:22<00:38, 34.24it/s]                                                    86%|████████▌ | 8000/9330 [04:22<00:38, 34.24it/s]
 86%|████████▌ | 8004/9330 [04:22<00:42, 31.55it/s]
 86%|████████▌ | 8008/9330 [04:22<00:39, 33.41it/s]
 86%|████████▌ | 8012/9330 [04:22<00:43, 30.27it/s]
 86%|████████▌ | 8016/9330 [04:22<00:40, 32.41it/s]
 86%|████████▌ | 8020/9330 [04:22<00:38, 33.64it/s]
 86%|████████▌ | 8024/9330 [04:22<00:42, 30.79it/s]
 86%|████████▌ | 8028/9330 [04:23<00:44, 29.24it/s]
 86%|████████▌ | 8032/9330 [04:23<00:41, 30.93it/s]
 86%|████████▌ | 8036/9330 [04:23<00:40, 31.92it/s]
 86%|████████▌ | 8040/9330 [04:23<00:38, 33.67it/s]
 86%|████████▌ | 8044/9330 [04:23<00:37, 34.60it/s]
 86%|████████▋ | 8048/9330 [04:23<00:37, 33.93it/s]
 86%|████████▋ | 8052/9330 [04:23<00:37, 33.89it/s]
 86%|████████▋ | 8056/9330 [04:23<00:39, 32.41it/s]
 86%|████████▋ | 8060/9330 [04:23<00:43, 28.97it/s]
 86%|████████▋ | 8064/9330

[2m[36m(_objective pid=36073)[0m {'loss': 0.0651, 'learning_rate': 2.651647740160147e-06, 'epoch': 2.73}


[2m[36m(_objective pid=36073)[0m  91%|█████████ | 8501/9330 [04:37<00:24, 33.16it/s]
 91%|█████████ | 8505/9330 [04:37<00:24, 33.89it/s]
 91%|█████████ | 8509/9330 [04:37<00:23, 34.44it/s]
 91%|█████████ | 8513/9330 [04:37<00:23, 34.12it/s]
 91%|█████████▏| 8517/9330 [04:37<00:24, 33.82it/s]
 91%|█████████▏| 8521/9330 [04:37<00:24, 33.03it/s]
 91%|█████████▏| 8525/9330 [04:37<00:24, 32.89it/s]
 91%|█████████▏| 8529/9330 [04:38<00:23, 34.48it/s]
 91%|█████████▏| 8533/9330 [04:38<00:22, 35.18it/s]
 92%|█████████▏| 8537/9330 [04:38<00:22, 35.18it/s]
 92%|█████████▏| 8541/9330 [04:38<00:22, 35.44it/s]
 92%|█████████▏| 8545/9330 [04:38<00:21, 35.95it/s]
 92%|█████████▏| 8549/9330 [04:38<00:23, 33.33it/s]
 92%|█████████▏| 8553/9330 [04:38<00:24, 31.09it/s]
 92%|█████████▏| 8557/9330 [04:38<00:23, 33.13it/s]
 92%|█████████▏| 8561/9330 [04:39<00:23, 33.16it/s]
 92%|█████████▏| 8565/9330 [04:39<00:23, 32.70it/s]
 92%|█████████▏| 8569/9330 [04:39<00:22, 33.93it/s]
 92%|█████████▏| 8573/9330 

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:40:56. Total running time: 1hr 20min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                

[2m[36m(_objective pid=36073)[0m  92%|█████████▏| 8605/9330 [04:40<00:20, 34.64it/s]
 92%|█████████▏| 8609/9330 [04:40<00:21, 34.19it/s]
 92%|█████████▏| 8613/9330 [04:40<00:22, 31.61it/s]
 92%|█████████▏| 8617/9330 [04:40<00:21, 33.17it/s]
 92%|█████████▏| 8621/9330 [04:40<00:22, 30.98it/s]
 92%|█████████▏| 8625/9330 [04:40<00:22, 30.82it/s]
 92%|█████████▏| 8629/9330 [04:41<00:21, 32.82it/s]
 93%|█████████▎| 8633/9330 [04:41<00:21, 32.90it/s]
 93%|█████████▎| 8637/9330 [04:41<00:21, 32.75it/s]
 93%|█████████▎| 8641/9330 [04:41<00:20, 34.09it/s]
 93%|█████████▎| 8645/9330 [04:41<00:20, 33.81it/s]
 93%|█████████▎| 8649/9330 [04:41<00:19, 34.88it/s]
 93%|█████████▎| 8653/9330 [04:41<00:19, 34.12it/s]
 93%|█████████▎| 8657/9330 [04:41<00:19, 34.28it/s]
 93%|█████████▎| 8661/9330 [04:41<00:19, 33.70it/s]
 93%|█████████▎| 8665/9330 [04:42<00:19, 34.06it/s]
 93%|█████████▎| 8669/9330 [04:42<00:19, 33.70it/s]
 93%|█████████▎| 8673/9330 [04:42<00:19, 33.95it/s]
 93%|█████████▎| 8677/9330 

[2m[36m(_objective pid=36073)[0m {'loss': 0.0809, 'learning_rate': 1.054269583437167e-06, 'epoch': 2.89}


 96%|█████████▋| 9001/9330 [04:52<00:09, 35.00it/s]
 97%|█████████▋| 9005/9330 [04:52<00:10, 31.61it/s]
 97%|█████████▋| 9009/9330 [04:52<00:09, 32.87it/s]
 97%|█████████▋| 9013/9330 [04:52<00:10, 30.88it/s]
 97%|█████████▋| 9017/9330 [04:53<00:09, 33.02it/s]
 97%|█████████▋| 9021/9330 [04:53<00:09, 31.84it/s]
 97%|█████████▋| 9025/9330 [04:53<00:09, 32.77it/s]
 97%|█████████▋| 9029/9330 [04:53<00:08, 34.41it/s]
 97%|█████████▋| 9033/9330 [04:53<00:08, 33.12it/s]
 97%|█████████▋| 9037/9330 [04:53<00:08, 34.27it/s]
 97%|█████████▋| 9041/9330 [04:53<00:08, 35.18it/s]
 97%|█████████▋| 9045/9330 [04:53<00:07, 35.90it/s]
 97%|█████████▋| 9049/9330 [04:53<00:08, 34.88it/s]
 97%|█████████▋| 9053/9330 [04:54<00:07, 35.80it/s]
 97%|█████████▋| 9057/9330 [04:54<00:07, 34.27it/s]
 97%|█████████▋| 9061/9330 [04:54<00:07, 35.14it/s]
 97%|█████████▋| 9065/9330 [04:54<00:07, 36.01it/s]
 97%|█████████▋| 9069/9330 [04:54<00:07, 36.08it/s]
 97%|█████████▋| 9073/9330 [04:54<00:07, 32.95it/s]
 97%|███████

Trial status: 10 TERMINATED | 1 RUNNING | 9 PENDING
Current time: 2023-09-11 14:41:26. Total running time: 1hr 21min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00010   RUNNING          2.98071e-05                    3                 

[2m[36m(_objective pid=36073)[0m 
 84%|████████▍ | 109/130 [00:07<00:01, 13.69it/s][A
[2m[36m(_objective pid=36073)[0m 
 85%|████████▌ | 111/130 [00:08<00:01, 11.82it/s][A
[2m[36m(_objective pid=36073)[0m 
 87%|████████▋ | 113/130 [00:08<00:01, 11.40it/s][A
[2m[36m(_objective pid=36073)[0m 
 89%|████████▉ | 116/130 [00:08<00:01, 12.44it/s][A
[2m[36m(_objective pid=36073)[0m 
 91%|█████████ | 118/130 [00:08<00:00, 13.14it/s][A
[2m[36m(_objective pid=36073)[0m 
 92%|█████████▏| 120/130 [00:08<00:00, 14.30it/s][A
[2m[36m(_objective pid=36073)[0m 
 94%|█████████▍| 122/130 [00:08<00:00, 13.71it/s][A
[2m[36m(_objective pid=36073)[0m 
 95%|█████████▌| 124/130 [00:08<00:00, 13.20it/s][A
[2m[36m(_objective pid=36073)[0m 
 98%|█████████▊| 127/130 [00:09<00:00, 13.13it/s][A
[2m[36m(_objective pid=36073)[0m 
 99%|█████████▉| 129/130 [00:09<00:00, 11.97it/s][A


Trial _objective_f556c_00010 finished iteration 3 at 2023-09-11 14:41:28. Total running time: 1hr 21min 7s
+-------------------------------------------------+
| Trial _objective_f556c_00010 result             |
+-------------------------------------------------+
| time_this_iter_s                         104.53 |
| time_total_s                            314.296 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.21656 |
| eval_runtime                             9.6019 |
| eval_samples_per_second                 431.894 |
| eval_steps_per_second                    13.539 |
| objective                               0.21656 |
+-------------------------------------------------+

[2m[36m(_objective pid=36073)[0m {'eval_loss': 0.21655632555484772, 'eval_runtime': 9.6019, 'eval_samples_per_second': 431.894, 'eval_steps_per_second': 13.539, 'epoch': 3.0}


[2m[36m(_objective pid=36073)[0m                                                    
[2m[36m(_objective pid=36073)[0m                                                  [A100%|██████████| 9330/9330 [05:11<00:00, 30.68it/s]
[2m[36m(_objective pid=36073)[0m 100%|██████████| 130/130 [00:09<00:00, 11.97it/s][A
[2m[36m(_objective pid=36073)[0m                                                  [A


Trial _objective_f556c_00010 completed after 3 iterations at 2023-09-11 14:41:29. Total running time: 1hr 21min 8s

[2m[36m(_objective pid=36073)[0m {'train_runtime': 313.494, 'train_samples_per_second': 119.036, 'train_steps_per_second': 29.761, 'train_loss': 0.1662567514826375, 'epoch': 3.0}


[2m[36m(_objective pid=36073)[0m                                                    100%|██████████| 9330/9330 [05:13<00:00, 30.68it/s]100%|██████████| 9330/9330 [05:13<00:00, 29.76it/s]


Trial _objective_f556c_00011 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00011 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             4e-05 |
| num_train_epochs                              2 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.05546 |
+-------------------------------------------------+



[2m[36m(_objective pid=37490)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight']
[2m[36m(_objective pid=37490)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=37490)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=37490)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=37490)[0m {'loss': 0.4092, 'learning_rate': 3.698178306763327e-05, 'epoch': 0.16}


[2m[36m(_objective pid=37490)[0m   8%|▊         | 508/6220 [00:15<02:56, 32.30it/s]
  8%|▊         | 512/6220 [00:15<02:58, 32.01it/s]
  8%|▊         | 516/6220 [00:15<02:54, 32.73it/s]
  8%|▊         | 520/6220 [00:15<02:48, 33.88it/s]
  8%|▊         | 524/6220 [00:15<02:41, 35.24it/s]
  8%|▊         | 528/6220 [00:15<02:38, 35.80it/s]
  9%|▊         | 532/6220 [00:16<02:38, 35.90it/s]
  9%|▊         | 536/6220 [00:16<02:50, 33.33it/s]
  9%|▊         | 540/6220 [00:16<02:57, 32.05it/s]
  9%|▊         | 544/6220 [00:16<02:48, 33.65it/s]
  9%|▉         | 548/6220 [00:16<02:44, 34.47it/s]
  9%|▉         | 553/6220 [00:16<02:36, 36.24it/s]


Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:41:56. Total running time: 1hr 21min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                

[2m[36m(_objective pid=37490)[0m   9%|▉         | 557/6220 [00:16<02:42, 34.95it/s]
  9%|▉         | 561/6220 [00:16<02:38, 35.73it/s]
  9%|▉         | 565/6220 [00:17<02:38, 35.66it/s]
  9%|▉         | 569/6220 [00:17<02:39, 35.44it/s]
  9%|▉         | 573/6220 [00:17<02:35, 36.21it/s]
  9%|▉         | 577/6220 [00:17<02:33, 36.82it/s]
  9%|▉         | 581/6220 [00:17<02:32, 37.00it/s]
  9%|▉         | 585/6220 [00:17<02:33, 36.77it/s]
  9%|▉         | 589/6220 [00:17<02:37, 35.80it/s]
 10%|▉         | 593/6220 [00:17<02:36, 35.85it/s]
 10%|▉         | 597/6220 [00:17<02:34, 36.46it/s]
 10%|▉         | 601/6220 [00:18<02:32, 36.92it/s]
 10%|▉         | 605/6220 [00:18<02:50, 33.00it/s]
 10%|▉         | 609/6220 [00:18<02:43, 34.24it/s]
 10%|▉         | 613/6220 [00:18<02:48, 33.31it/s]
 10%|▉         | 617/6220 [00:18<02:47, 33.47it/s]
 10%|▉         | 621/6220 [00:18<02:41, 34.77it/s]
 10%|█         | 625/6220 [00:18<02:35, 35.96it/s]
 10%|█         | 629/6220 [00:18<02:38, 35.24

[2m[36m(_objective pid=37490)[0m {'loss': 0.3346, 'learning_rate': 3.374910972256043e-05, 'epoch': 0.32}


[2m[36m(_objective pid=37490)[0m  16%|█▌        | 1002/6220 [00:29<02:19, 37.38it/s]
 16%|█▌        | 1006/6220 [00:29<02:19, 37.45it/s]
 16%|█▌        | 1010/6220 [00:29<02:19, 37.43it/s]
 16%|█▋        | 1014/6220 [00:30<02:18, 37.66it/s]
 16%|█▋        | 1018/6220 [00:30<02:18, 37.64it/s]
 16%|█▋        | 1022/6220 [00:30<02:25, 35.76it/s]
 16%|█▋        | 1026/6220 [00:30<02:23, 36.27it/s]
 17%|█▋        | 1030/6220 [00:30<02:20, 36.83it/s]
 17%|█▋        | 1034/6220 [00:30<02:26, 35.45it/s]
 17%|█▋        | 1038/6220 [00:30<02:23, 36.17it/s]
 17%|█▋        | 1042/6220 [00:30<02:19, 37.04it/s]
 17%|█▋        | 1046/6220 [00:30<02:17, 37.52it/s]
 17%|█▋        | 1050/6220 [00:30<02:18, 37.27it/s]
 17%|█▋        | 1054/6220 [00:31<02:29, 34.58it/s]
 17%|█▋        | 1058/6220 [00:31<02:24, 35.67it/s]
 17%|█▋        | 1062/6220 [00:31<02:25, 35.46it/s]
 17%|█▋        | 1066/6220 [00:31<02:24, 35.70it/s]
 17%|█▋        | 1070/6220 [00:31<02:19, 36.79it/s]
 17%|█▋        | 1074/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.2973, 'learning_rate': 3.051643637748759e-05, 'epoch': 0.48}


[2m[36m(_objective pid=37490)[0m  24%|██▍       | 1507/6220 [00:44<02:06, 37.35it/s]
 24%|██▍       | 1511/6220 [00:44<02:05, 37.54it/s]
 24%|██▍       | 1515/6220 [00:44<02:04, 37.92it/s]
 24%|██▍       | 1519/6220 [00:44<02:02, 38.51it/s]
 24%|██▍       | 1523/6220 [00:44<02:07, 36.74it/s]
 25%|██▍       | 1527/6220 [00:44<02:07, 36.69it/s]
 25%|██▍       | 1531/6220 [00:45<02:05, 37.45it/s]
 25%|██▍       | 1535/6220 [00:45<02:05, 37.46it/s]
 25%|██▍       | 1539/6220 [00:45<02:04, 37.58it/s]
 25%|██▍       | 1543/6220 [00:45<02:16, 34.26it/s]
 25%|██▍       | 1547/6220 [00:45<02:11, 35.57it/s]
 25%|██▍       | 1551/6220 [00:45<02:14, 34.80it/s]
 25%|██▌       | 1555/6220 [00:45<02:30, 31.00it/s]
 25%|██▌       | 1559/6220 [00:45<02:35, 29.93it/s]
 25%|██▌       | 1563/6220 [00:46<02:25, 31.94it/s]
 25%|██▌       | 1567/6220 [00:46<02:19, 33.36it/s]
 25%|██▌       | 1571/6220 [00:46<02:22, 32.62it/s]
 25%|██▌       | 1575/6220 [00:46<02:17, 33.90it/s]
 25%|██▌       | 1579/6220 

Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:42:26. Total running time: 1hr 22min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                 

[2m[36m(_objective pid=37490)[0m  26%|██▌       | 1587/6220 [00:46<02:09, 35.76it/s]
 26%|██▌       | 1591/6220 [00:46<02:12, 34.87it/s]
 26%|██▌       | 1595/6220 [00:46<02:12, 34.81it/s]
 26%|██▌       | 1599/6220 [00:47<02:20, 32.87it/s]
 26%|██▌       | 1603/6220 [00:47<02:15, 34.00it/s]
 26%|██▌       | 1607/6220 [00:47<02:13, 34.54it/s]
 26%|██▌       | 1611/6220 [00:47<02:09, 35.59it/s]
 26%|██▌       | 1615/6220 [00:47<02:07, 36.20it/s]
 26%|██▌       | 1619/6220 [00:47<02:04, 36.82it/s]
 26%|██▌       | 1623/6220 [00:47<02:11, 34.98it/s]
 26%|██▌       | 1627/6220 [00:47<02:06, 36.19it/s]
 26%|██▌       | 1631/6220 [00:47<02:18, 33.12it/s]
 26%|██▋       | 1635/6220 [00:48<02:22, 32.22it/s]
 26%|██▋       | 1639/6220 [00:48<02:16, 33.44it/s]
 26%|██▋       | 1643/6220 [00:48<02:12, 34.66it/s]
 26%|██▋       | 1647/6220 [00:48<02:29, 30.63it/s]
 27%|██▋       | 1651/6220 [00:48<02:21, 32.30it/s]
 27%|██▋       | 1655/6220 [00:48<02:29, 30.60it/s]
 27%|██▋       | 1659/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.2403, 'learning_rate': 2.728376303241475e-05, 'epoch': 0.64}


[2m[36m(_objective pid=37490)[0m  32%|███▏      | 2007/6220 [00:59<01:56, 36.11it/s]
 32%|███▏      | 2011/6220 [00:59<01:56, 36.16it/s]
 32%|███▏      | 2015/6220 [00:59<01:54, 36.74it/s]
 32%|███▏      | 2019/6220 [00:59<02:02, 34.23it/s]
 33%|███▎      | 2023/6220 [00:59<02:05, 33.56it/s]
 33%|███▎      | 2027/6220 [00:59<02:14, 31.08it/s]
 33%|███▎      | 2031/6220 [00:59<02:05, 33.31it/s]
 33%|███▎      | 2035/6220 [00:59<01:59, 35.01it/s]
 33%|███▎      | 2040/6220 [01:00<01:53, 36.81it/s]
 33%|███▎      | 2044/6220 [01:00<02:08, 32.49it/s]
 33%|███▎      | 2048/6220 [01:00<02:04, 33.53it/s]
 33%|███▎      | 2052/6220 [01:00<02:12, 31.52it/s]
 33%|███▎      | 2056/6220 [01:00<02:12, 31.37it/s]
 33%|███▎      | 2060/6220 [01:00<02:04, 33.47it/s]
 33%|███▎      | 2064/6220 [01:00<02:02, 33.84it/s]
 33%|███▎      | 2068/6220 [01:00<01:57, 35.25it/s]
 33%|███▎      | 2072/6220 [01:01<01:53, 36.49it/s]
 33%|███▎      | 2076/6220 [01:01<01:51, 37.29it/s]
 33%|███▎      | 2080/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.2063, 'learning_rate': 2.4051089687341915e-05, 'epoch': 0.8}


[2m[36m(_objective pid=37490)[0m  40%|████      | 2509/6220 [01:13<01:50, 33.49it/s]
 40%|████      | 2513/6220 [01:14<02:00, 30.85it/s]
 40%|████      | 2517/6220 [01:14<01:53, 32.53it/s]
 41%|████      | 2521/6220 [01:14<01:52, 33.01it/s]
 41%|████      | 2525/6220 [01:14<01:47, 34.34it/s]
 41%|████      | 2529/6220 [01:14<01:53, 32.45it/s]
 41%|████      | 2533/6220 [01:14<01:52, 32.70it/s]
 41%|████      | 2537/6220 [01:14<01:48, 33.93it/s]
 41%|████      | 2541/6220 [01:14<01:48, 33.82it/s]
 41%|████      | 2545/6220 [01:14<01:45, 34.82it/s]
 41%|████      | 2549/6220 [01:15<01:44, 35.28it/s]
 41%|████      | 2553/6220 [01:15<01:41, 36.18it/s]
 41%|████      | 2557/6220 [01:15<01:39, 36.92it/s]
 41%|████      | 2562/6220 [01:15<01:36, 37.72it/s]
 41%|████▏     | 2566/6220 [01:15<01:46, 34.42it/s]
 41%|████▏     | 2570/6220 [01:15<01:42, 35.53it/s]
 41%|████▏     | 2574/6220 [01:15<01:42, 35.45it/s]
 41%|████▏     | 2578/6220 [01:15<01:46, 34.19it/s]
 42%|████▏     | 2582/6220 

Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:42:56. Total running time: 1hr 22min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                

[2m[36m(_objective pid=37490)[0m  42%|████▏     | 2610/6220 [01:16<01:40, 36.00it/s]
 42%|████▏     | 2614/6220 [01:16<01:39, 36.10it/s]
 42%|████▏     | 2618/6220 [01:17<01:50, 32.68it/s]
 42%|████▏     | 2622/6220 [01:17<01:51, 32.19it/s]
 42%|████▏     | 2626/6220 [01:17<01:48, 33.20it/s]
 42%|████▏     | 2630/6220 [01:17<01:45, 34.02it/s]
 42%|████▏     | 2634/6220 [01:17<01:42, 34.83it/s]
 42%|████▏     | 2638/6220 [01:17<01:45, 34.04it/s]
 42%|████▏     | 2642/6220 [01:17<01:43, 34.43it/s]
 43%|████▎     | 2646/6220 [01:17<01:40, 35.65it/s]
 43%|████▎     | 2650/6220 [01:17<01:38, 36.12it/s]
 43%|████▎     | 2654/6220 [01:18<01:37, 36.49it/s]
 43%|████▎     | 2658/6220 [01:18<01:50, 32.38it/s]
 43%|████▎     | 2662/6220 [01:18<01:44, 33.97it/s]
 43%|████▎     | 2666/6220 [01:18<01:41, 35.18it/s]
 43%|████▎     | 2670/6220 [01:18<01:40, 35.40it/s]
 43%|████▎     | 2674/6220 [01:18<01:37, 36.22it/s]
 43%|████▎     | 2678/6220 [01:18<01:39, 35.61it/s]
 43%|████▎     | 2682/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.2087, 'learning_rate': 2.0818416342269078e-05, 'epoch': 0.96}


[2m[36m(_objective pid=37490)[0m  48%|████▊     | 3006/6220 [01:28<01:51, 28.73it/s]
 48%|████▊     | 3010/6220 [01:28<01:44, 30.81it/s]
 48%|████▊     | 3014/6220 [01:28<01:39, 32.27it/s]
 49%|████▊     | 3018/6220 [01:29<01:35, 33.61it/s]
 49%|████▊     | 3022/6220 [01:29<01:32, 34.55it/s]
 49%|████▊     | 3026/6220 [01:29<01:32, 34.70it/s]
 49%|████▊     | 3030/6220 [01:29<01:40, 31.59it/s]
 49%|████▉     | 3034/6220 [01:29<01:35, 33.27it/s]
 49%|████▉     | 3038/6220 [01:29<01:31, 34.83it/s]
 49%|████▉     | 3042/6220 [01:29<01:36, 33.01it/s]
 49%|████▉     | 3046/6220 [01:29<01:34, 33.69it/s]
 49%|████▉     | 3050/6220 [01:29<01:34, 33.70it/s]
 49%|████▉     | 3054/6220 [01:30<01:34, 33.38it/s]
 49%|████▉     | 3058/6220 [01:30<01:35, 33.27it/s]
 49%|████▉     | 3062/6220 [01:30<01:32, 34.08it/s]
 49%|████▉     | 3066/6220 [01:30<01:32, 33.98it/s]
 49%|████▉     | 3070/6220 [01:30<01:31, 34.44it/s]
 49%|████▉     | 3074/6220 [01:30<01:28, 35.36it/s]
 49%|████▉     | 3078/6220 

Trial _objective_f556c_00011 finished iteration 1 at 2023-09-11 14:43:21. Total running time: 1hr 22min 59s
+-------------------------------------------------+
| Trial _objective_f556c_00011 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.058 |
| time_total_s                            104.058 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.17871 |
| eval_runtime                             9.5941 |
| eval_samples_per_second                 432.243 |
| eval_steps_per_second                     13.55 |
| objective                               0.17871 |
+-------------------------------------------------+

[2m[36m(_objective pid=37490)[0m {'eval_loss': 0.17870572209358215, 'eval_runtime': 9.5941, 'eval_samples_per_second': 432.243, 'eval_steps_per_second': 13.55, 'epoch': 1.0}


[2m[36m(_objective pid=37490)[0m  50%|█████     | 3111/6220 [01:42<42:37,  1.22it/s]
 50%|█████     | 3115/6220 [01:42<30:31,  1.70it/s]
 50%|█████     | 3119/6220 [01:42<22:10,  2.33it/s]
 50%|█████     | 3123/6220 [01:43<15:59,  3.23it/s]
 50%|█████     | 3127/6220 [01:43<11:38,  4.43it/s]
 50%|█████     | 3131/6220 [01:43<08:35,  5.99it/s]
 50%|█████     | 3135/6220 [01:43<06:27,  7.96it/s]
 50%|█████     | 3139/6220 [01:43<05:01, 10.23it/s]
 51%|█████     | 3143/6220 [01:43<03:56, 13.00it/s]
 51%|█████     | 3147/6220 [01:43<03:10, 16.12it/s]
 51%|█████     | 3151/6220 [01:43<02:38, 19.33it/s]
 51%|█████     | 3155/6220 [01:43<02:18, 22.18it/s]
 51%|█████     | 3159/6220 [01:44<02:11, 23.20it/s]
 51%|█████     | 3163/6220 [01:44<02:15, 22.55it/s]
 51%|█████     | 3167/6220 [01:44<02:03, 24.67it/s]
 51%|█████     | 3170/6220 [01:44<02:08, 23.81it/s]
 51%|█████     | 3173/6220 [01:44<02:01, 25.13it/s]
 51%|█████     | 3177/6220 [01:44<01:49, 27.84it/s]
 51%|█████     | 3181/6220 

Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:43:26. Total running time: 1hr 23min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                 

[2m[36m(_objective pid=37490)[0m  52%|█████▏    | 3241/6220 [01:46<01:33, 31.82it/s]
 52%|█████▏    | 3245/6220 [01:46<01:29, 33.33it/s]
 52%|█████▏    | 3249/6220 [01:46<01:32, 32.05it/s]
 52%|█████▏    | 3253/6220 [01:47<01:31, 32.32it/s]
 52%|█████▏    | 3257/6220 [01:47<01:27, 33.76it/s]
 52%|█████▏    | 3261/6220 [01:47<01:25, 34.63it/s]
 52%|█████▏    | 3265/6220 [01:47<01:27, 33.60it/s]
 53%|█████▎    | 3269/6220 [01:47<01:25, 34.54it/s]
 53%|█████▎    | 3273/6220 [01:47<01:22, 35.91it/s]
 53%|█████▎    | 3277/6220 [01:47<01:21, 36.16it/s]
 53%|█████▎    | 3281/6220 [01:47<01:26, 33.78it/s]
 53%|█████▎    | 3285/6220 [01:48<01:27, 33.70it/s]
 53%|█████▎    | 3289/6220 [01:48<01:23, 35.26it/s]
 53%|█████▎    | 3293/6220 [01:48<01:30, 32.26it/s]
 53%|█████▎    | 3297/6220 [01:48<01:25, 34.02it/s]
 53%|█████▎    | 3301/6220 [01:48<01:22, 35.42it/s]
 53%|█████▎    | 3305/6220 [01:48<01:23, 34.80it/s]
 53%|█████▎    | 3309/6220 [01:48<01:29, 32.57it/s]
 53%|█████▎    | 3313/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.152, 'learning_rate': 1.7585742997196237e-05, 'epoch': 1.13}


[2m[36m(_objective pid=37490)[0m  56%|█████▋    | 3507/6220 [01:54<01:14, 36.33it/s]
 56%|█████▋    | 3511/6220 [01:54<01:18, 34.29it/s]
 57%|█████▋    | 3515/6220 [01:54<01:20, 33.71it/s]
 57%|█████▋    | 3519/6220 [01:54<01:19, 33.97it/s]
 57%|█████▋    | 3523/6220 [01:54<01:16, 35.26it/s]
 57%|█████▋    | 3527/6220 [01:54<01:23, 32.08it/s]
 57%|█████▋    | 3531/6220 [01:55<01:24, 31.79it/s]
 57%|█████▋    | 3535/6220 [01:55<01:21, 32.88it/s]
 57%|█████▋    | 3539/6220 [01:55<01:24, 31.55it/s]
 57%|█████▋    | 3543/6220 [01:55<01:19, 33.48it/s]
 57%|█████▋    | 3547/6220 [01:55<01:17, 34.48it/s]
 57%|█████▋    | 3551/6220 [01:55<01:14, 35.77it/s]
 57%|█████▋    | 3555/6220 [01:55<01:13, 36.36it/s]
 57%|█████▋    | 3559/6220 [01:55<01:11, 37.14it/s]
 57%|█████▋    | 3563/6220 [01:55<01:14, 35.68it/s]
 57%|█████▋    | 3567/6220 [01:56<01:16, 34.68it/s]
 57%|█████▋    | 3571/6220 [01:56<01:24, 31.24it/s]
 57%|█████▋    | 3575/6220 [01:56<01:19, 33.16it/s]
 58%|█████▊    | 3579/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.1105, 'learning_rate': 1.43530696521234e-05, 'epoch': 1.29}


[2m[36m(_objective pid=37490)[0m  64%|██████▍   | 4009/6220 [02:09<01:13, 30.25it/s]
 65%|██████▍   | 4013/6220 [02:09<01:10, 31.37it/s]
 65%|██████▍   | 4017/6220 [02:09<01:06, 33.20it/s]
 65%|██████▍   | 4021/6220 [02:09<01:04, 34.06it/s]
 65%|██████▍   | 4025/6220 [02:09<01:03, 34.72it/s]
 65%|██████▍   | 4029/6220 [02:09<01:01, 35.52it/s]
 65%|██████▍   | 4034/6220 [02:10<01:00, 36.26it/s]
 65%|██████▍   | 4038/6220 [02:10<00:58, 37.08it/s]
 65%|██████▍   | 4042/6220 [02:10<00:58, 37.13it/s]
 65%|██████▌   | 4046/6220 [02:10<00:57, 37.66it/s]
 65%|██████▌   | 4051/6220 [02:10<00:56, 38.33it/s]
 65%|██████▌   | 4055/6220 [02:10<00:57, 37.47it/s]
 65%|██████▌   | 4059/6220 [02:10<00:57, 37.44it/s]
 65%|██████▌   | 4063/6220 [02:10<00:56, 38.01it/s]
 65%|██████▌   | 4067/6220 [02:10<00:56, 38.23it/s]
 65%|██████▌   | 4071/6220 [02:11<00:56, 38.21it/s]
 66%|██████▌   | 4075/6220 [02:11<00:55, 38.36it/s]
 66%|██████▌   | 4079/6220 [02:11<00:57, 37.23it/s]
 66%|██████▌   | 4083/6220 

Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:43:56. Total running time: 1hr 23min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                

[2m[36m(_objective pid=37490)[0m  69%|██████▊   | 4268/6220 [02:16<00:54, 35.64it/s]
 69%|██████▊   | 4272/6220 [02:16<00:54, 35.57it/s]
 69%|██████▊   | 4276/6220 [02:17<01:01, 31.80it/s]
 69%|██████▉   | 4280/6220 [02:17<00:57, 33.70it/s]
 69%|██████▉   | 4284/6220 [02:17<00:59, 32.53it/s]
 69%|██████▉   | 4288/6220 [02:17<00:58, 32.96it/s]
 69%|██████▉   | 4292/6220 [02:17<00:59, 32.26it/s]
 69%|██████▉   | 4296/6220 [02:17<00:58, 32.83it/s]
 69%|██████▉   | 4300/6220 [02:17<00:59, 32.49it/s]
 69%|██████▉   | 4304/6220 [02:17<00:56, 33.64it/s]
 69%|██████▉   | 4308/6220 [02:18<00:57, 33.40it/s]
 69%|██████▉   | 4312/6220 [02:18<00:55, 34.63it/s]
 69%|██████▉   | 4316/6220 [02:18<01:00, 31.37it/s]
 69%|██████▉   | 4321/6220 [02:18<00:56, 33.83it/s]
 70%|██████▉   | 4325/6220 [02:18<00:53, 35.20it/s]
 70%|██████▉   | 4329/6220 [02:18<00:57, 32.79it/s]
 70%|██████▉   | 4333/6220 [02:18<00:56, 33.69it/s]
 70%|██████▉   | 4337/6220 [02:18<00:54, 34.30it/s]
 70%|██████▉   | 4341/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.1201, 'learning_rate': 1.1120396307050563e-05, 'epoch': 1.45}


[2m[36m(_objective pid=37490)[0m  73%|███████▎  | 4510/6220 [02:23<00:47, 36.13it/s]
 73%|███████▎  | 4514/6220 [02:23<00:47, 35.88it/s]
 73%|███████▎  | 4518/6220 [02:23<00:46, 36.96it/s]
 73%|███████▎  | 4522/6220 [02:24<00:45, 37.25it/s]
 73%|███████▎  | 4526/6220 [02:24<00:44, 37.65it/s]
 73%|███████▎  | 4530/6220 [02:24<00:44, 37.94it/s]
 73%|███████▎  | 4534/6220 [02:24<00:49, 34.22it/s]
 73%|███████▎  | 4538/6220 [02:24<00:50, 33.34it/s]
 73%|███████▎  | 4543/6220 [02:24<00:48, 34.38it/s]
 73%|███████▎  | 4547/6220 [02:24<00:48, 34.26it/s]
 73%|███████▎  | 4551/6220 [02:24<00:46, 35.59it/s]
 73%|███████▎  | 4555/6220 [02:24<00:46, 36.18it/s]
 73%|███████▎  | 4559/6220 [02:25<00:51, 32.09it/s]
 73%|███████▎  | 4563/6220 [02:25<00:51, 32.29it/s]
 73%|███████▎  | 4567/6220 [02:25<00:49, 33.73it/s]
 74%|███████▎  | 4572/6220 [02:25<00:46, 35.77it/s]
 74%|███████▎  | 4576/6220 [02:25<00:46, 35.11it/s]
 74%|███████▎  | 4580/6220 [02:25<00:50, 32.42it/s]
 74%|███████▎  | 4584/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.1368, 'learning_rate': 7.887722961977725e-06, 'epoch': 1.61}


[2m[36m(_objective pid=37490)[0m  81%|████████  | 5008/6220 [02:38<00:34, 35.05it/s]
 81%|████████  | 5012/6220 [02:38<00:34, 35.08it/s]
 81%|████████  | 5016/6220 [02:38<00:35, 33.68it/s]
 81%|████████  | 5020/6220 [02:38<00:35, 34.19it/s]
 81%|████████  | 5024/6220 [02:38<00:37, 31.50it/s]
 81%|████████  | 5028/6220 [02:39<00:40, 29.58it/s]
 81%|████████  | 5032/6220 [02:39<00:37, 31.59it/s]
 81%|████████  | 5036/6220 [02:39<00:35, 33.32it/s]
 81%|████████  | 5040/6220 [02:39<00:34, 33.96it/s]
 81%|████████  | 5044/6220 [02:39<00:35, 33.24it/s]
 81%|████████  | 5048/6220 [02:39<00:38, 30.30it/s]
 81%|████████  | 5052/6220 [02:39<00:37, 31.28it/s]
 81%|████████▏ | 5056/6220 [02:39<00:38, 29.87it/s]
 81%|████████▏ | 5060/6220 [02:40<00:36, 31.67it/s]
 81%|████████▏ | 5064/6220 [02:40<00:34, 33.64it/s]
 81%|████████▏ | 5068/6220 [02:40<00:34, 33.74it/s]
 82%|████████▏ | 5072/6220 [02:40<00:32, 35.33it/s]
 82%|████████▏ | 5076/6220 [02:40<00:33, 34.23it/s]
 82%|████████▏ | 5080/6220 

Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:44:26. Total running time: 1hr 24min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                 

[2m[36m(_objective pid=37490)[0m  85%|████████▌ | 5292/6220 [02:46<00:27, 33.90it/s]
 85%|████████▌ | 5296/6220 [02:46<00:27, 33.49it/s]
 85%|████████▌ | 5300/6220 [02:47<00:29, 30.67it/s]
 85%|████████▌ | 5304/6220 [02:47<00:28, 32.04it/s]
 85%|████████▌ | 5308/6220 [02:47<00:31, 28.79it/s]
 85%|████████▌ | 5311/6220 [02:47<00:31, 28.88it/s]
 85%|████████▌ | 5315/6220 [02:47<00:29, 30.56it/s]
 86%|████████▌ | 5319/6220 [02:47<00:28, 32.05it/s]
 86%|████████▌ | 5323/6220 [02:47<00:27, 32.93it/s]
 86%|████████▌ | 5327/6220 [02:47<00:27, 32.80it/s]
 86%|████████▌ | 5331/6220 [02:48<00:26, 33.47it/s]
 86%|████████▌ | 5335/6220 [02:48<00:26, 33.71it/s]
 86%|████████▌ | 5339/6220 [02:48<00:25, 34.33it/s]
 86%|████████▌ | 5343/6220 [02:48<00:25, 34.86it/s]
 86%|████████▌ | 5347/6220 [02:48<00:24, 35.08it/s]
 86%|████████▌ | 5351/6220 [02:48<00:24, 35.15it/s]
 86%|████████▌ | 5355/6220 [02:48<00:24, 34.79it/s]
 86%|████████▌ | 5359/6220 [02:48<00:25, 34.08it/s]
 86%|████████▌ | 5363/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.1371, 'learning_rate': 4.655049616904886e-06, 'epoch': 1.77}


[2m[36m(_objective pid=37490)[0m  89%|████████▊ | 5506/6220 [02:53<00:19, 35.94it/s]
 89%|████████▊ | 5510/6220 [02:53<00:19, 36.95it/s]
 89%|████████▊ | 5514/6220 [02:53<00:19, 36.08it/s]
 89%|████████▊ | 5518/6220 [02:53<00:19, 36.39it/s]
 89%|████████▉ | 5522/6220 [02:53<00:20, 33.87it/s]
 89%|████████▉ | 5526/6220 [02:54<00:20, 34.55it/s]
 89%|████████▉ | 5530/6220 [02:54<00:19, 35.27it/s]
 89%|████████▉ | 5534/6220 [02:54<00:19, 35.86it/s]
 89%|████████▉ | 5538/6220 [02:54<00:20, 34.02it/s]
 89%|████████▉ | 5542/6220 [02:54<00:19, 34.96it/s]
 89%|████████▉ | 5546/6220 [02:54<00:18, 35.91it/s]
 89%|████████▉ | 5550/6220 [02:54<00:18, 35.91it/s]
 89%|████████▉ | 5554/6220 [02:54<00:18, 36.44it/s]
 89%|████████▉ | 5558/6220 [02:54<00:20, 32.87it/s]
 89%|████████▉ | 5562/6220 [02:55<00:19, 33.63it/s]
 89%|████████▉ | 5566/6220 [02:55<00:18, 35.07it/s]
 90%|████████▉ | 5570/6220 [02:55<00:18, 35.63it/s]
 90%|████████▉ | 5574/6220 [02:55<00:18, 35.04it/s]
 90%|████████▉ | 5578/6220 

[2m[36m(_objective pid=37490)[0m {'loss': 0.1177, 'learning_rate': 1.4223762718320487e-06, 'epoch': 1.93}


[2m[36m(_objective pid=37490)[0m  97%|█████████▋| 6006/6220 [03:08<00:06, 34.75it/s]
 97%|█████████▋| 6010/6220 [03:08<00:05, 35.30it/s]
 97%|█████████▋| 6014/6220 [03:08<00:06, 34.06it/s]
 97%|█████████▋| 6018/6220 [03:08<00:05, 34.28it/s]
 97%|█████████▋| 6022/6220 [03:08<00:05, 34.64it/s]
 97%|█████████▋| 6026/6220 [03:08<00:05, 35.24it/s]
 97%|█████████▋| 6030/6220 [03:09<00:05, 32.79it/s]
 97%|█████████▋| 6034/6220 [03:09<00:05, 33.67it/s]
 97%|█████████▋| 6038/6220 [03:09<00:05, 34.88it/s]
 97%|█████████▋| 6042/6220 [03:09<00:05, 34.93it/s]
 97%|█████████▋| 6046/6220 [03:09<00:04, 35.82it/s]
 97%|█████████▋| 6050/6220 [03:09<00:05, 32.26it/s]
 97%|█████████▋| 6054/6220 [03:09<00:05, 29.59it/s]
 97%|█████████▋| 6058/6220 [03:09<00:05, 31.18it/s]
 97%|█████████▋| 6062/6220 [03:10<00:04, 32.48it/s]
 98%|█████████▊| 6066/6220 [03:10<00:04, 34.02it/s]
 98%|█████████▊| 6070/6220 [03:10<00:04, 33.89it/s]
 98%|█████████▊| 6074/6220 [03:10<00:04, 33.41it/s]
 98%|█████████▊| 6078/6220 

Trial status: 11 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-11 14:44:56. Total running time: 1hr 24min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00011   RUNNING          4.02145e-05                    2                

[2m[36m(_objective pid=37490)[0m 
 24%|██▍       | 31/130 [00:02<00:06, 16.19it/s][A
[2m[36m(_objective pid=37490)[0m 
 26%|██▌       | 34/130 [00:02<00:05, 16.34it/s][A
[2m[36m(_objective pid=37490)[0m 
 28%|██▊       | 36/130 [00:02<00:05, 16.26it/s][A
[2m[36m(_objective pid=37490)[0m 
 29%|██▉       | 38/130 [00:02<00:05, 15.86it/s][A
[2m[36m(_objective pid=37490)[0m 
 31%|███       | 40/130 [00:02<00:06, 13.90it/s][A
[2m[36m(_objective pid=37490)[0m 
 32%|███▏      | 42/130 [00:02<00:06, 14.23it/s][A
[2m[36m(_objective pid=37490)[0m 
 34%|███▍      | 44/130 [00:03<00:06, 13.92it/s][A
[2m[36m(_objective pid=37490)[0m 
 35%|███▌      | 46/130 [00:03<00:06, 12.70it/s][A
[2m[36m(_objective pid=37490)[0m 
 37%|███▋      | 48/130 [00:03<00:06, 11.76it/s][A
[2m[36m(_objective pid=37490)[0m 
 39%|███▉      | 51/130 [00:03<00:06, 12.88it/s][A
[2m[36m(_objective pid=37490)[0m 
 41%|████      | 53/130 [00:03<00:05, 13.29it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00011 finished iteration 2 at 2023-09-11 14:45:04. Total running time: 1hr 24min 42s
+-------------------------------------------------+
| Trial _objective_f556c_00011 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.971 |
| time_total_s                            207.029 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.19741 |
| eval_runtime                             9.6099 |
| eval_samples_per_second                 431.536 |
| eval_steps_per_second                    13.528 |
| objective                               0.19741 |
+-------------------------------------------------+

[2m[36m(_objective pid=37490)[0m {'eval_loss': 0.19741114974021912, 'eval_runtime': 9.6099, 'eval_samples_per_second': 431.536, 'eval_steps_per_second': 13.528, 'epoch': 2.0}
Trial _objective_f556c_00011 complete

[2m[36m(_objective pid=37490)[0m                                                    100%|██████████| 6220/6220 [03:25<00:00, 33.61it/s]100%|██████████| 6220/6220 [03:25<00:00, 30.23it/s]


Trial _objective_f556c_00012 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00012 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             4e-05 |
| num_train_epochs                              2 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.26845 |
+-------------------------------------------------+



[2m[36m(_objective pid=38439)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias']
[2m[36m(_objective pid=38439)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=38439)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=38439)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:45:26. Total running time: 1hr 25min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                 

[2m[36m(_objective pid=38439)[0m   6%|▋         | 404/6220 [00:12<02:40, 36.22it/s]
  7%|▋         | 408/6220 [00:12<02:38, 36.73it/s]
  7%|▋         | 412/6220 [00:12<02:38, 36.68it/s]
  7%|▋         | 416/6220 [00:13<02:46, 34.86it/s]
  7%|▋         | 420/6220 [00:13<02:42, 35.76it/s]
  7%|▋         | 424/6220 [00:13<02:41, 35.85it/s]
  7%|▋         | 428/6220 [00:13<02:43, 35.45it/s]
  7%|▋         | 432/6220 [00:13<02:38, 36.43it/s]
  7%|▋         | 436/6220 [00:13<02:34, 37.42it/s]
  7%|▋         | 440/6220 [00:13<02:42, 35.49it/s]
  7%|▋         | 444/6220 [00:13<02:42, 35.58it/s]
  7%|▋         | 449/6220 [00:13<02:35, 37.13it/s]
  7%|▋         | 453/6220 [00:14<02:51, 33.69it/s]
  7%|▋         | 457/6220 [00:14<02:45, 34.82it/s]
  7%|▋         | 461/6220 [00:14<02:43, 35.12it/s]
  7%|▋         | 465/6220 [00:14<02:49, 33.87it/s]
  8%|▊         | 469/6220 [00:14<03:04, 31.24it/s]
  8%|▊         | 473/6220 [00:14<02:54, 32.94it/s]
  8%|▊         | 477/6220 [00:14<03:12, 29.91

[2m[36m(_objective pid=38439)[0m {'loss': 0.4073, 'learning_rate': 3.770906591399084e-05, 'epoch': 0.16}


  8%|▊         | 505/6220 [00:15<02:50, 33.61it/s]
  8%|▊         | 509/6220 [00:15<03:03, 31.16it/s]
  8%|▊         | 513/6220 [00:15<02:56, 32.34it/s]
  8%|▊         | 517/6220 [00:16<02:53, 32.93it/s]
  8%|▊         | 521/6220 [00:16<02:49, 33.56it/s]
  8%|▊         | 525/6220 [00:16<02:43, 34.76it/s]
  9%|▊         | 529/6220 [00:16<02:46, 34.08it/s]
  9%|▊         | 533/6220 [00:16<02:50, 33.39it/s]
  9%|▊         | 537/6220 [00:16<03:10, 29.90it/s]
  9%|▊         | 541/6220 [00:16<02:58, 31.79it/s]
  9%|▉         | 545/6220 [00:16<02:55, 32.35it/s]
  9%|▉         | 549/6220 [00:17<02:48, 33.68it/s]
  9%|▉         | 553/6220 [00:17<02:47, 33.90it/s]
  9%|▉         | 557/6220 [00:17<02:50, 33.16it/s]
  9%|▉         | 561/6220 [00:17<02:49, 33.34it/s]
  9%|▉         | 565/6220 [00:17<02:49, 33.41it/s]
  9%|▉         | 569/6220 [00:17<02:46, 34.03it/s]
  9%|▉         | 573/6220 [00:17<02:42, 34.81it/s]
  9%|▉         | 577/6220 [00:17<02:37, 35.75it/s]
  9%|▉         | 581/6220 [00:1

[2m[36m(_objective pid=38439)[0m {'loss': 0.3211, 'learning_rate': 3.4412818893537095e-05, 'epoch': 0.32}


[2m[36m(_objective pid=38439)[0m                                                    16%|█▌        | 1000/6220 [00:30<02:16, 38.33it/s] 16%|█▌        | 1002/6220 [00:30<02:15, 38.59it/s]
 16%|█▌        | 1006/6220 [00:30<02:14, 38.87it/s]
 16%|█▌        | 1010/6220 [00:30<02:12, 39.18it/s]
 16%|█▋        | 1014/6220 [00:30<02:13, 39.12it/s]
 16%|█▋        | 1019/6220 [00:30<02:11, 39.55it/s]
 16%|█▋        | 1023/6220 [00:30<02:18, 37.50it/s]
 17%|█▋        | 1027/6220 [00:30<02:16, 38.09it/s]
 17%|█▋        | 1031/6220 [00:31<02:15, 38.42it/s]
 17%|█▋        | 1035/6220 [00:31<02:19, 37.30it/s]
 17%|█▋        | 1040/6220 [00:31<02:15, 38.26it/s]
 17%|█▋        | 1044/6220 [00:31<02:16, 37.92it/s]
 17%|█▋        | 1048/6220 [00:31<02:15, 38.16it/s]
 17%|█▋        | 1052/6220 [00:31<02:23, 36.13it/s]
 17%|█▋        | 1056/6220 [00:31<02:19, 36.90it/s]
 17%|█▋        | 1060/6220 [00:31<02:20, 36.72it/s]
 17%|█▋        | 1064/6220 [00:31<02:20, 36.70it/s]
 17%|█▋        | 1069/6220 

Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:45:56. Total running time: 1hr 25min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                

[2m[36m(_objective pid=38439)[0m  23%|██▎       | 1426/6220 [00:42<02:15, 35.26it/s]
 23%|██▎       | 1430/6220 [00:42<02:13, 35.87it/s]
 23%|██▎       | 1434/6220 [00:42<02:09, 36.92it/s]
 23%|██▎       | 1438/6220 [00:43<02:17, 34.83it/s]
 23%|██▎       | 1443/6220 [00:43<02:10, 36.54it/s]
 23%|██▎       | 1447/6220 [00:43<02:19, 34.32it/s]
 23%|██▎       | 1451/6220 [00:43<02:17, 34.66it/s]
 23%|██▎       | 1456/6220 [00:43<02:10, 36.47it/s]
 23%|██▎       | 1460/6220 [00:43<02:10, 36.55it/s]
 24%|██▎       | 1464/6220 [00:43<02:13, 35.68it/s]
 24%|██▎       | 1468/6220 [00:43<02:19, 34.01it/s]
 24%|██▎       | 1472/6220 [00:44<02:18, 34.25it/s]
 24%|██▎       | 1476/6220 [00:44<02:18, 34.26it/s]
 24%|██▍       | 1480/6220 [00:44<02:25, 32.62it/s]
 24%|██▍       | 1484/6220 [00:44<02:19, 34.01it/s]
 24%|██▍       | 1488/6220 [00:44<02:20, 33.80it/s]
 24%|██▍       | 1492/6220 [00:44<02:14, 35.27it/s]
 24%|██▍       | 1496/6220 [00:44<02:09, 36.40it/s]


[2m[36m(_objective pid=38439)[0m {'loss': 0.2978, 'learning_rate': 3.111657187308335e-05, 'epoch': 0.48}


 24%|██▍       | 1501/6220 [00:44<02:04, 37.92it/s]
 24%|██▍       | 1506/6220 [00:45<02:02, 38.60it/s]
 24%|██▍       | 1510/6220 [00:45<02:03, 38.29it/s]
 24%|██▍       | 1514/6220 [00:45<02:02, 38.31it/s]
 24%|██▍       | 1518/6220 [00:45<02:01, 38.71it/s]
 24%|██▍       | 1522/6220 [00:45<02:06, 37.15it/s]
 25%|██▍       | 1526/6220 [00:45<02:04, 37.72it/s]
 25%|██▍       | 1530/6220 [00:45<02:02, 38.22it/s]
 25%|██▍       | 1535/6220 [00:45<02:00, 38.84it/s]
 25%|██▍       | 1539/6220 [00:45<02:00, 38.96it/s]
 25%|██▍       | 1543/6220 [00:46<02:12, 35.39it/s]
 25%|██▍       | 1547/6220 [00:46<02:08, 36.48it/s]
 25%|██▍       | 1551/6220 [00:46<02:09, 36.11it/s]
 25%|██▌       | 1555/6220 [00:46<02:26, 31.95it/s]
 25%|██▌       | 1559/6220 [00:46<02:30, 30.94it/s]
 25%|██▌       | 1563/6220 [00:46<02:20, 33.06it/s]
 25%|██▌       | 1567/6220 [00:46<02:14, 34.60it/s]
 25%|██▌       | 1571/6220 [00:46<02:16, 34.00it/s]
 25%|██▌       | 1575/6220 [00:46<02:11, 35.20it/s]
 25%|██▌    

[2m[36m(_objective pid=38439)[0m {'loss': 0.2265, 'learning_rate': 2.7820324852629603e-05, 'epoch': 0.64}


[2m[36m(_objective pid=38439)[0m  32%|███▏      | 2003/6220 [00:59<01:49, 38.58it/s]
 32%|███▏      | 2007/6220 [00:59<01:52, 37.44it/s]
 32%|███▏      | 2011/6220 [00:59<01:54, 36.88it/s]
 32%|███▏      | 2015/6220 [00:59<01:53, 37.21it/s]
 32%|███▏      | 2019/6220 [01:00<02:01, 34.65it/s]
 33%|███▎      | 2023/6220 [01:00<02:04, 33.76it/s]
 33%|███▎      | 2027/6220 [01:00<02:13, 31.44it/s]
 33%|███▎      | 2031/6220 [01:00<02:07, 32.87it/s]
 33%|███▎      | 2035/6220 [01:00<02:01, 34.48it/s]
 33%|███▎      | 2039/6220 [01:00<01:57, 35.72it/s]
 33%|███▎      | 2043/6220 [01:00<02:10, 32.07it/s]
 33%|███▎      | 2047/6220 [01:00<02:05, 33.25it/s]
 33%|███▎      | 2051/6220 [01:01<02:01, 34.18it/s]
 33%|███▎      | 2055/6220 [01:01<02:18, 30.10it/s]
 33%|███▎      | 2059/6220 [01:01<02:07, 32.51it/s]
 33%|███▎      | 2063/6220 [01:01<02:08, 32.43it/s]
 33%|███▎      | 2067/6220 [01:01<02:03, 33.56it/s]
 33%|███▎      | 2071/6220 [01:01<02:00, 34.57it/s]
 33%|███▎      | 2075/6220 

Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:46:26. Total running time: 1hr 26min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                 

[2m[36m(_objective pid=38439)[0m  39%|███▉      | 2443/6220 [01:12<01:50, 34.32it/s]
 39%|███▉      | 2447/6220 [01:12<01:47, 35.05it/s]
 39%|███▉      | 2451/6220 [01:12<01:47, 35.17it/s]
 39%|███▉      | 2455/6220 [01:13<01:54, 32.95it/s]
 40%|███▉      | 2459/6220 [01:13<01:49, 34.22it/s]
 40%|███▉      | 2463/6220 [01:13<01:59, 31.48it/s]
 40%|███▉      | 2467/6220 [01:13<01:59, 31.39it/s]
 40%|███▉      | 2471/6220 [01:13<01:56, 32.32it/s]
 40%|███▉      | 2475/6220 [01:13<01:52, 33.21it/s]
 40%|███▉      | 2479/6220 [01:13<01:53, 33.10it/s]
 40%|███▉      | 2483/6220 [01:13<01:54, 32.66it/s]
 40%|███▉      | 2487/6220 [01:14<01:50, 33.65it/s]
 40%|████      | 2491/6220 [01:14<01:50, 33.83it/s]
 40%|████      | 2495/6220 [01:14<01:46, 35.06it/s]


[2m[36m(_objective pid=38439)[0m {'loss': 0.2011, 'learning_rate': 2.4524077832175862e-05, 'epoch': 0.8}


[2m[36m(_objective pid=38439)[0m  40%|████      | 2500/6220 [01:14<01:41, 36.61it/s]                                                    40%|████      | 2500/6220 [01:14<01:41, 36.61it/s]
 40%|████      | 2504/6220 [01:14<01:41, 36.74it/s]
 40%|████      | 2508/6220 [01:14<01:55, 32.11it/s]
 40%|████      | 2512/6220 [01:14<01:52, 32.85it/s]
 40%|████      | 2516/6220 [01:14<01:59, 30.93it/s]
 41%|████      | 2520/6220 [01:15<01:57, 31.62it/s]
 41%|████      | 2524/6220 [01:15<01:50, 33.48it/s]
 41%|████      | 2528/6220 [01:15<01:49, 33.73it/s]
 41%|████      | 2532/6220 [01:15<01:55, 32.01it/s]
 41%|████      | 2536/6220 [01:15<01:49, 33.53it/s]
 41%|████      | 2540/6220 [01:15<01:46, 34.52it/s]
 41%|████      | 2544/6220 [01:15<01:43, 35.44it/s]
 41%|████      | 2548/6220 [01:15<01:42, 35.72it/s]
 41%|████      | 2552/6220 [01:16<01:43, 35.54it/s]
 41%|████      | 2556/6220 [01:16<01:42, 35.82it/s]
 41%|████      | 2560/6220 [01:16<01:40, 36.43it/s]
 41%|████      | 2564/6220

[2m[36m(_objective pid=38439)[0m {'loss': 0.2204, 'learning_rate': 2.1227830811722118e-05, 'epoch': 0.96}


[2m[36m(_objective pid=38439)[0m                                                     48%|████▊     | 3000/6220 [01:29<01:46, 30.16it/s] 48%|████▊     | 3002/6220 [01:29<01:46, 30.33it/s]
 48%|████▊     | 3006/6220 [01:29<01:47, 29.84it/s]
 48%|████▊     | 3010/6220 [01:29<01:39, 32.27it/s]
 48%|████▊     | 3014/6220 [01:29<01:37, 32.86it/s]
 49%|████▊     | 3018/6220 [01:30<01:34, 34.02it/s]
 49%|████▊     | 3022/6220 [01:30<01:30, 35.46it/s]
 49%|████▊     | 3026/6220 [01:30<01:27, 36.31it/s]
 49%|████▊     | 3030/6220 [01:30<01:38, 32.54it/s]
 49%|████▉     | 3034/6220 [01:30<01:33, 34.23it/s]
 49%|████▉     | 3038/6220 [01:30<01:31, 34.71it/s]
 49%|████▉     | 3042/6220 [01:30<01:33, 34.14it/s]
 49%|████▉     | 3046/6220 [01:30<01:30, 35.20it/s]
 49%|████▉     | 3050/6220 [01:30<01:30, 35.08it/s]
 49%|████▉     | 3054/6220 [01:31<01:32, 34.10it/s]
 49%|████▉     | 3058/6220 [01:31<01:30, 34.96it/s]
 49%|████▉     | 3062/6220 [01:31<01:29, 35.22it/s]
 49%|████▉     | 3066/6220

Trial _objective_f556c_00012 finished iteration 1 at 2023-09-11 14:46:56. Total running time: 1hr 26min 35s
+-------------------------------------------------+
| Trial _objective_f556c_00012 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.536 |
| time_total_s                            104.536 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.19467 |
| eval_runtime                             9.5868 |
| eval_samples_per_second                 432.576 |
| eval_steps_per_second                     13.56 |
| objective                               0.19467 |
+-------------------------------------------------+

[2m[36m(_objective pid=38439)[0m {'eval_loss': 0.19467386603355408, 'eval_runtime': 9.5868, 'eval_samples_per_second': 432.576, 'eval_steps_per_second': 13.56, 'epoch': 1.0}


[2m[36m(_objective pid=38439)[0m                                                    
[2m[36m(_objective pid=38439)[0m                                                  [A 50%|█████     | 3110/6220 [01:42<01:27, 35.60it/s]
[2m[36m(_objective pid=38439)[0m 100%|██████████| 130/130 [00:09<00:00, 11.98it/s][A
[2m[36m(_objective pid=38439)[0m                                                  [A


Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:46:56. Total running time: 1hr 26min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                

 50%|█████     | 3111/6220 [01:43<42:30,  1.22it/s]
 50%|█████     | 3115/6220 [01:43<30:30,  1.70it/s]
 50%|█████     | 3119/6220 [01:43<22:13,  2.33it/s]
 50%|█████     | 3123/6220 [01:44<16:03,  3.21it/s]
 50%|█████     | 3127/6220 [01:44<11:44,  4.39it/s]
 50%|█████     | 3131/6220 [01:44<08:39,  5.94it/s]
 50%|█████     | 3135/6220 [01:44<06:30,  7.89it/s]
 50%|█████     | 3139/6220 [01:44<05:02, 10.19it/s]
 51%|█████     | 3143/6220 [01:44<03:57, 12.95it/s]
 51%|█████     | 3147/6220 [01:44<03:12, 15.93it/s]
 51%|█████     | 3151/6220 [01:44<02:40, 19.18it/s]
 51%|█████     | 3155/6220 [01:44<02:19, 21.97it/s]
 51%|█████     | 3159/6220 [01:45<02:13, 22.97it/s]
 51%|█████     | 3163/6220 [01:45<02:15, 22.51it/s]
 51%|█████     | 3167/6220 [01:45<02:02, 24.89it/s]
 51%|█████     | 3170/6220 [01:45<02:04, 24.41it/s]
 51%|█████     | 3173/6220 [01:45<01:59, 25.52it/s]
 51%|█████     | 3177/6220 [01:45<01:46, 28.68it/s]
 51%|█████     | 3181/6220 [01:45<01:41, 29.96it/s]
 51%|█████  

[2m[36m(_objective pid=38439)[0m {'loss': 0.1402, 'learning_rate': 1.793158379126837e-05, 'epoch': 1.13}


[2m[36m(_objective pid=38439)[0m  56%|█████▋    | 3505/6220 [01:55<01:14, 36.34it/s]
 56%|█████▋    | 3509/6220 [01:55<01:15, 35.78it/s]
 56%|█████▋    | 3513/6220 [01:55<01:19, 33.87it/s]
 57%|█████▋    | 3517/6220 [01:55<01:23, 32.50it/s]
 57%|█████▋    | 3521/6220 [01:55<01:19, 33.88it/s]
 57%|█████▋    | 3525/6220 [01:55<01:26, 31.25it/s]
 57%|█████▋    | 3529/6220 [01:56<01:28, 30.57it/s]
 57%|█████▋    | 3533/6220 [01:56<01:23, 32.03it/s]
 57%|█████▋    | 3537/6220 [01:56<01:26, 31.12it/s]
 57%|█████▋    | 3541/6220 [01:56<01:22, 32.55it/s]
 57%|█████▋    | 3545/6220 [01:56<01:18, 34.04it/s]
 57%|█████▋    | 3549/6220 [01:56<01:17, 34.34it/s]
 57%|█████▋    | 3553/6220 [01:56<01:15, 35.39it/s]
 57%|█████▋    | 3557/6220 [01:56<01:13, 36.14it/s]
 57%|█████▋    | 3561/6220 [01:56<01:17, 34.43it/s]
 57%|█████▋    | 3565/6220 [01:57<01:21, 32.58it/s]
 57%|█████▋    | 3569/6220 [01:57<01:20, 32.81it/s]
 57%|█████▋    | 3573/6220 [01:57<01:28, 29.91it/s]
 58%|█████▊    | 3577/6220 

[2m[36m(_objective pid=38439)[0m {'loss': 0.1177, 'learning_rate': 1.4635336770814626e-05, 'epoch': 1.29}


[2m[36m(_objective pid=38439)[0m  64%|██████▍   | 4001/6220 [02:10<01:03, 34.74it/s]
 64%|██████▍   | 4005/6220 [02:10<01:10, 31.59it/s]
 64%|██████▍   | 4009/6220 [02:10<01:14, 29.87it/s]
 65%|██████▍   | 4013/6220 [02:10<01:10, 31.53it/s]
 65%|██████▍   | 4017/6220 [02:10<01:06, 33.26it/s]
 65%|██████▍   | 4021/6220 [02:10<01:03, 34.71it/s]
 65%|██████▍   | 4025/6220 [02:10<01:01, 35.59it/s]
 65%|██████▍   | 4029/6220 [02:11<00:59, 36.69it/s]
 65%|██████▍   | 4033/6220 [02:11<00:58, 37.57it/s]
 65%|██████▍   | 4037/6220 [02:11<00:58, 37.36it/s]
 65%|██████▍   | 4041/6220 [02:11<00:57, 38.05it/s]
 65%|██████▌   | 4045/6220 [02:11<00:56, 38.57it/s]
 65%|██████▌   | 4049/6220 [02:11<00:56, 38.64it/s]
 65%|██████▌   | 4053/6220 [02:11<00:55, 38.78it/s]
 65%|██████▌   | 4057/6220 [02:11<00:56, 38.06it/s]
 65%|██████▌   | 4061/6220 [02:11<00:57, 37.79it/s]
 65%|██████▌   | 4066/6220 [02:12<00:55, 38.54it/s]
 65%|██████▌   | 4070/6220 [02:12<00:55, 38.65it/s]
 65%|██████▌   | 4074/6220 

Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:47:26. Total running time: 1hr 27min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                 

[2m[36m(_objective pid=38439)[0m  66%|██████▌   | 4094/6220 [02:12<00:59, 35.51it/s]
 66%|██████▌   | 4098/6220 [02:12<01:01, 34.41it/s]
 66%|██████▌   | 4102/6220 [02:13<00:59, 35.31it/s]
 66%|██████▌   | 4106/6220 [02:13<01:00, 35.00it/s]
 66%|██████▌   | 4110/6220 [02:13<01:00, 34.98it/s]
 66%|██████▌   | 4114/6220 [02:13<01:00, 34.87it/s]
 66%|██████▌   | 4118/6220 [02:13<01:01, 34.42it/s]
 66%|██████▋   | 4122/6220 [02:13<01:00, 34.62it/s]
 66%|██████▋   | 4126/6220 [02:13<00:59, 35.39it/s]
 66%|██████▋   | 4130/6220 [02:13<01:04, 32.57it/s]
 66%|██████▋   | 4134/6220 [02:14<01:02, 33.36it/s]
 67%|██████▋   | 4138/6220 [02:14<01:00, 34.33it/s]
 67%|██████▋   | 4142/6220 [02:14<01:00, 34.12it/s]
 67%|██████▋   | 4146/6220 [02:14<01:12, 28.73it/s]
 67%|██████▋   | 4150/6220 [02:14<01:07, 30.53it/s]
 67%|██████▋   | 4154/6220 [02:14<01:04, 32.17it/s]
 67%|██████▋   | 4158/6220 [02:14<01:02, 33.03it/s]
 67%|██████▋   | 4162/6220 [02:14<01:03, 32.40it/s]
 67%|██████▋   | 4166/6220 

[2m[36m(_objective pid=38439)[0m {'loss': 0.1161, 'learning_rate': 1.1339089750360882e-05, 'epoch': 1.45}


[2m[36m(_objective pid=38439)[0m  72%|███████▏  | 4505/6220 [02:24<00:50, 33.80it/s]
 72%|███████▏  | 4509/6220 [02:24<00:48, 35.27it/s]
 73%|███████▎  | 4513/6220 [02:24<00:47, 36.07it/s]
 73%|███████▎  | 4517/6220 [02:25<00:46, 36.31it/s]
 73%|███████▎  | 4521/6220 [02:25<00:46, 36.69it/s]
 73%|███████▎  | 4525/6220 [02:25<00:45, 36.98it/s]
 73%|███████▎  | 4529/6220 [02:25<00:45, 37.37it/s]
 73%|███████▎  | 4533/6220 [02:25<00:50, 33.33it/s]
 73%|███████▎  | 4537/6220 [02:25<00:51, 32.62it/s]
 73%|███████▎  | 4541/6220 [02:25<00:49, 33.92it/s]
 73%|███████▎  | 4545/6220 [02:25<00:50, 32.95it/s]
 73%|███████▎  | 4549/6220 [02:25<00:48, 34.32it/s]
 73%|███████▎  | 4553/6220 [02:26<00:46, 35.54it/s]
 73%|███████▎  | 4557/6220 [02:26<00:47, 35.08it/s]
 73%|███████▎  | 4561/6220 [02:26<00:52, 31.31it/s]
 73%|███████▎  | 4565/6220 [02:26<00:51, 32.19it/s]
 73%|███████▎  | 4569/6220 [02:26<00:49, 33.60it/s]
 74%|███████▎  | 4573/6220 [02:26<00:49, 33.41it/s]
 74%|███████▎  | 4577/6220 

[2m[36m(_objective pid=38439)[0m {'loss': 0.1276, 'learning_rate': 8.042842729907137e-06, 'epoch': 1.61}


[2m[36m(_objective pid=38439)[0m  80%|████████  | 5004/6220 [02:39<00:34, 34.82it/s]
 81%|████████  | 5008/6220 [02:39<00:34, 35.08it/s]
 81%|████████  | 5012/6220 [02:39<00:33, 36.24it/s]
 81%|████████  | 5016/6220 [02:40<00:34, 34.76it/s]
 81%|████████  | 5020/6220 [02:40<00:33, 35.55it/s]
 81%|████████  | 5024/6220 [02:40<00:36, 32.82it/s]
 81%|████████  | 5028/6220 [02:40<00:38, 30.81it/s]
 81%|████████  | 5032/6220 [02:40<00:36, 32.81it/s]
 81%|████████  | 5036/6220 [02:40<00:34, 34.67it/s]
 81%|████████  | 5040/6220 [02:40<00:33, 35.73it/s]
 81%|████████  | 5044/6220 [02:40<00:33, 34.75it/s]
 81%|████████  | 5048/6220 [02:41<00:37, 30.97it/s]
 81%|████████  | 5052/6220 [02:41<00:36, 31.70it/s]
 81%|████████▏ | 5056/6220 [02:41<00:38, 30.38it/s]
 81%|████████▏ | 5060/6220 [02:41<00:36, 32.20it/s]
 81%|████████▏ | 5064/6220 [02:41<00:33, 34.02it/s]
 81%|████████▏ | 5068/6220 [02:41<00:33, 34.20it/s]
 82%|████████▏ | 5073/6220 [02:41<00:31, 35.91it/s]
 82%|████████▏ | 5077/6220 

Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:47:56. Total running time: 1hr 27min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                

[2m[36m(_objective pid=38439)[0m  82%|████████▏ | 5109/6220 [02:42<00:35, 30.94it/s]
 82%|████████▏ | 5113/6220 [02:42<00:33, 32.87it/s]
 82%|████████▏ | 5117/6220 [02:43<00:33, 32.92it/s]
 82%|████████▏ | 5121/6220 [02:43<00:32, 34.12it/s]
 82%|████████▏ | 5125/6220 [02:43<00:32, 33.99it/s]
 82%|████████▏ | 5129/6220 [02:43<00:34, 31.41it/s]
 83%|████████▎ | 5133/6220 [02:43<00:32, 33.39it/s]
 83%|████████▎ | 5137/6220 [02:43<00:31, 34.32it/s]
 83%|████████▎ | 5141/6220 [02:43<00:30, 35.73it/s]
 83%|████████▎ | 5145/6220 [02:43<00:29, 36.02it/s]
 83%|████████▎ | 5149/6220 [02:43<00:29, 36.59it/s]
 83%|████████▎ | 5153/6220 [02:44<00:29, 35.72it/s]
 83%|████████▎ | 5157/6220 [02:44<00:29, 35.75it/s]
 83%|████████▎ | 5161/6220 [02:44<00:30, 35.26it/s]
 83%|████████▎ | 5165/6220 [02:44<00:31, 33.81it/s]
 83%|████████▎ | 5169/6220 [02:44<00:33, 31.78it/s]
 83%|████████▎ | 5173/6220 [02:44<00:32, 32.05it/s]
 83%|████████▎ | 5177/6220 [02:44<00:32, 32.12it/s]
 83%|████████▎ | 5181/6220 

[2m[36m(_objective pid=38439)[0m {'loss': 0.13, 'learning_rate': 4.746595709453392e-06, 'epoch': 1.77}


[2m[36m(_objective pid=38439)[0m  88%|████████▊ | 5497/6220 [02:54<00:19, 37.00it/s]                                                    88%|████████▊ | 5500/6220 [02:54<00:19, 37.00it/s]
 88%|████████▊ | 5501/6220 [02:54<00:19, 37.48it/s]
 89%|████████▊ | 5505/6220 [02:54<00:18, 37.92it/s]
 89%|████████▊ | 5509/6220 [02:54<00:19, 36.88it/s]
 89%|████████▊ | 5513/6220 [02:54<00:19, 37.15it/s]
 89%|████████▊ | 5517/6220 [02:54<00:18, 37.72it/s]
 89%|████████▉ | 5521/6220 [02:54<00:20, 34.70it/s]
 89%|████████▉ | 5525/6220 [02:55<00:19, 35.69it/s]
 89%|████████▉ | 5529/6220 [02:55<00:18, 36.68it/s]
 89%|████████▉ | 5533/6220 [02:55<00:18, 36.82it/s]
 89%|████████▉ | 5537/6220 [02:55<00:19, 34.77it/s]
 89%|████████▉ | 5541/6220 [02:55<00:19, 34.87it/s]
 89%|████████▉ | 5545/6220 [02:55<00:18, 35.75it/s]
 89%|████████▉ | 5549/6220 [02:55<00:18, 36.26it/s]
 89%|████████▉ | 5553/6220 [02:55<00:17, 37.15it/s]
 89%|████████▉ | 5557/6220 [02:55<00:19, 33.44it/s]
 89%|████████▉ | 5561/6220

[2m[36m(_objective pid=38439)[0m {'loss': 0.1148, 'learning_rate': 1.4503486889996477e-06, 'epoch': 1.93}


[2m[36m(_objective pid=38439)[0m  97%|█████████▋| 6004/6220 [03:09<00:05, 36.68it/s]
 97%|█████████▋| 6008/6220 [03:09<00:05, 37.15it/s]
 97%|█████████▋| 6012/6220 [03:09<00:05, 36.82it/s]
 97%|█████████▋| 6016/6220 [03:09<00:05, 34.13it/s]
 97%|█████████▋| 6020/6220 [03:09<00:05, 34.94it/s]
 97%|█████████▋| 6024/6220 [03:09<00:05, 34.61it/s]
 97%|█████████▋| 6028/6220 [03:09<00:05, 32.75it/s]
 97%|█████████▋| 6032/6220 [03:09<00:05, 33.73it/s]
 97%|█████████▋| 6036/6220 [03:10<00:05, 35.19it/s]
 97%|█████████▋| 6040/6220 [03:10<00:05, 35.69it/s]
 97%|█████████▋| 6044/6220 [03:10<00:04, 35.46it/s]
 97%|█████████▋| 6048/6220 [03:10<00:05, 32.01it/s]
 97%|█████████▋| 6052/6220 [03:10<00:05, 32.81it/s]
 97%|█████████▋| 6056/6220 [03:10<00:05, 30.55it/s]
 97%|█████████▋| 6060/6220 [03:10<00:05, 31.31it/s]
 97%|█████████▋| 6064/6220 [03:10<00:04, 32.90it/s]
 98%|█████████▊| 6068/6220 [03:11<00:04, 32.56it/s]
 98%|█████████▊| 6072/6220 [03:11<00:04, 32.73it/s]
 98%|█████████▊| 6076/6220 

Trial status: 12 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-11 14:48:26. Total running time: 1hr 28min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00012   RUNNING          4.10053e-05                    2                 

[2m[36m(_objective pid=38439)[0m  98%|█████████▊| 6122/6220 [03:12<00:02, 34.34it/s]
 98%|█████████▊| 6126/6220 [03:12<00:02, 34.62it/s]
 99%|█████████▊| 6130/6220 [03:13<00:02, 35.42it/s]
 99%|█████████▊| 6134/6220 [03:13<00:02, 35.20it/s]
 99%|█████████▊| 6138/6220 [03:13<00:02, 32.73it/s]
 99%|█████████▊| 6142/6220 [03:13<00:02, 31.78it/s]
 99%|█████████▉| 6146/6220 [03:13<00:02, 32.57it/s]
 99%|█████████▉| 6150/6220 [03:13<00:02, 34.09it/s]
 99%|█████████▉| 6154/6220 [03:13<00:02, 32.49it/s]
 99%|█████████▉| 6158/6220 [03:13<00:02, 30.35it/s]
 99%|█████████▉| 6162/6220 [03:14<00:01, 32.14it/s]
 99%|█████████▉| 6166/6220 [03:14<00:01, 32.24it/s]
 99%|█████████▉| 6170/6220 [03:14<00:01, 33.55it/s]
 99%|█████████▉| 6174/6220 [03:14<00:01, 34.83it/s]
 99%|█████████▉| 6178/6220 [03:14<00:01, 35.61it/s]
 99%|█████████▉| 6182/6220 [03:14<00:01, 34.97it/s]
 99%|█████████▉| 6186/6220 [03:14<00:00, 35.57it/s]
100%|█████████▉| 6190/6220 [03:14<00:00, 33.52it/s]
100%|█████████▉| 6194/6220 

Trial _objective_f556c_00012 finished iteration 2 at 2023-09-11 14:48:39. Total running time: 1hr 28min 18s
+-------------------------------------------------+
| Trial _objective_f556c_00012 result             |
+-------------------------------------------------+
| time_this_iter_s                        103.135 |
| time_total_s                            207.671 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.1955 |
| eval_runtime                             9.6128 |
| eval_samples_per_second                 431.403 |
| eval_steps_per_second                    13.524 |
| objective                                0.1955 |
+-------------------------------------------------+

[2m[36m(_objective pid=38439)[0m {'eval_loss': 0.19549740850925446, 'eval_runtime': 9.6128, 'eval_samples_per_second': 431.403, 'eval_steps_per_second': 13.524, 'epoch': 2.0}
Trial _objective_f556c_00012 complete

[2m[36m(_objective pid=38439)[0m                                                    100%|██████████| 6220/6220 [03:26<00:00, 32.22it/s]100%|██████████| 6220/6220 [03:26<00:00, 30.07it/s]


Trial _objective_f556c_00013 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00013 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             5e-05 |
| num_train_epochs                              4 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   2 |
| weight_decay                            0.28835 |
+-------------------------------------------------+



[2m[36m(_objective pid=39398)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
[2m[36m(_objective pid=39398)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=39398)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=39398)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:48:56. Total running time: 1hr 28min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m   1%|          | 246/24880 [00:06<10:41, 38.39it/s]
  1%|          | 250/24880 [00:06<10:42, 38.32it/s]
  1%|          | 254/24880 [00:06<11:10, 36.75it/s]
  1%|          | 258/24880 [00:06<11:05, 37.00it/s]
  1%|          | 262/24880 [00:07<10:57, 37.47it/s]
  1%|          | 266/24880 [00:07<11:03, 37.11it/s]
  1%|          | 270/24880 [00:07<11:27, 35.79it/s]
  1%|          | 274/24880 [00:07<11:22, 36.05it/s]
  1%|          | 278/24880 [00:07<11:08, 36.79it/s]
  1%|          | 282/24880 [00:07<10:57, 37.40it/s]
  1%|          | 286/24880 [00:07<10:50, 37.81it/s]
  1%|          | 290/24880 [00:07<11:06, 36.91it/s]
  1%|          | 294/24880 [00:07<11:24, 35.93it/s]
  1%|          | 298/24880 [00:08<11:22, 36.04it/s]
  1%|          | 302/24880 [00:08<11:04, 37.00it/s]
  1%|          | 306/24880 [00:08<10:51, 37.72it/s]
  1%|          | 310/24880 [00:08<10:44, 38.13it/s]
  1%|▏         | 314/24880 [00:08<10:38, 38.46it/s]
  1%|▏         | 319/24880 

[2m[36m(_objective pid=39398)[0m {'loss': 0.5307, 'learning_rate': 4.59329483116778e-05, 'epoch': 0.08}


[2m[36m(_objective pid=39398)[0m   2%|▏         | 500/24880 [00:13<11:20, 35.84it/s]                                                     2%|▏         | 500/24880 [00:13<11:20, 35.84it/s]
  2%|▏         | 504/24880 [00:13<11:26, 35.53it/s]
  2%|▏         | 508/24880 [00:13<11:10, 36.36it/s]
  2%|▏         | 512/24880 [00:13<11:42, 34.71it/s]
  2%|▏         | 516/24880 [00:14<11:32, 35.18it/s]
  2%|▏         | 520/24880 [00:14<11:32, 35.15it/s]
  2%|▏         | 524/24880 [00:14<11:21, 35.76it/s]
  2%|▏         | 528/24880 [00:14<11:12, 36.21it/s]
  2%|▏         | 532/24880 [00:14<10:55, 37.14it/s]
  2%|▏         | 536/24880 [00:14<11:03, 36.71it/s]
  2%|▏         | 540/24880 [00:14<11:08, 36.43it/s]
  2%|▏         | 544/24880 [00:14<10:56, 37.09it/s]
  2%|▏         | 548/24880 [00:14<10:42, 37.86it/s]
  2%|▏         | 552/24880 [00:14<10:37, 38.13it/s]
  2%|▏         | 556/24880 [00:15<10:30, 38.60it/s]
  2%|▏         | 560/24880 [00:15<10:24, 38.96it/s]
  2%|▏         | 564/24880

[2m[36m(_objective pid=39398)[0m {'loss': 0.4862, 'learning_rate': 4.499092722243092e-05, 'epoch': 0.16}


[2m[36m(_objective pid=39398)[0m   4%|▍         | 1007/24880 [00:26<10:45, 36.96it/s]
  4%|▍         | 1011/24880 [00:27<10:51, 36.62it/s]
  4%|▍         | 1015/24880 [00:27<11:17, 35.21it/s]
  4%|▍         | 1019/24880 [00:27<11:24, 34.87it/s]
  4%|▍         | 1023/24880 [00:27<11:18, 35.17it/s]
  4%|▍         | 1027/24880 [00:27<11:38, 34.14it/s]
  4%|▍         | 1031/24880 [00:27<11:21, 35.02it/s]
  4%|▍         | 1035/24880 [00:27<11:17, 35.22it/s]
  4%|▍         | 1039/24880 [00:27<11:02, 36.00it/s]
  4%|▍         | 1043/24880 [00:28<11:16, 35.21it/s]
  4%|▍         | 1047/24880 [00:28<10:56, 36.30it/s]
  4%|▍         | 1051/24880 [00:28<11:11, 35.48it/s]
  4%|▍         | 1055/24880 [00:28<11:00, 36.09it/s]
  4%|▍         | 1059/24880 [00:28<11:07, 35.70it/s]
  4%|▍         | 1063/24880 [00:28<10:58, 36.16it/s]
  4%|▍         | 1067/24880 [00:28<11:08, 35.61it/s]
  4%|▍         | 1071/24880 [00:28<11:11, 35.48it/s]
  4%|▍         | 1075/24880 [00:28<11:27, 34.64it/s]
  4%|▍   

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:49:26. Total running time: 1hr 29min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m   5%|▌         | 1361/24880 [00:36<10:35, 36.98it/s]
  5%|▌         | 1365/24880 [00:36<10:30, 37.27it/s]
  6%|▌         | 1369/24880 [00:36<10:24, 37.65it/s]
  6%|▌         | 1373/24880 [00:36<10:24, 37.65it/s]
  6%|▌         | 1378/24880 [00:37<10:09, 38.58it/s]
  6%|▌         | 1382/24880 [00:37<10:27, 37.46it/s]
  6%|▌         | 1386/24880 [00:37<10:24, 37.63it/s]
  6%|▌         | 1390/24880 [00:37<10:18, 37.95it/s]
  6%|▌         | 1394/24880 [00:37<10:14, 38.20it/s]
  6%|▌         | 1398/24880 [00:37<10:13, 38.26it/s]
  6%|▌         | 1402/24880 [00:37<10:12, 38.36it/s]
  6%|▌         | 1406/24880 [00:37<10:11, 38.37it/s]
  6%|▌         | 1410/24880 [00:37<10:10, 38.47it/s]
  6%|▌         | 1414/24880 [00:38<10:20, 37.84it/s]
  6%|▌         | 1418/24880 [00:38<10:13, 38.23it/s]
  6%|▌         | 1422/24880 [00:38<10:14, 38.20it/s]
  6%|▌         | 1426/24880 [00:38<10:14, 38.15it/s]
  6%|▌         | 1430/24880 [00:38<10:11, 38.38it/s]
  6%|▌   

[2m[36m(_objective pid=39398)[0m {'loss': 0.4247, 'learning_rate': 4.404890613318404e-05, 'epoch': 0.24}


[2m[36m(_objective pid=39398)[0m                                                       6%|▌         | 1500/24880 [00:40<11:29, 33.93it/s]  6%|▌         | 1502/24880 [00:40<11:11, 34.83it/s]
  6%|▌         | 1506/24880 [00:40<11:01, 35.31it/s]
  6%|▌         | 1510/24880 [00:40<11:05, 35.14it/s]
  6%|▌         | 1514/24880 [00:40<11:02, 35.29it/s]
  6%|▌         | 1518/24880 [00:40<11:03, 35.22it/s]
  6%|▌         | 1522/24880 [00:41<10:53, 35.73it/s]
  6%|▌         | 1526/24880 [00:41<10:40, 36.46it/s]
  6%|▌         | 1530/24880 [00:41<10:37, 36.60it/s]
  6%|▌         | 1534/24880 [00:41<10:32, 36.90it/s]
  6%|▌         | 1538/24880 [00:41<10:29, 37.07it/s]
  6%|▌         | 1542/24880 [00:41<10:25, 37.30it/s]
  6%|▌         | 1546/24880 [00:41<10:23, 37.41it/s]
  6%|▌         | 1550/24880 [00:41<10:30, 37.00it/s]
  6%|▌         | 1554/24880 [00:41<10:42, 36.32it/s]
  6%|▋         | 1558/24880 [00:42<11:07, 34.96it/s]
  6%|▋         | 1562/24880 [00:42<10:52, 35.75it/s]
  6%|▋  

[2m[36m(_objective pid=39398)[0m {'loss': 0.4564, 'learning_rate': 4.310688504393716e-05, 'epoch': 0.32}


[2m[36m(_objective pid=39398)[0m   8%|▊         | 2006/24880 [00:54<09:29, 40.14it/s]
  8%|▊         | 2011/24880 [00:54<09:26, 40.36it/s]
  8%|▊         | 2016/24880 [00:54<09:25, 40.45it/s]
  8%|▊         | 2021/24880 [00:54<09:22, 40.67it/s]
  8%|▊         | 2026/24880 [00:54<09:23, 40.58it/s]
  8%|▊         | 2031/24880 [00:54<09:31, 39.99it/s]
  8%|▊         | 2036/24880 [00:54<09:34, 39.77it/s]
  8%|▊         | 2040/24880 [00:54<09:33, 39.80it/s]
  8%|▊         | 2044/24880 [00:54<09:40, 39.32it/s]
  8%|▊         | 2048/24880 [00:55<09:39, 39.39it/s]
  8%|▊         | 2052/24880 [00:55<09:37, 39.53it/s]
  8%|▊         | 2056/24880 [00:55<09:35, 39.63it/s]
  8%|▊         | 2061/24880 [00:55<09:27, 40.21it/s]
  8%|▊         | 2066/24880 [00:55<09:28, 40.13it/s]
  8%|▊         | 2071/24880 [00:55<09:32, 39.81it/s]
  8%|▊         | 2075/24880 [00:55<09:33, 39.73it/s]
  8%|▊         | 2080/24880 [00:55<09:28, 40.11it/s]
  8%|▊         | 2085/24880 [00:56<09:30, 39.97it/s]
  8%|▊   

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:49:57. Total running time: 1hr 29min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  10%|▉         | 2474/24880 [01:06<10:04, 37.04it/s]
 10%|▉         | 2478/24880 [01:06<09:58, 37.44it/s]
 10%|▉         | 2482/24880 [01:06<09:48, 38.06it/s]
 10%|▉         | 2486/24880 [01:06<09:43, 38.37it/s]
 10%|█         | 2490/24880 [01:07<09:55, 37.59it/s]
 10%|█         | 2494/24880 [01:07<09:51, 37.82it/s]
 10%|█         | 2498/24880 [01:07<09:53, 37.72it/s]
 10%|█         | 2502/24880 [01:07<09:52, 37.80it/s]


[2m[36m(_objective pid=39398)[0m {'loss': 0.4196, 'learning_rate': 4.2164863954690285e-05, 'epoch': 0.4}


[2m[36m(_objective pid=39398)[0m  10%|█         | 2507/24880 [01:07<09:39, 38.62it/s]
 10%|█         | 2511/24880 [01:07<09:43, 38.32it/s]
 10%|█         | 2515/24880 [01:07<09:41, 38.44it/s]
 10%|█         | 2519/24880 [01:07<09:39, 38.60it/s]
 10%|█         | 2524/24880 [01:07<09:28, 39.30it/s]
 10%|█         | 2528/24880 [01:07<09:38, 38.65it/s]
 10%|█         | 2533/24880 [01:08<09:34, 38.92it/s]
 10%|█         | 2538/24880 [01:08<09:28, 39.33it/s]
 10%|█         | 2542/24880 [01:08<09:26, 39.42it/s]
 10%|█         | 2546/24880 [01:08<09:32, 39.00it/s]
 10%|█         | 2550/24880 [01:08<09:29, 39.18it/s]
 10%|█         | 2554/24880 [01:08<09:28, 39.25it/s]
 10%|█         | 2558/24880 [01:08<09:35, 38.78it/s]
 10%|█         | 2562/24880 [01:08<09:35, 38.81it/s]
 10%|█         | 2566/24880 [01:08<09:32, 38.98it/s]
 10%|█         | 2570/24880 [01:09<09:29, 39.15it/s]
 10%|█         | 2575/24880 [01:09<09:25, 39.44it/s]
 10%|█         | 2579/24880 [01:09<09:32, 38.92it/s]
 10%|█   

[2m[36m(_objective pid=39398)[0m {'loss': 0.3719, 'learning_rate': 4.122284286544341e-05, 'epoch': 0.48}


[2m[36m(_objective pid=39398)[0m                                                      12%|█▏        | 3000/24880 [01:20<10:00, 36.44it/s] 12%|█▏        | 3002/24880 [01:20<09:53, 36.84it/s]
 12%|█▏        | 3006/24880 [01:20<09:53, 36.85it/s]
 12%|█▏        | 3010/24880 [01:20<09:48, 37.15it/s]
 12%|█▏        | 3014/24880 [01:21<09:46, 37.27it/s]
 12%|█▏        | 3018/24880 [01:21<09:41, 37.62it/s]
 12%|█▏        | 3022/24880 [01:21<09:51, 36.97it/s]
 12%|█▏        | 3026/24880 [01:21<09:58, 36.49it/s]
 12%|█▏        | 3030/24880 [01:21<10:06, 36.02it/s]
 12%|█▏        | 3034/24880 [01:21<09:53, 36.81it/s]
 12%|█▏        | 3038/24880 [01:21<09:45, 37.30it/s]
 12%|█▏        | 3042/24880 [01:21<09:38, 37.74it/s]
 12%|█▏        | 3046/24880 [01:21<09:37, 37.84it/s]
 12%|█▏        | 3051/24880 [01:22<09:29, 38.36it/s]
 12%|█▏        | 3055/24880 [01:22<09:36, 37.87it/s]
 12%|█▏        | 3059/24880 [01:22<09:34, 37.97it/s]
 12%|█▏        | 3063/24880 [01:22<09:30, 38.21it/s]
 12%|█▏ 

[2m[36m(_objective pid=39398)[0m {'loss': 0.3784, 'learning_rate': 4.028082177619653e-05, 'epoch': 0.56}


[2m[36m(_objective pid=39398)[0m  14%|█▍        | 3499/24880 [01:34<10:00, 35.61it/s]                                                     14%|█▍        | 3500/24880 [01:34<10:00, 35.61it/s]
 14%|█▍        | 3503/24880 [01:34<09:48, 36.31it/s]
 14%|█▍        | 3507/24880 [01:34<09:41, 36.77it/s]
 14%|█▍        | 3511/24880 [01:34<09:45, 36.49it/s]
 14%|█▍        | 3515/24880 [01:34<09:37, 37.02it/s]
 14%|█▍        | 3519/24880 [01:34<09:42, 36.69it/s]
 14%|█▍        | 3523/24880 [01:34<09:55, 35.86it/s]
 14%|█▍        | 3527/24880 [01:34<09:53, 35.98it/s]
 14%|█▍        | 3531/24880 [01:35<09:47, 36.34it/s]
 14%|█▍        | 3535/24880 [01:35<09:52, 36.05it/s]
 14%|█▍        | 3539/24880 [01:35<09:45, 36.42it/s]
 14%|█▍        | 3543/24880 [01:35<10:10, 34.98it/s]
 14%|█▍        | 3547/24880 [01:35<10:00, 35.54it/s]
 14%|█▍        | 3551/24880 [01:35<10:28, 33.92it/s]
 14%|█▍        | 3555/24880 [01:35<10:41, 33.25it/s]
 14%|█▍        | 3559/24880 [01:35<10:23, 34.17it/s]
 14%|█▍ 

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:50:27. Total running time: 1hr 30min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  14%|█▍        | 3591/24880 [01:36<09:40, 36.66it/s]
 14%|█▍        | 3595/24880 [01:36<09:30, 37.32it/s]
 14%|█▍        | 3599/24880 [01:36<09:51, 35.95it/s]
 14%|█▍        | 3603/24880 [01:37<09:41, 36.61it/s]
 14%|█▍        | 3607/24880 [01:37<09:38, 36.77it/s]
 15%|█▍        | 3611/24880 [01:37<09:43, 36.46it/s]
 15%|█▍        | 3615/24880 [01:37<10:06, 35.05it/s]
 15%|█▍        | 3619/24880 [01:37<10:18, 34.40it/s]
 15%|█▍        | 3623/24880 [01:37<09:53, 35.81it/s]
 15%|█▍        | 3627/24880 [01:37<09:56, 35.60it/s]
 15%|█▍        | 3631/24880 [01:37<09:40, 36.60it/s]
 15%|█▍        | 3636/24880 [01:37<09:18, 38.02it/s]
 15%|█▍        | 3641/24880 [01:38<09:04, 39.00it/s]
 15%|█▍        | 3646/24880 [01:38<08:57, 39.50it/s]
 15%|█▍        | 3650/24880 [01:38<08:56, 39.57it/s]
 15%|█▍        | 3655/24880 [01:38<08:59, 39.31it/s]
 15%|█▍        | 3660/24880 [01:38<08:55, 39.61it/s]
 15%|█▍        | 3665/24880 [01:38<08:51, 39.88it/s]
 15%|█▍  

[2m[36m(_objective pid=39398)[0m {'loss': 0.2846, 'learning_rate': 3.933880068694965e-05, 'epoch': 0.64}


[2m[36m(_objective pid=39398)[0m  16%|█▌        | 4002/24880 [01:47<09:04, 38.34it/s]
 16%|█▌        | 4006/24880 [01:47<09:10, 37.89it/s]
 16%|█▌        | 4010/24880 [01:47<09:14, 37.63it/s]
 16%|█▌        | 4014/24880 [01:47<09:13, 37.69it/s]
 16%|█▌        | 4018/24880 [01:47<09:09, 37.97it/s]
 16%|█▌        | 4022/24880 [01:47<09:15, 37.57it/s]
 16%|█▌        | 4026/24880 [01:47<09:05, 38.20it/s]
 16%|█▌        | 4031/24880 [01:48<08:54, 38.97it/s]
 16%|█▌        | 4036/24880 [01:48<09:09, 37.93it/s]
 16%|█▌        | 4040/24880 [01:48<09:07, 38.09it/s]
 16%|█▋        | 4044/24880 [01:48<09:13, 37.65it/s]
 16%|█▋        | 4048/24880 [01:48<09:11, 37.75it/s]
 16%|█▋        | 4052/24880 [01:48<09:33, 36.34it/s]
 16%|█▋        | 4056/24880 [01:48<09:20, 37.17it/s]
 16%|█▋        | 4060/24880 [01:48<09:12, 37.65it/s]
 16%|█▋        | 4064/24880 [01:49<09:07, 38.05it/s]
 16%|█▋        | 4068/24880 [01:49<08:59, 38.60it/s]
 16%|█▋        | 4072/24880 [01:49<08:58, 38.66it/s]
 16%|█▋  

[2m[36m(_objective pid=39398)[0m {'loss': 0.2658, 'learning_rate': 3.839677959770277e-05, 'epoch': 0.72}


[2m[36m(_objective pid=39398)[0m  18%|█▊        | 4504/24880 [02:00<08:49, 38.46it/s]
 18%|█▊        | 4508/24880 [02:00<08:48, 38.56it/s]
 18%|█▊        | 4512/24880 [02:00<08:44, 38.83it/s]
 18%|█▊        | 4516/24880 [02:00<08:40, 39.11it/s]
 18%|█▊        | 4521/24880 [02:01<08:32, 39.76it/s]
 18%|█▊        | 4525/24880 [02:01<08:55, 38.04it/s]
 18%|█▊        | 4529/24880 [02:01<08:50, 38.37it/s]
 18%|█▊        | 4534/24880 [02:01<08:43, 38.84it/s]
 18%|█▊        | 4539/24880 [02:01<08:35, 39.43it/s]
 18%|█▊        | 4544/24880 [02:01<08:30, 39.80it/s]
 18%|█▊        | 4549/24880 [02:01<08:27, 40.07it/s]
 18%|█▊        | 4554/24880 [02:01<08:24, 40.31it/s]
 18%|█▊        | 4559/24880 [02:02<08:22, 40.41it/s]
 18%|█▊        | 4564/24880 [02:02<08:41, 38.98it/s]
 18%|█▊        | 4569/24880 [02:02<08:37, 39.25it/s]
 18%|█▊        | 4573/24880 [02:02<08:35, 39.40it/s]
 18%|█▊        | 4577/24880 [02:02<08:35, 39.36it/s]
 18%|█▊        | 4582/24880 [02:02<08:32, 39.64it/s]
 18%|█▊  

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:50:57. Total running time: 1hr 30min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  19%|█▉        | 4735/24880 [02:06<09:35, 35.03it/s]
 19%|█▉        | 4739/24880 [02:06<09:32, 35.17it/s]
 19%|█▉        | 4743/24880 [02:07<09:35, 34.96it/s]
 19%|█▉        | 4747/24880 [02:07<09:33, 35.11it/s]
 19%|█▉        | 4751/24880 [02:07<09:41, 34.62it/s]
 19%|█▉        | 4755/24880 [02:07<09:45, 34.37it/s]
 19%|█▉        | 4759/24880 [02:07<09:37, 34.84it/s]
 19%|█▉        | 4763/24880 [02:07<09:58, 33.59it/s]
 19%|█▉        | 4767/24880 [02:07<09:51, 34.01it/s]
 19%|█▉        | 4771/24880 [02:07<09:50, 34.07it/s]
 19%|█▉        | 4775/24880 [02:07<09:53, 33.89it/s]
 19%|█▉        | 4779/24880 [02:08<09:45, 34.33it/s]
 19%|█▉        | 4783/24880 [02:08<09:52, 33.90it/s]
 19%|█▉        | 4787/24880 [02:08<09:47, 34.19it/s]
 19%|█▉        | 4791/24880 [02:08<09:52, 33.92it/s]
 19%|█▉        | 4795/24880 [02:08<09:54, 33.80it/s]
 19%|█▉        | 4799/24880 [02:08<09:54, 33.80it/s]
 19%|█▉        | 4803/24880 [02:08<09:54, 33.78it/s]
 19%|█▉  

[2m[36m(_objective pid=39398)[0m {'loss': 0.2409, 'learning_rate': 3.745475850845589e-05, 'epoch': 0.8}


[2m[36m(_objective pid=39398)[0m  20%|██        | 4999/24880 [02:13<08:32, 38.82it/s]                                                     20%|██        | 5000/24880 [02:14<08:32, 38.82it/s]
 20%|██        | 5004/24880 [02:14<08:26, 39.20it/s]
 20%|██        | 5009/24880 [02:14<08:20, 39.70it/s]
 20%|██        | 5013/24880 [02:14<08:43, 37.95it/s]
 20%|██        | 5017/24880 [02:14<08:38, 38.29it/s]
 20%|██        | 5021/24880 [02:14<08:37, 38.37it/s]
 20%|██        | 5026/24880 [02:14<08:46, 37.68it/s]
 20%|██        | 5031/24880 [02:14<08:34, 38.56it/s]
 20%|██        | 5036/24880 [02:14<08:26, 39.18it/s]
 20%|██        | 5040/24880 [02:15<08:30, 38.88it/s]
 20%|██        | 5045/24880 [02:15<08:19, 39.73it/s]
 20%|██        | 5049/24880 [02:15<08:18, 39.77it/s]
 20%|██        | 5054/24880 [02:15<08:15, 40.02it/s]
 20%|██        | 5058/24880 [02:15<08:31, 38.75it/s]
 20%|██        | 5063/24880 [02:15<08:25, 39.22it/s]
 20%|██        | 5067/24880 [02:15<08:24, 39.25it/s]
 20%|██ 

[2m[36m(_objective pid=39398)[0m {'loss': 0.2812, 'learning_rate': 3.6512737419209014e-05, 'epoch': 0.88}


[2m[36m(_objective pid=39398)[0m  22%|██▏       | 5507/24880 [02:27<08:10, 39.52it/s]
 22%|██▏       | 5511/24880 [02:27<08:12, 39.35it/s]
 22%|██▏       | 5515/24880 [02:27<08:32, 37.77it/s]
 22%|██▏       | 5520/24880 [02:27<08:22, 38.56it/s]
 22%|██▏       | 5525/24880 [02:27<08:14, 39.15it/s]
 22%|██▏       | 5530/24880 [02:28<08:08, 39.59it/s]
 22%|██▏       | 5535/24880 [02:28<08:05, 39.81it/s]
 22%|██▏       | 5540/24880 [02:28<08:04, 39.93it/s]
 22%|██▏       | 5545/24880 [02:28<08:06, 39.71it/s]
 22%|██▏       | 5550/24880 [02:28<08:02, 40.02it/s]
 22%|██▏       | 5555/24880 [02:28<08:09, 39.44it/s]
 22%|██▏       | 5559/24880 [02:28<08:27, 38.04it/s]
 22%|██▏       | 5564/24880 [02:28<08:17, 38.79it/s]
 22%|██▏       | 5568/24880 [02:29<08:19, 38.64it/s]
 22%|██▏       | 5572/24880 [02:29<08:21, 38.46it/s]
 22%|██▏       | 5576/24880 [02:29<08:21, 38.49it/s]
 22%|██▏       | 5580/24880 [02:29<08:20, 38.56it/s]
 22%|██▏       | 5584/24880 [02:29<08:23, 38.31it/s]
 22%|██▏ 

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:51:27. Total running time: 1hr 31min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  24%|██▎       | 5867/24880 [02:36<08:20, 38.02it/s]
 24%|██▎       | 5871/24880 [02:37<08:31, 37.18it/s]
 24%|██▎       | 5875/24880 [02:37<08:31, 37.18it/s]
 24%|██▎       | 5879/24880 [02:37<08:40, 36.50it/s]
 24%|██▎       | 5883/24880 [02:37<08:36, 36.76it/s]
 24%|██▎       | 5887/24880 [02:37<08:46, 36.06it/s]
 24%|██▎       | 5891/24880 [02:37<08:42, 36.36it/s]
 24%|██▎       | 5895/24880 [02:37<08:36, 36.74it/s]
 24%|██▎       | 5899/24880 [02:37<08:28, 37.34it/s]
 24%|██▎       | 5903/24880 [02:37<08:29, 37.27it/s]
 24%|██▎       | 5907/24880 [02:38<08:26, 37.44it/s]
 24%|██▍       | 5911/24880 [02:38<08:36, 36.73it/s]
 24%|██▍       | 5915/24880 [02:38<08:27, 37.38it/s]
 24%|██▍       | 5919/24880 [02:38<08:30, 37.15it/s]
 24%|██▍       | 5923/24880 [02:38<08:26, 37.41it/s]
 24%|██▍       | 5927/24880 [02:38<08:32, 37.02it/s]
 24%|██▍       | 5931/24880 [02:38<08:37, 36.59it/s]
 24%|██▍       | 5935/24880 [02:38<09:02, 34.91it/s]
 24%|██▍ 

[2m[36m(_objective pid=39398)[0m {'loss': 0.3153, 'learning_rate': 3.557071632996213e-05, 'epoch': 0.96}


[2m[36m(_objective pid=39398)[0m  24%|██▍       | 6003/24880 [02:40<08:42, 36.10it/s]
 24%|██▍       | 6007/24880 [02:40<08:46, 35.88it/s]
 24%|██▍       | 6011/24880 [02:40<09:05, 34.57it/s]
 24%|██▍       | 6015/24880 [02:41<08:59, 34.99it/s]
 24%|██▍       | 6019/24880 [02:41<08:46, 35.82it/s]
 24%|██▍       | 6023/24880 [02:41<08:49, 35.60it/s]
 24%|██▍       | 6027/24880 [02:41<08:44, 35.95it/s]
 24%|██▍       | 6031/24880 [02:41<08:42, 36.09it/s]
 24%|██▍       | 6035/24880 [02:41<08:35, 36.57it/s]
 24%|██▍       | 6039/24880 [02:41<08:34, 36.61it/s]
 24%|██▍       | 6043/24880 [02:41<08:37, 36.37it/s]
 24%|██▍       | 6047/24880 [02:41<08:44, 35.94it/s]
 24%|██▍       | 6051/24880 [02:42<08:40, 36.18it/s]
 24%|██▍       | 6055/24880 [02:42<08:40, 36.16it/s]
 24%|██▍       | 6059/24880 [02:42<08:57, 35.02it/s]
 24%|██▍       | 6063/24880 [02:42<09:05, 34.52it/s]
 24%|██▍       | 6067/24880 [02:42<08:55, 35.10it/s]
 24%|██▍       | 6071/24880 [02:42<08:56, 35.06it/s]
 24%|██▍ 

Trial _objective_f556c_00013 finished iteration 1 at 2023-09-11 14:51:46. Total running time: 1hr 31min 25s
+-------------------------------------------------+
| Trial _objective_f556c_00013 result             |
+-------------------------------------------------+
| time_this_iter_s                        178.478 |
| time_total_s                            178.478 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.27074 |
| eval_runtime                             9.5454 |
| eval_samples_per_second                 434.448 |
| eval_steps_per_second                    13.619 |
| objective                               0.27074 |
+-------------------------------------------------+

[2m[36m(_objective pid=39398)[0m {'eval_loss': 0.2707449197769165, 'eval_runtime': 9.5454, 'eval_samples_per_second': 434.448, 'eval_steps_per_second': 13.619, 'epoch': 1.0}


[2m[36m(_objective pid=39398)[0m  25%|██▌       | 6224/24880 [02:57<4:24:02,  1.18it/s]
 25%|██▌       | 6228/24880 [02:57<3:07:21,  1.66it/s]
 25%|██▌       | 6232/24880 [02:57<2:13:40,  2.32it/s]
 25%|██▌       | 6236/24880 [02:57<1:36:34,  3.22it/s]
 25%|██▌       | 6240/24880 [02:58<1:10:40,  4.40it/s]
 25%|██▌       | 6244/24880 [02:58<52:09,  5.96it/s]  
 25%|██▌       | 6248/24880 [02:58<39:18,  7.90it/s]
 25%|██▌       | 6252/24880 [02:58<30:08, 10.30it/s]
 25%|██▌       | 6256/24880 [02:58<23:47, 13.05it/s]
 25%|██▌       | 6260/24880 [02:58<19:22, 16.02it/s]
 25%|██▌       | 6264/24880 [02:58<16:22, 18.95it/s]
 25%|██▌       | 6268/24880 [02:58<14:09, 21.91it/s]
 25%|██▌       | 6272/24880 [02:59<12:52, 24.10it/s]
 25%|██▌       | 6276/24880 [02:59<11:43, 26.44it/s]
 25%|██▌       | 6280/24880 [02:59<10:50, 28.60it/s]
 25%|██▌       | 6284/24880 [02:59<10:18, 30.06it/s]
 25%|██▌       | 6288/24880 [02:59<09:46, 31.68it/s]
 25%|██▌       | 6292/24880 [02:59<09:27, 32.75it/

[2m[36m(_objective pid=39398)[0m {'loss': 0.2633, 'learning_rate': 3.462869524071526e-05, 'epoch': 1.05}


[2m[36m(_objective pid=39398)[0m                                                      26%|██▌       | 6500/24880 [03:05<07:59, 38.31it/s]
 26%|██▌       | 6501/24880 [03:05<07:57, 38.48it/s]
 26%|██▌       | 6505/24880 [03:05<07:56, 38.55it/s]
 26%|██▌       | 6509/24880 [03:05<07:55, 38.65it/s]
 26%|██▌       | 6513/24880 [03:05<07:58, 38.40it/s]
 26%|██▌       | 6518/24880 [03:05<07:50, 39.07it/s]
 26%|██▌       | 6522/24880 [03:05<07:46, 39.32it/s]
 26%|██▌       | 6526/24880 [03:05<07:52, 38.81it/s]
 26%|██▌       | 6530/24880 [03:05<07:53, 38.76it/s]
 26%|██▋       | 6535/24880 [03:05<07:47, 39.24it/s]
 26%|██▋       | 6540/24880 [03:06<07:43, 39.60it/s]
 26%|██▋       | 6544/24880 [03:06<07:49, 39.04it/s]
 26%|██▋       | 6549/24880 [03:06<07:43, 39.54it/s]
 26%|██▋       | 6553/24880 [03:06<07:47, 39.23it/s]
 26%|██▋       | 6557/24880 [03:06<08:02, 37.95it/s]
 26%|██▋       | 6561/24880 [03:06<07:58, 38.28it/s]


Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:51:57. Total running time: 1hr 31min 35s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  26%|██▋       | 6566/24880 [03:06<07:53, 38.65it/s]
 26%|██▋       | 6570/24880 [03:06<07:56, 38.43it/s]
 26%|██▋       | 6575/24880 [03:07<07:47, 39.17it/s]
 26%|██▋       | 6579/24880 [03:07<07:46, 39.26it/s]
 26%|██▋       | 6583/24880 [03:07<08:10, 37.28it/s]
 26%|██▋       | 6587/24880 [03:07<08:01, 37.95it/s]
 26%|██▋       | 6591/24880 [03:07<08:00, 38.07it/s]
 27%|██▋       | 6595/24880 [03:07<08:03, 37.82it/s]
 27%|██▋       | 6599/24880 [03:07<07:59, 38.14it/s]
 27%|██▋       | 6603/24880 [03:07<08:01, 37.92it/s]
 27%|██▋       | 6607/24880 [03:07<07:56, 38.32it/s]
 27%|██▋       | 6611/24880 [03:07<08:09, 37.32it/s]
 27%|██▋       | 6616/24880 [03:08<07:57, 38.27it/s]
 27%|██▋       | 6620/24880 [03:08<08:08, 37.36it/s]
 27%|██▋       | 6624/24880 [03:08<08:08, 37.37it/s]
 27%|██▋       | 6628/24880 [03:08<08:12, 37.09it/s]
 27%|██▋       | 6632/24880 [03:08<08:07, 37.40it/s]
 27%|██▋       | 6636/24880 [03:08<08:09, 37.30it/s]
 27%|██▋ 

[2m[36m(_objective pid=39398)[0m {'loss': 0.1945, 'learning_rate': 3.368667415146838e-05, 'epoch': 1.13}


[2m[36m(_objective pid=39398)[0m  28%|██▊       | 7006/24880 [03:18<07:40, 38.86it/s]
 28%|██▊       | 7010/24880 [03:18<07:38, 38.97it/s]
 28%|██▊       | 7014/24880 [03:18<07:38, 38.99it/s]
 28%|██▊       | 7018/24880 [03:18<07:38, 39.00it/s]
 28%|██▊       | 7022/24880 [03:19<07:40, 38.75it/s]
 28%|██▊       | 7027/24880 [03:19<07:34, 39.26it/s]
 28%|██▊       | 7031/24880 [03:19<07:38, 38.95it/s]
 28%|██▊       | 7035/24880 [03:19<07:38, 38.93it/s]
 28%|██▊       | 7039/24880 [03:19<07:35, 39.15it/s]
 28%|██▊       | 7043/24880 [03:19<07:34, 39.21it/s]
 28%|██▊       | 7047/24880 [03:19<07:55, 37.48it/s]
 28%|██▊       | 7051/24880 [03:19<07:48, 38.05it/s]
 28%|██▊       | 7055/24880 [03:19<07:52, 37.72it/s]
 28%|██▊       | 7059/24880 [03:20<07:50, 37.91it/s]
 28%|██▊       | 7063/24880 [03:20<07:42, 38.50it/s]
 28%|██▊       | 7067/24880 [03:20<07:44, 38.33it/s]
 28%|██▊       | 7071/24880 [03:20<07:43, 38.39it/s]
 28%|██▊       | 7075/24880 [03:20<07:53, 37.61it/s]
 28%|██▊ 

[2m[36m(_objective pid=39398)[0m {'loss': 0.1891, 'learning_rate': 3.27446530622215e-05, 'epoch': 1.21}


[2m[36m(_objective pid=39398)[0m  30%|███       | 7501/24880 [03:31<07:47, 37.21it/s]
 30%|███       | 7505/24880 [03:31<07:44, 37.40it/s]
 30%|███       | 7509/24880 [03:31<07:51, 36.85it/s]
 30%|███       | 7513/24880 [03:32<07:48, 37.09it/s]
 30%|███       | 7517/24880 [03:32<07:45, 37.32it/s]
 30%|███       | 7521/24880 [03:32<07:40, 37.69it/s]
 30%|███       | 7525/24880 [03:32<07:43, 37.40it/s]
 30%|███       | 7529/24880 [03:32<07:43, 37.43it/s]
 30%|███       | 7533/24880 [03:32<07:35, 38.10it/s]
 30%|███       | 7537/24880 [03:32<07:40, 37.62it/s]
 30%|███       | 7541/24880 [03:32<08:09, 35.43it/s]
 30%|███       | 7545/24880 [03:32<07:56, 36.37it/s]
 30%|███       | 7549/24880 [03:33<07:45, 37.24it/s]
 30%|███       | 7553/24880 [03:33<07:46, 37.15it/s]
 30%|███       | 7557/24880 [03:33<07:43, 37.34it/s]
 30%|███       | 7561/24880 [03:33<07:51, 36.70it/s]
 30%|███       | 7565/24880 [03:33<07:53, 36.55it/s]
 30%|███       | 7569/24880 [03:33<07:50, 36.77it/s]
 30%|███ 

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:52:27. Total running time: 1hr 32min 5s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  31%|███       | 7695/24880 [03:36<07:29, 38.21it/s]
 31%|███       | 7699/24880 [03:37<07:25, 38.56it/s]
 31%|███       | 7703/24880 [03:37<07:43, 37.03it/s]
 31%|███       | 7707/24880 [03:37<07:36, 37.63it/s]
 31%|███       | 7711/24880 [03:37<07:28, 38.28it/s]
 31%|███       | 7715/24880 [03:37<07:25, 38.56it/s]
 31%|███       | 7719/24880 [03:37<07:24, 38.61it/s]
 31%|███       | 7723/24880 [03:37<07:23, 38.66it/s]
 31%|███       | 7728/24880 [03:37<07:15, 39.41it/s]
 31%|███       | 7732/24880 [03:37<07:14, 39.45it/s]
 31%|███       | 7736/24880 [03:37<07:21, 38.86it/s]
 31%|███       | 7740/24880 [03:38<07:30, 38.03it/s]
 31%|███       | 7744/24880 [03:38<07:27, 38.26it/s]
 31%|███       | 7748/24880 [03:38<07:33, 37.79it/s]
 31%|███       | 7752/24880 [03:38<07:35, 37.62it/s]
 31%|███       | 7756/24880 [03:38<07:41, 37.12it/s]
 31%|███       | 7760/24880 [03:38<07:41, 37.12it/s]
 31%|███       | 7764/24880 [03:38<07:32, 37.81it/s]
 31%|███ 

[2m[36m(_objective pid=39398)[0m {'loss': 0.1506, 'learning_rate': 3.180263197297462e-05, 'epoch': 1.29}


[2m[36m(_objective pid=39398)[0m  32%|███▏      | 8003/24880 [03:45<07:37, 36.92it/s]
 32%|███▏      | 8007/24880 [03:45<08:24, 33.47it/s]
 32%|███▏      | 8011/24880 [03:45<08:21, 33.62it/s]
 32%|███▏      | 8015/24880 [03:45<08:34, 32.75it/s]
 32%|███▏      | 8019/24880 [03:45<08:16, 33.96it/s]
 32%|███▏      | 8023/24880 [03:45<07:59, 35.14it/s]
 32%|███▏      | 8027/24880 [03:45<07:46, 36.16it/s]
 32%|███▏      | 8031/24880 [03:45<07:33, 37.12it/s]
 32%|███▏      | 8035/24880 [03:45<07:28, 37.56it/s]
 32%|███▏      | 8039/24880 [03:46<07:27, 37.67it/s]
 32%|███▏      | 8043/24880 [03:46<07:22, 38.02it/s]
 32%|███▏      | 8047/24880 [03:46<07:22, 38.01it/s]
 32%|███▏      | 8052/24880 [03:46<07:15, 38.64it/s]
 32%|███▏      | 8056/24880 [03:46<07:15, 38.61it/s]
 32%|███▏      | 8060/24880 [03:46<07:12, 38.93it/s]
 32%|███▏      | 8064/24880 [03:46<07:09, 39.13it/s]
 32%|███▏      | 8068/24880 [03:46<07:21, 38.09it/s]
 32%|███▏      | 8072/24880 [03:46<07:22, 37.94it/s]
 32%|███▏

[2m[36m(_objective pid=39398)[0m {'loss': 0.2153, 'learning_rate': 3.0860610883727744e-05, 'epoch': 1.37}


[2m[36m(_objective pid=39398)[0m                                                      34%|███▍      | 8500/24880 [03:58<07:00, 38.91it/s] 34%|███▍      | 8504/24880 [03:58<06:55, 39.40it/s]
 34%|███▍      | 8508/24880 [03:58<06:56, 39.34it/s]
 34%|███▍      | 8512/24880 [03:58<06:55, 39.43it/s]
 34%|███▍      | 8516/24880 [03:58<07:14, 37.62it/s]
 34%|███▍      | 8521/24880 [03:58<07:05, 38.41it/s]
 34%|███▍      | 8525/24880 [03:58<07:03, 38.66it/s]
 34%|███▍      | 8530/24880 [03:58<06:59, 39.02it/s]
 34%|███▍      | 8535/24880 [03:58<06:53, 39.54it/s]
 34%|███▍      | 8539/24880 [03:59<07:05, 38.39it/s]
 34%|███▍      | 8543/24880 [03:59<07:07, 38.18it/s]
 34%|███▍      | 8547/24880 [03:59<07:31, 36.18it/s]
 34%|███▍      | 8551/24880 [03:59<07:19, 37.15it/s]
 34%|███▍      | 8555/24880 [03:59<07:10, 37.90it/s]
 34%|███▍      | 8560/24880 [03:59<07:03, 38.57it/s]
 34%|███▍      | 8564/24880 [03:59<07:32, 36.03it/s]
 34%|███▍      | 8568/24880 [03:59<07:25, 36.64it/s]
 34%|███

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:52:57. Total running time: 1hr 32min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  36%|███▌      | 8835/24880 [04:06<06:48, 39.24it/s]
 36%|███▌      | 8839/24880 [04:06<06:54, 38.72it/s]
 36%|███▌      | 8843/24880 [04:07<06:58, 38.33it/s]
 36%|███▌      | 8847/24880 [04:07<06:59, 38.22it/s]
 36%|███▌      | 8851/24880 [04:07<06:54, 38.65it/s]
 36%|███▌      | 8855/24880 [04:07<07:02, 37.90it/s]
 36%|███▌      | 8859/24880 [04:07<06:58, 38.26it/s]
 36%|███▌      | 8863/24880 [04:07<06:55, 38.58it/s]
 36%|███▌      | 8868/24880 [04:07<06:48, 39.23it/s]
 36%|███▌      | 8872/24880 [04:07<06:55, 38.56it/s]
 36%|███▌      | 8876/24880 [04:07<06:53, 38.69it/s]
 36%|███▌      | 8880/24880 [04:08<06:55, 38.51it/s]
 36%|███▌      | 8884/24880 [04:08<06:55, 38.54it/s]
 36%|███▌      | 8888/24880 [04:08<06:51, 38.84it/s]
 36%|███▌      | 8892/24880 [04:08<06:53, 38.70it/s]
 36%|███▌      | 8896/24880 [04:08<07:14, 36.75it/s]
 36%|███▌      | 8900/24880 [04:08<07:08, 37.28it/s]
 36%|███▌      | 8904/24880 [04:08<07:10, 37.09it/s]
 36%|███▌

[2m[36m(_objective pid=39398)[0m {'loss': 0.2072, 'learning_rate': 2.991858979448086e-05, 'epoch': 1.45}


[2m[36m(_objective pid=39398)[0m  36%|███▌      | 9004/24880 [04:11<06:52, 38.53it/s]
 36%|███▌      | 9009/24880 [04:11<06:45, 39.10it/s]
 36%|███▌      | 9013/24880 [04:11<06:51, 38.54it/s]
 36%|███▌      | 9017/24880 [04:11<06:50, 38.61it/s]
 36%|███▋      | 9021/24880 [04:11<06:47, 38.95it/s]
 36%|███▋      | 9026/24880 [04:11<06:43, 39.34it/s]
 36%|███▋      | 9030/24880 [04:11<06:42, 39.35it/s]
 36%|███▋      | 9034/24880 [04:11<06:46, 38.99it/s]
 36%|███▋      | 9039/24880 [04:12<06:41, 39.47it/s]
 36%|███▋      | 9043/24880 [04:12<06:41, 39.47it/s]
 36%|███▋      | 9048/24880 [04:12<06:38, 39.77it/s]
 36%|███▋      | 9053/24880 [04:12<06:34, 40.10it/s]
 36%|███▋      | 9058/24880 [04:12<06:32, 40.32it/s]
 36%|███▋      | 9063/24880 [04:12<06:51, 38.40it/s]
 36%|███▋      | 9068/24880 [04:12<06:45, 39.02it/s]
 36%|███▋      | 9072/24880 [04:12<06:45, 38.96it/s]
 36%|███▋      | 9077/24880 [04:13<06:39, 39.52it/s]
 37%|███▋      | 9082/24880 [04:13<06:37, 39.71it/s]
 37%|███▋

[2m[36m(_objective pid=39398)[0m {'loss': 0.1998, 'learning_rate': 2.8976568705233983e-05, 'epoch': 1.53}


[2m[36m(_objective pid=39398)[0m  38%|███▊      | 9498/24880 [04:24<06:51, 37.38it/s]                                                     38%|███▊      | 9500/24880 [04:24<06:51, 37.38it/s]
 38%|███▊      | 9502/24880 [04:24<06:51, 37.38it/s]
 38%|███▊      | 9506/24880 [04:24<06:52, 37.29it/s]
 38%|███▊      | 9510/24880 [04:24<06:48, 37.63it/s]
 38%|███▊      | 9514/24880 [04:24<06:46, 37.76it/s]
 38%|███▊      | 9518/24880 [04:24<06:50, 37.43it/s]
 38%|███▊      | 9522/24880 [04:25<06:57, 36.81it/s]
 38%|███▊      | 9526/24880 [04:25<06:51, 37.33it/s]
 38%|███▊      | 9530/24880 [04:25<06:46, 37.72it/s]
 38%|███▊      | 9534/24880 [04:25<06:46, 37.79it/s]
 38%|███▊      | 9538/24880 [04:25<07:00, 36.53it/s]
 38%|███▊      | 9542/24880 [04:25<06:50, 37.34it/s]
 38%|███▊      | 9547/24880 [04:25<06:39, 38.36it/s]
 38%|███▊      | 9551/24880 [04:25<06:44, 37.89it/s]
 38%|███▊      | 9555/24880 [04:25<06:40, 38.22it/s]
 38%|███▊      | 9560/24880 [04:26<06:32, 39.04it/s]
 38%|███

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:53:27. Total running time: 1hr 33min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  40%|████      | 9974/24880 [04:36<06:42, 37.07it/s]
 40%|████      | 9978/24880 [04:37<07:03, 35.21it/s]
 40%|████      | 9982/24880 [04:37<06:58, 35.61it/s]
 40%|████      | 9986/24880 [04:37<06:56, 35.75it/s]
 40%|████      | 9990/24880 [04:37<06:43, 36.87it/s]
 40%|████      | 9994/24880 [04:37<06:39, 37.23it/s]
 40%|████      | 9999/24880 [04:37<06:29, 38.21it/s]
 40%|████      | 10000/24880 [04:37<06:29, 38.21it/s]


[2m[36m(_objective pid=39398)[0m {'loss': 0.2152, 'learning_rate': 2.803454761598711e-05, 'epoch': 1.61}


[2m[36m(_objective pid=39398)[0m  40%|████      | 10003/24880 [04:37<06:36, 37.56it/s]
 40%|████      | 10007/24880 [04:37<06:35, 37.63it/s]
 40%|████      | 10011/24880 [04:37<06:43, 36.90it/s]
 40%|████      | 10015/24880 [04:38<06:40, 37.08it/s]
 40%|████      | 10019/24880 [04:38<06:36, 37.45it/s]
 40%|████      | 10023/24880 [04:38<06:37, 37.37it/s]
 40%|████      | 10027/24880 [04:38<06:34, 37.67it/s]
 40%|████      | 10031/24880 [04:38<06:32, 37.80it/s]
 40%|████      | 10035/24880 [04:38<06:38, 37.26it/s]
 40%|████      | 10039/24880 [04:38<06:34, 37.59it/s]
 40%|████      | 10043/24880 [04:38<06:48, 36.28it/s]
 40%|████      | 10047/24880 [04:38<06:40, 37.00it/s]
 40%|████      | 10051/24880 [04:39<06:34, 37.58it/s]
 40%|████      | 10055/24880 [04:39<06:54, 35.77it/s]
 40%|████      | 10059/24880 [04:39<06:49, 36.16it/s]
 40%|████      | 10063/24880 [04:39<06:42, 36.83it/s]
 40%|████      | 10067/24880 [04:39<06:35, 37.45it/s]
 40%|████      | 10071/24880 [04:39<06:36, 37

[2m[36m(_objective pid=39398)[0m {'loss': 0.2289, 'learning_rate': 2.709252652674023e-05, 'epoch': 1.69}


[2m[36m(_objective pid=39398)[0m                                                       42%|████▏     | 10500/24880 [04:50<06:26, 37.23it/s] 42%|████▏     | 10501/24880 [04:50<06:30, 36.85it/s]
 42%|████▏     | 10505/24880 [04:51<06:34, 36.46it/s]
 42%|████▏     | 10509/24880 [04:51<06:29, 36.89it/s]
 42%|████▏     | 10513/24880 [04:51<06:30, 36.77it/s]
 42%|████▏     | 10517/24880 [04:51<06:30, 36.78it/s]
 42%|████▏     | 10521/24880 [04:51<06:33, 36.49it/s]
 42%|████▏     | 10525/24880 [04:51<06:25, 37.23it/s]
 42%|████▏     | 10529/24880 [04:51<06:26, 37.10it/s]
 42%|████▏     | 10533/24880 [04:51<06:21, 37.61it/s]
 42%|████▏     | 10537/24880 [04:51<06:15, 38.16it/s]
 42%|████▏     | 10541/24880 [04:52<06:15, 38.14it/s]
 42%|████▏     | 10545/24880 [04:52<06:18, 37.91it/s]
 42%|████▏     | 10549/24880 [04:52<06:15, 38.20it/s]
 42%|████▏     | 10553/24880 [04:52<06:16, 38.05it/s]
 42%|████▏     | 10557/24880 [04:52<06:10, 38.61it/s]
 42%|████▏     | 10561/24880 [04:52<06:23, 3

[2m[36m(_objective pid=39398)[0m {'loss': 0.1888, 'learning_rate': 2.6150505437493347e-05, 'epoch': 1.77}


[2m[36m(_objective pid=39398)[0m  44%|████▍     | 11000/24880 [05:04<06:02, 38.34it/s]                                                      44%|████▍     | 11000/24880 [05:04<06:02, 38.34it/s]
 44%|████▍     | 11004/24880 [05:04<05:59, 38.62it/s]
 44%|████▍     | 11008/24880 [05:04<05:58, 38.65it/s]
 44%|████▍     | 11012/24880 [05:04<06:03, 38.11it/s]
 44%|████▍     | 11016/24880 [05:04<06:07, 37.69it/s]
 44%|████▍     | 11020/24880 [05:04<06:06, 37.85it/s]
 44%|████▍     | 11024/24880 [05:04<06:05, 37.88it/s]
 44%|████▍     | 11028/24880 [05:05<06:07, 37.68it/s]
 44%|████▍     | 11032/24880 [05:05<06:07, 37.72it/s]
 44%|████▍     | 11036/24880 [05:05<06:12, 37.12it/s]
 44%|████▍     | 11040/24880 [05:05<06:24, 35.95it/s]
 44%|████▍     | 11044/24880 [05:05<06:17, 36.69it/s]
 44%|████▍     | 11048/24880 [05:05<06:17, 36.65it/s]
 44%|████▍     | 11052/24880 [05:05<06:20, 36.30it/s]
 44%|████▍     | 11056/24880 [05:05<06:19, 36.45it/s]
 44%|████▍     | 11060/24880 [05:05<06:18, 3

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:53:57. Total running time: 1hr 33min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  45%|████▍     | 11092/24880 [05:06<06:17, 36.50it/s]
 45%|████▍     | 11096/24880 [05:06<06:15, 36.70it/s]
 45%|████▍     | 11100/24880 [05:07<06:11, 37.10it/s]
 45%|████▍     | 11104/24880 [05:07<06:12, 36.96it/s]
 45%|████▍     | 11108/24880 [05:07<06:12, 36.93it/s]
 45%|████▍     | 11112/24880 [05:07<06:30, 35.27it/s]
 45%|████▍     | 11116/24880 [05:07<06:22, 36.02it/s]
 45%|████▍     | 11120/24880 [05:07<06:16, 36.54it/s]
 45%|████▍     | 11124/24880 [05:07<06:21, 36.06it/s]
 45%|████▍     | 11128/24880 [05:07<06:26, 35.56it/s]
 45%|████▍     | 11132/24880 [05:07<06:17, 36.46it/s]
 45%|████▍     | 11136/24880 [05:08<06:12, 36.91it/s]
 45%|████▍     | 11141/24880 [05:08<05:59, 38.18it/s]
 45%|████▍     | 11145/24880 [05:08<06:01, 38.00it/s]
 45%|████▍     | 11150/24880 [05:08<05:51, 39.02it/s]
 45%|████▍     | 11155/24880 [05:08<05:47, 39.53it/s]
 45%|████▍     | 11160/24880 [05:08<05:44, 39.87it/s]
 45%|████▍     | 11164/24880 [05:08<05:45, 39

[2m[36m(_objective pid=39398)[0m {'loss': 0.2386, 'learning_rate': 2.520848434824647e-05, 'epoch': 1.85}


[2m[36m(_objective pid=39398)[0m  46%|████▌     | 11500/24880 [05:17<05:47, 38.51it/s]                                                      46%|████▌     | 11500/24880 [05:17<05:47, 38.51it/s]
 46%|████▌     | 11504/24880 [05:17<05:52, 37.94it/s]
 46%|████▋     | 11508/24880 [05:17<05:48, 38.39it/s]
 46%|████▋     | 11512/24880 [05:17<05:49, 38.22it/s]
 46%|████▋     | 11516/24880 [05:17<05:48, 38.40it/s]
 46%|████▋     | 11520/24880 [05:18<05:58, 37.25it/s]
 46%|████▋     | 11524/24880 [05:18<05:57, 37.38it/s]
 46%|████▋     | 11528/24880 [05:18<05:55, 37.52it/s]
 46%|████▋     | 11532/24880 [05:18<05:55, 37.54it/s]
 46%|████▋     | 11536/24880 [05:18<06:11, 35.91it/s]
 46%|████▋     | 11540/24880 [05:18<06:02, 36.85it/s]
 46%|████▋     | 11544/24880 [05:18<06:03, 36.71it/s]
 46%|████▋     | 11548/24880 [05:18<06:04, 36.54it/s]
 46%|████▋     | 11552/24880 [05:18<06:16, 35.44it/s]
 46%|████▋     | 11556/24880 [05:19<06:06, 36.32it/s]
 46%|████▋     | 11560/24880 [05:19<06:05, 3

[2m[36m(_objective pid=39398)[0m {'loss': 0.1723, 'learning_rate': 2.4266463258999593e-05, 'epoch': 1.93}


[2m[36m(_objective pid=39398)[0m  48%|████▊     | 12005/24880 [05:31<05:40, 37.77it/s]
 48%|████▊     | 12009/24880 [05:31<05:43, 37.44it/s]
 48%|████▊     | 12013/24880 [05:31<05:47, 37.02it/s]
 48%|████▊     | 12017/24880 [05:31<05:51, 36.63it/s]
 48%|████▊     | 12021/24880 [05:31<05:48, 36.90it/s]
 48%|████▊     | 12025/24880 [05:31<05:55, 36.16it/s]
 48%|████▊     | 12029/24880 [05:31<05:50, 36.62it/s]
 48%|████▊     | 12033/24880 [05:31<05:55, 36.12it/s]
 48%|████▊     | 12037/24880 [05:31<05:50, 36.67it/s]
 48%|████▊     | 12041/24880 [05:32<05:47, 36.94it/s]
 48%|████▊     | 12045/24880 [05:32<05:46, 37.08it/s]
 48%|████▊     | 12049/24880 [05:32<05:41, 37.61it/s]
 48%|████▊     | 12053/24880 [05:32<05:42, 37.42it/s]
 48%|████▊     | 12057/24880 [05:32<05:52, 36.41it/s]
 48%|████▊     | 12061/24880 [05:32<05:47, 36.93it/s]
 48%|████▊     | 12065/24880 [05:32<05:43, 37.35it/s]
 49%|████▊     | 12069/24880 [05:32<05:43, 37.26it/s]
 49%|████▊     | 12073/24880 [05:32<05:38, 37

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:54:27. Total running time: 1hr 34min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  49%|████▉     | 12228/24880 [05:37<05:22, 39.21it/s]
 49%|████▉     | 12232/24880 [05:37<05:27, 38.67it/s]
 49%|████▉     | 12236/24880 [05:37<05:24, 38.92it/s]
 49%|████▉     | 12240/24880 [05:37<05:23, 39.06it/s]
 49%|████▉     | 12244/24880 [05:37<05:21, 39.27it/s]
 49%|████▉     | 12248/24880 [05:37<05:35, 37.70it/s]
 49%|████▉     | 12252/24880 [05:37<05:34, 37.71it/s]
 49%|████▉     | 12256/24880 [05:37<05:47, 36.29it/s]
 49%|████▉     | 12260/24880 [05:37<05:45, 36.56it/s]
 49%|████▉     | 12264/24880 [05:38<05:39, 37.20it/s]
 49%|████▉     | 12268/24880 [05:38<05:33, 37.77it/s]
 49%|████▉     | 12272/24880 [05:38<05:38, 37.23it/s]
 49%|████▉     | 12276/24880 [05:38<05:33, 37.77it/s]
 49%|████▉     | 12280/24880 [05:38<05:29, 38.21it/s]
 49%|████▉     | 12284/24880 [05:38<05:38, 37.25it/s]
 49%|████▉     | 12288/24880 [05:38<05:33, 37.74it/s]
 49%|████▉     | 12292/24880 [05:38<05:30, 38.11it/s]
 49%|████▉     | 12296/24880 [05:38<05:31, 38

Trial _objective_f556c_00013 finished iteration 2 at 2023-09-11 14:54:42. Total running time: 1hr 34min 21s
+-------------------------------------------------+
| Trial _objective_f556c_00013 result             |
+-------------------------------------------------+
| time_this_iter_s                        176.127 |
| time_total_s                            354.604 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.2372 |
| eval_runtime                             9.5355 |
| eval_samples_per_second                 434.902 |
| eval_steps_per_second                    13.633 |
| objective                                0.2372 |
+-------------------------------------------------+

[2m[36m(_objective pid=39398)[0m {'eval_loss': 0.23720407485961914, 'eval_runtime': 9.5355, 'eval_samples_per_second': 434.902, 'eval_steps_per_second': 13.633, 'epoch': 2.0}


[2m[36m(_objective pid=39398)[0m                                                      
[2m[36m(_objective pid=39398)[0m                                                  [A 50%|█████     | 12440/24880 [05:52<05:34, 37.19it/s]
[2m[36m(_objective pid=39398)[0m 100%|██████████| 130/130 [00:09<00:00, 12.05it/s][A
                                                 [A
 50%|█████     | 12441/24880 [05:53<2:52:03,  1.20it/s]
 50%|█████     | 12445/24880 [05:53<2:02:46,  1.69it/s]
 50%|█████     | 12449/24880 [05:53<1:27:57,  2.36it/s]
 50%|█████     | 12453/24880 [05:53<1:03:30,  3.26it/s]
 50%|█████     | 12457/24880 [05:54<46:12,  4.48it/s]  
 50%|█████     | 12461/24880 [05:54<34:02,  6.08it/s]
 50%|█████     | 12465/24880 [05:54<25:35,  8.09it/s]
 50%|█████     | 12469/24880 [05:54<19:36, 10.55it/s]
 50%|█████     | 12473/24880 [05:54<15:23, 13.44it/s]
 50%|█████     | 12477/24880 [05:54<12:32, 16.48it/s]
 50%|█████     | 12481/24880 [05:54<10:34, 19.55it/s]
 50%|█████     | 

[2m[36m(_objective pid=39398)[0m {'loss': 0.2627, 'learning_rate': 2.3324442169752716e-05, 'epoch': 2.01}


 50%|█████     | 12501/24880 [05:55<06:33, 31.46it/s]
 50%|█████     | 12505/24880 [05:55<06:20, 32.50it/s]
 50%|█████     | 12509/24880 [05:55<06:14, 33.00it/s]
 50%|█████     | 12513/24880 [05:55<06:01, 34.23it/s]
 50%|█████     | 12517/24880 [05:55<05:54, 34.84it/s]
 50%|█████     | 12521/24880 [05:55<05:49, 35.33it/s]
 50%|█████     | 12525/24880 [05:55<05:48, 35.50it/s]
 50%|█████     | 12529/24880 [05:56<05:45, 35.77it/s]
 50%|█████     | 12533/24880 [05:56<05:40, 36.24it/s]
 50%|█████     | 12537/24880 [05:56<05:48, 35.38it/s]
 50%|█████     | 12541/24880 [05:56<05:43, 35.90it/s]
 50%|█████     | 12545/24880 [05:56<05:42, 36.06it/s]
 50%|█████     | 12549/24880 [05:56<05:42, 35.97it/s]
 50%|█████     | 12554/24880 [05:56<05:30, 37.25it/s]
 50%|█████     | 12558/24880 [05:56<05:35, 36.71it/s]
 50%|█████     | 12562/24880 [05:56<05:35, 36.69it/s]
 51%|█████     | 12567/24880 [05:57<05:23, 38.02it/s]
 51%|█████     | 12571/24880 [05:57<05:20, 38.46it/s]
 51%|█████     | 12575/24880

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:54:57. Total running time: 1hr 34min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  52%|█████▏    | 12940/24880 [06:07<05:23, 36.95it/s]
 52%|█████▏    | 12944/24880 [06:07<05:19, 37.33it/s]
 52%|█████▏    | 12948/24880 [06:07<05:15, 37.79it/s]
 52%|█████▏    | 12952/24880 [06:07<05:11, 38.33it/s]
 52%|█████▏    | 12956/24880 [06:07<05:11, 38.29it/s]
 52%|█████▏    | 12960/24880 [06:07<05:11, 38.30it/s]
 52%|█████▏    | 12964/24880 [06:07<05:08, 38.65it/s]
 52%|█████▏    | 12968/24880 [06:07<05:21, 37.08it/s]
 52%|█████▏    | 12972/24880 [06:07<05:16, 37.58it/s]
 52%|█████▏    | 12976/24880 [06:07<05:13, 38.03it/s]
 52%|█████▏    | 12981/24880 [06:08<05:08, 38.61it/s]
 52%|█████▏    | 12985/24880 [06:08<05:13, 37.91it/s]
 52%|█████▏    | 12989/24880 [06:08<05:15, 37.65it/s]
 52%|█████▏    | 12993/24880 [06:08<05:20, 37.08it/s]
 52%|█████▏    | 12997/24880 [06:08<05:20, 37.05it/s]
 52%|█████▏    | 13001/24880 [06:08<05:20, 37.05it/s]


[2m[36m(_objective pid=39398)[0m {'loss': 0.1636, 'learning_rate': 2.2382421080505835e-05, 'epoch': 2.09}


[2m[36m(_objective pid=39398)[0m  52%|█████▏    | 13005/24880 [06:08<05:23, 36.67it/s]
 52%|█████▏    | 13009/24880 [06:08<05:23, 36.74it/s]
 52%|█████▏    | 13013/24880 [06:08<05:26, 36.35it/s]
 52%|█████▏    | 13017/24880 [06:09<05:20, 36.99it/s]
 52%|█████▏    | 13021/24880 [06:09<05:13, 37.79it/s]
 52%|█████▏    | 13025/24880 [06:09<05:10, 38.19it/s]
 52%|█████▏    | 13030/24880 [06:09<05:20, 36.96it/s]
 52%|█████▏    | 13034/24880 [06:09<05:31, 35.74it/s]
 52%|█████▏    | 13038/24880 [06:09<05:23, 36.65it/s]
 52%|█████▏    | 13042/24880 [06:09<05:24, 36.53it/s]
 52%|█████▏    | 13046/24880 [06:09<05:39, 34.89it/s]
 52%|█████▏    | 13050/24880 [06:09<05:37, 35.02it/s]
 52%|█████▏    | 13054/24880 [06:10<05:30, 35.78it/s]
 52%|█████▏    | 13058/24880 [06:10<05:37, 35.08it/s]
 52%|█████▎    | 13062/24880 [06:10<05:29, 35.84it/s]
 53%|█████▎    | 13066/24880 [06:10<05:23, 36.52it/s]
 53%|█████▎    | 13070/24880 [06:10<05:17, 37.21it/s]
 53%|█████▎    | 13074/24880 [06:10<05:17, 37

[2m[36m(_objective pid=39398)[0m {'loss': 0.1712, 'learning_rate': 2.1440399991258954e-05, 'epoch': 2.17}


[2m[36m(_objective pid=39398)[0m  54%|█████▍    | 13505/24880 [06:22<05:10, 36.63it/s]
 54%|█████▍    | 13509/24880 [06:22<05:23, 35.18it/s]
 54%|█████▍    | 13513/24880 [06:22<05:17, 35.83it/s]
 54%|█████▍    | 13517/24880 [06:22<05:15, 36.02it/s]
 54%|█████▍    | 13521/24880 [06:22<05:08, 36.87it/s]
 54%|█████▍    | 13525/24880 [06:22<05:20, 35.46it/s]
 54%|█████▍    | 13529/24880 [06:22<05:20, 35.41it/s]
 54%|█████▍    | 13533/24880 [06:22<05:14, 36.05it/s]
 54%|█████▍    | 13537/24880 [06:23<05:24, 34.93it/s]
 54%|█████▍    | 13541/24880 [06:23<05:20, 35.42it/s]
 54%|█████▍    | 13545/24880 [06:23<05:14, 36.04it/s]
 54%|█████▍    | 13549/24880 [06:23<05:30, 34.31it/s]
 54%|█████▍    | 13553/24880 [06:23<05:29, 34.35it/s]
 54%|█████▍    | 13557/24880 [06:23<05:27, 34.57it/s]
 55%|█████▍    | 13561/24880 [06:23<05:19, 35.42it/s]
 55%|█████▍    | 13565/24880 [06:23<05:16, 35.71it/s]
 55%|█████▍    | 13569/24880 [06:23<05:09, 36.55it/s]
 55%|█████▍    | 13573/24880 [06:24<05:07, 36

[2m[36m(_objective pid=39398)[0m {'loss': 0.0986, 'learning_rate': 2.0498378902012077e-05, 'epoch': 2.25}


[2m[36m(_objective pid=39398)[0m  56%|█████▋    | 14003/24880 [06:35<04:51, 37.30it/s]
 56%|█████▋    | 14007/24880 [06:35<04:54, 36.97it/s]
 56%|█████▋    | 14011/24880 [06:35<04:51, 37.35it/s]
 56%|█████▋    | 14015/24880 [06:35<04:57, 36.57it/s]
 56%|█████▋    | 14019/24880 [06:35<04:52, 37.16it/s]
 56%|█████▋    | 14023/24880 [06:36<04:53, 37.03it/s]
 56%|█████▋    | 14027/24880 [06:36<04:50, 37.42it/s]
 56%|█████▋    | 14031/24880 [06:36<04:51, 37.25it/s]
 56%|█████▋    | 14035/24880 [06:36<04:49, 37.47it/s]
 56%|█████▋    | 14039/24880 [06:36<04:57, 36.40it/s]
 56%|█████▋    | 14043/24880 [06:36<04:52, 37.04it/s]
 56%|█████▋    | 14047/24880 [06:36<04:52, 37.04it/s]
 56%|█████▋    | 14051/24880 [06:36<04:48, 37.51it/s]
 56%|█████▋    | 14055/24880 [06:36<04:46, 37.82it/s]


Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:55:27. Total running time: 1hr 35min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  57%|█████▋    | 14059/24880 [06:37<04:57, 36.40it/s]
 57%|█████▋    | 14063/24880 [06:37<04:57, 36.39it/s]
 57%|█████▋    | 14067/24880 [06:37<04:57, 36.36it/s]
 57%|█████▋    | 14071/24880 [06:37<04:54, 36.72it/s]
 57%|█████▋    | 14075/24880 [06:37<04:48, 37.44it/s]
 57%|█████▋    | 14079/24880 [06:37<04:49, 37.32it/s]
 57%|█████▋    | 14083/24880 [06:37<04:46, 37.66it/s]
 57%|█████▋    | 14087/24880 [06:37<04:46, 37.64it/s]
 57%|█████▋    | 14091/24880 [06:37<04:50, 37.19it/s]
 57%|█████▋    | 14095/24880 [06:38<04:49, 37.22it/s]
 57%|█████▋    | 14099/24880 [06:38<04:45, 37.74it/s]
 57%|█████▋    | 14103/24880 [06:38<04:41, 38.28it/s]
 57%|█████▋    | 14107/24880 [06:38<04:42, 38.15it/s]
 57%|█████▋    | 14111/24880 [06:38<04:39, 38.47it/s]
 57%|█████▋    | 14115/24880 [06:38<04:39, 38.57it/s]
 57%|█████▋    | 14119/24880 [06:38<04:39, 38.56it/s]
 57%|█████▋    | 14123/24880 [06:38<04:40, 38.40it/s]
 57%|█████▋    | 14127/24880 [06:38<04:40, 38

[2m[36m(_objective pid=39398)[0m {'loss': 0.1301, 'learning_rate': 1.95563578127652e-05, 'epoch': 2.33}


[2m[36m(_objective pid=39398)[0m  58%|█████▊    | 14499/24880 [06:49<04:33, 38.02it/s]                                                      58%|█████▊    | 14500/24880 [06:49<04:32, 38.02it/s]
 58%|█████▊    | 14503/24880 [06:49<04:30, 38.30it/s]
 58%|█████▊    | 14507/24880 [06:49<04:37, 37.43it/s]
 58%|█████▊    | 14511/24880 [06:49<04:32, 38.04it/s]
 58%|█████▊    | 14515/24880 [06:49<04:29, 38.51it/s]
 58%|█████▊    | 14519/24880 [06:49<04:26, 38.81it/s]
 58%|█████▊    | 14523/24880 [06:49<04:25, 38.97it/s]
 58%|█████▊    | 14527/24880 [06:49<04:24, 39.07it/s]
 58%|█████▊    | 14531/24880 [06:49<04:26, 38.82it/s]
 58%|█████▊    | 14536/24880 [06:49<04:22, 39.35it/s]
 58%|█████▊    | 14540/24880 [06:50<04:22, 39.36it/s]
 58%|█████▊    | 14544/24880 [06:50<04:23, 39.24it/s]
 58%|█████▊    | 14548/24880 [06:50<04:25, 38.85it/s]
 58%|█████▊    | 14552/24880 [06:50<04:28, 38.42it/s]
 59%|█████▊    | 14556/24880 [06:50<04:27, 38.57it/s]
 59%|█████▊    | 14560/24880 [06:50<04:29, 3

[2m[36m(_objective pid=39398)[0m {'loss': 0.1055, 'learning_rate': 1.861433672351832e-05, 'epoch': 2.41}


[2m[36m(_objective pid=39398)[0m  60%|██████    | 15003/24880 [07:02<04:15, 38.58it/s]
 60%|██████    | 15007/24880 [07:02<04:16, 38.56it/s]
 60%|██████    | 15011/24880 [07:02<04:16, 38.53it/s]
 60%|██████    | 15015/24880 [07:02<04:13, 38.86it/s]
 60%|██████    | 15020/24880 [07:03<04:10, 39.38it/s]
 60%|██████    | 15025/24880 [07:03<04:08, 39.73it/s]
 60%|██████    | 15029/24880 [07:03<04:08, 39.67it/s]
 60%|██████    | 15033/24880 [07:03<04:11, 39.08it/s]
 60%|██████    | 15038/24880 [07:03<04:10, 39.27it/s]
 60%|██████    | 15043/24880 [07:03<04:11, 39.08it/s]
 60%|██████    | 15047/24880 [07:03<04:19, 37.86it/s]
 60%|██████    | 15051/24880 [07:03<04:15, 38.40it/s]
 61%|██████    | 15055/24880 [07:03<04:14, 38.65it/s]
 61%|██████    | 15060/24880 [07:04<04:09, 39.41it/s]
 61%|██████    | 15064/24880 [07:04<04:13, 38.66it/s]
 61%|██████    | 15068/24880 [07:04<04:22, 37.33it/s]
 61%|██████    | 15072/24880 [07:04<04:19, 37.78it/s]
 61%|██████    | 15076/24880 [07:04<04:29, 36

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:55:57. Total running time: 1hr 35min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  61%|██████    | 15173/24880 [07:06<04:12, 38.49it/s]
 61%|██████    | 15178/24880 [07:07<04:07, 39.14it/s]
 61%|██████    | 15182/24880 [07:07<04:08, 39.09it/s]
 61%|██████    | 15186/24880 [07:07<04:11, 38.54it/s]
 61%|██████    | 15191/24880 [07:07<04:06, 39.34it/s]
 61%|██████    | 15195/24880 [07:07<04:13, 38.28it/s]
 61%|██████    | 15200/24880 [07:07<04:09, 38.86it/s]
 61%|██████    | 15204/24880 [07:07<04:07, 39.08it/s]
 61%|██████    | 15208/24880 [07:07<04:06, 39.27it/s]
 61%|██████    | 15212/24880 [07:07<04:14, 38.03it/s]
 61%|██████    | 15216/24880 [07:08<04:13, 38.15it/s]
 61%|██████    | 15221/24880 [07:08<04:08, 38.81it/s]
 61%|██████    | 15225/24880 [07:08<04:10, 38.58it/s]
 61%|██████    | 15229/24880 [07:08<04:11, 38.37it/s]
 61%|██████    | 15233/24880 [07:08<04:09, 38.71it/s]
 61%|██████    | 15237/24880 [07:08<04:11, 38.31it/s]
 61%|██████▏   | 15241/24880 [07:08<04:11, 38.36it/s]
 61%|██████▏   | 15245/24880 [07:08<04:09, 38

[2m[36m(_objective pid=39398)[0m {'loss': 0.1417, 'learning_rate': 1.7672315634271442e-05, 'epoch': 2.49}


 62%|██████▏   | 15500/24880 [07:15<04:16, 36.57it/s]
 62%|██████▏   | 15504/24880 [07:15<04:15, 36.67it/s]
 62%|██████▏   | 15508/24880 [07:15<04:16, 36.51it/s]
 62%|██████▏   | 15512/24880 [07:16<04:21, 35.77it/s]
 62%|██████▏   | 15516/24880 [07:16<04:22, 35.71it/s]
 62%|██████▏   | 15520/24880 [07:16<04:19, 36.05it/s]
 62%|██████▏   | 15524/24880 [07:16<04:27, 35.01it/s]
 62%|██████▏   | 15528/24880 [07:16<04:23, 35.53it/s]
 62%|██████▏   | 15532/24880 [07:16<04:32, 34.25it/s]
 62%|██████▏   | 15536/24880 [07:16<04:30, 34.59it/s]
 62%|██████▏   | 15540/24880 [07:16<04:22, 35.52it/s]
 62%|██████▏   | 15544/24880 [07:17<04:19, 36.03it/s]
 62%|██████▏   | 15548/24880 [07:17<04:22, 35.48it/s]
 63%|██████▎   | 15552/24880 [07:17<04:17, 36.25it/s]
 63%|██████▎   | 15556/24880 [07:17<04:19, 35.92it/s]
 63%|██████▎   | 15560/24880 [07:17<04:20, 35.84it/s]
 63%|██████▎   | 15564/24880 [07:17<04:13, 36.73it/s]
 63%|██████▎   | 15568/24880 [07:17<04:09, 37.25it/s]
 63%|██████▎   | 15572/24880

[2m[36m(_objective pid=39398)[0m {'loss': 0.1529, 'learning_rate': 1.6730294545024565e-05, 'epoch': 2.57}


[2m[36m(_objective pid=39398)[0m                                                       64%|██████▍   | 16000/24880 [07:28<04:06, 36.02it/s] 64%|██████▍   | 16001/24880 [07:28<04:03, 36.53it/s]
 64%|██████▍   | 16005/24880 [07:29<04:08, 35.68it/s]
 64%|██████▍   | 16009/24880 [07:29<04:07, 35.79it/s]
 64%|██████▍   | 16013/24880 [07:29<04:12, 35.15it/s]
 64%|██████▍   | 16017/24880 [07:29<04:10, 35.41it/s]
 64%|██████▍   | 16021/24880 [07:29<04:14, 34.77it/s]
 64%|██████▍   | 16025/24880 [07:29<04:20, 34.00it/s]
 64%|██████▍   | 16029/24880 [07:29<04:14, 34.79it/s]
 64%|██████▍   | 16033/24880 [07:29<04:09, 35.48it/s]
 64%|██████▍   | 16037/24880 [07:29<04:05, 35.95it/s]
 64%|██████▍   | 16041/24880 [07:30<03:59, 36.95it/s]
 64%|██████▍   | 16045/24880 [07:30<04:05, 36.02it/s]
 65%|██████▍   | 16049/24880 [07:30<04:03, 36.33it/s]
 65%|██████▍   | 16053/24880 [07:30<04:05, 36.02it/s]
 65%|██████▍   | 16057/24880 [07:30<04:18, 34.12it/s]
 65%|██████▍   | 16061/24880 [07:30<04:17, 3

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:56:27. Total running time: 1hr 36min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  65%|██████▌   | 16294/24880 [07:36<03:37, 39.53it/s]
 66%|██████▌   | 16298/24880 [07:37<03:39, 39.02it/s]
 66%|██████▌   | 16303/24880 [07:37<03:37, 39.41it/s]
 66%|██████▌   | 16307/24880 [07:37<03:39, 39.12it/s]
 66%|██████▌   | 16311/24880 [07:37<03:46, 37.87it/s]
 66%|██████▌   | 16315/24880 [07:37<03:49, 37.32it/s]
 66%|██████▌   | 16320/24880 [07:37<03:43, 38.23it/s]
 66%|██████▌   | 16324/24880 [07:37<03:42, 38.47it/s]
 66%|██████▌   | 16328/24880 [07:37<03:42, 38.44it/s]
 66%|██████▌   | 16332/24880 [07:37<03:42, 38.36it/s]
 66%|██████▌   | 16336/24880 [07:38<03:43, 38.26it/s]
 66%|██████▌   | 16340/24880 [07:38<03:43, 38.17it/s]
 66%|██████▌   | 16345/24880 [07:38<03:39, 38.81it/s]
 66%|██████▌   | 16349/24880 [07:38<03:38, 39.01it/s]
 66%|██████▌   | 16353/24880 [07:38<03:37, 39.13it/s]
 66%|██████▌   | 16357/24880 [07:38<03:37, 39.23it/s]
 66%|██████▌   | 16362/24880 [07:38<03:34, 39.72it/s]
 66%|██████▌   | 16366/24880 [07:38<03:35, 39

[2m[36m(_objective pid=39398)[0m {'loss': 0.152, 'learning_rate': 1.5788273455777687e-05, 'epoch': 2.65}


[2m[36m(_objective pid=39398)[0m  66%|██████▋   | 16505/24880 [07:42<03:42, 37.71it/s]
 66%|██████▋   | 16509/24880 [07:42<03:43, 37.49it/s]
 66%|██████▋   | 16513/24880 [07:42<03:42, 37.54it/s]
 66%|██████▋   | 16517/24880 [07:42<03:41, 37.71it/s]
 66%|██████▋   | 16521/24880 [07:42<03:38, 38.17it/s]
 66%|██████▋   | 16525/24880 [07:43<03:36, 38.57it/s]
 66%|██████▋   | 16529/24880 [07:43<03:35, 38.82it/s]
 66%|██████▋   | 16533/24880 [07:43<03:35, 38.74it/s]
 66%|██████▋   | 16537/24880 [07:43<03:33, 39.04it/s]
 66%|██████▋   | 16541/24880 [07:43<03:44, 37.18it/s]
 66%|██████▋   | 16545/24880 [07:43<03:42, 37.41it/s]
 67%|██████▋   | 16549/24880 [07:43<03:45, 36.99it/s]
 67%|██████▋   | 16553/24880 [07:43<03:43, 37.28it/s]
 67%|██████▋   | 16557/24880 [07:43<03:46, 36.75it/s]
 67%|██████▋   | 16561/24880 [07:43<03:42, 37.46it/s]
 67%|██████▋   | 16565/24880 [07:44<03:49, 36.20it/s]
 67%|██████▋   | 16569/24880 [07:44<03:47, 36.50it/s]
 67%|██████▋   | 16573/24880 [07:44<03:46, 36

[2m[36m(_objective pid=39398)[0m {'loss': 0.1128, 'learning_rate': 1.4846252366530807e-05, 'epoch': 2.73}


[2m[36m(_objective pid=39398)[0m  68%|██████▊   | 17003/24880 [07:56<03:36, 36.37it/s]
 68%|██████▊   | 17007/24880 [07:56<03:33, 36.91it/s]
 68%|██████▊   | 17011/24880 [07:56<03:36, 36.42it/s]
 68%|██████▊   | 17015/24880 [07:56<03:34, 36.63it/s]
 68%|██████▊   | 17019/24880 [07:56<03:31, 37.25it/s]
 68%|██████▊   | 17023/24880 [07:56<03:32, 37.03it/s]
 68%|██████▊   | 17027/24880 [07:56<03:33, 36.84it/s]
 68%|██████▊   | 17031/24880 [07:56<03:39, 35.74it/s]
 68%|██████▊   | 17035/24880 [07:56<03:34, 36.61it/s]
 68%|██████▊   | 17039/24880 [07:57<03:32, 36.85it/s]
 69%|██████▊   | 17043/24880 [07:57<03:34, 36.45it/s]
 69%|██████▊   | 17047/24880 [07:57<03:37, 35.96it/s]
 69%|██████▊   | 17051/24880 [07:57<03:37, 36.01it/s]
 69%|██████▊   | 17055/24880 [07:57<03:38, 35.89it/s]
 69%|██████▊   | 17059/24880 [07:57<03:37, 35.98it/s]
 69%|██████▊   | 17063/24880 [07:57<03:35, 36.29it/s]
 69%|██████▊   | 17067/24880 [07:57<03:34, 36.43it/s]
 69%|██████▊   | 17071/24880 [07:57<03:34, 36

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:56:57. Total running time: 1hr 36min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  70%|██████▉   | 17387/24880 [08:06<03:30, 35.53it/s]
 70%|██████▉   | 17391/24880 [08:07<03:28, 35.92it/s]
 70%|██████▉   | 17395/24880 [08:07<03:24, 36.55it/s]
 70%|██████▉   | 17399/24880 [08:07<03:27, 36.06it/s]
 70%|██████▉   | 17403/24880 [08:07<03:23, 36.78it/s]
 70%|██████▉   | 17407/24880 [08:07<03:20, 37.25it/s]
 70%|██████▉   | 17411/24880 [08:07<03:21, 37.13it/s]
 70%|██████▉   | 17415/24880 [08:07<03:20, 37.21it/s]
 70%|███████   | 17419/24880 [08:07<03:19, 37.38it/s]
 70%|███████   | 17423/24880 [08:07<03:18, 37.48it/s]
 70%|███████   | 17427/24880 [08:08<03:17, 37.65it/s]
 70%|███████   | 17431/24880 [08:08<03:19, 37.41it/s]
 70%|███████   | 17435/24880 [08:08<03:17, 37.75it/s]
 70%|███████   | 17439/24880 [08:08<03:17, 37.59it/s]
 70%|███████   | 17443/24880 [08:08<03:16, 37.78it/s]
 70%|███████   | 17447/24880 [08:08<03:19, 37.21it/s]
 70%|███████   | 17451/24880 [08:08<03:16, 37.90it/s]
 70%|███████   | 17455/24880 [08:08<03:15, 37

[2m[36m(_objective pid=39398)[0m {'loss': 0.1662, 'learning_rate': 1.3904231277283928e-05, 'epoch': 2.81}


[2m[36m(_objective pid=39398)[0m  70%|███████   | 17505/24880 [08:10<03:19, 36.96it/s]
 70%|███████   | 17509/24880 [08:10<03:18, 37.20it/s]
 70%|███████   | 17513/24880 [08:10<03:23, 36.23it/s]
 70%|███████   | 17517/24880 [08:10<03:20, 36.75it/s]
 70%|███████   | 17521/24880 [08:10<03:16, 37.52it/s]
 70%|███████   | 17525/24880 [08:10<03:23, 36.08it/s]
 70%|███████   | 17530/24880 [08:10<03:17, 37.15it/s]
 70%|███████   | 17534/24880 [08:10<03:15, 37.64it/s]
 70%|███████   | 17538/24880 [08:11<03:19, 36.73it/s]
 71%|███████   | 17542/24880 [08:11<03:17, 37.15it/s]
 71%|███████   | 17546/24880 [08:11<03:14, 37.68it/s]
 71%|███████   | 17550/24880 [08:11<03:11, 38.19it/s]
 71%|███████   | 17554/24880 [08:11<03:11, 38.35it/s]
 71%|███████   | 17558/24880 [08:11<03:16, 37.25it/s]
 71%|███████   | 17563/24880 [08:11<03:10, 38.32it/s]
 71%|███████   | 17567/24880 [08:11<03:11, 38.23it/s]
 71%|███████   | 17571/24880 [08:11<03:10, 38.39it/s]
 71%|███████   | 17575/24880 [08:11<03:11, 38

[2m[36m(_objective pid=39398)[0m {'loss': 0.1477, 'learning_rate': 1.296221018803705e-05, 'epoch': 2.89}


[2m[36m(_objective pid=39398)[0m  72%|███████▏  | 18000/24880 [08:23<02:56, 39.01it/s]                                                      72%|███████▏  | 18000/24880 [08:23<02:56, 39.01it/s]
 72%|███████▏  | 18004/24880 [08:24<02:57, 38.65it/s]
 72%|███████▏  | 18008/24880 [08:24<03:05, 37.08it/s]
 72%|███████▏  | 18012/24880 [08:24<03:04, 37.26it/s]
 72%|███████▏  | 18016/24880 [08:24<03:01, 37.89it/s]
 72%|███████▏  | 18020/24880 [08:24<03:08, 36.38it/s]
 72%|███████▏  | 18024/24880 [08:24<03:06, 36.81it/s]
 72%|███████▏  | 18028/24880 [08:24<03:03, 37.36it/s]
 72%|███████▏  | 18032/24880 [08:24<03:01, 37.66it/s]
 72%|███████▏  | 18036/24880 [08:24<03:02, 37.45it/s]
 73%|███████▎  | 18040/24880 [08:24<03:01, 37.74it/s]
 73%|███████▎  | 18044/24880 [08:25<03:01, 37.61it/s]
 73%|███████▎  | 18048/24880 [08:25<03:00, 37.86it/s]
 73%|███████▎  | 18052/24880 [08:25<03:04, 37.11it/s]
 73%|███████▎  | 18057/24880 [08:25<02:58, 38.14it/s]
 73%|███████▎  | 18061/24880 [08:25<03:04, 3

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:57:27. Total running time: 1hr 37min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  74%|███████▍  | 18487/24880 [08:37<03:05, 34.46it/s]
 74%|███████▍  | 18491/24880 [08:37<03:04, 34.64it/s]
 74%|███████▍  | 18495/24880 [08:37<03:04, 34.62it/s]
 74%|███████▍  | 18500/24880 [08:37<03:04, 34.56it/s]


[2m[36m(_objective pid=39398)[0m {'loss': 0.1643, 'learning_rate': 1.202018909879017e-05, 'epoch': 2.97}


[2m[36m(_objective pid=39398)[0m  74%|███████▍  | 18503/24880 [08:37<03:00, 35.24it/s]
 74%|███████▍  | 18507/24880 [08:37<02:59, 35.41it/s]
 74%|███████▍  | 18511/24880 [08:37<02:59, 35.44it/s]
 74%|███████▍  | 18515/24880 [08:37<03:02, 34.89it/s]
 74%|███████▍  | 18519/24880 [08:37<02:59, 35.34it/s]
 74%|███████▍  | 18523/24880 [08:38<02:59, 35.35it/s]
 74%|███████▍  | 18527/24880 [08:38<02:59, 35.49it/s]
 74%|███████▍  | 18531/24880 [08:38<02:57, 35.72it/s]
 74%|███████▍  | 18535/24880 [08:38<02:57, 35.76it/s]
 75%|███████▍  | 18539/24880 [08:38<02:57, 35.82it/s]
 75%|███████▍  | 18543/24880 [08:38<02:57, 35.75it/s]
 75%|███████▍  | 18547/24880 [08:38<02:57, 35.61it/s]
 75%|███████▍  | 18551/24880 [08:38<02:58, 35.37it/s]
 75%|███████▍  | 18555/24880 [08:38<03:12, 32.88it/s]
 75%|███████▍  | 18559/24880 [08:39<03:07, 33.73it/s]
 75%|███████▍  | 18563/24880 [08:39<03:01, 34.80it/s]
 75%|███████▍  | 18567/24880 [08:39<02:55, 35.95it/s]
 75%|███████▍  | 18571/24880 [08:39<02:54, 36

Trial _objective_f556c_00013 finished iteration 3 at 2023-09-11 14:57:41. Total running time: 1hr 37min 20s
+-------------------------------------------------+
| Trial _objective_f556c_00013 result             |
+-------------------------------------------------+
| time_this_iter_s                        179.132 |
| time_total_s                            533.736 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.24407 |
| eval_runtime                             9.5317 |
| eval_samples_per_second                 435.073 |
| eval_steps_per_second                    13.639 |
| objective                               0.24407 |
+-------------------------------------------------+

[2m[36m(_objective pid=39398)[0m {'eval_loss': 0.24407190084457397, 'eval_runtime': 9.5317, 'eval_samples_per_second': 435.073, 'eval_steps_per_second': 13.639, 'epoch': 3.0}


[2m[36m(_objective pid=39398)[0m                                                      
[2m[36m(_objective pid=39398)[0m                                                  [A 75%|███████▌  | 18660/24880 [08:51<02:40, 38.69it/s]
[2m[36m(_objective pid=39398)[0m 100%|██████████| 130/130 [00:09<00:00, 12.02it/s][A
                                                 [A
 75%|███████▌  | 18662/24880 [08:52<1:24:38,  1.22it/s]
 75%|███████▌  | 18666/24880 [08:52<1:00:37,  1.71it/s]
 75%|███████▌  | 18670/24880 [08:52<43:35,  2.37it/s]  
 75%|███████▌  | 18674/24880 [08:53<31:32,  3.28it/s]
 75%|███████▌  | 18678/24880 [08:53<23:01,  4.49it/s]
 75%|███████▌  | 18682/24880 [08:53<17:03,  6.06it/s]
 75%|███████▌  | 18686/24880 [08:53<12:47,  8.07it/s]
 75%|███████▌  | 18690/24880 [08:53<09:54, 10.41it/s]
 75%|███████▌  | 18694/24880 [08:53<07:47, 13.23it/s]
 75%|███████▌  | 18698/24880 [08:53<06:18, 16.33it/s]
 75%|███████▌  | 18702/24880 [08:53<05:19, 19.34it/s]
 75%|███████▌  | 1870

[2m[36m(_objective pid=39398)[0m {'loss': 0.1016, 'learning_rate': 1.1078168009543292e-05, 'epoch': 3.05}


[2m[36m(_objective pid=39398)[0m  76%|███████▋  | 19000/24880 [09:01<02:26, 40.07it/s]                                                      76%|███████▋  | 19000/24880 [09:01<02:26, 40.07it/s]
 76%|███████▋  | 19005/24880 [09:01<02:30, 39.09it/s]
 76%|███████▋  | 19009/24880 [09:01<02:32, 38.41it/s]
 76%|███████▋  | 19013/24880 [09:01<02:32, 38.38it/s]
 76%|███████▋  | 19017/24880 [09:02<02:33, 38.12it/s]
 76%|███████▋  | 19021/24880 [09:02<02:34, 38.00it/s]
 76%|███████▋  | 19025/24880 [09:02<02:33, 38.17it/s]
 76%|███████▋  | 19029/24880 [09:02<02:33, 38.07it/s]
 76%|███████▋  | 19033/24880 [09:02<02:32, 38.46it/s]
 77%|███████▋  | 19037/24880 [09:02<02:31, 38.46it/s]
 77%|███████▋  | 19041/24880 [09:02<02:30, 38.83it/s]
 77%|███████▋  | 19045/24880 [09:02<02:30, 38.89it/s]
 77%|███████▋  | 19050/24880 [09:02<02:28, 39.34it/s]
 77%|███████▋  | 19054/24880 [09:03<02:30, 38.59it/s]
 77%|███████▋  | 19059/24880 [09:03<02:28, 39.10it/s]
 77%|███████▋  | 19063/24880 [09:03<02:29, 3

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:57:57. Total running time: 1hr 37min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

 77%|███████▋  | 19210/24880 [09:07<02:31, 37.50it/s]
 77%|███████▋  | 19214/24880 [09:07<02:33, 36.81it/s]
 77%|███████▋  | 19218/24880 [09:07<02:31, 37.27it/s]
 77%|███████▋  | 19222/24880 [09:07<02:31, 37.43it/s]
 77%|███████▋  | 19226/24880 [09:07<02:28, 37.98it/s]
 77%|███████▋  | 19230/24880 [09:07<02:26, 38.45it/s]
 77%|███████▋  | 19235/24880 [09:07<02:24, 38.99it/s]
 77%|███████▋  | 19239/24880 [09:07<02:27, 38.28it/s]
 77%|███████▋  | 19243/24880 [09:07<02:25, 38.62it/s]
 77%|███████▋  | 19247/24880 [09:08<02:27, 38.20it/s]
 77%|███████▋  | 19251/24880 [09:08<02:25, 38.60it/s]
 77%|███████▋  | 19255/24880 [09:08<02:24, 38.88it/s]
 77%|███████▋  | 19259/24880 [09:08<02:26, 38.25it/s]
 77%|███████▋  | 19263/24880 [09:08<02:26, 38.42it/s]
 77%|███████▋  | 19267/24880 [09:08<02:25, 38.56it/s]
 77%|███████▋  | 19271/24880 [09:08<02:31, 36.99it/s]
 77%|███████▋  | 19275/24880 [09:08<02:32, 36.64it/s]
 77%|███████▋  | 19279/24880 [09:08<02:32, 36.72it/s]
 78%|███████▊  | 19283/24880

[2m[36m(_objective pid=39398)[0m {'loss': 0.0966, 'learning_rate': 1.0136146920296413e-05, 'epoch': 3.14}


[2m[36m(_objective pid=39398)[0m                                                       78%|███████▊  | 19500/24880 [09:14<02:28, 36.23it/s] 78%|███████▊  | 19501/24880 [09:14<02:26, 36.70it/s]
 78%|███████▊  | 19505/24880 [09:15<02:26, 36.78it/s]
 78%|███████▊  | 19510/24880 [09:15<02:21, 37.94it/s]
 78%|███████▊  | 19514/24880 [09:15<02:20, 38.09it/s]
 78%|███████▊  | 19518/24880 [09:15<02:21, 37.87it/s]
 78%|███████▊  | 19522/24880 [09:15<02:20, 38.05it/s]
 78%|███████▊  | 19526/24880 [09:15<02:20, 38.19it/s]
 78%|███████▊  | 19530/24880 [09:15<02:20, 38.18it/s]
 79%|███████▊  | 19534/24880 [09:15<02:21, 37.84it/s]
 79%|███████▊  | 19539/24880 [09:15<02:19, 38.25it/s]
 79%|███████▊  | 19544/24880 [09:16<02:16, 39.02it/s]
 79%|███████▊  | 19548/24880 [09:16<02:15, 39.22it/s]
 79%|███████▊  | 19552/24880 [09:16<02:18, 38.54it/s]
 79%|███████▊  | 19556/24880 [09:16<02:20, 37.87it/s]
 79%|███████▊  | 19560/24880 [09:16<02:21, 37.73it/s]
 79%|███████▊  | 19564/24880 [09:16<02:27, 3

[2m[36m(_objective pid=39398)[0m {'loss': 0.1232, 'learning_rate': 9.194125831049536e-06, 'epoch': 3.22}


[2m[36m(_objective pid=39398)[0m                                                       80%|████████  | 20000/24880 [09:28<02:09, 37.54it/s] 80%|████████  | 20002/24880 [09:28<02:10, 37.37it/s]
 80%|████████  | 20006/24880 [09:28<02:08, 37.79it/s]
 80%|████████  | 20011/24880 [09:28<02:10, 37.27it/s]
 80%|████████  | 20015/24880 [09:28<02:15, 36.04it/s]
 80%|████████  | 20019/24880 [09:28<02:12, 36.78it/s]
 80%|████████  | 20023/24880 [09:28<02:11, 37.04it/s]
 80%|████████  | 20027/24880 [09:28<02:08, 37.86it/s]
 81%|████████  | 20031/24880 [09:28<02:13, 36.38it/s]
 81%|████████  | 20035/24880 [09:29<02:10, 37.24it/s]
 81%|████████  | 20039/24880 [09:29<02:08, 37.62it/s]
 81%|████████  | 20043/24880 [09:29<02:07, 38.00it/s]
 81%|████████  | 20047/24880 [09:29<02:06, 38.30it/s]
 81%|████████  | 20051/24880 [09:29<02:04, 38.77it/s]
 81%|████████  | 20055/24880 [09:29<02:11, 36.70it/s]
 81%|████████  | 20059/24880 [09:29<02:10, 36.97it/s]
 81%|████████  | 20063/24880 [09:29<02:10, 3

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:58:27. Total running time: 1hr 38min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  82%|████████▏ | 20336/24880 [09:37<02:01, 37.34it/s]
 82%|████████▏ | 20340/24880 [09:37<02:00, 37.65it/s]
 82%|████████▏ | 20344/24880 [09:37<01:59, 37.87it/s]
 82%|████████▏ | 20348/24880 [09:37<01:59, 37.87it/s]
 82%|████████▏ | 20352/24880 [09:37<02:01, 37.12it/s]
 82%|████████▏ | 20356/24880 [09:37<02:02, 36.99it/s]
 82%|████████▏ | 20360/24880 [09:37<02:01, 37.30it/s]
 82%|████████▏ | 20364/24880 [09:37<02:01, 37.19it/s]
 82%|████████▏ | 20368/24880 [09:37<02:02, 36.93it/s]
 82%|████████▏ | 20372/24880 [09:37<02:00, 37.46it/s]
 82%|████████▏ | 20377/24880 [09:38<01:57, 38.41it/s]
 82%|████████▏ | 20381/24880 [09:38<01:58, 38.05it/s]
 82%|████████▏ | 20385/24880 [09:38<02:04, 35.98it/s]
 82%|████████▏ | 20389/24880 [09:38<02:03, 36.32it/s]
 82%|████████▏ | 20393/24880 [09:38<02:00, 37.15it/s]
 82%|████████▏ | 20397/24880 [09:38<02:02, 36.58it/s]
 82%|████████▏ | 20401/24880 [09:38<02:00, 37.29it/s]
 82%|████████▏ | 20405/24880 [09:38<01:59, 37

[2m[36m(_objective pid=39398)[0m {'loss': 0.1046, 'learning_rate': 8.252104741802656e-06, 'epoch': 3.3}


[2m[36m(_objective pid=39398)[0m  82%|████████▏ | 20499/24880 [09:41<01:53, 38.72it/s]                                                      82%|████████▏ | 20500/24880 [09:41<01:53, 38.72it/s]
 82%|████████▏ | 20503/24880 [09:41<01:53, 38.70it/s]
 82%|████████▏ | 20507/24880 [09:41<01:53, 38.40it/s]
 82%|████████▏ | 20511/24880 [09:41<01:57, 37.17it/s]
 82%|████████▏ | 20515/24880 [09:41<02:03, 35.41it/s]
 82%|████████▏ | 20519/24880 [09:41<02:01, 35.99it/s]
 82%|████████▏ | 20523/24880 [09:42<01:58, 36.86it/s]
 83%|████████▎ | 20527/24880 [09:42<01:57, 36.91it/s]
 83%|████████▎ | 20531/24880 [09:42<01:58, 36.73it/s]
 83%|████████▎ | 20535/24880 [09:42<01:59, 36.38it/s]
 83%|████████▎ | 20539/24880 [09:42<01:57, 36.86it/s]
 83%|████████▎ | 20543/24880 [09:42<01:57, 36.76it/s]
 83%|████████▎ | 20547/24880 [09:42<01:58, 36.60it/s]
 83%|████████▎ | 20551/24880 [09:42<02:02, 35.29it/s]
 83%|████████▎ | 20555/24880 [09:42<01:59, 36.12it/s]
 83%|████████▎ | 20559/24880 [09:43<02:02, 3

[2m[36m(_objective pid=39398)[0m {'loss': 0.1207, 'learning_rate': 7.310083652555777e-06, 'epoch': 3.38}


[2m[36m(_objective pid=39398)[0m  84%|████████▍ | 21002/24880 [09:54<01:45, 36.77it/s]
 84%|████████▍ | 21006/24880 [09:55<01:48, 35.74it/s]
 84%|████████▍ | 21010/24880 [09:55<01:48, 35.75it/s]
 84%|████████▍ | 21014/24880 [09:55<01:46, 36.23it/s]
 84%|████████▍ | 21018/24880 [09:55<01:44, 36.83it/s]
 84%|████████▍ | 21022/24880 [09:55<01:43, 37.36it/s]
 85%|████████▍ | 21026/24880 [09:55<01:45, 36.68it/s]
 85%|████████▍ | 21030/24880 [09:55<01:48, 35.49it/s]
 85%|████████▍ | 21034/24880 [09:55<01:46, 36.11it/s]
 85%|████████▍ | 21038/24880 [09:55<01:45, 36.31it/s]
 85%|████████▍ | 21042/24880 [09:56<01:44, 36.60it/s]
 85%|████████▍ | 21046/24880 [09:56<01:44, 36.70it/s]
 85%|████████▍ | 21050/24880 [09:56<01:42, 37.21it/s]
 85%|████████▍ | 21054/24880 [09:56<01:41, 37.72it/s]
 85%|████████▍ | 21058/24880 [09:56<01:41, 37.62it/s]
 85%|████████▍ | 21062/24880 [09:56<01:48, 35.26it/s]
 85%|████████▍ | 21066/24880 [09:56<01:48, 35.12it/s]
 85%|████████▍ | 21070/24880 [09:56<01:49, 34

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:58:57. Total running time: 1hr 38min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  86%|████████▋ | 21461/24880 [10:07<01:30, 37.86it/s]
 86%|████████▋ | 21465/24880 [10:07<01:29, 38.05it/s]
 86%|████████▋ | 21469/24880 [10:07<01:29, 38.17it/s]
 86%|████████▋ | 21473/24880 [10:07<01:28, 38.60it/s]
 86%|████████▋ | 21478/24880 [10:07<01:26, 39.32it/s]
 86%|████████▋ | 21482/24880 [10:07<01:26, 39.49it/s]
 86%|████████▋ | 21487/24880 [10:07<01:25, 39.70it/s]
 86%|████████▋ | 21491/24880 [10:07<01:25, 39.75it/s]
 86%|████████▋ | 21495/24880 [10:08<01:25, 39.80it/s]


[2m[36m(_objective pid=39398)[0m {'loss': 0.0837, 'learning_rate': 6.368062563308899e-06, 'epoch': 3.46}


[2m[36m(_objective pid=39398)[0m  86%|████████▋ | 21500/24880 [10:08<01:24, 39.98it/s]                                                      86%|████████▋ | 21500/24880 [10:08<01:24, 39.98it/s]
 86%|████████▋ | 21504/24880 [10:08<01:32, 36.31it/s]
 86%|████████▋ | 21508/24880 [10:08<01:34, 35.77it/s]
 86%|████████▋ | 21512/24880 [10:08<01:31, 36.75it/s]
 86%|████████▋ | 21516/24880 [10:08<01:33, 35.93it/s]
 86%|████████▋ | 21520/24880 [10:08<01:34, 35.39it/s]
 87%|████████▋ | 21524/24880 [10:08<01:31, 36.60it/s]
 87%|████████▋ | 21529/24880 [10:08<01:28, 37.79it/s]
 87%|████████▋ | 21534/24880 [10:09<01:27, 38.05it/s]
 87%|████████▋ | 21538/24880 [10:09<01:26, 38.51it/s]
 87%|████████▋ | 21542/24880 [10:09<01:25, 38.82it/s]
 87%|████████▋ | 21547/24880 [10:09<01:24, 39.45it/s]
 87%|████████▋ | 21552/24880 [10:09<01:22, 40.11it/s]
 87%|████████▋ | 21557/24880 [10:09<01:24, 39.37it/s]
 87%|████████▋ | 21561/24880 [10:09<01:24, 39.11it/s]
 87%|████████▋ | 21565/24880 [10:09<01:26, 3

[2m[36m(_objective pid=39398)[0m {'loss': 0.0992, 'learning_rate': 5.42604147406202e-06, 'epoch': 3.54}


 88%|████████▊ | 22000/24880 [10:21<01:17, 37.07it/s]
 88%|████████▊ | 22004/24880 [10:21<01:19, 36.20it/s]
 88%|████████▊ | 22008/24880 [10:21<01:19, 36.21it/s]
 88%|████████▊ | 22012/24880 [10:21<01:20, 35.66it/s]
 88%|████████▊ | 22016/24880 [10:22<01:18, 36.49it/s]
 89%|████████▊ | 22020/24880 [10:22<01:16, 37.21it/s]
 89%|████████▊ | 22024/24880 [10:22<01:16, 37.28it/s]
 89%|████████▊ | 22029/24880 [10:22<01:14, 38.20it/s]
 89%|████████▊ | 22033/24880 [10:22<01:15, 37.95it/s]
 89%|████████▊ | 22038/24880 [10:22<01:13, 38.61it/s]
 89%|████████▊ | 22042/24880 [10:22<01:13, 38.38it/s]
 89%|████████▊ | 22046/24880 [10:22<01:13, 38.80it/s]
 89%|████████▊ | 22050/24880 [10:22<01:12, 38.91it/s]
 89%|████████▊ | 22054/24880 [10:23<01:15, 37.56it/s]
 89%|████████▊ | 22058/24880 [10:23<01:18, 36.03it/s]
 89%|████████▊ | 22062/24880 [10:23<01:16, 36.93it/s]
 89%|████████▊ | 22066/24880 [10:23<01:14, 37.65it/s]
 89%|████████▊ | 22071/24880 [10:23<01:13, 38.27it/s]
 89%|████████▊ | 22076/24880

[2m[36m(_objective pid=39398)[0m {'loss': 0.0994, 'learning_rate': 4.484020384815142e-06, 'epoch': 3.62}


[2m[36m(_objective pid=39398)[0m  90%|█████████ | 22504/24880 [10:35<01:07, 35.15it/s]
 90%|█████████ | 22508/24880 [10:35<01:07, 35.22it/s]
 90%|█████████ | 22512/24880 [10:35<01:09, 34.15it/s]
 90%|█████████ | 22516/24880 [10:35<01:07, 34.92it/s]
 91%|█████████ | 22520/24880 [10:35<01:06, 35.23it/s]
 91%|█████████ | 22524/24880 [10:35<01:05, 35.73it/s]
 91%|█████████ | 22528/24880 [10:36<01:05, 35.74it/s]
 91%|█████████ | 22532/24880 [10:36<01:05, 35.80it/s]
 91%|█████████ | 22536/24880 [10:36<01:05, 35.99it/s]
 91%|█████████ | 22540/24880 [10:36<01:05, 35.66it/s]
 91%|█████████ | 22544/24880 [10:36<01:06, 35.22it/s]
 91%|█████████ | 22548/24880 [10:36<01:05, 35.64it/s]
 91%|█████████ | 22552/24880 [10:36<01:04, 36.00it/s]
 91%|█████████ | 22556/24880 [10:36<01:05, 35.74it/s]
 91%|█████████ | 22560/24880 [10:36<01:05, 35.17it/s]
 91%|█████████ | 22564/24880 [10:37<01:07, 34.36it/s]


Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:59:27. Total running time: 1hr 39min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  91%|█████████ | 22568/24880 [10:37<01:05, 35.08it/s]
 91%|█████████ | 22572/24880 [10:37<01:05, 35.40it/s]
 91%|█████████ | 22576/24880 [10:37<01:04, 35.53it/s]
 91%|█████████ | 22580/24880 [10:37<01:04, 35.55it/s]
 91%|█████████ | 22584/24880 [10:37<01:03, 35.88it/s]
 91%|█████████ | 22588/24880 [10:37<01:03, 35.96it/s]
 91%|█████████ | 22592/24880 [10:37<01:03, 35.94it/s]
 91%|█████████ | 22596/24880 [10:37<01:03, 36.06it/s]
 91%|█████████ | 22600/24880 [10:38<01:02, 36.26it/s]
 91%|█████████ | 22604/24880 [10:38<01:04, 35.46it/s]
 91%|█████████ | 22608/24880 [10:38<01:04, 35.44it/s]
 91%|█████████ | 22612/24880 [10:38<01:03, 35.63it/s]
 91%|█████████ | 22616/24880 [10:38<01:02, 36.14it/s]
 91%|█████████ | 22620/24880 [10:38<01:02, 36.38it/s]
 91%|█████████ | 22624/24880 [10:38<01:01, 36.79it/s]
 91%|█████████ | 22628/24880 [10:38<01:01, 36.69it/s]
 91%|█████████ | 22632/24880 [10:38<01:01, 36.80it/s]
 91%|█████████ | 22636/24880 [10:39<01:01, 36

[2m[36m(_objective pid=39398)[0m {'loss': 0.1001, 'learning_rate': 3.541999295568263e-06, 'epoch': 3.7}


[2m[36m(_objective pid=39398)[0m  92%|█████████▏| 23004/24880 [10:49<00:51, 36.52it/s]
 92%|█████████▏| 23008/24880 [10:49<00:50, 36.85it/s]
 92%|█████████▏| 23012/24880 [10:49<00:51, 36.61it/s]
 93%|█████████▎| 23016/24880 [10:49<00:52, 35.56it/s]
 93%|█████████▎| 23020/24880 [10:49<00:52, 35.75it/s]
 93%|█████████▎| 23024/24880 [10:49<00:50, 36.42it/s]
 93%|█████████▎| 23028/24880 [10:49<00:50, 36.68it/s]
 93%|█████████▎| 23032/24880 [10:50<00:50, 36.56it/s]
 93%|█████████▎| 23036/24880 [10:50<00:52, 35.15it/s]
 93%|█████████▎| 23040/24880 [10:50<00:51, 36.08it/s]
 93%|█████████▎| 23044/24880 [10:50<00:51, 36.00it/s]
 93%|█████████▎| 23048/24880 [10:50<00:50, 36.61it/s]
 93%|█████████▎| 23052/24880 [10:50<00:49, 37.05it/s]
 93%|█████████▎| 23056/24880 [10:50<00:48, 37.44it/s]
 93%|█████████▎| 23060/24880 [10:50<00:48, 37.36it/s]
 93%|█████████▎| 23064/24880 [10:50<00:48, 37.77it/s]
 93%|█████████▎| 23068/24880 [10:50<00:47, 37.85it/s]
 93%|█████████▎| 23072/24880 [10:51<00:47, 38

[2m[36m(_objective pid=39398)[0m {'loss': 0.0765, 'learning_rate': 2.599978206321385e-06, 'epoch': 3.78}


[2m[36m(_objective pid=39398)[0m  94%|█████████▍| 23505/24880 [11:03<00:40, 33.67it/s]
 94%|█████████▍| 23509/24880 [11:03<00:41, 32.97it/s]
 95%|█████████▍| 23513/24880 [11:03<00:40, 33.49it/s]
 95%|█████████▍| 23517/24880 [11:03<00:40, 33.25it/s]
 95%|█████████▍| 23521/24880 [11:03<00:39, 34.08it/s]
 95%|█████████▍| 23525/24880 [11:03<00:39, 33.96it/s]
 95%|█████████▍| 23529/24880 [11:03<00:38, 34.78it/s]
 95%|█████████▍| 23533/24880 [11:03<00:38, 34.64it/s]
 95%|█████████▍| 23537/24880 [11:03<00:38, 34.70it/s]
 95%|█████████▍| 23541/24880 [11:04<00:38, 34.54it/s]
 95%|█████████▍| 23545/24880 [11:04<00:39, 33.50it/s]
 95%|█████████▍| 23549/24880 [11:04<00:39, 33.90it/s]
 95%|█████████▍| 23553/24880 [11:04<00:38, 34.67it/s]
 95%|█████████▍| 23557/24880 [11:04<00:38, 34.52it/s]
 95%|█████████▍| 23561/24880 [11:04<00:37, 34.81it/s]
 95%|█████████▍| 23565/24880 [11:04<00:36, 35.86it/s]
 95%|█████████▍| 23569/24880 [11:04<00:36, 35.95it/s]
 95%|█████████▍| 23573/24880 [11:04<00:35, 36

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 14:59:57. Total running time: 1hr 39min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                

[2m[36m(_objective pid=39398)[0m  95%|█████████▌| 23657/24880 [11:07<00:32, 37.36it/s]
 95%|█████████▌| 23661/24880 [11:07<00:32, 37.12it/s]
 95%|█████████▌| 23665/24880 [11:07<00:32, 37.36it/s]
 95%|█████████▌| 23669/24880 [11:07<00:32, 37.44it/s]
 95%|█████████▌| 23673/24880 [11:07<00:32, 36.80it/s]
 95%|█████████▌| 23677/24880 [11:07<00:33, 36.07it/s]
 95%|█████████▌| 23681/24880 [11:07<00:34, 35.08it/s]
 95%|█████████▌| 23685/24880 [11:07<00:33, 35.54it/s]
 95%|█████████▌| 23689/24880 [11:08<00:32, 36.52it/s]
 95%|█████████▌| 23693/24880 [11:08<00:32, 36.90it/s]
 95%|█████████▌| 23697/24880 [11:08<00:31, 37.01it/s]
 95%|█████████▌| 23701/24880 [11:08<00:32, 36.03it/s]
 95%|█████████▌| 23705/24880 [11:08<00:32, 35.81it/s]
 95%|█████████▌| 23709/24880 [11:08<00:32, 35.87it/s]
 95%|█████████▌| 23713/24880 [11:08<00:31, 36.83it/s]
 95%|█████████▌| 23717/24880 [11:08<00:32, 36.23it/s]
 95%|█████████▌| 23721/24880 [11:08<00:33, 34.99it/s]
 95%|█████████▌| 23725/24880 [11:09<00:32, 35

[2m[36m(_objective pid=39398)[0m {'loss': 0.0989, 'learning_rate': 1.6579571170745063e-06, 'epoch': 3.86}


[2m[36m(_objective pid=39398)[0m  96%|█████████▋| 24006/24880 [11:16<00:23, 37.02it/s]
 97%|█████████▋| 24010/24880 [11:16<00:23, 37.16it/s]
 97%|█████████▋| 24014/24880 [11:16<00:23, 36.85it/s]
 97%|█████████▋| 24018/24880 [11:17<00:24, 35.39it/s]
 97%|█████████▋| 24022/24880 [11:17<00:23, 36.40it/s]
 97%|█████████▋| 24026/24880 [11:17<00:23, 37.05it/s]
 97%|█████████▋| 24030/24880 [11:17<00:22, 37.73it/s]
 97%|█████████▋| 24034/24880 [11:17<00:22, 37.59it/s]
 97%|█████████▋| 24038/24880 [11:17<00:22, 37.91it/s]
 97%|█████████▋| 24042/24880 [11:17<00:22, 37.05it/s]
 97%|█████████▋| 24046/24880 [11:17<00:22, 36.99it/s]
 97%|█████████▋| 24050/24880 [11:17<00:23, 34.86it/s]
 97%|█████████▋| 24054/24880 [11:18<00:23, 35.08it/s]
 97%|█████████▋| 24058/24880 [11:18<00:23, 35.58it/s]
 97%|█████████▋| 24062/24880 [11:18<00:23, 34.60it/s]
 97%|█████████▋| 24066/24880 [11:18<00:23, 35.32it/s]
 97%|█████████▋| 24070/24880 [11:18<00:23, 34.14it/s]
 97%|█████████▋| 24074/24880 [11:18<00:23, 34

[2m[36m(_objective pid=39398)[0m {'loss': 0.1099, 'learning_rate': 7.159360278276278e-07, 'epoch': 3.94}


[2m[36m(_objective pid=39398)[0m                                                       98%|█████████▊| 24500/24880 [11:30<00:10, 36.90it/s] 98%|█████████▊| 24503/24880 [11:30<00:10, 37.41it/s]
 99%|█████████▊| 24507/24880 [11:30<00:10, 36.28it/s]
 99%|█████████▊| 24511/24880 [11:30<00:10, 36.75it/s]
 99%|█████████▊| 24515/24880 [11:31<00:09, 37.26it/s]
 99%|█████████▊| 24519/24880 [11:31<00:09, 37.87it/s]
 99%|█████████▊| 24523/24880 [11:31<00:09, 37.37it/s]
 99%|█████████▊| 24527/24880 [11:31<00:09, 36.64it/s]
 99%|█████████▊| 24531/24880 [11:31<00:09, 36.30it/s]
 99%|█████████▊| 24535/24880 [11:31<00:09, 36.29it/s]
 99%|█████████▊| 24539/24880 [11:31<00:09, 36.42it/s]
 99%|█████████▊| 24543/24880 [11:31<00:09, 35.25it/s]
 99%|█████████▊| 24547/24880 [11:31<00:09, 35.13it/s]
 99%|█████████▊| 24551/24880 [11:32<00:09, 36.23it/s]
 99%|█████████▊| 24555/24880 [11:32<00:08, 36.84it/s]
 99%|█████████▊| 24560/24880 [11:32<00:08, 38.00it/s]
 99%|█████████▊| 24564/24880 [11:32<00:08, 3

Trial status: 13 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-11 15:00:27. Total running time: 1hr 40min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00013   RUNNING          4.6875e-05                     4                 

[2m[36m(_objective pid=39398)[0m  99%|█████████▉| 24744/24880 [11:37<00:03, 35.58it/s]
 99%|█████████▉| 24748/24880 [11:37<00:03, 36.56it/s]
 99%|█████████▉| 24752/24880 [11:37<00:03, 35.32it/s]
100%|█████████▉| 24756/24880 [11:37<00:03, 36.16it/s]
100%|█████████▉| 24761/24880 [11:37<00:03, 37.51it/s]
100%|█████████▉| 24765/24880 [11:37<00:03, 38.03it/s]
100%|█████████▉| 24769/24880 [11:38<00:03, 36.75it/s]
100%|█████████▉| 24774/24880 [11:38<00:02, 37.77it/s]
100%|█████████▉| 24778/24880 [11:38<00:02, 38.29it/s]
100%|█████████▉| 24782/24880 [11:38<00:02, 38.65it/s]
100%|█████████▉| 24786/24880 [11:38<00:02, 38.48it/s]
100%|█████████▉| 24790/24880 [11:38<00:02, 38.33it/s]
100%|█████████▉| 24795/24880 [11:38<00:02, 38.98it/s]
100%|█████████▉| 24799/24880 [11:38<00:02, 39.03it/s]
100%|█████████▉| 24803/24880 [11:38<00:01, 39.00it/s]
100%|█████████▉| 24807/24880 [11:38<00:01, 39.15it/s]
100%|█████████▉| 24812/24880 [11:39<00:01, 39.75it/s]
100%|█████████▉| 24817/24880 [11:39<00:01, 39

Trial _objective_f556c_00013 finished iteration 4 at 2023-09-11 15:00:40. Total running time: 1hr 40min 19s
+-------------------------------------------------+
| Trial _objective_f556c_00013 result             |
+-------------------------------------------------+
| time_this_iter_s                         179.11 |
| time_total_s                            712.847 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.24927 |
| eval_runtime                             9.5344 |
| eval_samples_per_second                 434.952 |
| eval_steps_per_second                    13.635 |
| objective                               0.24927 |
+-------------------------------------------------+

[2m[36m(_objective pid=39398)[0m {'eval_loss': 0.2492665797472, 'eval_runtime': 9.5344, 'eval_samples_per_second': 434.952, 'eval_steps_per_second': 13.635, 'epoch': 4.0}


[2m[36m(_objective pid=39398)[0m                                                      
[2m[36m(_objective pid=39398)[0m                                                  [A100%|██████████| 24880/24880 [11:50<00:00, 37.41it/s]
[2m[36m(_objective pid=39398)[0m 100%|██████████| 130/130 [00:09<00:00, 12.06it/s][A
[2m[36m(_objective pid=39398)[0m                                                  [A


[2m[36m(_objective pid=39398)[0m {'train_runtime': 711.9968, 'train_samples_per_second': 69.882, 'train_steps_per_second': 34.944, 'train_loss': 0.2035555917733735, 'epoch': 4.0}
Trial _objective_f556c_00013 completed after 4 iterations at 2023-09-11 15:00:42. Total running time: 1hr 40min 21s



[2m[36m(_objective pid=39398)[0m                                                      100%|██████████| 24880/24880 [11:51<00:00, 37.41it/s]100%|██████████| 24880/24880 [11:51<00:00, 34.94it/s]


Trial _objective_f556c_00014 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00014 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             4e-05 |
| num_train_epochs                              5 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.10703 |
+-------------------------------------------------+



[2m[36m(_objective pid=42509)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
[2m[36m(_objective pid=42509)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=42509)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=42509)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:00:57. Total running time: 1hr 40min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m   1%|▏         | 213/15550 [00:06<08:32, 29.91it/s]
  1%|▏         | 217/15550 [00:06<08:01, 31.87it/s]
  1%|▏         | 221/15550 [00:06<07:31, 33.93it/s]
  1%|▏         | 225/15550 [00:06<08:23, 30.44it/s]
  1%|▏         | 229/15550 [00:07<07:49, 32.63it/s]
  1%|▏         | 233/15550 [00:07<07:33, 33.81it/s]
  2%|▏         | 237/15550 [00:07<07:31, 33.95it/s]
  2%|▏         | 241/15550 [00:07<07:23, 34.53it/s]
  2%|▏         | 245/15550 [00:07<07:36, 33.52it/s]
  2%|▏         | 249/15550 [00:07<08:19, 30.66it/s]
  2%|▏         | 253/15550 [00:07<08:03, 31.64it/s]
  2%|▏         | 257/15550 [00:07<08:12, 31.05it/s]
  2%|▏         | 261/15550 [00:08<07:42, 33.03it/s]
  2%|▏         | 265/15550 [00:08<07:25, 34.32it/s]
  2%|▏         | 269/15550 [00:08<07:06, 35.82it/s]
  2%|▏         | 273/15550 [00:08<07:24, 34.39it/s]
  2%|▏         | 277/15550 [00:08<07:10, 35.45it/s]
  2%|▏         | 281/15550 [00:08<07:16, 34.96it/s]
  2%|▏         | 285/15550 

[2m[36m(_objective pid=42509)[0m {'loss': 0.4006, 'learning_rate': 3.861007757573655e-05, 'epoch': 0.16}


[2m[36m(_objective pid=42509)[0m   3%|▎         | 506/15550 [00:15<07:43, 32.48it/s]
  3%|▎         | 510/15550 [00:15<07:45, 32.33it/s]
  3%|▎         | 514/15550 [00:15<07:31, 33.30it/s]
  3%|▎         | 518/15550 [00:15<07:13, 34.68it/s]
  3%|▎         | 522/15550 [00:15<06:56, 36.10it/s]
  3%|▎         | 526/15550 [00:15<06:50, 36.60it/s]
  3%|▎         | 530/15550 [00:15<06:42, 37.32it/s]
  3%|▎         | 534/15550 [00:16<07:15, 34.49it/s]
  3%|▎         | 538/15550 [00:16<07:39, 32.69it/s]
  3%|▎         | 542/15550 [00:16<07:15, 34.46it/s]
  4%|▎         | 546/15550 [00:16<07:00, 35.67it/s]
  4%|▎         | 550/15550 [00:16<06:50, 36.50it/s]
  4%|▎         | 554/15550 [00:16<06:51, 36.44it/s]
  4%|▎         | 558/15550 [00:16<06:48, 36.73it/s]
  4%|▎         | 562/15550 [00:16<06:44, 37.06it/s]
  4%|▎         | 566/15550 [00:16<06:42, 37.19it/s]
  4%|▎         | 570/15550 [00:17<06:45, 36.91it/s]
  4%|▎         | 575/15550 [00:17<06:30, 38.31it/s]
  4%|▎         | 579/15550 

[2m[36m(_objective pid=42509)[0m {'loss': 0.3216, 'learning_rate': 3.732735074597786e-05, 'epoch': 0.32}


[2m[36m(_objective pid=42509)[0m   6%|▋         | 1009/15550 [00:29<06:15, 38.75it/s]
  7%|▋         | 1013/15550 [00:29<06:14, 38.81it/s]
  7%|▋         | 1017/15550 [00:30<06:16, 38.62it/s]
  7%|▋         | 1021/15550 [00:30<06:19, 38.31it/s]
  7%|▋         | 1025/15550 [00:30<06:38, 36.41it/s]
  7%|▋         | 1029/15550 [00:30<06:34, 36.84it/s]
  7%|▋         | 1033/15550 [00:30<06:29, 37.27it/s]
  7%|▋         | 1037/15550 [00:30<06:42, 36.07it/s]
  7%|▋         | 1041/15550 [00:30<06:34, 36.79it/s]
  7%|▋         | 1045/15550 [00:30<06:40, 36.20it/s]
  7%|▋         | 1049/15550 [00:30<06:35, 36.68it/s]
  7%|▋         | 1053/15550 [00:31<07:00, 34.47it/s]
  7%|▋         | 1057/15550 [00:31<06:49, 35.37it/s]
  7%|▋         | 1061/15550 [00:31<06:51, 35.22it/s]
  7%|▋         | 1065/15550 [00:31<06:47, 35.55it/s]
  7%|▋         | 1069/15550 [00:31<06:37, 36.44it/s]
  7%|▋         | 1073/15550 [00:31<06:30, 37.04it/s]
  7%|▋         | 1077/15550 [00:31<06:27, 37.39it/s]
  7%|▋   

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:01:27. Total running time: 1hr 41min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m   8%|▊         | 1233/15550 [00:36<07:25, 32.11it/s]
  8%|▊         | 1237/15550 [00:36<08:08, 29.28it/s]
  8%|▊         | 1241/15550 [00:36<07:42, 30.92it/s]
  8%|▊         | 1245/15550 [00:36<08:05, 29.47it/s]
  8%|▊         | 1249/15550 [00:36<07:39, 31.09it/s]
  8%|▊         | 1253/15550 [00:37<07:23, 32.22it/s]
  8%|▊         | 1257/15550 [00:37<07:14, 32.92it/s]
  8%|▊         | 1261/15550 [00:37<06:56, 34.27it/s]
  8%|▊         | 1265/15550 [00:37<06:52, 34.64it/s]
  8%|▊         | 1269/15550 [00:37<06:47, 35.08it/s]
  8%|▊         | 1273/15550 [00:37<06:56, 34.27it/s]
  8%|▊         | 1277/15550 [00:37<07:00, 33.94it/s]
  8%|▊         | 1281/15550 [00:37<07:00, 33.92it/s]
  8%|▊         | 1285/15550 [00:37<06:53, 34.52it/s]
  8%|▊         | 1289/15550 [00:38<06:42, 35.47it/s]
  8%|▊         | 1293/15550 [00:38<07:17, 32.59it/s]
  8%|▊         | 1297/15550 [00:38<07:03, 33.63it/s]
  8%|▊         | 1301/15550 [00:38<07:22, 32.21it/s]
  8%|▊   

[2m[36m(_objective pid=42509)[0m {'loss': 0.3232, 'learning_rate': 3.6044623916219174e-05, 'epoch': 0.48}


[2m[36m(_objective pid=42509)[0m  10%|▉         | 1508/15550 [00:44<06:16, 37.28it/s]
 10%|▉         | 1512/15550 [00:44<06:16, 37.30it/s]
 10%|▉         | 1516/15550 [00:44<06:14, 37.52it/s]
 10%|▉         | 1520/15550 [00:44<06:13, 37.59it/s]
 10%|▉         | 1524/15550 [00:45<06:25, 36.39it/s]
 10%|▉         | 1528/15550 [00:45<06:15, 37.39it/s]
 10%|▉         | 1532/15550 [00:45<06:10, 37.79it/s]
 10%|▉         | 1536/15550 [00:45<06:13, 37.56it/s]
 10%|▉         | 1540/15550 [00:45<06:13, 37.48it/s]
 10%|▉         | 1544/15550 [00:45<06:53, 33.89it/s]
 10%|▉         | 1548/15550 [00:45<06:46, 34.47it/s]
 10%|▉         | 1552/15550 [00:45<06:37, 35.22it/s]
 10%|█         | 1556/15550 [00:46<07:32, 30.92it/s]
 10%|█         | 1560/15550 [00:46<07:46, 29.97it/s]
 10%|█         | 1564/15550 [00:46<07:13, 32.25it/s]
 10%|█         | 1568/15550 [00:46<07:09, 32.59it/s]
 10%|█         | 1572/15550 [00:46<06:58, 33.41it/s]
 10%|█         | 1576/15550 [00:46<06:41, 34.82it/s]
 10%|█   

[2m[36m(_objective pid=42509)[0m {'loss': 0.233, 'learning_rate': 3.476189708646048e-05, 'epoch': 0.64}


[2m[36m(_objective pid=42509)[0m  13%|█▎        | 2005/15550 [00:59<06:25, 35.16it/s]
 13%|█▎        | 2009/15550 [00:59<06:15, 36.05it/s]
 13%|█▎        | 2013/15550 [00:59<06:10, 36.56it/s]
 13%|█▎        | 2017/15550 [01:00<06:10, 36.48it/s]
 13%|█▎        | 2021/15550 [01:00<06:36, 34.10it/s]
 13%|█▎        | 2025/15550 [01:00<07:38, 29.47it/s]
 13%|█▎        | 2029/15550 [01:00<07:13, 31.21it/s]
 13%|█▎        | 2033/15550 [01:00<06:49, 33.03it/s]
 13%|█▎        | 2037/15550 [01:00<06:30, 34.64it/s]
 13%|█▎        | 2041/15550 [01:00<06:17, 35.80it/s]
 13%|█▎        | 2045/15550 [01:00<06:52, 32.74it/s]
 13%|█▎        | 2049/15550 [01:00<06:40, 33.71it/s]
 13%|█▎        | 2053/15550 [01:01<07:07, 31.57it/s]
 13%|█▎        | 2057/15550 [01:01<07:09, 31.41it/s]
 13%|█▎        | 2062/15550 [01:01<06:47, 33.11it/s]
 13%|█▎        | 2066/15550 [01:01<06:30, 34.54it/s]
 13%|█▎        | 2070/15550 [01:01<06:16, 35.77it/s]
 13%|█▎        | 2074/15550 [01:01<06:05, 36.82it/s]
 13%|█▎  

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:01:57. Total running time: 1hr 41min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m  14%|█▍        | 2237/15550 [01:06<06:14, 35.55it/s]
 14%|█▍        | 2241/15550 [01:06<06:25, 34.55it/s]
 14%|█▍        | 2245/15550 [01:06<06:10, 35.92it/s]
 14%|█▍        | 2249/15550 [01:06<06:46, 32.73it/s]
 14%|█▍        | 2253/15550 [01:06<06:42, 33.04it/s]
 15%|█▍        | 2257/15550 [01:07<06:33, 33.80it/s]
 15%|█▍        | 2261/15550 [01:07<06:31, 33.95it/s]
 15%|█▍        | 2265/15550 [01:07<07:12, 30.73it/s]
 15%|█▍        | 2269/15550 [01:07<06:51, 32.31it/s]
 15%|█▍        | 2273/15550 [01:07<06:47, 32.61it/s]
 15%|█▍        | 2277/15550 [01:07<06:32, 33.82it/s]
 15%|█▍        | 2281/15550 [01:07<06:23, 34.63it/s]
 15%|█▍        | 2285/15550 [01:07<07:18, 30.22it/s]
 15%|█▍        | 2289/15550 [01:08<07:04, 31.26it/s]
 15%|█▍        | 2293/15550 [01:08<06:54, 32.00it/s]
 15%|█▍        | 2297/15550 [01:08<06:39, 33.19it/s]
 15%|█▍        | 2301/15550 [01:08<06:40, 33.09it/s]
 15%|█▍        | 2305/15550 [01:08<06:39, 33.14it/s]
 15%|█▍  

[2m[36m(_objective pid=42509)[0m {'loss': 0.2413, 'learning_rate': 3.34791702567018e-05, 'epoch': 0.8}


[2m[36m(_objective pid=42509)[0m  16%|█▌        | 2506/15550 [01:14<06:39, 32.66it/s]
 16%|█▌        | 2510/15550 [01:14<06:44, 32.21it/s]
 16%|█▌        | 2514/15550 [01:14<07:24, 29.36it/s]
 16%|█▌        | 2518/15550 [01:15<06:58, 31.16it/s]
 16%|█▌        | 2522/15550 [01:15<06:39, 32.61it/s]
 16%|█▌        | 2526/15550 [01:15<06:35, 32.90it/s]
 16%|█▋        | 2530/15550 [01:15<06:45, 32.14it/s]
 16%|█▋        | 2534/15550 [01:15<06:37, 32.76it/s]
 16%|█▋        | 2538/15550 [01:15<06:23, 33.94it/s]
 16%|█▋        | 2542/15550 [01:15<06:10, 35.08it/s]
 16%|█▋        | 2546/15550 [01:15<06:08, 35.28it/s]
 16%|█▋        | 2550/15550 [01:16<06:06, 35.44it/s]
 16%|█▋        | 2554/15550 [01:16<06:03, 35.79it/s]
 16%|█▋        | 2558/15550 [01:16<05:54, 36.65it/s]
 16%|█▋        | 2562/15550 [01:16<05:58, 36.22it/s]
 17%|█▋        | 2566/15550 [01:16<06:32, 33.12it/s]
 17%|█▋        | 2570/15550 [01:16<06:18, 34.28it/s]
 17%|█▋        | 2574/15550 [01:16<06:09, 35.13it/s]
 17%|█▋  

[2m[36m(_objective pid=42509)[0m {'loss': 0.2267, 'learning_rate': 3.219644342694311e-05, 'epoch': 0.96}


[2m[36m(_objective pid=42509)[0m  19%|█▉        | 3005/15550 [01:29<07:17, 28.65it/s]
 19%|█▉        | 3009/15550 [01:29<06:41, 31.24it/s]
 19%|█▉        | 3013/15550 [01:30<06:20, 32.96it/s]
 19%|█▉        | 3017/15550 [01:30<06:04, 34.39it/s]
 19%|█▉        | 3021/15550 [01:30<05:54, 35.35it/s]
 19%|█▉        | 3025/15550 [01:30<05:46, 36.13it/s]
 19%|█▉        | 3029/15550 [01:30<06:20, 32.94it/s]
 20%|█▉        | 3033/15550 [01:30<06:03, 34.41it/s]
 20%|█▉        | 3037/15550 [01:30<05:50, 35.70it/s]
 20%|█▉        | 3041/15550 [01:30<05:59, 34.75it/s]
 20%|█▉        | 3045/15550 [01:30<05:48, 35.83it/s]
 20%|█▉        | 3049/15550 [01:31<05:54, 35.31it/s]
 20%|█▉        | 3053/15550 [01:31<05:51, 35.53it/s]
 20%|█▉        | 3057/15550 [01:31<05:43, 36.33it/s]
 20%|█▉        | 3061/15550 [01:31<05:49, 35.78it/s]
 20%|█▉        | 3065/15550 [01:31<05:46, 36.08it/s]
 20%|█▉        | 3069/15550 [01:31<06:00, 34.64it/s]
 20%|█▉        | 3073/15550 [01:31<05:47, 35.95it/s]
 20%|█▉  

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:02:27. Total running time: 1hr 42min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m 
[2m[36m(_objective pid=42509)[0m  41%|████      | 53/130 [00:03<00:05, 13.31it/s][A
[2m[36m(_objective pid=42509)[0m 
 42%|████▏     | 55/130 [00:03<00:05, 12.74it/s][A
[2m[36m(_objective pid=42509)[0m 
 44%|████▍     | 57/130 [00:04<00:05, 13.22it/s][A
[2m[36m(_objective pid=42509)[0m 
 45%|████▌     | 59/130 [00:04<00:05, 13.35it/s][A
[2m[36m(_objective pid=42509)[0m 
 47%|████▋     | 61/130 [00:04<00:04, 13.94it/s][A
[2m[36m(_objective pid=42509)[0m 
 48%|████▊     | 63/130 [00:04<00:04, 15.04it/s][A
[2m[36m(_objective pid=42509)[0m 
 50%|█████     | 65/130 [00:04<00:04, 14.00it/s][A
[2m[36m(_objective pid=42509)[0m 
 52%|█████▏    | 67/130 [00:04<00:04, 14.96it/s][A
[2m[36m(_objective pid=42509)[0m 
 54%|█████▍    | 70/130 [00:04<00:03, 16.95it/s][A
[2m[36m(_objective pid=42509)[0m 
 55%|█████▌    | 72/130 [00:05<00:04, 13.25it/s][A
[2m[36m(_objective pid=42509)[0m 
 57%|█████▋    | 74/130 [00:05<00:04

Trial _objective_f556c_00014 finished iteration 1 at 2023-09-11 15:02:33. Total running time: 1hr 42min 12s
+-------------------------------------------------+
| Trial _objective_f556c_00014 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.783 |
| time_total_s                            104.783 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.19919 |
| eval_runtime                             9.5998 |
| eval_samples_per_second                 431.989 |
| eval_steps_per_second                    13.542 |
| objective                               0.19919 |
+-------------------------------------------------+

[2m[36m(_objective pid=42509)[0m {'eval_loss': 0.1991904228925705, 'eval_runtime': 9.5998, 'eval_samples_per_second': 431.989, 'eval_steps_per_second': 13.542, 'epoch': 1.0}


[2m[36m(_objective pid=42509)[0m                                                     
[2m[36m(_objective pid=42509)[0m                                                  [A 20%|██        | 3110/15550 [01:42<05:58, 34.73it/s]
[2m[36m(_objective pid=42509)[0m 100%|██████████| 130/130 [00:09<00:00, 11.95it/s][A
                                                 [A
 20%|██        | 3113/15550 [01:43<2:57:01,  1.17it/s]
 20%|██        | 3116/15550 [01:44<2:15:41,  1.53it/s]
 20%|██        | 3120/15550 [01:44<1:35:57,  2.16it/s]
 20%|██        | 3124/15550 [01:44<1:08:04,  3.04it/s]
 20%|██        | 3128/15550 [01:44<49:06,  4.22it/s]  
 20%|██        | 3132/15550 [01:44<36:05,  5.74it/s]
 20%|██        | 3136/15550 [01:44<27:08,  7.62it/s]
 20%|██        | 3140/15550 [01:44<21:02,  9.83it/s]
 20%|██        | 3144/15550 [01:45<16:35, 12.47it/s]
 20%|██        | 3152/15550 [01:45<11:31, 17.93it/s]
 20%|██        | 3156/15550 [01:45<18:59, 10.88it/s]
 20%|██        | 3159/15550 [0

[2m[36m(_objective pid=42509)[0m {'loss': 0.1693, 'learning_rate': 3.091371659718442e-05, 'epoch': 1.13}


 23%|██▎       | 3508/15550 [01:56<05:28, 36.70it/s]
 23%|██▎       | 3512/15550 [01:56<05:49, 34.49it/s]
 23%|██▎       | 3516/15550 [01:56<05:58, 33.58it/s]
 23%|██▎       | 3520/15550 [01:56<05:52, 34.08it/s]
 23%|██▎       | 3524/15550 [01:56<06:27, 31.06it/s]
 23%|██▎       | 3528/15550 [01:56<06:29, 30.87it/s]
 23%|██▎       | 3532/15550 [01:56<06:06, 32.80it/s]
 23%|██▎       | 3536/15550 [01:57<06:14, 32.11it/s]
 23%|██▎       | 3540/15550 [01:57<06:17, 31.81it/s]
 23%|██▎       | 3544/15550 [01:57<06:05, 32.85it/s]
 23%|██▎       | 3548/15550 [01:57<05:53, 33.98it/s]
 23%|██▎       | 3552/15550 [01:57<05:46, 34.61it/s]
 23%|██▎       | 3556/15550 [01:57<05:42, 35.02it/s]
 23%|██▎       | 3560/15550 [01:57<05:50, 34.22it/s]
 23%|██▎       | 3564/15550 [01:57<06:09, 32.45it/s]
 23%|██▎       | 3568/15550 [01:58<05:54, 33.77it/s]
 23%|██▎       | 3572/15550 [01:58<06:25, 31.05it/s]
 23%|██▎       | 3576/15550 [01:58<06:08, 32.46it/s]
 23%|██▎       | 3580/15550 [01:58<05:59, 33.2

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:02:57. Total running time: 1hr 42min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

 25%|██▍       | 3844/15550 [02:06<05:34, 34.96it/s]
 25%|██▍       | 3848/15550 [02:06<06:01, 32.38it/s]
 25%|██▍       | 3852/15550 [02:06<06:22, 30.59it/s]
 25%|██▍       | 3856/15550 [02:06<05:57, 32.68it/s]
 25%|██▍       | 3860/15550 [02:07<05:49, 33.44it/s]
 25%|██▍       | 3864/15550 [02:07<05:33, 34.99it/s]
 25%|██▍       | 3868/15550 [02:07<05:28, 35.61it/s]
 25%|██▍       | 3873/15550 [02:07<05:15, 37.05it/s]
 25%|██▍       | 3877/15550 [02:07<05:32, 35.13it/s]
 25%|██▍       | 3881/15550 [02:07<05:51, 33.24it/s]
 25%|██▍       | 3885/15550 [02:07<05:41, 34.19it/s]
 25%|██▌       | 3889/15550 [02:07<05:47, 33.54it/s]
 25%|██▌       | 3893/15550 [02:07<05:35, 34.74it/s]
 25%|██▌       | 3897/15550 [02:08<05:29, 35.41it/s]
 25%|██▌       | 3901/15550 [02:08<05:35, 34.73it/s]
 25%|██▌       | 3905/15550 [02:08<05:27, 35.61it/s]
 25%|██▌       | 3909/15550 [02:08<05:19, 36.46it/s]
 25%|██▌       | 3913/15550 [02:08<06:03, 32.05it/s]
 25%|██▌       | 3917/15550 [02:08<05:44, 33.8

[2m[36m(_objective pid=42509)[0m {'loss': 0.1467, 'learning_rate': 2.9630989767425726e-05, 'epoch': 1.29}


[2m[36m(_objective pid=42509)[0m  26%|██▌       | 4005/15550 [02:11<06:07, 31.44it/s]
 26%|██▌       | 4009/15550 [02:11<06:24, 30.05it/s]
 26%|██▌       | 4014/15550 [02:11<05:51, 32.86it/s]
 26%|██▌       | 4018/15550 [02:11<05:35, 34.34it/s]
 26%|██▌       | 4022/15550 [02:11<05:24, 35.49it/s]
 26%|██▌       | 4026/15550 [02:11<05:19, 36.06it/s]
 26%|██▌       | 4030/15550 [02:12<05:10, 37.08it/s]
 26%|██▌       | 4034/15550 [02:12<05:12, 36.81it/s]
 26%|██▌       | 4038/15550 [02:12<05:12, 36.81it/s]
 26%|██▌       | 4042/15550 [02:12<05:09, 37.14it/s]
 26%|██▌       | 4046/15550 [02:12<05:10, 37.03it/s]
 26%|██▌       | 4050/15550 [02:12<05:06, 37.56it/s]
 26%|██▌       | 4054/15550 [02:12<05:09, 37.16it/s]
 26%|██▌       | 4058/15550 [02:12<05:16, 36.35it/s]
 26%|██▌       | 4062/15550 [02:12<05:14, 36.52it/s]
 26%|██▌       | 4066/15550 [02:13<05:10, 36.96it/s]
 26%|██▌       | 4070/15550 [02:13<05:10, 37.01it/s]
 26%|██▌       | 4074/15550 [02:13<05:12, 36.69it/s]
 26%|██▌ 

[2m[36m(_objective pid=42509)[0m {'loss': 0.1429, 'learning_rate': 2.8348262937667035e-05, 'epoch': 1.45}


[2m[36m(_objective pid=42509)[0m  29%|██▉       | 4506/15550 [02:26<05:24, 34.03it/s]
 29%|██▉       | 4510/15550 [02:26<05:13, 35.17it/s]
 29%|██▉       | 4514/15550 [02:26<05:10, 35.53it/s]
 29%|██▉       | 4518/15550 [02:26<05:02, 36.50it/s]
 29%|██▉       | 4522/15550 [02:26<04:59, 36.76it/s]
 29%|██▉       | 4526/15550 [02:26<04:55, 37.30it/s]
 29%|██▉       | 4530/15550 [02:26<04:55, 37.35it/s]
 29%|██▉       | 4534/15550 [02:26<05:27, 33.64it/s]
 29%|██▉       | 4538/15550 [02:26<05:34, 32.90it/s]
 29%|██▉       | 4542/15550 [02:27<05:19, 34.50it/s]
 29%|██▉       | 4546/15550 [02:27<05:29, 33.44it/s]
 29%|██▉       | 4550/15550 [02:27<05:22, 34.12it/s]
 29%|██▉       | 4555/15550 [02:27<05:05, 36.00it/s]
 29%|██▉       | 4559/15550 [02:27<05:32, 33.09it/s]
 29%|██▉       | 4563/15550 [02:27<05:30, 33.23it/s]
 29%|██▉       | 4567/15550 [02:27<05:14, 34.96it/s]
 29%|██▉       | 4571/15550 [02:27<05:04, 36.09it/s]
 29%|██▉       | 4575/15550 [02:27<05:08, 35.63it/s]
 29%|██▉ 

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:03:27. Total running time: 1hr 43min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m  31%|███       | 4854/15550 [02:36<05:19, 33.50it/s]
 31%|███       | 4858/15550 [02:36<05:06, 34.84it/s]
 31%|███▏      | 4862/15550 [02:36<05:05, 34.95it/s]
 31%|███▏      | 4866/15550 [02:36<04:58, 35.76it/s]
 31%|███▏      | 4870/15550 [02:36<04:54, 36.22it/s]
 31%|███▏      | 4874/15550 [02:37<04:56, 35.99it/s]
 31%|███▏      | 4878/15550 [02:37<04:51, 36.58it/s]
 31%|███▏      | 4882/15550 [02:37<04:47, 37.12it/s]
 31%|███▏      | 4886/15550 [02:37<04:56, 36.00it/s]
 31%|███▏      | 4890/15550 [02:37<04:54, 36.24it/s]
 31%|███▏      | 4894/15550 [02:37<04:47, 37.04it/s]
 31%|███▏      | 4898/15550 [02:37<04:52, 36.38it/s]
 32%|███▏      | 4902/15550 [02:37<04:59, 35.57it/s]
 32%|███▏      | 4906/15550 [02:37<05:09, 34.42it/s]
 32%|███▏      | 4910/15550 [02:38<05:04, 34.89it/s]
 32%|███▏      | 4914/15550 [02:38<05:08, 34.51it/s]
 32%|███▏      | 4918/15550 [02:38<05:00, 35.38it/s]
 32%|███▏      | 4922/15550 [02:38<04:58, 35.62it/s]
 32%|███▏

[2m[36m(_objective pid=42509)[0m {'loss': 0.1645, 'learning_rate': 2.7065536107908347e-05, 'epoch': 1.61}


[2m[36m(_objective pid=42509)[0m  32%|███▏      | 5006/15550 [02:41<05:10, 34.01it/s]
 32%|███▏      | 5010/15550 [02:41<05:03, 34.75it/s]
 32%|███▏      | 5014/15550 [02:41<04:59, 35.14it/s]
 32%|███▏      | 5018/15550 [02:41<05:14, 33.51it/s]
 32%|███▏      | 5022/15550 [02:41<05:38, 31.09it/s]
 32%|███▏      | 5026/15550 [02:41<05:21, 32.74it/s]
 32%|███▏      | 5030/15550 [02:41<05:44, 30.54it/s]
 32%|███▏      | 5034/15550 [02:41<05:27, 32.08it/s]
 32%|███▏      | 5038/15550 [02:42<05:14, 33.45it/s]
 32%|███▏      | 5042/15550 [02:42<05:14, 33.39it/s]
 32%|███▏      | 5046/15550 [02:42<05:21, 32.69it/s]
 32%|███▏      | 5050/15550 [02:42<05:54, 29.60it/s]
 33%|███▎      | 5054/15550 [02:42<05:43, 30.51it/s]
 33%|███▎      | 5058/15550 [02:42<06:07, 28.59it/s]
 33%|███▎      | 5062/15550 [02:42<05:42, 30.66it/s]
 33%|███▎      | 5066/15550 [02:42<05:36, 31.17it/s]
 33%|███▎      | 5070/15550 [02:43<05:25, 32.19it/s]
 33%|███▎      | 5074/15550 [02:43<05:07, 34.09it/s]
 33%|███▎

[2m[36m(_objective pid=42509)[0m {'loss': 0.156, 'learning_rate': 2.5782809278149656e-05, 'epoch': 1.77}


[2m[36m(_objective pid=42509)[0m  35%|███▌      | 5506/15550 [02:56<04:31, 36.96it/s]
 35%|███▌      | 5510/15550 [02:56<04:34, 36.59it/s]
 35%|███▌      | 5514/15550 [02:56<04:28, 37.43it/s]
 35%|███▌      | 5518/15550 [02:56<04:24, 37.99it/s]
 36%|███▌      | 5522/15550 [02:56<04:45, 35.17it/s]
 36%|███▌      | 5526/15550 [02:56<04:36, 36.22it/s]
 36%|███▌      | 5531/15550 [02:56<04:26, 37.53it/s]
 36%|███▌      | 5535/15550 [02:56<04:28, 37.34it/s]
 36%|███▌      | 5539/15550 [02:56<04:44, 35.22it/s]
 36%|███▌      | 5543/15550 [02:57<04:35, 36.39it/s]
 36%|███▌      | 5547/15550 [02:57<04:28, 37.24it/s]
 36%|███▌      | 5551/15550 [02:57<04:28, 37.29it/s]
 36%|███▌      | 5555/15550 [02:57<04:26, 37.50it/s]
 36%|███▌      | 5559/15550 [02:57<05:00, 33.23it/s]
 36%|███▌      | 5563/15550 [02:57<04:53, 34.06it/s]
 36%|███▌      | 5567/15550 [02:57<04:42, 35.37it/s]
 36%|███▌      | 5571/15550 [02:57<04:33, 36.47it/s]
 36%|███▌      | 5575/15550 [02:57<04:40, 35.53it/s]
 36%|███▌

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:03:57. Total running time: 1hr 43min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m  38%|███▊      | 5860/15550 [03:06<05:09, 31.30it/s]
 38%|███▊      | 5864/15550 [03:06<05:07, 31.49it/s]
 38%|███▊      | 5868/15550 [03:06<04:55, 32.81it/s]
 38%|███▊      | 5872/15550 [03:06<05:11, 31.05it/s]
 38%|███▊      | 5876/15550 [03:07<04:56, 32.64it/s]
 38%|███▊      | 5880/15550 [03:07<05:34, 28.91it/s]
 38%|███▊      | 5884/15550 [03:07<05:11, 31.04it/s]
 38%|███▊      | 5888/15550 [03:07<04:53, 32.93it/s]
 38%|███▊      | 5892/15550 [03:07<04:43, 34.07it/s]
 38%|███▊      | 5896/15550 [03:07<05:06, 31.52it/s]
 38%|███▊      | 5900/15550 [03:07<04:53, 32.91it/s]
 38%|███▊      | 5904/15550 [03:07<05:18, 30.25it/s]
 38%|███▊      | 5908/15550 [03:08<05:00, 32.05it/s]
 38%|███▊      | 5912/15550 [03:08<05:14, 30.61it/s]
 38%|███▊      | 5916/15550 [03:08<05:08, 31.21it/s]
 38%|███▊      | 5920/15550 [03:08<04:49, 33.29it/s]
 38%|███▊      | 5924/15550 [03:08<04:41, 34.24it/s]
 38%|███▊      | 5928/15550 [03:08<04:31, 35.39it/s]
 38%|███▊

[2m[36m(_objective pid=42509)[0m {'loss': 0.1455, 'learning_rate': 2.4500082448390965e-05, 'epoch': 1.93}


[2m[36m(_objective pid=42509)[0m  39%|███▊      | 6008/15550 [03:10<04:14, 37.54it/s]
 39%|███▊      | 6012/15550 [03:11<04:13, 37.58it/s]
 39%|███▊      | 6016/15550 [03:11<04:30, 35.30it/s]
 39%|███▊      | 6020/15550 [03:11<04:27, 35.61it/s]
 39%|███▊      | 6024/15550 [03:11<04:22, 36.33it/s]
 39%|███▉      | 6028/15550 [03:11<04:38, 34.23it/s]
 39%|███▉      | 6032/15550 [03:11<04:29, 35.35it/s]
 39%|███▉      | 6036/15550 [03:11<04:22, 36.18it/s]
 39%|███▉      | 6040/15550 [03:11<04:16, 37.15it/s]
 39%|███▉      | 6044/15550 [03:11<04:19, 36.69it/s]
 39%|███▉      | 6048/15550 [03:12<04:47, 33.04it/s]
 39%|███▉      | 6052/15550 [03:12<04:42, 33.58it/s]
 39%|███▉      | 6056/15550 [03:12<05:02, 31.41it/s]
 39%|███▉      | 6060/15550 [03:12<04:53, 32.37it/s]
 39%|███▉      | 6064/15550 [03:12<04:41, 33.66it/s]
 39%|███▉      | 6068/15550 [03:12<04:40, 33.86it/s]
 39%|███▉      | 6072/15550 [03:12<04:40, 33.78it/s]
 39%|███▉      | 6077/15550 [03:12<04:26, 35.50it/s]
 39%|███▉

Trial _objective_f556c_00014 finished iteration 2 at 2023-09-11 15:04:18. Total running time: 1hr 43min 56s
+-------------------------------------------------+
| Trial _objective_f556c_00014 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.318 |
| time_total_s                              209.1 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.1724 |
| eval_runtime                             9.5974 |
| eval_samples_per_second                 432.098 |
| eval_steps_per_second                    13.545 |
| objective                                0.1724 |
+-------------------------------------------------+

[2m[36m(_objective pid=42509)[0m {'eval_loss': 0.17240013182163239, 'eval_runtime': 9.5974, 'eval_samples_per_second': 432.098, 'eval_steps_per_second': 13.545, 'epoch': 2.0}


[2m[36m(_objective pid=42509)[0m                                                     
[2m[36m(_objective pid=42509)[0m                                                  [A 40%|████      | 6220/15550 [03:26<04:33, 34.07it/s]
[2m[36m(_objective pid=42509)[0m 100%|██████████| 130/130 [00:09<00:00, 11.95it/s][A
                                                 [A
 40%|████      | 6223/15550 [03:28<2:10:30,  1.19it/s]
 40%|████      | 6227/15550 [03:28<1:32:30,  1.68it/s]
 40%|████      | 6231/15550 [03:28<1:06:02,  2.35it/s]
 40%|████      | 6235/15550 [03:28<47:38,  3.26it/s]  
 40%|████      | 6239/15550 [03:28<34:57,  4.44it/s]
 40%|████      | 6243/15550 [03:28<26:20,  5.89it/s]
 40%|████      | 6247/15550 [03:28<19:50,  7.81it/s]
 40%|████      | 6251/15550 [03:28<15:18, 10.12it/s]
 40%|████      | 6255/15550 [03:29<11:54, 13.00it/s]
 40%|████      | 6259/15550 [03:29<09:35, 16.15it/s]
 40%|████      | 6263/15550 [03:29<08:09, 18.96it/s]
 40%|████      | 6267/15550 [03:

[2m[36m(_objective pid=42509)[0m {'loss': 0.1445, 'learning_rate': 2.3217355618632277e-05, 'epoch': 2.09}


[2m[36m(_objective pid=42509)[0m  42%|████▏     | 6507/15550 [03:36<04:20, 34.65it/s]
 42%|████▏     | 6511/15550 [03:36<04:20, 34.73it/s]
 42%|████▏     | 6515/15550 [03:36<04:42, 32.02it/s]
 42%|████▏     | 6519/15550 [03:36<04:34, 32.96it/s]


Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:04:27. Total running time: 1hr 44min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m  42%|████▏     | 6523/15550 [03:36<04:54, 30.65it/s]
 42%|████▏     | 6527/15550 [03:36<04:46, 31.45it/s]
 42%|████▏     | 6531/15550 [03:37<05:08, 29.25it/s]
 42%|████▏     | 6535/15550 [03:37<04:50, 31.01it/s]
 42%|████▏     | 6539/15550 [03:37<04:34, 32.88it/s]
 42%|████▏     | 6543/15550 [03:37<04:23, 34.15it/s]
 42%|████▏     | 6547/15550 [03:37<04:17, 34.99it/s]
 42%|████▏     | 6551/15550 [03:37<04:25, 33.84it/s]
 42%|████▏     | 6555/15550 [03:37<04:18, 34.76it/s]
 42%|████▏     | 6559/15550 [03:37<04:13, 35.48it/s]
 42%|████▏     | 6563/15550 [03:38<04:22, 34.27it/s]
 42%|████▏     | 6567/15550 [03:38<04:19, 34.59it/s]
 42%|████▏     | 6571/15550 [03:38<04:14, 35.32it/s]
 42%|████▏     | 6575/15550 [03:38<04:23, 34.12it/s]
 42%|████▏     | 6579/15550 [03:38<04:24, 33.94it/s]
 42%|████▏     | 6583/15550 [03:38<04:22, 34.11it/s]
 42%|████▏     | 6587/15550 [03:38<04:22, 34.17it/s]
 42%|████▏     | 6591/15550 [03:38<04:13, 35.34it/s]
 42%|████

[2m[36m(_objective pid=42509)[0m {'loss': 0.0866, 'learning_rate': 2.1934628788873586e-05, 'epoch': 2.25}


[2m[36m(_objective pid=42509)[0m  45%|████▌     | 7005/15550 [03:50<04:06, 34.62it/s]
 45%|████▌     | 7009/15550 [03:51<04:22, 32.57it/s]
 45%|████▌     | 7013/15550 [03:51<04:13, 33.74it/s]
 45%|████▌     | 7017/15550 [03:51<04:03, 34.98it/s]
 45%|████▌     | 7021/15550 [03:51<03:55, 36.16it/s]
 45%|████▌     | 7025/15550 [03:51<03:53, 36.48it/s]
 45%|████▌     | 7029/15550 [03:51<04:17, 33.04it/s]
 45%|████▌     | 7033/15550 [03:51<04:21, 32.51it/s]
 45%|████▌     | 7037/15550 [03:51<04:08, 34.23it/s]
 45%|████▌     | 7041/15550 [03:52<04:01, 35.25it/s]
 45%|████▌     | 7045/15550 [03:52<03:59, 35.50it/s]
 45%|████▌     | 7049/15550 [03:52<03:56, 35.89it/s]
 45%|████▌     | 7053/15550 [03:52<03:54, 36.21it/s]
 45%|████▌     | 7057/15550 [03:52<04:00, 35.26it/s]
 45%|████▌     | 7061/15550 [03:52<03:59, 35.45it/s]
 45%|████▌     | 7065/15550 [03:52<04:00, 35.25it/s]
 45%|████▌     | 7069/15550 [03:52<04:23, 32.14it/s]
 45%|████▌     | 7073/15550 [03:52<04:18, 32.79it/s]
 46%|████

[2m[36m(_objective pid=42509)[0m {'loss': 0.0825, 'learning_rate': 2.0651901959114902e-05, 'epoch': 2.41}


[2m[36m(_objective pid=42509)[0m  48%|████▊     | 7504/15550 [04:05<03:48, 35.23it/s]
 48%|████▊     | 7508/15550 [04:05<03:41, 36.28it/s]
 48%|████▊     | 7512/15550 [04:05<03:46, 35.53it/s]
 48%|████▊     | 7516/15550 [04:06<03:47, 35.33it/s]
 48%|████▊     | 7520/15550 [04:06<03:44, 35.74it/s]
 48%|████▊     | 7524/15550 [04:06<04:12, 31.84it/s]
 48%|████▊     | 7528/15550 [04:06<04:05, 32.67it/s]
 48%|████▊     | 7532/15550 [04:06<03:54, 34.24it/s]


Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:04:57. Total running time: 1hr 44min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m  48%|████▊     | 7536/15550 [04:06<04:12, 31.76it/s]
 48%|████▊     | 7540/15550 [04:06<04:36, 28.99it/s]
 49%|████▊     | 7544/15550 [04:06<04:23, 30.33it/s]
 49%|████▊     | 7548/15550 [04:07<04:06, 32.47it/s]
 49%|████▊     | 7552/15550 [04:07<04:17, 31.04it/s]
 49%|████▊     | 7556/15550 [04:07<04:03, 32.78it/s]
 49%|████▊     | 7560/15550 [04:07<03:58, 33.50it/s]
 49%|████▊     | 7564/15550 [04:07<03:53, 34.14it/s]
 49%|████▊     | 7568/15550 [04:07<03:46, 35.32it/s]
 49%|████▊     | 7572/15550 [04:07<03:59, 33.37it/s]
 49%|████▊     | 7576/15550 [04:07<03:57, 33.54it/s]
 49%|████▊     | 7580/15550 [04:08<03:47, 35.02it/s]
 49%|████▉     | 7584/15550 [04:08<03:47, 34.99it/s]
 49%|████▉     | 7588/15550 [04:08<04:00, 33.11it/s]
 49%|████▉     | 7592/15550 [04:08<03:57, 33.54it/s]
 49%|████▉     | 7596/15550 [04:08<03:53, 34.08it/s]
 49%|████▉     | 7600/15550 [04:08<03:51, 34.28it/s]
 49%|████▉     | 7604/15550 [04:08<03:45, 35.20it/s]
 49%|████

[2m[36m(_objective pid=42509)[0m {'loss': 0.1061, 'learning_rate': 1.9369175129356208e-05, 'epoch': 2.57}


[2m[36m(_objective pid=42509)[0m  51%|█████▏    | 8007/15550 [04:20<03:43, 33.75it/s]
 52%|█████▏    | 8011/15550 [04:20<03:35, 34.90it/s]
 52%|█████▏    | 8015/15550 [04:20<03:53, 32.26it/s]
 52%|█████▏    | 8019/15550 [04:20<03:41, 33.99it/s]
 52%|█████▏    | 8023/15550 [04:21<03:56, 31.77it/s]
 52%|█████▏    | 8027/15550 [04:21<04:13, 29.72it/s]
 52%|█████▏    | 8031/15550 [04:21<04:00, 31.23it/s]
 52%|█████▏    | 8035/15550 [04:21<03:51, 32.46it/s]
 52%|█████▏    | 8040/15550 [04:21<03:34, 35.01it/s]
 52%|█████▏    | 8044/15550 [04:21<03:30, 35.62it/s]
 52%|█████▏    | 8048/15550 [04:21<03:31, 35.49it/s]
 52%|█████▏    | 8052/15550 [04:21<03:31, 35.39it/s]
 52%|█████▏    | 8056/15550 [04:22<03:43, 33.49it/s]
 52%|█████▏    | 8060/15550 [04:22<04:11, 29.74it/s]
 52%|█████▏    | 8064/15550 [04:22<03:55, 31.72it/s]
 52%|█████▏    | 8068/15550 [04:22<03:41, 33.78it/s]
 52%|█████▏    | 8072/15550 [04:22<03:31, 35.29it/s]
 52%|█████▏    | 8076/15550 [04:22<03:24, 36.46it/s]
 52%|████

[2m[36m(_objective pid=42509)[0m {'loss': 0.1063, 'learning_rate': 1.808644829959752e-05, 'epoch': 2.73}


[2m[36m(_objective pid=42509)[0m  55%|█████▍    | 8508/15550 [04:35<03:16, 35.90it/s]
 55%|█████▍    | 8512/15550 [04:35<03:11, 36.76it/s]
 55%|█████▍    | 8516/15550 [04:35<03:26, 34.12it/s]
 55%|█████▍    | 8520/15550 [04:35<03:28, 33.70it/s]
 55%|█████▍    | 8524/15550 [04:35<03:29, 33.54it/s]
 55%|█████▍    | 8529/15550 [04:35<03:17, 35.57it/s]
 55%|█████▍    | 8533/15550 [04:36<03:12, 36.52it/s]
 55%|█████▍    | 8537/15550 [04:36<03:11, 36.62it/s]
 55%|█████▍    | 8541/15550 [04:36<03:09, 37.03it/s]
 55%|█████▍    | 8545/15550 [04:36<03:06, 37.52it/s]
 55%|█████▍    | 8549/15550 [04:36<03:20, 34.85it/s]


Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:05:27. Total running time: 1hr 45min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m  55%|█████▌    | 8553/15550 [04:36<03:37, 32.11it/s]
 55%|█████▌    | 8557/15550 [04:36<03:27, 33.63it/s]
 55%|█████▌    | 8561/15550 [04:36<03:25, 33.98it/s]
 55%|█████▌    | 8565/15550 [04:37<03:29, 33.30it/s]
 55%|█████▌    | 8569/15550 [04:37<03:20, 34.84it/s]
 55%|█████▌    | 8573/15550 [04:37<03:18, 35.23it/s]
 55%|█████▌    | 8577/15550 [04:37<03:10, 36.52it/s]
 55%|█████▌    | 8581/15550 [04:37<03:08, 37.01it/s]
 55%|█████▌    | 8585/15550 [04:37<03:20, 34.69it/s]
 55%|█████▌    | 8589/15550 [04:37<03:24, 33.96it/s]
 55%|█████▌    | 8593/15550 [04:37<03:20, 34.62it/s]
 55%|█████▌    | 8597/15550 [04:37<03:16, 35.34it/s]
 55%|█████▌    | 8601/15550 [04:38<03:10, 36.43it/s]
 55%|█████▌    | 8605/15550 [04:38<03:13, 35.94it/s]
 55%|█████▌    | 8609/15550 [04:38<03:18, 34.90it/s]
 55%|█████▌    | 8613/15550 [04:38<03:34, 32.36it/s]
 55%|█████▌    | 8617/15550 [04:38<03:26, 33.57it/s]
 55%|█████▌    | 8621/15550 [04:38<03:42, 31.07it/s]
 55%|████

[2m[36m(_objective pid=42509)[0m {'loss': 0.1225, 'learning_rate': 1.6803721469838832e-05, 'epoch': 2.89}


 58%|█████▊    | 9006/15550 [04:50<03:19, 32.79it/s]
 58%|█████▊    | 9010/15550 [04:50<03:31, 30.90it/s]
 58%|█████▊    | 9014/15550 [04:50<03:19, 32.74it/s]
 58%|█████▊    | 9018/15550 [04:50<03:19, 32.78it/s]
 58%|█████▊    | 9022/15550 [04:50<03:15, 33.39it/s]
 58%|█████▊    | 9026/15550 [04:50<03:08, 34.56it/s]
 58%|█████▊    | 9030/15550 [04:51<03:10, 34.17it/s]
 58%|█████▊    | 9034/15550 [04:51<03:03, 35.46it/s]
 58%|█████▊    | 9038/15550 [04:51<02:57, 36.66it/s]
 58%|█████▊    | 9042/15550 [04:51<02:55, 37.16it/s]
 58%|█████▊    | 9046/15550 [04:51<03:01, 35.81it/s]
 58%|█████▊    | 9051/15550 [04:51<02:54, 37.24it/s]
 58%|█████▊    | 9055/15550 [04:51<02:51, 37.81it/s]
 58%|█████▊    | 9059/15550 [04:51<03:05, 34.98it/s]
 58%|█████▊    | 9063/15550 [04:51<03:01, 35.73it/s]
 58%|█████▊    | 9067/15550 [04:51<02:56, 36.82it/s]
 58%|█████▊    | 9071/15550 [04:52<03:11, 33.80it/s]
 58%|█████▊    | 9075/15550 [04:52<03:02, 35.42it/s]
 58%|█████▊    | 9079/15550 [04:52<03:08, 34.3

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:05:57. Total running time: 1hr 45min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m 
[2m[36m(_objective pid=42509)[0m  75%|███████▍  | 97/130 [00:06<00:02, 14.32it/s][A
[2m[36m(_objective pid=42509)[0m 
 77%|███████▋  | 100/130 [00:07<00:01, 16.94it/s][A
[2m[36m(_objective pid=42509)[0m 
 78%|███████▊  | 102/130 [00:07<00:01, 14.81it/s][A
[2m[36m(_objective pid=42509)[0m 
 80%|████████  | 104/130 [00:07<00:01, 13.20it/s][A
[2m[36m(_objective pid=42509)[0m 
 82%|████████▏ | 106/130 [00:07<00:01, 14.32it/s][A
[2m[36m(_objective pid=42509)[0m 
 84%|████████▍ | 109/130 [00:07<00:01, 13.67it/s][A
[2m[36m(_objective pid=42509)[0m 
 85%|████████▌ | 111/130 [00:07<00:01, 11.84it/s][A
[2m[36m(_objective pid=42509)[0m 
 87%|████████▋ | 113/130 [00:08<00:01, 11.38it/s][A
[2m[36m(_objective pid=42509)[0m 
 89%|████████▉ | 116/130 [00:08<00:01, 12.44it/s][A
[2m[36m(_objective pid=42509)[0m 
 91%|█████████ | 118/130 [00:08<00:00, 13.14it/s][A
[2m[36m(_objective pid=42509)[0m 
 92%|█████████▏| 120/130 [0

Trial _objective_f556c_00014 finished iteration 3 at 2023-09-11 15:06:00. Total running time: 1hr 45min 39s
+-------------------------------------------------+
| Trial _objective_f556c_00014 result             |
+-------------------------------------------------+
| time_this_iter_s                        102.579 |
| time_total_s                            311.679 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.18146 |
| eval_runtime                             9.5945 |
| eval_samples_per_second                 432.225 |
| eval_steps_per_second                    13.549 |
| objective                               0.18146 |
+-------------------------------------------------+

[2m[36m(_objective pid=42509)[0m {'eval_loss': 0.1814563274383545, 'eval_runtime': 9.5945, 'eval_samples_per_second': 432.225, 'eval_steps_per_second': 13.549, 'epoch': 3.0}


 60%|██████    | 9330/15550 [05:10<03:23, 30.61it/s]
 60%|██████    | 9331/15550 [05:10<1:34:24,  1.10it/s]
 60%|██████    | 9335/15550 [05:10<1:05:42,  1.58it/s]
 60%|██████    | 9339/15550 [05:10<46:05,  2.25it/s]  
 60%|██████    | 9343/15550 [05:10<32:45,  3.16it/s]
 60%|██████    | 9347/15550 [05:11<23:50,  4.34it/s]
 60%|██████    | 9351/15550 [05:11<17:27,  5.92it/s]
 60%|██████    | 9355/15550 [05:11<13:00,  7.93it/s]
 60%|██████    | 9359/15550 [05:11<10:10, 10.14it/s]
 60%|██████    | 9363/15550 [05:11<07:57, 12.97it/s]
 60%|██████    | 9367/15550 [05:11<06:29, 15.89it/s]
 60%|██████    | 9371/15550 [05:11<05:33, 18.52it/s]
 60%|██████    | 9375/15550 [05:11<04:50, 21.26it/s]
 60%|██████    | 9379/15550 [05:12<04:10, 24.59it/s]
 60%|██████    | 9383/15550 [05:12<03:47, 27.11it/s]
 60%|██████    | 9387/15550 [05:12<03:26, 29.78it/s]
 60%|██████    | 9391/15550 [05:12<03:35, 28.58it/s]
 60%|██████    | 9395/15550 [05:12<03:22, 30.38it/s]
 60%|██████    | 9399/15550 [05:12<03:22

[2m[36m(_objective pid=42509)[0m {'loss': 0.0868, 'learning_rate': 1.552099464008014e-05, 'epoch': 3.05}


[2m[36m(_objective pid=42509)[0m  61%|██████    | 9507/15550 [05:15<03:00, 33.52it/s]
 61%|██████    | 9511/15550 [05:15<02:56, 34.28it/s]
 61%|██████    | 9515/15550 [05:16<02:51, 35.16it/s]
 61%|██████    | 9519/15550 [05:16<02:51, 35.11it/s]
 61%|██████    | 9523/15550 [05:16<02:48, 35.79it/s]
 61%|██████▏   | 9527/15550 [05:16<02:45, 36.44it/s]
 61%|██████▏   | 9531/15550 [05:16<02:43, 36.73it/s]
 61%|██████▏   | 9535/15550 [05:16<02:42, 37.04it/s]
 61%|██████▏   | 9539/15550 [05:16<02:40, 37.45it/s]
 61%|██████▏   | 9543/15550 [05:16<02:42, 36.91it/s]
 61%|██████▏   | 9547/15550 [05:16<03:00, 33.17it/s]
 61%|██████▏   | 9551/15550 [05:17<02:56, 34.02it/s]
 61%|██████▏   | 9555/15550 [05:17<02:49, 35.31it/s]
 61%|██████▏   | 9559/15550 [05:17<02:49, 35.32it/s]
 61%|██████▏   | 9563/15550 [05:17<02:44, 36.36it/s]
 62%|██████▏   | 9567/15550 [05:17<02:46, 35.93it/s]
 62%|██████▏   | 9571/15550 [05:17<02:42, 36.75it/s]
 62%|██████▏   | 9575/15550 [05:17<02:40, 37.22it/s]
 62%|████

[2m[36m(_objective pid=42509)[0m {'loss': 0.0646, 'learning_rate': 1.4238267810321452e-05, 'epoch': 3.22}


 64%|██████▍   | 10006/15550 [05:30<03:05, 29.91it/s]
 64%|██████▍   | 10010/15550 [05:30<03:16, 28.25it/s]
 64%|██████▍   | 10014/15550 [05:30<03:27, 26.64it/s]
 64%|██████▍   | 10017/15550 [05:30<03:24, 27.08it/s]
 64%|██████▍   | 10021/15550 [05:30<03:13, 28.56it/s]
 64%|██████▍   | 10025/15550 [05:30<03:03, 30.11it/s]
 64%|██████▍   | 10029/15550 [05:31<03:13, 28.56it/s]
 65%|██████▍   | 10032/15550 [05:31<03:13, 28.58it/s]
 65%|██████▍   | 10036/15550 [05:31<03:07, 29.48it/s]
 65%|██████▍   | 10040/15550 [05:31<03:13, 28.53it/s]
 65%|██████▍   | 10044/15550 [05:31<03:04, 29.78it/s]
 65%|██████▍   | 10048/15550 [05:31<02:56, 31.24it/s]
 65%|██████▍   | 10052/15550 [05:31<03:15, 28.09it/s]
 65%|██████▍   | 10056/15550 [05:32<03:10, 28.91it/s]
 65%|██████▍   | 10060/15550 [05:32<02:59, 30.51it/s]
 65%|██████▍   | 10064/15550 [05:32<02:51, 31.98it/s]
 65%|██████▍   | 10068/15550 [05:32<02:53, 31.59it/s]
 65%|██████▍   | 10072/15550 [05:32<02:54, 31.38it/s]
 65%|██████▍   | 10076/15550

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:06:28. Total running time: 1hr 46min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m  66%|██████▌   | 10216/15550 [05:36<02:34, 34.61it/s]
 66%|██████▌   | 10220/15550 [05:36<02:28, 35.87it/s]
 66%|██████▌   | 10224/15550 [05:37<02:26, 36.31it/s]
 66%|██████▌   | 10228/15550 [05:37<02:23, 37.01it/s]
 66%|██████▌   | 10232/15550 [05:37<02:39, 33.25it/s]
 66%|██████▌   | 10236/15550 [05:37<02:34, 34.46it/s]
 66%|██████▌   | 10240/15550 [05:37<02:28, 35.79it/s]
 66%|██████▌   | 10244/15550 [05:37<02:24, 36.66it/s]
 66%|██████▌   | 10248/15550 [05:37<02:24, 36.63it/s]
 66%|██████▌   | 10252/15550 [05:37<02:24, 36.68it/s]
 66%|██████▌   | 10256/15550 [05:37<02:22, 37.28it/s]
 66%|██████▌   | 10260/15550 [05:38<02:22, 37.20it/s]
 66%|██████▌   | 10264/15550 [05:38<02:20, 37.52it/s]
 66%|██████▌   | 10268/15550 [05:38<02:21, 37.33it/s]
 66%|██████▌   | 10273/15550 [05:38<02:18, 38.09it/s]
 66%|██████▌   | 10277/15550 [05:38<02:17, 38.35it/s]
 66%|██████▌   | 10282/15550 [05:38<02:20, 37.40it/s]
 66%|██████▌   | 10286/15550 [05:38<02:21, 37

[2m[36m(_objective pid=42509)[0m {'loss': 0.0659, 'learning_rate': 1.2955540980562763e-05, 'epoch': 3.38}


[2m[36m(_objective pid=42509)[0m  68%|██████▊   | 10506/15550 [05:45<02:29, 33.68it/s]
 68%|██████▊   | 10510/15550 [05:45<02:27, 34.09it/s]
 68%|██████▊   | 10514/15550 [05:45<02:40, 31.36it/s]
 68%|██████▊   | 10518/15550 [05:45<02:34, 32.51it/s]
 68%|██████▊   | 10522/15550 [05:45<02:31, 33.18it/s]
 68%|██████▊   | 10526/15550 [05:45<02:31, 33.25it/s]
 68%|██████▊   | 10530/15550 [05:45<02:28, 33.90it/s]
 68%|██████▊   | 10534/15550 [05:46<02:39, 31.39it/s]
 68%|██████▊   | 10538/15550 [05:46<02:38, 31.54it/s]
 68%|██████▊   | 10542/15550 [05:46<02:38, 31.59it/s]
 68%|██████▊   | 10546/15550 [05:46<02:31, 33.06it/s]
 68%|██████▊   | 10550/15550 [05:46<02:40, 31.14it/s]
 68%|██████▊   | 10554/15550 [05:46<02:46, 30.00it/s]
 68%|██████▊   | 10558/15550 [05:46<02:53, 28.81it/s]
 68%|██████▊   | 10562/15550 [05:47<02:42, 30.64it/s]
 68%|██████▊   | 10566/15550 [05:47<02:33, 32.39it/s]
 68%|██████▊   | 10570/15550 [05:47<02:29, 33.33it/s]
 68%|██████▊   | 10574/15550 [05:47<02:26, 33

[2m[36m(_objective pid=42509)[0m {'loss': 0.0642, 'learning_rate': 1.1672814150804075e-05, 'epoch': 3.54}


[2m[36m(_objective pid=42509)[0m  71%|███████   | 11009/15550 [06:00<02:12, 34.31it/s]
 71%|███████   | 11013/15550 [06:00<02:12, 34.23it/s]
 71%|███████   | 11017/15550 [06:00<02:11, 34.52it/s]
 71%|███████   | 11021/15550 [06:01<02:10, 34.66it/s]
 71%|███████   | 11025/15550 [06:01<02:12, 34.04it/s]
 71%|███████   | 11029/15550 [06:01<02:41, 27.94it/s]
 71%|███████   | 11033/15550 [06:01<02:30, 29.97it/s]
 71%|███████   | 11037/15550 [06:01<02:24, 31.14it/s]
 71%|███████   | 11041/15550 [06:01<02:22, 31.73it/s]
 71%|███████   | 11045/15550 [06:01<02:17, 32.86it/s]
 71%|███████   | 11049/15550 [06:01<02:15, 33.20it/s]
 71%|███████   | 11053/15550 [06:02<02:16, 32.86it/s]
 71%|███████   | 11057/15550 [06:02<02:16, 33.00it/s]
 71%|███████   | 11061/15550 [06:02<02:15, 33.22it/s]
 71%|███████   | 11065/15550 [06:02<02:36, 28.61it/s]
 71%|███████   | 11069/15550 [06:02<02:27, 30.29it/s]
 71%|███████   | 11073/15550 [06:02<02:21, 31.69it/s]
 71%|███████   | 11077/15550 [06:02<02:31, 29

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:06:58. Total running time: 1hr 46min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m  72%|███████▏  | 11197/15550 [06:06<02:20, 31.00it/s]
 72%|███████▏  | 11201/15550 [06:06<02:13, 32.69it/s]
 72%|███████▏  | 11205/15550 [06:06<02:10, 33.21it/s]
 72%|███████▏  | 11209/15550 [06:07<02:15, 32.06it/s]
 72%|███████▏  | 11213/15550 [06:07<02:11, 32.91it/s]
 72%|███████▏  | 11217/15550 [06:07<02:07, 34.02it/s]
 72%|███████▏  | 11221/15550 [06:07<02:04, 34.85it/s]
 72%|███████▏  | 11225/15550 [06:07<02:16, 31.76it/s]
 72%|███████▏  | 11229/15550 [06:07<02:11, 32.93it/s]
 72%|███████▏  | 11233/15550 [06:07<02:07, 33.85it/s]
 72%|███████▏  | 11237/15550 [06:07<02:04, 34.55it/s]
 72%|███████▏  | 11241/15550 [06:08<02:02, 35.21it/s]
 72%|███████▏  | 11245/15550 [06:08<02:01, 35.31it/s]
 72%|███████▏  | 11249/15550 [06:08<02:00, 35.61it/s]
 72%|███████▏  | 11253/15550 [06:08<02:12, 32.32it/s]
 72%|███████▏  | 11257/15550 [06:08<02:15, 31.75it/s]
 72%|███████▏  | 11261/15550 [06:08<02:08, 33.35it/s]
 72%|███████▏  | 11265/15550 [06:08<02:04, 34

[2m[36m(_objective pid=42509)[0m {'loss': 0.0687, 'learning_rate': 1.0390087321045384e-05, 'epoch': 3.7}


 74%|███████▍  | 11509/15550 [06:15<01:55, 34.86it/s]
 74%|███████▍  | 11513/15550 [06:16<02:01, 33.28it/s]
 74%|███████▍  | 11517/15550 [06:16<02:02, 32.80it/s]
 74%|███████▍  | 11521/15550 [06:16<02:16, 29.54it/s]
 74%|███████▍  | 11525/15550 [06:16<02:11, 30.64it/s]
 74%|███████▍  | 11529/15550 [06:16<02:07, 31.58it/s]
 74%|███████▍  | 11533/15550 [06:16<02:04, 32.16it/s]
 74%|███████▍  | 11537/15550 [06:16<02:05, 32.04it/s]
 74%|███████▍  | 11541/15550 [06:16<02:01, 33.05it/s]
 74%|███████▍  | 11545/15550 [06:17<01:59, 33.54it/s]
 74%|███████▍  | 11549/15550 [06:17<01:58, 33.63it/s]
 74%|███████▍  | 11553/15550 [06:17<01:58, 33.67it/s]
 74%|███████▍  | 11557/15550 [06:17<01:57, 33.91it/s]
 74%|███████▍  | 11561/15550 [06:17<01:57, 34.05it/s]
 74%|███████▍  | 11565/15550 [06:17<01:57, 34.05it/s]
 74%|███████▍  | 11569/15550 [06:17<01:56, 34.24it/s]
 74%|███████▍  | 11573/15550 [06:17<02:11, 30.30it/s]
 74%|███████▍  | 11577/15550 [06:18<02:06, 31.31it/s]
 74%|███████▍  | 11581/15550

[2m[36m(_objective pid=42509)[0m {'loss': 0.0568, 'learning_rate': 9.107360491286695e-06, 'epoch': 3.86}


[2m[36m(_objective pid=42509)[0m  77%|███████▋  | 12003/15550 [06:32<01:52, 31.52it/s]
 77%|███████▋  | 12007/15550 [06:32<01:48, 32.56it/s]
 77%|███████▋  | 12011/15550 [06:32<01:56, 30.37it/s]
 77%|███████▋  | 12015/15550 [06:32<01:51, 31.73it/s]
 77%|███████▋  | 12019/15550 [06:32<01:47, 32.79it/s]
 77%|███████▋  | 12023/15550 [06:32<01:47, 32.71it/s]
 77%|███████▋  | 12027/15550 [06:32<01:58, 29.85it/s]
 77%|███████▋  | 12031/15550 [06:33<02:07, 27.56it/s]
 77%|███████▋  | 12035/15550 [06:33<01:58, 29.54it/s]
 77%|███████▋  | 12039/15550 [06:33<01:52, 31.32it/s]
 77%|███████▋  | 12043/15550 [06:33<01:49, 31.99it/s]
 77%|███████▋  | 12047/15550 [06:33<01:45, 33.19it/s]
 77%|███████▋  | 12051/15550 [06:33<01:44, 33.59it/s]
 78%|███████▊  | 12055/15550 [06:33<01:42, 34.05it/s]
 78%|███████▊  | 12059/15550 [06:33<01:48, 32.18it/s]
 78%|███████▊  | 12063/15550 [06:33<01:50, 31.43it/s]
 78%|███████▊  | 12067/15550 [06:34<01:49, 31.94it/s]
 78%|███████▊  | 12071/15550 [06:34<01:46, 32

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:07:28. Total running time: 1hr 47min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m  78%|███████▊  | 12151/15550 [06:36<01:47, 31.74it/s]
 78%|███████▊  | 12155/15550 [06:36<01:54, 29.66it/s]
 78%|███████▊  | 12159/15550 [06:37<01:58, 28.69it/s]
 78%|███████▊  | 12162/15550 [06:37<02:00, 28.15it/s]
 78%|███████▊  | 12165/15550 [06:37<02:07, 26.62it/s]
 78%|███████▊  | 12168/15550 [06:37<02:04, 27.22it/s]
 78%|███████▊  | 12172/15550 [06:37<01:58, 28.43it/s]
 78%|███████▊  | 12176/15550 [06:37<01:57, 28.63it/s]
 78%|███████▊  | 12180/15550 [06:37<01:52, 29.97it/s]
 78%|███████▊  | 12184/15550 [06:37<01:51, 30.32it/s]
 78%|███████▊  | 12188/15550 [06:38<01:44, 32.05it/s]
 78%|███████▊  | 12192/15550 [06:38<01:42, 32.70it/s]
 78%|███████▊  | 12196/15550 [06:38<01:42, 32.72it/s]
 78%|███████▊  | 12200/15550 [06:38<01:50, 30.22it/s]
 78%|███████▊  | 12204/15550 [06:38<01:44, 32.15it/s]
 79%|███████▊  | 12208/15550 [06:38<01:38, 33.97it/s]
 79%|███████▊  | 12212/15550 [06:38<01:35, 35.12it/s]
 79%|███████▊  | 12216/15550 [06:38<01:32, 35

Trial _objective_f556c_00014 finished iteration 4 at 2023-09-11 15:07:46. Total running time: 1hr 47min 25s
+-------------------------------------------------+
| Trial _objective_f556c_00014 result             |
+-------------------------------------------------+
| time_this_iter_s                         105.74 |
| time_total_s                            417.419 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.28059 |
| eval_runtime                              9.598 |
| eval_samples_per_second                 432.069 |
| eval_steps_per_second                    13.544 |
| objective                               0.28059 |
+-------------------------------------------------+

[2m[36m(_objective pid=42509)[0m {'eval_loss': 0.2805941104888916, 'eval_runtime': 9.598, 'eval_samples_per_second': 432.069, 'eval_steps_per_second': 13.544, 'epoch': 4.0}


[2m[36m(_objective pid=42509)[0m                                                      
[2m[36m(_objective pid=42509)[0m                                                  [A 80%|████████  | 12440/15550 [06:55<01:26, 35.95it/s]
[2m[36m(_objective pid=42509)[0m 100%|██████████| 130/130 [00:09<00:00, 11.90it/s][A
                                                 [A
 80%|████████  | 12444/15550 [06:56<43:41,  1.18it/s]
 80%|████████  | 12447/15550 [06:56<33:27,  1.55it/s]
 80%|████████  | 12451/15550 [06:56<23:16,  2.22it/s]
 80%|████████  | 12455/15550 [06:56<16:29,  3.13it/s]
 80%|████████  | 12459/15550 [06:56<11:51,  4.34it/s]
 80%|████████  | 12463/15550 [06:57<08:41,  5.92it/s]
 80%|████████  | 12467/15550 [06:57<06:28,  7.94it/s]
 80%|████████  | 12471/15550 [06:57<04:56, 10.37it/s]
 80%|████████  | 12475/15550 [06:57<03:58, 12.90it/s]
 80%|████████  | 12479/15550 [06:57<03:16, 15.62it/s]
 80%|████████  | 12483/15550 [06:57<02:50, 17.98it/s]
 80%|████████  | 12487/1555

[2m[36m(_objective pid=42509)[0m {'loss': 0.0542, 'learning_rate': 7.824633661528005e-06, 'epoch': 4.02}


[2m[36m(_objective pid=42509)[0m  80%|████████  | 12503/15550 [06:58<01:43, 29.34it/s]
 80%|████████  | 12507/15550 [06:58<01:38, 30.95it/s]
 80%|████████  | 12511/15550 [06:58<01:50, 27.55it/s]
 80%|████████  | 12515/15550 [06:58<01:43, 29.44it/s]
 81%|████████  | 12519/15550 [06:58<01:47, 28.23it/s]
 81%|████████  | 12522/15550 [06:58<01:47, 28.07it/s]
 81%|████████  | 12526/15550 [06:59<01:41, 29.77it/s]
 81%|████████  | 12530/15550 [06:59<01:37, 30.97it/s]
 81%|████████  | 12534/15550 [06:59<01:34, 32.01it/s]
 81%|████████  | 12538/15550 [06:59<01:35, 31.67it/s]
 81%|████████  | 12542/15550 [06:59<01:30, 33.14it/s]
 81%|████████  | 12546/15550 [06:59<01:27, 34.27it/s]
 81%|████████  | 12550/15550 [06:59<01:25, 35.25it/s]
 81%|████████  | 12554/15550 [06:59<01:25, 35.09it/s]
 81%|████████  | 12558/15550 [06:59<01:23, 35.79it/s]
 81%|████████  | 12562/15550 [07:00<01:26, 34.64it/s]
 81%|████████  | 12566/15550 [07:00<01:25, 35.06it/s]
 81%|████████  | 12570/15550 [07:00<01:23, 35

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:07:58. Total running time: 1hr 47min 36s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m  82%|████████▏ | 12788/15550 [07:07<01:26, 31.84it/s]
 82%|████████▏ | 12792/15550 [07:07<01:31, 30.10it/s]
 82%|████████▏ | 12796/15550 [07:07<01:26, 32.01it/s]
 82%|████████▏ | 12800/15550 [07:07<01:26, 31.64it/s]
 82%|████████▏ | 12804/15550 [07:07<01:24, 32.59it/s]
 82%|████████▏ | 12808/15550 [07:07<01:21, 33.80it/s]
 82%|████████▏ | 12812/15550 [07:07<01:19, 34.36it/s]
 82%|████████▏ | 12816/15550 [07:07<01:27, 31.27it/s]
 82%|████████▏ | 12820/15550 [07:08<01:30, 30.20it/s]
 82%|████████▏ | 12824/15550 [07:08<01:25, 31.94it/s]
 82%|████████▏ | 12828/15550 [07:08<01:23, 32.68it/s]
 83%|████████▎ | 12832/15550 [07:08<01:20, 33.65it/s]
 83%|████████▎ | 12836/15550 [07:08<01:20, 33.76it/s]
 83%|████████▎ | 12840/15550 [07:08<01:21, 33.13it/s]
 83%|████████▎ | 12844/15550 [07:08<01:19, 33.87it/s]
 83%|████████▎ | 12848/15550 [07:08<01:28, 30.65it/s]
 83%|████████▎ | 12852/15550 [07:09<01:27, 30.76it/s]
 83%|████████▎ | 12856/15550 [07:09<01:26, 31

[2m[36m(_objective pid=42509)[0m {'loss': 0.0507, 'learning_rate': 6.541906831769316e-06, 'epoch': 4.18}


 84%|████████▎ | 13008/15550 [07:13<01:21, 31.24it/s]
 84%|████████▎ | 13012/15550 [07:14<01:23, 30.36it/s]
 84%|████████▎ | 13016/15550 [07:14<01:19, 31.91it/s]
 84%|████████▎ | 13020/15550 [07:14<01:15, 33.37it/s]
 84%|████████▍ | 13024/15550 [07:14<01:13, 34.40it/s]
 84%|████████▍ | 13028/15550 [07:14<01:20, 31.47it/s]
 84%|████████▍ | 13032/15550 [07:14<01:16, 32.75it/s]
 84%|████████▍ | 13036/15550 [07:14<01:13, 34.43it/s]
 84%|████████▍ | 13040/15550 [07:14<01:11, 35.14it/s]
 84%|████████▍ | 13044/15550 [07:14<01:09, 36.21it/s]
 84%|████████▍ | 13048/15550 [07:15<01:10, 35.62it/s]
 84%|████████▍ | 13052/15550 [07:15<01:08, 36.31it/s]
 84%|████████▍ | 13056/15550 [07:15<01:09, 35.79it/s]
 84%|████████▍ | 13060/15550 [07:15<01:11, 35.02it/s]
 84%|████████▍ | 13064/15550 [07:15<01:12, 34.29it/s]
 84%|████████▍ | 13068/15550 [07:15<01:10, 35.07it/s]
 84%|████████▍ | 13072/15550 [07:15<01:08, 36.01it/s]
 84%|████████▍ | 13076/15550 [07:15<01:16, 32.44it/s]
 84%|████████▍ | 13080/15550

[2m[36m(_objective pid=42509)[0m {'loss': 0.0461, 'learning_rate': 5.259180002010626e-06, 'epoch': 4.34}


[2m[36m(_objective pid=42509)[0m  87%|████████▋ | 13507/15550 [07:28<00:56, 36.47it/s]
 87%|████████▋ | 13511/15550 [07:28<00:56, 35.99it/s]
 87%|████████▋ | 13515/15550 [07:29<00:57, 35.22it/s]
 87%|████████▋ | 13519/15550 [07:29<00:56, 35.94it/s]
 87%|████████▋ | 13523/15550 [07:29<00:58, 34.60it/s]
 87%|████████▋ | 13527/15550 [07:29<01:06, 30.65it/s]
 87%|████████▋ | 13531/15550 [07:29<01:11, 28.38it/s]
 87%|████████▋ | 13535/15550 [07:29<01:05, 30.55it/s]
 87%|████████▋ | 13539/15550 [07:29<01:02, 32.21it/s]
 87%|████████▋ | 13543/15550 [07:30<01:01, 32.54it/s]
 87%|████████▋ | 13547/15550 [07:30<00:59, 33.39it/s]
 87%|████████▋ | 13551/15550 [07:30<01:00, 33.24it/s]
 87%|████████▋ | 13555/15550 [07:30<00:58, 34.20it/s]
 87%|████████▋ | 13559/15550 [07:30<00:56, 34.94it/s]
 87%|████████▋ | 13563/15550 [07:30<00:55, 35.67it/s]
 87%|████████▋ | 13567/15550 [07:30<00:54, 36.14it/s]
 87%|████████▋ | 13571/15550 [07:30<00:55, 35.87it/s]
 87%|████████▋ | 13575/15550 [07:30<00:55, 35

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:08:28. Total running time: 1hr 48min 6s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

 89%|████████▊ | 13779/15550 [07:36<00:57, 30.61it/s]
 89%|████████▊ | 13783/15550 [07:37<01:01, 28.90it/s]
 89%|████████▊ | 13786/15550 [07:37<01:00, 29.05it/s]
 89%|████████▊ | 13790/15550 [07:37<00:56, 31.27it/s]
 89%|████████▊ | 13794/15550 [07:37<00:53, 33.13it/s]
 89%|████████▊ | 13798/15550 [07:37<00:56, 30.78it/s]
 89%|████████▉ | 13802/15550 [07:37<00:54, 32.04it/s]
 89%|████████▉ | 13806/15550 [07:37<00:52, 33.03it/s]
 89%|████████▉ | 13810/15550 [07:37<00:50, 34.27it/s]
 89%|████████▉ | 13814/15550 [07:37<00:49, 35.18it/s]
 89%|████████▉ | 13818/15550 [07:38<00:48, 35.58it/s]
 89%|████████▉ | 13822/15550 [07:38<00:47, 36.47it/s]
 89%|████████▉ | 13826/15550 [07:38<00:47, 36.22it/s]
 89%|████████▉ | 13830/15550 [07:38<00:47, 35.94it/s]
 89%|████████▉ | 13834/15550 [07:38<00:48, 35.48it/s]
 89%|████████▉ | 13838/15550 [07:38<00:46, 36.57it/s]
 89%|████████▉ | 13842/15550 [07:38<00:46, 36.51it/s]
 89%|████████▉ | 13846/15550 [07:38<00:46, 36.36it/s]
 89%|████████▉ | 13850/15550

[2m[36m(_objective pid=42509)[0m {'loss': 0.0456, 'learning_rate': 3.976453172251937e-06, 'epoch': 4.5}


[2m[36m(_objective pid=42509)[0m  90%|█████████ | 14010/15550 [07:43<00:42, 35.96it/s]
 90%|█████████ | 14014/15550 [07:43<00:42, 35.74it/s]
 90%|█████████ | 14018/15550 [07:43<00:42, 36.46it/s]
 90%|█████████ | 14022/15550 [07:43<00:41, 36.76it/s]
 90%|█████████ | 14026/15550 [07:44<00:46, 33.07it/s]
 90%|█████████ | 14030/15550 [07:44<00:44, 34.44it/s]
 90%|█████████ | 14034/15550 [07:44<00:42, 35.79it/s]
 90%|█████████ | 14038/15550 [07:44<00:41, 36.47it/s]
 90%|█████████ | 14042/15550 [07:44<00:42, 35.36it/s]
 90%|█████████ | 14046/15550 [07:44<00:41, 36.28it/s]
 90%|█████████ | 14050/15550 [07:44<00:45, 32.76it/s]
 90%|█████████ | 14054/15550 [07:44<00:43, 34.15it/s]
 90%|█████████ | 14059/15550 [07:44<00:41, 35.88it/s]
 90%|█████████ | 14063/15550 [07:45<00:45, 32.54it/s]
 90%|█████████ | 14067/15550 [07:45<00:47, 31.38it/s]
 90%|█████████ | 14071/15550 [07:45<00:45, 32.87it/s]
 91%|█████████ | 14075/15550 [07:45<00:44, 33.52it/s]
 91%|█████████ | 14079/15550 [07:45<00:44, 33

[2m[36m(_objective pid=42509)[0m {'loss': 0.0229, 'learning_rate': 2.693726342493248e-06, 'epoch': 4.66}


[2m[36m(_objective pid=42509)[0m  93%|█████████▎| 14504/15550 [07:58<00:32, 32.11it/s]
 93%|█████████▎| 14508/15550 [07:58<00:32, 32.14it/s]
 93%|█████████▎| 14512/15550 [07:58<00:31, 32.49it/s]
 93%|█████████▎| 14516/15550 [07:58<00:34, 30.12it/s]
 93%|█████████▎| 14520/15550 [07:58<00:32, 31.82it/s]
 93%|█████████▎| 14524/15550 [07:58<00:31, 32.44it/s]
 93%|█████████▎| 14528/15550 [07:59<00:31, 31.98it/s]
 93%|█████████▎| 14532/15550 [07:59<00:30, 33.54it/s]
 93%|█████████▎| 14536/15550 [07:59<00:32, 31.08it/s]
 94%|█████████▎| 14540/15550 [07:59<00:32, 30.70it/s]
 94%|█████████▎| 14544/15550 [07:59<00:31, 31.94it/s]
 94%|█████████▎| 14548/15550 [07:59<00:29, 33.66it/s]
 94%|█████████▎| 14552/15550 [07:59<00:28, 34.77it/s]
 94%|█████████▎| 14556/15550 [07:59<00:27, 35.50it/s]
 94%|█████████▎| 14560/15550 [08:00<00:28, 34.83it/s]
 94%|█████████▎| 14564/15550 [08:00<00:27, 35.84it/s]
 94%|█████████▎| 14568/15550 [08:00<00:26, 36.51it/s]
 94%|█████████▎| 14572/15550 [08:00<00:29, 33

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:08:58. Total running time: 1hr 48min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                

[2m[36m(_objective pid=42509)[0m  95%|█████████▌| 14800/15550 [08:07<00:21, 35.40it/s]
 95%|█████████▌| 14804/15550 [08:07<00:20, 36.30it/s]
 95%|█████████▌| 14808/15550 [08:07<00:20, 36.93it/s]
 95%|█████████▌| 14812/15550 [08:07<00:20, 35.26it/s]
 95%|█████████▌| 14816/15550 [08:07<00:20, 36.34it/s]
 95%|█████████▌| 14820/15550 [08:07<00:22, 32.88it/s]
 95%|█████████▌| 14824/15550 [08:07<00:21, 34.47it/s]
 95%|█████████▌| 14828/15550 [08:07<00:20, 34.50it/s]
 95%|█████████▌| 14832/15550 [08:07<00:20, 35.61it/s]
 95%|█████████▌| 14836/15550 [08:08<00:19, 36.10it/s]
 95%|█████████▌| 14840/15550 [08:08<00:19, 36.99it/s]
 95%|█████████▌| 14844/15550 [08:08<00:20, 34.90it/s]
 95%|█████████▌| 14848/15550 [08:08<00:19, 36.00it/s]
 96%|█████████▌| 14853/15550 [08:08<00:19, 35.53it/s]
 96%|█████████▌| 14857/15550 [08:08<00:19, 35.62it/s]
 96%|█████████▌| 14861/15550 [08:08<00:19, 35.67it/s]
 96%|█████████▌| 14865/15550 [08:08<00:19, 34.49it/s]
 96%|█████████▌| 14869/15550 [08:09<00:20, 33

[2m[36m(_objective pid=42509)[0m {'loss': 0.0476, 'learning_rate': 1.4109995127345585e-06, 'epoch': 4.82}


[2m[36m(_objective pid=42509)[0m  97%|█████████▋| 15007/15550 [08:13<00:16, 32.54it/s]
 97%|█████████▋| 15011/15550 [08:13<00:17, 31.28it/s]
 97%|█████████▋| 15015/15550 [08:13<00:16, 31.81it/s]
 97%|█████████▋| 15019/15550 [08:13<00:16, 31.74it/s]
 97%|█████████▋| 15023/15550 [08:13<00:16, 31.69it/s]
 97%|█████████▋| 15027/15550 [08:13<00:15, 32.77it/s]
 97%|█████████▋| 15031/15550 [08:13<00:16, 31.12it/s]
 97%|█████████▋| 15035/15550 [08:14<00:15, 32.56it/s]
 97%|█████████▋| 15039/15550 [08:14<00:15, 33.93it/s]
 97%|█████████▋| 15043/15550 [08:14<00:15, 33.38it/s]
 97%|█████████▋| 15047/15550 [08:14<00:16, 30.58it/s]
 97%|█████████▋| 15051/15550 [08:14<00:15, 31.96it/s]
 97%|█████████▋| 15055/15550 [08:14<00:15, 32.07it/s]
 97%|█████████▋| 15059/15550 [08:14<00:15, 32.45it/s]
 97%|█████████▋| 15063/15550 [08:14<00:14, 33.53it/s]
 97%|█████████▋| 15067/15550 [08:15<00:14, 33.38it/s]
 97%|█████████▋| 15071/15550 [08:15<00:14, 33.86it/s]
 97%|█████████▋| 15075/15550 [08:15<00:13, 35

[2m[36m(_objective pid=42509)[0m {'loss': 0.042, 'learning_rate': 1.2827268297586895e-07, 'epoch': 4.98}


[2m[36m(_objective pid=42509)[0m 100%|█████████▉| 15504/15550 [08:27<00:01, 36.09it/s]
100%|█████████▉| 15508/15550 [08:28<00:01, 35.71it/s]
100%|█████████▉| 15512/15550 [08:28<00:01, 34.17it/s]
100%|█████████▉| 15516/15550 [08:28<00:00, 35.11it/s]
100%|█████████▉| 15520/15550 [08:28<00:00, 32.29it/s]
100%|█████████▉| 15524/15550 [08:28<00:00, 33.55it/s]
100%|█████████▉| 15528/15550 [08:28<00:00, 33.61it/s]
100%|█████████▉| 15532/15550 [08:28<00:00, 31.21it/s]
100%|█████████▉| 15536/15550 [08:28<00:00, 30.58it/s]
100%|█████████▉| 15540/15550 [08:29<00:00, 27.87it/s]
100%|█████████▉| 15543/15550 [08:29<00:00, 27.89it/s]
100%|█████████▉| 15547/15550 [08:29<00:00, 30.08it/s]
[2m[36m(_objective pid=42509)[0m 
  0%|          | 0/130 [00:00<?, ?it/s][A
[2m[36m(_objective pid=42509)[0m 
  2%|▏         | 2/130 [00:00<00:08, 14.36it/s][A
[2m[36m(_objective pid=42509)[0m 
  4%|▍         | 5/130 [00:00<00:06, 20.64it/s][A
[2m[36m(_objective pid=42509)[0m 
  6%|▌         | 8/130

Trial status: 14 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-11 15:09:28. Total running time: 1hr 49min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00014   RUNNING          3.98928e-05                    5                 

[2m[36m(_objective pid=42509)[0m 
 84%|████████▍ | 109/130 [00:07<00:01, 13.71it/s][A
[2m[36m(_objective pid=42509)[0m 
 85%|████████▌ | 111/130 [00:07<00:01, 11.86it/s][A
[2m[36m(_objective pid=42509)[0m 
 87%|████████▋ | 113/130 [00:08<00:01, 11.38it/s][A
[2m[36m(_objective pid=42509)[0m 
 89%|████████▉ | 116/130 [00:08<00:01, 12.41it/s][A
[2m[36m(_objective pid=42509)[0m 
 91%|█████████ | 118/130 [00:08<00:00, 13.13it/s][A
[2m[36m(_objective pid=42509)[0m 
 92%|█████████▏| 120/130 [00:08<00:00, 14.32it/s][A
[2m[36m(_objective pid=42509)[0m 
 94%|█████████▍| 122/130 [00:08<00:00, 13.75it/s][A
[2m[36m(_objective pid=42509)[0m 
 95%|█████████▌| 124/130 [00:08<00:00, 13.20it/s][A
[2m[36m(_objective pid=42509)[0m 
 98%|█████████▊| 127/130 [00:09<00:00, 13.15it/s][A
[2m[36m(_objective pid=42509)[0m 
 99%|█████████▉| 129/130 [00:09<00:00, 11.95it/s][A


Trial _objective_f556c_00014 finished iteration 5 at 2023-09-11 15:09:30. Total running time: 1hr 49min 9s
+-------------------------------------------------+
| Trial _objective_f556c_00014 result             |
+-------------------------------------------------+
| time_this_iter_s                        103.942 |
| time_total_s                            521.361 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.25714 |
| eval_runtime                             9.6012 |
| eval_samples_per_second                 431.923 |
| eval_steps_per_second                     13.54 |
| objective                               0.25714 |
+-------------------------------------------------+

[2m[36m(_objective pid=42509)[0m {'eval_loss': 0.25713875889778137, 'eval_runtime': 9.6012, 'eval_samples_per_second': 431.923, 'eval_steps_per_second': 13.54, 'epoch': 5.0}


[2m[36m(_objective pid=42509)[0m                                                      
[2m[36m(_objective pid=42509)[0m                                                  [A100%|██████████| 15550/15550 [08:39<00:00, 30.08it/s]
[2m[36m(_objective pid=42509)[0m 100%|██████████| 130/130 [00:09<00:00, 11.95it/s][A
                                                 [A
100%|██████████| 15550/15550 [08:40<00:00, 30.08it/s]


Trial _objective_f556c_00014 completed after 5 iterations at 2023-09-11 15:09:31. Total running time: 1hr 49min 10s

[2m[36m(_objective pid=42509)[0m {'train_runtime': 520.5476, 'train_samples_per_second': 119.48, 'train_steps_per_second': 29.872, 'train_loss': 0.12977551292448372, 'epoch': 5.0}


[2m[36m(_objective pid=42509)[0m                                                      100%|██████████| 15550/15550 [08:40<00:00, 30.08it/s]100%|██████████| 15550/15550 [08:40<00:00, 29.87it/s]


Trial _objective_f556c_00015 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00015 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             3e-05 |
| num_train_epochs                              1 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   2 |
| weight_decay                            0.24066 |
+-------------------------------------------------+



[2m[36m(_objective pid=44804)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.bias']
[2m[36m(_objective pid=44804)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=44804)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=44804)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=44804)[0m {'loss': 0.5318, 'learning_rate': 2.915898132260567e-05, 'epoch': 0.08}


[2m[36m(_objective pid=44804)[0m   8%|▊         | 502/6220 [00:13<02:45, 34.49it/s]
  8%|▊         | 506/6220 [00:13<02:42, 35.22it/s]
  8%|▊         | 510/6220 [00:13<02:40, 35.52it/s]
  8%|▊         | 514/6220 [00:14<02:43, 34.85it/s]
  8%|▊         | 518/6220 [00:14<02:40, 35.55it/s]
  8%|▊         | 522/6220 [00:14<02:37, 36.16it/s]
  8%|▊         | 526/6220 [00:14<02:33, 37.05it/s]
  9%|▊         | 530/6220 [00:14<02:34, 36.87it/s]
  9%|▊         | 534/6220 [00:14<02:32, 37.33it/s]
  9%|▊         | 538/6220 [00:14<02:31, 37.49it/s]
  9%|▊         | 542/6220 [00:14<02:33, 37.07it/s]
  9%|▉         | 546/6220 [00:14<02:34, 36.75it/s]
  9%|▉         | 550/6220 [00:15<02:31, 37.49it/s]
  9%|▉         | 554/6220 [00:15<02:30, 37.73it/s]
  9%|▉         | 558/6220 [00:15<02:29, 37.82it/s]
  9%|▉         | 562/6220 [00:15<02:32, 37.20it/s]
  9%|▉         | 566/6220 [00:15<02:32, 37.19it/s]
  9%|▉         | 570/6220 [00:15<02:32, 37.05it/s]
  9%|▉         | 574/6220 [00:15<02:33, 36.84

Trial status: 15 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-11 15:09:58. Total running time: 1hr 49min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00015   RUNNING          3.17078e-05                    1                

 10%|█         | 646/6220 [00:17<02:32, 36.56it/s]
 10%|█         | 650/6220 [00:17<02:33, 36.21it/s]
 11%|█         | 654/6220 [00:17<02:41, 34.55it/s]
 11%|█         | 658/6220 [00:18<02:37, 35.33it/s]
 11%|█         | 662/6220 [00:18<02:33, 36.29it/s]
 11%|█         | 666/6220 [00:18<02:31, 36.73it/s]
 11%|█         | 670/6220 [00:18<02:30, 36.77it/s]
 11%|█         | 674/6220 [00:18<02:30, 36.87it/s]
 11%|█         | 678/6220 [00:18<02:27, 37.60it/s]
 11%|█         | 682/6220 [00:18<02:26, 37.79it/s]
 11%|█         | 686/6220 [00:18<02:34, 35.76it/s]
 11%|█         | 690/6220 [00:18<02:35, 35.48it/s]
 11%|█         | 694/6220 [00:19<02:34, 35.68it/s]
 11%|█         | 698/6220 [00:19<02:33, 35.94it/s]
 11%|█▏        | 702/6220 [00:19<02:30, 36.73it/s]
 11%|█▏        | 706/6220 [00:19<02:28, 37.21it/s]
 11%|█▏        | 710/6220 [00:19<02:26, 37.70it/s]
 11%|█▏        | 714/6220 [00:19<02:23, 38.35it/s]
 12%|█▏        | 718/6220 [00:19<02:25, 37.86it/s]
 12%|█▏        | 722/6220 [00:1

[2m[36m(_objective pid=44804)[0m {'loss': 0.4828, 'learning_rate': 2.6610119318881396e-05, 'epoch': 0.16}


[2m[36m(_objective pid=44804)[0m  16%|█▌        | 1003/6220 [00:27<02:23, 36.26it/s]
 16%|█▌        | 1007/6220 [00:27<02:26, 35.57it/s]
 16%|█▋        | 1011/6220 [00:27<02:25, 35.82it/s]
 16%|█▋        | 1015/6220 [00:27<02:30, 34.67it/s]
 16%|█▋        | 1019/6220 [00:27<02:30, 34.51it/s]
 16%|█▋        | 1023/6220 [00:27<02:28, 34.90it/s]
 17%|█▋        | 1027/6220 [00:28<02:26, 35.52it/s]
 17%|█▋        | 1031/6220 [00:28<02:26, 35.32it/s]
 17%|█▋        | 1035/6220 [00:28<02:25, 35.55it/s]
 17%|█▋        | 1039/6220 [00:28<02:25, 35.60it/s]
 17%|█▋        | 1043/6220 [00:28<02:26, 35.38it/s]
 17%|█▋        | 1047/6220 [00:28<02:25, 35.44it/s]
 17%|█▋        | 1051/6220 [00:28<02:22, 36.20it/s]
 17%|█▋        | 1055/6220 [00:28<02:21, 36.52it/s]
 17%|█▋        | 1059/6220 [00:28<02:23, 35.95it/s]
 17%|█▋        | 1063/6220 [00:29<02:22, 36.27it/s]
 17%|█▋        | 1067/6220 [00:29<02:26, 35.19it/s]
 17%|█▋        | 1071/6220 [00:29<02:26, 35.09it/s]
 17%|█▋        | 1075/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.4165, 'learning_rate': 2.4061257315157123e-05, 'epoch': 0.24}


[2m[36m(_objective pid=44804)[0m  24%|██▍       | 1506/6220 [00:41<02:09, 36.43it/s]
 24%|██▍       | 1510/6220 [00:41<02:10, 36.15it/s]
 24%|██▍       | 1514/6220 [00:41<02:13, 35.31it/s]
 24%|██▍       | 1518/6220 [00:41<02:16, 34.43it/s]
 24%|██▍       | 1522/6220 [00:41<02:10, 35.92it/s]
 25%|██▍       | 1526/6220 [00:41<02:07, 36.75it/s]
 25%|██▍       | 1530/6220 [00:41<02:05, 37.43it/s]
 25%|██▍       | 1534/6220 [00:41<02:03, 37.87it/s]
 25%|██▍       | 1538/6220 [00:42<02:01, 38.42it/s]
 25%|██▍       | 1542/6220 [00:42<02:01, 38.39it/s]
 25%|██▍       | 1547/6220 [00:42<01:59, 38.95it/s]
 25%|██▍       | 1551/6220 [00:42<02:01, 38.57it/s]
 25%|██▌       | 1555/6220 [00:42<02:00, 38.73it/s]
 25%|██▌       | 1559/6220 [00:42<02:05, 37.09it/s]
 25%|██▌       | 1563/6220 [00:42<02:04, 37.29it/s]
 25%|██▌       | 1567/6220 [00:42<02:06, 36.79it/s]
 25%|██▌       | 1571/6220 [00:42<02:07, 36.58it/s]
 25%|██▌       | 1575/6220 [00:43<02:09, 35.83it/s]
 25%|██▌       | 1579/6220 

Trial status: 15 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-11 15:10:28. Total running time: 1hr 50min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00015   RUNNING          3.17078e-05                    1                 

 28%|██▊       | 1748/6220 [00:47<01:57, 38.18it/s]
 28%|██▊       | 1752/6220 [00:47<02:00, 37.21it/s]
 28%|██▊       | 1756/6220 [00:47<01:59, 37.43it/s]
 28%|██▊       | 1760/6220 [00:48<02:01, 36.60it/s]
 28%|██▊       | 1764/6220 [00:48<01:59, 37.16it/s]
 28%|██▊       | 1768/6220 [00:48<01:58, 37.63it/s]
 28%|██▊       | 1772/6220 [00:48<01:57, 37.95it/s]
 29%|██▊       | 1776/6220 [00:48<01:56, 38.09it/s]
 29%|██▊       | 1780/6220 [00:48<01:55, 38.45it/s]
 29%|██▊       | 1784/6220 [00:48<01:56, 38.09it/s]
 29%|██▊       | 1788/6220 [00:48<01:57, 37.79it/s]
 29%|██▉       | 1792/6220 [00:48<01:57, 37.79it/s]
 29%|██▉       | 1796/6220 [00:49<01:55, 38.15it/s]
 29%|██▉       | 1800/6220 [00:49<01:55, 38.40it/s]
 29%|██▉       | 1804/6220 [00:49<01:57, 37.72it/s]
 29%|██▉       | 1808/6220 [00:49<01:57, 37.47it/s]
 29%|██▉       | 1812/6220 [00:49<01:57, 37.45it/s]
 29%|██▉       | 1816/6220 [00:49<01:57, 37.45it/s]
 29%|██▉       | 1821/6220 [00:49<01:54, 38.34it/s]
 29%|██▉    

[2m[36m(_objective pid=44804)[0m {'loss': 0.3787, 'learning_rate': 2.1512395311432853e-05, 'epoch': 0.32}


[2m[36m(_objective pid=44804)[0m  32%|███▏      | 2006/6220 [00:54<01:47, 39.27it/s]
 32%|███▏      | 2010/6220 [00:54<01:47, 39.07it/s]
 32%|███▏      | 2014/6220 [00:54<01:50, 38.00it/s]
 32%|███▏      | 2018/6220 [00:54<01:49, 38.26it/s]
 33%|███▎      | 2022/6220 [00:54<01:49, 38.50it/s]
 33%|███▎      | 2027/6220 [00:55<01:47, 39.10it/s]
 33%|███▎      | 2031/6220 [00:55<01:46, 39.20it/s]
 33%|███▎      | 2035/6220 [00:55<01:52, 37.11it/s]
 33%|███▎      | 2040/6220 [00:55<01:49, 38.32it/s]
 33%|███▎      | 2044/6220 [00:55<01:48, 38.35it/s]
 33%|███▎      | 2048/6220 [00:55<01:47, 38.73it/s]
 33%|███▎      | 2052/6220 [00:55<01:47, 38.80it/s]
 33%|███▎      | 2056/6220 [00:55<01:46, 39.12it/s]
 33%|███▎      | 2060/6220 [00:55<01:46, 39.00it/s]
 33%|███▎      | 2064/6220 [00:56<01:46, 38.84it/s]
 33%|███▎      | 2068/6220 [00:56<01:48, 38.44it/s]
 33%|███▎      | 2072/6220 [00:56<01:47, 38.74it/s]
 33%|███▎      | 2076/6220 [00:56<01:47, 38.56it/s]
 33%|███▎      | 2080/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.3521, 'learning_rate': 1.896353330770858e-05, 'epoch': 0.4}


[2m[36m(_objective pid=44804)[0m  40%|████      | 2507/6220 [01:07<01:36, 38.38it/s]
 40%|████      | 2511/6220 [01:07<01:38, 37.65it/s]
 40%|████      | 2515/6220 [01:08<01:37, 37.98it/s]
 40%|████      | 2519/6220 [01:08<01:37, 38.05it/s]
 41%|████      | 2523/6220 [01:08<01:37, 38.08it/s]
 41%|████      | 2527/6220 [01:08<01:41, 36.39it/s]
 41%|████      | 2531/6220 [01:08<01:40, 36.72it/s]
 41%|████      | 2535/6220 [01:08<01:39, 36.94it/s]
 41%|████      | 2539/6220 [01:08<01:37, 37.59it/s]
 41%|████      | 2543/6220 [01:08<01:36, 37.92it/s]
 41%|████      | 2547/6220 [01:08<01:38, 37.20it/s]
 41%|████      | 2551/6220 [01:08<01:40, 36.67it/s]
 41%|████      | 2555/6220 [01:09<01:42, 35.60it/s]
 41%|████      | 2559/6220 [01:09<01:41, 35.97it/s]
 41%|████      | 2563/6220 [01:09<01:40, 36.55it/s]
 41%|████▏     | 2567/6220 [01:09<01:38, 37.13it/s]
 41%|████▏     | 2571/6220 [01:09<01:37, 37.53it/s]
 41%|████▏     | 2575/6220 [01:09<01:36, 37.88it/s]
 41%|████▏     | 2580/6220 

Trial status: 15 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-11 15:10:58. Total running time: 1hr 50min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00015   RUNNING          3.17078e-05                    1                

[2m[36m(_objective pid=44804)[0m  46%|████▋     | 2877/6220 [01:17<01:34, 35.47it/s]
 46%|████▋     | 2881/6220 [01:17<01:32, 36.27it/s]
 46%|████▋     | 2885/6220 [01:18<01:29, 37.16it/s]
 46%|████▋     | 2889/6220 [01:18<01:30, 36.71it/s]
 47%|████▋     | 2893/6220 [01:18<01:28, 37.56it/s]
 47%|████▋     | 2897/6220 [01:18<01:27, 38.02it/s]
 47%|████▋     | 2901/6220 [01:18<01:27, 38.04it/s]
 47%|████▋     | 2905/6220 [01:18<01:26, 38.37it/s]
 47%|████▋     | 2909/6220 [01:18<01:25, 38.57it/s]
 47%|████▋     | 2913/6220 [01:18<01:25, 38.63it/s]
 47%|████▋     | 2917/6220 [01:18<01:25, 38.68it/s]
 47%|████▋     | 2921/6220 [01:18<01:26, 38.18it/s]
 47%|████▋     | 2925/6220 [01:19<01:28, 37.40it/s]
 47%|████▋     | 2929/6220 [01:19<01:27, 37.68it/s]
 47%|████▋     | 2933/6220 [01:19<01:30, 36.37it/s]
 47%|████▋     | 2937/6220 [01:19<01:28, 37.20it/s]
 47%|████▋     | 2941/6220 [01:19<01:26, 37.91it/s]
 47%|████▋     | 2945/6220 [01:19<01:25, 38.21it/s]
 47%|████▋     | 2949/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.3377, 'learning_rate': 1.641467130398431e-05, 'epoch': 0.48}


[2m[36m(_objective pid=44804)[0m  48%|████▊     | 3005/6220 [01:21<01:22, 38.77it/s]
 48%|████▊     | 3009/6220 [01:21<01:23, 38.57it/s]
 48%|████▊     | 3013/6220 [01:21<01:23, 38.29it/s]
 49%|████▊     | 3017/6220 [01:21<01:23, 38.32it/s]
 49%|████▊     | 3021/6220 [01:21<01:23, 38.48it/s]
 49%|████▊     | 3026/6220 [01:21<01:21, 38.99it/s]
 49%|████▊     | 3030/6220 [01:21<01:21, 39.20it/s]
 49%|████▉     | 3034/6220 [01:21<01:21, 39.32it/s]
 49%|████▉     | 3038/6220 [01:22<01:21, 39.24it/s]
 49%|████▉     | 3042/6220 [01:22<01:21, 39.18it/s]
 49%|████▉     | 3046/6220 [01:22<01:22, 38.51it/s]
 49%|████▉     | 3050/6220 [01:22<01:23, 37.91it/s]
 49%|████▉     | 3054/6220 [01:22<01:23, 37.82it/s]
 49%|████▉     | 3058/6220 [01:22<01:23, 37.97it/s]
 49%|████▉     | 3062/6220 [01:22<01:22, 38.27it/s]
 49%|████▉     | 3066/6220 [01:22<01:22, 38.26it/s]
 49%|████▉     | 3070/6220 [01:22<01:21, 38.47it/s]
 49%|████▉     | 3074/6220 [01:22<01:22, 38.24it/s]
 49%|████▉     | 3078/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.2971, 'learning_rate': 1.3865809300260037e-05, 'epoch': 0.56}


[2m[36m(_objective pid=44804)[0m  56%|█████▋    | 3508/6220 [01:34<01:16, 35.57it/s]
 56%|█████▋    | 3512/6220 [01:34<01:15, 35.94it/s]
 57%|█████▋    | 3516/6220 [01:35<01:15, 35.82it/s]
 57%|█████▋    | 3520/6220 [01:35<01:16, 35.41it/s]
 57%|█████▋    | 3524/6220 [01:35<01:14, 36.42it/s]
 57%|█████▋    | 3528/6220 [01:35<01:13, 36.64it/s]
 57%|█████▋    | 3533/6220 [01:35<01:11, 37.83it/s]
 57%|█████▋    | 3537/6220 [01:35<01:09, 38.38it/s]
 57%|█████▋    | 3541/6220 [01:35<01:09, 38.78it/s]
 57%|█████▋    | 3545/6220 [01:35<01:09, 38.64it/s]
 57%|█████▋    | 3549/6220 [01:35<01:08, 38.79it/s]
 57%|█████▋    | 3553/6220 [01:35<01:09, 38.50it/s]
 57%|█████▋    | 3557/6220 [01:36<01:08, 38.93it/s]
 57%|█████▋    | 3562/6220 [01:36<01:07, 39.35it/s]
 57%|█████▋    | 3567/6220 [01:36<01:07, 39.39it/s]
 57%|█████▋    | 3571/6220 [01:36<01:07, 38.96it/s]
 57%|█████▋    | 3575/6220 [01:36<01:09, 38.32it/s]
 58%|█████▊    | 3579/6220 [01:36<01:11, 36.89it/s]
 58%|█████▊    | 3583/6220 

Trial status: 15 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-11 15:11:28. Total running time: 1hr 51min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00015   RUNNING          3.17078e-05                    1                 

[2m[36m(_objective pid=44804)[0m  64%|██████▍   | 3998/6220 [01:47<00:59, 37.14it/s]                                                    64%|██████▍   | 4000/6220 [01:47<00:59, 37.14it/s]
 64%|██████▍   | 4002/6220 [01:47<00:59, 37.16it/s]
 64%|██████▍   | 4006/6220 [01:47<00:59, 36.91it/s]
 64%|██████▍   | 4010/6220 [01:47<01:00, 36.40it/s]
 65%|██████▍   | 4014/6220 [01:48<00:59, 36.93it/s]
 65%|██████▍   | 4018/6220 [01:48<01:00, 36.22it/s]
 65%|██████▍   | 4022/6220 [01:48<00:59, 36.84it/s]
 65%|██████▍   | 4026/6220 [01:48<01:00, 36.55it/s]
 65%|██████▍   | 4030/6220 [01:48<00:59, 36.92it/s]
 65%|██████▍   | 4034/6220 [01:48<00:58, 37.10it/s]
 65%|██████▍   | 4038/6220 [01:48<01:00, 36.27it/s]
 65%|██████▍   | 4042/6220 [01:48<00:59, 36.60it/s]
 65%|██████▌   | 4046/6220 [01:48<01:00, 36.02it/s]
 65%|██████▌   | 4050/6220 [01:49<01:04, 33.76it/s]
 65%|██████▌   | 4054/6220 [01:49<01:02, 34.72it/s]
 65%|██████▌   | 4058/6220 [01:49<01:02, 34.35it/s]
 65%|██████▌   | 4062/6220

[2m[36m(_objective pid=44804)[0m {'loss': 0.2406, 'learning_rate': 8.768085292811494e-06, 'epoch': 0.72}


[2m[36m(_objective pid=44804)[0m  72%|███████▏  | 4505/6220 [02:01<00:44, 38.55it/s]
 72%|███████▏  | 4509/6220 [02:01<00:44, 38.49it/s]
 73%|███████▎  | 4513/6220 [02:01<00:45, 37.66it/s]
 73%|███████▎  | 4517/6220 [02:01<00:44, 37.86it/s]
 73%|███████▎  | 4521/6220 [02:01<00:44, 38.05it/s]
 73%|███████▎  | 4525/6220 [02:01<00:45, 36.98it/s]
 73%|███████▎  | 4529/6220 [02:01<00:45, 36.83it/s]
 73%|███████▎  | 4533/6220 [02:02<00:44, 37.59it/s]
 73%|███████▎  | 4537/6220 [02:02<00:44, 38.14it/s]
 73%|███████▎  | 4541/6220 [02:02<00:44, 37.67it/s]
 73%|███████▎  | 4545/6220 [02:02<00:44, 37.70it/s]
 73%|███████▎  | 4549/6220 [02:02<00:43, 38.03it/s]
 73%|███████▎  | 4553/6220 [02:02<00:43, 38.30it/s]
 73%|███████▎  | 4557/6220 [02:02<00:43, 38.19it/s]
 73%|███████▎  | 4561/6220 [02:02<00:42, 38.59it/s]
 73%|███████▎  | 4565/6220 [02:02<00:44, 36.94it/s]
 73%|███████▎  | 4569/6220 [02:03<00:44, 37.52it/s]
 74%|███████▎  | 4573/6220 [02:03<00:43, 37.98it/s]
 74%|███████▎  | 4577/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.213, 'learning_rate': 6.219223289087223e-06, 'epoch': 0.8}


[2m[36m(_objective pid=44804)[0m  80%|████████  | 5004/6220 [02:14<00:32, 37.90it/s]
 81%|████████  | 5008/6220 [02:14<00:33, 36.59it/s]
 81%|████████  | 5012/6220 [02:14<00:34, 34.76it/s]
 81%|████████  | 5016/6220 [02:15<00:34, 35.00it/s]
 81%|████████  | 5020/6220 [02:15<00:33, 36.12it/s]
 81%|████████  | 5024/6220 [02:15<00:32, 36.50it/s]
 81%|████████  | 5028/6220 [02:15<00:33, 35.48it/s]
 81%|████████  | 5032/6220 [02:15<00:33, 35.76it/s]
 81%|████████  | 5037/6220 [02:15<00:32, 36.79it/s]
 81%|████████  | 5041/6220 [02:15<00:31, 37.13it/s]
 81%|████████  | 5045/6220 [02:15<00:31, 37.65it/s]
 81%|████████  | 5049/6220 [02:15<00:30, 38.19it/s]
 81%|████████  | 5053/6220 [02:16<00:30, 38.27it/s]
 81%|████████▏ | 5057/6220 [02:16<00:30, 38.31it/s]
 81%|████████▏ | 5061/6220 [02:16<00:30, 37.72it/s]
 81%|████████▏ | 5065/6220 [02:16<00:30, 37.58it/s]
 81%|████████▏ | 5069/6220 [02:16<00:30, 37.94it/s]
 82%|████████▏ | 5073/6220 [02:16<00:30, 38.01it/s]
 82%|████████▏ | 5077/6220 

Trial status: 15 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-11 15:11:58. Total running time: 1hr 51min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00015   RUNNING          3.17078e-05                    1                

[2m[36m(_objective pid=44804)[0m  82%|████████▏ | 5117/6220 [02:17<00:29, 37.03it/s]
 82%|████████▏ | 5121/6220 [02:17<00:29, 36.95it/s]
 82%|████████▏ | 5125/6220 [02:17<00:30, 36.49it/s]
 82%|████████▏ | 5129/6220 [02:18<00:30, 36.26it/s]
 83%|████████▎ | 5133/6220 [02:18<00:30, 35.68it/s]
 83%|████████▎ | 5137/6220 [02:18<00:29, 36.69it/s]
 83%|████████▎ | 5141/6220 [02:18<00:28, 37.26it/s]
 83%|████████▎ | 5145/6220 [02:18<00:29, 36.15it/s]
 83%|████████▎ | 5149/6220 [02:18<00:29, 36.79it/s]
 83%|████████▎ | 5153/6220 [02:18<00:29, 36.12it/s]
 83%|████████▎ | 5157/6220 [02:18<00:29, 35.99it/s]
 83%|████████▎ | 5161/6220 [02:18<00:29, 36.23it/s]
 83%|████████▎ | 5165/6220 [02:19<00:28, 37.22it/s]
 83%|████████▎ | 5169/6220 [02:19<00:28, 37.02it/s]
 83%|████████▎ | 5173/6220 [02:19<00:28, 37.25it/s]
 83%|████████▎ | 5177/6220 [02:19<00:28, 37.18it/s]
 83%|████████▎ | 5181/6220 [02:19<00:27, 37.22it/s]
 83%|████████▎ | 5185/6220 [02:19<00:27, 36.99it/s]
 83%|████████▎ | 5189/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.2312, 'learning_rate': 3.6703612853629508e-06, 'epoch': 0.88}


[2m[36m(_objective pid=44804)[0m  88%|████████▊ | 5503/6220 [02:28<00:19, 37.72it/s]
 89%|████████▊ | 5507/6220 [02:28<00:18, 38.05it/s]
 89%|████████▊ | 5511/6220 [02:28<00:18, 38.44it/s]
 89%|████████▊ | 5515/6220 [02:28<00:18, 37.40it/s]
 89%|████████▊ | 5519/6220 [02:28<00:18, 37.84it/s]
 89%|████████▉ | 5523/6220 [02:28<00:18, 37.61it/s]
 89%|████████▉ | 5527/6220 [02:28<00:18, 37.70it/s]
 89%|████████▉ | 5531/6220 [02:29<00:18, 37.48it/s]
 89%|████████▉ | 5535/6220 [02:29<00:18, 37.92it/s]
 89%|████████▉ | 5539/6220 [02:29<00:17, 38.24it/s]
 89%|████████▉ | 5543/6220 [02:29<00:17, 38.58it/s]
 89%|████████▉ | 5547/6220 [02:29<00:18, 37.00it/s]
 89%|████████▉ | 5551/6220 [02:29<00:17, 37.43it/s]
 89%|████████▉ | 5555/6220 [02:29<00:18, 36.69it/s]
 89%|████████▉ | 5559/6220 [02:29<00:18, 35.36it/s]
 89%|████████▉ | 5563/6220 [02:29<00:18, 35.40it/s]
 90%|████████▉ | 5567/6220 [02:29<00:17, 36.51it/s]
 90%|████████▉ | 5571/6220 [02:30<00:17, 37.39it/s]
 90%|████████▉ | 5575/6220 

[2m[36m(_objective pid=44804)[0m {'loss': 0.2645, 'learning_rate': 1.1214992816386796e-06, 'epoch': 0.96}


[2m[36m(_objective pid=44804)[0m  97%|█████████▋| 6007/6220 [02:41<00:05, 38.21it/s]
 97%|█████████▋| 6011/6220 [02:41<00:05, 36.92it/s]
 97%|█████████▋| 6015/6220 [02:42<00:05, 37.48it/s]
 97%|█████████▋| 6019/6220 [02:42<00:05, 38.01it/s]
 97%|█████████▋| 6023/6220 [02:42<00:05, 38.30it/s]
 97%|█████████▋| 6027/6220 [02:42<00:05, 38.20it/s]
 97%|█████████▋| 6031/6220 [02:42<00:05, 37.62it/s]
 97%|█████████▋| 6035/6220 [02:42<00:04, 37.95it/s]
 97%|█████████▋| 6039/6220 [02:42<00:04, 38.11it/s]
 97%|█████████▋| 6043/6220 [02:42<00:04, 38.38it/s]
 97%|█████████▋| 6047/6220 [02:42<00:04, 38.18it/s]
 97%|█████████▋| 6051/6220 [02:43<00:04, 38.11it/s]
 97%|█████████▋| 6055/6220 [02:43<00:04, 38.01it/s]
 97%|█████████▋| 6059/6220 [02:43<00:04, 36.17it/s]
 97%|█████████▋| 6063/6220 [02:43<00:04, 36.99it/s]
 98%|█████████▊| 6067/6220 [02:43<00:04, 37.66it/s]
 98%|█████████▊| 6071/6220 [02:43<00:03, 37.98it/s]
 98%|█████████▊| 6075/6220 [02:43<00:03, 37.46it/s]
 98%|█████████▊| 6079/6220 

Trial status: 15 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-11 15:12:28. Total running time: 1hr 52min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00015   RUNNING          3.17078e-05                    1                 

[2m[36m(_objective pid=44804)[0m 
[2m[36m(_objective pid=44804)[0m   2%|▏         | 2/130 [00:00<00:08, 14.74it/s][A
[2m[36m(_objective pid=44804)[0m 
  4%|▍         | 5/130 [00:00<00:06, 20.79it/s][A
[2m[36m(_objective pid=44804)[0m 
  6%|▌         | 8/130 [00:00<00:05, 21.38it/s][A
[2m[36m(_objective pid=44804)[0m 
  8%|▊         | 11/130 [00:00<00:08, 13.91it/s][A
[2m[36m(_objective pid=44804)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.49it/s][A
[2m[36m(_objective pid=44804)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.37it/s][A
[2m[36m(_objective pid=44804)[0m 
 13%|█▎        | 17/130 [00:01<00:09, 11.60it/s][A
[2m[36m(_objective pid=44804)[0m 
 15%|█▍        | 19/130 [00:01<00:09, 11.51it/s][A
[2m[36m(_objective pid=44804)[0m 
 16%|█▌        | 21/130 [00:01<00:08, 12.97it/s][A
[2m[36m(_objective pid=44804)[0m 
 18%|█▊        | 23/130 [00:01<00:09, 11.87it/s][A
[2m[36m(_objective pid=44804)[0m 
 19%|█▉        | 25/130 [00:01<00:07, 1

Trial _objective_f556c_00015 finished iteration 1 at 2023-09-11 15:12:37. Total running time: 1hr 52min 16s
+-------------------------------------------------+
| Trial _objective_f556c_00015 result             |
+-------------------------------------------------+
| time_this_iter_s                        179.493 |
| time_total_s                            179.493 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                                 0.236 |
| eval_runtime                              9.562 |
| eval_samples_per_second                 433.694 |
| eval_steps_per_second                    13.595 |
| objective                                 0.236 |
+-------------------------------------------------+

[2m[36m(_objective pid=44804)[0m {'eval_loss': 0.236004039645195, 'eval_runtime': 9.562, 'eval_samples_per_second': 433.694, 'eval_steps_per_second': 13.595, 'epoch': 1.0}


[2m[36m(_objective pid=44804)[0m                                                    
[2m[36m(_objective pid=44804)[0m                                                  [A100%|██████████| 6220/6220 [02:57<00:00, 37.08it/s]
[2m[36m(_objective pid=44804)[0m 100%|██████████| 130/130 [00:09<00:00, 12.01it/s][A
[2m[36m(_objective pid=44804)[0m                                                  [A


Trial _objective_f556c_00015 completed after 1 iterations at 2023-09-11 15:12:39. Total running time: 1hr 52min 18s

[2m[36m(_objective pid=44804)[0m {'train_runtime': 178.639, 'train_samples_per_second': 69.632, 'train_steps_per_second': 34.819, 'train_loss': 0.3304073836642446, 'epoch': 1.0}


[2m[36m(_objective pid=44804)[0m                                                    100%|██████████| 6220/6220 [02:58<00:00, 37.08it/s]100%|██████████| 6220/6220 [02:58<00:00, 34.82it/s]


Trial _objective_f556c_00016 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00016 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             5e-05 |
| num_train_epochs                              1 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.08805 |
+-------------------------------------------------+



[2m[36m(_objective pid=45639)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias']
[2m[36m(_objective pid=45639)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=45639)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=45639)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 16 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-11 15:12:58. Total running time: 1hr 52min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00016   RUNNING          4.94755e-05                    1                

[2m[36m(_objective pid=45639)[0m  10%|▉         | 307/3110 [00:09<01:41, 27.51it/s]
 10%|█         | 311/3110 [00:09<01:34, 29.75it/s]
 10%|█         | 315/3110 [00:09<01:27, 32.03it/s]
 10%|█         | 319/3110 [00:10<01:25, 32.70it/s]
 10%|█         | 323/3110 [00:10<01:24, 33.00it/s]
 11%|█         | 327/3110 [00:10<01:31, 30.26it/s]
 11%|█         | 331/3110 [00:10<01:29, 30.92it/s]
 11%|█         | 335/3110 [00:10<01:24, 32.75it/s]
 11%|█         | 339/3110 [00:10<01:22, 33.69it/s]
 11%|█         | 343/3110 [00:10<01:27, 31.70it/s]
 11%|█         | 347/3110 [00:10<01:29, 30.72it/s]
 11%|█▏        | 351/3110 [00:11<01:28, 31.16it/s]
 11%|█▏        | 355/3110 [00:11<01:22, 33.20it/s]
 12%|█▏        | 359/3110 [00:11<01:20, 34.13it/s]
 12%|█▏        | 363/3110 [00:11<01:22, 33.36it/s]
 12%|█▏        | 367/3110 [00:11<01:32, 29.66it/s]
 12%|█▏        | 371/3110 [00:11<01:25, 31.99it/s]
 12%|█▏        | 375/3110 [00:11<01:21, 33.47it/s]
 12%|█▏        | 379/3110 [00:11<01:19, 34.35

[2m[36m(_objective pid=45639)[0m {'loss': 0.4121, 'learning_rate': 4.1521220637007726e-05, 'epoch': 0.16}


[2m[36m(_objective pid=45639)[0m  16%|█▋        | 508/3110 [00:15<01:21, 31.90it/s]
 16%|█▋        | 512/3110 [00:15<01:21, 31.70it/s]
 17%|█▋        | 516/3110 [00:15<01:20, 32.42it/s]
 17%|█▋        | 520/3110 [00:16<01:19, 32.49it/s]
 17%|█▋        | 524/3110 [00:16<01:16, 33.90it/s]
 17%|█▋        | 528/3110 [00:16<01:14, 34.72it/s]
 17%|█▋        | 532/3110 [00:16<01:13, 34.94it/s]
 17%|█▋        | 536/3110 [00:16<01:19, 32.42it/s]
 17%|█▋        | 540/3110 [00:16<01:22, 31.26it/s]
 17%|█▋        | 544/3110 [00:16<01:18, 32.89it/s]
 18%|█▊        | 548/3110 [00:16<01:14, 34.61it/s]
 18%|█▊        | 552/3110 [00:16<01:12, 35.48it/s]
 18%|█▊        | 556/3110 [00:17<01:13, 34.57it/s]
 18%|█▊        | 560/3110 [00:17<01:13, 34.74it/s]
 18%|█▊        | 564/3110 [00:17<01:12, 34.99it/s]
 18%|█▊        | 568/3110 [00:17<01:12, 35.08it/s]
 18%|█▊        | 572/3110 [00:17<01:12, 34.99it/s]
 19%|█▊        | 576/3110 [00:17<01:12, 35.09it/s]
 19%|█▊        | 580/3110 [00:17<01:10, 35.70

[2m[36m(_objective pid=45639)[0m {'loss': 0.3281, 'learning_rate': 3.356696380999475e-05, 'epoch': 0.32}


[2m[36m(_objective pid=45639)[0m  32%|███▏      | 1008/3110 [00:30<00:59, 35.48it/s]
 33%|███▎      | 1012/3110 [00:30<00:59, 35.04it/s]
 33%|███▎      | 1016/3110 [00:30<00:59, 35.27it/s]
 33%|███▎      | 1020/3110 [00:30<00:59, 35.27it/s]
 33%|███▎      | 1024/3110 [00:30<01:02, 33.41it/s]
 33%|███▎      | 1028/3110 [00:31<01:02, 33.34it/s]
 33%|███▎      | 1032/3110 [00:31<01:00, 34.33it/s]
 33%|███▎      | 1036/3110 [00:31<01:01, 33.91it/s]
 33%|███▎      | 1040/3110 [00:31<01:00, 34.09it/s]
 34%|███▎      | 1044/3110 [00:31<01:00, 34.15it/s]
 34%|███▎      | 1048/3110 [00:31<01:01, 33.47it/s]
 34%|███▍      | 1052/3110 [00:31<01:04, 32.08it/s]
 34%|███▍      | 1056/3110 [00:31<01:02, 33.02it/s]
 34%|███▍      | 1060/3110 [00:32<01:02, 32.98it/s]
 34%|███▍      | 1064/3110 [00:32<01:01, 33.31it/s]
 34%|███▍      | 1068/3110 [00:32<00:59, 34.16it/s]
 34%|███▍      | 1072/3110 [00:32<00:58, 34.88it/s]
 35%|███▍      | 1076/3110 [00:32<00:59, 34.38it/s]
 35%|███▍      | 1080/3110 

Trial status: 16 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-11 15:13:28. Total running time: 1hr 53min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00016   RUNNING          4.94755e-05                    1                 

 42%|████▏     | 1307/3110 [00:39<00:52, 34.40it/s]
 42%|████▏     | 1311/3110 [00:39<00:56, 31.60it/s]
 42%|████▏     | 1315/3110 [00:39<00:53, 33.65it/s]
 42%|████▏     | 1319/3110 [00:40<00:51, 34.62it/s]
 43%|████▎     | 1323/3110 [00:40<00:51, 34.81it/s]
 43%|████▎     | 1327/3110 [00:40<00:55, 32.34it/s]
 43%|████▎     | 1331/3110 [00:40<00:54, 32.42it/s]
 43%|████▎     | 1335/3110 [00:40<00:52, 33.73it/s]
 43%|████▎     | 1339/3110 [00:40<00:56, 31.26it/s]
 43%|████▎     | 1343/3110 [00:40<00:56, 31.48it/s]
 43%|████▎     | 1347/3110 [00:40<00:53, 32.77it/s]
 43%|████▎     | 1351/3110 [00:41<00:57, 30.61it/s]
 44%|████▎     | 1355/3110 [00:41<00:54, 32.01it/s]
 44%|████▎     | 1359/3110 [00:41<00:56, 31.10it/s]
 44%|████▍     | 1363/3110 [00:41<00:53, 32.51it/s]
 44%|████▍     | 1367/3110 [00:41<00:50, 34.27it/s]
 44%|████▍     | 1371/3110 [00:41<00:49, 34.95it/s]
 44%|████▍     | 1375/3110 [00:41<00:47, 36.24it/s]
 44%|████▍     | 1379/3110 [00:41<00:48, 35.33it/s]
 44%|████▍  

[2m[36m(_objective pid=45639)[0m {'loss': 0.2785, 'learning_rate': 2.561270698298178e-05, 'epoch': 0.48}


[2m[36m(_objective pid=45639)[0m  48%|████▊     | 1502/3110 [00:45<00:44, 36.20it/s]
 48%|████▊     | 1506/3110 [00:45<00:44, 36.37it/s]
 49%|████▊     | 1510/3110 [00:45<00:45, 35.17it/s]
 49%|████▊     | 1514/3110 [00:45<00:45, 35.12it/s]
 49%|████▉     | 1518/3110 [00:46<00:44, 35.58it/s]
 49%|████▉     | 1522/3110 [00:46<00:46, 34.47it/s]
 49%|████▉     | 1526/3110 [00:46<00:44, 35.74it/s]
 49%|████▉     | 1530/3110 [00:46<00:44, 35.67it/s]
 49%|████▉     | 1534/3110 [00:46<00:43, 36.19it/s]
 49%|████▉     | 1538/3110 [00:46<00:42, 36.72it/s]
 50%|████▉     | 1542/3110 [00:46<00:48, 32.53it/s]
 50%|████▉     | 1546/3110 [00:46<00:46, 33.95it/s]
 50%|████▉     | 1550/3110 [00:46<00:46, 33.71it/s]
 50%|████▉     | 1554/3110 [00:47<00:47, 32.58it/s]
 50%|█████     | 1558/3110 [00:47<00:55, 28.16it/s]
 50%|█████     | 1562/3110 [00:47<00:51, 30.20it/s]
 50%|█████     | 1566/3110 [00:47<00:48, 32.13it/s]
 50%|█████     | 1570/3110 [00:47<00:48, 31.83it/s]
 51%|█████     | 1574/3110 

[2m[36m(_objective pid=45639)[0m {'loss': 0.221, 'learning_rate': 1.76584501559688e-05, 'epoch': 0.64}


[2m[36m(_objective pid=45639)[0m  65%|██████▍   | 2008/3110 [01:01<00:30, 35.95it/s]
 65%|██████▍   | 2012/3110 [01:01<00:31, 34.72it/s]
 65%|██████▍   | 2016/3110 [01:01<00:31, 35.20it/s]
 65%|██████▍   | 2020/3110 [01:01<00:32, 33.06it/s]
 65%|██████▌   | 2024/3110 [01:01<00:34, 31.89it/s]
 65%|██████▌   | 2028/3110 [01:01<00:35, 30.43it/s]
 65%|██████▌   | 2032/3110 [01:01<00:33, 32.42it/s]
 65%|██████▌   | 2036/3110 [01:02<00:31, 33.71it/s]
 66%|██████▌   | 2040/3110 [01:02<00:31, 34.39it/s]
 66%|██████▌   | 2044/3110 [01:02<00:33, 31.76it/s]
 66%|██████▌   | 2048/3110 [01:02<00:33, 32.11it/s]
 66%|██████▌   | 2052/3110 [01:02<00:34, 30.47it/s]
 66%|██████▌   | 2056/3110 [01:02<00:34, 30.42it/s]
 66%|██████▌   | 2060/3110 [01:02<00:33, 31.79it/s]
 66%|██████▋   | 2064/3110 [01:02<00:32, 32.61it/s]
 66%|██████▋   | 2068/3110 [01:03<00:30, 33.96it/s]
 67%|██████▋   | 2072/3110 [01:03<00:29, 34.80it/s]
 67%|██████▋   | 2076/3110 [01:03<00:29, 35.61it/s]
 67%|██████▋   | 2080/3110 

Trial status: 16 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-11 15:13:58. Total running time: 1hr 53min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00016   RUNNING          4.94755e-05                    1                

[2m[36m(_objective pid=45639)[0m  73%|███████▎  | 2284/3110 [01:09<00:27, 30.28it/s]
 74%|███████▎  | 2288/3110 [01:09<00:26, 31.45it/s]
 74%|███████▎  | 2292/3110 [01:09<00:24, 32.97it/s]
 74%|███████▍  | 2296/3110 [01:10<00:23, 34.47it/s]
 74%|███████▍  | 2300/3110 [01:10<00:23, 34.02it/s]
 74%|███████▍  | 2304/3110 [01:10<00:23, 34.01it/s]
 74%|███████▍  | 2308/3110 [01:10<00:22, 35.22it/s]
 74%|███████▍  | 2312/3110 [01:10<00:22, 36.02it/s]
 74%|███████▍  | 2316/3110 [01:10<00:24, 32.66it/s]
 75%|███████▍  | 2320/3110 [01:10<00:23, 33.82it/s]
 75%|███████▍  | 2324/3110 [01:10<00:22, 34.71it/s]
 75%|███████▍  | 2328/3110 [01:10<00:23, 33.87it/s]
 75%|███████▍  | 2332/3110 [01:11<00:22, 34.17it/s]
 75%|███████▌  | 2336/3110 [01:11<00:22, 34.35it/s]
 75%|███████▌  | 2340/3110 [01:11<00:25, 29.65it/s]
 75%|███████▌  | 2344/3110 [01:11<00:23, 31.96it/s]
 75%|███████▌  | 2348/3110 [01:11<00:23, 32.50it/s]
 76%|███████▌  | 2352/3110 [01:11<00:22, 33.05it/s]
 76%|███████▌  | 2356/3110 

[2m[36m(_objective pid=45639)[0m {'loss': 0.1895, 'learning_rate': 9.704193328955828e-06, 'epoch': 0.8}


 81%|████████  | 2504/3110 [01:16<00:16, 36.40it/s]
 81%|████████  | 2508/3110 [01:16<00:19, 31.21it/s]
 81%|████████  | 2512/3110 [01:16<00:18, 31.63it/s]
 81%|████████  | 2516/3110 [01:16<00:19, 29.87it/s]
 81%|████████  | 2520/3110 [01:16<00:19, 30.90it/s]
 81%|████████  | 2524/3110 [01:16<00:17, 32.57it/s]
 81%|████████▏ | 2528/3110 [01:16<00:17, 32.91it/s]
 81%|████████▏ | 2532/3110 [01:17<00:18, 31.40it/s]
 82%|████████▏ | 2536/3110 [01:17<00:17, 32.37it/s]
 82%|████████▏ | 2540/3110 [01:17<00:16, 33.80it/s]
 82%|████████▏ | 2544/3110 [01:17<00:16, 34.07it/s]
 82%|████████▏ | 2548/3110 [01:17<00:16, 35.09it/s]
 82%|████████▏ | 2552/3110 [01:17<00:15, 34.92it/s]
 82%|████████▏ | 2556/3110 [01:17<00:15, 35.52it/s]
 82%|████████▏ | 2560/3110 [01:17<00:15, 36.07it/s]
 82%|████████▏ | 2564/3110 [01:17<00:15, 36.16it/s]
 83%|████████▎ | 2568/3110 [01:18<00:16, 33.03it/s]
 83%|████████▎ | 2572/3110 [01:18<00:15, 33.68it/s]
 83%|████████▎ | 2576/3110 [01:18<00:16, 32.89it/s]
 83%|███████

[2m[36m(_objective pid=45639)[0m {'loss': 0.2109, 'learning_rate': 1.7499365019428543e-06, 'epoch': 0.96}


[2m[36m(_objective pid=45639)[0m  97%|█████████▋| 3008/3110 [01:31<00:03, 29.87it/s]
 97%|█████████▋| 3012/3110 [01:31<00:03, 31.42it/s]
 97%|█████████▋| 3016/3110 [01:31<00:02, 32.81it/s]
 97%|█████████▋| 3020/3110 [01:31<00:02, 34.13it/s]
 97%|█████████▋| 3024/3110 [01:31<00:02, 34.96it/s]
 97%|█████████▋| 3028/3110 [01:32<00:02, 31.81it/s]
 97%|█████████▋| 3032/3110 [01:32<00:02, 32.95it/s]
 98%|█████████▊| 3036/3110 [01:32<00:02, 33.94it/s]
 98%|█████████▊| 3040/3110 [01:32<00:02, 33.18it/s]
 98%|█████████▊| 3044/3110 [01:32<00:01, 34.04it/s]
 98%|█████████▊| 3048/3110 [01:32<00:01, 34.30it/s]
 98%|█████████▊| 3052/3110 [01:32<00:01, 34.31it/s]
 98%|█████████▊| 3056/3110 [01:32<00:01, 34.83it/s]
 98%|█████████▊| 3060/3110 [01:33<00:01, 35.41it/s]
 99%|█████████▊| 3064/3110 [01:33<00:01, 35.36it/s]
 99%|█████████▊| 3068/3110 [01:33<00:01, 35.16it/s]
 99%|█████████▉| 3072/3110 [01:33<00:01, 35.60it/s]
 99%|█████████▉| 3076/3110 [01:33<00:00, 35.68it/s]
 99%|█████████▉| 3080/3110 

Trial status: 16 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-11 15:14:28. Total running time: 1hr 54min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00016   RUNNING          4.94755e-05                    1                 

[2m[36m(_objective pid=45639)[0m 
 55%|█████▌    | 72/130 [00:05<00:04, 13.15it/s][A
[2m[36m(_objective pid=45639)[0m 
 57%|█████▋    | 74/130 [00:05<00:04, 12.76it/s][A
[2m[36m(_objective pid=45639)[0m 
 59%|█████▉    | 77/130 [00:05<00:03, 15.76it/s][A
[2m[36m(_objective pid=45639)[0m 
 61%|██████    | 79/130 [00:05<00:03, 12.76it/s][A
[2m[36m(_objective pid=45639)[0m 
 62%|██████▏   | 81/130 [00:05<00:03, 13.68it/s][A
[2m[36m(_objective pid=45639)[0m 
 65%|██████▍   | 84/130 [00:05<00:02, 15.87it/s][A
[2m[36m(_objective pid=45639)[0m 
 66%|██████▌   | 86/130 [00:06<00:02, 16.41it/s][A
[2m[36m(_objective pid=45639)[0m 
 68%|██████▊   | 88/130 [00:06<00:02, 15.21it/s][A
[2m[36m(_objective pid=45639)[0m 
 69%|██████▉   | 90/130 [00:06<00:02, 13.80it/s][A
[2m[36m(_objective pid=45639)[0m 
 71%|███████   | 92/130 [00:06<00:03, 12.49it/s][A
[2m[36m(_objective pid=45639)[0m 
 72%|███████▏  | 94/130 [00:06<00:02, 13.73it/s][A
[2m[36m(_objective 

Trial _objective_f556c_00016 finished iteration 1 at 2023-09-11 15:14:33. Total running time: 1hr 54min 11s
+-------------------------------------------------+
| Trial _objective_f556c_00016 result             |
+-------------------------------------------------+
| time_this_iter_s                        106.876 |
| time_total_s                            106.876 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.20946 |
| eval_runtime                             9.5971 |
| eval_samples_per_second                 432.109 |
| eval_steps_per_second                    13.546 |
| objective                               0.20946 |
+-------------------------------------------------+

[2m[36m(_objective pid=45639)[0m {'eval_loss': 0.20945608615875244, 'eval_runtime': 9.5971, 'eval_samples_per_second': 432.109, 'eval_steps_per_second': 13.546, 'epoch': 1.0}


[2m[36m(_objective pid=45639)[0m                                                    
[2m[36m(_objective pid=45639)[0m                                                  [A100%|██████████| 3110/3110 [01:44<00:00, 34.91it/s]
[2m[36m(_objective pid=45639)[0m 100%|██████████| 130/130 [00:09<00:00, 11.97it/s][A
[2m[36m(_objective pid=45639)[0m                                                  [A


Trial _objective_f556c_00016 completed after 1 iterations at 2023-09-11 15:14:34. Total running time: 1hr 54min 13s

[2m[36m(_objective pid=45639)[0m {'train_runtime': 105.5931, 'train_samples_per_second': 117.801, 'train_steps_per_second': 29.453, 'train_loss': 0.270300910557198, 'epoch': 1.0}


[2m[36m(_objective pid=45639)[0m                                                    100%|██████████| 3110/3110 [01:45<00:00, 34.91it/s]100%|██████████| 3110/3110 [01:45<00:00, 29.45it/s]


Trial _objective_f556c_00017 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00017 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             2e-05 |
| num_train_epochs                              3 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   2 |
| weight_decay                            0.23138 |
+-------------------------------------------------+



[2m[36m(_objective pid=46166)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias']
[2m[36m(_objective pid=46166)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=46166)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=46166)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

[2m[36m(_objective pid=46166)[0m {'loss': 0.4978, 'learning_rate': 1.7472621775461313e-05, 'epoch': 0.08}


[2m[36m(_objective pid=46166)[0m   3%|▎         | 504/18660 [00:13<08:15, 36.62it/s]
  3%|▎         | 508/18660 [00:13<08:18, 36.40it/s]
  3%|▎         | 512/18660 [00:14<08:26, 35.83it/s]
  3%|▎         | 516/18660 [00:14<08:16, 36.51it/s]
  3%|▎         | 520/18660 [00:14<08:09, 37.07it/s]
  3%|▎         | 524/18660 [00:14<08:19, 36.30it/s]
  3%|▎         | 528/18660 [00:14<08:17, 36.44it/s]
  3%|▎         | 532/18660 [00:14<08:14, 36.66it/s]
  3%|▎         | 536/18660 [00:14<08:02, 37.57it/s]
  3%|▎         | 540/18660 [00:14<08:18, 36.33it/s]


Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:14:58. Total running time: 1hr 54min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                

[2m[36m(_objective pid=46166)[0m   3%|▎         | 544/18660 [00:14<08:19, 36.30it/s]
  3%|▎         | 548/18660 [00:15<08:23, 35.99it/s]
  3%|▎         | 552/18660 [00:15<08:40, 34.82it/s]
  3%|▎         | 556/18660 [00:15<08:47, 34.35it/s]
  3%|▎         | 560/18660 [00:15<08:41, 34.70it/s]
  3%|▎         | 564/18660 [00:15<08:47, 34.34it/s]
  3%|▎         | 568/18660 [00:15<08:38, 34.90it/s]
  3%|▎         | 572/18660 [00:15<08:33, 35.24it/s]
  3%|▎         | 576/18660 [00:15<08:22, 35.99it/s]
  3%|▎         | 580/18660 [00:15<08:18, 36.26it/s]
  3%|▎         | 584/18660 [00:16<08:09, 36.96it/s]
  3%|▎         | 588/18660 [00:16<08:01, 37.51it/s]
  3%|▎         | 593/18660 [00:16<08:04, 37.31it/s]
  3%|▎         | 597/18660 [00:16<08:05, 37.18it/s]
  3%|▎         | 601/18660 [00:16<08:01, 37.47it/s]
  3%|▎         | 605/18660 [00:16<08:07, 37.05it/s]
  3%|▎         | 609/18660 [00:16<08:33, 35.15it/s]
  3%|▎         | 613/18660 [00:16<08:45, 34.36it/s]
  3%|▎         | 617/18660 

[2m[36m(_objective pid=46166)[0m {'loss': 0.4565, 'learning_rate': 1.699154738737042e-05, 'epoch': 0.16}


[2m[36m(_objective pid=46166)[0m   5%|▌         | 1010/18660 [00:27<07:37, 38.56it/s]
  5%|▌         | 1014/18660 [00:27<07:52, 37.34it/s]
  5%|▌         | 1018/18660 [00:27<07:53, 37.22it/s]
  5%|▌         | 1022/18660 [00:28<07:59, 36.79it/s]
  5%|▌         | 1026/18660 [00:28<07:52, 37.28it/s]
  6%|▌         | 1030/18660 [00:28<07:48, 37.66it/s]
  6%|▌         | 1034/18660 [00:28<07:42, 38.15it/s]
  6%|▌         | 1038/18660 [00:28<07:50, 37.44it/s]
  6%|▌         | 1042/18660 [00:28<07:46, 37.77it/s]
  6%|▌         | 1046/18660 [00:28<07:44, 37.92it/s]
  6%|▌         | 1050/18660 [00:28<07:41, 38.14it/s]
  6%|▌         | 1054/18660 [00:28<08:02, 36.46it/s]
  6%|▌         | 1058/18660 [00:29<08:01, 36.52it/s]
  6%|▌         | 1062/18660 [00:29<07:51, 37.30it/s]
  6%|▌         | 1066/18660 [00:29<07:48, 37.52it/s]
  6%|▌         | 1070/18660 [00:29<08:01, 36.57it/s]
  6%|▌         | 1074/18660 [00:29<08:11, 35.78it/s]
  6%|▌         | 1078/18660 [00:29<08:07, 36.07it/s]
  6%|▌   

[2m[36m(_objective pid=46166)[0m {'loss': 0.417, 'learning_rate': 1.6510472999279524e-05, 'epoch': 0.24}


[2m[36m(_objective pid=46166)[0m   8%|▊         | 1507/18660 [00:41<07:49, 36.52it/s]
  8%|▊         | 1511/18660 [00:41<07:51, 36.41it/s]
  8%|▊         | 1515/18660 [00:41<07:44, 36.90it/s]
  8%|▊         | 1519/18660 [00:41<07:40, 37.22it/s]
  8%|▊         | 1523/18660 [00:41<07:47, 36.68it/s]
  8%|▊         | 1527/18660 [00:41<07:42, 37.06it/s]
  8%|▊         | 1531/18660 [00:42<07:44, 36.90it/s]
  8%|▊         | 1535/18660 [00:42<07:39, 37.24it/s]
  8%|▊         | 1539/18660 [00:42<07:44, 36.88it/s]
  8%|▊         | 1543/18660 [00:42<07:46, 36.68it/s]
  8%|▊         | 1547/18660 [00:42<07:36, 37.46it/s]
  8%|▊         | 1551/18660 [00:42<07:35, 37.52it/s]
  8%|▊         | 1555/18660 [00:42<07:30, 37.98it/s]
  8%|▊         | 1559/18660 [00:42<07:58, 35.73it/s]
  8%|▊         | 1563/18660 [00:42<07:46, 36.64it/s]
  8%|▊         | 1567/18660 [00:43<07:53, 36.09it/s]
  8%|▊         | 1571/18660 [00:43<07:52, 36.17it/s]
  8%|▊         | 1575/18660 [00:43<08:14, 34.54it/s]
  8%|▊   

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:15:28. Total running time: 1hr 55min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m   9%|▉         | 1635/18660 [00:44<07:46, 36.53it/s]
  9%|▉         | 1639/18660 [00:45<08:08, 34.87it/s]
  9%|▉         | 1643/18660 [00:45<07:53, 35.93it/s]
  9%|▉         | 1647/18660 [00:45<07:49, 36.26it/s]
  9%|▉         | 1651/18660 [00:45<07:45, 36.52it/s]
  9%|▉         | 1655/18660 [00:45<07:42, 36.78it/s]
  9%|▉         | 1659/18660 [00:45<07:36, 37.26it/s]
  9%|▉         | 1663/18660 [00:45<07:32, 37.53it/s]
  9%|▉         | 1667/18660 [00:45<07:27, 37.96it/s]
  9%|▉         | 1671/18660 [00:45<07:33, 37.48it/s]
  9%|▉         | 1675/18660 [00:46<07:51, 35.99it/s]
  9%|▉         | 1679/18660 [00:46<07:56, 35.66it/s]
  9%|▉         | 1683/18660 [00:46<07:53, 35.84it/s]
  9%|▉         | 1687/18660 [00:46<07:52, 35.91it/s]
  9%|▉         | 1691/18660 [00:46<07:43, 36.59it/s]
  9%|▉         | 1695/18660 [00:46<07:38, 37.03it/s]
  9%|▉         | 1699/18660 [00:46<07:33, 37.40it/s]
  9%|▉         | 1703/18660 [00:46<07:38, 36.96it/s]
  9%|▉   

[2m[36m(_objective pid=46166)[0m {'loss': 0.3717, 'learning_rate': 1.6029398611188628e-05, 'epoch': 0.32}


[2m[36m(_objective pid=46166)[0m  11%|█         | 2004/18660 [00:54<07:03, 39.35it/s]
 11%|█         | 2008/18660 [00:55<07:04, 39.27it/s]
 11%|█         | 2012/18660 [00:55<07:14, 38.31it/s]
 11%|█         | 2016/18660 [00:55<07:20, 37.76it/s]
 11%|█         | 2020/18660 [00:55<07:14, 38.28it/s]
 11%|█         | 2024/18660 [00:55<07:13, 38.39it/s]
 11%|█         | 2028/18660 [00:55<07:08, 38.79it/s]
 11%|█         | 2032/18660 [00:55<07:09, 38.67it/s]
 11%|█         | 2037/18660 [00:55<06:59, 39.58it/s]
 11%|█         | 2041/18660 [00:55<07:04, 39.18it/s]
 11%|█         | 2045/18660 [00:56<07:11, 38.46it/s]
 11%|█         | 2049/18660 [00:56<07:11, 38.47it/s]
 11%|█         | 2053/18660 [00:56<07:10, 38.58it/s]
 11%|█         | 2057/18660 [00:56<07:11, 38.49it/s]
 11%|█         | 2061/18660 [00:56<07:06, 38.88it/s]
 11%|█         | 2066/18660 [00:56<07:02, 39.25it/s]
 11%|█         | 2070/18660 [00:56<07:11, 38.42it/s]
 11%|█         | 2074/18660 [00:56<07:07, 38.82it/s]
 11%|█   

[2m[36m(_objective pid=46166)[0m {'loss': 0.3416, 'learning_rate': 1.554832422309773e-05, 'epoch': 0.4}


[2m[36m(_objective pid=46166)[0m  13%|█▎        | 2509/18660 [01:08<07:10, 37.54it/s]
 13%|█▎        | 2513/18660 [01:08<07:09, 37.55it/s]
 13%|█▎        | 2517/18660 [01:08<07:11, 37.40it/s]
 14%|█▎        | 2521/18660 [01:09<07:05, 37.92it/s]
 14%|█▎        | 2525/18660 [01:09<07:03, 38.07it/s]
 14%|█▎        | 2529/18660 [01:09<07:06, 37.82it/s]
 14%|█▎        | 2533/18660 [01:09<07:04, 37.98it/s]
 14%|█▎        | 2537/18660 [01:09<07:09, 37.58it/s]
 14%|█▎        | 2541/18660 [01:09<07:10, 37.46it/s]
 14%|█▎        | 2545/18660 [01:09<07:13, 37.20it/s]
 14%|█▎        | 2549/18660 [01:09<07:07, 37.66it/s]
 14%|█▎        | 2553/18660 [01:09<07:10, 37.44it/s]
 14%|█▎        | 2557/18660 [01:09<07:18, 36.69it/s]
 14%|█▎        | 2561/18660 [01:10<07:21, 36.48it/s]
 14%|█▎        | 2565/18660 [01:10<07:17, 36.79it/s]
 14%|█▍        | 2569/18660 [01:10<07:10, 37.34it/s]
 14%|█▍        | 2573/18660 [01:10<07:09, 37.50it/s]
 14%|█▍        | 2577/18660 [01:10<07:28, 35.83it/s]
 14%|█▍  

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:15:58. Total running time: 1hr 55min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                

[2m[36m(_objective pid=46166)[0m  15%|█▍        | 2745/18660 [01:15<07:09, 37.09it/s]
 15%|█▍        | 2749/18660 [01:15<07:07, 37.22it/s]
 15%|█▍        | 2753/18660 [01:15<07:06, 37.26it/s]
 15%|█▍        | 2757/18660 [01:15<07:14, 36.57it/s]
 15%|█▍        | 2762/18660 [01:15<07:03, 37.53it/s]
 15%|█▍        | 2766/18660 [01:15<07:08, 37.08it/s]
 15%|█▍        | 2770/18660 [01:15<07:07, 37.14it/s]
 15%|█▍        | 2774/18660 [01:15<07:05, 37.32it/s]
 15%|█▍        | 2778/18660 [01:15<07:01, 37.64it/s]
 15%|█▍        | 2782/18660 [01:16<07:19, 36.10it/s]
 15%|█▍        | 2786/18660 [01:16<07:12, 36.71it/s]
 15%|█▍        | 2790/18660 [01:16<07:21, 35.92it/s]
 15%|█▍        | 2794/18660 [01:16<07:16, 36.32it/s]
 15%|█▍        | 2798/18660 [01:16<07:30, 35.25it/s]
 15%|█▌        | 2802/18660 [01:16<07:26, 35.54it/s]
 15%|█▌        | 2806/18660 [01:16<07:41, 34.32it/s]
 15%|█▌        | 2810/18660 [01:16<07:27, 35.41it/s]
 15%|█▌        | 2814/18660 [01:16<07:15, 36.40it/s]
 15%|█▌  

[2m[36m(_objective pid=46166)[0m {'loss': 0.3352, 'learning_rate': 1.5067249835006839e-05, 'epoch': 0.48}


[2m[36m(_objective pid=46166)[0m  16%|█▌        | 3004/18660 [01:22<07:13, 36.15it/s]
 16%|█▌        | 3008/18660 [01:22<07:22, 35.35it/s]
 16%|█▌        | 3012/18660 [01:22<07:26, 35.06it/s]
 16%|█▌        | 3016/18660 [01:22<07:23, 35.28it/s]
 16%|█▌        | 3020/18660 [01:22<07:12, 36.20it/s]
 16%|█▌        | 3024/18660 [01:22<07:06, 36.70it/s]
 16%|█▌        | 3028/18660 [01:22<06:56, 37.57it/s]
 16%|█▌        | 3032/18660 [01:22<06:50, 38.03it/s]
 16%|█▋        | 3036/18660 [01:22<06:49, 38.13it/s]
 16%|█▋        | 3040/18660 [01:23<06:48, 38.20it/s]
 16%|█▋        | 3044/18660 [01:23<06:51, 37.94it/s]
 16%|█▋        | 3048/18660 [01:23<06:51, 37.94it/s]
 16%|█▋        | 3053/18660 [01:23<06:44, 38.54it/s]
 16%|█▋        | 3057/18660 [01:23<06:46, 38.41it/s]
 16%|█▋        | 3061/18660 [01:23<06:47, 38.32it/s]
 16%|█▋        | 3065/18660 [01:23<06:55, 37.51it/s]
 16%|█▋        | 3069/18660 [01:23<07:02, 36.91it/s]
 16%|█▋        | 3073/18660 [01:23<07:06, 36.51it/s]
 16%|█▋  

[2m[36m(_objective pid=46166)[0m {'loss': 0.2911, 'learning_rate': 1.4586175446915943e-05, 'epoch': 0.56}


[2m[36m(_objective pid=46166)[0m  19%|█▉        | 3504/18660 [01:35<07:10, 35.18it/s]
 19%|█▉        | 3508/18660 [01:35<07:01, 35.91it/s]
 19%|█▉        | 3512/18660 [01:35<07:00, 36.03it/s]
 19%|█▉        | 3516/18660 [01:35<07:01, 35.89it/s]
 19%|█▉        | 3520/18660 [01:35<06:59, 36.09it/s]
 19%|█▉        | 3524/18660 [01:36<07:01, 35.92it/s]
 19%|█▉        | 3528/18660 [01:36<06:55, 36.41it/s]
 19%|█▉        | 3532/18660 [01:36<06:46, 37.18it/s]
 19%|█▉        | 3536/18660 [01:36<06:43, 37.44it/s]
 19%|█▉        | 3540/18660 [01:36<06:43, 37.50it/s]
 19%|█▉        | 3544/18660 [01:36<06:41, 37.67it/s]
 19%|█▉        | 3548/18660 [01:36<06:46, 37.20it/s]
 19%|█▉        | 3552/18660 [01:36<06:45, 37.25it/s]
 19%|█▉        | 3556/18660 [01:36<06:52, 36.65it/s]
 19%|█▉        | 3560/18660 [01:37<06:47, 37.07it/s]
 19%|█▉        | 3564/18660 [01:37<06:53, 36.48it/s]
 19%|█▉        | 3568/18660 [01:37<06:53, 36.52it/s]
 19%|█▉        | 3572/18660 [01:37<07:03, 35.61it/s]
 19%|█▉  

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:16:28. Total running time: 1hr 56min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  21%|██        | 3860/18660 [01:45<06:26, 38.34it/s]
 21%|██        | 3864/18660 [01:45<06:39, 37.08it/s]
 21%|██        | 3868/18660 [01:45<06:55, 35.59it/s]
 21%|██        | 3872/18660 [01:45<06:47, 36.28it/s]
 21%|██        | 3876/18660 [01:45<06:39, 37.05it/s]
 21%|██        | 3880/18660 [01:45<06:30, 37.84it/s]
 21%|██        | 3884/18660 [01:45<06:24, 38.46it/s]
 21%|██        | 3888/18660 [01:45<06:29, 37.89it/s]
 21%|██        | 3892/18660 [01:45<06:25, 38.32it/s]
 21%|██        | 3896/18660 [01:46<06:23, 38.49it/s]
 21%|██        | 3900/18660 [01:46<06:27, 38.14it/s]
 21%|██        | 3904/18660 [01:46<06:29, 37.85it/s]
 21%|██        | 3908/18660 [01:46<06:33, 37.50it/s]
 21%|██        | 3912/18660 [01:46<06:33, 37.51it/s]
 21%|██        | 3916/18660 [01:46<06:28, 37.95it/s]
 21%|██        | 3921/18660 [01:46<06:18, 38.94it/s]
 21%|██        | 3925/18660 [01:46<06:24, 38.31it/s]
 21%|██        | 3929/18660 [01:46<06:36, 37.12it/s]
 21%|██  

[2m[36m(_objective pid=46166)[0m {'loss': 0.2402, 'learning_rate': 1.4105101058825047e-05, 'epoch': 0.64}


[2m[36m(_objective pid=46166)[0m  21%|██▏       | 4008/18660 [01:48<06:06, 40.00it/s]
 22%|██▏       | 4012/18660 [01:49<06:16, 38.95it/s]
 22%|██▏       | 4016/18660 [01:49<06:19, 38.60it/s]
 22%|██▏       | 4020/18660 [01:49<06:24, 38.03it/s]
 22%|██▏       | 4024/18660 [01:49<06:23, 38.19it/s]
 22%|██▏       | 4028/18660 [01:49<06:30, 37.47it/s]
 22%|██▏       | 4032/18660 [01:49<06:30, 37.42it/s]
 22%|██▏       | 4036/18660 [01:49<06:42, 36.34it/s]
 22%|██▏       | 4040/18660 [01:49<06:34, 37.02it/s]
 22%|██▏       | 4044/18660 [01:49<06:43, 36.24it/s]
 22%|██▏       | 4048/18660 [01:50<06:42, 36.28it/s]
 22%|██▏       | 4052/18660 [01:50<06:49, 35.69it/s]
 22%|██▏       | 4056/18660 [01:50<06:37, 36.78it/s]
 22%|██▏       | 4060/18660 [01:50<06:34, 37.04it/s]
 22%|██▏       | 4064/18660 [01:50<06:27, 37.65it/s]
 22%|██▏       | 4068/18660 [01:50<06:20, 38.31it/s]
 22%|██▏       | 4072/18660 [01:50<06:19, 38.43it/s]
 22%|██▏       | 4076/18660 [01:50<06:18, 38.52it/s]
 22%|██▏ 

[2m[36m(_objective pid=46166)[0m {'loss': 0.2747, 'learning_rate': 1.3624026670734152e-05, 'epoch': 0.72}


[2m[36m(_objective pid=46166)[0m  24%|██▍       | 4510/18660 [02:02<06:17, 37.46it/s]
 24%|██▍       | 4514/18660 [02:02<06:16, 37.59it/s]
 24%|██▍       | 4518/18660 [02:02<06:10, 38.20it/s]
 24%|██▍       | 4522/18660 [02:02<06:17, 37.47it/s]
 24%|██▍       | 4526/18660 [02:03<06:32, 36.04it/s]
 24%|██▍       | 4530/18660 [02:03<06:28, 36.40it/s]
 24%|██▍       | 4534/18660 [02:03<06:21, 37.04it/s]
 24%|██▍       | 4538/18660 [02:03<06:17, 37.38it/s]
 24%|██▍       | 4542/18660 [02:03<06:15, 37.58it/s]
 24%|██▍       | 4546/18660 [02:03<06:20, 37.09it/s]
 24%|██▍       | 4550/18660 [02:03<06:17, 37.39it/s]
 24%|██▍       | 4554/18660 [02:03<06:13, 37.76it/s]
 24%|██▍       | 4558/18660 [02:03<06:11, 37.98it/s]
 24%|██▍       | 4562/18660 [02:03<06:26, 36.45it/s]
 24%|██▍       | 4566/18660 [02:04<06:40, 35.20it/s]
 24%|██▍       | 4570/18660 [02:04<06:33, 35.78it/s]
 25%|██▍       | 4574/18660 [02:04<06:27, 36.37it/s]
 25%|██▍       | 4578/18660 [02:04<06:27, 36.36it/s]
 25%|██▍ 

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:16:58. Total running time: 1hr 56min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                

[2m[36m(_objective pid=46166)[0m  27%|██▋       | 4963/18660 [02:15<06:13, 36.69it/s]
 27%|██▋       | 4967/18660 [02:15<06:16, 36.34it/s]
 27%|██▋       | 4971/18660 [02:15<06:14, 36.52it/s]
 27%|██▋       | 4975/18660 [02:15<06:17, 36.27it/s]
 27%|██▋       | 4979/18660 [02:15<06:12, 36.71it/s]
 27%|██▋       | 4983/18660 [02:15<06:07, 37.20it/s]
 27%|██▋       | 4987/18660 [02:15<06:00, 37.88it/s]
 27%|██▋       | 4991/18660 [02:15<06:09, 37.00it/s]
 27%|██▋       | 4995/18660 [02:15<06:06, 37.29it/s]
 27%|██▋       | 4999/18660 [02:16<06:08, 37.04it/s]
 27%|██▋       | 5000/18660 [02:16<06:08, 37.04it/s]
 27%|██▋       | 5003/18660 [02:16<06:11, 36.75it/s]


[2m[36m(_objective pid=46166)[0m {'loss': 0.2329, 'learning_rate': 1.3142952282643258e-05, 'epoch': 0.8}


[2m[36m(_objective pid=46166)[0m  27%|██▋       | 5007/18660 [02:16<06:10, 36.82it/s]
 27%|██▋       | 5011/18660 [02:16<06:22, 35.71it/s]
 27%|██▋       | 5015/18660 [02:16<06:19, 35.96it/s]
 27%|██▋       | 5019/18660 [02:16<06:14, 36.39it/s]
 27%|██▋       | 5023/18660 [02:16<06:09, 36.88it/s]
 27%|██▋       | 5027/18660 [02:16<06:19, 35.94it/s]
 27%|██▋       | 5031/18660 [02:16<06:08, 36.96it/s]
 27%|██▋       | 5035/18660 [02:16<06:01, 37.74it/s]
 27%|██▋       | 5039/18660 [02:17<05:56, 38.23it/s]
 27%|██▋       | 5043/18660 [02:17<05:52, 38.58it/s]
 27%|██▋       | 5047/18660 [02:17<05:51, 38.78it/s]
 27%|██▋       | 5051/18660 [02:17<05:52, 38.65it/s]
 27%|██▋       | 5055/18660 [02:17<05:53, 38.46it/s]
 27%|██▋       | 5059/18660 [02:17<05:56, 38.20it/s]
 27%|██▋       | 5064/18660 [02:17<05:51, 38.66it/s]
 27%|██▋       | 5068/18660 [02:17<05:53, 38.49it/s]
 27%|██▋       | 5072/18660 [02:17<05:52, 38.50it/s]
 27%|██▋       | 5076/18660 [02:18<05:48, 38.92it/s]
 27%|██▋ 

[2m[36m(_objective pid=46166)[0m {'loss': 0.2643, 'learning_rate': 1.2661877894552362e-05, 'epoch': 0.88}


[2m[36m(_objective pid=46166)[0m  29%|██▉       | 5502/18660 [02:29<06:18, 34.79it/s]
 30%|██▉       | 5506/18660 [02:29<06:11, 35.37it/s]
 30%|██▉       | 5510/18660 [02:29<06:18, 34.72it/s]
 30%|██▉       | 5514/18660 [02:29<06:11, 35.42it/s]
 30%|██▉       | 5518/18660 [02:30<06:07, 35.75it/s]
 30%|██▉       | 5522/18660 [02:30<06:00, 36.45it/s]
 30%|██▉       | 5526/18660 [02:30<05:53, 37.18it/s]
 30%|██▉       | 5530/18660 [02:30<05:54, 37.04it/s]
 30%|██▉       | 5534/18660 [02:30<05:49, 37.53it/s]
 30%|██▉       | 5538/18660 [02:30<05:49, 37.51it/s]
 30%|██▉       | 5542/18660 [02:30<05:43, 38.15it/s]
 30%|██▉       | 5546/18660 [02:30<05:48, 37.64it/s]
 30%|██▉       | 5550/18660 [02:30<05:44, 38.03it/s]
 30%|██▉       | 5554/18660 [02:31<05:54, 37.02it/s]
 30%|██▉       | 5558/18660 [02:31<06:06, 35.73it/s]
 30%|██▉       | 5562/18660 [02:31<06:00, 36.36it/s]
 30%|██▉       | 5566/18660 [02:31<05:52, 37.10it/s]
 30%|██▉       | 5570/18660 [02:31<05:57, 36.57it/s]
 30%|██▉ 

[2m[36m(_objective pid=46166)[0m {'loss': 0.2803, 'learning_rate': 1.2180803506461467e-05, 'epoch': 0.96}


[2m[36m(_objective pid=46166)[0m  32%|███▏      | 6008/18660 [02:43<05:54, 35.69it/s]
 32%|███▏      | 6012/18660 [02:43<06:08, 34.33it/s]
 32%|███▏      | 6016/18660 [02:43<06:01, 34.97it/s]
 32%|███▏      | 6020/18660 [02:43<06:04, 34.66it/s]
 32%|███▏      | 6024/18660 [02:43<05:53, 35.73it/s]
 32%|███▏      | 6028/18660 [02:43<05:53, 35.76it/s]
 32%|███▏      | 6032/18660 [02:43<05:48, 36.27it/s]
 32%|███▏      | 6036/18660 [02:44<06:00, 35.02it/s]
 32%|███▏      | 6040/18660 [02:44<06:03, 34.67it/s]
 32%|███▏      | 6044/18660 [02:44<05:55, 35.45it/s]
 32%|███▏      | 6048/18660 [02:44<05:53, 35.67it/s]
 32%|███▏      | 6052/18660 [02:44<05:47, 36.33it/s]
 32%|███▏      | 6056/18660 [02:44<06:02, 34.75it/s]
 32%|███▏      | 6060/18660 [02:44<05:57, 35.27it/s]
 32%|███▏      | 6064/18660 [02:44<05:51, 35.88it/s]
 33%|███▎      | 6068/18660 [02:44<05:46, 36.34it/s]


Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:17:28. Total running time: 1hr 57min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  33%|███▎      | 6072/18660 [02:45<05:42, 36.77it/s]
 33%|███▎      | 6076/18660 [02:45<05:42, 36.79it/s]
 33%|███▎      | 6080/18660 [02:45<05:44, 36.56it/s]
 33%|███▎      | 6084/18660 [02:45<05:43, 36.66it/s]
 33%|███▎      | 6088/18660 [02:45<05:39, 37.00it/s]
 33%|███▎      | 6092/18660 [02:45<05:42, 36.66it/s]
 33%|███▎      | 6096/18660 [02:45<05:44, 36.52it/s]
 33%|███▎      | 6100/18660 [02:45<05:49, 35.90it/s]
 33%|███▎      | 6104/18660 [02:45<05:53, 35.55it/s]
 33%|███▎      | 6108/18660 [02:46<06:01, 34.75it/s]
 33%|███▎      | 6112/18660 [02:46<05:51, 35.70it/s]
 33%|███▎      | 6116/18660 [02:46<05:44, 36.42it/s]
 33%|███▎      | 6120/18660 [02:46<05:40, 36.79it/s]
 33%|███▎      | 6124/18660 [02:46<05:35, 37.37it/s]
 33%|███▎      | 6128/18660 [02:46<05:30, 37.92it/s]
 33%|███▎      | 6132/18660 [02:46<05:28, 38.10it/s]
 33%|███▎      | 6136/18660 [02:46<05:36, 37.26it/s]
 33%|███▎      | 6140/18660 [02:46<05:34, 37.42it/s]
 33%|███▎

Trial _objective_f556c_00017 finished iteration 1 at 2023-09-11 15:17:42. Total running time: 1hr 57min 21s
+-------------------------------------------------+
| Trial _objective_f556c_00017 result             |
+-------------------------------------------------+
| time_this_iter_s                        181.011 |
| time_total_s                            181.011 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.24695 |
| eval_runtime                             9.5867 |
| eval_samples_per_second                 432.579 |
| eval_steps_per_second                     13.56 |
| objective                               0.24695 |
+-------------------------------------------------+

[2m[36m(_objective pid=46166)[0m {'eval_loss': 0.246952623128891, 'eval_runtime': 9.5867, 'eval_samples_per_second': 432.579, 'eval_steps_per_second': 13.56, 'epoch': 1.0}


[2m[36m(_objective pid=46166)[0m                                                     
[2m[36m(_objective pid=46166)[0m                                                  [A 33%|███▎      | 6220/18660 [02:58<05:32, 37.46it/s]
[2m[36m(_objective pid=46166)[0m 100%|██████████| 130/130 [00:09<00:00, 11.96it/s][A
                                                 [A
 33%|███▎      | 6221/18660 [03:00<2:55:57,  1.18it/s]
 33%|███▎      | 6225/18660 [03:00<2:05:05,  1.66it/s]
 33%|███▎      | 6229/18660 [03:00<1:29:20,  2.32it/s]
 33%|███▎      | 6233/18660 [03:00<1:04:25,  3.22it/s]
 33%|███▎      | 6237/18660 [03:00<47:03,  4.40it/s]  
 33%|███▎      | 6241/18660 [03:00<35:00,  5.91it/s]
 33%|███▎      | 6245/18660 [03:00<26:16,  7.88it/s]
 33%|███▎      | 6249/18660 [03:00<20:09, 10.26it/s]
 34%|███▎      | 6253/18660 [03:00<15:55, 12.99it/s]
 34%|███▎      | 6257/18660 [03:01<13:05, 15.78it/s]
 34%|███▎      | 6261/18660 [03:01<10:55, 18.91it/s]
 34%|███▎      | 6265/18660 [0

[2m[36m(_objective pid=46166)[0m {'loss': 0.2245, 'learning_rate': 1.1699729118370571e-05, 'epoch': 1.05}


[2m[36m(_objective pid=46166)[0m  35%|███▍      | 6504/18660 [03:07<05:30, 36.78it/s]
 35%|███▍      | 6508/18660 [03:07<05:33, 36.39it/s]
 35%|███▍      | 6512/18660 [03:08<05:40, 35.65it/s]
 35%|███▍      | 6516/18660 [03:08<05:35, 36.22it/s]
 35%|███▍      | 6521/18660 [03:08<05:24, 37.42it/s]
 35%|███▍      | 6525/18660 [03:08<05:27, 37.09it/s]
 35%|███▍      | 6529/18660 [03:08<05:20, 37.82it/s]
 35%|███▌      | 6533/18660 [03:08<05:18, 38.11it/s]
 35%|███▌      | 6537/18660 [03:08<05:16, 38.31it/s]
 35%|███▌      | 6541/18660 [03:08<05:19, 37.92it/s]
 35%|███▌      | 6545/18660 [03:08<05:15, 38.44it/s]
 35%|███▌      | 6549/18660 [03:09<05:22, 37.54it/s]
 35%|███▌      | 6553/18660 [03:09<05:24, 37.35it/s]
 35%|███▌      | 6557/18660 [03:09<05:44, 35.18it/s]
 35%|███▌      | 6561/18660 [03:09<05:37, 35.85it/s]
 35%|███▌      | 6565/18660 [03:09<05:33, 36.32it/s]
 35%|███▌      | 6569/18660 [03:09<05:28, 36.85it/s]
 35%|███▌      | 6573/18660 [03:09<05:36, 35.97it/s]
 35%|███▌

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:17:58. Total running time: 1hr 57min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                

[2m[36m(_objective pid=46166)[0m  36%|███▋      | 6775/18660 [03:15<05:23, 36.71it/s]
 36%|███▋      | 6779/18660 [03:15<05:26, 36.43it/s]
 36%|███▋      | 6783/18660 [03:15<05:26, 36.35it/s]
 36%|███▋      | 6787/18660 [03:15<05:32, 35.70it/s]
 36%|███▋      | 6791/18660 [03:15<05:26, 36.34it/s]
 36%|███▋      | 6795/18660 [03:15<05:35, 35.32it/s]
 36%|███▋      | 6799/18660 [03:15<05:41, 34.68it/s]
 36%|███▋      | 6803/18660 [03:16<05:44, 34.41it/s]
 36%|███▋      | 6807/18660 [03:16<05:50, 33.86it/s]
 37%|███▋      | 6811/18660 [03:16<05:57, 33.12it/s]
 37%|███▋      | 6815/18660 [03:16<05:49, 33.85it/s]
 37%|███▋      | 6819/18660 [03:16<05:42, 34.60it/s]
 37%|███▋      | 6823/18660 [03:16<05:30, 35.78it/s]
 37%|███▋      | 6827/18660 [03:16<05:29, 35.95it/s]
 37%|███▋      | 6831/18660 [03:16<05:31, 35.72it/s]
 37%|███▋      | 6835/18660 [03:16<05:29, 35.84it/s]
 37%|███▋      | 6839/18660 [03:17<05:26, 36.20it/s]
 37%|███▋      | 6843/18660 [03:17<05:28, 35.93it/s]
 37%|███▋

[2m[36m(_objective pid=46166)[0m {'loss': 0.123, 'learning_rate': 1.1218654730279676e-05, 'epoch': 1.13}


 38%|███▊      | 7008/18660 [03:21<05:20, 36.31it/s]
 38%|███▊      | 7012/18660 [03:21<05:20, 36.34it/s]
 38%|███▊      | 7016/18660 [03:21<05:17, 36.69it/s]
 38%|███▊      | 7020/18660 [03:21<05:19, 36.41it/s]
 38%|███▊      | 7024/18660 [03:22<05:23, 35.95it/s]
 38%|███▊      | 7028/18660 [03:22<05:17, 36.63it/s]
 38%|███▊      | 7032/18660 [03:22<05:27, 35.55it/s]
 38%|███▊      | 7036/18660 [03:22<05:27, 35.49it/s]
 38%|███▊      | 7040/18660 [03:22<05:21, 36.18it/s]
 38%|███▊      | 7044/18660 [03:22<05:15, 36.81it/s]
 38%|███▊      | 7048/18660 [03:22<05:28, 35.30it/s]
 38%|███▊      | 7052/18660 [03:22<05:22, 36.01it/s]
 38%|███▊      | 7056/18660 [03:22<05:31, 34.96it/s]
 38%|███▊      | 7060/18660 [03:23<05:24, 35.73it/s]
 38%|███▊      | 7064/18660 [03:23<05:17, 36.47it/s]
 38%|███▊      | 7068/18660 [03:23<05:12, 37.08it/s]
 38%|███▊      | 7072/18660 [03:23<05:16, 36.66it/s]
 38%|███▊      | 7076/18660 [03:23<05:13, 36.98it/s]
 38%|███▊      | 7080/18660 [03:23<05:15, 36.7

[2m[36m(_objective pid=46166)[0m {'loss': 0.1381, 'learning_rate': 1.0737580342188782e-05, 'epoch': 1.21}


[2m[36m(_objective pid=46166)[0m  40%|████      | 7509/18660 [03:35<04:55, 37.77it/s]
 40%|████      | 7513/18660 [03:35<04:55, 37.74it/s]
 40%|████      | 7517/18660 [03:35<04:53, 37.93it/s]
 40%|████      | 7521/18660 [03:35<04:57, 37.39it/s]
 40%|████      | 7525/18660 [03:35<04:58, 37.26it/s]
 40%|████      | 7529/18660 [03:35<05:03, 36.64it/s]
 40%|████      | 7533/18660 [03:35<05:03, 36.68it/s]
 40%|████      | 7537/18660 [03:36<05:00, 36.99it/s]
 40%|████      | 7541/18660 [03:36<05:12, 35.59it/s]
 40%|████      | 7545/18660 [03:36<05:05, 36.38it/s]
 40%|████      | 7549/18660 [03:36<05:05, 36.42it/s]
 40%|████      | 7553/18660 [03:36<05:00, 36.95it/s]
 40%|████      | 7557/18660 [03:36<04:55, 37.52it/s]
 41%|████      | 7561/18660 [03:36<04:54, 37.69it/s]
 41%|████      | 7565/18660 [03:36<04:56, 37.44it/s]
 41%|████      | 7569/18660 [03:36<04:51, 38.08it/s]
 41%|████      | 7573/18660 [03:37<05:03, 36.59it/s]
 41%|████      | 7577/18660 [03:37<04:55, 37.51it/s]
 41%|████

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:18:28. Total running time: 1hr 58min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  42%|████▏     | 7876/18660 [03:45<04:58, 36.19it/s]
 42%|████▏     | 7880/18660 [03:45<04:52, 36.84it/s]
 42%|████▏     | 7884/18660 [03:45<04:50, 37.13it/s]
 42%|████▏     | 7888/18660 [03:45<04:58, 36.13it/s]
 42%|████▏     | 7892/18660 [03:45<05:01, 35.66it/s]
 42%|████▏     | 7896/18660 [03:45<04:59, 35.92it/s]
 42%|████▏     | 7900/18660 [03:45<04:56, 36.30it/s]
 42%|████▏     | 7904/18660 [03:45<04:52, 36.78it/s]
 42%|████▏     | 7908/18660 [03:46<04:49, 37.16it/s]
 42%|████▏     | 7912/18660 [03:46<04:55, 36.37it/s]
 42%|████▏     | 7916/18660 [03:46<04:52, 36.71it/s]
 42%|████▏     | 7920/18660 [03:46<04:55, 36.38it/s]
 42%|████▏     | 7924/18660 [03:46<04:52, 36.76it/s]
 42%|████▏     | 7928/18660 [03:46<04:54, 36.49it/s]
 43%|████▎     | 7932/18660 [03:46<04:48, 37.18it/s]
 43%|████▎     | 7936/18660 [03:46<04:57, 36.09it/s]
 43%|████▎     | 7940/18660 [03:46<04:58, 35.89it/s]
 43%|████▎     | 7944/18660 [03:47<05:04, 35.21it/s]
 43%|████

[2m[36m(_objective pid=46166)[0m {'loss': 0.1341, 'learning_rate': 1.0256505954097886e-05, 'epoch': 1.29}


[2m[36m(_objective pid=46166)[0m  43%|████▎     | 8004/18660 [03:48<04:59, 35.58it/s]
 43%|████▎     | 8008/18660 [03:48<05:24, 32.78it/s]
 43%|████▎     | 8012/18660 [03:49<05:21, 33.10it/s]
 43%|████▎     | 8016/18660 [03:49<05:31, 32.09it/s]
 43%|████▎     | 8020/18660 [03:49<05:18, 33.43it/s]
 43%|████▎     | 8024/18660 [03:49<05:09, 34.35it/s]
 43%|████▎     | 8028/18660 [03:49<05:05, 34.77it/s]
 43%|████▎     | 8032/18660 [03:49<05:06, 34.72it/s]
 43%|████▎     | 8036/18660 [03:49<05:03, 35.06it/s]
 43%|████▎     | 8040/18660 [03:49<04:55, 35.92it/s]
 43%|████▎     | 8044/18660 [03:49<04:56, 35.76it/s]
 43%|████▎     | 8048/18660 [03:50<04:50, 36.54it/s]
 43%|████▎     | 8052/18660 [03:50<04:49, 36.63it/s]
 43%|████▎     | 8056/18660 [03:50<04:50, 36.54it/s]
 43%|████▎     | 8060/18660 [03:50<04:59, 35.43it/s]
 43%|████▎     | 8064/18660 [03:50<05:03, 34.94it/s]
 43%|████▎     | 8068/18660 [03:50<05:05, 34.62it/s]
 43%|████▎     | 8072/18660 [03:50<05:01, 35.09it/s]
 43%|████

[2m[36m(_objective pid=46166)[0m {'loss': 0.1484, 'learning_rate': 9.77543156600699e-06, 'epoch': 1.37}


[2m[36m(_objective pid=46166)[0m  46%|████▌     | 8505/18660 [04:02<04:47, 35.31it/s]
 46%|████▌     | 8509/18660 [04:02<04:43, 35.84it/s]
 46%|████▌     | 8513/18660 [04:02<04:43, 35.81it/s]
 46%|████▌     | 8517/18660 [04:02<04:44, 35.70it/s]
 46%|████▌     | 8521/18660 [04:02<04:46, 35.44it/s]
 46%|████▌     | 8525/18660 [04:03<04:43, 35.76it/s]
 46%|████▌     | 8529/18660 [04:03<04:43, 35.79it/s]
 46%|████▌     | 8533/18660 [04:03<04:40, 36.04it/s]
 46%|████▌     | 8537/18660 [04:03<04:38, 36.41it/s]
 46%|████▌     | 8541/18660 [04:03<04:41, 35.94it/s]
 46%|████▌     | 8545/18660 [04:03<04:49, 34.98it/s]
 46%|████▌     | 8549/18660 [04:03<04:54, 34.32it/s]
 46%|████▌     | 8553/18660 [04:03<04:55, 34.19it/s]
 46%|████▌     | 8557/18660 [04:03<04:53, 34.42it/s]
 46%|████▌     | 8561/18660 [04:04<04:49, 34.89it/s]
 46%|████▌     | 8565/18660 [04:04<04:58, 33.83it/s]
 46%|████▌     | 8569/18660 [04:04<04:53, 34.39it/s]
 46%|████▌     | 8573/18660 [04:04<04:51, 34.61it/s]
 46%|████

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:18:58. Total running time: 1hr 58min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                

[2m[36m(_objective pid=46166)[0m  48%|████▊     | 8965/18660 [04:15<04:28, 36.15it/s]
 48%|████▊     | 8969/18660 [04:15<04:29, 35.90it/s]
 48%|████▊     | 8973/18660 [04:15<04:26, 36.33it/s]
 48%|████▊     | 8977/18660 [04:15<04:22, 36.95it/s]
 48%|████▊     | 8981/18660 [04:15<04:27, 36.12it/s]
 48%|████▊     | 8985/18660 [04:15<04:31, 35.64it/s]
 48%|████▊     | 8989/18660 [04:16<04:25, 36.39it/s]
 48%|████▊     | 8993/18660 [04:16<04:27, 36.11it/s]
 48%|████▊     | 8997/18660 [04:16<04:28, 35.99it/s]
 48%|████▊     | 9001/18660 [04:16<04:43, 34.04it/s]


[2m[36m(_objective pid=46166)[0m {'loss': 0.1449, 'learning_rate': 9.294357177916097e-06, 'epoch': 1.45}


[2m[36m(_objective pid=46166)[0m  48%|████▊     | 9005/18660 [04:16<04:36, 34.86it/s]
 48%|████▊     | 9009/18660 [04:16<04:31, 35.51it/s]
 48%|████▊     | 9013/18660 [04:16<04:28, 35.95it/s]
 48%|████▊     | 9017/18660 [04:16<04:23, 36.63it/s]
 48%|████▊     | 9021/18660 [04:16<04:19, 37.21it/s]
 48%|████▊     | 9025/18660 [04:17<04:18, 37.26it/s]
 48%|████▊     | 9029/18660 [04:17<04:14, 37.81it/s]
 48%|████▊     | 9033/18660 [04:17<04:12, 38.10it/s]
 48%|████▊     | 9037/18660 [04:17<04:19, 37.12it/s]
 48%|████▊     | 9041/18660 [04:17<04:18, 37.24it/s]
 48%|████▊     | 9045/18660 [04:17<04:14, 37.74it/s]
 48%|████▊     | 9049/18660 [04:17<04:12, 38.11it/s]
 49%|████▊     | 9053/18660 [04:17<04:22, 36.56it/s]
 49%|████▊     | 9057/18660 [04:17<04:22, 36.60it/s]
 49%|████▊     | 9061/18660 [04:18<04:18, 37.12it/s]
 49%|████▊     | 9065/18660 [04:18<04:38, 34.42it/s]
 49%|████▊     | 9069/18660 [04:18<04:35, 34.78it/s]
 49%|████▊     | 9073/18660 [04:18<04:33, 35.02it/s]
 49%|████

[2m[36m(_objective pid=46166)[0m {'loss': 0.1838, 'learning_rate': 8.8132827898252e-06, 'epoch': 1.53}


[2m[36m(_objective pid=46166)[0m  51%|█████     | 9506/18660 [04:30<03:52, 39.37it/s]
 51%|█████     | 9510/18660 [04:30<03:53, 39.18it/s]
 51%|█████     | 9514/18660 [04:30<03:54, 38.96it/s]
 51%|█████     | 9518/18660 [04:30<03:54, 38.91it/s]
 51%|█████     | 9522/18660 [04:30<03:55, 38.74it/s]
 51%|█████     | 9526/18660 [04:30<03:59, 38.21it/s]
 51%|█████     | 9531/18660 [04:30<03:55, 38.83it/s]
 51%|█████     | 9535/18660 [04:30<03:54, 38.88it/s]
 51%|█████     | 9539/18660 [04:31<04:04, 37.27it/s]
 51%|█████     | 9543/18660 [04:31<04:01, 37.68it/s]
 51%|█████     | 9547/18660 [04:31<04:08, 36.67it/s]
 51%|█████     | 9551/18660 [04:31<04:10, 36.30it/s]
 51%|█████     | 9555/18660 [04:31<04:06, 36.95it/s]
 51%|█████     | 9559/18660 [04:31<04:14, 35.70it/s]
 51%|█████     | 9563/18660 [04:31<04:18, 35.22it/s]
 51%|█████▏    | 9567/18660 [04:31<04:14, 35.75it/s]
 51%|█████▏    | 9571/18660 [04:31<04:08, 36.59it/s]
 51%|█████▏    | 9575/18660 [04:32<04:09, 36.43it/s]
 51%|████

[2m[36m(_objective pid=46166)[0m {'loss': 0.1398, 'learning_rate': 8.332208401734305e-06, 'epoch': 1.61}


[2m[36m(_objective pid=46166)[0m  54%|█████▎    | 10005/18660 [04:43<03:47, 37.97it/s]
 54%|█████▎    | 10009/18660 [04:43<03:47, 37.98it/s]
 54%|█████▎    | 10013/18660 [04:44<03:47, 38.08it/s]
 54%|█████▎    | 10017/18660 [04:44<03:50, 37.44it/s]
 54%|█████▎    | 10021/18660 [04:44<03:49, 37.70it/s]
 54%|█████▎    | 10025/18660 [04:44<03:50, 37.51it/s]
 54%|█████▎    | 10029/18660 [04:44<03:48, 37.81it/s]
 54%|█████▍    | 10033/18660 [04:44<03:52, 37.14it/s]
 54%|█████▍    | 10037/18660 [04:44<03:48, 37.79it/s]
 54%|█████▍    | 10041/18660 [04:44<03:59, 36.06it/s]
 54%|█████▍    | 10045/18660 [04:44<03:54, 36.72it/s]
 54%|█████▍    | 10049/18660 [04:45<03:51, 37.18it/s]
 54%|█████▍    | 10053/18660 [04:45<03:46, 37.96it/s]


Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:19:28. Total running time: 1hr 59min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  54%|█████▍    | 10057/18660 [04:45<03:56, 36.32it/s]
 54%|█████▍    | 10061/18660 [04:45<03:54, 36.67it/s]
 54%|█████▍    | 10065/18660 [04:45<03:53, 36.88it/s]
 54%|█████▍    | 10069/18660 [04:45<03:53, 36.75it/s]
 54%|█████▍    | 10073/18660 [04:45<03:49, 37.39it/s]
 54%|█████▍    | 10077/18660 [04:45<03:50, 37.20it/s]
 54%|█████▍    | 10081/18660 [04:45<03:50, 37.23it/s]
 54%|█████▍    | 10085/18660 [04:45<03:52, 36.91it/s]
 54%|█████▍    | 10089/18660 [04:46<03:54, 36.62it/s]
 54%|█████▍    | 10093/18660 [04:46<03:51, 37.05it/s]
 54%|█████▍    | 10097/18660 [04:46<04:03, 35.10it/s]
 54%|█████▍    | 10101/18660 [04:46<04:02, 35.31it/s]
 54%|█████▍    | 10105/18660 [04:46<04:02, 35.31it/s]
 54%|█████▍    | 10109/18660 [04:46<04:09, 34.27it/s]
 54%|█████▍    | 10113/18660 [04:46<04:01, 35.37it/s]
 54%|█████▍    | 10117/18660 [04:46<04:03, 35.13it/s]
 54%|█████▍    | 10121/18660 [04:46<03:56, 36.12it/s]
 54%|█████▍    | 10125/18660 [04:47<03:57, 35

[2m[36m(_objective pid=46166)[0m {'loss': 0.1907, 'learning_rate': 7.85113401364341e-06, 'epoch': 1.69}


 56%|█████▋    | 10507/18660 [04:57<03:41, 36.76it/s]
 56%|█████▋    | 10511/18660 [04:57<03:43, 36.39it/s]
 56%|█████▋    | 10515/18660 [04:57<03:41, 36.76it/s]
 56%|█████▋    | 10519/18660 [04:57<03:41, 36.74it/s]
 56%|█████▋    | 10523/18660 [04:58<03:40, 36.93it/s]
 56%|█████▋    | 10527/18660 [04:58<03:39, 37.03it/s]
 56%|█████▋    | 10531/18660 [04:58<03:39, 37.06it/s]
 56%|█████▋    | 10535/18660 [04:58<03:40, 36.91it/s]
 56%|█████▋    | 10539/18660 [04:58<03:41, 36.70it/s]
 57%|█████▋    | 10543/18660 [04:58<03:40, 36.76it/s]
 57%|█████▋    | 10547/18660 [04:58<03:40, 36.84it/s]
 57%|█████▋    | 10551/18660 [04:58<03:39, 36.97it/s]
 57%|█████▋    | 10555/18660 [04:58<03:41, 36.53it/s]
 57%|█████▋    | 10559/18660 [04:59<03:40, 36.75it/s]
 57%|█████▋    | 10563/18660 [04:59<03:38, 37.02it/s]
 57%|█████▋    | 10567/18660 [04:59<03:38, 37.03it/s]
 57%|█████▋    | 10571/18660 [04:59<03:39, 36.93it/s]
 57%|█████▋    | 10575/18660 [04:59<03:39, 36.90it/s]
 57%|█████▋    | 10579/18660

[2m[36m(_objective pid=46166)[0m {'loss': 0.1707, 'learning_rate': 7.370059625552514e-06, 'epoch': 1.77}


[2m[36m(_objective pid=46166)[0m  59%|█████▉    | 11006/18660 [05:11<03:47, 33.66it/s]
 59%|█████▉    | 11010/18660 [05:11<03:44, 34.09it/s]
 59%|█████▉    | 11014/18660 [05:11<03:40, 34.69it/s]
 59%|█████▉    | 11018/18660 [05:11<03:36, 35.23it/s]
 59%|█████▉    | 11022/18660 [05:11<03:32, 35.95it/s]
 59%|█████▉    | 11026/18660 [05:11<03:31, 36.14it/s]
 59%|█████▉    | 11030/18660 [05:11<03:30, 36.27it/s]
 59%|█████▉    | 11034/18660 [05:12<03:37, 35.13it/s]
 59%|█████▉    | 11038/18660 [05:12<03:47, 33.56it/s]
 59%|█████▉    | 11042/18660 [05:12<03:42, 34.19it/s]
 59%|█████▉    | 11046/18660 [05:12<03:41, 34.30it/s]
 59%|█████▉    | 11050/18660 [05:12<03:35, 35.33it/s]
 59%|█████▉    | 11054/18660 [05:12<03:31, 36.04it/s]
 59%|█████▉    | 11058/18660 [05:12<03:28, 36.44it/s]
 59%|█████▉    | 11062/18660 [05:12<03:25, 36.98it/s]
 59%|█████▉    | 11066/18660 [05:12<03:21, 37.64it/s]
 59%|█████▉    | 11070/18660 [05:13<03:21, 37.61it/s]
 59%|█████▉    | 11074/18660 [05:13<03:26, 36

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:19:58. Total running time: 1hr 59min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                

 60%|█████▉    | 11162/18660 [05:15<03:22, 36.96it/s]
 60%|█████▉    | 11166/18660 [05:15<03:22, 36.98it/s]
 60%|█████▉    | 11170/18660 [05:15<03:21, 37.20it/s]
 60%|█████▉    | 11174/18660 [05:15<03:29, 35.82it/s]
 60%|█████▉    | 11178/18660 [05:15<03:24, 36.55it/s]
 60%|█████▉    | 11182/18660 [05:16<03:21, 37.04it/s]
 60%|█████▉    | 11186/18660 [05:16<03:26, 36.27it/s]
 60%|█████▉    | 11190/18660 [05:16<03:22, 36.82it/s]
 60%|█████▉    | 11194/18660 [05:16<03:19, 37.44it/s]
 60%|██████    | 11198/18660 [05:16<03:17, 37.74it/s]
 60%|██████    | 11202/18660 [05:16<03:16, 37.94it/s]
 60%|██████    | 11206/18660 [05:16<03:16, 38.02it/s]
 60%|██████    | 11210/18660 [05:16<03:16, 37.92it/s]
 60%|██████    | 11214/18660 [05:16<03:16, 37.98it/s]
 60%|██████    | 11218/18660 [05:16<03:22, 36.74it/s]
 60%|██████    | 11222/18660 [05:17<03:20, 37.16it/s]
 60%|██████    | 11226/18660 [05:17<03:26, 35.93it/s]
 60%|██████    | 11230/18660 [05:17<03:30, 35.38it/s]
 60%|██████    | 11234/18660

[2m[36m(_objective pid=46166)[0m {'loss': 0.1907, 'learning_rate': 6.8889852374616196e-06, 'epoch': 1.85}


[2m[36m(_objective pid=46166)[0m  62%|██████▏   | 11507/18660 [05:24<03:18, 36.12it/s]
 62%|██████▏   | 11511/18660 [05:25<03:17, 36.19it/s]
 62%|██████▏   | 11515/18660 [05:25<03:17, 36.13it/s]
 62%|██████▏   | 11519/18660 [05:25<03:17, 36.19it/s]
 62%|██████▏   | 11523/18660 [05:25<03:16, 36.33it/s]
 62%|██████▏   | 11527/18660 [05:25<03:19, 35.82it/s]
 62%|██████▏   | 11531/18660 [05:25<03:20, 35.57it/s]
 62%|██████▏   | 11535/18660 [05:25<03:30, 33.89it/s]
 62%|██████▏   | 11539/18660 [05:25<03:30, 33.83it/s]
 62%|██████▏   | 11543/18660 [05:25<03:29, 33.97it/s]
 62%|██████▏   | 11547/18660 [05:26<03:31, 33.62it/s]
 62%|██████▏   | 11551/18660 [05:26<03:27, 34.22it/s]
 62%|██████▏   | 11555/18660 [05:26<03:24, 34.82it/s]
 62%|██████▏   | 11559/18660 [05:26<03:20, 35.40it/s]
 62%|██████▏   | 11563/18660 [05:26<03:21, 35.15it/s]
 62%|██████▏   | 11567/18660 [05:26<03:15, 36.23it/s]
 62%|██████▏   | 11571/18660 [05:26<03:23, 34.82it/s]
 62%|██████▏   | 11575/18660 [05:26<03:20, 35

[2m[36m(_objective pid=46166)[0m {'loss': 0.1257, 'learning_rate': 6.407910849370724e-06, 'epoch': 1.93}


[2m[36m(_objective pid=46166)[0m  64%|██████▍   | 12008/18660 [05:38<03:01, 36.69it/s]
 64%|██████▍   | 12012/18660 [05:38<03:00, 36.86it/s]
 64%|██████▍   | 12016/18660 [05:39<03:05, 35.89it/s]
 64%|██████▍   | 12020/18660 [05:39<03:02, 36.31it/s]
 64%|██████▍   | 12024/18660 [05:39<02:59, 36.99it/s]
 64%|██████▍   | 12028/18660 [05:39<03:02, 36.43it/s]
 64%|██████▍   | 12032/18660 [05:39<03:00, 36.65it/s]
 65%|██████▍   | 12036/18660 [05:39<03:09, 34.94it/s]
 65%|██████▍   | 12040/18660 [05:39<03:09, 35.02it/s]
 65%|██████▍   | 12044/18660 [05:39<03:06, 35.39it/s]
 65%|██████▍   | 12048/18660 [05:39<03:06, 35.38it/s]
 65%|██████▍   | 12052/18660 [05:40<03:04, 35.88it/s]
 65%|██████▍   | 12056/18660 [05:40<03:06, 35.41it/s]
 65%|██████▍   | 12060/18660 [05:40<03:03, 35.97it/s]
 65%|██████▍   | 12064/18660 [05:40<03:02, 36.15it/s]
 65%|██████▍   | 12068/18660 [05:40<03:00, 36.58it/s]
 65%|██████▍   | 12072/18660 [05:40<03:03, 35.82it/s]
 65%|██████▍   | 12076/18660 [05:40<03:08, 34

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:20:28. Total running time: 2hr 0min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                  

[2m[36m(_objective pid=46166)[0m  66%|██████▌   | 12237/18660 [05:45<02:47, 38.38it/s]
 66%|██████▌   | 12241/18660 [05:45<02:47, 38.43it/s]
 66%|██████▌   | 12245/18660 [05:45<02:46, 38.49it/s]
 66%|██████▌   | 12249/18660 [05:45<02:48, 38.11it/s]
 66%|██████▌   | 12253/18660 [05:45<02:46, 38.38it/s]
 66%|██████▌   | 12257/18660 [05:45<02:49, 37.73it/s]
 66%|██████▌   | 12262/18660 [05:45<02:45, 38.58it/s]
 66%|██████▌   | 12266/18660 [05:46<02:46, 38.40it/s]
 66%|██████▌   | 12270/18660 [05:46<02:47, 38.24it/s]
 66%|██████▌   | 12274/18660 [05:46<02:51, 37.34it/s]
 66%|██████▌   | 12278/18660 [05:46<02:48, 37.88it/s]
 66%|██████▌   | 12282/18660 [05:46<02:50, 37.42it/s]
 66%|██████▌   | 12286/18660 [05:46<02:50, 37.42it/s]
 66%|██████▌   | 12290/18660 [05:46<02:48, 37.87it/s]
 66%|██████▌   | 12294/18660 [05:46<02:48, 37.75it/s]
 66%|██████▌   | 12298/18660 [05:46<02:48, 37.77it/s]
 66%|██████▌   | 12302/18660 [05:47<02:53, 36.57it/s]
 66%|██████▌   | 12306/18660 [05:47<02:53, 36

Trial _objective_f556c_00017 finished iteration 2 at 2023-09-11 15:20:44. Total running time: 2hr 0min 22s
+-------------------------------------------------+
| Trial _objective_f556c_00017 result             |
+-------------------------------------------------+
| time_this_iter_s                        181.805 |
| time_total_s                            362.816 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.2074 |
| eval_runtime                              9.574 |
| eval_samples_per_second                  433.15 |
| eval_steps_per_second                    13.578 |
| objective                                0.2074 |
+-------------------------------------------------+

[2m[36m(_objective pid=46166)[0m {'eval_loss': 0.20739877223968506, 'eval_runtime': 9.574, 'eval_samples_per_second': 433.15, 'eval_steps_per_second': 13.578, 'epoch': 2.0}


[2m[36m(_objective pid=46166)[0m  67%|██████▋   | 12442/18660 [06:01<1:27:14,  1.19it/s]
 67%|██████▋   | 12446/18660 [06:01<1:01:54,  1.67it/s]
 67%|██████▋   | 12450/18660 [06:01<44:10,  2.34it/s]  
 67%|██████▋   | 12454/18660 [06:02<31:47,  3.25it/s]
 67%|██████▋   | 12458/18660 [06:02<23:07,  4.47it/s]
 67%|██████▋   | 12462/18660 [06:02<17:05,  6.05it/s]
 67%|██████▋   | 12466/18660 [06:02<12:48,  8.06it/s]
 67%|██████▋   | 12470/18660 [06:02<09:47, 10.53it/s]
 67%|██████▋   | 12474/18660 [06:02<07:47, 13.24it/s]
 67%|██████▋   | 12478/18660 [06:02<06:16, 16.41it/s]
 67%|██████▋   | 12482/18660 [06:02<05:18, 19.39it/s]
 67%|██████▋   | 12486/18660 [06:03<04:36, 22.33it/s]
 67%|██████▋   | 12490/18660 [06:03<04:03, 25.31it/s]
 67%|██████▋   | 12494/18660 [06:03<03:40, 28.01it/s]
 67%|██████▋   | 12500/18660 [06:03<03:29, 29.44it/s]
 67%|██████▋   | 12502/18660 [06:03<03:18, 30.97it/s]


[2m[36m(_objective pid=46166)[0m {'loss': 0.1959, 'learning_rate': 5.926836461279829e-06, 'epoch': 2.01}


[2m[36m(_objective pid=46166)[0m  67%|██████▋   | 12506/18660 [06:03<03:06, 32.97it/s]
 67%|██████▋   | 12510/18660 [06:03<03:01, 33.88it/s]
 67%|██████▋   | 12514/18660 [06:03<02:57, 34.56it/s]
 67%|██████▋   | 12518/18660 [06:03<02:56, 34.72it/s]
 67%|██████▋   | 12522/18660 [06:04<02:53, 35.43it/s]
 67%|██████▋   | 12526/18660 [06:04<02:51, 35.79it/s]
 67%|██████▋   | 12530/18660 [06:04<02:49, 36.20it/s]
 67%|██████▋   | 12534/18660 [06:04<02:50, 35.97it/s]
 67%|██████▋   | 12538/18660 [06:04<02:53, 35.23it/s]
 67%|██████▋   | 12542/18660 [06:04<02:48, 36.32it/s]
 67%|██████▋   | 12546/18660 [06:04<02:45, 36.94it/s]
 67%|██████▋   | 12550/18660 [06:04<02:46, 36.59it/s]
 67%|██████▋   | 12554/18660 [06:04<02:42, 37.53it/s]
 67%|██████▋   | 12558/18660 [06:04<02:40, 38.01it/s]
 67%|██████▋   | 12562/18660 [06:05<02:43, 37.22it/s]
 67%|██████▋   | 12566/18660 [06:05<02:41, 37.65it/s]
 67%|██████▋   | 12570/18660 [06:05<02:39, 38.29it/s]
 67%|██████▋   | 12574/18660 [06:05<02:37, 38

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:20:59. Total running time: 2hr 0min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  69%|██████▉   | 12935/18660 [06:15<02:35, 36.80it/s]
 69%|██████▉   | 12939/18660 [06:15<02:37, 36.25it/s]
 69%|██████▉   | 12943/18660 [06:15<02:39, 35.87it/s]
 69%|██████▉   | 12947/18660 [06:15<02:37, 36.27it/s]
 69%|██████▉   | 12951/18660 [06:15<02:36, 36.54it/s]
 69%|██████▉   | 12955/18660 [06:15<02:38, 36.08it/s]
 69%|██████▉   | 12959/18660 [06:16<02:37, 36.29it/s]
 69%|██████▉   | 12963/18660 [06:16<02:37, 36.17it/s]
 69%|██████▉   | 12967/18660 [06:16<02:44, 34.62it/s]
 70%|██████▉   | 12971/18660 [06:16<02:39, 35.58it/s]
 70%|██████▉   | 12975/18660 [06:16<02:37, 36.02it/s]
 70%|██████▉   | 12979/18660 [06:16<02:35, 36.57it/s]
 70%|██████▉   | 12983/18660 [06:16<02:32, 37.12it/s]
 70%|██████▉   | 12987/18660 [06:16<02:32, 37.27it/s]
 70%|██████▉   | 12991/18660 [06:16<02:34, 36.66it/s]
 70%|██████▉   | 12995/18660 [06:17<02:35, 36.54it/s]
 70%|██████▉   | 13000/18660 [06:17<02:36, 36.22it/s]
 70%|██████▉   | 13003/18660 [06:17<02:42, 34

[2m[36m(_objective pid=46166)[0m {'loss': 0.0942, 'learning_rate': 5.445762073188934e-06, 'epoch': 2.09}


[2m[36m(_objective pid=46166)[0m  70%|██████▉   | 13007/18660 [06:17<02:36, 36.12it/s]
 70%|██████▉   | 13011/18660 [06:17<02:32, 37.12it/s]
 70%|██████▉   | 13015/18660 [06:17<02:30, 37.48it/s]
 70%|██████▉   | 13019/18660 [06:17<02:31, 37.13it/s]
 70%|██████▉   | 13023/18660 [06:17<02:29, 37.73it/s]
 70%|██████▉   | 13027/18660 [06:17<02:35, 36.24it/s]
 70%|██████▉   | 13031/18660 [06:18<02:40, 35.16it/s]
 70%|██████▉   | 13035/18660 [06:18<02:37, 35.81it/s]
 70%|██████▉   | 13039/18660 [06:18<02:32, 36.82it/s]
 70%|██████▉   | 13043/18660 [06:18<02:33, 36.54it/s]
 70%|██████▉   | 13047/18660 [06:18<02:38, 35.49it/s]
 70%|██████▉   | 13051/18660 [06:18<02:36, 35.94it/s]
 70%|██████▉   | 13055/18660 [06:18<02:34, 36.25it/s]
 70%|██████▉   | 13059/18660 [06:18<02:39, 35.22it/s]
 70%|███████   | 13063/18660 [06:18<02:35, 36.01it/s]
 70%|███████   | 13067/18660 [06:19<02:33, 36.43it/s]
 70%|███████   | 13071/18660 [06:19<02:34, 36.28it/s]
 70%|███████   | 13075/18660 [06:19<02:32, 36

[2m[36m(_objective pid=46166)[0m {'loss': 0.099, 'learning_rate': 4.964687685098038e-06, 'epoch': 2.17}


[2m[36m(_objective pid=46166)[0m  72%|███████▏  | 13505/18660 [06:30<02:26, 35.17it/s]
 72%|███████▏  | 13509/18660 [06:31<02:34, 33.45it/s]
 72%|███████▏  | 13513/18660 [06:31<02:32, 33.71it/s]
 72%|███████▏  | 13517/18660 [06:31<02:34, 33.36it/s]
 72%|███████▏  | 13521/18660 [06:31<02:29, 34.33it/s]
 72%|███████▏  | 13525/18660 [06:31<02:29, 34.32it/s]
 73%|███████▎  | 13529/18660 [06:31<02:28, 34.48it/s]
 73%|███████▎  | 13533/18660 [06:31<02:28, 34.51it/s]
 73%|███████▎  | 13537/18660 [06:31<02:34, 33.16it/s]
 73%|███████▎  | 13541/18660 [06:31<02:33, 33.41it/s]
 73%|███████▎  | 13545/18660 [06:32<02:31, 33.81it/s]
 73%|███████▎  | 13549/18660 [06:32<02:33, 33.29it/s]
 73%|███████▎  | 13553/18660 [06:32<02:34, 32.98it/s]
 73%|███████▎  | 13557/18660 [06:32<02:33, 33.24it/s]
 73%|███████▎  | 13561/18660 [06:32<02:30, 33.98it/s]
 73%|███████▎  | 13565/18660 [06:32<02:28, 34.26it/s]
 73%|███████▎  | 13569/18660 [06:32<02:26, 34.79it/s]
 73%|███████▎  | 13573/18660 [06:32<02:25, 34

[2m[36m(_objective pid=46166)[0m {'loss': 0.0846, 'learning_rate': 4.483613297007144e-06, 'epoch': 2.25}


[2m[36m(_objective pid=46166)[0m  75%|███████▌  | 14001/18660 [06:44<02:09, 36.03it/s]
 75%|███████▌  | 14005/18660 [06:44<02:12, 35.07it/s]
 75%|███████▌  | 14009/18660 [06:44<02:13, 34.89it/s]
 75%|███████▌  | 14013/18660 [06:44<02:10, 35.52it/s]
 75%|███████▌  | 14017/18660 [06:44<02:11, 35.40it/s]
 75%|███████▌  | 14021/18660 [06:44<02:10, 35.60it/s]
 75%|███████▌  | 14025/18660 [06:45<02:06, 36.62it/s]
 75%|███████▌  | 14029/18660 [06:45<02:07, 36.31it/s]
 75%|███████▌  | 14033/18660 [06:45<02:06, 36.46it/s]
 75%|███████▌  | 14037/18660 [06:45<02:06, 36.45it/s]


Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:21:29. Total running time: 2hr 1min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                  

[2m[36m(_objective pid=46166)[0m  75%|███████▌  | 14041/18660 [06:45<02:06, 36.49it/s]
 75%|███████▌  | 14045/18660 [06:45<02:08, 36.01it/s]
 75%|███████▌  | 14049/18660 [06:45<02:07, 36.16it/s]
 75%|███████▌  | 14053/18660 [06:45<02:05, 36.74it/s]
 75%|███████▌  | 14057/18660 [06:45<02:10, 35.39it/s]
 75%|███████▌  | 14061/18660 [06:46<02:12, 34.74it/s]
 75%|███████▌  | 14065/18660 [06:46<02:10, 35.22it/s]
 75%|███████▌  | 14069/18660 [06:46<02:09, 35.56it/s]
 75%|███████▌  | 14073/18660 [06:46<02:07, 36.09it/s]
 75%|███████▌  | 14077/18660 [06:46<02:06, 36.13it/s]
 75%|███████▌  | 14081/18660 [06:46<02:06, 36.20it/s]
 75%|███████▌  | 14085/18660 [06:46<02:05, 36.43it/s]
 76%|███████▌  | 14089/18660 [06:46<02:05, 36.50it/s]
 76%|███████▌  | 14093/18660 [06:46<02:07, 35.85it/s]
 76%|███████▌  | 14097/18660 [06:46<02:04, 36.68it/s]
 76%|███████▌  | 14101/18660 [06:47<02:02, 37.11it/s]
 76%|███████▌  | 14105/18660 [06:47<02:02, 37.16it/s]
 76%|███████▌  | 14109/18660 [06:47<02:02, 37

[2m[36m(_objective pid=46166)[0m {'loss': 0.0946, 'learning_rate': 4.0025389089162486e-06, 'epoch': 2.33}


[2m[36m(_objective pid=46166)[0m  78%|███████▊  | 14507/18660 [06:58<01:55, 35.82it/s]
 78%|███████▊  | 14511/18660 [06:58<01:53, 36.40it/s]
 78%|███████▊  | 14515/18660 [06:58<01:57, 35.25it/s]
 78%|███████▊  | 14519/18660 [06:58<01:56, 35.46it/s]
 78%|███████▊  | 14523/18660 [06:58<01:54, 36.16it/s]
 78%|███████▊  | 14527/18660 [06:58<01:52, 36.81it/s]
 78%|███████▊  | 14531/18660 [06:58<01:51, 37.19it/s]
 78%|███████▊  | 14536/18660 [06:58<01:48, 38.11it/s]
 78%|███████▊  | 14540/18660 [06:58<01:47, 38.37it/s]
 78%|███████▊  | 14544/18660 [06:59<01:50, 37.40it/s]
 78%|███████▊  | 14548/18660 [06:59<01:48, 37.95it/s]
 78%|███████▊  | 14552/18660 [06:59<01:49, 37.43it/s]
 78%|███████▊  | 14556/18660 [06:59<01:49, 37.57it/s]
 78%|███████▊  | 14560/18660 [06:59<01:48, 37.90it/s]
 78%|███████▊  | 14564/18660 [06:59<01:47, 38.24it/s]
 78%|███████▊  | 14568/18660 [06:59<01:50, 36.88it/s]
 78%|███████▊  | 14572/18660 [06:59<01:50, 37.01it/s]
 78%|███████▊  | 14576/18660 [06:59<01:51, 36

[2m[36m(_objective pid=46166)[0m {'loss': 0.075, 'learning_rate': 3.5214645208253533e-06, 'epoch': 2.41}


[2m[36m(_objective pid=46166)[0m  80%|████████  | 15008/18660 [07:12<01:37, 37.39it/s]
 80%|████████  | 15012/18660 [07:12<01:37, 37.55it/s]
 80%|████████  | 15016/18660 [07:12<01:36, 37.92it/s]
 80%|████████  | 15020/18660 [07:12<01:37, 37.41it/s]
 81%|████████  | 15024/18660 [07:12<01:36, 37.78it/s]
 81%|████████  | 15028/18660 [07:12<01:35, 38.04it/s]
 81%|████████  | 15032/18660 [07:12<01:34, 38.37it/s]
 81%|████████  | 15036/18660 [07:12<01:35, 38.04it/s]
 81%|████████  | 15040/18660 [07:12<01:34, 38.32it/s]
 81%|████████  | 15044/18660 [07:12<01:36, 37.37it/s]
 81%|████████  | 15048/18660 [07:13<01:38, 36.85it/s]
 81%|████████  | 15052/18660 [07:13<01:36, 37.27it/s]
 81%|████████  | 15056/18660 [07:13<01:34, 37.95it/s]
 81%|████████  | 15060/18660 [07:13<01:37, 36.77it/s]
 81%|████████  | 15064/18660 [07:13<01:37, 37.07it/s]
 81%|████████  | 15068/18660 [07:13<01:42, 34.96it/s]
 81%|████████  | 15072/18660 [07:13<01:39, 36.10it/s]
 81%|████████  | 15076/18660 [07:13<01:42, 34

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:21:59. Total running time: 2hr 1min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  81%|████████  | 15132/18660 [07:15<01:35, 37.06it/s]
 81%|████████  | 15136/18660 [07:15<01:36, 36.66it/s]
 81%|████████  | 15140/18660 [07:15<01:37, 36.06it/s]
 81%|████████  | 15144/18660 [07:15<01:39, 35.47it/s]
 81%|████████  | 15148/18660 [07:15<01:38, 35.50it/s]
 81%|████████  | 15152/18660 [07:15<01:38, 35.73it/s]
 81%|████████  | 15156/18660 [07:16<01:39, 35.33it/s]
 81%|████████  | 15160/18660 [07:16<01:37, 35.74it/s]
 81%|████████▏ | 15164/18660 [07:16<01:37, 35.72it/s]
 81%|████████▏ | 15168/18660 [07:16<01:38, 35.60it/s]
 81%|████████▏ | 15172/18660 [07:16<01:40, 34.73it/s]
 81%|████████▏ | 15176/18660 [07:16<01:38, 35.46it/s]
 81%|████████▏ | 15180/18660 [07:16<01:36, 36.19it/s]
 81%|████████▏ | 15184/18660 [07:16<01:39, 35.04it/s]
 81%|████████▏ | 15188/18660 [07:16<01:36, 35.94it/s]
 81%|████████▏ | 15192/18660 [07:17<01:34, 36.52it/s]
 81%|████████▏ | 15196/18660 [07:17<01:33, 37.22it/s]
 81%|████████▏ | 15200/18660 [07:17<01:35, 36

[2m[36m(_objective pid=46166)[0m {'loss': 0.0735, 'learning_rate': 3.040390132734458e-06, 'epoch': 2.49}


[2m[36m(_objective pid=46166)[0m  83%|████████▎ | 15504/18660 [07:25<01:26, 36.43it/s]
 83%|████████▎ | 15508/18660 [07:25<01:26, 36.41it/s]
 83%|████████▎ | 15512/18660 [07:25<01:27, 35.81it/s]
 83%|████████▎ | 15516/18660 [07:26<01:28, 35.58it/s]
 83%|████████▎ | 15520/18660 [07:26<01:26, 36.45it/s]
 83%|████████▎ | 15524/18660 [07:26<01:26, 36.19it/s]
 83%|████████▎ | 15528/18660 [07:26<01:25, 36.71it/s]
 83%|████████▎ | 15532/18660 [07:26<01:28, 35.43it/s]
 83%|████████▎ | 15536/18660 [07:26<01:26, 36.17it/s]
 83%|████████▎ | 15540/18660 [07:26<01:25, 36.37it/s]
 83%|████████▎ | 15544/18660 [07:26<01:26, 35.82it/s]
 83%|████████▎ | 15548/18660 [07:26<01:25, 36.23it/s]
 83%|████████▎ | 15552/18660 [07:27<01:23, 37.13it/s]
 83%|████████▎ | 15556/18660 [07:27<01:23, 37.35it/s]
 83%|████████▎ | 15560/18660 [07:27<01:22, 37.45it/s]
 83%|████████▎ | 15564/18660 [07:27<01:22, 37.35it/s]
 83%|████████▎ | 15568/18660 [07:27<01:23, 37.15it/s]
 83%|████████▎ | 15572/18660 [07:27<01:22, 37

[2m[36m(_objective pid=46166)[0m {'loss': 0.096, 'learning_rate': 2.5593157446435627e-06, 'epoch': 2.57}


[2m[36m(_objective pid=46166)[0m  86%|████████▌ | 16009/18660 [07:39<01:14, 35.47it/s]
 86%|████████▌ | 16013/18660 [07:39<01:14, 35.62it/s]
 86%|████████▌ | 16017/18660 [07:40<01:12, 36.45it/s]
 86%|████████▌ | 16021/18660 [07:40<01:12, 36.46it/s]
 86%|████████▌ | 16025/18660 [07:40<01:15, 34.83it/s]
 86%|████████▌ | 16029/18660 [07:40<01:16, 34.51it/s]
 86%|████████▌ | 16033/18660 [07:40<01:14, 35.28it/s]
 86%|████████▌ | 16037/18660 [07:40<01:12, 36.33it/s]
 86%|████████▌ | 16041/18660 [07:40<01:10, 37.12it/s]
 86%|████████▌ | 16045/18660 [07:40<01:13, 35.41it/s]
 86%|████████▌ | 16049/18660 [07:40<01:11, 36.34it/s]
 86%|████████▌ | 16053/18660 [07:41<01:10, 36.89it/s]
 86%|████████▌ | 16057/18660 [07:41<01:13, 35.38it/s]
 86%|████████▌ | 16061/18660 [07:41<01:16, 34.16it/s]
 86%|████████▌ | 16065/18660 [07:41<01:14, 35.02it/s]
 86%|████████▌ | 16069/18660 [07:41<01:13, 35.17it/s]
 86%|████████▌ | 16073/18660 [07:41<01:11, 35.93it/s]
 86%|████████▌ | 16077/18660 [07:41<01:12, 35

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:22:29. Total running time: 2hr 2min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                  

[2m[36m(_objective pid=46166)[0m  87%|████████▋ | 16218/18660 [07:45<01:06, 36.56it/s]
 87%|████████▋ | 16222/18660 [07:45<01:07, 36.13it/s]
 87%|████████▋ | 16226/18660 [07:45<01:05, 37.02it/s]
 87%|████████▋ | 16230/18660 [07:45<01:04, 37.50it/s]
 87%|████████▋ | 16234/18660 [07:45<01:04, 37.65it/s]
 87%|████████▋ | 16238/18660 [07:46<01:03, 38.24it/s]
 87%|████████▋ | 16243/18660 [07:46<01:02, 38.66it/s]
 87%|████████▋ | 16247/18660 [07:46<01:01, 38.93it/s]
 87%|████████▋ | 16251/18660 [07:46<01:03, 38.12it/s]
 87%|████████▋ | 16255/18660 [07:46<01:04, 37.45it/s]
 87%|████████▋ | 16259/18660 [07:46<01:04, 37.49it/s]
 87%|████████▋ | 16263/18660 [07:46<01:03, 37.71it/s]
 87%|████████▋ | 16267/18660 [07:46<01:02, 38.15it/s]
 87%|████████▋ | 16271/18660 [07:46<01:02, 38.24it/s]
 87%|████████▋ | 16275/18660 [07:47<01:02, 38.40it/s]
 87%|████████▋ | 16279/18660 [07:47<01:01, 38.70it/s]
 87%|████████▋ | 16284/18660 [07:47<01:00, 39.34it/s]
 87%|████████▋ | 16288/18660 [07:47<01:00, 39

[2m[36m(_objective pid=46166)[0m {'loss': 0.1015, 'learning_rate': 2.0782413565526673e-06, 'epoch': 2.65}


[2m[36m(_objective pid=46166)[0m  88%|████████▊ | 16505/18660 [07:53<01:00, 35.50it/s]
 88%|████████▊ | 16509/18660 [07:53<01:00, 35.62it/s]
 88%|████████▊ | 16513/18660 [07:53<00:59, 35.79it/s]
 89%|████████▊ | 16517/18660 [07:53<00:59, 35.91it/s]
 89%|████████▊ | 16521/18660 [07:53<00:59, 36.23it/s]
 89%|████████▊ | 16525/18660 [07:53<00:59, 35.66it/s]
 89%|████████▊ | 16529/18660 [07:53<01:00, 35.28it/s]
 89%|████████▊ | 16533/18660 [07:54<01:00, 35.19it/s]
 89%|████████▊ | 16537/18660 [07:54<01:00, 35.11it/s]
 89%|████████▊ | 16541/18660 [07:54<01:03, 33.49it/s]
 89%|████████▊ | 16545/18660 [07:54<01:02, 34.06it/s]
 89%|████████▊ | 16549/18660 [07:54<01:03, 33.23it/s]
 89%|████████▊ | 16553/18660 [07:54<01:02, 33.51it/s]
 89%|████████▊ | 16557/18660 [07:54<01:01, 33.93it/s]
 89%|████████▉ | 16561/18660 [07:54<01:01, 34.35it/s]
 89%|████████▉ | 16565/18660 [07:55<01:01, 34.34it/s]
 89%|████████▉ | 16569/18660 [07:55<00:59, 35.27it/s]
 89%|████████▉ | 16573/18660 [07:55<00:59, 35

[2m[36m(_objective pid=46166)[0m {'loss': 0.0694, 'learning_rate': 1.597166968461772e-06, 'epoch': 2.73}


[2m[36m(_objective pid=46166)[0m  91%|█████████ | 17009/18660 [08:07<00:47, 35.01it/s]
 91%|█████████ | 17013/18660 [08:07<00:48, 33.77it/s]
 91%|█████████ | 17017/18660 [08:07<00:47, 34.53it/s]
 91%|█████████ | 17021/18660 [08:07<00:46, 35.48it/s]
 91%|█████████ | 17025/18660 [08:07<00:45, 35.74it/s]
 91%|█████████▏| 17029/18660 [08:07<00:45, 35.64it/s]
 91%|█████████▏| 17033/18660 [08:08<00:45, 35.87it/s]
 91%|█████████▏| 17037/18660 [08:08<00:45, 35.62it/s]
 91%|█████████▏| 17041/18660 [08:08<00:45, 35.96it/s]
 91%|█████████▏| 17045/18660 [08:08<00:45, 35.45it/s]
 91%|█████████▏| 17049/18660 [08:08<00:45, 35.04it/s]
 91%|█████████▏| 17053/18660 [08:08<00:45, 35.12it/s]
 91%|█████████▏| 17057/18660 [08:08<00:45, 35.37it/s]
 91%|█████████▏| 17061/18660 [08:08<00:44, 35.94it/s]
 91%|█████████▏| 17065/18660 [08:09<00:43, 36.41it/s]
 91%|█████████▏| 17069/18660 [08:09<00:43, 36.73it/s]
 91%|█████████▏| 17073/18660 [08:09<00:44, 35.94it/s]
 92%|█████████▏| 17077/18660 [08:09<00:43, 36

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:22:59. Total running time: 2hr 2min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                 

[2m[36m(_objective pid=46166)[0m  93%|█████████▎| 17301/18660 [08:15<00:36, 37.56it/s]
 93%|█████████▎| 17305/18660 [08:15<00:36, 37.01it/s]
 93%|█████████▎| 17309/18660 [08:15<00:36, 36.86it/s]
 93%|█████████▎| 17313/18660 [08:15<00:36, 36.47it/s]
 93%|█████████▎| 17317/18660 [08:15<00:36, 36.89it/s]
 93%|█████████▎| 17321/18660 [08:16<00:36, 36.95it/s]
 93%|█████████▎| 17325/18660 [08:16<00:36, 37.08it/s]
 93%|█████████▎| 17329/18660 [08:16<00:35, 37.18it/s]
 93%|█████████▎| 17333/18660 [08:16<00:36, 36.69it/s]
 93%|█████████▎| 17337/18660 [08:16<00:35, 37.11it/s]
 93%|█████████▎| 17341/18660 [08:16<00:35, 37.08it/s]
 93%|█████████▎| 17345/18660 [08:16<00:34, 37.78it/s]
 93%|█████████▎| 17349/18660 [08:16<00:34, 37.56it/s]
 93%|█████████▎| 17353/18660 [08:16<00:35, 36.98it/s]
 93%|█████████▎| 17357/18660 [08:17<00:35, 36.50it/s]
 93%|█████████▎| 17361/18660 [08:17<00:36, 35.42it/s]
 93%|█████████▎| 17365/18660 [08:17<00:35, 36.09it/s]
 93%|█████████▎| 17369/18660 [08:17<00:36, 35

[2m[36m(_objective pid=46166)[0m {'loss': 0.1175, 'learning_rate': 1.116092580370877e-06, 'epoch': 2.81}


[2m[36m(_objective pid=46166)[0m  94%|█████████▍| 17506/18660 [08:21<00:32, 35.98it/s]
 94%|█████████▍| 17510/18660 [08:21<00:31, 36.19it/s]
 94%|█████████▍| 17514/18660 [08:21<00:33, 33.84it/s]
 94%|█████████▍| 17518/18660 [08:21<00:32, 34.83it/s]
 94%|█████████▍| 17522/18660 [08:21<00:33, 34.11it/s]
 94%|█████████▍| 17526/18660 [08:21<00:33, 34.35it/s]
 94%|█████████▍| 17530/18660 [08:21<00:32, 34.45it/s]
 94%|█████████▍| 17534/18660 [08:21<00:32, 34.95it/s]
 94%|█████████▍| 17538/18660 [08:22<00:32, 34.38it/s]
 94%|█████████▍| 17542/18660 [08:22<00:32, 34.54it/s]
 94%|█████████▍| 17546/18660 [08:22<00:31, 35.16it/s]
 94%|█████████▍| 17550/18660 [08:22<00:31, 35.04it/s]
 94%|█████████▍| 17554/18660 [08:22<00:30, 36.04it/s]
 94%|█████████▍| 17558/18660 [08:22<00:31, 35.42it/s]
 94%|█████████▍| 17562/18660 [08:22<00:30, 36.35it/s]
 94%|█████████▍| 17566/18660 [08:22<00:29, 36.57it/s]
 94%|█████████▍| 17570/18660 [08:22<00:29, 36.41it/s]
 94%|█████████▍| 17574/18660 [08:23<00:29, 36

[2m[36m(_objective pid=46166)[0m {'loss': 0.1239, 'learning_rate': 6.350181922799816e-07, 'epoch': 2.89}


[2m[36m(_objective pid=46166)[0m  96%|█████████▋| 18003/18660 [08:35<00:17, 38.40it/s]
 97%|█████████▋| 18007/18660 [08:35<00:17, 36.29it/s]
 97%|█████████▋| 18011/18660 [08:35<00:17, 36.78it/s]
 97%|█████████▋| 18015/18660 [08:35<00:17, 37.06it/s]
 97%|█████████▋| 18019/18660 [08:35<00:17, 35.87it/s]
 97%|█████████▋| 18023/18660 [08:35<00:17, 36.69it/s]
 97%|█████████▋| 18027/18660 [08:35<00:16, 37.45it/s]
 97%|█████████▋| 18031/18660 [08:35<00:16, 37.44it/s]
 97%|█████████▋| 18035/18660 [08:35<00:16, 37.50it/s]
 97%|█████████▋| 18039/18660 [08:36<00:16, 37.32it/s]
 97%|█████████▋| 18043/18660 [08:36<00:16, 37.39it/s]
 97%|█████████▋| 18047/18660 [08:36<00:16, 37.68it/s]
 97%|█████████▋| 18051/18660 [08:36<00:16, 38.05it/s]
 97%|█████████▋| 18055/18660 [08:36<00:16, 37.15it/s]
 97%|█████████▋| 18060/18660 [08:36<00:16, 37.45it/s]
 97%|█████████▋| 18064/18660 [08:36<00:15, 38.14it/s]
 97%|█████████▋| 18068/18660 [08:36<00:15, 38.50it/s]
 97%|█████████▋| 18072/18660 [08:36<00:15, 38

Trial status: 17 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-11 15:23:29. Total running time: 2hr 3min 7s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00017   RUNNING          1.79537e-05                    3                  

[2m[36m(_objective pid=46166)[0m  99%|█████████▊| 18381/18660 [08:45<00:07, 36.81it/s]
 99%|█████████▊| 18385/18660 [08:45<00:07, 36.69it/s]
 99%|█████████▊| 18389/18660 [08:45<00:07, 37.31it/s]
 99%|█████████▊| 18393/18660 [08:45<00:07, 37.27it/s]
 99%|█████████▊| 18397/18660 [08:45<00:07, 37.18it/s]
 99%|█████████▊| 18401/18660 [08:46<00:07, 36.80it/s]
 99%|█████████▊| 18405/18660 [08:46<00:06, 36.66it/s]
 99%|█████████▊| 18409/18660 [08:46<00:06, 36.48it/s]
 99%|█████████▊| 18413/18660 [08:46<00:06, 36.81it/s]
 99%|█████████▊| 18417/18660 [08:46<00:06, 36.65it/s]
 99%|█████████▊| 18421/18660 [08:46<00:06, 37.28it/s]
 99%|█████████▊| 18425/18660 [08:46<00:06, 37.18it/s]
 99%|█████████▉| 18429/18660 [08:46<00:06, 37.20it/s]
 99%|█████████▉| 18433/18660 [08:46<00:06, 37.02it/s]
 99%|█████████▉| 18437/18660 [08:46<00:05, 37.53it/s]
 99%|█████████▉| 18441/18660 [08:47<00:06, 35.86it/s]
 99%|█████████▉| 18445/18660 [08:47<00:05, 36.95it/s]
 99%|█████████▉| 18449/18660 [08:47<00:05, 37

[2m[36m(_objective pid=46166)[0m {'loss': 0.1093, 'learning_rate': 1.5394380418908647e-07, 'epoch': 2.97}


[2m[36m(_objective pid=46166)[0m  99%|█████████▉| 18505/18660 [08:48<00:04, 38.56it/s]
 99%|█████████▉| 18509/18660 [08:48<00:03, 38.56it/s]
 99%|█████████▉| 18513/18660 [08:48<00:03, 38.75it/s]
 99%|█████████▉| 18517/18660 [08:49<00:03, 38.52it/s]
 99%|█████████▉| 18521/18660 [08:49<00:03, 38.59it/s]
 99%|█████████▉| 18525/18660 [08:49<00:03, 38.74it/s]
 99%|█████████▉| 18529/18660 [08:49<00:03, 39.09it/s]
 99%|█████████▉| 18534/18660 [08:49<00:03, 39.41it/s]
 99%|█████████▉| 18538/18660 [08:49<00:03, 39.04it/s]
 99%|█████████▉| 18542/18660 [08:49<00:03, 39.17it/s]
 99%|█████████▉| 18547/18660 [08:49<00:02, 39.64it/s]
 99%|█████████▉| 18551/18660 [08:49<00:02, 38.60it/s]
 99%|█████████▉| 18555/18660 [08:50<00:02, 37.01it/s]
 99%|█████████▉| 18559/18660 [08:50<00:02, 37.76it/s]
 99%|█████████▉| 18563/18660 [08:50<00:02, 38.24it/s]
100%|█████████▉| 18567/18660 [08:50<00:02, 38.11it/s]
100%|█████████▉| 18571/18660 [08:50<00:02, 38.58it/s]
100%|█████████▉| 18575/18660 [08:50<00:02, 38

Trial _objective_f556c_00017 finished iteration 3 at 2023-09-11 15:23:46. Total running time: 2hr 3min 24s
+-------------------------------------------------+
| Trial _objective_f556c_00017 result             |
+-------------------------------------------------+
| time_this_iter_s                        182.047 |
| time_total_s                            544.862 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.25167 |
| eval_runtime                              9.577 |
| eval_samples_per_second                 433.018 |
| eval_steps_per_second                    13.574 |
| objective                               0.25167 |
+-------------------------------------------------+

[2m[36m(_objective pid=46166)[0m {'eval_loss': 0.25167393684387207, 'eval_runtime': 9.577, 'eval_samples_per_second': 433.018, 'eval_steps_per_second': 13.574, 'epoch': 3.0}


[2m[36m(_objective pid=46166)[0m                                                      
[2m[36m(_objective pid=46166)[0m                                                  [A100%|██████████| 18660/18660 [09:02<00:00, 37.38it/s]
[2m[36m(_objective pid=46166)[0m 100%|██████████| 130/130 [00:09<00:00, 12.03it/s][A
[2m[36m(_objective pid=46166)[0m                                                  [A


Trial _objective_f556c_00017 completed after 3 iterations at 2023-09-11 15:23:47. Total running time: 2hr 3min 26s

[2m[36m(_objective pid=46166)[0m {'train_runtime': 543.9647, 'train_samples_per_second': 68.602, 'train_steps_per_second': 34.304, 'train_loss': 0.19519347091707515, 'epoch': 3.0}


[2m[36m(_objective pid=46166)[0m                                                      100%|██████████| 18660/18660 [09:03<00:00, 37.38it/s]100%|██████████| 18660/18660 [09:03<00:00, 34.30it/s]


Trial _objective_f556c_00018 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00018 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             2e-05 |
| num_train_epochs                              1 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   4 |
| weight_decay                            0.25893 |
+-------------------------------------------------+



[2m[36m(_objective pid=48557)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']
[2m[36m(_objective pid=48557)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=48557)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=48557)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 18 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-11 15:23:59. Total running time: 2hr 3min 37s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00018   RUNNING          2.43386e-05                    1                 

[2m[36m(_objective pid=48557)[0m   2%|▏         | 47/3110 [00:01<01:35, 32.02it/s]
  2%|▏         | 51/3110 [00:01<01:35, 32.01it/s]
  2%|▏         | 55/3110 [00:01<01:33, 32.76it/s]
  2%|▏         | 59/3110 [00:02<01:43, 29.40it/s]
  2%|▏         | 63/3110 [00:02<01:35, 31.81it/s]
  2%|▏         | 67/3110 [00:02<01:30, 33.77it/s]
  2%|▏         | 71/3110 [00:02<01:25, 35.35it/s]
  2%|▏         | 75/3110 [00:02<01:24, 35.87it/s]
  3%|▎         | 79/3110 [00:02<01:23, 36.51it/s]
  3%|▎         | 83/3110 [00:02<01:21, 37.23it/s]
  3%|▎         | 87/3110 [00:02<01:19, 37.81it/s]
  3%|▎         | 91/3110 [00:02<01:21, 36.99it/s]
  3%|▎         | 95/3110 [00:03<01:34, 31.81it/s]
  3%|▎         | 99/3110 [00:03<01:30, 33.22it/s]
  3%|▎         | 103/3110 [00:03<01:27, 34.47it/s]
  3%|▎         | 107/3110 [00:03<01:25, 35.12it/s]
  4%|▎         | 111/3110 [00:03<01:26, 34.74it/s]
  4%|▎         | 115/3110 [00:03<01:33, 32.19it/s]
  4%|▍         | 119/3110 [00:03<01:29, 33.36it/s]
  4%|▍  

[2m[36m(_objective pid=48557)[0m {'loss': 0.3916, 'learning_rate': 2.0425666257241823e-05, 'epoch': 0.16}


 16%|█▌        | 504/3110 [00:15<01:18, 33.16it/s]
 16%|█▋        | 508/3110 [00:15<01:22, 31.44it/s]
 16%|█▋        | 512/3110 [00:15<01:22, 31.62it/s]
 17%|█▋        | 516/3110 [00:15<01:18, 32.93it/s]
 17%|█▋        | 520/3110 [00:15<01:18, 33.12it/s]
 17%|█▋        | 524/3110 [00:15<01:17, 33.54it/s]
 17%|█▋        | 528/3110 [00:16<01:16, 33.67it/s]
 17%|█▋        | 532/3110 [00:16<01:15, 34.29it/s]
 17%|█▋        | 536/3110 [00:16<01:19, 32.35it/s]
 17%|█▋        | 540/3110 [00:16<01:23, 30.89it/s]
 17%|█▋        | 544/3110 [00:16<01:19, 32.36it/s]
 18%|█▊        | 548/3110 [00:16<01:16, 33.41it/s]
 18%|█▊        | 553/3110 [00:16<01:11, 35.62it/s]
 18%|█▊        | 557/3110 [00:16<01:12, 35.35it/s]
 18%|█▊        | 561/3110 [00:17<01:10, 36.19it/s]
 18%|█▊        | 565/3110 [00:17<01:09, 36.77it/s]
 18%|█▊        | 569/3110 [00:17<01:09, 36.51it/s]
 18%|█▊        | 573/3110 [00:17<01:09, 36.33it/s]
 19%|█▊        | 577/3110 [00:17<01:10, 35.89it/s]
 19%|█▊        | 581/3110 [00:1

[2m[36m(_objective pid=48557)[0m {'loss': 0.3176, 'learning_rate': 1.651270337271274e-05, 'epoch': 0.32}


[2m[36m(_objective pid=48557)[0m  32%|███▏      | 1010/3110 [00:30<00:54, 38.31it/s]
 33%|███▎      | 1014/3110 [00:30<00:54, 38.13it/s]
 33%|███▎      | 1018/3110 [00:30<00:54, 38.38it/s]
 33%|███▎      | 1022/3110 [00:30<00:57, 36.44it/s]
 33%|███▎      | 1026/3110 [00:30<00:56, 37.13it/s]
 33%|███▎      | 1030/3110 [00:30<00:57, 36.46it/s]
 33%|███▎      | 1034/3110 [00:30<00:57, 36.04it/s]
 33%|███▎      | 1038/3110 [00:30<00:55, 37.00it/s]
 34%|███▎      | 1043/3110 [00:30<00:54, 37.89it/s]
 34%|███▎      | 1047/3110 [00:30<00:53, 38.36it/s]
 34%|███▍      | 1051/3110 [00:31<00:57, 36.08it/s]
 34%|███▍      | 1055/3110 [00:31<00:56, 36.63it/s]
 34%|███▍      | 1059/3110 [00:31<00:58, 35.00it/s]
 34%|███▍      | 1063/3110 [00:31<00:57, 35.88it/s]
 34%|███▍      | 1067/3110 [00:31<00:57, 35.49it/s]


Trial status: 18 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-11 15:24:29. Total running time: 2hr 4min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00018   RUNNING          2.43386e-05                    1                  

[2m[36m(_objective pid=48557)[0m  34%|███▍      | 1071/3110 [00:31<00:55, 36.56it/s]
 35%|███▍      | 1075/3110 [00:31<00:54, 37.17it/s]
 35%|███▍      | 1079/3110 [00:31<00:58, 34.60it/s]
 35%|███▍      | 1083/3110 [00:32<01:01, 32.91it/s]
 35%|███▍      | 1087/3110 [00:32<01:00, 33.69it/s]
 35%|███▌      | 1091/3110 [00:32<01:08, 29.47it/s]
 35%|███▌      | 1095/3110 [00:32<01:04, 31.01it/s]
 35%|███▌      | 1099/3110 [00:32<01:02, 32.01it/s]
 35%|███▌      | 1103/3110 [00:32<00:59, 33.76it/s]
 36%|███▌      | 1107/3110 [00:32<01:00, 33.16it/s]
 36%|███▌      | 1111/3110 [00:32<01:02, 32.21it/s]
 36%|███▌      | 1115/3110 [00:33<00:58, 33.99it/s]
 36%|███▌      | 1119/3110 [00:33<01:01, 32.13it/s]
 36%|███▌      | 1123/3110 [00:33<00:59, 33.46it/s]
 36%|███▌      | 1127/3110 [00:33<00:58, 34.04it/s]
 36%|███▋      | 1131/3110 [00:33<00:56, 34.75it/s]
 36%|███▋      | 1135/3110 [00:33<00:55, 35.31it/s]
 37%|███▋      | 1139/3110 [00:33<00:56, 34.73it/s]
 37%|███▋      | 1143/3110 

[2m[36m(_objective pid=48557)[0m {'loss': 0.284, 'learning_rate': 1.2599740488183653e-05, 'epoch': 0.48}


[2m[36m(_objective pid=48557)[0m  48%|████▊     | 1507/3110 [00:44<00:46, 34.49it/s]
 49%|████▊     | 1511/3110 [00:44<00:47, 34.01it/s]
 49%|████▊     | 1515/3110 [00:44<00:45, 35.19it/s]
 49%|████▉     | 1519/3110 [00:44<00:44, 35.70it/s]
 49%|████▉     | 1523/3110 [00:44<00:45, 34.57it/s]
 49%|████▉     | 1527/3110 [00:45<00:45, 35.07it/s]
 49%|████▉     | 1532/3110 [00:45<00:43, 36.60it/s]
 49%|████▉     | 1536/3110 [00:45<00:42, 36.80it/s]
 50%|████▉     | 1540/3110 [00:45<00:42, 36.93it/s]
 50%|████▉     | 1544/3110 [00:45<00:47, 32.98it/s]
 50%|████▉     | 1548/3110 [00:45<00:46, 33.94it/s]
 50%|████▉     | 1552/3110 [00:45<00:45, 34.04it/s]
 50%|█████     | 1556/3110 [00:45<00:51, 30.22it/s]
 50%|█████     | 1560/3110 [00:46<00:52, 29.55it/s]
 50%|█████     | 1564/3110 [00:46<00:49, 31.49it/s]
 50%|█████     | 1568/3110 [00:46<00:48, 31.91it/s]
 51%|█████     | 1572/3110 [00:46<00:47, 32.69it/s]
 51%|█████     | 1576/3110 [00:46<00:45, 33.53it/s]
 51%|█████     | 1580/3110 

[2m[36m(_objective pid=48557)[0m {'loss': 0.2335, 'learning_rate': 8.686777603654567e-06, 'epoch': 0.64}


[2m[36m(_objective pid=48557)[0m  64%|██████▍   | 2005/3110 [00:59<00:29, 36.84it/s]
 65%|██████▍   | 2009/3110 [00:59<00:29, 37.25it/s]
 65%|██████▍   | 2013/3110 [00:59<00:29, 37.41it/s]
 65%|██████▍   | 2017/3110 [00:59<00:29, 37.39it/s]
 65%|██████▍   | 2021/3110 [00:59<00:31, 34.74it/s]
 65%|██████▌   | 2025/3110 [00:59<00:36, 29.76it/s]
 65%|██████▌   | 2029/3110 [00:59<00:33, 32.03it/s]
 65%|██████▌   | 2033/3110 [01:00<00:31, 33.75it/s]
 65%|██████▌   | 2037/3110 [01:00<00:30, 35.39it/s]
 66%|██████▌   | 2041/3110 [01:00<00:29, 35.79it/s]
 66%|██████▌   | 2045/3110 [01:00<00:32, 32.73it/s]
 66%|██████▌   | 2049/3110 [01:00<00:31, 33.39it/s]
 66%|██████▌   | 2053/3110 [01:00<00:33, 31.19it/s]
 66%|██████▌   | 2057/3110 [01:00<00:34, 30.67it/s]
 66%|██████▋   | 2061/3110 [01:00<00:33, 31.47it/s]
 66%|██████▋   | 2065/3110 [01:01<00:32, 31.71it/s]
 67%|██████▋   | 2069/3110 [01:01<00:31, 32.66it/s]
 67%|██████▋   | 2073/3110 [01:01<00:30, 33.59it/s]
 67%|██████▋   | 2077/3110 

Trial status: 18 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-11 15:24:59. Total running time: 2hr 4min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00018   RUNNING          2.43386e-05                    1                 

[2m[36m(_objective pid=48557)[0m  67%|██████▋   | 2081/3110 [01:01<00:28, 35.84it/s]
 67%|██████▋   | 2085/3110 [01:01<00:29, 34.30it/s]
 67%|██████▋   | 2089/3110 [01:01<00:29, 34.65it/s]
 67%|██████▋   | 2093/3110 [01:01<00:31, 31.91it/s]
 67%|██████▋   | 2097/3110 [01:02<00:30, 33.37it/s]
 68%|██████▊   | 2101/3110 [01:02<00:29, 34.61it/s]
 68%|██████▊   | 2105/3110 [01:02<00:29, 33.84it/s]
 68%|██████▊   | 2109/3110 [01:02<00:32, 30.71it/s]
 68%|██████▊   | 2113/3110 [01:02<00:31, 31.96it/s]
 68%|██████▊   | 2117/3110 [01:02<00:29, 33.20it/s]
 68%|██████▊   | 2121/3110 [01:02<00:29, 33.56it/s]
 68%|██████▊   | 2125/3110 [01:02<00:28, 34.43it/s]
 68%|██████▊   | 2129/3110 [01:03<00:31, 31.50it/s]
 69%|██████▊   | 2133/3110 [01:03<00:29, 33.13it/s]
 69%|██████▊   | 2137/3110 [01:03<00:28, 33.99it/s]
 69%|██████▉   | 2141/3110 [01:03<00:27, 35.48it/s]
 69%|██████▉   | 2145/3110 [01:03<00:26, 36.19it/s]
 69%|██████▉   | 2149/3110 [01:03<00:26, 36.80it/s]
 69%|██████▉   | 2153/3110 

[2m[36m(_objective pid=48557)[0m {'loss': 0.2118, 'learning_rate': 4.773814719125483e-06, 'epoch': 0.8}


[2m[36m(_objective pid=48557)[0m  80%|████████  | 2502/3110 [01:13<00:16, 36.89it/s]
 81%|████████  | 2506/3110 [01:14<00:18, 33.42it/s]
 81%|████████  | 2510/3110 [01:14<00:17, 33.68it/s]
 81%|████████  | 2514/3110 [01:14<00:19, 30.81it/s]
 81%|████████  | 2518/3110 [01:14<00:18, 31.97it/s]
 81%|████████  | 2522/3110 [01:14<00:17, 33.36it/s]
 81%|████████  | 2526/3110 [01:14<00:17, 33.63it/s]
 81%|████████▏ | 2530/3110 [01:14<00:17, 32.86it/s]
 81%|████████▏ | 2534/3110 [01:14<00:17, 32.55it/s]
 82%|████████▏ | 2538/3110 [01:15<00:16, 34.23it/s]
 82%|████████▏ | 2542/3110 [01:15<00:15, 35.54it/s]
 82%|████████▏ | 2546/3110 [01:15<00:15, 36.33it/s]
 82%|████████▏ | 2550/3110 [01:15<00:15, 36.08it/s]
 82%|████████▏ | 2554/3110 [01:15<00:15, 36.39it/s]
 82%|████████▏ | 2558/3110 [01:15<00:14, 37.07it/s]
 82%|████████▏ | 2562/3110 [01:15<00:14, 36.83it/s]
 83%|████████▎ | 2566/3110 [01:15<00:16, 33.65it/s]
 83%|████████▎ | 2570/3110 [01:15<00:15, 34.47it/s]
 83%|████████▎ | 2574/3110 

[2m[36m(_objective pid=48557)[0m {'loss': 0.2258, 'learning_rate': 8.608518345963986e-07, 'epoch': 0.96}


[2m[36m(_objective pid=48557)[0m  97%|█████████▋| 3010/3110 [01:29<00:03, 31.29it/s]
 97%|█████████▋| 3014/3110 [01:29<00:02, 32.62it/s]
 97%|█████████▋| 3018/3110 [01:29<00:02, 33.90it/s]
 97%|█████████▋| 3022/3110 [01:29<00:02, 34.71it/s]
 97%|█████████▋| 3026/3110 [01:29<00:02, 35.84it/s]
 97%|█████████▋| 3030/3110 [01:29<00:02, 32.16it/s]
 98%|█████████▊| 3034/3110 [01:29<00:02, 33.49it/s]
 98%|█████████▊| 3038/3110 [01:29<00:02, 34.10it/s]
 98%|█████████▊| 3042/3110 [01:30<00:02, 33.60it/s]
 98%|█████████▊| 3046/3110 [01:30<00:01, 34.76it/s]
 98%|█████████▊| 3050/3110 [01:30<00:01, 35.25it/s]
 98%|█████████▊| 3054/3110 [01:30<00:01, 34.83it/s]
 98%|█████████▊| 3058/3110 [01:30<00:01, 35.75it/s]
 98%|█████████▊| 3062/3110 [01:30<00:01, 35.63it/s]
 99%|█████████▊| 3066/3110 [01:30<00:01, 35.81it/s]
 99%|█████████▊| 3070/3110 [01:30<00:01, 35.58it/s]
 99%|█████████▉| 3074/3110 [01:30<00:00, 36.51it/s]
 99%|█████████▉| 3078/3110 [01:31<00:00, 36.94it/s]
 99%|█████████▉| 3082/3110 

Trial status: 18 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-11 15:25:29. Total running time: 2hr 5min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00018   RUNNING          2.43386e-05                    1                  

[2m[36m(_objective pid=48557)[0m  99%|█████████▉| 3094/3110 [01:31<00:00, 31.40it/s]
100%|█████████▉| 3098/3110 [01:31<00:00, 33.14it/s]
100%|█████████▉| 3102/3110 [01:31<00:00, 34.10it/s]
100%|█████████▉| 3106/3110 [01:31<00:00, 33.57it/s]
100%|██████████| 3110/3110 [01:32<00:00, 32.89it/s]
  0%|          | 0/130 [00:00<?, ?it/s][A
[2m[36m(_objective pid=48557)[0m 
  2%|▏         | 2/130 [00:00<00:08, 14.58it/s][A
[2m[36m(_objective pid=48557)[0m 
  4%|▍         | 5/130 [00:00<00:06, 20.70it/s][A
[2m[36m(_objective pid=48557)[0m 
  6%|▌         | 8/130 [00:00<00:05, 21.27it/s][A
[2m[36m(_objective pid=48557)[0m 
  8%|▊         | 11/130 [00:00<00:08, 13.87it/s][A
[2m[36m(_objective pid=48557)[0m 
 10%|█         | 13/130 [00:00<00:08, 13.47it/s][A
[2m[36m(_objective pid=48557)[0m 
 12%|█▏        | 15/130 [00:01<00:09, 12.33it/s][A
[2m[36m(_objective pid=48557)[0m 
 13%|█▎        | 17/130 [00:01<00:09, 11.54it/s][A
[2m[36m(_objective pid=48557)[0m 
 15

Trial _objective_f556c_00018 finished iteration 1 at 2023-09-11 15:25:39. Total running time: 2hr 5min 18s
+-------------------------------------------------+
| Trial _objective_f556c_00018 result             |
+-------------------------------------------------+
| time_this_iter_s                        104.704 |
| time_total_s                            104.704 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.21533 |
| eval_runtime                             9.6125 |
| eval_samples_per_second                 431.417 |
| eval_steps_per_second                    13.524 |
| objective                               0.21533 |
+-------------------------------------------------+

[2m[36m(_objective pid=48557)[0m {'eval_loss': 0.21533232927322388, 'eval_runtime': 9.6125, 'eval_samples_per_second': 431.417, 'eval_steps_per_second': 13.524, 'epoch': 1.0}


[2m[36m(_objective pid=48557)[0m                                                    
[2m[36m(_objective pid=48557)[0m                                                  [A100%|██████████| 3110/3110 [01:41<00:00, 32.89it/s]
[2m[36m(_objective pid=48557)[0m 100%|██████████| 130/130 [00:09<00:00, 11.94it/s][A
[2m[36m(_objective pid=48557)[0m                                                  [A


Trial _objective_f556c_00018 completed after 1 iterations at 2023-09-11 15:25:40. Total running time: 2hr 5min 19s

[2m[36m(_objective pid=48557)[0m {'train_runtime': 103.1446, 'train_samples_per_second': 120.598, 'train_steps_per_second': 30.152, 'train_loss': 0.27514782626529216, 'epoch': 1.0}


[2m[36m(_objective pid=48557)[0m                                                    100%|██████████| 3110/3110 [01:43<00:00, 32.89it/s]100%|██████████| 3110/3110 [01:43<00:00, 30.15it/s]


Trial _objective_f556c_00019 started with configuration:
+-------------------------------------------------+
| Trial _objective_f556c_00019 config             |
+-------------------------------------------------+
| adam_epsilon                                  0 |
| learning_rate                             2e-05 |
| num_train_epochs                              5 |
| per_device_eval_batch_size                   32 |
| per_device_train_batch_size                   2 |
| weight_decay                            0.17739 |
+-------------------------------------------------+



[2m[36m(_objective pid=49074)[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias']
[2m[36m(_objective pid=49074)[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
[2m[36m(_objective pid=49074)[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2m[36m(_objective pid=49074)[0m Some weights of DistilBertForSequenceClassification were not initialized from the model che

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:25:59. Total running time: 2hr 5min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m   1%|          | 333/31100 [00:09<13:26, 38.13it/s]
  1%|          | 338/31100 [00:09<13:13, 38.75it/s]
  1%|          | 343/31100 [00:09<13:05, 39.17it/s]
  1%|          | 347/31100 [00:09<13:07, 39.08it/s]
  1%|          | 351/31100 [00:09<13:04, 39.19it/s]
  1%|          | 355/31100 [00:09<13:05, 39.13it/s]
  1%|          | 359/31100 [00:10<13:19, 38.44it/s]
  1%|          | 363/31100 [00:10<13:15, 38.65it/s]
  1%|          | 367/31100 [00:10<13:13, 38.73it/s]
  1%|          | 371/31100 [00:10<13:25, 38.14it/s]
  1%|          | 375/31100 [00:10<13:25, 38.13it/s]
  1%|          | 379/31100 [00:10<13:23, 38.21it/s]
  1%|          | 383/31100 [00:10<13:27, 38.06it/s]
  1%|          | 387/31100 [00:10<13:28, 38.01it/s]
  1%|▏         | 391/31100 [00:10<14:10, 36.11it/s]
  1%|▏         | 395/31100 [00:11<14:21, 35.64it/s]
  1%|▏         | 399/31100 [00:11<14:06, 36.27it/s]
  1%|▏         | 403/31100 [00:11<13:57, 36.64it/s]
  1%|▏         | 407/31100 

[2m[36m(_objective pid=49074)[0m {'loss': 0.4922, 'learning_rate': 2.2862353132464394e-05, 'epoch': 0.08}


[2m[36m(_objective pid=49074)[0m   2%|▏         | 504/31100 [00:14<14:17, 35.69it/s]
  2%|▏         | 508/31100 [00:14<14:40, 34.74it/s]
  2%|▏         | 512/31100 [00:14<14:50, 34.36it/s]
  2%|▏         | 516/31100 [00:14<14:50, 34.33it/s]
  2%|▏         | 520/31100 [00:14<14:19, 35.60it/s]
  2%|▏         | 524/31100 [00:14<14:04, 36.19it/s]
  2%|▏         | 528/31100 [00:14<13:54, 36.65it/s]
  2%|▏         | 532/31100 [00:14<13:53, 36.66it/s]
  2%|▏         | 536/31100 [00:14<13:57, 36.48it/s]
  2%|▏         | 540/31100 [00:15<14:43, 34.59it/s]
  2%|▏         | 544/31100 [00:15<14:26, 35.27it/s]
  2%|▏         | 548/31100 [00:15<14:17, 35.61it/s]
  2%|▏         | 552/31100 [00:15<14:23, 35.36it/s]
  2%|▏         | 556/31100 [00:15<14:31, 35.06it/s]
  2%|▏         | 560/31100 [00:15<14:16, 35.65it/s]
  2%|▏         | 564/31100 [00:15<14:39, 34.73it/s]
  2%|▏         | 568/31100 [00:15<14:14, 35.72it/s]
  2%|▏         | 572/31100 [00:15<14:10, 35.90it/s]
  2%|▏         | 576/31100 

[2m[36m(_objective pid=49074)[0m {'loss': 0.4734, 'learning_rate': 2.248878527082282e-05, 'epoch': 0.16}


[2m[36m(_objective pid=49074)[0m   3%|▎         | 1000/31100 [00:27<13:12, 37.98it/s]                                                      3%|▎         | 1000/31100 [00:27<13:12, 37.98it/s]
  3%|▎         | 1004/31100 [00:27<13:43, 36.53it/s]
  3%|▎         | 1008/31100 [00:28<13:30, 37.13it/s]
  3%|▎         | 1012/31100 [00:28<13:50, 36.22it/s]
  3%|▎         | 1016/31100 [00:28<13:35, 36.91it/s]
  3%|▎         | 1020/31100 [00:28<13:53, 36.08it/s]
  3%|▎         | 1024/31100 [00:28<13:34, 36.91it/s]
  3%|▎         | 1028/31100 [00:28<13:33, 36.95it/s]
  3%|▎         | 1032/31100 [00:28<13:23, 37.41it/s]
  3%|▎         | 1036/31100 [00:28<13:34, 36.90it/s]
  3%|▎         | 1040/31100 [00:28<13:26, 37.26it/s]
  3%|▎         | 1044/31100 [00:29<13:16, 37.72it/s]
  3%|▎         | 1048/31100 [00:29<13:16, 37.72it/s]
  3%|▎         | 1052/31100 [00:29<13:11, 37.97it/s]
  3%|▎         | 1056/31100 [00:29<13:09, 38.05it/s]
  3%|▎         | 1060/31100 [00:29<13:13, 37.84it/s]
  3%|▎  

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:26:29. Total running time: 2hr 6min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2     

[2m[36m(_objective pid=49074)[0m   5%|▍         | 1425/31100 [00:39<12:49, 38.58it/s]
  5%|▍         | 1429/31100 [00:39<13:01, 37.97it/s]
  5%|▍         | 1433/31100 [00:39<13:16, 37.26it/s]
  5%|▍         | 1437/31100 [00:39<13:43, 36.02it/s]
  5%|▍         | 1441/31100 [00:40<13:22, 36.97it/s]
  5%|▍         | 1445/31100 [00:40<13:14, 37.34it/s]
  5%|▍         | 1449/31100 [00:40<13:02, 37.90it/s]
  5%|▍         | 1453/31100 [00:40<13:20, 37.06it/s]
  5%|▍         | 1457/31100 [00:40<13:16, 37.22it/s]
  5%|▍         | 1461/31100 [00:40<13:42, 36.02it/s]
  5%|▍         | 1465/31100 [00:40<13:21, 36.96it/s]
  5%|▍         | 1469/31100 [00:40<13:21, 36.96it/s]
  5%|▍         | 1473/31100 [00:40<13:08, 37.56it/s]
  5%|▍         | 1478/31100 [00:41<12:45, 38.71it/s]
  5%|▍         | 1482/31100 [00:41<13:02, 37.85it/s]
  5%|▍         | 1486/31100 [00:41<12:55, 38.20it/s]
  5%|▍         | 1490/31100 [00:41<12:47, 38.57it/s]
  5%|▍         | 1494/31100 [00:41<13:20, 37.00it/s]
  5%|▍   

[2m[36m(_objective pid=49074)[0m {'loss': 0.4194, 'learning_rate': 2.2115217409181245e-05, 'epoch': 0.24}


[2m[36m(_objective pid=49074)[0m                                                       5%|▍         | 1500/31100 [00:41<14:24, 34.23it/s]
  5%|▍         | 1502/31100 [00:41<14:21, 34.37it/s]
  5%|▍         | 1506/31100 [00:41<14:05, 35.02it/s]
  5%|▍         | 1510/31100 [00:41<14:07, 34.92it/s]
  5%|▍         | 1514/31100 [00:42<13:47, 35.76it/s]
  5%|▍         | 1518/31100 [00:42<13:37, 36.19it/s]
  5%|▍         | 1522/31100 [00:42<13:21, 36.91it/s]
  5%|▍         | 1526/31100 [00:42<13:07, 37.54it/s]
  5%|▍         | 1530/31100 [00:42<12:56, 38.09it/s]
  5%|▍         | 1534/31100 [00:42<12:55, 38.14it/s]
  5%|▍         | 1538/31100 [00:42<13:02, 37.76it/s]
  5%|▍         | 1542/31100 [00:42<12:59, 37.90it/s]
  5%|▍         | 1546/31100 [00:42<12:56, 38.07it/s]
  5%|▍         | 1550/31100 [00:42<12:50, 38.37it/s]
  5%|▍         | 1554/31100 [00:43<12:53, 38.18it/s]
  5%|▌         | 1558/31100 [00:43<13:31, 36.39it/s]
  5%|▌         | 1562/31100 [00:43<13:20, 36.91it/s]
  5%|▌  

[2m[36m(_objective pid=49074)[0m {'loss': 0.3949, 'learning_rate': 2.1741649547539667e-05, 'epoch': 0.32}


[2m[36m(_objective pid=49074)[0m   6%|▋         | 2000/31100 [00:55<12:46, 37.94it/s]                                                      6%|▋         | 2000/31100 [00:55<12:46, 37.94it/s]
  6%|▋         | 2004/31100 [00:55<12:45, 38.01it/s]
  6%|▋         | 2008/31100 [00:55<12:48, 37.87it/s]
  6%|▋         | 2012/31100 [00:55<13:14, 36.62it/s]
  6%|▋         | 2016/31100 [00:55<13:22, 36.22it/s]
  6%|▋         | 2020/31100 [00:55<13:11, 36.76it/s]
  7%|▋         | 2024/31100 [00:55<12:54, 37.54it/s]
  7%|▋         | 2028/31100 [00:55<13:02, 37.16it/s]
  7%|▋         | 2032/31100 [00:56<12:49, 37.78it/s]
  7%|▋         | 2036/31100 [00:56<12:51, 37.66it/s]
  7%|▋         | 2041/31100 [00:56<12:31, 38.67it/s]
  7%|▋         | 2045/31100 [00:56<12:40, 38.21it/s]
  7%|▋         | 2049/31100 [00:56<12:32, 38.60it/s]
  7%|▋         | 2053/31100 [00:56<12:32, 38.58it/s]
  7%|▋         | 2057/31100 [00:56<12:39, 38.26it/s]
  7%|▋         | 2061/31100 [00:56<13:01, 37.15it/s]
  7%|▋  

[2m[36m(_objective pid=49074)[0m {'loss': 0.3194, 'learning_rate': 2.1368081685898093e-05, 'epoch': 0.4}


[2m[36m(_objective pid=49074)[0m   8%|▊         | 2498/31100 [01:08<12:49, 37.18it/s]                                                      8%|▊         | 2500/31100 [01:08<12:49, 37.18it/s]
  8%|▊         | 2502/31100 [01:08<12:41, 37.54it/s]
  8%|▊         | 2506/31100 [01:09<12:29, 38.15it/s]
  8%|▊         | 2510/31100 [01:09<12:24, 38.40it/s]
  8%|▊         | 2514/31100 [01:09<12:24, 38.41it/s]
  8%|▊         | 2518/31100 [01:09<12:26, 38.29it/s]


Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:26:59. Total running time: 2hr 6min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m   8%|▊         | 2522/31100 [01:09<12:20, 38.58it/s]
  8%|▊         | 2526/31100 [01:09<12:29, 38.14it/s]
  8%|▊         | 2530/31100 [01:09<12:32, 37.96it/s]
  8%|▊         | 2534/31100 [01:09<12:27, 38.21it/s]
  8%|▊         | 2538/31100 [01:09<12:23, 38.42it/s]
  8%|▊         | 2542/31100 [01:10<12:30, 38.07it/s]
  8%|▊         | 2546/31100 [01:10<12:35, 37.81it/s]
  8%|▊         | 2550/31100 [01:10<12:30, 38.05it/s]
  8%|▊         | 2554/31100 [01:10<12:30, 38.04it/s]
  8%|▊         | 2558/31100 [01:10<12:34, 37.82it/s]
  8%|▊         | 2562/31100 [01:10<12:26, 38.25it/s]
  8%|▊         | 2566/31100 [01:10<12:22, 38.41it/s]
  8%|▊         | 2571/31100 [01:10<12:17, 38.69it/s]
  8%|▊         | 2575/31100 [01:10<12:12, 38.93it/s]
  8%|▊         | 2579/31100 [01:10<12:06, 39.24it/s]
  8%|▊         | 2583/31100 [01:11<12:45, 37.24it/s]
  8%|▊         | 2587/31100 [01:11<12:37, 37.65it/s]
  8%|▊         | 2591/31100 [01:11<12:34, 37.77it/s]
  8%|▊   

[2m[36m(_objective pid=49074)[0m {'loss': 0.3346, 'learning_rate': 2.099451382425652e-05, 'epoch': 0.48}


[2m[36m(_objective pid=49074)[0m  10%|▉         | 3000/31100 [01:22<12:55, 36.21it/s]                                                     10%|▉         | 3000/31100 [01:22<12:55, 36.21it/s]
 10%|▉         | 3004/31100 [01:22<13:10, 35.56it/s]
 10%|▉         | 3008/31100 [01:22<13:02, 35.89it/s]
 10%|▉         | 3012/31100 [01:22<12:49, 36.50it/s]
 10%|▉         | 3016/31100 [01:22<13:00, 35.98it/s]
 10%|▉         | 3020/31100 [01:22<13:04, 35.78it/s]
 10%|▉         | 3024/31100 [01:23<13:01, 35.94it/s]
 10%|▉         | 3028/31100 [01:23<13:16, 35.23it/s]
 10%|▉         | 3032/31100 [01:23<13:22, 34.98it/s]
 10%|▉         | 3036/31100 [01:23<13:27, 34.77it/s]
 10%|▉         | 3040/31100 [01:23<13:21, 35.02it/s]
 10%|▉         | 3044/31100 [01:23<13:25, 34.83it/s]
 10%|▉         | 3048/31100 [01:23<13:24, 34.88it/s]
 10%|▉         | 3052/31100 [01:23<13:09, 35.52it/s]
 10%|▉         | 3056/31100 [01:23<12:57, 36.08it/s]
 10%|▉         | 3060/31100 [01:24<12:47, 36.53it/s]
 10%|▉  

[2m[36m(_objective pid=49074)[0m {'loss': 0.3106, 'learning_rate': 2.0620945962614944e-05, 'epoch': 0.56}


[2m[36m(_objective pid=49074)[0m  11%|█▏        | 3500/31100 [01:36<12:49, 35.85it/s]                                                     11%|█▏        | 3500/31100 [01:36<12:49, 35.85it/s]
 11%|█▏        | 3504/31100 [01:36<12:39, 36.35it/s]
 11%|█▏        | 3508/31100 [01:36<12:31, 36.69it/s]
 11%|█▏        | 3512/31100 [01:36<12:38, 36.39it/s]
 11%|█▏        | 3516/31100 [01:36<12:57, 35.48it/s]
 11%|█▏        | 3520/31100 [01:36<12:53, 35.65it/s]
 11%|█▏        | 3524/31100 [01:36<12:50, 35.81it/s]
 11%|█▏        | 3528/31100 [01:36<12:52, 35.70it/s]
 11%|█▏        | 3532/31100 [01:36<12:55, 35.54it/s]
 11%|█▏        | 3536/31100 [01:37<12:46, 35.94it/s]
 11%|█▏        | 3540/31100 [01:37<12:59, 35.37it/s]
 11%|█▏        | 3544/31100 [01:37<12:58, 35.40it/s]
 11%|█▏        | 3548/31100 [01:37<13:04, 35.14it/s]
 11%|█▏        | 3552/31100 [01:37<13:07, 34.96it/s]
 11%|█▏        | 3556/31100 [01:37<13:00, 35.31it/s]
 11%|█▏        | 3560/31100 [01:37<12:48, 35.82it/s]
 11%|█▏ 

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:27:29. Total running time: 2hr 7min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2     

[2m[36m(_objective pid=49074)[0m  12%|█▏        | 3628/31100 [01:39<13:14, 34.56it/s]
 12%|█▏        | 3632/31100 [01:39<13:14, 34.58it/s]
 12%|█▏        | 3636/31100 [01:39<13:07, 34.86it/s]
 12%|█▏        | 3640/31100 [01:40<12:48, 35.71it/s]
 12%|█▏        | 3644/31100 [01:40<12:35, 36.34it/s]
 12%|█▏        | 3648/31100 [01:40<12:33, 36.41it/s]
 12%|█▏        | 3652/31100 [01:40<12:19, 37.10it/s]
 12%|█▏        | 3656/31100 [01:40<12:25, 36.82it/s]
 12%|█▏        | 3660/31100 [01:40<12:13, 37.43it/s]
 12%|█▏        | 3664/31100 [01:40<12:04, 37.85it/s]
 12%|█▏        | 3668/31100 [01:40<12:11, 37.48it/s]
 12%|█▏        | 3672/31100 [01:40<12:04, 37.84it/s]
 12%|█▏        | 3676/31100 [01:41<12:17, 37.18it/s]
 12%|█▏        | 3680/31100 [01:41<12:21, 36.98it/s]
 12%|█▏        | 3684/31100 [01:41<12:13, 37.40it/s]
 12%|█▏        | 3688/31100 [01:41<11:59, 38.11it/s]
 12%|█▏        | 3693/31100 [01:41<11:42, 38.99it/s]
 12%|█▏        | 3697/31100 [01:41<11:57, 38.18it/s]
 12%|█▏  

[2m[36m(_objective pid=49074)[0m {'loss': 0.2784, 'learning_rate': 2.0247378100973366e-05, 'epoch': 0.64}


[2m[36m(_objective pid=49074)[0m  13%|█▎        | 3999/31100 [01:49<11:35, 38.97it/s]                                                     13%|█▎        | 4000/31100 [01:49<11:35, 38.97it/s]
 13%|█▎        | 4003/31100 [01:49<11:38, 38.77it/s]
 13%|█▎        | 4008/31100 [01:49<11:30, 39.24it/s]
 13%|█▎        | 4012/31100 [01:49<11:47, 38.27it/s]
 13%|█▎        | 4016/31100 [01:50<11:47, 38.27it/s]
 13%|█▎        | 4020/31100 [01:50<11:40, 38.64it/s]
 13%|█▎        | 4024/31100 [01:50<11:35, 38.95it/s]
 13%|█▎        | 4028/31100 [01:50<11:46, 38.34it/s]
 13%|█▎        | 4032/31100 [01:50<11:41, 38.61it/s]
 13%|█▎        | 4036/31100 [01:50<12:02, 37.47it/s]
 13%|█▎        | 4041/31100 [01:50<11:43, 38.44it/s]
 13%|█▎        | 4045/31100 [01:50<11:54, 37.87it/s]
 13%|█▎        | 4049/31100 [01:50<12:15, 36.80it/s]
 13%|█▎        | 4054/31100 [01:51<11:57, 37.71it/s]
 13%|█▎        | 4058/31100 [01:51<11:55, 37.82it/s]
 13%|█▎        | 4062/31100 [01:51<12:03, 37.37it/s]
 13%|█▎ 

[2m[36m(_objective pid=49074)[0m {'loss': 0.2705, 'learning_rate': 1.9873810239331792e-05, 'epoch': 0.72}


[2m[36m(_objective pid=49074)[0m  14%|█▍        | 4507/31100 [02:03<11:47, 37.59it/s] 15%|█▍        | 4511/31100 [02:03<11:39, 38.03it/s]
 15%|█▍        | 4515/31100 [02:03<11:32, 38.39it/s]
 15%|█▍        | 4519/31100 [02:03<11:24, 38.81it/s]
 15%|█▍        | 4523/31100 [02:03<11:24, 38.82it/s]
 15%|█▍        | 4527/31100 [02:03<11:49, 37.44it/s]
 15%|█▍        | 4531/31100 [02:03<12:09, 36.44it/s]
 15%|█▍        | 4535/31100 [02:03<11:51, 37.34it/s]
 15%|█▍        | 4540/31100 [02:04<11:33, 38.32it/s]
 15%|█▍        | 4544/31100 [02:04<11:30, 38.45it/s]
 15%|█▍        | 4548/31100 [02:04<11:24, 38.78it/s]
 15%|█▍        | 4553/31100 [02:04<11:16, 39.23it/s]
 15%|█▍        | 4557/31100 [02:04<11:14, 39.33it/s]
 15%|█▍        | 4561/31100 [02:04<11:16, 39.22it/s]
 15%|█▍        | 4565/31100 [02:04<11:49, 37.38it/s]
 15%|█▍        | 4569/31100 [02:04<11:42, 37.77it/s]
 15%|█▍        | 4574/31100 [02:04<11:26, 38.62it/s]
 15%|█▍        | 4578/31100 [02:05<11:22, 38.88it/s]
 15%|█▍  

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:27:59. Total running time: 2hr 7min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  15%|█▌        | 4754/31100 [02:09<12:33, 34.96it/s]
 15%|█▌        | 4758/31100 [02:09<12:34, 34.89it/s]
 15%|█▌        | 4762/31100 [02:09<13:00, 33.76it/s]
 15%|█▌        | 4766/31100 [02:10<12:30, 35.08it/s]
 15%|█▌        | 4770/31100 [02:10<12:15, 35.78it/s]
 15%|█▌        | 4774/31100 [02:10<12:18, 35.67it/s]
 15%|█▌        | 4778/31100 [02:10<12:18, 35.64it/s]
 15%|█▌        | 4782/31100 [02:10<12:10, 36.02it/s]
 15%|█▌        | 4786/31100 [02:10<11:55, 36.80it/s]
 15%|█▌        | 4791/31100 [02:10<11:41, 37.52it/s]
 15%|█▌        | 4795/31100 [02:10<11:45, 37.26it/s]
 15%|█▌        | 4799/31100 [02:10<11:49, 37.07it/s]
 15%|█▌        | 4803/31100 [02:11<11:38, 37.67it/s]
 15%|█▌        | 4807/31100 [02:11<11:54, 36.79it/s]
 15%|█▌        | 4811/31100 [02:11<11:48, 37.08it/s]
 15%|█▌        | 4815/31100 [02:11<11:43, 37.37it/s]
 15%|█▌        | 4819/31100 [02:11<11:32, 37.95it/s]
 16%|█▌        | 4823/31100 [02:11<12:09, 36.01it/s]
 16%|█▌  

[2m[36m(_objective pid=49074)[0m {'loss': 0.2222, 'learning_rate': 1.9500242377690217e-05, 'epoch': 0.8}


[2m[36m(_objective pid=49074)[0m  16%|█▌        | 5005/31100 [02:16<11:04, 39.26it/s]
 16%|█▌        | 5009/31100 [02:16<11:05, 39.20it/s]
 16%|█▌        | 5013/31100 [02:16<11:42, 37.14it/s]
 16%|█▌        | 5017/31100 [02:16<11:41, 37.21it/s]
 16%|█▌        | 5021/31100 [02:16<11:34, 37.58it/s]
 16%|█▌        | 5025/31100 [02:16<11:25, 38.03it/s]
 16%|█▌        | 5029/31100 [02:17<11:49, 36.72it/s]
 16%|█▌        | 5033/31100 [02:17<11:37, 37.37it/s]
 16%|█▌        | 5037/31100 [02:17<11:25, 38.03it/s]
 16%|█▌        | 5041/31100 [02:17<11:24, 38.09it/s]
 16%|█▌        | 5045/31100 [02:17<11:14, 38.63it/s]
 16%|█▌        | 5049/31100 [02:17<11:20, 38.26it/s]
 16%|█▌        | 5053/31100 [02:17<11:26, 37.95it/s]
 16%|█▋        | 5057/31100 [02:17<11:30, 37.74it/s]
 16%|█▋        | 5061/31100 [02:17<11:39, 37.23it/s]
 16%|█▋        | 5065/31100 [02:18<11:46, 36.85it/s]
 16%|█▋        | 5069/31100 [02:18<11:46, 36.82it/s]
 16%|█▋        | 5073/31100 [02:18<11:38, 37.27it/s]
 16%|█▋  

[2m[36m(_objective pid=49074)[0m {'loss': 0.2699, 'learning_rate': 1.9126674516048643e-05, 'epoch': 0.88}


[2m[36m(_objective pid=49074)[0m  18%|█▊        | 5507/31100 [02:29<11:08, 38.26it/s]
 18%|█▊        | 5511/31100 [02:30<11:15, 37.88it/s]
 18%|█▊        | 5515/31100 [02:30<11:39, 36.59it/s]
 18%|█▊        | 5519/31100 [02:30<11:43, 36.37it/s]
 18%|█▊        | 5523/31100 [02:30<11:31, 36.98it/s]
 18%|█▊        | 5527/31100 [02:30<11:17, 37.77it/s]
 18%|█▊        | 5531/31100 [02:30<11:10, 38.13it/s]
 18%|█▊        | 5535/31100 [02:30<11:25, 37.30it/s]
 18%|█▊        | 5539/31100 [02:30<11:13, 37.95it/s]
 18%|█▊        | 5543/31100 [02:30<11:08, 38.23it/s]
 18%|█▊        | 5547/31100 [02:30<11:11, 38.05it/s]
 18%|█▊        | 5551/31100 [02:31<11:05, 38.39it/s]
 18%|█▊        | 5555/31100 [02:31<11:00, 38.68it/s]
 18%|█▊        | 5559/31100 [02:31<11:34, 36.77it/s]
 18%|█▊        | 5563/31100 [02:31<11:24, 37.33it/s]
 18%|█▊        | 5568/31100 [02:31<11:04, 38.45it/s]
 18%|█▊        | 5572/31100 [02:31<10:57, 38.83it/s]
 18%|█▊        | 5576/31100 [02:31<11:01, 38.58it/s]
 18%|█▊  

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:28:29. Total running time: 2hr 8min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2     

[2m[36m(_objective pid=49074)[0m  19%|█▉        | 5883/31100 [02:39<11:49, 35.53it/s]
 19%|█▉        | 5887/31100 [02:39<12:09, 34.58it/s]
 19%|█▉        | 5891/31100 [02:40<12:06, 34.70it/s]
 19%|█▉        | 5895/31100 [02:40<12:07, 34.63it/s]
 19%|█▉        | 5899/31100 [02:40<11:59, 35.01it/s]
 19%|█▉        | 5903/31100 [02:40<12:00, 34.98it/s]
 19%|█▉        | 5907/31100 [02:40<11:39, 36.00it/s]
 19%|█▉        | 5911/31100 [02:40<11:49, 35.52it/s]
 19%|█▉        | 5915/31100 [02:40<11:59, 35.00it/s]
 19%|█▉        | 5919/31100 [02:40<11:59, 34.98it/s]
 19%|█▉        | 5923/31100 [02:40<11:49, 35.51it/s]
 19%|█▉        | 5927/31100 [02:41<12:05, 34.71it/s]
 19%|█▉        | 5931/31100 [02:41<12:01, 34.86it/s]
 19%|█▉        | 5935/31100 [02:41<12:31, 33.47it/s]
 19%|█▉        | 5939/31100 [02:41<12:05, 34.70it/s]
 19%|█▉        | 5943/31100 [02:41<11:54, 35.22it/s]
 19%|█▉        | 5947/31100 [02:41<12:01, 34.84it/s]
 19%|█▉        | 5951/31100 [02:41<11:56, 35.10it/s]
 19%|█▉  

[2m[36m(_objective pid=49074)[0m {'loss': 0.2758, 'learning_rate': 1.875310665440707e-05, 'epoch': 0.96}


[2m[36m(_objective pid=49074)[0m  19%|█▉        | 5999/31100 [02:43<11:54, 35.13it/s]                                                     19%|█▉        | 6000/31100 [02:43<11:54, 35.13it/s]
 19%|█▉        | 6003/31100 [02:43<11:44, 35.65it/s]
 19%|█▉        | 6007/31100 [02:43<11:25, 36.61it/s]
 19%|█▉        | 6011/31100 [02:43<11:41, 35.75it/s]
 19%|█▉        | 6015/31100 [02:43<11:27, 36.51it/s]
 19%|█▉        | 6019/31100 [02:43<11:31, 36.28it/s]
 19%|█▉        | 6023/31100 [02:43<11:30, 36.30it/s]
 19%|█▉        | 6027/31100 [02:43<11:34, 36.13it/s]
 19%|█▉        | 6031/31100 [02:44<11:19, 36.89it/s]
 19%|█▉        | 6035/31100 [02:44<11:18, 36.92it/s]
 19%|█▉        | 6039/31100 [02:44<11:08, 37.46it/s]
 19%|█▉        | 6043/31100 [02:44<11:16, 37.06it/s]
 19%|█▉        | 6047/31100 [02:44<11:11, 37.33it/s]
 19%|█▉        | 6051/31100 [02:44<11:16, 37.03it/s]
 19%|█▉        | 6055/31100 [02:44<11:08, 37.46it/s]
 19%|█▉        | 6059/31100 [02:44<11:41, 35.71it/s]
 19%|█▉ 

Trial _objective_f556c_00019 finished iteration 1 at 2023-09-11 15:28:48. Total running time: 2hr 8min 27s
+-------------------------------------------------+
| Trial _objective_f556c_00019 result             |
+-------------------------------------------------+
| time_this_iter_s                        181.023 |
| time_total_s                            181.023 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.27468 |
| eval_runtime                             9.5663 |
| eval_samples_per_second                   433.5 |
| eval_steps_per_second                    13.589 |
| objective                               0.27468 |
+-------------------------------------------------+

[2m[36m(_objective pid=49074)[0m {'eval_loss': 0.27468499541282654, 'eval_runtime': 9.5663, 'eval_samples_per_second': 433.5, 'eval_steps_per_second': 13.589, 'epoch': 1.0}


[2m[36m(_objective pid=49074)[0m  20%|██        | 6222/31100 [02:59<5:46:20,  1.20it/s]
 20%|██        | 6226/31100 [03:00<4:06:39,  1.68it/s]
 20%|██        | 6230/31100 [03:00<2:56:46,  2.34it/s]
 20%|██        | 6234/31100 [03:00<2:07:50,  3.24it/s]
 20%|██        | 6238/31100 [03:00<1:34:09,  4.40it/s]
 20%|██        | 6242/31100 [03:00<1:09:37,  5.95it/s]
 20%|██        | 6246/31100 [03:00<52:10,  7.94it/s]  
 20%|██        | 6250/31100 [03:00<39:55, 10.37it/s]
 20%|██        | 6254/31100 [03:00<31:32, 13.13it/s]
 20%|██        | 6258/31100 [03:01<25:54, 15.98it/s]
 20%|██        | 6262/31100 [03:01<21:44, 19.04it/s]
 20%|██        | 6266/31100 [03:01<18:45, 22.06it/s]
 20%|██        | 6270/31100 [03:01<16:38, 24.87it/s]
 20%|██        | 6274/31100 [03:01<15:41, 26.36it/s]
 20%|██        | 6278/31100 [03:01<14:39, 28.22it/s]
 20%|██        | 6282/31100 [03:01<13:49, 29.92it/s]
 20%|██        | 6286/31100 [03:01<13:07, 31.51it/s]
 20%|██        | 6290/31100 [03:01<12:23, 33.35i

[2m[36m(_objective pid=49074)[0m {'loss': 0.2186, 'learning_rate': 1.837953879276549e-05, 'epoch': 1.05}


[2m[36m(_objective pid=49074)[0m                                                      21%|██        | 6500/31100 [03:07<10:57, 37.39it/s] 21%|██        | 6501/31100 [03:07<10:50, 37.80it/s]
 21%|██        | 6505/31100 [03:07<10:45, 38.09it/s]
 21%|██        | 6509/31100 [03:07<10:42, 38.30it/s]
 21%|██        | 6513/31100 [03:07<10:41, 38.35it/s]
 21%|██        | 6517/31100 [03:08<10:46, 38.03it/s]
 21%|██        | 6521/31100 [03:08<10:48, 37.90it/s]
 21%|██        | 6525/31100 [03:08<10:48, 37.90it/s]
 21%|██        | 6529/31100 [03:08<11:00, 37.20it/s]
 21%|██        | 6533/31100 [03:08<10:47, 37.91it/s]
 21%|██        | 6537/31100 [03:08<10:49, 37.80it/s]
 21%|██        | 6541/31100 [03:08<10:43, 38.16it/s]
 21%|██        | 6545/31100 [03:08<10:45, 38.05it/s]
 21%|██        | 6549/31100 [03:08<10:35, 38.61it/s]
 21%|██        | 6553/31100 [03:08<10:37, 38.50it/s]
 21%|██        | 6557/31100 [03:09<10:56, 37.41it/s]
 21%|██        | 6561/31100 [03:09<10:50, 37.72it/s]
 21%|██ 

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:28:59. Total running time: 2hr 8min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  21%|██        | 6590/31100 [03:09<11:12, 36.42it/s]
 21%|██        | 6594/31100 [03:10<10:58, 37.23it/s]
 21%|██        | 6598/31100 [03:10<10:57, 37.28it/s]
 21%|██        | 6602/31100 [03:10<10:49, 37.73it/s]
 21%|██        | 6606/31100 [03:10<10:40, 38.22it/s]
 21%|██▏       | 6610/31100 [03:10<10:44, 38.01it/s]
 21%|██▏       | 6614/31100 [03:10<10:49, 37.72it/s]
 21%|██▏       | 6618/31100 [03:10<10:47, 37.79it/s]
 21%|██▏       | 6622/31100 [03:10<10:53, 37.47it/s]
 21%|██▏       | 6626/31100 [03:10<10:49, 37.66it/s]
 21%|██▏       | 6630/31100 [03:11<10:45, 37.90it/s]
 21%|██▏       | 6634/31100 [03:11<10:36, 38.45it/s]
 21%|██▏       | 6639/31100 [03:11<10:28, 38.92it/s]
 21%|██▏       | 6643/31100 [03:11<10:32, 38.65it/s]
 21%|██▏       | 6647/31100 [03:11<10:38, 38.31it/s]
 21%|██▏       | 6651/31100 [03:11<10:33, 38.61it/s]
 21%|██▏       | 6655/31100 [03:11<10:42, 38.04it/s]
 21%|██▏       | 6659/31100 [03:11<10:38, 38.27it/s]
 21%|██▏ 

[2m[36m(_objective pid=49074)[0m {'loss': 0.1517, 'learning_rate': 1.800597093112392e-05, 'epoch': 1.13}


[2m[36m(_objective pid=49074)[0m  23%|██▎       | 6999/31100 [03:20<10:53, 36.88it/s]                                                     23%|██▎       | 7000/31100 [03:20<10:53, 36.88it/s]
 23%|██▎       | 7003/31100 [03:21<10:42, 37.52it/s]
 23%|██▎       | 7007/31100 [03:21<10:42, 37.49it/s]
 23%|██▎       | 7011/31100 [03:21<10:34, 37.95it/s]
 23%|██▎       | 7015/31100 [03:21<10:49, 37.07it/s]
 23%|██▎       | 7019/31100 [03:21<10:52, 36.93it/s]
 23%|██▎       | 7023/31100 [03:21<10:58, 36.55it/s]
 23%|██▎       | 7027/31100 [03:21<10:47, 37.19it/s]
 23%|██▎       | 7031/31100 [03:21<10:45, 37.30it/s]
 23%|██▎       | 7035/31100 [03:21<10:35, 37.86it/s]
 23%|██▎       | 7039/31100 [03:22<10:29, 38.25it/s]
 23%|██▎       | 7043/31100 [03:22<10:25, 38.44it/s]
 23%|██▎       | 7047/31100 [03:22<10:52, 36.86it/s]
 23%|██▎       | 7051/31100 [03:22<10:37, 37.74it/s]
 23%|██▎       | 7055/31100 [03:22<10:51, 36.91it/s]
 23%|██▎       | 7059/31100 [03:22<10:37, 37.73it/s]
 23%|██▎

[2m[36m(_objective pid=49074)[0m {'loss': 0.1782, 'learning_rate': 1.7632403069482342e-05, 'epoch': 1.21}


[2m[36m(_objective pid=49074)[0m  24%|██▍       | 7503/31100 [03:34<10:51, 36.22it/s]
 24%|██▍       | 7507/31100 [03:34<10:57, 35.91it/s]
 24%|██▍       | 7511/31100 [03:34<11:02, 35.59it/s]
 24%|██▍       | 7515/31100 [03:34<10:56, 35.95it/s]
 24%|██▍       | 7519/31100 [03:34<10:36, 37.06it/s]
 24%|██▍       | 7523/31100 [03:34<10:41, 36.78it/s]
 24%|██▍       | 7527/31100 [03:34<10:29, 37.46it/s]
 24%|██▍       | 7531/31100 [03:35<10:26, 37.60it/s]
 24%|██▍       | 7535/31100 [03:35<10:20, 37.97it/s]
 24%|██▍       | 7539/31100 [03:35<10:48, 36.33it/s]
 24%|██▍       | 7543/31100 [03:35<10:36, 36.99it/s]
 24%|██▍       | 7547/31100 [03:35<10:29, 37.40it/s]
 24%|██▍       | 7551/31100 [03:35<10:20, 37.94it/s]
 24%|██▍       | 7555/31100 [03:35<10:16, 38.19it/s]
 24%|██▍       | 7559/31100 [03:35<10:11, 38.50it/s]
 24%|██▍       | 7563/31100 [03:35<10:18, 38.07it/s]
 24%|██▍       | 7568/31100 [03:36<10:03, 39.02it/s]
 24%|██▍       | 7572/31100 [03:36<10:02, 39.08it/s]
 24%|██▍ 

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:29:29. Total running time: 2hr 9min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2     

[2m[36m(_objective pid=49074)[0m  25%|██▍       | 7718/31100 [03:39<10:17, 37.85it/s]
 25%|██▍       | 7722/31100 [03:40<10:09, 38.33it/s]
 25%|██▍       | 7726/31100 [03:40<10:10, 38.26it/s]
 25%|██▍       | 7730/31100 [03:40<10:19, 37.74it/s]
 25%|██▍       | 7734/31100 [03:40<10:09, 38.35it/s]
 25%|██▍       | 7738/31100 [03:40<10:33, 36.85it/s]
 25%|██▍       | 7742/31100 [03:40<10:23, 37.45it/s]
 25%|██▍       | 7746/31100 [03:40<10:16, 37.87it/s]
 25%|██▍       | 7750/31100 [03:40<10:11, 38.20it/s]
 25%|██▍       | 7754/31100 [03:40<10:04, 38.64it/s]
 25%|██▍       | 7758/31100 [03:41<10:25, 37.32it/s]
 25%|██▍       | 7762/31100 [03:41<10:20, 37.62it/s]
 25%|██▍       | 7766/31100 [03:41<10:11, 38.15it/s]
 25%|██▍       | 7770/31100 [03:41<10:31, 36.97it/s]
 25%|██▌       | 7775/31100 [03:41<10:13, 38.03it/s]
 25%|██▌       | 7779/31100 [03:41<10:07, 38.37it/s]
 25%|██▌       | 7783/31100 [03:41<10:00, 38.81it/s]
 25%|██▌       | 7787/31100 [03:41<10:03, 38.62it/s]
 25%|██▌ 

[2m[36m(_objective pid=49074)[0m {'loss': 0.1283, 'learning_rate': 1.7258835207840768e-05, 'epoch': 1.29}


[2m[36m(_objective pid=49074)[0m  26%|██▌       | 7997/31100 [03:47<10:23, 37.03it/s]                                                     26%|██▌       | 8000/31100 [03:47<10:23, 37.03it/s]
 26%|██▌       | 8001/31100 [03:47<10:23, 37.04it/s]
 26%|██▌       | 8005/31100 [03:47<11:01, 34.89it/s]
 26%|██▌       | 8009/31100 [03:47<11:29, 33.50it/s]
 26%|██▌       | 8013/31100 [03:47<11:13, 34.27it/s]
 26%|██▌       | 8017/31100 [03:48<11:55, 32.25it/s]
 26%|██▌       | 8021/31100 [03:48<11:24, 33.72it/s]
 26%|██▌       | 8025/31100 [03:48<11:33, 33.28it/s]
 26%|██▌       | 8029/31100 [03:48<11:20, 33.92it/s]
 26%|██▌       | 8033/31100 [03:48<11:01, 34.87it/s]
 26%|██▌       | 8037/31100 [03:48<10:43, 35.86it/s]
 26%|██▌       | 8041/31100 [03:48<10:48, 35.58it/s]
 26%|██▌       | 8045/31100 [03:48<10:54, 35.22it/s]
 26%|██▌       | 8049/31100 [03:48<10:49, 35.48it/s]
 26%|██▌       | 8053/31100 [03:49<10:47, 35.57it/s]
 26%|██▌       | 8057/31100 [03:49<10:51, 35.38it/s]
 26%|██▌

[2m[36m(_objective pid=49074)[0m {'loss': 0.1891, 'learning_rate': 1.688526734619919e-05, 'epoch': 1.37}


[2m[36m(_objective pid=49074)[0m  27%|██▋       | 8505/31100 [04:01<09:51, 38.19it/s]
 27%|██▋       | 8509/31100 [04:01<09:50, 38.24it/s]
 27%|██▋       | 8513/31100 [04:01<09:52, 38.15it/s]
 27%|██▋       | 8517/31100 [04:01<10:02, 37.50it/s]
 27%|██▋       | 8521/31100 [04:01<10:00, 37.62it/s]
 27%|██▋       | 8525/31100 [04:01<09:59, 37.67it/s]
 27%|██▋       | 8529/31100 [04:01<10:06, 37.20it/s]
 27%|██▋       | 8533/31100 [04:01<10:21, 36.32it/s]
 27%|██▋       | 8537/31100 [04:01<10:24, 36.15it/s]
 27%|██▋       | 8541/31100 [04:02<10:20, 36.36it/s]
 27%|██▋       | 8545/31100 [04:02<10:39, 35.29it/s]
 27%|██▋       | 8549/31100 [04:02<10:34, 35.56it/s]
 28%|██▊       | 8553/31100 [04:02<10:25, 36.08it/s]
 28%|██▊       | 8557/31100 [04:02<10:29, 35.80it/s]
 28%|██▊       | 8561/31100 [04:02<10:17, 36.48it/s]
 28%|██▊       | 8565/31100 [04:02<10:39, 35.25it/s]
 28%|██▊       | 8569/31100 [04:02<10:32, 35.64it/s]
 28%|██▊       | 8573/31100 [04:02<10:26, 35.94it/s]
 28%|██▊ 

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:29:59. Total running time: 2hr 9min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  28%|██▊       | 8834/31100 [04:10<09:50, 37.70it/s]
 28%|██▊       | 8838/31100 [04:10<09:54, 37.47it/s]
 28%|██▊       | 8842/31100 [04:10<10:00, 37.04it/s]
 28%|██▊       | 8846/31100 [04:10<09:59, 37.13it/s]
 28%|██▊       | 8850/31100 [04:10<09:50, 37.67it/s]
 28%|██▊       | 8854/31100 [04:10<09:49, 37.72it/s]
 28%|██▊       | 8858/31100 [04:10<09:42, 38.17it/s]
 28%|██▊       | 8862/31100 [04:10<09:40, 38.29it/s]
 29%|██▊       | 8866/31100 [04:10<09:33, 38.75it/s]
 29%|██▊       | 8870/31100 [04:11<09:32, 38.85it/s]
 29%|██▊       | 8874/31100 [04:11<09:31, 38.87it/s]
 29%|██▊       | 8878/31100 [04:11<09:31, 38.87it/s]
 29%|██▊       | 8882/31100 [04:11<09:35, 38.63it/s]
 29%|██▊       | 8886/31100 [04:11<09:35, 38.59it/s]
 29%|██▊       | 8890/31100 [04:11<09:41, 38.21it/s]
 29%|██▊       | 8894/31100 [04:11<09:34, 38.68it/s]
 29%|██▊       | 8898/31100 [04:11<09:32, 38.75it/s]
 29%|██▊       | 8902/31100 [04:11<09:34, 38.62it/s]
 29%|██▊ 

[2m[36m(_objective pid=49074)[0m {'loss': 0.174, 'learning_rate': 1.651169948455762e-05, 'epoch': 1.45}


[2m[36m(_objective pid=49074)[0m  29%|██▉       | 8998/31100 [04:14<09:19, 39.49it/s]                                                     29%|██▉       | 9000/31100 [04:14<09:19, 39.49it/s]
 29%|██▉       | 9002/31100 [04:14<09:43, 37.89it/s]
 29%|██▉       | 9006/31100 [04:14<09:40, 38.03it/s]
 29%|██▉       | 9011/31100 [04:14<09:30, 38.70it/s]
 29%|██▉       | 9015/31100 [04:14<09:33, 38.54it/s]
 29%|██▉       | 9019/31100 [04:14<09:32, 38.60it/s]
 29%|██▉       | 9023/31100 [04:14<09:31, 38.64it/s]
 29%|██▉       | 9027/31100 [04:15<09:33, 38.52it/s]
 29%|██▉       | 9031/31100 [04:15<09:28, 38.82it/s]
 29%|██▉       | 9035/31100 [04:15<09:36, 38.30it/s]
 29%|██▉       | 9039/31100 [04:15<09:36, 38.28it/s]
 29%|██▉       | 9043/31100 [04:15<09:35, 38.31it/s]
 29%|██▉       | 9047/31100 [04:15<09:28, 38.76it/s]
 29%|██▉       | 9051/31100 [04:15<09:35, 38.30it/s]
 29%|██▉       | 9055/31100 [04:15<09:31, 38.54it/s]
 29%|██▉       | 9059/31100 [04:15<09:32, 38.52it/s]
 29%|██▉

[2m[36m(_objective pid=49074)[0m {'loss': 0.1942, 'learning_rate': 1.6138131622916044e-05, 'epoch': 1.53}


[2m[36m(_objective pid=49074)[0m  31%|███       | 9499/31100 [04:27<09:17, 38.73it/s]                                                     31%|███       | 9500/31100 [04:27<09:17, 38.73it/s]
 31%|███       | 9504/31100 [04:27<09:11, 39.12it/s]
 31%|███       | 9509/31100 [04:27<09:05, 39.55it/s]
 31%|███       | 9514/31100 [04:28<09:02, 39.79it/s]
 31%|███       | 9519/31100 [04:28<08:59, 39.99it/s]
 31%|███       | 9523/31100 [04:28<09:02, 39.74it/s]
 31%|███       | 9527/31100 [04:28<09:06, 39.47it/s]
 31%|███       | 9531/31100 [04:28<09:05, 39.56it/s]
 31%|███       | 9535/31100 [04:28<09:03, 39.66it/s]
 31%|███       | 9539/31100 [04:28<09:34, 37.53it/s]
 31%|███       | 9543/31100 [04:28<09:25, 38.11it/s]
 31%|███       | 9547/31100 [04:28<09:29, 37.85it/s]
 31%|███       | 9551/31100 [04:29<09:30, 37.79it/s]
 31%|███       | 9555/31100 [04:29<09:51, 36.45it/s]
 31%|███       | 9559/31100 [04:29<09:38, 37.23it/s]
 31%|███       | 9563/31100 [04:29<09:31, 37.66it/s]
 31%|███

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:30:29. Total running time: 2hr 10min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  32%|███▏      | 9959/31100 [04:40<09:19, 37.81it/s]
 32%|███▏      | 9963/31100 [04:40<09:22, 37.60it/s]
 32%|███▏      | 9967/31100 [04:40<09:36, 36.69it/s]
 32%|███▏      | 9971/31100 [04:40<09:24, 37.40it/s]
 32%|███▏      | 9975/31100 [04:40<10:06, 34.84it/s]
 32%|███▏      | 9979/31100 [04:40<09:53, 35.57it/s]
 32%|███▏      | 9983/31100 [04:40<09:35, 36.70it/s]
 32%|███▏      | 9987/31100 [04:40<09:22, 37.54it/s]
 32%|███▏      | 9992/31100 [04:41<09:08, 38.48it/s]
 32%|███▏      | 9996/31100 [04:41<09:09, 38.40it/s]


[2m[36m(_objective pid=49074)[0m {'loss': 0.1787, 'learning_rate': 1.5764563761274467e-05, 'epoch': 1.61}


[2m[36m(_objective pid=49074)[0m                                                      32%|███▏      | 10000/31100 [04:41<09:09, 38.40it/s] 32%|███▏      | 10001/31100 [04:41<08:59, 39.10it/s]
 32%|███▏      | 10006/31100 [04:41<08:56, 39.35it/s]
 32%|███▏      | 10010/31100 [04:41<08:55, 39.39it/s]
 32%|███▏      | 10015/31100 [04:41<08:51, 39.64it/s]
 32%|███▏      | 10019/31100 [04:41<08:59, 39.08it/s]
 32%|███▏      | 10023/31100 [04:41<09:04, 38.69it/s]
 32%|███▏      | 10027/31100 [04:41<09:01, 38.94it/s]
 32%|███▏      | 10031/31100 [04:42<09:11, 38.21it/s]
 32%|███▏      | 10035/31100 [04:42<09:28, 37.03it/s]
 32%|███▏      | 10039/31100 [04:42<09:27, 37.11it/s]
 32%|███▏      | 10043/31100 [04:42<09:46, 35.90it/s]
 32%|███▏      | 10047/31100 [04:42<09:33, 36.71it/s]
 32%|███▏      | 10051/31100 [04:42<09:19, 37.59it/s]
 32%|███▏      | 10055/31100 [04:42<09:35, 36.59it/s]
 32%|███▏      | 10060/31100 [04:42<09:23, 37.32it/s]
 32%|███▏      | 10064/31100 [04:42<09:22, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.1773, 'learning_rate': 1.5390995899632892e-05, 'epoch': 1.69}


[2m[36m(_objective pid=49074)[0m                                                       34%|███▍      | 10500/31100 [04:54<09:27, 36.30it/s] 34%|███▍      | 10501/31100 [04:54<09:11, 37.37it/s]
 34%|███▍      | 10505/31100 [04:54<09:18, 36.85it/s]
 34%|███▍      | 10509/31100 [04:54<09:08, 37.52it/s]
 34%|███▍      | 10513/31100 [04:54<09:16, 37.02it/s]
 34%|███▍      | 10517/31100 [04:55<09:32, 35.97it/s]
 34%|███▍      | 10521/31100 [04:55<09:33, 35.87it/s]
 34%|███▍      | 10525/31100 [04:55<09:22, 36.59it/s]
 34%|███▍      | 10529/31100 [04:55<09:33, 35.85it/s]
 34%|███▍      | 10533/31100 [04:55<09:31, 35.96it/s]
 34%|███▍      | 10537/31100 [04:55<09:23, 36.51it/s]
 34%|███▍      | 10541/31100 [04:55<09:11, 37.27it/s]
 34%|███▍      | 10545/31100 [04:55<09:17, 36.85it/s]
 34%|███▍      | 10550/31100 [04:55<09:01, 37.95it/s]
 34%|███▍      | 10554/31100 [04:56<08:54, 38.43it/s]
 34%|███▍      | 10558/31100 [04:56<08:54, 38.43it/s]
 34%|███▍      | 10563/31100 [04:56<08:46, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.1919, 'learning_rate': 1.5017428037991316e-05, 'epoch': 1.77}


[2m[36m(_objective pid=49074)[0m  35%|███▌      | 11005/31100 [05:07<09:06, 36.74it/s]
 35%|███▌      | 11009/31100 [05:08<09:00, 37.20it/s]
 35%|███▌      | 11013/31100 [05:08<08:52, 37.74it/s]
 35%|███▌      | 11017/31100 [05:08<08:59, 37.22it/s]
 35%|███▌      | 11021/31100 [05:08<09:03, 36.92it/s]
 35%|███▌      | 11025/31100 [05:08<09:07, 36.65it/s]
 35%|███▌      | 11029/31100 [05:08<08:55, 37.45it/s]
 35%|███▌      | 11033/31100 [05:08<08:55, 37.45it/s]
 35%|███▌      | 11037/31100 [05:08<08:50, 37.85it/s]
 36%|███▌      | 11041/31100 [05:08<09:04, 36.85it/s]
 36%|███▌      | 11045/31100 [05:08<08:52, 37.65it/s]
 36%|███▌      | 11049/31100 [05:09<08:47, 37.98it/s]
 36%|███▌      | 11053/31100 [05:09<08:44, 38.25it/s]
 36%|███▌      | 11057/31100 [05:09<08:45, 38.12it/s]
 36%|███▌      | 11061/31100 [05:09<08:45, 38.10it/s]
 36%|███▌      | 11065/31100 [05:09<08:52, 37.66it/s]
 36%|███▌      | 11069/31100 [05:09<08:58, 37.22it/s]
 36%|███▌      | 11073/31100 [05:09<09:00, 37

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:30:59. Total running time: 2hr 10min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  36%|███▌      | 11085/31100 [05:10<09:02, 36.88it/s]
 36%|███▌      | 11089/31100 [05:10<09:09, 36.44it/s]
 36%|███▌      | 11093/31100 [05:10<09:00, 37.01it/s]
 36%|███▌      | 11097/31100 [05:10<08:59, 37.05it/s]
 36%|███▌      | 11101/31100 [05:10<09:03, 36.81it/s]
 36%|███▌      | 11105/31100 [05:10<09:06, 36.58it/s]
 36%|███▌      | 11109/31100 [05:10<09:08, 36.44it/s]
 36%|███▌      | 11113/31100 [05:10<09:30, 35.04it/s]
 36%|███▌      | 11117/31100 [05:10<09:14, 36.06it/s]
 36%|███▌      | 11121/31100 [05:11<09:01, 36.86it/s]
 36%|███▌      | 11125/31100 [05:11<09:15, 35.93it/s]
 36%|███▌      | 11129/31100 [05:11<09:18, 35.78it/s]
 36%|███▌      | 11133/31100 [05:11<09:09, 36.31it/s]
 36%|███▌      | 11137/31100 [05:11<09:02, 36.79it/s]
 36%|███▌      | 11141/31100 [05:11<09:01, 36.88it/s]
 36%|███▌      | 11145/31100 [05:11<08:56, 37.22it/s]
 36%|███▌      | 11149/31100 [05:11<08:48, 37.72it/s]
 36%|███▌      | 11153/31100 [05:11<08:40, 38

[2m[36m(_objective pid=49074)[0m {'loss': 0.21, 'learning_rate': 1.4643860176349743e-05, 'epoch': 1.85}


[2m[36m(_objective pid=49074)[0m  37%|███▋      | 11502/31100 [05:21<08:21, 39.08it/s]
 37%|███▋      | 11506/31100 [05:21<08:23, 38.91it/s]
 37%|███▋      | 11510/31100 [05:21<08:28, 38.52it/s]
 37%|███▋      | 11514/31100 [05:21<08:34, 38.05it/s]
 37%|███▋      | 11518/31100 [05:21<08:27, 38.57it/s]
 37%|███▋      | 11522/31100 [05:21<08:38, 37.75it/s]
 37%|███▋      | 11526/31100 [05:21<08:55, 36.58it/s]
 37%|███▋      | 11530/31100 [05:21<09:02, 36.05it/s]
 37%|███▋      | 11534/31100 [05:21<08:57, 36.40it/s]
 37%|███▋      | 11538/31100 [05:22<08:50, 36.90it/s]
 37%|███▋      | 11542/31100 [05:22<08:40, 37.55it/s]
 37%|███▋      | 11546/31100 [05:22<08:38, 37.71it/s]
 37%|███▋      | 11551/31100 [05:22<08:24, 38.75it/s]
 37%|███▋      | 11555/31100 [05:22<08:25, 38.66it/s]
 37%|███▋      | 11559/31100 [05:22<08:22, 38.86it/s]
 37%|███▋      | 11563/31100 [05:22<08:21, 38.93it/s]
 37%|███▋      | 11568/31100 [05:22<08:12, 39.68it/s]
 37%|███▋      | 11573/31100 [05:22<08:08, 39

[2m[36m(_objective pid=49074)[0m {'loss': 0.1365, 'learning_rate': 1.4270292314708166e-05, 'epoch': 1.93}


[2m[36m(_objective pid=49074)[0m  39%|███▊      | 11998/31100 [05:34<08:29, 37.50it/s]                                                      39%|███▊      | 12000/31100 [05:34<08:29, 37.50it/s]
 39%|███▊      | 12002/31100 [05:34<08:23, 37.89it/s]
 39%|███▊      | 12006/31100 [05:34<08:19, 38.21it/s]
 39%|███▊      | 12010/31100 [05:34<08:16, 38.48it/s]
 39%|███▊      | 12014/31100 [05:34<08:13, 38.70it/s]
 39%|███▊      | 12018/31100 [05:34<08:09, 38.96it/s]
 39%|███▊      | 12022/31100 [05:35<08:09, 39.00it/s]
 39%|███▊      | 12026/31100 [05:35<08:19, 38.22it/s]
 39%|███▊      | 12030/31100 [05:35<08:15, 38.50it/s]
 39%|███▊      | 12034/31100 [05:35<08:17, 38.33it/s]
 39%|███▊      | 12038/31100 [05:35<08:21, 37.99it/s]
 39%|███▊      | 12042/31100 [05:35<08:23, 37.85it/s]
 39%|███▊      | 12046/31100 [05:35<08:25, 37.69it/s]
 39%|███▊      | 12050/31100 [05:35<08:28, 37.43it/s]
 39%|███▉      | 12054/31100 [05:35<08:32, 37.17it/s]
 39%|███▉      | 12058/31100 [05:35<08:48, 3

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:31:30. Total running time: 2hr 11min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  39%|███▉      | 12206/31100 [05:40<09:15, 34.03it/s]
 39%|███▉      | 12210/31100 [05:40<09:16, 33.93it/s]
 39%|███▉      | 12214/31100 [05:40<09:06, 34.53it/s]
 39%|███▉      | 12218/31100 [05:40<09:13, 34.10it/s]
 39%|███▉      | 12222/31100 [05:40<09:05, 34.62it/s]
 39%|███▉      | 12226/31100 [05:40<09:24, 33.46it/s]
 39%|███▉      | 12230/31100 [05:40<09:19, 33.75it/s]
 39%|███▉      | 12234/31100 [05:41<09:05, 34.59it/s]
 39%|███▉      | 12238/31100 [05:41<09:04, 34.65it/s]
 39%|███▉      | 12242/31100 [05:41<09:04, 34.63it/s]
 39%|███▉      | 12246/31100 [05:41<09:07, 34.47it/s]
 39%|███▉      | 12250/31100 [05:41<09:05, 34.54it/s]
 39%|███▉      | 12254/31100 [05:41<09:30, 33.04it/s]
 39%|███▉      | 12258/31100 [05:41<09:21, 33.53it/s]
 39%|███▉      | 12262/31100 [05:41<09:10, 34.22it/s]
 39%|███▉      | 12266/31100 [05:41<09:01, 34.75it/s]
 39%|███▉      | 12270/31100 [05:42<09:09, 34.26it/s]
 39%|███▉      | 12274/31100 [05:42<09:11, 34

Trial _objective_f556c_00019 finished iteration 2 at 2023-09-11 15:31:46. Total running time: 2hr 11min 25s
+-------------------------------------------------+
| Trial _objective_f556c_00019 result             |
+-------------------------------------------------+
| time_this_iter_s                        177.898 |
| time_total_s                            358.921 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.20662 |
| eval_runtime                             9.5695 |
| eval_samples_per_second                 433.354 |
| eval_steps_per_second                    13.585 |
| objective                               0.20662 |
+-------------------------------------------------+

[2m[36m(_objective pid=49074)[0m {'eval_loss': 0.20662125945091248, 'eval_runtime': 9.5695, 'eval_samples_per_second': 433.354, 'eval_steps_per_second': 13.585, 'epoch': 2.0}


[2m[36m(_objective pid=49074)[0m  40%|████      | 12442/31100 [05:57<4:22:37,  1.18it/s]
 40%|████      | 12446/31100 [05:57<3:06:32,  1.67it/s]
 40%|████      | 12450/31100 [05:58<2:13:30,  2.33it/s]
 40%|████      | 12454/31100 [05:58<1:36:14,  3.23it/s]
 40%|████      | 12458/31100 [05:58<1:10:04,  4.43it/s]
 40%|████      | 12462/31100 [05:58<51:52,  5.99it/s]  
 40%|████      | 12466/31100 [05:58<39:07,  7.94it/s]
 40%|████      | 12470/31100 [05:58<30:08, 10.30it/s]
 40%|████      | 12474/31100 [05:58<23:58, 12.95it/s]
 40%|████      | 12478/31100 [05:58<20:02, 15.48it/s]
 40%|████      | 12482/31100 [05:59<16:57, 18.29it/s]
 40%|████      | 12486/31100 [05:59<14:57, 20.74it/s]
 40%|████      | 12490/31100 [05:59<13:18, 23.31it/s]
 40%|████      | 12494/31100 [05:59<11:58, 25.88it/s]
 40%|████      | 12498/31100 [05:59<11:17, 27.47it/s]


[2m[36m(_objective pid=49074)[0m {'loss': 0.1977, 'learning_rate': 1.3896724453066593e-05, 'epoch': 2.01}


[2m[36m(_objective pid=49074)[0m                                                       40%|████      | 12500/31100 [05:59<11:17, 27.47it/s]
 40%|████      | 12502/31100 [05:59<10:50, 28.60it/s]
 40%|████      | 12506/31100 [05:59<10:27, 29.62it/s]
 40%|████      | 12510/31100 [05:59<10:06, 30.67it/s]
 40%|████      | 12514/31100 [06:00<09:43, 31.85it/s]
 40%|████      | 12518/31100 [06:00<09:37, 32.18it/s]
 40%|████      | 12522/31100 [06:00<09:23, 32.99it/s]
 40%|████      | 12526/31100 [06:00<09:21, 33.06it/s]
 40%|████      | 12530/31100 [06:00<09:20, 33.11it/s]
 40%|████      | 12534/31100 [06:00<09:02, 34.20it/s]
 40%|████      | 12538/31100 [06:00<09:06, 33.95it/s]
 40%|████      | 12542/31100 [06:00<09:08, 33.83it/s]
 40%|████      | 12546/31100 [06:01<08:59, 34.36it/s]
 40%|████      | 12550/31100 [06:01<09:02, 34.20it/s]
 40%|████      | 12554/31100 [06:01<08:45, 35.32it/s]
 40%|████      | 12558/31100 [06:01<08:46, 35.23it/s]
 40%|████      | 12562/31100 [06:01<08:50, 3

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:32:00. Total running time: 2hr 11min 38s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  41%|████▏     | 12870/31100 [06:10<08:18, 36.56it/s]
 41%|████▏     | 12874/31100 [06:10<08:19, 36.51it/s]
 41%|████▏     | 12878/31100 [06:10<08:11, 37.09it/s]
 41%|████▏     | 12882/31100 [06:10<08:19, 36.46it/s]
 41%|████▏     | 12886/31100 [06:10<08:29, 35.77it/s]
 41%|████▏     | 12890/31100 [06:10<08:36, 35.25it/s]
 41%|████▏     | 12894/31100 [06:10<08:36, 35.26it/s]
 41%|████▏     | 12898/31100 [06:10<08:30, 35.69it/s]
 41%|████▏     | 12902/31100 [06:11<08:29, 35.70it/s]
 41%|████▏     | 12906/31100 [06:11<08:26, 35.89it/s]
 42%|████▏     | 12910/31100 [06:11<08:23, 36.14it/s]
 42%|████▏     | 12914/31100 [06:11<08:25, 35.99it/s]
 42%|████▏     | 12918/31100 [06:11<08:29, 35.67it/s]
 42%|████▏     | 12922/31100 [06:11<08:39, 35.02it/s]
 42%|████▏     | 12926/31100 [06:11<08:34, 35.31it/s]
 42%|████▏     | 12930/31100 [06:11<08:33, 35.41it/s]
 42%|████▏     | 12934/31100 [06:11<08:27, 35.78it/s]
 42%|████▏     | 12938/31100 [06:12<08:27, 35

[2m[36m(_objective pid=49074)[0m {'loss': 0.1271, 'learning_rate': 1.3523156591425017e-05, 'epoch': 2.09}


 42%|████▏     | 13006/31100 [06:14<08:26, 35.74it/s]
 42%|████▏     | 13010/31100 [06:14<09:00, 33.47it/s]
 42%|████▏     | 13014/31100 [06:14<09:30, 31.69it/s]
 42%|████▏     | 13018/31100 [06:14<09:15, 32.56it/s]
 42%|████▏     | 13022/31100 [06:14<09:04, 33.21it/s]
 42%|████▏     | 13026/31100 [06:14<08:47, 34.23it/s]
 42%|████▏     | 13030/31100 [06:14<09:07, 33.03it/s]
 42%|████▏     | 13034/31100 [06:14<09:10, 32.83it/s]
 42%|████▏     | 13038/31100 [06:15<09:18, 32.33it/s]
 42%|████▏     | 13042/31100 [06:15<09:19, 32.30it/s]
 42%|████▏     | 13046/31100 [06:15<09:21, 32.16it/s]
 42%|████▏     | 13050/31100 [06:15<09:03, 33.18it/s]
 42%|████▏     | 13054/31100 [06:15<08:46, 34.25it/s]
 42%|████▏     | 13058/31100 [06:15<09:07, 32.98it/s]
 42%|████▏     | 13062/31100 [06:15<08:53, 33.83it/s]
 42%|████▏     | 13066/31100 [06:15<08:42, 34.52it/s]
 42%|████▏     | 13070/31100 [06:15<08:31, 35.22it/s]
 42%|████▏     | 13074/31100 [06:16<08:23, 35.84it/s]
 42%|████▏     | 13078/31100

[2m[36m(_objective pid=49074)[0m {'loss': 0.1076, 'learning_rate': 1.3149588729783442e-05, 'epoch': 2.17}


[2m[36m(_objective pid=49074)[0m  43%|████▎     | 13504/31100 [06:27<08:22, 34.99it/s]
 43%|████▎     | 13508/31100 [06:28<08:42, 33.68it/s]
 43%|████▎     | 13512/31100 [06:28<08:41, 33.71it/s]
 43%|████▎     | 13516/31100 [06:28<08:30, 34.46it/s]
 43%|████▎     | 13520/31100 [06:28<08:21, 35.07it/s]
 43%|████▎     | 13524/31100 [06:28<08:08, 36.00it/s]
 43%|████▎     | 13528/31100 [06:28<08:11, 35.78it/s]
 44%|████▎     | 13532/31100 [06:28<08:10, 35.83it/s]
 44%|████▎     | 13536/31100 [06:28<08:05, 36.20it/s]
 44%|████▎     | 13540/31100 [06:28<08:22, 34.98it/s]
 44%|████▎     | 13544/31100 [06:29<08:13, 35.54it/s]
 44%|████▎     | 13548/31100 [06:29<08:42, 33.61it/s]
 44%|████▎     | 13552/31100 [06:29<08:39, 33.80it/s]
 44%|████▎     | 13556/31100 [06:29<08:25, 34.72it/s]
 44%|████▎     | 13560/31100 [06:29<08:13, 35.53it/s]
 44%|████▎     | 13564/31100 [06:29<08:05, 36.11it/s]
 44%|████▎     | 13568/31100 [06:29<08:04, 36.22it/s]
 44%|████▎     | 13572/31100 [06:29<08:00, 36

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:32:30. Total running time: 2hr 12min 8s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  45%|████▍     | 13932/31100 [06:40<07:55, 36.12it/s]
 45%|████▍     | 13936/31100 [06:40<07:56, 36.05it/s]
 45%|████▍     | 13940/31100 [06:40<08:05, 35.33it/s]
 45%|████▍     | 13944/31100 [06:40<08:10, 34.96it/s]
 45%|████▍     | 13948/31100 [06:40<08:08, 35.12it/s]
 45%|████▍     | 13952/31100 [06:40<08:08, 35.13it/s]
 45%|████▍     | 13956/31100 [06:40<08:02, 35.54it/s]
 45%|████▍     | 13960/31100 [06:40<07:58, 35.82it/s]
 45%|████▍     | 13964/31100 [06:41<07:51, 36.37it/s]
 45%|████▍     | 13968/31100 [06:41<08:04, 35.38it/s]
 45%|████▍     | 13972/31100 [06:41<08:00, 35.63it/s]
 45%|████▍     | 13976/31100 [06:41<08:05, 35.25it/s]
 45%|████▍     | 13980/31100 [06:41<07:57, 35.89it/s]
 45%|████▍     | 13984/31100 [06:41<07:52, 36.20it/s]
 45%|████▍     | 13988/31100 [06:41<07:48, 36.51it/s]
 45%|████▍     | 13992/31100 [06:41<07:44, 36.84it/s]
 45%|████▌     | 13996/31100 [06:41<07:44, 36.86it/s]


[2m[36m(_objective pid=49074)[0m {'loss': 0.0733, 'learning_rate': 1.2776020868141866e-05, 'epoch': 2.25}


[2m[36m(_objective pid=49074)[0m  45%|████▌     | 14000/31100 [06:42<07:48, 36.48it/s]                                                      45%|████▌     | 14000/31100 [06:42<07:48, 36.48it/s]
 45%|████▌     | 14004/31100 [06:42<07:59, 35.64it/s]
 45%|████▌     | 14008/31100 [06:42<08:02, 35.43it/s]
 45%|████▌     | 14012/31100 [06:42<08:02, 35.44it/s]
 45%|████▌     | 14016/31100 [06:42<08:02, 35.40it/s]
 45%|████▌     | 14020/31100 [06:42<07:50, 36.30it/s]
 45%|████▌     | 14024/31100 [06:42<07:42, 36.95it/s]
 45%|████▌     | 14028/31100 [06:42<07:36, 37.36it/s]
 45%|████▌     | 14032/31100 [06:42<07:32, 37.76it/s]
 45%|████▌     | 14036/31100 [06:43<07:32, 37.70it/s]
 45%|████▌     | 14040/31100 [06:43<07:31, 37.80it/s]
 45%|████▌     | 14044/31100 [06:43<07:35, 37.44it/s]
 45%|████▌     | 14048/31100 [06:43<07:42, 36.89it/s]
 45%|████▌     | 14052/31100 [06:43<07:44, 36.73it/s]
 45%|████▌     | 14056/31100 [06:43<07:36, 37.32it/s]
 45%|████▌     | 14060/31100 [06:43<07:58, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0662, 'learning_rate': 1.2402453006500292e-05, 'epoch': 2.33}


[2m[36m(_objective pid=49074)[0m  47%|████▋     | 14505/31100 [06:56<07:47, 35.52it/s]
 47%|████▋     | 14509/31100 [06:56<07:40, 36.05it/s]
 47%|████▋     | 14513/31100 [06:56<07:35, 36.42it/s]
 47%|████▋     | 14517/31100 [06:56<07:38, 36.21it/s]
 47%|████▋     | 14521/31100 [06:56<07:31, 36.71it/s]
 47%|████▋     | 14525/31100 [06:56<07:22, 37.42it/s]
 47%|████▋     | 14529/31100 [06:56<07:15, 38.07it/s]
 47%|████▋     | 14534/31100 [06:56<07:07, 38.74it/s]
 47%|████▋     | 14539/31100 [06:56<07:02, 39.21it/s]
 47%|████▋     | 14544/31100 [06:57<06:57, 39.61it/s]
 47%|████▋     | 14548/31100 [06:57<06:57, 39.69it/s]
 47%|████▋     | 14552/31100 [06:57<06:58, 39.51it/s]
 47%|████▋     | 14556/31100 [06:57<06:57, 39.61it/s]
 47%|████▋     | 14560/31100 [06:57<07:05, 38.88it/s]
 47%|████▋     | 14564/31100 [06:57<07:04, 38.95it/s]
 47%|████▋     | 14568/31100 [06:57<07:05, 38.89it/s]
 47%|████▋     | 14572/31100 [06:57<07:11, 38.34it/s]
 47%|████▋     | 14576/31100 [06:57<07:07, 38

[2m[36m(_objective pid=49074)[0m {'loss': 0.0833, 'learning_rate': 1.2028885144858717e-05, 'epoch': 2.41}


[2m[36m(_objective pid=49074)[0m  48%|████▊     | 15004/31100 [07:09<07:10, 37.38it/s]
 48%|████▊     | 15008/31100 [07:09<07:05, 37.79it/s]
 48%|████▊     | 15012/31100 [07:09<07:05, 37.82it/s]
 48%|████▊     | 15016/31100 [07:09<07:09, 37.46it/s]
 48%|████▊     | 15020/31100 [07:10<07:02, 38.02it/s]
 48%|████▊     | 15024/31100 [07:10<07:05, 37.79it/s]


Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:33:00. Total running time: 2hr 12min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  48%|████▊     | 15028/31100 [07:10<07:07, 37.60it/s]
 48%|████▊     | 15032/31100 [07:10<07:10, 37.37it/s]
 48%|████▊     | 15036/31100 [07:10<07:04, 37.81it/s]
 48%|████▊     | 15040/31100 [07:10<07:06, 37.65it/s]
 48%|████▊     | 15044/31100 [07:10<07:14, 36.96it/s]
 48%|████▊     | 15048/31100 [07:10<07:17, 36.71it/s]
 48%|████▊     | 15052/31100 [07:10<07:13, 36.99it/s]
 48%|████▊     | 15056/31100 [07:11<07:08, 37.40it/s]
 48%|████▊     | 15060/31100 [07:11<07:06, 37.62it/s]
 48%|████▊     | 15064/31100 [07:11<07:04, 37.79it/s]
 48%|████▊     | 15068/31100 [07:11<07:21, 36.30it/s]
 48%|████▊     | 15072/31100 [07:11<07:25, 35.97it/s]
 48%|████▊     | 15076/31100 [07:11<07:41, 34.75it/s]
 48%|████▊     | 15080/31100 [07:11<07:32, 35.37it/s]
 49%|████▊     | 15084/31100 [07:11<07:24, 36.03it/s]
 49%|████▊     | 15088/31100 [07:11<07:17, 36.61it/s]
 49%|████▊     | 15092/31100 [07:12<07:09, 37.27it/s]
 49%|████▊     | 15096/31100 [07:12<07:12, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.1216, 'learning_rate': 1.1655317283217141e-05, 'epoch': 2.49}


[2m[36m(_objective pid=49074)[0m  50%|████▉     | 15504/31100 [07:23<06:53, 37.76it/s]
 50%|████▉     | 15508/31100 [07:23<06:52, 37.82it/s]
 50%|████▉     | 15512/31100 [07:23<06:59, 37.20it/s]
 50%|████▉     | 15516/31100 [07:23<06:54, 37.57it/s]
 50%|████▉     | 15520/31100 [07:23<06:54, 37.55it/s]
 50%|████▉     | 15524/31100 [07:23<06:50, 37.95it/s]
 50%|████▉     | 15528/31100 [07:23<06:45, 38.39it/s]
 50%|████▉     | 15532/31100 [07:24<07:07, 36.42it/s]
 50%|████▉     | 15536/31100 [07:24<07:08, 36.35it/s]
 50%|████▉     | 15540/31100 [07:24<07:01, 36.95it/s]
 50%|████▉     | 15544/31100 [07:24<07:06, 36.47it/s]
 50%|████▉     | 15548/31100 [07:24<07:08, 36.31it/s]
 50%|█████     | 15552/31100 [07:24<07:05, 36.56it/s]
 50%|█████     | 15556/31100 [07:24<06:59, 37.06it/s]
 50%|█████     | 15560/31100 [07:24<06:56, 37.33it/s]
 50%|█████     | 15564/31100 [07:24<06:54, 37.44it/s]
 50%|█████     | 15568/31100 [07:25<06:51, 37.76it/s]
 50%|█████     | 15572/31100 [07:25<06:59, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.0985, 'learning_rate': 1.1281749421575567e-05, 'epoch': 2.57}


[2m[36m(_objective pid=49074)[0m  51%|█████▏    | 16007/31100 [07:36<07:18, 34.46it/s]
 51%|█████▏    | 16011/31100 [07:37<07:16, 34.54it/s]
 51%|█████▏    | 16015/31100 [07:37<07:08, 35.19it/s]
 52%|█████▏    | 16019/31100 [07:37<07:08, 35.21it/s]
 52%|█████▏    | 16023/31100 [07:37<07:22, 34.05it/s]
 52%|█████▏    | 16027/31100 [07:37<07:24, 33.92it/s]
 52%|█████▏    | 16031/31100 [07:37<07:12, 34.88it/s]
 52%|█████▏    | 16035/31100 [07:37<07:07, 35.26it/s]
 52%|█████▏    | 16039/31100 [07:37<06:57, 36.06it/s]
 52%|█████▏    | 16043/31100 [07:37<06:48, 36.85it/s]
 52%|█████▏    | 16047/31100 [07:38<07:01, 35.74it/s]
 52%|█████▏    | 16051/31100 [07:38<06:59, 35.85it/s]
 52%|█████▏    | 16055/31100 [07:38<07:13, 34.70it/s]
 52%|█████▏    | 16059/31100 [07:38<07:19, 34.21it/s]
 52%|█████▏    | 16063/31100 [07:38<07:20, 34.16it/s]
 52%|█████▏    | 16067/31100 [07:38<07:16, 34.41it/s]
 52%|█████▏    | 16071/31100 [07:38<06:59, 35.83it/s]
 52%|█████▏    | 16075/31100 [07:38<06:50, 36

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:33:30. Total running time: 2hr 13min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  52%|█████▏    | 16136/31100 [07:40<06:36, 37.72it/s]
 52%|█████▏    | 16140/31100 [07:40<06:41, 37.26it/s]
 52%|█████▏    | 16144/31100 [07:40<06:47, 36.67it/s]
 52%|█████▏    | 16148/31100 [07:40<06:52, 36.24it/s]
 52%|█████▏    | 16153/31100 [07:40<06:38, 37.47it/s]
 52%|█████▏    | 16157/31100 [07:40<06:38, 37.51it/s]
 52%|█████▏    | 16161/31100 [07:41<06:40, 37.26it/s]
 52%|█████▏    | 16165/31100 [07:41<06:37, 37.59it/s]
 52%|█████▏    | 16169/31100 [07:41<06:37, 37.52it/s]
 52%|█████▏    | 16173/31100 [07:41<06:35, 37.73it/s]
 52%|█████▏    | 16177/31100 [07:41<06:41, 37.21it/s]
 52%|█████▏    | 16181/31100 [07:41<06:39, 37.31it/s]
 52%|█████▏    | 16185/31100 [07:41<06:48, 36.48it/s]
 52%|█████▏    | 16189/31100 [07:41<06:43, 36.95it/s]
 52%|█████▏    | 16193/31100 [07:41<06:38, 37.38it/s]
 52%|█████▏    | 16197/31100 [07:42<06:55, 35.89it/s]
 52%|█████▏    | 16201/31100 [07:42<06:47, 36.55it/s]
 52%|█████▏    | 16205/31100 [07:42<06:38, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.1025, 'learning_rate': 1.0908181559933992e-05, 'epoch': 2.65}


[2m[36m(_objective pid=49074)[0m  53%|█████▎    | 16503/31100 [07:50<06:38, 36.66it/s]
 53%|█████▎    | 16507/31100 [07:50<06:36, 36.78it/s]
 53%|█████▎    | 16511/31100 [07:50<06:41, 36.33it/s]
 53%|█████▎    | 16515/31100 [07:50<06:32, 37.19it/s]
 53%|█████▎    | 16519/31100 [07:50<06:29, 37.46it/s]
 53%|█████▎    | 16523/31100 [07:50<06:33, 37.03it/s]
 53%|█████▎    | 16527/31100 [07:50<06:37, 36.65it/s]
 53%|█████▎    | 16531/31100 [07:50<06:41, 36.26it/s]
 53%|█████▎    | 16535/31100 [07:51<06:37, 36.60it/s]
 53%|█████▎    | 16539/31100 [07:51<06:32, 37.07it/s]
 53%|█████▎    | 16543/31100 [07:51<06:55, 35.05it/s]
 53%|█████▎    | 16547/31100 [07:51<06:44, 35.95it/s]
 53%|█████▎    | 16551/31100 [07:51<06:44, 35.94it/s]
 53%|█████▎    | 16555/31100 [07:51<06:44, 35.99it/s]
 53%|█████▎    | 16559/31100 [07:51<06:45, 35.83it/s]
 53%|█████▎    | 16563/31100 [07:51<06:38, 36.52it/s]
 53%|█████▎    | 16567/31100 [07:51<06:31, 37.13it/s]
 53%|█████▎    | 16571/31100 [07:52<06:30, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.1035, 'learning_rate': 1.0534613698292416e-05, 'epoch': 2.73}


[2m[36m(_objective pid=49074)[0m  55%|█████▍    | 17007/31100 [08:03<06:02, 38.89it/s]
 55%|█████▍    | 17011/31100 [08:03<06:01, 39.02it/s]
 55%|█████▍    | 17015/31100 [08:03<05:59, 39.17it/s]
 55%|█████▍    | 17019/31100 [08:04<05:57, 39.37it/s]
 55%|█████▍    | 17024/31100 [08:04<05:55, 39.63it/s]
 55%|█████▍    | 17028/31100 [08:04<05:59, 39.19it/s]
 55%|█████▍    | 17032/31100 [08:04<06:02, 38.78it/s]
 55%|█████▍    | 17036/31100 [08:04<06:08, 38.20it/s]
 55%|█████▍    | 17040/31100 [08:04<06:06, 38.41it/s]
 55%|█████▍    | 17044/31100 [08:04<06:01, 38.84it/s]
 55%|█████▍    | 17048/31100 [08:04<06:14, 37.51it/s]
 55%|█████▍    | 17052/31100 [08:04<06:21, 36.80it/s]
 55%|█████▍    | 17056/31100 [08:04<06:19, 37.01it/s]
 55%|█████▍    | 17060/31100 [08:05<06:14, 37.52it/s]
 55%|█████▍    | 17064/31100 [08:05<06:15, 37.43it/s]
 55%|█████▍    | 17068/31100 [08:05<06:11, 37.81it/s]
 55%|█████▍    | 17072/31100 [08:05<06:09, 37.95it/s]
 55%|█████▍    | 17076/31100 [08:05<06:08, 38

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:34:00. Total running time: 2hr 13min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  55%|█████▌    | 17257/31100 [08:10<06:32, 35.25it/s]
 56%|█████▌    | 17261/31100 [08:10<06:34, 35.05it/s]
 56%|█████▌    | 17265/31100 [08:10<06:30, 35.38it/s]
 56%|█████▌    | 17269/31100 [08:10<06:39, 34.63it/s]
 56%|█████▌    | 17273/31100 [08:11<06:40, 34.52it/s]
 56%|█████▌    | 17277/31100 [08:11<06:37, 34.74it/s]
 56%|█████▌    | 17281/31100 [08:11<06:25, 35.88it/s]
 56%|█████▌    | 17285/31100 [08:11<06:19, 36.43it/s]
 56%|█████▌    | 17289/31100 [08:11<06:13, 37.02it/s]
 56%|█████▌    | 17293/31100 [08:11<06:09, 37.42it/s]
 56%|█████▌    | 17297/31100 [08:11<06:03, 37.94it/s]
 56%|█████▌    | 17301/31100 [08:11<06:04, 37.90it/s]
 56%|█████▌    | 17305/31100 [08:11<06:04, 37.80it/s]
 56%|█████▌    | 17309/31100 [08:12<06:10, 37.23it/s]
 56%|█████▌    | 17313/31100 [08:12<06:11, 37.11it/s]
 56%|█████▌    | 17317/31100 [08:12<06:17, 36.49it/s]
 56%|█████▌    | 17321/31100 [08:12<06:11, 37.08it/s]
 56%|█████▌    | 17325/31100 [08:12<06:10, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.117, 'learning_rate': 1.0161045836650842e-05, 'epoch': 2.81}


[2m[36m(_objective pid=49074)[0m  56%|█████▋    | 17503/31100 [08:17<05:55, 38.20it/s]
 56%|█████▋    | 17507/31100 [08:17<06:12, 36.51it/s]
 56%|█████▋    | 17511/31100 [08:17<06:22, 35.48it/s]
 56%|█████▋    | 17515/31100 [08:17<06:11, 36.54it/s]
 56%|█████▋    | 17519/31100 [08:17<06:04, 37.21it/s]
 56%|█████▋    | 17523/31100 [08:17<06:18, 35.90it/s]
 56%|█████▋    | 17528/31100 [08:17<06:01, 37.51it/s]
 56%|█████▋    | 17532/31100 [08:18<06:05, 37.11it/s]
 56%|█████▋    | 17537/31100 [08:18<06:13, 36.30it/s]
 56%|█████▋    | 17541/31100 [08:18<06:07, 36.91it/s]
 56%|█████▋    | 17545/31100 [08:18<06:02, 37.42it/s]
 56%|█████▋    | 17549/31100 [08:18<05:56, 38.03it/s]
 56%|█████▋    | 17553/31100 [08:18<05:54, 38.17it/s]
 56%|█████▋    | 17557/31100 [08:18<06:03, 37.25it/s]
 56%|█████▋    | 17561/31100 [08:18<06:07, 36.84it/s]
 56%|█████▋    | 17565/31100 [08:18<06:03, 37.20it/s]
 56%|█████▋    | 17569/31100 [08:18<05:57, 37.87it/s]
 57%|█████▋    | 17573/31100 [08:19<05:56, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.1176, 'learning_rate': 9.787477975009266e-06, 'epoch': 2.89}


[2m[36m(_objective pid=49074)[0m  58%|█████▊    | 17999/31100 [08:30<05:40, 38.51it/s]                                                      58%|█████▊    | 18000/31100 [08:30<05:40, 38.51it/s]
 58%|█████▊    | 18003/31100 [08:30<05:39, 38.58it/s]
 58%|█████▊    | 18007/31100 [08:31<05:51, 37.27it/s]
 58%|█████▊    | 18011/31100 [08:31<05:47, 37.68it/s]
 58%|█████▊    | 18015/31100 [08:31<05:41, 38.33it/s]
 58%|█████▊    | 18019/31100 [08:31<06:00, 36.26it/s]
 58%|█████▊    | 18024/31100 [08:31<05:49, 37.44it/s]
 58%|█████▊    | 18028/31100 [08:31<05:46, 37.76it/s]
 58%|█████▊    | 18032/31100 [08:31<05:43, 38.00it/s]
 58%|█████▊    | 18036/31100 [08:31<05:46, 37.71it/s]
 58%|█████▊    | 18040/31100 [08:31<05:40, 38.34it/s]
 58%|█████▊    | 18044/31100 [08:32<05:45, 37.84it/s]
 58%|█████▊    | 18049/31100 [08:32<05:39, 38.42it/s]
 58%|█████▊    | 18054/31100 [08:32<05:34, 38.97it/s]
 58%|█████▊    | 18059/31100 [08:32<05:30, 39.41it/s]
 58%|█████▊    | 18063/31100 [08:32<05:36, 3

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:34:30. Total running time: 2hr 14min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  59%|█████▉    | 18368/31100 [08:40<05:48, 36.58it/s]
 59%|█████▉    | 18372/31100 [08:40<05:46, 36.75it/s]
 59%|█████▉    | 18376/31100 [08:40<05:56, 35.73it/s]
 59%|█████▉    | 18380/31100 [08:40<05:58, 35.52it/s]
 59%|█████▉    | 18384/31100 [08:41<05:50, 36.28it/s]
 59%|█████▉    | 18388/31100 [08:41<05:49, 36.41it/s]
 59%|█████▉    | 18392/31100 [08:41<05:45, 36.78it/s]
 59%|█████▉    | 18396/31100 [08:41<05:46, 36.69it/s]
 59%|█████▉    | 18400/31100 [08:41<05:49, 36.36it/s]
 59%|█████▉    | 18404/31100 [08:41<06:06, 34.64it/s]
 59%|█████▉    | 18408/31100 [08:41<06:00, 35.18it/s]
 59%|█████▉    | 18412/31100 [08:41<06:08, 34.48it/s]
 59%|█████▉    | 18416/31100 [08:41<06:03, 34.90it/s]
 59%|█████▉    | 18420/31100 [08:42<05:54, 35.76it/s]
 59%|█████▉    | 18424/31100 [08:42<05:49, 36.29it/s]
 59%|█████▉    | 18428/31100 [08:42<05:58, 35.31it/s]
 59%|█████▉    | 18432/31100 [08:42<06:01, 35.03it/s]
 59%|█████▉    | 18436/31100 [08:42<06:08, 34

[2m[36m(_objective pid=49074)[0m {'loss': 0.1161, 'learning_rate': 9.413910113367691e-06, 'epoch': 2.97}


[2m[36m(_objective pid=49074)[0m  59%|█████▉    | 18500/31100 [08:44<05:27, 38.44it/s]                                                      59%|█████▉    | 18500/31100 [08:44<05:27, 38.44it/s]
 59%|█████▉    | 18504/31100 [08:44<05:30, 38.13it/s]
 60%|█████▉    | 18508/31100 [08:44<05:30, 38.11it/s]
 60%|█████▉    | 18512/31100 [08:44<05:28, 38.35it/s]
 60%|█████▉    | 18516/31100 [08:44<05:32, 37.90it/s]
 60%|█████▉    | 18520/31100 [08:44<05:31, 37.99it/s]
 60%|█████▉    | 18524/31100 [08:44<05:29, 38.12it/s]
 60%|█████▉    | 18528/31100 [08:45<05:34, 37.57it/s]
 60%|█████▉    | 18532/31100 [08:45<05:32, 37.83it/s]
 60%|█████▉    | 18536/31100 [08:45<05:28, 38.25it/s]
 60%|█████▉    | 18540/31100 [08:45<05:24, 38.67it/s]
 60%|█████▉    | 18544/31100 [08:45<05:29, 38.10it/s]
 60%|█████▉    | 18548/31100 [08:45<05:36, 37.27it/s]
 60%|█████▉    | 18552/31100 [08:45<05:55, 35.30it/s]
 60%|█████▉    | 18556/31100 [08:45<05:44, 36.43it/s]
 60%|█████▉    | 18560/31100 [08:45<05:43, 3

Trial _objective_f556c_00019 finished iteration 3 at 2023-09-11 15:34:48. Total running time: 2hr 14min 26s
+-------------------------------------------------+
| Trial _objective_f556c_00019 result             |
+-------------------------------------------------+
| time_this_iter_s                         181.66 |
| time_total_s                            540.582 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.24261 |
| eval_runtime                             9.5574 |
| eval_samples_per_second                 433.904 |
| eval_steps_per_second                    13.602 |
| objective                               0.24261 |
+-------------------------------------------------+

[2m[36m(_objective pid=49074)[0m {'eval_loss': 0.24260881543159485, 'eval_runtime': 9.5574, 'eval_samples_per_second': 433.904, 'eval_steps_per_second': 13.602, 'epoch': 3.0}


[2m[36m(_objective pid=49074)[0m  60%|██████    | 18662/31100 [08:59<2:53:50,  1.19it/s]
 60%|██████    | 18666/31100 [08:59<2:03:39,  1.68it/s]
 60%|██████    | 18670/31100 [08:59<1:28:37,  2.34it/s]
 60%|██████    | 18674/31100 [08:59<1:03:55,  3.24it/s]
 60%|██████    | 18678/31100 [08:59<46:18,  4.47it/s]  
 60%|██████    | 18682/31100 [09:00<34:09,  6.06it/s]
 60%|██████    | 18686/31100 [09:00<25:28,  8.12it/s]
 60%|██████    | 18690/31100 [09:00<19:40, 10.51it/s]
 60%|██████    | 18694/31100 [09:00<15:19, 13.49it/s]
 60%|██████    | 18698/31100 [09:00<12:18, 16.79it/s]
 60%|██████    | 18702/31100 [09:00<10:10, 20.32it/s]
 60%|██████    | 18706/31100 [09:00<08:43, 23.67it/s]
 60%|██████    | 18710/31100 [09:00<07:43, 26.76it/s]
 60%|██████    | 18714/31100 [09:00<07:11, 28.68it/s]
 60%|██████    | 18718/31100 [09:01<06:52, 30.00it/s]
 60%|██████    | 18722/31100 [09:01<06:33, 31.48it/s]
 60%|██████    | 18726/31100 [09:01<06:17, 32.81it/s]
 60%|██████    | 18730/31100 [09:01

[2m[36m(_objective pid=49074)[0m {'loss': 0.0803, 'learning_rate': 9.040342251726117e-06, 'epoch': 3.05}


[2m[36m(_objective pid=49074)[0m  61%|██████    | 18999/31100 [09:08<05:16, 38.18it/s]                                                      61%|██████    | 19000/31100 [09:08<05:16, 38.18it/s]
 61%|██████    | 19003/31100 [09:08<05:19, 37.87it/s]
 61%|██████    | 19007/31100 [09:08<05:21, 37.65it/s]
 61%|██████    | 19011/31100 [09:08<05:17, 38.02it/s]
 61%|██████    | 19015/31100 [09:08<05:18, 37.93it/s]
 61%|██████    | 19019/31100 [09:09<05:16, 38.17it/s]
 61%|██████    | 19023/31100 [09:09<05:14, 38.44it/s]
 61%|██████    | 19027/31100 [09:09<05:14, 38.43it/s]
 61%|██████    | 19031/31100 [09:09<05:14, 38.43it/s]
 61%|██████    | 19035/31100 [09:09<05:13, 38.51it/s]
 61%|██████    | 19040/31100 [09:09<05:07, 39.23it/s]
 61%|██████    | 19044/31100 [09:09<05:12, 38.59it/s]
 61%|██████    | 19048/31100 [09:09<05:15, 38.25it/s]
 61%|██████▏   | 19052/31100 [09:09<05:33, 36.17it/s]
 61%|██████▏   | 19056/31100 [09:10<05:29, 36.53it/s]
 61%|██████▏   | 19060/31100 [09:10<05:32, 3

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:35:00. Total running time: 2hr 14min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  61%|██████▏   | 19076/31100 [09:10<05:26, 36.81it/s]
 61%|██████▏   | 19080/31100 [09:10<05:22, 37.23it/s]
 61%|██████▏   | 19084/31100 [09:10<05:25, 36.92it/s]
 61%|██████▏   | 19088/31100 [09:10<05:37, 35.62it/s]
 61%|██████▏   | 19092/31100 [09:11<05:33, 36.04it/s]
 61%|██████▏   | 19096/31100 [09:11<05:37, 35.58it/s]
 61%|██████▏   | 19100/31100 [09:11<05:29, 36.38it/s]
 61%|██████▏   | 19104/31100 [09:11<05:34, 35.82it/s]
 61%|██████▏   | 19108/31100 [09:11<05:27, 36.56it/s]
 61%|██████▏   | 19112/31100 [09:11<05:26, 36.71it/s]
 61%|██████▏   | 19116/31100 [09:11<05:25, 36.82it/s]
 61%|██████▏   | 19120/31100 [09:11<05:22, 37.12it/s]
 61%|██████▏   | 19124/31100 [09:11<05:22, 37.19it/s]
 62%|██████▏   | 19128/31100 [09:12<05:19, 37.50it/s]
 62%|██████▏   | 19132/31100 [09:12<05:18, 37.52it/s]
 62%|██████▏   | 19136/31100 [09:12<05:22, 37.10it/s]
 62%|██████▏   | 19140/31100 [09:12<05:21, 37.24it/s]
 62%|██████▏   | 19144/31100 [09:12<05:24, 36

[2m[36m(_objective pid=49074)[0m {'loss': 0.0983, 'learning_rate': 8.666774390084541e-06, 'epoch': 3.14}


[2m[36m(_objective pid=49074)[0m  63%|██████▎   | 19504/31100 [09:22<05:30, 35.05it/s]
 63%|██████▎   | 19508/31100 [09:22<05:24, 35.68it/s]
 63%|██████▎   | 19512/31100 [09:22<05:27, 35.41it/s]
 63%|██████▎   | 19516/31100 [09:22<05:24, 35.70it/s]
 63%|██████▎   | 19520/31100 [09:22<05:19, 36.26it/s]
 63%|██████▎   | 19524/31100 [09:22<05:15, 36.73it/s]
 63%|██████▎   | 19528/31100 [09:23<05:13, 36.95it/s]
 63%|██████▎   | 19532/31100 [09:23<05:14, 36.75it/s]
 63%|██████▎   | 19536/31100 [09:23<05:10, 37.26it/s]
 63%|██████▎   | 19540/31100 [09:23<05:12, 36.97it/s]
 63%|██████▎   | 19544/31100 [09:23<05:10, 37.26it/s]
 63%|██████▎   | 19548/31100 [09:23<05:20, 36.04it/s]
 63%|██████▎   | 19552/31100 [09:23<05:28, 35.18it/s]
 63%|██████▎   | 19556/31100 [09:23<05:23, 35.65it/s]
 63%|██████▎   | 19560/31100 [09:23<05:15, 36.62it/s]
 63%|██████▎   | 19564/31100 [09:24<05:33, 34.57it/s]
 63%|██████▎   | 19568/31100 [09:24<05:43, 33.61it/s]
 63%|██████▎   | 19572/31100 [09:24<05:35, 34

[2m[36m(_objective pid=49074)[0m {'loss': 0.0789, 'learning_rate': 8.293206528442966e-06, 'epoch': 3.22}


[2m[36m(_objective pid=49074)[0m  64%|██████▍   | 20001/31100 [09:36<05:00, 36.91it/s]
 64%|██████▍   | 20005/31100 [09:36<04:55, 37.55it/s]
 64%|██████▍   | 20009/31100 [09:36<04:54, 37.70it/s]
 64%|██████▍   | 20013/31100 [09:36<05:07, 36.02it/s]
 64%|██████▍   | 20017/31100 [09:36<05:18, 34.75it/s]
 64%|██████▍   | 20021/31100 [09:36<05:12, 35.44it/s]
 64%|██████▍   | 20025/31100 [09:36<05:11, 35.56it/s]
 64%|██████▍   | 20029/31100 [09:36<05:25, 34.05it/s]
 64%|██████▍   | 20033/31100 [09:37<05:13, 35.33it/s]
 64%|██████▍   | 20037/31100 [09:37<05:04, 36.34it/s]
 64%|██████▍   | 20041/31100 [09:37<04:56, 37.30it/s]
 64%|██████▍   | 20045/31100 [09:37<04:53, 37.69it/s]
 64%|██████▍   | 20049/31100 [09:37<04:48, 38.30it/s]
 64%|██████▍   | 20053/31100 [09:37<04:47, 38.49it/s]
 64%|██████▍   | 20057/31100 [09:37<05:03, 36.42it/s]
 65%|██████▍   | 20061/31100 [09:37<05:04, 36.30it/s]
 65%|██████▍   | 20065/31100 [09:37<04:55, 37.32it/s]
 65%|██████▍   | 20069/31100 [09:38<04:55, 37

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:35:30. Total running time: 2hr 15min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  65%|██████▍   | 20174/31100 [09:40<04:45, 38.23it/s]
 65%|██████▍   | 20178/31100 [09:40<04:59, 36.47it/s]
 65%|██████▍   | 20182/31100 [09:41<04:53, 37.19it/s]
 65%|██████▍   | 20186/31100 [09:41<04:50, 37.55it/s]
 65%|██████▍   | 20190/31100 [09:41<04:47, 38.00it/s]
 65%|██████▍   | 20194/31100 [09:41<05:00, 36.27it/s]
 65%|██████▍   | 20198/31100 [09:41<04:54, 37.07it/s]
 65%|██████▍   | 20202/31100 [09:41<04:50, 37.45it/s]
 65%|██████▍   | 20206/31100 [09:41<04:49, 37.60it/s]
 65%|██████▍   | 20210/31100 [09:41<05:03, 35.90it/s]
 65%|██████▍   | 20214/31100 [09:41<04:57, 36.62it/s]
 65%|██████▌   | 20218/31100 [09:42<04:55, 36.77it/s]
 65%|██████▌   | 20222/31100 [09:42<04:49, 37.60it/s]
 65%|██████▌   | 20226/31100 [09:42<04:44, 38.21it/s]
 65%|██████▌   | 20230/31100 [09:42<04:43, 38.36it/s]
 65%|██████▌   | 20234/31100 [09:42<04:50, 37.45it/s]
 65%|██████▌   | 20238/31100 [09:42<04:56, 36.69it/s]
 65%|██████▌   | 20242/31100 [09:42<04:59, 36

[2m[36m(_objective pid=49074)[0m {'loss': 0.0663, 'learning_rate': 7.91963866680139e-06, 'epoch': 3.3}


[2m[36m(_objective pid=49074)[0m  66%|██████▌   | 20498/31100 [09:49<04:36, 38.31it/s]                                                      66%|██████▌   | 20500/31100 [09:49<04:36, 38.31it/s]
 66%|██████▌   | 20502/31100 [09:49<04:38, 38.08it/s]
 66%|██████▌   | 20506/31100 [09:49<04:40, 37.81it/s]
 66%|██████▌   | 20510/31100 [09:49<04:38, 38.01it/s]
 66%|██████▌   | 20514/31100 [09:50<04:39, 37.89it/s]
 66%|██████▌   | 20518/31100 [09:50<04:37, 38.09it/s]
 66%|██████▌   | 20522/31100 [09:50<04:34, 38.47it/s]
 66%|██████▌   | 20526/31100 [09:50<04:33, 38.69it/s]
 66%|██████▌   | 20530/31100 [09:50<04:31, 38.87it/s]
 66%|██████▌   | 20534/31100 [09:50<04:31, 38.98it/s]
 66%|██████▌   | 20538/31100 [09:50<04:29, 39.15it/s]
 66%|██████▌   | 20542/31100 [09:50<04:30, 39.07it/s]
 66%|██████▌   | 20546/31100 [09:50<04:31, 38.90it/s]
 66%|██████▌   | 20550/31100 [09:51<04:29, 39.08it/s]
 66%|██████▌   | 20554/31100 [09:51<04:28, 39.24it/s]
 66%|██████▌   | 20558/31100 [09:51<04:28, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0542, 'learning_rate': 7.546070805159815e-06, 'epoch': 3.38}


[2m[36m(_objective pid=49074)[0m  68%|██████▊   | 21003/31100 [10:02<04:40, 36.00it/s]
 68%|██████▊   | 21007/31100 [10:03<04:39, 36.08it/s]
 68%|██████▊   | 21011/31100 [10:03<04:43, 35.57it/s]
 68%|██████▊   | 21015/31100 [10:03<04:42, 35.66it/s]
 68%|██████▊   | 21019/31100 [10:03<04:43, 35.58it/s]
 68%|██████▊   | 21023/31100 [10:03<04:44, 35.41it/s]
 68%|██████▊   | 21027/31100 [10:03<04:48, 34.86it/s]
 68%|██████▊   | 21031/31100 [10:03<04:50, 34.63it/s]
 68%|██████▊   | 21035/31100 [10:03<04:46, 35.18it/s]
 68%|██████▊   | 21039/31100 [10:03<04:47, 35.04it/s]
 68%|██████▊   | 21043/31100 [10:04<04:49, 34.71it/s]
 68%|██████▊   | 21047/31100 [10:04<04:51, 34.53it/s]
 68%|██████▊   | 21051/31100 [10:04<04:53, 34.25it/s]
 68%|██████▊   | 21055/31100 [10:04<04:46, 35.02it/s]
 68%|██████▊   | 21059/31100 [10:04<04:48, 34.81it/s]
 68%|██████▊   | 21063/31100 [10:04<04:45, 35.15it/s]
 68%|██████▊   | 21067/31100 [10:04<04:55, 33.99it/s]
 68%|██████▊   | 21071/31100 [10:04<04:42, 35

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:36:00. Total running time: 2hr 15min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  68%|██████▊   | 21295/31100 [10:10<04:09, 39.35it/s]
 68%|██████▊   | 21299/31100 [10:10<04:15, 38.35it/s]
 68%|██████▊   | 21303/31100 [10:11<04:17, 38.00it/s]
 69%|██████▊   | 21307/31100 [10:11<04:20, 37.53it/s]
 69%|██████▊   | 21311/31100 [10:11<04:21, 37.42it/s]
 69%|██████▊   | 21315/31100 [10:11<04:22, 37.24it/s]
 69%|██████▊   | 21319/31100 [10:11<04:17, 37.96it/s]
 69%|██████▊   | 21323/31100 [10:11<04:17, 38.03it/s]
 69%|██████▊   | 21327/31100 [10:11<04:15, 38.30it/s]
 69%|██████▊   | 21331/31100 [10:11<04:13, 38.58it/s]
 69%|██████▊   | 21336/31100 [10:11<04:08, 39.35it/s]
 69%|██████▊   | 21340/31100 [10:11<04:06, 39.53it/s]
 69%|██████▊   | 21345/31100 [10:12<04:07, 39.46it/s]
 69%|██████▊   | 21349/31100 [10:12<04:09, 39.10it/s]
 69%|██████▊   | 21354/31100 [10:12<04:06, 39.57it/s]
 69%|██████▊   | 21358/31100 [10:12<04:08, 39.23it/s]
 69%|██████▊   | 21362/31100 [10:12<04:07, 39.34it/s]
 69%|██████▊   | 21366/31100 [10:12<04:07, 39

[2m[36m(_objective pid=49074)[0m {'loss': 0.0586, 'learning_rate': 7.1725029435182416e-06, 'epoch': 3.46}


 69%|██████▉   | 21501/31100 [10:16<04:45, 33.64it/s]
 69%|██████▉   | 21505/31100 [10:16<05:03, 31.62it/s]
 69%|██████▉   | 21509/31100 [10:16<05:04, 31.47it/s]
 69%|██████▉   | 21513/31100 [10:16<04:51, 32.90it/s]
 69%|██████▉   | 21517/31100 [10:16<05:17, 30.23it/s]
 69%|██████▉   | 21521/31100 [10:16<05:02, 31.69it/s]
 69%|██████▉   | 21525/31100 [10:17<04:48, 33.24it/s]
 69%|██████▉   | 21529/31100 [10:17<04:39, 34.26it/s]
 69%|██████▉   | 21533/31100 [10:17<04:34, 34.89it/s]
 69%|██████▉   | 21537/31100 [10:17<04:24, 36.10it/s]
 69%|██████▉   | 21541/31100 [10:17<04:20, 36.71it/s]
 69%|██████▉   | 21545/31100 [10:17<04:19, 36.75it/s]
 69%|██████▉   | 21549/31100 [10:17<04:27, 35.73it/s]
 69%|██████▉   | 21553/31100 [10:17<04:24, 36.12it/s]
 69%|██████▉   | 21557/31100 [10:17<04:25, 35.96it/s]
 69%|██████▉   | 21561/31100 [10:18<04:23, 36.19it/s]
 69%|██████▉   | 21565/31100 [10:18<04:23, 36.24it/s]
 69%|██████▉   | 21569/31100 [10:18<04:29, 35.39it/s]
 69%|██████▉   | 21573/31100

[2m[36m(_objective pid=49074)[0m {'loss': 0.0916, 'learning_rate': 6.798935081876666e-06, 'epoch': 3.54}


[2m[36m(_objective pid=49074)[0m  71%|███████   | 22000/31100 [10:29<03:59, 37.96it/s]                                                      71%|███████   | 22000/31100 [10:29<03:59, 37.96it/s]
 71%|███████   | 22004/31100 [10:30<04:01, 37.70it/s]
 71%|███████   | 22008/31100 [10:30<03:57, 38.22it/s]
 71%|███████   | 22012/31100 [10:30<03:57, 38.29it/s]
 71%|███████   | 22016/31100 [10:30<04:05, 36.96it/s]
 71%|███████   | 22020/31100 [10:30<04:05, 37.03it/s]
 71%|███████   | 22024/31100 [10:30<04:00, 37.70it/s]
 71%|███████   | 22028/31100 [10:30<04:09, 36.42it/s]
 71%|███████   | 22032/31100 [10:30<04:06, 36.78it/s]
 71%|███████   | 22036/31100 [10:30<04:04, 37.08it/s]
 71%|███████   | 22040/31100 [10:30<04:05, 36.85it/s]
 71%|███████   | 22044/31100 [10:31<04:03, 37.20it/s]
 71%|███████   | 22048/31100 [10:31<04:11, 35.95it/s]
 71%|███████   | 22052/31100 [10:31<04:12, 35.90it/s]
 71%|███████   | 22056/31100 [10:31<04:20, 34.70it/s]
 71%|███████   | 22060/31100 [10:31<04:32, 3

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:36:30. Total running time: 2hr 16min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  72%|███████▏  | 22398/31100 [10:40<03:46, 38.36it/s]
 72%|███████▏  | 22402/31100 [10:40<03:48, 38.12it/s]
 72%|███████▏  | 22406/31100 [10:40<03:45, 38.48it/s]
 72%|███████▏  | 22410/31100 [10:41<03:46, 38.32it/s]
 72%|███████▏  | 22414/31100 [10:41<03:52, 37.34it/s]
 72%|███████▏  | 22418/31100 [10:41<03:47, 38.08it/s]
 72%|███████▏  | 22422/31100 [10:41<03:46, 38.35it/s]
 72%|███████▏  | 22426/31100 [10:41<03:47, 38.20it/s]
 72%|███████▏  | 22430/31100 [10:41<03:48, 37.89it/s]
 72%|███████▏  | 22434/31100 [10:41<03:50, 37.57it/s]
 72%|███████▏  | 22438/31100 [10:41<03:53, 37.13it/s]
 72%|███████▏  | 22442/31100 [10:41<03:54, 36.86it/s]
 72%|███████▏  | 22446/31100 [10:42<04:02, 35.75it/s]
 72%|███████▏  | 22450/31100 [10:42<04:06, 35.08it/s]
 72%|███████▏  | 22455/31100 [10:42<03:54, 36.94it/s]
 72%|███████▏  | 22459/31100 [10:42<03:50, 37.43it/s]
 72%|███████▏  | 22464/31100 [10:42<03:45, 38.34it/s]
 72%|███████▏  | 22468/31100 [10:42<03:44, 38

[2m[36m(_objective pid=49074)[0m {'loss': 0.0666, 'learning_rate': 6.425367220235091e-06, 'epoch': 3.62}


[2m[36m(_objective pid=49074)[0m  72%|███████▏  | 22497/31100 [10:43<03:41, 38.85it/s]                                                      72%|███████▏  | 22500/31100 [10:43<03:41, 38.85it/s]
 72%|███████▏  | 22501/31100 [10:43<03:41, 38.85it/s]
 72%|███████▏  | 22505/31100 [10:43<03:50, 37.27it/s]
 72%|███████▏  | 22510/31100 [10:43<03:44, 38.34it/s]
 72%|███████▏  | 22514/31100 [10:43<03:49, 37.46it/s]
 72%|███████▏  | 22518/31100 [10:43<03:46, 37.91it/s]
 72%|███████▏  | 22523/31100 [10:44<03:40, 38.95it/s]
 72%|███████▏  | 22527/31100 [10:44<03:40, 38.88it/s]
 72%|███████▏  | 22531/31100 [10:44<03:40, 38.78it/s]
 72%|███████▏  | 22535/31100 [10:44<03:42, 38.43it/s]
 72%|███████▏  | 22539/31100 [10:44<03:40, 38.80it/s]
 72%|███████▏  | 22543/31100 [10:44<03:47, 37.63it/s]
 72%|███████▏  | 22547/31100 [10:44<03:49, 37.23it/s]
 73%|███████▎  | 22551/31100 [10:44<03:48, 37.36it/s]
 73%|███████▎  | 22555/31100 [10:44<03:45, 37.84it/s]
 73%|███████▎  | 22559/31100 [10:45<03:46, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0667, 'learning_rate': 6.051799358593516e-06, 'epoch': 3.7}


 74%|███████▍  | 23007/31100 [10:57<03:25, 39.41it/s]
 74%|███████▍  | 23012/31100 [10:57<03:23, 39.75it/s]
 74%|███████▍  | 23016/31100 [10:57<03:24, 39.47it/s]
 74%|███████▍  | 23020/31100 [10:57<03:26, 39.10it/s]
 74%|███████▍  | 23024/31100 [10:57<03:27, 38.97it/s]
 74%|███████▍  | 23028/31100 [10:57<03:30, 38.34it/s]
 74%|███████▍  | 23032/31100 [10:57<03:29, 38.53it/s]
 74%|███████▍  | 23036/31100 [10:58<03:37, 37.06it/s]
 74%|███████▍  | 23040/31100 [10:58<03:34, 37.60it/s]
 74%|███████▍  | 23044/31100 [10:58<03:33, 37.72it/s]
 74%|███████▍  | 23048/31100 [10:58<03:30, 38.26it/s]
 74%|███████▍  | 23053/31100 [10:58<03:26, 38.93it/s]
 74%|███████▍  | 23057/31100 [10:58<03:26, 38.97it/s]
 74%|███████▍  | 23061/31100 [10:58<03:25, 39.18it/s]
 74%|███████▍  | 23065/31100 [10:58<03:27, 38.79it/s]
 74%|███████▍  | 23069/31100 [10:58<03:29, 38.29it/s]
 74%|███████▍  | 23073/31100 [10:58<03:28, 38.46it/s]
 74%|███████▍  | 23077/31100 [10:59<03:27, 38.67it/s]
 74%|███████▍  | 23082/31100

[2m[36m(_objective pid=49074)[0m {'loss': 0.0817, 'learning_rate': 5.6782314969519405e-06, 'epoch': 3.78}
Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:37:00. Total running time: 2hr 16min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

 76%|███████▌  | 23505/31100 [11:10<03:30, 36.06it/s]
 76%|███████▌  | 23509/31100 [11:10<03:35, 35.23it/s]
 76%|███████▌  | 23513/31100 [11:11<03:30, 35.97it/s]
 76%|███████▌  | 23517/31100 [11:11<03:25, 36.86it/s]
 76%|███████▌  | 23522/31100 [11:11<03:19, 38.08it/s]
 76%|███████▌  | 23526/31100 [11:11<03:18, 38.08it/s]
 76%|███████▌  | 23530/31100 [11:11<03:18, 38.12it/s]
 76%|███████▌  | 23534/31100 [11:11<03:16, 38.54it/s]
 76%|███████▌  | 23538/31100 [11:11<03:15, 38.63it/s]
 76%|███████▌  | 23542/31100 [11:11<03:22, 37.33it/s]
 76%|███████▌  | 23546/31100 [11:11<03:23, 37.16it/s]
 76%|███████▌  | 23550/31100 [11:12<03:21, 37.46it/s]
 76%|███████▌  | 23554/31100 [11:12<03:26, 36.58it/s]
 76%|███████▌  | 23558/31100 [11:12<03:25, 36.63it/s]
 76%|███████▌  | 23562/31100 [11:12<03:21, 37.35it/s]
 76%|███████▌  | 23566/31100 [11:12<03:22, 37.22it/s]
 76%|███████▌  | 23570/31100 [11:12<03:22, 37.17it/s]
 76%|███████▌  | 23574/31100 [11:12<03:21, 37.39it/s]
 76%|███████▌  | 23578/31100

[2m[36m(_objective pid=49074)[0m {'loss': 0.0621, 'learning_rate': 5.304663635310365e-06, 'epoch': 3.86}


[2m[36m(_objective pid=49074)[0m  77%|███████▋  | 24005/31100 [11:24<03:19, 35.52it/s]
 77%|███████▋  | 24009/31100 [11:24<03:19, 35.49it/s]
 77%|███████▋  | 24013/31100 [11:24<03:19, 35.45it/s]
 77%|███████▋  | 24017/31100 [11:24<03:25, 34.46it/s]
 77%|███████▋  | 24021/31100 [11:24<03:18, 35.69it/s]
 77%|███████▋  | 24025/31100 [11:24<03:14, 36.37it/s]
 77%|███████▋  | 24029/31100 [11:24<03:10, 37.15it/s]
 77%|███████▋  | 24033/31100 [11:25<03:09, 37.23it/s]
 77%|███████▋  | 24037/31100 [11:25<03:11, 36.89it/s]
 77%|███████▋  | 24041/31100 [11:25<03:08, 37.36it/s]
 77%|███████▋  | 24045/31100 [11:25<03:08, 37.36it/s]
 77%|███████▋  | 24049/31100 [11:25<03:16, 35.87it/s]
 77%|███████▋  | 24053/31100 [11:25<03:13, 36.43it/s]
 77%|███████▋  | 24057/31100 [11:25<03:13, 36.36it/s]
 77%|███████▋  | 24061/31100 [11:25<03:22, 34.75it/s]
 77%|███████▋  | 24065/31100 [11:25<03:19, 35.34it/s]
 77%|███████▋  | 24069/31100 [11:26<03:15, 36.03it/s]
 77%|███████▋  | 24073/31100 [11:26<03:17, 35

[2m[36m(_objective pid=49074)[0m {'loss': 0.0682, 'learning_rate': 4.931095773668791e-06, 'epoch': 3.94}


[2m[36m(_objective pid=49074)[0m  79%|███████▉  | 24501/31100 [11:37<03:04, 35.74it/s]
 79%|███████▉  | 24505/31100 [11:37<02:59, 36.65it/s]
 79%|███████▉  | 24509/31100 [11:37<02:59, 36.66it/s]
 79%|███████▉  | 24513/31100 [11:37<03:12, 34.29it/s]
 79%|███████▉  | 24517/31100 [11:37<03:08, 35.00it/s]
 79%|███████▉  | 24521/31100 [11:38<03:04, 35.59it/s]
 79%|███████▉  | 24525/31100 [11:38<03:03, 35.84it/s]
 79%|███████▉  | 24529/31100 [11:38<03:01, 36.21it/s]
 79%|███████▉  | 24533/31100 [11:38<03:02, 35.90it/s]
 79%|███████▉  | 24537/31100 [11:38<03:06, 35.11it/s]
 79%|███████▉  | 24541/31100 [11:38<03:02, 35.96it/s]
 79%|███████▉  | 24545/31100 [11:38<03:07, 34.94it/s]
 79%|███████▉  | 24549/31100 [11:38<03:06, 35.07it/s]
 79%|███████▉  | 24553/31100 [11:38<03:08, 34.65it/s]
 79%|███████▉  | 24557/31100 [11:39<03:04, 35.43it/s]
 79%|███████▉  | 24561/31100 [11:39<03:01, 36.07it/s]
 79%|███████▉  | 24565/31100 [11:39<03:01, 35.95it/s]
 79%|███████▉  | 24569/31100 [11:39<03:03, 35

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:37:30. Total running time: 2hr 17min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  79%|███████▉  | 24629/31100 [11:41<02:57, 36.46it/s]
 79%|███████▉  | 24633/31100 [11:41<02:57, 36.45it/s]
 79%|███████▉  | 24637/31100 [11:41<02:55, 36.89it/s]
 79%|███████▉  | 24641/31100 [11:41<02:56, 36.64it/s]
 79%|███████▉  | 24645/31100 [11:41<02:57, 36.27it/s]
 79%|███████▉  | 24649/31100 [11:41<02:54, 37.01it/s]
 79%|███████▉  | 24653/31100 [11:41<02:53, 37.19it/s]
 79%|███████▉  | 24657/31100 [11:41<02:50, 37.75it/s]
 79%|███████▉  | 24661/31100 [11:41<02:48, 38.22it/s]
 79%|███████▉  | 24665/31100 [11:41<02:51, 37.61it/s]
 79%|███████▉  | 24669/31100 [11:42<02:50, 37.76it/s]
 79%|███████▉  | 24673/31100 [11:42<02:52, 37.22it/s]
 79%|███████▉  | 24677/31100 [11:42<02:52, 37.30it/s]
 79%|███████▉  | 24681/31100 [11:42<02:55, 36.47it/s]
 79%|███████▉  | 24685/31100 [11:42<02:52, 37.22it/s]
 79%|███████▉  | 24689/31100 [11:42<02:51, 37.46it/s]
 79%|███████▉  | 24693/31100 [11:42<02:57, 36.15it/s]
 79%|███████▉  | 24697/31100 [11:42<02:58, 35

Trial _objective_f556c_00019 finished iteration 4 at 2023-09-11 15:37:47. Total running time: 2hr 17min 26s
+-------------------------------------------------+
| Trial _objective_f556c_00019 result             |
+-------------------------------------------------+
| time_this_iter_s                        179.141 |
| time_total_s                            719.722 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.29128 |
| eval_runtime                             9.5616 |
| eval_samples_per_second                 433.715 |
| eval_steps_per_second                    13.596 |
| objective                               0.29128 |
+-------------------------------------------------+

[2m[36m(_objective pid=49074)[0m {'eval_loss': 0.291279137134552, 'eval_runtime': 9.5616, 'eval_samples_per_second': 433.715, 'eval_steps_per_second': 13.596, 'epoch': 4.0}


[2m[36m(_objective pid=49074)[0m                                                      
[2m[36m(_objective pid=49074)[0m                                                  [A 80%|████████  | 24880/31100 [11:57<02:44, 37.75it/s]
[2m[36m(_objective pid=49074)[0m 100%|██████████| 130/130 [00:09<00:00, 11.97it/s][A
                                                 [A
 80%|████████  | 24881/31100 [11:58<1:27:13,  1.19it/s]
 80%|████████  | 24885/31100 [11:58<1:01:52,  1.67it/s]
 80%|████████  | 24889/31100 [11:58<44:08,  2.35it/s]  
 80%|████████  | 24893/31100 [11:58<31:44,  3.26it/s]
 80%|████████  | 24897/31100 [11:59<23:00,  4.49it/s]
 80%|████████  | 24901/31100 [11:59<16:55,  6.11it/s]
 80%|████████  | 24905/31100 [11:59<12:38,  8.16it/s]
 80%|████████  | 24909/31100 [11:59<09:44, 10.60it/s]
 80%|████████  | 24913/31100 [11:59<07:38, 13.49it/s]
 80%|████████  | 24917/31100 [11:59<06:11, 16.66it/s]
 80%|████████  | 24921/31100 [11:59<05:09, 19.99it/s]
 80%|████████  | 2492

[2m[36m(_objective pid=49074)[0m {'loss': 0.0363, 'learning_rate': 4.557527912027216e-06, 'epoch': 4.02}


[2m[36m(_objective pid=49074)[0m  80%|████████  | 25005/31100 [12:01<02:39, 38.19it/s]
 80%|████████  | 25009/31100 [12:02<02:39, 38.10it/s]
 80%|████████  | 25013/31100 [12:02<02:39, 38.09it/s]
 80%|████████  | 25017/31100 [12:02<02:45, 36.65it/s]
 80%|████████  | 25021/31100 [12:02<02:44, 36.85it/s]
 80%|████████  | 25025/31100 [12:02<02:49, 35.82it/s]
 80%|████████  | 25029/31100 [12:02<02:48, 35.95it/s]
 80%|████████  | 25033/31100 [12:02<02:55, 34.64it/s]
 81%|████████  | 25037/31100 [12:02<02:49, 35.67it/s]
 81%|████████  | 25041/31100 [12:02<02:46, 36.50it/s]
 81%|████████  | 25045/31100 [12:03<02:46, 36.28it/s]
 81%|████████  | 25049/31100 [12:03<02:43, 36.90it/s]
 81%|████████  | 25053/31100 [12:03<02:41, 37.44it/s]
 81%|████████  | 25057/31100 [12:03<02:39, 37.95it/s]
 81%|████████  | 25061/31100 [12:03<02:38, 38.15it/s]
 81%|████████  | 25066/31100 [12:03<02:34, 39.16it/s]
 81%|████████  | 25070/31100 [12:03<02:35, 38.72it/s]
 81%|████████  | 25074/31100 [12:03<02:34, 39

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:38:00. Total running time: 2hr 17min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  82%|████████▏ | 25354/31100 [12:11<02:43, 35.22it/s]
 82%|████████▏ | 25358/31100 [12:11<02:38, 36.21it/s]
 82%|████████▏ | 25362/31100 [12:11<02:38, 36.27it/s]
 82%|████████▏ | 25366/31100 [12:11<02:44, 34.96it/s]
 82%|████████▏ | 25370/31100 [12:11<02:48, 34.00it/s]
 82%|████████▏ | 25374/31100 [12:11<02:42, 35.18it/s]
 82%|████████▏ | 25378/31100 [12:11<02:39, 35.85it/s]
 82%|████████▏ | 25382/31100 [12:11<02:42, 35.13it/s]
 82%|████████▏ | 25386/31100 [12:12<02:46, 34.35it/s]
 82%|████████▏ | 25390/31100 [12:12<02:44, 34.72it/s]
 82%|████████▏ | 25394/31100 [12:12<02:49, 33.63it/s]
 82%|████████▏ | 25398/31100 [12:12<02:44, 34.64it/s]
 82%|████████▏ | 25402/31100 [12:12<02:40, 35.48it/s]
 82%|████████▏ | 25406/31100 [12:12<02:41, 35.35it/s]
 82%|████████▏ | 25410/31100 [12:12<02:39, 35.74it/s]
 82%|████████▏ | 25414/31100 [12:12<02:45, 34.43it/s]
 82%|████████▏ | 25418/31100 [12:13<02:43, 34.70it/s]
 82%|████████▏ | 25422/31100 [12:13<02:45, 34

[2m[36m(_objective pid=49074)[0m {'loss': 0.0537, 'learning_rate': 4.18396005038564e-06, 'epoch': 4.1}


[2m[36m(_objective pid=49074)[0m                                                       82%|████████▏ | 25500/31100 [12:15<02:29, 37.47it/s] 82%|████████▏ | 25503/31100 [12:15<02:25, 38.45it/s]
 82%|████████▏ | 25507/31100 [12:15<02:23, 38.86it/s]
 82%|████████▏ | 25511/31100 [12:15<02:23, 38.99it/s]
 82%|████████▏ | 25515/31100 [12:15<02:24, 38.75it/s]
 82%|████████▏ | 25519/31100 [12:15<02:23, 38.86it/s]
 82%|████████▏ | 25523/31100 [12:15<02:22, 39.17it/s]
 82%|████████▏ | 25527/31100 [12:16<02:22, 39.19it/s]
 82%|████████▏ | 25531/31100 [12:16<02:21, 39.33it/s]
 82%|████████▏ | 25535/31100 [12:16<02:24, 38.43it/s]
 82%|████████▏ | 25539/31100 [12:16<02:33, 36.32it/s]
 82%|████████▏ | 25543/31100 [12:16<02:30, 37.05it/s]
 82%|████████▏ | 25547/31100 [12:16<02:27, 37.73it/s]
 82%|████████▏ | 25551/31100 [12:16<02:25, 38.07it/s]
 82%|████████▏ | 25555/31100 [12:16<02:30, 36.80it/s]
 82%|████████▏ | 25559/31100 [12:16<02:28, 37.24it/s]
 82%|████████▏ | 25563/31100 [12:17<02:27, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0372, 'learning_rate': 3.8103921887440655e-06, 'epoch': 4.18}


[2m[36m(_objective pid=49074)[0m  84%|████████▎ | 25999/31100 [12:28<02:27, 34.66it/s]                                                      84%|████████▎ | 26000/31100 [12:28<02:27, 34.66it/s]
 84%|████████▎ | 26003/31100 [12:28<02:26, 34.80it/s]
 84%|████████▎ | 26007/31100 [12:28<02:26, 34.67it/s]
 84%|████████▎ | 26011/31100 [12:28<02:27, 34.59it/s]
 84%|████████▎ | 26015/31100 [12:29<02:23, 35.44it/s]
 84%|████████▎ | 26019/31100 [12:29<02:22, 35.75it/s]
 84%|████████▎ | 26023/31100 [12:29<02:23, 35.45it/s]
 84%|████████▎ | 26027/31100 [12:29<02:27, 34.39it/s]
 84%|████████▎ | 26031/31100 [12:29<02:24, 35.01it/s]
 84%|████████▎ | 26035/31100 [12:29<02:26, 34.55it/s]
 84%|████████▎ | 26039/31100 [12:29<02:24, 35.01it/s]
 84%|████████▎ | 26043/31100 [12:29<02:23, 35.27it/s]
 84%|████████▍ | 26047/31100 [12:29<02:23, 35.23it/s]
 84%|████████▍ | 26051/31100 [12:30<02:27, 34.25it/s]
 84%|████████▍ | 26055/31100 [12:30<02:23, 35.05it/s]
 84%|████████▍ | 26059/31100 [12:30<02:20, 3

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:38:31. Total running time: 2hr 18min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  85%|████████▌ | 26482/31100 [12:41<02:02, 37.83it/s]
 85%|████████▌ | 26486/31100 [12:41<02:01, 37.87it/s]
 85%|████████▌ | 26490/31100 [12:41<02:00, 38.14it/s]
 85%|████████▌ | 26494/31100 [12:41<01:59, 38.60it/s]


[2m[36m(_objective pid=49074)[0m {'loss': 0.0616, 'learning_rate': 3.4368243271024903e-06, 'epoch': 4.26}


[2m[36m(_objective pid=49074)[0m  85%|████████▌ | 26498/31100 [12:41<02:03, 37.13it/s]                                                      85%|████████▌ | 26500/31100 [12:41<02:03, 37.13it/s]
 85%|████████▌ | 26502/31100 [12:41<02:02, 37.60it/s]
 85%|████████▌ | 26506/31100 [12:41<02:01, 37.73it/s]
 85%|████████▌ | 26510/31100 [12:41<02:02, 37.35it/s]
 85%|████████▌ | 26514/31100 [12:41<02:05, 36.48it/s]
 85%|████████▌ | 26518/31100 [12:42<02:17, 33.22it/s]
 85%|████████▌ | 26522/31100 [12:42<02:14, 34.14it/s]
 85%|████████▌ | 26526/31100 [12:42<02:10, 34.97it/s]
 85%|████████▌ | 26530/31100 [12:42<02:07, 35.84it/s]
 85%|████████▌ | 26534/31100 [12:42<02:09, 35.39it/s]
 85%|████████▌ | 26538/31100 [12:42<02:13, 34.19it/s]
 85%|████████▌ | 26542/31100 [12:42<02:08, 35.39it/s]
 85%|████████▌ | 26546/31100 [12:42<02:06, 36.02it/s]
 85%|████████▌ | 26550/31100 [12:43<02:05, 36.22it/s]
 85%|████████▌ | 26554/31100 [12:43<02:05, 36.12it/s]
 85%|████████▌ | 26558/31100 [12:43<02:05, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0322, 'learning_rate': 3.063256465460915e-06, 'epoch': 4.34}


[2m[36m(_objective pid=49074)[0m  87%|████████▋ | 26999/31100 [12:54<01:47, 38.09it/s]                                                      87%|████████▋ | 27000/31100 [12:55<01:47, 38.09it/s]
 87%|████████▋ | 27003/31100 [12:55<01:46, 38.55it/s]
 87%|████████▋ | 27007/31100 [12:55<01:45, 38.64it/s]
 87%|████████▋ | 27011/31100 [12:55<01:44, 39.02it/s]
 87%|████████▋ | 27015/31100 [12:55<01:44, 38.97it/s]
 87%|████████▋ | 27019/31100 [12:55<01:45, 38.76it/s]
 87%|████████▋ | 27024/31100 [12:55<01:43, 39.33it/s]
 87%|████████▋ | 27028/31100 [12:55<01:44, 39.15it/s]
 87%|████████▋ | 27032/31100 [12:55<01:44, 38.93it/s]
 87%|████████▋ | 27037/31100 [12:55<01:43, 39.38it/s]
 87%|████████▋ | 27041/31100 [12:56<01:44, 38.68it/s]
 87%|████████▋ | 27045/31100 [12:56<01:44, 38.67it/s]
 87%|████████▋ | 27049/31100 [12:56<01:48, 37.41it/s]
 87%|████████▋ | 27053/31100 [12:56<01:48, 37.31it/s]
 87%|████████▋ | 27057/31100 [12:56<01:46, 37.96it/s]
 87%|████████▋ | 27061/31100 [12:56<01:50, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.034, 'learning_rate': 2.68968860381934e-06, 'epoch': 4.42}


[2m[36m(_objective pid=49074)[0m  88%|████████▊ | 27504/31100 [13:08<01:33, 38.55it/s]
 88%|████████▊ | 27508/31100 [13:08<01:34, 38.02it/s]
 88%|████████▊ | 27512/31100 [13:08<01:34, 37.90it/s]
 88%|████████▊ | 27516/31100 [13:08<01:33, 38.26it/s]
 88%|████████▊ | 27521/31100 [13:08<01:31, 38.92it/s]
 89%|████████▊ | 27525/31100 [13:09<01:32, 38.69it/s]
 89%|████████▊ | 27529/31100 [13:09<01:33, 38.22it/s]
 89%|████████▊ | 27533/31100 [13:09<01:32, 38.51it/s]
 89%|████████▊ | 27538/31100 [13:09<01:31, 38.94it/s]
 89%|████████▊ | 27542/31100 [13:09<01:32, 38.52it/s]
 89%|████████▊ | 27546/31100 [13:09<01:31, 38.92it/s]
 89%|████████▊ | 27550/31100 [13:09<01:34, 37.49it/s]
 89%|████████▊ | 27554/31100 [13:09<01:33, 37.97it/s]
 89%|████████▊ | 27558/31100 [13:09<01:32, 38.45it/s]
 89%|████████▊ | 27562/31100 [13:10<01:34, 37.30it/s]
 89%|████████▊ | 27567/31100 [13:10<01:31, 38.43it/s]
 89%|████████▊ | 27571/31100 [13:10<01:32, 38.10it/s]
 89%|████████▊ | 27575/31100 [13:10<01:31, 38

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:39:01. Total running time: 2hr 18min 39s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  89%|████████▉ | 27606/31100 [13:11<01:29, 39.08it/s]
 89%|████████▉ | 27610/31100 [13:11<01:29, 39.12it/s]
 89%|████████▉ | 27614/31100 [13:11<01:29, 39.06it/s]
 89%|████████▉ | 27618/31100 [13:11<01:28, 39.21it/s]
 89%|████████▉ | 27622/31100 [13:11<01:28, 39.25it/s]
 89%|████████▉ | 27626/31100 [13:11<01:29, 38.82it/s]
 89%|████████▉ | 27630/31100 [13:11<01:31, 38.09it/s]
 89%|████████▉ | 27634/31100 [13:11<01:29, 38.55it/s]
 89%|████████▉ | 27638/31100 [13:11<01:29, 38.67it/s]
 89%|████████▉ | 27642/31100 [13:12<01:30, 38.23it/s]
 89%|████████▉ | 27646/31100 [13:12<01:30, 38.14it/s]
 89%|████████▉ | 27650/31100 [13:12<01:32, 37.32it/s]
 89%|████████▉ | 27654/31100 [13:12<01:31, 37.47it/s]
 89%|████████▉ | 27658/31100 [13:12<01:31, 37.43it/s]
 89%|████████▉ | 27663/31100 [13:12<01:29, 38.22it/s]
 89%|████████▉ | 27667/31100 [13:12<01:29, 38.32it/s]
 89%|████████▉ | 27671/31100 [13:12<01:30, 38.04it/s]
 89%|████████▉ | 27675/31100 [13:12<01:29, 38

[2m[36m(_objective pid=49074)[0m {'loss': 0.0582, 'learning_rate': 2.3161207421777653e-06, 'epoch': 4.5}


[2m[36m(_objective pid=49074)[0m  90%|█████████ | 27998/31100 [13:21<01:20, 38.37it/s]                                                      90%|█████████ | 28000/31100 [13:21<01:20, 38.37it/s]
 90%|█████████ | 28002/31100 [13:21<01:20, 38.33it/s]
 90%|█████████ | 28006/31100 [13:21<01:21, 38.08it/s]
 90%|█████████ | 28010/31100 [13:22<01:21, 37.83it/s]
 90%|█████████ | 28014/31100 [13:22<01:22, 37.47it/s]
 90%|█████████ | 28018/31100 [13:22<01:20, 38.15it/s]
 90%|█████████ | 28022/31100 [13:22<01:20, 38.16it/s]
 90%|█████████ | 28026/31100 [13:22<01:19, 38.53it/s]
 90%|█████████ | 28030/31100 [13:22<01:18, 38.93it/s]
 90%|█████████ | 28034/31100 [13:22<01:18, 38.83it/s]
 90%|█████████ | 28038/31100 [13:22<01:18, 39.00it/s]
 90%|█████████ | 28042/31100 [13:22<01:17, 39.23it/s]
 90%|█████████ | 28046/31100 [13:23<01:17, 39.19it/s]
 90%|█████████ | 28050/31100 [13:23<01:18, 39.02it/s]
 90%|█████████ | 28054/31100 [13:23<01:21, 37.35it/s]
 90%|█████████ | 28058/31100 [13:23<01:21, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0149, 'learning_rate': 1.94255288053619e-06, 'epoch': 4.58}


[2m[36m(_objective pid=49074)[0m  92%|█████████▏| 28505/31100 [13:35<01:11, 36.38it/s]
 92%|█████████▏| 28509/31100 [13:35<01:10, 36.57it/s]
 92%|█████████▏| 28513/31100 [13:35<01:09, 37.35it/s]
 92%|█████████▏| 28517/31100 [13:35<01:08, 37.58it/s]
 92%|█████████▏| 28521/31100 [13:35<01:07, 38.09it/s]
 92%|█████████▏| 28525/31100 [13:36<01:06, 38.52it/s]
 92%|█████████▏| 28530/31100 [13:36<01:06, 38.89it/s]
 92%|█████████▏| 28534/31100 [13:36<01:06, 38.73it/s]
 92%|█████████▏| 28538/31100 [13:36<01:05, 39.07it/s]
 92%|█████████▏| 28542/31100 [13:36<01:06, 38.73it/s]
 92%|█████████▏| 28547/31100 [13:36<01:05, 39.27it/s]
 92%|█████████▏| 28551/31100 [13:36<01:08, 37.03it/s]
 92%|█████████▏| 28556/31100 [13:36<01:06, 38.00it/s]
 92%|█████████▏| 28561/31100 [13:36<01:06, 38.45it/s]
 92%|█████████▏| 28566/31100 [13:37<01:04, 39.14it/s]
 92%|█████████▏| 28570/31100 [13:37<01:04, 39.27it/s]
 92%|█████████▏| 28574/31100 [13:37<01:04, 39.41it/s]
 92%|█████████▏| 28578/31100 [13:37<01:04, 39

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:39:31. Total running time: 2hr 19min 9s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2    

[2m[36m(_objective pid=49074)[0m  92%|█████████▏| 28723/31100 [13:41<00:59, 39.69it/s]
 92%|█████████▏| 28727/31100 [13:41<00:59, 39.68it/s]
 92%|█████████▏| 28731/31100 [13:41<01:00, 39.30it/s]
 92%|█████████▏| 28735/31100 [13:41<01:00, 39.32it/s]
 92%|█████████▏| 28740/31100 [13:41<00:59, 39.61it/s]
 92%|█████████▏| 28744/31100 [13:41<00:59, 39.44it/s]
 92%|█████████▏| 28748/31100 [13:41<01:00, 39.16it/s]
 92%|█████████▏| 28752/31100 [13:41<00:59, 39.27it/s]
 92%|█████████▏| 28756/31100 [13:42<01:00, 39.02it/s]
 92%|█████████▏| 28760/31100 [13:42<01:01, 38.24it/s]
 92%|█████████▏| 28764/31100 [13:42<01:00, 38.37it/s]
 93%|█████████▎| 28768/31100 [13:42<01:00, 38.30it/s]
 93%|█████████▎| 28772/31100 [13:42<01:01, 37.75it/s]
 93%|█████████▎| 28776/31100 [13:42<01:01, 37.79it/s]
 93%|█████████▎| 28780/31100 [13:42<01:01, 37.79it/s]
 93%|█████████▎| 28784/31100 [13:42<01:01, 37.71it/s]
 93%|█████████▎| 28788/31100 [13:42<01:00, 38.03it/s]
 93%|█████████▎| 28792/31100 [13:42<01:01, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.0514, 'learning_rate': 1.5689850188946154e-06, 'epoch': 4.66}


[2m[36m(_objective pid=49074)[0m  93%|█████████▎| 29004/31100 [13:48<00:57, 36.40it/s]
 93%|█████████▎| 29008/31100 [13:48<00:57, 36.29it/s]
 93%|█████████▎| 29012/31100 [13:49<00:57, 36.01it/s]
 93%|█████████▎| 29016/31100 [13:49<00:57, 36.44it/s]
 93%|█████████▎| 29020/31100 [13:49<00:55, 37.24it/s]
 93%|█████████▎| 29025/31100 [13:49<00:56, 36.42it/s]
 93%|█████████▎| 29029/31100 [13:49<00:57, 36.08it/s]
 93%|█████████▎| 29033/31100 [13:49<00:56, 36.52it/s]
 93%|█████████▎| 29037/31100 [13:49<00:55, 36.87it/s]
 93%|█████████▎| 29041/31100 [13:49<00:55, 36.80it/s]
 93%|█████████▎| 29045/31100 [13:49<00:55, 36.84it/s]
 93%|█████████▎| 29049/31100 [13:50<00:55, 36.67it/s]
 93%|█████████▎| 29053/31100 [13:50<00:56, 36.49it/s]
 93%|█████████▎| 29057/31100 [13:50<00:56, 36.47it/s]
 93%|█████████▎| 29061/31100 [13:50<00:55, 36.52it/s]
 93%|█████████▎| 29065/31100 [13:50<00:55, 36.38it/s]
 93%|█████████▎| 29069/31100 [13:50<00:57, 35.11it/s]
 93%|█████████▎| 29073/31100 [13:50<00:58, 34

[2m[36m(_objective pid=49074)[0m {'loss': 0.0618, 'learning_rate': 1.1954171572530402e-06, 'epoch': 4.74}


[2m[36m(_objective pid=49074)[0m  95%|█████████▍| 29506/31100 [14:02<00:44, 36.18it/s]
 95%|█████████▍| 29510/31100 [14:02<00:43, 36.31it/s]
 95%|█████████▍| 29514/31100 [14:02<00:43, 36.70it/s]
 95%|█████████▍| 29518/31100 [14:02<00:42, 37.11it/s]
 95%|█████████▍| 29522/31100 [14:02<00:43, 36.65it/s]
 95%|█████████▍| 29526/31100 [14:02<00:43, 35.91it/s]
 95%|█████████▍| 29530/31100 [14:02<00:42, 36.83it/s]
 95%|█████████▍| 29534/31100 [14:02<00:44, 35.34it/s]
 95%|█████████▍| 29538/31100 [14:03<00:42, 36.34it/s]
 95%|█████████▍| 29542/31100 [14:03<00:44, 35.17it/s]
 95%|█████████▌| 29546/31100 [14:03<00:42, 36.17it/s]
 95%|█████████▌| 29550/31100 [14:03<00:41, 36.94it/s]
 95%|█████████▌| 29555/31100 [14:03<00:40, 37.97it/s]
 95%|█████████▌| 29559/31100 [14:03<00:40, 38.14it/s]
 95%|█████████▌| 29563/31100 [14:03<00:41, 37.35it/s]
 95%|█████████▌| 29567/31100 [14:03<00:40, 37.54it/s]
 95%|█████████▌| 29571/31100 [14:03<00:40, 37.33it/s]
 95%|█████████▌| 29575/31100 [14:04<00:40, 37

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:40:01. Total running time: 2hr 19min 40s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m  96%|█████████▌| 29853/31100 [14:11<00:31, 39.12it/s]
 96%|█████████▌| 29857/31100 [14:11<00:32, 38.31it/s]
 96%|█████████▌| 29861/31100 [14:11<00:32, 38.49it/s]
 96%|█████████▌| 29865/31100 [14:11<00:32, 38.54it/s]
 96%|█████████▌| 29870/31100 [14:11<00:31, 38.50it/s]
 96%|█████████▌| 29874/31100 [14:12<00:31, 38.71it/s]
 96%|█████████▌| 29878/31100 [14:12<00:32, 38.09it/s]
 96%|█████████▌| 29882/31100 [14:12<00:31, 38.36it/s]
 96%|█████████▌| 29886/31100 [14:12<00:31, 38.70it/s]
 96%|█████████▌| 29890/31100 [14:12<00:31, 38.85it/s]
 96%|█████████▌| 29894/31100 [14:12<00:31, 38.82it/s]
 96%|█████████▌| 29898/31100 [14:12<00:33, 36.41it/s]
 96%|█████████▌| 29902/31100 [14:12<00:32, 36.75it/s]
 96%|█████████▌| 29906/31100 [14:12<00:31, 37.39it/s]
 96%|█████████▌| 29910/31100 [14:13<00:31, 37.91it/s]
 96%|█████████▌| 29914/31100 [14:13<00:32, 37.02it/s]
 96%|█████████▌| 29918/31100 [14:13<00:31, 37.38it/s]
 96%|█████████▌| 29922/31100 [14:13<00:31, 37

[2m[36m(_objective pid=49074)[0m {'loss': 0.0574, 'learning_rate': 8.218492956114651e-07, 'epoch': 4.82}


[2m[36m(_objective pid=49074)[0m                                                       96%|█████████▋| 30000/31100 [14:15<00:32, 33.92it/s] 96%|█████████▋| 30002/31100 [14:15<00:31, 34.64it/s]
 96%|█████████▋| 30006/31100 [14:15<00:31, 35.21it/s]
 96%|█████████▋| 30010/31100 [14:15<00:30, 36.20it/s]
 97%|█████████▋| 30014/31100 [14:15<00:29, 36.93it/s]
 97%|█████████▋| 30018/31100 [14:16<00:29, 36.81it/s]
 97%|█████████▋| 30022/31100 [14:16<00:29, 36.63it/s]
 97%|█████████▋| 30026/31100 [14:16<00:28, 37.35it/s]
 97%|█████████▋| 30030/31100 [14:16<00:28, 37.71it/s]
 97%|█████████▋| 30034/31100 [14:16<00:28, 38.00it/s]
 97%|█████████▋| 30042/31100 [14:16<00:27, 38.06it/s]
 97%|█████████▋| 30046/31100 [14:16<00:27, 38.18it/s]
 97%|█████████▋| 30050/31100 [14:16<00:27, 38.51it/s]
 97%|█████████▋| 30054/31100 [14:16<00:27, 38.66it/s]
 97%|█████████▋| 30058/31100 [14:17<00:27, 38.11it/s]
 97%|█████████▋| 30062/31100 [14:17<00:26, 38.59it/s]
 97%|█████████▋| 30067/31100 [14:17<00:26, 3

[2m[36m(_objective pid=49074)[0m {'loss': 0.0713, 'learning_rate': 4.482814339698901e-07, 'epoch': 4.9}


[2m[36m(_objective pid=49074)[0m  98%|█████████▊| 30504/31100 [14:29<00:15, 38.16it/s]
 98%|█████████▊| 30508/31100 [14:29<00:15, 38.19it/s]
 98%|█████████▊| 30512/31100 [14:29<00:15, 38.00it/s]
 98%|█████████▊| 30516/31100 [14:29<00:15, 37.96it/s]
 98%|█████████▊| 30520/31100 [14:29<00:27, 21.09it/s]
 98%|█████████▊| 30525/31100 [14:29<00:22, 25.33it/s]
 98%|█████████▊| 30529/31100 [14:30<00:20, 28.04it/s]
 98%|█████████▊| 30533/31100 [14:30<00:18, 30.55it/s]
 98%|█████████▊| 30537/31100 [14:30<00:17, 32.71it/s]
 98%|█████████▊| 30542/31100 [14:30<00:15, 35.18it/s]
 98%|█████████▊| 30546/31100 [14:30<00:15, 35.89it/s]
 98%|█████████▊| 30551/31100 [14:30<00:14, 37.31it/s]
 98%|█████████▊| 30555/31100 [14:30<00:14, 37.18it/s]
 98%|█████████▊| 30559/31100 [14:30<00:14, 37.74it/s]
 98%|█████████▊| 30563/31100 [14:30<00:14, 37.80it/s]
 98%|█████████▊| 30568/31100 [14:31<00:13, 38.63it/s]
 98%|█████████▊| 30572/31100 [14:31<00:13, 38.99it/s]
 98%|█████████▊| 30576/31100 [14:31<00:14, 37

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-11 15:40:31. Total running time: 2hr 20min 10s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_f556c_00019   RUNNING          2.32359e-05                    5                        2   

[2m[36m(_objective pid=49074)[0m 100%|█████████▉| 30965/31100 [14:41<00:03, 38.85it/s]
100%|█████████▉| 30969/31100 [14:41<00:03, 38.38it/s]
100%|█████████▉| 30973/31100 [14:41<00:03, 37.68it/s]
100%|█████████▉| 30977/31100 [14:41<00:03, 37.86it/s]
100%|█████████▉| 30981/31100 [14:41<00:03, 38.14it/s]
100%|█████████▉| 30985/31100 [14:42<00:03, 37.67it/s]
100%|█████████▉| 30989/31100 [14:42<00:02, 37.75it/s]
100%|█████████▉| 30993/31100 [14:42<00:02, 37.96it/s]
100%|█████████▉| 30997/31100 [14:42<00:02, 38.27it/s]
100%|█████████▉| 31001/31100 [14:42<00:02, 37.37it/s]


[2m[36m(_objective pid=49074)[0m {'loss': 0.0353, 'learning_rate': 7.471357232831501e-08, 'epoch': 4.98}


[2m[36m(_objective pid=49074)[0m 100%|█████████▉| 31005/31100 [14:42<00:02, 37.92it/s]
100%|█████████▉| 31009/31100 [14:42<00:02, 38.08it/s]
100%|█████████▉| 31013/31100 [14:42<00:02, 37.23it/s]
100%|█████████▉| 31017/31100 [14:42<00:02, 37.08it/s]
100%|█████████▉| 31021/31100 [14:43<00:02, 36.77it/s]
100%|█████████▉| 31025/31100 [14:43<00:02, 36.00it/s]
100%|█████████▉| 31029/31100 [14:43<00:01, 36.95it/s]
100%|█████████▉| 31033/31100 [14:43<00:01, 37.56it/s]
100%|█████████▉| 31037/31100 [14:43<00:01, 37.22it/s]
100%|█████████▉| 31041/31100 [14:43<00:01, 36.42it/s]
100%|█████████▉| 31045/31100 [14:43<00:01, 37.41it/s]
100%|█████████▉| 31049/31100 [14:43<00:01, 37.44it/s]
100%|█████████▉| 31053/31100 [14:43<00:01, 37.83it/s]
100%|█████████▉| 31057/31100 [14:43<00:01, 37.74it/s]
100%|█████████▉| 31061/31100 [14:44<00:01, 38.35it/s]
100%|█████████▉| 31065/31100 [14:44<00:00, 37.40it/s]
100%|█████████▉| 31069/31100 [14:44<00:00, 36.57it/s]
100%|█████████▉| 31073/31100 [14:44<00:00, 36

Trial _objective_f556c_00019 finished iteration 5 at 2023-09-11 15:40:44. Total running time: 2hr 20min 23s
+-------------------------------------------------+
| Trial _objective_f556c_00019 result             |
+-------------------------------------------------+
| time_this_iter_s                        177.443 |
| time_total_s                            897.165 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.30379 |
| eval_runtime                              9.551 |
| eval_samples_per_second                 434.196 |
| eval_steps_per_second                    13.611 |
| objective                               0.30379 |
+-------------------------------------------------+

[2m[36m(_objective pid=49074)[0m {'eval_loss': 0.30379343032836914, 'eval_runtime': 9.551, 'eval_samples_per_second': 434.196, 'eval_steps_per_second': 13.611, 'epoch': 5.0}


[2m[36m(_objective pid=49074)[0m                                                      
[2m[36m(_objective pid=49074)[0m                                                  [A100%|██████████| 31100/31100 [14:54<00:00, 35.98it/s]
[2m[36m(_objective pid=49074)[0m 100%|██████████| 130/130 [00:09<00:00, 12.00it/s][A
[2m[36m(_objective pid=49074)[0m                                                  [A


Trial _objective_f556c_00019 completed after 5 iterations at 2023-09-11 15:40:46. Total running time: 2hr 20min 25s

Trial status: 20 TERMINATED
Current time: 2023-09-11 15:40:46. Total running time: 2hr 20min 25s
Logical resource usage: 1.0/8 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs     ..._train_batch_size     weight_decay     adam_epsilon     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

[2m[36m(_objective pid=49074)[0m                                                      100%|██████████| 31100/31100 [14:56<00:00, 35.98it/s]100%|██████████| 31100/31100 [14:56<00:00, 34.70it/s]


In [None]:
# Display the BestRun object selected by the hyperparameter search (assigned in an earlier cell).
best_run

BestRun(run_id='f556c_00003', objective=0.18533514440059662, hyperparameters={'learning_rate': 4.754210836063001e-05, 'num_train_epochs': 2, 'per_device_train_batch_size': 4, 'weight_decay': 0.29766346778736524, 'adam_epsilon': 2.9507066707905336e-08, 'per_device_eval_batch_size': 32}, run_summary=<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x78afd630c790>)

In [12]:
# Hyperparameters of the best trial reported by the search (run f556c_00003).
# They are copied in by hand instead of being read from best_run.hyperparameters
# so that this cell can be executed without repeating the search phase.
best_run_hyperparameters = {'learning_rate': 4.754210836063001e-05, 'num_train_epochs': 2, 'per_device_train_batch_size': 4, 'weight_decay': 0.29766346778736524, 'adam_epsilon': 2.9507066707905336e-08, 'per_device_eval_batch_size': 32}
for n, v in best_run_hyperparameters.items():
    # The argument name is only known at run time, hence setattr.
    setattr(trainer_search.args, n, v)

# Dataset.shard expects an integer shard count, but 1 / (10 * scale) is a float;
# round it so the call does not depend on float handling inside `datasets`.
# Working with even 20% of the dataset takes about 30 minutes on a V100.
trainer_search.train_dataset = tokenized_dataset["train"].shard(index=1, num_shards=int(round(1 / (10 * scale))))

trainer_search.train()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias', 'classifier.

Epoch,Training Loss,Validation Loss
1,0.1241,0.231639
2,0.0568,0.115649


TrainOutput(global_step=62192, training_loss=0.14396206925330568, metrics={'train_runtime': 1779.2385, 'train_samples_per_second': 139.815, 'train_steps_per_second': 34.954, 'total_flos': 5517442052384688.0, 'train_loss': 0.14396206925330568, 'epoch': 2.0})

In [13]:
# Run the trainer's evaluation loop on its configured evaluation set and display the returned metrics dict.
trainer_search.evaluate()

{'eval_loss': 0.11564943939447403,
 'eval_runtime': 9.5508,
 'eval_samples_per_second': 434.202,
 'eval_steps_per_second': 13.611,
 'epoch': 2.0}

# Result on test dataset

In [14]:
# Accuracy metric from the `evaluate` library, shared by compute_metrics below.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
  """Compute accuracy for a (model outputs, reference labels) pair.

  Args:
    eval_pred: Two-element sequence; the first item holds per-class scores
      with the classes along axis 1, the second the reference labels.

  Returns:
    The result of ``metric.compute`` for the arg-max class of every row.
  """
  scores, references = eval_pred
  predicted_classes = np.argmax(scores, axis=1)  # highest-scoring class per row
  return metric.compute(predictions=predicted_classes, references=references)

# Evaluate on one shard of the test split. Dataset.shard expects an integer
# shard count, but 1 / (10 * scale) is a float, so round it explicitly.
test_dataset = tokenized_dataset["test"].shard(index=1, num_shards=int(round(1 / (10 * scale))))
predictions = trainer_search.predict(test_dataset)
print(compute_metrics((predictions.predictions, test_dataset['labels'])))

{'accuracy': 0.9777622343889438}


Now evaluating the model on real repositories

In [15]:
!pip install pydriller

Collecting pydriller
  Downloading PyDriller-2.5.1-py3-none-any.whl (33 kB)
Collecting types-pytz (from pydriller)
  Downloading types_pytz-2023.3.0.1-py3-none-any.whl (4.9 kB)
Collecting lizard (from pydriller)
  Downloading lizard-1.17.10-py2.py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.0/66.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: types-pytz, lizard, pydriller
Successfully installed lizard-1.17.10 pydriller-2.5.1 types-pytz-2023.3.0.1


I assume that a commit author is a bot if and only if the author's name contains the substring 'bot' (case-insensitive).

In [24]:
from pydriller import Repository

def eval_repo(repo):
  """Evaluate the fine-tuned classifier on the commit messages of a git repository.

  Each commit is labelled 1 (bot) when its author name contains the substring
  'bot', compared case-insensitively, and 0 otherwise. The (message, label)
  pairs are written to 'real_train.csv', reloaded as a dataset, tokenized with
  ``tokenize_function`` and classified with ``trainer_search``. The accuracy
  and the predicted / labelled bot-commit counts are printed.

  Args:
    repo: Repository location passed to ``pydriller.Repository``
      (the notebook passes GitHub URLs).

  Returns:
    None. All results are printed.
  """
  data = []
  for commit in Repository(repo).traverse_commits():
    # PyDriller types the author name as optional; treat a missing name as "not a bot".
    author_name = commit.author.name or ''
    is_bot = 1 if 'bot' in author_name.lower() else 0
    data.append([commit.msg, is_bot])
  df = pandas.DataFrame(data)
  # The frame has integer column labels, so after the CSV round trip the
  # columns are named 'Unnamed: 0' (the index), '0' (message) and '1' (label).
  df.to_csv('real_train.csv')
  # keep_default_na=False mirrors how the training CSVs are loaded at the top of
  # the notebook: messages such as "NA", "null" or an empty string stay strings
  # instead of being parsed as missing values.
  repo_dataset = datasets.load_dataset('csv', data_files='real_train.csv', keep_default_na=False)
  tokenized_dataset_dict = repo_dataset.map(tokenize_function, batched=True)
  tokenized_dataset_dict = tokenized_dataset_dict.remove_columns(['Unnamed: 0', '0'])
  tokenized_dataset_dict = tokenized_dataset_dict.rename_column('1', 'labels')
  # load_dataset puts a single CSV file into the 'train' split.
  eval_split = tokenized_dataset_dict['train']

  predictions = trainer_search.predict(eval_split)
  # Compute the predicted classes and read the labels column once, then reuse them.
  predicted_labels = np.argmax(predictions.predictions, axis=1)
  labels = eval_split['labels']

  metric = evaluate.load("accuracy")
  print(metric.compute(predictions=predicted_labels, references=labels))
  print("Number of bot commits predicted:", np.sum(predicted_labels))
  print("Number of bot commits in repo:", np.sum(np.array(labels)))
  print("Number of commits in repo:", len(labels))

In [21]:
# GitHub repositories whose commit histories are passed to eval_repo in the cells below.
repo1 = 'https://github.com/godotengine/godot'
repo2 = 'https://github.com/halirutan/IntelliJ-Key-Promoter-X'

In [25]:
# Evaluate the classifier on the commit history of repo1; eval_repo prints the results.
eval_repo(repo1)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/57310 [00:00<?, ? examples/s]

{'accuracy': 0.8812074681556448}
Number of bot commits predicted: 6798
Number of bot commits in repo: 14
Number of commits in repo: 57310


In [26]:
# Evaluate the classifier on the commit history of repo2; eval_repo prints the results.
eval_repo(repo2)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/233 [00:00<?, ? examples/s]

{'accuracy': 0.9055793991416309}
Number of bot commits predicted: 22
Number of bot commits in repo: 0
Number of commits in repo: 233
