In [None]:
import torch

# Choose the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")

if not gpu_available:
    print('No GPU available, using the CPU instead.')
else:
    # Report how many GPUs are visible and the name of device 0.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: Tesla V100-SXM2-16GB


In [None]:
# Install huggingface library
!pip install -U torch ray==2.6.3 transformers hyperopt accelerate

Collecting ray==2.6.3
  Downloading ray-2.6.3-cp310-cp310-manylinux2014_x86_64.whl (56.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.33.2-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m96.7 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.2-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.m

In [None]:
from google.colab import drive

# Where Google Drive is attached inside the Colab filesystem.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [None]:
import random
import numpy as np

# Seed every random number generator used in this notebook with the same
# value so that repeated runs are comparable.
seed_val = 42

for seed_fn in (
    random.seed,                 # Python's built-in RNG
    np.random.seed,              # NumPy's global RNG
    torch.manual_seed,           # PyTorch default generator
    torch.cuda.manual_seed_all,  # PyTorch generators on every CUDA device
):
    seed_fn(seed_val)

In [None]:
# read here your dataset
import pandas as pd

columns_to_read = ["Text", "greenyesno"]

# Read only the text and annotation columns of the cleaned, tab-separated
# dataset from the working directory. (Earlier variants of this cell read
# TSV files from "/content/gdrive/My Drive/bert/dataset/".)
df = pd.read_csv("./eco_dataset_clean.tsv", delimiter='\t', usecols=columns_to_read)

# Turn the textual annotation into a numeric class: 1 = eco-related, 0 = not.
label_codes = {'Eco-related': 1, 'Not eco-related': 0}
df['greenyesno'] = df['greenyesno'].replace(label_codes)

# Switch to the column names used by the rest of the notebook.
df.rename(columns={'Text': 'sentence', 'greenyesno': 'label'}, inplace=True)

In [None]:
# Preview the first rows to check the renamed columns and the numeric labels.
df.head()

Unnamed: 0,sentence,label
0,For environmentalists who want to reduce their...,1.0
1,🔴 Moscow’s mayor has urged residents to stay i...,0.0
2,"Who in their right mind spends £70,000+ on a c...",1.0
3,Shell ranks in the top 10 among the 90 compani...,1.0
4,"We're days away from #COP27, where world leade...",1.0


In [None]:
# Pull the text and the numeric class out of the dataframe as plain arrays.
sentences = df["sentence"].values
labels = df["label"].values

In [None]:
# Spot-check the first sentence.
sentences[0]

'For environmentalists who want to reduce their own household waste without assigning themselves more chores, there is a simple option. This @Wirecutter-approved, easy-to-use container does the hard work of composting for you. [URL]'

In [None]:
# Spot-check the label that belongs to the first sentence.
labels[0]

1.0

In [None]:
# BERT cannot consume raw text: every sentence has to be split into tokens and
# each token mapped to its index in the tokenizer vocabulary.
from transformers import AutoTokenizer

# Checkpoint name used for the tokenizer here and for the classifier later.
# ('distilbert-base-uncased' was the commented-out alternative.)
huggingface_model_name = 'bert-base-uncased'

print('Loading tokenizer...')
# The files are downloaded on first use and kept in a local cache directory.
tokenizer = AutoTokenizer.from_pretrained(
    huggingface_model_name,
    do_lower_case=True,
)

Loading tokenizer...


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
# Display the loaded tokenizer's settings.
tokenizer

BertTokenizerFast(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True)

In [None]:
# Longest tokenized sentence in the dataset, counting the `[CLS]` and `[SEP]`
# special tokens that the tokenizer adds.
token_counts = [
    len(tokenizer.encode(sent, add_special_tokens=True))
    for sent in sentences
]
max_len = max(token_counts, default=0)

print('Max sentence length: ', max_len)

Max sentence length:  223


In [None]:
# let's encode the dataset
# Every sequence is padded or truncated to this many tokens.
# NOTE: the longest sentence measured in the previous cell has more tokens
# than this, so the longest inputs are truncated.
max_length = 128

# A single batched tokenizer call does, for every sentence:
#   (1) Tokenize the sentence.
#   (2) Prepend the `[CLS]` token to the start.
#   (3) Append the `[SEP]` token to the end.
#   (4) Map tokens to their IDs.
#   (5) Pad or truncate the sentence to `max_length`
#   (6) Create attention masks for [PAD] tokens.
encoded_batch = tokenizer(
    list(sentences),            # All sentences at once.
    add_special_tokens=True,    # Add '[CLS]' and '[SEP]'
    max_length=max_length,      # Pad & truncate all sentences.
    padding='max_length',
    truncation=True,
    return_tensors='pt',        # Return pytorch tensors.
)

# One row per sentence, `max_length` columns.
input_ids = encoded_batch['input_ids']
# The attention mask simply differentiates padding from non-padding.
attention_masks = encoded_batch['attention_mask']

# One-hot targets: column 0 marks label 0, column 1 marks label 1.
is_negative = torch.tensor([bool(lab == 0.0) for lab in labels], dtype=torch.bool)
is_positive = torch.tensor([bool(lab == 1.0) for lab in labels], dtype=torch.bool)
lab_tensor = torch.zeros((len(sentences), 2))
lab_tensor[is_negative, 0] = 1
lab_tensor[is_positive, 1] = 1

# Any other label value (e.g. a missing annotation) leaves the row all-zero.
# That is kept as before, but reported instead of passing silently.
num_unlabelled = int((lab_tensor.sum(dim=1) == 0).sum())
if num_unlabelled:
    print('WARNING: %d sentence(s) have a label other than 0/1 and get an all-zero target.' % num_unlabelled)

# Print sentence 0, now as a list of IDs.
print('Original: ', sentences[0])
print('Token IDs:', input_ids[0])
print('Labels:', lab_tensor[0])

Original:  For environmentalists who want to reduce their own household waste without assigning themselves more chores, there is a simple option. This @Wirecutter-approved, easy-to-use container does the hard work of composting for you. [URL]
Token IDs: tensor([  101,  2005,  4483,  5130,  2040,  2215,  2000,  5547,  2037,  2219,
         4398,  5949,  2302, 23911,  2075,  3209,  2062, 27091,  1010,  2045,
         2003,  1037,  3722,  5724,  1012,  2023,  1030,  7318, 12690,  3334,
         1011,  4844,  1010,  3733,  1011,  2000,  1011,  2224, 11661,  2515,
         1996,  2524,  2147,  1997,  4012, 19894,  2075,  2005,  2017,  1012,
         1031, 24471,  2140,  1033,   102,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,  

In [None]:
# `labels` is still the float NumPy array taken from the dataframe: the
# `.long()` tensor conversion in the encoding cell is commented out.
# NOTE(review): torch.set_printoptions configures how PyTorch tensors are
# printed; presumably it does not shorten this NumPy array's repr — confirm.
torch.set_printoptions(threshold=10)
labels

array([1., 0., 1., ..., 1., 1., 0.])

In [None]:
import torch
from torch.utils.data import Dataset, IterableDataset
from torch.utils.data import TensorDataset, random_split

class MyDataLoader(Dataset):
  """Map-style dataset over pre-tokenized inputs and their label rows.

  Despite its name this is a dataset, not a ``DataLoader``: it supports
  ``len()`` and integer indexing, which is what ``random_split`` and samplers
  rely on. It derives from ``Dataset`` rather than ``IterableDataset``
  because it defines ``__getitem__``/``__len__`` and no ``__iter__``.

  Args:
    ids: tensor of token ids, one row per example.
    mask: tensor of attention-mask values, indexed like ``ids``.
    labels: 2-D tensor with one label row per example.
  """

  def __init__(self, ids, mask, labels):
    super().__init__()
    self._ids = ids
    self._mask = mask
    self._labels = labels

  def __len__(self):
    """Return the number of examples (size of the first dimension of the ids)."""
    return self._ids.size(dim=0)

  def __getitem__(self, idx):
    """Return example ``idx`` as a dict of model inputs.

    The tensors are plain index views of the stored tensors, so they keep
    the dtype they were created with (the legacy ``torch.Tensor(...)``
    constructor is not used).
    """
    return {
        "input_ids": self._ids[idx],
        "attention_mask": self._mask[idx],
        "labels": self._labels[idx, :],
    }

In [None]:
# training and validation split - 90% train and 10% valid
from torch.utils.data import TensorDataset, random_split

# Bundle token ids, attention masks and one-hot labels into one indexable dataset.
dataset = MyDataLoader(input_ids, attention_masks, lab_tensor)

# 90% of the examples go to training, the remainder to validation.
num_examples = len(dataset)
train_size = int(0.9 * num_examples)
val_size = num_examples - train_size

# Assign examples to the two subsets at random.
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print(f'{train_size:>5,} training samples')
print(f'{val_size:>5,} validation samples')

2,523 training samples
  281 validation samples


In [None]:
from transformers import EvalPrediction
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

def compute_metrics(p: EvalPrediction):
  """Compute micro-averaged classification metrics for one evaluation pass.

  Args:
    p: prediction bundle; ``p.predictions`` holds the per-class scores (or a
      tuple whose first element does) and ``p.label_ids`` the reference
      labels, expected as one-hot rows with the same shape as the scores.

  Returns:
    dict with keys 'p', 'r', 'f1', 'roc_auc' and 'accuracy'.
  """
  y_true = p.label_ids
  preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
  scores = torch.as_tensor(preds)

  # Predicted class of each row = index of its largest score.
  args = torch.argmax(scores, dim=1)

  # One-hot encode the predictions row by row. Both a row index and a column
  # index are needed here: indexing the columns alone would set the chosen
  # columns in *every* row and turn all predictions into [1, 1].
  y_pred = torch.zeros(scores.shape)
  y_pred[torch.arange(scores.shape[0]), args] = 1
  y_pred = y_pred.numpy()

  precision = precision_score(y_true=y_true, y_pred=y_pred, average='micro')
  recall = recall_score(y_true=y_true, y_pred=y_pred, average='micro')
  f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
  # NOTE(review): ROC AUC is computed from the hard one-hot predictions, not
  # from probabilities; kept as in the original metric definition.
  roc_auc = roc_auc_score(y_true, y_pred, average='micro')
  accuracy = accuracy_score(y_true, y_pred)
  metrics = {'p': precision,
             'r': recall,
             'f1': f1_micro_average,
             'roc_auc': roc_auc,
             'accuracy': accuracy}
  return metrics

In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# Batch size for the DataLoaders. For fine-tuning BERT on a specific task the
# authors recommend a batch size of 16 or 32.
batch_size = 32

# Training batches are drawn in random order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=RandomSampler(train_dataset),
)

# Order does not matter for validation, so the samples are read sequentially.
validation_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    sampler=SequentialSampler(val_dataset),
)



In [None]:
# define the model - we will use BERTForSequenceClassification because it has the same BERT architecture but with a single classification layer on top
from transformers import AutoModelForSequenceClassification

# Load BertForSequenceClassification, the pretrained BERT model with a single
# linear classification layer on top.

# Set to True to train only the classification head on top of a frozen encoder.
# The default keeps the behaviour this notebook has had so far: the previous
# freeze loop tested for the substring 'Bert', which does not occur in the
# lower-case parameter names (e.g. 'classifier.weight'), so nothing was frozen.
FREEZE_ENCODER = False

def my_model_init():
  """Build a fresh sequence-classification model for the Trainer.

  Takes no arguments on purpose: it is passed as ``model_init`` so that every
  training run / hyperparameter trial starts from the pretrained checkpoint
  named by ``huggingface_model_name``.

  Returns:
    The model with a 2-label classification head, moved to ``device``.
  """
  model = AutoModelForSequenceClassification.from_pretrained(        # use DistilBertForSequenceClassification if you want
      huggingface_model_name,
      num_labels = 2, # The number of output labels--2 for binary classification.
                      # You can increase this for multi-class tasks.
      output_attentions = False, # Whether the model returns attentions weights.
      output_hidden_states = False, # Whether the model returns all hidden-states.
      return_dict=True
  )

  if FREEZE_ENCODER:
    # `base_model` is the pretrained encoder without the classification head,
    # so this works for BERT and DistilBERT checkpoints alike.
    for param in model.base_model.parameters():
      param.requires_grad = False

  model.to(device)

  return model

# move the model on cpu or on gpu
#model.to(device)

In [None]:
# Freeze the BERT model parameters
#for name, param in model.named_parameters():
#  if 'Bert' in name:
#    param.requires_grad = False

In [None]:
from contextlib import suppress
from transformers import Trainer, TrainingArguments
from ray import tune
#from ray.train import CheckpointConfig
from ray.tune import CLIReporter
from ray.tune.examples.pbt_transformers.utils import (
    download_data,
    build_compute_metrics_fn,
)
from ray.tune.schedulers import PopulationBasedTraining


batch_size = 16
num_epochs = 10

lr = 2e-5
eps= 1e-8
adam_beta_1 = 0.9
adam_beta_2 = 0.999

# Learning-rate warm-up is measured in optimizer steps, not in samples.
# Using `len(train_dataset) * num_epochs` made the warm-up far longer than a
# whole training run, so the learning rate never got close to its target
# (the trial logs show values around 1e-8). Warm up over the first 10% of
# the steps instead.
warmup_ratio = 0.1
steps_per_epoch = -(-len(train_dataset) // batch_size)  # ceiling division
# Warm-up length for the default configuration above, kept for reference.
warmup_steps = int(warmup_ratio * steps_per_epoch * num_epochs)



# `warmup_ratio` (rather than a fixed step count) is handed to the Trainer so
# that hyperparameter-search trials, which change batch size and epoch count,
# each get a proportionate warm-up.
training_args = TrainingArguments(output_dir="trainer",
                                  overwrite_output_dir=True,
                                  do_train=True,
                                  do_eval=True,
                                  evaluation_strategy='epoch',
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  learning_rate=lr,
                                  adam_beta1=adam_beta_1,
                                  adam_beta2=adam_beta_2,
                                  adam_epsilon=eps,
                                  lr_scheduler_type='linear',
                                  warmup_ratio=warmup_ratio,
                                  num_train_epochs=num_epochs,
                                  save_strategy='epoch',
                                  save_total_limit=5,
                                  load_best_model_at_end=True,
                                  #metric_for_best_model='eval_accuracy',
                                  logging_strategy='epoch')


# `model_init` (not a model instance) lets the Trainer rebuild the model for
# every run or trial.
trainer = Trainer(
    model_init=my_model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

#trainer.train()

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

# Search algorithm and scheduler both optimise the trial's "objective" value.
# Other search algorithms: https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
hpo_search_alg = HyperOptSearch(metric="objective", mode="max")
# Other schedulers: https://docs.ray.io/en/latest/tune/api_docs/schedulers.html
hpo_scheduler = ASHAScheduler(metric="objective", mode="max")

# Run the search with Ray Tune as the backend and keep the best trial.
best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="ray",
    search_alg=hpo_search_alg,
    scheduler=hpo_scheduler,
)

2023-09-25 12:54:53,980	INFO tune.py:666 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+-------------------------------------------------------------------+
| Configuration for experiment     _objective_2023-09-25_12-54-53   |
+-------------------------------------------------------------------+
| Search algorithm                 SearchGenerator                  |
| Scheduler                        AsyncHyperBandScheduler          |
| Number of trials                 20                               |
+-------------------------------------------------------------------+

View detailed results here: /root/ray_results/_objective_2023-09-25_12-54-53

Trial status: 1 PENDING
Current time: 2023-09-25 12:54:54. Total running time: 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+----------------------------------------------------------------------------------------------------------+
| Trial name            status       learning_rate     num_train_epochs      seed     ..._train_batch_size |
+---------------------------------------------------



Trial _objective_6c47dcc5 started with configuration:
+----------------------------------------------+
| Trial _objective_6c47dcc5 config             |
+----------------------------------------------+
| learning_rate                              0 |
| num_train_epochs                           3 |
| per_device_train_batch_size               64 |
| seed                                 17.2138 |
+----------------------------------------------+



[2m[36m(_objective pid=3684)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=3684)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/120 [00:00<?, ?it/s]
  1%|          | 1/120 [00:00<01:10,  1.69it/s]
  2%|▏         | 2/120 [00:00<00:54,  2.16it/s]
  2%|▎         | 3/120 [00:01<00:49,  2.36it/s]
  3%|▎         | 4/120 [00:01<00:47,  2.46it/s]
  4%|▍         | 5/120 [00:02<00:45,  2.52it/s]
  5%|▌         | 6/120 [00:02<00:44,  2.56it/s]
  6%|▌         | 7/120 [00:02<00:43,  2.58it/s]
  7%|▋         | 8/120 [00:03<00:43,  2.59it/s]
  8%|▊         | 9/120 [00:03<00:42,  2.61it/s]
  8%|▊         | 10/120 [00:04<00:42,  2.61it/s]
  9%|▉         | 11/120 [00:04<00:41,  2.61it/s]
 10%|█         | 12/120 [00:04<00:41,  2.61it/s]
 11%|█         | 1

[2m[36m(_objective pid=3684)[0m {'loss': 0.7045, 'learning_rate': 5.976066750135957e-09, 'epoch': 1.0}


[2m[36m(_objective pid=3684)[0m 
[2m[36m(_objective pid=3684)[0m  44%|████▍     | 8/18 [00:00<00:00, 30.15it/s][A
[2m[36m(_objective pid=3684)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 29.35it/s][A
[2m[36m(_objective pid=3684)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 29.05it/s][A
[2m[36m(_objective pid=3684)[0m 
100%|██████████| 18/18 [00:00<00:00, 29.07it/s][A


Trial _objective_6c47dcc5 finished iteration 1 at 2023-09-25 12:55:22. Total running time: 28s
+----------------------------------------------+
| Trial _objective_6c47dcc5 result             |
+----------------------------------------------+
| time_this_iter_s                     19.2814 |
| time_total_s                         19.2814 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.70687 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6847 |
| eval_samples_per_second              410.371 |
| eval_steps_per_second                 26.287 |
| objective                            2.56228 |
+----------------------------------------------+

[2m[36m(_objective p

[2m[36m(_objective pid=3684)[0m                                                 
[2m[36m(_objective pid=3684)[0m                                                [A 33%|███▎      | 40/120 [00:16<00:26,  3.07it/s]
[2m[36m(_objective pid=3684)[0m 100%|██████████| 18/18 [00:00<00:00, 29.07it/s][A
[2m[36m(_objective pid=3684)[0m                                                [A


Trial status: 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:55:24. Total running time: 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   RUNNING        3.7694e-06                     3   17.2137                        64        1            19.2814       2.56228      0.706871   0.512456   0.512456 |
| _objective_c691cc98   PENDING        3.04275e-05                    2   

 34%|███▍      | 41/120 [00:22<03:07,  2.38s/it]
 35%|███▌      | 42/120 [00:22<02:18,  1.78s/it]
 36%|███▌      | 43/120 [00:23<01:44,  1.36s/it]
 37%|███▋      | 44/120 [00:23<01:21,  1.07s/it]
 38%|███▊      | 45/120 [00:24<01:04,  1.16it/s]
 38%|███▊      | 46/120 [00:24<00:53,  1.39it/s]
 39%|███▉      | 47/120 [00:24<00:44,  1.62it/s]
 40%|████      | 48/120 [00:25<00:39,  1.83it/s]
 41%|████      | 49/120 [00:25<00:35,  2.01it/s]
 42%|████▏     | 50/120 [00:25<00:32,  2.16it/s]
 42%|████▎     | 51/120 [00:26<00:30,  2.28it/s]
 43%|████▎     | 52/120 [00:26<00:28,  2.37it/s]
 44%|████▍     | 53/120 [00:27<00:27,  2.44it/s]
 45%|████▌     | 54/120 [00:27<00:26,  2.47it/s]
 46%|████▌     | 55/120 [00:27<00:25,  2.50it/s]
 47%|████▋     | 56/120 [00:28<00:25,  2.53it/s]
 48%|████▊     | 57/120 [00:28<00:24,  2.53it/s]
 48%|████▊     | 58/120 [00:29<00:24,  2.54it/s]
 49%|████▉     | 59/120 [00:29<00:23,  2.56it/s]
 50%|█████     | 60/120 [00:29<00:23,  2.56it/s]
 51%|█████     | 61/

[2m[36m(_objective pid=3684)[0m {'loss': 0.7071, 'learning_rate': 1.1952133500271914e-08, 'epoch': 2.0}


[2m[36m(_objective pid=3684)[0m 
[2m[36m(_objective pid=3684)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=3684)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.67it/s][A
[2m[36m(_objective pid=3684)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.23it/s][A
[2m[36m(_objective pid=3684)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.67it/s][A


Trial _objective_6c47dcc5 finished iteration 2 at 2023-09-25 12:55:44. Total running time: 50s
+----------------------------------------------+
| Trial _objective_6c47dcc5 result             |
+----------------------------------------------+
| time_this_iter_s                     21.9678 |
| time_total_s                         41.2493 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.70664 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6195 |
| eval_samples_per_second              453.617 |
| eval_steps_per_second                 29.057 |
| objective                            2.56228 |
+----------------------------------------------+

[2m[36m(_objective p

[2m[36m(_objective pid=3684)[0m 
[2m[36m(_objective pid=3684)[0m  89%|████████▉ | 16/18 [00:00<00:00, 30.11it/s][A                                                
[2m[36m(_objective pid=3684)[0m                                                [A 67%|██████▋   | 80/120 [00:37<00:12,  3.09it/s]
[2m[36m(_objective pid=3684)[0m 100%|██████████| 18/18 [00:00<00:00, 30.11it/s][A
                                               [A
 68%|██████▊   | 81/120 [00:43<01:16,  1.96s/it]
 68%|██████▊   | 82/120 [00:43<00:56,  1.49s/it]
 69%|██████▉   | 83/120 [00:43<00:42,  1.16s/it]
 70%|███████   | 84/120 [00:44<00:33,  1.08it/s]
 71%|███████   | 85/120 [00:44<00:26,  1.31it/s]
 72%|███████▏  | 86/120 [00:45<00:22,  1.54it/s]
 72%|███████▎  | 87/120 [00:45<00:18,  1.76it/s]
 73%|███████▎  | 88/120 [00:45<00:16,  1.95it/s]
 74%|███████▍  | 89/120 [00:46<00:14,  2.11it/s]
 75%|███████▌  | 90/120 [00:46<00:13,  2.24it/s]
 76%|███████▌  | 91/120 [00:46<00:12,  2.32it/s]
 77%|███████▋ 

Trial status: 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:55:54. Total running time: 1min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   RUNNING        3.7694e-06                     3   17.2137                        64        2            41.2493       2.56228      0.706637   0.512456   0.512456 |
| _objective_c691cc98   PENDING        3.04275e-05                    

 78%|███████▊  | 94/120 [00:48<00:10,  2.47it/s]
 79%|███████▉  | 95/120 [00:48<00:09,  2.50it/s]
 80%|████████  | 96/120 [00:48<00:09,  2.52it/s]
 81%|████████  | 97/120 [00:49<00:09,  2.53it/s]
 82%|████████▏ | 98/120 [00:50<00:14,  1.54it/s]
 82%|████████▎ | 99/120 [00:51<00:14,  1.47it/s]
 83%|████████▎ | 100/120 [00:51<00:11,  1.69it/s]
 84%|████████▍ | 101/120 [00:52<00:10,  1.87it/s]
 85%|████████▌ | 102/120 [00:52<00:08,  2.04it/s]
 86%|████████▌ | 103/120 [00:52<00:07,  2.17it/s]
 87%|████████▋ | 104/120 [00:53<00:07,  2.25it/s]
 88%|████████▊ | 105/120 [00:53<00:06,  2.34it/s]
 88%|████████▊ | 106/120 [00:54<00:05,  2.40it/s]
 89%|████████▉ | 107/120 [00:54<00:05,  2.45it/s]
 90%|█████████ | 108/120 [00:54<00:04,  2.49it/s]
 91%|█████████ | 109/120 [00:55<00:04,  2.53it/s]
 92%|█████████▏| 110/120 [00:55<00:03,  2.55it/s]
 92%|█████████▎| 111/120 [00:55<00:03,  2.57it/s]
 93%|█████████▎| 112/120 [00:56<00:03,  2.59it/s]
 94%|█████████▍| 113/120 [00:56<00:02,  2.59it/s]
 95%|█

[2m[36m(_objective pid=3684)[0m {'loss': 0.7044, 'learning_rate': 1.7928200250407868e-08, 'epoch': 3.0}


[2m[36m(_objective pid=3684)[0m 
[2m[36m(_objective pid=3684)[0m  44%|████▍     | 8/18 [00:00<00:00, 32.18it/s][A
[2m[36m(_objective pid=3684)[0m 
[2m[36m(_objective pid=3684)[0m  67%|██████▋   | 12/18 [00:00<00:00, 30.57it/s][A


Trial _objective_6c47dcc5 finished iteration 3 at 2023-09-25 12:56:06. Total running time: 1min 12s
+----------------------------------------------+
| Trial _objective_6c47dcc5 result             |
+----------------------------------------------+
| time_this_iter_s                     21.8723 |
| time_total_s                         63.1216 |
| training_iteration                         3 |
| epoch                                      3 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.70625 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6234 |
| eval_samples_per_second              450.745 |
| eval_steps_per_second                 28.873 |
| objective                            2.56228 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=3684)[0m 
[2m[36m(_objective pid=3684)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.84it/s][A                                                 
[2m[36m(_objective pid=3684)[0m                                                [A100%|██████████| 120/120 [00:59<00:00,  3.10it/s]
[2m[36m(_objective pid=3684)[0m 100%|██████████| 18/18 [00:00<00:00, 29.84it/s][A
[2m[36m(_objective pid=3684)[0m                                                [A


Trial _objective_6c47dcc5 completed after 3 iterations at 2023-09-25 12:56:18. Total running time: 1min 24s

[2m[36m(_objective pid=3684)[0m {'train_runtime': 72.2099, 'train_samples_per_second': 104.819, 'train_steps_per_second': 1.662, 'train_loss': 0.7053444544474284, 'epoch': 3.0}


[2m[36m(_objective pid=3684)[0m                                                  100%|██████████| 120/120 [01:12<00:00,  3.10it/s]100%|██████████| 120/120 [01:12<00:00,  1.66it/s]


Trial status: 1 TERMINATED | 1 PENDING
Current time: 2023-09-25 12:56:24. Total running time: 1min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   PENDING          3.04275e-05      



Trial _objective_c691cc98 started with configuration:
+----------------------------------------------+
| Trial _objective_c691cc98 config             |
+----------------------------------------------+
| learning_rate                          3e-05 |
| num_train_epochs                           2 |
| per_device_train_batch_size                4 |
| seed                                 9.93058 |
+----------------------------------------------+



[2m[36m(_objective pid=4069)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=4069)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/1262 [00:00<?, ?it/s]
  0%|          | 1/1262 [00:00<06:56,  3.03it/s]
  0%|          | 3/1262 [00:00<02:51,  7.33it/s]
  0%|          | 5/1262 [00:00<02:18,  9.06it/s]
  1%|          | 7/1262 [00:00<01:56, 10.81it/s]
  1%|          | 9/1262 [00:00<01:43, 12.09it/s]
  1%|          | 11/1262 [00:01<01:37, 12.78it/s]
  1%|          | 13/1262 [00:01<01:33, 13.33it/s]
  1%|          | 15/1262 [00:01<01:31, 13.60it/s]
  1%|▏         | 17/1262 [00:01<01:30, 13.70it/s]
  2%|▏         | 19/1262 [00:01<01:33, 13.32it/s]
  2%|▏         | 21/1262 [00:01<01:33, 13.28it/s]
  2%|▏         | 23/1262 [00:01<01:34, 13.07it/s]
 

Trial status: 1 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:56:54. Total running time: 2min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_c691cc98   RUNNING          3.04275e-05                    2    9.93058                        4                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7694

[2m[36m(_objective pid=4069)[0m  26%|██▋       | 333/1262 [00:23<01:14, 12.51it/s]
 27%|██▋       | 335/1262 [00:24<01:13, 12.69it/s]
 27%|██▋       | 337/1262 [00:24<01:06, 13.85it/s]
 27%|██▋       | 339/1262 [00:24<01:02, 14.73it/s]
 27%|██▋       | 341/1262 [00:24<01:00, 15.20it/s]
 27%|██▋       | 343/1262 [00:24<00:58, 15.74it/s]
 27%|██▋       | 345/1262 [00:24<00:58, 15.63it/s]
 27%|██▋       | 347/1262 [00:24<00:56, 16.11it/s]
 28%|██▊       | 349/1262 [00:24<00:55, 16.45it/s]
 28%|██▊       | 351/1262 [00:24<00:54, 16.57it/s]
 28%|██▊       | 353/1262 [00:25<00:54, 16.77it/s]
 28%|██▊       | 355/1262 [00:25<00:53, 16.97it/s]
 28%|██▊       | 357/1262 [00:25<00:53, 16.90it/s]
 28%|██▊       | 359/1262 [00:25<00:53, 16.86it/s]
 29%|██▊       | 361/1262 [00:25<00:53, 16.89it/s]
 29%|██▉       | 363/1262 [00:25<00:55, 16.33it/s]
 29%|██▉       | 365/1262 [00:25<00:55, 16.24it/s]
 29%|██▉       | 367/1262 [00:25<00:53, 16.83it/s]
 29%|██▉       | 369/1262 [00:26<00:52, 17.07i

[2m[36m(_objective pid=4069)[0m {'loss': 0.6796, 'learning_rate': 7.609883764529126e-07, 'epoch': 1.0}


[2m[36m(_objective pid=4069)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 39.48it/s][A
[2m[36m(_objective pid=4069)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.91it/s][A
[2m[36m(_objective pid=4069)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 31.16it/s][A
[2m[36m(_objective pid=4069)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 30.54it/s][A
                                                  
 50%|█████     | 631/1262 [00:43<00:38, 16.30it/s]
100%|██████████| 18/18 [00:00<00:00, 30.54it/s][A
                                               [A


Trial _objective_c691cc98 finished iteration 1 at 2023-09-25 12:57:14. Total running time: 2min 20s
+----------------------------------------------+
| Trial _objective_c691cc98 result             |
+----------------------------------------------+
| time_this_iter_s                     47.8185 |
| time_total_s                         47.8185 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.63351 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6157 |
| eval_samples_per_second              456.395 |
| eval_steps_per_second                 29.235 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

 50%|█████     | 633/1262 [00:50<12:27,  1.19s/it]
 50%|█████     | 635/1262 [00:50<08:52,  1.18it/s]
 50%|█████     | 637/1262 [00:51<07:27,  1.40it/s]
 51%|█████     | 639/1262 [00:51<05:23,  1.93it/s]
 51%|█████     | 641/1262 [00:52<03:56,  2.63it/s]
 51%|█████     | 643/1262 [00:52<02:55,  3.52it/s]
 51%|█████     | 645/1262 [00:52<02:12,  4.65it/s]
 51%|█████▏    | 647/1262 [00:52<01:42,  5.98it/s]
 51%|█████▏    | 649/1262 [00:52<01:22,  7.47it/s]
 52%|█████▏    | 651/1262 [00:52<01:08,  8.88it/s]
 52%|█████▏    | 653/1262 [00:52<01:00, 10.12it/s]
 52%|█████▏    | 655/1262 [00:52<00:55, 11.00it/s]
 52%|█████▏    | 657/1262 [00:53<00:50, 11.89it/s]
 52%|█████▏    | 659/1262 [00:53<00:48, 12.49it/s]
 52%|█████▏    | 661/1262 [00:53<00:47, 12.70it/s]
 53%|█████▎    | 663/1262 [00:53<00:47, 12.67it/s]
 53%|█████▎    | 665/1262 [00:53<00:47, 12.69it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:57:24. Total running time: 2min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_c691cc98   RUNNING          3.04275e-05                    2    9.93058                        4        1            47.8185       2.66667      0.633506   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.769

[2m[36m(_objective pid=4069)[0m  53%|█████▎    | 667/1262 [00:53<00:46, 12.82it/s]
 53%|█████▎    | 669/1262 [00:53<00:44, 13.19it/s]
 53%|█████▎    | 671/1262 [00:54<00:44, 13.38it/s]
 53%|█████▎    | 673/1262 [00:54<00:43, 13.64it/s]
 53%|█████▎    | 675/1262 [00:54<00:43, 13.57it/s]
 54%|█████▎    | 677/1262 [00:54<00:44, 13.07it/s]
 54%|█████▍    | 679/1262 [00:54<00:45, 12.75it/s]
 54%|█████▍    | 681/1262 [00:54<00:45, 12.71it/s]
 54%|█████▍    | 683/1262 [00:54<00:44, 13.08it/s]
 54%|█████▍    | 685/1262 [00:55<00:43, 13.37it/s]
 54%|█████▍    | 687/1262 [00:55<00:41, 13.88it/s]
 55%|█████▍    | 689/1262 [00:55<00:41, 13.64it/s]
 55%|█████▍    | 691/1262 [00:55<00:42, 13.50it/s]
 55%|█████▍    | 693/1262 [00:55<00:41, 13.68it/s]
 55%|█████▌    | 695/1262 [00:55<00:43, 13.02it/s]
 55%|█████▌    | 697/1262 [00:55<00:41, 13.53it/s]
 55%|█████▌    | 699/1262 [00:56<00:40, 13.96it/s]
 56%|█████▌    | 701/1262 [00:56<00:44, 12.74it/s]
 56%|█████▌    | 703/1262 [00:56<00:43, 12.76i

Trial status: 1 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:57:54. Total running time: 3min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_c691cc98   RUNNING          3.04275e-05                    2    9.93058                        4        1            47.8185       2.66667      0.633506   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7694

[2m[36m(_objective pid=4069)[0m  89%|████████▉ | 1123/1262 [01:23<00:07, 17.99it/s]
 89%|████████▉ | 1125/1262 [01:23<00:07, 17.59it/s]
 89%|████████▉ | 1127/1262 [01:23<00:07, 17.48it/s]
 89%|████████▉ | 1129/1262 [01:24<00:07, 17.35it/s]
 90%|████████▉ | 1131/1262 [01:24<00:07, 17.62it/s]
 90%|████████▉ | 1133/1262 [01:24<00:07, 17.62it/s]
 90%|████████▉ | 1135/1262 [01:24<00:07, 17.64it/s]
 90%|█████████ | 1137/1262 [01:24<00:07, 17.66it/s]
 90%|█████████ | 1139/1262 [01:24<00:06, 17.77it/s]
 90%|█████████ | 1141/1262 [01:24<00:07, 17.15it/s]
 91%|█████████ | 1143/1262 [01:24<00:07, 16.38it/s]
 91%|█████████ | 1145/1262 [01:25<00:07, 16.66it/s]
 91%|█████████ | 1147/1262 [01:25<00:06, 16.93it/s]
 91%|█████████ | 1149/1262 [01:25<00:06, 17.04it/s]
 91%|█████████ | 1151/1262 [01:25<00:06, 17.14it/s]
 91%|█████████▏| 1153/1262 [01:25<00:06, 17.30it/s]
 92%|█████████▏| 1155/1262 [01:25<00:06, 17.05it/s]
 92%|█████████▏| 1157/1262 [01:25<00:06, 17.09it/s]
 92%|█████████▏| 1159/1262 [

[2m[36m(_objective pid=4069)[0m {'loss': 0.564, 'learning_rate': 1.5219767529058252e-06, 'epoch': 2.0}


[2m[36m(_objective pid=4069)[0m 
[2m[36m(_objective pid=4069)[0m  44%|████▍     | 8/18 [00:00<00:00, 30.02it/s][A
[2m[36m(_objective pid=4069)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 29.41it/s][A
[2m[36m(_objective pid=4069)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 28.86it/s][A
[2m[36m(_objective pid=4069)[0m 
                                                   
100%|██████████| 1262/1262 [01:32<00:00, 13.49it/s]
100%|██████████| 18/18 [00:00<00:00, 28.98it/s][A
                                               [A


Trial _objective_c691cc98 finished iteration 2 at 2023-09-25 12:58:03. Total running time: 3min 9s
+----------------------------------------------+
| Trial _objective_c691cc98 result             |
+----------------------------------------------+
| time_this_iter_s                     49.0505 |
| time_total_s                         96.8691 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.44916 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6683 |
| eval_samples_per_second              420.439 |
| eval_steps_per_second                 26.932 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objecti

[2m[36m(_objective pid=4069)[0m                                                    100%|██████████| 1262/1262 [01:45<00:00, 13.49it/s]100%|██████████| 1262/1262 [01:45<00:00, 11.94it/s]


Trial status: 2 TERMINATED | 1 PENDING
Current time: 2023-09-25 12:58:24. Total running time: 3min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05      



Trial _objective_d6ca8ba7 started with configuration:
+----------------------------------------------+
| Trial _objective_d6ca8ba7 config             |
+----------------------------------------------+
| learning_rate                              0 |
| num_train_epochs                           5 |
| per_device_train_batch_size                8 |
| seed                                 36.0368 |
+----------------------------------------------+



[2m[36m(_objective pid=4600)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=4600)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/1580 [00:00<?, ?it/s]
  0%|          | 1/1580 [00:00<07:56,  3.31it/s]
  0%|          | 3/1580 [00:00<03:31,  7.45it/s]
  0%|          | 5/1580 [00:00<02:44,  9.59it/s]
  0%|          | 7/1580 [00:00<02:22, 11.03it/s]
  1%|          | 9/1580 [00:00<02:12, 11.85it/s]
  1%|          | 11/1580 [00:01<02:06, 12.39it/s]
  1%|          | 13/1580 [00:01<02:02, 12.82it/s]
  1%|          | 15/1580 [00:01<01:58, 13.16it/s]
  1%|          | 17/1580 [00:01<01:56, 13.45it/s]
  1%|          | 19/1580 [00:01<01:54, 13.62it/s]
  1%|▏         | 21/1580 [00:01<01:54, 13.58it/s]
  1%|▏         | 23/1580 [00:01<01:54, 13.58it/s]
 

Trial status: 2 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:58:54. Total running time: 4min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_d6ca8ba7   RUNNING          1.4175e-06                     5   36.0368                         8                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7694

[2m[36m(_objective pid=4600)[0m                                                    20%|██        | 316/1580 [00:24<01:34, 13.43it/s]
[2m[36m(_objective pid=4600)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=4600)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.34it/s][A
[2m[36m(_objective pid=4600)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.27it/s][A
[2m[36m(_objective pid=4600)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.56it/s][A
[2m[36m(_objective pid=4600)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.64it/s][A
                                                  
 20%|██        | 316/1580 [00:25<01:34, 13.43it/s]
100%|██████████| 18/18 [00:00<00:00, 29.64it/s][A
                                               [A


Trial _objective_d6ca8ba7 finished iteration 1 at 2023-09-25 12:58:55. Total running time: 4min 1s
+----------------------------------------------+
| Trial _objective_d6ca8ba7 result             |
+----------------------------------------------+
| time_this_iter_s                     28.5677 |
| time_total_s                         28.5677 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69972 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6406 |
| eval_samples_per_second              438.646 |
| eval_steps_per_second                 28.098 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objecti

 20%|██        | 317/1580 [00:30<20:26,  1.03it/s]
 20%|██        | 319/1580 [00:31<14:51,  1.41it/s]
 20%|██        | 321/1580 [00:31<10:57,  1.91it/s]
 20%|██        | 323/1580 [00:31<08:13,  2.55it/s]
 21%|██        | 325/1580 [00:31<06:19,  3.30it/s]
 21%|██        | 327/1580 [00:31<05:00,  4.18it/s]
 21%|██        | 329/1580 [00:31<04:01,  5.18it/s]
 21%|██        | 331/1580 [00:32<03:22,  6.18it/s]
 21%|██        | 333/1580 [00:32<02:54,  7.13it/s]
 21%|██        | 335/1580 [00:32<02:35,  7.98it/s]
 21%|██▏       | 337/1580 [00:32<02:21,  8.79it/s]
 21%|██▏       | 339/1580 [00:32<02:13,  9.33it/s]
 22%|██▏       | 341/1580 [00:33<02:06,  9.76it/s]
 22%|██▏       | 343/1580 [00:33<02:02, 10.11it/s]
 22%|██▏       | 345/1580 [00:33<02:03, 10.04it/s]
 22%|██▏       | 347/1580 [00:33<01:59, 10.33it/s]
 22%|██▏       | 349/1580 [00:33<01:55, 10.64it/s]
 22%|██▏       | 351/1580 [00:33<01:52, 10.88it/s]
 22%|██▏       | 353/1580 [00:34<01:50, 11.10it/s]
 22%|██▏       | 355/1580 [00:3

Trial status: 2 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:59:24. Total running time: 4min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_d6ca8ba7   RUNNING          1.4175e-06                     5   36.0368                         8        1            28.5677       2.66667      0.699724   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.769

[2m[36m(_objective pid=4600)[0m  39%|███▉      | 613/1580 [00:54<01:11, 13.45it/s]
 39%|███▉      | 615/1580 [00:55<01:11, 13.56it/s]
 39%|███▉      | 617/1580 [00:55<01:11, 13.38it/s]
 39%|███▉      | 619/1580 [00:55<01:12, 13.30it/s]
 39%|███▉      | 621/1580 [00:55<01:11, 13.39it/s]
 39%|███▉      | 623/1580 [00:55<01:10, 13.52it/s]
 40%|███▉      | 625/1580 [00:55<01:09, 13.66it/s]
 40%|███▉      | 627/1580 [00:55<01:11, 13.34it/s]
 40%|███▉      | 629/1580 [00:56<01:12, 13.12it/s]


[2m[36m(_objective pid=4600)[0m {'loss': 0.6997, 'learning_rate': 3.5507801801119244e-08, 'epoch': 2.0}


[2m[36m(_objective pid=4600)[0m  40%|███▉      | 631/1580 [00:56<01:12, 13.15it/s]                                                   40%|████      | 632/1580 [00:56<01:12, 13.15it/s]
[2m[36m(_objective pid=4600)[0m 
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=4600)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 39.02it/s][A
[2m[36m(_objective pid=4600)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.64it/s][A
[2m[36m(_objective pid=4600)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 31.01it/s][A


Trial _objective_d6ca8ba7 finished iteration 2 at 2023-09-25 12:59:26. Total running time: 4min 32s
+----------------------------------------------+
| Trial _objective_d6ca8ba7 result             |
+----------------------------------------------+
| time_this_iter_s                     31.4022 |
| time_total_s                           59.97 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69526 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6168 |
| eval_samples_per_second              455.543 |
| eval_steps_per_second                 29.181 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=4600)[0m 
[2m[36m(_objective pid=4600)[0m  89%|████████▉ | 16/18 [00:00<00:00, 30.14it/s][A                                                  
[2m[36m(_objective pid=4600)[0m                                                [A 40%|████      | 632/1580 [00:56<01:12, 13.15it/s]
[2m[36m(_objective pid=4600)[0m 100%|██████████| 18/18 [00:00<00:00, 30.14it/s][A
                                               [A
 40%|████      | 633/1580 [01:06<25:04,  1.59s/it]
 40%|████      | 635/1580 [01:06<17:54,  1.14s/it]
 40%|████      | 637/1580 [01:06<12:55,  1.22it/s]
 40%|████      | 639/1580 [01:06<09:27,  1.66it/s]
 41%|████      | 641/1580 [01:07<07:00,  2.24it/s]
 41%|████      | 643/1580 [01:07<05:19,  2.94it/s]
 41%|████      | 645/1580 [01:07<04:07,  3.78it/s]
 41%|████      | 647/1580 [01:07<03:18,  4.69it/s]
 41%|████      | 649/1580 [01:07<02:44,  5.66it/s]
 41%|████      | 651/1580 [01:08<02:20,  6.60it/s]
 41%|████▏     | 653/1580 [01:08<02:04,

Trial status: 2 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 12:59:54. Total running time: 5min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_d6ca8ba7   RUNNING          1.4175e-06                     5   36.0368                         8        2            59.97         2.66667      0.695262   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7694

 55%|█████▍    | 865/1580 [01:25<01:05, 10.95it/s]
 55%|█████▍    | 867/1580 [01:25<01:03, 11.17it/s]
 55%|█████▌    | 869/1580 [01:25<01:03, 11.20it/s]
 55%|█████▌    | 871/1580 [01:25<01:02, 11.37it/s]
 55%|█████▌    | 873/1580 [01:25<01:02, 11.34it/s]
 55%|█████▌    | 875/1580 [01:26<01:02, 11.21it/s]
 56%|█████▌    | 877/1580 [01:26<01:03, 11.13it/s]
 56%|█████▌    | 879/1580 [01:26<01:03, 10.98it/s]
 56%|█████▌    | 881/1580 [01:26<01:03, 10.97it/s]
 56%|█████▌    | 883/1580 [01:26<01:02, 11.08it/s]
 56%|█████▌    | 885/1580 [01:26<01:02, 11.19it/s]
 56%|█████▌    | 887/1580 [01:27<01:03, 10.96it/s]
 56%|█████▋    | 889/1580 [01:27<01:03, 10.94it/s]
 56%|█████▋    | 891/1580 [01:27<01:02, 11.01it/s]
 57%|█████▋    | 893/1580 [01:27<01:01, 11.09it/s]
 57%|█████▋    | 895/1580 [01:27<01:01, 11.07it/s]
 57%|█████▋    | 897/1580 [01:28<01:02, 10.97it/s]
 57%|█████▋    | 899/1580 [01:28<01:02, 10.93it/s]
 57%|█████▋    | 901/1580 [01:28<01:02, 10.85it/s]
 57%|█████▋    | 903/1580 [01:2

[2m[36m(_objective pid=4600)[0m {'loss': 0.6959, 'learning_rate': 5.326170270167887e-08, 'epoch': 3.0}


[2m[36m(_objective pid=4600)[0m  60%|█████▉    | 947/1580 [01:32<00:46, 13.53it/s]                                                   60%|██████    | 948/1580 [01:32<00:46, 13.53it/s]
[2m[36m(_objective pid=4600)[0m 
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=4600)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.31it/s][A
[2m[36m(_objective pid=4600)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.29it/s][A
[2m[36m(_objective pid=4600)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.84it/s][A


Trial _objective_d6ca8ba7 finished iteration 3 at 2023-09-25 13:00:02. Total running time: 5min 8s
+----------------------------------------------+
| Trial _objective_d6ca8ba7 result             |
+----------------------------------------------+
| time_this_iter_s                     35.8868 |
| time_total_s                         95.8567 |
| training_iteration                         3 |
| epoch                                      3 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.68812 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6185 |
| eval_samples_per_second              454.303 |
| eval_steps_per_second                 29.101 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objecti

[2m[36m(_objective pid=4600)[0m 
[2m[36m(_objective pid=4600)[0m  89%|████████▉ | 16/18 [00:00<00:00, 30.18it/s][A                                                  
[2m[36m(_objective pid=4600)[0m                                                [A 60%|██████    | 948/1580 [01:32<00:46, 13.53it/s]
[2m[36m(_objective pid=4600)[0m 100%|██████████| 18/18 [00:00<00:00, 30.18it/s][A
                                               [A
 60%|██████    | 949/1580 [01:37<09:38,  1.09it/s]
 60%|██████    | 951/1580 [01:37<06:57,  1.51it/s]
 60%|██████    | 953/1580 [01:38<05:05,  2.05it/s]
 60%|██████    | 955/1580 [01:38<03:46,  2.76it/s]
 61%|██████    | 957/1580 [01:38<02:52,  3.61it/s]
 61%|██████    | 959/1580 [01:38<02:13,  4.66it/s]
 61%|██████    | 961/1580 [01:38<01:46,  5.81it/s]
 61%|██████    | 963/1580 [01:38<01:27,  7.03it/s]
 61%|██████    | 965/1580 [01:39<01:14,  8.25it/s]
 61%|██████    | 967/1580 [01:39<01:05,  9.32it/s]
 61%|██████▏   | 969/1580 [01:39<00:59,

Trial status: 2 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:00:24. Total running time: 5min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_d6ca8ba7   RUNNING          1.4175e-06                     5   36.0368                         8        3            95.8567       2.66667      0.68812    0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.769

[2m[36m(_objective pid=4600)[0m  74%|███████▎  | 1165/1580 [01:55<00:31, 13.13it/s]
 74%|███████▍  | 1167/1580 [01:55<00:30, 13.35it/s]
 74%|███████▍  | 1169/1580 [01:55<00:30, 13.49it/s]
 74%|███████▍  | 1171/1580 [01:55<00:30, 13.38it/s]
 74%|███████▍  | 1173/1580 [01:55<00:30, 13.50it/s]
 74%|███████▍  | 1175/1580 [01:55<00:30, 13.42it/s]
 74%|███████▍  | 1177/1580 [01:56<00:30, 13.35it/s]
 75%|███████▍  | 1179/1580 [01:56<00:29, 13.52it/s]
 75%|███████▍  | 1181/1580 [01:56<00:29, 13.55it/s]
 75%|███████▍  | 1183/1580 [01:56<00:29, 13.45it/s]
 75%|███████▌  | 1185/1580 [01:56<00:29, 13.53it/s]
 75%|███████▌  | 1187/1580 [01:56<00:28, 13.64it/s]
 75%|███████▌  | 1189/1580 [01:56<00:28, 13.71it/s]
 75%|███████▌  | 1191/1580 [01:57<00:28, 13.57it/s]
 76%|███████▌  | 1193/1580 [01:57<00:28, 13.48it/s]
 76%|███████▌  | 1195/1580 [01:57<00:28, 13.56it/s]
 76%|███████▌  | 1197/1580 [01:57<00:28, 13.61it/s]
 76%|███████▌  | 1199/1580 [01:57<00:27, 13.68it/s]
 76%|███████▌  | 1201/1580 [

[2m[36m(_objective pid=4600)[0m {'loss': 0.6907, 'learning_rate': 7.101560360223849e-08, 'epoch': 4.0}


[2m[36m(_objective pid=4600)[0m                                                     80%|████████  | 1264/1580 [02:03<00:27, 11.36it/s]
[2m[36m(_objective pid=4600)[0m 
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=4600)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 37.62it/s][A
[2m[36m(_objective pid=4600)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 28.59it/s][A
[2m[36m(_objective pid=4600)[0m 
 61%|██████    | 11/18 [00:00<00:00, 28.25it/s][A
[2m[36m(_objective pid=4600)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 27.92it/s][A
[2m[36m(_objective pid=4600)[0m 
 94%|█████████▍| 17/18 [00:00<00:00, 27.23it/s][A
                                                   
 80%|████████  | 1264/1580 [02:03<00:27, 11.36it/s]
100%|██████████| 18/18 [00:00<00:00, 27.23it/s][A
                                               [A


Trial _objective_d6ca8ba7 finished iteration 4 at 2023-09-25 13:00:33. Total running time: 5min 39s
+----------------------------------------------+
| Trial _objective_d6ca8ba7 result             |
+----------------------------------------------+
| time_this_iter_s                     31.1883 |
| time_total_s                         127.045 |
| training_iteration                         4 |
| epoch                                      4 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.67924 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6992 |
| eval_samples_per_second              401.914 |
| eval_steps_per_second                 25.745 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=4600)[0m  80%|████████  | 1265/1580 [02:16<11:09,  2.13s/it]
 80%|████████  | 1267/1580 [02:17<07:52,  1.51s/it]
 80%|████████  | 1269/1580 [02:17<05:35,  1.08s/it]
 80%|████████  | 1271/1580 [02:17<04:00,  1.29it/s]
 81%|████████  | 1273/1580 [02:17<02:53,  1.77it/s]
 81%|████████  | 1275/1580 [02:17<02:08,  2.38it/s]
 81%|████████  | 1277/1580 [02:17<01:37,  3.11it/s]
 81%|████████  | 1279/1580 [02:18<01:15,  3.98it/s]
 81%|████████  | 1281/1580 [02:18<00:59,  5.01it/s]
 81%|████████  | 1283/1580 [02:18<00:50,  5.84it/s]
 81%|████████▏ | 1285/1580 [02:18<00:43,  6.82it/s]
 81%|████████▏ | 1287/1580 [02:18<00:37,  7.75it/s]
 82%|████████▏ | 1289/1580 [02:18<00:34,  8.45it/s]
 82%|████████▏ | 1291/1580 [02:19<00:31,  9.23it/s]
 82%|████████▏ | 1293/1580 [02:20<01:01,  4.63it/s]
 82%|████████▏ | 1295/1580 [02:20<00:50,  5.67it/s]
 82%|████████▏ | 1297/1580 [02:20<00:41,  6.75it/s]
 82%|████████▏ | 1299/1580 [02:20<00:36,  7.63it/s]
 82%|████████▏ | 1301/1580 [

Trial status: 2 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:00:54. Total running time: 6min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_d6ca8ba7   RUNNING          1.4175e-06                     5   36.0368                         8        4           127.045        2.66667      0.67924    0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7694

[2m[36m(_objective pid=4600)[0m  85%|████████▌ | 1349/1580 [02:25<00:20, 11.09it/s]
 86%|████████▌ | 1351/1580 [02:25<00:20, 11.09it/s]
 86%|████████▌ | 1353/1580 [02:25<00:20, 11.00it/s]
 86%|████████▌ | 1355/1580 [02:25<00:20, 11.14it/s]
 86%|████████▌ | 1357/1580 [02:25<00:19, 11.27it/s]
 86%|████████▌ | 1359/1580 [02:25<00:19, 11.07it/s]
 86%|████████▌ | 1361/1580 [02:26<00:19, 11.03it/s]
 86%|████████▋ | 1363/1580 [02:26<00:18, 11.57it/s]
 86%|████████▋ | 1365/1580 [02:26<00:17, 12.07it/s]
 87%|████████▋ | 1367/1580 [02:26<00:17, 12.49it/s]
 87%|████████▋ | 1369/1580 [02:26<00:16, 12.93it/s]
 87%|████████▋ | 1371/1580 [02:26<00:15, 13.10it/s]
 87%|████████▋ | 1373/1580 [02:27<00:15, 13.31it/s]
 87%|████████▋ | 1375/1580 [02:27<00:15, 13.42it/s]
 87%|████████▋ | 1377/1580 [02:27<00:15, 13.40it/s]
 87%|████████▋ | 1379/1580 [02:27<00:14, 13.58it/s]
 87%|████████▋ | 1381/1580 [02:27<00:14, 13.75it/s]
 88%|████████▊ | 1383/1580 [02:27<00:14, 13.65it/s]
 88%|████████▊ | 1385/1580 [

[2m[36m(_objective pid=4600)[0m {'loss': 0.6843, 'learning_rate': 8.876950450279812e-08, 'epoch': 5.0}


[2m[36m(_objective pid=4600)[0m                                                    100%|██████████| 1580/1580 [02:43<00:00, 11.44it/s]
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=4600)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 32.89it/s][A
[2m[36m(_objective pid=4600)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 29.81it/s][A
[2m[36m(_objective pid=4600)[0m 
 61%|██████    | 11/18 [00:00<00:00, 28.39it/s][A
[2m[36m(_objective pid=4600)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 28.11it/s][A


Trial _objective_d6ca8ba7 finished iteration 5 at 2023-09-25 13:01:13. Total running time: 6min 19s
+----------------------------------------------+
| Trial _objective_d6ca8ba7 result             |
+----------------------------------------------+
| time_this_iter_s                     40.2589 |
| time_total_s                         167.304 |
| training_iteration                         5 |
| epoch                                      5 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.66981 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6751 |
| eval_samples_per_second              416.219 |
| eval_steps_per_second                 26.662 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=4600)[0m 
[2m[36m(_objective pid=4600)[0m  94%|█████████▍| 17/18 [00:00<00:00, 28.18it/s][A                                                   
[2m[36m(_objective pid=4600)[0m                                                [A100%|██████████| 1580/1580 [02:44<00:00, 11.44it/s]
[2m[36m(_objective pid=4600)[0m 100%|██████████| 18/18 [00:00<00:00, 28.18it/s][A
[2m[36m(_objective pid=4600)[0m                                                [A


Trial _objective_d6ca8ba7 completed after 5 iterations at 2023-09-25 13:01:24. Total running time: 6min 30s

[2m[36m(_objective pid=4600)[0m {'train_runtime': 174.771, 'train_samples_per_second': 72.18, 'train_steps_per_second': 9.04, 'train_loss': 0.694281855715981, 'epoch': 5.0}


[2m[36m(_objective pid=4600)[0m                                                    100%|██████████| 1580/1580 [02:54<00:00, 11.44it/s]100%|██████████| 1580/1580 [02:54<00:00,  9.04it/s]


Trial status: 3 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:01:24. Total running time: 6min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05      



Trial _objective_ecbaa4f1 started with configuration:
+----------------------------------------------+
| Trial _objective_ecbaa4f1 config             |
+----------------------------------------------+
| learning_rate                          3e-05 |
| num_train_epochs                           5 |
| per_device_train_batch_size               32 |
| seed                                 8.80296 |
+----------------------------------------------+



[2m[36m(_objective pid=5423)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=5423)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/395 [00:00<?, ?it/s]
  0%|          | 1/395 [00:00<02:33,  2.56it/s]
  1%|          | 2/395 [00:00<01:49,  3.60it/s]
  1%|          | 3/395 [00:00<01:35,  4.10it/s]
  1%|          | 4/395 [00:00<01:29,  4.37it/s]
  1%|▏         | 5/395 [00:01<01:25,  4.55it/s]
  2%|▏         | 6/395 [00:01<01:23,  4.66it/s]
  2%|▏         | 7/395 [00:01<01:21,  4.74it/s]
  2%|▏         | 8/395 [00:01<01:21,  4.77it/s]
  2%|▏         | 9/395 [00:02<01:20,  4.81it/s]
  3%|▎         | 10/395 [00:02<01:19,  4.83it/s]
  3%|▎         | 11/395 [00:02<01:19,  4.84it/s]
  3%|▎         | 12/395 [00:02<01:19,  4.83it/s]
  3%|▎         | 1

Trial status: 3 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:01:54. Total running time: 7min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_ecbaa4f1   RUNNING          3.34711e-05                    5    8.80296                       32                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7694

[2m[36m(_objective pid=5423)[0m  19%|█▉        | 75/395 [00:15<01:06,  4.83it/s]
 19%|█▉        | 76/395 [00:16<01:05,  4.84it/s]
 19%|█▉        | 77/395 [00:16<01:05,  4.82it/s]
 20%|█▉        | 78/395 [00:16<01:05,  4.86it/s]


[2m[36m(_objective pid=5423)[0m {'loss': 0.7089, 'learning_rate': 1.0480435391080288e-07, 'epoch': 1.0}


[2m[36m(_objective pid=5423)[0m  20%|██        | 79/395 [00:16<01:02,  5.04it/s]                                                 20%|██        | 79/395 [00:16<01:02,  5.04it/s]
[2m[36m(_objective pid=5423)[0m 
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=5423)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 37.18it/s][A
[2m[36m(_objective pid=5423)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.60it/s][A
[2m[36m(_objective pid=5423)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.47it/s][A
[2m[36m(_objective pid=5423)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.96it/s][A


Trial _objective_ecbaa4f1 finished iteration 1 at 2023-09-25 13:01:56. Total running time: 7min 2s
+----------------------------------------------+
| Trial _objective_ecbaa4f1 result             |
+----------------------------------------------+
| time_this_iter_s                     20.8094 |
| time_total_s                         20.8094 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.48754 |
| eval_f1                              0.48754 |
| eval_loss                             0.7064 |
| eval_p                               0.48754 |
| eval_r                               0.48754 |
| eval_roc_auc                         0.48754 |
| eval_runtime                          0.6287 |
| eval_samples_per_second              446.954 |
| eval_steps_per_second                  28.63 |
| objective                            2.43772 |
+----------------------------------------------+

Trial _objective_e

[2m[36m(_objective pid=5423)[0m                                                 
[2m[36m(_objective pid=5423)[0m                                                [A 20%|██        | 79/395 [00:17<01:02,  5.04it/s]
[2m[36m(_objective pid=5423)[0m 100%|██████████| 18/18 [00:00<00:00, 29.96it/s][A
[2m[36m(_objective pid=5423)[0m                                                [A 20%|██        | 79/395 [00:17<01:09,  4.57it/s]


Trial _objective_4edb5e3f started with configuration:
+----------------------------------------------+
| Trial _objective_4edb5e3f config             |
+----------------------------------------------+
| learning_rate                          1e-05 |
| num_train_epochs                           2 |
| per_device_train_batch_size               64 |
| seed                                 23.7512 |
+----------------------------------------------+



[2m[36m(_objective pid=5588)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=5588)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/80 [00:00<?, ?it/s]
  1%|▏         | 1/80 [00:00<00:53,  1.49it/s]
  2%|▎         | 2/80 [00:01<00:38,  2.01it/s]
  4%|▍         | 3/80 [00:01<00:34,  2.25it/s]
  5%|▌         | 4/80 [00:01<00:31,  2.39it/s]
  6%|▋         | 5/80 [00:02<00:30,  2.46it/s]
  8%|▊         | 6/80 [00:02<00:29,  2.52it/s]
  9%|▉         | 7/80 [00:02<00:28,  2.55it/s]
 10%|█         | 8/80 [00:03<00:28,  2.57it/s]
 11%|█▏        | 9/80 [00:03<00:27,  2.59it/s]
 12%|█▎        | 10/80 [00:04<00:27,  2.59it/s]
 14%|█▍        | 11/80 [00:04<00:26,  2.60it/s]
 15%|█▌        | 12/80 [00:04<00:26,  2.61it/s]
 16%|█▋        | 13/80 [00:05<0

[2m[36m(_objective pid=5588)[0m {'loss': 0.6982, 'learning_rate': 1.4308589328968264e-08, 'epoch': 1.0}


[2m[36m(_objective pid=5588)[0m 
[2m[36m(_objective pid=5588)[0m  44%|████▍     | 8/18 [00:00<00:00, 29.15it/s][A
[2m[36m(_objective pid=5588)[0m 
 61%|██████    | 11/18 [00:00<00:00, 28.50it/s][A
[2m[36m(_objective pid=5588)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 28.19it/s][A
[2m[36m(_objective pid=5588)[0m 
 94%|█████████▍| 17/18 [00:00<00:00, 28.27it/s][A
                                               
 50%|█████     | 40/80 [00:16<00:13,  3.05it/s]
100%|██████████| 18/18 [00:00<00:00, 28.27it/s][A
                                               [A


[2m[36m(_objective pid=5588)[0m {'eval_loss': 0.6944868564605713, 'eval_p': 0.5, 'eval_r': 1.0, 'eval_f1': 0.6666666666666666, 'eval_roc_auc': 0.5, 'eval_accuracy': 0.0, 'eval_runtime': 0.6914, 'eval_samples_per_second': 406.45, 'eval_steps_per_second': 26.036, 'epoch': 1.0}
Trial _objective_4edb5e3f finished iteration 1 at 2023-09-25 13:02:24. Total running time: 7min 30s
+----------------------------------------------+
| Trial _objective_4edb5e3f result             |
+----------------------------------------------+
| time_this_iter_s                     20.1259 |
| time_total_s                         20.1259 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69449 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                   

 51%|█████▏    | 41/80 [00:21<01:22,  2.12s/it]
 52%|█████▎    | 42/80 [00:22<01:00,  1.60s/it]
 54%|█████▍    | 43/80 [00:22<00:45,  1.24s/it]
 55%|█████▌    | 44/80 [00:22<00:35,  1.02it/s]
 56%|█████▋    | 45/80 [00:23<00:27,  1.25it/s]
 57%|█████▊    | 46/80 [00:23<00:22,  1.48it/s]
 59%|█████▉    | 47/80 [00:24<00:19,  1.71it/s]
 60%|██████    | 48/80 [00:24<00:16,  1.90it/s]
 61%|██████▏   | 49/80 [00:24<00:14,  2.07it/s]
 62%|██████▎   | 50/80 [00:25<00:13,  2.21it/s]
 64%|██████▍   | 51/80 [00:25<00:12,  2.32it/s]
 65%|██████▌   | 52/80 [00:25<00:11,  2.40it/s]
 66%|██████▋   | 53/80 [00:26<00:10,  2.46it/s]
 68%|██████▊   | 54/80 [00:26<00:10,  2.50it/s]
 69%|██████▉   | 55/80 [00:27<00:09,  2.54it/s]
 70%|███████   | 56/80 [00:27<00:09,  2.56it/s]
 71%|███████▏  | 57/80 [00:27<00:08,  2.57it/s]
 72%|███████▎  | 58/80 [00:28<00:08,  2.58it/s]
 74%|███████▍  | 59/80 [00:28<00:08,  2.59it/s]
 75%|███████▌  | 60/80 [00:29<00:07,  2.60it/s]
 76%|███████▋  | 61/80 [00:29<00:07,  2.

[2m[36m(_objective pid=5588)[0m {'loss': 0.698, 'learning_rate': 2.861717865793653e-08, 'epoch': 2.0}


[2m[36m(_objective pid=5588)[0m 
[2m[36m(_objective pid=5588)[0m  44%|████▍     | 8/18 [00:00<00:00, 30.48it/s][A
[2m[36m(_objective pid=5588)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 29.40it/s][A
[2m[36m(_objective pid=5588)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 29.03it/s][A
[2m[36m(_objective pid=5588)[0m 
                                               
100%|██████████| 80/80 [00:37<00:00,  3.06it/s]
100%|██████████| 18/18 [00:00<00:00, 29.29it/s][A
                                               [A


Trial _objective_4edb5e3f finished iteration 2 at 2023-09-25 13:02:45. Total running time: 7min 51s
+----------------------------------------------+
| Trial _objective_4edb5e3f result             |
+----------------------------------------------+
| time_this_iter_s                     21.1183 |
| time_total_s                         41.2442 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69362 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6618 |
| eval_samples_per_second               424.62 |
| eval_steps_per_second                   27.2 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=5588)[0m                                                100%|██████████| 80/80 [00:42<00:00,  3.06it/s]100%|██████████| 80/80 [00:42<00:00,  1.87it/s]


Trial status: 5 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:02:54. Total running time: 8min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05       



Trial _objective_b42824a1 started with configuration:
+----------------------------------------------+
| Trial _objective_b42824a1 config             |
+----------------------------------------------+
| learning_rate                              0 |
| num_train_epochs                           3 |
| per_device_train_batch_size               64 |
| seed                                 24.3662 |
+----------------------------------------------+



[2m[36m(_objective pid=5854)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=5854)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/120 [00:00<?, ?it/s]
  1%|          | 1/120 [00:00<01:20,  1.48it/s]
  2%|▏         | 2/120 [00:01<00:59,  1.98it/s]
  2%|▎         | 3/120 [00:01<00:52,  2.22it/s]
  3%|▎         | 4/120 [00:01<00:49,  2.34it/s]
  4%|▍         | 5/120 [00:02<00:47,  2.40it/s]
  5%|▌         | 6/120 [00:02<00:46,  2.47it/s]
  6%|▌         | 7/120 [00:03<00:44,  2.52it/s]
  7%|▋         | 8/120 [00:03<00:43,  2.55it/s]
  8%|▊         | 9/120 [00:03<00:43,  2.57it/s]
  8%|▊         | 10/120 [00:04<00:42,  2.58it/s]
  9%|▉         | 11/120 [00:04<00:41,  2.60it/s]
 10%|█         | 12/120 [00:04<00:41,  2.60it/s]
 11%|█         | 1

[2m[36m(_objective pid=5854)[0m {'loss': 0.7028, 'learning_rate': 4.1340646346795355e-09, 'epoch': 1.0}


[2m[36m(_objective pid=5854)[0m 
[2m[36m(_objective pid=5854)[0m  44%|████▍     | 8/18 [00:00<00:00, 29.08it/s][A
[2m[36m(_objective pid=5854)[0m 
 61%|██████    | 11/18 [00:00<00:00, 28.39it/s][A
[2m[36m(_objective pid=5854)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 28.07it/s][A
[2m[36m(_objective pid=5854)[0m 
 94%|█████████▍| 17/18 [00:00<00:00, 26.17it/s][A


Trial _objective_b42824a1 finished iteration 1 at 2023-09-25 13:03:19. Total running time: 8min 25s
+----------------------------------------------+
| Trial _objective_b42824a1 result             |
+----------------------------------------------+
| time_this_iter_s                     20.4861 |
| time_total_s                         20.4861 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.70592 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.7032 |
| eval_samples_per_second              399.593 |
| eval_steps_per_second                 25.597 |
| objective                            2.56228 |
+----------------------------------------------+

Trial _objective_

[2m[36m(_objective pid=5854)[0m                                                 
[2m[36m(_objective pid=5854)[0m                                                [A 33%|███▎      | 40/120 [00:16<00:26,  3.06it/s]
[2m[36m(_objective pid=5854)[0m 100%|██████████| 18/18 [00:00<00:00, 26.17it/s][A
[2m[36m(_objective pid=5854)[0m                                                [A 33%|███▎      | 40/120 [00:16<00:32,  2.47it/s]


Trial status: 6 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:03:24. Total running time: 8min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05      



Trial _objective_5e0fc3d5 started with configuration:
+----------------------------------------------+
| Trial _objective_5e0fc3d5 config             |
+----------------------------------------------+
| learning_rate                          5e-05 |
| num_train_epochs                           3 |
| per_device_train_batch_size               64 |
| seed                                 3.62544 |
+----------------------------------------------+



[2m[36m(_objective pid=5997)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=5997)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/120 [00:00<?, ?it/s]
  1%|          | 1/120 [00:00<01:08,  1.74it/s]
  2%|▏         | 2/120 [00:00<00:54,  2.18it/s]
  2%|▎         | 3/120 [00:01<00:49,  2.36it/s]
  3%|▎         | 4/120 [00:01<00:47,  2.46it/s]
  4%|▍         | 5/120 [00:02<00:45,  2.51it/s]
  5%|▌         | 6/120 [00:02<00:44,  2.55it/s]
  6%|▌         | 7/120 [00:02<00:44,  2.57it/s]
  7%|▋         | 8/120 [00:03<00:43,  2.58it/s]
  8%|▊         | 9/120 [00:03<00:43,  2.58it/s]
  8%|▊         | 10/120 [00:04<00:42,  2.56it/s]
  9%|▉         | 11/120 [00:04<00:42,  2.56it/s]
 10%|█         | 12/120 [00:04<00:42,  2.55it/s]
 11%|█         | 1

[2m[36m(_objective pid=5997)[0m {'loss': 0.6998, 'learning_rate': 7.274923351077803e-08, 'epoch': 1.0}


[2m[36m(_objective pid=5997)[0m                                                  33%|███▎      | 40/120 [00:15<00:25,  3.10it/s]
[2m[36m(_objective pid=5997)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=5997)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.18it/s][A
[2m[36m(_objective pid=5997)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.41it/s][A
[2m[36m(_objective pid=5997)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.65it/s][A


Trial _objective_5e0fc3d5 finished iteration 1 at 2023-09-25 13:03:47. Total running time: 8min 53s
+----------------------------------------------+
| Trial _objective_5e0fc3d5 result             |
+----------------------------------------------+
| time_this_iter_s                     19.0107 |
| time_total_s                         19.0107 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69855 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6245 |
| eval_samples_per_second              449.989 |
| eval_steps_per_second                 28.825 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=5997)[0m 
[2m[36m(_objective pid=5997)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.93it/s][A                                                
[2m[36m(_objective pid=5997)[0m                                                [A 33%|███▎      | 40/120 [00:16<00:25,  3.10it/s]
[2m[36m(_objective pid=5997)[0m 100%|██████████| 18/18 [00:00<00:00, 29.93it/s][A
[2m[36m(_objective pid=5997)[0m                                                [A


Trial status: 6 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:03:54. Total running time: 9min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_5e0fc3d5   RUNNING          4.58866e-05                    3    3.62544                       64        1            19.0107       2.66667      0.698547   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7694

 34%|███▍      | 41/120 [00:24<03:54,  2.97s/it]
 35%|███▌      | 42/120 [00:25<02:51,  2.20s/it]
 36%|███▌      | 43/120 [00:25<02:07,  1.66s/it]
 37%|███▋      | 44/120 [00:25<01:37,  1.28s/it]
 38%|███▊      | 45/120 [00:26<01:15,  1.01s/it]
 38%|███▊      | 46/120 [00:26<01:00,  1.21it/s]
 39%|███▉      | 47/120 [00:26<00:50,  1.44it/s]
 40%|████      | 48/120 [00:27<00:43,  1.66it/s]
 41%|████      | 49/120 [00:27<00:38,  1.86it/s]
 42%|████▏     | 50/120 [00:28<00:34,  2.02it/s]
 42%|████▎     | 51/120 [00:28<00:31,  2.16it/s]
 43%|████▎     | 52/120 [00:28<00:29,  2.27it/s]
 44%|████▍     | 53/120 [00:29<00:28,  2.34it/s]
 45%|████▌     | 54/120 [00:29<00:27,  2.42it/s]
 46%|████▌     | 55/120 [00:30<00:26,  2.46it/s]
 47%|████▋     | 56/120 [00:30<00:25,  2.51it/s]
 48%|████▊     | 57/120 [00:30<00:24,  2.54it/s]
 48%|████▊     | 58/120 [00:31<00:24,  2.56it/s]
 49%|████▉     | 59/120 [00:31<00:23,  2.57it/s]
 50%|█████     | 60/120 [00:31<00:23,  2.58it/s]
 51%|█████     | 61/

[2m[36m(_objective pid=5997)[0m {'loss': 0.6985, 'learning_rate': 1.4549846702155606e-07, 'epoch': 2.0}


[2m[36m(_objective pid=5997)[0m 
[2m[36m(_objective pid=5997)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=5997)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 33.17it/s][A
[2m[36m(_objective pid=5997)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 28.78it/s][A
[2m[36m(_objective pid=5997)[0m 
 61%|██████    | 11/18 [00:00<00:00, 27.36it/s][A
[2m[36m(_objective pid=5997)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 25.78it/s][A
[2m[36m(_objective pid=5997)[0m 
                                                
 67%|██████▋   | 80/120 [00:40<00:13,  3.07it/s]
100%|██████████| 18/18 [00:00<00:00, 26.69it/s][A
                                               [A


Trial _objective_5e0fc3d5 finished iteration 2 at 2023-09-25 13:04:11. Total running time: 9min 17s
+----------------------------------------------+
| Trial _objective_5e0fc3d5 result             |
+----------------------------------------------+
| time_this_iter_s                     24.0846 |
| time_total_s                         43.0953 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69656 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.7041 |
| eval_samples_per_second              399.102 |
| eval_steps_per_second                 25.565 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

 68%|██████▊   | 81/120 [00:45<01:21,  2.10s/it]
 68%|██████▊   | 82/120 [00:46<01:00,  1.59s/it]
 69%|██████▉   | 83/120 [00:46<00:45,  1.23s/it]
 70%|███████   | 84/120 [00:46<00:35,  1.02it/s]
 71%|███████   | 85/120 [00:47<00:27,  1.25it/s]
 72%|███████▏  | 86/120 [00:47<00:22,  1.48it/s]
 72%|███████▎  | 87/120 [00:48<00:19,  1.69it/s]
 73%|███████▎  | 88/120 [00:48<00:16,  1.88it/s]
 74%|███████▍  | 89/120 [00:48<00:15,  2.06it/s]
 75%|███████▌  | 90/120 [00:49<00:13,  2.20it/s]
 76%|███████▌  | 91/120 [00:49<00:12,  2.31it/s]
 77%|███████▋  | 92/120 [00:49<00:11,  2.39it/s]
 78%|███████▊  | 93/120 [00:50<00:11,  2.45it/s]
 78%|███████▊  | 94/120 [00:50<00:10,  2.48it/s]
 79%|███████▉  | 95/120 [00:51<00:09,  2.52it/s]
 80%|████████  | 96/120 [00:51<00:09,  2.55it/s]
 81%|████████  | 97/120 [00:51<00:08,  2.56it/s]
 82%|████████▏ | 98/120 [00:52<00:08,  2.57it/s]
 82%|████████▎ | 99/120 [00:52<00:08,  2.58it/s]
 83%|████████▎ | 100/120 [00:53<00:07,  2.59it/s]
 84%|████████▍ | 10

Trial status: 6 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:04:25. Total running time: 9min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_5e0fc3d5   RUNNING          4.58866e-05                    3    3.62544                       64        2            43.0953       2.66667      0.696562   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.769

 85%|████████▌ | 102/120 [00:53<00:06,  2.60it/s]
 86%|████████▌ | 103/120 [00:54<00:06,  2.60it/s]
 87%|████████▋ | 104/120 [00:54<00:06,  2.60it/s]
 88%|████████▊ | 105/120 [00:54<00:05,  2.60it/s]
 88%|████████▊ | 106/120 [00:55<00:05,  2.60it/s]
 89%|████████▉ | 107/120 [00:55<00:05,  2.59it/s]
 90%|█████████ | 108/120 [00:56<00:04,  2.59it/s]
 91%|█████████ | 109/120 [00:56<00:04,  2.59it/s]
 92%|█████████▏| 110/120 [00:56<00:03,  2.60it/s]
 92%|█████████▎| 111/120 [00:57<00:03,  2.60it/s]
 93%|█████████▎| 112/120 [00:57<00:03,  2.60it/s]
 94%|█████████▍| 113/120 [00:58<00:02,  2.60it/s]
 95%|█████████▌| 114/120 [00:58<00:02,  2.60it/s]
 96%|█████████▌| 115/120 [00:58<00:01,  2.59it/s]
 97%|█████████▋| 116/120 [00:59<00:01,  2.58it/s]
 98%|█████████▊| 117/120 [00:59<00:01,  2.57it/s]
 98%|█████████▊| 118/120 [00:59<00:00,  2.57it/s]
 99%|█████████▉| 119/120 [01:00<00:00,  2.57it/s]


[2m[36m(_objective pid=5997)[0m {'loss': 0.6958, 'learning_rate': 2.1824770053233406e-07, 'epoch': 3.0}


[2m[36m(_objective pid=5997)[0m 100%|██████████| 120/120 [01:00<00:00,  3.05it/s]                                                 100%|██████████| 120/120 [01:00<00:00,  3.05it/s]
[2m[36m(_objective pid=5997)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=5997)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 34.44it/s][A
[2m[36m(_objective pid=5997)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 30.03it/s][A
[2m[36m(_objective pid=5997)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 28.80it/s][A
[2m[36m(_objective pid=5997)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 28.24it/s][A
[2m[36m(_objective pid=5997)[0m 
                                                 
100%|██████████| 120/120 [01:01<00:00,  3.05it/s]
100%|██████████| 18/18 [00:00<00:00, 28.21it/s][A
                                               [A


Trial _objective_5e0fc3d5 finished iteration 3 at 2023-09-25 13:04:32. Total running time: 9min 38s
+----------------------------------------------+
| Trial _objective_5e0fc3d5 result             |
+----------------------------------------------+
| time_this_iter_s                     21.0846 |
| time_total_s                           64.18 |
| training_iteration                         3 |
| epoch                                      3 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.69307 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6828 |
| eval_samples_per_second              411.543 |
| eval_steps_per_second                 26.362 |
| objective                            2.56228 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=5997)[0m                                                  100%|██████████| 120/120 [01:15<00:00,  3.05it/s]100%|██████████| 120/120 [01:15<00:00,  1.59it/s]


Trial status: 7 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:04:55. Total running time: 10min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05      



Trial _objective_3f365154 started with configuration:
+----------------------------------------------+
| Trial _objective_3f365154 config             |
+----------------------------------------------+
| learning_rate                              0 |
| num_train_epochs                           3 |
| per_device_train_batch_size               32 |
| seed                                 7.97396 |
+----------------------------------------------+



[2m[36m(_objective pid=6384)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=6384)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/237 [00:00<?, ?it/s]
  0%|          | 1/237 [00:00<01:34,  2.49it/s]
  1%|          | 2/237 [00:00<01:06,  3.55it/s]
  1%|▏         | 3/237 [00:00<00:57,  4.05it/s]
  2%|▏         | 4/237 [00:01<00:53,  4.35it/s]
  2%|▏         | 5/237 [00:01<00:51,  4.53it/s]
  3%|▎         | 6/237 [00:01<00:49,  4.64it/s]
  3%|▎         | 7/237 [00:01<00:48,  4.72it/s]
  3%|▎         | 8/237 [00:01<00:48,  4.77it/s]
  4%|▍         | 9/237 [00:02<00:47,  4.80it/s]
  4%|▍         | 10/237 [00:02<00:47,  4.82it/s]
  5%|▍         | 11/237 [00:02<00:46,  4.83it/s]
  5%|▌         | 12/237 [00:02<00:46,  4.85it/s]
  5%|▌         | 1

[2m[36m(_objective pid=6384)[0m {'loss': 0.7041, 'learning_rate': 4.482798826794853e-09, 'epoch': 1.0}


[2m[36m(_objective pid=6384)[0m 
[2m[36m(_objective pid=6384)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=6384)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.25it/s][A
[2m[36m(_objective pid=6384)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.89it/s][A
[2m[36m(_objective pid=6384)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.28it/s][A


Trial _objective_3f365154 finished iteration 1 at 2023-09-25 13:05:18. Total running time: 10min 24s
+----------------------------------------------+
| Trial _objective_3f365154 result             |
+----------------------------------------------+
| time_this_iter_s                     20.6281 |
| time_total_s                         20.6281 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.48754 |
| eval_f1                              0.48754 |
| eval_loss                            0.70276 |
| eval_p                               0.48754 |
| eval_r                               0.48754 |
| eval_roc_auc                         0.48754 |
| eval_runtime                          0.6365 |
| eval_samples_per_second              441.444 |
| eval_steps_per_second                 28.278 |
| objective                            2.43772 |
+----------------------------------------------+

Trial _objective

[2m[36m(_objective pid=6384)[0m 
[2m[36m(_objective pid=6384)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.46it/s][A                                                
[2m[36m(_objective pid=6384)[0m                                                [A 33%|███▎      | 79/237 [00:17<00:31,  5.04it/s]
[2m[36m(_objective pid=6384)[0m 100%|██████████| 18/18 [00:00<00:00, 29.46it/s][A
[2m[36m(_objective pid=6384)[0m                                                [A 33%|███▎      | 79/237 [00:17<00:34,  4.54it/s]


Trial status: 8 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:05:25. Total running time: 10min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05     

[2m[36m(_objective pid=6543)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=6543)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/158 [00:00<?, ?it/s]
  1%|          | 1/158 [00:00<01:18,  2.01it/s]
  1%|▏         | 2/158 [00:00<00:51,  3.02it/s]
  2%|▏         | 3/158 [00:00<00:42,  3.62it/s]
  3%|▎         | 4/158 [00:01<00:38,  4.00it/s]
  3%|▎         | 5/158 [00:01<00:36,  4.23it/s]
  4%|▍         | 6/158 [00:01<00:34,  4.38it/s]
  4%|▍         | 7/158 [00:01<00:33,  4.47it/s]
  5%|▌         | 8/158 [00:01<00:33,  4.52it/s]
  6%|▌         | 9/158 [00:02<00:32,  4.56it/s]
  6%|▋         | 10/158 [00:02<00:32,  4.61it/s]
  7%|▋         | 11/158 [00:02<00:31,  4.64it/s]
  8%|▊         | 12/158 [00:02<00:31,  4.66it/s]
  8%|▊         | 1

[2m[36m(_objective pid=6543)[0m {'loss': 0.6937, 'learning_rate': 3.33712706715081e-09, 'epoch': 1.0}


[2m[36m(_objective pid=6543)[0m  50%|█████     | 79/158 [00:16<00:16,  4.84it/s]                                                 50%|█████     | 79/158 [00:16<00:16,  4.84it/s]
[2m[36m(_objective pid=6543)[0m 
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=6543)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 36.53it/s][A
[2m[36m(_objective pid=6543)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 29.37it/s][A
[2m[36m(_objective pid=6543)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 28.12it/s][A
[2m[36m(_objective pid=6543)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 27.55it/s][A


Trial _objective_109ce93b finished iteration 1 at 2023-09-25 13:05:47. Total running time: 10min 53s
+----------------------------------------------+
| Trial _objective_109ce93b result             |
+----------------------------------------------+
| time_this_iter_s                     21.6293 |
| time_total_s                         21.6293 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.68971 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.7171 |
| eval_samples_per_second              391.838 |
| eval_steps_per_second                   25.1 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=6543)[0m 
[2m[36m(_objective pid=6543)[0m 100%|██████████| 18/18 [00:00<00:00, 27.42it/s][A                                                
[2m[36m(_objective pid=6543)[0m                                                [A 50%|█████     | 79/158 [00:17<00:16,  4.84it/s]
[2m[36m(_objective pid=6543)[0m 100%|██████████| 18/18 [00:00<00:00, 27.42it/s][A
[2m[36m(_objective pid=6543)[0m                                                [A


Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:05:55. Total running time: 11min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_109ce93b   RUNNING          1.06577e-06                    2    9.47904                       32        1            21.6293       2.66667      0.689707   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.769

[2m[36m(_objective pid=6543)[0m  51%|█████     | 80/158 [00:26<03:52,  2.99s/it]
 51%|█████▏    | 81/158 [00:26<02:45,  2.15s/it]
 52%|█████▏    | 82/158 [00:26<01:59,  1.57s/it]
 53%|█████▎    | 83/158 [00:26<01:26,  1.16s/it]
 53%|█████▎    | 84/158 [00:27<01:04,  1.14it/s]
 54%|█████▍    | 85/158 [00:27<00:49,  1.48it/s]
 54%|█████▍    | 86/158 [00:27<00:38,  1.87it/s]
 55%|█████▌    | 87/158 [00:27<00:30,  2.29it/s]
 56%|█████▌    | 88/158 [00:27<00:25,  2.73it/s]
 56%|█████▋    | 89/158 [00:28<00:21,  3.14it/s]
 57%|█████▋    | 90/158 [00:28<00:19,  3.52it/s]
 58%|█████▊    | 91/158 [00:28<00:17,  3.83it/s]
 58%|█████▊    | 92/158 [00:28<00:16,  4.09it/s]
 59%|█████▉    | 93/158 [00:28<00:15,  4.28it/s]
 59%|█████▉    | 94/158 [00:29<00:14,  4.44it/s]
 60%|██████    | 95/158 [00:29<00:13,  4.56it/s]
 61%|██████    | 96/158 [00:29<00:13,  4.63it/s]
 61%|██████▏   | 97/158 [00:29<00:13,  4.69it/s]
 62%|██████▏   | 98/158 [00:29<00:12,  4.74it/s]
 63%|██████▎   | 99/158 [00:30<00

[2m[36m(_objective pid=6543)[0m {'loss': 0.6925, 'learning_rate': 6.67425413430162e-09, 'epoch': 2.0}


[2m[36m(_objective pid=6543)[0m 100%|██████████| 158/158 [00:42<00:00,  4.95it/s]                                                 100%|██████████| 158/158 [00:42<00:00,  4.95it/s]
[2m[36m(_objective pid=6543)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=6543)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.80it/s][A
[2m[36m(_objective pid=6543)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.29it/s][A
[2m[36m(_objective pid=6543)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.48it/s][A
[2m[36m(_objective pid=6543)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.23it/s][A


Trial _objective_109ce93b finished iteration 2 at 2023-09-25 13:06:13. Total running time: 11min 19s
+----------------------------------------------+
| Trial _objective_109ce93b result             |
+----------------------------------------------+
| time_this_iter_s                     25.7753 |
| time_total_s                         47.4046 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.68951 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6332 |
| eval_samples_per_second              443.786 |
| eval_steps_per_second                 28.428 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=6543)[0m                                                  
[2m[36m(_objective pid=6543)[0m                                                [A100%|██████████| 158/158 [00:43<00:00,  4.95it/s]
[2m[36m(_objective pid=6543)[0m 100%|██████████| 18/18 [00:00<00:00, 29.23it/s][A
[2m[36m(_objective pid=6543)[0m                                                [A


Trial _objective_109ce93b completed after 2 iterations at 2023-09-25 13:06:22. Total running time: 11min 28s

[2m[36m(_objective pid=6543)[0m {'train_runtime': 52.7899, 'train_samples_per_second': 95.587, 'train_steps_per_second': 2.993, 'train_loss': 0.693118566199194, 'epoch': 2.0}


[2m[36m(_objective pid=6543)[0m                                                  100%|██████████| 158/158 [00:52<00:00,  4.95it/s]100%|██████████| 158/158 [00:52<00:00,  2.99it/s]


Trial status: 9 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:06:25. Total running time: 11min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05     



Trial _objective_7b79c12e started with configuration:
+----------------------------------------------+
| Trial _objective_7b79c12e config             |
+----------------------------------------------+
| learning_rate                              0 |
| num_train_epochs                           4 |
| per_device_train_batch_size                4 |
| seed                                 17.4711 |
+----------------------------------------------+



[2m[36m(_objective pid=6846)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=6846)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/2524 [00:00<?, ?it/s]
  0%|          | 1/2524 [00:00<10:17,  4.08it/s]
  0%|          | 3/2524 [00:00<04:32,  9.24it/s]
  0%|          | 5/2524 [00:00<03:32, 11.88it/s]
  0%|          | 7/2524 [00:00<03:13, 12.98it/s]
  0%|          | 9/2524 [00:00<03:09, 13.28it/s]
  0%|          | 11/2524 [00:00<02:58, 14.09it/s]
  1%|          | 13/2524 [00:01<02:47, 15.01it/s]
  1%|          | 15/2524 [00:01<02:43, 15.34it/s]
  1%|          | 17/2524 [00:01<02:39, 15.75it/s]
  1%|          | 19/2524 [00:01<02:37, 15.90it/s]
  1%|          | 21/2524 [00:01<02:34, 16.20it/s]
  1%|          | 23/2524 [00:01<02:41, 15.47it/s]
 

Trial status: 9 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:06:55. Total running time: 12min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_7b79c12e   RUNNING          1.45603e-06                    4   17.4711                         4                                                                               |
| _objective_6c47dcc5   TERMINATED       3.769

[2m[36m(_objective pid=6846)[0m  10%|█         | 261/2524 [00:18<02:21, 16.05it/s]
 10%|█         | 263/2524 [00:18<02:18, 16.31it/s]
 10%|█         | 265/2524 [00:19<02:16, 16.52it/s]
 11%|█         | 267/2524 [00:19<02:18, 16.25it/s]
 11%|█         | 269/2524 [00:19<02:19, 16.14it/s]
 11%|█         | 271/2524 [00:19<02:15, 16.63it/s]
 11%|█         | 273/2524 [00:19<02:18, 16.30it/s]
 11%|█         | 275/2524 [00:19<02:18, 16.25it/s]
 11%|█         | 277/2524 [00:19<02:16, 16.43it/s]
 11%|█         | 279/2524 [00:19<02:16, 16.48it/s]
 11%|█         | 281/2524 [00:19<02:15, 16.59it/s]
 11%|█         | 283/2524 [00:20<02:20, 15.90it/s]
 11%|█▏        | 285/2524 [00:20<02:21, 15.83it/s]
 11%|█▏        | 287/2524 [00:20<02:19, 16.04it/s]
 11%|█▏        | 289/2524 [00:20<02:22, 15.65it/s]
 12%|█▏        | 291/2524 [00:20<02:21, 15.74it/s]
 12%|█▏        | 293/2524 [00:20<02:20, 15.89it/s]
 12%|█▏        | 295/2524 [00:20<02:21, 15.74it/s]
 12%|█▏        | 297/2524 [00:21<02:20, 15.86i

[2m[36m(_objective pid=6846)[0m {'loss': 0.7042, 'learning_rate': 3.6415253220459135e-08, 'epoch': 1.0}


[2m[36m(_objective pid=6846)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 36.65it/s][A
[2m[36m(_objective pid=6846)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 29.38it/s][A
[2m[36m(_objective pid=6846)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 27.06it/s][A
[2m[36m(_objective pid=6846)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 27.19it/s][A
[2m[36m(_objective pid=6846)[0m 
100%|██████████| 18/18 [00:00<00:00, 26.65it/s][A


Trial _objective_7b79c12e finished iteration 1 at 2023-09-25 13:07:22. Total running time: 12min 28s
+----------------------------------------------+
| Trial _objective_7b79c12e result             |
+----------------------------------------------+
| time_this_iter_s                     48.4659 |
| time_total_s                         48.4659 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.70454 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.7156 |
| eval_samples_per_second               392.69 |
| eval_steps_per_second                 25.155 |
| objective                            2.56228 |
+----------------------------------------------+

Trial _objective

[2m[36m(_objective pid=6846)[0m                                                   
[2m[36m(_objective pid=6846)[0m                                                [A 25%|██▌       | 631/2524 [00:45<02:33, 12.32it/s]
[2m[36m(_objective pid=6846)[0m 100%|██████████| 18/18 [00:00<00:00, 26.65it/s][A
[2m[36m(_objective pid=6846)[0m                                                [A 25%|██▌       | 631/2524 [00:45<02:16, 13.87it/s]


Trial status: 10 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:07:25. Total running time: 12min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05    



Trial _objective_afbe033b started with configuration:
+----------------------------------------------+
| Trial _objective_afbe033b config             |
+----------------------------------------------+
| learning_rate                          4e-05 |
| num_train_epochs                           1 |
| per_device_train_batch_size               32 |
| seed                                 35.1967 |
+----------------------------------------------+



[2m[36m(_objective pid=7128)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=7128)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/79 [00:00<?, ?it/s]
  1%|▏         | 1/79 [00:00<00:33,  2.36it/s]
  3%|▎         | 2/79 [00:00<00:22,  3.42it/s]
  4%|▍         | 3/79 [00:00<00:19,  3.94it/s]
  5%|▌         | 4/79 [00:01<00:17,  4.28it/s]
  6%|▋         | 5/79 [00:01<00:16,  4.48it/s]
  8%|▊         | 6/79 [00:01<00:15,  4.61it/s]
  9%|▉         | 7/79 [00:01<00:15,  4.70it/s]
 10%|█         | 8/79 [00:01<00:14,  4.75it/s]
 11%|█▏        | 9/79 [00:02<00:14,  4.80it/s]
 13%|█▎        | 10/79 [00:02<00:14,  4.82it/s]
 14%|█▍        | 11/79 [00:02<00:14,  4.83it/s]
 15%|█▌        | 12/79 [00:02<00:13,  4.79it/s]
 16%|█▋        | 13/79 [00:02<0

[2m[36m(_objective pid=7128)[0m {'loss': 0.7089, 'learning_rate': 1.320983322809784e-07, 'epoch': 1.0}


[2m[36m(_objective pid=7128)[0m 100%|██████████| 79/79 [00:16<00:00,  5.05it/s]                                               100%|██████████| 79/79 [00:16<00:00,  5.05it/s]
[2m[36m(_objective pid=7128)[0m 
[2m[36m(_objective pid=7128)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=7128)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.88it/s][A
[2m[36m(_objective pid=7128)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 30.57it/s][A
[2m[36m(_objective pid=7128)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 29.76it/s][A


Trial _objective_afbe033b finished iteration 1 at 2023-09-25 13:07:51. Total running time: 12min 57s
+----------------------------------------------+
| Trial _objective_afbe033b result             |
+----------------------------------------------+
| time_this_iter_s                     20.1526 |
| time_total_s                         20.1526 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.70287 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                            0.64 |
| eval_samples_per_second              439.075 |
| eval_steps_per_second                 28.126 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=7128)[0m 
[2m[36m(_objective pid=7128)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.43it/s][A                                               
[2m[36m(_objective pid=7128)[0m                                                [A100%|██████████| 79/79 [00:17<00:00,  5.05it/s]
[2m[36m(_objective pid=7128)[0m 100%|██████████| 18/18 [00:00<00:00, 29.43it/s][A
[2m[36m(_objective pid=7128)[0m                                                [A


Trial status: 10 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:07:55. Total running time: 13min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_afbe033b   RUNNING          4.21879e-05                    1   35.1967                        32        1            20.1526       2.66667      0.702869   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=7128)[0m                                                100%|██████████| 79/79 [00:28<00:00,  5.05it/s]100%|██████████| 79/79 [00:28<00:00,  2.77it/s]


Trial _objective_da5ca116 started with configuration:
+----------------------------------------------+
| Trial _objective_da5ca116 config             |
+----------------------------------------------+
| learning_rate                          1e-05 |
| num_train_epochs                           5 |
| per_device_train_batch_size               64 |
| seed                                 12.4663 |
+----------------------------------------------+



[2m[36m(_objective pid=7331)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=7331)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/200 [00:00<?, ?it/s]
  0%|          | 1/200 [00:00<02:16,  1.46it/s]
  1%|          | 2/200 [00:01<01:40,  1.97it/s]
  2%|▏         | 3/200 [00:01<01:29,  2.21it/s]
  2%|▏         | 4/200 [00:01<01:23,  2.34it/s]
  2%|▎         | 5/200 [00:02<01:20,  2.42it/s]
  3%|▎         | 6/200 [00:02<01:18,  2.48it/s]
  4%|▎         | 7/200 [00:03<01:17,  2.49it/s]
  4%|▍         | 8/200 [00:03<01:16,  2.52it/s]
  4%|▍         | 9/200 [00:03<01:15,  2.52it/s]
  5%|▌         | 10/200 [00:04<01:14,  2.54it/s]
  6%|▌         | 11/200 [00:04<01:14,  2.55it/s]
  6%|▌         | 12/200 [00:04<01:13,  2.55it/s]
  6%|▋         | 1

Trial status: 11 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:08:25. Total running time: 13min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_da5ca116   RUNNING          9.38591e-06                    5   12.4663                        64                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7

 14%|█▍        | 29/200 [00:11<01:05,  2.62it/s]
 15%|█▌        | 30/200 [00:11<01:05,  2.61it/s]
 16%|█▌        | 31/200 [00:12<01:04,  2.61it/s]
 16%|█▌        | 32/200 [00:12<01:04,  2.61it/s]
 16%|█▋        | 33/200 [00:13<01:04,  2.60it/s]
 17%|█▋        | 34/200 [00:13<01:03,  2.61it/s]
 18%|█▊        | 35/200 [00:13<01:03,  2.61it/s]
 18%|█▊        | 36/200 [00:14<01:02,  2.61it/s]
 18%|█▊        | 37/200 [00:14<01:02,  2.61it/s]
 19%|█▉        | 38/200 [00:14<01:02,  2.61it/s]
 20%|█▉        | 39/200 [00:15<01:01,  2.62it/s]
 20%|██        | 40/200 [00:15<00:51,  3.12it/s]
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=7331)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 39.13it/s][A


[2m[36m(_objective pid=7331)[0m {'loss': 0.7372, 'learning_rate': 1.4880557636503635e-08, 'epoch': 1.0}


[2m[36m(_objective pid=7331)[0m 
[2m[36m(_objective pid=7331)[0m  44%|████▍     | 8/18 [00:00<00:00, 32.46it/s][A
[2m[36m(_objective pid=7331)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.49it/s][A
[2m[36m(_objective pid=7331)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.95it/s][A
                                                
 20%|██        | 40/200 [00:16<00:51,  3.12it/s]
100%|██████████| 18/18 [00:00<00:00, 29.95it/s][A
 20%|██        | 40/200 [00:16<01:04,  2.48it/s]


Trial _objective_da5ca116 finished iteration 1 at 2023-09-25 13:08:30. Total running time: 13min 36s
+----------------------------------------------+
| Trial _objective_da5ca116 result             |
+----------------------------------------------+
| time_this_iter_s                      19.782 |
| time_total_s                          19.782 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.74449 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6328 |
| eval_samples_per_second              444.049 |
| eval_steps_per_second                 28.444 |
| objective                            2.56228 |
+----------------------------------------------+

Trial _objective



Trial _objective_1771b99c started with configuration:
+----------------------------------------------+
| Trial _objective_1771b99c config             |
+----------------------------------------------+
| learning_rate                          1e-05 |
| num_train_epochs                           5 |
| per_device_train_batch_size                4 |
| seed                                 33.4594 |
+----------------------------------------------+



[2m[36m(_objective pid=7472)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=7472)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/3155 [00:00<?, ?it/s]
  0%|          | 1/3155 [00:00<12:30,  4.20it/s]
  0%|          | 3/3155 [00:00<05:28,  9.58it/s]
  0%|          | 5/3155 [00:00<04:17, 12.22it/s]
  0%|          | 7/3155 [00:00<03:48, 13.79it/s]
  0%|          | 9/3155 [00:00<03:41, 14.18it/s]
  0%|          | 11/3155 [00:00<03:29, 15.00it/s]
  0%|          | 13/3155 [00:00<03:24, 15.34it/s]
  0%|          | 15/3155 [00:01<03:18, 15.85it/s]
  1%|          | 17/3155 [00:01<03:17, 15.90it/s]
  1%|          | 19/3155 [00:01<03:16, 15.93it/s]
  1%|          | 21/3155 [00:01<03:12, 16.31it/s]
  1%|          | 23/3155 [00:01<03:15, 16.05it/s]
 

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:08:55. Total running time: 14min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4                                                                               |
| _objective_6c47dcc5   TERMINATED       3.76

  6%|▌         | 175/3155 [00:12<04:11, 11.84it/s]
  6%|▌         | 177/3155 [00:12<04:15, 11.65it/s]
  6%|▌         | 179/3155 [00:12<04:19, 11.48it/s]
  6%|▌         | 181/3155 [00:12<04:16, 11.60it/s]
  6%|▌         | 183/3155 [00:13<04:11, 11.82it/s]
  6%|▌         | 185/3155 [00:13<04:07, 11.99it/s]
  6%|▌         | 187/3155 [00:13<04:07, 11.99it/s]
  6%|▌         | 189/3155 [00:13<04:04, 12.11it/s]
  6%|▌         | 191/3155 [00:13<04:08, 11.95it/s]
  6%|▌         | 193/3155 [00:13<04:05, 12.07it/s]
  6%|▌         | 195/3155 [00:14<04:03, 12.14it/s]
  6%|▌         | 197/3155 [00:14<04:01, 12.25it/s]
  6%|▋         | 199/3155 [00:14<03:58, 12.42it/s]
  6%|▋         | 201/3155 [00:14<03:54, 12.58it/s]
  6%|▋         | 203/3155 [00:14<04:08, 11.87it/s]
  6%|▋         | 205/3155 [00:14<03:46, 13.01it/s]
  7%|▋         | 207/3155 [00:15<03:28, 14.12it/s]
  7%|▋         | 209/3155 [00:15<03:24, 14.40it/s]
  7%|▋         | 211/3155 [00:15<03:16, 14.96it/s]
  7%|▋         | 213/3155 [00:1

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:09:25. Total running time: 14min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7

[2m[36m(_objective pid=7472)[0m  20%|█▉        | 629/3155 [00:42<02:35, 16.20it/s]
[2m[36m(_objective pid=7472)[0m  20%|██        | 631/3155 [00:42<02:37, 16.03it/s]                                                   20%|██        | 631/3155 [00:42<02:37, 16.03it/s]
[2m[36m(_objective pid=7472)[0m   0%|          | 0/18 [00:00<?, ?it/s][A


[2m[36m(_objective pid=7472)[0m {'loss': 0.6991, 'learning_rate': 2.6543404028922465e-07, 'epoch': 1.0}


[2m[36m(_objective pid=7472)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 39.16it/s][A
[2m[36m(_objective pid=7472)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.50it/s][A
[2m[36m(_objective pid=7472)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.54it/s][A
[2m[36m(_objective pid=7472)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.90it/s][A
                                                  
 20%|██        | 631/3155 [00:43<02:37, 16.03it/s]
100%|██████████| 18/18 [00:00<00:00, 29.90it/s][A
                                               [A


Trial _objective_1771b99c finished iteration 1 at 2023-09-25 13:09:26. Total running time: 14min 32s
+----------------------------------------------+
| Trial _objective_1771b99c result             |
+----------------------------------------------+
| time_this_iter_s                     46.0388 |
| time_total_s                         46.0388 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                             0.6805 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6227 |
| eval_samples_per_second              451.284 |
| eval_steps_per_second                 28.908 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

 20%|██        | 631/3155 [00:53<02:37, 16.03it/s]
 20%|██        | 632/3155 [00:56<1:44:04,  2.48s/it]
 20%|██        | 634/3155 [00:56<1:10:04,  1.67s/it]
 20%|██        | 636/3155 [00:56<48:13,  1.15s/it]  
 20%|██        | 638/3155 [00:56<33:46,  1.24it/s]
 20%|██        | 640/3155 [00:56<24:01,  1.74it/s]
 20%|██        | 642/3155 [00:56<17:25,  2.40it/s]
 20%|██        | 644/3155 [00:57<12:51,  3.26it/s]
 20%|██        | 646/3155 [00:57<09:40,  4.33it/s]
 21%|██        | 648/3155 [00:57<07:29,  5.57it/s]
 21%|██        | 650/3155 [00:57<06:02,  6.92it/s]
 21%|██        | 652/3155 [00:57<04:59,  8.35it/s]
 21%|██        | 654/3155 [00:57<04:15,  9.77it/s]
 21%|██        | 656/3155 [00:57<03:44, 11.12it/s]
 21%|██        | 658/3155 [00:57<03:19, 12.50it/s]
 21%|██        | 660/3155 [00:58<03:04, 13.50it/s]
 21%|██        | 662/3155 [00:58<02:52, 14.45it/s]
 21%|██        | 664/3155 [00:58<02:46, 14.97it/s]
 21%|██        | 666/3155 [00:58<02:39, 15.61it/s]
 21%|██        | 668/3155

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:09:55. Total running time: 15min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4        1            46.0388       2.66667      0.680501   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=7472)[0m  27%|██▋       | 864/3155 [01:12<02:16, 16.79it/s]
 27%|██▋       | 866/3155 [01:12<02:16, 16.72it/s]
 28%|██▊       | 868/3155 [01:12<02:14, 17.05it/s]
 28%|██▊       | 870/3155 [01:12<02:15, 16.90it/s]
 28%|██▊       | 872/3155 [01:12<02:16, 16.68it/s]
 28%|██▊       | 874/3155 [01:12<02:14, 16.98it/s]
 28%|██▊       | 876/3155 [01:13<02:13, 17.06it/s]
 28%|██▊       | 878/3155 [01:13<02:14, 16.94it/s]
 28%|██▊       | 880/3155 [01:13<02:15, 16.75it/s]
 28%|██▊       | 882/3155 [01:13<02:14, 16.95it/s]
 28%|██▊       | 884/3155 [01:13<02:13, 17.02it/s]
 28%|██▊       | 886/3155 [01:13<02:17, 16.47it/s]
 28%|██▊       | 888/3155 [01:13<02:18, 16.35it/s]
 28%|██▊       | 890/3155 [01:13<02:17, 16.45it/s]
 28%|██▊       | 892/3155 [01:14<02:15, 16.69it/s]
 28%|██▊       | 894/3155 [01:14<02:18, 16.29it/s]
 28%|██▊       | 896/3155 [01:14<02:16, 16.55it/s]
 28%|██▊       | 898/3155 [01:14<02:15, 16.71it/s]
 29%|██▊       | 900/3155 [01:14<02:11, 17.12i

[2m[36m(_objective pid=7472)[0m {'loss': 0.6611, 'learning_rate': 5.308680805784493e-07, 'epoch': 2.0}


[2m[36m(_objective pid=7472)[0m 
[2m[36m(_objective pid=7472)[0m  22%|██▏       | 4/18 [00:00<00:00, 38.84it/s][A
[2m[36m(_objective pid=7472)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.44it/s][A
[2m[36m(_objective pid=7472)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 31.06it/s][A
[2m[36m(_objective pid=7472)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 30.37it/s][A


Trial _objective_1771b99c finished iteration 2 at 2023-09-25 13:10:22. Total running time: 15min 28s
+----------------------------------------------+
| Trial _objective_1771b99c result             |
+----------------------------------------------+
| time_this_iter_s                     55.7285 |
| time_total_s                         101.767 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.59794 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6303 |
| eval_samples_per_second              445.784 |
| eval_steps_per_second                 28.556 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=7472)[0m                                                    
[2m[36m(_objective pid=7472)[0m                                                [A 40%|████      | 1262/3155 [01:38<01:47, 17.61it/s]
[2m[36m(_objective pid=7472)[0m 100%|██████████| 18/18 [00:00<00:00, 30.37it/s][A
[2m[36m(_objective pid=7472)[0m                                                [A


Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:10:25. Total running time: 15min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4        2           101.767        2.66667      0.597938   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7

 40%|████      | 1262/3155 [01:54<01:47, 17.61it/s]
 40%|████      | 1263/3155 [01:54<1:33:20,  2.96s/it]
 40%|████      | 1265/3155 [01:54<1:02:37,  1.99s/it]
 40%|████      | 1267/3155 [01:55<42:56,  1.36s/it]  
 40%|████      | 1269/3155 [01:55<29:55,  1.05it/s]
 40%|████      | 1271/3155 [01:55<21:08,  1.49it/s]
 40%|████      | 1273/3155 [01:55<15:10,  2.07it/s]
 40%|████      | 1275/3155 [01:55<11:04,  2.83it/s]
 40%|████      | 1277/3155 [01:55<08:17,  3.78it/s]
 41%|████      | 1279/3155 [01:55<06:23,  4.89it/s]
 41%|████      | 1281/3155 [01:55<05:00,  6.24it/s]
 41%|████      | 1283/3155 [01:56<04:02,  7.71it/s]
 41%|████      | 1285/3155 [01:56<03:27,  9.01it/s]
 41%|████      | 1287/3155 [01:56<02:58, 10.45it/s]
 41%|████      | 1289/3155 [01:56<02:41, 11.58it/s]
 41%|████      | 1291/3155 [01:56<02:24, 12.87it/s]
 41%|████      | 1293/3155 [01:56<02:26, 12.72it/s]
 41%|████      | 1295/3155 [01:56<02:33, 12.14it/s]
 41%|████      | 1297/3155 [01:57<02:32, 12.20it/s]
 41%|█

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:10:55. Total running time: 16min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4        2           101.767        2.66667      0.597938   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=7472)[0m  48%|████▊     | 1521/3155 [02:12<01:41, 16.08it/s]
 48%|████▊     | 1523/3155 [02:12<01:40, 16.26it/s]
 48%|████▊     | 1525/3155 [02:12<01:39, 16.38it/s]
 48%|████▊     | 1527/3155 [02:12<01:38, 16.50it/s]
 48%|████▊     | 1529/3155 [02:12<01:36, 16.79it/s]
 49%|████▊     | 1531/3155 [02:13<01:39, 16.35it/s]
 49%|████▊     | 1533/3155 [02:13<01:39, 16.33it/s]
 49%|████▊     | 1535/3155 [02:13<01:36, 16.87it/s]
 49%|████▊     | 1537/3155 [02:13<01:34, 17.07it/s]
 49%|████▉     | 1539/3155 [02:13<01:38, 16.36it/s]
 49%|████▉     | 1541/3155 [02:13<01:37, 16.54it/s]
 49%|████▉     | 1543/3155 [02:13<01:36, 16.63it/s]
 49%|████▉     | 1545/3155 [02:13<01:36, 16.72it/s]
 49%|████▉     | 1547/3155 [02:14<01:36, 16.58it/s]
 49%|████▉     | 1549/3155 [02:14<01:40, 16.01it/s]
 49%|████▉     | 1551/3155 [02:14<01:36, 16.62it/s]
 49%|████▉     | 1553/3155 [02:14<01:35, 16.76it/s]
 49%|████▉     | 1555/3155 [02:14<01:41, 15.82it/s]
 49%|████▉     | 1557/3155 [

[2m[36m(_objective pid=7472)[0m {'loss': 0.5754, 'learning_rate': 7.963021208676739e-07, 'epoch': 3.0}


[2m[36m(_objective pid=7472)[0m 
[2m[36m(_objective pid=7472)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=7472)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 34.01it/s][A
[2m[36m(_objective pid=7472)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 28.75it/s][A
[2m[36m(_objective pid=7472)[0m 
 61%|██████    | 11/18 [00:00<00:00, 28.07it/s][A
[2m[36m(_objective pid=7472)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 28.03it/s][A
[2m[36m(_objective pid=7472)[0m 
                                                   
 60%|██████    | 1893/3155 [02:38<01:35, 13.22it/s]
100%|██████████| 18/18 [00:00<00:00, 27.94it/s][A
                                               [A


Trial _objective_1771b99c finished iteration 3 at 2023-09-25 13:11:21. Total running time: 16min 27s
+----------------------------------------------+
| Trial _objective_1771b99c result             |
+----------------------------------------------+
| time_this_iter_s                     59.3773 |
| time_total_s                         161.145 |
| training_iteration                         3 |
| epoch                                      3 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.50141 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6755 |
| eval_samples_per_second              415.999 |
| eval_steps_per_second                 26.648 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=7472)[0m  60%|██████    | 1895/3155 [02:50<40:05,  1.91s/it]
 60%|██████    | 1897/3155 [02:50<28:24,  1.35s/it]
 60%|██████    | 1899/3155 [02:50<20:13,  1.04it/s]
 60%|██████    | 1901/3155 [02:50<14:29,  1.44it/s]
 60%|██████    | 1903/3155 [02:50<10:29,  1.99it/s]
 60%|██████    | 1905/3155 [02:50<07:43,  2.70it/s]
 60%|██████    | 1907/3155 [02:50<05:45,  3.61it/s]
 61%|██████    | 1909/3155 [02:50<04:24,  4.71it/s]
 61%|██████    | 1911/3155 [02:50<03:26,  6.01it/s]
 61%|██████    | 1913/3155 [02:51<02:47,  7.40it/s]
 61%|██████    | 1915/3155 [02:51<03:50,  5.38it/s]
 61%|██████    | 1917/3155 [02:51<03:03,  6.74it/s]
 61%|██████    | 1919/3155 [02:51<02:31,  8.17it/s]
 61%|██████    | 1921/3155 [02:52<02:06,  9.76it/s]
 61%|██████    | 1923/3155 [02:52<01:51, 11.01it/s]
 61%|██████    | 1925/3155 [02:52<01:38, 12.43it/s]
 61%|██████    | 1927/3155 [02:52<01:32, 13.33it/s]
 61%|██████    | 1929/3155 [02:52<01:29, 13.65it/s]
 61%|██████    | 1931/3155 [

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:11:55. Total running time: 17min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4        3           161.145        2.66667      0.50141    0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=7472)[0m  71%|███████   | 2227/3155 [03:12<00:54, 17.13it/s]
 71%|███████   | 2229/3155 [03:12<00:54, 16.86it/s]
 71%|███████   | 2231/3155 [03:12<01:01, 14.95it/s]
 71%|███████   | 2233/3155 [03:13<01:05, 14.14it/s]
 71%|███████   | 2235/3155 [03:13<01:06, 13.75it/s]
 71%|███████   | 2237/3155 [03:13<01:05, 14.00it/s]
 71%|███████   | 2239/3155 [03:13<01:05, 14.00it/s]
 71%|███████   | 2241/3155 [03:13<01:05, 13.91it/s]
 71%|███████   | 2243/3155 [03:13<01:08, 13.24it/s]
 71%|███████   | 2245/3155 [03:13<01:09, 13.05it/s]
 71%|███████   | 2247/3155 [03:14<01:13, 12.42it/s]
 71%|███████▏  | 2249/3155 [03:14<01:09, 12.99it/s]
 71%|███████▏  | 2251/3155 [03:14<01:11, 12.56it/s]
 71%|███████▏  | 2253/3155 [03:14<01:11, 12.59it/s]
 71%|███████▏  | 2255/3155 [03:14<01:10, 12.81it/s]
 72%|███████▏  | 2257/3155 [03:14<01:08, 13.14it/s]
 72%|███████▏  | 2259/3155 [03:15<01:11, 12.48it/s]
 72%|███████▏  | 2261/3155 [03:15<01:07, 13.16it/s]
 72%|███████▏  | 2263/3155 [

[2m[36m(_objective pid=7472)[0m {'loss': 0.4455, 'learning_rate': 1.0617361611568986e-06, 'epoch': 4.0}


[2m[36m(_objective pid=7472)[0m 
[2m[36m(_objective pid=7472)[0m  22%|██▏       | 4/18 [00:00<00:00, 37.42it/s][A
[2m[36m(_objective pid=7472)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 30.66it/s][A
[2m[36m(_objective pid=7472)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 26.59it/s][A
[2m[36m(_objective pid=7472)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 26.89it/s][A


Trial _objective_1771b99c finished iteration 4 at 2023-09-25 13:12:16. Total running time: 17min 22s
+----------------------------------------------+
| Trial _objective_1771b99c result             |
+----------------------------------------------+
| time_this_iter_s                      55.095 |
| time_total_s                          216.24 |
| training_iteration                         4 |
| epoch                                      4 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                             0.4203 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                             0.7 |
| eval_samples_per_second              401.423 |
| eval_steps_per_second                 25.714 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=7472)[0m                                                    
[2m[36m(_objective pid=7472)[0m                                                [A 80%|████████  | 2524/3155 [03:33<00:47, 13.15it/s]
[2m[36m(_objective pid=7472)[0m 100%|██████████| 18/18 [00:00<00:00, 26.89it/s][A
                                               [A
 80%|████████  | 2525/3155 [03:38<10:14,  1.03it/s]
 80%|████████  | 2527/3155 [03:38<07:25,  1.41it/s]
 80%|████████  | 2529/3155 [03:39<05:26,  1.91it/s]
 80%|████████  | 2531/3155 [03:39<04:04,  2.56it/s]
 80%|████████  | 2533/3155 [03:39<03:07,  3.31it/s]
 80%|████████  | 2535/3155 [03:39<02:26,  4.23it/s]
 80%|████████  | 2537/3155 [03:39<01:57,  5.27it/s]
 80%|████████  | 2539/3155 [03:40<01:37,  6.31it/s]
 81%|████████  | 2541/3155 [03:40<01:22,  7.45it/s]
 81%|████████  | 2543/3155 [03:40<01:11,  8.59it/s]
 81%|████████  | 2545/3155 [03:40<01:03,  9.64it/s]
 81%|████████  | 2547/3155 [03:40<00:59, 10.15it/s]
 81%|█████

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:12:26. Total running time: 17min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4        4           216.24         2.66667      0.420299   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7

[2m[36m(_objective pid=7472)[0m  82%|████████▏ | 2581/3155 [03:42<00:33, 17.09it/s]
 82%|████████▏ | 2583/3155 [03:42<00:34, 16.80it/s]
 82%|████████▏ | 2585/3155 [03:43<00:33, 17.20it/s]
 82%|████████▏ | 2587/3155 [03:43<00:34, 16.54it/s]
 82%|████████▏ | 2589/3155 [03:43<00:34, 16.46it/s]
 82%|████████▏ | 2591/3155 [03:43<00:33, 16.63it/s]
 82%|████████▏ | 2593/3155 [03:43<00:34, 16.50it/s]
 82%|████████▏ | 2595/3155 [03:43<00:33, 16.95it/s]
 82%|████████▏ | 2597/3155 [03:43<00:33, 16.79it/s]
 82%|████████▏ | 2599/3155 [03:43<00:32, 16.96it/s]
 82%|████████▏ | 2601/3155 [03:44<00:33, 16.69it/s]
 83%|████████▎ | 2603/3155 [03:44<00:34, 16.05it/s]
 83%|████████▎ | 2605/3155 [03:44<00:33, 16.37it/s]
 83%|████████▎ | 2607/3155 [03:44<00:33, 16.60it/s]
 83%|████████▎ | 2609/3155 [03:44<00:32, 16.72it/s]
 83%|████████▎ | 2611/3155 [03:44<00:31, 17.07it/s]
 83%|████████▎ | 2613/3155 [03:44<00:32, 16.92it/s]
 83%|████████▎ | 2615/3155 [03:44<00:31, 16.90it/s]
 83%|████████▎ | 2617/3155 [

Trial status: 12 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:12:56. Total running time: 18min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_1771b99c   RUNNING          1.06132e-05                    5   33.4594                         4        4           216.24         2.66667      0.420299   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=7472)[0m  96%|█████████▋| 3041/3155 [04:12<00:08, 13.28it/s]
 96%|█████████▋| 3043/3155 [04:12<00:08, 13.64it/s]
 97%|█████████▋| 3045/3155 [04:13<00:08, 13.07it/s]
 97%|█████████▋| 3047/3155 [04:13<00:07, 13.52it/s]
 97%|█████████▋| 3049/3155 [04:13<00:07, 13.55it/s]
 97%|█████████▋| 3051/3155 [04:13<00:07, 13.61it/s]
 97%|█████████▋| 3053/3155 [04:13<00:07, 13.76it/s]
 97%|█████████▋| 3055/3155 [04:13<00:07, 13.90it/s]
 97%|█████████▋| 3057/3155 [04:13<00:07, 13.83it/s]
 97%|█████████▋| 3059/3155 [04:14<00:07, 13.20it/s]
 97%|█████████▋| 3061/3155 [04:14<00:07, 13.03it/s]
 97%|█████████▋| 3063/3155 [04:14<00:07, 13.13it/s]
 97%|█████████▋| 3065/3155 [04:14<00:06, 13.62it/s]
 97%|█████████▋| 3067/3155 [04:14<00:06, 13.61it/s]
 97%|█████████▋| 3069/3155 [04:14<00:06, 13.35it/s]
 97%|█████████▋| 3071/3155 [04:15<00:06, 13.02it/s]
 97%|█████████▋| 3073/3155 [04:15<00:06, 12.92it/s]
 97%|█████████▋| 3075/3155 [04:15<00:06, 12.82it/s]
 98%|█████████▊| 3077/3155 [

[2m[36m(_objective pid=7472)[0m {'loss': 0.3342, 'learning_rate': 1.3271702014461233e-06, 'epoch': 5.0}


[2m[36m(_objective pid=7472)[0m 
[2m[36m(_objective pid=7472)[0m  44%|████▍     | 8/18 [00:00<00:00, 32.28it/s][A
[2m[36m(_objective pid=7472)[0m 
[2m[36m(_objective pid=7472)[0m  67%|██████▋   | 12/18 [00:00<00:00, 30.65it/s][A


Trial _objective_1771b99c finished iteration 5 at 2023-09-25 13:13:05. Total running time: 18min 11s
+----------------------------------------------+
| Trial _objective_1771b99c result             |
+----------------------------------------------+
| time_this_iter_s                     48.4141 |
| time_total_s                         264.654 |
| training_iteration                         5 |
| epoch                                      5 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.38691 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6207 |
| eval_samples_per_second              452.699 |
| eval_steps_per_second                 28.999 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=7472)[0m 
[2m[36m(_objective pid=7472)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.89it/s][A                                                   
[2m[36m(_objective pid=7472)[0m                                                [A100%|██████████| 3155/3155 [04:21<00:00, 16.91it/s]
[2m[36m(_objective pid=7472)[0m 100%|██████████| 18/18 [00:00<00:00, 29.89it/s][A
[2m[36m(_objective pid=7472)[0m                                                [A


Trial _objective_1771b99c completed after 5 iterations at 2023-09-25 13:13:10. Total running time: 18min 16s

[2m[36m(_objective pid=7472)[0m {'train_runtime': 267.3159, 'train_samples_per_second': 47.191, 'train_steps_per_second': 11.803, 'train_loss': 0.54303868893776, 'epoch': 5.0}


[2m[36m(_objective pid=7472)[0m                                                    100%|██████████| 3155/3155 [04:27<00:00, 16.91it/s]100%|██████████| 3155/3155 [04:27<00:00, 11.80it/s]


Trial _objective_e998da26 started with configuration:
+----------------------------------------------+
| Trial _objective_e998da26 config             |
+----------------------------------------------+
| learning_rate                          2e-05 |
| num_train_epochs                           2 |
| per_device_train_batch_size               64 |
| seed                                 32.8344 |
+----------------------------------------------+



[2m[36m(_objective pid=8675)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=8675)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/80 [00:00<?, ?it/s]
  1%|▏         | 1/80 [00:00<00:45,  1.75it/s]
  2%|▎         | 2/80 [00:00<00:35,  2.20it/s]
  4%|▍         | 3/80 [00:01<00:32,  2.37it/s]


Trial status: 13 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:13:26. Total running time: 18min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_e998da26   RUNNING          1.66486e-05                    2   32.8344                        64                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7

  5%|▌         | 4/80 [00:01<00:30,  2.47it/s]
  6%|▋         | 5/80 [00:02<00:29,  2.52it/s]
  8%|▊         | 6/80 [00:02<00:28,  2.55it/s]
  9%|▉         | 7/80 [00:02<00:28,  2.58it/s]
 10%|█         | 8/80 [00:03<00:27,  2.59it/s]
 11%|█▏        | 9/80 [00:03<00:27,  2.60it/s]
 12%|█▎        | 10/80 [00:03<00:26,  2.61it/s]
 14%|█▍        | 11/80 [00:04<00:26,  2.61it/s]
 15%|█▌        | 12/80 [00:04<00:26,  2.61it/s]
 16%|█▋        | 13/80 [00:05<00:25,  2.61it/s]
 18%|█▊        | 14/80 [00:05<00:25,  2.61it/s]
 19%|█▉        | 15/80 [00:05<00:24,  2.60it/s]
 20%|██        | 16/80 [00:06<00:24,  2.59it/s]
 21%|██▏       | 17/80 [00:06<00:24,  2.59it/s]
 22%|██▎       | 18/80 [00:07<00:23,  2.59it/s]
 24%|██▍       | 19/80 [00:07<00:23,  2.58it/s]
 25%|██▌       | 20/80 [00:07<00:23,  2.58it/s]
 26%|██▋       | 21/80 [00:08<00:22,  2.58it/s]
 28%|██▊       | 22/80 [00:08<00:22,  2.57it/s]
 29%|██▉       | 23/80 [00:09<00:22,  2.57it/s]
 30%|███       | 24/80 [00:09<00:21,  2.56it/s

[2m[36m(_objective pid=8675)[0m {'loss': 0.6977, 'learning_rate': 2.6394934657798e-08, 'epoch': 1.0}


[2m[36m(_objective pid=8675)[0m  50%|█████     | 40/80 [00:15<00:12,  3.10it/s]                                                50%|█████     | 40/80 [00:15<00:12,  3.10it/s]
[2m[36m(_objective pid=8675)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8675)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 39.23it/s][A
[2m[36m(_objective pid=8675)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.92it/s][A
[2m[36m(_objective pid=8675)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 31.19it/s][A
[2m[36m(_objective pid=8675)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 30.44it/s][A
                                               
 50%|█████     | 40/80 [00:16<00:12,  3.10it/s]
100%|██████████| 18/18 [00:00<00:00, 30.44it/s][A
                                               [A


Trial _objective_e998da26 finished iteration 1 at 2023-09-25 13:13:40. Total running time: 18min 46s
+----------------------------------------------+
| Trial _objective_e998da26 result             |
+----------------------------------------------+
| time_this_iter_s                     19.0755 |
| time_total_s                         19.0755 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69263 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6154 |
| eval_samples_per_second              456.613 |
| eval_steps_per_second                 29.249 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

 51%|█████▏    | 41/80 [00:30<02:59,  4.60s/it]
 52%|█████▎    | 42/80 [00:30<02:06,  3.33s/it]
 54%|█████▍    | 43/80 [00:30<01:30,  2.45s/it]
 55%|█████▌    | 44/80 [00:31<01:05,  1.83s/it]


Trial status: 13 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:13:56. Total running time: 19min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_e998da26   RUNNING          1.66486e-05                    2   32.8344                        64        1            19.0755       2.66667      0.692626   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

 56%|█████▋    | 45/80 [00:31<00:48,  1.39s/it]
 57%|█████▊    | 46/80 [00:31<00:37,  1.09s/it]
 59%|█████▉    | 47/80 [00:32<00:28,  1.14it/s]
 60%|██████    | 48/80 [00:32<00:23,  1.37it/s]
 61%|██████▏   | 49/80 [00:33<00:19,  1.60it/s]
 62%|██████▎   | 50/80 [00:33<00:16,  1.81it/s]
 64%|██████▍   | 51/80 [00:33<00:14,  1.99it/s]
 65%|██████▌   | 52/80 [00:34<00:13,  2.14it/s]
 66%|██████▋   | 53/80 [00:34<00:11,  2.27it/s]
 68%|██████▊   | 54/80 [00:34<00:11,  2.36it/s]
 69%|██████▉   | 55/80 [00:35<00:10,  2.43it/s]
 70%|███████   | 56/80 [00:35<00:09,  2.49it/s]
 71%|███████▏  | 57/80 [00:36<00:09,  2.52it/s]
 72%|███████▎  | 58/80 [00:36<00:08,  2.55it/s]
 74%|███████▍  | 59/80 [00:36<00:08,  2.56it/s]
 75%|███████▌  | 60/80 [00:37<00:07,  2.57it/s]
 76%|███████▋  | 61/80 [00:37<00:07,  2.59it/s]
 78%|███████▊  | 62/80 [00:38<00:06,  2.60it/s]
 79%|███████▉  | 63/80 [00:38<00:06,  2.60it/s]
 80%|████████  | 64/80 [00:38<00:06,  2.61it/s]
 81%|████████▏ | 65/80 [00:39<00:05,  2.

[2m[36m(_objective pid=8675)[0m {'loss': 0.6984, 'learning_rate': 5.2789869315596e-08, 'epoch': 2.0}


[2m[36m(_objective pid=8675)[0m 100%|██████████| 80/80 [00:44<00:00,  3.05it/s]                                               100%|██████████| 80/80 [00:44<00:00,  3.05it/s]
[2m[36m(_objective pid=8675)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8675)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 36.00it/s][A
[2m[36m(_objective pid=8675)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.42it/s][A
[2m[36m(_objective pid=8675)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 29.25it/s][A
[2m[36m(_objective pid=8675)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 28.61it/s][A
[2m[36m(_objective pid=8675)[0m 
                                               
100%|██████████| 80/80 [00:45<00:00,  3.05it/s]
100%|██████████| 18/18 [00:00<00:00, 28.35it/s][A
                                               [A


Trial _objective_e998da26 finished iteration 2 at 2023-09-25 13:14:10. Total running time: 19min 16s
+----------------------------------------------+
| Trial _objective_e998da26 result             |
+----------------------------------------------+
| time_this_iter_s                     29.4221 |
| time_total_s                         48.4976 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69174 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6697 |
| eval_samples_per_second              419.585 |
| eval_steps_per_second                 26.877 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=8675)[0m                                                100%|██████████| 80/80 [00:59<00:00,  3.05it/s]100%|██████████| 80/80 [00:59<00:00,  1.35it/s]


Trial status: 14 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:14:26. Total running time: 19min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05    



Trial _objective_70e64feb started with configuration:
+----------------------------------------------+
| Trial _objective_70e64feb config             |
+----------------------------------------------+
| learning_rate                          2e-05 |
| num_train_epochs                           1 |
| per_device_train_batch_size               64 |
| seed                                 35.9536 |
+----------------------------------------------+



[2m[36m(_objective pid=8996)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=8996)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/40 [00:00<?, ?it/s]
  2%|▎         | 1/40 [00:00<00:22,  1.74it/s]
  5%|▌         | 2/40 [00:00<00:17,  2.19it/s]
  8%|▊         | 3/40 [00:01<00:15,  2.35it/s]
 10%|█         | 4/40 [00:01<00:14,  2.45it/s]
 12%|█▎        | 5/40 [00:02<00:13,  2.50it/s]
 15%|█▌        | 6/40 [00:02<00:13,  2.54it/s]
 18%|█▊        | 7/40 [00:02<00:12,  2.57it/s]
 20%|██        | 8/40 [00:03<00:12,  2.57it/s]
 22%|██▎       | 9/40 [00:03<00:11,  2.59it/s]
 25%|██▌       | 10/40 [00:04<00:11,  2.60it/s]
 28%|██▊       | 11/40 [00:04<00:11,  2.61it/s]
 30%|███       | 12/40 [00:04<00:10,  2.60it/s]
 32%|███▎      | 13/40 [00:05<0

[2m[36m(_objective pid=8996)[0m {'loss': 0.7075, 'learning_rate': 3.3646620802168035e-08, 'epoch': 1.0}


[2m[36m(_objective pid=8996)[0m 100%|██████████| 40/40 [00:15<00:00,  3.10it/s]                                               100%|██████████| 40/40 [00:15<00:00,  3.10it/s]
[2m[36m(_objective pid=8996)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8996)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 37.49it/s][A
[2m[36m(_objective pid=8996)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.94it/s][A
[2m[36m(_objective pid=8996)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.02it/s][A
[2m[36m(_objective pid=8996)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.84it/s][A


Trial _objective_70e64feb finished iteration 1 at 2023-09-25 13:14:53. Total running time: 19min 59s
+----------------------------------------------+
| Trial _objective_70e64feb result             |
+----------------------------------------------+
| time_this_iter_s                     19.1693 |
| time_total_s                         19.1693 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.70608 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6314 |
| eval_samples_per_second               445.04 |
| eval_steps_per_second                 28.508 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=8996)[0m                                                
[2m[36m(_objective pid=8996)[0m                                                [A100%|██████████| 40/40 [00:16<00:00,  3.10it/s]
[2m[36m(_objective pid=8996)[0m 100%|██████████| 18/18 [00:00<00:00, 29.84it/s][A
[2m[36m(_objective pid=8996)[0m                                                [A


Trial status: 14 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:14:56. Total running time: 20min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_70e64feb   RUNNING          2.12226e-05                    1   35.9536                        64        1            19.1693       2.66667      0.706077   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=8996)[0m                                                100%|██████████| 40/40 [00:28<00:00,  3.10it/s]100%|██████████| 40/40 [00:28<00:00,  1.42it/s]


Trial _objective_c9a3dbfe started with configuration:
+----------------------------------------------+
| Trial _objective_c9a3dbfe config             |
+----------------------------------------------+
| learning_rate                          1e-05 |
| num_train_epochs                           3 |
| per_device_train_batch_size               32 |
| seed                                 19.1148 |
+----------------------------------------------+



[2m[36m(_objective pid=9195)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=9195)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/237 [00:00<?, ?it/s]
  0%|          | 1/237 [00:00<01:34,  2.51it/s]
  1%|          | 2/237 [00:00<01:06,  3.54it/s]
  1%|▏         | 3/237 [00:00<00:57,  4.06it/s]
  2%|▏         | 4/237 [00:01<00:53,  4.35it/s]
  2%|▏         | 5/237 [00:01<00:52,  4.44it/s]
  3%|▎         | 6/237 [00:01<00:50,  4.55it/s]
  3%|▎         | 7/237 [00:01<00:50,  4.56it/s]
  3%|▎         | 8/237 [00:01<00:50,  4.57it/s]
  4%|▍         | 9/237 [00:02<00:51,  4.42it/s]
  4%|▍         | 10/237 [00:02<00:50,  4.53it/s]
  5%|▍         | 11/237 [00:02<00:49,  4.57it/s]
  5%|▌         | 12/237 [00:02<00:48,  4.63it/s]
  5%|▌         | 1

Trial status: 15 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:15:26. Total running time: 20min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_c9a3dbfe   RUNNING          7.42687e-06                    3   19.1148                        32                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7

 19%|█▊        | 44/237 [00:09<00:40,  4.76it/s]
 19%|█▉        | 45/237 [00:09<00:40,  4.73it/s]
 19%|█▉        | 46/237 [00:09<00:39,  4.79it/s]
 20%|█▉        | 47/237 [00:10<00:39,  4.82it/s]
 20%|██        | 48/237 [00:10<00:39,  4.83it/s]
 21%|██        | 49/237 [00:10<00:38,  4.83it/s]
 21%|██        | 50/237 [00:10<00:38,  4.85it/s]
 22%|██▏       | 51/237 [00:10<00:38,  4.87it/s]
 22%|██▏       | 52/237 [00:11<00:38,  4.85it/s]
 22%|██▏       | 53/237 [00:11<00:37,  4.87it/s]
 23%|██▎       | 54/237 [00:11<00:37,  4.86it/s]
 23%|██▎       | 55/237 [00:11<00:37,  4.86it/s]
 24%|██▎       | 56/237 [00:11<00:37,  4.87it/s]
 24%|██▍       | 57/237 [00:12<00:36,  4.87it/s]
 24%|██▍       | 58/237 [00:12<00:36,  4.87it/s]
 25%|██▍       | 59/237 [00:12<00:36,  4.88it/s]
 25%|██▌       | 60/237 [00:12<00:36,  4.87it/s]
 26%|██▌       | 61/237 [00:12<00:36,  4.87it/s]
 26%|██▌       | 62/237 [00:13<00:35,  4.86it/s]
 27%|██▋       | 63/237 [00:13<00:35,  4.84it/s]
 27%|██▋       | 64/

[2m[36m(_objective pid=9195)[0m {'loss': 0.6976, 'learning_rate': 2.3254962113561813e-08, 'epoch': 1.0}


[2m[36m(_objective pid=9195)[0m 
[2m[36m(_objective pid=9195)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=9195)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.91it/s][A
[2m[36m(_objective pid=9195)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.29it/s][A
[2m[36m(_objective pid=9195)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.66it/s][A


Trial _objective_c9a3dbfe finished iteration 1 at 2023-09-25 13:15:34. Total running time: 20min 40s
+----------------------------------------------+
| Trial _objective_c9a3dbfe result             |
+----------------------------------------------+
| time_this_iter_s                     20.1942 |
| time_total_s                         20.1942 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.69647 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6293 |
| eval_samples_per_second              446.519 |
| eval_steps_per_second                 28.603 |
| objective                            2.56228 |
+----------------------------------------------+

Trial _objective

[2m[36m(_objective pid=9195)[0m 
[2m[36m(_objective pid=9195)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.82it/s][A                                                
[2m[36m(_objective pid=9195)[0m                                                [A 33%|███▎      | 79/237 [00:17<00:31,  5.05it/s]
[2m[36m(_objective pid=9195)[0m 100%|██████████| 18/18 [00:00<00:00, 29.82it/s][A
[2m[36m(_objective pid=9195)[0m                                                [A 33%|███▎      | 79/237 [00:17<00:34,  4.56it/s]


Trial _objective_573305e1 started with configuration:
+----------------------------------------------+
| Trial _objective_573305e1 config             |
+----------------------------------------------+
| learning_rate                          1e-05 |
| num_train_epochs                           4 |
| per_device_train_batch_size               64 |
| seed                                 5.19853 |
+----------------------------------------------+



[2m[36m(_objective pid=9336)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=9336)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/160 [00:00<?, ?it/s]
  1%|          | 1/160 [00:00<01:34,  1.68it/s]
  1%|▏         | 2/160 [00:00<01:13,  2.14it/s]
  2%|▏         | 3/160 [00:01<01:07,  2.34it/s]
  2%|▎         | 4/160 [00:01<01:03,  2.44it/s]
  3%|▎         | 5/160 [00:02<01:01,  2.50it/s]
  4%|▍         | 6/160 [00:02<01:00,  2.54it/s]
  4%|▍         | 7/160 [00:02<00:59,  2.56it/s]
  5%|▌         | 8/160 [00:03<00:59,  2.57it/s]
  6%|▌         | 9/160 [00:03<00:58,  2.57it/s]
  6%|▋         | 10/160 [00:04<00:58,  2.59it/s]
  7%|▋         | 11/160 [00:04<00:57,  2.59it/s]
  8%|▊         | 12/160 [00:04<00:56,  2.60it/s]
  8%|▊         | 1

Trial status: 16 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:15:56. Total running time: 21min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_573305e1   RUNNING          1.06846e-05                    4    5.19853                       64                                                                               |
| _objective_6c47dcc5   TERMINATED       3.76

 15%|█▌        | 24/160 [00:09<00:53,  2.56it/s]
 16%|█▌        | 25/160 [00:09<00:52,  2.56it/s]
 16%|█▋        | 26/160 [00:10<00:52,  2.56it/s]
 17%|█▋        | 27/160 [00:10<00:51,  2.57it/s]
 18%|█▊        | 28/160 [00:11<00:51,  2.56it/s]
 18%|█▊        | 29/160 [00:11<00:51,  2.57it/s]
 19%|█▉        | 30/160 [00:11<00:50,  2.57it/s]
 19%|█▉        | 31/160 [00:12<00:50,  2.56it/s]
 20%|██        | 32/160 [00:12<00:49,  2.56it/s]
 21%|██        | 33/160 [00:12<00:49,  2.57it/s]
 21%|██▏       | 34/160 [00:13<00:49,  2.56it/s]
 22%|██▏       | 35/160 [00:13<00:48,  2.56it/s]
 22%|██▎       | 36/160 [00:14<00:48,  2.55it/s]
 23%|██▎       | 37/160 [00:14<00:48,  2.55it/s]
 24%|██▍       | 38/160 [00:14<00:47,  2.55it/s]
 24%|██▍       | 39/160 [00:15<00:47,  2.57it/s]


[2m[36m(_objective pid=9336)[0m {'loss': 0.7032, 'learning_rate': 1.6939460801162654e-08, 'epoch': 1.0}


[2m[36m(_objective pid=9336)[0m  25%|██▌       | 40/160 [00:15<00:39,  3.03it/s]                                                 25%|██▌       | 40/160 [00:15<00:39,  3.03it/s]
[2m[36m(_objective pid=9336)[0m 
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=9336)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 33.80it/s][A
[2m[36m(_objective pid=9336)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 26.96it/s][A
[2m[36m(_objective pid=9336)[0m 
 61%|██████    | 11/18 [00:00<00:00, 27.32it/s][A
[2m[36m(_objective pid=9336)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 26.02it/s][A


Trial _objective_573305e1 finished iteration 1 at 2023-09-25 13:16:03. Total running time: 21min 9s
+----------------------------------------------+
| Trial _objective_573305e1 result             |
+----------------------------------------------+
| time_this_iter_s                     19.4509 |
| time_total_s                         19.4509 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.70218 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.7503 |
| eval_samples_per_second              374.514 |
| eval_steps_per_second                  23.99 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=9336)[0m 
[2m[36m(_objective pid=9336)[0m  94%|█████████▍| 17/18 [00:00<00:00, 25.49it/s][A                                                
[2m[36m(_objective pid=9336)[0m                                                [A 25%|██▌       | 40/160 [00:16<00:39,  3.03it/s]
[2m[36m(_objective pid=9336)[0m 100%|██████████| 18/18 [00:00<00:00, 25.49it/s][A
                                               [A
 26%|██▌       | 41/160 [00:21<04:13,  2.13s/it]
 26%|██▋       | 42/160 [00:22<03:09,  1.60s/it]
 27%|██▋       | 43/160 [00:22<02:24,  1.24s/it]
 28%|██▊       | 44/160 [00:22<01:53,  1.02it/s]
 28%|██▊       | 45/160 [00:23<01:32,  1.25it/s]
 29%|██▉       | 46/160 [00:23<01:16,  1.48it/s]
 29%|██▉       | 47/160 [00:24<01:06,  1.70it/s]
 30%|███       | 48/160 [00:24<00:59,  1.90it/s]
 31%|███       | 49/160 [00:24<00:54,  2.05it/s]
 31%|███▏      | 50/160 [00:25<00:49,  2.20it/s]
 32%|███▏      | 51/160 [00:25<00:47,  2.31it/s]
 32%|███▎     

[2m[36m(_objective pid=9336)[0m {'loss': 0.703, 'learning_rate': 3.387892160232531e-08, 'epoch': 2.0}


[2m[36m(_objective pid=9336)[0m 
[2m[36m(_objective pid=9336)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=9336)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 37.73it/s][A
[2m[36m(_objective pid=9336)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.98it/s][A
[2m[36m(_objective pid=9336)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.19it/s][A


Trial _objective_573305e1 finished iteration 2 at 2023-09-25 13:16:24. Total running time: 21min 30s
+----------------------------------------------+
| Trial _objective_573305e1 result             |
+----------------------------------------------+
| time_this_iter_s                     21.1333 |
| time_total_s                         40.5843 |
| training_iteration                         2 |
| epoch                                      2 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.70164 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6363 |
| eval_samples_per_second              441.597 |
| eval_steps_per_second                 28.287 |
| objective                            2.66667 |
+----------------------------------------------+[2m[36m(_objecti

[2m[36m(_objective pid=9336)[0m 
[2m[36m(_objective pid=9336)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.06it/s][A                                                
[2m[36m(_objective pid=9336)[0m                                                [A 50%|█████     | 80/160 [00:37<00:25,  3.09it/s]
[2m[36m(_objective pid=9336)[0m 100%|██████████| 18/18 [00:00<00:00, 29.06it/s][A
[2m[36m(_objective pid=9336)[0m                                                [A


Trial status: 16 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:16:26. Total running time: 21min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_573305e1   RUNNING          1.06846e-05                    4    5.19853                       64        2            40.5843       2.66667      0.701638   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.7

 51%|█████     | 81/160 [00:52<06:21,  4.83s/it]
 51%|█████▏    | 82/160 [00:52<04:32,  3.50s/it]
 52%|█████▏    | 83/160 [00:52<03:17,  2.56s/it]
 52%|█████▎    | 84/160 [00:53<02:24,  1.91s/it]
 53%|█████▎    | 85/160 [00:53<01:48,  1.45s/it]
 54%|█████▍    | 86/160 [00:53<01:23,  1.13s/it]
 54%|█████▍    | 87/160 [00:54<01:06,  1.10it/s]
 55%|█████▌    | 88/160 [00:54<00:53,  1.34it/s]
 56%|█████▌    | 89/160 [00:55<00:45,  1.57it/s]
 56%|█████▋    | 90/160 [00:55<00:39,  1.78it/s]
 57%|█████▋    | 91/160 [00:55<00:35,  1.97it/s]
 57%|█████▊    | 92/160 [00:56<00:32,  2.12it/s]
 58%|█████▊    | 93/160 [00:56<00:29,  2.25it/s]
 59%|█████▉    | 94/160 [00:57<00:28,  2.35it/s]
 59%|█████▉    | 95/160 [00:57<00:26,  2.42it/s]
 60%|██████    | 96/160 [00:57<00:25,  2.48it/s]
 61%|██████    | 97/160 [00:58<00:25,  2.51it/s]
 61%|██████▏   | 98/160 [00:58<00:24,  2.54it/s]
 62%|██████▏   | 99/160 [00:58<00:23,  2.56it/s]
 62%|██████▎   | 100/160 [00:59<00:23,  2.58it/s]
 63%|██████▎   | 10

[2m[36m(_objective pid=9336)[0m {'loss': 0.7033, 'learning_rate': 5.081838240348796e-08, 'epoch': 3.0}


[2m[36m(_objective pid=9336)[0m  75%|███████▌  | 120/160 [01:06<00:13,  3.03it/s]                                                  75%|███████▌  | 120/160 [01:06<00:13,  3.03it/s]
[2m[36m(_objective pid=9336)[0m   0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=9336)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 35.84it/s][A
[2m[36m(_objective pid=9336)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.15it/s][A
[2m[36m(_objective pid=9336)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 27.73it/s][A
[2m[36m(_objective pid=9336)[0m 
 83%|████████▎ | 15/18 [00:00<00:00, 27.29it/s][A


Trial _objective_573305e1 finished iteration 3 at 2023-09-25 13:16:54. Total running time: 22min 0s
+----------------------------------------------+
| Trial _objective_573305e1 result             |
+----------------------------------------------+
| time_this_iter_s                     30.2377 |
| time_total_s                          70.822 |
| training_iteration                         3 |
| epoch                                      3 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.70076 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6932 |
| eval_samples_per_second              405.362 |
| eval_steps_per_second                 25.966 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=9336)[0m 
[2m[36m(_objective pid=9336)[0m 100%|██████████| 18/18 [00:00<00:00, 27.75it/s][A                                                 
[2m[36m(_objective pid=9336)[0m                                                [A 75%|███████▌  | 120/160 [01:07<00:13,  3.03it/s]
[2m[36m(_objective pid=9336)[0m 100%|██████████| 18/18 [00:00<00:00, 27.75it/s][A
[2m[36m(_objective pid=9336)[0m                                                [A


Trial status: 16 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:16:56. Total running time: 22min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_573305e1   RUNNING          1.06846e-05                    4    5.19853                       64        3            70.822        2.66667      0.700759   0.5        1        |
| _objective_6c47dcc5   TERMINATED       3.76

 76%|███████▌  | 121/160 [01:14<01:33,  2.39s/it]
 76%|███████▋  | 122/160 [01:14<01:07,  1.79s/it]
 77%|███████▋  | 123/160 [01:14<00:50,  1.37s/it]
 78%|███████▊  | 124/160 [01:15<00:38,  1.07s/it]
 78%|███████▊  | 125/160 [01:15<00:30,  1.16it/s]
 79%|███████▉  | 126/160 [01:16<00:24,  1.39it/s]
 79%|███████▉  | 127/160 [01:16<00:20,  1.62it/s]
 80%|████████  | 128/160 [01:16<00:17,  1.82it/s]
 81%|████████  | 129/160 [01:17<00:15,  2.01it/s]
 81%|████████▏ | 130/160 [01:17<00:13,  2.16it/s]
 82%|████████▏ | 131/160 [01:17<00:12,  2.27it/s]
 82%|████████▎ | 132/160 [01:18<00:11,  2.37it/s]
 83%|████████▎ | 133/160 [01:18<00:11,  2.43it/s]
 84%|████████▍ | 134/160 [01:19<00:10,  2.48it/s]
 84%|████████▍ | 135/160 [01:19<00:09,  2.52it/s]
 85%|████████▌ | 136/160 [01:19<00:09,  2.54it/s]
 86%|████████▌ | 137/160 [01:20<00:08,  2.56it/s]
 86%|████████▋ | 138/160 [01:20<00:08,  2.57it/s]
 87%|████████▋ | 139/160 [01:21<00:08,  2.58it/s]
 88%|████████▊ | 140/160 [01:21<00:07,  2.59it/s]


[2m[36m(_objective pid=9336)[0m {'loss': 0.7016, 'learning_rate': 6.775784320465061e-08, 'epoch': 4.0}


[2m[36m(_objective pid=9336)[0m 100%|██████████| 160/160 [01:29<00:00,  3.04it/s]                                                 100%|██████████| 160/160 [01:29<00:00,  3.04it/s]
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=9336)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 32.44it/s][A
[2m[36m(_objective pid=9336)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 27.29it/s][A
[2m[36m(_objective pid=9336)[0m 
 61%|██████    | 11/18 [00:00<00:00, 27.02it/s][A
[2m[36m(_objective pid=9336)[0m 
 78%|███████▊  | 14/18 [00:00<00:00, 26.13it/s][A
[2m[36m(_objective pid=9336)[0m 
 94%|█████████▍| 17/18 [00:00<00:00, 26.27it/s][A
                                                 
100%|██████████| 160/160 [01:29<00:00,  3.04it/s]
100%|██████████| 18/18 [00:00<00:00, 26.27it/s][A
                                               [A


Trial _objective_573305e1 finished iteration 4 at 2023-09-25 13:17:17. Total running time: 22min 23s
+----------------------------------------------+
| Trial _objective_573305e1 result             |
+----------------------------------------------+
| time_this_iter_s                     22.1404 |
| time_total_s                         92.9624 |
| training_iteration                         4 |
| epoch                                      4 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                            0.69959 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.7287 |
| eval_samples_per_second              385.593 |
| eval_steps_per_second                   24.7 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_objec

[2m[36m(_objective pid=9336)[0m                                                  100%|██████████| 160/160 [01:35<00:00,  3.04it/s]100%|██████████| 160/160 [01:35<00:00,  1.67it/s]


Trial status: 17 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:17:26. Total running time: 22min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05    



Trial _objective_508fa7eb started with configuration:
+---------------------------------------------+
| Trial _objective_508fa7eb config            |
+---------------------------------------------+
| learning_rate                             0 |
| num_train_epochs                          3 |
| per_device_train_batch_size               4 |
| seed                                 38.927 |
+---------------------------------------------+



[2m[36m(_objective pid=9807)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=9807)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/1893 [00:00<?, ?it/s]
  0%|          | 1/1893 [00:00<07:47,  4.05it/s]
  0%|          | 3/1893 [00:00<03:16,  9.60it/s]
  0%|          | 5/1893 [00:00<02:34, 12.24it/s]
  0%|          | 7/1893 [00:00<02:18, 13.60it/s]
  0%|          | 9/1893 [00:00<02:07, 14.76it/s]
  1%|          | 11/1893 [00:00<02:03, 15.19it/s]
  1%|          | 13/1893 [00:00<02:01, 15.48it/s]
  1%|          | 15/1893 [00:01<02:00, 15.63it/s]
  1%|          | 17/1893 [00:01<01:58, 15.83it/s]
  1%|          | 19/1893 [00:01<01:56, 16.07it/s]
  1%|          | 21/1893 [00:01<01:55, 16.22it/s]
  1%|          | 23/1893 [00:01<01:53, 16.41it/s]
 

Trial status: 17 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:17:56. Total running time: 23min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_508fa7eb   RUNNING          2.59761e-06                    3   38.927                          4                                                                               |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=9807)[0m  16%|█▌        | 297/1893 [00:20<01:35, 16.71it/s]
 16%|█▌        | 299/1893 [00:20<01:37, 16.43it/s]
 16%|█▌        | 301/1893 [00:20<01:38, 16.23it/s]
 16%|█▌        | 303/1893 [00:20<01:38, 16.15it/s]
 16%|█▌        | 305/1893 [00:20<01:37, 16.27it/s]
 16%|█▌        | 307/1893 [00:20<01:35, 16.56it/s]
 16%|█▋        | 309/1893 [00:21<01:33, 17.01it/s]
 16%|█▋        | 311/1893 [00:21<01:35, 16.62it/s]
 17%|█▋        | 313/1893 [00:21<01:34, 16.73it/s]
 17%|█▋        | 315/1893 [00:21<01:34, 16.71it/s]
 17%|█▋        | 317/1893 [00:21<01:33, 16.90it/s]
 17%|█▋        | 319/1893 [00:21<01:37, 16.20it/s]
 17%|█▋        | 321/1893 [00:21<01:35, 16.39it/s]
 17%|█▋        | 323/1893 [00:21<01:35, 16.52it/s]
 17%|█▋        | 325/1893 [00:22<01:35, 16.48it/s]
 17%|█▋        | 327/1893 [00:22<01:36, 16.23it/s]
 17%|█▋        | 329/1893 [00:22<01:33, 16.71it/s]
 17%|█▋        | 331/1893 [00:22<01:35, 16.37it/s]
 18%|█▊        | 333/1893 [00:22<01:36, 16.10i

[2m[36m(_objective pid=9807)[0m {'loss': 0.6983, 'learning_rate': 6.496598856371322e-08, 'epoch': 1.0}


[2m[36m(_objective pid=9807)[0m  33%|███▎      | 631/1893 [00:42<01:14, 16.84it/s]                                                   33%|███▎      | 631/1893 [00:42<01:14, 16.84it/s]
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=9807)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 36.93it/s][A
[2m[36m(_objective pid=9807)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 31.18it/s][A
[2m[36m(_objective pid=9807)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 30.09it/s][A
[2m[36m(_objective pid=9807)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.62it/s][A


Trial _objective_508fa7eb finished iteration 1 at 2023-09-25 13:18:19. Total running time: 23min 25s
+----------------------------------------------+
| Trial _objective_508fa7eb result             |
+----------------------------------------------+
| time_this_iter_s                     47.6163 |
| time_total_s                         47.6163 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.51246 |
| eval_f1                              0.51246 |
| eval_loss                            0.69484 |
| eval_p                               0.51246 |
| eval_r                               0.51246 |
| eval_roc_auc                         0.51246 |
| eval_runtime                          0.6314 |
| eval_samples_per_second              445.008 |
| eval_steps_per_second                 28.506 |
| objective                            2.56228 |
+----------------------------------------------+

Trial _objective

[2m[36m(_objective pid=9807)[0m                                                   
[2m[36m(_objective pid=9807)[0m                                                [A 33%|███▎      | 631/1893 [00:43<01:14, 16.84it/s]
[2m[36m(_objective pid=9807)[0m 100%|██████████| 18/18 [00:00<00:00, 29.62it/s][A
[2m[36m(_objective pid=9807)[0m                                                [A 33%|███▎      | 631/1893 [00:43<01:26, 14.54it/s]


Trial status: 18 TERMINATED | 1 PENDING
Current time: 2023-09-25 13:18:26. Total running time: 23min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_6c47dcc5   TERMINATED       3.7694e-06                     3   17.2137                        64        3            63.1216       2.56228      0.706246   0.512456   0.512456 |
| _objective_c691cc98   TERMINATED       3.04275e-05    



Trial _objective_4451313b started with configuration:
+----------------------------------------------+
| Trial _objective_4451313b config             |
+----------------------------------------------+
| learning_rate                          1e-05 |
| num_train_epochs                           1 |
| per_device_train_batch_size                8 |
| seed                                 35.1165 |
+----------------------------------------------+



[2m[36m(_objective pid=10068)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=10068)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/316 [00:00<?, ?it/s]
  0%|          | 1/316 [00:00<01:25,  3.68it/s]
  1%|          | 3/316 [00:00<00:39,  7.98it/s]
  2%|▏         | 5/316 [00:00<00:31,  9.92it/s]
  2%|▏         | 7/316 [00:00<00:28, 10.71it/s]
  3%|▎         | 9/316 [00:00<00:26, 11.69it/s]
  3%|▎         | 11/316 [00:01<00:24, 12.41it/s]
  4%|▍         | 13/316 [00:01<00:24, 12.62it/s]
  5%|▍         | 15/316 [00:01<00:23, 12.67it/s]
  5%|▌         | 17/316 [00:01<00:23, 12.72it/s]
  6%|▌         | 19/316 [00:01<00:22, 13.01it/s]
  7%|▋         | 21/316 [00:01<00:22, 13.06it/s]
  7%|▋         | 23/316 [00:01<00:22, 13.24it/s]
  8%|▊      

Trial status: 18 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-25 13:18:56. Total running time: 24min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4451313b   RUNNING          1.07689e-05                    1   35.1164                         8                                                                               |
| _objective_6c47dcc5   TERMINATED       3.76

[2m[36m(_objective pid=10068)[0m  92%|█████████▏| 291/316 [00:23<00:01, 13.31it/s]
 93%|█████████▎| 293/316 [00:23<00:01, 13.35it/s]
 93%|█████████▎| 295/316 [00:23<00:01, 13.34it/s]
 94%|█████████▍| 297/316 [00:24<00:01, 13.19it/s]
 95%|█████████▍| 299/316 [00:24<00:01, 13.22it/s]
 95%|█████████▌| 301/316 [00:24<00:01, 13.20it/s]
 96%|█████████▌| 303/316 [00:24<00:00, 13.61it/s]
 97%|█████████▋| 305/316 [00:24<00:00, 13.63it/s]
 97%|█████████▋| 307/316 [00:24<00:00, 13.43it/s]
 98%|█████████▊| 309/316 [00:25<00:00, 13.22it/s]
 98%|█████████▊| 311/316 [00:25<00:00, 13.22it/s]
 99%|█████████▉| 313/316 [00:25<00:00, 13.36it/s]
100%|█████████▉| 315/316 [00:25<00:00, 13.59it/s]
100%|██████████| 316/316 [00:25<00:00, 13.59it/s]
  0%|          | 0/18 [00:00<?, ?it/s][A


[2m[36m(_objective pid=10068)[0m {'loss': 0.7068, 'learning_rate': 1.348777986613384e-07, 'epoch': 1.0}


[2m[36m(_objective pid=10068)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.73it/s][A
[2m[36m(_objective pid=10068)[0m 
 44%|████▍     | 8/18 [00:00<00:00, 32.41it/s][A
[2m[36m(_objective pid=10068)[0m 
 67%|██████▋   | 12/18 [00:00<00:00, 29.52it/s][A
[2m[36m(_objective pid=10068)[0m 
 89%|████████▉ | 16/18 [00:00<00:00, 29.49it/s][A
                                                 
100%|██████████| 316/316 [00:26<00:00, 13.59it/s]
100%|██████████| 18/18 [00:00<00:00, 29.49it/s][A
                                               [A


Trial _objective_4451313b finished iteration 1 at 2023-09-25 13:18:59. Total running time: 24min 5s
+----------------------------------------------+
| Trial _objective_4451313b result             |
+----------------------------------------------+
| time_this_iter_s                     30.0333 |
| time_total_s                         30.0333 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                              0 |
| eval_f1                              0.66667 |
| eval_loss                             0.6976 |
| eval_p                                   0.5 |
| eval_r                                     1 |
| eval_roc_auc                             0.5 |
| eval_runtime                          0.6406 |
| eval_samples_per_second              438.617 |
| eval_steps_per_second                 28.096 |
| objective                            2.66667 |
+----------------------------------------------+

[2m[36m(_object

[2m[36m(_objective pid=10068)[0m                                                  100%|██████████| 316/316 [00:31<00:00, 13.59it/s]100%|██████████| 316/316 [00:31<00:00, 10.00it/s]


Trial _objective_013b7f91 started with configuration:
+----------------------------------------------+
| Trial _objective_013b7f91 config             |
+----------------------------------------------+
| learning_rate                              0 |
| num_train_epochs                           1 |
| per_device_train_batch_size               64 |
| seed                                 7.33516 |
+----------------------------------------------+



[2m[36m(_objective pid=10271)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=10271)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/40 [00:00<?, ?it/s]
  2%|▎         | 1/40 [00:00<00:23,  1.70it/s]
  5%|▌         | 2/40 [00:00<00:17,  2.16it/s]
  8%|▊         | 3/40 [00:01<00:15,  2.35it/s]
 10%|█         | 4/40 [00:01<00:14,  2.45it/s]
 12%|█▎        | 5/40 [00:02<00:13,  2.51it/s]
 15%|█▌        | 6/40 [00:02<00:13,  2.54it/s]
 18%|█▊        | 7/40 [00:02<00:12,  2.56it/s]
 20%|██        | 8/40 [00:03<00:12,  2.58it/s]
 22%|██▎       | 9/40 [00:03<00:12,  2.57it/s]
 25%|██▌       | 10/40 [00:04<00:11,  2.58it/s]
 28%|██▊       | 11/40 [00:04<00:11,  2.57it/s]
 30%|███       | 12/40 [00:04<00:10,  2.58it/s]
 32%|███▎      | 13/40 [00:05

Trial status: 19 TERMINATED | 1 RUNNING
Current time: 2023-09-25 13:19:26. Total running time: 24min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:V100)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name            status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_p     eval_r |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_013b7f91   RUNNING          2.68338e-06                    1    7.33516                       64                                                                               |
| _objective_6c47dcc5   TERMINATED       3.7694e-06     

 68%|██████▊   | 27/40 [00:10<00:05,  2.56it/s]
 70%|███████   | 28/40 [00:11<00:04,  2.56it/s]
 72%|███████▎  | 29/40 [00:11<00:04,  2.56it/s]
 75%|███████▌  | 30/40 [00:11<00:03,  2.56it/s]
 78%|███████▊  | 31/40 [00:12<00:03,  2.57it/s]
 80%|████████  | 32/40 [00:12<00:03,  2.59it/s]
 82%|████████▎ | 33/40 [00:12<00:02,  2.59it/s]
 85%|████████▌ | 34/40 [00:13<00:02,  2.60it/s]
 88%|████████▊ | 35/40 [00:13<00:01,  2.60it/s]
 90%|█████████ | 36/40 [00:14<00:01,  2.61it/s]
 92%|█████████▎| 37/40 [00:14<00:01,  2.61it/s]
 95%|█████████▌| 38/40 [00:14<00:00,  2.61it/s]
 98%|█████████▊| 39/40 [00:15<00:00,  2.61it/s]
100%|██████████| 40/40 [00:15<00:00,  3.10it/s]
  0%|          | 0/18 [00:00<?, ?it/s][A
[2m[36m(_objective pid=10271)[0m 
 22%|██▏       | 4/18 [00:00<00:00, 38.37it/s][A


[2m[36m(_objective pid=10271)[0m {'loss': 0.7041, 'learning_rate': 4.25427549947295e-09, 'epoch': 1.0}


[2m[36m(_objective pid=10271)[0m 
[2m[36m(_objective pid=10271)[0m  44%|████▍     | 8/18 [00:00<00:00, 32.05it/s][A
[2m[36m(_objective pid=10271)[0m 
[2m[36m(_objective pid=10271)[0m  67%|██████▋   | 12/18 [00:00<00:00, 30.37it/s][A


Trial _objective_013b7f91 finished iteration 1 at 2023-09-25 13:19:32. Total running time: 24min 38s
+----------------------------------------------+
| Trial _objective_013b7f91 result             |
+----------------------------------------------+
| time_this_iter_s                     19.0132 |
| time_total_s                         19.0132 |
| training_iteration                         1 |
| epoch                                      1 |
| eval_accuracy                        0.48754 |
| eval_f1                              0.48754 |
| eval_loss                            0.70279 |
| eval_p                               0.48754 |
| eval_r                               0.48754 |
| eval_roc_auc                         0.48754 |
| eval_runtime                          0.6319 |
| eval_samples_per_second              444.714 |
| eval_steps_per_second                 28.487 |
| objective                            2.43772 |
+----------------------------------------------+

Trial _objective

[2m[36m(_objective pid=10271)[0m 
[2m[36m(_objective pid=10271)[0m  89%|████████▉ | 16/18 [00:00<00:00, 29.77it/s][A
[2m[36m(_objective pid=10271)[0m                                                
[2m[36m(_objective pid=10271)[0m                                                [A100%|██████████| 40/40 [00:16<00:00,  3.10it/s]
[2m[36m(_objective pid=10271)[0m 100%|██████████| 18/18 [00:00<00:00, 29.77it/s][A
[2m[36m(_objective pid=10271)[0m                                                [A100%|██████████| 40/40 [00:16<00:00,  2.48it/s]


In [None]:
# Show the best run of the hyperparameter search (presumably the value returned
# by the search call in an earlier cell): run id, objective and the sampled
# hyperparameters, rendered through the cell's last expression.
# NOTE(review): `seed` is reported as a non-integer float in the result below;
# presumably the search space samples it from a continuous range -- confirm it
# is turned into an int before it is reused.
# NOTE(review): the same objective (~2.667) is logged by trials with
# eval_accuracy 0 and eval_roc_auc 0.5, so verify that the winning model is not
# simply predicting a single class.
best_trial

BestRun(run_id='c691cc98', objective=2.6666666666666665, hyperparameters={'learning_rate': 3.042747502045481e-05, 'num_train_epochs': 2, 'seed': 9.93058038171246, 'per_device_train_batch_size': 4}, run_summary=<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x7c5c43c1ff70>)

In [None]:
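# Disabled earlier experiment, kept for reference only; the output saved under
# this cell comes from that older run and is stale.
# The default objective is the sum of all evaluation metrics when extra metrics
# are provided, so in that case it has to be maximized. Without extra metrics
# the default objective is eval_loss, which must be minimized instead.
# NOTE(review): the saved output reports objective == eval_loss, so this call
# with direction="maximize" appears to have been maximizing the loss.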
# trainer.hyperparameter_search(
#    direction="maximize",
#    backend="ray",
#    n_trials=10 # number of trials
#)

2023-09-21 17:07:02,639	INFO worker.py:1621 -- Started a local Ray instance.
2023-09-21 17:07:11,531	INFO tune.py:226 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2023-09-21 17:07:11,543	INFO tune.py:666 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949

from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/latest/tune/api/trainable.html

2023-09-21 17:07:11,573	INFO tensorboardx.py:178 -- pip install "ray[tune]" to see TensorBoard files.


+-------------------------------------------------------------------+
| Configuration for experiment     _objective_2023-09-21_17-07-11   |
+-------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator            |
| Scheduler                        FIFOScheduler                    |
| Number of trials                 10                               |
+-------------------------------------------------------------------+

View detailed results here: /root/ray_results/_objective_2023-09-21_17-07-11

Trial status: 10 PENDING
Current time: 2023-09-21 17:07:11. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/1 GPUs
+--------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size |
+------------------------------------------------------------------------------



Trial _objective_4df1a_00000 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00000 config             |
+-------------------------------------------------+
| learning_rate                             1e-05 |
| num_train_epochs                              5 |
| per_device_train_batch_size                  64 |
| seed                                    8.15396 |
+-------------------------------------------------+



[2m[36m(_objective pid=6862)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=6862)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/215 [00:00<?, ?it/s]
  0%|          | 1/215 [00:01<06:51,  1.92s/it]
  1%|          | 2/215 [00:03<05:09,  1.45s/it]
  1%|▏         | 3/215 [00:04<04:39,  1.32s/it]
  2%|▏         | 4/215 [00:05<04:25,  1.26s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:07:41. Total running time: 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+--------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size |
+--------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64 |
| _objective_4df1a_00001   PENDING        1.56207e-05                    2    7.08379                       16 |
| _objective_4df1a_00002   PENDING        8.28892e-06                    5   24.4435                        16 |
| _objective_4df1a_00003   PENDING        1.09943e-06                    2   29.158                          8 |
| _objective_4df1a_00004   PENDING        2.3102e-06               

  2%|▏         | 5/215 [00:06<04:16,  1.22s/it]
  3%|▎         | 6/215 [00:07<04:11,  1.21s/it]
  3%|▎         | 7/215 [00:08<04:08,  1.20s/it]
  4%|▎         | 8/215 [00:10<04:06,  1.19s/it]
  4%|▍         | 9/215 [00:11<04:04,  1.19s/it]
  5%|▍         | 10/215 [00:12<04:02,  1.18s/it]
  5%|▌         | 11/215 [00:13<04:01,  1.18s/it]
  6%|▌         | 12/215 [00:14<04:00,  1.18s/it]
  6%|▌         | 13/215 [00:15<04:00,  1.19s/it]
  7%|▋         | 14/215 [00:17<04:00,  1.19s/it]
  7%|▋         | 15/215 [00:18<03:59,  1.20s/it]
  7%|▋         | 16/215 [00:19<03:58,  1.20s/it]
  8%|▊         | 17/215 [00:20<03:57,  1.20s/it]
  8%|▊         | 18/215 [00:22<03:56,  1.20s/it]
  9%|▉         | 19/215 [00:23<03:55,  1.20s/it]
  9%|▉         | 20/215 [00:24<03:54,  1.20s/it]
 10%|▉         | 21/215 [00:25<03:53,  1.20s/it]
 10%|█         | 22/215 [00:26<03:52,  1.20s/it]
 11%|█         | 23/215 [00:28<03:51,  1.20s/it]
 11%|█         | 24/215 [00:29<03:50,  1.21s/it]
 12%|█▏        | 25/215 [

Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:08:11. Total running time: 1min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+--------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size |
+--------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64 |
| _objective_4df1a_00001   PENDING        1.56207e-05                    2    7.08379                       16 |
| _objective_4df1a_00002   PENDING        8.28892e-06                    5   24.4435                        16 |
| _objective_4df1a_00003   PENDING        1.09943e-06                    2   29.158                          8 |
| _objective_4df1a_00004   PENDING        2.3102e-06           

 14%|█▍        | 30/215 [00:36<03:45,  1.22s/it]
 14%|█▍        | 31/215 [00:37<03:45,  1.22s/it]
 15%|█▍        | 32/215 [00:39<03:44,  1.23s/it]
 15%|█▌        | 33/215 [00:40<03:43,  1.23s/it]
 16%|█▌        | 34/215 [00:41<03:42,  1.23s/it]
 16%|█▋        | 35/215 [00:42<03:41,  1.23s/it]
 17%|█▋        | 36/215 [00:43<03:40,  1.23s/it]
 17%|█▋        | 37/215 [00:45<03:39,  1.23s/it]
 18%|█▊        | 38/215 [00:46<03:38,  1.24s/it]
 18%|█▊        | 39/215 [00:47<03:37,  1.24s/it]
 19%|█▊        | 40/215 [00:48<03:36,  1.24s/it]
 19%|█▉        | 41/215 [00:50<03:36,  1.24s/it]
 20%|█▉        | 42/215 [00:51<03:35,  1.25s/it]
 20%|██        | 43/215 [00:52<03:07,  1.09s/it]
[2m[36m(_objective pid=6862)[0m 
  0%|          | 0/19 [00:00<?, ?it/s][A


[2m[36m(_objective pid=6862)[0m {'loss': 0.7092, 'learning_rate': 8.86462916456223e-09, 'epoch': 1.0}


[2m[36m(_objective pid=6862)[0m 
 16%|█▌        | 3/19 [00:00<00:00, 20.32it/s][A
[2m[36m(_objective pid=6862)[0m 
 32%|███▏      | 6/19 [00:00<00:01, 11.03it/s][A
[2m[36m(_objective pid=6862)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.93it/s][A
[2m[36m(_objective pid=6862)[0m 
 53%|█████▎    | 10/19 [00:00<00:00,  9.20it/s][A
[2m[36m(_objective pid=6862)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  9.05it/s][A
[2m[36m(_objective pid=6862)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.98it/s][A
[2m[36m(_objective pid=6862)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.86it/s][A
[2m[36m(_objective pid=6862)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.64it/s][A
[2m[36m(_objective pid=6862)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.55it/s][A
[2m[36m(_objective pid=6862)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.40it/s][A
[2m[36m(_objective pid=6862)[0m 
 89%|████████▉ | 17/19 [00:01<00:00,  8.36it/s][A
[2m[36m(_objective pid=6862)[0m 
 95%|█████

Trial _objective_4df1a_00000 finished iteration 1 at 2023-09-21 17:08:30. Total running time: 1min 18s
+-------------------------------------------------+
| Trial _objective_4df1a_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                         61.886 |
| time_total_s                             61.886 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                                 0.702 |
| eval_runtime                              2.283 |
| eval_samples_per_second                 132.722 |
| eval_steps_per_second                     8.322 |
| objective                                 0.702 |
+-------------------------------------------------+

[2m[36m(_objective pid=6862)[0m {'eval_loss': 0.7020037174224854, 'eval_runtime': 2.283, 'eval_samples_per_second': 132.722, 'eval_steps_per_second': 8.322, 'epoch': 1.0}


[2m[36m(_objective pid=6862)[0m                                                 
[2m[36m(_objective pid=6862)[0m                                                [A 20%|██        | 43/215 [00:54<03:07,  1.09s/it]
[2m[36m(_objective pid=6862)[0m 100%|██████████| 19/19 [00:02<00:00,  8.34it/s][A
                                               [A
 20%|██        | 44/215 [01:00<09:23,  3.29s/it]
 21%|██        | 45/215 [01:01<07:35,  2.68s/it]
 21%|██▏       | 46/215 [01:03<06:20,  2.25s/it]
 22%|██▏       | 47/215 [01:04<05:27,  1.95s/it]
 22%|██▏       | 48/215 [01:05<04:50,  1.74s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:08:41. Total running time: 1min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        1             61.886      0.702004      0.702004            2.283                  132.722 |
| _object

 23%|██▎       | 49/215 [01:06<04:24,  1.59s/it]
 23%|██▎       | 50/215 [01:08<04:06,  1.49s/it]
 24%|██▎       | 51/215 [01:09<03:53,  1.42s/it]
 24%|██▍       | 52/215 [01:10<03:43,  1.37s/it]
 25%|██▍       | 53/215 [01:11<03:36,  1.34s/it]
 25%|██▌       | 54/215 [01:13<03:31,  1.31s/it]
 26%|██▌       | 55/215 [01:14<03:27,  1.29s/it]
 26%|██▌       | 56/215 [01:15<03:24,  1.29s/it]
 27%|██▋       | 57/215 [01:16<03:21,  1.28s/it]
 27%|██▋       | 58/215 [01:18<03:19,  1.27s/it]
 27%|██▋       | 59/215 [01:19<03:17,  1.26s/it]
 28%|██▊       | 60/215 [01:20<03:15,  1.26s/it]
 28%|██▊       | 61/215 [01:22<03:31,  1.37s/it]
 29%|██▉       | 62/215 [01:23<03:21,  1.32s/it]
 29%|██▉       | 63/215 [01:24<03:17,  1.30s/it]
 30%|██▉       | 64/215 [01:25<03:13,  1.28s/it]
 30%|███       | 65/215 [01:27<03:11,  1.27s/it]
 31%|███       | 66/215 [01:28<03:09,  1.27s/it]
 31%|███       | 67/215 [01:29<03:07,  1.26s/it]
 32%|███▏      | 68/215 [01:30<03:05,  1.26s/it]
 32%|███▏      | 69/

Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:09:11. Total running time: 2min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        1             61.886      0.702004      0.702004            2.283                  132.722 |
| _objecti

 34%|███▍      | 73/215 [01:37<02:56,  1.24s/it]
 34%|███▍      | 74/215 [01:38<02:54,  1.24s/it]
 35%|███▍      | 75/215 [01:39<02:53,  1.24s/it]
 35%|███▌      | 76/215 [01:40<02:52,  1.24s/it]
 36%|███▌      | 77/215 [01:42<02:50,  1.24s/it]
 36%|███▋      | 78/215 [01:43<02:49,  1.24s/it]
 37%|███▋      | 79/215 [01:44<02:48,  1.24s/it]
 37%|███▋      | 80/215 [01:45<02:46,  1.24s/it]
 38%|███▊      | 81/215 [01:47<02:45,  1.23s/it]
 38%|███▊      | 82/215 [01:48<02:44,  1.23s/it]
 39%|███▊      | 83/215 [01:49<02:42,  1.23s/it]
 39%|███▉      | 84/215 [01:50<02:41,  1.23s/it]
 40%|███▉      | 85/215 [01:51<02:40,  1.23s/it]
 40%|████      | 86/215 [01:52<02:18,  1.07s/it]
  0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=6862)[0m 
 11%|█         | 2/19 [00:00<00:00, 19.28it/s][A


[2m[36m(_objective pid=6862)[0m {'loss': 0.7083, 'learning_rate': 1.772925832912446e-08, 'epoch': 2.0}


[2m[36m(_objective pid=6862)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.16it/s][A
[2m[36m(_objective pid=6862)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.71it/s][A
[2m[36m(_objective pid=6862)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.08it/s][A
[2m[36m(_objective pid=6862)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  9.00it/s][A
[2m[36m(_objective pid=6862)[0m 
 53%|█████▎    | 10/19 [00:01<00:00,  9.01it/s][A
[2m[36m(_objective pid=6862)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.95it/s][A
[2m[36m(_objective pid=6862)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.80it/s][A
[2m[36m(_objective pid=6862)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.88it/s][A
[2m[36m(_objective pid=6862)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.77it/s][A
[2m[36m(_objective pid=6862)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.61it/s][A
[2m[36m(_objective pid=6862)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.56it/s][A
[2m[36m(_objective pid=6862)[0m 
 89%|██████

Trial _objective_4df1a_00000 finished iteration 2 at 2023-09-21 17:09:30. Total running time: 2min 19s
+-------------------------------------------------+
| Trial _objective_4df1a_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        60.4612 |
| time_total_s                            122.347 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.70159 |
| eval_runtime                              2.203 |
| eval_samples_per_second                  137.54 |
| eval_steps_per_second                     8.625 |
| objective                               0.70159 |
+-------------------------------------------------+

[2m[36m(_objective pid=6862)[0m {'eval_loss': 0.7015904188156128, 'eval_runtime': 2.203, 'eval_samples_per_second': 137.54, 'eval_steps_per_second': 8.625, 'epoch': 2.0}


[2m[36m(_objective pid=6862)[0m                                                 
[2m[36m(_objective pid=6862)[0m                                                [A 40%|████      | 86/215 [01:54<02:18,  1.07s/it]
[2m[36m(_objective pid=6862)[0m 100%|██████████| 19/19 [00:02<00:00,  8.59it/s][A
                                               [A
 40%|████      | 87/215 [02:00<06:53,  3.23s/it]
 41%|████      | 88/215 [02:02<05:33,  2.63s/it]
 41%|████▏     | 89/215 [02:03<04:37,  2.20s/it]
 42%|████▏     | 90/215 [02:04<03:58,  1.91s/it]
 42%|████▏     | 91/215 [02:05<03:31,  1.70s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:09:41. Total running time: 2min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        2            122.347       0.70159       0.70159            2.203                   137.54 |
| _object

 43%|████▎     | 92/215 [02:07<03:11,  1.56s/it]
 43%|████▎     | 93/215 [02:08<02:57,  1.46s/it]
 44%|████▎     | 94/215 [02:09<02:47,  1.39s/it]
 44%|████▍     | 95/215 [02:10<02:40,  1.34s/it]
 45%|████▍     | 96/215 [02:11<02:35,  1.31s/it]
 45%|████▌     | 97/215 [02:13<02:31,  1.28s/it]
 46%|████▌     | 98/215 [02:14<02:28,  1.27s/it]
 46%|████▌     | 99/215 [02:15<02:26,  1.26s/it]
 47%|████▋     | 100/215 [02:16<02:23,  1.25s/it]
 47%|████▋     | 101/215 [02:18<02:22,  1.25s/it]
 47%|████▋     | 102/215 [02:19<02:20,  1.25s/it]
 48%|████▊     | 103/215 [02:20<02:19,  1.25s/it]
 48%|████▊     | 104/215 [02:21<02:18,  1.25s/it]
 49%|████▉     | 105/215 [02:23<02:17,  1.25s/it]
 49%|████▉     | 106/215 [02:24<02:16,  1.25s/it]
 50%|████▉     | 107/215 [02:25<02:15,  1.25s/it]
 50%|█████     | 108/215 [02:26<02:14,  1.25s/it]
 51%|█████     | 109/215 [02:28<02:13,  1.26s/it]
 51%|█████     | 110/215 [02:29<02:11,  1.25s/it]
 52%|█████▏    | 111/215 [02:30<02:10,  1.25s/it]
 52%|███

Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:10:12. Total running time: 3min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        2            122.347       0.70159       0.70159            2.203                   137.54 |
| _objecti

 54%|█████▍    | 116/215 [02:36<02:04,  1.25s/it]
 54%|█████▍    | 117/215 [02:38<02:03,  1.26s/it]
 55%|█████▍    | 118/215 [02:39<02:01,  1.25s/it]
 55%|█████▌    | 119/215 [02:40<02:00,  1.25s/it]
 56%|█████▌    | 120/215 [02:41<01:58,  1.25s/it]
 56%|█████▋    | 121/215 [02:43<01:57,  1.25s/it]
 57%|█████▋    | 122/215 [02:44<01:56,  1.25s/it]
 57%|█████▋    | 123/215 [02:45<01:55,  1.25s/it]
 58%|█████▊    | 124/215 [02:46<01:54,  1.25s/it]
 58%|█████▊    | 125/215 [02:48<01:52,  1.25s/it]
 59%|█████▊    | 126/215 [02:49<01:51,  1.25s/it]
 59%|█████▉    | 127/215 [02:50<01:49,  1.25s/it]
 60%|█████▉    | 128/215 [02:51<01:48,  1.25s/it]
 60%|██████    | 129/215 [02:52<01:33,  1.09s/it]
 60%|██████    | 129/215 [02:52<01:33,  1.09s/it]
  0%|          | 0/19 [00:00<?, ?it/s][A


[2m[36m(_objective pid=6862)[0m {'loss': 0.7072, 'learning_rate': 2.659388749368669e-08, 'epoch': 3.0}


[2m[36m(_objective pid=6862)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.90it/s][A
[2m[36m(_objective pid=6862)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.99it/s][A
[2m[36m(_objective pid=6862)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.42it/s][A
[2m[36m(_objective pid=6862)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.10it/s][A
[2m[36m(_objective pid=6862)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.84it/s][A
[2m[36m(_objective pid=6862)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.81it/s][A
[2m[36m(_objective pid=6862)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.71it/s][A
[2m[36m(_objective pid=6862)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.69it/s][A
[2m[36m(_objective pid=6862)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.64it/s][A
[2m[36m(_objective pid=6862)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.44it/s][A
[2m[36m(_objective pid=6862)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.38it/s][A
[2m[36m(_objective pid=6862)[0m 
 84%|███████

Trial _objective_4df1a_00000 finished iteration 3 at 2023-09-21 17:10:30. Total running time: 3min 19s
+-------------------------------------------------+
| Trial _objective_4df1a_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        60.0052 |
| time_total_s                            182.352 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.70087 |
| eval_runtime                             2.2443 |
| eval_samples_per_second                 135.011 |
| eval_steps_per_second                     8.466 |
| objective                               0.70087 |
+-------------------------------------------------+

[2m[36m(_objective pid=6862)[0m {'eval_loss': 0.7008675336837769, 'eval_runtime': 2.2443, 'eval_samples_per_second': 135.011, 'eval_steps_per_second': 8.466, 'epoch': 3.0}


[2m[36m(_objective pid=6862)[0m 
[2m[36m(_objective pid=6862)[0m 100%|██████████| 19/19 [00:02<00:00,  8.18it/s][A                                                 
[2m[36m(_objective pid=6862)[0m                                                [A 60%|██████    | 129/215 [02:54<01:33,  1.09s/it]
[2m[36m(_objective pid=6862)[0m 100%|██████████| 19/19 [00:02<00:00,  8.18it/s][A
                                               [A
 60%|██████    | 130/215 [03:01<04:38,  3.28s/it]
 61%|██████    | 131/215 [03:02<03:43,  2.66s/it]
 61%|██████▏   | 132/215 [03:03<03:05,  2.23s/it]
 62%|██████▏   | 133/215 [03:04<02:38,  1.93s/it]
 62%|██████▏   | 134/215 [03:05<02:19,  1.72s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:10:42. Total running time: 3min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        3            182.352      0.700868      0.700868           2.2443                  135.011 |
| _object

 63%|██████▎   | 135/215 [03:07<02:05,  1.57s/it]
 63%|██████▎   | 136/215 [03:08<01:56,  1.47s/it]
 64%|██████▎   | 137/215 [03:09<01:49,  1.40s/it]
 64%|██████▍   | 138/215 [03:10<01:43,  1.35s/it]
 65%|██████▍   | 139/215 [03:12<01:39,  1.31s/it]
 65%|██████▌   | 140/215 [03:13<01:36,  1.29s/it]
 66%|██████▌   | 141/215 [03:14<01:34,  1.27s/it]
 66%|██████▌   | 142/215 [03:15<01:32,  1.26s/it]
 67%|██████▋   | 143/215 [03:17<01:30,  1.25s/it]
 67%|██████▋   | 144/215 [03:18<01:28,  1.25s/it]
 67%|██████▋   | 145/215 [03:19<01:27,  1.25s/it]
 68%|██████▊   | 146/215 [03:20<01:25,  1.24s/it]
 68%|██████▊   | 147/215 [03:21<01:24,  1.24s/it]
 69%|██████▉   | 148/215 [03:23<01:23,  1.24s/it]
 69%|██████▉   | 149/215 [03:24<01:21,  1.24s/it]
 70%|██████▉   | 150/215 [03:25<01:20,  1.24s/it]
 70%|███████   | 151/215 [03:26<01:19,  1.24s/it]
 71%|███████   | 152/215 [03:28<01:18,  1.25s/it]
 71%|███████   | 153/215 [03:29<01:17,  1.24s/it]
 72%|███████▏  | 154/215 [03:30<01:15,  1.25s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:11:12. Total running time: 4min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        3            182.352      0.700868      0.700868           2.2443                  135.011 |
| _objecti

 74%|███████▍  | 159/215 [03:36<01:09,  1.24s/it]
 74%|███████▍  | 160/215 [03:38<01:08,  1.24s/it]
 75%|███████▍  | 161/215 [03:39<01:06,  1.24s/it]
 75%|███████▌  | 162/215 [03:40<01:05,  1.24s/it]
 76%|███████▌  | 163/215 [03:41<01:04,  1.24s/it]
 76%|███████▋  | 164/215 [03:43<01:03,  1.24s/it]
 77%|███████▋  | 165/215 [03:44<01:02,  1.24s/it]
 77%|███████▋  | 166/215 [03:45<01:01,  1.25s/it]
 78%|███████▊  | 167/215 [03:46<00:59,  1.25s/it]
 78%|███████▊  | 168/215 [03:48<00:58,  1.25s/it]
 79%|███████▊  | 169/215 [03:49<00:57,  1.25s/it]
 79%|███████▉  | 170/215 [03:50<00:55,  1.24s/it]
 80%|███████▉  | 171/215 [03:51<00:54,  1.24s/it]
 80%|████████  | 172/215 [03:52<00:46,  1.08s/it]
  0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=6862)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.13it/s][A


[2m[36m(_objective pid=6862)[0m {'loss': 0.7067, 'learning_rate': 3.545851665824892e-08, 'epoch': 4.0}


[2m[36m(_objective pid=6862)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.84it/s][A
[2m[36m(_objective pid=6862)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.47it/s][A
[2m[36m(_objective pid=6862)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.99it/s][A
[2m[36m(_objective pid=6862)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.82it/s][A
[2m[36m(_objective pid=6862)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.70it/s][A
[2m[36m(_objective pid=6862)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.83it/s][A
[2m[36m(_objective pid=6862)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.79it/s][A
[2m[36m(_objective pid=6862)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.65it/s][A
[2m[36m(_objective pid=6862)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.50it/s][A
[2m[36m(_objective pid=6862)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.48it/s][A
[2m[36m(_objective pid=6862)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.38it/s][A
[2m[36m(_objective pid=6862)[0m 
 89%|██████

Trial _objective_4df1a_00000 finished iteration 4 at 2023-09-21 17:11:30. Total running time: 4min 18s
+-------------------------------------------------+
| Trial _objective_4df1a_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        59.8864 |
| time_total_s                            242.239 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.69989 |
| eval_runtime                             2.2371 |
| eval_samples_per_second                 135.444 |
| eval_steps_per_second                     8.493 |
| objective                               0.69989 |
+-------------------------------------------------+

[2m[36m(_objective pid=6862)[0m {'eval_loss': 0.6998929381370544, 'eval_runtime': 2.2371, 'eval_samples_per_second': 135.444, 'eval_steps_per_second': 8.493, 'epoch': 4.0}


[2m[36m(_objective pid=6862)[0m                                                  
[2m[36m(_objective pid=6862)[0m                                                [A 80%|████████  | 172/215 [03:54<00:46,  1.08s/it]
[2m[36m(_objective pid=6862)[0m 100%|██████████| 19/19 [00:02<00:00,  8.54it/s][A
                                               [A
 80%|████████  | 173/215 [04:00<02:16,  3.25s/it]
 81%|████████  | 174/215 [04:02<01:49,  2.66s/it]
 81%|████████▏ | 175/215 [04:03<01:29,  2.23s/it]
 82%|████████▏ | 176/215 [04:04<01:15,  1.93s/it]
 82%|████████▏ | 177/215 [04:05<01:05,  1.72s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:11:42. Total running time: 4min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        4            242.239      0.699893      0.699893           2.2371                  135.444 |
| _object

 83%|████████▎ | 178/215 [04:07<00:58,  1.57s/it]
 83%|████████▎ | 179/215 [04:08<00:52,  1.47s/it]
 84%|████████▎ | 180/215 [04:09<00:48,  1.40s/it]
 84%|████████▍ | 181/215 [04:10<00:45,  1.35s/it]
 85%|████████▍ | 182/215 [04:11<00:43,  1.32s/it]
 85%|████████▌ | 183/215 [04:13<00:41,  1.29s/it]
 86%|████████▌ | 184/215 [04:14<00:39,  1.27s/it]
 86%|████████▌ | 185/215 [04:15<00:38,  1.27s/it]
 87%|████████▋ | 186/215 [04:16<00:36,  1.26s/it]
 87%|████████▋ | 187/215 [04:18<00:35,  1.26s/it]
 87%|████████▋ | 188/215 [04:19<00:33,  1.26s/it]
 88%|████████▊ | 189/215 [04:20<00:32,  1.26s/it]
 88%|████████▊ | 190/215 [04:21<00:31,  1.26s/it]
 89%|████████▉ | 191/215 [04:23<00:30,  1.25s/it]
 89%|████████▉ | 192/215 [04:24<00:28,  1.25s/it]
 90%|████████▉ | 193/215 [04:25<00:27,  1.25s/it]
 90%|█████████ | 194/215 [04:26<00:26,  1.25s/it]
 91%|█████████ | 195/215 [04:28<00:25,  1.25s/it]
 91%|█████████ | 196/215 [04:29<00:23,  1.25s/it]
 92%|█████████▏| 197/215 [04:30<00:22,  1.25s/it]


Trial status: 1 RUNNING | 9 PENDING
Current time: 2023-09-21 17:12:12. Total running time: 5min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status       learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   RUNNING        5.61152e-06                    5    8.15396                       64        4            242.239      0.699893      0.699893           2.2371                  135.444 |
| _objecti

 94%|█████████▍| 202/215 [04:36<00:16,  1.25s/it]
 94%|█████████▍| 203/215 [04:38<00:15,  1.25s/it]
 95%|█████████▍| 204/215 [04:39<00:13,  1.25s/it]
 95%|█████████▌| 205/215 [04:40<00:12,  1.25s/it]
 96%|█████████▌| 206/215 [04:42<00:11,  1.25s/it]
 96%|█████████▋| 207/215 [04:43<00:10,  1.25s/it]
 97%|█████████▋| 208/215 [04:44<00:08,  1.25s/it]
 97%|█████████▋| 209/215 [04:45<00:07,  1.25s/it]
 98%|█████████▊| 210/215 [04:46<00:06,  1.25s/it]
 98%|█████████▊| 211/215 [04:48<00:04,  1.25s/it]
 99%|█████████▊| 212/215 [04:49<00:03,  1.25s/it]
 99%|█████████▉| 213/215 [04:50<00:02,  1.24s/it]
100%|█████████▉| 214/215 [04:51<00:01,  1.24s/it]
100%|██████████| 215/215 [04:52<00:00,  1.08s/it]
100%|██████████| 215/215 [04:52<00:00,  1.08s/it]
  0%|          | 0/19 [00:00<?, ?it/s][A


[2m[36m(_objective pid=6862)[0m {'loss': 0.7053, 'learning_rate': 4.432314582281115e-08, 'epoch': 5.0}


[2m[36m(_objective pid=6862)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.37it/s][A
[2m[36m(_objective pid=6862)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.05it/s][A
[2m[36m(_objective pid=6862)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.60it/s][A
[2m[36m(_objective pid=6862)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.00it/s][A
[2m[36m(_objective pid=6862)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.80it/s][A
[2m[36m(_objective pid=6862)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.81it/s][A
[2m[36m(_objective pid=6862)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.72it/s][A
[2m[36m(_objective pid=6862)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.74it/s][A
[2m[36m(_objective pid=6862)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.61it/s][A
[2m[36m(_objective pid=6862)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.48it/s][A
[2m[36m(_objective pid=6862)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.45it/s][A
[2m[36m(_objective pid=6862)[0m 
 84%|███████

Trial _objective_4df1a_00000 finished iteration 5 at 2023-09-21 17:12:30. Total running time: 5min 19s
+-------------------------------------------------+
| Trial _objective_4df1a_00000 result             |
+-------------------------------------------------+
| time_this_iter_s                        60.1226 |
| time_total_s                            302.361 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.69865 |
| eval_runtime                             2.2379 |
| eval_samples_per_second                 135.396 |
| eval_steps_per_second                      8.49 |
| objective                               0.69865 |
+-------------------------------------------------+

[2m[36m(_objective pid=6862)[0m {'eval_loss': 0.6986486315727234, 'eval_runtime': 2.2379, 'eval_samples_per_second': 135.396, 'eval_steps_per_second': 8.49, 'epoch': 5.0}


[2m[36m(_objective pid=6862)[0m                                                  
[2m[36m(_objective pid=6862)[0m                                                [A100%|██████████| 215/215 [04:54<00:00,  1.08s/it]
[2m[36m(_objective pid=6862)[0m 100%|██████████| 19/19 [00:02<00:00,  8.51it/s][A
[2m[36m(_objective pid=6862)[0m                                                [A


Trial _objective_4df1a_00000 completed after 5 iterations at 2023-09-21 17:12:40. Total running time: 5min 28s

[2m[36m(_objective pid=6862)[0m {'train_runtime': 304.6534, 'train_samples_per_second': 44.674, 'train_steps_per_second': 0.706, 'train_loss': 0.7073440108188364, 'epoch': 5.0}


[2m[36m(_objective pid=6862)[0m                                                  100%|██████████| 215/215 [05:04<00:00,  1.08s/it]100%|██████████| 215/215 [05:04<00:00,  1.42s/it]


Trial status: 1 TERMINATED | 9 PENDING
Current time: 2023-09-21 17:12:42. Total running time: 5min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   TERMINATED       5.61152e-06                    5    8.15396                       64        5            302.361      0.698649      0.698649           2.2379                  135.396 



Trial _objective_4df1a_00001 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00001 config             |
+-------------------------------------------------+
| learning_rate                             2e-05 |
| num_train_epochs                              2 |
| per_device_train_batch_size                  16 |
| seed                                    7.08379 |
+-------------------------------------------------+



[2m[36m(_objective pid=8238)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=8238)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/342 [00:00<?, ?it/s]
  0%|          | 1/342 [00:01<05:52,  1.03s/it]
  1%|          | 2/342 [00:01<03:25,  1.65it/s]
  1%|          | 3/342 [00:01<02:43,  2.08it/s]
  1%|          | 4/342 [00:02<02:24,  2.34it/s]
  1%|▏         | 5/342 [00:02<02:14,  2.50it/s]
  2%|▏         | 6/342 [00:02<02:07,  2.64it/s]
  2%|▏         | 7/342 [00:03<02:03,  2.72it/s]
  2%|▏         | 8/342 [00:03<02:00,  2.77it/s]
  3%|▎         | 9/342 [00:03<01:58,  2.81it/s]
  3%|▎         | 10/342 [00:04<01:57,  2.83it/s]
  3%|▎         | 11/342 [00:04<01:56,  2.84it/s]
  4%|▎         | 12/342 [00:04<01:55,  2.86it/s]
  4%|▍         | 1

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-21 17:13:12. Total running time: 6min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00001   RUNNING          1.56207e-05                    2    7.08379                       16                                                                                        

 13%|█▎        | 46/342 [00:16<01:44,  2.84it/s]
 14%|█▎        | 47/342 [00:17<01:44,  2.83it/s]
 14%|█▍        | 48/342 [00:17<01:43,  2.84it/s]
 14%|█▍        | 49/342 [00:17<01:42,  2.85it/s]
 15%|█▍        | 50/342 [00:18<01:42,  2.84it/s]
 15%|█▍        | 51/342 [00:18<01:42,  2.83it/s]
 15%|█▌        | 52/342 [00:18<01:42,  2.83it/s]
 15%|█▌        | 53/342 [00:19<01:42,  2.81it/s]
 16%|█▌        | 54/342 [00:19<01:42,  2.80it/s]
 16%|█▌        | 55/342 [00:19<01:41,  2.82it/s]
 16%|█▋        | 56/342 [00:20<01:41,  2.81it/s]
 17%|█▋        | 57/342 [00:20<01:41,  2.81it/s]
 17%|█▋        | 58/342 [00:20<01:41,  2.81it/s]
 17%|█▋        | 59/342 [00:21<01:40,  2.82it/s]
 18%|█▊        | 60/342 [00:21<01:40,  2.81it/s]
 18%|█▊        | 61/342 [00:22<01:40,  2.80it/s]
 18%|█▊        | 62/342 [00:22<01:39,  2.81it/s]
 18%|█▊        | 63/342 [00:22<01:39,  2.80it/s]
 19%|█▊        | 64/342 [00:23<01:39,  2.79it/s]
 19%|█▉        | 65/342 [00:23<01:39,  2.79it/s]
 19%|█▉        | 66/

Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-21 17:13:42. Total running time: 6min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00001   RUNNING          1.56207e-05                    2    7.08379                       16                                                                                       

 38%|███▊      | 129/342 [00:46<01:16,  2.78it/s]
 38%|███▊      | 130/342 [00:46<01:16,  2.78it/s]
 38%|███▊      | 131/342 [00:47<01:15,  2.79it/s]
 39%|███▊      | 132/342 [00:47<01:15,  2.78it/s]
 39%|███▉      | 133/342 [00:47<01:15,  2.78it/s]
 39%|███▉      | 134/342 [00:48<01:14,  2.80it/s]
 39%|███▉      | 135/342 [00:48<01:13,  2.80it/s]
 40%|███▉      | 136/342 [00:49<01:13,  2.79it/s]
 40%|████      | 137/342 [00:49<01:13,  2.80it/s]
 40%|████      | 138/342 [00:49<01:12,  2.80it/s]
 41%|████      | 139/342 [00:50<01:12,  2.79it/s]
 41%|████      | 140/342 [00:50<01:12,  2.79it/s]
 41%|████      | 141/342 [00:50<01:11,  2.80it/s]
 42%|████▏     | 142/342 [00:51<01:11,  2.80it/s]
 42%|████▏     | 143/342 [00:51<01:11,  2.80it/s]
 42%|████▏     | 144/342 [00:51<01:10,  2.81it/s]
 42%|████▏     | 145/342 [00:52<01:10,  2.81it/s]
 43%|████▎     | 146/342 [00:52<01:10,  2.80it/s]
 43%|████▎     | 147/342 [00:52<01:09,  2.80it/s]
 43%|████▎     | 148/342 [00:53<01:09,  2.80it/s]


[2m[36m(_objective pid=8238)[0m {'loss': 0.7012, 'learning_rate': 9.813147018862678e-08, 'epoch': 1.0}


[2m[36m(_objective pid=8238)[0m                                                   50%|█████     | 171/342 [01:01<00:47,  3.61it/s]
[2m[36m(_objective pid=8238)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8238)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.80it/s][A
[2m[36m(_objective pid=8238)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.84it/s][A
[2m[36m(_objective pid=8238)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.45it/s][A
[2m[36m(_objective pid=8238)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.01it/s][A
[2m[36m(_objective pid=8238)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.78it/s][A
[2m[36m(_objective pid=8238)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.80it/s][A
[2m[36m(_objective pid=8238)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.85it/s][A
[2m[36m(_objective pid=8238)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.74it/s][A
[2m[36m(_objective pid=8238)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.56it/s][A
[2m[3

Trial _objective_4df1a_00001 finished iteration 1 at 2023-09-21 17:13:59. Total running time: 6min 47s
+-------------------------------------------------+
| Trial _objective_4df1a_00001 result             |
+-------------------------------------------------+
| time_this_iter_s                          68.28 |
| time_total_s                              68.28 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.69309 |
| eval_runtime                             2.2281 |
| eval_samples_per_second                  135.99 |
| eval_steps_per_second                     8.527 |
| objective                               0.69309 |
+-------------------------------------------------+

[2m[36m(_objective pid=8238)[0m {'eval_loss': 0.6930900812149048, 'eval_runtime': 2.2281, 'eval_samples_per_second': 135.99, 'eval_steps_per_second': 8.527, 'epoch': 1.0}


 50%|█████     | 172/342 [01:08<06:56,  2.45s/it]
 51%|█████     | 173/342 [01:09<05:07,  1.82s/it]
 51%|█████     | 174/342 [01:09<03:51,  1.38s/it]
 51%|█████     | 175/342 [01:09<02:59,  1.07s/it]
 51%|█████▏    | 176/342 [01:10<02:21,  1.17it/s]
 52%|█████▏    | 177/342 [01:10<01:55,  1.43it/s]
 52%|█████▏    | 178/342 [01:10<01:38,  1.67it/s]
 52%|█████▏    | 179/342 [01:11<01:25,  1.91it/s]
 53%|█████▎    | 180/342 [01:11<01:16,  2.13it/s]
 53%|█████▎    | 181/342 [01:11<01:09,  2.30it/s]
 53%|█████▎    | 182/342 [01:12<01:05,  2.44it/s]
 54%|█████▎    | 183/342 [01:12<01:02,  2.55it/s]
 54%|█████▍    | 184/342 [01:12<01:00,  2.63it/s]
 54%|█████▍    | 185/342 [01:13<00:58,  2.68it/s]
 54%|█████▍    | 186/342 [01:13<00:56,  2.74it/s]
 55%|█████▍    | 187/342 [01:14<00:55,  2.77it/s]
 55%|█████▍    | 188/342 [01:14<00:55,  2.80it/s]
 55%|█████▌    | 189/342 [01:14<00:54,  2.81it/s]
 56%|█████▌    | 190/342 [01:15<00:53,  2.84it/s]
 56%|█████▌    | 191/342 [01:15<00:53,  2.83it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-21 17:14:12. Total running time: 7min 0s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00001   RUNNING          1.56207e-05                    2    7.08379                       16        1             68.28       0.69309       0.69309            2.2281               

 57%|█████▋    | 195/342 [01:16<00:51,  2.86it/s]
 57%|█████▋    | 196/342 [01:17<00:50,  2.87it/s]
 58%|█████▊    | 197/342 [01:17<00:50,  2.88it/s]
 58%|█████▊    | 198/342 [01:17<00:50,  2.87it/s]
 58%|█████▊    | 199/342 [01:18<00:49,  2.88it/s]
 58%|█████▊    | 200/342 [01:18<00:49,  2.88it/s]
 59%|█████▉    | 201/342 [01:18<00:49,  2.86it/s]
 59%|█████▉    | 202/342 [01:19<00:48,  2.87it/s]
 59%|█████▉    | 203/342 [01:19<00:48,  2.87it/s]
 60%|█████▉    | 204/342 [01:19<00:48,  2.86it/s]
 60%|█████▉    | 205/342 [01:20<00:47,  2.87it/s]
 60%|██████    | 206/342 [01:20<00:47,  2.87it/s]
 61%|██████    | 207/342 [01:21<00:46,  2.87it/s]
 61%|██████    | 208/342 [01:21<00:46,  2.86it/s]
 61%|██████    | 209/342 [01:21<00:46,  2.87it/s]
 61%|██████▏   | 210/342 [01:22<00:46,  2.86it/s]
 62%|██████▏   | 211/342 [01:22<00:45,  2.86it/s]
 62%|██████▏   | 212/342 [01:22<00:45,  2.88it/s]
 62%|██████▏   | 213/342 [01:23<00:44,  2.87it/s]
 63%|██████▎   | 214/342 [01:23<00:44,  2.86it/s]


Trial status: 1 TERMINATED | 1 RUNNING | 8 PENDING
Current time: 2023-09-21 17:14:42. Total running time: 7min 30s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00001   RUNNING          1.56207e-05                    2    7.08379                       16        1             68.28       0.69309       0.69309            2.2281              

 82%|████████▏ | 280/342 [01:46<00:22,  2.80it/s]
 82%|████████▏ | 281/342 [01:47<00:21,  2.80it/s]
 82%|████████▏ | 282/342 [01:47<00:21,  2.79it/s]
 83%|████████▎ | 283/342 [01:47<00:21,  2.80it/s]
 83%|████████▎ | 284/342 [01:48<00:20,  2.79it/s]
 83%|████████▎ | 285/342 [01:48<00:20,  2.80it/s]
 84%|████████▎ | 286/342 [01:48<00:20,  2.80it/s]
 84%|████████▍ | 287/342 [01:49<00:19,  2.79it/s]
 84%|████████▍ | 288/342 [01:49<00:19,  2.81it/s]
 85%|████████▍ | 289/342 [01:50<00:18,  2.82it/s]
 85%|████████▍ | 290/342 [01:50<00:18,  2.82it/s]
 85%|████████▌ | 291/342 [01:50<00:18,  2.82it/s]
 85%|████████▌ | 292/342 [01:51<00:17,  2.83it/s]
 86%|████████▌ | 293/342 [01:51<00:17,  2.82it/s]
 86%|████████▌ | 294/342 [01:51<00:16,  2.82it/s]
 86%|████████▋ | 295/342 [01:52<00:16,  2.83it/s]
 87%|████████▋ | 296/342 [01:52<00:16,  2.83it/s]
 87%|████████▋ | 297/342 [01:52<00:15,  2.82it/s]
 87%|████████▋ | 298/342 [01:53<00:15,  2.82it/s]
 87%|████████▋ | 299/342 [01:53<00:15,  2.82it/s]


[2m[36m(_objective pid=8238)[0m {'loss': 0.696, 'learning_rate': 1.9626294037725355e-07, 'epoch': 2.0}


[2m[36m(_objective pid=8238)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.61it/s][A
[2m[36m(_objective pid=8238)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.75it/s][A
[2m[36m(_objective pid=8238)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.45it/s][A
[2m[36m(_objective pid=8238)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.89it/s][A
[2m[36m(_objective pid=8238)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.80it/s][A
[2m[36m(_objective pid=8238)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.77it/s][A
[2m[36m(_objective pid=8238)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.82it/s][A
[2m[36m(_objective pid=8238)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.64it/s][A
[2m[36m(_objective pid=8238)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.65it/s][A
[2m[36m(_objective pid=8238)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.59it/s][A
[2m[36m(_objective pid=8238)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.45it/s][A
[2m[36m(_objective pid=8238)[0m 
 84%|███████

Trial _objective_4df1a_00001 finished iteration 2 at 2023-09-21 17:15:06. Total running time: 7min 55s
+-------------------------------------------------+
| Trial _objective_4df1a_00001 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.3795 |
| time_total_s                             135.66 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                                0.6816 |
| eval_runtime                             2.2492 |
| eval_samples_per_second                 134.715 |
| eval_steps_per_second                     8.447 |
| objective                                0.6816 |
+-------------------------------------------------+

[2m[36m(_objective pid=8238)[0m {'eval_loss': 0.6815972924232483, 'eval_runtime': 2.2492, 'eval_samples_per_second': 134.715, 'eval_steps_per_second': 8.447, 'epoch': 2.0}


[2m[36m(_objective pid=8238)[0m                                                  
[2m[36m(_objective pid=8238)[0m                                                [A100%|██████████| 342/342 [02:10<00:00,  2.80it/s]
[2m[36m(_objective pid=8238)[0m 100%|██████████| 19/19 [00:02<00:00,  8.47it/s][A
[2m[36m(_objective pid=8238)[0m                                                [A


Trial _objective_4df1a_00001 completed after 2 iterations at 2023-09-21 17:15:11. Total running time: 8min 0s

[2m[36m(_objective pid=8238)[0m {'train_runtime': 136.0598, 'train_samples_per_second': 40.012, 'train_steps_per_second': 2.514, 'train_loss': 0.6985951473838404, 'epoch': 2.0}


[2m[36m(_objective pid=8238)[0m                                                  100%|██████████| 342/342 [02:16<00:00,  2.80it/s]100%|██████████| 342/342 [02:16<00:00,  2.51it/s]


Trial status: 2 TERMINATED | 8 PENDING
Current time: 2023-09-21 17:15:12. Total running time: 8min 1s
Logical resource usage: 0/2 CPUs, 0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   TERMINATED       5.61152e-06                    5    8.15396                       64        5            302.361      0.698649      0.698649           2.2379                  135.396 |
| _



Trial _objective_4df1a_00002 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00002 config             |
+-------------------------------------------------+
| learning_rate                             1e-05 |
| num_train_epochs                              5 |
| per_device_train_batch_size                  16 |
| seed                                    24.4435 |
+-------------------------------------------------+



[2m[36m(_objective pid=8881)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=8881)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/855 [00:00<?, ?it/s]
  0%|          | 1/855 [00:00<07:13,  1.97it/s]
  0%|          | 2/855 [00:00<05:34,  2.55it/s]
  0%|          | 3/855 [00:01<05:14,  2.71it/s]
  0%|          | 4/855 [00:01<05:13,  2.72it/s]
  1%|          | 5/855 [00:01<05:02,  2.81it/s]
  1%|          | 6/855 [00:02<04:58,  2.85it/s]
  1%|          | 7/855 [00:02<04:56,  2.86it/s]
  1%|          | 8/855 [00:02<04:54,  2.88it/s]
  1%|          | 9/855 [00:03<04:52,  2.89it/s]
  1%|          | 10/855 [00:03<04:53,  2.88it/s]
  1%|▏         | 11/855 [00:03<04:52,  2.89it/s]
  1%|▏         | 12/855 [00:04<04:52,  2.89it/s]
  2%|▏         | 1

Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:15:42. Total running time: 8min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16                                                                                       

  5%|▌         | 44/855 [00:15<04:43,  2.86it/s]
  5%|▌         | 45/855 [00:15<04:43,  2.86it/s]
  5%|▌         | 46/855 [00:16<04:42,  2.86it/s]
  5%|▌         | 47/855 [00:16<04:42,  2.86it/s]
  6%|▌         | 48/855 [00:16<04:44,  2.84it/s]
  6%|▌         | 49/855 [00:17<04:42,  2.85it/s]
  6%|▌         | 50/855 [00:17<04:44,  2.83it/s]
  6%|▌         | 51/855 [00:17<04:46,  2.81it/s]
  6%|▌         | 52/855 [00:18<04:44,  2.83it/s]
  6%|▌         | 53/855 [00:18<04:44,  2.82it/s]
  6%|▋         | 54/855 [00:18<04:46,  2.80it/s]
  6%|▋         | 55/855 [00:19<04:44,  2.81it/s]
  7%|▋         | 56/855 [00:19<04:44,  2.81it/s]
  7%|▋         | 57/855 [00:20<04:45,  2.79it/s]
  7%|▋         | 58/855 [00:20<04:43,  2.81it/s]
  7%|▋         | 59/855 [00:20<04:43,  2.81it/s]
  7%|▋         | 60/855 [00:21<04:43,  2.80it/s]
  7%|▋         | 61/855 [00:21<04:42,  2.81it/s]
  7%|▋         | 62/855 [00:21<04:41,  2.81it/s]
  7%|▋         | 63/855 [00:22<04:41,  2.81it/s]
  7%|▋         | 64/

Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:16:12. Total running time: 9min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16                                                                                        

 15%|█▍        | 128/855 [00:45<04:21,  2.78it/s]
 15%|█▌        | 129/855 [00:45<04:20,  2.79it/s]
 15%|█▌        | 130/855 [00:46<04:19,  2.79it/s]
 15%|█▌        | 131/855 [00:46<04:20,  2.78it/s]
 15%|█▌        | 132/855 [00:47<04:18,  2.80it/s]
 16%|█▌        | 133/855 [00:47<04:19,  2.79it/s]
 16%|█▌        | 134/855 [00:47<04:17,  2.79it/s]
 16%|█▌        | 135/855 [00:48<04:16,  2.81it/s]
 16%|█▌        | 136/855 [00:48<04:16,  2.80it/s]
 16%|█▌        | 137/855 [00:48<04:16,  2.80it/s]
 16%|█▌        | 138/855 [00:49<04:15,  2.81it/s]
 16%|█▋        | 139/855 [00:49<04:15,  2.80it/s]
 16%|█▋        | 140/855 [00:49<04:14,  2.80it/s]
 16%|█▋        | 141/855 [00:50<04:13,  2.81it/s]
 17%|█▋        | 142/855 [00:50<04:14,  2.80it/s]
 17%|█▋        | 143/855 [00:50<04:13,  2.81it/s]
 17%|█▋        | 144/855 [00:51<04:13,  2.80it/s]
 17%|█▋        | 145/855 [00:51<04:14,  2.79it/s]
 17%|█▋        | 146/855 [00:52<04:13,  2.80it/s]
 17%|█▋        | 147/855 [00:52<04:13,  2.79it/s]


[2m[36m(_objective pid=8881)[0m {'loss': 0.7045, 'learning_rate': 5.207218163987366e-08, 'epoch': 1.0}


[2m[36m(_objective pid=8881)[0m                                                   20%|██        | 171/855 [01:00<03:09,  3.61it/s]
[2m[36m(_objective pid=8881)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8881)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.31it/s][A
[2m[36m(_objective pid=8881)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.09it/s][A
[2m[36m(_objective pid=8881)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.58it/s][A
[2m[36m(_objective pid=8881)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.03it/s][A
[2m[36m(_objective pid=8881)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.80it/s][A
[2m[36m(_objective pid=8881)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.88it/s][A
[2m[36m(_objective pid=8881)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.88it/s][A
[2m[36m(_objective pid=8881)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.85it/s][A
[2m[36m(_objective pid=8881)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.69it/s][A
[2m[3

Trial _objective_4df1a_00002 finished iteration 1 at 2023-09-21 17:16:30. Total running time: 9min 18s
+-------------------------------------------------+
| Trial _objective_4df1a_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.5824 |
| time_total_s                            67.5824 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                                0.7113 |
| eval_runtime                             2.2199 |
| eval_samples_per_second                 136.492 |
| eval_steps_per_second                     8.559 |
| objective                                0.7113 |
+-------------------------------------------------+

[2m[36m(_objective pid=8881)[0m {'eval_loss': 0.7113003134727478, 'eval_runtime': 2.2199, 'eval_samples_per_second': 136.492, 'eval_steps_per_second': 8.559, 'epoch': 1.0}


[2m[36m(_objective pid=8881)[0m                                                  
[2m[36m(_objective pid=8881)[0m                                                [A 20%|██        | 171/855 [01:02<03:09,  3.61it/s]
[2m[36m(_objective pid=8881)[0m 100%|██████████| 19/19 [00:02<00:00,  8.63it/s][A
                                               [A
 20%|██        | 172/855 [01:12<43:53,  3.86s/it]
 20%|██        | 173/855 [01:13<31:50,  2.80s/it]
 20%|██        | 174/855 [01:13<23:25,  2.06s/it]
 20%|██        | 175/855 [01:13<17:33,  1.55s/it]
 21%|██        | 176/855 [01:14<13:25,  1.19s/it]
 21%|██        | 177/855 [01:14<10:32,  1.07it/s]
 21%|██        | 178/855 [01:14<08:33,  1.32it/s]
 21%|██        | 179/855 [01:15<07:08,  1.58it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:16:42. Total running time: 9min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        1            67.5824      0.7113        0.7113             2.2199              

 21%|██        | 180/855 [01:15<06:08,  1.83it/s]
 21%|██        | 181/855 [01:15<05:26,  2.06it/s]
 21%|██▏       | 182/855 [01:16<04:58,  2.26it/s]
 21%|██▏       | 183/855 [01:16<04:37,  2.42it/s]
 22%|██▏       | 184/855 [01:16<04:23,  2.54it/s]
 22%|██▏       | 185/855 [01:17<04:13,  2.64it/s]
 22%|██▏       | 186/855 [01:17<04:06,  2.72it/s]
 22%|██▏       | 187/855 [01:18<04:00,  2.77it/s]
 22%|██▏       | 188/855 [01:18<03:57,  2.81it/s]
 22%|██▏       | 189/855 [01:18<03:54,  2.83it/s]
 22%|██▏       | 190/855 [01:19<03:53,  2.85it/s]
 22%|██▏       | 191/855 [01:19<03:51,  2.87it/s]
 22%|██▏       | 192/855 [01:19<03:50,  2.88it/s]
 23%|██▎       | 193/855 [01:20<03:49,  2.89it/s]
 23%|██▎       | 194/855 [01:20<03:48,  2.89it/s]
 23%|██▎       | 195/855 [01:20<03:47,  2.90it/s]
 23%|██▎       | 196/855 [01:21<03:47,  2.90it/s]
 23%|██▎       | 197/855 [01:21<03:47,  2.89it/s]
 23%|██▎       | 198/855 [01:21<03:46,  2.90it/s]
 23%|██▎       | 199/855 [01:22<03:46,  2.90it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:17:12. Total running time: 10min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        1            67.5824      0.7113        0.7113             2.2199              

 31%|███       | 265/855 [01:45<03:33,  2.76it/s]
 31%|███       | 266/855 [01:46<03:32,  2.77it/s]
 31%|███       | 267/855 [01:46<03:32,  2.77it/s]
 31%|███▏      | 268/855 [01:46<03:31,  2.77it/s]
 31%|███▏      | 269/855 [01:47<03:31,  2.77it/s]
 32%|███▏      | 270/855 [01:47<03:31,  2.77it/s]
 32%|███▏      | 271/855 [01:47<03:30,  2.77it/s]
 32%|███▏      | 272/855 [01:48<03:30,  2.77it/s]
 32%|███▏      | 273/855 [01:48<03:30,  2.77it/s]
 32%|███▏      | 274/855 [01:48<03:30,  2.77it/s]
 32%|███▏      | 275/855 [01:49<03:29,  2.77it/s]
 32%|███▏      | 276/855 [01:49<03:28,  2.78it/s]
 32%|███▏      | 277/855 [01:50<03:28,  2.77it/s]
 33%|███▎      | 278/855 [01:50<03:28,  2.77it/s]
 33%|███▎      | 279/855 [01:50<03:27,  2.78it/s]
 33%|███▎      | 280/855 [01:51<03:27,  2.78it/s]
 33%|███▎      | 281/855 [01:51<03:26,  2.77it/s]
 33%|███▎      | 282/855 [01:51<03:26,  2.77it/s]
 33%|███▎      | 283/855 [01:52<03:26,  2.77it/s]
 33%|███▎      | 284/855 [01:52<03:26,  2.77it/s]


[2m[36m(_objective pid=8881)[0m {'loss': 0.7001, 'learning_rate': 1.0414436327974732e-07, 'epoch': 2.0}


[2m[36m(_objective pid=8881)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.12it/s][A
[2m[36m(_objective pid=8881)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.89it/s][A
[2m[36m(_objective pid=8881)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.55it/s][A
[2m[36m(_objective pid=8881)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.92it/s][A
[2m[36m(_objective pid=8881)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.74it/s][A
[2m[36m(_objective pid=8881)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.78it/s][A
[2m[36m(_objective pid=8881)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.80it/s][A
[2m[36m(_objective pid=8881)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.65it/s][A
[2m[36m(_objective pid=8881)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.65it/s][A
[2m[36m(_objective pid=8881)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.58it/s][A
[2m[36m(_objective pid=8881)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.53it/s][A
[2m[36m(_objective pid=8881)[0m 
 84%|███████

[2m[36m(_objective pid=8881)[0m {'eval_loss': 0.7052288055419922, 'eval_runtime': 2.2295, 'eval_samples_per_second': 135.903, 'eval_steps_per_second': 8.522, 'epoch': 2.0}
Trial _objective_4df1a_00002 finished iteration 2 at 2023-09-21 17:17:42. Total running time: 10min 31s
+-------------------------------------------------+
| Trial _objective_4df1a_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        72.4331 |
| time_total_s                            140.016 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.70523 |
| eval_runtime                             2.2295 |
| eval_samples_per_second                 135.903 |
| eval_steps_per_second                     8.522 |
| objective                               0.70523 |
+-------------------------------------------------+



[2m[36m(_objective pid=8881)[0m 
[2m[36m(_objective pid=8881)[0m 100%|██████████| 19/19 [00:02<00:00,  8.53it/s][A                                                 
[2m[36m(_objective pid=8881)[0m                                                [A
[2m[36m(_objective pid=8881)[0m  40%|████      | 342/855 [02:15<03:01,  2.82it/s]
[2m[36m(_objective pid=8881)[0m 100%|██████████| 19/19 [00:02<00:00,  8.53it/s][A
[2m[36m(_objective pid=8881)[0m                                                [A


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:17:43. Total running time: 10min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        2            140.016      0.705229      0.705229           2.2295             

 40%|████      | 343/855 [02:21<18:41,  2.19s/it]
 40%|████      | 344/855 [02:21<14:44,  1.73s/it]
 40%|████      | 345/855 [02:22<11:37,  1.37s/it]
 40%|████      | 346/855 [02:22<09:15,  1.09s/it]
 41%|████      | 347/855 [02:23<07:28,  1.13it/s]
 41%|████      | 348/855 [02:23<06:09,  1.37it/s]
 41%|████      | 349/855 [02:23<05:13,  1.62it/s]
 41%|████      | 350/855 [02:24<04:32,  1.85it/s]
 41%|████      | 351/855 [02:24<04:02,  2.08it/s]
 41%|████      | 352/855 [02:24<03:42,  2.26it/s]
 41%|████▏     | 353/855 [02:25<03:28,  2.41it/s]
 41%|████▏     | 354/855 [02:25<03:17,  2.54it/s]
 42%|████▏     | 355/855 [02:25<03:10,  2.63it/s]
 42%|████▏     | 356/855 [02:26<03:04,  2.70it/s]
 42%|████▏     | 357/855 [02:26<03:00,  2.76it/s]
 42%|████▏     | 358/855 [02:26<02:57,  2.80it/s]
 42%|████▏     | 359/855 [02:27<02:55,  2.82it/s]
 42%|████▏     | 360/855 [02:27<02:54,  2.84it/s]
 42%|████▏     | 361/855 [02:27<02:53,  2.85it/s]
 42%|████▏     | 362/855 [02:28<02:52,  2.86it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:18:13. Total running time: 11min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        2            140.016      0.705229      0.705229           2.2295              

 48%|████▊     | 412/855 [02:45<02:36,  2.83it/s]
 48%|████▊     | 413/855 [02:46<02:34,  2.87it/s]
 48%|████▊     | 414/855 [02:46<02:35,  2.83it/s]
 49%|████▊     | 415/855 [02:46<02:35,  2.82it/s]
 49%|████▊     | 416/855 [02:47<02:34,  2.83it/s]
 49%|████▉     | 417/855 [02:47<02:34,  2.83it/s]
 49%|████▉     | 418/855 [02:47<02:35,  2.82it/s]
 49%|████▉     | 419/855 [02:48<02:35,  2.81it/s]
 49%|████▉     | 420/855 [02:48<02:34,  2.81it/s]
 49%|████▉     | 421/855 [02:48<02:34,  2.81it/s]
 49%|████▉     | 422/855 [02:49<02:34,  2.80it/s]
 49%|████▉     | 423/855 [02:49<02:33,  2.81it/s]
 50%|████▉     | 424/855 [02:50<02:33,  2.80it/s]
 50%|████▉     | 425/855 [02:50<02:33,  2.80it/s]
 50%|████▉     | 426/855 [02:50<02:32,  2.81it/s]
 50%|████▉     | 427/855 [02:51<02:32,  2.80it/s]
 50%|█████     | 428/855 [02:51<02:32,  2.80it/s]
 50%|█████     | 429/855 [02:51<02:31,  2.80it/s]
 50%|█████     | 430/855 [02:52<02:31,  2.80it/s]
 50%|█████     | 431/855 [02:52<02:31,  2.80it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:18:43. Total running time: 11min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        2            140.016      0.705229      0.705229           2.2295             

 58%|█████▊    | 497/855 [03:16<02:06,  2.83it/s]
 58%|█████▊    | 498/855 [03:16<02:06,  2.82it/s]
 58%|█████▊    | 499/855 [03:16<02:06,  2.82it/s]
 58%|█████▊    | 500/855 [03:17<02:05,  2.82it/s]
 59%|█████▊    | 501/855 [03:17<02:05,  2.83it/s]
 59%|█████▊    | 502/855 [03:17<02:05,  2.82it/s]
 59%|█████▉    | 503/855 [03:18<02:04,  2.83it/s]
 59%|█████▉    | 504/855 [03:18<02:04,  2.82it/s]
 59%|█████▉    | 505/855 [03:18<02:03,  2.82it/s]
 59%|█████▉    | 506/855 [03:19<02:03,  2.82it/s]
 59%|█████▉    | 507/855 [03:19<02:03,  2.83it/s]
 59%|█████▉    | 508/855 [03:19<02:02,  2.83it/s]
 60%|█████▉    | 509/855 [03:20<02:02,  2.83it/s]
 60%|█████▉    | 510/855 [03:20<02:01,  2.83it/s]
 60%|█████▉    | 511/855 [03:20<02:01,  2.83it/s]
 60%|█████▉    | 512/855 [03:21<02:01,  2.83it/s]
 60%|██████    | 513/855 [03:21<02:01,  2.83it/s]


[2m[36m(_objective pid=8881)[0m {'loss': 0.6943, 'learning_rate': 1.56216544919621e-07, 'epoch': 3.0}


[2m[36m(_objective pid=8881)[0m 
[2m[36m(_objective pid=8881)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8881)[0m 
 11%|█         | 2/19 [00:00<00:01, 15.96it/s][A
[2m[36m(_objective pid=8881)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.08it/s][A
[2m[36m(_objective pid=8881)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.62it/s][A
[2m[36m(_objective pid=8881)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.99it/s][A
[2m[36m(_objective pid=8881)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.97it/s][A
[2m[36m(_objective pid=8881)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.91it/s][A
[2m[36m(_objective pid=8881)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.83it/s][A
[2m[36m(_objective pid=8881)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.62it/s][A
[2m[36m(_objective pid=8881)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.62it/s][A
[2m[36m(_objective pid=8881)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.67it/s][A
[2m[36m(_objective 

Trial _objective_4df1a_00002 finished iteration 3 at 2023-09-21 17:18:51. Total running time: 11min 39s
+-------------------------------------------------+
| Trial _objective_4df1a_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        68.3668 |
| time_total_s                            208.382 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.69505 |
| eval_runtime                             2.2426 |
| eval_samples_per_second                 135.109 |
| eval_steps_per_second                     8.472 |
| objective                               0.69505 |
+-------------------------------------------------+

[2m[36m(_objective pid=8881)[0m {'eval_loss': 0.69505375623703, 'eval_runtime': 2.2426, 'eval_samples_per_second': 135.109, 'eval_steps_per_second': 8.472, 'epoch': 3.0}


 60%|██████    | 514/855 [03:29<11:15,  1.98s/it]
 60%|██████    | 515/855 [03:29<08:55,  1.57s/it]
 60%|██████    | 516/855 [03:29<07:04,  1.25s/it]
 60%|██████    | 517/855 [03:30<05:41,  1.01s/it]
 61%|██████    | 518/855 [03:30<04:37,  1.21it/s]
 61%|██████    | 519/855 [03:30<03:50,  1.46it/s]
 61%|██████    | 520/855 [03:31<03:17,  1.69it/s]
 61%|██████    | 521/855 [03:31<02:53,  1.92it/s]
 61%|██████    | 522/855 [03:31<02:35,  2.14it/s]
 61%|██████    | 523/855 [03:32<02:23,  2.31it/s]
 61%|██████▏   | 524/855 [03:32<02:14,  2.46it/s]
 61%|██████▏   | 525/855 [03:32<02:08,  2.57it/s]
 62%|██████▏   | 526/855 [03:33<02:03,  2.65it/s]
 62%|██████▏   | 527/855 [03:33<02:00,  2.71it/s]
 62%|██████▏   | 528/855 [03:33<01:58,  2.76it/s]
 62%|██████▏   | 529/855 [03:34<01:57,  2.78it/s]
 62%|██████▏   | 530/855 [03:34<01:55,  2.81it/s]
 62%|██████▏   | 531/855 [03:35<01:54,  2.82it/s]
 62%|██████▏   | 532/855 [03:35<01:53,  2.84it/s]
 62%|██████▏   | 533/855 [03:35<01:53,  2.84it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:19:13. Total running time: 12min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        3            208.382      0.695054      0.695054           2.2426              

 66%|██████▌   | 562/855 [03:45<01:43,  2.83it/s]
 66%|██████▌   | 563/855 [03:46<01:42,  2.84it/s]
 66%|██████▌   | 564/855 [03:46<01:42,  2.83it/s]
 66%|██████▌   | 565/855 [03:46<01:42,  2.84it/s]
 66%|██████▌   | 566/855 [03:47<01:41,  2.84it/s]
 66%|██████▋   | 567/855 [03:47<01:41,  2.84it/s]
 66%|██████▋   | 568/855 [03:48<01:41,  2.84it/s]
 67%|██████▋   | 569/855 [03:48<01:41,  2.83it/s]
 67%|██████▋   | 570/855 [03:48<01:40,  2.83it/s]
 67%|██████▋   | 571/855 [03:49<01:40,  2.83it/s]
 67%|██████▋   | 572/855 [03:49<01:39,  2.83it/s]
 67%|██████▋   | 573/855 [03:49<01:39,  2.83it/s]
 67%|██████▋   | 574/855 [03:50<01:39,  2.84it/s]
 67%|██████▋   | 575/855 [03:50<01:39,  2.83it/s]
 67%|██████▋   | 576/855 [03:50<01:38,  2.83it/s]
 67%|██████▋   | 577/855 [03:51<01:38,  2.83it/s]
 68%|██████▊   | 578/855 [03:51<01:37,  2.83it/s]
 68%|██████▊   | 579/855 [03:51<01:37,  2.82it/s]
 68%|██████▊   | 580/855 [03:52<01:37,  2.83it/s]
 68%|██████▊   | 581/855 [03:52<01:37,  2.82it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:19:43. Total running time: 12min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        3            208.382      0.695054      0.695054           2.2426             

 76%|███████▌  | 647/855 [04:16<01:13,  2.81it/s]
 76%|███████▌  | 648/855 [04:16<01:13,  2.81it/s]
 76%|███████▌  | 649/855 [04:16<01:13,  2.81it/s]
 76%|███████▌  | 650/855 [04:17<01:12,  2.82it/s]
 76%|███████▌  | 651/855 [04:17<01:12,  2.82it/s]
 76%|███████▋  | 652/855 [04:17<01:12,  2.82it/s]
 76%|███████▋  | 653/855 [04:18<01:11,  2.82it/s]
 76%|███████▋  | 654/855 [04:18<01:11,  2.82it/s]
 77%|███████▋  | 655/855 [04:18<01:10,  2.82it/s]
 77%|███████▋  | 656/855 [04:19<01:10,  2.82it/s]
 77%|███████▋  | 657/855 [04:19<01:09,  2.84it/s]
 77%|███████▋  | 658/855 [04:19<01:09,  2.83it/s]
 77%|███████▋  | 659/855 [04:20<01:09,  2.84it/s]
 77%|███████▋  | 660/855 [04:20<01:08,  2.83it/s]
 77%|███████▋  | 661/855 [04:21<01:08,  2.83it/s]
 77%|███████▋  | 662/855 [04:21<01:08,  2.83it/s]
 78%|███████▊  | 663/855 [04:21<01:07,  2.84it/s]
 78%|███████▊  | 664/855 [04:22<01:07,  2.84it/s]
 78%|███████▊  | 665/855 [04:22<01:06,  2.85it/s]
 78%|███████▊  | 666/855 [04:22<01:06,  2.85it/s]


[2m[36m(_objective pid=8881)[0m {'loss': 0.6859, 'learning_rate': 2.0828872655949465e-07, 'epoch': 4.0}


[2m[36m(_objective pid=8881)[0m                                                   80%|████████  | 684/855 [04:28<01:00,  2.84it/s]
[2m[36m(_objective pid=8881)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8881)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.58it/s][A
[2m[36m(_objective pid=8881)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.65it/s][A
[2m[36m(_objective pid=8881)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.48it/s][A
[2m[36m(_objective pid=8881)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.05it/s][A
[2m[36m(_objective pid=8881)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.75it/s][A
[2m[36m(_objective pid=8881)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.95it/s][A
[2m[36m(_objective pid=8881)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.92it/s][A
[2m[36m(_objective pid=8881)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.77it/s][A
[2m[36m(_objective pid=8881)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.64it/s][A
[2m[3

Trial _objective_4df1a_00002 finished iteration 4 at 2023-09-21 17:19:58. Total running time: 12min 46s
+-------------------------------------------------+
| Trial _objective_4df1a_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.4102 |
| time_total_s                            275.793 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.67723 |
| eval_runtime                             2.2227 |
| eval_samples_per_second                 136.319 |
| eval_steps_per_second                     8.548 |
| objective                               0.67723 |
+-------------------------------------------------+

[2m[36m(_objective pid=8881)[0m {'eval_loss': 0.6772260665893555, 'eval_runtime': 2.2227, 'eval_samples_per_second': 136.319, 'eval_steps_per_second': 8.548, 'epoch': 4.0}


 80%|████████  | 685/855 [04:36<05:37,  1.98s/it]
 80%|████████  | 686/855 [04:36<04:26,  1.58s/it]
 80%|████████  | 687/855 [04:37<03:30,  1.25s/it]
 80%|████████  | 688/855 [04:37<02:48,  1.01s/it]
 81%|████████  | 689/855 [04:37<02:16,  1.21it/s]
 81%|████████  | 690/855 [04:38<01:53,  1.46it/s]
 81%|████████  | 691/855 [04:38<01:36,  1.69it/s]
 81%|████████  | 692/855 [04:38<01:24,  1.92it/s]
 81%|████████  | 693/855 [04:39<01:15,  2.14it/s]
 81%|████████  | 694/855 [04:39<01:09,  2.31it/s]
 81%|████████▏ | 695/855 [04:40<01:05,  2.45it/s]
 81%|████████▏ | 696/855 [04:40<01:01,  2.57it/s]
 82%|████████▏ | 697/855 [04:40<00:59,  2.64it/s]
 82%|████████▏ | 698/855 [04:41<00:57,  2.71it/s]
 82%|████████▏ | 699/855 [04:41<00:56,  2.76it/s]
 82%|████████▏ | 700/855 [04:41<00:55,  2.78it/s]
 82%|████████▏ | 701/855 [04:42<00:54,  2.81it/s]
 82%|████████▏ | 702/855 [04:42<00:54,  2.83it/s]
 82%|████████▏ | 703/855 [04:42<00:53,  2.84it/s]
 82%|████████▏ | 704/855 [04:43<00:53,  2.84it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:20:13. Total running time: 13min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        4            275.793      0.677226      0.677226           2.2227              

 83%|████████▎ | 712/855 [04:45<00:50,  2.85it/s]
 83%|████████▎ | 713/855 [04:46<00:49,  2.86it/s]
 84%|████████▎ | 714/855 [04:46<00:49,  2.87it/s]
 84%|████████▎ | 715/855 [04:47<00:48,  2.87it/s]
 84%|████████▎ | 716/855 [04:47<00:49,  2.82it/s]
 84%|████████▍ | 717/855 [04:47<00:48,  2.85it/s]
 84%|████████▍ | 718/855 [04:48<00:48,  2.83it/s]
 84%|████████▍ | 719/855 [04:48<00:48,  2.83it/s]
 84%|████████▍ | 720/855 [04:48<00:47,  2.83it/s]
 84%|████████▍ | 721/855 [04:49<00:47,  2.81it/s]
 84%|████████▍ | 722/855 [04:49<00:47,  2.81it/s]
 85%|████████▍ | 723/855 [04:49<00:46,  2.81it/s]
 85%|████████▍ | 724/855 [04:50<00:46,  2.82it/s]
 85%|████████▍ | 725/855 [04:50<00:46,  2.83it/s]
 85%|████████▍ | 726/855 [04:50<00:45,  2.83it/s]
 85%|████████▌ | 727/855 [04:51<00:45,  2.83it/s]
 85%|████████▌ | 728/855 [04:51<00:44,  2.82it/s]
 85%|████████▌ | 729/855 [04:51<00:44,  2.83it/s]
 85%|████████▌ | 730/855 [04:52<00:44,  2.81it/s]
 85%|████████▌ | 731/855 [04:52<00:44,  2.81it/s]


Trial status: 2 TERMINATED | 1 RUNNING | 7 PENDING
Current time: 2023-09-21 17:20:43. Total running time: 13min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00002   RUNNING          8.28892e-06                    5   24.4435                        16        4            275.793      0.677226      0.677226           2.2227             

 93%|█████████▎| 797/855 [05:16<00:20,  2.82it/s]
 93%|█████████▎| 798/855 [05:16<00:20,  2.82it/s]
 93%|█████████▎| 799/855 [05:16<00:19,  2.82it/s]
 94%|█████████▎| 800/855 [05:17<00:19,  2.83it/s]
 94%|█████████▎| 801/855 [05:17<00:19,  2.83it/s]
 94%|█████████▍| 802/855 [05:17<00:18,  2.82it/s]
 94%|█████████▍| 803/855 [05:18<00:18,  2.82it/s]
 94%|█████████▍| 804/855 [05:18<00:18,  2.83it/s]
 94%|█████████▍| 805/855 [05:18<00:17,  2.82it/s]
 94%|█████████▍| 806/855 [05:19<00:17,  2.82it/s]
 94%|█████████▍| 807/855 [05:19<00:16,  2.83it/s]
 95%|█████████▍| 808/855 [05:20<00:16,  2.83it/s]
 95%|█████████▍| 809/855 [05:20<00:16,  2.82it/s]
 95%|█████████▍| 810/855 [05:20<00:15,  2.83it/s]
 95%|█████████▍| 811/855 [05:21<00:15,  2.82it/s]
 95%|█████████▍| 812/855 [05:21<00:15,  2.81it/s]
 95%|█████████▌| 813/855 [05:21<00:14,  2.82it/s]
 95%|█████████▌| 814/855 [05:22<00:14,  2.82it/s]
 95%|█████████▌| 815/855 [05:22<00:14,  2.82it/s]
 95%|█████████▌| 816/855 [05:22<00:13,  2.82it/s]


[2m[36m(_objective pid=8881)[0m {'loss': 0.6692, 'learning_rate': 2.603609081993683e-07, 'epoch': 5.0}


[2m[36m(_objective pid=8881)[0m                                                  100%|██████████| 855/855 [05:36<00:00,  2.84it/s]
[2m[36m(_objective pid=8881)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=8881)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.60it/s][A
[2m[36m(_objective pid=8881)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.71it/s][A
[2m[36m(_objective pid=8881)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.68it/s][A
[2m[36m(_objective pid=8881)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.05it/s][A
[2m[36m(_objective pid=8881)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.88it/s][A
[2m[36m(_objective pid=8881)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.87it/s][A
[2m[36m(_objective pid=8881)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.84it/s][A
[2m[36m(_objective pid=8881)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.77it/s][A
[2m[36m(_objective pid=8881)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.54it/s][A
[2m[3

Trial _objective_4df1a_00002 finished iteration 5 at 2023-09-21 17:21:06. Total running time: 13min 54s
+-------------------------------------------------+
| Trial _objective_4df1a_00002 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.5369 |
| time_total_s                            343.329 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.65817 |
| eval_runtime                               2.22 |
| eval_samples_per_second                 136.484 |
| eval_steps_per_second                     8.558 |
| objective                               0.65817 |
+-------------------------------------------------+

[2m[36m(_objective pid=8881)[0m {'eval_loss': 0.6581705212593079, 'eval_runtime': 2.22, 'eval_samples_per_second': 136.484, 'eval_steps_per_second': 8.558, 'epoch': 5.0}
Trial _objective_4df1a_00002 completed after 5

[2m[36m(_objective pid=8881)[0m                                                  100%|██████████| 855/855 [05:44<00:00,  2.84it/s]100%|██████████| 855/855 [05:44<00:00,  2.49it/s]


Trial status: 3 TERMINATED | 7 PENDING
Current time: 2023-09-21 17:21:13. Total running time: 14min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   TERMINATED       5.61152e-06                    5    8.15396                       64        5            302.361      0.698649      0.698649           2.2379                  135.396 



Trial _objective_4df1a_00003 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00003 config             |
+-------------------------------------------------+
| learning_rate                                 0 |
| num_train_epochs                              2 |
| per_device_train_batch_size                   8 |
| seed                                    29.1579 |
+-------------------------------------------------+



[2m[36m(_objective pid=10363)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=10363)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/682 [00:00<?, ?it/s]
  0%|          | 1/682 [00:00<04:57,  2.29it/s]
  0%|          | 2/682 [00:00<03:10,  3.58it/s]
  0%|          | 3/682 [00:00<02:45,  4.10it/s]
  1%|          | 4/682 [00:01<02:35,  4.37it/s]
  1%|          | 5/682 [00:01<02:31,  4.48it/s]
  1%|          | 6/682 [00:01<02:25,  4.64it/s]
  1%|          | 7/682 [00:01<02:24,  4.68it/s]
  1%|          | 8/682 [00:01<02:20,  4.79it/s]
  1%|▏         | 9/682 [00:02<02:17,  4.88it/s]
  1%|▏         | 10/682 [00:02<02:16,  4.91it/s]
  2%|▏         | 11/682 [00:02<02:15,  4.94it/s]
  2%|▏         | 12/682 [00:02<02:15,  4.96it/s]
  2%|▏         |

Trial status: 3 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-21 17:21:43. Total running time: 14min 31s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00003   RUNNING          1.09943e-06                    2   29.158                          8                                                                                      

 11%|█▏        | 77/682 [00:15<02:04,  4.84it/s]
 11%|█▏        | 78/682 [00:15<02:04,  4.85it/s]
 12%|█▏        | 79/682 [00:16<02:04,  4.86it/s]
 12%|█▏        | 80/682 [00:16<02:04,  4.85it/s]
 12%|█▏        | 81/682 [00:16<02:03,  4.87it/s]
 12%|█▏        | 82/682 [00:16<02:03,  4.86it/s]
 12%|█▏        | 83/682 [00:17<02:03,  4.84it/s]
 12%|█▏        | 84/682 [00:17<02:04,  4.82it/s]
 12%|█▏        | 85/682 [00:17<02:04,  4.81it/s]
 13%|█▎        | 86/682 [00:17<02:03,  4.83it/s]
 13%|█▎        | 87/682 [00:17<02:02,  4.86it/s]
 13%|█▎        | 88/682 [00:18<02:02,  4.86it/s]
 13%|█▎        | 89/682 [00:18<02:02,  4.84it/s]
 13%|█▎        | 90/682 [00:18<02:03,  4.81it/s]
 13%|█▎        | 91/682 [00:18<02:02,  4.83it/s]
 13%|█▎        | 92/682 [00:18<02:00,  4.89it/s]
 14%|█▎        | 93/682 [00:19<02:00,  4.89it/s]
 14%|█▍        | 94/682 [00:19<01:59,  4.92it/s]
 14%|█▍        | 95/682 [00:19<01:59,  4.91it/s]
 14%|█▍        | 96/682 [00:19<01:59,  4.89it/s]
 14%|█▍        | 97/

Trial status: 3 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-21 17:22:13. Total running time: 15min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00003   RUNNING          1.09943e-06                    2   29.158                          8                                                                                       

 33%|███▎      | 223/682 [00:46<01:36,  4.78it/s]
 33%|███▎      | 224/682 [00:46<01:35,  4.78it/s]
 33%|███▎      | 225/682 [00:46<01:35,  4.79it/s]
 33%|███▎      | 226/682 [00:46<01:35,  4.78it/s]
 33%|███▎      | 227/682 [00:46<01:35,  4.77it/s]
 33%|███▎      | 228/682 [00:47<01:34,  4.78it/s]
 34%|███▎      | 229/682 [00:47<01:34,  4.78it/s]
 34%|███▎      | 230/682 [00:47<01:34,  4.77it/s]
 34%|███▍      | 231/682 [00:47<01:34,  4.77it/s]
 34%|███▍      | 232/682 [00:47<01:34,  4.77it/s]
 34%|███▍      | 233/682 [00:48<01:34,  4.75it/s]
 34%|███▍      | 234/682 [00:48<01:34,  4.76it/s]
 34%|███▍      | 235/682 [00:48<01:33,  4.78it/s]
 35%|███▍      | 236/682 [00:48<01:33,  4.79it/s]
 35%|███▍      | 237/682 [00:48<01:34,  4.73it/s]
 35%|███▍      | 238/682 [00:49<01:34,  4.72it/s]
 35%|███▌      | 239/682 [00:49<01:32,  4.78it/s]
 35%|███▌      | 240/682 [00:49<01:31,  4.81it/s]
 35%|███▌      | 241/682 [00:49<01:31,  4.81it/s]
 35%|███▌      | 242/682 [00:49<01:31,  4.82it/s]


[2m[36m(_objective pid=10363)[0m {'loss': 0.7108, 'learning_rate': 1.3773212457734315e-08, 'epoch': 1.0}


[2m[36m(_objective pid=10363)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.22it/s][A
[2m[36m(_objective pid=10363)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.87it/s][A
[2m[36m(_objective pid=10363)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.58it/s][A
[2m[36m(_objective pid=10363)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.01it/s][A
[2m[36m(_objective pid=10363)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.82it/s][A
[2m[36m(_objective pid=10363)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.92it/s][A
[2m[36m(_objective pid=10363)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.86it/s][A
[2m[36m(_objective pid=10363)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.85it/s][A
[2m[36m(_objective pid=10363)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.77it/s][A
[2m[36m(_objective pid=10363)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.71it/s][A
[2m[36m(_objective pid=10363)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.64it/s][A
[2m[36m(_objective pid=10363)[0m 


Trial _objective_4df1a_00003 finished iteration 1 at 2023-09-21 17:22:40. Total running time: 15min 28s
+-------------------------------------------------+
| Trial _objective_4df1a_00003 result             |
+-------------------------------------------------+
| time_this_iter_s                         78.773 |
| time_total_s                             78.773 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.70745 |
| eval_runtime                              2.216 |
| eval_samples_per_second                 136.732 |
| eval_steps_per_second                     8.574 |
| objective                               0.70745 |
+-------------------------------------------------+

[2m[36m(_objective pid=10363)[0m {'eval_loss': 0.7074495553970337, 'eval_runtime': 2.216, 'eval_samples_per_second': 136.732, 'eval_steps_per_second': 8.574, 'epoch': 1.0}
Trial status: 3 TERMINATED | 1 RUNNING | 6 P

 50%|█████     | 342/682 [01:17<13:14,  2.34s/it]
 50%|█████     | 343/682 [01:17<09:35,  1.70s/it]
 50%|█████     | 344/682 [01:17<07:01,  1.25s/it]
 51%|█████     | 345/682 [01:18<05:14,  1.07it/s]
 51%|█████     | 346/682 [01:18<03:59,  1.40it/s]
 51%|█████     | 347/682 [01:18<03:06,  1.79it/s]
 51%|█████     | 348/682 [01:18<02:30,  2.22it/s]
 51%|█████     | 349/682 [01:18<02:05,  2.66it/s]
 51%|█████▏    | 350/682 [01:19<01:47,  3.09it/s]
 51%|█████▏    | 351/682 [01:19<01:34,  3.49it/s]
 52%|█████▏    | 352/682 [01:19<01:25,  3.85it/s]
 52%|█████▏    | 353/682 [01:19<01:20,  4.08it/s]
 52%|█████▏    | 354/682 [01:20<01:16,  4.29it/s]
 52%|█████▏    | 355/682 [01:20<01:13,  4.46it/s]
 52%|█████▏    | 356/682 [01:20<01:10,  4.59it/s]
 52%|█████▏    | 357/682 [01:20<01:09,  4.68it/s]
 52%|█████▏    | 358/682 [01:20<01:08,  4.72it/s]
 53%|█████▎    | 359/682 [01:21<01:08,  4.75it/s]
 53%|█████▎    | 360/682 [01:21<01:07,  4.78it/s]
 53%|█████▎    | 361/682 [01:21<01:07,  4.77it/s]


Trial status: 3 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-21 17:23:13. Total running time: 16min 1s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00003   RUNNING          1.09943e-06                    2   29.158                          8        1             78.773      0.70745       0.70745            2.216               

 71%|███████   | 481/682 [01:45<00:41,  4.87it/s]
 71%|███████   | 482/682 [01:46<00:41,  4.84it/s]
 71%|███████   | 483/682 [01:46<00:40,  4.88it/s]
 71%|███████   | 484/682 [01:46<00:40,  4.87it/s]
 71%|███████   | 485/682 [01:46<00:40,  4.88it/s]
 71%|███████▏  | 486/682 [01:47<00:40,  4.88it/s]
 71%|███████▏  | 487/682 [01:47<00:40,  4.87it/s]
 72%|███████▏  | 488/682 [01:47<00:39,  4.86it/s]
 72%|███████▏  | 489/682 [01:47<00:39,  4.88it/s]
 72%|███████▏  | 490/682 [01:47<00:39,  4.88it/s]
 72%|███████▏  | 491/682 [01:48<00:39,  4.88it/s]
 72%|███████▏  | 492/682 [01:48<00:38,  4.90it/s]
 72%|███████▏  | 493/682 [01:48<00:38,  4.88it/s]
 72%|███████▏  | 494/682 [01:48<00:38,  4.88it/s]
 73%|███████▎  | 495/682 [01:48<00:38,  4.87it/s]
 73%|███████▎  | 496/682 [01:49<00:38,  4.86it/s]
 73%|███████▎  | 497/682 [01:49<00:37,  4.87it/s]
 73%|███████▎  | 498/682 [01:49<00:37,  4.86it/s]
 73%|███████▎  | 499/682 [01:49<00:37,  4.85it/s]
 73%|███████▎  | 500/682 [01:49<00:37,  4.86it/s]


Trial status: 3 TERMINATED | 1 RUNNING | 6 PENDING
Current time: 2023-09-21 17:23:43. Total running time: 16min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00003   RUNNING          1.09943e-06                    2   29.158                          8        1             78.773      0.70745       0.70745            2.216              

 92%|█████████▏| 627/682 [02:16<00:11,  4.82it/s]
 92%|█████████▏| 628/682 [02:16<00:11,  4.84it/s]
 92%|█████████▏| 629/682 [02:16<00:11,  4.77it/s]
 92%|█████████▏| 630/682 [02:16<00:10,  4.83it/s]
 93%|█████████▎| 631/682 [02:16<00:10,  4.85it/s]
 93%|█████████▎| 632/682 [02:17<00:10,  4.86it/s]
 93%|█████████▎| 633/682 [02:17<00:10,  4.87it/s]
 93%|█████████▎| 634/682 [02:17<00:09,  4.86it/s]
 93%|█████████▎| 635/682 [02:17<00:09,  4.90it/s]
 93%|█████████▎| 636/682 [02:17<00:09,  4.91it/s]
 93%|█████████▎| 637/682 [02:18<00:09,  4.90it/s]
 94%|█████████▎| 638/682 [02:18<00:08,  4.89it/s]
 94%|█████████▎| 639/682 [02:18<00:08,  4.89it/s]
 94%|█████████▍| 640/682 [02:18<00:08,  4.89it/s]
 94%|█████████▍| 641/682 [02:18<00:08,  4.89it/s]
 94%|█████████▍| 642/682 [02:19<00:08,  4.89it/s]
 94%|█████████▍| 643/682 [02:19<00:07,  4.90it/s]
 94%|█████████▍| 644/682 [02:19<00:07,  4.90it/s]
 95%|█████████▍| 645/682 [02:19<00:07,  4.91it/s]
 95%|█████████▍| 646/682 [02:19<00:07,  4.92it/s]


[2m[36m(_objective pid=10363)[0m {'loss': 0.7087, 'learning_rate': 2.754642491546863e-08, 'epoch': 2.0}


[2m[36m(_objective pid=10363)[0m 
[2m[36m(_objective pid=10363)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=10363)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.10it/s][A
[2m[36m(_objective pid=10363)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.09it/s][A
[2m[36m(_objective pid=10363)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.52it/s][A
[2m[36m(_objective pid=10363)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.81it/s][A
[2m[36m(_objective pid=10363)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.93it/s][A
[2m[36m(_objective pid=10363)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.93it/s][A
[2m[36m(_objective pid=10363)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.77it/s][A
[2m[36m(_objective pid=10363)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.81it/s][A
[2m[36m(_objective pid=10363)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.50it/s][A
[2m[36m(_objective pid=10363)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.60it/s][A
[2m[36m

Trial _objective_4df1a_00003 finished iteration 2 at 2023-09-21 17:23:57. Total running time: 16min 45s
+-------------------------------------------------+
| Trial _objective_4df1a_00003 result             |
+-------------------------------------------------+
| time_this_iter_s                        76.9925 |
| time_total_s                            155.765 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.70424 |
| eval_runtime                               2.23 |
| eval_samples_per_second                 135.877 |
| eval_steps_per_second                      8.52 |
| objective                               0.70424 |
+-------------------------------------------------+

[2m[36m(_objective pid=10363)[0m {'eval_loss': 0.7042413353919983, 'eval_runtime': 2.23, 'eval_samples_per_second': 135.877, 'eval_steps_per_second': 8.52, 'epoch': 2.0}
Trial _objective_4df1a_00003 completed after 2

[2m[36m(_objective pid=10363)[0m                                                  100%|██████████| 682/682 [02:38<00:00,  4.89it/s]100%|██████████| 682/682 [02:38<00:00,  4.29it/s]


Trial status: 4 TERMINATED | 6 PENDING
Current time: 2023-09-21 17:24:13. Total running time: 17min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00000   TERMINATED       5.61152e-06                    5    8.15396                       64        5            302.361      0.698649      0.698649           2.2379                  135.396 



Trial _objective_4df1a_00004 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00004 config             |
+-------------------------------------------------+
| learning_rate                                 0 |
| num_train_epochs                              5 |
| per_device_train_batch_size                   8 |
| seed                                    25.0818 |
+-------------------------------------------------+



[2m[36m(_objective pid=11102)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=11102)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/1705 [00:00<?, ?it/s]
  0%|          | 1/1705 [00:00<11:11,  2.54it/s]
  0%|          | 2/1705 [00:00<07:21,  3.86it/s]
  0%|          | 3/1705 [00:00<06:35,  4.30it/s]
  0%|          | 4/1705 [00:00<06:09,  4.60it/s]
  0%|          | 5/1705 [00:01<05:58,  4.74it/s]
  0%|          | 6/1705 [00:01<05:51,  4.84it/s]
  0%|          | 7/1705 [00:01<05:47,  4.89it/s]
  0%|          | 8/1705 [00:01<05:43,  4.94it/s]
  1%|          | 9/1705 [00:01<05:40,  4.98it/s]
  1%|          | 10/1705 [00:02<05:37,  5.02it/s]
  1%|          | 11/1705 [00:02<05:37,  5.02it/s]
  1%|          | 12/1705 [00:02<05:36,  5.04it/s]
  1

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:24:43. Total running time: 17min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8                                                                                      

  6%|▌         | 105/1705 [00:21<05:24,  4.93it/s]
  6%|▌         | 106/1705 [00:21<05:24,  4.92it/s]
  6%|▋         | 107/1705 [00:21<05:25,  4.91it/s]
  6%|▋         | 108/1705 [00:22<05:25,  4.91it/s]
  6%|▋         | 109/1705 [00:22<05:24,  4.91it/s]
  6%|▋         | 110/1705 [00:22<05:24,  4.92it/s]
  7%|▋         | 111/1705 [00:22<05:24,  4.92it/s]
  7%|▋         | 112/1705 [00:22<05:24,  4.91it/s]
  7%|▋         | 113/1705 [00:23<05:28,  4.85it/s]
  7%|▋         | 114/1705 [00:23<05:28,  4.84it/s]
  7%|▋         | 115/1705 [00:23<05:28,  4.85it/s]
  7%|▋         | 116/1705 [00:23<05:28,  4.83it/s]
  7%|▋         | 117/1705 [00:23<05:28,  4.83it/s]
  7%|▋         | 118/1705 [00:24<05:30,  4.80it/s]
  7%|▋         | 119/1705 [00:24<05:31,  4.79it/s]
  7%|▋         | 120/1705 [00:24<05:31,  4.78it/s]
  7%|▋         | 121/1705 [00:24<05:30,  4.79it/s]
  7%|▋         | 122/1705 [00:24<05:29,  4.80it/s]
  7%|▋         | 123/1705 [00:25<05:27,  4.83it/s]
  7%|▋         | 124/1705 [00:2

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:25:13. Total running time: 18min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8                                                                                       

[2m[36m(_objective pid=11102)[0m  15%|█▍        | 249/1705 [00:51<04:59,  4.86it/s]
 15%|█▍        | 250/1705 [00:51<05:00,  4.85it/s]
 15%|█▍        | 251/1705 [00:51<04:59,  4.86it/s]
 15%|█▍        | 252/1705 [00:52<04:58,  4.86it/s]
 15%|█▍        | 253/1705 [00:52<04:57,  4.88it/s]
 15%|█▍        | 254/1705 [00:52<04:58,  4.86it/s]
 15%|█▍        | 255/1705 [00:52<04:58,  4.85it/s]
 15%|█▌        | 256/1705 [00:52<04:59,  4.83it/s]
 15%|█▌        | 257/1705 [00:53<04:58,  4.85it/s]
 15%|█▌        | 258/1705 [00:53<04:57,  4.87it/s]
 15%|█▌        | 259/1705 [00:53<04:57,  4.86it/s]
 15%|█▌        | 260/1705 [00:53<04:57,  4.87it/s]
 15%|█▌        | 261/1705 [00:53<04:57,  4.86it/s]
 15%|█▌        | 262/1705 [00:54<04:57,  4.85it/s]
 15%|█▌        | 263/1705 [00:54<04:56,  4.86it/s]
 15%|█▌        | 264/1705 [00:54<04:56,  4.86it/s]
 16%|█▌        | 265/1705 [00:54<04:55,  4.87it/s]
 16%|█▌        | 266/1705 [00:54<04:54,  4.88it/s]
 16%|█▌        | 267/1705 [00:55<04:53,  4.90

[2m[36m(_objective pid=11102)[0m {'loss': 0.7124, 'learning_rate': 2.894117721363871e-08, 'epoch': 1.0}


[2m[36m(_objective pid=11102)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.17it/s][A
[2m[36m(_objective pid=11102)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.97it/s][A
[2m[36m(_objective pid=11102)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.65it/s][A
[2m[36m(_objective pid=11102)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.05it/s][A
[2m[36m(_objective pid=11102)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.85it/s][A
[2m[36m(_objective pid=11102)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.70it/s][A
[2m[36m(_objective pid=11102)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.90it/s][A
[2m[36m(_objective pid=11102)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.95it/s][A
[2m[36m(_objective pid=11102)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.85it/s][A
[2m[36m(_objective pid=11102)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.71it/s][A
[2m[36m(_objective pid=11102)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.64it/s][A
[2m[36m(_objective pid=11102)[0m 


Trial _objective_4df1a_00004 finished iteration 1 at 2023-09-21 17:25:34. Total running time: 18min 23s
+-------------------------------------------------+
| Trial _objective_4df1a_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        77.8236 |
| time_total_s                            77.8236 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.71056 |
| eval_runtime                             2.2148 |
| eval_samples_per_second                 136.805 |
| eval_steps_per_second                     8.579 |
| objective                               0.71056 |
+-------------------------------------------------+

[2m[36m(_objective pid=11102)[0m {'eval_loss': 0.7105593085289001, 'eval_runtime': 2.2148, 'eval_samples_per_second': 136.805, 'eval_steps_per_second': 8.579, 'epoch': 1.0}


[2m[36m(_objective pid=11102)[0m                                                   
[2m[36m(_objective pid=11102)[0m                                                [A 20%|██        | 341/1705 [01:12<03:58,  5.73it/s]
[2m[36m(_objective pid=11102)[0m 100%|██████████| 19/19 [00:02<00:00,  8.61it/s][A
                                               [A
 20%|██        | 342/1705 [01:17<53:41,  2.36s/it]
 20%|██        | 343/1705 [01:17<38:55,  1.71s/it]
 20%|██        | 344/1705 [01:18<28:35,  1.26s/it]
 20%|██        | 345/1705 [01:18<21:22,  1.06it/s]
 20%|██        | 346/1705 [01:18<16:18,  1.39it/s]
 20%|██        | 347/1705 [01:18<12:46,  1.77it/s]
 20%|██        | 348/1705 [01:18<10:18,  2.19it/s]
 20%|██        | 349/1705 [01:19<08:35,  2.63it/s]
 21%|██        | 350/1705 [01:19<07:22,  3.06it/s]
 21%|██        | 351/1705 [01:19<06:30,  3.47it/s]
 21%|██        | 352/1705 [01:19<05:56,  3.79it/s]
 21%|██        | 353/1705 [01:19<05:30,  4.09it/s]
 21%|██        | 354/

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:25:43. Total running time: 18min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        1            77.8236      0.710559      0.710559           2.2148             

 21%|██        | 361/1705 [01:21<04:34,  4.90it/s]
 21%|██        | 362/1705 [01:21<04:32,  4.92it/s]
 21%|██▏       | 363/1705 [01:21<04:30,  4.96it/s]
 21%|██▏       | 364/1705 [01:22<04:30,  4.97it/s]
 21%|██▏       | 365/1705 [01:22<04:29,  4.97it/s]
 21%|██▏       | 366/1705 [01:22<04:29,  4.96it/s]
 22%|██▏       | 367/1705 [01:22<04:28,  4.99it/s]
 22%|██▏       | 368/1705 [01:22<04:27,  5.01it/s]
 22%|██▏       | 369/1705 [01:23<04:27,  4.99it/s]
 22%|██▏       | 370/1705 [01:23<04:27,  4.99it/s]
 22%|██▏       | 371/1705 [01:23<04:28,  4.97it/s]
 22%|██▏       | 372/1705 [01:23<04:28,  4.97it/s]
 22%|██▏       | 373/1705 [01:23<04:28,  4.96it/s]
 22%|██▏       | 374/1705 [01:24<04:28,  4.96it/s]
 22%|██▏       | 375/1705 [01:24<04:29,  4.94it/s]
 22%|██▏       | 376/1705 [01:24<04:28,  4.95it/s]
 22%|██▏       | 377/1705 [01:24<04:27,  4.96it/s]
 22%|██▏       | 378/1705 [01:24<04:26,  4.98it/s]
 22%|██▏       | 379/1705 [01:25<04:25,  4.99it/s]
 22%|██▏       | 380/1705 [01:2

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:26:13. Total running time: 19min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        1            77.8236      0.710559      0.710559           2.2148              

 30%|██▉       | 508/1705 [01:51<04:08,  4.82it/s]
 30%|██▉       | 509/1705 [01:51<04:08,  4.81it/s]
 30%|██▉       | 510/1705 [01:51<04:07,  4.82it/s]
 30%|██▉       | 511/1705 [01:52<04:08,  4.80it/s]
 30%|███       | 512/1705 [01:52<04:10,  4.75it/s]
 30%|███       | 513/1705 [01:52<04:07,  4.81it/s]
 30%|███       | 514/1705 [01:52<04:07,  4.81it/s]
 30%|███       | 515/1705 [01:53<04:07,  4.82it/s]
 30%|███       | 516/1705 [01:53<04:07,  4.81it/s]
 30%|███       | 517/1705 [01:53<04:07,  4.79it/s]
 30%|███       | 518/1705 [01:53<04:08,  4.78it/s]
 30%|███       | 519/1705 [01:53<04:07,  4.79it/s]
 30%|███       | 520/1705 [01:54<04:09,  4.76it/s]
 31%|███       | 521/1705 [01:54<04:08,  4.77it/s]
 31%|███       | 522/1705 [01:54<04:08,  4.76it/s]
 31%|███       | 523/1705 [01:54<04:09,  4.73it/s]
 31%|███       | 524/1705 [01:54<04:09,  4.73it/s]
 31%|███       | 525/1705 [01:55<04:05,  4.80it/s]
 31%|███       | 526/1705 [01:55<04:04,  4.82it/s]
 31%|███       | 527/1705 [01:5

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:26:43. Total running time: 19min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        1            77.8236      0.710559      0.710559           2.2148             

 38%|███▊      | 654/1705 [02:21<03:34,  4.90it/s]
 38%|███▊      | 655/1705 [02:21<03:34,  4.90it/s]
 38%|███▊      | 656/1705 [02:22<03:33,  4.91it/s]
 39%|███▊      | 657/1705 [02:22<03:34,  4.88it/s]
 39%|███▊      | 658/1705 [02:22<03:34,  4.87it/s]
 39%|███▊      | 659/1705 [02:22<03:35,  4.86it/s]
 39%|███▊      | 660/1705 [02:22<03:34,  4.88it/s]
 39%|███▉      | 661/1705 [02:23<03:34,  4.87it/s]
 39%|███▉      | 662/1705 [02:23<03:34,  4.86it/s]
 39%|███▉      | 663/1705 [02:23<03:34,  4.87it/s]
 39%|███▉      | 664/1705 [02:23<03:33,  4.88it/s]
 39%|███▉      | 665/1705 [02:23<03:33,  4.88it/s]
 39%|███▉      | 666/1705 [02:24<03:33,  4.87it/s]
 39%|███▉      | 667/1705 [02:24<03:34,  4.84it/s]
 39%|███▉      | 668/1705 [02:24<03:35,  4.82it/s]
 39%|███▉      | 669/1705 [02:24<03:34,  4.83it/s]
 39%|███▉      | 670/1705 [02:24<03:33,  4.84it/s]
 39%|███▉      | 671/1705 [02:25<03:33,  4.84it/s]
 39%|███▉      | 672/1705 [02:25<03:34,  4.82it/s]
 39%|███▉      | 673/1705 [02:2

[2m[36m(_objective pid=11102)[0m {'loss': 0.7081, 'learning_rate': 5.788235442727742e-08, 'epoch': 2.0}


[2m[36m(_objective pid=11102)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.68it/s][A
[2m[36m(_objective pid=11102)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.70it/s][A
[2m[36m(_objective pid=11102)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.03it/s][A
[2m[36m(_objective pid=11102)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.85it/s][A
[2m[36m(_objective pid=11102)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.86it/s][A
[2m[36m(_objective pid=11102)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.87it/s][A
[2m[36m(_objective pid=11102)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.70it/s][A
[2m[36m(_objective pid=11102)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.69it/s][A
[2m[36m(_objective pid=11102)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.24it/s][A
[2m[36m(_objective pid=11102)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.61it/s][A
[2m[36m(_objective pid=11102)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.43it/s][A
[2m[36m(_objective pid=11102)[0m 

Trial _objective_4df1a_00004 finished iteration 2 at 2023-09-21 17:26:52. Total running time: 19min 40s
+-------------------------------------------------+
| Trial _objective_4df1a_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        77.1034 |
| time_total_s                            154.927 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.70588 |
| eval_runtime                             2.2297 |
| eval_samples_per_second                 135.895 |
| eval_steps_per_second                     8.521 |
| objective                               0.70588 |
+-------------------------------------------------+

[2m[36m(_objective pid=11102)[0m {'eval_loss': 0.7058844566345215, 'eval_runtime': 2.2297, 'eval_samples_per_second': 135.895, 'eval_steps_per_second': 8.521, 'epoch': 2.0}


[2m[36m(_objective pid=11102)[0m                                                   
[2m[36m(_objective pid=11102)[0m                                                [A 40%|████      | 682/1705 [02:29<03:32,  4.82it/s]
[2m[36m(_objective pid=11102)[0m 100%|██████████| 19/19 [00:02<00:00,  8.70it/s][A
                                               [A
 40%|████      | 683/1705 [02:34<31:25,  1.84s/it]
 40%|████      | 684/1705 [02:34<24:26,  1.44s/it]
 40%|████      | 685/1705 [02:35<18:55,  1.11s/it]
 40%|████      | 686/1705 [02:35<14:40,  1.16it/s]
 40%|████      | 687/1705 [02:35<11:29,  1.48it/s]
 40%|████      | 688/1705 [02:35<09:11,  1.84it/s]
 40%|████      | 689/1705 [02:35<07:30,  2.26it/s]
 40%|████      | 690/1705 [02:36<06:18,  2.68it/s]
 41%|████      | 691/1705 [02:36<05:26,  3.10it/s]
 41%|████      | 692/1705 [02:36<04:49,  3.50it/s]
 41%|████      | 693/1705 [02:36<04:26,  3.80it/s]
 41%|████      | 694/1705 [02:36<04:07,  4.08it/s]
 41%|████      | 695/

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:27:14. Total running time: 20min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        2            154.927      0.705884      0.705884           2.2297              

 45%|████▍     | 764/1705 [02:51<03:10,  4.93it/s]
 45%|████▍     | 765/1705 [02:52<03:10,  4.92it/s]
 45%|████▍     | 766/1705 [02:52<03:10,  4.92it/s]
 45%|████▍     | 767/1705 [02:52<03:10,  4.92it/s]
 45%|████▌     | 768/1705 [02:52<03:09,  4.93it/s]
 45%|████▌     | 769/1705 [02:52<03:09,  4.94it/s]
 45%|████▌     | 770/1705 [02:53<03:09,  4.94it/s]
 45%|████▌     | 771/1705 [02:53<03:09,  4.94it/s]
 45%|████▌     | 772/1705 [02:53<03:10,  4.91it/s]
 45%|████▌     | 773/1705 [02:53<03:10,  4.90it/s]
 45%|████▌     | 774/1705 [02:53<03:09,  4.90it/s]
 45%|████▌     | 775/1705 [02:54<03:09,  4.90it/s]
 46%|████▌     | 776/1705 [02:54<03:09,  4.89it/s]
 46%|████▌     | 777/1705 [02:54<03:09,  4.89it/s]
 46%|████▌     | 778/1705 [02:54<03:09,  4.89it/s]
 46%|████▌     | 779/1705 [02:54<03:10,  4.86it/s]
 46%|████▌     | 780/1705 [02:55<03:09,  4.89it/s]
 46%|████▌     | 781/1705 [02:55<03:08,  4.89it/s]
 46%|████▌     | 782/1705 [02:55<03:08,  4.89it/s]
 46%|████▌     | 783/1705 [02:5

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:27:44. Total running time: 20min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        2            154.927      0.705884      0.705884           2.2297             

 53%|█████▎    | 909/1705 [03:21<02:42,  4.89it/s]
 53%|█████▎    | 910/1705 [03:22<02:42,  4.89it/s]
 53%|█████▎    | 911/1705 [03:22<02:43,  4.86it/s]
 53%|█████▎    | 912/1705 [03:22<02:43,  4.85it/s]
 54%|█████▎    | 913/1705 [03:22<02:43,  4.84it/s]
 54%|█████▎    | 914/1705 [03:22<02:43,  4.84it/s]
 54%|█████▎    | 915/1705 [03:23<02:43,  4.83it/s]
 54%|█████▎    | 916/1705 [03:23<02:43,  4.84it/s]
 54%|█████▍    | 917/1705 [03:23<02:42,  4.85it/s]
 54%|█████▍    | 918/1705 [03:23<02:42,  4.85it/s]
 54%|█████▍    | 919/1705 [03:23<02:41,  4.86it/s]
 54%|█████▍    | 920/1705 [03:24<02:41,  4.85it/s]
 54%|█████▍    | 921/1705 [03:24<02:40,  4.88it/s]
 54%|█████▍    | 922/1705 [03:24<02:39,  4.90it/s]
 54%|█████▍    | 923/1705 [03:24<02:39,  4.90it/s]
 54%|█████▍    | 924/1705 [03:24<02:39,  4.90it/s]
 54%|█████▍    | 925/1705 [03:25<02:39,  4.90it/s]
 54%|█████▍    | 926/1705 [03:25<02:39,  4.90it/s]
 54%|█████▍    | 927/1705 [03:25<02:38,  4.92it/s]
 54%|█████▍    | 928/1705 [03:2

[2m[36m(_objective pid=11102)[0m {'loss': 0.7035, 'learning_rate': 8.682353164091614e-08, 'epoch': 3.0}


[2m[36m(_objective pid=11102)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.03it/s][A
[2m[36m(_objective pid=11102)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.43it/s][A
[2m[36m(_objective pid=11102)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.95it/s][A
[2m[36m(_objective pid=11102)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.79it/s][A
[2m[36m(_objective pid=11102)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.82it/s][A
[2m[36m(_objective pid=11102)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.76it/s][A
[2m[36m(_objective pid=11102)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.75it/s][A
[2m[36m(_objective pid=11102)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.62it/s][A
[2m[36m(_objective pid=11102)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.59it/s][A
[2m[36m(_objective pid=11102)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.52it/s][A
[2m[36m(_objective pid=11102)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.41it/s][A
[2m[36m(_objective pid=11102)[0m 

Trial _objective_4df1a_00004 finished iteration 3 at 2023-09-21 17:28:09. Total running time: 20min 58s
+-------------------------------------------------+
| Trial _objective_4df1a_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        77.7207 |
| time_total_s                            232.648 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                                0.6989 |
| eval_runtime                              2.234 |
| eval_samples_per_second                 135.632 |
| eval_steps_per_second                     8.505 |
| objective                                0.6989 |
+-------------------------------------------------+

[2m[36m(_objective pid=11102)[0m {'eval_loss': 0.6989032030105591, 'eval_runtime': 2.234, 'eval_samples_per_second': 135.632, 'eval_steps_per_second': 8.505, 'epoch': 3.0}
Trial status: 4 TERMINATED | 1 RUNNING | 5 P

 60%|██████    | 1024/1705 [03:52<21:14,  1.87s/it]
 60%|██████    | 1025/1705 [03:52<16:33,  1.46s/it]
 60%|██████    | 1026/1705 [03:52<12:47,  1.13s/it]
 60%|██████    | 1027/1705 [03:53<09:55,  1.14it/s]
 60%|██████    | 1028/1705 [03:53<07:45,  1.45it/s]
 60%|██████    | 1029/1705 [03:53<06:11,  1.82it/s]
 60%|██████    | 1030/1705 [03:53<05:04,  2.22it/s]
 60%|██████    | 1031/1705 [03:54<04:15,  2.64it/s]
 61%|██████    | 1032/1705 [03:54<03:40,  3.06it/s]
 61%|██████    | 1033/1705 [03:54<03:14,  3.45it/s]
 61%|██████    | 1034/1705 [03:54<02:57,  3.78it/s]
 61%|██████    | 1035/1705 [03:54<02:44,  4.08it/s]
 61%|██████    | 1036/1705 [03:55<02:35,  4.30it/s]
 61%|██████    | 1037/1705 [03:55<02:28,  4.49it/s]
 61%|██████    | 1038/1705 [03:55<02:23,  4.64it/s]
 61%|██████    | 1039/1705 [03:55<02:20,  4.73it/s]
 61%|██████    | 1040/1705 [03:55<02:18,  4.79it/s]
 61%|██████    | 1041/1705 [03:56<02:17,  4.84it/s]
 61%|██████    | 1042/1705 [03:56<02:15,  4.88it/s]
 61%|██████ 

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:28:44. Total running time: 21min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        3            232.648      0.698903      0.698903           2.234              

 68%|██████▊   | 1167/1705 [04:21<01:50,  4.87it/s]
 69%|██████▊   | 1168/1705 [04:22<01:51,  4.82it/s]
 69%|██████▊   | 1169/1705 [04:22<01:52,  4.78it/s]
 69%|██████▊   | 1170/1705 [04:22<01:51,  4.80it/s]
 69%|██████▊   | 1171/1705 [04:22<01:51,  4.80it/s]
 69%|██████▊   | 1172/1705 [04:22<01:50,  4.82it/s]
 69%|██████▉   | 1173/1705 [04:23<01:50,  4.80it/s]
 69%|██████▉   | 1174/1705 [04:23<01:50,  4.82it/s]
 69%|██████▉   | 1175/1705 [04:23<01:50,  4.81it/s]
 69%|██████▉   | 1176/1705 [04:23<01:49,  4.82it/s]
 69%|██████▉   | 1177/1705 [04:23<01:49,  4.82it/s]
 69%|██████▉   | 1178/1705 [04:24<01:49,  4.81it/s]
 69%|██████▉   | 1179/1705 [04:24<01:48,  4.84it/s]
 69%|██████▉   | 1180/1705 [04:24<01:48,  4.83it/s]
 69%|██████▉   | 1181/1705 [04:24<01:49,  4.78it/s]
 69%|██████▉   | 1182/1705 [04:24<01:49,  4.80it/s]
 69%|██████▉   | 1183/1705 [04:25<01:48,  4.80it/s]
 69%|██████▉   | 1184/1705 [04:25<01:48,  4.80it/s]
 70%|██████▉   | 1185/1705 [04:25<01:48,  4.79it/s]
 70%|██████▉

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:29:14. Total running time: 22min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        3            232.648      0.698903      0.698903           2.234               

 77%|███████▋  | 1313/1705 [04:51<01:19,  4.95it/s]
 77%|███████▋  | 1314/1705 [04:52<01:19,  4.94it/s]
 77%|███████▋  | 1315/1705 [04:52<01:19,  4.93it/s]
 77%|███████▋  | 1316/1705 [04:52<01:18,  4.94it/s]
 77%|███████▋  | 1317/1705 [04:52<01:18,  4.93it/s]
 77%|███████▋  | 1318/1705 [04:52<01:18,  4.94it/s]
 77%|███████▋  | 1319/1705 [04:53<01:18,  4.93it/s]
 77%|███████▋  | 1320/1705 [04:53<01:18,  4.93it/s]
 77%|███████▋  | 1321/1705 [04:53<01:18,  4.90it/s]
 78%|███████▊  | 1322/1705 [04:53<01:17,  4.91it/s]
 78%|███████▊  | 1323/1705 [04:53<01:17,  4.92it/s]
 78%|███████▊  | 1324/1705 [04:54<01:17,  4.92it/s]
 78%|███████▊  | 1325/1705 [04:54<01:17,  4.92it/s]
 78%|███████▊  | 1326/1705 [04:54<01:17,  4.92it/s]
 78%|███████▊  | 1327/1705 [04:54<01:16,  4.92it/s]
 78%|███████▊  | 1328/1705 [04:54<01:16,  4.92it/s]
 78%|███████▊  | 1329/1705 [04:55<01:16,  4.94it/s]
 78%|███████▊  | 1330/1705 [04:55<01:15,  4.95it/s]
 78%|███████▊  | 1331/1705 [04:55<01:15,  4.94it/s]
 78%|███████

[2m[36m(_objective pid=11102)[0m {'loss': 0.6945, 'learning_rate': 1.1576470885455484e-07, 'epoch': 4.0}


[2m[36m(_objective pid=11102)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.12it/s][A
[2m[36m(_objective pid=11102)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.80it/s][A
[2m[36m(_objective pid=11102)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.05it/s][A
[2m[36m(_objective pid=11102)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.96it/s][A
[2m[36m(_objective pid=11102)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.94it/s][A
[2m[36m(_objective pid=11102)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.95it/s][A
[2m[36m(_objective pid=11102)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.91it/s][A
[2m[36m(_objective pid=11102)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.80it/s][A
[2m[36m(_objective pid=11102)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.67it/s][A
[2m[36m(_objective pid=11102)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.55it/s][A
[2m[36m(_objective pid=11102)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.28it/s][A
[2m[36m(_objective pid=11102)[0m 

Trial _objective_4df1a_00004 finished iteration 4 at 2023-09-21 17:29:26. Total running time: 22min 15s
+-------------------------------------------------+
| Trial _objective_4df1a_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                         77.184 |
| time_total_s                            309.832 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.69141 |
| eval_runtime                             2.2163 |
| eval_samples_per_second                 136.713 |
| eval_steps_per_second                     8.573 |
| objective                               0.69141 |
+-------------------------------------------------+

[2m[36m(_objective pid=11102)[0m {'eval_loss': 0.6914106607437134, 'eval_runtime': 2.2163, 'eval_samples_per_second': 136.713, 'eval_steps_per_second': 8.573, 'epoch': 4.0}


 80%|████████  | 1365/1705 [05:11<15:49,  2.79s/it]
 80%|████████  | 1366/1705 [05:11<11:24,  2.02s/it]
 80%|████████  | 1367/1705 [05:11<08:17,  1.47s/it]
 80%|████████  | 1368/1705 [05:11<06:07,  1.09s/it]
 80%|████████  | 1369/1705 [05:11<04:36,  1.22it/s]
 80%|████████  | 1370/1705 [05:12<03:32,  1.57it/s]
 80%|████████  | 1371/1705 [05:12<02:49,  1.97it/s]
 80%|████████  | 1372/1705 [05:12<02:17,  2.42it/s]
 81%|████████  | 1373/1705 [05:12<01:56,  2.86it/s]
 81%|████████  | 1374/1705 [05:12<01:41,  3.26it/s]
 81%|████████  | 1375/1705 [05:13<01:30,  3.65it/s]
 81%|████████  | 1376/1705 [05:13<01:23,  3.94it/s]
 81%|████████  | 1377/1705 [05:13<01:18,  4.19it/s]
 81%|████████  | 1378/1705 [05:13<01:14,  4.39it/s]
 81%|████████  | 1379/1705 [05:13<01:11,  4.54it/s]
 81%|████████  | 1380/1705 [05:14<01:09,  4.65it/s]
 81%|████████  | 1381/1705 [05:14<01:08,  4.73it/s]
 81%|████████  | 1382/1705 [05:14<01:07,  4.77it/s]
 81%|████████  | 1383/1705 [05:14<01:06,  4.83it/s]
 81%|███████

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:29:44. Total running time: 22min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        4            309.832      0.691411      0.691411           2.2163             

 83%|████████▎ | 1418/1705 [05:21<00:58,  4.93it/s]
 83%|████████▎ | 1419/1705 [05:22<00:58,  4.92it/s]
 83%|████████▎ | 1420/1705 [05:22<00:58,  4.90it/s]
 83%|████████▎ | 1421/1705 [05:22<00:57,  4.93it/s]
 83%|████████▎ | 1422/1705 [05:22<00:57,  4.93it/s]
 83%|████████▎ | 1423/1705 [05:22<00:57,  4.94it/s]
 84%|████████▎ | 1424/1705 [05:23<00:56,  4.94it/s]
 84%|████████▎ | 1425/1705 [05:23<00:56,  4.94it/s]
 84%|████████▎ | 1426/1705 [05:23<00:56,  4.94it/s]
 84%|████████▎ | 1427/1705 [05:23<00:56,  4.94it/s]
 84%|████████▍ | 1428/1705 [05:23<00:56,  4.93it/s]
 84%|████████▍ | 1429/1705 [05:24<00:56,  4.92it/s]
 84%|████████▍ | 1430/1705 [05:24<00:55,  4.93it/s]
 84%|████████▍ | 1431/1705 [05:24<00:55,  4.93it/s]
 84%|████████▍ | 1432/1705 [05:24<00:55,  4.93it/s]
 84%|████████▍ | 1433/1705 [05:24<00:55,  4.93it/s]
 84%|████████▍ | 1434/1705 [05:25<00:55,  4.92it/s]
 84%|████████▍ | 1435/1705 [05:25<00:54,  4.92it/s]
 84%|████████▍ | 1436/1705 [05:25<00:54,  4.92it/s]
 84%|███████

Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:30:14. Total running time: 23min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        4            309.832      0.691411      0.691411           2.2163              

 92%|█████████▏| 1564/1705 [05:51<00:29,  4.86it/s]
 92%|█████████▏| 1565/1705 [05:52<00:28,  4.84it/s]
 92%|█████████▏| 1566/1705 [05:52<00:28,  4.83it/s]
 92%|█████████▏| 1567/1705 [05:52<00:28,  4.81it/s]
 92%|█████████▏| 1568/1705 [05:52<00:28,  4.82it/s]
 92%|█████████▏| 1569/1705 [05:53<00:28,  4.85it/s]
 92%|█████████▏| 1570/1705 [05:53<00:27,  4.84it/s]
 92%|█████████▏| 1571/1705 [05:53<00:27,  4.85it/s]
 92%|█████████▏| 1572/1705 [05:53<00:27,  4.81it/s]
 92%|█████████▏| 1573/1705 [05:53<00:27,  4.81it/s]
 92%|█████████▏| 1574/1705 [05:54<00:26,  4.87it/s]
 92%|█████████▏| 1575/1705 [05:54<00:26,  4.88it/s]
 92%|█████████▏| 1576/1705 [05:54<00:26,  4.87it/s]
 92%|█████████▏| 1577/1705 [05:54<00:26,  4.85it/s]
 93%|█████████▎| 1578/1705 [05:54<00:26,  4.86it/s]
 93%|█████████▎| 1579/1705 [05:55<00:25,  4.87it/s]
 93%|█████████▎| 1580/1705 [05:55<00:25,  4.89it/s]
 93%|█████████▎| 1581/1705 [05:55<00:25,  4.89it/s]
 93%|█████████▎| 1582/1705 [05:55<00:25,  4.88it/s]
 93%|███████

[2m[36m(_objective pid=11102)[0m {'loss': 0.6895, 'learning_rate': 1.4470588606819355e-07, 'epoch': 5.0}


[2m[36m(_objective pid=11102)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.67it/s][A
[2m[36m(_objective pid=11102)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.85it/s][A
[2m[36m(_objective pid=11102)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.28it/s][A
[2m[36m(_objective pid=11102)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.97it/s][A
[2m[36m(_objective pid=11102)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.83it/s][A
[2m[36m(_objective pid=11102)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.82it/s][A
[2m[36m(_objective pid=11102)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.79it/s][A


Trial status: 4 TERMINATED | 1 RUNNING | 5 PENDING
Current time: 2023-09-21 17:30:44. Total running time: 23min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00004   RUNNING          2.3102e-06                     5   25.0818                         8        4            309.832      0.691411      0.691411           2.2163             

[2m[36m(_objective pid=11102)[0m 
[2m[36m(_objective pid=11102)[0m  63%|██████▎   | 12/19 [00:01<00:00,  8.56it/s][A
[2m[36m(_objective pid=11102)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.70it/s][A
[2m[36m(_objective pid=11102)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.70it/s][A
[2m[36m(_objective pid=11102)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.48it/s][A
[2m[36m(_objective pid=11102)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.44it/s][A
[2m[36m(_objective pid=11102)[0m 
 89%|████████▉ | 17/19 [00:01<00:00,  8.44it/s][A
[2m[36m(_objective pid=11102)[0m 
 95%|█████████▍| 18/19 [00:02<00:00,  8.44it/s][A


Trial _objective_4df1a_00004 finished iteration 5 at 2023-09-21 17:30:45. Total running time: 23min 33s
+-------------------------------------------------+
| Trial _objective_4df1a_00004 result             |
+-------------------------------------------------+
| time_this_iter_s                        78.5181 |
| time_total_s                             388.35 |
| training_iteration                            5 |
| epoch                                         5 |
| eval_loss                               0.68155 |
| eval_runtime                             2.2276 |
| eval_samples_per_second                 136.019 |
| eval_steps_per_second                     8.529 |
| objective                               0.68155 |
+-------------------------------------------------+

[2m[36m(_objective pid=11102)[0m {'eval_loss': 0.6815542578697205, 'eval_runtime': 2.2276, 'eval_samples_per_second': 136.019, 'eval_steps_per_second': 8.529, 'epoch': 5.0}


[2m[36m(_objective pid=11102)[0m 
[2m[36m(_objective pid=11102)[0m 100%|██████████| 19/19 [00:02<00:00,  8.59it/s][A                                                   
[2m[36m(_objective pid=11102)[0m                                                [A100%|██████████| 1705/1705 [06:23<00:00,  4.91it/s]
[2m[36m(_objective pid=11102)[0m 100%|██████████| 19/19 [00:02<00:00,  8.59it/s][A
[2m[36m(_objective pid=11102)[0m                                                [A


Trial _objective_4df1a_00004 completed after 5 iterations at 2023-09-21 17:30:50. Total running time: 23min 39s

[2m[36m(_objective pid=11102)[0m {'train_runtime': 388.4142, 'train_samples_per_second': 35.04, 'train_steps_per_second': 4.39, 'train_loss': 0.7016100483555947, 'epoch': 5.0}


[2m[36m(_objective pid=11102)[0m                                                    100%|██████████| 1705/1705 [06:28<00:00,  4.91it/s]100%|██████████| 1705/1705 [06:28<00:00,  4.39it/s]


Trial _objective_4df1a_00005 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00005 config             |
+-------------------------------------------------+
| learning_rate                             1e-05 |
| num_train_epochs                              4 |
| per_device_train_batch_size                  16 |
| seed                                    1.89943 |
+-------------------------------------------------+



[2m[36m(_objective pid=12760)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=12760)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/684 [00:00<?, ?it/s]
  0%|          | 1/684 [00:00<06:18,  1.81it/s]
  0%|          | 2/684 [00:00<04:40,  2.43it/s]
  0%|          | 3/684 [00:01<04:17,  2.65it/s]
  1%|          | 4/684 [00:01<04:10,  2.72it/s]
  1%|          | 5/684 [00:01<04:03,  2.78it/s]
  1%|          | 6/684 [00:02<03:59,  2.83it/s]
  1%|          | 7/684 [00:02<03:59,  2.83it/s]
  1%|          | 8/684 [00:02<03:57,  2.85it/s]
  1%|▏         | 9/684 [00:03<03:55,  2.87it/s]
  1%|▏         | 10/684 [00:03<03:54,  2.87it/s]
  2%|▏         | 11/684 [00:03<03:53,  2.88it/s]
  2%|▏         | 12/684 [00:04<03:51,  2.90it/s]
  2%|▏         |

Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:31:14. Total running time: 24min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16                                                                                       

  5%|▍         | 32/684 [00:11<03:47,  2.87it/s]
  5%|▍         | 33/684 [00:11<03:46,  2.87it/s]
  5%|▍         | 34/684 [00:11<03:46,  2.87it/s]
  5%|▌         | 35/684 [00:12<03:46,  2.86it/s]
  5%|▌         | 36/684 [00:12<03:46,  2.86it/s]
  5%|▌         | 37/684 [00:13<03:45,  2.86it/s]
  6%|▌         | 38/684 [00:13<03:46,  2.85it/s]
  6%|▌         | 39/684 [00:13<03:46,  2.85it/s]
  6%|▌         | 40/684 [00:14<03:45,  2.86it/s]
  6%|▌         | 41/684 [00:14<03:46,  2.84it/s]
  6%|▌         | 42/684 [00:14<03:47,  2.82it/s]
  6%|▋         | 43/684 [00:15<03:47,  2.82it/s]
  6%|▋         | 44/684 [00:15<03:46,  2.82it/s]
  7%|▋         | 45/684 [00:15<03:47,  2.81it/s]
  7%|▋         | 46/684 [00:16<03:46,  2.82it/s]
  7%|▋         | 47/684 [00:16<03:46,  2.81it/s]
  7%|▋         | 48/684 [00:16<03:47,  2.80it/s]
  7%|▋         | 49/684 [00:17<03:46,  2.80it/s]
  7%|▋         | 50/684 [00:17<03:45,  2.81it/s]
  7%|▋         | 51/684 [00:17<03:44,  2.82it/s]
  8%|▊         | 52/

Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:31:44. Total running time: 24min 32s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16                                                                                      

 17%|█▋        | 116/684 [00:41<03:25,  2.77it/s]
 17%|█▋        | 117/684 [00:41<03:25,  2.76it/s]
 17%|█▋        | 118/684 [00:42<03:25,  2.76it/s]
 17%|█▋        | 119/684 [00:42<03:24,  2.76it/s]
 18%|█▊        | 120/684 [00:42<03:23,  2.77it/s]
 18%|█▊        | 121/684 [00:43<03:23,  2.76it/s]
 18%|█▊        | 122/684 [00:43<03:23,  2.77it/s]
 18%|█▊        | 123/684 [00:43<03:22,  2.77it/s]
 18%|█▊        | 124/684 [00:44<03:21,  2.77it/s]
 18%|█▊        | 125/684 [00:44<03:21,  2.78it/s]
 18%|█▊        | 126/684 [00:44<03:21,  2.77it/s]
 19%|█▊        | 127/684 [00:45<03:20,  2.77it/s]
 19%|█▊        | 128/684 [00:45<03:20,  2.77it/s]
 19%|█▉        | 129/684 [00:46<03:19,  2.78it/s]
 19%|█▉        | 130/684 [00:46<03:19,  2.77it/s]
 19%|█▉        | 131/684 [00:46<03:18,  2.78it/s]
 19%|█▉        | 132/684 [00:47<03:17,  2.79it/s]
 19%|█▉        | 133/684 [00:47<03:17,  2.79it/s]
 20%|█▉        | 134/684 [00:47<03:16,  2.80it/s]
 20%|█▉        | 135/684 [00:48<03:16,  2.79it/s]


[2m[36m(_objective pid=12760)[0m {'loss': 0.7064, 'learning_rate': 7.040781271962386e-08, 'epoch': 1.0}


[2m[36m(_objective pid=12760)[0m                                                   25%|██▌       | 171/684 [01:00<02:22,  3.60it/s]
[2m[36m(_objective pid=12760)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=12760)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.83it/s][A
[2m[36m(_objective pid=12760)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.95it/s][A
[2m[36m(_objective pid=12760)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.60it/s][A
[2m[36m(_objective pid=12760)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.10it/s][A
[2m[36m(_objective pid=12760)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.85it/s][A
[2m[36m(_objective pid=12760)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.85it/s][A
[2m[36m(_objective pid=12760)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.77it/s][A
[2m[36m(_objective pid=12760)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.82it/s][A
[2m[36m(_objective pid=12760)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.69it/s]

Trial _objective_4df1a_00005 finished iteration 1 at 2023-09-21 17:32:06. Total running time: 24min 54s
+-------------------------------------------------+
| Trial _objective_4df1a_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        68.2526 |
| time_total_s                            68.2526 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.69253 |
| eval_runtime                             2.2251 |
| eval_samples_per_second                 136.174 |
| eval_steps_per_second                     8.539 |
| objective                               0.69253 |
+-------------------------------------------------+

[2m[36m(_objective pid=12760)[0m {'eval_loss': 0.6925253868103027, 'eval_runtime': 2.2251, 'eval_samples_per_second': 136.174, 'eval_steps_per_second': 8.539, 'epoch': 1.0}


[2m[36m(_objective pid=12760)[0m                                                  
[2m[36m(_objective pid=12760)[0m                                                [A 25%|██▌       | 171/684 [01:03<02:22,  3.60it/s]
[2m[36m(_objective pid=12760)[0m 100%|██████████| 19/19 [00:02<00:00,  8.56it/s][A
                                               [A
 25%|██▌       | 172/684 [01:08<21:02,  2.47s/it]
 25%|██▌       | 173/684 [01:08<15:34,  1.83s/it]
 25%|██▌       | 174/684 [01:09<11:45,  1.38s/it]
 26%|██▌       | 175/684 [01:09<09:07,  1.08s/it]
 26%|██▌       | 176/684 [01:09<07:15,  1.17it/s]
 26%|██▌       | 177/684 [01:10<05:56,  1.42it/s]
 26%|██▌       | 178/684 [01:10<05:03,  1.67it/s]
 26%|██▌       | 179/684 [01:10<04:24,  1.91it/s]
 26%|██▋       | 180/684 [01:11<03:56,  2.13it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:32:14. Total running time: 25min 2s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16        1            68.2526      0.692525      0.692525           2.2251              

 26%|██▋       | 181/684 [01:11<03:38,  2.30it/s]
 27%|██▋       | 182/684 [01:11<03:25,  2.44it/s]
 27%|██▋       | 183/684 [01:12<03:15,  2.56it/s]
 27%|██▋       | 184/684 [01:12<03:08,  2.65it/s]
 27%|██▋       | 185/684 [01:12<03:03,  2.72it/s]
 27%|██▋       | 186/684 [01:13<03:00,  2.77it/s]
 27%|██▋       | 187/684 [01:13<02:57,  2.80it/s]
 27%|██▋       | 188/684 [01:13<02:56,  2.82it/s]
 28%|██▊       | 189/684 [01:14<02:54,  2.84it/s]
 28%|██▊       | 190/684 [01:14<02:53,  2.85it/s]
 28%|██▊       | 191/684 [01:14<02:52,  2.85it/s]
 28%|██▊       | 192/684 [01:15<02:51,  2.87it/s]
 28%|██▊       | 193/684 [01:15<02:51,  2.87it/s]
 28%|██▊       | 194/684 [01:16<02:50,  2.87it/s]
 29%|██▊       | 195/684 [01:16<02:50,  2.86it/s]
 29%|██▊       | 196/684 [01:16<02:50,  2.87it/s]
 29%|██▉       | 197/684 [01:17<02:50,  2.85it/s]
 29%|██▉       | 198/684 [01:17<02:49,  2.86it/s]
 29%|██▉       | 199/684 [01:17<02:49,  2.87it/s]
 29%|██▉       | 200/684 [01:18<02:48,  2.87it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:32:44. Total running time: 25min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16        1            68.2526      0.692525      0.692525           2.2251             

 39%|███▉      | 266/684 [01:41<02:30,  2.78it/s]
 39%|███▉      | 267/684 [01:41<02:30,  2.77it/s]
 39%|███▉      | 268/684 [01:42<02:29,  2.79it/s]
 39%|███▉      | 269/684 [01:42<02:29,  2.78it/s]
 39%|███▉      | 270/684 [01:42<02:28,  2.79it/s]
 40%|███▉      | 271/684 [01:43<02:27,  2.80it/s]
 40%|███▉      | 272/684 [01:43<02:27,  2.79it/s]
 40%|███▉      | 273/684 [01:43<02:28,  2.78it/s]
 40%|████      | 274/684 [01:44<02:27,  2.78it/s]
 40%|████      | 275/684 [01:44<02:26,  2.79it/s]
 40%|████      | 276/684 [01:45<02:26,  2.79it/s]
 40%|████      | 277/684 [01:45<02:26,  2.78it/s]
 41%|████      | 278/684 [01:45<02:25,  2.79it/s]
 41%|████      | 279/684 [01:46<02:25,  2.77it/s]
 41%|████      | 280/684 [01:46<02:25,  2.77it/s]
 41%|████      | 281/684 [01:46<02:24,  2.79it/s]
 41%|████      | 282/684 [01:47<02:24,  2.78it/s]
 41%|████▏     | 283/684 [01:47<02:24,  2.78it/s]
 42%|████▏     | 284/684 [01:47<02:24,  2.76it/s]
 42%|████▏     | 285/684 [01:48<02:22,  2.79it/s]


[2m[36m(_objective pid=12760)[0m {'loss': 0.6967, 'learning_rate': 1.4081562543924773e-07, 'epoch': 2.0}


[2m[36m(_objective pid=12760)[0m                                                   50%|█████     | 342/684 [02:08<01:35,  3.57it/s]
[2m[36m(_objective pid=12760)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=12760)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.45it/s][A
[2m[36m(_objective pid=12760)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.88it/s][A
[2m[36m(_objective pid=12760)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.48it/s][A
[2m[36m(_objective pid=12760)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.03it/s][A
[2m[36m(_objective pid=12760)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  9.05it/s][A
[2m[36m(_objective pid=12760)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.96it/s][A
[2m[36m(_objective pid=12760)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.92it/s][A
[2m[36m(_objective pid=12760)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.84it/s][A
[2m[36m(_objective pid=12760)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.57it/s]

Trial _objective_4df1a_00005 finished iteration 2 at 2023-09-21 17:33:13. Total running time: 26min 2s
+-------------------------------------------------+
| Trial _objective_4df1a_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.5594 |
| time_total_s                            135.812 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.67777 |
| eval_runtime                             2.2395 |
| eval_samples_per_second                 135.299 |
| eval_steps_per_second                     8.484 |
| objective                               0.67777 |
+-------------------------------------------------+

[2m[36m(_objective pid=12760)[0m {'eval_loss': 0.6777711510658264, 'eval_runtime': 2.2395, 'eval_samples_per_second': 135.299, 'eval_steps_per_second': 8.484, 'epoch': 2.0}
Trial status: 5 TERMINATED | 1 RUNNING | 4 P

 50%|█████     | 343/684 [02:15<13:59,  2.46s/it]
 50%|█████     | 344/684 [02:16<10:20,  1.83s/it]
 50%|█████     | 345/684 [02:16<07:48,  1.38s/it]
 51%|█████     | 346/684 [02:16<06:02,  1.07s/it]
 51%|█████     | 347/684 [02:17<04:48,  1.17it/s]
 51%|█████     | 348/684 [02:17<03:56,  1.42it/s]
 51%|█████     | 349/684 [02:17<03:20,  1.67it/s]
 51%|█████     | 350/684 [02:18<02:54,  1.91it/s]
 51%|█████▏    | 351/684 [02:18<02:36,  2.13it/s]
 51%|█████▏    | 352/684 [02:19<02:24,  2.30it/s]
 52%|█████▏    | 353/684 [02:19<02:15,  2.43it/s]
 52%|█████▏    | 354/684 [02:19<02:09,  2.55it/s]
 52%|█████▏    | 355/684 [02:20<02:05,  2.62it/s]
 52%|█████▏    | 356/684 [02:20<02:02,  2.68it/s]
 52%|█████▏    | 357/684 [02:20<01:59,  2.74it/s]
 52%|█████▏    | 358/684 [02:21<01:58,  2.75it/s]
 52%|█████▏    | 359/684 [02:21<01:56,  2.80it/s]
 53%|█████▎    | 360/684 [02:21<01:55,  2.81it/s]
 53%|█████▎    | 361/684 [02:22<01:54,  2.81it/s]
 53%|█████▎    | 362/684 [02:22<01:54,  2.82it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:33:44. Total running time: 26min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16        2            135.812      0.677771      0.677771           2.2395             

 61%|██████    | 416/684 [02:41<01:35,  2.80it/s]
 61%|██████    | 417/684 [02:41<01:35,  2.80it/s]
 61%|██████    | 418/684 [02:42<01:34,  2.81it/s]
 61%|██████▏   | 419/684 [02:42<01:34,  2.80it/s]
 61%|██████▏   | 420/684 [02:43<01:33,  2.81it/s]
 62%|██████▏   | 421/684 [02:43<01:33,  2.83it/s]
 62%|██████▏   | 422/684 [02:43<01:32,  2.83it/s]
 62%|██████▏   | 423/684 [02:44<01:32,  2.82it/s]
 62%|██████▏   | 424/684 [02:44<01:32,  2.82it/s]
 62%|██████▏   | 425/684 [02:44<01:31,  2.83it/s]
 62%|██████▏   | 426/684 [02:45<01:31,  2.82it/s]
 62%|██████▏   | 427/684 [02:45<01:31,  2.82it/s]
 63%|██████▎   | 428/684 [02:45<01:30,  2.83it/s]
 63%|██████▎   | 429/684 [02:46<01:30,  2.82it/s]
 63%|██████▎   | 430/684 [02:46<01:29,  2.82it/s]
 63%|██████▎   | 431/684 [02:46<01:29,  2.82it/s]
 63%|██████▎   | 432/684 [02:47<01:29,  2.82it/s]
 63%|██████▎   | 433/684 [02:47<01:29,  2.81it/s]
 63%|██████▎   | 434/684 [02:48<01:28,  2.82it/s]
 64%|██████▎   | 435/684 [02:48<01:28,  2.82it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:34:14. Total running time: 27min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16        2            135.812      0.677771      0.677771           2.2395              

 73%|███████▎  | 501/684 [03:11<01:05,  2.80it/s]
 73%|███████▎  | 502/684 [03:12<01:04,  2.81it/s]
 74%|███████▎  | 503/684 [03:12<01:04,  2.81it/s]
 74%|███████▎  | 504/684 [03:12<01:03,  2.82it/s]
 74%|███████▍  | 505/684 [03:13<01:03,  2.82it/s]
 74%|███████▍  | 506/684 [03:13<01:03,  2.82it/s]
 74%|███████▍  | 507/684 [03:13<01:02,  2.82it/s]
 74%|███████▍  | 508/684 [03:14<01:02,  2.83it/s]
 74%|███████▍  | 509/684 [03:14<01:01,  2.83it/s]
 75%|███████▍  | 510/684 [03:14<01:01,  2.82it/s]
 75%|███████▍  | 511/684 [03:15<01:01,  2.82it/s]
 75%|███████▍  | 512/684 [03:15<01:00,  2.82it/s]
 75%|███████▌  | 513/684 [03:15<00:48,  3.56it/s]


[2m[36m(_objective pid=12760)[0m {'loss': 0.6778, 'learning_rate': 2.1122343815887163e-07, 'epoch': 3.0}


[2m[36m(_objective pid=12760)[0m 
[2m[36m(_objective pid=12760)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=12760)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.35it/s][A
[2m[36m(_objective pid=12760)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.92it/s][A
[2m[36m(_objective pid=12760)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.67it/s][A
[2m[36m(_objective pid=12760)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.79it/s][A
[2m[36m(_objective pid=12760)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.86it/s][A
[2m[36m(_objective pid=12760)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.81it/s][A
[2m[36m(_objective pid=12760)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.98it/s][A
[2m[36m(_objective pid=12760)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.88it/s][A
[2m[36m(_objective pid=12760)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.72it/s][A
[2m[36m(_objective pid=12760)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.58it/s][A
[2m[36m

Trial _objective_4df1a_00005 finished iteration 3 at 2023-09-21 17:34:21. Total running time: 27min 9s
+-------------------------------------------------+
| Trial _objective_4df1a_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.4054 |
| time_total_s                            203.217 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.65029 |
| eval_runtime                             2.2367 |
| eval_samples_per_second                 135.465 |
| eval_steps_per_second                     8.495 |
| objective                               0.65029 |
+-------------------------------------------------+

[2m[36m(_objective pid=12760)[0m {'eval_loss': 0.6502929329872131, 'eval_runtime': 2.2367, 'eval_samples_per_second': 135.465, 'eval_steps_per_second': 8.495, 'epoch': 3.0}


[2m[36m(_objective pid=12760)[0m                                                  
[2m[36m(_objective pid=12760)[0m                                                [A 75%|███████▌  | 513/684 [03:18<00:48,  3.56it/s]
[2m[36m(_objective pid=12760)[0m 100%|██████████| 19/19 [00:02<00:00,  8.51it/s][A
                                               [A
 75%|███████▌  | 514/684 [03:23<07:00,  2.48s/it]
 75%|███████▌  | 515/684 [03:23<05:10,  1.83s/it]
 75%|███████▌  | 516/684 [03:24<03:53,  1.39s/it]
 76%|███████▌  | 517/684 [03:24<03:00,  1.08s/it]
 76%|███████▌  | 518/684 [03:24<02:22,  1.16it/s]
 76%|███████▌  | 519/684 [03:25<01:56,  1.42it/s]
 76%|███████▌  | 520/684 [03:25<01:38,  1.67it/s]
 76%|███████▌  | 521/684 [03:25<01:25,  1.91it/s]
 76%|███████▋  | 522/684 [03:26<01:16,  2.13it/s]
 76%|███████▋  | 523/684 [03:26<01:09,  2.30it/s]
 77%|███████▋  | 524/684 [03:26<01:05,  2.45it/s]
 77%|███████▋  | 525/684 [03:27<01:02,  2.56it/s]
 77%|███████▋  | 526/684 [03:27<00:

Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:34:44. Total running time: 27min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16        3            203.217      0.650293      0.650293           2.2367             

 83%|████████▎ | 566/684 [03:41<00:41,  2.83it/s]
 83%|████████▎ | 567/684 [03:41<00:41,  2.82it/s]
 83%|████████▎ | 568/684 [03:42<00:40,  2.83it/s]
 83%|████████▎ | 569/684 [03:42<00:40,  2.86it/s]
 83%|████████▎ | 570/684 [03:43<00:40,  2.83it/s]
 83%|████████▎ | 571/684 [03:43<00:39,  2.83it/s]
 84%|████████▎ | 572/684 [03:43<00:39,  2.84it/s]
 84%|████████▍ | 573/684 [03:44<00:39,  2.81it/s]
 84%|████████▍ | 574/684 [03:44<00:39,  2.82it/s]
 84%|████████▍ | 575/684 [03:44<00:38,  2.82it/s]
 84%|████████▍ | 576/684 [03:45<00:38,  2.82it/s]
 84%|████████▍ | 577/684 [03:45<00:38,  2.81it/s]
 85%|████████▍ | 578/684 [03:45<00:37,  2.81it/s]
 85%|████████▍ | 579/684 [03:46<00:37,  2.80it/s]
 85%|████████▍ | 580/684 [03:46<00:37,  2.81it/s]
 85%|████████▍ | 581/684 [03:46<00:36,  2.81it/s]
 85%|████████▌ | 582/684 [03:47<00:36,  2.81it/s]
 85%|████████▌ | 583/684 [03:47<00:35,  2.81it/s]
 85%|████████▌ | 584/684 [03:48<00:35,  2.81it/s]
 86%|████████▌ | 585/684 [03:48<00:35,  2.80it/s]


Trial status: 5 TERMINATED | 1 RUNNING | 4 PENDING
Current time: 2023-09-21 17:35:14. Total running time: 28min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00005   RUNNING          1.12076e-05                    4    1.89943                       16        3            203.217      0.650293      0.650293           2.2367              

 95%|█████████▌| 651/684 [04:11<00:11,  2.82it/s]
 95%|█████████▌| 652/684 [04:12<00:11,  2.83it/s]
 95%|█████████▌| 653/684 [04:12<00:10,  2.83it/s]
 96%|█████████▌| 654/684 [04:12<00:10,  2.83it/s]
 96%|█████████▌| 655/684 [04:13<00:10,  2.83it/s]
 96%|█████████▌| 656/684 [04:13<00:09,  2.83it/s]
 96%|█████████▌| 657/684 [04:13<00:09,  2.82it/s]
 96%|█████████▌| 658/684 [04:14<00:09,  2.83it/s]
 96%|█████████▋| 659/684 [04:14<00:08,  2.83it/s]
 96%|█████████▋| 660/684 [04:15<00:08,  2.83it/s]
 97%|█████████▋| 661/684 [04:15<00:08,  2.83it/s]
 97%|█████████▋| 662/684 [04:15<00:07,  2.84it/s]
 97%|█████████▋| 663/684 [04:16<00:07,  2.84it/s]
 97%|█████████▋| 664/684 [04:16<00:07,  2.84it/s]
 97%|█████████▋| 665/684 [04:16<00:06,  2.84it/s]
 97%|█████████▋| 666/684 [04:17<00:06,  2.84it/s]
 98%|█████████▊| 667/684 [04:17<00:05,  2.83it/s]
 98%|█████████▊| 668/684 [04:17<00:05,  2.82it/s]
 98%|█████████▊| 669/684 [04:18<00:05,  2.82it/s]
 98%|█████████▊| 670/684 [04:18<00:04,  2.81it/s]


[2m[36m(_objective pid=12760)[0m {'loss': 0.6507, 'learning_rate': 2.8163125087849545e-07, 'epoch': 4.0}


[2m[36m(_objective pid=12760)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.16it/s][A
[2m[36m(_objective pid=12760)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.68it/s][A
[2m[36m(_objective pid=12760)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.69it/s][A
[2m[36m(_objective pid=12760)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.05it/s][A
[2m[36m(_objective pid=12760)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.87it/s][A
[2m[36m(_objective pid=12760)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.93it/s][A
[2m[36m(_objective pid=12760)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.87it/s][A
[2m[36m(_objective pid=12760)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.86it/s][A
[2m[36m(_objective pid=12760)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.86it/s][A
[2m[36m(_objective pid=12760)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.82it/s][A
[2m[36m(_objective pid=12760)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.65it/s][A
[2m[36m(_objective pid=12760)[0m 


Trial _objective_4df1a_00005 finished iteration 4 at 2023-09-21 17:35:28. Total running time: 28min 17s
+-------------------------------------------------+
| Trial _objective_4df1a_00005 result             |
+-------------------------------------------------+
| time_this_iter_s                        67.5064 |
| time_total_s                            270.724 |
| training_iteration                            4 |
| epoch                                         4 |
| eval_loss                               0.62064 |
| eval_runtime                              2.224 |
| eval_samples_per_second                 136.238 |
| eval_steps_per_second                     8.543 |
| objective                               0.62064 |
+-------------------------------------------------+

[2m[36m(_objective pid=12760)[0m {'eval_loss': 0.6206356883049011, 'eval_runtime': 2.224, 'eval_samples_per_second': 136.238, 'eval_steps_per_second': 8.543, 'epoch': 4.0}


[2m[36m(_objective pid=12760)[0m 
[2m[36m(_objective pid=12760)[0m 100%|██████████| 19/19 [00:02<00:00,  8.36it/s][A                                                 
[2m[36m(_objective pid=12760)[0m                                                [A100%|██████████| 684/684 [04:25<00:00,  3.58it/s]
[2m[36m(_objective pid=12760)[0m 100%|██████████| 19/19 [00:02<00:00,  8.36it/s][A
[2m[36m(_objective pid=12760)[0m                                                [A


Trial _objective_4df1a_00005 completed after 4 iterations at 2023-09-21 17:35:33. Total running time: 28min 22s

[2m[36m(_objective pid=12760)[0m {'train_runtime': 270.7361, 'train_samples_per_second': 40.216, 'train_steps_per_second': 2.526, 'train_loss': 0.6828975342867667, 'epoch': 4.0}


[2m[36m(_objective pid=12760)[0m                                                  100%|██████████| 684/684 [04:30<00:00,  3.58it/s]100%|██████████| 684/684 [04:30<00:00,  2.53it/s]


Trial _objective_4df1a_00006 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00006 config             |
+-------------------------------------------------+
| learning_rate                             2e-05 |
| num_train_epochs                              2 |
| per_device_train_batch_size                  32 |
| seed                                    2.81996 |
+-------------------------------------------------+

Trial status: 6 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-21 17:35:44. Total running time: 28min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objec

[2m[36m(_objective pid=13929)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=13929)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/172 [00:00<?, ?it/s]
  1%|          | 1/172 [00:00<02:28,  1.15it/s]
  1%|          | 2/172 [00:01<01:58,  1.43it/s]
  2%|▏         | 3/172 [00:02<01:53,  1.49it/s]
  2%|▏         | 4/172 [00:02<01:48,  1.54it/s]
  3%|▎         | 5/172 [00:03<01:46,  1.56it/s]
  3%|▎         | 6/172 [00:03<01:45,  1.58it/s]
  4%|▍         | 7/172 [00:04<01:43,  1.59it/s]
  5%|▍         | 8/172 [00:05<01:42,  1.60it/s]
  5%|▌         | 9/172 [00:05<01:42,  1.60it/s]
  6%|▌         | 10/172 [00:06<01:41,  1.59it/s]
  6%|▋         | 11/172 [00:07<01:41,  1.59it/s]
  7%|▋         | 12/172 [00:07<01:40,  1.59it/s]
  8%|▊         |

Trial status: 6 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-21 17:36:14. Total running time: 29min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00006   RUNNING          1.67381e-05                    2    2.81996                       32                                                                                       

 24%|██▍       | 42/172 [00:26<01:24,  1.54it/s]
 25%|██▌       | 43/172 [00:27<01:24,  1.53it/s]
 26%|██▌       | 44/172 [00:28<01:23,  1.53it/s]
 26%|██▌       | 45/172 [00:28<01:22,  1.53it/s]
 27%|██▋       | 46/172 [00:29<01:22,  1.53it/s]
 27%|██▋       | 47/172 [00:30<01:21,  1.53it/s]
 28%|██▊       | 48/172 [00:30<01:21,  1.53it/s]
 28%|██▊       | 49/172 [00:31<01:20,  1.53it/s]
 29%|██▉       | 50/172 [00:32<01:19,  1.53it/s]
 30%|██▉       | 51/172 [00:32<01:19,  1.53it/s]
 30%|███       | 52/172 [00:33<01:18,  1.53it/s]
 31%|███       | 53/172 [00:34<01:17,  1.53it/s]
 31%|███▏      | 54/172 [00:34<01:17,  1.53it/s]
 32%|███▏      | 55/172 [00:35<01:16,  1.53it/s]
 33%|███▎      | 56/172 [00:36<01:16,  1.53it/s]
 33%|███▎      | 57/172 [00:36<01:15,  1.53it/s]
 34%|███▎      | 58/172 [00:37<01:14,  1.52it/s]
 34%|███▍      | 59/172 [00:38<01:14,  1.52it/s]
 35%|███▍      | 60/172 [00:38<01:13,  1.52it/s]
 35%|███▌      | 61/172 [00:39<01:12,  1.52it/s]
 36%|███▌      | 62/

[2m[36m(_objective pid=13929)[0m {'loss': 0.7194, 'learning_rate': 5.2883004328901046e-08, 'epoch': 1.0}


[2m[36m(_objective pid=13929)[0m 
[2m[36m(_objective pid=13929)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=13929)[0m 
 16%|█▌        | 3/19 [00:00<00:00, 23.32it/s][A
[2m[36m(_objective pid=13929)[0m 
 32%|███▏      | 6/19 [00:00<00:01, 11.29it/s][A
[2m[36m(_objective pid=13929)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.83it/s][A
[2m[36m(_objective pid=13929)[0m 
 53%|█████▎    | 10/19 [00:00<00:00,  9.28it/s][A
[2m[36m(_objective pid=13929)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  9.19it/s][A
[2m[36m(_objective pid=13929)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  9.02it/s][A


Trial status: 6 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-21 17:36:45. Total running time: 29min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00006   RUNNING          1.67381e-05                    2    2.81996                       32                                                                                      

[2m[36m(_objective pid=13929)[0m 
[2m[36m(_objective pid=13929)[0m  74%|███████▎  | 14/19 [00:01<00:00,  8.90it/s][A
[2m[36m(_objective pid=13929)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.76it/s][A
[2m[36m(_objective pid=13929)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.27it/s][A
[2m[36m(_objective pid=13929)[0m 
 89%|████████▉ | 17/19 [00:01<00:00,  8.47it/s][A
[2m[36m(_objective pid=13929)[0m 
 95%|█████████▍| 18/19 [00:01<00:00,  8.49it/s][A
[2m[36m(_objective pid=13929)[0m 
100%|██████████| 19/19 [00:02<00:00,  8.52it/s][A


Trial _objective_4df1a_00006 finished iteration 1 at 2023-09-21 17:36:45. Total running time: 29min 34s
+-------------------------------------------------+
| Trial _objective_4df1a_00006 result             |
+-------------------------------------------------+
| time_this_iter_s                        62.8989 |
| time_total_s                            62.8989 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.72372 |
| eval_runtime                             2.2434 |
| eval_samples_per_second                 135.065 |
| eval_steps_per_second                     8.469 |
| objective                               0.72372 |
+-------------------------------------------------+

[2m[36m(_objective pid=13929)[0m {'eval_loss': 0.7237247228622437, 'eval_runtime': 2.2434, 'eval_samples_per_second': 135.065, 'eval_steps_per_second': 8.469, 'epoch': 1.0}


[2m[36m(_objective pid=13929)[0m                                                 
[2m[36m(_objective pid=13929)[0m                                                [A 50%|█████     | 86/172 [00:57<00:41,  2.08it/s]
[2m[36m(_objective pid=13929)[0m 100%|██████████| 19/19 [00:02<00:00,  8.52it/s][A
                                               [A
 51%|█████     | 87/172 [01:03<03:55,  2.77s/it]
 51%|█████     | 88/172 [01:03<02:58,  2.13s/it]
 52%|█████▏    | 89/172 [01:04<02:19,  1.68s/it]
 52%|█████▏    | 90/172 [01:05<01:51,  1.36s/it]
 53%|█████▎    | 91/172 [01:05<01:32,  1.14s/it]
 53%|█████▎    | 92/172 [01:06<01:19,  1.01it/s]
 54%|█████▍    | 93/172 [01:06<01:09,  1.13it/s]
 55%|█████▍    | 94/172 [01:07<01:02,  1.24it/s]
 55%|█████▌    | 95/172 [01:08<00:57,  1.33it/s]
 56%|█████▌    | 96/172 [01:08<00:54,  1.40it/s]
 56%|█████▋    | 97/172 [01:09<00:51,  1.45it/s]
 57%|█████▋    | 98/172 [01:10<00:49,  1.49it/s]
 58%|█████▊    | 99/172 [01:10<00:48,  1.52it/s]


Trial status: 6 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-21 17:37:15. Total running time: 30min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00006   RUNNING          1.67381e-05                    2    2.81996                       32        1            62.8989      0.723725      0.723725           2.2434              

 72%|███████▏  | 124/172 [01:27<00:30,  1.57it/s]
 73%|███████▎  | 125/172 [01:27<00:29,  1.57it/s]
 73%|███████▎  | 126/172 [01:28<00:29,  1.57it/s]
 74%|███████▍  | 127/172 [01:28<00:28,  1.57it/s]
 74%|███████▍  | 128/172 [01:29<00:28,  1.57it/s]
 75%|███████▌  | 129/172 [01:30<00:27,  1.56it/s]
 76%|███████▌  | 130/172 [01:30<00:26,  1.56it/s]
 76%|███████▌  | 131/172 [01:31<00:26,  1.56it/s]
 77%|███████▋  | 132/172 [01:32<00:25,  1.56it/s]
 77%|███████▋  | 133/172 [01:32<00:25,  1.56it/s]
 78%|███████▊  | 134/172 [01:33<00:24,  1.55it/s]
 78%|███████▊  | 135/172 [01:34<00:23,  1.55it/s]
 79%|███████▉  | 136/172 [01:34<00:23,  1.55it/s]
 80%|███████▉  | 137/172 [01:35<00:22,  1.55it/s]
 80%|████████  | 138/172 [01:36<00:22,  1.54it/s]
 81%|████████  | 139/172 [01:36<00:21,  1.55it/s]
 81%|████████▏ | 140/172 [01:37<00:20,  1.55it/s]
 82%|████████▏ | 141/172 [01:38<00:19,  1.55it/s]
 83%|████████▎ | 142/172 [01:38<00:19,  1.55it/s]
 83%|████████▎ | 143/172 [01:39<00:18,  1.56it/s]


Trial status: 6 TERMINATED | 1 RUNNING | 3 PENDING
Current time: 2023-09-21 17:37:45. Total running time: 30min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00006   RUNNING          1.67381e-05                    2    2.81996                       32        1            62.8989      0.723725      0.723725           2.2434             

[2m[36m(_objective pid=13929)[0m  99%|█████████▉| 171/172 [01:57<00:00,  1.55it/s]
[2m[36m(_objective pid=13929)[0m                                                  100%|██████████| 172/172 [01:57<00:00,  1.55it/s]
[2m[36m(_objective pid=13929)[0m   0%|          | 0/19 [00:00<?, ?it/s][A


[2m[36m(_objective pid=13929)[0m {'loss': 0.7149, 'learning_rate': 1.0576600865780209e-07, 'epoch': 2.0}


[2m[36m(_objective pid=13929)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.97it/s][A
[2m[36m(_objective pid=13929)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.75it/s][A
[2m[36m(_objective pid=13929)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.14it/s][A
[2m[36m(_objective pid=13929)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.04it/s][A
[2m[36m(_objective pid=13929)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.94it/s][A
[2m[36m(_objective pid=13929)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.86it/s][A
[2m[36m(_objective pid=13929)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.78it/s][A
[2m[36m(_objective pid=13929)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.69it/s][A
[2m[36m(_objective pid=13929)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.58it/s][A
[2m[36m(_objective pid=13929)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.45it/s][A
[2m[36m(_objective pid=13929)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.56it/s][A
[2m[36m(_objective pid=13929)[0m 


Trial _objective_4df1a_00006 finished iteration 2 at 2023-09-21 17:37:48. Total running time: 30min 36s
+-------------------------------------------------+
| Trial _objective_4df1a_00006 result             |
+-------------------------------------------------+
| time_this_iter_s                        62.3345 |
| time_total_s                            125.233 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.71928 |
| eval_runtime                             2.2424 |
| eval_samples_per_second                 135.123 |
| eval_steps_per_second                     8.473 |
| objective                               0.71928 |
+-------------------------------------------------+

[2m[36m(_objective pid=13929)[0m {'eval_loss': 0.7192806005477905, 'eval_runtime': 2.2424, 'eval_samples_per_second': 135.123, 'eval_steps_per_second': 8.473, 'epoch': 2.0}
Trial _objective_4df1a_00006 completed afte

[2m[36m(_objective pid=13929)[0m                                                  100%|██████████| 172/172 [02:05<00:00,  1.55it/s]100%|██████████| 172/172 [02:05<00:00,  1.38it/s]


Trial _objective_4df1a_00007 started with configuration:
+------------------------------------------------+
| Trial _objective_4df1a_00007 config            |
+------------------------------------------------+
| learning_rate                            1e-05 |
| num_train_epochs                             3 |
| per_device_train_batch_size                 32 |
| seed                                    15.916 |
+------------------------------------------------+



[2m[36m(_objective pid=14526)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
[2m[36m(_objective pid=14526)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/258 [00:00<?, ?it/s]
  0%|          | 1/258 [00:00<03:27,  1.24it/s]
  1%|          | 2/258 [00:01<02:53,  1.48it/s]
  1%|          | 3/258 [00:02<02:48,  1.51it/s]
  2%|▏         | 4/258 [00:02<02:44,  1.55it/s]
  2%|▏         | 5/258 [00:03<02:41,  1.57it/s]
  2%|▏         | 6/258 [00:03<02:39,  1.58it/s]
  3%|▎         | 7/258 [00:04<02:38,  1.58it/s]
  3%|▎         | 8/258 [00:05<02:37,  1.58it/s]
  3%|▎         | 9/258 [00:05<02:37,  1.59it/s]
  4%|▍         | 10/258 [00:06<02:36,  1.59it/s]
  4%|▍         | 11/258 [00:07<02:36,  1.58it/s]


Trial status: 7 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-21 17:38:15. Total running time: 31min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00007   RUNNING          5.4041e-06                     3   15.916                         32                                                                                       

  5%|▍         | 12/258 [00:07<02:35,  1.58it/s]
  5%|▌         | 13/258 [00:08<02:34,  1.58it/s]
  5%|▌         | 14/258 [00:08<02:33,  1.58it/s]
  6%|▌         | 15/258 [00:09<02:33,  1.58it/s]
  6%|▌         | 16/258 [00:10<02:32,  1.58it/s]
  7%|▋         | 17/258 [00:10<02:32,  1.58it/s]
  7%|▋         | 18/258 [00:11<02:32,  1.58it/s]
  7%|▋         | 19/258 [00:12<02:31,  1.58it/s]
  8%|▊         | 20/258 [00:12<02:30,  1.58it/s]
  8%|▊         | 21/258 [00:13<02:30,  1.58it/s]
  9%|▊         | 22/258 [00:14<02:30,  1.57it/s]
  9%|▉         | 23/258 [00:14<02:29,  1.57it/s]
  9%|▉         | 24/258 [00:15<02:29,  1.57it/s]
 10%|▉         | 25/258 [00:15<02:28,  1.57it/s]
 10%|█         | 26/258 [00:16<02:28,  1.57it/s]
 10%|█         | 27/258 [00:17<02:26,  1.57it/s]
 11%|█         | 28/258 [00:17<02:26,  1.57it/s]
 11%|█         | 29/258 [00:18<02:26,  1.56it/s]
 12%|█▏        | 30/258 [00:19<02:26,  1.56it/s]
 12%|█▏        | 31/258 [00:19<02:25,  1.56it/s]
 12%|█▏        | 32/

Trial status: 7 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-21 17:38:45. Total running time: 31min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00007   RUNNING          5.4041e-06                     3   15.916                         32                                                                                      

 23%|██▎       | 59/258 [00:38<02:11,  1.52it/s]
 23%|██▎       | 60/258 [00:38<02:10,  1.52it/s]
 24%|██▎       | 61/258 [00:39<02:09,  1.52it/s]
 24%|██▍       | 62/258 [00:40<02:09,  1.52it/s]
 24%|██▍       | 63/258 [00:40<02:08,  1.52it/s]
 25%|██▍       | 64/258 [00:41<02:07,  1.52it/s]
 25%|██▌       | 65/258 [00:42<02:06,  1.53it/s]
 26%|██▌       | 66/258 [00:42<02:05,  1.53it/s]
 26%|██▌       | 67/258 [00:43<02:04,  1.53it/s]
 26%|██▋       | 68/258 [00:44<02:04,  1.53it/s]
 27%|██▋       | 69/258 [00:44<02:03,  1.53it/s]
 27%|██▋       | 70/258 [00:45<02:02,  1.54it/s]
 28%|██▊       | 71/258 [00:45<02:01,  1.54it/s]
 28%|██▊       | 72/258 [00:46<02:00,  1.54it/s]
 28%|██▊       | 73/258 [00:47<01:59,  1.54it/s]
 29%|██▊       | 74/258 [00:47<01:59,  1.54it/s]
 29%|██▉       | 75/258 [00:48<01:58,  1.54it/s]
 29%|██▉       | 76/258 [00:49<01:57,  1.55it/s]
 30%|██▉       | 77/258 [00:49<01:57,  1.54it/s]
 30%|███       | 78/258 [00:50<01:56,  1.55it/s]
 31%|███       | 79/

[2m[36m(_objective pid=14526)[0m {'loss': 0.7036, 'learning_rate': 1.7073950459209077e-08, 'epoch': 1.0}


[2m[36m(_objective pid=14526)[0m 
 16%|█▌        | 3/19 [00:00<00:00, 22.47it/s][A
[2m[36m(_objective pid=14526)[0m 
 32%|███▏      | 6/19 [00:00<00:01, 11.42it/s][A
[2m[36m(_objective pid=14526)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.71it/s][A
[2m[36m(_objective pid=14526)[0m 
 53%|█████▎    | 10/19 [00:00<00:00,  9.34it/s][A
[2m[36m(_objective pid=14526)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  9.11it/s][A
[2m[36m(_objective pid=14526)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  9.06it/s][A
[2m[36m(_objective pid=14526)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.97it/s][A
[2m[36m(_objective pid=14526)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.82it/s][A
[2m[36m(_objective pid=14526)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.50it/s][A
[2m[36m(_objective pid=14526)[0m 
 89%|████████▉ | 17/19 [00:01<00:00,  8.40it/s][A
[2m[36m(_objective pid=14526)[0m 
 95%|█████████▍| 18/19 [00:01<00:00,  8.26it/s][A
[2m[36m(_objective pid=14526)[0m

[2m[36m(_objective pid=14526)[0m {'eval_loss': 0.6988958120346069, 'eval_runtime': 2.2409, 'eval_samples_per_second': 135.216, 'eval_steps_per_second': 8.479, 'epoch': 1.0}
Trial _objective_4df1a_00007 finished iteration 1 at 2023-09-21 17:39:04. Total running time: 31min 53s
+-------------------------------------------------+
| Trial _objective_4df1a_00007 result             |
+-------------------------------------------------+
| time_this_iter_s                        62.4332 |
| time_total_s                            62.4332 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                                0.6989 |
| eval_runtime                             2.2409 |
| eval_samples_per_second                 135.216 |
| eval_steps_per_second                     8.479 |
| objective                                0.6989 |
+-------------------------------------------------+



 34%|███▎      | 87/258 [01:03<07:48,  2.74s/it]
 34%|███▍      | 88/258 [01:03<05:57,  2.10s/it]
 34%|███▍      | 89/258 [01:04<04:40,  1.66s/it]
 35%|███▍      | 90/258 [01:04<03:47,  1.35s/it]
 35%|███▌      | 91/258 [01:05<03:09,  1.13s/it]
 36%|███▌      | 92/258 [01:06<02:42,  1.02it/s]
 36%|███▌      | 93/258 [01:06<02:24,  1.14it/s]
 36%|███▋      | 94/258 [01:07<02:11,  1.25it/s]


Trial status: 7 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-21 17:39:15. Total running time: 32min 3s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00007   RUNNING          5.4041e-06                     3   15.916                         32        1            62.4332      0.698896      0.698896           2.2409              

 37%|███▋      | 95/258 [01:08<02:02,  1.33it/s]
 37%|███▋      | 96/258 [01:08<01:55,  1.40it/s]
 38%|███▊      | 97/258 [01:09<01:51,  1.45it/s]
 38%|███▊      | 98/258 [01:10<01:48,  1.48it/s]
 38%|███▊      | 99/258 [01:10<01:45,  1.50it/s]
 39%|███▉      | 100/258 [01:11<01:43,  1.53it/s]
 39%|███▉      | 101/258 [01:11<01:41,  1.54it/s]
 40%|███▉      | 102/258 [01:12<01:40,  1.55it/s]
 40%|███▉      | 103/258 [01:13<01:39,  1.56it/s]
 40%|████      | 104/258 [01:13<01:38,  1.56it/s]
 41%|████      | 105/258 [01:14<01:37,  1.57it/s]
 41%|████      | 106/258 [01:15<01:36,  1.57it/s]
 41%|████▏     | 107/258 [01:15<01:35,  1.58it/s]
 42%|████▏     | 108/258 [01:16<01:34,  1.58it/s]
 42%|████▏     | 109/258 [01:17<01:34,  1.58it/s]
 43%|████▎     | 110/258 [01:17<01:33,  1.58it/s]
 43%|████▎     | 111/258 [01:18<01:32,  1.58it/s]
 43%|████▎     | 112/258 [01:18<01:32,  1.58it/s]
 44%|████▍     | 113/258 [01:19<01:31,  1.58it/s]
 44%|████▍     | 114/258 [01:20<01:31,  1.58it/s]
 45%|

Trial status: 7 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-21 17:39:45. Total running time: 32min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00007   RUNNING          5.4041e-06                     3   15.916                         32        1            62.4332      0.698896      0.698896           2.2409             

 55%|█████▌    | 142/258 [01:38<01:15,  1.55it/s]
 55%|█████▌    | 143/258 [01:38<01:14,  1.55it/s]
 56%|█████▌    | 144/258 [01:39<01:13,  1.55it/s]
 56%|█████▌    | 145/258 [01:40<01:12,  1.55it/s]
 57%|█████▋    | 146/258 [01:40<01:12,  1.55it/s]
 57%|█████▋    | 147/258 [01:41<01:11,  1.55it/s]
 57%|█████▋    | 148/258 [01:41<01:10,  1.55it/s]
 58%|█████▊    | 149/258 [01:42<01:10,  1.55it/s]
 58%|█████▊    | 150/258 [01:43<01:09,  1.55it/s]
 59%|█████▊    | 151/258 [01:43<01:09,  1.55it/s]
 59%|█████▉    | 152/258 [01:44<01:08,  1.55it/s]
 59%|█████▉    | 153/258 [01:45<01:07,  1.55it/s]
 60%|█████▉    | 154/258 [01:45<01:07,  1.55it/s]
 60%|██████    | 155/258 [01:46<01:06,  1.55it/s]
 60%|██████    | 156/258 [01:47<01:05,  1.55it/s]
 61%|██████    | 157/258 [01:47<01:05,  1.55it/s]
 61%|██████    | 158/258 [01:48<01:04,  1.55it/s]
 62%|██████▏   | 159/258 [01:49<01:03,  1.55it/s]
 62%|██████▏   | 160/258 [01:49<01:03,  1.55it/s]
 62%|██████▏   | 161/258 [01:50<01:02,  1.55it/s]


[2m[36m(_objective pid=14526)[0m {'loss': 0.7044, 'learning_rate': 3.4147900918418155e-08, 'epoch': 2.0}


[2m[36m(_objective pid=14526)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.17it/s][A
[2m[36m(_objective pid=14526)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.46it/s][A
[2m[36m(_objective pid=14526)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.46it/s][A
[2m[36m(_objective pid=14526)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.89it/s][A
[2m[36m(_objective pid=14526)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.77it/s][A
[2m[36m(_objective pid=14526)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.75it/s][A
[2m[36m(_objective pid=14526)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.72it/s][A
[2m[36m(_objective pid=14526)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.58it/s][A
[2m[36m(_objective pid=14526)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.69it/s][A
[2m[36m(_objective pid=14526)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.60it/s][A
[2m[36m(_objective pid=14526)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.52it/s][A
[2m[36m(_objective pid=14526)[0m 


Trial _objective_4df1a_00007 finished iteration 2 at 2023-09-21 17:40:06. Total running time: 32min 55s
+-------------------------------------------------+
| Trial _objective_4df1a_00007 result             |
+-------------------------------------------------+
| time_this_iter_s                        61.7996 |
| time_total_s                            124.233 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.69756 |
| eval_runtime                             2.2493 |
| eval_samples_per_second                 134.707 |
| eval_steps_per_second                     8.447 |
| objective                               0.69756 |
+-------------------------------------------------+

[2m[36m(_objective pid=14526)[0m {'eval_loss': 0.6975592374801636, 'eval_runtime': 2.2493, 'eval_samples_per_second': 134.707, 'eval_steps_per_second': 8.447, 'epoch': 2.0}
Trial status: 7 TERMINATED | 1 RUNNING | 2 

 67%|██████▋   | 173/258 [02:09<04:35,  3.24s/it]
 67%|██████▋   | 174/258 [02:09<03:37,  2.59s/it]
 68%|██████▊   | 175/258 [02:10<02:52,  2.08s/it]
 68%|██████▊   | 176/258 [02:11<02:18,  1.68s/it]
 69%|██████▊   | 177/258 [02:11<01:52,  1.39s/it]
 69%|██████▉   | 178/258 [02:12<01:33,  1.17s/it]
 69%|██████▉   | 179/258 [02:13<01:20,  1.01s/it]
 70%|██████▉   | 180/258 [02:13<01:10,  1.11it/s]
 70%|███████   | 181/258 [02:14<01:03,  1.22it/s]
 71%|███████   | 182/258 [02:15<00:58,  1.31it/s]
 71%|███████   | 183/258 [02:15<00:54,  1.38it/s]
 71%|███████▏  | 184/258 [02:16<00:51,  1.43it/s]
 72%|███████▏  | 185/258 [02:16<00:49,  1.47it/s]
 72%|███████▏  | 186/258 [02:17<00:48,  1.50it/s]
 72%|███████▏  | 187/258 [02:18<00:46,  1.51it/s]
 73%|███████▎  | 188/258 [02:18<00:45,  1.53it/s]
 73%|███████▎  | 189/258 [02:19<00:44,  1.54it/s]
 74%|███████▎  | 190/258 [02:20<00:43,  1.55it/s]
 74%|███████▍  | 191/258 [02:20<00:43,  1.55it/s]
 74%|███████▍  | 192/258 [02:21<00:42,  1.55it/s]


Trial status: 7 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-21 17:40:45. Total running time: 33min 33s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00007   RUNNING          5.4041e-06                     3   15.916                         32        2            124.233      0.697559      0.697559           2.2493             

 84%|████████▍ | 218/258 [02:38<00:26,  1.53it/s]
 85%|████████▍ | 219/258 [02:38<00:25,  1.53it/s]
 85%|████████▌ | 220/258 [02:39<00:24,  1.53it/s]
 86%|████████▌ | 221/258 [02:40<00:24,  1.54it/s]
 86%|████████▌ | 222/258 [02:40<00:23,  1.54it/s]
 86%|████████▋ | 223/258 [02:41<00:22,  1.54it/s]
 87%|████████▋ | 224/258 [02:42<00:22,  1.54it/s]
 87%|████████▋ | 225/258 [02:42<00:21,  1.54it/s]
 88%|████████▊ | 226/258 [02:43<00:20,  1.54it/s]
 88%|████████▊ | 227/258 [02:44<00:20,  1.55it/s]
 88%|████████▊ | 228/258 [02:44<00:19,  1.55it/s]
 89%|████████▉ | 229/258 [02:45<00:18,  1.55it/s]
 89%|████████▉ | 230/258 [02:46<00:18,  1.55it/s]
 90%|████████▉ | 231/258 [02:46<00:17,  1.55it/s]
 90%|████████▉ | 232/258 [02:47<00:16,  1.55it/s]
 90%|█████████ | 233/258 [02:47<00:16,  1.55it/s]
 91%|█████████ | 234/258 [02:48<00:15,  1.55it/s]
 91%|█████████ | 235/258 [02:49<00:14,  1.55it/s]
 91%|█████████▏| 236/258 [02:49<00:14,  1.55it/s]
 92%|█████████▏| 237/258 [02:50<00:13,  1.55it/s]


[2m[36m(_objective pid=14526)[0m {'loss': 0.7028, 'learning_rate': 5.1221851377627235e-08, 'epoch': 3.0}


[2m[36m(_objective pid=14526)[0m 
[2m[36m(_objective pid=14526)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=14526)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.31it/s][A
[2m[36m(_objective pid=14526)[0m 
 21%|██        | 4/19 [00:00<00:01, 11.15it/s][A
[2m[36m(_objective pid=14526)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.68it/s][A
[2m[36m(_objective pid=14526)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.97it/s][A
[2m[36m(_objective pid=14526)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.89it/s][A
[2m[36m(_objective pid=14526)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.86it/s][A
[2m[36m(_objective pid=14526)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.85it/s][A
[2m[36m(_objective pid=14526)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.74it/s][A
[2m[36m(_objective pid=14526)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.70it/s][A
[2m[36m(_objective pid=14526)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.62it/s][A
[2m[36m

Trial _objective_4df1a_00007 finished iteration 3 at 2023-09-21 17:41:13. Total running time: 34min 1s
+-------------------------------------------------+
| Trial _objective_4df1a_00007 result             |
+-------------------------------------------------+
| time_this_iter_s                          66.53 |
| time_total_s                            190.763 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.69537 |
| eval_runtime                             2.2238 |
| eval_samples_per_second                 136.251 |
| eval_steps_per_second                     8.544 |
| objective                               0.69537 |
+-------------------------------------------------+

[2m[36m(_objective pid=14526)[0m {'eval_loss': 0.6953747272491455, 'eval_runtime': 2.2238, 'eval_samples_per_second': 136.251, 'eval_steps_per_second': 8.544, 'epoch': 3.0}


[2m[36m(_objective pid=14526)[0m                                                  
[2m[36m(_objective pid=14526)[0m                                                [A100%|██████████| 258/258 [03:05<00:00,  1.57it/s]
[2m[36m(_objective pid=14526)[0m 100%|██████████| 19/19 [00:02<00:00,  8.59it/s][A
[2m[36m(_objective pid=14526)[0m                                                [A


Trial status: 7 TERMINATED | 1 RUNNING | 2 PENDING
Current time: 2023-09-21 17:41:15. Total running time: 34min 4s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00007   RUNNING          5.4041e-06                     3   15.916                         32        3            190.763      0.695375      0.695375           2.2238              

[2m[36m(_objective pid=14526)[0m                                                  100%|██████████| 258/258 [03:14<00:00,  1.57it/s]100%|██████████| 258/258 [03:14<00:00,  1.32it/s]


Trial _objective_4df1a_00008 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00008 config             |
+-------------------------------------------------+
| learning_rate                             2e-05 |
| num_train_epochs                              3 |
| per_device_train_batch_size                  64 |
| seed                                    34.5377 |
+-------------------------------------------------+



[2m[36m(_objective pid=15399)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=15399)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/129 [00:00<?, ?it/s]
  1%|          | 1/129 [00:01<02:51,  1.34s/it]
  2%|▏         | 2/129 [00:02<02:37,  1.24s/it]
  2%|▏         | 3/129 [00:03<02:33,  1.22s/it]
  3%|▎         | 4/129 [00:04<02:31,  1.21s/it]
  4%|▍         | 5/129 [00:06<02:30,  1.21s/it]
  5%|▍         | 6/129 [00:07<02:29,  1.21s/it]
  5%|▌         | 7/129 [00:08<02:27,  1.21s/it]


Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-21 17:41:45. Total running time: 34min 34s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00008   RUNNING          1.53049e-05                    3   34.5377                        64                                                                                      

  6%|▌         | 8/129 [00:09<02:27,  1.22s/it]
  7%|▋         | 9/129 [00:10<02:26,  1.22s/it]
  8%|▊         | 10/129 [00:12<02:25,  1.22s/it]
  9%|▊         | 11/129 [00:13<02:24,  1.23s/it]
  9%|▉         | 12/129 [00:14<02:24,  1.23s/it]
 10%|█         | 13/129 [00:15<02:22,  1.23s/it]
 11%|█         | 14/129 [00:17<02:21,  1.23s/it]
 12%|█▏        | 15/129 [00:18<02:20,  1.23s/it]
 12%|█▏        | 16/129 [00:19<02:19,  1.23s/it]
 13%|█▎        | 17/129 [00:20<02:18,  1.24s/it]
 14%|█▍        | 18/129 [00:22<02:17,  1.24s/it]
 15%|█▍        | 19/129 [00:23<02:16,  1.24s/it]
 16%|█▌        | 20/129 [00:24<02:15,  1.25s/it]
 16%|█▋        | 21/129 [00:25<02:14,  1.25s/it]
 17%|█▋        | 22/129 [00:27<02:13,  1.25s/it]
 18%|█▊        | 23/129 [00:28<02:12,  1.25s/it]
 19%|█▊        | 24/129 [00:29<02:12,  1.26s/it]
 19%|█▉        | 25/129 [00:30<02:10,  1.26s/it]
 20%|██        | 26/129 [00:32<02:09,  1.26s/it]
 21%|██        | 27/129 [00:33<02:08,  1.26s/it]
 22%|██▏       | 28/12

Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-21 17:42:15. Total running time: 35min 4s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00008   RUNNING          1.53049e-05                    3   34.5377                        64                                                                                       

 25%|██▍       | 32/129 [00:39<02:03,  1.27s/it]
 26%|██▌       | 33/129 [00:41<02:02,  1.27s/it]
 26%|██▋       | 34/129 [00:42<02:00,  1.27s/it]
 27%|██▋       | 35/129 [00:43<01:59,  1.27s/it]
 28%|██▊       | 36/129 [00:44<01:58,  1.27s/it]
 29%|██▊       | 37/129 [00:46<01:57,  1.28s/it]
 29%|██▉       | 38/129 [00:47<01:55,  1.27s/it]
 30%|███       | 39/129 [00:48<01:54,  1.27s/it]
 31%|███       | 40/129 [00:49<01:52,  1.27s/it]
 32%|███▏      | 41/129 [00:51<01:51,  1.26s/it]
 33%|███▎      | 42/129 [00:52<01:49,  1.26s/it]


[2m[36m(_objective pid=15399)[0m {'loss': 0.6987, 'learning_rate': 2.4177393138822726e-08, 'epoch': 1.0}


[2m[36m(_objective pid=15399)[0m  33%|███▎      | 43/129 [00:53<01:34,  1.10s/it]                                                 33%|███▎      | 43/129 [00:53<01:34,  1.10s/it]
[2m[36m(_objective pid=15399)[0m 
  0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=15399)[0m 
 16%|█▌        | 3/19 [00:00<00:00, 20.92it/s][A
[2m[36m(_objective pid=15399)[0m 
 32%|███▏      | 6/19 [00:00<00:01, 11.12it/s][A
[2m[36m(_objective pid=15399)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.90it/s][A
[2m[36m(_objective pid=15399)[0m 
 53%|█████▎    | 10/19 [00:00<00:00,  9.27it/s][A
[2m[36m(_objective pid=15399)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  9.20it/s][A
[2m[36m(_objective pid=15399)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.98it/s][A
[2m[36m(_objective pid=15399)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.87it/s][A
[2m[36m(_objective pid=15399)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.61it/s][A
[2m[36m(_objective pid=15399)[0m

Trial _objective_4df1a_00008 finished iteration 1 at 2023-09-21 17:42:32. Total running time: 35min 20s
+-------------------------------------------------+
| Trial _objective_4df1a_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        61.6544 |
| time_total_s                            61.6544 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.69696 |
| eval_runtime                             2.2619 |
| eval_samples_per_second                  133.96 |
| eval_steps_per_second                       8.4 |
| objective                               0.69696 |
+-------------------------------------------------+

[2m[36m(_objective pid=15399)[0m {'eval_loss': 0.6969596147537231, 'eval_runtime': 2.2619, 'eval_samples_per_second': 133.96, 'eval_steps_per_second': 8.4, 'epoch': 1.0}


[2m[36m(_objective pid=15399)[0m                                                 
[2m[36m(_objective pid=15399)[0m                                                [A 33%|███▎      | 43/129 [00:55<01:34,  1.10s/it]
[2m[36m(_objective pid=15399)[0m 100%|██████████| 19/19 [00:02<00:00,  8.56it/s][A
                                               [A
 34%|███▍      | 44/129 [01:01<04:40,  3.30s/it]
 35%|███▍      | 45/129 [01:02<03:45,  2.68s/it]
 36%|███▌      | 46/129 [01:04<03:06,  2.25s/it]
 36%|███▋      | 47/129 [01:05<02:39,  1.94s/it]
 37%|███▋      | 48/129 [01:06<02:19,  1.73s/it]
 38%|███▊      | 49/129 [01:07<02:05,  1.57s/it]
 39%|███▉      | 50/129 [01:09<01:56,  1.47s/it]


Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-21 17:42:45. Total running time: 35min 34s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00008   RUNNING          1.53049e-05                    3   34.5377                        64        1            61.6544      0.69696       0.69696            2.2619             

 40%|███▉      | 51/129 [01:10<01:48,  1.40s/it]
 40%|████      | 52/129 [01:11<01:43,  1.35s/it]
 41%|████      | 53/129 [01:12<01:39,  1.31s/it]
 42%|████▏     | 54/129 [01:13<01:36,  1.28s/it]
 43%|████▎     | 55/129 [01:15<01:33,  1.27s/it]
 43%|████▎     | 56/129 [01:16<01:32,  1.26s/it]
 44%|████▍     | 57/129 [01:17<01:30,  1.25s/it]
 45%|████▍     | 58/129 [01:18<01:28,  1.25s/it]
 46%|████▌     | 59/129 [01:20<01:27,  1.24s/it]
 47%|████▋     | 60/129 [01:21<01:25,  1.24s/it]
 47%|████▋     | 61/129 [01:22<01:24,  1.24s/it]
 48%|████▊     | 62/129 [01:23<01:22,  1.24s/it]
 49%|████▉     | 63/129 [01:25<01:21,  1.23s/it]
 50%|████▉     | 64/129 [01:26<01:20,  1.23s/it]
 50%|█████     | 65/129 [01:27<01:18,  1.23s/it]
 51%|█████     | 66/129 [01:28<01:17,  1.23s/it]
 52%|█████▏    | 67/129 [01:29<01:16,  1.23s/it]
 53%|█████▎    | 68/129 [01:31<01:15,  1.23s/it]
 53%|█████▎    | 69/129 [01:32<01:14,  1.23s/it]
 54%|█████▍    | 70/129 [01:33<01:12,  1.24s/it]
 55%|█████▌    | 71/

Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-21 17:43:16. Total running time: 36min 4s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00008   RUNNING          1.53049e-05                    3   34.5377                        64        1            61.6544      0.69696       0.69696            2.2619              

 58%|█████▊    | 75/129 [01:39<01:06,  1.24s/it]
 59%|█████▉    | 76/129 [01:41<01:05,  1.24s/it]
 60%|█████▉    | 77/129 [01:42<01:04,  1.24s/it]
 60%|██████    | 78/129 [01:43<01:03,  1.24s/it]
 61%|██████    | 79/129 [01:44<01:02,  1.24s/it]
 62%|██████▏   | 80/129 [01:46<01:00,  1.24s/it]
 63%|██████▎   | 81/129 [01:47<00:59,  1.24s/it]
 64%|██████▎   | 82/129 [01:48<00:58,  1.24s/it]
 64%|██████▍   | 83/129 [01:49<00:57,  1.24s/it]
 65%|██████▌   | 84/129 [01:51<00:55,  1.24s/it]
 66%|██████▌   | 85/129 [01:52<00:54,  1.24s/it]


[2m[36m(_objective pid=15399)[0m {'loss': 0.6979, 'learning_rate': 4.835478627764545e-08, 'epoch': 2.0}


[2m[36m(_objective pid=15399)[0m  67%|██████▋   | 86/129 [01:53<00:46,  1.09s/it]                                                 67%|██████▋   | 86/129 [01:53<00:46,  1.09s/it]
[2m[36m(_objective pid=15399)[0m   0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=15399)[0m 
 11%|█         | 2/19 [00:00<00:00, 18.32it/s][A
[2m[36m(_objective pid=15399)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.60it/s][A
[2m[36m(_objective pid=15399)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.42it/s][A
[2m[36m(_objective pid=15399)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.01it/s][A
[2m[36m(_objective pid=15399)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.70it/s][A
[2m[36m(_objective pid=15399)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.78it/s][A
[2m[36m(_objective pid=15399)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.71it/s][A
[2m[36m(_objective pid=15399)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.71it/s][A
[2m[36m(_objective pid=15399)[0m 


Trial _objective_4df1a_00008 finished iteration 2 at 2023-09-21 17:43:31. Total running time: 36min 20s
+-------------------------------------------------+
| Trial _objective_4df1a_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        59.7861 |
| time_total_s                            121.441 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.69592 |
| eval_runtime                             2.2504 |
| eval_samples_per_second                 134.641 |
| eval_steps_per_second                     8.443 |
| objective                               0.69592 |
+-------------------------------------------------+

[2m[36m(_objective pid=15399)[0m {'eval_loss': 0.6959233283996582, 'eval_runtime': 2.2504, 'eval_samples_per_second': 134.641, 'eval_steps_per_second': 8.443, 'epoch': 2.0}


 67%|██████▋   | 87/129 [02:01<02:19,  3.33s/it]
 68%|██████▊   | 88/129 [02:02<01:50,  2.70s/it]
 69%|██████▉   | 89/129 [02:04<01:30,  2.26s/it]
 70%|██████▉   | 90/129 [02:05<01:15,  1.95s/it]
 71%|███████   | 91/129 [02:06<01:05,  1.73s/it]
 71%|███████▏  | 92/129 [02:07<00:58,  1.58s/it]
 72%|███████▏  | 93/129 [02:08<00:53,  1.48s/it]


Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-21 17:43:46. Total running time: 36min 34s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00008   RUNNING          1.53049e-05                    3   34.5377                        64        2            121.441      0.695923      0.695923           2.2504             

 73%|███████▎  | 94/129 [02:10<00:49,  1.41s/it]
 74%|███████▎  | 95/129 [02:11<00:46,  1.36s/it]
 74%|███████▍  | 96/129 [02:12<00:43,  1.32s/it]
 75%|███████▌  | 97/129 [02:13<00:41,  1.29s/it]
 76%|███████▌  | 98/129 [02:15<00:39,  1.27s/it]
 77%|███████▋  | 99/129 [02:16<00:37,  1.26s/it]
 78%|███████▊  | 100/129 [02:17<00:36,  1.25s/it]
 78%|███████▊  | 101/129 [02:18<00:35,  1.25s/it]
 79%|███████▉  | 102/129 [02:20<00:33,  1.25s/it]
 80%|███████▉  | 103/129 [02:21<00:32,  1.25s/it]
 81%|████████  | 104/129 [02:22<00:31,  1.25s/it]
 81%|████████▏ | 105/129 [02:23<00:29,  1.25s/it]
 82%|████████▏ | 106/129 [02:25<00:28,  1.25s/it]
 83%|████████▎ | 107/129 [02:26<00:27,  1.25s/it]
 84%|████████▎ | 108/129 [02:27<00:26,  1.25s/it]
 84%|████████▍ | 109/129 [02:28<00:25,  1.25s/it]
 85%|████████▌ | 110/129 [02:30<00:23,  1.25s/it]
 86%|████████▌ | 111/129 [02:31<00:22,  1.25s/it]
 87%|████████▋ | 112/129 [02:32<00:21,  1.25s/it]
 88%|████████▊ | 113/129 [02:33<00:20,  1.25s/it]
 88%|█

Trial status: 8 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2023-09-21 17:44:16. Total running time: 37min 4s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00008   RUNNING          1.53049e-05                    3   34.5377                        64        2            121.441      0.695923      0.695923           2.2504              

 91%|█████████▏| 118/129 [02:40<00:13,  1.25s/it]
 92%|█████████▏| 119/129 [02:41<00:12,  1.25s/it]
 93%|█████████▎| 120/129 [02:42<00:11,  1.25s/it]
 94%|█████████▍| 121/129 [02:43<00:09,  1.24s/it]
 95%|█████████▍| 122/129 [02:45<00:08,  1.24s/it]
 95%|█████████▌| 123/129 [02:46<00:07,  1.24s/it]
 96%|█████████▌| 124/129 [02:47<00:06,  1.24s/it]
 97%|█████████▋| 125/129 [02:48<00:04,  1.24s/it]
 98%|█████████▊| 126/129 [02:50<00:03,  1.24s/it]
 98%|█████████▊| 127/129 [02:51<00:02,  1.24s/it]
 99%|█████████▉| 128/129 [02:52<00:01,  1.24s/it]


[2m[36m(_objective pid=15399)[0m {'loss': 0.696, 'learning_rate': 7.253217941646817e-08, 'epoch': 3.0}


[2m[36m(_objective pid=15399)[0m 100%|██████████| 129/129 [02:53<00:00,  1.08s/it]                                                 100%|██████████| 129/129 [02:53<00:00,  1.08s/it]
[2m[36m(_objective pid=15399)[0m 
  0%|          | 0/19 [00:00<?, ?it/s][A
[2m[36m(_objective pid=15399)[0m 
 11%|█         | 2/19 [00:00<00:00, 19.04it/s][A
[2m[36m(_objective pid=15399)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.97it/s][A
[2m[36m(_objective pid=15399)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.54it/s][A
[2m[36m(_objective pid=15399)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.10it/s][A
[2m[36m(_objective pid=15399)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.87it/s][A
[2m[36m(_objective pid=15399)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.78it/s][A
[2m[36m(_objective pid=15399)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.83it/s][A
[2m[36m(_objective pid=15399)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.78it/s][A
[2m[36m(_objective pid=15399)[0

Trial _objective_4df1a_00008 finished iteration 3 at 2023-09-21 17:44:32. Total running time: 37min 20s
+-------------------------------------------------+
| Trial _objective_4df1a_00008 result             |
+-------------------------------------------------+
| time_this_iter_s                        60.1608 |
| time_total_s                            181.601 |
| training_iteration                            3 |
| epoch                                         3 |
| eval_loss                               0.69414 |
| eval_runtime                             2.2274 |
| eval_samples_per_second                 136.035 |
| eval_steps_per_second                      8.53 |
| objective                               0.69414 |
+-------------------------------------------------+

[2m[36m(_objective pid=15399)[0m {'eval_loss': 0.6941379904747009, 'eval_runtime': 2.2274, 'eval_samples_per_second': 136.035, 'eval_steps_per_second': 8.53, 'epoch': 3.0}
Trial _objective_4df1a_00008 completed after

[2m[36m(_objective pid=15399)[0m                                                  100%|██████████| 129/129 [03:00<00:00,  1.08s/it]100%|██████████| 129/129 [03:00<00:00,  1.40s/it]


Trial _objective_4df1a_00009 started with configuration:
+-------------------------------------------------+
| Trial _objective_4df1a_00009 config             |
+-------------------------------------------------+
| learning_rate                             1e-05 |
| num_train_epochs                              2 |
| per_device_train_batch_size                  32 |
| seed                                    38.0065 |
+-------------------------------------------------+

Trial status: 9 TERMINATED | 1 RUNNING
Current time: 2023-09-21 17:44:46. Total running time: 37min 34s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eva

[2m[36m(_objective pid=16214)[0m Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
[2m[36m(_objective pid=16214)[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/172 [00:00<?, ?it/s]
  1%|          | 1/172 [00:00<02:25,  1.18it/s]
  1%|          | 2/172 [00:01<01:57,  1.44it/s]
  2%|▏         | 3/172 [00:02<01:53,  1.49it/s]
  2%|▏         | 4/172 [00:02<01:49,  1.54it/s]
  3%|▎         | 5/172 [00:03<01:47,  1.56it/s]
  3%|▎         | 6/172 [00:03<01:45,  1.57it/s]
  4%|▍         | 7/172 [00:04<01:44,  1.58it/s]
  5%|▍         | 8/172 [00:05<01:43,  1.58it/s]
  5%|▌         | 9/172 [00:05<01:42,  1.59it/s]
  6%|▌         | 10/172 [00:06<01:41,  1.59it/s]
  6%|▋         | 11/172 [00:07<01:40,  1.60it/s]
  7%|▋         | 12/172 [00:07<01:40,  1.59it/s]
  8%|▊         |

Trial status: 9 TERMINATED | 1 RUNNING
Current time: 2023-09-21 17:45:16. Total running time: 38min 4s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00009   RUNNING          7.96157e-06                    2   38.0065                        32                                                                                                   

 23%|██▎       | 40/172 [00:25<01:25,  1.55it/s]
 24%|██▍       | 41/172 [00:26<01:24,  1.55it/s]
 24%|██▍       | 42/172 [00:26<01:24,  1.54it/s]
 25%|██▌       | 43/172 [00:27<01:23,  1.54it/s]
 26%|██▌       | 44/172 [00:28<01:22,  1.54it/s]
 26%|██▌       | 45/172 [00:28<01:22,  1.54it/s]
 27%|██▋       | 46/172 [00:29<01:21,  1.54it/s]
 27%|██▋       | 47/172 [00:30<01:21,  1.54it/s]
 28%|██▊       | 48/172 [00:30<01:20,  1.54it/s]
 28%|██▊       | 49/172 [00:31<01:20,  1.53it/s]
 29%|██▉       | 50/172 [00:32<01:19,  1.53it/s]
 30%|██▉       | 51/172 [00:32<01:19,  1.53it/s]
 30%|███       | 52/172 [00:33<01:18,  1.53it/s]
 31%|███       | 53/172 [00:34<01:18,  1.52it/s]
 31%|███▏      | 54/172 [00:34<01:17,  1.52it/s]
 32%|███▏      | 55/172 [00:35<01:16,  1.52it/s]
 33%|███▎      | 56/172 [00:36<01:16,  1.52it/s]
 33%|███▎      | 57/172 [00:36<01:16,  1.51it/s]
 34%|███▎      | 58/172 [00:37<01:15,  1.52it/s]
 34%|███▍      | 59/172 [00:38<01:14,  1.52it/s]
 35%|███▍      | 60/

[2m[36m(_objective pid=16214)[0m {'loss': 0.7024, 'learning_rate': 2.5154102965224657e-08, 'epoch': 1.0}
Trial status: 9 TERMINATED | 1 RUNNING
Current time: 2023-09-21 17:45:46. Total running time: 38min 34s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00009   RUNNING          7.96157e-06                    2   38.0065                

[2m[36m(_objective pid=16214)[0m 
 16%|█▌        | 3/19 [00:00<00:00, 20.85it/s][A
[2m[36m(_objective pid=16214)[0m 
 32%|███▏      | 6/19 [00:00<00:01, 11.31it/s][A
[2m[36m(_objective pid=16214)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  9.85it/s][A
[2m[36m(_objective pid=16214)[0m 
 53%|█████▎    | 10/19 [00:00<00:00,  9.30it/s][A
[2m[36m(_objective pid=16214)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  9.36it/s][A
[2m[36m(_objective pid=16214)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  9.18it/s][A
[2m[36m(_objective pid=16214)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  9.04it/s][A
[2m[36m(_objective pid=16214)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.84it/s][A
[2m[36m(_objective pid=16214)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.73it/s][A
[2m[36m(_objective pid=16214)[0m 
 84%|████████▍ | 16/19 [00:01<00:00,  8.43it/s][A
[2m[36m(_objective pid=16214)[0m 
 89%|████████▉ | 17/19 [00:01<00:00,  8.37it/s][A
[2m[36m(_objective pid=16214)[0m

Trial _objective_4df1a_00009 finished iteration 1 at 2023-09-21 17:45:48. Total running time: 38min 36s
+-------------------------------------------------+
| Trial _objective_4df1a_00009 result             |
+-------------------------------------------------+
| time_this_iter_s                         62.522 |
| time_total_s                             62.522 |
| training_iteration                            1 |
| epoch                                         1 |
| eval_loss                               0.70596 |
| eval_runtime                             2.2512 |
| eval_samples_per_second                 134.597 |
| eval_steps_per_second                      8.44 |
| objective                               0.70596 |
+-------------------------------------------------+

[2m[36m(_objective pid=16214)[0m {'eval_loss': 0.7059649229049683, 'eval_runtime': 2.2512, 'eval_samples_per_second': 134.597, 'eval_steps_per_second': 8.44, 'epoch': 1.0}


[2m[36m(_objective pid=16214)[0m                                                 
[2m[36m(_objective pid=16214)[0m                                                [A 50%|█████     | 86/172 [00:57<00:42,  2.04it/s]
[2m[36m(_objective pid=16214)[0m 100%|██████████| 19/19 [00:02<00:00,  8.51it/s][A
                                               [A
 51%|█████     | 87/172 [01:03<03:50,  2.71s/it]
 51%|█████     | 88/172 [01:03<02:55,  2.09s/it]
 52%|█████▏    | 89/172 [01:04<02:16,  1.65s/it]
 52%|█████▏    | 90/172 [01:04<01:50,  1.34s/it]
 53%|█████▎    | 91/172 [01:05<01:31,  1.13s/it]
 53%|█████▎    | 92/172 [01:06<01:18,  1.02it/s]
 54%|█████▍    | 93/172 [01:06<01:09,  1.14it/s]
 55%|█████▍    | 94/172 [01:07<01:02,  1.25it/s]
 55%|█████▌    | 95/172 [01:08<00:57,  1.33it/s]
 56%|█████▌    | 96/172 [01:08<00:54,  1.40it/s]
 56%|█████▋    | 97/172 [01:09<00:51,  1.45it/s]
 57%|█████▋    | 98/172 [01:09<00:49,  1.49it/s]
 58%|█████▊    | 99/172 [01:10<00:48,  1.51it/s]


Trial status: 9 TERMINATED | 1 RUNNING
Current time: 2023-09-21 17:46:16. Total running time: 39min 4s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00009   RUNNING          7.96157e-06                    2   38.0065                        32        1             62.522      0.705965      0.705965           2.2512                  134.597 

 72%|███████▏  | 123/172 [01:25<00:31,  1.56it/s]
 72%|███████▏  | 124/172 [01:26<00:30,  1.56it/s]
 73%|███████▎  | 125/172 [01:27<00:30,  1.56it/s]
 73%|███████▎  | 126/172 [01:27<00:29,  1.56it/s]
 74%|███████▍  | 127/172 [01:28<00:28,  1.56it/s]
 74%|███████▍  | 128/172 [01:29<00:28,  1.56it/s]
 75%|███████▌  | 129/172 [01:29<00:27,  1.56it/s]
 76%|███████▌  | 130/172 [01:30<00:26,  1.56it/s]
 76%|███████▌  | 131/172 [01:30<00:26,  1.57it/s]
 77%|███████▋  | 132/172 [01:31<00:25,  1.56it/s]
 77%|███████▋  | 133/172 [01:32<00:24,  1.57it/s]
 78%|███████▊  | 134/172 [01:32<00:24,  1.57it/s]
 78%|███████▊  | 135/172 [01:33<00:23,  1.56it/s]
 79%|███████▉  | 136/172 [01:34<00:23,  1.56it/s]
 80%|███████▉  | 137/172 [01:34<00:22,  1.56it/s]
 80%|████████  | 138/172 [01:35<00:21,  1.56it/s]
 81%|████████  | 139/172 [01:36<00:21,  1.57it/s]
 81%|████████▏ | 140/172 [01:36<00:20,  1.56it/s]
 82%|████████▏ | 141/172 [01:37<00:19,  1.56it/s]
 83%|████████▎ | 142/172 [01:37<00:19,  1.56it/s]


Trial status: 9 TERMINATED | 1 RUNNING
Current time: 2023-09-21 17:46:46. Total running time: 39min 34s
Logical resource usage: 1.0/2 CPUs, 1.0/1 GPUs
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name               status         learning_rate     num_train_epochs       seed     ..._train_batch_size     iter     total time (s)     objective     eval_loss     eval_runtime     ...amples_per_second |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| _objective_4df1a_00009   RUNNING          7.96157e-06                    2   38.0065                        32        1             62.522      0.705965      0.705965           2.2512                  134.597

 98%|█████████▊| 169/172 [01:55<00:01,  1.55it/s]
 99%|█████████▉| 170/172 [01:55<00:01,  1.56it/s]
 99%|█████████▉| 171/172 [01:56<00:00,  1.55it/s]
100%|██████████| 172/172 [01:56<00:00,  2.07it/s]
  0%|          | 0/19 [00:00<?, ?it/s][A


[2m[36m(_objective pid=16214)[0m {'loss': 0.7005, 'learning_rate': 5.030820593044931e-08, 'epoch': 2.0}


[2m[36m(_objective pid=16214)[0m 
 11%|█         | 2/19 [00:00<00:00, 17.84it/s][A
[2m[36m(_objective pid=16214)[0m 
 21%|██        | 4/19 [00:00<00:01, 10.66it/s][A
[2m[36m(_objective pid=16214)[0m 
 32%|███▏      | 6/19 [00:00<00:01,  9.44it/s][A
[2m[36m(_objective pid=16214)[0m 
 42%|████▏     | 8/19 [00:00<00:01,  8.86it/s][A
[2m[36m(_objective pid=16214)[0m 
 47%|████▋     | 9/19 [00:00<00:01,  8.75it/s][A
[2m[36m(_objective pid=16214)[0m 
 53%|█████▎    | 10/19 [00:01<00:01,  8.64it/s][A
[2m[36m(_objective pid=16214)[0m 
 58%|█████▊    | 11/19 [00:01<00:00,  8.78it/s][A
[2m[36m(_objective pid=16214)[0m 
 63%|██████▎   | 12/19 [00:01<00:00,  8.67it/s][A
[2m[36m(_objective pid=16214)[0m 
 68%|██████▊   | 13/19 [00:01<00:00,  8.59it/s][A
[2m[36m(_objective pid=16214)[0m 
 74%|███████▎  | 14/19 [00:01<00:00,  8.66it/s][A
[2m[36m(_objective pid=16214)[0m 
 79%|███████▉  | 15/19 [00:01<00:00,  8.62it/s][A
[2m[36m(_objective pid=16214)[0m 


Trial _objective_4df1a_00009 finished iteration 2 at 2023-09-21 17:46:49. Total running time: 39min 38s
+-------------------------------------------------+
| Trial _objective_4df1a_00009 result             |
+-------------------------------------------------+
| time_this_iter_s                        61.6286 |
| time_total_s                            124.151 |
| training_iteration                            2 |
| epoch                                         2 |
| eval_loss                               0.70483 |
| eval_runtime                             2.2515 |
| eval_samples_per_second                 134.576 |
| eval_steps_per_second                     8.439 |
| objective                               0.70483 |
+-------------------------------------------------+

[2m[36m(_objective pid=16214)[0m {'eval_loss': 0.7048310041427612, 'eval_runtime': 2.2515, 'eval_samples_per_second': 134.576, 'eval_steps_per_second': 8.439, 'epoch': 2.0}
Trial _objective_4df1a_00009 completed afte

BestRun(run_id='4df1a_00006', objective=0.7192806005477905, hyperparameters={'learning_rate': 1.6738085788752168e-05, 'num_train_epochs': 2, 'seed': 2.8199608653310015, 'per_device_train_batch_size': 32}, run_summary=<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x7b78df854610>)

[2m[36m(_objective pid=16214)[0m                                                  100%|██████████| 172/172 [02:15<00:00,  2.07it/s]100%|██████████| 172/172 [02:15<00:00,  1.27it/s]


In [None]:
# Optimizer and learning-rate scheduler. For fine-tuning, the authors of the
# original BERT paper recommend choosing from the following values:
# - Batch size: 16, 32 (we set batch_size=32 when we created the dataloader)
# - Optimizer: Adam
# - Learning rate (lr): 5e-5, 3e-5, 2e-5 (we use lr = 2e-5)
# - Number of epochs: 2, 3, 4 (we will use 4)

#optimizer = AdamW(model.parameters(),
#                  lr = 2e-5, # default is 5e-5, our notebook had 2e-5
#                  eps = 1e-8   # default is 1e-8
#                )

In [None]:
#from transformers import get_linear_schedule_with_warmup

#epochs = 4

# Total number of training steps is [number of batches] x [number of epochs].
#total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
#scheduler = get_linear_schedule_with_warmup(optimizer,
#                                            num_warmup_steps = 0,
#                                            num_training_steps = total_steps)

In [None]:
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    """Return the fraction of predictions that match the labels.

    `preds` holds per-class scores with one row per example; the predicted
    class is the arg-max along axis 1. Predictions and labels are both
    flattened before being compared element-wise.
    """
    predicted_classes = np.argmax(preds, axis=1).ravel()
    true_classes = labels.ravel()
    n_correct = np.sum(predicted_classes == true_classes)
    return n_correct / len(true_classes)

In [None]:
import time
import datetime

def format_time(elapsed):
    '''
    Render a duration given in seconds as a string.

    The value is rounded to the nearest whole second and then formatted by
    `datetime.timedelta`, e.g. 65.4 -> '0:01:05' (h:mm:ss).
    '''
    whole_seconds = int(round(elapsed))
    return str(datetime.timedelta(0, whole_seconds))

In [None]:
# Fine-tuning: manual training / validation loop.
#
# NOTE(review): this cell reads `model`, `optimizer`, `scheduler`, `epochs`,
# `train_dataloader`, `validation_dataloader` and `device` from other cells.
# The nearby optimizer / scheduler / `epochs` setup appears to be commented
# out -- confirm these names are defined before running this cell on a fresh
# kernel (Restart & Run All).

# Per-epoch statistics; one dict is appended at the end of every epoch.
training_stats = []

# Measure the total training time for the whole run.
total_t0 = time.time()

# For each epoch...
for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================

    # Perform one full pass over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_train_loss = 0

    # Put the model into training mode (the validation phase below switches
    # it to evaluation mode, so this must be redone every epoch).
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Progress update every 40 batches (the very first batch is skipped).
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # Unpack this training batch and copy each tensor to `device`.
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Always clear any previously calculated gradients before performing a
        # backward pass; PyTorch doesn't do this automatically.
        optimizer.zero_grad()

        # Forward pass. Documentation for the model call:
        # https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertForSequenceClassification
        # With `return_dict=False` the outputs are unpacked here as a
        # (loss, logits) pair; the loss is available because labels are
        # passed in. The "logits" are the output values prior to applying an
        # activation function like the softmax.
        loss, logits = model(b_input_ids,
                             attention_mask=b_input_mask,
                             labels=b_labels,
                             return_dict=False)

        # Accumulate the training loss over all of the batches so that the
        # average can be calculated at the end; `.item()` extracts the Python
        # number from the single-value loss tensor.
        total_train_loss += loss.item()

        # Backward pass to calculate the gradients.
        loss.backward()

        # Clip the norm of the gradients to 1.0.
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update the parameters using the computed gradients, then advance
        # the learning-rate schedule by one step.
        optimizer.step()
        scheduler.step()

    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))

    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Put the model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    model.eval()

    # Tracking variables. Accuracy is tracked as (correct, seen) counts so a
    # smaller final batch does not get the same weight as a full one.
    total_eval_correct = 0
    total_eval_examples = 0
    total_eval_loss = 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:

        # Unpack this validation batch and copy each tensor to `device`.
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Tell pytorch not to bother with constructing the compute graph during
        # the forward pass, since this is only needed for backprop (training).
        with torch.no_grad():

            # Forward pass, calculate logit predictions.
            (loss, logits) = model(b_input_ids,
                                   attention_mask=b_input_mask,
                                   labels=b_labels,
                                   return_dict=False)

        # Accumulate the validation loss.
        total_eval_loss += loss.item()

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Convert this batch's accuracy back into a number of correct
        # predictions and accumulate it together with the batch size.
        n_batch_examples = len(label_ids)
        total_eval_correct += flat_accuracy(logits, label_ids) * n_batch_examples
        total_eval_examples += n_batch_examples

    # Report the final accuracy for this validation run (per example, not
    # per batch).
    avg_val_accuracy = total_eval_correct / total_eval_examples
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(validation_dataloader)

    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

In [None]:
# Summarise the training run as a table: one row per recorded epoch, with the
# 'epoch' number used as the row index.
df_stats = pd.DataFrame(training_stats).set_index('epoch')

# A hack to force the column headers to wrap.
#df = df.style.set_table_styles([dict(selector="th",props=[('max-width', '70px')])])

In [None]:
# Display the table: as the last expression of the cell, `df_stats` is
# rendered as the cell's output.
df_stats

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Use plot styling from seaborn: dark grid with a larger font scale.
# (A single call; a second `sns.set(...)` would re-apply the defaults for
# every argument it is not given.)
sns.set(style='darkgrid', font_scale=1.5)

# Increase the plot size.
plt.rcParams["figure.figsize"] = (12,6)

# Plot the learning curve: training vs. validation loss per epoch.
plt.plot(df_stats['Training Loss'], 'b-o', label="Training")
plt.plot(df_stats['Valid. Loss'], 'g-o', label="Validation")

# Label the plot.
plt.title("Training & Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

# One tick per recorded epoch (the 'epoch' index of `df_stats`) instead of a
# hard-coded 1..4, so the axis stays correct for any number of epochs.
plt.xticks(list(df_stats.index))

plt.show()

In [None]:
# save the model and the tokenizer

import os

# Destination directory on the mounted Google Drive for the fine-tuned model
# and its tokenizer.
out_dir = '/content/gdrive/My Drive/eco_project_model/bert_base'

# `exist_ok=True` already makes this a no-op when the directory exists, so a
# separate `os.path.exists` check is unnecessary.
os.makedirs(out_dir, exist_ok=True)

# Write the model and the tokenizer into the same directory.
model.save_pretrained(out_dir)
tokenizer.save_pretrained(out_dir)