Make initializations

In [1]:
import cbh_data_definitions
import pathlib
import os

import optuna
import pytorch_lightning as pl
import mlflow
from ray import tune
import ray
import ray.tune
import ray.tune.search
import ray.tune.search.optuna
from ray.tune.search.optuna import OptunaSearch
from ray.tune.integration.mlflow import mlflow_mixin
from ray.tune.search import ConcurrencyLimiter
from pytorch_lightning.callbacks import (
    RichProgressBar,
)
from ray.tune.integration.pytorch_lightning import TuneReportCallback
import numpy as np
import datetime
import cbh_torch_MLP

In [2]:
# intialize some settings: mlflow, data directory, resources
root_data_directory = pathlib.Path(os.environ["SCRATCH"]) / "cbh_data"

dev_data_path = root_data_directory / "analysis_ready" / "dev_randomized.zarr"
training_data_path = root_data_directory / "analysis_ready" / "train_randomized.zarr"

mlflow_command_line_run = """
    mlflow server --port 5001 --backend-store-uri sqlite:///mlflowSQLserver.db  --default-artifact-root ./mlflow_artifacts/
"""
mlflow_server_address = 'vld425'
mlflow_server_port = 5001
mlflow_server_uri = f'http://{mlflow_server_address}:{mlflow_server_port:d}'
mlflow_artifact_root = pathlib.Path('./mlflow_artifacts/')

hparams_for_mlflow = {}

redefine data

In [3]:
# init data
(
    train_input,
    train_labels,
    _,
) = cbh_data_definitions.load_data_from_zarr(training_data_path)

(
    dev_input, 
    dev_labels, 
    _
) = cbh_data_definitions.load_data_from_zarr(dev_data_path)

# the cloud volume is not needed for the task, so isn't saved on the load
# show a chunk
train_input

Loaded zarr, file information:
 Name              : /
Type              : zarr.hierarchy.Group
Read-only         : False
Synchronizer type : zarr.sync.ThreadSynchronizer
Store type        : zarr.storage.DirectoryStore
No. members       : 2
No. arrays        : 2
No. groups        : 0
Arrays            : cloud_base_label_y.zarr, humidity_temp_pressure_x.zarr
 

Loaded zarr, file information:
 Name              : /
Type              : zarr.hierarchy.Group
Read-only         : False
Synchronizer type : zarr.sync.ThreadSynchronizer
Store type        : zarr.storage.DirectoryStore
No. members       : 2
No. arrays        : 2
No. groups        : 0
Arrays            : cloud_base_label_y.zarr, humidity_temp_pressure_x.zarr
 



Unnamed: 0,Array,Chunk
Bytes,87.48 GiB,1.82 GiB
Shape,"(111820800, 70, 3)","(2329600, 70, 3)"
Count,2 Graph Layers,48 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 87.48 GiB 1.82 GiB Shape (111820800, 70, 3) (2329600, 70, 3) Count 2 Graph Layers 48 Chunks Type float32 numpy.ndarray",3  70  111820800,

Unnamed: 0,Array,Chunk
Bytes,87.48 GiB,1.82 GiB
Shape,"(111820800, 70, 3)","(2329600, 70, 3)"
Count,2 Graph Layers,48 Chunks
Type,float32,numpy.ndarray


In [4]:
# limit the data by a factor for less data in a tuning trial
factors_of_chunk = [n for n in range(1, train_input.chunksize[0] + 1) if train_input.chunksize[0] % n == 0]
print("Factors of chunk: ", factors_of_chunk)
hparams_for_mlflow['Limited sample number'] =  -1

Factors of chunk:  [1, 2, 4, 5, 7, 8, 10, 13, 14, 16, 20, 25, 26, 28, 32, 35, 40, 50, 52, 56, 64, 65, 70, 80, 91, 100, 104, 112, 128, 130, 140, 160, 175, 182, 200, 208, 224, 256, 260, 280, 320, 325, 350, 364, 400, 416, 448, 455, 512, 520, 560, 640, 650, 700, 728, 800, 832, 896, 910, 1024, 1040, 1120, 1280, 1300, 1400, 1456, 1600, 1664, 1792, 1820, 2080, 2240, 2275, 2560, 2600, 2800, 2912, 3200, 3328, 3584, 3640, 4160, 4480, 4550, 5120, 5200, 5600, 5824, 6400, 6656, 7168, 7280, 8320, 8960, 9100, 10400, 11200, 11648, 12800, 13312, 14560, 16640, 17920, 18200, 20800, 22400, 23296, 25600, 29120, 33280, 35840, 36400, 41600, 44800, 46592, 58240, 66560, 72800, 83200, 89600, 93184, 116480, 145600, 166400, 179200, 232960, 291200, 332800, 465920, 582400, 1164800, 2329600]


setup study

In [5]:
# DEFINE ALL SETTINGS FOR TRAINING, includes hparam space
experiment_name = 'cbh-hparam-tuning'
CPU_COUNT = 8
RAM_GB = 100
hparams_for_mlflow['CPU Count'] = CPU_COUNT
hparams_for_mlflow['Compute Memory'] = RAM_GB
thread_count_for_dask = CPU_COUNT
dataset_method = '1chunk'
randomize_chunkwise_1chunk = False
shuffle_train_data = False
collate_fn = None # alt: cbh_data_definitions.dataloader_collate_with_dask
num_workers_dataloader = 0 # alt: CPU_COUNT +-
global_trail_number = 0
max_time_for_trial = "00:02:00:00"  # dd:hh:mm:ss
hparams_for_mlflow["Training timeout"] = max_time_for_trial

max_node_num_exclusive = 513
max_layers = 11
factors_for_hparam_choice = [factor for factor in factors_of_chunk if (factor<3300 and factor>3)]
mlp_search_space = {
    "epoch": 1,
    "lr": tune.quniform(0.0001, 0.01, 0.00005),
    "data_limit": 4,#tune.randint(4, int(len(train_labels.chunks[0]) / 4)), # multiple chunk ind by chunklen: train_input.chunksize[0])
    # "activation": tune.choice(["relu", "tanh"]),
    "batch_size": tune.choice(factors_for_hparam_choice),
    "arch_name":"MLP",
    "hidden_layers":tune.randint(1,max_layers),
    "activation":tune.choice(["relu", "tanh"]),
    "input_size":(train_input.shape[2] * train_input.shape[1]),
    "output_size": train_input.shape[1],
    # "layer_node_num": tune.sample_from(lambda spec: 8*np.random.randint(1,int(max_node_num_exclusive/8), size=spec.config.hidden_layers)), # DOES NOT WORK WITH OPTUNA SAMPLER
    "deterministic":False,
    "chkpt_time":datetime.timedelta(minutes=15),
    "max_time":max_time_for_trial
    
}
layer_pattern = 'layer_node_number_{layer_num}_div_8'
for layer_num in range(max_layers):
    mlp_search_space[layer_pattern.format(layer_num=layer_num)] = tune.randint(1,int(max_node_num_exclusive/8))
print(mlp_search_space)

{'epoch': 1, 'lr': <ray.tune.search.sample.Float object at 0x2b807e2f87c0>, 'data_limit': 4, 'batch_size': <ray.tune.search.sample.Categorical object at 0x2b807e2fb970>, 'arch_name': 'MLP', 'hidden_layers': <ray.tune.search.sample.Integer object at 0x2b807e2f8eb0>, 'activation': <ray.tune.search.sample.Categorical object at 0x2b807e2f8dc0>, 'input_size': 210, 'output_size': 70, 'deterministic': False, 'chkpt_time': datetime.timedelta(seconds=900), 'max_time': '00:02:00:00', 'layer_node_number_0_div_8': <ray.tune.search.sample.Integer object at 0x2b807e2fad40>, 'layer_node_number_1_div_8': <ray.tune.search.sample.Integer object at 0x2b807e2facb0>, 'layer_node_number_2_div_8': <ray.tune.search.sample.Integer object at 0x2b807e2fac20>, 'layer_node_number_3_div_8': <ray.tune.search.sample.Integer object at 0x2b807e2fab90>, 'layer_node_number_4_div_8': <ray.tune.search.sample.Integer object at 0x2b807e2fab00>, 'layer_node_number_5_div_8': <ray.tune.search.sample.Integer object at 0x2b807e2f

In [6]:
class MLFlowLogger(pl.loggers.MLFlowLogger): #overwrite mlflogger
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

    def after_save_checkpoint(self, model_checkpoint: pl.callbacks.ModelCheckpoint) -> None:
        """
        Called after model checkpoint callback saves a new checkpoint.
        """
        best_chkpt = torch.load(model_checkpoint.best_model_path)
        checkpoint_for_mlflow = {
            "val loss": float(best_chkpt['callbacks'][list(key for key in list(best_chkpt['callbacks'].keys()) if "ModelCheckpoint" in key)[0]]['current_score']),
            "train loss at step-1": list(train_loss_metric.value for train_loss_metric in mlf_logger._mlflow_client.get_metric_history(run.info.run_id, "Train loss") if (int(train_loss_metric.step) == int(best_chkpt['global_step']-1)))[0],
            "global_step": best_chkpt['global_step'],
            "model_state_dict": best_chkpt['state_dict'],
            "checkpoint": best_chkpt,
        }
        with TemporaryDirectory() as tmpdirname:
            f_name = os.path.join(tmpdirname, f"{run.info.run_id}-best_model_checkpoint-step_{best_chkpt['global_step']}.pt")
            torch.save(checkpoint_for_mlflow, f_name)
            mlflow.log_artifact(f_name)

mlflow.set_tracking_uri(mlflow_server_uri)
# make vars global
mlf_exp = None
mlf_exp_id = None
try: 
    print('Creating experiment')
    mlf_exp_id = mlflow.create_experiment(experiment_name)
    mlf_exp = mlflow.get_experiment(mlf_exp_id)
except mlflow.exceptions.RestException as e:
    print("Caught")
    if False:
        print(e)
    mlf_exp = mlflow.get_experiment_by_name(experiment_name)
print("Success")

mlp_search_space["mlflow"] = {
    "tracking_uri":mlflow_server_uri,
    "experiment_id":mlf_exp_id,
    "experiment_name":experiment_name,
    # "run_name":(experiment_name+str(datetime.datetime.now())),
}

Creating experiment
Caught
Success


In [7]:
@mlflow_mixin
def objective(ray_config):
    # def model hparams with config
    # print(ray_config)
    # print(dir(ray_config))
    # def data
    print("Test print")
    datamodule = cbh_data_definitions.CBH_DataModule(
        train_input, train_labels,
        dev_input, dev_labels,
        thread_count_for_dask,
        ray_config['batch_size'],
        num_workers = num_workers_dataloader,
        collate_fn = collate_fn,
        shuffle = shuffle_train_data,
        randomize_chunkwise = randomize_chunkwise_1chunk,
        method=dataset_method,
    )
    #def model
    ff_nodes_strings = []
    for key in ray_config:
        if key.startswith("layer_node_number_"):
            ff_nodes_strings.append(key)
    ff_nodes_strings = sorted(ff_nodes_strings)
    ff_nodes = [(8*ray_config[ff_node_num]) for ff_node_num in ff_nodes_strings]
    print(ray_config['hidden_layers'])
    print(ff_nodes)
    model = cbh_torch_MLP.CloudBaseMLP(
        ray_config['input_size'],
        ff_nodes,
        ray_config['output_size'],
        ray_config['hidden_layers'],
        ray_config['activation'],
        ray_config['lr'],
    )
    # def experiment naming
    timestamp_template = '{dt.year:04d}{dt.month:02d}{dt.day:02d}T{dt.hour:02d}{dt.minute:02d}{dt.second:02d}'
    run_name_template = 'cbh_challenge_{network_name}_' + timestamp_template
    global global_trail_number
    current_run_name = run_name_template.format(network_name=model.__class__.__name__,
                                                    dt=datetime.datetime.now()
                                                   )
    print("Finished model init")
    # begin mlflow experiment run
    with mlflow.start_run(experiment_id=mlf_exp.experiment_id, run_name=current_run_name, nested=True) as run:
        print("Started mlflow run")
        mlflow.pytorch.autolog()
        mlf_logger = MLFlowLogger(experiment_name=experiment_name, tracking_uri=mlflow_server_uri, run_id=run.info.run_id)
        print("Finished init logger")
        # define trainer
        time_for_checkpoint = ray_config['chkpt_time']
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            train_time_interval=time_for_checkpoint,
            dirpath=run.info.artifact_uri,
            monitor="val_loss_mean",
            save_on_train_epoch_end=False,
            mode="min"
        )
        callbacks = [checkpoint_callback, TuneReportCallback(on="validation_end")]
        print("Finished define callbacks")
        trainer_hparams = {
            'max_epochs':ray_config['epoch'],
            'deterministic':ray_config['deterministic'],
            'val_check_interval':0.05, # val every percentage of the epoch or an INT for after a number of batches
            'devices':"auto",
            'accelerator':"auto",
            # 'max_time':ray_config['max_time'],
            # 'replace_sampler_ddp':False,
            # 'enable_checkpointing':True,
            # 'strategy':None,
            'callbacks':callbacks,
            'logger':mlf_logger,
        }
        print("Finished init hparams kwargs")

        print("Finished log hparams mlflow")
        print(trainer_hparams)
        trainer = pl.Trainer(
            **trainer_hparams
        )
        print("REACH all init before fit")
        trainer.fit(model=model, datamodule=datamodule)
        path_to_save = '{dt.year:04d}{dt.month:02d}{dt.day:02d}-{dt.hour:02d}{dt.minute:02d}{dt.second:02d}'.format(dt=datetime.datetime.now())
        trainer.save_checkpoint(filepath=run.info.artifact_uri + f'/post_epoch_modelchkpt_{path_to_save}')

In [None]:
searcher = OptunaSearch(metric=["val_loss_mean"], mode=["min"])
algo = ConcurrencyLimiter(searcher, max_concurrent=20)#int(CPU_COUNT*(3/4)))
num_hparam_trials = 20

tuner = tune.Tuner(
    objective,
    tune_config=tune.TuneConfig(
        search_alg=algo,
        num_samples=num_hparam_trials,
    ),
    param_space=mlp_search_space,
)
results = tuner.fit()

2022-11-15 11:19:26,834	INFO worker.py:1518 -- Started a local Ray instance.
  return ot.distributions.DiscreteUniformDistribution(
  return ot.distributions.IntUniformDistribution(
[32m[I 2022-11-15 11:19:34,355][0m A new study created in memory with name: optuna[0m


Trial name,status,loc,activation,batch_size,hidden_layers,layer_node_number...,layer_node_number....1,layer_node_number....2,layer_node_number....3,layer_node_number....4,layer_node_number....5,layer_node_number....6,layer_node_number....7,layer_node_number....8,layer_node_number....9,layer_node_number....10,lr
objective_620b53aa,RUNNING,10.154.1.24:10754,tanh,175,6,17,15,33,14,28,2,41,21,30,25,33,0.0052
objective_67125ac4,RUNNING,10.154.1.24:10831,relu,5,4,63,61,42,22,31,8,20,14,56,35,20,0.00635
objective_6ae00066,RUNNING,10.154.1.24:10883,relu,910,9,17,59,19,8,54,57,53,36,12,26,33,0.00595
objective_6e77dae6,RUNNING,10.154.1.24:10975,relu,35,8,56,3,43,21,40,1,5,11,1,13,35,0.00295
objective_720d1900,RUNNING,10.154.1.24:11409,relu,260,10,15,48,28,5,7,36,8,3,50,35,51,0.00695
objective_75b5f4d2,RUNNING,10.154.1.24:11609,tanh,650,10,53,56,40,51,31,10,28,39,49,61,10,0.005
objective_793d587a,RUNNING,10.154.1.24:11807,tanh,40,6,38,46,5,9,17,22,44,62,40,48,18,0.0046
objective_7da702da,RUNNING,10.154.1.24:11991,relu,832,9,31,61,34,15,50,21,12,20,51,24,63,0.0026
objective_81bfd41e,RUNNING,10.154.1.24:12165,tanh,56,5,5,16,22,53,52,39,22,11,8,17,62,0.0087
objective_856c64a6,RUNNING,10.154.1.24:12327,relu,65,2,55,21,38,21,41,25,35,21,39,62,34,0.0068




[2m[36m(objective pid=10754)[0m Test print
[2m[36m(objective pid=10754)[0m 6
[2m[36m(objective pid=10754)[0m [136, 120, 264, 112, 224, 16, 328, 168, 240, 200, 264]
[2m[36m(objective pid=10754)[0m Finished model init
[2m[36m(objective pid=10754)[0m Started mlflow run
[2m[36m(objective pid=10754)[0m Finished init logger
[2m[36m(objective pid=10754)[0m Finished define callbacks
[2m[36m(objective pid=10754)[0m Finished init hparams kwargs
[2m[36m(objective pid=10754)[0m Finished log hparams mlflow
[2m[36m(objective pid=10754)[0m {'max_epochs': 1, 'deterministic': False, 'val_check_interval': 0.05, 'devices': 'auto', 'accelerator': 'auto', 'callbacks': [<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x2b52c867b730>, <ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x2b52c86ad8d0>], 'logger': <__main__.MLFlowLogger object at 0x2b52c867b700>}
[2m[36m(objective pid=10754)[0m REACH all init before fit


[2m[36m(objective pid=10754)[0m GPU available: False, used: False
[2m[36m(objective pid=10754)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=10754)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=10754)[0m HPU available: False, using: 0 HPUs
[2m[36m(objective pid=10754)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10754)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(objective pid=10754)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10754)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10754)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10754)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10754)[0m 
[2m[36m(objective pid=10754)[0m   | Name              | Type             | Params
[2m[36m(objective pid=10754)[0m -------------------------------------------------------
[2m[36m(objective pid=10754)[0m 0 | layer_norm      

[2m[36m(objective pid=10831)[0m Test print
[2m[36m(objective pid=10831)[0m 4
[2m[36m(objective pid=10831)[0m [504, 488, 336, 176, 248, 64, 160, 112, 448, 280, 160]
[2m[36m(objective pid=10831)[0m Finished model init
Sanity Checking: 0it [00:00, ?it/s]
[2m[36m(objective pid=10831)[0m Started mlflow run
[2m[36m(objective pid=10831)[0m Finished init logger
[2m[36m(objective pid=10831)[0m Finished define callbacks
[2m[36m(objective pid=10831)[0m Finished init hparams kwargs
[2m[36m(objective pid=10831)[0m Finished log hparams mlflow
[2m[36m(objective pid=10831)[0m {'max_epochs': 1, 'deterministic': False, 'val_check_interval': 0.05, 'devices': 'auto', 'accelerator': 'auto', 'callbacks': [<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x2b3b905438e0>, <ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x2b3b90571720>], 'logger': <__main__.MLFlowLogger object at 0x2b3b90543b80>}


[2m[36m(objective pid=10754)[0m   rank_zero_warn(


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 16.00it/s]
Epoch 0:   0%|          | 0/674096 [00:00<?, ?it/s]                        


[2m[36m(objective pid=10754)[0m   rank_zero_warn(


[2m[36m(objective pid=10883)[0m Test print
[2m[36m(objective pid=10883)[0m 9
[2m[36m(objective pid=10883)[0m [136, 472, 152, 64, 432, 456, 424, 288, 96, 208, 264]
[2m[36m(objective pid=10883)[0m Finished model init
[2m[36m(objective pid=10883)[0m Started mlflow run
[2m[36m(objective pid=10883)[0m Finished init logger
[2m[36m(objective pid=10883)[0m Finished define callbacks
[2m[36m(objective pid=10883)[0m Finished init hparams kwargs
[2m[36m(objective pid=10883)[0m Finished log hparams mlflow
[2m[36m(objective pid=10883)[0m {'max_epochs': 1, 'deterministic': False, 'val_check_interval': 0.05, 'devices': 'auto', 'accelerator': 'auto', 'callbacks': [<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x2afe4e153ac0>, <ray.tune.integration.pytorch_lightning.TuneReportCallback object at 0x2afe4e189960>], 'logger': <__main__.MLFlowLogger object at 0x2afe4e1539a0>}
Epoch 0:   0%|          | 1/674096 [00:05<1051:46:51,  5.62s/it, loss=4.31, v_

[2m[36m(objective pid=10975)[0m GPU available: False, used: False
[2m[36m(objective pid=10975)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=10975)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=10975)[0m HPU available: False, using: 0 HPUs
[2m[36m(objective pid=10975)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10975)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(objective pid=10975)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10975)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10975)[0m   rank_zero_deprecation(
[2m[36m(objective pid=10975)[0m   rank_zero_deprecation(


Epoch 0:   0%|          | 90/674096 [00:11<23:36:14,  7.93it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 91/674096 [00:11<23:30:30,  7.96it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 92/674096 [00:11<23:28:32,  7.98it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 93/674096 [00:11<23:21:01,  8.02it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 95/674096 [00:11<23:06:58,  8.10it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 97/674096 [00:11<22:49:50,  8.20it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 99/674096 [00:11<22:35:55,  8.28it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 100/674096 [00:11<22:27:56,  8.33it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 102/674096 [00:12<22:21:07,  8.38it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 103/674096 [00:12<22:14:38,  8.42it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 106/674096 [00:12<21:53:10,  8.55it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 108/674096 [0

[2m[36m(objective pid=10975)[0m 
[2m[36m(objective pid=10975)[0m   | Name              | Type             | Params
[2m[36m(objective pid=10975)[0m -------------------------------------------------------
[2m[36m(objective pid=10975)[0m 0 | layer_norm        | LayerNorm        | 420   
[2m[36m(objective pid=10975)[0m 1 | linears           | ModuleList       | 238 K 
[2m[36m(objective pid=10975)[0m 2 | normalize_outputs | Softmax          | 0     
[2m[36m(objective pid=10975)[0m 3 | crossentropy_loss | CrossEntropyLoss | 0     
[2m[36m(objective pid=10975)[0m -------------------------------------------------------
[2m[36m(objective pid=10975)[0m 239 K     Trainable params
[2m[36m(objective pid=10975)[0m 0         Non-trainable params
[2m[36m(objective pid=10975)[0m 239 K     Total params
[2m[36m(objective pid=10975)[0m 0.957     Total estimated model params size (MB)


Epoch 0:   0%|          | 190/674096 [00:16<16:22:21, 11.43it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 192/674096 [00:16<16:18:06, 11.48it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 194/674096 [00:16<16:13:46, 11.53it/s, loss=3.05, v_num=fc77]
Sanity Checking: 0it [00:00, ?it/s]
Epoch 0:   0%|          | 196/674096 [00:16<16:09:13, 11.59it/s, loss=3.04, v_num=fc77]


[2m[36m(objective pid=10975)[0m   rank_zero_warn(


Epoch 0:   0%|          | 198/674096 [00:17<16:06:07, 11.63it/s, loss=3.03, v_num=fc77]
[2m[36m(objective pid=11409)[0m Test print
[2m[36m(objective pid=11409)[0m 10
[2m[36m(objective pid=11409)[0m [120, 384, 224, 40, 56, 288, 64, 24, 400, 280, 408]
[2m[36m(objective pid=11409)[0m Finished model init
Epoch 0:   0%|          | 200/674096 [00:17<16:02:48, 11.67it/s, loss=3.02, v_num=fc77]
[2m[36m(objective pid=11409)[0m Started mlflow run
[2m[36m(objective pid=11409)[0m Finished init logger
[2m[36m(objective pid=11409)[0m Finished define callbacks
[2m[36m(objective pid=11409)[0m Finished init hparams kwargs
[2m[36m(objective pid=11409)[0m Finished log hparams mlflow
[2m[36m(objective pid=11409)[0m {'max_epochs': 1, 'deterministic': False, 'val_check_interval': 0.05, 'devices': 'auto', 'accelerator': 'auto', 'callbacks': [<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x2b719cd6faf0>, <ray.tune.integration.pytorch_lightning.TuneReport

[2m[36m(objective pid=10975)[0m   rank_zero_warn(


Epoch 0:   0%|          | 216/674096 [00:19<16:28:21, 11.36it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 218/674096 [00:19<16:23:48, 11.42it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 220/674096 [00:19<16:19:30, 11.47it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 222/674096 [00:19<16:15:23, 11.51it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 223/674096 [00:19<16:13:18, 11.54it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 225/674096 [00:19<16:09:23, 11.59it/s, loss=3.07, v_num=fc77]
Epoch 0:   0%|          | 227/674096 [00:19<16:05:20, 11.63it/s, loss=3.07, v_num=fc77]
Epoch 0:   0%|          | 229/674096 [00:19<16:01:15, 11.68it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 232/674096 [00:19<15:55:51, 11.75it/s, loss=3.07, v_num=fc77]
Epoch 0:   0%|          | 234/674096 [00:19<15:52:14, 11.79it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 236/674096 [00:19<15:49:08, 11.83it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 238/67

[2m[36m(objective pid=11991)[0m GPU available: False, used: False
[2m[36m(objective pid=11991)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=11991)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=11991)[0m HPU available: False, using: 0 HPUs


Epoch 0:   0%|          | 550/674096 [00:37<12:46:46, 14.64it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 268/3370440 [00:19<69:06:32, 13.55it/s, loss=3.02, v_num=fff0]
Epoch 0:   0%|          | 269/3370440 [00:19<69:01:50, 13.56it/s, loss=3.02, v_num=fff0]
Epoch 0:   0%|          | 551/674096 [00:37<12:48:23, 14.61it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 271/3370440 [00:19<68:49:19, 13.60it/s, loss=3, v_num=fff0]   
Epoch 0:   0%|          | 552/674096 [00:37<12:48:00, 14.62it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 554/674096 [00:37<12:46:58, 14.64it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 274/3370440 [00:20<68:30:00, 13.67it/s, loss=2.98, v_num=fff0]
Epoch 0:   0%|          | 556/674096 [00:37<12:46:12, 14.65it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 276/3370440 [00:20<68:19:50, 13.70it/s, loss=2.98, v_num=fff0]
Epoch 0:   0%|          | 559/674096 [00:38<12:44:42, 14.68it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 2

[2m[36m(objective pid=12165)[0m GPU available: False, used: False
[2m[36m(objective pid=12165)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=12165)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=12165)[0m HPU available: False, using: 0 HPUs
[2m[36m(objective pid=12165)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12165)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(objective pid=12165)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12165)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12165)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12165)[0m   rank_zero_deprecation(


Epoch 0:   0%|          | 643/674096 [00:43<12:41:46, 14.73it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 381/3370440 [00:25<63:29:04, 14.75it/s, loss=3.12, v_num=fff0]
Epoch 0:   0%|          | 645/674096 [00:43<12:41:11, 14.75it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 383/3370440 [00:25<63:24:17, 14.76it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 647/674096 [00:43<12:40:28, 14.76it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 385/3370440 [00:26<63:18:38, 14.79it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 650/674096 [00:43<12:39:23, 14.78it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 387/3370440 [00:26<63:12:31, 14.81it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 389/3370440 [00:26<63:07:52, 14.83it/s, loss=3.08, v_num=fff0]
Epoch 0:   0%|          | 652/674096 [00:44<12:40:32, 14.76it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 391/3370440 [00:26<63:03:37, 14.84it/s, loss=3.05, v_num=fff0]
Epoch 0:   0%|          | 

[2m[36m(objective pid=12165)[0m 
[2m[36m(objective pid=12165)[0m   | Name              | Type             | Params
[2m[36m(objective pid=12165)[0m -------------------------------------------------------
[2m[36m(objective pid=12165)[0m 0 | layer_norm        | LayerNorm        | 420   
[2m[36m(objective pid=12165)[0m 1 | linears           | ModuleList       | 317 K 
[2m[36m(objective pid=12165)[0m 2 | normalize_outputs | Softmax          | 0     
[2m[36m(objective pid=12165)[0m 3 | crossentropy_loss | CrossEntropyLoss | 0     
[2m[36m(objective pid=12165)[0m -------------------------------------------------------
[2m[36m(objective pid=12165)[0m 317 K     Trainable params
[2m[36m(objective pid=12165)[0m 0         Non-trainable params
[2m[36m(objective pid=12165)[0m 317 K     Total params
[2m[36m(objective pid=12165)[0m 1.271     Total estimated model params size (MB)


Epoch 0:   0%|          | 729/674096 [00:48<12:19:38, 15.17it/s, loss=3.01, v_num=fc77]
Epoch 0:   0%|          | 446/3370440 [00:30<63:30:10, 14.74it/s, loss=3, v_num=fff0]   
Epoch 0:   0%|          | 731/674096 [00:48<12:19:14, 15.18it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 448/3370440 [00:30<63:26:43, 14.75it/s, loss=3.02, v_num=fff0]
Epoch 0:   0%|          | 732/674096 [00:48<12:18:59, 15.19it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 733/674096 [00:48<12:19:20, 15.18it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 450/3370440 [00:30<63:24:00, 14.77it/s, loss=3.01, v_num=fff0]
Sanity Checking: 0it [00:00, ?it/s]
Epoch 0:   0%|          | 735/674096 [00:48<12:18:45, 15.19it/s, loss=3.03, v_num=fc77]


[2m[36m(objective pid=12165)[0m   rank_zero_warn(


Epoch 0:   0%|          | 737/674096 [00:48<12:18:08, 15.20it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 452/3370440 [00:30<63:32:16, 14.73it/s, loss=3.05, v_num=fff0]
Epoch 0:   0%|          | 740/674096 [00:48<12:17:15, 15.22it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 454/3370440 [00:30<63:26:41, 14.75it/s, loss=3.05, v_num=fff0]
Epoch 0:   0%|          | 742/674096 [00:48<12:16:57, 15.23it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 456/3370440 [00:30<63:22:46, 14.77it/s, loss=3.08, v_num=fff0]
Epoch 0:   0%|          | 742/674096 [00:48<12:16:58, 15.23it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 744/674096 [00:48<12:16:29, 15.24it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 458/3370440 [00:30<63:18:39, 14.79it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 746/674096 [00:48<12:16:10, 15.24it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 460/3370440 [00:31<63:15:14, 14.80it/s, loss=3.05, v_num=fff0]
Sanity Checking DataLoader 

[2m[36m(objective pid=12165)[0m   rank_zero_warn(


Epoch 0:   0%|          | 749/674096 [00:49<12:15:49, 15.25it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 464/3370440 [00:31<63:11:42, 14.81it/s, loss=3.04, v_num=fff0]
Epoch 0:   0%|          | 750/674096 [00:49<12:15:41, 15.25it/s, loss=3.07, v_num=fc77]
Epoch 0:   0%|          | 465/3370440 [00:31<63:11:53, 14.81it/s, loss=3.06, v_num=fff0]
Epoch 0:   0%|          | 0/2106520 [00:00<?, ?it/s] 
Epoch 0:   0%|          | 751/674096 [00:49<12:17:15, 15.22it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 467/3370440 [00:31<63:08:17, 14.83it/s, loss=3.03, v_num=fff0]
[2m[36m(objective pid=12327)[0m Test print
[2m[36m(objective pid=12327)[0m 2
[2m[36m(objective pid=12327)[0m [440, 168, 304, 168, 328, 200, 280, 168, 312, 496, 272]
[2m[36m(objective pid=12327)[0m Finished model init
Epoch 0:   0%|          | 753/674096 [00:49<12:17:14, 15.22it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 469/3370440 [00:31<63:08:44, 14.82it/s, loss=3.01, v_num=fff0]
[2m[36m

[2m[36m(objective pid=12327)[0m GPU available: False, used: False
[2m[36m(objective pid=12327)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=12327)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=12327)[0m HPU available: False, using: 0 HPUs


Epoch 0:   0%|          | 759/674096 [00:49<12:16:01, 15.25it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 475/3370440 [00:31<62:55:41, 14.88it/s, loss=3.04, v_num=fff0]
Epoch 0:   0%|          | 760/674096 [00:49<12:16:05, 15.25it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 477/3370440 [00:32<62:52:57, 14.89it/s, loss=3.06, v_num=fff0]
Epoch 0:   0%|          | 762/674096 [00:49<12:15:45, 15.25it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 479/3370440 [00:32<62:51:30, 14.89it/s, loss=3.06, v_num=fff0]
Epoch 0:   0%|          | 765/674096 [00:50<12:15:03, 15.27it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 481/3370440 [00:32<62:46:56, 14.91it/s, loss=3.1, v_num=fff0] 
Epoch 0:   0%|          | 767/674096 [00:50<12:14:33, 15.28it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 483/3370440 [00:32<62:42:33, 14.93it/s, loss=3.08, v_num=fff0]
Epoch 0:   0%|          | 769/674096 [00:50<12:14:00, 15.29it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 4

[2m[36m(objective pid=12627)[0m GPU available: False, used: False
[2m[36m(objective pid=12627)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=12627)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=12627)[0m HPU available: False, using: 0 HPUs
[2m[36m(objective pid=12627)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12627)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(objective pid=12627)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12627)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12627)[0m   rank_zero_deprecation(
[2m[36m(objective pid=12627)[0m   rank_zero_deprecation(


Epoch 0:   0%|          | 854/674096 [00:56<12:16:07, 15.24it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 573/3370440 [00:38<62:20:50, 15.01it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 36/2106520 [00:06<111:29:11,  5.25it/s, loss=3.49, v_num=12d3]
Epoch 0:   0%|          | 855/674096 [00:56<12:16:09, 15.24it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 575/3370440 [00:38<62:22:55, 15.01it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 37/2106520 [00:06<109:26:28,  5.35it/s, loss=3.49, v_num=12d3]
Epoch 0:   0%|          | 857/674096 [00:56<12:16:16, 15.24it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 576/3370440 [00:38<62:23:19, 15.00it/s, loss=3.07, v_num=fff0]
Epoch 0:   0%|          | 39/2106520 [00:07<105:58:29,  5.52it/s, loss=3.52, v_num=12d3]
Epoch 0:   0%|          | 859/674096 [00:56<12:16:17, 15.24it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 578/3370440 [00:38<62:22:32, 15.01it/s, loss=3.04, v_num=fff0]
Epoch 0:   0%|          |

[2m[36m(objective pid=12627)[0m 
[2m[36m(objective pid=12627)[0m   | Name              | Type             | Params
[2m[36m(objective pid=12627)[0m -------------------------------------------------------
[2m[36m(objective pid=12627)[0m 0 | layer_norm        | LayerNorm        | 420   
[2m[36m(objective pid=12627)[0m 1 | linears           | ModuleList       | 504 K 
[2m[36m(objective pid=12627)[0m 2 | normalize_outputs | Softmax          | 0     
[2m[36m(objective pid=12627)[0m 3 | crossentropy_loss | CrossEntropyLoss | 0     
[2m[36m(objective pid=12627)[0m -------------------------------------------------------
[2m[36m(objective pid=12627)[0m 504 K     Trainable params
[2m[36m(objective pid=12627)[0m 0         Non-trainable params
[2m[36m(objective pid=12627)[0m 504 K     Total params
[2m[36m(objective pid=12627)[0m 2.018     Total estimated model params size (MB)


Epoch 0:   0%|          | 924/674096 [01:01<12:21:19, 15.13it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 105/2106520 [00:11<65:55:03,  8.88it/s, loss=3.64, v_num=12d3]
Epoch 0:   0%|          | 925/674096 [01:01<12:21:18, 15.13it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 107/2106520 [00:11<65:27:35,  8.94it/s, loss=3.57, v_num=12d3]
Sanity Checking: 0it [00:00, ?it/s]
Epoch 0:   0%|          | 927/674096 [01:01<12:21:08, 15.14it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 109/2106520 [00:12<64:48:13,  9.03it/s, loss=3.64, v_num=12d3]


[2m[36m(objective pid=12627)[0m   rank_zero_warn(


Epoch 0:   0%|          | 929/674096 [01:01<12:20:56, 15.14it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 110/2106520 [00:12<64:33:37,  9.06it/s, loss=3.67, v_num=12d3]
Epoch 0:   0%|          | 930/674096 [01:01<12:21:13, 15.14it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 112/2106520 [00:12<64:05:25,  9.13it/s, loss=3.66, v_num=12d3]
Epoch 0:   0%|          | 932/674096 [01:01<12:21:12, 15.14it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 113/2106520 [00:12<63:52:12,  9.16it/s, loss=3.65, v_num=12d3]
Epoch 0:   0%|          | 933/674096 [01:01<12:21:12, 15.14it/s, loss=3.04, v_num=fc77]
Epoch 0:   0%|          | 115/2106520 [00:12<63:30:01,  9.21it/s, loss=3.68, v_num=12d3]
Epoch 0:   0%|          | 935/674096 [01:01<12:21:02, 15.14it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 640/3370440 [00:43<64:17:10, 14.56it/s, loss=3, v_num=fff0]   
Epoch 0:   0%|          | 117/2106520 [00:12<63:02:07,  9.28it/s, loss=3.59, v_num=12d3]
Epoch 0:   0%|          | 

[2m[36m(objective pid=12627)[0m   rank_zero_warn(


Epoch 0:   0%|          | 940/674096 [01:02<12:22:28, 15.11it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 645/3370440 [00:44<64:22:35, 14.54it/s, loss=2.95, v_num=fff0]
[2m[36m(objective pid=12838)[0m Test print
[2m[36m(objective pid=12838)[0m 7
[2m[36m(objective pid=12838)[0m [168, 384, 80, 8, 440, 32, 424, 368, 400, 96, 344]
[2m[36m(objective pid=12838)[0m Finished model init
Epoch 0:   0%|          | 941/674096 [01:02<12:22:28, 15.11it/s, loss=3, v_num=fc77]   
Epoch 0:   0%|          | 647/3370440 [00:44<64:22:20, 14.54it/s, loss=2.93, v_num=fff0]
Epoch 0:   0%|          | 122/2106520 [00:13<62:51:44,  9.31it/s, loss=3.64, v_num=12d3]
[2m[36m(objective pid=12838)[0m Started mlflow run
[2m[36m(objective pid=12838)[0m Finished init logger
[2m[36m(objective pid=12838)[0m Finished define callbacks
[2m[36m(objective pid=12838)[0m Finished init hparams kwargs
[2m[36m(objective pid=12838)[0m Finished log hparams mlflow
[2m[36m(objective pid=12838)[0m 

[2m[36m(objective pid=12838)[0m GPU available: False, used: False
[2m[36m(objective pid=12838)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=12838)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=12838)[0m HPU available: False, using: 0 HPUs


Epoch 0:   0%|          | 945/674096 [01:02<12:22:22, 15.11it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 649/3370440 [00:44<64:25:07, 14.53it/s, loss=2.91, v_num=fff0]
Epoch 0:   0%|          | 125/2106520 [00:13<62:22:40,  9.38it/s, loss=3.69, v_num=12d3]
Epoch 0:   0%|          | 946/674096 [01:02<12:22:27, 15.11it/s, loss=3.01, v_num=fc77]
Epoch 0:   0%|          | 650/3370440 [00:44<64:25:27, 14.53it/s, loss=2.92, v_num=fff0]
Epoch 0:   0%|          | 126/2106520 [00:13<62:12:02,  9.41it/s, loss=3.74, v_num=12d3]
Epoch 0:   0%|          | 127/2106520 [00:13<62:01:28,  9.43it/s, loss=3.75, v_num=12d3]
Epoch 0:   0%|          | 947/674096 [01:02<12:22:34, 15.11it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 948/674096 [01:02<12:23:27, 15.09it/s, loss=3.03, v_num=fc77]
Epoch 0:   0%|          | 651/3370440 [00:45<64:43:53, 14.46it/s, loss=2.93, v_num=fff0]
Epoch 0:   0%|          | 128/2106520 [00:13<62:05:00,  9.42it/s, loss=3.71, v_num=12d3]
Epoch 0:   0%|          |

[2m[36m(objective pid=14122)[0m GPU available: False, used: False
[2m[36m(objective pid=14122)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=14122)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=14122)[0m HPU available: False, using: 0 HPUs


Epoch 0:   0%|          | 1152/674096 [01:21<13:14:46, 14.11it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 357/2106520 [00:32<53:12:03, 11.00it/s, loss=3.72, v_num=12d3]
Epoch 0:   0%|          | 118/16852120 [00:18<746:56:04,  6.27it/s, loss=3.51, v_num=77d3]
Epoch 0:   0%|          | 1153/674096 [01:21<13:14:49, 14.11it/s, loss=3.07, v_num=fc77]
Epoch 0:   0%|          | 358/2106520 [00:32<53:12:21, 11.00it/s, loss=3.66, v_num=12d3]
Epoch 0:   0%|          | 120/16852120 [00:18<740:37:57,  6.32it/s, loss=3.54, v_num=77d3]
Epoch 0:   0%|          | 1155/674096 [01:21<13:15:01, 14.11it/s, loss=3.07, v_num=fc77]
Epoch 0:   0%|          | 360/2106520 [00:32<53:04:19, 11.02it/s, loss=3.62, v_num=12d3]
Epoch 0:   0%|          | 121/16852120 [00:19<736:46:52,  6.35it/s, loss=3.56, v_num=77d3]
Epoch 0:   0%|          | 1156/674096 [01:21<13:14:55, 14.11it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 850/3370440 [01:04<70:34:44, 13.26it/s, loss=3.09, v_num=fff0]
Epoch 0:   0%| 

[2m[36m(objective pid=14360)[0m GPU available: False, used: False
[2m[36m(objective pid=14360)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=14360)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=14360)[0m HPU available: False, using: 0 HPUs


Epoch 0:   0%|          | 1228/674096 [01:28<13:23:58, 13.95it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 406/2106520 [00:38<55:55:17, 10.46it/s, loss=3.51, v_num=12d3]
Epoch 0:   0%|          | 190/16852120 [00:25<621:33:53,  7.53it/s, loss=3.23, v_num=77d3]
Epoch 0:   0%|          | 407/2106520 [00:38<55:52:36, 10.47it/s, loss=3.52, v_num=12d3]
Epoch 0:   0%|          | 1230/674096 [01:28<13:23:46, 13.95it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%|          | 408/2106520 [00:38<55:49:16, 10.48it/s, loss=3.51, v_num=12d3]
Epoch 0:   0%|          | 192/16852120 [00:25<618:10:14,  7.57it/s, loss=3.24, v_num=77d3]
Epoch 0:   0%|          | 1231/674096 [01:28<13:23:41, 13.95it/s, loss=3.05, v_num=fc77]
Epoch 0:   0%|          | 410/2106520 [00:39<55:43:37, 10.50it/s, loss=3.47, v_num=12d3]
Epoch 0:   0%|          | 194/16852120 [00:25<614:41:31,  7.62it/s, loss=3.25, v_num=77d3]
Epoch 0:   0%|          | 1233/674096 [01:28<13:23:34, 13.96it/s, loss=3.06, v_num=fc77]
Epoch 0:   0%| 

[2m[36m(objective pid=15309)[0m GPU available: False, used: False
[2m[36m(objective pid=15309)[0m TPU available: False, using: 0 TPU cores
[2m[36m(objective pid=15309)[0m IPU available: False, using: 0 IPUs
[2m[36m(objective pid=15309)[0m HPU available: False, using: 0 HPUs


Epoch 0:   0%|          | 1457/674096 [01:54<14:38:46, 12.76it/s, loss=3.01, v_num=fc77]
Epoch 0:   0%|          | 1197/3370440 [01:36<75:23:05, 12.41it/s, loss=3.1, v_num=fff0] 
Epoch 0:   0%|          | 700/2106520 [01:05<54:22:49, 10.76it/s, loss=3.46, v_num=12d3]
Epoch 0:   0%|          | 504/16852120 [00:51<477:25:23,  9.80it/s, loss=3.42, v_num=77d3]
Epoch 0:   0%|          | 1459/674096 [01:54<14:38:39, 12.76it/s, loss=3, v_num=fc77]   
Epoch 0:   0%|          | 1199/3370440 [01:36<75:21:45, 12.42it/s, loss=3.11, v_num=fff0]
Epoch 0:   0%|          | 1460/674096 [01:54<14:38:36, 12.76it/s, loss=3, v_num=fc77]
Epoch 0:   0%|          | 1200/3370440 [01:36<75:21:35, 12.42it/s, loss=3.1, v_num=fff0] 
Epoch 0:   0%|          | 701/2106520 [01:05<54:28:40, 10.74it/s, loss=3.5, v_num=12d3] 
Epoch 0:   0%|          | 1462/674096 [01:54<14:38:24, 12.76it/s, loss=3.02, v_num=fc77]
Epoch 0:   0%|          | 702/2106520 [01:05<54:27:16, 10.74it/s, loss=3.52, v_num=12d3]
Epoch 0:   0%|     

ensure mlflow

run study

eval