In [1]:
# chose the current file directory as the working directory
import os
os.chdir("/teamspace/studios/this_studio/rubikscubesolver")

In [2]:
from tqdm import tqdm
import pickle

import wandb  # for logging
import time
from dataclasses import dataclass

import jax
import jax.numpy as jnp
import flax.nnx as nnx

import rubiktransformer.dataset as dataset
from rubiktransformer.trainer import reshape_sample

from rubiktransformer.trainer_online import init_model_optimizer, init_buffer, train_step_transformer_rf, training_loop
from rubiktransformer.online_training_utils import run_n_steps, reshape_diffusion_setup

cuda_plugin_extension is not found.


In [3]:
@dataclass
class Config:
    """Configuration class"""

    jax_key: jnp.ndarray = jax.random.PRNGKey(49)
    rngs = nnx.Rngs(48)
    batch_size: int = 128
    lr_1: float = 4e-4
    lr_2: float = 4e-4
    nb_games: int = 128 * 100
    len_seq: int = 32
    nb_step: int = 1000000
    log_every_step: int = 10
    log_eval_every_step: int = 10
    log_policy_reward_every_step: int = 10
    add_data_every_step: int = 500

    save_model_every_step: int = 2000


config = Config()

# init wandb config
user = "forbu14"
project = "RubikTransformer"
display_name = "experiment_" + time.strftime("%Y%m%d-%H%M%S")

wandb.init(entity=user, project=project, name=display_name)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mforbu14[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:

(
    optimizer_diffuser,
    optimizer_inverse,
    metrics_train,
    metrics_eval,
    metrics_inverse,
    transformer,
    inverse_rl_model,
) = init_model_optimizer(config)

env, buffer, buffer_eval, buffer_list, buffer_list_eval, jit_step = init_buffer(
    config
)

vmap_reset = jax.vmap(jax.jit(env.reset))
vmap_step = jax.vmap(run_n_steps, in_axes=(0, 0, None))

##### TRAINING #####
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key


In [5]:

buffer, buffer_list = dataset.fast_gathering_data_diffusion(
    env,
    vmap_reset,
    vmap_step,
    config.nb_games * 10,  # old is int(config.nb_games * 10.0),
    config.len_seq,
    buffer,
    buffer_list,
    subkey,
)


In [6]:
# load weight from world model transformer:
# import pickle

# filename = "state_ddt_model_improved_v0.pickle"

# with open(filename, "rb") as input_file:
#     state = pickle.load(input_file)

# nnx.update(transformer, state)

# # load weight from world model transformer:
# import pickle

# filename = "state_inverse_rl_model_improved_v0.pickle"

# with open(filename, "rb") as input_file:
#     state = pickle.load(input_file)

# nnx.update(inverse_rl_model, state)

In [7]:
sample = buffer.sample(buffer_list, subkey)
sample = reshape_diffusion_setup(sample)


In [8]:
training_loop(config,
    env,
    buffer,
    buffer_eval,
    buffer_list,
    buffer_list_eval,
    vmap_reset,
    vmap_step,
    transformer,
    optimizer_diffuser,
    metrics_train,
    metrics_eval,
    inverse_rl_model,
    optimizer_inverse,
    metrics_inverse,)

{'loss': Array(0.17635848, dtype=float32), 'loss_cross_entropy': Array(0.16696732, dtype=float32)}
{'loss_inverse': Array(8.644339e-07, dtype=float32)}


  6%|▋         | 64205/1000000 [3:02:17<37:03:15,  7.02it/s]

  6%|▋         | 64209/1000000 [3:02:17<26:40:55,  9.74it/s]

{'loss': Array(0.1592211, dtype=float32), 'loss_cross_entropy': Array(0.14973159, dtype=float32)}
{'loss_inverse': Array(3.7941223e-08, dtype=float32)}


  6%|▋         | 64219/1000000 [3:02:19<32:24:36,  8.02it/s]

{'loss': Array(0.1394069, dtype=float32), 'loss_cross_entropy': Array(0.13089067, dtype=float32)}
{'loss_inverse': Array(1.9350416e-06, dtype=float32)}


  6%|▋         | 64229/1000000 [3:02:20<27:19:28,  9.51it/s]

{'loss': Array(0.14728451, dtype=float32), 'loss_cross_entropy': Array(0.1383798, dtype=float32)}
{'loss_inverse': Array(0.00011489, dtype=float32)}


  6%|▋         | 64239/1000000 [3:02:22<38:32:25,  6.74it/s]

{'loss': Array(0.15798315, dtype=float32), 'loss_cross_entropy': Array(0.14811371, dtype=float32)}
{'loss_inverse': Array(1.7343727e-06, dtype=float32)}


  6%|▋         | 64249/1000000 [3:02:23<27:24:35,  9.48it/s]

{'loss': Array(0.16488062, dtype=float32), 'loss_cross_entropy': Array(0.15559538, dtype=float32)}
{'loss_inverse': Array(2.9616804e-05, dtype=float32)}


  6%|▋         | 64259/1000000 [3:02:25<26:04:12,  9.97it/s]

{'loss': Array(0.16273995, dtype=float32), 'loss_cross_entropy': Array(0.15280798, dtype=float32)}
{'loss_inverse': Array(7.090864e-06, dtype=float32)}


  6%|▋         | 64269/1000000 [3:02:27<30:56:07,  8.40it/s]

{'loss': Array(0.15709282, dtype=float32), 'loss_cross_entropy': Array(0.14738676, dtype=float32)}
{'loss_inverse': Array(2.6361857e-07, dtype=float32)}


  6%|▋         | 64279/1000000 [3:02:28<26:06:56,  9.95it/s]

{'loss': Array(0.16574167, dtype=float32), 'loss_cross_entropy': Array(0.15632713, dtype=float32)}
{'loss_inverse': Array(3.6505867e-07, dtype=float32)}


  6%|▋         | 64289/1000000 [3:02:30<30:19:18,  8.57it/s]

{'loss': Array(0.16021581, dtype=float32), 'loss_cross_entropy': Array(0.15082163, dtype=float32)}
{'loss_inverse': Array(4.3449977e-06, dtype=float32)}


  6%|▋         | 64299/1000000 [3:02:31<25:59:51, 10.00it/s]

{'loss': Array(0.1361056, dtype=float32), 'loss_cross_entropy': Array(0.1275112, dtype=float32)}
{'loss_inverse': Array(4.758128e-06, dtype=float32)}


  6%|▋         | 64309/1000000 [3:02:33<31:27:46,  8.26it/s]

{'loss': Array(0.15941003, dtype=float32), 'loss_cross_entropy': Array(0.15027803, dtype=float32)}
{'loss_inverse': Array(0.00012564, dtype=float32)}


  6%|▋         | 64319/1000000 [3:02:34<26:36:13,  9.77it/s]

{'loss': Array(0.15022404, dtype=float32), 'loss_cross_entropy': Array(0.14100856, dtype=float32)}
{'loss_inverse': Array(6.845929e-06, dtype=float32)}


  6%|▋         | 64329/1000000 [3:02:36<31:26:34,  8.27it/s]

{'loss': Array(0.1501526, dtype=float32), 'loss_cross_entropy': Array(0.14174055, dtype=float32)}
{'loss_inverse': Array(6.771378e-08, dtype=float32)}


  6%|▋         | 64339/1000000 [3:02:38<26:47:32,  9.70it/s]

{'loss': Array(0.17018978, dtype=float32), 'loss_cross_entropy': Array(0.1600758, dtype=float32)}
{'loss_inverse': Array(3.0957625e-07, dtype=float32)}


  6%|▋         | 64349/1000000 [3:02:39<25:41:23, 10.12it/s]

{'loss': Array(0.13698874, dtype=float32), 'loss_cross_entropy': Array(0.12815897, dtype=float32)}
{'loss_inverse': Array(3.0137105e-07, dtype=float32)}


  6%|▋         | 64359/1000000 [3:02:41<28:52:38,  9.00it/s]

{'loss': Array(0.13699324, dtype=float32), 'loss_cross_entropy': Array(0.12816973, dtype=float32)}
{'loss_inverse': Array(1.7171787e-07, dtype=float32)}


  6%|▋         | 64369/1000000 [3:02:42<26:03:26,  9.97it/s]

{'loss': Array(0.16460218, dtype=float32), 'loss_cross_entropy': Array(0.15493064, dtype=float32)}
{'loss_inverse': Array(8.223332e-06, dtype=float32)}


  6%|▋         | 64379/1000000 [3:02:44<30:32:57,  8.51it/s]

{'loss': Array(0.17333648, dtype=float32), 'loss_cross_entropy': Array(0.16364934, dtype=float32)}
{'loss_inverse': Array(2.2422635e-06, dtype=float32)}


  6%|▋         | 64389/1000000 [3:02:45<27:06:52,  9.58it/s]

{'loss': Array(0.16786222, dtype=float32), 'loss_cross_entropy': Array(0.15748902, dtype=float32)}
{'loss_inverse': Array(7.774494e-06, dtype=float32)}


  6%|▋         | 64399/1000000 [3:02:47<31:55:39,  8.14it/s]

{'loss': Array(0.15373772, dtype=float32), 'loss_cross_entropy': Array(0.14517191, dtype=float32)}
{'loss_inverse': Array(3.2928685e-07, dtype=float32)}


  6%|▋         | 64409/1000000 [3:02:49<26:36:20,  9.77it/s]

{'loss': Array(0.15341449, dtype=float32), 'loss_cross_entropy': Array(0.14412092, dtype=float32)}
{'loss_inverse': Array(1.8950273e-07, dtype=float32)}


  6%|▋         | 64419/1000000 [3:02:50<25:45:20, 10.09it/s]

{'loss': Array(0.14122555, dtype=float32), 'loss_cross_entropy': Array(0.13265048, dtype=float32)}
{'loss_inverse': Array(6.566838e-08, dtype=float32)}


  6%|▋         | 64429/1000000 [3:02:52<29:39:17,  8.76it/s]

{'loss': Array(0.15750487, dtype=float32), 'loss_cross_entropy': Array(0.14833069, dtype=float32)}
{'loss_inverse': Array(0.00026832, dtype=float32)}


  6%|▋         | 64439/1000000 [3:02:53<25:47:16, 10.08it/s]

{'loss': Array(0.13844672, dtype=float32), 'loss_cross_entropy': Array(0.1296768, dtype=float32)}
{'loss_inverse': Array(6.2657864e-07, dtype=float32)}


  6%|▋         | 64449/1000000 [3:02:55<34:40:58,  7.49it/s]

{'loss': Array(0.15013905, dtype=float32), 'loss_cross_entropy': Array(0.14093928, dtype=float32)}
{'loss_inverse': Array(1.6774259e-06, dtype=float32)}


  6%|▋         | 64459/1000000 [3:02:56<26:51:32,  9.68it/s]

{'loss': Array(0.15312783, dtype=float32), 'loss_cross_entropy': Array(0.14389497, dtype=float32)}
{'loss_inverse': Array(5.3248572e-05, dtype=float32)}


  6%|▋         | 64469/1000000 [3:02:58<39:36:11,  6.56it/s]

{'loss': Array(0.16066681, dtype=float32), 'loss_cross_entropy': Array(0.15097788, dtype=float32)}
{'loss_inverse': Array(2.724369e-07, dtype=float32)}


  6%|▋         | 64479/1000000 [3:02:59<27:37:19,  9.41it/s]

{'loss': Array(0.15578225, dtype=float32), 'loss_cross_entropy': Array(0.14649743, dtype=float32)}
{'loss_inverse': Array(9.115228e-06, dtype=float32)}


  6%|▋         | 64489/1000000 [3:03:01<39:26:52,  6.59it/s]

{'loss': Array(0.16645236, dtype=float32), 'loss_cross_entropy': Array(0.15684952, dtype=float32)}
{'loss_inverse': Array(4.471934e-06, dtype=float32)}


  6%|▋         | 64499/1000000 [3:03:03<29:02:43,  8.95it/s]

{'loss': Array(0.15719299, dtype=float32), 'loss_cross_entropy': Array(0.14762671, dtype=float32)}
{'loss_inverse': Array(6.9617624e-07, dtype=float32)}


  6%|▋         | 64509/1000000 [3:03:11<89:04:16,  2.92it/s] 

{'loss': Array(0.15574364, dtype=float32), 'loss_cross_entropy': Array(0.14652853, dtype=float32)}
{'loss_inverse': Array(7.1103773e-06, dtype=float32)}


  6%|▋         | 64519/1000000 [3:03:13<45:38:34,  5.69it/s]

{'loss': Array(0.15525584, dtype=float32), 'loss_cross_entropy': Array(0.14503348, dtype=float32)}
{'loss_inverse': Array(3.6333515e-07, dtype=float32)}


  6%|▋         | 64529/1000000 [3:03:14<28:53:29,  8.99it/s]

{'loss': Array(0.17510049, dtype=float32), 'loss_cross_entropy': Array(0.16481774, dtype=float32)}
{'loss_inverse': Array(2.7969025e-07, dtype=float32)}


  6%|▋         | 64539/1000000 [3:03:16<26:42:05,  9.73it/s]

{'loss': Array(0.1605913, dtype=float32), 'loss_cross_entropy': Array(0.15110542, dtype=float32)}
{'loss_inverse': Array(3.7123797e-07, dtype=float32)}


  6%|▋         | 64549/1000000 [3:03:17<28:56:17,  8.98it/s]

{'loss': Array(0.16328089, dtype=float32), 'loss_cross_entropy': Array(0.15313052, dtype=float32)}
{'loss_inverse': Array(1.881754e-07, dtype=float32)}


  6%|▋         | 64559/1000000 [3:03:19<26:47:38,  9.70it/s]

{'loss': Array(0.16279064, dtype=float32), 'loss_cross_entropy': Array(0.15275908, dtype=float32)}
{'loss_inverse': Array(6.6071675e-07, dtype=float32)}


  6%|▋         | 64569/1000000 [3:03:21<29:19:21,  8.86it/s]

{'loss': Array(0.16360371, dtype=float32), 'loss_cross_entropy': Array(0.15479288, dtype=float32)}
{'loss_inverse': Array(5.434195e-06, dtype=float32)}


  6%|▋         | 64579/1000000 [3:03:22<26:41:28,  9.73it/s]

{'loss': Array(0.15538001, dtype=float32), 'loss_cross_entropy': Array(0.1457671, dtype=float32)}
{'loss_inverse': Array(2.1230646e-05, dtype=float32)}


  6%|▋         | 64589/1000000 [3:03:24<29:10:09,  8.91it/s]

{'loss': Array(0.15298508, dtype=float32), 'loss_cross_entropy': Array(0.14344616, dtype=float32)}
{'loss_inverse': Array(1.1657462e-06, dtype=float32)}


  6%|▋         | 64599/1000000 [3:03:25<26:01:13,  9.99it/s]

{'loss': Array(0.16079892, dtype=float32), 'loss_cross_entropy': Array(0.15156767, dtype=float32)}
{'loss_inverse': Array(1.8276445e-06, dtype=float32)}


  6%|▋         | 64609/1000000 [3:03:27<29:51:40,  8.70it/s]

{'loss': Array(0.14693363, dtype=float32), 'loss_cross_entropy': Array(0.13767046, dtype=float32)}
{'loss_inverse': Array(7.989058e-05, dtype=float32)}


  6%|▋         | 64619/1000000 [3:03:28<26:23:43,  9.84it/s]

{'loss': Array(0.16021633, dtype=float32), 'loss_cross_entropy': Array(0.15071207, dtype=float32)}
{'loss_inverse': Array(4.3188397e-06, dtype=float32)}


  6%|▋         | 64629/1000000 [3:03:30<32:20:57,  8.03it/s]

{'loss': Array(0.14807443, dtype=float32), 'loss_cross_entropy': Array(0.13902687, dtype=float32)}
{'loss_inverse': Array(1.9552058e-06, dtype=float32)}


  6%|▋         | 64639/1000000 [3:03:32<26:32:53,  9.79it/s]

{'loss': Array(0.15388678, dtype=float32), 'loss_cross_entropy': Array(0.14471667, dtype=float32)}
{'loss_inverse': Array(2.4924273e-06, dtype=float32)}


  6%|▋         | 64649/1000000 [3:03:33<34:39:06,  7.50it/s]

{'loss': Array(0.16221456, dtype=float32), 'loss_cross_entropy': Array(0.15294254, dtype=float32)}
{'loss_inverse': Array(7.1844947e-06, dtype=float32)}


  6%|▋         | 64659/1000000 [3:03:35<27:20:14,  9.50it/s]

{'loss': Array(0.14910226, dtype=float32), 'loss_cross_entropy': Array(0.13992302, dtype=float32)}
{'loss_inverse': Array(7.166617e-05, dtype=float32)}


  6%|▋         | 64669/1000000 [3:03:36<25:22:14, 10.24it/s]

{'loss': Array(0.1530041, dtype=float32), 'loss_cross_entropy': Array(0.14360829, dtype=float32)}
{'loss_inverse': Array(0.00019769, dtype=float32)}


  6%|▋         | 64679/1000000 [3:03:38<30:05:47,  8.63it/s]

{'loss': Array(0.14893149, dtype=float32), 'loss_cross_entropy': Array(0.14045529, dtype=float32)}
{'loss_inverse': Array(4.754933e-06, dtype=float32)}


  6%|▋         | 64689/1000000 [3:03:39<25:59:31, 10.00it/s]

{'loss': Array(0.15412205, dtype=float32), 'loss_cross_entropy': Array(0.14485428, dtype=float32)}
{'loss_inverse': Array(1.3391619e-06, dtype=float32)}


  6%|▋         | 64699/1000000 [3:03:41<34:29:19,  7.53it/s]

{'loss': Array(0.15114336, dtype=float32), 'loss_cross_entropy': Array(0.14203575, dtype=float32)}
{'loss_inverse': Array(1.903631e-05, dtype=float32)}


  6%|▋         | 64709/1000000 [3:03:42<27:03:11,  9.60it/s]

{'loss': Array(0.17750601, dtype=float32), 'loss_cross_entropy': Array(0.16714895, dtype=float32)}
{'loss_inverse': Array(6.632995e-05, dtype=float32)}


  6%|▋         | 64719/1000000 [3:03:44<35:06:48,  7.40it/s]

{'loss': Array(0.14225708, dtype=float32), 'loss_cross_entropy': Array(0.13407065, dtype=float32)}
{'loss_inverse': Array(8.879735e-06, dtype=float32)}


  6%|▋         | 64729/1000000 [3:03:46<27:14:45,  9.54it/s]

{'loss': Array(0.14871088, dtype=float32), 'loss_cross_entropy': Array(0.14028847, dtype=float32)}
{'loss_inverse': Array(5.502936e-05, dtype=float32)}


  6%|▋         | 64739/1000000 [3:03:47<25:24:06, 10.23it/s]

{'loss': Array(0.14296733, dtype=float32), 'loss_cross_entropy': Array(0.13442628, dtype=float32)}
{'loss_inverse': Array(1.2097174e-06, dtype=float32)}


  6%|▋         | 64749/1000000 [3:03:49<28:25:57,  9.14it/s]

{'loss': Array(0.1413766, dtype=float32), 'loss_cross_entropy': Array(0.13269699, dtype=float32)}
{'loss_inverse': Array(2.4276707e-05, dtype=float32)}


  6%|▋         | 64759/1000000 [3:03:50<26:12:34,  9.91it/s]

{'loss': Array(0.15901868, dtype=float32), 'loss_cross_entropy': Array(0.14979601, dtype=float32)}
{'loss_inverse': Array(3.1866117e-07, dtype=float32)}


  6%|▋         | 64769/1000000 [3:03:52<39:26:50,  6.59it/s]

{'loss': Array(0.14160411, dtype=float32), 'loss_cross_entropy': Array(0.13320471, dtype=float32)}
{'loss_inverse': Array(1.8219048e-07, dtype=float32)}


  6%|▋         | 64779/1000000 [3:03:53<28:12:55,  9.21it/s]

{'loss': Array(0.1500895, dtype=float32), 'loss_cross_entropy': Array(0.14097925, dtype=float32)}
{'loss_inverse': Array(2.9167602e-05, dtype=float32)}


  6%|▋         | 64789/1000000 [3:03:55<25:36:19, 10.15it/s]

{'loss': Array(0.16809206, dtype=float32), 'loss_cross_entropy': Array(0.15857549, dtype=float32)}
{'loss_inverse': Array(1.6849259e-06, dtype=float32)}


  6%|▋         | 64799/1000000 [3:03:57<29:50:49,  8.70it/s]

{'loss': Array(0.14269012, dtype=float32), 'loss_cross_entropy': Array(0.13406734, dtype=float32)}
{'loss_inverse': Array(3.7262285e-05, dtype=float32)}


  6%|▋         | 64809/1000000 [3:03:58<26:24:42,  9.84it/s]

{'loss': Array(0.140224, dtype=float32), 'loss_cross_entropy': Array(0.1317847, dtype=float32)}
{'loss_inverse': Array(3.8361154e-07, dtype=float32)}


  6%|▋         | 64819/1000000 [3:04:00<29:53:50,  8.69it/s]

{'loss': Array(0.14646605, dtype=float32), 'loss_cross_entropy': Array(0.137155, dtype=float32)}
{'loss_inverse': Array(2.460433e-06, dtype=float32)}


  6%|▋         | 64829/1000000 [3:04:01<25:48:50, 10.06it/s]

{'loss': Array(0.14331144, dtype=float32), 'loss_cross_entropy': Array(0.13453804, dtype=float32)}
{'loss_inverse': Array(1.1728146e-06, dtype=float32)}


  6%|▋         | 64839/1000000 [3:04:03<32:05:16,  8.10it/s]

{'loss': Array(0.1456271, dtype=float32), 'loss_cross_entropy': Array(0.13713129, dtype=float32)}
{'loss_inverse': Array(0.00010779, dtype=float32)}


  6%|▋         | 64849/1000000 [3:04:04<27:22:32,  9.49it/s]

{'loss': Array(0.13892041, dtype=float32), 'loss_cross_entropy': Array(0.13034344, dtype=float32)}
{'loss_inverse': Array(4.641019e-06, dtype=float32)}


  6%|▋         | 64859/1000000 [3:04:06<26:07:44,  9.94it/s]

{'loss': Array(0.1581584, dtype=float32), 'loss_cross_entropy': Array(0.14885285, dtype=float32)}
{'loss_inverse': Array(4.383284e-06, dtype=float32)}


  6%|▋         | 64869/1000000 [3:04:08<29:55:56,  8.68it/s]

{'loss': Array(0.14727461, dtype=float32), 'loss_cross_entropy': Array(0.1383674, dtype=float32)}
{'loss_inverse': Array(8.787575e-05, dtype=float32)}


  6%|▋         | 64879/1000000 [3:04:09<26:16:40,  9.88it/s]

{'loss': Array(0.1580622, dtype=float32), 'loss_cross_entropy': Array(0.14899826, dtype=float32)}
{'loss_inverse': Array(7.426653e-06, dtype=float32)}


  6%|▋         | 64889/1000000 [3:04:11<35:07:28,  7.40it/s]

{'loss': Array(0.15690283, dtype=float32), 'loss_cross_entropy': Array(0.14729649, dtype=float32)}
{'loss_inverse': Array(0.00023917, dtype=float32)}


  6%|▋         | 64899/1000000 [3:04:12<26:50:48,  9.68it/s]

{'loss': Array(0.14201207, dtype=float32), 'loss_cross_entropy': Array(0.13304137, dtype=float32)}
{'loss_inverse': Array(0.00011416, dtype=float32)}


  6%|▋         | 64909/1000000 [3:04:14<38:27:57,  6.75it/s]

{'loss': Array(0.15047091, dtype=float32), 'loss_cross_entropy': Array(0.14129119, dtype=float32)}
{'loss_inverse': Array(1.7677263e-05, dtype=float32)}


  6%|▋         | 64919/1000000 [3:04:15<27:37:45,  9.40it/s]

{'loss': Array(0.14237121, dtype=float32), 'loss_cross_entropy': Array(0.13356079, dtype=float32)}
{'loss_inverse': Array(1.6523607e-05, dtype=float32)}


  6%|▋         | 64929/1000000 [3:04:17<39:02:38,  6.65it/s]

{'loss': Array(0.14168736, dtype=float32), 'loss_cross_entropy': Array(0.13242877, dtype=float32)}
{'loss_inverse': Array(7.307044e-07, dtype=float32)}


  6%|▋         | 64939/1000000 [3:04:19<27:43:04,  9.37it/s]

{'loss': Array(0.14628457, dtype=float32), 'loss_cross_entropy': Array(0.13785528, dtype=float32)}
{'loss_inverse': Array(1.2794296e-05, dtype=float32)}


  6%|▋         | 64949/1000000 [3:04:20<26:07:08,  9.94it/s]

{'loss': Array(0.14895162, dtype=float32), 'loss_cross_entropy': Array(0.1401282, dtype=float32)}
{'loss_inverse': Array(2.7007045e-05, dtype=float32)}


  6%|▋         | 64959/1000000 [3:04:22<35:00:28,  7.42it/s]

{'loss': Array(0.13655645, dtype=float32), 'loss_cross_entropy': Array(0.12882872, dtype=float32)}
{'loss_inverse': Array(1.030621e-06, dtype=float32)}


  6%|▋         | 64969/1000000 [3:04:23<27:05:47,  9.59it/s]

{'loss': Array(0.13976185, dtype=float32), 'loss_cross_entropy': Array(0.13098867, dtype=float32)}
{'loss_inverse': Array(2.5177421e-06, dtype=float32)}


  6%|▋         | 64979/1000000 [3:04:25<38:23:22,  6.77it/s]

{'loss': Array(0.15694024, dtype=float32), 'loss_cross_entropy': Array(0.14751923, dtype=float32)}
{'loss_inverse': Array(2.1968362e-05, dtype=float32)}


  6%|▋         | 64989/1000000 [3:04:26<27:23:31,  9.48it/s]

{'loss': Array(0.16610955, dtype=float32), 'loss_cross_entropy': Array(0.1569721, dtype=float32)}
{'loss_inverse': Array(0.00015473, dtype=float32)}


  6%|▋         | 64999/1000000 [3:04:28<25:58:38, 10.00it/s]

{'loss': Array(0.15671284, dtype=float32), 'loss_cross_entropy': Array(0.14803733, dtype=float32)}
{'loss_inverse': Array(7.525901e-05, dtype=float32)}


  7%|▋         | 65009/1000000 [3:04:36<89:46:44,  2.89it/s] 

{'loss': Array(0.140622, dtype=float32), 'loss_cross_entropy': Array(0.132417, dtype=float32)}
{'loss_inverse': Array(1.0122719e-05, dtype=float32)}


  7%|▋         | 65019/1000000 [3:04:37<35:57:11,  7.22it/s]

{'loss': Array(0.15731333, dtype=float32), 'loss_cross_entropy': Array(0.1483035, dtype=float32)}
{'loss_inverse': Array(0.00037641, dtype=float32)}


  7%|▋         | 65029/1000000 [3:04:39<30:18:14,  8.57it/s]

{'loss': Array(0.16280986, dtype=float32), 'loss_cross_entropy': Array(0.15339406, dtype=float32)}
{'loss_inverse': Array(8.408554e-06, dtype=float32)}


  7%|▋         | 65039/1000000 [3:04:41<26:26:40,  9.82it/s]

{'loss': Array(0.13703233, dtype=float32), 'loss_cross_entropy': Array(0.12839325, dtype=float32)}
{'loss_inverse': Array(0.00014688, dtype=float32)}


  7%|▋         | 65049/1000000 [3:04:42<29:39:54,  8.75it/s]

{'loss': Array(0.15207812, dtype=float32), 'loss_cross_entropy': Array(0.14283976, dtype=float32)}
{'loss_inverse': Array(0.00013932, dtype=float32)}


  7%|▋         | 65059/1000000 [3:04:44<26:20:26,  9.86it/s]

{'loss': Array(0.14453472, dtype=float32), 'loss_cross_entropy': Array(0.1356166, dtype=float32)}
{'loss_inverse': Array(1.2096369e-05, dtype=float32)}


  7%|▋         | 65069/1000000 [3:04:45<34:09:39,  7.60it/s]

{'loss': Array(0.15190384, dtype=float32), 'loss_cross_entropy': Array(0.14262334, dtype=float32)}
{'loss_inverse': Array(1.7720507e-05, dtype=float32)}


  7%|▋         | 65079/1000000 [3:04:47<27:05:41,  9.58it/s]

{'loss': Array(0.16021, dtype=float32), 'loss_cross_entropy': Array(0.15127908, dtype=float32)}
{'loss_inverse': Array(2.4059398e-06, dtype=float32)}


  7%|▋         | 65089/1000000 [3:04:49<34:53:29,  7.44it/s]

{'loss': Array(0.15631682, dtype=float32), 'loss_cross_entropy': Array(0.1471604, dtype=float32)}
{'loss_inverse': Array(9.934235e-05, dtype=float32)}


  7%|▋         | 65099/1000000 [3:04:50<26:56:02,  9.64it/s]

{'loss': Array(0.15924676, dtype=float32), 'loss_cross_entropy': Array(0.14946026, dtype=float32)}
{'loss_inverse': Array(4.2982992e-06, dtype=float32)}


  7%|▋         | 65109/1000000 [3:04:52<38:15:35,  6.79it/s]

{'loss': Array(0.1442823, dtype=float32), 'loss_cross_entropy': Array(0.13571039, dtype=float32)}
{'loss_inverse': Array(4.5013963e-05, dtype=float32)}


  7%|▋         | 65119/1000000 [3:04:53<28:10:11,  9.22it/s]

{'loss': Array(0.16189618, dtype=float32), 'loss_cross_entropy': Array(0.15300053, dtype=float32)}
{'loss_inverse': Array(1.788815e-06, dtype=float32)}


  7%|▋         | 65129/1000000 [3:04:55<26:08:31,  9.93it/s]

{'loss': Array(0.15203395, dtype=float32), 'loss_cross_entropy': Array(0.14290138, dtype=float32)}
{'loss_inverse': Array(3.5864614e-06, dtype=float32)}


  7%|▋         | 65139/1000000 [3:04:56<31:49:54,  8.16it/s]

{'loss': Array(0.1451911, dtype=float32), 'loss_cross_entropy': Array(0.13667642, dtype=float32)}
{'loss_inverse': Array(9.19931e-05, dtype=float32)}


  7%|▋         | 65149/1000000 [3:04:58<26:19:48,  9.86it/s]

{'loss': Array(0.15379255, dtype=float32), 'loss_cross_entropy': Array(0.14427832, dtype=float32)}
{'loss_inverse': Array(3.6375604e-05, dtype=float32)}


  7%|▋         | 65159/1000000 [3:05:00<38:00:17,  6.83it/s]

{'loss': Array(0.16550905, dtype=float32), 'loss_cross_entropy': Array(0.15606129, dtype=float32)}
{'loss_inverse': Array(1.5659169e-05, dtype=float32)}


  7%|▋         | 65169/1000000 [3:05:01<27:20:47,  9.50it/s]

{'loss': Array(0.15023473, dtype=float32), 'loss_cross_entropy': Array(0.14123519, dtype=float32)}
{'loss_inverse': Array(1.37726665e-05, dtype=float32)}


  7%|▋         | 65179/1000000 [3:05:02<25:45:03, 10.08it/s]

{'loss': Array(0.14510813, dtype=float32), 'loss_cross_entropy': Array(0.1366755, dtype=float32)}
{'loss_inverse': Array(2.757697e-07, dtype=float32)}


  7%|▋         | 65189/1000000 [3:05:04<29:34:09,  8.78it/s]

{'loss': Array(0.13369095, dtype=float32), 'loss_cross_entropy': Array(0.12586479, dtype=float32)}
{'loss_inverse': Array(7.4860795e-06, dtype=float32)}


  7%|▋         | 65199/1000000 [3:05:06<26:31:53,  9.79it/s]

{'loss': Array(0.13426603, dtype=float32), 'loss_cross_entropy': Array(0.12609994, dtype=float32)}
{'loss_inverse': Array(2.1962067e-05, dtype=float32)}


  7%|▋         | 65209/1000000 [3:05:07<29:42:12,  8.74it/s]

{'loss': Array(0.14674288, dtype=float32), 'loss_cross_entropy': Array(0.13761888, dtype=float32)}
{'loss_inverse': Array(7.092649e-05, dtype=float32)}


  7%|▋         | 65219/1000000 [3:05:09<26:29:19,  9.80it/s]

{'loss': Array(0.15640429, dtype=float32), 'loss_cross_entropy': Array(0.14712182, dtype=float32)}
{'loss_inverse': Array(2.821353e-06, dtype=float32)}


  7%|▋         | 65229/1000000 [3:05:10<31:49:45,  8.16it/s]

{'loss': Array(0.16802146, dtype=float32), 'loss_cross_entropy': Array(0.15859911, dtype=float32)}
{'loss_inverse': Array(1.1124979e-05, dtype=float32)}


  7%|▋         | 65239/1000000 [3:05:12<26:36:17,  9.76it/s]

{'loss': Array(0.15576132, dtype=float32), 'loss_cross_entropy': Array(0.14688487, dtype=float32)}
{'loss_inverse': Array(0.00012069, dtype=float32)}


  7%|▋         | 65249/1000000 [3:05:14<32:06:51,  8.09it/s]

{'loss': Array(0.14026636, dtype=float32), 'loss_cross_entropy': Array(0.13178967, dtype=float32)}
{'loss_inverse': Array(0.00033048, dtype=float32)}


  7%|▋         | 65259/1000000 [3:05:15<26:33:16,  9.78it/s]

{'loss': Array(0.15772472, dtype=float32), 'loss_cross_entropy': Array(0.14878936, dtype=float32)}
{'loss_inverse': Array(3.3912759e-06, dtype=float32)}


  7%|▋         | 65269/1000000 [3:05:17<25:52:18, 10.04it/s]

{'loss': Array(0.14418052, dtype=float32), 'loss_cross_entropy': Array(0.1350814, dtype=float32)}
{'loss_inverse': Array(0.00119047, dtype=float32)}


  7%|▋         | 65279/1000000 [3:05:18<28:32:30,  9.10it/s]

{'loss': Array(0.14326872, dtype=float32), 'loss_cross_entropy': Array(0.13425161, dtype=float32)}
{'loss_inverse': Array(8.684957e-05, dtype=float32)}


  7%|▋         | 65289/1000000 [3:05:20<26:17:08,  9.88it/s]

{'loss': Array(0.14528239, dtype=float32), 'loss_cross_entropy': Array(0.13599502, dtype=float32)}
{'loss_inverse': Array(6.3714956e-06, dtype=float32)}


  7%|▋         | 65299/1000000 [3:05:21<29:43:00,  8.74it/s]

{'loss': Array(0.16520496, dtype=float32), 'loss_cross_entropy': Array(0.15516777, dtype=float32)}
{'loss_inverse': Array(1.1144999e-05, dtype=float32)}


  7%|▋         | 65309/1000000 [3:05:23<26:14:20,  9.90it/s]

{'loss': Array(0.15775633, dtype=float32), 'loss_cross_entropy': Array(0.1487719, dtype=float32)}
{'loss_inverse': Array(2.3087801e-05, dtype=float32)}


  7%|▋         | 65319/1000000 [3:05:25<31:32:45,  8.23it/s]

{'loss': Array(0.1206691, dtype=float32), 'loss_cross_entropy': Array(0.11319764, dtype=float32)}
{'loss_inverse': Array(7.294542e-05, dtype=float32)}


  7%|▋         | 65329/1000000 [3:05:26<26:05:14,  9.95it/s]

{'loss': Array(0.14025033, dtype=float32), 'loss_cross_entropy': Array(0.13151997, dtype=float32)}
{'loss_inverse': Array(1.3449001e-05, dtype=float32)}


  7%|▋         | 65339/1000000 [3:05:27<25:25:56, 10.21it/s]

{'loss': Array(0.16070335, dtype=float32), 'loss_cross_entropy': Array(0.15092933, dtype=float32)}
{'loss_inverse': Array(8.973728e-06, dtype=float32)}


  7%|▋         | 65349/1000000 [3:05:29<30:59:21,  8.38it/s]

{'loss': Array(0.15620337, dtype=float32), 'loss_cross_entropy': Array(0.14679329, dtype=float32)}
{'loss_inverse': Array(1.4751279e-05, dtype=float32)}


  7%|▋         | 65359/1000000 [3:05:31<25:56:30, 10.01it/s]

{'loss': Array(0.14454977, dtype=float32), 'loss_cross_entropy': Array(0.13558266, dtype=float32)}
{'loss_inverse': Array(1.2684228e-05, dtype=float32)}


  7%|▋         | 65369/1000000 [3:05:32<34:21:40,  7.56it/s]

{'loss': Array(0.16761993, dtype=float32), 'loss_cross_entropy': Array(0.15808414, dtype=float32)}
{'loss_inverse': Array(9.67639e-05, dtype=float32)}


  7%|▋         | 65379/1000000 [3:05:34<27:56:49,  9.29it/s]

{'loss': Array(0.15492325, dtype=float32), 'loss_cross_entropy': Array(0.14604591, dtype=float32)}
{'loss_inverse': Array(1.2271213e-05, dtype=float32)}


  7%|▋         | 65389/1000000 [3:05:36<39:47:51,  6.52it/s]

{'loss': Array(0.1591752, dtype=float32), 'loss_cross_entropy': Array(0.14952138, dtype=float32)}
{'loss_inverse': Array(0.0001041, dtype=float32)}


  7%|▋         | 65399/1000000 [3:05:37<27:50:29,  9.32it/s]

{'loss': Array(0.1520056, dtype=float32), 'loss_cross_entropy': Array(0.1431804, dtype=float32)}
{'loss_inverse': Array(3.4060802e-06, dtype=float32)}


  7%|▋         | 65409/1000000 [3:05:39<39:36:42,  6.55it/s]

{'loss': Array(0.1509178, dtype=float32), 'loss_cross_entropy': Array(0.14207186, dtype=float32)}
{'loss_inverse': Array(0.00014066, dtype=float32)}


  7%|▋         | 65419/1000000 [3:05:40<27:29:26,  9.44it/s]

{'loss': Array(0.14802447, dtype=float32), 'loss_cross_entropy': Array(0.13773732, dtype=float32)}
{'loss_inverse': Array(0.00012745, dtype=float32)}


  7%|▋         | 65429/1000000 [3:05:42<25:50:17, 10.05it/s]

{'loss': Array(0.15501152, dtype=float32), 'loss_cross_entropy': Array(0.14557405, dtype=float32)}
{'loss_inverse': Array(0.00078107, dtype=float32)}


  7%|▋         | 65439/1000000 [3:05:43<34:46:12,  7.47it/s]

{'loss': Array(0.17370084, dtype=float32), 'loss_cross_entropy': Array(0.16359916, dtype=float32)}
{'loss_inverse': Array(0.00019741, dtype=float32)}


  7%|▋         | 65449/1000000 [3:05:45<27:10:06,  9.56it/s]

{'loss': Array(0.14438307, dtype=float32), 'loss_cross_entropy': Array(0.13546196, dtype=float32)}
{'loss_inverse': Array(0.00019637, dtype=float32)}


  7%|▋         | 65459/1000000 [3:05:47<38:43:40,  6.70it/s]

{'loss': Array(0.1377322, dtype=float32), 'loss_cross_entropy': Array(0.12940998, dtype=float32)}
{'loss_inverse': Array(7.951014e-05, dtype=float32)}


  7%|▋         | 65469/1000000 [3:05:48<28:06:21,  9.24it/s]

{'loss': Array(0.18470441, dtype=float32), 'loss_cross_entropy': Array(0.17452069, dtype=float32)}
{'loss_inverse': Array(3.881205e-05, dtype=float32)}


  7%|▋         | 65479/1000000 [3:05:50<25:45:22, 10.08it/s]

{'loss': Array(0.14395842, dtype=float32), 'loss_cross_entropy': Array(0.13602804, dtype=float32)}
{'loss_inverse': Array(1.0763913e-05, dtype=float32)}


  7%|▋         | 65489/1000000 [3:05:51<29:05:15,  8.92it/s]

{'loss': Array(0.15390442, dtype=float32), 'loss_cross_entropy': Array(0.14529377, dtype=float32)}
{'loss_inverse': Array(1.1205978e-05, dtype=float32)}


  7%|▋         | 65499/1000000 [3:05:53<25:49:44, 10.05it/s]

{'loss': Array(0.16465318, dtype=float32), 'loss_cross_entropy': Array(0.15492772, dtype=float32)}
{'loss_inverse': Array(5.226908e-06, dtype=float32)}


  7%|▋         | 65509/1000000 [3:06:01<95:32:48,  2.72it/s] 

{'loss': Array(0.15307847, dtype=float32), 'loss_cross_entropy': Array(0.14461446, dtype=float32)}
{'loss_inverse': Array(0.00024278, dtype=float32)}


  7%|▋         | 65519/1000000 [3:06:02<36:54:40,  7.03it/s] 

{'loss': Array(0.13807996, dtype=float32), 'loss_cross_entropy': Array(0.12920809, dtype=float32)}
{'loss_inverse': Array(0.00017259, dtype=float32)}


  7%|▋         | 65529/1000000 [3:06:04<27:15:35,  9.52it/s]

{'loss': Array(0.16697215, dtype=float32), 'loss_cross_entropy': Array(0.15746965, dtype=float32)}
{'loss_inverse': Array(5.722747e-05, dtype=float32)}


  7%|▋         | 65539/1000000 [3:06:06<28:57:33,  8.96it/s]

{'loss': Array(0.15920377, dtype=float32), 'loss_cross_entropy': Array(0.14980684, dtype=float32)}
{'loss_inverse': Array(0.00037391, dtype=float32)}


  7%|▋         | 65549/1000000 [3:06:07<25:36:05, 10.14it/s]

{'loss': Array(0.13951886, dtype=float32), 'loss_cross_entropy': Array(0.13095948, dtype=float32)}
{'loss_inverse': Array(2.3221677e-05, dtype=float32)}


  7%|▋         | 65559/1000000 [3:06:09<30:09:03,  8.61it/s]

{'loss': Array(0.15016043, dtype=float32), 'loss_cross_entropy': Array(0.14049932, dtype=float32)}
{'loss_inverse': Array(2.4777366e-05, dtype=float32)}


  7%|▋         | 65569/1000000 [3:06:10<26:14:50,  9.89it/s]

{'loss': Array(0.16186698, dtype=float32), 'loss_cross_entropy': Array(0.15265277, dtype=float32)}
{'loss_inverse': Array(9.432869e-05, dtype=float32)}


  7%|▋         | 65579/1000000 [3:06:12<31:29:51,  8.24it/s]

{'loss': Array(0.14543903, dtype=float32), 'loss_cross_entropy': Array(0.13680491, dtype=float32)}
{'loss_inverse': Array(0.00020974, dtype=float32)}


  7%|▋         | 65589/1000000 [3:06:13<26:18:44,  9.86it/s]

{'loss': Array(0.16338761, dtype=float32), 'loss_cross_entropy': Array(0.15402003, dtype=float32)}
{'loss_inverse': Array(0.00035301, dtype=float32)}


  7%|▋         | 65599/1000000 [3:06:15<31:31:00,  8.24it/s]

{'loss': Array(0.14173388, dtype=float32), 'loss_cross_entropy': Array(0.1332226, dtype=float32)}
{'loss_inverse': Array(7.501853e-05, dtype=float32)}


  7%|▋         | 65609/1000000 [3:06:16<26:14:07,  9.89it/s]

{'loss': Array(0.15408692, dtype=float32), 'loss_cross_entropy': Array(0.14513834, dtype=float32)}
{'loss_inverse': Array(1.7832665e-05, dtype=float32)}


  7%|▋         | 65619/1000000 [3:06:18<25:53:33, 10.02it/s]

{'loss': Array(0.15426476, dtype=float32), 'loss_cross_entropy': Array(0.14542341, dtype=float32)}
{'loss_inverse': Array(0.0003621, dtype=float32)}


  7%|▋         | 65629/1000000 [3:06:20<28:39:41,  9.06it/s]

{'loss': Array(0.14693275, dtype=float32), 'loss_cross_entropy': Array(0.13773286, dtype=float32)}
{'loss_inverse': Array(0.00010049, dtype=float32)}


  7%|▋         | 65639/1000000 [3:06:21<25:51:11, 10.04it/s]

{'loss': Array(0.14743726, dtype=float32), 'loss_cross_entropy': Array(0.13810848, dtype=float32)}
{'loss_inverse': Array(0.00012325, dtype=float32)}


  7%|▋         | 65649/1000000 [3:06:23<29:53:48,  8.68it/s]

{'loss': Array(0.1583095, dtype=float32), 'loss_cross_entropy': Array(0.14888106, dtype=float32)}
{'loss_inverse': Array(2.8671973e-05, dtype=float32)}


  7%|▋         | 65659/1000000 [3:06:24<27:19:51,  9.50it/s]

{'loss': Array(0.15870105, dtype=float32), 'loss_cross_entropy': Array(0.14969154, dtype=float32)}
{'loss_inverse': Array(0.00014707, dtype=float32)}


  7%|▋         | 65669/1000000 [3:06:26<32:37:46,  7.95it/s]

{'loss': Array(0.14001058, dtype=float32), 'loss_cross_entropy': Array(0.13177687, dtype=float32)}
{'loss_inverse': Array(0.00019476, dtype=float32)}


  7%|▋         | 65679/1000000 [3:06:27<27:00:52,  9.61it/s]

{'loss': Array(0.15566877, dtype=float32), 'loss_cross_entropy': Array(0.14664225, dtype=float32)}
{'loss_inverse': Array(4.4902772e-05, dtype=float32)}


  7%|▋         | 65689/1000000 [3:06:29<26:12:52,  9.90it/s]

{'loss': Array(0.15398678, dtype=float32), 'loss_cross_entropy': Array(0.14508538, dtype=float32)}
{'loss_inverse': Array(0.00016439, dtype=float32)}


  7%|▋         | 65699/1000000 [3:06:31<30:26:19,  8.53it/s]

{'loss': Array(0.1560234, dtype=float32), 'loss_cross_entropy': Array(0.1472022, dtype=float32)}
{'loss_inverse': Array(0.00011988, dtype=float32)}


  7%|▋         | 65709/1000000 [3:06:32<26:59:19,  9.62it/s]

{'loss': Array(0.16546188, dtype=float32), 'loss_cross_entropy': Array(0.15590923, dtype=float32)}
{'loss_inverse': Array(2.2386897e-05, dtype=float32)}


  7%|▋         | 65719/1000000 [3:06:34<34:57:13,  7.42it/s]

{'loss': Array(0.15334459, dtype=float32), 'loss_cross_entropy': Array(0.14391248, dtype=float32)}
{'loss_inverse': Array(1.7430386e-06, dtype=float32)}


  7%|▋         | 65729/1000000 [3:06:35<26:36:22,  9.75it/s]

{'loss': Array(0.1551489, dtype=float32), 'loss_cross_entropy': Array(0.14601065, dtype=float32)}
{'loss_inverse': Array(0.0001194, dtype=float32)}


  7%|▋         | 65739/1000000 [3:06:37<38:27:35,  6.75it/s]

{'loss': Array(0.13205202, dtype=float32), 'loss_cross_entropy': Array(0.12403028, dtype=float32)}
{'loss_inverse': Array(0.00024982, dtype=float32)}


  7%|▋         | 65749/1000000 [3:06:39<27:38:34,  9.39it/s]

{'loss': Array(0.14676645, dtype=float32), 'loss_cross_entropy': Array(0.13759342, dtype=float32)}
{'loss_inverse': Array(0.00014939, dtype=float32)}


  7%|▋         | 65759/1000000 [3:06:40<38:19:14,  6.77it/s]

{'loss': Array(0.15491372, dtype=float32), 'loss_cross_entropy': Array(0.146205, dtype=float32)}
{'loss_inverse': Array(1.4192825e-05, dtype=float32)}


  7%|▋         | 65769/1000000 [3:06:42<27:33:36,  9.42it/s]

{'loss': Array(0.14205678, dtype=float32), 'loss_cross_entropy': Array(0.1335897, dtype=float32)}
{'loss_inverse': Array(0.00013295, dtype=float32)}


  7%|▋         | 65779/1000000 [3:06:43<25:18:25, 10.25it/s]

{'loss': Array(0.15525389, dtype=float32), 'loss_cross_entropy': Array(0.14595897, dtype=float32)}
{'loss_inverse': Array(0.00010229, dtype=float32)}


  7%|▋         | 65789/1000000 [3:06:45<35:00:53,  7.41it/s]

{'loss': Array(0.14422388, dtype=float32), 'loss_cross_entropy': Array(0.13539067, dtype=float32)}
{'loss_inverse': Array(2.3363242e-05, dtype=float32)}


  7%|▋         | 65799/1000000 [3:06:46<26:59:16,  9.62it/s]

{'loss': Array(0.14924662, dtype=float32), 'loss_cross_entropy': Array(0.14010744, dtype=float32)}
{'loss_inverse': Array(0.00032383, dtype=float32)}


  7%|▋         | 65809/1000000 [3:06:48<39:10:48,  6.62it/s]

{'loss': Array(0.16664913, dtype=float32), 'loss_cross_entropy': Array(0.15725274, dtype=float32)}
{'loss_inverse': Array(8.911819e-06, dtype=float32)}


  7%|▋         | 65819/1000000 [3:06:49<27:24:00,  9.47it/s]

{'loss': Array(0.1472735, dtype=float32), 'loss_cross_entropy': Array(0.13839404, dtype=float32)}
{'loss_inverse': Array(4.793392e-05, dtype=float32)}


  7%|▋         | 65829/1000000 [3:06:51<25:37:21, 10.13it/s]

{'loss': Array(0.15842526, dtype=float32), 'loss_cross_entropy': Array(0.14869285, dtype=float32)}
{'loss_inverse': Array(7.38978e-05, dtype=float32)}


  7%|▋         | 65839/1000000 [3:06:53<28:24:17,  9.14it/s]

{'loss': Array(0.14453483, dtype=float32), 'loss_cross_entropy': Array(0.13596052, dtype=float32)}
{'loss_inverse': Array(0.00044335, dtype=float32)}


  7%|▋         | 65849/1000000 [3:06:54<26:09:14,  9.92it/s]

{'loss': Array(0.14639662, dtype=float32), 'loss_cross_entropy': Array(0.13771008, dtype=float32)}
{'loss_inverse': Array(0.00015385, dtype=float32)}


  7%|▋         | 65859/1000000 [3:06:56<34:14:10,  7.58it/s]

{'loss': Array(0.15734355, dtype=float32), 'loss_cross_entropy': Array(0.14802945, dtype=float32)}
{'loss_inverse': Array(2.5375706e-05, dtype=float32)}


  7%|▋         | 65869/1000000 [3:06:57<26:47:15,  9.69it/s]

{'loss': Array(0.17524035, dtype=float32), 'loss_cross_entropy': Array(0.1651173, dtype=float32)}
{'loss_inverse': Array(5.3903354e-06, dtype=float32)}


  7%|▋         | 65879/1000000 [3:06:58<25:25:46, 10.20it/s]

{'loss': Array(0.13675037, dtype=float32), 'loss_cross_entropy': Array(0.12815765, dtype=float32)}
{'loss_inverse': Array(0.00016066, dtype=float32)}


  7%|▋         | 65889/1000000 [3:07:00<29:57:18,  8.66it/s]

{'loss': Array(0.15929998, dtype=float32), 'loss_cross_entropy': Array(0.1500315, dtype=float32)}
{'loss_inverse': Array(4.0950254e-06, dtype=float32)}


  7%|▋         | 65899/1000000 [3:07:02<26:31:03,  9.78it/s]

{'loss': Array(0.15369956, dtype=float32), 'loss_cross_entropy': Array(0.14453249, dtype=float32)}
{'loss_inverse': Array(9.493858e-05, dtype=float32)}


  7%|▋         | 65909/1000000 [3:07:03<34:43:19,  7.47it/s]

{'loss': Array(0.12984252, dtype=float32), 'loss_cross_entropy': Array(0.12209388, dtype=float32)}
{'loss_inverse': Array(1.4655743e-06, dtype=float32)}


  7%|▋         | 65919/1000000 [3:07:05<26:52:50,  9.65it/s]

{'loss': Array(0.14294803, dtype=float32), 'loss_cross_entropy': Array(0.13452476, dtype=float32)}
{'loss_inverse': Array(6.1688415e-06, dtype=float32)}


  7%|▋         | 65929/1000000 [3:07:07<35:46:08,  7.25it/s]

{'loss': Array(0.15851812, dtype=float32), 'loss_cross_entropy': Array(0.14907931, dtype=float32)}
{'loss_inverse': Array(1.1666415e-06, dtype=float32)}


  7%|▋         | 65939/1000000 [3:07:08<27:27:46,  9.45it/s]

{'loss': Array(0.1568514, dtype=float32), 'loss_cross_entropy': Array(0.14770621, dtype=float32)}
{'loss_inverse': Array(6.612728e-06, dtype=float32)}


  7%|▋         | 65949/1000000 [3:07:10<25:57:10, 10.00it/s]

{'loss': Array(0.1536563, dtype=float32), 'loss_cross_entropy': Array(0.14453034, dtype=float32)}
{'loss_inverse': Array(9.845655e-05, dtype=float32)}


  7%|▋         | 65959/1000000 [3:07:11<28:35:29,  9.07it/s]

{'loss': Array(0.14981362, dtype=float32), 'loss_cross_entropy': Array(0.14116718, dtype=float32)}
{'loss_inverse': Array(4.6020683e-05, dtype=float32)}


  7%|▋         | 65969/1000000 [3:07:13<26:41:59,  9.72it/s]

{'loss': Array(0.14661483, dtype=float32), 'loss_cross_entropy': Array(0.13728055, dtype=float32)}
{'loss_inverse': Array(9.13335e-06, dtype=float32)}


  7%|▋         | 65979/1000000 [3:07:15<38:35:37,  6.72it/s]

{'loss': Array(0.13437131, dtype=float32), 'loss_cross_entropy': Array(0.12617452, dtype=float32)}
{'loss_inverse': Array(7.2859926e-05, dtype=float32)}


  7%|▋         | 65989/1000000 [3:07:16<27:25:11,  9.46it/s]

{'loss': Array(0.16028237, dtype=float32), 'loss_cross_entropy': Array(0.15141104, dtype=float32)}
{'loss_inverse': Array(0.00010212, dtype=float32)}


  7%|▋         | 65999/1000000 [3:07:17<26:24:47,  9.82it/s]

{'loss': Array(0.15156387, dtype=float32), 'loss_cross_entropy': Array(0.14251183, dtype=float32)}
{'loss_inverse': Array(0.0001978, dtype=float32)}


  7%|▋         | 66010/1000000 [3:07:26<73:49:48,  3.51it/s] 

{'loss': Array(0.14417008, dtype=float32), 'loss_cross_entropy': Array(0.13535094, dtype=float32)}
{'loss_inverse': Array(0.00022092, dtype=float32)}


  7%|▋         | 66020/1000000 [3:07:28<32:57:34,  7.87it/s]

{'loss': Array(0.15411377, dtype=float32), 'loss_cross_entropy': Array(0.14486478, dtype=float32)}
{'loss_inverse': Array(4.362305e-05, dtype=float32)}


  7%|▋         | 66030/1000000 [3:07:29<29:22:32,  8.83it/s]

{'loss': Array(0.15476017, dtype=float32), 'loss_cross_entropy': Array(0.14551269, dtype=float32)}
{'loss_inverse': Array(2.0671338e-05, dtype=float32)}


  7%|▋         | 66040/1000000 [3:07:31<26:03:55,  9.95it/s]

{'loss': Array(0.13830136, dtype=float32), 'loss_cross_entropy': Array(0.12945902, dtype=float32)}
{'loss_inverse': Array(2.857821e-05, dtype=float32)}


  7%|▋         | 66050/1000000 [3:07:32<31:14:58,  8.30it/s]

{'loss': Array(0.16314562, dtype=float32), 'loss_cross_entropy': Array(0.15399124, dtype=float32)}
{'loss_inverse': Array(5.1190955e-06, dtype=float32)}


  7%|▋         | 66060/1000000 [3:07:34<26:29:40,  9.79it/s]

{'loss': Array(0.13323203, dtype=float32), 'loss_cross_entropy': Array(0.12516962, dtype=float32)}
{'loss_inverse': Array(4.5628018e-05, dtype=float32)}


  7%|▋         | 66070/1000000 [3:07:36<32:06:47,  8.08it/s]

{'loss': Array(0.1314451, dtype=float32), 'loss_cross_entropy': Array(0.12357169, dtype=float32)}
{'loss_inverse': Array(3.8890444e-06, dtype=float32)}


  7%|▋         | 66080/1000000 [3:07:37<26:51:10,  9.66it/s]

{'loss': Array(0.14700817, dtype=float32), 'loss_cross_entropy': Array(0.1384867, dtype=float32)}
{'loss_inverse': Array(2.794604e-05, dtype=float32)}


  7%|▋         | 66090/1000000 [3:07:39<39:21:00,  6.59it/s]

{'loss': Array(0.14624009, dtype=float32), 'loss_cross_entropy': Array(0.13749044, dtype=float32)}
{'loss_inverse': Array(1.425313e-05, dtype=float32)}


  7%|▋         | 66100/1000000 [3:07:40<27:29:32,  9.44it/s]

{'loss': Array(0.15782586, dtype=float32), 'loss_cross_entropy': Array(0.14854787, dtype=float32)}
{'loss_inverse': Array(2.142696e-05, dtype=float32)}


  7%|▋         | 66110/1000000 [3:07:42<26:32:04,  9.78it/s]

{'loss': Array(0.14024039, dtype=float32), 'loss_cross_entropy': Array(0.13169742, dtype=float32)}
{'loss_inverse': Array(4.965408e-07, dtype=float32)}


  7%|▋         | 66120/1000000 [3:07:43<28:15:56,  9.18it/s]

{'loss': Array(0.14750022, dtype=float32), 'loss_cross_entropy': Array(0.13823192, dtype=float32)}
{'loss_inverse': Array(9.620202e-07, dtype=float32)}


  7%|▋         | 66130/1000000 [3:07:45<26:10:15,  9.91it/s]

{'loss': Array(0.1433324, dtype=float32), 'loss_cross_entropy': Array(0.13465782, dtype=float32)}
{'loss_inverse': Array(4.2879233e-06, dtype=float32)}


  7%|▋         | 66140/1000000 [3:07:47<31:29:22,  8.24it/s]

{'loss': Array(0.15157852, dtype=float32), 'loss_cross_entropy': Array(0.14158231, dtype=float32)}
{'loss_inverse': Array(7.2091166e-06, dtype=float32)}


  7%|▋         | 66150/1000000 [3:07:48<27:38:26,  9.38it/s]

{'loss': Array(0.16492291, dtype=float32), 'loss_cross_entropy': Array(0.15506111, dtype=float32)}
{'loss_inverse': Array(2.8947662e-07, dtype=float32)}


  7%|▋         | 66160/1000000 [3:07:49<25:37:34, 10.12it/s]

{'loss': Array(0.16042724, dtype=float32), 'loss_cross_entropy': Array(0.15107942, dtype=float32)}
{'loss_inverse': Array(2.068721e-06, dtype=float32)}


  7%|▋         | 66170/1000000 [3:07:51<30:07:33,  8.61it/s]

{'loss': Array(0.14453015, dtype=float32), 'loss_cross_entropy': Array(0.13550709, dtype=float32)}
{'loss_inverse': Array(0.00011886, dtype=float32)}


  7%|▋         | 66180/1000000 [3:07:53<25:50:46, 10.04it/s]

{'loss': Array(0.15566161, dtype=float32), 'loss_cross_entropy': Array(0.14695929, dtype=float32)}
{'loss_inverse': Array(1.7161994e-05, dtype=float32)}


  7%|▋         | 66190/1000000 [3:07:54<34:18:10,  7.56it/s]

{'loss': Array(0.15241177, dtype=float32), 'loss_cross_entropy': Array(0.14391445, dtype=float32)}
{'loss_inverse': Array(1.0906248e-05, dtype=float32)}


  7%|▋         | 66200/1000000 [3:07:56<26:43:40,  9.70it/s]

{'loss': Array(0.15215254, dtype=float32), 'loss_cross_entropy': Array(0.1431003, dtype=float32)}
{'loss_inverse': Array(0.00024709, dtype=float32)}


  7%|▋         | 66210/1000000 [3:07:57<38:35:49,  6.72it/s]

{'loss': Array(0.15669613, dtype=float32), 'loss_cross_entropy': Array(0.14778154, dtype=float32)}
{'loss_inverse': Array(6.616953e-05, dtype=float32)}


  7%|▋         | 66220/1000000 [3:07:59<27:45:49,  9.34it/s]

{'loss': Array(0.1479094, dtype=float32), 'loss_cross_entropy': Array(0.13919136, dtype=float32)}
{'loss_inverse': Array(1.3341313e-06, dtype=float32)}


  7%|▋         | 66230/1000000 [3:08:01<39:20:42,  6.59it/s]

{'loss': Array(0.15728235, dtype=float32), 'loss_cross_entropy': Array(0.14809434, dtype=float32)}
{'loss_inverse': Array(6.4287015e-06, dtype=float32)}


  7%|▋         | 66240/1000000 [3:08:02<27:44:46,  9.35it/s]

{'loss': Array(0.1427025, dtype=float32), 'loss_cross_entropy': Array(0.13379112, dtype=float32)}
{'loss_inverse': Array(9.858717e-05, dtype=float32)}


  7%|▋         | 66250/1000000 [3:08:04<25:19:38, 10.24it/s]

{'loss': Array(0.15410234, dtype=float32), 'loss_cross_entropy': Array(0.14504977, dtype=float32)}
{'loss_inverse': Array(0.00030159, dtype=float32)}


  7%|▋         | 66260/1000000 [3:08:05<31:17:24,  8.29it/s]

{'loss': Array(0.14244384, dtype=float32), 'loss_cross_entropy': Array(0.1338789, dtype=float32)}
{'loss_inverse': Array(1.0058415e-06, dtype=float32)}


  7%|▋         | 66270/1000000 [3:08:07<26:46:33,  9.69it/s]

{'loss': Array(0.17659548, dtype=float32), 'loss_cross_entropy': Array(0.1669467, dtype=float32)}
{'loss_inverse': Array(0.00015312, dtype=float32)}


  7%|▋         | 66280/1000000 [3:08:08<37:50:26,  6.85it/s]

{'loss': Array(0.13741347, dtype=float32), 'loss_cross_entropy': Array(0.12894659, dtype=float32)}
{'loss_inverse': Array(2.6678526e-06, dtype=float32)}


  7%|▋         | 66290/1000000 [3:08:10<27:29:44,  9.43it/s]

{'loss': Array(0.13798472, dtype=float32), 'loss_cross_entropy': Array(0.12908602, dtype=float32)}
{'loss_inverse': Array(1.1169886e-05, dtype=float32)}


  7%|▋         | 66300/1000000 [3:08:12<38:05:33,  6.81it/s]

{'loss': Array(0.16362211, dtype=float32), 'loss_cross_entropy': Array(0.15383522, dtype=float32)}
{'loss_inverse': Array(2.1574619e-05, dtype=float32)}


  7%|▋         | 66310/1000000 [3:08:13<27:19:49,  9.49it/s]

{'loss': Array(0.17121604, dtype=float32), 'loss_cross_entropy': Array(0.16126196, dtype=float32)}
{'loss_inverse': Array(6.0772345e-05, dtype=float32)}


  7%|▋         | 66320/1000000 [3:08:14<25:31:48, 10.16it/s]

{'loss': Array(0.14854465, dtype=float32), 'loss_cross_entropy': Array(0.14041133, dtype=float32)}
{'loss_inverse': Array(8.986417e-06, dtype=float32)}


  7%|▋         | 66330/1000000 [3:08:16<34:11:41,  7.58it/s]

{'loss': Array(0.15134351, dtype=float32), 'loss_cross_entropy': Array(0.14197707, dtype=float32)}
{'loss_inverse': Array(6.480906e-05, dtype=float32)}


  7%|▋         | 66340/1000000 [3:08:17<26:44:08,  9.70it/s]

{'loss': Array(0.13987921, dtype=float32), 'loss_cross_entropy': Array(0.13092488, dtype=float32)}
{'loss_inverse': Array(8.204265e-06, dtype=float32)}


  7%|▋         | 66350/1000000 [3:08:19<25:39:19, 10.11it/s]

{'loss': Array(0.16196248, dtype=float32), 'loss_cross_entropy': Array(0.1525652, dtype=float32)}
{'loss_inverse': Array(2.1100877e-05, dtype=float32)}


  7%|▋         | 66360/1000000 [3:08:21<28:44:51,  9.02it/s]

{'loss': Array(0.13814816, dtype=float32), 'loss_cross_entropy': Array(0.12955293, dtype=float32)}
{'loss_inverse': Array(2.2852459e-05, dtype=float32)}


  7%|▋         | 66370/1000000 [3:08:22<26:20:03,  9.85it/s]

{'loss': Array(0.1520101, dtype=float32), 'loss_cross_entropy': Array(0.1429974, dtype=float32)}
{'loss_inverse': Array(2.007805e-05, dtype=float32)}


  7%|▋         | 66380/1000000 [3:08:24<30:03:01,  8.63it/s]

{'loss': Array(0.16870844, dtype=float32), 'loss_cross_entropy': Array(0.15915883, dtype=float32)}
{'loss_inverse': Array(0.00017934, dtype=float32)}


  7%|▋         | 66390/1000000 [3:08:25<26:02:18,  9.96it/s]

{'loss': Array(0.14033623, dtype=float32), 'loss_cross_entropy': Array(0.13098447, dtype=float32)}
{'loss_inverse': Array(5.7151706e-06, dtype=float32)}


  7%|▋         | 66400/1000000 [3:08:27<29:52:32,  8.68it/s]

{'loss': Array(0.1666118, dtype=float32), 'loss_cross_entropy': Array(0.15660006, dtype=float32)}
{'loss_inverse': Array(6.0539034e-05, dtype=float32)}


  7%|▋         | 66410/1000000 [3:08:28<26:27:04,  9.80it/s]

{'loss': Array(0.15810965, dtype=float32), 'loss_cross_entropy': Array(0.14872979, dtype=float32)}
{'loss_inverse': Array(5.0614006e-05, dtype=float32)}


  7%|▋         | 66420/1000000 [3:08:30<30:01:14,  8.64it/s]

{'loss': Array(0.16549578, dtype=float32), 'loss_cross_entropy': Array(0.15638028, dtype=float32)}
{'loss_inverse': Array(6.152143e-06, dtype=float32)}


  7%|▋         | 66430/1000000 [3:08:32<27:09:06,  9.55it/s]

{'loss': Array(0.14768973, dtype=float32), 'loss_cross_entropy': Array(0.139461, dtype=float32)}
{'loss_inverse': Array(3.768787e-05, dtype=float32)}


  7%|▋         | 66440/1000000 [3:08:33<34:40:50,  7.48it/s]

{'loss': Array(0.14550716, dtype=float32), 'loss_cross_entropy': Array(0.13644479, dtype=float32)}
{'loss_inverse': Array(0.0001416, dtype=float32)}


  7%|▋         | 66450/1000000 [3:08:35<27:25:00,  9.46it/s]

{'loss': Array(0.15132524, dtype=float32), 'loss_cross_entropy': Array(0.1423348, dtype=float32)}
{'loss_inverse': Array(3.0301147e-05, dtype=float32)}


  7%|▋         | 66460/1000000 [3:08:37<38:25:49,  6.75it/s]

{'loss': Array(0.1662869, dtype=float32), 'loss_cross_entropy': Array(0.1568611, dtype=float32)}
{'loss_inverse': Array(4.642916e-07, dtype=float32)}


  7%|▋         | 66470/1000000 [3:08:38<28:10:29,  9.20it/s]

{'loss': Array(0.15100718, dtype=float32), 'loss_cross_entropy': Array(0.14186789, dtype=float32)}
{'loss_inverse': Array(7.683318e-05, dtype=float32)}


  7%|▋         | 66480/1000000 [3:08:40<35:23:16,  7.33it/s]

{'loss': Array(0.151372, dtype=float32), 'loss_cross_entropy': Array(0.14246632, dtype=float32)}
{'loss_inverse': Array(0.00022153, dtype=float32)}


  7%|▋         | 66490/1000000 [3:08:41<27:09:58,  9.55it/s]

{'loss': Array(0.15156935, dtype=float32), 'loss_cross_entropy': Array(0.1423882, dtype=float32)}
{'loss_inverse': Array(0.0003208, dtype=float32)}


  7%|▋         | 66500/1000000 [3:08:43<25:53:38, 10.01it/s]

{'loss': Array(0.14225523, dtype=float32), 'loss_cross_entropy': Array(0.13337982, dtype=float32)}
{'loss_inverse': Array(8.492637e-05, dtype=float32)}


  7%|▋         | 66510/1000000 [3:08:51<90:23:55,  2.87it/s] 

{'loss': Array(0.14242592, dtype=float32), 'loss_cross_entropy': Array(0.13380903, dtype=float32)}
{'loss_inverse': Array(3.1039187e-07, dtype=float32)}


  7%|▋         | 66520/1000000 [3:08:52<36:02:32,  7.19it/s]

{'loss': Array(0.1681265, dtype=float32), 'loss_cross_entropy': Array(0.15882336, dtype=float32)}
{'loss_inverse': Array(3.7835332e-06, dtype=float32)}


  7%|▋         | 66530/1000000 [3:08:54<33:15:34,  7.80it/s]

{'loss': Array(0.14173332, dtype=float32), 'loss_cross_entropy': Array(0.13371697, dtype=float32)}
{'loss_inverse': Array(0.00016705, dtype=float32)}


  7%|▋         | 66540/1000000 [3:08:55<27:15:26,  9.51it/s]

{'loss': Array(0.15422927, dtype=float32), 'loss_cross_entropy': Array(0.1452502, dtype=float32)}
{'loss_inverse': Array(8.2931154e-05, dtype=float32)}


  7%|▋         | 66550/1000000 [3:08:57<35:02:20,  7.40it/s]

{'loss': Array(0.16313528, dtype=float32), 'loss_cross_entropy': Array(0.15424511, dtype=float32)}
{'loss_inverse': Array(8.915677e-06, dtype=float32)}


  7%|▋         | 66560/1000000 [3:08:59<27:25:22,  9.46it/s]

{'loss': Array(0.14850442, dtype=float32), 'loss_cross_entropy': Array(0.14008228, dtype=float32)}
{'loss_inverse': Array(5.715959e-06, dtype=float32)}


  7%|▋         | 66570/1000000 [3:09:00<35:18:24,  7.34it/s]

{'loss': Array(0.1590554, dtype=float32), 'loss_cross_entropy': Array(0.14953874, dtype=float32)}
{'loss_inverse': Array(1.845618e-05, dtype=float32)}


  7%|▋         | 66580/1000000 [3:09:02<27:00:02,  9.60it/s]

{'loss': Array(0.16420956, dtype=float32), 'loss_cross_entropy': Array(0.15478429, dtype=float32)}
{'loss_inverse': Array(1.4852584e-05, dtype=float32)}


  7%|▋         | 66590/1000000 [3:09:03<26:05:00,  9.94it/s]

{'loss': Array(0.1653149, dtype=float32), 'loss_cross_entropy': Array(0.15501605, dtype=float32)}
{'loss_inverse': Array(0.00010492, dtype=float32)}


  7%|▋         | 66600/1000000 [3:09:05<31:50:52,  8.14it/s]

{'loss': Array(0.14070094, dtype=float32), 'loss_cross_entropy': Array(0.13196467, dtype=float32)}
{'loss_inverse': Array(2.019788e-07, dtype=float32)}


  7%|▋         | 66610/1000000 [3:09:07<26:52:53,  9.65it/s]

{'loss': Array(0.1376537, dtype=float32), 'loss_cross_entropy': Array(0.12927134, dtype=float32)}
{'loss_inverse': Array(0.00014218, dtype=float32)}


  7%|▋         | 66620/1000000 [3:09:08<38:10:50,  6.79it/s]

{'loss': Array(0.16241755, dtype=float32), 'loss_cross_entropy': Array(0.15265167, dtype=float32)}
{'loss_inverse': Array(6.150807e-06, dtype=float32)}


  7%|▋         | 66630/1000000 [3:09:10<28:01:25,  9.25it/s]

{'loss': Array(0.16274075, dtype=float32), 'loss_cross_entropy': Array(0.15300503, dtype=float32)}
{'loss_inverse': Array(3.039561e-06, dtype=float32)}


  7%|▋         | 66640/1000000 [3:09:11<25:52:00, 10.02it/s]

{'loss': Array(0.14622729, dtype=float32), 'loss_cross_entropy': Array(0.13767205, dtype=float32)}
{'loss_inverse': Array(0.00014136, dtype=float32)}


  7%|▋         | 66650/1000000 [3:09:13<28:47:42,  9.00it/s]

{'loss': Array(0.15093672, dtype=float32), 'loss_cross_entropy': Array(0.14223014, dtype=float32)}
{'loss_inverse': Array(7.426394e-05, dtype=float32)}


  7%|▋         | 66660/1000000 [3:09:14<26:08:49,  9.92it/s]

{'loss': Array(0.14837159, dtype=float32), 'loss_cross_entropy': Array(0.13953514, dtype=float32)}
{'loss_inverse': Array(6.0249968e-06, dtype=float32)}


  7%|▋         | 66670/1000000 [3:09:16<29:14:26,  8.87it/s]

{'loss': Array(0.15967745, dtype=float32), 'loss_cross_entropy': Array(0.15045296, dtype=float32)}
{'loss_inverse': Array(0.00040377, dtype=float32)}


  7%|▋         | 66680/1000000 [3:09:18<26:18:14,  9.86it/s]

{'loss': Array(0.15511817, dtype=float32), 'loss_cross_entropy': Array(0.14545397, dtype=float32)}
{'loss_inverse': Array(5.9383292e-05, dtype=float32)}


  7%|▋         | 66690/1000000 [3:09:19<31:58:09,  8.11it/s]

{'loss': Array(0.1587225, dtype=float32), 'loss_cross_entropy': Array(0.15000558, dtype=float32)}
{'loss_inverse': Array(0.00032827, dtype=float32)}


  7%|▋         | 66700/1000000 [3:09:21<26:36:03,  9.75it/s]

{'loss': Array(0.15948942, dtype=float32), 'loss_cross_entropy': Array(0.1501766, dtype=float32)}
{'loss_inverse': Array(1.9613111e-05, dtype=float32)}


  7%|▋         | 66710/1000000 [3:09:22<32:53:44,  7.88it/s]

{'loss': Array(0.1429987, dtype=float32), 'loss_cross_entropy': Array(0.13397914, dtype=float32)}
{'loss_inverse': Array(0.00028626, dtype=float32)}


  7%|▋         | 66720/1000000 [3:09:24<27:12:02,  9.53it/s]

{'loss': Array(0.15113868, dtype=float32), 'loss_cross_entropy': Array(0.14273643, dtype=float32)}
{'loss_inverse': Array(0.00019187, dtype=float32)}


  7%|▋         | 66730/1000000 [3:09:26<38:47:13,  6.68it/s]

{'loss': Array(0.13762689, dtype=float32), 'loss_cross_entropy': Array(0.1298242, dtype=float32)}
{'loss_inverse': Array(2.9035833e-05, dtype=float32)}


  7%|▋         | 66740/1000000 [3:09:27<27:23:36,  9.46it/s]

{'loss': Array(0.15342854, dtype=float32), 'loss_cross_entropy': Array(0.1444749, dtype=float32)}
{'loss_inverse': Array(1.0768985e-05, dtype=float32)}


  7%|▋         | 66750/1000000 [3:09:28<25:37:49, 10.11it/s]

{'loss': Array(0.14749818, dtype=float32), 'loss_cross_entropy': Array(0.13836609, dtype=float32)}
{'loss_inverse': Array(1.845675e-05, dtype=float32)}


  7%|▋         | 66760/1000000 [3:09:30<28:21:44,  9.14it/s]

{'loss': Array(0.15597454, dtype=float32), 'loss_cross_entropy': Array(0.1472446, dtype=float32)}
{'loss_inverse': Array(6.494562e-05, dtype=float32)}


  7%|▋         | 66770/1000000 [3:09:32<26:28:54,  9.79it/s]

{'loss': Array(0.13970053, dtype=float32), 'loss_cross_entropy': Array(0.13119136, dtype=float32)}
{'loss_inverse': Array(4.2324795e-05, dtype=float32)}


  7%|▋         | 66780/1000000 [3:09:33<32:04:01,  8.08it/s]

{'loss': Array(0.15335642, dtype=float32), 'loss_cross_entropy': Array(0.14461938, dtype=float32)}
{'loss_inverse': Array(1.3296417e-05, dtype=float32)}


  7%|▋         | 66790/1000000 [3:09:35<26:44:17,  9.69it/s]

{'loss': Array(0.14507243, dtype=float32), 'loss_cross_entropy': Array(0.1361916, dtype=float32)}
{'loss_inverse': Array(1.4892861e-06, dtype=float32)}


  7%|▋         | 66800/1000000 [3:09:36<25:43:16, 10.08it/s]

{'loss': Array(0.14374505, dtype=float32), 'loss_cross_entropy': Array(0.134603, dtype=float32)}
{'loss_inverse': Array(3.8227586e-06, dtype=float32)}


  7%|▋         | 66810/1000000 [3:09:38<30:06:10,  8.61it/s]

{'loss': Array(0.13679451, dtype=float32), 'loss_cross_entropy': Array(0.12785846, dtype=float32)}
{'loss_inverse': Array(5.7271695e-06, dtype=float32)}


  7%|▋         | 66820/1000000 [3:09:40<26:17:10,  9.86it/s]

{'loss': Array(0.1724508, dtype=float32), 'loss_cross_entropy': Array(0.16356272, dtype=float32)}
{'loss_inverse': Array(8.681641e-07, dtype=float32)}


  7%|▋         | 66830/1000000 [3:09:41<34:01:28,  7.62it/s]

{'loss': Array(0.17542495, dtype=float32), 'loss_cross_entropy': Array(0.16547543, dtype=float32)}
{'loss_inverse': Array(6.307643e-05, dtype=float32)}


  7%|▋         | 66840/1000000 [3:09:43<27:21:11,  9.48it/s]

{'loss': Array(0.17279613, dtype=float32), 'loss_cross_entropy': Array(0.16342404, dtype=float32)}
{'loss_inverse': Array(1.09232105e-05, dtype=float32)}


  7%|▋         | 66850/1000000 [3:09:44<38:47:50,  6.68it/s]

{'loss': Array(0.12968907, dtype=float32), 'loss_cross_entropy': Array(0.12186446, dtype=float32)}
{'loss_inverse': Array(0.00012354, dtype=float32)}


  7%|▋         | 66860/1000000 [3:09:46<28:14:59,  9.18it/s]

{'loss': Array(0.15858835, dtype=float32), 'loss_cross_entropy': Array(0.14934853, dtype=float32)}
{'loss_inverse': Array(0.0001017, dtype=float32)}


  7%|▋         | 66870/1000000 [3:09:48<39:17:47,  6.60it/s]

{'loss': Array(0.15292211, dtype=float32), 'loss_cross_entropy': Array(0.14333726, dtype=float32)}
{'loss_inverse': Array(2.2354105e-05, dtype=float32)}


  7%|▋         | 66880/1000000 [3:09:49<28:03:47,  9.24it/s]

{'loss': Array(0.1570752, dtype=float32), 'loss_cross_entropy': Array(0.14831111, dtype=float32)}
{'loss_inverse': Array(0.00024417, dtype=float32)}


  7%|▋         | 66890/1000000 [3:09:51<26:14:20,  9.88it/s]

{'loss': Array(0.1698713, dtype=float32), 'loss_cross_entropy': Array(0.16059226, dtype=float32)}
{'loss_inverse': Array(0.00019805, dtype=float32)}


  7%|▋         | 66900/1000000 [3:09:52<31:57:01,  8.11it/s]

{'loss': Array(0.15346476, dtype=float32), 'loss_cross_entropy': Array(0.14408235, dtype=float32)}
{'loss_inverse': Array(4.344993e-06, dtype=float32)}


  7%|▋         | 66910/1000000 [3:09:54<26:43:38,  9.70it/s]

{'loss': Array(0.13811758, dtype=float32), 'loss_cross_entropy': Array(0.12927182, dtype=float32)}
{'loss_inverse': Array(0.00020732, dtype=float32)}


  7%|▋         | 66920/1000000 [3:09:56<38:19:32,  6.76it/s]

{'loss': Array(0.15860295, dtype=float32), 'loss_cross_entropy': Array(0.14908373, dtype=float32)}
{'loss_inverse': Array(2.0492667e-05, dtype=float32)}


  7%|▋         | 66930/1000000 [3:09:57<27:51:35,  9.30it/s]

{'loss': Array(0.1431721, dtype=float32), 'loss_cross_entropy': Array(0.13481155, dtype=float32)}
{'loss_inverse': Array(1.4527433e-05, dtype=float32)}


  7%|▋         | 66940/1000000 [3:09:59<40:03:53,  6.47it/s]

{'loss': Array(0.15590096, dtype=float32), 'loss_cross_entropy': Array(0.14694946, dtype=float32)}
{'loss_inverse': Array(4.6703262e-05, dtype=float32)}


  7%|▋         | 66950/1000000 [3:10:00<27:31:13,  9.42it/s]

{'loss': Array(0.17303878, dtype=float32), 'loss_cross_entropy': Array(0.1634988, dtype=float32)}
{'loss_inverse': Array(3.720066e-06, dtype=float32)}


  7%|▋         | 66960/1000000 [3:10:02<25:50:22, 10.03it/s]

{'loss': Array(0.16015375, dtype=float32), 'loss_cross_entropy': Array(0.15149955, dtype=float32)}
{'loss_inverse': Array(3.9654682e-05, dtype=float32)}


  7%|▋         | 66970/1000000 [3:10:03<34:19:23,  7.55it/s]

{'loss': Array(0.14838818, dtype=float32), 'loss_cross_entropy': Array(0.13898091, dtype=float32)}
{'loss_inverse': Array(3.94273e-06, dtype=float32)}


  7%|▋         | 66980/1000000 [3:10:05<27:45:15,  9.34it/s]

{'loss': Array(0.1412712, dtype=float32), 'loss_cross_entropy': Array(0.13311441, dtype=float32)}
{'loss_inverse': Array(0.00010289, dtype=float32)}


  7%|▋         | 66990/1000000 [3:10:06<26:37:14,  9.74it/s]

{'loss': Array(0.14788663, dtype=float32), 'loss_cross_entropy': Array(0.13915549, dtype=float32)}
{'loss_inverse': Array(3.4286233e-05, dtype=float32)}


  7%|▋         | 67000/1000000 [3:10:08<29:07:12,  8.90it/s]

{'loss': Array(0.15574196, dtype=float32), 'loss_cross_entropy': Array(0.14661016, dtype=float32)}
{'loss_inverse': Array(0.00012302, dtype=float32)}


  7%|▋         | 67010/1000000 [3:10:16<87:54:30,  2.95it/s] 

{'loss': Array(0.16569619, dtype=float32), 'loss_cross_entropy': Array(0.15653907, dtype=float32)}
{'loss_inverse': Array(0.00012926, dtype=float32)}


  7%|▋         | 67020/1000000 [3:10:18<42:24:40,  6.11it/s]

{'loss': Array(0.1610423, dtype=float32), 'loss_cross_entropy': Array(0.15172003, dtype=float32)}
{'loss_inverse': Array(8.6727116e-05, dtype=float32)}


  7%|▋         | 67030/1000000 [3:10:19<28:11:01,  9.20it/s]

{'loss': Array(0.1508806, dtype=float32), 'loss_cross_entropy': Array(0.14181352, dtype=float32)}
{'loss_inverse': Array(4.9281633e-05, dtype=float32)}


  7%|▋         | 67040/1000000 [3:10:21<35:31:32,  7.29it/s]

{'loss': Array(0.15229791, dtype=float32), 'loss_cross_entropy': Array(0.14347705, dtype=float32)}
{'loss_inverse': Array(0.00039474, dtype=float32)}


  7%|▋         | 67050/1000000 [3:10:23<26:46:42,  9.68it/s]

{'loss': Array(0.15577726, dtype=float32), 'loss_cross_entropy': Array(0.1465309, dtype=float32)}
{'loss_inverse': Array(7.611612e-05, dtype=float32)}


  7%|▋         | 67060/1000000 [3:10:24<38:53:57,  6.66it/s]

{'loss': Array(0.15989672, dtype=float32), 'loss_cross_entropy': Array(0.15083723, dtype=float32)}
{'loss_inverse': Array(2.808264e-05, dtype=float32)}


  7%|▋         | 67070/1000000 [3:10:26<27:44:15,  9.34it/s]

{'loss': Array(0.1577974, dtype=float32), 'loss_cross_entropy': Array(0.14870583, dtype=float32)}
{'loss_inverse': Array(8.89947e-05, dtype=float32)}


  7%|▋         | 67080/1000000 [3:10:28<34:54:56,  7.42it/s]

{'loss': Array(0.1530783, dtype=float32), 'loss_cross_entropy': Array(0.14390163, dtype=float32)}
{'loss_inverse': Array(1.2313379e-05, dtype=float32)}


  7%|▋         | 67090/1000000 [3:10:29<26:55:17,  9.63it/s]

{'loss': Array(0.1397396, dtype=float32), 'loss_cross_entropy': Array(0.13136218, dtype=float32)}
{'loss_inverse': Array(9.514659e-06, dtype=float32)}


  7%|▋         | 67100/1000000 [3:10:30<26:25:53,  9.80it/s]

{'loss': Array(0.15738012, dtype=float32), 'loss_cross_entropy': Array(0.14816602, dtype=float32)}
{'loss_inverse': Array(7.644254e-07, dtype=float32)}


  7%|▋         | 67110/1000000 [3:10:32<29:57:36,  8.65it/s]

{'loss': Array(0.157268, dtype=float32), 'loss_cross_entropy': Array(0.14803652, dtype=float32)}
{'loss_inverse': Array(5.304373e-05, dtype=float32)}


  7%|▋         | 67120/1000000 [3:10:34<26:15:43,  9.87it/s]

{'loss': Array(0.17953007, dtype=float32), 'loss_cross_entropy': Array(0.16926722, dtype=float32)}
{'loss_inverse': Array(5.5542227e-05, dtype=float32)}


  7%|▋         | 67130/1000000 [3:10:35<34:26:49,  7.52it/s]

{'loss': Array(0.14195064, dtype=float32), 'loss_cross_entropy': Array(0.13375294, dtype=float32)}
{'loss_inverse': Array(0.00012693, dtype=float32)}


  7%|▋         | 67140/1000000 [3:10:37<27:03:58,  9.57it/s]

{'loss': Array(0.15435293, dtype=float32), 'loss_cross_entropy': Array(0.14490478, dtype=float32)}
{'loss_inverse': Array(4.0755156e-05, dtype=float32)}


  7%|▋         | 67150/1000000 [3:10:38<38:10:40,  6.79it/s]

{'loss': Array(0.15002124, dtype=float32), 'loss_cross_entropy': Array(0.14115681, dtype=float32)}
{'loss_inverse': Array(1.9045567e-05, dtype=float32)}


  7%|▋         | 67160/1000000 [3:10:40<27:23:03,  9.46it/s]

{'loss': Array(0.14692879, dtype=float32), 'loss_cross_entropy': Array(0.13795456, dtype=float32)}
{'loss_inverse': Array(0.0001969, dtype=float32)}


  7%|▋         | 67170/1000000 [3:10:41<25:46:21, 10.05it/s]

{'loss': Array(0.16409314, dtype=float32), 'loss_cross_entropy': Array(0.15411924, dtype=float32)}
{'loss_inverse': Array(9.151072e-07, dtype=float32)}


  7%|▋         | 67180/1000000 [3:10:43<28:48:40,  8.99it/s]

{'loss': Array(0.13996139, dtype=float32), 'loss_cross_entropy': Array(0.1311207, dtype=float32)}
{'loss_inverse': Array(3.5829034e-05, dtype=float32)}


  7%|▋         | 67190/1000000 [3:10:44<25:37:36, 10.11it/s]

{'loss': Array(0.1581469, dtype=float32), 'loss_cross_entropy': Array(0.14843309, dtype=float32)}
{'loss_inverse': Array(7.28069e-06, dtype=float32)}


  7%|▋         | 67200/1000000 [3:10:46<30:01:40,  8.63it/s]

{'loss': Array(0.15375523, dtype=float32), 'loss_cross_entropy': Array(0.14459948, dtype=float32)}
{'loss_inverse': Array(1.8923754e-05, dtype=float32)}


  7%|▋         | 67210/1000000 [3:10:48<26:32:26,  9.76it/s]

{'loss': Array(0.17635046, dtype=float32), 'loss_cross_entropy': Array(0.16574536, dtype=float32)}
{'loss_inverse': Array(1.20765435e-05, dtype=float32)}


  7%|▋         | 67220/1000000 [3:10:49<32:02:38,  8.09it/s]

{'loss': Array(0.15182741, dtype=float32), 'loss_cross_entropy': Array(0.14253959, dtype=float32)}
{'loss_inverse': Array(0.00015988, dtype=float32)}


  7%|▋         | 67230/1000000 [3:10:51<26:38:09,  9.73it/s]

{'loss': Array(0.17101587, dtype=float32), 'loss_cross_entropy': Array(0.16110364, dtype=float32)}
{'loss_inverse': Array(5.2298833e-06, dtype=float32)}


  7%|▋         | 67240/1000000 [3:10:53<34:30:55,  7.51it/s]

{'loss': Array(0.1421581, dtype=float32), 'loss_cross_entropy': Array(0.13356303, dtype=float32)}
{'loss_inverse': Array(6.4234904e-05, dtype=float32)}


  7%|▋         | 67250/1000000 [3:10:54<27:31:12,  9.41it/s]

{'loss': Array(0.16396496, dtype=float32), 'loss_cross_entropy': Array(0.15395425, dtype=float32)}
{'loss_inverse': Array(2.7073746e-05, dtype=float32)}


  7%|▋         | 67260/1000000 [3:10:56<35:23:10,  7.32it/s]

{'loss': Array(0.14471208, dtype=float32), 'loss_cross_entropy': Array(0.13639389, dtype=float32)}
{'loss_inverse': Array(0.00016525, dtype=float32)}


  7%|▋         | 67270/1000000 [3:10:57<27:19:35,  9.48it/s]

{'loss': Array(0.1399309, dtype=float32), 'loss_cross_entropy': Array(0.13159388, dtype=float32)}
{'loss_inverse': Array(3.91634e-05, dtype=float32)}


  7%|▋         | 67280/1000000 [3:10:59<25:30:16, 10.16it/s]

{'loss': Array(0.15372296, dtype=float32), 'loss_cross_entropy': Array(0.14449883, dtype=float32)}
{'loss_inverse': Array(0.00027429, dtype=float32)}


  7%|▋         | 67290/1000000 [3:11:00<31:14:23,  8.29it/s]

{'loss': Array(0.1458606, dtype=float32), 'loss_cross_entropy': Array(0.13748309, dtype=float32)}
{'loss_inverse': Array(9.157857e-05, dtype=float32)}


  7%|▋         | 67300/1000000 [3:11:02<26:20:27,  9.84it/s]

{'loss': Array(0.15789716, dtype=float32), 'loss_cross_entropy': Array(0.14959982, dtype=float32)}
{'loss_inverse': Array(0.00048861, dtype=float32)}


  7%|▋         | 67310/1000000 [3:11:04<38:02:33,  6.81it/s]

{'loss': Array(0.14709587, dtype=float32), 'loss_cross_entropy': Array(0.1384799, dtype=float32)}
{'loss_inverse': Array(0.00014423, dtype=float32)}


  7%|▋         | 67320/1000000 [3:11:05<27:30:48,  9.42it/s]

{'loss': Array(0.15522486, dtype=float32), 'loss_cross_entropy': Array(0.14662956, dtype=float32)}
{'loss_inverse': Array(3.107163e-05, dtype=float32)}


  7%|▋         | 67330/1000000 [3:11:06<26:38:35,  9.72it/s]

{'loss': Array(0.1713739, dtype=float32), 'loss_cross_entropy': Array(0.16221145, dtype=float32)}
{'loss_inverse': Array(9.085004e-06, dtype=float32)}


  7%|▋         | 67340/1000000 [3:11:08<28:56:09,  8.95it/s]

{'loss': Array(0.14775443, dtype=float32), 'loss_cross_entropy': Array(0.13896872, dtype=float32)}
{'loss_inverse': Array(4.364279e-06, dtype=float32)}


  7%|▋         | 67350/1000000 [3:11:10<26:27:57,  9.79it/s]

{'loss': Array(0.1604634, dtype=float32), 'loss_cross_entropy': Array(0.15110382, dtype=float32)}
{'loss_inverse': Array(1.0826448e-05, dtype=float32)}


  7%|▋         | 67360/1000000 [3:11:11<28:20:12,  9.14it/s]

{'loss': Array(0.15984039, dtype=float32), 'loss_cross_entropy': Array(0.15009588, dtype=float32)}
{'loss_inverse': Array(6.870696e-05, dtype=float32)}


  7%|▋         | 67370/1000000 [3:11:13<26:27:51,  9.79it/s]

{'loss': Array(0.18283805, dtype=float32), 'loss_cross_entropy': Array(0.17179142, dtype=float32)}
{'loss_inverse': Array(8.694189e-06, dtype=float32)}


  7%|▋         | 67380/1000000 [3:11:15<31:55:48,  8.11it/s]

{'loss': Array(0.17343403, dtype=float32), 'loss_cross_entropy': Array(0.16363145, dtype=float32)}
{'loss_inverse': Array(2.422225e-05, dtype=float32)}


  7%|▋         | 67390/1000000 [3:11:16<26:33:48,  9.75it/s]

{'loss': Array(0.1575702, dtype=float32), 'loss_cross_entropy': Array(0.14937277, dtype=float32)}
{'loss_inverse': Array(1.478836e-06, dtype=float32)}


  7%|▋         | 67400/1000000 [3:11:18<31:50:26,  8.14it/s]

{'loss': Array(0.15424272, dtype=float32), 'loss_cross_entropy': Array(0.14605634, dtype=float32)}
{'loss_inverse': Array(8.0325284e-05, dtype=float32)}


  7%|▋         | 67410/1000000 [3:11:19<26:32:16,  9.76it/s]

{'loss': Array(0.14095332, dtype=float32), 'loss_cross_entropy': Array(0.13225581, dtype=float32)}
{'loss_inverse': Array(4.462889e-05, dtype=float32)}


  7%|▋         | 67420/1000000 [3:11:21<38:19:41,  6.76it/s]

{'loss': Array(0.13718474, dtype=float32), 'loss_cross_entropy': Array(0.1293679, dtype=float32)}
{'loss_inverse': Array(4.1380234e-05, dtype=float32)}


  7%|▋         | 67430/1000000 [3:11:22<27:57:30,  9.27it/s]

{'loss': Array(0.14358702, dtype=float32), 'loss_cross_entropy': Array(0.13557638, dtype=float32)}
{'loss_inverse': Array(9.3453145e-06, dtype=float32)}


  7%|▋         | 67440/1000000 [3:11:24<25:36:13, 10.12it/s]

{'loss': Array(0.15721193, dtype=float32), 'loss_cross_entropy': Array(0.14809617, dtype=float32)}
{'loss_inverse': Array(1.1336818e-05, dtype=float32)}


  7%|▋         | 67450/1000000 [3:11:26<29:09:23,  8.88it/s]

{'loss': Array(0.1478408, dtype=float32), 'loss_cross_entropy': Array(0.13866284, dtype=float32)}
{'loss_inverse': Array(2.5784631e-05, dtype=float32)}


  7%|▋         | 67460/1000000 [3:11:27<26:05:45,  9.93it/s]

{'loss': Array(0.16260825, dtype=float32), 'loss_cross_entropy': Array(0.15339984, dtype=float32)}
{'loss_inverse': Array(7.739633e-07, dtype=float32)}


  7%|▋         | 67470/1000000 [3:11:29<32:16:26,  8.03it/s]

{'loss': Array(0.17394069, dtype=float32), 'loss_cross_entropy': Array(0.16393106, dtype=float32)}
{'loss_inverse': Array(6.018521e-06, dtype=float32)}


  7%|▋         | 67480/1000000 [3:11:30<26:20:54,  9.83it/s]

{'loss': Array(0.1538567, dtype=float32), 'loss_cross_entropy': Array(0.14526908, dtype=float32)}
{'loss_inverse': Array(5.390917e-05, dtype=float32)}


  7%|▋         | 67490/1000000 [3:11:32<25:27:09, 10.18it/s]

{'loss': Array(0.15772483, dtype=float32), 'loss_cross_entropy': Array(0.1479616, dtype=float32)}
{'loss_inverse': Array(1.9171387e-06, dtype=float32)}


  7%|▋         | 67500/1000000 [3:11:33<29:36:56,  8.75it/s]

{'loss': Array(0.14240982, dtype=float32), 'loss_cross_entropy': Array(0.13361154, dtype=float32)}
{'loss_inverse': Array(0.00015191, dtype=float32)}


  7%|▋         | 67510/1000000 [3:11:41<86:13:27,  3.00it/s] 

{'loss': Array(0.13738136, dtype=float32), 'loss_cross_entropy': Array(0.12930606, dtype=float32)}
{'loss_inverse': Array(0.00040447, dtype=float32)}


  7%|▋         | 67520/1000000 [3:11:43<39:23:59,  6.57it/s] 

{'loss': Array(0.15905783, dtype=float32), 'loss_cross_entropy': Array(0.14931327, dtype=float32)}
{'loss_inverse': Array(1.1611278e-06, dtype=float32)}


  7%|▋         | 67530/1000000 [3:11:45<28:31:04,  9.08it/s]

{'loss': Array(0.15073995, dtype=float32), 'loss_cross_entropy': Array(0.14247389, dtype=float32)}
{'loss_inverse': Array(1.1602455e-05, dtype=float32)}


  7%|▋         | 67540/1000000 [3:11:46<31:45:37,  8.16it/s]

{'loss': Array(0.15342759, dtype=float32), 'loss_cross_entropy': Array(0.14365308, dtype=float32)}
{'loss_inverse': Array(5.517569e-05, dtype=float32)}


  7%|▋         | 67550/1000000 [3:11:48<26:16:17,  9.86it/s]

{'loss': Array(0.17703354, dtype=float32), 'loss_cross_entropy': Array(0.16706125, dtype=float32)}
{'loss_inverse': Array(1.8884175e-06, dtype=float32)}


  7%|▋         | 67560/1000000 [3:11:49<31:48:50,  8.14it/s]

{'loss': Array(0.14560604, dtype=float32), 'loss_cross_entropy': Array(0.13658433, dtype=float32)}
{'loss_inverse': Array(0.00027514, dtype=float32)}


  7%|▋         | 67570/1000000 [3:11:51<26:37:31,  9.73it/s]

{'loss': Array(0.15481749, dtype=float32), 'loss_cross_entropy': Array(0.14564139, dtype=float32)}
{'loss_inverse': Array(0.00011189, dtype=float32)}


  7%|▋         | 67580/1000000 [3:11:53<33:57:45,  7.63it/s]

{'loss': Array(0.15678917, dtype=float32), 'loss_cross_entropy': Array(0.14805092, dtype=float32)}
{'loss_inverse': Array(0.00010411, dtype=float32)}


  7%|▋         | 67590/1000000 [3:11:54<26:18:30,  9.84it/s]

{'loss': Array(0.15004371, dtype=float32), 'loss_cross_entropy': Array(0.1414166, dtype=float32)}
{'loss_inverse': Array(4.1128143e-05, dtype=float32)}


  7%|▋         | 67600/1000000 [3:11:56<31:43:10,  8.17it/s]

{'loss': Array(0.14838293, dtype=float32), 'loss_cross_entropy': Array(0.13882558, dtype=float32)}
{'loss_inverse': Array(0.00010916, dtype=float32)}


  7%|▋         | 67610/1000000 [3:11:57<26:15:38,  9.86it/s]

{'loss': Array(0.16174473, dtype=float32), 'loss_cross_entropy': Array(0.15260552, dtype=float32)}
{'loss_inverse': Array(1.5990378e-05, dtype=float32)}


  7%|▋         | 67620/1000000 [3:11:59<38:35:25,  6.71it/s]

{'loss': Array(0.15771374, dtype=float32), 'loss_cross_entropy': Array(0.14805166, dtype=float32)}
{'loss_inverse': Array(1.8371298e-06, dtype=float32)}


  7%|▋         | 67630/1000000 [3:12:00<27:27:20,  9.43it/s]

{'loss': Array(0.15056148, dtype=float32), 'loss_cross_entropy': Array(0.14130512, dtype=float32)}
{'loss_inverse': Array(3.0339802e-06, dtype=float32)}


  7%|▋         | 67640/1000000 [3:12:02<26:21:34,  9.83it/s]

{'loss': Array(0.16447993, dtype=float32), 'loss_cross_entropy': Array(0.15436468, dtype=float32)}
{'loss_inverse': Array(1.9455698e-05, dtype=float32)}


  7%|▋         | 67650/1000000 [3:12:03<28:55:19,  8.95it/s]

{'loss': Array(0.13902064, dtype=float32), 'loss_cross_entropy': Array(0.13075821, dtype=float32)}
{'loss_inverse': Array(5.38739e-07, dtype=float32)}


  7%|▋         | 67660/1000000 [3:12:05<26:32:57,  9.75it/s]

{'loss': Array(0.12720506, dtype=float32), 'loss_cross_entropy': Array(0.11928289, dtype=float32)}
{'loss_inverse': Array(3.5396162e-05, dtype=float32)}


  7%|▋         | 67670/1000000 [3:12:07<28:50:42,  8.98it/s]

{'loss': Array(0.16100128, dtype=float32), 'loss_cross_entropy': Array(0.15160736, dtype=float32)}
{'loss_inverse': Array(1.5639513e-05, dtype=float32)}


  7%|▋         | 67680/1000000 [3:12:08<25:56:27,  9.98it/s]

{'loss': Array(0.1297479, dtype=float32), 'loss_cross_entropy': Array(0.12147825, dtype=float32)}
{'loss_inverse': Array(2.807008e-06, dtype=float32)}


  7%|▋         | 67690/1000000 [3:12:10<30:00:55,  8.63it/s]

{'loss': Array(0.1467394, dtype=float32), 'loss_cross_entropy': Array(0.13774166, dtype=float32)}
{'loss_inverse': Array(1.7402874e-05, dtype=float32)}


  7%|▋         | 67700/1000000 [3:12:11<25:57:42,  9.98it/s]

{'loss': Array(0.16112919, dtype=float32), 'loss_cross_entropy': Array(0.15205139, dtype=float32)}
{'loss_inverse': Array(9.091457e-06, dtype=float32)}


  7%|▋         | 67710/1000000 [3:12:13<39:13:44,  6.60it/s]

{'loss': Array(0.12996332, dtype=float32), 'loss_cross_entropy': Array(0.12201428, dtype=float32)}
{'loss_inverse': Array(9.468714e-06, dtype=float32)}


  7%|▋         | 67720/1000000 [3:12:14<27:55:47,  9.27it/s]

{'loss': Array(0.14204143, dtype=float32), 'loss_cross_entropy': Array(0.13342217, dtype=float32)}
{'loss_inverse': Array(4.2944973e-07, dtype=float32)}


  7%|▋         | 67730/1000000 [3:12:16<25:42:37, 10.07it/s]

{'loss': Array(0.15394416, dtype=float32), 'loss_cross_entropy': Array(0.1453056, dtype=float32)}
{'loss_inverse': Array(9.353464e-06, dtype=float32)}


  7%|▋         | 67740/1000000 [3:12:18<29:51:14,  8.67it/s]

{'loss': Array(0.15219806, dtype=float32), 'loss_cross_entropy': Array(0.14298104, dtype=float32)}
{'loss_inverse': Array(8.967806e-05, dtype=float32)}


  7%|▋         | 67750/1000000 [3:12:19<26:05:12,  9.93it/s]

{'loss': Array(0.16440903, dtype=float32), 'loss_cross_entropy': Array(0.15570365, dtype=float32)}
{'loss_inverse': Array(2.0575637e-06, dtype=float32)}


  7%|▋         | 67760/1000000 [3:12:21<30:29:05,  8.49it/s]

{'loss': Array(0.16061601, dtype=float32), 'loss_cross_entropy': Array(0.1511975, dtype=float32)}
{'loss_inverse': Array(3.5881342e-06, dtype=float32)}


  7%|▋         | 67770/1000000 [3:12:22<26:14:15,  9.87it/s]

{'loss': Array(0.14557348, dtype=float32), 'loss_cross_entropy': Array(0.13709874, dtype=float32)}
{'loss_inverse': Array(5.5155465e-07, dtype=float32)}


  7%|▋         | 67780/1000000 [3:12:24<31:43:34,  8.16it/s]

{'loss': Array(0.15312175, dtype=float32), 'loss_cross_entropy': Array(0.14452732, dtype=float32)}
{'loss_inverse': Array(2.8342636e-06, dtype=float32)}


  7%|▋         | 67790/1000000 [3:12:25<26:32:13,  9.76it/s]

{'loss': Array(0.1536431, dtype=float32), 'loss_cross_entropy': Array(0.1447466, dtype=float32)}
{'loss_inverse': Array(2.9448934e-06, dtype=float32)}


  7%|▋         | 67800/1000000 [3:12:27<26:01:33,  9.95it/s]

{'loss': Array(0.1686595, dtype=float32), 'loss_cross_entropy': Array(0.15886061, dtype=float32)}
{'loss_inverse': Array(1.5004877e-05, dtype=float32)}


  7%|▋         | 67810/1000000 [3:12:29<28:33:26,  9.07it/s]

{'loss': Array(0.160304, dtype=float32), 'loss_cross_entropy': Array(0.15143357, dtype=float32)}
{'loss_inverse': Array(5.3883858e-05, dtype=float32)}


  7%|▋         | 67820/1000000 [3:12:30<25:44:28, 10.06it/s]

{'loss': Array(0.14130798, dtype=float32), 'loss_cross_entropy': Array(0.13276738, dtype=float32)}
{'loss_inverse': Array(0.00012503, dtype=float32)}


  7%|▋         | 67830/1000000 [3:12:32<30:10:12,  8.58it/s]

{'loss': Array(0.16634926, dtype=float32), 'loss_cross_entropy': Array(0.15725508, dtype=float32)}
{'loss_inverse': Array(2.3555765e-05, dtype=float32)}


  7%|▋         | 67840/1000000 [3:12:33<26:41:32,  9.70it/s]

{'loss': Array(0.15762687, dtype=float32), 'loss_cross_entropy': Array(0.14851888, dtype=float32)}
{'loss_inverse': Array(9.947371e-07, dtype=float32)}


  7%|▋         | 67850/1000000 [3:12:35<31:57:15,  8.10it/s]

{'loss': Array(0.14621285, dtype=float32), 'loss_cross_entropy': Array(0.13812485, dtype=float32)}
{'loss_inverse': Array(7.0161004e-05, dtype=float32)}


  7%|▋         | 67860/1000000 [3:12:36<26:06:51,  9.92it/s]

{'loss': Array(0.16383924, dtype=float32), 'loss_cross_entropy': Array(0.15486778, dtype=float32)}
{'loss_inverse': Array(1.3853636e-05, dtype=float32)}


  7%|▋         | 67870/1000000 [3:12:38<25:20:27, 10.22it/s]

{'loss': Array(0.15770893, dtype=float32), 'loss_cross_entropy': Array(0.14906886, dtype=float32)}
{'loss_inverse': Array(1.988147e-06, dtype=float32)}


  7%|▋         | 67880/1000000 [3:12:40<30:01:42,  8.62it/s]

{'loss': Array(0.14308994, dtype=float32), 'loss_cross_entropy': Array(0.13504376, dtype=float32)}
{'loss_inverse': Array(7.946825e-05, dtype=float32)}


  7%|▋         | 67890/1000000 [3:12:41<26:00:52,  9.95it/s]

{'loss': Array(0.1555408, dtype=float32), 'loss_cross_entropy': Array(0.14644222, dtype=float32)}
{'loss_inverse': Array(2.3777054e-06, dtype=float32)}


  7%|▋         | 67900/1000000 [3:12:43<32:06:29,  8.06it/s]

{'loss': Array(0.14448938, dtype=float32), 'loss_cross_entropy': Array(0.13632382, dtype=float32)}
{'loss_inverse': Array(2.9229685e-05, dtype=float32)}


  7%|▋         | 67910/1000000 [3:12:44<27:22:27,  9.46it/s]

{'loss': Array(0.16212784, dtype=float32), 'loss_cross_entropy': Array(0.15249826, dtype=float32)}
{'loss_inverse': Array(0.00018263, dtype=float32)}


  7%|▋         | 67920/1000000 [3:12:46<35:08:57,  7.37it/s]

{'loss': Array(0.16857418, dtype=float32), 'loss_cross_entropy': Array(0.15900624, dtype=float32)}
{'loss_inverse': Array(5.2273267e-06, dtype=float32)}


  7%|▋         | 67930/1000000 [3:12:47<26:33:53,  9.75it/s]

{'loss': Array(0.13706534, dtype=float32), 'loss_cross_entropy': Array(0.12879887, dtype=float32)}
{'loss_inverse': Array(2.6503452e-05, dtype=float32)}


  7%|▋         | 67940/1000000 [3:12:49<25:50:29, 10.02it/s]

{'loss': Array(0.15530191, dtype=float32), 'loss_cross_entropy': Array(0.14551884, dtype=float32)}
{'loss_inverse': Array(2.9787256e-05, dtype=float32)}


  7%|▋         | 67950/1000000 [3:12:50<28:13:04,  9.18it/s]

{'loss': Array(0.1584159, dtype=float32), 'loss_cross_entropy': Array(0.14972447, dtype=float32)}
{'loss_inverse': Array(5.0009818e-05, dtype=float32)}


  7%|▋         | 67960/1000000 [3:12:52<26:35:39,  9.74it/s]

{'loss': Array(0.13795972, dtype=float32), 'loss_cross_entropy': Array(0.12938538, dtype=float32)}
{'loss_inverse': Array(1.2055177e-06, dtype=float32)}


  7%|▋         | 67970/1000000 [3:12:54<34:57:28,  7.41it/s]

{'loss': Array(0.1465813, dtype=float32), 'loss_cross_entropy': Array(0.13807395, dtype=float32)}
{'loss_inverse': Array(3.572143e-05, dtype=float32)}


  7%|▋         | 67980/1000000 [3:12:55<27:20:40,  9.47it/s]

{'loss': Array(0.16115376, dtype=float32), 'loss_cross_entropy': Array(0.15204285, dtype=float32)}
{'loss_inverse': Array(1.1810488e-06, dtype=float32)}


  7%|▋         | 67990/1000000 [3:12:57<28:31:17,  9.08it/s]

{'loss': Array(0.14556734, dtype=float32), 'loss_cross_entropy': Array(0.13668759, dtype=float32)}
{'loss_inverse': Array(8.9438116e-05, dtype=float32)}


  7%|▋         | 68000/1000000 [3:12:58<28:49:09,  8.98it/s]

{'loss': Array(0.14176631, dtype=float32), 'loss_cross_entropy': Array(0.13318777, dtype=float32)}
{'loss_inverse': Array(0.00017098, dtype=float32)}


  7%|▋         | 68010/1000000 [3:13:07<88:37:06,  2.92it/s] 

{'loss': Array(0.1494363, dtype=float32), 'loss_cross_entropy': Array(0.14010525, dtype=float32)}
{'loss_inverse': Array(3.3333265e-06, dtype=float32)}


  7%|▋         | 68020/1000000 [3:13:08<40:27:40,  6.40it/s]

{'loss': Array(0.14955658, dtype=float32), 'loss_cross_entropy': Array(0.14038672, dtype=float32)}
{'loss_inverse': Array(3.7342065e-07, dtype=float32)}


  7%|▋         | 68030/1000000 [3:13:10<28:07:26,  9.20it/s]

{'loss': Array(0.16178654, dtype=float32), 'loss_cross_entropy': Array(0.15207957, dtype=float32)}
{'loss_inverse': Array(7.964571e-07, dtype=float32)}


  7%|▋         | 68040/1000000 [3:13:12<31:03:00,  8.34it/s]

{'loss': Array(0.14676708, dtype=float32), 'loss_cross_entropy': Array(0.13892427, dtype=float32)}
{'loss_inverse': Array(3.2758024e-05, dtype=float32)}


  7%|▋         | 68050/1000000 [3:13:13<26:10:37,  9.89it/s]

{'loss': Array(0.15229367, dtype=float32), 'loss_cross_entropy': Array(0.1432661, dtype=float32)}
{'loss_inverse': Array(1.26087125e-05, dtype=float32)}


  7%|▋         | 68060/1000000 [3:13:15<38:26:13,  6.73it/s]

{'loss': Array(0.13674302, dtype=float32), 'loss_cross_entropy': Array(0.1286004, dtype=float32)}
{'loss_inverse': Array(1.0040457e-06, dtype=float32)}


  7%|▋         | 68070/1000000 [3:13:16<27:54:44,  9.27it/s]

{'loss': Array(0.14782843, dtype=float32), 'loss_cross_entropy': Array(0.13870399, dtype=float32)}
{'loss_inverse': Array(5.41323e-06, dtype=float32)}


  7%|▋         | 68080/1000000 [3:13:18<26:19:33,  9.83it/s]

{'loss': Array(0.14676614, dtype=float32), 'loss_cross_entropy': Array(0.13781469, dtype=float32)}
{'loss_inverse': Array(1.4916146e-06, dtype=float32)}


  7%|▋         | 68090/1000000 [3:13:19<30:03:59,  8.61it/s]

{'loss': Array(0.16532734, dtype=float32), 'loss_cross_entropy': Array(0.15565805, dtype=float32)}
{'loss_inverse': Array(3.2207285e-07, dtype=float32)}


  7%|▋         | 68100/1000000 [3:13:21<26:26:42,  9.79it/s]

{'loss': Array(0.13325153, dtype=float32), 'loss_cross_entropy': Array(0.12471323, dtype=float32)}
{'loss_inverse': Array(0.00022083, dtype=float32)}


  7%|▋         | 68110/1000000 [3:13:23<32:31:13,  7.96it/s]

{'loss': Array(0.15404281, dtype=float32), 'loss_cross_entropy': Array(0.1448829, dtype=float32)}
{'loss_inverse': Array(7.718344e-05, dtype=float32)}


  7%|▋         | 68120/1000000 [3:13:24<27:07:45,  9.54it/s]

{'loss': Array(0.15354843, dtype=float32), 'loss_cross_entropy': Array(0.1446437, dtype=float32)}
{'loss_inverse': Array(0.00044882, dtype=float32)}


  7%|▋         | 68130/1000000 [3:13:26<31:39:11,  8.18it/s]

{'loss': Array(0.14168267, dtype=float32), 'loss_cross_entropy': Array(0.13365428, dtype=float32)}
{'loss_inverse': Array(0.00012805, dtype=float32)}


  7%|▋         | 68140/1000000 [3:13:27<26:10:58,  9.89it/s]

{'loss': Array(0.13815358, dtype=float32), 'loss_cross_entropy': Array(0.12896001, dtype=float32)}
{'loss_inverse': Array(0.00011997, dtype=float32)}


  7%|▋         | 68150/1000000 [3:13:29<26:11:07,  9.89it/s]

{'loss': Array(0.16074824, dtype=float32), 'loss_cross_entropy': Array(0.15136962, dtype=float32)}
{'loss_inverse': Array(0.00020886, dtype=float32)}


  7%|▋         | 68160/1000000 [3:13:30<28:44:38,  9.01it/s]

{'loss': Array(0.1336752, dtype=float32), 'loss_cross_entropy': Array(0.12515496, dtype=float32)}
{'loss_inverse': Array(1.9167976e-06, dtype=float32)}


  7%|▋         | 68170/1000000 [3:13:32<25:41:27, 10.08it/s]

{'loss': Array(0.14525464, dtype=float32), 'loss_cross_entropy': Array(0.13680635, dtype=float32)}
{'loss_inverse': Array(0.00031734, dtype=float32)}


  7%|▋         | 68180/1000000 [3:13:34<29:24:47,  8.80it/s]

{'loss': Array(0.14874597, dtype=float32), 'loss_cross_entropy': Array(0.139593, dtype=float32)}
{'loss_inverse': Array(4.3230248e-06, dtype=float32)}


  7%|▋         | 68190/1000000 [3:13:35<26:04:58,  9.92it/s]

{'loss': Array(0.15545812, dtype=float32), 'loss_cross_entropy': Array(0.14692706, dtype=float32)}
{'loss_inverse': Array(1.6297407e-05, dtype=float32)}


  7%|▋         | 68200/1000000 [3:13:37<29:45:59,  8.70it/s]

{'loss': Array(0.16902064, dtype=float32), 'loss_cross_entropy': Array(0.16007687, dtype=float32)}
{'loss_inverse': Array(5.2118285e-05, dtype=float32)}


  7%|▋         | 68210/1000000 [3:13:38<25:49:03, 10.03it/s]

{'loss': Array(0.15067914, dtype=float32), 'loss_cross_entropy': Array(0.14273147, dtype=float32)}
{'loss_inverse': Array(1.0390578e-05, dtype=float32)}


  7%|▋         | 68220/1000000 [3:13:40<29:48:31,  8.68it/s]

{'loss': Array(0.17755298, dtype=float32), 'loss_cross_entropy': Array(0.1676886, dtype=float32)}
{'loss_inverse': Array(5.1978623e-05, dtype=float32)}


  7%|▋         | 68230/1000000 [3:13:41<26:13:02,  9.87it/s]

{'loss': Array(0.1535615, dtype=float32), 'loss_cross_entropy': Array(0.14467916, dtype=float32)}
{'loss_inverse': Array(0.00015419, dtype=float32)}


  7%|▋         | 68240/1000000 [3:13:43<34:26:55,  7.51it/s]

{'loss': Array(0.15244012, dtype=float32), 'loss_cross_entropy': Array(0.1429076, dtype=float32)}
{'loss_inverse': Array(8.35258e-06, dtype=float32)}


  7%|▋         | 68250/1000000 [3:13:44<26:47:02,  9.66it/s]

{'loss': Array(0.15738422, dtype=float32), 'loss_cross_entropy': Array(0.14913449, dtype=float32)}
{'loss_inverse': Array(0.00019599, dtype=float32)}


  7%|▋         | 68260/1000000 [3:13:46<38:01:24,  6.81it/s]

{'loss': Array(0.14999107, dtype=float32), 'loss_cross_entropy': Array(0.14110774, dtype=float32)}
{'loss_inverse': Array(2.2492432e-05, dtype=float32)}


  7%|▋         | 68270/1000000 [3:13:48<27:34:04,  9.39it/s]

{'loss': Array(0.14577413, dtype=float32), 'loss_cross_entropy': Array(0.1372214, dtype=float32)}
{'loss_inverse': Array(0.0002131, dtype=float32)}


  7%|▋         | 68280/1000000 [3:13:49<34:47:52,  7.44it/s]

{'loss': Array(0.1761636, dtype=float32), 'loss_cross_entropy': Array(0.16621248, dtype=float32)}
{'loss_inverse': Array(0.00012705, dtype=float32)}


  7%|▋         | 68290/1000000 [3:13:51<27:01:50,  9.57it/s]

{'loss': Array(0.15336172, dtype=float32), 'loss_cross_entropy': Array(0.14408022, dtype=float32)}
{'loss_inverse': Array(0.00011024, dtype=float32)}


  7%|▋         | 68300/1000000 [3:13:52<26:18:18,  9.84it/s]

{'loss': Array(0.16749097, dtype=float32), 'loss_cross_entropy': Array(0.15842587, dtype=float32)}
{'loss_inverse': Array(3.5426583e-05, dtype=float32)}


  7%|▋         | 68310/1000000 [3:13:54<30:22:33,  8.52it/s]

{'loss': Array(0.14479576, dtype=float32), 'loss_cross_entropy': Array(0.13611694, dtype=float32)}
{'loss_inverse': Array(0.00023876, dtype=float32)}


  7%|▋         | 68320/1000000 [3:13:55<26:02:45,  9.94it/s]

{'loss': Array(0.16308223, dtype=float32), 'loss_cross_entropy': Array(0.15377755, dtype=float32)}
{'loss_inverse': Array(8.428477e-05, dtype=float32)}


  7%|▋         | 68330/1000000 [3:13:57<31:53:51,  8.11it/s]

{'loss': Array(0.13624212, dtype=float32), 'loss_cross_entropy': Array(0.12746917, dtype=float32)}
{'loss_inverse': Array(3.4390405e-07, dtype=float32)}


  7%|▋         | 68340/1000000 [3:13:59<27:48:49,  9.30it/s]

{'loss': Array(0.13900594, dtype=float32), 'loss_cross_entropy': Array(0.13091053, dtype=float32)}
{'loss_inverse': Array(2.026347e-06, dtype=float32)}


  7%|▋         | 68350/1000000 [3:14:00<34:50:33,  7.43it/s]

{'loss': Array(0.1383404, dtype=float32), 'loss_cross_entropy': Array(0.13023153, dtype=float32)}
{'loss_inverse': Array(0.00042481, dtype=float32)}


  7%|▋         | 68360/1000000 [3:14:02<27:02:20,  9.57it/s]

{'loss': Array(0.15244283, dtype=float32), 'loss_cross_entropy': Array(0.14373796, dtype=float32)}
{'loss_inverse': Array(0.0004933, dtype=float32)}


  7%|▋         | 68370/1000000 [3:14:04<34:57:14,  7.40it/s]

{'loss': Array(0.15512082, dtype=float32), 'loss_cross_entropy': Array(0.14518617, dtype=float32)}
{'loss_inverse': Array(8.4711624e-05, dtype=float32)}


  7%|▋         | 68380/1000000 [3:14:05<26:36:31,  9.73it/s]

{'loss': Array(0.14740685, dtype=float32), 'loss_cross_entropy': Array(0.13854703, dtype=float32)}
{'loss_inverse': Array(0.00013414, dtype=float32)}


  7%|▋         | 68390/1000000 [3:14:06<27:05:02,  9.55it/s]

{'loss': Array(0.15466087, dtype=float32), 'loss_cross_entropy': Array(0.14548932, dtype=float32)}
{'loss_inverse': Array(4.8468137e-05, dtype=float32)}


  7%|▋         | 68400/1000000 [3:14:08<31:44:56,  8.15it/s]

{'loss': Array(0.15652116, dtype=float32), 'loss_cross_entropy': Array(0.14781313, dtype=float32)}
{'loss_inverse': Array(0.00012264, dtype=float32)}


  7%|▋         | 68410/1000000 [3:14:10<26:49:57,  9.64it/s]

{'loss': Array(0.15335079, dtype=float32), 'loss_cross_entropy': Array(0.14458385, dtype=float32)}
{'loss_inverse': Array(5.9703557e-06, dtype=float32)}


  7%|▋         | 68420/1000000 [3:14:11<38:33:52,  6.71it/s]

{'loss': Array(0.14323674, dtype=float32), 'loss_cross_entropy': Array(0.13565038, dtype=float32)}
{'loss_inverse': Array(0.00010394, dtype=float32)}


  7%|▋         | 68430/1000000 [3:14:13<28:39:16,  9.03it/s]

{'loss': Array(0.1534529, dtype=float32), 'loss_cross_entropy': Array(0.14477248, dtype=float32)}
{'loss_inverse': Array(3.7958343e-06, dtype=float32)}


  7%|▋         | 68440/1000000 [3:14:14<25:52:13, 10.00it/s]

{'loss': Array(0.1691127, dtype=float32), 'loss_cross_entropy': Array(0.15946661, dtype=float32)}
{'loss_inverse': Array(0.0002337, dtype=float32)}


  7%|▋         | 68450/1000000 [3:14:16<29:12:35,  8.86it/s]

{'loss': Array(0.1545655, dtype=float32), 'loss_cross_entropy': Array(0.14615889, dtype=float32)}
{'loss_inverse': Array(4.7823714e-06, dtype=float32)}


  7%|▋         | 68460/1000000 [3:14:18<26:31:44,  9.75it/s]

{'loss': Array(0.1463605, dtype=float32), 'loss_cross_entropy': Array(0.13734536, dtype=float32)}
{'loss_inverse': Array(5.810924e-05, dtype=float32)}


  7%|▋         | 68470/1000000 [3:14:19<28:51:00,  8.97it/s]

{'loss': Array(0.15981703, dtype=float32), 'loss_cross_entropy': Array(0.15084997, dtype=float32)}
{'loss_inverse': Array(6.644983e-05, dtype=float32)}


  7%|▋         | 68480/1000000 [3:14:21<25:52:28, 10.00it/s]

{'loss': Array(0.1435827, dtype=float32), 'loss_cross_entropy': Array(0.13428491, dtype=float32)}
{'loss_inverse': Array(3.056662e-05, dtype=float32)}


  7%|▋         | 68490/1000000 [3:14:23<31:36:13,  8.19it/s]

{'loss': Array(0.14192428, dtype=float32), 'loss_cross_entropy': Array(0.13326061, dtype=float32)}
{'loss_inverse': Array(2.6488856e-06, dtype=float32)}


  7%|▋         | 68500/1000000 [3:14:24<26:30:25,  9.76it/s]

{'loss': Array(0.15254138, dtype=float32), 'loss_cross_entropy': Array(0.14382099, dtype=float32)}
{'loss_inverse': Array(4.858202e-06, dtype=float32)}


  7%|▋         | 68510/1000000 [3:14:32<92:50:18,  2.79it/s] 

{'loss': Array(0.15119822, dtype=float32), 'loss_cross_entropy': Array(0.14143214, dtype=float32)}
{'loss_inverse': Array(9.8492696e-05, dtype=float32)}


  7%|▋         | 68520/1000000 [3:14:34<36:44:08,  7.04it/s] 

{'loss': Array(0.15132456, dtype=float32), 'loss_cross_entropy': Array(0.14172678, dtype=float32)}
{'loss_inverse': Array(0.00023127, dtype=float32)}


  7%|▋         | 68530/1000000 [3:14:35<26:52:47,  9.63it/s]

{'loss': Array(0.16505855, dtype=float32), 'loss_cross_entropy': Array(0.15570644, dtype=float32)}
{'loss_inverse': Array(0.00015092, dtype=float32)}


  7%|▋         | 68540/1000000 [3:14:37<28:45:59,  8.99it/s]

{'loss': Array(0.13348584, dtype=float32), 'loss_cross_entropy': Array(0.12511317, dtype=float32)}
{'loss_inverse': Array(3.2476744e-06, dtype=float32)}


  7%|▋         | 68550/1000000 [3:14:38<25:49:01, 10.02it/s]

{'loss': Array(0.13520008, dtype=float32), 'loss_cross_entropy': Array(0.12647744, dtype=float32)}
{'loss_inverse': Array(1.8905941e-05, dtype=float32)}


  7%|▋         | 68560/1000000 [3:14:40<29:52:19,  8.66it/s]

{'loss': Array(0.17320952, dtype=float32), 'loss_cross_entropy': Array(0.16300692, dtype=float32)}
{'loss_inverse': Array(4.3460626e-05, dtype=float32)}


  7%|▋         | 68570/1000000 [3:14:41<25:53:55,  9.99it/s]

{'loss': Array(0.11247311, dtype=float32), 'loss_cross_entropy': Array(0.10546178, dtype=float32)}
{'loss_inverse': Array(7.963284e-07, dtype=float32)}


  7%|▋         | 68580/1000000 [3:14:43<32:51:33,  7.87it/s]

{'loss': Array(0.16777833, dtype=float32), 'loss_cross_entropy': Array(0.1575966, dtype=float32)}
{'loss_inverse': Array(2.41255e-06, dtype=float32)}


  7%|▋         | 68590/1000000 [3:14:45<27:50:11,  9.29it/s]

{'loss': Array(0.14352502, dtype=float32), 'loss_cross_entropy': Array(0.13526382, dtype=float32)}
{'loss_inverse': Array(3.4641082e-06, dtype=float32)}


  7%|▋         | 68600/1000000 [3:14:46<25:23:13, 10.19it/s]

{'loss': Array(0.1503741, dtype=float32), 'loss_cross_entropy': Array(0.14083347, dtype=float32)}
{'loss_inverse': Array(0.00020321, dtype=float32)}


  7%|▋         | 68610/1000000 [3:14:48<29:57:18,  8.64it/s]

{'loss': Array(0.16388421, dtype=float32), 'loss_cross_entropy': Array(0.15490618, dtype=float32)}
{'loss_inverse': Array(1.6626893e-05, dtype=float32)}


  7%|▋         | 68620/1000000 [3:14:49<26:41:23,  9.69it/s]

{'loss': Array(0.17747772, dtype=float32), 'loss_cross_entropy': Array(0.16758023, dtype=float32)}
{'loss_inverse': Array(3.1749376e-05, dtype=float32)}


  7%|▋         | 68630/1000000 [3:14:51<31:57:38,  8.09it/s]

{'loss': Array(0.169088, dtype=float32), 'loss_cross_entropy': Array(0.15921299, dtype=float32)}
{'loss_inverse': Array(1.5160206e-06, dtype=float32)}


  7%|▋         | 68640/1000000 [3:14:53<26:14:17,  9.86it/s]

{'loss': Array(0.15900205, dtype=float32), 'loss_cross_entropy': Array(0.14986935, dtype=float32)}
{'loss_inverse': Array(2.5765728e-05, dtype=float32)}


  7%|▋         | 68650/1000000 [3:14:54<34:13:49,  7.56it/s]

{'loss': Array(0.1501876, dtype=float32), 'loss_cross_entropy': Array(0.14167313, dtype=float32)}
{'loss_inverse': Array(0.00010207, dtype=float32)}


  7%|▋         | 68660/1000000 [3:14:56<26:44:58,  9.67it/s]

{'loss': Array(0.14697023, dtype=float32), 'loss_cross_entropy': Array(0.13851885, dtype=float32)}
{'loss_inverse': Array(2.6914906e-06, dtype=float32)}


  7%|▋         | 68670/1000000 [3:14:57<25:12:46, 10.26it/s]

{'loss': Array(0.1496773, dtype=float32), 'loss_cross_entropy': Array(0.14124498, dtype=float32)}
{'loss_inverse': Array(0.00041278, dtype=float32)}


  7%|▋         | 68680/1000000 [3:14:59<28:25:13,  9.10it/s]

{'loss': Array(0.15677752, dtype=float32), 'loss_cross_entropy': Array(0.14769839, dtype=float32)}
{'loss_inverse': Array(8.414426e-05, dtype=float32)}


  7%|▋         | 68690/1000000 [3:15:00<26:04:33,  9.92it/s]

{'loss': Array(0.13203086, dtype=float32), 'loss_cross_entropy': Array(0.12354562, dtype=float32)}
{'loss_inverse': Array(6.9220776e-05, dtype=float32)}


  7%|▋         | 68700/1000000 [3:15:02<34:10:07,  7.57it/s]

{'loss': Array(0.16452897, dtype=float32), 'loss_cross_entropy': Array(0.1550426, dtype=float32)}
{'loss_inverse': Array(4.0221653e-06, dtype=float32)}


  7%|▋         | 68710/1000000 [3:15:03<26:47:36,  9.65it/s]

{'loss': Array(0.15799205, dtype=float32), 'loss_cross_entropy': Array(0.14839618, dtype=float32)}
{'loss_inverse': Array(2.0720412e-05, dtype=float32)}


  7%|▋         | 68720/1000000 [3:15:05<25:35:09, 10.11it/s]

{'loss': Array(0.13576311, dtype=float32), 'loss_cross_entropy': Array(0.12779668, dtype=float32)}
{'loss_inverse': Array(0.00033118, dtype=float32)}


  7%|▋         | 68730/1000000 [3:15:07<28:29:22,  9.08it/s]

{'loss': Array(0.14494096, dtype=float32), 'loss_cross_entropy': Array(0.136614, dtype=float32)}
{'loss_inverse': Array(4.2242787e-06, dtype=float32)}


  7%|▋         | 68740/1000000 [3:15:08<26:25:05,  9.79it/s]

{'loss': Array(0.14656512, dtype=float32), 'loss_cross_entropy': Array(0.1376798, dtype=float32)}
{'loss_inverse': Array(0.00012358, dtype=float32)}


  7%|▋         | 68750/1000000 [3:15:10<29:51:33,  8.66it/s]

{'loss': Array(0.1713192, dtype=float32), 'loss_cross_entropy': Array(0.16218649, dtype=float32)}
{'loss_inverse': Array(4.3763597e-05, dtype=float32)}


  7%|▋         | 68759/1000000 [3:15:11<27:06:21,  9.54it/s]

{'loss': Array(0.15005231, dtype=float32), 'loss_cross_entropy': Array(0.1418163, dtype=float32)}
{'loss_inverse': Array(3.0258225e-05, dtype=float32)}


  7%|▋         | 68769/1000000 [3:15:13<32:05:25,  8.06it/s]

{'loss': Array(0.1602999, dtype=float32), 'loss_cross_entropy': Array(0.1512407, dtype=float32)}
{'loss_inverse': Array(3.9961556e-06, dtype=float32)}


  7%|▋         | 68779/1000000 [3:15:14<26:28:22,  9.77it/s]

{'loss': Array(0.17117295, dtype=float32), 'loss_cross_entropy': Array(0.1612218, dtype=float32)}
{'loss_inverse': Array(3.2763728e-05, dtype=float32)}


  7%|▋         | 68789/1000000 [3:15:16<25:52:13, 10.00it/s]

{'loss': Array(0.1579375, dtype=float32), 'loss_cross_entropy': Array(0.14912738, dtype=float32)}
{'loss_inverse': Array(2.9808021e-05, dtype=float32)}


  7%|▋         | 68799/1000000 [3:15:17<30:13:06,  8.56it/s]

{'loss': Array(0.14172833, dtype=float32), 'loss_cross_entropy': Array(0.13330029, dtype=float32)}
{'loss_inverse': Array(0.00045972, dtype=float32)}


  7%|▋         | 68809/1000000 [3:15:19<26:49:19,  9.64it/s]

{'loss': Array(0.1573937, dtype=float32), 'loss_cross_entropy': Array(0.14791875, dtype=float32)}
{'loss_inverse': Array(5.3467284e-05, dtype=float32)}


  7%|▋         | 68819/1000000 [3:15:21<35:32:33,  7.28it/s]

{'loss': Array(0.14465247, dtype=float32), 'loss_cross_entropy': Array(0.13586505, dtype=float32)}
{'loss_inverse': Array(0.00013381, dtype=float32)}


  7%|▋         | 68829/1000000 [3:15:22<26:42:31,  9.68it/s]

{'loss': Array(0.1437863, dtype=float32), 'loss_cross_entropy': Array(0.13569778, dtype=float32)}
{'loss_inverse': Array(2.920352e-05, dtype=float32)}


  7%|▋         | 68839/1000000 [3:15:24<38:18:29,  6.75it/s]

{'loss': Array(0.14364928, dtype=float32), 'loss_cross_entropy': Array(0.1350485, dtype=float32)}
{'loss_inverse': Array(5.284091e-05, dtype=float32)}


  7%|▋         | 68849/1000000 [3:15:25<27:50:17,  9.29it/s]

{'loss': Array(0.15170693, dtype=float32), 'loss_cross_entropy': Array(0.14233916, dtype=float32)}
{'loss_inverse': Array(3.291344e-05, dtype=float32)}


  7%|▋         | 68859/1000000 [3:15:27<39:30:31,  6.55it/s]

{'loss': Array(0.16867435, dtype=float32), 'loss_cross_entropy': Array(0.15859023, dtype=float32)}
{'loss_inverse': Array(3.4913628e-06, dtype=float32)}


  7%|▋         | 68869/1000000 [3:15:29<28:01:34,  9.23it/s]

{'loss': Array(0.14704627, dtype=float32), 'loss_cross_entropy': Array(0.13867585, dtype=float32)}
{'loss_inverse': Array(2.4494817e-05, dtype=float32)}


  7%|▋         | 68879/1000000 [3:15:30<25:30:05, 10.14it/s]

{'loss': Array(0.14554165, dtype=float32), 'loss_cross_entropy': Array(0.13738413, dtype=float32)}
{'loss_inverse': Array(9.916394e-06, dtype=float32)}


  7%|▋         | 68889/1000000 [3:15:32<34:13:42,  7.56it/s]

{'loss': Array(0.15269826, dtype=float32), 'loss_cross_entropy': Array(0.14341359, dtype=float32)}
{'loss_inverse': Array(3.3057503e-07, dtype=float32)}


  7%|▋         | 68899/1000000 [3:15:33<27:29:21,  9.41it/s]

{'loss': Array(0.15711683, dtype=float32), 'loss_cross_entropy': Array(0.14835405, dtype=float32)}
{'loss_inverse': Array(1.32653795e-05, dtype=float32)}


  7%|▋         | 68909/1000000 [3:15:35<38:46:56,  6.67it/s]

{'loss': Array(0.14417928, dtype=float32), 'loss_cross_entropy': Array(0.13592367, dtype=float32)}
{'loss_inverse': Array(4.576755e-05, dtype=float32)}


  7%|▋         | 68919/1000000 [3:15:36<27:27:40,  9.42it/s]

{'loss': Array(0.16858771, dtype=float32), 'loss_cross_entropy': Array(0.15842317, dtype=float32)}
{'loss_inverse': Array(5.6735083e-05, dtype=float32)}


  7%|▋         | 68929/1000000 [3:15:38<26:18:41,  9.83it/s]

{'loss': Array(0.1426884, dtype=float32), 'loss_cross_entropy': Array(0.13506651, dtype=float32)}
{'loss_inverse': Array(0.00015909, dtype=float32)}


  7%|▋         | 68939/1000000 [3:15:40<28:25:22,  9.10it/s]

{'loss': Array(0.16594973, dtype=float32), 'loss_cross_entropy': Array(0.15654683, dtype=float32)}
{'loss_inverse': Array(0.00024725, dtype=float32)}


  7%|▋         | 68949/1000000 [3:15:41<25:39:45, 10.08it/s]

{'loss': Array(0.15339184, dtype=float32), 'loss_cross_entropy': Array(0.14437574, dtype=float32)}
{'loss_inverse': Array(3.4553002e-05, dtype=float32)}


  7%|▋         | 68959/1000000 [3:15:43<34:04:45,  7.59it/s]

{'loss': Array(0.16153316, dtype=float32), 'loss_cross_entropy': Array(0.15262787, dtype=float32)}
{'loss_inverse': Array(4.6557918e-05, dtype=float32)}


  7%|▋         | 68969/1000000 [3:15:44<27:00:02,  9.58it/s]

{'loss': Array(0.13038662, dtype=float32), 'loss_cross_entropy': Array(0.12211054, dtype=float32)}
{'loss_inverse': Array(5.058335e-07, dtype=float32)}


  7%|▋         | 68979/1000000 [3:15:45<25:23:30, 10.19it/s]

{'loss': Array(0.15255128, dtype=float32), 'loss_cross_entropy': Array(0.14301139, dtype=float32)}
{'loss_inverse': Array(6.4037355e-05, dtype=float32)}


  7%|▋         | 68989/1000000 [3:15:47<29:16:52,  8.83it/s]

{'loss': Array(0.15382013, dtype=float32), 'loss_cross_entropy': Array(0.14494085, dtype=float32)}
{'loss_inverse': Array(4.639202e-06, dtype=float32)}


  7%|▋         | 68999/1000000 [3:15:49<26:04:30,  9.92it/s]

{'loss': Array(0.15775798, dtype=float32), 'loss_cross_entropy': Array(0.14819492, dtype=float32)}
{'loss_inverse': Array(0.00034605, dtype=float32)}


  7%|▋         | 69009/1000000 [3:15:57<92:04:58,  2.81it/s] 

{'loss': Array(0.15969335, dtype=float32), 'loss_cross_entropy': Array(0.15109102, dtype=float32)}
{'loss_inverse': Array(6.898923e-05, dtype=float32)}


  7%|▋         | 69019/1000000 [3:15:58<36:37:23,  7.06it/s]

{'loss': Array(0.17112409, dtype=float32), 'loss_cross_entropy': Array(0.16089676, dtype=float32)}
{'loss_inverse': Array(2.0395197e-05, dtype=float32)}


  7%|▋         | 69029/1000000 [3:16:00<33:22:38,  7.75it/s]

{'loss': Array(0.15539221, dtype=float32), 'loss_cross_entropy': Array(0.14573641, dtype=float32)}
{'loss_inverse': Array(3.8281587e-06, dtype=float32)}


  7%|▋         | 69039/1000000 [3:16:01<26:32:40,  9.74it/s]

{'loss': Array(0.12707518, dtype=float32), 'loss_cross_entropy': Array(0.11847756, dtype=float32)}
{'loss_inverse': Array(1.5855248e-05, dtype=float32)}


  7%|▋         | 69049/1000000 [3:16:03<34:27:13,  7.51it/s]

{'loss': Array(0.16222301, dtype=float32), 'loss_cross_entropy': Array(0.15335234, dtype=float32)}
{'loss_inverse': Array(1.8035324e-06, dtype=float32)}


  7%|▋         | 69059/1000000 [3:16:05<27:26:04,  9.43it/s]

{'loss': Array(0.1624695, dtype=float32), 'loss_cross_entropy': Array(0.153283, dtype=float32)}
{'loss_inverse': Array(1.454803e-06, dtype=float32)}


  7%|▋         | 69069/1000000 [3:16:06<26:07:30,  9.90it/s]

{'loss': Array(0.16198243, dtype=float32), 'loss_cross_entropy': Array(0.15233843, dtype=float32)}
{'loss_inverse': Array(1.1483502e-06, dtype=float32)}


  7%|▋         | 69079/1000000 [3:16:08<30:03:07,  8.60it/s]

{'loss': Array(0.14909269, dtype=float32), 'loss_cross_entropy': Array(0.1405684, dtype=float32)}
{'loss_inverse': Array(4.9568553e-06, dtype=float32)}


  7%|▋         | 69089/1000000 [3:16:09<26:16:02,  9.84it/s]

{'loss': Array(0.15548697, dtype=float32), 'loss_cross_entropy': Array(0.14622332, dtype=float32)}
{'loss_inverse': Array(2.191252e-05, dtype=float32)}


  7%|▋         | 69099/1000000 [3:16:11<31:59:40,  8.08it/s]

{'loss': Array(0.14958929, dtype=float32), 'loss_cross_entropy': Array(0.14086412, dtype=float32)}
{'loss_inverse': Array(0.00014267, dtype=float32)}


  7%|▋         | 69109/1000000 [3:16:12<26:43:09,  9.68it/s]

{'loss': Array(0.1386571, dtype=float32), 'loss_cross_entropy': Array(0.13001183, dtype=float32)}
{'loss_inverse': Array(6.802967e-05, dtype=float32)}


  7%|▋         | 69119/1000000 [3:16:14<31:44:36,  8.15it/s]

{'loss': Array(0.14470786, dtype=float32), 'loss_cross_entropy': Array(0.13623899, dtype=float32)}
{'loss_inverse': Array(0.0001726, dtype=float32)}


  7%|▋         | 69129/1000000 [3:16:16<26:53:04,  9.62it/s]

{'loss': Array(0.13039692, dtype=float32), 'loss_cross_entropy': Array(0.12249211, dtype=float32)}
{'loss_inverse': Array(5.2874947e-07, dtype=float32)}


  7%|▋         | 69139/1000000 [3:16:17<32:38:54,  7.92it/s]

{'loss': Array(0.15838097, dtype=float32), 'loss_cross_entropy': Array(0.14949958, dtype=float32)}
{'loss_inverse': Array(8.3941195e-06, dtype=float32)}


  7%|▋         | 69149/1000000 [3:16:19<26:11:00,  9.88it/s]

{'loss': Array(0.16334847, dtype=float32), 'loss_cross_entropy': Array(0.15389673, dtype=float32)}
{'loss_inverse': Array(2.4398913e-05, dtype=float32)}


  7%|▋         | 69159/1000000 [3:16:21<34:53:53,  7.41it/s]

{'loss': Array(0.13389313, dtype=float32), 'loss_cross_entropy': Array(0.12618499, dtype=float32)}
{'loss_inverse': Array(1.2130316e-05, dtype=float32)}


  7%|▋         | 69169/1000000 [3:16:22<26:59:35,  9.58it/s]

{'loss': Array(0.13787858, dtype=float32), 'loss_cross_entropy': Array(0.1302302, dtype=float32)}
{'loss_inverse': Array(4.0885276e-05, dtype=float32)}


  7%|▋         | 69179/1000000 [3:16:24<38:52:53,  6.65it/s]

{'loss': Array(0.14235726, dtype=float32), 'loss_cross_entropy': Array(0.13403936, dtype=float32)}
{'loss_inverse': Array(2.7722164e-05, dtype=float32)}


  7%|▋         | 69189/1000000 [3:16:25<27:32:33,  9.39it/s]

{'loss': Array(0.13125277, dtype=float32), 'loss_cross_entropy': Array(0.12376092, dtype=float32)}
{'loss_inverse': Array(1.0508076e-06, dtype=float32)}


  7%|▋         | 69199/1000000 [3:16:27<38:48:38,  6.66it/s]

{'loss': Array(0.15888172, dtype=float32), 'loss_cross_entropy': Array(0.14873032, dtype=float32)}
{'loss_inverse': Array(1.0961277e-05, dtype=float32)}


  7%|▋         | 69209/1000000 [3:16:28<27:20:42,  9.46it/s]

{'loss': Array(0.14865111, dtype=float32), 'loss_cross_entropy': Array(0.13984935, dtype=float32)}
{'loss_inverse': Array(2.7092412e-05, dtype=float32)}


  7%|▋         | 69219/1000000 [3:16:30<25:43:05, 10.05it/s]

{'loss': Array(0.16129266, dtype=float32), 'loss_cross_entropy': Array(0.15177119, dtype=float32)}
{'loss_inverse': Array(7.164194e-07, dtype=float32)}


  7%|▋         | 69229/1000000 [3:16:32<31:40:15,  8.16it/s]

{'loss': Array(0.15879409, dtype=float32), 'loss_cross_entropy': Array(0.14938454, dtype=float32)}
{'loss_inverse': Array(0.00016805, dtype=float32)}


  7%|▋         | 69239/1000000 [3:16:33<27:16:18,  9.48it/s]

{'loss': Array(0.15377437, dtype=float32), 'loss_cross_entropy': Array(0.14552505, dtype=float32)}
{'loss_inverse': Array(5.756402e-06, dtype=float32)}


  7%|▋         | 69249/1000000 [3:16:35<35:11:56,  7.35it/s]

{'loss': Array(0.1559287, dtype=float32), 'loss_cross_entropy': Array(0.14701068, dtype=float32)}
{'loss_inverse': Array(6.4311265e-05, dtype=float32)}


  7%|▋         | 69259/1000000 [3:16:36<26:40:28,  9.69it/s]

{'loss': Array(0.14297646, dtype=float32), 'loss_cross_entropy': Array(0.13412532, dtype=float32)}
{'loss_inverse': Array(5.2720397e-06, dtype=float32)}


  7%|▋         | 69269/1000000 [3:16:38<35:13:42,  7.34it/s]

{'loss': Array(0.1632222, dtype=float32), 'loss_cross_entropy': Array(0.15438604, dtype=float32)}
{'loss_inverse': Array(2.1592146e-05, dtype=float32)}


  7%|▋         | 69279/1000000 [3:16:40<27:46:29,  9.31it/s]

{'loss': Array(0.15124223, dtype=float32), 'loss_cross_entropy': Array(0.14197461, dtype=float32)}
{'loss_inverse': Array(1.5579926e-05, dtype=float32)}


  7%|▋         | 69289/1000000 [3:16:41<39:19:42,  6.57it/s]

{'loss': Array(0.1591889, dtype=float32), 'loss_cross_entropy': Array(0.14959975, dtype=float32)}
{'loss_inverse': Array(1.0179157e-05, dtype=float32)}


  7%|▋         | 69299/1000000 [3:16:43<28:03:19,  9.21it/s]

{'loss': Array(0.15430395, dtype=float32), 'loss_cross_entropy': Array(0.14532232, dtype=float32)}
{'loss_inverse': Array(2.947007e-07, dtype=float32)}


  7%|▋         | 69309/1000000 [3:16:44<25:43:42, 10.05it/s]

{'loss': Array(0.13162193, dtype=float32), 'loss_cross_entropy': Array(0.12372606, dtype=float32)}
{'loss_inverse': Array(7.125367e-07, dtype=float32)}


  7%|▋         | 69319/1000000 [3:16:46<32:35:00,  7.93it/s]

{'loss': Array(0.15014388, dtype=float32), 'loss_cross_entropy': Array(0.14203706, dtype=float32)}
{'loss_inverse': Array(4.0076913e-05, dtype=float32)}


  7%|▋         | 69329/1000000 [3:16:47<26:39:04,  9.70it/s]

{'loss': Array(0.16201891, dtype=float32), 'loss_cross_entropy': Array(0.1530838, dtype=float32)}
{'loss_inverse': Array(5.684424e-06, dtype=float32)}


  7%|▋         | 69339/1000000 [3:16:49<38:34:19,  6.70it/s]

{'loss': Array(0.14428303, dtype=float32), 'loss_cross_entropy': Array(0.13585697, dtype=float32)}
{'loss_inverse': Array(2.24261e-06, dtype=float32)}


  7%|▋         | 69349/1000000 [3:16:51<27:56:09,  9.25it/s]

{'loss': Array(0.13143492, dtype=float32), 'loss_cross_entropy': Array(0.12350811, dtype=float32)}
{'loss_inverse': Array(0.00015498, dtype=float32)}


  7%|▋         | 69359/1000000 [3:16:52<25:58:51,  9.95it/s]

{'loss': Array(0.14836685, dtype=float32), 'loss_cross_entropy': Array(0.1395543, dtype=float32)}
{'loss_inverse': Array(3.5603687e-06, dtype=float32)}


  7%|▋         | 69369/1000000 [3:16:54<29:52:19,  8.65it/s]

{'loss': Array(0.1412326, dtype=float32), 'loss_cross_entropy': Array(0.13282003, dtype=float32)}
{'loss_inverse': Array(4.6669084e-05, dtype=float32)}


  7%|▋         | 69379/1000000 [3:16:55<26:11:02,  9.87it/s]

{'loss': Array(0.14751692, dtype=float32), 'loss_cross_entropy': Array(0.13840169, dtype=float32)}
{'loss_inverse': Array(9.387539e-05, dtype=float32)}


  7%|▋         | 69389/1000000 [3:16:57<30:00:55,  8.61it/s]

{'loss': Array(0.14147256, dtype=float32), 'loss_cross_entropy': Array(0.13283373, dtype=float32)}
{'loss_inverse': Array(0.00038277, dtype=float32)}


  7%|▋         | 69399/1000000 [3:16:59<27:11:38,  9.51it/s]

{'loss': Array(0.13505399, dtype=float32), 'loss_cross_entropy': Array(0.12652534, dtype=float32)}
{'loss_inverse': Array(7.2208604e-05, dtype=float32)}


  7%|▋         | 69409/1000000 [3:17:00<31:55:44,  8.10it/s]

{'loss': Array(0.15678601, dtype=float32), 'loss_cross_entropy': Array(0.1475449, dtype=float32)}
{'loss_inverse': Array(2.1904438e-05, dtype=float32)}


  7%|▋         | 69419/1000000 [3:17:02<26:15:08,  9.85it/s]

{'loss': Array(0.15340857, dtype=float32), 'loss_cross_entropy': Array(0.14446421, dtype=float32)}
{'loss_inverse': Array(6.0242714e-05, dtype=float32)}


  7%|▋         | 69429/1000000 [3:17:03<31:35:03,  8.18it/s]

{'loss': Array(0.15828633, dtype=float32), 'loss_cross_entropy': Array(0.14886864, dtype=float32)}
{'loss_inverse': Array(1.7628709e-05, dtype=float32)}


  7%|▋         | 69439/1000000 [3:17:05<26:20:46,  9.81it/s]

{'loss': Array(0.155668, dtype=float32), 'loss_cross_entropy': Array(0.14737505, dtype=float32)}
{'loss_inverse': Array(0.00011011, dtype=float32)}


  7%|▋         | 69449/1000000 [3:17:06<25:55:32,  9.97it/s]

{'loss': Array(0.15391384, dtype=float32), 'loss_cross_entropy': Array(0.1450687, dtype=float32)}
{'loss_inverse': Array(1.5114631e-06, dtype=float32)}


  7%|▋         | 69459/1000000 [3:17:08<29:07:58,  8.87it/s]

{'loss': Array(0.17530958, dtype=float32), 'loss_cross_entropy': Array(0.16575585, dtype=float32)}
{'loss_inverse': Array(1.6696697e-06, dtype=float32)}


  7%|▋         | 69469/1000000 [3:17:10<26:05:42,  9.91it/s]

{'loss': Array(0.14483535, dtype=float32), 'loss_cross_entropy': Array(0.13656648, dtype=float32)}
{'loss_inverse': Array(0.00014705, dtype=float32)}


  7%|▋         | 69479/1000000 [3:17:11<30:21:37,  8.51it/s]

{'loss': Array(0.14394623, dtype=float32), 'loss_cross_entropy': Array(0.13595091, dtype=float32)}
{'loss_inverse': Array(3.620687e-05, dtype=float32)}


  7%|▋         | 69489/1000000 [3:17:13<26:21:25,  9.81it/s]

{'loss': Array(0.13773645, dtype=float32), 'loss_cross_entropy': Array(0.12976055, dtype=float32)}
{'loss_inverse': Array(3.6503734e-06, dtype=float32)}


  7%|▋         | 69499/1000000 [3:17:15<32:07:37,  8.05it/s]

{'loss': Array(0.14987741, dtype=float32), 'loss_cross_entropy': Array(0.14078784, dtype=float32)}
{'loss_inverse': Array(0.00013075, dtype=float32)}


  7%|▋         | 69509/1000000 [3:17:23<87:40:19,  2.95it/s] 

{'loss': Array(0.14626367, dtype=float32), 'loss_cross_entropy': Array(0.13724966, dtype=float32)}
{'loss_inverse': Array(2.8463384e-05, dtype=float32)}


  7%|▋         | 69519/1000000 [3:17:24<44:34:11,  5.80it/s]

{'loss': Array(0.18492758, dtype=float32), 'loss_cross_entropy': Array(0.1747459, dtype=float32)}
{'loss_inverse': Array(1.8306175e-06, dtype=float32)}


  7%|▋         | 69529/1000000 [3:17:26<28:21:40,  9.11it/s]

{'loss': Array(0.15015219, dtype=float32), 'loss_cross_entropy': Array(0.14169827, dtype=float32)}
{'loss_inverse': Array(5.5445616e-05, dtype=float32)}


  7%|▋         | 69539/1000000 [3:17:27<25:29:16, 10.14it/s]

{'loss': Array(0.1436877, dtype=float32), 'loss_cross_entropy': Array(0.13492347, dtype=float32)}
{'loss_inverse': Array(6.3406005e-06, dtype=float32)}


  7%|▋         | 69549/1000000 [3:17:29<28:34:03,  9.05it/s]

{'loss': Array(0.1296257, dtype=float32), 'loss_cross_entropy': Array(0.12167954, dtype=float32)}
{'loss_inverse': Array(4.863577e-06, dtype=float32)}


  7%|▋         | 69559/1000000 [3:17:30<25:54:11,  9.98it/s]

{'loss': Array(0.15436743, dtype=float32), 'loss_cross_entropy': Array(0.14560243, dtype=float32)}
{'loss_inverse': Array(3.2512385e-06, dtype=float32)}


  7%|▋         | 69569/1000000 [3:17:32<31:54:16,  8.10it/s]

{'loss': Array(0.1314725, dtype=float32), 'loss_cross_entropy': Array(0.12290481, dtype=float32)}
{'loss_inverse': Array(2.2081991e-05, dtype=float32)}


  7%|▋         | 69579/1000000 [3:17:33<26:45:39,  9.66it/s]

{'loss': Array(0.15428251, dtype=float32), 'loss_cross_entropy': Array(0.14558285, dtype=float32)}
{'loss_inverse': Array(1.5795013e-06, dtype=float32)}


  7%|▋         | 69589/1000000 [3:17:35<34:18:38,  7.53it/s]

{'loss': Array(0.14454176, dtype=float32), 'loss_cross_entropy': Array(0.13613202, dtype=float32)}
{'loss_inverse': Array(3.5160698e-05, dtype=float32)}


  7%|▋         | 69599/1000000 [3:17:37<27:08:19,  9.52it/s]

{'loss': Array(0.14780907, dtype=float32), 'loss_cross_entropy': Array(0.13912421, dtype=float32)}
{'loss_inverse': Array(0.00020446, dtype=float32)}


  7%|▋         | 69609/1000000 [3:17:38<39:19:15,  6.57it/s]

{'loss': Array(0.1665783, dtype=float32), 'loss_cross_entropy': Array(0.15746649, dtype=float32)}
{'loss_inverse': Array(5.271622e-06, dtype=float32)}


  7%|▋         | 69619/1000000 [3:17:40<27:42:10,  9.33it/s]

{'loss': Array(0.15919255, dtype=float32), 'loss_cross_entropy': Array(0.15011488, dtype=float32)}
{'loss_inverse': Array(2.8225704e-05, dtype=float32)}


  7%|▋         | 69629/1000000 [3:17:42<38:19:57,  6.74it/s]

{'loss': Array(0.14383405, dtype=float32), 'loss_cross_entropy': Array(0.13544516, dtype=float32)}
{'loss_inverse': Array(4.6334724e-05, dtype=float32)}


  7%|▋         | 69639/1000000 [3:17:43<27:38:58,  9.35it/s]

{'loss': Array(0.15579008, dtype=float32), 'loss_cross_entropy': Array(0.14644572, dtype=float32)}
{'loss_inverse': Array(4.167018e-05, dtype=float32)}


  7%|▋         | 69649/1000000 [3:17:44<25:35:03, 10.10it/s]

{'loss': Array(0.15941243, dtype=float32), 'loss_cross_entropy': Array(0.15072279, dtype=float32)}
{'loss_inverse': Array(2.3603052e-05, dtype=float32)}


  7%|▋         | 69659/1000000 [3:17:46<31:28:17,  8.21it/s]

{'loss': Array(0.13941127, dtype=float32), 'loss_cross_entropy': Array(0.13147777, dtype=float32)}
{'loss_inverse': Array(3.739104e-05, dtype=float32)}


  7%|▋         | 69669/1000000 [3:17:48<27:01:29,  9.56it/s]

{'loss': Array(0.15858473, dtype=float32), 'loss_cross_entropy': Array(0.14936773, dtype=float32)}
{'loss_inverse': Array(6.2942345e-05, dtype=float32)}


  7%|▋         | 69679/1000000 [3:17:49<34:43:30,  7.44it/s]

{'loss': Array(0.16603474, dtype=float32), 'loss_cross_entropy': Array(0.15618876, dtype=float32)}
{'loss_inverse': Array(0.00028297, dtype=float32)}


  7%|▋         | 69689/1000000 [3:17:51<27:08:40,  9.52it/s]

{'loss': Array(0.14540073, dtype=float32), 'loss_cross_entropy': Array(0.13674133, dtype=float32)}
{'loss_inverse': Array(3.5534365e-05, dtype=float32)}


  7%|▋         | 69699/1000000 [3:17:53<34:42:06,  7.45it/s]

{'loss': Array(0.1474544, dtype=float32), 'loss_cross_entropy': Array(0.1388613, dtype=float32)}
{'loss_inverse': Array(0.00020542, dtype=float32)}


  7%|▋         | 69709/1000000 [3:17:54<27:25:12,  9.42it/s]

{'loss': Array(0.14021985, dtype=float32), 'loss_cross_entropy': Array(0.13171844, dtype=float32)}
{'loss_inverse': Array(7.67061e-05, dtype=float32)}


  7%|▋         | 69719/1000000 [3:17:56<38:45:49,  6.67it/s]

{'loss': Array(0.13962488, dtype=float32), 'loss_cross_entropy': Array(0.13130125, dtype=float32)}
{'loss_inverse': Array(1.4263214e-05, dtype=float32)}


  7%|▋         | 69729/1000000 [3:17:57<27:53:50,  9.26it/s]

{'loss': Array(0.14281766, dtype=float32), 'loss_cross_entropy': Array(0.1344999, dtype=float32)}
{'loss_inverse': Array(1.7865418e-07, dtype=float32)}


  7%|▋         | 69739/1000000 [3:17:59<25:28:32, 10.14it/s]

{'loss': Array(0.17874737, dtype=float32), 'loss_cross_entropy': Array(0.1686016, dtype=float32)}
{'loss_inverse': Array(2.1856951e-05, dtype=float32)}


  7%|▋         | 69749/1000000 [3:18:00<31:22:30,  8.24it/s]

{'loss': Array(0.14449878, dtype=float32), 'loss_cross_entropy': Array(0.13600968, dtype=float32)}
{'loss_inverse': Array(4.566104e-05, dtype=float32)}


  7%|▋         | 69759/1000000 [3:18:02<26:55:04,  9.60it/s]

{'loss': Array(0.16238151, dtype=float32), 'loss_cross_entropy': Array(0.15263708, dtype=float32)}
{'loss_inverse': Array(2.2152502e-05, dtype=float32)}


  7%|▋         | 69769/1000000 [3:18:04<38:23:38,  6.73it/s]

{'loss': Array(0.1503294, dtype=float32), 'loss_cross_entropy': Array(0.14187606, dtype=float32)}
{'loss_inverse': Array(1.6257558e-05, dtype=float32)}


  7%|▋         | 69779/1000000 [3:18:05<27:39:44,  9.34it/s]

{'loss': Array(0.14603047, dtype=float32), 'loss_cross_entropy': Array(0.13696758, dtype=float32)}
{'loss_inverse': Array(1.4494614e-06, dtype=float32)}


  7%|▋         | 69789/1000000 [3:18:06<26:23:01,  9.79it/s]

{'loss': Array(0.13558133, dtype=float32), 'loss_cross_entropy': Array(0.12775908, dtype=float32)}
{'loss_inverse': Array(5.082693e-06, dtype=float32)}


  7%|▋         | 69799/1000000 [3:18:08<29:21:56,  8.80it/s]

{'loss': Array(0.15352605, dtype=float32), 'loss_cross_entropy': Array(0.1450219, dtype=float32)}
{'loss_inverse': Array(5.6938217e-05, dtype=float32)}


  7%|▋         | 69809/1000000 [3:18:10<25:45:44, 10.03it/s]

{'loss': Array(0.14810573, dtype=float32), 'loss_cross_entropy': Array(0.1392444, dtype=float32)}
{'loss_inverse': Array(5.825307e-05, dtype=float32)}


  7%|▋         | 69819/1000000 [3:18:11<29:25:13,  8.78it/s]

{'loss': Array(0.14667378, dtype=float32), 'loss_cross_entropy': Array(0.13833325, dtype=float32)}
{'loss_inverse': Array(8.864662e-06, dtype=float32)}


  7%|▋         | 69829/1000000 [3:18:13<26:42:14,  9.68it/s]

{'loss': Array(0.14752658, dtype=float32), 'loss_cross_entropy': Array(0.13881247, dtype=float32)}
{'loss_inverse': Array(1.1311048e-05, dtype=float32)}


  7%|▋         | 69839/1000000 [3:18:15<32:13:10,  8.02it/s]

{'loss': Array(0.1493409, dtype=float32), 'loss_cross_entropy': Array(0.14046311, dtype=float32)}
{'loss_inverse': Array(0.00018776, dtype=float32)}


  7%|▋         | 69849/1000000 [3:18:16<26:50:35,  9.63it/s]

{'loss': Array(0.15179153, dtype=float32), 'loss_cross_entropy': Array(0.14359891, dtype=float32)}
{'loss_inverse': Array(0.0001356, dtype=float32)}


  7%|▋         | 69859/1000000 [3:18:18<32:42:47,  7.90it/s]

{'loss': Array(0.15631698, dtype=float32), 'loss_cross_entropy': Array(0.14745438, dtype=float32)}
{'loss_inverse': Array(2.8671695e-06, dtype=float32)}


  7%|▋         | 69869/1000000 [3:18:19<27:36:48,  9.36it/s]

{'loss': Array(0.14888647, dtype=float32), 'loss_cross_entropy': Array(0.1404923, dtype=float32)}
{'loss_inverse': Array(1.0642234e-05, dtype=float32)}


  7%|▋         | 69879/1000000 [3:18:21<25:38:55, 10.07it/s]

{'loss': Array(0.14211492, dtype=float32), 'loss_cross_entropy': Array(0.13312505, dtype=float32)}
{'loss_inverse': Array(3.2109172e-05, dtype=float32)}


  7%|▋         | 69889/1000000 [3:18:22<28:05:29,  9.20it/s]

{'loss': Array(0.1536009, dtype=float32), 'loss_cross_entropy': Array(0.1445937, dtype=float32)}
{'loss_inverse': Array(8.8573004e-05, dtype=float32)}


  7%|▋         | 69899/1000000 [3:18:24<25:55:00,  9.97it/s]

{'loss': Array(0.13136944, dtype=float32), 'loss_cross_entropy': Array(0.12328024, dtype=float32)}
{'loss_inverse': Array(1.4528176e-05, dtype=float32)}


  7%|▋         | 69909/1000000 [3:18:26<29:45:50,  8.68it/s]

{'loss': Array(0.15830046, dtype=float32), 'loss_cross_entropy': Array(0.14968818, dtype=float32)}
{'loss_inverse': Array(5.6863595e-05, dtype=float32)}


  7%|▋         | 69919/1000000 [3:18:27<25:50:03, 10.00it/s]

{'loss': Array(0.15814222, dtype=float32), 'loss_cross_entropy': Array(0.1484453, dtype=float32)}
{'loss_inverse': Array(6.1610585e-06, dtype=float32)}


  7%|▋         | 69929/1000000 [3:18:29<32:28:32,  7.96it/s]

{'loss': Array(0.16471808, dtype=float32), 'loss_cross_entropy': Array(0.154925, dtype=float32)}
{'loss_inverse': Array(2.5991294e-06, dtype=float32)}


  7%|▋         | 69939/1000000 [3:18:30<26:39:29,  9.69it/s]

{'loss': Array(0.15926497, dtype=float32), 'loss_cross_entropy': Array(0.15012407, dtype=float32)}
{'loss_inverse': Array(2.29609e-06, dtype=float32)}


  7%|▋         | 69949/1000000 [3:18:32<25:48:16, 10.01it/s]

{'loss': Array(0.17325579, dtype=float32), 'loss_cross_entropy': Array(0.1636416, dtype=float32)}
{'loss_inverse': Array(4.2528976e-05, dtype=float32)}


  7%|▋         | 69959/1000000 [3:18:33<29:51:42,  8.65it/s]

{'loss': Array(0.14564128, dtype=float32), 'loss_cross_entropy': Array(0.13625748, dtype=float32)}
{'loss_inverse': Array(2.437515e-06, dtype=float32)}


  7%|▋         | 69969/1000000 [3:18:35<26:10:00,  9.87it/s]

{'loss': Array(0.17316422, dtype=float32), 'loss_cross_entropy': Array(0.16344714, dtype=float32)}
{'loss_inverse': Array(4.4491706e-05, dtype=float32)}


  7%|▋         | 69979/1000000 [3:18:37<34:44:46,  7.44it/s]

{'loss': Array(0.15163223, dtype=float32), 'loss_cross_entropy': Array(0.14257208, dtype=float32)}
{'loss_inverse': Array(3.3732245e-05, dtype=float32)}


  7%|▋         | 69989/1000000 [3:18:38<27:04:53,  9.54it/s]

{'loss': Array(0.14896181, dtype=float32), 'loss_cross_entropy': Array(0.14004548, dtype=float32)}
{'loss_inverse': Array(4.3172997e-05, dtype=float32)}


  7%|▋         | 69999/1000000 [3:18:40<38:23:24,  6.73it/s]

{'loss': Array(0.15885368, dtype=float32), 'loss_cross_entropy': Array(0.1502447, dtype=float32)}
{'loss_inverse': Array(1.0972223e-07, dtype=float32)}


  7%|▋         | 70009/1000000 [3:18:48<90:44:40,  2.85it/s] 

{'loss': Array(0.14330393, dtype=float32), 'loss_cross_entropy': Array(0.13526765, dtype=float32)}
{'loss_inverse': Array(7.8909296e-05, dtype=float32)}


  7%|▋         | 70019/1000000 [3:18:50<36:31:28,  7.07it/s]

{'loss': Array(0.13850838, dtype=float32), 'loss_cross_entropy': Array(0.13017549, dtype=float32)}
{'loss_inverse': Array(0.00020088, dtype=float32)}


  7%|▋         | 70029/1000000 [3:18:51<31:28:09,  8.21it/s]

{'loss': Array(0.16571923, dtype=float32), 'loss_cross_entropy': Array(0.15601158, dtype=float32)}
{'loss_inverse': Array(1.825628e-06, dtype=float32)}


  7%|▋         | 70039/1000000 [3:18:53<26:42:44,  9.67it/s]

{'loss': Array(0.16499095, dtype=float32), 'loss_cross_entropy': Array(0.15620981, dtype=float32)}
{'loss_inverse': Array(3.8862495e-06, dtype=float32)}


  7%|▋         | 70049/1000000 [3:18:55<30:49:00,  8.38it/s]

{'loss': Array(0.14904775, dtype=float32), 'loss_cross_entropy': Array(0.13936198, dtype=float32)}
{'loss_inverse': Array(1.8282402e-06, dtype=float32)}


  7%|▋         | 70059/1000000 [3:18:56<27:01:46,  9.56it/s]

{'loss': Array(0.13039328, dtype=float32), 'loss_cross_entropy': Array(0.12271743, dtype=float32)}
{'loss_inverse': Array(0.00020721, dtype=float32)}


  7%|▋         | 70069/1000000 [3:18:58<35:16:40,  7.32it/s]

{'loss': Array(0.14773403, dtype=float32), 'loss_cross_entropy': Array(0.13855053, dtype=float32)}
{'loss_inverse': Array(6.805317e-05, dtype=float32)}


  7%|▋         | 70079/1000000 [3:18:59<27:14:14,  9.48it/s]

{'loss': Array(0.12896861, dtype=float32), 'loss_cross_entropy': Array(0.12151601, dtype=float32)}
{'loss_inverse': Array(1.7535864e-05, dtype=float32)}


  7%|▋         | 70089/1000000 [3:19:01<34:54:44,  7.40it/s]

{'loss': Array(0.16214915, dtype=float32), 'loss_cross_entropy': Array(0.15266944, dtype=float32)}
{'loss_inverse': Array(9.4038565e-05, dtype=float32)}


  7%|▋         | 70099/1000000 [3:19:02<27:15:38,  9.48it/s]

{'loss': Array(0.1575058, dtype=float32), 'loss_cross_entropy': Array(0.1486451, dtype=float32)}
{'loss_inverse': Array(0.00016133, dtype=float32)}


  7%|▋         | 70109/1000000 [3:19:04<38:32:17,  6.70it/s]

{'loss': Array(0.15156117, dtype=float32), 'loss_cross_entropy': Array(0.14266498, dtype=float32)}
{'loss_inverse': Array(7.984044e-06, dtype=float32)}


  7%|▋         | 70119/1000000 [3:19:06<27:38:16,  9.35it/s]

{'loss': Array(0.15284859, dtype=float32), 'loss_cross_entropy': Array(0.14409614, dtype=float32)}
{'loss_inverse': Array(1.1436948e-05, dtype=float32)}


  7%|▋         | 70129/1000000 [3:19:07<25:49:08, 10.00it/s]

{'loss': Array(0.1534946, dtype=float32), 'loss_cross_entropy': Array(0.14454414, dtype=float32)}
{'loss_inverse': Array(1.9757672e-06, dtype=float32)}


  7%|▋         | 70139/1000000 [3:19:09<31:37:41,  8.17it/s]

{'loss': Array(0.16327408, dtype=float32), 'loss_cross_entropy': Array(0.15337543, dtype=float32)}
{'loss_inverse': Array(0.00029187, dtype=float32)}


  7%|▋         | 70149/1000000 [3:19:10<26:07:58,  9.88it/s]

{'loss': Array(0.15123962, dtype=float32), 'loss_cross_entropy': Array(0.14199616, dtype=float32)}
{'loss_inverse': Array(9.923649e-05, dtype=float32)}


  7%|▋         | 70159/1000000 [3:19:12<38:24:36,  6.72it/s]

{'loss': Array(0.14890656, dtype=float32), 'loss_cross_entropy': Array(0.13946007, dtype=float32)}
{'loss_inverse': Array(6.405519e-05, dtype=float32)}


  7%|▋         | 70169/1000000 [3:19:13<27:36:46,  9.35it/s]

{'loss': Array(0.14927995, dtype=float32), 'loss_cross_entropy': Array(0.13989267, dtype=float32)}
{'loss_inverse': Array(0.00019529, dtype=float32)}


  7%|▋         | 70179/1000000 [3:19:15<25:44:56, 10.03it/s]

{'loss': Array(0.1498831, dtype=float32), 'loss_cross_entropy': Array(0.14125995, dtype=float32)}
{'loss_inverse': Array(1.9194063e-06, dtype=float32)}


  7%|▋         | 70189/1000000 [3:19:17<29:49:09,  8.66it/s]

{'loss': Array(0.15448943, dtype=float32), 'loss_cross_entropy': Array(0.14578235, dtype=float32)}
{'loss_inverse': Array(1.4962199e-05, dtype=float32)}


  7%|▋         | 70199/1000000 [3:19:18<25:54:47,  9.97it/s]

{'loss': Array(0.13819511, dtype=float32), 'loss_cross_entropy': Array(0.12885147, dtype=float32)}
{'loss_inverse': Array(2.7128024e-06, dtype=float32)}


  7%|▋         | 70209/1000000 [3:19:20<29:53:56,  8.64it/s]

{'loss': Array(0.13606627, dtype=float32), 'loss_cross_entropy': Array(0.12820975, dtype=float32)}
{'loss_inverse': Array(1.6071328e-05, dtype=float32)}


  7%|▋         | 70219/1000000 [3:19:21<26:05:44,  9.90it/s]

{'loss': Array(0.14210482, dtype=float32), 'loss_cross_entropy': Array(0.13364106, dtype=float32)}
{'loss_inverse': Array(4.7445672e-05, dtype=float32)}


  7%|▋         | 70229/1000000 [3:19:23<31:41:39,  8.15it/s]

{'loss': Array(0.15691303, dtype=float32), 'loss_cross_entropy': Array(0.14766364, dtype=float32)}
{'loss_inverse': Array(9.842339e-05, dtype=float32)}


  7%|▋         | 70239/1000000 [3:19:24<26:24:58,  9.78it/s]

{'loss': Array(0.14628997, dtype=float32), 'loss_cross_entropy': Array(0.13768463, dtype=float32)}
{'loss_inverse': Array(1.931258e-06, dtype=float32)}


  7%|▋         | 70249/1000000 [3:19:26<31:30:38,  8.20it/s]

{'loss': Array(0.14383055, dtype=float32), 'loss_cross_entropy': Array(0.13512169, dtype=float32)}
{'loss_inverse': Array(4.2354874e-05, dtype=float32)}


  7%|▋         | 70259/1000000 [3:19:27<26:21:46,  9.80it/s]

{'loss': Array(0.16701767, dtype=float32), 'loss_cross_entropy': Array(0.15735476, dtype=float32)}
{'loss_inverse': Array(0.00013205, dtype=float32)}


  7%|▋         | 70269/1000000 [3:19:29<25:51:20,  9.99it/s]

{'loss': Array(0.15546116, dtype=float32), 'loss_cross_entropy': Array(0.1461191, dtype=float32)}
{'loss_inverse': Array(7.368154e-05, dtype=float32)}


  7%|▋         | 70279/1000000 [3:19:31<28:49:21,  8.96it/s]

{'loss': Array(0.1209827, dtype=float32), 'loss_cross_entropy': Array(0.11356839, dtype=float32)}
{'loss_inverse': Array(5.5952346e-06, dtype=float32)}


  7%|▋         | 70289/1000000 [3:19:32<26:18:41,  9.82it/s]

{'loss': Array(0.14948802, dtype=float32), 'loss_cross_entropy': Array(0.1407743, dtype=float32)}
{'loss_inverse': Array(2.3103126e-05, dtype=float32)}


  7%|▋         | 70299/1000000 [3:19:34<30:03:08,  8.59it/s]

{'loss': Array(0.14311595, dtype=float32), 'loss_cross_entropy': Array(0.1340921, dtype=float32)}
{'loss_inverse': Array(2.0767948e-06, dtype=float32)}


  7%|▋         | 70309/1000000 [3:19:35<26:28:05,  9.76it/s]

{'loss': Array(0.15651846, dtype=float32), 'loss_cross_entropy': Array(0.14752598, dtype=float32)}
{'loss_inverse': Array(9.405766e-06, dtype=float32)}


  7%|▋         | 70319/1000000 [3:19:37<31:53:20,  8.10it/s]

{'loss': Array(0.14136033, dtype=float32), 'loss_cross_entropy': Array(0.13275866, dtype=float32)}
{'loss_inverse': Array(5.409905e-06, dtype=float32)}


  7%|▋         | 70329/1000000 [3:19:38<26:26:06,  9.77it/s]

{'loss': Array(0.13985217, dtype=float32), 'loss_cross_entropy': Array(0.13170888, dtype=float32)}
{'loss_inverse': Array(2.0775038e-05, dtype=float32)}


  7%|▋         | 70339/1000000 [3:19:40<25:22:14, 10.18it/s]

{'loss': Array(0.15015136, dtype=float32), 'loss_cross_entropy': Array(0.14191307, dtype=float32)}
{'loss_inverse': Array(0.00021956, dtype=float32)}


  7%|▋         | 70349/1000000 [3:19:42<30:00:39,  8.60it/s]

{'loss': Array(0.13724677, dtype=float32), 'loss_cross_entropy': Array(0.12950967, dtype=float32)}
{'loss_inverse': Array(4.3458975e-05, dtype=float32)}


  7%|▋         | 70359/1000000 [3:19:43<26:08:29,  9.88it/s]

{'loss': Array(0.14968066, dtype=float32), 'loss_cross_entropy': Array(0.14100306, dtype=float32)}
{'loss_inverse': Array(6.288029e-05, dtype=float32)}


  7%|▋         | 70369/1000000 [3:19:45<34:35:26,  7.47it/s]

{'loss': Array(0.14122261, dtype=float32), 'loss_cross_entropy': Array(0.13234971, dtype=float32)}
{'loss_inverse': Array(6.1185974e-06, dtype=float32)}


  7%|▋         | 70379/1000000 [3:19:46<26:43:06,  9.66it/s]

{'loss': Array(0.15411228, dtype=float32), 'loss_cross_entropy': Array(0.14511757, dtype=float32)}
{'loss_inverse': Array(0.00048312, dtype=float32)}


  7%|▋         | 70389/1000000 [3:19:48<38:24:13,  6.72it/s]

{'loss': Array(0.15344393, dtype=float32), 'loss_cross_entropy': Array(0.14449227, dtype=float32)}
{'loss_inverse': Array(9.2698965e-06, dtype=float32)}


  7%|▋         | 70399/1000000 [3:19:49<27:17:36,  9.46it/s]

{'loss': Array(0.1593774, dtype=float32), 'loss_cross_entropy': Array(0.15059279, dtype=float32)}
{'loss_inverse': Array(0.00030328, dtype=float32)}


  7%|▋         | 70409/1000000 [3:19:51<38:02:44,  6.79it/s]

{'loss': Array(0.14118245, dtype=float32), 'loss_cross_entropy': Array(0.13272014, dtype=float32)}
{'loss_inverse': Array(0.00043012, dtype=float32)}


  7%|▋         | 70419/1000000 [3:19:53<27:37:02,  9.35it/s]

{'loss': Array(0.15430094, dtype=float32), 'loss_cross_entropy': Array(0.14466779, dtype=float32)}
{'loss_inverse': Array(2.2210856e-05, dtype=float32)}


  7%|▋         | 70429/1000000 [3:19:54<25:45:16, 10.03it/s]

{'loss': Array(0.14250295, dtype=float32), 'loss_cross_entropy': Array(0.13358234, dtype=float32)}
{'loss_inverse': Array(2.6731412e-05, dtype=float32)}


  7%|▋         | 70439/1000000 [3:19:56<34:10:43,  7.55it/s]

{'loss': Array(0.13521883, dtype=float32), 'loss_cross_entropy': Array(0.12698285, dtype=float32)}
{'loss_inverse': Array(7.4279524e-05, dtype=float32)}


  7%|▋         | 70449/1000000 [3:19:57<27:06:49,  9.52it/s]

{'loss': Array(0.15006627, dtype=float32), 'loss_cross_entropy': Array(0.14211567, dtype=float32)}
{'loss_inverse': Array(8.330409e-05, dtype=float32)}


  7%|▋         | 70459/1000000 [3:19:59<38:29:19,  6.71it/s]

{'loss': Array(0.15337005, dtype=float32), 'loss_cross_entropy': Array(0.14406054, dtype=float32)}
{'loss_inverse': Array(1.2975981e-05, dtype=float32)}


  7%|▋         | 70469/1000000 [3:20:00<27:36:12,  9.35it/s]

{'loss': Array(0.14230676, dtype=float32), 'loss_cross_entropy': Array(0.1343299, dtype=float32)}
{'loss_inverse': Array(0.00028388, dtype=float32)}


  7%|▋         | 70479/1000000 [3:20:02<25:40:11, 10.06it/s]

{'loss': Array(0.13485253, dtype=float32), 'loss_cross_entropy': Array(0.1265084, dtype=float32)}
{'loss_inverse': Array(0.00024242, dtype=float32)}


  7%|▋         | 70489/1000000 [3:20:04<28:43:36,  8.99it/s]

{'loss': Array(0.13260645, dtype=float32), 'loss_cross_entropy': Array(0.12441587, dtype=float32)}
{'loss_inverse': Array(1.5269914e-05, dtype=float32)}


  7%|▋         | 70499/1000000 [3:20:05<25:54:21,  9.97it/s]

{'loss': Array(0.13985299, dtype=float32), 'loss_cross_entropy': Array(0.13190858, dtype=float32)}
{'loss_inverse': Array(3.3012282e-06, dtype=float32)}


  7%|▋         | 70509/1000000 [3:20:13<94:51:18,  2.72it/s] 

{'loss': Array(0.13242528, dtype=float32), 'loss_cross_entropy': Array(0.12380411, dtype=float32)}
{'loss_inverse': Array(4.8629812e-05, dtype=float32)}


  7%|▋         | 70519/1000000 [3:20:15<37:11:21,  6.94it/s] 

{'loss': Array(0.15614119, dtype=float32), 'loss_cross_entropy': Array(0.14638184, dtype=float32)}
{'loss_inverse': Array(0.00016169, dtype=float32)}


  7%|▋         | 70529/1000000 [3:20:16<27:07:14,  9.52it/s]

{'loss': Array(0.15005413, dtype=float32), 'loss_cross_entropy': Array(0.14127976, dtype=float32)}
{'loss_inverse': Array(1.5265055e-05, dtype=float32)}


  7%|▋         | 70539/1000000 [3:20:18<28:53:32,  8.94it/s]

{'loss': Array(0.14720316, dtype=float32), 'loss_cross_entropy': Array(0.13906832, dtype=float32)}
{'loss_inverse': Array(0.00024964, dtype=float32)}


  7%|▋         | 70549/1000000 [3:20:19<25:52:23,  9.98it/s]

{'loss': Array(0.15103717, dtype=float32), 'loss_cross_entropy': Array(0.14231801, dtype=float32)}
{'loss_inverse': Array(0.00020828, dtype=float32)}


  7%|▋         | 70559/1000000 [3:20:21<29:42:10,  8.69it/s]

{'loss': Array(0.14656024, dtype=float32), 'loss_cross_entropy': Array(0.13780746, dtype=float32)}
{'loss_inverse': Array(2.1426051e-06, dtype=float32)}


  7%|▋         | 70569/1000000 [3:20:22<26:15:49,  9.83it/s]

{'loss': Array(0.16898651, dtype=float32), 'loss_cross_entropy': Array(0.1591079, dtype=float32)}
{'loss_inverse': Array(0.00084874, dtype=float32)}


  7%|▋         | 70579/1000000 [3:20:24<32:37:02,  7.92it/s]

{'loss': Array(0.17008166, dtype=float32), 'loss_cross_entropy': Array(0.1604311, dtype=float32)}
{'loss_inverse': Array(1.2011492e-05, dtype=float32)}


  7%|▋         | 70589/1000000 [3:20:26<26:23:17,  9.78it/s]

{'loss': Array(0.14000773, dtype=float32), 'loss_cross_entropy': Array(0.13216019, dtype=float32)}
{'loss_inverse': Array(3.8957855e-06, dtype=float32)}


  7%|▋         | 70599/1000000 [3:20:27<31:57:27,  8.08it/s]

{'loss': Array(0.1572861, dtype=float32), 'loss_cross_entropy': Array(0.14832427, dtype=float32)}
{'loss_inverse': Array(4.1256204e-05, dtype=float32)}


  7%|▋         | 70609/1000000 [3:20:29<26:42:48,  9.66it/s]

{'loss': Array(0.15491909, dtype=float32), 'loss_cross_entropy': Array(0.14657484, dtype=float32)}
{'loss_inverse': Array(6.8804127e-07, dtype=float32)}


  7%|▋         | 70619/1000000 [3:20:30<26:05:15,  9.90it/s]

{'loss': Array(0.14674948, dtype=float32), 'loss_cross_entropy': Array(0.13825975, dtype=float32)}
{'loss_inverse': Array(4.353516e-06, dtype=float32)}


  7%|▋         | 70629/1000000 [3:20:32<28:21:38,  9.10it/s]

{'loss': Array(0.14633006, dtype=float32), 'loss_cross_entropy': Array(0.13727781, dtype=float32)}
{'loss_inverse': Array(1.4478955e-05, dtype=float32)}


  7%|▋         | 70639/1000000 [3:20:33<25:37:54, 10.07it/s]

{'loss': Array(0.15869725, dtype=float32), 'loss_cross_entropy': Array(0.14965181, dtype=float32)}
{'loss_inverse': Array(2.2177512e-06, dtype=float32)}


  7%|▋         | 70649/1000000 [3:20:35<30:03:16,  8.59it/s]

{'loss': Array(0.16203848, dtype=float32), 'loss_cross_entropy': Array(0.15296589, dtype=float32)}
{'loss_inverse': Array(2.812363e-06, dtype=float32)}


  7%|▋         | 70659/1000000 [3:20:37<26:00:08,  9.93it/s]

{'loss': Array(0.1540827, dtype=float32), 'loss_cross_entropy': Array(0.14522675, dtype=float32)}
{'loss_inverse': Array(8.1008315e-05, dtype=float32)}


  7%|▋         | 70669/1000000 [3:20:38<31:36:04,  8.17it/s]

{'loss': Array(0.12167003, dtype=float32), 'loss_cross_entropy': Array(0.11409005, dtype=float32)}
{'loss_inverse': Array(1.0486626e-06, dtype=float32)}


  7%|▋         | 70679/1000000 [3:20:40<26:53:59,  9.60it/s]

{'loss': Array(0.14203155, dtype=float32), 'loss_cross_entropy': Array(0.13336214, dtype=float32)}
{'loss_inverse': Array(1.326769e-06, dtype=float32)}


  7%|▋         | 70689/1000000 [3:20:41<26:19:31,  9.81it/s]

{'loss': Array(0.14132456, dtype=float32), 'loss_cross_entropy': Array(0.13250373, dtype=float32)}
{'loss_inverse': Array(4.6680998e-05, dtype=float32)}


  7%|▋         | 70699/1000000 [3:20:43<30:28:43,  8.47it/s]

{'loss': Array(0.14939924, dtype=float32), 'loss_cross_entropy': Array(0.14100537, dtype=float32)}
{'loss_inverse': Array(2.8655118e-06, dtype=float32)}


  7%|▋         | 70709/1000000 [3:20:44<25:46:25, 10.02it/s]

{'loss': Array(0.14960815, dtype=float32), 'loss_cross_entropy': Array(0.1403852, dtype=float32)}
{'loss_inverse': Array(1.5079546e-05, dtype=float32)}


  7%|▋         | 70719/1000000 [3:20:46<33:54:19,  7.61it/s]

{'loss': Array(0.13744693, dtype=float32), 'loss_cross_entropy': Array(0.12881573, dtype=float32)}
{'loss_inverse': Array(0.00011325, dtype=float32)}


  7%|▋         | 70729/1000000 [3:20:48<27:17:23,  9.46it/s]

{'loss': Array(0.1350255, dtype=float32), 'loss_cross_entropy': Array(0.126501, dtype=float32)}
{'loss_inverse': Array(0.00012267, dtype=float32)}


  7%|▋         | 70739/1000000 [3:20:49<38:27:20,  6.71it/s]

{'loss': Array(0.15061925, dtype=float32), 'loss_cross_entropy': Array(0.14188756, dtype=float32)}
{'loss_inverse': Array(2.4754044e-07, dtype=float32)}


  7%|▋         | 70749/1000000 [3:20:51<27:40:59,  9.32it/s]

{'loss': Array(0.15604876, dtype=float32), 'loss_cross_entropy': Array(0.14768204, dtype=float32)}
{'loss_inverse': Array(1.2454491e-05, dtype=float32)}


  7%|▋         | 70759/1000000 [3:20:53<38:15:59,  6.75it/s]

{'loss': Array(0.16093045, dtype=float32), 'loss_cross_entropy': Array(0.15116848, dtype=float32)}
{'loss_inverse': Array(6.573956e-05, dtype=float32)}


  7%|▋         | 70769/1000000 [3:20:54<27:39:42,  9.33it/s]

{'loss': Array(0.1489868, dtype=float32), 'loss_cross_entropy': Array(0.14100705, dtype=float32)}
{'loss_inverse': Array(6.8601294e-06, dtype=float32)}


  7%|▋         | 70779/1000000 [3:20:55<25:33:35, 10.10it/s]

{'loss': Array(0.14588463, dtype=float32), 'loss_cross_entropy': Array(0.13700294, dtype=float32)}
{'loss_inverse': Array(3.9594586e-05, dtype=float32)}


  7%|▋         | 70789/1000000 [3:20:57<34:41:05,  7.44it/s]

{'loss': Array(0.1521676, dtype=float32), 'loss_cross_entropy': Array(0.14327817, dtype=float32)}
{'loss_inverse': Array(1.5092477e-06, dtype=float32)}


  7%|▋         | 70799/1000000 [3:20:59<26:42:02,  9.67it/s]

{'loss': Array(0.13674013, dtype=float32), 'loss_cross_entropy': Array(0.12842123, dtype=float32)}
{'loss_inverse': Array(4.4108492e-07, dtype=float32)}


  7%|▋         | 70809/1000000 [3:21:00<38:11:46,  6.76it/s]

{'loss': Array(0.14447527, dtype=float32), 'loss_cross_entropy': Array(0.13551834, dtype=float32)}
{'loss_inverse': Array(8.344938e-05, dtype=float32)}


  7%|▋         | 70819/1000000 [3:21:02<27:56:54,  9.24it/s]

{'loss': Array(0.14721401, dtype=float32), 'loss_cross_entropy': Array(0.13899714, dtype=float32)}
{'loss_inverse': Array(6.6591224e-06, dtype=float32)}


  7%|▋         | 70829/1000000 [3:21:03<25:31:12, 10.11it/s]

{'loss': Array(0.17033821, dtype=float32), 'loss_cross_entropy': Array(0.16079275, dtype=float32)}
{'loss_inverse': Array(4.5665405e-05, dtype=float32)}


  7%|▋         | 70839/1000000 [3:21:05<28:15:09,  9.14it/s]

{'loss': Array(0.14574747, dtype=float32), 'loss_cross_entropy': Array(0.13701952, dtype=float32)}
{'loss_inverse': Array(0.00019148, dtype=float32)}


  7%|▋         | 70849/1000000 [3:21:06<26:04:05,  9.90it/s]

{'loss': Array(0.14954385, dtype=float32), 'loss_cross_entropy': Array(0.14045401, dtype=float32)}
{'loss_inverse': Array(7.126569e-05, dtype=float32)}


  7%|▋         | 70859/1000000 [3:21:08<34:26:16,  7.49it/s]

{'loss': Array(0.13943449, dtype=float32), 'loss_cross_entropy': Array(0.1310557, dtype=float32)}
{'loss_inverse': Array(1.8596347e-05, dtype=float32)}


  7%|▋         | 70869/1000000 [3:21:10<26:42:58,  9.66it/s]

{'loss': Array(0.15305142, dtype=float32), 'loss_cross_entropy': Array(0.14437298, dtype=float32)}
{'loss_inverse': Array(3.5821047e-06, dtype=float32)}


  7%|▋         | 70879/1000000 [3:21:11<25:41:58, 10.04it/s]

{'loss': Array(0.13935885, dtype=float32), 'loss_cross_entropy': Array(0.13114409, dtype=float32)}
{'loss_inverse': Array(1.0558687e-07, dtype=float32)}


  7%|▋         | 70889/1000000 [3:21:13<29:51:26,  8.64it/s]

{'loss': Array(0.16319989, dtype=float32), 'loss_cross_entropy': Array(0.15403365, dtype=float32)}
{'loss_inverse': Array(1.4775592e-05, dtype=float32)}


  7%|▋         | 70899/1000000 [3:21:14<25:48:09, 10.00it/s]

{'loss': Array(0.16751224, dtype=float32), 'loss_cross_entropy': Array(0.15795603, dtype=float32)}
{'loss_inverse': Array(3.489475e-05, dtype=float32)}


  7%|▋         | 70909/1000000 [3:21:16<31:13:34,  8.26it/s]

{'loss': Array(0.14124684, dtype=float32), 'loss_cross_entropy': Array(0.13271733, dtype=float32)}
{'loss_inverse': Array(4.016314e-05, dtype=float32)}


  7%|▋         | 70919/1000000 [3:21:17<26:30:47,  9.73it/s]

{'loss': Array(0.13685986, dtype=float32), 'loss_cross_entropy': Array(0.12935594, dtype=float32)}
{'loss_inverse': Array(7.895149e-05, dtype=float32)}


  7%|▋         | 70929/1000000 [3:21:19<32:15:26,  8.00it/s]

{'loss': Array(0.15234376, dtype=float32), 'loss_cross_entropy': Array(0.1438648, dtype=float32)}
{'loss_inverse': Array(6.555039e-07, dtype=float32)}


  7%|▋         | 70939/1000000 [3:21:20<26:16:46,  9.82it/s]

{'loss': Array(0.14011246, dtype=float32), 'loss_cross_entropy': Array(0.13180989, dtype=float32)}
{'loss_inverse': Array(9.908381e-05, dtype=float32)}


  7%|▋         | 70949/1000000 [3:21:22<31:27:24,  8.20it/s]

{'loss': Array(0.1676627, dtype=float32), 'loss_cross_entropy': Array(0.15840548, dtype=float32)}
{'loss_inverse': Array(3.3459267e-07, dtype=float32)}


  7%|▋         | 70959/1000000 [3:21:24<26:14:41,  9.83it/s]

{'loss': Array(0.15949216, dtype=float32), 'loss_cross_entropy': Array(0.14971094, dtype=float32)}
{'loss_inverse': Array(9.416962e-06, dtype=float32)}


  7%|▋         | 70969/1000000 [3:21:25<35:00:41,  7.37it/s]

{'loss': Array(0.14889021, dtype=float32), 'loss_cross_entropy': Array(0.1397901, dtype=float32)}
{'loss_inverse': Array(0.00012959, dtype=float32)}


  7%|▋         | 70979/1000000 [3:21:27<26:46:50,  9.64it/s]

{'loss': Array(0.14358893, dtype=float32), 'loss_cross_entropy': Array(0.13514124, dtype=float32)}
{'loss_inverse': Array(0.00014434, dtype=float32)}


  7%|▋         | 70989/1000000 [3:21:29<38:10:17,  6.76it/s]

{'loss': Array(0.14520736, dtype=float32), 'loss_cross_entropy': Array(0.13673931, dtype=float32)}
{'loss_inverse': Array(2.1433994e-05, dtype=float32)}


  7%|▋         | 70999/1000000 [3:21:30<27:37:19,  9.34it/s]

{'loss': Array(0.15582137, dtype=float32), 'loss_cross_entropy': Array(0.14668258, dtype=float32)}
{'loss_inverse': Array(8.570655e-07, dtype=float32)}


  7%|▋         | 71009/1000000 [3:21:38<99:32:40,  2.59it/s] 

{'loss': Array(0.14371489, dtype=float32), 'loss_cross_entropy': Array(0.1355872, dtype=float32)}
{'loss_inverse': Array(3.38322e-06, dtype=float32)}


  7%|▋         | 71019/1000000 [3:21:40<37:50:24,  6.82it/s] 

{'loss': Array(0.14028366, dtype=float32), 'loss_cross_entropy': Array(0.13194981, dtype=float32)}
{'loss_inverse': Array(1.0732073e-06, dtype=float32)}


  7%|▋         | 71029/1000000 [3:21:41<27:22:53,  9.42it/s]

{'loss': Array(0.15396576, dtype=float32), 'loss_cross_entropy': Array(0.14558253, dtype=float32)}
{'loss_inverse': Array(1.5790105e-06, dtype=float32)}


  7%|▋         | 71039/1000000 [3:21:43<32:07:16,  8.03it/s]

{'loss': Array(0.15750465, dtype=float32), 'loss_cross_entropy': Array(0.14753929, dtype=float32)}
{'loss_inverse': Array(3.9655847e-06, dtype=float32)}


  7%|▋         | 71049/1000000 [3:21:44<26:02:14,  9.91it/s]

{'loss': Array(0.14699791, dtype=float32), 'loss_cross_entropy': Array(0.13853057, dtype=float32)}
{'loss_inverse': Array(1.2837416e-05, dtype=float32)}


  7%|▋         | 71059/1000000 [3:21:46<38:03:37,  6.78it/s]

{'loss': Array(0.16865854, dtype=float32), 'loss_cross_entropy': Array(0.15869613, dtype=float32)}
{'loss_inverse': Array(3.7603966e-06, dtype=float32)}


  7%|▋         | 71069/1000000 [3:21:47<27:21:52,  9.43it/s]

{'loss': Array(0.13569681, dtype=float32), 'loss_cross_entropy': Array(0.12736656, dtype=float32)}
{'loss_inverse': Array(0.00012458, dtype=float32)}


  7%|▋         | 71079/1000000 [3:21:49<39:32:13,  6.53it/s]

{'loss': Array(0.16873544, dtype=float32), 'loss_cross_entropy': Array(0.15904896, dtype=float32)}
{'loss_inverse': Array(6.2144377e-06, dtype=float32)}


  7%|▋         | 71089/1000000 [3:21:51<28:03:36,  9.20it/s]

{'loss': Array(0.14073056, dtype=float32), 'loss_cross_entropy': Array(0.13191764, dtype=float32)}
{'loss_inverse': Array(5.288007e-05, dtype=float32)}


  7%|▋         | 71099/1000000 [3:21:52<38:57:31,  6.62it/s]

{'loss': Array(0.14178789, dtype=float32), 'loss_cross_entropy': Array(0.13306245, dtype=float32)}
{'loss_inverse': Array(9.899047e-05, dtype=float32)}


  7%|▋         | 71109/1000000 [3:21:54<27:25:37,  9.41it/s]

{'loss': Array(0.1792654, dtype=float32), 'loss_cross_entropy': Array(0.16955177, dtype=float32)}
{'loss_inverse': Array(0.00011919, dtype=float32)}


  7%|▋         | 71119/1000000 [3:21:55<26:10:46,  9.86it/s]

{'loss': Array(0.14829704, dtype=float32), 'loss_cross_entropy': Array(0.13944577, dtype=float32)}
{'loss_inverse': Array(4.6313194e-06, dtype=float32)}


  7%|▋         | 71129/1000000 [3:21:57<30:25:48,  8.48it/s]

{'loss': Array(0.14773123, dtype=float32), 'loss_cross_entropy': Array(0.13898852, dtype=float32)}
{'loss_inverse': Array(0.00014253, dtype=float32)}


  7%|▋         | 71139/1000000 [3:21:58<25:58:26,  9.93it/s]

{'loss': Array(0.177572, dtype=float32), 'loss_cross_entropy': Array(0.16788551, dtype=float32)}
{'loss_inverse': Array(6.9825006e-07, dtype=float32)}


  7%|▋         | 71149/1000000 [3:22:00<29:48:46,  8.65it/s]

{'loss': Array(0.13498382, dtype=float32), 'loss_cross_entropy': Array(0.12718438, dtype=float32)}
{'loss_inverse': Array(1.0910876e-05, dtype=float32)}


  7%|▋         | 71159/1000000 [3:22:02<26:31:40,  9.73it/s]

{'loss': Array(0.13701963, dtype=float32), 'loss_cross_entropy': Array(0.12974918, dtype=float32)}
{'loss_inverse': Array(1.8466347e-06, dtype=float32)}


  7%|▋         | 71169/1000000 [3:22:03<34:16:50,  7.53it/s]

{'loss': Array(0.1366523, dtype=float32), 'loss_cross_entropy': Array(0.12884001, dtype=float32)}
{'loss_inverse': Array(2.6378189e-06, dtype=float32)}


  7%|▋         | 71179/1000000 [3:22:05<26:47:17,  9.63it/s]

{'loss': Array(0.15134417, dtype=float32), 'loss_cross_entropy': Array(0.1433798, dtype=float32)}
{'loss_inverse': Array(0.00018953, dtype=float32)}


  7%|▋         | 71189/1000000 [3:22:06<26:34:11,  9.71it/s]

{'loss': Array(0.14188622, dtype=float32), 'loss_cross_entropy': Array(0.13370459, dtype=float32)}
{'loss_inverse': Array(2.802136e-06, dtype=float32)}


  7%|▋         | 71199/1000000 [3:22:08<31:43:09,  8.13it/s]

{'loss': Array(0.14399885, dtype=float32), 'loss_cross_entropy': Array(0.1353433, dtype=float32)}
{'loss_inverse': Array(2.4640705e-05, dtype=float32)}


  7%|▋         | 71209/1000000 [3:22:10<26:17:23,  9.81it/s]

{'loss': Array(0.12720786, dtype=float32), 'loss_cross_entropy': Array(0.12046011, dtype=float32)}
{'loss_inverse': Array(0.00013791, dtype=float32)}


  7%|▋         | 71219/1000000 [3:22:11<31:21:11,  8.23it/s]

{'loss': Array(0.1560707, dtype=float32), 'loss_cross_entropy': Array(0.14637677, dtype=float32)}
{'loss_inverse': Array(6.3085594e-05, dtype=float32)}


  7%|▋         | 71229/1000000 [3:22:13<26:41:36,  9.66it/s]

{'loss': Array(0.14201823, dtype=float32), 'loss_cross_entropy': Array(0.13344495, dtype=float32)}
{'loss_inverse': Array(7.353593e-05, dtype=float32)}


  7%|▋         | 71239/1000000 [3:22:15<32:12:48,  8.01it/s]

{'loss': Array(0.15626732, dtype=float32), 'loss_cross_entropy': Array(0.14690462, dtype=float32)}
{'loss_inverse': Array(0.00028615, dtype=float32)}


  7%|▋         | 71249/1000000 [3:22:16<26:42:35,  9.66it/s]

{'loss': Array(0.15603761, dtype=float32), 'loss_cross_entropy': Array(0.14707097, dtype=float32)}
{'loss_inverse': Array(3.1668587e-05, dtype=float32)}


  7%|▋         | 71259/1000000 [3:22:18<31:48:00,  8.11it/s]

{'loss': Array(0.15089762, dtype=float32), 'loss_cross_entropy': Array(0.14232902, dtype=float32)}
{'loss_inverse': Array(0.00028379, dtype=float32)}


  7%|▋         | 71269/1000000 [3:22:19<26:03:19,  9.90it/s]

{'loss': Array(0.15762106, dtype=float32), 'loss_cross_entropy': Array(0.14891307, dtype=float32)}
{'loss_inverse': Array(0.00040796, dtype=float32)}


  7%|▋         | 71279/1000000 [3:22:21<34:33:47,  7.46it/s]

{'loss': Array(0.14425251, dtype=float32), 'loss_cross_entropy': Array(0.13550863, dtype=float32)}
{'loss_inverse': Array(3.4804282e-06, dtype=float32)}


  7%|▋         | 71289/1000000 [3:22:22<26:45:31,  9.64it/s]

{'loss': Array(0.1490801, dtype=float32), 'loss_cross_entropy': Array(0.14072065, dtype=float32)}
{'loss_inverse': Array(0.00021557, dtype=float32)}


  7%|▋         | 71299/1000000 [3:22:24<38:59:49,  6.62it/s]

{'loss': Array(0.13601433, dtype=float32), 'loss_cross_entropy': Array(0.12752786, dtype=float32)}
{'loss_inverse': Array(2.2783306e-05, dtype=float32)}


  7%|▋         | 71309/1000000 [3:22:26<28:09:10,  9.16it/s]

{'loss': Array(0.1503753, dtype=float32), 'loss_cross_entropy': Array(0.14190403, dtype=float32)}
{'loss_inverse': Array(0.00019729, dtype=float32)}


  7%|▋         | 71319/1000000 [3:22:27<39:01:42,  6.61it/s]

{'loss': Array(0.13836014, dtype=float32), 'loss_cross_entropy': Array(0.13034569, dtype=float32)}
{'loss_inverse': Array(6.299278e-05, dtype=float32)}


  7%|▋         | 71329/1000000 [3:22:29<28:01:04,  9.21it/s]

{'loss': Array(0.14133653, dtype=float32), 'loss_cross_entropy': Array(0.13298514, dtype=float32)}
{'loss_inverse': Array(8.2171486e-05, dtype=float32)}


  7%|▋         | 71339/1000000 [3:22:30<25:56:49,  9.94it/s]

{'loss': Array(0.14580682, dtype=float32), 'loss_cross_entropy': Array(0.13720141, dtype=float32)}
{'loss_inverse': Array(0.00015089, dtype=float32)}


  7%|▋         | 71349/1000000 [3:22:32<31:43:46,  8.13it/s]

{'loss': Array(0.1461315, dtype=float32), 'loss_cross_entropy': Array(0.13765933, dtype=float32)}
{'loss_inverse': Array(8.8490115e-06, dtype=float32)}


  7%|▋         | 71359/1000000 [3:22:33<26:53:19,  9.59it/s]

{'loss': Array(0.17034583, dtype=float32), 'loss_cross_entropy': Array(0.16119508, dtype=float32)}
{'loss_inverse': Array(0.00013847, dtype=float32)}


  7%|▋         | 71369/1000000 [3:22:35<34:55:09,  7.39it/s]

{'loss': Array(0.14042418, dtype=float32), 'loss_cross_entropy': Array(0.13260141, dtype=float32)}
{'loss_inverse': Array(8.809622e-05, dtype=float32)}


  7%|▋         | 71379/1000000 [3:22:37<27:01:05,  9.55it/s]

{'loss': Array(0.17372611, dtype=float32), 'loss_cross_entropy': Array(0.16354826, dtype=float32)}
{'loss_inverse': Array(5.3497708e-05, dtype=float32)}


  7%|▋         | 71389/1000000 [3:22:38<34:27:20,  7.49it/s]

{'loss': Array(0.14388967, dtype=float32), 'loss_cross_entropy': Array(0.13562647, dtype=float32)}
{'loss_inverse': Array(9.927156e-06, dtype=float32)}


  7%|▋         | 71399/1000000 [3:22:40<27:21:35,  9.43it/s]

{'loss': Array(0.1420617, dtype=float32), 'loss_cross_entropy': Array(0.13386275, dtype=float32)}
{'loss_inverse': Array(8.04963e-05, dtype=float32)}


  7%|▋         | 71409/1000000 [3:22:42<38:21:38,  6.72it/s]

{'loss': Array(0.1510179, dtype=float32), 'loss_cross_entropy': Array(0.14212292, dtype=float32)}
{'loss_inverse': Array(9.1859e-06, dtype=float32)}


  7%|▋         | 71419/1000000 [3:22:43<28:04:06,  9.19it/s]

{'loss': Array(0.1620649, dtype=float32), 'loss_cross_entropy': Array(0.1527822, dtype=float32)}
{'loss_inverse': Array(3.6419933e-05, dtype=float32)}


  7%|▋         | 71429/1000000 [3:22:44<25:47:30, 10.00it/s]

{'loss': Array(0.13881885, dtype=float32), 'loss_cross_entropy': Array(0.13087702, dtype=float32)}
{'loss_inverse': Array(5.2770054e-05, dtype=float32)}


  7%|▋         | 71439/1000000 [3:22:46<31:39:01,  8.15it/s]

{'loss': Array(0.13931252, dtype=float32), 'loss_cross_entropy': Array(0.13128386, dtype=float32)}
{'loss_inverse': Array(9.523702e-06, dtype=float32)}


  7%|▋         | 71449/1000000 [3:22:48<26:48:31,  9.62it/s]

{'loss': Array(0.14813612, dtype=float32), 'loss_cross_entropy': Array(0.13906363, dtype=float32)}
{'loss_inverse': Array(7.980968e-05, dtype=float32)}


  7%|▋         | 71459/1000000 [3:22:49<38:03:52,  6.78it/s]

{'loss': Array(0.13441806, dtype=float32), 'loss_cross_entropy': Array(0.12597476, dtype=float32)}
{'loss_inverse': Array(2.6137518e-06, dtype=float32)}


  7%|▋         | 71469/1000000 [3:22:51<27:18:53,  9.44it/s]

{'loss': Array(0.16124779, dtype=float32), 'loss_cross_entropy': Array(0.15166938, dtype=float32)}
{'loss_inverse': Array(2.1568181e-05, dtype=float32)}


  7%|▋         | 71479/1000000 [3:22:52<25:30:02, 10.11it/s]

{'loss': Array(0.1443189, dtype=float32), 'loss_cross_entropy': Array(0.13526992, dtype=float32)}
{'loss_inverse': Array(0.00021132, dtype=float32)}


  7%|▋         | 71489/1000000 [3:22:54<29:59:39,  8.60it/s]

{'loss': Array(0.15457843, dtype=float32), 'loss_cross_entropy': Array(0.14548504, dtype=float32)}
{'loss_inverse': Array(3.7574424e-05, dtype=float32)}


  7%|▋         | 71499/1000000 [3:22:55<26:23:19,  9.77it/s]

{'loss': Array(0.14453083, dtype=float32), 'loss_cross_entropy': Array(0.13614734, dtype=float32)}
{'loss_inverse': Array(6.242751e-05, dtype=float32)}


  7%|▋         | 71510/1000000 [3:23:04<70:09:55,  3.68it/s] 

{'loss': Array(0.1466523, dtype=float32), 'loss_cross_entropy': Array(0.13817978, dtype=float32)}
{'loss_inverse': Array(5.2054307e-05, dtype=float32)}


  7%|▋         | 71520/1000000 [3:23:05<32:35:51,  7.91it/s]

{'loss': Array(0.13523749, dtype=float32), 'loss_cross_entropy': Array(0.12714247, dtype=float32)}
{'loss_inverse': Array(1.6226712e-05, dtype=float32)}


  7%|▋         | 71530/1000000 [3:23:07<33:28:28,  7.70it/s]

{'loss': Array(0.15341412, dtype=float32), 'loss_cross_entropy': Array(0.14486782, dtype=float32)}
{'loss_inverse': Array(4.3222185e-06, dtype=float32)}


  7%|▋         | 71540/1000000 [3:23:08<26:41:11,  9.66it/s]

{'loss': Array(0.16928248, dtype=float32), 'loss_cross_entropy': Array(0.16020118, dtype=float32)}
{'loss_inverse': Array(5.158182e-05, dtype=float32)}


  7%|▋         | 71550/1000000 [3:23:10<31:27:49,  8.20it/s]

{'loss': Array(0.14073218, dtype=float32), 'loss_cross_entropy': Array(0.13242267, dtype=float32)}
{'loss_inverse': Array(9.5265736e-05, dtype=float32)}


  7%|▋         | 71560/1000000 [3:23:11<27:48:58,  9.27it/s]

{'loss': Array(0.15595971, dtype=float32), 'loss_cross_entropy': Array(0.14706154, dtype=float32)}
{'loss_inverse': Array(7.2703183e-06, dtype=float32)}


  7%|▋         | 71570/1000000 [3:23:13<26:11:07,  9.85it/s]

{'loss': Array(0.15596025, dtype=float32), 'loss_cross_entropy': Array(0.1472086, dtype=float32)}
{'loss_inverse': Array(2.392155e-05, dtype=float32)}


  7%|▋         | 71580/1000000 [3:23:15<28:34:13,  9.03it/s]

{'loss': Array(0.13100742, dtype=float32), 'loss_cross_entropy': Array(0.12289532, dtype=float32)}
{'loss_inverse': Array(4.809302e-06, dtype=float32)}


  7%|▋         | 71590/1000000 [3:23:16<25:24:16, 10.15it/s]

{'loss': Array(0.15993524, dtype=float32), 'loss_cross_entropy': Array(0.15180682, dtype=float32)}
{'loss_inverse': Array(3.7631806e-05, dtype=float32)}


  7%|▋         | 71600/1000000 [3:23:18<32:14:06,  8.00it/s]

{'loss': Array(0.152985, dtype=float32), 'loss_cross_entropy': Array(0.14383414, dtype=float32)}
{'loss_inverse': Array(3.0177722e-07, dtype=float32)}


  7%|▋         | 71610/1000000 [3:23:19<25:54:43,  9.95it/s]

{'loss': Array(0.15636463, dtype=float32), 'loss_cross_entropy': Array(0.1469331, dtype=float32)}
{'loss_inverse': Array(1.5648347e-06, dtype=float32)}


  7%|▋         | 71620/1000000 [3:23:21<31:44:54,  8.12it/s]

{'loss': Array(0.13717458, dtype=float32), 'loss_cross_entropy': Array(0.12859784, dtype=float32)}
{'loss_inverse': Array(4.3304808e-05, dtype=float32)}


  7%|▋         | 71630/1000000 [3:23:22<26:18:06,  9.80it/s]

{'loss': Array(0.14767812, dtype=float32), 'loss_cross_entropy': Array(0.13882075, dtype=float32)}
{'loss_inverse': Array(1.3369508e-06, dtype=float32)}


  7%|▋         | 71640/1000000 [3:23:24<34:44:26,  7.42it/s]

{'loss': Array(0.16123149, dtype=float32), 'loss_cross_entropy': Array(0.15188847, dtype=float32)}
{'loss_inverse': Array(1.5097221e-05, dtype=float32)}


  7%|▋         | 71650/1000000 [3:23:26<26:44:35,  9.64it/s]

{'loss': Array(0.1569833, dtype=float32), 'loss_cross_entropy': Array(0.14764802, dtype=float32)}
{'loss_inverse': Array(8.044262e-06, dtype=float32)}


  7%|▋         | 71660/1000000 [3:23:27<31:50:28,  8.10it/s]

{'loss': Array(0.14926073, dtype=float32), 'loss_cross_entropy': Array(0.14057301, dtype=float32)}
{'loss_inverse': Array(6.204285e-08, dtype=float32)}


  7%|▋         | 71670/1000000 [3:23:29<26:37:24,  9.69it/s]

{'loss': Array(0.1613242, dtype=float32), 'loss_cross_entropy': Array(0.15205535, dtype=float32)}
{'loss_inverse': Array(8.412533e-07, dtype=float32)}


  7%|▋         | 71680/1000000 [3:23:31<38:14:53,  6.74it/s]

{'loss': Array(0.14989443, dtype=float32), 'loss_cross_entropy': Array(0.14115268, dtype=float32)}
{'loss_inverse': Array(6.297578e-05, dtype=float32)}


  7%|▋         | 71690/1000000 [3:23:32<28:02:58,  9.19it/s]

{'loss': Array(0.14053032, dtype=float32), 'loss_cross_entropy': Array(0.13176799, dtype=float32)}
{'loss_inverse': Array(1.4790702e-07, dtype=float32)}


  7%|▋         | 71700/1000000 [3:23:33<25:24:20, 10.15it/s]

{'loss': Array(0.15959181, dtype=float32), 'loss_cross_entropy': Array(0.15023474, dtype=float32)}
{'loss_inverse': Array(9.005782e-07, dtype=float32)}


  7%|▋         | 71710/1000000 [3:23:35<28:14:05,  9.13it/s]

{'loss': Array(0.13545792, dtype=float32), 'loss_cross_entropy': Array(0.12745316, dtype=float32)}
{'loss_inverse': Array(8.9317965e-08, dtype=float32)}


  7%|▋         | 71720/1000000 [3:23:37<25:33:36, 10.09it/s]

{'loss': Array(0.14440596, dtype=float32), 'loss_cross_entropy': Array(0.13651964, dtype=float32)}
{'loss_inverse': Array(2.1854046e-06, dtype=float32)}


  7%|▋         | 71730/1000000 [3:23:38<28:11:58,  9.14it/s]

{'loss': Array(0.16128422, dtype=float32), 'loss_cross_entropy': Array(0.15254053, dtype=float32)}
{'loss_inverse': Array(2.9259976e-07, dtype=float32)}


  7%|▋         | 71740/1000000 [3:23:40<26:05:04,  9.89it/s]

{'loss': Array(0.14493829, dtype=float32), 'loss_cross_entropy': Array(0.13620625, dtype=float32)}
{'loss_inverse': Array(1.169296e-05, dtype=float32)}


  7%|▋         | 71750/1000000 [3:23:42<29:44:11,  8.67it/s]

{'loss': Array(0.1585509, dtype=float32), 'loss_cross_entropy': Array(0.14925586, dtype=float32)}
{'loss_inverse': Array(7.5482126e-07, dtype=float32)}


  7%|▋         | 71760/1000000 [3:23:43<25:41:36, 10.04it/s]

{'loss': Array(0.14434825, dtype=float32), 'loss_cross_entropy': Array(0.1361746, dtype=float32)}
{'loss_inverse': Array(6.1632672e-06, dtype=float32)}


  7%|▋         | 71770/1000000 [3:23:45<38:12:18,  6.75it/s]

{'loss': Array(0.13796566, dtype=float32), 'loss_cross_entropy': Array(0.12978536, dtype=float32)}
{'loss_inverse': Array(3.0459364e-06, dtype=float32)}


  7%|▋         | 71780/1000000 [3:23:46<27:46:13,  9.28it/s]

{'loss': Array(0.14641416, dtype=float32), 'loss_cross_entropy': Array(0.13772584, dtype=float32)}
{'loss_inverse': Array(2.480999e-06, dtype=float32)}


  7%|▋         | 71790/1000000 [3:23:48<26:10:23,  9.85it/s]

{'loss': Array(0.14982805, dtype=float32), 'loss_cross_entropy': Array(0.14110829, dtype=float32)}
{'loss_inverse': Array(6.077413e-06, dtype=float32)}


  7%|▋         | 71800/1000000 [3:23:49<29:25:47,  8.76it/s]

{'loss': Array(0.14605667, dtype=float32), 'loss_cross_entropy': Array(0.13680835, dtype=float32)}
{'loss_inverse': Array(7.3749848e-06, dtype=float32)}


  7%|▋         | 71810/1000000 [3:23:51<25:57:21,  9.93it/s]

{'loss': Array(0.16108944, dtype=float32), 'loss_cross_entropy': Array(0.15225436, dtype=float32)}
{'loss_inverse': Array(6.9596183e-07, dtype=float32)}


  7%|▋         | 71820/1000000 [3:23:52<29:20:47,  8.79it/s]

{'loss': Array(0.15380919, dtype=float32), 'loss_cross_entropy': Array(0.1448246, dtype=float32)}
{'loss_inverse': Array(4.0011323e-06, dtype=float32)}


  7%|▋         | 71830/1000000 [3:23:54<26:09:12,  9.86it/s]

{'loss': Array(0.1508094, dtype=float32), 'loss_cross_entropy': Array(0.14223306, dtype=float32)}
{'loss_inverse': Array(1.26343e-06, dtype=float32)}


  7%|▋         | 71840/1000000 [3:23:56<31:21:47,  8.22it/s]

{'loss': Array(0.14153723, dtype=float32), 'loss_cross_entropy': Array(0.13360201, dtype=float32)}
{'loss_inverse': Array(4.0132104e-08, dtype=float32)}


  7%|▋         | 71850/1000000 [3:23:57<26:30:27,  9.73it/s]

{'loss': Array(0.15118065, dtype=float32), 'loss_cross_entropy': Array(0.1425774, dtype=float32)}
{'loss_inverse': Array(1.2436506e-07, dtype=float32)}


  7%|▋         | 71860/1000000 [3:23:58<25:50:02,  9.98it/s]

{'loss': Array(0.14712803, dtype=float32), 'loss_cross_entropy': Array(0.13791162, dtype=float32)}
{'loss_inverse': Array(2.095499e-07, dtype=float32)}


  7%|▋         | 71870/1000000 [3:24:00<28:37:57,  9.00it/s]

{'loss': Array(0.1504743, dtype=float32), 'loss_cross_entropy': Array(0.14216295, dtype=float32)}
{'loss_inverse': Array(2.696088e-06, dtype=float32)}


  7%|▋         | 71880/1000000 [3:24:02<25:57:56,  9.93it/s]

{'loss': Array(0.15955631, dtype=float32), 'loss_cross_entropy': Array(0.15016763, dtype=float32)}
{'loss_inverse': Array(6.6549426e-08, dtype=float32)}


  7%|▋         | 71890/1000000 [3:24:03<30:02:21,  8.58it/s]

{'loss': Array(0.13904741, dtype=float32), 'loss_cross_entropy': Array(0.13053173, dtype=float32)}
{'loss_inverse': Array(9.607545e-07, dtype=float32)}


  7%|▋         | 71900/1000000 [3:24:05<26:11:42,  9.84it/s]

{'loss': Array(0.14035068, dtype=float32), 'loss_cross_entropy': Array(0.13236986, dtype=float32)}
{'loss_inverse': Array(9.495528e-06, dtype=float32)}


  7%|▋         | 71910/1000000 [3:24:07<32:37:37,  7.90it/s]

{'loss': Array(0.15624896, dtype=float32), 'loss_cross_entropy': Array(0.14772223, dtype=float32)}
{'loss_inverse': Array(1.4210954e-07, dtype=float32)}


  7%|▋         | 71920/1000000 [3:24:08<26:20:22,  9.79it/s]

{'loss': Array(0.15028226, dtype=float32), 'loss_cross_entropy': Array(0.1418612, dtype=float32)}
{'loss_inverse': Array(2.406081e-05, dtype=float32)}


  7%|▋         | 71930/1000000 [3:24:09<25:27:31, 10.13it/s]

{'loss': Array(0.14923574, dtype=float32), 'loss_cross_entropy': Array(0.14014035, dtype=float32)}
{'loss_inverse': Array(9.0708585e-07, dtype=float32)}


  7%|▋         | 71940/1000000 [3:24:11<29:48:48,  8.65it/s]

{'loss': Array(0.14902289, dtype=float32), 'loss_cross_entropy': Array(0.14063367, dtype=float32)}
{'loss_inverse': Array(1.17929666e-07, dtype=float32)}


  7%|▋         | 71950/1000000 [3:24:13<26:23:48,  9.77it/s]

{'loss': Array(0.13712111, dtype=float32), 'loss_cross_entropy': Array(0.12902756, dtype=float32)}
{'loss_inverse': Array(6.4866954e-06, dtype=float32)}


  7%|▋         | 71960/1000000 [3:24:14<31:46:46,  8.11it/s]

{'loss': Array(0.1347522, dtype=float32), 'loss_cross_entropy': Array(0.12634626, dtype=float32)}
{'loss_inverse': Array(4.1045618e-07, dtype=float32)}


  7%|▋         | 71970/1000000 [3:24:16<26:12:00,  9.84it/s]

{'loss': Array(0.16103534, dtype=float32), 'loss_cross_entropy': Array(0.15209852, dtype=float32)}
{'loss_inverse': Array(1.0882553e-06, dtype=float32)}


  7%|▋         | 71980/1000000 [3:24:18<36:02:59,  7.15it/s]

{'loss': Array(0.13667944, dtype=float32), 'loss_cross_entropy': Array(0.12841785, dtype=float32)}
{'loss_inverse': Array(1.7792107e-06, dtype=float32)}


  7%|▋         | 71990/1000000 [3:24:19<26:56:40,  9.57it/s]

{'loss': Array(0.1425658, dtype=float32), 'loss_cross_entropy': Array(0.13366172, dtype=float32)}
{'loss_inverse': Array(3.3273988e-08, dtype=float32)}


  7%|▋         | 72000/1000000 [3:24:20<25:26:54, 10.13it/s]

{'loss': Array(0.14719662, dtype=float32), 'loss_cross_entropy': Array(0.13840103, dtype=float32)}
{'loss_inverse': Array(3.0999422e-06, dtype=float32)}


  7%|▋         | 72010/1000000 [3:24:29<92:21:49,  2.79it/s] 

{'loss': Array(0.17093809, dtype=float32), 'loss_cross_entropy': Array(0.161085, dtype=float32)}
{'loss_inverse': Array(7.636959e-07, dtype=float32)}


  7%|▋         | 72020/1000000 [3:24:31<48:55:47,  5.27it/s]

{'loss': Array(0.13358875, dtype=float32), 'loss_cross_entropy': Array(0.12561189, dtype=float32)}
{'loss_inverse': Array(2.7538746e-07, dtype=float32)}


  7%|▋         | 72030/1000000 [3:24:32<29:38:26,  8.70it/s]

{'loss': Array(0.1674242, dtype=float32), 'loss_cross_entropy': Array(0.15827222, dtype=float32)}
{'loss_inverse': Array(1.02437376e-07, dtype=float32)}


  7%|▋         | 72040/1000000 [3:24:34<25:56:36,  9.94it/s]

{'loss': Array(0.16187261, dtype=float32), 'loss_cross_entropy': Array(0.15208618, dtype=float32)}
{'loss_inverse': Array(3.74886e-08, dtype=float32)}


  7%|▋         | 72050/1000000 [3:24:35<31:32:58,  8.17it/s]

{'loss': Array(0.13186543, dtype=float32), 'loss_cross_entropy': Array(0.12372384, dtype=float32)}
{'loss_inverse': Array(1.8146083e-07, dtype=float32)}


  7%|▋         | 72060/1000000 [3:24:37<26:49:30,  9.61it/s]

{'loss': Array(0.1500461, dtype=float32), 'loss_cross_entropy': Array(0.14129232, dtype=float32)}
{'loss_inverse': Array(6.834345e-05, dtype=float32)}


  7%|▋         | 72070/1000000 [3:24:39<34:48:19,  7.41it/s]

{'loss': Array(0.13792507, dtype=float32), 'loss_cross_entropy': Array(0.13061628, dtype=float32)}
{'loss_inverse': Array(7.759855e-06, dtype=float32)}


  7%|▋         | 72080/1000000 [3:24:40<27:18:15,  9.44it/s]

{'loss': Array(0.14940684, dtype=float32), 'loss_cross_entropy': Array(0.14060329, dtype=float32)}
{'loss_inverse': Array(6.659798e-05, dtype=float32)}


  7%|▋         | 72090/1000000 [3:24:42<34:49:49,  7.40it/s]

{'loss': Array(0.14237025, dtype=float32), 'loss_cross_entropy': Array(0.13327025, dtype=float32)}
{'loss_inverse': Array(3.293176e-07, dtype=float32)}


  7%|▋         | 72100/1000000 [3:24:43<26:43:58,  9.64it/s]

{'loss': Array(0.14055905, dtype=float32), 'loss_cross_entropy': Array(0.13229445, dtype=float32)}
{'loss_inverse': Array(1.7814715e-07, dtype=float32)}


  7%|▋         | 72110/1000000 [3:24:45<34:27:11,  7.48it/s]

{'loss': Array(0.12379398, dtype=float32), 'loss_cross_entropy': Array(0.11601136, dtype=float32)}
{'loss_inverse': Array(7.845932e-08, dtype=float32)}


  7%|▋         | 72120/1000000 [3:24:46<26:53:17,  9.59it/s]

{'loss': Array(0.15055053, dtype=float32), 'loss_cross_entropy': Array(0.1419414, dtype=float32)}
{'loss_inverse': Array(2.1872843e-07, dtype=float32)}


  7%|▋         | 72130/1000000 [3:24:48<25:12:39, 10.22it/s]

{'loss': Array(0.14395952, dtype=float32), 'loss_cross_entropy': Array(0.13504389, dtype=float32)}
{'loss_inverse': Array(3.4074017e-06, dtype=float32)}


  7%|▋         | 72140/1000000 [3:24:50<31:24:44,  8.21it/s]

{'loss': Array(0.16515301, dtype=float32), 'loss_cross_entropy': Array(0.15518673, dtype=float32)}
{'loss_inverse': Array(1.6553687e-07, dtype=float32)}


  7%|▋         | 72150/1000000 [3:24:51<26:09:24,  9.85it/s]

{'loss': Array(0.16209719, dtype=float32), 'loss_cross_entropy': Array(0.15278636, dtype=float32)}
{'loss_inverse': Array(3.3320524e-05, dtype=float32)}


  7%|▋         | 72160/1000000 [3:24:53<33:55:52,  7.60it/s]

{'loss': Array(0.15708087, dtype=float32), 'loss_cross_entropy': Array(0.14829896, dtype=float32)}
{'loss_inverse': Array(1.9258935e-07, dtype=float32)}


  7%|▋         | 72170/1000000 [3:24:54<26:37:32,  9.68it/s]

{'loss': Array(0.12821561, dtype=float32), 'loss_cross_entropy': Array(0.12047895, dtype=float32)}
{'loss_inverse': Array(2.2185084e-05, dtype=float32)}


  7%|▋         | 72180/1000000 [3:24:56<38:35:11,  6.68it/s]

{'loss': Array(0.14785916, dtype=float32), 'loss_cross_entropy': Array(0.1396759, dtype=float32)}
{'loss_inverse': Array(3.4323955e-06, dtype=float32)}


  7%|▋         | 72190/1000000 [3:24:57<27:45:19,  9.29it/s]

{'loss': Array(0.14993702, dtype=float32), 'loss_cross_entropy': Array(0.14093828, dtype=float32)}
{'loss_inverse': Array(1.2738933e-07, dtype=float32)}


  7%|▋         | 72200/1000000 [3:24:59<34:41:45,  7.43it/s]

{'loss': Array(0.14423095, dtype=float32), 'loss_cross_entropy': Array(0.1352712, dtype=float32)}
{'loss_inverse': Array(8.670314e-06, dtype=float32)}


  7%|▋         | 72210/1000000 [3:25:01<26:36:40,  9.68it/s]

{'loss': Array(0.15855825, dtype=float32), 'loss_cross_entropy': Array(0.14923206, dtype=float32)}
{'loss_inverse': Array(7.873528e-06, dtype=float32)}


  7%|▋         | 72220/1000000 [3:25:02<25:49:07,  9.98it/s]

{'loss': Array(0.13323352, dtype=float32), 'loss_cross_entropy': Array(0.1254607, dtype=float32)}
{'loss_inverse': Array(7.6197534e-07, dtype=float32)}


  7%|▋         | 72230/1000000 [3:25:04<28:39:49,  8.99it/s]

{'loss': Array(0.15820955, dtype=float32), 'loss_cross_entropy': Array(0.14830409, dtype=float32)}
{'loss_inverse': Array(1.474078e-05, dtype=float32)}


  7%|▋         | 72240/1000000 [3:25:05<25:59:57,  9.91it/s]

{'loss': Array(0.14774905, dtype=float32), 'loss_cross_entropy': Array(0.1392763, dtype=float32)}
{'loss_inverse': Array(1.3383209e-06, dtype=float32)}


  7%|▋         | 72250/1000000 [3:25:07<28:55:08,  8.91it/s]

{'loss': Array(0.13211708, dtype=float32), 'loss_cross_entropy': Array(0.12406881, dtype=float32)}
{'loss_inverse': Array(9.6762165e-08, dtype=float32)}


  7%|▋         | 72260/1000000 [3:25:08<27:18:37,  9.44it/s]

{'loss': Array(0.13144498, dtype=float32), 'loss_cross_entropy': Array(0.12378645, dtype=float32)}
{'loss_inverse': Array(2.1643276e-05, dtype=float32)}


  7%|▋         | 72270/1000000 [3:25:10<34:38:41,  7.44it/s]

{'loss': Array(0.13608907, dtype=float32), 'loss_cross_entropy': Array(0.1277395, dtype=float32)}
{'loss_inverse': Array(8.4769835e-07, dtype=float32)}


  7%|▋         | 72280/1000000 [3:25:12<26:47:51,  9.62it/s]

{'loss': Array(0.14141206, dtype=float32), 'loss_cross_entropy': Array(0.13333544, dtype=float32)}
{'loss_inverse': Array(5.3442517e-07, dtype=float32)}


  7%|▋         | 72290/1000000 [3:25:13<25:32:25, 10.09it/s]

{'loss': Array(0.14862995, dtype=float32), 'loss_cross_entropy': Array(0.13951997, dtype=float32)}
{'loss_inverse': Array(1.0523015e-06, dtype=float32)}


  7%|▋         | 72300/1000000 [3:25:15<28:37:52,  9.00it/s]

{'loss': Array(0.15084918, dtype=float32), 'loss_cross_entropy': Array(0.14206138, dtype=float32)}
{'loss_inverse': Array(2.0764772e-07, dtype=float32)}


  7%|▋         | 72310/1000000 [3:25:16<25:43:39, 10.02it/s]

{'loss': Array(0.15216823, dtype=float32), 'loss_cross_entropy': Array(0.14402366, dtype=float32)}
{'loss_inverse': Array(4.177899e-07, dtype=float32)}


  7%|▋         | 72320/1000000 [3:25:18<29:54:21,  8.62it/s]

{'loss': Array(0.13730158, dtype=float32), 'loss_cross_entropy': Array(0.1288523, dtype=float32)}
{'loss_inverse': Array(0.00021494, dtype=float32)}


  7%|▋         | 72330/1000000 [3:25:19<25:54:04,  9.95it/s]

{'loss': Array(0.1360045, dtype=float32), 'loss_cross_entropy': Array(0.12828963, dtype=float32)}
{'loss_inverse': Array(3.723464e-07, dtype=float32)}


  7%|▋         | 72340/1000000 [3:25:21<30:11:11,  8.54it/s]

{'loss': Array(0.15143844, dtype=float32), 'loss_cross_entropy': Array(0.14306293, dtype=float32)}
{'loss_inverse': Array(4.6560476e-07, dtype=float32)}


  7%|▋         | 72350/1000000 [3:25:23<27:00:16,  9.54it/s]

{'loss': Array(0.1477237, dtype=float32), 'loss_cross_entropy': Array(0.13832596, dtype=float32)}
{'loss_inverse': Array(1.6827274e-06, dtype=float32)}


  7%|▋         | 72360/1000000 [3:25:24<30:05:10,  8.56it/s]

{'loss': Array(0.14591423, dtype=float32), 'loss_cross_entropy': Array(0.1375724, dtype=float32)}
{'loss_inverse': Array(5.937749e-05, dtype=float32)}


  7%|▋         | 72370/1000000 [3:25:26<26:06:33,  9.87it/s]

{'loss': Array(0.14888547, dtype=float32), 'loss_cross_entropy': Array(0.14007746, dtype=float32)}
{'loss_inverse': Array(2.783779e-07, dtype=float32)}


  7%|▋         | 72380/1000000 [3:25:28<34:19:01,  7.51it/s]

{'loss': Array(0.15918124, dtype=float32), 'loss_cross_entropy': Array(0.15011552, dtype=float32)}
{'loss_inverse': Array(7.8793346e-07, dtype=float32)}


  7%|▋         | 72390/1000000 [3:25:29<26:39:47,  9.66it/s]

{'loss': Array(0.14774679, dtype=float32), 'loss_cross_entropy': Array(0.13909237, dtype=float32)}
{'loss_inverse': Array(0.00016036, dtype=float32)}


  7%|▋         | 72400/1000000 [3:25:31<38:43:00,  6.66it/s]

{'loss': Array(0.15106867, dtype=float32), 'loss_cross_entropy': Array(0.1421715, dtype=float32)}
{'loss_inverse': Array(8.614547e-05, dtype=float32)}


  7%|▋         | 72410/1000000 [3:25:32<28:13:17,  9.13it/s]

{'loss': Array(0.15904008, dtype=float32), 'loss_cross_entropy': Array(0.15044563, dtype=float32)}
{'loss_inverse': Array(8.495898e-05, dtype=float32)}


  7%|▋         | 72420/1000000 [3:25:34<35:42:51,  7.21it/s]

{'loss': Array(0.15471308, dtype=float32), 'loss_cross_entropy': Array(0.14612637, dtype=float32)}
{'loss_inverse': Array(8.5375467e-07, dtype=float32)}


  7%|▋         | 72430/1000000 [3:25:36<27:07:57,  9.50it/s]

{'loss': Array(0.14219016, dtype=float32), 'loss_cross_entropy': Array(0.13396786, dtype=float32)}
{'loss_inverse': Array(0.00021129, dtype=float32)}


  7%|▋         | 72440/1000000 [3:25:37<26:21:30,  9.78it/s]

{'loss': Array(0.1557251, dtype=float32), 'loss_cross_entropy': Array(0.1467296, dtype=float32)}
{'loss_inverse': Array(5.6142375e-07, dtype=float32)}


  7%|▋         | 72450/1000000 [3:25:39<30:07:41,  8.55it/s]

{'loss': Array(0.14308415, dtype=float32), 'loss_cross_entropy': Array(0.13468467, dtype=float32)}
{'loss_inverse': Array(8.769606e-05, dtype=float32)}


  7%|▋         | 72460/1000000 [3:25:40<26:18:59,  9.79it/s]

{'loss': Array(0.15506111, dtype=float32), 'loss_cross_entropy': Array(0.14670023, dtype=float32)}
{'loss_inverse': Array(8.593814e-06, dtype=float32)}


  7%|▋         | 72470/1000000 [3:25:42<31:26:42,  8.19it/s]

{'loss': Array(0.14486258, dtype=float32), 'loss_cross_entropy': Array(0.13636191, dtype=float32)}
{'loss_inverse': Array(0.00011524, dtype=float32)}


  7%|▋         | 72480/1000000 [3:25:43<26:11:30,  9.84it/s]

{'loss': Array(0.15081249, dtype=float32), 'loss_cross_entropy': Array(0.14181209, dtype=float32)}
{'loss_inverse': Array(7.210809e-08, dtype=float32)}


  7%|▋         | 72490/1000000 [3:25:45<34:31:53,  7.46it/s]

{'loss': Array(0.12947634, dtype=float32), 'loss_cross_entropy': Array(0.12118428, dtype=float32)}
{'loss_inverse': Array(2.0194972e-05, dtype=float32)}


  7%|▋         | 72500/1000000 [3:25:47<27:01:07,  9.54it/s]

{'loss': Array(0.16534641, dtype=float32), 'loss_cross_entropy': Array(0.15599273, dtype=float32)}
{'loss_inverse': Array(1.2821393e-05, dtype=float32)}


  7%|▋         | 72510/1000000 [3:25:55<94:02:21,  2.74it/s] 

{'loss': Array(0.13738768, dtype=float32), 'loss_cross_entropy': Array(0.12888612, dtype=float32)}
{'loss_inverse': Array(1.0529136e-05, dtype=float32)}


  7%|▋         | 72520/1000000 [3:25:56<36:43:46,  7.01it/s] 

{'loss': Array(0.15006696, dtype=float32), 'loss_cross_entropy': Array(0.14159496, dtype=float32)}
{'loss_inverse': Array(0.00018047, dtype=float32)}


  7%|▋         | 72530/1000000 [3:25:57<27:02:43,  9.53it/s]

{'loss': Array(0.15325864, dtype=float32), 'loss_cross_entropy': Array(0.14480041, dtype=float32)}
{'loss_inverse': Array(1.20643845e-05, dtype=float32)}


  7%|▋         | 72540/1000000 [3:25:59<28:51:20,  8.93it/s]

{'loss': Array(0.15363024, dtype=float32), 'loss_cross_entropy': Array(0.14430664, dtype=float32)}
{'loss_inverse': Array(5.5995935e-07, dtype=float32)}


  7%|▋         | 72550/1000000 [3:26:01<26:14:12,  9.82it/s]

{'loss': Array(0.1498587, dtype=float32), 'loss_cross_entropy': Array(0.14172041, dtype=float32)}
{'loss_inverse': Array(1.2112711e-05, dtype=float32)}


  7%|▋         | 72560/1000000 [3:26:02<34:44:30,  7.42it/s]

{'loss': Array(0.1568952, dtype=float32), 'loss_cross_entropy': Array(0.1481685, dtype=float32)}
{'loss_inverse': Array(9.739895e-06, dtype=float32)}


  7%|▋         | 72570/1000000 [3:26:04<26:37:55,  9.67it/s]

{'loss': Array(0.15262693, dtype=float32), 'loss_cross_entropy': Array(0.14404313, dtype=float32)}
{'loss_inverse': Array(3.7832713e-06, dtype=float32)}


  7%|▋         | 72580/1000000 [3:26:06<38:38:34,  6.67it/s]

{'loss': Array(0.14334199, dtype=float32), 'loss_cross_entropy': Array(0.13498408, dtype=float32)}
{'loss_inverse': Array(3.7728154e-05, dtype=float32)}


  7%|▋         | 72590/1000000 [3:26:07<28:28:01,  9.05it/s]

{'loss': Array(0.1360902, dtype=float32), 'loss_cross_entropy': Array(0.12807816, dtype=float32)}
{'loss_inverse': Array(3.251628e-05, dtype=float32)}


  7%|▋         | 72600/1000000 [3:26:09<25:53:54,  9.95it/s]

{'loss': Array(0.13987704, dtype=float32), 'loss_cross_entropy': Array(0.13143936, dtype=float32)}
{'loss_inverse': Array(7.1020327e-07, dtype=float32)}


  7%|▋         | 72610/1000000 [3:26:10<29:00:40,  8.88it/s]

{'loss': Array(0.16710715, dtype=float32), 'loss_cross_entropy': Array(0.15773597, dtype=float32)}
{'loss_inverse': Array(8.330575e-07, dtype=float32)}


  7%|▋         | 72620/1000000 [3:26:12<38:51:56,  6.63it/s]

{'loss': Array(0.14565721, dtype=float32), 'loss_cross_entropy': Array(0.13696054, dtype=float32)}
{'loss_inverse': Array(3.6345315e-07, dtype=float32)}


  7%|▋         | 72630/1000000 [3:26:13<27:13:05,  9.46it/s]

{'loss': Array(0.15700084, dtype=float32), 'loss_cross_entropy': Array(0.14772324, dtype=float32)}
{'loss_inverse': Array(1.793717e-05, dtype=float32)}


  7%|▋         | 72640/1000000 [3:26:15<25:32:35, 10.08it/s]

{'loss': Array(0.16588543, dtype=float32), 'loss_cross_entropy': Array(0.15671793, dtype=float32)}
{'loss_inverse': Array(1.26707955e-05, dtype=float32)}


  7%|▋         | 72650/1000000 [3:26:17<28:59:54,  8.88it/s]

{'loss': Array(0.14485897, dtype=float32), 'loss_cross_entropy': Array(0.13674457, dtype=float32)}
{'loss_inverse': Array(1.5146301e-06, dtype=float32)}


  7%|▋         | 72660/1000000 [3:26:18<26:19:53,  9.78it/s]

{'loss': Array(0.14493912, dtype=float32), 'loss_cross_entropy': Array(0.13668714, dtype=float32)}
{'loss_inverse': Array(3.3239255e-06, dtype=float32)}


  7%|▋         | 72670/1000000 [3:26:20<29:45:22,  8.66it/s]

{'loss': Array(0.14230786, dtype=float32), 'loss_cross_entropy': Array(0.13386112, dtype=float32)}
{'loss_inverse': Array(8.621329e-07, dtype=float32)}


  7%|▋         | 72680/1000000 [3:26:21<25:42:26, 10.02it/s]

{'loss': Array(0.13779593, dtype=float32), 'loss_cross_entropy': Array(0.12994604, dtype=float32)}
{'loss_inverse': Array(1.5755391e-05, dtype=float32)}


  7%|▋         | 72690/1000000 [3:26:23<32:00:36,  8.05it/s]

{'loss': Array(0.14728841, dtype=float32), 'loss_cross_entropy': Array(0.13869129, dtype=float32)}
{'loss_inverse': Array(4.159555e-06, dtype=float32)}


  7%|▋         | 72700/1000000 [3:26:25<26:21:38,  9.77it/s]

{'loss': Array(0.16051763, dtype=float32), 'loss_cross_entropy': Array(0.15199624, dtype=float32)}
{'loss_inverse': Array(1.2817545e-06, dtype=float32)}


  7%|▋         | 72710/1000000 [3:26:26<25:47:32,  9.99it/s]

{'loss': Array(0.1339168, dtype=float32), 'loss_cross_entropy': Array(0.12592085, dtype=float32)}
{'loss_inverse': Array(5.8494757e-06, dtype=float32)}


  7%|▋         | 72720/1000000 [3:26:28<30:09:08,  8.54it/s]

{'loss': Array(0.1446381, dtype=float32), 'loss_cross_entropy': Array(0.13605167, dtype=float32)}
{'loss_inverse': Array(1.531905e-07, dtype=float32)}


  7%|▋         | 72730/1000000 [3:26:29<26:56:14,  9.56it/s]

{'loss': Array(0.15043455, dtype=float32), 'loss_cross_entropy': Array(0.1418706, dtype=float32)}
{'loss_inverse': Array(5.088129e-07, dtype=float32)}


  7%|▋         | 72740/1000000 [3:26:31<32:31:15,  7.92it/s]

{'loss': Array(0.13679256, dtype=float32), 'loss_cross_entropy': Array(0.12884133, dtype=float32)}
{'loss_inverse': Array(1.5582339e-06, dtype=float32)}


  7%|▋         | 72750/1000000 [3:26:33<26:48:32,  9.61it/s]

{'loss': Array(0.1296951, dtype=float32), 'loss_cross_entropy': Array(0.12224066, dtype=float32)}
{'loss_inverse': Array(3.1812785e-08, dtype=float32)}


  7%|▋         | 72760/1000000 [3:26:34<34:44:59,  7.41it/s]

{'loss': Array(0.16893649, dtype=float32), 'loss_cross_entropy': Array(0.15982233, dtype=float32)}
{'loss_inverse': Array(2.1454399e-07, dtype=float32)}


  7%|▋         | 72770/1000000 [3:26:36<26:59:20,  9.54it/s]

{'loss': Array(0.1282079, dtype=float32), 'loss_cross_entropy': Array(0.12039926, dtype=float32)}
{'loss_inverse': Array(4.4422418e-07, dtype=float32)}


  7%|▋         | 72780/1000000 [3:26:37<25:17:20, 10.18it/s]

{'loss': Array(0.14916037, dtype=float32), 'loss_cross_entropy': Array(0.14036207, dtype=float32)}
{'loss_inverse': Array(9.796814e-06, dtype=float32)}


  7%|▋         | 72790/1000000 [3:26:39<27:46:57,  9.27it/s]

{'loss': Array(0.14196238, dtype=float32), 'loss_cross_entropy': Array(0.13336514, dtype=float32)}
{'loss_inverse': Array(2.942278e-07, dtype=float32)}


  7%|▋         | 72800/1000000 [3:26:40<25:23:25, 10.14it/s]

{'loss': Array(0.15143247, dtype=float32), 'loss_cross_entropy': Array(0.14236994, dtype=float32)}
{'loss_inverse': Array(1.10686635e-07, dtype=float32)}


  7%|▋         | 72810/1000000 [3:26:42<34:35:33,  7.45it/s]

{'loss': Array(0.16879569, dtype=float32), 'loss_cross_entropy': Array(0.15952367, dtype=float32)}
{'loss_inverse': Array(7.157509e-07, dtype=float32)}


  7%|▋         | 72820/1000000 [3:26:43<26:24:11,  9.75it/s]

{'loss': Array(0.14539905, dtype=float32), 'loss_cross_entropy': Array(0.13679416, dtype=float32)}
{'loss_inverse': Array(2.2844479e-05, dtype=float32)}


  7%|▋         | 72830/1000000 [3:26:45<25:25:10, 10.13it/s]

{'loss': Array(0.15325569, dtype=float32), 'loss_cross_entropy': Array(0.14443526, dtype=float32)}
{'loss_inverse': Array(1.0110019e-07, dtype=float32)}


  7%|▋         | 72840/1000000 [3:26:47<28:07:38,  9.16it/s]

{'loss': Array(0.14758556, dtype=float32), 'loss_cross_entropy': Array(0.13905479, dtype=float32)}
{'loss_inverse': Array(9.052506e-08, dtype=float32)}


  7%|▋         | 72850/1000000 [3:26:48<25:53:09,  9.95it/s]

{'loss': Array(0.14876314, dtype=float32), 'loss_cross_entropy': Array(0.13955395, dtype=float32)}
{'loss_inverse': Array(1.3472547e-06, dtype=float32)}


  7%|▋         | 72860/1000000 [3:26:50<30:03:52,  8.57it/s]

{'loss': Array(0.15447846, dtype=float32), 'loss_cross_entropy': Array(0.14530662, dtype=float32)}
{'loss_inverse': Array(2.0400224e-05, dtype=float32)}


  7%|▋         | 72870/1000000 [3:26:51<26:21:02,  9.77it/s]

{'loss': Array(0.13996328, dtype=float32), 'loss_cross_entropy': Array(0.13249716, dtype=float32)}
{'loss_inverse': Array(4.4672393e-07, dtype=float32)}


  7%|▋         | 72880/1000000 [3:26:53<31:22:00,  8.21it/s]

{'loss': Array(0.15996832, dtype=float32), 'loss_cross_entropy': Array(0.15127471, dtype=float32)}
{'loss_inverse': Array(2.1218389e-08, dtype=float32)}


  7%|▋         | 72890/1000000 [3:26:54<26:01:16,  9.90it/s]

{'loss': Array(0.1575492, dtype=float32), 'loss_cross_entropy': Array(0.14879404, dtype=float32)}
{'loss_inverse': Array(6.621353e-07, dtype=float32)}


  7%|▋         | 72900/1000000 [3:26:56<25:19:25, 10.17it/s]

{'loss': Array(0.15557104, dtype=float32), 'loss_cross_entropy': Array(0.14674278, dtype=float32)}
{'loss_inverse': Array(1.5061613e-06, dtype=float32)}


  7%|▋         | 72910/1000000 [3:26:57<29:09:54,  8.83it/s]

{'loss': Array(0.12634619, dtype=float32), 'loss_cross_entropy': Array(0.11854612, dtype=float32)}
{'loss_inverse': Array(3.0949454e-07, dtype=float32)}


  7%|▋         | 72920/1000000 [3:26:59<25:44:54, 10.00it/s]

{'loss': Array(0.15142964, dtype=float32), 'loss_cross_entropy': Array(0.14211135, dtype=float32)}
{'loss_inverse': Array(1.0298841e-07, dtype=float32)}


  7%|▋         | 72930/1000000 [3:27:01<33:58:53,  7.58it/s]

{'loss': Array(0.12782054, dtype=float32), 'loss_cross_entropy': Array(0.11958156, dtype=float32)}
{'loss_inverse': Array(3.4110732e-07, dtype=float32)}


  7%|▋         | 72940/1000000 [3:27:02<26:27:18,  9.73it/s]

{'loss': Array(0.15237854, dtype=float32), 'loss_cross_entropy': Array(0.1438743, dtype=float32)}
{'loss_inverse': Array(1.8954523e-07, dtype=float32)}


  7%|▋         | 72950/1000000 [3:27:04<38:10:40,  6.75it/s]

{'loss': Array(0.15801361, dtype=float32), 'loss_cross_entropy': Array(0.14898649, dtype=float32)}
{'loss_inverse': Array(2.9952562e-07, dtype=float32)}


  7%|▋         | 72960/1000000 [3:27:05<27:57:18,  9.21it/s]

{'loss': Array(0.1442023, dtype=float32), 'loss_cross_entropy': Array(0.13624886, dtype=float32)}
{'loss_inverse': Array(1.596054e-08, dtype=float32)}


  7%|▋         | 72970/1000000 [3:27:07<39:17:50,  6.55it/s]

{'loss': Array(0.15215549, dtype=float32), 'loss_cross_entropy': Array(0.14320534, dtype=float32)}
{'loss_inverse': Array(1.2640832e-07, dtype=float32)}


  7%|▋         | 72980/1000000 [3:27:08<27:29:55,  9.36it/s]

{'loss': Array(0.15696697, dtype=float32), 'loss_cross_entropy': Array(0.14743476, dtype=float32)}
{'loss_inverse': Array(2.9467335e-06, dtype=float32)}


  7%|▋         | 72990/1000000 [3:27:10<26:01:01,  9.90it/s]

{'loss': Array(0.15455423, dtype=float32), 'loss_cross_entropy': Array(0.14612871, dtype=float32)}
{'loss_inverse': Array(1.4946157e-07, dtype=float32)}


  7%|▋         | 73000/1000000 [3:27:12<32:12:21,  8.00it/s]

{'loss': Array(0.14980507, dtype=float32), 'loss_cross_entropy': Array(0.14140789, dtype=float32)}
{'loss_inverse': Array(7.916835e-06, dtype=float32)}


  7%|▋         | 73010/1000000 [3:27:20<86:31:52,  2.98it/s] 

{'loss': Array(0.15640207, dtype=float32), 'loss_cross_entropy': Array(0.1475729, dtype=float32)}
{'loss_inverse': Array(7.361579e-08, dtype=float32)}


  7%|▋         | 73020/1000000 [3:27:21<35:17:28,  7.30it/s]

{'loss': Array(0.12524961, dtype=float32), 'loss_cross_entropy': Array(0.11726435, dtype=float32)}
{'loss_inverse': Array(1.9073523e-06, dtype=float32)}


  7%|▋         | 73030/1000000 [3:27:23<31:40:20,  8.13it/s]

{'loss': Array(0.13435756, dtype=float32), 'loss_cross_entropy': Array(0.12629516, dtype=float32)}
{'loss_inverse': Array(1.207667e-07, dtype=float32)}


  7%|▋         | 73040/1000000 [3:27:24<27:21:36,  9.41it/s]

{'loss': Array(0.13928592, dtype=float32), 'loss_cross_entropy': Array(0.1313543, dtype=float32)}
{'loss_inverse': Array(5.9854833e-06, dtype=float32)}


  7%|▋         | 73050/1000000 [3:27:26<32:00:45,  8.04it/s]

{'loss': Array(0.16566078, dtype=float32), 'loss_cross_entropy': Array(0.15597768, dtype=float32)}
{'loss_inverse': Array(1.7756292e-08, dtype=float32)}


  7%|▋         | 73060/1000000 [3:27:27<26:21:47,  9.77it/s]

{'loss': Array(0.16267364, dtype=float32), 'loss_cross_entropy': Array(0.15337417, dtype=float32)}
{'loss_inverse': Array(5.878697e-08, dtype=float32)}


  7%|▋         | 73070/1000000 [3:27:29<34:31:53,  7.46it/s]

{'loss': Array(0.13097523, dtype=float32), 'loss_cross_entropy': Array(0.12327906, dtype=float32)}
{'loss_inverse': Array(2.444445e-07, dtype=float32)}


  7%|▋         | 73080/1000000 [3:27:31<27:18:53,  9.43it/s]

{'loss': Array(0.13926153, dtype=float32), 'loss_cross_entropy': Array(0.131052, dtype=float32)}
{'loss_inverse': Array(8.010359e-08, dtype=float32)}


  7%|▋         | 73090/1000000 [3:27:32<26:17:22,  9.79it/s]

{'loss': Array(0.1509473, dtype=float32), 'loss_cross_entropy': Array(0.14215693, dtype=float32)}
{'loss_inverse': Array(1.1998854e-07, dtype=float32)}


  7%|▋         | 73100/1000000 [3:27:34<29:48:23,  8.64it/s]

{'loss': Array(0.13940082, dtype=float32), 'loss_cross_entropy': Array(0.13107571, dtype=float32)}
{'loss_inverse': Array(1.09308296e-07, dtype=float32)}


  7%|▋         | 73110/1000000 [3:27:35<26:14:41,  9.81it/s]

{'loss': Array(0.12719406, dtype=float32), 'loss_cross_entropy': Array(0.11967991, dtype=float32)}
{'loss_inverse': Array(3.7708585e-08, dtype=float32)}


  7%|▋         | 73120/1000000 [3:27:37<34:17:05,  7.51it/s]

{'loss': Array(0.1419218, dtype=float32), 'loss_cross_entropy': Array(0.13339041, dtype=float32)}
{'loss_inverse': Array(1.8216596e-07, dtype=float32)}


  7%|▋         | 73130/1000000 [3:27:38<26:20:41,  9.77it/s]

{'loss': Array(0.14655145, dtype=float32), 'loss_cross_entropy': Array(0.13782115, dtype=float32)}
{'loss_inverse': Array(8.772401e-07, dtype=float32)}


  7%|▋         | 73140/1000000 [3:27:40<38:51:30,  6.63it/s]

{'loss': Array(0.15935282, dtype=float32), 'loss_cross_entropy': Array(0.15035345, dtype=float32)}
{'loss_inverse': Array(8.552147e-08, dtype=float32)}


  7%|▋         | 73150/1000000 [3:27:42<28:06:54,  9.16it/s]

{'loss': Array(0.14964807, dtype=float32), 'loss_cross_entropy': Array(0.14076799, dtype=float32)}
{'loss_inverse': Array(3.7874614e-07, dtype=float32)}


  7%|▋         | 73160/1000000 [3:27:43<25:50:51,  9.96it/s]

{'loss': Array(0.14722191, dtype=float32), 'loss_cross_entropy': Array(0.13855788, dtype=float32)}
{'loss_inverse': Array(2.7149267e-07, dtype=float32)}


  7%|▋         | 73170/1000000 [3:27:45<28:34:45,  9.01it/s]

{'loss': Array(0.14065486, dtype=float32), 'loss_cross_entropy': Array(0.13240825, dtype=float32)}
{'loss_inverse': Array(1.6457797e-07, dtype=float32)}


  7%|▋         | 73180/1000000 [3:27:46<25:32:49, 10.08it/s]

{'loss': Array(0.15229909, dtype=float32), 'loss_cross_entropy': Array(0.1433428, dtype=float32)}
{'loss_inverse': Array(4.5012776e-08, dtype=float32)}


  7%|▋         | 73190/1000000 [3:27:48<29:24:11,  8.76it/s]

{'loss': Array(0.14664632, dtype=float32), 'loss_cross_entropy': Array(0.13772333, dtype=float32)}
{'loss_inverse': Array(1.1815657e-06, dtype=float32)}


  7%|▋         | 73200/1000000 [3:27:49<25:54:39,  9.94it/s]

{'loss': Array(0.16508444, dtype=float32), 'loss_cross_entropy': Array(0.15548217, dtype=float32)}
{'loss_inverse': Array(5.7669837e-07, dtype=float32)}


  7%|▋         | 73210/1000000 [3:27:51<31:37:25,  8.14it/s]

{'loss': Array(0.15311177, dtype=float32), 'loss_cross_entropy': Array(0.1447956, dtype=float32)}
{'loss_inverse': Array(1.3205822e-07, dtype=float32)}


  7%|▋         | 73220/1000000 [3:27:52<26:02:43,  9.88it/s]

{'loss': Array(0.1606979, dtype=float32), 'loss_cross_entropy': Array(0.15168504, dtype=float32)}
{'loss_inverse': Array(5.0116535e-07, dtype=float32)}


  7%|▋         | 73230/1000000 [3:27:54<34:31:03,  7.46it/s]

{'loss': Array(0.1662644, dtype=float32), 'loss_cross_entropy': Array(0.1568708, dtype=float32)}
{'loss_inverse': Array(1.2274454e-07, dtype=float32)}


  7%|▋         | 73240/1000000 [3:27:56<27:47:01,  9.27it/s]

{'loss': Array(0.1544205, dtype=float32), 'loss_cross_entropy': Array(0.14618145, dtype=float32)}
{'loss_inverse': Array(8.004545e-07, dtype=float32)}


  7%|▋         | 73250/1000000 [3:27:58<34:55:36,  7.37it/s]

{'loss': Array(0.13377188, dtype=float32), 'loss_cross_entropy': Array(0.1252238, dtype=float32)}
{'loss_inverse': Array(1.24469e-07, dtype=float32)}


  7%|▋         | 73260/1000000 [3:27:59<26:33:36,  9.69it/s]

{'loss': Array(0.15619706, dtype=float32), 'loss_cross_entropy': Array(0.14725623, dtype=float32)}
{'loss_inverse': Array(1.0818426e-07, dtype=float32)}


  7%|▋         | 73270/1000000 [3:28:00<25:33:09, 10.07it/s]

{'loss': Array(0.14474656, dtype=float32), 'loss_cross_entropy': Array(0.1366192, dtype=float32)}
{'loss_inverse': Array(1.4380933e-07, dtype=float32)}


  7%|▋         | 73280/1000000 [3:28:02<31:43:35,  8.11it/s]

{'loss': Array(0.14339577, dtype=float32), 'loss_cross_entropy': Array(0.13533436, dtype=float32)}
{'loss_inverse': Array(7.4282707e-07, dtype=float32)}


  7%|▋         | 73290/1000000 [3:28:04<26:02:35,  9.88it/s]

{'loss': Array(0.14615418, dtype=float32), 'loss_cross_entropy': Array(0.13747981, dtype=float32)}
{'loss_inverse': Array(7.374627e-08, dtype=float32)}


  7%|▋         | 73300/1000000 [3:28:05<37:41:31,  6.83it/s]

{'loss': Array(0.12880784, dtype=float32), 'loss_cross_entropy': Array(0.12120964, dtype=float32)}
{'loss_inverse': Array(9.924272e-08, dtype=float32)}


  7%|▋         | 73310/1000000 [3:28:07<27:56:26,  9.21it/s]

{'loss': Array(0.16697586, dtype=float32), 'loss_cross_entropy': Array(0.15758339, dtype=float32)}
{'loss_inverse': Array(3.133087e-08, dtype=float32)}


  7%|▋         | 73320/1000000 [3:28:08<25:35:13, 10.06it/s]

{'loss': Array(0.14673832, dtype=float32), 'loss_cross_entropy': Array(0.13928011, dtype=float32)}
{'loss_inverse': Array(1.8123237e-07, dtype=float32)}


  7%|▋         | 73330/1000000 [3:28:10<28:25:19,  9.06it/s]

{'loss': Array(0.1449703, dtype=float32), 'loss_cross_entropy': Array(0.13634476, dtype=float32)}
{'loss_inverse': Array(1.2882302e-06, dtype=float32)}


  7%|▋         | 73340/1000000 [3:28:11<25:51:24,  9.96it/s]

{'loss': Array(0.13927852, dtype=float32), 'loss_cross_entropy': Array(0.13130158, dtype=float32)}
{'loss_inverse': Array(9.142453e-08, dtype=float32)}


  7%|▋         | 73350/1000000 [3:28:13<28:18:02,  9.10it/s]

{'loss': Array(0.13728027, dtype=float32), 'loss_cross_entropy': Array(0.12951414, dtype=float32)}
{'loss_inverse': Array(1.2661003e-05, dtype=float32)}


  7%|▋         | 73360/1000000 [3:28:14<26:10:57,  9.83it/s]

{'loss': Array(0.13376847, dtype=float32), 'loss_cross_entropy': Array(0.12595525, dtype=float32)}
{'loss_inverse': Array(3.4138837e-05, dtype=float32)}


  7%|▋         | 73370/1000000 [3:28:16<31:42:10,  8.12it/s]

{'loss': Array(0.15165904, dtype=float32), 'loss_cross_entropy': Array(0.1436076, dtype=float32)}
{'loss_inverse': Array(6.622317e-08, dtype=float32)}


  7%|▋         | 73380/1000000 [3:28:18<26:48:43,  9.60it/s]

{'loss': Array(0.16102894, dtype=float32), 'loss_cross_entropy': Array(0.1517885, dtype=float32)}
{'loss_inverse': Array(0.00013201, dtype=float32)}


  7%|▋         | 73390/1000000 [3:28:19<32:12:08,  7.99it/s]

{'loss': Array(0.14544296, dtype=float32), 'loss_cross_entropy': Array(0.13729565, dtype=float32)}
{'loss_inverse': Array(2.2604263e-06, dtype=float32)}


  7%|▋         | 73400/1000000 [3:28:21<26:30:52,  9.71it/s]

{'loss': Array(0.15385185, dtype=float32), 'loss_cross_entropy': Array(0.14528318, dtype=float32)}
{'loss_inverse': Array(1.313038e-05, dtype=float32)}


  7%|▋         | 73410/1000000 [3:28:23<38:28:07,  6.69it/s]

{'loss': Array(0.14127015, dtype=float32), 'loss_cross_entropy': Array(0.13312729, dtype=float32)}
{'loss_inverse': Array(3.9088285e-05, dtype=float32)}


  7%|▋         | 73420/1000000 [3:28:24<27:33:56,  9.34it/s]

{'loss': Array(0.13202207, dtype=float32), 'loss_cross_entropy': Array(0.12372245, dtype=float32)}
{'loss_inverse': Array(4.128081e-05, dtype=float32)}


  7%|▋         | 73430/1000000 [3:28:25<25:16:39, 10.18it/s]

{'loss': Array(0.15215002, dtype=float32), 'loss_cross_entropy': Array(0.14309663, dtype=float32)}
{'loss_inverse': Array(2.2844463e-06, dtype=float32)}


  7%|▋         | 73440/1000000 [3:28:27<28:07:11,  9.15it/s]

{'loss': Array(0.13910866, dtype=float32), 'loss_cross_entropy': Array(0.13086568, dtype=float32)}
{'loss_inverse': Array(5.6837084e-06, dtype=float32)}


  7%|▋         | 73450/1000000 [3:28:29<26:57:47,  9.55it/s]

{'loss': Array(0.14584151, dtype=float32), 'loss_cross_entropy': Array(0.13730676, dtype=float32)}
{'loss_inverse': Array(5.7841466e-05, dtype=float32)}


  7%|▋         | 73460/1000000 [3:28:30<31:20:38,  8.21it/s]

{'loss': Array(0.14724103, dtype=float32), 'loss_cross_entropy': Array(0.13872658, dtype=float32)}
{'loss_inverse': Array(3.9394686e-07, dtype=float32)}


  7%|▋         | 73470/1000000 [3:28:32<26:31:38,  9.70it/s]

{'loss': Array(0.16260634, dtype=float32), 'loss_cross_entropy': Array(0.15323476, dtype=float32)}
{'loss_inverse': Array(5.577113e-05, dtype=float32)}


  7%|▋         | 73480/1000000 [3:28:33<25:37:01, 10.05it/s]

{'loss': Array(0.1762247, dtype=float32), 'loss_cross_entropy': Array(0.16634324, dtype=float32)}
{'loss_inverse': Array(3.324373e-06, dtype=float32)}


  7%|▋         | 73490/1000000 [3:28:35<29:36:58,  8.69it/s]

{'loss': Array(0.136959, dtype=float32), 'loss_cross_entropy': Array(0.12952065, dtype=float32)}
{'loss_inverse': Array(2.9748542e-06, dtype=float32)}


  7%|▋         | 73500/1000000 [3:28:36<26:01:54,  9.89it/s]

{'loss': Array(0.1433786, dtype=float32), 'loss_cross_entropy': Array(0.13512439, dtype=float32)}
{'loss_inverse': Array(0.0002912, dtype=float32)}


  7%|▋         | 73510/1000000 [3:28:45<95:50:43,  2.69it/s] 

{'loss': Array(0.15185183, dtype=float32), 'loss_cross_entropy': Array(0.14331701, dtype=float32)}
{'loss_inverse': Array(1.2278654e-05, dtype=float32)}


  7%|▋         | 73520/1000000 [3:28:46<36:39:42,  7.02it/s] 

{'loss': Array(0.15225057, dtype=float32), 'loss_cross_entropy': Array(0.14386982, dtype=float32)}
{'loss_inverse': Array(6.573018e-05, dtype=float32)}


  7%|▋         | 73530/1000000 [3:28:48<39:49:10,  6.46it/s]

{'loss': Array(0.152839, dtype=float32), 'loss_cross_entropy': Array(0.14420594, dtype=float32)}
{'loss_inverse': Array(1.7077739e-06, dtype=float32)}


  7%|▋         | 73540/1000000 [3:28:49<28:05:09,  9.16it/s]

{'loss': Array(0.16555671, dtype=float32), 'loss_cross_entropy': Array(0.15687391, dtype=float32)}
{'loss_inverse': Array(4.3103755e-06, dtype=float32)}


  7%|▋         | 73550/1000000 [3:28:51<26:06:29,  9.86it/s]

{'loss': Array(0.15894525, dtype=float32), 'loss_cross_entropy': Array(0.14949362, dtype=float32)}
{'loss_inverse': Array(0.00034075, dtype=float32)}


  7%|▋         | 73560/1000000 [3:28:53<29:19:20,  8.78it/s]

{'loss': Array(0.16630118, dtype=float32), 'loss_cross_entropy': Array(0.15700936, dtype=float32)}
{'loss_inverse': Array(0.00014009, dtype=float32)}


  7%|▋         | 73570/1000000 [3:28:54<38:41:37,  6.65it/s]

{'loss': Array(0.1608693, dtype=float32), 'loss_cross_entropy': Array(0.15244852, dtype=float32)}
{'loss_inverse': Array(7.419527e-05, dtype=float32)}


  7%|▋         | 73580/1000000 [3:28:56<27:24:45,  9.39it/s]

{'loss': Array(0.12970798, dtype=float32), 'loss_cross_entropy': Array(0.12199128, dtype=float32)}
{'loss_inverse': Array(0.00022972, dtype=float32)}


  7%|▋         | 73590/1000000 [3:28:57<25:27:37, 10.11it/s]

{'loss': Array(0.14774263, dtype=float32), 'loss_cross_entropy': Array(0.13890675, dtype=float32)}
{'loss_inverse': Array(9.438408e-06, dtype=float32)}


  7%|▋         | 73600/1000000 [3:28:59<28:20:14,  9.08it/s]

{'loss': Array(0.15490144, dtype=float32), 'loss_cross_entropy': Array(0.14594446, dtype=float32)}
{'loss_inverse': Array(0.00038742, dtype=float32)}


  7%|▋         | 73610/1000000 [3:29:00<25:51:59,  9.95it/s]

{'loss': Array(0.15658545, dtype=float32), 'loss_cross_entropy': Array(0.14748079, dtype=float32)}
{'loss_inverse': Array(0.00018616, dtype=float32)}


  7%|▋         | 73620/1000000 [3:29:02<29:36:49,  8.69it/s]

{'loss': Array(0.13570303, dtype=float32), 'loss_cross_entropy': Array(0.12760976, dtype=float32)}
{'loss_inverse': Array(9.2317954e-05, dtype=float32)}


  7%|▋         | 73630/1000000 [3:29:04<25:53:09,  9.94it/s]

{'loss': Array(0.16227518, dtype=float32), 'loss_cross_entropy': Array(0.15303992, dtype=float32)}
{'loss_inverse': Array(7.859815e-05, dtype=float32)}


  7%|▋         | 73640/1000000 [3:29:05<31:34:18,  8.15it/s]

{'loss': Array(0.14794806, dtype=float32), 'loss_cross_entropy': Array(0.13977225, dtype=float32)}
{'loss_inverse': Array(0.00015066, dtype=float32)}


  7%|▋         | 73650/1000000 [3:29:07<27:28:56,  9.36it/s]

{'loss': Array(0.15149875, dtype=float32), 'loss_cross_entropy': Array(0.14275853, dtype=float32)}
{'loss_inverse': Array(0.00020166, dtype=float32)}


  7%|▋         | 73660/1000000 [3:29:08<25:47:46,  9.98it/s]

{'loss': Array(0.14526664, dtype=float32), 'loss_cross_entropy': Array(0.13666776, dtype=float32)}
{'loss_inverse': Array(5.911299e-05, dtype=float32)}


  7%|▋         | 73670/1000000 [3:29:10<29:23:06,  8.76it/s]

{'loss': Array(0.1518397, dtype=float32), 'loss_cross_entropy': Array(0.1435346, dtype=float32)}
{'loss_inverse': Array(8.901159e-05, dtype=float32)}


  7%|▋         | 73680/1000000 [3:29:11<25:52:36,  9.94it/s]

{'loss': Array(0.14840879, dtype=float32), 'loss_cross_entropy': Array(0.13964845, dtype=float32)}
{'loss_inverse': Array(0.0002027, dtype=float32)}


  7%|▋         | 73690/1000000 [3:29:13<31:42:23,  8.12it/s]

{'loss': Array(0.14453448, dtype=float32), 'loss_cross_entropy': Array(0.13612647, dtype=float32)}
{'loss_inverse': Array(0.00067728, dtype=float32)}


  7%|▋         | 73700/1000000 [3:29:15<26:46:05,  9.61it/s]

{'loss': Array(0.15561491, dtype=float32), 'loss_cross_entropy': Array(0.14738184, dtype=float32)}
{'loss_inverse': Array(0.00012324, dtype=float32)}


  7%|▋         | 73710/1000000 [3:29:16<34:11:50,  7.52it/s]

{'loss': Array(0.13608655, dtype=float32), 'loss_cross_entropy': Array(0.12814133, dtype=float32)}
{'loss_inverse': Array(6.797047e-05, dtype=float32)}


  7%|▋         | 73720/1000000 [3:29:18<26:31:30,  9.70it/s]

{'loss': Array(0.14627613, dtype=float32), 'loss_cross_entropy': Array(0.13736372, dtype=float32)}
{'loss_inverse': Array(0.00015294, dtype=float32)}


  7%|▋         | 73730/1000000 [3:29:19<25:17:42, 10.17it/s]

{'loss': Array(0.14385544, dtype=float32), 'loss_cross_entropy': Array(0.13564067, dtype=float32)}
{'loss_inverse': Array(4.8935886e-05, dtype=float32)}


  7%|▋         | 73740/1000000 [3:29:21<27:55:51,  9.21it/s]

{'loss': Array(0.15585437, dtype=float32), 'loss_cross_entropy': Array(0.14674453, dtype=float32)}
{'loss_inverse': Array(0.00044847, dtype=float32)}


  7%|▋         | 73750/1000000 [3:29:22<25:30:18, 10.09it/s]

{'loss': Array(0.15836863, dtype=float32), 'loss_cross_entropy': Array(0.14945428, dtype=float32)}
{'loss_inverse': Array(0.00039903, dtype=float32)}


  7%|▋         | 73760/1000000 [3:29:24<33:46:26,  7.62it/s]

{'loss': Array(0.15488619, dtype=float32), 'loss_cross_entropy': Array(0.1457587, dtype=float32)}
{'loss_inverse': Array(0.00029655, dtype=float32)}


  7%|▋         | 73770/1000000 [3:29:25<26:55:07,  9.56it/s]

{'loss': Array(0.15247375, dtype=float32), 'loss_cross_entropy': Array(0.1439063, dtype=float32)}
{'loss_inverse': Array(0.00013562, dtype=float32)}


  7%|▋         | 73780/1000000 [3:29:27<25:14:57, 10.19it/s]

{'loss': Array(0.17636445, dtype=float32), 'loss_cross_entropy': Array(0.16632529, dtype=float32)}
{'loss_inverse': Array(0.00028376, dtype=float32)}


  7%|▋         | 73790/1000000 [3:29:29<28:25:53,  9.05it/s]

{'loss': Array(0.13452017, dtype=float32), 'loss_cross_entropy': Array(0.12673555, dtype=float32)}
{'loss_inverse': Array(2.8746575e-05, dtype=float32)}


  7%|▋         | 73800/1000000 [3:29:30<25:29:45, 10.09it/s]

{'loss': Array(0.1308103, dtype=float32), 'loss_cross_entropy': Array(0.12286794, dtype=float32)}
{'loss_inverse': Array(0.00032459, dtype=float32)}


  7%|▋         | 73810/1000000 [3:29:32<30:01:16,  8.57it/s]

{'loss': Array(0.15177189, dtype=float32), 'loss_cross_entropy': Array(0.14282022, dtype=float32)}
{'loss_inverse': Array(2.6559079e-05, dtype=float32)}


  7%|▋         | 73820/1000000 [3:29:33<26:15:27,  9.80it/s]

{'loss': Array(0.15048003, dtype=float32), 'loss_cross_entropy': Array(0.14199667, dtype=float32)}
{'loss_inverse': Array(3.1815533e-05, dtype=float32)}


  7%|▋         | 73830/1000000 [3:29:35<31:46:03,  8.10it/s]

{'loss': Array(0.16739635, dtype=float32), 'loss_cross_entropy': Array(0.15741515, dtype=float32)}
{'loss_inverse': Array(1.8793764e-05, dtype=float32)}


  7%|▋         | 73840/1000000 [3:29:36<26:19:27,  9.77it/s]

{'loss': Array(0.16167289, dtype=float32), 'loss_cross_entropy': Array(0.15261112, dtype=float32)}
{'loss_inverse': Array(0.0003758, dtype=float32)}


  7%|▋         | 73850/1000000 [3:29:38<25:17:46, 10.17it/s]

{'loss': Array(0.1448455, dtype=float32), 'loss_cross_entropy': Array(0.13630751, dtype=float32)}
{'loss_inverse': Array(0.00019901, dtype=float32)}


  7%|▋         | 73860/1000000 [3:29:40<30:15:19,  8.50it/s]

{'loss': Array(0.14592966, dtype=float32), 'loss_cross_entropy': Array(0.13684164, dtype=float32)}
{'loss_inverse': Array(0.00028563, dtype=float32)}


  7%|▋         | 73870/1000000 [3:29:41<25:46:24,  9.98it/s]

{'loss': Array(0.14600769, dtype=float32), 'loss_cross_entropy': Array(0.13739501, dtype=float32)}
{'loss_inverse': Array(8.936281e-06, dtype=float32)}


  7%|▋         | 73880/1000000 [3:29:43<34:17:03,  7.50it/s]

{'loss': Array(0.16788904, dtype=float32), 'loss_cross_entropy': Array(0.15842198, dtype=float32)}
{'loss_inverse': Array(0.00015292, dtype=float32)}


  7%|▋         | 73890/1000000 [3:29:44<26:45:48,  9.61it/s]

{'loss': Array(0.15318614, dtype=float32), 'loss_cross_entropy': Array(0.14419036, dtype=float32)}
{'loss_inverse': Array(0.00033988, dtype=float32)}


  7%|▋         | 73900/1000000 [3:29:46<38:24:43,  6.70it/s]

{'loss': Array(0.1547072, dtype=float32), 'loss_cross_entropy': Array(0.1460772, dtype=float32)}
{'loss_inverse': Array(5.9134705e-05, dtype=float32)}


  7%|▋         | 73910/1000000 [3:29:47<27:30:17,  9.35it/s]

{'loss': Array(0.1445517, dtype=float32), 'loss_cross_entropy': Array(0.13515268, dtype=float32)}
{'loss_inverse': Array(0.00017477, dtype=float32)}


  7%|▋         | 73920/1000000 [3:29:49<38:46:35,  6.63it/s]

{'loss': Array(0.13965791, dtype=float32), 'loss_cross_entropy': Array(0.13092111, dtype=float32)}
{'loss_inverse': Array(9.973835e-06, dtype=float32)}


  7%|▋         | 73930/1000000 [3:29:51<27:01:23,  9.52it/s]

{'loss': Array(0.16791493, dtype=float32), 'loss_cross_entropy': Array(0.15923314, dtype=float32)}
{'loss_inverse': Array(5.3906155e-05, dtype=float32)}


  7%|▋         | 73940/1000000 [3:29:52<25:10:34, 10.22it/s]

{'loss': Array(0.14347373, dtype=float32), 'loss_cross_entropy': Array(0.13539241, dtype=float32)}
{'loss_inverse': Array(1.0390264e-05, dtype=float32)}


  7%|▋         | 73950/1000000 [3:29:54<31:22:05,  8.20it/s]

{'loss': Array(0.15716949, dtype=float32), 'loss_cross_entropy': Array(0.14758977, dtype=float32)}
{'loss_inverse': Array(0.00012366, dtype=float32)}


  7%|▋         | 73960/1000000 [3:29:55<26:12:42,  9.81it/s]

{'loss': Array(0.12814811, dtype=float32), 'loss_cross_entropy': Array(0.1197014, dtype=float32)}
{'loss_inverse': Array(0.0001214, dtype=float32)}


  7%|▋         | 73970/1000000 [3:29:57<39:26:07,  6.52it/s]

{'loss': Array(0.1664405, dtype=float32), 'loss_cross_entropy': Array(0.15660131, dtype=float32)}
{'loss_inverse': Array(7.4424715e-06, dtype=float32)}


  7%|▋         | 73980/1000000 [3:29:58<27:50:30,  9.24it/s]

{'loss': Array(0.14756966, dtype=float32), 'loss_cross_entropy': Array(0.13917105, dtype=float32)}
{'loss_inverse': Array(9.906876e-05, dtype=float32)}


  7%|▋         | 73990/1000000 [3:30:00<38:56:36,  6.61it/s]

{'loss': Array(0.1802306, dtype=float32), 'loss_cross_entropy': Array(0.17052883, dtype=float32)}
{'loss_inverse': Array(0.00012693, dtype=float32)}


  7%|▋         | 74000/1000000 [3:30:02<28:20:35,  9.08it/s]

{'loss': Array(0.15045789, dtype=float32), 'loss_cross_entropy': Array(0.14217003, dtype=float32)}
{'loss_inverse': Array(3.208618e-05, dtype=float32)}


  7%|▋         | 74010/1000000 [3:30:10<89:39:55,  2.87it/s] 

{'loss': Array(0.13622765, dtype=float32), 'loss_cross_entropy': Array(0.1279449, dtype=float32)}
{'loss_inverse': Array(4.60505e-05, dtype=float32)}


  7%|▋         | 74020/1000000 [3:30:12<45:14:17,  5.69it/s]

{'loss': Array(0.1532989, dtype=float32), 'loss_cross_entropy': Array(0.14457332, dtype=float32)}
{'loss_inverse': Array(2.185472e-06, dtype=float32)}


  7%|▋         | 74030/1000000 [3:30:13<28:32:56,  9.01it/s]

{'loss': Array(0.129547, dtype=float32), 'loss_cross_entropy': Array(0.12192192, dtype=float32)}
{'loss_inverse': Array(2.229916e-06, dtype=float32)}


  7%|▋         | 74040/1000000 [3:30:15<26:05:51,  9.86it/s]

{'loss': Array(0.15567939, dtype=float32), 'loss_cross_entropy': Array(0.1466432, dtype=float32)}
{'loss_inverse': Array(9.549858e-06, dtype=float32)}


  7%|▋         | 74050/1000000 [3:30:16<28:40:45,  8.97it/s]

{'loss': Array(0.13833019, dtype=float32), 'loss_cross_entropy': Array(0.13056415, dtype=float32)}
{'loss_inverse': Array(9.2179025e-06, dtype=float32)}


  7%|▋         | 74060/1000000 [3:30:18<25:28:34, 10.10it/s]

{'loss': Array(0.13865341, dtype=float32), 'loss_cross_entropy': Array(0.12991397, dtype=float32)}
{'loss_inverse': Array(3.4994733e-05, dtype=float32)}


  7%|▋         | 74070/1000000 [3:30:19<29:15:08,  8.79it/s]

{'loss': Array(0.13829754, dtype=float32), 'loss_cross_entropy': Array(0.13054466, dtype=float32)}
{'loss_inverse': Array(0.00010074, dtype=float32)}


  7%|▋         | 74080/1000000 [3:30:21<25:56:24,  9.92it/s]

{'loss': Array(0.15191655, dtype=float32), 'loss_cross_entropy': Array(0.14311934, dtype=float32)}
{'loss_inverse': Array(4.874838e-05, dtype=float32)}


  7%|▋         | 74090/1000000 [3:30:23<31:58:25,  8.04it/s]

{'loss': Array(0.13647996, dtype=float32), 'loss_cross_entropy': Array(0.12852572, dtype=float32)}
{'loss_inverse': Array(0.00011619, dtype=float32)}


  7%|▋         | 74100/1000000 [3:30:24<26:03:12,  9.87it/s]

{'loss': Array(0.16178839, dtype=float32), 'loss_cross_entropy': Array(0.15296857, dtype=float32)}
{'loss_inverse': Array(1.0832768e-05, dtype=float32)}


  7%|▋         | 74110/1000000 [3:30:25<25:28:20, 10.10it/s]

{'loss': Array(0.14688486, dtype=float32), 'loss_cross_entropy': Array(0.13844171, dtype=float32)}
{'loss_inverse': Array(3.0745938e-05, dtype=float32)}


  7%|▋         | 74120/1000000 [3:30:27<28:08:49,  9.14it/s]

{'loss': Array(0.15203856, dtype=float32), 'loss_cross_entropy': Array(0.14349575, dtype=float32)}
{'loss_inverse': Array(1.9773663e-05, dtype=float32)}


  7%|▋         | 74130/1000000 [3:30:29<25:47:32,  9.97it/s]

{'loss': Array(0.12122077, dtype=float32), 'loss_cross_entropy': Array(0.11374644, dtype=float32)}
{'loss_inverse': Array(0.00011914, dtype=float32)}


  7%|▋         | 74140/1000000 [3:30:30<29:39:18,  8.67it/s]

{'loss': Array(0.1490349, dtype=float32), 'loss_cross_entropy': Array(0.14074296, dtype=float32)}
{'loss_inverse': Array(2.8893285e-06, dtype=float32)}


  7%|▋         | 74150/1000000 [3:30:32<26:28:58,  9.71it/s]

{'loss': Array(0.15045848, dtype=float32), 'loss_cross_entropy': Array(0.14186399, dtype=float32)}
{'loss_inverse': Array(6.7707406e-05, dtype=float32)}


  7%|▋         | 74160/1000000 [3:30:34<32:34:21,  7.90it/s]

{'loss': Array(0.15600964, dtype=float32), 'loss_cross_entropy': Array(0.14716874, dtype=float32)}
{'loss_inverse': Array(1.4354979e-05, dtype=float32)}


  7%|▋         | 74170/1000000 [3:30:35<26:52:33,  9.57it/s]

{'loss': Array(0.15142633, dtype=float32), 'loss_cross_entropy': Array(0.14253668, dtype=float32)}
{'loss_inverse': Array(4.7561472e-05, dtype=float32)}


  7%|▋         | 74180/1000000 [3:30:37<25:19:46, 10.15it/s]

{'loss': Array(0.14190525, dtype=float32), 'loss_cross_entropy': Array(0.1334879, dtype=float32)}
{'loss_inverse': Array(7.955232e-07, dtype=float32)}


  7%|▋         | 74190/1000000 [3:30:38<29:27:04,  8.73it/s]

{'loss': Array(0.14909811, dtype=float32), 'loss_cross_entropy': Array(0.14064378, dtype=float32)}
{'loss_inverse': Array(8.982924e-05, dtype=float32)}


  7%|▋         | 74200/1000000 [3:30:40<26:41:01,  9.64it/s]

{'loss': Array(0.14161175, dtype=float32), 'loss_cross_entropy': Array(0.13297516, dtype=float32)}
{'loss_inverse': Array(1.6235264e-05, dtype=float32)}


  7%|▋         | 74210/1000000 [3:30:42<32:32:44,  7.90it/s]

{'loss': Array(0.1662916, dtype=float32), 'loss_cross_entropy': Array(0.15709785, dtype=float32)}
{'loss_inverse': Array(2.5731342e-07, dtype=float32)}


  7%|▋         | 74220/1000000 [3:30:43<26:25:40,  9.73it/s]

{'loss': Array(0.15158507, dtype=float32), 'loss_cross_entropy': Array(0.14337586, dtype=float32)}
{'loss_inverse': Array(1.9632043e-05, dtype=float32)}


  7%|▋         | 74230/1000000 [3:30:45<34:20:05,  7.49it/s]

{'loss': Array(0.14327013, dtype=float32), 'loss_cross_entropy': Array(0.13529904, dtype=float32)}
{'loss_inverse': Array(1.6968557e-05, dtype=float32)}


  7%|▋         | 74240/1000000 [3:30:46<26:40:31,  9.64it/s]

{'loss': Array(0.14203338, dtype=float32), 'loss_cross_entropy': Array(0.13390632, dtype=float32)}
{'loss_inverse': Array(3.3447188e-05, dtype=float32)}


  7%|▋         | 74250/1000000 [3:30:48<25:24:33, 10.12it/s]

{'loss': Array(0.13034248, dtype=float32), 'loss_cross_entropy': Array(0.12272467, dtype=float32)}
{'loss_inverse': Array(5.3622836e-07, dtype=float32)}


  7%|▋         | 74260/1000000 [3:30:49<27:55:46,  9.21it/s]

{'loss': Array(0.15579455, dtype=float32), 'loss_cross_entropy': Array(0.1472663, dtype=float32)}
{'loss_inverse': Array(8.0659066e-07, dtype=float32)}


  7%|▋         | 74270/1000000 [3:30:51<25:43:45,  9.99it/s]

{'loss': Array(0.14548202, dtype=float32), 'loss_cross_entropy': Array(0.1365778, dtype=float32)}
{'loss_inverse': Array(1.2047923e-06, dtype=float32)}


  7%|▋         | 74280/1000000 [3:30:52<34:17:14,  7.50it/s]

{'loss': Array(0.16665165, dtype=float32), 'loss_cross_entropy': Array(0.15778868, dtype=float32)}
{'loss_inverse': Array(0.00013077, dtype=float32)}


  7%|▋         | 74290/1000000 [3:30:54<27:18:36,  9.42it/s]

{'loss': Array(0.14452563, dtype=float32), 'loss_cross_entropy': Array(0.13530447, dtype=float32)}
{'loss_inverse': Array(0.00031949, dtype=float32)}


  7%|▋         | 74300/1000000 [3:30:55<26:00:34,  9.89it/s]

{'loss': Array(0.14180487, dtype=float32), 'loss_cross_entropy': Array(0.13337232, dtype=float32)}
{'loss_inverse': Array(0.0001072, dtype=float32)}


  7%|▋         | 74310/1000000 [3:30:57<28:49:15,  8.92it/s]

{'loss': Array(0.15760599, dtype=float32), 'loss_cross_entropy': Array(0.14928393, dtype=float32)}
{'loss_inverse': Array(5.083501e-05, dtype=float32)}


  7%|▋         | 74320/1000000 [3:30:59<26:31:22,  9.69it/s]

{'loss': Array(0.15261516, dtype=float32), 'loss_cross_entropy': Array(0.1439806, dtype=float32)}
{'loss_inverse': Array(5.1733656e-05, dtype=float32)}


  7%|▋         | 74330/1000000 [3:31:00<30:08:14,  8.53it/s]

{'loss': Array(0.14647569, dtype=float32), 'loss_cross_entropy': Array(0.13812467, dtype=float32)}
{'loss_inverse': Array(2.7125236e-05, dtype=float32)}


  7%|▋         | 74340/1000000 [3:31:02<25:48:26,  9.96it/s]

{'loss': Array(0.14927374, dtype=float32), 'loss_cross_entropy': Array(0.14136629, dtype=float32)}
{'loss_inverse': Array(5.4147906e-05, dtype=float32)}


  7%|▋         | 74350/1000000 [3:31:03<31:17:50,  8.22it/s]

{'loss': Array(0.14382212, dtype=float32), 'loss_cross_entropy': Array(0.1355935, dtype=float32)}
{'loss_inverse': Array(2.1075048e-05, dtype=float32)}


  7%|▋         | 74360/1000000 [3:31:05<26:18:03,  9.78it/s]

{'loss': Array(0.14537202, dtype=float32), 'loss_cross_entropy': Array(0.13742678, dtype=float32)}
{'loss_inverse': Array(3.9874863e-05, dtype=float32)}


  7%|▋         | 74370/1000000 [3:31:06<26:04:34,  9.86it/s]

{'loss': Array(0.1422661, dtype=float32), 'loss_cross_entropy': Array(0.13403697, dtype=float32)}
{'loss_inverse': Array(0.0002197, dtype=float32)}


  7%|▋         | 74380/1000000 [3:31:08<30:19:37,  8.48it/s]

{'loss': Array(0.14678165, dtype=float32), 'loss_cross_entropy': Array(0.13867503, dtype=float32)}
{'loss_inverse': Array(3.7409856e-05, dtype=float32)}


  7%|▋         | 74390/1000000 [3:31:10<26:03:17,  9.87it/s]

{'loss': Array(0.15180019, dtype=float32), 'loss_cross_entropy': Array(0.14309946, dtype=float32)}
{'loss_inverse': Array(1.7369144e-06, dtype=float32)}


  7%|▋         | 74400/1000000 [3:31:11<34:31:01,  7.45it/s]

{'loss': Array(0.14647308, dtype=float32), 'loss_cross_entropy': Array(0.1381307, dtype=float32)}
{'loss_inverse': Array(0.00020102, dtype=float32)}


  7%|▋         | 74410/1000000 [3:31:13<26:36:30,  9.66it/s]

{'loss': Array(0.14372309, dtype=float32), 'loss_cross_entropy': Array(0.13624814, dtype=float32)}
{'loss_inverse': Array(1.6078128e-06, dtype=float32)}


  7%|▋         | 74420/1000000 [3:31:14<37:52:53,  6.79it/s]

{'loss': Array(0.15151392, dtype=float32), 'loss_cross_entropy': Array(0.14305909, dtype=float32)}
{'loss_inverse': Array(1.3313743e-05, dtype=float32)}


  7%|▋         | 74430/1000000 [3:31:16<27:15:07,  9.43it/s]

{'loss': Array(0.1306277, dtype=float32), 'loss_cross_entropy': Array(0.12263571, dtype=float32)}
{'loss_inverse': Array(3.287614e-07, dtype=float32)}


  7%|▋         | 74440/1000000 [3:31:18<39:09:39,  6.57it/s]

{'loss': Array(0.12802082, dtype=float32), 'loss_cross_entropy': Array(0.12024989, dtype=float32)}
{'loss_inverse': Array(4.7381823e-06, dtype=float32)}


  7%|▋         | 74450/1000000 [3:31:19<27:33:57,  9.33it/s]

{'loss': Array(0.13880669, dtype=float32), 'loss_cross_entropy': Array(0.1303724, dtype=float32)}
{'loss_inverse': Array(1.631675e-06, dtype=float32)}


  7%|▋         | 74460/1000000 [3:31:21<27:04:42,  9.49it/s]

{'loss': Array(0.15457234, dtype=float32), 'loss_cross_entropy': Array(0.14582513, dtype=float32)}
{'loss_inverse': Array(2.027419e-05, dtype=float32)}


  7%|▋         | 74470/1000000 [3:31:22<31:39:51,  8.12it/s]

{'loss': Array(0.15269797, dtype=float32), 'loss_cross_entropy': Array(0.14401029, dtype=float32)}
{'loss_inverse': Array(3.356208e-06, dtype=float32)}


  7%|▋         | 74480/1000000 [3:31:24<26:14:49,  9.79it/s]

{'loss': Array(0.14595471, dtype=float32), 'loss_cross_entropy': Array(0.13811061, dtype=float32)}
{'loss_inverse': Array(5.6471043e-07, dtype=float32)}


  7%|▋         | 74490/1000000 [3:31:25<37:56:17,  6.78it/s]

{'loss': Array(0.1522808, dtype=float32), 'loss_cross_entropy': Array(0.14341123, dtype=float32)}
{'loss_inverse': Array(3.04032e-07, dtype=float32)}


  7%|▋         | 74500/1000000 [3:31:27<27:03:50,  9.50it/s]

{'loss': Array(0.14915885, dtype=float32), 'loss_cross_entropy': Array(0.14119916, dtype=float32)}
{'loss_inverse': Array(2.6559994e-07, dtype=float32)}


  7%|▋         | 74510/1000000 [3:31:35<99:57:25,  2.57it/s] 

{'loss': Array(0.15755676, dtype=float32), 'loss_cross_entropy': Array(0.14867301, dtype=float32)}
{'loss_inverse': Array(2.2488057e-07, dtype=float32)}


  7%|▋         | 74520/1000000 [3:31:37<38:11:22,  6.73it/s] 

{'loss': Array(0.15645878, dtype=float32), 'loss_cross_entropy': Array(0.14753163, dtype=float32)}
{'loss_inverse': Array(1.9226929e-05, dtype=float32)}


  7%|▋         | 74530/1000000 [3:31:38<27:16:11,  9.43it/s]

{'loss': Array(0.14668941, dtype=float32), 'loss_cross_entropy': Array(0.13795169, dtype=float32)}
{'loss_inverse': Array(5.626462e-05, dtype=float32)}


  7%|▋         | 74540/1000000 [3:31:40<29:43:01,  8.65it/s]

{'loss': Array(0.14050038, dtype=float32), 'loss_cross_entropy': Array(0.13174872, dtype=float32)}
{'loss_inverse': Array(1.6434968e-06, dtype=float32)}


  7%|▋         | 74550/1000000 [3:31:41<26:24:03,  9.74it/s]

{'loss': Array(0.13600992, dtype=float32), 'loss_cross_entropy': Array(0.12813371, dtype=float32)}
{'loss_inverse': Array(5.0277927e-06, dtype=float32)}


  7%|▋         | 74560/1000000 [3:31:43<38:42:51,  6.64it/s]

{'loss': Array(0.1404334, dtype=float32), 'loss_cross_entropy': Array(0.13214351, dtype=float32)}
{'loss_inverse': Array(2.5745892e-06, dtype=float32)}


  7%|▋         | 74570/1000000 [3:31:45<27:41:13,  9.28it/s]

{'loss': Array(0.17027402, dtype=float32), 'loss_cross_entropy': Array(0.16097108, dtype=float32)}
{'loss_inverse': Array(1.9711572e-06, dtype=float32)}


  7%|▋         | 74580/1000000 [3:31:46<25:33:45, 10.06it/s]

{'loss': Array(0.13490012, dtype=float32), 'loss_cross_entropy': Array(0.12706657, dtype=float32)}
{'loss_inverse': Array(1.3485997e-07, dtype=float32)}


  7%|▋         | 74590/1000000 [3:31:48<30:07:52,  8.53it/s]

{'loss': Array(0.13285759, dtype=float32), 'loss_cross_entropy': Array(0.12501965, dtype=float32)}
{'loss_inverse': Array(4.719741e-07, dtype=float32)}


  7%|▋         | 74600/1000000 [3:31:49<26:05:03,  9.85it/s]

{'loss': Array(0.15236665, dtype=float32), 'loss_cross_entropy': Array(0.14353172, dtype=float32)}
{'loss_inverse': Array(2.5080635e-06, dtype=float32)}


  7%|▋         | 74610/1000000 [3:31:51<29:46:55,  8.63it/s]

{'loss': Array(0.14542817, dtype=float32), 'loss_cross_entropy': Array(0.1371636, dtype=float32)}
{'loss_inverse': Array(4.3750512e-07, dtype=float32)}


  7%|▋         | 74620/1000000 [3:31:52<25:34:45, 10.05it/s]

{'loss': Array(0.1448486, dtype=float32), 'loss_cross_entropy': Array(0.1361171, dtype=float32)}
{'loss_inverse': Array(6.2920444e-05, dtype=float32)}


  7%|▋         | 74630/1000000 [3:31:54<31:51:25,  8.07it/s]

{'loss': Array(0.14289032, dtype=float32), 'loss_cross_entropy': Array(0.13568881, dtype=float32)}
{'loss_inverse': Array(2.6778525e-06, dtype=float32)}


  7%|▋         | 74640/1000000 [3:31:56<26:51:52,  9.57it/s]

{'loss': Array(0.14970855, dtype=float32), 'loss_cross_entropy': Array(0.14090985, dtype=float32)}
{'loss_inverse': Array(6.3030275e-05, dtype=float32)}


  7%|▋         | 74650/1000000 [3:31:57<25:33:32, 10.06it/s]

{'loss': Array(0.15182981, dtype=float32), 'loss_cross_entropy': Array(0.14298107, dtype=float32)}
{'loss_inverse': Array(3.3806147e-05, dtype=float32)}


  7%|▋         | 74660/1000000 [3:31:59<28:42:46,  8.95it/s]

{'loss': Array(0.15377605, dtype=float32), 'loss_cross_entropy': Array(0.1449214, dtype=float32)}
{'loss_inverse': Array(4.539065e-05, dtype=float32)}


  7%|▋         | 74670/1000000 [3:32:00<25:47:03,  9.97it/s]

{'loss': Array(0.15431742, dtype=float32), 'loss_cross_entropy': Array(0.14551464, dtype=float32)}
{'loss_inverse': Array(1.9100913e-05, dtype=float32)}


  7%|▋         | 74680/1000000 [3:32:02<30:01:48,  8.56it/s]

{'loss': Array(0.14437161, dtype=float32), 'loss_cross_entropy': Array(0.13546601, dtype=float32)}
{'loss_inverse': Array(9.9186786e-05, dtype=float32)}


  7%|▋         | 74690/1000000 [3:32:03<26:05:38,  9.85it/s]

{'loss': Array(0.14334615, dtype=float32), 'loss_cross_entropy': Array(0.13528125, dtype=float32)}
{'loss_inverse': Array(1.1294593e-06, dtype=float32)}


  7%|▋         | 74700/1000000 [3:32:05<32:04:08,  8.01it/s]

{'loss': Array(0.16222934, dtype=float32), 'loss_cross_entropy': Array(0.15203527, dtype=float32)}
{'loss_inverse': Array(1.4104543e-07, dtype=float32)}


  7%|▋         | 74710/1000000 [3:32:07<27:07:09,  9.48it/s]

{'loss': Array(0.13733105, dtype=float32), 'loss_cross_entropy': Array(0.1292935, dtype=float32)}
{'loss_inverse': Array(0.00012753, dtype=float32)}


  7%|▋         | 74720/1000000 [3:32:08<25:24:43, 10.11it/s]

{'loss': Array(0.1425079, dtype=float32), 'loss_cross_entropy': Array(0.13475871, dtype=float32)}
{'loss_inverse': Array(2.9249575e-06, dtype=float32)}


  7%|▋         | 74730/1000000 [3:32:10<30:06:26,  8.54it/s]

{'loss': Array(0.14026323, dtype=float32), 'loss_cross_entropy': Array(0.13201326, dtype=float32)}
{'loss_inverse': Array(8.91243e-05, dtype=float32)}


  7%|▋         | 74740/1000000 [3:32:11<25:36:04, 10.04it/s]

{'loss': Array(0.14209989, dtype=float32), 'loss_cross_entropy': Array(0.1332963, dtype=float32)}
{'loss_inverse': Array(2.2178507e-05, dtype=float32)}


  7%|▋         | 74750/1000000 [3:32:13<32:01:35,  8.03it/s]

{'loss': Array(0.14010316, dtype=float32), 'loss_cross_entropy': Array(0.13175063, dtype=float32)}
{'loss_inverse': Array(8.386015e-06, dtype=float32)}


  7%|▋         | 74760/1000000 [3:32:14<26:14:15,  9.80it/s]

{'loss': Array(0.1378596, dtype=float32), 'loss_cross_entropy': Array(0.12978984, dtype=float32)}
{'loss_inverse': Array(2.1804959e-05, dtype=float32)}


  7%|▋         | 74770/1000000 [3:32:16<34:59:26,  7.35it/s]

{'loss': Array(0.15344736, dtype=float32), 'loss_cross_entropy': Array(0.14500152, dtype=float32)}
{'loss_inverse': Array(8.409569e-05, dtype=float32)}


  7%|▋         | 74780/1000000 [3:32:18<26:58:25,  9.53it/s]

{'loss': Array(0.1405342, dtype=float32), 'loss_cross_entropy': Array(0.13248733, dtype=float32)}
{'loss_inverse': Array(8.8882933e-07, dtype=float32)}


  7%|▋         | 74790/1000000 [3:32:19<24:52:46, 10.33it/s]

{'loss': Array(0.15232502, dtype=float32), 'loss_cross_entropy': Array(0.14383882, dtype=float32)}
{'loss_inverse': Array(9.796467e-06, dtype=float32)}


  7%|▋         | 74800/1000000 [3:32:21<27:57:56,  9.19it/s]

{'loss': Array(0.14623116, dtype=float32), 'loss_cross_entropy': Array(0.13759778, dtype=float32)}
{'loss_inverse': Array(4.2140775e-05, dtype=float32)}


  7%|▋         | 74810/1000000 [3:32:22<25:11:15, 10.20it/s]

{'loss': Array(0.13918537, dtype=float32), 'loss_cross_entropy': Array(0.13070099, dtype=float32)}
{'loss_inverse': Array(4.4772456e-05, dtype=float32)}


  7%|▋         | 74820/1000000 [3:32:24<33:53:51,  7.58it/s]

{'loss': Array(0.14208029, dtype=float32), 'loss_cross_entropy': Array(0.13446258, dtype=float32)}
{'loss_inverse': Array(0.00024035, dtype=float32)}


  7%|▋         | 74830/1000000 [3:32:25<26:27:07,  9.72it/s]

{'loss': Array(0.14568074, dtype=float32), 'loss_cross_entropy': Array(0.13716625, dtype=float32)}
{'loss_inverse': Array(5.140769e-06, dtype=float32)}


  7%|▋         | 74840/1000000 [3:32:27<25:16:36, 10.17it/s]

{'loss': Array(0.1537177, dtype=float32), 'loss_cross_entropy': Array(0.14498366, dtype=float32)}
{'loss_inverse': Array(3.3594242e-07, dtype=float32)}


  7%|▋         | 74850/1000000 [3:32:28<28:00:34,  9.17it/s]

{'loss': Array(0.14964432, dtype=float32), 'loss_cross_entropy': Array(0.1408963, dtype=float32)}
{'loss_inverse': Array(3.546076e-05, dtype=float32)}


  7%|▋         | 74860/1000000 [3:32:30<26:08:29,  9.83it/s]

{'loss': Array(0.15999547, dtype=float32), 'loss_cross_entropy': Array(0.15039928, dtype=float32)}
{'loss_inverse': Array(1.0426441e-05, dtype=float32)}


  7%|▋         | 74870/1000000 [3:32:32<29:44:37,  8.64it/s]

{'loss': Array(0.13598257, dtype=float32), 'loss_cross_entropy': Array(0.12777346, dtype=float32)}
{'loss_inverse': Array(5.324479e-07, dtype=float32)}


  7%|▋         | 74880/1000000 [3:32:33<26:14:48,  9.79it/s]

{'loss': Array(0.1363328, dtype=float32), 'loss_cross_entropy': Array(0.1282461, dtype=float32)}
{'loss_inverse': Array(4.7982403e-06, dtype=float32)}


  7%|▋         | 74890/1000000 [3:32:35<31:39:55,  8.12it/s]

{'loss': Array(0.14881608, dtype=float32), 'loss_cross_entropy': Array(0.14076708, dtype=float32)}
{'loss_inverse': Array(8.517281e-05, dtype=float32)}


  7%|▋         | 74900/1000000 [3:32:36<26:10:13,  9.82it/s]

{'loss': Array(0.15847111, dtype=float32), 'loss_cross_entropy': Array(0.1489604, dtype=float32)}
{'loss_inverse': Array(2.5134166e-06, dtype=float32)}


  7%|▋         | 74910/1000000 [3:32:38<25:35:11, 10.04it/s]

{'loss': Array(0.17885858, dtype=float32), 'loss_cross_entropy': Array(0.16961448, dtype=float32)}
{'loss_inverse': Array(1.1480675e-05, dtype=float32)}


  7%|▋         | 74920/1000000 [3:32:39<29:19:37,  8.76it/s]

{'loss': Array(0.14376998, dtype=float32), 'loss_cross_entropy': Array(0.13579367, dtype=float32)}
{'loss_inverse': Array(6.7854307e-06, dtype=float32)}


  7%|▋         | 74930/1000000 [3:32:41<26:01:09,  9.88it/s]

{'loss': Array(0.16371581, dtype=float32), 'loss_cross_entropy': Array(0.15402208, dtype=float32)}
{'loss_inverse': Array(1.5249987e-05, dtype=float32)}


  7%|▋         | 74940/1000000 [3:32:43<35:18:54,  7.28it/s]

{'loss': Array(0.17004287, dtype=float32), 'loss_cross_entropy': Array(0.16087751, dtype=float32)}
{'loss_inverse': Array(2.7557292e-06, dtype=float32)}


  7%|▋         | 74950/1000000 [3:32:44<26:33:47,  9.67it/s]

{'loss': Array(0.16256279, dtype=float32), 'loss_cross_entropy': Array(0.15317445, dtype=float32)}
{'loss_inverse': Array(6.983953e-06, dtype=float32)}


  7%|▋         | 74960/1000000 [3:32:46<38:03:03,  6.75it/s]

{'loss': Array(0.13734205, dtype=float32), 'loss_cross_entropy': Array(0.12946406, dtype=float32)}
{'loss_inverse': Array(2.4361352e-06, dtype=float32)}


  7%|▋         | 74970/1000000 [3:32:47<27:20:51,  9.40it/s]

{'loss': Array(0.1402051, dtype=float32), 'loss_cross_entropy': Array(0.13179675, dtype=float32)}
{'loss_inverse': Array(4.4874774e-05, dtype=float32)}


  7%|▋         | 74980/1000000 [3:32:49<38:33:18,  6.66it/s]

{'loss': Array(0.16120249, dtype=float32), 'loss_cross_entropy': Array(0.1519637, dtype=float32)}
{'loss_inverse': Array(9.150567e-07, dtype=float32)}


  7%|▋         | 74990/1000000 [3:32:50<27:10:54,  9.45it/s]

{'loss': Array(0.14896783, dtype=float32), 'loss_cross_entropy': Array(0.1402313, dtype=float32)}
{'loss_inverse': Array(2.9912087e-06, dtype=float32)}


  8%|▊         | 75000/1000000 [3:32:52<25:36:23, 10.03it/s]

{'loss': Array(0.11054157, dtype=float32), 'loss_cross_entropy': Array(0.10346569, dtype=float32)}
{'loss_inverse': Array(4.887723e-06, dtype=float32)}


  8%|▊         | 75010/1000000 [3:33:00<95:16:35,  2.70it/s] 

{'loss': Array(0.14752643, dtype=float32), 'loss_cross_entropy': Array(0.13913743, dtype=float32)}
{'loss_inverse': Array(6.009455e-06, dtype=float32)}


  8%|▊         | 75020/1000000 [3:33:02<37:23:47,  6.87it/s] 

{'loss': Array(0.1451217, dtype=float32), 'loss_cross_entropy': Array(0.13688831, dtype=float32)}
{'loss_inverse': Array(6.080935e-07, dtype=float32)}


  8%|▊         | 75030/1000000 [3:33:03<27:25:53,  9.37it/s]

{'loss': Array(0.1337963, dtype=float32), 'loss_cross_entropy': Array(0.12618911, dtype=float32)}
{'loss_inverse': Array(3.941723e-06, dtype=float32)}


  8%|▊         | 75040/1000000 [3:33:05<28:56:43,  8.88it/s]

{'loss': Array(0.14961003, dtype=float32), 'loss_cross_entropy': Array(0.14063072, dtype=float32)}
{'loss_inverse': Array(4.750892e-07, dtype=float32)}


  8%|▊         | 75050/1000000 [3:33:06<26:14:59,  9.79it/s]

{'loss': Array(0.14994265, dtype=float32), 'loss_cross_entropy': Array(0.14157152, dtype=float32)}
{'loss_inverse': Array(4.0659517e-05, dtype=float32)}


  8%|▊         | 75060/1000000 [3:33:08<30:38:04,  8.39it/s]

{'loss': Array(0.13181491, dtype=float32), 'loss_cross_entropy': Array(0.12345614, dtype=float32)}
{'loss_inverse': Array(1.9565508e-07, dtype=float32)}


  8%|▊         | 75070/1000000 [3:33:09<25:39:21, 10.01it/s]

{'loss': Array(0.13421193, dtype=float32), 'loss_cross_entropy': Array(0.12647794, dtype=float32)}
{'loss_inverse': Array(3.5966052e-06, dtype=float32)}


  8%|▊         | 75080/1000000 [3:33:11<29:28:41,  8.72it/s]

{'loss': Array(0.13239451, dtype=float32), 'loss_cross_entropy': Array(0.12483032, dtype=float32)}
{'loss_inverse': Array(7.5517363e-07, dtype=float32)}


  8%|▊         | 75090/1000000 [3:33:13<26:23:45,  9.73it/s]

{'loss': Array(0.14478932, dtype=float32), 'loss_cross_entropy': Array(0.13639209, dtype=float32)}
{'loss_inverse': Array(3.5563316e-06, dtype=float32)}


  8%|▊         | 75100/1000000 [3:33:14<29:53:29,  8.59it/s]

{'loss': Array(0.1442327, dtype=float32), 'loss_cross_entropy': Array(0.1360099, dtype=float32)}
{'loss_inverse': Array(1.4373853e-06, dtype=float32)}


  8%|▊         | 75110/1000000 [3:33:16<26:04:09,  9.85it/s]

{'loss': Array(0.14505339, dtype=float32), 'loss_cross_entropy': Array(0.13605173, dtype=float32)}
{'loss_inverse': Array(7.303458e-07, dtype=float32)}


  8%|▊         | 75120/1000000 [3:33:18<34:14:18,  7.50it/s]

{'loss': Array(0.1523402, dtype=float32), 'loss_cross_entropy': Array(0.14312379, dtype=float32)}
{'loss_inverse': Array(3.0627973e-06, dtype=float32)}


  8%|▊         | 75130/1000000 [3:33:19<26:30:42,  9.69it/s]

{'loss': Array(0.14190048, dtype=float32), 'loss_cross_entropy': Array(0.13370842, dtype=float32)}
{'loss_inverse': Array(3.2437475e-07, dtype=float32)}


  8%|▊         | 75140/1000000 [3:33:21<38:40:40,  6.64it/s]

{'loss': Array(0.1590659, dtype=float32), 'loss_cross_entropy': Array(0.15021335, dtype=float32)}
{'loss_inverse': Array(8.2458754e-08, dtype=float32)}


  8%|▊         | 75150/1000000 [3:33:22<27:14:05,  9.43it/s]

{'loss': Array(0.13175906, dtype=float32), 'loss_cross_entropy': Array(0.12400985, dtype=float32)}
{'loss_inverse': Array(1.1847308e-06, dtype=float32)}


  8%|▊         | 75160/1000000 [3:33:24<34:23:20,  7.47it/s]

{'loss': Array(0.15553908, dtype=float32), 'loss_cross_entropy': Array(0.14656956, dtype=float32)}
{'loss_inverse': Array(2.7651884e-06, dtype=float32)}


  8%|▊         | 75170/1000000 [3:33:25<27:12:16,  9.44it/s]

{'loss': Array(0.13891168, dtype=float32), 'loss_cross_entropy': Array(0.13031988, dtype=float32)}
{'loss_inverse': Array(9.648261e-07, dtype=float32)}


  8%|▊         | 75180/1000000 [3:33:27<25:08:58, 10.21it/s]

{'loss': Array(0.13617675, dtype=float32), 'loss_cross_entropy': Array(0.1278531, dtype=float32)}
{'loss_inverse': Array(1.7098033e-06, dtype=float32)}


  8%|▊         | 75190/1000000 [3:33:29<30:05:51,  8.54it/s]

{'loss': Array(0.13266607, dtype=float32), 'loss_cross_entropy': Array(0.12454025, dtype=float32)}
{'loss_inverse': Array(8.241192e-08, dtype=float32)}


  8%|▊         | 75200/1000000 [3:33:30<26:11:35,  9.81it/s]

{'loss': Array(0.15537626, dtype=float32), 'loss_cross_entropy': Array(0.14689672, dtype=float32)}
{'loss_inverse': Array(4.810561e-07, dtype=float32)}


  8%|▊         | 75210/1000000 [3:33:32<31:23:45,  8.18it/s]

{'loss': Array(0.13877176, dtype=float32), 'loss_cross_entropy': Array(0.13047731, dtype=float32)}
{'loss_inverse': Array(1.03852926e-07, dtype=float32)}


  8%|▊         | 75220/1000000 [3:33:33<26:25:32,  9.72it/s]

{'loss': Array(0.15137565, dtype=float32), 'loss_cross_entropy': Array(0.14306395, dtype=float32)}
{'loss_inverse': Array(1.093326e-06, dtype=float32)}


  8%|▊         | 75230/1000000 [3:33:35<34:28:42,  7.45it/s]

{'loss': Array(0.15859005, dtype=float32), 'loss_cross_entropy': Array(0.14920442, dtype=float32)}
{'loss_inverse': Array(1.3105132e-07, dtype=float32)}


  8%|▊         | 75240/1000000 [3:33:36<26:56:24,  9.54it/s]

{'loss': Array(0.14134295, dtype=float32), 'loss_cross_entropy': Array(0.1336037, dtype=float32)}
{'loss_inverse': Array(4.3821374e-06, dtype=float32)}


  8%|▊         | 75250/1000000 [3:33:38<34:54:20,  7.36it/s]

{'loss': Array(0.14694022, dtype=float32), 'loss_cross_entropy': Array(0.13825904, dtype=float32)}
{'loss_inverse': Array(6.6076615e-07, dtype=float32)}


  8%|▊         | 75260/1000000 [3:33:40<26:48:34,  9.58it/s]

{'loss': Array(0.15943556, dtype=float32), 'loss_cross_entropy': Array(0.15061139, dtype=float32)}
{'loss_inverse': Array(6.5919203e-06, dtype=float32)}


  8%|▊         | 75270/1000000 [3:33:41<25:09:06, 10.21it/s]

{'loss': Array(0.14387351, dtype=float32), 'loss_cross_entropy': Array(0.13560092, dtype=float32)}
{'loss_inverse': Array(1.4185906e-07, dtype=float32)}


  8%|▊         | 75280/1000000 [3:33:43<31:16:32,  8.21it/s]

{'loss': Array(0.1447308, dtype=float32), 'loss_cross_entropy': Array(0.13653664, dtype=float32)}
{'loss_inverse': Array(1.0985511e-06, dtype=float32)}


  8%|▊         | 75290/1000000 [3:33:44<26:21:22,  9.75it/s]

{'loss': Array(0.14383356, dtype=float32), 'loss_cross_entropy': Array(0.13571475, dtype=float32)}
{'loss_inverse': Array(2.4680548e-07, dtype=float32)}


  8%|▊         | 75300/1000000 [3:33:46<38:07:09,  6.74it/s]

{'loss': Array(0.14965144, dtype=float32), 'loss_cross_entropy': Array(0.14144622, dtype=float32)}
{'loss_inverse': Array(6.7975645e-07, dtype=float32)}


  8%|▊         | 75310/1000000 [3:33:47<27:17:29,  9.41it/s]

{'loss': Array(0.1483362, dtype=float32), 'loss_cross_entropy': Array(0.14000171, dtype=float32)}
{'loss_inverse': Array(1.07957e-07, dtype=float32)}


  8%|▊         | 75320/1000000 [3:33:49<26:29:00,  9.70it/s]

{'loss': Array(0.16953997, dtype=float32), 'loss_cross_entropy': Array(0.16035818, dtype=float32)}
{'loss_inverse': Array(8.1549366e-08, dtype=float32)}


  8%|▊         | 75330/1000000 [3:33:50<28:55:25,  8.88it/s]

{'loss': Array(0.14162035, dtype=float32), 'loss_cross_entropy': Array(0.13353078, dtype=float32)}
{'loss_inverse': Array(1.2583432e-07, dtype=float32)}


  8%|▊         | 75340/1000000 [3:33:52<26:10:15,  9.81it/s]

{'loss': Array(0.13792254, dtype=float32), 'loss_cross_entropy': Array(0.12999438, dtype=float32)}
{'loss_inverse': Array(1.4776519e-07, dtype=float32)}


  8%|▊         | 75350/1000000 [3:33:54<28:12:56,  9.10it/s]

{'loss': Array(0.15281755, dtype=float32), 'loss_cross_entropy': Array(0.144495, dtype=float32)}
{'loss_inverse': Array(5.496106e-06, dtype=float32)}


  8%|▊         | 75360/1000000 [3:33:55<25:29:59, 10.07it/s]

{'loss': Array(0.14245969, dtype=float32), 'loss_cross_entropy': Array(0.13415693, dtype=float32)}
{'loss_inverse': Array(7.6884184e-08, dtype=float32)}


  8%|▊         | 75370/1000000 [3:33:57<32:13:34,  7.97it/s]

{'loss': Array(0.14086969, dtype=float32), 'loss_cross_entropy': Array(0.13225004, dtype=float32)}
{'loss_inverse': Array(1.9487338e-07, dtype=float32)}


  8%|▊         | 75380/1000000 [3:33:58<26:19:51,  9.75it/s]

{'loss': Array(0.15347135, dtype=float32), 'loss_cross_entropy': Array(0.14454977, dtype=float32)}
{'loss_inverse': Array(2.938997e-06, dtype=float32)}


  8%|▊         | 75390/1000000 [3:34:00<31:30:30,  8.15it/s]

{'loss': Array(0.15100366, dtype=float32), 'loss_cross_entropy': Array(0.14230882, dtype=float32)}
{'loss_inverse': Array(8.478851e-07, dtype=float32)}


  8%|▊         | 75400/1000000 [3:34:01<26:18:34,  9.76it/s]

{'loss': Array(0.15041693, dtype=float32), 'loss_cross_entropy': Array(0.14168608, dtype=float32)}
{'loss_inverse': Array(1.1121729e-07, dtype=float32)}


  8%|▊         | 75410/1000000 [3:34:03<38:16:58,  6.71it/s]

{'loss': Array(0.13899733, dtype=float32), 'loss_cross_entropy': Array(0.1309259, dtype=float32)}
{'loss_inverse': Array(1.4305947e-06, dtype=float32)}


  8%|▊         | 75420/1000000 [3:34:05<26:52:54,  9.55it/s]

{'loss': Array(0.13123077, dtype=float32), 'loss_cross_entropy': Array(0.12359202, dtype=float32)}
{'loss_inverse': Array(1.1409889e-07, dtype=float32)}


  8%|▊         | 75430/1000000 [3:34:06<25:21:15, 10.13it/s]

{'loss': Array(0.15494314, dtype=float32), 'loss_cross_entropy': Array(0.14673255, dtype=float32)}
{'loss_inverse': Array(3.754085e-07, dtype=float32)}


  8%|▊         | 75440/1000000 [3:34:08<29:05:14,  8.83it/s]

{'loss': Array(0.14401643, dtype=float32), 'loss_cross_entropy': Array(0.13584594, dtype=float32)}
{'loss_inverse': Array(1.8680002e-08, dtype=float32)}


  8%|▊         | 75450/1000000 [3:34:09<25:34:49, 10.04it/s]

{'loss': Array(0.14496358, dtype=float32), 'loss_cross_entropy': Array(0.13643077, dtype=float32)}
{'loss_inverse': Array(2.0990619e-07, dtype=float32)}


  8%|▊         | 75460/1000000 [3:34:11<31:23:43,  8.18it/s]

{'loss': Array(0.14918901, dtype=float32), 'loss_cross_entropy': Array(0.1404313, dtype=float32)}
{'loss_inverse': Array(5.0582963e-07, dtype=float32)}


  8%|▊         | 75470/1000000 [3:34:12<26:18:36,  9.76it/s]

{'loss': Array(0.15786085, dtype=float32), 'loss_cross_entropy': Array(0.14900063, dtype=float32)}
{'loss_inverse': Array(1.2240359e-07, dtype=float32)}


  8%|▊         | 75480/1000000 [3:34:14<26:17:34,  9.77it/s]

{'loss': Array(0.1522971, dtype=float32), 'loss_cross_entropy': Array(0.14393122, dtype=float32)}
{'loss_inverse': Array(4.956868e-07, dtype=float32)}


  8%|▊         | 75490/1000000 [3:34:16<29:16:59,  8.77it/s]

{'loss': Array(0.13514704, dtype=float32), 'loss_cross_entropy': Array(0.1269631, dtype=float32)}
{'loss_inverse': Array(4.602493e-08, dtype=float32)}


  8%|▊         | 75500/1000000 [3:34:17<25:30:42, 10.07it/s]

{'loss': Array(0.13564871, dtype=float32), 'loss_cross_entropy': Array(0.12762883, dtype=float32)}
{'loss_inverse': Array(3.8165656e-08, dtype=float32)}


  8%|▊         | 75510/1000000 [3:34:25<93:48:15,  2.74it/s] 

{'loss': Array(0.1551837, dtype=float32), 'loss_cross_entropy': Array(0.14586091, dtype=float32)}
{'loss_inverse': Array(3.406106e-05, dtype=float32)}


  8%|▊         | 75520/1000000 [3:34:27<37:37:19,  6.83it/s] 

{'loss': Array(0.15750562, dtype=float32), 'loss_cross_entropy': Array(0.14829086, dtype=float32)}
{'loss_inverse': Array(1.5318712e-05, dtype=float32)}


  8%|▊         | 75530/1000000 [3:34:28<39:26:04,  6.51it/s]

{'loss': Array(0.13341253, dtype=float32), 'loss_cross_entropy': Array(0.12557243, dtype=float32)}
{'loss_inverse': Array(4.2087722e-05, dtype=float32)}


  8%|▊         | 75540/1000000 [3:34:30<27:54:18,  9.20it/s]

{'loss': Array(0.15482377, dtype=float32), 'loss_cross_entropy': Array(0.14583406, dtype=float32)}
{'loss_inverse': Array(5.922599e-06, dtype=float32)}


  8%|▊         | 75550/1000000 [3:34:31<25:41:02, 10.00it/s]

{'loss': Array(0.12158155, dtype=float32), 'loss_cross_entropy': Array(0.11364185, dtype=float32)}
{'loss_inverse': Array(2.1672518e-06, dtype=float32)}


  8%|▊         | 75560/1000000 [3:34:33<28:30:01,  9.01it/s]

{'loss': Array(0.13941391, dtype=float32), 'loss_cross_entropy': Array(0.13140173, dtype=float32)}
{'loss_inverse': Array(4.1023554e-06, dtype=float32)}


  8%|▊         | 75570/1000000 [3:34:35<37:56:59,  6.77it/s]

{'loss': Array(0.15040213, dtype=float32), 'loss_cross_entropy': Array(0.14173543, dtype=float32)}
{'loss_inverse': Array(0.00019352, dtype=float32)}


  8%|▊         | 75580/1000000 [3:34:36<26:57:11,  9.53it/s]

{'loss': Array(0.15292326, dtype=float32), 'loss_cross_entropy': Array(0.14402157, dtype=float32)}
{'loss_inverse': Array(6.3195957e-06, dtype=float32)}


  8%|▊         | 75590/1000000 [3:34:37<25:24:13, 10.11it/s]

{'loss': Array(0.13068263, dtype=float32), 'loss_cross_entropy': Array(0.12321818, dtype=float32)}
{'loss_inverse': Array(4.8339392e-05, dtype=float32)}


  8%|▊         | 75600/1000000 [3:34:39<28:26:18,  9.03it/s]

{'loss': Array(0.14924066, dtype=float32), 'loss_cross_entropy': Array(0.14057633, dtype=float32)}
{'loss_inverse': Array(1.1428746e-06, dtype=float32)}


  8%|▊         | 75610/1000000 [3:34:41<26:03:36,  9.85it/s]

{'loss': Array(0.13322966, dtype=float32), 'loss_cross_entropy': Array(0.1254103, dtype=float32)}
{'loss_inverse': Array(4.1750063e-05, dtype=float32)}


  8%|▊         | 75620/1000000 [3:34:42<29:49:29,  8.61it/s]

{'loss': Array(0.13254602, dtype=float32), 'loss_cross_entropy': Array(0.12475494, dtype=float32)}
{'loss_inverse': Array(6.3131294e-05, dtype=float32)}


  8%|▊         | 75630/1000000 [3:34:44<26:32:02,  9.68it/s]

{'loss': Array(0.14881766, dtype=float32), 'loss_cross_entropy': Array(0.13990127, dtype=float32)}
{'loss_inverse': Array(0.00013497, dtype=float32)}


  8%|▊         | 75640/1000000 [3:34:46<32:14:16,  7.96it/s]

{'loss': Array(0.1569631, dtype=float32), 'loss_cross_entropy': Array(0.14857009, dtype=float32)}
{'loss_inverse': Array(4.7525145e-06, dtype=float32)}


  8%|▊         | 75650/1000000 [3:34:47<26:43:14,  9.61it/s]

{'loss': Array(0.14681144, dtype=float32), 'loss_cross_entropy': Array(0.13876164, dtype=float32)}
{'loss_inverse': Array(3.0361798e-05, dtype=float32)}


  8%|▊         | 75660/1000000 [3:34:49<25:15:46, 10.16it/s]

{'loss': Array(0.1574134, dtype=float32), 'loss_cross_entropy': Array(0.14866158, dtype=float32)}
{'loss_inverse': Array(0.00014672, dtype=float32)}


  8%|▊         | 75670/1000000 [3:34:50<30:03:39,  8.54it/s]

{'loss': Array(0.14556746, dtype=float32), 'loss_cross_entropy': Array(0.13675286, dtype=float32)}
{'loss_inverse': Array(2.0059013e-06, dtype=float32)}


  8%|▊         | 75680/1000000 [3:34:52<26:34:54,  9.66it/s]

{'loss': Array(0.16052862, dtype=float32), 'loss_cross_entropy': Array(0.1519829, dtype=float32)}
{'loss_inverse': Array(7.970499e-06, dtype=float32)}


  8%|▊         | 75690/1000000 [3:34:54<31:21:00,  8.19it/s]

{'loss': Array(0.1303638, dtype=float32), 'loss_cross_entropy': Array(0.12251379, dtype=float32)}
{'loss_inverse': Array(8.079838e-06, dtype=float32)}


  8%|▊         | 75700/1000000 [3:34:55<25:53:53,  9.91it/s]

{'loss': Array(0.15126322, dtype=float32), 'loss_cross_entropy': Array(0.14290398, dtype=float32)}
{'loss_inverse': Array(4.954408e-07, dtype=float32)}


  8%|▊         | 75710/1000000 [3:34:57<34:04:16,  7.54it/s]

{'loss': Array(0.14731394, dtype=float32), 'loss_cross_entropy': Array(0.13841473, dtype=float32)}
{'loss_inverse': Array(7.033377e-06, dtype=float32)}


  8%|▊         | 75720/1000000 [3:34:58<26:07:31,  9.83it/s]

{'loss': Array(0.1433638, dtype=float32), 'loss_cross_entropy': Array(0.13527977, dtype=float32)}
{'loss_inverse': Array(1.9931378e-05, dtype=float32)}


  8%|▊         | 75730/1000000 [3:34:59<25:26:06, 10.09it/s]

{'loss': Array(0.1517751, dtype=float32), 'loss_cross_entropy': Array(0.14251764, dtype=float32)}
{'loss_inverse': Array(1.5987763e-07, dtype=float32)}


  8%|▊         | 75740/1000000 [3:35:01<27:50:31,  9.22it/s]

{'loss': Array(0.13891657, dtype=float32), 'loss_cross_entropy': Array(0.13012333, dtype=float32)}
{'loss_inverse': Array(4.500223e-06, dtype=float32)}


  8%|▊         | 75750/1000000 [3:35:03<25:54:26,  9.91it/s]

{'loss': Array(0.12969778, dtype=float32), 'loss_cross_entropy': Array(0.12178292, dtype=float32)}
{'loss_inverse': Array(7.697135e-05, dtype=float32)}


  8%|▊         | 75760/1000000 [3:35:04<33:40:50,  7.62it/s]

{'loss': Array(0.14420538, dtype=float32), 'loss_cross_entropy': Array(0.13601287, dtype=float32)}
{'loss_inverse': Array(2.582326e-05, dtype=float32)}


  8%|▊         | 75770/1000000 [3:35:06<26:35:56,  9.65it/s]

{'loss': Array(0.15191634, dtype=float32), 'loss_cross_entropy': Array(0.14324787, dtype=float32)}
{'loss_inverse': Array(9.426005e-08, dtype=float32)}


  8%|▊         | 75780/1000000 [3:35:07<26:15:02,  9.78it/s]

{'loss': Array(0.1429417, dtype=float32), 'loss_cross_entropy': Array(0.13451631, dtype=float32)}
{'loss_inverse': Array(6.0798917e-05, dtype=float32)}


  8%|▊         | 75790/1000000 [3:35:09<29:04:21,  8.83it/s]

{'loss': Array(0.14236169, dtype=float32), 'loss_cross_entropy': Array(0.13446392, dtype=float32)}
{'loss_inverse': Array(3.493677e-06, dtype=float32)}


  8%|▊         | 75800/1000000 [3:35:10<25:24:45, 10.10it/s]

{'loss': Array(0.14532135, dtype=float32), 'loss_cross_entropy': Array(0.13643676, dtype=float32)}
{'loss_inverse': Array(3.4930417e-06, dtype=float32)}


  8%|▊         | 75810/1000000 [3:35:12<29:10:59,  8.80it/s]

{'loss': Array(0.13604324, dtype=float32), 'loss_cross_entropy': Array(0.12817752, dtype=float32)}
{'loss_inverse': Array(2.0222342e-06, dtype=float32)}


  8%|▊         | 75820/1000000 [3:35:14<25:39:26, 10.01it/s]

{'loss': Array(0.13888328, dtype=float32), 'loss_cross_entropy': Array(0.1309646, dtype=float32)}
{'loss_inverse': Array(2.8613065e-06, dtype=float32)}


  8%|▊         | 75830/1000000 [3:35:15<31:30:30,  8.15it/s]

{'loss': Array(0.17235623, dtype=float32), 'loss_cross_entropy': Array(0.16264941, dtype=float32)}
{'loss_inverse': Array(1.04921014e-07, dtype=float32)}


  8%|▊         | 75840/1000000 [3:35:17<26:16:35,  9.77it/s]

{'loss': Array(0.12818764, dtype=float32), 'loss_cross_entropy': Array(0.12036073, dtype=float32)}
{'loss_inverse': Array(5.8358164e-06, dtype=float32)}


  8%|▊         | 75850/1000000 [3:35:18<25:41:29,  9.99it/s]

{'loss': Array(0.14474143, dtype=float32), 'loss_cross_entropy': Array(0.1359439, dtype=float32)}
{'loss_inverse': Array(5.3648773e-07, dtype=float32)}


  8%|▊         | 75860/1000000 [3:35:20<29:49:05,  8.61it/s]

{'loss': Array(0.1666352, dtype=float32), 'loss_cross_entropy': Array(0.15703987, dtype=float32)}
{'loss_inverse': Array(7.289268e-05, dtype=float32)}


  8%|▊         | 75870/1000000 [3:35:21<25:32:52, 10.05it/s]

{'loss': Array(0.1491768, dtype=float32), 'loss_cross_entropy': Array(0.1402657, dtype=float32)}
{'loss_inverse': Array(1.2119124e-05, dtype=float32)}


  8%|▊         | 75880/1000000 [3:35:23<33:45:47,  7.60it/s]

{'loss': Array(0.1354265, dtype=float32), 'loss_cross_entropy': Array(0.12797375, dtype=float32)}
{'loss_inverse': Array(2.9524681e-05, dtype=float32)}


  8%|▊         | 75890/1000000 [3:35:25<26:20:02,  9.75it/s]

{'loss': Array(0.13587283, dtype=float32), 'loss_cross_entropy': Array(0.12794517, dtype=float32)}
{'loss_inverse': Array(1.0140077e-06, dtype=float32)}


  8%|▊         | 75900/1000000 [3:35:26<37:57:26,  6.76it/s]

{'loss': Array(0.15829046, dtype=float32), 'loss_cross_entropy': Array(0.14863393, dtype=float32)}
{'loss_inverse': Array(1.5999232e-05, dtype=float32)}


  8%|▊         | 75910/1000000 [3:35:28<27:08:46,  9.46it/s]

{'loss': Array(0.1339151, dtype=float32), 'loss_cross_entropy': Array(0.1260001, dtype=float32)}
{'loss_inverse': Array(3.4977754e-07, dtype=float32)}


  8%|▊         | 75920/1000000 [3:35:29<38:38:39,  6.64it/s]

{'loss': Array(0.1483116, dtype=float32), 'loss_cross_entropy': Array(0.13947402, dtype=float32)}
{'loss_inverse': Array(5.061516e-05, dtype=float32)}


  8%|▊         | 75930/1000000 [3:35:31<27:46:37,  9.24it/s]

{'loss': Array(0.15199575, dtype=float32), 'loss_cross_entropy': Array(0.14346395, dtype=float32)}
{'loss_inverse': Array(4.709031e-05, dtype=float32)}


  8%|▊         | 75940/1000000 [3:35:32<25:55:57,  9.90it/s]

{'loss': Array(0.14855781, dtype=float32), 'loss_cross_entropy': Array(0.14010744, dtype=float32)}
{'loss_inverse': Array(0.00022966, dtype=float32)}


  8%|▊         | 75950/1000000 [3:35:34<31:39:55,  8.11it/s]

{'loss': Array(0.14796387, dtype=float32), 'loss_cross_entropy': Array(0.13947761, dtype=float32)}
{'loss_inverse': Array(3.1635512e-08, dtype=float32)}


  8%|▊         | 75960/1000000 [3:35:36<26:15:10,  9.78it/s]

{'loss': Array(0.13251255, dtype=float32), 'loss_cross_entropy': Array(0.12486609, dtype=float32)}
{'loss_inverse': Array(0.00016375, dtype=float32)}


  8%|▊         | 75970/1000000 [3:35:37<37:38:05,  6.82it/s]

{'loss': Array(0.14339364, dtype=float32), 'loss_cross_entropy': Array(0.13503984, dtype=float32)}
{'loss_inverse': Array(9.7839125e-05, dtype=float32)}


  8%|▊         | 75980/1000000 [3:35:39<27:39:25,  9.28it/s]

{'loss': Array(0.13087343, dtype=float32), 'loss_cross_entropy': Array(0.12310085, dtype=float32)}
{'loss_inverse': Array(0.0001213, dtype=float32)}


  8%|▊         | 75990/1000000 [3:35:40<37:59:10,  6.76it/s]

{'loss': Array(0.14434612, dtype=float32), 'loss_cross_entropy': Array(0.13575172, dtype=float32)}
{'loss_inverse': Array(0.00026822, dtype=float32)}


  8%|▊         | 76000/1000000 [3:35:42<27:38:17,  9.29it/s]

{'loss': Array(0.14831366, dtype=float32), 'loss_cross_entropy': Array(0.1400659, dtype=float32)}
{'loss_inverse': Array(0.00026964, dtype=float32)}


  8%|▊         | 76010/1000000 [3:35:50<85:59:03,  2.99it/s] 

{'loss': Array(0.1273021, dtype=float32), 'loss_cross_entropy': Array(0.12072398, dtype=float32)}
{'loss_inverse': Array(0.00037373, dtype=float32)}


  8%|▊         | 76020/1000000 [3:35:52<44:22:04,  5.78it/s]

{'loss': Array(0.16291416, dtype=float32), 'loss_cross_entropy': Array(0.15362087, dtype=float32)}
{'loss_inverse': Array(0.00050388, dtype=float32)}


  8%|▊         | 76030/1000000 [3:35:53<28:20:11,  9.06it/s]

{'loss': Array(0.16119559, dtype=float32), 'loss_cross_entropy': Array(0.15243582, dtype=float32)}
{'loss_inverse': Array(1.4776978e-05, dtype=float32)}


  8%|▊         | 76040/1000000 [3:35:54<26:08:11,  9.82it/s]

{'loss': Array(0.15210243, dtype=float32), 'loss_cross_entropy': Array(0.1431881, dtype=float32)}
{'loss_inverse': Array(8.724079e-05, dtype=float32)}


  8%|▊         | 76050/1000000 [3:35:56<28:12:50,  9.10it/s]

{'loss': Array(0.14429705, dtype=float32), 'loss_cross_entropy': Array(0.13563709, dtype=float32)}
{'loss_inverse': Array(6.616559e-06, dtype=float32)}


  8%|▊         | 76060/1000000 [3:35:58<26:02:01,  9.86it/s]

{'loss': Array(0.14508992, dtype=float32), 'loss_cross_entropy': Array(0.13696514, dtype=float32)}
{'loss_inverse': Array(0.00056028, dtype=float32)}


  8%|▊         | 76070/1000000 [3:35:59<29:05:23,  8.82it/s]

{'loss': Array(0.11372975, dtype=float32), 'loss_cross_entropy': Array(0.10659181, dtype=float32)}
{'loss_inverse': Array(0.00018084, dtype=float32)}


  8%|▊         | 76080/1000000 [3:36:01<25:34:54, 10.03it/s]

{'loss': Array(0.14211981, dtype=float32), 'loss_cross_entropy': Array(0.13403791, dtype=float32)}
{'loss_inverse': Array(3.079307e-05, dtype=float32)}


  8%|▊         | 76090/1000000 [3:36:02<31:26:26,  8.16it/s]

{'loss': Array(0.1472808, dtype=float32), 'loss_cross_entropy': Array(0.1384912, dtype=float32)}
{'loss_inverse': Array(0.00035958, dtype=float32)}


  8%|▊         | 76100/1000000 [3:36:04<26:38:06,  9.64it/s]

{'loss': Array(0.15803996, dtype=float32), 'loss_cross_entropy': Array(0.14941385, dtype=float32)}
{'loss_inverse': Array(1.3984142e-05, dtype=float32)}


  8%|▊         | 76110/1000000 [3:36:05<25:08:30, 10.21it/s]

{'loss': Array(0.14427018, dtype=float32), 'loss_cross_entropy': Array(0.13544504, dtype=float32)}
{'loss_inverse': Array(5.064186e-05, dtype=float32)}


  8%|▊         | 76120/1000000 [3:36:07<28:55:10,  8.87it/s]

{'loss': Array(0.13217105, dtype=float32), 'loss_cross_entropy': Array(0.12422921, dtype=float32)}
{'loss_inverse': Array(0.00013846, dtype=float32)}


  8%|▊         | 76130/1000000 [3:36:09<26:01:00,  9.86it/s]

{'loss': Array(0.13996284, dtype=float32), 'loss_cross_entropy': Array(0.13166305, dtype=float32)}
{'loss_inverse': Array(0.00036557, dtype=float32)}


  8%|▊         | 76140/1000000 [3:36:10<30:05:11,  8.53it/s]

{'loss': Array(0.15363641, dtype=float32), 'loss_cross_entropy': Array(0.14493184, dtype=float32)}
{'loss_inverse': Array(6.16586e-05, dtype=float32)}


  8%|▊         | 76150/1000000 [3:36:12<26:35:23,  9.65it/s]

{'loss': Array(0.12827048, dtype=float32), 'loss_cross_entropy': Array(0.12046542, dtype=float32)}
{'loss_inverse': Array(0.00013243, dtype=float32)}


  8%|▊         | 76160/1000000 [3:36:14<31:38:19,  8.11it/s]

{'loss': Array(0.13085143, dtype=float32), 'loss_cross_entropy': Array(0.12349009, dtype=float32)}
{'loss_inverse': Array(0.00025543, dtype=float32)}


  8%|▊         | 76170/1000000 [3:36:15<26:02:29,  9.85it/s]

{'loss': Array(0.1426435, dtype=float32), 'loss_cross_entropy': Array(0.13459975, dtype=float32)}
{'loss_inverse': Array(0.00033265, dtype=float32)}


  8%|▊         | 76180/1000000 [3:36:16<25:24:20, 10.10it/s]

{'loss': Array(0.15820347, dtype=float32), 'loss_cross_entropy': Array(0.14951469, dtype=float32)}
{'loss_inverse': Array(0.00014151, dtype=float32)}


  8%|▊         | 76190/1000000 [3:36:18<29:36:07,  8.67it/s]

{'loss': Array(0.14184324, dtype=float32), 'loss_cross_entropy': Array(0.13345312, dtype=float32)}
{'loss_inverse': Array(1.7475253e-05, dtype=float32)}


  8%|▊         | 76200/1000000 [3:36:20<25:59:26,  9.87it/s]

{'loss': Array(0.15071909, dtype=float32), 'loss_cross_entropy': Array(0.14226584, dtype=float32)}
{'loss_inverse': Array(0.00024513, dtype=float32)}


  8%|▊         | 76210/1000000 [3:36:21<31:19:34,  8.19it/s]

{'loss': Array(0.15409029, dtype=float32), 'loss_cross_entropy': Array(0.14545266, dtype=float32)}
{'loss_inverse': Array(1.9312507e-05, dtype=float32)}


  8%|▊         | 76220/1000000 [3:36:23<27:30:45,  9.33it/s]

{'loss': Array(0.14172353, dtype=float32), 'loss_cross_entropy': Array(0.13295892, dtype=float32)}
{'loss_inverse': Array(0.00021601, dtype=float32)}


  8%|▊         | 76230/1000000 [3:36:24<34:27:10,  7.45it/s]

{'loss': Array(0.1475225, dtype=float32), 'loss_cross_entropy': Array(0.13931121, dtype=float32)}
{'loss_inverse': Array(7.8847166e-05, dtype=float32)}


  8%|▊         | 76240/1000000 [3:36:26<26:50:05,  9.56it/s]

{'loss': Array(0.16024998, dtype=float32), 'loss_cross_entropy': Array(0.15118818, dtype=float32)}
{'loss_inverse': Array(2.955475e-05, dtype=float32)}


  8%|▊         | 76250/1000000 [3:36:27<25:29:35, 10.07it/s]

{'loss': Array(0.15231901, dtype=float32), 'loss_cross_entropy': Array(0.1435229, dtype=float32)}
{'loss_inverse': Array(0.00058456, dtype=float32)}


  8%|▊         | 76260/1000000 [3:36:29<28:08:59,  9.12it/s]

{'loss': Array(0.12195455, dtype=float32), 'loss_cross_entropy': Array(0.1144351, dtype=float32)}
{'loss_inverse': Array(0.00012518, dtype=float32)}


  8%|▊         | 76270/1000000 [3:36:31<25:54:28,  9.90it/s]

{'loss': Array(0.15589103, dtype=float32), 'loss_cross_entropy': Array(0.14671099, dtype=float32)}
{'loss_inverse': Array(5.0349983e-05, dtype=float32)}


  8%|▊         | 76280/1000000 [3:36:32<33:43:41,  7.61it/s]

{'loss': Array(0.13192998, dtype=float32), 'loss_cross_entropy': Array(0.12377752, dtype=float32)}
{'loss_inverse': Array(3.675869e-05, dtype=float32)}


  8%|▊         | 76290/1000000 [3:36:34<27:00:25,  9.50it/s]

{'loss': Array(0.14596073, dtype=float32), 'loss_cross_entropy': Array(0.13703935, dtype=float32)}
{'loss_inverse': Array(0.00034096, dtype=float32)}


  8%|▊         | 76300/1000000 [3:36:35<25:25:35, 10.09it/s]

{'loss': Array(0.14572786, dtype=float32), 'loss_cross_entropy': Array(0.13726874, dtype=float32)}
{'loss_inverse': Array(9.381438e-05, dtype=float32)}


  8%|▊         | 76310/1000000 [3:36:37<28:11:33,  9.10it/s]

{'loss': Array(0.1560177, dtype=float32), 'loss_cross_entropy': Array(0.14743736, dtype=float32)}
{'loss_inverse': Array(8.828173e-05, dtype=float32)}


  8%|▊         | 76320/1000000 [3:36:38<25:38:50, 10.00it/s]

{'loss': Array(0.14393674, dtype=float32), 'loss_cross_entropy': Array(0.13523093, dtype=float32)}
{'loss_inverse': Array(0.00019615, dtype=float32)}


  8%|▊         | 76330/1000000 [3:36:40<29:17:15,  8.76it/s]

{'loss': Array(0.12850718, dtype=float32), 'loss_cross_entropy': Array(0.12120023, dtype=float32)}
{'loss_inverse': Array(0.00010579, dtype=float32)}


  8%|▊         | 76340/1000000 [3:36:41<26:03:29,  9.85it/s]

{'loss': Array(0.1362868, dtype=float32), 'loss_cross_entropy': Array(0.12874636, dtype=float32)}
{'loss_inverse': Array(2.9014593e-05, dtype=float32)}


  8%|▊         | 76350/1000000 [3:36:43<31:41:49,  8.09it/s]

{'loss': Array(0.14710575, dtype=float32), 'loss_cross_entropy': Array(0.1387054, dtype=float32)}
{'loss_inverse': Array(0.00014102, dtype=float32)}


  8%|▊         | 76360/1000000 [3:36:45<26:42:21,  9.61it/s]

{'loss': Array(0.13117753, dtype=float32), 'loss_cross_entropy': Array(0.12362035, dtype=float32)}
{'loss_inverse': Array(0.00019026, dtype=float32)}


  8%|▊         | 76370/1000000 [3:36:46<25:12:51, 10.18it/s]

{'loss': Array(0.15202649, dtype=float32), 'loss_cross_entropy': Array(0.14282304, dtype=float32)}
{'loss_inverse': Array(0.00012341, dtype=float32)}


  8%|▊         | 76380/1000000 [3:36:48<29:12:41,  8.78it/s]

{'loss': Array(0.15346849, dtype=float32), 'loss_cross_entropy': Array(0.14481142, dtype=float32)}
{'loss_inverse': Array(1.5528594e-05, dtype=float32)}


  8%|▊         | 76390/1000000 [3:36:49<25:48:16,  9.94it/s]

{'loss': Array(0.13653624, dtype=float32), 'loss_cross_entropy': Array(0.12846492, dtype=float32)}
{'loss_inverse': Array(1.6438746e-06, dtype=float32)}


  8%|▊         | 76400/1000000 [3:36:51<34:13:11,  7.50it/s]

{'loss': Array(0.1307187, dtype=float32), 'loss_cross_entropy': Array(0.12266971, dtype=float32)}
{'loss_inverse': Array(3.406746e-06, dtype=float32)}


  8%|▊         | 76410/1000000 [3:36:52<26:28:11,  9.69it/s]

{'loss': Array(0.1453596, dtype=float32), 'loss_cross_entropy': Array(0.13737784, dtype=float32)}
{'loss_inverse': Array(1.4014611e-06, dtype=float32)}


  8%|▊         | 76420/1000000 [3:36:54<38:16:30,  6.70it/s]

{'loss': Array(0.15847127, dtype=float32), 'loss_cross_entropy': Array(0.1496447, dtype=float32)}
{'loss_inverse': Array(0.0001547, dtype=float32)}


  8%|▊         | 76430/1000000 [3:36:56<27:21:05,  9.38it/s]

{'loss': Array(0.13981955, dtype=float32), 'loss_cross_entropy': Array(0.13130157, dtype=float32)}
{'loss_inverse': Array(1.2000509e-05, dtype=float32)}


  8%|▊         | 76440/1000000 [3:36:57<38:38:11,  6.64it/s]

{'loss': Array(0.13638347, dtype=float32), 'loss_cross_entropy': Array(0.12826017, dtype=float32)}
{'loss_inverse': Array(9.938186e-06, dtype=float32)}


  8%|▊         | 76450/1000000 [3:36:59<27:13:47,  9.42it/s]

{'loss': Array(0.14896667, dtype=float32), 'loss_cross_entropy': Array(0.14048545, dtype=float32)}
{'loss_inverse': Array(1.15812e-07, dtype=float32)}


  8%|▊         | 76460/1000000 [3:37:00<25:29:37, 10.06it/s]

{'loss': Array(0.13722746, dtype=float32), 'loss_cross_entropy': Array(0.12894784, dtype=float32)}
{'loss_inverse': Array(1.7852101e-06, dtype=float32)}


  8%|▊         | 76470/1000000 [3:37:02<31:44:40,  8.08it/s]

{'loss': Array(0.13362288, dtype=float32), 'loss_cross_entropy': Array(0.12581158, dtype=float32)}
{'loss_inverse': Array(1.0320418e-05, dtype=float32)}


  8%|▊         | 76480/1000000 [3:37:03<26:17:28,  9.76it/s]

{'loss': Array(0.14730696, dtype=float32), 'loss_cross_entropy': Array(0.13877316, dtype=float32)}
{'loss_inverse': Array(2.3113969e-06, dtype=float32)}


  8%|▊         | 76490/1000000 [3:37:05<37:18:09,  6.88it/s]

{'loss': Array(0.13584118, dtype=float32), 'loss_cross_entropy': Array(0.12770112, dtype=float32)}
{'loss_inverse': Array(1.0493065e-06, dtype=float32)}


  8%|▊         | 76500/1000000 [3:37:07<27:33:12,  9.31it/s]

{'loss': Array(0.13808405, dtype=float32), 'loss_cross_entropy': Array(0.12977867, dtype=float32)}
{'loss_inverse': Array(5.5258206e-06, dtype=float32)}


  8%|▊         | 76510/1000000 [3:37:15<99:38:49,  2.57it/s] 

{'loss': Array(0.13879283, dtype=float32), 'loss_cross_entropy': Array(0.13044877, dtype=float32)}
{'loss_inverse': Array(1.2105003e-05, dtype=float32)}


  8%|▊         | 76520/1000000 [3:37:16<37:50:20,  6.78it/s] 

{'loss': Array(0.15853249, dtype=float32), 'loss_cross_entropy': Array(0.14963543, dtype=float32)}
{'loss_inverse': Array(8.547034e-06, dtype=float32)}


  8%|▊         | 76530/1000000 [3:37:18<26:56:34,  9.52it/s]

{'loss': Array(0.16666965, dtype=float32), 'loss_cross_entropy': Array(0.15778117, dtype=float32)}
{'loss_inverse': Array(9.566257e-06, dtype=float32)}


  8%|▊         | 76540/1000000 [3:37:19<29:29:50,  8.70it/s]

{'loss': Array(0.14824794, dtype=float32), 'loss_cross_entropy': Array(0.13973054, dtype=float32)}
{'loss_inverse': Array(4.2523725e-06, dtype=float32)}


  8%|▊         | 76550/1000000 [3:37:21<26:35:45,  9.64it/s]

{'loss': Array(0.15564589, dtype=float32), 'loss_cross_entropy': Array(0.14689939, dtype=float32)}
{'loss_inverse': Array(1.6736748e-06, dtype=float32)}


  8%|▊         | 76560/1000000 [3:37:23<38:40:03,  6.63it/s]

{'loss': Array(0.166447, dtype=float32), 'loss_cross_entropy': Array(0.15688281, dtype=float32)}
{'loss_inverse': Array(1.830711e-06, dtype=float32)}


  8%|▊         | 76570/1000000 [3:37:24<27:04:57,  9.47it/s]

{'loss': Array(0.15107062, dtype=float32), 'loss_cross_entropy': Array(0.14177442, dtype=float32)}
{'loss_inverse': Array(3.7582802e-06, dtype=float32)}


  8%|▊         | 76580/1000000 [3:37:25<25:02:13, 10.25it/s]

{'loss': Array(0.13039468, dtype=float32), 'loss_cross_entropy': Array(0.122101, dtype=float32)}
{'loss_inverse': Array(1.9415234e-05, dtype=float32)}


  8%|▊         | 76590/1000000 [3:37:27<29:25:26,  8.72it/s]

{'loss': Array(0.15790676, dtype=float32), 'loss_cross_entropy': Array(0.14977007, dtype=float32)}
{'loss_inverse': Array(2.062013e-07, dtype=float32)}


  8%|▊         | 76600/1000000 [3:37:28<25:15:50, 10.15it/s]

{'loss': Array(0.15417846, dtype=float32), 'loss_cross_entropy': Array(0.1449785, dtype=float32)}
{'loss_inverse': Array(3.409602e-07, dtype=float32)}


  8%|▊         | 76610/1000000 [3:37:30<30:31:01,  8.41it/s]

{'loss': Array(0.1448498, dtype=float32), 'loss_cross_entropy': Array(0.13676153, dtype=float32)}
{'loss_inverse': Array(3.748073e-08, dtype=float32)}


  8%|▊         | 76620/1000000 [3:37:32<26:18:17,  9.75it/s]

{'loss': Array(0.15678795, dtype=float32), 'loss_cross_entropy': Array(0.14822307, dtype=float32)}
{'loss_inverse': Array(4.11551e-06, dtype=float32)}


  8%|▊         | 76630/1000000 [3:37:34<32:08:45,  7.98it/s]

{'loss': Array(0.14661025, dtype=float32), 'loss_cross_entropy': Array(0.13808541, dtype=float32)}
{'loss_inverse': Array(2.8525753e-06, dtype=float32)}


  8%|▊         | 76640/1000000 [3:37:35<26:09:27,  9.81it/s]

{'loss': Array(0.1649965, dtype=float32), 'loss_cross_entropy': Array(0.15577748, dtype=float32)}
{'loss_inverse': Array(2.6538618e-08, dtype=float32)}


  8%|▊         | 76650/1000000 [3:37:36<24:53:16, 10.31it/s]

{'loss': Array(0.1424658, dtype=float32), 'loss_cross_entropy': Array(0.13487701, dtype=float32)}
{'loss_inverse': Array(2.5252006e-05, dtype=float32)}


  8%|▊         | 76660/1000000 [3:37:38<27:43:08,  9.25it/s]

{'loss': Array(0.15819454, dtype=float32), 'loss_cross_entropy': Array(0.14897229, dtype=float32)}
{'loss_inverse': Array(3.5734708e-06, dtype=float32)}


  8%|▊         | 76670/1000000 [3:37:40<26:21:15,  9.73it/s]

{'loss': Array(0.14884643, dtype=float32), 'loss_cross_entropy': Array(0.1403282, dtype=float32)}
{'loss_inverse': Array(9.783348e-05, dtype=float32)}


  8%|▊         | 76680/1000000 [3:37:41<30:05:06,  8.53it/s]

{'loss': Array(0.14985257, dtype=float32), 'loss_cross_entropy': Array(0.14132406, dtype=float32)}
{'loss_inverse': Array(5.6064454e-07, dtype=float32)}


  8%|▊         | 76690/1000000 [3:37:43<26:05:53,  9.83it/s]

{'loss': Array(0.11868253, dtype=float32), 'loss_cross_entropy': Array(0.11075138, dtype=float32)}
{'loss_inverse': Array(8.7111934e-05, dtype=float32)}


  8%|▊         | 76700/1000000 [3:37:44<31:54:45,  8.04it/s]

{'loss': Array(0.14514688, dtype=float32), 'loss_cross_entropy': Array(0.13702057, dtype=float32)}
{'loss_inverse': Array(9.103645e-05, dtype=float32)}


  8%|▊         | 76710/1000000 [3:37:46<26:42:51,  9.60it/s]

{'loss': Array(0.1406521, dtype=float32), 'loss_cross_entropy': Array(0.13245717, dtype=float32)}
{'loss_inverse': Array(6.2506166e-05, dtype=float32)}


  8%|▊         | 76720/1000000 [3:37:47<25:22:42, 10.11it/s]

{'loss': Array(0.15668295, dtype=float32), 'loss_cross_entropy': Array(0.14789453, dtype=float32)}
{'loss_inverse': Array(2.3336686e-05, dtype=float32)}


  8%|▊         | 76730/1000000 [3:37:49<29:31:54,  8.68it/s]

{'loss': Array(0.14191182, dtype=float32), 'loss_cross_entropy': Array(0.13357903, dtype=float32)}
{'loss_inverse': Array(0.00025926, dtype=float32)}


  8%|▊         | 76740/1000000 [3:37:51<26:04:31,  9.84it/s]

{'loss': Array(0.14537728, dtype=float32), 'loss_cross_entropy': Array(0.1374235, dtype=float32)}
{'loss_inverse': Array(4.465191e-06, dtype=float32)}


  8%|▊         | 76750/1000000 [3:37:52<31:43:09,  8.09it/s]

{'loss': Array(0.14177148, dtype=float32), 'loss_cross_entropy': Array(0.13361369, dtype=float32)}
{'loss_inverse': Array(2.5212855e-06, dtype=float32)}


  8%|▊         | 76760/1000000 [3:37:54<26:45:38,  9.58it/s]

{'loss': Array(0.1367109, dtype=float32), 'loss_cross_entropy': Array(0.1283897, dtype=float32)}
{'loss_inverse': Array(0.00031737, dtype=float32)}


  8%|▊         | 76770/1000000 [3:37:55<34:09:04,  7.51it/s]

{'loss': Array(0.14393757, dtype=float32), 'loss_cross_entropy': Array(0.13563737, dtype=float32)}
{'loss_inverse': Array(1.1188936e-06, dtype=float32)}


  8%|▊         | 76780/1000000 [3:37:57<26:38:11,  9.63it/s]

{'loss': Array(0.14106892, dtype=float32), 'loss_cross_entropy': Array(0.13250081, dtype=float32)}
{'loss_inverse': Array(0.00016897, dtype=float32)}


  8%|▊         | 76790/1000000 [3:37:58<25:00:17, 10.26it/s]

{'loss': Array(0.15092443, dtype=float32), 'loss_cross_entropy': Array(0.14295644, dtype=float32)}
{'loss_inverse': Array(3.4562447e-05, dtype=float32)}


  8%|▊         | 76800/1000000 [3:38:00<28:00:07,  9.16it/s]

{'loss': Array(0.1390771, dtype=float32), 'loss_cross_entropy': Array(0.13075456, dtype=float32)}
{'loss_inverse': Array(5.0120918e-05, dtype=float32)}


  8%|▊         | 76810/1000000 [3:38:01<25:33:35, 10.03it/s]

{'loss': Array(0.14695773, dtype=float32), 'loss_cross_entropy': Array(0.13796091, dtype=float32)}
{'loss_inverse': Array(0.0001884, dtype=float32)}


  8%|▊         | 76820/1000000 [3:38:03<34:49:30,  7.36it/s]

{'loss': Array(0.14796726, dtype=float32), 'loss_cross_entropy': Array(0.13937554, dtype=float32)}
{'loss_inverse': Array(3.1755615e-05, dtype=float32)}


  8%|▊         | 76830/1000000 [3:38:05<26:35:27,  9.64it/s]

{'loss': Array(0.1366759, dtype=float32), 'loss_cross_entropy': Array(0.12819171, dtype=float32)}
{'loss_inverse': Array(8.847775e-06, dtype=float32)}


  8%|▊         | 76840/1000000 [3:38:06<26:04:59,  9.83it/s]

{'loss': Array(0.14736652, dtype=float32), 'loss_cross_entropy': Array(0.13932782, dtype=float32)}
{'loss_inverse': Array(7.749763e-07, dtype=float32)}


  8%|▊         | 76850/1000000 [3:38:08<29:05:17,  8.82it/s]

{'loss': Array(0.1532559, dtype=float32), 'loss_cross_entropy': Array(0.1443795, dtype=float32)}
{'loss_inverse': Array(8.768597e-07, dtype=float32)}


  8%|▊         | 76860/1000000 [3:38:09<26:34:26,  9.65it/s]

{'loss': Array(0.14093502, dtype=float32), 'loss_cross_entropy': Array(0.1327779, dtype=float32)}
{'loss_inverse': Array(0.00012632, dtype=float32)}


  8%|▊         | 76870/1000000 [3:38:11<29:51:46,  8.59it/s]

{'loss': Array(0.14353804, dtype=float32), 'loss_cross_entropy': Array(0.13511412, dtype=float32)}
{'loss_inverse': Array(2.9813434e-06, dtype=float32)}


  8%|▊         | 76880/1000000 [3:38:13<25:55:35,  9.89it/s]

{'loss': Array(0.14978124, dtype=float32), 'loss_cross_entropy': Array(0.14122127, dtype=float32)}
{'loss_inverse': Array(2.8844332e-05, dtype=float32)}


  8%|▊         | 76890/1000000 [3:38:14<31:41:14,  8.09it/s]

{'loss': Array(0.1425368, dtype=float32), 'loss_cross_entropy': Array(0.13387768, dtype=float32)}
{'loss_inverse': Array(0.00041254, dtype=float32)}


  8%|▊         | 76900/1000000 [3:38:16<26:44:13,  9.59it/s]

{'loss': Array(0.16084827, dtype=float32), 'loss_cross_entropy': Array(0.15155022, dtype=float32)}
{'loss_inverse': Array(0.00023915, dtype=float32)}


  8%|▊         | 76910/1000000 [3:38:17<25:26:09, 10.08it/s]

{'loss': Array(0.15043174, dtype=float32), 'loss_cross_entropy': Array(0.14189647, dtype=float32)}
{'loss_inverse': Array(3.1580854e-05, dtype=float32)}


  8%|▊         | 76920/1000000 [3:38:19<29:33:53,  8.67it/s]

{'loss': Array(0.15484767, dtype=float32), 'loss_cross_entropy': Array(0.14568631, dtype=float32)}
{'loss_inverse': Array(1.3974337e-06, dtype=float32)}


  8%|▊         | 76930/1000000 [3:38:20<25:23:52, 10.10it/s]

{'loss': Array(0.15671504, dtype=float32), 'loss_cross_entropy': Array(0.14768869, dtype=float32)}
{'loss_inverse': Array(4.6066583e-05, dtype=float32)}


  8%|▊         | 76940/1000000 [3:38:22<34:33:00,  7.42it/s]

{'loss': Array(0.14012223, dtype=float32), 'loss_cross_entropy': Array(0.13149, dtype=float32)}
{'loss_inverse': Array(0.00071628, dtype=float32)}


  8%|▊         | 76950/1000000 [3:38:24<27:01:47,  9.49it/s]

{'loss': Array(0.15556231, dtype=float32), 'loss_cross_entropy': Array(0.14696243, dtype=float32)}
{'loss_inverse': Array(7.4207833e-06, dtype=float32)}


  8%|▊         | 76960/1000000 [3:38:25<37:55:38,  6.76it/s]

{'loss': Array(0.12966706, dtype=float32), 'loss_cross_entropy': Array(0.12174969, dtype=float32)}
{'loss_inverse': Array(0.0002784, dtype=float32)}


  8%|▊         | 76970/1000000 [3:38:27<27:44:10,  9.24it/s]

{'loss': Array(0.1345187, dtype=float32), 'loss_cross_entropy': Array(0.12609284, dtype=float32)}
{'loss_inverse': Array(2.1949388e-06, dtype=float32)}


  8%|▊         | 76980/1000000 [3:38:28<37:58:25,  6.75it/s]

{'loss': Array(0.17077647, dtype=float32), 'loss_cross_entropy': Array(0.16151169, dtype=float32)}
{'loss_inverse': Array(3.7634905e-05, dtype=float32)}


  8%|▊         | 76990/1000000 [3:38:30<27:49:48,  9.21it/s]

{'loss': Array(0.1565065, dtype=float32), 'loss_cross_entropy': Array(0.14687993, dtype=float32)}
{'loss_inverse': Array(1.4350034e-07, dtype=float32)}


  8%|▊         | 77000/1000000 [3:38:31<25:44:48,  9.96it/s]

{'loss': Array(0.13147645, dtype=float32), 'loss_cross_entropy': Array(0.12378631, dtype=float32)}
{'loss_inverse': Array(0.00014049, dtype=float32)}


  8%|▊         | 77010/1000000 [3:38:40<93:42:13,  2.74it/s] 

{'loss': Array(0.14575855, dtype=float32), 'loss_cross_entropy': Array(0.13769847, dtype=float32)}
{'loss_inverse': Array(4.7687985e-07, dtype=float32)}


  8%|▊         | 77020/1000000 [3:38:41<36:24:42,  7.04it/s]

{'loss': Array(0.12459306, dtype=float32), 'loss_cross_entropy': Array(0.117121, dtype=float32)}
{'loss_inverse': Array(1.9404372e-06, dtype=float32)}


  8%|▊         | 77030/1000000 [3:38:42<27:11:27,  9.43it/s]

{'loss': Array(0.14090225, dtype=float32), 'loss_cross_entropy': Array(0.13319384, dtype=float32)}
{'loss_inverse': Array(5.097268e-05, dtype=float32)}


  8%|▊         | 77040/1000000 [3:38:44<28:19:22,  9.05it/s]

{'loss': Array(0.15546313, dtype=float32), 'loss_cross_entropy': Array(0.14661019, dtype=float32)}
{'loss_inverse': Array(7.5181435e-07, dtype=float32)}


  8%|▊         | 77050/1000000 [3:38:46<26:07:54,  9.81it/s]

{'loss': Array(0.15575266, dtype=float32), 'loss_cross_entropy': Array(0.14594309, dtype=float32)}
{'loss_inverse': Array(6.009758e-06, dtype=float32)}


  8%|▊         | 77060/1000000 [3:38:47<29:17:34,  8.75it/s]

{'loss': Array(0.14209089, dtype=float32), 'loss_cross_entropy': Array(0.1332594, dtype=float32)}
{'loss_inverse': Array(3.7808986e-05, dtype=float32)}


  8%|▊         | 77070/1000000 [3:38:49<25:58:48,  9.87it/s]

{'loss': Array(0.15941761, dtype=float32), 'loss_cross_entropy': Array(0.15069011, dtype=float32)}
{'loss_inverse': Array(0.00018775, dtype=float32)}


  8%|▊         | 77080/1000000 [3:38:50<29:27:39,  8.70it/s]

{'loss': Array(0.16342187, dtype=float32), 'loss_cross_entropy': Array(0.1546223, dtype=float32)}
{'loss_inverse': Array(2.0668787e-05, dtype=float32)}


  8%|▊         | 77090/1000000 [3:38:52<26:03:16,  9.84it/s]

{'loss': Array(0.15941341, dtype=float32), 'loss_cross_entropy': Array(0.1507237, dtype=float32)}
{'loss_inverse': Array(3.3244748e-06, dtype=float32)}


  8%|▊         | 77100/1000000 [3:38:54<30:05:24,  8.52it/s]

{'loss': Array(0.1324702, dtype=float32), 'loss_cross_entropy': Array(0.12428413, dtype=float32)}
{'loss_inverse': Array(6.265966e-08, dtype=float32)}


  8%|▊         | 77110/1000000 [3:38:55<25:35:52, 10.01it/s]

{'loss': Array(0.15996464, dtype=float32), 'loss_cross_entropy': Array(0.15033184, dtype=float32)}
{'loss_inverse': Array(1.2105836e-07, dtype=float32)}


  8%|▊         | 77120/1000000 [3:38:57<34:06:45,  7.51it/s]

{'loss': Array(0.15530305, dtype=float32), 'loss_cross_entropy': Array(0.14611773, dtype=float32)}
{'loss_inverse': Array(6.44398e-05, dtype=float32)}


  8%|▊         | 77130/1000000 [3:38:58<26:38:27,  9.62it/s]

{'loss': Array(0.13794231, dtype=float32), 'loss_cross_entropy': Array(0.1295607, dtype=float32)}
{'loss_inverse': Array(9.161841e-05, dtype=float32)}


  8%|▊         | 77140/1000000 [3:39:00<37:35:44,  6.82it/s]

{'loss': Array(0.1420655, dtype=float32), 'loss_cross_entropy': Array(0.1341035, dtype=float32)}
{'loss_inverse': Array(1.9549982e-07, dtype=float32)}


  8%|▊         | 77150/1000000 [3:39:01<26:52:26,  9.54it/s]

{'loss': Array(0.15191756, dtype=float32), 'loss_cross_entropy': Array(0.14320575, dtype=float32)}
{'loss_inverse': Array(8.1226084e-05, dtype=float32)}


  8%|▊         | 77160/1000000 [3:39:03<35:05:25,  7.31it/s]

{'loss': Array(0.1493047, dtype=float32), 'loss_cross_entropy': Array(0.14044209, dtype=float32)}
{'loss_inverse': Array(0.00012078, dtype=float32)}


  8%|▊         | 77170/1000000 [3:39:05<28:03:00,  9.14it/s]

{'loss': Array(0.15596667, dtype=float32), 'loss_cross_entropy': Array(0.14763156, dtype=float32)}
{'loss_inverse': Array(7.824808e-06, dtype=float32)}


  8%|▊         | 77180/1000000 [3:39:06<27:03:32,  9.47it/s]

{'loss': Array(0.1455281, dtype=float32), 'loss_cross_entropy': Array(0.13693051, dtype=float32)}
{'loss_inverse': Array(3.2010947e-05, dtype=float32)}


  8%|▊         | 77190/1000000 [3:39:08<30:18:38,  8.46it/s]

{'loss': Array(0.1380832, dtype=float32), 'loss_cross_entropy': Array(0.13025866, dtype=float32)}
{'loss_inverse': Array(9.720086e-07, dtype=float32)}


  8%|▊         | 77200/1000000 [3:39:09<25:48:06,  9.93it/s]

{'loss': Array(0.15365897, dtype=float32), 'loss_cross_entropy': Array(0.14535484, dtype=float32)}
{'loss_inverse': Array(1.4582546e-05, dtype=float32)}


  8%|▊         | 77210/1000000 [3:39:11<31:59:36,  8.01it/s]

{'loss': Array(0.14626703, dtype=float32), 'loss_cross_entropy': Array(0.13778429, dtype=float32)}
{'loss_inverse': Array(2.8804577e-06, dtype=float32)}


  8%|▊         | 77220/1000000 [3:39:13<26:11:58,  9.78it/s]

{'loss': Array(0.14906608, dtype=float32), 'loss_cross_entropy': Array(0.14076546, dtype=float32)}
{'loss_inverse': Array(3.865553e-06, dtype=float32)}


  8%|▊         | 77230/1000000 [3:39:14<34:50:41,  7.36it/s]

{'loss': Array(0.14701606, dtype=float32), 'loss_cross_entropy': Array(0.1388341, dtype=float32)}
{'loss_inverse': Array(2.5159343e-05, dtype=float32)}


  8%|▊         | 77240/1000000 [3:39:16<26:51:39,  9.54it/s]

{'loss': Array(0.16862796, dtype=float32), 'loss_cross_entropy': Array(0.1593048, dtype=float32)}
{'loss_inverse': Array(0.00026824, dtype=float32)}


  8%|▊         | 77250/1000000 [3:39:18<34:53:22,  7.35it/s]

{'loss': Array(0.13632937, dtype=float32), 'loss_cross_entropy': Array(0.12862524, dtype=float32)}
{'loss_inverse': Array(3.012791e-06, dtype=float32)}


  8%|▊         | 77260/1000000 [3:39:19<26:46:52,  9.57it/s]

{'loss': Array(0.16691594, dtype=float32), 'loss_cross_entropy': Array(0.1579242, dtype=float32)}
{'loss_inverse': Array(1.1136427e-05, dtype=float32)}


  8%|▊         | 77270/1000000 [3:39:20<25:45:20,  9.95it/s]

{'loss': Array(0.15389924, dtype=float32), 'loss_cross_entropy': Array(0.14526199, dtype=float32)}
{'loss_inverse': Array(4.067273e-05, dtype=float32)}


  8%|▊         | 77280/1000000 [3:39:22<31:44:19,  8.08it/s]

{'loss': Array(0.1586804, dtype=float32), 'loss_cross_entropy': Array(0.15002467, dtype=float32)}
{'loss_inverse': Array(1.8039096e-06, dtype=float32)}


  8%|▊         | 77290/1000000 [3:39:24<27:08:56,  9.44it/s]

{'loss': Array(0.16497175, dtype=float32), 'loss_cross_entropy': Array(0.15577906, dtype=float32)}
{'loss_inverse': Array(7.944087e-06, dtype=float32)}


  8%|▊         | 77300/1000000 [3:39:25<38:27:17,  6.67it/s]

{'loss': Array(0.16053782, dtype=float32), 'loss_cross_entropy': Array(0.15131305, dtype=float32)}
{'loss_inverse': Array(3.882039e-06, dtype=float32)}


  8%|▊         | 77310/1000000 [3:39:27<27:20:45,  9.37it/s]

{'loss': Array(0.14346288, dtype=float32), 'loss_cross_entropy': Array(0.1351562, dtype=float32)}
{'loss_inverse': Array(5.1152656e-06, dtype=float32)}


  8%|▊         | 77320/1000000 [3:39:28<25:30:13, 10.05it/s]

{'loss': Array(0.1509632, dtype=float32), 'loss_cross_entropy': Array(0.14250158, dtype=float32)}
{'loss_inverse': Array(2.0250666e-07, dtype=float32)}


  8%|▊         | 77330/1000000 [3:39:30<28:49:57,  8.89it/s]

{'loss': Array(0.1482036, dtype=float32), 'loss_cross_entropy': Array(0.13993795, dtype=float32)}
{'loss_inverse': Array(1.652505e-05, dtype=float32)}


  8%|▊         | 77340/1000000 [3:39:31<25:55:26,  9.89it/s]

{'loss': Array(0.13109764, dtype=float32), 'loss_cross_entropy': Array(0.12367546, dtype=float32)}
{'loss_inverse': Array(1.9369297e-06, dtype=float32)}


  8%|▊         | 77350/1000000 [3:39:33<28:05:50,  9.12it/s]

{'loss': Array(0.1362182, dtype=float32), 'loss_cross_entropy': Array(0.12890737, dtype=float32)}
{'loss_inverse': Array(6.581253e-07, dtype=float32)}


  8%|▊         | 77360/1000000 [3:39:35<25:30:59, 10.04it/s]

{'loss': Array(0.12730967, dtype=float32), 'loss_cross_entropy': Array(0.1192665, dtype=float32)}
{'loss_inverse': Array(3.6746545e-07, dtype=float32)}


  8%|▊         | 77370/1000000 [3:39:36<31:28:23,  8.14it/s]

{'loss': Array(0.15581173, dtype=float32), 'loss_cross_entropy': Array(0.14759254, dtype=float32)}
{'loss_inverse': Array(9.133842e-06, dtype=float32)}


  8%|▊         | 77380/1000000 [3:39:38<26:07:19,  9.81it/s]

{'loss': Array(0.13719219, dtype=float32), 'loss_cross_entropy': Array(0.1293435, dtype=float32)}
{'loss_inverse': Array(1.2513085e-05, dtype=float32)}


  8%|▊         | 77390/1000000 [3:39:40<31:43:50,  8.08it/s]

{'loss': Array(0.14162704, dtype=float32), 'loss_cross_entropy': Array(0.13339454, dtype=float32)}
{'loss_inverse': Array(2.0947452e-06, dtype=float32)}


  8%|▊         | 77400/1000000 [3:39:41<25:57:06,  9.88it/s]

{'loss': Array(0.1323024, dtype=float32), 'loss_cross_entropy': Array(0.1244721, dtype=float32)}
{'loss_inverse': Array(4.3368618e-07, dtype=float32)}


  8%|▊         | 77410/1000000 [3:39:43<38:23:56,  6.67it/s]

{'loss': Array(0.14855199, dtype=float32), 'loss_cross_entropy': Array(0.1400904, dtype=float32)}
{'loss_inverse': Array(3.5124733e-06, dtype=float32)}


  8%|▊         | 77420/1000000 [3:39:44<27:22:30,  9.36it/s]

{'loss': Array(0.1380456, dtype=float32), 'loss_cross_entropy': Array(0.1303279, dtype=float32)}
{'loss_inverse': Array(0.0001041, dtype=float32)}


  8%|▊         | 77430/1000000 [3:39:46<25:27:06, 10.07it/s]

{'loss': Array(0.14324467, dtype=float32), 'loss_cross_entropy': Array(0.13457192, dtype=float32)}
{'loss_inverse': Array(5.319159e-07, dtype=float32)}


  8%|▊         | 77440/1000000 [3:39:47<28:17:52,  9.06it/s]

{'loss': Array(0.13801797, dtype=float32), 'loss_cross_entropy': Array(0.13053861, dtype=float32)}
{'loss_inverse': Array(2.0752918e-07, dtype=float32)}


  8%|▊         | 77450/1000000 [3:39:49<25:36:13, 10.01it/s]

{'loss': Array(0.1328599, dtype=float32), 'loss_cross_entropy': Array(0.12490974, dtype=float32)}
{'loss_inverse': Array(3.911857e-05, dtype=float32)}


  8%|▊         | 77460/1000000 [3:39:50<31:18:56,  8.18it/s]

{'loss': Array(0.15585387, dtype=float32), 'loss_cross_entropy': Array(0.14700238, dtype=float32)}
{'loss_inverse': Array(9.2724224e-07, dtype=float32)}


  8%|▊         | 77470/1000000 [3:39:52<25:56:56,  9.88it/s]

{'loss': Array(0.13182111, dtype=float32), 'loss_cross_entropy': Array(0.12406465, dtype=float32)}
{'loss_inverse': Array(0.00017164, dtype=float32)}


  8%|▊         | 77480/1000000 [3:39:53<25:40:15,  9.98it/s]

{'loss': Array(0.15014854, dtype=float32), 'loss_cross_entropy': Array(0.14116062, dtype=float32)}
{'loss_inverse': Array(2.0639205e-05, dtype=float32)}


  8%|▊         | 77490/1000000 [3:39:55<29:41:41,  8.63it/s]

{'loss': Array(0.1474434, dtype=float32), 'loss_cross_entropy': Array(0.13920347, dtype=float32)}
{'loss_inverse': Array(1.3620811e-06, dtype=float32)}


  8%|▊         | 77500/1000000 [3:39:56<25:45:08,  9.95it/s]

{'loss': Array(0.12354034, dtype=float32), 'loss_cross_entropy': Array(0.11559159, dtype=float32)}
{'loss_inverse': Array(7.3825627e-06, dtype=float32)}


  8%|▊         | 77510/1000000 [3:40:05<94:58:35,  2.70it/s] 

{'loss': Array(0.13384013, dtype=float32), 'loss_cross_entropy': Array(0.12604323, dtype=float32)}
{'loss_inverse': Array(7.490351e-06, dtype=float32)}


  8%|▊         | 77520/1000000 [3:40:06<38:01:42,  6.74it/s] 

{'loss': Array(0.14233224, dtype=float32), 'loss_cross_entropy': Array(0.13423581, dtype=float32)}
{'loss_inverse': Array(5.117796e-07, dtype=float32)}


  8%|▊         | 77530/1000000 [3:40:08<40:32:21,  6.32it/s]

{'loss': Array(0.15483192, dtype=float32), 'loss_cross_entropy': Array(0.14594299, dtype=float32)}
{'loss_inverse': Array(8.9609847e-07, dtype=float32)}


  8%|▊         | 77540/1000000 [3:40:09<27:25:20,  9.34it/s]

{'loss': Array(0.16760097, dtype=float32), 'loss_cross_entropy': Array(0.1584537, dtype=float32)}
{'loss_inverse': Array(1.05030814e-07, dtype=float32)}


  8%|▊         | 77550/1000000 [3:40:11<25:23:02, 10.09it/s]

{'loss': Array(0.14125669, dtype=float32), 'loss_cross_entropy': Array(0.13308494, dtype=float32)}
{'loss_inverse': Array(2.0357074e-06, dtype=float32)}


  8%|▊         | 77560/1000000 [3:40:13<28:41:53,  8.93it/s]

{'loss': Array(0.13028784, dtype=float32), 'loss_cross_entropy': Array(0.12299142, dtype=float32)}
{'loss_inverse': Array(2.885164e-06, dtype=float32)}


  8%|▊         | 77570/1000000 [3:40:14<38:30:14,  6.65it/s]

{'loss': Array(0.14710562, dtype=float32), 'loss_cross_entropy': Array(0.13837098, dtype=float32)}
{'loss_inverse': Array(3.961192e-06, dtype=float32)}


  8%|▊         | 77580/1000000 [3:40:16<27:17:59,  9.39it/s]

{'loss': Array(0.1502748, dtype=float32), 'loss_cross_entropy': Array(0.14184998, dtype=float32)}
{'loss_inverse': Array(1.3275607e-06, dtype=float32)}


  8%|▊         | 77590/1000000 [3:40:17<25:28:38, 10.06it/s]

{'loss': Array(0.14805725, dtype=float32), 'loss_cross_entropy': Array(0.13907397, dtype=float32)}
{'loss_inverse': Array(0.00045636, dtype=float32)}


  8%|▊         | 77600/1000000 [3:40:19<28:56:07,  8.86it/s]

{'loss': Array(0.14531974, dtype=float32), 'loss_cross_entropy': Array(0.1373288, dtype=float32)}
{'loss_inverse': Array(3.012536e-07, dtype=float32)}


  8%|▊         | 77610/1000000 [3:40:20<25:22:24, 10.10it/s]

{'loss': Array(0.13465871, dtype=float32), 'loss_cross_entropy': Array(0.1267166, dtype=float32)}
{'loss_inverse': Array(4.8731596e-05, dtype=float32)}


  8%|▊         | 77620/1000000 [3:40:22<29:42:55,  8.62it/s]

{'loss': Array(0.14296381, dtype=float32), 'loss_cross_entropy': Array(0.13496116, dtype=float32)}
{'loss_inverse': Array(2.9240703e-06, dtype=float32)}


  8%|▊         | 77630/1000000 [3:40:24<26:08:50,  9.80it/s]

{'loss': Array(0.14057845, dtype=float32), 'loss_cross_entropy': Array(0.1323365, dtype=float32)}
{'loss_inverse': Array(7.045587e-05, dtype=float32)}


  8%|▊         | 77640/1000000 [3:40:25<31:23:05,  8.16it/s]

{'loss': Array(0.12179822, dtype=float32), 'loss_cross_entropy': Array(0.11445945, dtype=float32)}
{'loss_inverse': Array(3.0146286e-06, dtype=float32)}


  8%|▊         | 77650/1000000 [3:40:27<26:01:50,  9.84it/s]

{'loss': Array(0.13525113, dtype=float32), 'loss_cross_entropy': Array(0.12721811, dtype=float32)}
{'loss_inverse': Array(1.5105796e-07, dtype=float32)}


  8%|▊         | 77660/1000000 [3:40:28<25:13:17, 10.16it/s]

{'loss': Array(0.15322714, dtype=float32), 'loss_cross_entropy': Array(0.14503275, dtype=float32)}
{'loss_inverse': Array(0.00017529, dtype=float32)}


  8%|▊         | 77670/1000000 [3:40:30<29:39:01,  8.64it/s]

{'loss': Array(0.16085431, dtype=float32), 'loss_cross_entropy': Array(0.1518289, dtype=float32)}
{'loss_inverse': Array(5.443569e-06, dtype=float32)}


  8%|▊         | 77680/1000000 [3:40:31<25:45:02,  9.95it/s]

{'loss': Array(0.14800702, dtype=float32), 'loss_cross_entropy': Array(0.13955414, dtype=float32)}
{'loss_inverse': Array(2.6658685e-05, dtype=float32)}


  8%|▊         | 77690/1000000 [3:40:33<31:48:20,  8.06it/s]

{'loss': Array(0.15108466, dtype=float32), 'loss_cross_entropy': Array(0.14172421, dtype=float32)}
{'loss_inverse': Array(5.7556677e-05, dtype=float32)}


  8%|▊         | 77700/1000000 [3:40:35<26:04:24,  9.83it/s]

{'loss': Array(0.13690029, dtype=float32), 'loss_cross_entropy': Array(0.12857531, dtype=float32)}
{'loss_inverse': Array(4.002516e-05, dtype=float32)}


  8%|▊         | 77710/1000000 [3:40:36<34:22:03,  7.45it/s]

{'loss': Array(0.14156716, dtype=float32), 'loss_cross_entropy': Array(0.1338593, dtype=float32)}
{'loss_inverse': Array(1.0096023e-05, dtype=float32)}


  8%|▊         | 77720/1000000 [3:40:38<26:45:41,  9.57it/s]

{'loss': Array(0.14073122, dtype=float32), 'loss_cross_entropy': Array(0.13243756, dtype=float32)}
{'loss_inverse': Array(0.00014179, dtype=float32)}


  8%|▊         | 77730/1000000 [3:40:39<24:55:36, 10.28it/s]

{'loss': Array(0.13786232, dtype=float32), 'loss_cross_entropy': Array(0.13026212, dtype=float32)}
{'loss_inverse': Array(2.3360778e-06, dtype=float32)}


  8%|▊         | 77740/1000000 [3:40:41<27:40:09,  9.26it/s]

{'loss': Array(0.12696539, dtype=float32), 'loss_cross_entropy': Array(0.1188887, dtype=float32)}
{'loss_inverse': Array(2.1843034e-05, dtype=float32)}


  8%|▊         | 77750/1000000 [3:40:42<25:28:32, 10.06it/s]

{'loss': Array(0.14956956, dtype=float32), 'loss_cross_entropy': Array(0.14097916, dtype=float32)}
{'loss_inverse': Array(6.684404e-07, dtype=float32)}


  8%|▊         | 77760/1000000 [3:40:44<33:37:13,  7.62it/s]

{'loss': Array(0.15000033, dtype=float32), 'loss_cross_entropy': Array(0.14189006, dtype=float32)}
{'loss_inverse': Array(5.8595706e-06, dtype=float32)}


  8%|▊         | 77770/1000000 [3:40:45<26:18:31,  9.74it/s]

{'loss': Array(0.14368413, dtype=float32), 'loss_cross_entropy': Array(0.1358351, dtype=float32)}
{'loss_inverse': Array(4.0368133e-05, dtype=float32)}


  8%|▊         | 77780/1000000 [3:40:47<25:29:18, 10.05it/s]

{'loss': Array(0.13449606, dtype=float32), 'loss_cross_entropy': Array(0.12673305, dtype=float32)}
{'loss_inverse': Array(1.0988548e-06, dtype=float32)}


  8%|▊         | 77790/1000000 [3:40:49<28:37:02,  8.95it/s]

{'loss': Array(0.13943519, dtype=float32), 'loss_cross_entropy': Array(0.13150513, dtype=float32)}
{'loss_inverse': Array(2.6512973e-05, dtype=float32)}


  8%|▊         | 77800/1000000 [3:40:50<25:28:27, 10.06it/s]

{'loss': Array(0.13374506, dtype=float32), 'loss_cross_entropy': Array(0.12618113, dtype=float32)}
{'loss_inverse': Array(7.623494e-05, dtype=float32)}


  8%|▊         | 77810/1000000 [3:40:52<29:27:12,  8.70it/s]

{'loss': Array(0.13885291, dtype=float32), 'loss_cross_entropy': Array(0.13062893, dtype=float32)}
{'loss_inverse': Array(5.7253626e-05, dtype=float32)}


  8%|▊         | 77820/1000000 [3:40:53<26:53:39,  9.52it/s]

{'loss': Array(0.1435365, dtype=float32), 'loss_cross_entropy': Array(0.13551767, dtype=float32)}
{'loss_inverse': Array(0.00019207, dtype=float32)}


  8%|▊         | 77830/1000000 [3:40:55<32:09:08,  7.97it/s]

{'loss': Array(0.1371175, dtype=float32), 'loss_cross_entropy': Array(0.12906376, dtype=float32)}
{'loss_inverse': Array(1.6033298e-05, dtype=float32)}


  8%|▊         | 77840/1000000 [3:40:56<26:13:54,  9.77it/s]

{'loss': Array(0.14014158, dtype=float32), 'loss_cross_entropy': Array(0.13203429, dtype=float32)}
{'loss_inverse': Array(4.3718805e-07, dtype=float32)}


  8%|▊         | 77850/1000000 [3:40:58<26:00:04,  9.85it/s]

{'loss': Array(0.139219, dtype=float32), 'loss_cross_entropy': Array(0.13074185, dtype=float32)}
{'loss_inverse': Array(1.2091345e-05, dtype=float32)}


  8%|▊         | 77860/1000000 [3:41:00<29:31:47,  8.67it/s]

{'loss': Array(0.15288973, dtype=float32), 'loss_cross_entropy': Array(0.14418937, dtype=float32)}
{'loss_inverse': Array(1.4579506e-06, dtype=float32)}


  8%|▊         | 77870/1000000 [3:41:01<25:41:07,  9.97it/s]

{'loss': Array(0.1281949, dtype=float32), 'loss_cross_entropy': Array(0.12071959, dtype=float32)}
{'loss_inverse': Array(1.4264323e-06, dtype=float32)}


  8%|▊         | 77880/1000000 [3:41:03<33:54:21,  7.55it/s]

{'loss': Array(0.14345501, dtype=float32), 'loss_cross_entropy': Array(0.13502792, dtype=float32)}
{'loss_inverse': Array(0.0001535, dtype=float32)}


  8%|▊         | 77890/1000000 [3:41:04<26:25:32,  9.69it/s]

{'loss': Array(0.1420781, dtype=float32), 'loss_cross_entropy': Array(0.13375048, dtype=float32)}
{'loss_inverse': Array(6.43958e-06, dtype=float32)}


  8%|▊         | 77900/1000000 [3:41:06<38:01:42,  6.74it/s]

{'loss': Array(0.134598, dtype=float32), 'loss_cross_entropy': Array(0.12712978, dtype=float32)}
{'loss_inverse': Array(3.0997708e-05, dtype=float32)}


  8%|▊         | 77910/1000000 [3:41:07<27:31:34,  9.31it/s]

{'loss': Array(0.1504463, dtype=float32), 'loss_cross_entropy': Array(0.14189793, dtype=float32)}
{'loss_inverse': Array(1.2542528e-05, dtype=float32)}


  8%|▊         | 77920/1000000 [3:41:09<39:37:23,  6.46it/s]

{'loss': Array(0.13277505, dtype=float32), 'loss_cross_entropy': Array(0.12475753, dtype=float32)}
{'loss_inverse': Array(7.113878e-05, dtype=float32)}


  8%|▊         | 77930/1000000 [3:41:11<27:44:22,  9.23it/s]

{'loss': Array(0.14086656, dtype=float32), 'loss_cross_entropy': Array(0.13232183, dtype=float32)}
{'loss_inverse': Array(0.00013572, dtype=float32)}


  8%|▊         | 77940/1000000 [3:41:12<27:01:05,  9.48it/s]

{'loss': Array(0.13647185, dtype=float32), 'loss_cross_entropy': Array(0.12816879, dtype=float32)}
{'loss_inverse': Array(9.137171e-05, dtype=float32)}


  8%|▊         | 77950/1000000 [3:41:14<31:45:46,  8.06it/s]

{'loss': Array(0.15731362, dtype=float32), 'loss_cross_entropy': Array(0.14848502, dtype=float32)}
{'loss_inverse': Array(0.00013946, dtype=float32)}


  8%|▊         | 77960/1000000 [3:41:15<26:19:15,  9.73it/s]

{'loss': Array(0.13437586, dtype=float32), 'loss_cross_entropy': Array(0.12699734, dtype=float32)}
{'loss_inverse': Array(2.6728056e-05, dtype=float32)}


  8%|▊         | 77970/1000000 [3:41:17<37:59:32,  6.74it/s]

{'loss': Array(0.14016938, dtype=float32), 'loss_cross_entropy': Array(0.13204898, dtype=float32)}
{'loss_inverse': Array(3.256766e-05, dtype=float32)}


  8%|▊         | 77980/1000000 [3:41:18<27:24:55,  9.34it/s]

{'loss': Array(0.14884847, dtype=float32), 'loss_cross_entropy': Array(0.14078973, dtype=float32)}
{'loss_inverse': Array(1.7296083e-05, dtype=float32)}


  8%|▊         | 77990/1000000 [3:41:20<38:16:54,  6.69it/s]

{'loss': Array(0.1317091, dtype=float32), 'loss_cross_entropy': Array(0.1242879, dtype=float32)}
{'loss_inverse': Array(0.00017351, dtype=float32)}


  8%|▊         | 78000/1000000 [3:41:22<27:44:05,  9.23it/s]

{'loss': Array(0.13308913, dtype=float32), 'loss_cross_entropy': Array(0.12498446, dtype=float32)}
{'loss_inverse': Array(4.454291e-07, dtype=float32)}


  8%|▊         | 78010/1000000 [3:41:30<87:54:58,  2.91it/s] 

{'loss': Array(0.14930278, dtype=float32), 'loss_cross_entropy': Array(0.14048477, dtype=float32)}
{'loss_inverse': Array(4.539357e-05, dtype=float32)}


  8%|▊         | 78020/1000000 [3:41:32<44:37:09,  5.74it/s]

{'loss': Array(0.13634515, dtype=float32), 'loss_cross_entropy': Array(0.12837303, dtype=float32)}
{'loss_inverse': Array(0.00014538, dtype=float32)}


  8%|▊         | 78030/1000000 [3:41:33<28:44:24,  8.91it/s]

{'loss': Array(0.13618888, dtype=float32), 'loss_cross_entropy': Array(0.12788841, dtype=float32)}
{'loss_inverse': Array(0.00029519, dtype=float32)}


  8%|▊         | 78040/1000000 [3:41:34<26:05:27,  9.82it/s]

{'loss': Array(0.16293068, dtype=float32), 'loss_cross_entropy': Array(0.1533916, dtype=float32)}
{'loss_inverse': Array(3.9876802e-05, dtype=float32)}


  8%|▊         | 78050/1000000 [3:41:36<28:33:15,  8.97it/s]

{'loss': Array(0.13354881, dtype=float32), 'loss_cross_entropy': Array(0.12552835, dtype=float32)}
{'loss_inverse': Array(6.468374e-07, dtype=float32)}


  8%|▊         | 78060/1000000 [3:41:38<26:10:58,  9.78it/s]

{'loss': Array(0.14519562, dtype=float32), 'loss_cross_entropy': Array(0.13696627, dtype=float32)}
{'loss_inverse': Array(6.7877045e-06, dtype=float32)}


  8%|▊         | 78070/1000000 [3:41:39<29:08:56,  8.79it/s]

{'loss': Array(0.1326164, dtype=float32), 'loss_cross_entropy': Array(0.12473064, dtype=float32)}
{'loss_inverse': Array(3.0968093e-07, dtype=float32)}


  8%|▊         | 78080/1000000 [3:41:41<25:54:42,  9.88it/s]

{'loss': Array(0.16020758, dtype=float32), 'loss_cross_entropy': Array(0.15134652, dtype=float32)}
{'loss_inverse': Array(9.29149e-06, dtype=float32)}


  8%|▊         | 78090/1000000 [3:41:43<31:41:27,  8.08it/s]

{'loss': Array(0.1363161, dtype=float32), 'loss_cross_entropy': Array(0.12878035, dtype=float32)}
{'loss_inverse': Array(2.4139414e-05, dtype=float32)}


  8%|▊         | 78100/1000000 [3:41:44<25:52:33,  9.90it/s]

{'loss': Array(0.15308115, dtype=float32), 'loss_cross_entropy': Array(0.14436196, dtype=float32)}
{'loss_inverse': Array(6.0231146e-06, dtype=float32)}


  8%|▊         | 78110/1000000 [3:41:45<24:43:21, 10.36it/s]

{'loss': Array(0.14299317, dtype=float32), 'loss_cross_entropy': Array(0.13531132, dtype=float32)}
{'loss_inverse': Array(2.6680233e-05, dtype=float32)}


  8%|▊         | 78120/1000000 [3:41:47<27:44:19,  9.23it/s]

{'loss': Array(0.14764182, dtype=float32), 'loss_cross_entropy': Array(0.1389678, dtype=float32)}
{'loss_inverse': Array(1.7582378e-06, dtype=float32)}


  8%|▊         | 78130/1000000 [3:41:48<25:08:23, 10.19it/s]

{'loss': Array(0.1536609, dtype=float32), 'loss_cross_entropy': Array(0.145001, dtype=float32)}
{'loss_inverse': Array(0.00037465, dtype=float32)}


  8%|▊         | 78140/1000000 [3:41:50<29:57:38,  8.55it/s]

{'loss': Array(0.14182633, dtype=float32), 'loss_cross_entropy': Array(0.13385175, dtype=float32)}
{'loss_inverse': Array(1.9849806e-05, dtype=float32)}


  8%|▊         | 78150/1000000 [3:41:52<26:21:40,  9.71it/s]

{'loss': Array(0.13168237, dtype=float32), 'loss_cross_entropy': Array(0.12437402, dtype=float32)}
{'loss_inverse': Array(6.761986e-05, dtype=float32)}


  8%|▊         | 78160/1000000 [3:41:54<32:01:04,  8.00it/s]

{'loss': Array(0.15602683, dtype=float32), 'loss_cross_entropy': Array(0.14751427, dtype=float32)}
{'loss_inverse': Array(3.3601435e-05, dtype=float32)}


  8%|▊         | 78170/1000000 [3:41:55<26:38:01,  9.61it/s]

{'loss': Array(0.14361574, dtype=float32), 'loss_cross_entropy': Array(0.13628407, dtype=float32)}
{'loss_inverse': Array(2.3061327e-06, dtype=float32)}


  8%|▊         | 78180/1000000 [3:41:56<25:14:08, 10.15it/s]

{'loss': Array(0.12995827, dtype=float32), 'loss_cross_entropy': Array(0.12225431, dtype=float32)}
{'loss_inverse': Array(8.712972e-05, dtype=float32)}


  8%|▊         | 78190/1000000 [3:41:58<29:05:22,  8.80it/s]

{'loss': Array(0.16487147, dtype=float32), 'loss_cross_entropy': Array(0.15581141, dtype=float32)}
{'loss_inverse': Array(0.00014302, dtype=float32)}


  8%|▊         | 78200/1000000 [3:42:00<25:34:17, 10.01it/s]

{'loss': Array(0.158853, dtype=float32), 'loss_cross_entropy': Array(0.14970282, dtype=float32)}
{'loss_inverse': Array(0.00043303, dtype=float32)}


  8%|▊         | 78210/1000000 [3:42:01<31:06:10,  8.23it/s]

{'loss': Array(0.15498668, dtype=float32), 'loss_cross_entropy': Array(0.14688534, dtype=float32)}
{'loss_inverse': Array(4.0854924e-05, dtype=float32)}


  8%|▊         | 78220/1000000 [3:42:03<26:20:20,  9.72it/s]

{'loss': Array(0.15634689, dtype=float32), 'loss_cross_entropy': Array(0.14795743, dtype=float32)}
{'loss_inverse': Array(0.00019318, dtype=float32)}


  8%|▊         | 78230/1000000 [3:42:04<34:28:10,  7.43it/s]

{'loss': Array(0.1505739, dtype=float32), 'loss_cross_entropy': Array(0.14189352, dtype=float32)}
{'loss_inverse': Array(0.00018021, dtype=float32)}


  8%|▊         | 78240/1000000 [3:42:06<27:38:21,  9.26it/s]

{'loss': Array(0.14289057, dtype=float32), 'loss_cross_entropy': Array(0.13484435, dtype=float32)}
{'loss_inverse': Array(0.00031685, dtype=float32)}


  8%|▊         | 78250/1000000 [3:42:07<26:49:36,  9.54it/s]

{'loss': Array(0.14780775, dtype=float32), 'loss_cross_entropy': Array(0.1393481, dtype=float32)}
{'loss_inverse': Array(0.00030255, dtype=float32)}


  8%|▊         | 78260/1000000 [3:42:09<28:36:53,  8.95it/s]

{'loss': Array(0.1473301, dtype=float32), 'loss_cross_entropy': Array(0.13886915, dtype=float32)}
{'loss_inverse': Array(0.00018655, dtype=float32)}


  8%|▊         | 78270/1000000 [3:42:11<25:24:27, 10.08it/s]

{'loss': Array(0.12141597, dtype=float32), 'loss_cross_entropy': Array(0.11419334, dtype=float32)}
{'loss_inverse': Array(5.2608102e-05, dtype=float32)}


  8%|▊         | 78280/1000000 [3:42:12<34:09:17,  7.50it/s]

{'loss': Array(0.15552878, dtype=float32), 'loss_cross_entropy': Array(0.14685972, dtype=float32)}
{'loss_inverse': Array(3.1289444e-05, dtype=float32)}


  8%|▊         | 78290/1000000 [3:42:14<27:24:05,  9.34it/s]

{'loss': Array(0.15154205, dtype=float32), 'loss_cross_entropy': Array(0.1428707, dtype=float32)}
{'loss_inverse': Array(2.7299204e-05, dtype=float32)}


  8%|▊         | 78300/1000000 [3:42:15<24:58:11, 10.25it/s]

{'loss': Array(0.15024441, dtype=float32), 'loss_cross_entropy': Array(0.14116846, dtype=float32)}
{'loss_inverse': Array(0.00010889, dtype=float32)}


  8%|▊         | 78310/1000000 [3:42:17<28:09:14,  9.09it/s]

{'loss': Array(0.12894157, dtype=float32), 'loss_cross_entropy': Array(0.12080858, dtype=float32)}
{'loss_inverse': Array(0.00011219, dtype=float32)}


  8%|▊         | 78320/1000000 [3:42:18<25:29:57, 10.04it/s]

{'loss': Array(0.13015416, dtype=float32), 'loss_cross_entropy': Array(0.12247963, dtype=float32)}
{'loss_inverse': Array(2.3525672e-05, dtype=float32)}


  8%|▊         | 78330/1000000 [3:42:20<30:03:55,  8.52it/s]

{'loss': Array(0.14930363, dtype=float32), 'loss_cross_entropy': Array(0.14105849, dtype=float32)}
{'loss_inverse': Array(4.440557e-06, dtype=float32)}


  8%|▊         | 78340/1000000 [3:42:22<25:48:43,  9.92it/s]

{'loss': Array(0.15088034, dtype=float32), 'loss_cross_entropy': Array(0.14231375, dtype=float32)}
{'loss_inverse': Array(9.2484996e-05, dtype=float32)}


  8%|▊         | 78350/1000000 [3:42:23<31:11:30,  8.21it/s]

{'loss': Array(0.14696895, dtype=float32), 'loss_cross_entropy': Array(0.13824709, dtype=float32)}
{'loss_inverse': Array(0.00010041, dtype=float32)}


  8%|▊         | 78360/1000000 [3:42:25<26:07:58,  9.80it/s]

{'loss': Array(0.1627916, dtype=float32), 'loss_cross_entropy': Array(0.1534037, dtype=float32)}
{'loss_inverse': Array(0.00020776, dtype=float32)}


  8%|▊         | 78370/1000000 [3:42:26<25:29:52, 10.04it/s]

{'loss': Array(0.13240615, dtype=float32), 'loss_cross_entropy': Array(0.12478751, dtype=float32)}
{'loss_inverse': Array(0.00010551, dtype=float32)}


  8%|▊         | 78380/1000000 [3:42:28<29:42:04,  8.62it/s]

{'loss': Array(0.134638, dtype=float32), 'loss_cross_entropy': Array(0.12648752, dtype=float32)}
{'loss_inverse': Array(0.00017664, dtype=float32)}


  8%|▊         | 78390/1000000 [3:42:29<25:52:18,  9.90it/s]

{'loss': Array(0.14911292, dtype=float32), 'loss_cross_entropy': Array(0.14026389, dtype=float32)}
{'loss_inverse': Array(5.7098066e-07, dtype=float32)}


  8%|▊         | 78400/1000000 [3:42:31<34:31:52,  7.41it/s]

{'loss': Array(0.13854721, dtype=float32), 'loss_cross_entropy': Array(0.13070494, dtype=float32)}
{'loss_inverse': Array(1.61859e-05, dtype=float32)}


  8%|▊         | 78410/1000000 [3:42:33<27:27:50,  9.32it/s]

{'loss': Array(0.14254911, dtype=float32), 'loss_cross_entropy': Array(0.13422735, dtype=float32)}
{'loss_inverse': Array(0.00014083, dtype=float32)}


  8%|▊         | 78420/1000000 [3:42:34<39:10:23,  6.53it/s]

{'loss': Array(0.13334842, dtype=float32), 'loss_cross_entropy': Array(0.125575, dtype=float32)}
{'loss_inverse': Array(1.3476152e-05, dtype=float32)}


  8%|▊         | 78430/1000000 [3:42:36<27:51:21,  9.19it/s]

{'loss': Array(0.16119324, dtype=float32), 'loss_cross_entropy': Array(0.15192877, dtype=float32)}
{'loss_inverse': Array(2.6147374e-05, dtype=float32)}


  8%|▊         | 78440/1000000 [3:42:38<38:12:10,  6.70it/s]

{'loss': Array(0.15481025, dtype=float32), 'loss_cross_entropy': Array(0.1461363, dtype=float32)}
{'loss_inverse': Array(2.3237362e-05, dtype=float32)}


  8%|▊         | 78450/1000000 [3:42:39<27:30:48,  9.30it/s]

{'loss': Array(0.1532641, dtype=float32), 'loss_cross_entropy': Array(0.14448655, dtype=float32)}
{'loss_inverse': Array(2.100491e-05, dtype=float32)}


  8%|▊         | 78460/1000000 [3:42:40<25:12:53, 10.15it/s]

{'loss': Array(0.11957785, dtype=float32), 'loss_cross_entropy': Array(0.11276642, dtype=float32)}
{'loss_inverse': Array(5.43328e-07, dtype=float32)}


  8%|▊         | 78470/1000000 [3:42:42<31:22:45,  8.16it/s]

{'loss': Array(0.14047912, dtype=float32), 'loss_cross_entropy': Array(0.1323837, dtype=float32)}
{'loss_inverse': Array(1.7526118e-07, dtype=float32)}


  8%|▊         | 78480/1000000 [3:42:44<26:22:47,  9.70it/s]

{'loss': Array(0.12664881, dtype=float32), 'loss_cross_entropy': Array(0.11941647, dtype=float32)}
{'loss_inverse': Array(4.3687105e-06, dtype=float32)}


  8%|▊         | 78490/1000000 [3:42:45<38:12:34,  6.70it/s]

{'loss': Array(0.14503664, dtype=float32), 'loss_cross_entropy': Array(0.13619016, dtype=float32)}
{'loss_inverse': Array(1.690973e-05, dtype=float32)}


  8%|▊         | 78500/1000000 [3:42:47<27:08:04,  9.43it/s]

{'loss': Array(0.15290181, dtype=float32), 'loss_cross_entropy': Array(0.14357166, dtype=float32)}
{'loss_inverse': Array(6.453072e-05, dtype=float32)}


  8%|▊         | 78510/1000000 [3:42:55<99:04:06,  2.58it/s] 

{'loss': Array(0.14700747, dtype=float32), 'loss_cross_entropy': Array(0.13863882, dtype=float32)}
{'loss_inverse': Array(7.0886534e-07, dtype=float32)}


  8%|▊         | 78520/1000000 [3:42:57<37:43:11,  6.79it/s] 

{'loss': Array(0.16043995, dtype=float32), 'loss_cross_entropy': Array(0.15194331, dtype=float32)}
{'loss_inverse': Array(8.779495e-07, dtype=float32)}


  8%|▊         | 78530/1000000 [3:42:58<27:54:28,  9.17it/s]

{'loss': Array(0.15098135, dtype=float32), 'loss_cross_entropy': Array(0.14240825, dtype=float32)}
{'loss_inverse': Array(1.0482375e-05, dtype=float32)}


  8%|▊         | 78540/1000000 [3:43:00<29:49:57,  8.58it/s]

{'loss': Array(0.15545718, dtype=float32), 'loss_cross_entropy': Array(0.14692293, dtype=float32)}
{'loss_inverse': Array(0.00013932, dtype=float32)}


  8%|▊         | 78550/1000000 [3:43:01<25:48:10,  9.92it/s]

{'loss': Array(0.15471816, dtype=float32), 'loss_cross_entropy': Array(0.14666767, dtype=float32)}
{'loss_inverse': Array(4.3304113e-05, dtype=float32)}


  8%|▊         | 78560/1000000 [3:43:03<38:24:18,  6.66it/s]

{'loss': Array(0.14922965, dtype=float32), 'loss_cross_entropy': Array(0.14153647, dtype=float32)}
{'loss_inverse': Array(1.33387175e-05, dtype=float32)}


  8%|▊         | 78570/1000000 [3:43:04<27:44:14,  9.23it/s]

{'loss': Array(0.13882999, dtype=float32), 'loss_cross_entropy': Array(0.13131072, dtype=float32)}
{'loss_inverse': Array(2.6736623e-07, dtype=float32)}


  8%|▊         | 78580/1000000 [3:43:06<26:27:54,  9.67it/s]

{'loss': Array(0.14895922, dtype=float32), 'loss_cross_entropy': Array(0.14045301, dtype=float32)}
{'loss_inverse': Array(1.7193448e-06, dtype=float32)}


  8%|▊         | 78590/1000000 [3:43:08<30:55:26,  8.28it/s]

{'loss': Array(0.1350805, dtype=float32), 'loss_cross_entropy': Array(0.12689327, dtype=float32)}
{'loss_inverse': Array(0.00038401, dtype=float32)}


  8%|▊         | 78600/1000000 [3:43:09<26:15:02,  9.75it/s]

{'loss': Array(0.14737894, dtype=float32), 'loss_cross_entropy': Array(0.13840936, dtype=float32)}
{'loss_inverse': Array(3.2686855e-06, dtype=float32)}


  8%|▊         | 78610/1000000 [3:43:11<29:58:23,  8.54it/s]

{'loss': Array(0.13503699, dtype=float32), 'loss_cross_entropy': Array(0.12695198, dtype=float32)}
{'loss_inverse': Array(4.154557e-05, dtype=float32)}


  8%|▊         | 78620/1000000 [3:43:12<25:33:06, 10.02it/s]

{'loss': Array(0.14683549, dtype=float32), 'loss_cross_entropy': Array(0.13836654, dtype=float32)}
{'loss_inverse': Array(0.00025196, dtype=float32)}


  8%|▊         | 78630/1000000 [3:43:14<31:14:09,  8.19it/s]

{'loss': Array(0.12584183, dtype=float32), 'loss_cross_entropy': Array(0.11845358, dtype=float32)}
{'loss_inverse': Array(3.5463865e-05, dtype=float32)}


  8%|▊         | 78640/1000000 [3:43:16<25:57:02,  9.86it/s]

{'loss': Array(0.14302985, dtype=float32), 'loss_cross_entropy': Array(0.13550223, dtype=float32)}
{'loss_inverse': Array(3.7231987e-07, dtype=float32)}


  8%|▊         | 78650/1000000 [3:43:17<24:56:19, 10.26it/s]

{'loss': Array(0.15342242, dtype=float32), 'loss_cross_entropy': Array(0.1448226, dtype=float32)}
{'loss_inverse': Array(0.00011201, dtype=float32)}


  8%|▊         | 78660/1000000 [3:43:19<28:46:49,  8.89it/s]

{'loss': Array(0.12583204, dtype=float32), 'loss_cross_entropy': Array(0.1179003, dtype=float32)}
{'loss_inverse': Array(2.717904e-05, dtype=float32)}


  8%|▊         | 78670/1000000 [3:43:20<25:50:55,  9.90it/s]

{'loss': Array(0.15807544, dtype=float32), 'loss_cross_entropy': Array(0.14909132, dtype=float32)}
{'loss_inverse': Array(1.9974495e-06, dtype=float32)}


  8%|▊         | 78680/1000000 [3:43:22<29:23:25,  8.71it/s]

{'loss': Array(0.13349387, dtype=float32), 'loss_cross_entropy': Array(0.12534334, dtype=float32)}
{'loss_inverse': Array(2.50462e-06, dtype=float32)}


  8%|▊         | 78690/1000000 [3:43:23<25:49:11,  9.91it/s]

{'loss': Array(0.15577313, dtype=float32), 'loss_cross_entropy': Array(0.14682049, dtype=float32)}
{'loss_inverse': Array(4.6542686e-05, dtype=float32)}


  8%|▊         | 78700/1000000 [3:43:25<31:13:56,  8.19it/s]

{'loss': Array(0.14126043, dtype=float32), 'loss_cross_entropy': Array(0.13344146, dtype=float32)}
{'loss_inverse': Array(4.735913e-06, dtype=float32)}


  8%|▊         | 78710/1000000 [3:43:26<25:40:03,  9.97it/s]

{'loss': Array(0.13505846, dtype=float32), 'loss_cross_entropy': Array(0.12735635, dtype=float32)}
{'loss_inverse': Array(4.2512676e-07, dtype=float32)}


  8%|▊         | 78720/1000000 [3:43:28<25:27:11, 10.05it/s]

{'loss': Array(0.12901355, dtype=float32), 'loss_cross_entropy': Array(0.12142824, dtype=float32)}
{'loss_inverse': Array(7.952725e-05, dtype=float32)}


  8%|▊         | 78730/1000000 [3:43:30<30:38:58,  8.35it/s]

{'loss': Array(0.14890532, dtype=float32), 'loss_cross_entropy': Array(0.14043707, dtype=float32)}
{'loss_inverse': Array(4.3233737e-05, dtype=float32)}


  8%|▊         | 78740/1000000 [3:43:31<26:15:59,  9.74it/s]

{'loss': Array(0.15585923, dtype=float32), 'loss_cross_entropy': Array(0.14700173, dtype=float32)}
{'loss_inverse': Array(4.906064e-06, dtype=float32)}


  8%|▊         | 78750/1000000 [3:43:33<31:51:58,  8.03it/s]

{'loss': Array(0.16178088, dtype=float32), 'loss_cross_entropy': Array(0.1526506, dtype=float32)}
{'loss_inverse': Array(5.6911404e-07, dtype=float32)}


  8%|▊         | 78760/1000000 [3:43:34<26:42:28,  9.58it/s]

{'loss': Array(0.13499734, dtype=float32), 'loss_cross_entropy': Array(0.127649, dtype=float32)}
{'loss_inverse': Array(4.4523877e-05, dtype=float32)}


  8%|▊         | 78770/1000000 [3:43:36<34:20:47,  7.45it/s]

{'loss': Array(0.13841115, dtype=float32), 'loss_cross_entropy': Array(0.13040139, dtype=float32)}
{'loss_inverse': Array(4.5930934e-05, dtype=float32)}


  8%|▊         | 78780/1000000 [3:43:38<26:29:24,  9.66it/s]

{'loss': Array(0.13712046, dtype=float32), 'loss_cross_entropy': Array(0.12940577, dtype=float32)}
{'loss_inverse': Array(8.416482e-05, dtype=float32)}


  8%|▊         | 78790/1000000 [3:43:39<25:07:07, 10.19it/s]

{'loss': Array(0.13272357, dtype=float32), 'loss_cross_entropy': Array(0.12443353, dtype=float32)}
{'loss_inverse': Array(1.4433167e-05, dtype=float32)}


  8%|▊         | 78800/1000000 [3:43:41<28:05:42,  9.11it/s]

{'loss': Array(0.14143293, dtype=float32), 'loss_cross_entropy': Array(0.13312468, dtype=float32)}
{'loss_inverse': Array(1.3707245e-05, dtype=float32)}


  8%|▊         | 78810/1000000 [3:43:42<25:16:43, 10.12it/s]

{'loss': Array(0.14809246, dtype=float32), 'loss_cross_entropy': Array(0.14020726, dtype=float32)}
{'loss_inverse': Array(0.00010722, dtype=float32)}


  8%|▊         | 78820/1000000 [3:43:44<33:52:41,  7.55it/s]

{'loss': Array(0.14970273, dtype=float32), 'loss_cross_entropy': Array(0.14129072, dtype=float32)}
{'loss_inverse': Array(5.2686016e-07, dtype=float32)}


  8%|▊         | 78830/1000000 [3:43:45<26:27:01,  9.67it/s]

{'loss': Array(0.13431513, dtype=float32), 'loss_cross_entropy': Array(0.12618685, dtype=float32)}
{'loss_inverse': Array(3.6528178e-05, dtype=float32)}


  8%|▊         | 78840/1000000 [3:43:47<25:06:50, 10.19it/s]

{'loss': Array(0.14206038, dtype=float32), 'loss_cross_entropy': Array(0.13388227, dtype=float32)}
{'loss_inverse': Array(7.946567e-06, dtype=float32)}


  8%|▊         | 78850/1000000 [3:43:48<27:25:27,  9.33it/s]

{'loss': Array(0.13397197, dtype=float32), 'loss_cross_entropy': Array(0.12642744, dtype=float32)}
{'loss_inverse': Array(5.683168e-06, dtype=float32)}


  8%|▊         | 78860/1000000 [3:43:50<25:23:14, 10.08it/s]

{'loss': Array(0.15109465, dtype=float32), 'loss_cross_entropy': Array(0.14245652, dtype=float32)}
{'loss_inverse': Array(4.894695e-06, dtype=float32)}


  8%|▊         | 78870/1000000 [3:43:51<29:16:28,  8.74it/s]

{'loss': Array(0.12434979, dtype=float32), 'loss_cross_entropy': Array(0.117265, dtype=float32)}
{'loss_inverse': Array(3.9579925e-05, dtype=float32)}


  8%|▊         | 78880/1000000 [3:43:53<25:16:05, 10.13it/s]

{'loss': Array(0.1629824, dtype=float32), 'loss_cross_entropy': Array(0.15378611, dtype=float32)}
{'loss_inverse': Array(0.000158, dtype=float32)}


  8%|▊         | 78890/1000000 [3:43:55<31:05:32,  8.23it/s]

{'loss': Array(0.16255271, dtype=float32), 'loss_cross_entropy': Array(0.15384412, dtype=float32)}
{'loss_inverse': Array(8.409475e-05, dtype=float32)}


  8%|▊         | 78900/1000000 [3:43:56<26:19:42,  9.72it/s]

{'loss': Array(0.14180242, dtype=float32), 'loss_cross_entropy': Array(0.13349685, dtype=float32)}
{'loss_inverse': Array(2.474441e-05, dtype=float32)}


  8%|▊         | 78910/1000000 [3:43:57<25:29:18, 10.04it/s]

{'loss': Array(0.14431107, dtype=float32), 'loss_cross_entropy': Array(0.1361748, dtype=float32)}
{'loss_inverse': Array(2.5906744e-05, dtype=float32)}


  8%|▊         | 78920/1000000 [3:43:59<29:07:27,  8.78it/s]

{'loss': Array(0.15641433, dtype=float32), 'loss_cross_entropy': Array(0.14776446, dtype=float32)}
{'loss_inverse': Array(5.8738962e-05, dtype=float32)}


  8%|▊         | 78930/1000000 [3:44:01<25:35:18, 10.00it/s]

{'loss': Array(0.14316167, dtype=float32), 'loss_cross_entropy': Array(0.13489754, dtype=float32)}
{'loss_inverse': Array(0.00016762, dtype=float32)}


  8%|▊         | 78940/1000000 [3:44:02<33:51:16,  7.56it/s]

{'loss': Array(0.13965027, dtype=float32), 'loss_cross_entropy': Array(0.13198434, dtype=float32)}
{'loss_inverse': Array(4.992806e-06, dtype=float32)}


  8%|▊         | 78950/1000000 [3:44:04<27:21:07,  9.35it/s]

{'loss': Array(0.13616025, dtype=float32), 'loss_cross_entropy': Array(0.12857161, dtype=float32)}
{'loss_inverse': Array(0.00011647, dtype=float32)}


  8%|▊         | 78960/1000000 [3:44:06<38:00:46,  6.73it/s]

{'loss': Array(0.1439078, dtype=float32), 'loss_cross_entropy': Array(0.13590632, dtype=float32)}
{'loss_inverse': Array(1.8754907e-05, dtype=float32)}


  8%|▊         | 78970/1000000 [3:44:07<27:17:23,  9.37it/s]

{'loss': Array(0.1517942, dtype=float32), 'loss_cross_entropy': Array(0.1437255, dtype=float32)}
{'loss_inverse': Array(4.5435077e-05, dtype=float32)}


  8%|▊         | 78980/1000000 [3:44:09<38:38:23,  6.62it/s]

{'loss': Array(0.12520961, dtype=float32), 'loss_cross_entropy': Array(0.11834303, dtype=float32)}
{'loss_inverse': Array(6.137223e-06, dtype=float32)}


  8%|▊         | 78990/1000000 [3:44:10<27:55:18,  9.16it/s]

{'loss': Array(0.15565567, dtype=float32), 'loss_cross_entropy': Array(0.14705627, dtype=float32)}
{'loss_inverse': Array(8.752441e-06, dtype=float32)}


  8%|▊         | 79000/1000000 [3:44:12<26:05:05,  9.81it/s]

{'loss': Array(0.15000686, dtype=float32), 'loss_cross_entropy': Array(0.14110015, dtype=float32)}
{'loss_inverse': Array(0.00016567, dtype=float32)}


  8%|▊         | 79010/1000000 [3:44:20<94:33:37,  2.71it/s] 

{'loss': Array(0.14740919, dtype=float32), 'loss_cross_entropy': Array(0.1388748, dtype=float32)}
{'loss_inverse': Array(1.4683309e-05, dtype=float32)}


  8%|▊         | 79020/1000000 [3:44:21<36:47:32,  6.95it/s] 

{'loss': Array(0.13599971, dtype=float32), 'loss_cross_entropy': Array(0.12770581, dtype=float32)}
{'loss_inverse': Array(9.147316e-05, dtype=float32)}


  8%|▊         | 79030/1000000 [3:44:23<27:10:47,  9.41it/s]

{'loss': Array(0.1399941, dtype=float32), 'loss_cross_entropy': Array(0.13241912, dtype=float32)}
{'loss_inverse': Array(2.0268222e-07, dtype=float32)}


  8%|▊         | 79040/1000000 [3:44:24<28:22:09,  9.02it/s]

{'loss': Array(0.14176354, dtype=float32), 'loss_cross_entropy': Array(0.13346304, dtype=float32)}
{'loss_inverse': Array(1.9166994e-06, dtype=float32)}


  8%|▊         | 79050/1000000 [3:44:26<25:21:41, 10.09it/s]

{'loss': Array(0.13229322, dtype=float32), 'loss_cross_entropy': Array(0.12427075, dtype=float32)}
{'loss_inverse': Array(8.745784e-05, dtype=float32)}


  8%|▊         | 79060/1000000 [3:44:28<29:49:09,  8.58it/s]

{'loss': Array(0.14665674, dtype=float32), 'loss_cross_entropy': Array(0.13814235, dtype=float32)}
{'loss_inverse': Array(3.0274938e-05, dtype=float32)}


  8%|▊         | 79070/1000000 [3:44:29<26:20:05,  9.71it/s]

{'loss': Array(0.16394155, dtype=float32), 'loss_cross_entropy': Array(0.15509637, dtype=float32)}
{'loss_inverse': Array(4.1970834e-05, dtype=float32)}


  8%|▊         | 79080/1000000 [3:44:31<29:28:51,  8.68it/s]

{'loss': Array(0.15162441, dtype=float32), 'loss_cross_entropy': Array(0.14254604, dtype=float32)}
{'loss_inverse': Array(0.00013591, dtype=float32)}


  8%|▊         | 79090/1000000 [3:44:32<26:00:54,  9.83it/s]

{'loss': Array(0.14134045, dtype=float32), 'loss_cross_entropy': Array(0.13285194, dtype=float32)}
{'loss_inverse': Array(2.1608501e-07, dtype=float32)}


  8%|▊         | 79100/1000000 [3:44:34<30:04:00,  8.51it/s]

{'loss': Array(0.1376131, dtype=float32), 'loss_cross_entropy': Array(0.12939471, dtype=float32)}
{'loss_inverse': Array(1.2674719e-06, dtype=float32)}


  8%|▊         | 79110/1000000 [3:44:35<25:49:44,  9.90it/s]

{'loss': Array(0.14036809, dtype=float32), 'loss_cross_entropy': Array(0.13195981, dtype=float32)}
{'loss_inverse': Array(2.1093476e-06, dtype=float32)}


  8%|▊         | 79120/1000000 [3:44:37<33:46:01,  7.58it/s]

{'loss': Array(0.14399056, dtype=float32), 'loss_cross_entropy': Array(0.13492088, dtype=float32)}
{'loss_inverse': Array(6.274032e-05, dtype=float32)}


  8%|▊         | 79130/1000000 [3:44:39<26:49:05,  9.54it/s]

{'loss': Array(0.1591957, dtype=float32), 'loss_cross_entropy': Array(0.15081345, dtype=float32)}
{'loss_inverse': Array(6.012601e-05, dtype=float32)}


  8%|▊         | 79140/1000000 [3:44:40<37:35:23,  6.80it/s]

{'loss': Array(0.1316495, dtype=float32), 'loss_cross_entropy': Array(0.12380604, dtype=float32)}
{'loss_inverse': Array(0.00036867, dtype=float32)}


  8%|▊         | 79150/1000000 [3:44:42<26:49:57,  9.53it/s]

{'loss': Array(0.14503382, dtype=float32), 'loss_cross_entropy': Array(0.13729177, dtype=float32)}
{'loss_inverse': Array(2.8694768e-07, dtype=float32)}


  8%|▊         | 79160/1000000 [3:44:43<33:40:23,  7.60it/s]

{'loss': Array(0.14323595, dtype=float32), 'loss_cross_entropy': Array(0.13475345, dtype=float32)}
{'loss_inverse': Array(7.95091e-05, dtype=float32)}


  8%|▊         | 79170/1000000 [3:44:45<26:17:42,  9.73it/s]

{'loss': Array(0.1390173, dtype=float32), 'loss_cross_entropy': Array(0.13144675, dtype=float32)}
{'loss_inverse': Array(2.2052063e-05, dtype=float32)}


  8%|▊         | 79180/1000000 [3:44:46<25:22:31, 10.08it/s]

{'loss': Array(0.15198503, dtype=float32), 'loss_cross_entropy': Array(0.14336342, dtype=float32)}
{'loss_inverse': Array(1.2257609e-05, dtype=float32)}


  8%|▊         | 79190/1000000 [3:44:48<29:14:19,  8.75it/s]

{'loss': Array(0.14144307, dtype=float32), 'loss_cross_entropy': Array(0.13348036, dtype=float32)}
{'loss_inverse': Array(5.7440484e-06, dtype=float32)}


  8%|▊         | 79200/1000000 [3:44:49<25:59:15,  9.84it/s]

{'loss': Array(0.15132998, dtype=float32), 'loss_cross_entropy': Array(0.1421688, dtype=float32)}
{'loss_inverse': Array(3.369216e-05, dtype=float32)}


  8%|▊         | 79210/1000000 [3:44:51<31:13:55,  8.19it/s]

{'loss': Array(0.13806425, dtype=float32), 'loss_cross_entropy': Array(0.12967058, dtype=float32)}
{'loss_inverse': Array(5.0578154e-07, dtype=float32)}


  8%|▊         | 79220/1000000 [3:44:53<26:34:47,  9.62it/s]

{'loss': Array(0.15897267, dtype=float32), 'loss_cross_entropy': Array(0.14970092, dtype=float32)}
{'loss_inverse': Array(0.00018552, dtype=float32)}


  8%|▊         | 79230/1000000 [3:44:54<33:59:27,  7.52it/s]

{'loss': Array(0.1480505, dtype=float32), 'loss_cross_entropy': Array(0.1396784, dtype=float32)}
{'loss_inverse': Array(0.00011576, dtype=float32)}


  8%|▊         | 79240/1000000 [3:44:56<26:21:05,  9.71it/s]

{'loss': Array(0.14920461, dtype=float32), 'loss_cross_entropy': Array(0.14126402, dtype=float32)}
{'loss_inverse': Array(8.464181e-05, dtype=float32)}


  8%|▊         | 79250/1000000 [3:44:58<33:58:53,  7.53it/s]

{'loss': Array(0.16022396, dtype=float32), 'loss_cross_entropy': Array(0.15097877, dtype=float32)}
{'loss_inverse': Array(3.6733425e-05, dtype=float32)}


  8%|▊         | 79260/1000000 [3:44:59<26:49:04,  9.54it/s]

{'loss': Array(0.1498807, dtype=float32), 'loss_cross_entropy': Array(0.14188017, dtype=float32)}
{'loss_inverse': Array(3.907566e-05, dtype=float32)}


  8%|▊         | 79270/1000000 [3:45:00<25:24:56, 10.06it/s]

{'loss': Array(0.13585089, dtype=float32), 'loss_cross_entropy': Array(0.12788038, dtype=float32)}
{'loss_inverse': Array(1.0259914e-05, dtype=float32)}


  8%|▊         | 79280/1000000 [3:45:02<30:59:40,  8.25it/s]

{'loss': Array(0.15087779, dtype=float32), 'loss_cross_entropy': Array(0.14314756, dtype=float32)}
{'loss_inverse': Array(4.96597e-06, dtype=float32)}


  8%|▊         | 79290/1000000 [3:45:04<26:00:52,  9.83it/s]

{'loss': Array(0.15649675, dtype=float32), 'loss_cross_entropy': Array(0.14771287, dtype=float32)}
{'loss_inverse': Array(1.6182888e-06, dtype=float32)}


  8%|▊         | 79300/1000000 [3:45:05<38:30:12,  6.64it/s]

{'loss': Array(0.13694264, dtype=float32), 'loss_cross_entropy': Array(0.12910204, dtype=float32)}
{'loss_inverse': Array(9.932787e-06, dtype=float32)}


  8%|▊         | 79310/1000000 [3:45:07<28:41:26,  8.91it/s]

{'loss': Array(0.13576806, dtype=float32), 'loss_cross_entropy': Array(0.12828179, dtype=float32)}
{'loss_inverse': Array(2.7430444e-06, dtype=float32)}


  8%|▊         | 79320/1000000 [3:45:08<26:09:43,  9.78it/s]

{'loss': Array(0.158041, dtype=float32), 'loss_cross_entropy': Array(0.14924756, dtype=float32)}
{'loss_inverse': Array(5.525605e-07, dtype=float32)}


  8%|▊         | 79330/1000000 [3:45:10<28:40:07,  8.92it/s]

{'loss': Array(0.13393222, dtype=float32), 'loss_cross_entropy': Array(0.12607875, dtype=float32)}
{'loss_inverse': Array(1.940311e-06, dtype=float32)}


  8%|▊         | 79340/1000000 [3:45:12<26:21:43,  9.70it/s]

{'loss': Array(0.14124326, dtype=float32), 'loss_cross_entropy': Array(0.13319068, dtype=float32)}
{'loss_inverse': Array(0.00020264, dtype=float32)}


  8%|▊         | 79350/1000000 [3:45:13<28:59:01,  8.82it/s]

{'loss': Array(0.14503662, dtype=float32), 'loss_cross_entropy': Array(0.13684516, dtype=float32)}
{'loss_inverse': Array(1.9736171e-05, dtype=float32)}


  8%|▊         | 79360/1000000 [3:45:15<25:54:33,  9.87it/s]

{'loss': Array(0.14707288, dtype=float32), 'loss_cross_entropy': Array(0.13836066, dtype=float32)}
{'loss_inverse': Array(3.8834824e-07, dtype=float32)}


  8%|▊         | 79370/1000000 [3:45:17<31:17:28,  8.17it/s]

{'loss': Array(0.15205668, dtype=float32), 'loss_cross_entropy': Array(0.14348759, dtype=float32)}
{'loss_inverse': Array(1.705888e-05, dtype=float32)}


  8%|▊         | 79380/1000000 [3:45:18<26:03:24,  9.81it/s]

{'loss': Array(0.1598246, dtype=float32), 'loss_cross_entropy': Array(0.15128358, dtype=float32)}
{'loss_inverse': Array(9.963743e-06, dtype=float32)}


  8%|▊         | 79390/1000000 [3:45:20<31:35:29,  8.09it/s]

{'loss': Array(0.12527923, dtype=float32), 'loss_cross_entropy': Array(0.1178961, dtype=float32)}
{'loss_inverse': Array(2.386984e-07, dtype=float32)}


  8%|▊         | 79400/1000000 [3:45:21<26:11:54,  9.76it/s]

{'loss': Array(0.14115022, dtype=float32), 'loss_cross_entropy': Array(0.1331085, dtype=float32)}
{'loss_inverse': Array(3.5975063e-07, dtype=float32)}


  8%|▊         | 79410/1000000 [3:45:23<37:58:17,  6.73it/s]

{'loss': Array(0.1369932, dtype=float32), 'loss_cross_entropy': Array(0.12903024, dtype=float32)}
{'loss_inverse': Array(1.1700179e-05, dtype=float32)}


  8%|▊         | 79420/1000000 [3:45:24<28:06:43,  9.10it/s]

{'loss': Array(0.1498057, dtype=float32), 'loss_cross_entropy': Array(0.14174426, dtype=float32)}
{'loss_inverse': Array(3.0497617e-06, dtype=float32)}


  8%|▊         | 79430/1000000 [3:45:26<25:21:36, 10.08it/s]

{'loss': Array(0.14368244, dtype=float32), 'loss_cross_entropy': Array(0.1351401, dtype=float32)}
{'loss_inverse': Array(0.0001529, dtype=float32)}


  8%|▊         | 79440/1000000 [3:45:27<28:00:06,  9.13it/s]

{'loss': Array(0.14121221, dtype=float32), 'loss_cross_entropy': Array(0.13301425, dtype=float32)}
{'loss_inverse': Array(1.2379626e-05, dtype=float32)}


  8%|▊         | 79450/1000000 [3:45:29<26:20:42,  9.71it/s]

{'loss': Array(0.13006207, dtype=float32), 'loss_cross_entropy': Array(0.12202847, dtype=float32)}
{'loss_inverse': Array(0.00026463, dtype=float32)}


  8%|▊         | 79460/1000000 [3:45:31<31:44:52,  8.05it/s]

{'loss': Array(0.11839838, dtype=float32), 'loss_cross_entropy': Array(0.11113043, dtype=float32)}
{'loss_inverse': Array(1.3580543e-05, dtype=float32)}


  8%|▊         | 79470/1000000 [3:45:32<26:55:07,  9.50it/s]

{'loss': Array(0.13688414, dtype=float32), 'loss_cross_entropy': Array(0.12929887, dtype=float32)}
{'loss_inverse': Array(8.814588e-07, dtype=float32)}


  8%|▊         | 79480/1000000 [3:45:34<25:58:27,  9.84it/s]

{'loss': Array(0.13161102, dtype=float32), 'loss_cross_entropy': Array(0.12331786, dtype=float32)}
{'loss_inverse': Array(3.216551e-06, dtype=float32)}


  8%|▊         | 79490/1000000 [3:45:35<29:36:19,  8.64it/s]

{'loss': Array(0.14549848, dtype=float32), 'loss_cross_entropy': Array(0.1373848, dtype=float32)}
{'loss_inverse': Array(1.2362618e-07, dtype=float32)}


  8%|▊         | 79500/1000000 [3:45:37<25:34:09, 10.00it/s]

{'loss': Array(0.1468718, dtype=float32), 'loss_cross_entropy': Array(0.1383824, dtype=float32)}
{'loss_inverse': Array(1.5335936e-06, dtype=float32)}


  8%|▊         | 79510/1000000 [3:45:45<97:10:40,  2.63it/s] 

{'loss': Array(0.14045613, dtype=float32), 'loss_cross_entropy': Array(0.13245581, dtype=float32)}
{'loss_inverse': Array(0.0001579, dtype=float32)}


  8%|▊         | 79520/1000000 [3:45:47<38:08:40,  6.70it/s] 

{'loss': Array(0.13494925, dtype=float32), 'loss_cross_entropy': Array(0.12755795, dtype=float32)}
{'loss_inverse': Array(1.6622393e-05, dtype=float32)}


  8%|▊         | 79530/1000000 [3:45:49<40:43:26,  6.28it/s]

{'loss': Array(0.15800317, dtype=float32), 'loss_cross_entropy': Array(0.14926058, dtype=float32)}
{'loss_inverse': Array(2.1692651e-06, dtype=float32)}


  8%|▊         | 79540/1000000 [3:45:50<27:33:34,  9.28it/s]

{'loss': Array(0.14189498, dtype=float32), 'loss_cross_entropy': Array(0.13361064, dtype=float32)}
{'loss_inverse': Array(3.384627e-06, dtype=float32)}


  8%|▊         | 79550/1000000 [3:45:52<25:07:12, 10.18it/s]

{'loss': Array(0.1346234, dtype=float32), 'loss_cross_entropy': Array(0.12693496, dtype=float32)}
{'loss_inverse': Array(1.4853249e-06, dtype=float32)}


  8%|▊         | 79560/1000000 [3:45:53<28:21:11,  9.02it/s]

{'loss': Array(0.15937518, dtype=float32), 'loss_cross_entropy': Array(0.15067491, dtype=float32)}
{'loss_inverse': Array(8.212282e-06, dtype=float32)}


  8%|▊         | 79570/1000000 [3:45:55<38:41:46,  6.61it/s]

{'loss': Array(0.13606137, dtype=float32), 'loss_cross_entropy': Array(0.12850301, dtype=float32)}
{'loss_inverse': Array(5.3152047e-07, dtype=float32)}


  8%|▊         | 79580/1000000 [3:45:57<27:38:27,  9.25it/s]

{'loss': Array(0.14508475, dtype=float32), 'loss_cross_entropy': Array(0.13746156, dtype=float32)}
{'loss_inverse': Array(5.4276825e-06, dtype=float32)}


  8%|▊         | 79590/1000000 [3:45:58<25:08:51, 10.17it/s]

{'loss': Array(0.15233432, dtype=float32), 'loss_cross_entropy': Array(0.1442051, dtype=float32)}
{'loss_inverse': Array(1.2489137e-06, dtype=float32)}


  8%|▊         | 79600/1000000 [3:46:00<28:17:02,  9.04it/s]

{'loss': Array(0.15119188, dtype=float32), 'loss_cross_entropy': Array(0.14227344, dtype=float32)}
{'loss_inverse': Array(8.962076e-06, dtype=float32)}


  8%|▊         | 79610/1000000 [3:46:01<25:10:49, 10.15it/s]

{'loss': Array(0.14607438, dtype=float32), 'loss_cross_entropy': Array(0.13815527, dtype=float32)}
{'loss_inverse': Array(9.6849246e-05, dtype=float32)}


  8%|▊         | 79620/1000000 [3:46:03<29:29:11,  8.67it/s]

{'loss': Array(0.15930764, dtype=float32), 'loss_cross_entropy': Array(0.150521, dtype=float32)}
{'loss_inverse': Array(1.7855521e-06, dtype=float32)}


  8%|▊         | 79630/1000000 [3:46:04<26:14:15,  9.74it/s]

{'loss': Array(0.13910988, dtype=float32), 'loss_cross_entropy': Array(0.13102505, dtype=float32)}
{'loss_inverse': Array(2.7521296e-06, dtype=float32)}


  8%|▊         | 79640/1000000 [3:46:06<32:31:42,  7.86it/s]

{'loss': Array(0.12758887, dtype=float32), 'loss_cross_entropy': Array(0.11967953, dtype=float32)}
{'loss_inverse': Array(3.7046464e-06, dtype=float32)}


  8%|▊         | 79650/1000000 [3:46:08<26:55:56,  9.49it/s]

{'loss': Array(0.1436089, dtype=float32), 'loss_cross_entropy': Array(0.1358094, dtype=float32)}
{'loss_inverse': Array(4.881156e-07, dtype=float32)}


  8%|▊         | 79660/1000000 [3:46:09<25:51:11,  9.89it/s]

{'loss': Array(0.14342967, dtype=float32), 'loss_cross_entropy': Array(0.13468961, dtype=float32)}
{'loss_inverse': Array(2.590778e-07, dtype=float32)}


  8%|▊         | 79670/1000000 [3:46:11<29:39:00,  8.62it/s]

{'loss': Array(0.13574468, dtype=float32), 'loss_cross_entropy': Array(0.12765355, dtype=float32)}
{'loss_inverse': Array(2.2927828e-05, dtype=float32)}


  8%|▊         | 79680/1000000 [3:46:12<25:52:46,  9.88it/s]

{'loss': Array(0.13857165, dtype=float32), 'loss_cross_entropy': Array(0.13052903, dtype=float32)}
{'loss_inverse': Array(3.7590242e-05, dtype=float32)}


  8%|▊         | 79690/1000000 [3:46:14<30:55:10,  8.27it/s]

{'loss': Array(0.14528334, dtype=float32), 'loss_cross_entropy': Array(0.1364105, dtype=float32)}
{'loss_inverse': Array(4.5474153e-05, dtype=float32)}


  8%|▊         | 79700/1000000 [3:46:15<25:59:46,  9.83it/s]

{'loss': Array(0.15621434, dtype=float32), 'loss_cross_entropy': Array(0.1475488, dtype=float32)}
{'loss_inverse': Array(5.8876885e-06, dtype=float32)}


  8%|▊         | 79710/1000000 [3:46:17<33:27:39,  7.64it/s]

{'loss': Array(0.12675719, dtype=float32), 'loss_cross_entropy': Array(0.11944419, dtype=float32)}
{'loss_inverse': Array(3.8788053e-07, dtype=float32)}


  8%|▊         | 79720/1000000 [3:46:19<26:44:24,  9.56it/s]

{'loss': Array(0.14404309, dtype=float32), 'loss_cross_entropy': Array(0.13542186, dtype=float32)}
{'loss_inverse': Array(1.9935967e-06, dtype=float32)}


  8%|▊         | 79730/1000000 [3:46:20<26:41:33,  9.58it/s]

{'loss': Array(0.15814775, dtype=float32), 'loss_cross_entropy': Array(0.14980565, dtype=float32)}
{'loss_inverse': Array(6.670972e-06, dtype=float32)}


  8%|▊         | 79740/1000000 [3:46:22<28:50:09,  8.86it/s]

{'loss': Array(0.13084546, dtype=float32), 'loss_cross_entropy': Array(0.12367045, dtype=float32)}
{'loss_inverse': Array(3.1587606e-06, dtype=float32)}


  8%|▊         | 79750/1000000 [3:46:23<25:32:19, 10.01it/s]

{'loss': Array(0.14078425, dtype=float32), 'loss_cross_entropy': Array(0.13212688, dtype=float32)}
{'loss_inverse': Array(7.664654e-07, dtype=float32)}


  8%|▊         | 79760/1000000 [3:46:25<34:13:49,  7.47it/s]

{'loss': Array(0.14133728, dtype=float32), 'loss_cross_entropy': Array(0.13370213, dtype=float32)}
{'loss_inverse': Array(1.2281583e-06, dtype=float32)}


  8%|▊         | 79770/1000000 [3:46:26<26:18:34,  9.72it/s]

{'loss': Array(0.15730679, dtype=float32), 'loss_cross_entropy': Array(0.1483597, dtype=float32)}
{'loss_inverse': Array(4.501165e-07, dtype=float32)}


  8%|▊         | 79780/1000000 [3:46:28<25:25:31, 10.05it/s]

{'loss': Array(0.14195903, dtype=float32), 'loss_cross_entropy': Array(0.13381587, dtype=float32)}
{'loss_inverse': Array(7.3729404e-07, dtype=float32)}


  8%|▊         | 79790/1000000 [3:46:30<27:59:02,  9.13it/s]

{'loss': Array(0.16174802, dtype=float32), 'loss_cross_entropy': Array(0.1518598, dtype=float32)}
{'loss_inverse': Array(4.3812284e-07, dtype=float32)}


  8%|▊         | 79800/1000000 [3:46:31<25:33:38, 10.00it/s]

{'loss': Array(0.17748377, dtype=float32), 'loss_cross_entropy': Array(0.16737176, dtype=float32)}
{'loss_inverse': Array(6.116634e-05, dtype=float32)}


  8%|▊         | 79810/1000000 [3:46:33<29:48:37,  8.57it/s]

{'loss': Array(0.16110733, dtype=float32), 'loss_cross_entropy': Array(0.15223831, dtype=float32)}
{'loss_inverse': Array(3.9609287e-05, dtype=float32)}


  8%|▊         | 79820/1000000 [3:46:34<25:57:19,  9.85it/s]

{'loss': Array(0.14412287, dtype=float32), 'loss_cross_entropy': Array(0.1362329, dtype=float32)}
{'loss_inverse': Array(8.36663e-06, dtype=float32)}


  8%|▊         | 79830/1000000 [3:46:36<32:08:45,  7.95it/s]

{'loss': Array(0.13966878, dtype=float32), 'loss_cross_entropy': Array(0.13176288, dtype=float32)}
{'loss_inverse': Array(0.00042701, dtype=float32)}


  8%|▊         | 79840/1000000 [3:46:37<25:50:18,  9.89it/s]

{'loss': Array(0.12227907, dtype=float32), 'loss_cross_entropy': Array(0.11439048, dtype=float32)}
{'loss_inverse': Array(4.3113423e-06, dtype=float32)}


  8%|▊         | 79850/1000000 [3:46:39<25:29:41, 10.03it/s]

{'loss': Array(0.16466573, dtype=float32), 'loss_cross_entropy': Array(0.15519512, dtype=float32)}
{'loss_inverse': Array(0.00010666, dtype=float32)}


  8%|▊         | 79860/1000000 [3:46:41<29:51:32,  8.56it/s]

{'loss': Array(0.1437762, dtype=float32), 'loss_cross_entropy': Array(0.13532971, dtype=float32)}
{'loss_inverse': Array(3.5692854e-06, dtype=float32)}


  8%|▊         | 79870/1000000 [3:46:42<26:08:36,  9.78it/s]

{'loss': Array(0.1580816, dtype=float32), 'loss_cross_entropy': Array(0.14943996, dtype=float32)}
{'loss_inverse': Array(4.8676946e-05, dtype=float32)}


  8%|▊         | 79880/1000000 [3:46:44<33:58:39,  7.52it/s]

{'loss': Array(0.17093742, dtype=float32), 'loss_cross_entropy': Array(0.16185759, dtype=float32)}
{'loss_inverse': Array(7.32941e-06, dtype=float32)}


  8%|▊         | 79890/1000000 [3:46:45<27:07:54,  9.42it/s]

{'loss': Array(0.15648232, dtype=float32), 'loss_cross_entropy': Array(0.1479286, dtype=float32)}
{'loss_inverse': Array(0.00011917, dtype=float32)}


  8%|▊         | 79900/1000000 [3:46:47<39:19:49,  6.50it/s]

{'loss': Array(0.15810369, dtype=float32), 'loss_cross_entropy': Array(0.14919043, dtype=float32)}
{'loss_inverse': Array(7.8778314e-05, dtype=float32)}


  8%|▊         | 79910/1000000 [3:46:48<27:07:29,  9.42it/s]

{'loss': Array(0.1513625, dtype=float32), 'loss_cross_entropy': Array(0.14293313, dtype=float32)}
{'loss_inverse': Array(3.4403718e-06, dtype=float32)}


  8%|▊         | 79920/1000000 [3:46:50<37:38:03,  6.79it/s]

{'loss': Array(0.15659513, dtype=float32), 'loss_cross_entropy': Array(0.14802204, dtype=float32)}
{'loss_inverse': Array(0.00010688, dtype=float32)}


  8%|▊         | 79930/1000000 [3:46:52<27:24:29,  9.32it/s]

{'loss': Array(0.16023967, dtype=float32), 'loss_cross_entropy': Array(0.15112685, dtype=float32)}
{'loss_inverse': Array(4.639038e-05, dtype=float32)}


  8%|▊         | 79940/1000000 [3:46:53<25:42:58,  9.94it/s]

{'loss': Array(0.14665356, dtype=float32), 'loss_cross_entropy': Array(0.13847327, dtype=float32)}
{'loss_inverse': Array(6.886292e-05, dtype=float32)}


  8%|▊         | 79950/1000000 [3:46:55<31:57:36,  8.00it/s]

{'loss': Array(0.15759382, dtype=float32), 'loss_cross_entropy': Array(0.14837565, dtype=float32)}
{'loss_inverse': Array(1.237915e-06, dtype=float32)}


  8%|▊         | 79960/1000000 [3:46:56<26:03:17,  9.81it/s]

{'loss': Array(0.13944803, dtype=float32), 'loss_cross_entropy': Array(0.1320915, dtype=float32)}
{'loss_inverse': Array(8.704077e-05, dtype=float32)}


  8%|▊         | 79970/1000000 [3:46:58<37:56:39,  6.74it/s]

{'loss': Array(0.158514, dtype=float32), 'loss_cross_entropy': Array(0.1499028, dtype=float32)}
{'loss_inverse': Array(3.0020433e-06, dtype=float32)}


  8%|▊         | 79980/1000000 [3:46:59<26:39:08,  9.59it/s]

{'loss': Array(0.14177014, dtype=float32), 'loss_cross_entropy': Array(0.13364054, dtype=float32)}
{'loss_inverse': Array(1.0220846e-06, dtype=float32)}


  8%|▊         | 79990/1000000 [3:47:01<37:46:06,  6.77it/s]

{'loss': Array(0.1378462, dtype=float32), 'loss_cross_entropy': Array(0.12945455, dtype=float32)}
{'loss_inverse': Array(1.910748e-06, dtype=float32)}


  8%|▊         | 80000/1000000 [3:47:03<27:06:51,  9.43it/s]

{'loss': Array(0.13159764, dtype=float32), 'loss_cross_entropy': Array(0.12426541, dtype=float32)}
{'loss_inverse': Array(1.7144068e-06, dtype=float32)}


  8%|▊         | 80010/1000000 [3:47:11<86:04:29,  2.97it/s] 

{'loss': Array(0.13878857, dtype=float32), 'loss_cross_entropy': Array(0.12999743, dtype=float32)}
{'loss_inverse': Array(2.0496882e-05, dtype=float32)}


  8%|▊         | 80020/1000000 [3:47:12<43:53:40,  5.82it/s]

{'loss': Array(0.13145475, dtype=float32), 'loss_cross_entropy': Array(0.12316938, dtype=float32)}
{'loss_inverse': Array(8.366949e-06, dtype=float32)}


  8%|▊         | 80030/1000000 [3:47:14<28:24:11,  9.00it/s]

{'loss': Array(0.13183203, dtype=float32), 'loss_cross_entropy': Array(0.12430085, dtype=float32)}
{'loss_inverse': Array(7.1809504e-06, dtype=float32)}


  8%|▊         | 80040/1000000 [3:47:15<25:27:27, 10.04it/s]

{'loss': Array(0.16566566, dtype=float32), 'loss_cross_entropy': Array(0.1561106, dtype=float32)}
{'loss_inverse': Array(3.2638396e-05, dtype=float32)}


  8%|▊         | 80050/1000000 [3:47:17<28:11:16,  9.07it/s]

{'loss': Array(0.14376031, dtype=float32), 'loss_cross_entropy': Array(0.13518938, dtype=float32)}
{'loss_inverse': Array(6.9189485e-07, dtype=float32)}


  8%|▊         | 80060/1000000 [3:47:18<25:20:30, 10.08it/s]

{'loss': Array(0.16915591, dtype=float32), 'loss_cross_entropy': Array(0.159671, dtype=float32)}
{'loss_inverse': Array(2.2812974e-06, dtype=float32)}


  8%|▊         | 80070/1000000 [3:47:20<29:08:06,  8.77it/s]

{'loss': Array(0.13933666, dtype=float32), 'loss_cross_entropy': Array(0.1313086, dtype=float32)}
{'loss_inverse': Array(4.115027e-05, dtype=float32)}


  8%|▊         | 80080/1000000 [3:47:21<25:33:04, 10.00it/s]

{'loss': Array(0.14148587, dtype=float32), 'loss_cross_entropy': Array(0.13365446, dtype=float32)}
{'loss_inverse': Array(3.4999488e-07, dtype=float32)}


  8%|▊         | 80090/1000000 [3:47:23<31:56:25,  8.00it/s]

{'loss': Array(0.16656049, dtype=float32), 'loss_cross_entropy': Array(0.15743573, dtype=float32)}
{'loss_inverse': Array(7.418518e-08, dtype=float32)}


  8%|▊         | 80100/1000000 [3:47:25<26:02:25,  9.81it/s]

{'loss': Array(0.1418945, dtype=float32), 'loss_cross_entropy': Array(0.1335579, dtype=float32)}
{'loss_inverse': Array(1.748e-06, dtype=float32)}


  8%|▊         | 80110/1000000 [3:47:26<24:55:08, 10.25it/s]

{'loss': Array(0.15497941, dtype=float32), 'loss_cross_entropy': Array(0.14696787, dtype=float32)}
{'loss_inverse': Array(0.00018342, dtype=float32)}


  8%|▊         | 80120/1000000 [3:47:28<28:24:58,  8.99it/s]

{'loss': Array(0.14640175, dtype=float32), 'loss_cross_entropy': Array(0.13789229, dtype=float32)}
{'loss_inverse': Array(7.1441207e-07, dtype=float32)}


  8%|▊         | 80130/1000000 [3:47:29<25:39:43,  9.96it/s]

{'loss': Array(0.14670846, dtype=float32), 'loss_cross_entropy': Array(0.1387837, dtype=float32)}
{'loss_inverse': Array(5.9825845e-05, dtype=float32)}


  8%|▊         | 80140/1000000 [3:47:31<29:29:36,  8.66it/s]

{'loss': Array(0.14875664, dtype=float32), 'loss_cross_entropy': Array(0.13938572, dtype=float32)}
{'loss_inverse': Array(2.9062583e-06, dtype=float32)}


  8%|▊         | 80150/1000000 [3:47:32<25:53:31,  9.87it/s]

{'loss': Array(0.15271427, dtype=float32), 'loss_cross_entropy': Array(0.1443103, dtype=float32)}
{'loss_inverse': Array(0.00012397, dtype=float32)}


  8%|▊         | 80160/1000000 [3:47:34<32:01:04,  7.98it/s]

{'loss': Array(0.13332711, dtype=float32), 'loss_cross_entropy': Array(0.12537192, dtype=float32)}
{'loss_inverse': Array(0.00051978, dtype=float32)}


  8%|▊         | 80170/1000000 [3:47:36<26:49:27,  9.53it/s]

{'loss': Array(0.16707316, dtype=float32), 'loss_cross_entropy': Array(0.15792029, dtype=float32)}
{'loss_inverse': Array(2.0033433e-05, dtype=float32)}


  8%|▊         | 80180/1000000 [3:47:37<25:04:31, 10.19it/s]

{'loss': Array(0.16343181, dtype=float32), 'loss_cross_entropy': Array(0.15463583, dtype=float32)}
{'loss_inverse': Array(2.4006327e-05, dtype=float32)}


  8%|▊         | 80190/1000000 [3:47:39<29:15:44,  8.73it/s]

{'loss': Array(0.14570846, dtype=float32), 'loss_cross_entropy': Array(0.1370402, dtype=float32)}
{'loss_inverse': Array(1.9856964e-06, dtype=float32)}


  8%|▊         | 80200/1000000 [3:47:40<25:40:46,  9.95it/s]

{'loss': Array(0.15259066, dtype=float32), 'loss_cross_entropy': Array(0.14378205, dtype=float32)}
{'loss_inverse': Array(4.936773e-06, dtype=float32)}


  8%|▊         | 80210/1000000 [3:47:42<31:34:32,  8.09it/s]

{'loss': Array(0.12316703, dtype=float32), 'loss_cross_entropy': Array(0.11598378, dtype=float32)}
{'loss_inverse': Array(1.381747e-05, dtype=float32)}


  8%|▊         | 80220/1000000 [3:47:43<26:00:12,  9.83it/s]

{'loss': Array(0.13834469, dtype=float32), 'loss_cross_entropy': Array(0.13021345, dtype=float32)}
{'loss_inverse': Array(2.4040452e-05, dtype=float32)}


  8%|▊         | 80230/1000000 [3:47:45<34:37:47,  7.38it/s]

{'loss': Array(0.14938833, dtype=float32), 'loss_cross_entropy': Array(0.14016731, dtype=float32)}
{'loss_inverse': Array(8.932141e-07, dtype=float32)}


  8%|▊         | 80240/1000000 [3:47:47<26:38:19,  9.59it/s]

{'loss': Array(0.14814082, dtype=float32), 'loss_cross_entropy': Array(0.13949884, dtype=float32)}
{'loss_inverse': Array(0.0001918, dtype=float32)}


  8%|▊         | 80250/1000000 [3:47:48<25:09:54, 10.15it/s]

{'loss': Array(0.132603, dtype=float32), 'loss_cross_entropy': Array(0.1251553, dtype=float32)}
{'loss_inverse': Array(0.00015218, dtype=float32)}


  8%|▊         | 80260/1000000 [3:47:50<29:28:59,  8.67it/s]

{'loss': Array(0.13662006, dtype=float32), 'loss_cross_entropy': Array(0.12888658, dtype=float32)}
{'loss_inverse': Array(2.1544958e-05, dtype=float32)}


  8%|▊         | 80270/1000000 [3:47:51<25:32:50, 10.00it/s]

{'loss': Array(0.15281, dtype=float32), 'loss_cross_entropy': Array(0.14383042, dtype=float32)}
{'loss_inverse': Array(1.0010707e-05, dtype=float32)}


  8%|▊         | 80280/1000000 [3:47:53<33:59:44,  7.52it/s]

{'loss': Array(0.14957027, dtype=float32), 'loss_cross_entropy': Array(0.14079843, dtype=float32)}
{'loss_inverse': Array(3.2160628e-05, dtype=float32)}


  8%|▊         | 80290/1000000 [3:47:54<26:26:32,  9.66it/s]

{'loss': Array(0.14515084, dtype=float32), 'loss_cross_entropy': Array(0.13711129, dtype=float32)}
{'loss_inverse': Array(1.9192817e-05, dtype=float32)}


  8%|▊         | 80300/1000000 [3:47:56<25:20:32, 10.08it/s]

{'loss': Array(0.13793924, dtype=float32), 'loss_cross_entropy': Array(0.12982647, dtype=float32)}
{'loss_inverse': Array(5.6170694e-07, dtype=float32)}


  8%|▊         | 80310/1000000 [3:47:57<28:16:02,  9.04it/s]

{'loss': Array(0.15671709, dtype=float32), 'loss_cross_entropy': Array(0.14817174, dtype=float32)}
{'loss_inverse': Array(1.7102544e-05, dtype=float32)}


  8%|▊         | 80320/1000000 [3:47:59<25:36:06,  9.98it/s]

{'loss': Array(0.14113744, dtype=float32), 'loss_cross_entropy': Array(0.1333696, dtype=float32)}
{'loss_inverse': Array(6.101424e-05, dtype=float32)}


  8%|▊         | 80330/1000000 [3:48:01<29:29:55,  8.66it/s]

{'loss': Array(0.16007364, dtype=float32), 'loss_cross_entropy': Array(0.15063249, dtype=float32)}
{'loss_inverse': Array(1.28678075e-05, dtype=float32)}


  8%|▊         | 80340/1000000 [3:48:02<26:06:07,  9.79it/s]

{'loss': Array(0.15049975, dtype=float32), 'loss_cross_entropy': Array(0.14160743, dtype=float32)}
{'loss_inverse': Array(9.703701e-05, dtype=float32)}


  8%|▊         | 80350/1000000 [3:48:04<31:31:26,  8.10it/s]

{'loss': Array(0.13948573, dtype=float32), 'loss_cross_entropy': Array(0.13139041, dtype=float32)}
{'loss_inverse': Array(8.735866e-08, dtype=float32)}


  8%|▊         | 80360/1000000 [3:48:05<26:23:47,  9.68it/s]

{'loss': Array(0.12686329, dtype=float32), 'loss_cross_entropy': Array(0.11947855, dtype=float32)}
{'loss_inverse': Array(2.886408e-07, dtype=float32)}


  8%|▊         | 80370/1000000 [3:48:07<26:08:04,  9.77it/s]

{'loss': Array(0.13918304, dtype=float32), 'loss_cross_entropy': Array(0.13053633, dtype=float32)}
{'loss_inverse': Array(1.6881157e-05, dtype=float32)}


  8%|▊         | 80380/1000000 [3:48:09<30:28:51,  8.38it/s]

{'loss': Array(0.15220682, dtype=float32), 'loss_cross_entropy': Array(0.14384122, dtype=float32)}
{'loss_inverse': Array(2.997295e-06, dtype=float32)}


  8%|▊         | 80390/1000000 [3:48:10<25:51:58,  9.88it/s]

{'loss': Array(0.15758882, dtype=float32), 'loss_cross_entropy': Array(0.14909357, dtype=float32)}
{'loss_inverse': Array(9.834947e-07, dtype=float32)}


  8%|▊         | 80400/1000000 [3:48:12<34:07:59,  7.48it/s]

{'loss': Array(0.12782489, dtype=float32), 'loss_cross_entropy': Array(0.12038546, dtype=float32)}
{'loss_inverse': Array(0.00014965, dtype=float32)}


  8%|▊         | 80410/1000000 [3:48:13<26:21:58,  9.69it/s]

{'loss': Array(0.13916916, dtype=float32), 'loss_cross_entropy': Array(0.1310693, dtype=float32)}
{'loss_inverse': Array(1.112496e-06, dtype=float32)}


  8%|▊         | 80420/1000000 [3:48:15<38:40:27,  6.60it/s]

{'loss': Array(0.14427426, dtype=float32), 'loss_cross_entropy': Array(0.13500634, dtype=float32)}
{'loss_inverse': Array(2.2883462e-06, dtype=float32)}


  8%|▊         | 80430/1000000 [3:48:16<27:30:21,  9.29it/s]

{'loss': Array(0.13714188, dtype=float32), 'loss_cross_entropy': Array(0.1298541, dtype=float32)}
{'loss_inverse': Array(1.1072347e-07, dtype=float32)}


  8%|▊         | 80440/1000000 [3:48:18<38:43:59,  6.59it/s]

{'loss': Array(0.14616649, dtype=float32), 'loss_cross_entropy': Array(0.13793492, dtype=float32)}
{'loss_inverse': Array(2.5062316e-06, dtype=float32)}


  8%|▊         | 80450/1000000 [3:48:19<27:07:22,  9.42it/s]

{'loss': Array(0.14261757, dtype=float32), 'loss_cross_entropy': Array(0.13418017, dtype=float32)}
{'loss_inverse': Array(9.205216e-07, dtype=float32)}


  8%|▊         | 80460/1000000 [3:48:21<25:12:04, 10.14it/s]

{'loss': Array(0.15534447, dtype=float32), 'loss_cross_entropy': Array(0.14680454, dtype=float32)}
{'loss_inverse': Array(2.7627815e-05, dtype=float32)}


  8%|▊         | 80470/1000000 [3:48:23<31:36:46,  8.08it/s]

{'loss': Array(0.13325219, dtype=float32), 'loss_cross_entropy': Array(0.12525737, dtype=float32)}
{'loss_inverse': Array(0.00019899, dtype=float32)}


  8%|▊         | 80480/1000000 [3:48:24<26:29:31,  9.64it/s]

{'loss': Array(0.14686345, dtype=float32), 'loss_cross_entropy': Array(0.13893688, dtype=float32)}
{'loss_inverse': Array(6.944658e-06, dtype=float32)}


  8%|▊         | 80490/1000000 [3:48:26<37:43:23,  6.77it/s]

{'loss': Array(0.14890248, dtype=float32), 'loss_cross_entropy': Array(0.14058803, dtype=float32)}
{'loss_inverse': Array(1.4646622e-05, dtype=float32)}


  8%|▊         | 80500/1000000 [3:48:27<26:53:36,  9.50it/s]

{'loss': Array(0.13753764, dtype=float32), 'loss_cross_entropy': Array(0.12937365, dtype=float32)}
{'loss_inverse': Array(4.538534e-05, dtype=float32)}


  8%|▊         | 80510/1000000 [3:48:35<97:05:59,  2.63it/s] 

{'loss': Array(0.13954294, dtype=float32), 'loss_cross_entropy': Array(0.13199008, dtype=float32)}
{'loss_inverse': Array(4.8145666e-06, dtype=float32)}


  8%|▊         | 80520/1000000 [3:48:37<36:54:18,  6.92it/s] 

{'loss': Array(0.13572572, dtype=float32), 'loss_cross_entropy': Array(0.12780665, dtype=float32)}
{'loss_inverse': Array(7.329556e-06, dtype=float32)}


  8%|▊         | 80530/1000000 [3:48:38<26:57:27,  9.47it/s]

{'loss': Array(0.13579214, dtype=float32), 'loss_cross_entropy': Array(0.1284948, dtype=float32)}
{'loss_inverse': Array(5.949123e-06, dtype=float32)}


  8%|▊         | 80540/1000000 [3:48:40<29:58:25,  8.52it/s]

{'loss': Array(0.14552578, dtype=float32), 'loss_cross_entropy': Array(0.13739415, dtype=float32)}
{'loss_inverse': Array(9.3623487e-07, dtype=float32)}


  8%|▊         | 80550/1000000 [3:48:41<26:02:13,  9.81it/s]

{'loss': Array(0.13890614, dtype=float32), 'loss_cross_entropy': Array(0.13148312, dtype=float32)}
{'loss_inverse': Array(1.40554575e-05, dtype=float32)}


  8%|▊         | 80560/1000000 [3:48:43<37:31:02,  6.81it/s]

{'loss': Array(0.1385372, dtype=float32), 'loss_cross_entropy': Array(0.12995735, dtype=float32)}
{'loss_inverse': Array(0.00023657, dtype=float32)}


  8%|▊         | 80570/1000000 [3:48:45<26:58:34,  9.47it/s]

{'loss': Array(0.133129, dtype=float32), 'loss_cross_entropy': Array(0.12520413, dtype=float32)}
{'loss_inverse': Array(7.8331743e-07, dtype=float32)}


  8%|▊         | 80580/1000000 [3:48:46<25:44:13,  9.92it/s]

{'loss': Array(0.14914306, dtype=float32), 'loss_cross_entropy': Array(0.14016874, dtype=float32)}
{'loss_inverse': Array(4.826553e-05, dtype=float32)}


  8%|▊         | 80590/1000000 [3:48:48<30:05:46,  8.49it/s]

{'loss': Array(0.13598256, dtype=float32), 'loss_cross_entropy': Array(0.12817381, dtype=float32)}
{'loss_inverse': Array(1.664642e-05, dtype=float32)}


  8%|▊         | 80600/1000000 [3:48:49<25:45:06,  9.92it/s]

{'loss': Array(0.15248403, dtype=float32), 'loss_cross_entropy': Array(0.14357336, dtype=float32)}
{'loss_inverse': Array(1.0382307e-06, dtype=float32)}


  8%|▊         | 80610/1000000 [3:48:51<29:18:47,  8.71it/s]

{'loss': Array(0.14218964, dtype=float32), 'loss_cross_entropy': Array(0.13395868, dtype=float32)}
{'loss_inverse': Array(3.4202992e-06, dtype=float32)}


  8%|▊         | 80620/1000000 [3:48:52<26:09:58,  9.76it/s]

{'loss': Array(0.12991273, dtype=float32), 'loss_cross_entropy': Array(0.12235955, dtype=float32)}
{'loss_inverse': Array(5.393194e-06, dtype=float32)}


  8%|▊         | 80630/1000000 [3:48:54<31:46:56,  8.04it/s]

{'loss': Array(0.15017287, dtype=float32), 'loss_cross_entropy': Array(0.14125675, dtype=float32)}
{'loss_inverse': Array(2.1784072e-06, dtype=float32)}


  8%|▊         | 80640/1000000 [3:48:56<26:34:49,  9.61it/s]

{'loss': Array(0.1436821, dtype=float32), 'loss_cross_entropy': Array(0.13585357, dtype=float32)}
{'loss_inverse': Array(9.097514e-06, dtype=float32)}


  8%|▊         | 80650/1000000 [3:48:57<25:06:40, 10.17it/s]

{'loss': Array(0.14963883, dtype=float32), 'loss_cross_entropy': Array(0.14140074, dtype=float32)}
{'loss_inverse': Array(8.115589e-07, dtype=float32)}


  8%|▊         | 80660/1000000 [3:48:59<28:23:45,  8.99it/s]

{'loss': Array(0.1534496, dtype=float32), 'loss_cross_entropy': Array(0.14510684, dtype=float32)}
{'loss_inverse': Array(8.258633e-09, dtype=float32)}


  8%|▊         | 80670/1000000 [3:49:00<25:47:12,  9.90it/s]

{'loss': Array(0.14719318, dtype=float32), 'loss_cross_entropy': Array(0.1389817, dtype=float32)}
{'loss_inverse': Array(2.0202674e-06, dtype=float32)}


  8%|▊         | 80680/1000000 [3:49:02<29:26:20,  8.67it/s]

{'loss': Array(0.14098705, dtype=float32), 'loss_cross_entropy': Array(0.13272284, dtype=float32)}
{'loss_inverse': Array(0.00029174, dtype=float32)}


  8%|▊         | 80690/1000000 [3:49:03<25:35:15,  9.98it/s]

{'loss': Array(0.14876477, dtype=float32), 'loss_cross_entropy': Array(0.14020252, dtype=float32)}
{'loss_inverse': Array(2.3578791e-07, dtype=float32)}


  8%|▊         | 80700/1000000 [3:49:05<31:41:46,  8.06it/s]

{'loss': Array(0.1441889, dtype=float32), 'loss_cross_entropy': Array(0.1352729, dtype=float32)}
{'loss_inverse': Array(2.527429e-06, dtype=float32)}


  8%|▊         | 80710/1000000 [3:49:07<27:01:52,  9.45it/s]

{'loss': Array(0.13419095, dtype=float32), 'loss_cross_entropy': Array(0.12593699, dtype=float32)}
{'loss_inverse': Array(1.3194715e-05, dtype=float32)}


  8%|▊         | 80720/1000000 [3:49:08<26:56:01,  9.48it/s]

{'loss': Array(0.15505862, dtype=float32), 'loss_cross_entropy': Array(0.14620732, dtype=float32)}
{'loss_inverse': Array(7.4773775e-06, dtype=float32)}


  8%|▊         | 80730/1000000 [3:49:10<30:25:57,  8.39it/s]

{'loss': Array(0.1621661, dtype=float32), 'loss_cross_entropy': Array(0.15340231, dtype=float32)}
{'loss_inverse': Array(1.27937e-07, dtype=float32)}


  8%|▊         | 80740/1000000 [3:49:11<26:53:26,  9.50it/s]

{'loss': Array(0.13398132, dtype=float32), 'loss_cross_entropy': Array(0.12606469, dtype=float32)}
{'loss_inverse': Array(4.5510137e-07, dtype=float32)}


  8%|▊         | 80750/1000000 [3:49:13<32:32:50,  7.85it/s]

{'loss': Array(0.15896194, dtype=float32), 'loss_cross_entropy': Array(0.15024458, dtype=float32)}
{'loss_inverse': Array(2.0118239e-07, dtype=float32)}


  8%|▊         | 80760/1000000 [3:49:15<26:32:38,  9.62it/s]

{'loss': Array(0.15489647, dtype=float32), 'loss_cross_entropy': Array(0.14624284, dtype=float32)}
{'loss_inverse': Array(3.0362398e-06, dtype=float32)}


  8%|▊         | 80770/1000000 [3:49:17<34:59:27,  7.30it/s]

{'loss': Array(0.15241538, dtype=float32), 'loss_cross_entropy': Array(0.14304969, dtype=float32)}
{'loss_inverse': Array(7.957603e-05, dtype=float32)}


  8%|▊         | 80780/1000000 [3:49:18<27:03:42,  9.44it/s]

{'loss': Array(0.15099062, dtype=float32), 'loss_cross_entropy': Array(0.14204296, dtype=float32)}
{'loss_inverse': Array(9.542454e-07, dtype=float32)}


  8%|▊         | 80790/1000000 [3:49:19<25:00:21, 10.21it/s]

{'loss': Array(0.13812187, dtype=float32), 'loss_cross_entropy': Array(0.12991533, dtype=float32)}
{'loss_inverse': Array(0.00032758, dtype=float32)}


  8%|▊         | 80800/1000000 [3:49:21<27:33:42,  9.26it/s]

{'loss': Array(0.15001586, dtype=float32), 'loss_cross_entropy': Array(0.14101215, dtype=float32)}
{'loss_inverse': Array(2.392632e-06, dtype=float32)}


  8%|▊         | 80810/1000000 [3:49:23<25:41:59,  9.94it/s]

{'loss': Array(0.14685956, dtype=float32), 'loss_cross_entropy': Array(0.13882564, dtype=float32)}
{'loss_inverse': Array(6.792917e-07, dtype=float32)}


  8%|▊         | 80820/1000000 [3:49:24<34:11:50,  7.47it/s]

{'loss': Array(0.13949439, dtype=float32), 'loss_cross_entropy': Array(0.13188939, dtype=float32)}
{'loss_inverse': Array(1.1446928e-05, dtype=float32)}


  8%|▊         | 80830/1000000 [3:49:26<26:57:18,  9.47it/s]

{'loss': Array(0.15248998, dtype=float32), 'loss_cross_entropy': Array(0.14342457, dtype=float32)}
{'loss_inverse': Array(6.0087816e-05, dtype=float32)}


  8%|▊         | 80840/1000000 [3:49:27<25:00:22, 10.21it/s]

{'loss': Array(0.14061703, dtype=float32), 'loss_cross_entropy': Array(0.13320665, dtype=float32)}
{'loss_inverse': Array(5.6248286e-05, dtype=float32)}


  8%|▊         | 80850/1000000 [3:49:29<29:20:33,  8.70it/s]

{'loss': Array(0.14564951, dtype=float32), 'loss_cross_entropy': Array(0.13807398, dtype=float32)}
{'loss_inverse': Array(1.601665e-07, dtype=float32)}


  8%|▊         | 80860/1000000 [3:49:30<25:26:25, 10.04it/s]

{'loss': Array(0.15358862, dtype=float32), 'loss_cross_entropy': Array(0.14550559, dtype=float32)}
{'loss_inverse': Array(2.1751016e-07, dtype=float32)}


  8%|▊         | 80870/1000000 [3:49:32<30:55:20,  8.26it/s]

{'loss': Array(0.14442793, dtype=float32), 'loss_cross_entropy': Array(0.1360944, dtype=float32)}
{'loss_inverse': Array(1.5896061e-06, dtype=float32)}


  8%|▊         | 80880/1000000 [3:49:33<25:51:58,  9.87it/s]

{'loss': Array(0.15634392, dtype=float32), 'loss_cross_entropy': Array(0.14725037, dtype=float32)}
{'loss_inverse': Array(2.1257179e-06, dtype=float32)}


  8%|▊         | 80890/1000000 [3:49:35<34:13:07,  7.46it/s]

{'loss': Array(0.13786782, dtype=float32), 'loss_cross_entropy': Array(0.12973006, dtype=float32)}
{'loss_inverse': Array(2.9019014e-07, dtype=float32)}


  8%|▊         | 80900/1000000 [3:49:37<26:51:25,  9.51it/s]

{'loss': Array(0.1523893, dtype=float32), 'loss_cross_entropy': Array(0.14378999, dtype=float32)}
{'loss_inverse': Array(4.0160313e-07, dtype=float32)}


  8%|▊         | 80910/1000000 [3:49:38<25:46:08,  9.91it/s]

{'loss': Array(0.1363299, dtype=float32), 'loss_cross_entropy': Array(0.12849663, dtype=float32)}
{'loss_inverse': Array(1.054146e-06, dtype=float32)}


  8%|▊         | 80920/1000000 [3:49:40<29:40:12,  8.60it/s]

{'loss': Array(0.1436619, dtype=float32), 'loss_cross_entropy': Array(0.13589944, dtype=float32)}
{'loss_inverse': Array(1.5063799e-07, dtype=float32)}


  8%|▊         | 80930/1000000 [3:49:41<25:50:49,  9.88it/s]

{'loss': Array(0.13746609, dtype=float32), 'loss_cross_entropy': Array(0.12935956, dtype=float32)}
{'loss_inverse': Array(3.278794e-05, dtype=float32)}


  8%|▊         | 80940/1000000 [3:49:43<34:25:42,  7.42it/s]

{'loss': Array(0.14033441, dtype=float32), 'loss_cross_entropy': Array(0.13202868, dtype=float32)}
{'loss_inverse': Array(1.394549e-07, dtype=float32)}


  8%|▊         | 80950/1000000 [3:49:44<26:25:03,  9.66it/s]

{'loss': Array(0.13636728, dtype=float32), 'loss_cross_entropy': Array(0.1287251, dtype=float32)}
{'loss_inverse': Array(1.28357615e-05, dtype=float32)}


  8%|▊         | 80960/1000000 [3:49:46<37:45:02,  6.76it/s]

{'loss': Array(0.13928887, dtype=float32), 'loss_cross_entropy': Array(0.13085505, dtype=float32)}
{'loss_inverse': Array(3.792682e-07, dtype=float32)}


  8%|▊         | 80970/1000000 [3:49:48<27:50:51,  9.17it/s]

{'loss': Array(0.1341056, dtype=float32), 'loss_cross_entropy': Array(0.12626646, dtype=float32)}
{'loss_inverse': Array(8.886226e-05, dtype=float32)}


  8%|▊         | 80980/1000000 [3:49:49<25:13:22, 10.12it/s]

{'loss': Array(0.16572212, dtype=float32), 'loss_cross_entropy': Array(0.15722452, dtype=float32)}
{'loss_inverse': Array(5.8876416e-05, dtype=float32)}


  8%|▊         | 80990/1000000 [3:49:51<28:19:12,  9.01it/s]

{'loss': Array(0.15827037, dtype=float32), 'loss_cross_entropy': Array(0.14927882, dtype=float32)}
{'loss_inverse': Array(2.2410766e-06, dtype=float32)}


  8%|▊         | 81000/1000000 [3:49:52<25:19:10, 10.08it/s]

{'loss': Array(0.1486145, dtype=float32), 'loss_cross_entropy': Array(0.1406507, dtype=float32)}
{'loss_inverse': Array(5.4280736e-05, dtype=float32)}


  8%|▊         | 81010/1000000 [3:50:00<88:50:47,  2.87it/s] 

{'loss': Array(0.1457424, dtype=float32), 'loss_cross_entropy': Array(0.13736632, dtype=float32)}
{'loss_inverse': Array(0.0003617, dtype=float32)}


  8%|▊         | 81020/1000000 [3:50:02<35:30:46,  7.19it/s]

{'loss': Array(0.15517588, dtype=float32), 'loss_cross_entropy': Array(0.14663666, dtype=float32)}
{'loss_inverse': Array(4.5774053e-05, dtype=float32)}


  8%|▊         | 81030/1000000 [3:50:03<39:01:52,  6.54it/s]

{'loss': Array(0.15197991, dtype=float32), 'loss_cross_entropy': Array(0.14310381, dtype=float32)}
{'loss_inverse': Array(7.3537485e-05, dtype=float32)}


  8%|▊         | 81040/1000000 [3:50:05<27:50:51,  9.17it/s]

{'loss': Array(0.14174543, dtype=float32), 'loss_cross_entropy': Array(0.13323309, dtype=float32)}
{'loss_inverse': Array(4.8626673e-05, dtype=float32)}


  8%|▊         | 81050/1000000 [3:50:06<26:52:16,  9.50it/s]

{'loss': Array(0.1403244, dtype=float32), 'loss_cross_entropy': Array(0.13213845, dtype=float32)}
{'loss_inverse': Array(2.355359e-05, dtype=float32)}


  8%|▊         | 81060/1000000 [3:50:08<30:06:04,  8.48it/s]

{'loss': Array(0.1440656, dtype=float32), 'loss_cross_entropy': Array(0.13577303, dtype=float32)}
{'loss_inverse': Array(2.1700947e-05, dtype=float32)}


  8%|▊         | 81070/1000000 [3:50:10<25:55:38,  9.85it/s]

{'loss': Array(0.16194876, dtype=float32), 'loss_cross_entropy': Array(0.1528035, dtype=float32)}
{'loss_inverse': Array(1.7338054e-06, dtype=float32)}


  8%|▊         | 81080/1000000 [3:50:11<31:19:58,  8.15it/s]

{'loss': Array(0.13828011, dtype=float32), 'loss_cross_entropy': Array(0.13036604, dtype=float32)}
{'loss_inverse': Array(7.198548e-07, dtype=float32)}


  8%|▊         | 81090/1000000 [3:50:13<25:52:41,  9.86it/s]

{'loss': Array(0.15099238, dtype=float32), 'loss_cross_entropy': Array(0.14235777, dtype=float32)}
{'loss_inverse': Array(1.9467298e-06, dtype=float32)}


  8%|▊         | 81100/1000000 [3:50:15<32:07:36,  7.95it/s]

{'loss': Array(0.15171956, dtype=float32), 'loss_cross_entropy': Array(0.14292984, dtype=float32)}
{'loss_inverse': Array(3.4116047e-07, dtype=float32)}


  8%|▊         | 81110/1000000 [3:50:16<25:58:03,  9.83it/s]

{'loss': Array(0.12758349, dtype=float32), 'loss_cross_entropy': Array(0.12085284, dtype=float32)}
{'loss_inverse': Array(4.335535e-05, dtype=float32)}


  8%|▊         | 81120/1000000 [3:50:17<26:03:22,  9.80it/s]

{'loss': Array(0.13969353, dtype=float32), 'loss_cross_entropy': Array(0.13097477, dtype=float32)}
{'loss_inverse': Array(6.322302e-07, dtype=float32)}


  8%|▊         | 81130/1000000 [3:50:19<28:58:39,  8.81it/s]

{'loss': Array(0.1233902, dtype=float32), 'loss_cross_entropy': Array(0.11581387, dtype=float32)}
{'loss_inverse': Array(2.2187092e-05, dtype=float32)}


  8%|▊         | 81140/1000000 [3:50:21<26:08:11,  9.77it/s]

{'loss': Array(0.13734339, dtype=float32), 'loss_cross_entropy': Array(0.12979217, dtype=float32)}
{'loss_inverse': Array(6.2325445e-05, dtype=float32)}


  8%|▊         | 81150/1000000 [3:50:22<29:24:49,  8.68it/s]

{'loss': Array(0.13380542, dtype=float32), 'loss_cross_entropy': Array(0.12605533, dtype=float32)}
{'loss_inverse': Array(2.522303e-07, dtype=float32)}


  8%|▊         | 81160/1000000 [3:50:24<26:22:22,  9.68it/s]

{'loss': Array(0.13740148, dtype=float32), 'loss_cross_entropy': Array(0.12978105, dtype=float32)}
{'loss_inverse': Array(5.691415e-06, dtype=float32)}


  8%|▊         | 81170/1000000 [3:50:26<30:24:08,  8.40it/s]

{'loss': Array(0.15169711, dtype=float32), 'loss_cross_entropy': Array(0.14339547, dtype=float32)}
{'loss_inverse': Array(1.7398937e-05, dtype=float32)}


  8%|▊         | 81180/1000000 [3:50:27<26:01:59,  9.80it/s]

{'loss': Array(0.15019386, dtype=float32), 'loss_cross_entropy': Array(0.14147556, dtype=float32)}
{'loss_inverse': Array(7.034991e-05, dtype=float32)}


  8%|▊         | 81190/1000000 [3:50:29<29:59:46,  8.51it/s]

{'loss': Array(0.14464612, dtype=float32), 'loss_cross_entropy': Array(0.13611177, dtype=float32)}
{'loss_inverse': Array(8.833949e-06, dtype=float32)}


  8%|▊         | 81200/1000000 [3:50:30<25:38:18,  9.95it/s]

{'loss': Array(0.12990634, dtype=float32), 'loss_cross_entropy': Array(0.12140162, dtype=float32)}
{'loss_inverse': Array(1.8939949e-06, dtype=float32)}


  8%|▊         | 81210/1000000 [3:50:32<35:03:29,  7.28it/s]

{'loss': Array(0.14509249, dtype=float32), 'loss_cross_entropy': Array(0.13663603, dtype=float32)}
{'loss_inverse': Array(1.703494e-05, dtype=float32)}


  8%|▊         | 81220/1000000 [3:50:34<26:38:19,  9.58it/s]

{'loss': Array(0.13136496, dtype=float32), 'loss_cross_entropy': Array(0.12336161, dtype=float32)}
{'loss_inverse': Array(7.2072735e-06, dtype=float32)}


  8%|▊         | 81230/1000000 [3:50:35<37:55:52,  6.73it/s]

{'loss': Array(0.1495721, dtype=float32), 'loss_cross_entropy': Array(0.14157592, dtype=float32)}
{'loss_inverse': Array(5.723457e-06, dtype=float32)}


  8%|▊         | 81240/1000000 [3:50:37<27:13:09,  9.38it/s]

{'loss': Array(0.16904332, dtype=float32), 'loss_cross_entropy': Array(0.15948443, dtype=float32)}
{'loss_inverse': Array(5.8722544e-05, dtype=float32)}


  8%|▊         | 81250/1000000 [3:50:38<34:36:40,  7.37it/s]

{'loss': Array(0.14261411, dtype=float32), 'loss_cross_entropy': Array(0.13473678, dtype=float32)}
{'loss_inverse': Array(2.2011604e-06, dtype=float32)}


  8%|▊         | 81260/1000000 [3:50:40<26:37:35,  9.58it/s]

{'loss': Array(0.15044776, dtype=float32), 'loss_cross_entropy': Array(0.14250095, dtype=float32)}
{'loss_inverse': Array(0.00035192, dtype=float32)}


  8%|▊         | 81270/1000000 [3:50:41<25:36:18,  9.97it/s]

{'loss': Array(0.1495791, dtype=float32), 'loss_cross_entropy': Array(0.14148413, dtype=float32)}
{'loss_inverse': Array(4.269531e-06, dtype=float32)}


  8%|▊         | 81280/1000000 [3:50:43<29:28:27,  8.66it/s]

{'loss': Array(0.13731496, dtype=float32), 'loss_cross_entropy': Array(0.1291621, dtype=float32)}
{'loss_inverse': Array(5.756752e-05, dtype=float32)}


  8%|▊         | 81290/1000000 [3:50:45<25:56:21,  9.84it/s]

{'loss': Array(0.13949063, dtype=float32), 'loss_cross_entropy': Array(0.13133818, dtype=float32)}
{'loss_inverse': Array(2.3276549e-05, dtype=float32)}


  8%|▊         | 81300/1000000 [3:50:46<30:55:28,  8.25it/s]

{'loss': Array(0.14706466, dtype=float32), 'loss_cross_entropy': Array(0.13822511, dtype=float32)}
{'loss_inverse': Array(3.287561e-05, dtype=float32)}


  8%|▊         | 81310/1000000 [3:50:48<25:42:37,  9.93it/s]

{'loss': Array(0.14854293, dtype=float32), 'loss_cross_entropy': Array(0.14058352, dtype=float32)}
{'loss_inverse': Array(2.7125285e-05, dtype=float32)}


  8%|▊         | 81320/1000000 [3:50:49<33:42:46,  7.57it/s]

{'loss': Array(0.14048384, dtype=float32), 'loss_cross_entropy': Array(0.13216506, dtype=float32)}
{'loss_inverse': Array(4.2792537e-05, dtype=float32)}


  8%|▊         | 81330/1000000 [3:50:51<26:19:22,  9.69it/s]

{'loss': Array(0.15151536, dtype=float32), 'loss_cross_entropy': Array(0.14272664, dtype=float32)}
{'loss_inverse': Array(6.4062197e-06, dtype=float32)}


  8%|▊         | 81340/1000000 [3:50:53<33:38:19,  7.59it/s]

{'loss': Array(0.15440217, dtype=float32), 'loss_cross_entropy': Array(0.1460378, dtype=float32)}
{'loss_inverse': Array(6.225734e-05, dtype=float32)}


  8%|▊         | 81350/1000000 [3:50:54<26:01:40,  9.80it/s]

{'loss': Array(0.14350107, dtype=float32), 'loss_cross_entropy': Array(0.13515063, dtype=float32)}
{'loss_inverse': Array(2.536577e-06, dtype=float32)}


  8%|▊         | 81360/1000000 [3:50:55<25:09:49, 10.14it/s]

{'loss': Array(0.1262031, dtype=float32), 'loss_cross_entropy': Array(0.11862209, dtype=float32)}
{'loss_inverse': Array(0.00016704, dtype=float32)}


  8%|▊         | 81370/1000000 [3:50:57<31:47:16,  8.03it/s]

{'loss': Array(0.1559345, dtype=float32), 'loss_cross_entropy': Array(0.14741682, dtype=float32)}
{'loss_inverse': Array(2.7236869e-05, dtype=float32)}


  8%|▊         | 81380/1000000 [3:50:58<25:42:17,  9.93it/s]

{'loss': Array(0.15259159, dtype=float32), 'loss_cross_entropy': Array(0.14425671, dtype=float32)}
{'loss_inverse': Array(2.9275518e-05, dtype=float32)}


  8%|▊         | 81390/1000000 [3:51:00<37:46:43,  6.75it/s]

{'loss': Array(0.15914251, dtype=float32), 'loss_cross_entropy': Array(0.15006915, dtype=float32)}
{'loss_inverse': Array(0.00012248, dtype=float32)}


  8%|▊         | 81400/1000000 [3:51:02<28:17:25,  9.02it/s]

{'loss': Array(0.14800055, dtype=float32), 'loss_cross_entropy': Array(0.13928853, dtype=float32)}
{'loss_inverse': Array(0.00018012, dtype=float32)}


  8%|▊         | 81410/1000000 [3:51:03<25:28:48, 10.01it/s]

{'loss': Array(0.1454731, dtype=float32), 'loss_cross_entropy': Array(0.13714804, dtype=float32)}
{'loss_inverse': Array(7.668309e-07, dtype=float32)}


  8%|▊         | 81420/1000000 [3:51:05<27:51:00,  9.16it/s]

{'loss': Array(0.14169703, dtype=float32), 'loss_cross_entropy': Array(0.13359599, dtype=float32)}
{'loss_inverse': Array(9.2420894e-05, dtype=float32)}


  8%|▊         | 81430/1000000 [3:51:06<26:18:10,  9.70it/s]

{'loss': Array(0.14512402, dtype=float32), 'loss_cross_entropy': Array(0.13723652, dtype=float32)}
{'loss_inverse': Array(0.00028329, dtype=float32)}


  8%|▊         | 81440/1000000 [3:51:08<29:27:11,  8.66it/s]

{'loss': Array(0.15870224, dtype=float32), 'loss_cross_entropy': Array(0.1492455, dtype=float32)}
{'loss_inverse': Array(1.0026877e-06, dtype=float32)}


  8%|▊         | 81450/1000000 [3:51:10<25:59:26,  9.82it/s]

{'loss': Array(0.14736775, dtype=float32), 'loss_cross_entropy': Array(0.13892047, dtype=float32)}
{'loss_inverse': Array(4.2842472e-05, dtype=float32)}


  8%|▊         | 81460/1000000 [3:51:11<31:41:52,  8.05it/s]

{'loss': Array(0.13930641, dtype=float32), 'loss_cross_entropy': Array(0.13141657, dtype=float32)}
{'loss_inverse': Array(3.5461858e-06, dtype=float32)}


  8%|▊         | 81470/1000000 [3:51:13<26:48:54,  9.52it/s]

{'loss': Array(0.13942777, dtype=float32), 'loss_cross_entropy': Array(0.13041428, dtype=float32)}
{'loss_inverse': Array(2.4761368e-05, dtype=float32)}


  8%|▊         | 81480/1000000 [3:51:15<32:36:45,  7.82it/s]

{'loss': Array(0.13632338, dtype=float32), 'loss_cross_entropy': Array(0.12856229, dtype=float32)}
{'loss_inverse': Array(4.0180956e-05, dtype=float32)}


  8%|▊         | 81490/1000000 [3:51:16<26:02:52,  9.80it/s]

{'loss': Array(0.13512874, dtype=float32), 'loss_cross_entropy': Array(0.12725495, dtype=float32)}
{'loss_inverse': Array(2.0399102e-06, dtype=float32)}


  8%|▊         | 81500/1000000 [3:51:18<38:25:20,  6.64it/s]

{'loss': Array(0.15422502, dtype=float32), 'loss_cross_entropy': Array(0.14570156, dtype=float32)}
{'loss_inverse': Array(0.0001374, dtype=float32)}


  8%|▊         | 81510/1000000 [3:51:26<87:27:06,  2.92it/s] 

{'loss': Array(0.14629911, dtype=float32), 'loss_cross_entropy': Array(0.1379898, dtype=float32)}
{'loss_inverse': Array(0.00049257, dtype=float32)}


  8%|▊         | 81520/1000000 [3:51:27<35:22:37,  7.21it/s]

{'loss': Array(0.13361807, dtype=float32), 'loss_cross_entropy': Array(0.125455, dtype=float32)}
{'loss_inverse': Array(1.05157105e-05, dtype=float32)}


  8%|▊         | 81530/1000000 [3:51:29<36:04:04,  7.07it/s]

{'loss': Array(0.1399524, dtype=float32), 'loss_cross_entropy': Array(0.13173974, dtype=float32)}
{'loss_inverse': Array(3.8391258e-07, dtype=float32)}


  8%|▊         | 81540/1000000 [3:51:30<26:40:29,  9.56it/s]

{'loss': Array(0.15122095, dtype=float32), 'loss_cross_entropy': Array(0.14298771, dtype=float32)}
{'loss_inverse': Array(2.053746e-05, dtype=float32)}


  8%|▊         | 81550/1000000 [3:51:32<38:11:54,  6.68it/s]

{'loss': Array(0.14645837, dtype=float32), 'loss_cross_entropy': Array(0.13818787, dtype=float32)}
{'loss_inverse': Array(3.5769077e-07, dtype=float32)}


  8%|▊         | 81560/1000000 [3:51:34<27:26:01,  9.30it/s]

{'loss': Array(0.13956137, dtype=float32), 'loss_cross_entropy': Array(0.13164304, dtype=float32)}
{'loss_inverse': Array(1.7981569e-05, dtype=float32)}


  8%|▊         | 81570/1000000 [3:51:35<37:57:56,  6.72it/s]

{'loss': Array(0.13784628, dtype=float32), 'loss_cross_entropy': Array(0.1301561, dtype=float32)}
{'loss_inverse': Array(3.8019996e-06, dtype=float32)}


  8%|▊         | 81580/1000000 [3:51:37<27:52:51,  9.15it/s]

{'loss': Array(0.11766278, dtype=float32), 'loss_cross_entropy': Array(0.11025152, dtype=float32)}
{'loss_inverse': Array(7.7401955e-06, dtype=float32)}


  8%|▊         | 81590/1000000 [3:51:38<26:10:10,  9.75it/s]

{'loss': Array(0.15478697, dtype=float32), 'loss_cross_entropy': Array(0.14615704, dtype=float32)}
{'loss_inverse': Array(5.7186444e-05, dtype=float32)}


  8%|▊         | 81600/1000000 [3:51:40<32:01:26,  7.97it/s]

{'loss': Array(0.14797235, dtype=float32), 'loss_cross_entropy': Array(0.13952108, dtype=float32)}
{'loss_inverse': Array(0.00019818, dtype=float32)}


  8%|▊         | 81610/1000000 [3:51:42<25:55:42,  9.84it/s]

{'loss': Array(0.14389776, dtype=float32), 'loss_cross_entropy': Array(0.13580243, dtype=float32)}
{'loss_inverse': Array(5.1325696e-06, dtype=float32)}


  8%|▊         | 81620/1000000 [3:51:43<38:05:53,  6.70it/s]

{'loss': Array(0.1419235, dtype=float32), 'loss_cross_entropy': Array(0.13366728, dtype=float32)}
{'loss_inverse': Array(5.4097425e-05, dtype=float32)}


  8%|▊         | 81630/1000000 [3:51:45<27:51:21,  9.16it/s]

{'loss': Array(0.13492915, dtype=float32), 'loss_cross_entropy': Array(0.12697397, dtype=float32)}
{'loss_inverse': Array(3.111633e-05, dtype=float32)}


  8%|▊         | 81640/1000000 [3:51:46<38:23:35,  6.64it/s]

{'loss': Array(0.13950315, dtype=float32), 'loss_cross_entropy': Array(0.13122432, dtype=float32)}
{'loss_inverse': Array(0.00026406, dtype=float32)}


  8%|▊         | 81650/1000000 [3:51:48<27:44:20,  9.20it/s]

{'loss': Array(0.16530356, dtype=float32), 'loss_cross_entropy': Array(0.1558397, dtype=float32)}
{'loss_inverse': Array(1.6032876e-05, dtype=float32)}


  8%|▊         | 81660/1000000 [3:51:49<25:31:43,  9.99it/s]

{'loss': Array(0.15258038, dtype=float32), 'loss_cross_entropy': Array(0.14462213, dtype=float32)}
{'loss_inverse': Array(1.1141299e-06, dtype=float32)}


  8%|▊         | 81670/1000000 [3:51:51<33:41:50,  7.57it/s]

{'loss': Array(0.12526332, dtype=float32), 'loss_cross_entropy': Array(0.1174924, dtype=float32)}
{'loss_inverse': Array(0.00012475, dtype=float32)}


  8%|▊         | 81680/1000000 [3:51:53<26:08:42,  9.76it/s]

{'loss': Array(0.13885258, dtype=float32), 'loss_cross_entropy': Array(0.13083674, dtype=float32)}
{'loss_inverse': Array(0.00019241, dtype=float32)}


  8%|▊         | 81690/1000000 [3:51:54<24:51:46, 10.26it/s]

{'loss': Array(0.16550188, dtype=float32), 'loss_cross_entropy': Array(0.15638784, dtype=float32)}
{'loss_inverse': Array(7.017483e-05, dtype=float32)}


  8%|▊         | 81700/1000000 [3:51:56<28:06:47,  9.07it/s]

{'loss': Array(0.13907671, dtype=float32), 'loss_cross_entropy': Array(0.13112296, dtype=float32)}
{'loss_inverse': Array(2.4811372e-06, dtype=float32)}


  8%|▊         | 81710/1000000 [3:51:57<26:06:44,  9.77it/s]

{'loss': Array(0.14171098, dtype=float32), 'loss_cross_entropy': Array(0.13362098, dtype=float32)}
{'loss_inverse': Array(0.000267, dtype=float32)}


  8%|▊         | 81720/1000000 [3:51:59<29:01:55,  8.79it/s]

{'loss': Array(0.12596598, dtype=float32), 'loss_cross_entropy': Array(0.11885851, dtype=float32)}
{'loss_inverse': Array(6.810182e-05, dtype=float32)}


  8%|▊         | 81730/1000000 [3:52:00<26:41:48,  9.55it/s]

{'loss': Array(0.15005559, dtype=float32), 'loss_cross_entropy': Array(0.14190865, dtype=float32)}
{'loss_inverse': Array(9.983232e-05, dtype=float32)}


  8%|▊         | 81740/1000000 [3:52:02<29:44:44,  8.58it/s]

{'loss': Array(0.15680774, dtype=float32), 'loss_cross_entropy': Array(0.14807348, dtype=float32)}
{'loss_inverse': Array(3.186811e-06, dtype=float32)}


  8%|▊         | 81750/1000000 [3:52:03<25:55:37,  9.84it/s]

{'loss': Array(0.1559603, dtype=float32), 'loss_cross_entropy': Array(0.14729169, dtype=float32)}
{'loss_inverse': Array(0.00015304, dtype=float32)}


  8%|▊         | 81760/1000000 [3:52:05<29:12:50,  8.73it/s]

{'loss': Array(0.14220391, dtype=float32), 'loss_cross_entropy': Array(0.13410766, dtype=float32)}
{'loss_inverse': Array(0.00012358, dtype=float32)}


  8%|▊         | 81770/1000000 [3:52:07<26:04:46,  9.78it/s]

{'loss': Array(0.13144143, dtype=float32), 'loss_cross_entropy': Array(0.12340014, dtype=float32)}
{'loss_inverse': Array(6.28983e-05, dtype=float32)}


  8%|▊         | 81780/1000000 [3:52:08<34:44:15,  7.34it/s]

{'loss': Array(0.14947936, dtype=float32), 'loss_cross_entropy': Array(0.1414466, dtype=float32)}
{'loss_inverse': Array(8.235785e-05, dtype=float32)}


  8%|▊         | 81790/1000000 [3:52:10<26:49:06,  9.51it/s]

{'loss': Array(0.13617304, dtype=float32), 'loss_cross_entropy': Array(0.127993, dtype=float32)}
{'loss_inverse': Array(8.9395086e-05, dtype=float32)}


  8%|▊         | 81800/1000000 [3:52:12<37:21:49,  6.83it/s]

{'loss': Array(0.14582358, dtype=float32), 'loss_cross_entropy': Array(0.13778107, dtype=float32)}
{'loss_inverse': Array(4.678094e-05, dtype=float32)}


  8%|▊         | 81810/1000000 [3:52:13<26:29:14,  9.63it/s]

{'loss': Array(0.16094688, dtype=float32), 'loss_cross_entropy': Array(0.15168579, dtype=float32)}
{'loss_inverse': Array(1.3160136e-05, dtype=float32)}


  8%|▊         | 81820/1000000 [3:52:15<34:40:33,  7.36it/s]

{'loss': Array(0.14741933, dtype=float32), 'loss_cross_entropy': Array(0.13906685, dtype=float32)}
{'loss_inverse': Array(1.6119626e-06, dtype=float32)}


  8%|▊         | 81830/1000000 [3:52:16<26:37:40,  9.58it/s]

{'loss': Array(0.15404509, dtype=float32), 'loss_cross_entropy': Array(0.14545979, dtype=float32)}
{'loss_inverse': Array(1.3411006e-05, dtype=float32)}


  8%|▊         | 81840/1000000 [3:52:17<24:50:55, 10.26it/s]

{'loss': Array(0.1421511, dtype=float32), 'loss_cross_entropy': Array(0.13383254, dtype=float32)}
{'loss_inverse': Array(0.00010767, dtype=float32)}


  8%|▊         | 81850/1000000 [3:52:19<29:04:49,  8.77it/s]

{'loss': Array(0.13491793, dtype=float32), 'loss_cross_entropy': Array(0.12677418, dtype=float32)}
{'loss_inverse': Array(9.266819e-06, dtype=float32)}


  8%|▊         | 81860/1000000 [3:52:21<25:48:13,  9.88it/s]

{'loss': Array(0.1429127, dtype=float32), 'loss_cross_entropy': Array(0.13481764, dtype=float32)}
{'loss_inverse': Array(0.00027187, dtype=float32)}


  8%|▊         | 81870/1000000 [3:52:22<31:21:19,  8.13it/s]

{'loss': Array(0.1518099, dtype=float32), 'loss_cross_entropy': Array(0.14276621, dtype=float32)}
{'loss_inverse': Array(4.238774e-05, dtype=float32)}


  8%|▊         | 81880/1000000 [3:52:24<26:06:51,  9.77it/s]

{'loss': Array(0.12766086, dtype=float32), 'loss_cross_entropy': Array(0.12026095, dtype=float32)}
{'loss_inverse': Array(9.4523675e-05, dtype=float32)}


  8%|▊         | 81890/1000000 [3:52:26<33:47:00,  7.55it/s]

{'loss': Array(0.14641443, dtype=float32), 'loss_cross_entropy': Array(0.13800122, dtype=float32)}
{'loss_inverse': Array(9.910943e-05, dtype=float32)}


  8%|▊         | 81900/1000000 [3:52:27<26:54:27,  9.48it/s]

{'loss': Array(0.14860515, dtype=float32), 'loss_cross_entropy': Array(0.14007558, dtype=float32)}
{'loss_inverse': Array(0.00016222, dtype=float32)}


  8%|▊         | 81910/1000000 [3:52:29<34:35:51,  7.37it/s]

{'loss': Array(0.14538682, dtype=float32), 'loss_cross_entropy': Array(0.1369227, dtype=float32)}
{'loss_inverse': Array(0.00011998, dtype=float32)}


  8%|▊         | 81920/1000000 [3:52:30<26:57:04,  9.46it/s]

{'loss': Array(0.12827457, dtype=float32), 'loss_cross_entropy': Array(0.12085419, dtype=float32)}
{'loss_inverse': Array(0.00020558, dtype=float32)}


  8%|▊         | 81930/1000000 [3:52:32<26:02:24,  9.79it/s]

{'loss': Array(0.13058804, dtype=float32), 'loss_cross_entropy': Array(0.1226209, dtype=float32)}
{'loss_inverse': Array(1.8458517e-05, dtype=float32)}


  8%|▊         | 81940/1000000 [3:52:33<31:16:12,  8.16it/s]

{'loss': Array(0.14763121, dtype=float32), 'loss_cross_entropy': Array(0.13905947, dtype=float32)}
{'loss_inverse': Array(4.518038e-05, dtype=float32)}


  8%|▊         | 81950/1000000 [3:52:35<25:56:18,  9.83it/s]

{'loss': Array(0.14001541, dtype=float32), 'loss_cross_entropy': Array(0.1319871, dtype=float32)}
{'loss_inverse': Array(1.0879597e-05, dtype=float32)}


  8%|▊         | 81960/1000000 [3:52:37<37:36:49,  6.78it/s]

{'loss': Array(0.14350374, dtype=float32), 'loss_cross_entropy': Array(0.13489437, dtype=float32)}
{'loss_inverse': Array(5.8365305e-05, dtype=float32)}


  8%|▊         | 81970/1000000 [3:52:38<27:07:11,  9.40it/s]

{'loss': Array(0.15386446, dtype=float32), 'loss_cross_entropy': Array(0.14492649, dtype=float32)}
{'loss_inverse': Array(7.075169e-06, dtype=float32)}


  8%|▊         | 81980/1000000 [3:52:39<25:17:20, 10.08it/s]

{'loss': Array(0.14497487, dtype=float32), 'loss_cross_entropy': Array(0.13719945, dtype=float32)}
{'loss_inverse': Array(0.00015915, dtype=float32)}


  8%|▊         | 81990/1000000 [3:52:41<28:21:18,  8.99it/s]

{'loss': Array(0.12990141, dtype=float32), 'loss_cross_entropy': Array(0.12196926, dtype=float32)}
{'loss_inverse': Array(2.702993e-06, dtype=float32)}


  8%|▊         | 82000/1000000 [3:52:43<25:22:11, 10.05it/s]

{'loss': Array(0.14951476, dtype=float32), 'loss_cross_entropy': Array(0.14110243, dtype=float32)}
{'loss_inverse': Array(2.9898785e-05, dtype=float32)}


  8%|▊         | 82010/1000000 [3:52:51<89:03:08,  2.86it/s] 

{'loss': Array(0.1457886, dtype=float32), 'loss_cross_entropy': Array(0.13846055, dtype=float32)}
{'loss_inverse': Array(6.487024e-05, dtype=float32)}


  8%|▊         | 82020/1000000 [3:52:52<36:15:48,  7.03it/s]

{'loss': Array(0.15198821, dtype=float32), 'loss_cross_entropy': Array(0.14364527, dtype=float32)}
{'loss_inverse': Array(5.9228787e-05, dtype=float32)}


  8%|▊         | 82030/1000000 [3:52:54<31:36:41,  8.07it/s]

{'loss': Array(0.15030363, dtype=float32), 'loss_cross_entropy': Array(0.14188616, dtype=float32)}
{'loss_inverse': Array(4.9588365e-05, dtype=float32)}


  8%|▊         | 82040/1000000 [3:52:56<26:13:00,  9.73it/s]

{'loss': Array(0.16237372, dtype=float32), 'loss_cross_entropy': Array(0.15393634, dtype=float32)}
{'loss_inverse': Array(3.5849062e-06, dtype=float32)}


  8%|▊         | 82050/1000000 [3:52:57<37:28:44,  6.80it/s]

{'loss': Array(0.13892646, dtype=float32), 'loss_cross_entropy': Array(0.13079311, dtype=float32)}
{'loss_inverse': Array(0.00015646, dtype=float32)}


  8%|▊         | 82060/1000000 [3:52:59<27:12:55,  9.37it/s]

{'loss': Array(0.14816046, dtype=float32), 'loss_cross_entropy': Array(0.13944325, dtype=float32)}
{'loss_inverse': Array(1.3251551e-05, dtype=float32)}


  8%|▊         | 82070/1000000 [3:53:00<25:16:21, 10.09it/s]

{'loss': Array(0.13135754, dtype=float32), 'loss_cross_entropy': Array(0.12395468, dtype=float32)}
{'loss_inverse': Array(2.3533936e-05, dtype=float32)}


  8%|▊         | 82080/1000000 [3:53:02<29:43:00,  8.58it/s]

{'loss': Array(0.13460825, dtype=float32), 'loss_cross_entropy': Array(0.12689458, dtype=float32)}
{'loss_inverse': Array(0.00019337, dtype=float32)}


  8%|▊         | 82090/1000000 [3:53:03<25:59:37,  9.81it/s]

{'loss': Array(0.15069829, dtype=float32), 'loss_cross_entropy': Array(0.14179449, dtype=float32)}
{'loss_inverse': Array(3.6426558e-05, dtype=float32)}


  8%|▊         | 82100/1000000 [3:53:05<29:54:23,  8.53it/s]

{'loss': Array(0.15292998, dtype=float32), 'loss_cross_entropy': Array(0.14429936, dtype=float32)}
{'loss_inverse': Array(6.673473e-05, dtype=float32)}


  8%|▊         | 82110/1000000 [3:53:07<26:39:55,  9.56it/s]

{'loss': Array(0.14846897, dtype=float32), 'loss_cross_entropy': Array(0.13998495, dtype=float32)}
{'loss_inverse': Array(0.00011045, dtype=float32)}


  8%|▊         | 82120/1000000 [3:53:08<32:04:22,  7.95it/s]

{'loss': Array(0.15440898, dtype=float32), 'loss_cross_entropy': Array(0.14501503, dtype=float32)}
{'loss_inverse': Array(0.00011468, dtype=float32)}


  8%|▊         | 82130/1000000 [3:53:10<26:34:47,  9.59it/s]

{'loss': Array(0.16967182, dtype=float32), 'loss_cross_entropy': Array(0.16123746, dtype=float32)}
{'loss_inverse': Array(0.00019096, dtype=float32)}


  8%|▊         | 82140/1000000 [3:53:11<24:55:21, 10.23it/s]

{'loss': Array(0.1353159, dtype=float32), 'loss_cross_entropy': Array(0.12801285, dtype=float32)}
{'loss_inverse': Array(5.574299e-07, dtype=float32)}


  8%|▊         | 82150/1000000 [3:53:13<27:52:45,  9.15it/s]

{'loss': Array(0.14325662, dtype=float32), 'loss_cross_entropy': Array(0.13516214, dtype=float32)}
{'loss_inverse': Array(1.4603504e-05, dtype=float32)}


  8%|▊         | 82160/1000000 [3:53:14<25:15:58, 10.09it/s]

{'loss': Array(0.14916341, dtype=float32), 'loss_cross_entropy': Array(0.1398273, dtype=float32)}
{'loss_inverse': Array(1.2619838e-05, dtype=float32)}


  8%|▊         | 82170/1000000 [3:53:16<29:32:20,  8.63it/s]

{'loss': Array(0.13496082, dtype=float32), 'loss_cross_entropy': Array(0.12707229, dtype=float32)}
{'loss_inverse': Array(0.0001822, dtype=float32)}


  8%|▊         | 82180/1000000 [3:53:18<25:58:03,  9.82it/s]

{'loss': Array(0.16075075, dtype=float32), 'loss_cross_entropy': Array(0.15136729, dtype=float32)}
{'loss_inverse': Array(4.7664403e-07, dtype=float32)}


  8%|▊         | 82190/1000000 [3:53:19<31:25:03,  8.11it/s]

{'loss': Array(0.1141836, dtype=float32), 'loss_cross_entropy': Array(0.10735768, dtype=float32)}
{'loss_inverse': Array(1.3458622e-05, dtype=float32)}


  8%|▊         | 82200/1000000 [3:53:21<25:51:39,  9.86it/s]

{'loss': Array(0.14107595, dtype=float32), 'loss_cross_entropy': Array(0.13275243, dtype=float32)}
{'loss_inverse': Array(3.316223e-06, dtype=float32)}


  8%|▊         | 82210/1000000 [3:53:22<25:37:12,  9.95it/s]

{'loss': Array(0.15366934, dtype=float32), 'loss_cross_entropy': Array(0.1455385, dtype=float32)}
{'loss_inverse': Array(1.6161828e-05, dtype=float32)}


  8%|▊         | 82220/1000000 [3:53:24<29:50:19,  8.54it/s]

{'loss': Array(0.12920141, dtype=float32), 'loss_cross_entropy': Array(0.12167388, dtype=float32)}
{'loss_inverse': Array(1.6648054e-06, dtype=float32)}


  8%|▊         | 82230/1000000 [3:53:25<25:12:42, 10.11it/s]

{'loss': Array(0.14153017, dtype=float32), 'loss_cross_entropy': Array(0.13407356, dtype=float32)}
{'loss_inverse': Array(2.223593e-06, dtype=float32)}


  8%|▊         | 82240/1000000 [3:53:27<30:53:32,  8.25it/s]

{'loss': Array(0.15164553, dtype=float32), 'loss_cross_entropy': Array(0.1426932, dtype=float32)}
{'loss_inverse': Array(4.5841593e-06, dtype=float32)}


  8%|▊         | 82250/1000000 [3:53:29<26:03:36,  9.78it/s]

{'loss': Array(0.13699982, dtype=float32), 'loss_cross_entropy': Array(0.1296367, dtype=float32)}
{'loss_inverse': Array(1.1188046e-07, dtype=float32)}


  8%|▊         | 82260/1000000 [3:53:30<34:13:14,  7.45it/s]

{'loss': Array(0.15322423, dtype=float32), 'loss_cross_entropy': Array(0.14442784, dtype=float32)}
{'loss_inverse': Array(7.300571e-06, dtype=float32)}


  8%|▊         | 82270/1000000 [3:53:32<26:55:20,  9.47it/s]

{'loss': Array(0.15629001, dtype=float32), 'loss_cross_entropy': Array(0.14735629, dtype=float32)}
{'loss_inverse': Array(3.9141604e-08, dtype=float32)}


  8%|▊         | 82280/1000000 [3:53:33<25:29:38, 10.00it/s]

{'loss': Array(0.1477047, dtype=float32), 'loss_cross_entropy': Array(0.13927385, dtype=float32)}
{'loss_inverse': Array(2.7246264e-08, dtype=float32)}


  8%|▊         | 82290/1000000 [3:53:35<28:16:06,  9.02it/s]

{'loss': Array(0.14513656, dtype=float32), 'loss_cross_entropy': Array(0.1368927, dtype=float32)}
{'loss_inverse': Array(5.5951905e-06, dtype=float32)}


  8%|▊         | 82300/1000000 [3:53:36<25:14:35, 10.10it/s]

{'loss': Array(0.13627253, dtype=float32), 'loss_cross_entropy': Array(0.12830263, dtype=float32)}
{'loss_inverse': Array(3.7237394e-06, dtype=float32)}


  8%|▊         | 82310/1000000 [3:53:38<33:20:05,  7.65it/s]

{'loss': Array(0.14505206, dtype=float32), 'loss_cross_entropy': Array(0.13684729, dtype=float32)}
{'loss_inverse': Array(5.5947103e-06, dtype=float32)}


  8%|▊         | 82320/1000000 [3:53:39<25:59:53,  9.80it/s]

{'loss': Array(0.15543795, dtype=float32), 'loss_cross_entropy': Array(0.14714973, dtype=float32)}
{'loss_inverse': Array(0.00011076, dtype=float32)}


  8%|▊         | 82330/1000000 [3:53:41<25:26:46, 10.02it/s]

{'loss': Array(0.14984904, dtype=float32), 'loss_cross_entropy': Array(0.14127478, dtype=float32)}
{'loss_inverse': Array(3.3011165e-06, dtype=float32)}


  8%|▊         | 82340/1000000 [3:53:43<28:18:51,  9.00it/s]

{'loss': Array(0.11969161, dtype=float32), 'loss_cross_entropy': Array(0.11249758, dtype=float32)}
{'loss_inverse': Array(0.00038951, dtype=float32)}


  8%|▊         | 82350/1000000 [3:53:44<25:43:49,  9.91it/s]

{'loss': Array(0.14833921, dtype=float32), 'loss_cross_entropy': Array(0.1394719, dtype=float32)}
{'loss_inverse': Array(2.811102e-07, dtype=float32)}


  8%|▊         | 82360/1000000 [3:53:46<30:08:32,  8.46it/s]

{'loss': Array(0.1417042, dtype=float32), 'loss_cross_entropy': Array(0.13450472, dtype=float32)}
{'loss_inverse': Array(2.237708e-05, dtype=float32)}


  8%|▊         | 82370/1000000 [3:53:47<25:58:29,  9.81it/s]

{'loss': Array(0.16804573, dtype=float32), 'loss_cross_entropy': Array(0.15916047, dtype=float32)}
{'loss_inverse': Array(6.506542e-06, dtype=float32)}


  8%|▊         | 82380/1000000 [3:53:49<29:58:40,  8.50it/s]

{'loss': Array(0.14383726, dtype=float32), 'loss_cross_entropy': Array(0.1357588, dtype=float32)}
{'loss_inverse': Array(1.5504828e-06, dtype=float32)}


  8%|▊         | 82390/1000000 [3:53:51<26:06:23,  9.76it/s]

{'loss': Array(0.16141056, dtype=float32), 'loss_cross_entropy': Array(0.15273413, dtype=float32)}
{'loss_inverse': Array(8.3572755e-05, dtype=float32)}


  8%|▊         | 82400/1000000 [3:53:52<33:48:12,  7.54it/s]

{'loss': Array(0.12982759, dtype=float32), 'loss_cross_entropy': Array(0.12179499, dtype=float32)}
{'loss_inverse': Array(1.2702502e-07, dtype=float32)}


  8%|▊         | 82410/1000000 [3:53:54<26:35:18,  9.59it/s]

{'loss': Array(0.1381089, dtype=float32), 'loss_cross_entropy': Array(0.12993382, dtype=float32)}
{'loss_inverse': Array(6.670743e-05, dtype=float32)}


  8%|▊         | 82420/1000000 [3:53:55<25:44:57,  9.90it/s]

{'loss': Array(0.15346067, dtype=float32), 'loss_cross_entropy': Array(0.14486814, dtype=float32)}
{'loss_inverse': Array(1.0821897e-07, dtype=float32)}


  8%|▊         | 82430/1000000 [3:53:57<29:12:02,  8.73it/s]

{'loss': Array(0.14037214, dtype=float32), 'loss_cross_entropy': Array(0.13171884, dtype=float32)}
{'loss_inverse': Array(3.5870125e-05, dtype=float32)}


  8%|▊         | 82440/1000000 [3:53:58<26:00:20,  9.80it/s]

{'loss': Array(0.14205, dtype=float32), 'loss_cross_entropy': Array(0.13436818, dtype=float32)}
{'loss_inverse': Array(3.2824778e-07, dtype=float32)}


  8%|▊         | 82450/1000000 [3:54:00<30:24:10,  8.38it/s]

{'loss': Array(0.14908752, dtype=float32), 'loss_cross_entropy': Array(0.14065078, dtype=float32)}
{'loss_inverse': Array(2.5051339e-05, dtype=float32)}


  8%|▊         | 82460/1000000 [3:54:02<25:44:00,  9.90it/s]

{'loss': Array(0.14129233, dtype=float32), 'loss_cross_entropy': Array(0.13285303, dtype=float32)}
{'loss_inverse': Array(8.0732956e-05, dtype=float32)}


  8%|▊         | 82470/1000000 [3:54:03<30:48:47,  8.27it/s]

{'loss': Array(0.16641799, dtype=float32), 'loss_cross_entropy': Array(0.15721425, dtype=float32)}
{'loss_inverse': Array(1.1263238e-05, dtype=float32)}


  8%|▊         | 82480/1000000 [3:54:05<26:38:01,  9.57it/s]

{'loss': Array(0.13869704, dtype=float32), 'loss_cross_entropy': Array(0.13081998, dtype=float32)}
{'loss_inverse': Array(1.8249642e-06, dtype=float32)}


  8%|▊         | 82490/1000000 [3:54:06<25:35:19,  9.96it/s]

{'loss': Array(0.15343826, dtype=float32), 'loss_cross_entropy': Array(0.1448666, dtype=float32)}
{'loss_inverse': Array(9.39178e-06, dtype=float32)}


  8%|▊         | 82500/1000000 [3:54:08<30:28:24,  8.36it/s]

{'loss': Array(0.15176061, dtype=float32), 'loss_cross_entropy': Array(0.14360687, dtype=float32)}
{'loss_inverse': Array(0.00012923, dtype=float32)}


  8%|▊         | 82510/1000000 [3:54:16<84:55:22,  3.00it/s] 

{'loss': Array(0.15899251, dtype=float32), 'loss_cross_entropy': Array(0.14996144, dtype=float32)}
{'loss_inverse': Array(1.6430466e-05, dtype=float32)}


  8%|▊         | 82520/1000000 [3:54:18<47:56:12,  5.32it/s]

{'loss': Array(0.14993909, dtype=float32), 'loss_cross_entropy': Array(0.14155827, dtype=float32)}
{'loss_inverse': Array(4.500914e-06, dtype=float32)}


  8%|▊         | 82530/1000000 [3:54:19<29:07:39,  8.75it/s]

{'loss': Array(0.15596405, dtype=float32), 'loss_cross_entropy': Array(0.14715052, dtype=float32)}
{'loss_inverse': Array(2.320387e-05, dtype=float32)}


  8%|▊         | 82540/1000000 [3:54:21<25:22:38, 10.04it/s]

{'loss': Array(0.13546342, dtype=float32), 'loss_cross_entropy': Array(0.1277563, dtype=float32)}
{'loss_inverse': Array(1.9821603e-07, dtype=float32)}


  8%|▊         | 82550/1000000 [3:54:22<29:16:24,  8.71it/s]

{'loss': Array(0.13342653, dtype=float32), 'loss_cross_entropy': Array(0.1254138, dtype=float32)}
{'loss_inverse': Array(6.506702e-06, dtype=float32)}


  8%|▊         | 82560/1000000 [3:54:24<26:38:09,  9.57it/s]

{'loss': Array(0.15862195, dtype=float32), 'loss_cross_entropy': Array(0.14983553, dtype=float32)}
{'loss_inverse': Array(4.400678e-06, dtype=float32)}


  8%|▊         | 82570/1000000 [3:54:25<29:18:44,  8.69it/s]

{'loss': Array(0.15816723, dtype=float32), 'loss_cross_entropy': Array(0.14948007, dtype=float32)}
{'loss_inverse': Array(3.3952008e-07, dtype=float32)}


  8%|▊         | 82580/1000000 [3:54:27<25:45:54,  9.89it/s]

{'loss': Array(0.1496078, dtype=float32), 'loss_cross_entropy': Array(0.14095609, dtype=float32)}
{'loss_inverse': Array(2.8614752e-06, dtype=float32)}


  8%|▊         | 82590/1000000 [3:54:29<31:33:18,  8.08it/s]

{'loss': Array(0.13638018, dtype=float32), 'loss_cross_entropy': Array(0.12823735, dtype=float32)}
{'loss_inverse': Array(2.0652801e-07, dtype=float32)}


  8%|▊         | 82600/1000000 [3:54:30<26:03:03,  9.78it/s]

{'loss': Array(0.13649109, dtype=float32), 'loss_cross_entropy': Array(0.12815905, dtype=float32)}
{'loss_inverse': Array(2.5636285e-07, dtype=float32)}


  8%|▊         | 82610/1000000 [3:54:31<25:06:50, 10.15it/s]

{'loss': Array(0.13990936, dtype=float32), 'loss_cross_entropy': Array(0.13141564, dtype=float32)}
{'loss_inverse': Array(0.0001657, dtype=float32)}


  8%|▊         | 82620/1000000 [3:54:33<27:58:52,  9.11it/s]

{'loss': Array(0.1521822, dtype=float32), 'loss_cross_entropy': Array(0.14432627, dtype=float32)}
{'loss_inverse': Array(6.510281e-07, dtype=float32)}


  8%|▊         | 82630/1000000 [3:54:35<25:42:44,  9.91it/s]

{'loss': Array(0.15400411, dtype=float32), 'loss_cross_entropy': Array(0.14501344, dtype=float32)}
{'loss_inverse': Array(9.337795e-07, dtype=float32)}


  8%|▊         | 82640/1000000 [3:54:36<29:22:23,  8.68it/s]

{'loss': Array(0.1277134, dtype=float32), 'loss_cross_entropy': Array(0.12013604, dtype=float32)}
{'loss_inverse': Array(5.5393542e-05, dtype=float32)}


  8%|▊         | 82650/1000000 [3:54:38<26:01:50,  9.79it/s]

{'loss': Array(0.13615718, dtype=float32), 'loss_cross_entropy': Array(0.12803006, dtype=float32)}
{'loss_inverse': Array(1.4852475e-07, dtype=float32)}


  8%|▊         | 82660/1000000 [3:54:40<31:28:39,  8.10it/s]

{'loss': Array(0.14497517, dtype=float32), 'loss_cross_entropy': Array(0.13652633, dtype=float32)}
{'loss_inverse': Array(3.9104172e-05, dtype=float32)}


  8%|▊         | 82670/1000000 [3:54:41<26:43:10,  9.54it/s]

{'loss': Array(0.17455666, dtype=float32), 'loss_cross_entropy': Array(0.16552186, dtype=float32)}
{'loss_inverse': Array(6.6498626e-07, dtype=float32)}


  8%|▊         | 82680/1000000 [3:54:43<26:36:33,  9.58it/s]

{'loss': Array(0.1177907, dtype=float32), 'loss_cross_entropy': Array(0.11024158, dtype=float32)}
{'loss_inverse': Array(8.851277e-06, dtype=float32)}


  8%|▊         | 82682/1000000 [3:54:43<54:03:58,  4.71it/s]

In [16]:

def sampling_model(key, model, sample_eval, nb_step=100, config=None):
    """
    Function used to sampling a state from a list 
    """
    seq_len_future = config.len_seq - config.len_seq // 4 
    noise_future  = jax.random.dirichlet(key, jnp.ones(6) * 5., (config.batch_size, seq_len_future, 54))
    sample_eval["reward"] = jnp.linspace(start=-0.5, stop=0.5, num=config.batch_size)[:, None]

    for t_step in range(nb_step):
        t_step_array = jnp.ones((config.batch_size, 1, 1, 1)) * float(t_step / nb_step)
        sample_eval["context"] = jnp.concatenate([sample_eval["reward"], t_step_array[:, :, 0, 0]], axis=1)

        estimation_logits_past, estimation_logits_future = model(
            sample_eval["state_past"], noise_future, sample_eval["context"]
        )

        estimation_proba_future = jax.nn.softmax(estimation_logits_future, axis=-1)

        noise_future = noise_future + float(1. / nb_step) * 1./ (1. - t_step_array + 0.0001) * (estimation_proba_future - noise_future)

    return noise_future



In [17]:
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

buffer_eval, buffer_list_eval = dataset.fast_gathering_data_diffusion(
    env,
    vmap_reset,
    vmap_step,
    int(config.batch_size),
    config.len_seq,
    buffer_eval,
    buffer_list_eval,
    subkey,
)

sample = buffer_eval.sample(buffer_list_eval, subkey)
sample = reshape_diffusion_setup(sample, subkey)

In [23]:
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

sample = buffer.sample(buffer_list, subkey)
sample = reshape_diffusion_setup(sample, subkey)


result = sampling_model(key=config.jax_key, model=transformer, sample_eval=sample, config=config, nb_step=100)
result

Array([[[[1.09494431e-05, 1.66741665e-05, 9.99895751e-01,
          2.22197268e-05, 3.50209884e-05, 1.94163295e-05],
         [2.11554579e-05, 2.44809780e-05, 2.29226425e-05,
          9.99903977e-01, 8.82399036e-06, 1.86653342e-05],
         [2.21990049e-05, 8.59028660e-06, 2.98996456e-05,
          9.99905646e-01, 1.01229525e-05, 2.35370826e-05],
         ...,
         [1.43166399e-05, 3.20903491e-05, 9.99898672e-01,
          1.93911837e-05, 1.82657968e-05, 1.72907021e-05],
         [9.99898255e-01, 1.77349430e-05, 2.03370582e-05,
          3.52105126e-05, 1.59306219e-05, 1.25847291e-05],
         [2.01943330e-05, 8.95850826e-06, 9.99900341e-01,
          2.50139274e-05, 2.45582778e-05, 2.09209975e-05]],

        [[1.19837932e-05, 1.96152832e-05, 9.99905944e-01,
          1.66372629e-05, 2.55447812e-05, 2.02928204e-05],
         [1.41649507e-05, 1.69913983e-05, 2.56728381e-05,
          9.99911547e-01, 1.36010349e-05, 1.80726638e-05],
         [1.13089336e-05, 2.37140339e-05, 3.1119

In [24]:
index_batch  = 64

jnp.argmax(sample["state_past"], axis=-1).reshape((128, 8, 6, 3, 3))[index_batch, -1, :, :, :]

Array([[[2, 0, 1],
        [0, 0, 5],
        [2, 4, 5]],

       [[5, 1, 4],
        [2, 1, 2],
        [1, 3, 3]],

       [[3, 4, 4],
        [1, 2, 1],
        [5, 5, 1]],

       [[5, 4, 3],
        [5, 3, 0],
        [0, 1, 0]],

       [[0, 3, 1],
        [2, 4, 3],
        [4, 4, 0]],

       [[4, 5, 2],
        [3, 5, 2],
        [3, 0, 2]]], dtype=int32)

In [25]:
jnp.argmax(result, axis=-1).reshape((128, 24, 6, 3, 3))[index_batch, 0, :, :, :]

Array([[[0, 0, 1],
        [0, 0, 5],
        [3, 4, 5]],

       [[2, 1, 4],
        [0, 1, 2],
        [2, 3, 3]],

       [[3, 4, 4],
        [1, 2, 1],
        [5, 5, 1]],

       [[5, 4, 3],
        [5, 3, 3],
        [0, 1, 4]],

       [[4, 2, 0],
        [4, 4, 3],
        [0, 3, 1]],

       [[5, 5, 2],
        [2, 5, 2],
        [1, 0, 2]]], dtype=int32)

In [26]:
jnp.argmax(result, axis=-1).reshape((128, 24, 6, 3, 3))[index_batch, 1, :, :, :]

Array([[[0, 0, 5],
        [0, 0, 5],
        [3, 4, 1]],

       [[2, 1, 4],
        [0, 1, 2],
        [2, 3, 4]],

       [[5, 4, 3],
        [1, 2, 1],
        [5, 5, 1]],

       [[2, 4, 3],
        [5, 3, 3],
        [0, 1, 4]],

       [[4, 2, 0],
        [4, 4, 3],
        [0, 3, 1]],

       [[5, 5, 3],
        [2, 5, 2],
        [1, 0, 2]]], dtype=int32)

In [None]:
sample = buffer_eval.sample(buffer_list_eval, subkey)
sample = reshape_sample(sample)

TrajectoryBufferSample(experience={'action': Array([[[1.32556781e-01, 7.96739519e-01, 5.36718592e-02, ...,
         3.91646661e-03, 4.48901858e-03, 9.91594553e-01],
        [3.49070907e-01, 4.57749265e-04, 4.38157976e-01, ...,
         7.23136306e-01, 1.23497941e-01, 1.53365776e-01],
        [6.12441264e-03, 2.50436477e-02, 1.35732419e-03, ...,
         3.82237613e-01, 5.98694921e-01, 1.90675538e-02],
        ...,
        [1.41329234e-04, 2.44877161e-03, 8.43136787e-01, ...,
         2.33344346e-01, 6.42170012e-01, 1.24485560e-01],
        [6.32655225e-04, 1.77795421e-02, 9.65278149e-01, ...,
         1.25269741e-02, 3.21629345e-01, 6.65843725e-01],
        [9.08881542e-04, 1.04175135e-01, 7.50824576e-04, ...,
         9.99683421e-03, 7.89827347e-01, 2.00175866e-01]],

       [[2.03237548e-01, 7.00179100e-01, 3.63819454e-05, ...,
         9.96583939e-01, 2.39940570e-03, 1.01662707e-03],
        [7.63220847e-01, 1.11325733e-01, 3.15520242e-02, ...,
         5.45369804e-01, 4.54322606e-0

In [40]:
def generate_past_state_with_with_random_policy(key, vmap_reset, step_jit_env, config):
    """
    Generate past state with random policy

    Args:
        config: configuration object

    Returns:
        state_past: (batch_size, len_seq//4, 6, 3, 3)

    """

    key1, key2 = jax.random.split(config.jax_key)

    keys = jax.random.split(key1, config.batch_size)
    state, timestep = vmap_reset(keys)

    last_state = None
    past_state = []

    actions_all = jax.random.randint(
        key=config.jax_key,
        minval=env.action_spec.minimum,
        maxval=env.action_spec.maximum,
        shape=(config.batch_size, config.len_seq // 4, 3),
    )

    for i in range(config.len_seq // 4):

        # apply random policy and retrieve state
        action = actions_all[:, i, :]

        state, timestep  = step_jit_env(state, action)
        past_state.append(state.cube)

    # concat all the past state to get the shape (batch_size, len_seq//4, 6, 3, 3) from a list of state of size (batch_size, 6, 3, 3) by creating the 1 axis
    state_past = jnp.stack(past_state, axis=1)

    return state_past, state, actions_all

step_jit_env = jax.vmap(jit_step)

state_past, state, actions_past = generate_past_state_with_with_random_policy(key, vmap_reset, step_jit_env, config)

In [31]:
state_past.shape

(128, 8, 6, 3, 3)

In [41]:

def apply_decision_diffuser_policy(key, state_past, decision_diffuser, inverse_rl_model, config):
    """
    1. Make a estimation of the targeted reward
    2. Generate futur state with those targeted reward
    3. Choose policy from that
    """
    sample_eval = {
        "state_past": jax.nn.one_hot(state_past, 6),
    }

    state_past = jnp.copy(state_past.reshape((state_past.shape[0], state_past.shape[1], -1)))
    state_past = jax.nn.one_hot(state_past, num_classes=6)

    state_future = sampling_model(key, decision_diffuser, sample_eval, nb_step=100, config=config)

    # state_future is (batch_size, seq_len, dim_input_state / 6, 6)
    state_to_act = jnp.concatenate([state_past, state_future], axis=1)
    state_to_act_futur_t = state_to_act[:, (config.len_seq // 4 - 1):(-1), :, :]
    state_to_act_futur_td1 = state_to_act[:, (config.len_seq // 4):, :, :]

    # flatten the last 2 axis
    state_to_act_futur_t = state_to_act_futur_t.reshape(
        (state_to_act_futur_t.shape[0], state_to_act_futur_t.shape[1], -1)
    )

    state_to_act_futur_td1 = state_to_act_futur_td1.reshape(
        (state_to_act_futur_td1.shape[0], state_to_act_futur_td1.shape[1], -1)
    )

    # now use reverse RL to compute the action TODO later
    actions = inverse_rl_model(state_to_act_futur_t, state_to_act_futur_td1)

    return actions

actions_futur = apply_decision_diffuser_policy(config.jax_key, state_past, transformer, inverse_rl_model, config)

(128, 24, 9)

In [56]:

from rubiktransformer.dataset import GOAL_OBSERVATION

def gather_data_with_policy(state, state_past, actions_past, actions_futur, buffer, buffer_list, config):
    """
    For loop with those policy and state

    log performance compare to target

    """
    state_futur_list = []

    for i in range(config.len_seq - config.len_seq // 4):
        actions_step = actions_futur[:, i, :]
        actions_0 = jnp.argmax(actions_step[:, :6], axis=1)
        actions_1 = jnp.argmax(actions_step[:, 6:], axis=1)

        actions_full = jnp.stack([actions_0, jnp.zeros(config.batch_size), actions_1], axis=1)
        
        # transform to int type
        actions_full = actions_full.astype(jnp.int32)
    
        # step 
        state, timestep  = step_jit_env(state, actions_full)

        state_futur_list.append(state.cube)

    # TODO SAVE DATA into batch format for later training
    actions_0_all_futur = jnp.argmax(actions_futur[:, :, :6], axis=-1)
    actions_1_all_futur = jnp.argmax(actions_futur[:, :, 6:], axis=-1)

    action_all_futur = jnp.stack([actions_0_all_futur, jnp.zeros((config.batch_size, actions_0_all_futur.shape[1])), actions_1_all_futur], axis=-1)

    action_all = jnp.concatenate([actions_past, action_all_futur], axis=1)
    action_all = action_all.astype(jnp.int32)

    state_futur = jnp.stack(state_futur_list, axis=1)

    state_all = jnp.concatenate([state_past, state_futur], axis=1)

    # compute reward 
    goal_observation = jnp.repeat(
        GOAL_OBSERVATION[None, None, :, :, :], config.batch_size, axis=0
    )
    goal_observation = jnp.repeat(goal_observation, config.len_seq, axis=1)
    reward = jnp.where(state_all != goal_observation, -1.0, 1.0)

    reward = reward.mean(axis=[2, 3, 4])
    reward = reward[:, -1] - reward[:, config.len_seq//4]

    # add data into the buffer
    pass

    for idx_batch in range(config.batch_size):
        buffer_list = buffer.add(
            buffer_list,
            {
                "action": action_all[idx_batch],
                "reward": reward[idx_batch],
                "state_histo": state_all[idx_batch],
            },
        )

    return buffer, buffer_list

buffer, buffer_list = gather_data_with_policy(state, state_past, actions_past, actions_futur, buffer, buffer_list, config)


In [None]:


def improve_training_loop(nb_iter=10000):
    """
    Relaunch the training loop with those new data incorporated into the buffer
    
    Full stuff here
    Online transformer setup

    1. We generate env setup 
    2. First random action in the different env
    3. Use decision_diffuser to choose the action to do from here
    4. Observe / apply policy  to retrieve data
    5. Add the data into the buffer
    6. Train model on those data

    Remember to log the performance data to compare with other run / algorithms
    """
    
    for _ in range(nb_iter):

        key, subkey = jax.random.split(config.jax_key)
        config.jax_key = key

        # first generate random state
        state_past, state, actions_past = generate_past_state_with_with_random_policy(key, vmap_reset, step_jit_env, config)
        
        # apply model to get some generation
        actions_futur = apply_decision_diffuser_policy(config.jax_key, state_past, transformer, inverse_rl_model, config)

        # update replay buffer dataset
        buffer, buffer_list = gather_data_with_policy(state, state_past, actions_past, actions_futur, buffer, buffer_list, config)

        # now we can do the training loop
        sample = buffer.sample(buffer_list, subkey)
        sample = reshape_diffusion_setup(sample, subkey)

        # we update the policy
        train_step_transformer_rf(
            transformer, optimizer_diffuser, metrics_train, sample
        )

        if idx_step % config.log_every_step == 0:
            metrics_train_result = metrics_train.compute()
            print(metrics_train_result)

            wandb.log(metrics_train_result, step=idx_step)
            metrics_train.reset()


