In [1]:
# chose the current file directory as the working directory
import os
os.chdir("/teamspace/studios/this_studio/rubikscubesolver")

In [2]:
from tqdm import tqdm
import pickle

import wandb  # for logging
import time
from dataclasses import dataclass

import jax
import jax.numpy as jnp
import flax.nnx as nnx

import rubiktransformer.dataset as dataset
from rubiktransformer.trainer import reshape_sample

from rubiktransformer.trainer_online import init_model_optimizer, init_buffer, train_step_transformer_rf, training_loop
from rubiktransformer.online_training_utils import run_n_steps, reshape_diffusion_setup

cuda_plugin_extension is not found.


In [3]:
@dataclass
class Config:
    """Configuration class"""

    jax_key: jnp.ndarray = jax.random.PRNGKey(49)
    rngs = nnx.Rngs(48)
    batch_size: int = 128
    lr_1: float = 4e-4
    lr_2: float = 4e-4
    nb_games: int = 128 * 100
    len_seq: int = 32
    nb_step: int = 1000000
    max_length_buffer: int = 1024 * 100
    log_every_step: int = 10
    log_eval_every_step: int = 10
    log_policy_reward_every_step: int = 10
    add_data_every_step: int = 500

    save_model_every_step: int = 2000


config = Config()

# init wandb config
user = "forbu14"
project = "RubikTransformer"
display_name = "experiment_" + time.strftime("%Y%m%d-%H%M%S")

wandb.init(entity=user, project=project, name=display_name)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mforbu14[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:

(
    optimizer_diffuser,
    optimizer_inverse,
    metrics_train,
    metrics_eval,
    metrics_inverse,
    transformer,
    inverse_rl_model,
) = init_model_optimizer(config)

env, buffer, buffer_eval, buffer_list, buffer_list_eval, jit_step = init_buffer(
    config
)

vmap_reset = jax.vmap(jax.jit(env.reset))
vmap_step = jax.vmap(run_n_steps, in_axes=(0, 0, None))

##### TRAINING #####
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key


In [5]:

buffer, buffer_list = dataset.fast_gathering_data_diffusion(
    env,
    vmap_reset,
    vmap_step,
    config.nb_games * 1,  # old is int(config.nb_games * 10.0),
    config.len_seq,
    buffer,
    buffer_list,
    subkey,
)


In [7]:
# load weight from world model transformer:
import pickle

filename = "state_ddt_model_improved_v2.pickle"

with open(filename, "rb") as input_file:
    state = pickle.load(input_file)

nnx.update(transformer, state)

# load weight from world model transformer:
import pickle

filename = "state_inverse_rl_model_improved_v2.pickle"

with open(filename, "rb") as input_file:
    state = pickle.load(input_file)

nnx.update(inverse_rl_model, state)

In [8]:
sample = buffer.sample(buffer_list, subkey)
sample = reshape_diffusion_setup(sample)


In [13]:
sample.keys()

dict_keys(['action', 'reward', 'state_histo', 'time_step', 'context', 'state_past', 'state_future', 'state_future_noise', 'action_inverse', 'state_histo_inverse_t', 'state_histo_inverse_td1'])

In [19]:
sample["action_inverse"]

Array([[0., 0., 1., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [1., 0., 0., ..., 1., 0., 0.],
       [1., 0., 0., ..., 1., 0., 0.]], dtype=float32)

In [21]:
sample["state_histo_inverse_t"]

Array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [22]:
sample["state_histo_inverse_td1"]

Array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [24]:
inverse_rl_model(sample["state_histo_inverse_t"], sample["state_histo_inverse_td1"])[:, :6]

Array([[-48.623276 ,  -9.329204 ,  70.21669  , -22.916101 , -31.451435 ,
        -10.404979 ],
       [-31.948256 , -48.7778   ,  -5.5023904,  56.92978  ,  -9.326402 ,
         -9.130017 ],
       [-59.59993  , -53.93168  , -33.287945 ,  75.91284  ,   0.7346423,
        -19.287275 ],
       ...,
       [-37.839226 , -65.764305 , -17.716326 ,  63.848442 ,  -1.0198423,
        -11.484803 ],
       [ 73.249626 , -21.477034 , -41.02303  , -35.555355 , -27.016033 ,
        -17.915022 ],
       [ 72.79475  , -17.908035 , -51.450764 , -27.936342 , -31.690832 ,
        -18.683146 ]], dtype=float32)

In [8]:
training_loop(config,
    env,
    buffer,
    buffer_eval,
    buffer_list,
    buffer_list_eval,
    vmap_reset,
    vmap_step,
    transformer,
    optimizer_diffuser,
    metrics_train,
    metrics_eval,
    inverse_rl_model,
    optimizer_inverse,
    metrics_inverse,)

{'loss': Array(0.17635848, dtype=float32), 'loss_cross_entropy': Array(0.16696732, dtype=float32)}
{'loss_inverse': Array(8.644339e-07, dtype=float32)}


  6%|▋         | 64205/1000000 [3:02:17<37:03:15,  7.02it/s]

  6%|▋         | 64209/1000000 [3:02:17<26:40:55,  9.74it/s]

{'loss': Array(0.1592211, dtype=float32), 'loss_cross_entropy': Array(0.14973159, dtype=float32)}
{'loss_inverse': Array(3.7941223e-08, dtype=float32)}


  6%|▋         | 64219/1000000 [3:02:19<32:24:36,  8.02it/s]

{'loss': Array(0.1394069, dtype=float32), 'loss_cross_entropy': Array(0.13089067, dtype=float32)}
{'loss_inverse': Array(1.9350416e-06, dtype=float32)}


  6%|▋         | 64229/1000000 [3:02:20<27:19:28,  9.51it/s]

{'loss': Array(0.14728451, dtype=float32), 'loss_cross_entropy': Array(0.1383798, dtype=float32)}
{'loss_inverse': Array(0.00011489, dtype=float32)}


  6%|▋         | 64239/1000000 [3:02:22<38:32:25,  6.74it/s]

{'loss': Array(0.15798315, dtype=float32), 'loss_cross_entropy': Array(0.14811371, dtype=float32)}
{'loss_inverse': Array(1.7343727e-06, dtype=float32)}


  6%|▋         | 64249/1000000 [3:02:23<27:24:35,  9.48it/s]

{'loss': Array(0.16488062, dtype=float32), 'loss_cross_entropy': Array(0.15559538, dtype=float32)}
{'loss_inverse': Array(2.9616804e-05, dtype=float32)}


  6%|▋         | 64259/1000000 [3:02:25<26:04:12,  9.97it/s]

{'loss': Array(0.16273995, dtype=float32), 'loss_cross_entropy': Array(0.15280798, dtype=float32)}
{'loss_inverse': Array(7.090864e-06, dtype=float32)}


  6%|▋         | 64269/1000000 [3:02:27<30:56:07,  8.40it/s]

{'loss': Array(0.15709282, dtype=float32), 'loss_cross_entropy': Array(0.14738676, dtype=float32)}
{'loss_inverse': Array(2.6361857e-07, dtype=float32)}


  6%|▋         | 64279/1000000 [3:02:28<26:06:56,  9.95it/s]

{'loss': Array(0.16574167, dtype=float32), 'loss_cross_entropy': Array(0.15632713, dtype=float32)}
{'loss_inverse': Array(3.6505867e-07, dtype=float32)}


  6%|▋         | 64289/1000000 [3:02:30<30:19:18,  8.57it/s]

{'loss': Array(0.16021581, dtype=float32), 'loss_cross_entropy': Array(0.15082163, dtype=float32)}
{'loss_inverse': Array(4.3449977e-06, dtype=float32)}


  6%|▋         | 64299/1000000 [3:02:31<25:59:51, 10.00it/s]

{'loss': Array(0.1361056, dtype=float32), 'loss_cross_entropy': Array(0.1275112, dtype=float32)}
{'loss_inverse': Array(4.758128e-06, dtype=float32)}


  6%|▋         | 64309/1000000 [3:02:33<31:27:46,  8.26it/s]

{'loss': Array(0.15941003, dtype=float32), 'loss_cross_entropy': Array(0.15027803, dtype=float32)}
{'loss_inverse': Array(0.00012564, dtype=float32)}


  6%|▋         | 64319/1000000 [3:02:34<26:36:13,  9.77it/s]

{'loss': Array(0.15022404, dtype=float32), 'loss_cross_entropy': Array(0.14100856, dtype=float32)}
{'loss_inverse': Array(6.845929e-06, dtype=float32)}


  6%|▋         | 64329/1000000 [3:02:36<31:26:34,  8.27it/s]

{'loss': Array(0.1501526, dtype=float32), 'loss_cross_entropy': Array(0.14174055, dtype=float32)}
{'loss_inverse': Array(6.771378e-08, dtype=float32)}


  6%|▋         | 64339/1000000 [3:02:38<26:47:32,  9.70it/s]

{'loss': Array(0.17018978, dtype=float32), 'loss_cross_entropy': Array(0.1600758, dtype=float32)}
{'loss_inverse': Array(3.0957625e-07, dtype=float32)}


  6%|▋         | 64349/1000000 [3:02:39<25:41:23, 10.12it/s]

{'loss': Array(0.13698874, dtype=float32), 'loss_cross_entropy': Array(0.12815897, dtype=float32)}
{'loss_inverse': Array(3.0137105e-07, dtype=float32)}


  6%|▋         | 64359/1000000 [3:02:41<28:52:38,  9.00it/s]

{'loss': Array(0.13699324, dtype=float32), 'loss_cross_entropy': Array(0.12816973, dtype=float32)}
{'loss_inverse': Array(1.7171787e-07, dtype=float32)}


  6%|▋         | 64369/1000000 [3:02:42<26:03:26,  9.97it/s]

{'loss': Array(0.16460218, dtype=float32), 'loss_cross_entropy': Array(0.15493064, dtype=float32)}
{'loss_inverse': Array(8.223332e-06, dtype=float32)}


  6%|▋         | 64379/1000000 [3:02:44<30:32:57,  8.51it/s]

{'loss': Array(0.17333648, dtype=float32), 'loss_cross_entropy': Array(0.16364934, dtype=float32)}
{'loss_inverse': Array(2.2422635e-06, dtype=float32)}


  6%|▋         | 64389/1000000 [3:02:45<27:06:52,  9.58it/s]

{'loss': Array(0.16786222, dtype=float32), 'loss_cross_entropy': Array(0.15748902, dtype=float32)}
{'loss_inverse': Array(7.774494e-06, dtype=float32)}


  6%|▋         | 64399/1000000 [3:02:47<31:55:39,  8.14it/s]

{'loss': Array(0.15373772, dtype=float32), 'loss_cross_entropy': Array(0.14517191, dtype=float32)}
{'loss_inverse': Array(3.2928685e-07, dtype=float32)}


  6%|▋         | 64409/1000000 [3:02:49<26:36:20,  9.77it/s]

{'loss': Array(0.15341449, dtype=float32), 'loss_cross_entropy': Array(0.14412092, dtype=float32)}
{'loss_inverse': Array(1.8950273e-07, dtype=float32)}


  6%|▋         | 64419/1000000 [3:02:50<25:45:20, 10.09it/s]

{'loss': Array(0.14122555, dtype=float32), 'loss_cross_entropy': Array(0.13265048, dtype=float32)}
{'loss_inverse': Array(6.566838e-08, dtype=float32)}


  6%|▋         | 64429/1000000 [3:02:52<29:39:17,  8.76it/s]

{'loss': Array(0.15750487, dtype=float32), 'loss_cross_entropy': Array(0.14833069, dtype=float32)}
{'loss_inverse': Array(0.00026832, dtype=float32)}


  6%|▋         | 64439/1000000 [3:02:53<25:47:16, 10.08it/s]

{'loss': Array(0.13844672, dtype=float32), 'loss_cross_entropy': Array(0.1296768, dtype=float32)}
{'loss_inverse': Array(6.2657864e-07, dtype=float32)}


  6%|▋         | 64449/1000000 [3:02:55<34:40:58,  7.49it/s]

{'loss': Array(0.15013905, dtype=float32), 'loss_cross_entropy': Array(0.14093928, dtype=float32)}
{'loss_inverse': Array(1.6774259e-06, dtype=float32)}


  6%|▋         | 64459/1000000 [3:02:56<26:51:32,  9.68it/s]

{'loss': Array(0.15312783, dtype=float32), 'loss_cross_entropy': Array(0.14389497, dtype=float32)}
{'loss_inverse': Array(5.3248572e-05, dtype=float32)}


  6%|▋         | 64469/1000000 [3:02:58<39:36:11,  6.56it/s]

{'loss': Array(0.16066681, dtype=float32), 'loss_cross_entropy': Array(0.15097788, dtype=float32)}
{'loss_inverse': Array(2.724369e-07, dtype=float32)}


  6%|▋         | 64479/1000000 [3:02:59<27:37:19,  9.41it/s]

{'loss': Array(0.15578225, dtype=float32), 'loss_cross_entropy': Array(0.14649743, dtype=float32)}
{'loss_inverse': Array(9.115228e-06, dtype=float32)}


  6%|▋         | 64489/1000000 [3:03:01<39:26:52,  6.59it/s]

{'loss': Array(0.16645236, dtype=float32), 'loss_cross_entropy': Array(0.15684952, dtype=float32)}
{'loss_inverse': Array(4.471934e-06, dtype=float32)}


  6%|▋         | 64499/1000000 [3:03:03<29:02:43,  8.95it/s]

{'loss': Array(0.15719299, dtype=float32), 'loss_cross_entropy': Array(0.14762671, dtype=float32)}
{'loss_inverse': Array(6.9617624e-07, dtype=float32)}


  6%|▋         | 64509/1000000 [3:03:11<89:04:16,  2.92it/s] 

{'loss': Array(0.15574364, dtype=float32), 'loss_cross_entropy': Array(0.14652853, dtype=float32)}
{'loss_inverse': Array(7.1103773e-06, dtype=float32)}


  6%|▋         | 64519/1000000 [3:03:13<45:38:34,  5.69it/s]

{'loss': Array(0.15525584, dtype=float32), 'loss_cross_entropy': Array(0.14503348, dtype=float32)}
{'loss_inverse': Array(3.6333515e-07, dtype=float32)}


  6%|▋         | 64529/1000000 [3:03:14<28:53:29,  8.99it/s]

{'loss': Array(0.17510049, dtype=float32), 'loss_cross_entropy': Array(0.16481774, dtype=float32)}
{'loss_inverse': Array(2.7969025e-07, dtype=float32)}


  6%|▋         | 64539/1000000 [3:03:16<26:42:05,  9.73it/s]

{'loss': Array(0.1605913, dtype=float32), 'loss_cross_entropy': Array(0.15110542, dtype=float32)}
{'loss_inverse': Array(3.7123797e-07, dtype=float32)}


  6%|▋         | 64549/1000000 [3:03:17<28:56:17,  8.98it/s]

{'loss': Array(0.16328089, dtype=float32), 'loss_cross_entropy': Array(0.15313052, dtype=float32)}
{'loss_inverse': Array(1.881754e-07, dtype=float32)}


  6%|▋         | 64559/1000000 [3:03:19<26:47:38,  9.70it/s]

{'loss': Array(0.16279064, dtype=float32), 'loss_cross_entropy': Array(0.15275908, dtype=float32)}
{'loss_inverse': Array(6.6071675e-07, dtype=float32)}


  6%|▋         | 64569/1000000 [3:03:21<29:19:21,  8.86it/s]

{'loss': Array(0.16360371, dtype=float32), 'loss_cross_entropy': Array(0.15479288, dtype=float32)}
{'loss_inverse': Array(5.434195e-06, dtype=float32)}


  6%|▋         | 64579/1000000 [3:03:22<26:41:28,  9.73it/s]

{'loss': Array(0.15538001, dtype=float32), 'loss_cross_entropy': Array(0.1457671, dtype=float32)}
{'loss_inverse': Array(2.1230646e-05, dtype=float32)}


  6%|▋         | 64589/1000000 [3:03:24<29:10:09,  8.91it/s]

{'loss': Array(0.15298508, dtype=float32), 'loss_cross_entropy': Array(0.14344616, dtype=float32)}
{'loss_inverse': Array(1.1657462e-06, dtype=float32)}


  6%|▋         | 64599/1000000 [3:03:25<26:01:13,  9.99it/s]

{'loss': Array(0.16079892, dtype=float32), 'loss_cross_entropy': Array(0.15156767, dtype=float32)}
{'loss_inverse': Array(1.8276445e-06, dtype=float32)}


  6%|▋         | 64609/1000000 [3:03:27<29:51:40,  8.70it/s]

{'loss': Array(0.14693363, dtype=float32), 'loss_cross_entropy': Array(0.13767046, dtype=float32)}
{'loss_inverse': Array(7.989058e-05, dtype=float32)}


  6%|▋         | 64619/1000000 [3:03:28<26:23:43,  9.84it/s]

{'loss': Array(0.16021633, dtype=float32), 'loss_cross_entropy': Array(0.15071207, dtype=float32)}
{'loss_inverse': Array(4.3188397e-06, dtype=float32)}


  6%|▋         | 64629/1000000 [3:03:30<32:20:57,  8.03it/s]

{'loss': Array(0.14807443, dtype=float32), 'loss_cross_entropy': Array(0.13902687, dtype=float32)}
{'loss_inverse': Array(1.9552058e-06, dtype=float32)}


  6%|▋         | 64639/1000000 [3:03:32<26:32:53,  9.79it/s]

{'loss': Array(0.15388678, dtype=float32), 'loss_cross_entropy': Array(0.14471667, dtype=float32)}
{'loss_inverse': Array(2.4924273e-06, dtype=float32)}


  6%|▋         | 64649/1000000 [3:03:33<34:39:06,  7.50it/s]

{'loss': Array(0.16221456, dtype=float32), 'loss_cross_entropy': Array(0.15294254, dtype=float32)}
{'loss_inverse': Array(7.1844947e-06, dtype=float32)}


  6%|▋         | 64659/1000000 [3:03:35<27:20:14,  9.50it/s]

{'loss': Array(0.14910226, dtype=float32), 'loss_cross_entropy': Array(0.13992302, dtype=float32)}
{'loss_inverse': Array(7.166617e-05, dtype=float32)}


  6%|▋         | 64669/1000000 [3:03:36<25:22:14, 10.24it/s]

{'loss': Array(0.1530041, dtype=float32), 'loss_cross_entropy': Array(0.14360829, dtype=float32)}
{'loss_inverse': Array(0.00019769, dtype=float32)}


  6%|▋         | 64679/1000000 [3:03:38<30:05:47,  8.63it/s]

{'loss': Array(0.14893149, dtype=float32), 'loss_cross_entropy': Array(0.14045529, dtype=float32)}
{'loss_inverse': Array(4.754933e-06, dtype=float32)}


  6%|▋         | 64689/1000000 [3:03:39<25:59:31, 10.00it/s]

{'loss': Array(0.15412205, dtype=float32), 'loss_cross_entropy': Array(0.14485428, dtype=float32)}
{'loss_inverse': Array(1.3391619e-06, dtype=float32)}


  6%|▋         | 64699/1000000 [3:03:41<34:29:19,  7.53it/s]

{'loss': Array(0.15114336, dtype=float32), 'loss_cross_entropy': Array(0.14203575, dtype=float32)}
{'loss_inverse': Array(1.903631e-05, dtype=float32)}


  6%|▋         | 64709/1000000 [3:03:42<27:03:11,  9.60it/s]

{'loss': Array(0.17750601, dtype=float32), 'loss_cross_entropy': Array(0.16714895, dtype=float32)}
{'loss_inverse': Array(6.632995e-05, dtype=float32)}


  6%|▋         | 64719/1000000 [3:03:44<35:06:48,  7.40it/s]

{'loss': Array(0.14225708, dtype=float32), 'loss_cross_entropy': Array(0.13407065, dtype=float32)}
{'loss_inverse': Array(8.879735e-06, dtype=float32)}


  6%|▋         | 64729/1000000 [3:03:46<27:14:45,  9.54it/s]

{'loss': Array(0.14871088, dtype=float32), 'loss_cross_entropy': Array(0.14028847, dtype=float32)}
{'loss_inverse': Array(5.502936e-05, dtype=float32)}


  6%|▋         | 64739/1000000 [3:03:47<25:24:06, 10.23it/s]

{'loss': Array(0.14296733, dtype=float32), 'loss_cross_entropy': Array(0.13442628, dtype=float32)}
{'loss_inverse': Array(1.2097174e-06, dtype=float32)}


  6%|▋         | 64749/1000000 [3:03:49<28:25:57,  9.14it/s]

{'loss': Array(0.1413766, dtype=float32), 'loss_cross_entropy': Array(0.13269699, dtype=float32)}
{'loss_inverse': Array(2.4276707e-05, dtype=float32)}


  6%|▋         | 64759/1000000 [3:03:50<26:12:34,  9.91it/s]

{'loss': Array(0.15901868, dtype=float32), 'loss_cross_entropy': Array(0.14979601, dtype=float32)}
{'loss_inverse': Array(3.1866117e-07, dtype=float32)}


  6%|▋         | 64769/1000000 [3:03:52<39:26:50,  6.59it/s]

{'loss': Array(0.14160411, dtype=float32), 'loss_cross_entropy': Array(0.13320471, dtype=float32)}
{'loss_inverse': Array(1.8219048e-07, dtype=float32)}


  6%|▋         | 64779/1000000 [3:03:53<28:12:55,  9.21it/s]

{'loss': Array(0.1500895, dtype=float32), 'loss_cross_entropy': Array(0.14097925, dtype=float32)}
{'loss_inverse': Array(2.9167602e-05, dtype=float32)}


  6%|▋         | 64789/1000000 [3:03:55<25:36:19, 10.15it/s]

{'loss': Array(0.16809206, dtype=float32), 'loss_cross_entropy': Array(0.15857549, dtype=float32)}
{'loss_inverse': Array(1.6849259e-06, dtype=float32)}


  6%|▋         | 64799/1000000 [3:03:57<29:50:49,  8.70it/s]

{'loss': Array(0.14269012, dtype=float32), 'loss_cross_entropy': Array(0.13406734, dtype=float32)}
{'loss_inverse': Array(3.7262285e-05, dtype=float32)}


  6%|▋         | 64809/1000000 [3:03:58<26:24:42,  9.84it/s]

{'loss': Array(0.140224, dtype=float32), 'loss_cross_entropy': Array(0.1317847, dtype=float32)}
{'loss_inverse': Array(3.8361154e-07, dtype=float32)}


  6%|▋         | 64819/1000000 [3:04:00<29:53:50,  8.69it/s]

{'loss': Array(0.14646605, dtype=float32), 'loss_cross_entropy': Array(0.137155, dtype=float32)}
{'loss_inverse': Array(2.460433e-06, dtype=float32)}


  6%|▋         | 64829/1000000 [3:04:01<25:48:50, 10.06it/s]

{'loss': Array(0.14331144, dtype=float32), 'loss_cross_entropy': Array(0.13453804, dtype=float32)}
{'loss_inverse': Array(1.1728146e-06, dtype=float32)}


  6%|▋         | 64839/1000000 [3:04:03<32:05:16,  8.10it/s]

{'loss': Array(0.1456271, dtype=float32), 'loss_cross_entropy': Array(0.13713129, dtype=float32)}
{'loss_inverse': Array(0.00010779, dtype=float32)}


  6%|▋         | 64849/1000000 [3:04:04<27:22:32,  9.49it/s]

{'loss': Array(0.13892041, dtype=float32), 'loss_cross_entropy': Array(0.13034344, dtype=float32)}
{'loss_inverse': Array(4.641019e-06, dtype=float32)}


  6%|▋         | 64859/1000000 [3:04:06<26:07:44,  9.94it/s]

{'loss': Array(0.1581584, dtype=float32), 'loss_cross_entropy': Array(0.14885285, dtype=float32)}
{'loss_inverse': Array(4.383284e-06, dtype=float32)}


  6%|▋         | 64869/1000000 [3:04:08<29:55:56,  8.68it/s]

{'loss': Array(0.14727461, dtype=float32), 'loss_cross_entropy': Array(0.1383674, dtype=float32)}
{'loss_inverse': Array(8.787575e-05, dtype=float32)}


  6%|▋         | 64879/1000000 [3:04:09<26:16:40,  9.88it/s]

{'loss': Array(0.1580622, dtype=float32), 'loss_cross_entropy': Array(0.14899826, dtype=float32)}
{'loss_inverse': Array(7.426653e-06, dtype=float32)}


  6%|▋         | 64889/1000000 [3:04:11<35:07:28,  7.40it/s]

{'loss': Array(0.15690283, dtype=float32), 'loss_cross_entropy': Array(0.14729649, dtype=float32)}
{'loss_inverse': Array(0.00023917, dtype=float32)}


  6%|▋         | 64899/1000000 [3:04:12<26:50:48,  9.68it/s]

{'loss': Array(0.14201207, dtype=float32), 'loss_cross_entropy': Array(0.13304137, dtype=float32)}
{'loss_inverse': Array(0.00011416, dtype=float32)}


  6%|▋         | 64909/1000000 [3:04:14<38:27:57,  6.75it/s]

{'loss': Array(0.15047091, dtype=float32), 'loss_cross_entropy': Array(0.14129119, dtype=float32)}
{'loss_inverse': Array(1.7677263e-05, dtype=float32)}


  6%|▋         | 64919/1000000 [3:04:15<27:37:45,  9.40it/s]

{'loss': Array(0.14237121, dtype=float32), 'loss_cross_entropy': Array(0.13356079, dtype=float32)}
{'loss_inverse': Array(1.6523607e-05, dtype=float32)}


  6%|▋         | 64929/1000000 [3:04:17<39:02:38,  6.65it/s]

{'loss': Array(0.14168736, dtype=float32), 'loss_cross_entropy': Array(0.13242877, dtype=float32)}
{'loss_inverse': Array(7.307044e-07, dtype=float32)}


  6%|▋         | 64939/1000000 [3:04:19<27:43:04,  9.37it/s]

{'loss': Array(0.14628457, dtype=float32), 'loss_cross_entropy': Array(0.13785528, dtype=float32)}
{'loss_inverse': Array(1.2794296e-05, dtype=float32)}


  6%|▋         | 64949/1000000 [3:04:20<26:07:08,  9.94it/s]

{'loss': Array(0.14895162, dtype=float32), 'loss_cross_entropy': Array(0.1401282, dtype=float32)}
{'loss_inverse': Array(2.7007045e-05, dtype=float32)}


  6%|▋         | 64959/1000000 [3:04:22<35:00:28,  7.42it/s]

{'loss': Array(0.13655645, dtype=float32), 'loss_cross_entropy': Array(0.12882872, dtype=float32)}
{'loss_inverse': Array(1.030621e-06, dtype=float32)}


  6%|▋         | 64969/1000000 [3:04:23<27:05:47,  9.59it/s]

{'loss': Array(0.13976185, dtype=float32), 'loss_cross_entropy': Array(0.13098867, dtype=float32)}
{'loss_inverse': Array(2.5177421e-06, dtype=float32)}


  6%|▋         | 64979/1000000 [3:04:25<38:23:22,  6.77it/s]

{'loss': Array(0.15694024, dtype=float32), 'loss_cross_entropy': Array(0.14751923, dtype=float32)}
{'loss_inverse': Array(2.1968362e-05, dtype=float32)}


  6%|▋         | 64989/1000000 [3:04:26<27:23:31,  9.48it/s]

{'loss': Array(0.16610955, dtype=float32), 'loss_cross_entropy': Array(0.1569721, dtype=float32)}
{'loss_inverse': Array(0.00015473, dtype=float32)}


  6%|▋         | 64999/1000000 [3:04:28<25:58:38, 10.00it/s]

{'loss': Array(0.15671284, dtype=float32), 'loss_cross_entropy': Array(0.14803733, dtype=float32)}
{'loss_inverse': Array(7.525901e-05, dtype=float32)}


  7%|▋         | 65009/1000000 [3:04:36<89:46:44,  2.89it/s] 

{'loss': Array(0.140622, dtype=float32), 'loss_cross_entropy': Array(0.132417, dtype=float32)}
{'loss_inverse': Array(1.0122719e-05, dtype=float32)}


  7%|▋         | 65019/1000000 [3:04:37<35:57:11,  7.22it/s]

{'loss': Array(0.15731333, dtype=float32), 'loss_cross_entropy': Array(0.1483035, dtype=float32)}
{'loss_inverse': Array(0.00037641, dtype=float32)}


  7%|▋         | 65029/1000000 [3:04:39<30:18:14,  8.57it/s]

{'loss': Array(0.16280986, dtype=float32), 'loss_cross_entropy': Array(0.15339406, dtype=float32)}
{'loss_inverse': Array(8.408554e-06, dtype=float32)}


  7%|▋         | 65039/1000000 [3:04:41<26:26:40,  9.82it/s]

{'loss': Array(0.13703233, dtype=float32), 'loss_cross_entropy': Array(0.12839325, dtype=float32)}
{'loss_inverse': Array(0.00014688, dtype=float32)}


  7%|▋         | 65049/1000000 [3:04:42<29:39:54,  8.75it/s]

{'loss': Array(0.15207812, dtype=float32), 'loss_cross_entropy': Array(0.14283976, dtype=float32)}
{'loss_inverse': Array(0.00013932, dtype=float32)}


  7%|▋         | 65059/1000000 [3:04:44<26:20:26,  9.86it/s]

{'loss': Array(0.14453472, dtype=float32), 'loss_cross_entropy': Array(0.1356166, dtype=float32)}
{'loss_inverse': Array(1.2096369e-05, dtype=float32)}


  7%|▋         | 65069/1000000 [3:04:45<34:09:39,  7.60it/s]

{'loss': Array(0.15190384, dtype=float32), 'loss_cross_entropy': Array(0.14262334, dtype=float32)}
{'loss_inverse': Array(1.7720507e-05, dtype=float32)}


  7%|▋         | 65079/1000000 [3:04:47<27:05:41,  9.58it/s]

{'loss': Array(0.16021, dtype=float32), 'loss_cross_entropy': Array(0.15127908, dtype=float32)}
{'loss_inverse': Array(2.4059398e-06, dtype=float32)}


  7%|▋         | 65089/1000000 [3:04:49<34:53:29,  7.44it/s]

{'loss': Array(0.15631682, dtype=float32), 'loss_cross_entropy': Array(0.1471604, dtype=float32)}
{'loss_inverse': Array(9.934235e-05, dtype=float32)}


  7%|▋         | 65099/1000000 [3:04:50<26:56:02,  9.64it/s]

{'loss': Array(0.15924676, dtype=float32), 'loss_cross_entropy': Array(0.14946026, dtype=float32)}
{'loss_inverse': Array(4.2982992e-06, dtype=float32)}


  7%|▋         | 65109/1000000 [3:04:52<38:15:35,  6.79it/s]

{'loss': Array(0.1442823, dtype=float32), 'loss_cross_entropy': Array(0.13571039, dtype=float32)}
{'loss_inverse': Array(4.5013963e-05, dtype=float32)}


  7%|▋         | 65119/1000000 [3:04:53<28:10:11,  9.22it/s]

{'loss': Array(0.16189618, dtype=float32), 'loss_cross_entropy': Array(0.15300053, dtype=float32)}
{'loss_inverse': Array(1.788815e-06, dtype=float32)}


  7%|▋         | 65129/1000000 [3:04:55<26:08:31,  9.93it/s]

{'loss': Array(0.15203395, dtype=float32), 'loss_cross_entropy': Array(0.14290138, dtype=float32)}
{'loss_inverse': Array(3.5864614e-06, dtype=float32)}


  7%|▋         | 65139/1000000 [3:04:56<31:49:54,  8.16it/s]

{'loss': Array(0.1451911, dtype=float32), 'loss_cross_entropy': Array(0.13667642, dtype=float32)}
{'loss_inverse': Array(9.19931e-05, dtype=float32)}


  7%|▋         | 65149/1000000 [3:04:58<26:19:48,  9.86it/s]

{'loss': Array(0.15379255, dtype=float32), 'loss_cross_entropy': Array(0.14427832, dtype=float32)}
{'loss_inverse': Array(3.6375604e-05, dtype=float32)}


  7%|▋         | 65159/1000000 [3:05:00<38:00:17,  6.83it/s]

{'loss': Array(0.16550905, dtype=float32), 'loss_cross_entropy': Array(0.15606129, dtype=float32)}
{'loss_inverse': Array(1.5659169e-05, dtype=float32)}


  7%|▋         | 65169/1000000 [3:05:01<27:20:47,  9.50it/s]

{'loss': Array(0.15023473, dtype=float32), 'loss_cross_entropy': Array(0.14123519, dtype=float32)}
{'loss_inverse': Array(1.37726665e-05, dtype=float32)}


  7%|▋         | 65179/1000000 [3:05:02<25:45:03, 10.08it/s]

{'loss': Array(0.14510813, dtype=float32), 'loss_cross_entropy': Array(0.1366755, dtype=float32)}
{'loss_inverse': Array(2.757697e-07, dtype=float32)}


  7%|▋         | 65189/1000000 [3:05:04<29:34:09,  8.78it/s]

{'loss': Array(0.13369095, dtype=float32), 'loss_cross_entropy': Array(0.12586479, dtype=float32)}
{'loss_inverse': Array(7.4860795e-06, dtype=float32)}


  7%|▋         | 65199/1000000 [3:05:06<26:31:53,  9.79it/s]

{'loss': Array(0.13426603, dtype=float32), 'loss_cross_entropy': Array(0.12609994, dtype=float32)}
{'loss_inverse': Array(2.1962067e-05, dtype=float32)}


  7%|▋         | 65209/1000000 [3:05:07<29:42:12,  8.74it/s]

{'loss': Array(0.14674288, dtype=float32), 'loss_cross_entropy': Array(0.13761888, dtype=float32)}
{'loss_inverse': Array(7.092649e-05, dtype=float32)}


  7%|▋         | 65219/1000000 [3:05:09<26:29:19,  9.80it/s]

{'loss': Array(0.15640429, dtype=float32), 'loss_cross_entropy': Array(0.14712182, dtype=float32)}
{'loss_inverse': Array(2.821353e-06, dtype=float32)}


  7%|▋         | 65229/1000000 [3:05:10<31:49:45,  8.16it/s]

{'loss': Array(0.16802146, dtype=float32), 'loss_cross_entropy': Array(0.15859911, dtype=float32)}
{'loss_inverse': Array(1.1124979e-05, dtype=float32)}


  7%|▋         | 65239/1000000 [3:05:12<26:36:17,  9.76it/s]

{'loss': Array(0.15576132, dtype=float32), 'loss_cross_entropy': Array(0.14688487, dtype=float32)}
{'loss_inverse': Array(0.00012069, dtype=float32)}


  7%|▋         | 65249/1000000 [3:05:14<32:06:51,  8.09it/s]

{'loss': Array(0.14026636, dtype=float32), 'loss_cross_entropy': Array(0.13178967, dtype=float32)}
{'loss_inverse': Array(0.00033048, dtype=float32)}


  7%|▋         | 65259/1000000 [3:05:15<26:33:16,  9.78it/s]

{'loss': Array(0.15772472, dtype=float32), 'loss_cross_entropy': Array(0.14878936, dtype=float32)}
{'loss_inverse': Array(3.3912759e-06, dtype=float32)}


  7%|▋         | 65269/1000000 [3:05:17<25:52:18, 10.04it/s]

{'loss': Array(0.14418052, dtype=float32), 'loss_cross_entropy': Array(0.1350814, dtype=float32)}
{'loss_inverse': Array(0.00119047, dtype=float32)}


  7%|▋         | 65279/1000000 [3:05:18<28:32:30,  9.10it/s]

{'loss': Array(0.14326872, dtype=float32), 'loss_cross_entropy': Array(0.13425161, dtype=float32)}
{'loss_inverse': Array(8.684957e-05, dtype=float32)}


  7%|▋         | 65289/1000000 [3:05:20<26:17:08,  9.88it/s]

{'loss': Array(0.14528239, dtype=float32), 'loss_cross_entropy': Array(0.13599502, dtype=float32)}
{'loss_inverse': Array(6.3714956e-06, dtype=float32)}


  7%|▋         | 65299/1000000 [3:05:21<29:43:00,  8.74it/s]

{'loss': Array(0.16520496, dtype=float32), 'loss_cross_entropy': Array(0.15516777, dtype=float32)}
{'loss_inverse': Array(1.1144999e-05, dtype=float32)}


  7%|▋         | 65309/1000000 [3:05:23<26:14:20,  9.90it/s]

{'loss': Array(0.15775633, dtype=float32), 'loss_cross_entropy': Array(0.1487719, dtype=float32)}
{'loss_inverse': Array(2.3087801e-05, dtype=float32)}


  7%|▋         | 65319/1000000 [3:05:25<31:32:45,  8.23it/s]

{'loss': Array(0.1206691, dtype=float32), 'loss_cross_entropy': Array(0.11319764, dtype=float32)}
{'loss_inverse': Array(7.294542e-05, dtype=float32)}


  7%|▋         | 65329/1000000 [3:05:26<26:05:14,  9.95it/s]

{'loss': Array(0.14025033, dtype=float32), 'loss_cross_entropy': Array(0.13151997, dtype=float32)}
{'loss_inverse': Array(1.3449001e-05, dtype=float32)}


  7%|▋         | 65339/1000000 [3:05:27<25:25:56, 10.21it/s]

{'loss': Array(0.16070335, dtype=float32), 'loss_cross_entropy': Array(0.15092933, dtype=float32)}
{'loss_inverse': Array(8.973728e-06, dtype=float32)}


  7%|▋         | 65349/1000000 [3:05:29<30:59:21,  8.38it/s]

{'loss': Array(0.15620337, dtype=float32), 'loss_cross_entropy': Array(0.14679329, dtype=float32)}
{'loss_inverse': Array(1.4751279e-05, dtype=float32)}


  7%|▋         | 65359/1000000 [3:05:31<25:56:30, 10.01it/s]

{'loss': Array(0.14454977, dtype=float32), 'loss_cross_entropy': Array(0.13558266, dtype=float32)}
{'loss_inverse': Array(1.2684228e-05, dtype=float32)}


  7%|▋         | 65369/1000000 [3:05:32<34:21:40,  7.56it/s]

{'loss': Array(0.16761993, dtype=float32), 'loss_cross_entropy': Array(0.15808414, dtype=float32)}
{'loss_inverse': Array(9.67639e-05, dtype=float32)}


  7%|▋         | 65379/1000000 [3:05:34<27:56:49,  9.29it/s]

{'loss': Array(0.15492325, dtype=float32), 'loss_cross_entropy': Array(0.14604591, dtype=float32)}
{'loss_inverse': Array(1.2271213e-05, dtype=float32)}


  7%|▋         | 65389/1000000 [3:05:36<39:47:51,  6.52it/s]

{'loss': Array(0.1591752, dtype=float32), 'loss_cross_entropy': Array(0.14952138, dtype=float32)}
{'loss_inverse': Array(0.0001041, dtype=float32)}


  7%|▋         | 65399/1000000 [3:05:37<27:50:29,  9.32it/s]

{'loss': Array(0.1520056, dtype=float32), 'loss_cross_entropy': Array(0.1431804, dtype=float32)}
{'loss_inverse': Array(3.4060802e-06, dtype=float32)}


  7%|▋         | 65409/1000000 [3:05:39<39:36:42,  6.55it/s]

{'loss': Array(0.1509178, dtype=float32), 'loss_cross_entropy': Array(0.14207186, dtype=float32)}
{'loss_inverse': Array(0.00014066, dtype=float32)}


  7%|▋         | 65419/1000000 [3:05:40<27:29:26,  9.44it/s]

{'loss': Array(0.14802447, dtype=float32), 'loss_cross_entropy': Array(0.13773732, dtype=float32)}
{'loss_inverse': Array(0.00012745, dtype=float32)}


  7%|▋         | 65429/1000000 [3:05:42<25:50:17, 10.05it/s]

{'loss': Array(0.15501152, dtype=float32), 'loss_cross_entropy': Array(0.14557405, dtype=float32)}
{'loss_inverse': Array(0.00078107, dtype=float32)}


  7%|▋         | 65439/1000000 [3:05:43<34:46:12,  7.47it/s]

{'loss': Array(0.17370084, dtype=float32), 'loss_cross_entropy': Array(0.16359916, dtype=float32)}
{'loss_inverse': Array(0.00019741, dtype=float32)}


  7%|▋         | 65449/1000000 [3:05:45<27:10:06,  9.56it/s]

{'loss': Array(0.14438307, dtype=float32), 'loss_cross_entropy': Array(0.13546196, dtype=float32)}
{'loss_inverse': Array(0.00019637, dtype=float32)}


  7%|▋         | 65459/1000000 [3:05:47<38:43:40,  6.70it/s]

{'loss': Array(0.1377322, dtype=float32), 'loss_cross_entropy': Array(0.12940998, dtype=float32)}
{'loss_inverse': Array(7.951014e-05, dtype=float32)}


  7%|▋         | 65469/1000000 [3:05:48<28:06:21,  9.24it/s]

{'loss': Array(0.18470441, dtype=float32), 'loss_cross_entropy': Array(0.17452069, dtype=float32)}
{'loss_inverse': Array(3.881205e-05, dtype=float32)}


  7%|▋         | 65479/1000000 [3:05:50<25:45:22, 10.08it/s]

{'loss': Array(0.14395842, dtype=float32), 'loss_cross_entropy': Array(0.13602804, dtype=float32)}
{'loss_inverse': Array(1.0763913e-05, dtype=float32)}


  7%|▋         | 65489/1000000 [3:05:51<29:05:15,  8.92it/s]

{'loss': Array(0.15390442, dtype=float32), 'loss_cross_entropy': Array(0.14529377, dtype=float32)}
{'loss_inverse': Array(1.1205978e-05, dtype=float32)}


  7%|▋         | 65499/1000000 [3:05:53<25:49:44, 10.05it/s]

{'loss': Array(0.16465318, dtype=float32), 'loss_cross_entropy': Array(0.15492772, dtype=float32)}
{'loss_inverse': Array(5.226908e-06, dtype=float32)}


  7%|▋         | 65509/1000000 [3:06:01<95:32:48,  2.72it/s] 

{'loss': Array(0.15307847, dtype=float32), 'loss_cross_entropy': Array(0.14461446, dtype=float32)}
{'loss_inverse': Array(0.00024278, dtype=float32)}


  7%|▋         | 65519/1000000 [3:06:02<36:54:40,  7.03it/s] 

{'loss': Array(0.13807996, dtype=float32), 'loss_cross_entropy': Array(0.12920809, dtype=float32)}
{'loss_inverse': Array(0.00017259, dtype=float32)}


  7%|▋         | 65529/1000000 [3:06:04<27:15:35,  9.52it/s]

{'loss': Array(0.16697215, dtype=float32), 'loss_cross_entropy': Array(0.15746965, dtype=float32)}
{'loss_inverse': Array(5.722747e-05, dtype=float32)}


  7%|▋         | 65539/1000000 [3:06:06<28:57:33,  8.96it/s]

{'loss': Array(0.15920377, dtype=float32), 'loss_cross_entropy': Array(0.14980684, dtype=float32)}
{'loss_inverse': Array(0.00037391, dtype=float32)}


  7%|▋         | 65549/1000000 [3:06:07<25:36:05, 10.14it/s]

{'loss': Array(0.13951886, dtype=float32), 'loss_cross_entropy': Array(0.13095948, dtype=float32)}
{'loss_inverse': Array(2.3221677e-05, dtype=float32)}


  7%|▋         | 65559/1000000 [3:06:09<30:09:03,  8.61it/s]

{'loss': Array(0.15016043, dtype=float32), 'loss_cross_entropy': Array(0.14049932, dtype=float32)}
{'loss_inverse': Array(2.4777366e-05, dtype=float32)}


  7%|▋         | 65569/1000000 [3:06:10<26:14:50,  9.89it/s]

{'loss': Array(0.16186698, dtype=float32), 'loss_cross_entropy': Array(0.15265277, dtype=float32)}
{'loss_inverse': Array(9.432869e-05, dtype=float32)}


  7%|▋         | 65579/1000000 [3:06:12<31:29:51,  8.24it/s]

{'loss': Array(0.14543903, dtype=float32), 'loss_cross_entropy': Array(0.13680491, dtype=float32)}
{'loss_inverse': Array(0.00020974, dtype=float32)}


  7%|▋         | 65589/1000000 [3:06:13<26:18:44,  9.86it/s]

{'loss': Array(0.16338761, dtype=float32), 'loss_cross_entropy': Array(0.15402003, dtype=float32)}
{'loss_inverse': Array(0.00035301, dtype=float32)}


  7%|▋         | 65599/1000000 [3:06:15<31:31:00,  8.24it/s]

{'loss': Array(0.14173388, dtype=float32), 'loss_cross_entropy': Array(0.1332226, dtype=float32)}
{'loss_inverse': Array(7.501853e-05, dtype=float32)}


  7%|▋         | 65609/1000000 [3:06:16<26:14:07,  9.89it/s]

{'loss': Array(0.15408692, dtype=float32), 'loss_cross_entropy': Array(0.14513834, dtype=float32)}
{'loss_inverse': Array(1.7832665e-05, dtype=float32)}


  7%|▋         | 65619/1000000 [3:06:18<25:53:33, 10.02it/s]

{'loss': Array(0.15426476, dtype=float32), 'loss_cross_entropy': Array(0.14542341, dtype=float32)}
{'loss_inverse': Array(0.0003621, dtype=float32)}


  7%|▋         | 65629/1000000 [3:06:20<28:39:41,  9.06it/s]

{'loss': Array(0.14693275, dtype=float32), 'loss_cross_entropy': Array(0.13773286, dtype=float32)}
{'loss_inverse': Array(0.00010049, dtype=float32)}


  7%|▋         | 65639/1000000 [3:06:21<25:51:11, 10.04it/s]

{'loss': Array(0.14743726, dtype=float32), 'loss_cross_entropy': Array(0.13810848, dtype=float32)}
{'loss_inverse': Array(0.00012325, dtype=float32)}


  7%|▋         | 65649/1000000 [3:06:23<29:53:48,  8.68it/s]

{'loss': Array(0.1583095, dtype=float32), 'loss_cross_entropy': Array(0.14888106, dtype=float32)}
{'loss_inverse': Array(2.8671973e-05, dtype=float32)}


  7%|▋         | 65659/1000000 [3:06:24<27:19:51,  9.50it/s]

{'loss': Array(0.15870105, dtype=float32), 'loss_cross_entropy': Array(0.14969154, dtype=float32)}
{'loss_inverse': Array(0.00014707, dtype=float32)}


  7%|▋         | 65669/1000000 [3:06:26<32:37:46,  7.95it/s]

{'loss': Array(0.14001058, dtype=float32), 'loss_cross_entropy': Array(0.13177687, dtype=float32)}
{'loss_inverse': Array(0.00019476, dtype=float32)}


  7%|▋         | 65679/1000000 [3:06:27<27:00:52,  9.61it/s]

{'loss': Array(0.15566877, dtype=float32), 'loss_cross_entropy': Array(0.14664225, dtype=float32)}
{'loss_inverse': Array(4.4902772e-05, dtype=float32)}


  7%|▋         | 65689/1000000 [3:06:29<26:12:52,  9.90it/s]

{'loss': Array(0.15398678, dtype=float32), 'loss_cross_entropy': Array(0.14508538, dtype=float32)}
{'loss_inverse': Array(0.00016439, dtype=float32)}


  7%|▋         | 65699/1000000 [3:06:31<30:26:19,  8.53it/s]

{'loss': Array(0.1560234, dtype=float32), 'loss_cross_entropy': Array(0.1472022, dtype=float32)}
{'loss_inverse': Array(0.00011988, dtype=float32)}


  7%|▋         | 65709/1000000 [3:06:32<26:59:19,  9.62it/s]

{'loss': Array(0.16546188, dtype=float32), 'loss_cross_entropy': Array(0.15590923, dtype=float32)}
{'loss_inverse': Array(2.2386897e-05, dtype=float32)}


  7%|▋         | 65719/1000000 [3:06:34<34:57:13,  7.42it/s]

{'loss': Array(0.15334459, dtype=float32), 'loss_cross_entropy': Array(0.14391248, dtype=float32)}
{'loss_inverse': Array(1.7430386e-06, dtype=float32)}


  7%|▋         | 65729/1000000 [3:06:35<26:36:22,  9.75it/s]

{'loss': Array(0.1551489, dtype=float32), 'loss_cross_entropy': Array(0.14601065, dtype=float32)}
{'loss_inverse': Array(0.0001194, dtype=float32)}


  7%|▋         | 65739/1000000 [3:06:37<38:27:35,  6.75it/s]

{'loss': Array(0.13205202, dtype=float32), 'loss_cross_entropy': Array(0.12403028, dtype=float32)}
{'loss_inverse': Array(0.00024982, dtype=float32)}


  7%|▋         | 65749/1000000 [3:06:39<27:38:34,  9.39it/s]

{'loss': Array(0.14676645, dtype=float32), 'loss_cross_entropy': Array(0.13759342, dtype=float32)}
{'loss_inverse': Array(0.00014939, dtype=float32)}


  7%|▋         | 65759/1000000 [3:06:40<38:19:14,  6.77it/s]

{'loss': Array(0.15491372, dtype=float32), 'loss_cross_entropy': Array(0.146205, dtype=float32)}
{'loss_inverse': Array(1.4192825e-05, dtype=float32)}


  7%|▋         | 65769/1000000 [3:06:42<27:33:36,  9.42it/s]

{'loss': Array(0.14205678, dtype=float32), 'loss_cross_entropy': Array(0.1335897, dtype=float32)}
{'loss_inverse': Array(0.00013295, dtype=float32)}


  7%|▋         | 65779/1000000 [3:06:43<25:18:25, 10.25it/s]

{'loss': Array(0.15525389, dtype=float32), 'loss_cross_entropy': Array(0.14595897, dtype=float32)}
{'loss_inverse': Array(0.00010229, dtype=float32)}


  7%|▋         | 65789/1000000 [3:06:45<35:00:53,  7.41it/s]

{'loss': Array(0.14422388, dtype=float32), 'loss_cross_entropy': Array(0.13539067, dtype=float32)}
{'loss_inverse': Array(2.3363242e-05, dtype=float32)}


  7%|▋         | 65799/1000000 [3:06:46<26:59:16,  9.62it/s]

{'loss': Array(0.14924662, dtype=float32), 'loss_cross_entropy': Array(0.14010744, dtype=float32)}
{'loss_inverse': Array(0.00032383, dtype=float32)}


  7%|▋         | 65809/1000000 [3:06:48<39:10:48,  6.62it/s]

{'loss': Array(0.16664913, dtype=float32), 'loss_cross_entropy': Array(0.15725274, dtype=float32)}
{'loss_inverse': Array(8.911819e-06, dtype=float32)}


  7%|▋         | 65819/1000000 [3:06:49<27:24:00,  9.47it/s]

{'loss': Array(0.1472735, dtype=float32), 'loss_cross_entropy': Array(0.13839404, dtype=float32)}
{'loss_inverse': Array(4.793392e-05, dtype=float32)}


  7%|▋         | 65829/1000000 [3:06:51<25:37:21, 10.13it/s]

{'loss': Array(0.15842526, dtype=float32), 'loss_cross_entropy': Array(0.14869285, dtype=float32)}
{'loss_inverse': Array(7.38978e-05, dtype=float32)}


  7%|▋         | 65839/1000000 [3:06:53<28:24:17,  9.14it/s]

{'loss': Array(0.14453483, dtype=float32), 'loss_cross_entropy': Array(0.13596052, dtype=float32)}
{'loss_inverse': Array(0.00044335, dtype=float32)}


  7%|▋         | 65849/1000000 [3:06:54<26:09:14,  9.92it/s]

{'loss': Array(0.14639662, dtype=float32), 'loss_cross_entropy': Array(0.13771008, dtype=float32)}
{'loss_inverse': Array(0.00015385, dtype=float32)}


  7%|▋         | 65859/1000000 [3:06:56<34:14:10,  7.58it/s]

{'loss': Array(0.15734355, dtype=float32), 'loss_cross_entropy': Array(0.14802945, dtype=float32)}
{'loss_inverse': Array(2.5375706e-05, dtype=float32)}


  7%|▋         | 65869/1000000 [3:06:57<26:47:15,  9.69it/s]

{'loss': Array(0.17524035, dtype=float32), 'loss_cross_entropy': Array(0.1651173, dtype=float32)}
{'loss_inverse': Array(5.3903354e-06, dtype=float32)}


  7%|▋         | 65879/1000000 [3:06:58<25:25:46, 10.20it/s]

{'loss': Array(0.13675037, dtype=float32), 'loss_cross_entropy': Array(0.12815765, dtype=float32)}
{'loss_inverse': Array(0.00016066, dtype=float32)}


  7%|▋         | 65889/1000000 [3:07:00<29:57:18,  8.66it/s]

{'loss': Array(0.15929998, dtype=float32), 'loss_cross_entropy': Array(0.1500315, dtype=float32)}
{'loss_inverse': Array(4.0950254e-06, dtype=float32)}


  7%|▋         | 65899/1000000 [3:07:02<26:31:03,  9.78it/s]

{'loss': Array(0.15369956, dtype=float32), 'loss_cross_entropy': Array(0.14453249, dtype=float32)}
{'loss_inverse': Array(9.493858e-05, dtype=float32)}


  7%|▋         | 65909/1000000 [3:07:03<34:43:19,  7.47it/s]

{'loss': Array(0.12984252, dtype=float32), 'loss_cross_entropy': Array(0.12209388, dtype=float32)}
{'loss_inverse': Array(1.4655743e-06, dtype=float32)}


  7%|▋         | 65919/1000000 [3:07:05<26:52:50,  9.65it/s]

{'loss': Array(0.14294803, dtype=float32), 'loss_cross_entropy': Array(0.13452476, dtype=float32)}
{'loss_inverse': Array(6.1688415e-06, dtype=float32)}


  7%|▋         | 65929/1000000 [3:07:07<35:46:08,  7.25it/s]

{'loss': Array(0.15851812, dtype=float32), 'loss_cross_entropy': Array(0.14907931, dtype=float32)}
{'loss_inverse': Array(1.1666415e-06, dtype=float32)}


  7%|▋         | 65939/1000000 [3:07:08<27:27:46,  9.45it/s]

{'loss': Array(0.1568514, dtype=float32), 'loss_cross_entropy': Array(0.14770621, dtype=float32)}
{'loss_inverse': Array(6.612728e-06, dtype=float32)}


  7%|▋         | 65949/1000000 [3:07:10<25:57:10, 10.00it/s]

{'loss': Array(0.1536563, dtype=float32), 'loss_cross_entropy': Array(0.14453034, dtype=float32)}
{'loss_inverse': Array(9.845655e-05, dtype=float32)}


  7%|▋         | 65959/1000000 [3:07:11<28:35:29,  9.07it/s]

{'loss': Array(0.14981362, dtype=float32), 'loss_cross_entropy': Array(0.14116718, dtype=float32)}
{'loss_inverse': Array(4.6020683e-05, dtype=float32)}


  7%|▋         | 65969/1000000 [3:07:13<26:41:59,  9.72it/s]

{'loss': Array(0.14661483, dtype=float32), 'loss_cross_entropy': Array(0.13728055, dtype=float32)}
{'loss_inverse': Array(9.13335e-06, dtype=float32)}


  7%|▋         | 65979/1000000 [3:07:15<38:35:37,  6.72it/s]

{'loss': Array(0.13437131, dtype=float32), 'loss_cross_entropy': Array(0.12617452, dtype=float32)}
{'loss_inverse': Array(7.2859926e-05, dtype=float32)}


  7%|▋         | 65989/1000000 [3:07:16<27:25:11,  9.46it/s]

{'loss': Array(0.16028237, dtype=float32), 'loss_cross_entropy': Array(0.15141104, dtype=float32)}
{'loss_inverse': Array(0.00010212, dtype=float32)}


  7%|▋         | 65999/1000000 [3:07:17<26:24:47,  9.82it/s]

{'loss': Array(0.15156387, dtype=float32), 'loss_cross_entropy': Array(0.14251183, dtype=float32)}
{'loss_inverse': Array(0.0001978, dtype=float32)}


  7%|▋         | 66010/1000000 [3:07:26<73:49:48,  3.51it/s] 

{'loss': Array(0.14417008, dtype=float32), 'loss_cross_entropy': Array(0.13535094, dtype=float32)}
{'loss_inverse': Array(0.00022092, dtype=float32)}


  7%|▋         | 66020/1000000 [3:07:28<32:57:34,  7.87it/s]

{'loss': Array(0.15411377, dtype=float32), 'loss_cross_entropy': Array(0.14486478, dtype=float32)}
{'loss_inverse': Array(4.362305e-05, dtype=float32)}


  7%|▋         | 66030/1000000 [3:07:29<29:22:32,  8.83it/s]

{'loss': Array(0.15476017, dtype=float32), 'loss_cross_entropy': Array(0.14551269, dtype=float32)}
{'loss_inverse': Array(2.0671338e-05, dtype=float32)}


  7%|▋         | 66040/1000000 [3:07:31<26:03:55,  9.95it/s]

{'loss': Array(0.13830136, dtype=float32), 'loss_cross_entropy': Array(0.12945902, dtype=float32)}
{'loss_inverse': Array(2.857821e-05, dtype=float32)}


  7%|▋         | 66050/1000000 [3:07:32<31:14:58,  8.30it/s]

{'loss': Array(0.16314562, dtype=float32), 'loss_cross_entropy': Array(0.15399124, dtype=float32)}
{'loss_inverse': Array(5.1190955e-06, dtype=float32)}


  7%|▋         | 66060/1000000 [3:07:34<26:29:40,  9.79it/s]

{'loss': Array(0.13323203, dtype=float32), 'loss_cross_entropy': Array(0.12516962, dtype=float32)}
{'loss_inverse': Array(4.5628018e-05, dtype=float32)}


  7%|▋         | 66070/1000000 [3:07:36<32:06:47,  8.08it/s]

{'loss': Array(0.1314451, dtype=float32), 'loss_cross_entropy': Array(0.12357169, dtype=float32)}
{'loss_inverse': Array(3.8890444e-06, dtype=float32)}


  7%|▋         | 66080/1000000 [3:07:37<26:51:10,  9.66it/s]

{'loss': Array(0.14700817, dtype=float32), 'loss_cross_entropy': Array(0.1384867, dtype=float32)}
{'loss_inverse': Array(2.794604e-05, dtype=float32)}


  7%|▋         | 66090/1000000 [3:07:39<39:21:00,  6.59it/s]

{'loss': Array(0.14624009, dtype=float32), 'loss_cross_entropy': Array(0.13749044, dtype=float32)}
{'loss_inverse': Array(1.425313e-05, dtype=float32)}


  7%|▋         | 66100/1000000 [3:07:40<27:29:32,  9.44it/s]

{'loss': Array(0.15782586, dtype=float32), 'loss_cross_entropy': Array(0.14854787, dtype=float32)}
{'loss_inverse': Array(2.142696e-05, dtype=float32)}


  7%|▋         | 66110/1000000 [3:07:42<26:32:04,  9.78it/s]

{'loss': Array(0.14024039, dtype=float32), 'loss_cross_entropy': Array(0.13169742, dtype=float32)}
{'loss_inverse': Array(4.965408e-07, dtype=float32)}


  7%|▋         | 66120/1000000 [3:07:43<28:15:56,  9.18it/s]

{'loss': Array(0.14750022, dtype=float32), 'loss_cross_entropy': Array(0.13823192, dtype=float32)}
{'loss_inverse': Array(9.620202e-07, dtype=float32)}


  7%|▋         | 66130/1000000 [3:07:45<26:10:15,  9.91it/s]

{'loss': Array(0.1433324, dtype=float32), 'loss_cross_entropy': Array(0.13465782, dtype=float32)}
{'loss_inverse': Array(4.2879233e-06, dtype=float32)}


  7%|▋         | 66140/1000000 [3:07:47<31:29:22,  8.24it/s]

{'loss': Array(0.15157852, dtype=float32), 'loss_cross_entropy': Array(0.14158231, dtype=float32)}
{'loss_inverse': Array(7.2091166e-06, dtype=float32)}


  7%|▋         | 66150/1000000 [3:07:48<27:38:26,  9.38it/s]

{'loss': Array(0.16492291, dtype=float32), 'loss_cross_entropy': Array(0.15506111, dtype=float32)}
{'loss_inverse': Array(2.8947662e-07, dtype=float32)}


  7%|▋         | 66160/1000000 [3:07:49<25:37:34, 10.12it/s]

{'loss': Array(0.16042724, dtype=float32), 'loss_cross_entropy': Array(0.15107942, dtype=float32)}
{'loss_inverse': Array(2.068721e-06, dtype=float32)}


  7%|▋         | 66170/1000000 [3:07:51<30:07:33,  8.61it/s]

{'loss': Array(0.14453015, dtype=float32), 'loss_cross_entropy': Array(0.13550709, dtype=float32)}
{'loss_inverse': Array(0.00011886, dtype=float32)}


  7%|▋         | 66180/1000000 [3:07:53<25:50:46, 10.04it/s]

{'loss': Array(0.15566161, dtype=float32), 'loss_cross_entropy': Array(0.14695929, dtype=float32)}
{'loss_inverse': Array(1.7161994e-05, dtype=float32)}


  7%|▋         | 66190/1000000 [3:07:54<34:18:10,  7.56it/s]

{'loss': Array(0.15241177, dtype=float32), 'loss_cross_entropy': Array(0.14391445, dtype=float32)}
{'loss_inverse': Array(1.0906248e-05, dtype=float32)}


  7%|▋         | 66200/1000000 [3:07:56<26:43:40,  9.70it/s]

{'loss': Array(0.15215254, dtype=float32), 'loss_cross_entropy': Array(0.1431003, dtype=float32)}
{'loss_inverse': Array(0.00024709, dtype=float32)}


  7%|▋         | 66210/1000000 [3:07:57<38:35:49,  6.72it/s]

{'loss': Array(0.15669613, dtype=float32), 'loss_cross_entropy': Array(0.14778154, dtype=float32)}
{'loss_inverse': Array(6.616953e-05, dtype=float32)}


  7%|▋         | 66220/1000000 [3:07:59<27:45:49,  9.34it/s]

{'loss': Array(0.1479094, dtype=float32), 'loss_cross_entropy': Array(0.13919136, dtype=float32)}
{'loss_inverse': Array(1.3341313e-06, dtype=float32)}


  7%|▋         | 66230/1000000 [3:08:01<39:20:42,  6.59it/s]

{'loss': Array(0.15728235, dtype=float32), 'loss_cross_entropy': Array(0.14809434, dtype=float32)}
{'loss_inverse': Array(6.4287015e-06, dtype=float32)}


  7%|▋         | 66240/1000000 [3:08:02<27:44:46,  9.35it/s]

{'loss': Array(0.1427025, dtype=float32), 'loss_cross_entropy': Array(0.13379112, dtype=float32)}
{'loss_inverse': Array(9.858717e-05, dtype=float32)}


  7%|▋         | 66250/1000000 [3:08:04<25:19:38, 10.24it/s]

{'loss': Array(0.15410234, dtype=float32), 'loss_cross_entropy': Array(0.14504977, dtype=float32)}
{'loss_inverse': Array(0.00030159, dtype=float32)}


  7%|▋         | 66260/1000000 [3:08:05<31:17:24,  8.29it/s]

{'loss': Array(0.14244384, dtype=float32), 'loss_cross_entropy': Array(0.1338789, dtype=float32)}
{'loss_inverse': Array(1.0058415e-06, dtype=float32)}


  7%|▋         | 66270/1000000 [3:08:07<26:46:33,  9.69it/s]

{'loss': Array(0.17659548, dtype=float32), 'loss_cross_entropy': Array(0.1669467, dtype=float32)}
{'loss_inverse': Array(0.00015312, dtype=float32)}


  7%|▋         | 66280/1000000 [3:08:08<37:50:26,  6.85it/s]

{'loss': Array(0.13741347, dtype=float32), 'loss_cross_entropy': Array(0.12894659, dtype=float32)}
{'loss_inverse': Array(2.6678526e-06, dtype=float32)}


  7%|▋         | 66290/1000000 [3:08:10<27:29:44,  9.43it/s]

{'loss': Array(0.13798472, dtype=float32), 'loss_cross_entropy': Array(0.12908602, dtype=float32)}
{'loss_inverse': Array(1.1169886e-05, dtype=float32)}


  7%|▋         | 66300/1000000 [3:08:12<38:05:33,  6.81it/s]

{'loss': Array(0.16362211, dtype=float32), 'loss_cross_entropy': Array(0.15383522, dtype=float32)}
{'loss_inverse': Array(2.1574619e-05, dtype=float32)}


  7%|▋         | 66310/1000000 [3:08:13<27:19:49,  9.49it/s]

{'loss': Array(0.17121604, dtype=float32), 'loss_cross_entropy': Array(0.16126196, dtype=float32)}
{'loss_inverse': Array(6.0772345e-05, dtype=float32)}


  7%|▋         | 66320/1000000 [3:08:14<25:31:48, 10.16it/s]

{'loss': Array(0.14854465, dtype=float32), 'loss_cross_entropy': Array(0.14041133, dtype=float32)}
{'loss_inverse': Array(8.986417e-06, dtype=float32)}


  7%|▋         | 66330/1000000 [3:08:16<34:11:41,  7.58it/s]

{'loss': Array(0.15134351, dtype=float32), 'loss_cross_entropy': Array(0.14197707, dtype=float32)}
{'loss_inverse': Array(6.480906e-05, dtype=float32)}


  7%|▋         | 66340/1000000 [3:08:17<26:44:08,  9.70it/s]

{'loss': Array(0.13987921, dtype=float32), 'loss_cross_entropy': Array(0.13092488, dtype=float32)}
{'loss_inverse': Array(8.204265e-06, dtype=float32)}


  7%|▋         | 66350/1000000 [3:08:19<25:39:19, 10.11it/s]

{'loss': Array(0.16196248, dtype=float32), 'loss_cross_entropy': Array(0.1525652, dtype=float32)}
{'loss_inverse': Array(2.1100877e-05, dtype=float32)}


  7%|▋         | 66360/1000000 [3:08:21<28:44:51,  9.02it/s]

{'loss': Array(0.13814816, dtype=float32), 'loss_cross_entropy': Array(0.12955293, dtype=float32)}
{'loss_inverse': Array(2.2852459e-05, dtype=float32)}


  7%|▋         | 66370/1000000 [3:08:22<26:20:03,  9.85it/s]

{'loss': Array(0.1520101, dtype=float32), 'loss_cross_entropy': Array(0.1429974, dtype=float32)}
{'loss_inverse': Array(2.007805e-05, dtype=float32)}


  7%|▋         | 66380/1000000 [3:08:24<30:03:01,  8.63it/s]

{'loss': Array(0.16870844, dtype=float32), 'loss_cross_entropy': Array(0.15915883, dtype=float32)}
{'loss_inverse': Array(0.00017934, dtype=float32)}


  7%|▋         | 66390/1000000 [3:08:25<26:02:18,  9.96it/s]

{'loss': Array(0.14033623, dtype=float32), 'loss_cross_entropy': Array(0.13098447, dtype=float32)}
{'loss_inverse': Array(5.7151706e-06, dtype=float32)}


  7%|▋         | 66400/1000000 [3:08:27<29:52:32,  8.68it/s]

{'loss': Array(0.1666118, dtype=float32), 'loss_cross_entropy': Array(0.15660006, dtype=float32)}
{'loss_inverse': Array(6.0539034e-05, dtype=float32)}


  7%|▋         | 66410/1000000 [3:08:28<26:27:04,  9.80it/s]

{'loss': Array(0.15810965, dtype=float32), 'loss_cross_entropy': Array(0.14872979, dtype=float32)}
{'loss_inverse': Array(5.0614006e-05, dtype=float32)}


  7%|▋         | 66420/1000000 [3:08:30<30:01:14,  8.64it/s]

{'loss': Array(0.16549578, dtype=float32), 'loss_cross_entropy': Array(0.15638028, dtype=float32)}
{'loss_inverse': Array(6.152143e-06, dtype=float32)}


  7%|▋         | 66430/1000000 [3:08:32<27:09:06,  9.55it/s]

{'loss': Array(0.14768973, dtype=float32), 'loss_cross_entropy': Array(0.139461, dtype=float32)}
{'loss_inverse': Array(3.768787e-05, dtype=float32)}


  7%|▋         | 66440/1000000 [3:08:33<34:40:50,  7.48it/s]

{'loss': Array(0.14550716, dtype=float32), 'loss_cross_entropy': Array(0.13644479, dtype=float32)}
{'loss_inverse': Array(0.0001416, dtype=float32)}


  7%|▋         | 66450/1000000 [3:08:35<27:25:00,  9.46it/s]

{'loss': Array(0.15132524, dtype=float32), 'loss_cross_entropy': Array(0.1423348, dtype=float32)}
{'loss_inverse': Array(3.0301147e-05, dtype=float32)}


  7%|▋         | 66460/1000000 [3:08:37<38:25:49,  6.75it/s]

{'loss': Array(0.1662869, dtype=float32), 'loss_cross_entropy': Array(0.1568611, dtype=float32)}
{'loss_inverse': Array(4.642916e-07, dtype=float32)}


  7%|▋         | 66470/1000000 [3:08:38<28:10:29,  9.20it/s]

{'loss': Array(0.15100718, dtype=float32), 'loss_cross_entropy': Array(0.14186789, dtype=float32)}
{'loss_inverse': Array(7.683318e-05, dtype=float32)}


  7%|▋         | 66480/1000000 [3:08:40<35:23:16,  7.33it/s]

{'loss': Array(0.151372, dtype=float32), 'loss_cross_entropy': Array(0.14246632, dtype=float32)}
{'loss_inverse': Array(0.00022153, dtype=float32)}


  7%|▋         | 66490/1000000 [3:08:41<27:09:58,  9.55it/s]

{'loss': Array(0.15156935, dtype=float32), 'loss_cross_entropy': Array(0.1423882, dtype=float32)}
{'loss_inverse': Array(0.0003208, dtype=float32)}


  7%|▋         | 66500/1000000 [3:08:43<25:53:38, 10.01it/s]

{'loss': Array(0.14225523, dtype=float32), 'loss_cross_entropy': Array(0.13337982, dtype=float32)}
{'loss_inverse': Array(8.492637e-05, dtype=float32)}


  7%|▋         | 66510/1000000 [3:08:51<90:23:55,  2.87it/s] 

{'loss': Array(0.14242592, dtype=float32), 'loss_cross_entropy': Array(0.13380903, dtype=float32)}
{'loss_inverse': Array(3.1039187e-07, dtype=float32)}


  7%|▋         | 66520/1000000 [3:08:52<36:02:32,  7.19it/s]

{'loss': Array(0.1681265, dtype=float32), 'loss_cross_entropy': Array(0.15882336, dtype=float32)}
{'loss_inverse': Array(3.7835332e-06, dtype=float32)}


  7%|▋         | 66530/1000000 [3:08:54<33:15:34,  7.80it/s]

{'loss': Array(0.14173332, dtype=float32), 'loss_cross_entropy': Array(0.13371697, dtype=float32)}
{'loss_inverse': Array(0.00016705, dtype=float32)}


  7%|▋         | 66540/1000000 [3:08:55<27:15:26,  9.51it/s]

{'loss': Array(0.15422927, dtype=float32), 'loss_cross_entropy': Array(0.1452502, dtype=float32)}
{'loss_inverse': Array(8.2931154e-05, dtype=float32)}


  7%|▋         | 66550/1000000 [3:08:57<35:02:20,  7.40it/s]

{'loss': Array(0.16313528, dtype=float32), 'loss_cross_entropy': Array(0.15424511, dtype=float32)}
{'loss_inverse': Array(8.915677e-06, dtype=float32)}


  7%|▋         | 66560/1000000 [3:08:59<27:25:22,  9.46it/s]

{'loss': Array(0.14850442, dtype=float32), 'loss_cross_entropy': Array(0.14008228, dtype=float32)}
{'loss_inverse': Array(5.715959e-06, dtype=float32)}


  7%|▋         | 66570/1000000 [3:09:00<35:18:24,  7.34it/s]

{'loss': Array(0.1590554, dtype=float32), 'loss_cross_entropy': Array(0.14953874, dtype=float32)}
{'loss_inverse': Array(1.845618e-05, dtype=float32)}


  7%|▋         | 66580/1000000 [3:09:02<27:00:02,  9.60it/s]

{'loss': Array(0.16420956, dtype=float32), 'loss_cross_entropy': Array(0.15478429, dtype=float32)}
{'loss_inverse': Array(1.4852584e-05, dtype=float32)}


  7%|▋         | 66590/1000000 [3:09:03<26:05:00,  9.94it/s]

{'loss': Array(0.1653149, dtype=float32), 'loss_cross_entropy': Array(0.15501605, dtype=float32)}
{'loss_inverse': Array(0.00010492, dtype=float32)}


  7%|▋         | 66600/1000000 [3:09:05<31:50:52,  8.14it/s]

{'loss': Array(0.14070094, dtype=float32), 'loss_cross_entropy': Array(0.13196467, dtype=float32)}
{'loss_inverse': Array(2.019788e-07, dtype=float32)}


  7%|▋         | 66610/1000000 [3:09:07<26:52:53,  9.65it/s]

{'loss': Array(0.1376537, dtype=float32), 'loss_cross_entropy': Array(0.12927134, dtype=float32)}
{'loss_inverse': Array(0.00014218, dtype=float32)}


  7%|▋         | 66620/1000000 [3:09:08<38:10:50,  6.79it/s]

{'loss': Array(0.16241755, dtype=float32), 'loss_cross_entropy': Array(0.15265167, dtype=float32)}
{'loss_inverse': Array(6.150807e-06, dtype=float32)}


  7%|▋         | 66630/1000000 [3:09:10<28:01:25,  9.25it/s]

{'loss': Array(0.16274075, dtype=float32), 'loss_cross_entropy': Array(0.15300503, dtype=float32)}
{'loss_inverse': Array(3.039561e-06, dtype=float32)}


  7%|▋         | 66640/1000000 [3:09:11<25:52:00, 10.02it/s]

{'loss': Array(0.14622729, dtype=float32), 'loss_cross_entropy': Array(0.13767205, dtype=float32)}
{'loss_inverse': Array(0.00014136, dtype=float32)}


  7%|▋         | 66650/1000000 [3:09:13<28:47:42,  9.00it/s]

{'loss': Array(0.15093672, dtype=float32), 'loss_cross_entropy': Array(0.14223014, dtype=float32)}
{'loss_inverse': Array(7.426394e-05, dtype=float32)}


  7%|▋         | 66660/1000000 [3:09:14<26:08:49,  9.92it/s]

{'loss': Array(0.14837159, dtype=float32), 'loss_cross_entropy': Array(0.13953514, dtype=float32)}
{'loss_inverse': Array(6.0249968e-06, dtype=float32)}


  7%|▋         | 66670/1000000 [3:09:16<29:14:26,  8.87it/s]

{'loss': Array(0.15967745, dtype=float32), 'loss_cross_entropy': Array(0.15045296, dtype=float32)}
{'loss_inverse': Array(0.00040377, dtype=float32)}


  7%|▋         | 66680/1000000 [3:09:18<26:18:14,  9.86it/s]

{'loss': Array(0.15511817, dtype=float32), 'loss_cross_entropy': Array(0.14545397, dtype=float32)}
{'loss_inverse': Array(5.9383292e-05, dtype=float32)}


  7%|▋         | 66690/1000000 [3:09:19<31:58:09,  8.11it/s]

{'loss': Array(0.1587225, dtype=float32), 'loss_cross_entropy': Array(0.15000558, dtype=float32)}
{'loss_inverse': Array(0.00032827, dtype=float32)}


  7%|▋         | 66700/1000000 [3:09:21<26:36:03,  9.75it/s]

{'loss': Array(0.15948942, dtype=float32), 'loss_cross_entropy': Array(0.1501766, dtype=float32)}
{'loss_inverse': Array(1.9613111e-05, dtype=float32)}


  7%|▋         | 66710/1000000 [3:09:22<32:53:44,  7.88it/s]

{'loss': Array(0.1429987, dtype=float32), 'loss_cross_entropy': Array(0.13397914, dtype=float32)}
{'loss_inverse': Array(0.00028626, dtype=float32)}


  7%|▋         | 66720/1000000 [3:09:24<27:12:02,  9.53it/s]

{'loss': Array(0.15113868, dtype=float32), 'loss_cross_entropy': Array(0.14273643, dtype=float32)}
{'loss_inverse': Array(0.00019187, dtype=float32)}


  7%|▋         | 66730/1000000 [3:09:26<38:47:13,  6.68it/s]

{'loss': Array(0.13762689, dtype=float32), 'loss_cross_entropy': Array(0.1298242, dtype=float32)}
{'loss_inverse': Array(2.9035833e-05, dtype=float32)}


  7%|▋         | 66740/1000000 [3:09:27<27:23:36,  9.46it/s]

{'loss': Array(0.15342854, dtype=float32), 'loss_cross_entropy': Array(0.1444749, dtype=float32)}
{'loss_inverse': Array(1.0768985e-05, dtype=float32)}


  7%|▋         | 66750/1000000 [3:09:28<25:37:49, 10.11it/s]

{'loss': Array(0.14749818, dtype=float32), 'loss_cross_entropy': Array(0.13836609, dtype=float32)}
{'loss_inverse': Array(1.845675e-05, dtype=float32)}


  7%|▋         | 66760/1000000 [3:09:30<28:21:44,  9.14it/s]

{'loss': Array(0.15597454, dtype=float32), 'loss_cross_entropy': Array(0.1472446, dtype=float32)}
{'loss_inverse': Array(6.494562e-05, dtype=float32)}


  7%|▋         | 66770/1000000 [3:09:32<26:28:54,  9.79it/s]

{'loss': Array(0.13970053, dtype=float32), 'loss_cross_entropy': Array(0.13119136, dtype=float32)}
{'loss_inverse': Array(4.2324795e-05, dtype=float32)}


  7%|▋         | 66780/1000000 [3:09:33<32:04:01,  8.08it/s]

{'loss': Array(0.15335642, dtype=float32), 'loss_cross_entropy': Array(0.14461938, dtype=float32)}
{'loss_inverse': Array(1.3296417e-05, dtype=float32)}


  7%|▋         | 66790/1000000 [3:09:35<26:44:17,  9.69it/s]

{'loss': Array(0.14507243, dtype=float32), 'loss_cross_entropy': Array(0.1361916, dtype=float32)}
{'loss_inverse': Array(1.4892861e-06, dtype=float32)}


  7%|▋         | 66800/1000000 [3:09:36<25:43:16, 10.08it/s]

{'loss': Array(0.14374505, dtype=float32), 'loss_cross_entropy': Array(0.134603, dtype=float32)}
{'loss_inverse': Array(3.8227586e-06, dtype=float32)}


  7%|▋         | 66810/1000000 [3:09:38<30:06:10,  8.61it/s]

{'loss': Array(0.13679451, dtype=float32), 'loss_cross_entropy': Array(0.12785846, dtype=float32)}
{'loss_inverse': Array(5.7271695e-06, dtype=float32)}


  7%|▋         | 66820/1000000 [3:09:40<26:17:10,  9.86it/s]

{'loss': Array(0.1724508, dtype=float32), 'loss_cross_entropy': Array(0.16356272, dtype=float32)}
{'loss_inverse': Array(8.681641e-07, dtype=float32)}


  7%|▋         | 66830/1000000 [3:09:41<34:01:28,  7.62it/s]

{'loss': Array(0.17542495, dtype=float32), 'loss_cross_entropy': Array(0.16547543, dtype=float32)}
{'loss_inverse': Array(6.307643e-05, dtype=float32)}


  7%|▋         | 66840/1000000 [3:09:43<27:21:11,  9.48it/s]

{'loss': Array(0.17279613, dtype=float32), 'loss_cross_entropy': Array(0.16342404, dtype=float32)}
{'loss_inverse': Array(1.09232105e-05, dtype=float32)}


  7%|▋         | 66850/1000000 [3:09:44<38:47:50,  6.68it/s]

{'loss': Array(0.12968907, dtype=float32), 'loss_cross_entropy': Array(0.12186446, dtype=float32)}
{'loss_inverse': Array(0.00012354, dtype=float32)}


  7%|▋         | 66860/1000000 [3:09:46<28:14:59,  9.18it/s]

{'loss': Array(0.15858835, dtype=float32), 'loss_cross_entropy': Array(0.14934853, dtype=float32)}
{'loss_inverse': Array(0.0001017, dtype=float32)}


  7%|▋         | 66870/1000000 [3:09:48<39:17:47,  6.60it/s]

{'loss': Array(0.15292211, dtype=float32), 'loss_cross_entropy': Array(0.14333726, dtype=float32)}
{'loss_inverse': Array(2.2354105e-05, dtype=float32)}


  7%|▋         | 66880/1000000 [3:09:49<28:03:47,  9.24it/s]

{'loss': Array(0.1570752, dtype=float32), 'loss_cross_entropy': Array(0.14831111, dtype=float32)}
{'loss_inverse': Array(0.00024417, dtype=float32)}


  7%|▋         | 66890/1000000 [3:09:51<26:14:20,  9.88it/s]

{'loss': Array(0.1698713, dtype=float32), 'loss_cross_entropy': Array(0.16059226, dtype=float32)}
{'loss_inverse': Array(0.00019805, dtype=float32)}


  7%|▋         | 66900/1000000 [3:09:52<31:57:01,  8.11it/s]

{'loss': Array(0.15346476, dtype=float32), 'loss_cross_entropy': Array(0.14408235, dtype=float32)}
{'loss_inverse': Array(4.344993e-06, dtype=float32)}


  7%|▋         | 66910/1000000 [3:09:54<26:43:38,  9.70it/s]

{'loss': Array(0.13811758, dtype=float32), 'loss_cross_entropy': Array(0.12927182, dtype=float32)}
{'loss_inverse': Array(0.00020732, dtype=float32)}


  7%|▋         | 66920/1000000 [3:09:56<38:19:32,  6.76it/s]

{'loss': Array(0.15860295, dtype=float32), 'loss_cross_entropy': Array(0.14908373, dtype=float32)}
{'loss_inverse': Array(2.0492667e-05, dtype=float32)}


  7%|▋         | 66930/1000000 [3:09:57<27:51:35,  9.30it/s]

{'loss': Array(0.1431721, dtype=float32), 'loss_cross_entropy': Array(0.13481155, dtype=float32)}
{'loss_inverse': Array(1.4527433e-05, dtype=float32)}


  7%|▋         | 66940/1000000 [3:09:59<40:03:53,  6.47it/s]

{'loss': Array(0.15590096, dtype=float32), 'loss_cross_entropy': Array(0.14694946, dtype=float32)}
{'loss_inverse': Array(4.6703262e-05, dtype=float32)}


  7%|▋         | 66950/1000000 [3:10:00<27:31:13,  9.42it/s]

{'loss': Array(0.17303878, dtype=float32), 'loss_cross_entropy': Array(0.1634988, dtype=float32)}
{'loss_inverse': Array(3.720066e-06, dtype=float32)}


  7%|▋         | 66960/1000000 [3:10:02<25:50:22, 10.03it/s]

{'loss': Array(0.16015375, dtype=float32), 'loss_cross_entropy': Array(0.15149955, dtype=float32)}
{'loss_inverse': Array(3.9654682e-05, dtype=float32)}


  7%|▋         | 66970/1000000 [3:10:03<34:19:23,  7.55it/s]

{'loss': Array(0.14838818, dtype=float32), 'loss_cross_entropy': Array(0.13898091, dtype=float32)}
{'loss_inverse': Array(3.94273e-06, dtype=float32)}


  7%|▋         | 66980/1000000 [3:10:05<27:45:15,  9.34it/s]

{'loss': Array(0.1412712, dtype=float32), 'loss_cross_entropy': Array(0.13311441, dtype=float32)}
{'loss_inverse': Array(0.00010289, dtype=float32)}


  7%|▋         | 66990/1000000 [3:10:06<26:37:14,  9.74it/s]

{'loss': Array(0.14788663, dtype=float32), 'loss_cross_entropy': Array(0.13915549, dtype=float32)}
{'loss_inverse': Array(3.4286233e-05, dtype=float32)}


  7%|▋         | 67000/1000000 [3:10:08<29:07:12,  8.90it/s]

{'loss': Array(0.15574196, dtype=float32), 'loss_cross_entropy': Array(0.14661016, dtype=float32)}
{'loss_inverse': Array(0.00012302, dtype=float32)}


  7%|▋         | 67010/1000000 [3:10:16<87:54:30,  2.95it/s] 

{'loss': Array(0.16569619, dtype=float32), 'loss_cross_entropy': Array(0.15653907, dtype=float32)}
{'loss_inverse': Array(0.00012926, dtype=float32)}


  7%|▋         | 67020/1000000 [3:10:18<42:24:40,  6.11it/s]

{'loss': Array(0.1610423, dtype=float32), 'loss_cross_entropy': Array(0.15172003, dtype=float32)}
{'loss_inverse': Array(8.6727116e-05, dtype=float32)}


  7%|▋         | 67030/1000000 [3:10:19<28:11:01,  9.20it/s]

{'loss': Array(0.1508806, dtype=float32), 'loss_cross_entropy': Array(0.14181352, dtype=float32)}
{'loss_inverse': Array(4.9281633e-05, dtype=float32)}


  7%|▋         | 67040/1000000 [3:10:21<35:31:32,  7.29it/s]

{'loss': Array(0.15229791, dtype=float32), 'loss_cross_entropy': Array(0.14347705, dtype=float32)}
{'loss_inverse': Array(0.00039474, dtype=float32)}


  7%|▋         | 67050/1000000 [3:10:23<26:46:42,  9.68it/s]

{'loss': Array(0.15577726, dtype=float32), 'loss_cross_entropy': Array(0.1465309, dtype=float32)}
{'loss_inverse': Array(7.611612e-05, dtype=float32)}


  7%|▋         | 67060/1000000 [3:10:24<38:53:57,  6.66it/s]

{'loss': Array(0.15989672, dtype=float32), 'loss_cross_entropy': Array(0.15083723, dtype=float32)}
{'loss_inverse': Array(2.808264e-05, dtype=float32)}


  7%|▋         | 67070/1000000 [3:10:26<27:44:15,  9.34it/s]

{'loss': Array(0.1577974, dtype=float32), 'loss_cross_entropy': Array(0.14870583, dtype=float32)}
{'loss_inverse': Array(8.89947e-05, dtype=float32)}


  7%|▋         | 67080/1000000 [3:10:28<34:54:56,  7.42it/s]

{'loss': Array(0.1530783, dtype=float32), 'loss_cross_entropy': Array(0.14390163, dtype=float32)}
{'loss_inverse': Array(1.2313379e-05, dtype=float32)}


  7%|▋         | 67090/1000000 [3:10:29<26:55:17,  9.63it/s]

{'loss': Array(0.1397396, dtype=float32), 'loss_cross_entropy': Array(0.13136218, dtype=float32)}
{'loss_inverse': Array(9.514659e-06, dtype=float32)}


  7%|▋         | 67100/1000000 [3:10:30<26:25:53,  9.80it/s]

{'loss': Array(0.15738012, dtype=float32), 'loss_cross_entropy': Array(0.14816602, dtype=float32)}
{'loss_inverse': Array(7.644254e-07, dtype=float32)}


  7%|▋         | 67110/1000000 [3:10:32<29:57:36,  8.65it/s]

{'loss': Array(0.157268, dtype=float32), 'loss_cross_entropy': Array(0.14803652, dtype=float32)}
{'loss_inverse': Array(5.304373e-05, dtype=float32)}


  7%|▋         | 67120/1000000 [3:10:34<26:15:43,  9.87it/s]

{'loss': Array(0.17953007, dtype=float32), 'loss_cross_entropy': Array(0.16926722, dtype=float32)}
{'loss_inverse': Array(5.5542227e-05, dtype=float32)}


  7%|▋         | 67130/1000000 [3:10:35<34:26:49,  7.52it/s]

{'loss': Array(0.14195064, dtype=float32), 'loss_cross_entropy': Array(0.13375294, dtype=float32)}
{'loss_inverse': Array(0.00012693, dtype=float32)}


  7%|▋         | 67140/1000000 [3:10:37<27:03:58,  9.57it/s]

{'loss': Array(0.15435293, dtype=float32), 'loss_cross_entropy': Array(0.14490478, dtype=float32)}
{'loss_inverse': Array(4.0755156e-05, dtype=float32)}


  7%|▋         | 67150/1000000 [3:10:38<38:10:40,  6.79it/s]

{'loss': Array(0.15002124, dtype=float32), 'loss_cross_entropy': Array(0.14115681, dtype=float32)}
{'loss_inverse': Array(1.9045567e-05, dtype=float32)}


  7%|▋         | 67160/1000000 [3:10:40<27:23:03,  9.46it/s]

{'loss': Array(0.14692879, dtype=float32), 'loss_cross_entropy': Array(0.13795456, dtype=float32)}
{'loss_inverse': Array(0.0001969, dtype=float32)}


  7%|▋         | 67170/1000000 [3:10:41<25:46:21, 10.05it/s]

{'loss': Array(0.16409314, dtype=float32), 'loss_cross_entropy': Array(0.15411924, dtype=float32)}
{'loss_inverse': Array(9.151072e-07, dtype=float32)}


  7%|▋         | 67180/1000000 [3:10:43<28:48:40,  8.99it/s]

{'loss': Array(0.13996139, dtype=float32), 'loss_cross_entropy': Array(0.1311207, dtype=float32)}
{'loss_inverse': Array(3.5829034e-05, dtype=float32)}


  7%|▋         | 67190/1000000 [3:10:44<25:37:36, 10.11it/s]

{'loss': Array(0.1581469, dtype=float32), 'loss_cross_entropy': Array(0.14843309, dtype=float32)}
{'loss_inverse': Array(7.28069e-06, dtype=float32)}


  7%|▋         | 67200/1000000 [3:10:46<30:01:40,  8.63it/s]

{'loss': Array(0.15375523, dtype=float32), 'loss_cross_entropy': Array(0.14459948, dtype=float32)}
{'loss_inverse': Array(1.8923754e-05, dtype=float32)}


  7%|▋         | 67210/1000000 [3:10:48<26:32:26,  9.76it/s]

{'loss': Array(0.17635046, dtype=float32), 'loss_cross_entropy': Array(0.16574536, dtype=float32)}
{'loss_inverse': Array(1.20765435e-05, dtype=float32)}


  7%|▋         | 67220/1000000 [3:10:49<32:02:38,  8.09it/s]

{'loss': Array(0.15182741, dtype=float32), 'loss_cross_entropy': Array(0.14253959, dtype=float32)}
{'loss_inverse': Array(0.00015988, dtype=float32)}


  7%|▋         | 67230/1000000 [3:10:51<26:38:09,  9.73it/s]

{'loss': Array(0.17101587, dtype=float32), 'loss_cross_entropy': Array(0.16110364, dtype=float32)}
{'loss_inverse': Array(5.2298833e-06, dtype=float32)}


  7%|▋         | 67240/1000000 [3:10:53<34:30:55,  7.51it/s]

{'loss': Array(0.1421581, dtype=float32), 'loss_cross_entropy': Array(0.13356303, dtype=float32)}
{'loss_inverse': Array(6.4234904e-05, dtype=float32)}


  7%|▋         | 67250/1000000 [3:10:54<27:31:12,  9.41it/s]

{'loss': Array(0.16396496, dtype=float32), 'loss_cross_entropy': Array(0.15395425, dtype=float32)}
{'loss_inverse': Array(2.7073746e-05, dtype=float32)}


  7%|▋         | 67260/1000000 [3:10:56<35:23:10,  7.32it/s]

{'loss': Array(0.14471208, dtype=float32), 'loss_cross_entropy': Array(0.13639389, dtype=float32)}
{'loss_inverse': Array(0.00016525, dtype=float32)}


  7%|▋         | 67270/1000000 [3:10:57<27:19:35,  9.48it/s]

{'loss': Array(0.1399309, dtype=float32), 'loss_cross_entropy': Array(0.13159388, dtype=float32)}
{'loss_inverse': Array(3.91634e-05, dtype=float32)}


  7%|▋         | 67280/1000000 [3:10:59<25:30:16, 10.16it/s]

{'loss': Array(0.15372296, dtype=float32), 'loss_cross_entropy': Array(0.14449883, dtype=float32)}
{'loss_inverse': Array(0.00027429, dtype=float32)}


  7%|▋         | 67290/1000000 [3:11:00<31:14:23,  8.29it/s]

{'loss': Array(0.1458606, dtype=float32), 'loss_cross_entropy': Array(0.13748309, dtype=float32)}
{'loss_inverse': Array(9.157857e-05, dtype=float32)}


  7%|▋         | 67300/1000000 [3:11:02<26:20:27,  9.84it/s]

{'loss': Array(0.15789716, dtype=float32), 'loss_cross_entropy': Array(0.14959982, dtype=float32)}
{'loss_inverse': Array(0.00048861, dtype=float32)}


  7%|▋         | 67310/1000000 [3:11:04<38:02:33,  6.81it/s]

{'loss': Array(0.14709587, dtype=float32), 'loss_cross_entropy': Array(0.1384799, dtype=float32)}
{'loss_inverse': Array(0.00014423, dtype=float32)}


  7%|▋         | 67320/1000000 [3:11:05<27:30:48,  9.42it/s]

{'loss': Array(0.15522486, dtype=float32), 'loss_cross_entropy': Array(0.14662956, dtype=float32)}
{'loss_inverse': Array(3.107163e-05, dtype=float32)}


  7%|▋         | 67330/1000000 [3:11:06<26:38:35,  9.72it/s]

{'loss': Array(0.1713739, dtype=float32), 'loss_cross_entropy': Array(0.16221145, dtype=float32)}
{'loss_inverse': Array(9.085004e-06, dtype=float32)}


  7%|▋         | 67340/1000000 [3:11:08<28:56:09,  8.95it/s]

{'loss': Array(0.14775443, dtype=float32), 'loss_cross_entropy': Array(0.13896872, dtype=float32)}
{'loss_inverse': Array(4.364279e-06, dtype=float32)}


  7%|▋         | 67350/1000000 [3:11:10<26:27:57,  9.79it/s]

{'loss': Array(0.1604634, dtype=float32), 'loss_cross_entropy': Array(0.15110382, dtype=float32)}
{'loss_inverse': Array(1.0826448e-05, dtype=float32)}


  7%|▋         | 67360/1000000 [3:11:11<28:20:12,  9.14it/s]

{'loss': Array(0.15984039, dtype=float32), 'loss_cross_entropy': Array(0.15009588, dtype=float32)}
{'loss_inverse': Array(6.870696e-05, dtype=float32)}


  7%|▋         | 67370/1000000 [3:11:13<26:27:51,  9.79it/s]

{'loss': Array(0.18283805, dtype=float32), 'loss_cross_entropy': Array(0.17179142, dtype=float32)}
{'loss_inverse': Array(8.694189e-06, dtype=float32)}


  7%|▋         | 67380/1000000 [3:11:15<31:55:48,  8.11it/s]

{'loss': Array(0.17343403, dtype=float32), 'loss_cross_entropy': Array(0.16363145, dtype=float32)}
{'loss_inverse': Array(2.422225e-05, dtype=float32)}


  7%|▋         | 67390/1000000 [3:11:16<26:33:48,  9.75it/s]

{'loss': Array(0.1575702, dtype=float32), 'loss_cross_entropy': Array(0.14937277, dtype=float32)}
{'loss_inverse': Array(1.478836e-06, dtype=float32)}


  7%|▋         | 67400/1000000 [3:11:18<31:50:26,  8.14it/s]

{'loss': Array(0.15424272, dtype=float32), 'loss_cross_entropy': Array(0.14605634, dtype=float32)}
{'loss_inverse': Array(8.0325284e-05, dtype=float32)}


  7%|▋         | 67410/1000000 [3:11:19<26:32:16,  9.76it/s]

{'loss': Array(0.14095332, dtype=float32), 'loss_cross_entropy': Array(0.13225581, dtype=float32)}
{'loss_inverse': Array(4.462889e-05, dtype=float32)}


  7%|▋         | 67420/1000000 [3:11:21<38:19:41,  6.76it/s]

{'loss': Array(0.13718474, dtype=float32), 'loss_cross_entropy': Array(0.1293679, dtype=float32)}
{'loss_inverse': Array(4.1380234e-05, dtype=float32)}


  7%|▋         | 67430/1000000 [3:11:22<27:57:30,  9.27it/s]

{'loss': Array(0.14358702, dtype=float32), 'loss_cross_entropy': Array(0.13557638, dtype=float32)}
{'loss_inverse': Array(9.3453145e-06, dtype=float32)}


  7%|▋         | 67440/1000000 [3:11:24<25:36:13, 10.12it/s]

{'loss': Array(0.15721193, dtype=float32), 'loss_cross_entropy': Array(0.14809617, dtype=float32)}
{'loss_inverse': Array(1.1336818e-05, dtype=float32)}


  7%|▋         | 67450/1000000 [3:11:26<29:09:23,  8.88it/s]

{'loss': Array(0.1478408, dtype=float32), 'loss_cross_entropy': Array(0.13866284, dtype=float32)}
{'loss_inverse': Array(2.5784631e-05, dtype=float32)}


  7%|▋         | 67460/1000000 [3:11:27<26:05:45,  9.93it/s]

{'loss': Array(0.16260825, dtype=float32), 'loss_cross_entropy': Array(0.15339984, dtype=float32)}
{'loss_inverse': Array(7.739633e-07, dtype=float32)}


  7%|▋         | 67470/1000000 [3:11:29<32:16:26,  8.03it/s]

{'loss': Array(0.17394069, dtype=float32), 'loss_cross_entropy': Array(0.16393106, dtype=float32)}
{'loss_inverse': Array(6.018521e-06, dtype=float32)}


  7%|▋         | 67480/1000000 [3:11:30<26:20:54,  9.83it/s]

{'loss': Array(0.1538567, dtype=float32), 'loss_cross_entropy': Array(0.14526908, dtype=float32)}
{'loss_inverse': Array(5.390917e-05, dtype=float32)}


  7%|▋         | 67490/1000000 [3:11:32<25:27:09, 10.18it/s]

{'loss': Array(0.15772483, dtype=float32), 'loss_cross_entropy': Array(0.1479616, dtype=float32)}
{'loss_inverse': Array(1.9171387e-06, dtype=float32)}


  7%|▋         | 67500/1000000 [3:11:33<29:36:56,  8.75it/s]

{'loss': Array(0.14240982, dtype=float32), 'loss_cross_entropy': Array(0.13361154, dtype=float32)}
{'loss_inverse': Array(0.00015191, dtype=float32)}


  7%|▋         | 67510/1000000 [3:11:41<86:13:27,  3.00it/s] 

{'loss': Array(0.13738136, dtype=float32), 'loss_cross_entropy': Array(0.12930606, dtype=float32)}
{'loss_inverse': Array(0.00040447, dtype=float32)}


  7%|▋         | 67520/1000000 [3:11:43<39:23:59,  6.57it/s] 

{'loss': Array(0.15905783, dtype=float32), 'loss_cross_entropy': Array(0.14931327, dtype=float32)}
{'loss_inverse': Array(1.1611278e-06, dtype=float32)}


  7%|▋         | 67530/1000000 [3:11:45<28:31:04,  9.08it/s]

{'loss': Array(0.15073995, dtype=float32), 'loss_cross_entropy': Array(0.14247389, dtype=float32)}
{'loss_inverse': Array(1.1602455e-05, dtype=float32)}


  7%|▋         | 67540/1000000 [3:11:46<31:45:37,  8.16it/s]

{'loss': Array(0.15342759, dtype=float32), 'loss_cross_entropy': Array(0.14365308, dtype=float32)}
{'loss_inverse': Array(5.517569e-05, dtype=float32)}


  7%|▋         | 67550/1000000 [3:11:48<26:16:17,  9.86it/s]

{'loss': Array(0.17703354, dtype=float32), 'loss_cross_entropy': Array(0.16706125, dtype=float32)}
{'loss_inverse': Array(1.8884175e-06, dtype=float32)}


  7%|▋         | 67560/1000000 [3:11:49<31:48:50,  8.14it/s]

{'loss': Array(0.14560604, dtype=float32), 'loss_cross_entropy': Array(0.13658433, dtype=float32)}
{'loss_inverse': Array(0.00027514, dtype=float32)}


  7%|▋         | 67570/1000000 [3:11:51<26:37:31,  9.73it/s]

{'loss': Array(0.15481749, dtype=float32), 'loss_cross_entropy': Array(0.14564139, dtype=float32)}
{'loss_inverse': Array(0.00011189, dtype=float32)}


  7%|▋         | 67580/1000000 [3:11:53<33:57:45,  7.63it/s]

{'loss': Array(0.15678917, dtype=float32), 'loss_cross_entropy': Array(0.14805092, dtype=float32)}
{'loss_inverse': Array(0.00010411, dtype=float32)}


  7%|▋         | 67590/1000000 [3:11:54<26:18:30,  9.84it/s]

{'loss': Array(0.15004371, dtype=float32), 'loss_cross_entropy': Array(0.1414166, dtype=float32)}
{'loss_inverse': Array(4.1128143e-05, dtype=float32)}


  7%|▋         | 67600/1000000 [3:11:56<31:43:10,  8.17it/s]

{'loss': Array(0.14838293, dtype=float32), 'loss_cross_entropy': Array(0.13882558, dtype=float32)}
{'loss_inverse': Array(0.00010916, dtype=float32)}


  7%|▋         | 67610/1000000 [3:11:57<26:15:38,  9.86it/s]

{'loss': Array(0.16174473, dtype=float32), 'loss_cross_entropy': Array(0.15260552, dtype=float32)}
{'loss_inverse': Array(1.5990378e-05, dtype=float32)}


  7%|▋         | 67620/1000000 [3:11:59<38:35:25,  6.71it/s]

{'loss': Array(0.15771374, dtype=float32), 'loss_cross_entropy': Array(0.14805166, dtype=float32)}
{'loss_inverse': Array(1.8371298e-06, dtype=float32)}


  7%|▋         | 67630/1000000 [3:12:00<27:27:20,  9.43it/s]

{'loss': Array(0.15056148, dtype=float32), 'loss_cross_entropy': Array(0.14130512, dtype=float32)}
{'loss_inverse': Array(3.0339802e-06, dtype=float32)}


  7%|▋         | 67640/1000000 [3:12:02<26:21:34,  9.83it/s]

{'loss': Array(0.16447993, dtype=float32), 'loss_cross_entropy': Array(0.15436468, dtype=float32)}
{'loss_inverse': Array(1.9455698e-05, dtype=float32)}


  7%|▋         | 67650/1000000 [3:12:03<28:55:19,  8.95it/s]

{'loss': Array(0.13902064, dtype=float32), 'loss_cross_entropy': Array(0.13075821, dtype=float32)}
{'loss_inverse': Array(5.38739e-07, dtype=float32)}


  7%|▋         | 67660/1000000 [3:12:05<26:32:57,  9.75it/s]

{'loss': Array(0.12720506, dtype=float32), 'loss_cross_entropy': Array(0.11928289, dtype=float32)}
{'loss_inverse': Array(3.5396162e-05, dtype=float32)}


  7%|▋         | 67670/1000000 [3:12:07<28:50:42,  8.98it/s]

{'loss': Array(0.16100128, dtype=float32), 'loss_cross_entropy': Array(0.15160736, dtype=float32)}
{'loss_inverse': Array(1.5639513e-05, dtype=float32)}


  7%|▋         | 67680/1000000 [3:12:08<25:56:27,  9.98it/s]

{'loss': Array(0.1297479, dtype=float32), 'loss_cross_entropy': Array(0.12147825, dtype=float32)}
{'loss_inverse': Array(2.807008e-06, dtype=float32)}


  7%|▋         | 67690/1000000 [3:12:10<30:00:55,  8.63it/s]

{'loss': Array(0.1467394, dtype=float32), 'loss_cross_entropy': Array(0.13774166, dtype=float32)}
{'loss_inverse': Array(1.7402874e-05, dtype=float32)}


  7%|▋         | 67700/1000000 [3:12:11<25:57:42,  9.98it/s]

{'loss': Array(0.16112919, dtype=float32), 'loss_cross_entropy': Array(0.15205139, dtype=float32)}
{'loss_inverse': Array(9.091457e-06, dtype=float32)}


  7%|▋         | 67710/1000000 [3:12:13<39:13:44,  6.60it/s]

{'loss': Array(0.12996332, dtype=float32), 'loss_cross_entropy': Array(0.12201428, dtype=float32)}
{'loss_inverse': Array(9.468714e-06, dtype=float32)}


  7%|▋         | 67720/1000000 [3:12:14<27:55:47,  9.27it/s]

{'loss': Array(0.14204143, dtype=float32), 'loss_cross_entropy': Array(0.13342217, dtype=float32)}
{'loss_inverse': Array(4.2944973e-07, dtype=float32)}


  7%|▋         | 67730/1000000 [3:12:16<25:42:37, 10.07it/s]

{'loss': Array(0.15394416, dtype=float32), 'loss_cross_entropy': Array(0.1453056, dtype=float32)}
{'loss_inverse': Array(9.353464e-06, dtype=float32)}


  7%|▋         | 67740/1000000 [3:12:18<29:51:14,  8.67it/s]

{'loss': Array(0.15219806, dtype=float32), 'loss_cross_entropy': Array(0.14298104, dtype=float32)}
{'loss_inverse': Array(8.967806e-05, dtype=float32)}


  7%|▋         | 67750/1000000 [3:12:19<26:05:12,  9.93it/s]

{'loss': Array(0.16440903, dtype=float32), 'loss_cross_entropy': Array(0.15570365, dtype=float32)}
{'loss_inverse': Array(2.0575637e-06, dtype=float32)}


  7%|▋         | 67760/1000000 [3:12:21<30:29:05,  8.49it/s]

{'loss': Array(0.16061601, dtype=float32), 'loss_cross_entropy': Array(0.1511975, dtype=float32)}
{'loss_inverse': Array(3.5881342e-06, dtype=float32)}


  7%|▋         | 67770/1000000 [3:12:22<26:14:15,  9.87it/s]

{'loss': Array(0.14557348, dtype=float32), 'loss_cross_entropy': Array(0.13709874, dtype=float32)}
{'loss_inverse': Array(5.5155465e-07, dtype=float32)}


  7%|▋         | 67780/1000000 [3:12:24<31:43:34,  8.16it/s]

{'loss': Array(0.15312175, dtype=float32), 'loss_cross_entropy': Array(0.14452732, dtype=float32)}
{'loss_inverse': Array(2.8342636e-06, dtype=float32)}


  7%|▋         | 67790/1000000 [3:12:25<26:32:13,  9.76it/s]

{'loss': Array(0.1536431, dtype=float32), 'loss_cross_entropy': Array(0.1447466, dtype=float32)}
{'loss_inverse': Array(2.9448934e-06, dtype=float32)}


  7%|▋         | 67800/1000000 [3:12:27<26:01:33,  9.95it/s]

{'loss': Array(0.1686595, dtype=float32), 'loss_cross_entropy': Array(0.15886061, dtype=float32)}
{'loss_inverse': Array(1.5004877e-05, dtype=float32)}


  7%|▋         | 67810/1000000 [3:12:29<28:33:26,  9.07it/s]

{'loss': Array(0.160304, dtype=float32), 'loss_cross_entropy': Array(0.15143357, dtype=float32)}
{'loss_inverse': Array(5.3883858e-05, dtype=float32)}


  7%|▋         | 67820/1000000 [3:12:30<25:44:28, 10.06it/s]

{'loss': Array(0.14130798, dtype=float32), 'loss_cross_entropy': Array(0.13276738, dtype=float32)}
{'loss_inverse': Array(0.00012503, dtype=float32)}


  7%|▋         | 67830/1000000 [3:12:32<30:10:12,  8.58it/s]

{'loss': Array(0.16634926, dtype=float32), 'loss_cross_entropy': Array(0.15725508, dtype=float32)}
{'loss_inverse': Array(2.3555765e-05, dtype=float32)}


  7%|▋         | 67840/1000000 [3:12:33<26:41:32,  9.70it/s]

{'loss': Array(0.15762687, dtype=float32), 'loss_cross_entropy': Array(0.14851888, dtype=float32)}
{'loss_inverse': Array(9.947371e-07, dtype=float32)}


  7%|▋         | 67850/1000000 [3:12:35<31:57:15,  8.10it/s]

{'loss': Array(0.14621285, dtype=float32), 'loss_cross_entropy': Array(0.13812485, dtype=float32)}
{'loss_inverse': Array(7.0161004e-05, dtype=float32)}


  7%|▋         | 67860/1000000 [3:12:36<26:06:51,  9.92it/s]

{'loss': Array(0.16383924, dtype=float32), 'loss_cross_entropy': Array(0.15486778, dtype=float32)}
{'loss_inverse': Array(1.3853636e-05, dtype=float32)}


  7%|▋         | 67870/1000000 [3:12:38<25:20:27, 10.22it/s]

{'loss': Array(0.15770893, dtype=float32), 'loss_cross_entropy': Array(0.14906886, dtype=float32)}
{'loss_inverse': Array(1.988147e-06, dtype=float32)}


  7%|▋         | 67880/1000000 [3:12:40<30:01:42,  8.62it/s]

{'loss': Array(0.14308994, dtype=float32), 'loss_cross_entropy': Array(0.13504376, dtype=float32)}
{'loss_inverse': Array(7.946825e-05, dtype=float32)}


  7%|▋         | 67890/1000000 [3:12:41<26:00:52,  9.95it/s]

{'loss': Array(0.1555408, dtype=float32), 'loss_cross_entropy': Array(0.14644222, dtype=float32)}
{'loss_inverse': Array(2.3777054e-06, dtype=float32)}


  7%|▋         | 67900/1000000 [3:12:43<32:06:29,  8.06it/s]

{'loss': Array(0.14448938, dtype=float32), 'loss_cross_entropy': Array(0.13632382, dtype=float32)}
{'loss_inverse': Array(2.9229685e-05, dtype=float32)}


  7%|▋         | 67910/1000000 [3:12:44<27:22:27,  9.46it/s]

{'loss': Array(0.16212784, dtype=float32), 'loss_cross_entropy': Array(0.15249826, dtype=float32)}
{'loss_inverse': Array(0.00018263, dtype=float32)}


  7%|▋         | 67920/1000000 [3:12:46<35:08:57,  7.37it/s]

{'loss': Array(0.16857418, dtype=float32), 'loss_cross_entropy': Array(0.15900624, dtype=float32)}
{'loss_inverse': Array(5.2273267e-06, dtype=float32)}


  7%|▋         | 67930/1000000 [3:12:47<26:33:53,  9.75it/s]

{'loss': Array(0.13706534, dtype=float32), 'loss_cross_entropy': Array(0.12879887, dtype=float32)}
{'loss_inverse': Array(2.6503452e-05, dtype=float32)}


  7%|▋         | 67940/1000000 [3:12:49<25:50:29, 10.02it/s]

{'loss': Array(0.15530191, dtype=float32), 'loss_cross_entropy': Array(0.14551884, dtype=float32)}
{'loss_inverse': Array(2.9787256e-05, dtype=float32)}


  7%|▋         | 67950/1000000 [3:12:50<28:13:04,  9.18it/s]

{'loss': Array(0.1584159, dtype=float32), 'loss_cross_entropy': Array(0.14972447, dtype=float32)}
{'loss_inverse': Array(5.0009818e-05, dtype=float32)}


  7%|▋         | 67960/1000000 [3:12:52<26:35:39,  9.74it/s]

{'loss': Array(0.13795972, dtype=float32), 'loss_cross_entropy': Array(0.12938538, dtype=float32)}
{'loss_inverse': Array(1.2055177e-06, dtype=float32)}


  7%|▋         | 67970/1000000 [3:12:54<34:57:28,  7.41it/s]

{'loss': Array(0.1465813, dtype=float32), 'loss_cross_entropy': Array(0.13807395, dtype=float32)}
{'loss_inverse': Array(3.572143e-05, dtype=float32)}


  7%|▋         | 67980/1000000 [3:12:55<27:20:40,  9.47it/s]

{'loss': Array(0.16115376, dtype=float32), 'loss_cross_entropy': Array(0.15204285, dtype=float32)}
{'loss_inverse': Array(1.1810488e-06, dtype=float32)}


  7%|▋         | 67990/1000000 [3:12:57<28:31:17,  9.08it/s]

{'loss': Array(0.14556734, dtype=float32), 'loss_cross_entropy': Array(0.13668759, dtype=float32)}
{'loss_inverse': Array(8.9438116e-05, dtype=float32)}


  7%|▋         | 68000/1000000 [3:12:58<28:49:09,  8.98it/s]

{'loss': Array(0.14176631, dtype=float32), 'loss_cross_entropy': Array(0.13318777, dtype=float32)}
{'loss_inverse': Array(0.00017098, dtype=float32)}


  7%|▋         | 68010/1000000 [3:13:07<88:37:06,  2.92it/s] 

{'loss': Array(0.1494363, dtype=float32), 'loss_cross_entropy': Array(0.14010525, dtype=float32)}
{'loss_inverse': Array(3.3333265e-06, dtype=float32)}


  7%|▋         | 68020/1000000 [3:13:08<40:27:40,  6.40it/s]

{'loss': Array(0.14955658, dtype=float32), 'loss_cross_entropy': Array(0.14038672, dtype=float32)}
{'loss_inverse': Array(3.7342065e-07, dtype=float32)}


  7%|▋         | 68030/1000000 [3:13:10<28:07:26,  9.20it/s]

{'loss': Array(0.16178654, dtype=float32), 'loss_cross_entropy': Array(0.15207957, dtype=float32)}
{'loss_inverse': Array(7.964571e-07, dtype=float32)}


  7%|▋         | 68040/1000000 [3:13:12<31:03:00,  8.34it/s]

{'loss': Array(0.14676708, dtype=float32), 'loss_cross_entropy': Array(0.13892427, dtype=float32)}
{'loss_inverse': Array(3.2758024e-05, dtype=float32)}


  7%|▋         | 68050/1000000 [3:13:13<26:10:37,  9.89it/s]

{'loss': Array(0.15229367, dtype=float32), 'loss_cross_entropy': Array(0.1432661, dtype=float32)}
{'loss_inverse': Array(1.26087125e-05, dtype=float32)}


  7%|▋         | 68060/1000000 [3:13:15<38:26:13,  6.73it/s]

{'loss': Array(0.13674302, dtype=float32), 'loss_cross_entropy': Array(0.1286004, dtype=float32)}
{'loss_inverse': Array(1.0040457e-06, dtype=float32)}


  7%|▋         | 68070/1000000 [3:13:16<27:54:44,  9.27it/s]

{'loss': Array(0.14782843, dtype=float32), 'loss_cross_entropy': Array(0.13870399, dtype=float32)}
{'loss_inverse': Array(5.41323e-06, dtype=float32)}


  7%|▋         | 68080/1000000 [3:13:18<26:19:33,  9.83it/s]

{'loss': Array(0.14676614, dtype=float32), 'loss_cross_entropy': Array(0.13781469, dtype=float32)}
{'loss_inverse': Array(1.4916146e-06, dtype=float32)}


  7%|▋         | 68090/1000000 [3:13:19<30:03:59,  8.61it/s]

{'loss': Array(0.16532734, dtype=float32), 'loss_cross_entropy': Array(0.15565805, dtype=float32)}
{'loss_inverse': Array(3.2207285e-07, dtype=float32)}


  7%|▋         | 68100/1000000 [3:13:21<26:26:42,  9.79it/s]

{'loss': Array(0.13325153, dtype=float32), 'loss_cross_entropy': Array(0.12471323, dtype=float32)}
{'loss_inverse': Array(0.00022083, dtype=float32)}


  7%|▋         | 68110/1000000 [3:13:23<32:31:13,  7.96it/s]

{'loss': Array(0.15404281, dtype=float32), 'loss_cross_entropy': Array(0.1448829, dtype=float32)}
{'loss_inverse': Array(7.718344e-05, dtype=float32)}


  7%|▋         | 68120/1000000 [3:13:24<27:07:45,  9.54it/s]

{'loss': Array(0.15354843, dtype=float32), 'loss_cross_entropy': Array(0.1446437, dtype=float32)}
{'loss_inverse': Array(0.00044882, dtype=float32)}


  7%|▋         | 68130/1000000 [3:13:26<31:39:11,  8.18it/s]

{'loss': Array(0.14168267, dtype=float32), 'loss_cross_entropy': Array(0.13365428, dtype=float32)}
{'loss_inverse': Array(0.00012805, dtype=float32)}


  7%|▋         | 68140/1000000 [3:13:27<26:10:58,  9.89it/s]

{'loss': Array(0.13815358, dtype=float32), 'loss_cross_entropy': Array(0.12896001, dtype=float32)}
{'loss_inverse': Array(0.00011997, dtype=float32)}


  7%|▋         | 68150/1000000 [3:13:29<26:11:07,  9.89it/s]

{'loss': Array(0.16074824, dtype=float32), 'loss_cross_entropy': Array(0.15136962, dtype=float32)}
{'loss_inverse': Array(0.00020886, dtype=float32)}


  7%|▋         | 68160/1000000 [3:13:30<28:44:38,  9.01it/s]

{'loss': Array(0.1336752, dtype=float32), 'loss_cross_entropy': Array(0.12515496, dtype=float32)}
{'loss_inverse': Array(1.9167976e-06, dtype=float32)}


  7%|▋         | 68170/1000000 [3:13:32<25:41:27, 10.08it/s]

{'loss': Array(0.14525464, dtype=float32), 'loss_cross_entropy': Array(0.13680635, dtype=float32)}
{'loss_inverse': Array(0.00031734, dtype=float32)}


  7%|▋         | 68180/1000000 [3:13:34<29:24:47,  8.80it/s]

{'loss': Array(0.14874597, dtype=float32), 'loss_cross_entropy': Array(0.139593, dtype=float32)}
{'loss_inverse': Array(4.3230248e-06, dtype=float32)}


  7%|▋         | 68190/1000000 [3:13:35<26:04:58,  9.92it/s]

{'loss': Array(0.15545812, dtype=float32), 'loss_cross_entropy': Array(0.14692706, dtype=float32)}
{'loss_inverse': Array(1.6297407e-05, dtype=float32)}


  7%|▋         | 68200/1000000 [3:13:37<29:45:59,  8.70it/s]

{'loss': Array(0.16902064, dtype=float32), 'loss_cross_entropy': Array(0.16007687, dtype=float32)}
{'loss_inverse': Array(5.2118285e-05, dtype=float32)}


  7%|▋         | 68210/1000000 [3:13:38<25:49:03, 10.03it/s]

{'loss': Array(0.15067914, dtype=float32), 'loss_cross_entropy': Array(0.14273147, dtype=float32)}
{'loss_inverse': Array(1.0390578e-05, dtype=float32)}


  7%|▋         | 68220/1000000 [3:13:40<29:48:31,  8.68it/s]

{'loss': Array(0.17755298, dtype=float32), 'loss_cross_entropy': Array(0.1676886, dtype=float32)}
{'loss_inverse': Array(5.1978623e-05, dtype=float32)}


  7%|▋         | 68230/1000000 [3:13:41<26:13:02,  9.87it/s]

{'loss': Array(0.1535615, dtype=float32), 'loss_cross_entropy': Array(0.14467916, dtype=float32)}
{'loss_inverse': Array(0.00015419, dtype=float32)}


  7%|▋         | 68240/1000000 [3:13:43<34:26:55,  7.51it/s]

{'loss': Array(0.15244012, dtype=float32), 'loss_cross_entropy': Array(0.1429076, dtype=float32)}
{'loss_inverse': Array(8.35258e-06, dtype=float32)}


  7%|▋         | 68250/1000000 [3:13:44<26:47:02,  9.66it/s]

{'loss': Array(0.15738422, dtype=float32), 'loss_cross_entropy': Array(0.14913449, dtype=float32)}
{'loss_inverse': Array(0.00019599, dtype=float32)}


  7%|▋         | 68260/1000000 [3:13:46<38:01:24,  6.81it/s]

{'loss': Array(0.14999107, dtype=float32), 'loss_cross_entropy': Array(0.14110774, dtype=float32)}
{'loss_inverse': Array(2.2492432e-05, dtype=float32)}


  7%|▋         | 68270/1000000 [3:13:48<27:34:04,  9.39it/s]

{'loss': Array(0.14577413, dtype=float32), 'loss_cross_entropy': Array(0.1372214, dtype=float32)}
{'loss_inverse': Array(0.0002131, dtype=float32)}


  7%|▋         | 68280/1000000 [3:13:49<34:47:52,  7.44it/s]

{'loss': Array(0.1761636, dtype=float32), 'loss_cross_entropy': Array(0.16621248, dtype=float32)}
{'loss_inverse': Array(0.00012705, dtype=float32)}


  7%|▋         | 68290/1000000 [3:13:51<27:01:50,  9.57it/s]

{'loss': Array(0.15336172, dtype=float32), 'loss_cross_entropy': Array(0.14408022, dtype=float32)}
{'loss_inverse': Array(0.00011024, dtype=float32)}


  7%|▋         | 68300/1000000 [3:13:52<26:18:18,  9.84it/s]

{'loss': Array(0.16749097, dtype=float32), 'loss_cross_entropy': Array(0.15842587, dtype=float32)}
{'loss_inverse': Array(3.5426583e-05, dtype=float32)}


  7%|▋         | 68310/1000000 [3:13:54<30:22:33,  8.52it/s]

{'loss': Array(0.14479576, dtype=float32), 'loss_cross_entropy': Array(0.13611694, dtype=float32)}
{'loss_inverse': Array(0.00023876, dtype=float32)}


  7%|▋         | 68320/1000000 [3:13:55<26:02:45,  9.94it/s]

{'loss': Array(0.16308223, dtype=float32), 'loss_cross_entropy': Array(0.15377755, dtype=float32)}
{'loss_inverse': Array(8.428477e-05, dtype=float32)}


  7%|▋         | 68330/1000000 [3:13:57<31:53:51,  8.11it/s]

{'loss': Array(0.13624212, dtype=float32), 'loss_cross_entropy': Array(0.12746917, dtype=float32)}
{'loss_inverse': Array(3.4390405e-07, dtype=float32)}


  7%|▋         | 68340/1000000 [3:13:59<27:48:49,  9.30it/s]

{'loss': Array(0.13900594, dtype=float32), 'loss_cross_entropy': Array(0.13091053, dtype=float32)}
{'loss_inverse': Array(2.026347e-06, dtype=float32)}


  7%|▋         | 68350/1000000 [3:14:00<34:50:33,  7.43it/s]

{'loss': Array(0.1383404, dtype=float32), 'loss_cross_entropy': Array(0.13023153, dtype=float32)}
{'loss_inverse': Array(0.00042481, dtype=float32)}


  7%|▋         | 68360/1000000 [3:14:02<27:02:20,  9.57it/s]

{'loss': Array(0.15244283, dtype=float32), 'loss_cross_entropy': Array(0.14373796, dtype=float32)}
{'loss_inverse': Array(0.0004933, dtype=float32)}


  7%|▋         | 68370/1000000 [3:14:04<34:57:14,  7.40it/s]

{'loss': Array(0.15512082, dtype=float32), 'loss_cross_entropy': Array(0.14518617, dtype=float32)}
{'loss_inverse': Array(8.4711624e-05, dtype=float32)}


  7%|▋         | 68380/1000000 [3:14:05<26:36:31,  9.73it/s]

{'loss': Array(0.14740685, dtype=float32), 'loss_cross_entropy': Array(0.13854703, dtype=float32)}
{'loss_inverse': Array(0.00013414, dtype=float32)}


  7%|▋         | 68390/1000000 [3:14:06<27:05:02,  9.55it/s]

{'loss': Array(0.15466087, dtype=float32), 'loss_cross_entropy': Array(0.14548932, dtype=float32)}
{'loss_inverse': Array(4.8468137e-05, dtype=float32)}


  7%|▋         | 68400/1000000 [3:14:08<31:44:56,  8.15it/s]

{'loss': Array(0.15652116, dtype=float32), 'loss_cross_entropy': Array(0.14781313, dtype=float32)}
{'loss_inverse': Array(0.00012264, dtype=float32)}


  7%|▋         | 68410/1000000 [3:14:10<26:49:57,  9.64it/s]

{'loss': Array(0.15335079, dtype=float32), 'loss_cross_entropy': Array(0.14458385, dtype=float32)}
{'loss_inverse': Array(5.9703557e-06, dtype=float32)}


  7%|▋         | 68420/1000000 [3:14:11<38:33:52,  6.71it/s]

{'loss': Array(0.14323674, dtype=float32), 'loss_cross_entropy': Array(0.13565038, dtype=float32)}
{'loss_inverse': Array(0.00010394, dtype=float32)}


  7%|▋         | 68430/1000000 [3:14:13<28:39:16,  9.03it/s]

{'loss': Array(0.1534529, dtype=float32), 'loss_cross_entropy': Array(0.14477248, dtype=float32)}
{'loss_inverse': Array(3.7958343e-06, dtype=float32)}


  7%|▋         | 68440/1000000 [3:14:14<25:52:13, 10.00it/s]

{'loss': Array(0.1691127, dtype=float32), 'loss_cross_entropy': Array(0.15946661, dtype=float32)}
{'loss_inverse': Array(0.0002337, dtype=float32)}


  7%|▋         | 68450/1000000 [3:14:16<29:12:35,  8.86it/s]

{'loss': Array(0.1545655, dtype=float32), 'loss_cross_entropy': Array(0.14615889, dtype=float32)}
{'loss_inverse': Array(4.7823714e-06, dtype=float32)}


  7%|▋         | 68460/1000000 [3:14:18<26:31:44,  9.75it/s]

{'loss': Array(0.1463605, dtype=float32), 'loss_cross_entropy': Array(0.13734536, dtype=float32)}
{'loss_inverse': Array(5.810924e-05, dtype=float32)}


  7%|▋         | 68470/1000000 [3:14:19<28:51:00,  8.97it/s]

{'loss': Array(0.15981703, dtype=float32), 'loss_cross_entropy': Array(0.15084997, dtype=float32)}
{'loss_inverse': Array(6.644983e-05, dtype=float32)}


  7%|▋         | 68480/1000000 [3:14:21<25:52:28, 10.00it/s]

{'loss': Array(0.1435827, dtype=float32), 'loss_cross_entropy': Array(0.13428491, dtype=float32)}
{'loss_inverse': Array(3.056662e-05, dtype=float32)}


  7%|▋         | 68490/1000000 [3:14:23<31:36:13,  8.19it/s]

{'loss': Array(0.14192428, dtype=float32), 'loss_cross_entropy': Array(0.13326061, dtype=float32)}
{'loss_inverse': Array(2.6488856e-06, dtype=float32)}


  7%|▋         | 68500/1000000 [3:14:24<26:30:25,  9.76it/s]

{'loss': Array(0.15254138, dtype=float32), 'loss_cross_entropy': Array(0.14382099, dtype=float32)}
{'loss_inverse': Array(4.858202e-06, dtype=float32)}


  7%|▋         | 68510/1000000 [3:14:32<92:50:18,  2.79it/s] 

{'loss': Array(0.15119822, dtype=float32), 'loss_cross_entropy': Array(0.14143214, dtype=float32)}
{'loss_inverse': Array(9.8492696e-05, dtype=float32)}


  7%|▋         | 68520/1000000 [3:14:34<36:44:08,  7.04it/s] 

{'loss': Array(0.15132456, dtype=float32), 'loss_cross_entropy': Array(0.14172678, dtype=float32)}
{'loss_inverse': Array(0.00023127, dtype=float32)}


  7%|▋         | 68530/1000000 [3:14:35<26:52:47,  9.63it/s]

{'loss': Array(0.16505855, dtype=float32), 'loss_cross_entropy': Array(0.15570644, dtype=float32)}
{'loss_inverse': Array(0.00015092, dtype=float32)}


  7%|▋         | 68540/1000000 [3:14:37<28:45:59,  8.99it/s]

{'loss': Array(0.13348584, dtype=float32), 'loss_cross_entropy': Array(0.12511317, dtype=float32)}
{'loss_inverse': Array(3.2476744e-06, dtype=float32)}


  7%|▋         | 68550/1000000 [3:14:38<25:49:01, 10.02it/s]

{'loss': Array(0.13520008, dtype=float32), 'loss_cross_entropy': Array(0.12647744, dtype=float32)}
{'loss_inverse': Array(1.8905941e-05, dtype=float32)}


  7%|▋         | 68560/1000000 [3:14:40<29:52:19,  8.66it/s]

{'loss': Array(0.17320952, dtype=float32), 'loss_cross_entropy': Array(0.16300692, dtype=float32)}
{'loss_inverse': Array(4.3460626e-05, dtype=float32)}


  7%|▋         | 68570/1000000 [3:14:41<25:53:55,  9.99it/s]

{'loss': Array(0.11247311, dtype=float32), 'loss_cross_entropy': Array(0.10546178, dtype=float32)}
{'loss_inverse': Array(7.963284e-07, dtype=float32)}


  7%|▋         | 68580/1000000 [3:14:43<32:51:33,  7.87it/s]

{'loss': Array(0.16777833, dtype=float32), 'loss_cross_entropy': Array(0.1575966, dtype=float32)}
{'loss_inverse': Array(2.41255e-06, dtype=float32)}


  7%|▋         | 68590/1000000 [3:14:45<27:50:11,  9.29it/s]

{'loss': Array(0.14352502, dtype=float32), 'loss_cross_entropy': Array(0.13526382, dtype=float32)}
{'loss_inverse': Array(3.4641082e-06, dtype=float32)}


  7%|▋         | 68600/1000000 [3:14:46<25:23:13, 10.19it/s]

{'loss': Array(0.1503741, dtype=float32), 'loss_cross_entropy': Array(0.14083347, dtype=float32)}
{'loss_inverse': Array(0.00020321, dtype=float32)}


  7%|▋         | 68610/1000000 [3:14:48<29:57:18,  8.64it/s]

{'loss': Array(0.16388421, dtype=float32), 'loss_cross_entropy': Array(0.15490618, dtype=float32)}
{'loss_inverse': Array(1.6626893e-05, dtype=float32)}


  7%|▋         | 68620/1000000 [3:14:49<26:41:23,  9.69it/s]

{'loss': Array(0.17747772, dtype=float32), 'loss_cross_entropy': Array(0.16758023, dtype=float32)}
{'loss_inverse': Array(3.1749376e-05, dtype=float32)}


  7%|▋         | 68630/1000000 [3:14:51<31:57:38,  8.09it/s]

{'loss': Array(0.169088, dtype=float32), 'loss_cross_entropy': Array(0.15921299, dtype=float32)}
{'loss_inverse': Array(1.5160206e-06, dtype=float32)}


  7%|▋         | 68640/1000000 [3:14:53<26:14:17,  9.86it/s]

{'loss': Array(0.15900205, dtype=float32), 'loss_cross_entropy': Array(0.14986935, dtype=float32)}
{'loss_inverse': Array(2.5765728e-05, dtype=float32)}


  7%|▋         | 68650/1000000 [3:14:54<34:13:49,  7.56it/s]

{'loss': Array(0.1501876, dtype=float32), 'loss_cross_entropy': Array(0.14167313, dtype=float32)}
{'loss_inverse': Array(0.00010207, dtype=float32)}


  7%|▋         | 68660/1000000 [3:14:56<26:44:58,  9.67it/s]

{'loss': Array(0.14697023, dtype=float32), 'loss_cross_entropy': Array(0.13851885, dtype=float32)}
{'loss_inverse': Array(2.6914906e-06, dtype=float32)}


  7%|▋         | 68670/1000000 [3:14:57<25:12:46, 10.26it/s]

{'loss': Array(0.1496773, dtype=float32), 'loss_cross_entropy': Array(0.14124498, dtype=float32)}
{'loss_inverse': Array(0.00041278, dtype=float32)}


  7%|▋         | 68680/1000000 [3:14:59<28:25:13,  9.10it/s]

{'loss': Array(0.15677752, dtype=float32), 'loss_cross_entropy': Array(0.14769839, dtype=float32)}
{'loss_inverse': Array(8.414426e-05, dtype=float32)}


  7%|▋         | 68690/1000000 [3:15:00<26:04:33,  9.92it/s]

{'loss': Array(0.13203086, dtype=float32), 'loss_cross_entropy': Array(0.12354562, dtype=float32)}
{'loss_inverse': Array(6.9220776e-05, dtype=float32)}


  7%|▋         | 68700/1000000 [3:15:02<34:10:07,  7.57it/s]

{'loss': Array(0.16452897, dtype=float32), 'loss_cross_entropy': Array(0.1550426, dtype=float32)}
{'loss_inverse': Array(4.0221653e-06, dtype=float32)}


  7%|▋         | 68710/1000000 [3:15:03<26:47:36,  9.65it/s]

{'loss': Array(0.15799205, dtype=float32), 'loss_cross_entropy': Array(0.14839618, dtype=float32)}
{'loss_inverse': Array(2.0720412e-05, dtype=float32)}


  7%|▋         | 68720/1000000 [3:15:05<25:35:09, 10.11it/s]

{'loss': Array(0.13576311, dtype=float32), 'loss_cross_entropy': Array(0.12779668, dtype=float32)}
{'loss_inverse': Array(0.00033118, dtype=float32)}


  7%|▋         | 68730/1000000 [3:15:07<28:29:22,  9.08it/s]

{'loss': Array(0.14494096, dtype=float32), 'loss_cross_entropy': Array(0.136614, dtype=float32)}
{'loss_inverse': Array(4.2242787e-06, dtype=float32)}


  7%|▋         | 68740/1000000 [3:15:08<26:25:05,  9.79it/s]

{'loss': Array(0.14656512, dtype=float32), 'loss_cross_entropy': Array(0.1376798, dtype=float32)}
{'loss_inverse': Array(0.00012358, dtype=float32)}


  7%|▋         | 68750/1000000 [3:15:10<29:51:33,  8.66it/s]

{'loss': Array(0.1713192, dtype=float32), 'loss_cross_entropy': Array(0.16218649, dtype=float32)}
{'loss_inverse': Array(4.3763597e-05, dtype=float32)}


  7%|▋         | 68759/1000000 [3:15:11<27:06:21,  9.54it/s]

{'loss': Array(0.15005231, dtype=float32), 'loss_cross_entropy': Array(0.1418163, dtype=float32)}
{'loss_inverse': Array(3.0258225e-05, dtype=float32)}


  7%|▋         | 68769/1000000 [3:15:13<32:05:25,  8.06it/s]

{'loss': Array(0.1602999, dtype=float32), 'loss_cross_entropy': Array(0.1512407, dtype=float32)}
{'loss_inverse': Array(3.9961556e-06, dtype=float32)}


  7%|▋         | 68779/1000000 [3:15:14<26:28:22,  9.77it/s]

{'loss': Array(0.17117295, dtype=float32), 'loss_cross_entropy': Array(0.1612218, dtype=float32)}
{'loss_inverse': Array(3.2763728e-05, dtype=float32)}


  7%|▋         | 68789/1000000 [3:15:16<25:52:13, 10.00it/s]

{'loss': Array(0.1579375, dtype=float32), 'loss_cross_entropy': Array(0.14912738, dtype=float32)}
{'loss_inverse': Array(2.9808021e-05, dtype=float32)}


  7%|▋         | 68799/1000000 [3:15:17<30:13:06,  8.56it/s]

{'loss': Array(0.14172833, dtype=float32), 'loss_cross_entropy': Array(0.13330029, dtype=float32)}
{'loss_inverse': Array(0.00045972, dtype=float32)}


  7%|▋         | 68809/1000000 [3:15:19<26:49:19,  9.64it/s]

{'loss': Array(0.1573937, dtype=float32), 'loss_cross_entropy': Array(0.14791875, dtype=float32)}
{'loss_inverse': Array(5.3467284e-05, dtype=float32)}


  7%|▋         | 68819/1000000 [3:15:21<35:32:33,  7.28it/s]

{'loss': Array(0.14465247, dtype=float32), 'loss_cross_entropy': Array(0.13586505, dtype=float32)}
{'loss_inverse': Array(0.00013381, dtype=float32)}


  7%|▋         | 68829/1000000 [3:15:22<26:42:31,  9.68it/s]

{'loss': Array(0.1437863, dtype=float32), 'loss_cross_entropy': Array(0.13569778, dtype=float32)}
{'loss_inverse': Array(2.920352e-05, dtype=float32)}


  7%|▋         | 68839/1000000 [3:15:24<38:18:29,  6.75it/s]

{'loss': Array(0.14364928, dtype=float32), 'loss_cross_entropy': Array(0.1350485, dtype=float32)}
{'loss_inverse': Array(5.284091e-05, dtype=float32)}


  7%|▋         | 68849/1000000 [3:15:25<27:50:17,  9.29it/s]

{'loss': Array(0.15170693, dtype=float32), 'loss_cross_entropy': Array(0.14233916, dtype=float32)}
{'loss_inverse': Array(3.291344e-05, dtype=float32)}


  7%|▋         | 68859/1000000 [3:15:27<39:30:31,  6.55it/s]

{'loss': Array(0.16867435, dtype=float32), 'loss_cross_entropy': Array(0.15859023, dtype=float32)}
{'loss_inverse': Array(3.4913628e-06, dtype=float32)}


  7%|▋         | 68869/1000000 [3:15:29<28:01:34,  9.23it/s]

{'loss': Array(0.14704627, dtype=float32), 'loss_cross_entropy': Array(0.13867585, dtype=float32)}
{'loss_inverse': Array(2.4494817e-05, dtype=float32)}


  7%|▋         | 68879/1000000 [3:15:30<25:30:05, 10.14it/s]

{'loss': Array(0.14554165, dtype=float32), 'loss_cross_entropy': Array(0.13738413, dtype=float32)}
{'loss_inverse': Array(9.916394e-06, dtype=float32)}


  7%|▋         | 68889/1000000 [3:15:32<34:13:42,  7.56it/s]

{'loss': Array(0.15269826, dtype=float32), 'loss_cross_entropy': Array(0.14341359, dtype=float32)}
{'loss_inverse': Array(3.3057503e-07, dtype=float32)}


  7%|▋         | 68899/1000000 [3:15:33<27:29:21,  9.41it/s]

{'loss': Array(0.15711683, dtype=float32), 'loss_cross_entropy': Array(0.14835405, dtype=float32)}
{'loss_inverse': Array(1.32653795e-05, dtype=float32)}


  7%|▋         | 68909/1000000 [3:15:35<38:46:56,  6.67it/s]

{'loss': Array(0.14417928, dtype=float32), 'loss_cross_entropy': Array(0.13592367, dtype=float32)}
{'loss_inverse': Array(4.576755e-05, dtype=float32)}


  7%|▋         | 68919/1000000 [3:15:36<27:27:40,  9.42it/s]

{'loss': Array(0.16858771, dtype=float32), 'loss_cross_entropy': Array(0.15842317, dtype=float32)}
{'loss_inverse': Array(5.6735083e-05, dtype=float32)}


  7%|▋         | 68929/1000000 [3:15:38<26:18:41,  9.83it/s]

{'loss': Array(0.1426884, dtype=float32), 'loss_cross_entropy': Array(0.13506651, dtype=float32)}
{'loss_inverse': Array(0.00015909, dtype=float32)}


  7%|▋         | 68939/1000000 [3:15:40<28:25:22,  9.10it/s]

{'loss': Array(0.16594973, dtype=float32), 'loss_cross_entropy': Array(0.15654683, dtype=float32)}
{'loss_inverse': Array(0.00024725, dtype=float32)}


  7%|▋         | 68949/1000000 [3:15:41<25:39:45, 10.08it/s]

{'loss': Array(0.15339184, dtype=float32), 'loss_cross_entropy': Array(0.14437574, dtype=float32)}
{'loss_inverse': Array(3.4553002e-05, dtype=float32)}


  7%|▋         | 68959/1000000 [3:15:43<34:04:45,  7.59it/s]

{'loss': Array(0.16153316, dtype=float32), 'loss_cross_entropy': Array(0.15262787, dtype=float32)}
{'loss_inverse': Array(4.6557918e-05, dtype=float32)}


  7%|▋         | 68969/1000000 [3:15:44<27:00:02,  9.58it/s]

{'loss': Array(0.13038662, dtype=float32), 'loss_cross_entropy': Array(0.12211054, dtype=float32)}
{'loss_inverse': Array(5.058335e-07, dtype=float32)}


  7%|▋         | 68979/1000000 [3:15:45<25:23:30, 10.19it/s]

{'loss': Array(0.15255128, dtype=float32), 'loss_cross_entropy': Array(0.14301139, dtype=float32)}
{'loss_inverse': Array(6.4037355e-05, dtype=float32)}


  7%|▋         | 68989/1000000 [3:15:47<29:16:52,  8.83it/s]

{'loss': Array(0.15382013, dtype=float32), 'loss_cross_entropy': Array(0.14494085, dtype=float32)}
{'loss_inverse': Array(4.639202e-06, dtype=float32)}


  7%|▋         | 68999/1000000 [3:15:49<26:04:30,  9.92it/s]

{'loss': Array(0.15775798, dtype=float32), 'loss_cross_entropy': Array(0.14819492, dtype=float32)}
{'loss_inverse': Array(0.00034605, dtype=float32)}


  7%|▋         | 69009/1000000 [3:15:57<92:04:58,  2.81it/s] 

{'loss': Array(0.15969335, dtype=float32), 'loss_cross_entropy': Array(0.15109102, dtype=float32)}
{'loss_inverse': Array(6.898923e-05, dtype=float32)}


  7%|▋         | 69019/1000000 [3:15:58<36:37:23,  7.06it/s]

{'loss': Array(0.17112409, dtype=float32), 'loss_cross_entropy': Array(0.16089676, dtype=float32)}
{'loss_inverse': Array(2.0395197e-05, dtype=float32)}


  7%|▋         | 69029/1000000 [3:16:00<33:22:38,  7.75it/s]

{'loss': Array(0.15539221, dtype=float32), 'loss_cross_entropy': Array(0.14573641, dtype=float32)}
{'loss_inverse': Array(3.8281587e-06, dtype=float32)}


  7%|▋         | 69039/1000000 [3:16:01<26:32:40,  9.74it/s]

{'loss': Array(0.12707518, dtype=float32), 'loss_cross_entropy': Array(0.11847756, dtype=float32)}
{'loss_inverse': Array(1.5855248e-05, dtype=float32)}


  7%|▋         | 69049/1000000 [3:16:03<34:27:13,  7.51it/s]

{'loss': Array(0.16222301, dtype=float32), 'loss_cross_entropy': Array(0.15335234, dtype=float32)}
{'loss_inverse': Array(1.8035324e-06, dtype=float32)}


  7%|▋         | 69059/1000000 [3:16:05<27:26:04,  9.43it/s]

{'loss': Array(0.1624695, dtype=float32), 'loss_cross_entropy': Array(0.153283, dtype=float32)}
{'loss_inverse': Array(1.454803e-06, dtype=float32)}


  7%|▋         | 69069/1000000 [3:16:06<26:07:30,  9.90it/s]

{'loss': Array(0.16198243, dtype=float32), 'loss_cross_entropy': Array(0.15233843, dtype=float32)}
{'loss_inverse': Array(1.1483502e-06, dtype=float32)}


  7%|▋         | 69079/1000000 [3:16:08<30:03:07,  8.60it/s]

{'loss': Array(0.14909269, dtype=float32), 'loss_cross_entropy': Array(0.1405684, dtype=float32)}
{'loss_inverse': Array(4.9568553e-06, dtype=float32)}


  7%|▋         | 69089/1000000 [3:16:09<26:16:02,  9.84it/s]

{'loss': Array(0.15548697, dtype=float32), 'loss_cross_entropy': Array(0.14622332, dtype=float32)}
{'loss_inverse': Array(2.191252e-05, dtype=float32)}


  7%|▋         | 69099/1000000 [3:16:11<31:59:40,  8.08it/s]

{'loss': Array(0.14958929, dtype=float32), 'loss_cross_entropy': Array(0.14086412, dtype=float32)}
{'loss_inverse': Array(0.00014267, dtype=float32)}


  7%|▋         | 69109/1000000 [3:16:12<26:43:09,  9.68it/s]

{'loss': Array(0.1386571, dtype=float32), 'loss_cross_entropy': Array(0.13001183, dtype=float32)}
{'loss_inverse': Array(6.802967e-05, dtype=float32)}


  7%|▋         | 69119/1000000 [3:16:14<31:44:36,  8.15it/s]

{'loss': Array(0.14470786, dtype=float32), 'loss_cross_entropy': Array(0.13623899, dtype=float32)}
{'loss_inverse': Array(0.0001726, dtype=float32)}


  7%|▋         | 69129/1000000 [3:16:16<26:53:04,  9.62it/s]

{'loss': Array(0.13039692, dtype=float32), 'loss_cross_entropy': Array(0.12249211, dtype=float32)}
{'loss_inverse': Array(5.2874947e-07, dtype=float32)}


  7%|▋         | 69139/1000000 [3:16:17<32:38:54,  7.92it/s]

{'loss': Array(0.15838097, dtype=float32), 'loss_cross_entropy': Array(0.14949958, dtype=float32)}
{'loss_inverse': Array(8.3941195e-06, dtype=float32)}


  7%|▋         | 69149/1000000 [3:16:19<26:11:00,  9.88it/s]

{'loss': Array(0.16334847, dtype=float32), 'loss_cross_entropy': Array(0.15389673, dtype=float32)}
{'loss_inverse': Array(2.4398913e-05, dtype=float32)}


  7%|▋         | 69159/1000000 [3:16:21<34:53:53,  7.41it/s]

{'loss': Array(0.13389313, dtype=float32), 'loss_cross_entropy': Array(0.12618499, dtype=float32)}
{'loss_inverse': Array(1.2130316e-05, dtype=float32)}


  7%|▋         | 69169/1000000 [3:16:22<26:59:35,  9.58it/s]

{'loss': Array(0.13787858, dtype=float32), 'loss_cross_entropy': Array(0.1302302, dtype=float32)}
{'loss_inverse': Array(4.0885276e-05, dtype=float32)}


  7%|▋         | 69179/1000000 [3:16:24<38:52:53,  6.65it/s]

{'loss': Array(0.14235726, dtype=float32), 'loss_cross_entropy': Array(0.13403936, dtype=float32)}
{'loss_inverse': Array(2.7722164e-05, dtype=float32)}


  7%|▋         | 69189/1000000 [3:16:25<27:32:33,  9.39it/s]

{'loss': Array(0.13125277, dtype=float32), 'loss_cross_entropy': Array(0.12376092, dtype=float32)}
{'loss_inverse': Array(1.0508076e-06, dtype=float32)}


  7%|▋         | 69199/1000000 [3:16:27<38:48:38,  6.66it/s]

{'loss': Array(0.15888172, dtype=float32), 'loss_cross_entropy': Array(0.14873032, dtype=float32)}
{'loss_inverse': Array(1.0961277e-05, dtype=float32)}


  7%|▋         | 69209/1000000 [3:16:28<27:20:42,  9.46it/s]

{'loss': Array(0.14865111, dtype=float32), 'loss_cross_entropy': Array(0.13984935, dtype=float32)}
{'loss_inverse': Array(2.7092412e-05, dtype=float32)}


  7%|▋         | 69219/1000000 [3:16:30<25:43:05, 10.05it/s]

{'loss': Array(0.16129266, dtype=float32), 'loss_cross_entropy': Array(0.15177119, dtype=float32)}
{'loss_inverse': Array(7.164194e-07, dtype=float32)}


  7%|▋         | 69229/1000000 [3:16:32<31:40:15,  8.16it/s]

{'loss': Array(0.15879409, dtype=float32), 'loss_cross_entropy': Array(0.14938454, dtype=float32)}
{'loss_inverse': Array(0.00016805, dtype=float32)}


  7%|▋         | 69239/1000000 [3:16:33<27:16:18,  9.48it/s]

{'loss': Array(0.15377437, dtype=float32), 'loss_cross_entropy': Array(0.14552505, dtype=float32)}
{'loss_inverse': Array(5.756402e-06, dtype=float32)}


  7%|▋         | 69249/1000000 [3:16:35<35:11:56,  7.35it/s]

{'loss': Array(0.1559287, dtype=float32), 'loss_cross_entropy': Array(0.14701068, dtype=float32)}
{'loss_inverse': Array(6.4311265e-05, dtype=float32)}


  7%|▋         | 69259/1000000 [3:16:36<26:40:28,  9.69it/s]

{'loss': Array(0.14297646, dtype=float32), 'loss_cross_entropy': Array(0.13412532, dtype=float32)}
{'loss_inverse': Array(5.2720397e-06, dtype=float32)}


  7%|▋         | 69269/1000000 [3:16:38<35:13:42,  7.34it/s]

{'loss': Array(0.1632222, dtype=float32), 'loss_cross_entropy': Array(0.15438604, dtype=float32)}
{'loss_inverse': Array(2.1592146e-05, dtype=float32)}


  7%|▋         | 69279/1000000 [3:16:40<27:46:29,  9.31it/s]

{'loss': Array(0.15124223, dtype=float32), 'loss_cross_entropy': Array(0.14197461, dtype=float32)}
{'loss_inverse': Array(1.5579926e-05, dtype=float32)}


  7%|▋         | 69289/1000000 [3:16:41<39:19:42,  6.57it/s]

{'loss': Array(0.1591889, dtype=float32), 'loss_cross_entropy': Array(0.14959975, dtype=float32)}
{'loss_inverse': Array(1.0179157e-05, dtype=float32)}


  7%|▋         | 69299/1000000 [3:16:43<28:03:19,  9.21it/s]

{'loss': Array(0.15430395, dtype=float32), 'loss_cross_entropy': Array(0.14532232, dtype=float32)}
{'loss_inverse': Array(2.947007e-07, dtype=float32)}


  7%|▋         | 69309/1000000 [3:16:44<25:43:42, 10.05it/s]

{'loss': Array(0.13162193, dtype=float32), 'loss_cross_entropy': Array(0.12372606, dtype=float32)}
{'loss_inverse': Array(7.125367e-07, dtype=float32)}


  7%|▋         | 69319/1000000 [3:16:46<32:35:00,  7.93it/s]

{'loss': Array(0.15014388, dtype=float32), 'loss_cross_entropy': Array(0.14203706, dtype=float32)}
{'loss_inverse': Array(4.0076913e-05, dtype=float32)}


  7%|▋         | 69329/1000000 [3:16:47<26:39:04,  9.70it/s]

{'loss': Array(0.16201891, dtype=float32), 'loss_cross_entropy': Array(0.1530838, dtype=float32)}
{'loss_inverse': Array(5.684424e-06, dtype=float32)}


  7%|▋         | 69339/1000000 [3:16:49<38:34:19,  6.70it/s]

{'loss': Array(0.14428303, dtype=float32), 'loss_cross_entropy': Array(0.13585697, dtype=float32)}
{'loss_inverse': Array(2.24261e-06, dtype=float32)}


  7%|▋         | 69349/1000000 [3:16:51<27:56:09,  9.25it/s]

{'loss': Array(0.13143492, dtype=float32), 'loss_cross_entropy': Array(0.12350811, dtype=float32)}
{'loss_inverse': Array(0.00015498, dtype=float32)}


  7%|▋         | 69359/1000000 [3:16:52<25:58:51,  9.95it/s]

{'loss': Array(0.14836685, dtype=float32), 'loss_cross_entropy': Array(0.1395543, dtype=float32)}
{'loss_inverse': Array(3.5603687e-06, dtype=float32)}


  7%|▋         | 69369/1000000 [3:16:54<29:52:19,  8.65it/s]

{'loss': Array(0.1412326, dtype=float32), 'loss_cross_entropy': Array(0.13282003, dtype=float32)}
{'loss_inverse': Array(4.6669084e-05, dtype=float32)}


  7%|▋         | 69379/1000000 [3:16:55<26:11:02,  9.87it/s]

{'loss': Array(0.14751692, dtype=float32), 'loss_cross_entropy': Array(0.13840169, dtype=float32)}
{'loss_inverse': Array(9.387539e-05, dtype=float32)}


  7%|▋         | 69389/1000000 [3:16:57<30:00:55,  8.61it/s]

{'loss': Array(0.14147256, dtype=float32), 'loss_cross_entropy': Array(0.13283373, dtype=float32)}
{'loss_inverse': Array(0.00038277, dtype=float32)}


  7%|▋         | 69399/1000000 [3:16:59<27:11:38,  9.51it/s]

{'loss': Array(0.13505399, dtype=float32), 'loss_cross_entropy': Array(0.12652534, dtype=float32)}
{'loss_inverse': Array(7.2208604e-05, dtype=float32)}


  7%|▋         | 69409/1000000 [3:17:00<31:55:44,  8.10it/s]

{'loss': Array(0.15678601, dtype=float32), 'loss_cross_entropy': Array(0.1475449, dtype=float32)}
{'loss_inverse': Array(2.1904438e-05, dtype=float32)}


  7%|▋         | 69419/1000000 [3:17:02<26:15:08,  9.85it/s]

{'loss': Array(0.15340857, dtype=float32), 'loss_cross_entropy': Array(0.14446421, dtype=float32)}
{'loss_inverse': Array(6.0242714e-05, dtype=float32)}


  7%|▋         | 69429/1000000 [3:17:03<31:35:03,  8.18it/s]

{'loss': Array(0.15828633, dtype=float32), 'loss_cross_entropy': Array(0.14886864, dtype=float32)}
{'loss_inverse': Array(1.7628709e-05, dtype=float32)}


  7%|▋         | 69439/1000000 [3:17:05<26:20:46,  9.81it/s]

{'loss': Array(0.155668, dtype=float32), 'loss_cross_entropy': Array(0.14737505, dtype=float32)}
{'loss_inverse': Array(0.00011011, dtype=float32)}


  7%|▋         | 69449/1000000 [3:17:06<25:55:32,  9.97it/s]

{'loss': Array(0.15391384, dtype=float32), 'loss_cross_entropy': Array(0.1450687, dtype=float32)}
{'loss_inverse': Array(1.5114631e-06, dtype=float32)}


  7%|▋         | 69459/1000000 [3:17:08<29:07:58,  8.87it/s]

{'loss': Array(0.17530958, dtype=float32), 'loss_cross_entropy': Array(0.16575585, dtype=float32)}
{'loss_inverse': Array(1.6696697e-06, dtype=float32)}


  7%|▋         | 69469/1000000 [3:17:10<26:05:42,  9.91it/s]

{'loss': Array(0.14483535, dtype=float32), 'loss_cross_entropy': Array(0.13656648, dtype=float32)}
{'loss_inverse': Array(0.00014705, dtype=float32)}


  7%|▋         | 69479/1000000 [3:17:11<30:21:37,  8.51it/s]

{'loss': Array(0.14394623, dtype=float32), 'loss_cross_entropy': Array(0.13595091, dtype=float32)}
{'loss_inverse': Array(3.620687e-05, dtype=float32)}


  7%|▋         | 69489/1000000 [3:17:13<26:21:25,  9.81it/s]

{'loss': Array(0.13773645, dtype=float32), 'loss_cross_entropy': Array(0.12976055, dtype=float32)}
{'loss_inverse': Array(3.6503734e-06, dtype=float32)}


  7%|▋         | 69499/1000000 [3:17:15<32:07:37,  8.05it/s]

{'loss': Array(0.14987741, dtype=float32), 'loss_cross_entropy': Array(0.14078784, dtype=float32)}
{'loss_inverse': Array(0.00013075, dtype=float32)}


  7%|▋         | 69509/1000000 [3:17:23<87:40:19,  2.95it/s] 

{'loss': Array(0.14626367, dtype=float32), 'loss_cross_entropy': Array(0.13724966, dtype=float32)}
{'loss_inverse': Array(2.8463384e-05, dtype=float32)}


  7%|▋         | 69519/1000000 [3:17:24<44:34:11,  5.80it/s]

{'loss': Array(0.18492758, dtype=float32), 'loss_cross_entropy': Array(0.1747459, dtype=float32)}
{'loss_inverse': Array(1.8306175e-06, dtype=float32)}


  7%|▋         | 69529/1000000 [3:17:26<28:21:40,  9.11it/s]

{'loss': Array(0.15015219, dtype=float32), 'loss_cross_entropy': Array(0.14169827, dtype=float32)}
{'loss_inverse': Array(5.5445616e-05, dtype=float32)}


  7%|▋         | 69539/1000000 [3:17:27<25:29:16, 10.14it/s]

{'loss': Array(0.1436877, dtype=float32), 'loss_cross_entropy': Array(0.13492347, dtype=float32)}
{'loss_inverse': Array(6.3406005e-06, dtype=float32)}


  7%|▋         | 69549/1000000 [3:17:29<28:34:03,  9.05it/s]

{'loss': Array(0.1296257, dtype=float32), 'loss_cross_entropy': Array(0.12167954, dtype=float32)}
{'loss_inverse': Array(4.863577e-06, dtype=float32)}


  7%|▋         | 69559/1000000 [3:17:30<25:54:11,  9.98it/s]

{'loss': Array(0.15436743, dtype=float32), 'loss_cross_entropy': Array(0.14560243, dtype=float32)}
{'loss_inverse': Array(3.2512385e-06, dtype=float32)}


  7%|▋         | 69569/1000000 [3:17:32<31:54:16,  8.10it/s]

{'loss': Array(0.1314725, dtype=float32), 'loss_cross_entropy': Array(0.12290481, dtype=float32)}
{'loss_inverse': Array(2.2081991e-05, dtype=float32)}


  7%|▋         | 69579/1000000 [3:17:33<26:45:39,  9.66it/s]

{'loss': Array(0.15428251, dtype=float32), 'loss_cross_entropy': Array(0.14558285, dtype=float32)}
{'loss_inverse': Array(1.5795013e-06, dtype=float32)}


  7%|▋         | 69589/1000000 [3:17:35<34:18:38,  7.53it/s]

{'loss': Array(0.14454176, dtype=float32), 'loss_cross_entropy': Array(0.13613202, dtype=float32)}
{'loss_inverse': Array(3.5160698e-05, dtype=float32)}


  7%|▋         | 69599/1000000 [3:17:37<27:08:19,  9.52it/s]

{'loss': Array(0.14780907, dtype=float32), 'loss_cross_entropy': Array(0.13912421, dtype=float32)}
{'loss_inverse': Array(0.00020446, dtype=float32)}


  7%|▋         | 69609/1000000 [3:17:38<39:19:15,  6.57it/s]

{'loss': Array(0.1665783, dtype=float32), 'loss_cross_entropy': Array(0.15746649, dtype=float32)}
{'loss_inverse': Array(5.271622e-06, dtype=float32)}


  7%|▋         | 69619/1000000 [3:17:40<27:42:10,  9.33it/s]

{'loss': Array(0.15919255, dtype=float32), 'loss_cross_entropy': Array(0.15011488, dtype=float32)}
{'loss_inverse': Array(2.8225704e-05, dtype=float32)}


  7%|▋         | 69629/1000000 [3:17:42<38:19:57,  6.74it/s]

{'loss': Array(0.14383405, dtype=float32), 'loss_cross_entropy': Array(0.13544516, dtype=float32)}
{'loss_inverse': Array(4.6334724e-05, dtype=float32)}


  7%|▋         | 69639/1000000 [3:17:43<27:38:58,  9.35it/s]

{'loss': Array(0.15579008, dtype=float32), 'loss_cross_entropy': Array(0.14644572, dtype=float32)}
{'loss_inverse': Array(4.167018e-05, dtype=float32)}


  7%|▋         | 69649/1000000 [3:17:44<25:35:03, 10.10it/s]

{'loss': Array(0.15941243, dtype=float32), 'loss_cross_entropy': Array(0.15072279, dtype=float32)}
{'loss_inverse': Array(2.3603052e-05, dtype=float32)}


  7%|▋         | 69659/1000000 [3:17:46<31:28:17,  8.21it/s]

{'loss': Array(0.13941127, dtype=float32), 'loss_cross_entropy': Array(0.13147777, dtype=float32)}
{'loss_inverse': Array(3.739104e-05, dtype=float32)}


  7%|▋         | 69669/1000000 [3:17:48<27:01:29,  9.56it/s]

{'loss': Array(0.15858473, dtype=float32), 'loss_cross_entropy': Array(0.14936773, dtype=float32)}
{'loss_inverse': Array(6.2942345e-05, dtype=float32)}


  7%|▋         | 69679/1000000 [3:17:49<34:43:30,  7.44it/s]

{'loss': Array(0.16603474, dtype=float32), 'loss_cross_entropy': Array(0.15618876, dtype=float32)}
{'loss_inverse': Array(0.00028297, dtype=float32)}


  7%|▋         | 69689/1000000 [3:17:51<27:08:40,  9.52it/s]

{'loss': Array(0.14540073, dtype=float32), 'loss_cross_entropy': Array(0.13674133, dtype=float32)}
{'loss_inverse': Array(3.5534365e-05, dtype=float32)}


  7%|▋         | 69699/1000000 [3:17:53<34:42:06,  7.45it/s]

{'loss': Array(0.1474544, dtype=float32), 'loss_cross_entropy': Array(0.1388613, dtype=float32)}
{'loss_inverse': Array(0.00020542, dtype=float32)}


  7%|▋         | 69709/1000000 [3:17:54<27:25:12,  9.42it/s]

{'loss': Array(0.14021985, dtype=float32), 'loss_cross_entropy': Array(0.13171844, dtype=float32)}
{'loss_inverse': Array(7.67061e-05, dtype=float32)}


  7%|▋         | 69719/1000000 [3:17:56<38:45:49,  6.67it/s]

{'loss': Array(0.13962488, dtype=float32), 'loss_cross_entropy': Array(0.13130125, dtype=float32)}
{'loss_inverse': Array(1.4263214e-05, dtype=float32)}


  7%|▋         | 69729/1000000 [3:17:57<27:53:50,  9.26it/s]

{'loss': Array(0.14281766, dtype=float32), 'loss_cross_entropy': Array(0.1344999, dtype=float32)}
{'loss_inverse': Array(1.7865418e-07, dtype=float32)}


  7%|▋         | 69739/1000000 [3:17:59<25:28:32, 10.14it/s]

{'loss': Array(0.17874737, dtype=float32), 'loss_cross_entropy': Array(0.1686016, dtype=float32)}
{'loss_inverse': Array(2.1856951e-05, dtype=float32)}


  7%|▋         | 69749/1000000 [3:18:00<31:22:30,  8.24it/s]

{'loss': Array(0.14449878, dtype=float32), 'loss_cross_entropy': Array(0.13600968, dtype=float32)}
{'loss_inverse': Array(4.566104e-05, dtype=float32)}


  7%|▋         | 69759/1000000 [3:18:02<26:55:04,  9.60it/s]

{'loss': Array(0.16238151, dtype=float32), 'loss_cross_entropy': Array(0.15263708, dtype=float32)}
{'loss_inverse': Array(2.2152502e-05, dtype=float32)}


  7%|▋         | 69769/1000000 [3:18:04<38:23:38,  6.73it/s]

{'loss': Array(0.1503294, dtype=float32), 'loss_cross_entropy': Array(0.14187606, dtype=float32)}
{'loss_inverse': Array(1.6257558e-05, dtype=float32)}


  7%|▋         | 69779/1000000 [3:18:05<27:39:44,  9.34it/s]

{'loss': Array(0.14603047, dtype=float32), 'loss_cross_entropy': Array(0.13696758, dtype=float32)}
{'loss_inverse': Array(1.4494614e-06, dtype=float32)}


  7%|▋         | 69789/1000000 [3:18:06<26:23:01,  9.79it/s]

{'loss': Array(0.13558133, dtype=float32), 'loss_cross_entropy': Array(0.12775908, dtype=float32)}
{'loss_inverse': Array(5.082693e-06, dtype=float32)}


  7%|▋         | 69799/1000000 [3:18:08<29:21:56,  8.80it/s]

{'loss': Array(0.15352605, dtype=float32), 'loss_cross_entropy': Array(0.1450219, dtype=float32)}
{'loss_inverse': Array(5.6938217e-05, dtype=float32)}


  7%|▋         | 69809/1000000 [3:18:10<25:45:44, 10.03it/s]

{'loss': Array(0.14810573, dtype=float32), 'loss_cross_entropy': Array(0.1392444, dtype=float32)}
{'loss_inverse': Array(5.825307e-05, dtype=float32)}


  7%|▋         | 69819/1000000 [3:18:11<29:25:13,  8.78it/s]

{'loss': Array(0.14667378, dtype=float32), 'loss_cross_entropy': Array(0.13833325, dtype=float32)}
{'loss_inverse': Array(8.864662e-06, dtype=float32)}


  7%|▋         | 69829/1000000 [3:18:13<26:42:14,  9.68it/s]

{'loss': Array(0.14752658, dtype=float32), 'loss_cross_entropy': Array(0.13881247, dtype=float32)}
{'loss_inverse': Array(1.1311048e-05, dtype=float32)}


  7%|▋         | 69839/1000000 [3:18:15<32:13:10,  8.02it/s]

{'loss': Array(0.1493409, dtype=float32), 'loss_cross_entropy': Array(0.14046311, dtype=float32)}
{'loss_inverse': Array(0.00018776, dtype=float32)}


  7%|▋         | 69849/1000000 [3:18:16<26:50:35,  9.63it/s]

{'loss': Array(0.15179153, dtype=float32), 'loss_cross_entropy': Array(0.14359891, dtype=float32)}
{'loss_inverse': Array(0.0001356, dtype=float32)}


  7%|▋         | 69859/1000000 [3:18:18<32:42:47,  7.90it/s]

{'loss': Array(0.15631698, dtype=float32), 'loss_cross_entropy': Array(0.14745438, dtype=float32)}
{'loss_inverse': Array(2.8671695e-06, dtype=float32)}


  7%|▋         | 69869/1000000 [3:18:19<27:36:48,  9.36it/s]

{'loss': Array(0.14888647, dtype=float32), 'loss_cross_entropy': Array(0.1404923, dtype=float32)}
{'loss_inverse': Array(1.0642234e-05, dtype=float32)}


  7%|▋         | 69879/1000000 [3:18:21<25:38:55, 10.07it/s]

{'loss': Array(0.14211492, dtype=float32), 'loss_cross_entropy': Array(0.13312505, dtype=float32)}
{'loss_inverse': Array(3.2109172e-05, dtype=float32)}


  7%|▋         | 69889/1000000 [3:18:22<28:05:29,  9.20it/s]

{'loss': Array(0.1536009, dtype=float32), 'loss_cross_entropy': Array(0.1445937, dtype=float32)}
{'loss_inverse': Array(8.8573004e-05, dtype=float32)}


  7%|▋         | 69899/1000000 [3:18:24<25:55:00,  9.97it/s]

{'loss': Array(0.13136944, dtype=float32), 'loss_cross_entropy': Array(0.12328024, dtype=float32)}
{'loss_inverse': Array(1.4528176e-05, dtype=float32)}


  7%|▋         | 69909/1000000 [3:18:26<29:45:50,  8.68it/s]

{'loss': Array(0.15830046, dtype=float32), 'loss_cross_entropy': Array(0.14968818, dtype=float32)}
{'loss_inverse': Array(5.6863595e-05, dtype=float32)}


  7%|▋         | 69919/1000000 [3:18:27<25:50:03, 10.00it/s]

{'loss': Array(0.15814222, dtype=float32), 'loss_cross_entropy': Array(0.1484453, dtype=float32)}
{'loss_inverse': Array(6.1610585e-06, dtype=float32)}


  7%|▋         | 69929/1000000 [3:18:29<32:28:32,  7.96it/s]

{'loss': Array(0.16471808, dtype=float32), 'loss_cross_entropy': Array(0.154925, dtype=float32)}
{'loss_inverse': Array(2.5991294e-06, dtype=float32)}


  7%|▋         | 69939/1000000 [3:18:30<26:39:29,  9.69it/s]

{'loss': Array(0.15926497, dtype=float32), 'loss_cross_entropy': Array(0.15012407, dtype=float32)}
{'loss_inverse': Array(2.29609e-06, dtype=float32)}


  7%|▋         | 69949/1000000 [3:18:32<25:48:16, 10.01it/s]

{'loss': Array(0.17325579, dtype=float32), 'loss_cross_entropy': Array(0.1636416, dtype=float32)}
{'loss_inverse': Array(4.2528976e-05, dtype=float32)}


  7%|▋         | 69959/1000000 [3:18:33<29:51:42,  8.65it/s]

{'loss': Array(0.14564128, dtype=float32), 'loss_cross_entropy': Array(0.13625748, dtype=float32)}
{'loss_inverse': Array(2.437515e-06, dtype=float32)}


  7%|▋         | 69969/1000000 [3:18:35<26:10:00,  9.87it/s]

{'loss': Array(0.17316422, dtype=float32), 'loss_cross_entropy': Array(0.16344714, dtype=float32)}
{'loss_inverse': Array(4.4491706e-05, dtype=float32)}


  7%|▋         | 69979/1000000 [3:18:37<34:44:46,  7.44it/s]

{'loss': Array(0.15163223, dtype=float32), 'loss_cross_entropy': Array(0.14257208, dtype=float32)}
{'loss_inverse': Array(3.3732245e-05, dtype=float32)}


  7%|▋         | 69989/1000000 [3:18:38<27:04:53,  9.54it/s]

{'loss': Array(0.14896181, dtype=float32), 'loss_cross_entropy': Array(0.14004548, dtype=float32)}
{'loss_inverse': Array(4.3172997e-05, dtype=float32)}


  7%|▋         | 69999/1000000 [3:18:40<38:23:24,  6.73it/s]

{'loss': Array(0.15885368, dtype=float32), 'loss_cross_entropy': Array(0.1502447, dtype=float32)}
{'loss_inverse': Array(1.0972223e-07, dtype=float32)}


  7%|▋         | 70009/1000000 [3:18:48<90:44:40,  2.85it/s] 

{'loss': Array(0.14330393, dtype=float32), 'loss_cross_entropy': Array(0.13526765, dtype=float32)}
{'loss_inverse': Array(7.8909296e-05, dtype=float32)}


  7%|▋         | 70019/1000000 [3:18:50<36:31:28,  7.07it/s]

{'loss': Array(0.13850838, dtype=float32), 'loss_cross_entropy': Array(0.13017549, dtype=float32)}
{'loss_inverse': Array(0.00020088, dtype=float32)}


  7%|▋         | 70029/1000000 [3:18:51<31:28:09,  8.21it/s]

{'loss': Array(0.16571923, dtype=float32), 'loss_cross_entropy': Array(0.15601158, dtype=float32)}
{'loss_inverse': Array(1.825628e-06, dtype=float32)}


  7%|▋         | 70039/1000000 [3:18:53<26:42:44,  9.67it/s]

{'loss': Array(0.16499095, dtype=float32), 'loss_cross_entropy': Array(0.15620981, dtype=float32)}
{'loss_inverse': Array(3.8862495e-06, dtype=float32)}


  7%|▋         | 70049/1000000 [3:18:55<30:49:00,  8.38it/s]

{'loss': Array(0.14904775, dtype=float32), 'loss_cross_entropy': Array(0.13936198, dtype=float32)}
{'loss_inverse': Array(1.8282402e-06, dtype=float32)}


  7%|▋         | 70059/1000000 [3:18:56<27:01:46,  9.56it/s]

{'loss': Array(0.13039328, dtype=float32), 'loss_cross_entropy': Array(0.12271743, dtype=float32)}
{'loss_inverse': Array(0.00020721, dtype=float32)}


  7%|▋         | 70069/1000000 [3:18:58<35:16:40,  7.32it/s]

{'loss': Array(0.14773403, dtype=float32), 'loss_cross_entropy': Array(0.13855053, dtype=float32)}
{'loss_inverse': Array(6.805317e-05, dtype=float32)}


  7%|▋         | 70079/1000000 [3:18:59<27:14:14,  9.48it/s]

{'loss': Array(0.12896861, dtype=float32), 'loss_cross_entropy': Array(0.12151601, dtype=float32)}
{'loss_inverse': Array(1.7535864e-05, dtype=float32)}


  7%|▋         | 70089/1000000 [3:19:01<34:54:44,  7.40it/s]

{'loss': Array(0.16214915, dtype=float32), 'loss_cross_entropy': Array(0.15266944, dtype=float32)}
{'loss_inverse': Array(9.4038565e-05, dtype=float32)}


  7%|▋         | 70099/1000000 [3:19:02<27:15:38,  9.48it/s]

{'loss': Array(0.1575058, dtype=float32), 'loss_cross_entropy': Array(0.1486451, dtype=float32)}
{'loss_inverse': Array(0.00016133, dtype=float32)}


  7%|▋         | 70109/1000000 [3:19:04<38:32:17,  6.70it/s]

{'loss': Array(0.15156117, dtype=float32), 'loss_cross_entropy': Array(0.14266498, dtype=float32)}
{'loss_inverse': Array(7.984044e-06, dtype=float32)}


  7%|▋         | 70119/1000000 [3:19:06<27:38:16,  9.35it/s]

{'loss': Array(0.15284859, dtype=float32), 'loss_cross_entropy': Array(0.14409614, dtype=float32)}
{'loss_inverse': Array(1.1436948e-05, dtype=float32)}


  7%|▋         | 70129/1000000 [3:19:07<25:49:08, 10.00it/s]

{'loss': Array(0.1534946, dtype=float32), 'loss_cross_entropy': Array(0.14454414, dtype=float32)}
{'loss_inverse': Array(1.9757672e-06, dtype=float32)}


  7%|▋         | 70139/1000000 [3:19:09<31:37:41,  8.17it/s]

{'loss': Array(0.16327408, dtype=float32), 'loss_cross_entropy': Array(0.15337543, dtype=float32)}
{'loss_inverse': Array(0.00029187, dtype=float32)}


  7%|▋         | 70149/1000000 [3:19:10<26:07:58,  9.88it/s]

{'loss': Array(0.15123962, dtype=float32), 'loss_cross_entropy': Array(0.14199616, dtype=float32)}
{'loss_inverse': Array(9.923649e-05, dtype=float32)}


  7%|▋         | 70159/1000000 [3:19:12<38:24:36,  6.72it/s]

{'loss': Array(0.14890656, dtype=float32), 'loss_cross_entropy': Array(0.13946007, dtype=float32)}
{'loss_inverse': Array(6.405519e-05, dtype=float32)}


  7%|▋         | 70169/1000000 [3:19:13<27:36:46,  9.35it/s]

{'loss': Array(0.14927995, dtype=float32), 'loss_cross_entropy': Array(0.13989267, dtype=float32)}
{'loss_inverse': Array(0.00019529, dtype=float32)}


  7%|▋         | 70179/1000000 [3:19:15<25:44:56, 10.03it/s]

{'loss': Array(0.1498831, dtype=float32), 'loss_cross_entropy': Array(0.14125995, dtype=float32)}
{'loss_inverse': Array(1.9194063e-06, dtype=float32)}


  7%|▋         | 70189/1000000 [3:19:17<29:49:09,  8.66it/s]

{'loss': Array(0.15448943, dtype=float32), 'loss_cross_entropy': Array(0.14578235, dtype=float32)}
{'loss_inverse': Array(1.4962199e-05, dtype=float32)}


  7%|▋         | 70199/1000000 [3:19:18<25:54:47,  9.97it/s]

{'loss': Array(0.13819511, dtype=float32), 'loss_cross_entropy': Array(0.12885147, dtype=float32)}
{'loss_inverse': Array(2.7128024e-06, dtype=float32)}


  7%|▋         | 70209/1000000 [3:19:20<29:53:56,  8.64it/s]

{'loss': Array(0.13606627, dtype=float32), 'loss_cross_entropy': Array(0.12820975, dtype=float32)}
{'loss_inverse': Array(1.6071328e-05, dtype=float32)}


  7%|▋         | 70219/1000000 [3:19:21<26:05:44,  9.90it/s]

{'loss': Array(0.14210482, dtype=float32), 'loss_cross_entropy': Array(0.13364106, dtype=float32)}
{'loss_inverse': Array(4.7445672e-05, dtype=float32)}


  7%|▋         | 70229/1000000 [3:19:23<31:41:39,  8.15it/s]

{'loss': Array(0.15691303, dtype=float32), 'loss_cross_entropy': Array(0.14766364, dtype=float32)}
{'loss_inverse': Array(9.842339e-05, dtype=float32)}


  7%|▋         | 70239/1000000 [3:19:24<26:24:58,  9.78it/s]

{'loss': Array(0.14628997, dtype=float32), 'loss_cross_entropy': Array(0.13768463, dtype=float32)}
{'loss_inverse': Array(1.931258e-06, dtype=float32)}


  7%|▋         | 70249/1000000 [3:19:26<31:30:38,  8.20it/s]

{'loss': Array(0.14383055, dtype=float32), 'loss_cross_entropy': Array(0.13512169, dtype=float32)}
{'loss_inverse': Array(4.2354874e-05, dtype=float32)}


  7%|▋         | 70259/1000000 [3:19:27<26:21:46,  9.80it/s]

{'loss': Array(0.16701767, dtype=float32), 'loss_cross_entropy': Array(0.15735476, dtype=float32)}
{'loss_inverse': Array(0.00013205, dtype=float32)}


  7%|▋         | 70269/1000000 [3:19:29<25:51:20,  9.99it/s]

{'loss': Array(0.15546116, dtype=float32), 'loss_cross_entropy': Array(0.1461191, dtype=float32)}
{'loss_inverse': Array(7.368154e-05, dtype=float32)}


  7%|▋         | 70279/1000000 [3:19:31<28:49:21,  8.96it/s]

{'loss': Array(0.1209827, dtype=float32), 'loss_cross_entropy': Array(0.11356839, dtype=float32)}
{'loss_inverse': Array(5.5952346e-06, dtype=float32)}


  7%|▋         | 70289/1000000 [3:19:32<26:18:41,  9.82it/s]

{'loss': Array(0.14948802, dtype=float32), 'loss_cross_entropy': Array(0.1407743, dtype=float32)}
{'loss_inverse': Array(2.3103126e-05, dtype=float32)}


  7%|▋         | 70299/1000000 [3:19:34<30:03:08,  8.59it/s]

{'loss': Array(0.14311595, dtype=float32), 'loss_cross_entropy': Array(0.1340921, dtype=float32)}
{'loss_inverse': Array(2.0767948e-06, dtype=float32)}


  7%|▋         | 70309/1000000 [3:19:35<26:28:05,  9.76it/s]

{'loss': Array(0.15651846, dtype=float32), 'loss_cross_entropy': Array(0.14752598, dtype=float32)}
{'loss_inverse': Array(9.405766e-06, dtype=float32)}


  7%|▋         | 70319/1000000 [3:19:37<31:53:20,  8.10it/s]

{'loss': Array(0.14136033, dtype=float32), 'loss_cross_entropy': Array(0.13275866, dtype=float32)}
{'loss_inverse': Array(5.409905e-06, dtype=float32)}


  7%|▋         | 70329/1000000 [3:19:38<26:26:06,  9.77it/s]

{'loss': Array(0.13985217, dtype=float32), 'loss_cross_entropy': Array(0.13170888, dtype=float32)}
{'loss_inverse': Array(2.0775038e-05, dtype=float32)}


  7%|▋         | 70339/1000000 [3:19:40<25:22:14, 10.18it/s]

{'loss': Array(0.15015136, dtype=float32), 'loss_cross_entropy': Array(0.14191307, dtype=float32)}
{'loss_inverse': Array(0.00021956, dtype=float32)}


  7%|▋         | 70349/1000000 [3:19:42<30:00:39,  8.60it/s]

{'loss': Array(0.13724677, dtype=float32), 'loss_cross_entropy': Array(0.12950967, dtype=float32)}
{'loss_inverse': Array(4.3458975e-05, dtype=float32)}


  7%|▋         | 70359/1000000 [3:19:43<26:08:29,  9.88it/s]

{'loss': Array(0.14968066, dtype=float32), 'loss_cross_entropy': Array(0.14100306, dtype=float32)}
{'loss_inverse': Array(6.288029e-05, dtype=float32)}


  7%|▋         | 70369/1000000 [3:19:45<34:35:26,  7.47it/s]

{'loss': Array(0.14122261, dtype=float32), 'loss_cross_entropy': Array(0.13234971, dtype=float32)}
{'loss_inverse': Array(6.1185974e-06, dtype=float32)}


  7%|▋         | 70379/1000000 [3:19:46<26:43:06,  9.66it/s]

{'loss': Array(0.15411228, dtype=float32), 'loss_cross_entropy': Array(0.14511757, dtype=float32)}
{'loss_inverse': Array(0.00048312, dtype=float32)}


  7%|▋         | 70389/1000000 [3:19:48<38:24:13,  6.72it/s]

{'loss': Array(0.15344393, dtype=float32), 'loss_cross_entropy': Array(0.14449227, dtype=float32)}
{'loss_inverse': Array(9.2698965e-06, dtype=float32)}


  7%|▋         | 70399/1000000 [3:19:49<27:17:36,  9.46it/s]

{'loss': Array(0.1593774, dtype=float32), 'loss_cross_entropy': Array(0.15059279, dtype=float32)}
{'loss_inverse': Array(0.00030328, dtype=float32)}


  7%|▋         | 70409/1000000 [3:19:51<38:02:44,  6.79it/s]

{'loss': Array(0.14118245, dtype=float32), 'loss_cross_entropy': Array(0.13272014, dtype=float32)}
{'loss_inverse': Array(0.00043012, dtype=float32)}


  7%|▋         | 70419/1000000 [3:19:53<27:37:02,  9.35it/s]

{'loss': Array(0.15430094, dtype=float32), 'loss_cross_entropy': Array(0.14466779, dtype=float32)}
{'loss_inverse': Array(2.2210856e-05, dtype=float32)}


  7%|▋         | 70429/1000000 [3:19:54<25:45:16, 10.03it/s]

{'loss': Array(0.14250295, dtype=float32), 'loss_cross_entropy': Array(0.13358234, dtype=float32)}
{'loss_inverse': Array(2.6731412e-05, dtype=float32)}


  7%|▋         | 70439/1000000 [3:19:56<34:10:43,  7.55it/s]

{'loss': Array(0.13521883, dtype=float32), 'loss_cross_entropy': Array(0.12698285, dtype=float32)}
{'loss_inverse': Array(7.4279524e-05, dtype=float32)}


  7%|▋         | 70449/1000000 [3:19:57<27:06:49,  9.52it/s]

{'loss': Array(0.15006627, dtype=float32), 'loss_cross_entropy': Array(0.14211567, dtype=float32)}
{'loss_inverse': Array(8.330409e-05, dtype=float32)}


  7%|▋         | 70459/1000000 [3:19:59<38:29:19,  6.71it/s]

{'loss': Array(0.15337005, dtype=float32), 'loss_cross_entropy': Array(0.14406054, dtype=float32)}
{'loss_inverse': Array(1.2975981e-05, dtype=float32)}


  7%|▋         | 70469/1000000 [3:20:00<27:36:12,  9.35it/s]

{'loss': Array(0.14230676, dtype=float32), 'loss_cross_entropy': Array(0.1343299, dtype=float32)}
{'loss_inverse': Array(0.00028388, dtype=float32)}


  7%|▋         | 70479/1000000 [3:20:02<25:40:11, 10.06it/s]

{'loss': Array(0.13485253, dtype=float32), 'loss_cross_entropy': Array(0.1265084, dtype=float32)}
{'loss_inverse': Array(0.00024242, dtype=float32)}


  7%|▋         | 70489/1000000 [3:20:04<28:43:36,  8.99it/s]

{'loss': Array(0.13260645, dtype=float32), 'loss_cross_entropy': Array(0.12441587, dtype=float32)}
{'loss_inverse': Array(1.5269914e-05, dtype=float32)}


  7%|▋         | 70499/1000000 [3:20:05<25:54:21,  9.97it/s]

{'loss': Array(0.13985299, dtype=float32), 'loss_cross_entropy': Array(0.13190858, dtype=float32)}
{'loss_inverse': Array(3.3012282e-06, dtype=float32)}


  7%|▋         | 70509/1000000 [3:20:13<94:51:18,  2.72it/s] 

{'loss': Array(0.13242528, dtype=float32), 'loss_cross_entropy': Array(0.12380411, dtype=float32)}
{'loss_inverse': Array(4.8629812e-05, dtype=float32)}


  7%|▋         | 70519/1000000 [3:20:15<37:11:21,  6.94it/s] 

{'loss': Array(0.15614119, dtype=float32), 'loss_cross_entropy': Array(0.14638184, dtype=float32)}
{'loss_inverse': Array(0.00016169, dtype=float32)}


  7%|▋         | 70529/1000000 [3:20:16<27:07:14,  9.52it/s]

{'loss': Array(0.15005413, dtype=float32), 'loss_cross_entropy': Array(0.14127976, dtype=float32)}
{'loss_inverse': Array(1.5265055e-05, dtype=float32)}


  7%|▋         | 70539/1000000 [3:20:18<28:53:32,  8.94it/s]

{'loss': Array(0.14720316, dtype=float32), 'loss_cross_entropy': Array(0.13906832, dtype=float32)}
{'loss_inverse': Array(0.00024964, dtype=float32)}


  7%|▋         | 70549/1000000 [3:20:19<25:52:23,  9.98it/s]

{'loss': Array(0.15103717, dtype=float32), 'loss_cross_entropy': Array(0.14231801, dtype=float32)}
{'loss_inverse': Array(0.00020828, dtype=float32)}


  7%|▋         | 70559/1000000 [3:20:21<29:42:10,  8.69it/s]

{'loss': Array(0.14656024, dtype=float32), 'loss_cross_entropy': Array(0.13780746, dtype=float32)}
{'loss_inverse': Array(2.1426051e-06, dtype=float32)}


  7%|▋         | 70569/1000000 [3:20:22<26:15:49,  9.83it/s]

{'loss': Array(0.16898651, dtype=float32), 'loss_cross_entropy': Array(0.1591079, dtype=float32)}
{'loss_inverse': Array(0.00084874, dtype=float32)}


  7%|▋         | 70579/1000000 [3:20:24<32:37:02,  7.92it/s]

{'loss': Array(0.17008166, dtype=float32), 'loss_cross_entropy': Array(0.1604311, dtype=float32)}
{'loss_inverse': Array(1.2011492e-05, dtype=float32)}


  7%|▋         | 70589/1000000 [3:20:26<26:23:17,  9.78it/s]

{'loss': Array(0.14000773, dtype=float32), 'loss_cross_entropy': Array(0.13216019, dtype=float32)}
{'loss_inverse': Array(3.8957855e-06, dtype=float32)}


  7%|▋         | 70599/1000000 [3:20:27<31:57:27,  8.08it/s]

{'loss': Array(0.1572861, dtype=float32), 'loss_cross_entropy': Array(0.14832427, dtype=float32)}
{'loss_inverse': Array(4.1256204e-05, dtype=float32)}


  7%|▋         | 70609/1000000 [3:20:29<26:42:48,  9.66it/s]

{'loss': Array(0.15491909, dtype=float32), 'loss_cross_entropy': Array(0.14657484, dtype=float32)}
{'loss_inverse': Array(6.8804127e-07, dtype=float32)}


  7%|▋         | 70619/1000000 [3:20:30<26:05:15,  9.90it/s]

{'loss': Array(0.14674948, dtype=float32), 'loss_cross_entropy': Array(0.13825975, dtype=float32)}
{'loss_inverse': Array(4.353516e-06, dtype=float32)}


  7%|▋         | 70629/1000000 [3:20:32<28:21:38,  9.10it/s]

{'loss': Array(0.14633006, dtype=float32), 'loss_cross_entropy': Array(0.13727781, dtype=float32)}
{'loss_inverse': Array(1.4478955e-05, dtype=float32)}


  7%|▋         | 70639/1000000 [3:20:33<25:37:54, 10.07it/s]

{'loss': Array(0.15869725, dtype=float32), 'loss_cross_entropy': Array(0.14965181, dtype=float32)}
{'loss_inverse': Array(2.2177512e-06, dtype=float32)}


  7%|▋         | 70649/1000000 [3:20:35<30:03:16,  8.59it/s]

{'loss': Array(0.16203848, dtype=float32), 'loss_cross_entropy': Array(0.15296589, dtype=float32)}
{'loss_inverse': Array(2.812363e-06, dtype=float32)}


  7%|▋         | 70659/1000000 [3:20:37<26:00:08,  9.93it/s]

{'loss': Array(0.1540827, dtype=float32), 'loss_cross_entropy': Array(0.14522675, dtype=float32)}
{'loss_inverse': Array(8.1008315e-05, dtype=float32)}


  7%|▋         | 70669/1000000 [3:20:38<31:36:04,  8.17it/s]

{'loss': Array(0.12167003, dtype=float32), 'loss_cross_entropy': Array(0.11409005, dtype=float32)}
{'loss_inverse': Array(1.0486626e-06, dtype=float32)}


  7%|▋         | 70679/1000000 [3:20:40<26:53:59,  9.60it/s]

{'loss': Array(0.14203155, dtype=float32), 'loss_cross_entropy': Array(0.13336214, dtype=float32)}
{'loss_inverse': Array(1.326769e-06, dtype=float32)}


  7%|▋         | 70689/1000000 [3:20:41<26:19:31,  9.81it/s]

{'loss': Array(0.14132456, dtype=float32), 'loss_cross_entropy': Array(0.13250373, dtype=float32)}
{'loss_inverse': Array(4.6680998e-05, dtype=float32)}


  7%|▋         | 70699/1000000 [3:20:43<30:28:43,  8.47it/s]

{'loss': Array(0.14939924, dtype=float32), 'loss_cross_entropy': Array(0.14100537, dtype=float32)}
{'loss_inverse': Array(2.8655118e-06, dtype=float32)}


  7%|▋         | 70709/1000000 [3:20:44<25:46:25, 10.02it/s]

{'loss': Array(0.14960815, dtype=float32), 'loss_cross_entropy': Array(0.1403852, dtype=float32)}
{'loss_inverse': Array(1.5079546e-05, dtype=float32)}


  7%|▋         | 70719/1000000 [3:20:46<33:54:19,  7.61it/s]

{'loss': Array(0.13744693, dtype=float32), 'loss_cross_entropy': Array(0.12881573, dtype=float32)}
{'loss_inverse': Array(0.00011325, dtype=float32)}


  7%|▋         | 70729/1000000 [3:20:48<27:17:23,  9.46it/s]

{'loss': Array(0.1350255, dtype=float32), 'loss_cross_entropy': Array(0.126501, dtype=float32)}
{'loss_inverse': Array(0.00012267, dtype=float32)}


  7%|▋         | 70739/1000000 [3:20:49<38:27:20,  6.71it/s]

{'loss': Array(0.15061925, dtype=float32), 'loss_cross_entropy': Array(0.14188756, dtype=float32)}
{'loss_inverse': Array(2.4754044e-07, dtype=float32)}


  7%|▋         | 70749/1000000 [3:20:51<27:40:59,  9.32it/s]

{'loss': Array(0.15604876, dtype=float32), 'loss_cross_entropy': Array(0.14768204, dtype=float32)}
{'loss_inverse': Array(1.2454491e-05, dtype=float32)}


  7%|▋         | 70759/1000000 [3:20:53<38:15:59,  6.75it/s]

{'loss': Array(0.16093045, dtype=float32), 'loss_cross_entropy': Array(0.15116848, dtype=float32)}
{'loss_inverse': Array(6.573956e-05, dtype=float32)}


  7%|▋         | 70769/1000000 [3:20:54<27:39:42,  9.33it/s]

{'loss': Array(0.1489868, dtype=float32), 'loss_cross_entropy': Array(0.14100705, dtype=float32)}
{'loss_inverse': Array(6.8601294e-06, dtype=float32)}


  7%|▋         | 70779/1000000 [3:20:55<25:33:35, 10.10it/s]

{'loss': Array(0.14588463, dtype=float32), 'loss_cross_entropy': Array(0.13700294, dtype=float32)}
{'loss_inverse': Array(3.9594586e-05, dtype=float32)}


  7%|▋         | 70789/1000000 [3:20:57<34:41:05,  7.44it/s]

{'loss': Array(0.1521676, dtype=float32), 'loss_cross_entropy': Array(0.14327817, dtype=float32)}
{'loss_inverse': Array(1.5092477e-06, dtype=float32)}


  7%|▋         | 70799/1000000 [3:20:59<26:42:02,  9.67it/s]

{'loss': Array(0.13674013, dtype=float32), 'loss_cross_entropy': Array(0.12842123, dtype=float32)}
{'loss_inverse': Array(4.4108492e-07, dtype=float32)}


  7%|▋         | 70809/1000000 [3:21:00<38:11:46,  6.76it/s]

{'loss': Array(0.14447527, dtype=float32), 'loss_cross_entropy': Array(0.13551834, dtype=float32)}
{'loss_inverse': Array(8.344938e-05, dtype=float32)}


  7%|▋         | 70819/1000000 [3:21:02<27:56:54,  9.24it/s]

{'loss': Array(0.14721401, dtype=float32), 'loss_cross_entropy': Array(0.13899714, dtype=float32)}
{'loss_inverse': Array(6.6591224e-06, dtype=float32)}


  7%|▋         | 70829/1000000 [3:21:03<25:31:12, 10.11it/s]

{'loss': Array(0.17033821, dtype=float32), 'loss_cross_entropy': Array(0.16079275, dtype=float32)}
{'loss_inverse': Array(4.5665405e-05, dtype=float32)}


  7%|▋         | 70839/1000000 [3:21:05<28:15:09,  9.14it/s]

{'loss': Array(0.14574747, dtype=float32), 'loss_cross_entropy': Array(0.13701952, dtype=float32)}
{'loss_inverse': Array(0.00019148, dtype=float32)}


  7%|▋         | 70849/1000000 [3:21:06<26:04:05,  9.90it/s]

{'loss': Array(0.14954385, dtype=float32), 'loss_cross_entropy': Array(0.14045401, dtype=float32)}
{'loss_inverse': Array(7.126569e-05, dtype=float32)}


  7%|▋         | 70859/1000000 [3:21:08<34:26:16,  7.49it/s]

{'loss': Array(0.13943449, dtype=float32), 'loss_cross_entropy': Array(0.1310557, dtype=float32)}
{'loss_inverse': Array(1.8596347e-05, dtype=float32)}


  7%|▋         | 70869/1000000 [3:21:10<26:42:58,  9.66it/s]

{'loss': Array(0.15305142, dtype=float32), 'loss_cross_entropy': Array(0.14437298, dtype=float32)}
{'loss_inverse': Array(3.5821047e-06, dtype=float32)}


  7%|▋         | 70879/1000000 [3:21:11<25:41:58, 10.04it/s]

{'loss': Array(0.13935885, dtype=float32), 'loss_cross_entropy': Array(0.13114409, dtype=float32)}
{'loss_inverse': Array(1.0558687e-07, dtype=float32)}


  7%|▋         | 70889/1000000 [3:21:13<29:51:26,  8.64it/s]

{'loss': Array(0.16319989, dtype=float32), 'loss_cross_entropy': Array(0.15403365, dtype=float32)}
{'loss_inverse': Array(1.4775592e-05, dtype=float32)}


  7%|▋         | 70899/1000000 [3:21:14<25:48:09, 10.00it/s]

{'loss': Array(0.16751224, dtype=float32), 'loss_cross_entropy': Array(0.15795603, dtype=float32)}
{'loss_inverse': Array(3.489475e-05, dtype=float32)}


  7%|▋         | 70909/1000000 [3:21:16<31:13:34,  8.26it/s]

{'loss': Array(0.14124684, dtype=float32), 'loss_cross_entropy': Array(0.13271733, dtype=float32)}
{'loss_inverse': Array(4.016314e-05, dtype=float32)}


  7%|▋         | 70919/1000000 [3:21:17<26:30:47,  9.73it/s]

{'loss': Array(0.13685986, dtype=float32), 'loss_cross_entropy': Array(0.12935594, dtype=float32)}
{'loss_inverse': Array(7.895149e-05, dtype=float32)}


  7%|▋         | 70929/1000000 [3:21:19<32:15:26,  8.00it/s]

{'loss': Array(0.15234376, dtype=float32), 'loss_cross_entropy': Array(0.1438648, dtype=float32)}
{'loss_inverse': Array(6.555039e-07, dtype=float32)}


  7%|▋         | 70939/1000000 [3:21:20<26:16:46,  9.82it/s]

{'loss': Array(0.14011246, dtype=float32), 'loss_cross_entropy': Array(0.13180989, dtype=float32)}
{'loss_inverse': Array(9.908381e-05, dtype=float32)}


  7%|▋         | 70949/1000000 [3:21:22<31:27:24,  8.20it/s]

{'loss': Array(0.1676627, dtype=float32), 'loss_cross_entropy': Array(0.15840548, dtype=float32)}
{'loss_inverse': Array(3.3459267e-07, dtype=float32)}


  7%|▋         | 70959/1000000 [3:21:24<26:14:41,  9.83it/s]

{'loss': Array(0.15949216, dtype=float32), 'loss_cross_entropy': Array(0.14971094, dtype=float32)}
{'loss_inverse': Array(9.416962e-06, dtype=float32)}


  7%|▋         | 70969/1000000 [3:21:25<35:00:41,  7.37it/s]

{'loss': Array(0.14889021, dtype=float32), 'loss_cross_entropy': Array(0.1397901, dtype=float32)}
{'loss_inverse': Array(0.00012959, dtype=float32)}


  7%|▋         | 70979/1000000 [3:21:27<26:46:50,  9.64it/s]

{'loss': Array(0.14358893, dtype=float32), 'loss_cross_entropy': Array(0.13514124, dtype=float32)}
{'loss_inverse': Array(0.00014434, dtype=float32)}


  7%|▋         | 70989/1000000 [3:21:29<38:10:17,  6.76it/s]

{'loss': Array(0.14520736, dtype=float32), 'loss_cross_entropy': Array(0.13673931, dtype=float32)}
{'loss_inverse': Array(2.1433994e-05, dtype=float32)}


  7%|▋         | 70999/1000000 [3:21:30<27:37:19,  9.34it/s]

{'loss': Array(0.15582137, dtype=float32), 'loss_cross_entropy': Array(0.14668258, dtype=float32)}
{'loss_inverse': Array(8.570655e-07, dtype=float32)}


  7%|▋         | 71009/1000000 [3:21:38<99:32:40,  2.59it/s] 

{'loss': Array(0.14371489, dtype=float32), 'loss_cross_entropy': Array(0.1355872, dtype=float32)}
{'loss_inverse': Array(3.38322e-06, dtype=float32)}


  7%|▋         | 71019/1000000 [3:21:40<37:50:24,  6.82it/s] 

{'loss': Array(0.14028366, dtype=float32), 'loss_cross_entropy': Array(0.13194981, dtype=float32)}
{'loss_inverse': Array(1.0732073e-06, dtype=float32)}


  7%|▋         | 71029/1000000 [3:21:41<27:22:53,  9.42it/s]

{'loss': Array(0.15396576, dtype=float32), 'loss_cross_entropy': Array(0.14558253, dtype=float32)}
{'loss_inverse': Array(1.5790105e-06, dtype=float32)}


  7%|▋         | 71039/1000000 [3:21:43<32:07:16,  8.03it/s]

{'loss': Array(0.15750465, dtype=float32), 'loss_cross_entropy': Array(0.14753929, dtype=float32)}
{'loss_inverse': Array(3.9655847e-06, dtype=float32)}


  7%|▋         | 71049/1000000 [3:21:44<26:02:14,  9.91it/s]

{'loss': Array(0.14699791, dtype=float32), 'loss_cross_entropy': Array(0.13853057, dtype=float32)}
{'loss_inverse': Array(1.2837416e-05, dtype=float32)}


  7%|▋         | 71059/1000000 [3:21:46<38:03:37,  6.78it/s]

{'loss': Array(0.16865854, dtype=float32), 'loss_cross_entropy': Array(0.15869613, dtype=float32)}
{'loss_inverse': Array(3.7603966e-06, dtype=float32)}


  7%|▋         | 71069/1000000 [3:21:47<27:21:52,  9.43it/s]

{'loss': Array(0.13569681, dtype=float32), 'loss_cross_entropy': Array(0.12736656, dtype=float32)}
{'loss_inverse': Array(0.00012458, dtype=float32)}


  7%|▋         | 71079/1000000 [3:21:49<39:32:13,  6.53it/s]

{'loss': Array(0.16873544, dtype=float32), 'loss_cross_entropy': Array(0.15904896, dtype=float32)}
{'loss_inverse': Array(6.2144377e-06, dtype=float32)}


  7%|▋         | 71089/1000000 [3:21:51<28:03:36,  9.20it/s]

{'loss': Array(0.14073056, dtype=float32), 'loss_cross_entropy': Array(0.13191764, dtype=float32)}
{'loss_inverse': Array(5.288007e-05, dtype=float32)}


  7%|▋         | 71099/1000000 [3:21:52<38:57:31,  6.62it/s]

{'loss': Array(0.14178789, dtype=float32), 'loss_cross_entropy': Array(0.13306245, dtype=float32)}
{'loss_inverse': Array(9.899047e-05, dtype=float32)}


  7%|▋         | 71109/1000000 [3:21:54<27:25:37,  9.41it/s]

{'loss': Array(0.1792654, dtype=float32), 'loss_cross_entropy': Array(0.16955177, dtype=float32)}
{'loss_inverse': Array(0.00011919, dtype=float32)}


  7%|▋         | 71119/1000000 [3:21:55<26:10:46,  9.86it/s]

{'loss': Array(0.14829704, dtype=float32), 'loss_cross_entropy': Array(0.13944577, dtype=float32)}
{'loss_inverse': Array(4.6313194e-06, dtype=float32)}


  7%|▋         | 71129/1000000 [3:21:57<30:25:48,  8.48it/s]

{'loss': Array(0.14773123, dtype=float32), 'loss_cross_entropy': Array(0.13898852, dtype=float32)}
{'loss_inverse': Array(0.00014253, dtype=float32)}


  7%|▋         | 71139/1000000 [3:21:58<25:58:26,  9.93it/s]

{'loss': Array(0.177572, dtype=float32), 'loss_cross_entropy': Array(0.16788551, dtype=float32)}
{'loss_inverse': Array(6.9825006e-07, dtype=float32)}


  7%|▋         | 71149/1000000 [3:22:00<29:48:46,  8.65it/s]

{'loss': Array(0.13498382, dtype=float32), 'loss_cross_entropy': Array(0.12718438, dtype=float32)}
{'loss_inverse': Array(1.0910876e-05, dtype=float32)}


  7%|▋         | 71159/1000000 [3:22:02<26:31:40,  9.73it/s]

{'loss': Array(0.13701963, dtype=float32), 'loss_cross_entropy': Array(0.12974918, dtype=float32)}
{'loss_inverse': Array(1.8466347e-06, dtype=float32)}


  7%|▋         | 71169/1000000 [3:22:03<34:16:50,  7.53it/s]

{'loss': Array(0.1366523, dtype=float32), 'loss_cross_entropy': Array(0.12884001, dtype=float32)}
{'loss_inverse': Array(2.6378189e-06, dtype=float32)}


  7%|▋         | 71179/1000000 [3:22:05<26:47:17,  9.63it/s]

{'loss': Array(0.15134417, dtype=float32), 'loss_cross_entropy': Array(0.1433798, dtype=float32)}
{'loss_inverse': Array(0.00018953, dtype=float32)}


  7%|▋         | 71189/1000000 [3:22:06<26:34:11,  9.71it/s]

{'loss': Array(0.14188622, dtype=float32), 'loss_cross_entropy': Array(0.13370459, dtype=float32)}
{'loss_inverse': Array(2.802136e-06, dtype=float32)}


  7%|▋         | 71199/1000000 [3:22:08<31:43:09,  8.13it/s]

{'loss': Array(0.14399885, dtype=float32), 'loss_cross_entropy': Array(0.1353433, dtype=float32)}
{'loss_inverse': Array(2.4640705e-05, dtype=float32)}


  7%|▋         | 71209/1000000 [3:22:10<26:17:23,  9.81it/s]

{'loss': Array(0.12720786, dtype=float32), 'loss_cross_entropy': Array(0.12046011, dtype=float32)}
{'loss_inverse': Array(0.00013791, dtype=float32)}


  7%|▋         | 71219/1000000 [3:22:11<31:21:11,  8.23it/s]

{'loss': Array(0.1560707, dtype=float32), 'loss_cross_entropy': Array(0.14637677, dtype=float32)}
{'loss_inverse': Array(6.3085594e-05, dtype=float32)}


  7%|▋         | 71229/1000000 [3:22:13<26:41:36,  9.66it/s]

{'loss': Array(0.14201823, dtype=float32), 'loss_cross_entropy': Array(0.13344495, dtype=float32)}
{'loss_inverse': Array(7.353593e-05, dtype=float32)}


  7%|▋         | 71239/1000000 [3:22:15<32:12:48,  8.01it/s]

{'loss': Array(0.15626732, dtype=float32), 'loss_cross_entropy': Array(0.14690462, dtype=float32)}
{'loss_inverse': Array(0.00028615, dtype=float32)}


  7%|▋         | 71249/1000000 [3:22:16<26:42:35,  9.66it/s]

{'loss': Array(0.15603761, dtype=float32), 'loss_cross_entropy': Array(0.14707097, dtype=float32)}
{'loss_inverse': Array(3.1668587e-05, dtype=float32)}


  7%|▋         | 71259/1000000 [3:22:18<31:48:00,  8.11it/s]

{'loss': Array(0.15089762, dtype=float32), 'loss_cross_entropy': Array(0.14232902, dtype=float32)}
{'loss_inverse': Array(0.00028379, dtype=float32)}


  7%|▋         | 71269/1000000 [3:22:19<26:03:19,  9.90it/s]

{'loss': Array(0.15762106, dtype=float32), 'loss_cross_entropy': Array(0.14891307, dtype=float32)}
{'loss_inverse': Array(0.00040796, dtype=float32)}


  7%|▋         | 71279/1000000 [3:22:21<34:33:47,  7.46it/s]

{'loss': Array(0.14425251, dtype=float32), 'loss_cross_entropy': Array(0.13550863, dtype=float32)}
{'loss_inverse': Array(3.4804282e-06, dtype=float32)}


  7%|▋         | 71289/1000000 [3:22:22<26:45:31,  9.64it/s]

{'loss': Array(0.1490801, dtype=float32), 'loss_cross_entropy': Array(0.14072065, dtype=float32)}
{'loss_inverse': Array(0.00021557, dtype=float32)}


  7%|▋         | 71299/1000000 [3:22:24<38:59:49,  6.62it/s]

{'loss': Array(0.13601433, dtype=float32), 'loss_cross_entropy': Array(0.12752786, dtype=float32)}
{'loss_inverse': Array(2.2783306e-05, dtype=float32)}


  7%|▋         | 71309/1000000 [3:22:26<28:09:10,  9.16it/s]

{'loss': Array(0.1503753, dtype=float32), 'loss_cross_entropy': Array(0.14190403, dtype=float32)}
{'loss_inverse': Array(0.00019729, dtype=float32)}


  7%|▋         | 71319/1000000 [3:22:27<39:01:42,  6.61it/s]

{'loss': Array(0.13836014, dtype=float32), 'loss_cross_entropy': Array(0.13034569, dtype=float32)}
{'loss_inverse': Array(6.299278e-05, dtype=float32)}


  7%|▋         | 71329/1000000 [3:22:29<28:01:04,  9.21it/s]

{'loss': Array(0.14133653, dtype=float32), 'loss_cross_entropy': Array(0.13298514, dtype=float32)}
{'loss_inverse': Array(8.2171486e-05, dtype=float32)}


  7%|▋         | 71339/1000000 [3:22:30<25:56:49,  9.94it/s]

{'loss': Array(0.14580682, dtype=float32), 'loss_cross_entropy': Array(0.13720141, dtype=float32)}
{'loss_inverse': Array(0.00015089, dtype=float32)}


  7%|▋         | 71349/1000000 [3:22:32<31:43:46,  8.13it/s]

{'loss': Array(0.1461315, dtype=float32), 'loss_cross_entropy': Array(0.13765933, dtype=float32)}
{'loss_inverse': Array(8.8490115e-06, dtype=float32)}


  7%|▋         | 71359/1000000 [3:22:33<26:53:19,  9.59it/s]

{'loss': Array(0.17034583, dtype=float32), 'loss_cross_entropy': Array(0.16119508, dtype=float32)}
{'loss_inverse': Array(0.00013847, dtype=float32)}


  7%|▋         | 71369/1000000 [3:22:35<34:55:09,  7.39it/s]

{'loss': Array(0.14042418, dtype=float32), 'loss_cross_entropy': Array(0.13260141, dtype=float32)}
{'loss_inverse': Array(8.809622e-05, dtype=float32)}


  7%|▋         | 71379/1000000 [3:22:37<27:01:05,  9.55it/s]

{'loss': Array(0.17372611, dtype=float32), 'loss_cross_entropy': Array(0.16354826, dtype=float32)}
{'loss_inverse': Array(5.3497708e-05, dtype=float32)}


  7%|▋         | 71389/1000000 [3:22:38<34:27:20,  7.49it/s]

{'loss': Array(0.14388967, dtype=float32), 'loss_cross_entropy': Array(0.13562647, dtype=float32)}
{'loss_inverse': Array(9.927156e-06, dtype=float32)}


  7%|▋         | 71399/1000000 [3:22:40<27:21:35,  9.43it/s]

{'loss': Array(0.1420617, dtype=float32), 'loss_cross_entropy': Array(0.13386275, dtype=float32)}
{'loss_inverse': Array(8.04963e-05, dtype=float32)}


  7%|▋         | 71409/1000000 [3:22:42<38:21:38,  6.72it/s]

{'loss': Array(0.1510179, dtype=float32), 'loss_cross_entropy': Array(0.14212292, dtype=float32)}
{'loss_inverse': Array(9.1859e-06, dtype=float32)}


  7%|▋         | 71419/1000000 [3:22:43<28:04:06,  9.19it/s]

{'loss': Array(0.1620649, dtype=float32), 'loss_cross_entropy': Array(0.1527822, dtype=float32)}
{'loss_inverse': Array(3.6419933e-05, dtype=float32)}


  7%|▋         | 71429/1000000 [3:22:44<25:47:30, 10.00it/s]

{'loss': Array(0.13881885, dtype=float32), 'loss_cross_entropy': Array(0.13087702, dtype=float32)}
{'loss_inverse': Array(5.2770054e-05, dtype=float32)}


  7%|▋         | 71439/1000000 [3:22:46<31:39:01,  8.15it/s]

{'loss': Array(0.13931252, dtype=float32), 'loss_cross_entropy': Array(0.13128386, dtype=float32)}
{'loss_inverse': Array(9.523702e-06, dtype=float32)}


  7%|▋         | 71449/1000000 [3:22:48<26:48:31,  9.62it/s]

{'loss': Array(0.14813612, dtype=float32), 'loss_cross_entropy': Array(0.13906363, dtype=float32)}
{'loss_inverse': Array(7.980968e-05, dtype=float32)}


  7%|▋         | 71459/1000000 [3:22:49<38:03:52,  6.78it/s]

{'loss': Array(0.13441806, dtype=float32), 'loss_cross_entropy': Array(0.12597476, dtype=float32)}
{'loss_inverse': Array(2.6137518e-06, dtype=float32)}


  7%|▋         | 71469/1000000 [3:22:51<27:18:53,  9.44it/s]

{'loss': Array(0.16124779, dtype=float32), 'loss_cross_entropy': Array(0.15166938, dtype=float32)}
{'loss_inverse': Array(2.1568181e-05, dtype=float32)}


  7%|▋         | 71479/1000000 [3:22:52<25:30:02, 10.11it/s]

{'loss': Array(0.1443189, dtype=float32), 'loss_cross_entropy': Array(0.13526992, dtype=float32)}
{'loss_inverse': Array(0.00021132, dtype=float32)}


  7%|▋         | 71489/1000000 [3:22:54<29:59:39,  8.60it/s]

{'loss': Array(0.15457843, dtype=float32), 'loss_cross_entropy': Array(0.14548504, dtype=float32)}
{'loss_inverse': Array(3.7574424e-05, dtype=float32)}


  7%|▋         | 71499/1000000 [3:22:55<26:23:19,  9.77it/s]

{'loss': Array(0.14453083, dtype=float32), 'loss_cross_entropy': Array(0.13614734, dtype=float32)}
{'loss_inverse': Array(6.242751e-05, dtype=float32)}


  7%|▋         | 71510/1000000 [3:23:04<70:09:55,  3.68it/s] 

{'loss': Array(0.1466523, dtype=float32), 'loss_cross_entropy': Array(0.13817978, dtype=float32)}
{'loss_inverse': Array(5.2054307e-05, dtype=float32)}


  7%|▋         | 71520/1000000 [3:23:05<32:35:51,  7.91it/s]

{'loss': Array(0.13523749, dtype=float32), 'loss_cross_entropy': Array(0.12714247, dtype=float32)}
{'loss_inverse': Array(1.6226712e-05, dtype=float32)}


  7%|▋         | 71530/1000000 [3:23:07<33:28:28,  7.70it/s]

{'loss': Array(0.15341412, dtype=float32), 'loss_cross_entropy': Array(0.14486782, dtype=float32)}
{'loss_inverse': Array(4.3222185e-06, dtype=float32)}


  7%|▋         | 71540/1000000 [3:23:08<26:41:11,  9.66it/s]

{'loss': Array(0.16928248, dtype=float32), 'loss_cross_entropy': Array(0.16020118, dtype=float32)}
{'loss_inverse': Array(5.158182e-05, dtype=float32)}


  7%|▋         | 71550/1000000 [3:23:10<31:27:49,  8.20it/s]

{'loss': Array(0.14073218, dtype=float32), 'loss_cross_entropy': Array(0.13242267, dtype=float32)}
{'loss_inverse': Array(9.5265736e-05, dtype=float32)}


  7%|▋         | 71560/1000000 [3:23:11<27:48:58,  9.27it/s]

{'loss': Array(0.15595971, dtype=float32), 'loss_cross_entropy': Array(0.14706154, dtype=float32)}
{'loss_inverse': Array(7.2703183e-06, dtype=float32)}


  7%|▋         | 71570/1000000 [3:23:13<26:11:07,  9.85it/s]

{'loss': Array(0.15596025, dtype=float32), 'loss_cross_entropy': Array(0.1472086, dtype=float32)}
{'loss_inverse': Array(2.392155e-05, dtype=float32)}


  7%|▋         | 71580/1000000 [3:23:15<28:34:13,  9.03it/s]

{'loss': Array(0.13100742, dtype=float32), 'loss_cross_entropy': Array(0.12289532, dtype=float32)}
{'loss_inverse': Array(4.809302e-06, dtype=float32)}


  7%|▋         | 71590/1000000 [3:23:16<25:24:16, 10.15it/s]

{'loss': Array(0.15993524, dtype=float32), 'loss_cross_entropy': Array(0.15180682, dtype=float32)}
{'loss_inverse': Array(3.7631806e-05, dtype=float32)}


  7%|▋         | 71600/1000000 [3:23:18<32:14:06,  8.00it/s]

{'loss': Array(0.152985, dtype=float32), 'loss_cross_entropy': Array(0.14383414, dtype=float32)}
{'loss_inverse': Array(3.0177722e-07, dtype=float32)}


  7%|▋         | 71610/1000000 [3:23:19<25:54:43,  9.95it/s]

{'loss': Array(0.15636463, dtype=float32), 'loss_cross_entropy': Array(0.1469331, dtype=float32)}
{'loss_inverse': Array(1.5648347e-06, dtype=float32)}


  7%|▋         | 71620/1000000 [3:23:21<31:44:54,  8.12it/s]

{'loss': Array(0.13717458, dtype=float32), 'loss_cross_entropy': Array(0.12859784, dtype=float32)}
{'loss_inverse': Array(4.3304808e-05, dtype=float32)}


  7%|▋         | 71630/1000000 [3:23:22<26:18:06,  9.80it/s]

{'loss': Array(0.14767812, dtype=float32), 'loss_cross_entropy': Array(0.13882075, dtype=float32)}
{'loss_inverse': Array(1.3369508e-06, dtype=float32)}


  7%|▋         | 71640/1000000 [3:23:24<34:44:26,  7.42it/s]

{'loss': Array(0.16123149, dtype=float32), 'loss_cross_entropy': Array(0.15188847, dtype=float32)}
{'loss_inverse': Array(1.5097221e-05, dtype=float32)}


  7%|▋         | 71650/1000000 [3:23:26<26:44:35,  9.64it/s]

{'loss': Array(0.1569833, dtype=float32), 'loss_cross_entropy': Array(0.14764802, dtype=float32)}
{'loss_inverse': Array(8.044262e-06, dtype=float32)}


  7%|▋         | 71660/1000000 [3:23:27<31:50:28,  8.10it/s]

{'loss': Array(0.14926073, dtype=float32), 'loss_cross_entropy': Array(0.14057301, dtype=float32)}
{'loss_inverse': Array(6.204285e-08, dtype=float32)}


  7%|▋         | 71670/1000000 [3:23:29<26:37:24,  9.69it/s]

{'loss': Array(0.1613242, dtype=float32), 'loss_cross_entropy': Array(0.15205535, dtype=float32)}
{'loss_inverse': Array(8.412533e-07, dtype=float32)}


  7%|▋         | 71680/1000000 [3:23:31<38:14:53,  6.74it/s]

{'loss': Array(0.14989443, dtype=float32), 'loss_cross_entropy': Array(0.14115268, dtype=float32)}
{'loss_inverse': Array(6.297578e-05, dtype=float32)}


  7%|▋         | 71690/1000000 [3:23:32<28:02:58,  9.19it/s]

{'loss': Array(0.14053032, dtype=float32), 'loss_cross_entropy': Array(0.13176799, dtype=float32)}
{'loss_inverse': Array(1.4790702e-07, dtype=float32)}


  7%|▋         | 71700/1000000 [3:23:33<25:24:20, 10.15it/s]

{'loss': Array(0.15959181, dtype=float32), 'loss_cross_entropy': Array(0.15023474, dtype=float32)}
{'loss_inverse': Array(9.005782e-07, dtype=float32)}


  7%|▋         | 71710/1000000 [3:23:35<28:14:05,  9.13it/s]

{'loss': Array(0.13545792, dtype=float32), 'loss_cross_entropy': Array(0.12745316, dtype=float32)}
{'loss_inverse': Array(8.9317965e-08, dtype=float32)}


  7%|▋         | 71720/1000000 [3:23:37<25:33:36, 10.09it/s]

{'loss': Array(0.14440596, dtype=float32), 'loss_cross_entropy': Array(0.13651964, dtype=float32)}
{'loss_inverse': Array(2.1854046e-06, dtype=float32)}


  7%|▋         | 71730/1000000 [3:23:38<28:11:58,  9.14it/s]

{'loss': Array(0.16128422, dtype=float32), 'loss_cross_entropy': Array(0.15254053, dtype=float32)}
{'loss_inverse': Array(2.9259976e-07, dtype=float32)}


  7%|▋         | 71740/1000000 [3:23:40<26:05:04,  9.89it/s]

{'loss': Array(0.14493829, dtype=float32), 'loss_cross_entropy': Array(0.13620625, dtype=float32)}
{'loss_inverse': Array(1.169296e-05, dtype=float32)}


  7%|▋         | 71750/1000000 [3:23:42<29:44:11,  8.67it/s]

{'loss': Array(0.1585509, dtype=float32), 'loss_cross_entropy': Array(0.14925586, dtype=float32)}
{'loss_inverse': Array(7.5482126e-07, dtype=float32)}


  7%|▋         | 71760/1000000 [3:23:43<25:41:36, 10.04it/s]

{'loss': Array(0.14434825, dtype=float32), 'loss_cross_entropy': Array(0.1361746, dtype=float32)}
{'loss_inverse': Array(6.1632672e-06, dtype=float32)}


  7%|▋         | 71770/1000000 [3:23:45<38:12:18,  6.75it/s]

{'loss': Array(0.13796566, dtype=float32), 'loss_cross_entropy': Array(0.12978536, dtype=float32)}
{'loss_inverse': Array(3.0459364e-06, dtype=float32)}


  7%|▋         | 71780/1000000 [3:23:46<27:46:13,  9.28it/s]

{'loss': Array(0.14641416, dtype=float32), 'loss_cross_entropy': Array(0.13772584, dtype=float32)}
{'loss_inverse': Array(2.480999e-06, dtype=float32)}


  7%|▋         | 71790/1000000 [3:23:48<26:10:23,  9.85it/s]

{'loss': Array(0.14982805, dtype=float32), 'loss_cross_entropy': Array(0.14110829, dtype=float32)}
{'loss_inverse': Array(6.077413e-06, dtype=float32)}


  7%|▋         | 71800/1000000 [3:23:49<29:25:47,  8.76it/s]

{'loss': Array(0.14605667, dtype=float32), 'loss_cross_entropy': Array(0.13680835, dtype=float32)}
{'loss_inverse': Array(7.3749848e-06, dtype=float32)}


  7%|▋         | 71810/1000000 [3:23:51<25:57:21,  9.93it/s]

{'loss': Array(0.16108944, dtype=float32), 'loss_cross_entropy': Array(0.15225436, dtype=float32)}
{'loss_inverse': Array(6.9596183e-07, dtype=float32)}


  7%|▋         | 71820/1000000 [3:23:52<29:20:47,  8.79it/s]

{'loss': Array(0.15380919, dtype=float32), 'loss_cross_entropy': Array(0.1448246, dtype=float32)}
{'loss_inverse': Array(4.0011323e-06, dtype=float32)}


  7%|▋         | 71830/1000000 [3:23:54<26:09:12,  9.86it/s]

{'loss': Array(0.1508094, dtype=float32), 'loss_cross_entropy': Array(0.14223306, dtype=float32)}
{'loss_inverse': Array(1.26343e-06, dtype=float32)}


  7%|▋         | 71840/1000000 [3:23:56<31:21:47,  8.22it/s]

{'loss': Array(0.14153723, dtype=float32), 'loss_cross_entropy': Array(0.13360201, dtype=float32)}
{'loss_inverse': Array(4.0132104e-08, dtype=float32)}


  7%|▋         | 71850/1000000 [3:23:57<26:30:27,  9.73it/s]

{'loss': Array(0.15118065, dtype=float32), 'loss_cross_entropy': Array(0.1425774, dtype=float32)}
{'loss_inverse': Array(1.2436506e-07, dtype=float32)}


  7%|▋         | 71860/1000000 [3:23:58<25:50:02,  9.98it/s]

{'loss': Array(0.14712803, dtype=float32), 'loss_cross_entropy': Array(0.13791162, dtype=float32)}
{'loss_inverse': Array(2.095499e-07, dtype=float32)}


  7%|▋         | 71870/1000000 [3:24:00<28:37:57,  9.00it/s]

{'loss': Array(0.1504743, dtype=float32), 'loss_cross_entropy': Array(0.14216295, dtype=float32)}
{'loss_inverse': Array(2.696088e-06, dtype=float32)}


  7%|▋         | 71880/1000000 [3:24:02<25:57:56,  9.93it/s]

{'loss': Array(0.15955631, dtype=float32), 'loss_cross_entropy': Array(0.15016763, dtype=float32)}
{'loss_inverse': Array(6.6549426e-08, dtype=float32)}


  7%|▋         | 71890/1000000 [3:24:03<30:02:21,  8.58it/s]

{'loss': Array(0.13904741, dtype=float32), 'loss_cross_entropy': Array(0.13053173, dtype=float32)}
{'loss_inverse': Array(9.607545e-07, dtype=float32)}


  7%|▋         | 71900/1000000 [3:24:05<26:11:42,  9.84it/s]

{'loss': Array(0.14035068, dtype=float32), 'loss_cross_entropy': Array(0.13236986, dtype=float32)}
{'loss_inverse': Array(9.495528e-06, dtype=float32)}


  7%|▋         | 71910/1000000 [3:24:07<32:37:37,  7.90it/s]

{'loss': Array(0.15624896, dtype=float32), 'loss_cross_entropy': Array(0.14772223, dtype=float32)}
{'loss_inverse': Array(1.4210954e-07, dtype=float32)}


  7%|▋         | 71920/1000000 [3:24:08<26:20:22,  9.79it/s]

{'loss': Array(0.15028226, dtype=float32), 'loss_cross_entropy': Array(0.1418612, dtype=float32)}
{'loss_inverse': Array(2.406081e-05, dtype=float32)}


  7%|▋         | 71930/1000000 [3:24:09<25:27:31, 10.13it/s]

{'loss': Array(0.14923574, dtype=float32), 'loss_cross_entropy': Array(0.14014035, dtype=float32)}
{'loss_inverse': Array(9.0708585e-07, dtype=float32)}


  7%|▋         | 71940/1000000 [3:24:11<29:48:48,  8.65it/s]

{'loss': Array(0.14902289, dtype=float32), 'loss_cross_entropy': Array(0.14063367, dtype=float32)}
{'loss_inverse': Array(1.17929666e-07, dtype=float32)}


  7%|▋         | 71950/1000000 [3:24:13<26:23:48,  9.77it/s]

{'loss': Array(0.13712111, dtype=float32), 'loss_cross_entropy': Array(0.12902756, dtype=float32)}
{'loss_inverse': Array(6.4866954e-06, dtype=float32)}


  7%|▋         | 71960/1000000 [3:24:14<31:46:46,  8.11it/s]

{'loss': Array(0.1347522, dtype=float32), 'loss_cross_entropy': Array(0.12634626, dtype=float32)}
{'loss_inverse': Array(4.1045618e-07, dtype=float32)}


  7%|▋         | 71970/1000000 [3:24:16<26:12:00,  9.84it/s]

{'loss': Array(0.16103534, dtype=float32), 'loss_cross_entropy': Array(0.15209852, dtype=float32)}
{'loss_inverse': Array(1.0882553e-06, dtype=float32)}


  7%|▋         | 71980/1000000 [3:24:18<36:02:59,  7.15it/s]

{'loss': Array(0.13667944, dtype=float32), 'loss_cross_entropy': Array(0.12841785, dtype=float32)}
{'loss_inverse': Array(1.7792107e-06, dtype=float32)}


  7%|▋         | 71990/1000000 [3:24:19<26:56:40,  9.57it/s]

{'loss': Array(0.1425658, dtype=float32), 'loss_cross_entropy': Array(0.13366172, dtype=float32)}
{'loss_inverse': Array(3.3273988e-08, dtype=float32)}


  7%|▋         | 72000/1000000 [3:24:20<25:26:54, 10.13it/s]

{'loss': Array(0.14719662, dtype=float32), 'loss_cross_entropy': Array(0.13840103, dtype=float32)}
{'loss_inverse': Array(3.0999422e-06, dtype=float32)}


  7%|▋         | 72010/1000000 [3:24:29<92:21:49,  2.79it/s] 

{'loss': Array(0.17093809, dtype=float32), 'loss_cross_entropy': Array(0.161085, dtype=float32)}
{'loss_inverse': Array(7.636959e-07, dtype=float32)}


  7%|▋         | 72020/1000000 [3:24:31<48:55:47,  5.27it/s]

{'loss': Array(0.13358875, dtype=float32), 'loss_cross_entropy': Array(0.12561189, dtype=float32)}
{'loss_inverse': Array(2.7538746e-07, dtype=float32)}


  7%|▋         | 72030/1000000 [3:24:32<29:38:26,  8.70it/s]

{'loss': Array(0.1674242, dtype=float32), 'loss_cross_entropy': Array(0.15827222, dtype=float32)}
{'loss_inverse': Array(1.02437376e-07, dtype=float32)}


  7%|▋         | 72040/1000000 [3:24:34<25:56:36,  9.94it/s]

{'loss': Array(0.16187261, dtype=float32), 'loss_cross_entropy': Array(0.15208618, dtype=float32)}
{'loss_inverse': Array(3.74886e-08, dtype=float32)}


  7%|▋         | 72050/1000000 [3:24:35<31:32:58,  8.17it/s]

{'loss': Array(0.13186543, dtype=float32), 'loss_cross_entropy': Array(0.12372384, dtype=float32)}
{'loss_inverse': Array(1.8146083e-07, dtype=float32)}


  7%|▋         | 72060/1000000 [3:24:37<26:49:30,  9.61it/s]

{'loss': Array(0.1500461, dtype=float32), 'loss_cross_entropy': Array(0.14129232, dtype=float32)}
{'loss_inverse': Array(6.834345e-05, dtype=float32)}


  7%|▋         | 72070/1000000 [3:24:39<34:48:19,  7.41it/s]

{'loss': Array(0.13792507, dtype=float32), 'loss_cross_entropy': Array(0.13061628, dtype=float32)}
{'loss_inverse': Array(7.759855e-06, dtype=float32)}


  7%|▋         | 72080/1000000 [3:24:40<27:18:15,  9.44it/s]

{'loss': Array(0.14940684, dtype=float32), 'loss_cross_entropy': Array(0.14060329, dtype=float32)}
{'loss_inverse': Array(6.659798e-05, dtype=float32)}


  7%|▋         | 72090/1000000 [3:24:42<34:49:49,  7.40it/s]

{'loss': Array(0.14237025, dtype=float32), 'loss_cross_entropy': Array(0.13327025, dtype=float32)}
{'loss_inverse': Array(3.293176e-07, dtype=float32)}


  7%|▋         | 72100/1000000 [3:24:43<26:43:58,  9.64it/s]

{'loss': Array(0.14055905, dtype=float32), 'loss_cross_entropy': Array(0.13229445, dtype=float32)}
{'loss_inverse': Array(1.7814715e-07, dtype=float32)}


  7%|▋         | 72110/1000000 [3:24:45<34:27:11,  7.48it/s]

{'loss': Array(0.12379398, dtype=float32), 'loss_cross_entropy': Array(0.11601136, dtype=float32)}
{'loss_inverse': Array(7.845932e-08, dtype=float32)}


  7%|▋         | 72120/1000000 [3:24:46<26:53:17,  9.59it/s]

{'loss': Array(0.15055053, dtype=float32), 'loss_cross_entropy': Array(0.1419414, dtype=float32)}
{'loss_inverse': Array(2.1872843e-07, dtype=float32)}


  7%|▋         | 72130/1000000 [3:24:48<25:12:39, 10.22it/s]

{'loss': Array(0.14395952, dtype=float32), 'loss_cross_entropy': Array(0.13504389, dtype=float32)}
{'loss_inverse': Array(3.4074017e-06, dtype=float32)}


  7%|▋         | 72140/1000000 [3:24:50<31:24:44,  8.21it/s]

{'loss': Array(0.16515301, dtype=float32), 'loss_cross_entropy': Array(0.15518673, dtype=float32)}
{'loss_inverse': Array(1.6553687e-07, dtype=float32)}


  7%|▋         | 72150/1000000 [3:24:51<26:09:24,  9.85it/s]

{'loss': Array(0.16209719, dtype=float32), 'loss_cross_entropy': Array(0.15278636, dtype=float32)}
{'loss_inverse': Array(3.3320524e-05, dtype=float32)}


  7%|▋         | 72160/1000000 [3:24:53<33:55:52,  7.60it/s]

{'loss': Array(0.15708087, dtype=float32), 'loss_cross_entropy': Array(0.14829896, dtype=float32)}
{'loss_inverse': Array(1.9258935e-07, dtype=float32)}


  7%|▋         | 72170/1000000 [3:24:54<26:37:32,  9.68it/s]

{'loss': Array(0.12821561, dtype=float32), 'loss_cross_entropy': Array(0.12047895, dtype=float32)}
{'loss_inverse': Array(2.2185084e-05, dtype=float32)}


  7%|▋         | 72180/1000000 [3:24:56<38:35:11,  6.68it/s]

{'loss': Array(0.14785916, dtype=float32), 'loss_cross_entropy': Array(0.1396759, dtype=float32)}
{'loss_inverse': Array(3.4323955e-06, dtype=float32)}


  7%|▋         | 72190/1000000 [3:24:57<27:45:19,  9.29it/s]

{'loss': Array(0.14993702, dtype=float32), 'loss_cross_entropy': Array(0.14093828, dtype=float32)}
{'loss_inverse': Array(1.2738933e-07, dtype=float32)}


  7%|▋         | 72200/1000000 [3:24:59<34:41:45,  7.43it/s]

{'loss': Array(0.14423095, dtype=float32), 'loss_cross_entropy': Array(0.1352712, dtype=float32)}
{'loss_inverse': Array(8.670314e-06, dtype=float32)}


  7%|▋         | 72210/1000000 [3:25:01<26:36:40,  9.68it/s]

{'loss': Array(0.15855825, dtype=float32), 'loss_cross_entropy': Array(0.14923206, dtype=float32)}
{'loss_inverse': Array(7.873528e-06, dtype=float32)}


  7%|▋         | 72220/1000000 [3:25:02<25:49:07,  9.98it/s]

{'loss': Array(0.13323352, dtype=float32), 'loss_cross_entropy': Array(0.1254607, dtype=float32)}
{'loss_inverse': Array(7.6197534e-07, dtype=float32)}


  7%|▋         | 72230/1000000 [3:25:04<28:39:49,  8.99it/s]

{'loss': Array(0.15820955, dtype=float32), 'loss_cross_entropy': Array(0.14830409, dtype=float32)}
{'loss_inverse': Array(1.474078e-05, dtype=float32)}


  7%|▋         | 72240/1000000 [3:25:05<25:59:57,  9.91it/s]

{'loss': Array(0.14774905, dtype=float32), 'loss_cross_entropy': Array(0.1392763, dtype=float32)}
{'loss_inverse': Array(1.3383209e-06, dtype=float32)}


  7%|▋         | 72250/1000000 [3:25:07<28:55:08,  8.91it/s]

{'loss': Array(0.13211708, dtype=float32), 'loss_cross_entropy': Array(0.12406881, dtype=float32)}
{'loss_inverse': Array(9.6762165e-08, dtype=float32)}


  7%|▋         | 72260/1000000 [3:25:08<27:18:37,  9.44it/s]

{'loss': Array(0.13144498, dtype=float32), 'loss_cross_entropy': Array(0.12378645, dtype=float32)}
{'loss_inverse': Array(2.1643276e-05, dtype=float32)}


  7%|▋         | 72270/1000000 [3:25:10<34:38:41,  7.44it/s]

{'loss': Array(0.13608907, dtype=float32), 'loss_cross_entropy': Array(0.1277395, dtype=float32)}
{'loss_inverse': Array(8.4769835e-07, dtype=float32)}


  7%|▋         | 72280/1000000 [3:25:12<26:47:51,  9.62it/s]

{'loss': Array(0.14141206, dtype=float32), 'loss_cross_entropy': Array(0.13333544, dtype=float32)}
{'loss_inverse': Array(5.3442517e-07, dtype=float32)}


  7%|▋         | 72290/1000000 [3:25:13<25:32:25, 10.09it/s]

{'loss': Array(0.14862995, dtype=float32), 'loss_cross_entropy': Array(0.13951997, dtype=float32)}
{'loss_inverse': Array(1.0523015e-06, dtype=float32)}


  7%|▋         | 72300/1000000 [3:25:15<28:37:52,  9.00it/s]

{'loss': Array(0.15084918, dtype=float32), 'loss_cross_entropy': Array(0.14206138, dtype=float32)}
{'loss_inverse': Array(2.0764772e-07, dtype=float32)}


  7%|▋         | 72310/1000000 [3:25:16<25:43:39, 10.02it/s]

{'loss': Array(0.15216823, dtype=float32), 'loss_cross_entropy': Array(0.14402366, dtype=float32)}
{'loss_inverse': Array(4.177899e-07, dtype=float32)}


  7%|▋         | 72320/1000000 [3:25:18<29:54:21,  8.62it/s]

{'loss': Array(0.13730158, dtype=float32), 'loss_cross_entropy': Array(0.1288523, dtype=float32)}
{'loss_inverse': Array(0.00021494, dtype=float32)}


  7%|▋         | 72330/1000000 [3:25:19<25:54:04,  9.95it/s]

{'loss': Array(0.1360045, dtype=float32), 'loss_cross_entropy': Array(0.12828963, dtype=float32)}
{'loss_inverse': Array(3.723464e-07, dtype=float32)}


  7%|▋         | 72340/1000000 [3:25:21<30:11:11,  8.54it/s]

{'loss': Array(0.15143844, dtype=float32), 'loss_cross_entropy': Array(0.14306293, dtype=float32)}
{'loss_inverse': Array(4.6560476e-07, dtype=float32)}


  7%|▋         | 72350/1000000 [3:25:23<27:00:16,  9.54it/s]

{'loss': Array(0.1477237, dtype=float32), 'loss_cross_entropy': Array(0.13832596, dtype=float32)}
{'loss_inverse': Array(1.6827274e-06, dtype=float32)}


  7%|▋         | 72360/1000000 [3:25:24<30:05:10,  8.56it/s]

{'loss': Array(0.14591423, dtype=float32), 'loss_cross_entropy': Array(0.1375724, dtype=float32)}
{'loss_inverse': Array(5.937749e-05, dtype=float32)}


  7%|▋         | 72370/1000000 [3:25:26<26:06:33,  9.87it/s]

{'loss': Array(0.14888547, dtype=float32), 'loss_cross_entropy': Array(0.14007746, dtype=float32)}
{'loss_inverse': Array(2.783779e-07, dtype=float32)}


  7%|▋         | 72380/1000000 [3:25:28<34:19:01,  7.51it/s]

{'loss': Array(0.15918124, dtype=float32), 'loss_cross_entropy': Array(0.15011552, dtype=float32)}
{'loss_inverse': Array(7.8793346e-07, dtype=float32)}


  7%|▋         | 72390/1000000 [3:25:29<26:39:47,  9.66it/s]

{'loss': Array(0.14774679, dtype=float32), 'loss_cross_entropy': Array(0.13909237, dtype=float32)}
{'loss_inverse': Array(0.00016036, dtype=float32)}


  7%|▋         | 72400/1000000 [3:25:31<38:43:00,  6.66it/s]

{'loss': Array(0.15106867, dtype=float32), 'loss_cross_entropy': Array(0.1421715, dtype=float32)}
{'loss_inverse': Array(8.614547e-05, dtype=float32)}


  7%|▋         | 72410/1000000 [3:25:32<28:13:17,  9.13it/s]

{'loss': Array(0.15904008, dtype=float32), 'loss_cross_entropy': Array(0.15044563, dtype=float32)}
{'loss_inverse': Array(8.495898e-05, dtype=float32)}


  7%|▋         | 72420/1000000 [3:25:34<35:42:51,  7.21it/s]

{'loss': Array(0.15471308, dtype=float32), 'loss_cross_entropy': Array(0.14612637, dtype=float32)}
{'loss_inverse': Array(8.5375467e-07, dtype=float32)}


  7%|▋         | 72430/1000000 [3:25:36<27:07:57,  9.50it/s]

{'loss': Array(0.14219016, dtype=float32), 'loss_cross_entropy': Array(0.13396786, dtype=float32)}
{'loss_inverse': Array(0.00021129, dtype=float32)}


  7%|▋         | 72440/1000000 [3:25:37<26:21:30,  9.78it/s]

{'loss': Array(0.1557251, dtype=float32), 'loss_cross_entropy': Array(0.1467296, dtype=float32)}
{'loss_inverse': Array(5.6142375e-07, dtype=float32)}


  7%|▋         | 72450/1000000 [3:25:39<30:07:41,  8.55it/s]

{'loss': Array(0.14308415, dtype=float32), 'loss_cross_entropy': Array(0.13468467, dtype=float32)}
{'loss_inverse': Array(8.769606e-05, dtype=float32)}


  7%|▋         | 72460/1000000 [3:25:40<26:18:59,  9.79it/s]

{'loss': Array(0.15506111, dtype=float32), 'loss_cross_entropy': Array(0.14670023, dtype=float32)}
{'loss_inverse': Array(8.593814e-06, dtype=float32)}


  7%|▋         | 72470/1000000 [3:25:42<31:26:42,  8.19it/s]

{'loss': Array(0.14486258, dtype=float32), 'loss_cross_entropy': Array(0.13636191, dtype=float32)}
{'loss_inverse': Array(0.00011524, dtype=float32)}


  7%|▋         | 72480/1000000 [3:25:43<26:11:30,  9.84it/s]

{'loss': Array(0.15081249, dtype=float32), 'loss_cross_entropy': Array(0.14181209, dtype=float32)}
{'loss_inverse': Array(7.210809e-08, dtype=float32)}


  7%|▋         | 72490/1000000 [3:25:45<34:31:53,  7.46it/s]

{'loss': Array(0.12947634, dtype=float32), 'loss_cross_entropy': Array(0.12118428, dtype=float32)}
{'loss_inverse': Array(2.0194972e-05, dtype=float32)}


  7%|▋         | 72500/1000000 [3:25:47<27:01:07,  9.54it/s]

{'loss': Array(0.16534641, dtype=float32), 'loss_cross_entropy': Array(0.15599273, dtype=float32)}
{'loss_inverse': Array(1.2821393e-05, dtype=float32)}


  7%|▋         | 72510/1000000 [3:25:55<94:02:21,  2.74it/s] 

{'loss': Array(0.13738768, dtype=float32), 'loss_cross_entropy': Array(0.12888612, dtype=float32)}
{'loss_inverse': Array(1.0529136e-05, dtype=float32)}


  7%|▋         | 72520/1000000 [3:25:56<36:43:46,  7.01it/s] 

{'loss': Array(0.15006696, dtype=float32), 'loss_cross_entropy': Array(0.14159496, dtype=float32)}
{'loss_inverse': Array(0.00018047, dtype=float32)}


  7%|▋         | 72530/1000000 [3:25:57<27:02:43,  9.53it/s]

{'loss': Array(0.15325864, dtype=float32), 'loss_cross_entropy': Array(0.14480041, dtype=float32)}
{'loss_inverse': Array(1.20643845e-05, dtype=float32)}


  7%|▋         | 72540/1000000 [3:25:59<28:51:20,  8.93it/s]

{'loss': Array(0.15363024, dtype=float32), 'loss_cross_entropy': Array(0.14430664, dtype=float32)}
{'loss_inverse': Array(5.5995935e-07, dtype=float32)}


  7%|▋         | 72550/1000000 [3:26:01<26:14:12,  9.82it/s]

{'loss': Array(0.1498587, dtype=float32), 'loss_cross_entropy': Array(0.14172041, dtype=float32)}
{'loss_inverse': Array(1.2112711e-05, dtype=float32)}


  7%|▋         | 72560/1000000 [3:26:02<34:44:30,  7.42it/s]

{'loss': Array(0.1568952, dtype=float32), 'loss_cross_entropy': Array(0.1481685, dtype=float32)}
{'loss_inverse': Array(9.739895e-06, dtype=float32)}


  7%|▋         | 72570/1000000 [3:26:04<26:37:55,  9.67it/s]

{'loss': Array(0.15262693, dtype=float32), 'loss_cross_entropy': Array(0.14404313, dtype=float32)}
{'loss_inverse': Array(3.7832713e-06, dtype=float32)}


  7%|▋         | 72580/1000000 [3:26:06<38:38:34,  6.67it/s]

{'loss': Array(0.14334199, dtype=float32), 'loss_cross_entropy': Array(0.13498408, dtype=float32)}
{'loss_inverse': Array(3.7728154e-05, dtype=float32)}


  7%|▋         | 72590/1000000 [3:26:07<28:28:01,  9.05it/s]

{'loss': Array(0.1360902, dtype=float32), 'loss_cross_entropy': Array(0.12807816, dtype=float32)}
{'loss_inverse': Array(3.251628e-05, dtype=float32)}


  7%|▋         | 72600/1000000 [3:26:09<25:53:54,  9.95it/s]

{'loss': Array(0.13987704, dtype=float32), 'loss_cross_entropy': Array(0.13143936, dtype=float32)}
{'loss_inverse': Array(7.1020327e-07, dtype=float32)}


  7%|▋         | 72610/1000000 [3:26:10<29:00:40,  8.88it/s]

{'loss': Array(0.16710715, dtype=float32), 'loss_cross_entropy': Array(0.15773597, dtype=float32)}
{'loss_inverse': Array(8.330575e-07, dtype=float32)}


  7%|▋         | 72620/1000000 [3:26:12<38:51:56,  6.63it/s]

{'loss': Array(0.14565721, dtype=float32), 'loss_cross_entropy': Array(0.13696054, dtype=float32)}
{'loss_inverse': Array(3.6345315e-07, dtype=float32)}


  7%|▋         | 72630/1000000 [3:26:13<27:13:05,  9.46it/s]

{'loss': Array(0.15700084, dtype=float32), 'loss_cross_entropy': Array(0.14772324, dtype=float32)}
{'loss_inverse': Array(1.793717e-05, dtype=float32)}


  7%|▋         | 72640/1000000 [3:26:15<25:32:35, 10.08it/s]

{'loss': Array(0.16588543, dtype=float32), 'loss_cross_entropy': Array(0.15671793, dtype=float32)}
{'loss_inverse': Array(1.26707955e-05, dtype=float32)}


  7%|▋         | 72650/1000000 [3:26:17<28:59:54,  8.88it/s]

{'loss': Array(0.14485897, dtype=float32), 'loss_cross_entropy': Array(0.13674457, dtype=float32)}
{'loss_inverse': Array(1.5146301e-06, dtype=float32)}


  7%|▋         | 72660/1000000 [3:26:18<26:19:53,  9.78it/s]

{'loss': Array(0.14493912, dtype=float32), 'loss_cross_entropy': Array(0.13668714, dtype=float32)}
{'loss_inverse': Array(3.3239255e-06, dtype=float32)}


  7%|▋         | 72670/1000000 [3:26:20<29:45:22,  8.66it/s]

{'loss': Array(0.14230786, dtype=float32), 'loss_cross_entropy': Array(0.13386112, dtype=float32)}
{'loss_inverse': Array(8.621329e-07, dtype=float32)}


  7%|▋         | 72680/1000000 [3:26:21<25:42:26, 10.02it/s]

{'loss': Array(0.13779593, dtype=float32), 'loss_cross_entropy': Array(0.12994604, dtype=float32)}
{'loss_inverse': Array(1.5755391e-05, dtype=float32)}


  7%|▋         | 72690/1000000 [3:26:23<32:00:36,  8.05it/s]

{'loss': Array(0.14728841, dtype=float32), 'loss_cross_entropy': Array(0.13869129, dtype=float32)}
{'loss_inverse': Array(4.159555e-06, dtype=float32)}


  7%|▋         | 72700/1000000 [3:26:25<26:21:38,  9.77it/s]

{'loss': Array(0.16051763, dtype=float32), 'loss_cross_entropy': Array(0.15199624, dtype=float32)}
{'loss_inverse': Array(1.2817545e-06, dtype=float32)}


  7%|▋         | 72710/1000000 [3:26:26<25:47:32,  9.99it/s]

{'loss': Array(0.1339168, dtype=float32), 'loss_cross_entropy': Array(0.12592085, dtype=float32)}
{'loss_inverse': Array(5.8494757e-06, dtype=float32)}


  7%|▋         | 72720/1000000 [3:26:28<30:09:08,  8.54it/s]

{'loss': Array(0.1446381, dtype=float32), 'loss_cross_entropy': Array(0.13605167, dtype=float32)}
{'loss_inverse': Array(1.531905e-07, dtype=float32)}


  7%|▋         | 72730/1000000 [3:26:29<26:56:14,  9.56it/s]

{'loss': Array(0.15043455, dtype=float32), 'loss_cross_entropy': Array(0.1418706, dtype=float32)}
{'loss_inverse': Array(5.088129e-07, dtype=float32)}


  7%|▋         | 72740/1000000 [3:26:31<32:31:15,  7.92it/s]

{'loss': Array(0.13679256, dtype=float32), 'loss_cross_entropy': Array(0.12884133, dtype=float32)}
{'loss_inverse': Array(1.5582339e-06, dtype=float32)}


  7%|▋         | 72750/1000000 [3:26:33<26:48:32,  9.61it/s]

{'loss': Array(0.1296951, dtype=float32), 'loss_cross_entropy': Array(0.12224066, dtype=float32)}
{'loss_inverse': Array(3.1812785e-08, dtype=float32)}


  7%|▋         | 72760/1000000 [3:26:34<34:44:59,  7.41it/s]

{'loss': Array(0.16893649, dtype=float32), 'loss_cross_entropy': Array(0.15982233, dtype=float32)}
{'loss_inverse': Array(2.1454399e-07, dtype=float32)}


  7%|▋         | 72770/1000000 [3:26:36<26:59:20,  9.54it/s]

{'loss': Array(0.1282079, dtype=float32), 'loss_cross_entropy': Array(0.12039926, dtype=float32)}
{'loss_inverse': Array(4.4422418e-07, dtype=float32)}


  7%|▋         | 72780/1000000 [3:26:37<25:17:20, 10.18it/s]

{'loss': Array(0.14916037, dtype=float32), 'loss_cross_entropy': Array(0.14036207, dtype=float32)}
{'loss_inverse': Array(9.796814e-06, dtype=float32)}


  7%|▋         | 72790/1000000 [3:26:39<27:46:57,  9.27it/s]

{'loss': Array(0.14196238, dtype=float32), 'loss_cross_entropy': Array(0.13336514, dtype=float32)}
{'loss_inverse': Array(2.942278e-07, dtype=float32)}


  7%|▋         | 72800/1000000 [3:26:40<25:23:25, 10.14it/s]

{'loss': Array(0.15143247, dtype=float32), 'loss_cross_entropy': Array(0.14236994, dtype=float32)}
{'loss_inverse': Array(1.10686635e-07, dtype=float32)}


  7%|▋         | 72810/1000000 [3:26:42<34:35:33,  7.45it/s]

{'loss': Array(0.16879569, dtype=float32), 'loss_cross_entropy': Array(0.15952367, dtype=float32)}
{'loss_inverse': Array(7.157509e-07, dtype=float32)}


  7%|▋         | 72820/1000000 [3:26:43<26:24:11,  9.75it/s]

{'loss': Array(0.14539905, dtype=float32), 'loss_cross_entropy': Array(0.13679416, dtype=float32)}
{'loss_inverse': Array(2.2844479e-05, dtype=float32)}


  7%|▋         | 72830/1000000 [3:26:45<25:25:10, 10.13it/s]

{'loss': Array(0.15325569, dtype=float32), 'loss_cross_entropy': Array(0.14443526, dtype=float32)}
{'loss_inverse': Array(1.0110019e-07, dtype=float32)}


  7%|▋         | 72840/1000000 [3:26:47<28:07:38,  9.16it/s]

{'loss': Array(0.14758556, dtype=float32), 'loss_cross_entropy': Array(0.13905479, dtype=float32)}
{'loss_inverse': Array(9.052506e-08, dtype=float32)}


  7%|▋         | 72850/1000000 [3:26:48<25:53:09,  9.95it/s]

{'loss': Array(0.14876314, dtype=float32), 'loss_cross_entropy': Array(0.13955395, dtype=float32)}
{'loss_inverse': Array(1.3472547e-06, dtype=float32)}


  7%|▋         | 72860/1000000 [3:26:50<30:03:52,  8.57it/s]

{'loss': Array(0.15447846, dtype=float32), 'loss_cross_entropy': Array(0.14530662, dtype=float32)}
{'loss_inverse': Array(2.0400224e-05, dtype=float32)}


  7%|▋         | 72870/1000000 [3:26:51<26:21:02,  9.77it/s]

{'loss': Array(0.13996328, dtype=float32), 'loss_cross_entropy': Array(0.13249716, dtype=float32)}
{'loss_inverse': Array(4.4672393e-07, dtype=float32)}


  7%|▋         | 72880/1000000 [3:26:53<31:22:00,  8.21it/s]

{'loss': Array(0.15996832, dtype=float32), 'loss_cross_entropy': Array(0.15127471, dtype=float32)}
{'loss_inverse': Array(2.1218389e-08, dtype=float32)}


  7%|▋         | 72890/1000000 [3:26:54<26:01:16,  9.90it/s]

{'loss': Array(0.1575492, dtype=float32), 'loss_cross_entropy': Array(0.14879404, dtype=float32)}
{'loss_inverse': Array(6.621353e-07, dtype=float32)}


  7%|▋         | 72900/1000000 [3:26:56<25:19:25, 10.17it/s]

{'loss': Array(0.15557104, dtype=float32), 'loss_cross_entropy': Array(0.14674278, dtype=float32)}
{'loss_inverse': Array(1.5061613e-06, dtype=float32)}


  7%|▋         | 72910/1000000 [3:26:57<29:09:54,  8.83it/s]

{'loss': Array(0.12634619, dtype=float32), 'loss_cross_entropy': Array(0.11854612, dtype=float32)}
{'loss_inverse': Array(3.0949454e-07, dtype=float32)}


  7%|▋         | 72920/1000000 [3:26:59<25:44:54, 10.00it/s]

{'loss': Array(0.15142964, dtype=float32), 'loss_cross_entropy': Array(0.14211135, dtype=float32)}
{'loss_inverse': Array(1.0298841e-07, dtype=float32)}


  7%|▋         | 72930/1000000 [3:27:01<33:58:53,  7.58it/s]

{'loss': Array(0.12782054, dtype=float32), 'loss_cross_entropy': Array(0.11958156, dtype=float32)}
{'loss_inverse': Array(3.4110732e-07, dtype=float32)}


  7%|▋         | 72940/1000000 [3:27:02<26:27:18,  9.73it/s]

{'loss': Array(0.15237854, dtype=float32), 'loss_cross_entropy': Array(0.1438743, dtype=float32)}
{'loss_inverse': Array(1.8954523e-07, dtype=float32)}


  7%|▋         | 72950/1000000 [3:27:04<38:10:40,  6.75it/s]

{'loss': Array(0.15801361, dtype=float32), 'loss_cross_entropy': Array(0.14898649, dtype=float32)}
{'loss_inverse': Array(2.9952562e-07, dtype=float32)}


  7%|▋         | 72960/1000000 [3:27:05<27:57:18,  9.21it/s]

{'loss': Array(0.1442023, dtype=float32), 'loss_cross_entropy': Array(0.13624886, dtype=float32)}
{'loss_inverse': Array(1.596054e-08, dtype=float32)}


  7%|▋         | 72970/1000000 [3:27:07<39:17:50,  6.55it/s]

{'loss': Array(0.15215549, dtype=float32), 'loss_cross_entropy': Array(0.14320534, dtype=float32)}
{'loss_inverse': Array(1.2640832e-07, dtype=float32)}


  7%|▋         | 72980/1000000 [3:27:08<27:29:55,  9.36it/s]

{'loss': Array(0.15696697, dtype=float32), 'loss_cross_entropy': Array(0.14743476, dtype=float32)}
{'loss_inverse': Array(2.9467335e-06, dtype=float32)}


  7%|▋         | 72990/1000000 [3:27:10<26:01:01,  9.90it/s]

{'loss': Array(0.15455423, dtype=float32), 'loss_cross_entropy': Array(0.14612871, dtype=float32)}
{'loss_inverse': Array(1.4946157e-07, dtype=float32)}


  7%|▋         | 73000/1000000 [3:27:12<32:12:21,  8.00it/s]

{'loss': Array(0.14980507, dtype=float32), 'loss_cross_entropy': Array(0.14140789, dtype=float32)}
{'loss_inverse': Array(7.916835e-06, dtype=float32)}


  7%|▋         | 73010/1000000 [3:27:20<86:31:52,  2.98it/s] 

{'loss': Array(0.15640207, dtype=float32), 'loss_cross_entropy': Array(0.1475729, dtype=float32)}
{'loss_inverse': Array(7.361579e-08, dtype=float32)}


  7%|▋         | 73020/1000000 [3:27:21<35:17:28,  7.30it/s]

{'loss': Array(0.12524961, dtype=float32), 'loss_cross_entropy': Array(0.11726435, dtype=float32)}
{'loss_inverse': Array(1.9073523e-06, dtype=float32)}


  7%|▋         | 73030/1000000 [3:27:23<31:40:20,  8.13it/s]

{'loss': Array(0.13435756, dtype=float32), 'loss_cross_entropy': Array(0.12629516, dtype=float32)}
{'loss_inverse': Array(1.207667e-07, dtype=float32)}


  7%|▋         | 73040/1000000 [3:27:24<27:21:36,  9.41it/s]

{'loss': Array(0.13928592, dtype=float32), 'loss_cross_entropy': Array(0.1313543, dtype=float32)}
{'loss_inverse': Array(5.9854833e-06, dtype=float32)}


  7%|▋         | 73050/1000000 [3:27:26<32:00:45,  8.04it/s]

{'loss': Array(0.16566078, dtype=float32), 'loss_cross_entropy': Array(0.15597768, dtype=float32)}
{'loss_inverse': Array(1.7756292e-08, dtype=float32)}


  7%|▋         | 73060/1000000 [3:27:27<26:21:47,  9.77it/s]

{'loss': Array(0.16267364, dtype=float32), 'loss_cross_entropy': Array(0.15337417, dtype=float32)}
{'loss_inverse': Array(5.878697e-08, dtype=float32)}


  7%|▋         | 73070/1000000 [3:27:29<34:31:53,  7.46it/s]

{'loss': Array(0.13097523, dtype=float32), 'loss_cross_entropy': Array(0.12327906, dtype=float32)}
{'loss_inverse': Array(2.444445e-07, dtype=float32)}


  7%|▋         | 73080/1000000 [3:27:31<27:18:53,  9.43it/s]

{'loss': Array(0.13926153, dtype=float32), 'loss_cross_entropy': Array(0.131052, dtype=float32)}
{'loss_inverse': Array(8.010359e-08, dtype=float32)}


  7%|▋         | 73090/1000000 [3:27:32<26:17:22,  9.79it/s]

{'loss': Array(0.1509473, dtype=float32), 'loss_cross_entropy': Array(0.14215693, dtype=float32)}
{'loss_inverse': Array(1.1998854e-07, dtype=float32)}


  7%|▋         | 73100/1000000 [3:27:34<29:48:23,  8.64it/s]

{'loss': Array(0.13940082, dtype=float32), 'loss_cross_entropy': Array(0.13107571, dtype=float32)}
{'loss_inverse': Array(1.09308296e-07, dtype=float32)}


  7%|▋         | 73110/1000000 [3:27:35<26:14:41,  9.81it/s]

{'loss': Array(0.12719406, dtype=float32), 'loss_cross_entropy': Array(0.11967991, dtype=float32)}
{'loss_inverse': Array(3.7708585e-08, dtype=float32)}


  7%|▋         | 73120/1000000 [3:27:37<34:17:05,  7.51it/s]

{'loss': Array(0.1419218, dtype=float32), 'loss_cross_entropy': Array(0.13339041, dtype=float32)}
{'loss_inverse': Array(1.8216596e-07, dtype=float32)}


  7%|▋         | 73130/1000000 [3:27:38<26:20:41,  9.77it/s]

{'loss': Array(0.14655145, dtype=float32), 'loss_cross_entropy': Array(0.13782115, dtype=float32)}
{'loss_inverse': Array(8.772401e-07, dtype=float32)}


  7%|▋         | 73140/1000000 [3:27:40<38:51:30,  6.63it/s]

{'loss': Array(0.15935282, dtype=float32), 'loss_cross_entropy': Array(0.15035345, dtype=float32)}
{'loss_inverse': Array(8.552147e-08, dtype=float32)}


  7%|▋         | 73150/1000000 [3:27:42<28:06:54,  9.16it/s]

{'loss': Array(0.14964807, dtype=float32), 'loss_cross_entropy': Array(0.14076799, dtype=float32)}
{'loss_inverse': Array(3.7874614e-07, dtype=float32)}


  7%|▋         | 73160/1000000 [3:27:43<25:50:51,  9.96it/s]

{'loss': Array(0.14722191, dtype=float32), 'loss_cross_entropy': Array(0.13855788, dtype=float32)}
{'loss_inverse': Array(2.7149267e-07, dtype=float32)}


  7%|▋         | 73170/1000000 [3:27:45<28:34:45,  9.01it/s]

{'loss': Array(0.14065486, dtype=float32), 'loss_cross_entropy': Array(0.13240825, dtype=float32)}
{'loss_inverse': Array(1.6457797e-07, dtype=float32)}


  7%|▋         | 73180/1000000 [3:27:46<25:32:49, 10.08it/s]

{'loss': Array(0.15229909, dtype=float32), 'loss_cross_entropy': Array(0.1433428, dtype=float32)}
{'loss_inverse': Array(4.5012776e-08, dtype=float32)}


  7%|▋         | 73190/1000000 [3:27:48<29:24:11,  8.76it/s]

{'loss': Array(0.14664632, dtype=float32), 'loss_cross_entropy': Array(0.13772333, dtype=float32)}
{'loss_inverse': Array(1.1815657e-06, dtype=float32)}


  7%|▋         | 73200/1000000 [3:27:49<25:54:39,  9.94it/s]

{'loss': Array(0.16508444, dtype=float32), 'loss_cross_entropy': Array(0.15548217, dtype=float32)}
{'loss_inverse': Array(5.7669837e-07, dtype=float32)}


  7%|▋         | 73210/1000000 [3:27:51<31:37:25,  8.14it/s]

{'loss': Array(0.15311177, dtype=float32), 'loss_cross_entropy': Array(0.1447956, dtype=float32)}
{'loss_inverse': Array(1.3205822e-07, dtype=float32)}


  7%|▋         | 73220/1000000 [3:27:52<26:02:43,  9.88it/s]

{'loss': Array(0.1606979, dtype=float32), 'loss_cross_entropy': Array(0.15168504, dtype=float32)}
{'loss_inverse': Array(5.0116535e-07, dtype=float32)}


  7%|▋         | 73230/1000000 [3:27:54<34:31:03,  7.46it/s]

{'loss': Array(0.1662644, dtype=float32), 'loss_cross_entropy': Array(0.1568708, dtype=float32)}
{'loss_inverse': Array(1.2274454e-07, dtype=float32)}


  7%|▋         | 73240/1000000 [3:27:56<27:47:01,  9.27it/s]

{'loss': Array(0.1544205, dtype=float32), 'loss_cross_entropy': Array(0.14618145, dtype=float32)}
{'loss_inverse': Array(8.004545e-07, dtype=float32)}


  7%|▋         | 73250/1000000 [3:27:58<34:55:36,  7.37it/s]

{'loss': Array(0.13377188, dtype=float32), 'loss_cross_entropy': Array(0.1252238, dtype=float32)}
{'loss_inverse': Array(1.24469e-07, dtype=float32)}


  7%|▋         | 73260/1000000 [3:27:59<26:33:36,  9.69it/s]

{'loss': Array(0.15619706, dtype=float32), 'loss_cross_entropy': Array(0.14725623, dtype=float32)}
{'loss_inverse': Array(1.0818426e-07, dtype=float32)}


  7%|▋         | 73270/1000000 [3:28:00<25:33:09, 10.07it/s]

{'loss': Array(0.14474656, dtype=float32), 'loss_cross_entropy': Array(0.1366192, dtype=float32)}
{'loss_inverse': Array(1.4380933e-07, dtype=float32)}


  7%|▋         | 73280/1000000 [3:28:02<31:43:35,  8.11it/s]

{'loss': Array(0.14339577, dtype=float32), 'loss_cross_entropy': Array(0.13533436, dtype=float32)}
{'loss_inverse': Array(7.4282707e-07, dtype=float32)}


  7%|▋         | 73290/1000000 [3:28:04<26:02:35,  9.88it/s]

{'loss': Array(0.14615418, dtype=float32), 'loss_cross_entropy': Array(0.13747981, dtype=float32)}
{'loss_inverse': Array(7.374627e-08, dtype=float32)}


  7%|▋         | 73300/1000000 [3:28:05<37:41:31,  6.83it/s]

{'loss': Array(0.12880784, dtype=float32), 'loss_cross_entropy': Array(0.12120964, dtype=float32)}
{'loss_inverse': Array(9.924272e-08, dtype=float32)}


  7%|▋         | 73310/1000000 [3:28:07<27:56:26,  9.21it/s]

{'loss': Array(0.16697586, dtype=float32), 'loss_cross_entropy': Array(0.15758339, dtype=float32)}
{'loss_inverse': Array(3.133087e-08, dtype=float32)}


  7%|▋         | 73320/1000000 [3:28:08<25:35:13, 10.06it/s]

{'loss': Array(0.14673832, dtype=float32), 'loss_cross_entropy': Array(0.13928011, dtype=float32)}
{'loss_inverse': Array(1.8123237e-07, dtype=float32)}


  7%|▋         | 73330/1000000 [3:28:10<28:25:19,  9.06it/s]

{'loss': Array(0.1449703, dtype=float32), 'loss_cross_entropy': Array(0.13634476, dtype=float32)}
{'loss_inverse': Array(1.2882302e-06, dtype=float32)}


  7%|▋         | 73340/1000000 [3:28:11<25:51:24,  9.96it/s]

{'loss': Array(0.13927852, dtype=float32), 'loss_cross_entropy': Array(0.13130158, dtype=float32)}
{'loss_inverse': Array(9.142453e-08, dtype=float32)}


  7%|▋         | 73350/1000000 [3:28:13<28:18:02,  9.10it/s]

{'loss': Array(0.13728027, dtype=float32), 'loss_cross_entropy': Array(0.12951414, dtype=float32)}
{'loss_inverse': Array(1.2661003e-05, dtype=float32)}


  7%|▋         | 73360/1000000 [3:28:14<26:10:57,  9.83it/s]

{'loss': Array(0.13376847, dtype=float32), 'loss_cross_entropy': Array(0.12595525, dtype=float32)}
{'loss_inverse': Array(3.4138837e-05, dtype=float32)}


  7%|▋         | 73370/1000000 [3:28:16<31:42:10,  8.12it/s]

{'loss': Array(0.15165904, dtype=float32), 'loss_cross_entropy': Array(0.1436076, dtype=float32)}
{'loss_inverse': Array(6.622317e-08, dtype=float32)}


  7%|▋         | 73380/1000000 [3:28:18<26:48:43,  9.60it/s]

{'loss': Array(0.16102894, dtype=float32), 'loss_cross_entropy': Array(0.1517885, dtype=float32)}
{'loss_inverse': Array(0.00013201, dtype=float32)}


  7%|▋         | 73390/1000000 [3:28:19<32:12:08,  7.99it/s]

{'loss': Array(0.14544296, dtype=float32), 'loss_cross_entropy': Array(0.13729565, dtype=float32)}
{'loss_inverse': Array(2.2604263e-06, dtype=float32)}


  7%|▋         | 73400/1000000 [3:28:21<26:30:52,  9.71it/s]

{'loss': Array(0.15385185, dtype=float32), 'loss_cross_entropy': Array(0.14528318, dtype=float32)}
{'loss_inverse': Array(1.313038e-05, dtype=float32)}


  7%|▋         | 73410/1000000 [3:28:23<38:28:07,  6.69it/s]

{'loss': Array(0.14127015, dtype=float32), 'loss_cross_entropy': Array(0.13312729, dtype=float32)}
{'loss_inverse': Array(3.9088285e-05, dtype=float32)}


  7%|▋         | 73420/1000000 [3:28:24<27:33:56,  9.34it/s]

{'loss': Array(0.13202207, dtype=float32), 'loss_cross_entropy': Array(0.12372245, dtype=float32)}
{'loss_inverse': Array(4.128081e-05, dtype=float32)}


  7%|▋         | 73430/1000000 [3:28:25<25:16:39, 10.18it/s]

{'loss': Array(0.15215002, dtype=float32), 'loss_cross_entropy': Array(0.14309663, dtype=float32)}
{'loss_inverse': Array(2.2844463e-06, dtype=float32)}


  7%|▋         | 73440/1000000 [3:28:27<28:07:11,  9.15it/s]

{'loss': Array(0.13910866, dtype=float32), 'loss_cross_entropy': Array(0.13086568, dtype=float32)}
{'loss_inverse': Array(5.6837084e-06, dtype=float32)}


  7%|▋         | 73450/1000000 [3:28:29<26:57:47,  9.55it/s]

{'loss': Array(0.14584151, dtype=float32), 'loss_cross_entropy': Array(0.13730676, dtype=float32)}
{'loss_inverse': Array(5.7841466e-05, dtype=float32)}


  7%|▋         | 73460/1000000 [3:28:30<31:20:38,  8.21it/s]

{'loss': Array(0.14724103, dtype=float32), 'loss_cross_entropy': Array(0.13872658, dtype=float32)}
{'loss_inverse': Array(3.9394686e-07, dtype=float32)}


  7%|▋         | 73470/1000000 [3:28:32<26:31:38,  9.70it/s]

{'loss': Array(0.16260634, dtype=float32), 'loss_cross_entropy': Array(0.15323476, dtype=float32)}
{'loss_inverse': Array(5.577113e-05, dtype=float32)}


  7%|▋         | 73480/1000000 [3:28:33<25:37:01, 10.05it/s]

{'loss': Array(0.1762247, dtype=float32), 'loss_cross_entropy': Array(0.16634324, dtype=float32)}
{'loss_inverse': Array(3.324373e-06, dtype=float32)}


  7%|▋         | 73490/1000000 [3:28:35<29:36:58,  8.69it/s]

{'loss': Array(0.136959, dtype=float32), 'loss_cross_entropy': Array(0.12952065, dtype=float32)}
{'loss_inverse': Array(2.9748542e-06, dtype=float32)}


  7%|▋         | 73500/1000000 [3:28:36<26:01:54,  9.89it/s]

{'loss': Array(0.1433786, dtype=float32), 'loss_cross_entropy': Array(0.13512439, dtype=float32)}
{'loss_inverse': Array(0.0002912, dtype=float32)}


  7%|▋         | 73510/1000000 [3:28:45<95:50:43,  2.69it/s] 

{'loss': Array(0.15185183, dtype=float32), 'loss_cross_entropy': Array(0.14331701, dtype=float32)}
{'loss_inverse': Array(1.2278654e-05, dtype=float32)}


  7%|▋         | 73520/1000000 [3:28:46<36:39:42,  7.02it/s] 

{'loss': Array(0.15225057, dtype=float32), 'loss_cross_entropy': Array(0.14386982, dtype=float32)}
{'loss_inverse': Array(6.573018e-05, dtype=float32)}


  7%|▋         | 73530/1000000 [3:28:48<39:49:10,  6.46it/s]

{'loss': Array(0.152839, dtype=float32), 'loss_cross_entropy': Array(0.14420594, dtype=float32)}
{'loss_inverse': Array(1.7077739e-06, dtype=float32)}


  7%|▋         | 73540/1000000 [3:28:49<28:05:09,  9.16it/s]

{'loss': Array(0.16555671, dtype=float32), 'loss_cross_entropy': Array(0.15687391, dtype=float32)}
{'loss_inverse': Array(4.3103755e-06, dtype=float32)}


  7%|▋         | 73550/1000000 [3:28:51<26:06:29,  9.86it/s]

{'loss': Array(0.15894525, dtype=float32), 'loss_cross_entropy': Array(0.14949362, dtype=float32)}
{'loss_inverse': Array(0.00034075, dtype=float32)}


  7%|▋         | 73560/1000000 [3:28:53<29:19:20,  8.78it/s]

{'loss': Array(0.16630118, dtype=float32), 'loss_cross_entropy': Array(0.15700936, dtype=float32)}
{'loss_inverse': Array(0.00014009, dtype=float32)}


  7%|▋         | 73570/1000000 [3:28:54<38:41:37,  6.65it/s]

{'loss': Array(0.1608693, dtype=float32), 'loss_cross_entropy': Array(0.15244852, dtype=float32)}
{'loss_inverse': Array(7.419527e-05, dtype=float32)}


  7%|▋         | 73580/1000000 [3:28:56<27:24:45,  9.39it/s]

{'loss': Array(0.12970798, dtype=float32), 'loss_cross_entropy': Array(0.12199128, dtype=float32)}
{'loss_inverse': Array(0.00022972, dtype=float32)}


  7%|▋         | 73590/1000000 [3:28:57<25:27:37, 10.11it/s]

{'loss': Array(0.14774263, dtype=float32), 'loss_cross_entropy': Array(0.13890675, dtype=float32)}
{'loss_inverse': Array(9.438408e-06, dtype=float32)}


  7%|▋         | 73600/1000000 [3:28:59<28:20:14,  9.08it/s]

{'loss': Array(0.15490144, dtype=float32), 'loss_cross_entropy': Array(0.14594446, dtype=float32)}
{'loss_inverse': Array(0.00038742, dtype=float32)}


  7%|▋         | 73610/1000000 [3:29:00<25:51:59,  9.95it/s]

{'loss': Array(0.15658545, dtype=float32), 'loss_cross_entropy': Array(0.14748079, dtype=float32)}
{'loss_inverse': Array(0.00018616, dtype=float32)}


  7%|▋         | 73620/1000000 [3:29:02<29:36:49,  8.69it/s]

{'loss': Array(0.13570303, dtype=float32), 'loss_cross_entropy': Array(0.12760976, dtype=float32)}
{'loss_inverse': Array(9.2317954e-05, dtype=float32)}


  7%|▋         | 73630/1000000 [3:29:04<25:53:09,  9.94it/s]

{'loss': Array(0.16227518, dtype=float32), 'loss_cross_entropy': Array(0.15303992, dtype=float32)}
{'loss_inverse': Array(7.859815e-05, dtype=float32)}


  7%|▋         | 73640/1000000 [3:29:05<31:34:18,  8.15it/s]

{'loss': Array(0.14794806, dtype=float32), 'loss_cross_entropy': Array(0.13977225, dtype=float32)}
{'loss_inverse': Array(0.00015066, dtype=float32)}


  7%|▋         | 73650/1000000 [3:29:07<27:28:56,  9.36it/s]

{'loss': Array(0.15149875, dtype=float32), 'loss_cross_entropy': Array(0.14275853, dtype=float32)}
{'loss_inverse': Array(0.00020166, dtype=float32)}


  7%|▋         | 73660/1000000 [3:29:08<25:47:46,  9.98it/s]

{'loss': Array(0.14526664, dtype=float32), 'loss_cross_entropy': Array(0.13666776, dtype=float32)}
{'loss_inverse': Array(5.911299e-05, dtype=float32)}


  7%|▋         | 73670/1000000 [3:29:10<29:23:06,  8.76it/s]

{'loss': Array(0.1518397, dtype=float32), 'loss_cross_entropy': Array(0.1435346, dtype=float32)}
{'loss_inverse': Array(8.901159e-05, dtype=float32)}


  7%|▋         | 73680/1000000 [3:29:11<25:52:36,  9.94it/s]

{'loss': Array(0.14840879, dtype=float32), 'loss_cross_entropy': Array(0.13964845, dtype=float32)}
{'loss_inverse': Array(0.0002027, dtype=float32)}


  7%|▋         | 73690/1000000 [3:29:13<31:42:23,  8.12it/s]

{'loss': Array(0.14453448, dtype=float32), 'loss_cross_entropy': Array(0.13612647, dtype=float32)}
{'loss_inverse': Array(0.00067728, dtype=float32)}


  7%|▋         | 73700/1000000 [3:29:15<26:46:05,  9.61it/s]

{'loss': Array(0.15561491, dtype=float32), 'loss_cross_entropy': Array(0.14738184, dtype=float32)}
{'loss_inverse': Array(0.00012324, dtype=float32)}


  7%|▋         | 73710/1000000 [3:29:16<34:11:50,  7.52it/s]

{'loss': Array(0.13608655, dtype=float32), 'loss_cross_entropy': Array(0.12814133, dtype=float32)}
{'loss_inverse': Array(6.797047e-05, dtype=float32)}


  7%|▋         | 73720/1000000 [3:29:18<26:31:30,  9.70it/s]

{'loss': Array(0.14627613, dtype=float32), 'loss_cross_entropy': Array(0.13736372, dtype=float32)}
{'loss_inverse': Array(0.00015294, dtype=float32)}


  7%|▋         | 73730/1000000 [3:29:19<25:17:42, 10.17it/s]

{'loss': Array(0.14385544, dtype=float32), 'loss_cross_entropy': Array(0.13564067, dtype=float32)}
{'loss_inverse': Array(4.8935886e-05, dtype=float32)}


  7%|▋         | 73740/1000000 [3:29:21<27:55:51,  9.21it/s]

{'loss': Array(0.15585437, dtype=float32), 'loss_cross_entropy': Array(0.14674453, dtype=float32)}
{'loss_inverse': Array(0.00044847, dtype=float32)}


  7%|▋         | 73750/1000000 [3:29:22<25:30:18, 10.09it/s]

{'loss': Array(0.15836863, dtype=float32), 'loss_cross_entropy': Array(0.14945428, dtype=float32)}
{'loss_inverse': Array(0.00039903, dtype=float32)}


  7%|▋         | 73760/1000000 [3:29:24<33:46:26,  7.62it/s]

{'loss': Array(0.15488619, dtype=float32), 'loss_cross_entropy': Array(0.1457587, dtype=float32)}
{'loss_inverse': Array(0.00029655, dtype=float32)}


  7%|▋         | 73770/1000000 [3:29:25<26:55:07,  9.56it/s]

{'loss': Array(0.15247375, dtype=float32), 'loss_cross_entropy': Array(0.1439063, dtype=float32)}
{'loss_inverse': Array(0.00013562, dtype=float32)}


  7%|▋         | 73780/1000000 [3:29:27<25:14:57, 10.19it/s]

{'loss': Array(0.17636445, dtype=float32), 'loss_cross_entropy': Array(0.16632529, dtype=float32)}
{'loss_inverse': Array(0.00028376, dtype=float32)}


  7%|▋         | 73790/1000000 [3:29:29<28:25:53,  9.05it/s]

{'loss': Array(0.13452017, dtype=float32), 'loss_cross_entropy': Array(0.12673555, dtype=float32)}
{'loss_inverse': Array(2.8746575e-05, dtype=float32)}


  7%|▋         | 73800/1000000 [3:29:30<25:29:45, 10.09it/s]

{'loss': Array(0.1308103, dtype=float32), 'loss_cross_entropy': Array(0.12286794, dtype=float32)}
{'loss_inverse': Array(0.00032459, dtype=float32)}


  7%|▋         | 73810/1000000 [3:29:32<30:01:16,  8.57it/s]

{'loss': Array(0.15177189, dtype=float32), 'loss_cross_entropy': Array(0.14282022, dtype=float32)}
{'loss_inverse': Array(2.6559079e-05, dtype=float32)}


  7%|▋         | 73820/1000000 [3:29:33<26:15:27,  9.80it/s]

{'loss': Array(0.15048003, dtype=float32), 'loss_cross_entropy': Array(0.14199667, dtype=float32)}
{'loss_inverse': Array(3.1815533e-05, dtype=float32)}


  7%|▋         | 73830/1000000 [3:29:35<31:46:03,  8.10it/s]

{'loss': Array(0.16739635, dtype=float32), 'loss_cross_entropy': Array(0.15741515, dtype=float32)}
{'loss_inverse': Array(1.8793764e-05, dtype=float32)}


  7%|▋         | 73840/1000000 [3:29:36<26:19:27,  9.77it/s]

{'loss': Array(0.16167289, dtype=float32), 'loss_cross_entropy': Array(0.15261112, dtype=float32)}
{'loss_inverse': Array(0.0003758, dtype=float32)}


  7%|▋         | 73850/1000000 [3:29:38<25:17:46, 10.17it/s]

{'loss': Array(0.1448455, dtype=float32), 'loss_cross_entropy': Array(0.13630751, dtype=float32)}
{'loss_inverse': Array(0.00019901, dtype=float32)}


  7%|▋         | 73860/1000000 [3:29:40<30:15:19,  8.50it/s]

{'loss': Array(0.14592966, dtype=float32), 'loss_cross_entropy': Array(0.13684164, dtype=float32)}
{'loss_inverse': Array(0.00028563, dtype=float32)}


  7%|▋         | 73870/1000000 [3:29:41<25:46:24,  9.98it/s]

{'loss': Array(0.14600769, dtype=float32), 'loss_cross_entropy': Array(0.13739501, dtype=float32)}
{'loss_inverse': Array(8.936281e-06, dtype=float32)}


  7%|▋         | 73880/1000000 [3:29:43<34:17:03,  7.50it/s]

{'loss': Array(0.16788904, dtype=float32), 'loss_cross_entropy': Array(0.15842198, dtype=float32)}
{'loss_inverse': Array(0.00015292, dtype=float32)}


  7%|▋         | 73890/1000000 [3:29:44<26:45:48,  9.61it/s]

{'loss': Array(0.15318614, dtype=float32), 'loss_cross_entropy': Array(0.14419036, dtype=float32)}
{'loss_inverse': Array(0.00033988, dtype=float32)}


  7%|▋         | 73900/1000000 [3:29:46<38:24:43,  6.70it/s]

{'loss': Array(0.1547072, dtype=float32), 'loss_cross_entropy': Array(0.1460772, dtype=float32)}
{'loss_inverse': Array(5.9134705e-05, dtype=float32)}


  7%|▋         | 73910/1000000 [3:29:47<27:30:17,  9.35it/s]

{'loss': Array(0.1445517, dtype=float32), 'loss_cross_entropy': Array(0.13515268, dtype=float32)}
{'loss_inverse': Array(0.00017477, dtype=float32)}


  7%|▋         | 73920/1000000 [3:29:49<38:46:35,  6.63it/s]

{'loss': Array(0.13965791, dtype=float32), 'loss_cross_entropy': Array(0.13092111, dtype=float32)}
{'loss_inverse': Array(9.973835e-06, dtype=float32)}


  7%|▋         | 73930/1000000 [3:29:51<27:01:23,  9.52it/s]

{'loss': Array(0.16791493, dtype=float32), 'loss_cross_entropy': Array(0.15923314, dtype=float32)}
{'loss_inverse': Array(5.3906155e-05, dtype=float32)}


  7%|▋         | 73940/1000000 [3:29:52<25:10:34, 10.22it/s]

{'loss': Array(0.14347373, dtype=float32), 'loss_cross_entropy': Array(0.13539241, dtype=float32)}
{'loss_inverse': Array(1.0390264e-05, dtype=float32)}


  7%|▋         | 73950/1000000 [3:29:54<31:22:05,  8.20it/s]

{'loss': Array(0.15716949, dtype=float32), 'loss_cross_entropy': Array(0.14758977, dtype=float32)}
{'loss_inverse': Array(0.00012366, dtype=float32)}


  7%|▋         | 73960/1000000 [3:29:55<26:12:42,  9.81it/s]

{'loss': Array(0.12814811, dtype=float32), 'loss_cross_entropy': Array(0.1197014, dtype=float32)}
{'loss_inverse': Array(0.0001214, dtype=float32)}


  7%|▋         | 73970/1000000 [3:29:57<39:26:07,  6.52it/s]

{'loss': Array(0.1664405, dtype=float32), 'loss_cross_entropy': Array(0.15660131, dtype=float32)}
{'loss_inverse': Array(7.4424715e-06, dtype=float32)}


  7%|▋         | 73980/1000000 [3:29:58<27:50:30,  9.24it/s]

{'loss': Array(0.14756966, dtype=float32), 'loss_cross_entropy': Array(0.13917105, dtype=float32)}
{'loss_inverse': Array(9.906876e-05, dtype=float32)}


  7%|▋         | 73990/1000000 [3:30:00<38:56:36,  6.61it/s]

{'loss': Array(0.1802306, dtype=float32), 'loss_cross_entropy': Array(0.17052883, dtype=float32)}
{'loss_inverse': Array(0.00012693, dtype=float32)}


  7%|▋         | 74000/1000000 [3:30:02<28:20:35,  9.08it/s]

{'loss': Array(0.15045789, dtype=float32), 'loss_cross_entropy': Array(0.14217003, dtype=float32)}
{'loss_inverse': Array(3.208618e-05, dtype=float32)}


  7%|▋         | 74010/1000000 [3:30:10<89:39:55,  2.87it/s] 

{'loss': Array(0.13622765, dtype=float32), 'loss_cross_entropy': Array(0.1279449, dtype=float32)}
{'loss_inverse': Array(4.60505e-05, dtype=float32)}


  7%|▋         | 74020/1000000 [3:30:12<45:14:17,  5.69it/s]

{'loss': Array(0.1532989, dtype=float32), 'loss_cross_entropy': Array(0.14457332, dtype=float32)}
{'loss_inverse': Array(2.185472e-06, dtype=float32)}


  7%|▋         | 74030/1000000 [3:30:13<28:32:56,  9.01it/s]

{'loss': Array(0.129547, dtype=float32), 'loss_cross_entropy': Array(0.12192192, dtype=float32)}
{'loss_inverse': Array(2.229916e-06, dtype=float32)}


  7%|▋         | 74040/1000000 [3:30:15<26:05:51,  9.86it/s]

{'loss': Array(0.15567939, dtype=float32), 'loss_cross_entropy': Array(0.1466432, dtype=float32)}
{'loss_inverse': Array(9.549858e-06, dtype=float32)}


  7%|▋         | 74050/1000000 [3:30:16<28:40:45,  8.97it/s]

{'loss': Array(0.13833019, dtype=float32), 'loss_cross_entropy': Array(0.13056415, dtype=float32)}
{'loss_inverse': Array(9.2179025e-06, dtype=float32)}


  7%|▋         | 74060/1000000 [3:30:18<25:28:34, 10.10it/s]

{'loss': Array(0.13865341, dtype=float32), 'loss_cross_entropy': Array(0.12991397, dtype=float32)}
{'loss_inverse': Array(3.4994733e-05, dtype=float32)}


  7%|▋         | 74070/1000000 [3:30:19<29:15:08,  8.79it/s]

{'loss': Array(0.13829754, dtype=float32), 'loss_cross_entropy': Array(0.13054466, dtype=float32)}
{'loss_inverse': Array(0.00010074, dtype=float32)}


  7%|▋         | 74080/1000000 [3:30:21<25:56:24,  9.92it/s]

{'loss': Array(0.15191655, dtype=float32), 'loss_cross_entropy': Array(0.14311934, dtype=float32)}
{'loss_inverse': Array(4.874838e-05, dtype=float32)}


  7%|▋         | 74090/1000000 [3:30:23<31:58:25,  8.04it/s]

{'loss': Array(0.13647996, dtype=float32), 'loss_cross_entropy': Array(0.12852572, dtype=float32)}
{'loss_inverse': Array(0.00011619, dtype=float32)}


  7%|▋         | 74100/1000000 [3:30:24<26:03:12,  9.87it/s]

{'loss': Array(0.16178839, dtype=float32), 'loss_cross_entropy': Array(0.15296857, dtype=float32)}
{'loss_inverse': Array(1.0832768e-05, dtype=float32)}


  7%|▋         | 74110/1000000 [3:30:25<25:28:20, 10.10it/s]

{'loss': Array(0.14688486, dtype=float32), 'loss_cross_entropy': Array(0.13844171, dtype=float32)}
{'loss_inverse': Array(3.0745938e-05, dtype=float32)}


  7%|▋         | 74120/1000000 [3:30:27<28:08:49,  9.14it/s]

{'loss': Array(0.15203856, dtype=float32), 'loss_cross_entropy': Array(0.14349575, dtype=float32)}
{'loss_inverse': Array(1.9773663e-05, dtype=float32)}


  7%|▋         | 74130/1000000 [3:30:29<25:47:32,  9.97it/s]

{'loss': Array(0.12122077, dtype=float32), 'loss_cross_entropy': Array(0.11374644, dtype=float32)}
{'loss_inverse': Array(0.00011914, dtype=float32)}


  7%|▋         | 74140/1000000 [3:30:30<29:39:18,  8.67it/s]

{'loss': Array(0.1490349, dtype=float32), 'loss_cross_entropy': Array(0.14074296, dtype=float32)}
{'loss_inverse': Array(2.8893285e-06, dtype=float32)}


  7%|▋         | 74150/1000000 [3:30:32<26:28:58,  9.71it/s]

{'loss': Array(0.15045848, dtype=float32), 'loss_cross_entropy': Array(0.14186399, dtype=float32)}
{'loss_inverse': Array(6.7707406e-05, dtype=float32)}


  7%|▋         | 74160/1000000 [3:30:34<32:34:21,  7.90it/s]

{'loss': Array(0.15600964, dtype=float32), 'loss_cross_entropy': Array(0.14716874, dtype=float32)}
{'loss_inverse': Array(1.4354979e-05, dtype=float32)}


  7%|▋         | 74170/1000000 [3:30:35<26:52:33,  9.57it/s]

{'loss': Array(0.15142633, dtype=float32), 'loss_cross_entropy': Array(0.14253668, dtype=float32)}
{'loss_inverse': Array(4.7561472e-05, dtype=float32)}


  7%|▋         | 74180/1000000 [3:30:37<25:19:46, 10.15it/s]

{'loss': Array(0.14190525, dtype=float32), 'loss_cross_entropy': Array(0.1334879, dtype=float32)}
{'loss_inverse': Array(7.955232e-07, dtype=float32)}


  7%|▋         | 74190/1000000 [3:30:38<29:27:04,  8.73it/s]

{'loss': Array(0.14909811, dtype=float32), 'loss_cross_entropy': Array(0.14064378, dtype=float32)}
{'loss_inverse': Array(8.982924e-05, dtype=float32)}


  7%|▋         | 74200/1000000 [3:30:40<26:41:01,  9.64it/s]

{'loss': Array(0.14161175, dtype=float32), 'loss_cross_entropy': Array(0.13297516, dtype=float32)}
{'loss_inverse': Array(1.6235264e-05, dtype=float32)}


  7%|▋         | 74210/1000000 [3:30:42<32:32:44,  7.90it/s]

{'loss': Array(0.1662916, dtype=float32), 'loss_cross_entropy': Array(0.15709785, dtype=float32)}
{'loss_inverse': Array(2.5731342e-07, dtype=float32)}


  7%|▋         | 74220/1000000 [3:30:43<26:25:40,  9.73it/s]

{'loss': Array(0.15158507, dtype=float32), 'loss_cross_entropy': Array(0.14337586, dtype=float32)}
{'loss_inverse': Array(1.9632043e-05, dtype=float32)}


  7%|▋         | 74230/1000000 [3:30:45<34:20:05,  7.49it/s]

{'loss': Array(0.14327013, dtype=float32), 'loss_cross_entropy': Array(0.13529904, dtype=float32)}
{'loss_inverse': Array(1.6968557e-05, dtype=float32)}


  7%|▋         | 74240/1000000 [3:30:46<26:40:31,  9.64it/s]

{'loss': Array(0.14203338, dtype=float32), 'loss_cross_entropy': Array(0.13390632, dtype=float32)}
{'loss_inverse': Array(3.3447188e-05, dtype=float32)}


  7%|▋         | 74250/1000000 [3:30:48<25:24:33, 10.12it/s]

{'loss': Array(0.13034248, dtype=float32), 'loss_cross_entropy': Array(0.12272467, dtype=float32)}
{'loss_inverse': Array(5.3622836e-07, dtype=float32)}


  7%|▋         | 74260/1000000 [3:30:49<27:55:46,  9.21it/s]

{'loss': Array(0.15579455, dtype=float32), 'loss_cross_entropy': Array(0.1472663, dtype=float32)}
{'loss_inverse': Array(8.0659066e-07, dtype=float32)}


  7%|▋         | 74270/1000000 [3:30:51<25:43:45,  9.99it/s]

{'loss': Array(0.14548202, dtype=float32), 'loss_cross_entropy': Array(0.1365778, dtype=float32)}
{'loss_inverse': Array(1.2047923e-06, dtype=float32)}


  7%|▋         | 74280/1000000 [3:30:52<34:17:14,  7.50it/s]

{'loss': Array(0.16665165, dtype=float32), 'loss_cross_entropy': Array(0.15778868, dtype=float32)}
{'loss_inverse': Array(0.00013077, dtype=float32)}


  7%|▋         | 74290/1000000 [3:30:54<27:18:36,  9.42it/s]

{'loss': Array(0.14452563, dtype=float32), 'loss_cross_entropy': Array(0.13530447, dtype=float32)}
{'loss_inverse': Array(0.00031949, dtype=float32)}


  7%|▋         | 74300/1000000 [3:30:55<26:00:34,  9.89it/s]

{'loss': Array(0.14180487, dtype=float32), 'loss_cross_entropy': Array(0.13337232, dtype=float32)}
{'loss_inverse': Array(0.0001072, dtype=float32)}


  7%|▋         | 74310/1000000 [3:30:57<28:49:15,  8.92it/s]

{'loss': Array(0.15760599, dtype=float32), 'loss_cross_entropy': Array(0.14928393, dtype=float32)}
{'loss_inverse': Array(5.083501e-05, dtype=float32)}


  7%|▋         | 74320/1000000 [3:30:59<26:31:22,  9.69it/s]

{'loss': Array(0.15261516, dtype=float32), 'loss_cross_entropy': Array(0.1439806, dtype=float32)}
{'loss_inverse': Array(5.1733656e-05, dtype=float32)}


  7%|▋         | 74330/1000000 [3:31:00<30:08:14,  8.53it/s]

{'loss': Array(0.14647569, dtype=float32), 'loss_cross_entropy': Array(0.13812467, dtype=float32)}
{'loss_inverse': Array(2.7125236e-05, dtype=float32)}


  7%|▋         | 74340/1000000 [3:31:02<25:48:26,  9.96it/s]

{'loss': Array(0.14927374, dtype=float32), 'loss_cross_entropy': Array(0.14136629, dtype=float32)}
{'loss_inverse': Array(5.4147906e-05, dtype=float32)}


  7%|▋         | 74350/1000000 [3:31:03<31:17:50,  8.22it/s]

{'loss': Array(0.14382212, dtype=float32), 'loss_cross_entropy': Array(0.1355935, dtype=float32)}
{'loss_inverse': Array(2.1075048e-05, dtype=float32)}


  7%|▋         | 74360/1000000 [3:31:05<26:18:03,  9.78it/s]

{'loss': Array(0.14537202, dtype=float32), 'loss_cross_entropy': Array(0.13742678, dtype=float32)}
{'loss_inverse': Array(3.9874863e-05, dtype=float32)}


  7%|▋         | 74370/1000000 [3:31:06<26:04:34,  9.86it/s]

{'loss': Array(0.1422661, dtype=float32), 'loss_cross_entropy': Array(0.13403697, dtype=float32)}
{'loss_inverse': Array(0.0002197, dtype=float32)}


  7%|▋         | 74380/1000000 [3:31:08<30:19:37,  8.48it/s]

{'loss': Array(0.14678165, dtype=float32), 'loss_cross_entropy': Array(0.13867503, dtype=float32)}
{'loss_inverse': Array(3.7409856e-05, dtype=float32)}


  7%|▋         | 74390/1000000 [3:31:10<26:03:17,  9.87it/s]

{'loss': Array(0.15180019, dtype=float32), 'loss_cross_entropy': Array(0.14309946, dtype=float32)}
{'loss_inverse': Array(1.7369144e-06, dtype=float32)}


  7%|▋         | 74400/1000000 [3:31:11<34:31:01,  7.45it/s]

{'loss': Array(0.14647308, dtype=float32), 'loss_cross_entropy': Array(0.1381307, dtype=float32)}
{'loss_inverse': Array(0.00020102, dtype=float32)}


  7%|▋         | 74410/1000000 [3:31:13<26:36:30,  9.66it/s]

{'loss': Array(0.14372309, dtype=float32), 'loss_cross_entropy': Array(0.13624814, dtype=float32)}
{'loss_inverse': Array(1.6078128e-06, dtype=float32)}


  7%|▋         | 74420/1000000 [3:31:14<37:52:53,  6.79it/s]

{'loss': Array(0.15151392, dtype=float32), 'loss_cross_entropy': Array(0.14305909, dtype=float32)}
{'loss_inverse': Array(1.3313743e-05, dtype=float32)}


  7%|▋         | 74430/1000000 [3:31:16<27:15:07,  9.43it/s]

{'loss': Array(0.1306277, dtype=float32), 'loss_cross_entropy': Array(0.12263571, dtype=float32)}
{'loss_inverse': Array(3.287614e-07, dtype=float32)}


  7%|▋         | 74440/1000000 [3:31:18<39:09:39,  6.57it/s]

{'loss': Array(0.12802082, dtype=float32), 'loss_cross_entropy': Array(0.12024989, dtype=float32)}
{'loss_inverse': Array(4.7381823e-06, dtype=float32)}


  7%|▋         | 74450/1000000 [3:31:19<27:33:57,  9.33it/s]

{'loss': Array(0.13880669, dtype=float32), 'loss_cross_entropy': Array(0.1303724, dtype=float32)}
{'loss_inverse': Array(1.631675e-06, dtype=float32)}


  7%|▋         | 74460/1000000 [3:31:21<27:04:42,  9.49it/s]

{'loss': Array(0.15457234, dtype=float32), 'loss_cross_entropy': Array(0.14582513, dtype=float32)}
{'loss_inverse': Array(2.027419e-05, dtype=float32)}


  7%|▋         | 74470/1000000 [3:31:22<31:39:51,  8.12it/s]

{'loss': Array(0.15269797, dtype=float32), 'loss_cross_entropy': Array(0.14401029, dtype=float32)}
{'loss_inverse': Array(3.356208e-06, dtype=float32)}


  7%|▋         | 74480/1000000 [3:31:24<26:14:49,  9.79it/s]

{'loss': Array(0.14595471, dtype=float32), 'loss_cross_entropy': Array(0.13811061, dtype=float32)}
{'loss_inverse': Array(5.6471043e-07, dtype=float32)}


  7%|▋         | 74490/1000000 [3:31:25<37:56:17,  6.78it/s]

{'loss': Array(0.1522808, dtype=float32), 'loss_cross_entropy': Array(0.14341123, dtype=float32)}
{'loss_inverse': Array(3.04032e-07, dtype=float32)}


  7%|▋         | 74500/1000000 [3:31:27<27:03:50,  9.50it/s]

{'loss': Array(0.14915885, dtype=float32), 'loss_cross_entropy': Array(0.14119916, dtype=float32)}
{'loss_inverse': Array(2.6559994e-07, dtype=float32)}


  7%|▋         | 74510/1000000 [3:31:35<99:57:25,  2.57it/s] 

{'loss': Array(0.15755676, dtype=float32), 'loss_cross_entropy': Array(0.14867301, dtype=float32)}
{'loss_inverse': Array(2.2488057e-07, dtype=float32)}


  7%|▋         | 74520/1000000 [3:31:37<38:11:22,  6.73it/s] 

{'loss': Array(0.15645878, dtype=float32), 'loss_cross_entropy': Array(0.14753163, dtype=float32)}
{'loss_inverse': Array(1.9226929e-05, dtype=float32)}


  7%|▋         | 74530/1000000 [3:31:38<27:16:11,  9.43it/s]

{'loss': Array(0.14668941, dtype=float32), 'loss_cross_entropy': Array(0.13795169, dtype=float32)}
{'loss_inverse': Array(5.626462e-05, dtype=float32)}


  7%|▋         | 74540/1000000 [3:31:40<29:43:01,  8.65it/s]

{'loss': Array(0.14050038, dtype=float32), 'loss_cross_entropy': Array(0.13174872, dtype=float32)}
{'loss_inverse': Array(1.6434968e-06, dtype=float32)}


  7%|▋         | 74550/1000000 [3:31:41<26:24:03,  9.74it/s]

{'loss': Array(0.13600992, dtype=float32), 'loss_cross_entropy': Array(0.12813371, dtype=float32)}
{'loss_inverse': Array(5.0277927e-06, dtype=float32)}


  7%|▋         | 74560/1000000 [3:31:43<38:42:51,  6.64it/s]

{'loss': Array(0.1404334, dtype=float32), 'loss_cross_entropy': Array(0.13214351, dtype=float32)}
{'loss_inverse': Array(2.5745892e-06, dtype=float32)}


  7%|▋         | 74570/1000000 [3:31:45<27:41:13,  9.28it/s]

{'loss': Array(0.17027402, dtype=float32), 'loss_cross_entropy': Array(0.16097108, dtype=float32)}
{'loss_inverse': Array(1.9711572e-06, dtype=float32)}


  7%|▋         | 74580/1000000 [3:31:46<25:33:45, 10.06it/s]

{'loss': Array(0.13490012, dtype=float32), 'loss_cross_entropy': Array(0.12706657, dtype=float32)}
{'loss_inverse': Array(1.3485997e-07, dtype=float32)}


  7%|▋         | 74590/1000000 [3:31:48<30:07:52,  8.53it/s]

{'loss': Array(0.13285759, dtype=float32), 'loss_cross_entropy': Array(0.12501965, dtype=float32)}
{'loss_inverse': Array(4.719741e-07, dtype=float32)}


  7%|▋         | 74600/1000000 [3:31:49<26:05:03,  9.85it/s]

{'loss': Array(0.15236665, dtype=float32), 'loss_cross_entropy': Array(0.14353172, dtype=float32)}
{'loss_inverse': Array(2.5080635e-06, dtype=float32)}


  7%|▋         | 74610/1000000 [3:31:51<29:46:55,  8.63it/s]

{'loss': Array(0.14542817, dtype=float32), 'loss_cross_entropy': Array(0.1371636, dtype=float32)}
{'loss_inverse': Array(4.3750512e-07, dtype=float32)}


  7%|▋         | 74620/1000000 [3:31:52<25:34:45, 10.05it/s]

{'loss': Array(0.1448486, dtype=float32), 'loss_cross_entropy': Array(0.1361171, dtype=float32)}
{'loss_inverse': Array(6.2920444e-05, dtype=float32)}


  7%|▋         | 74630/1000000 [3:31:54<31:51:25,  8.07it/s]

{'loss': Array(0.14289032, dtype=float32), 'loss_cross_entropy': Array(0.13568881, dtype=float32)}
{'loss_inverse': Array(2.6778525e-06, dtype=float32)}


  7%|▋         | 74640/1000000 [3:31:56<26:51:52,  9.57it/s]

{'loss': Array(0.14970855, dtype=float32), 'loss_cross_entropy': Array(0.14090985, dtype=float32)}
{'loss_inverse': Array(6.3030275e-05, dtype=float32)}


  7%|▋         | 74650/1000000 [3:31:57<25:33:32, 10.06it/s]

{'loss': Array(0.15182981, dtype=float32), 'loss_cross_entropy': Array(0.14298107, dtype=float32)}
{'loss_inverse': Array(3.3806147e-05, dtype=float32)}


  7%|▋         | 74660/1000000 [3:31:59<28:42:46,  8.95it/s]

{'loss': Array(0.15377605, dtype=float32), 'loss_cross_entropy': Array(0.1449214, dtype=float32)}
{'loss_inverse': Array(4.539065e-05, dtype=float32)}


  7%|▋         | 74670/1000000 [3:32:00<25:47:03,  9.97it/s]

{'loss': Array(0.15431742, dtype=float32), 'loss_cross_entropy': Array(0.14551464, dtype=float32)}
{'loss_inverse': Array(1.9100913e-05, dtype=float32)}


  7%|▋         | 74680/1000000 [3:32:02<30:01:48,  8.56it/s]

{'loss': Array(0.14437161, dtype=float32), 'loss_cross_entropy': Array(0.13546601, dtype=float32)}
{'loss_inverse': Array(9.9186786e-05, dtype=float32)}


  7%|▋         | 74690/1000000 [3:32:03<26:05:38,  9.85it/s]

{'loss': Array(0.14334615, dtype=float32), 'loss_cross_entropy': Array(0.13528125, dtype=float32)}
{'loss_inverse': Array(1.1294593e-06, dtype=float32)}


  7%|▋         | 74700/1000000 [3:32:05<32:04:08,  8.01it/s]

{'loss': Array(0.16222934, dtype=float32), 'loss_cross_entropy': Array(0.15203527, dtype=float32)}
{'loss_inverse': Array(1.4104543e-07, dtype=float32)}


  7%|▋         | 74710/1000000 [3:32:07<27:07:09,  9.48it/s]

{'loss': Array(0.13733105, dtype=float32), 'loss_cross_entropy': Array(0.1292935, dtype=float32)}
{'loss_inverse': Array(0.00012753, dtype=float32)}


  7%|▋         | 74720/1000000 [3:32:08<25:24:43, 10.11it/s]

{'loss': Array(0.1425079, dtype=float32), 'loss_cross_entropy': Array(0.13475871, dtype=float32)}
{'loss_inverse': Array(2.9249575e-06, dtype=float32)}


  7%|▋         | 74730/1000000 [3:32:10<30:06:26,  8.54it/s]

{'loss': Array(0.14026323, dtype=float32), 'loss_cross_entropy': Array(0.13201326, dtype=float32)}
{'loss_inverse': Array(8.91243e-05, dtype=float32)}


  7%|▋         | 74740/1000000 [3:32:11<25:36:04, 10.04it/s]

{'loss': Array(0.14209989, dtype=float32), 'loss_cross_entropy': Array(0.1332963, dtype=float32)}
{'loss_inverse': Array(2.2178507e-05, dtype=float32)}


  7%|▋         | 74750/1000000 [3:32:13<32:01:35,  8.03it/s]

{'loss': Array(0.14010316, dtype=float32), 'loss_cross_entropy': Array(0.13175063, dtype=float32)}
{'loss_inverse': Array(8.386015e-06, dtype=float32)}


  7%|▋         | 74760/1000000 [3:32:14<26:14:15,  9.80it/s]

{'loss': Array(0.1378596, dtype=float32), 'loss_cross_entropy': Array(0.12978984, dtype=float32)}
{'loss_inverse': Array(2.1804959e-05, dtype=float32)}


  7%|▋         | 74770/1000000 [3:32:16<34:59:26,  7.35it/s]

{'loss': Array(0.15344736, dtype=float32), 'loss_cross_entropy': Array(0.14500152, dtype=float32)}
{'loss_inverse': Array(8.409569e-05, dtype=float32)}


  7%|▋         | 74780/1000000 [3:32:18<26:58:25,  9.53it/s]

{'loss': Array(0.1405342, dtype=float32), 'loss_cross_entropy': Array(0.13248733, dtype=float32)}
{'loss_inverse': Array(8.8882933e-07, dtype=float32)}


  7%|▋         | 74790/1000000 [3:32:19<24:52:46, 10.33it/s]

{'loss': Array(0.15232502, dtype=float32), 'loss_cross_entropy': Array(0.14383882, dtype=float32)}
{'loss_inverse': Array(9.796467e-06, dtype=float32)}


  7%|▋         | 74800/1000000 [3:32:21<27:57:56,  9.19it/s]

{'loss': Array(0.14623116, dtype=float32), 'loss_cross_entropy': Array(0.13759778, dtype=float32)}
{'loss_inverse': Array(4.2140775e-05, dtype=float32)}


  7%|▋         | 74810/1000000 [3:32:22<25:11:15, 10.20it/s]

{'loss': Array(0.13918537, dtype=float32), 'loss_cross_entropy': Array(0.13070099, dtype=float32)}
{'loss_inverse': Array(4.4772456e-05, dtype=float32)}


  7%|▋         | 74820/1000000 [3:32:24<33:53:51,  7.58it/s]

{'loss': Array(0.14208029, dtype=float32), 'loss_cross_entropy': Array(0.13446258, dtype=float32)}
{'loss_inverse': Array(0.00024035, dtype=float32)}


  7%|▋         | 74830/1000000 [3:32:25<26:27:07,  9.72it/s]

{'loss': Array(0.14568074, dtype=float32), 'loss_cross_entropy': Array(0.13716625, dtype=float32)}
{'loss_inverse': Array(5.140769e-06, dtype=float32)}


  7%|▋         | 74840/1000000 [3:32:27<25:16:36, 10.17it/s]

{'loss': Array(0.1537177, dtype=float32), 'loss_cross_entropy': Array(0.14498366, dtype=float32)}
{'loss_inverse': Array(3.3594242e-07, dtype=float32)}


  7%|▋         | 74850/1000000 [3:32:28<28:00:34,  9.17it/s]

{'loss': Array(0.14964432, dtype=float32), 'loss_cross_entropy': Array(0.1408963, dtype=float32)}
{'loss_inverse': Array(3.546076e-05, dtype=float32)}


  7%|▋         | 74860/1000000 [3:32:30<26:08:29,  9.83it/s]

{'loss': Array(0.15999547, dtype=float32), 'loss_cross_entropy': Array(0.15039928, dtype=float32)}
{'loss_inverse': Array(1.0426441e-05, dtype=float32)}


  7%|▋         | 74870/1000000 [3:32:32<29:44:37,  8.64it/s]

{'loss': Array(0.13598257, dtype=float32), 'loss_cross_entropy': Array(0.12777346, dtype=float32)}
{'loss_inverse': Array(5.324479e-07, dtype=float32)}


  7%|▋         | 74880/1000000 [3:32:33<26:14:48,  9.79it/s]

{'loss': Array(0.1363328, dtype=float32), 'loss_cross_entropy': Array(0.1282461, dtype=float32)}
{'loss_inverse': Array(4.7982403e-06, dtype=float32)}


  7%|▋         | 74890/1000000 [3:32:35<31:39:55,  8.12it/s]

{'loss': Array(0.14881608, dtype=float32), 'loss_cross_entropy': Array(0.14076708, dtype=float32)}
{'loss_inverse': Array(8.517281e-05, dtype=float32)}


  7%|▋         | 74900/1000000 [3:32:36<26:10:13,  9.82it/s]

{'loss': Array(0.15847111, dtype=float32), 'loss_cross_entropy': Array(0.1489604, dtype=float32)}
{'loss_inverse': Array(2.5134166e-06, dtype=float32)}


  7%|▋         | 74910/1000000 [3:32:38<25:35:11, 10.04it/s]

{'loss': Array(0.17885858, dtype=float32), 'loss_cross_entropy': Array(0.16961448, dtype=float32)}
{'loss_inverse': Array(1.1480675e-05, dtype=float32)}


  7%|▋         | 74920/1000000 [3:32:39<29:19:37,  8.76it/s]

{'loss': Array(0.14376998, dtype=float32), 'loss_cross_entropy': Array(0.13579367, dtype=float32)}
{'loss_inverse': Array(6.7854307e-06, dtype=float32)}


  7%|▋         | 74930/1000000 [3:32:41<26:01:09,  9.88it/s]

{'loss': Array(0.16371581, dtype=float32), 'loss_cross_entropy': Array(0.15402208, dtype=float32)}
{'loss_inverse': Array(1.5249987e-05, dtype=float32)}


  7%|▋         | 74940/1000000 [3:32:43<35:18:54,  7.28it/s]

{'loss': Array(0.17004287, dtype=float32), 'loss_cross_entropy': Array(0.16087751, dtype=float32)}
{'loss_inverse': Array(2.7557292e-06, dtype=float32)}


  7%|▋         | 74950/1000000 [3:32:44<26:33:47,  9.67it/s]

{'loss': Array(0.16256279, dtype=float32), 'loss_cross_entropy': Array(0.15317445, dtype=float32)}
{'loss_inverse': Array(6.983953e-06, dtype=float32)}


  7%|▋         | 74960/1000000 [3:32:46<38:03:03,  6.75it/s]

{'loss': Array(0.13734205, dtype=float32), 'loss_cross_entropy': Array(0.12946406, dtype=float32)}
{'loss_inverse': Array(2.4361352e-06, dtype=float32)}


  7%|▋         | 74970/1000000 [3:32:47<27:20:51,  9.40it/s]

{'loss': Array(0.1402051, dtype=float32), 'loss_cross_entropy': Array(0.13179675, dtype=float32)}
{'loss_inverse': Array(4.4874774e-05, dtype=float32)}


  7%|▋         | 74980/1000000 [3:32:49<38:33:18,  6.66it/s]

{'loss': Array(0.16120249, dtype=float32), 'loss_cross_entropy': Array(0.1519637, dtype=float32)}
{'loss_inverse': Array(9.150567e-07, dtype=float32)}


  7%|▋         | 74990/1000000 [3:32:50<27:10:54,  9.45it/s]

{'loss': Array(0.14896783, dtype=float32), 'loss_cross_entropy': Array(0.1402313, dtype=float32)}
{'loss_inverse': Array(2.9912087e-06, dtype=float32)}


  8%|▊         | 75000/1000000 [3:32:52<25:36:23, 10.03it/s]

{'loss': Array(0.11054157, dtype=float32), 'loss_cross_entropy': Array(0.10346569, dtype=float32)}
{'loss_inverse': Array(4.887723e-06, dtype=float32)}


  8%|▊         | 75010/1000000 [3:33:00<95:16:35,  2.70it/s] 

{'loss': Array(0.14752643, dtype=float32), 'loss_cross_entropy': Array(0.13913743, dtype=float32)}
{'loss_inverse': Array(6.009455e-06, dtype=float32)}


  8%|▊         | 75020/1000000 [3:33:02<37:23:47,  6.87it/s] 

{'loss': Array(0.1451217, dtype=float32), 'loss_cross_entropy': Array(0.13688831, dtype=float32)}
{'loss_inverse': Array(6.080935e-07, dtype=float32)}


  8%|▊         | 75030/1000000 [3:33:03<27:25:53,  9.37it/s]

{'loss': Array(0.1337963, dtype=float32), 'loss_cross_entropy': Array(0.12618911, dtype=float32)}
{'loss_inverse': Array(3.941723e-06, dtype=float32)}


  8%|▊         | 75040/1000000 [3:33:05<28:56:43,  8.88it/s]

{'loss': Array(0.14961003, dtype=float32), 'loss_cross_entropy': Array(0.14063072, dtype=float32)}
{'loss_inverse': Array(4.750892e-07, dtype=float32)}


  8%|▊         | 75050/1000000 [3:33:06<26:14:59,  9.79it/s]

{'loss': Array(0.14994265, dtype=float32), 'loss_cross_entropy': Array(0.14157152, dtype=float32)}
{'loss_inverse': Array(4.0659517e-05, dtype=float32)}


  8%|▊         | 75060/1000000 [3:33:08<30:38:04,  8.39it/s]

{'loss': Array(0.13181491, dtype=float32), 'loss_cross_entropy': Array(0.12345614, dtype=float32)}
{'loss_inverse': Array(1.9565508e-07, dtype=float32)}


  8%|▊         | 75070/1000000 [3:33:09<25:39:21, 10.01it/s]

{'loss': Array(0.13421193, dtype=float32), 'loss_cross_entropy': Array(0.12647794, dtype=float32)}
{'loss_inverse': Array(3.5966052e-06, dtype=float32)}


  8%|▊         | 75080/1000000 [3:33:11<29:28:41,  8.72it/s]

{'loss': Array(0.13239451, dtype=float32), 'loss_cross_entropy': Array(0.12483032, dtype=float32)}
{'loss_inverse': Array(7.5517363e-07, dtype=float32)}


  8%|▊         | 75090/1000000 [3:33:13<26:23:45,  9.73it/s]

{'loss': Array(0.14478932, dtype=float32), 'loss_cross_entropy': Array(0.13639209, dtype=float32)}
{'loss_inverse': Array(3.5563316e-06, dtype=float32)}


  8%|▊         | 75100/1000000 [3:33:14<29:53:29,  8.59it/s]

{'loss': Array(0.1442327, dtype=float32), 'loss_cross_entropy': Array(0.1360099, dtype=float32)}
{'loss_inverse': Array(1.4373853e-06, dtype=float32)}


  8%|▊         | 75110/1000000 [3:33:16<26:04:09,  9.85it/s]

{'loss': Array(0.14505339, dtype=float32), 'loss_cross_entropy': Array(0.13605173, dtype=float32)}
{'loss_inverse': Array(7.303458e-07, dtype=float32)}


  8%|▊         | 75120/1000000 [3:33:18<34:14:18,  7.50it/s]

{'loss': Array(0.1523402, dtype=float32), 'loss_cross_entropy': Array(0.14312379, dtype=float32)}
{'loss_inverse': Array(3.0627973e-06, dtype=float32)}


  8%|▊         | 75130/1000000 [3:33:19<26:30:42,  9.69it/s]

{'loss': Array(0.14190048, dtype=float32), 'loss_cross_entropy': Array(0.13370842, dtype=float32)}
{'loss_inverse': Array(3.2437475e-07, dtype=float32)}


  8%|▊         | 75140/1000000 [3:33:21<38:40:40,  6.64it/s]

{'loss': Array(0.1590659, dtype=float32), 'loss_cross_entropy': Array(0.15021335, dtype=float32)}
{'loss_inverse': Array(8.2458754e-08, dtype=float32)}


  8%|▊         | 75150/1000000 [3:33:22<27:14:05,  9.43it/s]

{'loss': Array(0.13175906, dtype=float32), 'loss_cross_entropy': Array(0.12400985, dtype=float32)}
{'loss_inverse': Array(1.1847308e-06, dtype=float32)}


  8%|▊         | 75160/1000000 [3:33:24<34:23:20,  7.47it/s]

{'loss': Array(0.15553908, dtype=float32), 'loss_cross_entropy': Array(0.14656956, dtype=float32)}
{'loss_inverse': Array(2.7651884e-06, dtype=float32)}


  8%|▊         | 75170/1000000 [3:33:25<27:12:16,  9.44it/s]

{'loss': Array(0.13891168, dtype=float32), 'loss_cross_entropy': Array(0.13031988, dtype=float32)}
{'loss_inverse': Array(9.648261e-07, dtype=float32)}


  8%|▊         | 75180/1000000 [3:33:27<25:08:58, 10.21it/s]

{'loss': Array(0.13617675, dtype=float32), 'loss_cross_entropy': Array(0.1278531, dtype=float32)}
{'loss_inverse': Array(1.7098033e-06, dtype=float32)}


  8%|▊         | 75190/1000000 [3:33:29<30:05:51,  8.54it/s]

{'loss': Array(0.13266607, dtype=float32), 'loss_cross_entropy': Array(0.12454025, dtype=float32)}
{'loss_inverse': Array(8.241192e-08, dtype=float32)}


  8%|▊         | 75200/1000000 [3:33:30<26:11:35,  9.81it/s]

{'loss': Array(0.15537626, dtype=float32), 'loss_cross_entropy': Array(0.14689672, dtype=float32)}
{'loss_inverse': Array(4.810561e-07, dtype=float32)}


  8%|▊         | 75210/1000000 [3:33:32<31:23:45,  8.18it/s]

{'loss': Array(0.13877176, dtype=float32), 'loss_cross_entropy': Array(0.13047731, dtype=float32)}
{'loss_inverse': Array(1.03852926e-07, dtype=float32)}


  8%|▊         | 75220/1000000 [3:33:33<26:25:32,  9.72it/s]

{'loss': Array(0.15137565, dtype=float32), 'loss_cross_entropy': Array(0.14306395, dtype=float32)}
{'loss_inverse': Array(1.093326e-06, dtype=float32)}


  8%|▊         | 75230/1000000 [3:33:35<34:28:42,  7.45it/s]

{'loss': Array(0.15859005, dtype=float32), 'loss_cross_entropy': Array(0.14920442, dtype=float32)}
{'loss_inverse': Array(1.3105132e-07, dtype=float32)}


  8%|▊         | 75240/1000000 [3:33:36<26:56:24,  9.54it/s]

{'loss': Array(0.14134295, dtype=float32), 'loss_cross_entropy': Array(0.1336037, dtype=float32)}
{'loss_inverse': Array(4.3821374e-06, dtype=float32)}


  8%|▊         | 75250/1000000 [3:33:38<34:54:20,  7.36it/s]

{'loss': Array(0.14694022, dtype=float32), 'loss_cross_entropy': Array(0.13825904, dtype=float32)}
{'loss_inverse': Array(6.6076615e-07, dtype=float32)}


  8%|▊         | 75260/1000000 [3:33:40<26:48:34,  9.58it/s]

{'loss': Array(0.15943556, dtype=float32), 'loss_cross_entropy': Array(0.15061139, dtype=float32)}
{'loss_inverse': Array(6.5919203e-06, dtype=float32)}


  8%|▊         | 75270/1000000 [3:33:41<25:09:06, 10.21it/s]

{'loss': Array(0.14387351, dtype=float32), 'loss_cross_entropy': Array(0.13560092, dtype=float32)}
{'loss_inverse': Array(1.4185906e-07, dtype=float32)}


  8%|▊         | 75280/1000000 [3:33:43<31:16:32,  8.21it/s]

{'loss': Array(0.1447308, dtype=float32), 'loss_cross_entropy': Array(0.13653664, dtype=float32)}
{'loss_inverse': Array(1.0985511e-06, dtype=float32)}


  8%|▊         | 75290/1000000 [3:33:44<26:21:22,  9.75it/s]

{'loss': Array(0.14383356, dtype=float32), 'loss_cross_entropy': Array(0.13571475, dtype=float32)}
{'loss_inverse': Array(2.4680548e-07, dtype=float32)}


  8%|▊         | 75300/1000000 [3:33:46<38:07:09,  6.74it/s]

{'loss': Array(0.14965144, dtype=float32), 'loss_cross_entropy': Array(0.14144622, dtype=float32)}
{'loss_inverse': Array(6.7975645e-07, dtype=float32)}


  8%|▊         | 75310/1000000 [3:33:47<27:17:29,  9.41it/s]

{'loss': Array(0.1483362, dtype=float32), 'loss_cross_entropy': Array(0.14000171, dtype=float32)}
{'loss_inverse': Array(1.07957e-07, dtype=float32)}


  8%|▊         | 75320/1000000 [3:33:49<26:29:00,  9.70it/s]

{'loss': Array(0.16953997, dtype=float32), 'loss_cross_entropy': Array(0.16035818, dtype=float32)}
{'loss_inverse': Array(8.1549366e-08, dtype=float32)}


  8%|▊         | 75330/1000000 [3:33:50<28:55:25,  8.88it/s]

{'loss': Array(0.14162035, dtype=float32), 'loss_cross_entropy': Array(0.13353078, dtype=float32)}
{'loss_inverse': Array(1.2583432e-07, dtype=float32)}


  8%|▊         | 75340/1000000 [3:33:52<26:10:15,  9.81it/s]

{'loss': Array(0.13792254, dtype=float32), 'loss_cross_entropy': Array(0.12999438, dtype=float32)}
{'loss_inverse': Array(1.4776519e-07, dtype=float32)}


  8%|▊         | 75350/1000000 [3:33:54<28:12:56,  9.10it/s]

{'loss': Array(0.15281755, dtype=float32), 'loss_cross_entropy': Array(0.144495, dtype=float32)}
{'loss_inverse': Array(5.496106e-06, dtype=float32)}


  8%|▊         | 75360/1000000 [3:33:55<25:29:59, 10.07it/s]

{'loss': Array(0.14245969, dtype=float32), 'loss_cross_entropy': Array(0.13415693, dtype=float32)}
{'loss_inverse': Array(7.6884184e-08, dtype=float32)}


  8%|▊         | 75370/1000000 [3:33:57<32:13:34,  7.97it/s]

{'loss': Array(0.14086969, dtype=float32), 'loss_cross_entropy': Array(0.13225004, dtype=float32)}
{'loss_inverse': Array(1.9487338e-07, dtype=float32)}


  8%|▊         | 75380/1000000 [3:33:58<26:19:51,  9.75it/s]

{'loss': Array(0.15347135, dtype=float32), 'loss_cross_entropy': Array(0.14454977, dtype=float32)}
{'loss_inverse': Array(2.938997e-06, dtype=float32)}


  8%|▊         | 75390/1000000 [3:34:00<31:30:30,  8.15it/s]

{'loss': Array(0.15100366, dtype=float32), 'loss_cross_entropy': Array(0.14230882, dtype=float32)}
{'loss_inverse': Array(8.478851e-07, dtype=float32)}


  8%|▊         | 75400/1000000 [3:34:01<26:18:34,  9.76it/s]

{'loss': Array(0.15041693, dtype=float32), 'loss_cross_entropy': Array(0.14168608, dtype=float32)}
{'loss_inverse': Array(1.1121729e-07, dtype=float32)}


  8%|▊         | 75410/1000000 [3:34:03<38:16:58,  6.71it/s]

{'loss': Array(0.13899733, dtype=float32), 'loss_cross_entropy': Array(0.1309259, dtype=float32)}
{'loss_inverse': Array(1.4305947e-06, dtype=float32)}


  8%|▊         | 75420/1000000 [3:34:05<26:52:54,  9.55it/s]

{'loss': Array(0.13123077, dtype=float32), 'loss_cross_entropy': Array(0.12359202, dtype=float32)}
{'loss_inverse': Array(1.1409889e-07, dtype=float32)}


  8%|▊         | 75430/1000000 [3:34:06<25:21:15, 10.13it/s]

{'loss': Array(0.15494314, dtype=float32), 'loss_cross_entropy': Array(0.14673255, dtype=float32)}
{'loss_inverse': Array(3.754085e-07, dtype=float32)}


  8%|▊         | 75440/1000000 [3:34:08<29:05:14,  8.83it/s]

{'loss': Array(0.14401643, dtype=float32), 'loss_cross_entropy': Array(0.13584594, dtype=float32)}
{'loss_inverse': Array(1.8680002e-08, dtype=float32)}


  8%|▊         | 75450/1000000 [3:34:09<25:34:49, 10.04it/s]

{'loss': Array(0.14496358, dtype=float32), 'loss_cross_entropy': Array(0.13643077, dtype=float32)}
{'loss_inverse': Array(2.0990619e-07, dtype=float32)}


  8%|▊         | 75460/1000000 [3:34:11<31:23:43,  8.18it/s]

{'loss': Array(0.14918901, dtype=float32), 'loss_cross_entropy': Array(0.1404313, dtype=float32)}
{'loss_inverse': Array(5.0582963e-07, dtype=float32)}


  8%|▊         | 75470/1000000 [3:34:12<26:18:36,  9.76it/s]

{'loss': Array(0.15786085, dtype=float32), 'loss_cross_entropy': Array(0.14900063, dtype=float32)}
{'loss_inverse': Array(1.2240359e-07, dtype=float32)}


  8%|▊         | 75480/1000000 [3:34:14<26:17:34,  9.77it/s]

{'loss': Array(0.1522971, dtype=float32), 'loss_cross_entropy': Array(0.14393122, dtype=float32)}
{'loss_inverse': Array(4.956868e-07, dtype=float32)}


  8%|▊         | 75490/1000000 [3:34:16<29:16:59,  8.77it/s]

{'loss': Array(0.13514704, dtype=float32), 'loss_cross_entropy': Array(0.1269631, dtype=float32)}
{'loss_inverse': Array(4.602493e-08, dtype=float32)}


  8%|▊         | 75500/1000000 [3:34:17<25:30:42, 10.07it/s]

{'loss': Array(0.13564871, dtype=float32), 'loss_cross_entropy': Array(0.12762883, dtype=float32)}
{'loss_inverse': Array(3.8165656e-08, dtype=float32)}


  8%|▊         | 75510/1000000 [3:34:25<93:48:15,  2.74it/s] 

{'loss': Array(0.1551837, dtype=float32), 'loss_cross_entropy': Array(0.14586091, dtype=float32)}
{'loss_inverse': Array(3.406106e-05, dtype=float32)}


  8%|▊         | 75520/1000000 [3:34:27<37:37:19,  6.83it/s] 

{'loss': Array(0.15750562, dtype=float32), 'loss_cross_entropy': Array(0.14829086, dtype=float32)}
{'loss_inverse': Array(1.5318712e-05, dtype=float32)}


  8%|▊         | 75530/1000000 [3:34:28<39:26:04,  6.51it/s]

{'loss': Array(0.13341253, dtype=float32), 'loss_cross_entropy': Array(0.12557243, dtype=float32)}
{'loss_inverse': Array(4.2087722e-05, dtype=float32)}


  8%|▊         | 75540/1000000 [3:34:30<27:54:18,  9.20it/s]

{'loss': Array(0.15482377, dtype=float32), 'loss_cross_entropy': Array(0.14583406, dtype=float32)}
{'loss_inverse': Array(5.922599e-06, dtype=float32)}


  8%|▊         | 75550/1000000 [3:34:31<25:41:02, 10.00it/s]

{'loss': Array(0.12158155, dtype=float32), 'loss_cross_entropy': Array(0.11364185, dtype=float32)}
{'loss_inverse': Array(2.1672518e-06, dtype=float32)}


  8%|▊         | 75560/1000000 [3:34:33<28:30:01,  9.01it/s]

{'loss': Array(0.13941391, dtype=float32), 'loss_cross_entropy': Array(0.13140173, dtype=float32)}
{'loss_inverse': Array(4.1023554e-06, dtype=float32)}


  8%|▊         | 75570/1000000 [3:34:35<37:56:59,  6.77it/s]

{'loss': Array(0.15040213, dtype=float32), 'loss_cross_entropy': Array(0.14173543, dtype=float32)}
{'loss_inverse': Array(0.00019352, dtype=float32)}


  8%|▊         | 75580/1000000 [3:34:36<26:57:11,  9.53it/s]

{'loss': Array(0.15292326, dtype=float32), 'loss_cross_entropy': Array(0.14402157, dtype=float32)}
{'loss_inverse': Array(6.3195957e-06, dtype=float32)}


  8%|▊         | 75590/1000000 [3:34:37<25:24:13, 10.11it/s]

{'loss': Array(0.13068263, dtype=float32), 'loss_cross_entropy': Array(0.12321818, dtype=float32)}
{'loss_inverse': Array(4.8339392e-05, dtype=float32)}


  8%|▊         | 75600/1000000 [3:34:39<28:26:18,  9.03it/s]

{'loss': Array(0.14924066, dtype=float32), 'loss_cross_entropy': Array(0.14057633, dtype=float32)}
{'loss_inverse': Array(1.1428746e-06, dtype=float32)}


  8%|▊         | 75610/1000000 [3:34:41<26:03:36,  9.85it/s]

{'loss': Array(0.13322966, dtype=float32), 'loss_cross_entropy': Array(0.1254103, dtype=float32)}
{'loss_inverse': Array(4.1750063e-05, dtype=float32)}


  8%|▊         | 75620/1000000 [3:34:42<29:49:29,  8.61it/s]

{'loss': Array(0.13254602, dtype=float32), 'loss_cross_entropy': Array(0.12475494, dtype=float32)}
{'loss_inverse': Array(6.3131294e-05, dtype=float32)}


  8%|▊         | 75630/1000000 [3:34:44<26:32:02,  9.68it/s]

{'loss': Array(0.14881766, dtype=float32), 'loss_cross_entropy': Array(0.13990127, dtype=float32)}
{'loss_inverse': Array(0.00013497, dtype=float32)}


  8%|▊         | 75640/1000000 [3:34:46<32:14:16,  7.96it/s]

{'loss': Array(0.1569631, dtype=float32), 'loss_cross_entropy': Array(0.14857009, dtype=float32)}
{'loss_inverse': Array(4.7525145e-06, dtype=float32)}


  8%|▊         | 75650/1000000 [3:34:47<26:43:14,  9.61it/s]

{'loss': Array(0.14681144, dtype=float32), 'loss_cross_entropy': Array(0.13876164, dtype=float32)}
{'loss_inverse': Array(3.0361798e-05, dtype=float32)}


  8%|▊         | 75660/1000000 [3:34:49<25:15:46, 10.16it/s]

{'loss': Array(0.1574134, dtype=float32), 'loss_cross_entropy': Array(0.14866158, dtype=float32)}
{'loss_inverse': Array(0.00014672, dtype=float32)}


  8%|▊         | 75670/1000000 [3:34:50<30:03:39,  8.54it/s]

{'loss': Array(0.14556746, dtype=float32), 'loss_cross_entropy': Array(0.13675286, dtype=float32)}
{'loss_inverse': Array(2.0059013e-06, dtype=float32)}


  8%|▊         | 75680/1000000 [3:34:52<26:34:54,  9.66it/s]

{'loss': Array(0.16052862, dtype=float32), 'loss_cross_entropy': Array(0.1519829, dtype=float32)}
{'loss_inverse': Array(7.970499e-06, dtype=float32)}


  8%|▊         | 75690/1000000 [3:34:54<31:21:00,  8.19it/s]

{'loss': Array(0.1303638, dtype=float32), 'loss_cross_entropy': Array(0.12251379, dtype=float32)}
{'loss_inverse': Array(8.079838e-06, dtype=float32)}


  8%|▊         | 75700/1000000 [3:34:55<25:53:53,  9.91it/s]

{'loss': Array(0.15126322, dtype=float32), 'loss_cross_entropy': Array(0.14290398, dtype=float32)}
{'loss_inverse': Array(4.954408e-07, dtype=float32)}


  8%|▊         | 75710/1000000 [3:34:57<34:04:16,  7.54it/s]

{'loss': Array(0.14731394, dtype=float32), 'loss_cross_entropy': Array(0.13841473, dtype=float32)}
{'loss_inverse': Array(7.033377e-06, dtype=float32)}


  8%|▊         | 75720/1000000 [3:34:58<26:07:31,  9.83it/s]

{'loss': Array(0.1433638, dtype=float32), 'loss_cross_entropy': Array(0.13527977, dtype=float32)}
{'loss_inverse': Array(1.9931378e-05, dtype=float32)}


  8%|▊         | 75730/1000000 [3:34:59<25:26:06, 10.09it/s]

{'loss': Array(0.1517751, dtype=float32), 'loss_cross_entropy': Array(0.14251764, dtype=float32)}
{'loss_inverse': Array(1.5987763e-07, dtype=float32)}


  8%|▊         | 75740/1000000 [3:35:01<27:50:31,  9.22it/s]

{'loss': Array(0.13891657, dtype=float32), 'loss_cross_entropy': Array(0.13012333, dtype=float32)}
{'loss_inverse': Array(4.500223e-06, dtype=float32)}


  8%|▊         | 75750/1000000 [3:35:03<25:54:26,  9.91it/s]

{'loss': Array(0.12969778, dtype=float32), 'loss_cross_entropy': Array(0.12178292, dtype=float32)}
{'loss_inverse': Array(7.697135e-05, dtype=float32)}


  8%|▊         | 75760/1000000 [3:35:04<33:40:50,  7.62it/s]

{'loss': Array(0.14420538, dtype=float32), 'loss_cross_entropy': Array(0.13601287, dtype=float32)}
{'loss_inverse': Array(2.582326e-05, dtype=float32)}


  8%|▊         | 75770/1000000 [3:35:06<26:35:56,  9.65it/s]

{'loss': Array(0.15191634, dtype=float32), 'loss_cross_entropy': Array(0.14324787, dtype=float32)}
{'loss_inverse': Array(9.426005e-08, dtype=float32)}


  8%|▊         | 75780/1000000 [3:35:07<26:15:02,  9.78it/s]

{'loss': Array(0.1429417, dtype=float32), 'loss_cross_entropy': Array(0.13451631, dtype=float32)}
{'loss_inverse': Array(6.0798917e-05, dtype=float32)}


  8%|▊         | 75790/1000000 [3:35:09<29:04:21,  8.83it/s]

{'loss': Array(0.14236169, dtype=float32), 'loss_cross_entropy': Array(0.13446392, dtype=float32)}
{'loss_inverse': Array(3.493677e-06, dtype=float32)}


  8%|▊         | 75800/1000000 [3:35:10<25:24:45, 10.10it/s]

{'loss': Array(0.14532135, dtype=float32), 'loss_cross_entropy': Array(0.13643676, dtype=float32)}
{'loss_inverse': Array(3.4930417e-06, dtype=float32)}


  8%|▊         | 75810/1000000 [3:35:12<29:10:59,  8.80it/s]

{'loss': Array(0.13604324, dtype=float32), 'loss_cross_entropy': Array(0.12817752, dtype=float32)}
{'loss_inverse': Array(2.0222342e-06, dtype=float32)}


  8%|▊         | 75820/1000000 [3:35:14<25:39:26, 10.01it/s]

{'loss': Array(0.13888328, dtype=float32), 'loss_cross_entropy': Array(0.1309646, dtype=float32)}
{'loss_inverse': Array(2.8613065e-06, dtype=float32)}


  8%|▊         | 75830/1000000 [3:35:15<31:30:30,  8.15it/s]

{'loss': Array(0.17235623, dtype=float32), 'loss_cross_entropy': Array(0.16264941, dtype=float32)}
{'loss_inverse': Array(1.04921014e-07, dtype=float32)}


  8%|▊         | 75840/1000000 [3:35:17<26:16:35,  9.77it/s]

{'loss': Array(0.12818764, dtype=float32), 'loss_cross_entropy': Array(0.12036073, dtype=float32)}
{'loss_inverse': Array(5.8358164e-06, dtype=float32)}


  8%|▊         | 75850/1000000 [3:35:18<25:41:29,  9.99it/s]

{'loss': Array(0.14474143, dtype=float32), 'loss_cross_entropy': Array(0.1359439, dtype=float32)}
{'loss_inverse': Array(5.3648773e-07, dtype=float32)}


  8%|▊         | 75860/1000000 [3:35:20<29:49:05,  8.61it/s]

{'loss': Array(0.1666352, dtype=float32), 'loss_cross_entropy': Array(0.15703987, dtype=float32)}
{'loss_inverse': Array(7.289268e-05, dtype=float32)}


  8%|▊         | 75870/1000000 [3:35:21<25:32:52, 10.05it/s]

{'loss': Array(0.1491768, dtype=float32), 'loss_cross_entropy': Array(0.1402657, dtype=float32)}
{'loss_inverse': Array(1.2119124e-05, dtype=float32)}


  8%|▊         | 75880/1000000 [3:35:23<33:45:47,  7.60it/s]

{'loss': Array(0.1354265, dtype=float32), 'loss_cross_entropy': Array(0.12797375, dtype=float32)}
{'loss_inverse': Array(2.9524681e-05, dtype=float32)}


  8%|▊         | 75890/1000000 [3:35:25<26:20:02,  9.75it/s]

{'loss': Array(0.13587283, dtype=float32), 'loss_cross_entropy': Array(0.12794517, dtype=float32)}
{'loss_inverse': Array(1.0140077e-06, dtype=float32)}


  8%|▊         | 75900/1000000 [3:35:26<37:57:26,  6.76it/s]

{'loss': Array(0.15829046, dtype=float32), 'loss_cross_entropy': Array(0.14863393, dtype=float32)}
{'loss_inverse': Array(1.5999232e-05, dtype=float32)}


  8%|▊         | 75910/1000000 [3:35:28<27:08:46,  9.46it/s]

{'loss': Array(0.1339151, dtype=float32), 'loss_cross_entropy': Array(0.1260001, dtype=float32)}
{'loss_inverse': Array(3.4977754e-07, dtype=float32)}


  8%|▊         | 75920/1000000 [3:35:29<38:38:39,  6.64it/s]

{'loss': Array(0.1483116, dtype=float32), 'loss_cross_entropy': Array(0.13947402, dtype=float32)}
{'loss_inverse': Array(5.061516e-05, dtype=float32)}


  8%|▊         | 75930/1000000 [3:35:31<27:46:37,  9.24it/s]

{'loss': Array(0.15199575, dtype=float32), 'loss_cross_entropy': Array(0.14346395, dtype=float32)}
{'loss_inverse': Array(4.709031e-05, dtype=float32)}


  8%|▊         | 75940/1000000 [3:35:32<25:55:57,  9.90it/s]

{'loss': Array(0.14855781, dtype=float32), 'loss_cross_entropy': Array(0.14010744, dtype=float32)}
{'loss_inverse': Array(0.00022966, dtype=float32)}


  8%|▊         | 75950/1000000 [3:35:34<31:39:55,  8.11it/s]

{'loss': Array(0.14796387, dtype=float32), 'loss_cross_entropy': Array(0.13947761, dtype=float32)}
{'loss_inverse': Array(3.1635512e-08, dtype=float32)}


  8%|▊         | 75960/1000000 [3:35:36<26:15:10,  9.78it/s]

{'loss': Array(0.13251255, dtype=float32), 'loss_cross_entropy': Array(0.12486609, dtype=float32)}
{'loss_inverse': Array(0.00016375, dtype=float32)}


  8%|▊         | 75970/1000000 [3:35:37<37:38:05,  6.82it/s]

{'loss': Array(0.14339364, dtype=float32), 'loss_cross_entropy': Array(0.13503984, dtype=float32)}
{'loss_inverse': Array(9.7839125e-05, dtype=float32)}


  8%|▊         | 75980/1000000 [3:35:39<27:39:25,  9.28it/s]

{'loss': Array(0.13087343, dtype=float32), 'loss_cross_entropy': Array(0.12310085, dtype=float32)}
{'loss_inverse': Array(0.0001213, dtype=float32)}


  8%|▊         | 75990/1000000 [3:35:40<37:59:10,  6.76it/s]

{'loss': Array(0.14434612, dtype=float32), 'loss_cross_entropy': Array(0.13575172, dtype=float32)}
{'loss_inverse': Array(0.00026822, dtype=float32)}


  8%|▊         | 76000/1000000 [3:35:42<27:38:17,  9.29it/s]

{'loss': Array(0.14831366, dtype=float32), 'loss_cross_entropy': Array(0.1400659, dtype=float32)}
{'loss_inverse': Array(0.00026964, dtype=float32)}


  8%|▊         | 76010/1000000 [3:35:50<85:59:03,  2.99it/s] 

{'loss': Array(0.1273021, dtype=float32), 'loss_cross_entropy': Array(0.12072398, dtype=float32)}
{'loss_inverse': Array(0.00037373, dtype=float32)}


  8%|▊         | 76020/1000000 [3:35:52<44:22:04,  5.78it/s]

{'loss': Array(0.16291416, dtype=float32), 'loss_cross_entropy': Array(0.15362087, dtype=float32)}
{'loss_inverse': Array(0.00050388, dtype=float32)}


  8%|▊         | 76030/1000000 [3:35:53<28:20:11,  9.06it/s]

{'loss': Array(0.16119559, dtype=float32), 'loss_cross_entropy': Array(0.15243582, dtype=float32)}
{'loss_inverse': Array(1.4776978e-05, dtype=float32)}


  8%|▊         | 76040/1000000 [3:35:54<26:08:11,  9.82it/s]

{'loss': Array(0.15210243, dtype=float32), 'loss_cross_entropy': Array(0.1431881, dtype=float32)}
{'loss_inverse': Array(8.724079e-05, dtype=float32)}


  8%|▊         | 76050/1000000 [3:35:56<28:12:50,  9.10it/s]

{'loss': Array(0.14429705, dtype=float32), 'loss_cross_entropy': Array(0.13563709, dtype=float32)}
{'loss_inverse': Array(6.616559e-06, dtype=float32)}


  8%|▊         | 76060/1000000 [3:35:58<26:02:01,  9.86it/s]

{'loss': Array(0.14508992, dtype=float32), 'loss_cross_entropy': Array(0.13696514, dtype=float32)}
{'loss_inverse': Array(0.00056028, dtype=float32)}


  8%|▊         | 76070/1000000 [3:35:59<29:05:23,  8.82it/s]

{'loss': Array(0.11372975, dtype=float32), 'loss_cross_entropy': Array(0.10659181, dtype=float32)}
{'loss_inverse': Array(0.00018084, dtype=float32)}


  8%|▊         | 76080/1000000 [3:36:01<25:34:54, 10.03it/s]

{'loss': Array(0.14211981, dtype=float32), 'loss_cross_entropy': Array(0.13403791, dtype=float32)}
{'loss_inverse': Array(3.079307e-05, dtype=float32)}


  8%|▊         | 76090/1000000 [3:36:02<31:26:26,  8.16it/s]

{'loss': Array(0.1472808, dtype=float32), 'loss_cross_entropy': Array(0.1384912, dtype=float32)}
{'loss_inverse': Array(0.00035958, dtype=float32)}


  8%|▊         | 76100/1000000 [3:36:04<26:38:06,  9.64it/s]

{'loss': Array(0.15803996, dtype=float32), 'loss_cross_entropy': Array(0.14941385, dtype=float32)}
{'loss_inverse': Array(1.3984142e-05, dtype=float32)}


  8%|▊         | 76110/1000000 [3:36:05<25:08:30, 10.21it/s]

{'loss': Array(0.14427018, dtype=float32), 'loss_cross_entropy': Array(0.13544504, dtype=float32)}
{'loss_inverse': Array(5.064186e-05, dtype=float32)}


  8%|▊         | 76120/1000000 [3:36:07<28:55:10,  8.87it/s]

{'loss': Array(0.13217105, dtype=float32), 'loss_cross_entropy': Array(0.12422921, dtype=float32)}
{'loss_inverse': Array(0.00013846, dtype=float32)}


  8%|▊         | 76130/1000000 [3:36:09<26:01:00,  9.86it/s]

{'loss': Array(0.13996284, dtype=float32), 'loss_cross_entropy': Array(0.13166305, dtype=float32)}
{'loss_inverse': Array(0.00036557, dtype=float32)}


  8%|▊         | 76140/1000000 [3:36:10<30:05:11,  8.53it/s]

{'loss': Array(0.15363641, dtype=float32), 'loss_cross_entropy': Array(0.14493184, dtype=float32)}
{'loss_inverse': Array(6.16586e-05, dtype=float32)}


  8%|▊         | 76150/1000000 [3:36:12<26:35:23,  9.65it/s]

{'loss': Array(0.12827048, dtype=float32), 'loss_cross_entropy': Array(0.12046542, dtype=float32)}
{'loss_inverse': Array(0.00013243, dtype=float32)}


  8%|▊         | 76160/1000000 [3:36:14<31:38:19,  8.11it/s]

{'loss': Array(0.13085143, dtype=float32), 'loss_cross_entropy': Array(0.12349009, dtype=float32)}
{'loss_inverse': Array(0.00025543, dtype=float32)}


  8%|▊         | 76170/1000000 [3:36:15<26:02:29,  9.85it/s]

{'loss': Array(0.1426435, dtype=float32), 'loss_cross_entropy': Array(0.13459975, dtype=float32)}
{'loss_inverse': Array(0.00033265, dtype=float32)}


  8%|▊         | 76180/1000000 [3:36:16<25:24:20, 10.10it/s]

{'loss': Array(0.15820347, dtype=float32), 'loss_cross_entropy': Array(0.14951469, dtype=float32)}
{'loss_inverse': Array(0.00014151, dtype=float32)}


  8%|▊         | 76190/1000000 [3:36:18<29:36:07,  8.67it/s]

{'loss': Array(0.14184324, dtype=float32), 'loss_cross_entropy': Array(0.13345312, dtype=float32)}
{'loss_inverse': Array(1.7475253e-05, dtype=float32)}


  8%|▊         | 76200/1000000 [3:36:20<25:59:26,  9.87it/s]

{'loss': Array(0.15071909, dtype=float32), 'loss_cross_entropy': Array(0.14226584, dtype=float32)}
{'loss_inverse': Array(0.00024513, dtype=float32)}


  8%|▊         | 76210/1000000 [3:36:21<31:19:34,  8.19it/s]

{'loss': Array(0.15409029, dtype=float32), 'loss_cross_entropy': Array(0.14545266, dtype=float32)}
{'loss_inverse': Array(1.9312507e-05, dtype=float32)}


  8%|▊         | 76220/1000000 [3:36:23<27:30:45,  9.33it/s]

{'loss': Array(0.14172353, dtype=float32), 'loss_cross_entropy': Array(0.13295892, dtype=float32)}
{'loss_inverse': Array(0.00021601, dtype=float32)}


  8%|▊         | 76230/1000000 [3:36:24<34:27:10,  7.45it/s]

{'loss': Array(0.1475225, dtype=float32), 'loss_cross_entropy': Array(0.13931121, dtype=float32)}
{'loss_inverse': Array(7.8847166e-05, dtype=float32)}


  8%|▊         | 76240/1000000 [3:36:26<26:50:05,  9.56it/s]

{'loss': Array(0.16024998, dtype=float32), 'loss_cross_entropy': Array(0.15118818, dtype=float32)}
{'loss_inverse': Array(2.955475e-05, dtype=float32)}


  8%|▊         | 76250/1000000 [3:36:27<25:29:35, 10.07it/s]

{'loss': Array(0.15231901, dtype=float32), 'loss_cross_entropy': Array(0.1435229, dtype=float32)}
{'loss_inverse': Array(0.00058456, dtype=float32)}


  8%|▊         | 76260/1000000 [3:36:29<28:08:59,  9.12it/s]

{'loss': Array(0.12195455, dtype=float32), 'loss_cross_entropy': Array(0.1144351, dtype=float32)}
{'loss_inverse': Array(0.00012518, dtype=float32)}


  8%|▊         | 76270/1000000 [3:36:31<25:54:28,  9.90it/s]

{'loss': Array(0.15589103, dtype=float32), 'loss_cross_entropy': Array(0.14671099, dtype=float32)}
{'loss_inverse': Array(5.0349983e-05, dtype=float32)}


  8%|▊         | 76280/1000000 [3:36:32<33:43:41,  7.61it/s]

{'loss': Array(0.13192998, dtype=float32), 'loss_cross_entropy': Array(0.12377752, dtype=float32)}
{'loss_inverse': Array(3.675869e-05, dtype=float32)}


  8%|▊         | 76290/1000000 [3:36:34<27:00:25,  9.50it/s]

{'loss': Array(0.14596073, dtype=float32), 'loss_cross_entropy': Array(0.13703935, dtype=float32)}
{'loss_inverse': Array(0.00034096, dtype=float32)}


  8%|▊         | 76300/1000000 [3:36:35<25:25:35, 10.09it/s]

{'loss': Array(0.14572786, dtype=float32), 'loss_cross_entropy': Array(0.13726874, dtype=float32)}
{'loss_inverse': Array(9.381438e-05, dtype=float32)}


  8%|▊         | 76310/1000000 [3:36:37<28:11:33,  9.10it/s]

{'loss': Array(0.1560177, dtype=float32), 'loss_cross_entropy': Array(0.14743736, dtype=float32)}
{'loss_inverse': Array(8.828173e-05, dtype=float32)}


  8%|▊         | 76320/1000000 [3:36:38<25:38:50, 10.00it/s]

{'loss': Array(0.14393674, dtype=float32), 'loss_cross_entropy': Array(0.13523093, dtype=float32)}
{'loss_inverse': Array(0.00019615, dtype=float32)}


  8%|▊         | 76330/1000000 [3:36:40<29:17:15,  8.76it/s]

{'loss': Array(0.12850718, dtype=float32), 'loss_cross_entropy': Array(0.12120023, dtype=float32)}
{'loss_inverse': Array(0.00010579, dtype=float32)}


  8%|▊         | 76340/1000000 [3:36:41<26:03:29,  9.85it/s]

{'loss': Array(0.1362868, dtype=float32), 'loss_cross_entropy': Array(0.12874636, dtype=float32)}
{'loss_inverse': Array(2.9014593e-05, dtype=float32)}


  8%|▊         | 76350/1000000 [3:36:43<31:41:49,  8.09it/s]

{'loss': Array(0.14710575, dtype=float32), 'loss_cross_entropy': Array(0.1387054, dtype=float32)}
{'loss_inverse': Array(0.00014102, dtype=float32)}


  8%|▊         | 76360/1000000 [3:36:45<26:42:21,  9.61it/s]

{'loss': Array(0.13117753, dtype=float32), 'loss_cross_entropy': Array(0.12362035, dtype=float32)}
{'loss_inverse': Array(0.00019026, dtype=float32)}


  8%|▊         | 76370/1000000 [3:36:46<25:12:51, 10.18it/s]

{'loss': Array(0.15202649, dtype=float32), 'loss_cross_entropy': Array(0.14282304, dtype=float32)}
{'loss_inverse': Array(0.00012341, dtype=float32)}


  8%|▊         | 76380/1000000 [3:36:48<29:12:41,  8.78it/s]

{'loss': Array(0.15346849, dtype=float32), 'loss_cross_entropy': Array(0.14481142, dtype=float32)}
{'loss_inverse': Array(1.5528594e-05, dtype=float32)}


  8%|▊         | 76390/1000000 [3:36:49<25:48:16,  9.94it/s]

{'loss': Array(0.13653624, dtype=float32), 'loss_cross_entropy': Array(0.12846492, dtype=float32)}
{'loss_inverse': Array(1.6438746e-06, dtype=float32)}


  8%|▊         | 76400/1000000 [3:36:51<34:13:11,  7.50it/s]

{'loss': Array(0.1307187, dtype=float32), 'loss_cross_entropy': Array(0.12266971, dtype=float32)}
{'loss_inverse': Array(3.406746e-06, dtype=float32)}


  8%|▊         | 76410/1000000 [3:36:52<26:28:11,  9.69it/s]

{'loss': Array(0.1453596, dtype=float32), 'loss_cross_entropy': Array(0.13737784, dtype=float32)}
{'loss_inverse': Array(1.4014611e-06, dtype=float32)}


  8%|▊         | 76420/1000000 [3:36:54<38:16:30,  6.70it/s]

{'loss': Array(0.15847127, dtype=float32), 'loss_cross_entropy': Array(0.1496447, dtype=float32)}
{'loss_inverse': Array(0.0001547, dtype=float32)}


  8%|▊         | 76430/1000000 [3:36:56<27:21:05,  9.38it/s]

{'loss': Array(0.13981955, dtype=float32), 'loss_cross_entropy': Array(0.13130157, dtype=float32)}
{'loss_inverse': Array(1.2000509e-05, dtype=float32)}


  8%|▊         | 76440/1000000 [3:36:57<38:38:11,  6.64it/s]

{'loss': Array(0.13638347, dtype=float32), 'loss_cross_entropy': Array(0.12826017, dtype=float32)}
{'loss_inverse': Array(9.938186e-06, dtype=float32)}


  8%|▊         | 76450/1000000 [3:36:59<27:13:47,  9.42it/s]

{'loss': Array(0.14896667, dtype=float32), 'loss_cross_entropy': Array(0.14048545, dtype=float32)}
{'loss_inverse': Array(1.15812e-07, dtype=float32)}


  8%|▊         | 76460/1000000 [3:37:00<25:29:37, 10.06it/s]

{'loss': Array(0.13722746, dtype=float32), 'loss_cross_entropy': Array(0.12894784, dtype=float32)}
{'loss_inverse': Array(1.7852101e-06, dtype=float32)}


  8%|▊         | 76470/1000000 [3:37:02<31:44:40,  8.08it/s]

{'loss': Array(0.13362288, dtype=float32), 'loss_cross_entropy': Array(0.12581158, dtype=float32)}
{'loss_inverse': Array(1.0320418e-05, dtype=float32)}


  8%|▊         | 76480/1000000 [3:37:03<26:17:28,  9.76it/s]

{'loss': Array(0.14730696, dtype=float32), 'loss_cross_entropy': Array(0.13877316, dtype=float32)}
{'loss_inverse': Array(2.3113969e-06, dtype=float32)}


  8%|▊         | 76490/1000000 [3:37:05<37:18:09,  6.88it/s]

{'loss': Array(0.13584118, dtype=float32), 'loss_cross_entropy': Array(0.12770112, dtype=float32)}
{'loss_inverse': Array(1.0493065e-06, dtype=float32)}


  8%|▊         | 76500/1000000 [3:37:07<27:33:12,  9.31it/s]

{'loss': Array(0.13808405, dtype=float32), 'loss_cross_entropy': Array(0.12977867, dtype=float32)}
{'loss_inverse': Array(5.5258206e-06, dtype=float32)}


  8%|▊         | 76510/1000000 [3:37:15<99:38:49,  2.57it/s] 

{'loss': Array(0.13879283, dtype=float32), 'loss_cross_entropy': Array(0.13044877, dtype=float32)}
{'loss_inverse': Array(1.2105003e-05, dtype=float32)}


  8%|▊         | 76520/1000000 [3:37:16<37:50:20,  6.78it/s] 

{'loss': Array(0.15853249, dtype=float32), 'loss_cross_entropy': Array(0.14963543, dtype=float32)}
{'loss_inverse': Array(8.547034e-06, dtype=float32)}


  8%|▊         | 76530/1000000 [3:37:18<26:56:34,  9.52it/s]

{'loss': Array(0.16666965, dtype=float32), 'loss_cross_entropy': Array(0.15778117, dtype=float32)}
{'loss_inverse': Array(9.566257e-06, dtype=float32)}


  8%|▊         | 76540/1000000 [3:37:19<29:29:50,  8.70it/s]

{'loss': Array(0.14824794, dtype=float32), 'loss_cross_entropy': Array(0.13973054, dtype=float32)}
{'loss_inverse': Array(4.2523725e-06, dtype=float32)}


  8%|▊         | 76550/1000000 [3:37:21<26:35:45,  9.64it/s]

{'loss': Array(0.15564589, dtype=float32), 'loss_cross_entropy': Array(0.14689939, dtype=float32)}
{'loss_inverse': Array(1.6736748e-06, dtype=float32)}


  8%|▊         | 76560/1000000 [3:37:23<38:40:03,  6.63it/s]

{'loss': Array(0.166447, dtype=float32), 'loss_cross_entropy': Array(0.15688281, dtype=float32)}
{'loss_inverse': Array(1.830711e-06, dtype=float32)}


  8%|▊         | 76570/1000000 [3:37:24<27:04:57,  9.47it/s]

{'loss': Array(0.15107062, dtype=float32), 'loss_cross_entropy': Array(0.14177442, dtype=float32)}
{'loss_inverse': Array(3.7582802e-06, dtype=float32)}


  8%|▊         | 76580/1000000 [3:37:25<25:02:13, 10.25it/s]

{'loss': Array(0.13039468, dtype=float32), 'loss_cross_entropy': Array(0.122101, dtype=float32)}
{'loss_inverse': Array(1.9415234e-05, dtype=float32)}


  8%|▊         | 76590/1000000 [3:37:27<29:25:26,  8.72it/s]

{'loss': Array(0.15790676, dtype=float32), 'loss_cross_entropy': Array(0.14977007, dtype=float32)}
{'loss_inverse': Array(2.062013e-07, dtype=float32)}


  8%|▊         | 76600/1000000 [3:37:28<25:15:50, 10.15it/s]

{'loss': Array(0.15417846, dtype=float32), 'loss_cross_entropy': Array(0.1449785, dtype=float32)}
{'loss_inverse': Array(3.409602e-07, dtype=float32)}


  8%|▊         | 76610/1000000 [3:37:30<30:31:01,  8.41it/s]

{'loss': Array(0.1448498, dtype=float32), 'loss_cross_entropy': Array(0.13676153, dtype=float32)}
{'loss_inverse': Array(3.748073e-08, dtype=float32)}


  8%|▊         | 76620/1000000 [3:37:32<26:18:17,  9.75it/s]

{'loss': Array(0.15678795, dtype=float32), 'loss_cross_entropy': Array(0.14822307, dtype=float32)}
{'loss_inverse': Array(4.11551e-06, dtype=float32)}


  8%|▊         | 76630/1000000 [3:37:34<32:08:45,  7.98it/s]

{'loss': Array(0.14661025, dtype=float32), 'loss_cross_entropy': Array(0.13808541, dtype=float32)}
{'loss_inverse': Array(2.8525753e-06, dtype=float32)}


  8%|▊         | 76640/1000000 [3:37:35<26:09:27,  9.81it/s]

{'loss': Array(0.1649965, dtype=float32), 'loss_cross_entropy': Array(0.15577748, dtype=float32)}
{'loss_inverse': Array(2.6538618e-08, dtype=float32)}


  8%|▊         | 76650/1000000 [3:37:36<24:53:16, 10.31it/s]

{'loss': Array(0.1424658, dtype=float32), 'loss_cross_entropy': Array(0.13487701, dtype=float32)}
{'loss_inverse': Array(2.5252006e-05, dtype=float32)}


  8%|▊         | 76660/1000000 [3:37:38<27:43:08,  9.25it/s]

{'loss': Array(0.15819454, dtype=float32), 'loss_cross_entropy': Array(0.14897229, dtype=float32)}
{'loss_inverse': Array(3.5734708e-06, dtype=float32)}


  8%|▊         | 76670/1000000 [3:37:40<26:21:15,  9.73it/s]

{'loss': Array(0.14884643, dtype=float32), 'loss_cross_entropy': Array(0.1403282, dtype=float32)}
{'loss_inverse': Array(9.783348e-05, dtype=float32)}


  8%|▊         | 76680/1000000 [3:37:41<30:05:06,  8.53it/s]

{'loss': Array(0.14985257, dtype=float32), 'loss_cross_entropy': Array(0.14132406, dtype=float32)}
{'loss_inverse': Array(5.6064454e-07, dtype=float32)}


  8%|▊         | 76690/1000000 [3:37:43<26:05:53,  9.83it/s]

{'loss': Array(0.11868253, dtype=float32), 'loss_cross_entropy': Array(0.11075138, dtype=float32)}
{'loss_inverse': Array(8.7111934e-05, dtype=float32)}


  8%|▊         | 76700/1000000 [3:37:44<31:54:45,  8.04it/s]

{'loss': Array(0.14514688, dtype=float32), 'loss_cross_entropy': Array(0.13702057, dtype=float32)}
{'loss_inverse': Array(9.103645e-05, dtype=float32)}


  8%|▊         | 76710/1000000 [3:37:46<26:42:51,  9.60it/s]

{'loss': Array(0.1406521, dtype=float32), 'loss_cross_entropy': Array(0.13245717, dtype=float32)}
{'loss_inverse': Array(6.2506166e-05, dtype=float32)}


  8%|▊         | 76720/1000000 [3:37:47<25:22:42, 10.11it/s]

{'loss': Array(0.15668295, dtype=float32), 'loss_cross_entropy': Array(0.14789453, dtype=float32)}
{'loss_inverse': Array(2.3336686e-05, dtype=float32)}


  8%|▊         | 76730/1000000 [3:37:49<29:31:54,  8.68it/s]

{'loss': Array(0.14191182, dtype=float32), 'loss_cross_entropy': Array(0.13357903, dtype=float32)}
{'loss_inverse': Array(0.00025926, dtype=float32)}


  8%|▊         | 76740/1000000 [3:37:51<26:04:31,  9.84it/s]

{'loss': Array(0.14537728, dtype=float32), 'loss_cross_entropy': Array(0.1374235, dtype=float32)}
{'loss_inverse': Array(4.465191e-06, dtype=float32)}


  8%|▊         | 76750/1000000 [3:37:52<31:43:09,  8.09it/s]

{'loss': Array(0.14177148, dtype=float32), 'loss_cross_entropy': Array(0.13361369, dtype=float32)}
{'loss_inverse': Array(2.5212855e-06, dtype=float32)}


  8%|▊         | 76760/1000000 [3:37:54<26:45:38,  9.58it/s]

{'loss': Array(0.1367109, dtype=float32), 'loss_cross_entropy': Array(0.1283897, dtype=float32)}
{'loss_inverse': Array(0.00031737, dtype=float32)}


  8%|▊         | 76770/1000000 [3:37:55<34:09:04,  7.51it/s]

{'loss': Array(0.14393757, dtype=float32), 'loss_cross_entropy': Array(0.13563737, dtype=float32)}
{'loss_inverse': Array(1.1188936e-06, dtype=float32)}


  8%|▊         | 76780/1000000 [3:37:57<26:38:11,  9.63it/s]

{'loss': Array(0.14106892, dtype=float32), 'loss_cross_entropy': Array(0.13250081, dtype=float32)}
{'loss_inverse': Array(0.00016897, dtype=float32)}


  8%|▊         | 76790/1000000 [3:37:58<25:00:17, 10.26it/s]

{'loss': Array(0.15092443, dtype=float32), 'loss_cross_entropy': Array(0.14295644, dtype=float32)}
{'loss_inverse': Array(3.4562447e-05, dtype=float32)}


  8%|▊         | 76800/1000000 [3:38:00<28:00:07,  9.16it/s]

{'loss': Array(0.1390771, dtype=float32), 'loss_cross_entropy': Array(0.13075456, dtype=float32)}
{'loss_inverse': Array(5.0120918e-05, dtype=float32)}


  8%|▊         | 76810/1000000 [3:38:01<25:33:35, 10.03it/s]

{'loss': Array(0.14695773, dtype=float32), 'loss_cross_entropy': Array(0.13796091, dtype=float32)}
{'loss_inverse': Array(0.0001884, dtype=float32)}


  8%|▊         | 76820/1000000 [3:38:03<34:49:30,  7.36it/s]

{'loss': Array(0.14796726, dtype=float32), 'loss_cross_entropy': Array(0.13937554, dtype=float32)}
{'loss_inverse': Array(3.1755615e-05, dtype=float32)}


  8%|▊         | 76830/1000000 [3:38:05<26:35:27,  9.64it/s]

{'loss': Array(0.1366759, dtype=float32), 'loss_cross_entropy': Array(0.12819171, dtype=float32)}
{'loss_inverse': Array(8.847775e-06, dtype=float32)}


  8%|▊         | 76840/1000000 [3:38:06<26:04:59,  9.83it/s]

{'loss': Array(0.14736652, dtype=float32), 'loss_cross_entropy': Array(0.13932782, dtype=float32)}
{'loss_inverse': Array(7.749763e-07, dtype=float32)}


  8%|▊         | 76850/1000000 [3:38:08<29:05:17,  8.82it/s]

{'loss': Array(0.1532559, dtype=float32), 'loss_cross_entropy': Array(0.1443795, dtype=float32)}
{'loss_inverse': Array(8.768597e-07, dtype=float32)}


  8%|▊         | 76860/1000000 [3:38:09<26:34:26,  9.65it/s]

{'loss': Array(0.14093502, dtype=float32), 'loss_cross_entropy': Array(0.1327779, dtype=float32)}
{'loss_inverse': Array(0.00012632, dtype=float32)}


  8%|▊         | 76870/1000000 [3:38:11<29:51:46,  8.59it/s]

{'loss': Array(0.14353804, dtype=float32), 'loss_cross_entropy': Array(0.13511412, dtype=float32)}
{'loss_inverse': Array(2.9813434e-06, dtype=float32)}


  8%|▊         | 76880/1000000 [3:38:13<25:55:35,  9.89it/s]

{'loss': Array(0.14978124, dtype=float32), 'loss_cross_entropy': Array(0.14122127, dtype=float32)}
{'loss_inverse': Array(2.8844332e-05, dtype=float32)}


  8%|▊         | 76890/1000000 [3:38:14<31:41:14,  8.09it/s]

{'loss': Array(0.1425368, dtype=float32), 'loss_cross_entropy': Array(0.13387768, dtype=float32)}
{'loss_inverse': Array(0.00041254, dtype=float32)}


  8%|▊         | 76900/1000000 [3:38:16<26:44:13,  9.59it/s]

{'loss': Array(0.16084827, dtype=float32), 'loss_cross_entropy': Array(0.15155022, dtype=float32)}
{'loss_inverse': Array(0.00023915, dtype=float32)}


  8%|▊         | 76910/1000000 [3:38:17<25:26:09, 10.08it/s]

{'loss': Array(0.15043174, dtype=float32), 'loss_cross_entropy': Array(0.14189647, dtype=float32)}
{'loss_inverse': Array(3.1580854e-05, dtype=float32)}


  8%|▊         | 76920/1000000 [3:38:19<29:33:53,  8.67it/s]

{'loss': Array(0.15484767, dtype=float32), 'loss_cross_entropy': Array(0.14568631, dtype=float32)}
{'loss_inverse': Array(1.3974337e-06, dtype=float32)}


  8%|▊         | 76930/1000000 [3:38:20<25:23:52, 10.10it/s]

{'loss': Array(0.15671504, dtype=float32), 'loss_cross_entropy': Array(0.14768869, dtype=float32)}
{'loss_inverse': Array(4.6066583e-05, dtype=float32)}


  8%|▊         | 76940/1000000 [3:38:22<34:33:00,  7.42it/s]

{'loss': Array(0.14012223, dtype=float32), 'loss_cross_entropy': Array(0.13149, dtype=float32)}
{'loss_inverse': Array(0.00071628, dtype=float32)}


  8%|▊         | 76950/1000000 [3:38:24<27:01:47,  9.49it/s]

{'loss': Array(0.15556231, dtype=float32), 'loss_cross_entropy': Array(0.14696243, dtype=float32)}
{'loss_inverse': Array(7.4207833e-06, dtype=float32)}


  8%|▊         | 76960/1000000 [3:38:25<37:55:38,  6.76it/s]

{'loss': Array(0.12966706, dtype=float32), 'loss_cross_entropy': Array(0.12174969, dtype=float32)}
{'loss_inverse': Array(0.0002784, dtype=float32)}


  8%|▊         | 76970/1000000 [3:38:27<27:44:10,  9.24it/s]

{'loss': Array(0.1345187, dtype=float32), 'loss_cross_entropy': Array(0.12609284, dtype=float32)}
{'loss_inverse': Array(2.1949388e-06, dtype=float32)}


  8%|▊         | 76980/1000000 [3:38:28<37:58:25,  6.75it/s]

{'loss': Array(0.17077647, dtype=float32), 'loss_cross_entropy': Array(0.16151169, dtype=float32)}
{'loss_inverse': Array(3.7634905e-05, dtype=float32)}


  8%|▊         | 76990/1000000 [3:38:30<27:49:48,  9.21it/s]

{'loss': Array(0.1565065, dtype=float32), 'loss_cross_entropy': Array(0.14687993, dtype=float32)}
{'loss_inverse': Array(1.4350034e-07, dtype=float32)}


  8%|▊         | 77000/1000000 [3:38:31<25:44:48,  9.96it/s]

{'loss': Array(0.13147645, dtype=float32), 'loss_cross_entropy': Array(0.12378631, dtype=float32)}
{'loss_inverse': Array(0.00014049, dtype=float32)}


  8%|▊         | 77010/1000000 [3:38:40<93:42:13,  2.74it/s] 

{'loss': Array(0.14575855, dtype=float32), 'loss_cross_entropy': Array(0.13769847, dtype=float32)}
{'loss_inverse': Array(4.7687985e-07, dtype=float32)}


  8%|▊         | 77020/1000000 [3:38:41<36:24:42,  7.04it/s]

{'loss': Array(0.12459306, dtype=float32), 'loss_cross_entropy': Array(0.117121, dtype=float32)}
{'loss_inverse': Array(1.9404372e-06, dtype=float32)}


  8%|▊         | 77030/1000000 [3:38:42<27:11:27,  9.43it/s]

{'loss': Array(0.14090225, dtype=float32), 'loss_cross_entropy': Array(0.13319384, dtype=float32)}
{'loss_inverse': Array(5.097268e-05, dtype=float32)}


  8%|▊         | 77040/1000000 [3:38:44<28:19:22,  9.05it/s]

{'loss': Array(0.15546313, dtype=float32), 'loss_cross_entropy': Array(0.14661019, dtype=float32)}
{'loss_inverse': Array(7.5181435e-07, dtype=float32)}


  8%|▊         | 77050/1000000 [3:38:46<26:07:54,  9.81it/s]

{'loss': Array(0.15575266, dtype=float32), 'loss_cross_entropy': Array(0.14594309, dtype=float32)}
{'loss_inverse': Array(6.009758e-06, dtype=float32)}


  8%|▊         | 77060/1000000 [3:38:47<29:17:34,  8.75it/s]

{'loss': Array(0.14209089, dtype=float32), 'loss_cross_entropy': Array(0.1332594, dtype=float32)}
{'loss_inverse': Array(3.7808986e-05, dtype=float32)}


  8%|▊         | 77070/1000000 [3:38:49<25:58:48,  9.87it/s]

{'loss': Array(0.15941761, dtype=float32), 'loss_cross_entropy': Array(0.15069011, dtype=float32)}
{'loss_inverse': Array(0.00018775, dtype=float32)}


  8%|▊         | 77080/1000000 [3:38:50<29:27:39,  8.70it/s]

{'loss': Array(0.16342187, dtype=float32), 'loss_cross_entropy': Array(0.1546223, dtype=float32)}
{'loss_inverse': Array(2.0668787e-05, dtype=float32)}


  8%|▊         | 77090/1000000 [3:38:52<26:03:16,  9.84it/s]

{'loss': Array(0.15941341, dtype=float32), 'loss_cross_entropy': Array(0.1507237, dtype=float32)}
{'loss_inverse': Array(3.3244748e-06, dtype=float32)}


  8%|▊         | 77100/1000000 [3:38:54<30:05:24,  8.52it/s]

{'loss': Array(0.1324702, dtype=float32), 'loss_cross_entropy': Array(0.12428413, dtype=float32)}
{'loss_inverse': Array(6.265966e-08, dtype=float32)}


  8%|▊         | 77110/1000000 [3:38:55<25:35:52, 10.01it/s]

{'loss': Array(0.15996464, dtype=float32), 'loss_cross_entropy': Array(0.15033184, dtype=float32)}
{'loss_inverse': Array(1.2105836e-07, dtype=float32)}


  8%|▊         | 77120/1000000 [3:38:57<34:06:45,  7.51it/s]

{'loss': Array(0.15530305, dtype=float32), 'loss_cross_entropy': Array(0.14611773, dtype=float32)}
{'loss_inverse': Array(6.44398e-05, dtype=float32)}


  8%|▊         | 77130/1000000 [3:38:58<26:38:27,  9.62it/s]

{'loss': Array(0.13794231, dtype=float32), 'loss_cross_entropy': Array(0.1295607, dtype=float32)}
{'loss_inverse': Array(9.161841e-05, dtype=float32)}


  8%|▊         | 77140/1000000 [3:39:00<37:35:44,  6.82it/s]

{'loss': Array(0.1420655, dtype=float32), 'loss_cross_entropy': Array(0.1341035, dtype=float32)}
{'loss_inverse': Array(1.9549982e-07, dtype=float32)}


  8%|▊         | 77150/1000000 [3:39:01<26:52:26,  9.54it/s]

{'loss': Array(0.15191756, dtype=float32), 'loss_cross_entropy': Array(0.14320575, dtype=float32)}
{'loss_inverse': Array(8.1226084e-05, dtype=float32)}


  8%|▊         | 77160/1000000 [3:39:03<35:05:25,  7.31it/s]

{'loss': Array(0.1493047, dtype=float32), 'loss_cross_entropy': Array(0.14044209, dtype=float32)}
{'loss_inverse': Array(0.00012078, dtype=float32)}


  8%|▊         | 77170/1000000 [3:39:05<28:03:00,  9.14it/s]

{'loss': Array(0.15596667, dtype=float32), 'loss_cross_entropy': Array(0.14763156, dtype=float32)}
{'loss_inverse': Array(7.824808e-06, dtype=float32)}


  8%|▊         | 77180/1000000 [3:39:06<27:03:32,  9.47it/s]

{'loss': Array(0.1455281, dtype=float32), 'loss_cross_entropy': Array(0.13693051, dtype=float32)}
{'loss_inverse': Array(3.2010947e-05, dtype=float32)}


  8%|▊         | 77190/1000000 [3:39:08<30:18:38,  8.46it/s]

{'loss': Array(0.1380832, dtype=float32), 'loss_cross_entropy': Array(0.13025866, dtype=float32)}
{'loss_inverse': Array(9.720086e-07, dtype=float32)}


  8%|▊         | 77200/1000000 [3:39:09<25:48:06,  9.93it/s]

{'loss': Array(0.15365897, dtype=float32), 'loss_cross_entropy': Array(0.14535484, dtype=float32)}
{'loss_inverse': Array(1.4582546e-05, dtype=float32)}


  8%|▊         | 77210/1000000 [3:39:11<31:59:36,  8.01it/s]

{'loss': Array(0.14626703, dtype=float32), 'loss_cross_entropy': Array(0.13778429, dtype=float32)}
{'loss_inverse': Array(2.8804577e-06, dtype=float32)}


  8%|▊         | 77220/1000000 [3:39:13<26:11:58,  9.78it/s]

{'loss': Array(0.14906608, dtype=float32), 'loss_cross_entropy': Array(0.14076546, dtype=float32)}
{'loss_inverse': Array(3.865553e-06, dtype=float32)}


  8%|▊         | 77230/1000000 [3:39:14<34:50:41,  7.36it/s]

{'loss': Array(0.14701606, dtype=float32), 'loss_cross_entropy': Array(0.1388341, dtype=float32)}
{'loss_inverse': Array(2.5159343e-05, dtype=float32)}


  8%|▊         | 77240/1000000 [3:39:16<26:51:39,  9.54it/s]

{'loss': Array(0.16862796, dtype=float32), 'loss_cross_entropy': Array(0.1593048, dtype=float32)}
{'loss_inverse': Array(0.00026824, dtype=float32)}


  8%|▊         | 77250/1000000 [3:39:18<34:53:22,  7.35it/s]

{'loss': Array(0.13632937, dtype=float32), 'loss_cross_entropy': Array(0.12862524, dtype=float32)}
{'loss_inverse': Array(3.012791e-06, dtype=float32)}


  8%|▊         | 77260/1000000 [3:39:19<26:46:52,  9.57it/s]

{'loss': Array(0.16691594, dtype=float32), 'loss_cross_entropy': Array(0.1579242, dtype=float32)}
{'loss_inverse': Array(1.1136427e-05, dtype=float32)}


  8%|▊         | 77270/1000000 [3:39:20<25:45:20,  9.95it/s]

{'loss': Array(0.15389924, dtype=float32), 'loss_cross_entropy': Array(0.14526199, dtype=float32)}
{'loss_inverse': Array(4.067273e-05, dtype=float32)}


  8%|▊         | 77280/1000000 [3:39:22<31:44:19,  8.08it/s]

{'loss': Array(0.1586804, dtype=float32), 'loss_cross_entropy': Array(0.15002467, dtype=float32)}
{'loss_inverse': Array(1.8039096e-06, dtype=float32)}


  8%|▊         | 77290/1000000 [3:39:24<27:08:56,  9.44it/s]

{'loss': Array(0.16497175, dtype=float32), 'loss_cross_entropy': Array(0.15577906, dtype=float32)}
{'loss_inverse': Array(7.944087e-06, dtype=float32)}


  8%|▊         | 77300/1000000 [3:39:25<38:27:17,  6.67it/s]

{'loss': Array(0.16053782, dtype=float32), 'loss_cross_entropy': Array(0.15131305, dtype=float32)}
{'loss_inverse': Array(3.882039e-06, dtype=float32)}


  8%|▊         | 77310/1000000 [3:39:27<27:20:45,  9.37it/s]

{'loss': Array(0.14346288, dtype=float32), 'loss_cross_entropy': Array(0.1351562, dtype=float32)}
{'loss_inverse': Array(5.1152656e-06, dtype=float32)}


  8%|▊         | 77320/1000000 [3:39:28<25:30:13, 10.05it/s]

{'loss': Array(0.1509632, dtype=float32), 'loss_cross_entropy': Array(0.14250158, dtype=float32)}
{'loss_inverse': Array(2.0250666e-07, dtype=float32)}


  8%|▊         | 77330/1000000 [3:39:30<28:49:57,  8.89it/s]

{'loss': Array(0.1482036, dtype=float32), 'loss_cross_entropy': Array(0.13993795, dtype=float32)}
{'loss_inverse': Array(1.652505e-05, dtype=float32)}


  8%|▊         | 77340/1000000 [3:39:31<25:55:26,  9.89it/s]

{'loss': Array(0.13109764, dtype=float32), 'loss_cross_entropy': Array(0.12367546, dtype=float32)}
{'loss_inverse': Array(1.9369297e-06, dtype=float32)}


  8%|▊         | 77350/1000000 [3:39:33<28:05:50,  9.12it/s]

{'loss': Array(0.1362182, dtype=float32), 'loss_cross_entropy': Array(0.12890737, dtype=float32)}
{'loss_inverse': Array(6.581253e-07, dtype=float32)}


  8%|▊         | 77360/1000000 [3:39:35<25:30:59, 10.04it/s]

{'loss': Array(0.12730967, dtype=float32), 'loss_cross_entropy': Array(0.1192665, dtype=float32)}
{'loss_inverse': Array(3.6746545e-07, dtype=float32)}


  8%|▊         | 77370/1000000 [3:39:36<31:28:23,  8.14it/s]

{'loss': Array(0.15581173, dtype=float32), 'loss_cross_entropy': Array(0.14759254, dtype=float32)}
{'loss_inverse': Array(9.133842e-06, dtype=float32)}


  8%|▊         | 77380/1000000 [3:39:38<26:07:19,  9.81it/s]

{'loss': Array(0.13719219, dtype=float32), 'loss_cross_entropy': Array(0.1293435, dtype=float32)}
{'loss_inverse': Array(1.2513085e-05, dtype=float32)}


  8%|▊         | 77390/1000000 [3:39:40<31:43:50,  8.08it/s]

{'loss': Array(0.14162704, dtype=float32), 'loss_cross_entropy': Array(0.13339454, dtype=float32)}
{'loss_inverse': Array(2.0947452e-06, dtype=float32)}


  8%|▊         | 77400/1000000 [3:39:41<25:57:06,  9.88it/s]

{'loss': Array(0.1323024, dtype=float32), 'loss_cross_entropy': Array(0.1244721, dtype=float32)}
{'loss_inverse': Array(4.3368618e-07, dtype=float32)}


  8%|▊         | 77410/1000000 [3:39:43<38:23:56,  6.67it/s]

{'loss': Array(0.14855199, dtype=float32), 'loss_cross_entropy': Array(0.1400904, dtype=float32)}
{'loss_inverse': Array(3.5124733e-06, dtype=float32)}


  8%|▊         | 77420/1000000 [3:39:44<27:22:30,  9.36it/s]

{'loss': Array(0.1380456, dtype=float32), 'loss_cross_entropy': Array(0.1303279, dtype=float32)}
{'loss_inverse': Array(0.0001041, dtype=float32)}


  8%|▊         | 77430/1000000 [3:39:46<25:27:06, 10.07it/s]

{'loss': Array(0.14324467, dtype=float32), 'loss_cross_entropy': Array(0.13457192, dtype=float32)}
{'loss_inverse': Array(5.319159e-07, dtype=float32)}


  8%|▊         | 77440/1000000 [3:39:47<28:17:52,  9.06it/s]

{'loss': Array(0.13801797, dtype=float32), 'loss_cross_entropy': Array(0.13053861, dtype=float32)}
{'loss_inverse': Array(2.0752918e-07, dtype=float32)}


  8%|▊         | 77450/1000000 [3:39:49<25:36:13, 10.01it/s]

{'loss': Array(0.1328599, dtype=float32), 'loss_cross_entropy': Array(0.12490974, dtype=float32)}
{'loss_inverse': Array(3.911857e-05, dtype=float32)}


  8%|▊         | 77460/1000000 [3:39:50<31:18:56,  8.18it/s]

{'loss': Array(0.15585387, dtype=float32), 'loss_cross_entropy': Array(0.14700238, dtype=float32)}
{'loss_inverse': Array(9.2724224e-07, dtype=float32)}


  8%|▊         | 77470/1000000 [3:39:52<25:56:56,  9.88it/s]

{'loss': Array(0.13182111, dtype=float32), 'loss_cross_entropy': Array(0.12406465, dtype=float32)}
{'loss_inverse': Array(0.00017164, dtype=float32)}


  8%|▊         | 77480/1000000 [3:39:53<25:40:15,  9.98it/s]

{'loss': Array(0.15014854, dtype=float32), 'loss_cross_entropy': Array(0.14116062, dtype=float32)}
{'loss_inverse': Array(2.0639205e-05, dtype=float32)}


  8%|▊         | 77490/1000000 [3:39:55<29:41:41,  8.63it/s]

{'loss': Array(0.1474434, dtype=float32), 'loss_cross_entropy': Array(0.13920347, dtype=float32)}
{'loss_inverse': Array(1.3620811e-06, dtype=float32)}


  8%|▊         | 77500/1000000 [3:39:56<25:45:08,  9.95it/s]

{'loss': Array(0.12354034, dtype=float32), 'loss_cross_entropy': Array(0.11559159, dtype=float32)}
{'loss_inverse': Array(7.3825627e-06, dtype=float32)}


  8%|▊         | 77510/1000000 [3:40:05<94:58:35,  2.70it/s] 

{'loss': Array(0.13384013, dtype=float32), 'loss_cross_entropy': Array(0.12604323, dtype=float32)}
{'loss_inverse': Array(7.490351e-06, dtype=float32)}


  8%|▊         | 77520/1000000 [3:40:06<38:01:42,  6.74it/s] 

{'loss': Array(0.14233224, dtype=float32), 'loss_cross_entropy': Array(0.13423581, dtype=float32)}
{'loss_inverse': Array(5.117796e-07, dtype=float32)}


  8%|▊         | 77530/1000000 [3:40:08<40:32:21,  6.32it/s]

{'loss': Array(0.15483192, dtype=float32), 'loss_cross_entropy': Array(0.14594299, dtype=float32)}
{'loss_inverse': Array(8.9609847e-07, dtype=float32)}


  8%|▊         | 77540/1000000 [3:40:09<27:25:20,  9.34it/s]

{'loss': Array(0.16760097, dtype=float32), 'loss_cross_entropy': Array(0.1584537, dtype=float32)}
{'loss_inverse': Array(1.05030814e-07, dtype=float32)}


  8%|▊         | 77550/1000000 [3:40:11<25:23:02, 10.09it/s]

{'loss': Array(0.14125669, dtype=float32), 'loss_cross_entropy': Array(0.13308494, dtype=float32)}
{'loss_inverse': Array(2.0357074e-06, dtype=float32)}


  8%|▊         | 77560/1000000 [3:40:13<28:41:53,  8.93it/s]

{'loss': Array(0.13028784, dtype=float32), 'loss_cross_entropy': Array(0.12299142, dtype=float32)}
{'loss_inverse': Array(2.885164e-06, dtype=float32)}


  8%|▊         | 77570/1000000 [3:40:14<38:30:14,  6.65it/s]

{'loss': Array(0.14710562, dtype=float32), 'loss_cross_entropy': Array(0.13837098, dtype=float32)}
{'loss_inverse': Array(3.961192e-06, dtype=float32)}


  8%|▊         | 77580/1000000 [3:40:16<27:17:59,  9.39it/s]

{'loss': Array(0.1502748, dtype=float32), 'loss_cross_entropy': Array(0.14184998, dtype=float32)}
{'loss_inverse': Array(1.3275607e-06, dtype=float32)}


  8%|▊         | 77590/1000000 [3:40:17<25:28:38, 10.06it/s]

{'loss': Array(0.14805725, dtype=float32), 'loss_cross_entropy': Array(0.13907397, dtype=float32)}
{'loss_inverse': Array(0.00045636, dtype=float32)}


  8%|▊         | 77600/1000000 [3:40:19<28:56:07,  8.86it/s]

{'loss': Array(0.14531974, dtype=float32), 'loss_cross_entropy': Array(0.1373288, dtype=float32)}
{'loss_inverse': Array(3.012536e-07, dtype=float32)}


  8%|▊         | 77610/1000000 [3:40:20<25:22:24, 10.10it/s]

{'loss': Array(0.13465871, dtype=float32), 'loss_cross_entropy': Array(0.1267166, dtype=float32)}
{'loss_inverse': Array(4.8731596e-05, dtype=float32)}


  8%|▊         | 77620/1000000 [3:40:22<29:42:55,  8.62it/s]

{'loss': Array(0.14296381, dtype=float32), 'loss_cross_entropy': Array(0.13496116, dtype=float32)}
{'loss_inverse': Array(2.9240703e-06, dtype=float32)}


  8%|▊         | 77630/1000000 [3:40:24<26:08:50,  9.80it/s]

{'loss': Array(0.14057845, dtype=float32), 'loss_cross_entropy': Array(0.1323365, dtype=float32)}
{'loss_inverse': Array(7.045587e-05, dtype=float32)}


  8%|▊         | 77640/1000000 [3:40:25<31:23:05,  8.16it/s]

{'loss': Array(0.12179822, dtype=float32), 'loss_cross_entropy': Array(0.11445945, dtype=float32)}
{'loss_inverse': Array(3.0146286e-06, dtype=float32)}


  8%|▊         | 77650/1000000 [3:40:27<26:01:50,  9.84it/s]

{'loss': Array(0.13525113, dtype=float32), 'loss_cross_entropy': Array(0.12721811, dtype=float32)}
{'loss_inverse': Array(1.5105796e-07, dtype=float32)}


  8%|▊         | 77660/1000000 [3:40:28<25:13:17, 10.16it/s]

{'loss': Array(0.15322714, dtype=float32), 'loss_cross_entropy': Array(0.14503275, dtype=float32)}
{'loss_inverse': Array(0.00017529, dtype=float32)}


  8%|▊         | 77670/1000000 [3:40:30<29:39:01,  8.64it/s]

{'loss': Array(0.16085431, dtype=float32), 'loss_cross_entropy': Array(0.1518289, dtype=float32)}
{'loss_inverse': Array(5.443569e-06, dtype=float32)}


  8%|▊         | 77680/1000000 [3:40:31<25:45:02,  9.95it/s]

{'loss': Array(0.14800702, dtype=float32), 'loss_cross_entropy': Array(0.13955414, dtype=float32)}
{'loss_inverse': Array(2.6658685e-05, dtype=float32)}


  8%|▊         | 77690/1000000 [3:40:33<31:48:20,  8.06it/s]

{'loss': Array(0.15108466, dtype=float32), 'loss_cross_entropy': Array(0.14172421, dtype=float32)}
{'loss_inverse': Array(5.7556677e-05, dtype=float32)}


  8%|▊         | 77700/1000000 [3:40:35<26:04:24,  9.83it/s]

{'loss': Array(0.13690029, dtype=float32), 'loss_cross_entropy': Array(0.12857531, dtype=float32)}
{'loss_inverse': Array(4.002516e-05, dtype=float32)}


  8%|▊         | 77710/1000000 [3:40:36<34:22:03,  7.45it/s]

{'loss': Array(0.14156716, dtype=float32), 'loss_cross_entropy': Array(0.1338593, dtype=float32)}
{'loss_inverse': Array(1.0096023e-05, dtype=float32)}


  8%|▊         | 77720/1000000 [3:40:38<26:45:41,  9.57it/s]

{'loss': Array(0.14073122, dtype=float32), 'loss_cross_entropy': Array(0.13243756, dtype=float32)}
{'loss_inverse': Array(0.00014179, dtype=float32)}


  8%|▊         | 77730/1000000 [3:40:39<24:55:36, 10.28it/s]

{'loss': Array(0.13786232, dtype=float32), 'loss_cross_entropy': Array(0.13026212, dtype=float32)}
{'loss_inverse': Array(2.3360778e-06, dtype=float32)}


  8%|▊         | 77740/1000000 [3:40:41<27:40:09,  9.26it/s]

{'loss': Array(0.12696539, dtype=float32), 'loss_cross_entropy': Array(0.1188887, dtype=float32)}
{'loss_inverse': Array(2.1843034e-05, dtype=float32)}


  8%|▊         | 77750/1000000 [3:40:42<25:28:32, 10.06it/s]

{'loss': Array(0.14956956, dtype=float32), 'loss_cross_entropy': Array(0.14097916, dtype=float32)}
{'loss_inverse': Array(6.684404e-07, dtype=float32)}


  8%|▊         | 77760/1000000 [3:40:44<33:37:13,  7.62it/s]

{'loss': Array(0.15000033, dtype=float32), 'loss_cross_entropy': Array(0.14189006, dtype=float32)}
{'loss_inverse': Array(5.8595706e-06, dtype=float32)}


  8%|▊         | 77770/1000000 [3:40:45<26:18:31,  9.74it/s]

{'loss': Array(0.14368413, dtype=float32), 'loss_cross_entropy': Array(0.1358351, dtype=float32)}
{'loss_inverse': Array(4.0368133e-05, dtype=float32)}


  8%|▊         | 77780/1000000 [3:40:47<25:29:18, 10.05it/s]

{'loss': Array(0.13449606, dtype=float32), 'loss_cross_entropy': Array(0.12673305, dtype=float32)}
{'loss_inverse': Array(1.0988548e-06, dtype=float32)}


  8%|▊         | 77790/1000000 [3:40:49<28:37:02,  8.95it/s]

{'loss': Array(0.13943519, dtype=float32), 'loss_cross_entropy': Array(0.13150513, dtype=float32)}
{'loss_inverse': Array(2.6512973e-05, dtype=float32)}


  8%|▊         | 77800/1000000 [3:40:50<25:28:27, 10.06it/s]

{'loss': Array(0.13374506, dtype=float32), 'loss_cross_entropy': Array(0.12618113, dtype=float32)}
{'loss_inverse': Array(7.623494e-05, dtype=float32)}


  8%|▊         | 77810/1000000 [3:40:52<29:27:12,  8.70it/s]

{'loss': Array(0.13885291, dtype=float32), 'loss_cross_entropy': Array(0.13062893, dtype=float32)}
{'loss_inverse': Array(5.7253626e-05, dtype=float32)}


  8%|▊         | 77820/1000000 [3:40:53<26:53:39,  9.52it/s]

{'loss': Array(0.1435365, dtype=float32), 'loss_cross_entropy': Array(0.13551767, dtype=float32)}
{'loss_inverse': Array(0.00019207, dtype=float32)}


  8%|▊         | 77830/1000000 [3:40:55<32:09:08,  7.97it/s]

{'loss': Array(0.1371175, dtype=float32), 'loss_cross_entropy': Array(0.12906376, dtype=float32)}
{'loss_inverse': Array(1.6033298e-05, dtype=float32)}


  8%|▊         | 77840/1000000 [3:40:56<26:13:54,  9.77it/s]

{'loss': Array(0.14014158, dtype=float32), 'loss_cross_entropy': Array(0.13203429, dtype=float32)}
{'loss_inverse': Array(4.3718805e-07, dtype=float32)}


  8%|▊         | 77850/1000000 [3:40:58<26:00:04,  9.85it/s]

{'loss': Array(0.139219, dtype=float32), 'loss_cross_entropy': Array(0.13074185, dtype=float32)}
{'loss_inverse': Array(1.2091345e-05, dtype=float32)}


  8%|▊         | 77860/1000000 [3:41:00<29:31:47,  8.67it/s]

{'loss': Array(0.15288973, dtype=float32), 'loss_cross_entropy': Array(0.14418937, dtype=float32)}
{'loss_inverse': Array(1.4579506e-06, dtype=float32)}


  8%|▊         | 77870/1000000 [3:41:01<25:41:07,  9.97it/s]

{'loss': Array(0.1281949, dtype=float32), 'loss_cross_entropy': Array(0.12071959, dtype=float32)}
{'loss_inverse': Array(1.4264323e-06, dtype=float32)}


  8%|▊         | 77880/1000000 [3:41:03<33:54:21,  7.55it/s]

{'loss': Array(0.14345501, dtype=float32), 'loss_cross_entropy': Array(0.13502792, dtype=float32)}
{'loss_inverse': Array(0.0001535, dtype=float32)}


  8%|▊         | 77890/1000000 [3:41:04<26:25:32,  9.69it/s]

{'loss': Array(0.1420781, dtype=float32), 'loss_cross_entropy': Array(0.13375048, dtype=float32)}
{'loss_inverse': Array(6.43958e-06, dtype=float32)}


  8%|▊         | 77900/1000000 [3:41:06<38:01:42,  6.74it/s]

{'loss': Array(0.134598, dtype=float32), 'loss_cross_entropy': Array(0.12712978, dtype=float32)}
{'loss_inverse': Array(3.0997708e-05, dtype=float32)}


  8%|▊         | 77910/1000000 [3:41:07<27:31:34,  9.31it/s]

{'loss': Array(0.1504463, dtype=float32), 'loss_cross_entropy': Array(0.14189793, dtype=float32)}
{'loss_inverse': Array(1.2542528e-05, dtype=float32)}


  8%|▊         | 77920/1000000 [3:41:09<39:37:23,  6.46it/s]

{'loss': Array(0.13277505, dtype=float32), 'loss_cross_entropy': Array(0.12475753, dtype=float32)}
{'loss_inverse': Array(7.113878e-05, dtype=float32)}


  8%|▊         | 77930/1000000 [3:41:11<27:44:22,  9.23it/s]

{'loss': Array(0.14086656, dtype=float32), 'loss_cross_entropy': Array(0.13232183, dtype=float32)}
{'loss_inverse': Array(0.00013572, dtype=float32)}


  8%|▊         | 77940/1000000 [3:41:12<27:01:05,  9.48it/s]

{'loss': Array(0.13647185, dtype=float32), 'loss_cross_entropy': Array(0.12816879, dtype=float32)}
{'loss_inverse': Array(9.137171e-05, dtype=float32)}


  8%|▊         | 77950/1000000 [3:41:14<31:45:46,  8.06it/s]

{'loss': Array(0.15731362, dtype=float32), 'loss_cross_entropy': Array(0.14848502, dtype=float32)}
{'loss_inverse': Array(0.00013946, dtype=float32)}


  8%|▊         | 77960/1000000 [3:41:15<26:19:15,  9.73it/s]

{'loss': Array(0.13437586, dtype=float32), 'loss_cross_entropy': Array(0.12699734, dtype=float32)}
{'loss_inverse': Array(2.6728056e-05, dtype=float32)}


  8%|▊         | 77970/1000000 [3:41:17<37:59:32,  6.74it/s]

{'loss': Array(0.14016938, dtype=float32), 'loss_cross_entropy': Array(0.13204898, dtype=float32)}
{'loss_inverse': Array(3.256766e-05, dtype=float32)}


  8%|▊         | 77980/1000000 [3:41:18<27:24:55,  9.34it/s]

{'loss': Array(0.14884847, dtype=float32), 'loss_cross_entropy': Array(0.14078973, dtype=float32)}
{'loss_inverse': Array(1.7296083e-05, dtype=float32)}


  8%|▊         | 77990/1000000 [3:41:20<38:16:54,  6.69it/s]

{'loss': Array(0.1317091, dtype=float32), 'loss_cross_entropy': Array(0.1242879, dtype=float32)}
{'loss_inverse': Array(0.00017351, dtype=float32)}


  8%|▊         | 78000/1000000 [3:41:22<27:44:05,  9.23it/s]

{'loss': Array(0.13308913, dtype=float32), 'loss_cross_entropy': Array(0.12498446, dtype=float32)}
{'loss_inverse': Array(4.454291e-07, dtype=float32)}


  8%|▊         | 78010/1000000 [3:41:30<87:54:58,  2.91it/s] 

{'loss': Array(0.14930278, dtype=float32), 'loss_cross_entropy': Array(0.14048477, dtype=float32)}
{'loss_inverse': Array(4.539357e-05, dtype=float32)}


  8%|▊         | 78020/1000000 [3:41:32<44:37:09,  5.74it/s]

{'loss': Array(0.13634515, dtype=float32), 'loss_cross_entropy': Array(0.12837303, dtype=float32)}
{'loss_inverse': Array(0.00014538, dtype=float32)}


  8%|▊         | 78030/1000000 [3:41:33<28:44:24,  8.91it/s]

{'loss': Array(0.13618888, dtype=float32), 'loss_cross_entropy': Array(0.12788841, dtype=float32)}
{'loss_inverse': Array(0.00029519, dtype=float32)}


  8%|▊         | 78040/1000000 [3:41:34<26:05:27,  9.82it/s]

{'loss': Array(0.16293068, dtype=float32), 'loss_cross_entropy': Array(0.1533916, dtype=float32)}
{'loss_inverse': Array(3.9876802e-05, dtype=float32)}


  8%|▊         | 78050/1000000 [3:41:36<28:33:15,  8.97it/s]

{'loss': Array(0.13354881, dtype=float32), 'loss_cross_entropy': Array(0.12552835, dtype=float32)}
{'loss_inverse': Array(6.468374e-07, dtype=float32)}


  8%|▊         | 78060/1000000 [3:41:38<26:10:58,  9.78it/s]

{'loss': Array(0.14519562, dtype=float32), 'loss_cross_entropy': Array(0.13696627, dtype=float32)}
{'loss_inverse': Array(6.7877045e-06, dtype=float32)}


  8%|▊         | 78070/1000000 [3:41:39<29:08:56,  8.79it/s]

{'loss': Array(0.1326164, dtype=float32), 'loss_cross_entropy': Array(0.12473064, dtype=float32)}
{'loss_inverse': Array(3.0968093e-07, dtype=float32)}


  8%|▊         | 78080/1000000 [3:41:41<25:54:42,  9.88it/s]

{'loss': Array(0.16020758, dtype=float32), 'loss_cross_entropy': Array(0.15134652, dtype=float32)}
{'loss_inverse': Array(9.29149e-06, dtype=float32)}


  8%|▊         | 78090/1000000 [3:41:43<31:41:27,  8.08it/s]

{'loss': Array(0.1363161, dtype=float32), 'loss_cross_entropy': Array(0.12878035, dtype=float32)}
{'loss_inverse': Array(2.4139414e-05, dtype=float32)}


  8%|▊         | 78100/1000000 [3:41:44<25:52:33,  9.90it/s]

{'loss': Array(0.15308115, dtype=float32), 'loss_cross_entropy': Array(0.14436196, dtype=float32)}
{'loss_inverse': Array(6.0231146e-06, dtype=float32)}


  8%|▊         | 78110/1000000 [3:41:45<24:43:21, 10.36it/s]

{'loss': Array(0.14299317, dtype=float32), 'loss_cross_entropy': Array(0.13531132, dtype=float32)}
{'loss_inverse': Array(2.6680233e-05, dtype=float32)}


  8%|▊         | 78120/1000000 [3:41:47<27:44:19,  9.23it/s]

{'loss': Array(0.14764182, dtype=float32), 'loss_cross_entropy': Array(0.1389678, dtype=float32)}
{'loss_inverse': Array(1.7582378e-06, dtype=float32)}


  8%|▊         | 78130/1000000 [3:41:48<25:08:23, 10.19it/s]

{'loss': Array(0.1536609, dtype=float32), 'loss_cross_entropy': Array(0.145001, dtype=float32)}
{'loss_inverse': Array(0.00037465, dtype=float32)}


  8%|▊         | 78140/1000000 [3:41:50<29:57:38,  8.55it/s]

{'loss': Array(0.14182633, dtype=float32), 'loss_cross_entropy': Array(0.13385175, dtype=float32)}
{'loss_inverse': Array(1.9849806e-05, dtype=float32)}


  8%|▊         | 78150/1000000 [3:41:52<26:21:40,  9.71it/s]

{'loss': Array(0.13168237, dtype=float32), 'loss_cross_entropy': Array(0.12437402, dtype=float32)}
{'loss_inverse': Array(6.761986e-05, dtype=float32)}


  8%|▊         | 78160/1000000 [3:41:54<32:01:04,  8.00it/s]

{'loss': Array(0.15602683, dtype=float32), 'loss_cross_entropy': Array(0.14751427, dtype=float32)}
{'loss_inverse': Array(3.3601435e-05, dtype=float32)}


  8%|▊         | 78170/1000000 [3:41:55<26:38:01,  9.61it/s]

{'loss': Array(0.14361574, dtype=float32), 'loss_cross_entropy': Array(0.13628407, dtype=float32)}
{'loss_inverse': Array(2.3061327e-06, dtype=float32)}


  8%|▊         | 78180/1000000 [3:41:56<25:14:08, 10.15it/s]

{'loss': Array(0.12995827, dtype=float32), 'loss_cross_entropy': Array(0.12225431, dtype=float32)}
{'loss_inverse': Array(8.712972e-05, dtype=float32)}


  8%|▊         | 78190/1000000 [3:41:58<29:05:22,  8.80it/s]

{'loss': Array(0.16487147, dtype=float32), 'loss_cross_entropy': Array(0.15581141, dtype=float32)}
{'loss_inverse': Array(0.00014302, dtype=float32)}


  8%|▊         | 78200/1000000 [3:42:00<25:34:17, 10.01it/s]

{'loss': Array(0.158853, dtype=float32), 'loss_cross_entropy': Array(0.14970282, dtype=float32)}
{'loss_inverse': Array(0.00043303, dtype=float32)}


  8%|▊         | 78210/1000000 [3:42:01<31:06:10,  8.23it/s]

{'loss': Array(0.15498668, dtype=float32), 'loss_cross_entropy': Array(0.14688534, dtype=float32)}
{'loss_inverse': Array(4.0854924e-05, dtype=float32)}


  8%|▊         | 78220/1000000 [3:42:03<26:20:20,  9.72it/s]

{'loss': Array(0.15634689, dtype=float32), 'loss_cross_entropy': Array(0.14795743, dtype=float32)}
{'loss_inverse': Array(0.00019318, dtype=float32)}


  8%|▊         | 78230/1000000 [3:42:04<34:28:10,  7.43it/s]

{'loss': Array(0.1505739, dtype=float32), 'loss_cross_entropy': Array(0.14189352, dtype=float32)}
{'loss_inverse': Array(0.00018021, dtype=float32)}


  8%|▊         | 78240/1000000 [3:42:06<27:38:21,  9.26it/s]

{'loss': Array(0.14289057, dtype=float32), 'loss_cross_entropy': Array(0.13484435, dtype=float32)}
{'loss_inverse': Array(0.00031685, dtype=float32)}


  8%|▊         | 78250/1000000 [3:42:07<26:49:36,  9.54it/s]

{'loss': Array(0.14780775, dtype=float32), 'loss_cross_entropy': Array(0.1393481, dtype=float32)}
{'loss_inverse': Array(0.00030255, dtype=float32)}


  8%|▊         | 78260/1000000 [3:42:09<28:36:53,  8.95it/s]

{'loss': Array(0.1473301, dtype=float32), 'loss_cross_entropy': Array(0.13886915, dtype=float32)}
{'loss_inverse': Array(0.00018655, dtype=float32)}


  8%|▊         | 78270/1000000 [3:42:11<25:24:27, 10.08it/s]

{'loss': Array(0.12141597, dtype=float32), 'loss_cross_entropy': Array(0.11419334, dtype=float32)}
{'loss_inverse': Array(5.2608102e-05, dtype=float32)}


  8%|▊         | 78280/1000000 [3:42:12<34:09:17,  7.50it/s]

{'loss': Array(0.15552878, dtype=float32), 'loss_cross_entropy': Array(0.14685972, dtype=float32)}
{'loss_inverse': Array(3.1289444e-05, dtype=float32)}


  8%|▊         | 78290/1000000 [3:42:14<27:24:05,  9.34it/s]

{'loss': Array(0.15154205, dtype=float32), 'loss_cross_entropy': Array(0.1428707, dtype=float32)}
{'loss_inverse': Array(2.7299204e-05, dtype=float32)}


  8%|▊         | 78300/1000000 [3:42:15<24:58:11, 10.25it/s]

{'loss': Array(0.15024441, dtype=float32), 'loss_cross_entropy': Array(0.14116846, dtype=float32)}
{'loss_inverse': Array(0.00010889, dtype=float32)}


  8%|▊         | 78310/1000000 [3:42:17<28:09:14,  9.09it/s]

{'loss': Array(0.12894157, dtype=float32), 'loss_cross_entropy': Array(0.12080858, dtype=float32)}
{'loss_inverse': Array(0.00011219, dtype=float32)}


  8%|▊         | 78320/1000000 [3:42:18<25:29:57, 10.04it/s]

{'loss': Array(0.13015416, dtype=float32), 'loss_cross_entropy': Array(0.12247963, dtype=float32)}
{'loss_inverse': Array(2.3525672e-05, dtype=float32)}


  8%|▊         | 78330/1000000 [3:42:20<30:03:55,  8.52it/s]

{'loss': Array(0.14930363, dtype=float32), 'loss_cross_entropy': Array(0.14105849, dtype=float32)}
{'loss_inverse': Array(4.440557e-06, dtype=float32)}


  8%|▊         | 78340/1000000 [3:42:22<25:48:43,  9.92it/s]

{'loss': Array(0.15088034, dtype=float32), 'loss_cross_entropy': Array(0.14231375, dtype=float32)}
{'loss_inverse': Array(9.2484996e-05, dtype=float32)}


  8%|▊         | 78350/1000000 [3:42:23<31:11:30,  8.21it/s]

{'loss': Array(0.14696895, dtype=float32), 'loss_cross_entropy': Array(0.13824709, dtype=float32)}
{'loss_inverse': Array(0.00010041, dtype=float32)}


  8%|▊         | 78360/1000000 [3:42:25<26:07:58,  9.80it/s]

{'loss': Array(0.1627916, dtype=float32), 'loss_cross_entropy': Array(0.1534037, dtype=float32)}
{'loss_inverse': Array(0.00020776, dtype=float32)}


  8%|▊         | 78370/1000000 [3:42:26<25:29:52, 10.04it/s]

{'loss': Array(0.13240615, dtype=float32), 'loss_cross_entropy': Array(0.12478751, dtype=float32)}
{'loss_inverse': Array(0.00010551, dtype=float32)}


  8%|▊         | 78380/1000000 [3:42:28<29:42:04,  8.62it/s]

{'loss': Array(0.134638, dtype=float32), 'loss_cross_entropy': Array(0.12648752, dtype=float32)}
{'loss_inverse': Array(0.00017664, dtype=float32)}


  8%|▊         | 78390/1000000 [3:42:29<25:52:18,  9.90it/s]

{'loss': Array(0.14911292, dtype=float32), 'loss_cross_entropy': Array(0.14026389, dtype=float32)}
{'loss_inverse': Array(5.7098066e-07, dtype=float32)}


  8%|▊         | 78400/1000000 [3:42:31<34:31:52,  7.41it/s]

{'loss': Array(0.13854721, dtype=float32), 'loss_cross_entropy': Array(0.13070494, dtype=float32)}
{'loss_inverse': Array(1.61859e-05, dtype=float32)}


  8%|▊         | 78410/1000000 [3:42:33<27:27:50,  9.32it/s]

{'loss': Array(0.14254911, dtype=float32), 'loss_cross_entropy': Array(0.13422735, dtype=float32)}
{'loss_inverse': Array(0.00014083, dtype=float32)}


  8%|▊         | 78420/1000000 [3:42:34<39:10:23,  6.53it/s]

{'loss': Array(0.13334842, dtype=float32), 'loss_cross_entropy': Array(0.125575, dtype=float32)}
{'loss_inverse': Array(1.3476152e-05, dtype=float32)}


  8%|▊         | 78430/1000000 [3:42:36<27:51:21,  9.19it/s]

{'loss': Array(0.16119324, dtype=float32), 'loss_cross_entropy': Array(0.15192877, dtype=float32)}
{'loss_inverse': Array(2.6147374e-05, dtype=float32)}


  8%|▊         | 78440/1000000 [3:42:38<38:12:10,  6.70it/s]

{'loss': Array(0.15481025, dtype=float32), 'loss_cross_entropy': Array(0.1461363, dtype=float32)}
{'loss_inverse': Array(2.3237362e-05, dtype=float32)}


  8%|▊         | 78450/1000000 [3:42:39<27:30:48,  9.30it/s]

{'loss': Array(0.1532641, dtype=float32), 'loss_cross_entropy': Array(0.14448655, dtype=float32)}
{'loss_inverse': Array(2.100491e-05, dtype=float32)}


  8%|▊         | 78460/1000000 [3:42:40<25:12:53, 10.15it/s]

{'loss': Array(0.11957785, dtype=float32), 'loss_cross_entropy': Array(0.11276642, dtype=float32)}
{'loss_inverse': Array(5.43328e-07, dtype=float32)}


  8%|▊         | 78470/1000000 [3:42:42<31:22:45,  8.16it/s]

{'loss': Array(0.14047912, dtype=float32), 'loss_cross_entropy': Array(0.1323837, dtype=float32)}
{'loss_inverse': Array(1.7526118e-07, dtype=float32)}


  8%|▊         | 78480/1000000 [3:42:44<26:22:47,  9.70it/s]

{'loss': Array(0.12664881, dtype=float32), 'loss_cross_entropy': Array(0.11941647, dtype=float32)}
{'loss_inverse': Array(4.3687105e-06, dtype=float32)}


  8%|▊         | 78490/1000000 [3:42:45<38:12:34,  6.70it/s]

{'loss': Array(0.14503664, dtype=float32), 'loss_cross_entropy': Array(0.13619016, dtype=float32)}
{'loss_inverse': Array(1.690973e-05, dtype=float32)}


  8%|▊         | 78500/1000000 [3:42:47<27:08:04,  9.43it/s]

{'loss': Array(0.15290181, dtype=float32), 'loss_cross_entropy': Array(0.14357166, dtype=float32)}
{'loss_inverse': Array(6.453072e-05, dtype=float32)}


  8%|▊         | 78510/1000000 [3:42:55<99:04:06,  2.58it/s] 

{'loss': Array(0.14700747, dtype=float32), 'loss_cross_entropy': Array(0.13863882, dtype=float32)}
{'loss_inverse': Array(7.0886534e-07, dtype=float32)}


  8%|▊         | 78520/1000000 [3:42:57<37:43:11,  6.79it/s] 

{'loss': Array(0.16043995, dtype=float32), 'loss_cross_entropy': Array(0.15194331, dtype=float32)}
{'loss_inverse': Array(8.779495e-07, dtype=float32)}


  8%|▊         | 78530/1000000 [3:42:58<27:54:28,  9.17it/s]

{'loss': Array(0.15098135, dtype=float32), 'loss_cross_entropy': Array(0.14240825, dtype=float32)}
{'loss_inverse': Array(1.0482375e-05, dtype=float32)}


  8%|▊         | 78540/1000000 [3:43:00<29:49:57,  8.58it/s]

{'loss': Array(0.15545718, dtype=float32), 'loss_cross_entropy': Array(0.14692293, dtype=float32)}
{'loss_inverse': Array(0.00013932, dtype=float32)}


  8%|▊         | 78550/1000000 [3:43:01<25:48:10,  9.92it/s]

{'loss': Array(0.15471816, dtype=float32), 'loss_cross_entropy': Array(0.14666767, dtype=float32)}
{'loss_inverse': Array(4.3304113e-05, dtype=float32)}


  8%|▊         | 78560/1000000 [3:43:03<38:24:18,  6.66it/s]

{'loss': Array(0.14922965, dtype=float32), 'loss_cross_entropy': Array(0.14153647, dtype=float32)}
{'loss_inverse': Array(1.33387175e-05, dtype=float32)}


  8%|▊         | 78570/1000000 [3:43:04<27:44:14,  9.23it/s]

{'loss': Array(0.13882999, dtype=float32), 'loss_cross_entropy': Array(0.13131072, dtype=float32)}
{'loss_inverse': Array(2.6736623e-07, dtype=float32)}


  8%|▊         | 78580/1000000 [3:43:06<26:27:54,  9.67it/s]

{'loss': Array(0.14895922, dtype=float32), 'loss_cross_entropy': Array(0.14045301, dtype=float32)}
{'loss_inverse': Array(1.7193448e-06, dtype=float32)}


  8%|▊         | 78590/1000000 [3:43:08<30:55:26,  8.28it/s]

{'loss': Array(0.1350805, dtype=float32), 'loss_cross_entropy': Array(0.12689327, dtype=float32)}
{'loss_inverse': Array(0.00038401, dtype=float32)}


  8%|▊         | 78600/1000000 [3:43:09<26:15:02,  9.75it/s]

{'loss': Array(0.14737894, dtype=float32), 'loss_cross_entropy': Array(0.13840936, dtype=float32)}
{'loss_inverse': Array(3.2686855e-06, dtype=float32)}


  8%|▊         | 78610/1000000 [3:43:11<29:58:23,  8.54it/s]

{'loss': Array(0.13503699, dtype=float32), 'loss_cross_entropy': Array(0.12695198, dtype=float32)}
{'loss_inverse': Array(4.154557e-05, dtype=float32)}


  8%|▊         | 78620/1000000 [3:43:12<25:33:06, 10.02it/s]

{'loss': Array(0.14683549, dtype=float32), 'loss_cross_entropy': Array(0.13836654, dtype=float32)}
{'loss_inverse': Array(0.00025196, dtype=float32)}


  8%|▊         | 78630/1000000 [3:43:14<31:14:09,  8.19it/s]

{'loss': Array(0.12584183, dtype=float32), 'loss_cross_entropy': Array(0.11845358, dtype=float32)}
{'loss_inverse': Array(3.5463865e-05, dtype=float32)}


  8%|▊         | 78640/1000000 [3:43:16<25:57:02,  9.86it/s]

{'loss': Array(0.14302985, dtype=float32), 'loss_cross_entropy': Array(0.13550223, dtype=float32)}
{'loss_inverse': Array(3.7231987e-07, dtype=float32)}


  8%|▊         | 78650/1000000 [3:43:17<24:56:19, 10.26it/s]

{'loss': Array(0.15342242, dtype=float32), 'loss_cross_entropy': Array(0.1448226, dtype=float32)}
{'loss_inverse': Array(0.00011201, dtype=float32)}


  8%|▊         | 78660/1000000 [3:43:19<28:46:49,  8.89it/s]

{'loss': Array(0.12583204, dtype=float32), 'loss_cross_entropy': Array(0.1179003, dtype=float32)}
{'loss_inverse': Array(2.717904e-05, dtype=float32)}


  8%|▊         | 78670/1000000 [3:43:20<25:50:55,  9.90it/s]

{'loss': Array(0.15807544, dtype=float32), 'loss_cross_entropy': Array(0.14909132, dtype=float32)}
{'loss_inverse': Array(1.9974495e-06, dtype=float32)}


  8%|▊         | 78680/1000000 [3:43:22<29:23:25,  8.71it/s]

{'loss': Array(0.13349387, dtype=float32), 'loss_cross_entropy': Array(0.12534334, dtype=float32)}
{'loss_inverse': Array(2.50462e-06, dtype=float32)}


  8%|▊         | 78690/1000000 [3:43:23<25:49:11,  9.91it/s]

{'loss': Array(0.15577313, dtype=float32), 'loss_cross_entropy': Array(0.14682049, dtype=float32)}
{'loss_inverse': Array(4.6542686e-05, dtype=float32)}


  8%|▊         | 78700/1000000 [3:43:25<31:13:56,  8.19it/s]

{'loss': Array(0.14126043, dtype=float32), 'loss_cross_entropy': Array(0.13344146, dtype=float32)}
{'loss_inverse': Array(4.735913e-06, dtype=float32)}


  8%|▊         | 78710/1000000 [3:43:26<25:40:03,  9.97it/s]

{'loss': Array(0.13505846, dtype=float32), 'loss_cross_entropy': Array(0.12735635, dtype=float32)}
{'loss_inverse': Array(4.2512676e-07, dtype=float32)}


  8%|▊         | 78720/1000000 [3:43:28<25:27:11, 10.05it/s]

{'loss': Array(0.12901355, dtype=float32), 'loss_cross_entropy': Array(0.12142824, dtype=float32)}
{'loss_inverse': Array(7.952725e-05, dtype=float32)}


  8%|▊         | 78730/1000000 [3:43:30<30:38:58,  8.35it/s]

{'loss': Array(0.14890532, dtype=float32), 'loss_cross_entropy': Array(0.14043707, dtype=float32)}
{'loss_inverse': Array(4.3233737e-05, dtype=float32)}


  8%|▊         | 78740/1000000 [3:43:31<26:15:59,  9.74it/s]

{'loss': Array(0.15585923, dtype=float32), 'loss_cross_entropy': Array(0.14700173, dtype=float32)}
{'loss_inverse': Array(4.906064e-06, dtype=float32)}


  8%|▊         | 78750/1000000 [3:43:33<31:51:58,  8.03it/s]

{'loss': Array(0.16178088, dtype=float32), 'loss_cross_entropy': Array(0.1526506, dtype=float32)}
{'loss_inverse': Array(5.6911404e-07, dtype=float32)}


  8%|▊         | 78760/1000000 [3:43:34<26:42:28,  9.58it/s]

{'loss': Array(0.13499734, dtype=float32), 'loss_cross_entropy': Array(0.127649, dtype=float32)}
{'loss_inverse': Array(4.4523877e-05, dtype=float32)}


  8%|▊         | 78770/1000000 [3:43:36<34:20:47,  7.45it/s]

{'loss': Array(0.13841115, dtype=float32), 'loss_cross_entropy': Array(0.13040139, dtype=float32)}
{'loss_inverse': Array(4.5930934e-05, dtype=float32)}


  8%|▊         | 78780/1000000 [3:43:38<26:29:24,  9.66it/s]

{'loss': Array(0.13712046, dtype=float32), 'loss_cross_entropy': Array(0.12940577, dtype=float32)}
{'loss_inverse': Array(8.416482e-05, dtype=float32)}


  8%|▊         | 78790/1000000 [3:43:39<25:07:07, 10.19it/s]

{'loss': Array(0.13272357, dtype=float32), 'loss_cross_entropy': Array(0.12443353, dtype=float32)}
{'loss_inverse': Array(1.4433167e-05, dtype=float32)}


  8%|▊         | 78800/1000000 [3:43:41<28:05:42,  9.11it/s]

{'loss': Array(0.14143293, dtype=float32), 'loss_cross_entropy': Array(0.13312468, dtype=float32)}
{'loss_inverse': Array(1.3707245e-05, dtype=float32)}


  8%|▊         | 78810/1000000 [3:43:42<25:16:43, 10.12it/s]

{'loss': Array(0.14809246, dtype=float32), 'loss_cross_entropy': Array(0.14020726, dtype=float32)}
{'loss_inverse': Array(0.00010722, dtype=float32)}


  8%|▊         | 78820/1000000 [3:43:44<33:52:41,  7.55it/s]

{'loss': Array(0.14970273, dtype=float32), 'loss_cross_entropy': Array(0.14129072, dtype=float32)}
{'loss_inverse': Array(5.2686016e-07, dtype=float32)}


  8%|▊         | 78830/1000000 [3:43:45<26:27:01,  9.67it/s]

{'loss': Array(0.13431513, dtype=float32), 'loss_cross_entropy': Array(0.12618685, dtype=float32)}
{'loss_inverse': Array(3.6528178e-05, dtype=float32)}


  8%|▊         | 78840/1000000 [3:43:47<25:06:50, 10.19it/s]

{'loss': Array(0.14206038, dtype=float32), 'loss_cross_entropy': Array(0.13388227, dtype=float32)}
{'loss_inverse': Array(7.946567e-06, dtype=float32)}


  8%|▊         | 78850/1000000 [3:43:48<27:25:27,  9.33it/s]

{'loss': Array(0.13397197, dtype=float32), 'loss_cross_entropy': Array(0.12642744, dtype=float32)}
{'loss_inverse': Array(5.683168e-06, dtype=float32)}


  8%|▊         | 78860/1000000 [3:43:50<25:23:14, 10.08it/s]

{'loss': Array(0.15109465, dtype=float32), 'loss_cross_entropy': Array(0.14245652, dtype=float32)}
{'loss_inverse': Array(4.894695e-06, dtype=float32)}


  8%|▊         | 78870/1000000 [3:43:51<29:16:28,  8.74it/s]

{'loss': Array(0.12434979, dtype=float32), 'loss_cross_entropy': Array(0.117265, dtype=float32)}
{'loss_inverse': Array(3.9579925e-05, dtype=float32)}


  8%|▊         | 78880/1000000 [3:43:53<25:16:05, 10.13it/s]

{'loss': Array(0.1629824, dtype=float32), 'loss_cross_entropy': Array(0.15378611, dtype=float32)}
{'loss_inverse': Array(0.000158, dtype=float32)}


  8%|▊         | 78890/1000000 [3:43:55<31:05:32,  8.23it/s]

{'loss': Array(0.16255271, dtype=float32), 'loss_cross_entropy': Array(0.15384412, dtype=float32)}
{'loss_inverse': Array(8.409475e-05, dtype=float32)}


  8%|▊         | 78900/1000000 [3:43:56<26:19:42,  9.72it/s]

{'loss': Array(0.14180242, dtype=float32), 'loss_cross_entropy': Array(0.13349685, dtype=float32)}
{'loss_inverse': Array(2.474441e-05, dtype=float32)}


  8%|▊         | 78910/1000000 [3:43:57<25:29:18, 10.04it/s]

{'loss': Array(0.14431107, dtype=float32), 'loss_cross_entropy': Array(0.1361748, dtype=float32)}
{'loss_inverse': Array(2.5906744e-05, dtype=float32)}


  8%|▊         | 78920/1000000 [3:43:59<29:07:27,  8.78it/s]

{'loss': Array(0.15641433, dtype=float32), 'loss_cross_entropy': Array(0.14776446, dtype=float32)}
{'loss_inverse': Array(5.8738962e-05, dtype=float32)}


  8%|▊         | 78930/1000000 [3:44:01<25:35:18, 10.00it/s]

{'loss': Array(0.14316167, dtype=float32), 'loss_cross_entropy': Array(0.13489754, dtype=float32)}
{'loss_inverse': Array(0.00016762, dtype=float32)}


  8%|▊         | 78940/1000000 [3:44:02<33:51:16,  7.56it/s]

{'loss': Array(0.13965027, dtype=float32), 'loss_cross_entropy': Array(0.13198434, dtype=float32)}
{'loss_inverse': Array(4.992806e-06, dtype=float32)}


  8%|▊         | 78950/1000000 [3:44:04<27:21:07,  9.35it/s]

{'loss': Array(0.13616025, dtype=float32), 'loss_cross_entropy': Array(0.12857161, dtype=float32)}
{'loss_inverse': Array(0.00011647, dtype=float32)}


  8%|▊         | 78960/1000000 [3:44:06<38:00:46,  6.73it/s]

{'loss': Array(0.1439078, dtype=float32), 'loss_cross_entropy': Array(0.13590632, dtype=float32)}
{'loss_inverse': Array(1.8754907e-05, dtype=float32)}


  8%|▊         | 78970/1000000 [3:44:07<27:17:23,  9.37it/s]

{'loss': Array(0.1517942, dtype=float32), 'loss_cross_entropy': Array(0.1437255, dtype=float32)}
{'loss_inverse': Array(4.5435077e-05, dtype=float32)}


  8%|▊         | 78980/1000000 [3:44:09<38:38:23,  6.62it/s]

{'loss': Array(0.12520961, dtype=float32), 'loss_cross_entropy': Array(0.11834303, dtype=float32)}
{'loss_inverse': Array(6.137223e-06, dtype=float32)}


  8%|▊         | 78990/1000000 [3:44:10<27:55:18,  9.16it/s]

{'loss': Array(0.15565567, dtype=float32), 'loss_cross_entropy': Array(0.14705627, dtype=float32)}
{'loss_inverse': Array(8.752441e-06, dtype=float32)}


  8%|▊         | 79000/1000000 [3:44:12<26:05:05,  9.81it/s]

{'loss': Array(0.15000686, dtype=float32), 'loss_cross_entropy': Array(0.14110015, dtype=float32)}
{'loss_inverse': Array(0.00016567, dtype=float32)}


  8%|▊         | 79010/1000000 [3:44:20<94:33:37,  2.71it/s] 

{'loss': Array(0.14740919, dtype=float32), 'loss_cross_entropy': Array(0.1388748, dtype=float32)}
{'loss_inverse': Array(1.4683309e-05, dtype=float32)}


  8%|▊         | 79020/1000000 [3:44:21<36:47:32,  6.95it/s] 

{'loss': Array(0.13599971, dtype=float32), 'loss_cross_entropy': Array(0.12770581, dtype=float32)}
{'loss_inverse': Array(9.147316e-05, dtype=float32)}


  8%|▊         | 79030/1000000 [3:44:23<27:10:47,  9.41it/s]

{'loss': Array(0.1399941, dtype=float32), 'loss_cross_entropy': Array(0.13241912, dtype=float32)}
{'loss_inverse': Array(2.0268222e-07, dtype=float32)}


  8%|▊         | 79040/1000000 [3:44:24<28:22:09,  9.02it/s]

{'loss': Array(0.14176354, dtype=float32), 'loss_cross_entropy': Array(0.13346304, dtype=float32)}
{'loss_inverse': Array(1.9166994e-06, dtype=float32)}


  8%|▊         | 79050/1000000 [3:44:26<25:21:41, 10.09it/s]

{'loss': Array(0.13229322, dtype=float32), 'loss_cross_entropy': Array(0.12427075, dtype=float32)}
{'loss_inverse': Array(8.745784e-05, dtype=float32)}


  8%|▊         | 79060/1000000 [3:44:28<29:49:09,  8.58it/s]

{'loss': Array(0.14665674, dtype=float32), 'loss_cross_entropy': Array(0.13814235, dtype=float32)}
{'loss_inverse': Array(3.0274938e-05, dtype=float32)}


  8%|▊         | 79070/1000000 [3:44:29<26:20:05,  9.71it/s]

{'loss': Array(0.16394155, dtype=float32), 'loss_cross_entropy': Array(0.15509637, dtype=float32)}
{'loss_inverse': Array(4.1970834e-05, dtype=float32)}


  8%|▊         | 79080/1000000 [3:44:31<29:28:51,  8.68it/s]

{'loss': Array(0.15162441, dtype=float32), 'loss_cross_entropy': Array(0.14254604, dtype=float32)}
{'loss_inverse': Array(0.00013591, dtype=float32)}


  8%|▊         | 79090/1000000 [3:44:32<26:00:54,  9.83it/s]

{'loss': Array(0.14134045, dtype=float32), 'loss_cross_entropy': Array(0.13285194, dtype=float32)}
{'loss_inverse': Array(2.1608501e-07, dtype=float32)}


  8%|▊         | 79100/1000000 [3:44:34<30:04:00,  8.51it/s]

{'loss': Array(0.1376131, dtype=float32), 'loss_cross_entropy': Array(0.12939471, dtype=float32)}
{'loss_inverse': Array(1.2674719e-06, dtype=float32)}


  8%|▊         | 79110/1000000 [3:44:35<25:49:44,  9.90it/s]

{'loss': Array(0.14036809, dtype=float32), 'loss_cross_entropy': Array(0.13195981, dtype=float32)}
{'loss_inverse': Array(2.1093476e-06, dtype=float32)}


  8%|▊         | 79120/1000000 [3:44:37<33:46:01,  7.58it/s]

{'loss': Array(0.14399056, dtype=float32), 'loss_cross_entropy': Array(0.13492088, dtype=float32)}
{'loss_inverse': Array(6.274032e-05, dtype=float32)}


  8%|▊         | 79130/1000000 [3:44:39<26:49:05,  9.54it/s]

{'loss': Array(0.1591957, dtype=float32), 'loss_cross_entropy': Array(0.15081345, dtype=float32)}
{'loss_inverse': Array(6.012601e-05, dtype=float32)}


  8%|▊         | 79140/1000000 [3:44:40<37:35:23,  6.80it/s]

{'loss': Array(0.1316495, dtype=float32), 'loss_cross_entropy': Array(0.12380604, dtype=float32)}
{'loss_inverse': Array(0.00036867, dtype=float32)}


  8%|▊         | 79150/1000000 [3:44:42<26:49:57,  9.53it/s]

{'loss': Array(0.14503382, dtype=float32), 'loss_cross_entropy': Array(0.13729177, dtype=float32)}
{'loss_inverse': Array(2.8694768e-07, dtype=float32)}


  8%|▊         | 79160/1000000 [3:44:43<33:40:23,  7.60it/s]

{'loss': Array(0.14323595, dtype=float32), 'loss_cross_entropy': Array(0.13475345, dtype=float32)}
{'loss_inverse': Array(7.95091e-05, dtype=float32)}


  8%|▊         | 79170/1000000 [3:44:45<26:17:42,  9.73it/s]

{'loss': Array(0.1390173, dtype=float32), 'loss_cross_entropy': Array(0.13144675, dtype=float32)}
{'loss_inverse': Array(2.2052063e-05, dtype=float32)}


  8%|▊         | 79180/1000000 [3:44:46<25:22:31, 10.08it/s]

{'loss': Array(0.15198503, dtype=float32), 'loss_cross_entropy': Array(0.14336342, dtype=float32)}
{'loss_inverse': Array(1.2257609e-05, dtype=float32)}


  8%|▊         | 79190/1000000 [3:44:48<29:14:19,  8.75it/s]

{'loss': Array(0.14144307, dtype=float32), 'loss_cross_entropy': Array(0.13348036, dtype=float32)}
{'loss_inverse': Array(5.7440484e-06, dtype=float32)}


  8%|▊         | 79200/1000000 [3:44:49<25:59:15,  9.84it/s]

{'loss': Array(0.15132998, dtype=float32), 'loss_cross_entropy': Array(0.1421688, dtype=float32)}
{'loss_inverse': Array(3.369216e-05, dtype=float32)}


  8%|▊         | 79210/1000000 [3:44:51<31:13:55,  8.19it/s]

{'loss': Array(0.13806425, dtype=float32), 'loss_cross_entropy': Array(0.12967058, dtype=float32)}
{'loss_inverse': Array(5.0578154e-07, dtype=float32)}


  8%|▊         | 79220/1000000 [3:44:53<26:34:47,  9.62it/s]

{'loss': Array(0.15897267, dtype=float32), 'loss_cross_entropy': Array(0.14970092, dtype=float32)}
{'loss_inverse': Array(0.00018552, dtype=float32)}


  8%|▊         | 79230/1000000 [3:44:54<33:59:27,  7.52it/s]

{'loss': Array(0.1480505, dtype=float32), 'loss_cross_entropy': Array(0.1396784, dtype=float32)}
{'loss_inverse': Array(0.00011576, dtype=float32)}


  8%|▊         | 79240/1000000 [3:44:56<26:21:05,  9.71it/s]

{'loss': Array(0.14920461, dtype=float32), 'loss_cross_entropy': Array(0.14126402, dtype=float32)}
{'loss_inverse': Array(8.464181e-05, dtype=float32)}


  8%|▊         | 79250/1000000 [3:44:58<33:58:53,  7.53it/s]

{'loss': Array(0.16022396, dtype=float32), 'loss_cross_entropy': Array(0.15097877, dtype=float32)}
{'loss_inverse': Array(3.6733425e-05, dtype=float32)}


  8%|▊         | 79260/1000000 [3:44:59<26:49:04,  9.54it/s]

{'loss': Array(0.1498807, dtype=float32), 'loss_cross_entropy': Array(0.14188017, dtype=float32)}
{'loss_inverse': Array(3.907566e-05, dtype=float32)}


  8%|▊         | 79270/1000000 [3:45:00<25:24:56, 10.06it/s]

{'loss': Array(0.13585089, dtype=float32), 'loss_cross_entropy': Array(0.12788038, dtype=float32)}
{'loss_inverse': Array(1.0259914e-05, dtype=float32)}


  8%|▊         | 79280/1000000 [3:45:02<30:59:40,  8.25it/s]

{'loss': Array(0.15087779, dtype=float32), 'loss_cross_entropy': Array(0.14314756, dtype=float32)}
{'loss_inverse': Array(4.96597e-06, dtype=float32)}


  8%|▊         | 79290/1000000 [3:45:04<26:00:52,  9.83it/s]

{'loss': Array(0.15649675, dtype=float32), 'loss_cross_entropy': Array(0.14771287, dtype=float32)}
{'loss_inverse': Array(1.6182888e-06, dtype=float32)}


  8%|▊         | 79300/1000000 [3:45:05<38:30:12,  6.64it/s]

{'loss': Array(0.13694264, dtype=float32), 'loss_cross_entropy': Array(0.12910204, dtype=float32)}
{'loss_inverse': Array(9.932787e-06, dtype=float32)}


  8%|▊         | 79310/1000000 [3:45:07<28:41:26,  8.91it/s]

{'loss': Array(0.13576806, dtype=float32), 'loss_cross_entropy': Array(0.12828179, dtype=float32)}
{'loss_inverse': Array(2.7430444e-06, dtype=float32)}


  8%|▊         | 79320/1000000 [3:45:08<26:09:43,  9.78it/s]

{'loss': Array(0.158041, dtype=float32), 'loss_cross_entropy': Array(0.14924756, dtype=float32)}
{'loss_inverse': Array(5.525605e-07, dtype=float32)}


  8%|▊         | 79330/1000000 [3:45:10<28:40:07,  8.92it/s]

{'loss': Array(0.13393222, dtype=float32), 'loss_cross_entropy': Array(0.12607875, dtype=float32)}
{'loss_inverse': Array(1.940311e-06, dtype=float32)}


  8%|▊         | 79340/1000000 [3:45:12<26:21:43,  9.70it/s]

{'loss': Array(0.14124326, dtype=float32), 'loss_cross_entropy': Array(0.13319068, dtype=float32)}
{'loss_inverse': Array(0.00020264, dtype=float32)}


  8%|▊         | 79350/1000000 [3:45:13<28:59:01,  8.82it/s]

{'loss': Array(0.14503662, dtype=float32), 'loss_cross_entropy': Array(0.13684516, dtype=float32)}
{'loss_inverse': Array(1.9736171e-05, dtype=float32)}


  8%|▊         | 79360/1000000 [3:45:15<25:54:33,  9.87it/s]

{'loss': Array(0.14707288, dtype=float32), 'loss_cross_entropy': Array(0.13836066, dtype=float32)}
{'loss_inverse': Array(3.8834824e-07, dtype=float32)}


  8%|▊         | 79370/1000000 [3:45:17<31:17:28,  8.17it/s]

{'loss': Array(0.15205668, dtype=float32), 'loss_cross_entropy': Array(0.14348759, dtype=float32)}
{'loss_inverse': Array(1.705888e-05, dtype=float32)}


  8%|▊         | 79380/1000000 [3:45:18<26:03:24,  9.81it/s]

{'loss': Array(0.1598246, dtype=float32), 'loss_cross_entropy': Array(0.15128358, dtype=float32)}
{'loss_inverse': Array(9.963743e-06, dtype=float32)}


  8%|▊         | 79390/1000000 [3:45:20<31:35:29,  8.09it/s]

{'loss': Array(0.12527923, dtype=float32), 'loss_cross_entropy': Array(0.1178961, dtype=float32)}
{'loss_inverse': Array(2.386984e-07, dtype=float32)}


  8%|▊         | 79400/1000000 [3:45:21<26:11:54,  9.76it/s]

{'loss': Array(0.14115022, dtype=float32), 'loss_cross_entropy': Array(0.1331085, dtype=float32)}
{'loss_inverse': Array(3.5975063e-07, dtype=float32)}


  8%|▊         | 79410/1000000 [3:45:23<37:58:17,  6.73it/s]

{'loss': Array(0.1369932, dtype=float32), 'loss_cross_entropy': Array(0.12903024, dtype=float32)}
{'loss_inverse': Array(1.1700179e-05, dtype=float32)}


  8%|▊         | 79420/1000000 [3:45:24<28:06:43,  9.10it/s]

{'loss': Array(0.1498057, dtype=float32), 'loss_cross_entropy': Array(0.14174426, dtype=float32)}
{'loss_inverse': Array(3.0497617e-06, dtype=float32)}


  8%|▊         | 79430/1000000 [3:45:26<25:21:36, 10.08it/s]

{'loss': Array(0.14368244, dtype=float32), 'loss_cross_entropy': Array(0.1351401, dtype=float32)}
{'loss_inverse': Array(0.0001529, dtype=float32)}


  8%|▊         | 79440/1000000 [3:45:27<28:00:06,  9.13it/s]

{'loss': Array(0.14121221, dtype=float32), 'loss_cross_entropy': Array(0.13301425, dtype=float32)}
{'loss_inverse': Array(1.2379626e-05, dtype=float32)}


  8%|▊         | 79450/1000000 [3:45:29<26:20:42,  9.71it/s]

{'loss': Array(0.13006207, dtype=float32), 'loss_cross_entropy': Array(0.12202847, dtype=float32)}
{'loss_inverse': Array(0.00026463, dtype=float32)}


  8%|▊         | 79460/1000000 [3:45:31<31:44:52,  8.05it/s]

{'loss': Array(0.11839838, dtype=float32), 'loss_cross_entropy': Array(0.11113043, dtype=float32)}
{'loss_inverse': Array(1.3580543e-05, dtype=float32)}


  8%|▊         | 79470/1000000 [3:45:32<26:55:07,  9.50it/s]

{'loss': Array(0.13688414, dtype=float32), 'loss_cross_entropy': Array(0.12929887, dtype=float32)}
{'loss_inverse': Array(8.814588e-07, dtype=float32)}


  8%|▊         | 79480/1000000 [3:45:34<25:58:27,  9.84it/s]

{'loss': Array(0.13161102, dtype=float32), 'loss_cross_entropy': Array(0.12331786, dtype=float32)}
{'loss_inverse': Array(3.216551e-06, dtype=float32)}


  8%|▊         | 79490/1000000 [3:45:35<29:36:19,  8.64it/s]

{'loss': Array(0.14549848, dtype=float32), 'loss_cross_entropy': Array(0.1373848, dtype=float32)}
{'loss_inverse': Array(1.2362618e-07, dtype=float32)}


  8%|▊         | 79500/1000000 [3:45:37<25:34:09, 10.00it/s]

{'loss': Array(0.1468718, dtype=float32), 'loss_cross_entropy': Array(0.1383824, dtype=float32)}
{'loss_inverse': Array(1.5335936e-06, dtype=float32)}


  8%|▊         | 79510/1000000 [3:45:45<97:10:40,  2.63it/s] 

{'loss': Array(0.14045613, dtype=float32), 'loss_cross_entropy': Array(0.13245581, dtype=float32)}
{'loss_inverse': Array(0.0001579, dtype=float32)}


  8%|▊         | 79520/1000000 [3:45:47<38:08:40,  6.70it/s] 

{'loss': Array(0.13494925, dtype=float32), 'loss_cross_entropy': Array(0.12755795, dtype=float32)}
{'loss_inverse': Array(1.6622393e-05, dtype=float32)}


  8%|▊         | 79530/1000000 [3:45:49<40:43:26,  6.28it/s]

{'loss': Array(0.15800317, dtype=float32), 'loss_cross_entropy': Array(0.14926058, dtype=float32)}
{'loss_inverse': Array(2.1692651e-06, dtype=float32)}


  8%|▊         | 79540/1000000 [3:45:50<27:33:34,  9.28it/s]

{'loss': Array(0.14189498, dtype=float32), 'loss_cross_entropy': Array(0.13361064, dtype=float32)}
{'loss_inverse': Array(3.384627e-06, dtype=float32)}


  8%|▊         | 79550/1000000 [3:45:52<25:07:12, 10.18it/s]

{'loss': Array(0.1346234, dtype=float32), 'loss_cross_entropy': Array(0.12693496, dtype=float32)}
{'loss_inverse': Array(1.4853249e-06, dtype=float32)}


  8%|▊         | 79560/1000000 [3:45:53<28:21:11,  9.02it/s]

{'loss': Array(0.15937518, dtype=float32), 'loss_cross_entropy': Array(0.15067491, dtype=float32)}
{'loss_inverse': Array(8.212282e-06, dtype=float32)}


  8%|▊         | 79570/1000000 [3:45:55<38:41:46,  6.61it/s]

{'loss': Array(0.13606137, dtype=float32), 'loss_cross_entropy': Array(0.12850301, dtype=float32)}
{'loss_inverse': Array(5.3152047e-07, dtype=float32)}


  8%|▊         | 79580/1000000 [3:45:57<27:38:27,  9.25it/s]

{'loss': Array(0.14508475, dtype=float32), 'loss_cross_entropy': Array(0.13746156, dtype=float32)}
{'loss_inverse': Array(5.4276825e-06, dtype=float32)}


  8%|▊         | 79590/1000000 [3:45:58<25:08:51, 10.17it/s]

{'loss': Array(0.15233432, dtype=float32), 'loss_cross_entropy': Array(0.1442051, dtype=float32)}
{'loss_inverse': Array(1.2489137e-06, dtype=float32)}


  8%|▊         | 79600/1000000 [3:46:00<28:17:02,  9.04it/s]

{'loss': Array(0.15119188, dtype=float32), 'loss_cross_entropy': Array(0.14227344, dtype=float32)}
{'loss_inverse': Array(8.962076e-06, dtype=float32)}


  8%|▊         | 79610/1000000 [3:46:01<25:10:49, 10.15it/s]

{'loss': Array(0.14607438, dtype=float32), 'loss_cross_entropy': Array(0.13815527, dtype=float32)}
{'loss_inverse': Array(9.6849246e-05, dtype=float32)}


  8%|▊         | 79620/1000000 [3:46:03<29:29:11,  8.67it/s]

{'loss': Array(0.15930764, dtype=float32), 'loss_cross_entropy': Array(0.150521, dtype=float32)}
{'loss_inverse': Array(1.7855521e-06, dtype=float32)}


  8%|▊         | 79630/1000000 [3:46:04<26:14:15,  9.74it/s]

{'loss': Array(0.13910988, dtype=float32), 'loss_cross_entropy': Array(0.13102505, dtype=float32)}
{'loss_inverse': Array(2.7521296e-06, dtype=float32)}


  8%|▊         | 79640/1000000 [3:46:06<32:31:42,  7.86it/s]

{'loss': Array(0.12758887, dtype=float32), 'loss_cross_entropy': Array(0.11967953, dtype=float32)}
{'loss_inverse': Array(3.7046464e-06, dtype=float32)}


  8%|▊         | 79650/1000000 [3:46:08<26:55:56,  9.49it/s]

{'loss': Array(0.1436089, dtype=float32), 'loss_cross_entropy': Array(0.1358094, dtype=float32)}
{'loss_inverse': Array(4.881156e-07, dtype=float32)}


  8%|▊         | 79660/1000000 [3:46:09<25:51:11,  9.89it/s]

{'loss': Array(0.14342967, dtype=float32), 'loss_cross_entropy': Array(0.13468961, dtype=float32)}
{'loss_inverse': Array(2.590778e-07, dtype=float32)}


  8%|▊         | 79670/1000000 [3:46:11<29:39:00,  8.62it/s]

{'loss': Array(0.13574468, dtype=float32), 'loss_cross_entropy': Array(0.12765355, dtype=float32)}
{'loss_inverse': Array(2.2927828e-05, dtype=float32)}


  8%|▊         | 79680/1000000 [3:46:12<25:52:46,  9.88it/s]

{'loss': Array(0.13857165, dtype=float32), 'loss_cross_entropy': Array(0.13052903, dtype=float32)}
{'loss_inverse': Array(3.7590242e-05, dtype=float32)}


  8%|▊         | 79690/1000000 [3:46:14<30:55:10,  8.27it/s]

{'loss': Array(0.14528334, dtype=float32), 'loss_cross_entropy': Array(0.1364105, dtype=float32)}
{'loss_inverse': Array(4.5474153e-05, dtype=float32)}


  8%|▊         | 79700/1000000 [3:46:15<25:59:46,  9.83it/s]

{'loss': Array(0.15621434, dtype=float32), 'loss_cross_entropy': Array(0.1475488, dtype=float32)}
{'loss_inverse': Array(5.8876885e-06, dtype=float32)}


  8%|▊         | 79710/1000000 [3:46:17<33:27:39,  7.64it/s]

{'loss': Array(0.12675719, dtype=float32), 'loss_cross_entropy': Array(0.11944419, dtype=float32)}
{'loss_inverse': Array(3.8788053e-07, dtype=float32)}


  8%|▊         | 79720/1000000 [3:46:19<26:44:24,  9.56it/s]

{'loss': Array(0.14404309, dtype=float32), 'loss_cross_entropy': Array(0.13542186, dtype=float32)}
{'loss_inverse': Array(1.9935967e-06, dtype=float32)}


  8%|▊         | 79730/1000000 [3:46:20<26:41:33,  9.58it/s]

{'loss': Array(0.15814775, dtype=float32), 'loss_cross_entropy': Array(0.14980565, dtype=float32)}
{'loss_inverse': Array(6.670972e-06, dtype=float32)}


  8%|▊         | 79740/1000000 [3:46:22<28:50:09,  8.86it/s]

{'loss': Array(0.13084546, dtype=float32), 'loss_cross_entropy': Array(0.12367045, dtype=float32)}
{'loss_inverse': Array(3.1587606e-06, dtype=float32)}


  8%|▊         | 79750/1000000 [3:46:23<25:32:19, 10.01it/s]

{'loss': Array(0.14078425, dtype=float32), 'loss_cross_entropy': Array(0.13212688, dtype=float32)}
{'loss_inverse': Array(7.664654e-07, dtype=float32)}


  8%|▊         | 79760/1000000 [3:46:25<34:13:49,  7.47it/s]

{'loss': Array(0.14133728, dtype=float32), 'loss_cross_entropy': Array(0.13370213, dtype=float32)}
{'loss_inverse': Array(1.2281583e-06, dtype=float32)}


  8%|▊         | 79770/1000000 [3:46:26<26:18:34,  9.72it/s]

{'loss': Array(0.15730679, dtype=float32), 'loss_cross_entropy': Array(0.1483597, dtype=float32)}
{'loss_inverse': Array(4.501165e-07, dtype=float32)}


  8%|▊         | 79780/1000000 [3:46:28<25:25:31, 10.05it/s]

{'loss': Array(0.14195903, dtype=float32), 'loss_cross_entropy': Array(0.13381587, dtype=float32)}
{'loss_inverse': Array(7.3729404e-07, dtype=float32)}


  8%|▊         | 79790/1000000 [3:46:30<27:59:02,  9.13it/s]

{'loss': Array(0.16174802, dtype=float32), 'loss_cross_entropy': Array(0.1518598, dtype=float32)}
{'loss_inverse': Array(4.3812284e-07, dtype=float32)}


  8%|▊         | 79800/1000000 [3:46:31<25:33:38, 10.00it/s]

{'loss': Array(0.17748377, dtype=float32), 'loss_cross_entropy': Array(0.16737176, dtype=float32)}
{'loss_inverse': Array(6.116634e-05, dtype=float32)}


  8%|▊         | 79810/1000000 [3:46:33<29:48:37,  8.57it/s]

{'loss': Array(0.16110733, dtype=float32), 'loss_cross_entropy': Array(0.15223831, dtype=float32)}
{'loss_inverse': Array(3.9609287e-05, dtype=float32)}


  8%|▊         | 79820/1000000 [3:46:34<25:57:19,  9.85it/s]

{'loss': Array(0.14412287, dtype=float32), 'loss_cross_entropy': Array(0.1362329, dtype=float32)}
{'loss_inverse': Array(8.36663e-06, dtype=float32)}


  8%|▊         | 79830/1000000 [3:46:36<32:08:45,  7.95it/s]

{'loss': Array(0.13966878, dtype=float32), 'loss_cross_entropy': Array(0.13176288, dtype=float32)}
{'loss_inverse': Array(0.00042701, dtype=float32)}


  8%|▊         | 79840/1000000 [3:46:37<25:50:18,  9.89it/s]

{'loss': Array(0.12227907, dtype=float32), 'loss_cross_entropy': Array(0.11439048, dtype=float32)}
{'loss_inverse': Array(4.3113423e-06, dtype=float32)}


  8%|▊         | 79850/1000000 [3:46:39<25:29:41, 10.03it/s]

{'loss': Array(0.16466573, dtype=float32), 'loss_cross_entropy': Array(0.15519512, dtype=float32)}
{'loss_inverse': Array(0.00010666, dtype=float32)}


  8%|▊         | 79860/1000000 [3:46:41<29:51:32,  8.56it/s]

{'loss': Array(0.1437762, dtype=float32), 'loss_cross_entropy': Array(0.13532971, dtype=float32)}
{'loss_inverse': Array(3.5692854e-06, dtype=float32)}


  8%|▊         | 79870/1000000 [3:46:42<26:08:36,  9.78it/s]

{'loss': Array(0.1580816, dtype=float32), 'loss_cross_entropy': Array(0.14943996, dtype=float32)}
{'loss_inverse': Array(4.8676946e-05, dtype=float32)}


  8%|▊         | 79880/1000000 [3:46:44<33:58:39,  7.52it/s]

{'loss': Array(0.17093742, dtype=float32), 'loss_cross_entropy': Array(0.16185759, dtype=float32)}
{'loss_inverse': Array(7.32941e-06, dtype=float32)}


  8%|▊         | 79890/1000000 [3:46:45<27:07:54,  9.42it/s]

{'loss': Array(0.15648232, dtype=float32), 'loss_cross_entropy': Array(0.1479286, dtype=float32)}
{'loss_inverse': Array(0.00011917, dtype=float32)}


  8%|▊         | 79900/1000000 [3:46:47<39:19:49,  6.50it/s]

{'loss': Array(0.15810369, dtype=float32), 'loss_cross_entropy': Array(0.14919043, dtype=float32)}
{'loss_inverse': Array(7.8778314e-05, dtype=float32)}


  8%|▊         | 79910/1000000 [3:46:48<27:07:29,  9.42it/s]

{'loss': Array(0.1513625, dtype=float32), 'loss_cross_entropy': Array(0.14293313, dtype=float32)}
{'loss_inverse': Array(3.4403718e-06, dtype=float32)}


  8%|▊         | 79920/1000000 [3:46:50<37:38:03,  6.79it/s]

{'loss': Array(0.15659513, dtype=float32), 'loss_cross_entropy': Array(0.14802204, dtype=float32)}
{'loss_inverse': Array(0.00010688, dtype=float32)}


  8%|▊         | 79930/1000000 [3:46:52<27:24:29,  9.32it/s]

{'loss': Array(0.16023967, dtype=float32), 'loss_cross_entropy': Array(0.15112685, dtype=float32)}
{'loss_inverse': Array(4.639038e-05, dtype=float32)}


  8%|▊         | 79940/1000000 [3:46:53<25:42:58,  9.94it/s]

{'loss': Array(0.14665356, dtype=float32), 'loss_cross_entropy': Array(0.13847327, dtype=float32)}
{'loss_inverse': Array(6.886292e-05, dtype=float32)}


  8%|▊         | 79950/1000000 [3:46:55<31:57:36,  8.00it/s]

{'loss': Array(0.15759382, dtype=float32), 'loss_cross_entropy': Array(0.14837565, dtype=float32)}
{'loss_inverse': Array(1.237915e-06, dtype=float32)}


  8%|▊         | 79960/1000000 [3:46:56<26:03:17,  9.81it/s]

{'loss': Array(0.13944803, dtype=float32), 'loss_cross_entropy': Array(0.1320915, dtype=float32)}
{'loss_inverse': Array(8.704077e-05, dtype=float32)}


  8%|▊         | 79970/1000000 [3:46:58<37:56:39,  6.74it/s]

{'loss': Array(0.158514, dtype=float32), 'loss_cross_entropy': Array(0.1499028, dtype=float32)}
{'loss_inverse': Array(3.0020433e-06, dtype=float32)}


  8%|▊         | 79980/1000000 [3:46:59<26:39:08,  9.59it/s]

{'loss': Array(0.14177014, dtype=float32), 'loss_cross_entropy': Array(0.13364054, dtype=float32)}
{'loss_inverse': Array(1.0220846e-06, dtype=float32)}


  8%|▊         | 79990/1000000 [3:47:01<37:46:06,  6.77it/s]

{'loss': Array(0.1378462, dtype=float32), 'loss_cross_entropy': Array(0.12945455, dtype=float32)}
{'loss_inverse': Array(1.910748e-06, dtype=float32)}


  8%|▊         | 80000/1000000 [3:47:03<27:06:51,  9.43it/s]

{'loss': Array(0.13159764, dtype=float32), 'loss_cross_entropy': Array(0.12426541, dtype=float32)}
{'loss_inverse': Array(1.7144068e-06, dtype=float32)}


  8%|▊         | 80010/1000000 [3:47:11<86:04:29,  2.97it/s] 

{'loss': Array(0.13878857, dtype=float32), 'loss_cross_entropy': Array(0.12999743, dtype=float32)}
{'loss_inverse': Array(2.0496882e-05, dtype=float32)}


  8%|▊         | 80020/1000000 [3:47:12<43:53:40,  5.82it/s]

{'loss': Array(0.13145475, dtype=float32), 'loss_cross_entropy': Array(0.12316938, dtype=float32)}
{'loss_inverse': Array(8.366949e-06, dtype=float32)}


  8%|▊         | 80030/1000000 [3:47:14<28:24:11,  9.00it/s]

{'loss': Array(0.13183203, dtype=float32), 'loss_cross_entropy': Array(0.12430085, dtype=float32)}
{'loss_inverse': Array(7.1809504e-06, dtype=float32)}


  8%|▊         | 80040/1000000 [3:47:15<25:27:27, 10.04it/s]

{'loss': Array(0.16566566, dtype=float32), 'loss_cross_entropy': Array(0.1561106, dtype=float32)}
{'loss_inverse': Array(3.2638396e-05, dtype=float32)}


  8%|▊         | 80050/1000000 [3:47:17<28:11:16,  9.07it/s]

{'loss': Array(0.14376031, dtype=float32), 'loss_cross_entropy': Array(0.13518938, dtype=float32)}
{'loss_inverse': Array(6.9189485e-07, dtype=float32)}


  8%|▊         | 80060/1000000 [3:47:18<25:20:30, 10.08it/s]

{'loss': Array(0.16915591, dtype=float32), 'loss_cross_entropy': Array(0.159671, dtype=float32)}
{'loss_inverse': Array(2.2812974e-06, dtype=float32)}


  8%|▊         | 80070/1000000 [3:47:20<29:08:06,  8.77it/s]

{'loss': Array(0.13933666, dtype=float32), 'loss_cross_entropy': Array(0.1313086, dtype=float32)}
{'loss_inverse': Array(4.115027e-05, dtype=float32)}


  8%|▊         | 80080/1000000 [3:47:21<25:33:04, 10.00it/s]

{'loss': Array(0.14148587, dtype=float32), 'loss_cross_entropy': Array(0.13365446, dtype=float32)}
{'loss_inverse': Array(3.4999488e-07, dtype=float32)}


  8%|▊         | 80090/1000000 [3:47:23<31:56:25,  8.00it/s]

{'loss': Array(0.16656049, dtype=float32), 'loss_cross_entropy': Array(0.15743573, dtype=float32)}
{'loss_inverse': Array(7.418518e-08, dtype=float32)}


  8%|▊         | 80100/1000000 [3:47:25<26:02:25,  9.81it/s]

{'loss': Array(0.1418945, dtype=float32), 'loss_cross_entropy': Array(0.1335579, dtype=float32)}
{'loss_inverse': Array(1.748e-06, dtype=float32)}


  8%|▊         | 80110/1000000 [3:47:26<24:55:08, 10.25it/s]

{'loss': Array(0.15497941, dtype=float32), 'loss_cross_entropy': Array(0.14696787, dtype=float32)}
{'loss_inverse': Array(0.00018342, dtype=float32)}


  8%|▊         | 80120/1000000 [3:47:28<28:24:58,  8.99it/s]

{'loss': Array(0.14640175, dtype=float32), 'loss_cross_entropy': Array(0.13789229, dtype=float32)}
{'loss_inverse': Array(7.1441207e-07, dtype=float32)}


  8%|▊         | 80130/1000000 [3:47:29<25:39:43,  9.96it/s]

{'loss': Array(0.14670846, dtype=float32), 'loss_cross_entropy': Array(0.1387837, dtype=float32)}
{'loss_inverse': Array(5.9825845e-05, dtype=float32)}


  8%|▊         | 80140/1000000 [3:47:31<29:29:36,  8.66it/s]

{'loss': Array(0.14875664, dtype=float32), 'loss_cross_entropy': Array(0.13938572, dtype=float32)}
{'loss_inverse': Array(2.9062583e-06, dtype=float32)}


  8%|▊         | 80150/1000000 [3:47:32<25:53:31,  9.87it/s]

{'loss': Array(0.15271427, dtype=float32), 'loss_cross_entropy': Array(0.1443103, dtype=float32)}
{'loss_inverse': Array(0.00012397, dtype=float32)}


  8%|▊         | 80160/1000000 [3:47:34<32:01:04,  7.98it/s]

{'loss': Array(0.13332711, dtype=float32), 'loss_cross_entropy': Array(0.12537192, dtype=float32)}
{'loss_inverse': Array(0.00051978, dtype=float32)}


  8%|▊         | 80170/1000000 [3:47:36<26:49:27,  9.53it/s]

{'loss': Array(0.16707316, dtype=float32), 'loss_cross_entropy': Array(0.15792029, dtype=float32)}
{'loss_inverse': Array(2.0033433e-05, dtype=float32)}


  8%|▊         | 80180/1000000 [3:47:37<25:04:31, 10.19it/s]

{'loss': Array(0.16343181, dtype=float32), 'loss_cross_entropy': Array(0.15463583, dtype=float32)}
{'loss_inverse': Array(2.4006327e-05, dtype=float32)}


  8%|▊         | 80190/1000000 [3:47:39<29:15:44,  8.73it/s]

{'loss': Array(0.14570846, dtype=float32), 'loss_cross_entropy': Array(0.1370402, dtype=float32)}
{'loss_inverse': Array(1.9856964e-06, dtype=float32)}


  8%|▊         | 80200/1000000 [3:47:40<25:40:46,  9.95it/s]

{'loss': Array(0.15259066, dtype=float32), 'loss_cross_entropy': Array(0.14378205, dtype=float32)}
{'loss_inverse': Array(4.936773e-06, dtype=float32)}


  8%|▊         | 80210/1000000 [3:47:42<31:34:32,  8.09it/s]

{'loss': Array(0.12316703, dtype=float32), 'loss_cross_entropy': Array(0.11598378, dtype=float32)}
{'loss_inverse': Array(1.381747e-05, dtype=float32)}


  8%|▊         | 80220/1000000 [3:47:43<26:00:12,  9.83it/s]

{'loss': Array(0.13834469, dtype=float32), 'loss_cross_entropy': Array(0.13021345, dtype=float32)}
{'loss_inverse': Array(2.4040452e-05, dtype=float32)}


  8%|▊         | 80230/1000000 [3:47:45<34:37:47,  7.38it/s]

{'loss': Array(0.14938833, dtype=float32), 'loss_cross_entropy': Array(0.14016731, dtype=float32)}
{'loss_inverse': Array(8.932141e-07, dtype=float32)}


  8%|▊         | 80240/1000000 [3:47:47<26:38:19,  9.59it/s]

{'loss': Array(0.14814082, dtype=float32), 'loss_cross_entropy': Array(0.13949884, dtype=float32)}
{'loss_inverse': Array(0.0001918, dtype=float32)}


  8%|▊         | 80250/1000000 [3:47:48<25:09:54, 10.15it/s]

{'loss': Array(0.132603, dtype=float32), 'loss_cross_entropy': Array(0.1251553, dtype=float32)}
{'loss_inverse': Array(0.00015218, dtype=float32)}


  8%|▊         | 80260/1000000 [3:47:50<29:28:59,  8.67it/s]

{'loss': Array(0.13662006, dtype=float32), 'loss_cross_entropy': Array(0.12888658, dtype=float32)}
{'loss_inverse': Array(2.1544958e-05, dtype=float32)}


  8%|▊         | 80270/1000000 [3:47:51<25:32:50, 10.00it/s]

{'loss': Array(0.15281, dtype=float32), 'loss_cross_entropy': Array(0.14383042, dtype=float32)}
{'loss_inverse': Array(1.0010707e-05, dtype=float32)}


  8%|▊         | 80280/1000000 [3:47:53<33:59:44,  7.52it/s]

{'loss': Array(0.14957027, dtype=float32), 'loss_cross_entropy': Array(0.14079843, dtype=float32)}
{'loss_inverse': Array(3.2160628e-05, dtype=float32)}


  8%|▊         | 80290/1000000 [3:47:54<26:26:32,  9.66it/s]

{'loss': Array(0.14515084, dtype=float32), 'loss_cross_entropy': Array(0.13711129, dtype=float32)}
{'loss_inverse': Array(1.9192817e-05, dtype=float32)}


  8%|▊         | 80300/1000000 [3:47:56<25:20:32, 10.08it/s]

{'loss': Array(0.13793924, dtype=float32), 'loss_cross_entropy': Array(0.12982647, dtype=float32)}
{'loss_inverse': Array(5.6170694e-07, dtype=float32)}


  8%|▊         | 80310/1000000 [3:47:57<28:16:02,  9.04it/s]

{'loss': Array(0.15671709, dtype=float32), 'loss_cross_entropy': Array(0.14817174, dtype=float32)}
{'loss_inverse': Array(1.7102544e-05, dtype=float32)}


  8%|▊         | 80320/1000000 [3:47:59<25:36:06,  9.98it/s]

{'loss': Array(0.14113744, dtype=float32), 'loss_cross_entropy': Array(0.1333696, dtype=float32)}
{'loss_inverse': Array(6.101424e-05, dtype=float32)}


  8%|▊         | 80330/1000000 [3:48:01<29:29:55,  8.66it/s]

{'loss': Array(0.16007364, dtype=float32), 'loss_cross_entropy': Array(0.15063249, dtype=float32)}
{'loss_inverse': Array(1.28678075e-05, dtype=float32)}


  8%|▊         | 80340/1000000 [3:48:02<26:06:07,  9.79it/s]

{'loss': Array(0.15049975, dtype=float32), 'loss_cross_entropy': Array(0.14160743, dtype=float32)}
{'loss_inverse': Array(9.703701e-05, dtype=float32)}


  8%|▊         | 80350/1000000 [3:48:04<31:31:26,  8.10it/s]

{'loss': Array(0.13948573, dtype=float32), 'loss_cross_entropy': Array(0.13139041, dtype=float32)}
{'loss_inverse': Array(8.735866e-08, dtype=float32)}


  8%|▊         | 80360/1000000 [3:48:05<26:23:47,  9.68it/s]

{'loss': Array(0.12686329, dtype=float32), 'loss_cross_entropy': Array(0.11947855, dtype=float32)}
{'loss_inverse': Array(2.886408e-07, dtype=float32)}


  8%|▊         | 80370/1000000 [3:48:07<26:08:04,  9.77it/s]

{'loss': Array(0.13918304, dtype=float32), 'loss_cross_entropy': Array(0.13053633, dtype=float32)}
{'loss_inverse': Array(1.6881157e-05, dtype=float32)}


  8%|▊         | 80380/1000000 [3:48:09<30:28:51,  8.38it/s]

{'loss': Array(0.15220682, dtype=float32), 'loss_cross_entropy': Array(0.14384122, dtype=float32)}
{'loss_inverse': Array(2.997295e-06, dtype=float32)}


  8%|▊         | 80390/1000000 [3:48:10<25:51:58,  9.88it/s]

{'loss': Array(0.15758882, dtype=float32), 'loss_cross_entropy': Array(0.14909357, dtype=float32)}
{'loss_inverse': Array(9.834947e-07, dtype=float32)}


  8%|▊         | 80400/1000000 [3:48:12<34:07:59,  7.48it/s]

{'loss': Array(0.12782489, dtype=float32), 'loss_cross_entropy': Array(0.12038546, dtype=float32)}
{'loss_inverse': Array(0.00014965, dtype=float32)}


  8%|▊         | 80410/1000000 [3:48:13<26:21:58,  9.69it/s]

{'loss': Array(0.13916916, dtype=float32), 'loss_cross_entropy': Array(0.1310693, dtype=float32)}
{'loss_inverse': Array(1.112496e-06, dtype=float32)}


  8%|▊         | 80420/1000000 [3:48:15<38:40:27,  6.60it/s]

{'loss': Array(0.14427426, dtype=float32), 'loss_cross_entropy': Array(0.13500634, dtype=float32)}
{'loss_inverse': Array(2.2883462e-06, dtype=float32)}


  8%|▊         | 80430/1000000 [3:48:16<27:30:21,  9.29it/s]

{'loss': Array(0.13714188, dtype=float32), 'loss_cross_entropy': Array(0.1298541, dtype=float32)}
{'loss_inverse': Array(1.1072347e-07, dtype=float32)}


  8%|▊         | 80440/1000000 [3:48:18<38:43:59,  6.59it/s]

{'loss': Array(0.14616649, dtype=float32), 'loss_cross_entropy': Array(0.13793492, dtype=float32)}
{'loss_inverse': Array(2.5062316e-06, dtype=float32)}


  8%|▊         | 80450/1000000 [3:48:19<27:07:22,  9.42it/s]

{'loss': Array(0.14261757, dtype=float32), 'loss_cross_entropy': Array(0.13418017, dtype=float32)}
{'loss_inverse': Array(9.205216e-07, dtype=float32)}


  8%|▊         | 80460/1000000 [3:48:21<25:12:04, 10.14it/s]

{'loss': Array(0.15534447, dtype=float32), 'loss_cross_entropy': Array(0.14680454, dtype=float32)}
{'loss_inverse': Array(2.7627815e-05, dtype=float32)}


  8%|▊         | 80470/1000000 [3:48:23<31:36:46,  8.08it/s]

{'loss': Array(0.13325219, dtype=float32), 'loss_cross_entropy': Array(0.12525737, dtype=float32)}
{'loss_inverse': Array(0.00019899, dtype=float32)}


  8%|▊         | 80480/1000000 [3:48:24<26:29:31,  9.64it/s]

{'loss': Array(0.14686345, dtype=float32), 'loss_cross_entropy': Array(0.13893688, dtype=float32)}
{'loss_inverse': Array(6.944658e-06, dtype=float32)}


  8%|▊         | 80490/1000000 [3:48:26<37:43:23,  6.77it/s]

{'loss': Array(0.14890248, dtype=float32), 'loss_cross_entropy': Array(0.14058803, dtype=float32)}
{'loss_inverse': Array(1.4646622e-05, dtype=float32)}


  8%|▊         | 80500/1000000 [3:48:27<26:53:36,  9.50it/s]

{'loss': Array(0.13753764, dtype=float32), 'loss_cross_entropy': Array(0.12937365, dtype=float32)}
{'loss_inverse': Array(4.538534e-05, dtype=float32)}


  8%|▊         | 80510/1000000 [3:48:35<97:05:59,  2.63it/s] 

{'loss': Array(0.13954294, dtype=float32), 'loss_cross_entropy': Array(0.13199008, dtype=float32)}
{'loss_inverse': Array(4.8145666e-06, dtype=float32)}


  8%|▊         | 80520/1000000 [3:48:37<36:54:18,  6.92it/s] 

{'loss': Array(0.13572572, dtype=float32), 'loss_cross_entropy': Array(0.12780665, dtype=float32)}
{'loss_inverse': Array(7.329556e-06, dtype=float32)}


  8%|▊         | 80530/1000000 [3:48:38<26:57:27,  9.47it/s]

{'loss': Array(0.13579214, dtype=float32), 'loss_cross_entropy': Array(0.1284948, dtype=float32)}
{'loss_inverse': Array(5.949123e-06, dtype=float32)}


  8%|▊         | 80540/1000000 [3:48:40<29:58:25,  8.52it/s]

{'loss': Array(0.14552578, dtype=float32), 'loss_cross_entropy': Array(0.13739415, dtype=float32)}
{'loss_inverse': Array(9.3623487e-07, dtype=float32)}


  8%|▊         | 80550/1000000 [3:48:41<26:02:13,  9.81it/s]

{'loss': Array(0.13890614, dtype=float32), 'loss_cross_entropy': Array(0.13148312, dtype=float32)}
{'loss_inverse': Array(1.40554575e-05, dtype=float32)}


  8%|▊         | 80560/1000000 [3:48:43<37:31:02,  6.81it/s]

{'loss': Array(0.1385372, dtype=float32), 'loss_cross_entropy': Array(0.12995735, dtype=float32)}
{'loss_inverse': Array(0.00023657, dtype=float32)}


  8%|▊         | 80570/1000000 [3:48:45<26:58:34,  9.47it/s]

{'loss': Array(0.133129, dtype=float32), 'loss_cross_entropy': Array(0.12520413, dtype=float32)}
{'loss_inverse': Array(7.8331743e-07, dtype=float32)}


  8%|▊         | 80580/1000000 [3:48:46<25:44:13,  9.92it/s]

{'loss': Array(0.14914306, dtype=float32), 'loss_cross_entropy': Array(0.14016874, dtype=float32)}
{'loss_inverse': Array(4.826553e-05, dtype=float32)}


  8%|▊         | 80590/1000000 [3:48:48<30:05:46,  8.49it/s]

{'loss': Array(0.13598256, dtype=float32), 'loss_cross_entropy': Array(0.12817381, dtype=float32)}
{'loss_inverse': Array(1.664642e-05, dtype=float32)}


  8%|▊         | 80600/1000000 [3:48:49<25:45:06,  9.92it/s]

{'loss': Array(0.15248403, dtype=float32), 'loss_cross_entropy': Array(0.14357336, dtype=float32)}
{'loss_inverse': Array(1.0382307e-06, dtype=float32)}


  8%|▊         | 80610/1000000 [3:48:51<29:18:47,  8.71it/s]

{'loss': Array(0.14218964, dtype=float32), 'loss_cross_entropy': Array(0.13395868, dtype=float32)}
{'loss_inverse': Array(3.4202992e-06, dtype=float32)}


  8%|▊         | 80620/1000000 [3:48:52<26:09:58,  9.76it/s]

{'loss': Array(0.12991273, dtype=float32), 'loss_cross_entropy': Array(0.12235955, dtype=float32)}
{'loss_inverse': Array(5.393194e-06, dtype=float32)}


  8%|▊         | 80630/1000000 [3:48:54<31:46:56,  8.04it/s]

{'loss': Array(0.15017287, dtype=float32), 'loss_cross_entropy': Array(0.14125675, dtype=float32)}
{'loss_inverse': Array(2.1784072e-06, dtype=float32)}


  8%|▊         | 80640/1000000 [3:48:56<26:34:49,  9.61it/s]

{'loss': Array(0.1436821, dtype=float32), 'loss_cross_entropy': Array(0.13585357, dtype=float32)}
{'loss_inverse': Array(9.097514e-06, dtype=float32)}


  8%|▊         | 80650/1000000 [3:48:57<25:06:40, 10.17it/s]

{'loss': Array(0.14963883, dtype=float32), 'loss_cross_entropy': Array(0.14140074, dtype=float32)}
{'loss_inverse': Array(8.115589e-07, dtype=float32)}


  8%|▊         | 80660/1000000 [3:48:59<28:23:45,  8.99it/s]

{'loss': Array(0.1534496, dtype=float32), 'loss_cross_entropy': Array(0.14510684, dtype=float32)}
{'loss_inverse': Array(8.258633e-09, dtype=float32)}


  8%|▊         | 80670/1000000 [3:49:00<25:47:12,  9.90it/s]

{'loss': Array(0.14719318, dtype=float32), 'loss_cross_entropy': Array(0.1389817, dtype=float32)}
{'loss_inverse': Array(2.0202674e-06, dtype=float32)}


  8%|▊         | 80680/1000000 [3:49:02<29:26:20,  8.67it/s]

{'loss': Array(0.14098705, dtype=float32), 'loss_cross_entropy': Array(0.13272284, dtype=float32)}
{'loss_inverse': Array(0.00029174, dtype=float32)}


  8%|▊         | 80690/1000000 [3:49:03<25:35:15,  9.98it/s]

{'loss': Array(0.14876477, dtype=float32), 'loss_cross_entropy': Array(0.14020252, dtype=float32)}
{'loss_inverse': Array(2.3578791e-07, dtype=float32)}


  8%|▊         | 80700/1000000 [3:49:05<31:41:46,  8.06it/s]

{'loss': Array(0.1441889, dtype=float32), 'loss_cross_entropy': Array(0.1352729, dtype=float32)}
{'loss_inverse': Array(2.527429e-06, dtype=float32)}


  8%|▊         | 80710/1000000 [3:49:07<27:01:52,  9.45it/s]

{'loss': Array(0.13419095, dtype=float32), 'loss_cross_entropy': Array(0.12593699, dtype=float32)}
{'loss_inverse': Array(1.3194715e-05, dtype=float32)}


  8%|▊         | 80720/1000000 [3:49:08<26:56:01,  9.48it/s]

{'loss': Array(0.15505862, dtype=float32), 'loss_cross_entropy': Array(0.14620732, dtype=float32)}
{'loss_inverse': Array(7.4773775e-06, dtype=float32)}


  8%|▊         | 80730/1000000 [3:49:10<30:25:57,  8.39it/s]

{'loss': Array(0.1621661, dtype=float32), 'loss_cross_entropy': Array(0.15340231, dtype=float32)}
{'loss_inverse': Array(1.27937e-07, dtype=float32)}


  8%|▊         | 80740/1000000 [3:49:11<26:53:26,  9.50it/s]

{'loss': Array(0.13398132, dtype=float32), 'loss_cross_entropy': Array(0.12606469, dtype=float32)}
{'loss_inverse': Array(4.5510137e-07, dtype=float32)}


  8%|▊         | 80750/1000000 [3:49:13<32:32:50,  7.85it/s]

{'loss': Array(0.15896194, dtype=float32), 'loss_cross_entropy': Array(0.15024458, dtype=float32)}
{'loss_inverse': Array(2.0118239e-07, dtype=float32)}


  8%|▊         | 80760/1000000 [3:49:15<26:32:38,  9.62it/s]

{'loss': Array(0.15489647, dtype=float32), 'loss_cross_entropy': Array(0.14624284, dtype=float32)}
{'loss_inverse': Array(3.0362398e-06, dtype=float32)}


  8%|▊         | 80770/1000000 [3:49:17<34:59:27,  7.30it/s]

{'loss': Array(0.15241538, dtype=float32), 'loss_cross_entropy': Array(0.14304969, dtype=float32)}
{'loss_inverse': Array(7.957603e-05, dtype=float32)}


  8%|▊         | 80780/1000000 [3:49:18<27:03:42,  9.44it/s]

{'loss': Array(0.15099062, dtype=float32), 'loss_cross_entropy': Array(0.14204296, dtype=float32)}
{'loss_inverse': Array(9.542454e-07, dtype=float32)}


  8%|▊         | 80790/1000000 [3:49:19<25:00:21, 10.21it/s]

{'loss': Array(0.13812187, dtype=float32), 'loss_cross_entropy': Array(0.12991533, dtype=float32)}
{'loss_inverse': Array(0.00032758, dtype=float32)}


  8%|▊         | 80800/1000000 [3:49:21<27:33:42,  9.26it/s]

{'loss': Array(0.15001586, dtype=float32), 'loss_cross_entropy': Array(0.14101215, dtype=float32)}
{'loss_inverse': Array(2.392632e-06, dtype=float32)}


  8%|▊         | 80810/1000000 [3:49:23<25:41:59,  9.94it/s]

{'loss': Array(0.14685956, dtype=float32), 'loss_cross_entropy': Array(0.13882564, dtype=float32)}
{'loss_inverse': Array(6.792917e-07, dtype=float32)}


  8%|▊         | 80820/1000000 [3:49:24<34:11:50,  7.47it/s]

{'loss': Array(0.13949439, dtype=float32), 'loss_cross_entropy': Array(0.13188939, dtype=float32)}
{'loss_inverse': Array(1.1446928e-05, dtype=float32)}


  8%|▊         | 80830/1000000 [3:49:26<26:57:18,  9.47it/s]

{'loss': Array(0.15248998, dtype=float32), 'loss_cross_entropy': Array(0.14342457, dtype=float32)}
{'loss_inverse': Array(6.0087816e-05, dtype=float32)}


  8%|▊         | 80840/1000000 [3:49:27<25:00:22, 10.21it/s]

{'loss': Array(0.14061703, dtype=float32), 'loss_cross_entropy': Array(0.13320665, dtype=float32)}
{'loss_inverse': Array(5.6248286e-05, dtype=float32)}


  8%|▊         | 80850/1000000 [3:49:29<29:20:33,  8.70it/s]

{'loss': Array(0.14564951, dtype=float32), 'loss_cross_entropy': Array(0.13807398, dtype=float32)}
{'loss_inverse': Array(1.601665e-07, dtype=float32)}


  8%|▊         | 80860/1000000 [3:49:30<25:26:25, 10.04it/s]

{'loss': Array(0.15358862, dtype=float32), 'loss_cross_entropy': Array(0.14550559, dtype=float32)}
{'loss_inverse': Array(2.1751016e-07, dtype=float32)}


  8%|▊         | 80870/1000000 [3:49:32<30:55:20,  8.26it/s]

{'loss': Array(0.14442793, dtype=float32), 'loss_cross_entropy': Array(0.1360944, dtype=float32)}
{'loss_inverse': Array(1.5896061e-06, dtype=float32)}


  8%|▊         | 80880/1000000 [3:49:33<25:51:58,  9.87it/s]

{'loss': Array(0.15634392, dtype=float32), 'loss_cross_entropy': Array(0.14725037, dtype=float32)}
{'loss_inverse': Array(2.1257179e-06, dtype=float32)}


  8%|▊         | 80890/1000000 [3:49:35<34:13:07,  7.46it/s]

{'loss': Array(0.13786782, dtype=float32), 'loss_cross_entropy': Array(0.12973006, dtype=float32)}
{'loss_inverse': Array(2.9019014e-07, dtype=float32)}


  8%|▊         | 80900/1000000 [3:49:37<26:51:25,  9.51it/s]

{'loss': Array(0.1523893, dtype=float32), 'loss_cross_entropy': Array(0.14378999, dtype=float32)}
{'loss_inverse': Array(4.0160313e-07, dtype=float32)}


  8%|▊         | 80910/1000000 [3:49:38<25:46:08,  9.91it/s]

{'loss': Array(0.1363299, dtype=float32), 'loss_cross_entropy': Array(0.12849663, dtype=float32)}
{'loss_inverse': Array(1.054146e-06, dtype=float32)}


  8%|▊         | 80920/1000000 [3:49:40<29:40:12,  8.60it/s]

{'loss': Array(0.1436619, dtype=float32), 'loss_cross_entropy': Array(0.13589944, dtype=float32)}
{'loss_inverse': Array(1.5063799e-07, dtype=float32)}


  8%|▊         | 80930/1000000 [3:49:41<25:50:49,  9.88it/s]

{'loss': Array(0.13746609, dtype=float32), 'loss_cross_entropy': Array(0.12935956, dtype=float32)}
{'loss_inverse': Array(3.278794e-05, dtype=float32)}


  8%|▊         | 80940/1000000 [3:49:43<34:25:42,  7.42it/s]

{'loss': Array(0.14033441, dtype=float32), 'loss_cross_entropy': Array(0.13202868, dtype=float32)}
{'loss_inverse': Array(1.394549e-07, dtype=float32)}


  8%|▊         | 80950/1000000 [3:49:44<26:25:03,  9.66it/s]

{'loss': Array(0.13636728, dtype=float32), 'loss_cross_entropy': Array(0.1287251, dtype=float32)}
{'loss_inverse': Array(1.28357615e-05, dtype=float32)}


  8%|▊         | 80960/1000000 [3:49:46<37:45:02,  6.76it/s]

{'loss': Array(0.13928887, dtype=float32), 'loss_cross_entropy': Array(0.13085505, dtype=float32)}
{'loss_inverse': Array(3.792682e-07, dtype=float32)}


  8%|▊         | 80970/1000000 [3:49:48<27:50:51,  9.17it/s]

{'loss': Array(0.1341056, dtype=float32), 'loss_cross_entropy': Array(0.12626646, dtype=float32)}
{'loss_inverse': Array(8.886226e-05, dtype=float32)}


  8%|▊         | 80980/1000000 [3:49:49<25:13:22, 10.12it/s]

{'loss': Array(0.16572212, dtype=float32), 'loss_cross_entropy': Array(0.15722452, dtype=float32)}
{'loss_inverse': Array(5.8876416e-05, dtype=float32)}


  8%|▊         | 80990/1000000 [3:49:51<28:19:12,  9.01it/s]

{'loss': Array(0.15827037, dtype=float32), 'loss_cross_entropy': Array(0.14927882, dtype=float32)}
{'loss_inverse': Array(2.2410766e-06, dtype=float32)}


  8%|▊         | 81000/1000000 [3:49:52<25:19:10, 10.08it/s]

{'loss': Array(0.1486145, dtype=float32), 'loss_cross_entropy': Array(0.1406507, dtype=float32)}
{'loss_inverse': Array(5.4280736e-05, dtype=float32)}


  8%|▊         | 81010/1000000 [3:50:00<88:50:47,  2.87it/s] 

{'loss': Array(0.1457424, dtype=float32), 'loss_cross_entropy': Array(0.13736632, dtype=float32)}
{'loss_inverse': Array(0.0003617, dtype=float32)}


  8%|▊         | 81020/1000000 [3:50:02<35:30:46,  7.19it/s]

{'loss': Array(0.15517588, dtype=float32), 'loss_cross_entropy': Array(0.14663666, dtype=float32)}
{'loss_inverse': Array(4.5774053e-05, dtype=float32)}


  8%|▊         | 81030/1000000 [3:50:03<39:01:52,  6.54it/s]

{'loss': Array(0.15197991, dtype=float32), 'loss_cross_entropy': Array(0.14310381, dtype=float32)}
{'loss_inverse': Array(7.3537485e-05, dtype=float32)}


  8%|▊         | 81040/1000000 [3:50:05<27:50:51,  9.17it/s]

{'loss': Array(0.14174543, dtype=float32), 'loss_cross_entropy': Array(0.13323309, dtype=float32)}
{'loss_inverse': Array(4.8626673e-05, dtype=float32)}


  8%|▊         | 81050/1000000 [3:50:06<26:52:16,  9.50it/s]

{'loss': Array(0.1403244, dtype=float32), 'loss_cross_entropy': Array(0.13213845, dtype=float32)}
{'loss_inverse': Array(2.355359e-05, dtype=float32)}


  8%|▊         | 81060/1000000 [3:50:08<30:06:04,  8.48it/s]

{'loss': Array(0.1440656, dtype=float32), 'loss_cross_entropy': Array(0.13577303, dtype=float32)}
{'loss_inverse': Array(2.1700947e-05, dtype=float32)}


  8%|▊         | 81070/1000000 [3:50:10<25:55:38,  9.85it/s]

{'loss': Array(0.16194876, dtype=float32), 'loss_cross_entropy': Array(0.1528035, dtype=float32)}
{'loss_inverse': Array(1.7338054e-06, dtype=float32)}


  8%|▊         | 81080/1000000 [3:50:11<31:19:58,  8.15it/s]

{'loss': Array(0.13828011, dtype=float32), 'loss_cross_entropy': Array(0.13036604, dtype=float32)}
{'loss_inverse': Array(7.198548e-07, dtype=float32)}


  8%|▊         | 81090/1000000 [3:50:13<25:52:41,  9.86it/s]

{'loss': Array(0.15099238, dtype=float32), 'loss_cross_entropy': Array(0.14235777, dtype=float32)}
{'loss_inverse': Array(1.9467298e-06, dtype=float32)}


  8%|▊         | 81100/1000000 [3:50:15<32:07:36,  7.95it/s]

{'loss': Array(0.15171956, dtype=float32), 'loss_cross_entropy': Array(0.14292984, dtype=float32)}
{'loss_inverse': Array(3.4116047e-07, dtype=float32)}


  8%|▊         | 81110/1000000 [3:50:16<25:58:03,  9.83it/s]

{'loss': Array(0.12758349, dtype=float32), 'loss_cross_entropy': Array(0.12085284, dtype=float32)}
{'loss_inverse': Array(4.335535e-05, dtype=float32)}


  8%|▊         | 81120/1000000 [3:50:17<26:03:22,  9.80it/s]

{'loss': Array(0.13969353, dtype=float32), 'loss_cross_entropy': Array(0.13097477, dtype=float32)}
{'loss_inverse': Array(6.322302e-07, dtype=float32)}


  8%|▊         | 81130/1000000 [3:50:19<28:58:39,  8.81it/s]

{'loss': Array(0.1233902, dtype=float32), 'loss_cross_entropy': Array(0.11581387, dtype=float32)}
{'loss_inverse': Array(2.2187092e-05, dtype=float32)}


  8%|▊         | 81140/1000000 [3:50:21<26:08:11,  9.77it/s]

{'loss': Array(0.13734339, dtype=float32), 'loss_cross_entropy': Array(0.12979217, dtype=float32)}
{'loss_inverse': Array(6.2325445e-05, dtype=float32)}


  8%|▊         | 81150/1000000 [3:50:22<29:24:49,  8.68it/s]

{'loss': Array(0.13380542, dtype=float32), 'loss_cross_entropy': Array(0.12605533, dtype=float32)}
{'loss_inverse': Array(2.522303e-07, dtype=float32)}


  8%|▊         | 81160/1000000 [3:50:24<26:22:22,  9.68it/s]

{'loss': Array(0.13740148, dtype=float32), 'loss_cross_entropy': Array(0.12978105, dtype=float32)}
{'loss_inverse': Array(5.691415e-06, dtype=float32)}


  8%|▊         | 81170/1000000 [3:50:26<30:24:08,  8.40it/s]

{'loss': Array(0.15169711, dtype=float32), 'loss_cross_entropy': Array(0.14339547, dtype=float32)}
{'loss_inverse': Array(1.7398937e-05, dtype=float32)}


  8%|▊         | 81180/1000000 [3:50:27<26:01:59,  9.80it/s]

{'loss': Array(0.15019386, dtype=float32), 'loss_cross_entropy': Array(0.14147556, dtype=float32)}
{'loss_inverse': Array(7.034991e-05, dtype=float32)}


  8%|▊         | 81190/1000000 [3:50:29<29:59:46,  8.51it/s]

{'loss': Array(0.14464612, dtype=float32), 'loss_cross_entropy': Array(0.13611177, dtype=float32)}
{'loss_inverse': Array(8.833949e-06, dtype=float32)}


  8%|▊         | 81200/1000000 [3:50:30<25:38:18,  9.95it/s]

{'loss': Array(0.12990634, dtype=float32), 'loss_cross_entropy': Array(0.12140162, dtype=float32)}
{'loss_inverse': Array(1.8939949e-06, dtype=float32)}


  8%|▊         | 81210/1000000 [3:50:32<35:03:29,  7.28it/s]

{'loss': Array(0.14509249, dtype=float32), 'loss_cross_entropy': Array(0.13663603, dtype=float32)}
{'loss_inverse': Array(1.703494e-05, dtype=float32)}


  8%|▊         | 81220/1000000 [3:50:34<26:38:19,  9.58it/s]

{'loss': Array(0.13136496, dtype=float32), 'loss_cross_entropy': Array(0.12336161, dtype=float32)}
{'loss_inverse': Array(7.2072735e-06, dtype=float32)}


  8%|▊         | 81230/1000000 [3:50:35<37:55:52,  6.73it/s]

{'loss': Array(0.1495721, dtype=float32), 'loss_cross_entropy': Array(0.14157592, dtype=float32)}
{'loss_inverse': Array(5.723457e-06, dtype=float32)}


  8%|▊         | 81240/1000000 [3:50:37<27:13:09,  9.38it/s]

{'loss': Array(0.16904332, dtype=float32), 'loss_cross_entropy': Array(0.15948443, dtype=float32)}
{'loss_inverse': Array(5.8722544e-05, dtype=float32)}


  8%|▊         | 81250/1000000 [3:50:38<34:36:40,  7.37it/s]

{'loss': Array(0.14261411, dtype=float32), 'loss_cross_entropy': Array(0.13473678, dtype=float32)}
{'loss_inverse': Array(2.2011604e-06, dtype=float32)}


  8%|▊         | 81260/1000000 [3:50:40<26:37:35,  9.58it/s]

{'loss': Array(0.15044776, dtype=float32), 'loss_cross_entropy': Array(0.14250095, dtype=float32)}
{'loss_inverse': Array(0.00035192, dtype=float32)}


  8%|▊         | 81270/1000000 [3:50:41<25:36:18,  9.97it/s]

{'loss': Array(0.1495791, dtype=float32), 'loss_cross_entropy': Array(0.14148413, dtype=float32)}
{'loss_inverse': Array(4.269531e-06, dtype=float32)}


  8%|▊         | 81280/1000000 [3:50:43<29:28:27,  8.66it/s]

{'loss': Array(0.13731496, dtype=float32), 'loss_cross_entropy': Array(0.1291621, dtype=float32)}
{'loss_inverse': Array(5.756752e-05, dtype=float32)}


  8%|▊         | 81290/1000000 [3:50:45<25:56:21,  9.84it/s]

{'loss': Array(0.13949063, dtype=float32), 'loss_cross_entropy': Array(0.13133818, dtype=float32)}
{'loss_inverse': Array(2.3276549e-05, dtype=float32)}


  8%|▊         | 81300/1000000 [3:50:46<30:55:28,  8.25it/s]

{'loss': Array(0.14706466, dtype=float32), 'loss_cross_entropy': Array(0.13822511, dtype=float32)}
{'loss_inverse': Array(3.287561e-05, dtype=float32)}


  8%|▊         | 81310/1000000 [3:50:48<25:42:37,  9.93it/s]

{'loss': Array(0.14854293, dtype=float32), 'loss_cross_entropy': Array(0.14058352, dtype=float32)}
{'loss_inverse': Array(2.7125285e-05, dtype=float32)}


  8%|▊         | 81320/1000000 [3:50:49<33:42:46,  7.57it/s]

{'loss': Array(0.14048384, dtype=float32), 'loss_cross_entropy': Array(0.13216506, dtype=float32)}
{'loss_inverse': Array(4.2792537e-05, dtype=float32)}


  8%|▊         | 81330/1000000 [3:50:51<26:19:22,  9.69it/s]

{'loss': Array(0.15151536, dtype=float32), 'loss_cross_entropy': Array(0.14272664, dtype=float32)}
{'loss_inverse': Array(6.4062197e-06, dtype=float32)}


  8%|▊         | 81340/1000000 [3:50:53<33:38:19,  7.59it/s]

{'loss': Array(0.15440217, dtype=float32), 'loss_cross_entropy': Array(0.1460378, dtype=float32)}
{'loss_inverse': Array(6.225734e-05, dtype=float32)}


  8%|▊         | 81350/1000000 [3:50:54<26:01:40,  9.80it/s]

{'loss': Array(0.14350107, dtype=float32), 'loss_cross_entropy': Array(0.13515063, dtype=float32)}
{'loss_inverse': Array(2.536577e-06, dtype=float32)}


  8%|▊         | 81360/1000000 [3:50:55<25:09:49, 10.14it/s]

{'loss': Array(0.1262031, dtype=float32), 'loss_cross_entropy': Array(0.11862209, dtype=float32)}
{'loss_inverse': Array(0.00016704, dtype=float32)}


  8%|▊         | 81370/1000000 [3:50:57<31:47:16,  8.03it/s]

{'loss': Array(0.1559345, dtype=float32), 'loss_cross_entropy': Array(0.14741682, dtype=float32)}
{'loss_inverse': Array(2.7236869e-05, dtype=float32)}


  8%|▊         | 81380/1000000 [3:50:58<25:42:17,  9.93it/s]

{'loss': Array(0.15259159, dtype=float32), 'loss_cross_entropy': Array(0.14425671, dtype=float32)}
{'loss_inverse': Array(2.9275518e-05, dtype=float32)}


  8%|▊         | 81390/1000000 [3:51:00<37:46:43,  6.75it/s]

{'loss': Array(0.15914251, dtype=float32), 'loss_cross_entropy': Array(0.15006915, dtype=float32)}
{'loss_inverse': Array(0.00012248, dtype=float32)}


  8%|▊         | 81400/1000000 [3:51:02<28:17:25,  9.02it/s]

{'loss': Array(0.14800055, dtype=float32), 'loss_cross_entropy': Array(0.13928853, dtype=float32)}
{'loss_inverse': Array(0.00018012, dtype=float32)}


  8%|▊         | 81410/1000000 [3:51:03<25:28:48, 10.01it/s]

{'loss': Array(0.1454731, dtype=float32), 'loss_cross_entropy': Array(0.13714804, dtype=float32)}
{'loss_inverse': Array(7.668309e-07, dtype=float32)}


  8%|▊         | 81420/1000000 [3:51:05<27:51:00,  9.16it/s]

{'loss': Array(0.14169703, dtype=float32), 'loss_cross_entropy': Array(0.13359599, dtype=float32)}
{'loss_inverse': Array(9.2420894e-05, dtype=float32)}


  8%|▊         | 81430/1000000 [3:51:06<26:18:10,  9.70it/s]

{'loss': Array(0.14512402, dtype=float32), 'loss_cross_entropy': Array(0.13723652, dtype=float32)}
{'loss_inverse': Array(0.00028329, dtype=float32)}


  8%|▊         | 81440/1000000 [3:51:08<29:27:11,  8.66it/s]

{'loss': Array(0.15870224, dtype=float32), 'loss_cross_entropy': Array(0.1492455, dtype=float32)}
{'loss_inverse': Array(1.0026877e-06, dtype=float32)}


  8%|▊         | 81450/1000000 [3:51:10<25:59:26,  9.82it/s]

{'loss': Array(0.14736775, dtype=float32), 'loss_cross_entropy': Array(0.13892047, dtype=float32)}
{'loss_inverse': Array(4.2842472e-05, dtype=float32)}


  8%|▊         | 81460/1000000 [3:51:11<31:41:52,  8.05it/s]

{'loss': Array(0.13930641, dtype=float32), 'loss_cross_entropy': Array(0.13141657, dtype=float32)}
{'loss_inverse': Array(3.5461858e-06, dtype=float32)}


  8%|▊         | 81470/1000000 [3:51:13<26:48:54,  9.52it/s]

{'loss': Array(0.13942777, dtype=float32), 'loss_cross_entropy': Array(0.13041428, dtype=float32)}
{'loss_inverse': Array(2.4761368e-05, dtype=float32)}


  8%|▊         | 81480/1000000 [3:51:15<32:36:45,  7.82it/s]

{'loss': Array(0.13632338, dtype=float32), 'loss_cross_entropy': Array(0.12856229, dtype=float32)}
{'loss_inverse': Array(4.0180956e-05, dtype=float32)}


  8%|▊         | 81490/1000000 [3:51:16<26:02:52,  9.80it/s]

{'loss': Array(0.13512874, dtype=float32), 'loss_cross_entropy': Array(0.12725495, dtype=float32)}
{'loss_inverse': Array(2.0399102e-06, dtype=float32)}


  8%|▊         | 81500/1000000 [3:51:18<38:25:20,  6.64it/s]

{'loss': Array(0.15422502, dtype=float32), 'loss_cross_entropy': Array(0.14570156, dtype=float32)}
{'loss_inverse': Array(0.0001374, dtype=float32)}


  8%|▊         | 81510/1000000 [3:51:26<87:27:06,  2.92it/s] 

{'loss': Array(0.14629911, dtype=float32), 'loss_cross_entropy': Array(0.1379898, dtype=float32)}
{'loss_inverse': Array(0.00049257, dtype=float32)}


  8%|▊         | 81520/1000000 [3:51:27<35:22:37,  7.21it/s]

{'loss': Array(0.13361807, dtype=float32), 'loss_cross_entropy': Array(0.125455, dtype=float32)}
{'loss_inverse': Array(1.05157105e-05, dtype=float32)}


  8%|▊         | 81530/1000000 [3:51:29<36:04:04,  7.07it/s]

{'loss': Array(0.1399524, dtype=float32), 'loss_cross_entropy': Array(0.13173974, dtype=float32)}
{'loss_inverse': Array(3.8391258e-07, dtype=float32)}


  8%|▊         | 81540/1000000 [3:51:30<26:40:29,  9.56it/s]

{'loss': Array(0.15122095, dtype=float32), 'loss_cross_entropy': Array(0.14298771, dtype=float32)}
{'loss_inverse': Array(2.053746e-05, dtype=float32)}


  8%|▊         | 81550/1000000 [3:51:32<38:11:54,  6.68it/s]

{'loss': Array(0.14645837, dtype=float32), 'loss_cross_entropy': Array(0.13818787, dtype=float32)}
{'loss_inverse': Array(3.5769077e-07, dtype=float32)}


  8%|▊         | 81560/1000000 [3:51:34<27:26:01,  9.30it/s]

{'loss': Array(0.13956137, dtype=float32), 'loss_cross_entropy': Array(0.13164304, dtype=float32)}
{'loss_inverse': Array(1.7981569e-05, dtype=float32)}


  8%|▊         | 81570/1000000 [3:51:35<37:57:56,  6.72it/s]

{'loss': Array(0.13784628, dtype=float32), 'loss_cross_entropy': Array(0.1301561, dtype=float32)}
{'loss_inverse': Array(3.8019996e-06, dtype=float32)}


  8%|▊         | 81580/1000000 [3:51:37<27:52:51,  9.15it/s]

{'loss': Array(0.11766278, dtype=float32), 'loss_cross_entropy': Array(0.11025152, dtype=float32)}
{'loss_inverse': Array(7.7401955e-06, dtype=float32)}


  8%|▊         | 81590/1000000 [3:51:38<26:10:10,  9.75it/s]

{'loss': Array(0.15478697, dtype=float32), 'loss_cross_entropy': Array(0.14615704, dtype=float32)}
{'loss_inverse': Array(5.7186444e-05, dtype=float32)}


  8%|▊         | 81600/1000000 [3:51:40<32:01:26,  7.97it/s]

{'loss': Array(0.14797235, dtype=float32), 'loss_cross_entropy': Array(0.13952108, dtype=float32)}
{'loss_inverse': Array(0.00019818, dtype=float32)}


  8%|▊         | 81610/1000000 [3:51:42<25:55:42,  9.84it/s]

{'loss': Array(0.14389776, dtype=float32), 'loss_cross_entropy': Array(0.13580243, dtype=float32)}
{'loss_inverse': Array(5.1325696e-06, dtype=float32)}


  8%|▊         | 81620/1000000 [3:51:43<38:05:53,  6.70it/s]

{'loss': Array(0.1419235, dtype=float32), 'loss_cross_entropy': Array(0.13366728, dtype=float32)}
{'loss_inverse': Array(5.4097425e-05, dtype=float32)}


  8%|▊         | 81630/1000000 [3:51:45<27:51:21,  9.16it/s]

{'loss': Array(0.13492915, dtype=float32), 'loss_cross_entropy': Array(0.12697397, dtype=float32)}
{'loss_inverse': Array(3.111633e-05, dtype=float32)}


  8%|▊         | 81640/1000000 [3:51:46<38:23:35,  6.64it/s]

{'loss': Array(0.13950315, dtype=float32), 'loss_cross_entropy': Array(0.13122432, dtype=float32)}
{'loss_inverse': Array(0.00026406, dtype=float32)}


  8%|▊         | 81650/1000000 [3:51:48<27:44:20,  9.20it/s]

{'loss': Array(0.16530356, dtype=float32), 'loss_cross_entropy': Array(0.1558397, dtype=float32)}
{'loss_inverse': Array(1.6032876e-05, dtype=float32)}


  8%|▊         | 81660/1000000 [3:51:49<25:31:43,  9.99it/s]

{'loss': Array(0.15258038, dtype=float32), 'loss_cross_entropy': Array(0.14462213, dtype=float32)}
{'loss_inverse': Array(1.1141299e-06, dtype=float32)}


  8%|▊         | 81670/1000000 [3:51:51<33:41:50,  7.57it/s]

{'loss': Array(0.12526332, dtype=float32), 'loss_cross_entropy': Array(0.1174924, dtype=float32)}
{'loss_inverse': Array(0.00012475, dtype=float32)}


  8%|▊         | 81680/1000000 [3:51:53<26:08:42,  9.76it/s]

{'loss': Array(0.13885258, dtype=float32), 'loss_cross_entropy': Array(0.13083674, dtype=float32)}
{'loss_inverse': Array(0.00019241, dtype=float32)}


  8%|▊         | 81690/1000000 [3:51:54<24:51:46, 10.26it/s]

{'loss': Array(0.16550188, dtype=float32), 'loss_cross_entropy': Array(0.15638784, dtype=float32)}
{'loss_inverse': Array(7.017483e-05, dtype=float32)}


  8%|▊         | 81700/1000000 [3:51:56<28:06:47,  9.07it/s]

{'loss': Array(0.13907671, dtype=float32), 'loss_cross_entropy': Array(0.13112296, dtype=float32)}
{'loss_inverse': Array(2.4811372e-06, dtype=float32)}


  8%|▊         | 81710/1000000 [3:51:57<26:06:44,  9.77it/s]

{'loss': Array(0.14171098, dtype=float32), 'loss_cross_entropy': Array(0.13362098, dtype=float32)}
{'loss_inverse': Array(0.000267, dtype=float32)}


  8%|▊         | 81720/1000000 [3:51:59<29:01:55,  8.79it/s]

{'loss': Array(0.12596598, dtype=float32), 'loss_cross_entropy': Array(0.11885851, dtype=float32)}
{'loss_inverse': Array(6.810182e-05, dtype=float32)}


  8%|▊         | 81730/1000000 [3:52:00<26:41:48,  9.55it/s]

{'loss': Array(0.15005559, dtype=float32), 'loss_cross_entropy': Array(0.14190865, dtype=float32)}
{'loss_inverse': Array(9.983232e-05, dtype=float32)}


  8%|▊         | 81740/1000000 [3:52:02<29:44:44,  8.58it/s]

{'loss': Array(0.15680774, dtype=float32), 'loss_cross_entropy': Array(0.14807348, dtype=float32)}
{'loss_inverse': Array(3.186811e-06, dtype=float32)}


  8%|▊         | 81750/1000000 [3:52:03<25:55:37,  9.84it/s]

{'loss': Array(0.1559603, dtype=float32), 'loss_cross_entropy': Array(0.14729169, dtype=float32)}
{'loss_inverse': Array(0.00015304, dtype=float32)}


  8%|▊         | 81760/1000000 [3:52:05<29:12:50,  8.73it/s]

{'loss': Array(0.14220391, dtype=float32), 'loss_cross_entropy': Array(0.13410766, dtype=float32)}
{'loss_inverse': Array(0.00012358, dtype=float32)}


  8%|▊         | 81770/1000000 [3:52:07<26:04:46,  9.78it/s]

{'loss': Array(0.13144143, dtype=float32), 'loss_cross_entropy': Array(0.12340014, dtype=float32)}
{'loss_inverse': Array(6.28983e-05, dtype=float32)}


  8%|▊         | 81780/1000000 [3:52:08<34:44:15,  7.34it/s]

{'loss': Array(0.14947936, dtype=float32), 'loss_cross_entropy': Array(0.1414466, dtype=float32)}
{'loss_inverse': Array(8.235785e-05, dtype=float32)}


  8%|▊         | 81790/1000000 [3:52:10<26:49:06,  9.51it/s]

{'loss': Array(0.13617304, dtype=float32), 'loss_cross_entropy': Array(0.127993, dtype=float32)}
{'loss_inverse': Array(8.9395086e-05, dtype=float32)}


  8%|▊         | 81800/1000000 [3:52:12<37:21:49,  6.83it/s]

{'loss': Array(0.14582358, dtype=float32), 'loss_cross_entropy': Array(0.13778107, dtype=float32)}
{'loss_inverse': Array(4.678094e-05, dtype=float32)}


  8%|▊         | 81810/1000000 [3:52:13<26:29:14,  9.63it/s]

{'loss': Array(0.16094688, dtype=float32), 'loss_cross_entropy': Array(0.15168579, dtype=float32)}
{'loss_inverse': Array(1.3160136e-05, dtype=float32)}


  8%|▊         | 81820/1000000 [3:52:15<34:40:33,  7.36it/s]

{'loss': Array(0.14741933, dtype=float32), 'loss_cross_entropy': Array(0.13906685, dtype=float32)}
{'loss_inverse': Array(1.6119626e-06, dtype=float32)}


  8%|▊         | 81830/1000000 [3:52:16<26:37:40,  9.58it/s]

{'loss': Array(0.15404509, dtype=float32), 'loss_cross_entropy': Array(0.14545979, dtype=float32)}
{'loss_inverse': Array(1.3411006e-05, dtype=float32)}


  8%|▊         | 81840/1000000 [3:52:17<24:50:55, 10.26it/s]

{'loss': Array(0.1421511, dtype=float32), 'loss_cross_entropy': Array(0.13383254, dtype=float32)}
{'loss_inverse': Array(0.00010767, dtype=float32)}


  8%|▊         | 81850/1000000 [3:52:19<29:04:49,  8.77it/s]

{'loss': Array(0.13491793, dtype=float32), 'loss_cross_entropy': Array(0.12677418, dtype=float32)}
{'loss_inverse': Array(9.266819e-06, dtype=float32)}


  8%|▊         | 81860/1000000 [3:52:21<25:48:13,  9.88it/s]

{'loss': Array(0.1429127, dtype=float32), 'loss_cross_entropy': Array(0.13481764, dtype=float32)}
{'loss_inverse': Array(0.00027187, dtype=float32)}


  8%|▊         | 81870/1000000 [3:52:22<31:21:19,  8.13it/s]

{'loss': Array(0.1518099, dtype=float32), 'loss_cross_entropy': Array(0.14276621, dtype=float32)}
{'loss_inverse': Array(4.238774e-05, dtype=float32)}


  8%|▊         | 81880/1000000 [3:52:24<26:06:51,  9.77it/s]

{'loss': Array(0.12766086, dtype=float32), 'loss_cross_entropy': Array(0.12026095, dtype=float32)}
{'loss_inverse': Array(9.4523675e-05, dtype=float32)}


  8%|▊         | 81890/1000000 [3:52:26<33:47:00,  7.55it/s]

{'loss': Array(0.14641443, dtype=float32), 'loss_cross_entropy': Array(0.13800122, dtype=float32)}
{'loss_inverse': Array(9.910943e-05, dtype=float32)}


  8%|▊         | 81900/1000000 [3:52:27<26:54:27,  9.48it/s]

{'loss': Array(0.14860515, dtype=float32), 'loss_cross_entropy': Array(0.14007558, dtype=float32)}
{'loss_inverse': Array(0.00016222, dtype=float32)}


  8%|▊         | 81910/1000000 [3:52:29<34:35:51,  7.37it/s]

{'loss': Array(0.14538682, dtype=float32), 'loss_cross_entropy': Array(0.1369227, dtype=float32)}
{'loss_inverse': Array(0.00011998, dtype=float32)}


  8%|▊         | 81920/1000000 [3:52:30<26:57:04,  9.46it/s]

{'loss': Array(0.12827457, dtype=float32), 'loss_cross_entropy': Array(0.12085419, dtype=float32)}
{'loss_inverse': Array(0.00020558, dtype=float32)}


  8%|▊         | 81930/1000000 [3:52:32<26:02:24,  9.79it/s]

{'loss': Array(0.13058804, dtype=float32), 'loss_cross_entropy': Array(0.1226209, dtype=float32)}
{'loss_inverse': Array(1.8458517e-05, dtype=float32)}


  8%|▊         | 81940/1000000 [3:52:33<31:16:12,  8.16it/s]

{'loss': Array(0.14763121, dtype=float32), 'loss_cross_entropy': Array(0.13905947, dtype=float32)}
{'loss_inverse': Array(4.518038e-05, dtype=float32)}


  8%|▊         | 81950/1000000 [3:52:35<25:56:18,  9.83it/s]

{'loss': Array(0.14001541, dtype=float32), 'loss_cross_entropy': Array(0.1319871, dtype=float32)}
{'loss_inverse': Array(1.0879597e-05, dtype=float32)}


  8%|▊         | 81960/1000000 [3:52:37<37:36:49,  6.78it/s]

{'loss': Array(0.14350374, dtype=float32), 'loss_cross_entropy': Array(0.13489437, dtype=float32)}
{'loss_inverse': Array(5.8365305e-05, dtype=float32)}


  8%|▊         | 81970/1000000 [3:52:38<27:07:11,  9.40it/s]

{'loss': Array(0.15386446, dtype=float32), 'loss_cross_entropy': Array(0.14492649, dtype=float32)}
{'loss_inverse': Array(7.075169e-06, dtype=float32)}


  8%|▊         | 81980/1000000 [3:52:39<25:17:20, 10.08it/s]

{'loss': Array(0.14497487, dtype=float32), 'loss_cross_entropy': Array(0.13719945, dtype=float32)}
{'loss_inverse': Array(0.00015915, dtype=float32)}


  8%|▊         | 81990/1000000 [3:52:41<28:21:18,  8.99it/s]

{'loss': Array(0.12990141, dtype=float32), 'loss_cross_entropy': Array(0.12196926, dtype=float32)}
{'loss_inverse': Array(2.702993e-06, dtype=float32)}


  8%|▊         | 82000/1000000 [3:52:43<25:22:11, 10.05it/s]

{'loss': Array(0.14951476, dtype=float32), 'loss_cross_entropy': Array(0.14110243, dtype=float32)}
{'loss_inverse': Array(2.9898785e-05, dtype=float32)}


  8%|▊         | 82010/1000000 [3:52:51<89:03:08,  2.86it/s] 

{'loss': Array(0.1457886, dtype=float32), 'loss_cross_entropy': Array(0.13846055, dtype=float32)}
{'loss_inverse': Array(6.487024e-05, dtype=float32)}


  8%|▊         | 82020/1000000 [3:52:52<36:15:48,  7.03it/s]

{'loss': Array(0.15198821, dtype=float32), 'loss_cross_entropy': Array(0.14364527, dtype=float32)}
{'loss_inverse': Array(5.9228787e-05, dtype=float32)}


  8%|▊         | 82030/1000000 [3:52:54<31:36:41,  8.07it/s]

{'loss': Array(0.15030363, dtype=float32), 'loss_cross_entropy': Array(0.14188616, dtype=float32)}
{'loss_inverse': Array(4.9588365e-05, dtype=float32)}


  8%|▊         | 82040/1000000 [3:52:56<26:13:00,  9.73it/s]

{'loss': Array(0.16237372, dtype=float32), 'loss_cross_entropy': Array(0.15393634, dtype=float32)}
{'loss_inverse': Array(3.5849062e-06, dtype=float32)}


  8%|▊         | 82050/1000000 [3:52:57<37:28:44,  6.80it/s]

{'loss': Array(0.13892646, dtype=float32), 'loss_cross_entropy': Array(0.13079311, dtype=float32)}
{'loss_inverse': Array(0.00015646, dtype=float32)}


  8%|▊         | 82060/1000000 [3:52:59<27:12:55,  9.37it/s]

{'loss': Array(0.14816046, dtype=float32), 'loss_cross_entropy': Array(0.13944325, dtype=float32)}
{'loss_inverse': Array(1.3251551e-05, dtype=float32)}


  8%|▊         | 82070/1000000 [3:53:00<25:16:21, 10.09it/s]

{'loss': Array(0.13135754, dtype=float32), 'loss_cross_entropy': Array(0.12395468, dtype=float32)}
{'loss_inverse': Array(2.3533936e-05, dtype=float32)}


  8%|▊         | 82080/1000000 [3:53:02<29:43:00,  8.58it/s]

{'loss': Array(0.13460825, dtype=float32), 'loss_cross_entropy': Array(0.12689458, dtype=float32)}
{'loss_inverse': Array(0.00019337, dtype=float32)}


  8%|▊         | 82090/1000000 [3:53:03<25:59:37,  9.81it/s]

{'loss': Array(0.15069829, dtype=float32), 'loss_cross_entropy': Array(0.14179449, dtype=float32)}
{'loss_inverse': Array(3.6426558e-05, dtype=float32)}


  8%|▊         | 82100/1000000 [3:53:05<29:54:23,  8.53it/s]

{'loss': Array(0.15292998, dtype=float32), 'loss_cross_entropy': Array(0.14429936, dtype=float32)}
{'loss_inverse': Array(6.673473e-05, dtype=float32)}


  8%|▊         | 82110/1000000 [3:53:07<26:39:55,  9.56it/s]

{'loss': Array(0.14846897, dtype=float32), 'loss_cross_entropy': Array(0.13998495, dtype=float32)}
{'loss_inverse': Array(0.00011045, dtype=float32)}


  8%|▊         | 82120/1000000 [3:53:08<32:04:22,  7.95it/s]

{'loss': Array(0.15440898, dtype=float32), 'loss_cross_entropy': Array(0.14501503, dtype=float32)}
{'loss_inverse': Array(0.00011468, dtype=float32)}


  8%|▊         | 82130/1000000 [3:53:10<26:34:47,  9.59it/s]

{'loss': Array(0.16967182, dtype=float32), 'loss_cross_entropy': Array(0.16123746, dtype=float32)}
{'loss_inverse': Array(0.00019096, dtype=float32)}


  8%|▊         | 82140/1000000 [3:53:11<24:55:21, 10.23it/s]

{'loss': Array(0.1353159, dtype=float32), 'loss_cross_entropy': Array(0.12801285, dtype=float32)}
{'loss_inverse': Array(5.574299e-07, dtype=float32)}


  8%|▊         | 82150/1000000 [3:53:13<27:52:45,  9.15it/s]

{'loss': Array(0.14325662, dtype=float32), 'loss_cross_entropy': Array(0.13516214, dtype=float32)}
{'loss_inverse': Array(1.4603504e-05, dtype=float32)}


  8%|▊         | 82160/1000000 [3:53:14<25:15:58, 10.09it/s]

{'loss': Array(0.14916341, dtype=float32), 'loss_cross_entropy': Array(0.1398273, dtype=float32)}
{'loss_inverse': Array(1.2619838e-05, dtype=float32)}


  8%|▊         | 82170/1000000 [3:53:16<29:32:20,  8.63it/s]

{'loss': Array(0.13496082, dtype=float32), 'loss_cross_entropy': Array(0.12707229, dtype=float32)}
{'loss_inverse': Array(0.0001822, dtype=float32)}


  8%|▊         | 82180/1000000 [3:53:18<25:58:03,  9.82it/s]

{'loss': Array(0.16075075, dtype=float32), 'loss_cross_entropy': Array(0.15136729, dtype=float32)}
{'loss_inverse': Array(4.7664403e-07, dtype=float32)}


  8%|▊         | 82190/1000000 [3:53:19<31:25:03,  8.11it/s]

{'loss': Array(0.1141836, dtype=float32), 'loss_cross_entropy': Array(0.10735768, dtype=float32)}
{'loss_inverse': Array(1.3458622e-05, dtype=float32)}


  8%|▊         | 82200/1000000 [3:53:21<25:51:39,  9.86it/s]

{'loss': Array(0.14107595, dtype=float32), 'loss_cross_entropy': Array(0.13275243, dtype=float32)}
{'loss_inverse': Array(3.316223e-06, dtype=float32)}


  8%|▊         | 82210/1000000 [3:53:22<25:37:12,  9.95it/s]

{'loss': Array(0.15366934, dtype=float32), 'loss_cross_entropy': Array(0.1455385, dtype=float32)}
{'loss_inverse': Array(1.6161828e-05, dtype=float32)}


  8%|▊         | 82220/1000000 [3:53:24<29:50:19,  8.54it/s]

{'loss': Array(0.12920141, dtype=float32), 'loss_cross_entropy': Array(0.12167388, dtype=float32)}
{'loss_inverse': Array(1.6648054e-06, dtype=float32)}


  8%|▊         | 82230/1000000 [3:53:25<25:12:42, 10.11it/s]

{'loss': Array(0.14153017, dtype=float32), 'loss_cross_entropy': Array(0.13407356, dtype=float32)}
{'loss_inverse': Array(2.223593e-06, dtype=float32)}


  8%|▊         | 82240/1000000 [3:53:27<30:53:32,  8.25it/s]

{'loss': Array(0.15164553, dtype=float32), 'loss_cross_entropy': Array(0.1426932, dtype=float32)}
{'loss_inverse': Array(4.5841593e-06, dtype=float32)}


  8%|▊         | 82250/1000000 [3:53:29<26:03:36,  9.78it/s]

{'loss': Array(0.13699982, dtype=float32), 'loss_cross_entropy': Array(0.1296367, dtype=float32)}
{'loss_inverse': Array(1.1188046e-07, dtype=float32)}


  8%|▊         | 82260/1000000 [3:53:30<34:13:14,  7.45it/s]

{'loss': Array(0.15322423, dtype=float32), 'loss_cross_entropy': Array(0.14442784, dtype=float32)}
{'loss_inverse': Array(7.300571e-06, dtype=float32)}


  8%|▊         | 82270/1000000 [3:53:32<26:55:20,  9.47it/s]

{'loss': Array(0.15629001, dtype=float32), 'loss_cross_entropy': Array(0.14735629, dtype=float32)}
{'loss_inverse': Array(3.9141604e-08, dtype=float32)}


  8%|▊         | 82280/1000000 [3:53:33<25:29:38, 10.00it/s]

{'loss': Array(0.1477047, dtype=float32), 'loss_cross_entropy': Array(0.13927385, dtype=float32)}
{'loss_inverse': Array(2.7246264e-08, dtype=float32)}


  8%|▊         | 82290/1000000 [3:53:35<28:16:06,  9.02it/s]

{'loss': Array(0.14513656, dtype=float32), 'loss_cross_entropy': Array(0.1368927, dtype=float32)}
{'loss_inverse': Array(5.5951905e-06, dtype=float32)}


  8%|▊         | 82300/1000000 [3:53:36<25:14:35, 10.10it/s]

{'loss': Array(0.13627253, dtype=float32), 'loss_cross_entropy': Array(0.12830263, dtype=float32)}
{'loss_inverse': Array(3.7237394e-06, dtype=float32)}


  8%|▊         | 82310/1000000 [3:53:38<33:20:05,  7.65it/s]

{'loss': Array(0.14505206, dtype=float32), 'loss_cross_entropy': Array(0.13684729, dtype=float32)}
{'loss_inverse': Array(5.5947103e-06, dtype=float32)}


  8%|▊         | 82320/1000000 [3:53:39<25:59:53,  9.80it/s]

{'loss': Array(0.15543795, dtype=float32), 'loss_cross_entropy': Array(0.14714973, dtype=float32)}
{'loss_inverse': Array(0.00011076, dtype=float32)}


  8%|▊         | 82330/1000000 [3:53:41<25:26:46, 10.02it/s]

{'loss': Array(0.14984904, dtype=float32), 'loss_cross_entropy': Array(0.14127478, dtype=float32)}
{'loss_inverse': Array(3.3011165e-06, dtype=float32)}


  8%|▊         | 82340/1000000 [3:53:43<28:18:51,  9.00it/s]

{'loss': Array(0.11969161, dtype=float32), 'loss_cross_entropy': Array(0.11249758, dtype=float32)}
{'loss_inverse': Array(0.00038951, dtype=float32)}


  8%|▊         | 82350/1000000 [3:53:44<25:43:49,  9.91it/s]

{'loss': Array(0.14833921, dtype=float32), 'loss_cross_entropy': Array(0.1394719, dtype=float32)}
{'loss_inverse': Array(2.811102e-07, dtype=float32)}


  8%|▊         | 82360/1000000 [3:53:46<30:08:32,  8.46it/s]

{'loss': Array(0.1417042, dtype=float32), 'loss_cross_entropy': Array(0.13450472, dtype=float32)}
{'loss_inverse': Array(2.237708e-05, dtype=float32)}


  8%|▊         | 82370/1000000 [3:53:47<25:58:29,  9.81it/s]

{'loss': Array(0.16804573, dtype=float32), 'loss_cross_entropy': Array(0.15916047, dtype=float32)}
{'loss_inverse': Array(6.506542e-06, dtype=float32)}


  8%|▊         | 82380/1000000 [3:53:49<29:58:40,  8.50it/s]

{'loss': Array(0.14383726, dtype=float32), 'loss_cross_entropy': Array(0.1357588, dtype=float32)}
{'loss_inverse': Array(1.5504828e-06, dtype=float32)}


  8%|▊         | 82390/1000000 [3:53:51<26:06:23,  9.76it/s]

{'loss': Array(0.16141056, dtype=float32), 'loss_cross_entropy': Array(0.15273413, dtype=float32)}
{'loss_inverse': Array(8.3572755e-05, dtype=float32)}


  8%|▊         | 82400/1000000 [3:53:52<33:48:12,  7.54it/s]

{'loss': Array(0.12982759, dtype=float32), 'loss_cross_entropy': Array(0.12179499, dtype=float32)}
{'loss_inverse': Array(1.2702502e-07, dtype=float32)}


  8%|▊         | 82410/1000000 [3:53:54<26:35:18,  9.59it/s]

{'loss': Array(0.1381089, dtype=float32), 'loss_cross_entropy': Array(0.12993382, dtype=float32)}
{'loss_inverse': Array(6.670743e-05, dtype=float32)}


  8%|▊         | 82420/1000000 [3:53:55<25:44:57,  9.90it/s]

{'loss': Array(0.15346067, dtype=float32), 'loss_cross_entropy': Array(0.14486814, dtype=float32)}
{'loss_inverse': Array(1.0821897e-07, dtype=float32)}


  8%|▊         | 82430/1000000 [3:53:57<29:12:02,  8.73it/s]

{'loss': Array(0.14037214, dtype=float32), 'loss_cross_entropy': Array(0.13171884, dtype=float32)}
{'loss_inverse': Array(3.5870125e-05, dtype=float32)}


  8%|▊         | 82440/1000000 [3:53:58<26:00:20,  9.80it/s]

{'loss': Array(0.14205, dtype=float32), 'loss_cross_entropy': Array(0.13436818, dtype=float32)}
{'loss_inverse': Array(3.2824778e-07, dtype=float32)}


  8%|▊         | 82450/1000000 [3:54:00<30:24:10,  8.38it/s]

{'loss': Array(0.14908752, dtype=float32), 'loss_cross_entropy': Array(0.14065078, dtype=float32)}
{'loss_inverse': Array(2.5051339e-05, dtype=float32)}


  8%|▊         | 82460/1000000 [3:54:02<25:44:00,  9.90it/s]

{'loss': Array(0.14129233, dtype=float32), 'loss_cross_entropy': Array(0.13285303, dtype=float32)}
{'loss_inverse': Array(8.0732956e-05, dtype=float32)}


  8%|▊         | 82470/1000000 [3:54:03<30:48:47,  8.27it/s]

{'loss': Array(0.16641799, dtype=float32), 'loss_cross_entropy': Array(0.15721425, dtype=float32)}
{'loss_inverse': Array(1.1263238e-05, dtype=float32)}


  8%|▊         | 82480/1000000 [3:54:05<26:38:01,  9.57it/s]

{'loss': Array(0.13869704, dtype=float32), 'loss_cross_entropy': Array(0.13081998, dtype=float32)}
{'loss_inverse': Array(1.8249642e-06, dtype=float32)}


  8%|▊         | 82490/1000000 [3:54:06<25:35:19,  9.96it/s]

{'loss': Array(0.15343826, dtype=float32), 'loss_cross_entropy': Array(0.1448666, dtype=float32)}
{'loss_inverse': Array(9.39178e-06, dtype=float32)}


  8%|▊         | 82500/1000000 [3:54:08<30:28:24,  8.36it/s]

{'loss': Array(0.15176061, dtype=float32), 'loss_cross_entropy': Array(0.14360687, dtype=float32)}
{'loss_inverse': Array(0.00012923, dtype=float32)}


  8%|▊         | 82510/1000000 [3:54:16<84:55:22,  3.00it/s] 

{'loss': Array(0.15899251, dtype=float32), 'loss_cross_entropy': Array(0.14996144, dtype=float32)}
{'loss_inverse': Array(1.6430466e-05, dtype=float32)}


  8%|▊         | 82520/1000000 [3:54:18<47:56:12,  5.32it/s]

{'loss': Array(0.14993909, dtype=float32), 'loss_cross_entropy': Array(0.14155827, dtype=float32)}
{'loss_inverse': Array(4.500914e-06, dtype=float32)}


  8%|▊         | 82530/1000000 [3:54:19<29:07:39,  8.75it/s]

{'loss': Array(0.15596405, dtype=float32), 'loss_cross_entropy': Array(0.14715052, dtype=float32)}
{'loss_inverse': Array(2.320387e-05, dtype=float32)}


  8%|▊         | 82540/1000000 [3:54:21<25:22:38, 10.04it/s]

{'loss': Array(0.13546342, dtype=float32), 'loss_cross_entropy': Array(0.1277563, dtype=float32)}
{'loss_inverse': Array(1.9821603e-07, dtype=float32)}


  8%|▊         | 82550/1000000 [3:54:22<29:16:24,  8.71it/s]

{'loss': Array(0.13342653, dtype=float32), 'loss_cross_entropy': Array(0.1254138, dtype=float32)}
{'loss_inverse': Array(6.506702e-06, dtype=float32)}


  8%|▊         | 82560/1000000 [3:54:24<26:38:09,  9.57it/s]

{'loss': Array(0.15862195, dtype=float32), 'loss_cross_entropy': Array(0.14983553, dtype=float32)}
{'loss_inverse': Array(4.400678e-06, dtype=float32)}


  8%|▊         | 82570/1000000 [3:54:25<29:18:44,  8.69it/s]

{'loss': Array(0.15816723, dtype=float32), 'loss_cross_entropy': Array(0.14948007, dtype=float32)}
{'loss_inverse': Array(3.3952008e-07, dtype=float32)}


  8%|▊         | 82580/1000000 [3:54:27<25:45:54,  9.89it/s]

{'loss': Array(0.1496078, dtype=float32), 'loss_cross_entropy': Array(0.14095609, dtype=float32)}
{'loss_inverse': Array(2.8614752e-06, dtype=float32)}


  8%|▊         | 82590/1000000 [3:54:29<31:33:18,  8.08it/s]

{'loss': Array(0.13638018, dtype=float32), 'loss_cross_entropy': Array(0.12823735, dtype=float32)}
{'loss_inverse': Array(2.0652801e-07, dtype=float32)}


  8%|▊         | 82600/1000000 [3:54:30<26:03:03,  9.78it/s]

{'loss': Array(0.13649109, dtype=float32), 'loss_cross_entropy': Array(0.12815905, dtype=float32)}
{'loss_inverse': Array(2.5636285e-07, dtype=float32)}


  8%|▊         | 82610/1000000 [3:54:31<25:06:50, 10.15it/s]

{'loss': Array(0.13990936, dtype=float32), 'loss_cross_entropy': Array(0.13141564, dtype=float32)}
{'loss_inverse': Array(0.0001657, dtype=float32)}


  8%|▊         | 82620/1000000 [3:54:33<27:58:52,  9.11it/s]

{'loss': Array(0.1521822, dtype=float32), 'loss_cross_entropy': Array(0.14432627, dtype=float32)}
{'loss_inverse': Array(6.510281e-07, dtype=float32)}


  8%|▊         | 82630/1000000 [3:54:35<25:42:44,  9.91it/s]

{'loss': Array(0.15400411, dtype=float32), 'loss_cross_entropy': Array(0.14501344, dtype=float32)}
{'loss_inverse': Array(9.337795e-07, dtype=float32)}


  8%|▊         | 82640/1000000 [3:54:36<29:22:23,  8.68it/s]

{'loss': Array(0.1277134, dtype=float32), 'loss_cross_entropy': Array(0.12013604, dtype=float32)}
{'loss_inverse': Array(5.5393542e-05, dtype=float32)}


  8%|▊         | 82650/1000000 [3:54:38<26:01:50,  9.79it/s]

{'loss': Array(0.13615718, dtype=float32), 'loss_cross_entropy': Array(0.12803006, dtype=float32)}
{'loss_inverse': Array(1.4852475e-07, dtype=float32)}


  8%|▊         | 82660/1000000 [3:54:40<31:28:39,  8.10it/s]

{'loss': Array(0.14497517, dtype=float32), 'loss_cross_entropy': Array(0.13652633, dtype=float32)}
{'loss_inverse': Array(3.9104172e-05, dtype=float32)}


  8%|▊         | 82670/1000000 [3:54:41<26:43:10,  9.54it/s]

{'loss': Array(0.17455666, dtype=float32), 'loss_cross_entropy': Array(0.16552186, dtype=float32)}
{'loss_inverse': Array(6.6498626e-07, dtype=float32)}


  8%|▊         | 82680/1000000 [3:54:43<26:36:33,  9.58it/s]

{'loss': Array(0.1177907, dtype=float32), 'loss_cross_entropy': Array(0.11024158, dtype=float32)}
{'loss_inverse': Array(8.851277e-06, dtype=float32)}


  8%|▊         | 82690/1000000 [3:54:44<30:09:06,  8.45it/s]

{'loss': Array(0.14504607, dtype=float32), 'loss_cross_entropy': Array(0.13689439, dtype=float32)}
{'loss_inverse': Array(6.221879e-05, dtype=float32)}


  8%|▊         | 82700/1000000 [3:54:46<26:10:30,  9.73it/s]

{'loss': Array(0.17341116, dtype=float32), 'loss_cross_entropy': Array(0.16416593, dtype=float32)}
{'loss_inverse': Array(0.00020869, dtype=float32)}


  8%|▊         | 82710/1000000 [3:54:48<31:49:20,  8.01it/s]

{'loss': Array(0.13638261, dtype=float32), 'loss_cross_entropy': Array(0.12753814, dtype=float32)}
{'loss_inverse': Array(1.0060468e-05, dtype=float32)}


  8%|▊         | 82720/1000000 [3:54:49<26:16:40,  9.70it/s]

{'loss': Array(0.14672282, dtype=float32), 'loss_cross_entropy': Array(0.13873151, dtype=float32)}
{'loss_inverse': Array(5.453295e-06, dtype=float32)}


  8%|▊         | 82730/1000000 [3:54:51<34:59:48,  7.28it/s]

{'loss': Array(0.1583132, dtype=float32), 'loss_cross_entropy': Array(0.14919949, dtype=float32)}
{'loss_inverse': Array(4.7225636e-05, dtype=float32)}


  8%|▊         | 82740/1000000 [3:54:52<26:49:05,  9.50it/s]

{'loss': Array(0.1352555, dtype=float32), 'loss_cross_entropy': Array(0.12691505, dtype=float32)}
{'loss_inverse': Array(7.4772215e-06, dtype=float32)}


  8%|▊         | 82750/1000000 [3:54:54<25:52:01,  9.85it/s]

{'loss': Array(0.14423619, dtype=float32), 'loss_cross_entropy': Array(0.13586396, dtype=float32)}
{'loss_inverse': Array(3.8713816e-07, dtype=float32)}


  8%|▊         | 82760/1000000 [3:54:56<29:05:25,  8.76it/s]

{'loss': Array(0.14696203, dtype=float32), 'loss_cross_entropy': Array(0.13813314, dtype=float32)}
{'loss_inverse': Array(0.00015574, dtype=float32)}


  8%|▊         | 82770/1000000 [3:54:57<26:04:36,  9.77it/s]

{'loss': Array(0.16056684, dtype=float32), 'loss_cross_entropy': Array(0.15193188, dtype=float32)}
{'loss_inverse': Array(1.2919604e-05, dtype=float32)}


  8%|▊         | 82780/1000000 [3:54:59<34:36:53,  7.36it/s]

{'loss': Array(0.15840492, dtype=float32), 'loss_cross_entropy': Array(0.14942156, dtype=float32)}
{'loss_inverse': Array(2.6429092e-07, dtype=float32)}


  8%|▊         | 82790/1000000 [3:55:00<27:49:50,  9.15it/s]

{'loss': Array(0.15175673, dtype=float32), 'loss_cross_entropy': Array(0.14241356, dtype=float32)}
{'loss_inverse': Array(1.1070373e-06, dtype=float32)}


  8%|▊         | 82800/1000000 [3:55:02<26:31:51,  9.60it/s]

{'loss': Array(0.13717543, dtype=float32), 'loss_cross_entropy': Array(0.12953536, dtype=float32)}
{'loss_inverse': Array(8.481027e-05, dtype=float32)}


  8%|▊         | 82810/1000000 [3:55:03<28:25:23,  8.96it/s]

{'loss': Array(0.14037414, dtype=float32), 'loss_cross_entropy': Array(0.13222729, dtype=float32)}
{'loss_inverse': Array(2.6809314e-06, dtype=float32)}


  8%|▊         | 82820/1000000 [3:55:05<25:50:31,  9.86it/s]

{'loss': Array(0.16760425, dtype=float32), 'loss_cross_entropy': Array(0.1582475, dtype=float32)}
{'loss_inverse': Array(2.255734e-06, dtype=float32)}


  8%|▊         | 82830/1000000 [3:55:07<29:59:14,  8.50it/s]

{'loss': Array(0.15585445, dtype=float32), 'loss_cross_entropy': Array(0.14725241, dtype=float32)}
{'loss_inverse': Array(5.6768204e-05, dtype=float32)}


  8%|▊         | 82840/1000000 [3:55:08<25:46:47,  9.88it/s]

{'loss': Array(0.15480252, dtype=float32), 'loss_cross_entropy': Array(0.14600198, dtype=float32)}
{'loss_inverse': Array(5.801317e-05, dtype=float32)}


  8%|▊         | 82850/1000000 [3:55:10<32:16:56,  7.89it/s]

{'loss': Array(0.15624462, dtype=float32), 'loss_cross_entropy': Array(0.14686878, dtype=float32)}
{'loss_inverse': Array(2.5388916e-05, dtype=float32)}


  8%|▊         | 82860/1000000 [3:55:11<26:11:25,  9.73it/s]

{'loss': Array(0.15228905, dtype=float32), 'loss_cross_entropy': Array(0.14370343, dtype=float32)}
{'loss_inverse': Array(0.00014152, dtype=float32)}


  8%|▊         | 82870/1000000 [3:55:13<26:14:05,  9.71it/s]

{'loss': Array(0.15674515, dtype=float32), 'loss_cross_entropy': Array(0.14790258, dtype=float32)}
{'loss_inverse': Array(0.00021857, dtype=float32)}


  8%|▊         | 82879/1000000 [3:55:15<32:07:36,  7.93it/s]

{'loss': Array(0.13156292, dtype=float32), 'loss_cross_entropy': Array(0.12373531, dtype=float32)}
{'loss_inverse': Array(3.3607434e-06, dtype=float32)}


  8%|▊         | 82889/1000000 [3:55:16<27:00:34,  9.43it/s]

{'loss': Array(0.13478927, dtype=float32), 'loss_cross_entropy': Array(0.12712094, dtype=float32)}
{'loss_inverse': Array(8.899854e-06, dtype=float32)}


  8%|▊         | 82899/1000000 [3:55:18<35:03:24,  7.27it/s]

{'loss': Array(0.16005673, dtype=float32), 'loss_cross_entropy': Array(0.15131369, dtype=float32)}
{'loss_inverse': Array(8.342263e-05, dtype=float32)}


  8%|▊         | 82909/1000000 [3:55:19<26:45:20,  9.52it/s]

{'loss': Array(0.1339967, dtype=float32), 'loss_cross_entropy': Array(0.12663785, dtype=float32)}
{'loss_inverse': Array(1.2096139e-05, dtype=float32)}


  8%|▊         | 82919/1000000 [3:55:21<38:37:00,  6.60it/s]

{'loss': Array(0.1314605, dtype=float32), 'loss_cross_entropy': Array(0.12361423, dtype=float32)}
{'loss_inverse': Array(2.4442657e-05, dtype=float32)}


  8%|▊         | 82929/1000000 [3:55:23<27:32:08,  9.25it/s]

{'loss': Array(0.13641442, dtype=float32), 'loss_cross_entropy': Array(0.1291251, dtype=float32)}
{'loss_inverse': Array(0.00010864, dtype=float32)}


  8%|▊         | 82939/1000000 [3:55:24<38:21:31,  6.64it/s]

{'loss': Array(0.14805314, dtype=float32), 'loss_cross_entropy': Array(0.13975441, dtype=float32)}
{'loss_inverse': Array(3.6004297e-07, dtype=float32)}


  8%|▊         | 82949/1000000 [3:55:26<27:42:17,  9.19it/s]

{'loss': Array(0.14199077, dtype=float32), 'loss_cross_entropy': Array(0.13418464, dtype=float32)}
{'loss_inverse': Array(3.47401e-06, dtype=float32)}


  8%|▊         | 82959/1000000 [3:55:27<25:07:43, 10.14it/s]

{'loss': Array(0.15406339, dtype=float32), 'loss_cross_entropy': Array(0.14567074, dtype=float32)}
{'loss_inverse': Array(0.00029576, dtype=float32)}


  8%|▊         | 82969/1000000 [3:55:29<34:12:49,  7.45it/s]

{'loss': Array(0.1436392, dtype=float32), 'loss_cross_entropy': Array(0.13574755, dtype=float32)}
{'loss_inverse': Array(5.0710758e-05, dtype=float32)}


  8%|▊         | 82979/1000000 [3:55:30<26:18:28,  9.68it/s]

{'loss': Array(0.1406419, dtype=float32), 'loss_cross_entropy': Array(0.1323648, dtype=float32)}
{'loss_inverse': Array(9.874902e-07, dtype=float32)}


  8%|▊         | 82989/1000000 [3:55:32<39:00:58,  6.53it/s]

{'loss': Array(0.15472819, dtype=float32), 'loss_cross_entropy': Array(0.14577745, dtype=float32)}
{'loss_inverse': Array(8.731454e-06, dtype=float32)}


  8%|▊         | 82999/1000000 [3:55:34<27:42:39,  9.19it/s]

{'loss': Array(0.13367885, dtype=float32), 'loss_cross_entropy': Array(0.12632838, dtype=float32)}
{'loss_inverse': Array(1.7519114e-05, dtype=float32)}


  8%|▊         | 83009/1000000 [3:55:42<85:29:57,  2.98it/s] 

{'loss': Array(0.14491044, dtype=float32), 'loss_cross_entropy': Array(0.13664894, dtype=float32)}
{'loss_inverse': Array(3.5241592e-06, dtype=float32)}


  8%|▊         | 83019/1000000 [3:55:43<38:41:51,  6.58it/s] 

{'loss': Array(0.14058553, dtype=float32), 'loss_cross_entropy': Array(0.13256912, dtype=float32)}
{'loss_inverse': Array(3.6171048e-07, dtype=float32)}


  8%|▊         | 83029/1000000 [3:55:45<28:14:47,  9.02it/s]

{'loss': Array(0.13068384, dtype=float32), 'loss_cross_entropy': Array(0.12256447, dtype=float32)}
{'loss_inverse': Array(1.567521e-08, dtype=float32)}


  8%|▊         | 83039/1000000 [3:55:47<30:18:58,  8.40it/s]

{'loss': Array(0.14677587, dtype=float32), 'loss_cross_entropy': Array(0.13861641, dtype=float32)}
{'loss_inverse': Array(2.2712553e-07, dtype=float32)}


  8%|▊         | 83049/1000000 [3:55:48<26:28:30,  9.62it/s]

{'loss': Array(0.14105816, dtype=float32), 'loss_cross_entropy': Array(0.13292734, dtype=float32)}
{'loss_inverse': Array(2.0170116e-06, dtype=float32)}


  8%|▊         | 83059/1000000 [3:55:50<38:13:55,  6.66it/s]

{'loss': Array(0.14409178, dtype=float32), 'loss_cross_entropy': Array(0.13555257, dtype=float32)}
{'loss_inverse': Array(1.8583114e-05, dtype=float32)}


  8%|▊         | 83069/1000000 [3:55:51<27:30:39,  9.26it/s]

{'loss': Array(0.13314709, dtype=float32), 'loss_cross_entropy': Array(0.12556231, dtype=float32)}
{'loss_inverse': Array(9.233332e-06, dtype=float32)}


  8%|▊         | 83079/1000000 [3:55:53<26:02:10,  9.78it/s]

{'loss': Array(0.13690124, dtype=float32), 'loss_cross_entropy': Array(0.12913509, dtype=float32)}
{'loss_inverse': Array(3.985918e-07, dtype=float32)}


  8%|▊         | 83089/1000000 [3:55:54<29:29:57,  8.63it/s]

{'loss': Array(0.12445678, dtype=float32), 'loss_cross_entropy': Array(0.11741783, dtype=float32)}
{'loss_inverse': Array(3.7831975e-05, dtype=float32)}


  8%|▊         | 83099/1000000 [3:55:56<25:56:37,  9.82it/s]

{'loss': Array(0.1338121, dtype=float32), 'loss_cross_entropy': Array(0.12627432, dtype=float32)}
{'loss_inverse': Array(9.5944815e-05, dtype=float32)}


  8%|▊         | 83109/1000000 [3:55:58<31:56:09,  7.98it/s]

{'loss': Array(0.13097052, dtype=float32), 'loss_cross_entropy': Array(0.12353253, dtype=float32)}
{'loss_inverse': Array(7.573134e-05, dtype=float32)}


  8%|▊         | 83119/1000000 [3:55:59<26:21:48,  9.66it/s]

{'loss': Array(0.1639972, dtype=float32), 'loss_cross_entropy': Array(0.15632205, dtype=float32)}
{'loss_inverse': Array(2.5310923e-05, dtype=float32)}


  8%|▊         | 83129/1000000 [3:56:01<31:22:12,  8.12it/s]

{'loss': Array(0.12742937, dtype=float32), 'loss_cross_entropy': Array(0.11959627, dtype=float32)}
{'loss_inverse': Array(1.9701083e-06, dtype=float32)}


  8%|▊         | 83139/1000000 [3:56:02<25:59:02,  9.80it/s]

{'loss': Array(0.1301805, dtype=float32), 'loss_cross_entropy': Array(0.12250801, dtype=float32)}
{'loss_inverse': Array(4.2334552e-05, dtype=float32)}


  8%|▊         | 83149/1000000 [3:56:04<26:34:31,  9.58it/s]

{'loss': Array(0.1369991, dtype=float32), 'loss_cross_entropy': Array(0.12928115, dtype=float32)}
{'loss_inverse': Array(5.0654002e-05, dtype=float32)}


  8%|▊         | 83159/1000000 [3:56:05<27:51:25,  9.14it/s]

{'loss': Array(0.14753962, dtype=float32), 'loss_cross_entropy': Array(0.13888156, dtype=float32)}
{'loss_inverse': Array(2.7884532e-06, dtype=float32)}


  8%|▊         | 83169/1000000 [3:56:07<25:33:32,  9.96it/s]

{'loss': Array(0.1364894, dtype=float32), 'loss_cross_entropy': Array(0.12935333, dtype=float32)}
{'loss_inverse': Array(9.376304e-06, dtype=float32)}


  8%|▊         | 83179/1000000 [3:56:09<32:07:07,  7.93it/s]

{'loss': Array(0.14260282, dtype=float32), 'loss_cross_entropy': Array(0.1346095, dtype=float32)}
{'loss_inverse': Array(2.0770649e-07, dtype=float32)}


  8%|▊         | 83189/1000000 [3:56:10<26:43:53,  9.53it/s]

{'loss': Array(0.13779436, dtype=float32), 'loss_cross_entropy': Array(0.12996368, dtype=float32)}
{'loss_inverse': Array(0.00011935, dtype=float32)}


  8%|▊         | 83199/1000000 [3:56:12<34:40:15,  7.35it/s]

{'loss': Array(0.15108489, dtype=float32), 'loss_cross_entropy': Array(0.14199053, dtype=float32)}
{'loss_inverse': Array(7.5296606e-07, dtype=float32)}


  8%|▊         | 83209/1000000 [3:56:13<26:55:53,  9.46it/s]

{'loss': Array(0.15103845, dtype=float32), 'loss_cross_entropy': Array(0.14332914, dtype=float32)}
{'loss_inverse': Array(1.7207352e-06, dtype=float32)}


  8%|▊         | 83219/1000000 [3:56:15<25:17:22, 10.07it/s]

{'loss': Array(0.1582813, dtype=float32), 'loss_cross_entropy': Array(0.14914215, dtype=float32)}
{'loss_inverse': Array(6.373131e-06, dtype=float32)}


  8%|▊         | 83229/1000000 [3:56:16<29:21:56,  8.67it/s]

{'loss': Array(0.14236812, dtype=float32), 'loss_cross_entropy': Array(0.13426065, dtype=float32)}
{'loss_inverse': Array(0.00042866, dtype=float32)}


  8%|▊         | 83239/1000000 [3:56:18<25:28:14, 10.00it/s]

{'loss': Array(0.1421554, dtype=float32), 'loss_cross_entropy': Array(0.13421367, dtype=float32)}
{'loss_inverse': Array(3.1755757e-05, dtype=float32)}


  8%|▊         | 83249/1000000 [3:56:20<33:51:03,  7.52it/s]

{'loss': Array(0.13433546, dtype=float32), 'loss_cross_entropy': Array(0.12657648, dtype=float32)}
{'loss_inverse': Array(7.5692365e-06, dtype=float32)}


  8%|▊         | 83259/1000000 [3:56:21<29:03:07,  8.77it/s]

{'loss': Array(0.12949055, dtype=float32), 'loss_cross_entropy': Array(0.12191031, dtype=float32)}
{'loss_inverse': Array(6.7710374e-05, dtype=float32)}


  8%|▊         | 83269/1000000 [3:56:23<34:33:28,  7.37it/s]

{'loss': Array(0.14725806, dtype=float32), 'loss_cross_entropy': Array(0.13879542, dtype=float32)}
{'loss_inverse': Array(8.289e-06, dtype=float32)}


  8%|▊         | 83279/1000000 [3:56:24<26:48:28,  9.50it/s]

{'loss': Array(0.1524424, dtype=float32), 'loss_cross_entropy': Array(0.14385192, dtype=float32)}
{'loss_inverse': Array(0.00021174, dtype=float32)}


  8%|▊         | 83289/1000000 [3:56:26<25:16:21, 10.08it/s]

{'loss': Array(0.14249542, dtype=float32), 'loss_cross_entropy': Array(0.13369773, dtype=float32)}
{'loss_inverse': Array(8.156526e-07, dtype=float32)}


  8%|▊         | 83299/1000000 [3:56:27<28:04:49,  9.07it/s]

{'loss': Array(0.15489873, dtype=float32), 'loss_cross_entropy': Array(0.14661397, dtype=float32)}
{'loss_inverse': Array(1.6927628e-05, dtype=float32)}


  8%|▊         | 83309/1000000 [3:56:29<25:35:01,  9.95it/s]

{'loss': Array(0.13670708, dtype=float32), 'loss_cross_entropy': Array(0.12839529, dtype=float32)}
{'loss_inverse': Array(0.0001348, dtype=float32)}


  8%|▊         | 83319/1000000 [3:56:31<38:49:43,  6.56it/s]

{'loss': Array(0.14947286, dtype=float32), 'loss_cross_entropy': Array(0.14137413, dtype=float32)}
{'loss_inverse': Array(6.45194e-05, dtype=float32)}


  8%|▊         | 83329/1000000 [3:56:32<27:21:46,  9.31it/s]

{'loss': Array(0.15802449, dtype=float32), 'loss_cross_entropy': Array(0.1496308, dtype=float32)}
{'loss_inverse': Array(5.728663e-05, dtype=float32)}


  8%|▊         | 83339/1000000 [3:56:34<25:38:23,  9.93it/s]

{'loss': Array(0.14992195, dtype=float32), 'loss_cross_entropy': Array(0.14147243, dtype=float32)}
{'loss_inverse': Array(0.00017706, dtype=float32)}


  8%|▊         | 83349/1000000 [3:56:35<29:32:19,  8.62it/s]

{'loss': Array(0.12614337, dtype=float32), 'loss_cross_entropy': Array(0.11860313, dtype=float32)}
{'loss_inverse': Array(0.00017027, dtype=float32)}


  8%|▊         | 83359/1000000 [3:56:37<26:08:49,  9.74it/s]

{'loss': Array(0.13206741, dtype=float32), 'loss_cross_entropy': Array(0.12480579, dtype=float32)}
{'loss_inverse': Array(2.9937906e-05, dtype=float32)}


  8%|▊         | 83369/1000000 [3:56:39<29:33:00,  8.62it/s]

{'loss': Array(0.14816226, dtype=float32), 'loss_cross_entropy': Array(0.139982, dtype=float32)}
{'loss_inverse': Array(4.9150964e-05, dtype=float32)}


  8%|▊         | 83379/1000000 [3:56:40<28:11:51,  9.03it/s]

{'loss': Array(0.15823439, dtype=float32), 'loss_cross_entropy': Array(0.149769, dtype=float32)}
{'loss_inverse': Array(3.9837603e-05, dtype=float32)}


  8%|▊         | 83389/1000000 [3:56:42<32:03:57,  7.94it/s]

{'loss': Array(0.15963285, dtype=float32), 'loss_cross_entropy': Array(0.15090439, dtype=float32)}
{'loss_inverse': Array(0.00037685, dtype=float32)}


  8%|▊         | 83399/1000000 [3:56:43<26:11:27,  9.72it/s]

{'loss': Array(0.15002444, dtype=float32), 'loss_cross_entropy': Array(0.14098243, dtype=float32)}
{'loss_inverse': Array(0.00033215, dtype=float32)}


  8%|▊         | 83409/1000000 [3:56:45<25:46:18,  9.88it/s]

{'loss': Array(0.14905947, dtype=float32), 'loss_cross_entropy': Array(0.1407893, dtype=float32)}
{'loss_inverse': Array(0.00025698, dtype=float32)}


  8%|▊         | 83419/1000000 [3:56:46<29:57:54,  8.50it/s]

{'loss': Array(0.15250263, dtype=float32), 'loss_cross_entropy': Array(0.1442795, dtype=float32)}
{'loss_inverse': Array(8.1967204e-07, dtype=float32)}


  8%|▊         | 83429/1000000 [3:56:48<25:53:46,  9.83it/s]

{'loss': Array(0.1289899, dtype=float32), 'loss_cross_entropy': Array(0.12140894, dtype=float32)}
{'loss_inverse': Array(0.00014563, dtype=float32)}


  8%|▊         | 83439/1000000 [3:56:50<34:43:38,  7.33it/s]

{'loss': Array(0.13654618, dtype=float32), 'loss_cross_entropy': Array(0.12867717, dtype=float32)}
{'loss_inverse': Array(0.00013544, dtype=float32)}


  8%|▊         | 83449/1000000 [3:56:51<26:49:05,  9.49it/s]

{'loss': Array(0.15090655, dtype=float32), 'loss_cross_entropy': Array(0.14259614, dtype=float32)}
{'loss_inverse': Array(4.4674694e-06, dtype=float32)}


  8%|▊         | 83459/1000000 [3:56:53<39:01:46,  6.52it/s]

{'loss': Array(0.15462881, dtype=float32), 'loss_cross_entropy': Array(0.14647892, dtype=float32)}
{'loss_inverse': Array(0.00011599, dtype=float32)}


  8%|▊         | 83469/1000000 [3:56:54<27:14:30,  9.35it/s]

{'loss': Array(0.13703077, dtype=float32), 'loss_cross_entropy': Array(0.12957364, dtype=float32)}
{'loss_inverse': Array(0.00016789, dtype=float32)}


  8%|▊         | 83479/1000000 [3:56:56<38:45:08,  6.57it/s]

{'loss': Array(0.13481124, dtype=float32), 'loss_cross_entropy': Array(0.12684987, dtype=float32)}
{'loss_inverse': Array(0.0002394, dtype=float32)}


  8%|▊         | 83489/1000000 [3:56:58<27:15:52,  9.34it/s]

{'loss': Array(0.15504074, dtype=float32), 'loss_cross_entropy': Array(0.14652899, dtype=float32)}
{'loss_inverse': Array(3.4842622e-05, dtype=float32)}


  8%|▊         | 83499/1000000 [3:56:59<25:18:09, 10.06it/s]

{'loss': Array(0.1475282, dtype=float32), 'loss_cross_entropy': Array(0.1395065, dtype=float32)}
{'loss_inverse': Array(9.8028686e-05, dtype=float32)}


  8%|▊         | 83509/1000000 [3:57:07<93:11:38,  2.73it/s] 

{'loss': Array(0.15308806, dtype=float32), 'loss_cross_entropy': Array(0.14467435, dtype=float32)}
{'loss_inverse': Array(9.1809835e-07, dtype=float32)}


  8%|▊         | 83519/1000000 [3:57:09<36:51:41,  6.91it/s] 

{'loss': Array(0.14294295, dtype=float32), 'loss_cross_entropy': Array(0.13474792, dtype=float32)}
{'loss_inverse': Array(2.0015164e-05, dtype=float32)}


  8%|▊         | 83529/1000000 [3:57:10<27:22:05,  9.30it/s]

{'loss': Array(0.15499425, dtype=float32), 'loss_cross_entropy': Array(0.14676653, dtype=float32)}
{'loss_inverse': Array(1.4318806e-06, dtype=float32)}


  8%|▊         | 83539/1000000 [3:57:12<28:20:00,  8.98it/s]

{'loss': Array(0.17233558, dtype=float32), 'loss_cross_entropy': Array(0.16253935, dtype=float32)}
{'loss_inverse': Array(0.00020158, dtype=float32)}


  8%|▊         | 83549/1000000 [3:57:13<25:51:02,  9.85it/s]

{'loss': Array(0.14029053, dtype=float32), 'loss_cross_entropy': Array(0.13202496, dtype=float32)}
{'loss_inverse': Array(1.9841607e-05, dtype=float32)}


  8%|▊         | 83559/1000000 [3:57:15<31:55:44,  7.97it/s]

{'loss': Array(0.14648601, dtype=float32), 'loss_cross_entropy': Array(0.13821994, dtype=float32)}
{'loss_inverse': Array(0.0005446, dtype=float32)}


  8%|▊         | 83569/1000000 [3:57:17<26:40:01,  9.55it/s]

{'loss': Array(0.15578632, dtype=float32), 'loss_cross_entropy': Array(0.1475396, dtype=float32)}
{'loss_inverse': Array(0.00010731, dtype=float32)}


  8%|▊         | 83579/1000000 [3:57:18<32:14:28,  7.90it/s]

{'loss': Array(0.1388642, dtype=float32), 'loss_cross_entropy': Array(0.13136522, dtype=float32)}
{'loss_inverse': Array(0.00018335, dtype=float32)}


  8%|▊         | 83589/1000000 [3:57:20<26:23:40,  9.64it/s]

{'loss': Array(0.13750593, dtype=float32), 'loss_cross_entropy': Array(0.12963645, dtype=float32)}
{'loss_inverse': Array(2.0373094e-05, dtype=float32)}


  8%|▊         | 83599/1000000 [3:57:22<31:19:52,  8.12it/s]

{'loss': Array(0.15396328, dtype=float32), 'loss_cross_entropy': Array(0.14582752, dtype=float32)}
{'loss_inverse': Array(4.8359234e-06, dtype=float32)}


  8%|▊         | 83609/1000000 [3:57:23<26:15:31,  9.69it/s]

{'loss': Array(0.1595733, dtype=float32), 'loss_cross_entropy': Array(0.15109059, dtype=float32)}
{'loss_inverse': Array(8.125705e-06, dtype=float32)}


  8%|▊         | 83619/1000000 [3:57:25<34:30:44,  7.38it/s]

{'loss': Array(0.15851326, dtype=float32), 'loss_cross_entropy': Array(0.15024607, dtype=float32)}
{'loss_inverse': Array(7.235627e-07, dtype=float32)}


  8%|▊         | 83629/1000000 [3:57:26<26:48:40,  9.49it/s]

{'loss': Array(0.15682642, dtype=float32), 'loss_cross_entropy': Array(0.14770305, dtype=float32)}
{'loss_inverse': Array(2.1553394e-06, dtype=float32)}


  8%|▊         | 83639/1000000 [3:57:28<38:26:43,  6.62it/s]

{'loss': Array(0.15305282, dtype=float32), 'loss_cross_entropy': Array(0.14435415, dtype=float32)}
{'loss_inverse': Array(8.593292e-05, dtype=float32)}


  8%|▊         | 83649/1000000 [3:57:29<27:26:19,  9.28it/s]

{'loss': Array(0.15417723, dtype=float32), 'loss_cross_entropy': Array(0.14538209, dtype=float32)}
{'loss_inverse': Array(4.3447097e-05, dtype=float32)}


  8%|▊         | 83659/1000000 [3:57:31<38:00:59,  6.70it/s]

{'loss': Array(0.16090009, dtype=float32), 'loss_cross_entropy': Array(0.15198272, dtype=float32)}
{'loss_inverse': Array(4.287999e-06, dtype=float32)}


  8%|▊         | 83669/1000000 [3:57:33<27:12:41,  9.35it/s]

{'loss': Array(0.1645659, dtype=float32), 'loss_cross_entropy': Array(0.15575925, dtype=float32)}
{'loss_inverse': Array(1.06476755e-05, dtype=float32)}


  8%|▊         | 83679/1000000 [3:57:34<26:21:40,  9.66it/s]

{'loss': Array(0.15123574, dtype=float32), 'loss_cross_entropy': Array(0.14303316, dtype=float32)}
{'loss_inverse': Array(1.6179527e-07, dtype=float32)}


  8%|▊         | 83689/1000000 [3:57:36<31:50:16,  7.99it/s]

{'loss': Array(0.14033246, dtype=float32), 'loss_cross_entropy': Array(0.13230889, dtype=float32)}
{'loss_inverse': Array(6.2679413e-07, dtype=float32)}


  8%|▊         | 83699/1000000 [3:57:37<26:15:57,  9.69it/s]

{'loss': Array(0.13384722, dtype=float32), 'loss_cross_entropy': Array(0.12585822, dtype=float32)}
{'loss_inverse': Array(9.3597235e-05, dtype=float32)}


  8%|▊         | 83709/1000000 [3:57:39<34:33:53,  7.36it/s]

{'loss': Array(0.15609215, dtype=float32), 'loss_cross_entropy': Array(0.14728792, dtype=float32)}
{'loss_inverse': Array(3.5327577e-07, dtype=float32)}


  8%|▊         | 83719/1000000 [3:57:41<27:32:19,  9.24it/s]

{'loss': Array(0.13280931, dtype=float32), 'loss_cross_entropy': Array(0.12501065, dtype=float32)}
{'loss_inverse': Array(2.6238768e-06, dtype=float32)}


  8%|▊         | 83729/1000000 [3:57:42<34:41:52,  7.34it/s]

{'loss': Array(0.16897544, dtype=float32), 'loss_cross_entropy': Array(0.1597369, dtype=float32)}
{'loss_inverse': Array(0.00037624, dtype=float32)}


  8%|▊         | 83739/1000000 [3:57:44<26:51:47,  9.47it/s]

{'loss': Array(0.15099363, dtype=float32), 'loss_cross_entropy': Array(0.14216033, dtype=float32)}
{'loss_inverse': Array(0.00014217, dtype=float32)}


  8%|▊         | 83749/1000000 [3:57:46<38:18:51,  6.64it/s]

{'loss': Array(0.16002399, dtype=float32), 'loss_cross_entropy': Array(0.15148887, dtype=float32)}
{'loss_inverse': Array(3.360877e-06, dtype=float32)}


  8%|▊         | 83759/1000000 [3:57:47<27:05:09,  9.40it/s]

{'loss': Array(0.13956678, dtype=float32), 'loss_cross_entropy': Array(0.13155018, dtype=float32)}
{'loss_inverse': Array(3.7939346e-06, dtype=float32)}


  8%|▊         | 83769/1000000 [3:57:48<25:33:36,  9.96it/s]

{'loss': Array(0.14486955, dtype=float32), 'loss_cross_entropy': Array(0.1364927, dtype=float32)}
{'loss_inverse': Array(1.8206898e-05, dtype=float32)}


  8%|▊         | 83779/1000000 [3:57:50<31:22:24,  8.11it/s]

{'loss': Array(0.1428561, dtype=float32), 'loss_cross_entropy': Array(0.13497083, dtype=float32)}
{'loss_inverse': Array(0.00027005, dtype=float32)}


  8%|▊         | 83789/1000000 [3:57:52<25:59:11,  9.79it/s]

{'loss': Array(0.14280283, dtype=float32), 'loss_cross_entropy': Array(0.13474096, dtype=float32)}
{'loss_inverse': Array(2.9229446e-05, dtype=float32)}


  8%|▊         | 83799/1000000 [3:57:53<38:15:43,  6.65it/s]

{'loss': Array(0.13428576, dtype=float32), 'loss_cross_entropy': Array(0.12577349, dtype=float32)}
{'loss_inverse': Array(0.00040997, dtype=float32)}


  8%|▊         | 83809/1000000 [3:57:55<27:34:49,  9.23it/s]

{'loss': Array(0.11986595, dtype=float32), 'loss_cross_entropy': Array(0.11280216, dtype=float32)}
{'loss_inverse': Array(8.993359e-08, dtype=float32)}


  8%|▊         | 83819/1000000 [3:57:56<25:45:35,  9.88it/s]

{'loss': Array(0.13032213, dtype=float32), 'loss_cross_entropy': Array(0.12194271, dtype=float32)}
{'loss_inverse': Array(1.2117113e-05, dtype=float32)}


  8%|▊         | 83829/1000000 [3:57:58<29:20:23,  8.67it/s]

{'loss': Array(0.14663585, dtype=float32), 'loss_cross_entropy': Array(0.13870196, dtype=float32)}
{'loss_inverse': Array(2.7594486e-05, dtype=float32)}


  8%|▊         | 83839/1000000 [3:57:59<25:55:02,  9.82it/s]

{'loss': Array(0.14785793, dtype=float32), 'loss_cross_entropy': Array(0.13965467, dtype=float32)}
{'loss_inverse': Array(1.5675553e-07, dtype=float32)}


  8%|▊         | 83849/1000000 [3:58:01<29:33:14,  8.61it/s]

{'loss': Array(0.14284587, dtype=float32), 'loss_cross_entropy': Array(0.13509852, dtype=float32)}
{'loss_inverse': Array(2.0538826e-05, dtype=float32)}


  8%|▊         | 83859/1000000 [3:58:03<25:40:57,  9.91it/s]

{'loss': Array(0.13972996, dtype=float32), 'loss_cross_entropy': Array(0.13180676, dtype=float32)}
{'loss_inverse': Array(2.4767052e-05, dtype=float32)}


  8%|▊         | 83869/1000000 [3:58:04<31:09:34,  8.17it/s]

{'loss': Array(0.1482567, dtype=float32), 'loss_cross_entropy': Array(0.14021198, dtype=float32)}
{'loss_inverse': Array(5.29095e-07, dtype=float32)}


  8%|▊         | 83879/1000000 [3:58:06<26:07:01,  9.74it/s]

{'loss': Array(0.1382138, dtype=float32), 'loss_cross_entropy': Array(0.13082232, dtype=float32)}
{'loss_inverse': Array(1.2229955e-07, dtype=float32)}


  8%|▊         | 83889/1000000 [3:58:07<31:39:49,  8.04it/s]

{'loss': Array(0.13939507, dtype=float32), 'loss_cross_entropy': Array(0.1311653, dtype=float32)}
{'loss_inverse': Array(5.748825e-06, dtype=float32)}


  8%|▊         | 83899/1000000 [3:58:09<26:59:02,  9.43it/s]

{'loss': Array(0.14409289, dtype=float32), 'loss_cross_entropy': Array(0.13591902, dtype=float32)}
{'loss_inverse': Array(0.00049232, dtype=float32)}


  8%|▊         | 83909/1000000 [3:58:10<25:22:30, 10.03it/s]

{'loss': Array(0.1398273, dtype=float32), 'loss_cross_entropy': Array(0.13176292, dtype=float32)}
{'loss_inverse': Array(0.0005245, dtype=float32)}


  8%|▊         | 83919/1000000 [3:58:12<27:48:49,  9.15it/s]

{'loss': Array(0.13782434, dtype=float32), 'loss_cross_entropy': Array(0.13014963, dtype=float32)}
{'loss_inverse': Array(2.6504216e-05, dtype=float32)}


  8%|▊         | 83929/1000000 [3:58:14<26:08:49,  9.73it/s]

{'loss': Array(0.15014242, dtype=float32), 'loss_cross_entropy': Array(0.14204982, dtype=float32)}
{'loss_inverse': Array(2.5428105e-07, dtype=float32)}


  8%|▊         | 83939/1000000 [3:58:15<29:40:04,  8.58it/s]

{'loss': Array(0.14500977, dtype=float32), 'loss_cross_entropy': Array(0.13674551, dtype=float32)}
{'loss_inverse': Array(2.6352363e-06, dtype=float32)}


  8%|▊         | 83949/1000000 [3:58:17<25:52:10,  9.84it/s]

{'loss': Array(0.15387934, dtype=float32), 'loss_cross_entropy': Array(0.14581726, dtype=float32)}
{'loss_inverse': Array(4.510815e-06, dtype=float32)}


  8%|▊         | 83959/1000000 [3:58:19<31:40:43,  8.03it/s]

{'loss': Array(0.1381372, dtype=float32), 'loss_cross_entropy': Array(0.13025944, dtype=float32)}
{'loss_inverse': Array(0.00010176, dtype=float32)}


  8%|▊         | 83969/1000000 [3:58:20<27:40:46,  9.19it/s]

{'loss': Array(0.13729261, dtype=float32), 'loss_cross_entropy': Array(0.12991832, dtype=float32)}
{'loss_inverse': Array(2.4734065e-06, dtype=float32)}


  8%|▊         | 83979/1000000 [3:58:21<25:18:36, 10.05it/s]

{'loss': Array(0.12803526, dtype=float32), 'loss_cross_entropy': Array(0.12007164, dtype=float32)}
{'loss_inverse': Array(8.964643e-06, dtype=float32)}


  8%|▊         | 83989/1000000 [3:58:23<29:12:31,  8.71it/s]

{'loss': Array(0.12789306, dtype=float32), 'loss_cross_entropy': Array(0.12000079, dtype=float32)}
{'loss_inverse': Array(1.2148185e-05, dtype=float32)}


  8%|▊         | 83999/1000000 [3:58:25<25:42:03,  9.90it/s]

{'loss': Array(0.13879213, dtype=float32), 'loss_cross_entropy': Array(0.13054278, dtype=float32)}
{'loss_inverse': Array(7.628472e-07, dtype=float32)}


  8%|▊         | 84009/1000000 [3:58:33<95:28:16,  2.67it/s] 

{'loss': Array(0.15242401, dtype=float32), 'loss_cross_entropy': Array(0.14406534, dtype=float32)}
{'loss_inverse': Array(2.1303666e-07, dtype=float32)}


  8%|▊         | 84019/1000000 [3:58:34<36:53:01,  6.90it/s] 

{'loss': Array(0.14366148, dtype=float32), 'loss_cross_entropy': Array(0.1355497, dtype=float32)}
{'loss_inverse': Array(5.551376e-05, dtype=float32)}


  8%|▊         | 84029/1000000 [3:58:36<39:14:36,  6.48it/s]

{'loss': Array(0.14747506, dtype=float32), 'loss_cross_entropy': Array(0.13938366, dtype=float32)}
{'loss_inverse': Array(2.4629813e-07, dtype=float32)}


  8%|▊         | 84039/1000000 [3:58:38<26:51:27,  9.47it/s]

{'loss': Array(0.14171323, dtype=float32), 'loss_cross_entropy': Array(0.13377513, dtype=float32)}
{'loss_inverse': Array(0.00010097, dtype=float32)}


  8%|▊         | 84049/1000000 [3:58:39<25:46:19,  9.87it/s]

{'loss': Array(0.14780916, dtype=float32), 'loss_cross_entropy': Array(0.13900052, dtype=float32)}
{'loss_inverse': Array(0.00023378, dtype=float32)}


  8%|▊         | 84059/1000000 [3:58:41<27:57:52,  9.10it/s]

{'loss': Array(0.15008722, dtype=float32), 'loss_cross_entropy': Array(0.14171098, dtype=float32)}
{'loss_inverse': Array(7.572187e-06, dtype=float32)}


  8%|▊         | 84069/1000000 [3:58:42<25:21:38, 10.03it/s]

{'loss': Array(0.15427418, dtype=float32), 'loss_cross_entropy': Array(0.14548473, dtype=float32)}
{'loss_inverse': Array(1.0770925e-05, dtype=float32)}


  8%|▊         | 84079/1000000 [3:58:44<30:16:59,  8.40it/s]

{'loss': Array(0.13988316, dtype=float32), 'loss_cross_entropy': Array(0.13176543, dtype=float32)}
{'loss_inverse': Array(4.834344e-06, dtype=float32)}


  8%|▊         | 84089/1000000 [3:58:45<25:21:33, 10.03it/s]

{'loss': Array(0.14136471, dtype=float32), 'loss_cross_entropy': Array(0.13298799, dtype=float32)}
{'loss_inverse': Array(8.039295e-05, dtype=float32)}


  8%|▊         | 84099/1000000 [3:58:47<38:07:48,  6.67it/s]

{'loss': Array(0.14004245, dtype=float32), 'loss_cross_entropy': Array(0.13193037, dtype=float32)}
{'loss_inverse': Array(0.00010849, dtype=float32)}


  8%|▊         | 84109/1000000 [3:58:49<27:10:33,  9.36it/s]

{'loss': Array(0.14196935, dtype=float32), 'loss_cross_entropy': Array(0.13406216, dtype=float32)}
{'loss_inverse': Array(1.1289868e-05, dtype=float32)}


  8%|▊         | 84119/1000000 [3:58:50<25:25:32, 10.01it/s]

{'loss': Array(0.1605141, dtype=float32), 'loss_cross_entropy': Array(0.1514954, dtype=float32)}
{'loss_inverse': Array(6.120665e-06, dtype=float32)}


  8%|▊         | 84129/1000000 [3:58:52<29:42:30,  8.56it/s]

{'loss': Array(0.15238145, dtype=float32), 'loss_cross_entropy': Array(0.14401734, dtype=float32)}
{'loss_inverse': Array(5.571371e-06, dtype=float32)}


  8%|▊         | 84139/1000000 [3:58:53<25:43:05,  9.89it/s]

{'loss': Array(0.14686435, dtype=float32), 'loss_cross_entropy': Array(0.13853747, dtype=float32)}
{'loss_inverse': Array(6.5923535e-07, dtype=float32)}


  8%|▊         | 84149/1000000 [3:58:55<30:53:23,  8.24it/s]

{'loss': Array(0.15615061, dtype=float32), 'loss_cross_entropy': Array(0.14791064, dtype=float32)}
{'loss_inverse': Array(0.00037452, dtype=float32)}


  8%|▊         | 84159/1000000 [3:58:56<26:42:09,  9.53it/s]

{'loss': Array(0.16095096, dtype=float32), 'loss_cross_entropy': Array(0.15200996, dtype=float32)}
{'loss_inverse': Array(0.00030643, dtype=float32)}


  8%|▊         | 84169/1000000 [3:58:58<32:30:48,  7.82it/s]

{'loss': Array(0.13240771, dtype=float32), 'loss_cross_entropy': Array(0.12484733, dtype=float32)}
{'loss_inverse': Array(1.7905153e-06, dtype=float32)}


  8%|▊         | 84179/1000000 [3:59:00<27:00:14,  9.42it/s]

{'loss': Array(0.13357641, dtype=float32), 'loss_cross_entropy': Array(0.12655926, dtype=float32)}
{'loss_inverse': Array(8.2295934e-05, dtype=float32)}


  8%|▊         | 84189/1000000 [3:59:01<25:16:07, 10.07it/s]

{'loss': Array(0.15948175, dtype=float32), 'loss_cross_entropy': Array(0.15070973, dtype=float32)}
{'loss_inverse': Array(1.0548548e-05, dtype=float32)}


  8%|▊         | 84199/1000000 [3:59:03<28:11:22,  9.02it/s]

{'loss': Array(0.15979932, dtype=float32), 'loss_cross_entropy': Array(0.15038681, dtype=float32)}
{'loss_inverse': Array(8.212296e-06, dtype=float32)}


  8%|▊         | 84209/1000000 [3:59:04<25:33:09,  9.96it/s]

{'loss': Array(0.14058635, dtype=float32), 'loss_cross_entropy': Array(0.13284917, dtype=float32)}
{'loss_inverse': Array(4.0626983e-06, dtype=float32)}


  8%|▊         | 84219/1000000 [3:59:06<31:52:27,  7.98it/s]

{'loss': Array(0.14111887, dtype=float32), 'loss_cross_entropy': Array(0.13354038, dtype=float32)}
{'loss_inverse': Array(3.370644e-05, dtype=float32)}


  8%|▊         | 84229/1000000 [3:59:08<27:26:44,  9.27it/s]

{'loss': Array(0.14970595, dtype=float32), 'loss_cross_entropy': Array(0.14123012, dtype=float32)}
{'loss_inverse': Array(8.294175e-05, dtype=float32)}


  8%|▊         | 84239/1000000 [3:59:09<35:31:31,  7.16it/s]

{'loss': Array(0.132398, dtype=float32), 'loss_cross_entropy': Array(0.12444132, dtype=float32)}
{'loss_inverse': Array(1.23461205e-05, dtype=float32)}


  8%|▊         | 84249/1000000 [3:59:11<26:50:01,  9.48it/s]

{'loss': Array(0.15466698, dtype=float32), 'loss_cross_entropy': Array(0.14578511, dtype=float32)}
{'loss_inverse': Array(3.5838614e-06, dtype=float32)}


  8%|▊         | 84259/1000000 [3:59:12<25:04:41, 10.14it/s]

{'loss': Array(0.13504575, dtype=float32), 'loss_cross_entropy': Array(0.12657969, dtype=float32)}
{'loss_inverse': Array(3.0357169e-06, dtype=float32)}


  8%|▊         | 84269/1000000 [3:59:14<29:23:06,  8.66it/s]

{'loss': Array(0.12867239, dtype=float32), 'loss_cross_entropy': Array(0.12146448, dtype=float32)}
{'loss_inverse': Array(4.1424675e-05, dtype=float32)}


  8%|▊         | 84279/1000000 [3:59:15<26:31:20,  9.59it/s]

{'loss': Array(0.14905731, dtype=float32), 'loss_cross_entropy': Array(0.14088042, dtype=float32)}
{'loss_inverse': Array(2.0485932e-07, dtype=float32)}


  8%|▊         | 84289/1000000 [3:59:17<34:15:36,  7.42it/s]

{'loss': Array(0.13236028, dtype=float32), 'loss_cross_entropy': Array(0.12435324, dtype=float32)}
{'loss_inverse': Array(6.217852e-05, dtype=float32)}


  8%|▊         | 84299/1000000 [3:59:19<26:31:40,  9.59it/s]

{'loss': Array(0.15134224, dtype=float32), 'loss_cross_entropy': Array(0.14282519, dtype=float32)}
{'loss_inverse': Array(1.6595497e-05, dtype=float32)}


  8%|▊         | 84309/1000000 [3:59:20<34:23:03,  7.40it/s]

{'loss': Array(0.12441702, dtype=float32), 'loss_cross_entropy': Array(0.11674543, dtype=float32)}
{'loss_inverse': Array(2.1182836e-07, dtype=float32)}


  8%|▊         | 84319/1000000 [3:59:22<26:39:01,  9.54it/s]

{'loss': Array(0.13374136, dtype=float32), 'loss_cross_entropy': Array(0.1264037, dtype=float32)}
{'loss_inverse': Array(1.6174636e-06, dtype=float32)}


  8%|▊         | 84329/1000000 [3:59:23<24:50:41, 10.24it/s]

{'loss': Array(0.14984134, dtype=float32), 'loss_cross_entropy': Array(0.14168404, dtype=float32)}
{'loss_inverse': Array(6.7819275e-07, dtype=float32)}


  8%|▊         | 84339/1000000 [3:59:25<27:54:56,  9.11it/s]

{'loss': Array(0.1366393, dtype=float32), 'loss_cross_entropy': Array(0.1290424, dtype=float32)}
{'loss_inverse': Array(3.4821671e-06, dtype=float32)}


  8%|▊         | 84349/1000000 [3:59:26<25:34:20,  9.95it/s]

{'loss': Array(0.14988326, dtype=float32), 'loss_cross_entropy': Array(0.14132154, dtype=float32)}
{'loss_inverse': Array(0.0002446, dtype=float32)}


  8%|▊         | 84359/1000000 [3:59:28<40:00:31,  6.36it/s]

{'loss': Array(0.16146712, dtype=float32), 'loss_cross_entropy': Array(0.15296851, dtype=float32)}
{'loss_inverse': Array(0.00037861, dtype=float32)}


  8%|▊         | 84369/1000000 [3:59:30<28:05:32,  9.05it/s]

{'loss': Array(0.13713932, dtype=float32), 'loss_cross_entropy': Array(0.12930368, dtype=float32)}
{'loss_inverse': Array(4.182203e-06, dtype=float32)}


  8%|▊         | 84379/1000000 [3:59:31<25:44:00,  9.88it/s]

{'loss': Array(0.14479011, dtype=float32), 'loss_cross_entropy': Array(0.13649791, dtype=float32)}
{'loss_inverse': Array(3.58733e-07, dtype=float32)}


  8%|▊         | 84389/1000000 [3:59:33<29:13:08,  8.70it/s]

{'loss': Array(0.13577451, dtype=float32), 'loss_cross_entropy': Array(0.1276159, dtype=float32)}
{'loss_inverse': Array(5.371002e-06, dtype=float32)}


  8%|▊         | 84399/1000000 [3:59:34<25:19:59, 10.04it/s]

{'loss': Array(0.14987971, dtype=float32), 'loss_cross_entropy': Array(0.14141105, dtype=float32)}
{'loss_inverse': Array(2.560373e-05, dtype=float32)}


  8%|▊         | 84409/1000000 [3:59:36<29:23:51,  8.65it/s]

{'loss': Array(0.13765228, dtype=float32), 'loss_cross_entropy': Array(0.13031097, dtype=float32)}
{'loss_inverse': Array(7.656578e-07, dtype=float32)}


  8%|▊         | 84419/1000000 [3:59:37<25:34:27,  9.94it/s]

{'loss': Array(0.14167665, dtype=float32), 'loss_cross_entropy': Array(0.13306454, dtype=float32)}
{'loss_inverse': Array(2.5542337e-07, dtype=float32)}


  8%|▊         | 84429/1000000 [3:59:39<32:25:37,  7.84it/s]

{'loss': Array(0.13633363, dtype=float32), 'loss_cross_entropy': Array(0.12814048, dtype=float32)}
{'loss_inverse': Array(1.8124409e-05, dtype=float32)}


  8%|▊         | 84439/1000000 [3:59:41<25:56:18,  9.80it/s]

{'loss': Array(0.14546677, dtype=float32), 'loss_cross_entropy': Array(0.13677068, dtype=float32)}
{'loss_inverse': Array(7.63467e-08, dtype=float32)}


  8%|▊         | 84449/1000000 [3:59:42<24:38:41, 10.32it/s]

{'loss': Array(0.15515755, dtype=float32), 'loss_cross_entropy': Array(0.14646094, dtype=float32)}
{'loss_inverse': Array(1.035143e-07, dtype=float32)}


  8%|▊         | 84459/1000000 [3:59:44<29:07:03,  8.73it/s]

{'loss': Array(0.15213573, dtype=float32), 'loss_cross_entropy': Array(0.14369047, dtype=float32)}
{'loss_inverse': Array(1.7704709e-06, dtype=float32)}


  8%|▊         | 84469/1000000 [3:59:45<25:37:06,  9.93it/s]

{'loss': Array(0.1482444, dtype=float32), 'loss_cross_entropy': Array(0.13948679, dtype=float32)}
{'loss_inverse': Array(1.809151e-05, dtype=float32)}


  8%|▊         | 84479/1000000 [3:59:47<34:13:19,  7.43it/s]

{'loss': Array(0.1451803, dtype=float32), 'loss_cross_entropy': Array(0.13766545, dtype=float32)}
{'loss_inverse': Array(2.1767837e-05, dtype=float32)}


  8%|▊         | 84489/1000000 [3:59:48<26:15:30,  9.68it/s]

{'loss': Array(0.15951796, dtype=float32), 'loss_cross_entropy': Array(0.15014182, dtype=float32)}
{'loss_inverse': Array(4.6053984e-07, dtype=float32)}


  8%|▊         | 84499/1000000 [3:59:50<37:59:44,  6.69it/s]

{'loss': Array(0.1438322, dtype=float32), 'loss_cross_entropy': Array(0.13575143, dtype=float32)}
{'loss_inverse': Array(4.0356727e-07, dtype=float32)}


  8%|▊         | 84509/1000000 [3:59:58<87:00:16,  2.92it/s] 

{'loss': Array(0.14794381, dtype=float32), 'loss_cross_entropy': Array(0.13888758, dtype=float32)}
{'loss_inverse': Array(1.0795819e-06, dtype=float32)}


  8%|▊         | 84519/1000000 [4:00:00<35:48:06,  7.10it/s]

{'loss': Array(0.14377785, dtype=float32), 'loss_cross_entropy': Array(0.13599704, dtype=float32)}
{'loss_inverse': Array(3.880468e-07, dtype=float32)}


  8%|▊         | 84529/1000000 [4:00:01<30:56:58,  8.22it/s]

{'loss': Array(0.15262166, dtype=float32), 'loss_cross_entropy': Array(0.14399128, dtype=float32)}
{'loss_inverse': Array(5.5199996e-07, dtype=float32)}


  8%|▊         | 84539/1000000 [4:00:03<26:42:21,  9.52it/s]

{'loss': Array(0.14215663, dtype=float32), 'loss_cross_entropy': Array(0.13437004, dtype=float32)}
{'loss_inverse': Array(8.502613e-07, dtype=float32)}


  8%|▊         | 84549/1000000 [4:00:05<32:02:36,  7.94it/s]

{'loss': Array(0.15026234, dtype=float32), 'loss_cross_entropy': Array(0.14188348, dtype=float32)}
{'loss_inverse': Array(1.0767442e-06, dtype=float32)}


  8%|▊         | 84559/1000000 [4:00:06<25:47:21,  9.86it/s]

{'loss': Array(0.14491531, dtype=float32), 'loss_cross_entropy': Array(0.13680287, dtype=float32)}
{'loss_inverse': Array(1.8265426e-06, dtype=float32)}


  8%|▊         | 84569/1000000 [4:00:07<25:25:38, 10.00it/s]

{'loss': Array(0.12185233, dtype=float32), 'loss_cross_entropy': Array(0.11412533, dtype=float32)}
{'loss_inverse': Array(1.3024443e-07, dtype=float32)}


  8%|▊         | 84579/1000000 [4:00:09<29:08:46,  8.72it/s]

{'loss': Array(0.13725711, dtype=float32), 'loss_cross_entropy': Array(0.13008521, dtype=float32)}
{'loss_inverse': Array(1.8770026e-05, dtype=float32)}


  8%|▊         | 84589/1000000 [4:00:11<26:14:11,  9.69it/s]

{'loss': Array(0.13777994, dtype=float32), 'loss_cross_entropy': Array(0.12978517, dtype=float32)}
{'loss_inverse': Array(4.0235636e-06, dtype=float32)}


  8%|▊         | 84599/1000000 [4:00:12<31:43:05,  8.02it/s]

{'loss': Array(0.14703517, dtype=float32), 'loss_cross_entropy': Array(0.13849233, dtype=float32)}
{'loss_inverse': Array(3.2642834e-07, dtype=float32)}


  8%|▊         | 84609/1000000 [4:00:14<26:21:28,  9.65it/s]

{'loss': Array(0.15673749, dtype=float32), 'loss_cross_entropy': Array(0.14886689, dtype=float32)}
{'loss_inverse': Array(2.5616187e-06, dtype=float32)}


  8%|▊         | 84619/1000000 [4:00:16<34:16:03,  7.42it/s]

{'loss': Array(0.12056936, dtype=float32), 'loss_cross_entropy': Array(0.11380637, dtype=float32)}
{'loss_inverse': Array(2.109746e-07, dtype=float32)}


  8%|▊         | 84629/1000000 [4:00:17<26:07:12,  9.73it/s]

{'loss': Array(0.13464947, dtype=float32), 'loss_cross_entropy': Array(0.1269579, dtype=float32)}
{'loss_inverse': Array(9.1923916e-08, dtype=float32)}


  8%|▊         | 84639/1000000 [4:00:18<24:46:08, 10.27it/s]

{'loss': Array(0.1574847, dtype=float32), 'loss_cross_entropy': Array(0.14904839, dtype=float32)}
{'loss_inverse': Array(2.5028119e-06, dtype=float32)}


  8%|▊         | 84649/1000000 [4:00:20<28:50:01,  8.82it/s]

{'loss': Array(0.14495137, dtype=float32), 'loss_cross_entropy': Array(0.136895, dtype=float32)}
{'loss_inverse': Array(3.7522489e-06, dtype=float32)}


  8%|▊         | 84659/1000000 [4:00:22<25:37:36,  9.92it/s]

{'loss': Array(0.14642455, dtype=float32), 'loss_cross_entropy': Array(0.13843316, dtype=float32)}
{'loss_inverse': Array(3.1280162e-08, dtype=float32)}


  8%|▊         | 84669/1000000 [4:00:23<33:43:48,  7.54it/s]

{'loss': Array(0.13080911, dtype=float32), 'loss_cross_entropy': Array(0.12309833, dtype=float32)}
{'loss_inverse': Array(5.804131e-09, dtype=float32)}


  8%|▊         | 84679/1000000 [4:00:25<26:51:13,  9.47it/s]

{'loss': Array(0.1454163, dtype=float32), 'loss_cross_entropy': Array(0.13661595, dtype=float32)}
{'loss_inverse': Array(5.8754363e-08, dtype=float32)}


  8%|▊         | 84689/1000000 [4:00:27<34:44:01,  7.32it/s]

{'loss': Array(0.13045111, dtype=float32), 'loss_cross_entropy': Array(0.12299671, dtype=float32)}
{'loss_inverse': Array(7.2291355e-06, dtype=float32)}


  8%|▊         | 84699/1000000 [4:00:28<26:59:16,  9.42it/s]

{'loss': Array(0.14425021, dtype=float32), 'loss_cross_entropy': Array(0.1364622, dtype=float32)}
{'loss_inverse': Array(1.757623e-08, dtype=float32)}


  8%|▊         | 84709/1000000 [4:00:29<24:56:46, 10.19it/s]

{'loss': Array(0.13253574, dtype=float32), 'loss_cross_entropy': Array(0.12461754, dtype=float32)}
{'loss_inverse': Array(8.064562e-08, dtype=float32)}


  8%|▊         | 84719/1000000 [4:00:31<28:04:00,  9.06it/s]

{'loss': Array(0.14912336, dtype=float32), 'loss_cross_entropy': Array(0.14155826, dtype=float32)}
{'loss_inverse': Array(2.751848e-08, dtype=float32)}


  8%|▊         | 84729/1000000 [4:00:33<25:57:17,  9.80it/s]

{'loss': Array(0.13073319, dtype=float32), 'loss_cross_entropy': Array(0.12294848, dtype=float32)}
{'loss_inverse': Array(5.985167e-07, dtype=float32)}


  8%|▊         | 84739/1000000 [4:00:34<38:02:02,  6.68it/s]

{'loss': Array(0.15636389, dtype=float32), 'loss_cross_entropy': Array(0.14794281, dtype=float32)}
{'loss_inverse': Array(1.0314073e-05, dtype=float32)}


  8%|▊         | 84749/1000000 [4:00:36<26:47:07,  9.49it/s]

{'loss': Array(0.13950406, dtype=float32), 'loss_cross_entropy': Array(0.13196872, dtype=float32)}
{'loss_inverse': Array(1.587e-06, dtype=float32)}


  8%|▊         | 84759/1000000 [4:00:37<26:05:27,  9.74it/s]

{'loss': Array(0.14548692, dtype=float32), 'loss_cross_entropy': Array(0.13742578, dtype=float32)}
{'loss_inverse': Array(6.850655e-08, dtype=float32)}


  8%|▊         | 84769/1000000 [4:00:39<30:03:09,  8.46it/s]

{'loss': Array(0.15180515, dtype=float32), 'loss_cross_entropy': Array(0.1428927, dtype=float32)}
{'loss_inverse': Array(1.8941561e-08, dtype=float32)}


  8%|▊         | 84779/1000000 [4:00:41<25:46:42,  9.86it/s]

{'loss': Array(0.13954757, dtype=float32), 'loss_cross_entropy': Array(0.13137785, dtype=float32)}
{'loss_inverse': Array(0.00010676, dtype=float32)}


  8%|▊         | 84789/1000000 [4:00:42<29:10:51,  8.71it/s]

{'loss': Array(0.15047438, dtype=float32), 'loss_cross_entropy': Array(0.14204557, dtype=float32)}
{'loss_inverse': Array(2.512885e-08, dtype=float32)}


  8%|▊         | 84799/1000000 [4:00:44<26:08:07,  9.73it/s]

{'loss': Array(0.13874726, dtype=float32), 'loss_cross_entropy': Array(0.130905, dtype=float32)}
{'loss_inverse': Array(2.8343127e-06, dtype=float32)}


  8%|▊         | 84809/1000000 [4:00:46<32:05:25,  7.92it/s]

{'loss': Array(0.14250122, dtype=float32), 'loss_cross_entropy': Array(0.13486163, dtype=float32)}
{'loss_inverse': Array(1.1863898e-06, dtype=float32)}


  8%|▊         | 84819/1000000 [4:00:47<26:03:20,  9.76it/s]

{'loss': Array(0.13206087, dtype=float32), 'loss_cross_entropy': Array(0.12343526, dtype=float32)}
{'loss_inverse': Array(3.317241e-05, dtype=float32)}


  8%|▊         | 84829/1000000 [4:00:48<24:56:47, 10.19it/s]

{'loss': Array(0.16484605, dtype=float32), 'loss_cross_entropy': Array(0.15593061, dtype=float32)}
{'loss_inverse': Array(1.3680541e-08, dtype=float32)}


  8%|▊         | 84839/1000000 [4:00:50<29:10:18,  8.71it/s]

{'loss': Array(0.13712625, dtype=float32), 'loss_cross_entropy': Array(0.12921782, dtype=float32)}
{'loss_inverse': Array(9.499245e-09, dtype=float32)}


  8%|▊         | 84849/1000000 [4:00:52<26:16:32,  9.67it/s]

{'loss': Array(0.16262074, dtype=float32), 'loss_cross_entropy': Array(0.1536209, dtype=float32)}
{'loss_inverse': Array(1.1977025e-06, dtype=float32)}


  8%|▊         | 84859/1000000 [4:00:53<33:43:51,  7.54it/s]

{'loss': Array(0.14687827, dtype=float32), 'loss_cross_entropy': Array(0.13866714, dtype=float32)}
{'loss_inverse': Array(6.8790754e-08, dtype=float32)}


  8%|▊         | 84869/1000000 [4:00:55<26:03:48,  9.75it/s]

{'loss': Array(0.14748947, dtype=float32), 'loss_cross_entropy': Array(0.13910475, dtype=float32)}
{'loss_inverse': Array(9.674027e-07, dtype=float32)}


  8%|▊         | 84879/1000000 [4:00:56<38:15:36,  6.64it/s]

{'loss': Array(0.13301782, dtype=float32), 'loss_cross_entropy': Array(0.12564968, dtype=float32)}
{'loss_inverse': Array(1.557621e-07, dtype=float32)}


  8%|▊         | 84889/1000000 [4:00:58<27:53:01,  9.12it/s]

{'loss': Array(0.15989096, dtype=float32), 'loss_cross_entropy': Array(0.15094648, dtype=float32)}
{'loss_inverse': Array(1.1741595e-05, dtype=float32)}


  8%|▊         | 84899/1000000 [4:01:00<38:10:43,  6.66it/s]

{'loss': Array(0.14125761, dtype=float32), 'loss_cross_entropy': Array(0.13312948, dtype=float32)}
{'loss_inverse': Array(7.993434e-06, dtype=float32)}


  8%|▊         | 84909/1000000 [4:01:01<27:38:51,  9.19it/s]

{'loss': Array(0.15793684, dtype=float32), 'loss_cross_entropy': Array(0.14914443, dtype=float32)}
{'loss_inverse': Array(1.1374342e-07, dtype=float32)}


  8%|▊         | 84919/1000000 [4:01:03<25:20:22, 10.03it/s]

{'loss': Array(0.13504732, dtype=float32), 'loss_cross_entropy': Array(0.12662204, dtype=float32)}
{'loss_inverse': Array(0.00015107, dtype=float32)}


  8%|▊         | 84929/1000000 [4:01:04<33:49:17,  7.52it/s]

{'loss': Array(0.1343216, dtype=float32), 'loss_cross_entropy': Array(0.12651013, dtype=float32)}
{'loss_inverse': Array(2.1975854e-06, dtype=float32)}


  8%|▊         | 84939/1000000 [4:01:06<26:12:25,  9.70it/s]

{'loss': Array(0.14459601, dtype=float32), 'loss_cross_entropy': Array(0.13681136, dtype=float32)}
{'loss_inverse': Array(8.369824e-05, dtype=float32)}


  8%|▊         | 84949/1000000 [4:01:07<38:59:14,  6.52it/s]

{'loss': Array(0.14148349, dtype=float32), 'loss_cross_entropy': Array(0.1336284, dtype=float32)}
{'loss_inverse': Array(2.099289e-05, dtype=float32)}


  8%|▊         | 84959/1000000 [4:01:09<29:16:41,  8.68it/s]

{'loss': Array(0.13526796, dtype=float32), 'loss_cross_entropy': Array(0.1274061, dtype=float32)}
{'loss_inverse': Array(4.9632325e-07, dtype=float32)}


  8%|▊         | 84969/1000000 [4:01:10<25:55:15,  9.81it/s]

{'loss': Array(0.14107817, dtype=float32), 'loss_cross_entropy': Array(0.13263889, dtype=float32)}
{'loss_inverse': Array(1.40918755e-05, dtype=float32)}


  8%|▊         | 84979/1000000 [4:01:12<28:19:51,  8.97it/s]

{'loss': Array(0.13881876, dtype=float32), 'loss_cross_entropy': Array(0.1310815, dtype=float32)}
{'loss_inverse': Array(2.6939708e-07, dtype=float32)}


  8%|▊         | 84989/1000000 [4:01:14<25:48:05,  9.85it/s]

{'loss': Array(0.12218078, dtype=float32), 'loss_cross_entropy': Array(0.11468138, dtype=float32)}
{'loss_inverse': Array(2.6789118e-08, dtype=float32)}


  8%|▊         | 84999/1000000 [4:01:15<33:28:00,  7.59it/s]

{'loss': Array(0.1538189, dtype=float32), 'loss_cross_entropy': Array(0.14573628, dtype=float32)}
{'loss_inverse': Array(1.2554506e-07, dtype=float32)}


  9%|▊         | 85009/1000000 [4:01:23<85:37:42,  2.97it/s] 

{'loss': Array(0.12977883, dtype=float32), 'loss_cross_entropy': Array(0.12189735, dtype=float32)}
{'loss_inverse': Array(9.905015e-08, dtype=float32)}


  9%|▊         | 85019/1000000 [4:01:25<40:54:47,  6.21it/s]

{'loss': Array(0.14127898, dtype=float32), 'loss_cross_entropy': Array(0.1329457, dtype=float32)}
{'loss_inverse': Array(4.8580077e-06, dtype=float32)}


  9%|▊         | 85029/1000000 [4:01:26<27:21:50,  9.29it/s]

{'loss': Array(0.12083396, dtype=float32), 'loss_cross_entropy': Array(0.11348396, dtype=float32)}
{'loss_inverse': Array(9.178601e-06, dtype=float32)}


  9%|▊         | 85039/1000000 [4:01:28<38:23:31,  6.62it/s]

{'loss': Array(0.12942284, dtype=float32), 'loss_cross_entropy': Array(0.12243777, dtype=float32)}
{'loss_inverse': Array(9.8410725e-05, dtype=float32)}


  9%|▊         | 85049/1000000 [4:01:30<27:45:24,  9.16it/s]

{'loss': Array(0.14426188, dtype=float32), 'loss_cross_entropy': Array(0.13604896, dtype=float32)}
{'loss_inverse': Array(3.5608468e-07, dtype=float32)}


  9%|▊         | 85059/1000000 [4:01:31<37:51:41,  6.71it/s]

{'loss': Array(0.14386258, dtype=float32), 'loss_cross_entropy': Array(0.13565423, dtype=float32)}
{'loss_inverse': Array(7.9325744e-08, dtype=float32)}


  9%|▊         | 85069/1000000 [4:01:33<26:53:55,  9.45it/s]

{'loss': Array(0.13938233, dtype=float32), 'loss_cross_entropy': Array(0.13146226, dtype=float32)}
{'loss_inverse': Array(1.4353252e-06, dtype=float32)}


  9%|▊         | 85079/1000000 [4:01:34<25:43:08,  9.88it/s]

{'loss': Array(0.17624542, dtype=float32), 'loss_cross_entropy': Array(0.16664845, dtype=float32)}
{'loss_inverse': Array(3.9653776e-05, dtype=float32)}


  9%|▊         | 85089/1000000 [4:01:36<33:35:12,  7.57it/s]

{'loss': Array(0.14792113, dtype=float32), 'loss_cross_entropy': Array(0.1392723, dtype=float32)}
{'loss_inverse': Array(4.2486263e-06, dtype=float32)}


  9%|▊         | 85099/1000000 [4:01:37<26:12:56,  9.69it/s]

{'loss': Array(0.1492696, dtype=float32), 'loss_cross_entropy': Array(0.14037108, dtype=float32)}
{'loss_inverse': Array(0.00013615, dtype=float32)}


  9%|▊         | 85109/1000000 [4:01:39<25:34:12,  9.94it/s]

{'loss': Array(0.14344692, dtype=float32), 'loss_cross_entropy': Array(0.13530649, dtype=float32)}
{'loss_inverse': Array(2.0647237e-06, dtype=float32)}


  9%|▊         | 85119/1000000 [4:01:41<28:35:31,  8.89it/s]

{'loss': Array(0.1352468, dtype=float32), 'loss_cross_entropy': Array(0.12737025, dtype=float32)}
{'loss_inverse': Array(2.668328e-06, dtype=float32)}


  9%|▊         | 85129/1000000 [4:01:42<25:12:07, 10.08it/s]

{'loss': Array(0.14732902, dtype=float32), 'loss_cross_entropy': Array(0.13928734, dtype=float32)}
{'loss_inverse': Array(6.363551e-05, dtype=float32)}


  9%|▊         | 85139/1000000 [4:01:44<31:07:15,  8.17it/s]

{'loss': Array(0.16532229, dtype=float32), 'loss_cross_entropy': Array(0.15610854, dtype=float32)}
{'loss_inverse': Array(2.5365216e-05, dtype=float32)}


  9%|▊         | 85149/1000000 [4:01:45<25:53:29,  9.81it/s]

{'loss': Array(0.14580025, dtype=float32), 'loss_cross_entropy': Array(0.13749646, dtype=float32)}
{'loss_inverse': Array(7.2269227e-06, dtype=float32)}


  9%|▊         | 85159/1000000 [4:01:47<34:09:32,  7.44it/s]

{'loss': Array(0.1434179, dtype=float32), 'loss_cross_entropy': Array(0.13559271, dtype=float32)}
{'loss_inverse': Array(3.421332e-05, dtype=float32)}


  9%|▊         | 85169/1000000 [4:01:48<26:04:31,  9.75it/s]

{'loss': Array(0.14862002, dtype=float32), 'loss_cross_entropy': Array(0.14102808, dtype=float32)}
{'loss_inverse': Array(1.2045214e-07, dtype=float32)}


  9%|▊         | 85179/1000000 [4:01:50<33:40:40,  7.55it/s]

{'loss': Array(0.15062681, dtype=float32), 'loss_cross_entropy': Array(0.14220284, dtype=float32)}
{'loss_inverse': Array(1.5162084e-07, dtype=float32)}


  9%|▊         | 85189/1000000 [4:01:51<26:35:13,  9.56it/s]

{'loss': Array(0.14510159, dtype=float32), 'loss_cross_entropy': Array(0.13715129, dtype=float32)}
{'loss_inverse': Array(0.00015396, dtype=float32)}


  9%|▊         | 85199/1000000 [4:01:53<25:44:16,  9.87it/s]

{'loss': Array(0.14356232, dtype=float32), 'loss_cross_entropy': Array(0.13582526, dtype=float32)}
{'loss_inverse': Array(1.1809075e-05, dtype=float32)}


  9%|▊         | 85209/1000000 [4:01:55<31:29:23,  8.07it/s]

{'loss': Array(0.1372635, dtype=float32), 'loss_cross_entropy': Array(0.12892325, dtype=float32)}
{'loss_inverse': Array(5.7049624e-07, dtype=float32)}


  9%|▊         | 85219/1000000 [4:01:56<25:46:41,  9.86it/s]

{'loss': Array(0.1434605, dtype=float32), 'loss_cross_entropy': Array(0.13536882, dtype=float32)}
{'loss_inverse': Array(4.016198e-05, dtype=float32)}


  9%|▊         | 85229/1000000 [4:01:58<34:07:52,  7.44it/s]

{'loss': Array(0.13100238, dtype=float32), 'loss_cross_entropy': Array(0.12317784, dtype=float32)}
{'loss_inverse': Array(5.9255613e-05, dtype=float32)}


  9%|▊         | 85239/1000000 [4:01:59<26:42:02,  9.52it/s]

{'loss': Array(0.14425571, dtype=float32), 'loss_cross_entropy': Array(0.13654472, dtype=float32)}
{'loss_inverse': Array(9.038387e-07, dtype=float32)}


  9%|▊         | 85249/1000000 [4:02:01<37:46:56,  6.73it/s]

{'loss': Array(0.13670887, dtype=float32), 'loss_cross_entropy': Array(0.1288735, dtype=float32)}
{'loss_inverse': Array(3.1230404e-05, dtype=float32)}


  9%|▊         | 85259/1000000 [4:02:02<26:53:38,  9.45it/s]

{'loss': Array(0.15253662, dtype=float32), 'loss_cross_entropy': Array(0.14414911, dtype=float32)}
{'loss_inverse': Array(4.8605552e-06, dtype=float32)}


  9%|▊         | 85269/1000000 [4:02:04<26:30:40,  9.58it/s]

{'loss': Array(0.14666586, dtype=float32), 'loss_cross_entropy': Array(0.13846831, dtype=float32)}
{'loss_inverse': Array(3.4873943e-07, dtype=float32)}


  9%|▊         | 85279/1000000 [4:02:06<29:09:20,  8.71it/s]

{'loss': Array(0.13507748, dtype=float32), 'loss_cross_entropy': Array(0.12746385, dtype=float32)}
{'loss_inverse': Array(4.276993e-07, dtype=float32)}


  9%|▊         | 85289/1000000 [4:02:07<26:15:20,  9.68it/s]

{'loss': Array(0.1495219, dtype=float32), 'loss_cross_entropy': Array(0.14101988, dtype=float32)}
{'loss_inverse': Array(3.6424305e-07, dtype=float32)}


  9%|▊         | 85299/1000000 [4:02:09<34:32:36,  7.36it/s]

{'loss': Array(0.12669773, dtype=float32), 'loss_cross_entropy': Array(0.11934859, dtype=float32)}
{'loss_inverse': Array(3.5016208e-07, dtype=float32)}


  9%|▊         | 85309/1000000 [4:02:10<26:19:53,  9.65it/s]

{'loss': Array(0.1508416, dtype=float32), 'loss_cross_entropy': Array(0.14245047, dtype=float32)}
{'loss_inverse': Array(3.265901e-07, dtype=float32)}


  9%|▊         | 85319/1000000 [4:02:12<25:19:54, 10.03it/s]

{'loss': Array(0.15100995, dtype=float32), 'loss_cross_entropy': Array(0.1423691, dtype=float32)}
{'loss_inverse': Array(0.00015242, dtype=float32)}


  9%|▊         | 85329/1000000 [4:02:13<28:09:43,  9.02it/s]

{'loss': Array(0.16197196, dtype=float32), 'loss_cross_entropy': Array(0.15266596, dtype=float32)}
{'loss_inverse': Array(1.3050648e-07, dtype=float32)}


  9%|▊         | 85339/1000000 [4:02:15<25:52:19,  9.82it/s]

{'loss': Array(0.14170244, dtype=float32), 'loss_cross_entropy': Array(0.13307723, dtype=float32)}
{'loss_inverse': Array(6.836818e-05, dtype=float32)}


  9%|▊         | 85349/1000000 [4:02:17<28:13:27,  9.00it/s]

{'loss': Array(0.13419078, dtype=float32), 'loss_cross_entropy': Array(0.12635326, dtype=float32)}
{'loss_inverse': Array(3.052264e-07, dtype=float32)}


  9%|▊         | 85359/1000000 [4:02:18<25:03:29, 10.14it/s]

{'loss': Array(0.14333813, dtype=float32), 'loss_cross_entropy': Array(0.13463509, dtype=float32)}
{'loss_inverse': Array(8.019553e-07, dtype=float32)}


  9%|▊         | 85369/1000000 [4:02:20<27:34:36,  9.21it/s]

{'loss': Array(0.1498748, dtype=float32), 'loss_cross_entropy': Array(0.14161439, dtype=float32)}
{'loss_inverse': Array(7.478403e-07, dtype=float32)}


  9%|▊         | 85379/1000000 [4:02:21<25:16:15, 10.05it/s]

{'loss': Array(0.14257437, dtype=float32), 'loss_cross_entropy': Array(0.1342675, dtype=float32)}
{'loss_inverse': Array(0.00021106, dtype=float32)}


  9%|▊         | 85389/1000000 [4:02:23<29:11:05,  8.71it/s]

{'loss': Array(0.13957754, dtype=float32), 'loss_cross_entropy': Array(0.13172488, dtype=float32)}
{'loss_inverse': Array(0.00032906, dtype=float32)}


  9%|▊         | 85399/1000000 [4:02:24<25:41:19,  9.89it/s]

{'loss': Array(0.14499715, dtype=float32), 'loss_cross_entropy': Array(0.13709389, dtype=float32)}
{'loss_inverse': Array(1.3421372e-05, dtype=float32)}


  9%|▊         | 85409/1000000 [4:02:26<31:41:36,  8.02it/s]

{'loss': Array(0.1374111, dtype=float32), 'loss_cross_entropy': Array(0.12970592, dtype=float32)}
{'loss_inverse': Array(0.00011001, dtype=float32)}


  9%|▊         | 85419/1000000 [4:02:27<26:42:31,  9.51it/s]

{'loss': Array(0.14870617, dtype=float32), 'loss_cross_entropy': Array(0.13954759, dtype=float32)}
{'loss_inverse': Array(1.8345644e-06, dtype=float32)}


  9%|▊         | 85429/1000000 [4:02:29<33:57:19,  7.48it/s]

{'loss': Array(0.14944434, dtype=float32), 'loss_cross_entropy': Array(0.14095835, dtype=float32)}
{'loss_inverse': Array(0.00027629, dtype=float32)}


  9%|▊         | 85439/1000000 [4:02:31<27:08:00,  9.36it/s]

{'loss': Array(0.1394409, dtype=float32), 'loss_cross_entropy': Array(0.13133535, dtype=float32)}
{'loss_inverse': Array(4.1515195e-06, dtype=float32)}


  9%|▊         | 85449/1000000 [4:02:32<25:30:06,  9.96it/s]

{'loss': Array(0.12898244, dtype=float32), 'loss_cross_entropy': Array(0.12146167, dtype=float32)}
{'loss_inverse': Array(0.00011145, dtype=float32)}


  9%|▊         | 85459/1000000 [4:02:34<30:11:58,  8.41it/s]

{'loss': Array(0.12683992, dtype=float32), 'loss_cross_entropy': Array(0.11990009, dtype=float32)}
{'loss_inverse': Array(0.00018269, dtype=float32)}


  9%|▊         | 85469/1000000 [4:02:35<25:36:21,  9.92it/s]

{'loss': Array(0.13195427, dtype=float32), 'loss_cross_entropy': Array(0.12424462, dtype=float32)}
{'loss_inverse': Array(5.7796886e-05, dtype=float32)}


  9%|▊         | 85479/1000000 [4:02:37<34:00:02,  7.47it/s]

{'loss': Array(0.15105642, dtype=float32), 'loss_cross_entropy': Array(0.14271449, dtype=float32)}
{'loss_inverse': Array(8.537218e-05, dtype=float32)}


  9%|▊         | 85489/1000000 [4:02:39<27:33:50,  9.22it/s]

{'loss': Array(0.14352892, dtype=float32), 'loss_cross_entropy': Array(0.13571271, dtype=float32)}
{'loss_inverse': Array(3.0131092e-05, dtype=float32)}


  9%|▊         | 85499/1000000 [4:02:40<34:47:41,  7.30it/s]

{'loss': Array(0.1846038, dtype=float32), 'loss_cross_entropy': Array(0.17378955, dtype=float32)}
{'loss_inverse': Array(0.00011571, dtype=float32)}


  9%|▊         | 85509/1000000 [4:02:48<87:07:07,  2.92it/s] 

{'loss': Array(0.16140328, dtype=float32), 'loss_cross_entropy': Array(0.15215485, dtype=float32)}
{'loss_inverse': Array(2.7481497e-06, dtype=float32)}


  9%|▊         | 85519/1000000 [4:02:50<44:13:20,  5.74it/s]

{'loss': Array(0.14244497, dtype=float32), 'loss_cross_entropy': Array(0.13422692, dtype=float32)}
{'loss_inverse': Array(2.8660766e-05, dtype=float32)}


  9%|▊         | 85529/1000000 [4:02:52<27:43:44,  9.16it/s]

{'loss': Array(0.13569202, dtype=float32), 'loss_cross_entropy': Array(0.12754834, dtype=float32)}
{'loss_inverse': Array(0.00021253, dtype=float32)}


  9%|▊         | 85539/1000000 [4:02:53<25:26:13,  9.99it/s]

{'loss': Array(0.15421419, dtype=float32), 'loss_cross_entropy': Array(0.14512306, dtype=float32)}
{'loss_inverse': Array(1.881783e-05, dtype=float32)}


  9%|▊         | 85549/1000000 [4:02:55<31:00:34,  8.19it/s]

{'loss': Array(0.1399406, dtype=float32), 'loss_cross_entropy': Array(0.131901, dtype=float32)}
{'loss_inverse': Array(0.00012727, dtype=float32)}


  9%|▊         | 85559/1000000 [4:02:56<26:08:36,  9.72it/s]

{'loss': Array(0.13347612, dtype=float32), 'loss_cross_entropy': Array(0.12575981, dtype=float32)}
{'loss_inverse': Array(9.516025e-07, dtype=float32)}


  9%|▊         | 85569/1000000 [4:02:58<33:58:50,  7.48it/s]

{'loss': Array(0.15479776, dtype=float32), 'loss_cross_entropy': Array(0.14689958, dtype=float32)}
{'loss_inverse': Array(1.2336439e-05, dtype=float32)}


  9%|▊         | 85579/1000000 [4:02:59<25:57:05,  9.79it/s]

{'loss': Array(0.13281439, dtype=float32), 'loss_cross_entropy': Array(0.12486789, dtype=float32)}
{'loss_inverse': Array(0.0001971, dtype=float32)}


  9%|▊         | 85589/1000000 [4:03:01<37:43:55,  6.73it/s]

{'loss': Array(0.12531526, dtype=float32), 'loss_cross_entropy': Array(0.11768425, dtype=float32)}
{'loss_inverse': Array(0.00025476, dtype=float32)}


  9%|▊         | 85599/1000000 [4:03:02<26:56:56,  9.43it/s]

{'loss': Array(0.15514405, dtype=float32), 'loss_cross_entropy': Array(0.14657378, dtype=float32)}
{'loss_inverse': Array(1.6616672e-05, dtype=float32)}


  9%|▊         | 85609/1000000 [4:03:04<25:29:27,  9.96it/s]

{'loss': Array(0.12698615, dtype=float32), 'loss_cross_entropy': Array(0.11959083, dtype=float32)}
{'loss_inverse': Array(0.00030757, dtype=float32)}


  9%|▊         | 85619/1000000 [4:03:06<29:40:23,  8.56it/s]

{'loss': Array(0.14294975, dtype=float32), 'loss_cross_entropy': Array(0.13465305, dtype=float32)}
{'loss_inverse': Array(0.00042304, dtype=float32)}


  9%|▊         | 85629/1000000 [4:03:07<26:48:56,  9.47it/s]

{'loss': Array(0.14939988, dtype=float32), 'loss_cross_entropy': Array(0.14114885, dtype=float32)}
{'loss_inverse': Array(0.00019421, dtype=float32)}


  9%|▊         | 85639/1000000 [4:03:09<34:40:55,  7.32it/s]

{'loss': Array(0.1527687, dtype=float32), 'loss_cross_entropy': Array(0.14372654, dtype=float32)}
{'loss_inverse': Array(0.00013949, dtype=float32)}


  9%|▊         | 85649/1000000 [4:03:10<26:45:42,  9.49it/s]

{'loss': Array(0.135257, dtype=float32), 'loss_cross_entropy': Array(0.12760535, dtype=float32)}
{'loss_inverse': Array(7.8686975e-07, dtype=float32)}


  9%|▊         | 85659/1000000 [4:03:12<25:15:04, 10.06it/s]

{'loss': Array(0.13111936, dtype=float32), 'loss_cross_entropy': Array(0.12345587, dtype=float32)}
{'loss_inverse': Array(0.00031959, dtype=float32)}


  9%|▊         | 85669/1000000 [4:03:14<28:52:16,  8.80it/s]

{'loss': Array(0.12464817, dtype=float32), 'loss_cross_entropy': Array(0.11704586, dtype=float32)}
{'loss_inverse': Array(2.3936088e-06, dtype=float32)}


  9%|▊         | 85679/1000000 [4:03:15<26:17:07,  9.66it/s]

{'loss': Array(0.14293814, dtype=float32), 'loss_cross_entropy': Array(0.13461383, dtype=float32)}
{'loss_inverse': Array(0.00073605, dtype=float32)}


  9%|▊         | 85689/1000000 [4:03:17<28:37:32,  8.87it/s]

{'loss': Array(0.13475393, dtype=float32), 'loss_cross_entropy': Array(0.12726668, dtype=float32)}
{'loss_inverse': Array(0.00017105, dtype=float32)}


  9%|▊         | 85699/1000000 [4:03:18<25:07:24, 10.11it/s]

{'loss': Array(0.13551573, dtype=float32), 'loss_cross_entropy': Array(0.12765633, dtype=float32)}
{'loss_inverse': Array(1.7759722e-07, dtype=float32)}


  9%|▊         | 85709/1000000 [4:03:20<27:46:54,  9.14it/s]

{'loss': Array(0.13373365, dtype=float32), 'loss_cross_entropy': Array(0.12572832, dtype=float32)}
{'loss_inverse': Array(0.00017888, dtype=float32)}


  9%|▊         | 85719/1000000 [4:03:21<25:03:00, 10.14it/s]

{'loss': Array(0.14611714, dtype=float32), 'loss_cross_entropy': Array(0.13790521, dtype=float32)}
{'loss_inverse': Array(3.9802558e-06, dtype=float32)}


  9%|▊         | 85729/1000000 [4:03:23<29:16:43,  8.67it/s]

{'loss': Array(0.1280379, dtype=float32), 'loss_cross_entropy': Array(0.12026584, dtype=float32)}
{'loss_inverse': Array(1.300859e-05, dtype=float32)}


  9%|▊         | 85739/1000000 [4:03:25<25:51:58,  9.82it/s]

{'loss': Array(0.15158348, dtype=float32), 'loss_cross_entropy': Array(0.14336616, dtype=float32)}
{'loss_inverse': Array(6.13252e-05, dtype=float32)}


  9%|▊         | 85749/1000000 [4:03:26<31:53:48,  7.96it/s]

{'loss': Array(0.16368006, dtype=float32), 'loss_cross_entropy': Array(0.15510866, dtype=float32)}
{'loss_inverse': Array(3.0196004e-06, dtype=float32)}


  9%|▊         | 85759/1000000 [4:03:28<26:02:44,  9.75it/s]

{'loss': Array(0.14391395, dtype=float32), 'loss_cross_entropy': Array(0.13530922, dtype=float32)}
{'loss_inverse': Array(1.0451736e-05, dtype=float32)}


  9%|▊         | 85769/1000000 [4:03:30<34:34:05,  7.35it/s]

{'loss': Array(0.1650788, dtype=float32), 'loss_cross_entropy': Array(0.15626244, dtype=float32)}
{'loss_inverse': Array(3.588334e-05, dtype=float32)}


  9%|▊         | 85779/1000000 [4:03:31<26:23:56,  9.62it/s]

{'loss': Array(0.14093249, dtype=float32), 'loss_cross_entropy': Array(0.13323204, dtype=float32)}
{'loss_inverse': Array(0.00029281, dtype=float32)}


  9%|▊         | 85789/1000000 [4:03:32<25:03:43, 10.13it/s]

{'loss': Array(0.12691493, dtype=float32), 'loss_cross_entropy': Array(0.12003932, dtype=float32)}
{'loss_inverse': Array(0.00010729, dtype=float32)}


  9%|▊         | 85799/1000000 [4:03:34<29:14:58,  8.68it/s]

{'loss': Array(0.13828748, dtype=float32), 'loss_cross_entropy': Array(0.1298295, dtype=float32)}
{'loss_inverse': Array(0.00021791, dtype=float32)}


  9%|▊         | 85809/1000000 [4:03:36<26:48:38,  9.47it/s]

{'loss': Array(0.14872654, dtype=float32), 'loss_cross_entropy': Array(0.14030747, dtype=float32)}
{'loss_inverse': Array(2.047377e-06, dtype=float32)}


  9%|▊         | 85819/1000000 [4:03:37<33:50:46,  7.50it/s]

{'loss': Array(0.11518174, dtype=float32), 'loss_cross_entropy': Array(0.10871299, dtype=float32)}
{'loss_inverse': Array(3.1745923e-05, dtype=float32)}


  9%|▊         | 85829/1000000 [4:03:39<26:47:14,  9.48it/s]

{'loss': Array(0.15286033, dtype=float32), 'loss_cross_entropy': Array(0.14352034, dtype=float32)}
{'loss_inverse': Array(1.4037536e-06, dtype=float32)}


  9%|▊         | 85839/1000000 [4:03:41<34:50:19,  7.29it/s]

{'loss': Array(0.1511536, dtype=float32), 'loss_cross_entropy': Array(0.14324678, dtype=float32)}
{'loss_inverse': Array(8.681009e-05, dtype=float32)}


  9%|▊         | 85849/1000000 [4:03:42<26:55:26,  9.43it/s]

{'loss': Array(0.14561631, dtype=float32), 'loss_cross_entropy': Array(0.13757443, dtype=float32)}
{'loss_inverse': Array(7.758767e-07, dtype=float32)}


  9%|▊         | 85859/1000000 [4:03:44<25:57:31,  9.78it/s]

{'loss': Array(0.14549321, dtype=float32), 'loss_cross_entropy': Array(0.13769445, dtype=float32)}
{'loss_inverse': Array(3.4571072e-05, dtype=float32)}


  9%|▊         | 85869/1000000 [4:03:45<28:21:41,  8.95it/s]

{'loss': Array(0.14195302, dtype=float32), 'loss_cross_entropy': Array(0.1335416, dtype=float32)}
{'loss_inverse': Array(4.40312e-06, dtype=float32)}


  9%|▊         | 85879/1000000 [4:03:47<25:48:35,  9.84it/s]

{'loss': Array(0.14383048, dtype=float32), 'loss_cross_entropy': Array(0.13608284, dtype=float32)}
{'loss_inverse': Array(0.00011922, dtype=float32)}


  9%|▊         | 85889/1000000 [4:03:49<37:09:04,  6.83it/s]

{'loss': Array(0.15935257, dtype=float32), 'loss_cross_entropy': Array(0.14994769, dtype=float32)}
{'loss_inverse': Array(8.663995e-07, dtype=float32)}


  9%|▊         | 85899/1000000 [4:03:50<26:41:17,  9.51it/s]

{'loss': Array(0.14372616, dtype=float32), 'loss_cross_entropy': Array(0.13554713, dtype=float32)}
{'loss_inverse': Array(1.651072e-06, dtype=float32)}


  9%|▊         | 85909/1000000 [4:03:51<24:45:32, 10.26it/s]

{'loss': Array(0.13702358, dtype=float32), 'loss_cross_entropy': Array(0.12922437, dtype=float32)}
{'loss_inverse': Array(1.0280959e-06, dtype=float32)}


  9%|▊         | 85919/1000000 [4:03:53<28:46:48,  8.82it/s]

{'loss': Array(0.15149707, dtype=float32), 'loss_cross_entropy': Array(0.14305152, dtype=float32)}
{'loss_inverse': Array(6.525914e-06, dtype=float32)}


  9%|▊         | 85929/1000000 [4:03:54<25:26:24,  9.98it/s]

{'loss': Array(0.14720236, dtype=float32), 'loss_cross_entropy': Array(0.13948578, dtype=float32)}
{'loss_inverse': Array(2.8461804e-07, dtype=float32)}


  9%|▊         | 85939/1000000 [4:03:56<29:05:15,  8.73it/s]

{'loss': Array(0.13999294, dtype=float32), 'loss_cross_entropy': Array(0.13126865, dtype=float32)}
{'loss_inverse': Array(0.00010103, dtype=float32)}


  9%|▊         | 85949/1000000 [4:03:58<26:17:55,  9.65it/s]

{'loss': Array(0.1374141, dtype=float32), 'loss_cross_entropy': Array(0.1290075, dtype=float32)}
{'loss_inverse': Array(7.398725e-06, dtype=float32)}


  9%|▊         | 85959/1000000 [4:03:59<31:58:31,  7.94it/s]

{'loss': Array(0.15744984, dtype=float32), 'loss_cross_entropy': Array(0.14833173, dtype=float32)}
{'loss_inverse': Array(4.7553374e-05, dtype=float32)}


  9%|▊         | 85969/1000000 [4:04:01<26:16:48,  9.66it/s]

{'loss': Array(0.15855888, dtype=float32), 'loss_cross_entropy': Array(0.14983532, dtype=float32)}
{'loss_inverse': Array(7.652089e-05, dtype=float32)}


  9%|▊         | 85979/1000000 [4:04:02<25:33:46,  9.93it/s]

{'loss': Array(0.13275672, dtype=float32), 'loss_cross_entropy': Array(0.12538035, dtype=float32)}
{'loss_inverse': Array(2.7771657e-07, dtype=float32)}


  9%|▊         | 85989/1000000 [4:04:04<29:26:45,  8.62it/s]

{'loss': Array(0.16511817, dtype=float32), 'loss_cross_entropy': Array(0.15638945, dtype=float32)}
{'loss_inverse': Array(4.417374e-06, dtype=float32)}


  9%|▊         | 85999/1000000 [4:04:06<25:51:41,  9.82it/s]

{'loss': Array(0.14383712, dtype=float32), 'loss_cross_entropy': Array(0.13561916, dtype=float32)}
{'loss_inverse': Array(9.866327e-06, dtype=float32)}


  9%|▊         | 86009/1000000 [4:04:14<96:50:17,  2.62it/s] 

{'loss': Array(0.1461704, dtype=float32), 'loss_cross_entropy': Array(0.13834207, dtype=float32)}
{'loss_inverse': Array(1.1608448e-05, dtype=float32)}


  9%|▊         | 86019/1000000 [4:04:16<37:05:42,  6.84it/s] 

{'loss': Array(0.14225136, dtype=float32), 'loss_cross_entropy': Array(0.1351541, dtype=float32)}
{'loss_inverse': Array(5.253578e-05, dtype=float32)}


  9%|▊         | 86029/1000000 [4:04:17<39:45:10,  6.39it/s]

{'loss': Array(0.13508283, dtype=float32), 'loss_cross_entropy': Array(0.1275578, dtype=float32)}
{'loss_inverse': Array(8.788273e-06, dtype=float32)}


  9%|▊         | 86039/1000000 [4:04:19<27:35:09,  9.20it/s]

{'loss': Array(0.14724858, dtype=float32), 'loss_cross_entropy': Array(0.1391398, dtype=float32)}
{'loss_inverse': Array(3.755478e-05, dtype=float32)}


  9%|▊         | 86049/1000000 [4:04:20<25:01:22, 10.15it/s]

{'loss': Array(0.13463755, dtype=float32), 'loss_cross_entropy': Array(0.12693062, dtype=float32)}
{'loss_inverse': Array(2.7576692e-07, dtype=float32)}


  9%|▊         | 86059/1000000 [4:04:22<27:42:14,  9.16it/s]

{'loss': Array(0.12915337, dtype=float32), 'loss_cross_entropy': Array(0.12210192, dtype=float32)}
{'loss_inverse': Array(1.06723405e-07, dtype=float32)}


  9%|▊         | 86069/1000000 [4:04:23<24:57:24, 10.17it/s]

{'loss': Array(0.13941355, dtype=float32), 'loss_cross_entropy': Array(0.13067906, dtype=float32)}
{'loss_inverse': Array(3.1477455e-05, dtype=float32)}


  9%|▊         | 86079/1000000 [4:04:25<29:27:51,  8.62it/s]

{'loss': Array(0.15419775, dtype=float32), 'loss_cross_entropy': Array(0.14531039, dtype=float32)}
{'loss_inverse': Array(8.014872e-07, dtype=float32)}


  9%|▊         | 86089/1000000 [4:04:26<25:08:20, 10.10it/s]

{'loss': Array(0.13764326, dtype=float32), 'loss_cross_entropy': Array(0.13012885, dtype=float32)}
{'loss_inverse': Array(2.7926308e-05, dtype=float32)}


  9%|▊         | 86099/1000000 [4:04:28<38:10:39,  6.65it/s]

{'loss': Array(0.13778038, dtype=float32), 'loss_cross_entropy': Array(0.1302794, dtype=float32)}
{'loss_inverse': Array(5.753565e-07, dtype=float32)}


  9%|▊         | 86109/1000000 [4:04:30<27:28:25,  9.24it/s]

{'loss': Array(0.14279677, dtype=float32), 'loss_cross_entropy': Array(0.13476796, dtype=float32)}
{'loss_inverse': Array(9.50891e-05, dtype=float32)}


  9%|▊         | 86119/1000000 [4:04:31<25:36:10,  9.92it/s]

{'loss': Array(0.13597909, dtype=float32), 'loss_cross_entropy': Array(0.12842439, dtype=float32)}
{'loss_inverse': Array(2.8932966e-06, dtype=float32)}


  9%|▊         | 86129/1000000 [4:04:33<29:09:24,  8.71it/s]

{'loss': Array(0.15114288, dtype=float32), 'loss_cross_entropy': Array(0.14310473, dtype=float32)}
{'loss_inverse': Array(7.8248524e-07, dtype=float32)}


  9%|▊         | 86139/1000000 [4:04:34<25:18:46, 10.03it/s]

{'loss': Array(0.17057507, dtype=float32), 'loss_cross_entropy': Array(0.16190407, dtype=float32)}
{'loss_inverse': Array(8.513673e-05, dtype=float32)}


  9%|▊         | 86149/1000000 [4:04:36<31:11:20,  8.14it/s]

{'loss': Array(0.13771674, dtype=float32), 'loss_cross_entropy': Array(0.12988871, dtype=float32)}
{'loss_inverse': Array(3.9987123e-07, dtype=float32)}


  9%|▊         | 86159/1000000 [4:04:37<25:53:00,  9.81it/s]

{'loss': Array(0.13679524, dtype=float32), 'loss_cross_entropy': Array(0.12844837, dtype=float32)}
{'loss_inverse': Array(4.6954347e-07, dtype=float32)}


  9%|▊         | 86169/1000000 [4:04:39<30:55:40,  8.21it/s]

{'loss': Array(0.13258398, dtype=float32), 'loss_cross_entropy': Array(0.12507418, dtype=float32)}
{'loss_inverse': Array(0.00017924, dtype=float32)}


  9%|▊         | 86179/1000000 [4:04:41<26:28:49,  9.59it/s]

{'loss': Array(0.14518516, dtype=float32), 'loss_cross_entropy': Array(0.13686834, dtype=float32)}
{'loss_inverse': Array(2.1597796e-07, dtype=float32)}


  9%|▊         | 86189/1000000 [4:04:42<25:30:07,  9.95it/s]

{'loss': Array(0.13525973, dtype=float32), 'loss_cross_entropy': Array(0.1271575, dtype=float32)}
{'loss_inverse': Array(1.2401129e-05, dtype=float32)}


  9%|▊         | 86199/1000000 [4:04:44<28:12:02,  9.00it/s]

{'loss': Array(0.15099587, dtype=float32), 'loss_cross_entropy': Array(0.14225167, dtype=float32)}
{'loss_inverse': Array(2.9001098e-05, dtype=float32)}


  9%|▊         | 86209/1000000 [4:04:45<25:08:35, 10.10it/s]

{'loss': Array(0.15232074, dtype=float32), 'loss_cross_entropy': Array(0.14340264, dtype=float32)}
{'loss_inverse': Array(0.00024036, dtype=float32)}


  9%|▊         | 86219/1000000 [4:04:47<30:54:16,  8.21it/s]

{'loss': Array(0.16802253, dtype=float32), 'loss_cross_entropy': Array(0.15967432, dtype=float32)}
{'loss_inverse': Array(2.1389076e-06, dtype=float32)}


  9%|▊         | 86229/1000000 [4:04:48<26:08:12,  9.71it/s]

{'loss': Array(0.15256548, dtype=float32), 'loss_cross_entropy': Array(0.1443402, dtype=float32)}
{'loss_inverse': Array(0.00020527, dtype=float32)}


  9%|▊         | 86239/1000000 [4:04:50<34:41:49,  7.32it/s]

{'loss': Array(0.14276297, dtype=float32), 'loss_cross_entropy': Array(0.134809, dtype=float32)}
{'loss_inverse': Array(0.00018567, dtype=float32)}


  9%|▊         | 86249/1000000 [4:04:52<26:19:41,  9.64it/s]

{'loss': Array(0.13492656, dtype=float32), 'loss_cross_entropy': Array(0.12732372, dtype=float32)}
{'loss_inverse': Array(3.3094107e-05, dtype=float32)}


  9%|▊         | 86259/1000000 [4:04:53<24:58:49, 10.16it/s]

{'loss': Array(0.13820942, dtype=float32), 'loss_cross_entropy': Array(0.12994008, dtype=float32)}
{'loss_inverse': Array(9.450425e-06, dtype=float32)}


  9%|▊         | 86269/1000000 [4:04:55<29:31:08,  8.60it/s]

{'loss': Array(0.14444743, dtype=float32), 'loss_cross_entropy': Array(0.13631049, dtype=float32)}
{'loss_inverse': Array(3.2744345e-05, dtype=float32)}


  9%|▊         | 86279/1000000 [4:04:56<25:11:34, 10.07it/s]

{'loss': Array(0.14314742, dtype=float32), 'loss_cross_entropy': Array(0.13498554, dtype=float32)}
{'loss_inverse': Array(3.7717517e-05, dtype=float32)}


  9%|▊         | 86289/1000000 [4:04:58<33:12:28,  7.64it/s]

{'loss': Array(0.13711862, dtype=float32), 'loss_cross_entropy': Array(0.12969507, dtype=float32)}
{'loss_inverse': Array(1.6964608e-05, dtype=float32)}


  9%|▊         | 86299/1000000 [4:04:59<26:25:10,  9.61it/s]

{'loss': Array(0.1261699, dtype=float32), 'loss_cross_entropy': Array(0.11803854, dtype=float32)}
{'loss_inverse': Array(7.178181e-06, dtype=float32)}


  9%|▊         | 86309/1000000 [4:05:01<35:01:01,  7.25it/s]

{'loss': Array(0.14243524, dtype=float32), 'loss_cross_entropy': Array(0.13435173, dtype=float32)}
{'loss_inverse': Array(5.6491117e-06, dtype=float32)}


  9%|▊         | 86319/1000000 [4:05:03<26:49:21,  9.46it/s]

{'loss': Array(0.14792697, dtype=float32), 'loss_cross_entropy': Array(0.1397808, dtype=float32)}
{'loss_inverse': Array(1.037551e-06, dtype=float32)}


  9%|▊         | 86329/1000000 [4:05:04<25:16:41, 10.04it/s]

{'loss': Array(0.16293101, dtype=float32), 'loss_cross_entropy': Array(0.15376283, dtype=float32)}
{'loss_inverse': Array(3.11235e-07, dtype=float32)}


  9%|▊         | 86339/1000000 [4:05:06<27:58:19,  9.07it/s]

{'loss': Array(0.15079716, dtype=float32), 'loss_cross_entropy': Array(0.14243355, dtype=float32)}
{'loss_inverse': Array(2.8997358e-06, dtype=float32)}


  9%|▊         | 86349/1000000 [4:05:07<26:03:58,  9.74it/s]

{'loss': Array(0.12985168, dtype=float32), 'loss_cross_entropy': Array(0.12290519, dtype=float32)}
{'loss_inverse': Array(7.4563235e-07, dtype=float32)}


  9%|▊         | 86359/1000000 [4:05:09<38:25:17,  6.61it/s]

{'loss': Array(0.14285578, dtype=float32), 'loss_cross_entropy': Array(0.13495228, dtype=float32)}
{'loss_inverse': Array(3.4289173e-05, dtype=float32)}


  9%|▊         | 86369/1000000 [4:05:10<27:07:44,  9.35it/s]

{'loss': Array(0.13104278, dtype=float32), 'loss_cross_entropy': Array(0.123492, dtype=float32)}
{'loss_inverse': Array(0.00034992, dtype=float32)}


  9%|▊         | 86379/1000000 [4:05:12<25:13:03, 10.06it/s]

{'loss': Array(0.14750959, dtype=float32), 'loss_cross_entropy': Array(0.13944696, dtype=float32)}
{'loss_inverse': Array(2.379334e-05, dtype=float32)}


  9%|▊         | 86389/1000000 [4:05:14<29:16:41,  8.67it/s]

{'loss': Array(0.13828577, dtype=float32), 'loss_cross_entropy': Array(0.1298353, dtype=float32)}
{'loss_inverse': Array(9.190369e-05, dtype=float32)}


  9%|▊         | 86399/1000000 [4:05:15<25:34:09,  9.93it/s]

{'loss': Array(0.13348858, dtype=float32), 'loss_cross_entropy': Array(0.12620299, dtype=float32)}
{'loss_inverse': Array(7.947202e-05, dtype=float32)}


  9%|▊         | 86409/1000000 [4:05:17<29:14:03,  8.68it/s]

{'loss': Array(0.13301747, dtype=float32), 'loss_cross_entropy': Array(0.12571786, dtype=float32)}
{'loss_inverse': Array(9.97428e-05, dtype=float32)}


  9%|▊         | 86419/1000000 [4:05:18<25:59:28,  9.76it/s]

{'loss': Array(0.15042843, dtype=float32), 'loss_cross_entropy': Array(0.14221066, dtype=float32)}
{'loss_inverse': Array(0.0001072, dtype=float32)}


  9%|▊         | 86429/1000000 [4:05:20<31:23:11,  8.09it/s]

{'loss': Array(0.14501333, dtype=float32), 'loss_cross_entropy': Array(0.13685955, dtype=float32)}
{'loss_inverse': Array(5.063168e-07, dtype=float32)}


  9%|▊         | 86439/1000000 [4:05:21<25:48:35,  9.83it/s]

{'loss': Array(0.13738666, dtype=float32), 'loss_cross_entropy': Array(0.1295626, dtype=float32)}
{'loss_inverse': Array(0.00015902, dtype=float32)}


  9%|▊         | 86449/1000000 [4:05:23<25:13:29, 10.06it/s]

{'loss': Array(0.14440854, dtype=float32), 'loss_cross_entropy': Array(0.1360993, dtype=float32)}
{'loss_inverse': Array(0.0001563, dtype=float32)}


  9%|▊         | 86459/1000000 [4:05:25<29:21:49,  8.64it/s]

{'loss': Array(0.1356854, dtype=float32), 'loss_cross_entropy': Array(0.12768303, dtype=float32)}
{'loss_inverse': Array(4.1895387e-06, dtype=float32)}


  9%|▊         | 86469/1000000 [4:05:26<25:13:20, 10.06it/s]

{'loss': Array(0.15810071, dtype=float32), 'loss_cross_entropy': Array(0.14983322, dtype=float32)}
{'loss_inverse': Array(1.731526e-05, dtype=float32)}


  9%|▊         | 86479/1000000 [4:05:28<33:42:54,  7.53it/s]

{'loss': Array(0.14363757, dtype=float32), 'loss_cross_entropy': Array(0.13516265, dtype=float32)}
{'loss_inverse': Array(0.00023778, dtype=float32)}


  9%|▊         | 86489/1000000 [4:05:29<26:03:34,  9.74it/s]

{'loss': Array(0.1532653, dtype=float32), 'loss_cross_entropy': Array(0.14464797, dtype=float32)}
{'loss_inverse': Array(2.9775634e-05, dtype=float32)}


  9%|▊         | 86499/1000000 [4:05:31<38:03:07,  6.67it/s]

{'loss': Array(0.16665606, dtype=float32), 'loss_cross_entropy': Array(0.15718661, dtype=float32)}
{'loss_inverse': Array(2.9031585e-06, dtype=float32)}


  9%|▊         | 86509/1000000 [4:05:39<86:32:12,  2.93it/s] 

{'loss': Array(0.13566874, dtype=float32), 'loss_cross_entropy': Array(0.12771758, dtype=float32)}
{'loss_inverse': Array(0.00018064, dtype=float32)}


  9%|▊         | 86519/1000000 [4:05:40<35:31:41,  7.14it/s]

{'loss': Array(0.1291257, dtype=float32), 'loss_cross_entropy': Array(0.12206105, dtype=float32)}
{'loss_inverse': Array(3.9539646e-05, dtype=float32)}


  9%|▊         | 86529/1000000 [4:05:42<31:02:45,  8.17it/s]

{'loss': Array(0.14320335, dtype=float32), 'loss_cross_entropy': Array(0.13525157, dtype=float32)}
{'loss_inverse': Array(2.5296075e-07, dtype=float32)}


  9%|▊         | 86539/1000000 [4:05:43<26:53:46,  9.43it/s]

{'loss': Array(0.13541718, dtype=float32), 'loss_cross_entropy': Array(0.12859611, dtype=float32)}
{'loss_inverse': Array(7.2895244e-05, dtype=float32)}


  9%|▊         | 86549/1000000 [4:05:45<31:28:21,  8.06it/s]

{'loss': Array(0.14210941, dtype=float32), 'loss_cross_entropy': Array(0.1340385, dtype=float32)}
{'loss_inverse': Array(1.3978841e-05, dtype=float32)}


  9%|▊         | 86559/1000000 [4:05:47<26:31:59,  9.56it/s]

{'loss': Array(0.14442703, dtype=float32), 'loss_cross_entropy': Array(0.13628809, dtype=float32)}
{'loss_inverse': Array(8.8704167e-07, dtype=float32)}


  9%|▊         | 86569/1000000 [4:05:48<25:23:14,  9.99it/s]

{'loss': Array(0.15067153, dtype=float32), 'loss_cross_entropy': Array(0.14198956, dtype=float32)}
{'loss_inverse': Array(3.037754e-06, dtype=float32)}


  9%|▊         | 86579/1000000 [4:05:50<28:09:49,  9.01it/s]

{'loss': Array(0.14034109, dtype=float32), 'loss_cross_entropy': Array(0.13221884, dtype=float32)}
{'loss_inverse': Array(6.393534e-05, dtype=float32)}


  9%|▊         | 86589/1000000 [4:05:51<25:02:25, 10.13it/s]

{'loss': Array(0.1342678, dtype=float32), 'loss_cross_entropy': Array(0.12671864, dtype=float32)}
{'loss_inverse': Array(7.4706527e-06, dtype=float32)}


  9%|▊         | 86599/1000000 [4:05:53<30:48:15,  8.24it/s]

{'loss': Array(0.13767023, dtype=float32), 'loss_cross_entropy': Array(0.13013335, dtype=float32)}
{'loss_inverse': Array(1.0536251e-05, dtype=float32)}


  9%|▊         | 86609/1000000 [4:05:54<25:41:10,  9.88it/s]

{'loss': Array(0.14549215, dtype=float32), 'loss_cross_entropy': Array(0.13760921, dtype=float32)}
{'loss_inverse': Array(2.4470324e-05, dtype=float32)}


  9%|▊         | 86619/1000000 [4:05:56<35:13:34,  7.20it/s]

{'loss': Array(0.14546727, dtype=float32), 'loss_cross_entropy': Array(0.13731745, dtype=float32)}
{'loss_inverse': Array(0.00010318, dtype=float32)}


  9%|▊         | 86629/1000000 [4:05:58<26:16:09,  9.66it/s]

{'loss': Array(0.15161653, dtype=float32), 'loss_cross_entropy': Array(0.14302157, dtype=float32)}
{'loss_inverse': Array(2.7566728e-06, dtype=float32)}


  9%|▊         | 86639/1000000 [4:05:59<25:28:08,  9.96it/s]

{'loss': Array(0.13832931, dtype=float32), 'loss_cross_entropy': Array(0.13069181, dtype=float32)}
{'loss_inverse': Array(1.4406799e-05, dtype=float32)}


  9%|▊         | 86649/1000000 [4:06:01<30:25:42,  8.34it/s]

{'loss': Array(0.14630471, dtype=float32), 'loss_cross_entropy': Array(0.13768707, dtype=float32)}
{'loss_inverse': Array(5.1096316e-07, dtype=float32)}


  9%|▊         | 86659/1000000 [4:06:02<25:31:56,  9.94it/s]

{'loss': Array(0.13402015, dtype=float32), 'loss_cross_entropy': Array(0.12650585, dtype=float32)}
{'loss_inverse': Array(1.3077376e-05, dtype=float32)}


  9%|▊         | 86669/1000000 [4:06:04<34:31:51,  7.35it/s]

{'loss': Array(0.13239518, dtype=float32), 'loss_cross_entropy': Array(0.12419078, dtype=float32)}
{'loss_inverse': Array(0.00086986, dtype=float32)}


  9%|▊         | 86679/1000000 [4:06:06<26:39:28,  9.52it/s]

{'loss': Array(0.13948607, dtype=float32), 'loss_cross_entropy': Array(0.13203822, dtype=float32)}
{'loss_inverse': Array(6.1119016e-07, dtype=float32)}


  9%|▊         | 86689/1000000 [4:06:07<35:01:01,  7.24it/s]

{'loss': Array(0.12794752, dtype=float32), 'loss_cross_entropy': Array(0.1201307, dtype=float32)}
{'loss_inverse': Array(1.3546722e-06, dtype=float32)}


  9%|▊         | 86699/1000000 [4:06:09<27:26:35,  9.24it/s]

{'loss': Array(0.1385278, dtype=float32), 'loss_cross_entropy': Array(0.13082086, dtype=float32)}
{'loss_inverse': Array(2.6048796e-05, dtype=float32)}


  9%|▊         | 86709/1000000 [4:06:10<25:04:41, 10.12it/s]

{'loss': Array(0.13676178, dtype=float32), 'loss_cross_entropy': Array(0.1288089, dtype=float32)}
{'loss_inverse': Array(2.372706e-06, dtype=float32)}


  9%|▊         | 86719/1000000 [4:06:12<27:34:35,  9.20it/s]

{'loss': Array(0.13266058, dtype=float32), 'loss_cross_entropy': Array(0.12490951, dtype=float32)}
{'loss_inverse': Array(2.1535773e-06, dtype=float32)}


  9%|▊         | 86729/1000000 [4:06:13<26:03:27,  9.74it/s]

{'loss': Array(0.13768546, dtype=float32), 'loss_cross_entropy': Array(0.12950714, dtype=float32)}
{'loss_inverse': Array(1.494543e-05, dtype=float32)}


  9%|▊         | 86739/1000000 [4:06:15<37:20:40,  6.79it/s]

{'loss': Array(0.1482252, dtype=float32), 'loss_cross_entropy': Array(0.14021045, dtype=float32)}
{'loss_inverse': Array(0.00022115, dtype=float32)}


  9%|▊         | 86749/1000000 [4:06:16<26:31:20,  9.56it/s]

{'loss': Array(0.13796468, dtype=float32), 'loss_cross_entropy': Array(0.13040906, dtype=float32)}
{'loss_inverse': Array(4.3432297e-05, dtype=float32)}


  9%|▊         | 86759/1000000 [4:06:18<24:51:07, 10.21it/s]

{'loss': Array(0.15458466, dtype=float32), 'loss_cross_entropy': Array(0.14608249, dtype=float32)}
{'loss_inverse': Array(0.00023509, dtype=float32)}


  9%|▊         | 86769/1000000 [4:06:20<29:21:16,  8.64it/s]

{'loss': Array(0.12555125, dtype=float32), 'loss_cross_entropy': Array(0.11824068, dtype=float32)}
{'loss_inverse': Array(0.00017606, dtype=float32)}


  9%|▊         | 86779/1000000 [4:06:21<25:43:22,  9.86it/s]

{'loss': Array(0.14033829, dtype=float32), 'loss_cross_entropy': Array(0.13206528, dtype=float32)}
{'loss_inverse': Array(0.00041824, dtype=float32)}


  9%|▊         | 86789/1000000 [4:06:23<29:17:46,  8.66it/s]

{'loss': Array(0.13881266, dtype=float32), 'loss_cross_entropy': Array(0.13111527, dtype=float32)}
{'loss_inverse': Array(0.00016959, dtype=float32)}


  9%|▊         | 86799/1000000 [4:06:24<25:48:11,  9.83it/s]

{'loss': Array(0.15009867, dtype=float32), 'loss_cross_entropy': Array(0.14173314, dtype=float32)}
{'loss_inverse': Array(0.00012147, dtype=float32)}


  9%|▊         | 86809/1000000 [4:06:26<32:11:17,  7.88it/s]

{'loss': Array(0.13558775, dtype=float32), 'loss_cross_entropy': Array(0.12860598, dtype=float32)}
{'loss_inverse': Array(0.00014219, dtype=float32)}


  9%|▊         | 86819/1000000 [4:06:27<25:42:21,  9.87it/s]

{'loss': Array(0.14277457, dtype=float32), 'loss_cross_entropy': Array(0.1349601, dtype=float32)}
{'loss_inverse': Array(7.975764e-05, dtype=float32)}


  9%|▊         | 86829/1000000 [4:06:29<25:13:37, 10.05it/s]

{'loss': Array(0.15130572, dtype=float32), 'loss_cross_entropy': Array(0.14258395, dtype=float32)}
{'loss_inverse': Array(0.00013903, dtype=float32)}


  9%|▊         | 86839/1000000 [4:06:31<29:23:33,  8.63it/s]

{'loss': Array(0.12979542, dtype=float32), 'loss_cross_entropy': Array(0.12242778, dtype=float32)}
{'loss_inverse': Array(9.987e-06, dtype=float32)}


  9%|▊         | 86849/1000000 [4:06:32<25:16:43, 10.03it/s]

{'loss': Array(0.11714061, dtype=float32), 'loss_cross_entropy': Array(0.10998078, dtype=float32)}
{'loss_inverse': Array(0.00067131, dtype=float32)}


  9%|▊         | 86859/1000000 [4:06:34<33:45:22,  7.51it/s]

{'loss': Array(0.16084711, dtype=float32), 'loss_cross_entropy': Array(0.15229562, dtype=float32)}
{'loss_inverse': Array(5.810217e-05, dtype=float32)}


  9%|▊         | 86869/1000000 [4:06:35<26:23:56,  9.61it/s]

{'loss': Array(0.1514799, dtype=float32), 'loss_cross_entropy': Array(0.14354807, dtype=float32)}
{'loss_inverse': Array(0.00043201, dtype=float32)}


  9%|▊         | 86879/1000000 [4:06:37<37:48:50,  6.71it/s]

{'loss': Array(0.13464981, dtype=float32), 'loss_cross_entropy': Array(0.12679106, dtype=float32)}
{'loss_inverse': Array(0.00020651, dtype=float32)}


  9%|▊         | 86889/1000000 [4:06:38<26:52:36,  9.44it/s]

{'loss': Array(0.15656282, dtype=float32), 'loss_cross_entropy': Array(0.14733616, dtype=float32)}
{'loss_inverse': Array(3.4771125e-05, dtype=float32)}


  9%|▊         | 86899/1000000 [4:06:40<37:51:17,  6.70it/s]

{'loss': Array(0.14792398, dtype=float32), 'loss_cross_entropy': Array(0.13918018, dtype=float32)}
{'loss_inverse': Array(1.4862492e-05, dtype=float32)}


  9%|▊         | 86909/1000000 [4:06:41<26:58:15,  9.40it/s]

{'loss': Array(0.12969033, dtype=float32), 'loss_cross_entropy': Array(0.12215491, dtype=float32)}
{'loss_inverse': Array(9.645393e-06, dtype=float32)}


  9%|▊         | 86919/1000000 [4:06:43<25:00:53, 10.14it/s]

{'loss': Array(0.14779432, dtype=float32), 'loss_cross_entropy': Array(0.13989715, dtype=float32)}
{'loss_inverse': Array(1.0873349e-05, dtype=float32)}


  9%|▊         | 86929/1000000 [4:06:45<33:27:24,  7.58it/s]

{'loss': Array(0.1368503, dtype=float32), 'loss_cross_entropy': Array(0.12942062, dtype=float32)}
{'loss_inverse': Array(3.1563704e-05, dtype=float32)}


  9%|▊         | 86939/1000000 [4:06:46<26:31:55,  9.56it/s]

{'loss': Array(0.1463174, dtype=float32), 'loss_cross_entropy': Array(0.13804333, dtype=float32)}
{'loss_inverse': Array(5.8071004e-07, dtype=float32)}


  9%|▊         | 86949/1000000 [4:06:48<38:29:50,  6.59it/s]

{'loss': Array(0.1530975, dtype=float32), 'loss_cross_entropy': Array(0.14504932, dtype=float32)}
{'loss_inverse': Array(4.147276e-06, dtype=float32)}


  9%|▊         | 86959/1000000 [4:06:49<27:01:44,  9.38it/s]

{'loss': Array(0.15175544, dtype=float32), 'loss_cross_entropy': Array(0.14306088, dtype=float32)}
{'loss_inverse': Array(2.304805e-05, dtype=float32)}


  9%|▊         | 86969/1000000 [4:06:51<25:07:44, 10.09it/s]

{'loss': Array(0.13768044, dtype=float32), 'loss_cross_entropy': Array(0.13009806, dtype=float32)}
{'loss_inverse': Array(9.061148e-05, dtype=float32)}


  9%|▊         | 86979/1000000 [4:06:52<27:55:59,  9.08it/s]

{'loss': Array(0.14315926, dtype=float32), 'loss_cross_entropy': Array(0.1352744, dtype=float32)}
{'loss_inverse': Array(6.881453e-05, dtype=float32)}


  9%|▊         | 86989/1000000 [4:06:54<25:44:05,  9.85it/s]

{'loss': Array(0.14489017, dtype=float32), 'loss_cross_entropy': Array(0.13676141, dtype=float32)}
{'loss_inverse': Array(0.00013909, dtype=float32)}


  9%|▊         | 86999/1000000 [4:06:56<33:39:30,  7.53it/s]

{'loss': Array(0.15639113, dtype=float32), 'loss_cross_entropy': Array(0.14823157, dtype=float32)}
{'loss_inverse': Array(0.0003031, dtype=float32)}


  9%|▊         | 87009/1000000 [4:07:04<87:53:39,  2.89it/s] 

{'loss': Array(0.1416267, dtype=float32), 'loss_cross_entropy': Array(0.13379486, dtype=float32)}
{'loss_inverse': Array(0.00013558, dtype=float32)}


  9%|▊         | 87019/1000000 [4:07:06<41:58:20,  6.04it/s]

{'loss': Array(0.14083414, dtype=float32), 'loss_cross_entropy': Array(0.13379793, dtype=float32)}
{'loss_inverse': Array(1.153873e-06, dtype=float32)}


  9%|▊         | 87029/1000000 [4:07:07<28:40:57,  8.84it/s]

{'loss': Array(0.13134892, dtype=float32), 'loss_cross_entropy': Array(0.12416225, dtype=float32)}
{'loss_inverse': Array(2.6241909e-05, dtype=float32)}


  9%|▊         | 87039/1000000 [4:07:09<39:04:22,  6.49it/s]

{'loss': Array(0.13869022, dtype=float32), 'loss_cross_entropy': Array(0.13038246, dtype=float32)}
{'loss_inverse': Array(6.6228074e-07, dtype=float32)}


  9%|▊         | 87049/1000000 [4:07:10<27:18:23,  9.29it/s]

{'loss': Array(0.145548, dtype=float32), 'loss_cross_entropy': Array(0.13701008, dtype=float32)}
{'loss_inverse': Array(8.910395e-05, dtype=float32)}


  9%|▊         | 87059/1000000 [4:07:12<38:18:12,  6.62it/s]

{'loss': Array(0.14577582, dtype=float32), 'loss_cross_entropy': Array(0.13836418, dtype=float32)}
{'loss_inverse': Array(0.0001804, dtype=float32)}


  9%|▊         | 87069/1000000 [4:07:13<27:05:58,  9.36it/s]

{'loss': Array(0.13950455, dtype=float32), 'loss_cross_entropy': Array(0.1322081, dtype=float32)}
{'loss_inverse': Array(1.9593715e-06, dtype=float32)}


  9%|▊         | 87079/1000000 [4:07:15<25:27:44,  9.96it/s]

{'loss': Array(0.15871243, dtype=float32), 'loss_cross_entropy': Array(0.15011297, dtype=float32)}
{'loss_inverse': Array(0.00024488, dtype=float32)}


  9%|▊         | 87089/1000000 [4:07:17<33:35:24,  7.55it/s]

{'loss': Array(0.12066676, dtype=float32), 'loss_cross_entropy': Array(0.11386068, dtype=float32)}
{'loss_inverse': Array(3.4990449e-06, dtype=float32)}


  9%|▊         | 87099/1000000 [4:07:18<25:58:31,  9.76it/s]

{'loss': Array(0.13728762, dtype=float32), 'loss_cross_entropy': Array(0.12938647, dtype=float32)}
{'loss_inverse': Array(2.816038e-06, dtype=float32)}


  9%|▊         | 87109/1000000 [4:07:19<25:17:58, 10.02it/s]

{'loss': Array(0.12953492, dtype=float32), 'loss_cross_entropy': Array(0.12140729, dtype=float32)}
{'loss_inverse': Array(1.7154589e-05, dtype=float32)}


  9%|▊         | 87119/1000000 [4:07:21<28:17:39,  8.96it/s]

{'loss': Array(0.14825012, dtype=float32), 'loss_cross_entropy': Array(0.1400538, dtype=float32)}
{'loss_inverse': Array(3.3554315e-06, dtype=float32)}


  9%|▊         | 87129/1000000 [4:07:23<25:37:26,  9.90it/s]

{'loss': Array(0.14659339, dtype=float32), 'loss_cross_entropy': Array(0.1385096, dtype=float32)}
{'loss_inverse': Array(0.00011823, dtype=float32)}


  9%|▊         | 87139/1000000 [4:07:24<31:01:12,  8.17it/s]

{'loss': Array(0.13229133, dtype=float32), 'loss_cross_entropy': Array(0.1249926, dtype=float32)}
{'loss_inverse': Array(1.5547295e-05, dtype=float32)}


  9%|▊         | 87149/1000000 [4:07:26<26:06:20,  9.71it/s]

{'loss': Array(0.15157671, dtype=float32), 'loss_cross_entropy': Array(0.14314055, dtype=float32)}
{'loss_inverse': Array(7.1382675e-07, dtype=float32)}


  9%|▊         | 87159/1000000 [4:07:28<33:51:26,  7.49it/s]

{'loss': Array(0.1324264, dtype=float32), 'loss_cross_entropy': Array(0.12437518, dtype=float32)}
{'loss_inverse': Array(7.275264e-05, dtype=float32)}


  9%|▊         | 87169/1000000 [4:07:29<26:09:26,  9.69it/s]

{'loss': Array(0.12947915, dtype=float32), 'loss_cross_entropy': Array(0.12205179, dtype=float32)}
{'loss_inverse': Array(0.00026884, dtype=float32)}


  9%|▊         | 87179/1000000 [4:07:31<34:16:16,  7.40it/s]

{'loss': Array(0.16618009, dtype=float32), 'loss_cross_entropy': Array(0.15698978, dtype=float32)}
{'loss_inverse': Array(0.00013913, dtype=float32)}


  9%|▊         | 87189/1000000 [4:07:32<27:20:13,  9.28it/s]

{'loss': Array(0.14380632, dtype=float32), 'loss_cross_entropy': Array(0.13586271, dtype=float32)}
{'loss_inverse': Array(3.841731e-05, dtype=float32)}


  9%|▊         | 87199/1000000 [4:07:34<24:59:30, 10.15it/s]

{'loss': Array(0.15016843, dtype=float32), 'loss_cross_entropy': Array(0.1417331, dtype=float32)}
{'loss_inverse': Array(3.4853154e-05, dtype=float32)}


  9%|▊         | 87209/1000000 [4:07:35<30:59:00,  8.18it/s]

{'loss': Array(0.1533951, dtype=float32), 'loss_cross_entropy': Array(0.1449611, dtype=float32)}
{'loss_inverse': Array(2.2663087e-05, dtype=float32)}


  9%|▊         | 87219/1000000 [4:07:37<26:05:35,  9.72it/s]

{'loss': Array(0.14008528, dtype=float32), 'loss_cross_entropy': Array(0.13263302, dtype=float32)}
{'loss_inverse': Array(1.15066335e-07, dtype=float32)}


  9%|▊         | 87229/1000000 [4:07:39<34:26:16,  7.36it/s]

{'loss': Array(0.14976726, dtype=float32), 'loss_cross_entropy': Array(0.14191803, dtype=float32)}
{'loss_inverse': Array(4.4688048e-05, dtype=float32)}


  9%|▊         | 87239/1000000 [4:07:40<28:17:06,  8.96it/s]

{'loss': Array(0.14820215, dtype=float32), 'loss_cross_entropy': Array(0.13991784, dtype=float32)}
{'loss_inverse': Array(5.8925466e-06, dtype=float32)}


  9%|▊         | 87249/1000000 [4:07:42<38:12:04,  6.64it/s]

{'loss': Array(0.1434497, dtype=float32), 'loss_cross_entropy': Array(0.13534367, dtype=float32)}
{'loss_inverse': Array(8.768956e-07, dtype=float32)}


  9%|▊         | 87259/1000000 [4:07:43<26:59:09,  9.40it/s]

{'loss': Array(0.13889666, dtype=float32), 'loss_cross_entropy': Array(0.1315748, dtype=float32)}
{'loss_inverse': Array(1.3137263e-06, dtype=float32)}


  9%|▊         | 87269/1000000 [4:07:45<25:07:33, 10.09it/s]

{'loss': Array(0.14236288, dtype=float32), 'loss_cross_entropy': Array(0.13436759, dtype=float32)}
{'loss_inverse': Array(8.687065e-05, dtype=float32)}


  9%|▊         | 87279/1000000 [4:07:46<28:54:23,  8.77it/s]

{'loss': Array(0.15766753, dtype=float32), 'loss_cross_entropy': Array(0.14932504, dtype=float32)}
{'loss_inverse': Array(2.770929e-06, dtype=float32)}


  9%|▊         | 87289/1000000 [4:07:48<25:44:40,  9.85it/s]

{'loss': Array(0.16198352, dtype=float32), 'loss_cross_entropy': Array(0.15297346, dtype=float32)}
{'loss_inverse': Array(2.4161616e-06, dtype=float32)}


  9%|▊         | 87299/1000000 [4:07:50<34:13:16,  7.41it/s]

{'loss': Array(0.14079116, dtype=float32), 'loss_cross_entropy': Array(0.13309279, dtype=float32)}
{'loss_inverse': Array(1.8405535e-05, dtype=float32)}


  9%|▊         | 87309/1000000 [4:07:51<26:36:31,  9.53it/s]

{'loss': Array(0.11824507, dtype=float32), 'loss_cross_entropy': Array(0.11125087, dtype=float32)}
{'loss_inverse': Array(4.854896e-06, dtype=float32)}


  9%|▊         | 87319/1000000 [4:07:52<25:07:26, 10.09it/s]

{'loss': Array(0.15848184, dtype=float32), 'loss_cross_entropy': Array(0.15000121, dtype=float32)}
{'loss_inverse': Array(5.6629683e-06, dtype=float32)}


  9%|▊         | 87329/1000000 [4:07:54<28:38:07,  8.85it/s]

{'loss': Array(0.132024, dtype=float32), 'loss_cross_entropy': Array(0.12383868, dtype=float32)}
{'loss_inverse': Array(0.00023809, dtype=float32)}


  9%|▊         | 87339/1000000 [4:07:56<26:20:33,  9.62it/s]

{'loss': Array(0.15184642, dtype=float32), 'loss_cross_entropy': Array(0.1435765, dtype=float32)}
{'loss_inverse': Array(1.1589114e-05, dtype=float32)}


  9%|▊         | 87349/1000000 [4:07:58<28:22:35,  8.93it/s]

{'loss': Array(0.13309497, dtype=float32), 'loss_cross_entropy': Array(0.12521628, dtype=float32)}
{'loss_inverse': Array(8.1780645e-06, dtype=float32)}


  9%|▊         | 87359/1000000 [4:07:59<25:23:13,  9.99it/s]

{'loss': Array(0.11990664, dtype=float32), 'loss_cross_entropy': Array(0.11263161, dtype=float32)}
{'loss_inverse': Array(1.6662023e-07, dtype=float32)}


  9%|▊         | 87369/1000000 [4:08:01<27:43:10,  9.15it/s]

{'loss': Array(0.16068485, dtype=float32), 'loss_cross_entropy': Array(0.15176621, dtype=float32)}
{'loss_inverse': Array(5.0712856e-06, dtype=float32)}


  9%|▊         | 87379/1000000 [4:08:02<25:33:12,  9.92it/s]

{'loss': Array(0.13797824, dtype=float32), 'loss_cross_entropy': Array(0.13034736, dtype=float32)}
{'loss_inverse': Array(3.9138333e-05, dtype=float32)}


  9%|▊         | 87389/1000000 [4:08:04<29:28:50,  8.60it/s]

{'loss': Array(0.13318671, dtype=float32), 'loss_cross_entropy': Array(0.12542976, dtype=float32)}
{'loss_inverse': Array(3.6804456e-06, dtype=float32)}


  9%|▊         | 87399/1000000 [4:08:05<26:11:45,  9.68it/s]

{'loss': Array(0.13450311, dtype=float32), 'loss_cross_entropy': Array(0.12680787, dtype=float32)}
{'loss_inverse': Array(6.5927095e-05, dtype=float32)}


  9%|▊         | 87409/1000000 [4:08:07<33:05:09,  7.66it/s]

{'loss': Array(0.15729065, dtype=float32), 'loss_cross_entropy': Array(0.14838733, dtype=float32)}
{'loss_inverse': Array(8.1637576e-05, dtype=float32)}


  9%|▊         | 87419/1000000 [4:08:09<27:34:10,  9.19it/s]

{'loss': Array(0.14765742, dtype=float32), 'loss_cross_entropy': Array(0.13926956, dtype=float32)}
{'loss_inverse': Array(0.00011171, dtype=float32)}


  9%|▊         | 87429/1000000 [4:08:10<34:19:53,  7.38it/s]

{'loss': Array(0.15406288, dtype=float32), 'loss_cross_entropy': Array(0.14552549, dtype=float32)}
{'loss_inverse': Array(3.5055702e-06, dtype=float32)}


  9%|▊         | 87439/1000000 [4:08:12<26:08:55,  9.69it/s]

{'loss': Array(0.1389051, dtype=float32), 'loss_cross_entropy': Array(0.1312187, dtype=float32)}
{'loss_inverse': Array(9.57208e-07, dtype=float32)}


  9%|▊         | 87449/1000000 [4:08:13<24:47:59, 10.22it/s]

{'loss': Array(0.1358596, dtype=float32), 'loss_cross_entropy': Array(0.12891825, dtype=float32)}
{'loss_inverse': Array(6.12322e-06, dtype=float32)}


  9%|▊         | 87459/1000000 [4:08:15<29:51:54,  8.49it/s]

{'loss': Array(0.14993238, dtype=float32), 'loss_cross_entropy': Array(0.14162934, dtype=float32)}
{'loss_inverse': Array(1.06682955e-05, dtype=float32)}


  9%|▊         | 87469/1000000 [4:08:16<25:33:48,  9.92it/s]

{'loss': Array(0.13758014, dtype=float32), 'loss_cross_entropy': Array(0.13025214, dtype=float32)}
{'loss_inverse': Array(6.0262555e-05, dtype=float32)}


  9%|▊         | 87479/1000000 [4:08:18<33:42:10,  7.52it/s]

{'loss': Array(0.14071733, dtype=float32), 'loss_cross_entropy': Array(0.133127, dtype=float32)}
{'loss_inverse': Array(0.00051644, dtype=float32)}


  9%|▊         | 87489/1000000 [4:08:20<27:40:33,  9.16it/s]

{'loss': Array(0.1388231, dtype=float32), 'loss_cross_entropy': Array(0.13035098, dtype=float32)}
{'loss_inverse': Array(3.194296e-05, dtype=float32)}


  9%|▊         | 87499/1000000 [4:08:21<35:09:31,  7.21it/s]

{'loss': Array(0.14209406, dtype=float32), 'loss_cross_entropy': Array(0.13400367, dtype=float32)}
{'loss_inverse': Array(0.00026643, dtype=float32)}


  9%|▉         | 87509/1000000 [4:08:29<86:08:23,  2.94it/s] 

{'loss': Array(0.14649943, dtype=float32), 'loss_cross_entropy': Array(0.13869281, dtype=float32)}
{'loss_inverse': Array(4.3099342e-05, dtype=float32)}


  9%|▉         | 87519/1000000 [4:08:31<43:47:29,  5.79it/s]

{'loss': Array(0.13816296, dtype=float32), 'loss_cross_entropy': Array(0.13047674, dtype=float32)}
{'loss_inverse': Array(1.8984667e-06, dtype=float32)}


  9%|▉         | 87529/1000000 [4:08:32<27:48:02,  9.12it/s]

{'loss': Array(0.13420205, dtype=float32), 'loss_cross_entropy': Array(0.12662582, dtype=float32)}
{'loss_inverse': Array(7.998133e-05, dtype=float32)}


  9%|▉         | 87539/1000000 [4:08:34<24:58:42, 10.15it/s]

{'loss': Array(0.1369474, dtype=float32), 'loss_cross_entropy': Array(0.1286979, dtype=float32)}
{'loss_inverse': Array(2.8761931e-05, dtype=float32)}


  9%|▉         | 87549/1000000 [4:08:36<30:52:34,  8.21it/s]

{'loss': Array(0.15887089, dtype=float32), 'loss_cross_entropy': Array(0.15052812, dtype=float32)}
{'loss_inverse': Array(2.1108292e-06, dtype=float32)}


  9%|▉         | 87559/1000000 [4:08:37<25:33:59,  9.91it/s]

{'loss': Array(0.13505249, dtype=float32), 'loss_cross_entropy': Array(0.12741737, dtype=float32)}
{'loss_inverse': Array(3.6011777e-05, dtype=float32)}


  9%|▉         | 87569/1000000 [4:08:39<34:43:32,  7.30it/s]

{'loss': Array(0.13123989, dtype=float32), 'loss_cross_entropy': Array(0.12379016, dtype=float32)}
{'loss_inverse': Array(0.00034068, dtype=float32)}


  9%|▉         | 87579/1000000 [4:08:40<26:08:01,  9.70it/s]

{'loss': Array(0.12066229, dtype=float32), 'loss_cross_entropy': Array(0.11331216, dtype=float32)}
{'loss_inverse': Array(1.3412766e-06, dtype=float32)}


  9%|▉         | 87589/1000000 [4:08:42<37:29:38,  6.76it/s]

{'loss': Array(0.12939598, dtype=float32), 'loss_cross_entropy': Array(0.12158195, dtype=float32)}
{'loss_inverse': Array(7.925765e-06, dtype=float32)}


  9%|▉         | 87599/1000000 [4:08:43<26:34:20,  9.54it/s]

{'loss': Array(0.13816135, dtype=float32), 'loss_cross_entropy': Array(0.13002667, dtype=float32)}
{'loss_inverse': Array(8.08514e-06, dtype=float32)}


  9%|▉         | 87609/1000000 [4:08:45<26:21:55,  9.61it/s]

{'loss': Array(0.1450955, dtype=float32), 'loss_cross_entropy': Array(0.13675967, dtype=float32)}
{'loss_inverse': Array(0.00079481, dtype=float32)}


  9%|▉         | 87620/1000000 [4:08:47<27:06:33,  9.35it/s]

{'loss': Array(0.15495315, dtype=float32), 'loss_cross_entropy': Array(0.14600496, dtype=float32)}
{'loss_inverse': Array(0.00018417, dtype=float32)}


  9%|▉         | 87630/1000000 [4:08:48<24:54:19, 10.18it/s]

{'loss': Array(0.14602298, dtype=float32), 'loss_cross_entropy': Array(0.13821228, dtype=float32)}
{'loss_inverse': Array(5.9126207e-05, dtype=float32)}


  9%|▉         | 87640/1000000 [4:08:50<33:28:04,  7.57it/s]

{'loss': Array(0.14756857, dtype=float32), 'loss_cross_entropy': Array(0.13933997, dtype=float32)}
{'loss_inverse': Array(0.00026545, dtype=float32)}


  9%|▉         | 87650/1000000 [4:08:51<26:39:58,  9.50it/s]

{'loss': Array(0.13013469, dtype=float32), 'loss_cross_entropy': Array(0.12272789, dtype=float32)}
{'loss_inverse': Array(3.0379138e-07, dtype=float32)}


  9%|▉         | 87660/1000000 [4:08:53<39:57:08,  6.34it/s]

{'loss': Array(0.15637764, dtype=float32), 'loss_cross_entropy': Array(0.14799757, dtype=float32)}
{'loss_inverse': Array(8.1991406e-05, dtype=float32)}


  9%|▉         | 87670/1000000 [4:08:54<27:25:46,  9.24it/s]

{'loss': Array(0.14650612, dtype=float32), 'loss_cross_entropy': Array(0.13891572, dtype=float32)}
{'loss_inverse': Array(5.0949104e-05, dtype=float32)}


  9%|▉         | 87680/1000000 [4:08:56<24:58:08, 10.15it/s]

{'loss': Array(0.10675507, dtype=float32), 'loss_cross_entropy': Array(0.09992534, dtype=float32)}
{'loss_inverse': Array(5.6646568e-05, dtype=float32)}


  9%|▉         | 87690/1000000 [4:08:58<28:25:30,  8.92it/s]

{'loss': Array(0.1319834, dtype=float32), 'loss_cross_entropy': Array(0.12468795, dtype=float32)}
{'loss_inverse': Array(1.5336162e-05, dtype=float32)}


  9%|▉         | 87700/1000000 [4:08:59<37:59:55,  6.67it/s]

{'loss': Array(0.1550491, dtype=float32), 'loss_cross_entropy': Array(0.14626645, dtype=float32)}
{'loss_inverse': Array(0.00011883, dtype=float32)}


  9%|▉         | 87710/1000000 [4:09:01<27:32:48,  9.20it/s]

{'loss': Array(0.14054133, dtype=float32), 'loss_cross_entropy': Array(0.13226792, dtype=float32)}
{'loss_inverse': Array(2.983376e-05, dtype=float32)}


  9%|▉         | 87720/1000000 [4:09:02<25:20:24, 10.00it/s]

{'loss': Array(0.13918708, dtype=float32), 'loss_cross_entropy': Array(0.13184862, dtype=float32)}
{'loss_inverse': Array(8.827955e-07, dtype=float32)}


  9%|▉         | 87730/1000000 [4:09:04<27:47:46,  9.12it/s]

{'loss': Array(0.14045948, dtype=float32), 'loss_cross_entropy': Array(0.13226075, dtype=float32)}
{'loss_inverse': Array(4.8167e-06, dtype=float32)}


  9%|▉         | 87740/1000000 [4:09:05<24:51:58, 10.19it/s]

{'loss': Array(0.14823818, dtype=float32), 'loss_cross_entropy': Array(0.13974456, dtype=float32)}
{'loss_inverse': Array(1.9344168e-05, dtype=float32)}


  9%|▉         | 87750/1000000 [4:09:07<29:51:20,  8.49it/s]

{'loss': Array(0.13713087, dtype=float32), 'loss_cross_entropy': Array(0.12940128, dtype=float32)}
{'loss_inverse': Array(3.5397003e-05, dtype=float32)}


  9%|▉         | 87760/1000000 [4:09:09<27:10:17,  9.33it/s]

{'loss': Array(0.13180064, dtype=float32), 'loss_cross_entropy': Array(0.12421279, dtype=float32)}
{'loss_inverse': Array(9.906667e-06, dtype=float32)}


  9%|▉         | 87770/1000000 [4:09:10<31:16:01,  8.10it/s]

{'loss': Array(0.13820419, dtype=float32), 'loss_cross_entropy': Array(0.13020326, dtype=float32)}
{'loss_inverse': Array(0.00036726, dtype=float32)}


  9%|▉         | 87780/1000000 [4:09:12<25:25:53,  9.96it/s]

{'loss': Array(0.1451418, dtype=float32), 'loss_cross_entropy': Array(0.13676234, dtype=float32)}
{'loss_inverse': Array(0.00027845, dtype=float32)}


  9%|▉         | 87790/1000000 [4:09:13<24:45:25, 10.24it/s]

{'loss': Array(0.14303151, dtype=float32), 'loss_cross_entropy': Array(0.13468091, dtype=float32)}
{'loss_inverse': Array(3.756801e-06, dtype=float32)}


  9%|▉         | 87800/1000000 [4:09:15<29:13:57,  8.67it/s]

{'loss': Array(0.14455555, dtype=float32), 'loss_cross_entropy': Array(0.13658224, dtype=float32)}
{'loss_inverse': Array(3.824625e-05, dtype=float32)}


  9%|▉         | 87810/1000000 [4:09:16<25:25:34,  9.97it/s]

{'loss': Array(0.13602354, dtype=float32), 'loss_cross_entropy': Array(0.12785366, dtype=float32)}
{'loss_inverse': Array(4.0605442e-05, dtype=float32)}


  9%|▉         | 87820/1000000 [4:09:18<31:09:21,  8.13it/s]

{'loss': Array(0.16006073, dtype=float32), 'loss_cross_entropy': Array(0.15197094, dtype=float32)}
{'loss_inverse': Array(0.00015006, dtype=float32)}


  9%|▉         | 87830/1000000 [4:09:20<26:14:45,  9.65it/s]

{'loss': Array(0.14615713, dtype=float32), 'loss_cross_entropy': Array(0.13776717, dtype=float32)}
{'loss_inverse': Array(4.87355e-05, dtype=float32)}


  9%|▉         | 87840/1000000 [4:09:21<33:59:30,  7.45it/s]

{'loss': Array(0.16088812, dtype=float32), 'loss_cross_entropy': Array(0.15224124, dtype=float32)}
{'loss_inverse': Array(2.5673796e-07, dtype=float32)}


  9%|▉         | 87850/1000000 [4:09:23<27:19:32,  9.27it/s]

{'loss': Array(0.14102574, dtype=float32), 'loss_cross_entropy': Array(0.13293034, dtype=float32)}
{'loss_inverse': Array(7.7155666e-05, dtype=float32)}


  9%|▉         | 87860/1000000 [4:09:24<25:00:38, 10.13it/s]

{'loss': Array(0.14797793, dtype=float32), 'loss_cross_entropy': Array(0.13998678, dtype=float32)}
{'loss_inverse': Array(0.00010632, dtype=float32)}


  9%|▉         | 87870/1000000 [4:09:26<27:19:53,  9.27it/s]

{'loss': Array(0.15031275, dtype=float32), 'loss_cross_entropy': Array(0.14177582, dtype=float32)}
{'loss_inverse': Array(7.922306e-06, dtype=float32)}


  9%|▉         | 87880/1000000 [4:09:27<25:11:12, 10.06it/s]

{'loss': Array(0.14430186, dtype=float32), 'loss_cross_entropy': Array(0.1358512, dtype=float32)}
{'loss_inverse': Array(0.00023478, dtype=float32)}


  9%|▉         | 87890/1000000 [4:09:29<33:40:58,  7.52it/s]

{'loss': Array(0.13472247, dtype=float32), 'loss_cross_entropy': Array(0.12734355, dtype=float32)}
{'loss_inverse': Array(7.940274e-05, dtype=float32)}


  9%|▉         | 87900/1000000 [4:09:31<26:32:53,  9.54it/s]

{'loss': Array(0.14390591, dtype=float32), 'loss_cross_entropy': Array(0.1364212, dtype=float32)}
{'loss_inverse': Array(1.1142703e-07, dtype=float32)}


  9%|▉         | 87910/1000000 [4:09:32<25:02:48, 10.12it/s]

{'loss': Array(0.14801978, dtype=float32), 'loss_cross_entropy': Array(0.13971402, dtype=float32)}
{'loss_inverse': Array(2.98861e-07, dtype=float32)}


  9%|▉         | 87920/1000000 [4:09:34<28:08:25,  9.00it/s]

{'loss': Array(0.13858072, dtype=float32), 'loss_cross_entropy': Array(0.13033134, dtype=float32)}
{'loss_inverse': Array(9.108471e-06, dtype=float32)}


  9%|▉         | 87930/1000000 [4:09:35<25:29:21,  9.94it/s]

{'loss': Array(0.14309172, dtype=float32), 'loss_cross_entropy': Array(0.13534306, dtype=float32)}
{'loss_inverse': Array(0.00011676, dtype=float32)}


  9%|▉         | 87940/1000000 [4:09:37<29:25:19,  8.61it/s]

{'loss': Array(0.14400414, dtype=float32), 'loss_cross_entropy': Array(0.13554363, dtype=float32)}
{'loss_inverse': Array(2.3457004e-07, dtype=float32)}


  9%|▉         | 87950/1000000 [4:09:38<25:35:38,  9.90it/s]

{'loss': Array(0.14203122, dtype=float32), 'loss_cross_entropy': Array(0.13382518, dtype=float32)}
{'loss_inverse': Array(3.9204406e-05, dtype=float32)}


  9%|▉         | 87960/1000000 [4:09:40<30:50:42,  8.21it/s]

{'loss': Array(0.15232593, dtype=float32), 'loss_cross_entropy': Array(0.14380454, dtype=float32)}
{'loss_inverse': Array(1.1802288e-05, dtype=float32)}


  9%|▉         | 87970/1000000 [4:09:42<25:29:48,  9.94it/s]

{'loss': Array(0.13665369, dtype=float32), 'loss_cross_entropy': Array(0.12896715, dtype=float32)}
{'loss_inverse': Array(1.932777e-06, dtype=float32)}


  9%|▉         | 87980/1000000 [4:09:43<24:30:18, 10.34it/s]

{'loss': Array(0.14791071, dtype=float32), 'loss_cross_entropy': Array(0.13977797, dtype=float32)}
{'loss_inverse': Array(2.6675752e-06, dtype=float32)}


  9%|▉         | 87990/1000000 [4:09:45<28:48:11,  8.80it/s]

{'loss': Array(0.14627777, dtype=float32), 'loss_cross_entropy': Array(0.13824217, dtype=float32)}
{'loss_inverse': Array(0.00012474, dtype=float32)}


  9%|▉         | 88000/1000000 [4:09:46<25:02:47, 10.11it/s]

{'loss': Array(0.15167153, dtype=float32), 'loss_cross_entropy': Array(0.14283462, dtype=float32)}
{'loss_inverse': Array(9.809586e-07, dtype=float32)}


  9%|▉         | 88010/1000000 [4:09:55<96:09:43,  2.63it/s] 

{'loss': Array(0.14160457, dtype=float32), 'loss_cross_entropy': Array(0.13333647, dtype=float32)}
{'loss_inverse': Array(2.0726238e-06, dtype=float32)}


  9%|▉         | 88020/1000000 [4:09:56<37:05:55,  6.83it/s] 

{'loss': Array(0.1372443, dtype=float32), 'loss_cross_entropy': Array(0.12978095, dtype=float32)}
{'loss_inverse': Array(1.4593114e-07, dtype=float32)}


  9%|▉         | 88030/1000000 [4:09:58<39:06:14,  6.48it/s]

{'loss': Array(0.15062779, dtype=float32), 'loss_cross_entropy': Array(0.1424232, dtype=float32)}
{'loss_inverse': Array(3.87457e-05, dtype=float32)}


  9%|▉         | 88040/1000000 [4:09:59<27:22:45,  9.25it/s]

{'loss': Array(0.14494513, dtype=float32), 'loss_cross_entropy': Array(0.1369913, dtype=float32)}
{'loss_inverse': Array(2.0555615e-05, dtype=float32)}


  9%|▉         | 88050/1000000 [4:10:01<37:54:16,  6.68it/s]

{'loss': Array(0.13277891, dtype=float32), 'loss_cross_entropy': Array(0.12517016, dtype=float32)}
{'loss_inverse': Array(8.2888937e-07, dtype=float32)}


  9%|▉         | 88060/1000000 [4:10:02<26:39:47,  9.50it/s]

{'loss': Array(0.13506582, dtype=float32), 'loss_cross_entropy': Array(0.12682909, dtype=float32)}
{'loss_inverse': Array(1.262936e-08, dtype=float32)}


  9%|▉         | 88070/1000000 [4:10:04<25:39:32,  9.87it/s]

{'loss': Array(0.13919537, dtype=float32), 'loss_cross_entropy': Array(0.13129537, dtype=float32)}
{'loss_inverse': Array(4.5791623e-07, dtype=float32)}


  9%|▉         | 88080/1000000 [4:10:06<30:23:34,  8.33it/s]

{'loss': Array(0.15200697, dtype=float32), 'loss_cross_entropy': Array(0.1441493, dtype=float32)}
{'loss_inverse': Array(1.0757504e-05, dtype=float32)}


  9%|▉         | 88090/1000000 [4:10:07<27:10:55,  9.32it/s]

{'loss': Array(0.17443407, dtype=float32), 'loss_cross_entropy': Array(0.16526528, dtype=float32)}
{'loss_inverse': Array(2.7616537e-07, dtype=float32)}


  9%|▉         | 88100/1000000 [4:10:09<38:38:37,  6.55it/s]

{'loss': Array(0.15998183, dtype=float32), 'loss_cross_entropy': Array(0.15168376, dtype=float32)}
{'loss_inverse': Array(2.6577321e-05, dtype=float32)}


  9%|▉         | 88110/1000000 [4:10:10<28:06:19,  9.01it/s]

{'loss': Array(0.13382092, dtype=float32), 'loss_cross_entropy': Array(0.12589256, dtype=float32)}
{'loss_inverse': Array(0.00010906, dtype=float32)}


  9%|▉         | 88120/1000000 [4:10:12<26:19:49,  9.62it/s]

{'loss': Array(0.15150905, dtype=float32), 'loss_cross_entropy': Array(0.1429046, dtype=float32)}
{'loss_inverse': Array(0.00117141, dtype=float32)}


  9%|▉         | 88130/1000000 [4:10:14<29:46:58,  8.50it/s]

{'loss': Array(0.13667153, dtype=float32), 'loss_cross_entropy': Array(0.12860683, dtype=float32)}
{'loss_inverse': Array(5.3931104e-05, dtype=float32)}


  9%|▉         | 88140/1000000 [4:10:15<25:59:12,  9.75it/s]

{'loss': Array(0.13357867, dtype=float32), 'loss_cross_entropy': Array(0.12624079, dtype=float32)}
{'loss_inverse': Array(3.002189e-06, dtype=float32)}


  9%|▉         | 88150/1000000 [4:10:17<30:47:14,  8.23it/s]

{'loss': Array(0.1490284, dtype=float32), 'loss_cross_entropy': Array(0.14081578, dtype=float32)}
{'loss_inverse': Array(1.4268485e-06, dtype=float32)}


  9%|▉         | 88160/1000000 [4:10:18<26:08:52,  9.69it/s]

{'loss': Array(0.13071112, dtype=float32), 'loss_cross_entropy': Array(0.12320254, dtype=float32)}
{'loss_inverse': Array(1.6399963e-06, dtype=float32)}


  9%|▉         | 88170/1000000 [4:10:20<31:30:18,  8.04it/s]

{'loss': Array(0.14195724, dtype=float32), 'loss_cross_entropy': Array(0.13387635, dtype=float32)}
{'loss_inverse': Array(4.2138778e-05, dtype=float32)}


  9%|▉         | 88180/1000000 [4:10:22<26:46:01,  9.46it/s]

{'loss': Array(0.13515632, dtype=float32), 'loss_cross_entropy': Array(0.12814769, dtype=float32)}
{'loss_inverse': Array(0.00022422, dtype=float32)}


  9%|▉         | 88190/1000000 [4:10:23<25:10:51, 10.06it/s]

{'loss': Array(0.14725725, dtype=float32), 'loss_cross_entropy': Array(0.13897131, dtype=float32)}
{'loss_inverse': Array(5.1973875e-06, dtype=float32)}


  9%|▉         | 88200/1000000 [4:10:25<28:40:16,  8.83it/s]

{'loss': Array(0.12295987, dtype=float32), 'loss_cross_entropy': Array(0.11535336, dtype=float32)}
{'loss_inverse': Array(7.3892766e-06, dtype=float32)}


  9%|▉         | 88210/1000000 [4:10:26<25:30:05,  9.93it/s]

{'loss': Array(0.13334309, dtype=float32), 'loss_cross_entropy': Array(0.12584801, dtype=float32)}
{'loss_inverse': Array(6.219004e-05, dtype=float32)}


  9%|▉         | 88220/1000000 [4:10:28<29:18:43,  8.64it/s]

{'loss': Array(0.1215973, dtype=float32), 'loss_cross_entropy': Array(0.11418525, dtype=float32)}
{'loss_inverse': Array(8.3674917e-07, dtype=float32)}


  9%|▉         | 88230/1000000 [4:10:30<25:33:48,  9.91it/s]

{'loss': Array(0.13263449, dtype=float32), 'loss_cross_entropy': Array(0.12501684, dtype=float32)}
{'loss_inverse': Array(0.00017988, dtype=float32)}


  9%|▉         | 88240/1000000 [4:10:31<31:06:57,  8.14it/s]

{'loss': Array(0.1624866, dtype=float32), 'loss_cross_entropy': Array(0.15320143, dtype=float32)}
{'loss_inverse': Array(1.1870574e-06, dtype=float32)}


  9%|▉         | 88250/1000000 [4:10:33<25:36:13,  9.89it/s]

{'loss': Array(0.1418047, dtype=float32), 'loss_cross_entropy': Array(0.13359281, dtype=float32)}
{'loss_inverse': Array(0.00020698, dtype=float32)}


  9%|▉         | 88260/1000000 [4:10:34<24:40:26, 10.26it/s]

{'loss': Array(0.15036221, dtype=float32), 'loss_cross_entropy': Array(0.14232396, dtype=float32)}
{'loss_inverse': Array(6.232905e-05, dtype=float32)}


  9%|▉         | 88270/1000000 [4:10:36<29:22:35,  8.62it/s]

{'loss': Array(0.14592803, dtype=float32), 'loss_cross_entropy': Array(0.1380782, dtype=float32)}
{'loss_inverse': Array(1.0025439e-05, dtype=float32)}


  9%|▉         | 88280/1000000 [4:10:37<25:10:59, 10.06it/s]

{'loss': Array(0.15990558, dtype=float32), 'loss_cross_entropy': Array(0.15152633, dtype=float32)}
{'loss_inverse': Array(0.0002334, dtype=float32)}


  9%|▉         | 88290/1000000 [4:10:39<31:09:23,  8.13it/s]

{'loss': Array(0.14477864, dtype=float32), 'loss_cross_entropy': Array(0.13668296, dtype=float32)}
{'loss_inverse': Array(6.8409776e-08, dtype=float32)}


  9%|▉         | 88300/1000000 [4:10:40<26:28:02,  9.57it/s]

{'loss': Array(0.16366585, dtype=float32), 'loss_cross_entropy': Array(0.15403174, dtype=float32)}
{'loss_inverse': Array(4.4114854e-06, dtype=float32)}


  9%|▉         | 88310/1000000 [4:10:42<33:54:44,  7.47it/s]

{'loss': Array(0.14510317, dtype=float32), 'loss_cross_entropy': Array(0.13677227, dtype=float32)}
{'loss_inverse': Array(1.0363144e-06, dtype=float32)}


  9%|▉         | 88320/1000000 [4:10:44<26:20:38,  9.61it/s]

{'loss': Array(0.15515275, dtype=float32), 'loss_cross_entropy': Array(0.14641573, dtype=float32)}
{'loss_inverse': Array(1.7569054e-06, dtype=float32)}


  9%|▉         | 88330/1000000 [4:10:45<33:25:21,  7.58it/s]

{'loss': Array(0.14244813, dtype=float32), 'loss_cross_entropy': Array(0.1344214, dtype=float32)}
{'loss_inverse': Array(0.00019832, dtype=float32)}


  9%|▉         | 88340/1000000 [4:10:47<26:05:04,  9.71it/s]

{'loss': Array(0.15925114, dtype=float32), 'loss_cross_entropy': Array(0.15081151, dtype=float32)}
{'loss_inverse': Array(1.1466898e-05, dtype=float32)}


  9%|▉         | 88350/1000000 [4:10:48<25:29:51,  9.93it/s]

{'loss': Array(0.13138507, dtype=float32), 'loss_cross_entropy': Array(0.12390219, dtype=float32)}
{'loss_inverse': Array(2.4740262e-05, dtype=float32)}


  9%|▉         | 88360/1000000 [4:10:50<28:57:00,  8.75it/s]

{'loss': Array(0.1560638, dtype=float32), 'loss_cross_entropy': Array(0.14773105, dtype=float32)}
{'loss_inverse': Array(2.1969722e-06, dtype=float32)}


  9%|▉         | 88370/1000000 [4:10:51<25:01:25, 10.12it/s]

{'loss': Array(0.13495082, dtype=float32), 'loss_cross_entropy': Array(0.12713687, dtype=float32)}
{'loss_inverse': Array(3.1197615e-05, dtype=float32)}


  9%|▉         | 88380/1000000 [4:10:53<33:45:12,  7.50it/s]

{'loss': Array(0.13828735, dtype=float32), 'loss_cross_entropy': Array(0.13090758, dtype=float32)}
{'loss_inverse': Array(4.3550637e-05, dtype=float32)}


  9%|▉         | 88390/1000000 [4:10:54<26:23:13,  9.60it/s]

{'loss': Array(0.1463555, dtype=float32), 'loss_cross_entropy': Array(0.13848378, dtype=float32)}
{'loss_inverse': Array(3.0778137e-05, dtype=float32)}


  9%|▉         | 88400/1000000 [4:10:56<37:21:01,  6.78it/s]

{'loss': Array(0.1062791, dtype=float32), 'loss_cross_entropy': Array(0.0997585, dtype=float32)}
{'loss_inverse': Array(4.2321848e-05, dtype=float32)}


  9%|▉         | 88410/1000000 [4:10:58<27:16:31,  9.28it/s]

{'loss': Array(0.15840659, dtype=float32), 'loss_cross_entropy': Array(0.14964719, dtype=float32)}
{'loss_inverse': Array(6.8599446e-05, dtype=float32)}


  9%|▉         | 88420/1000000 [4:10:59<37:38:05,  6.73it/s]

{'loss': Array(0.13461003, dtype=float32), 'loss_cross_entropy': Array(0.12683336, dtype=float32)}
{'loss_inverse': Array(2.7356161e-06, dtype=float32)}


  9%|▉         | 88430/1000000 [4:11:01<27:32:06,  9.20it/s]

{'loss': Array(0.13322088, dtype=float32), 'loss_cross_entropy': Array(0.12564409, dtype=float32)}
{'loss_inverse': Array(9.67888e-06, dtype=float32)}


  9%|▉         | 88440/1000000 [4:11:02<24:51:18, 10.19it/s]

{'loss': Array(0.13324046, dtype=float32), 'loss_cross_entropy': Array(0.12619965, dtype=float32)}
{'loss_inverse': Array(2.6896063e-07, dtype=float32)}


  9%|▉         | 88450/1000000 [4:11:04<33:54:12,  7.47it/s]

{'loss': Array(0.12717402, dtype=float32), 'loss_cross_entropy': Array(0.12007163, dtype=float32)}
{'loss_inverse': Array(2.0413003e-05, dtype=float32)}


  9%|▉         | 88460/1000000 [4:11:05<26:23:48,  9.59it/s]

{'loss': Array(0.12313171, dtype=float32), 'loss_cross_entropy': Array(0.11621325, dtype=float32)}
{'loss_inverse': Array(0.00011858, dtype=float32)}


  9%|▉         | 88470/1000000 [4:11:07<25:14:29, 10.03it/s]

{'loss': Array(0.14314164, dtype=float32), 'loss_cross_entropy': Array(0.13498636, dtype=float32)}
{'loss_inverse': Array(3.6121975e-05, dtype=float32)}


  9%|▉         | 88480/1000000 [4:11:09<28:59:02,  8.74it/s]

{'loss': Array(0.14619282, dtype=float32), 'loss_cross_entropy': Array(0.13826172, dtype=float32)}
{'loss_inverse': Array(1.05778e-06, dtype=float32)}


  9%|▉         | 88490/1000000 [4:11:10<25:32:55,  9.91it/s]

{'loss': Array(0.1287043, dtype=float32), 'loss_cross_entropy': Array(0.12136636, dtype=float32)}
{'loss_inverse': Array(0.00011349, dtype=float32)}


  9%|▉         | 88500/1000000 [4:11:12<28:35:45,  8.85it/s]

{'loss': Array(0.13064462, dtype=float32), 'loss_cross_entropy': Array(0.12341453, dtype=float32)}
{'loss_inverse': Array(7.7150535e-06, dtype=float32)}


  9%|▉         | 88510/1000000 [4:11:20<84:01:08,  3.01it/s] 

{'loss': Array(0.14623952, dtype=float32), 'loss_cross_entropy': Array(0.13788852, dtype=float32)}
{'loss_inverse': Array(1.82273e-05, dtype=float32)}


  9%|▉         | 88520/1000000 [4:11:21<41:04:30,  6.16it/s]

{'loss': Array(0.1365158, dtype=float32), 'loss_cross_entropy': Array(0.12845884, dtype=float32)}
{'loss_inverse': Array(1.2612873e-06, dtype=float32)}


  9%|▉         | 88530/1000000 [4:11:23<27:15:24,  9.29it/s]

{'loss': Array(0.14124803, dtype=float32), 'loss_cross_entropy': Array(0.13335048, dtype=float32)}
{'loss_inverse': Array(0.00017012, dtype=float32)}


  9%|▉         | 88540/1000000 [4:11:25<37:44:31,  6.71it/s]

{'loss': Array(0.15271704, dtype=float32), 'loss_cross_entropy': Array(0.14446603, dtype=float32)}
{'loss_inverse': Array(4.3818423e-07, dtype=float32)}


  9%|▉         | 88550/1000000 [4:11:26<26:45:49,  9.46it/s]

{'loss': Array(0.15306616, dtype=float32), 'loss_cross_entropy': Array(0.14428228, dtype=float32)}
{'loss_inverse': Array(9.756708e-08, dtype=float32)}


  9%|▉         | 88560/1000000 [4:11:27<25:02:08, 10.11it/s]

{'loss': Array(0.13981125, dtype=float32), 'loss_cross_entropy': Array(0.13169768, dtype=float32)}
{'loss_inverse': Array(2.160883e-08, dtype=float32)}


  9%|▉         | 88570/1000000 [4:11:29<29:11:19,  8.67it/s]

{'loss': Array(0.14951353, dtype=float32), 'loss_cross_entropy': Array(0.14089961, dtype=float32)}
{'loss_inverse': Array(9.4164847e-07, dtype=float32)}


  9%|▉         | 88580/1000000 [4:11:31<26:24:19,  9.59it/s]

{'loss': Array(0.15530065, dtype=float32), 'loss_cross_entropy': Array(0.14636903, dtype=float32)}
{'loss_inverse': Array(0.00011706, dtype=float32)}


  9%|▉         | 88590/1000000 [4:11:32<31:43:33,  7.98it/s]

{'loss': Array(0.12991731, dtype=float32), 'loss_cross_entropy': Array(0.12221798, dtype=float32)}
{'loss_inverse': Array(1.813393e-05, dtype=float32)}


  9%|▉         | 88600/1000000 [4:11:34<25:59:13,  9.74it/s]

{'loss': Array(0.1476605, dtype=float32), 'loss_cross_entropy': Array(0.13978182, dtype=float32)}
{'loss_inverse': Array(0.0001361, dtype=float32)}


  9%|▉         | 88610/1000000 [4:11:36<33:50:49,  7.48it/s]

{'loss': Array(0.12888704, dtype=float32), 'loss_cross_entropy': Array(0.12150353, dtype=float32)}
{'loss_inverse': Array(5.1331476e-06, dtype=float32)}


  9%|▉         | 88620/1000000 [4:11:37<27:06:53,  9.34it/s]

{'loss': Array(0.13639282, dtype=float32), 'loss_cross_entropy': Array(0.12907197, dtype=float32)}
{'loss_inverse': Array(2.7468423e-06, dtype=float32)}


  9%|▉         | 88630/1000000 [4:11:39<34:01:01,  7.44it/s]

{'loss': Array(0.15504019, dtype=float32), 'loss_cross_entropy': Array(0.1468975, dtype=float32)}
{'loss_inverse': Array(1.4773709e-06, dtype=float32)}


  9%|▉         | 88640/1000000 [4:11:40<26:14:11,  9.65it/s]

{'loss': Array(0.14288981, dtype=float32), 'loss_cross_entropy': Array(0.13506952, dtype=float32)}
{'loss_inverse': Array(3.8289395e-06, dtype=float32)}


  9%|▉         | 88650/1000000 [4:11:42<34:45:32,  7.28it/s]

{'loss': Array(0.13756274, dtype=float32), 'loss_cross_entropy': Array(0.12977143, dtype=float32)}
{'loss_inverse': Array(1.5217836e-07, dtype=float32)}


  9%|▉         | 88660/1000000 [4:11:43<27:00:46,  9.37it/s]

{'loss': Array(0.14031278, dtype=float32), 'loss_cross_entropy': Array(0.13274823, dtype=float32)}
{'loss_inverse': Array(4.0610743e-05, dtype=float32)}


  9%|▉         | 88670/1000000 [4:11:45<25:22:03,  9.98it/s]

{'loss': Array(0.14780442, dtype=float32), 'loss_cross_entropy': Array(0.13941331, dtype=float32)}
{'loss_inverse': Array(1.5591271e-05, dtype=float32)}


  9%|▉         | 88680/1000000 [4:11:47<27:56:24,  9.06it/s]

{'loss': Array(0.1388946, dtype=float32), 'loss_cross_entropy': Array(0.13111348, dtype=float32)}
{'loss_inverse': Array(4.4530097e-07, dtype=float32)}


  9%|▉         | 88690/1000000 [4:11:48<25:13:19, 10.04it/s]

{'loss': Array(0.14940317, dtype=float32), 'loss_cross_entropy': Array(0.14044109, dtype=float32)}
{'loss_inverse': Array(1.9710198e-05, dtype=float32)}


  9%|▉         | 88700/1000000 [4:11:50<28:29:09,  8.89it/s]

{'loss': Array(0.13694331, dtype=float32), 'loss_cross_entropy': Array(0.12947349, dtype=float32)}
{'loss_inverse': Array(1.7239707e-05, dtype=float32)}


  9%|▉         | 88710/1000000 [4:11:51<25:11:35, 10.05it/s]

{'loss': Array(0.12898345, dtype=float32), 'loss_cross_entropy': Array(0.12117863, dtype=float32)}
{'loss_inverse': Array(0.00017661, dtype=float32)}


  9%|▉         | 88720/1000000 [4:11:53<29:42:31,  8.52it/s]

{'loss': Array(0.1324706, dtype=float32), 'loss_cross_entropy': Array(0.12512247, dtype=float32)}
{'loss_inverse': Array(7.8407265e-07, dtype=float32)}


  9%|▉         | 88730/1000000 [4:11:55<26:08:53,  9.68it/s]

{'loss': Array(0.14430356, dtype=float32), 'loss_cross_entropy': Array(0.13620278, dtype=float32)}
{'loss_inverse': Array(3.3232744e-05, dtype=float32)}


  9%|▉         | 88740/1000000 [4:11:56<37:24:16,  6.77it/s]

{'loss': Array(0.12751286, dtype=float32), 'loss_cross_entropy': Array(0.11949705, dtype=float32)}
{'loss_inverse': Array(7.592677e-07, dtype=float32)}


  9%|▉         | 88750/1000000 [4:11:58<26:59:20,  9.38it/s]

{'loss': Array(0.14941406, dtype=float32), 'loss_cross_entropy': Array(0.14152493, dtype=float32)}
{'loss_inverse': Array(1.1793503e-06, dtype=float32)}


  9%|▉         | 88760/1000000 [4:11:59<24:52:37, 10.17it/s]

{'loss': Array(0.13053621, dtype=float32), 'loss_cross_entropy': Array(0.12330097, dtype=float32)}
{'loss_inverse': Array(7.053153e-05, dtype=float32)}


  9%|▉         | 88770/1000000 [4:12:01<28:44:08,  8.81it/s]

{'loss': Array(0.13667521, dtype=float32), 'loss_cross_entropy': Array(0.12946425, dtype=float32)}
{'loss_inverse': Array(2.4520793e-06, dtype=float32)}


  9%|▉         | 88780/1000000 [4:12:02<25:21:35,  9.98it/s]

{'loss': Array(0.14665292, dtype=float32), 'loss_cross_entropy': Array(0.13784455, dtype=float32)}
{'loss_inverse': Array(5.0393265e-08, dtype=float32)}


  9%|▉         | 88790/1000000 [4:12:04<29:07:17,  8.69it/s]

{'loss': Array(0.13827513, dtype=float32), 'loss_cross_entropy': Array(0.13017707, dtype=float32)}
{'loss_inverse': Array(3.990949e-07, dtype=float32)}


  9%|▉         | 88800/1000000 [4:12:05<24:59:26, 10.13it/s]

{'loss': Array(0.13751438, dtype=float32), 'loss_cross_entropy': Array(0.12967817, dtype=float32)}
{'loss_inverse': Array(1.155661e-06, dtype=float32)}


  9%|▉         | 88810/1000000 [4:12:07<31:48:49,  7.96it/s]

{'loss': Array(0.1259342, dtype=float32), 'loss_cross_entropy': Array(0.11871449, dtype=float32)}
{'loss_inverse': Array(0.00020227, dtype=float32)}


  9%|▉         | 88820/1000000 [4:12:09<28:04:28,  9.02it/s]

{'loss': Array(0.14618824, dtype=float32), 'loss_cross_entropy': Array(0.13826299, dtype=float32)}
{'loss_inverse': Array(4.8212396e-06, dtype=float32)}


  9%|▉         | 88830/1000000 [4:12:10<26:13:35,  9.65it/s]

{'loss': Array(0.1446752, dtype=float32), 'loss_cross_entropy': Array(0.13675599, dtype=float32)}
{'loss_inverse': Array(3.5430034e-07, dtype=float32)}


  9%|▉         | 88840/1000000 [4:12:12<27:52:52,  9.08it/s]

{'loss': Array(0.13059203, dtype=float32), 'loss_cross_entropy': Array(0.12306833, dtype=float32)}
{'loss_inverse': Array(1.7913814e-06, dtype=float32)}


  9%|▉         | 88850/1000000 [4:12:13<25:47:52,  9.81it/s]

{'loss': Array(0.13525303, dtype=float32), 'loss_cross_entropy': Array(0.12751663, dtype=float32)}
{'loss_inverse': Array(1.342576e-05, dtype=float32)}


  9%|▉         | 88860/1000000 [4:12:15<28:55:37,  8.75it/s]

{'loss': Array(0.13782983, dtype=float32), 'loss_cross_entropy': Array(0.12997617, dtype=float32)}
{'loss_inverse': Array(0.00010543, dtype=float32)}


  9%|▉         | 88870/1000000 [4:12:16<25:04:09, 10.10it/s]

{'loss': Array(0.14802645, dtype=float32), 'loss_cross_entropy': Array(0.13979366, dtype=float32)}
{'loss_inverse': Array(1.6770808e-07, dtype=float32)}


  9%|▉         | 88880/1000000 [4:12:18<30:46:46,  8.22it/s]

{'loss': Array(0.14024018, dtype=float32), 'loss_cross_entropy': Array(0.13229194, dtype=float32)}
{'loss_inverse': Array(0.00020051, dtype=float32)}


  9%|▉         | 88890/1000000 [4:12:20<25:56:14,  9.76it/s]

{'loss': Array(0.1551813, dtype=float32), 'loss_cross_entropy': Array(0.14632739, dtype=float32)}
{'loss_inverse': Array(6.4681103e-06, dtype=float32)}


  9%|▉         | 88900/1000000 [4:12:21<24:52:20, 10.18it/s]

{'loss': Array(0.14241593, dtype=float32), 'loss_cross_entropy': Array(0.13473319, dtype=float32)}
{'loss_inverse': Array(2.4002422e-05, dtype=float32)}


  9%|▉         | 88910/1000000 [4:12:23<28:53:47,  8.76it/s]

{'loss': Array(0.15011203, dtype=float32), 'loss_cross_entropy': Array(0.14217332, dtype=float32)}
{'loss_inverse': Array(2.3376862e-05, dtype=float32)}


  9%|▉         | 88920/1000000 [4:12:24<25:16:29, 10.01it/s]

{'loss': Array(0.15045099, dtype=float32), 'loss_cross_entropy': Array(0.14171849, dtype=float32)}
{'loss_inverse': Array(6.665431e-08, dtype=float32)}


  9%|▉         | 88930/1000000 [4:12:26<30:59:08,  8.17it/s]

{'loss': Array(0.1380173, dtype=float32), 'loss_cross_entropy': Array(0.13075058, dtype=float32)}
{'loss_inverse': Array(0.00015162, dtype=float32)}


  9%|▉         | 88940/1000000 [4:12:27<26:11:07,  9.66it/s]

{'loss': Array(0.14144121, dtype=float32), 'loss_cross_entropy': Array(0.13363841, dtype=float32)}
{'loss_inverse': Array(4.267183e-05, dtype=float32)}


  9%|▉         | 88950/1000000 [4:12:29<34:31:43,  7.33it/s]

{'loss': Array(0.13041042, dtype=float32), 'loss_cross_entropy': Array(0.12251053, dtype=float32)}
{'loss_inverse': Array(5.3259932e-06, dtype=float32)}


  9%|▉         | 88960/1000000 [4:12:31<27:09:00,  9.32it/s]

{'loss': Array(0.13098298, dtype=float32), 'loss_cross_entropy': Array(0.12350191, dtype=float32)}
{'loss_inverse': Array(1.7623303e-07, dtype=float32)}


  9%|▉         | 88970/1000000 [4:12:32<24:36:13, 10.29it/s]

{'loss': Array(0.13295431, dtype=float32), 'loss_cross_entropy': Array(0.1253498, dtype=float32)}
{'loss_inverse': Array(2.7695945e-05, dtype=float32)}


  9%|▉         | 88980/1000000 [4:12:34<27:22:37,  9.24it/s]

{'loss': Array(0.13500819, dtype=float32), 'loss_cross_entropy': Array(0.12676835, dtype=float32)}
{'loss_inverse': Array(3.680057e-08, dtype=float32)}


  9%|▉         | 88990/1000000 [4:12:35<25:44:20,  9.83it/s]

{'loss': Array(0.15399386, dtype=float32), 'loss_cross_entropy': Array(0.1452492, dtype=float32)}
{'loss_inverse': Array(3.830348e-08, dtype=float32)}


  9%|▉         | 89000/1000000 [4:12:37<33:28:18,  7.56it/s]

{'loss': Array(0.1596641, dtype=float32), 'loss_cross_entropy': Array(0.1515326, dtype=float32)}
{'loss_inverse': Array(1.8189724e-07, dtype=float32)}


  9%|▉         | 89010/1000000 [4:12:45<86:05:18,  2.94it/s] 

{'loss': Array(0.1541477, dtype=float32), 'loss_cross_entropy': Array(0.14535308, dtype=float32)}
{'loss_inverse': Array(0.00026049, dtype=float32)}


  9%|▉         | 89020/1000000 [4:12:47<41:21:38,  6.12it/s]

{'loss': Array(0.14585172, dtype=float32), 'loss_cross_entropy': Array(0.13755377, dtype=float32)}
{'loss_inverse': Array(5.406107e-06, dtype=float32)}


  9%|▉         | 89030/1000000 [4:12:48<27:53:09,  9.07it/s]

{'loss': Array(0.15969476, dtype=float32), 'loss_cross_entropy': Array(0.15108417, dtype=float32)}
{'loss_inverse': Array(0.00030462, dtype=float32)}


  9%|▉         | 89040/1000000 [4:12:50<37:33:38,  6.74it/s]

{'loss': Array(0.14591676, dtype=float32), 'loss_cross_entropy': Array(0.13731326, dtype=float32)}
{'loss_inverse': Array(0.00012309, dtype=float32)}


  9%|▉         | 89050/1000000 [4:12:51<26:29:30,  9.55it/s]

{'loss': Array(0.14513208, dtype=float32), 'loss_cross_entropy': Array(0.13739693, dtype=float32)}
{'loss_inverse': Array(2.3052728e-06, dtype=float32)}


  9%|▉         | 89060/1000000 [4:12:53<25:03:54, 10.10it/s]

{'loss': Array(0.12649159, dtype=float32), 'loss_cross_entropy': Array(0.11955144, dtype=float32)}
{'loss_inverse': Array(3.327587e-05, dtype=float32)}


  9%|▉         | 89070/1000000 [4:12:54<27:51:23,  9.08it/s]

{'loss': Array(0.14920041, dtype=float32), 'loss_cross_entropy': Array(0.14051764, dtype=float32)}
{'loss_inverse': Array(8.813168e-05, dtype=float32)}


  9%|▉         | 89080/1000000 [4:12:56<25:55:21,  9.76it/s]

{'loss': Array(0.15339917, dtype=float32), 'loss_cross_entropy': Array(0.14470826, dtype=float32)}
{'loss_inverse': Array(2.912022e-05, dtype=float32)}


  9%|▉         | 89090/1000000 [4:12:57<30:40:32,  8.25it/s]

{'loss': Array(0.12966333, dtype=float32), 'loss_cross_entropy': Array(0.12305897, dtype=float32)}
{'loss_inverse': Array(0.00011895, dtype=float32)}


  9%|▉         | 89100/1000000 [4:12:59<25:23:28,  9.97it/s]

{'loss': Array(0.15481125, dtype=float32), 'loss_cross_entropy': Array(0.14668992, dtype=float32)}
{'loss_inverse': Array(1.3880096e-05, dtype=float32)}


  9%|▉         | 89110/1000000 [4:13:01<31:05:41,  8.14it/s]

{'loss': Array(0.14727557, dtype=float32), 'loss_cross_entropy': Array(0.13888477, dtype=float32)}
{'loss_inverse': Array(0.00028473, dtype=float32)}


  9%|▉         | 89120/1000000 [4:13:02<26:19:23,  9.61it/s]

{'loss': Array(0.14434503, dtype=float32), 'loss_cross_entropy': Array(0.13659476, dtype=float32)}
{'loss_inverse': Array(5.7058147e-05, dtype=float32)}


  9%|▉         | 89130/1000000 [4:13:04<37:24:36,  6.76it/s]

{'loss': Array(0.14865237, dtype=float32), 'loss_cross_entropy': Array(0.14020132, dtype=float32)}
{'loss_inverse': Array(2.546279e-06, dtype=float32)}


  9%|▉         | 89140/1000000 [4:13:05<26:53:01,  9.41it/s]

{'loss': Array(0.15021268, dtype=float32), 'loss_cross_entropy': Array(0.14205854, dtype=float32)}
{'loss_inverse': Array(8.322547e-05, dtype=float32)}


  9%|▉         | 89150/1000000 [4:13:07<25:43:01,  9.84it/s]

{'loss': Array(0.13543046, dtype=float32), 'loss_cross_entropy': Array(0.12713285, dtype=float32)}
{'loss_inverse': Array(0.00016407, dtype=float32)}


  9%|▉         | 89160/1000000 [4:13:08<27:58:05,  9.05it/s]

{'loss': Array(0.14787547, dtype=float32), 'loss_cross_entropy': Array(0.13978733, dtype=float32)}
{'loss_inverse': Array(5.6177134e-07, dtype=float32)}


  9%|▉         | 89170/1000000 [4:13:10<26:10:23,  9.67it/s]

{'loss': Array(0.15806474, dtype=float32), 'loss_cross_entropy': Array(0.14945611, dtype=float32)}
{'loss_inverse': Array(0.00021298, dtype=float32)}


  9%|▉         | 89180/1000000 [4:13:12<31:38:11,  8.00it/s]

{'loss': Array(0.12413893, dtype=float32), 'loss_cross_entropy': Array(0.11680465, dtype=float32)}
{'loss_inverse': Array(4.3807318e-06, dtype=float32)}


  9%|▉         | 89190/1000000 [4:13:13<26:35:15,  9.52it/s]

{'loss': Array(0.143984, dtype=float32), 'loss_cross_entropy': Array(0.13587326, dtype=float32)}
{'loss_inverse': Array(4.1188196e-05, dtype=float32)}


  9%|▉         | 89200/1000000 [4:13:15<24:50:47, 10.18it/s]

{'loss': Array(0.15423371, dtype=float32), 'loss_cross_entropy': Array(0.1460598, dtype=float32)}
{'loss_inverse': Array(3.8200832e-07, dtype=float32)}


  9%|▉         | 89210/1000000 [4:13:16<28:42:54,  8.81it/s]

{'loss': Array(0.13463305, dtype=float32), 'loss_cross_entropy': Array(0.1262319, dtype=float32)}
{'loss_inverse': Array(1.2554601e-05, dtype=float32)}


  9%|▉         | 89220/1000000 [4:13:18<25:26:54,  9.94it/s]

{'loss': Array(0.14284329, dtype=float32), 'loss_cross_entropy': Array(0.13475548, dtype=float32)}
{'loss_inverse': Array(0.00012545, dtype=float32)}


  9%|▉         | 89230/1000000 [4:13:19<33:14:09,  7.61it/s]

{'loss': Array(0.14518368, dtype=float32), 'loss_cross_entropy': Array(0.13646846, dtype=float32)}
{'loss_inverse': Array(7.760202e-08, dtype=float32)}


  9%|▉         | 89240/1000000 [4:13:21<25:53:50,  9.77it/s]

{'loss': Array(0.1367427, dtype=float32), 'loss_cross_entropy': Array(0.12934479, dtype=float32)}
{'loss_inverse': Array(5.1817915e-07, dtype=float32)}


  9%|▉         | 89250/1000000 [4:13:23<36:58:10,  6.84it/s]

{'loss': Array(0.13315558, dtype=float32), 'loss_cross_entropy': Array(0.1253697, dtype=float32)}
{'loss_inverse': Array(5.661335e-05, dtype=float32)}


  9%|▉         | 89260/1000000 [4:13:24<26:55:07,  9.40it/s]

{'loss': Array(0.14533545, dtype=float32), 'loss_cross_entropy': Array(0.13675362, dtype=float32)}
{'loss_inverse': Array(1.4388975e-05, dtype=float32)}


  9%|▉         | 89270/1000000 [4:13:26<37:21:36,  6.77it/s]

{'loss': Array(0.1106982, dtype=float32), 'loss_cross_entropy': Array(0.1039324, dtype=float32)}
{'loss_inverse': Array(3.563308e-05, dtype=float32)}


  9%|▉         | 89280/1000000 [4:13:27<26:27:23,  9.56it/s]

{'loss': Array(0.16379511, dtype=float32), 'loss_cross_entropy': Array(0.15443371, dtype=float32)}
{'loss_inverse': Array(4.809092e-06, dtype=float32)}


  9%|▉         | 89290/1000000 [4:13:28<24:44:37, 10.22it/s]

{'loss': Array(0.13307102, dtype=float32), 'loss_cross_entropy': Array(0.12562706, dtype=float32)}
{'loss_inverse': Array(8.968516e-05, dtype=float32)}


  9%|▉         | 89300/1000000 [4:13:30<31:06:08,  8.13it/s]

{'loss': Array(0.15399137, dtype=float32), 'loss_cross_entropy': Array(0.14536087, dtype=float32)}
{'loss_inverse': Array(9.4825117e-07, dtype=float32)}


  9%|▉         | 89310/1000000 [4:13:32<26:07:34,  9.68it/s]

{'loss': Array(0.14443313, dtype=float32), 'loss_cross_entropy': Array(0.13635242, dtype=float32)}
{'loss_inverse': Array(1.5066366e-05, dtype=float32)}


  9%|▉         | 89320/1000000 [4:13:33<37:19:05,  6.78it/s]

{'loss': Array(0.13153647, dtype=float32), 'loss_cross_entropy': Array(0.12370452, dtype=float32)}
{'loss_inverse': Array(0.0001506, dtype=float32)}


  9%|▉         | 89330/1000000 [4:13:35<27:10:21,  9.31it/s]

{'loss': Array(0.1494723, dtype=float32), 'loss_cross_entropy': Array(0.14101134, dtype=float32)}
{'loss_inverse': Array(1.7738842e-06, dtype=float32)}


  9%|▉         | 89340/1000000 [4:13:37<37:33:11,  6.74it/s]

{'loss': Array(0.13907954, dtype=float32), 'loss_cross_entropy': Array(0.13191496, dtype=float32)}
{'loss_inverse': Array(4.4858118e-07, dtype=float32)}


  9%|▉         | 89350/1000000 [4:13:38<26:22:51,  9.59it/s]

{'loss': Array(0.13373962, dtype=float32), 'loss_cross_entropy': Array(0.12629892, dtype=float32)}
{'loss_inverse': Array(0.00013641, dtype=float32)}


  9%|▉         | 89360/1000000 [4:13:39<24:46:05, 10.21it/s]

{'loss': Array(0.15801415, dtype=float32), 'loss_cross_entropy': Array(0.1499664, dtype=float32)}
{'loss_inverse': Array(4.187721e-08, dtype=float32)}


  9%|▉         | 89370/1000000 [4:13:41<33:14:26,  7.61it/s]

{'loss': Array(0.1477698, dtype=float32), 'loss_cross_entropy': Array(0.13952646, dtype=float32)}
{'loss_inverse': Array(2.843325e-06, dtype=float32)}


  9%|▉         | 89380/1000000 [4:13:43<26:27:15,  9.56it/s]

{'loss': Array(0.18115292, dtype=float32), 'loss_cross_entropy': Array(0.17193376, dtype=float32)}
{'loss_inverse': Array(1.6056609e-06, dtype=float32)}


  9%|▉         | 89390/1000000 [4:13:44<24:49:59, 10.19it/s]

{'loss': Array(0.1494272, dtype=float32), 'loss_cross_entropy': Array(0.14156292, dtype=float32)}
{'loss_inverse': Array(5.171094e-05, dtype=float32)}


  9%|▉         | 89400/1000000 [4:13:46<27:53:28,  9.07it/s]

{'loss': Array(0.12953138, dtype=float32), 'loss_cross_entropy': Array(0.12223538, dtype=float32)}
{'loss_inverse': Array(0.0001785, dtype=float32)}


  9%|▉         | 89410/1000000 [4:13:47<25:51:00,  9.78it/s]

{'loss': Array(0.12853411, dtype=float32), 'loss_cross_entropy': Array(0.12127312, dtype=float32)}
{'loss_inverse': Array(5.0959093e-06, dtype=float32)}


  9%|▉         | 89420/1000000 [4:13:49<29:28:54,  8.58it/s]

{'loss': Array(0.15529433, dtype=float32), 'loss_cross_entropy': Array(0.14635567, dtype=float32)}
{'loss_inverse': Array(8.7030196e-05, dtype=float32)}


  9%|▉         | 89430/1000000 [4:13:50<25:38:22,  9.87it/s]

{'loss': Array(0.14501832, dtype=float32), 'loss_cross_entropy': Array(0.13655846, dtype=float32)}
{'loss_inverse': Array(9.6055526e-05, dtype=float32)}


  9%|▉         | 89440/1000000 [4:13:52<29:05:12,  8.70it/s]

{'loss': Array(0.14920883, dtype=float32), 'loss_cross_entropy': Array(0.14143902, dtype=float32)}
{'loss_inverse': Array(9.906494e-07, dtype=float32)}


  9%|▉         | 89450/1000000 [4:13:54<26:16:06,  9.63it/s]

{'loss': Array(0.14491634, dtype=float32), 'loss_cross_entropy': Array(0.13652638, dtype=float32)}
{'loss_inverse': Array(1.2066695e-07, dtype=float32)}


  9%|▉         | 89460/1000000 [4:13:55<29:37:29,  8.54it/s]

{'loss': Array(0.13428622, dtype=float32), 'loss_cross_entropy': Array(0.12735131, dtype=float32)}
{'loss_inverse': Array(1.8116889e-07, dtype=float32)}


  9%|▉         | 89470/1000000 [4:13:57<26:18:27,  9.61it/s]

{'loss': Array(0.13596056, dtype=float32), 'loss_cross_entropy': Array(0.12812798, dtype=float32)}
{'loss_inverse': Array(1.5458903e-06, dtype=float32)}


  9%|▉         | 89480/1000000 [4:13:59<33:39:09,  7.52it/s]

{'loss': Array(0.13936071, dtype=float32), 'loss_cross_entropy': Array(0.13127983, dtype=float32)}
{'loss_inverse': Array(4.8836602e-05, dtype=float32)}


  9%|▉         | 89490/1000000 [4:14:00<26:05:12,  9.70it/s]

{'loss': Array(0.14736956, dtype=float32), 'loss_cross_entropy': Array(0.13883193, dtype=float32)}
{'loss_inverse': Array(9.9519224e-05, dtype=float32)}


  9%|▉         | 89500/1000000 [4:14:02<36:48:51,  6.87it/s]

{'loss': Array(0.14478384, dtype=float32), 'loss_cross_entropy': Array(0.13652854, dtype=float32)}
{'loss_inverse': Array(8.571276e-06, dtype=float32)}


  9%|▉         | 89510/1000000 [4:14:10<87:18:55,  2.90it/s] 

{'loss': Array(0.14249341, dtype=float32), 'loss_cross_entropy': Array(0.13446045, dtype=float32)}
{'loss_inverse': Array(9.856493e-05, dtype=float32)}


  9%|▉         | 89520/1000000 [4:14:11<35:07:10,  7.20it/s]

{'loss': Array(0.14850414, dtype=float32), 'loss_cross_entropy': Array(0.14054607, dtype=float32)}
{'loss_inverse': Array(1.7128297e-05, dtype=float32)}


  9%|▉         | 89530/1000000 [4:14:13<30:39:29,  8.25it/s]

{'loss': Array(0.14312276, dtype=float32), 'loss_cross_entropy': Array(0.13494667, dtype=float32)}
{'loss_inverse': Array(0.00015808, dtype=float32)}


  9%|▉         | 89540/1000000 [4:14:15<38:38:56,  6.54it/s]

{'loss': Array(0.12788427, dtype=float32), 'loss_cross_entropy': Array(0.12067235, dtype=float32)}
{'loss_inverse': Array(9.633039e-05, dtype=float32)}


  9%|▉         | 89550/1000000 [4:14:16<27:08:59,  9.32it/s]

{'loss': Array(0.11327318, dtype=float32), 'loss_cross_entropy': Array(0.10647895, dtype=float32)}
{'loss_inverse': Array(7.1300274e-06, dtype=float32)}


  9%|▉         | 89560/1000000 [4:14:18<25:41:17,  9.84it/s]

{'loss': Array(0.13031757, dtype=float32), 'loss_cross_entropy': Array(0.12284944, dtype=float32)}
{'loss_inverse': Array(9.569135e-05, dtype=float32)}


  9%|▉         | 89570/1000000 [4:14:19<33:39:15,  7.51it/s]

{'loss': Array(0.15924399, dtype=float32), 'loss_cross_entropy': Array(0.1503278, dtype=float32)}
{'loss_inverse': Array(6.9764843e-07, dtype=float32)}


  9%|▉         | 89580/1000000 [4:14:21<26:03:22,  9.71it/s]

{'loss': Array(0.13816456, dtype=float32), 'loss_cross_entropy': Array(0.13030186, dtype=float32)}
{'loss_inverse': Array(4.3313858e-07, dtype=float32)}


  9%|▉         | 89590/1000000 [4:14:22<25:08:55, 10.06it/s]

{'loss': Array(0.15321879, dtype=float32), 'loss_cross_entropy': Array(0.14502811, dtype=float32)}
{'loss_inverse': Array(5.4810175e-06, dtype=float32)}


  9%|▉         | 89600/1000000 [4:14:24<28:05:04,  9.00it/s]

{'loss': Array(0.14390348, dtype=float32), 'loss_cross_entropy': Array(0.13668877, dtype=float32)}
{'loss_inverse': Array(4.6994173e-06, dtype=float32)}


  9%|▉         | 89610/1000000 [4:14:25<25:00:07, 10.11it/s]

{'loss': Array(0.14717226, dtype=float32), 'loss_cross_entropy': Array(0.1389811, dtype=float32)}
{'loss_inverse': Array(2.181321e-06, dtype=float32)}


  9%|▉         | 89620/1000000 [4:14:27<27:52:26,  9.07it/s]

{'loss': Array(0.13162406, dtype=float32), 'loss_cross_entropy': Array(0.12404128, dtype=float32)}
{'loss_inverse': Array(2.8300965e-06, dtype=float32)}


  9%|▉         | 89630/1000000 [4:14:29<25:40:25,  9.85it/s]

{'loss': Array(0.13866897, dtype=float32), 'loss_cross_entropy': Array(0.13087218, dtype=float32)}
{'loss_inverse': Array(6.3412864e-07, dtype=float32)}


  9%|▉         | 89640/1000000 [4:14:30<30:13:54,  8.36it/s]

{'loss': Array(0.15245031, dtype=float32), 'loss_cross_entropy': Array(0.14483133, dtype=float32)}
{'loss_inverse': Array(6.188108e-05, dtype=float32)}


  9%|▉         | 89650/1000000 [4:14:32<26:09:16,  9.67it/s]

{'loss': Array(0.15299942, dtype=float32), 'loss_cross_entropy': Array(0.14433794, dtype=float32)}
{'loss_inverse': Array(1.42054e-05, dtype=float32)}


  9%|▉         | 89660/1000000 [4:14:34<37:55:16,  6.67it/s]

{'loss': Array(0.131759, dtype=float32), 'loss_cross_entropy': Array(0.12451549, dtype=float32)}
{'loss_inverse': Array(1.4168954e-05, dtype=float32)}


  9%|▉         | 89670/1000000 [4:14:35<27:11:34,  9.30it/s]

{'loss': Array(0.12781546, dtype=float32), 'loss_cross_entropy': Array(0.12067699, dtype=float32)}
{'loss_inverse': Array(1.0762911e-06, dtype=float32)}


  9%|▉         | 89680/1000000 [4:14:36<25:14:00, 10.02it/s]

{'loss': Array(0.14383705, dtype=float32), 'loss_cross_entropy': Array(0.1362385, dtype=float32)}
{'loss_inverse': Array(0.00010626, dtype=float32)}


  9%|▉         | 89690/1000000 [4:14:38<29:57:40,  8.44it/s]

{'loss': Array(0.14921224, dtype=float32), 'loss_cross_entropy': Array(0.1412124, dtype=float32)}
{'loss_inverse': Array(0.0001076, dtype=float32)}


  9%|▉         | 89700/1000000 [4:14:40<26:11:00,  9.66it/s]

{'loss': Array(0.14251584, dtype=float32), 'loss_cross_entropy': Array(0.13459955, dtype=float32)}
{'loss_inverse': Array(1.5134205e-05, dtype=float32)}


  9%|▉         | 89710/1000000 [4:14:41<28:56:47,  8.74it/s]

{'loss': Array(0.17369083, dtype=float32), 'loss_cross_entropy': Array(0.16440636, dtype=float32)}
{'loss_inverse': Array(5.5239852e-05, dtype=float32)}


  9%|▉         | 89720/1000000 [4:14:43<25:07:43, 10.06it/s]

{'loss': Array(0.15445524, dtype=float32), 'loss_cross_entropy': Array(0.14589415, dtype=float32)}
{'loss_inverse': Array(5.579986e-06, dtype=float32)}


  9%|▉         | 89730/1000000 [4:14:45<31:27:42,  8.04it/s]

{'loss': Array(0.15082426, dtype=float32), 'loss_cross_entropy': Array(0.14215803, dtype=float32)}
{'loss_inverse': Array(3.3384953e-07, dtype=float32)}


  9%|▉         | 89740/1000000 [4:14:46<25:40:36,  9.85it/s]

{'loss': Array(0.13693763, dtype=float32), 'loss_cross_entropy': Array(0.12821166, dtype=float32)}
{'loss_inverse': Array(7.4212658e-06, dtype=float32)}


  9%|▉         | 89750/1000000 [4:14:47<24:41:10, 10.24it/s]

{'loss': Array(0.15089677, dtype=float32), 'loss_cross_entropy': Array(0.14226851, dtype=float32)}
{'loss_inverse': Array(3.1759438e-07, dtype=float32)}


  9%|▉         | 89760/1000000 [4:14:49<27:42:50,  9.12it/s]

{'loss': Array(0.14179341, dtype=float32), 'loss_cross_entropy': Array(0.13403438, dtype=float32)}
{'loss_inverse': Array(0.00013969, dtype=float32)}


  9%|▉         | 89770/1000000 [4:14:51<25:25:15,  9.95it/s]

{'loss': Array(0.13126743, dtype=float32), 'loss_cross_entropy': Array(0.12301262, dtype=float32)}
{'loss_inverse': Array(6.212779e-08, dtype=float32)}


  9%|▉         | 89780/1000000 [4:14:52<28:51:58,  8.76it/s]

{'loss': Array(0.15468064, dtype=float32), 'loss_cross_entropy': Array(0.14693905, dtype=float32)}
{'loss_inverse': Array(3.4227066e-06, dtype=float32)}


  9%|▉         | 89790/1000000 [4:14:54<25:10:40, 10.04it/s]

{'loss': Array(0.14854284, dtype=float32), 'loss_cross_entropy': Array(0.14047977, dtype=float32)}
{'loss_inverse': Array(1.583474e-05, dtype=float32)}


  9%|▉         | 89800/1000000 [4:14:55<30:49:56,  8.20it/s]

{'loss': Array(0.128718, dtype=float32), 'loss_cross_entropy': Array(0.12182704, dtype=float32)}
{'loss_inverse': Array(7.168469e-07, dtype=float32)}


  9%|▉         | 89810/1000000 [4:14:57<25:53:48,  9.76it/s]

{'loss': Array(0.13589843, dtype=float32), 'loss_cross_entropy': Array(0.1281933, dtype=float32)}
{'loss_inverse': Array(6.212057e-06, dtype=float32)}


  9%|▉         | 89820/1000000 [4:14:58<24:56:01, 10.14it/s]

{'loss': Array(0.14509936, dtype=float32), 'loss_cross_entropy': Array(0.13672324, dtype=float32)}
{'loss_inverse': Array(4.020287e-06, dtype=float32)}


  9%|▉         | 89830/1000000 [4:15:00<29:08:55,  8.67it/s]

{'loss': Array(0.13664989, dtype=float32), 'loss_cross_entropy': Array(0.12914969, dtype=float32)}
{'loss_inverse': Array(1.6879186e-07, dtype=float32)}


  9%|▉         | 89840/1000000 [4:15:01<25:43:02,  9.83it/s]

{'loss': Array(0.14977698, dtype=float32), 'loss_cross_entropy': Array(0.14122277, dtype=float32)}
{'loss_inverse': Array(3.9257753e-08, dtype=float32)}


  9%|▉         | 89850/1000000 [4:15:03<31:25:58,  8.04it/s]

{'loss': Array(0.12226494, dtype=float32), 'loss_cross_entropy': Array(0.1147901, dtype=float32)}
{'loss_inverse': Array(8.8722496e-05, dtype=float32)}


  9%|▉         | 89860/1000000 [4:15:05<26:09:16,  9.67it/s]

{'loss': Array(0.14561401, dtype=float32), 'loss_cross_entropy': Array(0.13761179, dtype=float32)}
{'loss_inverse': Array(9.880587e-05, dtype=float32)}


  9%|▉         | 89870/1000000 [4:15:07<35:24:01,  7.14it/s]

{'loss': Array(0.1568317, dtype=float32), 'loss_cross_entropy': Array(0.14829563, dtype=float32)}
{'loss_inverse': Array(1.3403949e-06, dtype=float32)}


  9%|▉         | 89880/1000000 [4:15:08<27:23:36,  9.23it/s]

{'loss': Array(0.1484521, dtype=float32), 'loss_cross_entropy': Array(0.14022437, dtype=float32)}
{'loss_inverse': Array(3.5274413e-06, dtype=float32)}


  9%|▉         | 89890/1000000 [4:15:09<26:08:24,  9.67it/s]

{'loss': Array(0.1632338, dtype=float32), 'loss_cross_entropy': Array(0.15466784, dtype=float32)}
{'loss_inverse': Array(4.315854e-06, dtype=float32)}


  9%|▉         | 89900/1000000 [4:15:11<28:03:58,  9.01it/s]

{'loss': Array(0.14663488, dtype=float32), 'loss_cross_entropy': Array(0.13858937, dtype=float32)}
{'loss_inverse': Array(2.7782244e-06, dtype=float32)}


  9%|▉         | 89910/1000000 [4:15:13<25:42:05,  9.84it/s]

{'loss': Array(0.14264946, dtype=float32), 'loss_cross_entropy': Array(0.13421026, dtype=float32)}
{'loss_inverse': Array(5.8965848e-06, dtype=float32)}


  9%|▉         | 89920/1000000 [4:15:14<34:19:57,  7.36it/s]

{'loss': Array(0.15185903, dtype=float32), 'loss_cross_entropy': Array(0.14320302, dtype=float32)}
{'loss_inverse': Array(0.00016433, dtype=float32)}


  9%|▉         | 89930/1000000 [4:15:16<26:30:08,  9.54it/s]

{'loss': Array(0.15410645, dtype=float32), 'loss_cross_entropy': Array(0.14596711, dtype=float32)}
{'loss_inverse': Array(2.0164387e-06, dtype=float32)}


  9%|▉         | 89940/1000000 [4:15:17<25:27:08,  9.93it/s]

{'loss': Array(0.13793945, dtype=float32), 'loss_cross_entropy': Array(0.13068707, dtype=float32)}
{'loss_inverse': Array(0.00012799, dtype=float32)}


  9%|▉         | 89950/1000000 [4:15:19<28:12:47,  8.96it/s]

{'loss': Array(0.14171655, dtype=float32), 'loss_cross_entropy': Array(0.1336386, dtype=float32)}
{'loss_inverse': Array(1.8013374e-06, dtype=float32)}


  9%|▉         | 89960/1000000 [4:15:20<25:03:34, 10.09it/s]

{'loss': Array(0.14190204, dtype=float32), 'loss_cross_entropy': Array(0.13393252, dtype=float32)}
{'loss_inverse': Array(4.751776e-06, dtype=float32)}


  9%|▉         | 89970/1000000 [4:15:22<28:52:53,  8.75it/s]

{'loss': Array(0.13178134, dtype=float32), 'loss_cross_entropy': Array(0.12416326, dtype=float32)}
{'loss_inverse': Array(9.364049e-06, dtype=float32)}


  9%|▉         | 89980/1000000 [4:15:24<25:15:01, 10.01it/s]

{'loss': Array(0.139047, dtype=float32), 'loss_cross_entropy': Array(0.13136458, dtype=float32)}
{'loss_inverse': Array(8.631468e-06, dtype=float32)}


  9%|▉         | 89990/1000000 [4:15:25<30:54:59,  8.18it/s]

{'loss': Array(0.13943078, dtype=float32), 'loss_cross_entropy': Array(0.13170826, dtype=float32)}
{'loss_inverse': Array(1.0593877e-05, dtype=float32)}


  9%|▉         | 90000/1000000 [4:15:27<25:39:15,  9.85it/s]

{'loss': Array(0.14120953, dtype=float32), 'loss_cross_entropy': Array(0.1331867, dtype=float32)}
{'loss_inverse': Array(2.7599555e-05, dtype=float32)}


  9%|▉         | 90010/1000000 [4:15:35<86:24:02,  2.93it/s] 

{'loss': Array(0.15190779, dtype=float32), 'loss_cross_entropy': Array(0.1436667, dtype=float32)}
{'loss_inverse': Array(5.3393593e-05, dtype=float32)}


  9%|▉         | 90020/1000000 [4:15:37<38:15:37,  6.61it/s] 

{'loss': Array(0.12627138, dtype=float32), 'loss_cross_entropy': Array(0.11873925, dtype=float32)}
{'loss_inverse': Array(3.467551e-05, dtype=float32)}


  9%|▉         | 90030/1000000 [4:15:38<27:12:24,  9.29it/s]

{'loss': Array(0.14362876, dtype=float32), 'loss_cross_entropy': Array(0.1353669, dtype=float32)}
{'loss_inverse': Array(3.756473e-05, dtype=float32)}


  9%|▉         | 90040/1000000 [4:15:40<29:26:20,  8.59it/s]

{'loss': Array(0.16231248, dtype=float32), 'loss_cross_entropy': Array(0.15305373, dtype=float32)}
{'loss_inverse': Array(3.242265e-07, dtype=float32)}


  9%|▉         | 90050/1000000 [4:15:41<25:59:48,  9.72it/s]

{'loss': Array(0.15928793, dtype=float32), 'loss_cross_entropy': Array(0.1507084, dtype=float32)}
{'loss_inverse': Array(7.9772384e-08, dtype=float32)}


  9%|▉         | 90060/1000000 [4:15:43<31:25:03,  8.05it/s]

{'loss': Array(0.132749, dtype=float32), 'loss_cross_entropy': Array(0.12538067, dtype=float32)}
{'loss_inverse': Array(3.5012473e-08, dtype=float32)}


  9%|▉         | 90070/1000000 [4:15:45<26:25:24,  9.57it/s]

{'loss': Array(0.13380256, dtype=float32), 'loss_cross_entropy': Array(0.12623969, dtype=float32)}
{'loss_inverse': Array(7.157559e-07, dtype=float32)}


  9%|▉         | 90080/1000000 [4:15:46<25:21:42,  9.97it/s]

{'loss': Array(0.15434559, dtype=float32), 'loss_cross_entropy': Array(0.14623715, dtype=float32)}
{'loss_inverse': Array(9.66607e-07, dtype=float32)}


  9%|▉         | 90090/1000000 [4:15:48<29:20:24,  8.61it/s]

{'loss': Array(0.14417742, dtype=float32), 'loss_cross_entropy': Array(0.13643585, dtype=float32)}
{'loss_inverse': Array(1.9631022e-07, dtype=float32)}


  9%|▉         | 90100/1000000 [4:15:49<25:20:59,  9.97it/s]

{'loss': Array(0.15377977, dtype=float32), 'loss_cross_entropy': Array(0.14513841, dtype=float32)}
{'loss_inverse': Array(1.3967637e-07, dtype=float32)}


  9%|▉         | 90110/1000000 [4:15:51<30:49:21,  8.20it/s]

{'loss': Array(0.13520455, dtype=float32), 'loss_cross_entropy': Array(0.12715253, dtype=float32)}
{'loss_inverse': Array(6.834058e-07, dtype=float32)}


  9%|▉         | 90120/1000000 [4:15:52<25:31:54,  9.90it/s]

{'loss': Array(0.14063488, dtype=float32), 'loss_cross_entropy': Array(0.1328796, dtype=float32)}
{'loss_inverse': Array(6.3324364e-07, dtype=float32)}


  9%|▉         | 90130/1000000 [4:15:54<33:29:53,  7.54it/s]

{'loss': Array(0.11981624, dtype=float32), 'loss_cross_entropy': Array(0.11309069, dtype=float32)}
{'loss_inverse': Array(6.6380367e-06, dtype=float32)}


  9%|▉         | 90140/1000000 [4:15:55<26:01:52,  9.71it/s]

{'loss': Array(0.15804034, dtype=float32), 'loss_cross_entropy': Array(0.14948267, dtype=float32)}
{'loss_inverse': Array(7.3208845e-08, dtype=float32)}


  9%|▉         | 90150/1000000 [4:15:57<25:57:16,  9.74it/s]

{'loss': Array(0.13925599, dtype=float32), 'loss_cross_entropy': Array(0.13157962, dtype=float32)}
{'loss_inverse': Array(2.2516817e-06, dtype=float32)}


  9%|▉         | 90160/1000000 [4:15:59<28:03:22,  9.01it/s]

{'loss': Array(0.14414994, dtype=float32), 'loss_cross_entropy': Array(0.13649721, dtype=float32)}
{'loss_inverse': Array(1.9336588e-07, dtype=float32)}


  9%|▉         | 90170/1000000 [4:16:00<25:15:29, 10.01it/s]

{'loss': Array(0.11812983, dtype=float32), 'loss_cross_entropy': Array(0.11163445, dtype=float32)}
{'loss_inverse': Array(1.919831e-05, dtype=float32)}


  9%|▉         | 90180/1000000 [4:16:02<33:44:00,  7.49it/s]

{'loss': Array(0.14082955, dtype=float32), 'loss_cross_entropy': Array(0.1332176, dtype=float32)}
{'loss_inverse': Array(1.4900955e-07, dtype=float32)}


  9%|▉         | 90190/1000000 [4:16:03<27:09:17,  9.31it/s]

{'loss': Array(0.1419162, dtype=float32), 'loss_cross_entropy': Array(0.13361989, dtype=float32)}
{'loss_inverse': Array(5.430741e-07, dtype=float32)}


  9%|▉         | 90200/1000000 [4:16:05<24:56:40, 10.13it/s]

{'loss': Array(0.14772722, dtype=float32), 'loss_cross_entropy': Array(0.139536, dtype=float32)}
{'loss_inverse': Array(7.3824265e-07, dtype=float32)}


  9%|▉         | 90210/1000000 [4:16:07<28:32:11,  8.86it/s]

{'loss': Array(0.14863317, dtype=float32), 'loss_cross_entropy': Array(0.1402258, dtype=float32)}
{'loss_inverse': Array(4.178802e-06, dtype=float32)}


  9%|▉         | 90220/1000000 [4:16:08<26:47:09,  9.43it/s]

{'loss': Array(0.13968998, dtype=float32), 'loss_cross_entropy': Array(0.1315662, dtype=float32)}
{'loss_inverse': Array(6.7167784e-05, dtype=float32)}


  9%|▉         | 90230/1000000 [4:16:10<30:30:50,  8.28it/s]

{'loss': Array(0.13363494, dtype=float32), 'loss_cross_entropy': Array(0.12642181, dtype=float32)}
{'loss_inverse': Array(3.5557798e-07, dtype=float32)}


  9%|▉         | 90240/1000000 [4:16:11<25:29:21,  9.91it/s]

{'loss': Array(0.17148435, dtype=float32), 'loss_cross_entropy': Array(0.1624615, dtype=float32)}
{'loss_inverse': Array(8.937447e-06, dtype=float32)}


  9%|▉         | 90250/1000000 [4:16:13<31:23:22,  8.05it/s]

{'loss': Array(0.13809235, dtype=float32), 'loss_cross_entropy': Array(0.13004878, dtype=float32)}
{'loss_inverse': Array(0.00011592, dtype=float32)}


  9%|▉         | 90260/1000000 [4:16:14<25:46:13,  9.81it/s]

{'loss': Array(0.14766714, dtype=float32), 'loss_cross_entropy': Array(0.13929164, dtype=float32)}
{'loss_inverse': Array(6.9267765e-07, dtype=float32)}


  9%|▉         | 90270/1000000 [4:16:16<25:15:45, 10.00it/s]

{'loss': Array(0.12770618, dtype=float32), 'loss_cross_entropy': Array(0.12129593, dtype=float32)}
{'loss_inverse': Array(6.387745e-05, dtype=float32)}


  9%|▉         | 90280/1000000 [4:16:18<28:59:55,  8.71it/s]

{'loss': Array(0.14761132, dtype=float32), 'loss_cross_entropy': Array(0.13963257, dtype=float32)}
{'loss_inverse': Array(5.9000836e-06, dtype=float32)}


  9%|▉         | 90290/1000000 [4:16:19<25:39:36,  9.85it/s]

{'loss': Array(0.1617054, dtype=float32), 'loss_cross_entropy': Array(0.15254296, dtype=float32)}
{'loss_inverse': Array(1.193878e-06, dtype=float32)}


  9%|▉         | 90300/1000000 [4:16:21<33:27:53,  7.55it/s]

{'loss': Array(0.12220066, dtype=float32), 'loss_cross_entropy': Array(0.11493088, dtype=float32)}
{'loss_inverse': Array(8.538371e-06, dtype=float32)}


  9%|▉         | 90310/1000000 [4:16:22<25:57:51,  9.73it/s]

{'loss': Array(0.14518414, dtype=float32), 'loss_cross_entropy': Array(0.13625878, dtype=float32)}
{'loss_inverse': Array(7.2303833e-06, dtype=float32)}


  9%|▉         | 90320/1000000 [4:16:24<37:20:34,  6.77it/s]

{'loss': Array(0.12933695, dtype=float32), 'loss_cross_entropy': Array(0.12173559, dtype=float32)}
{'loss_inverse': Array(2.4969186e-08, dtype=float32)}


  9%|▉         | 90330/1000000 [4:16:25<27:10:00,  9.30it/s]

{'loss': Array(0.11885039, dtype=float32), 'loss_cross_entropy': Array(0.11203908, dtype=float32)}
{'loss_inverse': Array(1.4287765e-06, dtype=float32)}


  9%|▉         | 90340/1000000 [4:16:27<38:08:04,  6.63it/s]

{'loss': Array(0.15443341, dtype=float32), 'loss_cross_entropy': Array(0.14605378, dtype=float32)}
{'loss_inverse': Array(7.92909e-07, dtype=float32)}


  9%|▉         | 90350/1000000 [4:16:29<26:59:15,  9.36it/s]

{'loss': Array(0.14884107, dtype=float32), 'loss_cross_entropy': Array(0.14099684, dtype=float32)}
{'loss_inverse': Array(9.483775e-09, dtype=float32)}


  9%|▉         | 90360/1000000 [4:16:30<25:31:56,  9.90it/s]

{'loss': Array(0.14486995, dtype=float32), 'loss_cross_entropy': Array(0.13689046, dtype=float32)}
{'loss_inverse': Array(1.6372436e-06, dtype=float32)}


  9%|▉         | 90370/1000000 [4:16:32<30:55:20,  8.17it/s]

{'loss': Array(0.14861073, dtype=float32), 'loss_cross_entropy': Array(0.14029615, dtype=float32)}
{'loss_inverse': Array(2.7263745e-06, dtype=float32)}


  9%|▉         | 90380/1000000 [4:16:33<27:04:42,  9.33it/s]

{'loss': Array(0.15372284, dtype=float32), 'loss_cross_entropy': Array(0.14531466, dtype=float32)}
{'loss_inverse': Array(5.9666063e-06, dtype=float32)}


  9%|▉         | 90390/1000000 [4:16:35<37:57:59,  6.66it/s]

{'loss': Array(0.13022016, dtype=float32), 'loss_cross_entropy': Array(0.12327709, dtype=float32)}
{'loss_inverse': Array(2.7419492e-07, dtype=float32)}


  9%|▉         | 90400/1000000 [4:16:37<27:25:40,  9.21it/s]

{'loss': Array(0.15995602, dtype=float32), 'loss_cross_entropy': Array(0.15234819, dtype=float32)}
{'loss_inverse': Array(6.995801e-05, dtype=float32)}


  9%|▉         | 90410/1000000 [4:16:38<38:38:59,  6.54it/s]

{'loss': Array(0.13486443, dtype=float32), 'loss_cross_entropy': Array(0.12773432, dtype=float32)}
{'loss_inverse': Array(5.961532e-08, dtype=float32)}


  9%|▉         | 90420/1000000 [4:16:40<27:58:42,  9.03it/s]

{'loss': Array(0.1394244, dtype=float32), 'loss_cross_entropy': Array(0.13182332, dtype=float32)}
{'loss_inverse': Array(2.3193961e-05, dtype=float32)}


  9%|▉         | 90430/1000000 [4:16:41<25:05:07, 10.07it/s]

{'loss': Array(0.13296898, dtype=float32), 'loss_cross_entropy': Array(0.12549888, dtype=float32)}
{'loss_inverse': Array(8.722915e-07, dtype=float32)}


  9%|▉         | 90440/1000000 [4:16:43<33:42:50,  7.49it/s]

{'loss': Array(0.14890909, dtype=float32), 'loss_cross_entropy': Array(0.14068602, dtype=float32)}
{'loss_inverse': Array(2.1967135e-06, dtype=float32)}


  9%|▉         | 90450/1000000 [4:16:44<26:01:33,  9.71it/s]

{'loss': Array(0.12686512, dtype=float32), 'loss_cross_entropy': Array(0.11928871, dtype=float32)}
{'loss_inverse': Array(4.5462144e-05, dtype=float32)}


  9%|▉         | 90460/1000000 [4:16:46<25:20:39,  9.97it/s]

{'loss': Array(0.1527199, dtype=float32), 'loss_cross_entropy': Array(0.14433454, dtype=float32)}
{'loss_inverse': Array(1.8585846e-05, dtype=float32)}


  9%|▉         | 90470/1000000 [4:16:48<28:08:04,  8.98it/s]

{'loss': Array(0.13830027, dtype=float32), 'loss_cross_entropy': Array(0.13050905, dtype=float32)}
{'loss_inverse': Array(7.911177e-07, dtype=float32)}


  9%|▉         | 90480/1000000 [4:16:49<25:29:29,  9.91it/s]

{'loss': Array(0.13646686, dtype=float32), 'loss_cross_entropy': Array(0.1289571, dtype=float32)}
{'loss_inverse': Array(7.1253664e-05, dtype=float32)}


  9%|▉         | 90490/1000000 [4:16:51<28:58:45,  8.72it/s]

{'loss': Array(0.13460514, dtype=float32), 'loss_cross_entropy': Array(0.1271692, dtype=float32)}
{'loss_inverse': Array(1.4489403e-07, dtype=float32)}


  9%|▉         | 90500/1000000 [4:16:52<25:20:27,  9.97it/s]

{'loss': Array(0.14465837, dtype=float32), 'loss_cross_entropy': Array(0.13705292, dtype=float32)}
{'loss_inverse': Array(4.356245e-05, dtype=float32)}


  9%|▉         | 90510/1000000 [4:17:00<89:37:13,  2.82it/s] 

{'loss': Array(0.1379702, dtype=float32), 'loss_cross_entropy': Array(0.13063286, dtype=float32)}
{'loss_inverse': Array(2.061371e-08, dtype=float32)}


  9%|▉         | 90520/1000000 [4:17:02<35:43:07,  7.07it/s]

{'loss': Array(0.14211018, dtype=float32), 'loss_cross_entropy': Array(0.13393985, dtype=float32)}
{'loss_inverse': Array(5.1975405e-08, dtype=float32)}


  9%|▉         | 90530/1000000 [4:17:04<32:42:50,  7.72it/s]

{'loss': Array(0.12939508, dtype=float32), 'loss_cross_entropy': Array(0.12151394, dtype=float32)}
{'loss_inverse': Array(2.4858357e-06, dtype=float32)}


  9%|▉         | 90540/1000000 [4:17:05<25:55:41,  9.74it/s]

{'loss': Array(0.16159506, dtype=float32), 'loss_cross_entropy': Array(0.1533957, dtype=float32)}
{'loss_inverse': Array(0.00022647, dtype=float32)}


  9%|▉         | 90550/1000000 [4:17:07<39:03:38,  6.47it/s]

{'loss': Array(0.15071233, dtype=float32), 'loss_cross_entropy': Array(0.14229555, dtype=float32)}
{'loss_inverse': Array(1.67273e-07, dtype=float32)}


  9%|▉         | 90560/1000000 [4:17:08<28:00:09,  9.02it/s]

{'loss': Array(0.14447021, dtype=float32), 'loss_cross_entropy': Array(0.13647227, dtype=float32)}
{'loss_inverse': Array(4.3758243e-07, dtype=float32)}


  9%|▉         | 90570/1000000 [4:17:10<26:53:08,  9.40it/s]

{'loss': Array(0.13328066, dtype=float32), 'loss_cross_entropy': Array(0.1254835, dtype=float32)}
{'loss_inverse': Array(6.185615e-08, dtype=float32)}


  9%|▉         | 90580/1000000 [4:17:12<29:00:49,  8.71it/s]

{'loss': Array(0.13445947, dtype=float32), 'loss_cross_entropy': Array(0.12655215, dtype=float32)}
{'loss_inverse': Array(9.254227e-07, dtype=float32)}


  9%|▉         | 90590/1000000 [4:17:13<25:25:54,  9.93it/s]

{'loss': Array(0.13978551, dtype=float32), 'loss_cross_entropy': Array(0.13215269, dtype=float32)}
{'loss_inverse': Array(1.0961442e-05, dtype=float32)}


  9%|▉         | 90600/1000000 [4:17:15<31:01:20,  8.14it/s]

{'loss': Array(0.13909967, dtype=float32), 'loss_cross_entropy': Array(0.13101892, dtype=float32)}
{'loss_inverse': Array(1.5399631e-07, dtype=float32)}


  9%|▉         | 90610/1000000 [4:17:16<26:01:39,  9.71it/s]

{'loss': Array(0.14345878, dtype=float32), 'loss_cross_entropy': Array(0.13526501, dtype=float32)}
{'loss_inverse': Array(0.00032777, dtype=float32)}


  9%|▉         | 90620/1000000 [4:17:18<33:48:20,  7.47it/s]

{'loss': Array(0.15683858, dtype=float32), 'loss_cross_entropy': Array(0.14811961, dtype=float32)}
{'loss_inverse': Array(6.462428e-07, dtype=float32)}


  9%|▉         | 90630/1000000 [4:17:19<26:06:09,  9.68it/s]

{'loss': Array(0.15186632, dtype=float32), 'loss_cross_entropy': Array(0.14369074, dtype=float32)}
{'loss_inverse': Array(6.456971e-07, dtype=float32)}


  9%|▉         | 90640/1000000 [4:17:21<33:33:36,  7.53it/s]

{'loss': Array(0.1355866, dtype=float32), 'loss_cross_entropy': Array(0.12766282, dtype=float32)}
{'loss_inverse': Array(0.00010811, dtype=float32)}


  9%|▉         | 90650/1000000 [4:17:22<26:34:44,  9.50it/s]

{'loss': Array(0.13774852, dtype=float32), 'loss_cross_entropy': Array(0.12997864, dtype=float32)}
{'loss_inverse': Array(1.227825e-06, dtype=float32)}


  9%|▉         | 90660/1000000 [4:17:24<33:47:07,  7.48it/s]

{'loss': Array(0.1512857, dtype=float32), 'loss_cross_entropy': Array(0.14354229, dtype=float32)}
{'loss_inverse': Array(0.00023144, dtype=float32)}


  9%|▉         | 90670/1000000 [4:17:26<26:02:43,  9.70it/s]

{'loss': Array(0.13572685, dtype=float32), 'loss_cross_entropy': Array(0.12820776, dtype=float32)}
{'loss_inverse': Array(2.0303514e-05, dtype=float32)}


  9%|▉         | 90680/1000000 [4:17:27<24:48:59, 10.18it/s]

{'loss': Array(0.1392375, dtype=float32), 'loss_cross_entropy': Array(0.13105816, dtype=float32)}
{'loss_inverse': Array(9.374083e-05, dtype=float32)}


  9%|▉         | 90690/1000000 [4:17:29<28:34:26,  8.84it/s]

{'loss': Array(0.14416157, dtype=float32), 'loss_cross_entropy': Array(0.13610005, dtype=float32)}
{'loss_inverse': Array(0.00022347, dtype=float32)}


  9%|▉         | 90700/1000000 [4:17:30<25:32:48,  9.89it/s]

{'loss': Array(0.14531744, dtype=float32), 'loss_cross_entropy': Array(0.13701974, dtype=float32)}
{'loss_inverse': Array(6.5968336e-05, dtype=float32)}


  9%|▉         | 90710/1000000 [4:17:32<27:49:09,  9.08it/s]

{'loss': Array(0.15716635, dtype=float32), 'loss_cross_entropy': Array(0.14880504, dtype=float32)}
{'loss_inverse': Array(3.931764e-05, dtype=float32)}


  9%|▉         | 90720/1000000 [4:17:33<25:30:04,  9.90it/s]

{'loss': Array(0.12767062, dtype=float32), 'loss_cross_entropy': Array(0.12009366, dtype=float32)}
{'loss_inverse': Array(0.00012943, dtype=float32)}


  9%|▉         | 90730/1000000 [4:17:35<29:46:47,  8.48it/s]

{'loss': Array(0.13702738, dtype=float32), 'loss_cross_entropy': Array(0.12959409, dtype=float32)}
{'loss_inverse': Array(8.440592e-05, dtype=float32)}


  9%|▉         | 90740/1000000 [4:17:37<25:47:19,  9.79it/s]

{'loss': Array(0.13569762, dtype=float32), 'loss_cross_entropy': Array(0.12763825, dtype=float32)}
{'loss_inverse': Array(0.0001586, dtype=float32)}


  9%|▉         | 90750/1000000 [4:17:38<37:19:36,  6.77it/s]

{'loss': Array(0.16389577, dtype=float32), 'loss_cross_entropy': Array(0.15495855, dtype=float32)}
{'loss_inverse': Array(0.00032193, dtype=float32)}


  9%|▉         | 90760/1000000 [4:17:40<27:10:17,  9.30it/s]

{'loss': Array(0.14105748, dtype=float32), 'loss_cross_entropy': Array(0.13278642, dtype=float32)}
{'loss_inverse': Array(4.098587e-06, dtype=float32)}


  9%|▉         | 90770/1000000 [4:17:41<25:03:39, 10.08it/s]

{'loss': Array(0.1417755, dtype=float32), 'loss_cross_entropy': Array(0.13385932, dtype=float32)}
{'loss_inverse': Array(0.00033639, dtype=float32)}


  9%|▉         | 90780/1000000 [4:17:43<29:23:16,  8.59it/s]

{'loss': Array(0.15747352, dtype=float32), 'loss_cross_entropy': Array(0.14889623, dtype=float32)}
{'loss_inverse': Array(0.00018305, dtype=float32)}


  9%|▉         | 90790/1000000 [4:17:44<25:27:56,  9.92it/s]

{'loss': Array(0.16438851, dtype=float32), 'loss_cross_entropy': Array(0.15566418, dtype=float32)}
{'loss_inverse': Array(8.8910775e-07, dtype=float32)}


  9%|▉         | 90800/1000000 [4:17:46<28:50:07,  8.76it/s]

{'loss': Array(0.14153282, dtype=float32), 'loss_cross_entropy': Array(0.13395096, dtype=float32)}
{'loss_inverse': Array(1.4192378e-05, dtype=float32)}


  9%|▉         | 90810/1000000 [4:17:48<27:35:53,  9.15it/s]

{'loss': Array(0.14054178, dtype=float32), 'loss_cross_entropy': Array(0.13283737, dtype=float32)}
{'loss_inverse': Array(0.00012824, dtype=float32)}


  9%|▉         | 90820/1000000 [4:17:49<31:27:54,  8.03it/s]

{'loss': Array(0.12352697, dtype=float32), 'loss_cross_entropy': Array(0.11653646, dtype=float32)}
{'loss_inverse': Array(1.9369812e-05, dtype=float32)}


  9%|▉         | 90830/1000000 [4:17:51<25:35:38,  9.87it/s]

{'loss': Array(0.14562173, dtype=float32), 'loss_cross_entropy': Array(0.13834773, dtype=float32)}
{'loss_inverse': Array(2.7397778e-05, dtype=float32)}


  9%|▉         | 90840/1000000 [4:17:52<25:05:43, 10.06it/s]

{'loss': Array(0.13598931, dtype=float32), 'loss_cross_entropy': Array(0.12859665, dtype=float32)}
{'loss_inverse': Array(1.9852492e-07, dtype=float32)}


  9%|▉         | 90850/1000000 [4:17:54<27:55:20,  9.04it/s]

{'loss': Array(0.13690506, dtype=float32), 'loss_cross_entropy': Array(0.128969, dtype=float32)}
{'loss_inverse': Array(7.070578e-05, dtype=float32)}


  9%|▉         | 90860/1000000 [4:17:55<25:34:33,  9.87it/s]

{'loss': Array(0.11989634, dtype=float32), 'loss_cross_entropy': Array(0.11305662, dtype=float32)}
{'loss_inverse': Array(1.0826809e-06, dtype=float32)}


  9%|▉         | 90870/1000000 [4:17:57<30:04:18,  8.40it/s]

{'loss': Array(0.15616046, dtype=float32), 'loss_cross_entropy': Array(0.1482352, dtype=float32)}
{'loss_inverse': Array(5.298531e-07, dtype=float32)}


  9%|▉         | 90880/1000000 [4:17:59<26:21:58,  9.58it/s]

{'loss': Array(0.14853075, dtype=float32), 'loss_cross_entropy': Array(0.1401893, dtype=float32)}
{'loss_inverse': Array(3.3191474e-05, dtype=float32)}


  9%|▉         | 90890/1000000 [4:18:01<31:11:38,  8.10it/s]

{'loss': Array(0.1514719, dtype=float32), 'loss_cross_entropy': Array(0.14349101, dtype=float32)}
{'loss_inverse': Array(2.400925e-06, dtype=float32)}


  9%|▉         | 90900/1000000 [4:18:02<25:28:33,  9.91it/s]

{'loss': Array(0.14083739, dtype=float32), 'loss_cross_entropy': Array(0.13276336, dtype=float32)}
{'loss_inverse': Array(3.1961532e-05, dtype=float32)}


  9%|▉         | 90910/1000000 [4:18:03<24:38:40, 10.25it/s]

{'loss': Array(0.13432655, dtype=float32), 'loss_cross_entropy': Array(0.12667239, dtype=float32)}
{'loss_inverse': Array(1.3329735e-06, dtype=float32)}


  9%|▉         | 90920/1000000 [4:18:05<29:53:30,  8.45it/s]

{'loss': Array(0.12760842, dtype=float32), 'loss_cross_entropy': Array(0.12038326, dtype=float32)}
{'loss_inverse': Array(0.00018437, dtype=float32)}


  9%|▉         | 90930/1000000 [4:18:07<26:49:06,  9.42it/s]

{'loss': Array(0.15423344, dtype=float32), 'loss_cross_entropy': Array(0.14616883, dtype=float32)}
{'loss_inverse': Array(2.7699534e-06, dtype=float32)}


  9%|▉         | 90940/1000000 [4:18:08<31:56:22,  7.91it/s]

{'loss': Array(0.14261037, dtype=float32), 'loss_cross_entropy': Array(0.13505514, dtype=float32)}
{'loss_inverse': Array(4.9135377e-08, dtype=float32)}


  9%|▉         | 90950/1000000 [4:18:10<26:55:44,  9.38it/s]

{'loss': Array(0.15213759, dtype=float32), 'loss_cross_entropy': Array(0.14382662, dtype=float32)}
{'loss_inverse': Array(0.00014509, dtype=float32)}


  9%|▉         | 90960/1000000 [4:18:12<34:48:56,  7.25it/s]

{'loss': Array(0.13385515, dtype=float32), 'loss_cross_entropy': Array(0.12618664, dtype=float32)}
{'loss_inverse': Array(1.8357581e-05, dtype=float32)}


  9%|▉         | 90970/1000000 [4:18:13<26:51:51,  9.40it/s]

{'loss': Array(0.13831507, dtype=float32), 'loss_cross_entropy': Array(0.1304934, dtype=float32)}
{'loss_inverse': Array(6.203811e-06, dtype=float32)}


  9%|▉         | 90980/1000000 [4:18:14<24:52:40, 10.15it/s]

{'loss': Array(0.14496937, dtype=float32), 'loss_cross_entropy': Array(0.13701962, dtype=float32)}
{'loss_inverse': Array(0.00025015, dtype=float32)}


  9%|▉         | 90990/1000000 [4:18:16<27:41:34,  9.12it/s]

{'loss': Array(0.13609612, dtype=float32), 'loss_cross_entropy': Array(0.12860733, dtype=float32)}
{'loss_inverse': Array(3.9254356e-08, dtype=float32)}


  9%|▉         | 91000/1000000 [4:18:18<24:52:46, 10.15it/s]

{'loss': Array(0.15213059, dtype=float32), 'loss_cross_entropy': Array(0.14375047, dtype=float32)}
{'loss_inverse': Array(4.2157182e-05, dtype=float32)}


  9%|▉         | 91010/1000000 [4:18:26<92:37:36,  2.73it/s] 

{'loss': Array(0.13096248, dtype=float32), 'loss_cross_entropy': Array(0.12358908, dtype=float32)}
{'loss_inverse': Array(6.48457e-06, dtype=float32)}


  9%|▉         | 91020/1000000 [4:18:27<35:53:26,  7.04it/s]

{'loss': Array(0.10710414, dtype=float32), 'loss_cross_entropy': Array(0.10057213, dtype=float32)}
{'loss_inverse': Array(5.609463e-08, dtype=float32)}


  9%|▉         | 91030/1000000 [4:18:29<27:16:17,  9.26it/s]

{'loss': Array(0.13054541, dtype=float32), 'loss_cross_entropy': Array(0.12277134, dtype=float32)}
{'loss_inverse': Array(6.4399785e-05, dtype=float32)}


  9%|▉         | 91040/1000000 [4:18:30<29:00:24,  8.70it/s]

{'loss': Array(0.12413452, dtype=float32), 'loss_cross_entropy': Array(0.11707037, dtype=float32)}
{'loss_inverse': Array(4.315397e-07, dtype=float32)}


  9%|▉         | 91050/1000000 [4:18:32<25:24:56,  9.93it/s]

{'loss': Array(0.15316996, dtype=float32), 'loss_cross_entropy': Array(0.14427768, dtype=float32)}
{'loss_inverse': Array(0.00023817, dtype=float32)}


  9%|▉         | 91060/1000000 [4:18:34<30:52:43,  8.18it/s]

{'loss': Array(0.13335125, dtype=float32), 'loss_cross_entropy': Array(0.12535404, dtype=float32)}
{'loss_inverse': Array(7.7901194e-07, dtype=float32)}


  9%|▉         | 91070/1000000 [4:18:35<26:29:40,  9.53it/s]

{'loss': Array(0.13945809, dtype=float32), 'loss_cross_entropy': Array(0.13134733, dtype=float32)}
{'loss_inverse': Array(1.3934282e-07, dtype=float32)}


  9%|▉         | 91080/1000000 [4:18:37<34:03:04,  7.41it/s]

{'loss': Array(0.12986098, dtype=float32), 'loss_cross_entropy': Array(0.12290587, dtype=float32)}
{'loss_inverse': Array(1.8764528e-06, dtype=float32)}


  9%|▉         | 91090/1000000 [4:18:38<25:58:53,  9.72it/s]

{'loss': Array(0.14202407, dtype=float32), 'loss_cross_entropy': Array(0.13429761, dtype=float32)}
{'loss_inverse': Array(0.00015266, dtype=float32)}


  9%|▉         | 91100/1000000 [4:18:40<25:17:57,  9.98it/s]

{'loss': Array(0.12315106, dtype=float32), 'loss_cross_entropy': Array(0.11654764, dtype=float32)}
{'loss_inverse': Array(0.00013484, dtype=float32)}


  9%|▉         | 91110/1000000 [4:18:41<29:24:15,  8.59it/s]

{'loss': Array(0.13580634, dtype=float32), 'loss_cross_entropy': Array(0.12815242, dtype=float32)}
{'loss_inverse': Array(0.00017051, dtype=float32)}


  9%|▉         | 91120/1000000 [4:18:43<25:27:18,  9.92it/s]

{'loss': Array(0.14301123, dtype=float32), 'loss_cross_entropy': Array(0.13423187, dtype=float32)}
{'loss_inverse': Array(5.6528916e-06, dtype=float32)}


  9%|▉         | 91130/1000000 [4:18:44<33:19:34,  7.58it/s]

{'loss': Array(0.14736621, dtype=float32), 'loss_cross_entropy': Array(0.1391796, dtype=float32)}
{'loss_inverse': Array(1.2004127e-05, dtype=float32)}


  9%|▉         | 91140/1000000 [4:18:46<26:05:43,  9.67it/s]

{'loss': Array(0.12225509, dtype=float32), 'loss_cross_entropy': Array(0.11574741, dtype=float32)}
{'loss_inverse': Array(2.3334453e-05, dtype=float32)}


  9%|▉         | 91150/1000000 [4:18:48<37:30:01,  6.73it/s]

{'loss': Array(0.14754058, dtype=float32), 'loss_cross_entropy': Array(0.1400115, dtype=float32)}
{'loss_inverse': Array(3.6987065e-06, dtype=float32)}


  9%|▉         | 91160/1000000 [4:18:49<27:16:24,  9.26it/s]

{'loss': Array(0.13309138, dtype=float32), 'loss_cross_entropy': Array(0.12566738, dtype=float32)}
{'loss_inverse': Array(5.747037e-06, dtype=float32)}


  9%|▉         | 91170/1000000 [4:18:50<24:44:24, 10.20it/s]

{'loss': Array(0.13085496, dtype=float32), 'loss_cross_entropy': Array(0.12331019, dtype=float32)}
{'loss_inverse': Array(3.789691e-06, dtype=float32)}


  9%|▉         | 91180/1000000 [4:18:52<28:09:58,  8.96it/s]

{'loss': Array(0.1407003, dtype=float32), 'loss_cross_entropy': Array(0.13240802, dtype=float32)}
{'loss_inverse': Array(7.4489703e-06, dtype=float32)}


  9%|▉         | 91190/1000000 [4:18:54<25:46:26,  9.79it/s]

{'loss': Array(0.15703784, dtype=float32), 'loss_cross_entropy': Array(0.14883459, dtype=float32)}
{'loss_inverse': Array(0.00023069, dtype=float32)}


  9%|▉         | 91200/1000000 [4:18:55<29:16:45,  8.62it/s]

{'loss': Array(0.1496961, dtype=float32), 'loss_cross_entropy': Array(0.1419199, dtype=float32)}
{'loss_inverse': Array(1.7151318e-06, dtype=float32)}


  9%|▉         | 91210/1000000 [4:18:57<26:32:08,  9.51it/s]

{'loss': Array(0.13569067, dtype=float32), 'loss_cross_entropy': Array(0.12816359, dtype=float32)}
{'loss_inverse': Array(0.00023963, dtype=float32)}


  9%|▉         | 91220/1000000 [4:18:59<31:23:46,  8.04it/s]

{'loss': Array(0.13707742, dtype=float32), 'loss_cross_entropy': Array(0.12909073, dtype=float32)}
{'loss_inverse': Array(7.970253e-06, dtype=float32)}


  9%|▉         | 91230/1000000 [4:19:00<25:55:51,  9.73it/s]

{'loss': Array(0.15926114, dtype=float32), 'loss_cross_entropy': Array(0.15057577, dtype=float32)}
{'loss_inverse': Array(1.8059058e-06, dtype=float32)}


  9%|▉         | 91240/1000000 [4:19:02<33:44:59,  7.48it/s]

{'loss': Array(0.15666093, dtype=float32), 'loss_cross_entropy': Array(0.14820282, dtype=float32)}
{'loss_inverse': Array(4.0779364e-07, dtype=float32)}


  9%|▉         | 91250/1000000 [4:19:03<26:05:43,  9.67it/s]

{'loss': Array(0.14998975, dtype=float32), 'loss_cross_entropy': Array(0.14094505, dtype=float32)}
{'loss_inverse': Array(0.00010241, dtype=float32)}


  9%|▉         | 91260/1000000 [4:19:05<34:06:47,  7.40it/s]

{'loss': Array(0.15436828, dtype=float32), 'loss_cross_entropy': Array(0.14631909, dtype=float32)}
{'loss_inverse': Array(6.853214e-05, dtype=float32)}


  9%|▉         | 91270/1000000 [4:19:07<27:16:23,  9.26it/s]

{'loss': Array(0.16167328, dtype=float32), 'loss_cross_entropy': Array(0.15304069, dtype=float32)}
{'loss_inverse': Array(3.340147e-05, dtype=float32)}


  9%|▉         | 91280/1000000 [4:19:08<26:30:06,  9.52it/s]

{'loss': Array(0.151466, dtype=float32), 'loss_cross_entropy': Array(0.14288335, dtype=float32)}
{'loss_inverse': Array(2.1728245e-05, dtype=float32)}


  9%|▉         | 91290/1000000 [4:19:10<32:36:44,  7.74it/s]

{'loss': Array(0.14929321, dtype=float32), 'loss_cross_entropy': Array(0.14137393, dtype=float32)}
{'loss_inverse': Array(6.544012e-05, dtype=float32)}


  9%|▉         | 91300/1000000 [4:19:11<26:44:30,  9.44it/s]

{'loss': Array(0.15485403, dtype=float32), 'loss_cross_entropy': Array(0.14640288, dtype=float32)}
{'loss_inverse': Array(6.191059e-05, dtype=float32)}


  9%|▉         | 91310/1000000 [4:19:13<37:54:01,  6.66it/s]

{'loss': Array(0.1411772, dtype=float32), 'loss_cross_entropy': Array(0.1334544, dtype=float32)}
{'loss_inverse': Array(1.7504208e-05, dtype=float32)}


  9%|▉         | 91320/1000000 [4:19:14<26:46:08,  9.43it/s]

{'loss': Array(0.13193159, dtype=float32), 'loss_cross_entropy': Array(0.12416983, dtype=float32)}
{'loss_inverse': Array(2.0378332e-06, dtype=float32)}


  9%|▉         | 91330/1000000 [4:19:16<24:54:26, 10.13it/s]

{'loss': Array(0.14443968, dtype=float32), 'loss_cross_entropy': Array(0.1369713, dtype=float32)}
{'loss_inverse': Array(7.560195e-06, dtype=float32)}


  9%|▉         | 91340/1000000 [4:19:18<28:08:57,  8.97it/s]

{'loss': Array(0.14641728, dtype=float32), 'loss_cross_entropy': Array(0.13804506, dtype=float32)}
{'loss_inverse': Array(0.00024318, dtype=float32)}


  9%|▉         | 91350/1000000 [4:19:19<24:48:02, 10.18it/s]

{'loss': Array(0.15705012, dtype=float32), 'loss_cross_entropy': Array(0.14813273, dtype=float32)}
{'loss_inverse': Array(3.5178295e-05, dtype=float32)}


  9%|▉         | 91360/1000000 [4:19:21<27:23:59,  9.21it/s]

{'loss': Array(0.12913291, dtype=float32), 'loss_cross_entropy': Array(0.12182106, dtype=float32)}
{'loss_inverse': Array(9.657299e-05, dtype=float32)}


  9%|▉         | 91370/1000000 [4:19:22<25:45:15,  9.80it/s]

{'loss': Array(0.12787955, dtype=float32), 'loss_cross_entropy': Array(0.12040179, dtype=float32)}
{'loss_inverse': Array(5.3794804e-05, dtype=float32)}


  9%|▉         | 91380/1000000 [4:19:24<30:36:43,  8.24it/s]

{'loss': Array(0.1394959, dtype=float32), 'loss_cross_entropy': Array(0.13166265, dtype=float32)}
{'loss_inverse': Array(1.6494572e-05, dtype=float32)}


  9%|▉         | 91390/1000000 [4:19:25<25:09:51, 10.03it/s]

{'loss': Array(0.14331625, dtype=float32), 'loss_cross_entropy': Array(0.13512932, dtype=float32)}
{'loss_inverse': Array(7.653995e-06, dtype=float32)}


  9%|▉         | 91400/1000000 [4:19:27<31:26:54,  8.03it/s]

{'loss': Array(0.13060027, dtype=float32), 'loss_cross_entropy': Array(0.12362138, dtype=float32)}
{'loss_inverse': Array(3.9179986e-07, dtype=float32)}


  9%|▉         | 91410/1000000 [4:19:29<26:19:07,  9.59it/s]

{'loss': Array(0.12807094, dtype=float32), 'loss_cross_entropy': Array(0.12111789, dtype=float32)}
{'loss_inverse': Array(0.00019161, dtype=float32)}


  9%|▉         | 91420/1000000 [4:19:30<37:30:45,  6.73it/s]

{'loss': Array(0.13155405, dtype=float32), 'loss_cross_entropy': Array(0.12321718, dtype=float32)}
{'loss_inverse': Array(3.050645e-06, dtype=float32)}


  9%|▉         | 91430/1000000 [4:19:32<26:35:33,  9.49it/s]

{'loss': Array(0.14776628, dtype=float32), 'loss_cross_entropy': Array(0.14012694, dtype=float32)}
{'loss_inverse': Array(1.102078e-05, dtype=float32)}


  9%|▉         | 91440/1000000 [4:19:33<24:39:34, 10.23it/s]

{'loss': Array(0.14122866, dtype=float32), 'loss_cross_entropy': Array(0.13339463, dtype=float32)}
{'loss_inverse': Array(0.00019568, dtype=float32)}


  9%|▉         | 91450/1000000 [4:19:35<28:22:44,  8.89it/s]

{'loss': Array(0.14267896, dtype=float32), 'loss_cross_entropy': Array(0.13457975, dtype=float32)}
{'loss_inverse': Array(2.2690123e-05, dtype=float32)}


  9%|▉         | 91460/1000000 [4:19:36<25:15:01,  9.99it/s]

{'loss': Array(0.13997948, dtype=float32), 'loss_cross_entropy': Array(0.13215111, dtype=float32)}
{'loss_inverse': Array(1.907889e-05, dtype=float32)}


  9%|▉         | 91470/1000000 [4:19:38<30:56:03,  8.16it/s]

{'loss': Array(0.15214565, dtype=float32), 'loss_cross_entropy': Array(0.14377977, dtype=float32)}
{'loss_inverse': Array(3.390291e-06, dtype=float32)}


  9%|▉         | 91480/1000000 [4:19:39<25:59:30,  9.71it/s]

{'loss': Array(0.13113892, dtype=float32), 'loss_cross_entropy': Array(0.12348127, dtype=float32)}
{'loss_inverse': Array(5.1887404e-05, dtype=float32)}


  9%|▉         | 91490/1000000 [4:19:41<25:00:58, 10.09it/s]

{'loss': Array(0.13823962, dtype=float32), 'loss_cross_entropy': Array(0.13029955, dtype=float32)}
{'loss_inverse': Array(0.00012451, dtype=float32)}


  9%|▉         | 91500/1000000 [4:19:43<29:08:37,  8.66it/s]

{'loss': Array(0.14452353, dtype=float32), 'loss_cross_entropy': Array(0.1360405, dtype=float32)}
{'loss_inverse': Array(0.0001368, dtype=float32)}


  9%|▉         | 91510/1000000 [4:19:51<85:54:31,  2.94it/s] 

{'loss': Array(0.14554025, dtype=float32), 'loss_cross_entropy': Array(0.13731544, dtype=float32)}
{'loss_inverse': Array(9.878234e-05, dtype=float32)}


  9%|▉         | 91520/1000000 [4:19:52<38:07:09,  6.62it/s] 

{'loss': Array(0.13523299, dtype=float32), 'loss_cross_entropy': Array(0.12765759, dtype=float32)}
{'loss_inverse': Array(6.142135e-07, dtype=float32)}


  9%|▉         | 91530/1000000 [4:19:54<27:11:26,  9.28it/s]

{'loss': Array(0.14974402, dtype=float32), 'loss_cross_entropy': Array(0.1423419, dtype=float32)}
{'loss_inverse': Array(2.103114e-05, dtype=float32)}


  9%|▉         | 91540/1000000 [4:19:56<31:43:53,  7.95it/s]

{'loss': Array(0.14126404, dtype=float32), 'loss_cross_entropy': Array(0.1336213, dtype=float32)}
{'loss_inverse': Array(0.00039256, dtype=float32)}


  9%|▉         | 91550/1000000 [4:19:57<25:48:31,  9.78it/s]

{'loss': Array(0.12784359, dtype=float32), 'loss_cross_entropy': Array(0.12048586, dtype=float32)}
{'loss_inverse': Array(1.998053e-07, dtype=float32)}


  9%|▉         | 91560/1000000 [4:19:59<31:48:43,  7.93it/s]

{'loss': Array(0.14342064, dtype=float32), 'loss_cross_entropy': Array(0.13494952, dtype=float32)}
{'loss_inverse': Array(0.00017837, dtype=float32)}


  9%|▉         | 91570/1000000 [4:20:00<26:21:27,  9.57it/s]

{'loss': Array(0.1383709, dtype=float32), 'loss_cross_entropy': Array(0.13009146, dtype=float32)}
{'loss_inverse': Array(0.00017782, dtype=float32)}


  9%|▉         | 91580/1000000 [4:20:02<34:01:16,  7.42it/s]

{'loss': Array(0.13144982, dtype=float32), 'loss_cross_entropy': Array(0.1236757, dtype=float32)}
{'loss_inverse': Array(7.581314e-05, dtype=float32)}


  9%|▉         | 91590/1000000 [4:20:04<25:50:12,  9.77it/s]

{'loss': Array(0.1595569, dtype=float32), 'loss_cross_entropy': Array(0.15034138, dtype=float32)}
{'loss_inverse': Array(2.5012348e-06, dtype=float32)}


  9%|▉         | 91600/1000000 [4:20:05<31:08:14,  8.10it/s]

{'loss': Array(0.13597937, dtype=float32), 'loss_cross_entropy': Array(0.12811755, dtype=float32)}
{'loss_inverse': Array(1.1358175e-06, dtype=float32)}


  9%|▉         | 91610/1000000 [4:20:07<26:38:08,  9.47it/s]

{'loss': Array(0.1352187, dtype=float32), 'loss_cross_entropy': Array(0.1273797, dtype=float32)}
{'loss_inverse': Array(2.5589308e-05, dtype=float32)}


  9%|▉         | 91620/1000000 [4:20:09<37:51:36,  6.66it/s]

{'loss': Array(0.13732967, dtype=float32), 'loss_cross_entropy': Array(0.12933695, dtype=float32)}
{'loss_inverse': Array(7.179865e-05, dtype=float32)}


  9%|▉         | 91630/1000000 [4:20:10<27:49:04,  9.07it/s]

{'loss': Array(0.12790167, dtype=float32), 'loss_cross_entropy': Array(0.12024925, dtype=float32)}
{'loss_inverse': Array(0.00011144, dtype=float32)}


  9%|▉         | 91640/1000000 [4:20:11<25:23:00,  9.94it/s]

{'loss': Array(0.15642737, dtype=float32), 'loss_cross_entropy': Array(0.14725456, dtype=float32)}
{'loss_inverse': Array(5.8409487e-06, dtype=float32)}


  9%|▉         | 91650/1000000 [4:20:13<28:18:39,  8.91it/s]

{'loss': Array(0.1425773, dtype=float32), 'loss_cross_entropy': Array(0.13459268, dtype=float32)}
{'loss_inverse': Array(3.5072685e-06, dtype=float32)}


  9%|▉         | 91660/1000000 [4:20:15<25:12:35, 10.01it/s]

{'loss': Array(0.14658931, dtype=float32), 'loss_cross_entropy': Array(0.13800506, dtype=float32)}
{'loss_inverse': Array(1.0192788e-05, dtype=float32)}


  9%|▉         | 91670/1000000 [4:20:16<27:47:21,  9.08it/s]

{'loss': Array(0.14156675, dtype=float32), 'loss_cross_entropy': Array(0.13332076, dtype=float32)}
{'loss_inverse': Array(1.7309043e-05, dtype=float32)}


  9%|▉         | 91680/1000000 [4:20:18<25:31:09,  9.89it/s]

{'loss': Array(0.14070323, dtype=float32), 'loss_cross_entropy': Array(0.13250045, dtype=float32)}
{'loss_inverse': Array(3.5808152e-05, dtype=float32)}


  9%|▉         | 91690/1000000 [4:20:20<30:04:47,  8.39it/s]

{'loss': Array(0.12732959, dtype=float32), 'loss_cross_entropy': Array(0.11970692, dtype=float32)}
{'loss_inverse': Array(1.2390602e-05, dtype=float32)}


  9%|▉         | 91700/1000000 [4:20:21<25:11:57, 10.01it/s]

{'loss': Array(0.14237693, dtype=float32), 'loss_cross_entropy': Array(0.13526472, dtype=float32)}
{'loss_inverse': Array(2.4390584e-07, dtype=float32)}


  9%|▉         | 91710/1000000 [4:20:23<37:17:41,  6.77it/s]

{'loss': Array(0.13893163, dtype=float32), 'loss_cross_entropy': Array(0.13149957, dtype=float32)}
{'loss_inverse': Array(3.5041164e-06, dtype=float32)}


  9%|▉         | 91720/1000000 [4:20:24<27:02:36,  9.33it/s]

{'loss': Array(0.14950068, dtype=float32), 'loss_cross_entropy': Array(0.14121705, dtype=float32)}
{'loss_inverse': Array(5.174343e-06, dtype=float32)}


  9%|▉         | 91730/1000000 [4:20:26<25:09:24, 10.03it/s]

{'loss': Array(0.15455537, dtype=float32), 'loss_cross_entropy': Array(0.14643833, dtype=float32)}
{'loss_inverse': Array(1.3765039e-06, dtype=float32)}


  9%|▉         | 91740/1000000 [4:20:27<29:03:53,  8.68it/s]

{'loss': Array(0.16029224, dtype=float32), 'loss_cross_entropy': Array(0.15073408, dtype=float32)}
{'loss_inverse': Array(7.236826e-07, dtype=float32)}


  9%|▉         | 91750/1000000 [4:20:29<25:22:34,  9.94it/s]

{'loss': Array(0.15498637, dtype=float32), 'loss_cross_entropy': Array(0.14653741, dtype=float32)}
{'loss_inverse': Array(1.148563e-06, dtype=float32)}


  9%|▉         | 91760/1000000 [4:20:30<28:41:54,  8.79it/s]

{'loss': Array(0.15552656, dtype=float32), 'loss_cross_entropy': Array(0.1471287, dtype=float32)}
{'loss_inverse': Array(2.929021e-08, dtype=float32)}


  9%|▉         | 91770/1000000 [4:20:32<25:06:48, 10.05it/s]

{'loss': Array(0.15557913, dtype=float32), 'loss_cross_entropy': Array(0.14730553, dtype=float32)}
{'loss_inverse': Array(4.6792007e-07, dtype=float32)}


  9%|▉         | 91780/1000000 [4:20:34<31:13:26,  8.08it/s]

{'loss': Array(0.14282951, dtype=float32), 'loss_cross_entropy': Array(0.134809, dtype=float32)}
{'loss_inverse': Array(2.6692596e-05, dtype=float32)}


  9%|▉         | 91790/1000000 [4:20:35<25:45:13,  9.80it/s]

{'loss': Array(0.1506511, dtype=float32), 'loss_cross_entropy': Array(0.14212064, dtype=float32)}
{'loss_inverse': Array(1.1208749e-05, dtype=float32)}


  9%|▉         | 91800/1000000 [4:20:36<25:51:35,  9.76it/s]

{'loss': Array(0.13894573, dtype=float32), 'loss_cross_entropy': Array(0.13123687, dtype=float32)}
{'loss_inverse': Array(2.3253444e-05, dtype=float32)}


  9%|▉         | 91810/1000000 [4:20:38<27:44:36,  9.09it/s]

{'loss': Array(0.13749275, dtype=float32), 'loss_cross_entropy': Array(0.13025537, dtype=float32)}
{'loss_inverse': Array(1.3294316e-05, dtype=float32)}


  9%|▉         | 91820/1000000 [4:20:40<25:55:16,  9.73it/s]

{'loss': Array(0.1451553, dtype=float32), 'loss_cross_entropy': Array(0.13739944, dtype=float32)}
{'loss_inverse': Array(7.604888e-07, dtype=float32)}


  9%|▉         | 91830/1000000 [4:20:41<29:12:47,  8.64it/s]

{'loss': Array(0.1326935, dtype=float32), 'loss_cross_entropy': Array(0.12548618, dtype=float32)}
{'loss_inverse': Array(1.1876667e-05, dtype=float32)}


  9%|▉         | 91840/1000000 [4:20:43<25:42:47,  9.81it/s]

{'loss': Array(0.14360446, dtype=float32), 'loss_cross_entropy': Array(0.13486992, dtype=float32)}
{'loss_inverse': Array(2.9942466e-08, dtype=float32)}


  9%|▉         | 91850/1000000 [4:20:45<31:21:04,  8.05it/s]

{'loss': Array(0.14771788, dtype=float32), 'loss_cross_entropy': Array(0.13958777, dtype=float32)}
{'loss_inverse': Array(8.19379e-08, dtype=float32)}


  9%|▉         | 91860/1000000 [4:20:46<25:47:00,  9.78it/s]

{'loss': Array(0.1327434, dtype=float32), 'loss_cross_entropy': Array(0.12537478, dtype=float32)}
{'loss_inverse': Array(1.4081645e-07, dtype=float32)}


  9%|▉         | 91870/1000000 [4:20:47<24:51:43, 10.15it/s]

{'loss': Array(0.14588879, dtype=float32), 'loss_cross_entropy': Array(0.13781938, dtype=float32)}
{'loss_inverse': Array(2.3842734e-08, dtype=float32)}


  9%|▉         | 91880/1000000 [4:20:49<28:43:53,  8.78it/s]

{'loss': Array(0.13571878, dtype=float32), 'loss_cross_entropy': Array(0.12755816, dtype=float32)}
{'loss_inverse': Array(4.3574528e-05, dtype=float32)}


  9%|▉         | 91890/1000000 [4:20:51<25:25:04,  9.92it/s]

{'loss': Array(0.12906966, dtype=float32), 'loss_cross_entropy': Array(0.12246869, dtype=float32)}
{'loss_inverse': Array(6.716161e-06, dtype=float32)}


  9%|▉         | 91900/1000000 [4:20:52<30:36:56,  8.24it/s]

{'loss': Array(0.14227085, dtype=float32), 'loss_cross_entropy': Array(0.1334795, dtype=float32)}
{'loss_inverse': Array(2.2678794e-06, dtype=float32)}


  9%|▉         | 91910/1000000 [4:20:54<26:06:45,  9.66it/s]

{'loss': Array(0.14738412, dtype=float32), 'loss_cross_entropy': Array(0.13956425, dtype=float32)}
{'loss_inverse': Array(2.9954727e-08, dtype=float32)}


  9%|▉         | 91920/1000000 [4:20:56<33:57:11,  7.43it/s]

{'loss': Array(0.13779834, dtype=float32), 'loss_cross_entropy': Array(0.12993154, dtype=float32)}
{'loss_inverse': Array(2.265935e-05, dtype=float32)}


  9%|▉         | 91930/1000000 [4:20:57<26:12:35,  9.62it/s]

{'loss': Array(0.1462731, dtype=float32), 'loss_cross_entropy': Array(0.13816564, dtype=float32)}
{'loss_inverse': Array(4.495958e-07, dtype=float32)}


  9%|▉         | 91940/1000000 [4:20:58<24:51:35, 10.15it/s]

{'loss': Array(0.16070244, dtype=float32), 'loss_cross_entropy': Array(0.15177612, dtype=float32)}
{'loss_inverse': Array(4.2036686e-07, dtype=float32)}


  9%|▉         | 91950/1000000 [4:21:00<27:42:01,  9.11it/s]

{'loss': Array(0.14598629, dtype=float32), 'loss_cross_entropy': Array(0.13797484, dtype=float32)}
{'loss_inverse': Array(1.1645874e-07, dtype=float32)}


  9%|▉         | 91960/1000000 [4:21:02<24:57:22, 10.11it/s]

{'loss': Array(0.14037497, dtype=float32), 'loss_cross_entropy': Array(0.1327856, dtype=float32)}
{'loss_inverse': Array(4.3849013e-07, dtype=float32)}


  9%|▉         | 91970/1000000 [4:21:03<33:31:23,  7.52it/s]

{'loss': Array(0.13168222, dtype=float32), 'loss_cross_entropy': Array(0.12438585, dtype=float32)}
{'loss_inverse': Array(6.338448e-06, dtype=float32)}


  9%|▉         | 91980/1000000 [4:21:05<26:23:46,  9.56it/s]

{'loss': Array(0.15339915, dtype=float32), 'loss_cross_entropy': Array(0.14493287, dtype=float32)}
{'loss_inverse': Array(6.615302e-09, dtype=float32)}


  9%|▉         | 91990/1000000 [4:21:06<25:15:05,  9.99it/s]

{'loss': Array(0.14491501, dtype=float32), 'loss_cross_entropy': Array(0.13705365, dtype=float32)}
{'loss_inverse': Array(1.0267679e-06, dtype=float32)}


  9%|▉         | 92000/1000000 [4:21:08<28:20:00,  8.90it/s]

{'loss': Array(0.14468566, dtype=float32), 'loss_cross_entropy': Array(0.13718732, dtype=float32)}
{'loss_inverse': Array(1.2321477e-06, dtype=float32)}


  9%|▉         | 92010/1000000 [4:21:16<86:56:34,  2.90it/s] 

{'loss': Array(0.13230698, dtype=float32), 'loss_cross_entropy': Array(0.12520075, dtype=float32)}
{'loss_inverse': Array(0.00020711, dtype=float32)}


  9%|▉         | 92020/1000000 [4:21:18<39:39:07,  6.36it/s]

{'loss': Array(0.14384885, dtype=float32), 'loss_cross_entropy': Array(0.13598454, dtype=float32)}
{'loss_inverse': Array(3.7905655e-05, dtype=float32)}


  9%|▉         | 92030/1000000 [4:21:19<27:37:03,  9.13it/s]

{'loss': Array(0.13964783, dtype=float32), 'loss_cross_entropy': Array(0.13238937, dtype=float32)}
{'loss_inverse': Array(4.560288e-06, dtype=float32)}


  9%|▉         | 92040/1000000 [4:21:21<29:42:04,  8.49it/s]

{'loss': Array(0.14670818, dtype=float32), 'loss_cross_entropy': Array(0.13863829, dtype=float32)}
{'loss_inverse': Array(3.715214e-07, dtype=float32)}


  9%|▉         | 92050/1000000 [4:21:23<25:08:22, 10.03it/s]

{'loss': Array(0.12863706, dtype=float32), 'loss_cross_entropy': Array(0.12088891, dtype=float32)}
{'loss_inverse': Array(5.502775e-08, dtype=float32)}


  9%|▉         | 92060/1000000 [4:21:24<37:15:55,  6.77it/s]

{'loss': Array(0.13327454, dtype=float32), 'loss_cross_entropy': Array(0.12587267, dtype=float32)}
{'loss_inverse': Array(1.2398443e-05, dtype=float32)}


  9%|▉         | 92070/1000000 [4:21:26<26:51:27,  9.39it/s]

{'loss': Array(0.13549154, dtype=float32), 'loss_cross_entropy': Array(0.12810366, dtype=float32)}
{'loss_inverse': Array(8.171869e-08, dtype=float32)}


  9%|▉         | 92080/1000000 [4:21:27<25:45:35,  9.79it/s]

{'loss': Array(0.13497455, dtype=float32), 'loss_cross_entropy': Array(0.12705651, dtype=float32)}
{'loss_inverse': Array(4.9201793e-07, dtype=float32)}


  9%|▉         | 92090/1000000 [4:21:29<29:22:01,  8.59it/s]

{'loss': Array(0.14663993, dtype=float32), 'loss_cross_entropy': Array(0.13781594, dtype=float32)}
{'loss_inverse': Array(5.573814e-05, dtype=float32)}


  9%|▉         | 92100/1000000 [4:21:30<25:32:46,  9.87it/s]

{'loss': Array(0.12814699, dtype=float32), 'loss_cross_entropy': Array(0.12057403, dtype=float32)}
{'loss_inverse': Array(3.557064e-07, dtype=float32)}


  9%|▉         | 92110/1000000 [4:21:32<31:24:04,  8.03it/s]

{'loss': Array(0.13763434, dtype=float32), 'loss_cross_entropy': Array(0.1293275, dtype=float32)}
{'loss_inverse': Array(5.6692494e-05, dtype=float32)}


  9%|▉         | 92120/1000000 [4:21:34<25:44:08,  9.80it/s]

{'loss': Array(0.1469649, dtype=float32), 'loss_cross_entropy': Array(0.13848443, dtype=float32)}
{'loss_inverse': Array(0.00017447, dtype=float32)}


  9%|▉         | 92130/1000000 [4:21:35<30:43:25,  8.21it/s]

{'loss': Array(0.1476941, dtype=float32), 'loss_cross_entropy': Array(0.13936272, dtype=float32)}
{'loss_inverse': Array(0.00012273, dtype=float32)}


  9%|▉         | 92140/1000000 [4:21:37<26:56:37,  9.36it/s]

{'loss': Array(0.16794407, dtype=float32), 'loss_cross_entropy': Array(0.1586325, dtype=float32)}
{'loss_inverse': Array(3.3739987e-05, dtype=float32)}


  9%|▉         | 92150/1000000 [4:21:38<25:17:10,  9.97it/s]

{'loss': Array(0.1374416, dtype=float32), 'loss_cross_entropy': Array(0.12966006, dtype=float32)}
{'loss_inverse': Array(2.0390294e-06, dtype=float32)}


  9%|▉         | 92160/1000000 [4:21:40<28:09:59,  8.95it/s]

{'loss': Array(0.13990432, dtype=float32), 'loss_cross_entropy': Array(0.1319777, dtype=float32)}
{'loss_inverse': Array(8.690511e-07, dtype=float32)}


  9%|▉         | 92170/1000000 [4:21:41<25:22:50,  9.94it/s]

{'loss': Array(0.15532146, dtype=float32), 'loss_cross_entropy': Array(0.1472445, dtype=float32)}
{'loss_inverse': Array(4.1872994e-08, dtype=float32)}


  9%|▉         | 92180/1000000 [4:21:43<28:51:23,  8.74it/s]

{'loss': Array(0.1515561, dtype=float32), 'loss_cross_entropy': Array(0.14328834, dtype=float32)}
{'loss_inverse': Array(7.913272e-07, dtype=float32)}


  9%|▉         | 92190/1000000 [4:21:45<25:19:18,  9.96it/s]

{'loss': Array(0.13370566, dtype=float32), 'loss_cross_entropy': Array(0.12564822, dtype=float32)}
{'loss_inverse': Array(1.8681263e-06, dtype=float32)}


  9%|▉         | 92200/1000000 [4:21:46<28:54:34,  8.72it/s]

{'loss': Array(0.16053051, dtype=float32), 'loss_cross_entropy': Array(0.15170273, dtype=float32)}
{'loss_inverse': Array(0.00026177, dtype=float32)}


  9%|▉         | 92210/1000000 [4:21:48<25:23:09,  9.93it/s]

{'loss': Array(0.16459763, dtype=float32), 'loss_cross_entropy': Array(0.15616058, dtype=float32)}
{'loss_inverse': Array(0.00019037, dtype=float32)}


  9%|▉         | 92220/1000000 [4:21:50<29:52:29,  8.44it/s]

{'loss': Array(0.12901041, dtype=float32), 'loss_cross_entropy': Array(0.12080432, dtype=float32)}
{'loss_inverse': Array(1.0455538e-05, dtype=float32)}


  9%|▉         | 92230/1000000 [4:21:51<25:44:53,  9.79it/s]

{'loss': Array(0.14482981, dtype=float32), 'loss_cross_entropy': Array(0.1372443, dtype=float32)}
{'loss_inverse': Array(7.799482e-07, dtype=float32)}


  9%|▉         | 92240/1000000 [4:21:53<33:29:44,  7.53it/s]

{'loss': Array(0.14032494, dtype=float32), 'loss_cross_entropy': Array(0.13214205, dtype=float32)}
{'loss_inverse': Array(1.2704849e-06, dtype=float32)}


  9%|▉         | 92250/1000000 [4:21:54<26:08:27,  9.65it/s]

{'loss': Array(0.14657955, dtype=float32), 'loss_cross_entropy': Array(0.13888364, dtype=float32)}
{'loss_inverse': Array(6.4107985e-07, dtype=float32)}


  9%|▉         | 92260/1000000 [4:21:56<37:28:04,  6.73it/s]

{'loss': Array(0.12205887, dtype=float32), 'loss_cross_entropy': Array(0.11448665, dtype=float32)}
{'loss_inverse': Array(1.372689e-07, dtype=float32)}


  9%|▉         | 92270/1000000 [4:21:57<27:01:51,  9.33it/s]

{'loss': Array(0.14688858, dtype=float32), 'loss_cross_entropy': Array(0.13916145, dtype=float32)}
{'loss_inverse': Array(6.082245e-07, dtype=float32)}


  9%|▉         | 92280/1000000 [4:21:59<33:50:17,  7.45it/s]

{'loss': Array(0.134328, dtype=float32), 'loss_cross_entropy': Array(0.12708695, dtype=float32)}
{'loss_inverse': Array(2.3274979e-05, dtype=float32)}


  9%|▉         | 92290/1000000 [4:22:01<26:10:40,  9.63it/s]

{'loss': Array(0.16330856, dtype=float32), 'loss_cross_entropy': Array(0.15465187, dtype=float32)}
{'loss_inverse': Array(6.228288e-05, dtype=float32)}


  9%|▉         | 92300/1000000 [4:22:02<25:17:23,  9.97it/s]

{'loss': Array(0.1395209, dtype=float32), 'loss_cross_entropy': Array(0.13128406, dtype=float32)}
{'loss_inverse': Array(7.625625e-05, dtype=float32)}


  9%|▉         | 92310/1000000 [4:22:04<28:57:03,  8.71it/s]

{'loss': Array(0.14085653, dtype=float32), 'loss_cross_entropy': Array(0.13309088, dtype=float32)}
{'loss_inverse': Array(1.7079027e-06, dtype=float32)}


  9%|▉         | 92320/1000000 [4:22:05<25:34:03,  9.86it/s]

{'loss': Array(0.13792416, dtype=float32), 'loss_cross_entropy': Array(0.13016887, dtype=float32)}
{'loss_inverse': Array(4.221116e-05, dtype=float32)}


  9%|▉         | 92330/1000000 [4:22:07<31:22:31,  8.04it/s]

{'loss': Array(0.14690326, dtype=float32), 'loss_cross_entropy': Array(0.13852544, dtype=float32)}
{'loss_inverse': Array(5.402314e-06, dtype=float32)}


  9%|▉         | 92340/1000000 [4:22:08<26:19:01,  9.58it/s]

{'loss': Array(0.13351245, dtype=float32), 'loss_cross_entropy': Array(0.12547222, dtype=float32)}
{'loss_inverse': Array(6.659351e-05, dtype=float32)}


  9%|▉         | 92350/1000000 [4:22:10<33:58:29,  7.42it/s]

{'loss': Array(0.1339259, dtype=float32), 'loss_cross_entropy': Array(0.1268592, dtype=float32)}
{'loss_inverse': Array(0.0001263, dtype=float32)}


  9%|▉         | 92360/1000000 [4:22:11<25:56:13,  9.72it/s]

{'loss': Array(0.13842726, dtype=float32), 'loss_cross_entropy': Array(0.13017163, dtype=float32)}
{'loss_inverse': Array(0.00027587, dtype=float32)}


  9%|▉         | 92370/1000000 [4:22:13<34:31:41,  7.30it/s]

{'loss': Array(0.15768185, dtype=float32), 'loss_cross_entropy': Array(0.14884412, dtype=float32)}
{'loss_inverse': Array(0.00023033, dtype=float32)}


  9%|▉         | 92380/1000000 [4:22:15<26:49:26,  9.40it/s]

{'loss': Array(0.14987908, dtype=float32), 'loss_cross_entropy': Array(0.14212155, dtype=float32)}
{'loss_inverse': Array(2.2031758e-05, dtype=float32)}


  9%|▉         | 92390/1000000 [4:22:16<25:27:38,  9.90it/s]

{'loss': Array(0.1431074, dtype=float32), 'loss_cross_entropy': Array(0.13488428, dtype=float32)}
{'loss_inverse': Array(8.558054e-05, dtype=float32)}


  9%|▉         | 92400/1000000 [4:22:18<31:29:27,  8.01it/s]

{'loss': Array(0.14566469, dtype=float32), 'loss_cross_entropy': Array(0.13773157, dtype=float32)}
{'loss_inverse': Array(4.2273846e-06, dtype=float32)}


  9%|▉         | 92410/1000000 [4:22:19<26:26:52,  9.53it/s]

{'loss': Array(0.13881779, dtype=float32), 'loss_cross_entropy': Array(0.13213277, dtype=float32)}
{'loss_inverse': Array(9.054698e-05, dtype=float32)}


  9%|▉         | 92420/1000000 [4:22:21<37:26:02,  6.73it/s]

{'loss': Array(0.15081958, dtype=float32), 'loss_cross_entropy': Array(0.14229701, dtype=float32)}
{'loss_inverse': Array(5.951798e-05, dtype=float32)}


  9%|▉         | 92430/1000000 [4:22:23<27:12:33,  9.27it/s]

{'loss': Array(0.15213566, dtype=float32), 'loss_cross_entropy': Array(0.14379257, dtype=float32)}
{'loss_inverse': Array(7.224443e-05, dtype=float32)}


  9%|▉         | 92440/1000000 [4:22:24<25:34:04,  9.86it/s]

{'loss': Array(0.13550949, dtype=float32), 'loss_cross_entropy': Array(0.12781799, dtype=float32)}
{'loss_inverse': Array(2.0425592e-05, dtype=float32)}


  9%|▉         | 92450/1000000 [4:22:26<27:45:38,  9.08it/s]

{'loss': Array(0.12502818, dtype=float32), 'loss_cross_entropy': Array(0.11779865, dtype=float32)}
{'loss_inverse': Array(4.750566e-07, dtype=float32)}


  9%|▉         | 92460/1000000 [4:22:27<24:59:18, 10.09it/s]

{'loss': Array(0.1211072, dtype=float32), 'loss_cross_entropy': Array(0.11422399, dtype=float32)}
{'loss_inverse': Array(8.351194e-05, dtype=float32)}


  9%|▉         | 92470/1000000 [4:22:29<27:47:21,  9.07it/s]

{'loss': Array(0.13361846, dtype=float32), 'loss_cross_entropy': Array(0.12629272, dtype=float32)}
{'loss_inverse': Array(2.0868369e-05, dtype=float32)}


  9%|▉         | 92480/1000000 [4:22:30<24:59:30, 10.09it/s]

{'loss': Array(0.13858522, dtype=float32), 'loss_cross_entropy': Array(0.13080713, dtype=float32)}
{'loss_inverse': Array(3.9307488e-05, dtype=float32)}


  9%|▉         | 92490/1000000 [4:22:32<30:52:02,  8.17it/s]

{'loss': Array(0.14330654, dtype=float32), 'loss_cross_entropy': Array(0.13538755, dtype=float32)}
{'loss_inverse': Array(1.9825357e-05, dtype=float32)}


  9%|▉         | 92500/1000000 [4:22:34<25:16:00,  9.98it/s]

{'loss': Array(0.14464222, dtype=float32), 'loss_cross_entropy': Array(0.13648666, dtype=float32)}
{'loss_inverse': Array(0.00020819, dtype=float32)}


  9%|▉         | 92510/1000000 [4:22:42<89:07:01,  2.83it/s] 

{'loss': Array(0.13899247, dtype=float32), 'loss_cross_entropy': Array(0.13168123, dtype=float32)}
{'loss_inverse': Array(5.9613376e-05, dtype=float32)}


  9%|▉         | 92520/1000000 [4:22:43<35:46:22,  7.05it/s]

{'loss': Array(0.13425277, dtype=float32), 'loss_cross_entropy': Array(0.12657872, dtype=float32)}
{'loss_inverse': Array(6.9197245e-05, dtype=float32)}


  9%|▉         | 92530/1000000 [4:22:45<26:13:43,  9.61it/s]

{'loss': Array(0.12961484, dtype=float32), 'loss_cross_entropy': Array(0.12203621, dtype=float32)}
{'loss_inverse': Array(9.664207e-08, dtype=float32)}


  9%|▉         | 92540/1000000 [4:22:46<27:34:39,  9.14it/s]

{'loss': Array(0.15385309, dtype=float32), 'loss_cross_entropy': Array(0.1454073, dtype=float32)}
{'loss_inverse': Array(1.3556263e-05, dtype=float32)}


  9%|▉         | 92550/1000000 [4:22:48<25:00:08, 10.08it/s]

{'loss': Array(0.15091853, dtype=float32), 'loss_cross_entropy': Array(0.14265482, dtype=float32)}
{'loss_inverse': Array(4.1395393e-05, dtype=float32)}


  9%|▉         | 92560/1000000 [4:22:49<29:01:20,  8.69it/s]

{'loss': Array(0.15688807, dtype=float32), 'loss_cross_entropy': Array(0.14827135, dtype=float32)}
{'loss_inverse': Array(1.1543779e-05, dtype=float32)}


  9%|▉         | 92570/1000000 [4:22:51<25:14:45,  9.98it/s]

{'loss': Array(0.16192888, dtype=float32), 'loss_cross_entropy': Array(0.15340675, dtype=float32)}
{'loss_inverse': Array(1.58841e-07, dtype=float32)}


  9%|▉         | 92580/1000000 [4:22:53<30:54:52,  8.15it/s]

{'loss': Array(0.12825806, dtype=float32), 'loss_cross_entropy': Array(0.12062601, dtype=float32)}
{'loss_inverse': Array(1.0653533e-05, dtype=float32)}


  9%|▉         | 92590/1000000 [4:22:54<25:30:26,  9.88it/s]

{'loss': Array(0.13873433, dtype=float32), 'loss_cross_entropy': Array(0.13096693, dtype=float32)}
{'loss_inverse': Array(1.9435442e-05, dtype=float32)}


  9%|▉         | 92600/1000000 [4:22:55<25:05:32, 10.05it/s]

{'loss': Array(0.14035414, dtype=float32), 'loss_cross_entropy': Array(0.1323582, dtype=float32)}
{'loss_inverse': Array(6.47083e-05, dtype=float32)}


  9%|▉         | 92610/1000000 [4:22:57<28:47:22,  8.76it/s]

{'loss': Array(0.13227852, dtype=float32), 'loss_cross_entropy': Array(0.12527996, dtype=float32)}
{'loss_inverse': Array(1.7755307e-06, dtype=float32)}


  9%|▉         | 92620/1000000 [4:22:59<25:05:30, 10.05it/s]

{'loss': Array(0.14283553, dtype=float32), 'loss_cross_entropy': Array(0.13473064, dtype=float32)}
{'loss_inverse': Array(0.00025744, dtype=float32)}


  9%|▉         | 92630/1000000 [4:23:00<30:56:54,  8.14it/s]

{'loss': Array(0.14983645, dtype=float32), 'loss_cross_entropy': Array(0.14134642, dtype=float32)}
{'loss_inverse': Array(0.0001065, dtype=float32)}


  9%|▉         | 92640/1000000 [4:23:02<25:32:56,  9.87it/s]

{'loss': Array(0.14387159, dtype=float32), 'loss_cross_entropy': Array(0.13551717, dtype=float32)}
{'loss_inverse': Array(1.1569295e-05, dtype=float32)}


  9%|▉         | 92650/1000000 [4:23:03<33:26:46,  7.54it/s]

{'loss': Array(0.13531564, dtype=float32), 'loss_cross_entropy': Array(0.12697248, dtype=float32)}
{'loss_inverse': Array(3.249315e-05, dtype=float32)}


  9%|▉         | 92660/1000000 [4:23:05<26:03:29,  9.67it/s]

{'loss': Array(0.15475863, dtype=float32), 'loss_cross_entropy': Array(0.14601056, dtype=float32)}
{'loss_inverse': Array(8.83365e-05, dtype=float32)}


  9%|▉         | 92670/1000000 [4:23:06<25:54:14,  9.73it/s]

{'loss': Array(0.14207229, dtype=float32), 'loss_cross_entropy': Array(0.13405691, dtype=float32)}
{'loss_inverse': Array(0.00035053, dtype=float32)}


  9%|▉         | 92680/1000000 [4:23:08<28:22:18,  8.88it/s]

{'loss': Array(0.13533905, dtype=float32), 'loss_cross_entropy': Array(0.12726854, dtype=float32)}
{'loss_inverse': Array(1.4926228e-06, dtype=float32)}


  9%|▉         | 92690/1000000 [4:23:10<26:41:11,  9.44it/s]

{'loss': Array(0.132556, dtype=float32), 'loss_cross_entropy': Array(0.12525938, dtype=float32)}
{'loss_inverse': Array(0.0002461, dtype=float32)}


  9%|▉         | 92700/1000000 [4:23:11<34:17:12,  7.35it/s]

{'loss': Array(0.15966523, dtype=float32), 'loss_cross_entropy': Array(0.15097071, dtype=float32)}
{'loss_inverse': Array(6.2592308e-06, dtype=float32)}


  9%|▉         | 92710/1000000 [4:23:13<26:47:59,  9.40it/s]

{'loss': Array(0.14250846, dtype=float32), 'loss_cross_entropy': Array(0.13482481, dtype=float32)}
{'loss_inverse': Array(3.4955625e-05, dtype=float32)}


  9%|▉         | 92720/1000000 [4:23:14<25:00:52, 10.07it/s]

{'loss': Array(0.15050153, dtype=float32), 'loss_cross_entropy': Array(0.1418883, dtype=float32)}
{'loss_inverse': Array(3.61513e-05, dtype=float32)}


  9%|▉         | 92730/1000000 [4:23:16<27:30:07,  9.16it/s]

{'loss': Array(0.12859447, dtype=float32), 'loss_cross_entropy': Array(0.12110504, dtype=float32)}
{'loss_inverse': Array(1.5989239e-05, dtype=float32)}


  9%|▉         | 92740/1000000 [4:23:17<25:06:49, 10.04it/s]

{'loss': Array(0.14574085, dtype=float32), 'loss_cross_entropy': Array(0.13765588, dtype=float32)}
{'loss_inverse': Array(8.54432e-05, dtype=float32)}


  9%|▉         | 92750/1000000 [4:23:19<28:56:28,  8.71it/s]

{'loss': Array(0.14380626, dtype=float32), 'loss_cross_entropy': Array(0.13594012, dtype=float32)}
{'loss_inverse': Array(9.020989e-05, dtype=float32)}


  9%|▉         | 92760/1000000 [4:23:21<25:32:42,  9.87it/s]

{'loss': Array(0.1436005, dtype=float32), 'loss_cross_entropy': Array(0.13564168, dtype=float32)}
{'loss_inverse': Array(0.00015429, dtype=float32)}


  9%|▉         | 92770/1000000 [4:23:22<30:32:41,  8.25it/s]

{'loss': Array(0.14959364, dtype=float32), 'loss_cross_entropy': Array(0.14168122, dtype=float32)}
{'loss_inverse': Array(0.00015132, dtype=float32)}


  9%|▉         | 92780/1000000 [4:23:24<25:36:39,  9.84it/s]

{'loss': Array(0.15105031, dtype=float32), 'loss_cross_entropy': Array(0.14277028, dtype=float32)}
{'loss_inverse': Array(2.1387614e-06, dtype=float32)}


  9%|▉         | 92790/1000000 [4:23:25<24:36:23, 10.24it/s]

{'loss': Array(0.1395345, dtype=float32), 'loss_cross_entropy': Array(0.13212173, dtype=float32)}
{'loss_inverse': Array(4.3369528e-05, dtype=float32)}


  9%|▉         | 92800/1000000 [4:23:27<28:46:30,  8.76it/s]

{'loss': Array(0.14444913, dtype=float32), 'loss_cross_entropy': Array(0.1362885, dtype=float32)}
{'loss_inverse': Array(7.89146e-06, dtype=float32)}


  9%|▉         | 92810/1000000 [4:23:28<25:27:34,  9.90it/s]

{'loss': Array(0.12800138, dtype=float32), 'loss_cross_entropy': Array(0.12093373, dtype=float32)}
{'loss_inverse': Array(5.962374e-06, dtype=float32)}


  9%|▉         | 92820/1000000 [4:23:30<33:32:14,  7.51it/s]

{'loss': Array(0.13562398, dtype=float32), 'loss_cross_entropy': Array(0.12785974, dtype=float32)}
{'loss_inverse': Array(1.2919157e-06, dtype=float32)}


  9%|▉         | 92830/1000000 [4:23:31<25:51:21,  9.75it/s]

{'loss': Array(0.13888311, dtype=float32), 'loss_cross_entropy': Array(0.13160808, dtype=float32)}
{'loss_inverse': Array(3.4331766e-05, dtype=float32)}


  9%|▉         | 92840/1000000 [4:23:33<37:43:34,  6.68it/s]

{'loss': Array(0.13413893, dtype=float32), 'loss_cross_entropy': Array(0.12647727, dtype=float32)}
{'loss_inverse': Array(1.7409595e-06, dtype=float32)}


  9%|▉         | 92850/1000000 [4:23:34<26:22:33,  9.55it/s]

{'loss': Array(0.150788, dtype=float32), 'loss_cross_entropy': Array(0.1431049, dtype=float32)}
{'loss_inverse': Array(0.00012375, dtype=float32)}


  9%|▉         | 92860/1000000 [4:23:36<37:20:46,  6.75it/s]

{'loss': Array(0.14646168, dtype=float32), 'loss_cross_entropy': Array(0.13868158, dtype=float32)}
{'loss_inverse': Array(0.0001673, dtype=float32)}


  9%|▉         | 92870/1000000 [4:23:38<26:19:10,  9.57it/s]

{'loss': Array(0.13372533, dtype=float32), 'loss_cross_entropy': Array(0.12613814, dtype=float32)}
{'loss_inverse': Array(3.1513096e-05, dtype=float32)}


  9%|▉         | 92880/1000000 [4:23:39<24:42:43, 10.20it/s]

{'loss': Array(0.1525474, dtype=float32), 'loss_cross_entropy': Array(0.1444262, dtype=float32)}
{'loss_inverse': Array(0.00013785, dtype=float32)}


  9%|▉         | 92890/1000000 [4:23:41<31:19:31,  8.04it/s]

{'loss': Array(0.16259073, dtype=float32), 'loss_cross_entropy': Array(0.1538442, dtype=float32)}
{'loss_inverse': Array(2.982899e-05, dtype=float32)}


  9%|▉         | 92900/1000000 [4:23:42<25:42:18,  9.80it/s]

{'loss': Array(0.13526906, dtype=float32), 'loss_cross_entropy': Array(0.12734354, dtype=float32)}
{'loss_inverse': Array(0.00094771, dtype=float32)}


  9%|▉         | 92910/1000000 [4:23:44<37:12:48,  6.77it/s]

{'loss': Array(0.13879721, dtype=float32), 'loss_cross_entropy': Array(0.13086577, dtype=float32)}
{'loss_inverse': Array(3.4587527e-05, dtype=float32)}


  9%|▉         | 92920/1000000 [4:23:45<26:25:42,  9.53it/s]

{'loss': Array(0.1623353, dtype=float32), 'loss_cross_entropy': Array(0.15321067, dtype=float32)}
{'loss_inverse': Array(0.00041487, dtype=float32)}


  9%|▉         | 92930/1000000 [4:23:47<37:59:11,  6.63it/s]

{'loss': Array(0.16834827, dtype=float32), 'loss_cross_entropy': Array(0.15946588, dtype=float32)}
{'loss_inverse': Array(9.623261e-06, dtype=float32)}


  9%|▉         | 92940/1000000 [4:23:49<26:39:25,  9.45it/s]

{'loss': Array(0.12780407, dtype=float32), 'loss_cross_entropy': Array(0.12092655, dtype=float32)}
{'loss_inverse': Array(0.00043669, dtype=float32)}


  9%|▉         | 92950/1000000 [4:23:50<24:55:34, 10.11it/s]

{'loss': Array(0.11937317, dtype=float32), 'loss_cross_entropy': Array(0.11248627, dtype=float32)}
{'loss_inverse': Array(1.2446598e-06, dtype=float32)}


  9%|▉         | 92960/1000000 [4:23:52<33:56:12,  7.42it/s]

{'loss': Array(0.13511024, dtype=float32), 'loss_cross_entropy': Array(0.12759681, dtype=float32)}
{'loss_inverse': Array(0.00054852, dtype=float32)}


  9%|▉         | 92970/1000000 [4:23:53<27:11:35,  9.27it/s]

{'loss': Array(0.16214117, dtype=float32), 'loss_cross_entropy': Array(0.15316196, dtype=float32)}
{'loss_inverse': Array(0.00022154, dtype=float32)}


  9%|▉         | 92980/1000000 [4:23:55<25:06:16, 10.04it/s]

{'loss': Array(0.13496551, dtype=float32), 'loss_cross_entropy': Array(0.12717314, dtype=float32)}
{'loss_inverse': Array(1.7790013e-06, dtype=float32)}


  9%|▉         | 92990/1000000 [4:23:56<28:31:56,  8.83it/s]

{'loss': Array(0.13790627, dtype=float32), 'loss_cross_entropy': Array(0.12976506, dtype=float32)}
{'loss_inverse': Array(6.804181e-05, dtype=float32)}


  9%|▉         | 93000/1000000 [4:23:58<25:35:08,  9.85it/s]

{'loss': Array(0.13513687, dtype=float32), 'loss_cross_entropy': Array(0.12787284, dtype=float32)}
{'loss_inverse': Array(5.4980187e-06, dtype=float32)}


  9%|▉         | 93010/1000000 [4:24:06<87:23:57,  2.88it/s] 

{'loss': Array(0.15112495, dtype=float32), 'loss_cross_entropy': Array(0.14294516, dtype=float32)}
{'loss_inverse': Array(6.9010785e-05, dtype=float32)}


  9%|▉         | 93020/1000000 [4:24:07<35:25:45,  7.11it/s]

{'loss': Array(0.1458604, dtype=float32), 'loss_cross_entropy': Array(0.13756047, dtype=float32)}
{'loss_inverse': Array(2.1635074e-06, dtype=float32)}


  9%|▉         | 93030/1000000 [4:24:09<33:44:53,  7.47it/s]

{'loss': Array(0.14903149, dtype=float32), 'loss_cross_entropy': Array(0.14098553, dtype=float32)}
{'loss_inverse': Array(0.00012889, dtype=float32)}


  9%|▉         | 93040/1000000 [4:24:11<27:46:42,  9.07it/s]

{'loss': Array(0.13086472, dtype=float32), 'loss_cross_entropy': Array(0.12323981, dtype=float32)}
{'loss_inverse': Array(8.3391586e-07, dtype=float32)}


  9%|▉         | 93050/1000000 [4:24:13<31:33:25,  7.98it/s]

{'loss': Array(0.15414509, dtype=float32), 'loss_cross_entropy': Array(0.14569362, dtype=float32)}
{'loss_inverse': Array(9.617999e-07, dtype=float32)}


  9%|▉         | 93060/1000000 [4:24:14<25:48:44,  9.76it/s]

{'loss': Array(0.14068502, dtype=float32), 'loss_cross_entropy': Array(0.132751, dtype=float32)}
{'loss_inverse': Array(7.535426e-05, dtype=float32)}


  9%|▉         | 93070/1000000 [4:24:15<24:31:47, 10.27it/s]

{'loss': Array(0.14645514, dtype=float32), 'loss_cross_entropy': Array(0.13855998, dtype=float32)}
{'loss_inverse': Array(1.2625154e-05, dtype=float32)}


  9%|▉         | 93080/1000000 [4:24:17<27:51:10,  9.04it/s]

{'loss': Array(0.14079498, dtype=float32), 'loss_cross_entropy': Array(0.13280323, dtype=float32)}
{'loss_inverse': Array(2.5822983e-05, dtype=float32)}


  9%|▉         | 93090/1000000 [4:24:19<25:12:12, 10.00it/s]

{'loss': Array(0.14341779, dtype=float32), 'loss_cross_entropy': Array(0.1357975, dtype=float32)}
{'loss_inverse': Array(4.528609e-06, dtype=float32)}


  9%|▉         | 93100/1000000 [4:24:20<29:07:54,  8.65it/s]

{'loss': Array(0.1404268, dtype=float32), 'loss_cross_entropy': Array(0.13170928, dtype=float32)}
{'loss_inverse': Array(1.2564497e-07, dtype=float32)}


  9%|▉         | 93110/1000000 [4:24:22<25:23:21,  9.92it/s]

{'loss': Array(0.1488627, dtype=float32), 'loss_cross_entropy': Array(0.1413009, dtype=float32)}
{'loss_inverse': Array(3.7070834e-08, dtype=float32)}


  9%|▉         | 93120/1000000 [4:24:24<29:30:47,  8.54it/s]

{'loss': Array(0.13996398, dtype=float32), 'loss_cross_entropy': Array(0.1321335, dtype=float32)}
{'loss_inverse': Array(1.0879589e-05, dtype=float32)}


  9%|▉         | 93130/1000000 [4:24:25<25:47:47,  9.77it/s]

{'loss': Array(0.14258498, dtype=float32), 'loss_cross_entropy': Array(0.13412659, dtype=float32)}
{'loss_inverse': Array(3.669478e-06, dtype=float32)}


  9%|▉         | 93140/1000000 [4:24:27<28:53:22,  8.72it/s]

{'loss': Array(0.14620118, dtype=float32), 'loss_cross_entropy': Array(0.13823006, dtype=float32)}
{'loss_inverse': Array(6.667738e-05, dtype=float32)}


  9%|▉         | 93150/1000000 [4:24:28<25:08:26, 10.02it/s]

{'loss': Array(0.13845323, dtype=float32), 'loss_cross_entropy': Array(0.13084191, dtype=float32)}
{'loss_inverse': Array(1.476037e-06, dtype=float32)}


  9%|▉         | 93160/1000000 [4:24:30<33:42:41,  7.47it/s]

{'loss': Array(0.15345263, dtype=float32), 'loss_cross_entropy': Array(0.14490469, dtype=float32)}
{'loss_inverse': Array(1.264106e-07, dtype=float32)}


  9%|▉         | 93170/1000000 [4:24:31<25:51:54,  9.74it/s]

{'loss': Array(0.14522488, dtype=float32), 'loss_cross_entropy': Array(0.13756089, dtype=float32)}
{'loss_inverse': Array(4.9224047e-05, dtype=float32)}


  9%|▉         | 93180/1000000 [4:24:33<38:13:45,  6.59it/s]

{'loss': Array(0.14040022, dtype=float32), 'loss_cross_entropy': Array(0.13334624, dtype=float32)}
{'loss_inverse': Array(4.8834e-07, dtype=float32)}


  9%|▉         | 93190/1000000 [4:24:34<26:39:43,  9.45it/s]

{'loss': Array(0.14231345, dtype=float32), 'loss_cross_entropy': Array(0.13435923, dtype=float32)}
{'loss_inverse': Array(5.7172554e-05, dtype=float32)}


  9%|▉         | 93200/1000000 [4:24:36<34:45:47,  7.25it/s]

{'loss': Array(0.14406584, dtype=float32), 'loss_cross_entropy': Array(0.13616888, dtype=float32)}
{'loss_inverse': Array(5.5710142e-08, dtype=float32)}


  9%|▉         | 93210/1000000 [4:24:38<26:38:50,  9.45it/s]

{'loss': Array(0.13462274, dtype=float32), 'loss_cross_entropy': Array(0.12693271, dtype=float32)}
{'loss_inverse': Array(5.8879203e-05, dtype=float32)}


  9%|▉         | 93220/1000000 [4:24:39<25:25:17,  9.91it/s]

{'loss': Array(0.13688521, dtype=float32), 'loss_cross_entropy': Array(0.1294791, dtype=float32)}
{'loss_inverse': Array(1.0888185e-06, dtype=float32)}


  9%|▉         | 93230/1000000 [4:24:41<29:06:35,  8.65it/s]

{'loss': Array(0.13591073, dtype=float32), 'loss_cross_entropy': Array(0.128351, dtype=float32)}
{'loss_inverse': Array(8.027113e-06, dtype=float32)}


  9%|▉         | 93240/1000000 [4:24:42<25:35:40,  9.84it/s]

{'loss': Array(0.16882408, dtype=float32), 'loss_cross_entropy': Array(0.1597853, dtype=float32)}
{'loss_inverse': Array(1.801095e-06, dtype=float32)}


  9%|▉         | 93250/1000000 [4:24:44<30:48:41,  8.17it/s]

{'loss': Array(0.12097979, dtype=float32), 'loss_cross_entropy': Array(0.11439409, dtype=float32)}
{'loss_inverse': Array(3.0684723e-07, dtype=float32)}


  9%|▉         | 93260/1000000 [4:24:45<25:45:19,  9.78it/s]

{'loss': Array(0.14388445, dtype=float32), 'loss_cross_entropy': Array(0.13595758, dtype=float32)}
{'loss_inverse': Array(1.7090109e-07, dtype=float32)}


  9%|▉         | 93270/1000000 [4:24:47<33:37:25,  7.49it/s]

{'loss': Array(0.1460464, dtype=float32), 'loss_cross_entropy': Array(0.13781641, dtype=float32)}
{'loss_inverse': Array(7.1213194e-06, dtype=float32)}


  9%|▉         | 93280/1000000 [4:24:49<26:27:16,  9.52it/s]

{'loss': Array(0.14816444, dtype=float32), 'loss_cross_entropy': Array(0.1394244, dtype=float32)}
{'loss_inverse': Array(1.2821704e-07, dtype=float32)}


  9%|▉         | 93290/1000000 [4:24:50<33:21:17,  7.55it/s]

{'loss': Array(0.15207092, dtype=float32), 'loss_cross_entropy': Array(0.14435588, dtype=float32)}
{'loss_inverse': Array(0.00017523, dtype=float32)}


  9%|▉         | 93300/1000000 [4:24:52<26:28:43,  9.51it/s]

{'loss': Array(0.11263001, dtype=float32), 'loss_cross_entropy': Array(0.10649462, dtype=float32)}
{'loss_inverse': Array(1.739057e-05, dtype=float32)}


  9%|▉         | 93310/1000000 [4:24:54<33:45:36,  7.46it/s]

{'loss': Array(0.13598931, dtype=float32), 'loss_cross_entropy': Array(0.12872279, dtype=float32)}
{'loss_inverse': Array(4.2521588e-06, dtype=float32)}


  9%|▉         | 93320/1000000 [4:24:55<26:08:41,  9.63it/s]

{'loss': Array(0.13474326, dtype=float32), 'loss_cross_entropy': Array(0.12684558, dtype=float32)}
{'loss_inverse': Array(4.0574287e-05, dtype=float32)}


  9%|▉         | 93330/1000000 [4:24:56<24:28:49, 10.29it/s]

{'loss': Array(0.15479793, dtype=float32), 'loss_cross_entropy': Array(0.14678346, dtype=float32)}
{'loss_inverse': Array(3.2618885e-07, dtype=float32)}


  9%|▉         | 93340/1000000 [4:24:58<31:15:44,  8.06it/s]

{'loss': Array(0.13355564, dtype=float32), 'loss_cross_entropy': Array(0.12585084, dtype=float32)}
{'loss_inverse': Array(2.1149053e-05, dtype=float32)}


  9%|▉         | 93350/1000000 [4:25:00<25:38:50,  9.82it/s]

{'loss': Array(0.14548014, dtype=float32), 'loss_cross_entropy': Array(0.13735951, dtype=float32)}
{'loss_inverse': Array(1.3431994e-06, dtype=float32)}


  9%|▉         | 93360/1000000 [4:25:01<34:08:32,  7.38it/s]

{'loss': Array(0.12738428, dtype=float32), 'loss_cross_entropy': Array(0.12032533, dtype=float32)}
{'loss_inverse': Array(3.2419695e-08, dtype=float32)}


  9%|▉         | 93370/1000000 [4:25:03<26:14:02,  9.60it/s]

{'loss': Array(0.14924671, dtype=float32), 'loss_cross_entropy': Array(0.14059924, dtype=float32)}
{'loss_inverse': Array(2.21133e-05, dtype=float32)}


  9%|▉         | 93380/1000000 [4:25:04<37:37:18,  6.69it/s]

{'loss': Array(0.14970052, dtype=float32), 'loss_cross_entropy': Array(0.14096029, dtype=float32)}
{'loss_inverse': Array(3.2405302e-07, dtype=float32)}


  9%|▉         | 93390/1000000 [4:25:06<27:35:26,  9.13it/s]

{'loss': Array(0.15473323, dtype=float32), 'loss_cross_entropy': Array(0.14668308, dtype=float32)}
{'loss_inverse': Array(1.9866668e-06, dtype=float32)}


  9%|▉         | 93400/1000000 [4:25:08<35:29:28,  7.10it/s]

{'loss': Array(0.15146884, dtype=float32), 'loss_cross_entropy': Array(0.14339954, dtype=float32)}
{'loss_inverse': Array(3.5625617e-06, dtype=float32)}


  9%|▉         | 93410/1000000 [4:25:09<27:21:02,  9.21it/s]

{'loss': Array(0.12694512, dtype=float32), 'loss_cross_entropy': Array(0.11956743, dtype=float32)}
{'loss_inverse': Array(2.0645119e-07, dtype=float32)}


  9%|▉         | 93420/1000000 [4:25:11<25:56:58,  9.70it/s]

{'loss': Array(0.13924564, dtype=float32), 'loss_cross_entropy': Array(0.13207112, dtype=float32)}
{'loss_inverse': Array(4.2874905e-07, dtype=float32)}


  9%|▉         | 93430/1000000 [4:25:12<27:32:45,  9.14it/s]

{'loss': Array(0.14823557, dtype=float32), 'loss_cross_entropy': Array(0.14026444, dtype=float32)}
{'loss_inverse': Array(3.2685728e-06, dtype=float32)}


  9%|▉         | 93440/1000000 [4:25:14<25:35:14,  9.84it/s]

{'loss': Array(0.15704252, dtype=float32), 'loss_cross_entropy': Array(0.14933893, dtype=float32)}
{'loss_inverse': Array(3.261231e-08, dtype=float32)}


  9%|▉         | 93450/1000000 [4:25:16<27:21:25,  9.20it/s]

{'loss': Array(0.12999944, dtype=float32), 'loss_cross_entropy': Array(0.12272742, dtype=float32)}
{'loss_inverse': Array(5.4227502e-08, dtype=float32)}


  9%|▉         | 93460/1000000 [4:25:17<24:41:53, 10.20it/s]

{'loss': Array(0.14115962, dtype=float32), 'loss_cross_entropy': Array(0.133601, dtype=float32)}
{'loss_inverse': Array(5.7915014e-07, dtype=float32)}


  9%|▉         | 93470/1000000 [4:25:19<33:56:14,  7.42it/s]

{'loss': Array(0.12360277, dtype=float32), 'loss_cross_entropy': Array(0.11642519, dtype=float32)}
{'loss_inverse': Array(9.138252e-09, dtype=float32)}


  9%|▉         | 93480/1000000 [4:25:20<26:20:27,  9.56it/s]

{'loss': Array(0.15083723, dtype=float32), 'loss_cross_entropy': Array(0.14268057, dtype=float32)}
{'loss_inverse': Array(0.00012409, dtype=float32)}


  9%|▉         | 93490/1000000 [4:25:22<24:52:54, 10.12it/s]

{'loss': Array(0.13148303, dtype=float32), 'loss_cross_entropy': Array(0.12409755, dtype=float32)}
{'loss_inverse': Array(9.5506394e-08, dtype=float32)}


  9%|▉         | 93500/1000000 [4:25:23<27:32:52,  9.14it/s]

{'loss': Array(0.16262951, dtype=float32), 'loss_cross_entropy': Array(0.15484713, dtype=float32)}
{'loss_inverse': Array(5.4658326e-06, dtype=float32)}


  9%|▉         | 93510/1000000 [4:25:31<83:10:22,  3.03it/s] 

{'loss': Array(0.13611138, dtype=float32), 'loss_cross_entropy': Array(0.12866792, dtype=float32)}
{'loss_inverse': Array(4.66626e-07, dtype=float32)}


  9%|▉         | 93520/1000000 [4:25:33<40:30:55,  6.21it/s]

{'loss': Array(0.14092793, dtype=float32), 'loss_cross_entropy': Array(0.1328363, dtype=float32)}
{'loss_inverse': Array(0.00021303, dtype=float32)}


  9%|▉         | 93530/1000000 [4:25:34<27:23:55,  9.19it/s]

{'loss': Array(0.12689851, dtype=float32), 'loss_cross_entropy': Array(0.12007835, dtype=float32)}
{'loss_inverse': Array(1.855203e-06, dtype=float32)}


  9%|▉         | 93540/1000000 [4:25:36<34:39:19,  7.27it/s]

{'loss': Array(0.15490307, dtype=float32), 'loss_cross_entropy': Array(0.1464201, dtype=float32)}
{'loss_inverse': Array(0.00033889, dtype=float32)}


  9%|▉         | 93550/1000000 [4:25:38<26:27:35,  9.52it/s]

{'loss': Array(0.14162932, dtype=float32), 'loss_cross_entropy': Array(0.13319623, dtype=float32)}
{'loss_inverse': Array(0.00012105, dtype=float32)}


  9%|▉         | 93560/1000000 [4:25:39<37:23:32,  6.73it/s]

{'loss': Array(0.14631866, dtype=float32), 'loss_cross_entropy': Array(0.13826145, dtype=float32)}
{'loss_inverse': Array(8.526729e-05, dtype=float32)}


  9%|▉         | 93570/1000000 [4:25:41<26:44:19,  9.42it/s]

{'loss': Array(0.15079464, dtype=float32), 'loss_cross_entropy': Array(0.14284587, dtype=float32)}
{'loss_inverse': Array(6.7937904e-06, dtype=float32)}


  9%|▉         | 93580/1000000 [4:25:42<33:45:37,  7.46it/s]

{'loss': Array(0.14023128, dtype=float32), 'loss_cross_entropy': Array(0.13222317, dtype=float32)}
{'loss_inverse': Array(2.0062862e-05, dtype=float32)}


  9%|▉         | 93590/1000000 [4:25:44<26:56:11,  9.35it/s]

{'loss': Array(0.14416789, dtype=float32), 'loss_cross_entropy': Array(0.13628227, dtype=float32)}
{'loss_inverse': Array(3.3268123e-05, dtype=float32)}


  9%|▉         | 93600/1000000 [4:25:45<25:00:46, 10.07it/s]

{'loss': Array(0.14443713, dtype=float32), 'loss_cross_entropy': Array(0.13708642, dtype=float32)}
{'loss_inverse': Array(2.898262e-06, dtype=float32)}


  9%|▉         | 93610/1000000 [4:25:47<28:47:38,  8.74it/s]

{'loss': Array(0.15045263, dtype=float32), 'loss_cross_entropy': Array(0.14160892, dtype=float32)}
{'loss_inverse': Array(9.650716e-06, dtype=float32)}


  9%|▉         | 93620/1000000 [4:25:49<25:11:02, 10.00it/s]

{'loss': Array(0.14065544, dtype=float32), 'loss_cross_entropy': Array(0.13230091, dtype=float32)}
{'loss_inverse': Array(2.785713e-05, dtype=float32)}


  9%|▉         | 93630/1000000 [4:25:50<33:18:43,  7.56it/s]

{'loss': Array(0.15138692, dtype=float32), 'loss_cross_entropy': Array(0.14342415, dtype=float32)}
{'loss_inverse': Array(2.5443745e-07, dtype=float32)}


  9%|▉         | 93640/1000000 [4:25:52<26:10:48,  9.62it/s]

{'loss': Array(0.12125391, dtype=float32), 'loss_cross_entropy': Array(0.11473081, dtype=float32)}
{'loss_inverse': Array(8.26958e-07, dtype=float32)}


  9%|▉         | 93650/1000000 [4:25:53<37:21:56,  6.74it/s]

{'loss': Array(0.13553305, dtype=float32), 'loss_cross_entropy': Array(0.12765554, dtype=float32)}
{'loss_inverse': Array(8.114498e-05, dtype=float32)}


  9%|▉         | 93660/1000000 [4:25:55<26:32:40,  9.48it/s]

{'loss': Array(0.13124625, dtype=float32), 'loss_cross_entropy': Array(0.12399121, dtype=float32)}
{'loss_inverse': Array(5.1519482e-06, dtype=float32)}


  9%|▉         | 93670/1000000 [4:25:56<24:47:37, 10.15it/s]

{'loss': Array(0.13178435, dtype=float32), 'loss_cross_entropy': Array(0.12409347, dtype=float32)}
{'loss_inverse': Array(9.876e-05, dtype=float32)}


  9%|▉         | 93680/1000000 [4:25:58<28:17:39,  8.90it/s]

{'loss': Array(0.14124024, dtype=float32), 'loss_cross_entropy': Array(0.13386212, dtype=float32)}
{'loss_inverse': Array(5.4740394e-05, dtype=float32)}


  9%|▉         | 93690/1000000 [4:25:59<25:09:15, 10.01it/s]

{'loss': Array(0.11063898, dtype=float32), 'loss_cross_entropy': Array(0.10448445, dtype=float32)}
{'loss_inverse': Array(6.871803e-06, dtype=float32)}


  9%|▉         | 93700/1000000 [4:26:01<28:48:49,  8.74it/s]

{'loss': Array(0.13941766, dtype=float32), 'loss_cross_entropy': Array(0.13115199, dtype=float32)}
{'loss_inverse': Array(1.5133798e-05, dtype=float32)}


  9%|▉         | 93710/1000000 [4:26:03<25:18:50,  9.94it/s]

{'loss': Array(0.14656956, dtype=float32), 'loss_cross_entropy': Array(0.13851301, dtype=float32)}
{'loss_inverse': Array(0.00033072, dtype=float32)}


  9%|▉         | 93720/1000000 [4:26:04<31:04:27,  8.10it/s]

{'loss': Array(0.14096378, dtype=float32), 'loss_cross_entropy': Array(0.13279983, dtype=float32)}
{'loss_inverse': Array(2.5643583e-06, dtype=float32)}


  9%|▉         | 93730/1000000 [4:26:06<25:52:58,  9.73it/s]

{'loss': Array(0.13425305, dtype=float32), 'loss_cross_entropy': Array(0.12718183, dtype=float32)}
{'loss_inverse': Array(2.885754e-05, dtype=float32)}


  9%|▉         | 93740/1000000 [4:26:08<34:11:38,  7.36it/s]

{'loss': Array(0.14870068, dtype=float32), 'loss_cross_entropy': Array(0.14105582, dtype=float32)}
{'loss_inverse': Array(1.8067666e-06, dtype=float32)}


  9%|▉         | 93750/1000000 [4:26:09<26:53:13,  9.36it/s]

{'loss': Array(0.15144317, dtype=float32), 'loss_cross_entropy': Array(0.14307384, dtype=float32)}
{'loss_inverse': Array(1.7087931e-06, dtype=float32)}


  9%|▉         | 93760/1000000 [4:26:11<34:14:22,  7.35it/s]

{'loss': Array(0.14299195, dtype=float32), 'loss_cross_entropy': Array(0.13499938, dtype=float32)}
{'loss_inverse': Array(5.3296815e-05, dtype=float32)}


  9%|▉         | 93770/1000000 [4:26:12<26:15:29,  9.59it/s]

{'loss': Array(0.13900039, dtype=float32), 'loss_cross_entropy': Array(0.13168328, dtype=float32)}
{'loss_inverse': Array(9.870918e-07, dtype=float32)}


  9%|▉         | 93780/1000000 [4:26:14<24:52:11, 10.12it/s]

{'loss': Array(0.13625266, dtype=float32), 'loss_cross_entropy': Array(0.12930964, dtype=float32)}
{'loss_inverse': Array(6.724703e-08, dtype=float32)}


  9%|▉         | 93790/1000000 [4:26:15<30:35:32,  8.23it/s]

{'loss': Array(0.13913146, dtype=float32), 'loss_cross_entropy': Array(0.1314298, dtype=float32)}
{'loss_inverse': Array(4.9048023e-05, dtype=float32)}


  9%|▉         | 93800/1000000 [4:26:17<26:11:38,  9.61it/s]

{'loss': Array(0.14107601, dtype=float32), 'loss_cross_entropy': Array(0.13343962, dtype=float32)}
{'loss_inverse': Array(1.4301662e-06, dtype=float32)}


  9%|▉         | 93810/1000000 [4:26:19<37:21:58,  6.74it/s]

{'loss': Array(0.1294009, dtype=float32), 'loss_cross_entropy': Array(0.12208124, dtype=float32)}
{'loss_inverse': Array(9.462918e-07, dtype=float32)}


  9%|▉         | 93820/1000000 [4:26:20<26:15:58,  9.58it/s]

{'loss': Array(0.13161983, dtype=float32), 'loss_cross_entropy': Array(0.12368472, dtype=float32)}
{'loss_inverse': Array(6.1417194e-07, dtype=float32)}


  9%|▉         | 93830/1000000 [4:26:21<25:10:47, 10.00it/s]

{'loss': Array(0.15005788, dtype=float32), 'loss_cross_entropy': Array(0.14257799, dtype=float32)}
{'loss_inverse': Array(6.6037856e-08, dtype=float32)}


  9%|▉         | 93840/1000000 [4:26:23<27:28:23,  9.16it/s]

{'loss': Array(0.16086446, dtype=float32), 'loss_cross_entropy': Array(0.15281828, dtype=float32)}
{'loss_inverse': Array(1.3477544e-06, dtype=float32)}


  9%|▉         | 93850/1000000 [4:26:24<25:03:56, 10.04it/s]

{'loss': Array(0.1308131, dtype=float32), 'loss_cross_entropy': Array(0.12331774, dtype=float32)}
{'loss_inverse': Array(6.361812e-05, dtype=float32)}


  9%|▉         | 93860/1000000 [4:26:26<27:34:33,  9.13it/s]

{'loss': Array(0.15834515, dtype=float32), 'loss_cross_entropy': Array(0.15028004, dtype=float32)}
{'loss_inverse': Array(0.00024872, dtype=float32)}


  9%|▉         | 93870/1000000 [4:26:28<25:09:34, 10.00it/s]

{'loss': Array(0.14266825, dtype=float32), 'loss_cross_entropy': Array(0.13419965, dtype=float32)}
{'loss_inverse': Array(9.533813e-05, dtype=float32)}


  9%|▉         | 93880/1000000 [4:26:29<31:08:54,  8.08it/s]

{'loss': Array(0.14397812, dtype=float32), 'loss_cross_entropy': Array(0.13587977, dtype=float32)}
{'loss_inverse': Array(2.0826828e-06, dtype=float32)}


  9%|▉         | 93890/1000000 [4:26:31<25:59:40,  9.68it/s]

{'loss': Array(0.13797042, dtype=float32), 'loss_cross_entropy': Array(0.13034159, dtype=float32)}
{'loss_inverse': Array(1.7322089e-05, dtype=float32)}


  9%|▉         | 93900/1000000 [4:26:33<30:52:03,  8.15it/s]

{'loss': Array(0.15684767, dtype=float32), 'loss_cross_entropy': Array(0.14843924, dtype=float32)}
{'loss_inverse': Array(1.2275892e-07, dtype=float32)}


  9%|▉         | 93910/1000000 [4:26:34<25:16:29,  9.96it/s]

{'loss': Array(0.14858805, dtype=float32), 'loss_cross_entropy': Array(0.13998677, dtype=float32)}
{'loss_inverse': Array(7.629765e-06, dtype=float32)}


  9%|▉         | 93920/1000000 [4:26:36<37:13:42,  6.76it/s]

{'loss': Array(0.13235214, dtype=float32), 'loss_cross_entropy': Array(0.12436879, dtype=float32)}
{'loss_inverse': Array(1.9304049e-05, dtype=float32)}


  9%|▉         | 93930/1000000 [4:26:37<27:25:07,  9.18it/s]

{'loss': Array(0.13914531, dtype=float32), 'loss_cross_entropy': Array(0.13146226, dtype=float32)}
{'loss_inverse': Array(1.5586151e-07, dtype=float32)}


  9%|▉         | 93940/1000000 [4:26:39<25:11:05,  9.99it/s]

{'loss': Array(0.13807721, dtype=float32), 'loss_cross_entropy': Array(0.1306392, dtype=float32)}
{'loss_inverse': Array(0.00030259, dtype=float32)}


  9%|▉         | 93950/1000000 [4:26:40<27:55:54,  9.01it/s]

{'loss': Array(0.14173977, dtype=float32), 'loss_cross_entropy': Array(0.13419628, dtype=float32)}
{'loss_inverse': Array(0.00026726, dtype=float32)}


  9%|▉         | 93960/1000000 [4:26:42<25:24:17,  9.91it/s]

{'loss': Array(0.14966606, dtype=float32), 'loss_cross_entropy': Array(0.14163883, dtype=float32)}
{'loss_inverse': Array(8.130965e-05, dtype=float32)}


  9%|▉         | 93970/1000000 [4:26:44<31:10:49,  8.07it/s]

{'loss': Array(0.13812959, dtype=float32), 'loss_cross_entropy': Array(0.12992844, dtype=float32)}
{'loss_inverse': Array(7.3615576e-07, dtype=float32)}


  9%|▉         | 93980/1000000 [4:26:45<25:17:59,  9.95it/s]

{'loss': Array(0.15137152, dtype=float32), 'loss_cross_entropy': Array(0.14270367, dtype=float32)}
{'loss_inverse': Array(9.407353e-06, dtype=float32)}


  9%|▉         | 93990/1000000 [4:26:46<24:14:53, 10.38it/s]

{'loss': Array(0.12032743, dtype=float32), 'loss_cross_entropy': Array(0.1130958, dtype=float32)}
{'loss_inverse': Array(0.00028201, dtype=float32)}


  9%|▉         | 94000/1000000 [4:26:48<28:38:12,  8.79it/s]

{'loss': Array(0.1322562, dtype=float32), 'loss_cross_entropy': Array(0.12481969, dtype=float32)}
{'loss_inverse': Array(4.137843e-05, dtype=float32)}


  9%|▉         | 94010/1000000 [4:26:56<86:11:51,  2.92it/s] 

{'loss': Array(0.14129071, dtype=float32), 'loss_cross_entropy': Array(0.13384634, dtype=float32)}
{'loss_inverse': Array(0.00011304, dtype=float32)}


  9%|▉         | 94020/1000000 [4:26:58<43:41:16,  5.76it/s]

{'loss': Array(0.14289936, dtype=float32), 'loss_cross_entropy': Array(0.13474205, dtype=float32)}
{'loss_inverse': Array(6.501632e-07, dtype=float32)}


  9%|▉         | 94030/1000000 [4:26:59<27:44:25,  9.07it/s]

{'loss': Array(0.15847631, dtype=float32), 'loss_cross_entropy': Array(0.14975266, dtype=float32)}
{'loss_inverse': Array(9.4152375e-07, dtype=float32)}


  9%|▉         | 94040/1000000 [4:27:01<38:05:15,  6.61it/s]

{'loss': Array(0.15299629, dtype=float32), 'loss_cross_entropy': Array(0.1452302, dtype=float32)}
{'loss_inverse': Array(0.00010731, dtype=float32)}


  9%|▉         | 94050/1000000 [4:27:02<26:55:16,  9.35it/s]

{'loss': Array(0.13270552, dtype=float32), 'loss_cross_entropy': Array(0.12509133, dtype=float32)}
{'loss_inverse': Array(9.607622e-05, dtype=float32)}


  9%|▉         | 94060/1000000 [4:27:04<38:07:41,  6.60it/s]

{'loss': Array(0.13036457, dtype=float32), 'loss_cross_entropy': Array(0.12310351, dtype=float32)}
{'loss_inverse': Array(0.00012525, dtype=float32)}


  9%|▉         | 94070/1000000 [4:27:06<27:00:06,  9.32it/s]

{'loss': Array(0.13302694, dtype=float32), 'loss_cross_entropy': Array(0.12544246, dtype=float32)}
{'loss_inverse': Array(6.6070854e-05, dtype=float32)}


  9%|▉         | 94080/1000000 [4:27:07<25:55:37,  9.71it/s]

{'loss': Array(0.12817769, dtype=float32), 'loss_cross_entropy': Array(0.12089328, dtype=float32)}
{'loss_inverse': Array(2.6319442e-06, dtype=float32)}


  9%|▉         | 94090/1000000 [4:27:09<31:57:32,  7.87it/s]

{'loss': Array(0.15637945, dtype=float32), 'loss_cross_entropy': Array(0.14784083, dtype=float32)}
{'loss_inverse': Array(1.7676159e-06, dtype=float32)}


  9%|▉         | 94100/1000000 [4:27:10<26:41:01,  9.43it/s]

{'loss': Array(0.13094021, dtype=float32), 'loss_cross_entropy': Array(0.12397877, dtype=float32)}
{'loss_inverse': Array(0.00012923, dtype=float32)}


  9%|▉         | 94110/1000000 [4:27:12<37:42:09,  6.67it/s]

{'loss': Array(0.12866902, dtype=float32), 'loss_cross_entropy': Array(0.1211789, dtype=float32)}
{'loss_inverse': Array(7.6760574e-05, dtype=float32)}


  9%|▉         | 94120/1000000 [4:27:14<27:19:48,  9.21it/s]

{'loss': Array(0.14467227, dtype=float32), 'loss_cross_entropy': Array(0.13700162, dtype=float32)}
{'loss_inverse': Array(4.1441467e-06, dtype=float32)}


  9%|▉         | 94130/1000000 [4:27:15<38:20:55,  6.56it/s]

{'loss': Array(0.1378217, dtype=float32), 'loss_cross_entropy': Array(0.1299913, dtype=float32)}
{'loss_inverse': Array(0.00061113, dtype=float32)}


  9%|▉         | 94140/1000000 [4:27:17<27:11:38,  9.25it/s]

{'loss': Array(0.1346023, dtype=float32), 'loss_cross_entropy': Array(0.12686527, dtype=float32)}
{'loss_inverse': Array(0.00031215, dtype=float32)}


  9%|▉         | 94150/1000000 [4:27:18<24:44:58, 10.17it/s]

{'loss': Array(0.1353581, dtype=float32), 'loss_cross_entropy': Array(0.12841181, dtype=float32)}
{'loss_inverse': Array(5.797536e-06, dtype=float32)}


  9%|▉         | 94160/1000000 [4:27:20<33:31:10,  7.51it/s]

{'loss': Array(0.13594128, dtype=float32), 'loss_cross_entropy': Array(0.1281498, dtype=float32)}
{'loss_inverse': Array(0.0001175, dtype=float32)}


  9%|▉         | 94170/1000000 [4:27:21<26:05:43,  9.64it/s]

{'loss': Array(0.13011804, dtype=float32), 'loss_cross_entropy': Array(0.12232272, dtype=float32)}
{'loss_inverse': Array(9.837011e-05, dtype=float32)}


  9%|▉         | 94180/1000000 [4:27:23<25:17:52,  9.95it/s]

{'loss': Array(0.1463582, dtype=float32), 'loss_cross_entropy': Array(0.13860686, dtype=float32)}
{'loss_inverse': Array(3.7951904e-05, dtype=float32)}


  9%|▉         | 94190/1000000 [4:27:25<27:36:08,  9.12it/s]

{'loss': Array(0.14413941, dtype=float32), 'loss_cross_entropy': Array(0.13584414, dtype=float32)}
{'loss_inverse': Array(6.8827244e-06, dtype=float32)}


  9%|▉         | 94200/1000000 [4:27:26<25:20:43,  9.93it/s]

{'loss': Array(0.12448549, dtype=float32), 'loss_cross_entropy': Array(0.11733749, dtype=float32)}
{'loss_inverse': Array(0.0001517, dtype=float32)}


  9%|▉         | 94210/1000000 [4:27:28<29:05:42,  8.65it/s]

{'loss': Array(0.13862686, dtype=float32), 'loss_cross_entropy': Array(0.13016556, dtype=float32)}
{'loss_inverse': Array(0.00011151, dtype=float32)}


  9%|▉         | 94220/1000000 [4:27:29<25:25:58,  9.89it/s]

{'loss': Array(0.12360269, dtype=float32), 'loss_cross_entropy': Array(0.11741763, dtype=float32)}
{'loss_inverse': Array(7.0182045e-06, dtype=float32)}


  9%|▉         | 94230/1000000 [4:27:31<29:15:15,  8.60it/s]

{'loss': Array(0.13958196, dtype=float32), 'loss_cross_entropy': Array(0.13167487, dtype=float32)}
{'loss_inverse': Array(7.2934745e-05, dtype=float32)}


  9%|▉         | 94240/1000000 [4:27:32<25:29:34,  9.87it/s]

{'loss': Array(0.15545498, dtype=float32), 'loss_cross_entropy': Array(0.14615315, dtype=float32)}
{'loss_inverse': Array(3.3996857e-06, dtype=float32)}


  9%|▉         | 94250/1000000 [4:27:34<28:48:28,  8.73it/s]

{'loss': Array(0.16161685, dtype=float32), 'loss_cross_entropy': Array(0.15320362, dtype=float32)}
{'loss_inverse': Array(1.1907366e-05, dtype=float32)}


  9%|▉         | 94260/1000000 [4:27:35<25:08:01, 10.01it/s]

{'loss': Array(0.12243426, dtype=float32), 'loss_cross_entropy': Array(0.11555643, dtype=float32)}
{'loss_inverse': Array(0.0007673, dtype=float32)}


  9%|▉         | 94270/1000000 [4:27:37<33:18:12,  7.55it/s]

{'loss': Array(0.13818751, dtype=float32), 'loss_cross_entropy': Array(0.13039987, dtype=float32)}
{'loss_inverse': Array(3.211667e-05, dtype=float32)}


  9%|▉         | 94280/1000000 [4:27:39<25:58:17,  9.69it/s]

{'loss': Array(0.14421226, dtype=float32), 'loss_cross_entropy': Array(0.13593917, dtype=float32)}
{'loss_inverse': Array(4.119715e-05, dtype=float32)}


  9%|▉         | 94290/1000000 [4:27:40<37:00:54,  6.80it/s]

{'loss': Array(0.13740794, dtype=float32), 'loss_cross_entropy': Array(0.13012324, dtype=float32)}
{'loss_inverse': Array(7.355765e-07, dtype=float32)}


  9%|▉         | 94300/1000000 [4:27:42<27:06:31,  9.28it/s]

{'loss': Array(0.13007337, dtype=float32), 'loss_cross_entropy': Array(0.12218016, dtype=float32)}
{'loss_inverse': Array(0.00012623, dtype=float32)}


  9%|▉         | 94310/1000000 [4:27:44<34:20:50,  7.32it/s]

{'loss': Array(0.13912448, dtype=float32), 'loss_cross_entropy': Array(0.13171604, dtype=float32)}
{'loss_inverse': Array(9.807289e-05, dtype=float32)}


  9%|▉         | 94320/1000000 [4:27:45<26:33:31,  9.47it/s]

{'loss': Array(0.14998493, dtype=float32), 'loss_cross_entropy': Array(0.14153, dtype=float32)}
{'loss_inverse': Array(0.00013073, dtype=float32)}


  9%|▉         | 94330/1000000 [4:27:46<24:47:19, 10.15it/s]

{'loss': Array(0.14133304, dtype=float32), 'loss_cross_entropy': Array(0.13338368, dtype=float32)}
{'loss_inverse': Array(1.3422514e-05, dtype=float32)}


  9%|▉         | 94340/1000000 [4:27:48<29:26:33,  8.54it/s]

{'loss': Array(0.14908157, dtype=float32), 'loss_cross_entropy': Array(0.1412963, dtype=float32)}
{'loss_inverse': Array(0.0002809, dtype=float32)}


  9%|▉         | 94350/1000000 [4:27:50<25:44:14,  9.77it/s]

{'loss': Array(0.14074963, dtype=float32), 'loss_cross_entropy': Array(0.13324685, dtype=float32)}
{'loss_inverse': Array(9.4461124e-05, dtype=float32)}


  9%|▉         | 94360/1000000 [4:27:51<30:17:03,  8.31it/s]

{'loss': Array(0.13994639, dtype=float32), 'loss_cross_entropy': Array(0.13198508, dtype=float32)}
{'loss_inverse': Array(0.00023483, dtype=float32)}


  9%|▉         | 94370/1000000 [4:27:53<25:36:51,  9.82it/s]

{'loss': Array(0.13944958, dtype=float32), 'loss_cross_entropy': Array(0.13219869, dtype=float32)}
{'loss_inverse': Array(1.6563345e-05, dtype=float32)}


  9%|▉         | 94380/1000000 [4:27:54<33:23:53,  7.53it/s]

{'loss': Array(0.13800463, dtype=float32), 'loss_cross_entropy': Array(0.13030355, dtype=float32)}
{'loss_inverse': Array(3.035138e-05, dtype=float32)}


  9%|▉         | 94390/1000000 [4:27:56<26:45:08,  9.40it/s]

{'loss': Array(0.13379028, dtype=float32), 'loss_cross_entropy': Array(0.12601317, dtype=float32)}
{'loss_inverse': Array(4.205271e-06, dtype=float32)}


  9%|▉         | 94400/1000000 [4:27:58<34:41:16,  7.25it/s]

{'loss': Array(0.15223609, dtype=float32), 'loss_cross_entropy': Array(0.14417744, dtype=float32)}
{'loss_inverse': Array(2.7552042e-06, dtype=float32)}


  9%|▉         | 94410/1000000 [4:27:59<26:16:39,  9.57it/s]

{'loss': Array(0.12981941, dtype=float32), 'loss_cross_entropy': Array(0.12230407, dtype=float32)}
{'loss_inverse': Array(2.293872e-06, dtype=float32)}


  9%|▉         | 94420/1000000 [4:28:01<25:07:42, 10.01it/s]

{'loss': Array(0.14231017, dtype=float32), 'loss_cross_entropy': Array(0.1343903, dtype=float32)}
{'loss_inverse': Array(2.8045028e-07, dtype=float32)}


  9%|▉         | 94430/1000000 [4:28:02<30:41:21,  8.20it/s]

{'loss': Array(0.14761059, dtype=float32), 'loss_cross_entropy': Array(0.13963611, dtype=float32)}
{'loss_inverse': Array(1.0351975e-05, dtype=float32)}


  9%|▉         | 94440/1000000 [4:28:04<25:23:22,  9.91it/s]

{'loss': Array(0.13762145, dtype=float32), 'loss_cross_entropy': Array(0.129662, dtype=float32)}
{'loss_inverse': Array(1.0018768e-07, dtype=float32)}


  9%|▉         | 94450/1000000 [4:28:05<36:36:36,  6.87it/s]

{'loss': Array(0.13840549, dtype=float32), 'loss_cross_entropy': Array(0.13073216, dtype=float32)}
{'loss_inverse': Array(1.2722702e-05, dtype=float32)}


  9%|▉         | 94460/1000000 [4:28:07<27:22:02,  9.19it/s]

{'loss': Array(0.13802122, dtype=float32), 'loss_cross_entropy': Array(0.13039804, dtype=float32)}
{'loss_inverse': Array(1.5507952e-05, dtype=float32)}


  9%|▉         | 94470/1000000 [4:28:08<25:31:36,  9.85it/s]

{'loss': Array(0.14696316, dtype=float32), 'loss_cross_entropy': Array(0.13903894, dtype=float32)}
{'loss_inverse': Array(2.324302e-07, dtype=float32)}


  9%|▉         | 94480/1000000 [4:28:10<29:39:22,  8.48it/s]

{'loss': Array(0.15656371, dtype=float32), 'loss_cross_entropy': Array(0.1479657, dtype=float32)}
{'loss_inverse': Array(9.387675e-07, dtype=float32)}


  9%|▉         | 94490/1000000 [4:28:12<25:48:43,  9.74it/s]

{'loss': Array(0.14849232, dtype=float32), 'loss_cross_entropy': Array(0.1410169, dtype=float32)}
{'loss_inverse': Array(1.6296528e-06, dtype=float32)}


  9%|▉         | 94500/1000000 [4:28:13<28:14:14,  8.91it/s]

{'loss': Array(0.11265943, dtype=float32), 'loss_cross_entropy': Array(0.10598939, dtype=float32)}
{'loss_inverse': Array(7.350281e-05, dtype=float32)}


  9%|▉         | 94510/1000000 [4:28:21<83:21:12,  3.02it/s] 

{'loss': Array(0.1455811, dtype=float32), 'loss_cross_entropy': Array(0.13730939, dtype=float32)}
{'loss_inverse': Array(6.17536e-08, dtype=float32)}


  9%|▉         | 94520/1000000 [4:28:23<40:43:00,  6.18it/s]

{'loss': Array(0.14535137, dtype=float32), 'loss_cross_entropy': Array(0.13777903, dtype=float32)}
{'loss_inverse': Array(7.1164226e-07, dtype=float32)}


  9%|▉         | 94530/1000000 [4:28:24<27:20:10,  9.20it/s]

{'loss': Array(0.14231081, dtype=float32), 'loss_cross_entropy': Array(0.13454103, dtype=float32)}
{'loss_inverse': Array(4.0352688e-07, dtype=float32)}


  9%|▉         | 94540/1000000 [4:28:26<31:18:46,  8.03it/s]

{'loss': Array(0.15381236, dtype=float32), 'loss_cross_entropy': Array(0.14552486, dtype=float32)}
{'loss_inverse': Array(2.0525971e-05, dtype=float32)}


  9%|▉         | 94550/1000000 [4:28:28<25:15:18,  9.96it/s]

{'loss': Array(0.13963774, dtype=float32), 'loss_cross_entropy': Array(0.13192861, dtype=float32)}
{'loss_inverse': Array(5.7643092e-08, dtype=float32)}


  9%|▉         | 94560/1000000 [4:28:29<24:31:13, 10.26it/s]

{'loss': Array(0.1339513, dtype=float32), 'loss_cross_entropy': Array(0.12687956, dtype=float32)}
{'loss_inverse': Array(5.467984e-07, dtype=float32)}


  9%|▉         | 94570/1000000 [4:28:31<28:59:55,  8.67it/s]

{'loss': Array(0.14603208, dtype=float32), 'loss_cross_entropy': Array(0.13851134, dtype=float32)}
{'loss_inverse': Array(2.6915989e-06, dtype=float32)}


  9%|▉         | 94580/1000000 [4:28:32<25:35:19,  9.83it/s]

{'loss': Array(0.15098256, dtype=float32), 'loss_cross_entropy': Array(0.14247195, dtype=float32)}
{'loss_inverse': Array(2.9801802e-06, dtype=float32)}


  9%|▉         | 94590/1000000 [4:28:34<33:15:17,  7.56it/s]

{'loss': Array(0.13751827, dtype=float32), 'loss_cross_entropy': Array(0.12978534, dtype=float32)}
{'loss_inverse': Array(1.749231e-06, dtype=float32)}


  9%|▉         | 94600/1000000 [4:28:35<25:48:50,  9.74it/s]

{'loss': Array(0.14287955, dtype=float32), 'loss_cross_entropy': Array(0.13511862, dtype=float32)}
{'loss_inverse': Array(2.3656671e-06, dtype=float32)}


  9%|▉         | 94610/1000000 [4:28:37<37:26:22,  6.72it/s]

{'loss': Array(0.15235792, dtype=float32), 'loss_cross_entropy': Array(0.14350136, dtype=float32)}
{'loss_inverse': Array(1.2624569e-06, dtype=float32)}


  9%|▉         | 94620/1000000 [4:28:38<27:26:30,  9.16it/s]

{'loss': Array(0.14229935, dtype=float32), 'loss_cross_entropy': Array(0.13481188, dtype=float32)}
{'loss_inverse': Array(9.9274224e-08, dtype=float32)}


  9%|▉         | 94630/1000000 [4:28:40<37:23:49,  6.72it/s]

{'loss': Array(0.144965, dtype=float32), 'loss_cross_entropy': Array(0.13714449, dtype=float32)}
{'loss_inverse': Array(1.23234e-05, dtype=float32)}


  9%|▉         | 94640/1000000 [4:28:42<26:22:12,  9.54it/s]

{'loss': Array(0.15463176, dtype=float32), 'loss_cross_entropy': Array(0.14646274, dtype=float32)}
{'loss_inverse': Array(1.4897577e-08, dtype=float32)}


  9%|▉         | 94650/1000000 [4:28:43<25:19:04,  9.93it/s]

{'loss': Array(0.13699685, dtype=float32), 'loss_cross_entropy': Array(0.12949501, dtype=float32)}
{'loss_inverse': Array(9.4474885e-08, dtype=float32)}


  9%|▉         | 94660/1000000 [4:28:45<31:19:52,  8.03it/s]

{'loss': Array(0.13384512, dtype=float32), 'loss_cross_entropy': Array(0.12698695, dtype=float32)}
{'loss_inverse': Array(4.0315708e-08, dtype=float32)}


  9%|▉         | 94670/1000000 [4:28:46<25:45:27,  9.76it/s]

{'loss': Array(0.13432333, dtype=float32), 'loss_cross_entropy': Array(0.12652507, dtype=float32)}
{'loss_inverse': Array(4.06184e-08, dtype=float32)}


  9%|▉         | 94680/1000000 [4:28:48<37:28:26,  6.71it/s]

{'loss': Array(0.13562219, dtype=float32), 'loss_cross_entropy': Array(0.12843779, dtype=float32)}
{'loss_inverse': Array(0.00045925, dtype=float32)}


  9%|▉         | 94690/1000000 [4:28:49<27:04:40,  9.29it/s]

{'loss': Array(0.13128492, dtype=float32), 'loss_cross_entropy': Array(0.12342348, dtype=float32)}
{'loss_inverse': Array(1.5770496e-06, dtype=float32)}


  9%|▉         | 94700/1000000 [4:28:51<39:00:32,  6.45it/s]

{'loss': Array(0.13262399, dtype=float32), 'loss_cross_entropy': Array(0.12528749, dtype=float32)}
{'loss_inverse': Array(1.6466569e-07, dtype=float32)}


  9%|▉         | 94710/1000000 [4:28:53<27:11:09,  9.25it/s]

{'loss': Array(0.15749007, dtype=float32), 'loss_cross_entropy': Array(0.14856978, dtype=float32)}
{'loss_inverse': Array(1.0053247e-05, dtype=float32)}


  9%|▉         | 94720/1000000 [4:28:54<24:57:03, 10.08it/s]

{'loss': Array(0.13976489, dtype=float32), 'loss_cross_entropy': Array(0.13170798, dtype=float32)}
{'loss_inverse': Array(7.6814194e-07, dtype=float32)}


  9%|▉         | 94730/1000000 [4:28:56<33:46:11,  7.45it/s]

{'loss': Array(0.12905155, dtype=float32), 'loss_cross_entropy': Array(0.12202698, dtype=float32)}
{'loss_inverse': Array(6.63859e-07, dtype=float32)}


  9%|▉         | 94740/1000000 [4:28:57<26:05:44,  9.64it/s]

{'loss': Array(0.1415495, dtype=float32), 'loss_cross_entropy': Array(0.13383143, dtype=float32)}
{'loss_inverse': Array(2.9802817e-07, dtype=float32)}


  9%|▉         | 94750/1000000 [4:28:59<25:31:50,  9.85it/s]

{'loss': Array(0.13390075, dtype=float32), 'loss_cross_entropy': Array(0.12671036, dtype=float32)}
{'loss_inverse': Array(5.9103405e-07, dtype=float32)}


  9%|▉         | 94760/1000000 [4:29:00<27:21:39,  9.19it/s]

{'loss': Array(0.13929518, dtype=float32), 'loss_cross_entropy': Array(0.13149571, dtype=float32)}
{'loss_inverse': Array(9.4324434e-07, dtype=float32)}


  9%|▉         | 94770/1000000 [4:29:02<25:31:19,  9.85it/s]

{'loss': Array(0.15476036, dtype=float32), 'loss_cross_entropy': Array(0.14621344, dtype=float32)}
{'loss_inverse': Array(4.3466557e-06, dtype=float32)}


  9%|▉         | 94780/1000000 [4:29:04<28:49:01,  8.73it/s]

{'loss': Array(0.13451008, dtype=float32), 'loss_cross_entropy': Array(0.12652658, dtype=float32)}
{'loss_inverse': Array(1.4913442e-06, dtype=float32)}


  9%|▉         | 94790/1000000 [4:29:05<25:17:07,  9.94it/s]

{'loss': Array(0.14826654, dtype=float32), 'loss_cross_entropy': Array(0.14055726, dtype=float32)}
{'loss_inverse': Array(3.637891e-05, dtype=float32)}


  9%|▉         | 94800/1000000 [4:29:07<29:41:09,  8.47it/s]

{'loss': Array(0.14240174, dtype=float32), 'loss_cross_entropy': Array(0.13455436, dtype=float32)}
{'loss_inverse': Array(1.3257613e-07, dtype=float32)}


  9%|▉         | 94810/1000000 [4:29:08<26:16:04,  9.57it/s]

{'loss': Array(0.15264331, dtype=float32), 'loss_cross_entropy': Array(0.14414147, dtype=float32)}
{'loss_inverse': Array(2.6115511e-05, dtype=float32)}


  9%|▉         | 94820/1000000 [4:29:10<30:22:19,  8.28it/s]

{'loss': Array(0.13705376, dtype=float32), 'loss_cross_entropy': Array(0.12982647, dtype=float32)}
{'loss_inverse': Array(1.0100515e-07, dtype=float32)}


  9%|▉         | 94830/1000000 [4:29:11<25:43:44,  9.77it/s]

{'loss': Array(0.14637904, dtype=float32), 'loss_cross_entropy': Array(0.1383086, dtype=float32)}
{'loss_inverse': Array(7.9156e-07, dtype=float32)}


  9%|▉         | 94840/1000000 [4:29:13<34:26:55,  7.30it/s]

{'loss': Array(0.13452068, dtype=float32), 'loss_cross_entropy': Array(0.12729214, dtype=float32)}
{'loss_inverse': Array(2.1717422e-06, dtype=float32)}


  9%|▉         | 94850/1000000 [4:29:15<26:35:08,  9.46it/s]

{'loss': Array(0.14504914, dtype=float32), 'loss_cross_entropy': Array(0.13663499, dtype=float32)}
{'loss_inverse': Array(3.7849147e-06, dtype=float32)}


  9%|▉         | 94860/1000000 [4:29:16<36:52:36,  6.82it/s]

{'loss': Array(0.14707525, dtype=float32), 'loss_cross_entropy': Array(0.1389635, dtype=float32)}
{'loss_inverse': Array(3.086514e-08, dtype=float32)}


  9%|▉         | 94870/1000000 [4:29:18<26:15:53,  9.57it/s]

{'loss': Array(0.11564197, dtype=float32), 'loss_cross_entropy': Array(0.10865398, dtype=float32)}
{'loss_inverse': Array(2.3443991e-07, dtype=float32)}


  9%|▉         | 94880/1000000 [4:29:20<33:45:07,  7.45it/s]

{'loss': Array(0.13196993, dtype=float32), 'loss_cross_entropy': Array(0.12443047, dtype=float32)}
{'loss_inverse': Array(1.3654773e-07, dtype=float32)}


  9%|▉         | 94890/1000000 [4:29:21<25:50:44,  9.73it/s]

{'loss': Array(0.13547355, dtype=float32), 'loss_cross_entropy': Array(0.12788418, dtype=float32)}
{'loss_inverse': Array(3.271313e-05, dtype=float32)}


  9%|▉         | 94900/1000000 [4:29:22<24:31:11, 10.25it/s]

{'loss': Array(0.14200626, dtype=float32), 'loss_cross_entropy': Array(0.13451035, dtype=float32)}
{'loss_inverse': Array(3.15452e-07, dtype=float32)}


  9%|▉         | 94910/1000000 [4:29:24<28:33:50,  8.80it/s]

{'loss': Array(0.13862097, dtype=float32), 'loss_cross_entropy': Array(0.13058613, dtype=float32)}
{'loss_inverse': Array(9.890696e-05, dtype=float32)}


  9%|▉         | 94920/1000000 [4:29:25<25:20:35,  9.92it/s]

{'loss': Array(0.14034264, dtype=float32), 'loss_cross_entropy': Array(0.132717, dtype=float32)}
{'loss_inverse': Array(3.8948343e-05, dtype=float32)}


  9%|▉         | 94930/1000000 [4:29:27<30:54:30,  8.13it/s]

{'loss': Array(0.15762378, dtype=float32), 'loss_cross_entropy': Array(0.1490806, dtype=float32)}
{'loss_inverse': Array(2.3453958e-06, dtype=float32)}


  9%|▉         | 94940/1000000 [4:29:29<25:37:05,  9.81it/s]

{'loss': Array(0.16334811, dtype=float32), 'loss_cross_entropy': Array(0.15432823, dtype=float32)}
{'loss_inverse': Array(2.5696983e-05, dtype=float32)}


  9%|▉         | 94950/1000000 [4:29:30<33:24:58,  7.52it/s]

{'loss': Array(0.148015, dtype=float32), 'loss_cross_entropy': Array(0.14013036, dtype=float32)}
{'loss_inverse': Array(7.044738e-05, dtype=float32)}


  9%|▉         | 94960/1000000 [4:29:32<26:12:36,  9.59it/s]

{'loss': Array(0.1403555, dtype=float32), 'loss_cross_entropy': Array(0.1321701, dtype=float32)}
{'loss_inverse': Array(2.6466937e-06, dtype=float32)}


  9%|▉         | 94970/1000000 [4:29:33<33:09:41,  7.58it/s]

{'loss': Array(0.1256324, dtype=float32), 'loss_cross_entropy': Array(0.11807386, dtype=float32)}
{'loss_inverse': Array(2.1655754e-07, dtype=float32)}


  9%|▉         | 94980/1000000 [4:29:35<26:37:11,  9.44it/s]

{'loss': Array(0.13287523, dtype=float32), 'loss_cross_entropy': Array(0.12521282, dtype=float32)}
{'loss_inverse': Array(2.816689e-07, dtype=float32)}


  9%|▉         | 94990/1000000 [4:29:36<25:04:28, 10.03it/s]

{'loss': Array(0.1378503, dtype=float32), 'loss_cross_entropy': Array(0.13005337, dtype=float32)}
{'loss_inverse': Array(4.1721403e-08, dtype=float32)}


 10%|▉         | 95000/1000000 [4:29:38<30:55:24,  8.13it/s]

{'loss': Array(0.14392212, dtype=float32), 'loss_cross_entropy': Array(0.13623329, dtype=float32)}
{'loss_inverse': Array(1.1479344e-05, dtype=float32)}


 10%|▉         | 95010/1000000 [4:29:46<85:24:06,  2.94it/s] 

{'loss': Array(0.14415547, dtype=float32), 'loss_cross_entropy': Array(0.13648616, dtype=float32)}
{'loss_inverse': Array(0.00020206, dtype=float32)}


 10%|▉         | 95020/1000000 [4:29:48<39:23:55,  6.38it/s]

{'loss': Array(0.13940951, dtype=float32), 'loss_cross_entropy': Array(0.13109308, dtype=float32)}
{'loss_inverse': Array(3.676942e-07, dtype=float32)}


 10%|▉         | 95030/1000000 [4:29:49<26:50:37,  9.36it/s]

{'loss': Array(0.13557832, dtype=float32), 'loss_cross_entropy': Array(0.1275584, dtype=float32)}
{'loss_inverse': Array(1.1416457e-07, dtype=float32)}


 10%|▉         | 95040/1000000 [4:29:51<34:04:18,  7.38it/s]

{'loss': Array(0.15860021, dtype=float32), 'loss_cross_entropy': Array(0.15005387, dtype=float32)}
{'loss_inverse': Array(2.3615903e-05, dtype=float32)}


 10%|▉         | 95050/1000000 [4:29:52<25:56:02,  9.69it/s]

{'loss': Array(0.12865365, dtype=float32), 'loss_cross_entropy': Array(0.12101538, dtype=float32)}
{'loss_inverse': Array(1.1926123e-05, dtype=float32)}


 10%|▉         | 95060/1000000 [4:29:54<37:20:00,  6.73it/s]

{'loss': Array(0.11644926, dtype=float32), 'loss_cross_entropy': Array(0.10976736, dtype=float32)}
{'loss_inverse': Array(5.4766275e-07, dtype=float32)}


 10%|▉         | 95070/1000000 [4:29:56<27:01:33,  9.30it/s]

{'loss': Array(0.12557863, dtype=float32), 'loss_cross_entropy': Array(0.11833771, dtype=float32)}
{'loss_inverse': Array(0.00025002, dtype=float32)}


 10%|▉         | 95080/1000000 [4:29:57<37:43:27,  6.66it/s]

{'loss': Array(0.15057254, dtype=float32), 'loss_cross_entropy': Array(0.14246361, dtype=float32)}
{'loss_inverse': Array(1.6461187e-05, dtype=float32)}


 10%|▉         | 95090/1000000 [4:29:59<27:07:55,  9.26it/s]

{'loss': Array(0.14236382, dtype=float32), 'loss_cross_entropy': Array(0.13507397, dtype=float32)}
{'loss_inverse': Array(0.00012245, dtype=float32)}


 10%|▉         | 95100/1000000 [4:30:00<24:59:44, 10.06it/s]

{'loss': Array(0.14850655, dtype=float32), 'loss_cross_entropy': Array(0.141228, dtype=float32)}
{'loss_inverse': Array(2.5784936e-06, dtype=float32)}


 10%|▉         | 95110/1000000 [4:30:02<33:56:02,  7.41it/s]

{'loss': Array(0.12117288, dtype=float32), 'loss_cross_entropy': Array(0.11409657, dtype=float32)}
{'loss_inverse': Array(1.6392127e-05, dtype=float32)}


 10%|▉         | 95120/1000000 [4:30:03<26:03:23,  9.65it/s]

{'loss': Array(0.13660112, dtype=float32), 'loss_cross_entropy': Array(0.12874205, dtype=float32)}
{'loss_inverse': Array(2.4638723e-08, dtype=float32)}


 10%|▉         | 95130/1000000 [4:30:05<24:57:19, 10.07it/s]

{'loss': Array(0.11004626, dtype=float32), 'loss_cross_entropy': Array(0.10405429, dtype=float32)}
{'loss_inverse': Array(0.00017486, dtype=float32)}


 10%|▉         | 95140/1000000 [4:30:07<28:36:06,  8.79it/s]

{'loss': Array(0.1324093, dtype=float32), 'loss_cross_entropy': Array(0.12471912, dtype=float32)}
{'loss_inverse': Array(5.2192254e-06, dtype=float32)}


 10%|▉         | 95150/1000000 [4:30:08<26:58:35,  9.32it/s]

{'loss': Array(0.13780318, dtype=float32), 'loss_cross_entropy': Array(0.13027456, dtype=float32)}
{'loss_inverse': Array(0.00015951, dtype=float32)}


 10%|▉         | 95160/1000000 [4:30:10<28:03:05,  8.96it/s]

{'loss': Array(0.13987859, dtype=float32), 'loss_cross_entropy': Array(0.13264957, dtype=float32)}
{'loss_inverse': Array(2.1059357e-06, dtype=float32)}


 10%|▉         | 95170/1000000 [4:30:12<26:06:06,  9.63it/s]

{'loss': Array(0.12478091, dtype=float32), 'loss_cross_entropy': Array(0.11786973, dtype=float32)}
{'loss_inverse': Array(3.86932e-06, dtype=float32)}


 10%|▉         | 95180/1000000 [4:30:13<29:41:43,  8.46it/s]

{'loss': Array(0.13523638, dtype=float32), 'loss_cross_entropy': Array(0.12732698, dtype=float32)}
{'loss_inverse': Array(8.4504606e-08, dtype=float32)}


 10%|▉         | 95190/1000000 [4:30:15<25:37:00,  9.81it/s]

{'loss': Array(0.16319941, dtype=float32), 'loss_cross_entropy': Array(0.1539716, dtype=float32)}
{'loss_inverse': Array(5.2358602e-05, dtype=float32)}


 10%|▉         | 95200/1000000 [4:30:16<37:08:28,  6.77it/s]

{'loss': Array(0.12877771, dtype=float32), 'loss_cross_entropy': Array(0.12118074, dtype=float32)}
{'loss_inverse': Array(2.1691087e-05, dtype=float32)}


 10%|▉         | 95210/1000000 [4:30:18<26:31:30,  9.48it/s]

{'loss': Array(0.14679663, dtype=float32), 'loss_cross_entropy': Array(0.13897891, dtype=float32)}
{'loss_inverse': Array(6.330062e-05, dtype=float32)}


 10%|▉         | 95220/1000000 [4:30:19<24:34:24, 10.23it/s]

{'loss': Array(0.15490279, dtype=float32), 'loss_cross_entropy': Array(0.14619915, dtype=float32)}
{'loss_inverse': Array(8.927812e-06, dtype=float32)}


 10%|▉         | 95230/1000000 [4:30:21<28:53:11,  8.70it/s]

{'loss': Array(0.13793388, dtype=float32), 'loss_cross_entropy': Array(0.13020349, dtype=float32)}
{'loss_inverse': Array(1.0909562e-05, dtype=float32)}


 10%|▉         | 95240/1000000 [4:30:22<25:10:34,  9.98it/s]

{'loss': Array(0.14605057, dtype=float32), 'loss_cross_entropy': Array(0.1381167, dtype=float32)}
{'loss_inverse': Array(2.972165e-07, dtype=float32)}


 10%|▉         | 95250/1000000 [4:30:24<28:59:57,  8.67it/s]

{'loss': Array(0.14064498, dtype=float32), 'loss_cross_entropy': Array(0.13226043, dtype=float32)}
{'loss_inverse': Array(2.042772e-06, dtype=float32)}


 10%|▉         | 95260/1000000 [4:30:26<25:13:04,  9.97it/s]

{'loss': Array(0.14746985, dtype=float32), 'loss_cross_entropy': Array(0.13939342, dtype=float32)}
{'loss_inverse': Array(5.354771e-05, dtype=float32)}


 10%|▉         | 95270/1000000 [4:30:27<30:40:12,  8.19it/s]

{'loss': Array(0.12690563, dtype=float32), 'loss_cross_entropy': Array(0.119983, dtype=float32)}
{'loss_inverse': Array(7.805315e-05, dtype=float32)}


 10%|▉         | 95280/1000000 [4:30:29<25:56:42,  9.69it/s]

{'loss': Array(0.12814806, dtype=float32), 'loss_cross_entropy': Array(0.12059408, dtype=float32)}
{'loss_inverse': Array(3.6313195e-05, dtype=float32)}


 10%|▉         | 95290/1000000 [4:30:30<25:28:15,  9.87it/s]

{'loss': Array(0.12248756, dtype=float32), 'loss_cross_entropy': Array(0.11556423, dtype=float32)}
{'loss_inverse': Array(3.070079e-05, dtype=float32)}


 10%|▉         | 95300/1000000 [4:30:32<27:38:10,  9.09it/s]

{'loss': Array(0.14731124, dtype=float32), 'loss_cross_entropy': Array(0.13921179, dtype=float32)}
{'loss_inverse': Array(0.00090058, dtype=float32)}


 10%|▉         | 95310/1000000 [4:30:33<25:16:22,  9.94it/s]

{'loss': Array(0.12943365, dtype=float32), 'loss_cross_entropy': Array(0.12223458, dtype=float32)}
{'loss_inverse': Array(0.0008142, dtype=float32)}


 10%|▉         | 95320/1000000 [4:30:35<28:37:43,  8.78it/s]

{'loss': Array(0.14094034, dtype=float32), 'loss_cross_entropy': Array(0.1336351, dtype=float32)}
{'loss_inverse': Array(0.00040641, dtype=float32)}


 10%|▉         | 95330/1000000 [4:30:37<25:01:08, 10.04it/s]

{'loss': Array(0.1412369, dtype=float32), 'loss_cross_entropy': Array(0.13370375, dtype=float32)}
{'loss_inverse': Array(8.8187924e-05, dtype=float32)}


 10%|▉         | 95340/1000000 [4:30:38<30:47:03,  8.16it/s]

{'loss': Array(0.15879498, dtype=float32), 'loss_cross_entropy': Array(0.15049876, dtype=float32)}
{'loss_inverse': Array(6.5214663e-06, dtype=float32)}


 10%|▉         | 95350/1000000 [4:30:40<25:23:54,  9.89it/s]

{'loss': Array(0.13266896, dtype=float32), 'loss_cross_entropy': Array(0.1255508, dtype=float32)}
{'loss_inverse': Array(0.00023883, dtype=float32)}


 10%|▉         | 95360/1000000 [4:30:41<24:30:30, 10.25it/s]

{'loss': Array(0.13799578, dtype=float32), 'loss_cross_entropy': Array(0.13071513, dtype=float32)}
{'loss_inverse': Array(0.0002254, dtype=float32)}


 10%|▉         | 95370/1000000 [4:30:43<29:23:02,  8.55it/s]

{'loss': Array(0.13909997, dtype=float32), 'loss_cross_entropy': Array(0.13087688, dtype=float32)}
{'loss_inverse': Array(0.0003351, dtype=float32)}


 10%|▉         | 95380/1000000 [4:30:44<25:35:57,  9.82it/s]

{'loss': Array(0.12650542, dtype=float32), 'loss_cross_entropy': Array(0.1191183, dtype=float32)}
{'loss_inverse': Array(0.00032232, dtype=float32)}


 10%|▉         | 95390/1000000 [4:30:46<30:45:12,  8.17it/s]

{'loss': Array(0.14253122, dtype=float32), 'loss_cross_entropy': Array(0.13447253, dtype=float32)}
{'loss_inverse': Array(1.6680477e-05, dtype=float32)}


 10%|▉         | 95400/1000000 [4:30:47<25:35:27,  9.82it/s]

{'loss': Array(0.1376939, dtype=float32), 'loss_cross_entropy': Array(0.13025658, dtype=float32)}
{'loss_inverse': Array(6.918706e-05, dtype=float32)}


 10%|▉         | 95410/1000000 [4:30:49<33:51:57,  7.42it/s]

{'loss': Array(0.14136703, dtype=float32), 'loss_cross_entropy': Array(0.13382065, dtype=float32)}
{'loss_inverse': Array(1.6224647e-05, dtype=float32)}


 10%|▉         | 95420/1000000 [4:30:51<26:47:33,  9.38it/s]

{'loss': Array(0.13637374, dtype=float32), 'loss_cross_entropy': Array(0.12886438, dtype=float32)}
{'loss_inverse': Array(0.00016679, dtype=float32)}


 10%|▉         | 95430/1000000 [4:30:52<24:26:13, 10.28it/s]

{'loss': Array(0.1604505, dtype=float32), 'loss_cross_entropy': Array(0.15215138, dtype=float32)}
{'loss_inverse': Array(7.280141e-06, dtype=float32)}


 10%|▉         | 95440/1000000 [4:30:54<27:53:23,  9.01it/s]

{'loss': Array(0.15421811, dtype=float32), 'loss_cross_entropy': Array(0.14561212, dtype=float32)}
{'loss_inverse': Array(0.00019654, dtype=float32)}


 10%|▉         | 95450/1000000 [4:30:55<24:54:23, 10.09it/s]

{'loss': Array(0.13171299, dtype=float32), 'loss_cross_entropy': Array(0.12421175, dtype=float32)}
{'loss_inverse': Array(1.518024e-05, dtype=float32)}


 10%|▉         | 95460/1000000 [4:30:57<33:30:22,  7.50it/s]

{'loss': Array(0.15780742, dtype=float32), 'loss_cross_entropy': Array(0.14937021, dtype=float32)}
{'loss_inverse': Array(6.3403415e-05, dtype=float32)}


 10%|▉         | 95470/1000000 [4:30:58<26:07:50,  9.62it/s]

{'loss': Array(0.14756869, dtype=float32), 'loss_cross_entropy': Array(0.13988721, dtype=float32)}
{'loss_inverse': Array(0.00015586, dtype=float32)}


 10%|▉         | 95480/1000000 [4:31:00<25:12:45,  9.97it/s]

{'loss': Array(0.1418573, dtype=float32), 'loss_cross_entropy': Array(0.13357762, dtype=float32)}
{'loss_inverse': Array(6.5732015e-06, dtype=float32)}


 10%|▉         | 95490/1000000 [4:31:02<27:49:11,  9.03it/s]

{'loss': Array(0.14246427, dtype=float32), 'loss_cross_entropy': Array(0.13448182, dtype=float32)}
{'loss_inverse': Array(4.554545e-05, dtype=float32)}


 10%|▉         | 95500/1000000 [4:31:03<25:08:07, 10.00it/s]

{'loss': Array(0.1480008, dtype=float32), 'loss_cross_entropy': Array(0.13963892, dtype=float32)}
{'loss_inverse': Array(9.566115e-06, dtype=float32)}


 10%|▉         | 95510/1000000 [4:31:11<89:50:02,  2.80it/s] 

{'loss': Array(0.1469071, dtype=float32), 'loss_cross_entropy': Array(0.13920358, dtype=float32)}
{'loss_inverse': Array(0.00022577, dtype=float32)}


 10%|▉         | 95520/1000000 [4:31:13<35:48:27,  7.02it/s]

{'loss': Array(0.13775519, dtype=float32), 'loss_cross_entropy': Array(0.13007419, dtype=float32)}
{'loss_inverse': Array(4.2023504e-07, dtype=float32)}


 10%|▉         | 95530/1000000 [4:31:15<31:07:23,  8.07it/s]

{'loss': Array(0.17135637, dtype=float32), 'loss_cross_entropy': Array(0.16281895, dtype=float32)}
{'loss_inverse': Array(5.990159e-07, dtype=float32)}


 10%|▉         | 95540/1000000 [4:31:16<25:45:28,  9.75it/s]

{'loss': Array(0.15440147, dtype=float32), 'loss_cross_entropy': Array(0.1466368, dtype=float32)}
{'loss_inverse': Array(1.1713663e-06, dtype=float32)}


 10%|▉         | 95550/1000000 [4:31:18<33:32:31,  7.49it/s]

{'loss': Array(0.12840007, dtype=float32), 'loss_cross_entropy': Array(0.12108753, dtype=float32)}
{'loss_inverse': Array(1.3851785e-07, dtype=float32)}


 10%|▉         | 95560/1000000 [4:31:19<26:09:25,  9.60it/s]

{'loss': Array(0.13853185, dtype=float32), 'loss_cross_entropy': Array(0.13068192, dtype=float32)}
{'loss_inverse': Array(0.000303, dtype=float32)}


 10%|▉         | 95570/1000000 [4:31:21<25:14:12,  9.95it/s]

{'loss': Array(0.13297014, dtype=float32), 'loss_cross_entropy': Array(0.12560077, dtype=float32)}
{'loss_inverse': Array(1.413274e-05, dtype=float32)}


 10%|▉         | 95580/1000000 [4:31:22<29:01:39,  8.65it/s]

{'loss': Array(0.1274209, dtype=float32), 'loss_cross_entropy': Array(0.12046962, dtype=float32)}
{'loss_inverse': Array(0.0001839, dtype=float32)}


 10%|▉         | 95590/1000000 [4:31:24<24:49:34, 10.12it/s]

{'loss': Array(0.14667834, dtype=float32), 'loss_cross_entropy': Array(0.13892128, dtype=float32)}
{'loss_inverse': Array(1.7188704e-07, dtype=float32)}


 10%|▉         | 95600/1000000 [4:31:26<30:25:44,  8.26it/s]

{'loss': Array(0.15684026, dtype=float32), 'loss_cross_entropy': Array(0.14902213, dtype=float32)}
{'loss_inverse': Array(0.00012658, dtype=float32)}


 10%|▉         | 95610/1000000 [4:31:27<26:51:28,  9.35it/s]

{'loss': Array(0.143793, dtype=float32), 'loss_cross_entropy': Array(0.13606769, dtype=float32)}
{'loss_inverse': Array(9.0721153e-07, dtype=float32)}


 10%|▉         | 95620/1000000 [4:31:29<34:04:31,  7.37it/s]

{'loss': Array(0.12909289, dtype=float32), 'loss_cross_entropy': Array(0.1214756, dtype=float32)}
{'loss_inverse': Array(0.0003874, dtype=float32)}


 10%|▉         | 95630/1000000 [4:31:30<25:47:30,  9.74it/s]

{'loss': Array(0.12318077, dtype=float32), 'loss_cross_entropy': Array(0.11595353, dtype=float32)}
{'loss_inverse': Array(4.644583e-06, dtype=float32)}


 10%|▉         | 95640/1000000 [4:31:32<24:33:22, 10.23it/s]

{'loss': Array(0.1316682, dtype=float32), 'loss_cross_entropy': Array(0.12432285, dtype=float32)}
{'loss_inverse': Array(3.0546423e-06, dtype=float32)}


 10%|▉         | 95650/1000000 [4:31:33<28:54:50,  8.69it/s]

{'loss': Array(0.14476977, dtype=float32), 'loss_cross_entropy': Array(0.13662586, dtype=float32)}
{'loss_inverse': Array(0.00021146, dtype=float32)}


 10%|▉         | 95660/1000000 [4:31:35<25:02:21, 10.03it/s]

{'loss': Array(0.11244761, dtype=float32), 'loss_cross_entropy': Array(0.10561331, dtype=float32)}
{'loss_inverse': Array(2.3600887e-06, dtype=float32)}


 10%|▉         | 95670/1000000 [4:31:37<33:40:10,  7.46it/s]

{'loss': Array(0.14294063, dtype=float32), 'loss_cross_entropy': Array(0.13497247, dtype=float32)}
{'loss_inverse': Array(4.345044e-05, dtype=float32)}


 10%|▉         | 95680/1000000 [4:31:38<25:51:29,  9.71it/s]

{'loss': Array(0.12961918, dtype=float32), 'loss_cross_entropy': Array(0.12206872, dtype=float32)}
{'loss_inverse': Array(1.5566733e-05, dtype=float32)}


 10%|▉         | 95690/1000000 [4:31:40<38:18:48,  6.56it/s]

{'loss': Array(0.13670595, dtype=float32), 'loss_cross_entropy': Array(0.12918542, dtype=float32)}
{'loss_inverse': Array(0.00012343, dtype=float32)}


 10%|▉         | 95700/1000000 [4:31:41<27:20:37,  9.19it/s]

{'loss': Array(0.1486082, dtype=float32), 'loss_cross_entropy': Array(0.14155717, dtype=float32)}
{'loss_inverse': Array(6.807752e-07, dtype=float32)}


 10%|▉         | 95710/1000000 [4:31:43<25:01:51, 10.04it/s]

{'loss': Array(0.14849359, dtype=float32), 'loss_cross_entropy': Array(0.14049368, dtype=float32)}
{'loss_inverse': Array(1.3696271e-07, dtype=float32)}


 10%|▉         | 95720/1000000 [4:31:44<27:30:27,  9.13it/s]

{'loss': Array(0.13897423, dtype=float32), 'loss_cross_entropy': Array(0.13109444, dtype=float32)}
{'loss_inverse': Array(2.5248746e-05, dtype=float32)}


 10%|▉         | 95730/1000000 [4:31:46<24:58:06, 10.06it/s]

{'loss': Array(0.13379765, dtype=float32), 'loss_cross_entropy': Array(0.12588368, dtype=float32)}
{'loss_inverse': Array(0.00019125, dtype=float32)}


 10%|▉         | 95740/1000000 [4:31:47<28:41:00,  8.76it/s]

{'loss': Array(0.14323942, dtype=float32), 'loss_cross_entropy': Array(0.13570063, dtype=float32)}
{'loss_inverse': Array(5.686574e-06, dtype=float32)}


 10%|▉         | 95750/1000000 [4:31:49<25:24:00,  9.89it/s]

{'loss': Array(0.14038813, dtype=float32), 'loss_cross_entropy': Array(0.13277605, dtype=float32)}
{'loss_inverse': Array(0.00060295, dtype=float32)}


 10%|▉         | 95760/1000000 [4:31:51<30:54:00,  8.13it/s]

{'loss': Array(0.14959875, dtype=float32), 'loss_cross_entropy': Array(0.14136621, dtype=float32)}
{'loss_inverse': Array(4.117174e-05, dtype=float32)}


 10%|▉         | 95770/1000000 [4:31:52<25:16:41,  9.94it/s]

{'loss': Array(0.14738719, dtype=float32), 'loss_cross_entropy': Array(0.13909076, dtype=float32)}
{'loss_inverse': Array(2.984832e-05, dtype=float32)}


 10%|▉         | 95780/1000000 [4:31:54<33:20:01,  7.54it/s]

{'loss': Array(0.14607307, dtype=float32), 'loss_cross_entropy': Array(0.1383516, dtype=float32)}
{'loss_inverse': Array(5.0017725e-06, dtype=float32)}


 10%|▉         | 95790/1000000 [4:31:55<25:43:57,  9.76it/s]

{'loss': Array(0.15530057, dtype=float32), 'loss_cross_entropy': Array(0.14687626, dtype=float32)}
{'loss_inverse': Array(1.7112412e-05, dtype=float32)}


 10%|▉         | 95800/1000000 [4:31:57<33:42:14,  7.45it/s]

{'loss': Array(0.16152497, dtype=float32), 'loss_cross_entropy': Array(0.15264098, dtype=float32)}
{'loss_inverse': Array(1.1844801e-07, dtype=float32)}


 10%|▉         | 95810/1000000 [4:31:58<25:48:29,  9.73it/s]

{'loss': Array(0.13738069, dtype=float32), 'loss_cross_entropy': Array(0.12979038, dtype=float32)}
{'loss_inverse': Array(1.985888e-05, dtype=float32)}


 10%|▉         | 95820/1000000 [4:32:00<24:46:07, 10.14it/s]

{'loss': Array(0.15341441, dtype=float32), 'loss_cross_entropy': Array(0.1451066, dtype=float32)}
{'loss_inverse': Array(1.4807782e-06, dtype=float32)}


 10%|▉         | 95830/1000000 [4:32:01<30:38:36,  8.20it/s]

{'loss': Array(0.14245205, dtype=float32), 'loss_cross_entropy': Array(0.13489275, dtype=float32)}
{'loss_inverse': Array(6.667905e-07, dtype=float32)}


 10%|▉         | 95840/1000000 [4:32:03<25:53:54,  9.70it/s]

{'loss': Array(0.13329504, dtype=float32), 'loss_cross_entropy': Array(0.1257855, dtype=float32)}
{'loss_inverse': Array(0.00012177, dtype=float32)}


 10%|▉         | 95850/1000000 [4:32:05<37:30:52,  6.69it/s]

{'loss': Array(0.15736173, dtype=float32), 'loss_cross_entropy': Array(0.14811802, dtype=float32)}
{'loss_inverse': Array(3.833505e-06, dtype=float32)}


 10%|▉         | 95860/1000000 [4:32:06<26:31:41,  9.47it/s]

{'loss': Array(0.14401984, dtype=float32), 'loss_cross_entropy': Array(0.13671659, dtype=float32)}
{'loss_inverse': Array(0.00060998, dtype=float32)}


 10%|▉         | 95870/1000000 [4:32:07<24:49:34, 10.12it/s]

{'loss': Array(0.1385842, dtype=float32), 'loss_cross_entropy': Array(0.13083737, dtype=float32)}
{'loss_inverse': Array(3.9309445e-05, dtype=float32)}


 10%|▉         | 95880/1000000 [4:32:09<28:27:46,  8.82it/s]

{'loss': Array(0.14316536, dtype=float32), 'loss_cross_entropy': Array(0.1352779, dtype=float32)}
{'loss_inverse': Array(1.8007793e-05, dtype=float32)}


 10%|▉         | 95890/1000000 [4:32:11<25:13:48,  9.95it/s]

{'loss': Array(0.14365996, dtype=float32), 'loss_cross_entropy': Array(0.13591501, dtype=float32)}
{'loss_inverse': Array(1.6075901e-07, dtype=float32)}


 10%|▉         | 95900/1000000 [4:32:12<27:41:57,  9.07it/s]

{'loss': Array(0.13523, dtype=float32), 'loss_cross_entropy': Array(0.12788479, dtype=float32)}
{'loss_inverse': Array(0.00077109, dtype=float32)}


 10%|▉         | 95910/1000000 [4:32:14<25:53:38,  9.70it/s]

{'loss': Array(0.13023703, dtype=float32), 'loss_cross_entropy': Array(0.12297591, dtype=float32)}
{'loss_inverse': Array(3.567308e-07, dtype=float32)}


 10%|▉         | 95920/1000000 [4:32:16<32:02:22,  7.84it/s]

{'loss': Array(0.14545101, dtype=float32), 'loss_cross_entropy': Array(0.13769706, dtype=float32)}
{'loss_inverse': Array(1.7483131e-06, dtype=float32)}


 10%|▉         | 95930/1000000 [4:32:17<28:02:45,  8.95it/s]

{'loss': Array(0.14955418, dtype=float32), 'loss_cross_entropy': Array(0.14181708, dtype=float32)}
{'loss_inverse': Array(1.3179382e-06, dtype=float32)}


 10%|▉         | 95940/1000000 [4:32:19<31:51:28,  7.88it/s]

{'loss': Array(0.1488903, dtype=float32), 'loss_cross_entropy': Array(0.14066672, dtype=float32)}
{'loss_inverse': Array(0.000184, dtype=float32)}


 10%|▉         | 95950/1000000 [4:32:20<26:36:44,  9.44it/s]

{'loss': Array(0.15146308, dtype=float32), 'loss_cross_entropy': Array(0.1431734, dtype=float32)}
{'loss_inverse': Array(2.1397671e-08, dtype=float32)}


 10%|▉         | 95960/1000000 [4:32:22<37:47:22,  6.65it/s]

{'loss': Array(0.14014404, dtype=float32), 'loss_cross_entropy': Array(0.13209862, dtype=float32)}
{'loss_inverse': Array(1.9271117e-06, dtype=float32)}


 10%|▉         | 95970/1000000 [4:32:24<26:51:17,  9.35it/s]

{'loss': Array(0.15626241, dtype=float32), 'loss_cross_entropy': Array(0.14789228, dtype=float32)}
{'loss_inverse': Array(1.0593884e-05, dtype=float32)}


 10%|▉         | 95980/1000000 [4:32:25<25:06:50, 10.00it/s]

{'loss': Array(0.14394765, dtype=float32), 'loss_cross_entropy': Array(0.1367428, dtype=float32)}
{'loss_inverse': Array(0.0002973, dtype=float32)}


 10%|▉         | 95990/1000000 [4:32:27<27:36:40,  9.09it/s]

{'loss': Array(0.14399932, dtype=float32), 'loss_cross_entropy': Array(0.13584071, dtype=float32)}
{'loss_inverse': Array(1.2626216e-05, dtype=float32)}


 10%|▉         | 96000/1000000 [4:32:28<24:59:34, 10.05it/s]

{'loss': Array(0.14179866, dtype=float32), 'loss_cross_entropy': Array(0.13386637, dtype=float32)}
{'loss_inverse': Array(0.00013625, dtype=float32)}


 10%|▉         | 96010/1000000 [4:32:37<90:42:43,  2.77it/s] 

{'loss': Array(0.13312083, dtype=float32), 'loss_cross_entropy': Array(0.12560244, dtype=float32)}
{'loss_inverse': Array(8.047479e-05, dtype=float32)}


 10%|▉         | 96020/1000000 [4:32:38<35:44:48,  7.02it/s]

{'loss': Array(0.13959841, dtype=float32), 'loss_cross_entropy': Array(0.13264613, dtype=float32)}
{'loss_inverse': Array(3.0860954e-06, dtype=float32)}


 10%|▉         | 96030/1000000 [4:32:40<32:23:24,  7.75it/s]

{'loss': Array(0.12675242, dtype=float32), 'loss_cross_entropy': Array(0.11964239, dtype=float32)}
{'loss_inverse': Array(5.1755575e-05, dtype=float32)}


 10%|▉         | 96040/1000000 [4:32:41<25:22:47,  9.89it/s]

{'loss': Array(0.14426461, dtype=float32), 'loss_cross_entropy': Array(0.13648331, dtype=float32)}
{'loss_inverse': Array(1.1009385e-05, dtype=float32)}


 10%|▉         | 96050/1000000 [4:32:42<24:14:10, 10.36it/s]

{'loss': Array(0.1372606, dtype=float32), 'loss_cross_entropy': Array(0.12954755, dtype=float32)}
{'loss_inverse': Array(5.8935866e-06, dtype=float32)}


 10%|▉         | 96060/1000000 [4:32:44<27:11:47,  9.23it/s]

{'loss': Array(0.13439332, dtype=float32), 'loss_cross_entropy': Array(0.12672786, dtype=float32)}
{'loss_inverse': Array(1.1026066e-06, dtype=float32)}


 10%|▉         | 96070/1000000 [4:32:46<25:56:23,  9.68it/s]

{'loss': Array(0.14172077, dtype=float32), 'loss_cross_entropy': Array(0.13361031, dtype=float32)}
{'loss_inverse': Array(4.4657017e-06, dtype=float32)}


 10%|▉         | 96080/1000000 [4:32:47<31:01:44,  8.09it/s]

{'loss': Array(0.13490649, dtype=float32), 'loss_cross_entropy': Array(0.12739919, dtype=float32)}
{'loss_inverse': Array(2.1845997e-05, dtype=float32)}


 10%|▉         | 96090/1000000 [4:32:49<25:46:48,  9.74it/s]

{'loss': Array(0.14189279, dtype=float32), 'loss_cross_entropy': Array(0.13365014, dtype=float32)}
{'loss_inverse': Array(7.164635e-07, dtype=float32)}


 10%|▉         | 96100/1000000 [4:32:51<30:55:56,  8.12it/s]

{'loss': Array(0.12836154, dtype=float32), 'loss_cross_entropy': Array(0.12122347, dtype=float32)}
{'loss_inverse': Array(1.4958823e-05, dtype=float32)}


 10%|▉         | 96110/1000000 [4:32:52<25:55:10,  9.69it/s]

{'loss': Array(0.1282917, dtype=float32), 'loss_cross_entropy': Array(0.12146238, dtype=float32)}
{'loss_inverse': Array(7.697544e-08, dtype=float32)}


 10%|▉         | 96120/1000000 [4:32:54<33:20:11,  7.53it/s]

{'loss': Array(0.12161012, dtype=float32), 'loss_cross_entropy': Array(0.11438011, dtype=float32)}
{'loss_inverse': Array(4.9718955e-07, dtype=float32)}


 10%|▉         | 96130/1000000 [4:32:55<26:27:00,  9.49it/s]

{'loss': Array(0.14998107, dtype=float32), 'loss_cross_entropy': Array(0.14162497, dtype=float32)}
{'loss_inverse': Array(4.986757e-06, dtype=float32)}


 10%|▉         | 96140/1000000 [4:32:57<31:25:09,  7.99it/s]

{'loss': Array(0.13753414, dtype=float32), 'loss_cross_entropy': Array(0.12964545, dtype=float32)}
{'loss_inverse': Array(1.2203557e-07, dtype=float32)}


 10%|▉         | 96150/1000000 [4:32:58<26:35:03,  9.44it/s]

{'loss': Array(0.13563348, dtype=float32), 'loss_cross_entropy': Array(0.12822579, dtype=float32)}
{'loss_inverse': Array(4.470418e-06, dtype=float32)}


 10%|▉         | 96160/1000000 [4:33:00<37:39:28,  6.67it/s]

{'loss': Array(0.14331542, dtype=float32), 'loss_cross_entropy': Array(0.13588639, dtype=float32)}
{'loss_inverse': Array(1.0353176e-06, dtype=float32)}


 10%|▉         | 96170/1000000 [4:33:02<26:53:41,  9.34it/s]

{'loss': Array(0.12577026, dtype=float32), 'loss_cross_entropy': Array(0.11894095, dtype=float32)}
{'loss_inverse': Array(9.639829e-07, dtype=float32)}


 10%|▉         | 96180/1000000 [4:33:03<25:04:30, 10.01it/s]

{'loss': Array(0.12836598, dtype=float32), 'loss_cross_entropy': Array(0.12119255, dtype=float32)}
{'loss_inverse': Array(7.565048e-08, dtype=float32)}


 10%|▉         | 96190/1000000 [4:33:05<28:03:18,  8.95it/s]

{'loss': Array(0.13223383, dtype=float32), 'loss_cross_entropy': Array(0.12442715, dtype=float32)}
{'loss_inverse': Array(3.683709e-07, dtype=float32)}


 10%|▉         | 96200/1000000 [4:33:06<26:41:17,  9.41it/s]

{'loss': Array(0.15159433, dtype=float32), 'loss_cross_entropy': Array(0.14289214, dtype=float32)}
{'loss_inverse': Array(5.5791423e-07, dtype=float32)}


 10%|▉         | 96210/1000000 [4:33:08<28:53:51,  8.69it/s]

{'loss': Array(0.15356974, dtype=float32), 'loss_cross_entropy': Array(0.14507103, dtype=float32)}
{'loss_inverse': Array(1.411054e-06, dtype=float32)}


 10%|▉         | 96220/1000000 [4:33:10<26:55:23,  9.32it/s]

{'loss': Array(0.14276525, dtype=float32), 'loss_cross_entropy': Array(0.13512613, dtype=float32)}
{'loss_inverse': Array(1.6450359e-07, dtype=float32)}


 10%|▉         | 96230/1000000 [4:33:11<29:23:36,  8.54it/s]

{'loss': Array(0.13955414, dtype=float32), 'loss_cross_entropy': Array(0.1317697, dtype=float32)}
{'loss_inverse': Array(1.215245e-07, dtype=float32)}


 10%|▉         | 96240/1000000 [4:33:13<25:25:08,  9.88it/s]

{'loss': Array(0.13577919, dtype=float32), 'loss_cross_entropy': Array(0.12883061, dtype=float32)}
{'loss_inverse': Array(4.5818455e-05, dtype=float32)}


 10%|▉         | 96250/1000000 [4:33:15<37:15:01,  6.74it/s]

{'loss': Array(0.15209554, dtype=float32), 'loss_cross_entropy': Array(0.14349425, dtype=float32)}
{'loss_inverse': Array(0.00012286, dtype=float32)}


 10%|▉         | 96260/1000000 [4:33:16<26:26:58,  9.49it/s]

{'loss': Array(0.14484781, dtype=float32), 'loss_cross_entropy': Array(0.13682832, dtype=float32)}
{'loss_inverse': Array(1.6035854e-05, dtype=float32)}


 10%|▉         | 96270/1000000 [4:33:17<25:04:26, 10.01it/s]

{'loss': Array(0.14629818, dtype=float32), 'loss_cross_entropy': Array(0.13762264, dtype=float32)}
{'loss_inverse': Array(1.5730682e-05, dtype=float32)}


 10%|▉         | 96280/1000000 [4:33:19<28:31:12,  8.80it/s]

{'loss': Array(0.142517, dtype=float32), 'loss_cross_entropy': Array(0.13492076, dtype=float32)}
{'loss_inverse': Array(7.34789e-05, dtype=float32)}


 10%|▉         | 96290/1000000 [4:33:21<26:43:55,  9.39it/s]

{'loss': Array(0.13431233, dtype=float32), 'loss_cross_entropy': Array(0.12758556, dtype=float32)}
{'loss_inverse': Array(1.4746695e-05, dtype=float32)}


 10%|▉         | 96300/1000000 [4:33:22<29:05:43,  8.63it/s]

{'loss': Array(0.14222847, dtype=float32), 'loss_cross_entropy': Array(0.13447888, dtype=float32)}
{'loss_inverse': Array(9.077333e-07, dtype=float32)}


 10%|▉         | 96310/1000000 [4:33:24<25:26:14,  9.87it/s]

{'loss': Array(0.12427592, dtype=float32), 'loss_cross_entropy': Array(0.11700588, dtype=float32)}
{'loss_inverse': Array(8.827164e-05, dtype=float32)}


 10%|▉         | 96320/1000000 [4:33:26<30:48:49,  8.15it/s]

{'loss': Array(0.1482254, dtype=float32), 'loss_cross_entropy': Array(0.14024095, dtype=float32)}
{'loss_inverse': Array(3.9304763e-05, dtype=float32)}


 10%|▉         | 96330/1000000 [4:33:27<25:35:10,  9.81it/s]

{'loss': Array(0.13358797, dtype=float32), 'loss_cross_entropy': Array(0.12636097, dtype=float32)}
{'loss_inverse': Array(1.1949718e-06, dtype=float32)}


 10%|▉         | 96340/1000000 [4:33:28<24:52:09, 10.09it/s]

{'loss': Array(0.1355028, dtype=float32), 'loss_cross_entropy': Array(0.12772696, dtype=float32)}
{'loss_inverse': Array(2.7263968e-06, dtype=float32)}


 10%|▉         | 96350/1000000 [4:33:30<28:06:19,  8.93it/s]

{'loss': Array(0.13304527, dtype=float32), 'loss_cross_entropy': Array(0.12656288, dtype=float32)}
{'loss_inverse': Array(7.883387e-06, dtype=float32)}


 10%|▉         | 96360/1000000 [4:33:32<25:58:27,  9.66it/s]

{'loss': Array(0.1448676, dtype=float32), 'loss_cross_entropy': Array(0.13690214, dtype=float32)}
{'loss_inverse': Array(2.816158e-05, dtype=float32)}


 10%|▉         | 96370/1000000 [4:33:33<28:51:54,  8.70it/s]

{'loss': Array(0.13730042, dtype=float32), 'loss_cross_entropy': Array(0.12978446, dtype=float32)}
{'loss_inverse': Array(1.4406284e-05, dtype=float32)}


 10%|▉         | 96380/1000000 [4:33:35<25:34:46,  9.81it/s]

{'loss': Array(0.1380319, dtype=float32), 'loss_cross_entropy': Array(0.1306201, dtype=float32)}
{'loss_inverse': Array(3.7079913e-05, dtype=float32)}


 10%|▉         | 96390/1000000 [4:33:37<31:06:00,  8.07it/s]

{'loss': Array(0.12852146, dtype=float32), 'loss_cross_entropy': Array(0.12117267, dtype=float32)}
{'loss_inverse': Array(1.440705e-06, dtype=float32)}


 10%|▉         | 96400/1000000 [4:33:38<25:47:21,  9.73it/s]

{'loss': Array(0.14128749, dtype=float32), 'loss_cross_entropy': Array(0.13375786, dtype=float32)}
{'loss_inverse': Array(2.0031798e-07, dtype=float32)}


 10%|▉         | 96410/1000000 [4:33:39<25:13:32,  9.95it/s]

{'loss': Array(0.11994759, dtype=float32), 'loss_cross_entropy': Array(0.11310825, dtype=float32)}
{'loss_inverse': Array(7.86859e-07, dtype=float32)}


 10%|▉         | 96420/1000000 [4:33:41<28:51:30,  8.70it/s]

{'loss': Array(0.1344691, dtype=float32), 'loss_cross_entropy': Array(0.12681095, dtype=float32)}
{'loss_inverse': Array(8.223764e-06, dtype=float32)}


 10%|▉         | 96430/1000000 [4:33:43<25:19:40,  9.91it/s]

{'loss': Array(0.15277092, dtype=float32), 'loss_cross_entropy': Array(0.1445031, dtype=float32)}
{'loss_inverse': Array(1.5698639e-05, dtype=float32)}


 10%|▉         | 96440/1000000 [4:33:44<30:26:51,  8.24it/s]

{'loss': Array(0.15947628, dtype=float32), 'loss_cross_entropy': Array(0.1512615, dtype=float32)}
{'loss_inverse': Array(8.140303e-06, dtype=float32)}


 10%|▉         | 96450/1000000 [4:33:46<25:59:53,  9.65it/s]

{'loss': Array(0.13381423, dtype=float32), 'loss_cross_entropy': Array(0.12612674, dtype=float32)}
{'loss_inverse': Array(5.1313812e-08, dtype=float32)}


 10%|▉         | 96460/1000000 [4:33:48<34:22:07,  7.30it/s]

{'loss': Array(0.14148895, dtype=float32), 'loss_cross_entropy': Array(0.13328646, dtype=float32)}
{'loss_inverse': Array(4.3577938e-07, dtype=float32)}


 10%|▉         | 96470/1000000 [4:33:49<27:08:26,  9.25it/s]

{'loss': Array(0.13437392, dtype=float32), 'loss_cross_entropy': Array(0.12638249, dtype=float32)}
{'loss_inverse': Array(4.262792e-05, dtype=float32)}


 10%|▉         | 96480/1000000 [4:33:51<25:16:40,  9.93it/s]

{'loss': Array(0.13379835, dtype=float32), 'loss_cross_entropy': Array(0.12602417, dtype=float32)}
{'loss_inverse': Array(3.5443898e-06, dtype=float32)}


 10%|▉         | 96490/1000000 [4:33:52<27:24:09,  9.16it/s]

{'loss': Array(0.14973985, dtype=float32), 'loss_cross_entropy': Array(0.1415116, dtype=float32)}
{'loss_inverse': Array(6.223049e-06, dtype=float32)}


 10%|▉         | 96500/1000000 [4:33:54<25:04:53, 10.01it/s]

{'loss': Array(0.13279645, dtype=float32), 'loss_cross_entropy': Array(0.12603684, dtype=float32)}
{'loss_inverse': Array(2.0450452e-05, dtype=float32)}


 10%|▉         | 96510/1000000 [4:34:02<93:02:44,  2.70it/s] 

{'loss': Array(0.14981249, dtype=float32), 'loss_cross_entropy': Array(0.14149065, dtype=float32)}
{'loss_inverse': Array(5.7698758e-06, dtype=float32)}


 10%|▉         | 96520/1000000 [4:34:04<36:08:28,  6.94it/s]

{'loss': Array(0.14035699, dtype=float32), 'loss_cross_entropy': Array(0.13293946, dtype=float32)}
{'loss_inverse': Array(8.42828e-07, dtype=float32)}


 10%|▉         | 96530/1000000 [4:34:05<26:45:32,  9.38it/s]

{'loss': Array(0.13949184, dtype=float32), 'loss_cross_entropy': Array(0.13244684, dtype=float32)}
{'loss_inverse': Array(0.00032696, dtype=float32)}


 10%|▉         | 96540/1000000 [4:34:07<29:00:11,  8.65it/s]

{'loss': Array(0.13123143, dtype=float32), 'loss_cross_entropy': Array(0.12343377, dtype=float32)}
{'loss_inverse': Array(2.8156535e-06, dtype=float32)}


 10%|▉         | 96550/1000000 [4:34:08<25:25:28,  9.87it/s]

{'loss': Array(0.1356714, dtype=float32), 'loss_cross_entropy': Array(0.1282857, dtype=float32)}
{'loss_inverse': Array(5.048741e-07, dtype=float32)}


 10%|▉         | 96560/1000000 [4:34:10<29:39:08,  8.46it/s]

{'loss': Array(0.14891186, dtype=float32), 'loss_cross_entropy': Array(0.14038019, dtype=float32)}
{'loss_inverse': Array(8.583647e-07, dtype=float32)}


 10%|▉         | 96570/1000000 [4:34:12<26:04:28,  9.62it/s]

{'loss': Array(0.14208856, dtype=float32), 'loss_cross_entropy': Array(0.13409455, dtype=float32)}
{'loss_inverse': Array(0.00034615, dtype=float32)}


 10%|▉         | 96580/1000000 [4:34:13<31:15:36,  8.03it/s]

{'loss': Array(0.12797792, dtype=float32), 'loss_cross_entropy': Array(0.12070701, dtype=float32)}
{'loss_inverse': Array(2.5703298e-07, dtype=float32)}


 10%|▉         | 96590/1000000 [4:34:15<25:44:05,  9.75it/s]

{'loss': Array(0.13380109, dtype=float32), 'loss_cross_entropy': Array(0.12678897, dtype=float32)}
{'loss_inverse': Array(8.484321e-07, dtype=float32)}


 10%|▉         | 96600/1000000 [4:34:16<24:56:39, 10.06it/s]

{'loss': Array(0.14036053, dtype=float32), 'loss_cross_entropy': Array(0.13276954, dtype=float32)}
{'loss_inverse': Array(6.177342e-05, dtype=float32)}


 10%|▉         | 96610/1000000 [4:34:18<29:21:23,  8.55it/s]

{'loss': Array(0.12672895, dtype=float32), 'loss_cross_entropy': Array(0.11933058, dtype=float32)}
{'loss_inverse': Array(0.00031041, dtype=float32)}


 10%|▉         | 96620/1000000 [4:34:19<26:30:43,  9.47it/s]

{'loss': Array(0.14319022, dtype=float32), 'loss_cross_entropy': Array(0.13533299, dtype=float32)}
{'loss_inverse': Array(3.524336e-05, dtype=float32)}


 10%|▉         | 96630/1000000 [4:34:21<34:39:59,  7.24it/s]

{'loss': Array(0.13121946, dtype=float32), 'loss_cross_entropy': Array(0.12349813, dtype=float32)}
{'loss_inverse': Array(3.5657567e-07, dtype=float32)}


 10%|▉         | 96640/1000000 [4:34:23<26:00:11,  9.65it/s]

{'loss': Array(0.15105884, dtype=float32), 'loss_cross_entropy': Array(0.14169332, dtype=float32)}
{'loss_inverse': Array(0.0001121, dtype=float32)}


 10%|▉         | 96650/1000000 [4:34:24<37:31:19,  6.69it/s]

{'loss': Array(0.13403885, dtype=float32), 'loss_cross_entropy': Array(0.126602, dtype=float32)}
{'loss_inverse': Array(2.7259304e-07, dtype=float32)}


 10%|▉         | 96660/1000000 [4:34:26<26:33:11,  9.45it/s]

{'loss': Array(0.1250139, dtype=float32), 'loss_cross_entropy': Array(0.11710699, dtype=float32)}
{'loss_inverse': Array(3.3720048e-07, dtype=float32)}


 10%|▉         | 96670/1000000 [4:34:27<37:05:03,  6.77it/s]

{'loss': Array(0.14238231, dtype=float32), 'loss_cross_entropy': Array(0.13511685, dtype=float32)}
{'loss_inverse': Array(0.00014569, dtype=float32)}


 10%|▉         | 96680/1000000 [4:34:29<27:14:43,  9.21it/s]

{'loss': Array(0.14568207, dtype=float32), 'loss_cross_entropy': Array(0.13783437, dtype=float32)}
{'loss_inverse': Array(1.5032869e-06, dtype=float32)}


 10%|▉         | 96690/1000000 [4:34:30<25:59:32,  9.65it/s]

{'loss': Array(0.12254429, dtype=float32), 'loss_cross_entropy': Array(0.11505914, dtype=float32)}
{'loss_inverse': Array(7.075675e-05, dtype=float32)}


 10%|▉         | 96700/1000000 [4:34:32<31:02:37,  8.08it/s]

{'loss': Array(0.14949854, dtype=float32), 'loss_cross_entropy': Array(0.1417139, dtype=float32)}
{'loss_inverse': Array(3.348703e-05, dtype=float32)}


 10%|▉         | 96710/1000000 [4:34:34<25:15:59,  9.93it/s]

{'loss': Array(0.14067028, dtype=float32), 'loss_cross_entropy': Array(0.1339798, dtype=float32)}
{'loss_inverse': Array(0.00034481, dtype=float32)}


 10%|▉         | 96720/1000000 [4:34:35<37:05:35,  6.76it/s]

{'loss': Array(0.15465744, dtype=float32), 'loss_cross_entropy': Array(0.14606875, dtype=float32)}
{'loss_inverse': Array(6.457907e-07, dtype=float32)}


 10%|▉         | 96730/1000000 [4:34:37<27:35:04,  9.10it/s]

{'loss': Array(0.14391266, dtype=float32), 'loss_cross_entropy': Array(0.13565299, dtype=float32)}
{'loss_inverse': Array(0.00013825, dtype=float32)}


 10%|▉         | 96740/1000000 [4:34:38<37:19:09,  6.72it/s]

{'loss': Array(0.15268894, dtype=float32), 'loss_cross_entropy': Array(0.14442454, dtype=float32)}
{'loss_inverse': Array(0.00016797, dtype=float32)}


 10%|▉         | 96750/1000000 [4:34:40<26:33:03,  9.45it/s]

{'loss': Array(0.15097581, dtype=float32), 'loss_cross_entropy': Array(0.1428368, dtype=float32)}
{'loss_inverse': Array(2.21546e-05, dtype=float32)}


 10%|▉         | 96760/1000000 [4:34:41<24:53:44, 10.08it/s]

{'loss': Array(0.1434415, dtype=float32), 'loss_cross_entropy': Array(0.13525948, dtype=float32)}
{'loss_inverse': Array(2.0450978e-05, dtype=float32)}


 10%|▉         | 96770/1000000 [4:34:43<33:42:31,  7.44it/s]

{'loss': Array(0.14289847, dtype=float32), 'loss_cross_entropy': Array(0.13526665, dtype=float32)}
{'loss_inverse': Array(5.428521e-05, dtype=float32)}


 10%|▉         | 96780/1000000 [4:34:44<25:48:57,  9.72it/s]

{'loss': Array(0.14928398, dtype=float32), 'loss_cross_entropy': Array(0.14092343, dtype=float32)}
{'loss_inverse': Array(2.4685161e-05, dtype=float32)}


 10%|▉         | 96790/1000000 [4:34:46<24:38:25, 10.18it/s]

{'loss': Array(0.12889348, dtype=float32), 'loss_cross_entropy': Array(0.1213273, dtype=float32)}
{'loss_inverse': Array(6.5793494e-05, dtype=float32)}


 10%|▉         | 96800/1000000 [4:34:48<27:28:54,  9.13it/s]

{'loss': Array(0.13963301, dtype=float32), 'loss_cross_entropy': Array(0.13171877, dtype=float32)}
{'loss_inverse': Array(7.9317906e-05, dtype=float32)}


 10%|▉         | 96810/1000000 [4:34:49<24:43:33, 10.15it/s]

{'loss': Array(0.13733564, dtype=float32), 'loss_cross_entropy': Array(0.12946896, dtype=float32)}
{'loss_inverse': Array(3.3519044e-08, dtype=float32)}


 10%|▉         | 96820/1000000 [4:34:51<29:06:54,  8.62it/s]

{'loss': Array(0.12276868, dtype=float32), 'loss_cross_entropy': Array(0.11620247, dtype=float32)}
{'loss_inverse': Array(5.866149e-07, dtype=float32)}


 10%|▉         | 96830/1000000 [4:34:52<25:20:36,  9.90it/s]

{'loss': Array(0.17105238, dtype=float32), 'loss_cross_entropy': Array(0.16206922, dtype=float32)}
{'loss_inverse': Array(8.5535606e-07, dtype=float32)}


 10%|▉         | 96840/1000000 [4:34:54<29:11:01,  8.60it/s]

{'loss': Array(0.1306173, dtype=float32), 'loss_cross_entropy': Array(0.12340292, dtype=float32)}
{'loss_inverse': Array(1.3848888e-05, dtype=float32)}


 10%|▉         | 96850/1000000 [4:34:55<24:50:21, 10.10it/s]

{'loss': Array(0.1537936, dtype=float32), 'loss_cross_entropy': Array(0.14585237, dtype=float32)}
{'loss_inverse': Array(4.233798e-06, dtype=float32)}


 10%|▉         | 96860/1000000 [4:34:57<28:48:27,  8.71it/s]

{'loss': Array(0.1457333, dtype=float32), 'loss_cross_entropy': Array(0.13772427, dtype=float32)}
{'loss_inverse': Array(4.051188e-07, dtype=float32)}


 10%|▉         | 96870/1000000 [4:34:58<25:01:08, 10.03it/s]

{'loss': Array(0.1383227, dtype=float32), 'loss_cross_entropy': Array(0.13046639, dtype=float32)}
{'loss_inverse': Array(1.198692e-07, dtype=float32)}


 10%|▉         | 96880/1000000 [4:35:00<33:07:24,  7.57it/s]

{'loss': Array(0.14314997, dtype=float32), 'loss_cross_entropy': Array(0.13544965, dtype=float32)}
{'loss_inverse': Array(9.0961294e-07, dtype=float32)}


 10%|▉         | 96890/1000000 [4:35:02<26:11:53,  9.58it/s]

{'loss': Array(0.14187005, dtype=float32), 'loss_cross_entropy': Array(0.13383247, dtype=float32)}
{'loss_inverse': Array(3.101331e-08, dtype=float32)}


 10%|▉         | 96900/1000000 [4:35:03<37:29:05,  6.69it/s]

{'loss': Array(0.13738748, dtype=float32), 'loss_cross_entropy': Array(0.12952691, dtype=float32)}
{'loss_inverse': Array(2.4678075e-06, dtype=float32)}


 10%|▉         | 96910/1000000 [4:35:05<27:01:34,  9.28it/s]

{'loss': Array(0.12511864, dtype=float32), 'loss_cross_entropy': Array(0.1180272, dtype=float32)}
{'loss_inverse': Array(3.2210562e-05, dtype=float32)}


 10%|▉         | 96920/1000000 [4:35:07<35:01:04,  7.16it/s]

{'loss': Array(0.1298805, dtype=float32), 'loss_cross_entropy': Array(0.12261432, dtype=float32)}
{'loss_inverse': Array(5.6697587e-07, dtype=float32)}


 10%|▉         | 96930/1000000 [4:35:08<27:14:26,  9.21it/s]

{'loss': Array(0.14376238, dtype=float32), 'loss_cross_entropy': Array(0.13563895, dtype=float32)}
{'loss_inverse': Array(4.1325686e-05, dtype=float32)}


 10%|▉         | 96940/1000000 [4:35:10<26:05:37,  9.61it/s]

{'loss': Array(0.13610052, dtype=float32), 'loss_cross_entropy': Array(0.12869619, dtype=float32)}
{'loss_inverse': Array(1.2326772e-05, dtype=float32)}


 10%|▉         | 96950/1000000 [4:35:11<29:57:30,  8.37it/s]

{'loss': Array(0.12231167, dtype=float32), 'loss_cross_entropy': Array(0.11515349, dtype=float32)}
{'loss_inverse': Array(2.6090137e-07, dtype=float32)}


 10%|▉         | 96960/1000000 [4:35:13<25:18:17,  9.91it/s]

{'loss': Array(0.13917609, dtype=float32), 'loss_cross_entropy': Array(0.13167326, dtype=float32)}
{'loss_inverse': Array(1.7774504e-05, dtype=float32)}


 10%|▉         | 96970/1000000 [4:35:15<32:01:34,  7.83it/s]

{'loss': Array(0.14307651, dtype=float32), 'loss_cross_entropy': Array(0.13489966, dtype=float32)}
{'loss_inverse': Array(1.2490783e-07, dtype=float32)}


 10%|▉         | 96980/1000000 [4:35:16<25:59:12,  9.65it/s]

{'loss': Array(0.13258223, dtype=float32), 'loss_cross_entropy': Array(0.12512155, dtype=float32)}
{'loss_inverse': Array(0.00010522, dtype=float32)}


 10%|▉         | 96990/1000000 [4:35:18<34:06:58,  7.35it/s]

{'loss': Array(0.14826296, dtype=float32), 'loss_cross_entropy': Array(0.13967535, dtype=float32)}
{'loss_inverse': Array(3.006968e-05, dtype=float32)}


 10%|▉         | 97000/1000000 [4:35:19<25:53:11,  9.69it/s]

{'loss': Array(0.15447521, dtype=float32), 'loss_cross_entropy': Array(0.14532533, dtype=float32)}
{'loss_inverse': Array(8.789609e-06, dtype=float32)}


 10%|▉         | 97010/1000000 [4:35:27<91:51:43,  2.73it/s] 

{'loss': Array(0.13997297, dtype=float32), 'loss_cross_entropy': Array(0.1320858, dtype=float32)}
{'loss_inverse': Array(3.0033104e-07, dtype=float32)}


 10%|▉         | 97020/1000000 [4:35:29<36:00:46,  6.96it/s]

{'loss': Array(0.14197583, dtype=float32), 'loss_cross_entropy': Array(0.13425964, dtype=float32)}
{'loss_inverse': Array(9.963829e-07, dtype=float32)}


 10%|▉         | 97030/1000000 [4:35:30<26:24:51,  9.50it/s]

{'loss': Array(0.13627748, dtype=float32), 'loss_cross_entropy': Array(0.12868443, dtype=float32)}
{'loss_inverse': Array(0.00014205, dtype=float32)}


 10%|▉         | 97040/1000000 [4:35:32<27:40:41,  9.06it/s]

{'loss': Array(0.1432049, dtype=float32), 'loss_cross_entropy': Array(0.1354909, dtype=float32)}
{'loss_inverse': Array(4.4068474e-05, dtype=float32)}


 10%|▉         | 97050/1000000 [4:35:33<24:47:22, 10.12it/s]

{'loss': Array(0.15805165, dtype=float32), 'loss_cross_entropy': Array(0.14954682, dtype=float32)}
{'loss_inverse': Array(0.00034505, dtype=float32)}


 10%|▉         | 97060/1000000 [4:35:35<34:25:26,  7.29it/s]

{'loss': Array(0.13199256, dtype=float32), 'loss_cross_entropy': Array(0.12424904, dtype=float32)}
{'loss_inverse': Array(2.8952349e-05, dtype=float32)}


 10%|▉         | 97070/1000000 [4:35:37<26:25:39,  9.49it/s]

{'loss': Array(0.14096639, dtype=float32), 'loss_cross_entropy': Array(0.13317464, dtype=float32)}
{'loss_inverse': Array(1.0958023e-06, dtype=float32)}


 10%|▉         | 97080/1000000 [4:35:38<37:09:54,  6.75it/s]

{'loss': Array(0.13196509, dtype=float32), 'loss_cross_entropy': Array(0.12402774, dtype=float32)}
{'loss_inverse': Array(1.03659495e-05, dtype=float32)}


 10%|▉         | 97090/1000000 [4:35:40<26:33:55,  9.44it/s]

{'loss': Array(0.14295764, dtype=float32), 'loss_cross_entropy': Array(0.1347815, dtype=float32)}
{'loss_inverse': Array(1.5716176e-05, dtype=float32)}


 10%|▉         | 97100/1000000 [4:35:41<24:43:40, 10.14it/s]

{'loss': Array(0.13935563, dtype=float32), 'loss_cross_entropy': Array(0.13164404, dtype=float32)}
{'loss_inverse': Array(5.1440173e-05, dtype=float32)}


 10%|▉         | 97110/1000000 [4:35:43<27:58:42,  8.96it/s]

{'loss': Array(0.15517662, dtype=float32), 'loss_cross_entropy': Array(0.14563642, dtype=float32)}
{'loss_inverse': Array(1.3648143e-07, dtype=float32)}


 10%|▉         | 97120/1000000 [4:35:45<37:50:09,  6.63it/s]

{'loss': Array(0.15511897, dtype=float32), 'loss_cross_entropy': Array(0.14725202, dtype=float32)}
{'loss_inverse': Array(2.059243e-07, dtype=float32)}


 10%|▉         | 97130/1000000 [4:35:46<26:58:08,  9.30it/s]

{'loss': Array(0.14845413, dtype=float32), 'loss_cross_entropy': Array(0.14048375, dtype=float32)}
{'loss_inverse': Array(0.00010583, dtype=float32)}


 10%|▉         | 97140/1000000 [4:35:48<25:08:14,  9.98it/s]

{'loss': Array(0.14330328, dtype=float32), 'loss_cross_entropy': Array(0.13559078, dtype=float32)}
{'loss_inverse': Array(5.121099e-05, dtype=float32)}


 10%|▉         | 97150/1000000 [4:35:49<28:17:12,  8.87it/s]

{'loss': Array(0.13589197, dtype=float32), 'loss_cross_entropy': Array(0.12861742, dtype=float32)}
{'loss_inverse': Array(6.57562e-05, dtype=float32)}


 10%|▉         | 97160/1000000 [4:35:51<26:03:59,  9.62it/s]

{'loss': Array(0.13289331, dtype=float32), 'loss_cross_entropy': Array(0.12534827, dtype=float32)}
{'loss_inverse': Array(1.3233158e-05, dtype=float32)}


 10%|▉         | 97170/1000000 [4:35:53<29:29:16,  8.50it/s]

{'loss': Array(0.12458766, dtype=float32), 'loss_cross_entropy': Array(0.11770438, dtype=float32)}
{'loss_inverse': Array(0.00068868, dtype=float32)}


 10%|▉         | 97180/1000000 [4:35:54<26:04:19,  9.62it/s]

{'loss': Array(0.13878872, dtype=float32), 'loss_cross_entropy': Array(0.13126056, dtype=float32)}
{'loss_inverse': Array(1.9907316e-06, dtype=float32)}


 10%|▉         | 97190/1000000 [4:35:56<31:10:10,  8.05it/s]

{'loss': Array(0.16077827, dtype=float32), 'loss_cross_entropy': Array(0.15226012, dtype=float32)}
{'loss_inverse': Array(0.00017256, dtype=float32)}


 10%|▉         | 97200/1000000 [4:35:57<25:27:55,  9.85it/s]

{'loss': Array(0.15227278, dtype=float32), 'loss_cross_entropy': Array(0.14436924, dtype=float32)}
{'loss_inverse': Array(0.0001295, dtype=float32)}


 10%|▉         | 97210/1000000 [4:35:59<25:06:22,  9.99it/s]

{'loss': Array(0.1466048, dtype=float32), 'loss_cross_entropy': Array(0.13853438, dtype=float32)}
{'loss_inverse': Array(4.1918986e-05, dtype=float32)}


 10%|▉         | 97220/1000000 [4:36:01<29:09:00,  8.60it/s]

{'loss': Array(0.15335554, dtype=float32), 'loss_cross_entropy': Array(0.14493772, dtype=float32)}
{'loss_inverse': Array(9.188853e-06, dtype=float32)}


 10%|▉         | 97230/1000000 [4:36:02<25:53:53,  9.68it/s]

{'loss': Array(0.14567482, dtype=float32), 'loss_cross_entropy': Array(0.1376969, dtype=float32)}
{'loss_inverse': Array(1.993865e-06, dtype=float32)}


 10%|▉         | 97240/1000000 [4:36:04<31:04:57,  8.07it/s]

{'loss': Array(0.1473386, dtype=float32), 'loss_cross_entropy': Array(0.13947324, dtype=float32)}
{'loss_inverse': Array(8.128877e-05, dtype=float32)}


 10%|▉         | 97250/1000000 [4:36:05<25:17:42,  9.91it/s]

{'loss': Array(0.13296728, dtype=float32), 'loss_cross_entropy': Array(0.12574382, dtype=float32)}
{'loss_inverse': Array(5.043495e-06, dtype=float32)}


 10%|▉         | 97260/1000000 [4:36:07<34:12:38,  7.33it/s]

{'loss': Array(0.14156458, dtype=float32), 'loss_cross_entropy': Array(0.13374881, dtype=float32)}
{'loss_inverse': Array(1.8374336e-05, dtype=float32)}


 10%|▉         | 97270/1000000 [4:36:08<26:10:35,  9.58it/s]

{'loss': Array(0.13177486, dtype=float32), 'loss_cross_entropy': Array(0.12461495, dtype=float32)}
{'loss_inverse': Array(1.756391e-06, dtype=float32)}


 10%|▉         | 97280/1000000 [4:36:10<25:47:23,  9.72it/s]

{'loss': Array(0.1409662, dtype=float32), 'loss_cross_entropy': Array(0.13354947, dtype=float32)}
{'loss_inverse': Array(5.475509e-05, dtype=float32)}


 10%|▉         | 97290/1000000 [4:36:12<28:45:18,  8.72it/s]

{'loss': Array(0.15465824, dtype=float32), 'loss_cross_entropy': Array(0.14592603, dtype=float32)}
{'loss_inverse': Array(1.3245127e-08, dtype=float32)}


 10%|▉         | 97300/1000000 [4:36:13<25:17:59,  9.91it/s]

{'loss': Array(0.14588106, dtype=float32), 'loss_cross_entropy': Array(0.13813028, dtype=float32)}
{'loss_inverse': Array(8.067176e-08, dtype=float32)}


 10%|▉         | 97310/1000000 [4:36:15<33:30:39,  7.48it/s]

{'loss': Array(0.1375529, dtype=float32), 'loss_cross_entropy': Array(0.12945494, dtype=float32)}
{'loss_inverse': Array(2.9096802e-06, dtype=float32)}


 10%|▉         | 97320/1000000 [4:36:16<25:45:24,  9.74it/s]

{'loss': Array(0.14560083, dtype=float32), 'loss_cross_entropy': Array(0.13777974, dtype=float32)}
{'loss_inverse': Array(1.3373847e-07, dtype=float32)}


 10%|▉         | 97330/1000000 [4:36:17<24:37:04, 10.19it/s]

{'loss': Array(0.13945147, dtype=float32), 'loss_cross_entropy': Array(0.13210294, dtype=float32)}
{'loss_inverse': Array(9.3900994e-08, dtype=float32)}


 10%|▉         | 97340/1000000 [4:36:19<27:09:36,  9.23it/s]

{'loss': Array(0.15226425, dtype=float32), 'loss_cross_entropy': Array(0.1437624, dtype=float32)}
{'loss_inverse': Array(0.00016099, dtype=float32)}


 10%|▉         | 97350/1000000 [4:36:21<25:26:26,  9.86it/s]

{'loss': Array(0.12347756, dtype=float32), 'loss_cross_entropy': Array(0.1160077, dtype=float32)}
{'loss_inverse': Array(6.0545583e-05, dtype=float32)}


 10%|▉         | 97360/1000000 [4:36:22<29:00:53,  8.64it/s]

{'loss': Array(0.13264476, dtype=float32), 'loss_cross_entropy': Array(0.12501946, dtype=float32)}
{'loss_inverse': Array(2.8933966e-06, dtype=float32)}


 10%|▉         | 97370/1000000 [4:36:24<26:03:01,  9.62it/s]

{'loss': Array(0.11525477, dtype=float32), 'loss_cross_entropy': Array(0.10837816, dtype=float32)}
{'loss_inverse': Array(8.063392e-05, dtype=float32)}


 10%|▉         | 97380/1000000 [4:36:26<31:46:02,  7.89it/s]

{'loss': Array(0.12998407, dtype=float32), 'loss_cross_entropy': Array(0.12243654, dtype=float32)}
{'loss_inverse': Array(1.8217803e-07, dtype=float32)}


 10%|▉         | 97390/1000000 [4:36:27<25:53:37,  9.68it/s]

{'loss': Array(0.1277607, dtype=float32), 'loss_cross_entropy': Array(0.12097601, dtype=float32)}
{'loss_inverse': Array(3.6389306e-06, dtype=float32)}


 10%|▉         | 97400/1000000 [4:36:29<25:18:18,  9.91it/s]

{'loss': Array(0.11884826, dtype=float32), 'loss_cross_entropy': Array(0.11179838, dtype=float32)}
{'loss_inverse': Array(0.00012427, dtype=float32)}


 10%|▉         | 97410/1000000 [4:36:30<28:55:23,  8.67it/s]

{'loss': Array(0.12565902, dtype=float32), 'loss_cross_entropy': Array(0.11832422, dtype=float32)}
{'loss_inverse': Array(0.00031903, dtype=float32)}


 10%|▉         | 97420/1000000 [4:36:32<25:18:38,  9.91it/s]

{'loss': Array(0.13439047, dtype=float32), 'loss_cross_entropy': Array(0.1265451, dtype=float32)}
{'loss_inverse': Array(6.921522e-05, dtype=float32)}


 10%|▉         | 97430/1000000 [4:36:33<32:40:49,  7.67it/s]

{'loss': Array(0.15508017, dtype=float32), 'loss_cross_entropy': Array(0.14634767, dtype=float32)}
{'loss_inverse': Array(0.00022249, dtype=float32)}


 10%|▉         | 97440/1000000 [4:36:35<25:59:15,  9.65it/s]

{'loss': Array(0.15209079, dtype=float32), 'loss_cross_entropy': Array(0.14496076, dtype=float32)}
{'loss_inverse': Array(2.5281137e-08, dtype=float32)}


 10%|▉         | 97450/1000000 [4:36:37<37:45:46,  6.64it/s]

{'loss': Array(0.13949166, dtype=float32), 'loss_cross_entropy': Array(0.13168466, dtype=float32)}
{'loss_inverse': Array(3.2630396e-05, dtype=float32)}


 10%|▉         | 97460/1000000 [4:36:38<26:27:26,  9.48it/s]

{'loss': Array(0.13710864, dtype=float32), 'loss_cross_entropy': Array(0.12908912, dtype=float32)}
{'loss_inverse': Array(4.8298716e-07, dtype=float32)}


 10%|▉         | 97470/1000000 [4:36:40<37:00:58,  6.77it/s]

{'loss': Array(0.1327178, dtype=float32), 'loss_cross_entropy': Array(0.12502329, dtype=float32)}
{'loss_inverse': Array(2.1208436e-05, dtype=float32)}


 10%|▉         | 97480/1000000 [4:36:41<26:30:08,  9.46it/s]

{'loss': Array(0.14374076, dtype=float32), 'loss_cross_entropy': Array(0.13610755, dtype=float32)}
{'loss_inverse': Array(0.00035655, dtype=float32)}


 10%|▉         | 97490/1000000 [4:36:43<24:40:12, 10.16it/s]

{'loss': Array(0.13862763, dtype=float32), 'loss_cross_entropy': Array(0.13126957, dtype=float32)}
{'loss_inverse': Array(1.2881482e-06, dtype=float32)}


 10%|▉         | 97500/1000000 [4:36:44<30:47:17,  8.14it/s]

{'loss': Array(0.15000843, dtype=float32), 'loss_cross_entropy': Array(0.14172769, dtype=float32)}
{'loss_inverse': Array(0.00031722, dtype=float32)}


 10%|▉         | 97510/1000000 [4:36:52<84:09:17,  2.98it/s] 

{'loss': Array(0.13783662, dtype=float32), 'loss_cross_entropy': Array(0.13067845, dtype=float32)}
{'loss_inverse': Array(7.43835e-08, dtype=float32)}


 10%|▉         | 97520/1000000 [4:36:54<34:53:41,  7.18it/s]

{'loss': Array(0.12497637, dtype=float32), 'loss_cross_entropy': Array(0.11799834, dtype=float32)}
{'loss_inverse': Array(1.104383e-05, dtype=float32)}


 10%|▉         | 97530/1000000 [4:36:55<29:58:22,  8.36it/s]

{'loss': Array(0.13132848, dtype=float32), 'loss_cross_entropy': Array(0.12400845, dtype=float32)}
{'loss_inverse': Array(7.797916e-05, dtype=float32)}


 10%|▉         | 97540/1000000 [4:36:57<25:01:27, 10.02it/s]

{'loss': Array(0.15961325, dtype=float32), 'loss_cross_entropy': Array(0.15102687, dtype=float32)}
{'loss_inverse': Array(1.9044444e-05, dtype=float32)}


 10%|▉         | 97550/1000000 [4:36:58<30:30:56,  8.21it/s]

{'loss': Array(0.143636, dtype=float32), 'loss_cross_entropy': Array(0.13567787, dtype=float32)}
{'loss_inverse': Array(5.911346e-05, dtype=float32)}


 10%|▉         | 97560/1000000 [4:37:00<25:40:14,  9.77it/s]

{'loss': Array(0.1393111, dtype=float32), 'loss_cross_entropy': Array(0.13222182, dtype=float32)}
{'loss_inverse': Array(1.7498174e-06, dtype=float32)}


 10%|▉         | 97570/1000000 [4:37:02<33:14:31,  7.54it/s]

{'loss': Array(0.1572089, dtype=float32), 'loss_cross_entropy': Array(0.14868939, dtype=float32)}
{'loss_inverse': Array(2.458318e-05, dtype=float32)}


 10%|▉         | 97580/1000000 [4:37:03<25:36:24,  9.79it/s]

{'loss': Array(0.12345096, dtype=float32), 'loss_cross_entropy': Array(0.11564417, dtype=float32)}
{'loss_inverse': Array(9.647963e-06, dtype=float32)}


 10%|▉         | 97590/1000000 [4:37:04<24:49:51, 10.10it/s]

{'loss': Array(0.1508649, dtype=float32), 'loss_cross_entropy': Array(0.14294505, dtype=float32)}
{'loss_inverse': Array(7.502441e-07, dtype=float32)}


 10%|▉         | 97600/1000000 [4:37:06<29:41:44,  8.44it/s]

{'loss': Array(0.14776795, dtype=float32), 'loss_cross_entropy': Array(0.13977042, dtype=float32)}
{'loss_inverse': Array(0.0002495, dtype=float32)}


 10%|▉         | 97610/1000000 [4:37:08<26:18:13,  9.53it/s]

{'loss': Array(0.134302, dtype=float32), 'loss_cross_entropy': Array(0.12692195, dtype=float32)}
{'loss_inverse': Array(0.00023749, dtype=float32)}


 10%|▉         | 97620/1000000 [4:37:10<33:56:07,  7.39it/s]

{'loss': Array(0.14956306, dtype=float32), 'loss_cross_entropy': Array(0.14122091, dtype=float32)}
{'loss_inverse': Array(0.00025881, dtype=float32)}


 10%|▉         | 97630/1000000 [4:37:11<26:20:07,  9.52it/s]

{'loss': Array(0.13327882, dtype=float32), 'loss_cross_entropy': Array(0.12533836, dtype=float32)}
{'loss_inverse': Array(0.00016935, dtype=float32)}


 10%|▉         | 97640/1000000 [4:37:13<37:10:01,  6.74it/s]

{'loss': Array(0.13660462, dtype=float32), 'loss_cross_entropy': Array(0.12926029, dtype=float32)}
{'loss_inverse': Array(0.00024934, dtype=float32)}


 10%|▉         | 97650/1000000 [4:37:14<26:29:29,  9.46it/s]

{'loss': Array(0.15051417, dtype=float32), 'loss_cross_entropy': Array(0.14216837, dtype=float32)}
{'loss_inverse': Array(6.013274e-07, dtype=float32)}


 10%|▉         | 97660/1000000 [4:37:15<24:58:38, 10.04it/s]

{'loss': Array(0.14945811, dtype=float32), 'loss_cross_entropy': Array(0.14078504, dtype=float32)}
{'loss_inverse': Array(6.322649e-05, dtype=float32)}


 10%|▉         | 97670/1000000 [4:37:17<28:10:47,  8.89it/s]

{'loss': Array(0.1310765, dtype=float32), 'loss_cross_entropy': Array(0.12322233, dtype=float32)}
{'loss_inverse': Array(2.0352134e-07, dtype=float32)}


 10%|▉         | 97680/1000000 [4:37:19<25:11:41,  9.95it/s]

{'loss': Array(0.14096415, dtype=float32), 'loss_cross_entropy': Array(0.13318388, dtype=float32)}
{'loss_inverse': Array(2.3954337e-06, dtype=float32)}


 10%|▉         | 97690/1000000 [4:37:20<29:14:19,  8.57it/s]

{'loss': Array(0.14683023, dtype=float32), 'loss_cross_entropy': Array(0.13910261, dtype=float32)}
{'loss_inverse': Array(0.00059016, dtype=float32)}


 10%|▉         | 97700/1000000 [4:37:22<25:28:09,  9.84it/s]

{'loss': Array(0.13011084, dtype=float32), 'loss_cross_entropy': Array(0.12270401, dtype=float32)}
{'loss_inverse': Array(0.00055302, dtype=float32)}


 10%|▉         | 97710/1000000 [4:37:24<31:33:41,  7.94it/s]

{'loss': Array(0.12984174, dtype=float32), 'loss_cross_entropy': Array(0.12264942, dtype=float32)}
{'loss_inverse': Array(6.606377e-05, dtype=float32)}


 10%|▉         | 97720/1000000 [4:37:25<26:15:51,  9.54it/s]

{'loss': Array(0.14134227, dtype=float32), 'loss_cross_entropy': Array(0.13384053, dtype=float32)}
{'loss_inverse': Array(0.00024185, dtype=float32)}


 10%|▉         | 97730/1000000 [4:37:27<34:06:40,  7.35it/s]

{'loss': Array(0.14427058, dtype=float32), 'loss_cross_entropy': Array(0.13641326, dtype=float32)}
{'loss_inverse': Array(9.477726e-05, dtype=float32)}


 10%|▉         | 97740/1000000 [4:37:28<26:08:03,  9.59it/s]

{'loss': Array(0.14420532, dtype=float32), 'loss_cross_entropy': Array(0.13661359, dtype=float32)}
{'loss_inverse': Array(2.4903069e-05, dtype=float32)}


 10%|▉         | 97750/1000000 [4:37:30<34:01:47,  7.36it/s]

{'loss': Array(0.12985915, dtype=float32), 'loss_cross_entropy': Array(0.12268225, dtype=float32)}
{'loss_inverse': Array(7.1637273e-06, dtype=float32)}


 10%|▉         | 97760/1000000 [4:37:32<25:43:20,  9.74it/s]

{'loss': Array(0.15392056, dtype=float32), 'loss_cross_entropy': Array(0.14520378, dtype=float32)}
{'loss_inverse': Array(0.00053916, dtype=float32)}


 10%|▉         | 97770/1000000 [4:37:33<24:24:01, 10.27it/s]

{'loss': Array(0.14689107, dtype=float32), 'loss_cross_entropy': Array(0.13833855, dtype=float32)}
{'loss_inverse': Array(2.7717965e-06, dtype=float32)}


 10%|▉         | 97780/1000000 [4:37:35<30:54:18,  8.11it/s]

{'loss': Array(0.15057884, dtype=float32), 'loss_cross_entropy': Array(0.14290576, dtype=float32)}
{'loss_inverse': Array(0.00016703, dtype=float32)}


 10%|▉         | 97790/1000000 [4:37:36<26:23:06,  9.50it/s]

{'loss': Array(0.12953906, dtype=float32), 'loss_cross_entropy': Array(0.12182994, dtype=float32)}
{'loss_inverse': Array(6.303586e-05, dtype=float32)}


 10%|▉         | 97800/1000000 [4:37:38<36:53:31,  6.79it/s]

{'loss': Array(0.15031274, dtype=float32), 'loss_cross_entropy': Array(0.14271222, dtype=float32)}
{'loss_inverse': Array(0.0002278, dtype=float32)}


 10%|▉         | 97810/1000000 [4:37:39<26:17:03,  9.53it/s]

{'loss': Array(0.14701174, dtype=float32), 'loss_cross_entropy': Array(0.13902603, dtype=float32)}
{'loss_inverse': Array(5.817843e-07, dtype=float32)}


 10%|▉         | 97820/1000000 [4:37:41<25:31:45,  9.82it/s]

{'loss': Array(0.1305096, dtype=float32), 'loss_cross_entropy': Array(0.12321544, dtype=float32)}
{'loss_inverse': Array(0.0002988, dtype=float32)}


 10%|▉         | 97830/1000000 [4:37:42<27:23:46,  9.15it/s]

{'loss': Array(0.14444698, dtype=float32), 'loss_cross_entropy': Array(0.13679527, dtype=float32)}
{'loss_inverse': Array(1.8239342e-06, dtype=float32)}


 10%|▉         | 97840/1000000 [4:37:44<24:53:53, 10.06it/s]

{'loss': Array(0.14156699, dtype=float32), 'loss_cross_entropy': Array(0.13419467, dtype=float32)}
{'loss_inverse': Array(0.00025603, dtype=float32)}


 10%|▉         | 97850/1000000 [4:37:46<27:27:17,  9.13it/s]

{'loss': Array(0.14584576, dtype=float32), 'loss_cross_entropy': Array(0.13802114, dtype=float32)}
{'loss_inverse': Array(1.1630747e-06, dtype=float32)}


 10%|▉         | 97860/1000000 [4:37:47<25:32:02,  9.81it/s]

{'loss': Array(0.14329518, dtype=float32), 'loss_cross_entropy': Array(0.13565372, dtype=float32)}
{'loss_inverse': Array(2.578059e-05, dtype=float32)}


 10%|▉         | 97870/1000000 [4:37:49<30:52:55,  8.11it/s]

{'loss': Array(0.13339701, dtype=float32), 'loss_cross_entropy': Array(0.12605329, dtype=float32)}
{'loss_inverse': Array(4.249377e-05, dtype=float32)}


 10%|▉         | 97880/1000000 [4:37:50<25:27:22,  9.84it/s]

{'loss': Array(0.14058666, dtype=float32), 'loss_cross_entropy': Array(0.13359934, dtype=float32)}
{'loss_inverse': Array(0.00027196, dtype=float32)}


 10%|▉         | 97890/1000000 [4:37:52<31:11:04,  8.04it/s]

{'loss': Array(0.13975339, dtype=float32), 'loss_cross_entropy': Array(0.13215907, dtype=float32)}
{'loss_inverse': Array(1.0019618e-05, dtype=float32)}


 10%|▉         | 97900/1000000 [4:37:53<26:19:01,  9.52it/s]

{'loss': Array(0.14609969, dtype=float32), 'loss_cross_entropy': Array(0.1385683, dtype=float32)}
{'loss_inverse': Array(2.1862928e-05, dtype=float32)}


 10%|▉         | 97910/1000000 [4:37:55<37:42:05,  6.65it/s]

{'loss': Array(0.13573138, dtype=float32), 'loss_cross_entropy': Array(0.12757455, dtype=float32)}
{'loss_inverse': Array(2.652015e-05, dtype=float32)}


 10%|▉         | 97920/1000000 [4:37:57<26:42:54,  9.38it/s]

{'loss': Array(0.15791942, dtype=float32), 'loss_cross_entropy': Array(0.14923835, dtype=float32)}
{'loss_inverse': Array(0.00025872, dtype=float32)}


 10%|▉         | 97930/1000000 [4:37:58<24:58:02, 10.04it/s]

{'loss': Array(0.1274814, dtype=float32), 'loss_cross_entropy': Array(0.11999025, dtype=float32)}
{'loss_inverse': Array(4.361099e-08, dtype=float32)}


 10%|▉         | 97940/1000000 [4:38:00<28:09:57,  8.90it/s]

{'loss': Array(0.13343358, dtype=float32), 'loss_cross_entropy': Array(0.12592934, dtype=float32)}
{'loss_inverse': Array(0.0004326, dtype=float32)}


 10%|▉         | 97950/1000000 [4:38:01<24:51:47, 10.08it/s]

{'loss': Array(0.12893571, dtype=float32), 'loss_cross_entropy': Array(0.1213422, dtype=float32)}
{'loss_inverse': Array(2.2706216e-05, dtype=float32)}


 10%|▉         | 97960/1000000 [4:38:03<30:34:50,  8.19it/s]

{'loss': Array(0.13114691, dtype=float32), 'loss_cross_entropy': Array(0.12363767, dtype=float32)}
{'loss_inverse': Array(5.6776014e-05, dtype=float32)}


 10%|▉         | 97970/1000000 [4:38:04<25:08:08,  9.97it/s]

{'loss': Array(0.15572187, dtype=float32), 'loss_cross_entropy': Array(0.14696062, dtype=float32)}
{'loss_inverse': Array(0.00029114, dtype=float32)}


 10%|▉         | 97980/1000000 [4:38:06<25:19:19,  9.89it/s]

{'loss': Array(0.1604876, dtype=float32), 'loss_cross_entropy': Array(0.15165295, dtype=float32)}
{'loss_inverse': Array(4.9383118e-05, dtype=float32)}


 10%|▉         | 97990/1000000 [4:38:08<30:07:13,  8.32it/s]

{'loss': Array(0.12893656, dtype=float32), 'loss_cross_entropy': Array(0.12198641, dtype=float32)}
{'loss_inverse': Array(6.4631575e-05, dtype=float32)}


 10%|▉         | 98000/1000000 [4:38:09<26:27:29,  9.47it/s]

{'loss': Array(0.16636173, dtype=float32), 'loss_cross_entropy': Array(0.15794502, dtype=float32)}
{'loss_inverse': Array(0.00012355, dtype=float32)}


 10%|▉         | 98010/1000000 [4:38:18<93:41:19,  2.67it/s] 

{'loss': Array(0.11663147, dtype=float32), 'loss_cross_entropy': Array(0.11052066, dtype=float32)}
{'loss_inverse': Array(2.23578e-06, dtype=float32)}


 10%|▉         | 98020/1000000 [4:38:19<36:38:01,  6.84it/s] 

{'loss': Array(0.13334322, dtype=float32), 'loss_cross_entropy': Array(0.1259058, dtype=float32)}
{'loss_inverse': Array(0.00029669, dtype=float32)}


 10%|▉         | 98030/1000000 [4:38:21<39:12:45,  6.39it/s]

{'loss': Array(0.13028364, dtype=float32), 'loss_cross_entropy': Array(0.12310313, dtype=float32)}
{'loss_inverse': Array(0.00014862, dtype=float32)}


 10%|▉         | 98040/1000000 [4:38:22<26:54:05,  9.31it/s]

{'loss': Array(0.15704548, dtype=float32), 'loss_cross_entropy': Array(0.14930545, dtype=float32)}
{'loss_inverse': Array(2.8626016e-07, dtype=float32)}


 10%|▉         | 98050/1000000 [4:38:24<37:16:38,  6.72it/s]

{'loss': Array(0.15020224, dtype=float32), 'loss_cross_entropy': Array(0.14185528, dtype=float32)}
{'loss_inverse': Array(2.0818728e-05, dtype=float32)}


 10%|▉         | 98060/1000000 [4:38:25<26:33:10,  9.44it/s]

{'loss': Array(0.15529655, dtype=float32), 'loss_cross_entropy': Array(0.14716403, dtype=float32)}
{'loss_inverse': Array(4.0352984e-06, dtype=float32)}


 10%|▉         | 98070/1000000 [4:38:27<24:57:20, 10.04it/s]

{'loss': Array(0.12702915, dtype=float32), 'loss_cross_entropy': Array(0.11920238, dtype=float32)}
{'loss_inverse': Array(3.9752274e-05, dtype=float32)}


 10%|▉         | 98080/1000000 [4:38:29<29:10:52,  8.59it/s]

{'loss': Array(0.14505427, dtype=float32), 'loss_cross_entropy': Array(0.13676533, dtype=float32)}
{'loss_inverse': Array(2.5637637e-05, dtype=float32)}


 10%|▉         | 98090/1000000 [4:38:30<25:47:38,  9.71it/s]

{'loss': Array(0.12640464, dtype=float32), 'loss_cross_entropy': Array(0.11962903, dtype=float32)}
{'loss_inverse': Array(2.7350536e-06, dtype=float32)}


 10%|▉         | 98100/1000000 [4:38:32<38:01:17,  6.59it/s]

{'loss': Array(0.15414172, dtype=float32), 'loss_cross_entropy': Array(0.14578158, dtype=float32)}
{'loss_inverse': Array(8.295254e-07, dtype=float32)}


 10%|▉         | 98110/1000000 [4:38:33<27:34:52,  9.08it/s]

{'loss': Array(0.14110653, dtype=float32), 'loss_cross_entropy': Array(0.13321619, dtype=float32)}
{'loss_inverse': Array(7.138891e-05, dtype=float32)}


 10%|▉         | 98120/1000000 [4:38:35<24:56:44, 10.04it/s]

{'loss': Array(0.12953462, dtype=float32), 'loss_cross_entropy': Array(0.12183966, dtype=float32)}
{'loss_inverse': Array(2.3012574e-06, dtype=float32)}


 10%|▉         | 98130/1000000 [4:38:36<29:24:33,  8.52it/s]

{'loss': Array(0.13123807, dtype=float32), 'loss_cross_entropy': Array(0.12425219, dtype=float32)}
{'loss_inverse': Array(3.911014e-08, dtype=float32)}


 10%|▉         | 98140/1000000 [4:38:38<26:15:08,  9.54it/s]

{'loss': Array(0.13125555, dtype=float32), 'loss_cross_entropy': Array(0.12427489, dtype=float32)}
{'loss_inverse': Array(3.986157e-05, dtype=float32)}


 10%|▉         | 98150/1000000 [4:38:40<29:14:57,  8.56it/s]

{'loss': Array(0.1570667, dtype=float32), 'loss_cross_entropy': Array(0.14858833, dtype=float32)}
{'loss_inverse': Array(2.3247565e-06, dtype=float32)}


 10%|▉         | 98160/1000000 [4:38:41<25:14:53,  9.92it/s]

{'loss': Array(0.14837766, dtype=float32), 'loss_cross_entropy': Array(0.14033617, dtype=float32)}
{'loss_inverse': Array(0.00015935, dtype=float32)}


 10%|▉         | 98170/1000000 [4:38:43<30:40:25,  8.17it/s]

{'loss': Array(0.14019181, dtype=float32), 'loss_cross_entropy': Array(0.13243717, dtype=float32)}
{'loss_inverse': Array(2.7335735e-05, dtype=float32)}


 10%|▉         | 98180/1000000 [4:38:44<25:08:42,  9.96it/s]

{'loss': Array(0.1281606, dtype=float32), 'loss_cross_entropy': Array(0.12090283, dtype=float32)}
{'loss_inverse': Array(3.2367338e-05, dtype=float32)}


 10%|▉         | 98190/1000000 [4:38:46<24:53:40, 10.06it/s]

{'loss': Array(0.12731485, dtype=float32), 'loss_cross_entropy': Array(0.11964058, dtype=float32)}
{'loss_inverse': Array(1.28333895e-05, dtype=float32)}


 10%|▉         | 98200/1000000 [4:38:47<27:04:09,  9.25it/s]

{'loss': Array(0.16204694, dtype=float32), 'loss_cross_entropy': Array(0.15330301, dtype=float32)}
{'loss_inverse': Array(1.0284028e-06, dtype=float32)}


 10%|▉         | 98210/1000000 [4:38:49<25:11:18,  9.94it/s]

{'loss': Array(0.13804567, dtype=float32), 'loss_cross_entropy': Array(0.13068512, dtype=float32)}
{'loss_inverse': Array(2.2004495e-05, dtype=float32)}


 10%|▉         | 98220/1000000 [4:38:51<28:56:21,  8.66it/s]

{'loss': Array(0.14109813, dtype=float32), 'loss_cross_entropy': Array(0.13326018, dtype=float32)}
{'loss_inverse': Array(2.146166e-07, dtype=float32)}


 10%|▉         | 98230/1000000 [4:38:52<25:15:14,  9.92it/s]

{'loss': Array(0.12797558, dtype=float32), 'loss_cross_entropy': Array(0.1207059, dtype=float32)}
{'loss_inverse': Array(6.6080574e-06, dtype=float32)}


 10%|▉         | 98240/1000000 [4:38:54<30:45:14,  8.14it/s]

{'loss': Array(0.14253731, dtype=float32), 'loss_cross_entropy': Array(0.1350411, dtype=float32)}
{'loss_inverse': Array(1.5908627e-05, dtype=float32)}


 10%|▉         | 98250/1000000 [4:38:55<25:46:01,  9.72it/s]

{'loss': Array(0.13749461, dtype=float32), 'loss_cross_entropy': Array(0.1302231, dtype=float32)}
{'loss_inverse': Array(1.8436223e-05, dtype=float32)}


 10%|▉         | 98260/1000000 [4:38:57<24:30:35, 10.22it/s]

{'loss': Array(0.14992481, dtype=float32), 'loss_cross_entropy': Array(0.14193764, dtype=float32)}
{'loss_inverse': Array(8.714335e-05, dtype=float32)}


 10%|▉         | 98270/1000000 [4:38:58<29:23:08,  8.52it/s]

{'loss': Array(0.14585793, dtype=float32), 'loss_cross_entropy': Array(0.13777283, dtype=float32)}
{'loss_inverse': Array(1.791503e-07, dtype=float32)}


 10%|▉         | 98280/1000000 [4:39:00<25:10:10,  9.95it/s]

{'loss': Array(0.14307015, dtype=float32), 'loss_cross_entropy': Array(0.13500886, dtype=float32)}
{'loss_inverse': Array(8.8357055e-08, dtype=float32)}


 10%|▉         | 98290/1000000 [4:39:02<31:14:37,  8.02it/s]

{'loss': Array(0.14933984, dtype=float32), 'loss_cross_entropy': Array(0.14095129, dtype=float32)}
{'loss_inverse': Array(3.2979102e-08, dtype=float32)}


 10%|▉         | 98300/1000000 [4:39:03<25:26:09,  9.85it/s]

{'loss': Array(0.14700735, dtype=float32), 'loss_cross_entropy': Array(0.13886184, dtype=float32)}
{'loss_inverse': Array(7.21182e-08, dtype=float32)}


 10%|▉         | 98310/1000000 [4:39:05<33:54:39,  7.39it/s]

{'loss': Array(0.13853869, dtype=float32), 'loss_cross_entropy': Array(0.13133721, dtype=float32)}
{'loss_inverse': Array(1.3231198e-05, dtype=float32)}


 10%|▉         | 98320/1000000 [4:39:06<27:09:16,  9.22it/s]

{'loss': Array(0.12636775, dtype=float32), 'loss_cross_entropy': Array(0.11894839, dtype=float32)}
{'loss_inverse': Array(5.461256e-05, dtype=float32)}


 10%|▉         | 98330/1000000 [4:39:08<25:49:52,  9.70it/s]

{'loss': Array(0.1641148, dtype=float32), 'loss_cross_entropy': Array(0.156066, dtype=float32)}
{'loss_inverse': Array(1.0112005e-08, dtype=float32)}


 10%|▉         | 98340/1000000 [4:39:09<28:39:46,  8.74it/s]

{'loss': Array(0.14517829, dtype=float32), 'loss_cross_entropy': Array(0.13762544, dtype=float32)}
{'loss_inverse': Array(1.5814148e-06, dtype=float32)}


 10%|▉         | 98350/1000000 [4:39:11<25:58:27,  9.64it/s]

{'loss': Array(0.12836313, dtype=float32), 'loss_cross_entropy': Array(0.12111433, dtype=float32)}
{'loss_inverse': Array(1.48571e-06, dtype=float32)}


 10%|▉         | 98360/1000000 [4:39:13<34:06:37,  7.34it/s]

{'loss': Array(0.15201694, dtype=float32), 'loss_cross_entropy': Array(0.14367226, dtype=float32)}
{'loss_inverse': Array(4.4031914e-08, dtype=float32)}


 10%|▉         | 98370/1000000 [4:39:14<26:33:50,  9.43it/s]

{'loss': Array(0.14565116, dtype=float32), 'loss_cross_entropy': Array(0.13833155, dtype=float32)}
{'loss_inverse': Array(7.412285e-06, dtype=float32)}


 10%|▉         | 98380/1000000 [4:39:16<24:28:19, 10.23it/s]

{'loss': Array(0.1472644, dtype=float32), 'loss_cross_entropy': Array(0.138605, dtype=float32)}
{'loss_inverse': Array(2.166123e-05, dtype=float32)}


 10%|▉         | 98390/1000000 [4:39:17<27:47:51,  9.01it/s]

{'loss': Array(0.15312591, dtype=float32), 'loss_cross_entropy': Array(0.14507365, dtype=float32)}
{'loss_inverse': Array(9.883739e-09, dtype=float32)}


 10%|▉         | 98400/1000000 [4:39:19<25:37:49,  9.77it/s]

{'loss': Array(0.14865397, dtype=float32), 'loss_cross_entropy': Array(0.13995118, dtype=float32)}
{'loss_inverse': Array(2.327886e-08, dtype=float32)}


 10%|▉         | 98410/1000000 [4:39:21<28:33:52,  8.77it/s]

{'loss': Array(0.13479953, dtype=float32), 'loss_cross_entropy': Array(0.12766968, dtype=float32)}
{'loss_inverse': Array(7.3198455e-05, dtype=float32)}


 10%|▉         | 98420/1000000 [4:39:22<24:54:50, 10.05it/s]

{'loss': Array(0.14887331, dtype=float32), 'loss_cross_entropy': Array(0.14031652, dtype=float32)}
{'loss_inverse': Array(9.812809e-06, dtype=float32)}


 10%|▉         | 98430/1000000 [4:39:24<31:07:08,  8.05it/s]

{'loss': Array(0.15979199, dtype=float32), 'loss_cross_entropy': Array(0.15117435, dtype=float32)}
{'loss_inverse': Array(0.0004094, dtype=float32)}


 10%|▉         | 98440/1000000 [4:39:25<25:41:33,  9.75it/s]

{'loss': Array(0.13974719, dtype=float32), 'loss_cross_entropy': Array(0.13171466, dtype=float32)}
{'loss_inverse': Array(7.338858e-06, dtype=float32)}


 10%|▉         | 98450/1000000 [4:39:26<24:14:04, 10.33it/s]

{'loss': Array(0.14542522, dtype=float32), 'loss_cross_entropy': Array(0.13739978, dtype=float32)}
{'loss_inverse': Array(6.6319876e-06, dtype=float32)}


 10%|▉         | 98460/1000000 [4:39:28<28:41:20,  8.73it/s]

{'loss': Array(0.1258717, dtype=float32), 'loss_cross_entropy': Array(0.11872496, dtype=float32)}
{'loss_inverse': Array(4.6672858e-07, dtype=float32)}


 10%|▉         | 98470/1000000 [4:39:30<25:37:12,  9.77it/s]

{'loss': Array(0.14126158, dtype=float32), 'loss_cross_entropy': Array(0.13291971, dtype=float32)}
{'loss_inverse': Array(1.5863446e-07, dtype=float32)}


 10%|▉         | 98480/1000000 [4:39:31<34:28:46,  7.26it/s]

{'loss': Array(0.14023018, dtype=float32), 'loss_cross_entropy': Array(0.13232385, dtype=float32)}
{'loss_inverse': Array(2.006188e-06, dtype=float32)}


 10%|▉         | 98490/1000000 [4:39:33<25:47:59,  9.71it/s]

{'loss': Array(0.12891196, dtype=float32), 'loss_cross_entropy': Array(0.12181169, dtype=float32)}
{'loss_inverse': Array(1.1929806e-07, dtype=float32)}


 10%|▉         | 98500/1000000 [4:39:35<36:38:27,  6.83it/s]

{'loss': Array(0.13738996, dtype=float32), 'loss_cross_entropy': Array(0.12996264, dtype=float32)}
{'loss_inverse': Array(5.7335916e-07, dtype=float32)}


 10%|▉         | 98510/1000000 [4:39:42<85:24:55,  2.93it/s] 

{'loss': Array(0.12818764, dtype=float32), 'loss_cross_entropy': Array(0.1208967, dtype=float32)}
{'loss_inverse': Array(5.5374592e-08, dtype=float32)}


 10%|▉         | 98520/1000000 [4:39:44<34:20:38,  7.29it/s]

{'loss': Array(0.14189708, dtype=float32), 'loss_cross_entropy': Array(0.13447054, dtype=float32)}
{'loss_inverse': Array(2.221701e-05, dtype=float32)}


 10%|▉         | 98530/1000000 [4:39:46<30:09:52,  8.30it/s]

{'loss': Array(0.1407266, dtype=float32), 'loss_cross_entropy': Array(0.13330194, dtype=float32)}
{'loss_inverse': Array(1.2852891e-07, dtype=float32)}


 10%|▉         | 98540/1000000 [4:39:47<25:19:10,  9.89it/s]

{'loss': Array(0.14597805, dtype=float32), 'loss_cross_entropy': Array(0.13801093, dtype=float32)}
{'loss_inverse': Array(4.0349985e-08, dtype=float32)}


 10%|▉         | 98550/1000000 [4:39:49<30:57:07,  8.09it/s]

{'loss': Array(0.15325384, dtype=float32), 'loss_cross_entropy': Array(0.14464894, dtype=float32)}
{'loss_inverse': Array(1.8717206e-08, dtype=float32)}


 10%|▉         | 98560/1000000 [4:39:50<25:35:32,  9.78it/s]

{'loss': Array(0.13971615, dtype=float32), 'loss_cross_entropy': Array(0.13265286, dtype=float32)}
{'loss_inverse': Array(1.6030594e-05, dtype=float32)}


 10%|▉         | 98570/1000000 [4:39:52<24:40:39, 10.15it/s]

{'loss': Array(0.1271799, dtype=float32), 'loss_cross_entropy': Array(0.12007818, dtype=float32)}
{'loss_inverse': Array(2.5883915e-07, dtype=float32)}


 10%|▉         | 98580/1000000 [4:39:53<27:47:35,  9.01it/s]

{'loss': Array(0.13164105, dtype=float32), 'loss_cross_entropy': Array(0.12415794, dtype=float32)}
{'loss_inverse': Array(2.7133959e-08, dtype=float32)}


 10%|▉         | 98590/1000000 [4:39:55<25:24:56,  9.85it/s]

{'loss': Array(0.13118444, dtype=float32), 'loss_cross_entropy': Array(0.1235323, dtype=float32)}
{'loss_inverse': Array(0.00012207, dtype=float32)}


 10%|▉         | 98600/1000000 [4:39:57<28:31:54,  8.78it/s]

{'loss': Array(0.13563916, dtype=float32), 'loss_cross_entropy': Array(0.12830365, dtype=float32)}
{'loss_inverse': Array(7.9730034e-07, dtype=float32)}


 10%|▉         | 98610/1000000 [4:39:58<25:06:17,  9.97it/s]

{'loss': Array(0.16722539, dtype=float32), 'loss_cross_entropy': Array(0.15805086, dtype=float32)}
{'loss_inverse': Array(0.00027176, dtype=float32)}


 10%|▉         | 98620/1000000 [4:40:00<31:10:34,  8.03it/s]

{'loss': Array(0.1343063, dtype=float32), 'loss_cross_entropy': Array(0.12688236, dtype=float32)}
{'loss_inverse': Array(3.3009425e-08, dtype=float32)}


 10%|▉         | 98630/1000000 [4:40:01<26:31:06,  9.44it/s]

{'loss': Array(0.15378495, dtype=float32), 'loss_cross_entropy': Array(0.14567631, dtype=float32)}
{'loss_inverse': Array(1.9753747e-07, dtype=float32)}


 10%|▉         | 98640/1000000 [4:40:03<24:21:11, 10.28it/s]

{'loss': Array(0.12569813, dtype=float32), 'loss_cross_entropy': Array(0.11842201, dtype=float32)}
{'loss_inverse': Array(5.1816837e-08, dtype=float32)}


 10%|▉         | 98650/1000000 [4:40:04<28:10:38,  8.89it/s]

{'loss': Array(0.12668146, dtype=float32), 'loss_cross_entropy': Array(0.11951739, dtype=float32)}
{'loss_inverse': Array(0.00018467, dtype=float32)}


 10%|▉         | 98660/1000000 [4:40:06<25:09:37,  9.95it/s]

{'loss': Array(0.12962663, dtype=float32), 'loss_cross_entropy': Array(0.12179229, dtype=float32)}
{'loss_inverse': Array(7.035388e-05, dtype=float32)}


 10%|▉         | 98670/1000000 [4:40:07<31:18:38,  8.00it/s]

{'loss': Array(0.1466885, dtype=float32), 'loss_cross_entropy': Array(0.13843423, dtype=float32)}
{'loss_inverse': Array(0.00081378, dtype=float32)}


 10%|▉         | 98680/1000000 [4:40:09<26:23:39,  9.49it/s]

{'loss': Array(0.14990221, dtype=float32), 'loss_cross_entropy': Array(0.14207545, dtype=float32)}
{'loss_inverse': Array(0.00013664, dtype=float32)}


 10%|▉         | 98690/1000000 [4:40:11<34:48:57,  7.19it/s]

{'loss': Array(0.13726453, dtype=float32), 'loss_cross_entropy': Array(0.12957972, dtype=float32)}
{'loss_inverse': Array(0.00010925, dtype=float32)}


 10%|▉         | 98700/1000000 [4:40:12<26:33:46,  9.43it/s]

{'loss': Array(0.1392527, dtype=float32), 'loss_cross_entropy': Array(0.13159712, dtype=float32)}
{'loss_inverse': Array(0.00015768, dtype=float32)}


 10%|▉         | 98710/1000000 [4:40:14<24:36:17, 10.18it/s]

{'loss': Array(0.14252064, dtype=float32), 'loss_cross_entropy': Array(0.13498385, dtype=float32)}
{'loss_inverse': Array(2.3228672e-07, dtype=float32)}


 10%|▉         | 98720/1000000 [4:40:15<27:01:46,  9.26it/s]

{'loss': Array(0.11809047, dtype=float32), 'loss_cross_entropy': Array(0.11123458, dtype=float32)}
{'loss_inverse': Array(3.584601e-07, dtype=float32)}


 10%|▉         | 98730/1000000 [4:40:17<25:16:39,  9.90it/s]

{'loss': Array(0.13412473, dtype=float32), 'loss_cross_entropy': Array(0.12659554, dtype=float32)}
{'loss_inverse': Array(7.893004e-05, dtype=float32)}


 10%|▉         | 98740/1000000 [4:40:18<32:57:39,  7.60it/s]

{'loss': Array(0.14305168, dtype=float32), 'loss_cross_entropy': Array(0.13530427, dtype=float32)}
{'loss_inverse': Array(9.8247314e-05, dtype=float32)}


 10%|▉         | 98750/1000000 [4:40:20<25:56:01,  9.65it/s]

{'loss': Array(0.11758628, dtype=float32), 'loss_cross_entropy': Array(0.11096419, dtype=float32)}
{'loss_inverse': Array(3.7519143e-07, dtype=float32)}


 10%|▉         | 98760/1000000 [4:40:21<24:26:03, 10.25it/s]

{'loss': Array(0.16140644, dtype=float32), 'loss_cross_entropy': Array(0.15276824, dtype=float32)}
{'loss_inverse': Array(0.00014857, dtype=float32)}


 10%|▉         | 98770/1000000 [4:40:23<27:32:16,  9.09it/s]

{'loss': Array(0.13969736, dtype=float32), 'loss_cross_entropy': Array(0.13187669, dtype=float32)}
{'loss_inverse': Array(1.1111289e-05, dtype=float32)}


 10%|▉         | 98780/1000000 [4:40:24<24:38:21, 10.16it/s]

{'loss': Array(0.13719922, dtype=float32), 'loss_cross_entropy': Array(0.12897108, dtype=float32)}
{'loss_inverse': Array(4.7380036e-05, dtype=float32)}


 10%|▉         | 98790/1000000 [4:40:26<28:20:48,  8.83it/s]

{'loss': Array(0.14815377, dtype=float32), 'loss_cross_entropy': Array(0.14019115, dtype=float32)}
{'loss_inverse': Array(1.2933939e-06, dtype=float32)}


 10%|▉         | 98800/1000000 [4:40:28<25:10:14,  9.95it/s]

{'loss': Array(0.13647261, dtype=float32), 'loss_cross_entropy': Array(0.12913679, dtype=float32)}
{'loss_inverse': Array(3.986539e-07, dtype=float32)}


 10%|▉         | 98810/1000000 [4:40:29<31:10:30,  8.03it/s]

{'loss': Array(0.13950303, dtype=float32), 'loss_cross_entropy': Array(0.13169259, dtype=float32)}
{'loss_inverse': Array(6.4523414e-07, dtype=float32)}


 10%|▉         | 98820/1000000 [4:40:31<25:43:50,  9.73it/s]

{'loss': Array(0.13227645, dtype=float32), 'loss_cross_entropy': Array(0.12485709, dtype=float32)}
{'loss_inverse': Array(2.229647e-06, dtype=float32)}


 10%|▉         | 98830/1000000 [4:40:32<24:49:34, 10.08it/s]

{'loss': Array(0.1442404, dtype=float32), 'loss_cross_entropy': Array(0.13637348, dtype=float32)}
{'loss_inverse': Array(1.4965836e-05, dtype=float32)}


 10%|▉         | 98840/1000000 [4:40:34<28:38:09,  8.74it/s]

{'loss': Array(0.11855477, dtype=float32), 'loss_cross_entropy': Array(0.11184114, dtype=float32)}
{'loss_inverse': Array(9.4525355e-08, dtype=float32)}


 10%|▉         | 98850/1000000 [4:40:35<25:12:16,  9.93it/s]

{'loss': Array(0.14148165, dtype=float32), 'loss_cross_entropy': Array(0.13361514, dtype=float32)}
{'loss_inverse': Array(5.140145e-07, dtype=float32)}


 10%|▉         | 98860/1000000 [4:40:37<32:53:22,  7.61it/s]

{'loss': Array(0.15176229, dtype=float32), 'loss_cross_entropy': Array(0.14377265, dtype=float32)}
{'loss_inverse': Array(1.3799299e-06, dtype=float32)}


 10%|▉         | 98870/1000000 [4:40:38<25:42:36,  9.74it/s]

{'loss': Array(0.13729452, dtype=float32), 'loss_cross_entropy': Array(0.12956087, dtype=float32)}
{'loss_inverse': Array(2.417838e-05, dtype=float32)}


 10%|▉         | 98880/1000000 [4:40:40<37:21:16,  6.70it/s]

{'loss': Array(0.15614364, dtype=float32), 'loss_cross_entropy': Array(0.14759575, dtype=float32)}
{'loss_inverse': Array(0.00027695, dtype=float32)}


 10%|▉         | 98890/1000000 [4:40:42<27:48:28,  9.00it/s]

{'loss': Array(0.130652, dtype=float32), 'loss_cross_entropy': Array(0.1234557, dtype=float32)}
{'loss_inverse': Array(0.00032488, dtype=float32)}


 10%|▉         | 98900/1000000 [4:40:43<38:07:46,  6.56it/s]

{'loss': Array(0.14783902, dtype=float32), 'loss_cross_entropy': Array(0.14017469, dtype=float32)}
{'loss_inverse': Array(4.0830623e-06, dtype=float32)}


 10%|▉         | 98910/1000000 [4:40:45<26:44:52,  9.36it/s]

{'loss': Array(0.13935293, dtype=float32), 'loss_cross_entropy': Array(0.13192205, dtype=float32)}
{'loss_inverse': Array(1.5299219e-06, dtype=float32)}


 10%|▉         | 98920/1000000 [4:40:46<24:50:22, 10.08it/s]

{'loss': Array(0.14215206, dtype=float32), 'loss_cross_entropy': Array(0.13391401, dtype=float32)}
{'loss_inverse': Array(2.5450092e-05, dtype=float32)}


 10%|▉         | 98930/1000000 [4:40:48<31:07:53,  8.04it/s]

{'loss': Array(0.11487316, dtype=float32), 'loss_cross_entropy': Array(0.10868126, dtype=float32)}
{'loss_inverse': Array(5.2851276e-05, dtype=float32)}


 10%|▉         | 98940/1000000 [4:40:49<25:16:06,  9.91it/s]

{'loss': Array(0.14462242, dtype=float32), 'loss_cross_entropy': Array(0.13635994, dtype=float32)}
{'loss_inverse': Array(8.219358e-06, dtype=float32)}


 10%|▉         | 98950/1000000 [4:40:51<37:13:35,  6.72it/s]

{'loss': Array(0.1414253, dtype=float32), 'loss_cross_entropy': Array(0.13359581, dtype=float32)}
{'loss_inverse': Array(5.4913933e-05, dtype=float32)}


 10%|▉         | 98960/1000000 [4:40:53<26:51:12,  9.32it/s]

{'loss': Array(0.12985004, dtype=float32), 'loss_cross_entropy': Array(0.12243686, dtype=float32)}
{'loss_inverse': Array(3.1745706e-06, dtype=float32)}


 10%|▉         | 98970/1000000 [4:40:54<38:01:53,  6.58it/s]

{'loss': Array(0.1471782, dtype=float32), 'loss_cross_entropy': Array(0.13921122, dtype=float32)}
{'loss_inverse': Array(1.4109066e-05, dtype=float32)}


 10%|▉         | 98980/1000000 [4:40:56<26:29:32,  9.45it/s]

{'loss': Array(0.13824931, dtype=float32), 'loss_cross_entropy': Array(0.13090423, dtype=float32)}
{'loss_inverse': Array(3.713585e-07, dtype=float32)}


 10%|▉         | 98990/1000000 [4:40:57<24:50:40, 10.07it/s]

{'loss': Array(0.15169029, dtype=float32), 'loss_cross_entropy': Array(0.14353818, dtype=float32)}
{'loss_inverse': Array(0.00014985, dtype=float32)}


 10%|▉         | 99000/1000000 [4:40:59<33:28:53,  7.48it/s]

{'loss': Array(0.1508163, dtype=float32), 'loss_cross_entropy': Array(0.14333327, dtype=float32)}
{'loss_inverse': Array(1.3095058e-07, dtype=float32)}


 10%|▉         | 99010/1000000 [4:41:07<86:00:08,  2.91it/s] 

{'loss': Array(0.1500721, dtype=float32), 'loss_cross_entropy': Array(0.14165615, dtype=float32)}
{'loss_inverse': Array(2.3671051e-05, dtype=float32)}


 10%|▉         | 99020/1000000 [4:41:09<42:11:46,  5.93it/s]

{'loss': Array(0.13372369, dtype=float32), 'loss_cross_entropy': Array(0.12598407, dtype=float32)}
{'loss_inverse': Array(1.0039485e-06, dtype=float32)}


 10%|▉         | 99030/1000000 [4:41:10<28:26:44,  8.80it/s]

{'loss': Array(0.14193186, dtype=float32), 'loss_cross_entropy': Array(0.13384603, dtype=float32)}
{'loss_inverse': Array(1.3311308e-06, dtype=float32)}


 10%|▉         | 99040/1000000 [4:41:12<34:12:55,  7.31it/s]

{'loss': Array(0.12327846, dtype=float32), 'loss_cross_entropy': Array(0.11622434, dtype=float32)}
{'loss_inverse': Array(9.072363e-06, dtype=float32)}


 10%|▉         | 99050/1000000 [4:41:14<26:12:47,  9.55it/s]

{'loss': Array(0.13884388, dtype=float32), 'loss_cross_entropy': Array(0.13149369, dtype=float32)}
{'loss_inverse': Array(6.0472044e-05, dtype=float32)}


 10%|▉         | 99060/1000000 [4:41:15<37:33:47,  6.66it/s]

{'loss': Array(0.14491938, dtype=float32), 'loss_cross_entropy': Array(0.13672906, dtype=float32)}
{'loss_inverse': Array(5.529625e-07, dtype=float32)}


 10%|▉         | 99070/1000000 [4:41:17<26:49:57,  9.33it/s]

{'loss': Array(0.13405994, dtype=float32), 'loss_cross_entropy': Array(0.12641919, dtype=float32)}
{'loss_inverse': Array(6.58437e-06, dtype=float32)}


 10%|▉         | 99080/1000000 [4:41:18<25:14:46,  9.91it/s]

{'loss': Array(0.14757797, dtype=float32), 'loss_cross_entropy': Array(0.13988964, dtype=float32)}
{'loss_inverse': Array(3.6775804e-05, dtype=float32)}


 10%|▉         | 99090/1000000 [4:41:20<30:42:10,  8.15it/s]

{'loss': Array(0.16470353, dtype=float32), 'loss_cross_entropy': Array(0.15554841, dtype=float32)}
{'loss_inverse': Array(1.3949172e-05, dtype=float32)}


 10%|▉         | 99100/1000000 [4:41:21<25:11:12,  9.94it/s]

{'loss': Array(0.14003171, dtype=float32), 'loss_cross_entropy': Array(0.13217779, dtype=float32)}
{'loss_inverse': Array(7.782106e-07, dtype=float32)}


 10%|▉         | 99110/1000000 [4:41:23<25:09:14,  9.95it/s]

{'loss': Array(0.13289616, dtype=float32), 'loss_cross_entropy': Array(0.12537485, dtype=float32)}
{'loss_inverse': Array(2.200841e-07, dtype=float32)}


 10%|▉         | 99120/1000000 [4:41:25<27:44:51,  9.02it/s]

{'loss': Array(0.12916665, dtype=float32), 'loss_cross_entropy': Array(0.122343, dtype=float32)}
{'loss_inverse': Array(6.615144e-06, dtype=float32)}


 10%|▉         | 99130/1000000 [4:41:26<25:39:59,  9.75it/s]

{'loss': Array(0.11726486, dtype=float32), 'loss_cross_entropy': Array(0.11043809, dtype=float32)}
{'loss_inverse': Array(8.2393555e-05, dtype=float32)}


 10%|▉         | 99140/1000000 [4:41:28<30:41:58,  8.15it/s]

{'loss': Array(0.12469403, dtype=float32), 'loss_cross_entropy': Array(0.11763299, dtype=float32)}
{'loss_inverse': Array(1.2437885e-06, dtype=float32)}


 10%|▉         | 99150/1000000 [4:41:29<25:37:06,  9.77it/s]

{'loss': Array(0.12914553, dtype=float32), 'loss_cross_entropy': Array(0.12192136, dtype=float32)}
{'loss_inverse': Array(9.751369e-08, dtype=float32)}


 10%|▉         | 99160/1000000 [4:41:31<30:48:00,  8.12it/s]

{'loss': Array(0.14414142, dtype=float32), 'loss_cross_entropy': Array(0.13596016, dtype=float32)}
{'loss_inverse': Array(2.2066409e-07, dtype=float32)}


 10%|▉         | 99170/1000000 [4:41:32<25:32:27,  9.80it/s]

{'loss': Array(0.15422444, dtype=float32), 'loss_cross_entropy': Array(0.1459439, dtype=float32)}
{'loss_inverse': Array(7.417511e-05, dtype=float32)}


 10%|▉         | 99180/1000000 [4:41:34<33:37:54,  7.44it/s]

{'loss': Array(0.14920668, dtype=float32), 'loss_cross_entropy': Array(0.14059053, dtype=float32)}
{'loss_inverse': Array(2.5354198e-06, dtype=float32)}


 10%|▉         | 99190/1000000 [4:41:36<26:20:56,  9.50it/s]

{'loss': Array(0.11967406, dtype=float32), 'loss_cross_entropy': Array(0.11281019, dtype=float32)}
{'loss_inverse': Array(0.00013867, dtype=float32)}


 10%|▉         | 99200/1000000 [4:41:37<24:48:36, 10.09it/s]

{'loss': Array(0.13126269, dtype=float32), 'loss_cross_entropy': Array(0.124016, dtype=float32)}
{'loss_inverse': Array(2.2449272e-07, dtype=float32)}


 10%|▉         | 99210/1000000 [4:41:39<28:38:38,  8.74it/s]

{'loss': Array(0.13424996, dtype=float32), 'loss_cross_entropy': Array(0.12646231, dtype=float32)}
{'loss_inverse': Array(0.00017219, dtype=float32)}


 10%|▉         | 99220/1000000 [4:41:40<24:56:32, 10.03it/s]

{'loss': Array(0.13761541, dtype=float32), 'loss_cross_entropy': Array(0.1300099, dtype=float32)}
{'loss_inverse': Array(8.588661e-05, dtype=float32)}


 10%|▉         | 99230/1000000 [4:41:42<33:28:57,  7.47it/s]

{'loss': Array(0.13847181, dtype=float32), 'loss_cross_entropy': Array(0.13075803, dtype=float32)}
{'loss_inverse': Array(0.0001076, dtype=float32)}


 10%|▉         | 99240/1000000 [4:41:43<25:47:20,  9.70it/s]

{'loss': Array(0.15447347, dtype=float32), 'loss_cross_entropy': Array(0.1458354, dtype=float32)}
{'loss_inverse': Array(1.802286e-06, dtype=float32)}


 10%|▉         | 99250/1000000 [4:41:45<33:30:28,  7.47it/s]

{'loss': Array(0.1406418, dtype=float32), 'loss_cross_entropy': Array(0.132374, dtype=float32)}
{'loss_inverse': Array(1.5685537e-06, dtype=float32)}


 10%|▉         | 99260/1000000 [4:41:46<25:39:21,  9.75it/s]

{'loss': Array(0.16039743, dtype=float32), 'loss_cross_entropy': Array(0.15222912, dtype=float32)}
{'loss_inverse': Array(4.705376e-07, dtype=float32)}


 10%|▉         | 99270/1000000 [4:41:48<25:34:16,  9.78it/s]

{'loss': Array(0.13062803, dtype=float32), 'loss_cross_entropy': Array(0.12359335, dtype=float32)}
{'loss_inverse': Array(8.328998e-05, dtype=float32)}


 10%|▉         | 99280/1000000 [4:41:50<27:57:25,  8.95it/s]

{'loss': Array(0.14350282, dtype=float32), 'loss_cross_entropy': Array(0.13538203, dtype=float32)}
{'loss_inverse': Array(1.4874792e-05, dtype=float32)}


 10%|▉         | 99290/1000000 [4:41:51<24:45:16, 10.11it/s]

{'loss': Array(0.13572223, dtype=float32), 'loss_cross_entropy': Array(0.12767725, dtype=float32)}
{'loss_inverse': Array(4.3268687e-06, dtype=float32)}


 10%|▉         | 99300/1000000 [4:41:53<33:17:38,  7.51it/s]

{'loss': Array(0.13035958, dtype=float32), 'loss_cross_entropy': Array(0.12374827, dtype=float32)}
{'loss_inverse': Array(7.8517936e-05, dtype=float32)}


 10%|▉         | 99310/1000000 [4:41:54<26:25:41,  9.47it/s]

{'loss': Array(0.12790605, dtype=float32), 'loss_cross_entropy': Array(0.12084216, dtype=float32)}
{'loss_inverse': Array(1.5161206e-07, dtype=float32)}


 10%|▉         | 99320/1000000 [4:41:56<37:06:52,  6.74it/s]

{'loss': Array(0.13567092, dtype=float32), 'loss_cross_entropy': Array(0.12816568, dtype=float32)}
{'loss_inverse': Array(6.696408e-05, dtype=float32)}


 10%|▉         | 99330/1000000 [4:41:57<27:09:05,  9.21it/s]

{'loss': Array(0.15269189, dtype=float32), 'loss_cross_entropy': Array(0.14461653, dtype=float32)}
{'loss_inverse': Array(1.2576133e-06, dtype=float32)}


 10%|▉         | 99340/1000000 [4:41:59<25:21:05,  9.87it/s]

{'loss': Array(0.13969342, dtype=float32), 'loss_cross_entropy': Array(0.13209379, dtype=float32)}
{'loss_inverse': Array(0.00016257, dtype=float32)}


 10%|▉         | 99350/1000000 [4:42:01<28:12:59,  8.87it/s]

{'loss': Array(0.14224437, dtype=float32), 'loss_cross_entropy': Array(0.13414429, dtype=float32)}
{'loss_inverse': Array(2.336875e-05, dtype=float32)}


 10%|▉         | 99360/1000000 [4:42:02<37:03:24,  6.75it/s]

{'loss': Array(0.1253293, dtype=float32), 'loss_cross_entropy': Array(0.11868787, dtype=float32)}
{'loss_inverse': Array(0.00013428, dtype=float32)}


 10%|▉         | 99370/1000000 [4:42:04<26:29:02,  9.45it/s]

{'loss': Array(0.1468722, dtype=float32), 'loss_cross_entropy': Array(0.13941431, dtype=float32)}
{'loss_inverse': Array(2.235011e-05, dtype=float32)}


 10%|▉         | 99380/1000000 [4:42:05<24:44:24, 10.11it/s]

{'loss': Array(0.12639964, dtype=float32), 'loss_cross_entropy': Array(0.11924016, dtype=float32)}
{'loss_inverse': Array(0.00014322, dtype=float32)}


 10%|▉         | 99390/1000000 [4:42:07<28:56:28,  8.64it/s]

{'loss': Array(0.12833786, dtype=float32), 'loss_cross_entropy': Array(0.121273, dtype=float32)}
{'loss_inverse': Array(2.1170813e-06, dtype=float32)}


 10%|▉         | 99400/1000000 [4:42:09<26:09:35,  9.56it/s]

{'loss': Array(0.14271356, dtype=float32), 'loss_cross_entropy': Array(0.13459735, dtype=float32)}
{'loss_inverse': Array(9.207319e-05, dtype=float32)}


 10%|▉         | 99410/1000000 [4:42:10<30:06:41,  8.31it/s]

{'loss': Array(0.13580759, dtype=float32), 'loss_cross_entropy': Array(0.12865911, dtype=float32)}
{'loss_inverse': Array(3.280483e-06, dtype=float32)}


 10%|▉         | 99420/1000000 [4:42:12<25:48:15,  9.69it/s]

{'loss': Array(0.12932126, dtype=float32), 'loss_cross_entropy': Array(0.12199744, dtype=float32)}
{'loss_inverse': Array(0.00080207, dtype=float32)}


 10%|▉         | 99430/1000000 [4:42:14<31:13:21,  8.01it/s]

{'loss': Array(0.13234042, dtype=float32), 'loss_cross_entropy': Array(0.12496503, dtype=float32)}
{'loss_inverse': Array(6.142696e-05, dtype=float32)}


 10%|▉         | 99440/1000000 [4:42:15<25:34:58,  9.78it/s]

{'loss': Array(0.13707761, dtype=float32), 'loss_cross_entropy': Array(0.12978461, dtype=float32)}
{'loss_inverse': Array(1.4902683e-07, dtype=float32)}


 10%|▉         | 99450/1000000 [4:42:16<24:52:25, 10.06it/s]

{'loss': Array(0.1323014, dtype=float32), 'loss_cross_entropy': Array(0.12446751, dtype=float32)}
{'loss_inverse': Array(8.780818e-08, dtype=float32)}


 10%|▉         | 99460/1000000 [4:42:18<28:55:14,  8.65it/s]

{'loss': Array(0.13196127, dtype=float32), 'loss_cross_entropy': Array(0.12457304, dtype=float32)}
{'loss_inverse': Array(5.1263317e-08, dtype=float32)}


 10%|▉         | 99470/1000000 [4:42:20<24:55:01, 10.04it/s]

{'loss': Array(0.13959622, dtype=float32), 'loss_cross_entropy': Array(0.13167295, dtype=float32)}
{'loss_inverse': Array(1.3740009e-06, dtype=float32)}


 10%|▉         | 99480/1000000 [4:42:21<30:39:26,  8.16it/s]

{'loss': Array(0.12630737, dtype=float32), 'loss_cross_entropy': Array(0.11893799, dtype=float32)}
{'loss_inverse': Array(2.8319523e-06, dtype=float32)}


 10%|▉         | 99490/1000000 [4:42:23<26:00:20,  9.62it/s]

{'loss': Array(0.14276488, dtype=float32), 'loss_cross_entropy': Array(0.13475224, dtype=float32)}
{'loss_inverse': Array(0.00045735, dtype=float32)}


 10%|▉         | 99500/1000000 [4:42:24<33:34:59,  7.45it/s]

{'loss': Array(0.14754592, dtype=float32), 'loss_cross_entropy': Array(0.13993509, dtype=float32)}
{'loss_inverse': Array(1.4945367e-07, dtype=float32)}


 10%|▉         | 99510/1000000 [4:42:32<83:39:40,  2.99it/s] 

{'loss': Array(0.12995994, dtype=float32), 'loss_cross_entropy': Array(0.12276075, dtype=float32)}
{'loss_inverse': Array(6.217689e-08, dtype=float32)}


 10%|▉         | 99520/1000000 [4:42:34<43:32:10,  5.75it/s]

{'loss': Array(0.12968016, dtype=float32), 'loss_cross_entropy': Array(0.12273892, dtype=float32)}
{'loss_inverse': Array(6.9957964e-06, dtype=float32)}


 10%|▉         | 99530/1000000 [4:42:35<27:28:51,  9.10it/s]

{'loss': Array(0.14057462, dtype=float32), 'loss_cross_entropy': Array(0.13278516, dtype=float32)}
{'loss_inverse': Array(4.6328296e-06, dtype=float32)}


 10%|▉         | 99540/1000000 [4:42:37<25:25:46,  9.84it/s]

{'loss': Array(0.13226031, dtype=float32), 'loss_cross_entropy': Array(0.12533729, dtype=float32)}
{'loss_inverse': Array(0.00010942, dtype=float32)}


 10%|▉         | 99550/1000000 [4:42:39<28:31:56,  8.77it/s]

{'loss': Array(0.13717946, dtype=float32), 'loss_cross_entropy': Array(0.12952858, dtype=float32)}
{'loss_inverse': Array(3.2668632e-07, dtype=float32)}


 10%|▉         | 99560/1000000 [4:42:40<24:43:00, 10.12it/s]

{'loss': Array(0.1499234, dtype=float32), 'loss_cross_entropy': Array(0.14194137, dtype=float32)}
{'loss_inverse': Array(0.00022072, dtype=float32)}


 10%|▉         | 99570/1000000 [4:42:42<33:09:16,  7.54it/s]

{'loss': Array(0.13234472, dtype=float32), 'loss_cross_entropy': Array(0.1246817, dtype=float32)}
{'loss_inverse': Array(0.00015183, dtype=float32)}


 10%|▉         | 99580/1000000 [4:42:43<26:07:47,  9.57it/s]

{'loss': Array(0.12678733, dtype=float32), 'loss_cross_entropy': Array(0.11987557, dtype=float32)}
{'loss_inverse': Array(1.0714039e-05, dtype=float32)}


 10%|▉         | 99590/1000000 [4:42:45<33:38:08,  7.44it/s]

{'loss': Array(0.14775534, dtype=float32), 'loss_cross_entropy': Array(0.14000471, dtype=float32)}
{'loss_inverse': Array(8.9775006e-07, dtype=float32)}


 10%|▉         | 99600/1000000 [4:42:46<26:06:05,  9.58it/s]

{'loss': Array(0.14024304, dtype=float32), 'loss_cross_entropy': Array(0.13278233, dtype=float32)}
{'loss_inverse': Array(9.4613686e-05, dtype=float32)}


 10%|▉         | 99610/1000000 [4:42:48<25:07:21,  9.96it/s]

{'loss': Array(0.1350997, dtype=float32), 'loss_cross_entropy': Array(0.12749098, dtype=float32)}
{'loss_inverse': Array(1.6358177e-06, dtype=float32)}


 10%|▉         | 99620/1000000 [4:42:50<27:45:00,  9.01it/s]

{'loss': Array(0.12677065, dtype=float32), 'loss_cross_entropy': Array(0.11989112, dtype=float32)}
{'loss_inverse': Array(0.00010295, dtype=float32)}


 10%|▉         | 99630/1000000 [4:42:51<25:18:08,  9.88it/s]

{'loss': Array(0.14383398, dtype=float32), 'loss_cross_entropy': Array(0.13630325, dtype=float32)}
{'loss_inverse': Array(3.5208728e-07, dtype=float32)}


 10%|▉         | 99640/1000000 [4:42:53<33:21:18,  7.50it/s]

{'loss': Array(0.13887669, dtype=float32), 'loss_cross_entropy': Array(0.13088994, dtype=float32)}
{'loss_inverse': Array(2.1560106e-07, dtype=float32)}


 10%|▉         | 99650/1000000 [4:42:54<25:56:53,  9.64it/s]

{'loss': Array(0.12362894, dtype=float32), 'loss_cross_entropy': Array(0.11646493, dtype=float32)}
{'loss_inverse': Array(5.6878243e-08, dtype=float32)}


 10%|▉         | 99660/1000000 [4:42:56<36:53:14,  6.78it/s]

{'loss': Array(0.15202953, dtype=float32), 'loss_cross_entropy': Array(0.14397424, dtype=float32)}
{'loss_inverse': Array(1.4040405e-05, dtype=float32)}


 10%|▉         | 99670/1000000 [4:42:57<26:45:36,  9.35it/s]

{'loss': Array(0.14452809, dtype=float32), 'loss_cross_entropy': Array(0.13705651, dtype=float32)}
{'loss_inverse': Array(0.00014556, dtype=float32)}


 10%|▉         | 99680/1000000 [4:42:59<24:50:03, 10.07it/s]

{'loss': Array(0.13945268, dtype=float32), 'loss_cross_entropy': Array(0.1320005, dtype=float32)}
{'loss_inverse': Array(3.810637e-05, dtype=float32)}


 10%|▉         | 99690/1000000 [4:43:01<28:06:58,  8.89it/s]

{'loss': Array(0.14211817, dtype=float32), 'loss_cross_entropy': Array(0.1346066, dtype=float32)}
{'loss_inverse': Array(7.15482e-05, dtype=float32)}


 10%|▉         | 99700/1000000 [4:43:02<37:58:53,  6.58it/s]

{'loss': Array(0.1601506, dtype=float32), 'loss_cross_entropy': Array(0.15170681, dtype=float32)}
{'loss_inverse': Array(5.0905496e-06, dtype=float32)}


 10%|▉         | 99710/1000000 [4:43:04<26:34:03,  9.41it/s]

{'loss': Array(0.14192526, dtype=float32), 'loss_cross_entropy': Array(0.1343972, dtype=float32)}
{'loss_inverse': Array(0.00014948, dtype=float32)}


 10%|▉         | 99720/1000000 [4:43:05<24:25:21, 10.24it/s]

{'loss': Array(0.15361464, dtype=float32), 'loss_cross_entropy': Array(0.14590277, dtype=float32)}
{'loss_inverse': Array(1.26927835e-05, dtype=float32)}


 10%|▉         | 99730/1000000 [4:43:07<28:15:37,  8.85it/s]

{'loss': Array(0.15368554, dtype=float32), 'loss_cross_entropy': Array(0.14521441, dtype=float32)}
{'loss_inverse': Array(8.608048e-05, dtype=float32)}


 10%|▉         | 99740/1000000 [4:43:08<26:17:35,  9.51it/s]

{'loss': Array(0.13783853, dtype=float32), 'loss_cross_entropy': Array(0.13006534, dtype=float32)}
{'loss_inverse': Array(5.7303414e-06, dtype=float32)}


 10%|▉         | 99750/1000000 [4:43:10<30:07:28,  8.30it/s]

{'loss': Array(0.14184855, dtype=float32), 'loss_cross_entropy': Array(0.13408528, dtype=float32)}
{'loss_inverse': Array(1.5430056e-06, dtype=float32)}


 10%|▉         | 99760/1000000 [4:43:12<25:56:35,  9.64it/s]

{'loss': Array(0.13710271, dtype=float32), 'loss_cross_entropy': Array(0.12980224, dtype=float32)}
{'loss_inverse': Array(4.7801932e-05, dtype=float32)}


 10%|▉         | 99770/1000000 [4:43:13<31:13:58,  8.01it/s]

{'loss': Array(0.13406849, dtype=float32), 'loss_cross_entropy': Array(0.12694861, dtype=float32)}
{'loss_inverse': Array(4.0935098e-05, dtype=float32)}


 10%|▉         | 99780/1000000 [4:43:15<26:08:03,  9.57it/s]

{'loss': Array(0.13242285, dtype=float32), 'loss_cross_entropy': Array(0.12585282, dtype=float32)}
{'loss_inverse': Array(1.2171068e-05, dtype=float32)}


 10%|▉         | 99790/1000000 [4:43:16<25:09:37,  9.94it/s]

{'loss': Array(0.14184265, dtype=float32), 'loss_cross_entropy': Array(0.1335837, dtype=float32)}
{'loss_inverse': Array(2.2039181e-07, dtype=float32)}


 10%|▉         | 99800/1000000 [4:43:18<29:00:02,  8.62it/s]

{'loss': Array(0.12636308, dtype=float32), 'loss_cross_entropy': Array(0.11944908, dtype=float32)}
{'loss_inverse': Array(4.579351e-06, dtype=float32)}


 10%|▉         | 99810/1000000 [4:43:20<25:53:52,  9.66it/s]

{'loss': Array(0.14397773, dtype=float32), 'loss_cross_entropy': Array(0.13578723, dtype=float32)}
{'loss_inverse': Array(5.367369e-07, dtype=float32)}


 10%|▉         | 99820/1000000 [4:43:21<30:28:35,  8.20it/s]

{'loss': Array(0.153432, dtype=float32), 'loss_cross_entropy': Array(0.14477903, dtype=float32)}
{'loss_inverse': Array(8.279431e-08, dtype=float32)}


 10%|▉         | 99830/1000000 [4:43:23<25:50:40,  9.68it/s]

{'loss': Array(0.13162552, dtype=float32), 'loss_cross_entropy': Array(0.1237989, dtype=float32)}
{'loss_inverse': Array(9.860869e-05, dtype=float32)}


 10%|▉         | 99840/1000000 [4:43:25<34:41:49,  7.21it/s]

{'loss': Array(0.14664501, dtype=float32), 'loss_cross_entropy': Array(0.13829671, dtype=float32)}
{'loss_inverse': Array(3.1836174e-05, dtype=float32)}


 10%|▉         | 99850/1000000 [4:43:26<26:22:10,  9.48it/s]

{'loss': Array(0.15642674, dtype=float32), 'loss_cross_entropy': Array(0.14775515, dtype=float32)}
{'loss_inverse': Array(3.5255264e-06, dtype=float32)}


 10%|▉         | 99860/1000000 [4:43:27<24:57:28, 10.02it/s]

{'loss': Array(0.13667795, dtype=float32), 'loss_cross_entropy': Array(0.12881689, dtype=float32)}
{'loss_inverse': Array(5.6699304e-05, dtype=float32)}


 10%|▉         | 99870/1000000 [4:43:29<27:40:32,  9.03it/s]

{'loss': Array(0.13106005, dtype=float32), 'loss_cross_entropy': Array(0.12401531, dtype=float32)}
{'loss_inverse': Array(1.15151614e-07, dtype=float32)}


 10%|▉         | 99880/1000000 [4:43:31<25:20:48,  9.86it/s]

{'loss': Array(0.13747878, dtype=float32), 'loss_cross_entropy': Array(0.1293533, dtype=float32)}
{'loss_inverse': Array(7.852082e-06, dtype=float32)}


 10%|▉         | 99890/1000000 [4:43:32<33:33:39,  7.45it/s]

{'loss': Array(0.13780311, dtype=float32), 'loss_cross_entropy': Array(0.12969919, dtype=float32)}
{'loss_inverse': Array(1.4388723e-06, dtype=float32)}


 10%|▉         | 99900/1000000 [4:43:34<26:07:07,  9.57it/s]

{'loss': Array(0.16116527, dtype=float32), 'loss_cross_entropy': Array(0.15229672, dtype=float32)}
{'loss_inverse': Array(2.807951e-05, dtype=float32)}


 10%|▉         | 99910/1000000 [4:43:35<24:59:01, 10.01it/s]

{'loss': Array(0.13440453, dtype=float32), 'loss_cross_entropy': Array(0.12719366, dtype=float32)}
{'loss_inverse': Array(1.8154457e-06, dtype=float32)}


 10%|▉         | 99920/1000000 [4:43:37<27:55:25,  8.95it/s]

{'loss': Array(0.12687345, dtype=float32), 'loss_cross_entropy': Array(0.11957257, dtype=float32)}
{'loss_inverse': Array(4.680686e-06, dtype=float32)}


 10%|▉         | 99930/1000000 [4:43:38<25:00:58,  9.99it/s]

{'loss': Array(0.13513164, dtype=float32), 'loss_cross_entropy': Array(0.12830697, dtype=float32)}
{'loss_inverse': Array(0.00016528, dtype=float32)}


 10%|▉         | 99940/1000000 [4:43:40<28:40:19,  8.72it/s]

{'loss': Array(0.13747464, dtype=float32), 'loss_cross_entropy': Array(0.12978275, dtype=float32)}
{'loss_inverse': Array(7.685897e-05, dtype=float32)}


 10%|▉         | 99950/1000000 [4:43:42<24:53:44, 10.04it/s]

{'loss': Array(0.13261913, dtype=float32), 'loss_cross_entropy': Array(0.12524985, dtype=float32)}
{'loss_inverse': Array(2.002462e-05, dtype=float32)}


 10%|▉         | 99960/1000000 [4:43:43<30:45:13,  8.13it/s]

{'loss': Array(0.13248134, dtype=float32), 'loss_cross_entropy': Array(0.12435206, dtype=float32)}
{'loss_inverse': Array(1.3385282e-07, dtype=float32)}


 10%|▉         | 99970/1000000 [4:43:45<25:22:20,  9.85it/s]

{'loss': Array(0.16407952, dtype=float32), 'loss_cross_entropy': Array(0.15575743, dtype=float32)}
{'loss_inverse': Array(1.0613445e-07, dtype=float32)}


 10%|▉         | 99980/1000000 [4:43:46<24:23:46, 10.25it/s]

{'loss': Array(0.1389977, dtype=float32), 'loss_cross_entropy': Array(0.13159941, dtype=float32)}
{'loss_inverse': Array(0.00033586, dtype=float32)}


 10%|▉         | 99990/1000000 [4:43:48<28:45:17,  8.69it/s]

{'loss': Array(0.12790817, dtype=float32), 'loss_cross_entropy': Array(0.12103758, dtype=float32)}
{'loss_inverse': Array(3.814742e-06, dtype=float32)}


 10%|█         | 100000/1000000 [4:43:49<25:05:37,  9.96it/s]

{'loss': Array(0.13627067, dtype=float32), 'loss_cross_entropy': Array(0.12869544, dtype=float32)}
{'loss_inverse': Array(4.4095073e-06, dtype=float32)}


 10%|█         | 100010/1000000 [4:43:58<94:04:01,  2.66it/s] 

{'loss': Array(0.13861237, dtype=float32), 'loss_cross_entropy': Array(0.13108025, dtype=float32)}
{'loss_inverse': Array(4.333859e-05, dtype=float32)}


 10%|█         | 100020/1000000 [4:43:59<35:49:26,  6.98it/s]

{'loss': Array(0.13496207, dtype=float32), 'loss_cross_entropy': Array(0.12792636, dtype=float32)}
{'loss_inverse': Array(1.126278e-07, dtype=float32)}


 10%|█         | 100030/1000000 [4:44:01<38:34:16,  6.48it/s]

{'loss': Array(0.15620187, dtype=float32), 'loss_cross_entropy': Array(0.14834522, dtype=float32)}
{'loss_inverse': Array(6.894033e-05, dtype=float32)}


 10%|█         | 100040/1000000 [4:44:02<27:11:17,  9.19it/s]

{'loss': Array(0.1526836, dtype=float32), 'loss_cross_entropy': Array(0.14433746, dtype=float32)}
{'loss_inverse': Array(1.0723573e-07, dtype=float32)}


 10%|█         | 100050/1000000 [4:44:04<37:09:50,  6.73it/s]

{'loss': Array(0.13640438, dtype=float32), 'loss_cross_entropy': Array(0.12892398, dtype=float32)}
{'loss_inverse': Array(3.2922458e-06, dtype=float32)}


 10%|█         | 100060/1000000 [4:44:05<26:15:04,  9.52it/s]

{'loss': Array(0.15077604, dtype=float32), 'loss_cross_entropy': Array(0.14273824, dtype=float32)}
{'loss_inverse': Array(1.512594e-07, dtype=float32)}


 10%|█         | 100070/1000000 [4:44:07<26:04:40,  9.59it/s]

{'loss': Array(0.1456935, dtype=float32), 'loss_cross_entropy': Array(0.13805959, dtype=float32)}
{'loss_inverse': Array(4.703203e-05, dtype=float32)}


 10%|█         | 100080/1000000 [4:44:09<29:51:24,  8.37it/s]

{'loss': Array(0.14965878, dtype=float32), 'loss_cross_entropy': Array(0.14127241, dtype=float32)}
{'loss_inverse': Array(1.1770381e-06, dtype=float32)}


 10%|█         | 100090/1000000 [4:44:10<26:33:01,  9.42it/s]

{'loss': Array(0.15361059, dtype=float32), 'loss_cross_entropy': Array(0.14515512, dtype=float32)}
{'loss_inverse': Array(5.477241e-07, dtype=float32)}


 10%|█         | 100100/1000000 [4:44:12<37:41:59,  6.63it/s]

{'loss': Array(0.15539378, dtype=float32), 'loss_cross_entropy': Array(0.14783077, dtype=float32)}
{'loss_inverse': Array(0.00032942, dtype=float32)}


 10%|█         | 100110/1000000 [4:44:13<26:42:14,  9.36it/s]

{'loss': Array(0.13283494, dtype=float32), 'loss_cross_entropy': Array(0.12576307, dtype=float32)}
{'loss_inverse': Array(0.00043504, dtype=float32)}


 10%|█         | 100120/1000000 [4:44:15<24:54:18, 10.04it/s]

{'loss': Array(0.15623708, dtype=float32), 'loss_cross_entropy': Array(0.14813678, dtype=float32)}
{'loss_inverse': Array(8.376782e-06, dtype=float32)}


 10%|█         | 100130/1000000 [4:44:17<28:39:28,  8.72it/s]

{'loss': Array(0.11659843, dtype=float32), 'loss_cross_entropy': Array(0.10986102, dtype=float32)}
{'loss_inverse': Array(1.7045071e-05, dtype=float32)}


 10%|█         | 100140/1000000 [4:44:18<25:04:17,  9.97it/s]

{'loss': Array(0.14446257, dtype=float32), 'loss_cross_entropy': Array(0.13660693, dtype=float32)}
{'loss_inverse': Array(1.1950242e-07, dtype=float32)}


 10%|█         | 100150/1000000 [4:44:20<29:08:23,  8.58it/s]

{'loss': Array(0.12993388, dtype=float32), 'loss_cross_entropy': Array(0.12222505, dtype=float32)}
{'loss_inverse': Array(1.5870237e-05, dtype=float32)}


 10%|█         | 100160/1000000 [4:44:21<25:33:51,  9.78it/s]

{'loss': Array(0.14571956, dtype=float32), 'loss_cross_entropy': Array(0.13782422, dtype=float32)}
{'loss_inverse': Array(2.90136e-06, dtype=float32)}


 10%|█         | 100170/1000000 [4:44:23<30:54:48,  8.09it/s]

{'loss': Array(0.15804689, dtype=float32), 'loss_cross_entropy': Array(0.14895819, dtype=float32)}
{'loss_inverse': Array(0.00010771, dtype=float32)}


 10%|█         | 100180/1000000 [4:44:24<25:17:48,  9.88it/s]

{'loss': Array(0.13779908, dtype=float32), 'loss_cross_entropy': Array(0.13046925, dtype=float32)}
{'loss_inverse': Array(9.961355e-06, dtype=float32)}


 10%|█         | 100190/1000000 [4:44:26<24:14:29, 10.31it/s]

{'loss': Array(0.15130775, dtype=float32), 'loss_cross_entropy': Array(0.14269231, dtype=float32)}
{'loss_inverse': Array(1.8145269e-05, dtype=float32)}


 10%|█         | 100200/1000000 [4:44:28<27:36:30,  9.05it/s]

{'loss': Array(0.13820675, dtype=float32), 'loss_cross_entropy': Array(0.1304388, dtype=float32)}
{'loss_inverse': Array(5.7611455e-08, dtype=float32)}


 10%|█         | 100210/1000000 [4:44:29<25:14:02,  9.90it/s]

{'loss': Array(0.13627858, dtype=float32), 'loss_cross_entropy': Array(0.1296777, dtype=float32)}
{'loss_inverse': Array(1.693954e-07, dtype=float32)}


 10%|█         | 100220/1000000 [4:44:31<28:43:46,  8.70it/s]

{'loss': Array(0.12763076, dtype=float32), 'loss_cross_entropy': Array(0.1208233, dtype=float32)}
{'loss_inverse': Array(7.4521718e-06, dtype=float32)}


 10%|█         | 100230/1000000 [4:44:32<25:04:22,  9.97it/s]

{'loss': Array(0.14685546, dtype=float32), 'loss_cross_entropy': Array(0.13909118, dtype=float32)}
{'loss_inverse': Array(4.8688896e-08, dtype=float32)}


 10%|█         | 100240/1000000 [4:44:34<31:00:51,  8.06it/s]

{'loss': Array(0.14318213, dtype=float32), 'loss_cross_entropy': Array(0.13468267, dtype=float32)}
{'loss_inverse': Array(4.261415e-06, dtype=float32)}


 10%|█         | 100250/1000000 [4:44:35<25:26:08,  9.83it/s]

{'loss': Array(0.13874535, dtype=float32), 'loss_cross_entropy': Array(0.13086766, dtype=float32)}
{'loss_inverse': Array(2.9999023e-07, dtype=float32)}


 10%|█         | 100260/1000000 [4:44:37<25:11:53,  9.92it/s]

{'loss': Array(0.13633323, dtype=float32), 'loss_cross_entropy': Array(0.12940249, dtype=float32)}
{'loss_inverse': Array(3.843433e-06, dtype=float32)}


 10%|█         | 100270/1000000 [4:44:39<28:36:27,  8.74it/s]

{'loss': Array(0.137215, dtype=float32), 'loss_cross_entropy': Array(0.12979549, dtype=float32)}
{'loss_inverse': Array(1.5522896e-07, dtype=float32)}


 10%|█         | 100280/1000000 [4:44:40<24:48:23, 10.07it/s]

{'loss': Array(0.13739374, dtype=float32), 'loss_cross_entropy': Array(0.12974422, dtype=float32)}
{'loss_inverse': Array(1.0135809e-07, dtype=float32)}


 10%|█         | 100290/1000000 [4:44:42<30:31:00,  8.19it/s]

{'loss': Array(0.13459203, dtype=float32), 'loss_cross_entropy': Array(0.12687418, dtype=float32)}
{'loss_inverse': Array(2.1722892e-06, dtype=float32)}


 10%|█         | 100300/1000000 [4:44:43<25:55:57,  9.64it/s]

{'loss': Array(0.12261754, dtype=float32), 'loss_cross_entropy': Array(0.11559333, dtype=float32)}
{'loss_inverse': Array(4.9815714e-08, dtype=float32)}


 10%|█         | 100310/1000000 [4:44:45<33:19:06,  7.50it/s]

{'loss': Array(0.145735, dtype=float32), 'loss_cross_entropy': Array(0.13776383, dtype=float32)}
{'loss_inverse': Array(2.8230542e-07, dtype=float32)}


 10%|█         | 100320/1000000 [4:44:46<25:52:14,  9.66it/s]

{'loss': Array(0.13237108, dtype=float32), 'loss_cross_entropy': Array(0.12486664, dtype=float32)}
{'loss_inverse': Array(6.248268e-06, dtype=float32)}


 10%|█         | 100330/1000000 [4:44:48<25:08:12,  9.94it/s]

{'loss': Array(0.13144603, dtype=float32), 'loss_cross_entropy': Array(0.12417284, dtype=float32)}
{'loss_inverse': Array(6.6985945e-07, dtype=float32)}


 10%|█         | 100340/1000000 [4:44:49<28:04:42,  8.90it/s]

{'loss': Array(0.13576503, dtype=float32), 'loss_cross_entropy': Array(0.12842754, dtype=float32)}
{'loss_inverse': Array(7.633616e-05, dtype=float32)}


 10%|█         | 100350/1000000 [4:44:51<25:30:27,  9.80it/s]

{'loss': Array(0.13904075, dtype=float32), 'loss_cross_entropy': Array(0.13147284, dtype=float32)}
{'loss_inverse': Array(1.0335845e-05, dtype=float32)}


 10%|█         | 100360/1000000 [4:44:53<33:23:30,  7.48it/s]

{'loss': Array(0.1431337, dtype=float32), 'loss_cross_entropy': Array(0.13534723, dtype=float32)}
{'loss_inverse': Array(5.7258273e-08, dtype=float32)}


 10%|█         | 100370/1000000 [4:44:54<26:06:14,  9.57it/s]

{'loss': Array(0.13144252, dtype=float32), 'loss_cross_entropy': Array(0.12388282, dtype=float32)}
{'loss_inverse': Array(1.6410853e-07, dtype=float32)}


 10%|█         | 100380/1000000 [4:44:56<24:42:46, 10.11it/s]

{'loss': Array(0.1569854, dtype=float32), 'loss_cross_entropy': Array(0.14876635, dtype=float32)}
{'loss_inverse': Array(6.116189e-06, dtype=float32)}


 10%|█         | 100390/1000000 [4:44:57<27:24:45,  9.12it/s]

{'loss': Array(0.13263701, dtype=float32), 'loss_cross_entropy': Array(0.12532769, dtype=float32)}
{'loss_inverse': Array(1.34626625e-05, dtype=float32)}


 10%|█         | 100400/1000000 [4:44:59<25:22:30,  9.85it/s]

{'loss': Array(0.15960835, dtype=float32), 'loss_cross_entropy': Array(0.15139687, dtype=float32)}
{'loss_inverse': Array(3.8785612e-07, dtype=float32)}


 10%|█         | 100410/1000000 [4:45:00<28:58:20,  8.62it/s]

{'loss': Array(0.14054498, dtype=float32), 'loss_cross_entropy': Array(0.13278916, dtype=float32)}
{'loss_inverse': Array(4.6906914e-07, dtype=float32)}


 10%|█         | 100420/1000000 [4:45:02<24:58:50, 10.00it/s]

{'loss': Array(0.14866702, dtype=float32), 'loss_cross_entropy': Array(0.14046179, dtype=float32)}
{'loss_inverse': Array(4.742659e-06, dtype=float32)}


 10%|█         | 100430/1000000 [4:45:04<30:38:10,  8.16it/s]

{'loss': Array(0.1270634, dtype=float32), 'loss_cross_entropy': Array(0.11947798, dtype=float32)}
{'loss_inverse': Array(8.767759e-08, dtype=float32)}


 10%|█         | 100440/1000000 [4:45:05<25:28:49,  9.81it/s]

{'loss': Array(0.14209284, dtype=float32), 'loss_cross_entropy': Array(0.13348845, dtype=float32)}
{'loss_inverse': Array(0.00010601, dtype=float32)}


 10%|█         | 100450/1000000 [4:45:07<26:23:34,  9.47it/s]

{'loss': Array(0.13109677, dtype=float32), 'loss_cross_entropy': Array(0.12404344, dtype=float32)}
{'loss_inverse': Array(4.4901695e-05, dtype=float32)}


 10%|█         | 100459/1000000 [4:45:08<30:42:53,  8.14it/s]

{'loss': Array(0.13511735, dtype=float32), 'loss_cross_entropy': Array(0.12804848, dtype=float32)}
{'loss_inverse': Array(2.4132187e-06, dtype=float32)}


 10%|█         | 100469/1000000 [4:45:10<26:36:10,  9.39it/s]

{'loss': Array(0.1318559, dtype=float32), 'loss_cross_entropy': Array(0.12479277, dtype=float32)}
{'loss_inverse': Array(8.243013e-05, dtype=float32)}


 10%|█         | 100479/1000000 [4:45:12<35:38:23,  7.01it/s]

{'loss': Array(0.13845228, dtype=float32), 'loss_cross_entropy': Array(0.13090532, dtype=float32)}
{'loss_inverse': Array(3.3380552e-06, dtype=float32)}


 10%|█         | 100489/1000000 [4:45:13<26:36:54,  9.39it/s]

{'loss': Array(0.1461402, dtype=float32), 'loss_cross_entropy': Array(0.13819507, dtype=float32)}
{'loss_inverse': Array(2.2094346e-05, dtype=float32)}


 10%|█         | 100499/1000000 [4:45:15<37:18:20,  6.70it/s]

{'loss': Array(0.14701657, dtype=float32), 'loss_cross_entropy': Array(0.13900733, dtype=float32)}
{'loss_inverse': Array(0.00068877, dtype=float32)}


 10%|█         | 100509/1000000 [4:45:23<85:24:48,  2.93it/s] 

{'loss': Array(0.12463609, dtype=float32), 'loss_cross_entropy': Array(0.11723453, dtype=float32)}
{'loss_inverse': Array(3.6557024e-06, dtype=float32)}


 10%|█         | 100519/1000000 [4:45:24<34:57:05,  7.15it/s]

{'loss': Array(0.12603556, dtype=float32), 'loss_cross_entropy': Array(0.11877339, dtype=float32)}
{'loss_inverse': Array(6.463698e-05, dtype=float32)}


 10%|█         | 100529/1000000 [4:45:26<31:03:23,  8.05it/s]

{'loss': Array(0.14565973, dtype=float32), 'loss_cross_entropy': Array(0.13758819, dtype=float32)}
{'loss_inverse': Array(1.1804261e-06, dtype=float32)}


 10%|█         | 100539/1000000 [4:45:27<25:12:18,  9.91it/s]

{'loss': Array(0.13840768, dtype=float32), 'loss_cross_entropy': Array(0.13046573, dtype=float32)}
{'loss_inverse': Array(1.4074355e-05, dtype=float32)}


 10%|█         | 100549/1000000 [4:45:29<30:33:25,  8.18it/s]

{'loss': Array(0.13414398, dtype=float32), 'loss_cross_entropy': Array(0.12652948, dtype=float32)}
{'loss_inverse': Array(2.819143e-06, dtype=float32)}


 10%|█         | 100559/1000000 [4:45:30<25:27:49,  9.81it/s]

{'loss': Array(0.12175285, dtype=float32), 'loss_cross_entropy': Array(0.11530878, dtype=float32)}
{'loss_inverse': Array(4.654523e-05, dtype=float32)}


 10%|█         | 100569/1000000 [4:45:32<24:06:52, 10.36it/s]

{'loss': Array(0.14157097, dtype=float32), 'loss_cross_entropy': Array(0.13363492, dtype=float32)}
{'loss_inverse': Array(4.885197e-05, dtype=float32)}


 10%|█         | 100579/1000000 [4:45:34<27:42:47,  9.02it/s]

{'loss': Array(0.12390526, dtype=float32), 'loss_cross_entropy': Array(0.11686021, dtype=float32)}
{'loss_inverse': Array(0.00014307, dtype=float32)}


 10%|█         | 100589/1000000 [4:45:35<24:47:07, 10.08it/s]

{'loss': Array(0.1592878, dtype=float32), 'loss_cross_entropy': Array(0.15073888, dtype=float32)}
{'loss_inverse': Array(1.4664041e-06, dtype=float32)}


 10%|█         | 100599/1000000 [4:45:37<30:42:56,  8.13it/s]

{'loss': Array(0.14205758, dtype=float32), 'loss_cross_entropy': Array(0.134284, dtype=float32)}
{'loss_inverse': Array(5.406637e-05, dtype=float32)}


 10%|█         | 100609/1000000 [4:45:38<25:11:23,  9.92it/s]

{'loss': Array(0.13376537, dtype=float32), 'loss_cross_entropy': Array(0.12665448, dtype=float32)}
{'loss_inverse': Array(3.6348005e-08, dtype=float32)}


 10%|█         | 100619/1000000 [4:45:40<33:58:03,  7.35it/s]

{'loss': Array(0.14301407, dtype=float32), 'loss_cross_entropy': Array(0.13492988, dtype=float32)}
{'loss_inverse': Array(1.9055004e-07, dtype=float32)}


 10%|█         | 100629/1000000 [4:45:41<26:36:03,  9.39it/s]

{'loss': Array(0.13641767, dtype=float32), 'loss_cross_entropy': Array(0.1290369, dtype=float32)}
{'loss_inverse': Array(0.00023297, dtype=float32)}


 10%|█         | 100639/1000000 [4:45:43<25:55:45,  9.63it/s]

{'loss': Array(0.14767002, dtype=float32), 'loss_cross_entropy': Array(0.13983865, dtype=float32)}
{'loss_inverse': Array(2.8761426e-07, dtype=float32)}


 10%|█         | 100649/1000000 [4:45:45<28:51:06,  8.66it/s]

{'loss': Array(0.14065523, dtype=float32), 'loss_cross_entropy': Array(0.13336404, dtype=float32)}
{'loss_inverse': Array(0.00025853, dtype=float32)}


 10%|█         | 100659/1000000 [4:45:46<25:24:27,  9.83it/s]

{'loss': Array(0.14574192, dtype=float32), 'loss_cross_entropy': Array(0.1379132, dtype=float32)}
{'loss_inverse': Array(2.3298985e-06, dtype=float32)}


 10%|█         | 100669/1000000 [4:45:48<33:16:39,  7.51it/s]

{'loss': Array(0.15234815, dtype=float32), 'loss_cross_entropy': Array(0.14381407, dtype=float32)}
{'loss_inverse': Array(0.00020993, dtype=float32)}


 10%|█         | 100679/1000000 [4:45:49<26:54:57,  9.28it/s]

{'loss': Array(0.13085221, dtype=float32), 'loss_cross_entropy': Array(0.12383306, dtype=float32)}
{'loss_inverse': Array(1.3904778e-06, dtype=float32)}


 10%|█         | 100689/1000000 [4:45:51<34:04:05,  7.33it/s]

{'loss': Array(0.14366254, dtype=float32), 'loss_cross_entropy': Array(0.13614658, dtype=float32)}
{'loss_inverse': Array(1.28675165e-05, dtype=float32)}


 10%|█         | 100699/1000000 [4:45:52<26:02:30,  9.59it/s]

{'loss': Array(0.14355157, dtype=float32), 'loss_cross_entropy': Array(0.13618325, dtype=float32)}
{'loss_inverse': Array(2.3289479e-06, dtype=float32)}


 10%|█         | 100709/1000000 [4:45:54<26:55:50,  9.28it/s]

{'loss': Array(0.12593777, dtype=float32), 'loss_cross_entropy': Array(0.11872073, dtype=float32)}
{'loss_inverse': Array(0.00010236, dtype=float32)}


 10%|█         | 100719/1000000 [4:45:56<28:19:05,  8.82it/s]

{'loss': Array(0.12844054, dtype=float32), 'loss_cross_entropy': Array(0.12147435, dtype=float32)}
{'loss_inverse': Array(0.00043457, dtype=float32)}


 10%|█         | 100729/1000000 [4:45:57<25:11:30,  9.92it/s]

{'loss': Array(0.12746106, dtype=float32), 'loss_cross_entropy': Array(0.12009627, dtype=float32)}
{'loss_inverse': Array(1.2913135e-07, dtype=float32)}


 10%|█         | 100739/1000000 [4:45:59<36:53:51,  6.77it/s]

{'loss': Array(0.13533239, dtype=float32), 'loss_cross_entropy': Array(0.1275092, dtype=float32)}
{'loss_inverse': Array(5.2654973e-06, dtype=float32)}


 10%|█         | 100749/1000000 [4:46:00<26:31:27,  9.42it/s]

{'loss': Array(0.14703487, dtype=float32), 'loss_cross_entropy': Array(0.13815217, dtype=float32)}
{'loss_inverse': Array(1.3240325e-05, dtype=float32)}


 10%|█         | 100759/1000000 [4:46:02<24:52:21, 10.04it/s]

{'loss': Array(0.11195087, dtype=float32), 'loss_cross_entropy': Array(0.10537463, dtype=float32)}
{'loss_inverse': Array(9.147471e-09, dtype=float32)}


 10%|█         | 100769/1000000 [4:46:04<28:47:58,  8.67it/s]

{'loss': Array(0.14998418, dtype=float32), 'loss_cross_entropy': Array(0.14196245, dtype=float32)}
{'loss_inverse': Array(2.519193e-05, dtype=float32)}


 10%|█         | 100779/1000000 [4:46:05<25:26:18,  9.82it/s]

{'loss': Array(0.15819041, dtype=float32), 'loss_cross_entropy': Array(0.15023999, dtype=float32)}
{'loss_inverse': Array(5.332453e-09, dtype=float32)}


 10%|█         | 100789/1000000 [4:46:07<30:03:34,  8.31it/s]

{'loss': Array(0.14760242, dtype=float32), 'loss_cross_entropy': Array(0.13909028, dtype=float32)}
{'loss_inverse': Array(1.0615096e-07, dtype=float32)}


 10%|█         | 100799/1000000 [4:46:08<26:10:14,  9.54it/s]

{'loss': Array(0.12035888, dtype=float32), 'loss_cross_entropy': Array(0.11310797, dtype=float32)}
{'loss_inverse': Array(1.1543746e-06, dtype=float32)}


 10%|█         | 100809/1000000 [4:46:10<31:57:52,  7.81it/s]

{'loss': Array(0.13322131, dtype=float32), 'loss_cross_entropy': Array(0.12565105, dtype=float32)}
{'loss_inverse': Array(4.030491e-06, dtype=float32)}


 10%|█         | 100819/1000000 [4:46:12<26:39:16,  9.37it/s]

{'loss': Array(0.1554486, dtype=float32), 'loss_cross_entropy': Array(0.14757995, dtype=float32)}
{'loss_inverse': Array(1.7793105e-06, dtype=float32)}


 10%|█         | 100829/1000000 [4:46:13<25:11:58,  9.91it/s]

{'loss': Array(0.15147711, dtype=float32), 'loss_cross_entropy': Array(0.1432207, dtype=float32)}
{'loss_inverse': Array(6.9328956e-07, dtype=float32)}


 10%|█         | 100839/1000000 [4:46:15<29:25:04,  8.49it/s]

{'loss': Array(0.14373814, dtype=float32), 'loss_cross_entropy': Array(0.13651046, dtype=float32)}
{'loss_inverse': Array(6.436464e-07, dtype=float32)}


 10%|█         | 100849/1000000 [4:46:16<25:16:21,  9.88it/s]

{'loss': Array(0.13730095, dtype=float32), 'loss_cross_entropy': Array(0.12937398, dtype=float32)}
{'loss_inverse': Array(6.085874e-08, dtype=float32)}


 10%|█         | 100859/1000000 [4:46:18<33:53:47,  7.37it/s]

{'loss': Array(0.14761876, dtype=float32), 'loss_cross_entropy': Array(0.14002201, dtype=float32)}
{'loss_inverse': Array(4.495535e-06, dtype=float32)}


 10%|█         | 100869/1000000 [4:46:19<26:54:32,  9.28it/s]

{'loss': Array(0.12860325, dtype=float32), 'loss_cross_entropy': Array(0.12120368, dtype=float32)}
{'loss_inverse': Array(3.6603464e-07, dtype=float32)}


 10%|█         | 100879/1000000 [4:46:21<37:20:38,  6.69it/s]

{'loss': Array(0.14552893, dtype=float32), 'loss_cross_entropy': Array(0.13814425, dtype=float32)}
{'loss_inverse': Array(6.53331e-05, dtype=float32)}


 10%|█         | 100889/1000000 [4:46:23<26:40:00,  9.37it/s]

{'loss': Array(0.13489176, dtype=float32), 'loss_cross_entropy': Array(0.12704714, dtype=float32)}
{'loss_inverse': Array(1.0196151e-07, dtype=float32)}


 10%|█         | 100899/1000000 [4:46:24<37:35:24,  6.64it/s]

{'loss': Array(0.13580054, dtype=float32), 'loss_cross_entropy': Array(0.12827311, dtype=float32)}
{'loss_inverse': Array(8.078982e-06, dtype=float32)}


 10%|█         | 100909/1000000 [4:46:26<27:30:23,  9.08it/s]

{'loss': Array(0.12553966, dtype=float32), 'loss_cross_entropy': Array(0.11853444, dtype=float32)}
{'loss_inverse': Array(1.6804761e-06, dtype=float32)}


 10%|█         | 100919/1000000 [4:46:27<25:17:26,  9.87it/s]

{'loss': Array(0.13409522, dtype=float32), 'loss_cross_entropy': Array(0.12696968, dtype=float32)}
{'loss_inverse': Array(0.00015237, dtype=float32)}


 10%|█         | 100929/1000000 [4:46:29<34:11:35,  7.30it/s]

{'loss': Array(0.13277023, dtype=float32), 'loss_cross_entropy': Array(0.12532184, dtype=float32)}
{'loss_inverse': Array(0.0001954, dtype=float32)}


 10%|█         | 100939/1000000 [4:46:31<26:01:31,  9.60it/s]

{'loss': Array(0.14194773, dtype=float32), 'loss_cross_entropy': Array(0.1338411, dtype=float32)}
{'loss_inverse': Array(8.747991e-08, dtype=float32)}


 10%|█         | 100949/1000000 [4:46:32<37:07:09,  6.73it/s]

{'loss': Array(0.13569275, dtype=float32), 'loss_cross_entropy': Array(0.1281784, dtype=float32)}
{'loss_inverse': Array(3.854041e-05, dtype=float32)}


 10%|█         | 100959/1000000 [4:46:34<26:45:08,  9.34it/s]

{'loss': Array(0.14764343, dtype=float32), 'loss_cross_entropy': Array(0.14033502, dtype=float32)}
{'loss_inverse': Array(4.261509e-06, dtype=float32)}


 10%|█         | 100969/1000000 [4:46:35<24:34:48, 10.16it/s]

{'loss': Array(0.14464065, dtype=float32), 'loss_cross_entropy': Array(0.13669547, dtype=float32)}
{'loss_inverse': Array(4.104321e-06, dtype=float32)}


 10%|█         | 100979/1000000 [4:46:37<27:34:04,  9.06it/s]

{'loss': Array(0.14525127, dtype=float32), 'loss_cross_entropy': Array(0.13759588, dtype=float32)}
{'loss_inverse': Array(1.831668e-05, dtype=float32)}


 10%|█         | 100989/1000000 [4:46:38<24:51:45, 10.04it/s]

{'loss': Array(0.15069003, dtype=float32), 'loss_cross_entropy': Array(0.14328772, dtype=float32)}
{'loss_inverse': Array(9.746254e-05, dtype=float32)}


 10%|█         | 100999/1000000 [4:46:40<33:41:29,  7.41it/s]

{'loss': Array(0.13066116, dtype=float32), 'loss_cross_entropy': Array(0.12375209, dtype=float32)}
{'loss_inverse': Array(3.050527e-07, dtype=float32)}


 10%|█         | 101009/1000000 [4:46:48<82:56:28,  3.01it/s] 

{'loss': Array(0.13696592, dtype=float32), 'loss_cross_entropy': Array(0.1295261, dtype=float32)}
{'loss_inverse': Array(3.5607474e-07, dtype=float32)}


 10%|█         | 101019/1000000 [4:46:50<41:18:31,  6.05it/s]

{'loss': Array(0.14089791, dtype=float32), 'loss_cross_entropy': Array(0.13325706, dtype=float32)}
{'loss_inverse': Array(2.2605354e-08, dtype=float32)}


 10%|█         | 101029/1000000 [4:46:51<27:34:23,  9.06it/s]

{'loss': Array(0.14002712, dtype=float32), 'loss_cross_entropy': Array(0.13236044, dtype=float32)}
{'loss_inverse': Array(9.08046e-06, dtype=float32)}


 10%|█         | 101039/1000000 [4:46:53<37:27:10,  6.67it/s]

{'loss': Array(0.15354194, dtype=float32), 'loss_cross_entropy': Array(0.14551456, dtype=float32)}
{'loss_inverse': Array(0.00011512, dtype=float32)}


 10%|█         | 101049/1000000 [4:46:54<26:23:05,  9.46it/s]

{'loss': Array(0.15766759, dtype=float32), 'loss_cross_entropy': Array(0.14960514, dtype=float32)}
{'loss_inverse': Array(0.00010014, dtype=float32)}


 10%|█         | 101059/1000000 [4:46:56<38:06:07,  6.55it/s]

{'loss': Array(0.14596999, dtype=float32), 'loss_cross_entropy': Array(0.13821822, dtype=float32)}
{'loss_inverse': Array(2.1872702e-05, dtype=float32)}


 10%|█         | 101069/1000000 [4:46:58<28:04:13,  8.90it/s]

{'loss': Array(0.13190906, dtype=float32), 'loss_cross_entropy': Array(0.12445524, dtype=float32)}
{'loss_inverse': Array(4.1139792e-05, dtype=float32)}


 10%|█         | 101079/1000000 [4:46:59<24:55:52, 10.02it/s]

{'loss': Array(0.13763736, dtype=float32), 'loss_cross_entropy': Array(0.12994611, dtype=float32)}
{'loss_inverse': Array(1.7070539e-05, dtype=float32)}


 10%|█         | 101089/1000000 [4:47:01<33:00:12,  7.57it/s]

{'loss': Array(0.14337932, dtype=float32), 'loss_cross_entropy': Array(0.13510163, dtype=float32)}
{'loss_inverse': Array(9.918376e-07, dtype=float32)}


 10%|█         | 101099/1000000 [4:47:02<25:57:52,  9.62it/s]

{'loss': Array(0.13269146, dtype=float32), 'loss_cross_entropy': Array(0.12517537, dtype=float32)}
{'loss_inverse': Array(5.656072e-08, dtype=float32)}


 10%|█         | 101109/1000000 [4:47:04<24:44:27, 10.09it/s]

{'loss': Array(0.13631509, dtype=float32), 'loss_cross_entropy': Array(0.12869053, dtype=float32)}
{'loss_inverse': Array(1.214776e-05, dtype=float32)}


 10%|█         | 101119/1000000 [4:47:05<27:50:44,  8.97it/s]

{'loss': Array(0.11000475, dtype=float32), 'loss_cross_entropy': Array(0.10339057, dtype=float32)}
{'loss_inverse': Array(0.00013078, dtype=float32)}


 10%|█         | 101129/1000000 [4:47:07<26:07:09,  9.56it/s]

{'loss': Array(0.14226015, dtype=float32), 'loss_cross_entropy': Array(0.13406049, dtype=float32)}
{'loss_inverse': Array(0.00010916, dtype=float32)}


 10%|█         | 101139/1000000 [4:47:09<31:26:42,  7.94it/s]

{'loss': Array(0.13910682, dtype=float32), 'loss_cross_entropy': Array(0.13185845, dtype=float32)}
{'loss_inverse': Array(1.15108996e-07, dtype=float32)}


 10%|█         | 101149/1000000 [4:47:10<27:31:43,  9.07it/s]

{'loss': Array(0.13700353, dtype=float32), 'loss_cross_entropy': Array(0.12978373, dtype=float32)}
{'loss_inverse': Array(4.861562e-05, dtype=float32)}


 10%|█         | 101159/1000000 [4:47:12<33:49:07,  7.38it/s]

{'loss': Array(0.13333791, dtype=float32), 'loss_cross_entropy': Array(0.12593102, dtype=float32)}
{'loss_inverse': Array(4.0218332e-05, dtype=float32)}


 10%|█         | 101169/1000000 [4:47:13<26:29:18,  9.43it/s]

{'loss': Array(0.15223555, dtype=float32), 'loss_cross_entropy': Array(0.1440774, dtype=float32)}
{'loss_inverse': Array(1.7201744e-07, dtype=float32)}


 10%|█         | 101179/1000000 [4:47:15<34:24:21,  7.26it/s]

{'loss': Array(0.14942892, dtype=float32), 'loss_cross_entropy': Array(0.14166108, dtype=float32)}
{'loss_inverse': Array(0.00020494, dtype=float32)}


 10%|█         | 101189/1000000 [4:47:16<26:25:24,  9.45it/s]

{'loss': Array(0.12439348, dtype=float32), 'loss_cross_entropy': Array(0.11733337, dtype=float32)}
{'loss_inverse': Array(6.6759276e-06, dtype=float32)}


 10%|█         | 101199/1000000 [4:47:18<24:51:02, 10.05it/s]

{'loss': Array(0.12088776, dtype=float32), 'loss_cross_entropy': Array(0.11436947, dtype=float32)}
{'loss_inverse': Array(2.2566819e-07, dtype=float32)}


 10%|█         | 101209/1000000 [4:47:20<30:37:47,  8.15it/s]

{'loss': Array(0.15113872, dtype=float32), 'loss_cross_entropy': Array(0.14263155, dtype=float32)}
{'loss_inverse': Array(1.7821229e-05, dtype=float32)}


 10%|█         | 101219/1000000 [4:47:21<25:27:22,  9.81it/s]

{'loss': Array(0.12822334, dtype=float32), 'loss_cross_entropy': Array(0.12097786, dtype=float32)}
{'loss_inverse': Array(3.1165783e-07, dtype=float32)}


 10%|█         | 101229/1000000 [4:47:23<33:33:05,  7.44it/s]

{'loss': Array(0.11790638, dtype=float32), 'loss_cross_entropy': Array(0.11106508, dtype=float32)}
{'loss_inverse': Array(4.4854273e-06, dtype=float32)}


 10%|█         | 101239/1000000 [4:47:24<25:59:34,  9.60it/s]

{'loss': Array(0.13265322, dtype=float32), 'loss_cross_entropy': Array(0.12555121, dtype=float32)}
{'loss_inverse': Array(2.5381165e-05, dtype=float32)}


 10%|█         | 101249/1000000 [4:47:26<37:07:45,  6.72it/s]

{'loss': Array(0.1559006, dtype=float32), 'loss_cross_entropy': Array(0.14762203, dtype=float32)}
{'loss_inverse': Array(2.5054078e-07, dtype=float32)}


 10%|█         | 101259/1000000 [4:47:27<26:28:48,  9.43it/s]

{'loss': Array(0.13235489, dtype=float32), 'loss_cross_entropy': Array(0.1252569, dtype=float32)}
{'loss_inverse': Array(3.0985216e-06, dtype=float32)}


 10%|█         | 101269/1000000 [4:47:29<24:58:43,  9.99it/s]

{'loss': Array(0.134961, dtype=float32), 'loss_cross_entropy': Array(0.1273142, dtype=float32)}
{'loss_inverse': Array(3.956487e-07, dtype=float32)}


 10%|█         | 101279/1000000 [4:47:31<28:35:47,  8.73it/s]

{'loss': Array(0.12599377, dtype=float32), 'loss_cross_entropy': Array(0.11878394, dtype=float32)}
{'loss_inverse': Array(2.4680863e-05, dtype=float32)}


 10%|█         | 101289/1000000 [4:47:32<25:03:04,  9.97it/s]

{'loss': Array(0.14830172, dtype=float32), 'loss_cross_entropy': Array(0.14007394, dtype=float32)}
{'loss_inverse': Array(0.00026115, dtype=float32)}


 10%|█         | 101299/1000000 [4:47:34<33:44:30,  7.40it/s]

{'loss': Array(0.16286129, dtype=float32), 'loss_cross_entropy': Array(0.1540583, dtype=float32)}
{'loss_inverse': Array(6.608007e-08, dtype=float32)}


 10%|█         | 101309/1000000 [4:47:35<25:51:56,  9.65it/s]

{'loss': Array(0.141459, dtype=float32), 'loss_cross_entropy': Array(0.13349369, dtype=float32)}
{'loss_inverse': Array(1.5008828e-07, dtype=float32)}


 10%|█         | 101319/1000000 [4:47:37<24:51:40, 10.04it/s]

{'loss': Array(0.14289483, dtype=float32), 'loss_cross_entropy': Array(0.13522044, dtype=float32)}
{'loss_inverse': Array(7.305219e-07, dtype=float32)}


 10%|█         | 101329/1000000 [4:47:38<27:10:14,  9.19it/s]

{'loss': Array(0.14034736, dtype=float32), 'loss_cross_entropy': Array(0.13262051, dtype=float32)}
{'loss_inverse': Array(2.7626362e-07, dtype=float32)}


 10%|█         | 101339/1000000 [4:47:40<24:59:01,  9.99it/s]

{'loss': Array(0.13726752, dtype=float32), 'loss_cross_entropy': Array(0.12931183, dtype=float32)}
{'loss_inverse': Array(4.0779614e-07, dtype=float32)}


 10%|█         | 101349/1000000 [4:47:41<27:13:51,  9.17it/s]

{'loss': Array(0.14355241, dtype=float32), 'loss_cross_entropy': Array(0.13449791, dtype=float32)}
{'loss_inverse': Array(1.2719909e-06, dtype=float32)}


 10%|█         | 101359/1000000 [4:47:43<25:11:48,  9.91it/s]

{'loss': Array(0.14859423, dtype=float32), 'loss_cross_entropy': Array(0.1407402, dtype=float32)}
{'loss_inverse': Array(1.9663594e-05, dtype=float32)}


 10%|█         | 101369/1000000 [4:47:45<27:39:57,  9.02it/s]

{'loss': Array(0.13931473, dtype=float32), 'loss_cross_entropy': Array(0.13185245, dtype=float32)}
{'loss_inverse': Array(1.5388272e-06, dtype=float32)}


 10%|█         | 101379/1000000 [4:47:46<24:46:58, 10.07it/s]

{'loss': Array(0.15107922, dtype=float32), 'loss_cross_entropy': Array(0.14274769, dtype=float32)}
{'loss_inverse': Array(0.00016407, dtype=float32)}


 10%|█         | 101389/1000000 [4:47:48<28:51:38,  8.65it/s]

{'loss': Array(0.15483153, dtype=float32), 'loss_cross_entropy': Array(0.14690521, dtype=float32)}
{'loss_inverse': Array(3.5363433e-07, dtype=float32)}


 10%|█         | 101399/1000000 [4:47:49<25:15:51,  9.88it/s]

{'loss': Array(0.13772987, dtype=float32), 'loss_cross_entropy': Array(0.12965839, dtype=float32)}
{'loss_inverse': Array(8.952967e-06, dtype=float32)}


 10%|█         | 101409/1000000 [4:47:51<30:32:08,  8.17it/s]

{'loss': Array(0.13802086, dtype=float32), 'loss_cross_entropy': Array(0.13031232, dtype=float32)}
{'loss_inverse': Array(1.0787163e-06, dtype=float32)}


 10%|█         | 101419/1000000 [4:47:52<25:12:04,  9.90it/s]

{'loss': Array(0.12538616, dtype=float32), 'loss_cross_entropy': Array(0.11839348, dtype=float32)}
{'loss_inverse': Array(9.222287e-05, dtype=float32)}


 10%|█         | 101429/1000000 [4:47:54<33:14:58,  7.51it/s]

{'loss': Array(0.14096783, dtype=float32), 'loss_cross_entropy': Array(0.13330722, dtype=float32)}
{'loss_inverse': Array(1.7174452e-06, dtype=float32)}


 10%|█         | 101439/1000000 [4:47:56<26:46:07,  9.32it/s]

{'loss': Array(0.1425589, dtype=float32), 'loss_cross_entropy': Array(0.13466614, dtype=float32)}
{'loss_inverse': Array(2.236796e-05, dtype=float32)}


 10%|█         | 101449/1000000 [4:47:57<24:32:30, 10.17it/s]

{'loss': Array(0.14597604, dtype=float32), 'loss_cross_entropy': Array(0.13760756, dtype=float32)}
{'loss_inverse': Array(1.072062e-05, dtype=float32)}


 10%|█         | 101459/1000000 [4:47:59<29:02:51,  8.59it/s]

{'loss': Array(0.14725477, dtype=float32), 'loss_cross_entropy': Array(0.1392875, dtype=float32)}
{'loss_inverse': Array(2.8547646e-08, dtype=float32)}


 10%|█         | 101469/1000000 [4:48:00<26:08:08,  9.55it/s]

{'loss': Array(0.11874502, dtype=float32), 'loss_cross_entropy': Array(0.11166199, dtype=float32)}
{'loss_inverse': Array(2.5548634e-06, dtype=float32)}


 10%|█         | 101479/1000000 [4:48:02<33:34:57,  7.43it/s]

{'loss': Array(0.1329292, dtype=float32), 'loss_cross_entropy': Array(0.12530227, dtype=float32)}
{'loss_inverse': Array(1.6911177e-05, dtype=float32)}


 10%|█         | 101489/1000000 [4:48:03<25:31:50,  9.78it/s]

{'loss': Array(0.1248887, dtype=float32), 'loss_cross_entropy': Array(0.11751187, dtype=float32)}
{'loss_inverse': Array(6.182935e-08, dtype=float32)}


 10%|█         | 101499/1000000 [4:48:05<33:31:08,  7.45it/s]

{'loss': Array(0.12385561, dtype=float32), 'loss_cross_entropy': Array(0.11665042, dtype=float32)}
{'loss_inverse': Array(2.5405456e-07, dtype=float32)}


 10%|█         | 101509/1000000 [4:48:13<86:43:11,  2.88it/s] 

{'loss': Array(0.13119589, dtype=float32), 'loss_cross_entropy': Array(0.12391653, dtype=float32)}
{'loss_inverse': Array(1.6008798e-06, dtype=float32)}


 10%|█         | 101519/1000000 [4:48:15<44:00:02,  5.67it/s]

{'loss': Array(0.12503418, dtype=float32), 'loss_cross_entropy': Array(0.1175132, dtype=float32)}
{'loss_inverse': Array(4.148404e-06, dtype=float32)}


 10%|█         | 101529/1000000 [4:48:16<27:23:04,  9.11it/s]

{'loss': Array(0.13727553, dtype=float32), 'loss_cross_entropy': Array(0.12868573, dtype=float32)}
{'loss_inverse': Array(3.987306e-05, dtype=float32)}


 10%|█         | 101539/1000000 [4:48:18<24:45:43, 10.08it/s]

{'loss': Array(0.13622157, dtype=float32), 'loss_cross_entropy': Array(0.12881266, dtype=float32)}
{'loss_inverse': Array(4.305535e-06, dtype=float32)}


 10%|█         | 101549/1000000 [4:48:20<30:26:23,  8.20it/s]

{'loss': Array(0.15081851, dtype=float32), 'loss_cross_entropy': Array(0.14298598, dtype=float32)}
{'loss_inverse': Array(1.7409887e-07, dtype=float32)}


 10%|█         | 101559/1000000 [4:48:21<25:11:52,  9.90it/s]

{'loss': Array(0.1257488, dtype=float32), 'loss_cross_entropy': Array(0.1184127, dtype=float32)}
{'loss_inverse': Array(1.1377282e-06, dtype=float32)}


 10%|█         | 101569/1000000 [4:48:23<37:18:25,  6.69it/s]

{'loss': Array(0.16071048, dtype=float32), 'loss_cross_entropy': Array(0.15215911, dtype=float32)}
{'loss_inverse': Array(4.230487e-06, dtype=float32)}


 10%|█         | 101579/1000000 [4:48:24<26:10:30,  9.53it/s]

{'loss': Array(0.14995618, dtype=float32), 'loss_cross_entropy': Array(0.14179282, dtype=float32)}
{'loss_inverse': Array(4.156559e-05, dtype=float32)}


 10%|█         | 101589/1000000 [4:48:25<24:45:27, 10.08it/s]

{'loss': Array(0.15076919, dtype=float32), 'loss_cross_entropy': Array(0.14266421, dtype=float32)}
{'loss_inverse': Array(4.5508128e-05, dtype=float32)}


 10%|█         | 101599/1000000 [4:48:27<27:39:31,  9.02it/s]

{'loss': Array(0.1551043, dtype=float32), 'loss_cross_entropy': Array(0.14684741, dtype=float32)}
{'loss_inverse': Array(7.420835e-05, dtype=float32)}


 10%|█         | 101609/1000000 [4:48:29<25:34:15,  9.76it/s]

{'loss': Array(0.15073861, dtype=float32), 'loss_cross_entropy': Array(0.14267968, dtype=float32)}
{'loss_inverse': Array(9.641826e-05, dtype=float32)}


 10%|█         | 101619/1000000 [4:48:30<27:32:12,  9.06it/s]

{'loss': Array(0.11779109, dtype=float32), 'loss_cross_entropy': Array(0.11166295, dtype=float32)}
{'loss_inverse': Array(1.1017045e-05, dtype=float32)}


 10%|█         | 101629/1000000 [4:48:32<24:56:48, 10.00it/s]

{'loss': Array(0.14215589, dtype=float32), 'loss_cross_entropy': Array(0.13387293, dtype=float32)}
{'loss_inverse': Array(3.866106e-05, dtype=float32)}


 10%|█         | 101639/1000000 [4:48:34<36:53:59,  6.76it/s]

{'loss': Array(0.13274343, dtype=float32), 'loss_cross_entropy': Array(0.12459536, dtype=float32)}
{'loss_inverse': Array(8.0568617e-07, dtype=float32)}


 10%|█         | 101649/1000000 [4:48:35<26:29:29,  9.42it/s]

{'loss': Array(0.11808014, dtype=float32), 'loss_cross_entropy': Array(0.11131272, dtype=float32)}
{'loss_inverse': Array(1.4003047e-06, dtype=float32)}


 10%|█         | 101659/1000000 [4:48:36<24:50:18, 10.05it/s]

{'loss': Array(0.14478119, dtype=float32), 'loss_cross_entropy': Array(0.13699272, dtype=float32)}
{'loss_inverse': Array(3.5647234e-05, dtype=float32)}


 10%|█         | 101669/1000000 [4:48:38<27:10:38,  9.18it/s]

{'loss': Array(0.15449981, dtype=float32), 'loss_cross_entropy': Array(0.14679341, dtype=float32)}
{'loss_inverse': Array(0.00030556, dtype=float32)}


 10%|█         | 101679/1000000 [4:48:39<24:38:57, 10.12it/s]

{'loss': Array(0.1330122, dtype=float32), 'loss_cross_entropy': Array(0.12631635, dtype=float32)}
{'loss_inverse': Array(6.51694e-06, dtype=float32)}


 10%|█         | 101689/1000000 [4:48:41<28:37:26,  8.72it/s]

{'loss': Array(0.12946354, dtype=float32), 'loss_cross_entropy': Array(0.12206691, dtype=float32)}
{'loss_inverse': Array(1.4318887e-05, dtype=float32)}


 10%|█         | 101699/1000000 [4:48:43<25:32:14,  9.77it/s]

{'loss': Array(0.12595843, dtype=float32), 'loss_cross_entropy': Array(0.11862302, dtype=float32)}
{'loss_inverse': Array(4.5513075e-06, dtype=float32)}


 10%|█         | 101709/1000000 [4:48:44<31:09:15,  8.01it/s]

{'loss': Array(0.10434746, dtype=float32), 'loss_cross_entropy': Array(0.0978672, dtype=float32)}
{'loss_inverse': Array(3.6364735e-08, dtype=float32)}


 10%|█         | 101719/1000000 [4:48:46<25:32:51,  9.77it/s]

{'loss': Array(0.13982284, dtype=float32), 'loss_cross_entropy': Array(0.13227683, dtype=float32)}
{'loss_inverse': Array(2.6327955e-06, dtype=float32)}


 10%|█         | 101729/1000000 [4:48:47<24:47:23, 10.07it/s]

{'loss': Array(0.12938058, dtype=float32), 'loss_cross_entropy': Array(0.12184453, dtype=float32)}
{'loss_inverse': Array(6.622478e-08, dtype=float32)}


 10%|█         | 101739/1000000 [4:48:49<27:59:46,  8.91it/s]

{'loss': Array(0.15539448, dtype=float32), 'loss_cross_entropy': Array(0.1474764, dtype=float32)}
{'loss_inverse': Array(5.760318e-07, dtype=float32)}


 10%|█         | 101749/1000000 [4:48:50<25:03:48,  9.96it/s]

{'loss': Array(0.1528136, dtype=float32), 'loss_cross_entropy': Array(0.1446226, dtype=float32)}
{'loss_inverse': Array(3.859731e-06, dtype=float32)}


 10%|█         | 101759/1000000 [4:48:52<28:36:27,  8.72it/s]

{'loss': Array(0.13860081, dtype=float32), 'loss_cross_entropy': Array(0.13140364, dtype=float32)}
{'loss_inverse': Array(3.698417e-07, dtype=float32)}


 10%|█         | 101769/1000000 [4:48:54<25:13:26,  9.89it/s]

{'loss': Array(0.11666813, dtype=float32), 'loss_cross_entropy': Array(0.10977121, dtype=float32)}
{'loss_inverse': Array(4.225599e-08, dtype=float32)}


 10%|█         | 101779/1000000 [4:48:55<31:20:01,  7.96it/s]

{'loss': Array(0.14039491, dtype=float32), 'loss_cross_entropy': Array(0.1327272, dtype=float32)}
{'loss_inverse': Array(5.9079434e-06, dtype=float32)}


 10%|█         | 101789/1000000 [4:48:57<25:51:04,  9.65it/s]

{'loss': Array(0.12535454, dtype=float32), 'loss_cross_entropy': Array(0.11842611, dtype=float32)}
{'loss_inverse': Array(4.260292e-07, dtype=float32)}


 10%|█         | 101799/1000000 [4:48:59<31:17:46,  7.97it/s]

{'loss': Array(0.13058482, dtype=float32), 'loss_cross_entropy': Array(0.12343098, dtype=float32)}
{'loss_inverse': Array(1.8436072e-07, dtype=float32)}


 10%|█         | 101809/1000000 [4:49:00<25:54:12,  9.63it/s]

{'loss': Array(0.14713362, dtype=float32), 'loss_cross_entropy': Array(0.13926624, dtype=float32)}
{'loss_inverse': Array(5.0746687e-05, dtype=float32)}


 10%|█         | 101819/1000000 [4:49:01<24:50:29, 10.04it/s]

{'loss': Array(0.15644307, dtype=float32), 'loss_cross_entropy': Array(0.14909475, dtype=float32)}
{'loss_inverse': Array(9.434685e-08, dtype=float32)}


 10%|█         | 101829/1000000 [4:49:03<27:43:32,  9.00it/s]

{'loss': Array(0.14170592, dtype=float32), 'loss_cross_entropy': Array(0.13408977, dtype=float32)}
{'loss_inverse': Array(4.2360907e-06, dtype=float32)}


 10%|█         | 101839/1000000 [4:49:05<26:51:40,  9.29it/s]

{'loss': Array(0.13550559, dtype=float32), 'loss_cross_entropy': Array(0.1277226, dtype=float32)}
{'loss_inverse': Array(0.00010538, dtype=float32)}


 10%|█         | 101849/1000000 [4:49:07<32:14:41,  7.74it/s]

{'loss': Array(0.1173928, dtype=float32), 'loss_cross_entropy': Array(0.11060648, dtype=float32)}
{'loss_inverse': Array(4.0451632e-05, dtype=float32)}


 10%|█         | 101859/1000000 [4:49:08<26:50:41,  9.29it/s]

{'loss': Array(0.13334091, dtype=float32), 'loss_cross_entropy': Array(0.12573737, dtype=float32)}
{'loss_inverse': Array(6.623229e-08, dtype=float32)}


 10%|█         | 101869/1000000 [4:49:10<34:12:56,  7.29it/s]

{'loss': Array(0.12968513, dtype=float32), 'loss_cross_entropy': Array(0.1222175, dtype=float32)}
{'loss_inverse': Array(3.7491506e-07, dtype=float32)}


 10%|█         | 101879/1000000 [4:49:11<26:57:22,  9.25it/s]

{'loss': Array(0.14170456, dtype=float32), 'loss_cross_entropy': Array(0.13421005, dtype=float32)}
{'loss_inverse': Array(4.6963993e-07, dtype=float32)}


 10%|█         | 101889/1000000 [4:49:13<25:28:55,  9.79it/s]

{'loss': Array(0.12825052, dtype=float32), 'loss_cross_entropy': Array(0.12064178, dtype=float32)}
{'loss_inverse': Array(4.921732e-05, dtype=float32)}


 10%|█         | 101899/1000000 [4:49:15<28:43:02,  8.69it/s]

{'loss': Array(0.13775867, dtype=float32), 'loss_cross_entropy': Array(0.13026313, dtype=float32)}
{'loss_inverse': Array(6.2480817e-06, dtype=float32)}


 10%|█         | 101909/1000000 [4:49:16<25:06:13,  9.94it/s]

{'loss': Array(0.13837126, dtype=float32), 'loss_cross_entropy': Array(0.13109162, dtype=float32)}
{'loss_inverse': Array(1.1500582e-06, dtype=float32)}


 10%|█         | 101919/1000000 [4:49:18<32:51:31,  7.59it/s]

{'loss': Array(0.12358804, dtype=float32), 'loss_cross_entropy': Array(0.11695381, dtype=float32)}
{'loss_inverse': Array(1.158578e-06, dtype=float32)}


 10%|█         | 101929/1000000 [4:49:19<25:56:01,  9.62it/s]

{'loss': Array(0.11480925, dtype=float32), 'loss_cross_entropy': Array(0.1079974, dtype=float32)}
{'loss_inverse': Array(0.00014841, dtype=float32)}


 10%|█         | 101939/1000000 [4:49:21<33:17:21,  7.49it/s]

{'loss': Array(0.13741966, dtype=float32), 'loss_cross_entropy': Array(0.13020805, dtype=float32)}
{'loss_inverse': Array(3.7277543e-05, dtype=float32)}


 10%|█         | 101949/1000000 [4:49:22<25:55:40,  9.62it/s]

{'loss': Array(0.15542524, dtype=float32), 'loss_cross_entropy': Array(0.14718536, dtype=float32)}
{'loss_inverse': Array(3.07249e-08, dtype=float32)}


 10%|█         | 101959/1000000 [4:49:24<25:07:39,  9.93it/s]

{'loss': Array(0.1392541, dtype=float32), 'loss_cross_entropy': Array(0.13187338, dtype=float32)}
{'loss_inverse': Array(1.7853201e-05, dtype=float32)}


 10%|█         | 101969/1000000 [4:49:25<27:26:16,  9.09it/s]

{'loss': Array(0.14861935, dtype=float32), 'loss_cross_entropy': Array(0.14085858, dtype=float32)}
{'loss_inverse': Array(0.00017911, dtype=float32)}


 10%|█         | 101979/1000000 [4:49:27<24:41:01, 10.11it/s]

{'loss': Array(0.13889846, dtype=float32), 'loss_cross_entropy': Array(0.13140517, dtype=float32)}
{'loss_inverse': Array(1.6308239e-05, dtype=float32)}


 10%|█         | 101989/1000000 [4:49:29<36:42:45,  6.79it/s]

{'loss': Array(0.13480146, dtype=float32), 'loss_cross_entropy': Array(0.12708327, dtype=float32)}
{'loss_inverse': Array(2.3053182e-05, dtype=float32)}


 10%|█         | 101999/1000000 [4:49:30<27:10:42,  9.18it/s]

{'loss': Array(0.15133117, dtype=float32), 'loss_cross_entropy': Array(0.14289884, dtype=float32)}
{'loss_inverse': Array(5.9673766e-05, dtype=float32)}


 10%|█         | 102009/1000000 [4:49:38<86:39:49,  2.88it/s] 

{'loss': Array(0.1185483, dtype=float32), 'loss_cross_entropy': Array(0.11126406, dtype=float32)}
{'loss_inverse': Array(0.00027881, dtype=float32)}


 10%|█         | 102019/1000000 [4:49:40<39:04:39,  6.38it/s] 

{'loss': Array(0.14700346, dtype=float32), 'loss_cross_entropy': Array(0.13844916, dtype=float32)}
{'loss_inverse': Array(0.00026628, dtype=float32)}


 10%|█         | 102029/1000000 [4:49:42<26:36:36,  9.37it/s]

{'loss': Array(0.14053325, dtype=float32), 'loss_cross_entropy': Array(0.13310592, dtype=float32)}
{'loss_inverse': Array(0.00027131, dtype=float32)}


 10%|█         | 102039/1000000 [4:49:43<28:48:08,  8.66it/s]

{'loss': Array(0.13520458, dtype=float32), 'loss_cross_entropy': Array(0.12790291, dtype=float32)}
{'loss_inverse': Array(0.00014127, dtype=float32)}


 10%|█         | 102049/1000000 [4:49:45<25:57:17,  9.61it/s]

{'loss': Array(0.11842288, dtype=float32), 'loss_cross_entropy': Array(0.11135512, dtype=float32)}
{'loss_inverse': Array(0.00011796, dtype=float32)}


 10%|█         | 102059/1000000 [4:49:47<31:52:14,  7.83it/s]

{'loss': Array(0.1582073, dtype=float32), 'loss_cross_entropy': Array(0.14990012, dtype=float32)}
{'loss_inverse': Array(7.2070965e-05, dtype=float32)}


 10%|█         | 102069/1000000 [4:49:48<25:18:07,  9.86it/s]

{'loss': Array(0.14701037, dtype=float32), 'loss_cross_entropy': Array(0.13908833, dtype=float32)}
{'loss_inverse': Array(7.244972e-05, dtype=float32)}


 10%|█         | 102079/1000000 [4:49:49<24:30:29, 10.18it/s]

{'loss': Array(0.14066713, dtype=float32), 'loss_cross_entropy': Array(0.1324868, dtype=float32)}
{'loss_inverse': Array(0.00012166, dtype=float32)}


 10%|█         | 102089/1000000 [4:49:51<27:15:30,  9.15it/s]

{'loss': Array(0.14254141, dtype=float32), 'loss_cross_entropy': Array(0.13506967, dtype=float32)}
{'loss_inverse': Array(1.4978342e-07, dtype=float32)}


 10%|█         | 102099/1000000 [4:49:52<24:37:39, 10.13it/s]

{'loss': Array(0.15194558, dtype=float32), 'loss_cross_entropy': Array(0.14358939, dtype=float32)}
{'loss_inverse': Array(2.854961e-06, dtype=float32)}


 10%|█         | 102109/1000000 [4:49:54<28:52:30,  8.64it/s]

{'loss': Array(0.11373891, dtype=float32), 'loss_cross_entropy': Array(0.10730159, dtype=float32)}
{'loss_inverse': Array(0.00036279, dtype=float32)}


 10%|█         | 102119/1000000 [4:49:56<24:51:55, 10.03it/s]

{'loss': Array(0.13305461, dtype=float32), 'loss_cross_entropy': Array(0.12576811, dtype=float32)}
{'loss_inverse': Array(0.00012214, dtype=float32)}


 10%|█         | 102129/1000000 [4:49:57<30:35:38,  8.15it/s]

{'loss': Array(0.13750039, dtype=float32), 'loss_cross_entropy': Array(0.13002276, dtype=float32)}
{'loss_inverse': Array(8.561715e-07, dtype=float32)}


 10%|█         | 102139/1000000 [4:49:59<25:32:13,  9.77it/s]

{'loss': Array(0.13475437, dtype=float32), 'loss_cross_entropy': Array(0.1275934, dtype=float32)}
{'loss_inverse': Array(6.940735e-05, dtype=float32)}


 10%|█         | 102149/1000000 [4:50:01<30:43:59,  8.12it/s]

{'loss': Array(0.14476477, dtype=float32), 'loss_cross_entropy': Array(0.13701054, dtype=float32)}
{'loss_inverse': Array(4.039489e-06, dtype=float32)}


 10%|█         | 102159/1000000 [4:50:02<25:53:51,  9.63it/s]

{'loss': Array(0.14094454, dtype=float32), 'loss_cross_entropy': Array(0.1329588, dtype=float32)}
{'loss_inverse': Array(3.2521746e-07, dtype=float32)}


 10%|█         | 102169/1000000 [4:50:03<24:58:37,  9.99it/s]

{'loss': Array(0.15419178, dtype=float32), 'loss_cross_entropy': Array(0.1460136, dtype=float32)}
{'loss_inverse': Array(7.997696e-05, dtype=float32)}


 10%|█         | 102179/1000000 [4:50:05<28:00:56,  8.90it/s]

{'loss': Array(0.13446966, dtype=float32), 'loss_cross_entropy': Array(0.1268311, dtype=float32)}
{'loss_inverse': Array(2.9093024e-05, dtype=float32)}


 10%|█         | 102189/1000000 [4:50:07<26:54:39,  9.27it/s]

{'loss': Array(0.13941522, dtype=float32), 'loss_cross_entropy': Array(0.13210297, dtype=float32)}
{'loss_inverse': Array(0.00015638, dtype=float32)}


 10%|█         | 102199/1000000 [4:50:09<31:45:48,  7.85it/s]

{'loss': Array(0.13682182, dtype=float32), 'loss_cross_entropy': Array(0.12869945, dtype=float32)}
{'loss_inverse': Array(1.7919423e-05, dtype=float32)}


 10%|█         | 102209/1000000 [4:50:10<27:40:43,  9.01it/s]

{'loss': Array(0.14564544, dtype=float32), 'loss_cross_entropy': Array(0.13843535, dtype=float32)}
{'loss_inverse': Array(1.38847945e-05, dtype=float32)}


 10%|█         | 102219/1000000 [4:50:12<34:45:46,  7.17it/s]

{'loss': Array(0.14938115, dtype=float32), 'loss_cross_entropy': Array(0.14157374, dtype=float32)}
{'loss_inverse': Array(4.548472e-06, dtype=float32)}


 10%|█         | 102229/1000000 [4:50:13<26:01:41,  9.58it/s]

{'loss': Array(0.13644184, dtype=float32), 'loss_cross_entropy': Array(0.12858021, dtype=float32)}
{'loss_inverse': Array(3.0448273e-05, dtype=float32)}


 10%|█         | 102239/1000000 [4:50:15<24:40:44, 10.10it/s]

{'loss': Array(0.11503769, dtype=float32), 'loss_cross_entropy': Array(0.10836542, dtype=float32)}
{'loss_inverse': Array(3.1329557e-06, dtype=float32)}


 10%|█         | 102249/1000000 [4:50:17<29:01:30,  8.59it/s]

{'loss': Array(0.14145513, dtype=float32), 'loss_cross_entropy': Array(0.13365363, dtype=float32)}
{'loss_inverse': Array(0.00018206, dtype=float32)}


 10%|█         | 102259/1000000 [4:50:18<25:39:39,  9.72it/s]

{'loss': Array(0.12078971, dtype=float32), 'loss_cross_entropy': Array(0.11352241, dtype=float32)}
{'loss_inverse': Array(3.0948955e-05, dtype=float32)}


 10%|█         | 102269/1000000 [4:50:20<34:08:08,  7.31it/s]

{'loss': Array(0.1400996, dtype=float32), 'loss_cross_entropy': Array(0.13206245, dtype=float32)}
{'loss_inverse': Array(0.00016889, dtype=float32)}


 10%|█         | 102279/1000000 [4:50:21<26:03:25,  9.57it/s]

{'loss': Array(0.13036118, dtype=float32), 'loss_cross_entropy': Array(0.12336089, dtype=float32)}
{'loss_inverse': Array(8.566151e-06, dtype=float32)}


 10%|█         | 102289/1000000 [4:50:23<33:42:30,  7.40it/s]

{'loss': Array(0.13860878, dtype=float32), 'loss_cross_entropy': Array(0.13065667, dtype=float32)}
{'loss_inverse': Array(2.2508482e-08, dtype=float32)}


 10%|█         | 102299/1000000 [4:50:24<25:44:08,  9.69it/s]

{'loss': Array(0.13762933, dtype=float32), 'loss_cross_entropy': Array(0.13042049, dtype=float32)}
{'loss_inverse': Array(6.3824077e-06, dtype=float32)}


 10%|█         | 102309/1000000 [4:50:26<24:28:38, 10.19it/s]

{'loss': Array(0.15161668, dtype=float32), 'loss_cross_entropy': Array(0.14410257, dtype=float32)}
{'loss_inverse': Array(2.018996e-07, dtype=float32)}


 10%|█         | 102319/1000000 [4:50:28<27:16:41,  9.14it/s]

{'loss': Array(0.13669145, dtype=float32), 'loss_cross_entropy': Array(0.12881145, dtype=float32)}
{'loss_inverse': Array(6.082842e-07, dtype=float32)}


 10%|█         | 102329/1000000 [4:50:29<24:48:42, 10.05it/s]

{'loss': Array(0.14333169, dtype=float32), 'loss_cross_entropy': Array(0.13621396, dtype=float32)}
{'loss_inverse': Array(3.1473473e-07, dtype=float32)}


 10%|█         | 102339/1000000 [4:50:31<37:24:00,  6.67it/s]

{'loss': Array(0.11737603, dtype=float32), 'loss_cross_entropy': Array(0.11029293, dtype=float32)}
{'loss_inverse': Array(1.0253227e-08, dtype=float32)}


 10%|█         | 102349/1000000 [4:50:32<26:33:49,  9.39it/s]

{'loss': Array(0.13878141, dtype=float32), 'loss_cross_entropy': Array(0.13055682, dtype=float32)}
{'loss_inverse': Array(4.6007976e-06, dtype=float32)}


 10%|█         | 102359/1000000 [4:50:34<25:46:18,  9.68it/s]

{'loss': Array(0.13916968, dtype=float32), 'loss_cross_entropy': Array(0.13192712, dtype=float32)}
{'loss_inverse': Array(1.11616455e-05, dtype=float32)}


 10%|█         | 102369/1000000 [4:50:35<29:14:29,  8.53it/s]

{'loss': Array(0.13277254, dtype=float32), 'loss_cross_entropy': Array(0.12516831, dtype=float32)}
{'loss_inverse': Array(3.6643766e-07, dtype=float32)}


 10%|█         | 102379/1000000 [4:50:37<25:24:16,  9.81it/s]

{'loss': Array(0.1402287, dtype=float32), 'loss_cross_entropy': Array(0.13266179, dtype=float32)}
{'loss_inverse': Array(3.7697444e-06, dtype=float32)}


 10%|█         | 102389/1000000 [4:50:39<28:57:10,  8.61it/s]

{'loss': Array(0.13570486, dtype=float32), 'loss_cross_entropy': Array(0.12854524, dtype=float32)}
{'loss_inverse': Array(6.9757454e-07, dtype=float32)}


 10%|█         | 102399/1000000 [4:50:40<25:28:27,  9.79it/s]

{'loss': Array(0.14420916, dtype=float32), 'loss_cross_entropy': Array(0.13645904, dtype=float32)}
{'loss_inverse': Array(1.0110111e-06, dtype=float32)}


 10%|█         | 102409/1000000 [4:50:42<31:23:16,  7.94it/s]

{'loss': Array(0.13020347, dtype=float32), 'loss_cross_entropy': Array(0.12337667, dtype=float32)}
{'loss_inverse': Array(6.252316e-07, dtype=float32)}


 10%|█         | 102419/1000000 [4:50:43<25:12:30,  9.89it/s]

{'loss': Array(0.15583108, dtype=float32), 'loss_cross_entropy': Array(0.14714868, dtype=float32)}
{'loss_inverse': Array(6.2123174e-05, dtype=float32)}


 10%|█         | 102429/1000000 [4:50:45<24:32:04, 10.16it/s]

{'loss': Array(0.13485554, dtype=float32), 'loss_cross_entropy': Array(0.12801851, dtype=float32)}
{'loss_inverse': Array(1.5085569e-05, dtype=float32)}


 10%|█         | 102439/1000000 [4:50:46<28:53:08,  8.63it/s]

{'loss': Array(0.13865736, dtype=float32), 'loss_cross_entropy': Array(0.13169675, dtype=float32)}
{'loss_inverse': Array(1.0993824e-06, dtype=float32)}


 10%|█         | 102449/1000000 [4:50:48<25:40:30,  9.71it/s]

{'loss': Array(0.16778004, dtype=float32), 'loss_cross_entropy': Array(0.15909104, dtype=float32)}
{'loss_inverse': Array(3.45643e-05, dtype=float32)}


 10%|█         | 102459/1000000 [4:50:50<34:01:40,  7.33it/s]

{'loss': Array(0.13720332, dtype=float32), 'loss_cross_entropy': Array(0.12990974, dtype=float32)}
{'loss_inverse': Array(1.0407155e-05, dtype=float32)}


 10%|█         | 102469/1000000 [4:50:51<25:51:39,  9.64it/s]

{'loss': Array(0.12297362, dtype=float32), 'loss_cross_entropy': Array(0.11608058, dtype=float32)}
{'loss_inverse': Array(7.3909126e-07, dtype=float32)}


 10%|█         | 102479/1000000 [4:50:53<37:21:12,  6.67it/s]

{'loss': Array(0.14125733, dtype=float32), 'loss_cross_entropy': Array(0.13282835, dtype=float32)}
{'loss_inverse': Array(6.8403e-05, dtype=float32)}


 10%|█         | 102489/1000000 [4:50:54<26:06:30,  9.55it/s]

{'loss': Array(0.13856918, dtype=float32), 'loss_cross_entropy': Array(0.13054372, dtype=float32)}
{'loss_inverse': Array(9.930916e-07, dtype=float32)}


 10%|█         | 102499/1000000 [4:50:56<36:41:44,  6.79it/s]

{'loss': Array(0.13357137, dtype=float32), 'loss_cross_entropy': Array(0.12643921, dtype=float32)}
{'loss_inverse': Array(3.5167407e-06, dtype=float32)}


 10%|█         | 102509/1000000 [4:51:04<84:59:26,  2.93it/s] 

{'loss': Array(0.12348385, dtype=float32), 'loss_cross_entropy': Array(0.11617049, dtype=float32)}
{'loss_inverse': Array(5.646812e-05, dtype=float32)}


 10%|█         | 102519/1000000 [4:51:06<47:17:18,  5.27it/s]

{'loss': Array(0.13348345, dtype=float32), 'loss_cross_entropy': Array(0.12582853, dtype=float32)}
{'loss_inverse': Array(2.637077e-08, dtype=float32)}


 10%|█         | 102529/1000000 [4:51:07<28:53:40,  8.63it/s]

{'loss': Array(0.13319124, dtype=float32), 'loss_cross_entropy': Array(0.12593725, dtype=float32)}
{'loss_inverse': Array(7.2022704e-07, dtype=float32)}


 10%|█         | 102539/1000000 [4:51:09<26:02:06,  9.58it/s]

{'loss': Array(0.15895882, dtype=float32), 'loss_cross_entropy': Array(0.15094912, dtype=float32)}
{'loss_inverse': Array(6.392169e-06, dtype=float32)}


 10%|█         | 102549/1000000 [4:51:10<30:03:08,  8.30it/s]

{'loss': Array(0.14209893, dtype=float32), 'loss_cross_entropy': Array(0.13475877, dtype=float32)}
{'loss_inverse': Array(0.000334, dtype=float32)}


 10%|█         | 102559/1000000 [4:51:12<26:17:14,  9.48it/s]

{'loss': Array(0.15009902, dtype=float32), 'loss_cross_entropy': Array(0.14269745, dtype=float32)}
{'loss_inverse': Array(2.591649e-06, dtype=float32)}


 10%|█         | 102569/1000000 [4:51:14<33:35:09,  7.42it/s]

{'loss': Array(0.16402058, dtype=float32), 'loss_cross_entropy': Array(0.15572245, dtype=float32)}
{'loss_inverse': Array(2.441924e-05, dtype=float32)}


 10%|█         | 102579/1000000 [4:51:15<25:55:30,  9.62it/s]

{'loss': Array(0.13480076, dtype=float32), 'loss_cross_entropy': Array(0.12734768, dtype=float32)}
{'loss_inverse': Array(7.432407e-05, dtype=float32)}


 10%|█         | 102589/1000000 [4:51:16<25:12:40,  9.89it/s]

{'loss': Array(0.14663537, dtype=float32), 'loss_cross_entropy': Array(0.13841231, dtype=float32)}
{'loss_inverse': Array(4.4671148e-07, dtype=float32)}


 10%|█         | 102599/1000000 [4:51:18<27:41:46,  9.00it/s]

{'loss': Array(0.1497722, dtype=float32), 'loss_cross_entropy': Array(0.1416233, dtype=float32)}
{'loss_inverse': Array(2.0833271e-07, dtype=float32)}


 10%|█         | 102609/1000000 [4:51:20<25:29:44,  9.78it/s]

{'loss': Array(0.11448723, dtype=float32), 'loss_cross_entropy': Array(0.10785689, dtype=float32)}
{'loss_inverse': Array(7.4205865e-08, dtype=float32)}


 10%|█         | 102619/1000000 [4:51:21<27:25:17,  9.09it/s]

{'loss': Array(0.13760339, dtype=float32), 'loss_cross_entropy': Array(0.13019787, dtype=float32)}
{'loss_inverse': Array(7.028694e-05, dtype=float32)}


 10%|█         | 102629/1000000 [4:51:23<25:11:11,  9.90it/s]

{'loss': Array(0.1488192, dtype=float32), 'loss_cross_entropy': Array(0.1403807, dtype=float32)}
{'loss_inverse': Array(2.020569e-07, dtype=float32)}


 10%|█         | 102639/1000000 [4:51:25<27:52:22,  8.94it/s]

{'loss': Array(0.1489728, dtype=float32), 'loss_cross_entropy': Array(0.14097926, dtype=float32)}
{'loss_inverse': Array(0.00023195, dtype=float32)}


 10%|█         | 102649/1000000 [4:51:26<24:59:53,  9.97it/s]

{'loss': Array(0.13940196, dtype=float32), 'loss_cross_entropy': Array(0.1316642, dtype=float32)}
{'loss_inverse': Array(5.726774e-05, dtype=float32)}


 10%|█         | 102659/1000000 [4:51:28<28:39:20,  8.70it/s]

{'loss': Array(0.1436943, dtype=float32), 'loss_cross_entropy': Array(0.13568532, dtype=float32)}
{'loss_inverse': Array(1.3276552e-06, dtype=float32)}


 10%|█         | 102669/1000000 [4:51:29<25:28:31,  9.78it/s]

{'loss': Array(0.13934772, dtype=float32), 'loss_cross_entropy': Array(0.13198085, dtype=float32)}
{'loss_inverse': Array(3.077644e-05, dtype=float32)}


 10%|█         | 102679/1000000 [4:51:31<30:57:40,  8.05it/s]

{'loss': Array(0.1376824, dtype=float32), 'loss_cross_entropy': Array(0.13025098, dtype=float32)}
{'loss_inverse': Array(0.00025161, dtype=float32)}


 10%|█         | 102689/1000000 [4:51:32<25:39:34,  9.71it/s]

{'loss': Array(0.14486502, dtype=float32), 'loss_cross_entropy': Array(0.13661075, dtype=float32)}
{'loss_inverse': Array(1.6777949e-05, dtype=float32)}


 10%|█         | 102699/1000000 [4:51:34<33:21:27,  7.47it/s]

{'loss': Array(0.13929814, dtype=float32), 'loss_cross_entropy': Array(0.13228904, dtype=float32)}
{'loss_inverse': Array(4.005522e-05, dtype=float32)}


 10%|█         | 102709/1000000 [4:51:36<26:13:33,  9.50it/s]

{'loss': Array(0.1404431, dtype=float32), 'loss_cross_entropy': Array(0.132914, dtype=float32)}
{'loss_inverse': Array(4.8367247e-06, dtype=float32)}


 10%|█         | 102719/1000000 [4:51:37<24:33:10, 10.15it/s]

{'loss': Array(0.13204454, dtype=float32), 'loss_cross_entropy': Array(0.12423775, dtype=float32)}
{'loss_inverse': Array(4.6914412e-08, dtype=float32)}


 10%|█         | 102729/1000000 [4:51:39<28:40:52,  8.69it/s]

{'loss': Array(0.14360605, dtype=float32), 'loss_cross_entropy': Array(0.13523115, dtype=float32)}
{'loss_inverse': Array(0.00010754, dtype=float32)}


 10%|█         | 102739/1000000 [4:51:40<25:24:36,  9.81it/s]

{'loss': Array(0.12827812, dtype=float32), 'loss_cross_entropy': Array(0.12047248, dtype=float32)}
{'loss_inverse': Array(7.269588e-05, dtype=float32)}


 10%|█         | 102749/1000000 [4:51:42<33:30:17,  7.44it/s]

{'loss': Array(0.13552, dtype=float32), 'loss_cross_entropy': Array(0.12761155, dtype=float32)}
{'loss_inverse': Array(0.00069937, dtype=float32)}


 10%|█         | 102759/1000000 [4:51:43<26:01:30,  9.58it/s]

{'loss': Array(0.12916015, dtype=float32), 'loss_cross_entropy': Array(0.12184103, dtype=float32)}
{'loss_inverse': Array(0.00016497, dtype=float32)}


 10%|█         | 102769/1000000 [4:51:45<33:57:20,  7.34it/s]

{'loss': Array(0.13574131, dtype=float32), 'loss_cross_entropy': Array(0.12858534, dtype=float32)}
{'loss_inverse': Array(7.723442e-06, dtype=float32)}


 10%|█         | 102779/1000000 [4:51:47<26:08:30,  9.53it/s]

{'loss': Array(0.1316165, dtype=float32), 'loss_cross_entropy': Array(0.12383609, dtype=float32)}
{'loss_inverse': Array(0.00017297, dtype=float32)}


 10%|█         | 102789/1000000 [4:51:48<25:31:46,  9.76it/s]

{'loss': Array(0.14505623, dtype=float32), 'loss_cross_entropy': Array(0.13758834, dtype=float32)}
{'loss_inverse': Array(1.1642741e-06, dtype=float32)}


 10%|█         | 102799/1000000 [4:51:50<27:32:31,  9.05it/s]

{'loss': Array(0.14098488, dtype=float32), 'loss_cross_entropy': Array(0.13361886, dtype=float32)}
{'loss_inverse': Array(4.868266e-06, dtype=float32)}


 10%|█         | 102809/1000000 [4:51:51<24:54:49, 10.00it/s]

{'loss': Array(0.14315997, dtype=float32), 'loss_cross_entropy': Array(0.13492568, dtype=float32)}
{'loss_inverse': Array(1.6997001e-06, dtype=float32)}


 10%|█         | 102819/1000000 [4:51:53<37:09:35,  6.71it/s]

{'loss': Array(0.14460252, dtype=float32), 'loss_cross_entropy': Array(0.13631478, dtype=float32)}
{'loss_inverse': Array(2.4541084e-06, dtype=float32)}


 10%|█         | 102829/1000000 [4:51:54<27:23:30,  9.10it/s]

{'loss': Array(0.14193755, dtype=float32), 'loss_cross_entropy': Array(0.13400246, dtype=float32)}
{'loss_inverse': Array(4.6291157e-06, dtype=float32)}


 10%|█         | 102839/1000000 [4:51:56<24:43:22, 10.08it/s]

{'loss': Array(0.1469529, dtype=float32), 'loss_cross_entropy': Array(0.13947208, dtype=float32)}
{'loss_inverse': Array(3.4972957e-06, dtype=float32)}


 10%|█         | 102849/1000000 [4:51:58<28:48:10,  8.65it/s]

{'loss': Array(0.14366692, dtype=float32), 'loss_cross_entropy': Array(0.13583855, dtype=float32)}
{'loss_inverse': Array(0.00014309, dtype=float32)}


 10%|█         | 102859/1000000 [4:51:59<25:23:42,  9.81it/s]

{'loss': Array(0.1266087, dtype=float32), 'loss_cross_entropy': Array(0.11921412, dtype=float32)}
{'loss_inverse': Array(0.00023105, dtype=float32)}


 10%|█         | 102869/1000000 [4:52:01<29:16:12,  8.51it/s]

{'loss': Array(0.1307324, dtype=float32), 'loss_cross_entropy': Array(0.12323099, dtype=float32)}
{'loss_inverse': Array(6.705012e-05, dtype=float32)}


 10%|█         | 102879/1000000 [4:52:02<24:55:35, 10.00it/s]

{'loss': Array(0.14335215, dtype=float32), 'loss_cross_entropy': Array(0.13501191, dtype=float32)}
{'loss_inverse': Array(1.1793077e-06, dtype=float32)}


 10%|█         | 102889/1000000 [4:52:04<30:40:25,  8.12it/s]

{'loss': Array(0.14897387, dtype=float32), 'loss_cross_entropy': Array(0.14112747, dtype=float32)}
{'loss_inverse': Array(4.8569486e-06, dtype=float32)}


 10%|█         | 102899/1000000 [4:52:05<25:09:23,  9.91it/s]

{'loss': Array(0.13828705, dtype=float32), 'loss_cross_entropy': Array(0.13081674, dtype=float32)}
{'loss_inverse': Array(0.00040527, dtype=float32)}


 10%|█         | 102909/1000000 [4:52:07<25:44:07,  9.68it/s]

{'loss': Array(0.1298924, dtype=float32), 'loss_cross_entropy': Array(0.12273693, dtype=float32)}
{'loss_inverse': Array(9.497348e-05, dtype=float32)}


 10%|█         | 102919/1000000 [4:52:09<29:43:48,  8.38it/s]

{'loss': Array(0.11333138, dtype=float32), 'loss_cross_entropy': Array(0.10689306, dtype=float32)}
{'loss_inverse': Array(1.6933196e-05, dtype=float32)}


 10%|█         | 102929/1000000 [4:52:10<26:23:26,  9.44it/s]

{'loss': Array(0.14200337, dtype=float32), 'loss_cross_entropy': Array(0.13463838, dtype=float32)}
{'loss_inverse': Array(1.2733202e-06, dtype=float32)}


 10%|█         | 102939/1000000 [4:52:12<35:29:31,  7.02it/s]

{'loss': Array(0.12408356, dtype=float32), 'loss_cross_entropy': Array(0.11707196, dtype=float32)}
{'loss_inverse': Array(1.4009087e-06, dtype=float32)}


 10%|█         | 102949/1000000 [4:52:13<26:31:04,  9.40it/s]

{'loss': Array(0.13812554, dtype=float32), 'loss_cross_entropy': Array(0.13031355, dtype=float32)}
{'loss_inverse': Array(2.77831e-06, dtype=float32)}


 10%|█         | 102959/1000000 [4:52:15<37:36:38,  6.63it/s]

{'loss': Array(0.14637244, dtype=float32), 'loss_cross_entropy': Array(0.13863839, dtype=float32)}
{'loss_inverse': Array(1.3775626e-07, dtype=float32)}


 10%|█         | 102969/1000000 [4:52:17<26:33:50,  9.38it/s]

{'loss': Array(0.15782446, dtype=float32), 'loss_cross_entropy': Array(0.15006603, dtype=float32)}
{'loss_inverse': Array(2.983138e-05, dtype=float32)}


 10%|█         | 102979/1000000 [4:52:18<37:49:07,  6.59it/s]

{'loss': Array(0.1357779, dtype=float32), 'loss_cross_entropy': Array(0.12783055, dtype=float32)}
{'loss_inverse': Array(1.533538e-06, dtype=float32)}


 10%|█         | 102989/1000000 [4:52:20<27:03:58,  9.21it/s]

{'loss': Array(0.14423959, dtype=float32), 'loss_cross_entropy': Array(0.1368482, dtype=float32)}
{'loss_inverse': Array(3.9186266e-06, dtype=float32)}


 10%|█         | 102999/1000000 [4:52:21<25:00:41,  9.96it/s]

{'loss': Array(0.14594337, dtype=float32), 'loss_cross_entropy': Array(0.13803773, dtype=float32)}
{'loss_inverse': Array(0.00011093, dtype=float32)}


 10%|█         | 103009/1000000 [4:52:30<92:29:34,  2.69it/s] 

{'loss': Array(0.12716874, dtype=float32), 'loss_cross_entropy': Array(0.12030939, dtype=float32)}
{'loss_inverse': Array(1.16647125e-05, dtype=float32)}


 10%|█         | 103019/1000000 [4:52:31<35:45:11,  6.97it/s]

{'loss': Array(0.12449441, dtype=float32), 'loss_cross_entropy': Array(0.11746017, dtype=float32)}
{'loss_inverse': Array(1.4320917e-05, dtype=float32)}


 10%|█         | 103029/1000000 [4:52:32<25:53:56,  9.62it/s]

{'loss': Array(0.13327913, dtype=float32), 'loss_cross_entropy': Array(0.12601021, dtype=float32)}
{'loss_inverse': Array(1.42157905e-05, dtype=float32)}


 10%|█         | 103039/1000000 [4:52:34<29:35:49,  8.42it/s]

{'loss': Array(0.13843517, dtype=float32), 'loss_cross_entropy': Array(0.13054922, dtype=float32)}
{'loss_inverse': Array(6.9206194e-06, dtype=float32)}


 10%|█         | 103049/1000000 [4:52:36<25:46:31,  9.67it/s]

{'loss': Array(0.13015205, dtype=float32), 'loss_cross_entropy': Array(0.12297843, dtype=float32)}
{'loss_inverse': Array(6.266154e-07, dtype=float32)}


 10%|█         | 103059/1000000 [4:52:37<31:22:02,  7.94it/s]

{'loss': Array(0.14639525, dtype=float32), 'loss_cross_entropy': Array(0.13785985, dtype=float32)}
{'loss_inverse': Array(2.4783258e-06, dtype=float32)}


 10%|█         | 103069/1000000 [4:52:39<25:28:18,  9.78it/s]

{'loss': Array(0.14214562, dtype=float32), 'loss_cross_entropy': Array(0.13385251, dtype=float32)}
{'loss_inverse': Array(3.203684e-06, dtype=float32)}


 10%|█         | 103079/1000000 [4:52:41<30:16:34,  8.23it/s]

{'loss': Array(0.12667565, dtype=float32), 'loss_cross_entropy': Array(0.11959463, dtype=float32)}
{'loss_inverse': Array(1.15342004e-07, dtype=float32)}


 10%|█         | 103089/1000000 [4:52:42<25:27:10,  9.79it/s]

{'loss': Array(0.13458431, dtype=float32), 'loss_cross_entropy': Array(0.12704776, dtype=float32)}
{'loss_inverse': Array(1.7339369e-08, dtype=float32)}


 10%|█         | 103099/1000000 [4:52:44<31:04:18,  8.02it/s]

{'loss': Array(0.14210781, dtype=float32), 'loss_cross_entropy': Array(0.13454272, dtype=float32)}
{'loss_inverse': Array(1.566635e-07, dtype=float32)}


 10%|█         | 103109/1000000 [4:52:45<25:47:29,  9.66it/s]

{'loss': Array(0.13345456, dtype=float32), 'loss_cross_entropy': Array(0.1265024, dtype=float32)}
{'loss_inverse': Array(9.551195e-08, dtype=float32)}


 10%|█         | 103119/1000000 [4:52:47<33:15:46,  7.49it/s]

{'loss': Array(0.13661563, dtype=float32), 'loss_cross_entropy': Array(0.12926845, dtype=float32)}
{'loss_inverse': Array(3.4542648e-08, dtype=float32)}


 10%|█         | 103129/1000000 [4:52:48<25:41:35,  9.70it/s]

{'loss': Array(0.15892132, dtype=float32), 'loss_cross_entropy': Array(0.15039723, dtype=float32)}
{'loss_inverse': Array(7.605694e-08, dtype=float32)}


 10%|█         | 103139/1000000 [4:52:50<37:03:40,  6.72it/s]

{'loss': Array(0.14206561, dtype=float32), 'loss_cross_entropy': Array(0.13410436, dtype=float32)}
{'loss_inverse': Array(0.00024237, dtype=float32)}


 10%|█         | 103149/1000000 [4:52:51<26:08:11,  9.53it/s]

{'loss': Array(0.14015786, dtype=float32), 'loss_cross_entropy': Array(0.13290198, dtype=float32)}
{'loss_inverse': Array(1.12143084e-07, dtype=float32)}


 10%|█         | 103159/1000000 [4:52:53<37:26:43,  6.65it/s]

{'loss': Array(0.16708541, dtype=float32), 'loss_cross_entropy': Array(0.15831895, dtype=float32)}
{'loss_inverse': Array(4.687015e-06, dtype=float32)}


 10%|█         | 103169/1000000 [4:52:55<27:13:03,  9.15it/s]

{'loss': Array(0.13841645, dtype=float32), 'loss_cross_entropy': Array(0.13085517, dtype=float32)}
{'loss_inverse': Array(4.4583754e-05, dtype=float32)}


 10%|█         | 103179/1000000 [4:52:56<25:04:37,  9.93it/s]

{'loss': Array(0.15326416, dtype=float32), 'loss_cross_entropy': Array(0.1449701, dtype=float32)}
{'loss_inverse': Array(2.5135657e-07, dtype=float32)}


 10%|█         | 103189/1000000 [4:52:58<31:12:56,  7.98it/s]

{'loss': Array(0.14114717, dtype=float32), 'loss_cross_entropy': Array(0.13340954, dtype=float32)}
{'loss_inverse': Array(4.6070983e-07, dtype=float32)}


 10%|█         | 103199/1000000 [4:52:59<25:23:17,  9.81it/s]

{'loss': Array(0.12655172, dtype=float32), 'loss_cross_entropy': Array(0.11881652, dtype=float32)}
{'loss_inverse': Array(4.305057e-09, dtype=float32)}


 10%|█         | 103209/1000000 [4:53:01<33:33:45,  7.42it/s]

{'loss': Array(0.11793461, dtype=float32), 'loss_cross_entropy': Array(0.11085018, dtype=float32)}
{'loss_inverse': Array(8.6122935e-07, dtype=float32)}


 10%|█         | 103219/1000000 [4:53:03<25:38:05,  9.72it/s]

{'loss': Array(0.12539034, dtype=float32), 'loss_cross_entropy': Array(0.11852556, dtype=float32)}
{'loss_inverse': Array(3.7582954e-08, dtype=float32)}


 10%|█         | 103229/1000000 [4:53:04<33:11:43,  7.50it/s]

{'loss': Array(0.15192711, dtype=float32), 'loss_cross_entropy': Array(0.14372295, dtype=float32)}
{'loss_inverse': Array(2.1848447e-05, dtype=float32)}


 10%|█         | 103239/1000000 [4:53:06<26:11:54,  9.51it/s]

{'loss': Array(0.14183722, dtype=float32), 'loss_cross_entropy': Array(0.13377793, dtype=float32)}
{'loss_inverse': Array(1.318988e-06, dtype=float32)}


 10%|█         | 103249/1000000 [4:53:08<38:06:40,  6.54it/s]

{'loss': Array(0.14028816, dtype=float32), 'loss_cross_entropy': Array(0.13236807, dtype=float32)}
{'loss_inverse': Array(3.7435322e-07, dtype=float32)}


 10%|█         | 103259/1000000 [4:53:09<27:00:32,  9.22it/s]

{'loss': Array(0.14130558, dtype=float32), 'loss_cross_entropy': Array(0.13407616, dtype=float32)}
{'loss_inverse': Array(1.9725561e-05, dtype=float32)}


 10%|█         | 103269/1000000 [4:53:10<24:47:05, 10.05it/s]

{'loss': Array(0.14380594, dtype=float32), 'loss_cross_entropy': Array(0.1356676, dtype=float32)}
{'loss_inverse': Array(2.197752e-05, dtype=float32)}


 10%|█         | 103279/1000000 [4:53:12<30:38:07,  8.13it/s]

{'loss': Array(0.14955987, dtype=float32), 'loss_cross_entropy': Array(0.14168902, dtype=float32)}
{'loss_inverse': Array(1.3630827e-07, dtype=float32)}


 10%|█         | 103289/1000000 [4:53:14<25:41:59,  9.69it/s]

{'loss': Array(0.1308107, dtype=float32), 'loss_cross_entropy': Array(0.12380131, dtype=float32)}
{'loss_inverse': Array(1.1457404e-07, dtype=float32)}


 10%|█         | 103299/1000000 [4:53:15<37:11:25,  6.70it/s]

{'loss': Array(0.14059655, dtype=float32), 'loss_cross_entropy': Array(0.1326499, dtype=float32)}
{'loss_inverse': Array(6.614477e-05, dtype=float32)}


 10%|█         | 103309/1000000 [4:53:17<26:50:31,  9.28it/s]

{'loss': Array(0.13838068, dtype=float32), 'loss_cross_entropy': Array(0.13117744, dtype=float32)}
{'loss_inverse': Array(1.4180408e-06, dtype=float32)}


 10%|█         | 103319/1000000 [4:53:18<26:07:05,  9.54it/s]

{'loss': Array(0.12613195, dtype=float32), 'loss_cross_entropy': Array(0.11908289, dtype=float32)}
{'loss_inverse': Array(0.00014636, dtype=float32)}


 10%|█         | 103330/1000000 [4:53:20<25:21:17,  9.82it/s]

{'loss': Array(0.15629278, dtype=float32), 'loss_cross_entropy': Array(0.14752424, dtype=float32)}
{'loss_inverse': Array(3.0117116e-07, dtype=float32)}


 10%|█         | 103340/1000000 [4:53:21<24:21:40, 10.22it/s]

{'loss': Array(0.129064, dtype=float32), 'loss_cross_entropy': Array(0.12154005, dtype=float32)}
{'loss_inverse': Array(4.8972295e-05, dtype=float32)}


 10%|█         | 103350/1000000 [4:53:23<27:11:50,  9.16it/s]

{'loss': Array(0.1588328, dtype=float32), 'loss_cross_entropy': Array(0.15038642, dtype=float32)}
{'loss_inverse': Array(4.2547327e-07, dtype=float32)}


 10%|█         | 103360/1000000 [4:53:25<24:56:00,  9.99it/s]

{'loss': Array(0.15641284, dtype=float32), 'loss_cross_entropy': Array(0.14770988, dtype=float32)}
{'loss_inverse': Array(1.3178808e-06, dtype=float32)}


 10%|█         | 103370/1000000 [4:53:26<30:27:03,  8.18it/s]

{'loss': Array(0.14346053, dtype=float32), 'loss_cross_entropy': Array(0.13593893, dtype=float32)}
{'loss_inverse': Array(0.00011373, dtype=float32)}


 10%|█         | 103380/1000000 [4:53:28<25:23:47,  9.81it/s]

{'loss': Array(0.12835725, dtype=float32), 'loss_cross_entropy': Array(0.12064433, dtype=float32)}
{'loss_inverse': Array(1.4383823e-05, dtype=float32)}


 10%|█         | 103390/1000000 [4:53:30<30:41:06,  8.12it/s]

{'loss': Array(0.14850272, dtype=float32), 'loss_cross_entropy': Array(0.1403418, dtype=float32)}
{'loss_inverse': Array(3.3258927e-06, dtype=float32)}


 10%|█         | 103400/1000000 [4:53:31<26:16:16,  9.48it/s]

{'loss': Array(0.15290724, dtype=float32), 'loss_cross_entropy': Array(0.14470632, dtype=float32)}
{'loss_inverse': Array(3.136996e-08, dtype=float32)}


 10%|█         | 103410/1000000 [4:53:33<37:02:44,  6.72it/s]

{'loss': Array(0.14072378, dtype=float32), 'loss_cross_entropy': Array(0.13325097, dtype=float32)}
{'loss_inverse': Array(1.406657e-05, dtype=float32)}


 10%|█         | 103420/1000000 [4:53:34<26:16:43,  9.48it/s]

{'loss': Array(0.13459483, dtype=float32), 'loss_cross_entropy': Array(0.12718281, dtype=float32)}
{'loss_inverse': Array(5.5796772e-06, dtype=float32)}


 10%|█         | 103430/1000000 [4:53:36<25:09:41,  9.90it/s]

{'loss': Array(0.15012437, dtype=float32), 'loss_cross_entropy': Array(0.14194809, dtype=float32)}
{'loss_inverse': Array(1.1041155e-05, dtype=float32)}


 10%|█         | 103440/1000000 [4:53:37<28:26:32,  8.76it/s]

{'loss': Array(0.13588391, dtype=float32), 'loss_cross_entropy': Array(0.12930886, dtype=float32)}
{'loss_inverse': Array(3.6198167e-06, dtype=float32)}


 10%|█         | 103450/1000000 [4:53:39<24:36:49, 10.12it/s]

{'loss': Array(0.14543799, dtype=float32), 'loss_cross_entropy': Array(0.13814506, dtype=float32)}
{'loss_inverse': Array(8.140972e-06, dtype=float32)}


 10%|█         | 103460/1000000 [4:53:40<30:09:49,  8.26it/s]

{'loss': Array(0.12252966, dtype=float32), 'loss_cross_entropy': Array(0.11553686, dtype=float32)}
{'loss_inverse': Array(1.3060106e-06, dtype=float32)}


 10%|█         | 103470/1000000 [4:53:42<25:42:11,  9.69it/s]

{'loss': Array(0.14940773, dtype=float32), 'loss_cross_entropy': Array(0.1416267, dtype=float32)}
{'loss_inverse': Array(3.217655e-08, dtype=float32)}


 10%|█         | 103480/1000000 [4:53:43<23:58:48, 10.38it/s]

{'loss': Array(0.12680541, dtype=float32), 'loss_cross_entropy': Array(0.11920221, dtype=float32)}
{'loss_inverse': Array(1.9500185e-06, dtype=float32)}


 10%|█         | 103490/1000000 [4:53:45<28:40:36,  8.68it/s]

{'loss': Array(0.12864625, dtype=float32), 'loss_cross_entropy': Array(0.12132086, dtype=float32)}
{'loss_inverse': Array(4.414174e-06, dtype=float32)}


 10%|█         | 103500/1000000 [4:53:47<25:19:08,  9.84it/s]

{'loss': Array(0.14435707, dtype=float32), 'loss_cross_entropy': Array(0.13641618, dtype=float32)}
{'loss_inverse': Array(4.775533e-08, dtype=float32)}


 10%|█         | 103510/1000000 [4:53:55<90:57:12,  2.74it/s] 

{'loss': Array(0.13686408, dtype=float32), 'loss_cross_entropy': Array(0.12974389, dtype=float32)}
{'loss_inverse': Array(0.00012084, dtype=float32)}


 10%|█         | 103520/1000000 [4:53:56<35:16:57,  7.06it/s]

{'loss': Array(0.14012428, dtype=float32), 'loss_cross_entropy': Array(0.13265245, dtype=float32)}
{'loss_inverse': Array(2.5727834e-08, dtype=float32)}


 10%|█         | 103530/1000000 [4:53:58<38:47:55,  6.42it/s]

{'loss': Array(0.1489069, dtype=float32), 'loss_cross_entropy': Array(0.1411589, dtype=float32)}
{'loss_inverse': Array(0.00011559, dtype=float32)}


 10%|█         | 103540/1000000 [4:53:59<26:29:56,  9.40it/s]

{'loss': Array(0.14397591, dtype=float32), 'loss_cross_entropy': Array(0.13596539, dtype=float32)}
{'loss_inverse': Array(6.8801244e-05, dtype=float32)}


 10%|█         | 103550/1000000 [4:54:01<25:23:31,  9.81it/s]

{'loss': Array(0.14463982, dtype=float32), 'loss_cross_entropy': Array(0.13720381, dtype=float32)}
{'loss_inverse': Array(8.469877e-05, dtype=float32)}


 10%|█         | 103560/1000000 [4:54:02<27:32:04,  9.04it/s]

{'loss': Array(0.14099622, dtype=float32), 'loss_cross_entropy': Array(0.13290378, dtype=float32)}
{'loss_inverse': Array(4.0144073e-08, dtype=float32)}


 10%|█         | 103570/1000000 [4:54:04<37:04:19,  6.72it/s]

{'loss': Array(0.13238974, dtype=float32), 'loss_cross_entropy': Array(0.1255968, dtype=float32)}
{'loss_inverse': Array(0.00013357, dtype=float32)}


 10%|█         | 103580/1000000 [4:54:06<26:42:03,  9.33it/s]

{'loss': Array(0.13469493, dtype=float32), 'loss_cross_entropy': Array(0.12722729, dtype=float32)}
{'loss_inverse': Array(3.0422943e-06, dtype=float32)}


 10%|█         | 103590/1000000 [4:54:07<26:12:50,  9.50it/s]

{'loss': Array(0.14345676, dtype=float32), 'loss_cross_entropy': Array(0.13603865, dtype=float32)}
{'loss_inverse': Array(2.634637e-06, dtype=float32)}


 10%|█         | 103600/1000000 [4:54:09<28:50:08,  8.64it/s]

{'loss': Array(0.14666642, dtype=float32), 'loss_cross_entropy': Array(0.13911374, dtype=float32)}
{'loss_inverse': Array(1.5132308e-06, dtype=float32)}


 10%|█         | 103610/1000000 [4:54:10<26:06:47,  9.54it/s]

{'loss': Array(0.14995317, dtype=float32), 'loss_cross_entropy': Array(0.14196666, dtype=float32)}
{'loss_inverse': Array(1.9401678e-07, dtype=float32)}


 10%|█         | 103620/1000000 [4:54:12<29:21:40,  8.48it/s]

{'loss': Array(0.13069205, dtype=float32), 'loss_cross_entropy': Array(0.12352212, dtype=float32)}
{'loss_inverse': Array(5.02826e-07, dtype=float32)}


 10%|█         | 103630/1000000 [4:54:14<25:42:40,  9.68it/s]

{'loss': Array(0.1411, dtype=float32), 'loss_cross_entropy': Array(0.13322459, dtype=float32)}
{'loss_inverse': Array(3.3208377e-07, dtype=float32)}


 10%|█         | 103640/1000000 [4:54:15<30:33:13,  8.15it/s]

{'loss': Array(0.15377343, dtype=float32), 'loss_cross_entropy': Array(0.14569531, dtype=float32)}
{'loss_inverse': Array(4.5679909e-07, dtype=float32)}


 10%|█         | 103650/1000000 [4:54:17<25:19:17,  9.83it/s]

{'loss': Array(0.14279208, dtype=float32), 'loss_cross_entropy': Array(0.13564831, dtype=float32)}
{'loss_inverse': Array(0.0002174, dtype=float32)}


 10%|█         | 103660/1000000 [4:54:18<24:32:39, 10.14it/s]

{'loss': Array(0.13812385, dtype=float32), 'loss_cross_entropy': Array(0.13045965, dtype=float32)}
{'loss_inverse': Array(1.7143228e-07, dtype=float32)}


 10%|█         | 103670/1000000 [4:54:20<29:31:08,  8.43it/s]

{'loss': Array(0.15398893, dtype=float32), 'loss_cross_entropy': Array(0.14601241, dtype=float32)}
{'loss_inverse': Array(2.014122e-05, dtype=float32)}


 10%|█         | 103680/1000000 [4:54:21<25:17:46,  9.84it/s]

{'loss': Array(0.1492565, dtype=float32), 'loss_cross_entropy': Array(0.14177038, dtype=float32)}
{'loss_inverse': Array(4.4552475e-08, dtype=float32)}


 10%|█         | 103690/1000000 [4:54:23<30:21:53,  8.20it/s]

{'loss': Array(0.13077895, dtype=float32), 'loss_cross_entropy': Array(0.12412435, dtype=float32)}
{'loss_inverse': Array(1.2592113e-05, dtype=float32)}


 10%|█         | 103700/1000000 [4:54:25<25:22:39,  9.81it/s]

{'loss': Array(0.13656288, dtype=float32), 'loss_cross_entropy': Array(0.12967107, dtype=float32)}
{'loss_inverse': Array(6.984646e-06, dtype=float32)}


 10%|█         | 103710/1000000 [4:54:26<33:30:42,  7.43it/s]

{'loss': Array(0.14673339, dtype=float32), 'loss_cross_entropy': Array(0.13859111, dtype=float32)}
{'loss_inverse': Array(1.2992198e-06, dtype=float32)}


 10%|█         | 103720/1000000 [4:54:28<25:38:44,  9.71it/s]

{'loss': Array(0.12533262, dtype=float32), 'loss_cross_entropy': Array(0.11853755, dtype=float32)}
{'loss_inverse': Array(5.6564117e-05, dtype=float32)}


 10%|█         | 103730/1000000 [4:54:29<24:52:47, 10.01it/s]

{'loss': Array(0.13635552, dtype=float32), 'loss_cross_entropy': Array(0.12840219, dtype=float32)}
{'loss_inverse': Array(4.9047494e-06, dtype=float32)}


 10%|█         | 103740/1000000 [4:54:31<27:24:40,  9.08it/s]

{'loss': Array(0.1367336, dtype=float32), 'loss_cross_entropy': Array(0.1295412, dtype=float32)}
{'loss_inverse': Array(7.47894e-06, dtype=float32)}


 10%|█         | 103750/1000000 [4:54:32<24:48:27, 10.04it/s]

{'loss': Array(0.12785083, dtype=float32), 'loss_cross_entropy': Array(0.11998685, dtype=float32)}
{'loss_inverse': Array(2.910899e-07, dtype=float32)}


 10%|█         | 103760/1000000 [4:54:34<32:50:01,  7.58it/s]

{'loss': Array(0.15208833, dtype=float32), 'loss_cross_entropy': Array(0.14443843, dtype=float32)}
{'loss_inverse': Array(0.00010745, dtype=float32)}


 10%|█         | 103770/1000000 [4:54:35<25:44:52,  9.67it/s]

{'loss': Array(0.15925534, dtype=float32), 'loss_cross_entropy': Array(0.1511644, dtype=float32)}
{'loss_inverse': Array(0.00012478, dtype=float32)}


 10%|█         | 103780/1000000 [4:54:37<24:45:59, 10.05it/s]

{'loss': Array(0.13626836, dtype=float32), 'loss_cross_entropy': Array(0.12970102, dtype=float32)}
{'loss_inverse': Array(8.8701985e-05, dtype=float32)}


 10%|█         | 103790/1000000 [4:54:39<27:52:38,  8.93it/s]

{'loss': Array(0.14438199, dtype=float32), 'loss_cross_entropy': Array(0.13645732, dtype=float32)}
{'loss_inverse': Array(8.406357e-06, dtype=float32)}


 10%|█         | 103800/1000000 [4:54:40<24:42:57, 10.07it/s]

{'loss': Array(0.1455837, dtype=float32), 'loss_cross_entropy': Array(0.13779356, dtype=float32)}
{'loss_inverse': Array(1.5359243e-06, dtype=float32)}


 10%|█         | 103810/1000000 [4:54:42<29:13:07,  8.52it/s]

{'loss': Array(0.13073711, dtype=float32), 'loss_cross_entropy': Array(0.12360014, dtype=float32)}
{'loss_inverse': Array(6.474546e-05, dtype=float32)}


 10%|█         | 103820/1000000 [4:54:43<25:36:03,  9.72it/s]

{'loss': Array(0.13297758, dtype=float32), 'loss_cross_entropy': Array(0.12572269, dtype=float32)}
{'loss_inverse': Array(1.0445182e-05, dtype=float32)}


 10%|█         | 103830/1000000 [4:54:45<30:35:29,  8.14it/s]

{'loss': Array(0.1425466, dtype=float32), 'loss_cross_entropy': Array(0.13428722, dtype=float32)}
{'loss_inverse': Array(1.5069881e-05, dtype=float32)}


 10%|█         | 103840/1000000 [4:54:46<25:22:28,  9.81it/s]

{'loss': Array(0.13511394, dtype=float32), 'loss_cross_entropy': Array(0.12787955, dtype=float32)}
{'loss_inverse': Array(1.7841854e-05, dtype=float32)}


 10%|█         | 103850/1000000 [4:54:48<24:46:07, 10.05it/s]

{'loss': Array(0.15343581, dtype=float32), 'loss_cross_entropy': Array(0.14558883, dtype=float32)}
{'loss_inverse': Array(3.6954407e-06, dtype=float32)}


 10%|█         | 103860/1000000 [4:54:50<30:24:30,  8.19it/s]

{'loss': Array(0.14761007, dtype=float32), 'loss_cross_entropy': Array(0.14048587, dtype=float32)}
{'loss_inverse': Array(2.430468e-06, dtype=float32)}


 10%|█         | 103870/1000000 [4:54:51<25:10:23,  9.89it/s]

{'loss': Array(0.15268089, dtype=float32), 'loss_cross_entropy': Array(0.14440818, dtype=float32)}
{'loss_inverse': Array(2.5790584e-07, dtype=float32)}


 10%|█         | 103880/1000000 [4:54:53<32:54:48,  7.56it/s]

{'loss': Array(0.13414231, dtype=float32), 'loss_cross_entropy': Array(0.12673306, dtype=float32)}
{'loss_inverse': Array(3.3376963e-09, dtype=float32)}


 10%|█         | 103890/1000000 [4:54:54<26:17:31,  9.47it/s]

{'loss': Array(0.13876656, dtype=float32), 'loss_cross_entropy': Array(0.13137782, dtype=float32)}
{'loss_inverse': Array(2.6215372e-05, dtype=float32)}


 10%|█         | 103900/1000000 [4:54:56<37:47:38,  6.59it/s]

{'loss': Array(0.12025433, dtype=float32), 'loss_cross_entropy': Array(0.11378022, dtype=float32)}
{'loss_inverse': Array(1.143104e-07, dtype=float32)}


 10%|█         | 103910/1000000 [4:54:58<26:30:19,  9.39it/s]

{'loss': Array(0.12253194, dtype=float32), 'loss_cross_entropy': Array(0.11557122, dtype=float32)}
{'loss_inverse': Array(6.5880683e-09, dtype=float32)}


 10%|█         | 103920/1000000 [4:54:59<37:34:04,  6.63it/s]

{'loss': Array(0.12910195, dtype=float32), 'loss_cross_entropy': Array(0.12174892, dtype=float32)}
{'loss_inverse': Array(1.28956135e-05, dtype=float32)}


 10%|█         | 103930/1000000 [4:55:01<27:00:45,  9.21it/s]

{'loss': Array(0.13134943, dtype=float32), 'loss_cross_entropy': Array(0.12417704, dtype=float32)}
{'loss_inverse': Array(7.854748e-07, dtype=float32)}


 10%|█         | 103940/1000000 [4:55:02<24:40:47, 10.09it/s]

{'loss': Array(0.14608489, dtype=float32), 'loss_cross_entropy': Array(0.13862732, dtype=float32)}
{'loss_inverse': Array(3.8540566e-07, dtype=float32)}


 10%|█         | 103950/1000000 [4:55:04<30:49:18,  8.08it/s]

{'loss': Array(0.13687706, dtype=float32), 'loss_cross_entropy': Array(0.12936111, dtype=float32)}
{'loss_inverse': Array(1.9262502e-07, dtype=float32)}


 10%|█         | 103960/1000000 [4:55:05<26:01:46,  9.56it/s]

{'loss': Array(0.1441379, dtype=float32), 'loss_cross_entropy': Array(0.13639055, dtype=float32)}
{'loss_inverse': Array(0.00012588, dtype=float32)}


 10%|█         | 103970/1000000 [4:55:07<38:34:42,  6.45it/s]

{'loss': Array(0.13705505, dtype=float32), 'loss_cross_entropy': Array(0.12912518, dtype=float32)}
{'loss_inverse': Array(4.6761725e-05, dtype=float32)}


 10%|█         | 103980/1000000 [4:55:09<28:00:47,  8.88it/s]

{'loss': Array(0.12807582, dtype=float32), 'loss_cross_entropy': Array(0.12080921, dtype=float32)}
{'loss_inverse': Array(1.5898443e-06, dtype=float32)}


 10%|█         | 103990/1000000 [4:55:10<26:03:08,  9.55it/s]

{'loss': Array(0.12887983, dtype=float32), 'loss_cross_entropy': Array(0.1221828, dtype=float32)}
{'loss_inverse': Array(2.819457e-07, dtype=float32)}


 10%|█         | 104000/1000000 [4:55:12<28:44:57,  8.66it/s]

{'loss': Array(0.14548418, dtype=float32), 'loss_cross_entropy': Array(0.13810728, dtype=float32)}
{'loss_inverse': Array(1.9075318e-05, dtype=float32)}


 10%|█         | 104010/1000000 [4:55:20<97:03:55,  2.56it/s] 

{'loss': Array(0.13685875, dtype=float32), 'loss_cross_entropy': Array(0.12959911, dtype=float32)}
{'loss_inverse': Array(6.0814536e-06, dtype=float32)}


 10%|█         | 104020/1000000 [4:55:22<36:32:32,  6.81it/s] 

{'loss': Array(0.15385821, dtype=float32), 'loss_cross_entropy': Array(0.1453746, dtype=float32)}
{'loss_inverse': Array(4.699484e-06, dtype=float32)}


 10%|█         | 104030/1000000 [4:55:23<26:08:05,  9.52it/s]

{'loss': Array(0.13230184, dtype=float32), 'loss_cross_entropy': Array(0.12468021, dtype=float32)}
{'loss_inverse': Array(1.7071592e-07, dtype=float32)}


 10%|█         | 104040/1000000 [4:55:25<33:17:49,  7.47it/s]

{'loss': Array(0.125596, dtype=float32), 'loss_cross_entropy': Array(0.11887868, dtype=float32)}
{'loss_inverse': Array(6.8156974e-06, dtype=float32)}


 10%|█         | 104050/1000000 [4:55:26<26:14:57,  9.48it/s]

{'loss': Array(0.13475017, dtype=float32), 'loss_cross_entropy': Array(0.12804817, dtype=float32)}
{'loss_inverse': Array(1.5044368e-08, dtype=float32)}


 10%|█         | 104060/1000000 [4:55:28<24:34:53, 10.12it/s]

{'loss': Array(0.13437966, dtype=float32), 'loss_cross_entropy': Array(0.12732606, dtype=float32)}
{'loss_inverse': Array(5.6194693e-07, dtype=float32)}


 10%|█         | 104070/1000000 [4:55:30<27:11:05,  9.15it/s]

{'loss': Array(0.15139697, dtype=float32), 'loss_cross_entropy': Array(0.14288318, dtype=float32)}
{'loss_inverse': Array(0.00021551, dtype=float32)}


 10%|█         | 104080/1000000 [4:55:31<25:01:11,  9.95it/s]

{'loss': Array(0.1520092, dtype=float32), 'loss_cross_entropy': Array(0.14409518, dtype=float32)}
{'loss_inverse': Array(3.0853778e-06, dtype=float32)}


 10%|█         | 104090/1000000 [4:55:33<27:41:11,  8.99it/s]

{'loss': Array(0.13980563, dtype=float32), 'loss_cross_entropy': Array(0.13184151, dtype=float32)}
{'loss_inverse': Array(1.8483692e-07, dtype=float32)}


 10%|█         | 104100/1000000 [4:55:34<24:33:16, 10.13it/s]

{'loss': Array(0.14082459, dtype=float32), 'loss_cross_entropy': Array(0.13328396, dtype=float32)}
{'loss_inverse': Array(1.6975811e-05, dtype=float32)}


 10%|█         | 104110/1000000 [4:55:36<28:53:59,  8.61it/s]

{'loss': Array(0.14902413, dtype=float32), 'loss_cross_entropy': Array(0.14141749, dtype=float32)}
{'loss_inverse': Array(2.2126042e-05, dtype=float32)}


 10%|█         | 104120/1000000 [4:55:37<24:48:11, 10.03it/s]

{'loss': Array(0.137724, dtype=float32), 'loss_cross_entropy': Array(0.13020994, dtype=float32)}
{'loss_inverse': Array(1.8530021e-07, dtype=float32)}


 10%|█         | 104130/1000000 [4:55:39<37:20:14,  6.66it/s]

{'loss': Array(0.13603425, dtype=float32), 'loss_cross_entropy': Array(0.12842661, dtype=float32)}
{'loss_inverse': Array(1.0796744e-05, dtype=float32)}


 10%|█         | 104140/1000000 [4:55:41<26:39:44,  9.33it/s]

{'loss': Array(0.14310513, dtype=float32), 'loss_cross_entropy': Array(0.1359074, dtype=float32)}
{'loss_inverse': Array(4.445972e-08, dtype=float32)}


 10%|█         | 104150/1000000 [4:55:42<24:40:06, 10.09it/s]

{'loss': Array(0.12336724, dtype=float32), 'loss_cross_entropy': Array(0.11619747, dtype=float32)}
{'loss_inverse': Array(2.1868552e-06, dtype=float32)}


 10%|█         | 104160/1000000 [4:55:44<28:21:39,  8.77it/s]

{'loss': Array(0.14067401, dtype=float32), 'loss_cross_entropy': Array(0.13342737, dtype=float32)}
{'loss_inverse': Array(2.5896346e-05, dtype=float32)}


 10%|█         | 104170/1000000 [4:55:45<24:42:18, 10.07it/s]

{'loss': Array(0.14182045, dtype=float32), 'loss_cross_entropy': Array(0.13414358, dtype=float32)}
{'loss_inverse': Array(9.04906e-07, dtype=float32)}


 10%|█         | 104180/1000000 [4:55:47<28:41:34,  8.67it/s]

{'loss': Array(0.126341, dtype=float32), 'loss_cross_entropy': Array(0.11948396, dtype=float32)}
{'loss_inverse': Array(1.4832999e-07, dtype=float32)}


 10%|█         | 104190/1000000 [4:55:48<24:37:20, 10.11it/s]

{'loss': Array(0.1282459, dtype=float32), 'loss_cross_entropy': Array(0.12115239, dtype=float32)}
{'loss_inverse': Array(9.716839e-06, dtype=float32)}


 10%|█         | 104200/1000000 [4:55:50<30:25:12,  8.18it/s]

{'loss': Array(0.14154063, dtype=float32), 'loss_cross_entropy': Array(0.13381283, dtype=float32)}
{'loss_inverse': Array(6.115005e-07, dtype=float32)}


 10%|█         | 104210/1000000 [4:55:51<25:01:05,  9.95it/s]

{'loss': Array(0.16181365, dtype=float32), 'loss_cross_entropy': Array(0.15362045, dtype=float32)}
{'loss_inverse': Array(2.986928e-05, dtype=float32)}


 10%|█         | 104220/1000000 [4:55:53<24:39:58, 10.09it/s]

{'loss': Array(0.14152715, dtype=float32), 'loss_cross_entropy': Array(0.13391592, dtype=float32)}
{'loss_inverse': Array(3.1103422e-08, dtype=float32)}


 10%|█         | 104230/1000000 [4:55:54<27:04:22,  9.19it/s]

{'loss': Array(0.12779936, dtype=float32), 'loss_cross_entropy': Array(0.12112381, dtype=float32)}
{'loss_inverse': Array(1.3290036e-06, dtype=float32)}


 10%|█         | 104240/1000000 [4:55:56<25:37:50,  9.71it/s]

{'loss': Array(0.12085488, dtype=float32), 'loss_cross_entropy': Array(0.11388569, dtype=float32)}
{'loss_inverse': Array(1.6416969e-07, dtype=float32)}


 10%|█         | 104250/1000000 [4:55:58<28:46:38,  8.65it/s]

{'loss': Array(0.14569432, dtype=float32), 'loss_cross_entropy': Array(0.13737988, dtype=float32)}
{'loss_inverse': Array(9.1076083e-07, dtype=float32)}


 10%|█         | 104260/1000000 [4:55:59<24:54:22,  9.99it/s]

{'loss': Array(0.1334125, dtype=float32), 'loss_cross_entropy': Array(0.12615536, dtype=float32)}
{'loss_inverse': Array(6.448343e-07, dtype=float32)}


 10%|█         | 104270/1000000 [4:56:01<30:56:05,  8.04it/s]

{'loss': Array(0.13549595, dtype=float32), 'loss_cross_entropy': Array(0.12788026, dtype=float32)}
{'loss_inverse': Array(3.8806807e-05, dtype=float32)}


 10%|█         | 104280/1000000 [4:56:02<26:05:00,  9.54it/s]

{'loss': Array(0.15415709, dtype=float32), 'loss_cross_entropy': Array(0.14606181, dtype=float32)}
{'loss_inverse': Array(2.181604e-08, dtype=float32)}


 10%|█         | 104290/1000000 [4:56:04<24:38:48, 10.09it/s]

{'loss': Array(0.13481158, dtype=float32), 'loss_cross_entropy': Array(0.12710452, dtype=float32)}
{'loss_inverse': Array(8.9265177e-07, dtype=float32)}


 10%|█         | 104300/1000000 [4:56:05<28:18:59,  8.79it/s]

{'loss': Array(0.14677313, dtype=float32), 'loss_cross_entropy': Array(0.1381639, dtype=float32)}
{'loss_inverse': Array(8.6428656e-07, dtype=float32)}


 10%|█         | 104310/1000000 [4:56:07<25:34:56,  9.73it/s]

{'loss': Array(0.12828748, dtype=float32), 'loss_cross_entropy': Array(0.12106919, dtype=float32)}
{'loss_inverse': Array(1.1954601e-06, dtype=float32)}


 10%|█         | 104320/1000000 [4:56:09<31:19:06,  7.94it/s]

{'loss': Array(0.14468786, dtype=float32), 'loss_cross_entropy': Array(0.13685262, dtype=float32)}
{'loss_inverse': Array(5.0375576e-05, dtype=float32)}


 10%|█         | 104330/1000000 [4:56:10<26:29:34,  9.39it/s]

{'loss': Array(0.1385063, dtype=float32), 'loss_cross_entropy': Array(0.13127346, dtype=float32)}
{'loss_inverse': Array(4.541733e-06, dtype=float32)}


 10%|█         | 104340/1000000 [4:56:12<34:52:47,  7.13it/s]

{'loss': Array(0.14192054, dtype=float32), 'loss_cross_entropy': Array(0.13428824, dtype=float32)}
{'loss_inverse': Array(7.866691e-08, dtype=float32)}


 10%|█         | 104350/1000000 [4:56:13<26:20:08,  9.45it/s]

{'loss': Array(0.13376264, dtype=float32), 'loss_cross_entropy': Array(0.12622534, dtype=float32)}
{'loss_inverse': Array(0.00022752, dtype=float32)}


 10%|█         | 104360/1000000 [4:56:15<24:57:08,  9.97it/s]

{'loss': Array(0.14732136, dtype=float32), 'loss_cross_entropy': Array(0.13932137, dtype=float32)}
{'loss_inverse': Array(1.0020708e-07, dtype=float32)}


 10%|█         | 104370/1000000 [4:56:17<27:32:53,  9.03it/s]

{'loss': Array(0.14551598, dtype=float32), 'loss_cross_entropy': Array(0.13718848, dtype=float32)}
{'loss_inverse': Array(0.00020072, dtype=float32)}


 10%|█         | 104380/1000000 [4:56:18<24:53:58,  9.99it/s]

{'loss': Array(0.12254357, dtype=float32), 'loss_cross_entropy': Array(0.11546476, dtype=float32)}
{'loss_inverse': Array(3.2926366e-07, dtype=float32)}


 10%|█         | 104390/1000000 [4:56:20<33:49:53,  7.35it/s]

{'loss': Array(0.13537294, dtype=float32), 'loss_cross_entropy': Array(0.12862132, dtype=float32)}
{'loss_inverse': Array(6.0635743e-05, dtype=float32)}


 10%|█         | 104400/1000000 [4:56:21<25:46:11,  9.65it/s]

{'loss': Array(0.14867377, dtype=float32), 'loss_cross_entropy': Array(0.14081974, dtype=float32)}
{'loss_inverse': Array(2.1007088e-07, dtype=float32)}


 10%|█         | 104410/1000000 [4:56:23<24:15:46, 10.25it/s]

{'loss': Array(0.12416512, dtype=float32), 'loss_cross_entropy': Array(0.11692665, dtype=float32)}
{'loss_inverse': Array(4.1441413e-06, dtype=float32)}


 10%|█         | 104420/1000000 [4:56:24<27:40:11,  8.99it/s]

{'loss': Array(0.14375378, dtype=float32), 'loss_cross_entropy': Array(0.13622366, dtype=float32)}
{'loss_inverse': Array(5.5394247e-07, dtype=float32)}


 10%|█         | 104430/1000000 [4:56:26<24:44:34, 10.05it/s]

{'loss': Array(0.15693246, dtype=float32), 'loss_cross_entropy': Array(0.14903553, dtype=float32)}
{'loss_inverse': Array(1.7595511e-07, dtype=float32)}


 10%|█         | 104440/1000000 [4:56:27<28:20:55,  8.78it/s]

{'loss': Array(0.14689022, dtype=float32), 'loss_cross_entropy': Array(0.13891174, dtype=float32)}
{'loss_inverse': Array(1.6601665e-05, dtype=float32)}


 10%|█         | 104450/1000000 [4:56:29<25:08:58,  9.89it/s]

{'loss': Array(0.12482554, dtype=float32), 'loss_cross_entropy': Array(0.11779705, dtype=float32)}
{'loss_inverse': Array(1.0053793e-06, dtype=float32)}


 10%|█         | 104460/1000000 [4:56:31<31:06:30,  8.00it/s]

{'loss': Array(0.13680384, dtype=float32), 'loss_cross_entropy': Array(0.12926458, dtype=float32)}
{'loss_inverse': Array(3.5463115e-06, dtype=float32)}


 10%|█         | 104470/1000000 [4:56:32<26:06:04,  9.53it/s]

{'loss': Array(0.1454753, dtype=float32), 'loss_cross_entropy': Array(0.13721375, dtype=float32)}
{'loss_inverse': Array(7.767151e-07, dtype=float32)}


 10%|█         | 104480/1000000 [4:56:34<25:09:00,  9.89it/s]

{'loss': Array(0.13549225, dtype=float32), 'loss_cross_entropy': Array(0.12753853, dtype=float32)}
{'loss_inverse': Array(7.9082747e-07, dtype=float32)}


 10%|█         | 104490/1000000 [4:56:35<28:20:36,  8.78it/s]

{'loss': Array(0.14055231, dtype=float32), 'loss_cross_entropy': Array(0.132585, dtype=float32)}
{'loss_inverse': Array(0.00013802, dtype=float32)}


 10%|█         | 104500/1000000 [4:56:37<25:49:15,  9.63it/s]

{'loss': Array(0.1268105, dtype=float32), 'loss_cross_entropy': Array(0.11947898, dtype=float32)}
{'loss_inverse': Array(2.2543341e-05, dtype=float32)}


 10%|█         | 104510/1000000 [4:56:45<91:37:49,  2.71it/s] 

{'loss': Array(0.13703586, dtype=float32), 'loss_cross_entropy': Array(0.12946513, dtype=float32)}
{'loss_inverse': Array(1.3763204e-06, dtype=float32)}


 10%|█         | 104520/1000000 [4:56:46<35:20:00,  7.04it/s]

{'loss': Array(0.13744347, dtype=float32), 'loss_cross_entropy': Array(0.1303144, dtype=float32)}
{'loss_inverse': Array(1.6153274e-06, dtype=float32)}


 10%|█         | 104530/1000000 [4:56:48<38:33:48,  6.45it/s]

{'loss': Array(0.14558065, dtype=float32), 'loss_cross_entropy': Array(0.1382305, dtype=float32)}
{'loss_inverse': Array(5.75182e-07, dtype=float32)}


 10%|█         | 104540/1000000 [4:56:50<27:24:41,  9.07it/s]

{'loss': Array(0.14308724, dtype=float32), 'loss_cross_entropy': Array(0.13490465, dtype=float32)}
{'loss_inverse': Array(2.1202564e-07, dtype=float32)}


 10%|█         | 104550/1000000 [4:56:51<24:31:34, 10.14it/s]

{'loss': Array(0.14027376, dtype=float32), 'loss_cross_entropy': Array(0.13229597, dtype=float32)}
{'loss_inverse': Array(1.0766632e-05, dtype=float32)}


 10%|█         | 104560/1000000 [4:56:53<27:32:01,  9.03it/s]

{'loss': Array(0.14514093, dtype=float32), 'loss_cross_entropy': Array(0.13756037, dtype=float32)}
{'loss_inverse': Array(2.223704e-05, dtype=float32)}


 10%|█         | 104570/1000000 [4:56:55<37:37:45,  6.61it/s]

{'loss': Array(0.14711574, dtype=float32), 'loss_cross_entropy': Array(0.1393521, dtype=float32)}
{'loss_inverse': Array(1.4574522e-06, dtype=float32)}


 10%|█         | 104580/1000000 [4:56:56<26:23:37,  9.42it/s]

{'loss': Array(0.15397185, dtype=float32), 'loss_cross_entropy': Array(0.14616348, dtype=float32)}
{'loss_inverse': Array(9.3953176e-08, dtype=float32)}


 10%|█         | 104590/1000000 [4:56:57<24:12:31, 10.27it/s]

{'loss': Array(0.13672398, dtype=float32), 'loss_cross_entropy': Array(0.1295157, dtype=float32)}
{'loss_inverse': Array(2.0646756e-07, dtype=float32)}


 10%|█         | 104600/1000000 [4:56:59<27:13:59,  9.13it/s]

{'loss': Array(0.14979823, dtype=float32), 'loss_cross_entropy': Array(0.14144637, dtype=float32)}
{'loss_inverse': Array(4.2149335e-07, dtype=float32)}


 10%|█         | 104610/1000000 [4:57:01<25:21:13,  9.81it/s]

{'loss': Array(0.14675406, dtype=float32), 'loss_cross_entropy': Array(0.13946815, dtype=float32)}
{'loss_inverse': Array(7.524295e-07, dtype=float32)}


 10%|█         | 104620/1000000 [4:57:02<28:53:13,  8.61it/s]

{'loss': Array(0.14316173, dtype=float32), 'loss_cross_entropy': Array(0.13539329, dtype=float32)}
{'loss_inverse': Array(1.8504826e-07, dtype=float32)}


 10%|█         | 104630/1000000 [4:57:04<25:35:55,  9.72it/s]

{'loss': Array(0.14170238, dtype=float32), 'loss_cross_entropy': Array(0.1339413, dtype=float32)}
{'loss_inverse': Array(3.67642e-08, dtype=float32)}


 10%|█         | 104640/1000000 [4:57:06<30:41:34,  8.10it/s]

{'loss': Array(0.13783476, dtype=float32), 'loss_cross_entropy': Array(0.13031344, dtype=float32)}
{'loss_inverse': Array(7.294269e-08, dtype=float32)}


 10%|█         | 104650/1000000 [4:57:07<26:26:07,  9.41it/s]

{'loss': Array(0.1432814, dtype=float32), 'loss_cross_entropy': Array(0.1355942, dtype=float32)}
{'loss_inverse': Array(0.00010309, dtype=float32)}


 10%|█         | 104660/1000000 [4:57:09<25:12:15,  9.87it/s]

{'loss': Array(0.13670324, dtype=float32), 'loss_cross_entropy': Array(0.12899707, dtype=float32)}
{'loss_inverse': Array(3.5005703e-06, dtype=float32)}


 10%|█         | 104670/1000000 [4:57:10<30:05:19,  8.27it/s]

{'loss': Array(0.15047215, dtype=float32), 'loss_cross_entropy': Array(0.1426823, dtype=float32)}
{'loss_inverse': Array(5.570093e-07, dtype=float32)}


 10%|█         | 104680/1000000 [4:57:12<26:40:07,  9.33it/s]

{'loss': Array(0.13139056, dtype=float32), 'loss_cross_entropy': Array(0.12423668, dtype=float32)}
{'loss_inverse': Array(4.546908e-05, dtype=float32)}


 10%|█         | 104690/1000000 [4:57:14<30:54:45,  8.05it/s]

{'loss': Array(0.13684337, dtype=float32), 'loss_cross_entropy': Array(0.12916994, dtype=float32)}
{'loss_inverse': Array(0.00032653, dtype=float32)}


 10%|█         | 104700/1000000 [4:57:15<25:18:41,  9.83it/s]

{'loss': Array(0.12698948, dtype=float32), 'loss_cross_entropy': Array(0.11975991, dtype=float32)}
{'loss_inverse': Array(3.1356827e-05, dtype=float32)}


 10%|█         | 104710/1000000 [4:57:17<32:48:32,  7.58it/s]

{'loss': Array(0.15272823, dtype=float32), 'loss_cross_entropy': Array(0.14478281, dtype=float32)}
{'loss_inverse': Array(0.00035765, dtype=float32)}


 10%|█         | 104720/1000000 [4:57:18<26:25:42,  9.41it/s]

{'loss': Array(0.12303255, dtype=float32), 'loss_cross_entropy': Array(0.11586642, dtype=float32)}
{'loss_inverse': Array(3.5282198e-06, dtype=float32)}


 10%|█         | 104730/1000000 [4:57:20<25:18:40,  9.83it/s]

{'loss': Array(0.122449, dtype=float32), 'loss_cross_entropy': Array(0.11570816, dtype=float32)}
{'loss_inverse': Array(8.0204035e-07, dtype=float32)}


 10%|█         | 104740/1000000 [4:57:21<26:56:49,  9.23it/s]

{'loss': Array(0.13620402, dtype=float32), 'loss_cross_entropy': Array(0.12899746, dtype=float32)}
{'loss_inverse': Array(5.722946e-07, dtype=float32)}


 10%|█         | 104750/1000000 [4:57:23<25:30:38,  9.75it/s]

{'loss': Array(0.15255058, dtype=float32), 'loss_cross_entropy': Array(0.14396517, dtype=float32)}
{'loss_inverse': Array(0.00045399, dtype=float32)}


 10%|█         | 104760/1000000 [4:57:25<33:43:48,  7.37it/s]

{'loss': Array(0.13067079, dtype=float32), 'loss_cross_entropy': Array(0.12331444, dtype=float32)}
{'loss_inverse': Array(9.20281e-07, dtype=float32)}


 10%|█         | 104770/1000000 [4:57:26<25:59:18,  9.57it/s]

{'loss': Array(0.12083524, dtype=float32), 'loss_cross_entropy': Array(0.11416926, dtype=float32)}
{'loss_inverse': Array(4.8485377e-05, dtype=float32)}


 10%|█         | 104780/1000000 [4:57:28<24:32:45, 10.13it/s]

{'loss': Array(0.13712482, dtype=float32), 'loss_cross_entropy': Array(0.12968832, dtype=float32)}
{'loss_inverse': Array(1.16977866e-07, dtype=float32)}


 10%|█         | 104790/1000000 [4:57:29<27:24:54,  9.07it/s]

{'loss': Array(0.15237646, dtype=float32), 'loss_cross_entropy': Array(0.14412819, dtype=float32)}
{'loss_inverse': Array(5.469546e-06, dtype=float32)}


 10%|█         | 104800/1000000 [4:57:31<24:55:45,  9.97it/s]

{'loss': Array(0.14740165, dtype=float32), 'loss_cross_entropy': Array(0.13920604, dtype=float32)}
{'loss_inverse': Array(0.00013399, dtype=float32)}


 10%|█         | 104810/1000000 [4:57:32<28:56:01,  8.59it/s]

{'loss': Array(0.14456455, dtype=float32), 'loss_cross_entropy': Array(0.13629751, dtype=float32)}
{'loss_inverse': Array(4.019551e-07, dtype=float32)}


 10%|█         | 104820/1000000 [4:57:34<26:24:28,  9.42it/s]

{'loss': Array(0.15924875, dtype=float32), 'loss_cross_entropy': Array(0.15077749, dtype=float32)}
{'loss_inverse': Array(0.00013786, dtype=float32)}


In [16]:

def sampling_model(key, model, sample_eval, nb_step=100, config=None):
    """
    Function used to sampling a state from a list 
    """
    seq_len_future = config.len_seq - config.len_seq // 4 
    noise_future  = jax.random.dirichlet(key, jnp.ones(6) * 5., (config.batch_size, seq_len_future, 54))
    sample_eval["reward"] = jnp.linspace(start=-0.5, stop=0.5, num=config.batch_size)[:, None]

    for t_step in range(nb_step):
        t_step_array = jnp.ones((config.batch_size, 1, 1, 1)) * float(t_step / nb_step)
        sample_eval["context"] = jnp.concatenate([sample_eval["reward"], t_step_array[:, :, 0, 0]], axis=1)

        estimation_logits_past, estimation_logits_future = model(
            sample_eval["state_past"], noise_future, sample_eval["context"]
        )

        estimation_proba_future = jax.nn.softmax(estimation_logits_future, axis=-1)

        noise_future = noise_future + float(1. / nb_step) * 1./ (1. - t_step_array + 0.0001) * (estimation_proba_future - noise_future)

    return noise_future



In [17]:
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

buffer_eval, buffer_list_eval = dataset.fast_gathering_data_diffusion(
    env,
    vmap_reset,
    vmap_step,
    int(config.batch_size),
    config.len_seq,
    buffer_eval,
    buffer_list_eval,
    subkey,
)

sample = buffer_eval.sample(buffer_list_eval, subkey)
sample = reshape_diffusion_setup(sample, subkey)

In [23]:
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

sample = buffer.sample(buffer_list, subkey)
sample = reshape_diffusion_setup(sample, subkey)


result = sampling_model(key=config.jax_key, model=transformer, sample_eval=sample, config=config, nb_step=100)
result

Array([[[[1.09494431e-05, 1.66741665e-05, 9.99895751e-01,
          2.22197268e-05, 3.50209884e-05, 1.94163295e-05],
         [2.11554579e-05, 2.44809780e-05, 2.29226425e-05,
          9.99903977e-01, 8.82399036e-06, 1.86653342e-05],
         [2.21990049e-05, 8.59028660e-06, 2.98996456e-05,
          9.99905646e-01, 1.01229525e-05, 2.35370826e-05],
         ...,
         [1.43166399e-05, 3.20903491e-05, 9.99898672e-01,
          1.93911837e-05, 1.82657968e-05, 1.72907021e-05],
         [9.99898255e-01, 1.77349430e-05, 2.03370582e-05,
          3.52105126e-05, 1.59306219e-05, 1.25847291e-05],
         [2.01943330e-05, 8.95850826e-06, 9.99900341e-01,
          2.50139274e-05, 2.45582778e-05, 2.09209975e-05]],

        [[1.19837932e-05, 1.96152832e-05, 9.99905944e-01,
          1.66372629e-05, 2.55447812e-05, 2.02928204e-05],
         [1.41649507e-05, 1.69913983e-05, 2.56728381e-05,
          9.99911547e-01, 1.36010349e-05, 1.80726638e-05],
         [1.13089336e-05, 2.37140339e-05, 3.1119

In [24]:
index_batch  = 64

jnp.argmax(sample["state_past"], axis=-1).reshape((128, 8, 6, 3, 3))[index_batch, -1, :, :, :]

Array([[[2, 0, 1],
        [0, 0, 5],
        [2, 4, 5]],

       [[5, 1, 4],
        [2, 1, 2],
        [1, 3, 3]],

       [[3, 4, 4],
        [1, 2, 1],
        [5, 5, 1]],

       [[5, 4, 3],
        [5, 3, 0],
        [0, 1, 0]],

       [[0, 3, 1],
        [2, 4, 3],
        [4, 4, 0]],

       [[4, 5, 2],
        [3, 5, 2],
        [3, 0, 2]]], dtype=int32)

In [25]:
jnp.argmax(result, axis=-1).reshape((128, 24, 6, 3, 3))[index_batch, 0, :, :, :]

Array([[[0, 0, 1],
        [0, 0, 5],
        [3, 4, 5]],

       [[2, 1, 4],
        [0, 1, 2],
        [2, 3, 3]],

       [[3, 4, 4],
        [1, 2, 1],
        [5, 5, 1]],

       [[5, 4, 3],
        [5, 3, 3],
        [0, 1, 4]],

       [[4, 2, 0],
        [4, 4, 3],
        [0, 3, 1]],

       [[5, 5, 2],
        [2, 5, 2],
        [1, 0, 2]]], dtype=int32)

In [26]:
jnp.argmax(result, axis=-1).reshape((128, 24, 6, 3, 3))[index_batch, 1, :, :, :]

Array([[[0, 0, 5],
        [0, 0, 5],
        [3, 4, 1]],

       [[2, 1, 4],
        [0, 1, 2],
        [2, 3, 4]],

       [[5, 4, 3],
        [1, 2, 1],
        [5, 5, 1]],

       [[2, 4, 3],
        [5, 3, 3],
        [0, 1, 4]],

       [[4, 2, 0],
        [4, 4, 3],
        [0, 3, 1]],

       [[5, 5, 3],
        [2, 5, 2],
        [1, 0, 2]]], dtype=int32)

In [None]:
sample = buffer_eval.sample(buffer_list_eval, subkey)
sample = reshape_sample(sample)

TrajectoryBufferSample(experience={'action': Array([[[1.32556781e-01, 7.96739519e-01, 5.36718592e-02, ...,
         3.91646661e-03, 4.48901858e-03, 9.91594553e-01],
        [3.49070907e-01, 4.57749265e-04, 4.38157976e-01, ...,
         7.23136306e-01, 1.23497941e-01, 1.53365776e-01],
        [6.12441264e-03, 2.50436477e-02, 1.35732419e-03, ...,
         3.82237613e-01, 5.98694921e-01, 1.90675538e-02],
        ...,
        [1.41329234e-04, 2.44877161e-03, 8.43136787e-01, ...,
         2.33344346e-01, 6.42170012e-01, 1.24485560e-01],
        [6.32655225e-04, 1.77795421e-02, 9.65278149e-01, ...,
         1.25269741e-02, 3.21629345e-01, 6.65843725e-01],
        [9.08881542e-04, 1.04175135e-01, 7.50824576e-04, ...,
         9.99683421e-03, 7.89827347e-01, 2.00175866e-01]],

       [[2.03237548e-01, 7.00179100e-01, 3.63819454e-05, ...,
         9.96583939e-01, 2.39940570e-03, 1.01662707e-03],
        [7.63220847e-01, 1.11325733e-01, 3.15520242e-02, ...,
         5.45369804e-01, 4.54322606e-0

In [40]:
def generate_past_state_with_with_random_policy(key, vmap_reset, step_jit_env, config):
    """
    Generate past state with random policy

    Args:
        config: configuration object

    Returns:
        state_past: (batch_size, len_seq//4, 6, 3, 3)

    """

    key1, key2 = jax.random.split(config.jax_key)

    keys = jax.random.split(key1, config.batch_size)
    state, timestep = vmap_reset(keys)

    last_state = None
    past_state = []

    actions_all = jax.random.randint(
        key=config.jax_key,
        minval=env.action_spec.minimum,
        maxval=env.action_spec.maximum,
        shape=(config.batch_size, config.len_seq // 4, 3),
    )

    for i in range(config.len_seq // 4):

        # apply random policy and retrieve state
        action = actions_all[:, i, :]

        state, timestep  = step_jit_env(state, action)
        past_state.append(state.cube)

    # concat all the past state to get the shape (batch_size, len_seq//4, 6, 3, 3) from a list of state of size (batch_size, 6, 3, 3) by creating the 1 axis
    state_past = jnp.stack(past_state, axis=1)

    return state_past, state, actions_all

step_jit_env = jax.vmap(jit_step)

state_past, state, actions_past = generate_past_state_with_with_random_policy(key, vmap_reset, step_jit_env, config)

In [31]:
state_past.shape

(128, 8, 6, 3, 3)

In [41]:

def apply_decision_diffuser_policy(key, state_past, decision_diffuser, inverse_rl_model, config):
    """
    1. Make a estimation of the targeted reward
    2. Generate futur state with those targeted reward
    3. Choose policy from that
    """
    sample_eval = {
        "state_past": jax.nn.one_hot(state_past, 6),
    }

    state_past = jnp.copy(state_past.reshape((state_past.shape[0], state_past.shape[1], -1)))
    state_past = jax.nn.one_hot(state_past, num_classes=6)

    state_future = sampling_model(key, decision_diffuser, sample_eval, nb_step=100, config=config)

    # state_future is (batch_size, seq_len, dim_input_state / 6, 6)
    state_to_act = jnp.concatenate([state_past, state_future], axis=1)
    state_to_act_futur_t = state_to_act[:, (config.len_seq // 4 - 1):(-1), :, :]
    state_to_act_futur_td1 = state_to_act[:, (config.len_seq // 4):, :, :]

    # flatten the last 2 axis
    state_to_act_futur_t = state_to_act_futur_t.reshape(
        (state_to_act_futur_t.shape[0], state_to_act_futur_t.shape[1], -1)
    )

    state_to_act_futur_td1 = state_to_act_futur_td1.reshape(
        (state_to_act_futur_td1.shape[0], state_to_act_futur_td1.shape[1], -1)
    )

    # now use reverse RL to compute the action TODO later
    actions = inverse_rl_model(state_to_act_futur_t, state_to_act_futur_td1)

    return actions

actions_futur = apply_decision_diffuser_policy(config.jax_key, state_past, transformer, inverse_rl_model, config)

(128, 24, 9)

In [56]:

from rubiktransformer.dataset import GOAL_OBSERVATION

def gather_data_with_policy(state, state_past, actions_past, actions_futur, buffer, buffer_list, config):
    """
    For loop with those policy and state

    log performance compare to target

    """
    state_futur_list = []

    for i in range(config.len_seq - config.len_seq // 4):
        actions_step = actions_futur[:, i, :]
        actions_0 = jnp.argmax(actions_step[:, :6], axis=1)
        actions_1 = jnp.argmax(actions_step[:, 6:], axis=1)

        actions_full = jnp.stack([actions_0, jnp.zeros(config.batch_size), actions_1], axis=1)
        
        # transform to int type
        actions_full = actions_full.astype(jnp.int32)
    
        # step 
        state, timestep  = step_jit_env(state, actions_full)

        state_futur_list.append(state.cube)

    # TODO SAVE DATA into batch format for later training
    actions_0_all_futur = jnp.argmax(actions_futur[:, :, :6], axis=-1)
    actions_1_all_futur = jnp.argmax(actions_futur[:, :, 6:], axis=-1)

    action_all_futur = jnp.stack([actions_0_all_futur, jnp.zeros((config.batch_size, actions_0_all_futur.shape[1])), actions_1_all_futur], axis=-1)

    action_all = jnp.concatenate([actions_past, action_all_futur], axis=1)
    action_all = action_all.astype(jnp.int32)

    state_futur = jnp.stack(state_futur_list, axis=1)

    state_all = jnp.concatenate([state_past, state_futur], axis=1)

    # compute reward 
    goal_observation = jnp.repeat(
        GOAL_OBSERVATION[None, None, :, :, :], config.batch_size, axis=0
    )
    goal_observation = jnp.repeat(goal_observation, config.len_seq, axis=1)
    reward = jnp.where(state_all != goal_observation, -1.0, 1.0)

    reward = reward.mean(axis=[2, 3, 4])
    reward = reward[:, -1] - reward[:, config.len_seq//4]

    # add data into the buffer
    pass

    for idx_batch in range(config.batch_size):
        buffer_list = buffer.add(
            buffer_list,
            {
                "action": action_all[idx_batch],
                "reward": reward[idx_batch],
                "state_histo": state_all[idx_batch],
            },
        )

    return buffer, buffer_list

buffer, buffer_list = gather_data_with_policy(state, state_past, actions_past, actions_futur, buffer, buffer_list, config)


In [None]:


def improve_training_loop(nb_iter=10000):
    """
    Relaunch the training loop with those new data incorporated into the buffer
    
    Full stuff here
    Online transformer setup

    1. We generate env setup 
    2. First random action in the different env
    3. Use decision_diffuser to choose the action to do from here
    4. Observe / apply policy  to retrieve data
    5. Add the data into the buffer
    6. Train model on those data

    Remember to log the performance data to compare with other run / algorithms
    """
    
    for _ in range(nb_iter):

        key, subkey = jax.random.split(config.jax_key)
        config.jax_key = key

        # first generate random state
        state_past, state, actions_past = generate_past_state_with_with_random_policy(key, vmap_reset, step_jit_env, config)
        
        # apply model to get some generation
        actions_futur = apply_decision_diffuser_policy(config.jax_key, state_past, transformer, inverse_rl_model, config)

        # update replay buffer dataset
        buffer, buffer_list = gather_data_with_policy(state, state_past, actions_past, actions_futur, buffer, buffer_list, config)

        # now we can do the training loop
        sample = buffer.sample(buffer_list, subkey)
        sample = reshape_diffusion_setup(sample, subkey)

        # we update the policy
        train_step_transformer_rf(
            transformer, optimizer_diffuser, metrics_train, sample
        )

        if idx_step % config.log_every_step == 0:
            metrics_train_result = metrics_train.compute()
            print(metrics_train_result)

            wandb.log(metrics_train_result, step=idx_step)
            metrics_train.reset()


