In [1]:
# Switch the working directory to the project checkout so that relative paths
# used later in the notebook (e.g. the "state_ddt_model.pickle" checkpoint)
# resolve there. The location is a hard-coded absolute path, so it has to be
# edited when the notebook runs on another machine.
# NOTE(review): the `rubiktransformer` imports below presumably rely on this
# directory being the working directory as well -- confirm.
import os

os.chdir("/teamspace/studios/this_studio/rubikscubesolver")

In [2]:
from tqdm import tqdm

import wandb  # for logging
import time
from dataclasses import dataclass

import jax
import jax.numpy as jnp
import flax.nnx as nnx

import optax

from rubiktransformer.model_diffusion_dt import RubikDTTransformer
import rubiktransformer.dataset as dataset
from rubiktransformer.trainer import reshape_sample

cuda_plugin_extension is not found.


In [3]:
@dataclass
class Config:
    """Hyper-parameters and bookkeeping intervals used by this training notebook."""

    # Root PRNG key. The default is created once, when the class body is
    # evaluated; later cells split it and store the carried key back on the
    # instance (`config.jax_key = key`).
    jax_key: jnp.ndarray = jax.random.PRNGKey(47)
    # No type annotation, so @dataclass does NOT turn this into a field: it is a
    # plain class attribute shared by every Config instance and it cannot be
    # overridden through the constructor.
    rngs = nnx.Rngs(46)
    # Passed as `sample_batch_size` when the replay buffers are created.
    batch_size: int = 128
    # Learning rate handed to optax.adamw for the transformer.
    lr_1: float = 4e-4
    # Not referenced in the visible cells.
    lr_2: float = 4e-4
    # Base unit for the number of games requested from the data-gathering helper
    # (the cells below request nb_games / 10, nb_games * 15 and nb_games // 10).
    nb_games: int = 128 * 100
    # Number of steps per stored trajectory (leading axis of the buffer entries).
    len_seq: int = 32
    # Number of iterations of the training loop.
    nb_step: int = 1000000
    # Train metrics are computed, printed, logged and reset every N steps.
    log_every_step: int = 10
    # An evaluation batch is gathered and scored every N steps.
    log_eval_every_step: int = 10
    # Not referenced in the visible cells.
    log_policy_reward_every_step: int = 10
    # The training buffer is topped up with fresh games every N steps.
    add_data_every_step: int = 500

    # The model state is pickled to disk every N steps.
    save_model_every_step: int = 2000


# Single configuration object shared by every cell below.
config = Config()

# init wandb config
# The entity (account) and project are hard-coded; the run name carries a
# timestamp so that successive runs are distinguishable in the dashboard.
user = "forbu14"
project = "RubikTransformer"
display_name = "experiment_" + time.strftime("%Y%m%d-%H%M%S")

# Starts the Weights & Biases run that the training loop logs metrics to.
wandb.init(entity=user, project=project, name=display_name)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mforbu14[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
# Model under training. It receives the shared `config.rngs` stream.
transformer = RubikDTTransformer(rngs=config.rngs, causal=True)

# Multiplier that ramps linearly from 0 to 1 over the first 4000 optimizer
# steps and stays at 1 afterwards; applied last in the chain below, it acts as
# a warm-up on the size of the updates.
scheduler = optax.linear_schedule(init_value=0.0, end_value=1.0, transition_steps=4000)

# init optimizer
# Order matters: gradients are clipped to a global norm of 1.0, turned into
# AdamW updates with learning rate `config.lr_1`, then scaled by the warm-up.
optimizer_dd = optax.chain(
    optax.clip_by_global_norm(1.0),
    #optax.lion(config.lr_1 / 10.0),
    optax.adamw(config.lr_1),
    optax.scale_by_schedule(scheduler),
)

# Binds the optax transformation to the model's parameters.
optimizer_diffuser = nnx.Optimizer(transformer, optimizer_dd)

# metrics
# Running averages; the keyword names here are the keyword names that
# `metrics_train.update(...)` is called with in the train step.
metrics_train = nnx.MultiMetric(
    loss=nnx.metrics.Average("loss"),
    loss_cross_entropy=nnx.metrics.Average("loss_cross_entropy"),
)

# Same pair of averages for the evaluation batches, under `*_eval` names.
metrics_eval = nnx.MultiMetric(
    loss_eval=nnx.metrics.Average("loss_eval"),

    loss_cross_entropy_eval=nnx.metrics.Average("loss_cross_entropy_eval"),
)

In [5]:
import pickle

# Resume from the checkpoint written by the training loop (same file name).
# On a fresh run where the file does not exist yet, `open` raises
# FileNotFoundError and this cell has to be skipped.
filename = "state_ddt_model.pickle"

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load checkpoints produced by a trusted run of this notebook.
with open(filename, "rb") as input_file:
    state = pickle.load(input_file)

# Copy the loaded state into the live model object.
nnx.update(transformer, state)

In [5]:
# Environment and replay buffers: one buffer for training, one for evaluation.
# Both calls return an environment; `env` keeps the one from the second call.
env, buffer = dataset.init_env_buffer(sample_batch_size=config.batch_size)
env, buffer_eval = dataset.init_env_buffer(sample_batch_size=config.batch_size)

nb_games = config.nb_games
len_seq = config.len_seq

# Zero-filled example arrays. Cube states are stored as int8, actions as int32,
# the remaining arrays keep the default float dtype.
state_first = jnp.zeros((6, 3, 3), dtype=jnp.int8)
state_next = jnp.zeros((len_seq, 6, 3, 3), dtype=jnp.int8)
action = jnp.zeros((len_seq, 3), dtype=jnp.int32)
action_proba = jnp.zeros((len_seq, 9))
reward = jnp.zeros((1,))

# Compiled single-step transition, used by `step_fn`.
jit_step = jax.jit(env.step)

# Example transition handed to both buffers at initialisation.
transition_template = {
    "action": action,
    "reward": reward,
    "state_histo": state_next,
}

buffer_list = buffer.init(dict(transition_template))
buffer_list_eval = buffer_eval.init(dict(transition_template))

In [6]:
def step_fn(state, key):
    """
    Advance the environment by one uniformly random action.

    The signature follows the `(carry, x) -> (carry, y)` convention of
    `jax.lax.scan`: `state` is the carried environment state and `key` is the
    PRNG key for this step.

    Returns the next state and the timestep produced by the environment, with
    the sampled action stored under `timestep.extras["action"]`.
    """
    # NOTE(review): `maxval` is exclusive in jax.random.randint. If
    # `env.action_spec.maximum` is an inclusive bound, the largest value of each
    # action component is never drawn -- confirm against the environment's spec.
    random_action = jax.random.randint(
        key=key,
        shape=(3,),
        minval=env.action_spec.minimum,
        maxval=env.action_spec.maximum,
    )

    next_state, timestep = jit_step(state, random_action)
    # Keep the action next to the observation so it can be stored in the buffer.
    timestep.extras["action"] = random_action

    return next_state, timestep


def run_n_steps(state, key, n):
    """
    Roll out `n` random steps starting from `state`.

    `key` is split into `n` per-step keys that are scanned over with
    `step_fn`. Only the stacked per-step outputs are returned; the final
    environment state is discarded.
    """
    step_keys = jax.random.split(key, n)
    _, rollout = jax.lax.scan(step_fn, state, step_keys)
    return rollout


# Batched environment helpers: `vmap_reset` maps the jitted reset over its
# argument, `vmap_step` maps `run_n_steps` over a batch of states and a batch
# of keys while the step count `n` is shared (in_axes=None) across the batch.
vmap_reset = jax.vmap(jax.jit(env.reset))
vmap_step = jax.vmap(run_n_steps, in_axes=(0, 0, None))

In [7]:
# Render the module/parameter tree of the model for visual inspection.
nnx.display(transformer)

In [8]:
# Advance the root key: `key` is carried forward on the config, `subkey` is
# consumed by the gathering call below.
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

# First fill of the training buffer with a tenth of `config.nb_games` games
# (1280 with the default config). The helper returns the buffer and its
# updated state, which replace the previous ones.
# NOTE(review): presumably the helper rolls out random games of `len_seq` steps
# with vmap_reset / vmap_step and adds them to the buffer -- confirm in
# rubiktransformer.dataset.
buffer, buffer_list = dataset.fast_gathering_data_diffusion(
    env,
    vmap_reset,
    vmap_step,
    int(config.nb_games / 10),
    config.len_seq,
    buffer,
    buffer_list,
    subkey,
)

In [9]:
# Draw one batch so the preprocessing function defined below can be tried out.
# NOTE(review): `subkey` was already passed to the data-gathering call in the
# previous cell, so a consumed PRNG key is reused here.
sample = buffer.sample(buffer_list, subkey)

def reshape_diffusion_setup(sample, key=jax.random.PRNGKey(0)):
    """
    Turn a sampled buffer batch into the inputs of the rectified-flow loss.

    Args:
        sample: object whose `experience` attribute is a mapping with at least
            `"state_histo"` (reshaped here to `(batch, len_seq, 54)`, so each
            step must hold 54 sticker values in `0..5`) and `"reward"`
            (concatenated with the flow time along axis 1).
        key: PRNG key. It is split internally so that the flow time and the
            simplex noise come from independent random streams.

    Returns:
        A new dict with the entries of `sample.experience` plus:
        `"state_histo"` one-hot encoded over 6 classes on a new last axis,
        `"time_step"` of shape `(batch, 1, 1, 1)` drawn uniformly in `[0, 1)`,
        `"context"` = reward concatenated with the flow time,
        `"state_past"` / `"state_future"` = first quarter / remaining steps of
        the one-hot history, and `"state_future_noise"`, the interpolation
        `(1 - t) * noise + t * state_future` between Dirichlet noise and the
        clean future states.

    `sample.experience` itself is left untouched, so the function can be
    called several times on the same sample.
    """
    # One independent key per random draw: using the same key for both the
    # uniform and the Dirichlet sampler would correlate the two.
    key_time, key_noise = jax.random.split(key)

    # Shallow copy: new entries are added to the copy, not to the caller's data.
    batch = dict(sample.experience)

    raw_states = batch["state_histo"]
    batch_size, len_seq = raw_states.shape[0], raw_states.shape[1]

    # Flatten every cube to its 54 stickers, then one-hot encode the colours.
    flat_states = raw_states.reshape((batch_size, len_seq, 54))
    state_histo = jax.nn.one_hot(flat_states, num_classes=6, axis=-1)
    batch["state_histo"] = state_histo

    # One flow time per batch element, broadcastable against the one-hot states.
    time_step = jax.random.uniform(key_time, (batch_size, 1, 1, 1))
    batch["time_step"] = time_step

    # Conditioning vector of the model: reward followed by the flow time.
    batch["context"] = jnp.concatenate(
        [batch["reward"], time_step[:, :, 0, 0]], axis=1
    )

    # The first quarter of the trajectory is the observed past, the rest is the
    # future that the model has to denoise.
    split_at = len_seq // 4
    batch["state_past"] = state_histo[:, :split_at, :, :]
    batch["state_future"] = state_histo[:, split_at:, :, :]

    # Noise lives on the probability simplex (one Dirichlet(1,...,1) draw per
    # sticker), so the interpolation below stays a distribution over colours.
    simplex_noise = jax.random.dirichlet(
        key_noise, jnp.ones(6), batch["state_future"].shape[:-1]
    )

    batch["state_future_noise"] = (
        (1 - time_step) * simplex_noise + time_step * batch["state_future"]
    )

    return batch


# Try the preprocessing on the batch sampled above, with the function's default
# key. `sample` is rebound to the returned batch dict.
sample = reshape_diffusion_setup(sample)


In [10]:
def loss_fn_transformer_rf(model: RubikDTTransformer, batch):
    """
    Time-weighted cross-entropy between predicted and clean future states.

    The model is called with the past states, the noised future states and the
    context; only its second output is used, as logits for the future states.

    Returns:
        `(weighted_loss, plain_loss)`: the batch mean of the per-sample
        cross-entropy multiplied by the time weight (the value that is
        differentiated), and the batch mean of the unweighted cross-entropy
        (auxiliary value for monitoring).
    """
    _, future_logits = model(
        batch["state_past"], batch["state_future_noise"], batch["context"]
    )

    # Cross-entropy over the class axis, then averaged over axes 1 and 2 to get
    # one value per batch element.
    per_sample_ce = optax.softmax_cross_entropy(
        logits=future_logits, labels=batch["state_future"]
    ).mean(axis=[1, 2])

    # 1 / (1 - t), clipped to [0.005, 1.5]. For t in [0, 1) the ratio is >= 1,
    # so in that range only the upper bound can be active.
    flow_time = batch["time_step"][:, 0, 0, 0]
    time_weight = jnp.clip(1. / (1. - flow_time), min=0.005, max=1.5)

    weighted_loss = (per_sample_ce * time_weight).mean()

    return weighted_loss, per_sample_ce.mean()


@nnx.jit
def train_step_transformer_rf(
    model: RubikDTTransformer,
    optimizer: nnx.Optimizer,
    metrics: nnx.MultiMetric,
    batch,
):
    """
    Run one optimisation step on `batch`.

    Differentiates `loss_fn_transformer_rf` with respect to the model, records
    the weighted loss and the plain cross-entropy in `metrics`, then applies
    the gradients through `optimizer`. Nothing is returned; model, optimizer
    and metrics are updated in place.
    """
    (loss, loss_crossentropy), grads = nnx.value_and_grad(
        loss_fn_transformer_rf, has_aux=True
    )(model, batch)

    metrics.update(loss=loss, loss_cross_entropy=loss_crossentropy)
    optimizer.update(grads)

In [11]:
# Advance the root key again; `subkey` feeds the gathering call below.
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

# Main fill of the training buffer before the training loop starts:
# 15 x `config.nb_games` games (192000 with the default config).
buffer, buffer_list = dataset.fast_gathering_data_diffusion(
    env,
    vmap_reset,
    vmap_step,
    config.nb_games * 15, # old is int(config.nb_games * 10.0),
    config.len_seq,
    buffer,
    buffer_list,
    subkey,
)

In [12]:
import pickle

# transformer model calibration
#
# Every consumer of randomness gets its own sub-key. A PRNG key that has been
# handed to a sampler must neither be handed to a second sampler nor be split
# again afterwards, otherwise the resulting random streams are correlated.
for idx_step in tqdm(range(config.nb_step)):
    # `key` is carried to the next iteration; the three others are consumed by
    # buffer sampling, data gathering and noise generation respectively.
    key, subkey, data_key, noise_key = jax.random.split(config.jax_key, 4)
    config.jax_key = key

    # Periodically top the training buffer up with freshly generated games.
    if idx_step % config.add_data_every_step == 0:
        buffer, buffer_list = dataset.fast_gathering_data_diffusion(
            env,
            vmap_reset,
            vmap_step,
            int(config.nb_games // 10),
            config.len_seq,
            buffer,
            buffer_list,
            data_key,
        )

    sample = buffer.sample(buffer_list, subkey)
    sample = reshape_diffusion_setup(sample, noise_key)

    # One gradient step; the metrics object accumulates the losses.
    train_step_transformer_rf(
        transformer, optimizer_diffuser, metrics_train, sample
    )

    # Report the average of the losses accumulated since the last report.
    if idx_step % config.log_every_step == 0:
        metrics_train_result = metrics_train.compute()
        print(metrics_train_result)

        wandb.log(metrics_train_result, step=idx_step)
        metrics_train.reset()

    # Evaluation on games generated on the spot.
    if idx_step % config.log_eval_every_step == 0:
        key, eval_data_key, eval_sample_key, eval_noise_key = jax.random.split(
            config.jax_key, 4
        )
        config.jax_key = key

        buffer_eval, buffer_list_eval = dataset.fast_gathering_data_diffusion(
            env,
            vmap_reset,
            vmap_step,
            int(128),
            config.len_seq,
            buffer_eval,
            buffer_list_eval,
            eval_data_key,
        )

        sample = buffer_eval.sample(buffer_list_eval, eval_sample_key)
        sample = reshape_diffusion_setup(sample, eval_noise_key)

        # Same loss as in training, evaluated without a parameter update.
        loss, (loss_crossentropy) = loss_fn_transformer_rf(
            transformer, sample
        )

        metrics_eval.update(
            loss_eval=loss,
            loss_cross_entropy_eval=loss_crossentropy,
        )
        wandb.log(metrics_eval.compute(), step=idx_step)

        metrics_eval.reset()

    # Checkpoint the model state. The condition is also true at idx_step == 0,
    # so an existing checkpoint file is overwritten on the very first iteration.
    if idx_step % config.save_model_every_step == 0:

        state_weight = nnx.state(transformer)

        with open("state_ddt_model.pickle", "wb") as handle:
            pickle.dump(state_weight, handle, protocol=pickle.HIGHEST_PROTOCOL)

{'loss': Array(0.32483298, dtype=float32), 'loss_cross_entropy': Array(0.28385186, dtype=float32)}


  1%|          | 5509/1000000 [15:34<89:07:51,  3.10it/s] 

{'loss': Array(0.32437667, dtype=float32), 'loss_cross_entropy': Array(0.2848944, dtype=float32)}


  1%|          | 5519/1000000 [15:36<41:30:26,  6.66it/s]

{'loss': Array(0.3191602, dtype=float32), 'loss_cross_entropy': Array(0.27801296, dtype=float32)}


  1%|          | 5529/1000000 [15:37<28:21:37,  9.74it/s]

{'loss': Array(0.33983144, dtype=float32), 'loss_cross_entropy': Array(0.29937404, dtype=float32)}


  1%|          | 5539/1000000 [15:39<34:21:37,  8.04it/s]

{'loss': Array(0.3221778, dtype=float32), 'loss_cross_entropy': Array(0.28160444, dtype=float32)}


  1%|          | 5549/1000000 [15:40<26:52:07, 10.28it/s]

{'loss': Array(0.33066025, dtype=float32), 'loss_cross_entropy': Array(0.28953508, dtype=float32)}


  1%|          | 5559/1000000 [15:42<40:37:04,  6.80it/s]

{'loss': Array(0.3328711, dtype=float32), 'loss_cross_entropy': Array(0.2903545, dtype=float32)}


  1%|          | 5569/1000000 [15:43<28:29:58,  9.69it/s]

{'loss': Array(0.34401202, dtype=float32), 'loss_cross_entropy': Array(0.3014132, dtype=float32)}


  1%|          | 5579/1000000 [15:45<26:26:25, 10.45it/s]

{'loss': Array(0.32321188, dtype=float32), 'loss_cross_entropy': Array(0.28173435, dtype=float32)}


  1%|          | 5589/1000000 [15:46<29:15:41,  9.44it/s]

{'loss': Array(0.30504972, dtype=float32), 'loss_cross_entropy': Array(0.26391417, dtype=float32)}


  1%|          | 5599/1000000 [15:48<26:55:03, 10.26it/s]

{'loss': Array(0.3639209, dtype=float32), 'loss_cross_entropy': Array(0.31772768, dtype=float32)}


  1%|          | 5609/1000000 [15:50<41:15:28,  6.69it/s]

{'loss': Array(0.3560739, dtype=float32), 'loss_cross_entropy': Array(0.311517, dtype=float32)}


  1%|          | 5619/1000000 [15:51<28:37:42,  9.65it/s]

{'loss': Array(0.3188464, dtype=float32), 'loss_cross_entropy': Array(0.27705655, dtype=float32)}


  1%|          | 5629/1000000 [15:52<25:47:38, 10.71it/s]

{'loss': Array(0.32485595, dtype=float32), 'loss_cross_entropy': Array(0.28535858, dtype=float32)}


  1%|          | 5639/1000000 [15:54<30:49:59,  8.96it/s]

{'loss': Array(0.33839315, dtype=float32), 'loss_cross_entropy': Array(0.2986674, dtype=float32)}


  1%|          | 5649/1000000 [15:55<26:35:28, 10.39it/s]

{'loss': Array(0.33131325, dtype=float32), 'loss_cross_entropy': Array(0.28772065, dtype=float32)}


  1%|          | 5659/1000000 [15:57<35:52:19,  7.70it/s]

{'loss': Array(0.3652555, dtype=float32), 'loss_cross_entropy': Array(0.32194877, dtype=float32)}


  1%|          | 5669/1000000 [15:58<27:38:52,  9.99it/s]

{'loss': Array(0.3564074, dtype=float32), 'loss_cross_entropy': Array(0.3117389, dtype=float32)}


  1%|          | 5679/1000000 [16:00<40:42:55,  6.78it/s]

{'loss': Array(0.32671994, dtype=float32), 'loss_cross_entropy': Array(0.28674266, dtype=float32)}


  1%|          | 5689/1000000 [16:02<28:23:45,  9.73it/s]

{'loss': Array(0.32966578, dtype=float32), 'loss_cross_entropy': Array(0.28812778, dtype=float32)}


  1%|          | 5699/1000000 [16:03<26:38:11, 10.37it/s]

{'loss': Array(0.3339735, dtype=float32), 'loss_cross_entropy': Array(0.29328784, dtype=float32)}


  1%|          | 5709/1000000 [16:05<33:05:41,  8.35it/s]

{'loss': Array(0.3599298, dtype=float32), 'loss_cross_entropy': Array(0.31526715, dtype=float32)}


  1%|          | 5719/1000000 [16:06<26:57:38, 10.24it/s]

{'loss': Array(0.33452803, dtype=float32), 'loss_cross_entropy': Array(0.29206142, dtype=float32)}


  1%|          | 5729/1000000 [16:08<36:49:47,  7.50it/s]

{'loss': Array(0.32501903, dtype=float32), 'loss_cross_entropy': Array(0.28444085, dtype=float32)}


  1%|          | 5739/1000000 [16:09<27:44:49,  9.95it/s]

{'loss': Array(0.32929453, dtype=float32), 'loss_cross_entropy': Array(0.28837845, dtype=float32)}


  1%|          | 5749/1000000 [16:11<26:27:56, 10.44it/s]

{'loss': Array(0.33603707, dtype=float32), 'loss_cross_entropy': Array(0.29394525, dtype=float32)}


  1%|          | 5759/1000000 [16:12<29:41:13,  9.30it/s]

{'loss': Array(0.31913263, dtype=float32), 'loss_cross_entropy': Array(0.27946708, dtype=float32)}


  1%|          | 5769/1000000 [16:14<26:33:29, 10.40it/s]

{'loss': Array(0.30865258, dtype=float32), 'loss_cross_entropy': Array(0.2721876, dtype=float32)}


  1%|          | 5779/1000000 [16:15<39:57:36,  6.91it/s]

{'loss': Array(0.33908793, dtype=float32), 'loss_cross_entropy': Array(0.29726765, dtype=float32)}


  1%|          | 5789/1000000 [16:17<28:00:40,  9.86it/s]

{'loss': Array(0.31556296, dtype=float32), 'loss_cross_entropy': Array(0.27467915, dtype=float32)}


  1%|          | 5799/1000000 [16:18<26:20:53, 10.48it/s]

{'loss': Array(0.33938077, dtype=float32), 'loss_cross_entropy': Array(0.29616237, dtype=float32)}


  1%|          | 5809/1000000 [16:20<32:51:31,  8.40it/s]

{'loss': Array(0.30561393, dtype=float32), 'loss_cross_entropy': Array(0.2668488, dtype=float32)}


  1%|          | 5819/1000000 [16:21<26:43:25, 10.33it/s]

{'loss': Array(0.31620184, dtype=float32), 'loss_cross_entropy': Array(0.27733153, dtype=float32)}


  1%|          | 5829/1000000 [16:23<36:12:00,  7.63it/s]

{'loss': Array(0.3156723, dtype=float32), 'loss_cross_entropy': Array(0.2754865, dtype=float32)}


  1%|          | 5839/1000000 [16:24<27:14:52, 10.13it/s]

{'loss': Array(0.32808393, dtype=float32), 'loss_cross_entropy': Array(0.28574392, dtype=float32)}


  1%|          | 5849/1000000 [16:26<36:17:17,  7.61it/s]

{'loss': Array(0.3162548, dtype=float32), 'loss_cross_entropy': Array(0.27632073, dtype=float32)}


  1%|          | 5859/1000000 [16:27<27:32:33, 10.03it/s]

{'loss': Array(0.32786378, dtype=float32), 'loss_cross_entropy': Array(0.28578958, dtype=float32)}


  1%|          | 5869/1000000 [16:29<26:45:39, 10.32it/s]

{'loss': Array(0.3327196, dtype=float32), 'loss_cross_entropy': Array(0.29127344, dtype=float32)}


  1%|          | 5879/1000000 [16:30<30:41:29,  9.00it/s]

{'loss': Array(0.33427808, dtype=float32), 'loss_cross_entropy': Array(0.29087272, dtype=float32)}


  1%|          | 5889/1000000 [16:32<26:04:43, 10.59it/s]

{'loss': Array(0.31473264, dtype=float32), 'loss_cross_entropy': Array(0.27362677, dtype=float32)}


  1%|          | 5899/1000000 [16:33<33:14:36,  8.31it/s]

{'loss': Array(0.3353339, dtype=float32), 'loss_cross_entropy': Array(0.29412946, dtype=float32)}


  1%|          | 5909/1000000 [16:35<26:41:26, 10.35it/s]

{'loss': Array(0.34484082, dtype=float32), 'loss_cross_entropy': Array(0.30537686, dtype=float32)}


  1%|          | 5919/1000000 [16:36<40:41:09,  6.79it/s]

{'loss': Array(0.34670314, dtype=float32), 'loss_cross_entropy': Array(0.30300042, dtype=float32)}


  1%|          | 5929/1000000 [16:38<29:12:45,  9.45it/s]

{'loss': Array(0.32082254, dtype=float32), 'loss_cross_entropy': Array(0.27896008, dtype=float32)}


  1%|          | 5939/1000000 [16:40<41:31:09,  6.65it/s]

{'loss': Array(0.341202, dtype=float32), 'loss_cross_entropy': Array(0.2991385, dtype=float32)}


  1%|          | 5949/1000000 [16:41<28:41:14,  9.63it/s]

{'loss': Array(0.3212993, dtype=float32), 'loss_cross_entropy': Array(0.28093255, dtype=float32)}


  1%|          | 5959/1000000 [16:42<26:12:09, 10.54it/s]

{'loss': Array(0.3230575, dtype=float32), 'loss_cross_entropy': Array(0.28072822, dtype=float32)}


  1%|          | 5969/1000000 [16:44<36:10:00,  7.63it/s]

{'loss': Array(0.36036745, dtype=float32), 'loss_cross_entropy': Array(0.31503898, dtype=float32)}


  1%|          | 5979/1000000 [16:45<27:21:12, 10.09it/s]

{'loss': Array(0.32422253, dtype=float32), 'loss_cross_entropy': Array(0.2819927, dtype=float32)}


  1%|          | 5989/1000000 [16:47<40:25:20,  6.83it/s]

{'loss': Array(0.32690683, dtype=float32), 'loss_cross_entropy': Array(0.28936297, dtype=float32)}


  1%|          | 5999/1000000 [16:49<28:38:15,  9.64it/s]

{'loss': Array(0.32992846, dtype=float32), 'loss_cross_entropy': Array(0.28888443, dtype=float32)}


  1%|          | 6009/1000000 [16:57<92:01:02,  3.00it/s] 

{'loss': Array(0.3488129, dtype=float32), 'loss_cross_entropy': Array(0.3036781, dtype=float32)}


  1%|          | 6019/1000000 [16:58<40:31:35,  6.81it/s] 

{'loss': Array(0.35344782, dtype=float32), 'loss_cross_entropy': Array(0.30902454, dtype=float32)}


  1%|          | 6029/1000000 [17:00<28:05:28,  9.83it/s]

{'loss': Array(0.33780706, dtype=float32), 'loss_cross_entropy': Array(0.2947491, dtype=float32)}


  1%|          | 6039/1000000 [17:01<29:18:05,  9.42it/s]

{'loss': Array(0.32212004, dtype=float32), 'loss_cross_entropy': Array(0.28051227, dtype=float32)}


  1%|          | 6049/1000000 [17:03<27:42:20,  9.97it/s]

{'loss': Array(0.32825842, dtype=float32), 'loss_cross_entropy': Array(0.28743592, dtype=float32)}


  1%|          | 6059/1000000 [17:04<41:06:19,  6.72it/s]

{'loss': Array(0.33676782, dtype=float32), 'loss_cross_entropy': Array(0.29643774, dtype=float32)}


  1%|          | 6069/1000000 [17:06<29:12:28,  9.45it/s]

{'loss': Array(0.35266864, dtype=float32), 'loss_cross_entropy': Array(0.31072283, dtype=float32)}


  1%|          | 6079/1000000 [17:07<26:24:29, 10.45it/s]

{'loss': Array(0.34484157, dtype=float32), 'loss_cross_entropy': Array(0.30128822, dtype=float32)}


  1%|          | 6089/1000000 [17:09<33:50:36,  8.16it/s]

{'loss': Array(0.32171273, dtype=float32), 'loss_cross_entropy': Array(0.2821046, dtype=float32)}


  1%|          | 6099/1000000 [17:10<27:35:12, 10.01it/s]

{'loss': Array(0.31312382, dtype=float32), 'loss_cross_entropy': Array(0.27477536, dtype=float32)}


  1%|          | 6109/1000000 [17:12<40:42:46,  6.78it/s]

{'loss': Array(0.3487236, dtype=float32), 'loss_cross_entropy': Array(0.30518445, dtype=float32)}


  1%|          | 6119/1000000 [17:14<28:21:25,  9.74it/s]

{'loss': Array(0.29549327, dtype=float32), 'loss_cross_entropy': Array(0.25688902, dtype=float32)}


  1%|          | 6129/1000000 [17:15<25:41:59, 10.74it/s]

{'loss': Array(0.3421088, dtype=float32), 'loss_cross_entropy': Array(0.29899302, dtype=float32)}


  1%|          | 6139/1000000 [17:17<31:03:27,  8.89it/s]

{'loss': Array(0.36963892, dtype=float32), 'loss_cross_entropy': Array(0.3250276, dtype=float32)}


  1%|          | 6149/1000000 [17:18<27:07:48, 10.18it/s]

{'loss': Array(0.303299, dtype=float32), 'loss_cross_entropy': Array(0.26521063, dtype=float32)}


  1%|          | 6159/1000000 [17:20<35:48:45,  7.71it/s]

{'loss': Array(0.34381494, dtype=float32), 'loss_cross_entropy': Array(0.30186078, dtype=float32)}


  1%|          | 6169/1000000 [17:21<27:09:52, 10.16it/s]

{'loss': Array(0.3279903, dtype=float32), 'loss_cross_entropy': Array(0.28830507, dtype=float32)}


  1%|          | 6179/1000000 [17:22<26:30:31, 10.41it/s]

{'loss': Array(0.31418568, dtype=float32), 'loss_cross_entropy': Array(0.27563846, dtype=float32)}


  1%|          | 6189/1000000 [17:24<33:22:00,  8.27it/s]

{'loss': Array(0.29671618, dtype=float32), 'loss_cross_entropy': Array(0.2576239, dtype=float32)}


  1%|          | 6199/1000000 [17:26<27:13:08, 10.14it/s]

{'loss': Array(0.31797868, dtype=float32), 'loss_cross_entropy': Array(0.27846676, dtype=float32)}


  1%|          | 6209/1000000 [17:27<25:40:16, 10.75it/s]

{'loss': Array(0.3546234, dtype=float32), 'loss_cross_entropy': Array(0.31351095, dtype=float32)}


  1%|          | 6219/1000000 [17:29<30:01:09,  9.20it/s]

{'loss': Array(0.3534181, dtype=float32), 'loss_cross_entropy': Array(0.30949393, dtype=float32)}


  1%|          | 6229/1000000 [17:30<25:54:32, 10.65it/s]

{'loss': Array(0.31800464, dtype=float32), 'loss_cross_entropy': Array(0.277418, dtype=float32)}


  1%|          | 6239/1000000 [17:32<32:29:54,  8.49it/s]

{'loss': Array(0.31120008, dtype=float32), 'loss_cross_entropy': Array(0.27211675, dtype=float32)}


  1%|          | 6249/1000000 [17:33<27:14:54, 10.13it/s]

{'loss': Array(0.34154937, dtype=float32), 'loss_cross_entropy': Array(0.29957148, dtype=float32)}


  1%|          | 6259/1000000 [17:35<40:41:57,  6.78it/s]

{'loss': Array(0.33616754, dtype=float32), 'loss_cross_entropy': Array(0.2954031, dtype=float32)}


  1%|          | 6269/1000000 [17:36<27:40:53,  9.97it/s]

{'loss': Array(0.32186437, dtype=float32), 'loss_cross_entropy': Array(0.27876386, dtype=float32)}


  1%|          | 6279/1000000 [17:38<42:31:19,  6.49it/s]

{'loss': Array(0.32674515, dtype=float32), 'loss_cross_entropy': Array(0.2857735, dtype=float32)}


  1%|          | 6289/1000000 [17:39<28:27:53,  9.70it/s]

{'loss': Array(0.33697483, dtype=float32), 'loss_cross_entropy': Array(0.29592592, dtype=float32)}


  1%|          | 6299/1000000 [17:41<25:49:43, 10.69it/s]

{'loss': Array(0.34279895, dtype=float32), 'loss_cross_entropy': Array(0.30246183, dtype=float32)}


  1%|          | 6309/1000000 [17:42<35:59:50,  7.67it/s]

{'loss': Array(0.2975275, dtype=float32), 'loss_cross_entropy': Array(0.2598047, dtype=float32)}


  1%|          | 6319/1000000 [17:44<27:53:19,  9.90it/s]

{'loss': Array(0.3397093, dtype=float32), 'loss_cross_entropy': Array(0.2983232, dtype=float32)}


  1%|          | 6329/1000000 [17:45<40:57:48,  6.74it/s]

{'loss': Array(0.34185413, dtype=float32), 'loss_cross_entropy': Array(0.29944426, dtype=float32)}


  1%|          | 6339/1000000 [17:47<28:15:54,  9.77it/s]

{'loss': Array(0.33713272, dtype=float32), 'loss_cross_entropy': Array(0.2958672, dtype=float32)}


  1%|          | 6349/1000000 [17:48<26:14:44, 10.52it/s]

{'loss': Array(0.32268116, dtype=float32), 'loss_cross_entropy': Array(0.2828864, dtype=float32)}


  1%|          | 6359/1000000 [17:50<29:01:10,  9.51it/s]

{'loss': Array(0.3327846, dtype=float32), 'loss_cross_entropy': Array(0.2903189, dtype=float32)}


  1%|          | 6369/1000000 [17:51<26:47:15, 10.30it/s]

{'loss': Array(0.32847565, dtype=float32), 'loss_cross_entropy': Array(0.2873256, dtype=float32)}


  1%|          | 6379/1000000 [17:53<29:49:43,  9.25it/s]

{'loss': Array(0.32711634, dtype=float32), 'loss_cross_entropy': Array(0.28696766, dtype=float32)}


  1%|          | 6389/1000000 [17:54<26:14:48, 10.52it/s]

{'loss': Array(0.3352104, dtype=float32), 'loss_cross_entropy': Array(0.29488719, dtype=float32)}


  1%|          | 6399/1000000 [17:56<40:20:01,  6.84it/s]

{'loss': Array(0.34576514, dtype=float32), 'loss_cross_entropy': Array(0.30131838, dtype=float32)}


  1%|          | 6409/1000000 [17:58<29:41:47,  9.29it/s]

{'loss': Array(0.3337985, dtype=float32), 'loss_cross_entropy': Array(0.29266128, dtype=float32)}


  1%|          | 6419/1000000 [17:59<26:50:07, 10.28it/s]

{'loss': Array(0.31493732, dtype=float32), 'loss_cross_entropy': Array(0.27493185, dtype=float32)}


  1%|          | 6429/1000000 [18:01<33:09:59,  8.32it/s]

{'loss': Array(0.32764456, dtype=float32), 'loss_cross_entropy': Array(0.28859922, dtype=float32)}


  1%|          | 6439/1000000 [18:02<27:09:38, 10.16it/s]

{'loss': Array(0.34653154, dtype=float32), 'loss_cross_entropy': Array(0.30563045, dtype=float32)}


  1%|          | 6449/1000000 [18:04<40:56:09,  6.74it/s]

{'loss': Array(0.32637605, dtype=float32), 'loss_cross_entropy': Array(0.28428903, dtype=float32)}


  1%|          | 6459/1000000 [18:05<28:22:16,  9.73it/s]

{'loss': Array(0.3274605, dtype=float32), 'loss_cross_entropy': Array(0.28793463, dtype=float32)}


  1%|          | 6469/1000000 [18:07<25:53:10, 10.66it/s]

{'loss': Array(0.32314095, dtype=float32), 'loss_cross_entropy': Array(0.28234178, dtype=float32)}


  1%|          | 6479/1000000 [18:08<30:42:44,  8.99it/s]

{'loss': Array(0.33114877, dtype=float32), 'loss_cross_entropy': Array(0.29293278, dtype=float32)}


  1%|          | 6489/1000000 [18:10<26:08:40, 10.56it/s]

{'loss': Array(0.34482217, dtype=float32), 'loss_cross_entropy': Array(0.3014278, dtype=float32)}


  1%|          | 6499/1000000 [18:11<36:01:39,  7.66it/s]

{'loss': Array(0.3264986, dtype=float32), 'loss_cross_entropy': Array(0.28626698, dtype=float32)}


  1%|          | 6509/1000000 [18:19<91:24:33,  3.02it/s] 

{'loss': Array(0.32219872, dtype=float32), 'loss_cross_entropy': Array(0.28332958, dtype=float32)}


  1%|          | 6519/1000000 [18:21<47:11:46,  5.85it/s] 

{'loss': Array(0.33135542, dtype=float32), 'loss_cross_entropy': Array(0.29266423, dtype=float32)}


  1%|          | 6529/1000000 [18:22<28:44:17,  9.60it/s]

{'loss': Array(0.31464168, dtype=float32), 'loss_cross_entropy': Array(0.27547085, dtype=float32)}


  1%|          | 6539/1000000 [18:24<26:19:22, 10.48it/s]

{'loss': Array(0.32909858, dtype=float32), 'loss_cross_entropy': Array(0.28970614, dtype=float32)}


  1%|          | 6549/1000000 [18:25<33:02:21,  8.35it/s]

{'loss': Array(0.34487495, dtype=float32), 'loss_cross_entropy': Array(0.30154502, dtype=float32)}


  1%|          | 6559/1000000 [18:27<27:42:15,  9.96it/s]

{'loss': Array(0.33126605, dtype=float32), 'loss_cross_entropy': Array(0.29067838, dtype=float32)}


  1%|          | 6569/1000000 [18:28<26:59:40, 10.22it/s]

{'loss': Array(0.3158507, dtype=float32), 'loss_cross_entropy': Array(0.2750937, dtype=float32)}


  1%|          | 6579/1000000 [18:30<29:12:12,  9.45it/s]

{'loss': Array(0.3096986, dtype=float32), 'loss_cross_entropy': Array(0.27232364, dtype=float32)}


  1%|          | 6589/1000000 [18:31<26:37:41, 10.36it/s]

{'loss': Array(0.32196614, dtype=float32), 'loss_cross_entropy': Array(0.2808603, dtype=float32)}


  1%|          | 6599/1000000 [18:33<34:04:38,  8.10it/s]

{'loss': Array(0.3192632, dtype=float32), 'loss_cross_entropy': Array(0.27933848, dtype=float32)}


  1%|          | 6609/1000000 [18:34<26:27:44, 10.43it/s]

{'loss': Array(0.32040864, dtype=float32), 'loss_cross_entropy': Array(0.28064808, dtype=float32)}


  1%|          | 6619/1000000 [18:36<36:02:52,  7.65it/s]

{'loss': Array(0.3073813, dtype=float32), 'loss_cross_entropy': Array(0.26973662, dtype=float32)}


  1%|          | 6629/1000000 [18:37<27:17:03, 10.11it/s]

{'loss': Array(0.32947668, dtype=float32), 'loss_cross_entropy': Array(0.28741014, dtype=float32)}


  1%|          | 6639/1000000 [18:39<26:18:40, 10.49it/s]

{'loss': Array(0.30907595, dtype=float32), 'loss_cross_entropy': Array(0.2709804, dtype=float32)}


  1%|          | 6649/1000000 [18:41<33:27:43,  8.25it/s]

{'loss': Array(0.3415837, dtype=float32), 'loss_cross_entropy': Array(0.29932317, dtype=float32)}


  1%|          | 6659/1000000 [18:42<27:10:26, 10.15it/s]

{'loss': Array(0.3499784, dtype=float32), 'loss_cross_entropy': Array(0.3065989, dtype=float32)}


  1%|          | 6669/1000000 [18:43<26:12:23, 10.53it/s]

{'loss': Array(0.33565816, dtype=float32), 'loss_cross_entropy': Array(0.29700908, dtype=float32)}


  1%|          | 6679/1000000 [18:45<29:16:21,  9.43it/s]

{'loss': Array(0.34357744, dtype=float32), 'loss_cross_entropy': Array(0.30246255, dtype=float32)}


  1%|          | 6689/1000000 [18:46<26:00:00, 10.61it/s]

{'loss': Array(0.3315671, dtype=float32), 'loss_cross_entropy': Array(0.2907525, dtype=float32)}


  1%|          | 6699/1000000 [18:48<33:27:47,  8.25it/s]

{'loss': Array(0.31119838, dtype=float32), 'loss_cross_entropy': Array(0.27165666, dtype=float32)}


  1%|          | 6709/1000000 [18:49<26:53:28, 10.26it/s]

{'loss': Array(0.3046074, dtype=float32), 'loss_cross_entropy': Array(0.26575145, dtype=float32)}


  1%|          | 6719/1000000 [18:51<40:00:54,  6.90it/s]

{'loss': Array(0.32874584, dtype=float32), 'loss_cross_entropy': Array(0.28870308, dtype=float32)}


  1%|          | 6729/1000000 [18:53<28:46:53,  9.59it/s]

{'loss': Array(0.34122214, dtype=float32), 'loss_cross_entropy': Array(0.2980629, dtype=float32)}


  1%|          | 6739/1000000 [18:54<26:55:43, 10.25it/s]

{'loss': Array(0.31039783, dtype=float32), 'loss_cross_entropy': Array(0.2710668, dtype=float32)}


  1%|          | 6749/1000000 [18:56<36:57:04,  7.47it/s]

{'loss': Array(0.33836532, dtype=float32), 'loss_cross_entropy': Array(0.29752502, dtype=float32)}


  1%|          | 6759/1000000 [18:57<27:57:04,  9.87it/s]

{'loss': Array(0.30571064, dtype=float32), 'loss_cross_entropy': Array(0.26885173, dtype=float32)}


  1%|          | 6769/1000000 [18:59<26:51:41, 10.27it/s]

{'loss': Array(0.33744985, dtype=float32), 'loss_cross_entropy': Array(0.2992646, dtype=float32)}


  1%|          | 6779/1000000 [19:00<31:05:13,  8.87it/s]

{'loss': Array(0.34529275, dtype=float32), 'loss_cross_entropy': Array(0.30404064, dtype=float32)}


  1%|          | 6789/1000000 [19:02<26:03:31, 10.59it/s]

{'loss': Array(0.3049726, dtype=float32), 'loss_cross_entropy': Array(0.26606533, dtype=float32)}


  1%|          | 6799/1000000 [19:03<33:25:33,  8.25it/s]

{'loss': Array(0.334719, dtype=float32), 'loss_cross_entropy': Array(0.29304555, dtype=float32)}


  1%|          | 6809/1000000 [19:05<27:04:17, 10.19it/s]

{'loss': Array(0.3392242, dtype=float32), 'loss_cross_entropy': Array(0.30002528, dtype=float32)}


  1%|          | 6819/1000000 [19:06<25:38:22, 10.76it/s]

{'loss': Array(0.32723233, dtype=float32), 'loss_cross_entropy': Array(0.28761643, dtype=float32)}


  1%|          | 6829/1000000 [19:08<30:21:02,  9.09it/s]

{'loss': Array(0.32937524, dtype=float32), 'loss_cross_entropy': Array(0.28757256, dtype=float32)}


  1%|          | 6839/1000000 [19:09<26:28:39, 10.42it/s]

{'loss': Array(0.32297164, dtype=float32), 'loss_cross_entropy': Array(0.28215298, dtype=float32)}


  1%|          | 6849/1000000 [19:11<35:39:59,  7.73it/s]

{'loss': Array(0.3410974, dtype=float32), 'loss_cross_entropy': Array(0.3004222, dtype=float32)}


  1%|          | 6859/1000000 [19:12<26:39:33, 10.35it/s]

{'loss': Array(0.31735367, dtype=float32), 'loss_cross_entropy': Array(0.27735022, dtype=float32)}


  1%|          | 6869/1000000 [19:14<26:32:01, 10.40it/s]

{'loss': Array(0.3314329, dtype=float32), 'loss_cross_entropy': Array(0.29279503, dtype=float32)}


  1%|          | 6879/1000000 [19:15<29:03:11,  9.50it/s]

{'loss': Array(0.34918508, dtype=float32), 'loss_cross_entropy': Array(0.30802867, dtype=float32)}


  1%|          | 6889/1000000 [19:17<26:14:17, 10.51it/s]

{'loss': Array(0.34899172, dtype=float32), 'loss_cross_entropy': Array(0.3063589, dtype=float32)}


  1%|          | 6899/1000000 [19:18<31:44:08,  8.69it/s]

{'loss': Array(0.33217666, dtype=float32), 'loss_cross_entropy': Array(0.29318663, dtype=float32)}


  1%|          | 6909/1000000 [19:20<26:26:45, 10.43it/s]

{'loss': Array(0.3430409, dtype=float32), 'loss_cross_entropy': Array(0.30133602, dtype=float32)}


  1%|          | 6919/1000000 [19:21<40:24:20,  6.83it/s]

{'loss': Array(0.33403364, dtype=float32), 'loss_cross_entropy': Array(0.29098284, dtype=float32)}


  1%|          | 6929/1000000 [19:23<28:58:29,  9.52it/s]

{'loss': Array(0.34389287, dtype=float32), 'loss_cross_entropy': Array(0.30264822, dtype=float32)}


  1%|          | 6939/1000000 [19:24<25:38:59, 10.75it/s]

{'loss': Array(0.3258557, dtype=float32), 'loss_cross_entropy': Array(0.2862706, dtype=float32)}


  1%|          | 6949/1000000 [19:26<32:39:21,  8.45it/s]

{'loss': Array(0.32180226, dtype=float32), 'loss_cross_entropy': Array(0.28309682, dtype=float32)}


  1%|          | 6959/1000000 [19:27<26:24:30, 10.45it/s]

{'loss': Array(0.3496513, dtype=float32), 'loss_cross_entropy': Array(0.30785352, dtype=float32)}


  1%|          | 6969/1000000 [19:29<37:47:29,  7.30it/s]

{'loss': Array(0.33063656, dtype=float32), 'loss_cross_entropy': Array(0.2885761, dtype=float32)}


  1%|          | 6979/1000000 [19:30<27:56:10,  9.87it/s]

{'loss': Array(0.32562277, dtype=float32), 'loss_cross_entropy': Array(0.2849292, dtype=float32)}


  1%|          | 6989/1000000 [19:32<35:50:43,  7.70it/s]

{'loss': Array(0.32038915, dtype=float32), 'loss_cross_entropy': Array(0.27932104, dtype=float32)}


  1%|          | 6999/1000000 [19:34<28:26:00,  9.70it/s]

{'loss': Array(0.3379133, dtype=float32), 'loss_cross_entropy': Array(0.29500952, dtype=float32)}


  1%|          | 7009/1000000 [19:41<90:06:02,  3.06it/s] 

{'loss': Array(0.30877018, dtype=float32), 'loss_cross_entropy': Array(0.27099666, dtype=float32)}


  1%|          | 7019/1000000 [19:43<42:01:09,  6.56it/s] 

{'loss': Array(0.3164495, dtype=float32), 'loss_cross_entropy': Array(0.27710474, dtype=float32)}


  1%|          | 7029/1000000 [19:44<28:33:27,  9.66it/s]

{'loss': Array(0.33817345, dtype=float32), 'loss_cross_entropy': Array(0.29558977, dtype=float32)}


  1%|          | 7039/1000000 [19:46<36:26:15,  7.57it/s]

{'loss': Array(0.3388271, dtype=float32), 'loss_cross_entropy': Array(0.29689977, dtype=float32)}


  1%|          | 7049/1000000 [19:48<29:31:19,  9.34it/s]

{'loss': Array(0.32378957, dtype=float32), 'loss_cross_entropy': Array(0.28718105, dtype=float32)}


  1%|          | 7059/1000000 [19:49<40:48:02,  6.76it/s]

{'loss': Array(0.3312924, dtype=float32), 'loss_cross_entropy': Array(0.28863654, dtype=float32)}


  1%|          | 7069/1000000 [19:51<28:11:17,  9.78it/s]

{'loss': Array(0.32028833, dtype=float32), 'loss_cross_entropy': Array(0.28166848, dtype=float32)}


  1%|          | 7079/1000000 [19:52<25:54:17, 10.65it/s]

{'loss': Array(0.31742522, dtype=float32), 'loss_cross_entropy': Array(0.2790689, dtype=float32)}


  1%|          | 7089/1000000 [19:54<33:14:48,  8.30it/s]

{'loss': Array(0.30479822, dtype=float32), 'loss_cross_entropy': Array(0.2667819, dtype=float32)}


  1%|          | 7099/1000000 [19:55<26:47:40, 10.29it/s]

{'loss': Array(0.3191808, dtype=float32), 'loss_cross_entropy': Array(0.2787056, dtype=float32)}


  1%|          | 7109/1000000 [19:57<35:27:16,  7.78it/s]

{'loss': Array(0.34410033, dtype=float32), 'loss_cross_entropy': Array(0.3031355, dtype=float32)}


  1%|          | 7119/1000000 [19:58<27:41:29,  9.96it/s]

{'loss': Array(0.33663598, dtype=float32), 'loss_cross_entropy': Array(0.2952015, dtype=float32)}


  1%|          | 7129/1000000 [20:00<25:45:07, 10.71it/s]

{'loss': Array(0.33926055, dtype=float32), 'loss_cross_entropy': Array(0.30035606, dtype=float32)}


  1%|          | 7139/1000000 [20:01<28:52:04,  9.55it/s]

{'loss': Array(0.3180334, dtype=float32), 'loss_cross_entropy': Array(0.28063673, dtype=float32)}


  1%|          | 7149/1000000 [20:03<28:32:06,  9.67it/s]

{'loss': Array(0.3339471, dtype=float32), 'loss_cross_entropy': Array(0.29182753, dtype=float32)}


  1%|          | 7159/1000000 [20:04<40:30:25,  6.81it/s]

{'loss': Array(0.33215395, dtype=float32), 'loss_cross_entropy': Array(0.2899073, dtype=float32)}


  1%|          | 7169/1000000 [20:06<28:29:30,  9.68it/s]

{'loss': Array(0.322908, dtype=float32), 'loss_cross_entropy': Array(0.28250828, dtype=float32)}


  1%|          | 7179/1000000 [20:07<26:17:32, 10.49it/s]

{'loss': Array(0.33694372, dtype=float32), 'loss_cross_entropy': Array(0.2959595, dtype=float32)}


  1%|          | 7189/1000000 [20:09<33:03:45,  8.34it/s]

{'loss': Array(0.33503315, dtype=float32), 'loss_cross_entropy': Array(0.29655728, dtype=float32)}


  1%|          | 7199/1000000 [20:10<26:32:54, 10.39it/s]

{'loss': Array(0.33289635, dtype=float32), 'loss_cross_entropy': Array(0.29220363, dtype=float32)}


  1%|          | 7209/1000000 [20:12<35:47:13,  7.71it/s]

{'loss': Array(0.2952778, dtype=float32), 'loss_cross_entropy': Array(0.25715992, dtype=float32)}


  1%|          | 7219/1000000 [20:13<27:20:41, 10.08it/s]

{'loss': Array(0.3427422, dtype=float32), 'loss_cross_entropy': Array(0.30043098, dtype=float32)}


  1%|          | 7229/1000000 [20:15<36:44:50,  7.50it/s]

{'loss': Array(0.3238266, dtype=float32), 'loss_cross_entropy': Array(0.28394616, dtype=float32)}


  1%|          | 7239/1000000 [20:16<27:06:24, 10.17it/s]

{'loss': Array(0.34423396, dtype=float32), 'loss_cross_entropy': Array(0.3027208, dtype=float32)}


  1%|          | 7249/1000000 [20:18<27:22:58, 10.07it/s]

{'loss': Array(0.2937773, dtype=float32), 'loss_cross_entropy': Array(0.2559927, dtype=float32)}


  1%|          | 7259/1000000 [20:19<30:57:50,  8.91it/s]

{'loss': Array(0.3359363, dtype=float32), 'loss_cross_entropy': Array(0.29652008, dtype=float32)}


  1%|          | 7269/1000000 [20:21<26:22:43, 10.45it/s]

{'loss': Array(0.32502624, dtype=float32), 'loss_cross_entropy': Array(0.28533307, dtype=float32)}


  1%|          | 7279/1000000 [20:23<34:22:59,  8.02it/s]

{'loss': Array(0.34901246, dtype=float32), 'loss_cross_entropy': Array(0.30762923, dtype=float32)}


  1%|          | 7289/1000000 [20:24<26:59:30, 10.22it/s]

{'loss': Array(0.30351135, dtype=float32), 'loss_cross_entropy': Array(0.26439098, dtype=float32)}


  1%|          | 7299/1000000 [20:26<40:32:18,  6.80it/s]

{'loss': Array(0.31191835, dtype=float32), 'loss_cross_entropy': Array(0.2748031, dtype=float32)}


  1%|          | 7309/1000000 [20:27<27:49:26,  9.91it/s]

{'loss': Array(0.2943329, dtype=float32), 'loss_cross_entropy': Array(0.25768194, dtype=float32)}


  1%|          | 7319/1000000 [20:29<41:28:23,  6.65it/s]

{'loss': Array(0.33757296, dtype=float32), 'loss_cross_entropy': Array(0.29685453, dtype=float32)}


  1%|          | 7329/1000000 [20:30<27:59:48,  9.85it/s]

{'loss': Array(0.31489167, dtype=float32), 'loss_cross_entropy': Array(0.27565002, dtype=float32)}


  1%|          | 7339/1000000 [20:31<26:08:53, 10.55it/s]

{'loss': Array(0.32803485, dtype=float32), 'loss_cross_entropy': Array(0.28848246, dtype=float32)}


  1%|          | 7349/1000000 [20:33<36:33:56,  7.54it/s]

{'loss': Array(0.32984135, dtype=float32), 'loss_cross_entropy': Array(0.28958383, dtype=float32)}


  1%|          | 7359/1000000 [20:35<27:23:33, 10.07it/s]

{'loss': Array(0.33610636, dtype=float32), 'loss_cross_entropy': Array(0.29778233, dtype=float32)}


  1%|          | 7369/1000000 [20:36<41:04:50,  6.71it/s]

{'loss': Array(0.3380678, dtype=float32), 'loss_cross_entropy': Array(0.29763886, dtype=float32)}


  1%|          | 7379/1000000 [20:38<30:18:10,  9.10it/s]

{'loss': Array(0.31262884, dtype=float32), 'loss_cross_entropy': Array(0.27464586, dtype=float32)}


  1%|          | 7389/1000000 [20:39<26:06:08, 10.56it/s]

{'loss': Array(0.33207777, dtype=float32), 'loss_cross_entropy': Array(0.29189488, dtype=float32)}


  1%|          | 7399/1000000 [20:41<28:45:14,  9.59it/s]

{'loss': Array(0.3091962, dtype=float32), 'loss_cross_entropy': Array(0.2716895, dtype=float32)}


  1%|          | 7409/1000000 [20:42<25:39:51, 10.74it/s]

{'loss': Array(0.31301346, dtype=float32), 'loss_cross_entropy': Array(0.27356765, dtype=float32)}


  1%|          | 7419/1000000 [20:44<29:25:20,  9.37it/s]

{'loss': Array(0.3118185, dtype=float32), 'loss_cross_entropy': Array(0.27175024, dtype=float32)}


  1%|          | 7429/1000000 [20:45<25:53:46, 10.65it/s]

{'loss': Array(0.32061788, dtype=float32), 'loss_cross_entropy': Array(0.28122452, dtype=float32)}


  1%|          | 7439/1000000 [20:47<40:15:33,  6.85it/s]

{'loss': Array(0.31212503, dtype=float32), 'loss_cross_entropy': Array(0.2723404, dtype=float32)}


  1%|          | 7449/1000000 [20:48<28:24:45,  9.70it/s]

{'loss': Array(0.3127574, dtype=float32), 'loss_cross_entropy': Array(0.27275553, dtype=float32)}


  1%|          | 7459/1000000 [20:50<26:20:00, 10.47it/s]

{'loss': Array(0.29868335, dtype=float32), 'loss_cross_entropy': Array(0.2596452, dtype=float32)}


  1%|          | 7469/1000000 [20:51<32:20:23,  8.53it/s]

{'loss': Array(0.33653408, dtype=float32), 'loss_cross_entropy': Array(0.29665458, dtype=float32)}


  1%|          | 7479/1000000 [20:53<29:02:44,  9.49it/s]

{'loss': Array(0.34130725, dtype=float32), 'loss_cross_entropy': Array(0.30185887, dtype=float32)}


  1%|          | 7489/1000000 [20:54<40:41:30,  6.78it/s]

{'loss': Array(0.30193967, dtype=float32), 'loss_cross_entropy': Array(0.26375213, dtype=float32)}


  1%|          | 7499/1000000 [20:56<28:03:13,  9.83it/s]

{'loss': Array(0.3344557, dtype=float32), 'loss_cross_entropy': Array(0.29618356, dtype=float32)}


  1%|          | 7509/1000000 [21:04<90:25:33,  3.05it/s] 

{'loss': Array(0.31505078, dtype=float32), 'loss_cross_entropy': Array(0.27712566, dtype=float32)}


  1%|          | 7519/1000000 [21:05<41:18:07,  6.67it/s]

{'loss': Array(0.2953393, dtype=float32), 'loss_cross_entropy': Array(0.257675, dtype=float32)}


  1%|          | 7529/1000000 [21:07<28:34:17,  9.65it/s]

{'loss': Array(0.3343656, dtype=float32), 'loss_cross_entropy': Array(0.29345435, dtype=float32)}


  1%|          | 7539/1000000 [21:08<34:07:51,  8.08it/s]

{'loss': Array(0.30845144, dtype=float32), 'loss_cross_entropy': Array(0.27191752, dtype=float32)}


  1%|          | 7549/1000000 [21:10<26:32:50, 10.38it/s]

{'loss': Array(0.32492024, dtype=float32), 'loss_cross_entropy': Array(0.2880465, dtype=float32)}


  1%|          | 7559/1000000 [21:11<40:21:27,  6.83it/s]

{'loss': Array(0.33516344, dtype=float32), 'loss_cross_entropy': Array(0.29688755, dtype=float32)}


  1%|          | 7569/1000000 [21:13<29:51:10,  9.23it/s]

{'loss': Array(0.32769185, dtype=float32), 'loss_cross_entropy': Array(0.28762105, dtype=float32)}


  1%|          | 7579/1000000 [21:14<25:56:18, 10.63it/s]

{'loss': Array(0.32017443, dtype=float32), 'loss_cross_entropy': Array(0.28230405, dtype=float32)}


  1%|          | 7589/1000000 [21:16<29:03:03,  9.49it/s]

{'loss': Array(0.31674966, dtype=float32), 'loss_cross_entropy': Array(0.27715403, dtype=float32)}


  1%|          | 7599/1000000 [21:17<26:28:38, 10.41it/s]

{'loss': Array(0.31965682, dtype=float32), 'loss_cross_entropy': Array(0.28151527, dtype=float32)}


  1%|          | 7609/1000000 [21:19<41:19:38,  6.67it/s]

{'loss': Array(0.32647952, dtype=float32), 'loss_cross_entropy': Array(0.28680375, dtype=float32)}


  1%|          | 7619/1000000 [21:20<28:05:09,  9.81it/s]

{'loss': Array(0.32995626, dtype=float32), 'loss_cross_entropy': Array(0.29163393, dtype=float32)}


  1%|          | 7629/1000000 [21:22<25:41:48, 10.73it/s]

{'loss': Array(0.32211515, dtype=float32), 'loss_cross_entropy': Array(0.28267202, dtype=float32)}


  1%|          | 7639/1000000 [21:24<31:34:53,  8.73it/s]

{'loss': Array(0.30881065, dtype=float32), 'loss_cross_entropy': Array(0.27091393, dtype=float32)}


  1%|          | 7649/1000000 [21:25<26:38:44, 10.35it/s]

{'loss': Array(0.30229625, dtype=float32), 'loss_cross_entropy': Array(0.26373672, dtype=float32)}


  1%|          | 7659/1000000 [21:27<36:10:25,  7.62it/s]

{'loss': Array(0.33317915, dtype=float32), 'loss_cross_entropy': Array(0.29434308, dtype=float32)}


  1%|          | 7669/1000000 [21:28<27:36:37,  9.98it/s]

{'loss': Array(0.33026624, dtype=float32), 'loss_cross_entropy': Array(0.29026127, dtype=float32)}


  1%|          | 7679/1000000 [21:30<40:24:12,  6.82it/s]

{'loss': Array(0.29794383, dtype=float32), 'loss_cross_entropy': Array(0.26202244, dtype=float32)}


  1%|          | 7689/1000000 [21:31<27:51:57,  9.89it/s]

{'loss': Array(0.3002989, dtype=float32), 'loss_cross_entropy': Array(0.2645896, dtype=float32)}


  1%|          | 7699/1000000 [21:32<26:10:05, 10.53it/s]

{'loss': Array(0.31587788, dtype=float32), 'loss_cross_entropy': Array(0.2788177, dtype=float32)}


  1%|          | 7709/1000000 [21:34<33:33:02,  8.22it/s]

{'loss': Array(0.32805446, dtype=float32), 'loss_cross_entropy': Array(0.2874343, dtype=float32)}


  1%|          | 7719/1000000 [21:35<26:42:53, 10.32it/s]

{'loss': Array(0.32658014, dtype=float32), 'loss_cross_entropy': Array(0.28882504, dtype=float32)}


  1%|          | 7729/1000000 [21:37<35:46:34,  7.70it/s]

{'loss': Array(0.32615423, dtype=float32), 'loss_cross_entropy': Array(0.2876806, dtype=float32)}


  1%|          | 7739/1000000 [21:39<28:07:05,  9.80it/s]

{'loss': Array(0.3185706, dtype=float32), 'loss_cross_entropy': Array(0.2801021, dtype=float32)}


  1%|          | 7749/1000000 [21:40<26:29:02, 10.41it/s]

{'loss': Array(0.3066609, dtype=float32), 'loss_cross_entropy': Array(0.26795354, dtype=float32)}


  1%|          | 7759/1000000 [21:42<29:17:34,  9.41it/s]

{'loss': Array(0.35419092, dtype=float32), 'loss_cross_entropy': Array(0.3119186, dtype=float32)}


  1%|          | 7769/1000000 [21:43<26:56:43, 10.23it/s]

{'loss': Array(0.28699556, dtype=float32), 'loss_cross_entropy': Array(0.25110295, dtype=float32)}


  1%|          | 7779/1000000 [21:45<40:53:03,  6.74it/s]

{'loss': Array(0.3466519, dtype=float32), 'loss_cross_entropy': Array(0.30803463, dtype=float32)}


  1%|          | 7789/1000000 [21:46<28:32:24,  9.66it/s]

{'loss': Array(0.28496194, dtype=float32), 'loss_cross_entropy': Array(0.24785852, dtype=float32)}


  1%|          | 7799/1000000 [21:48<27:56:14,  9.87it/s]

{'loss': Array(0.30685434, dtype=float32), 'loss_cross_entropy': Array(0.26860392, dtype=float32)}


  1%|          | 7809/1000000 [21:49<32:56:14,  8.37it/s]

{'loss': Array(0.31721246, dtype=float32), 'loss_cross_entropy': Array(0.28079996, dtype=float32)}


  1%|          | 7819/1000000 [21:51<26:33:20, 10.38it/s]

{'loss': Array(0.31432182, dtype=float32), 'loss_cross_entropy': Array(0.27436504, dtype=float32)}


  1%|          | 7829/1000000 [21:52<36:10:02,  7.62it/s]

{'loss': Array(0.31916896, dtype=float32), 'loss_cross_entropy': Array(0.2814462, dtype=float32)}


  1%|          | 7839/1000000 [21:54<27:28:44, 10.03it/s]

{'loss': Array(0.32900205, dtype=float32), 'loss_cross_entropy': Array(0.29042587, dtype=float32)}


  1%|          | 7849/1000000 [21:56<36:07:51,  7.63it/s]

{'loss': Array(0.31059304, dtype=float32), 'loss_cross_entropy': Array(0.27318022, dtype=float32)}


  1%|          | 7859/1000000 [21:57<26:45:16, 10.30it/s]

{'loss': Array(0.33426914, dtype=float32), 'loss_cross_entropy': Array(0.29463872, dtype=float32)}


  1%|          | 7869/1000000 [21:58<26:33:34, 10.38it/s]

{'loss': Array(0.35784358, dtype=float32), 'loss_cross_entropy': Array(0.3166812, dtype=float32)}


  1%|          | 7879/1000000 [22:00<30:28:12,  9.04it/s]

{'loss': Array(0.31467387, dtype=float32), 'loss_cross_entropy': Array(0.27626693, dtype=float32)}


  1%|          | 7889/1000000 [22:01<26:22:26, 10.45it/s]

{'loss': Array(0.31958175, dtype=float32), 'loss_cross_entropy': Array(0.28159606, dtype=float32)}


  1%|          | 7899/1000000 [22:03<32:47:54,  8.40it/s]

{'loss': Array(0.3242747, dtype=float32), 'loss_cross_entropy': Array(0.28731257, dtype=float32)}


  1%|          | 7909/1000000 [22:04<27:02:59, 10.19it/s]

{'loss': Array(0.33158207, dtype=float32), 'loss_cross_entropy': Array(0.29242548, dtype=float32)}


  1%|          | 7919/1000000 [22:06<40:36:11,  6.79it/s]

{'loss': Array(0.31906077, dtype=float32), 'loss_cross_entropy': Array(0.28078052, dtype=float32)}


  1%|          | 7929/1000000 [22:07<28:42:02,  9.60it/s]

{'loss': Array(0.32293972, dtype=float32), 'loss_cross_entropy': Array(0.28424054, dtype=float32)}


  1%|          | 7939/1000000 [22:09<42:10:07,  6.53it/s]

{'loss': Array(0.3100065, dtype=float32), 'loss_cross_entropy': Array(0.27235696, dtype=float32)}


  1%|          | 7949/1000000 [22:11<28:30:30,  9.67it/s]

{'loss': Array(0.3257421, dtype=float32), 'loss_cross_entropy': Array(0.28702408, dtype=float32)}


  1%|          | 7959/1000000 [22:12<26:02:32, 10.58it/s]

{'loss': Array(0.33361995, dtype=float32), 'loss_cross_entropy': Array(0.29503402, dtype=float32)}


  1%|          | 7969/1000000 [22:14<36:33:53,  7.54it/s]

{'loss': Array(0.33975634, dtype=float32), 'loss_cross_entropy': Array(0.2982193, dtype=float32)}


  1%|          | 7979/1000000 [22:15<27:31:26, 10.01it/s]

{'loss': Array(0.31891873, dtype=float32), 'loss_cross_entropy': Array(0.27929822, dtype=float32)}


  1%|          | 7989/1000000 [22:17<41:20:09,  6.67it/s]

{'loss': Array(0.30275318, dtype=float32), 'loss_cross_entropy': Array(0.2666249, dtype=float32)}


  1%|          | 7999/1000000 [22:18<28:29:06,  9.67it/s]

{'loss': Array(0.32271072, dtype=float32), 'loss_cross_entropy': Array(0.2844264, dtype=float32)}


  1%|          | 8009/1000000 [22:26<91:34:32,  3.01it/s] 

{'loss': Array(0.28268132, dtype=float32), 'loss_cross_entropy': Array(0.24691562, dtype=float32)}


  1%|          | 8019/1000000 [22:28<40:50:09,  6.75it/s] 

{'loss': Array(0.32568488, dtype=float32), 'loss_cross_entropy': Array(0.28666756, dtype=float32)}


  1%|          | 8029/1000000 [22:29<28:19:47,  9.73it/s]

{'loss': Array(0.34447297, dtype=float32), 'loss_cross_entropy': Array(0.3066271, dtype=float32)}


  1%|          | 8039/1000000 [22:31<30:05:29,  9.16it/s]

{'loss': Array(0.3041333, dtype=float32), 'loss_cross_entropy': Array(0.26615015, dtype=float32)}


  1%|          | 8049/1000000 [22:33<28:31:53,  9.66it/s]

{'loss': Array(0.2983748, dtype=float32), 'loss_cross_entropy': Array(0.26272562, dtype=float32)}


  1%|          | 8059/1000000 [22:34<41:28:42,  6.64it/s]

{'loss': Array(0.33893666, dtype=float32), 'loss_cross_entropy': Array(0.2994685, dtype=float32)}


  1%|          | 8069/1000000 [22:36<27:59:52,  9.84it/s]

{'loss': Array(0.29623252, dtype=float32), 'loss_cross_entropy': Array(0.25839493, dtype=float32)}


  1%|          | 8079/1000000 [22:37<25:42:51, 10.72it/s]

{'loss': Array(0.2959364, dtype=float32), 'loss_cross_entropy': Array(0.26087695, dtype=float32)}


  1%|          | 8089/1000000 [22:39<33:34:42,  8.21it/s]

{'loss': Array(0.2992634, dtype=float32), 'loss_cross_entropy': Array(0.26448688, dtype=float32)}


  1%|          | 8099/1000000 [22:40<27:46:48,  9.92it/s]

{'loss': Array(0.33240652, dtype=float32), 'loss_cross_entropy': Array(0.29399356, dtype=float32)}


  1%|          | 8109/1000000 [22:42<41:22:28,  6.66it/s]

{'loss': Array(0.30259806, dtype=float32), 'loss_cross_entropy': Array(0.26787424, dtype=float32)}


  1%|          | 8119/1000000 [22:43<28:44:36,  9.59it/s]

{'loss': Array(0.29893088, dtype=float32), 'loss_cross_entropy': Array(0.26556894, dtype=float32)}


  1%|          | 8129/1000000 [22:45<25:52:23, 10.65it/s]

{'loss': Array(0.30224428, dtype=float32), 'loss_cross_entropy': Array(0.26504388, dtype=float32)}


  1%|          | 8139/1000000 [22:46<31:03:32,  8.87it/s]

{'loss': Array(0.30825788, dtype=float32), 'loss_cross_entropy': Array(0.27104148, dtype=float32)}


  1%|          | 8149/1000000 [22:48<27:16:46, 10.10it/s]

{'loss': Array(0.30390826, dtype=float32), 'loss_cross_entropy': Array(0.26727125, dtype=float32)}


  1%|          | 8159/1000000 [22:50<36:34:12,  7.53it/s]

{'loss': Array(0.3144498, dtype=float32), 'loss_cross_entropy': Array(0.2784265, dtype=float32)}


  1%|          | 8169/1000000 [22:51<27:37:07,  9.98it/s]

{'loss': Array(0.34874207, dtype=float32), 'loss_cross_entropy': Array(0.30890542, dtype=float32)}


  1%|          | 8179/1000000 [22:52<25:48:43, 10.67it/s]

{'loss': Array(0.30191115, dtype=float32), 'loss_cross_entropy': Array(0.26596686, dtype=float32)}


  1%|          | 8189/1000000 [22:54<32:50:45,  8.39it/s]

{'loss': Array(0.32539612, dtype=float32), 'loss_cross_entropy': Array(0.28630593, dtype=float32)}


  1%|          | 8199/1000000 [22:55<27:03:08, 10.18it/s]

{'loss': Array(0.29691702, dtype=float32), 'loss_cross_entropy': Array(0.26149213, dtype=float32)}


  1%|          | 8209/1000000 [22:57<26:09:16, 10.53it/s]

{'loss': Array(0.324383, dtype=float32), 'loss_cross_entropy': Array(0.2855478, dtype=float32)}


  1%|          | 8219/1000000 [22:59<29:56:26,  9.20it/s]

{'loss': Array(0.3062046, dtype=float32), 'loss_cross_entropy': Array(0.27106425, dtype=float32)}


  1%|          | 8229/1000000 [23:00<26:37:52, 10.34it/s]

{'loss': Array(0.30902705, dtype=float32), 'loss_cross_entropy': Array(0.27176318, dtype=float32)}


  1%|          | 8239/1000000 [23:02<33:29:10,  8.23it/s]

{'loss': Array(0.32446545, dtype=float32), 'loss_cross_entropy': Array(0.2889595, dtype=float32)}


  1%|          | 8249/1000000 [23:03<27:26:33, 10.04it/s]

{'loss': Array(0.31220385, dtype=float32), 'loss_cross_entropy': Array(0.27460757, dtype=float32)}


  1%|          | 8259/1000000 [23:05<40:01:44,  6.88it/s]

{'loss': Array(0.32939285, dtype=float32), 'loss_cross_entropy': Array(0.29278195, dtype=float32)}


  1%|          | 8269/1000000 [23:06<27:41:42,  9.95it/s]

{'loss': Array(0.31195304, dtype=float32), 'loss_cross_entropy': Array(0.2755253, dtype=float32)}


  1%|          | 8279/1000000 [23:08<43:09:11,  6.38it/s]

{'loss': Array(0.30938503, dtype=float32), 'loss_cross_entropy': Array(0.2723499, dtype=float32)}


  1%|          | 8289/1000000 [23:09<28:15:17,  9.75it/s]

{'loss': Array(0.30978954, dtype=float32), 'loss_cross_entropy': Array(0.27323085, dtype=float32)}


  1%|          | 8299/1000000 [23:11<25:54:45, 10.63it/s]

{'loss': Array(0.2960471, dtype=float32), 'loss_cross_entropy': Array(0.26072007, dtype=float32)}


  1%|          | 8309/1000000 [23:12<35:58:45,  7.66it/s]

{'loss': Array(0.3078739, dtype=float32), 'loss_cross_entropy': Array(0.2703341, dtype=float32)}


  1%|          | 8319/1000000 [23:14<27:33:52,  9.99it/s]

{'loss': Array(0.30734116, dtype=float32), 'loss_cross_entropy': Array(0.269438, dtype=float32)}


  1%|          | 8329/1000000 [23:15<40:46:22,  6.76it/s]

{'loss': Array(0.30992872, dtype=float32), 'loss_cross_entropy': Array(0.2740995, dtype=float32)}


  1%|          | 8339/1000000 [23:17<28:11:23,  9.77it/s]

{'loss': Array(0.31911412, dtype=float32), 'loss_cross_entropy': Array(0.28260252, dtype=float32)}


  1%|          | 8349/1000000 [23:18<26:22:22, 10.44it/s]

{'loss': Array(0.29296896, dtype=float32), 'loss_cross_entropy': Array(0.2582736, dtype=float32)}


  1%|          | 8359/1000000 [23:20<29:07:44,  9.46it/s]

{'loss': Array(0.32276708, dtype=float32), 'loss_cross_entropy': Array(0.28426328, dtype=float32)}


  1%|          | 8369/1000000 [23:21<25:45:50, 10.69it/s]

{'loss': Array(0.3187427, dtype=float32), 'loss_cross_entropy': Array(0.28192657, dtype=float32)}


  1%|          | 8379/1000000 [23:23<29:23:31,  9.37it/s]

{'loss': Array(0.30692625, dtype=float32), 'loss_cross_entropy': Array(0.26977867, dtype=float32)}


  1%|          | 8389/1000000 [23:24<26:09:26, 10.53it/s]

{'loss': Array(0.29115036, dtype=float32), 'loss_cross_entropy': Array(0.2553381, dtype=float32)}


  1%|          | 8399/1000000 [23:26<40:00:39,  6.88it/s]

{'loss': Array(0.30721685, dtype=float32), 'loss_cross_entropy': Array(0.27014488, dtype=float32)}


  1%|          | 8409/1000000 [23:28<30:31:42,  9.02it/s]

{'loss': Array(0.2981827, dtype=float32), 'loss_cross_entropy': Array(0.26262692, dtype=float32)}


  1%|          | 8419/1000000 [23:29<26:34:12, 10.37it/s]

{'loss': Array(0.31860754, dtype=float32), 'loss_cross_entropy': Array(0.28050965, dtype=float32)}


  1%|          | 8429/1000000 [23:31<32:58:14,  8.35it/s]

{'loss': Array(0.32915244, dtype=float32), 'loss_cross_entropy': Array(0.2922407, dtype=float32)}


  1%|          | 8439/1000000 [23:32<27:09:50, 10.14it/s]

{'loss': Array(0.3275644, dtype=float32), 'loss_cross_entropy': Array(0.29106343, dtype=float32)}


  1%|          | 8449/1000000 [23:34<41:13:28,  6.68it/s]

{'loss': Array(0.3205323, dtype=float32), 'loss_cross_entropy': Array(0.28297624, dtype=float32)}


  1%|          | 8459/1000000 [23:35<27:57:30,  9.85it/s]

{'loss': Array(0.30613497, dtype=float32), 'loss_cross_entropy': Array(0.2701357, dtype=float32)}


  1%|          | 8469/1000000 [23:37<26:27:34, 10.41it/s]

{'loss': Array(0.306569, dtype=float32), 'loss_cross_entropy': Array(0.2714071, dtype=float32)}


  1%|          | 8479/1000000 [23:38<30:53:26,  8.92it/s]

{'loss': Array(0.28328404, dtype=float32), 'loss_cross_entropy': Array(0.24900365, dtype=float32)}


  1%|          | 8489/1000000 [23:40<26:10:31, 10.52it/s]

{'loss': Array(0.33870688, dtype=float32), 'loss_cross_entropy': Array(0.30016112, dtype=float32)}


  1%|          | 8499/1000000 [23:41<35:57:38,  7.66it/s]

{'loss': Array(0.29497644, dtype=float32), 'loss_cross_entropy': Array(0.26018006, dtype=float32)}


  1%|          | 8509/1000000 [23:49<90:21:57,  3.05it/s] 

{'loss': Array(0.3082585, dtype=float32), 'loss_cross_entropy': Array(0.27313372, dtype=float32)}


  1%|          | 8519/1000000 [23:51<46:04:35,  5.98it/s]

{'loss': Array(0.30974954, dtype=float32), 'loss_cross_entropy': Array(0.27479997, dtype=float32)}


  1%|          | 8529/1000000 [23:52<28:27:56,  9.68it/s]

{'loss': Array(0.2970191, dtype=float32), 'loss_cross_entropy': Array(0.26179552, dtype=float32)}


  1%|          | 8539/1000000 [23:53<26:28:02, 10.41it/s]

{'loss': Array(0.31326744, dtype=float32), 'loss_cross_entropy': Array(0.27832785, dtype=float32)}


  1%|          | 8549/1000000 [23:55<32:47:46,  8.40it/s]

{'loss': Array(0.30865353, dtype=float32), 'loss_cross_entropy': Array(0.2726335, dtype=float32)}


  1%|          | 8559/1000000 [23:57<26:42:07, 10.31it/s]

{'loss': Array(0.29802212, dtype=float32), 'loss_cross_entropy': Array(0.26364222, dtype=float32)}


  1%|          | 8569/1000000 [23:58<26:06:19, 10.55it/s]

{'loss': Array(0.30645108, dtype=float32), 'loss_cross_entropy': Array(0.27104512, dtype=float32)}


  1%|          | 8579/1000000 [24:00<29:08:57,  9.45it/s]

{'loss': Array(0.32615358, dtype=float32), 'loss_cross_entropy': Array(0.28944936, dtype=float32)}


  1%|          | 8589/1000000 [24:01<26:04:48, 10.56it/s]

{'loss': Array(0.30272, dtype=float32), 'loss_cross_entropy': Array(0.26809117, dtype=float32)}


  1%|          | 8599/1000000 [24:03<34:31:05,  7.98it/s]

{'loss': Array(0.33455002, dtype=float32), 'loss_cross_entropy': Array(0.29621014, dtype=float32)}


  1%|          | 8609/1000000 [24:04<26:53:16, 10.24it/s]

{'loss': Array(0.31292495, dtype=float32), 'loss_cross_entropy': Array(0.2783673, dtype=float32)}


  1%|          | 8619/1000000 [24:06<36:21:22,  7.57it/s]

{'loss': Array(0.2994909, dtype=float32), 'loss_cross_entropy': Array(0.2629722, dtype=float32)}


  1%|          | 8629/1000000 [24:07<27:47:17,  9.91it/s]

{'loss': Array(0.28319916, dtype=float32), 'loss_cross_entropy': Array(0.2509918, dtype=float32)}


  1%|          | 8639/1000000 [24:09<26:07:13, 10.54it/s]

{'loss': Array(0.29052433, dtype=float32), 'loss_cross_entropy': Array(0.25738034, dtype=float32)}


  1%|          | 8649/1000000 [24:10<32:28:53,  8.48it/s]

{'loss': Array(0.30503985, dtype=float32), 'loss_cross_entropy': Array(0.27105272, dtype=float32)}


  1%|          | 8659/1000000 [24:12<26:33:03, 10.37it/s]

{'loss': Array(0.29463395, dtype=float32), 'loss_cross_entropy': Array(0.2613308, dtype=float32)}


  1%|          | 8669/1000000 [24:13<25:49:27, 10.66it/s]

{'loss': Array(0.3147785, dtype=float32), 'loss_cross_entropy': Array(0.2778097, dtype=float32)}


  1%|          | 8679/1000000 [24:15<28:38:11,  9.62it/s]

{'loss': Array(0.31729123, dtype=float32), 'loss_cross_entropy': Array(0.2820571, dtype=float32)}


  1%|          | 8689/1000000 [24:16<25:53:13, 10.64it/s]

{'loss': Array(0.29245752, dtype=float32), 'loss_cross_entropy': Array(0.26165742, dtype=float32)}


  1%|          | 8699/1000000 [24:18<33:59:47,  8.10it/s]

{'loss': Array(0.32656917, dtype=float32), 'loss_cross_entropy': Array(0.2887069, dtype=float32)}


  1%|          | 8709/1000000 [24:19<26:37:00, 10.35it/s]

{'loss': Array(0.31665054, dtype=float32), 'loss_cross_entropy': Array(0.28052214, dtype=float32)}


  1%|          | 8719/1000000 [24:21<40:25:50,  6.81it/s]

{'loss': Array(0.28718707, dtype=float32), 'loss_cross_entropy': Array(0.2536009, dtype=float32)}


  1%|          | 8729/1000000 [24:22<27:42:00,  9.94it/s]

{'loss': Array(0.31067297, dtype=float32), 'loss_cross_entropy': Array(0.27541456, dtype=float32)}


  1%|          | 8739/1000000 [24:24<25:52:12, 10.64it/s]

{'loss': Array(0.2837025, dtype=float32), 'loss_cross_entropy': Array(0.24898675, dtype=float32)}


  1%|          | 8749/1000000 [24:25<35:12:27,  7.82it/s]

{'loss': Array(0.28584236, dtype=float32), 'loss_cross_entropy': Array(0.25260168, dtype=float32)}


  1%|          | 8759/1000000 [24:27<26:58:24, 10.21it/s]

{'loss': Array(0.30939922, dtype=float32), 'loss_cross_entropy': Array(0.27526936, dtype=float32)}


  1%|          | 8769/1000000 [24:28<26:36:25, 10.35it/s]

{'loss': Array(0.27283055, dtype=float32), 'loss_cross_entropy': Array(0.24106768, dtype=float32)}


  1%|          | 8779/1000000 [24:30<30:48:39,  8.94it/s]

{'loss': Array(0.30230948, dtype=float32), 'loss_cross_entropy': Array(0.26928744, dtype=float32)}


  1%|          | 8789/1000000 [24:31<26:42:00, 10.31it/s]

{'loss': Array(0.29801804, dtype=float32), 'loss_cross_entropy': Array(0.26422533, dtype=float32)}


  1%|          | 8799/1000000 [24:33<34:34:58,  7.96it/s]

{'loss': Array(0.32459366, dtype=float32), 'loss_cross_entropy': Array(0.2894193, dtype=float32)}


  1%|          | 8809/1000000 [24:34<26:59:02, 10.20it/s]

{'loss': Array(0.30423695, dtype=float32), 'loss_cross_entropy': Array(0.27017635, dtype=float32)}


  1%|          | 8819/1000000 [24:36<25:26:29, 10.82it/s]

{'loss': Array(0.30678502, dtype=float32), 'loss_cross_entropy': Array(0.27430508, dtype=float32)}


  1%|          | 8829/1000000 [24:37<28:29:32,  9.66it/s]

{'loss': Array(0.32243168, dtype=float32), 'loss_cross_entropy': Array(0.28649592, dtype=float32)}


  1%|          | 8839/1000000 [24:39<26:43:30, 10.30it/s]

{'loss': Array(0.29693738, dtype=float32), 'loss_cross_entropy': Array(0.26207456, dtype=float32)}


  1%|          | 8849/1000000 [24:40<35:52:20,  7.67it/s]

{'loss': Array(0.29163715, dtype=float32), 'loss_cross_entropy': Array(0.2580771, dtype=float32)}


  1%|          | 8859/1000000 [24:42<27:46:11,  9.91it/s]

{'loss': Array(0.31908768, dtype=float32), 'loss_cross_entropy': Array(0.2828109, dtype=float32)}


  1%|          | 8869/1000000 [24:43<26:20:08, 10.45it/s]

{'loss': Array(0.30025086, dtype=float32), 'loss_cross_entropy': Array(0.2660147, dtype=float32)}


  1%|          | 8879/1000000 [24:45<29:25:03,  9.36it/s]

{'loss': Array(0.30364412, dtype=float32), 'loss_cross_entropy': Array(0.26901925, dtype=float32)}


  1%|          | 8889/1000000 [24:46<25:52:31, 10.64it/s]

{'loss': Array(0.3147538, dtype=float32), 'loss_cross_entropy': Array(0.27998435, dtype=float32)}


  1%|          | 8899/1000000 [24:48<31:22:17,  8.78it/s]

{'loss': Array(0.3050553, dtype=float32), 'loss_cross_entropy': Array(0.26850888, dtype=float32)}


  1%|          | 8909/1000000 [24:49<26:40:10, 10.32it/s]

{'loss': Array(0.30973563, dtype=float32), 'loss_cross_entropy': Array(0.2747996, dtype=float32)}


  1%|          | 8919/1000000 [24:51<40:48:46,  6.75it/s]

{'loss': Array(0.29810768, dtype=float32), 'loss_cross_entropy': Array(0.2646663, dtype=float32)}


  1%|          | 8929/1000000 [24:52<28:42:12,  9.59it/s]

{'loss': Array(0.27202377, dtype=float32), 'loss_cross_entropy': Array(0.24069297, dtype=float32)}


  1%|          | 8939/1000000 [24:54<26:14:55, 10.49it/s]

{'loss': Array(0.31371912, dtype=float32), 'loss_cross_entropy': Array(0.27744958, dtype=float32)}


  1%|          | 8949/1000000 [24:56<33:24:01,  8.24it/s]

{'loss': Array(0.31686458, dtype=float32), 'loss_cross_entropy': Array(0.2833138, dtype=float32)}


  1%|          | 8959/1000000 [24:57<26:59:32, 10.20it/s]

{'loss': Array(0.28875875, dtype=float32), 'loss_cross_entropy': Array(0.25668117, dtype=float32)}


  1%|          | 8969/1000000 [24:59<37:24:45,  7.36it/s]

{'loss': Array(0.30286813, dtype=float32), 'loss_cross_entropy': Array(0.2672727, dtype=float32)}


  1%|          | 8979/1000000 [25:00<27:49:57,  9.89it/s]

{'loss': Array(0.29856637, dtype=float32), 'loss_cross_entropy': Array(0.26397476, dtype=float32)}


  1%|          | 8989/1000000 [25:02<36:13:14,  7.60it/s]

{'loss': Array(0.3063005, dtype=float32), 'loss_cross_entropy': Array(0.27217743, dtype=float32)}


  1%|          | 8999/1000000 [25:03<28:15:11,  9.74it/s]

{'loss': Array(0.3190601, dtype=float32), 'loss_cross_entropy': Array(0.28621793, dtype=float32)}


  1%|          | 9009/1000000 [25:11<90:33:25,  3.04it/s] 

{'loss': Array(0.32188463, dtype=float32), 'loss_cross_entropy': Array(0.2859454, dtype=float32)}


  1%|          | 9019/1000000 [25:13<41:54:18,  6.57it/s]

{'loss': Array(0.31259796, dtype=float32), 'loss_cross_entropy': Array(0.27928814, dtype=float32)}


  1%|          | 9029/1000000 [25:14<27:52:45,  9.87it/s]

{'loss': Array(0.2836972, dtype=float32), 'loss_cross_entropy': Array(0.25169882, dtype=float32)}


  1%|          | 9039/1000000 [25:16<36:34:51,  7.52it/s]

{'loss': Array(0.29402682, dtype=float32), 'loss_cross_entropy': Array(0.26098758, dtype=float32)}


  1%|          | 9049/1000000 [25:17<27:01:39, 10.18it/s]

{'loss': Array(0.310578, dtype=float32), 'loss_cross_entropy': Array(0.27582303, dtype=float32)}


  1%|          | 9059/1000000 [25:19<40:55:32,  6.73it/s]

{'loss': Array(0.29770482, dtype=float32), 'loss_cross_entropy': Array(0.26537266, dtype=float32)}


  1%|          | 9069/1000000 [25:21<28:40:07,  9.60it/s]

{'loss': Array(0.2896821, dtype=float32), 'loss_cross_entropy': Array(0.2579118, dtype=float32)}


  1%|          | 9079/1000000 [25:22<26:11:33, 10.51it/s]

{'loss': Array(0.27682993, dtype=float32), 'loss_cross_entropy': Array(0.24583769, dtype=float32)}


  1%|          | 9089/1000000 [25:24<33:33:53,  8.20it/s]

{'loss': Array(0.29748362, dtype=float32), 'loss_cross_entropy': Array(0.263428, dtype=float32)}


  1%|          | 9099/1000000 [25:25<27:08:40, 10.14it/s]

{'loss': Array(0.31191966, dtype=float32), 'loss_cross_entropy': Array(0.27606484, dtype=float32)}


  1%|          | 9109/1000000 [25:27<35:54:14,  7.67it/s]

{'loss': Array(0.2779016, dtype=float32), 'loss_cross_entropy': Array(0.24529056, dtype=float32)}


  1%|          | 9119/1000000 [25:28<28:06:47,  9.79it/s]

{'loss': Array(0.2852669, dtype=float32), 'loss_cross_entropy': Array(0.25219613, dtype=float32)}


  1%|          | 9129/1000000 [25:30<25:35:37, 10.75it/s]

{'loss': Array(0.28988957, dtype=float32), 'loss_cross_entropy': Array(0.2571255, dtype=float32)}


  1%|          | 9139/1000000 [25:31<28:56:03,  9.51it/s]

{'loss': Array(0.29951409, dtype=float32), 'loss_cross_entropy': Array(0.26566392, dtype=float32)}


  1%|          | 9149/1000000 [25:33<28:07:12,  9.79it/s]

{'loss': Array(0.30538833, dtype=float32), 'loss_cross_entropy': Array(0.27207324, dtype=float32)}


  1%|          | 9159/1000000 [25:34<40:42:52,  6.76it/s]

{'loss': Array(0.2614959, dtype=float32), 'loss_cross_entropy': Array(0.23229507, dtype=float32)}


  1%|          | 9169/1000000 [25:36<28:53:47,  9.52it/s]

{'loss': Array(0.29752833, dtype=float32), 'loss_cross_entropy': Array(0.26461694, dtype=float32)}


  1%|          | 9179/1000000 [25:37<26:10:24, 10.52it/s]

{'loss': Array(0.27366206, dtype=float32), 'loss_cross_entropy': Array(0.24378796, dtype=float32)}


  1%|          | 9189/1000000 [25:39<32:39:39,  8.43it/s]

{'loss': Array(0.28443345, dtype=float32), 'loss_cross_entropy': Array(0.25209713, dtype=float32)}


  1%|          | 9199/1000000 [25:40<26:10:49, 10.51it/s]

{'loss': Array(0.2795399, dtype=float32), 'loss_cross_entropy': Array(0.24771543, dtype=float32)}


  1%|          | 9209/1000000 [25:42<35:52:24,  7.67it/s]

{'loss': Array(0.3022153, dtype=float32), 'loss_cross_entropy': Array(0.26893502, dtype=float32)}


  1%|          | 9219/1000000 [25:43<27:18:39, 10.08it/s]

{'loss': Array(0.2973033, dtype=float32), 'loss_cross_entropy': Array(0.2641941, dtype=float32)}


  1%|          | 9229/1000000 [25:45<36:04:47,  7.63it/s]

{'loss': Array(0.33897272, dtype=float32), 'loss_cross_entropy': Array(0.30348417, dtype=float32)}


  1%|          | 9239/1000000 [25:46<27:05:33, 10.16it/s]

{'loss': Array(0.3102614, dtype=float32), 'loss_cross_entropy': Array(0.27670598, dtype=float32)}


  1%|          | 9249/1000000 [25:48<27:32:05,  9.99it/s]

{'loss': Array(0.29530767, dtype=float32), 'loss_cross_entropy': Array(0.2636191, dtype=float32)}


  1%|          | 9259/1000000 [25:49<30:45:56,  8.95it/s]

{'loss': Array(0.29956067, dtype=float32), 'loss_cross_entropy': Array(0.26479337, dtype=float32)}


  1%|          | 9269/1000000 [25:51<26:08:45, 10.53it/s]

{'loss': Array(0.29130602, dtype=float32), 'loss_cross_entropy': Array(0.25817654, dtype=float32)}


  1%|          | 9279/1000000 [25:52<32:23:24,  8.50it/s]

{'loss': Array(0.30165112, dtype=float32), 'loss_cross_entropy': Array(0.26936588, dtype=float32)}


  1%|          | 9289/1000000 [25:54<26:57:44, 10.21it/s]

{'loss': Array(0.28896663, dtype=float32), 'loss_cross_entropy': Array(0.25673696, dtype=float32)}


  1%|          | 9299/1000000 [25:55<40:05:17,  6.86it/s]

{'loss': Array(0.31798497, dtype=float32), 'loss_cross_entropy': Array(0.28398606, dtype=float32)}


  1%|          | 9309/1000000 [25:57<27:31:22, 10.00it/s]

{'loss': Array(0.2984512, dtype=float32), 'loss_cross_entropy': Array(0.26620674, dtype=float32)}


  1%|          | 9319/1000000 [25:59<41:13:25,  6.68it/s]

{'loss': Array(0.2889609, dtype=float32), 'loss_cross_entropy': Array(0.25701544, dtype=float32)}


  1%|          | 9329/1000000 [26:00<28:11:19,  9.76it/s]

{'loss': Array(0.2909882, dtype=float32), 'loss_cross_entropy': Array(0.25836012, dtype=float32)}


  1%|          | 9339/1000000 [26:01<25:48:30, 10.66it/s]

{'loss': Array(0.30390874, dtype=float32), 'loss_cross_entropy': Array(0.270166, dtype=float32)}


  1%|          | 9349/1000000 [26:03<36:15:44,  7.59it/s]

{'loss': Array(0.2933745, dtype=float32), 'loss_cross_entropy': Array(0.2599853, dtype=float32)}


  1%|          | 9359/1000000 [26:04<27:35:27,  9.97it/s]

{'loss': Array(0.28819597, dtype=float32), 'loss_cross_entropy': Array(0.25576958, dtype=float32)}


  1%|          | 9369/1000000 [26:06<40:28:23,  6.80it/s]

{'loss': Array(0.29289603, dtype=float32), 'loss_cross_entropy': Array(0.26133707, dtype=float32)}


  1%|          | 9379/1000000 [26:07<28:28:34,  9.66it/s]

{'loss': Array(0.32186893, dtype=float32), 'loss_cross_entropy': Array(0.28651562, dtype=float32)}


  1%|          | 9389/1000000 [26:09<26:28:07, 10.40it/s]

{'loss': Array(0.30189952, dtype=float32), 'loss_cross_entropy': Array(0.2692233, dtype=float32)}


  1%|          | 9399/1000000 [26:11<29:15:43,  9.40it/s]

{'loss': Array(0.29787877, dtype=float32), 'loss_cross_entropy': Array(0.26514092, dtype=float32)}


  1%|          | 9409/1000000 [26:12<25:57:45, 10.60it/s]

{'loss': Array(0.28765905, dtype=float32), 'loss_cross_entropy': Array(0.25423756, dtype=float32)}


  1%|          | 9419/1000000 [26:14<29:49:25,  9.23it/s]

{'loss': Array(0.31290245, dtype=float32), 'loss_cross_entropy': Array(0.27928454, dtype=float32)}


  1%|          | 9429/1000000 [26:15<26:18:51, 10.46it/s]

{'loss': Array(0.27680603, dtype=float32), 'loss_cross_entropy': Array(0.24525075, dtype=float32)}


  1%|          | 9439/1000000 [26:17<40:27:45,  6.80it/s]

{'loss': Array(0.27846244, dtype=float32), 'loss_cross_entropy': Array(0.24772504, dtype=float32)}


  1%|          | 9449/1000000 [26:18<28:54:25,  9.52it/s]

{'loss': Array(0.29137275, dtype=float32), 'loss_cross_entropy': Array(0.25958607, dtype=float32)}


  1%|          | 9459/1000000 [26:20<26:00:10, 10.58it/s]

{'loss': Array(0.2772979, dtype=float32), 'loss_cross_entropy': Array(0.24585438, dtype=float32)}


  1%|          | 9469/1000000 [26:21<32:29:13,  8.47it/s]

{'loss': Array(0.27947116, dtype=float32), 'loss_cross_entropy': Array(0.24887656, dtype=float32)}


  1%|          | 9479/1000000 [26:23<28:38:55,  9.60it/s]

{'loss': Array(0.29231492, dtype=float32), 'loss_cross_entropy': Array(0.25905785, dtype=float32)}


  1%|          | 9489/1000000 [26:24<41:01:43,  6.71it/s]

{'loss': Array(0.27355218, dtype=float32), 'loss_cross_entropy': Array(0.244289, dtype=float32)}


  1%|          | 9499/1000000 [26:26<27:59:44,  9.83it/s]

{'loss': Array(0.27571216, dtype=float32), 'loss_cross_entropy': Array(0.2453985, dtype=float32)}


  1%|          | 9509/1000000 [26:34<89:51:04,  3.06it/s] 

{'loss': Array(0.2813978, dtype=float32), 'loss_cross_entropy': Array(0.25096047, dtype=float32)}


  1%|          | 9519/1000000 [26:35<41:02:41,  6.70it/s]

{'loss': Array(0.28718793, dtype=float32), 'loss_cross_entropy': Array(0.25520083, dtype=float32)}


  1%|          | 9529/1000000 [26:37<28:18:58,  9.72it/s]

{'loss': Array(0.26494995, dtype=float32), 'loss_cross_entropy': Array(0.23463105, dtype=float32)}


  1%|          | 9539/1000000 [26:38<34:23:23,  8.00it/s]

{'loss': Array(0.28579488, dtype=float32), 'loss_cross_entropy': Array(0.25416178, dtype=float32)}


  1%|          | 9549/1000000 [26:40<26:49:48, 10.25it/s]

{'loss': Array(0.2873887, dtype=float32), 'loss_cross_entropy': Array(0.25560805, dtype=float32)}


  1%|          | 9559/1000000 [26:42<41:07:47,  6.69it/s]

{'loss': Array(0.28161305, dtype=float32), 'loss_cross_entropy': Array(0.2514523, dtype=float32)}


  1%|          | 9569/1000000 [26:43<28:38:57,  9.60it/s]

{'loss': Array(0.2941682, dtype=float32), 'loss_cross_entropy': Array(0.26309422, dtype=float32)}


  1%|          | 9579/1000000 [26:44<25:35:56, 10.75it/s]

{'loss': Array(0.28913984, dtype=float32), 'loss_cross_entropy': Array(0.25928053, dtype=float32)}


  1%|          | 9589/1000000 [26:46<28:40:56,  9.59it/s]

{'loss': Array(0.2850291, dtype=float32), 'loss_cross_entropy': Array(0.25302976, dtype=float32)}


  1%|          | 9599/1000000 [26:47<26:27:32, 10.40it/s]

{'loss': Array(0.28637066, dtype=float32), 'loss_cross_entropy': Array(0.25577343, dtype=float32)}


  1%|          | 9609/1000000 [26:49<40:36:49,  6.77it/s]

{'loss': Array(0.29868403, dtype=float32), 'loss_cross_entropy': Array(0.26606336, dtype=float32)}


  1%|          | 9619/1000000 [26:50<27:39:36,  9.95it/s]

{'loss': Array(0.29920304, dtype=float32), 'loss_cross_entropy': Array(0.2666596, dtype=float32)}


  1%|          | 9629/1000000 [26:52<25:15:16, 10.89it/s]

{'loss': Array(0.280714, dtype=float32), 'loss_cross_entropy': Array(0.24962905, dtype=float32)}


  1%|          | 9639/1000000 [26:54<30:45:28,  8.94it/s]

{'loss': Array(0.323707, dtype=float32), 'loss_cross_entropy': Array(0.2899762, dtype=float32)}


  1%|          | 9649/1000000 [26:55<26:06:48, 10.53it/s]

{'loss': Array(0.3118801, dtype=float32), 'loss_cross_entropy': Array(0.2769257, dtype=float32)}


  1%|          | 9659/1000000 [26:57<35:38:32,  7.72it/s]

{'loss': Array(0.28610113, dtype=float32), 'loss_cross_entropy': Array(0.2556594, dtype=float32)}


  1%|          | 9669/1000000 [26:58<27:37:14,  9.96it/s]

{'loss': Array(0.30483177, dtype=float32), 'loss_cross_entropy': Array(0.2724619, dtype=float32)}


  1%|          | 9679/1000000 [27:00<40:48:14,  6.74it/s]

{'loss': Array(0.28185531, dtype=float32), 'loss_cross_entropy': Array(0.24941356, dtype=float32)}


  1%|          | 9689/1000000 [27:01<28:00:56,  9.82it/s]

{'loss': Array(0.26184863, dtype=float32), 'loss_cross_entropy': Array(0.23134224, dtype=float32)}


  1%|          | 9699/1000000 [27:02<26:51:28, 10.24it/s]

{'loss': Array(0.2677837, dtype=float32), 'loss_cross_entropy': Array(0.23792891, dtype=float32)}


  1%|          | 9709/1000000 [27:04<32:55:59,  8.35it/s]

{'loss': Array(0.2799371, dtype=float32), 'loss_cross_entropy': Array(0.24785744, dtype=float32)}


  1%|          | 9719/1000000 [27:05<26:47:17, 10.27it/s]

{'loss': Array(0.2773409, dtype=float32), 'loss_cross_entropy': Array(0.2458316, dtype=float32)}


  1%|          | 9729/1000000 [27:07<35:22:33,  7.78it/s]

{'loss': Array(0.27558127, dtype=float32), 'loss_cross_entropy': Array(0.24536471, dtype=float32)}


  1%|          | 9739/1000000 [27:09<28:07:49,  9.78it/s]

{'loss': Array(0.27651653, dtype=float32), 'loss_cross_entropy': Array(0.24622564, dtype=float32)}


  1%|          | 9749/1000000 [27:10<25:43:52, 10.69it/s]

{'loss': Array(0.3160797, dtype=float32), 'loss_cross_entropy': Array(0.2823191, dtype=float32)}


  1%|          | 9759/1000000 [27:12<28:39:07,  9.60it/s]

{'loss': Array(0.2586356, dtype=float32), 'loss_cross_entropy': Array(0.23046532, dtype=float32)}


  1%|          | 9769/1000000 [27:13<26:44:37, 10.29it/s]

{'loss': Array(0.2769616, dtype=float32), 'loss_cross_entropy': Array(0.24875155, dtype=float32)}


  1%|          | 9779/1000000 [27:15<40:42:53,  6.76it/s]

{'loss': Array(0.2721792, dtype=float32), 'loss_cross_entropy': Array(0.24257353, dtype=float32)}


  1%|          | 9789/1000000 [27:16<28:21:05,  9.70it/s]

{'loss': Array(0.3057368, dtype=float32), 'loss_cross_entropy': Array(0.27431244, dtype=float32)}


  1%|          | 9799/1000000 [27:17<25:48:39, 10.66it/s]

{'loss': Array(0.29432222, dtype=float32), 'loss_cross_entropy': Array(0.26167747, dtype=float32)}


  1%|          | 9809/1000000 [27:19<33:08:15,  8.30it/s]

{'loss': Array(0.29802012, dtype=float32), 'loss_cross_entropy': Array(0.26385483, dtype=float32)}


  1%|          | 9819/1000000 [27:21<26:57:47, 10.20it/s]

{'loss': Array(0.29739997, dtype=float32), 'loss_cross_entropy': Array(0.26471436, dtype=float32)}


  1%|          | 9829/1000000 [27:22<35:32:40,  7.74it/s]

{'loss': Array(0.29333788, dtype=float32), 'loss_cross_entropy': Array(0.2611133, dtype=float32)}


  1%|          | 9839/1000000 [27:24<27:35:35,  9.97it/s]

{'loss': Array(0.27628574, dtype=float32), 'loss_cross_entropy': Array(0.24623866, dtype=float32)}


  1%|          | 9849/1000000 [27:25<36:19:52,  7.57it/s]

{'loss': Array(0.27944675, dtype=float32), 'loss_cross_entropy': Array(0.24935535, dtype=float32)}


  1%|          | 9859/1000000 [27:27<26:56:31, 10.21it/s]

{'loss': Array(0.27921647, dtype=float32), 'loss_cross_entropy': Array(0.24983683, dtype=float32)}


  1%|          | 9869/1000000 [27:28<25:57:09, 10.60it/s]

{'loss': Array(0.27634498, dtype=float32), 'loss_cross_entropy': Array(0.24408281, dtype=float32)}


  1%|          | 9879/1000000 [27:30<30:23:49,  9.05it/s]

{'loss': Array(0.2703937, dtype=float32), 'loss_cross_entropy': Array(0.24123876, dtype=float32)}


  1%|          | 9889/1000000 [27:31<26:04:00, 10.55it/s]

{'loss': Array(0.29873157, dtype=float32), 'loss_cross_entropy': Array(0.26738152, dtype=float32)}


  1%|          | 9899/1000000 [27:33<33:15:53,  8.27it/s]

{'loss': Array(0.300422, dtype=float32), 'loss_cross_entropy': Array(0.26703474, dtype=float32)}


  1%|          | 9909/1000000 [27:34<26:43:11, 10.29it/s]

{'loss': Array(0.28030226, dtype=float32), 'loss_cross_entropy': Array(0.24767156, dtype=float32)}


  1%|          | 9919/1000000 [27:36<40:18:23,  6.82it/s]

{'loss': Array(0.3003659, dtype=float32), 'loss_cross_entropy': Array(0.26892707, dtype=float32)}


  1%|          | 9929/1000000 [27:37<28:18:44,  9.71it/s]

{'loss': Array(0.29942128, dtype=float32), 'loss_cross_entropy': Array(0.26803246, dtype=float32)}


  1%|          | 9939/1000000 [27:39<41:35:59,  6.61it/s]

{'loss': Array(0.2828041, dtype=float32), 'loss_cross_entropy': Array(0.25198367, dtype=float32)}


  1%|          | 9949/1000000 [27:40<28:29:56,  9.65it/s]

{'loss': Array(0.29368845, dtype=float32), 'loss_cross_entropy': Array(0.26340544, dtype=float32)}


  1%|          | 9959/1000000 [27:42<26:09:54, 10.51it/s]

{'loss': Array(0.27782047, dtype=float32), 'loss_cross_entropy': Array(0.24746989, dtype=float32)}


  1%|          | 9969/1000000 [27:44<36:05:00,  7.62it/s]

{'loss': Array(0.29468408, dtype=float32), 'loss_cross_entropy': Array(0.2627065, dtype=float32)}


  1%|          | 9979/1000000 [27:45<27:43:02,  9.92it/s]

{'loss': Array(0.26218972, dtype=float32), 'loss_cross_entropy': Array(0.23184673, dtype=float32)}


  1%|          | 9989/1000000 [27:47<40:36:03,  6.77it/s]

{'loss': Array(0.27783573, dtype=float32), 'loss_cross_entropy': Array(0.24849513, dtype=float32)}


  1%|          | 9999/1000000 [27:48<28:21:41,  9.70it/s]

{'loss': Array(0.2777103, dtype=float32), 'loss_cross_entropy': Array(0.2470504, dtype=float32)}


  1%|          | 10009/1000000 [27:56<89:49:28,  3.06it/s] 

{'loss': Array(0.28205582, dtype=float32), 'loss_cross_entropy': Array(0.2522137, dtype=float32)}


  1%|          | 10019/1000000 [27:58<41:31:09,  6.62it/s] 

{'loss': Array(0.26482177, dtype=float32), 'loss_cross_entropy': Array(0.23697011, dtype=float32)}


  1%|          | 10029/1000000 [27:59<28:56:02,  9.50it/s]

{'loss': Array(0.2756398, dtype=float32), 'loss_cross_entropy': Array(0.24624582, dtype=float32)}


  1%|          | 10039/1000000 [28:01<29:53:44,  9.20it/s]

{'loss': Array(0.2809945, dtype=float32), 'loss_cross_entropy': Array(0.2507826, dtype=float32)}


  1%|          | 10049/1000000 [28:02<26:35:19, 10.34it/s]

{'loss': Array(0.2590676, dtype=float32), 'loss_cross_entropy': Array(0.23117153, dtype=float32)}


  1%|          | 10059/1000000 [28:04<40:48:33,  6.74it/s]

{'loss': Array(0.29160148, dtype=float32), 'loss_cross_entropy': Array(0.2601461, dtype=float32)}


  1%|          | 10069/1000000 [28:05<27:48:23,  9.89it/s]

{'loss': Array(0.30441758, dtype=float32), 'loss_cross_entropy': Array(0.27435824, dtype=float32)}


  1%|          | 10079/1000000 [28:07<25:39:08, 10.72it/s]

{'loss': Array(0.2748374, dtype=float32), 'loss_cross_entropy': Array(0.24503243, dtype=float32)}


  1%|          | 10089/1000000 [28:08<33:20:41,  8.25it/s]

{'loss': Array(0.25438863, dtype=float32), 'loss_cross_entropy': Array(0.2259918, dtype=float32)}


  1%|          | 10099/1000000 [28:10<26:46:41, 10.27it/s]

{'loss': Array(0.3027498, dtype=float32), 'loss_cross_entropy': Array(0.27225894, dtype=float32)}


  1%|          | 10109/1000000 [28:11<39:50:16,  6.90it/s]

{'loss': Array(0.26637053, dtype=float32), 'loss_cross_entropy': Array(0.23843709, dtype=float32)}


  1%|          | 10119/1000000 [28:13<28:46:51,  9.55it/s]

{'loss': Array(0.28289065, dtype=float32), 'loss_cross_entropy': Array(0.25378898, dtype=float32)}


  1%|          | 10129/1000000 [28:14<26:09:15, 10.51it/s]

{'loss': Array(0.31185582, dtype=float32), 'loss_cross_entropy': Array(0.2804012, dtype=float32)}


  1%|          | 10139/1000000 [28:16<31:13:17,  8.81it/s]

{'loss': Array(0.28233957, dtype=float32), 'loss_cross_entropy': Array(0.25178847, dtype=float32)}


  1%|          | 10149/1000000 [28:17<26:22:46, 10.42it/s]

{'loss': Array(0.28026336, dtype=float32), 'loss_cross_entropy': Array(0.25222522, dtype=float32)}


  1%|          | 10159/1000000 [28:19<36:18:25,  7.57it/s]

{'loss': Array(0.2930965, dtype=float32), 'loss_cross_entropy': Array(0.262232, dtype=float32)}


  1%|          | 10169/1000000 [28:20<27:58:39,  9.83it/s]

{'loss': Array(0.3126996, dtype=float32), 'loss_cross_entropy': Array(0.27949294, dtype=float32)}


  1%|          | 10179/1000000 [28:22<25:37:42, 10.73it/s]

{'loss': Array(0.27894735, dtype=float32), 'loss_cross_entropy': Array(0.24827377, dtype=float32)}


  1%|          | 10189/1000000 [28:23<32:55:42,  8.35it/s]

{'loss': Array(0.2696801, dtype=float32), 'loss_cross_entropy': Array(0.24016578, dtype=float32)}


  1%|          | 10199/1000000 [28:25<26:32:54, 10.36it/s]

{'loss': Array(0.27568665, dtype=float32), 'loss_cross_entropy': Array(0.24650386, dtype=float32)}


  1%|          | 10209/1000000 [28:26<25:38:22, 10.72it/s]

{'loss': Array(0.2913864, dtype=float32), 'loss_cross_entropy': Array(0.25905922, dtype=float32)}


  1%|          | 10219/1000000 [28:28<30:03:19,  9.15it/s]

{'loss': Array(0.2805318, dtype=float32), 'loss_cross_entropy': Array(0.25071937, dtype=float32)}


  1%|          | 10229/1000000 [28:29<26:24:28, 10.41it/s]

{'loss': Array(0.28761953, dtype=float32), 'loss_cross_entropy': Array(0.25603744, dtype=float32)}


  1%|          | 10239/1000000 [28:31<32:50:23,  8.37it/s]

{'loss': Array(0.26955697, dtype=float32), 'loss_cross_entropy': Array(0.23987389, dtype=float32)}


  1%|          | 10249/1000000 [28:32<26:53:45, 10.22it/s]

{'loss': Array(0.29629663, dtype=float32), 'loss_cross_entropy': Array(0.26605976, dtype=float32)}


  1%|          | 10259/1000000 [28:34<40:34:56,  6.77it/s]

{'loss': Array(0.27564815, dtype=float32), 'loss_cross_entropy': Array(0.24520873, dtype=float32)}


  1%|          | 10269/1000000 [28:36<27:56:34,  9.84it/s]

{'loss': Array(0.28096807, dtype=float32), 'loss_cross_entropy': Array(0.25110576, dtype=float32)}


  1%|          | 10279/1000000 [28:37<41:23:04,  6.64it/s]

{'loss': Array(0.2633696, dtype=float32), 'loss_cross_entropy': Array(0.23535143, dtype=float32)}


  1%|          | 10289/1000000 [28:39<28:34:23,  9.62it/s]

{'loss': Array(0.29742736, dtype=float32), 'loss_cross_entropy': Array(0.2679622, dtype=float32)}


  1%|          | 10299/1000000 [28:40<26:10:48, 10.50it/s]

{'loss': Array(0.27527642, dtype=float32), 'loss_cross_entropy': Array(0.24825262, dtype=float32)}


  1%|          | 10309/1000000 [28:42<35:28:54,  7.75it/s]

{'loss': Array(0.27300104, dtype=float32), 'loss_cross_entropy': Array(0.2439984, dtype=float32)}


  1%|          | 10319/1000000 [28:43<27:13:26, 10.10it/s]

{'loss': Array(0.25992483, dtype=float32), 'loss_cross_entropy': Array(0.23223166, dtype=float32)}


  1%|          | 10329/1000000 [28:45<41:04:11,  6.69it/s]

{'loss': Array(0.27775118, dtype=float32), 'loss_cross_entropy': Array(0.24793337, dtype=float32)}


  1%|          | 10339/1000000 [28:46<27:55:47,  9.84it/s]

{'loss': Array(0.2639166, dtype=float32), 'loss_cross_entropy': Array(0.23499005, dtype=float32)}


  1%|          | 10349/1000000 [28:48<27:54:41,  9.85it/s]

{'loss': Array(0.27771837, dtype=float32), 'loss_cross_entropy': Array(0.24918242, dtype=float32)}


  1%|          | 10359/1000000 [28:49<29:13:23,  9.41it/s]

{'loss': Array(0.2748566, dtype=float32), 'loss_cross_entropy': Array(0.2450317, dtype=float32)}


  1%|          | 10369/1000000 [28:51<25:41:19, 10.70it/s]

{'loss': Array(0.29461703, dtype=float32), 'loss_cross_entropy': Array(0.26318297, dtype=float32)}


  1%|          | 10379/1000000 [28:52<29:08:07,  9.44it/s]

{'loss': Array(0.26487777, dtype=float32), 'loss_cross_entropy': Array(0.2370259, dtype=float32)}


  1%|          | 10389/1000000 [28:54<26:20:58, 10.43it/s]

{'loss': Array(0.28298426, dtype=float32), 'loss_cross_entropy': Array(0.2529777, dtype=float32)}


  1%|          | 10399/1000000 [28:55<40:33:45,  6.78it/s]

{'loss': Array(0.27103433, dtype=float32), 'loss_cross_entropy': Array(0.24091344, dtype=float32)}


  1%|          | 10409/1000000 [28:57<27:28:33, 10.00it/s]

{'loss': Array(0.28731257, dtype=float32), 'loss_cross_entropy': Array(0.2572228, dtype=float32)}


  1%|          | 10419/1000000 [28:58<25:56:27, 10.60it/s]

{'loss': Array(0.2759695, dtype=float32), 'loss_cross_entropy': Array(0.24549036, dtype=float32)}


  1%|          | 10429/1000000 [29:00<32:36:12,  8.43it/s]

{'loss': Array(0.30207834, dtype=float32), 'loss_cross_entropy': Array(0.27032325, dtype=float32)}


  1%|          | 10439/1000000 [29:01<27:00:04, 10.18it/s]

{'loss': Array(0.27780676, dtype=float32), 'loss_cross_entropy': Array(0.24903126, dtype=float32)}


  1%|          | 10449/1000000 [29:03<42:49:34,  6.42it/s]

{'loss': Array(0.3092371, dtype=float32), 'loss_cross_entropy': Array(0.27640936, dtype=float32)}


  1%|          | 10459/1000000 [29:04<28:06:40,  9.78it/s]

{'loss': Array(0.2732774, dtype=float32), 'loss_cross_entropy': Array(0.2444458, dtype=float32)}


  1%|          | 10469/1000000 [29:06<25:33:09, 10.76it/s]

{'loss': Array(0.28629163, dtype=float32), 'loss_cross_entropy': Array(0.25583822, dtype=float32)}


  1%|          | 10479/1000000 [29:07<30:34:08,  8.99it/s]

{'loss': Array(0.26122493, dtype=float32), 'loss_cross_entropy': Array(0.23267911, dtype=float32)}


  1%|          | 10489/1000000 [29:09<26:43:20, 10.29it/s]

{'loss': Array(0.28644758, dtype=float32), 'loss_cross_entropy': Array(0.25601873, dtype=float32)}


  1%|          | 10499/1000000 [29:11<36:46:03,  7.48it/s]

{'loss': Array(0.28037843, dtype=float32), 'loss_cross_entropy': Array(0.25128803, dtype=float32)}


  1%|          | 10509/1000000 [29:18<91:07:17,  3.02it/s] 

{'loss': Array(0.26540938, dtype=float32), 'loss_cross_entropy': Array(0.23741643, dtype=float32)}


  1%|          | 10519/1000000 [29:20<47:19:08,  5.81it/s] 

{'loss': Array(0.29598638, dtype=float32), 'loss_cross_entropy': Array(0.26439777, dtype=float32)}


  1%|          | 10529/1000000 [29:21<28:49:39,  9.53it/s]

{'loss': Array(0.31795582, dtype=float32), 'loss_cross_entropy': Array(0.28622136, dtype=float32)}


  1%|          | 10539/1000000 [29:23<27:00:52, 10.17it/s]

{'loss': Array(0.2831142, dtype=float32), 'loss_cross_entropy': Array(0.25396892, dtype=float32)}


  1%|          | 10549/1000000 [29:25<33:38:11,  8.17it/s]

{'loss': Array(0.2775894, dtype=float32), 'loss_cross_entropy': Array(0.24987732, dtype=float32)}


  1%|          | 10559/1000000 [29:26<27:06:26, 10.14it/s]

{'loss': Array(0.28640434, dtype=float32), 'loss_cross_entropy': Array(0.2565976, dtype=float32)}


  1%|          | 10569/1000000 [29:27<25:36:08, 10.74it/s]

{'loss': Array(0.26557487, dtype=float32), 'loss_cross_entropy': Array(0.23734799, dtype=float32)}


  1%|          | 10579/1000000 [29:29<29:14:20,  9.40it/s]

{'loss': Array(0.27629927, dtype=float32), 'loss_cross_entropy': Array(0.24826682, dtype=float32)}


  1%|          | 10589/1000000 [29:30<25:53:45, 10.61it/s]

{'loss': Array(0.29292616, dtype=float32), 'loss_cross_entropy': Array(0.2625272, dtype=float32)}


  1%|          | 10599/1000000 [29:32<32:14:26,  8.52it/s]

{'loss': Array(0.28825757, dtype=float32), 'loss_cross_entropy': Array(0.2565145, dtype=float32)}


  1%|          | 10609/1000000 [29:33<27:14:04, 10.09it/s]

{'loss': Array(0.2846795, dtype=float32), 'loss_cross_entropy': Array(0.2565842, dtype=float32)}


  1%|          | 10619/1000000 [29:35<35:28:09,  7.75it/s]

{'loss': Array(0.28763977, dtype=float32), 'loss_cross_entropy': Array(0.25751925, dtype=float32)}


  1%|          | 10629/1000000 [29:36<26:55:48, 10.21it/s]

{'loss': Array(0.28120068, dtype=float32), 'loss_cross_entropy': Array(0.25123832, dtype=float32)}


  1%|          | 10639/1000000 [29:38<26:55:54, 10.20it/s]

{'loss': Array(0.26402155, dtype=float32), 'loss_cross_entropy': Array(0.23569489, dtype=float32)}


  1%|          | 10649/1000000 [29:40<33:28:13,  8.21it/s]

{'loss': Array(0.29013243, dtype=float32), 'loss_cross_entropy': Array(0.26167813, dtype=float32)}


  1%|          | 10659/1000000 [29:41<26:43:34, 10.28it/s]

{'loss': Array(0.30432737, dtype=float32), 'loss_cross_entropy': Array(0.27163574, dtype=float32)}


  1%|          | 10669/1000000 [29:42<25:19:19, 10.85it/s]

{'loss': Array(0.2510437, dtype=float32), 'loss_cross_entropy': Array(0.22453307, dtype=float32)}


  1%|          | 10679/1000000 [29:44<28:48:38,  9.54it/s]

{'loss': Array(0.26084396, dtype=float32), 'loss_cross_entropy': Array(0.23227687, dtype=float32)}


  1%|          | 10689/1000000 [29:45<26:06:42, 10.52it/s]

{'loss': Array(0.2867653, dtype=float32), 'loss_cross_entropy': Array(0.25831863, dtype=float32)}


  1%|          | 10699/1000000 [29:47<33:48:37,  8.13it/s]

{'loss': Array(0.3053243, dtype=float32), 'loss_cross_entropy': Array(0.27534103, dtype=float32)}


  1%|          | 10709/1000000 [29:48<27:03:49, 10.15it/s]

{'loss': Array(0.27195215, dtype=float32), 'loss_cross_entropy': Array(0.24307196, dtype=float32)}


  1%|          | 10719/1000000 [29:50<40:01:26,  6.87it/s]

{'loss': Array(0.2927721, dtype=float32), 'loss_cross_entropy': Array(0.26348418, dtype=float32)}


  1%|          | 10729/1000000 [29:52<28:21:36,  9.69it/s]

{'loss': Array(0.26332355, dtype=float32), 'loss_cross_entropy': Array(0.23549972, dtype=float32)}


  1%|          | 10739/1000000 [29:53<26:28:17, 10.38it/s]

{'loss': Array(0.30056778, dtype=float32), 'loss_cross_entropy': Array(0.2700505, dtype=float32)}


  1%|          | 10749/1000000 [29:55<35:21:21,  7.77it/s]

{'loss': Array(0.25886774, dtype=float32), 'loss_cross_entropy': Array(0.23190403, dtype=float32)}


  1%|          | 10759/1000000 [29:56<27:02:45, 10.16it/s]

{'loss': Array(0.2942509, dtype=float32), 'loss_cross_entropy': Array(0.26409364, dtype=float32)}


  1%|          | 10769/1000000 [29:57<25:24:12, 10.82it/s]

{'loss': Array(0.2801549, dtype=float32), 'loss_cross_entropy': Array(0.25164974, dtype=float32)}


  1%|          | 10779/1000000 [29:59<31:10:58,  8.81it/s]

{'loss': Array(0.260766, dtype=float32), 'loss_cross_entropy': Array(0.23142557, dtype=float32)}


  1%|          | 10789/1000000 [30:00<26:33:35, 10.35it/s]

{'loss': Array(0.31648895, dtype=float32), 'loss_cross_entropy': Array(0.2841661, dtype=float32)}


  1%|          | 10799/1000000 [30:02<32:11:29,  8.54it/s]

{'loss': Array(0.29587552, dtype=float32), 'loss_cross_entropy': Array(0.2666364, dtype=float32)}


  1%|          | 10809/1000000 [30:04<27:08:36, 10.12it/s]

{'loss': Array(0.26881653, dtype=float32), 'loss_cross_entropy': Array(0.24127832, dtype=float32)}


  1%|          | 10819/1000000 [30:05<26:16:57, 10.45it/s]

{'loss': Array(0.2600813, dtype=float32), 'loss_cross_entropy': Array(0.23322742, dtype=float32)}


  1%|          | 10829/1000000 [30:07<28:55:28,  9.50it/s]

{'loss': Array(0.26627564, dtype=float32), 'loss_cross_entropy': Array(0.23787597, dtype=float32)}


  1%|          | 10839/1000000 [30:08<26:33:56, 10.34it/s]

{'loss': Array(0.28225243, dtype=float32), 'loss_cross_entropy': Array(0.25264502, dtype=float32)}


  1%|          | 10849/1000000 [30:10<36:04:06,  7.62it/s]

{'loss': Array(0.2939987, dtype=float32), 'loss_cross_entropy': Array(0.26422128, dtype=float32)}


  1%|          | 10859/1000000 [30:11<26:44:46, 10.27it/s]

{'loss': Array(0.25919968, dtype=float32), 'loss_cross_entropy': Array(0.23146759, dtype=float32)}


  1%|          | 10869/1000000 [30:13<27:12:30, 10.10it/s]

{'loss': Array(0.28447384, dtype=float32), 'loss_cross_entropy': Array(0.25688845, dtype=float32)}


  1%|          | 10879/1000000 [30:14<30:16:36,  9.07it/s]

{'loss': Array(0.26490006, dtype=float32), 'loss_cross_entropy': Array(0.23583975, dtype=float32)}


  1%|          | 10889/1000000 [30:16<27:02:32, 10.16it/s]

{'loss': Array(0.27515504, dtype=float32), 'loss_cross_entropy': Array(0.24676402, dtype=float32)}


  1%|          | 10899/1000000 [30:18<32:13:45,  8.52it/s]

{'loss': Array(0.270359, dtype=float32), 'loss_cross_entropy': Array(0.24103354, dtype=float32)}


  1%|          | 10909/1000000 [30:19<27:14:05, 10.09it/s]

{'loss': Array(0.26113662, dtype=float32), 'loss_cross_entropy': Array(0.23287725, dtype=float32)}


  1%|          | 10919/1000000 [30:21<40:51:49,  6.72it/s]

{'loss': Array(0.23064573, dtype=float32), 'loss_cross_entropy': Array(0.20505214, dtype=float32)}


  1%|          | 10929/1000000 [30:22<27:49:06,  9.88it/s]

{'loss': Array(0.29637152, dtype=float32), 'loss_cross_entropy': Array(0.2656064, dtype=float32)}


  1%|          | 10939/1000000 [30:23<26:19:47, 10.43it/s]

{'loss': Array(0.2763929, dtype=float32), 'loss_cross_entropy': Array(0.24752, dtype=float32)}


  1%|          | 10949/1000000 [30:25<32:53:10,  8.35it/s]

{'loss': Array(0.29939842, dtype=float32), 'loss_cross_entropy': Array(0.26909944, dtype=float32)}


  1%|          | 10959/1000000 [30:26<26:52:59, 10.22it/s]

{'loss': Array(0.26809463, dtype=float32), 'loss_cross_entropy': Array(0.2387542, dtype=float32)}


  1%|          | 10969/1000000 [30:28<36:43:52,  7.48it/s]

{'loss': Array(0.2706058, dtype=float32), 'loss_cross_entropy': Array(0.24371281, dtype=float32)}


  1%|          | 10979/1000000 [30:30<27:10:17, 10.11it/s]

{'loss': Array(0.2827941, dtype=float32), 'loss_cross_entropy': Array(0.25240323, dtype=float32)}


  1%|          | 10989/1000000 [30:31<36:08:45,  7.60it/s]

{'loss': Array(0.28769508, dtype=float32), 'loss_cross_entropy': Array(0.2570294, dtype=float32)}


  1%|          | 10999/1000000 [30:33<29:44:03,  9.24it/s]

{'loss': Array(0.28009424, dtype=float32), 'loss_cross_entropy': Array(0.2514415, dtype=float32)}


  1%|          | 11009/1000000 [30:40<89:08:03,  3.08it/s] 

{'loss': Array(0.292306, dtype=float32), 'loss_cross_entropy': Array(0.26109302, dtype=float32)}


  1%|          | 11019/1000000 [30:42<40:41:15,  6.75it/s]

{'loss': Array(0.2941805, dtype=float32), 'loss_cross_entropy': Array(0.26504177, dtype=float32)}


  1%|          | 11029/1000000 [30:44<28:40:05,  9.58it/s]

{'loss': Array(0.28781876, dtype=float32), 'loss_cross_entropy': Array(0.2598869, dtype=float32)}


  1%|          | 11039/1000000 [30:45<36:51:45,  7.45it/s]

{'loss': Array(0.28508723, dtype=float32), 'loss_cross_entropy': Array(0.25464752, dtype=float32)}


  1%|          | 11049/1000000 [30:47<27:06:00, 10.14it/s]

{'loss': Array(0.24198018, dtype=float32), 'loss_cross_entropy': Array(0.21592748, dtype=float32)}


  1%|          | 11059/1000000 [30:48<41:21:04,  6.64it/s]

{'loss': Array(0.2891858, dtype=float32), 'loss_cross_entropy': Array(0.25867355, dtype=float32)}


  1%|          | 11069/1000000 [30:50<28:07:37,  9.77it/s]

{'loss': Array(0.2703968, dtype=float32), 'loss_cross_entropy': Array(0.24304138, dtype=float32)}


  1%|          | 11079/1000000 [30:51<25:26:03, 10.80it/s]

{'loss': Array(0.275363, dtype=float32), 'loss_cross_entropy': Array(0.24679, dtype=float32)}


  1%|          | 11089/1000000 [30:53<33:48:25,  8.13it/s]

{'loss': Array(0.26197007, dtype=float32), 'loss_cross_entropy': Array(0.2350894, dtype=float32)}


  1%|          | 11099/1000000 [30:54<26:49:44, 10.24it/s]

{'loss': Array(0.3023784, dtype=float32), 'loss_cross_entropy': Array(0.27185115, dtype=float32)}


  1%|          | 11109/1000000 [30:56<35:38:08,  7.71it/s]

{'loss': Array(0.26585338, dtype=float32), 'loss_cross_entropy': Array(0.23784725, dtype=float32)}


  1%|          | 11119/1000000 [30:57<26:51:40, 10.23it/s]

{'loss': Array(0.28426108, dtype=float32), 'loss_cross_entropy': Array(0.25455204, dtype=float32)}


  1%|          | 11129/1000000 [30:59<26:28:11, 10.38it/s]

{'loss': Array(0.27298293, dtype=float32), 'loss_cross_entropy': Array(0.24387507, dtype=float32)}


  1%|          | 11139/1000000 [31:00<29:05:51,  9.44it/s]

{'loss': Array(0.261654, dtype=float32), 'loss_cross_entropy': Array(0.23398101, dtype=float32)}


  1%|          | 11149/1000000 [31:02<26:07:01, 10.52it/s]

{'loss': Array(0.26109043, dtype=float32), 'loss_cross_entropy': Array(0.23592916, dtype=float32)}


  1%|          | 11159/1000000 [31:03<40:28:38,  6.79it/s]

{'loss': Array(0.27189136, dtype=float32), 'loss_cross_entropy': Array(0.24389993, dtype=float32)}


  1%|          | 11169/1000000 [31:05<27:51:02,  9.86it/s]

{'loss': Array(0.2762745, dtype=float32), 'loss_cross_entropy': Array(0.2474062, dtype=float32)}


  1%|          | 11179/1000000 [31:06<26:02:09, 10.55it/s]

{'loss': Array(0.26579502, dtype=float32), 'loss_cross_entropy': Array(0.23947828, dtype=float32)}


  1%|          | 11189/1000000 [31:08<33:14:48,  8.26it/s]

{'loss': Array(0.24060886, dtype=float32), 'loss_cross_entropy': Array(0.21451436, dtype=float32)}


  1%|          | 11199/1000000 [31:09<26:26:31, 10.39it/s]

{'loss': Array(0.26540563, dtype=float32), 'loss_cross_entropy': Array(0.23651998, dtype=float32)}


  1%|          | 11209/1000000 [31:11<35:39:56,  7.70it/s]

{'loss': Array(0.28662857, dtype=float32), 'loss_cross_entropy': Array(0.2581714, dtype=float32)}


  1%|          | 11219/1000000 [31:12<27:21:33, 10.04it/s]

{'loss': Array(0.28627944, dtype=float32), 'loss_cross_entropy': Array(0.25877652, dtype=float32)}


  1%|          | 11229/1000000 [31:14<36:38:45,  7.49it/s]

{'loss': Array(0.2692903, dtype=float32), 'loss_cross_entropy': Array(0.24070735, dtype=float32)}


  1%|          | 11239/1000000 [31:15<27:25:42, 10.01it/s]

{'loss': Array(0.24190734, dtype=float32), 'loss_cross_entropy': Array(0.21594477, dtype=float32)}


  1%|          | 11249/1000000 [31:17<26:05:07, 10.53it/s]

{'loss': Array(0.27105615, dtype=float32), 'loss_cross_entropy': Array(0.24382523, dtype=float32)}


  1%|          | 11259/1000000 [31:19<31:51:26,  8.62it/s]

{'loss': Array(0.2885686, dtype=float32), 'loss_cross_entropy': Array(0.25924692, dtype=float32)}


  1%|          | 11269/1000000 [31:20<26:30:07, 10.36it/s]

{'loss': Array(0.2845245, dtype=float32), 'loss_cross_entropy': Array(0.25575963, dtype=float32)}


  1%|          | 11279/1000000 [31:22<32:29:17,  8.45it/s]

{'loss': Array(0.2822406, dtype=float32), 'loss_cross_entropy': Array(0.25486964, dtype=float32)}


  1%|          | 11289/1000000 [31:23<27:18:36, 10.06it/s]

{'loss': Array(0.2626416, dtype=float32), 'loss_cross_entropy': Array(0.23558629, dtype=float32)}


  1%|          | 11299/1000000 [31:25<40:34:28,  6.77it/s]

{'loss': Array(0.30248663, dtype=float32), 'loss_cross_entropy': Array(0.27263996, dtype=float32)}


  1%|          | 11309/1000000 [31:26<27:44:58,  9.90it/s]

{'loss': Array(0.25989923, dtype=float32), 'loss_cross_entropy': Array(0.23365171, dtype=float32)}


  1%|          | 11319/1000000 [31:28<41:47:38,  6.57it/s]

{'loss': Array(0.27263716, dtype=float32), 'loss_cross_entropy': Array(0.24294882, dtype=float32)}


  1%|          | 11329/1000000 [31:29<28:29:52,  9.64it/s]

{'loss': Array(0.29387128, dtype=float32), 'loss_cross_entropy': Array(0.26467037, dtype=float32)}


  1%|          | 11339/1000000 [31:31<26:45:05, 10.27it/s]

{'loss': Array(0.2881681, dtype=float32), 'loss_cross_entropy': Array(0.25775027, dtype=float32)}


  1%|          | 11349/1000000 [31:32<36:22:04,  7.55it/s]

{'loss': Array(0.27334195, dtype=float32), 'loss_cross_entropy': Array(0.2447171, dtype=float32)}


  1%|          | 11359/1000000 [31:34<27:53:42,  9.84it/s]

{'loss': Array(0.28373072, dtype=float32), 'loss_cross_entropy': Array(0.2551076, dtype=float32)}


  1%|          | 11369/1000000 [31:36<41:08:31,  6.67it/s]

{'loss': Array(0.26456016, dtype=float32), 'loss_cross_entropy': Array(0.23643672, dtype=float32)}


  1%|          | 11379/1000000 [31:37<27:51:28,  9.86it/s]

{'loss': Array(0.28627378, dtype=float32), 'loss_cross_entropy': Array(0.2573631, dtype=float32)}


  1%|          | 11389/1000000 [31:38<26:14:03, 10.47it/s]

{'loss': Array(0.26161283, dtype=float32), 'loss_cross_entropy': Array(0.23405723, dtype=float32)}


  1%|          | 11399/1000000 [31:40<29:18:24,  9.37it/s]

{'loss': Array(0.2801024, dtype=float32), 'loss_cross_entropy': Array(0.25232196, dtype=float32)}


  1%|          | 11409/1000000 [31:41<26:08:17, 10.51it/s]

{'loss': Array(0.2853271, dtype=float32), 'loss_cross_entropy': Array(0.25632975, dtype=float32)}


  1%|          | 11419/1000000 [31:43<29:04:52,  9.44it/s]

{'loss': Array(0.2736071, dtype=float32), 'loss_cross_entropy': Array(0.24622524, dtype=float32)}


  1%|          | 11429/1000000 [31:44<26:04:48, 10.53it/s]

{'loss': Array(0.28578687, dtype=float32), 'loss_cross_entropy': Array(0.25553903, dtype=float32)}


  1%|          | 11439/1000000 [31:46<39:37:52,  6.93it/s]

{'loss': Array(0.30441865, dtype=float32), 'loss_cross_entropy': Array(0.27374837, dtype=float32)}


  1%|          | 11449/1000000 [31:47<27:44:48,  9.90it/s]

{'loss': Array(0.25906894, dtype=float32), 'loss_cross_entropy': Array(0.23214626, dtype=float32)}


  1%|          | 11459/1000000 [31:49<26:13:57, 10.47it/s]

{'loss': Array(0.28770867, dtype=float32), 'loss_cross_entropy': Array(0.2576268, dtype=float32)}


  1%|          | 11469/1000000 [31:51<33:00:30,  8.32it/s]

{'loss': Array(0.3055267, dtype=float32), 'loss_cross_entropy': Array(0.27562425, dtype=float32)}


  1%|          | 11479/1000000 [31:52<27:08:35, 10.12it/s]

{'loss': Array(0.27037653, dtype=float32), 'loss_cross_entropy': Array(0.24348652, dtype=float32)}


  1%|          | 11489/1000000 [31:54<40:40:30,  6.75it/s]

{'loss': Array(0.28281584, dtype=float32), 'loss_cross_entropy': Array(0.25387898, dtype=float32)}


  1%|          | 11499/1000000 [31:55<27:57:01,  9.82it/s]

{'loss': Array(0.31014073, dtype=float32), 'loss_cross_entropy': Array(0.27835926, dtype=float32)}


  1%|          | 11509/1000000 [32:03<90:50:18,  3.02it/s] 

{'loss': Array(0.26077935, dtype=float32), 'loss_cross_entropy': Array(0.23491238, dtype=float32)}


  1%|          | 11519/1000000 [32:05<41:17:32,  6.65it/s]

{'loss': Array(0.250321, dtype=float32), 'loss_cross_entropy': Array(0.22444244, dtype=float32)}


  1%|          | 11529/1000000 [32:06<27:39:25,  9.93it/s]

{'loss': Array(0.24825202, dtype=float32), 'loss_cross_entropy': Array(0.22214971, dtype=float32)}


  1%|          | 11539/1000000 [32:08<34:32:48,  7.95it/s]

{'loss': Array(0.27838925, dtype=float32), 'loss_cross_entropy': Array(0.25176653, dtype=float32)}


  1%|          | 11549/1000000 [32:09<26:55:28, 10.20it/s]

{'loss': Array(0.2662802, dtype=float32), 'loss_cross_entropy': Array(0.23900306, dtype=float32)}


  1%|          | 11559/1000000 [32:11<40:19:01,  6.81it/s]

{'loss': Array(0.2857035, dtype=float32), 'loss_cross_entropy': Array(0.2580528, dtype=float32)}


  1%|          | 11569/1000000 [32:12<27:26:22, 10.01it/s]

{'loss': Array(0.26614967, dtype=float32), 'loss_cross_entropy': Array(0.23951955, dtype=float32)}


  1%|          | 11579/1000000 [32:13<26:25:21, 10.39it/s]

{'loss': Array(0.27969456, dtype=float32), 'loss_cross_entropy': Array(0.24990837, dtype=float32)}


  1%|          | 11589/1000000 [32:15<29:27:26,  9.32it/s]

{'loss': Array(0.26529136, dtype=float32), 'loss_cross_entropy': Array(0.23693068, dtype=float32)}


  1%|          | 11599/1000000 [32:17<26:28:03, 10.37it/s]

{'loss': Array(0.3026401, dtype=float32), 'loss_cross_entropy': Array(0.27302307, dtype=float32)}


  1%|          | 11609/1000000 [32:18<40:58:14,  6.70it/s]

{'loss': Array(0.2748316, dtype=float32), 'loss_cross_entropy': Array(0.24707988, dtype=float32)}


  1%|          | 11619/1000000 [32:20<28:08:11,  9.76it/s]

{'loss': Array(0.29049346, dtype=float32), 'loss_cross_entropy': Array(0.26225105, dtype=float32)}


  1%|          | 11629/1000000 [32:21<26:03:37, 10.54it/s]

{'loss': Array(0.25141743, dtype=float32), 'loss_cross_entropy': Array(0.22570641, dtype=float32)}


  1%|          | 11639/1000000 [32:23<31:46:53,  8.64it/s]

{'loss': Array(0.24930659, dtype=float32), 'loss_cross_entropy': Array(0.22314744, dtype=float32)}


  1%|          | 11649/1000000 [32:24<26:14:55, 10.46it/s]

{'loss': Array(0.2807517, dtype=float32), 'loss_cross_entropy': Array(0.2530026, dtype=float32)}


  1%|          | 11659/1000000 [32:26<35:16:40,  7.78it/s]

{'loss': Array(0.2590805, dtype=float32), 'loss_cross_entropy': Array(0.23398693, dtype=float32)}


  1%|          | 11669/1000000 [32:27<26:42:32, 10.28it/s]

{'loss': Array(0.2758094, dtype=float32), 'loss_cross_entropy': Array(0.24660133, dtype=float32)}


  1%|          | 11679/1000000 [32:29<40:52:00,  6.72it/s]

{'loss': Array(0.2639678, dtype=float32), 'loss_cross_entropy': Array(0.23525634, dtype=float32)}


  1%|          | 11689/1000000 [32:30<27:47:52,  9.88it/s]

{'loss': Array(0.26842877, dtype=float32), 'loss_cross_entropy': Array(0.2403779, dtype=float32)}


  1%|          | 11699/1000000 [32:32<25:37:25, 10.71it/s]

{'loss': Array(0.23944464, dtype=float32), 'loss_cross_entropy': Array(0.21281779, dtype=float32)}


  1%|          | 11709/1000000 [32:33<33:10:05,  8.28it/s]

{'loss': Array(0.28173777, dtype=float32), 'loss_cross_entropy': Array(0.25329483, dtype=float32)}


  1%|          | 11719/1000000 [32:35<26:41:47, 10.28it/s]

{'loss': Array(0.29521295, dtype=float32), 'loss_cross_entropy': Array(0.26596135, dtype=float32)}


  1%|          | 11729/1000000 [32:36<36:00:38,  7.62it/s]

{'loss': Array(0.27958265, dtype=float32), 'loss_cross_entropy': Array(0.24975203, dtype=float32)}


  1%|          | 11739/1000000 [32:38<28:13:16,  9.73it/s]

{'loss': Array(0.27534536, dtype=float32), 'loss_cross_entropy': Array(0.24787001, dtype=float32)}


  1%|          | 11749/1000000 [32:39<25:40:55, 10.69it/s]

{'loss': Array(0.26508626, dtype=float32), 'loss_cross_entropy': Array(0.23700082, dtype=float32)}


  1%|          | 11759/1000000 [32:41<29:11:33,  9.40it/s]

{'loss': Array(0.2698172, dtype=float32), 'loss_cross_entropy': Array(0.24247395, dtype=float32)}


  1%|          | 11769/1000000 [32:42<26:07:20, 10.51it/s]

{'loss': Array(0.27783182, dtype=float32), 'loss_cross_entropy': Array(0.24978617, dtype=float32)}


  1%|          | 11779/1000000 [32:44<40:46:28,  6.73it/s]

{'loss': Array(0.27639544, dtype=float32), 'loss_cross_entropy': Array(0.24834423, dtype=float32)}


  1%|          | 11789/1000000 [32:45<27:57:08,  9.82it/s]

{'loss': Array(0.2536123, dtype=float32), 'loss_cross_entropy': Array(0.227095, dtype=float32)}


  1%|          | 11799/1000000 [32:47<25:44:53, 10.66it/s]

{'loss': Array(0.283517, dtype=float32), 'loss_cross_entropy': Array(0.25384054, dtype=float32)}


  1%|          | 11809/1000000 [32:49<33:19:39,  8.24it/s]

{'loss': Array(0.24297129, dtype=float32), 'loss_cross_entropy': Array(0.21844696, dtype=float32)}


  1%|          | 11819/1000000 [32:50<27:21:39, 10.03it/s]

{'loss': Array(0.26547974, dtype=float32), 'loss_cross_entropy': Array(0.23854001, dtype=float32)}


  1%|          | 11829/1000000 [32:52<36:16:30,  7.57it/s]

{'loss': Array(0.26484117, dtype=float32), 'loss_cross_entropy': Array(0.23701711, dtype=float32)}


  1%|          | 11839/1000000 [32:53<27:38:37,  9.93it/s]

{'loss': Array(0.2679064, dtype=float32), 'loss_cross_entropy': Array(0.2388777, dtype=float32)}


  1%|          | 11849/1000000 [32:55<35:47:28,  7.67it/s]

{'loss': Array(0.24626124, dtype=float32), 'loss_cross_entropy': Array(0.22038482, dtype=float32)}


  1%|          | 11859/1000000 [32:56<26:59:51, 10.17it/s]

{'loss': Array(0.28923795, dtype=float32), 'loss_cross_entropy': Array(0.26008445, dtype=float32)}


  1%|          | 11869/1000000 [32:57<26:01:31, 10.55it/s]

{'loss': Array(0.25061142, dtype=float32), 'loss_cross_entropy': Array(0.22387922, dtype=float32)}


  1%|          | 11879/1000000 [32:59<31:47:05,  8.64it/s]

{'loss': Array(0.27072784, dtype=float32), 'loss_cross_entropy': Array(0.24325977, dtype=float32)}


  1%|          | 11889/1000000 [33:01<26:08:17, 10.50it/s]

{'loss': Array(0.27068314, dtype=float32), 'loss_cross_entropy': Array(0.2420858, dtype=float32)}


  1%|          | 11899/1000000 [33:02<33:38:34,  8.16it/s]

{'loss': Array(0.27973244, dtype=float32), 'loss_cross_entropy': Array(0.25190696, dtype=float32)}


  1%|          | 11909/1000000 [33:04<26:55:22, 10.19it/s]

{'loss': Array(0.27560624, dtype=float32), 'loss_cross_entropy': Array(0.24882744, dtype=float32)}


  1%|          | 11919/1000000 [33:05<40:33:47,  6.77it/s]

{'loss': Array(0.25065437, dtype=float32), 'loss_cross_entropy': Array(0.22436996, dtype=float32)}


  1%|          | 11929/1000000 [33:07<27:46:03,  9.88it/s]

{'loss': Array(0.27678427, dtype=float32), 'loss_cross_entropy': Array(0.24795185, dtype=float32)}


  1%|          | 11939/1000000 [33:09<41:12:54,  6.66it/s]

{'loss': Array(0.2601336, dtype=float32), 'loss_cross_entropy': Array(0.2333292, dtype=float32)}


  1%|          | 11949/1000000 [33:10<28:01:42,  9.79it/s]

{'loss': Array(0.27719244, dtype=float32), 'loss_cross_entropy': Array(0.24808192, dtype=float32)}


  1%|          | 11959/1000000 [33:11<25:51:26, 10.61it/s]

{'loss': Array(0.28937215, dtype=float32), 'loss_cross_entropy': Array(0.2587645, dtype=float32)}


  1%|          | 11969/1000000 [33:13<36:21:13,  7.55it/s]

{'loss': Array(0.2915166, dtype=float32), 'loss_cross_entropy': Array(0.26330802, dtype=float32)}


  1%|          | 11979/1000000 [33:14<27:13:42, 10.08it/s]

{'loss': Array(0.25983977, dtype=float32), 'loss_cross_entropy': Array(0.23384786, dtype=float32)}


  1%|          | 11989/1000000 [33:16<40:51:05,  6.72it/s]

{'loss': Array(0.27416173, dtype=float32), 'loss_cross_entropy': Array(0.24601486, dtype=float32)}


  1%|          | 11999/1000000 [33:17<27:39:17,  9.92it/s]

{'loss': Array(0.2617356, dtype=float32), 'loss_cross_entropy': Array(0.2330844, dtype=float32)}


  1%|          | 12009/1000000 [33:25<92:26:51,  2.97it/s] 

{'loss': Array(0.2907756, dtype=float32), 'loss_cross_entropy': Array(0.260133, dtype=float32)}


  1%|          | 12019/1000000 [33:27<40:11:42,  6.83it/s] 

{'loss': Array(0.27102986, dtype=float32), 'loss_cross_entropy': Array(0.24441145, dtype=float32)}


  1%|          | 12029/1000000 [33:29<28:19:24,  9.69it/s]

{'loss': Array(0.25902525, dtype=float32), 'loss_cross_entropy': Array(0.23294233, dtype=float32)}


  1%|          | 12039/1000000 [33:30<29:00:45,  9.46it/s]

{'loss': Array(0.25296718, dtype=float32), 'loss_cross_entropy': Array(0.22785048, dtype=float32)}


  1%|          | 12049/1000000 [33:32<26:26:50, 10.38it/s]

{'loss': Array(0.27870148, dtype=float32), 'loss_cross_entropy': Array(0.25050616, dtype=float32)}


  1%|          | 12059/1000000 [33:33<40:37:52,  6.75it/s]

{'loss': Array(0.2522842, dtype=float32), 'loss_cross_entropy': Array(0.22558801, dtype=float32)}


  1%|          | 12069/1000000 [33:35<28:02:20,  9.79it/s]

{'loss': Array(0.26931334, dtype=float32), 'loss_cross_entropy': Array(0.24173692, dtype=float32)}


  1%|          | 12079/1000000 [33:36<25:25:15, 10.80it/s]

{'loss': Array(0.25920603, dtype=float32), 'loss_cross_entropy': Array(0.23213294, dtype=float32)}


  1%|          | 12089/1000000 [33:38<34:26:36,  7.97it/s]

{'loss': Array(0.2736271, dtype=float32), 'loss_cross_entropy': Array(0.24570964, dtype=float32)}


  1%|          | 12099/1000000 [33:39<26:51:57, 10.21it/s]

{'loss': Array(0.27259853, dtype=float32), 'loss_cross_entropy': Array(0.24494343, dtype=float32)}


  1%|          | 12109/1000000 [33:41<40:36:43,  6.76it/s]

{'loss': Array(0.28072187, dtype=float32), 'loss_cross_entropy': Array(0.2528383, dtype=float32)}


  1%|          | 12119/1000000 [33:42<27:31:02,  9.97it/s]

{'loss': Array(0.26166058, dtype=float32), 'loss_cross_entropy': Array(0.23510377, dtype=float32)}


  1%|          | 12129/1000000 [33:44<26:50:52, 10.22it/s]

{'loss': Array(0.26619813, dtype=float32), 'loss_cross_entropy': Array(0.23794937, dtype=float32)}


  1%|          | 12139/1000000 [33:45<30:13:17,  9.08it/s]

{'loss': Array(0.26807877, dtype=float32), 'loss_cross_entropy': Array(0.24147026, dtype=float32)}


  1%|          | 12149/1000000 [33:47<25:52:21, 10.61it/s]

{'loss': Array(0.25811696, dtype=float32), 'loss_cross_entropy': Array(0.2315816, dtype=float32)}


  1%|          | 12159/1000000 [33:48<37:04:54,  7.40it/s]

{'loss': Array(0.28519437, dtype=float32), 'loss_cross_entropy': Array(0.25629354, dtype=float32)}


  1%|          | 12169/1000000 [33:50<27:48:44,  9.87it/s]

{'loss': Array(0.25084564, dtype=float32), 'loss_cross_entropy': Array(0.22492169, dtype=float32)}


  1%|          | 12179/1000000 [33:51<25:20:28, 10.83it/s]

{'loss': Array(0.27080783, dtype=float32), 'loss_cross_entropy': Array(0.24245253, dtype=float32)}


  1%|          | 12189/1000000 [33:53<32:27:25,  8.45it/s]

{'loss': Array(0.26044196, dtype=float32), 'loss_cross_entropy': Array(0.23309548, dtype=float32)}


  1%|          | 12199/1000000 [33:54<26:58:26, 10.17it/s]

{'loss': Array(0.27917734, dtype=float32), 'loss_cross_entropy': Array(0.2526897, dtype=float32)}


  1%|          | 12209/1000000 [33:56<25:23:03, 10.81it/s]

{'loss': Array(0.26619795, dtype=float32), 'loss_cross_entropy': Array(0.2382551, dtype=float32)}


  1%|          | 12219/1000000 [33:57<28:59:56,  9.46it/s]

{'loss': Array(0.2720742, dtype=float32), 'loss_cross_entropy': Array(0.24485198, dtype=float32)}


  1%|          | 12229/1000000 [33:59<27:36:56,  9.94it/s]

{'loss': Array(0.26165828, dtype=float32), 'loss_cross_entropy': Array(0.23500548, dtype=float32)}


  1%|          | 12239/1000000 [34:01<33:27:26,  8.20it/s]

{'loss': Array(0.26484963, dtype=float32), 'loss_cross_entropy': Array(0.23697162, dtype=float32)}


  1%|          | 12249/1000000 [34:02<26:51:23, 10.22it/s]

{'loss': Array(0.262953, dtype=float32), 'loss_cross_entropy': Array(0.23672247, dtype=float32)}


  1%|          | 12259/1000000 [34:04<40:48:19,  6.72it/s]

{'loss': Array(0.2849715, dtype=float32), 'loss_cross_entropy': Array(0.25762868, dtype=float32)}


  1%|          | 12269/1000000 [34:05<27:51:14,  9.85it/s]

{'loss': Array(0.26118776, dtype=float32), 'loss_cross_entropy': Array(0.23531441, dtype=float32)}


  1%|          | 12279/1000000 [34:07<40:44:27,  6.73it/s]

{'loss': Array(0.27557057, dtype=float32), 'loss_cross_entropy': Array(0.24770974, dtype=float32)}


  1%|          | 12289/1000000 [34:08<28:19:41,  9.69it/s]

{'loss': Array(0.287024, dtype=float32), 'loss_cross_entropy': Array(0.25802937, dtype=float32)}


  1%|          | 12299/1000000 [34:10<25:34:20, 10.73it/s]

{'loss': Array(0.27922186, dtype=float32), 'loss_cross_entropy': Array(0.25056252, dtype=float32)}


  1%|          | 12309/1000000 [34:11<35:22:58,  7.75it/s]

{'loss': Array(0.26590797, dtype=float32), 'loss_cross_entropy': Array(0.23949015, dtype=float32)}


  1%|          | 12319/1000000 [34:13<29:07:25,  9.42it/s]

{'loss': Array(0.2636221, dtype=float32), 'loss_cross_entropy': Array(0.2375721, dtype=float32)}


  1%|          | 12329/1000000 [34:14<40:36:20,  6.76it/s]

{'loss': Array(0.2848343, dtype=float32), 'loss_cross_entropy': Array(0.25607416, dtype=float32)}


  1%|          | 12339/1000000 [34:16<28:07:42,  9.75it/s]

{'loss': Array(0.2577797, dtype=float32), 'loss_cross_entropy': Array(0.2322499, dtype=float32)}


  1%|          | 12349/1000000 [34:17<25:31:55, 10.75it/s]

{'loss': Array(0.27737316, dtype=float32), 'loss_cross_entropy': Array(0.24946828, dtype=float32)}


  1%|          | 12359/1000000 [34:19<29:03:01,  9.44it/s]

{'loss': Array(0.26752922, dtype=float32), 'loss_cross_entropy': Array(0.2393031, dtype=float32)}


  1%|          | 12369/1000000 [34:20<25:49:14, 10.62it/s]

{'loss': Array(0.2618937, dtype=float32), 'loss_cross_entropy': Array(0.23354042, dtype=float32)}


  1%|          | 12379/1000000 [34:22<28:50:04,  9.51it/s]

{'loss': Array(0.2741318, dtype=float32), 'loss_cross_entropy': Array(0.24564767, dtype=float32)}


  1%|          | 12389/1000000 [34:23<26:55:47, 10.19it/s]

{'loss': Array(0.2756734, dtype=float32), 'loss_cross_entropy': Array(0.247649, dtype=float32)}


  1%|          | 12399/1000000 [34:25<39:49:35,  6.89it/s]

{'loss': Array(0.28359592, dtype=float32), 'loss_cross_entropy': Array(0.25727373, dtype=float32)}


  1%|          | 12409/1000000 [34:26<27:39:20,  9.92it/s]

{'loss': Array(0.26431993, dtype=float32), 'loss_cross_entropy': Array(0.23774663, dtype=float32)}


  1%|          | 12419/1000000 [34:28<27:36:55,  9.93it/s]

{'loss': Array(0.25696492, dtype=float32), 'loss_cross_entropy': Array(0.22976835, dtype=float32)}


  1%|          | 12429/1000000 [34:29<34:04:56,  8.05it/s]

{'loss': Array(0.254594, dtype=float32), 'loss_cross_entropy': Array(0.22730207, dtype=float32)}


  1%|          | 12439/1000000 [34:31<26:33:48, 10.33it/s]

{'loss': Array(0.27382585, dtype=float32), 'loss_cross_entropy': Array(0.24382682, dtype=float32)}


  1%|          | 12449/1000000 [34:32<40:32:31,  6.77it/s]

{'loss': Array(0.26487175, dtype=float32), 'loss_cross_entropy': Array(0.23848557, dtype=float32)}


  1%|          | 12459/1000000 [34:34<28:07:53,  9.75it/s]

{'loss': Array(0.25670198, dtype=float32), 'loss_cross_entropy': Array(0.2299031, dtype=float32)}


  1%|          | 12469/1000000 [34:35<25:54:07, 10.59it/s]

{'loss': Array(0.2737794, dtype=float32), 'loss_cross_entropy': Array(0.2464187, dtype=float32)}


  1%|          | 12479/1000000 [34:37<30:22:43,  9.03it/s]

{'loss': Array(0.25237247, dtype=float32), 'loss_cross_entropy': Array(0.22676842, dtype=float32)}


  1%|          | 12489/1000000 [34:38<26:43:50, 10.26it/s]

{'loss': Array(0.28949863, dtype=float32), 'loss_cross_entropy': Array(0.26042622, dtype=float32)}


  1%|          | 12499/1000000 [34:40<36:08:36,  7.59it/s]

{'loss': Array(0.28684554, dtype=float32), 'loss_cross_entropy': Array(0.25879523, dtype=float32)}


  1%|▏         | 12509/1000000 [34:48<91:54:01,  2.98it/s] 

{'loss': Array(0.2420526, dtype=float32), 'loss_cross_entropy': Array(0.21639848, dtype=float32)}


  1%|▏         | 12519/1000000 [34:50<47:35:32,  5.76it/s] 

{'loss': Array(0.27428845, dtype=float32), 'loss_cross_entropy': Array(0.24600999, dtype=float32)}


  1%|▏         | 12529/1000000 [34:51<28:46:34,  9.53it/s]

{'loss': Array(0.2912229, dtype=float32), 'loss_cross_entropy': Array(0.2634165, dtype=float32)}


  1%|▏         | 12539/1000000 [34:52<26:21:03, 10.41it/s]

{'loss': Array(0.26063147, dtype=float32), 'loss_cross_entropy': Array(0.23563571, dtype=float32)}


  1%|▏         | 12549/1000000 [34:54<32:58:16,  8.32it/s]

{'loss': Array(0.26606783, dtype=float32), 'loss_cross_entropy': Array(0.2390379, dtype=float32)}


  1%|▏         | 12559/1000000 [34:56<26:29:46, 10.35it/s]

{'loss': Array(0.26117676, dtype=float32), 'loss_cross_entropy': Array(0.23634973, dtype=float32)}


  1%|▏         | 12569/1000000 [34:57<25:02:17, 10.95it/s]

{'loss': Array(0.25116, dtype=float32), 'loss_cross_entropy': Array(0.22633171, dtype=float32)}


  1%|▏         | 12579/1000000 [34:59<30:09:16,  9.10it/s]

{'loss': Array(0.2749822, dtype=float32), 'loss_cross_entropy': Array(0.2472487, dtype=float32)}


  1%|▏         | 12589/1000000 [35:00<26:14:13, 10.45it/s]

{'loss': Array(0.2838094, dtype=float32), 'loss_cross_entropy': Array(0.2563324, dtype=float32)}


  1%|▏         | 12599/1000000 [35:02<32:28:58,  8.44it/s]

{'loss': Array(0.28228873, dtype=float32), 'loss_cross_entropy': Array(0.2538799, dtype=float32)}


  1%|▏         | 12609/1000000 [35:03<26:58:13, 10.17it/s]

{'loss': Array(0.2948136, dtype=float32), 'loss_cross_entropy': Array(0.26671952, dtype=float32)}


  1%|▏         | 12619/1000000 [35:05<36:25:10,  7.53it/s]

{'loss': Array(0.25770348, dtype=float32), 'loss_cross_entropy': Array(0.23026578, dtype=float32)}


  1%|▏         | 12629/1000000 [35:06<26:58:33, 10.17it/s]

{'loss': Array(0.26905745, dtype=float32), 'loss_cross_entropy': Array(0.24276534, dtype=float32)}


  1%|▏         | 12639/1000000 [35:08<27:27:51,  9.99it/s]

{'loss': Array(0.26762986, dtype=float32), 'loss_cross_entropy': Array(0.240628, dtype=float32)}


  1%|▏         | 12649/1000000 [35:09<33:03:39,  8.30it/s]

{'loss': Array(0.25358972, dtype=float32), 'loss_cross_entropy': Array(0.2273742, dtype=float32)}


  1%|▏         | 12659/1000000 [35:11<27:00:58, 10.15it/s]

{'loss': Array(0.269849, dtype=float32), 'loss_cross_entropy': Array(0.24316879, dtype=float32)}


  1%|▏         | 12669/1000000 [35:12<25:31:14, 10.75it/s]

{'loss': Array(0.28466082, dtype=float32), 'loss_cross_entropy': Array(0.2570757, dtype=float32)}


  1%|▏         | 12679/1000000 [35:14<29:23:27,  9.33it/s]

{'loss': Array(0.25932845, dtype=float32), 'loss_cross_entropy': Array(0.23342295, dtype=float32)}


  1%|▏         | 12689/1000000 [35:15<25:53:59, 10.59it/s]

{'loss': Array(0.26564714, dtype=float32), 'loss_cross_entropy': Array(0.2385364, dtype=float32)}


  1%|▏         | 12699/1000000 [35:17<32:52:27,  8.34it/s]

{'loss': Array(0.26105753, dtype=float32), 'loss_cross_entropy': Array(0.23519805, dtype=float32)}


  1%|▏         | 12709/1000000 [35:18<27:22:15, 10.02it/s]

{'loss': Array(0.27482805, dtype=float32), 'loss_cross_entropy': Array(0.24695073, dtype=float32)}


  1%|▏         | 12719/1000000 [35:20<40:51:13,  6.71it/s]

{'loss': Array(0.24456127, dtype=float32), 'loss_cross_entropy': Array(0.21989301, dtype=float32)}


  1%|▏         | 12729/1000000 [35:21<28:15:35,  9.70it/s]

{'loss': Array(0.25862703, dtype=float32), 'loss_cross_entropy': Array(0.23310338, dtype=float32)}


  1%|▏         | 12739/1000000 [35:23<27:34:49,  9.94it/s]

{'loss': Array(0.26555648, dtype=float32), 'loss_cross_entropy': Array(0.24011055, dtype=float32)}


  1%|▏         | 12749/1000000 [35:24<36:04:27,  7.60it/s]

{'loss': Array(0.25734416, dtype=float32), 'loss_cross_entropy': Array(0.23143645, dtype=float32)}


  1%|▏         | 12759/1000000 [35:26<27:22:03, 10.02it/s]

{'loss': Array(0.2692476, dtype=float32), 'loss_cross_entropy': Array(0.24434987, dtype=float32)}


  1%|▏         | 12769/1000000 [35:27<25:12:20, 10.88it/s]

{'loss': Array(0.27604014, dtype=float32), 'loss_cross_entropy': Array(0.24887304, dtype=float32)}


  1%|▏         | 12779/1000000 [35:29<31:07:34,  8.81it/s]

{'loss': Array(0.2781968, dtype=float32), 'loss_cross_entropy': Array(0.25189134, dtype=float32)}


  1%|▏         | 12789/1000000 [35:30<27:04:08, 10.13it/s]

{'loss': Array(0.270889, dtype=float32), 'loss_cross_entropy': Array(0.24311164, dtype=float32)}


  1%|▏         | 12799/1000000 [35:32<33:29:29,  8.19it/s]

{'loss': Array(0.25823346, dtype=float32), 'loss_cross_entropy': Array(0.23337863, dtype=float32)}


  1%|▏         | 12809/1000000 [35:34<26:51:41, 10.21it/s]

{'loss': Array(0.24722055, dtype=float32), 'loss_cross_entropy': Array(0.22344036, dtype=float32)}


  1%|▏         | 12819/1000000 [35:35<25:20:15, 10.82it/s]

{'loss': Array(0.26745534, dtype=float32), 'loss_cross_entropy': Array(0.24011372, dtype=float32)}


  1%|▏         | 12829/1000000 [35:37<28:32:32,  9.61it/s]

{'loss': Array(0.25615385, dtype=float32), 'loss_cross_entropy': Array(0.22917593, dtype=float32)}


  1%|▏         | 12839/1000000 [35:38<27:16:47, 10.05it/s]

{'loss': Array(0.25800595, dtype=float32), 'loss_cross_entropy': Array(0.23259783, dtype=float32)}


  1%|▏         | 12849/1000000 [35:40<36:37:00,  7.49it/s]

{'loss': Array(0.28138897, dtype=float32), 'loss_cross_entropy': Array(0.2546043, dtype=float32)}


  1%|▏         | 12859/1000000 [35:41<27:15:09, 10.06it/s]

{'loss': Array(0.27171478, dtype=float32), 'loss_cross_entropy': Array(0.24544087, dtype=float32)}


  1%|▏         | 12869/1000000 [35:42<26:35:23, 10.31it/s]

{'loss': Array(0.26451552, dtype=float32), 'loss_cross_entropy': Array(0.23945217, dtype=float32)}


  1%|▏         | 12879/1000000 [35:44<29:59:26,  9.14it/s]

{'loss': Array(0.2678957, dtype=float32), 'loss_cross_entropy': Array(0.24183916, dtype=float32)}


  1%|▏         | 12889/1000000 [35:46<25:42:03, 10.67it/s]

{'loss': Array(0.29410192, dtype=float32), 'loss_cross_entropy': Array(0.26428503, dtype=float32)}


  1%|▏         | 12899/1000000 [35:47<30:16:59,  9.05it/s]

{'loss': Array(0.25130102, dtype=float32), 'loss_cross_entropy': Array(0.22572589, dtype=float32)}


  1%|▏         | 12909/1000000 [35:49<26:44:51, 10.25it/s]

{'loss': Array(0.2642901, dtype=float32), 'loss_cross_entropy': Array(0.23866646, dtype=float32)}


  1%|▏         | 12919/1000000 [35:50<40:43:07,  6.73it/s]

{'loss': Array(0.26519224, dtype=float32), 'loss_cross_entropy': Array(0.23831327, dtype=float32)}


  1%|▏         | 12929/1000000 [35:52<28:00:35,  9.79it/s]

{'loss': Array(0.27414295, dtype=float32), 'loss_cross_entropy': Array(0.2468607, dtype=float32)}


  1%|▏         | 12939/1000000 [35:53<26:25:51, 10.37it/s]

{'loss': Array(0.29916126, dtype=float32), 'loss_cross_entropy': Array(0.26994, dtype=float32)}


  1%|▏         | 12949/1000000 [35:55<33:27:50,  8.19it/s]

{'loss': Array(0.24390244, dtype=float32), 'loss_cross_entropy': Array(0.219379, dtype=float32)}


  1%|▏         | 12959/1000000 [35:56<26:53:11, 10.20it/s]

{'loss': Array(0.25939062, dtype=float32), 'loss_cross_entropy': Array(0.23375535, dtype=float32)}


  1%|▏         | 12969/1000000 [35:58<36:41:14,  7.47it/s]

{'loss': Array(0.25455806, dtype=float32), 'loss_cross_entropy': Array(0.22901686, dtype=float32)}


  1%|▏         | 12979/1000000 [36:00<28:24:47,  9.65it/s]

{'loss': Array(0.23390734, dtype=float32), 'loss_cross_entropy': Array(0.21088412, dtype=float32)}


  1%|▏         | 12989/1000000 [36:01<36:36:10,  7.49it/s]

{'loss': Array(0.26676854, dtype=float32), 'loss_cross_entropy': Array(0.2412015, dtype=float32)}


  1%|▏         | 12999/1000000 [36:03<29:15:17,  9.37it/s]

{'loss': Array(0.28414044, dtype=float32), 'loss_cross_entropy': Array(0.25608194, dtype=float32)}


  1%|▏         | 13009/1000000 [36:11<90:54:10,  3.02it/s] 

{'loss': Array(0.26811016, dtype=float32), 'loss_cross_entropy': Array(0.24203324, dtype=float32)}


  1%|▏         | 13019/1000000 [36:12<41:27:36,  6.61it/s] 

{'loss': Array(0.2586786, dtype=float32), 'loss_cross_entropy': Array(0.23339425, dtype=float32)}


  1%|▏         | 13029/1000000 [36:14<28:21:15,  9.67it/s]

{'loss': Array(0.25327727, dtype=float32), 'loss_cross_entropy': Array(0.22792883, dtype=float32)}


  1%|▏         | 13039/1000000 [36:15<36:12:32,  7.57it/s]

{'loss': Array(0.2622572, dtype=float32), 'loss_cross_entropy': Array(0.23589928, dtype=float32)}


  1%|▏         | 13049/1000000 [36:17<27:02:38, 10.14it/s]

{'loss': Array(0.25914845, dtype=float32), 'loss_cross_entropy': Array(0.23308614, dtype=float32)}


  1%|▏         | 13059/1000000 [36:19<40:30:36,  6.77it/s]

{'loss': Array(0.25003153, dtype=float32), 'loss_cross_entropy': Array(0.2252012, dtype=float32)}


  1%|▏         | 13069/1000000 [36:20<27:34:03,  9.94it/s]

{'loss': Array(0.30094665, dtype=float32), 'loss_cross_entropy': Array(0.2703056, dtype=float32)}


  1%|▏         | 13079/1000000 [36:21<25:25:03, 10.79it/s]

{'loss': Array(0.26421878, dtype=float32), 'loss_cross_entropy': Array(0.23757306, dtype=float32)}


  1%|▏         | 13089/1000000 [36:23<32:58:22,  8.31it/s]

{'loss': Array(0.26470017, dtype=float32), 'loss_cross_entropy': Array(0.23841639, dtype=float32)}


  1%|▏         | 13099/1000000 [36:24<26:15:05, 10.44it/s]

{'loss': Array(0.26454026, dtype=float32), 'loss_cross_entropy': Array(0.23853604, dtype=float32)}


  1%|▏         | 13109/1000000 [36:26<35:41:13,  7.68it/s]

{'loss': Array(0.24997118, dtype=float32), 'loss_cross_entropy': Array(0.22413535, dtype=float32)}


  1%|▏         | 13119/1000000 [36:27<27:01:01, 10.15it/s]

{'loss': Array(0.2803119, dtype=float32), 'loss_cross_entropy': Array(0.25388208, dtype=float32)}


  1%|▏         | 13129/1000000 [36:29<26:28:56, 10.35it/s]

{'loss': Array(0.25823045, dtype=float32), 'loss_cross_entropy': Array(0.23311225, dtype=float32)}


  1%|▏         | 13139/1000000 [36:31<29:30:39,  9.29it/s]

{'loss': Array(0.2696901, dtype=float32), 'loss_cross_entropy': Array(0.24329665, dtype=float32)}


  1%|▏         | 13149/1000000 [36:32<26:32:49, 10.33it/s]

{'loss': Array(0.2754676, dtype=float32), 'loss_cross_entropy': Array(0.24664953, dtype=float32)}


  1%|▏         | 13159/1000000 [36:34<40:40:01,  6.74it/s]

{'loss': Array(0.27310506, dtype=float32), 'loss_cross_entropy': Array(0.24680738, dtype=float32)}


  1%|▏         | 13169/1000000 [36:35<27:55:49,  9.81it/s]

{'loss': Array(0.2466934, dtype=float32), 'loss_cross_entropy': Array(0.22213326, dtype=float32)}


  1%|▏         | 13179/1000000 [36:36<25:28:04, 10.76it/s]

{'loss': Array(0.24275957, dtype=float32), 'loss_cross_entropy': Array(0.21681206, dtype=float32)}


  1%|▏         | 13189/1000000 [36:38<32:59:13,  8.31it/s]

{'loss': Array(0.2541984, dtype=float32), 'loss_cross_entropy': Array(0.22965734, dtype=float32)}


  1%|▏         | 13199/1000000 [36:39<26:41:44, 10.27it/s]

{'loss': Array(0.26181975, dtype=float32), 'loss_cross_entropy': Array(0.23615074, dtype=float32)}


  1%|▏         | 13209/1000000 [36:41<35:56:48,  7.63it/s]

{'loss': Array(0.2736101, dtype=float32), 'loss_cross_entropy': Array(0.24661095, dtype=float32)}


  1%|▏         | 13219/1000000 [36:42<27:12:45, 10.07it/s]

{'loss': Array(0.27791837, dtype=float32), 'loss_cross_entropy': Array(0.25019807, dtype=float32)}


  1%|▏         | 13229/1000000 [36:44<36:33:04,  7.50it/s]

{'loss': Array(0.27227867, dtype=float32), 'loss_cross_entropy': Array(0.24608897, dtype=float32)}


  1%|▏         | 13239/1000000 [36:46<27:46:08,  9.87it/s]

{'loss': Array(0.2741271, dtype=float32), 'loss_cross_entropy': Array(0.24563861, dtype=float32)}


  1%|▏         | 13249/1000000 [36:47<25:46:00, 10.64it/s]

{'loss': Array(0.2741455, dtype=float32), 'loss_cross_entropy': Array(0.24639642, dtype=float32)}


  1%|▏         | 13259/1000000 [36:49<30:44:48,  8.91it/s]

{'loss': Array(0.2608777, dtype=float32), 'loss_cross_entropy': Array(0.23493147, dtype=float32)}


  1%|▏         | 13269/1000000 [36:50<25:46:21, 10.63it/s]

{'loss': Array(0.27166012, dtype=float32), 'loss_cross_entropy': Array(0.2455827, dtype=float32)}


  1%|▏         | 13279/1000000 [36:52<32:17:55,  8.49it/s]

{'loss': Array(0.26693103, dtype=float32), 'loss_cross_entropy': Array(0.24066511, dtype=float32)}


  1%|▏         | 13289/1000000 [36:53<26:54:45, 10.18it/s]

{'loss': Array(0.26459548, dtype=float32), 'loss_cross_entropy': Array(0.23942125, dtype=float32)}


  1%|▏         | 13299/1000000 [36:55<40:25:53,  6.78it/s]

{'loss': Array(0.24717103, dtype=float32), 'loss_cross_entropy': Array(0.22271645, dtype=float32)}


  1%|▏         | 13309/1000000 [36:56<27:10:03, 10.09it/s]

{'loss': Array(0.26127636, dtype=float32), 'loss_cross_entropy': Array(0.23674932, dtype=float32)}


  1%|▏         | 13319/1000000 [36:58<41:45:03,  6.56it/s]

{'loss': Array(0.26643416, dtype=float32), 'loss_cross_entropy': Array(0.23921092, dtype=float32)}


  1%|▏         | 13329/1000000 [36:59<28:44:25,  9.54it/s]

{'loss': Array(0.26709205, dtype=float32), 'loss_cross_entropy': Array(0.23930947, dtype=float32)}


  1%|▏         | 13339/1000000 [37:01<25:48:54, 10.62it/s]

{'loss': Array(0.2592475, dtype=float32), 'loss_cross_entropy': Array(0.23277397, dtype=float32)}


  1%|▏         | 13349/1000000 [37:02<35:15:33,  7.77it/s]

{'loss': Array(0.28452408, dtype=float32), 'loss_cross_entropy': Array(0.2568379, dtype=float32)}


  1%|▏         | 13359/1000000 [37:04<27:22:07, 10.01it/s]

{'loss': Array(0.23564127, dtype=float32), 'loss_cross_entropy': Array(0.21197012, dtype=float32)}


  1%|▏         | 13369/1000000 [37:05<40:31:15,  6.76it/s]

{'loss': Array(0.25750348, dtype=float32), 'loss_cross_entropy': Array(0.23223257, dtype=float32)}


  1%|▏         | 13379/1000000 [37:07<27:39:02,  9.91it/s]

{'loss': Array(0.2907302, dtype=float32), 'loss_cross_entropy': Array(0.2620431, dtype=float32)}


  1%|▏         | 13389/1000000 [37:08<25:59:21, 10.55it/s]

{'loss': Array(0.26353619, dtype=float32), 'loss_cross_entropy': Array(0.23812135, dtype=float32)}


  1%|▏         | 13399/1000000 [37:10<28:29:34,  9.62it/s]

{'loss': Array(0.25915954, dtype=float32), 'loss_cross_entropy': Array(0.23287034, dtype=float32)}


  1%|▏         | 13409/1000000 [37:11<25:54:39, 10.58it/s]

{'loss': Array(0.26486278, dtype=float32), 'loss_cross_entropy': Array(0.23857227, dtype=float32)}


  1%|▏         | 13419/1000000 [37:13<30:11:38,  9.08it/s]

{'loss': Array(0.24460073, dtype=float32), 'loss_cross_entropy': Array(0.22095326, dtype=float32)}


  1%|▏         | 13429/1000000 [37:14<25:51:51, 10.60it/s]

{'loss': Array(0.27007732, dtype=float32), 'loss_cross_entropy': Array(0.24285702, dtype=float32)}


  1%|▏         | 13439/1000000 [37:16<39:49:52,  6.88it/s]

{'loss': Array(0.24649349, dtype=float32), 'loss_cross_entropy': Array(0.22075926, dtype=float32)}


  1%|▏         | 13449/1000000 [37:17<27:15:11, 10.06it/s]

{'loss': Array(0.26559678, dtype=float32), 'loss_cross_entropy': Array(0.23976126, dtype=float32)}


  1%|▏         | 13459/1000000 [37:19<26:37:50, 10.29it/s]

{'loss': Array(0.28170967, dtype=float32), 'loss_cross_entropy': Array(0.25575492, dtype=float32)}


  1%|▏         | 13469/1000000 [37:20<33:11:48,  8.25it/s]

{'loss': Array(0.2504973, dtype=float32), 'loss_cross_entropy': Array(0.2261353, dtype=float32)}


  1%|▏         | 13479/1000000 [37:22<27:33:06,  9.95it/s]

{'loss': Array(0.24866107, dtype=float32), 'loss_cross_entropy': Array(0.22469659, dtype=float32)}


  1%|▏         | 13489/1000000 [37:24<40:58:25,  6.69it/s]

{'loss': Array(0.26263198, dtype=float32), 'loss_cross_entropy': Array(0.2378866, dtype=float32)}


  1%|▏         | 13499/1000000 [37:25<27:36:36,  9.92it/s]

{'loss': Array(0.25562534, dtype=float32), 'loss_cross_entropy': Array(0.22984743, dtype=float32)}


  1%|▏         | 13509/1000000 [37:33<89:26:59,  3.06it/s] 

{'loss': Array(0.25447187, dtype=float32), 'loss_cross_entropy': Array(0.23023227, dtype=float32)}


  1%|▏         | 13519/1000000 [37:34<41:56:40,  6.53it/s]

{'loss': Array(0.26891282, dtype=float32), 'loss_cross_entropy': Array(0.24367936, dtype=float32)}


  1%|▏         | 13529/1000000 [37:36<28:16:10,  9.69it/s]

{'loss': Array(0.27492702, dtype=float32), 'loss_cross_entropy': Array(0.24778163, dtype=float32)}


  1%|▏         | 13539/1000000 [37:38<34:47:34,  7.88it/s]

{'loss': Array(0.2801395, dtype=float32), 'loss_cross_entropy': Array(0.25294667, dtype=float32)}


  1%|▏         | 13549/1000000 [37:39<26:46:00, 10.24it/s]

{'loss': Array(0.24719386, dtype=float32), 'loss_cross_entropy': Array(0.22240165, dtype=float32)}


  1%|▏         | 13559/1000000 [37:41<39:47:55,  6.88it/s]

{'loss': Array(0.26196763, dtype=float32), 'loss_cross_entropy': Array(0.23695318, dtype=float32)}


  1%|▏         | 13569/1000000 [37:42<27:28:20,  9.97it/s]

{'loss': Array(0.27626285, dtype=float32), 'loss_cross_entropy': Array(0.25043318, dtype=float32)}


  1%|▏         | 13579/1000000 [37:43<25:50:20, 10.60it/s]

{'loss': Array(0.28767133, dtype=float32), 'loss_cross_entropy': Array(0.26034233, dtype=float32)}


  1%|▏         | 13589/1000000 [37:45<28:33:52,  9.59it/s]

{'loss': Array(0.2693161, dtype=float32), 'loss_cross_entropy': Array(0.24281524, dtype=float32)}


  1%|▏         | 13599/1000000 [37:46<25:23:26, 10.79it/s]

{'loss': Array(0.25824335, dtype=float32), 'loss_cross_entropy': Array(0.23185323, dtype=float32)}


  1%|▏         | 13609/1000000 [37:48<41:39:32,  6.58it/s]

{'loss': Array(0.2858727, dtype=float32), 'loss_cross_entropy': Array(0.25773516, dtype=float32)}


  1%|▏         | 13619/1000000 [37:49<28:15:01,  9.70it/s]

{'loss': Array(0.2502942, dtype=float32), 'loss_cross_entropy': Array(0.22534569, dtype=float32)}


  1%|▏         | 13629/1000000 [37:51<26:28:07, 10.35it/s]

{'loss': Array(0.27229795, dtype=float32), 'loss_cross_entropy': Array(0.24587174, dtype=float32)}


  1%|▏         | 13639/1000000 [37:52<30:07:48,  9.09it/s]

{'loss': Array(0.26674724, dtype=float32), 'loss_cross_entropy': Array(0.24244773, dtype=float32)}


  1%|▏         | 13649/1000000 [37:54<26:49:13, 10.22it/s]

{'loss': Array(0.25818953, dtype=float32), 'loss_cross_entropy': Array(0.23221083, dtype=float32)}


  1%|▏         | 13659/1000000 [37:56<36:26:06,  7.52it/s]

{'loss': Array(0.24647658, dtype=float32), 'loss_cross_entropy': Array(0.22142784, dtype=float32)}


  1%|▏         | 13669/1000000 [37:57<28:01:58,  9.77it/s]

{'loss': Array(0.27359268, dtype=float32), 'loss_cross_entropy': Array(0.24814503, dtype=float32)}


  1%|▏         | 13679/1000000 [37:59<41:49:48,  6.55it/s]

{'loss': Array(0.24502876, dtype=float32), 'loss_cross_entropy': Array(0.2198712, dtype=float32)}


  1%|▏         | 13689/1000000 [38:00<29:05:33,  9.42it/s]

{'loss': Array(0.24239242, dtype=float32), 'loss_cross_entropy': Array(0.21849516, dtype=float32)}


  1%|▏         | 13699/1000000 [38:02<26:21:19, 10.40it/s]

{'loss': Array(0.23518406, dtype=float32), 'loss_cross_entropy': Array(0.21156822, dtype=float32)}


  1%|▏         | 13709/1000000 [38:03<33:30:24,  8.18it/s]

{'loss': Array(0.28343734, dtype=float32), 'loss_cross_entropy': Array(0.25565884, dtype=float32)}


  1%|▏         | 13719/1000000 [38:05<26:20:44, 10.40it/s]

{'loss': Array(0.28960958, dtype=float32), 'loss_cross_entropy': Array(0.2616663, dtype=float32)}


  1%|▏         | 13729/1000000 [38:06<36:10:23,  7.57it/s]

{'loss': Array(0.22472887, dtype=float32), 'loss_cross_entropy': Array(0.20128453, dtype=float32)}


  1%|▏         | 13739/1000000 [38:08<28:13:15,  9.71it/s]

{'loss': Array(0.26761678, dtype=float32), 'loss_cross_entropy': Array(0.24213068, dtype=float32)}


  1%|▏         | 13749/1000000 [38:09<25:50:37, 10.60it/s]

{'loss': Array(0.27312756, dtype=float32), 'loss_cross_entropy': Array(0.24798036, dtype=float32)}


  1%|▏         | 13759/1000000 [38:11<28:56:46,  9.46it/s]

{'loss': Array(0.26185212, dtype=float32), 'loss_cross_entropy': Array(0.2371112, dtype=float32)}


  1%|▏         | 13769/1000000 [38:12<25:56:41, 10.56it/s]

{'loss': Array(0.2598713, dtype=float32), 'loss_cross_entropy': Array(0.23477702, dtype=float32)}


  1%|▏         | 13779/1000000 [38:14<40:29:58,  6.76it/s]

{'loss': Array(0.2788919, dtype=float32), 'loss_cross_entropy': Array(0.25114262, dtype=float32)}


  1%|▏         | 13789/1000000 [38:15<27:49:52,  9.84it/s]

{'loss': Array(0.2364097, dtype=float32), 'loss_cross_entropy': Array(0.21285787, dtype=float32)}


  1%|▏         | 13799/1000000 [38:17<25:39:07, 10.68it/s]

{'loss': Array(0.25666636, dtype=float32), 'loss_cross_entropy': Array(0.23277675, dtype=float32)}


  1%|▏         | 13809/1000000 [38:19<33:08:09,  8.27it/s]

{'loss': Array(0.25025004, dtype=float32), 'loss_cross_entropy': Array(0.22623315, dtype=float32)}


  1%|▏         | 13819/1000000 [38:20<26:48:44, 10.22it/s]

{'loss': Array(0.26373526, dtype=float32), 'loss_cross_entropy': Array(0.23915432, dtype=float32)}


  1%|▏         | 13829/1000000 [38:22<36:09:13,  7.58it/s]

{'loss': Array(0.26840693, dtype=float32), 'loss_cross_entropy': Array(0.24371564, dtype=float32)}


  1%|▏         | 13839/1000000 [38:23<27:55:34,  9.81it/s]

{'loss': Array(0.27332747, dtype=float32), 'loss_cross_entropy': Array(0.24789762, dtype=float32)}


  1%|▏         | 13849/1000000 [38:25<36:03:33,  7.60it/s]

{'loss': Array(0.2577599, dtype=float32), 'loss_cross_entropy': Array(0.23445742, dtype=float32)}


  1%|▏         | 13859/1000000 [38:26<27:12:30, 10.07it/s]

{'loss': Array(0.24502997, dtype=float32), 'loss_cross_entropy': Array(0.22006734, dtype=float32)}


  1%|▏         | 13869/1000000 [38:27<25:47:20, 10.62it/s]

{'loss': Array(0.2642916, dtype=float32), 'loss_cross_entropy': Array(0.23900595, dtype=float32)}


  1%|▏         | 13879/1000000 [38:29<30:44:57,  8.91it/s]

{'loss': Array(0.25074562, dtype=float32), 'loss_cross_entropy': Array(0.22610343, dtype=float32)}


  1%|▏         | 13889/1000000 [38:31<26:17:44, 10.42it/s]

{'loss': Array(0.26091406, dtype=float32), 'loss_cross_entropy': Array(0.23697412, dtype=float32)}


  1%|▏         | 13899/1000000 [38:32<32:44:55,  8.36it/s]

{'loss': Array(0.26436207, dtype=float32), 'loss_cross_entropy': Array(0.23764808, dtype=float32)}


  1%|▏         | 13909/1000000 [38:34<27:12:58, 10.06it/s]

{'loss': Array(0.25184152, dtype=float32), 'loss_cross_entropy': Array(0.22750442, dtype=float32)}


  1%|▏         | 13919/1000000 [38:35<39:47:51,  6.88it/s]

{'loss': Array(0.25423023, dtype=float32), 'loss_cross_entropy': Array(0.22950438, dtype=float32)}


  1%|▏         | 13929/1000000 [38:37<27:51:49,  9.83it/s]

{'loss': Array(0.26018378, dtype=float32), 'loss_cross_entropy': Array(0.23551698, dtype=float32)}


  1%|▏         | 13939/1000000 [38:38<41:04:32,  6.67it/s]

{'loss': Array(0.26792878, dtype=float32), 'loss_cross_entropy': Array(0.24188414, dtype=float32)}


  1%|▏         | 13949/1000000 [38:40<27:45:51,  9.87it/s]

{'loss': Array(0.2606844, dtype=float32), 'loss_cross_entropy': Array(0.23551574, dtype=float32)}


  1%|▏         | 13959/1000000 [38:41<25:54:36, 10.57it/s]

{'loss': Array(0.25281236, dtype=float32), 'loss_cross_entropy': Array(0.22796094, dtype=float32)}


  1%|▏         | 13969/1000000 [38:43<35:41:26,  7.67it/s]

{'loss': Array(0.26369625, dtype=float32), 'loss_cross_entropy': Array(0.23797755, dtype=float32)}


  1%|▏         | 13979/1000000 [38:44<26:43:53, 10.25it/s]

{'loss': Array(0.25347587, dtype=float32), 'loss_cross_entropy': Array(0.22861312, dtype=float32)}


  1%|▏         | 13989/1000000 [38:46<40:16:43,  6.80it/s]

{'loss': Array(0.26101527, dtype=float32), 'loss_cross_entropy': Array(0.23631163, dtype=float32)}


  1%|▏         | 13999/1000000 [38:47<27:51:08,  9.83it/s]

{'loss': Array(0.25535306, dtype=float32), 'loss_cross_entropy': Array(0.23185793, dtype=float32)}


  1%|▏         | 14009/1000000 [38:55<90:46:02,  3.02it/s] 

{'loss': Array(0.26355457, dtype=float32), 'loss_cross_entropy': Array(0.23854144, dtype=float32)}


  1%|▏         | 14019/1000000 [38:57<39:23:57,  6.95it/s] 

{'loss': Array(0.2478137, dtype=float32), 'loss_cross_entropy': Array(0.22294386, dtype=float32)}


  1%|▏         | 14029/1000000 [38:58<28:22:33,  9.65it/s]

{'loss': Array(0.28206265, dtype=float32), 'loss_cross_entropy': Array(0.25495544, dtype=float32)}


  1%|▏         | 14039/1000000 [39:00<30:40:56,  8.93it/s]

{'loss': Array(0.2633979, dtype=float32), 'loss_cross_entropy': Array(0.23836806, dtype=float32)}


  1%|▏         | 14049/1000000 [39:01<26:46:48, 10.23it/s]

{'loss': Array(0.25589487, dtype=float32), 'loss_cross_entropy': Array(0.231286, dtype=float32)}


  1%|▏         | 14059/1000000 [39:03<42:00:35,  6.52it/s]

{'loss': Array(0.27311245, dtype=float32), 'loss_cross_entropy': Array(0.24654447, dtype=float32)}


  1%|▏         | 14069/1000000 [39:05<27:56:33,  9.80it/s]

{'loss': Array(0.28059545, dtype=float32), 'loss_cross_entropy': Array(0.25509933, dtype=float32)}


  1%|▏         | 14079/1000000 [39:06<25:22:53, 10.79it/s]

{'loss': Array(0.25073645, dtype=float32), 'loss_cross_entropy': Array(0.2263815, dtype=float32)}


  1%|▏         | 14089/1000000 [39:08<35:34:53,  7.70it/s]

{'loss': Array(0.27341637, dtype=float32), 'loss_cross_entropy': Array(0.24785125, dtype=float32)}


  1%|▏         | 14099/1000000 [39:09<27:40:24,  9.90it/s]

{'loss': Array(0.26307315, dtype=float32), 'loss_cross_entropy': Array(0.23806535, dtype=float32)}


  1%|▏         | 14109/1000000 [39:11<40:54:44,  6.69it/s]

{'loss': Array(0.2650183, dtype=float32), 'loss_cross_entropy': Array(0.23921748, dtype=float32)}


  1%|▏         | 14119/1000000 [39:12<27:52:12,  9.83it/s]

{'loss': Array(0.27632508, dtype=float32), 'loss_cross_entropy': Array(0.24982233, dtype=float32)}


  1%|▏         | 14129/1000000 [39:14<26:31:55, 10.32it/s]

{'loss': Array(0.25702664, dtype=float32), 'loss_cross_entropy': Array(0.23208164, dtype=float32)}


  1%|▏         | 14139/1000000 [39:15<30:38:57,  8.93it/s]

{'loss': Array(0.28727248, dtype=float32), 'loss_cross_entropy': Array(0.25913087, dtype=float32)}


  1%|▏         | 14149/1000000 [39:17<25:43:24, 10.65it/s]

{'loss': Array(0.24590449, dtype=float32), 'loss_cross_entropy': Array(0.22278483, dtype=float32)}


  1%|▏         | 14159/1000000 [39:18<36:28:14,  7.51it/s]

{'loss': Array(0.24373336, dtype=float32), 'loss_cross_entropy': Array(0.21839677, dtype=float32)}


  1%|▏         | 14169/1000000 [39:20<27:34:30,  9.93it/s]

{'loss': Array(0.2646451, dtype=float32), 'loss_cross_entropy': Array(0.23883288, dtype=float32)}


  1%|▏         | 14179/1000000 [39:21<26:01:11, 10.52it/s]

{'loss': Array(0.25915307, dtype=float32), 'loss_cross_entropy': Array(0.23376627, dtype=float32)}


  1%|▏         | 14189/1000000 [39:23<33:43:25,  8.12it/s]

{'loss': Array(0.27190956, dtype=float32), 'loss_cross_entropy': Array(0.24711826, dtype=float32)}


  1%|▏         | 14199/1000000 [39:24<26:28:39, 10.34it/s]

{'loss': Array(0.26056919, dtype=float32), 'loss_cross_entropy': Array(0.23462494, dtype=float32)}


  1%|▏         | 14209/1000000 [39:26<25:30:47, 10.73it/s]

{'loss': Array(0.26797035, dtype=float32), 'loss_cross_entropy': Array(0.24242258, dtype=float32)}


  1%|▏         | 14219/1000000 [39:27<28:45:38,  9.52it/s]

{'loss': Array(0.28779316, dtype=float32), 'loss_cross_entropy': Array(0.26069507, dtype=float32)}


  1%|▏         | 14229/1000000 [39:29<26:08:42, 10.47it/s]

{'loss': Array(0.2819657, dtype=float32), 'loss_cross_entropy': Array(0.25464204, dtype=float32)}


  1%|▏         | 14239/1000000 [39:31<33:13:21,  8.24it/s]

{'loss': Array(0.26804247, dtype=float32), 'loss_cross_entropy': Array(0.242919, dtype=float32)}


  1%|▏         | 14249/1000000 [39:32<26:44:38, 10.24it/s]

{'loss': Array(0.25692454, dtype=float32), 'loss_cross_entropy': Array(0.23135646, dtype=float32)}


  1%|▏         | 14259/1000000 [39:34<40:40:11,  6.73it/s]

{'loss': Array(0.2311456, dtype=float32), 'loss_cross_entropy': Array(0.20811196, dtype=float32)}


  1%|▏         | 14269/1000000 [39:35<27:47:22,  9.85it/s]

{'loss': Array(0.26759723, dtype=float32), 'loss_cross_entropy': Array(0.24170049, dtype=float32)}


  1%|▏         | 14279/1000000 [39:37<40:50:05,  6.71it/s]

{'loss': Array(0.25303105, dtype=float32), 'loss_cross_entropy': Array(0.2280535, dtype=float32)}


  1%|▏         | 14289/1000000 [39:38<28:35:00,  9.58it/s]

{'loss': Array(0.278394, dtype=float32), 'loss_cross_entropy': Array(0.25164524, dtype=float32)}


  1%|▏         | 14299/1000000 [39:40<25:45:56, 10.63it/s]

{'loss': Array(0.23562796, dtype=float32), 'loss_cross_entropy': Array(0.21204968, dtype=float32)}


  1%|▏         | 14309/1000000 [39:41<35:37:13,  7.69it/s]

{'loss': Array(0.24671021, dtype=float32), 'loss_cross_entropy': Array(0.22271931, dtype=float32)}


  1%|▏         | 14319/1000000 [39:43<29:18:51,  9.34it/s]

{'loss': Array(0.2713809, dtype=float32), 'loss_cross_entropy': Array(0.24594752, dtype=float32)}


  1%|▏         | 14329/1000000 [39:44<40:28:33,  6.76it/s]

{'loss': Array(0.23912232, dtype=float32), 'loss_cross_entropy': Array(0.21506405, dtype=float32)}


  1%|▏         | 14339/1000000 [39:46<28:28:27,  9.62it/s]

{'loss': Array(0.2568966, dtype=float32), 'loss_cross_entropy': Array(0.23065534, dtype=float32)}


  1%|▏         | 14349/1000000 [39:47<26:09:44, 10.47it/s]

{'loss': Array(0.26100454, dtype=float32), 'loss_cross_entropy': Array(0.23417096, dtype=float32)}


  1%|▏         | 14359/1000000 [39:49<28:45:41,  9.52it/s]

{'loss': Array(0.2672375, dtype=float32), 'loss_cross_entropy': Array(0.2404534, dtype=float32)}


  1%|▏         | 14369/1000000 [39:50<25:51:24, 10.59it/s]

{'loss': Array(0.2679219, dtype=float32), 'loss_cross_entropy': Array(0.24285693, dtype=float32)}


  1%|▏         | 14379/1000000 [39:52<28:28:46,  9.61it/s]

{'loss': Array(0.25851688, dtype=float32), 'loss_cross_entropy': Array(0.2329876, dtype=float32)}


  1%|▏         | 14389/1000000 [39:53<26:31:58, 10.32it/s]

{'loss': Array(0.26294774, dtype=float32), 'loss_cross_entropy': Array(0.23704664, dtype=float32)}


  1%|▏         | 14399/1000000 [39:55<40:14:14,  6.80it/s]

{'loss': Array(0.2564776, dtype=float32), 'loss_cross_entropy': Array(0.23087302, dtype=float32)}


  1%|▏         | 14409/1000000 [39:56<27:23:16, 10.00it/s]

{'loss': Array(0.22616287, dtype=float32), 'loss_cross_entropy': Array(0.20298985, dtype=float32)}


  1%|▏         | 14419/1000000 [39:58<27:14:58, 10.05it/s]

{'loss': Array(0.27684376, dtype=float32), 'loss_cross_entropy': Array(0.24989705, dtype=float32)}


  1%|▏         | 14429/1000000 [39:59<34:21:07,  7.97it/s]

{'loss': Array(0.25815466, dtype=float32), 'loss_cross_entropy': Array(0.2324549, dtype=float32)}


  1%|▏         | 14439/1000000 [40:01<26:45:02, 10.23it/s]

{'loss': Array(0.26685753, dtype=float32), 'loss_cross_entropy': Array(0.2396717, dtype=float32)}


  1%|▏         | 14449/1000000 [40:02<40:24:44,  6.77it/s]

{'loss': Array(0.25946328, dtype=float32), 'loss_cross_entropy': Array(0.23421359, dtype=float32)}


  1%|▏         | 14459/1000000 [40:04<28:23:02,  9.64it/s]

{'loss': Array(0.23944233, dtype=float32), 'loss_cross_entropy': Array(0.21531986, dtype=float32)}


  1%|▏         | 14469/1000000 [40:05<26:00:43, 10.52it/s]

{'loss': Array(0.25523645, dtype=float32), 'loss_cross_entropy': Array(0.23000908, dtype=float32)}


  1%|▏         | 14479/1000000 [40:07<30:04:27,  9.10it/s]

{'loss': Array(0.26026902, dtype=float32), 'loss_cross_entropy': Array(0.2352039, dtype=float32)}


  1%|▏         | 14489/1000000 [40:08<26:45:17, 10.23it/s]

{'loss': Array(0.26464295, dtype=float32), 'loss_cross_entropy': Array(0.23924883, dtype=float32)}


  1%|▏         | 14499/1000000 [40:10<36:06:17,  7.58it/s]

{'loss': Array(0.27792853, dtype=float32), 'loss_cross_entropy': Array(0.25117207, dtype=float32)}


  1%|▏         | 14509/1000000 [40:18<90:51:00,  3.01it/s] 

{'loss': Array(0.24430929, dtype=float32), 'loss_cross_entropy': Array(0.22018914, dtype=float32)}


  1%|▏         | 14519/1000000 [40:20<46:32:35,  5.88it/s]

{'loss': Array(0.24256821, dtype=float32), 'loss_cross_entropy': Array(0.21761906, dtype=float32)}


  1%|▏         | 14529/1000000 [40:21<28:43:14,  9.53it/s]

{'loss': Array(0.26876092, dtype=float32), 'loss_cross_entropy': Array(0.24315779, dtype=float32)}


  1%|▏         | 14539/1000000 [40:22<26:03:22, 10.51it/s]

{'loss': Array(0.27094448, dtype=float32), 'loss_cross_entropy': Array(0.24479257, dtype=float32)}


  1%|▏         | 14549/1000000 [40:24<32:36:16,  8.40it/s]

{'loss': Array(0.2432236, dtype=float32), 'loss_cross_entropy': Array(0.2199965, dtype=float32)}


  1%|▏         | 14559/1000000 [40:25<26:31:32, 10.32it/s]

{'loss': Array(0.2786842, dtype=float32), 'loss_cross_entropy': Array(0.25300303, dtype=float32)}


  1%|▏         | 14569/1000000 [40:27<25:12:22, 10.86it/s]

{'loss': Array(0.28625545, dtype=float32), 'loss_cross_entropy': Array(0.25992617, dtype=float32)}


  1%|▏         | 14579/1000000 [40:28<28:59:33,  9.44it/s]

{'loss': Array(0.25958186, dtype=float32), 'loss_cross_entropy': Array(0.2350066, dtype=float32)}


  1%|▏         | 14589/1000000 [40:30<25:43:55, 10.64it/s]

{'loss': Array(0.24538735, dtype=float32), 'loss_cross_entropy': Array(0.22075449, dtype=float32)}


  1%|▏         | 14599/1000000 [40:31<32:51:59,  8.33it/s]

{'loss': Array(0.28127927, dtype=float32), 'loss_cross_entropy': Array(0.25428253, dtype=float32)}


  1%|▏         | 14609/1000000 [40:33<27:48:01,  9.85it/s]

{'loss': Array(0.27129173, dtype=float32), 'loss_cross_entropy': Array(0.24710396, dtype=float32)}


  1%|▏         | 14619/1000000 [40:35<36:12:31,  7.56it/s]

{'loss': Array(0.28775167, dtype=float32), 'loss_cross_entropy': Array(0.2602984, dtype=float32)}


  1%|▏         | 14629/1000000 [40:36<27:22:00, 10.00it/s]

{'loss': Array(0.25552246, dtype=float32), 'loss_cross_entropy': Array(0.22969575, dtype=float32)}


  1%|▏         | 14639/1000000 [40:37<25:42:49, 10.64it/s]

{'loss': Array(0.2774786, dtype=float32), 'loss_cross_entropy': Array(0.25187692, dtype=float32)}


  1%|▏         | 14649/1000000 [40:39<33:53:28,  8.08it/s]

{'loss': Array(0.26453364, dtype=float32), 'loss_cross_entropy': Array(0.23934858, dtype=float32)}


  1%|▏         | 14659/1000000 [40:41<27:02:36, 10.12it/s]

{'loss': Array(0.27310213, dtype=float32), 'loss_cross_entropy': Array(0.24676459, dtype=float32)}


  1%|▏         | 14669/1000000 [40:42<25:41:36, 10.65it/s]

{'loss': Array(0.28867543, dtype=float32), 'loss_cross_entropy': Array(0.26132798, dtype=float32)}


  1%|▏         | 14679/1000000 [40:44<29:05:03,  9.41it/s]

{'loss': Array(0.25546992, dtype=float32), 'loss_cross_entropy': Array(0.22869657, dtype=float32)}


  1%|▏         | 14689/1000000 [40:45<26:07:08, 10.48it/s]

{'loss': Array(0.25546834, dtype=float32), 'loss_cross_entropy': Array(0.23059288, dtype=float32)}


  1%|▏         | 14699/1000000 [40:47<32:38:06,  8.39it/s]

{'loss': Array(0.260849, dtype=float32), 'loss_cross_entropy': Array(0.23655823, dtype=float32)}


  1%|▏         | 14709/1000000 [40:48<26:49:41, 10.20it/s]

{'loss': Array(0.2583749, dtype=float32), 'loss_cross_entropy': Array(0.23450135, dtype=float32)}


  1%|▏         | 14719/1000000 [40:50<40:10:19,  6.81it/s]

{'loss': Array(0.25567976, dtype=float32), 'loss_cross_entropy': Array(0.23132065, dtype=float32)}


  1%|▏         | 14729/1000000 [40:51<28:09:38,  9.72it/s]

{'loss': Array(0.25679672, dtype=float32), 'loss_cross_entropy': Array(0.23112324, dtype=float32)}


  1%|▏         | 14739/1000000 [40:53<28:14:41,  9.69it/s]

{'loss': Array(0.25917512, dtype=float32), 'loss_cross_entropy': Array(0.23470536, dtype=float32)}


  1%|▏         | 14749/1000000 [40:54<35:47:32,  7.65it/s]

{'loss': Array(0.25461596, dtype=float32), 'loss_cross_entropy': Array(0.22986059, dtype=float32)}


  1%|▏         | 14759/1000000 [40:56<27:00:57, 10.13it/s]

{'loss': Array(0.24375, dtype=float32), 'loss_cross_entropy': Array(0.22033985, dtype=float32)}


  1%|▏         | 14769/1000000 [40:57<25:06:30, 10.90it/s]

{'loss': Array(0.2732454, dtype=float32), 'loss_cross_entropy': Array(0.24684624, dtype=float32)}


  1%|▏         | 14779/1000000 [40:59<30:42:26,  8.91it/s]

{'loss': Array(0.25634223, dtype=float32), 'loss_cross_entropy': Array(0.2309635, dtype=float32)}


  1%|▏         | 14789/1000000 [41:00<26:55:49, 10.16it/s]

{'loss': Array(0.26555043, dtype=float32), 'loss_cross_entropy': Array(0.24011497, dtype=float32)}


  1%|▏         | 14799/1000000 [41:02<32:12:55,  8.49it/s]

{'loss': Array(0.25015625, dtype=float32), 'loss_cross_entropy': Array(0.22639532, dtype=float32)}


  1%|▏         | 14809/1000000 [41:03<26:39:24, 10.27it/s]

{'loss': Array(0.25091892, dtype=float32), 'loss_cross_entropy': Array(0.22449937, dtype=float32)}


  1%|▏         | 14819/1000000 [41:05<25:50:37, 10.59it/s]

{'loss': Array(0.27807608, dtype=float32), 'loss_cross_entropy': Array(0.25086507, dtype=float32)}


  1%|▏         | 14829/1000000 [41:06<29:42:17,  9.21it/s]

{'loss': Array(0.26111647, dtype=float32), 'loss_cross_entropy': Array(0.23460126, dtype=float32)}


  1%|▏         | 14839/1000000 [41:08<27:41:37,  9.88it/s]

{'loss': Array(0.24219099, dtype=float32), 'loss_cross_entropy': Array(0.21879624, dtype=float32)}


  1%|▏         | 14849/1000000 [41:10<35:37:10,  7.68it/s]

{'loss': Array(0.25231853, dtype=float32), 'loss_cross_entropy': Array(0.22709146, dtype=float32)}


  1%|▏         | 14859/1000000 [41:11<26:31:37, 10.32it/s]

{'loss': Array(0.2734674, dtype=float32), 'loss_cross_entropy': Array(0.2499547, dtype=float32)}


  1%|▏         | 14869/1000000 [41:12<25:49:35, 10.60it/s]

{'loss': Array(0.24145702, dtype=float32), 'loss_cross_entropy': Array(0.21742637, dtype=float32)}


  1%|▏         | 14879/1000000 [41:14<29:12:14,  9.37it/s]

{'loss': Array(0.26328078, dtype=float32), 'loss_cross_entropy': Array(0.23867284, dtype=float32)}


  1%|▏         | 14889/1000000 [41:15<25:56:49, 10.55it/s]

{'loss': Array(0.2507423, dtype=float32), 'loss_cross_entropy': Array(0.22512043, dtype=float32)}


  1%|▏         | 14899/1000000 [41:17<30:07:45,  9.08it/s]

{'loss': Array(0.27922884, dtype=float32), 'loss_cross_entropy': Array(0.2537448, dtype=float32)}


  1%|▏         | 14909/1000000 [41:18<27:09:19, 10.08it/s]

{'loss': Array(0.26785347, dtype=float32), 'loss_cross_entropy': Array(0.24256332, dtype=float32)}


  1%|▏         | 14919/1000000 [41:20<40:47:58,  6.71it/s]

{'loss': Array(0.26771662, dtype=float32), 'loss_cross_entropy': Array(0.2420378, dtype=float32)}


  1%|▏         | 14929/1000000 [41:22<27:59:33,  9.78it/s]

{'loss': Array(0.27290043, dtype=float32), 'loss_cross_entropy': Array(0.2483108, dtype=float32)}


  1%|▏         | 14939/1000000 [41:23<26:40:43, 10.26it/s]

{'loss': Array(0.26125503, dtype=float32), 'loss_cross_entropy': Array(0.23581731, dtype=float32)}


  1%|▏         | 14949/1000000 [41:25<32:49:45,  8.33it/s]

{'loss': Array(0.24505503, dtype=float32), 'loss_cross_entropy': Array(0.22018778, dtype=float32)}


  1%|▏         | 14959/1000000 [41:26<26:12:40, 10.44it/s]

{'loss': Array(0.2570843, dtype=float32), 'loss_cross_entropy': Array(0.23256889, dtype=float32)}


  1%|▏         | 14969/1000000 [41:28<37:04:22,  7.38it/s]

{'loss': Array(0.25018856, dtype=float32), 'loss_cross_entropy': Array(0.22670208, dtype=float32)}


  1%|▏         | 14979/1000000 [41:29<27:19:56, 10.01it/s]

{'loss': Array(0.23717153, dtype=float32), 'loss_cross_entropy': Array(0.21311732, dtype=float32)}


  1%|▏         | 14989/1000000 [41:31<35:59:27,  7.60it/s]

{'loss': Array(0.23831931, dtype=float32), 'loss_cross_entropy': Array(0.21528482, dtype=float32)}


  1%|▏         | 14999/1000000 [41:32<27:21:14, 10.00it/s]

{'loss': Array(0.2489473, dtype=float32), 'loss_cross_entropy': Array(0.22468884, dtype=float32)}


  2%|▏         | 15009/1000000 [41:40<89:31:13,  3.06it/s] 

{'loss': Array(0.27366886, dtype=float32), 'loss_cross_entropy': Array(0.24676959, dtype=float32)}


  2%|▏         | 15019/1000000 [41:42<41:04:34,  6.66it/s]

{'loss': Array(0.24914923, dtype=float32), 'loss_cross_entropy': Array(0.22490154, dtype=float32)}


  2%|▏         | 15029/1000000 [41:43<28:31:56,  9.59it/s]

{'loss': Array(0.2530388, dtype=float32), 'loss_cross_entropy': Array(0.2274671, dtype=float32)}


  2%|▏         | 15039/1000000 [41:45<35:53:58,  7.62it/s]

{'loss': Array(0.26634383, dtype=float32), 'loss_cross_entropy': Array(0.24116397, dtype=float32)}


  2%|▏         | 15049/1000000 [41:46<27:03:13, 10.11it/s]

{'loss': Array(0.27029896, dtype=float32), 'loss_cross_entropy': Array(0.24367113, dtype=float32)}


  2%|▏         | 15059/1000000 [41:48<42:00:04,  6.51it/s]

{'loss': Array(0.2526154, dtype=float32), 'loss_cross_entropy': Array(0.2290653, dtype=float32)}


  2%|▏         | 15069/1000000 [41:49<27:41:14,  9.88it/s]

{'loss': Array(0.25885817, dtype=float32), 'loss_cross_entropy': Array(0.23386946, dtype=float32)}


  2%|▏         | 15079/1000000 [41:51<25:29:46, 10.73it/s]

{'loss': Array(0.26976195, dtype=float32), 'loss_cross_entropy': Array(0.24336337, dtype=float32)}


  2%|▏         | 15089/1000000 [41:52<32:18:08,  8.47it/s]

{'loss': Array(0.26070982, dtype=float32), 'loss_cross_entropy': Array(0.23611367, dtype=float32)}


  2%|▏         | 15099/1000000 [41:54<27:13:47, 10.05it/s]

{'loss': Array(0.23795724, dtype=float32), 'loss_cross_entropy': Array(0.21420336, dtype=float32)}


  2%|▏         | 15109/1000000 [41:55<36:49:04,  7.43it/s]

{'loss': Array(0.25034815, dtype=float32), 'loss_cross_entropy': Array(0.22762202, dtype=float32)}


  2%|▏         | 15119/1000000 [41:57<27:29:52,  9.95it/s]

{'loss': Array(0.2365549, dtype=float32), 'loss_cross_entropy': Array(0.2117627, dtype=float32)}


  2%|▏         | 15129/1000000 [41:58<26:44:08, 10.23it/s]

{'loss': Array(0.2546498, dtype=float32), 'loss_cross_entropy': Array(0.22977965, dtype=float32)}


  2%|▏         | 15139/1000000 [42:00<29:12:37,  9.37it/s]

{'loss': Array(0.26437232, dtype=float32), 'loss_cross_entropy': Array(0.23803945, dtype=float32)}


  2%|▏         | 15149/1000000 [42:01<25:40:52, 10.65it/s]

{'loss': Array(0.2420276, dtype=float32), 'loss_cross_entropy': Array(0.2179483, dtype=float32)}


  2%|▏         | 15159/1000000 [42:03<41:30:43,  6.59it/s]

{'loss': Array(0.24559398, dtype=float32), 'loss_cross_entropy': Array(0.22257808, dtype=float32)}


  2%|▏         | 15169/1000000 [42:04<28:30:29,  9.60it/s]

{'loss': Array(0.2674782, dtype=float32), 'loss_cross_entropy': Array(0.24181263, dtype=float32)}


  2%|▏         | 15179/1000000 [42:06<26:32:48, 10.30it/s]

{'loss': Array(0.2678576, dtype=float32), 'loss_cross_entropy': Array(0.24257837, dtype=float32)}


  2%|▏         | 15189/1000000 [42:08<35:25:19,  7.72it/s]

{'loss': Array(0.26053688, dtype=float32), 'loss_cross_entropy': Array(0.23615848, dtype=float32)}


  2%|▏         | 15199/1000000 [42:09<27:15:22, 10.04it/s]

{'loss': Array(0.28724578, dtype=float32), 'loss_cross_entropy': Array(0.26073423, dtype=float32)}


  2%|▏         | 15209/1000000 [42:11<36:07:45,  7.57it/s]

{'loss': Array(0.25928134, dtype=float32), 'loss_cross_entropy': Array(0.23451555, dtype=float32)}


  2%|▏         | 15219/1000000 [42:12<26:33:34, 10.30it/s]

{'loss': Array(0.24742809, dtype=float32), 'loss_cross_entropy': Array(0.22274451, dtype=float32)}


  2%|▏         | 15229/1000000 [42:14<36:17:42,  7.54it/s]

{'loss': Array(0.25633505, dtype=float32), 'loss_cross_entropy': Array(0.23064797, dtype=float32)}


  2%|▏         | 15239/1000000 [42:15<27:28:19,  9.96it/s]

{'loss': Array(0.24799645, dtype=float32), 'loss_cross_entropy': Array(0.22286312, dtype=float32)}


  2%|▏         | 15249/1000000 [42:17<26:12:03, 10.44it/s]

{'loss': Array(0.2461596, dtype=float32), 'loss_cross_entropy': Array(0.22083703, dtype=float32)}


  2%|▏         | 15259/1000000 [42:18<31:04:42,  8.80it/s]

{'loss': Array(0.23939715, dtype=float32), 'loss_cross_entropy': Array(0.21755575, dtype=float32)}


  2%|▏         | 15269/1000000 [42:20<25:59:48, 10.52it/s]

{'loss': Array(0.26713237, dtype=float32), 'loss_cross_entropy': Array(0.24166155, dtype=float32)}


  2%|▏         | 15279/1000000 [42:21<32:18:41,  8.47it/s]

{'loss': Array(0.24368177, dtype=float32), 'loss_cross_entropy': Array(0.22021389, dtype=float32)}


  2%|▏         | 15289/1000000 [42:23<28:22:23,  9.64it/s]

{'loss': Array(0.25848922, dtype=float32), 'loss_cross_entropy': Array(0.23415759, dtype=float32)}


  2%|▏         | 15299/1000000 [42:24<39:58:30,  6.84it/s]

{'loss': Array(0.25801584, dtype=float32), 'loss_cross_entropy': Array(0.23243117, dtype=float32)}


  2%|▏         | 15309/1000000 [42:26<27:22:59,  9.99it/s]

{'loss': Array(0.25589114, dtype=float32), 'loss_cross_entropy': Array(0.23032339, dtype=float32)}


  2%|▏         | 15319/1000000 [42:27<39:58:33,  6.84it/s]

{'loss': Array(0.27953508, dtype=float32), 'loss_cross_entropy': Array(0.25295338, dtype=float32)}


  2%|▏         | 15329/1000000 [42:29<28:25:43,  9.62it/s]

{'loss': Array(0.25601727, dtype=float32), 'loss_cross_entropy': Array(0.23111014, dtype=float32)}


  2%|▏         | 15339/1000000 [42:30<25:56:57, 10.54it/s]

{'loss': Array(0.27186376, dtype=float32), 'loss_cross_entropy': Array(0.24708202, dtype=float32)}


  2%|▏         | 15349/1000000 [42:32<35:42:44,  7.66it/s]

{'loss': Array(0.24891126, dtype=float32), 'loss_cross_entropy': Array(0.22429073, dtype=float32)}


  2%|▏         | 15359/1000000 [42:33<27:20:21, 10.00it/s]

{'loss': Array(0.255996, dtype=float32), 'loss_cross_entropy': Array(0.23183618, dtype=float32)}


  2%|▏         | 15369/1000000 [42:35<40:06:56,  6.82it/s]

{'loss': Array(0.24447158, dtype=float32), 'loss_cross_entropy': Array(0.22007315, dtype=float32)}


  2%|▏         | 15379/1000000 [42:36<27:58:25,  9.78it/s]

{'loss': Array(0.24760659, dtype=float32), 'loss_cross_entropy': Array(0.22325459, dtype=float32)}


  2%|▏         | 15389/1000000 [42:38<26:54:00, 10.17it/s]

{'loss': Array(0.29676437, dtype=float32), 'loss_cross_entropy': Array(0.26939616, dtype=float32)}


  2%|▏         | 15399/1000000 [42:40<29:49:56,  9.17it/s]

{'loss': Array(0.24949782, dtype=float32), 'loss_cross_entropy': Array(0.22581437, dtype=float32)}


  2%|▏         | 15409/1000000 [42:41<26:18:10, 10.40it/s]

{'loss': Array(0.24734412, dtype=float32), 'loss_cross_entropy': Array(0.22301947, dtype=float32)}


  2%|▏         | 15419/1000000 [42:43<30:56:34,  8.84it/s]

{'loss': Array(0.23855844, dtype=float32), 'loss_cross_entropy': Array(0.21476035, dtype=float32)}


  2%|▏         | 15429/1000000 [42:44<26:16:29, 10.41it/s]

{'loss': Array(0.25237206, dtype=float32), 'loss_cross_entropy': Array(0.22827862, dtype=float32)}


  2%|▏         | 15439/1000000 [42:46<40:36:56,  6.73it/s]

{'loss': Array(0.278453, dtype=float32), 'loss_cross_entropy': Array(0.2520324, dtype=float32)}


  2%|▏         | 15449/1000000 [42:47<27:44:19,  9.86it/s]

{'loss': Array(0.23778306, dtype=float32), 'loss_cross_entropy': Array(0.21408693, dtype=float32)}


  2%|▏         | 15459/1000000 [42:49<26:01:51, 10.51it/s]

{'loss': Array(0.28044006, dtype=float32), 'loss_cross_entropy': Array(0.25396076, dtype=float32)}


  2%|▏         | 15469/1000000 [42:50<32:10:19,  8.50it/s]

{'loss': Array(0.25166065, dtype=float32), 'loss_cross_entropy': Array(0.22777823, dtype=float32)}


  2%|▏         | 15479/1000000 [42:52<26:05:39, 10.48it/s]

{'loss': Array(0.25713673, dtype=float32), 'loss_cross_entropy': Array(0.23336633, dtype=float32)}


  2%|▏         | 15489/1000000 [42:53<41:03:50,  6.66it/s]

{'loss': Array(0.25927284, dtype=float32), 'loss_cross_entropy': Array(0.23480792, dtype=float32)}


  2%|▏         | 15499/1000000 [42:55<27:55:41,  9.79it/s]

{'loss': Array(0.22812422, dtype=float32), 'loss_cross_entropy': Array(0.20556545, dtype=float32)}


  2%|▏         | 15509/1000000 [43:03<92:14:01,  2.96it/s] 

{'loss': Array(0.25970957, dtype=float32), 'loss_cross_entropy': Array(0.23347616, dtype=float32)}


  2%|▏         | 15519/1000000 [43:04<41:58:47,  6.51it/s] 

{'loss': Array(0.23988317, dtype=float32), 'loss_cross_entropy': Array(0.21616681, dtype=float32)}


  2%|▏         | 15529/1000000 [43:06<28:27:43,  9.61it/s]

{'loss': Array(0.2434841, dtype=float32), 'loss_cross_entropy': Array(0.22014217, dtype=float32)}


  2%|▏         | 15539/1000000 [43:08<35:01:08,  7.81it/s]

{'loss': Array(0.2594093, dtype=float32), 'loss_cross_entropy': Array(0.234266, dtype=float32)}


  2%|▏         | 15549/1000000 [43:09<26:51:34, 10.18it/s]

{'loss': Array(0.23721908, dtype=float32), 'loss_cross_entropy': Array(0.21273048, dtype=float32)}


  2%|▏         | 15559/1000000 [43:11<41:10:44,  6.64it/s]

{'loss': Array(0.25327802, dtype=float32), 'loss_cross_entropy': Array(0.22941649, dtype=float32)}


  2%|▏         | 15569/1000000 [43:12<27:50:54,  9.82it/s]

{'loss': Array(0.27168518, dtype=float32), 'loss_cross_entropy': Array(0.24646404, dtype=float32)}


  2%|▏         | 15579/1000000 [43:13<26:08:07, 10.46it/s]

{'loss': Array(0.24608512, dtype=float32), 'loss_cross_entropy': Array(0.22279203, dtype=float32)}


  2%|▏         | 15589/1000000 [43:15<28:50:42,  9.48it/s]

{'loss': Array(0.25748882, dtype=float32), 'loss_cross_entropy': Array(0.2333477, dtype=float32)}


  2%|▏         | 15599/1000000 [43:16<25:35:01, 10.69it/s]

{'loss': Array(0.24766226, dtype=float32), 'loss_cross_entropy': Array(0.22407351, dtype=float32)}


  2%|▏         | 15609/1000000 [43:18<40:52:22,  6.69it/s]

{'loss': Array(0.2752183, dtype=float32), 'loss_cross_entropy': Array(0.24900933, dtype=float32)}


  2%|▏         | 15619/1000000 [43:20<27:47:55,  9.84it/s]

{'loss': Array(0.2395511, dtype=float32), 'loss_cross_entropy': Array(0.21507645, dtype=float32)}


  2%|▏         | 15629/1000000 [43:21<25:34:04, 10.69it/s]

{'loss': Array(0.24961033, dtype=float32), 'loss_cross_entropy': Array(0.22483258, dtype=float32)}


  2%|▏         | 15639/1000000 [43:23<32:18:12,  8.46it/s]

{'loss': Array(0.224309, dtype=float32), 'loss_cross_entropy': Array(0.20100491, dtype=float32)}


  2%|▏         | 15649/1000000 [43:24<26:08:50, 10.46it/s]

{'loss': Array(0.23547518, dtype=float32), 'loss_cross_entropy': Array(0.21256474, dtype=float32)}


  2%|▏         | 15659/1000000 [43:26<35:17:28,  7.75it/s]

{'loss': Array(0.2551426, dtype=float32), 'loss_cross_entropy': Array(0.23086527, dtype=float32)}


  2%|▏         | 15669/1000000 [43:27<27:09:24, 10.07it/s]

{'loss': Array(0.26749358, dtype=float32), 'loss_cross_entropy': Array(0.24279366, dtype=float32)}


  2%|▏         | 15679/1000000 [43:29<40:33:13,  6.74it/s]

{'loss': Array(0.25950253, dtype=float32), 'loss_cross_entropy': Array(0.23628092, dtype=float32)}


  2%|▏         | 15689/1000000 [43:30<28:02:28,  9.75it/s]

{'loss': Array(0.23954852, dtype=float32), 'loss_cross_entropy': Array(0.21538155, dtype=float32)}


  2%|▏         | 15699/1000000 [43:31<26:10:22, 10.45it/s]

{'loss': Array(0.2571016, dtype=float32), 'loss_cross_entropy': Array(0.23313919, dtype=float32)}


  2%|▏         | 15709/1000000 [43:33<33:25:40,  8.18it/s]

{'loss': Array(0.27749297, dtype=float32), 'loss_cross_entropy': Array(0.25035864, dtype=float32)}


  2%|▏         | 15719/1000000 [43:35<26:25:57, 10.34it/s]

{'loss': Array(0.22950792, dtype=float32), 'loss_cross_entropy': Array(0.20629595, dtype=float32)}


  2%|▏         | 15729/1000000 [43:36<35:19:34,  7.74it/s]

{'loss': Array(0.24040131, dtype=float32), 'loss_cross_entropy': Array(0.21773088, dtype=float32)}


  2%|▏         | 15739/1000000 [43:38<29:01:10,  9.42it/s]

{'loss': Array(0.2703354, dtype=float32), 'loss_cross_entropy': Array(0.2453218, dtype=float32)}


  2%|▏         | 15749/1000000 [43:39<26:06:33, 10.47it/s]

{'loss': Array(0.29181868, dtype=float32), 'loss_cross_entropy': Array(0.26535457, dtype=float32)}


  2%|▏         | 15759/1000000 [43:41<29:02:44,  9.41it/s]

{'loss': Array(0.25224778, dtype=float32), 'loss_cross_entropy': Array(0.22843592, dtype=float32)}


  2%|▏         | 15769/1000000 [43:42<25:34:41, 10.69it/s]

{'loss': Array(0.2624294, dtype=float32), 'loss_cross_entropy': Array(0.23907109, dtype=float32)}


  2%|▏         | 15779/1000000 [43:44<40:42:21,  6.72it/s]

{'loss': Array(0.25568822, dtype=float32), 'loss_cross_entropy': Array(0.23246792, dtype=float32)}


  2%|▏         | 15789/1000000 [43:45<27:46:22,  9.84it/s]

{'loss': Array(0.22368887, dtype=float32), 'loss_cross_entropy': Array(0.20135902, dtype=float32)}


  2%|▏         | 15799/1000000 [43:47<25:40:15, 10.65it/s]

{'loss': Array(0.3169407, dtype=float32), 'loss_cross_entropy': Array(0.289114, dtype=float32)}


  2%|▏         | 15809/1000000 [43:48<33:00:18,  8.28it/s]

{'loss': Array(0.2732442, dtype=float32), 'loss_cross_entropy': Array(0.24819548, dtype=float32)}


  2%|▏         | 15819/1000000 [43:50<26:18:04, 10.39it/s]

{'loss': Array(0.24055941, dtype=float32), 'loss_cross_entropy': Array(0.2171357, dtype=float32)}


  2%|▏         | 15829/1000000 [43:51<35:22:27,  7.73it/s]

{'loss': Array(0.2776523, dtype=float32), 'loss_cross_entropy': Array(0.25282118, dtype=float32)}


  2%|▏         | 15839/1000000 [43:53<28:28:11,  9.60it/s]

{'loss': Array(0.26176128, dtype=float32), 'loss_cross_entropy': Array(0.23749743, dtype=float32)}


  2%|▏         | 15849/1000000 [43:54<36:00:27,  7.59it/s]

{'loss': Array(0.2316027, dtype=float32), 'loss_cross_entropy': Array(0.20843866, dtype=float32)}


  2%|▏         | 15859/1000000 [43:56<26:41:23, 10.24it/s]

{'loss': Array(0.27942613, dtype=float32), 'loss_cross_entropy': Array(0.2531094, dtype=float32)}


  2%|▏         | 15869/1000000 [43:57<26:03:33, 10.49it/s]

{'loss': Array(0.25815496, dtype=float32), 'loss_cross_entropy': Array(0.23505044, dtype=float32)}


  2%|▏         | 15879/1000000 [43:59<31:21:44,  8.72it/s]

{'loss': Array(0.25346676, dtype=float32), 'loss_cross_entropy': Array(0.22918163, dtype=float32)}


  2%|▏         | 15889/1000000 [44:00<26:59:17, 10.13it/s]

{'loss': Array(0.25865874, dtype=float32), 'loss_cross_entropy': Array(0.23258848, dtype=float32)}


  2%|▏         | 15899/1000000 [44:02<32:53:37,  8.31it/s]

{'loss': Array(0.27397785, dtype=float32), 'loss_cross_entropy': Array(0.24802272, dtype=float32)}


  2%|▏         | 15909/1000000 [44:03<27:09:56, 10.06it/s]

{'loss': Array(0.25858882, dtype=float32), 'loss_cross_entropy': Array(0.23443492, dtype=float32)}


  2%|▏         | 15919/1000000 [44:05<39:58:17,  6.84it/s]

{'loss': Array(0.23981197, dtype=float32), 'loss_cross_entropy': Array(0.21637134, dtype=float32)}


  2%|▏         | 15929/1000000 [44:06<27:57:11,  9.78it/s]

{'loss': Array(0.25907853, dtype=float32), 'loss_cross_entropy': Array(0.23478127, dtype=float32)}


  2%|▏         | 15939/1000000 [44:08<41:21:03,  6.61it/s]

{'loss': Array(0.24442232, dtype=float32), 'loss_cross_entropy': Array(0.22101425, dtype=float32)}


  2%|▏         | 15949/1000000 [44:10<27:36:11,  9.90it/s]

{'loss': Array(0.26035148, dtype=float32), 'loss_cross_entropy': Array(0.2362788, dtype=float32)}


  2%|▏         | 15959/1000000 [44:11<25:32:05, 10.70it/s]

{'loss': Array(0.23998742, dtype=float32), 'loss_cross_entropy': Array(0.2176944, dtype=float32)}


  2%|▏         | 15969/1000000 [44:13<38:27:10,  7.11it/s]

{'loss': Array(0.26470816, dtype=float32), 'loss_cross_entropy': Array(0.23781438, dtype=float32)}


  2%|▏         | 15979/1000000 [44:14<27:06:42, 10.08it/s]

{'loss': Array(0.26154265, dtype=float32), 'loss_cross_entropy': Array(0.23795275, dtype=float32)}


  2%|▏         | 15989/1000000 [44:16<40:36:41,  6.73it/s]

{'loss': Array(0.28328502, dtype=float32), 'loss_cross_entropy': Array(0.25611147, dtype=float32)}


  2%|▏         | 15999/1000000 [44:17<27:50:10,  9.82it/s]

{'loss': Array(0.2529101, dtype=float32), 'loss_cross_entropy': Array(0.22896038, dtype=float32)}


  2%|▏         | 16009/1000000 [44:25<91:00:49,  3.00it/s] 

{'loss': Array(0.24063371, dtype=float32), 'loss_cross_entropy': Array(0.21864967, dtype=float32)}


  2%|▏         | 16019/1000000 [44:27<39:10:04,  6.98it/s] 

{'loss': Array(0.23681235, dtype=float32), 'loss_cross_entropy': Array(0.21374261, dtype=float32)}


  2%|▏         | 16029/1000000 [44:28<27:49:46,  9.82it/s]

{'loss': Array(0.2182102, dtype=float32), 'loss_cross_entropy': Array(0.19663154, dtype=float32)}


  2%|▏         | 16039/1000000 [44:30<28:55:35,  9.45it/s]

{'loss': Array(0.24262372, dtype=float32), 'loss_cross_entropy': Array(0.21993862, dtype=float32)}


  2%|▏         | 16049/1000000 [44:31<26:00:45, 10.51it/s]

{'loss': Array(0.23863927, dtype=float32), 'loss_cross_entropy': Array(0.21559195, dtype=float32)}


  2%|▏         | 16059/1000000 [44:33<41:40:51,  6.56it/s]

{'loss': Array(0.26998046, dtype=float32), 'loss_cross_entropy': Array(0.24411742, dtype=float32)}


  2%|▏         | 16069/1000000 [44:34<27:45:52,  9.84it/s]

{'loss': Array(0.24704733, dtype=float32), 'loss_cross_entropy': Array(0.22383273, dtype=float32)}


  2%|▏         | 16079/1000000 [44:36<25:26:40, 10.74it/s]

{'loss': Array(0.26475695, dtype=float32), 'loss_cross_entropy': Array(0.2417159, dtype=float32)}


  2%|▏         | 16089/1000000 [44:37<32:15:33,  8.47it/s]

{'loss': Array(0.25515586, dtype=float32), 'loss_cross_entropy': Array(0.23079441, dtype=float32)}


  2%|▏         | 16099/1000000 [44:39<27:28:22,  9.95it/s]

{'loss': Array(0.24410029, dtype=float32), 'loss_cross_entropy': Array(0.22037528, dtype=float32)}


  2%|▏         | 16109/1000000 [44:40<40:12:26,  6.80it/s]

{'loss': Array(0.2496969, dtype=float32), 'loss_cross_entropy': Array(0.22589047, dtype=float32)}


  2%|▏         | 16119/1000000 [44:42<28:03:22,  9.74it/s]

{'loss': Array(0.25304565, dtype=float32), 'loss_cross_entropy': Array(0.22949338, dtype=float32)}


  2%|▏         | 16129/1000000 [44:43<25:51:37, 10.57it/s]

{'loss': Array(0.24543718, dtype=float32), 'loss_cross_entropy': Array(0.22064014, dtype=float32)}


  2%|▏         | 16139/1000000 [44:45<30:13:10,  9.04it/s]

{'loss': Array(0.25753576, dtype=float32), 'loss_cross_entropy': Array(0.23400469, dtype=float32)}


  2%|▏         | 16149/1000000 [44:46<26:04:52, 10.48it/s]

{'loss': Array(0.26784533, dtype=float32), 'loss_cross_entropy': Array(0.24294455, dtype=float32)}


  2%|▏         | 16159/1000000 [44:48<36:28:19,  7.49it/s]

{'loss': Array(0.24488024, dtype=float32), 'loss_cross_entropy': Array(0.2210217, dtype=float32)}


  2%|▏         | 16169/1000000 [44:49<26:45:32, 10.21it/s]

{'loss': Array(0.23515733, dtype=float32), 'loss_cross_entropy': Array(0.21189658, dtype=float32)}


  2%|▏         | 16179/1000000 [44:51<26:01:40, 10.50it/s]

{'loss': Array(0.2715405, dtype=float32), 'loss_cross_entropy': Array(0.2466745, dtype=float32)}


  2%|▏         | 16189/1000000 [44:52<32:37:48,  8.38it/s]

{'loss': Array(0.25752237, dtype=float32), 'loss_cross_entropy': Array(0.2329771, dtype=float32)}


  2%|▏         | 16199/1000000 [44:54<26:56:28, 10.14it/s]

{'loss': Array(0.248515, dtype=float32), 'loss_cross_entropy': Array(0.22360277, dtype=float32)}


  2%|▏         | 16209/1000000 [44:55<25:34:27, 10.69it/s]

{'loss': Array(0.25663337, dtype=float32), 'loss_cross_entropy': Array(0.23237002, dtype=float32)}


  2%|▏         | 16219/1000000 [44:57<28:30:41,  9.58it/s]

{'loss': Array(0.24139228, dtype=float32), 'loss_cross_entropy': Array(0.21828707, dtype=float32)}


  2%|▏         | 16229/1000000 [44:58<26:31:51, 10.30it/s]

{'loss': Array(0.25736687, dtype=float32), 'loss_cross_entropy': Array(0.23319021, dtype=float32)}


  2%|▏         | 16239/1000000 [45:00<32:12:00,  8.49it/s]

{'loss': Array(0.24730085, dtype=float32), 'loss_cross_entropy': Array(0.22262578, dtype=float32)}


  2%|▏         | 16249/1000000 [45:01<26:30:16, 10.31it/s]

{'loss': Array(0.24886051, dtype=float32), 'loss_cross_entropy': Array(0.22485386, dtype=float32)}


  2%|▏         | 16259/1000000 [45:03<41:30:49,  6.58it/s]

{'loss': Array(0.26289022, dtype=float32), 'loss_cross_entropy': Array(0.2384877, dtype=float32)}


  2%|▏         | 16269/1000000 [45:04<28:29:28,  9.59it/s]

{'loss': Array(0.24080916, dtype=float32), 'loss_cross_entropy': Array(0.21826361, dtype=float32)}


  2%|▏         | 16279/1000000 [45:06<40:28:58,  6.75it/s]

{'loss': Array(0.22712378, dtype=float32), 'loss_cross_entropy': Array(0.20472279, dtype=float32)}


  2%|▏         | 16289/1000000 [45:07<28:03:35,  9.74it/s]

{'loss': Array(0.24978097, dtype=float32), 'loss_cross_entropy': Array(0.2261684, dtype=float32)}


  2%|▏         | 16299/1000000 [45:09<26:27:51, 10.33it/s]

{'loss': Array(0.24997512, dtype=float32), 'loss_cross_entropy': Array(0.22628243, dtype=float32)}


  2%|▏         | 16309/1000000 [45:11<35:55:04,  7.61it/s]

{'loss': Array(0.24907076, dtype=float32), 'loss_cross_entropy': Array(0.22562964, dtype=float32)}


  2%|▏         | 16319/1000000 [45:12<27:05:51, 10.08it/s]

{'loss': Array(0.25980043, dtype=float32), 'loss_cross_entropy': Array(0.23564927, dtype=float32)}


  2%|▏         | 16329/1000000 [45:14<40:39:34,  6.72it/s]

{'loss': Array(0.26365685, dtype=float32), 'loss_cross_entropy': Array(0.23886454, dtype=float32)}


  2%|▏         | 16339/1000000 [45:15<27:54:56,  9.79it/s]

{'loss': Array(0.2565454, dtype=float32), 'loss_cross_entropy': Array(0.23271452, dtype=float32)}


  2%|▏         | 16349/1000000 [45:16<25:53:57, 10.55it/s]

{'loss': Array(0.2585736, dtype=float32), 'loss_cross_entropy': Array(0.23272827, dtype=float32)}


  2%|▏         | 16359/1000000 [45:18<29:15:27,  9.34it/s]

{'loss': Array(0.2645416, dtype=float32), 'loss_cross_entropy': Array(0.24017969, dtype=float32)}


  2%|▏         | 16369/1000000 [45:20<25:42:57, 10.62it/s]

{'loss': Array(0.24304943, dtype=float32), 'loss_cross_entropy': Array(0.21768546, dtype=float32)}


  2%|▏         | 16379/1000000 [45:21<28:36:49,  9.55it/s]

{'loss': Array(0.23166087, dtype=float32), 'loss_cross_entropy': Array(0.20935385, dtype=float32)}


  2%|▏         | 16389/1000000 [45:23<27:12:00, 10.04it/s]

{'loss': Array(0.24838357, dtype=float32), 'loss_cross_entropy': Array(0.22377133, dtype=float32)}


  2%|▏         | 16399/1000000 [45:24<39:55:02,  6.84it/s]

{'loss': Array(0.24335122, dtype=float32), 'loss_cross_entropy': Array(0.2189474, dtype=float32)}


  2%|▏         | 16409/1000000 [45:26<27:55:01,  9.79it/s]

{'loss': Array(0.2594661, dtype=float32), 'loss_cross_entropy': Array(0.23478206, dtype=float32)}


  2%|▏         | 16419/1000000 [45:27<25:43:18, 10.62it/s]

{'loss': Array(0.25816867, dtype=float32), 'loss_cross_entropy': Array(0.23381273, dtype=float32)}


  2%|▏         | 16429/1000000 [45:29<32:24:00,  8.43it/s]

{'loss': Array(0.27051717, dtype=float32), 'loss_cross_entropy': Array(0.24433993, dtype=float32)}


  2%|▏         | 16439/1000000 [45:30<26:38:07, 10.26it/s]

{'loss': Array(0.26204476, dtype=float32), 'loss_cross_entropy': Array(0.23745123, dtype=float32)}


  2%|▏         | 16449/1000000 [45:32<40:34:06,  6.73it/s]

{'loss': Array(0.22871065, dtype=float32), 'loss_cross_entropy': Array(0.20561282, dtype=float32)}


  2%|▏         | 16459/1000000 [45:33<28:00:59,  9.75it/s]

{'loss': Array(0.25333425, dtype=float32), 'loss_cross_entropy': Array(0.22877835, dtype=float32)}


  2%|▏         | 16469/1000000 [45:35<25:44:29, 10.61it/s]

{'loss': Array(0.2679498, dtype=float32), 'loss_cross_entropy': Array(0.24369292, dtype=float32)}


  2%|▏         | 16479/1000000 [45:36<30:15:14,  9.03it/s]

{'loss': Array(0.2456357, dtype=float32), 'loss_cross_entropy': Array(0.22292726, dtype=float32)}


  2%|▏         | 16489/1000000 [45:38<28:13:29,  9.68it/s]

{'loss': Array(0.24857731, dtype=float32), 'loss_cross_entropy': Array(0.22594689, dtype=float32)}


  2%|▏         | 16499/1000000 [45:39<36:40:58,  7.45it/s]

{'loss': Array(0.2421054, dtype=float32), 'loss_cross_entropy': Array(0.2188671, dtype=float32)}


  2%|▏         | 16509/1000000 [45:47<91:09:43,  3.00it/s] 

{'loss': Array(0.25928396, dtype=float32), 'loss_cross_entropy': Array(0.23562539, dtype=float32)}


  2%|▏         | 16519/1000000 [45:49<46:42:41,  5.85it/s] 

{'loss': Array(0.27022752, dtype=float32), 'loss_cross_entropy': Array(0.24575277, dtype=float32)}


  2%|▏         | 16529/1000000 [45:50<29:14:15,  9.34it/s]

{'loss': Array(0.27230382, dtype=float32), 'loss_cross_entropy': Array(0.24713145, dtype=float32)}


  2%|▏         | 16539/1000000 [45:52<26:00:31, 10.50it/s]

{'loss': Array(0.26890832, dtype=float32), 'loss_cross_entropy': Array(0.24406691, dtype=float32)}


  2%|▏         | 16549/1000000 [45:54<32:57:19,  8.29it/s]

{'loss': Array(0.24254239, dtype=float32), 'loss_cross_entropy': Array(0.21881536, dtype=float32)}


  2%|▏         | 16559/1000000 [45:55<26:33:47, 10.28it/s]

{'loss': Array(0.26669928, dtype=float32), 'loss_cross_entropy': Array(0.24209543, dtype=float32)}


  2%|▏         | 16569/1000000 [45:56<25:47:08, 10.59it/s]

{'loss': Array(0.25218096, dtype=float32), 'loss_cross_entropy': Array(0.22831702, dtype=float32)}


  2%|▏         | 16579/1000000 [45:58<29:33:30,  9.24it/s]

{'loss': Array(0.26411805, dtype=float32), 'loss_cross_entropy': Array(0.23881848, dtype=float32)}


  2%|▏         | 16589/1000000 [45:59<27:07:49, 10.07it/s]

{'loss': Array(0.2320179, dtype=float32), 'loss_cross_entropy': Array(0.20987847, dtype=float32)}


  2%|▏         | 16599/1000000 [46:01<33:09:08,  8.24it/s]

{'loss': Array(0.2584856, dtype=float32), 'loss_cross_entropy': Array(0.23520692, dtype=float32)}


  2%|▏         | 16609/1000000 [46:03<29:24:11,  9.29it/s]

{'loss': Array(0.26445386, dtype=float32), 'loss_cross_entropy': Array(0.24043253, dtype=float32)}


  2%|▏         | 16619/1000000 [46:04<36:03:41,  7.57it/s]

{'loss': Array(0.24297638, dtype=float32), 'loss_cross_entropy': Array(0.21937907, dtype=float32)}


  2%|▏         | 16629/1000000 [46:06<26:38:19, 10.25it/s]

{'loss': Array(0.24438195, dtype=float32), 'loss_cross_entropy': Array(0.22232969, dtype=float32)}


  2%|▏         | 16639/1000000 [46:07<25:33:40, 10.69it/s]

{'loss': Array(0.2521003, dtype=float32), 'loss_cross_entropy': Array(0.22907749, dtype=float32)}


  2%|▏         | 16649/1000000 [46:09<32:45:04,  8.34it/s]

{'loss': Array(0.2593154, dtype=float32), 'loss_cross_entropy': Array(0.23427165, dtype=float32)}


  2%|▏         | 16659/1000000 [46:10<26:20:50, 10.37it/s]

{'loss': Array(0.27492425, dtype=float32), 'loss_cross_entropy': Array(0.24994226, dtype=float32)}


  2%|▏         | 16669/1000000 [46:11<25:22:23, 10.77it/s]

{'loss': Array(0.2552976, dtype=float32), 'loss_cross_entropy': Array(0.22989821, dtype=float32)}


  2%|▏         | 16679/1000000 [46:13<29:02:36,  9.40it/s]

{'loss': Array(0.24167585, dtype=float32), 'loss_cross_entropy': Array(0.2178115, dtype=float32)}


  2%|▏         | 16689/1000000 [46:15<26:08:31, 10.45it/s]

{'loss': Array(0.26678458, dtype=float32), 'loss_cross_entropy': Array(0.2430853, dtype=float32)}


  2%|▏         | 16699/1000000 [46:16<33:09:09,  8.24it/s]

{'loss': Array(0.2482165, dtype=float32), 'loss_cross_entropy': Array(0.22431386, dtype=float32)}


  2%|▏         | 16709/1000000 [46:18<28:38:32,  9.54it/s]

{'loss': Array(0.255236, dtype=float32), 'loss_cross_entropy': Array(0.2320408, dtype=float32)}


  2%|▏         | 16719/1000000 [46:19<41:00:16,  6.66it/s]

{'loss': Array(0.23465274, dtype=float32), 'loss_cross_entropy': Array(0.21114011, dtype=float32)}


  2%|▏         | 16729/1000000 [46:21<27:38:58,  9.88it/s]

{'loss': Array(0.25724268, dtype=float32), 'loss_cross_entropy': Array(0.23154901, dtype=float32)}


  2%|▏         | 16739/1000000 [46:22<25:19:30, 10.78it/s]

{'loss': Array(0.24707048, dtype=float32), 'loss_cross_entropy': Array(0.2232425, dtype=float32)}


  2%|▏         | 16749/1000000 [46:24<36:01:23,  7.58it/s]

{'loss': Array(0.24708657, dtype=float32), 'loss_cross_entropy': Array(0.22448508, dtype=float32)}


  2%|▏         | 16759/1000000 [46:25<27:31:16,  9.92it/s]

{'loss': Array(0.236563, dtype=float32), 'loss_cross_entropy': Array(0.2136273, dtype=float32)}


  2%|▏         | 16769/1000000 [46:27<25:58:14, 10.52it/s]

{'loss': Array(0.2562549, dtype=float32), 'loss_cross_entropy': Array(0.23228948, dtype=float32)}


  2%|▏         | 16779/1000000 [46:28<31:04:20,  8.79it/s]

{'loss': Array(0.2596394, dtype=float32), 'loss_cross_entropy': Array(0.23680234, dtype=float32)}


  2%|▏         | 16789/1000000 [46:30<25:54:47, 10.54it/s]

{'loss': Array(0.2596508, dtype=float32), 'loss_cross_entropy': Array(0.23516567, dtype=float32)}


  2%|▏         | 16799/1000000 [46:31<32:52:49,  8.31it/s]

{'loss': Array(0.24168746, dtype=float32), 'loss_cross_entropy': Array(0.21775238, dtype=float32)}


  2%|▏         | 16809/1000000 [46:33<27:16:30, 10.01it/s]

{'loss': Array(0.23733996, dtype=float32), 'loss_cross_entropy': Array(0.21533135, dtype=float32)}


  2%|▏         | 16819/1000000 [46:34<25:36:51, 10.66it/s]

{'loss': Array(0.28632385, dtype=float32), 'loss_cross_entropy': Array(0.26032156, dtype=float32)}


  2%|▏         | 16829/1000000 [46:36<29:12:36,  9.35it/s]

{'loss': Array(0.25606716, dtype=float32), 'loss_cross_entropy': Array(0.23372114, dtype=float32)}


  2%|▏         | 16839/1000000 [46:37<25:42:30, 10.62it/s]

{'loss': Array(0.24925874, dtype=float32), 'loss_cross_entropy': Array(0.22543238, dtype=float32)}


  2%|▏         | 16849/1000000 [46:39<36:12:56,  7.54it/s]

{'loss': Array(0.25092793, dtype=float32), 'loss_cross_entropy': Array(0.2267036, dtype=float32)}


  2%|▏         | 16859/1000000 [46:40<27:10:19, 10.05it/s]

{'loss': Array(0.2525903, dtype=float32), 'loss_cross_entropy': Array(0.22843304, dtype=float32)}


  2%|▏         | 16869/1000000 [46:42<25:22:21, 10.76it/s]

{'loss': Array(0.23380338, dtype=float32), 'loss_cross_entropy': Array(0.21155877, dtype=float32)}


  2%|▏         | 16879/1000000 [46:44<29:14:59,  9.34it/s]

{'loss': Array(0.24635227, dtype=float32), 'loss_cross_entropy': Array(0.22274375, dtype=float32)}


  2%|▏         | 16889/1000000 [46:45<25:55:43, 10.53it/s]

{'loss': Array(0.25356144, dtype=float32), 'loss_cross_entropy': Array(0.22776389, dtype=float32)}


  2%|▏         | 16899/1000000 [46:47<30:32:21,  8.94it/s]

{'loss': Array(0.26060757, dtype=float32), 'loss_cross_entropy': Array(0.2354797, dtype=float32)}


  2%|▏         | 16909/1000000 [46:48<27:21:36,  9.98it/s]

{'loss': Array(0.25204808, dtype=float32), 'loss_cross_entropy': Array(0.22746085, dtype=float32)}


  2%|▏         | 16919/1000000 [46:50<39:56:00,  6.84it/s]

{'loss': Array(0.2818497, dtype=float32), 'loss_cross_entropy': Array(0.25587806, dtype=float32)}


  2%|▏         | 16929/1000000 [46:51<27:34:16,  9.90it/s]

{'loss': Array(0.25667423, dtype=float32), 'loss_cross_entropy': Array(0.23232193, dtype=float32)}


  2%|▏         | 16939/1000000 [46:52<25:48:36, 10.58it/s]

{'loss': Array(0.23456655, dtype=float32), 'loss_cross_entropy': Array(0.21273804, dtype=float32)}


  2%|▏         | 16949/1000000 [46:54<32:33:54,  8.39it/s]

{'loss': Array(0.27496564, dtype=float32), 'loss_cross_entropy': Array(0.25016662, dtype=float32)}


  2%|▏         | 16959/1000000 [46:55<26:31:50, 10.29it/s]

{'loss': Array(0.25538278, dtype=float32), 'loss_cross_entropy': Array(0.23149948, dtype=float32)}


  2%|▏         | 16969/1000000 [46:57<36:06:58,  7.56it/s]

{'loss': Array(0.26215252, dtype=float32), 'loss_cross_entropy': Array(0.23741174, dtype=float32)}


  2%|▏         | 16979/1000000 [46:59<28:36:55,  9.54it/s]

{'loss': Array(0.24157448, dtype=float32), 'loss_cross_entropy': Array(0.21949254, dtype=float32)}


  2%|▏         | 16989/1000000 [47:00<36:15:29,  7.53it/s]

{'loss': Array(0.22004853, dtype=float32), 'loss_cross_entropy': Array(0.19800602, dtype=float32)}


  2%|▏         | 16999/1000000 [47:02<27:19:09,  9.99it/s]

{'loss': Array(0.26552412, dtype=float32), 'loss_cross_entropy': Array(0.24137235, dtype=float32)}


  2%|▏         | 17009/1000000 [47:10<90:01:34,  3.03it/s] 

{'loss': Array(0.26735625, dtype=float32), 'loss_cross_entropy': Array(0.2417451, dtype=float32)}


  2%|▏         | 17019/1000000 [47:11<41:11:03,  6.63it/s]

{'loss': Array(0.23388307, dtype=float32), 'loss_cross_entropy': Array(0.21103571, dtype=float32)}


  2%|▏         | 17029/1000000 [47:13<29:10:04,  9.36it/s]

{'loss': Array(0.25408757, dtype=float32), 'loss_cross_entropy': Array(0.23091355, dtype=float32)}


  2%|▏         | 17039/1000000 [47:15<36:26:31,  7.49it/s]

{'loss': Array(0.24437606, dtype=float32), 'loss_cross_entropy': Array(0.22168803, dtype=float32)}


  2%|▏         | 17049/1000000 [47:16<27:17:34, 10.00it/s]

{'loss': Array(0.25967205, dtype=float32), 'loss_cross_entropy': Array(0.23566313, dtype=float32)}


  2%|▏         | 17059/1000000 [47:18<41:39:54,  6.55it/s]

{'loss': Array(0.27496487, dtype=float32), 'loss_cross_entropy': Array(0.24951498, dtype=float32)}


  2%|▏         | 17069/1000000 [47:19<27:53:07,  9.79it/s]

{'loss': Array(0.2506209, dtype=float32), 'loss_cross_entropy': Array(0.22665842, dtype=float32)}


  2%|▏         | 17079/1000000 [47:20<25:57:52, 10.52it/s]

{'loss': Array(0.25860366, dtype=float32), 'loss_cross_entropy': Array(0.23425595, dtype=float32)}


  2%|▏         | 17089/1000000 [47:22<32:02:37,  8.52it/s]

{'loss': Array(0.24292679, dtype=float32), 'loss_cross_entropy': Array(0.21976541, dtype=float32)}


  2%|▏         | 17099/1000000 [47:23<26:50:36, 10.17it/s]

{'loss': Array(0.24074662, dtype=float32), 'loss_cross_entropy': Array(0.21777605, dtype=float32)}


  2%|▏         | 17109/1000000 [47:25<35:40:29,  7.65it/s]

{'loss': Array(0.2544035, dtype=float32), 'loss_cross_entropy': Array(0.22924638, dtype=float32)}


  2%|▏         | 17119/1000000 [47:27<26:58:52, 10.12it/s]

{'loss': Array(0.24310184, dtype=float32), 'loss_cross_entropy': Array(0.22014482, dtype=float32)}


  2%|▏         | 17129/1000000 [47:28<26:12:00, 10.42it/s]

{'loss': Array(0.24604984, dtype=float32), 'loss_cross_entropy': Array(0.22192056, dtype=float32)}


  2%|▏         | 17139/1000000 [47:30<28:58:14,  9.42it/s]

{'loss': Array(0.25244698, dtype=float32), 'loss_cross_entropy': Array(0.22818033, dtype=float32)}


  2%|▏         | 17149/1000000 [47:31<26:10:57, 10.43it/s]

{'loss': Array(0.21630517, dtype=float32), 'loss_cross_entropy': Array(0.19558862, dtype=float32)}


  2%|▏         | 17159/1000000 [47:33<41:25:21,  6.59it/s]

{'loss': Array(0.2308809, dtype=float32), 'loss_cross_entropy': Array(0.2090681, dtype=float32)}


  2%|▏         | 17169/1000000 [47:34<27:47:22,  9.82it/s]

{'loss': Array(0.23355936, dtype=float32), 'loss_cross_entropy': Array(0.21138044, dtype=float32)}


  2%|▏         | 17179/1000000 [47:35<25:32:44, 10.69it/s]

{'loss': Array(0.26832664, dtype=float32), 'loss_cross_entropy': Array(0.24288927, dtype=float32)}


  2%|▏         | 17189/1000000 [47:37<32:24:17,  8.42it/s]

{'loss': Array(0.25442207, dtype=float32), 'loss_cross_entropy': Array(0.23074703, dtype=float32)}


  2%|▏         | 17199/1000000 [47:39<26:59:17, 10.12it/s]

{'loss': Array(0.24912064, dtype=float32), 'loss_cross_entropy': Array(0.22611813, dtype=float32)}


  2%|▏         | 17209/1000000 [47:40<35:40:20,  7.65it/s]

{'loss': Array(0.25812653, dtype=float32), 'loss_cross_entropy': Array(0.2338949, dtype=float32)}


  2%|▏         | 17219/1000000 [47:42<26:44:20, 10.21it/s]

{'loss': Array(0.23596548, dtype=float32), 'loss_cross_entropy': Array(0.21387024, dtype=float32)}


  2%|▏         | 17229/1000000 [47:43<36:43:38,  7.43it/s]

{'loss': Array(0.23953557, dtype=float32), 'loss_cross_entropy': Array(0.21671174, dtype=float32)}


  2%|▏         | 17239/1000000 [47:45<27:06:41, 10.07it/s]

{'loss': Array(0.23327677, dtype=float32), 'loss_cross_entropy': Array(0.20985399, dtype=float32)}


  2%|▏         | 17249/1000000 [47:46<25:12:52, 10.83it/s]

{'loss': Array(0.23453479, dtype=float32), 'loss_cross_entropy': Array(0.21126579, dtype=float32)}


  2%|▏         | 17259/1000000 [47:48<31:35:14,  8.64it/s]

{'loss': Array(0.2506776, dtype=float32), 'loss_cross_entropy': Array(0.22753873, dtype=float32)}


  2%|▏         | 17269/1000000 [47:49<26:40:38, 10.23it/s]

{'loss': Array(0.23820646, dtype=float32), 'loss_cross_entropy': Array(0.21434914, dtype=float32)}


  2%|▏         | 17279/1000000 [47:51<33:33:46,  8.13it/s]

{'loss': Array(0.25691137, dtype=float32), 'loss_cross_entropy': Array(0.23217283, dtype=float32)}


  2%|▏         | 17289/1000000 [47:52<26:34:50, 10.27it/s]

{'loss': Array(0.24306667, dtype=float32), 'loss_cross_entropy': Array(0.22081666, dtype=float32)}


  2%|▏         | 17299/1000000 [47:54<40:27:43,  6.75it/s]

{'loss': Array(0.22490382, dtype=float32), 'loss_cross_entropy': Array(0.2022151, dtype=float32)}


  2%|▏         | 17309/1000000 [47:55<27:44:48,  9.84it/s]

{'loss': Array(0.25506467, dtype=float32), 'loss_cross_entropy': Array(0.230412, dtype=float32)}


  2%|▏         | 17319/1000000 [47:57<40:07:33,  6.80it/s]

{'loss': Array(0.24305539, dtype=float32), 'loss_cross_entropy': Array(0.21878512, dtype=float32)}


  2%|▏         | 17329/1000000 [47:59<28:34:11,  9.55it/s]

{'loss': Array(0.22158976, dtype=float32), 'loss_cross_entropy': Array(0.20103334, dtype=float32)}


  2%|▏         | 17339/1000000 [48:00<26:50:38, 10.17it/s]

{'loss': Array(0.25785363, dtype=float32), 'loss_cross_entropy': Array(0.23341246, dtype=float32)}


  2%|▏         | 17349/1000000 [48:02<35:30:17,  7.69it/s]

{'loss': Array(0.2579324, dtype=float32), 'loss_cross_entropy': Array(0.23379794, dtype=float32)}


  2%|▏         | 17359/1000000 [48:03<27:28:29,  9.93it/s]

{'loss': Array(0.2588048, dtype=float32), 'loss_cross_entropy': Array(0.2351207, dtype=float32)}


  2%|▏         | 17369/1000000 [48:05<40:01:56,  6.82it/s]

{'loss': Array(0.27576455, dtype=float32), 'loss_cross_entropy': Array(0.25000688, dtype=float32)}


  2%|▏         | 17379/1000000 [48:06<27:39:52,  9.87it/s]

{'loss': Array(0.25528425, dtype=float32), 'loss_cross_entropy': Array(0.22957282, dtype=float32)}


  2%|▏         | 17389/1000000 [48:08<26:52:53, 10.15it/s]

{'loss': Array(0.27641073, dtype=float32), 'loss_cross_entropy': Array(0.25025207, dtype=float32)}


  2%|▏         | 17399/1000000 [48:09<29:35:08,  9.23it/s]

{'loss': Array(0.22356056, dtype=float32), 'loss_cross_entropy': Array(0.20118587, dtype=float32)}


  2%|▏         | 17409/1000000 [48:11<26:00:55, 10.49it/s]

{'loss': Array(0.25300166, dtype=float32), 'loss_cross_entropy': Array(0.22882144, dtype=float32)}


  2%|▏         | 17419/1000000 [48:12<28:24:24,  9.61it/s]

{'loss': Array(0.2503162, dtype=float32), 'loss_cross_entropy': Array(0.22740848, dtype=float32)}


  2%|▏         | 17429/1000000 [48:14<26:11:44, 10.42it/s]

{'loss': Array(0.2263023, dtype=float32), 'loss_cross_entropy': Array(0.20523691, dtype=float32)}


  2%|▏         | 17439/1000000 [48:15<40:20:23,  6.77it/s]

{'loss': Array(0.23647916, dtype=float32), 'loss_cross_entropy': Array(0.21453412, dtype=float32)}


  2%|▏         | 17449/1000000 [48:17<27:33:59,  9.90it/s]

{'loss': Array(0.26681176, dtype=float32), 'loss_cross_entropy': Array(0.24214037, dtype=float32)}


  2%|▏         | 17459/1000000 [48:18<25:37:22, 10.65it/s]

{'loss': Array(0.25264773, dtype=float32), 'loss_cross_entropy': Array(0.22884586, dtype=float32)}


  2%|▏         | 17469/1000000 [48:20<32:35:46,  8.37it/s]

{'loss': Array(0.25111365, dtype=float32), 'loss_cross_entropy': Array(0.22793534, dtype=float32)}


  2%|▏         | 17479/1000000 [48:21<26:12:47, 10.41it/s]

{'loss': Array(0.24013577, dtype=float32), 'loss_cross_entropy': Array(0.21699266, dtype=float32)}


  2%|▏         | 17489/1000000 [48:23<40:57:23,  6.66it/s]

{'loss': Array(0.25092837, dtype=float32), 'loss_cross_entropy': Array(0.22719316, dtype=float32)}


  2%|▏         | 17499/1000000 [48:24<27:34:49,  9.90it/s]

{'loss': Array(0.25839776, dtype=float32), 'loss_cross_entropy': Array(0.2346855, dtype=float32)}


  2%|▏         | 17509/1000000 [48:32<89:01:26,  3.07it/s] 

{'loss': Array(0.26104015, dtype=float32), 'loss_cross_entropy': Array(0.23732269, dtype=float32)}


  2%|▏         | 17519/1000000 [48:34<41:11:48,  6.62it/s]

{'loss': Array(0.25039488, dtype=float32), 'loss_cross_entropy': Array(0.22595151, dtype=float32)}


  2%|▏         | 17529/1000000 [48:35<27:22:06,  9.97it/s]

{'loss': Array(0.2283097, dtype=float32), 'loss_cross_entropy': Array(0.20579782, dtype=float32)}


  2%|▏         | 17539/1000000 [48:37<32:32:47,  8.39it/s]

{'loss': Array(0.23266493, dtype=float32), 'loss_cross_entropy': Array(0.20905235, dtype=float32)}


  2%|▏         | 17549/1000000 [48:38<26:51:06, 10.16it/s]

{'loss': Array(0.26333508, dtype=float32), 'loss_cross_entropy': Array(0.23983335, dtype=float32)}


  2%|▏         | 17559/1000000 [48:40<40:15:06,  6.78it/s]

{'loss': Array(0.24488859, dtype=float32), 'loss_cross_entropy': Array(0.22245875, dtype=float32)}


  2%|▏         | 17569/1000000 [48:41<27:34:46,  9.89it/s]

{'loss': Array(0.25565574, dtype=float32), 'loss_cross_entropy': Array(0.2323014, dtype=float32)}


  2%|▏         | 17579/1000000 [48:43<27:42:32,  9.85it/s]

{'loss': Array(0.23735599, dtype=float32), 'loss_cross_entropy': Array(0.21352564, dtype=float32)}


  2%|▏         | 17589/1000000 [48:44<29:20:12,  9.30it/s]

{'loss': Array(0.260939, dtype=float32), 'loss_cross_entropy': Array(0.2361063, dtype=float32)}


  2%|▏         | 17599/1000000 [48:46<25:33:59, 10.67it/s]

{'loss': Array(0.23684931, dtype=float32), 'loss_cross_entropy': Array(0.21530218, dtype=float32)}


  2%|▏         | 17609/1000000 [48:47<39:29:04,  6.91it/s]

{'loss': Array(0.26023847, dtype=float32), 'loss_cross_entropy': Array(0.23681961, dtype=float32)}


  2%|▏         | 17619/1000000 [48:49<28:03:26,  9.73it/s]

{'loss': Array(0.2651497, dtype=float32), 'loss_cross_entropy': Array(0.23939224, dtype=float32)}


  2%|▏         | 17629/1000000 [48:50<25:08:49, 10.85it/s]

{'loss': Array(0.2405533, dtype=float32), 'loss_cross_entropy': Array(0.21717492, dtype=float32)}


  2%|▏         | 17639/1000000 [48:52<29:51:28,  9.14it/s]

{'loss': Array(0.23924866, dtype=float32), 'loss_cross_entropy': Array(0.21550588, dtype=float32)}


  2%|▏         | 17649/1000000 [48:53<26:15:31, 10.39it/s]

{'loss': Array(0.24824964, dtype=float32), 'loss_cross_entropy': Array(0.22520737, dtype=float32)}


  2%|▏         | 17659/1000000 [48:55<35:25:37,  7.70it/s]

{'loss': Array(0.24137579, dtype=float32), 'loss_cross_entropy': Array(0.22036727, dtype=float32)}


  2%|▏         | 17669/1000000 [48:56<27:02:35, 10.09it/s]

{'loss': Array(0.25755587, dtype=float32), 'loss_cross_entropy': Array(0.23338011, dtype=float32)}


  2%|▏         | 17679/1000000 [48:58<41:48:18,  6.53it/s]

{'loss': Array(0.261366, dtype=float32), 'loss_cross_entropy': Array(0.23721604, dtype=float32)}


  2%|▏         | 17689/1000000 [48:59<28:20:43,  9.63it/s]

{'loss': Array(0.24366008, dtype=float32), 'loss_cross_entropy': Array(0.22016387, dtype=float32)}


  2%|▏         | 17699/1000000 [49:01<25:51:10, 10.55it/s]

{'loss': Array(0.25224534, dtype=float32), 'loss_cross_entropy': Array(0.22890003, dtype=float32)}


  2%|▏         | 17709/1000000 [49:02<32:35:01,  8.37it/s]

{'loss': Array(0.24822135, dtype=float32), 'loss_cross_entropy': Array(0.22455958, dtype=float32)}


  2%|▏         | 17719/1000000 [49:04<27:42:13,  9.85it/s]

{'loss': Array(0.2392691, dtype=float32), 'loss_cross_entropy': Array(0.21636768, dtype=float32)}


  2%|▏         | 17729/1000000 [49:06<36:19:58,  7.51it/s]

{'loss': Array(0.22364064, dtype=float32), 'loss_cross_entropy': Array(0.20086367, dtype=float32)}


  2%|▏         | 17739/1000000 [49:07<26:45:06, 10.20it/s]

{'loss': Array(0.2653763, dtype=float32), 'loss_cross_entropy': Array(0.24112971, dtype=float32)}


  2%|▏         | 17749/1000000 [49:08<25:51:02, 10.55it/s]

{'loss': Array(0.26578933, dtype=float32), 'loss_cross_entropy': Array(0.24116068, dtype=float32)}


  2%|▏         | 17759/1000000 [49:10<29:28:10,  9.26it/s]

{'loss': Array(0.24925259, dtype=float32), 'loss_cross_entropy': Array(0.22499004, dtype=float32)}


  2%|▏         | 17769/1000000 [49:11<26:29:00, 10.30it/s]

{'loss': Array(0.22764535, dtype=float32), 'loss_cross_entropy': Array(0.20532756, dtype=float32)}


  2%|▏         | 17779/1000000 [49:13<41:16:35,  6.61it/s]

{'loss': Array(0.26700756, dtype=float32), 'loss_cross_entropy': Array(0.2432613, dtype=float32)}


  2%|▏         | 17789/1000000 [49:15<28:00:53,  9.74it/s]

{'loss': Array(0.25533578, dtype=float32), 'loss_cross_entropy': Array(0.23095644, dtype=float32)}


  2%|▏         | 17799/1000000 [49:16<25:45:47, 10.59it/s]

{'loss': Array(0.25406533, dtype=float32), 'loss_cross_entropy': Array(0.23027264, dtype=float32)}


  2%|▏         | 17809/1000000 [49:18<34:14:18,  7.97it/s]

{'loss': Array(0.25778505, dtype=float32), 'loss_cross_entropy': Array(0.23442261, dtype=float32)}


  2%|▏         | 17819/1000000 [49:19<27:01:16, 10.10it/s]

{'loss': Array(0.25832543, dtype=float32), 'loss_cross_entropy': Array(0.23567355, dtype=float32)}


  2%|▏         | 17829/1000000 [49:21<35:41:47,  7.64it/s]

{'loss': Array(0.26388764, dtype=float32), 'loss_cross_entropy': Array(0.24042645, dtype=float32)}


  2%|▏         | 17839/1000000 [49:22<26:46:41, 10.19it/s]

{'loss': Array(0.24177487, dtype=float32), 'loss_cross_entropy': Array(0.21999185, dtype=float32)}


  2%|▏         | 17849/1000000 [49:24<36:10:35,  7.54it/s]

{'loss': Array(0.24418579, dtype=float32), 'loss_cross_entropy': Array(0.22195613, dtype=float32)}


  2%|▏         | 17859/1000000 [49:25<27:32:35,  9.91it/s]

{'loss': Array(0.26943552, dtype=float32), 'loss_cross_entropy': Array(0.24561833, dtype=float32)}


  2%|▏         | 17869/1000000 [49:27<25:54:08, 10.53it/s]

{'loss': Array(0.24681802, dtype=float32), 'loss_cross_entropy': Array(0.22326858, dtype=float32)}


  2%|▏         | 17879/1000000 [49:28<30:51:31,  8.84it/s]

{'loss': Array(0.2693894, dtype=float32), 'loss_cross_entropy': Array(0.24514103, dtype=float32)}


  2%|▏         | 17889/1000000 [49:30<25:56:08, 10.52it/s]

{'loss': Array(0.26387104, dtype=float32), 'loss_cross_entropy': Array(0.24074642, dtype=float32)}


  2%|▏         | 17899/1000000 [49:32<32:57:00,  8.28it/s]

{'loss': Array(0.26629528, dtype=float32), 'loss_cross_entropy': Array(0.24195044, dtype=float32)}


  2%|▏         | 17909/1000000 [49:33<27:33:37,  9.90it/s]

{'loss': Array(0.25722307, dtype=float32), 'loss_cross_entropy': Array(0.23320834, dtype=float32)}


  2%|▏         | 17919/1000000 [49:35<40:18:31,  6.77it/s]

{'loss': Array(0.2611995, dtype=float32), 'loss_cross_entropy': Array(0.23731339, dtype=float32)}


  2%|▏         | 17929/1000000 [49:36<27:54:05,  9.78it/s]

{'loss': Array(0.28299257, dtype=float32), 'loss_cross_entropy': Array(0.25669682, dtype=float32)}


  2%|▏         | 17939/1000000 [49:38<41:54:39,  6.51it/s]

{'loss': Array(0.2538886, dtype=float32), 'loss_cross_entropy': Array(0.22900437, dtype=float32)}


  2%|▏         | 17949/1000000 [49:39<27:51:10,  9.79it/s]

{'loss': Array(0.26138684, dtype=float32), 'loss_cross_entropy': Array(0.23814951, dtype=float32)}


  2%|▏         | 17959/1000000 [49:41<25:43:50, 10.60it/s]

{'loss': Array(0.2776524, dtype=float32), 'loss_cross_entropy': Array(0.25237146, dtype=float32)}


  2%|▏         | 17969/1000000 [49:42<35:16:16,  7.73it/s]

{'loss': Array(0.23791932, dtype=float32), 'loss_cross_entropy': Array(0.21487014, dtype=float32)}


  2%|▏         | 17979/1000000 [49:44<27:19:55,  9.98it/s]

{'loss': Array(0.24156366, dtype=float32), 'loss_cross_entropy': Array(0.21985944, dtype=float32)}


  2%|▏         | 17989/1000000 [49:45<40:02:26,  6.81it/s]

{'loss': Array(0.25296727, dtype=float32), 'loss_cross_entropy': Array(0.22988854, dtype=float32)}


  2%|▏         | 17999/1000000 [49:47<27:47:13,  9.82it/s]

{'loss': Array(0.25563887, dtype=float32), 'loss_cross_entropy': Array(0.22983101, dtype=float32)}


  2%|▏         | 18009/1000000 [49:55<91:18:33,  2.99it/s] 

{'loss': Array(0.25137708, dtype=float32), 'loss_cross_entropy': Array(0.2286217, dtype=float32)}


  2%|▏         | 18019/1000000 [49:57<40:35:26,  6.72it/s] 

{'loss': Array(0.25571498, dtype=float32), 'loss_cross_entropy': Array(0.23284106, dtype=float32)}


  2%|▏         | 18029/1000000 [49:58<28:53:34,  9.44it/s]

{'loss': Array(0.2646134, dtype=float32), 'loss_cross_entropy': Array(0.24039295, dtype=float32)}


  2%|▏         | 18039/1000000 [50:00<29:50:45,  9.14it/s]

{'loss': Array(0.261488, dtype=float32), 'loss_cross_entropy': Array(0.23810013, dtype=float32)}


  2%|▏         | 18049/1000000 [50:01<26:16:09, 10.38it/s]

{'loss': Array(0.22816224, dtype=float32), 'loss_cross_entropy': Array(0.20630884, dtype=float32)}


  2%|▏         | 18059/1000000 [50:03<40:09:54,  6.79it/s]

{'loss': Array(0.26488426, dtype=float32), 'loss_cross_entropy': Array(0.24068196, dtype=float32)}


  2%|▏         | 18069/1000000 [50:04<28:06:56,  9.70it/s]

{'loss': Array(0.27103567, dtype=float32), 'loss_cross_entropy': Array(0.24641322, dtype=float32)}


  2%|▏         | 18079/1000000 [50:06<25:56:48, 10.51it/s]

{'loss': Array(0.22810726, dtype=float32), 'loss_cross_entropy': Array(0.20597525, dtype=float32)}


  2%|▏         | 18089/1000000 [50:07<32:15:24,  8.46it/s]

{'loss': Array(0.25472453, dtype=float32), 'loss_cross_entropy': Array(0.23047443, dtype=float32)}


  2%|▏         | 18099/1000000 [50:09<27:00:40, 10.10it/s]

{'loss': Array(0.26760742, dtype=float32), 'loss_cross_entropy': Array(0.24329345, dtype=float32)}


  2%|▏         | 18109/1000000 [50:10<41:03:34,  6.64it/s]

{'loss': Array(0.25178882, dtype=float32), 'loss_cross_entropy': Array(0.22928981, dtype=float32)}


  2%|▏         | 18119/1000000 [50:12<28:26:53,  9.59it/s]

{'loss': Array(0.25297752, dtype=float32), 'loss_cross_entropy': Array(0.22894695, dtype=float32)}


  2%|▏         | 18129/1000000 [50:13<25:56:56, 10.51it/s]

{'loss': Array(0.23600522, dtype=float32), 'loss_cross_entropy': Array(0.21261083, dtype=float32)}


  2%|▏         | 18139/1000000 [50:15<30:14:13,  9.02it/s]

{'loss': Array(0.23301207, dtype=float32), 'loss_cross_entropy': Array(0.21117716, dtype=float32)}


  2%|▏         | 18149/1000000 [50:16<25:54:11, 10.53it/s]

{'loss': Array(0.24564743, dtype=float32), 'loss_cross_entropy': Array(0.22397009, dtype=float32)}


  2%|▏         | 18159/1000000 [50:18<36:58:25,  7.38it/s]

{'loss': Array(0.23974311, dtype=float32), 'loss_cross_entropy': Array(0.2172871, dtype=float32)}


  2%|▏         | 18169/1000000 [50:19<27:00:48, 10.10it/s]

{'loss': Array(0.25738585, dtype=float32), 'loss_cross_entropy': Array(0.23134802, dtype=float32)}


  2%|▏         | 18179/1000000 [50:21<25:01:58, 10.89it/s]

{'loss': Array(0.25082493, dtype=float32), 'loss_cross_entropy': Array(0.22744286, dtype=float32)}


  2%|▏         | 18189/1000000 [50:22<31:51:12,  8.56it/s]

{'loss': Array(0.25580215, dtype=float32), 'loss_cross_entropy': Array(0.23288596, dtype=float32)}


  2%|▏         | 18199/1000000 [50:24<26:29:10, 10.30it/s]

{'loss': Array(0.26912314, dtype=float32), 'loss_cross_entropy': Array(0.24492078, dtype=float32)}


  2%|▏         | 18209/1000000 [50:25<25:01:35, 10.90it/s]

{'loss': Array(0.25220504, dtype=float32), 'loss_cross_entropy': Array(0.2290622, dtype=float32)}


  2%|▏         | 18219/1000000 [50:27<28:24:22,  9.60it/s]

{'loss': Array(0.24845485, dtype=float32), 'loss_cross_entropy': Array(0.22467318, dtype=float32)}


  2%|▏         | 18229/1000000 [50:28<26:18:50, 10.36it/s]

{'loss': Array(0.23733823, dtype=float32), 'loss_cross_entropy': Array(0.21558201, dtype=float32)}


  2%|▏         | 18239/1000000 [50:30<32:12:20,  8.47it/s]

{'loss': Array(0.25735077, dtype=float32), 'loss_cross_entropy': Array(0.23393162, dtype=float32)}


  2%|▏         | 18249/1000000 [50:31<26:00:21, 10.49it/s]

{'loss': Array(0.26064894, dtype=float32), 'loss_cross_entropy': Array(0.23677538, dtype=float32)}


  2%|▏         | 18259/1000000 [50:33<41:26:24,  6.58it/s]

{'loss': Array(0.25078818, dtype=float32), 'loss_cross_entropy': Array(0.2272123, dtype=float32)}


  2%|▏         | 18269/1000000 [50:34<27:35:52,  9.88it/s]

{'loss': Array(0.2748883, dtype=float32), 'loss_cross_entropy': Array(0.25050437, dtype=float32)}


  2%|▏         | 18279/1000000 [50:36<40:03:55,  6.81it/s]

{'loss': Array(0.24447499, dtype=float32), 'loss_cross_entropy': Array(0.22259314, dtype=float32)}


  2%|▏         | 18289/1000000 [50:37<27:27:52,  9.93it/s]

{'loss': Array(0.24946265, dtype=float32), 'loss_cross_entropy': Array(0.22491615, dtype=float32)}


  2%|▏         | 18299/1000000 [50:39<26:17:47, 10.37it/s]

{'loss': Array(0.25398067, dtype=float32), 'loss_cross_entropy': Array(0.2291535, dtype=float32)}


  2%|▏         | 18309/1000000 [50:40<35:38:04,  7.65it/s]

{'loss': Array(0.27402198, dtype=float32), 'loss_cross_entropy': Array(0.24954663, dtype=float32)}


  2%|▏         | 18319/1000000 [50:42<27:17:18,  9.99it/s]

{'loss': Array(0.25976542, dtype=float32), 'loss_cross_entropy': Array(0.23576236, dtype=float32)}


  2%|▏         | 18329/1000000 [50:44<40:44:37,  6.69it/s]

{'loss': Array(0.22465394, dtype=float32), 'loss_cross_entropy': Array(0.20295934, dtype=float32)}


  2%|▏         | 18339/1000000 [50:45<27:20:23,  9.97it/s]

{'loss': Array(0.2531965, dtype=float32), 'loss_cross_entropy': Array(0.22960506, dtype=float32)}


  2%|▏         | 18349/1000000 [50:46<25:23:16, 10.74it/s]

{'loss': Array(0.24417995, dtype=float32), 'loss_cross_entropy': Array(0.22193466, dtype=float32)}


  2%|▏         | 18359/1000000 [50:48<29:10:33,  9.35it/s]

{'loss': Array(0.21986829, dtype=float32), 'loss_cross_entropy': Array(0.19979052, dtype=float32)}


  2%|▏         | 18369/1000000 [50:49<26:34:54, 10.26it/s]

{'loss': Array(0.24399947, dtype=float32), 'loss_cross_entropy': Array(0.22166681, dtype=float32)}


  2%|▏         | 18379/1000000 [50:51<29:01:41,  9.39it/s]

{'loss': Array(0.25178596, dtype=float32), 'loss_cross_entropy': Array(0.22863765, dtype=float32)}


  2%|▏         | 18389/1000000 [50:52<25:51:45, 10.54it/s]

{'loss': Array(0.23249452, dtype=float32), 'loss_cross_entropy': Array(0.2119082, dtype=float32)}


  2%|▏         | 18399/1000000 [50:54<40:06:55,  6.80it/s]

{'loss': Array(0.25733545, dtype=float32), 'loss_cross_entropy': Array(0.23381428, dtype=float32)}


  2%|▏         | 18409/1000000 [50:56<28:17:53,  9.64it/s]

{'loss': Array(0.25196525, dtype=float32), 'loss_cross_entropy': Array(0.22835182, dtype=float32)}


  2%|▏         | 18419/1000000 [50:57<25:45:05, 10.59it/s]

{'loss': Array(0.24715681, dtype=float32), 'loss_cross_entropy': Array(0.2250094, dtype=float32)}


  2%|▏         | 18429/1000000 [50:59<33:30:29,  8.14it/s]

{'loss': Array(0.2505546, dtype=float32), 'loss_cross_entropy': Array(0.22831933, dtype=float32)}


  2%|▏         | 18439/1000000 [51:00<27:18:35,  9.98it/s]

{'loss': Array(0.2503036, dtype=float32), 'loss_cross_entropy': Array(0.22654013, dtype=float32)}


  2%|▏         | 18449/1000000 [51:02<40:04:15,  6.80it/s]

{'loss': Array(0.24946459, dtype=float32), 'loss_cross_entropy': Array(0.22677813, dtype=float32)}


  2%|▏         | 18459/1000000 [51:03<27:36:54,  9.87it/s]

{'loss': Array(0.25188357, dtype=float32), 'loss_cross_entropy': Array(0.2288272, dtype=float32)}


  2%|▏         | 18469/1000000 [51:04<25:30:55, 10.69it/s]

{'loss': Array(0.25621852, dtype=float32), 'loss_cross_entropy': Array(0.23142369, dtype=float32)}


  2%|▏         | 18479/1000000 [51:06<29:48:59,  9.14it/s]

{'loss': Array(0.22679107, dtype=float32), 'loss_cross_entropy': Array(0.2056432, dtype=float32)}


  2%|▏         | 18489/1000000 [51:07<26:12:24, 10.40it/s]

{'loss': Array(0.26110768, dtype=float32), 'loss_cross_entropy': Array(0.23608701, dtype=float32)}


  2%|▏         | 18499/1000000 [51:09<36:02:54,  7.56it/s]

{'loss': Array(0.2570046, dtype=float32), 'loss_cross_entropy': Array(0.23358026, dtype=float32)}


  2%|▏         | 18509/1000000 [51:17<91:31:44,  2.98it/s] 

{'loss': Array(0.23657162, dtype=float32), 'loss_cross_entropy': Array(0.2151544, dtype=float32)}


  2%|▏         | 18519/1000000 [51:19<47:08:10,  5.78it/s] 

{'loss': Array(0.2363539, dtype=float32), 'loss_cross_entropy': Array(0.21454814, dtype=float32)}


  2%|▏         | 18529/1000000 [51:20<28:57:50,  9.41it/s]

{'loss': Array(0.25539938, dtype=float32), 'loss_cross_entropy': Array(0.2309697, dtype=float32)}


  2%|▏         | 18539/1000000 [51:22<25:18:02, 10.78it/s]

{'loss': Array(0.23743115, dtype=float32), 'loss_cross_entropy': Array(0.21569507, dtype=float32)}


  2%|▏         | 18549/1000000 [51:23<33:22:13,  8.17it/s]

{'loss': Array(0.25483716, dtype=float32), 'loss_cross_entropy': Array(0.23114581, dtype=float32)}


  2%|▏         | 18559/1000000 [51:25<26:22:02, 10.34it/s]

{'loss': Array(0.23044053, dtype=float32), 'loss_cross_entropy': Array(0.20840685, dtype=float32)}


  2%|▏         | 18569/1000000 [51:26<25:37:52, 10.64it/s]

{'loss': Array(0.24231589, dtype=float32), 'loss_cross_entropy': Array(0.2198596, dtype=float32)}


  2%|▏         | 18579/1000000 [51:28<29:36:46,  9.21it/s]

{'loss': Array(0.25625777, dtype=float32), 'loss_cross_entropy': Array(0.23398653, dtype=float32)}


  2%|▏         | 18589/1000000 [51:29<25:56:43, 10.51it/s]

{'loss': Array(0.24406624, dtype=float32), 'loss_cross_entropy': Array(0.22143029, dtype=float32)}


  2%|▏         | 18599/1000000 [51:31<32:14:46,  8.45it/s]

{'loss': Array(0.23613255, dtype=float32), 'loss_cross_entropy': Array(0.2148288, dtype=float32)}


  2%|▏         | 18609/1000000 [51:32<25:42:42, 10.60it/s]

{'loss': Array(0.24383734, dtype=float32), 'loss_cross_entropy': Array(0.22072558, dtype=float32)}


  2%|▏         | 18619/1000000 [51:34<35:50:49,  7.60it/s]

{'loss': Array(0.25767446, dtype=float32), 'loss_cross_entropy': Array(0.23428898, dtype=float32)}


  2%|▏         | 18629/1000000 [51:35<26:48:50, 10.17it/s]

{'loss': Array(0.24537449, dtype=float32), 'loss_cross_entropy': Array(0.22256267, dtype=float32)}


  2%|▏         | 18639/1000000 [51:37<25:16:24, 10.79it/s]

{'loss': Array(0.23848832, dtype=float32), 'loss_cross_entropy': Array(0.21554844, dtype=float32)}


  2%|▏         | 18649/1000000 [51:38<33:43:29,  8.08it/s]

{'loss': Array(0.26135325, dtype=float32), 'loss_cross_entropy': Array(0.23741741, dtype=float32)}


  2%|▏         | 18659/1000000 [51:40<26:19:45, 10.35it/s]

{'loss': Array(0.24563503, dtype=float32), 'loss_cross_entropy': Array(0.22367452, dtype=float32)}


  2%|▏         | 18669/1000000 [51:41<25:35:53, 10.65it/s]

{'loss': Array(0.25829074, dtype=float32), 'loss_cross_entropy': Array(0.2353379, dtype=float32)}


  2%|▏         | 18679/1000000 [51:43<30:30:24,  8.94it/s]

{'loss': Array(0.25870225, dtype=float32), 'loss_cross_entropy': Array(0.23587726, dtype=float32)}


  2%|▏         | 18689/1000000 [51:44<26:09:20, 10.42it/s]

{'loss': Array(0.24778764, dtype=float32), 'loss_cross_entropy': Array(0.22584198, dtype=float32)}


  2%|▏         | 18699/1000000 [51:46<32:36:27,  8.36it/s]

{'loss': Array(0.24311452, dtype=float32), 'loss_cross_entropy': Array(0.2196772, dtype=float32)}


  2%|▏         | 18709/1000000 [51:47<27:04:48, 10.07it/s]

{'loss': Array(0.2487192, dtype=float32), 'loss_cross_entropy': Array(0.22596565, dtype=float32)}


  2%|▏         | 18719/1000000 [51:49<40:21:30,  6.75it/s]

{'loss': Array(0.2610323, dtype=float32), 'loss_cross_entropy': Array(0.23840061, dtype=float32)}


  2%|▏         | 18729/1000000 [51:50<27:50:31,  9.79it/s]

{'loss': Array(0.24566887, dtype=float32), 'loss_cross_entropy': Array(0.22321709, dtype=float32)}


  2%|▏         | 18739/1000000 [51:52<25:58:00, 10.50it/s]

{'loss': Array(0.23748803, dtype=float32), 'loss_cross_entropy': Array(0.21626906, dtype=float32)}


  2%|▏         | 18749/1000000 [51:54<36:22:44,  7.49it/s]

{'loss': Array(0.23214546, dtype=float32), 'loss_cross_entropy': Array(0.20916192, dtype=float32)}


  2%|▏         | 18759/1000000 [51:55<26:39:43, 10.22it/s]

{'loss': Array(0.24938217, dtype=float32), 'loss_cross_entropy': Array(0.22809418, dtype=float32)}


  2%|▏         | 18769/1000000 [51:56<25:06:05, 10.86it/s]

{'loss': Array(0.23154376, dtype=float32), 'loss_cross_entropy': Array(0.2111806, dtype=float32)}


  2%|▏         | 18779/1000000 [51:58<30:40:47,  8.88it/s]

{'loss': Array(0.26929244, dtype=float32), 'loss_cross_entropy': Array(0.24472642, dtype=float32)}


  2%|▏         | 18789/1000000 [51:59<26:29:37, 10.29it/s]

{'loss': Array(0.24928437, dtype=float32), 'loss_cross_entropy': Array(0.22572549, dtype=float32)}


  2%|▏         | 18799/1000000 [52:01<33:39:56,  8.10it/s]

{'loss': Array(0.25875664, dtype=float32), 'loss_cross_entropy': Array(0.23510413, dtype=float32)}


  2%|▏         | 18809/1000000 [52:02<27:01:15, 10.09it/s]

{'loss': Array(0.250146, dtype=float32), 'loss_cross_entropy': Array(0.22590719, dtype=float32)}


  2%|▏         | 18819/1000000 [52:04<26:33:15, 10.26it/s]

{'loss': Array(0.23594892, dtype=float32), 'loss_cross_entropy': Array(0.21310256, dtype=float32)}


  2%|▏         | 18829/1000000 [52:06<29:19:14,  9.30it/s]

{'loss': Array(0.23796955, dtype=float32), 'loss_cross_entropy': Array(0.21565747, dtype=float32)}


  2%|▏         | 18839/1000000 [52:07<25:54:51, 10.52it/s]

{'loss': Array(0.26858217, dtype=float32), 'loss_cross_entropy': Array(0.24616726, dtype=float32)}


  2%|▏         | 18849/1000000 [52:09<35:41:58,  7.63it/s]

{'loss': Array(0.2542142, dtype=float32), 'loss_cross_entropy': Array(0.23128799, dtype=float32)}


  2%|▏         | 18859/1000000 [52:10<27:14:05, 10.01it/s]

{'loss': Array(0.22674334, dtype=float32), 'loss_cross_entropy': Array(0.20533784, dtype=float32)}


  2%|▏         | 18869/1000000 [52:11<25:07:02, 10.85it/s]

{'loss': Array(0.25175828, dtype=float32), 'loss_cross_entropy': Array(0.22910793, dtype=float32)}


  2%|▏         | 18879/1000000 [52:13<29:06:54,  9.36it/s]

{'loss': Array(0.25027508, dtype=float32), 'loss_cross_entropy': Array(0.22738545, dtype=float32)}


  2%|▏         | 18889/1000000 [52:15<25:40:56, 10.61it/s]

{'loss': Array(0.2383462, dtype=float32), 'loss_cross_entropy': Array(0.21714787, dtype=float32)}


  2%|▏         | 18899/1000000 [52:16<30:13:56,  9.01it/s]

{'loss': Array(0.26496425, dtype=float32), 'loss_cross_entropy': Array(0.23998426, dtype=float32)}


  2%|▏         | 18909/1000000 [52:18<28:30:47,  9.56it/s]

{'loss': Array(0.2679758, dtype=float32), 'loss_cross_entropy': Array(0.24430206, dtype=float32)}


  2%|▏         | 18919/1000000 [52:19<39:53:49,  6.83it/s]

{'loss': Array(0.25964043, dtype=float32), 'loss_cross_entropy': Array(0.23681472, dtype=float32)}


  2%|▏         | 18929/1000000 [52:21<27:49:38,  9.79it/s]

{'loss': Array(0.24023388, dtype=float32), 'loss_cross_entropy': Array(0.21714802, dtype=float32)}


  2%|▏         | 18939/1000000 [52:22<25:33:30, 10.66it/s]

{'loss': Array(0.23859306, dtype=float32), 'loss_cross_entropy': Array(0.2155369, dtype=float32)}


  2%|▏         | 18949/1000000 [52:24<32:32:43,  8.37it/s]

{'loss': Array(0.237394, dtype=float32), 'loss_cross_entropy': Array(0.21603242, dtype=float32)}


  2%|▏         | 18959/1000000 [52:25<26:06:39, 10.44it/s]

{'loss': Array(0.23725541, dtype=float32), 'loss_cross_entropy': Array(0.21550493, dtype=float32)}


  2%|▏         | 18969/1000000 [52:27<36:08:37,  7.54it/s]

{'loss': Array(0.24142547, dtype=float32), 'loss_cross_entropy': Array(0.21900974, dtype=float32)}


  2%|▏         | 18979/1000000 [52:28<27:10:15, 10.03it/s]

{'loss': Array(0.24000528, dtype=float32), 'loss_cross_entropy': Array(0.21881028, dtype=float32)}


  2%|▏         | 18989/1000000 [52:30<35:27:43,  7.68it/s]

{'loss': Array(0.2634379, dtype=float32), 'loss_cross_entropy': Array(0.23781753, dtype=float32)}


  2%|▏         | 18999/1000000 [52:31<26:38:38, 10.23it/s]

{'loss': Array(0.23780067, dtype=float32), 'loss_cross_entropy': Array(0.21588507, dtype=float32)}


  2%|▏         | 19009/1000000 [52:39<89:58:26,  3.03it/s] 

{'loss': Array(0.2331837, dtype=float32), 'loss_cross_entropy': Array(0.21169238, dtype=float32)}


  2%|▏         | 19019/1000000 [52:41<41:01:56,  6.64it/s]

{'loss': Array(0.2604456, dtype=float32), 'loss_cross_entropy': Array(0.23785892, dtype=float32)}


  2%|▏         | 19029/1000000 [52:42<27:43:18,  9.83it/s]

{'loss': Array(0.2578574, dtype=float32), 'loss_cross_entropy': Array(0.23508187, dtype=float32)}


  2%|▏         | 19039/1000000 [52:44<36:34:35,  7.45it/s]

{'loss': Array(0.25316986, dtype=float32), 'loss_cross_entropy': Array(0.23054528, dtype=float32)}


  2%|▏         | 19049/1000000 [52:45<27:39:17,  9.85it/s]

{'loss': Array(0.25716364, dtype=float32), 'loss_cross_entropy': Array(0.23521753, dtype=float32)}


  2%|▏         | 19059/1000000 [52:47<39:55:59,  6.82it/s]

{'loss': Array(0.25619888, dtype=float32), 'loss_cross_entropy': Array(0.23409398, dtype=float32)}


  2%|▏         | 19069/1000000 [52:48<28:20:24,  9.61it/s]

{'loss': Array(0.22852981, dtype=float32), 'loss_cross_entropy': Array(0.20832483, dtype=float32)}


  2%|▏         | 19079/1000000 [52:50<26:05:13, 10.44it/s]

{'loss': Array(0.24277098, dtype=float32), 'loss_cross_entropy': Array(0.22157125, dtype=float32)}


  2%|▏         | 19089/1000000 [52:51<32:27:38,  8.39it/s]

{'loss': Array(0.23572695, dtype=float32), 'loss_cross_entropy': Array(0.21469453, dtype=float32)}


  2%|▏         | 19099/1000000 [52:53<27:10:53, 10.02it/s]

{'loss': Array(0.25498697, dtype=float32), 'loss_cross_entropy': Array(0.23177965, dtype=float32)}


  2%|▏         | 19109/1000000 [52:55<36:02:48,  7.56it/s]

{'loss': Array(0.23924641, dtype=float32), 'loss_cross_entropy': Array(0.21754391, dtype=float32)}


  2%|▏         | 19119/1000000 [52:56<27:15:28, 10.00it/s]

{'loss': Array(0.243257, dtype=float32), 'loss_cross_entropy': Array(0.2215842, dtype=float32)}


  2%|▏         | 19129/1000000 [52:57<25:52:56, 10.53it/s]

{'loss': Array(0.23405221, dtype=float32), 'loss_cross_entropy': Array(0.21309622, dtype=float32)}


  2%|▏         | 19139/1000000 [52:59<29:15:10,  9.31it/s]

{'loss': Array(0.22583638, dtype=float32), 'loss_cross_entropy': Array(0.2037627, dtype=float32)}


  2%|▏         | 19149/1000000 [53:01<26:26:28, 10.30it/s]

{'loss': Array(0.2308011, dtype=float32), 'loss_cross_entropy': Array(0.20923625, dtype=float32)}


  2%|▏         | 19159/1000000 [53:02<40:34:44,  6.71it/s]

{'loss': Array(0.2629532, dtype=float32), 'loss_cross_entropy': Array(0.23947442, dtype=float32)}


  2%|▏         | 19169/1000000 [53:04<27:54:33,  9.76it/s]

{'loss': Array(0.2606479, dtype=float32), 'loss_cross_entropy': Array(0.23682311, dtype=float32)}


  2%|▏         | 19179/1000000 [53:05<25:23:03, 10.73it/s]

{'loss': Array(0.2499123, dtype=float32), 'loss_cross_entropy': Array(0.22653368, dtype=float32)}


  2%|▏         | 19189/1000000 [53:07<31:58:10,  8.52it/s]

{'loss': Array(0.25360724, dtype=float32), 'loss_cross_entropy': Array(0.23078318, dtype=float32)}


  2%|▏         | 19199/1000000 [53:08<26:40:11, 10.22it/s]

{'loss': Array(0.24141975, dtype=float32), 'loss_cross_entropy': Array(0.21954577, dtype=float32)}


  2%|▏         | 19209/1000000 [53:10<35:13:37,  7.73it/s]

{'loss': Array(0.23160516, dtype=float32), 'loss_cross_entropy': Array(0.20966898, dtype=float32)}


  2%|▏         | 19219/1000000 [53:11<26:32:52, 10.26it/s]

{'loss': Array(0.24539669, dtype=float32), 'loss_cross_entropy': Array(0.22241543, dtype=float32)}


  2%|▏         | 19229/1000000 [53:13<36:55:42,  7.38it/s]

{'loss': Array(0.2619573, dtype=float32), 'loss_cross_entropy': Array(0.2392466, dtype=float32)}


  2%|▏         | 19239/1000000 [53:14<26:54:45, 10.12it/s]

{'loss': Array(0.23481576, dtype=float32), 'loss_cross_entropy': Array(0.21331377, dtype=float32)}


  2%|▏         | 19249/1000000 [53:15<25:18:55, 10.76it/s]

{'loss': Array(0.24795082, dtype=float32), 'loss_cross_entropy': Array(0.22573908, dtype=float32)}


  2%|▏         | 19259/1000000 [53:17<29:47:08,  9.15it/s]

{'loss': Array(0.25799227, dtype=float32), 'loss_cross_entropy': Array(0.23477378, dtype=float32)}


  2%|▏         | 19269/1000000 [53:19<26:24:24, 10.32it/s]

{'loss': Array(0.25539204, dtype=float32), 'loss_cross_entropy': Array(0.23270622, dtype=float32)}


  2%|▏         | 19279/1000000 [53:20<32:11:08,  8.46it/s]

{'loss': Array(0.25202018, dtype=float32), 'loss_cross_entropy': Array(0.22888322, dtype=float32)}


  2%|▏         | 19289/1000000 [53:22<26:07:15, 10.43it/s]

{'loss': Array(0.25435993, dtype=float32), 'loss_cross_entropy': Array(0.22931552, dtype=float32)}


  2%|▏         | 19299/1000000 [53:23<40:46:50,  6.68it/s]

{'loss': Array(0.24489725, dtype=float32), 'loss_cross_entropy': Array(0.22312157, dtype=float32)}


  2%|▏         | 19309/1000000 [53:25<27:29:39,  9.91it/s]

{'loss': Array(0.23721401, dtype=float32), 'loss_cross_entropy': Array(0.21536127, dtype=float32)}


  2%|▏         | 19319/1000000 [53:26<40:26:02,  6.74it/s]

{'loss': Array(0.2340822, dtype=float32), 'loss_cross_entropy': Array(0.21177232, dtype=float32)}


  2%|▏         | 19329/1000000 [53:28<29:04:48,  9.37it/s]

{'loss': Array(0.27066258, dtype=float32), 'loss_cross_entropy': Array(0.2461276, dtype=float32)}


  2%|▏         | 19339/1000000 [53:29<26:07:49, 10.42it/s]

{'loss': Array(0.2636492, dtype=float32), 'loss_cross_entropy': Array(0.23937738, dtype=float32)}


  2%|▏         | 19349/1000000 [53:31<35:09:52,  7.75it/s]

{'loss': Array(0.25169212, dtype=float32), 'loss_cross_entropy': Array(0.22935402, dtype=float32)}


  2%|▏         | 19359/1000000 [53:32<26:32:14, 10.26it/s]

{'loss': Array(0.24895845, dtype=float32), 'loss_cross_entropy': Array(0.22507238, dtype=float32)}


  2%|▏         | 19369/1000000 [53:34<40:12:53,  6.77it/s]

{'loss': Array(0.23859628, dtype=float32), 'loss_cross_entropy': Array(0.21691366, dtype=float32)}


  2%|▏         | 19379/1000000 [53:35<27:45:30,  9.81it/s]

{'loss': Array(0.23930661, dtype=float32), 'loss_cross_entropy': Array(0.21781091, dtype=float32)}


  2%|▏         | 19389/1000000 [53:37<25:13:03, 10.80it/s]

{'loss': Array(0.23207417, dtype=float32), 'loss_cross_entropy': Array(0.21100211, dtype=float32)}


  2%|▏         | 19399/1000000 [53:38<29:06:34,  9.36it/s]

{'loss': Array(0.27951154, dtype=float32), 'loss_cross_entropy': Array(0.2556314, dtype=float32)}


  2%|▏         | 19409/1000000 [53:40<25:26:40, 10.71it/s]

{'loss': Array(0.24329722, dtype=float32), 'loss_cross_entropy': Array(0.22249739, dtype=float32)}


  2%|▏         | 19419/1000000 [53:41<29:18:04,  9.30it/s]

{'loss': Array(0.25853127, dtype=float32), 'loss_cross_entropy': Array(0.23570283, dtype=float32)}


  2%|▏         | 19429/1000000 [53:43<26:57:07, 10.11it/s]

{'loss': Array(0.23828068, dtype=float32), 'loss_cross_entropy': Array(0.21569672, dtype=float32)}


  2%|▏         | 19439/1000000 [53:45<39:48:39,  6.84it/s]

{'loss': Array(0.26796383, dtype=float32), 'loss_cross_entropy': Array(0.24304835, dtype=float32)}


  2%|▏         | 19449/1000000 [53:46<27:54:18,  9.76it/s]

{'loss': Array(0.24568231, dtype=float32), 'loss_cross_entropy': Array(0.22267304, dtype=float32)}


  2%|▏         | 19459/1000000 [53:47<25:10:26, 10.82it/s]

{'loss': Array(0.23895161, dtype=float32), 'loss_cross_entropy': Array(0.21896146, dtype=float32)}


  2%|▏         | 19469/1000000 [53:49<32:56:15,  8.27it/s]

{'loss': Array(0.25758192, dtype=float32), 'loss_cross_entropy': Array(0.23520921, dtype=float32)}


  2%|▏         | 19479/1000000 [53:50<26:25:21, 10.31it/s]

{'loss': Array(0.22826894, dtype=float32), 'loss_cross_entropy': Array(0.20695964, dtype=float32)}


  2%|▏         | 19489/1000000 [53:52<39:48:30,  6.84it/s]

{'loss': Array(0.2427243, dtype=float32), 'loss_cross_entropy': Array(0.22094047, dtype=float32)}


  2%|▏         | 19499/1000000 [53:53<28:08:15,  9.68it/s]

{'loss': Array(0.25243446, dtype=float32), 'loss_cross_entropy': Array(0.22965716, dtype=float32)}


  2%|▏         | 19509/1000000 [54:01<88:36:04,  3.07it/s] 

{'loss': Array(0.22335286, dtype=float32), 'loss_cross_entropy': Array(0.20237266, dtype=float32)}


  2%|▏         | 19519/1000000 [54:03<41:14:10,  6.60it/s]

{'loss': Array(0.24175946, dtype=float32), 'loss_cross_entropy': Array(0.22066317, dtype=float32)}


  2%|▏         | 19529/1000000 [54:04<27:26:40,  9.92it/s]

{'loss': Array(0.2545061, dtype=float32), 'loss_cross_entropy': Array(0.23272197, dtype=float32)}


  2%|▏         | 19539/1000000 [54:06<32:30:38,  8.38it/s]

{'loss': Array(0.23785332, dtype=float32), 'loss_cross_entropy': Array(0.2168345, dtype=float32)}


  2%|▏         | 19549/1000000 [54:07<26:17:00, 10.36it/s]

{'loss': Array(0.23105894, dtype=float32), 'loss_cross_entropy': Array(0.20954596, dtype=float32)}


  2%|▏         | 19559/1000000 [54:09<41:00:12,  6.64it/s]

{'loss': Array(0.23373766, dtype=float32), 'loss_cross_entropy': Array(0.21258798, dtype=float32)}


  2%|▏         | 19569/1000000 [54:10<28:22:22,  9.60it/s]

{'loss': Array(0.26329467, dtype=float32), 'loss_cross_entropy': Array(0.24003628, dtype=float32)}


  2%|▏         | 19579/1000000 [54:12<26:16:30, 10.36it/s]

{'loss': Array(0.2566144, dtype=float32), 'loss_cross_entropy': Array(0.2348119, dtype=float32)}


  2%|▏         | 19589/1000000 [54:14<29:50:34,  9.13it/s]

{'loss': Array(0.23487692, dtype=float32), 'loss_cross_entropy': Array(0.21407925, dtype=float32)}


  2%|▏         | 19599/1000000 [54:15<26:24:15, 10.31it/s]

{'loss': Array(0.236751, dtype=float32), 'loss_cross_entropy': Array(0.21562329, dtype=float32)}


  2%|▏         | 19609/1000000 [54:17<40:24:22,  6.74it/s]

{'loss': Array(0.22446238, dtype=float32), 'loss_cross_entropy': Array(0.20474859, dtype=float32)}


  2%|▏         | 19619/1000000 [54:18<28:36:58,  9.52it/s]

{'loss': Array(0.271743, dtype=float32), 'loss_cross_entropy': Array(0.2484254, dtype=float32)}


  2%|▏         | 19629/1000000 [54:20<26:08:31, 10.42it/s]

{'loss': Array(0.24042134, dtype=float32), 'loss_cross_entropy': Array(0.21849231, dtype=float32)}


  2%|▏         | 19639/1000000 [54:21<30:18:49,  8.98it/s]

{'loss': Array(0.2314409, dtype=float32), 'loss_cross_entropy': Array(0.2090478, dtype=float32)}


  2%|▏         | 19649/1000000 [54:23<27:38:31,  9.85it/s]

{'loss': Array(0.25399274, dtype=float32), 'loss_cross_entropy': Array(0.23236728, dtype=float32)}


  2%|▏         | 19659/1000000 [54:24<35:45:17,  7.62it/s]

{'loss': Array(0.23815298, dtype=float32), 'loss_cross_entropy': Array(0.21568911, dtype=float32)}


  2%|▏         | 19669/1000000 [54:26<27:01:27, 10.08it/s]

{'loss': Array(0.23559022, dtype=float32), 'loss_cross_entropy': Array(0.21385537, dtype=float32)}


  2%|▏         | 19679/1000000 [54:28<41:49:18,  6.51it/s]

{'loss': Array(0.24679747, dtype=float32), 'loss_cross_entropy': Array(0.22486678, dtype=float32)}


  2%|▏         | 19689/1000000 [54:29<28:08:48,  9.67it/s]

{'loss': Array(0.23946169, dtype=float32), 'loss_cross_entropy': Array(0.21776406, dtype=float32)}


  2%|▏         | 19699/1000000 [54:30<25:30:48, 10.67it/s]

{'loss': Array(0.23416524, dtype=float32), 'loss_cross_entropy': Array(0.21276955, dtype=float32)}


  2%|▏         | 19709/1000000 [54:32<32:07:26,  8.48it/s]

{'loss': Array(0.23277624, dtype=float32), 'loss_cross_entropy': Array(0.21178015, dtype=float32)}


  2%|▏         | 19719/1000000 [54:33<27:02:33, 10.07it/s]

{'loss': Array(0.2473859, dtype=float32), 'loss_cross_entropy': Array(0.22554289, dtype=float32)}


  2%|▏         | 19729/1000000 [54:35<36:12:43,  7.52it/s]

{'loss': Array(0.23211245, dtype=float32), 'loss_cross_entropy': Array(0.21020067, dtype=float32)}


  2%|▏         | 19739/1000000 [54:36<27:14:50,  9.99it/s]

{'loss': Array(0.25263494, dtype=float32), 'loss_cross_entropy': Array(0.23102903, dtype=float32)}


  2%|▏         | 19749/1000000 [54:38<26:57:58, 10.10it/s]

{'loss': Array(0.23448706, dtype=float32), 'loss_cross_entropy': Array(0.21344881, dtype=float32)}


  2%|▏         | 19759/1000000 [54:40<28:59:59,  9.39it/s]

{'loss': Array(0.24621148, dtype=float32), 'loss_cross_entropy': Array(0.223732, dtype=float32)}


  2%|▏         | 19769/1000000 [54:41<25:39:00, 10.62it/s]

{'loss': Array(0.26202294, dtype=float32), 'loss_cross_entropy': Array(0.24015184, dtype=float32)}


  2%|▏         | 19779/1000000 [54:43<40:12:59,  6.77it/s]

{'loss': Array(0.26538178, dtype=float32), 'loss_cross_entropy': Array(0.24243234, dtype=float32)}


  2%|▏         | 19789/1000000 [54:44<27:21:24,  9.95it/s]

{'loss': Array(0.25066015, dtype=float32), 'loss_cross_entropy': Array(0.2286191, dtype=float32)}


  2%|▏         | 19799/1000000 [54:45<25:22:41, 10.73it/s]

{'loss': Array(0.23592308, dtype=float32), 'loss_cross_entropy': Array(0.21467026, dtype=float32)}


  2%|▏         | 19809/1000000 [54:47<31:43:09,  8.58it/s]

{'loss': Array(0.26280904, dtype=float32), 'loss_cross_entropy': Array(0.23869348, dtype=float32)}


  2%|▏         | 19819/1000000 [54:48<26:42:36, 10.19it/s]

{'loss': Array(0.258829, dtype=float32), 'loss_cross_entropy': Array(0.23552752, dtype=float32)}


  2%|▏         | 19829/1000000 [54:50<36:19:05,  7.50it/s]

{'loss': Array(0.22421125, dtype=float32), 'loss_cross_entropy': Array(0.20338014, dtype=float32)}


  2%|▏         | 19839/1000000 [54:52<27:34:55,  9.87it/s]

{'loss': Array(0.25173995, dtype=float32), 'loss_cross_entropy': Array(0.22905378, dtype=float32)}


  2%|▏         | 19849/1000000 [54:53<36:19:22,  7.50it/s]

{'loss': Array(0.23616345, dtype=float32), 'loss_cross_entropy': Array(0.21437612, dtype=float32)}


  2%|▏         | 19859/1000000 [54:55<27:00:19, 10.08it/s]

{'loss': Array(0.24336687, dtype=float32), 'loss_cross_entropy': Array(0.22107923, dtype=float32)}


  2%|▏         | 19869/1000000 [54:56<25:55:47, 10.50it/s]

{'loss': Array(0.23607306, dtype=float32), 'loss_cross_entropy': Array(0.2132992, dtype=float32)}


  2%|▏         | 19879/1000000 [54:58<31:58:41,  8.51it/s]

{'loss': Array(0.22676472, dtype=float32), 'loss_cross_entropy': Array(0.20701467, dtype=float32)}


  2%|▏         | 19889/1000000 [54:59<26:40:20, 10.21it/s]

{'loss': Array(0.24912432, dtype=float32), 'loss_cross_entropy': Array(0.22664364, dtype=float32)}


  2%|▏         | 19899/1000000 [55:01<33:01:08,  8.25it/s]

{'loss': Array(0.25961694, dtype=float32), 'loss_cross_entropy': Array(0.23771296, dtype=float32)}


  2%|▏         | 19909/1000000 [55:02<26:31:28, 10.26it/s]

{'loss': Array(0.24912369, dtype=float32), 'loss_cross_entropy': Array(0.22639203, dtype=float32)}


  2%|▏         | 19919/1000000 [55:04<40:27:46,  6.73it/s]

{'loss': Array(0.26287803, dtype=float32), 'loss_cross_entropy': Array(0.23953979, dtype=float32)}


  2%|▏         | 19929/1000000 [55:05<28:11:34,  9.66it/s]

{'loss': Array(0.23261516, dtype=float32), 'loss_cross_entropy': Array(0.21308453, dtype=float32)}


  2%|▏         | 19939/1000000 [55:07<40:40:37,  6.69it/s]

{'loss': Array(0.24594441, dtype=float32), 'loss_cross_entropy': Array(0.22403054, dtype=float32)}


  2%|▏         | 19949/1000000 [55:09<28:04:10,  9.70it/s]

{'loss': Array(0.22412066, dtype=float32), 'loss_cross_entropy': Array(0.20459555, dtype=float32)}


  2%|▏         | 19959/1000000 [55:10<25:39:56, 10.61it/s]

{'loss': Array(0.25446212, dtype=float32), 'loss_cross_entropy': Array(0.23119164, dtype=float32)}


  2%|▏         | 19969/1000000 [55:12<35:05:40,  7.76it/s]

{'loss': Array(0.23350175, dtype=float32), 'loss_cross_entropy': Array(0.21233116, dtype=float32)}


  2%|▏         | 19979/1000000 [55:13<27:32:33,  9.88it/s]

{'loss': Array(0.22446986, dtype=float32), 'loss_cross_entropy': Array(0.20359936, dtype=float32)}


  2%|▏         | 19989/1000000 [55:15<40:27:55,  6.73it/s]

{'loss': Array(0.25630742, dtype=float32), 'loss_cross_entropy': Array(0.23474173, dtype=float32)}


  2%|▏         | 19999/1000000 [55:16<28:09:34,  9.67it/s]

{'loss': Array(0.2454965, dtype=float32), 'loss_cross_entropy': Array(0.22396548, dtype=float32)}


  2%|▏         | 20009/1000000 [55:24<89:34:40,  3.04it/s] 

{'loss': Array(0.25053182, dtype=float32), 'loss_cross_entropy': Array(0.2285091, dtype=float32)}


  2%|▏         | 20019/1000000 [55:26<39:36:39,  6.87it/s] 

{'loss': Array(0.22866297, dtype=float32), 'loss_cross_entropy': Array(0.20862226, dtype=float32)}


  2%|▏         | 20029/1000000 [55:27<27:36:55,  9.86it/s]

{'loss': Array(0.23094046, dtype=float32), 'loss_cross_entropy': Array(0.21000384, dtype=float32)}


  2%|▏         | 20039/1000000 [55:29<29:24:13,  9.26it/s]

{'loss': Array(0.24353051, dtype=float32), 'loss_cross_entropy': Array(0.22109072, dtype=float32)}


  2%|▏         | 20049/1000000 [55:30<25:42:59, 10.58it/s]

{'loss': Array(0.21690972, dtype=float32), 'loss_cross_entropy': Array(0.1970675, dtype=float32)}


  2%|▏         | 20059/1000000 [55:32<39:50:22,  6.83it/s]

{'loss': Array(0.21850987, dtype=float32), 'loss_cross_entropy': Array(0.19827764, dtype=float32)}


  2%|▏         | 20069/1000000 [55:33<28:04:11,  9.70it/s]

{'loss': Array(0.2559966, dtype=float32), 'loss_cross_entropy': Array(0.23217261, dtype=float32)}


  2%|▏         | 20079/1000000 [55:35<25:11:05, 10.81it/s]

{'loss': Array(0.2414162, dtype=float32), 'loss_cross_entropy': Array(0.2199317, dtype=float32)}


  2%|▏         | 20089/1000000 [55:36<32:03:20,  8.49it/s]

{'loss': Array(0.25171158, dtype=float32), 'loss_cross_entropy': Array(0.22839704, dtype=float32)}


  2%|▏         | 20099/1000000 [55:38<28:44:35,  9.47it/s]

{'loss': Array(0.2590049, dtype=float32), 'loss_cross_entropy': Array(0.23529, dtype=float32)}


  2%|▏         | 20109/1000000 [55:39<40:17:01,  6.76it/s]

{'loss': Array(0.21896194, dtype=float32), 'loss_cross_entropy': Array(0.19885312, dtype=float32)}


  2%|▏         | 20119/1000000 [55:41<27:52:01,  9.77it/s]

{'loss': Array(0.24268666, dtype=float32), 'loss_cross_entropy': Array(0.22148517, dtype=float32)}


  2%|▏         | 20129/1000000 [55:42<25:49:50, 10.54it/s]

{'loss': Array(0.23980783, dtype=float32), 'loss_cross_entropy': Array(0.21785855, dtype=float32)}


  2%|▏         | 20139/1000000 [55:44<30:22:02,  8.96it/s]

{'loss': Array(0.23878089, dtype=float32), 'loss_cross_entropy': Array(0.21636264, dtype=float32)}


  2%|▏         | 20149/1000000 [55:45<26:29:58, 10.27it/s]

{'loss': Array(0.23895085, dtype=float32), 'loss_cross_entropy': Array(0.21726564, dtype=float32)}


  2%|▏         | 20159/1000000 [55:47<35:50:48,  7.59it/s]

{'loss': Array(0.24945457, dtype=float32), 'loss_cross_entropy': Array(0.22695251, dtype=float32)}


  2%|▏         | 20169/1000000 [55:48<27:28:45,  9.90it/s]

{'loss': Array(0.23568682, dtype=float32), 'loss_cross_entropy': Array(0.21516521, dtype=float32)}


  2%|▏         | 20179/1000000 [55:50<25:24:29, 10.71it/s]

{'loss': Array(0.23569448, dtype=float32), 'loss_cross_entropy': Array(0.21577954, dtype=float32)}


  2%|▏         | 20189/1000000 [55:51<32:56:56,  8.26it/s]

{'loss': Array(0.23510277, dtype=float32), 'loss_cross_entropy': Array(0.21408394, dtype=float32)}


  2%|▏         | 20199/1000000 [55:53<27:27:25,  9.91it/s]

{'loss': Array(0.25365892, dtype=float32), 'loss_cross_entropy': Array(0.23105441, dtype=float32)}


  2%|▏         | 20209/1000000 [55:54<25:56:54, 10.49it/s]

{'loss': Array(0.21424699, dtype=float32), 'loss_cross_entropy': Array(0.19456108, dtype=float32)}


  2%|▏         | 20219/1000000 [55:56<30:28:02,  8.93it/s]

{'loss': Array(0.24358442, dtype=float32), 'loss_cross_entropy': Array(0.22145624, dtype=float32)}


  2%|▏         | 20229/1000000 [55:57<26:04:57, 10.43it/s]

{'loss': Array(0.23325804, dtype=float32), 'loss_cross_entropy': Array(0.2121733, dtype=float32)}


  2%|▏         | 20239/1000000 [55:59<36:28:15,  7.46it/s]

{'loss': Array(0.23395684, dtype=float32), 'loss_cross_entropy': Array(0.21486369, dtype=float32)}


  2%|▏         | 20249/1000000 [56:00<27:37:23,  9.85it/s]

{'loss': Array(0.23011088, dtype=float32), 'loss_cross_entropy': Array(0.20808756, dtype=float32)}


  2%|▏         | 20259/1000000 [56:02<40:08:34,  6.78it/s]

{'loss': Array(0.22971848, dtype=float32), 'loss_cross_entropy': Array(0.20869985, dtype=float32)}


  2%|▏         | 20269/1000000 [56:04<28:03:37,  9.70it/s]

{'loss': Array(0.21380661, dtype=float32), 'loss_cross_entropy': Array(0.1934109, dtype=float32)}


  2%|▏         | 20279/1000000 [56:05<25:11:42, 10.80it/s]

{'loss': Array(0.24165125, dtype=float32), 'loss_cross_entropy': Array(0.220492, dtype=float32)}


  2%|▏         | 20289/1000000 [56:07<28:16:38,  9.62it/s]

{'loss': Array(0.2182899, dtype=float32), 'loss_cross_entropy': Array(0.19838686, dtype=float32)}


  2%|▏         | 20299/1000000 [56:08<26:24:56, 10.30it/s]

{'loss': Array(0.21471377, dtype=float32), 'loss_cross_entropy': Array(0.19353957, dtype=float32)}


  2%|▏         | 20309/1000000 [56:10<40:09:38,  6.78it/s]

{'loss': Array(0.24496377, dtype=float32), 'loss_cross_entropy': Array(0.22280143, dtype=float32)}


  2%|▏         | 20319/1000000 [56:11<27:15:44,  9.98it/s]

{'loss': Array(0.22515285, dtype=float32), 'loss_cross_entropy': Array(0.20454493, dtype=float32)}


  2%|▏         | 20329/1000000 [56:12<25:28:11, 10.68it/s]

{'loss': Array(0.21297395, dtype=float32), 'loss_cross_entropy': Array(0.19357303, dtype=float32)}


  2%|▏         | 20339/1000000 [56:14<30:07:11,  9.03it/s]

{'loss': Array(0.226836, dtype=float32), 'loss_cross_entropy': Array(0.20586038, dtype=float32)}


  2%|▏         | 20349/1000000 [56:15<26:39:51, 10.21it/s]

{'loss': Array(0.25341782, dtype=float32), 'loss_cross_entropy': Array(0.23012538, dtype=float32)}


  2%|▏         | 20359/1000000 [56:17<35:24:22,  7.69it/s]

{'loss': Array(0.23943375, dtype=float32), 'loss_cross_entropy': Array(0.21883176, dtype=float32)}


  2%|▏         | 20369/1000000 [56:19<27:42:08,  9.82it/s]

{'loss': Array(0.2313355, dtype=float32), 'loss_cross_entropy': Array(0.21019666, dtype=float32)}


  2%|▏         | 20379/1000000 [56:20<39:44:05,  6.85it/s]

{'loss': Array(0.25599042, dtype=float32), 'loss_cross_entropy': Array(0.23249017, dtype=float32)}


  2%|▏         | 20389/1000000 [56:22<27:23:56,  9.93it/s]

{'loss': Array(0.23859218, dtype=float32), 'loss_cross_entropy': Array(0.21686478, dtype=float32)}


  2%|▏         | 20399/1000000 [56:23<26:13:21, 10.38it/s]

{'loss': Array(0.2196627, dtype=float32), 'loss_cross_entropy': Array(0.1996159, dtype=float32)}


  2%|▏         | 20409/1000000 [56:25<32:15:02,  8.44it/s]

{'loss': Array(0.21550496, dtype=float32), 'loss_cross_entropy': Array(0.19546013, dtype=float32)}


  2%|▏         | 20419/1000000 [56:26<27:05:58, 10.04it/s]

{'loss': Array(0.2570147, dtype=float32), 'loss_cross_entropy': Array(0.23589404, dtype=float32)}


  2%|▏         | 20429/1000000 [56:28<36:26:52,  7.47it/s]

{'loss': Array(0.24108592, dtype=float32), 'loss_cross_entropy': Array(0.21919, dtype=float32)}


  2%|▏         | 20439/1000000 [56:29<26:44:05, 10.18it/s]

{'loss': Array(0.23352554, dtype=float32), 'loss_cross_entropy': Array(0.21264385, dtype=float32)}


  2%|▏         | 20449/1000000 [56:31<25:31:28, 10.66it/s]

{'loss': Array(0.24180625, dtype=float32), 'loss_cross_entropy': Array(0.21965754, dtype=float32)}


  2%|▏         | 20459/1000000 [56:32<28:36:33,  9.51it/s]

{'loss': Array(0.25225756, dtype=float32), 'loss_cross_entropy': Array(0.2300664, dtype=float32)}


  2%|▏         | 20469/1000000 [56:34<26:11:08, 10.39it/s]

{'loss': Array(0.2439302, dtype=float32), 'loss_cross_entropy': Array(0.22158454, dtype=float32)}


  2%|▏         | 20479/1000000 [56:35<40:05:28,  6.79it/s]

{'loss': Array(0.21735941, dtype=float32), 'loss_cross_entropy': Array(0.1970865, dtype=float32)}


  2%|▏         | 20489/1000000 [56:37<28:02:03,  9.71it/s]

{'loss': Array(0.2293564, dtype=float32), 'loss_cross_entropy': Array(0.20718573, dtype=float32)}


  2%|▏         | 20499/1000000 [56:38<26:13:52, 10.37it/s]

{'loss': Array(0.24208649, dtype=float32), 'loss_cross_entropy': Array(0.22134562, dtype=float32)}


  2%|▏         | 20509/1000000 [56:46<94:37:24,  2.88it/s] 

{'loss': Array(0.21710041, dtype=float32), 'loss_cross_entropy': Array(0.19816618, dtype=float32)}


  2%|▏         | 20519/1000000 [56:48<38:57:09,  6.98it/s] 

{'loss': Array(0.22645965, dtype=float32), 'loss_cross_entropy': Array(0.20538543, dtype=float32)}


  2%|▏         | 20529/1000000 [56:49<37:57:30,  7.17it/s]

{'loss': Array(0.2296335, dtype=float32), 'loss_cross_entropy': Array(0.2093843, dtype=float32)}


  2%|▏         | 20539/1000000 [56:51<27:34:46,  9.86it/s]

{'loss': Array(0.24052171, dtype=float32), 'loss_cross_entropy': Array(0.21887569, dtype=float32)}


  2%|▏         | 20549/1000000 [56:52<35:53:42,  7.58it/s]

{'loss': Array(0.2331388, dtype=float32), 'loss_cross_entropy': Array(0.2125409, dtype=float32)}


  2%|▏         | 20559/1000000 [56:54<27:29:51,  9.89it/s]

{'loss': Array(0.2300794, dtype=float32), 'loss_cross_entropy': Array(0.2102118, dtype=float32)}


  2%|▏         | 20569/1000000 [56:55<25:49:48, 10.53it/s]

{'loss': Array(0.22962986, dtype=float32), 'loss_cross_entropy': Array(0.20908406, dtype=float32)}


  2%|▏         | 20579/1000000 [56:57<30:42:30,  8.86it/s]

{'loss': Array(0.2453057, dtype=float32), 'loss_cross_entropy': Array(0.22282155, dtype=float32)}


  2%|▏         | 20589/1000000 [56:58<26:29:33, 10.27it/s]

{'loss': Array(0.22325365, dtype=float32), 'loss_cross_entropy': Array(0.20318227, dtype=float32)}


  2%|▏         | 20599/1000000 [57:00<33:05:42,  8.22it/s]

{'loss': Array(0.22542317, dtype=float32), 'loss_cross_entropy': Array(0.20465545, dtype=float32)}


  2%|▏         | 20609/1000000 [57:01<26:55:07, 10.11it/s]

{'loss': Array(0.25739303, dtype=float32), 'loss_cross_entropy': Array(0.2342565, dtype=float32)}


  2%|▏         | 20619/1000000 [57:03<40:52:42,  6.66it/s]

{'loss': Array(0.25179353, dtype=float32), 'loss_cross_entropy': Array(0.22982176, dtype=float32)}


  2%|▏         | 20629/1000000 [57:05<27:55:26,  9.74it/s]

{'loss': Array(0.23742747, dtype=float32), 'loss_cross_entropy': Array(0.21548258, dtype=float32)}


  2%|▏         | 20639/1000000 [57:06<40:30:15,  6.72it/s]

{'loss': Array(0.23070374, dtype=float32), 'loss_cross_entropy': Array(0.21021596, dtype=float32)}


  2%|▏         | 20649/1000000 [57:08<31:02:48,  8.76it/s]

{'loss': Array(0.23056248, dtype=float32), 'loss_cross_entropy': Array(0.20923741, dtype=float32)}


  2%|▏         | 20659/1000000 [57:09<26:16:24, 10.35it/s]

{'loss': Array(0.22268562, dtype=float32), 'loss_cross_entropy': Array(0.2018644, dtype=float32)}


  2%|▏         | 20669/1000000 [57:11<35:23:00,  7.69it/s]

{'loss': Array(0.28738692, dtype=float32), 'loss_cross_entropy': Array(0.26301453, dtype=float32)}


  2%|▏         | 20679/1000000 [57:12<27:04:22, 10.05it/s]

{'loss': Array(0.2512212, dtype=float32), 'loss_cross_entropy': Array(0.22915815, dtype=float32)}


  2%|▏         | 20689/1000000 [57:14<40:24:35,  6.73it/s]

{'loss': Array(0.22202955, dtype=float32), 'loss_cross_entropy': Array(0.20217495, dtype=float32)}


  2%|▏         | 20699/1000000 [57:15<27:38:05,  9.84it/s]

{'loss': Array(0.24008267, dtype=float32), 'loss_cross_entropy': Array(0.21804658, dtype=float32)}


  2%|▏         | 20709/1000000 [57:17<25:55:41, 10.49it/s]

{'loss': Array(0.24549715, dtype=float32), 'loss_cross_entropy': Array(0.223576, dtype=float32)}


  2%|▏         | 20719/1000000 [57:18<29:30:52,  9.22it/s]

{'loss': Array(0.24939454, dtype=float32), 'loss_cross_entropy': Array(0.22745954, dtype=float32)}


  2%|▏         | 20729/1000000 [57:20<26:03:29, 10.44it/s]

{'loss': Array(0.23373131, dtype=float32), 'loss_cross_entropy': Array(0.21273385, dtype=float32)}


  2%|▏         | 20739/1000000 [57:21<28:39:50,  9.49it/s]

{'loss': Array(0.24558167, dtype=float32), 'loss_cross_entropy': Array(0.22433491, dtype=float32)}


  2%|▏         | 20749/1000000 [57:23<26:49:07, 10.14it/s]

{'loss': Array(0.23130004, dtype=float32), 'loss_cross_entropy': Array(0.21059941, dtype=float32)}


  2%|▏         | 20759/1000000 [57:24<39:47:21,  6.84it/s]

{'loss': Array(0.23650467, dtype=float32), 'loss_cross_entropy': Array(0.21585834, dtype=float32)}


  2%|▏         | 20769/1000000 [57:26<27:14:35,  9.98it/s]

{'loss': Array(0.21515842, dtype=float32), 'loss_cross_entropy': Array(0.19454817, dtype=float32)}


  2%|▏         | 20779/1000000 [57:27<25:14:31, 10.78it/s]

{'loss': Array(0.2518904, dtype=float32), 'loss_cross_entropy': Array(0.22870092, dtype=float32)}


  2%|▏         | 20789/1000000 [57:29<32:56:19,  8.26it/s]

{'loss': Array(0.23179935, dtype=float32), 'loss_cross_entropy': Array(0.21105218, dtype=float32)}


  2%|▏         | 20799/1000000 [57:30<26:37:13, 10.22it/s]

{'loss': Array(0.25711545, dtype=float32), 'loss_cross_entropy': Array(0.23431145, dtype=float32)}


  2%|▏         | 20809/1000000 [57:32<39:41:20,  6.85it/s]

{'loss': Array(0.24927478, dtype=float32), 'loss_cross_entropy': Array(0.22677918, dtype=float32)}


  2%|▏         | 20819/1000000 [57:33<27:54:21,  9.75it/s]

{'loss': Array(0.24006239, dtype=float32), 'loss_cross_entropy': Array(0.21951728, dtype=float32)}


  2%|▏         | 20829/1000000 [57:35<25:20:46, 10.73it/s]

{'loss': Array(0.25034848, dtype=float32), 'loss_cross_entropy': Array(0.2280304, dtype=float32)}


  2%|▏         | 20839/1000000 [57:36<29:53:55,  9.10it/s]

{'loss': Array(0.21597162, dtype=float32), 'loss_cross_entropy': Array(0.19627939, dtype=float32)}


  2%|▏         | 20849/1000000 [57:38<27:28:58,  9.90it/s]

{'loss': Array(0.24989168, dtype=float32), 'loss_cross_entropy': Array(0.22813928, dtype=float32)}


  2%|▏         | 20859/1000000 [57:39<36:07:01,  7.53it/s]

{'loss': Array(0.24728946, dtype=float32), 'loss_cross_entropy': Array(0.22643414, dtype=float32)}


  2%|▏         | 20869/1000000 [57:41<27:08:05, 10.02it/s]

{'loss': Array(0.24469629, dtype=float32), 'loss_cross_entropy': Array(0.22267644, dtype=float32)}


  2%|▏         | 20879/1000000 [57:42<25:27:13, 10.69it/s]

{'loss': Array(0.25176033, dtype=float32), 'loss_cross_entropy': Array(0.22941549, dtype=float32)}


  2%|▏         | 20889/1000000 [57:44<32:44:23,  8.31it/s]

{'loss': Array(0.24037562, dtype=float32), 'loss_cross_entropy': Array(0.21874006, dtype=float32)}


  2%|▏         | 20899/1000000 [57:45<26:23:38, 10.30it/s]

{'loss': Array(0.23297809, dtype=float32), 'loss_cross_entropy': Array(0.21371703, dtype=float32)}


  2%|▏         | 20909/1000000 [57:47<24:54:14, 10.92it/s]

{'loss': Array(0.22917698, dtype=float32), 'loss_cross_entropy': Array(0.20801668, dtype=float32)}


  2%|▏         | 20919/1000000 [57:48<29:18:40,  9.28it/s]

{'loss': Array(0.23358718, dtype=float32), 'loss_cross_entropy': Array(0.21207826, dtype=float32)}


  2%|▏         | 20929/1000000 [57:50<25:36:13, 10.62it/s]

{'loss': Array(0.2425671, dtype=float32), 'loss_cross_entropy': Array(0.22110195, dtype=float32)}


  2%|▏         | 20939/1000000 [57:51<32:37:22,  8.34it/s]

{'loss': Array(0.2428145, dtype=float32), 'loss_cross_entropy': Array(0.22108312, dtype=float32)}


  2%|▏         | 20949/1000000 [57:53<27:30:12,  9.89it/s]

{'loss': Array(0.24448843, dtype=float32), 'loss_cross_entropy': Array(0.22306633, dtype=float32)}


  2%|▏         | 20959/1000000 [57:55<39:52:37,  6.82it/s]

{'loss': Array(0.24502552, dtype=float32), 'loss_cross_entropy': Array(0.22279249, dtype=float32)}


  2%|▏         | 20969/1000000 [57:56<27:37:36,  9.84it/s]

{'loss': Array(0.22760312, dtype=float32), 'loss_cross_entropy': Array(0.2074151, dtype=float32)}


  2%|▏         | 20979/1000000 [57:58<41:10:32,  6.60it/s]

{'loss': Array(0.22608466, dtype=float32), 'loss_cross_entropy': Array(0.20714651, dtype=float32)}


  2%|▏         | 20989/1000000 [57:59<28:11:01,  9.65it/s]

{'loss': Array(0.24169473, dtype=float32), 'loss_cross_entropy': Array(0.2200586, dtype=float32)}


  2%|▏         | 20999/1000000 [58:00<26:35:52, 10.22it/s]

{'loss': Array(0.25868765, dtype=float32), 'loss_cross_entropy': Array(0.23718338, dtype=float32)}


  2%|▏         | 21009/1000000 [58:09<98:50:06,  2.75it/s] 

{'loss': Array(0.2336274, dtype=float32), 'loss_cross_entropy': Array(0.21379863, dtype=float32)}


  2%|▏         | 21019/1000000 [58:10<37:23:58,  7.27it/s] 

{'loss': Array(0.24254565, dtype=float32), 'loss_cross_entropy': Array(0.22182813, dtype=float32)}


  2%|▏         | 21029/1000000 [58:12<41:49:53,  6.50it/s]

{'loss': Array(0.2471354, dtype=float32), 'loss_cross_entropy': Array(0.22543533, dtype=float32)}


  2%|▏         | 21039/1000000 [58:13<28:34:40,  9.52it/s]

{'loss': Array(0.2501318, dtype=float32), 'loss_cross_entropy': Array(0.22894096, dtype=float32)}


  2%|▏         | 21049/1000000 [58:14<25:39:27, 10.60it/s]

{'loss': Array(0.23049791, dtype=float32), 'loss_cross_entropy': Array(0.2107877, dtype=float32)}


  2%|▏         | 21059/1000000 [58:16<29:07:26,  9.34it/s]

{'loss': Array(0.24002354, dtype=float32), 'loss_cross_entropy': Array(0.21912657, dtype=float32)}


  2%|▏         | 21069/1000000 [58:18<28:43:13,  9.47it/s]

{'loss': Array(0.24610043, dtype=float32), 'loss_cross_entropy': Array(0.22551115, dtype=float32)}


  2%|▏         | 21079/1000000 [58:19<28:39:20,  9.49it/s]

{'loss': Array(0.2306963, dtype=float32), 'loss_cross_entropy': Array(0.21078634, dtype=float32)}


  2%|▏         | 21089/1000000 [58:21<25:17:29, 10.75it/s]

{'loss': Array(0.2354164, dtype=float32), 'loss_cross_entropy': Array(0.2152705, dtype=float32)}


  2%|▏         | 21099/1000000 [58:22<39:10:22,  6.94it/s]

{'loss': Array(0.22755514, dtype=float32), 'loss_cross_entropy': Array(0.20748141, dtype=float32)}


  2%|▏         | 21109/1000000 [58:24<28:28:04,  9.55it/s]

{'loss': Array(0.233918, dtype=float32), 'loss_cross_entropy': Array(0.21250127, dtype=float32)}


  2%|▏         | 21119/1000000 [58:25<25:12:50, 10.78it/s]

{'loss': Array(0.24338603, dtype=float32), 'loss_cross_entropy': Array(0.22358143, dtype=float32)}


  2%|▏         | 21129/1000000 [58:27<32:09:31,  8.46it/s]

{'loss': Array(0.26802206, dtype=float32), 'loss_cross_entropy': Array(0.24504729, dtype=float32)}


  2%|▏         | 21139/1000000 [58:28<26:45:28, 10.16it/s]

{'loss': Array(0.25059766, dtype=float32), 'loss_cross_entropy': Array(0.22964282, dtype=float32)}


  2%|▏         | 21149/1000000 [58:30<40:18:10,  6.75it/s]

{'loss': Array(0.23982434, dtype=float32), 'loss_cross_entropy': Array(0.21743181, dtype=float32)}


  2%|▏         | 21159/1000000 [58:31<27:26:30,  9.91it/s]

{'loss': Array(0.22066407, dtype=float32), 'loss_cross_entropy': Array(0.2014128, dtype=float32)}


  2%|▏         | 21169/1000000 [58:33<27:51:14,  9.76it/s]

{'loss': Array(0.24817672, dtype=float32), 'loss_cross_entropy': Array(0.22703274, dtype=float32)}


  2%|▏         | 21179/1000000 [58:34<31:03:07,  8.76it/s]

{'loss': Array(0.26414025, dtype=float32), 'loss_cross_entropy': Array(0.24239755, dtype=float32)}


  2%|▏         | 21189/1000000 [58:36<25:52:44, 10.51it/s]

{'loss': Array(0.23679724, dtype=float32), 'loss_cross_entropy': Array(0.21452959, dtype=float32)}


  2%|▏         | 21199/1000000 [58:37<35:59:36,  7.55it/s]

{'loss': Array(0.23610091, dtype=float32), 'loss_cross_entropy': Array(0.21588488, dtype=float32)}


  2%|▏         | 21209/1000000 [58:39<27:25:55,  9.91it/s]

{'loss': Array(0.23174529, dtype=float32), 'loss_cross_entropy': Array(0.21244709, dtype=float32)}


  2%|▏         | 21219/1000000 [58:40<25:48:34, 10.53it/s]

{'loss': Array(0.21789134, dtype=float32), 'loss_cross_entropy': Array(0.19880691, dtype=float32)}


  2%|▏         | 21229/1000000 [58:42<32:48:21,  8.29it/s]

{'loss': Array(0.24011576, dtype=float32), 'loss_cross_entropy': Array(0.21858802, dtype=float32)}


  2%|▏         | 21239/1000000 [58:43<27:01:36, 10.06it/s]

{'loss': Array(0.24182998, dtype=float32), 'loss_cross_entropy': Array(0.22066474, dtype=float32)}


  2%|▏         | 21249/1000000 [58:45<25:08:05, 10.82it/s]

{'loss': Array(0.24319528, dtype=float32), 'loss_cross_entropy': Array(0.22296648, dtype=float32)}


  2%|▏         | 21259/1000000 [58:46<28:46:38,  9.45it/s]

{'loss': Array(0.24216342, dtype=float32), 'loss_cross_entropy': Array(0.22226532, dtype=float32)}


  2%|▏         | 21269/1000000 [58:48<26:55:20, 10.10it/s]

{'loss': Array(0.22256242, dtype=float32), 'loss_cross_entropy': Array(0.20426194, dtype=float32)}


  2%|▏         | 21279/1000000 [58:50<32:04:08,  8.48it/s]

{'loss': Array(0.2551678, dtype=float32), 'loss_cross_entropy': Array(0.23368993, dtype=float32)}


  2%|▏         | 21289/1000000 [58:51<26:17:51, 10.34it/s]

{'loss': Array(0.2805578, dtype=float32), 'loss_cross_entropy': Array(0.25687012, dtype=float32)}


  2%|▏         | 21299/1000000 [58:53<41:30:08,  6.55it/s]

{'loss': Array(0.243184, dtype=float32), 'loss_cross_entropy': Array(0.22161137, dtype=float32)}


  2%|▏         | 21309/1000000 [58:54<27:43:25,  9.81it/s]

{'loss': Array(0.24371324, dtype=float32), 'loss_cross_entropy': Array(0.22309993, dtype=float32)}


  2%|▏         | 21319/1000000 [58:56<40:19:33,  6.74it/s]

{'loss': Array(0.23040166, dtype=float32), 'loss_cross_entropy': Array(0.21023487, dtype=float32)}


  2%|▏         | 21329/1000000 [58:57<27:33:11,  9.87it/s]

{'loss': Array(0.24891976, dtype=float32), 'loss_cross_entropy': Array(0.22875272, dtype=float32)}


  2%|▏         | 21339/1000000 [58:58<26:30:16, 10.26it/s]

{'loss': Array(0.24130352, dtype=float32), 'loss_cross_entropy': Array(0.22144954, dtype=float32)}


  2%|▏         | 21349/1000000 [59:00<35:59:19,  7.55it/s]

{'loss': Array(0.24064282, dtype=float32), 'loss_cross_entropy': Array(0.22096197, dtype=float32)}


  2%|▏         | 21359/1000000 [59:02<26:47:54, 10.14it/s]

{'loss': Array(0.24760298, dtype=float32), 'loss_cross_entropy': Array(0.22716038, dtype=float32)}


  2%|▏         | 21369/1000000 [59:03<40:23:18,  6.73it/s]

{'loss': Array(0.21283531, dtype=float32), 'loss_cross_entropy': Array(0.19354333, dtype=float32)}


  2%|▏         | 21379/1000000 [59:05<28:07:45,  9.66it/s]

{'loss': Array(0.23858595, dtype=float32), 'loss_cross_entropy': Array(0.21668355, dtype=float32)}


  2%|▏         | 21389/1000000 [59:06<25:37:39, 10.61it/s]

{'loss': Array(0.22030078, dtype=float32), 'loss_cross_entropy': Array(0.19901986, dtype=float32)}


  2%|▏         | 21399/1000000 [59:08<29:59:28,  9.06it/s]

{'loss': Array(0.23698533, dtype=float32), 'loss_cross_entropy': Array(0.21542601, dtype=float32)}


  2%|▏         | 21409/1000000 [59:09<26:17:55, 10.34it/s]

{'loss': Array(0.23598115, dtype=float32), 'loss_cross_entropy': Array(0.21514343, dtype=float32)}


  2%|▏         | 21419/1000000 [59:11<28:53:42,  9.41it/s]

{'loss': Array(0.22366984, dtype=float32), 'loss_cross_entropy': Array(0.20234254, dtype=float32)}


  2%|▏         | 21429/1000000 [59:12<26:30:25, 10.25it/s]

{'loss': Array(0.23351322, dtype=float32), 'loss_cross_entropy': Array(0.21314101, dtype=float32)}


  2%|▏         | 21439/1000000 [59:14<40:56:52,  6.64it/s]

{'loss': Array(0.23030238, dtype=float32), 'loss_cross_entropy': Array(0.21047728, dtype=float32)}


  2%|▏         | 21449/1000000 [59:15<27:37:56,  9.84it/s]

{'loss': Array(0.23076792, dtype=float32), 'loss_cross_entropy': Array(0.21035133, dtype=float32)}


  2%|▏         | 21459/1000000 [59:17<25:15:18, 10.76it/s]

{'loss': Array(0.23707603, dtype=float32), 'loss_cross_entropy': Array(0.21645747, dtype=float32)}


  2%|▏         | 21469/1000000 [59:19<33:00:43,  8.23it/s]

{'loss': Array(0.25659478, dtype=float32), 'loss_cross_entropy': Array(0.23473358, dtype=float32)}


  2%|▏         | 21479/1000000 [59:20<26:26:09, 10.28it/s]

{'loss': Array(0.23311286, dtype=float32), 'loss_cross_entropy': Array(0.21262705, dtype=float32)}


  2%|▏         | 21489/1000000 [59:22<40:03:47,  6.78it/s]

{'loss': Array(0.24318229, dtype=float32), 'loss_cross_entropy': Array(0.22057858, dtype=float32)}


  2%|▏         | 21499/1000000 [59:23<28:14:12,  9.63it/s]

{'loss': Array(0.22506212, dtype=float32), 'loss_cross_entropy': Array(0.20416573, dtype=float32)}


  2%|▏         | 21509/1000000 [59:31<88:54:30,  3.06it/s] 

{'loss': Array(0.2419982, dtype=float32), 'loss_cross_entropy': Array(0.22049628, dtype=float32)}


  2%|▏         | 21519/1000000 [59:33<43:19:32,  6.27it/s]

{'loss': Array(0.24323273, dtype=float32), 'loss_cross_entropy': Array(0.22291589, dtype=float32)}


  2%|▏         | 21529/1000000 [59:34<28:28:31,  9.54it/s]

{'loss': Array(0.21633112, dtype=float32), 'loss_cross_entropy': Array(0.19727534, dtype=float32)}


  2%|▏         | 21539/1000000 [59:36<32:54:29,  8.26it/s]

{'loss': Array(0.2127655, dtype=float32), 'loss_cross_entropy': Array(0.19394355, dtype=float32)}


  2%|▏         | 21549/1000000 [59:37<26:32:41, 10.24it/s]

{'loss': Array(0.22908409, dtype=float32), 'loss_cross_entropy': Array(0.20744029, dtype=float32)}


  2%|▏         | 21559/1000000 [59:39<40:21:00,  6.74it/s]

{'loss': Array(0.24383953, dtype=float32), 'loss_cross_entropy': Array(0.22307804, dtype=float32)}


  2%|▏         | 21569/1000000 [59:40<28:02:01,  9.69it/s]

{'loss': Array(0.23187149, dtype=float32), 'loss_cross_entropy': Array(0.21132167, dtype=float32)}


  2%|▏         | 21579/1000000 [59:42<26:17:10, 10.34it/s]

{'loss': Array(0.2029954, dtype=float32), 'loss_cross_entropy': Array(0.18491845, dtype=float32)}


  2%|▏         | 21589/1000000 [59:43<29:43:58,  9.14it/s]

{'loss': Array(0.25636408, dtype=float32), 'loss_cross_entropy': Array(0.23363952, dtype=float32)}


  2%|▏         | 21599/1000000 [59:45<25:57:05, 10.47it/s]

{'loss': Array(0.23029406, dtype=float32), 'loss_cross_entropy': Array(0.21159206, dtype=float32)}


  2%|▏         | 21609/1000000 [59:46<39:47:12,  6.83it/s]

{'loss': Array(0.2490392, dtype=float32), 'loss_cross_entropy': Array(0.22671457, dtype=float32)}


  2%|▏         | 21619/1000000 [59:48<26:58:29, 10.08it/s]

{'loss': Array(0.22703674, dtype=float32), 'loss_cross_entropy': Array(0.20604339, dtype=float32)}


  2%|▏         | 21629/1000000 [59:49<25:03:22, 10.85it/s]

{'loss': Array(0.225316, dtype=float32), 'loss_cross_entropy': Array(0.20473325, dtype=float32)}


  2%|▏         | 21639/1000000 [59:51<29:53:07,  9.09it/s]

{'loss': Array(0.23509751, dtype=float32), 'loss_cross_entropy': Array(0.2149797, dtype=float32)}


  2%|▏         | 21649/1000000 [59:52<25:36:23, 10.61it/s]

{'loss': Array(0.21566176, dtype=float32), 'loss_cross_entropy': Array(0.1970694, dtype=float32)}


  2%|▏         | 21659/1000000 [59:54<35:22:11,  7.68it/s]

{'loss': Array(0.24845628, dtype=float32), 'loss_cross_entropy': Array(0.22770111, dtype=float32)}


  2%|▏         | 21669/1000000 [59:55<26:15:06, 10.35it/s]

{'loss': Array(0.2464838, dtype=float32), 'loss_cross_entropy': Array(0.2255512, dtype=float32)}


  2%|▏         | 21679/1000000 [59:57<41:07:53,  6.61it/s]

{'loss': Array(0.22872595, dtype=float32), 'loss_cross_entropy': Array(0.20834732, dtype=float32)}


  2%|▏         | 21689/1000000 [59:58<28:11:39,  9.64it/s]

{'loss': Array(0.24558549, dtype=float32), 'loss_cross_entropy': Array(0.2239009, dtype=float32)}


  2%|▏         | 21699/1000000 [1:00:00<25:59:52, 10.45it/s]

{'loss': Array(0.23709989, dtype=float32), 'loss_cross_entropy': Array(0.21575105, dtype=float32)}


  2%|▏         | 21709/1000000 [1:00:01<32:46:35,  8.29it/s]

{'loss': Array(0.23119752, dtype=float32), 'loss_cross_entropy': Array(0.21127582, dtype=float32)}


  2%|▏         | 21719/1000000 [1:00:03<26:31:41, 10.24it/s]

{'loss': Array(0.24164437, dtype=float32), 'loss_cross_entropy': Array(0.22009146, dtype=float32)}


  2%|▏         | 21729/1000000 [1:00:04<36:18:11,  7.49it/s]

{'loss': Array(0.25191805, dtype=float32), 'loss_cross_entropy': Array(0.22984758, dtype=float32)}


  2%|▏         | 21739/1000000 [1:00:06<27:22:51,  9.92it/s]

{'loss': Array(0.23611747, dtype=float32), 'loss_cross_entropy': Array(0.21552458, dtype=float32)}


  2%|▏         | 21749/1000000 [1:00:07<25:52:35, 10.50it/s]

{'loss': Array(0.22537783, dtype=float32), 'loss_cross_entropy': Array(0.20626383, dtype=float32)}


  2%|▏         | 21759/1000000 [1:00:09<28:56:29,  9.39it/s]

{'loss': Array(0.23129725, dtype=float32), 'loss_cross_entropy': Array(0.21138881, dtype=float32)}


  2%|▏         | 21769/1000000 [1:00:10<26:30:04, 10.25it/s]

{'loss': Array(0.23111515, dtype=float32), 'loss_cross_entropy': Array(0.21077394, dtype=float32)}


  2%|▏         | 21779/1000000 [1:00:12<40:24:51,  6.72it/s]

{'loss': Array(0.25808203, dtype=float32), 'loss_cross_entropy': Array(0.2361192, dtype=float32)}


  2%|▏         | 21789/1000000 [1:00:14<28:21:19,  9.58it/s]

{'loss': Array(0.2399375, dtype=float32), 'loss_cross_entropy': Array(0.2186749, dtype=float32)}


  2%|▏         | 21799/1000000 [1:00:15<25:33:12, 10.63it/s]

{'loss': Array(0.22149716, dtype=float32), 'loss_cross_entropy': Array(0.20164359, dtype=float32)}


  2%|▏         | 21809/1000000 [1:00:17<33:36:11,  8.09it/s]

{'loss': Array(0.23756896, dtype=float32), 'loss_cross_entropy': Array(0.21625867, dtype=float32)}


  2%|▏         | 21819/1000000 [1:00:18<27:00:42, 10.06it/s]

{'loss': Array(0.21761195, dtype=float32), 'loss_cross_entropy': Array(0.1984241, dtype=float32)}


  2%|▏         | 21829/1000000 [1:00:20<35:17:51,  7.70it/s]

{'loss': Array(0.22957878, dtype=float32), 'loss_cross_entropy': Array(0.21005487, dtype=float32)}


  2%|▏         | 21839/1000000 [1:00:21<26:26:38, 10.28it/s]

{'loss': Array(0.25448987, dtype=float32), 'loss_cross_entropy': Array(0.2311785, dtype=float32)}


  2%|▏         | 21849/1000000 [1:00:23<35:19:24,  7.69it/s]

{'loss': Array(0.25032958, dtype=float32), 'loss_cross_entropy': Array(0.22920354, dtype=float32)}


  2%|▏         | 21859/1000000 [1:00:24<26:34:15, 10.23it/s]

{'loss': Array(0.23383747, dtype=float32), 'loss_cross_entropy': Array(0.2142268, dtype=float32)}


  2%|▏         | 21869/1000000 [1:00:25<25:21:00, 10.72it/s]

{'loss': Array(0.25354543, dtype=float32), 'loss_cross_entropy': Array(0.2319154, dtype=float32)}


  2%|▏         | 21879/1000000 [1:00:27<29:32:30,  9.20it/s]

{'loss': Array(0.22481607, dtype=float32), 'loss_cross_entropy': Array(0.20431535, dtype=float32)}


  2%|▏         | 21889/1000000 [1:00:29<26:12:52, 10.36it/s]

{'loss': Array(0.23332535, dtype=float32), 'loss_cross_entropy': Array(0.21310866, dtype=float32)}


  2%|▏         | 21899/1000000 [1:00:30<32:34:51,  8.34it/s]

{'loss': Array(0.21554188, dtype=float32), 'loss_cross_entropy': Array(0.19677296, dtype=float32)}


  2%|▏         | 21909/1000000 [1:00:32<26:33:10, 10.23it/s]

{'loss': Array(0.24131298, dtype=float32), 'loss_cross_entropy': Array(0.22020116, dtype=float32)}


  2%|▏         | 21919/1000000 [1:00:33<40:59:48,  6.63it/s]

{'loss': Array(0.21922646, dtype=float32), 'loss_cross_entropy': Array(0.19917698, dtype=float32)}


  2%|▏         | 21929/1000000 [1:00:35<27:17:39,  9.95it/s]

{'loss': Array(0.2334023, dtype=float32), 'loss_cross_entropy': Array(0.21393903, dtype=float32)}


  2%|▏         | 21939/1000000 [1:00:36<40:02:36,  6.78it/s]

{'loss': Array(0.2394239, dtype=float32), 'loss_cross_entropy': Array(0.21920227, dtype=float32)}


  2%|▏         | 21949/1000000 [1:00:38<28:07:05,  9.66it/s]

{'loss': Array(0.23964761, dtype=float32), 'loss_cross_entropy': Array(0.21873549, dtype=float32)}


  2%|▏         | 21959/1000000 [1:00:39<25:44:04, 10.56it/s]

{'loss': Array(0.24360016, dtype=float32), 'loss_cross_entropy': Array(0.22255722, dtype=float32)}


  2%|▏         | 21969/1000000 [1:00:41<34:47:59,  7.81it/s]

{'loss': Array(0.23628394, dtype=float32), 'loss_cross_entropy': Array(0.21459436, dtype=float32)}


  2%|▏         | 21979/1000000 [1:00:42<26:34:08, 10.23it/s]

{'loss': Array(0.22639851, dtype=float32), 'loss_cross_entropy': Array(0.20680617, dtype=float32)}


  2%|▏         | 21989/1000000 [1:00:44<40:22:43,  6.73it/s]

{'loss': Array(0.24827228, dtype=float32), 'loss_cross_entropy': Array(0.22560766, dtype=float32)}


  2%|▏         | 21999/1000000 [1:00:45<27:45:40,  9.79it/s]

{'loss': Array(0.2310154, dtype=float32), 'loss_cross_entropy': Array(0.21028304, dtype=float32)}


  2%|▏         | 22009/1000000 [1:00:53<91:39:28,  2.96it/s] 

{'loss': Array(0.22508617, dtype=float32), 'loss_cross_entropy': Array(0.20498124, dtype=float32)}


  2%|▏         | 22019/1000000 [1:00:55<39:32:40,  6.87it/s] 

{'loss': Array(0.24150944, dtype=float32), 'loss_cross_entropy': Array(0.21953426, dtype=float32)}


  2%|▏         | 22029/1000000 [1:00:56<27:28:38,  9.89it/s]

{'loss': Array(0.2653534, dtype=float32), 'loss_cross_entropy': Array(0.24332967, dtype=float32)}


  2%|▏         | 22039/1000000 [1:00:58<28:52:51,  9.41it/s]

{'loss': Array(0.23407112, dtype=float32), 'loss_cross_entropy': Array(0.2131995, dtype=float32)}


  2%|▏         | 22049/1000000 [1:00:59<26:04:48, 10.42it/s]

{'loss': Array(0.24358444, dtype=float32), 'loss_cross_entropy': Array(0.22264552, dtype=float32)}


  2%|▏         | 22059/1000000 [1:01:01<41:02:26,  6.62it/s]

{'loss': Array(0.22167829, dtype=float32), 'loss_cross_entropy': Array(0.20310603, dtype=float32)}


  2%|▏         | 22069/1000000 [1:01:03<27:44:12,  9.79it/s]

{'loss': Array(0.23049179, dtype=float32), 'loss_cross_entropy': Array(0.21097134, dtype=float32)}


  2%|▏         | 22079/1000000 [1:01:04<25:38:59, 10.59it/s]

{'loss': Array(0.23958759, dtype=float32), 'loss_cross_entropy': Array(0.21921729, dtype=float32)}


  2%|▏         | 22089/1000000 [1:01:06<33:14:21,  8.17it/s]

{'loss': Array(0.2437676, dtype=float32), 'loss_cross_entropy': Array(0.22355144, dtype=float32)}


  2%|▏         | 22099/1000000 [1:01:07<26:29:55, 10.25it/s]

{'loss': Array(0.22370163, dtype=float32), 'loss_cross_entropy': Array(0.20392516, dtype=float32)}


  2%|▏         | 22109/1000000 [1:01:09<39:57:56,  6.80it/s]

{'loss': Array(0.2212513, dtype=float32), 'loss_cross_entropy': Array(0.20215325, dtype=float32)}


  2%|▏         | 22119/1000000 [1:01:10<27:56:47,  9.72it/s]

{'loss': Array(0.23029569, dtype=float32), 'loss_cross_entropy': Array(0.21002875, dtype=float32)}


  2%|▏         | 22129/1000000 [1:01:12<25:51:47, 10.50it/s]

{'loss': Array(0.2340673, dtype=float32), 'loss_cross_entropy': Array(0.21418269, dtype=float32)}


  2%|▏         | 22139/1000000 [1:01:13<29:53:39,  9.09it/s]

{'loss': Array(0.24497595, dtype=float32), 'loss_cross_entropy': Array(0.22439949, dtype=float32)}


  2%|▏         | 22149/1000000 [1:01:15<26:03:54, 10.42it/s]

{'loss': Array(0.20184593, dtype=float32), 'loss_cross_entropy': Array(0.18347953, dtype=float32)}


  2%|▏         | 22159/1000000 [1:01:16<35:48:55,  7.58it/s]

{'loss': Array(0.23862092, dtype=float32), 'loss_cross_entropy': Array(0.2188165, dtype=float32)}


  2%|▏         | 22169/1000000 [1:01:18<26:50:52, 10.12it/s]

{'loss': Array(0.22902091, dtype=float32), 'loss_cross_entropy': Array(0.20920971, dtype=float32)}


  2%|▏         | 22179/1000000 [1:01:19<25:22:17, 10.71it/s]

{'loss': Array(0.24206531, dtype=float32), 'loss_cross_entropy': Array(0.22136806, dtype=float32)}


  2%|▏         | 22189/1000000 [1:01:21<32:30:37,  8.35it/s]

{'loss': Array(0.24399824, dtype=float32), 'loss_cross_entropy': Array(0.2226363, dtype=float32)}


  2%|▏         | 22199/1000000 [1:01:22<26:28:49, 10.26it/s]

{'loss': Array(0.2235447, dtype=float32), 'loss_cross_entropy': Array(0.20393573, dtype=float32)}


  2%|▏         | 22209/1000000 [1:01:23<25:18:45, 10.73it/s]

{'loss': Array(0.23994683, dtype=float32), 'loss_cross_entropy': Array(0.21998845, dtype=float32)}


  2%|▏         | 22219/1000000 [1:01:25<28:51:11,  9.41it/s]

{'loss': Array(0.23962712, dtype=float32), 'loss_cross_entropy': Array(0.21836208, dtype=float32)}


  2%|▏         | 22229/1000000 [1:01:27<26:06:15, 10.40it/s]

{'loss': Array(0.22881801, dtype=float32), 'loss_cross_entropy': Array(0.20930481, dtype=float32)}


  2%|▏         | 22239/1000000 [1:01:28<32:20:21,  8.40it/s]

{'loss': Array(0.23501138, dtype=float32), 'loss_cross_entropy': Array(0.21543045, dtype=float32)}


  2%|▏         | 22249/1000000 [1:01:30<26:17:07, 10.33it/s]

{'loss': Array(0.22122405, dtype=float32), 'loss_cross_entropy': Array(0.20228826, dtype=float32)}


  2%|▏         | 22259/1000000 [1:01:31<39:32:27,  6.87it/s]

{'loss': Array(0.21910994, dtype=float32), 'loss_cross_entropy': Array(0.20029011, dtype=float32)}


  2%|▏         | 22269/1000000 [1:01:33<27:25:01,  9.91it/s]

{'loss': Array(0.2343231, dtype=float32), 'loss_cross_entropy': Array(0.21422704, dtype=float32)}


  2%|▏         | 22279/1000000 [1:01:34<40:01:43,  6.78it/s]

{'loss': Array(0.23546922, dtype=float32), 'loss_cross_entropy': Array(0.21517816, dtype=float32)}


  2%|▏         | 22289/1000000 [1:01:36<27:40:41,  9.81it/s]

{'loss': Array(0.25320658, dtype=float32), 'loss_cross_entropy': Array(0.2319008, dtype=float32)}


  2%|▏         | 22299/1000000 [1:01:37<25:22:05, 10.71it/s]

{'loss': Array(0.22223711, dtype=float32), 'loss_cross_entropy': Array(0.20218034, dtype=float32)}


  2%|▏         | 22309/1000000 [1:01:39<35:48:47,  7.58it/s]

{'loss': Array(0.2618864, dtype=float32), 'loss_cross_entropy': Array(0.24094479, dtype=float32)}


  2%|▏         | 22319/1000000 [1:01:40<27:25:52,  9.90it/s]

{'loss': Array(0.20835362, dtype=float32), 'loss_cross_entropy': Array(0.18934743, dtype=float32)}


  2%|▏         | 22329/1000000 [1:01:42<41:12:42,  6.59it/s]

{'loss': Array(0.24782443, dtype=float32), 'loss_cross_entropy': Array(0.22654699, dtype=float32)}


  2%|▏         | 22339/1000000 [1:01:43<27:41:27,  9.81it/s]

{'loss': Array(0.22263768, dtype=float32), 'loss_cross_entropy': Array(0.20372961, dtype=float32)}


  2%|▏         | 22349/1000000 [1:01:45<25:31:50, 10.64it/s]

{'loss': Array(0.2405623, dtype=float32), 'loss_cross_entropy': Array(0.22019528, dtype=float32)}


  2%|▏         | 22359/1000000 [1:01:46<29:17:46,  9.27it/s]

{'loss': Array(0.24347024, dtype=float32), 'loss_cross_entropy': Array(0.22269018, dtype=float32)}


  2%|▏         | 22369/1000000 [1:01:48<25:46:05, 10.54it/s]

{'loss': Array(0.23723876, dtype=float32), 'loss_cross_entropy': Array(0.21726616, dtype=float32)}


  2%|▏         | 22379/1000000 [1:01:49<28:21:02,  9.58it/s]

{'loss': Array(0.23983033, dtype=float32), 'loss_cross_entropy': Array(0.21906471, dtype=float32)}


  2%|▏         | 22389/1000000 [1:01:51<25:21:32, 10.71it/s]

{'loss': Array(0.2337069, dtype=float32), 'loss_cross_entropy': Array(0.21365023, dtype=float32)}


  2%|▏         | 22399/1000000 [1:01:52<39:17:26,  6.91it/s]

{'loss': Array(0.26318732, dtype=float32), 'loss_cross_entropy': Array(0.24202414, dtype=float32)}


  2%|▏         | 22409/1000000 [1:01:54<28:21:40,  9.57it/s]

{'loss': Array(0.224545, dtype=float32), 'loss_cross_entropy': Array(0.20393093, dtype=float32)}


  2%|▏         | 22419/1000000 [1:01:55<25:53:45, 10.49it/s]

{'loss': Array(0.22669803, dtype=float32), 'loss_cross_entropy': Array(0.20583384, dtype=float32)}


  2%|▏         | 22429/1000000 [1:01:57<32:22:51,  8.39it/s]

{'loss': Array(0.23240136, dtype=float32), 'loss_cross_entropy': Array(0.21184035, dtype=float32)}


  2%|▏         | 22439/1000000 [1:01:58<26:35:20, 10.21it/s]

{'loss': Array(0.21930139, dtype=float32), 'loss_cross_entropy': Array(0.19925621, dtype=float32)}


  2%|▏         | 22449/1000000 [1:02:00<41:04:04,  6.61it/s]

{'loss': Array(0.24243374, dtype=float32), 'loss_cross_entropy': Array(0.22120796, dtype=float32)}


  2%|▏         | 22459/1000000 [1:02:02<28:53:17,  9.40it/s]

{'loss': Array(0.26455423, dtype=float32), 'loss_cross_entropy': Array(0.2408588, dtype=float32)}


  2%|▏         | 22469/1000000 [1:02:03<25:46:51, 10.53it/s]

{'loss': Array(0.23602426, dtype=float32), 'loss_cross_entropy': Array(0.21517678, dtype=float32)}


  2%|▏         | 22479/1000000 [1:02:05<29:56:56,  9.07it/s]

{'loss': Array(0.2521991, dtype=float32), 'loss_cross_entropy': Array(0.23122768, dtype=float32)}


  2%|▏         | 22489/1000000 [1:02:06<25:44:18, 10.55it/s]

{'loss': Array(0.2173476, dtype=float32), 'loss_cross_entropy': Array(0.19839649, dtype=float32)}


  2%|▏         | 22499/1000000 [1:02:08<34:59:24,  7.76it/s]

{'loss': Array(0.23643844, dtype=float32), 'loss_cross_entropy': Array(0.21620429, dtype=float32)}


  2%|▏         | 22509/1000000 [1:02:15<88:15:59,  3.08it/s] 

{'loss': Array(0.21312104, dtype=float32), 'loss_cross_entropy': Array(0.19397883, dtype=float32)}


  2%|▏         | 22519/1000000 [1:02:17<48:15:58,  5.63it/s]

{'loss': Array(0.21358359, dtype=float32), 'loss_cross_entropy': Array(0.1946991, dtype=float32)}


  2%|▏         | 22529/1000000 [1:02:18<29:29:28,  9.21it/s]

{'loss': Array(0.24172433, dtype=float32), 'loss_cross_entropy': Array(0.22071628, dtype=float32)}


  2%|▏         | 22539/1000000 [1:02:20<25:41:50, 10.57it/s]

{'loss': Array(0.23916471, dtype=float32), 'loss_cross_entropy': Array(0.2180645, dtype=float32)}


  2%|▏         | 22549/1000000 [1:02:22<33:01:06,  8.22it/s]

{'loss': Array(0.23659244, dtype=float32), 'loss_cross_entropy': Array(0.21639422, dtype=float32)}


  2%|▏         | 22559/1000000 [1:02:23<27:23:05,  9.91it/s]

{'loss': Array(0.23044586, dtype=float32), 'loss_cross_entropy': Array(0.21066847, dtype=float32)}


  2%|▏         | 22569/1000000 [1:02:24<25:33:18, 10.62it/s]

{'loss': Array(0.23162095, dtype=float32), 'loss_cross_entropy': Array(0.21203557, dtype=float32)}


  2%|▏         | 22579/1000000 [1:02:26<28:36:17,  9.49it/s]

{'loss': Array(0.2389473, dtype=float32), 'loss_cross_entropy': Array(0.218395, dtype=float32)}


  2%|▏         | 22589/1000000 [1:02:27<26:00:55, 10.44it/s]

{'loss': Array(0.24927707, dtype=float32), 'loss_cross_entropy': Array(0.22876146, dtype=float32)}


  2%|▏         | 22599/1000000 [1:02:29<33:08:22,  8.19it/s]

{'loss': Array(0.22904697, dtype=float32), 'loss_cross_entropy': Array(0.20924453, dtype=float32)}


  2%|▏         | 22609/1000000 [1:02:31<26:40:44, 10.18it/s]

{'loss': Array(0.24088375, dtype=float32), 'loss_cross_entropy': Array(0.21922271, dtype=float32)}


  2%|▏         | 22619/1000000 [1:02:32<35:50:02,  7.58it/s]

{'loss': Array(0.2318443, dtype=float32), 'loss_cross_entropy': Array(0.21271466, dtype=float32)}


  2%|▏         | 22629/1000000 [1:02:34<26:56:35, 10.08it/s]

{'loss': Array(0.2267965, dtype=float32), 'loss_cross_entropy': Array(0.20609358, dtype=float32)}


  2%|▏         | 22639/1000000 [1:02:35<25:19:57, 10.72it/s]

{'loss': Array(0.22630115, dtype=float32), 'loss_cross_entropy': Array(0.20729442, dtype=float32)}


  2%|▏         | 22649/1000000 [1:02:37<33:03:16,  8.21it/s]

{'loss': Array(0.25742492, dtype=float32), 'loss_cross_entropy': Array(0.23543167, dtype=float32)}


  2%|▏         | 22659/1000000 [1:02:38<27:13:08,  9.97it/s]

{'loss': Array(0.24054778, dtype=float32), 'loss_cross_entropy': Array(0.22034855, dtype=float32)}


  2%|▏         | 22669/1000000 [1:02:40<25:33:51, 10.62it/s]

{'loss': Array(0.21975946, dtype=float32), 'loss_cross_entropy': Array(0.200748, dtype=float32)}


  2%|▏         | 22679/1000000 [1:02:41<28:15:33,  9.61it/s]

{'loss': Array(0.22838691, dtype=float32), 'loss_cross_entropy': Array(0.20787434, dtype=float32)}


  2%|▏         | 22689/1000000 [1:02:43<25:17:01, 10.74it/s]

{'loss': Array(0.24857278, dtype=float32), 'loss_cross_entropy': Array(0.22773865, dtype=float32)}


  2%|▏         | 22699/1000000 [1:02:44<31:58:03,  8.49it/s]

{'loss': Array(0.24034762, dtype=float32), 'loss_cross_entropy': Array(0.22001086, dtype=float32)}


  2%|▏         | 22709/1000000 [1:02:46<25:57:28, 10.46it/s]

{'loss': Array(0.24669097, dtype=float32), 'loss_cross_entropy': Array(0.2256341, dtype=float32)}


  2%|▏         | 22719/1000000 [1:02:47<39:45:45,  6.83it/s]

{'loss': Array(0.2419622, dtype=float32), 'loss_cross_entropy': Array(0.22148244, dtype=float32)}


  2%|▏         | 22729/1000000 [1:02:49<28:02:59,  9.68it/s]

{'loss': Array(0.2424602, dtype=float32), 'loss_cross_entropy': Array(0.22288175, dtype=float32)}


  2%|▏         | 22739/1000000 [1:02:50<25:44:43, 10.54it/s]

{'loss': Array(0.23079741, dtype=float32), 'loss_cross_entropy': Array(0.21015589, dtype=float32)}


  2%|▏         | 22749/1000000 [1:02:52<35:56:51,  7.55it/s]

{'loss': Array(0.21574071, dtype=float32), 'loss_cross_entropy': Array(0.1977811, dtype=float32)}


  2%|▏         | 22759/1000000 [1:02:53<26:45:33, 10.14it/s]

{'loss': Array(0.21608613, dtype=float32), 'loss_cross_entropy': Array(0.1973164, dtype=float32)}


  2%|▏         | 22769/1000000 [1:02:54<25:57:50, 10.45it/s]

{'loss': Array(0.2271051, dtype=float32), 'loss_cross_entropy': Array(0.20784175, dtype=float32)}


  2%|▏         | 22779/1000000 [1:02:56<31:00:01,  8.76it/s]

{'loss': Array(0.22254522, dtype=float32), 'loss_cross_entropy': Array(0.203262, dtype=float32)}


  2%|▏         | 22789/1000000 [1:02:58<26:40:44, 10.17it/s]

{'loss': Array(0.24100947, dtype=float32), 'loss_cross_entropy': Array(0.221516, dtype=float32)}


  2%|▏         | 22799/1000000 [1:02:59<34:05:36,  7.96it/s]

{'loss': Array(0.22993194, dtype=float32), 'loss_cross_entropy': Array(0.21123528, dtype=float32)}


  2%|▏         | 22809/1000000 [1:03:01<27:30:03,  9.87it/s]

{'loss': Array(0.22117507, dtype=float32), 'loss_cross_entropy': Array(0.20213924, dtype=float32)}


  2%|▏         | 22819/1000000 [1:03:02<25:27:45, 10.66it/s]

{'loss': Array(0.22474742, dtype=float32), 'loss_cross_entropy': Array(0.20442335, dtype=float32)}


  2%|▏         | 22829/1000000 [1:03:04<28:32:38,  9.51it/s]

{'loss': Array(0.23371725, dtype=float32), 'loss_cross_entropy': Array(0.21357654, dtype=float32)}


  2%|▏         | 22839/1000000 [1:03:05<25:33:39, 10.62it/s]

{'loss': Array(0.22581397, dtype=float32), 'loss_cross_entropy': Array(0.20743608, dtype=float32)}


  2%|▏         | 22849/1000000 [1:03:07<35:16:18,  7.70it/s]

{'loss': Array(0.23534624, dtype=float32), 'loss_cross_entropy': Array(0.21562016, dtype=float32)}


  2%|▏         | 22859/1000000 [1:03:08<26:42:14, 10.16it/s]

{'loss': Array(0.20582397, dtype=float32), 'loss_cross_entropy': Array(0.1869792, dtype=float32)}


  2%|▏         | 22869/1000000 [1:03:10<25:33:32, 10.62it/s]

{'loss': Array(0.24260688, dtype=float32), 'loss_cross_entropy': Array(0.22200361, dtype=float32)}


  2%|▏         | 22879/1000000 [1:03:11<28:34:50,  9.50it/s]

{'loss': Array(0.23582974, dtype=float32), 'loss_cross_entropy': Array(0.21699817, dtype=float32)}


  2%|▏         | 22889/1000000 [1:03:13<25:47:26, 10.52it/s]

{'loss': Array(0.20329256, dtype=float32), 'loss_cross_entropy': Array(0.1850918, dtype=float32)}


  2%|▏         | 22899/1000000 [1:03:14<30:21:27,  8.94it/s]

{'loss': Array(0.23017426, dtype=float32), 'loss_cross_entropy': Array(0.21137455, dtype=float32)}


  2%|▏         | 22909/1000000 [1:03:16<26:07:20, 10.39it/s]

{'loss': Array(0.20396952, dtype=float32), 'loss_cross_entropy': Array(0.18606907, dtype=float32)}


  2%|▏         | 22919/1000000 [1:03:18<40:20:36,  6.73it/s]

{'loss': Array(0.21309029, dtype=float32), 'loss_cross_entropy': Array(0.19423555, dtype=float32)}


  2%|▏         | 22929/1000000 [1:03:19<28:00:36,  9.69it/s]

{'loss': Array(0.21437919, dtype=float32), 'loss_cross_entropy': Array(0.1949094, dtype=float32)}


  2%|▏         | 22939/1000000 [1:03:20<25:55:20, 10.47it/s]

{'loss': Array(0.2598778, dtype=float32), 'loss_cross_entropy': Array(0.23782007, dtype=float32)}


  2%|▏         | 22949/1000000 [1:03:22<31:57:57,  8.49it/s]

{'loss': Array(0.22800672, dtype=float32), 'loss_cross_entropy': Array(0.20878144, dtype=float32)}


  2%|▏         | 22959/1000000 [1:03:23<27:18:53,  9.94it/s]

{'loss': Array(0.22347605, dtype=float32), 'loss_cross_entropy': Array(0.20483124, dtype=float32)}


  2%|▏         | 22969/1000000 [1:03:25<35:00:29,  7.75it/s]

{'loss': Array(0.22823511, dtype=float32), 'loss_cross_entropy': Array(0.20805739, dtype=float32)}


  2%|▏         | 22979/1000000 [1:03:26<26:48:16, 10.12it/s]

{'loss': Array(0.2218993, dtype=float32), 'loss_cross_entropy': Array(0.20297377, dtype=float32)}


  2%|▏         | 22989/1000000 [1:03:28<35:49:01,  7.58it/s]

{'loss': Array(0.24864574, dtype=float32), 'loss_cross_entropy': Array(0.22795136, dtype=float32)}


  2%|▏         | 22999/1000000 [1:03:30<27:25:56,  9.89it/s]

{'loss': Array(0.23050475, dtype=float32), 'loss_cross_entropy': Array(0.21047549, dtype=float32)}


  2%|▏         | 23009/1000000 [1:03:37<87:04:25,  3.12it/s] 

{'loss': Array(0.21437536, dtype=float32), 'loss_cross_entropy': Array(0.19565494, dtype=float32)}


  2%|▏         | 23019/1000000 [1:03:39<40:16:11,  6.74it/s]

{'loss': Array(0.23205566, dtype=float32), 'loss_cross_entropy': Array(0.21364406, dtype=float32)}


  2%|▏         | 23029/1000000 [1:03:40<27:51:39,  9.74it/s]

{'loss': Array(0.23722993, dtype=float32), 'loss_cross_entropy': Array(0.21766026, dtype=float32)}


  2%|▏         | 23039/1000000 [1:03:42<35:55:58,  7.55it/s]

{'loss': Array(0.22760092, dtype=float32), 'loss_cross_entropy': Array(0.20873407, dtype=float32)}


  2%|▏         | 23049/1000000 [1:03:43<27:29:58,  9.87it/s]

{'loss': Array(0.23924959, dtype=float32), 'loss_cross_entropy': Array(0.21988516, dtype=float32)}


  2%|▏         | 23059/1000000 [1:03:45<39:36:01,  6.85it/s]

{'loss': Array(0.23308142, dtype=float32), 'loss_cross_entropy': Array(0.2133507, dtype=float32)}


  2%|▏         | 23069/1000000 [1:03:46<28:00:44,  9.69it/s]

{'loss': Array(0.21492593, dtype=float32), 'loss_cross_entropy': Array(0.19591318, dtype=float32)}


  2%|▏         | 23079/1000000 [1:03:48<25:40:19, 10.57it/s]

{'loss': Array(0.24129723, dtype=float32), 'loss_cross_entropy': Array(0.22022401, dtype=float32)}


  2%|▏         | 23089/1000000 [1:03:50<32:01:26,  8.47it/s]

{'loss': Array(0.22689973, dtype=float32), 'loss_cross_entropy': Array(0.20758966, dtype=float32)}


  2%|▏         | 23099/1000000 [1:03:51<26:01:05, 10.43it/s]

{'loss': Array(0.2123305, dtype=float32), 'loss_cross_entropy': Array(0.19262448, dtype=float32)}


  2%|▏         | 23109/1000000 [1:03:53<35:48:36,  7.58it/s]

{'loss': Array(0.21440728, dtype=float32), 'loss_cross_entropy': Array(0.19491042, dtype=float32)}


  2%|▏         | 23119/1000000 [1:03:54<26:49:50, 10.11it/s]

{'loss': Array(0.22555879, dtype=float32), 'loss_cross_entropy': Array(0.20546587, dtype=float32)}


  2%|▏         | 23129/1000000 [1:03:55<25:18:19, 10.72it/s]

{'loss': Array(0.2268373, dtype=float32), 'loss_cross_entropy': Array(0.20609252, dtype=float32)}


  2%|▏         | 23139/1000000 [1:03:57<28:09:21,  9.64it/s]

{'loss': Array(0.22253132, dtype=float32), 'loss_cross_entropy': Array(0.20417546, dtype=float32)}


  2%|▏         | 23149/1000000 [1:03:58<25:55:36, 10.47it/s]

{'loss': Array(0.23863058, dtype=float32), 'loss_cross_entropy': Array(0.21766602, dtype=float32)}


  2%|▏         | 23159/1000000 [1:04:00<40:31:26,  6.70it/s]

{'loss': Array(0.23802245, dtype=float32), 'loss_cross_entropy': Array(0.21856491, dtype=float32)}


  2%|▏         | 23169/1000000 [1:04:01<28:45:41,  9.43it/s]

{'loss': Array(0.20871083, dtype=float32), 'loss_cross_entropy': Array(0.1906455, dtype=float32)}


  2%|▏         | 23179/1000000 [1:04:03<25:27:43, 10.66it/s]

{'loss': Array(0.23644625, dtype=float32), 'loss_cross_entropy': Array(0.2166581, dtype=float32)}


  2%|▏         | 23189/1000000 [1:04:04<32:08:32,  8.44it/s]

{'loss': Array(0.23850691, dtype=float32), 'loss_cross_entropy': Array(0.21816023, dtype=float32)}


  2%|▏         | 23199/1000000 [1:04:06<26:01:48, 10.42it/s]

{'loss': Array(0.24058686, dtype=float32), 'loss_cross_entropy': Array(0.219994, dtype=float32)}


  2%|▏         | 23209/1000000 [1:04:08<35:38:52,  7.61it/s]

{'loss': Array(0.22950657, dtype=float32), 'loss_cross_entropy': Array(0.21042709, dtype=float32)}


  2%|▏         | 23219/1000000 [1:04:09<27:15:41,  9.95it/s]

{'loss': Array(0.21046863, dtype=float32), 'loss_cross_entropy': Array(0.1912689, dtype=float32)}


  2%|▏         | 23229/1000000 [1:04:11<36:00:53,  7.53it/s]

{'loss': Array(0.22300851, dtype=float32), 'loss_cross_entropy': Array(0.20322838, dtype=float32)}


  2%|▏         | 23239/1000000 [1:04:12<27:21:51,  9.92it/s]

{'loss': Array(0.21666558, dtype=float32), 'loss_cross_entropy': Array(0.19758026, dtype=float32)}


  2%|▏         | 23249/1000000 [1:04:13<26:01:34, 10.42it/s]

{'loss': Array(0.229407, dtype=float32), 'loss_cross_entropy': Array(0.20904365, dtype=float32)}


  2%|▏         | 23259/1000000 [1:04:15<30:25:23,  8.92it/s]

{'loss': Array(0.23002067, dtype=float32), 'loss_cross_entropy': Array(0.21033943, dtype=float32)}


  2%|▏         | 23269/1000000 [1:04:16<25:49:24, 10.51it/s]

{'loss': Array(0.23896419, dtype=float32), 'loss_cross_entropy': Array(0.21865952, dtype=float32)}


  2%|▏         | 23279/1000000 [1:04:18<32:28:39,  8.35it/s]

{'loss': Array(0.21429853, dtype=float32), 'loss_cross_entropy': Array(0.19684103, dtype=float32)}


  2%|▏         | 23289/1000000 [1:04:20<26:39:43, 10.18it/s]

{'loss': Array(0.21937235, dtype=float32), 'loss_cross_entropy': Array(0.1996512, dtype=float32)}


  2%|▏         | 23299/1000000 [1:04:21<40:26:41,  6.71it/s]

{'loss': Array(0.22406743, dtype=float32), 'loss_cross_entropy': Array(0.20491874, dtype=float32)}


  2%|▏         | 23309/1000000 [1:04:23<27:32:31,  9.85it/s]

{'loss': Array(0.2175937, dtype=float32), 'loss_cross_entropy': Array(0.19798513, dtype=float32)}


  2%|▏         | 23319/1000000 [1:04:24<40:13:38,  6.74it/s]

{'loss': Array(0.22794597, dtype=float32), 'loss_cross_entropy': Array(0.20858024, dtype=float32)}


  2%|▏         | 23329/1000000 [1:04:26<27:48:33,  9.76it/s]

{'loss': Array(0.22833529, dtype=float32), 'loss_cross_entropy': Array(0.20876944, dtype=float32)}


  2%|▏         | 23339/1000000 [1:04:27<25:15:34, 10.74it/s]

{'loss': Array(0.22960262, dtype=float32), 'loss_cross_entropy': Array(0.21028738, dtype=float32)}


  2%|▏         | 23349/1000000 [1:04:29<35:10:45,  7.71it/s]

{'loss': Array(0.21566652, dtype=float32), 'loss_cross_entropy': Array(0.19688216, dtype=float32)}


  2%|▏         | 23359/1000000 [1:04:30<26:46:37, 10.13it/s]

{'loss': Array(0.26289147, dtype=float32), 'loss_cross_entropy': Array(0.24184147, dtype=float32)}


  2%|▏         | 23369/1000000 [1:04:32<39:54:06,  6.80it/s]

{'loss': Array(0.2217125, dtype=float32), 'loss_cross_entropy': Array(0.20337108, dtype=float32)}


  2%|▏         | 23379/1000000 [1:04:33<27:30:38,  9.86it/s]

{'loss': Array(0.22230291, dtype=float32), 'loss_cross_entropy': Array(0.2022858, dtype=float32)}


  2%|▏         | 23389/1000000 [1:04:35<25:35:49, 10.60it/s]

{'loss': Array(0.23786521, dtype=float32), 'loss_cross_entropy': Array(0.21701372, dtype=float32)}


  2%|▏         | 23399/1000000 [1:04:36<28:51:37,  9.40it/s]

{'loss': Array(0.2230678, dtype=float32), 'loss_cross_entropy': Array(0.20379932, dtype=float32)}


  2%|▏         | 23409/1000000 [1:04:38<25:25:59, 10.67it/s]

{'loss': Array(0.23214212, dtype=float32), 'loss_cross_entropy': Array(0.21225584, dtype=float32)}


  2%|▏         | 23419/1000000 [1:04:39<28:21:55,  9.56it/s]

{'loss': Array(0.24580026, dtype=float32), 'loss_cross_entropy': Array(0.22507615, dtype=float32)}


  2%|▏         | 23429/1000000 [1:04:41<25:28:40, 10.65it/s]

{'loss': Array(0.21422115, dtype=float32), 'loss_cross_entropy': Array(0.19431232, dtype=float32)}


  2%|▏         | 23439/1000000 [1:04:42<39:12:08,  6.92it/s]

{'loss': Array(0.22181919, dtype=float32), 'loss_cross_entropy': Array(0.20262109, dtype=float32)}


  2%|▏         | 23449/1000000 [1:04:44<27:20:03,  9.92it/s]

{'loss': Array(0.22498937, dtype=float32), 'loss_cross_entropy': Array(0.20630161, dtype=float32)}


  2%|▏         | 23459/1000000 [1:04:45<25:19:13, 10.71it/s]

{'loss': Array(0.24438106, dtype=float32), 'loss_cross_entropy': Array(0.22381864, dtype=float32)}


  2%|▏         | 23469/1000000 [1:04:47<32:10:57,  8.43it/s]

{'loss': Array(0.21519604, dtype=float32), 'loss_cross_entropy': Array(0.19716166, dtype=float32)}


  2%|▏         | 23479/1000000 [1:04:48<26:21:22, 10.29it/s]

{'loss': Array(0.21402717, dtype=float32), 'loss_cross_entropy': Array(0.1956814, dtype=float32)}


  2%|▏         | 23489/1000000 [1:04:50<40:23:32,  6.72it/s]

{'loss': Array(0.22954476, dtype=float32), 'loss_cross_entropy': Array(0.20963989, dtype=float32)}


  2%|▏         | 23499/1000000 [1:04:51<28:12:33,  9.62it/s]

{'loss': Array(0.22818677, dtype=float32), 'loss_cross_entropy': Array(0.2086288, dtype=float32)}


  2%|▏         | 23509/1000000 [1:04:59<90:19:39,  3.00it/s] 

{'loss': Array(0.23459266, dtype=float32), 'loss_cross_entropy': Array(0.21460466, dtype=float32)}


  2%|▏         | 23519/1000000 [1:05:01<41:41:37,  6.51it/s]

{'loss': Array(0.21719079, dtype=float32), 'loss_cross_entropy': Array(0.19796245, dtype=float32)}


  2%|▏         | 23529/1000000 [1:05:02<28:06:32,  9.65it/s]

{'loss': Array(0.20172746, dtype=float32), 'loss_cross_entropy': Array(0.18396534, dtype=float32)}


  2%|▏         | 23539/1000000 [1:05:04<32:54:50,  8.24it/s]

{'loss': Array(0.23496418, dtype=float32), 'loss_cross_entropy': Array(0.21388029, dtype=float32)}


  2%|▏         | 23549/1000000 [1:05:06<26:50:10, 10.11it/s]

{'loss': Array(0.24671893, dtype=float32), 'loss_cross_entropy': Array(0.22537969, dtype=float32)}


  2%|▏         | 23559/1000000 [1:05:07<39:35:22,  6.85it/s]

{'loss': Array(0.21793532, dtype=float32), 'loss_cross_entropy': Array(0.198725, dtype=float32)}


  2%|▏         | 23569/1000000 [1:05:09<27:21:43,  9.91it/s]

{'loss': Array(0.23703976, dtype=float32), 'loss_cross_entropy': Array(0.21691847, dtype=float32)}


  2%|▏         | 23579/1000000 [1:05:10<25:18:37, 10.72it/s]

{'loss': Array(0.231656, dtype=float32), 'loss_cross_entropy': Array(0.2115316, dtype=float32)}


  2%|▏         | 23589/1000000 [1:05:12<29:05:42,  9.32it/s]

{'loss': Array(0.22639814, dtype=float32), 'loss_cross_entropy': Array(0.20517817, dtype=float32)}


  2%|▏         | 23599/1000000 [1:05:13<25:28:14, 10.65it/s]

{'loss': Array(0.22180377, dtype=float32), 'loss_cross_entropy': Array(0.20343001, dtype=float32)}


  2%|▏         | 23609/1000000 [1:05:15<39:31:44,  6.86it/s]

{'loss': Array(0.20938216, dtype=float32), 'loss_cross_entropy': Array(0.1922781, dtype=float32)}


  2%|▏         | 23619/1000000 [1:05:16<27:11:37,  9.97it/s]

{'loss': Array(0.22444235, dtype=float32), 'loss_cross_entropy': Array(0.20541401, dtype=float32)}


  2%|▏         | 23629/1000000 [1:05:17<24:57:34, 10.87it/s]

{'loss': Array(0.23944555, dtype=float32), 'loss_cross_entropy': Array(0.22000626, dtype=float32)}


  2%|▏         | 23639/1000000 [1:05:19<29:42:27,  9.13it/s]

{'loss': Array(0.20754562, dtype=float32), 'loss_cross_entropy': Array(0.18981361, dtype=float32)}


  2%|▏         | 23649/1000000 [1:05:20<26:18:45, 10.31it/s]

{'loss': Array(0.23140252, dtype=float32), 'loss_cross_entropy': Array(0.21210532, dtype=float32)}


  2%|▏         | 23659/1000000 [1:05:22<36:04:31,  7.52it/s]

{'loss': Array(0.24076243, dtype=float32), 'loss_cross_entropy': Array(0.22010498, dtype=float32)}


  2%|▏         | 23669/1000000 [1:05:24<27:49:02,  9.75it/s]

{'loss': Array(0.22348283, dtype=float32), 'loss_cross_entropy': Array(0.20375393, dtype=float32)}


  2%|▏         | 23679/1000000 [1:05:25<40:07:20,  6.76it/s]

{'loss': Array(0.22566466, dtype=float32), 'loss_cross_entropy': Array(0.20611084, dtype=float32)}


  2%|▏         | 23689/1000000 [1:05:27<27:53:04,  9.73it/s]

{'loss': Array(0.23107283, dtype=float32), 'loss_cross_entropy': Array(0.21127255, dtype=float32)}


  2%|▏         | 23699/1000000 [1:05:28<25:54:23, 10.47it/s]

{'loss': Array(0.22159114, dtype=float32), 'loss_cross_entropy': Array(0.20331882, dtype=float32)}


  2%|▏         | 23709/1000000 [1:05:30<32:28:11,  8.35it/s]

{'loss': Array(0.22651815, dtype=float32), 'loss_cross_entropy': Array(0.20680335, dtype=float32)}


  2%|▏         | 23719/1000000 [1:05:31<26:30:56, 10.23it/s]

{'loss': Array(0.23786096, dtype=float32), 'loss_cross_entropy': Array(0.21768375, dtype=float32)}


  2%|▏         | 23729/1000000 [1:05:33<35:04:34,  7.73it/s]

{'loss': Array(0.21655849, dtype=float32), 'loss_cross_entropy': Array(0.19827084, dtype=float32)}


  2%|▏         | 23739/1000000 [1:05:34<26:39:28, 10.17it/s]

{'loss': Array(0.21532524, dtype=float32), 'loss_cross_entropy': Array(0.19640823, dtype=float32)}


  2%|▏         | 23749/1000000 [1:05:36<26:05:23, 10.39it/s]

{'loss': Array(0.22358035, dtype=float32), 'loss_cross_entropy': Array(0.2040535, dtype=float32)}


  2%|▏         | 23759/1000000 [1:05:37<29:19:34,  9.25it/s]

{'loss': Array(0.23990884, dtype=float32), 'loss_cross_entropy': Array(0.22068906, dtype=float32)}


  2%|▏         | 23769/1000000 [1:05:39<26:12:27, 10.35it/s]

{'loss': Array(0.23151886, dtype=float32), 'loss_cross_entropy': Array(0.21080811, dtype=float32)}


  2%|▏         | 23779/1000000 [1:05:40<39:45:44,  6.82it/s]

{'loss': Array(0.2316782, dtype=float32), 'loss_cross_entropy': Array(0.21213444, dtype=float32)}


  2%|▏         | 23789/1000000 [1:05:42<27:28:10,  9.87it/s]

{'loss': Array(0.2381304, dtype=float32), 'loss_cross_entropy': Array(0.21806255, dtype=float32)}


  2%|▏         | 23799/1000000 [1:05:43<25:28:11, 10.65it/s]

{'loss': Array(0.23355143, dtype=float32), 'loss_cross_entropy': Array(0.21391578, dtype=float32)}


  2%|▏         | 23809/1000000 [1:05:45<32:23:54,  8.37it/s]

{'loss': Array(0.23157556, dtype=float32), 'loss_cross_entropy': Array(0.2113891, dtype=float32)}


  2%|▏         | 23819/1000000 [1:05:46<26:16:03, 10.32it/s]

{'loss': Array(0.2248938, dtype=float32), 'loss_cross_entropy': Array(0.20645498, dtype=float32)}


  2%|▏         | 23829/1000000 [1:05:48<35:05:23,  7.73it/s]

{'loss': Array(0.240588, dtype=float32), 'loss_cross_entropy': Array(0.2203993, dtype=float32)}


  2%|▏         | 23839/1000000 [1:05:49<26:55:03, 10.07it/s]

{'loss': Array(0.22099392, dtype=float32), 'loss_cross_entropy': Array(0.20198463, dtype=float32)}


  2%|▏         | 23849/1000000 [1:05:51<36:13:07,  7.49it/s]

{'loss': Array(0.22015248, dtype=float32), 'loss_cross_entropy': Array(0.20137544, dtype=float32)}


  2%|▏         | 23859/1000000 [1:05:52<26:30:57, 10.23it/s]

{'loss': Array(0.20186546, dtype=float32), 'loss_cross_entropy': Array(0.18390352, dtype=float32)}


  2%|▏         | 23869/1000000 [1:05:54<25:12:19, 10.76it/s]

{'loss': Array(0.25264072, dtype=float32), 'loss_cross_entropy': Array(0.23170371, dtype=float32)}


  2%|▏         | 23879/1000000 [1:05:55<29:58:53,  9.04it/s]

{'loss': Array(0.20557678, dtype=float32), 'loss_cross_entropy': Array(0.1873531, dtype=float32)}


  2%|▏         | 23889/1000000 [1:05:57<25:31:38, 10.62it/s]

{'loss': Array(0.2496103, dtype=float32), 'loss_cross_entropy': Array(0.23011625, dtype=float32)}


  2%|▏         | 23899/1000000 [1:05:58<32:32:52,  8.33it/s]

{'loss': Array(0.21088663, dtype=float32), 'loss_cross_entropy': Array(0.19250445, dtype=float32)}


  2%|▏         | 23909/1000000 [1:06:00<26:54:04, 10.08it/s]

{'loss': Array(0.23851505, dtype=float32), 'loss_cross_entropy': Array(0.21927916, dtype=float32)}


  2%|▏         | 23919/1000000 [1:06:02<41:14:57,  6.57it/s]

{'loss': Array(0.23435818, dtype=float32), 'loss_cross_entropy': Array(0.21470661, dtype=float32)}


  2%|▏         | 23929/1000000 [1:06:03<27:37:31,  9.81it/s]

{'loss': Array(0.22066489, dtype=float32), 'loss_cross_entropy': Array(0.20145896, dtype=float32)}


  2%|▏         | 23939/1000000 [1:06:05<39:57:43,  6.78it/s]

{'loss': Array(0.24687873, dtype=float32), 'loss_cross_entropy': Array(0.22663084, dtype=float32)}


  2%|▏         | 23949/1000000 [1:06:06<27:22:47,  9.90it/s]

{'loss': Array(0.23610091, dtype=float32), 'loss_cross_entropy': Array(0.21576153, dtype=float32)}


  2%|▏         | 23959/1000000 [1:06:07<27:13:24,  9.96it/s]

{'loss': Array(0.22712016, dtype=float32), 'loss_cross_entropy': Array(0.20797132, dtype=float32)}


  2%|▏         | 23969/1000000 [1:06:09<36:03:11,  7.52it/s]

{'loss': Array(0.21621786, dtype=float32), 'loss_cross_entropy': Array(0.19834071, dtype=float32)}


  2%|▏         | 23979/1000000 [1:06:10<26:35:42, 10.19it/s]

{'loss': Array(0.23986089, dtype=float32), 'loss_cross_entropy': Array(0.22005586, dtype=float32)}


  2%|▏         | 23989/1000000 [1:06:12<40:31:36,  6.69it/s]

{'loss': Array(0.23890515, dtype=float32), 'loss_cross_entropy': Array(0.21858115, dtype=float32)}


  2%|▏         | 23999/1000000 [1:06:14<27:53:25,  9.72it/s]

{'loss': Array(0.22190924, dtype=float32), 'loss_cross_entropy': Array(0.20376249, dtype=float32)}


  2%|▏         | 24009/1000000 [1:06:22<90:11:55,  3.01it/s] 

{'loss': Array(0.23214118, dtype=float32), 'loss_cross_entropy': Array(0.21370517, dtype=float32)}


  2%|▏         | 24019/1000000 [1:06:23<39:59:08,  6.78it/s] 

{'loss': Array(0.2207696, dtype=float32), 'loss_cross_entropy': Array(0.20122595, dtype=float32)}


  2%|▏         | 24029/1000000 [1:06:25<28:12:56,  9.61it/s]

{'loss': Array(0.24018244, dtype=float32), 'loss_cross_entropy': Array(0.22098465, dtype=float32)}


  2%|▏         | 24039/1000000 [1:06:26<29:23:05,  9.23it/s]

{'loss': Array(0.2364422, dtype=float32), 'loss_cross_entropy': Array(0.21654013, dtype=float32)}


  2%|▏         | 24049/1000000 [1:06:28<25:51:43, 10.48it/s]

{'loss': Array(0.22921124, dtype=float32), 'loss_cross_entropy': Array(0.20874953, dtype=float32)}


  2%|▏         | 24059/1000000 [1:06:29<39:54:27,  6.79it/s]

{'loss': Array(0.24303213, dtype=float32), 'loss_cross_entropy': Array(0.22252774, dtype=float32)}


  2%|▏         | 24069/1000000 [1:06:31<27:29:37,  9.86it/s]

{'loss': Array(0.21335597, dtype=float32), 'loss_cross_entropy': Array(0.19542041, dtype=float32)}


  2%|▏         | 24079/1000000 [1:06:32<25:37:03, 10.58it/s]

{'loss': Array(0.2130915, dtype=float32), 'loss_cross_entropy': Array(0.1949821, dtype=float32)}


  2%|▏         | 24089/1000000 [1:06:34<32:34:16,  8.32it/s]

{'loss': Array(0.2216657, dtype=float32), 'loss_cross_entropy': Array(0.20263612, dtype=float32)}


  2%|▏         | 24099/1000000 [1:06:35<26:04:57, 10.39it/s]

{'loss': Array(0.21941447, dtype=float32), 'loss_cross_entropy': Array(0.20129299, dtype=float32)}


  2%|▏         | 24109/1000000 [1:06:37<39:51:34,  6.80it/s]

{'loss': Array(0.23121607, dtype=float32), 'loss_cross_entropy': Array(0.21127795, dtype=float32)}


  2%|▏         | 24119/1000000 [1:06:38<28:22:59,  9.55it/s]

{'loss': Array(0.2169195, dtype=float32), 'loss_cross_entropy': Array(0.19796303, dtype=float32)}


  2%|▏         | 24129/1000000 [1:06:40<25:46:07, 10.52it/s]

{'loss': Array(0.20774877, dtype=float32), 'loss_cross_entropy': Array(0.1895171, dtype=float32)}


  2%|▏         | 24139/1000000 [1:06:41<30:17:09,  8.95it/s]

{'loss': Array(0.22870941, dtype=float32), 'loss_cross_entropy': Array(0.21003905, dtype=float32)}


  2%|▏         | 24149/1000000 [1:06:43<26:14:46, 10.33it/s]

{'loss': Array(0.24390876, dtype=float32), 'loss_cross_entropy': Array(0.22372864, dtype=float32)}


  2%|▏         | 24159/1000000 [1:06:45<35:48:50,  7.57it/s]

{'loss': Array(0.23291898, dtype=float32), 'loss_cross_entropy': Array(0.21341772, dtype=float32)}


  2%|▏         | 24169/1000000 [1:06:46<27:01:47, 10.03it/s]

{'loss': Array(0.22918987, dtype=float32), 'loss_cross_entropy': Array(0.20877261, dtype=float32)}


  2%|▏         | 24179/1000000 [1:06:47<27:30:59,  9.85it/s]

{'loss': Array(0.22712605, dtype=float32), 'loss_cross_entropy': Array(0.20852812, dtype=float32)}


  2%|▏         | 24189/1000000 [1:06:49<32:39:46,  8.30it/s]

{'loss': Array(0.22746992, dtype=float32), 'loss_cross_entropy': Array(0.2093886, dtype=float32)}


  2%|▏         | 24199/1000000 [1:06:50<27:22:09,  9.90it/s]

{'loss': Array(0.20105135, dtype=float32), 'loss_cross_entropy': Array(0.18338911, dtype=float32)}


  2%|▏         | 24209/1000000 [1:06:52<25:47:32, 10.51it/s]

{'loss': Array(0.23287585, dtype=float32), 'loss_cross_entropy': Array(0.21305548, dtype=float32)}


  2%|▏         | 24219/1000000 [1:06:54<28:40:10,  9.45it/s]

{'loss': Array(0.22384308, dtype=float32), 'loss_cross_entropy': Array(0.20521025, dtype=float32)}


  2%|▏         | 24229/1000000 [1:06:55<25:55:14, 10.46it/s]

{'loss': Array(0.24971826, dtype=float32), 'loss_cross_entropy': Array(0.22887401, dtype=float32)}


  2%|▏         | 24239/1000000 [1:06:57<32:48:28,  8.26it/s]

{'loss': Array(0.23107293, dtype=float32), 'loss_cross_entropy': Array(0.21223319, dtype=float32)}


  2%|▏         | 24249/1000000 [1:06:58<26:43:00, 10.14it/s]

{'loss': Array(0.24426849, dtype=float32), 'loss_cross_entropy': Array(0.22301474, dtype=float32)}


  2%|▏         | 24259/1000000 [1:07:00<41:04:59,  6.60it/s]

{'loss': Array(0.22718275, dtype=float32), 'loss_cross_entropy': Array(0.20809713, dtype=float32)}


  2%|▏         | 24269/1000000 [1:07:01<28:08:52,  9.63it/s]

{'loss': Array(0.23140414, dtype=float32), 'loss_cross_entropy': Array(0.21127994, dtype=float32)}


  2%|▏         | 24279/1000000 [1:07:03<40:51:52,  6.63it/s]

{'loss': Array(0.20851007, dtype=float32), 'loss_cross_entropy': Array(0.19073218, dtype=float32)}


  2%|▏         | 24289/1000000 [1:07:04<28:02:15,  9.67it/s]

{'loss': Array(0.22714889, dtype=float32), 'loss_cross_entropy': Array(0.20661171, dtype=float32)}


  2%|▏         | 24299/1000000 [1:07:06<26:18:38, 10.30it/s]

{'loss': Array(0.21733566, dtype=float32), 'loss_cross_entropy': Array(0.1984669, dtype=float32)}


  2%|▏         | 24309/1000000 [1:07:08<35:39:38,  7.60it/s]

{'loss': Array(0.21534733, dtype=float32), 'loss_cross_entropy': Array(0.19639044, dtype=float32)}


  2%|▏         | 24319/1000000 [1:07:09<26:36:20, 10.19it/s]

{'loss': Array(0.21662615, dtype=float32), 'loss_cross_entropy': Array(0.19739164, dtype=float32)}


  2%|▏         | 24329/1000000 [1:07:11<39:54:35,  6.79it/s]

{'loss': Array(0.19839603, dtype=float32), 'loss_cross_entropy': Array(0.17985152, dtype=float32)}


  2%|▏         | 24339/1000000 [1:07:12<27:20:35,  9.91it/s]

{'loss': Array(0.23005433, dtype=float32), 'loss_cross_entropy': Array(0.20997503, dtype=float32)}


  2%|▏         | 24349/1000000 [1:07:13<25:19:08, 10.70it/s]

{'loss': Array(0.231908, dtype=float32), 'loss_cross_entropy': Array(0.21269715, dtype=float32)}


  2%|▏         | 24359/1000000 [1:07:15<28:44:43,  9.43it/s]

{'loss': Array(0.22215061, dtype=float32), 'loss_cross_entropy': Array(0.20327048, dtype=float32)}


  2%|▏         | 24369/1000000 [1:07:16<25:57:15, 10.44it/s]

{'loss': Array(0.23374127, dtype=float32), 'loss_cross_entropy': Array(0.2157673, dtype=float32)}


  2%|▏         | 24379/1000000 [1:07:18<29:16:20,  9.26it/s]

{'loss': Array(0.21194665, dtype=float32), 'loss_cross_entropy': Array(0.19364212, dtype=float32)}


  2%|▏         | 24389/1000000 [1:07:19<26:08:10, 10.37it/s]

{'loss': Array(0.22061343, dtype=float32), 'loss_cross_entropy': Array(0.20194857, dtype=float32)}


  2%|▏         | 24399/1000000 [1:07:21<39:55:45,  6.79it/s]

{'loss': Array(0.21688536, dtype=float32), 'loss_cross_entropy': Array(0.19805859, dtype=float32)}


  2%|▏         | 24409/1000000 [1:07:22<27:36:37,  9.82it/s]

{'loss': Array(0.21384719, dtype=float32), 'loss_cross_entropy': Array(0.19658099, dtype=float32)}


  2%|▏         | 24419/1000000 [1:07:24<25:09:39, 10.77it/s]

{'loss': Array(0.21423197, dtype=float32), 'loss_cross_entropy': Array(0.1952856, dtype=float32)}


  2%|▏         | 24429/1000000 [1:07:25<32:06:02,  8.44it/s]

{'loss': Array(0.24086137, dtype=float32), 'loss_cross_entropy': Array(0.22038773, dtype=float32)}


  2%|▏         | 24439/1000000 [1:07:27<26:13:57, 10.33it/s]

{'loss': Array(0.22965045, dtype=float32), 'loss_cross_entropy': Array(0.21108213, dtype=float32)}


  2%|▏         | 24449/1000000 [1:07:29<40:13:55,  6.74it/s]

{'loss': Array(0.20950249, dtype=float32), 'loss_cross_entropy': Array(0.19070947, dtype=float32)}


  2%|▏         | 24459/1000000 [1:07:30<27:57:10,  9.69it/s]

{'loss': Array(0.23040019, dtype=float32), 'loss_cross_entropy': Array(0.21072502, dtype=float32)}


  2%|▏         | 24469/1000000 [1:07:31<25:26:07, 10.65it/s]

{'loss': Array(0.20643218, dtype=float32), 'loss_cross_entropy': Array(0.18794087, dtype=float32)}


  2%|▏         | 24479/1000000 [1:07:33<29:46:09,  9.10it/s]

{'loss': Array(0.20597623, dtype=float32), 'loss_cross_entropy': Array(0.18850942, dtype=float32)}


  2%|▏         | 24489/1000000 [1:07:34<25:36:29, 10.58it/s]

{'loss': Array(0.22011757, dtype=float32), 'loss_cross_entropy': Array(0.20231652, dtype=float32)}


  2%|▏         | 24499/1000000 [1:07:36<36:07:36,  7.50it/s]

{'loss': Array(0.23275493, dtype=float32), 'loss_cross_entropy': Array(0.21296504, dtype=float32)}


  2%|▏         | 24509/1000000 [1:07:44<89:27:55,  3.03it/s] 

{'loss': Array(0.23499659, dtype=float32), 'loss_cross_entropy': Array(0.21417491, dtype=float32)}


  2%|▏         | 24519/1000000 [1:07:46<46:44:43,  5.80it/s]

{'loss': Array(0.22692907, dtype=float32), 'loss_cross_entropy': Array(0.20690416, dtype=float32)}


  2%|▏         | 24529/1000000 [1:07:47<29:17:07,  9.25it/s]

{'loss': Array(0.20088927, dtype=float32), 'loss_cross_entropy': Array(0.18262853, dtype=float32)}


  2%|▏         | 24539/1000000 [1:07:48<25:57:51, 10.44it/s]

{'loss': Array(0.23066683, dtype=float32), 'loss_cross_entropy': Array(0.21187201, dtype=float32)}


  2%|▏         | 24549/1000000 [1:07:50<32:19:13,  8.38it/s]

{'loss': Array(0.23861547, dtype=float32), 'loss_cross_entropy': Array(0.21844192, dtype=float32)}


  2%|▏         | 24559/1000000 [1:07:51<26:29:40, 10.23it/s]

{'loss': Array(0.20078719, dtype=float32), 'loss_cross_entropy': Array(0.18356991, dtype=float32)}


  2%|▏         | 24569/1000000 [1:07:53<25:02:57, 10.82it/s]

{'loss': Array(0.25385094, dtype=float32), 'loss_cross_entropy': Array(0.23363154, dtype=float32)}


  2%|▏         | 24579/1000000 [1:07:54<28:46:10,  9.42it/s]

{'loss': Array(0.23191194, dtype=float32), 'loss_cross_entropy': Array(0.21220954, dtype=float32)}


  2%|▏         | 24589/1000000 [1:07:56<26:03:57, 10.39it/s]

{'loss': Array(0.23057139, dtype=float32), 'loss_cross_entropy': Array(0.21085182, dtype=float32)}


  2%|▏         | 24599/1000000 [1:07:58<32:48:06,  8.26it/s]

{'loss': Array(0.24385443, dtype=float32), 'loss_cross_entropy': Array(0.22463353, dtype=float32)}


  2%|▏         | 24609/1000000 [1:07:59<26:54:21, 10.07it/s]

{'loss': Array(0.22261634, dtype=float32), 'loss_cross_entropy': Array(0.20327477, dtype=float32)}


  2%|▏         | 24619/1000000 [1:08:01<35:47:51,  7.57it/s]

{'loss': Array(0.23028386, dtype=float32), 'loss_cross_entropy': Array(0.2110325, dtype=float32)}


  2%|▏         | 24629/1000000 [1:08:02<26:40:57, 10.15it/s]

{'loss': Array(0.20120664, dtype=float32), 'loss_cross_entropy': Array(0.183201, dtype=float32)}


  2%|▏         | 24639/1000000 [1:08:03<25:32:35, 10.61it/s]

{'loss': Array(0.24048069, dtype=float32), 'loss_cross_entropy': Array(0.22096251, dtype=float32)}


  2%|▏         | 24649/1000000 [1:08:05<32:20:28,  8.38it/s]

{'loss': Array(0.22615337, dtype=float32), 'loss_cross_entropy': Array(0.20749934, dtype=float32)}


  2%|▏         | 24659/1000000 [1:08:07<26:17:27, 10.31it/s]

{'loss': Array(0.23174024, dtype=float32), 'loss_cross_entropy': Array(0.21087281, dtype=float32)}


  2%|▏         | 24669/1000000 [1:08:08<25:20:50, 10.69it/s]

{'loss': Array(0.23068018, dtype=float32), 'loss_cross_entropy': Array(0.21095915, dtype=float32)}


  2%|▏         | 24679/1000000 [1:08:10<29:11:38,  9.28it/s]

{'loss': Array(0.21307528, dtype=float32), 'loss_cross_entropy': Array(0.19508749, dtype=float32)}


  2%|▏         | 24689/1000000 [1:08:11<25:51:42, 10.48it/s]

{'loss': Array(0.2069979, dtype=float32), 'loss_cross_entropy': Array(0.18877317, dtype=float32)}


  2%|▏         | 24699/1000000 [1:08:13<32:32:43,  8.32it/s]

{'loss': Array(0.23024693, dtype=float32), 'loss_cross_entropy': Array(0.21031027, dtype=float32)}


  2%|▏         | 24709/1000000 [1:08:14<26:41:20, 10.15it/s]

{'loss': Array(0.21624663, dtype=float32), 'loss_cross_entropy': Array(0.19776973, dtype=float32)}


  2%|▏         | 24719/1000000 [1:08:16<39:50:01,  6.80it/s]

{'loss': Array(0.23213735, dtype=float32), 'loss_cross_entropy': Array(0.21325676, dtype=float32)}


  2%|▏         | 24729/1000000 [1:08:17<27:28:27,  9.86it/s]

{'loss': Array(0.23487401, dtype=float32), 'loss_cross_entropy': Array(0.21501933, dtype=float32)}


  2%|▏         | 24739/1000000 [1:08:19<26:01:02, 10.41it/s]

{'loss': Array(0.21104185, dtype=float32), 'loss_cross_entropy': Array(0.19267492, dtype=float32)}


  2%|▏         | 24749/1000000 [1:08:20<35:43:21,  7.58it/s]

{'loss': Array(0.2152067, dtype=float32), 'loss_cross_entropy': Array(0.19606201, dtype=float32)}


  2%|▏         | 24759/1000000 [1:08:22<26:42:10, 10.15it/s]

{'loss': Array(0.19994676, dtype=float32), 'loss_cross_entropy': Array(0.1823432, dtype=float32)}


  2%|▏         | 24769/1000000 [1:08:23<25:10:20, 10.76it/s]

{'loss': Array(0.22809891, dtype=float32), 'loss_cross_entropy': Array(0.20895465, dtype=float32)}


  2%|▏         | 24779/1000000 [1:08:25<29:54:54,  9.06it/s]

{'loss': Array(0.23009168, dtype=float32), 'loss_cross_entropy': Array(0.21036816, dtype=float32)}


  2%|▏         | 24789/1000000 [1:08:26<25:48:57, 10.49it/s]

{'loss': Array(0.23845463, dtype=float32), 'loss_cross_entropy': Array(0.21969531, dtype=float32)}


  2%|▏         | 24799/1000000 [1:08:28<32:02:43,  8.45it/s]

{'loss': Array(0.20385306, dtype=float32), 'loss_cross_entropy': Array(0.18720734, dtype=float32)}


  2%|▏         | 24809/1000000 [1:08:29<26:28:24, 10.23it/s]

{'loss': Array(0.23444453, dtype=float32), 'loss_cross_entropy': Array(0.21410751, dtype=float32)}


  2%|▏         | 24819/1000000 [1:08:30<26:20:38, 10.28it/s]

{'loss': Array(0.22142935, dtype=float32), 'loss_cross_entropy': Array(0.20343494, dtype=float32)}


  2%|▏         | 24829/1000000 [1:08:32<28:44:27,  9.42it/s]

{'loss': Array(0.22597753, dtype=float32), 'loss_cross_entropy': Array(0.20764637, dtype=float32)}


  2%|▏         | 24839/1000000 [1:08:34<25:46:36, 10.51it/s]

{'loss': Array(0.20530592, dtype=float32), 'loss_cross_entropy': Array(0.1872339, dtype=float32)}


  2%|▏         | 24849/1000000 [1:08:35<35:33:12,  7.62it/s]

{'loss': Array(0.216644, dtype=float32), 'loss_cross_entropy': Array(0.19763185, dtype=float32)}


  2%|▏         | 24859/1000000 [1:08:37<26:31:33, 10.21it/s]

{'loss': Array(0.23559058, dtype=float32), 'loss_cross_entropy': Array(0.2165923, dtype=float32)}


  2%|▏         | 24869/1000000 [1:08:38<24:49:33, 10.91it/s]

{'loss': Array(0.22245426, dtype=float32), 'loss_cross_entropy': Array(0.20335566, dtype=float32)}


  2%|▏         | 24879/1000000 [1:08:40<28:24:48,  9.53it/s]

{'loss': Array(0.24352665, dtype=float32), 'loss_cross_entropy': Array(0.22285353, dtype=float32)}


  2%|▏         | 24889/1000000 [1:08:41<25:43:49, 10.53it/s]

{'loss': Array(0.22821175, dtype=float32), 'loss_cross_entropy': Array(0.210023, dtype=float32)}


  2%|▏         | 24899/1000000 [1:08:43<30:51:16,  8.78it/s]

{'loss': Array(0.19041279, dtype=float32), 'loss_cross_entropy': Array(0.17384411, dtype=float32)}


  2%|▏         | 24909/1000000 [1:08:44<26:11:10, 10.34it/s]

{'loss': Array(0.20267037, dtype=float32), 'loss_cross_entropy': Array(0.18481705, dtype=float32)}


  2%|▏         | 24919/1000000 [1:08:46<40:38:26,  6.66it/s]

{'loss': Array(0.23817001, dtype=float32), 'loss_cross_entropy': Array(0.21797536, dtype=float32)}


  2%|▏         | 24929/1000000 [1:08:47<27:33:30,  9.83it/s]

{'loss': Array(0.2038018, dtype=float32), 'loss_cross_entropy': Array(0.18661216, dtype=float32)}


  2%|▏         | 24939/1000000 [1:08:49<25:19:37, 10.69it/s]

{'loss': Array(0.18336809, dtype=float32), 'loss_cross_entropy': Array(0.16732049, dtype=float32)}


  2%|▏         | 24949/1000000 [1:08:50<32:31:20,  8.33it/s]

{'loss': Array(0.20379129, dtype=float32), 'loss_cross_entropy': Array(0.18581049, dtype=float32)}


  2%|▏         | 24959/1000000 [1:08:52<26:37:53, 10.17it/s]

{'loss': Array(0.23289593, dtype=float32), 'loss_cross_entropy': Array(0.21414776, dtype=float32)}


  2%|▏         | 24969/1000000 [1:08:53<35:36:43,  7.61it/s]

{'loss': Array(0.22747341, dtype=float32), 'loss_cross_entropy': Array(0.20956181, dtype=float32)}


  2%|▏         | 24979/1000000 [1:08:55<27:08:41,  9.98it/s]

{'loss': Array(0.24324031, dtype=float32), 'loss_cross_entropy': Array(0.22389396, dtype=float32)}


  2%|▏         | 24989/1000000 [1:08:56<35:37:50,  7.60it/s]

{'loss': Array(0.22831813, dtype=float32), 'loss_cross_entropy': Array(0.20997027, dtype=float32)}


  2%|▏         | 24999/1000000 [1:08:58<26:55:37, 10.06it/s]

{'loss': Array(0.22413245, dtype=float32), 'loss_cross_entropy': Array(0.206425, dtype=float32)}


  3%|▎         | 25009/1000000 [1:09:06<89:17:33,  3.03it/s] 

{'loss': Array(0.22999732, dtype=float32), 'loss_cross_entropy': Array(0.21024962, dtype=float32)}


  3%|▎         | 25019/1000000 [1:09:07<42:00:42,  6.45it/s]

{'loss': Array(0.23048568, dtype=float32), 'loss_cross_entropy': Array(0.21209918, dtype=float32)}


  3%|▎         | 25029/1000000 [1:09:09<28:15:45,  9.58it/s]

{'loss': Array(0.20994313, dtype=float32), 'loss_cross_entropy': Array(0.19255896, dtype=float32)}


  3%|▎         | 25039/1000000 [1:09:11<36:06:50,  7.50it/s]

{'loss': Array(0.20649993, dtype=float32), 'loss_cross_entropy': Array(0.1898428, dtype=float32)}


  3%|▎         | 25049/1000000 [1:09:12<26:43:13, 10.14it/s]

{'loss': Array(0.19027841, dtype=float32), 'loss_cross_entropy': Array(0.17366593, dtype=float32)}


  3%|▎         | 25059/1000000 [1:09:14<39:39:58,  6.83it/s]

{'loss': Array(0.22988991, dtype=float32), 'loss_cross_entropy': Array(0.21233518, dtype=float32)}


  3%|▎         | 25069/1000000 [1:09:15<27:34:09,  9.82it/s]

{'loss': Array(0.2108589, dtype=float32), 'loss_cross_entropy': Array(0.19226773, dtype=float32)}


  3%|▎         | 25079/1000000 [1:09:16<25:17:44, 10.71it/s]

{'loss': Array(0.20204738, dtype=float32), 'loss_cross_entropy': Array(0.1847635, dtype=float32)}


  3%|▎         | 25089/1000000 [1:09:18<32:00:04,  8.46it/s]

{'loss': Array(0.24082907, dtype=float32), 'loss_cross_entropy': Array(0.2205833, dtype=float32)}


  3%|▎         | 25099/1000000 [1:09:19<27:16:01,  9.93it/s]

{'loss': Array(0.21970277, dtype=float32), 'loss_cross_entropy': Array(0.2023857, dtype=float32)}


  3%|▎         | 25109/1000000 [1:09:21<35:40:35,  7.59it/s]

{'loss': Array(0.21344273, dtype=float32), 'loss_cross_entropy': Array(0.1954903, dtype=float32)}


  3%|▎         | 25119/1000000 [1:09:23<27:33:33,  9.83it/s]

{'loss': Array(0.20485711, dtype=float32), 'loss_cross_entropy': Array(0.18737976, dtype=float32)}


  3%|▎         | 25129/1000000 [1:09:24<26:09:17, 10.35it/s]

{'loss': Array(0.20306747, dtype=float32), 'loss_cross_entropy': Array(0.18601528, dtype=float32)}


  3%|▎         | 25139/1000000 [1:09:26<28:32:30,  9.49it/s]

{'loss': Array(0.2017058, dtype=float32), 'loss_cross_entropy': Array(0.18330792, dtype=float32)}


  3%|▎         | 25149/1000000 [1:09:27<26:07:06, 10.37it/s]

{'loss': Array(0.23551388, dtype=float32), 'loss_cross_entropy': Array(0.21618655, dtype=float32)}


  3%|▎         | 25159/1000000 [1:09:29<41:02:40,  6.60it/s]

{'loss': Array(0.216432, dtype=float32), 'loss_cross_entropy': Array(0.19755207, dtype=float32)}


  3%|▎         | 25169/1000000 [1:09:30<28:21:57,  9.55it/s]

{'loss': Array(0.20537673, dtype=float32), 'loss_cross_entropy': Array(0.18777482, dtype=float32)}


  3%|▎         | 25179/1000000 [1:09:32<25:46:40, 10.50it/s]

{'loss': Array(0.2075661, dtype=float32), 'loss_cross_entropy': Array(0.19018877, dtype=float32)}


  3%|▎         | 25189/1000000 [1:09:33<32:22:42,  8.36it/s]

{'loss': Array(0.22671877, dtype=float32), 'loss_cross_entropy': Array(0.20808892, dtype=float32)}


  3%|▎         | 25199/1000000 [1:09:35<26:41:47, 10.14it/s]

{'loss': Array(0.20946398, dtype=float32), 'loss_cross_entropy': Array(0.19253968, dtype=float32)}


  3%|▎         | 25209/1000000 [1:09:36<36:18:48,  7.46it/s]

{'loss': Array(0.22696455, dtype=float32), 'loss_cross_entropy': Array(0.20748444, dtype=float32)}


  3%|▎         | 25219/1000000 [1:09:38<27:33:26,  9.83it/s]

{'loss': Array(0.20661898, dtype=float32), 'loss_cross_entropy': Array(0.1891168, dtype=float32)}


  3%|▎         | 25229/1000000 [1:09:40<36:05:42,  7.50it/s]

{'loss': Array(0.2428083, dtype=float32), 'loss_cross_entropy': Array(0.22182404, dtype=float32)}


  3%|▎         | 25239/1000000 [1:09:41<27:15:42,  9.93it/s]

{'loss': Array(0.2162432, dtype=float32), 'loss_cross_entropy': Array(0.19658034, dtype=float32)}


  3%|▎         | 25249/1000000 [1:09:42<25:07:31, 10.78it/s]

{'loss': Array(0.19317825, dtype=float32), 'loss_cross_entropy': Array(0.17607445, dtype=float32)}


  3%|▎         | 25259/1000000 [1:09:44<29:57:59,  9.04it/s]

{'loss': Array(0.211439, dtype=float32), 'loss_cross_entropy': Array(0.19257447, dtype=float32)}


  3%|▎         | 25269/1000000 [1:09:45<26:28:20, 10.23it/s]

{'loss': Array(0.2059172, dtype=float32), 'loss_cross_entropy': Array(0.18898213, dtype=float32)}


  3%|▎         | 25279/1000000 [1:09:47<32:10:13,  8.42it/s]

{'loss': Array(0.2178971, dtype=float32), 'loss_cross_entropy': Array(0.19950645, dtype=float32)}


  3%|▎         | 25289/1000000 [1:09:49<26:29:23, 10.22it/s]

{'loss': Array(0.21537212, dtype=float32), 'loss_cross_entropy': Array(0.19723256, dtype=float32)}


  3%|▎         | 25299/1000000 [1:09:50<40:07:44,  6.75it/s]

{'loss': Array(0.21486822, dtype=float32), 'loss_cross_entropy': Array(0.19643383, dtype=float32)}


  3%|▎         | 25309/1000000 [1:09:52<28:19:12,  9.56it/s]

{'loss': Array(0.24198978, dtype=float32), 'loss_cross_entropy': Array(0.22256151, dtype=float32)}


  3%|▎         | 25319/1000000 [1:09:53<40:16:31,  6.72it/s]

{'loss': Array(0.2308315, dtype=float32), 'loss_cross_entropy': Array(0.21129273, dtype=float32)}


  3%|▎         | 25329/1000000 [1:09:55<27:30:44,  9.84it/s]

{'loss': Array(0.21643904, dtype=float32), 'loss_cross_entropy': Array(0.19777799, dtype=float32)}


  3%|▎         | 25339/1000000 [1:09:56<25:31:33, 10.61it/s]

{'loss': Array(0.20699635, dtype=float32), 'loss_cross_entropy': Array(0.1892832, dtype=float32)}


  3%|▎         | 25349/1000000 [1:09:58<35:16:26,  7.68it/s]

{'loss': Array(0.20395021, dtype=float32), 'loss_cross_entropy': Array(0.1852831, dtype=float32)}


  3%|▎         | 25359/1000000 [1:09:59<27:36:37,  9.81it/s]

{'loss': Array(0.22908635, dtype=float32), 'loss_cross_entropy': Array(0.20916544, dtype=float32)}


  3%|▎         | 25369/1000000 [1:10:01<40:25:23,  6.70it/s]

{'loss': Array(0.20563816, dtype=float32), 'loss_cross_entropy': Array(0.187952, dtype=float32)}


  3%|▎         | 25379/1000000 [1:10:02<28:05:57,  9.63it/s]

{'loss': Array(0.23288369, dtype=float32), 'loss_cross_entropy': Array(0.21462479, dtype=float32)}


  3%|▎         | 25389/1000000 [1:10:04<25:31:23, 10.61it/s]

{'loss': Array(0.22702852, dtype=float32), 'loss_cross_entropy': Array(0.20854877, dtype=float32)}


  3%|▎         | 25399/1000000 [1:10:05<29:09:11,  9.29it/s]

{'loss': Array(0.20718463, dtype=float32), 'loss_cross_entropy': Array(0.18900053, dtype=float32)}


  3%|▎         | 25409/1000000 [1:10:07<25:45:59, 10.51it/s]

{'loss': Array(0.23360002, dtype=float32), 'loss_cross_entropy': Array(0.21438172, dtype=float32)}


  3%|▎         | 25419/1000000 [1:10:09<29:20:35,  9.23it/s]

{'loss': Array(0.233535, dtype=float32), 'loss_cross_entropy': Array(0.21392451, dtype=float32)}


  3%|▎         | 25429/1000000 [1:10:10<25:35:08, 10.58it/s]

{'loss': Array(0.20909207, dtype=float32), 'loss_cross_entropy': Array(0.19130084, dtype=float32)}


  3%|▎         | 25439/1000000 [1:10:12<39:42:14,  6.82it/s]

{'loss': Array(0.2403544, dtype=float32), 'loss_cross_entropy': Array(0.22093408, dtype=float32)}


  3%|▎         | 25449/1000000 [1:10:13<27:22:34,  9.89it/s]

{'loss': Array(0.23289609, dtype=float32), 'loss_cross_entropy': Array(0.21332899, dtype=float32)}


  3%|▎         | 25459/1000000 [1:10:14<25:30:43, 10.61it/s]

{'loss': Array(0.21125793, dtype=float32), 'loss_cross_entropy': Array(0.19306569, dtype=float32)}


  3%|▎         | 25469/1000000 [1:10:16<32:17:19,  8.38it/s]

{'loss': Array(0.22038667, dtype=float32), 'loss_cross_entropy': Array(0.20115915, dtype=float32)}


  3%|▎         | 25479/1000000 [1:10:17<26:06:21, 10.37it/s]

{'loss': Array(0.22785182, dtype=float32), 'loss_cross_entropy': Array(0.20926242, dtype=float32)}


  3%|▎         | 25489/1000000 [1:10:19<39:25:01,  6.87it/s]

{'loss': Array(0.22485378, dtype=float32), 'loss_cross_entropy': Array(0.20668292, dtype=float32)}


  3%|▎         | 25499/1000000 [1:10:20<27:34:26,  9.82it/s]

{'loss': Array(0.23990043, dtype=float32), 'loss_cross_entropy': Array(0.21956809, dtype=float32)}


  3%|▎         | 25509/1000000 [1:10:28<87:46:57,  3.08it/s] 

{'loss': Array(0.22012, dtype=float32), 'loss_cross_entropy': Array(0.20144497, dtype=float32)}


  3%|▎         | 25519/1000000 [1:10:30<40:08:24,  6.74it/s]

{'loss': Array(0.24014914, dtype=float32), 'loss_cross_entropy': Array(0.22083831, dtype=float32)}


  3%|▎         | 25529/1000000 [1:10:31<27:27:18,  9.86it/s]

{'loss': Array(0.21491745, dtype=float32), 'loss_cross_entropy': Array(0.19572306, dtype=float32)}


  3%|▎         | 25539/1000000 [1:10:33<32:45:38,  8.26it/s]

{'loss': Array(0.1970068, dtype=float32), 'loss_cross_entropy': Array(0.1800044, dtype=float32)}


  3%|▎         | 25549/1000000 [1:10:34<26:41:59, 10.14it/s]

{'loss': Array(0.2215052, dtype=float32), 'loss_cross_entropy': Array(0.20342699, dtype=float32)}


  3%|▎         | 25559/1000000 [1:10:36<40:24:53,  6.70it/s]

{'loss': Array(0.23536392, dtype=float32), 'loss_cross_entropy': Array(0.21646552, dtype=float32)}


  3%|▎         | 25569/1000000 [1:10:37<27:55:54,  9.69it/s]

{'loss': Array(0.22155605, dtype=float32), 'loss_cross_entropy': Array(0.20298858, dtype=float32)}


  3%|▎         | 25579/1000000 [1:10:39<25:30:54, 10.61it/s]

{'loss': Array(0.23356538, dtype=float32), 'loss_cross_entropy': Array(0.21444102, dtype=float32)}


  3%|▎         | 25589/1000000 [1:10:41<29:05:10,  9.31it/s]

{'loss': Array(0.219839, dtype=float32), 'loss_cross_entropy': Array(0.20190418, dtype=float32)}


  3%|▎         | 25599/1000000 [1:10:42<25:43:15, 10.52it/s]

{'loss': Array(0.23896754, dtype=float32), 'loss_cross_entropy': Array(0.21926609, dtype=float32)}


  3%|▎         | 25609/1000000 [1:10:44<39:30:49,  6.85it/s]

{'loss': Array(0.22480293, dtype=float32), 'loss_cross_entropy': Array(0.20615129, dtype=float32)}


  3%|▎         | 25619/1000000 [1:10:45<27:09:12,  9.97it/s]

{'loss': Array(0.24562459, dtype=float32), 'loss_cross_entropy': Array(0.22566536, dtype=float32)}


  3%|▎         | 25629/1000000 [1:10:46<25:03:47, 10.80it/s]

{'loss': Array(0.22408421, dtype=float32), 'loss_cross_entropy': Array(0.20523019, dtype=float32)}


  3%|▎         | 25639/1000000 [1:10:48<29:31:44,  9.17it/s]

{'loss': Array(0.22809088, dtype=float32), 'loss_cross_entropy': Array(0.20895424, dtype=float32)}


  3%|▎         | 25649/1000000 [1:10:49<25:31:41, 10.60it/s]

{'loss': Array(0.2328681, dtype=float32), 'loss_cross_entropy': Array(0.21489064, dtype=float32)}


  3%|▎         | 25659/1000000 [1:10:51<35:12:48,  7.69it/s]

{'loss': Array(0.23675485, dtype=float32), 'loss_cross_entropy': Array(0.21673043, dtype=float32)}


  3%|▎         | 25669/1000000 [1:10:52<26:38:30, 10.16it/s]

{'loss': Array(0.21093626, dtype=float32), 'loss_cross_entropy': Array(0.19249508, dtype=float32)}


  3%|▎         | 25679/1000000 [1:10:54<39:40:20,  6.82it/s]

{'loss': Array(0.22292018, dtype=float32), 'loss_cross_entropy': Array(0.2051359, dtype=float32)}


  3%|▎         | 25689/1000000 [1:10:55<27:01:33, 10.01it/s]

{'loss': Array(0.24437848, dtype=float32), 'loss_cross_entropy': Array(0.22353223, dtype=float32)}


  3%|▎         | 25699/1000000 [1:10:57<25:42:18, 10.53it/s]

{'loss': Array(0.23161402, dtype=float32), 'loss_cross_entropy': Array(0.2134953, dtype=float32)}


  3%|▎         | 25709/1000000 [1:10:59<33:03:14,  8.19it/s]

{'loss': Array(0.22944131, dtype=float32), 'loss_cross_entropy': Array(0.21104196, dtype=float32)}


  3%|▎         | 25719/1000000 [1:11:00<28:13:16,  9.59it/s]

{'loss': Array(0.23797177, dtype=float32), 'loss_cross_entropy': Array(0.21879053, dtype=float32)}


  3%|▎         | 25729/1000000 [1:11:02<35:36:00,  7.60it/s]

{'loss': Array(0.23990694, dtype=float32), 'loss_cross_entropy': Array(0.22116561, dtype=float32)}


  3%|▎         | 25739/1000000 [1:11:03<26:49:53, 10.09it/s]

{'loss': Array(0.22244398, dtype=float32), 'loss_cross_entropy': Array(0.20438042, dtype=float32)}


  3%|▎         | 25749/1000000 [1:11:04<25:55:47, 10.44it/s]

{'loss': Array(0.2109473, dtype=float32), 'loss_cross_entropy': Array(0.19366543, dtype=float32)}


  3%|▎         | 25759/1000000 [1:11:06<28:42:09,  9.43it/s]

{'loss': Array(0.2501924, dtype=float32), 'loss_cross_entropy': Array(0.2311987, dtype=float32)}


  3%|▎         | 25769/1000000 [1:11:08<25:48:03, 10.49it/s]

{'loss': Array(0.23841675, dtype=float32), 'loss_cross_entropy': Array(0.21941307, dtype=float32)}


  3%|▎         | 25779/1000000 [1:11:09<40:08:03,  6.74it/s]

{'loss': Array(0.21259041, dtype=float32), 'loss_cross_entropy': Array(0.19503598, dtype=float32)}


  3%|▎         | 25789/1000000 [1:11:11<27:29:17,  9.84it/s]

{'loss': Array(0.22190762, dtype=float32), 'loss_cross_entropy': Array(0.20422864, dtype=float32)}


  3%|▎         | 25799/1000000 [1:11:12<25:18:25, 10.69it/s]

{'loss': Array(0.2145267, dtype=float32), 'loss_cross_entropy': Array(0.19685087, dtype=float32)}


  3%|▎         | 25809/1000000 [1:11:14<32:02:56,  8.44it/s]

{'loss': Array(0.22714894, dtype=float32), 'loss_cross_entropy': Array(0.20764165, dtype=float32)}


  3%|▎         | 25819/1000000 [1:11:15<26:05:02, 10.37it/s]

{'loss': Array(0.22644845, dtype=float32), 'loss_cross_entropy': Array(0.20810957, dtype=float32)}


  3%|▎         | 25829/1000000 [1:11:17<35:41:01,  7.58it/s]

{'loss': Array(0.19461368, dtype=float32), 'loss_cross_entropy': Array(0.17772536, dtype=float32)}


  3%|▎         | 25839/1000000 [1:11:18<26:40:12, 10.15it/s]

{'loss': Array(0.22771485, dtype=float32), 'loss_cross_entropy': Array(0.20880668, dtype=float32)}


  3%|▎         | 25849/1000000 [1:11:20<35:02:06,  7.72it/s]

{'loss': Array(0.20971096, dtype=float32), 'loss_cross_entropy': Array(0.19170968, dtype=float32)}


  3%|▎         | 25859/1000000 [1:11:21<26:52:40, 10.07it/s]

{'loss': Array(0.24196036, dtype=float32), 'loss_cross_entropy': Array(0.22166674, dtype=float32)}


  3%|▎         | 25869/1000000 [1:11:22<25:46:58, 10.50it/s]

{'loss': Array(0.22278371, dtype=float32), 'loss_cross_entropy': Array(0.2046999, dtype=float32)}


  3%|▎         | 25879/1000000 [1:11:24<30:29:12,  8.88it/s]

{'loss': Array(0.1947938, dtype=float32), 'loss_cross_entropy': Array(0.1772511, dtype=float32)}


  3%|▎         | 25889/1000000 [1:11:26<26:39:23, 10.15it/s]

{'loss': Array(0.21094362, dtype=float32), 'loss_cross_entropy': Array(0.19327958, dtype=float32)}


  3%|▎         | 25899/1000000 [1:11:27<32:05:58,  8.43it/s]

{'loss': Array(0.21540366, dtype=float32), 'loss_cross_entropy': Array(0.19761316, dtype=float32)}


  3%|▎         | 25909/1000000 [1:11:29<25:58:39, 10.42it/s]

{'loss': Array(0.22484815, dtype=float32), 'loss_cross_entropy': Array(0.2060626, dtype=float32)}


  3%|▎         | 25919/1000000 [1:11:30<40:17:54,  6.71it/s]

{'loss': Array(0.21780895, dtype=float32), 'loss_cross_entropy': Array(0.20072751, dtype=float32)}


  3%|▎         | 25929/1000000 [1:11:32<27:40:17,  9.78it/s]

{'loss': Array(0.21611372, dtype=float32), 'loss_cross_entropy': Array(0.19739912, dtype=float32)}


  3%|▎         | 25939/1000000 [1:11:34<40:58:32,  6.60it/s]

{'loss': Array(0.1977614, dtype=float32), 'loss_cross_entropy': Array(0.18158643, dtype=float32)}


  3%|▎         | 25949/1000000 [1:11:35<27:20:12,  9.90it/s]

{'loss': Array(0.2439438, dtype=float32), 'loss_cross_entropy': Array(0.22263062, dtype=float32)}


  3%|▎         | 25959/1000000 [1:11:36<25:41:59, 10.53it/s]

{'loss': Array(0.22984143, dtype=float32), 'loss_cross_entropy': Array(0.21054912, dtype=float32)}


  3%|▎         | 25969/1000000 [1:11:38<35:21:58,  7.65it/s]

{'loss': Array(0.2344157, dtype=float32), 'loss_cross_entropy': Array(0.21484862, dtype=float32)}


  3%|▎         | 25979/1000000 [1:11:39<26:32:10, 10.20it/s]

{'loss': Array(0.20400949, dtype=float32), 'loss_cross_entropy': Array(0.18714198, dtype=float32)}


  3%|▎         | 25989/1000000 [1:11:41<39:13:28,  6.90it/s]

{'loss': Array(0.20357572, dtype=float32), 'loss_cross_entropy': Array(0.18722466, dtype=float32)}


  3%|▎         | 25999/1000000 [1:11:42<27:06:08,  9.98it/s]

{'loss': Array(0.21604489, dtype=float32), 'loss_cross_entropy': Array(0.19837874, dtype=float32)}


  3%|▎         | 26009/1000000 [1:11:50<91:04:03,  2.97it/s] 

{'loss': Array(0.21400385, dtype=float32), 'loss_cross_entropy': Array(0.19633213, dtype=float32)}


  3%|▎         | 26019/1000000 [1:11:52<39:24:03,  6.87it/s] 

{'loss': Array(0.20435348, dtype=float32), 'loss_cross_entropy': Array(0.18792804, dtype=float32)}


  3%|▎         | 26029/1000000 [1:11:53<27:38:49,  9.79it/s]

{'loss': Array(0.22537915, dtype=float32), 'loss_cross_entropy': Array(0.2072355, dtype=float32)}


  3%|▎         | 26039/1000000 [1:11:55<29:01:47,  9.32it/s]

{'loss': Array(0.23533106, dtype=float32), 'loss_cross_entropy': Array(0.2160195, dtype=float32)}


  3%|▎         | 26049/1000000 [1:11:57<26:19:20, 10.28it/s]

{'loss': Array(0.19821927, dtype=float32), 'loss_cross_entropy': Array(0.18125872, dtype=float32)}


  3%|▎         | 26059/1000000 [1:11:58<40:39:03,  6.66it/s]

{'loss': Array(0.22422431, dtype=float32), 'loss_cross_entropy': Array(0.2064873, dtype=float32)}


  3%|▎         | 26069/1000000 [1:12:00<28:50:21,  9.38it/s]

{'loss': Array(0.19512337, dtype=float32), 'loss_cross_entropy': Array(0.17818376, dtype=float32)}


  3%|▎         | 26079/1000000 [1:12:01<25:50:15, 10.47it/s]

{'loss': Array(0.21961069, dtype=float32), 'loss_cross_entropy': Array(0.20118368, dtype=float32)}


  3%|▎         | 26089/1000000 [1:12:03<32:29:36,  8.33it/s]

{'loss': Array(0.20848052, dtype=float32), 'loss_cross_entropy': Array(0.19099252, dtype=float32)}


  3%|▎         | 26099/1000000 [1:12:04<25:56:03, 10.43it/s]

{'loss': Array(0.22785158, dtype=float32), 'loss_cross_entropy': Array(0.20975147, dtype=float32)}


  3%|▎         | 26109/1000000 [1:12:06<39:48:53,  6.79it/s]

{'loss': Array(0.22288065, dtype=float32), 'loss_cross_entropy': Array(0.20508276, dtype=float32)}


  3%|▎         | 26119/1000000 [1:12:07<27:31:52,  9.83it/s]

{'loss': Array(0.21885608, dtype=float32), 'loss_cross_entropy': Array(0.20070839, dtype=float32)}


  3%|▎         | 26129/1000000 [1:12:09<26:15:40, 10.30it/s]

{'loss': Array(0.21150474, dtype=float32), 'loss_cross_entropy': Array(0.19421376, dtype=float32)}


  3%|▎         | 26139/1000000 [1:12:10<29:38:55,  9.12it/s]

{'loss': Array(0.22359686, dtype=float32), 'loss_cross_entropy': Array(0.20428911, dtype=float32)}


  3%|▎         | 26149/1000000 [1:12:12<25:55:43, 10.43it/s]

{'loss': Array(0.2108591, dtype=float32), 'loss_cross_entropy': Array(0.19398902, dtype=float32)}


  3%|▎         | 26159/1000000 [1:12:13<35:17:08,  7.67it/s]

{'loss': Array(0.23701237, dtype=float32), 'loss_cross_entropy': Array(0.21810763, dtype=float32)}


  3%|▎         | 26169/1000000 [1:12:15<26:49:58, 10.08it/s]

{'loss': Array(0.23671746, dtype=float32), 'loss_cross_entropy': Array(0.21708708, dtype=float32)}


  3%|▎         | 26179/1000000 [1:12:16<25:50:50, 10.47it/s]

{'loss': Array(0.22773813, dtype=float32), 'loss_cross_entropy': Array(0.20888548, dtype=float32)}


  3%|▎         | 26189/1000000 [1:12:18<32:05:03,  8.43it/s]

{'loss': Array(0.195679, dtype=float32), 'loss_cross_entropy': Array(0.17888364, dtype=float32)}


  3%|▎         | 26199/1000000 [1:12:19<26:01:14, 10.40it/s]

{'loss': Array(0.2528635, dtype=float32), 'loss_cross_entropy': Array(0.2326958, dtype=float32)}


  3%|▎         | 26209/1000000 [1:12:21<24:51:07, 10.88it/s]

{'loss': Array(0.21528827, dtype=float32), 'loss_cross_entropy': Array(0.19755833, dtype=float32)}


  3%|▎         | 26219/1000000 [1:12:22<28:40:52,  9.43it/s]

{'loss': Array(0.22272933, dtype=float32), 'loss_cross_entropy': Array(0.20504622, dtype=float32)}


  3%|▎         | 26229/1000000 [1:12:24<25:29:13, 10.61it/s]

{'loss': Array(0.22089516, dtype=float32), 'loss_cross_entropy': Array(0.20285165, dtype=float32)}


  3%|▎         | 26239/1000000 [1:12:25<32:09:42,  8.41it/s]

{'loss': Array(0.2126801, dtype=float32), 'loss_cross_entropy': Array(0.19536547, dtype=float32)}


  3%|▎         | 26249/1000000 [1:12:27<25:50:50, 10.46it/s]

{'loss': Array(0.21708582, dtype=float32), 'loss_cross_entropy': Array(0.19912046, dtype=float32)}


  3%|▎         | 26259/1000000 [1:12:28<39:35:04,  6.83it/s]

{'loss': Array(0.20064235, dtype=float32), 'loss_cross_entropy': Array(0.18493941, dtype=float32)}


  3%|▎         | 26269/1000000 [1:12:30<27:38:41,  9.78it/s]

{'loss': Array(0.24222453, dtype=float32), 'loss_cross_entropy': Array(0.22381961, dtype=float32)}


  3%|▎         | 26279/1000000 [1:12:31<39:59:04,  6.76it/s]

{'loss': Array(0.23238938, dtype=float32), 'loss_cross_entropy': Array(0.21254158, dtype=float32)}


  3%|▎         | 26289/1000000 [1:12:33<27:21:25,  9.89it/s]

{'loss': Array(0.19584368, dtype=float32), 'loss_cross_entropy': Array(0.17843346, dtype=float32)}


  3%|▎         | 26299/1000000 [1:12:34<25:16:35, 10.70it/s]

{'loss': Array(0.21720305, dtype=float32), 'loss_cross_entropy': Array(0.19928095, dtype=float32)}


  3%|▎         | 26309/1000000 [1:12:36<35:26:38,  7.63it/s]

{'loss': Array(0.2150472, dtype=float32), 'loss_cross_entropy': Array(0.1970601, dtype=float32)}


  3%|▎         | 26319/1000000 [1:12:37<26:20:17, 10.27it/s]

{'loss': Array(0.21133481, dtype=float32), 'loss_cross_entropy': Array(0.19373469, dtype=float32)}


  3%|▎         | 26329/1000000 [1:12:39<39:58:09,  6.77it/s]

{'loss': Array(0.21690264, dtype=float32), 'loss_cross_entropy': Array(0.20024541, dtype=float32)}


  3%|▎         | 26339/1000000 [1:12:40<27:47:28,  9.73it/s]

{'loss': Array(0.22230995, dtype=float32), 'loss_cross_entropy': Array(0.20349804, dtype=float32)}


  3%|▎         | 26349/1000000 [1:12:42<25:11:43, 10.73it/s]

{'loss': Array(0.22053023, dtype=float32), 'loss_cross_entropy': Array(0.20221794, dtype=float32)}


  3%|▎         | 26359/1000000 [1:12:43<28:05:53,  9.63it/s]

{'loss': Array(0.22913158, dtype=float32), 'loss_cross_entropy': Array(0.21131594, dtype=float32)}


  3%|▎         | 26369/1000000 [1:12:45<25:20:01, 10.68it/s]

{'loss': Array(0.19382782, dtype=float32), 'loss_cross_entropy': Array(0.17729445, dtype=float32)}


  3%|▎         | 26379/1000000 [1:12:46<28:41:26,  9.43it/s]

{'loss': Array(0.2088331, dtype=float32), 'loss_cross_entropy': Array(0.19171599, dtype=float32)}


  3%|▎         | 26389/1000000 [1:12:48<25:18:35, 10.69it/s]

{'loss': Array(0.20224504, dtype=float32), 'loss_cross_entropy': Array(0.18496875, dtype=float32)}


  3%|▎         | 26399/1000000 [1:12:49<39:44:07,  6.81it/s]

{'loss': Array(0.209341, dtype=float32), 'loss_cross_entropy': Array(0.1925189, dtype=float32)}


  3%|▎         | 26409/1000000 [1:12:51<27:19:01,  9.90it/s]

{'loss': Array(0.23069523, dtype=float32), 'loss_cross_entropy': Array(0.21135831, dtype=float32)}


  3%|▎         | 26419/1000000 [1:12:52<25:08:49, 10.75it/s]

{'loss': Array(0.20986895, dtype=float32), 'loss_cross_entropy': Array(0.19269209, dtype=float32)}


  3%|▎         | 26429/1000000 [1:12:54<31:58:47,  8.46it/s]

{'loss': Array(0.23046957, dtype=float32), 'loss_cross_entropy': Array(0.21195947, dtype=float32)}


  3%|▎         | 26439/1000000 [1:12:55<26:07:47, 10.35it/s]

{'loss': Array(0.2207429, dtype=float32), 'loss_cross_entropy': Array(0.20256765, dtype=float32)}


  3%|▎         | 26449/1000000 [1:12:57<39:56:07,  6.77it/s]

{'loss': Array(0.22788914, dtype=float32), 'loss_cross_entropy': Array(0.20967261, dtype=float32)}


  3%|▎         | 26459/1000000 [1:12:58<27:25:24,  9.86it/s]

{'loss': Array(0.22602944, dtype=float32), 'loss_cross_entropy': Array(0.20821121, dtype=float32)}


  3%|▎         | 26469/1000000 [1:12:59<26:38:09, 10.15it/s]

{'loss': Array(0.19804397, dtype=float32), 'loss_cross_entropy': Array(0.18208304, dtype=float32)}


  3%|▎         | 26479/1000000 [1:13:01<31:13:23,  8.66it/s]

{'loss': Array(0.248048, dtype=float32), 'loss_cross_entropy': Array(0.22846046, dtype=float32)}


  3%|▎         | 26489/1000000 [1:13:03<26:13:16, 10.31it/s]

{'loss': Array(0.22622047, dtype=float32), 'loss_cross_entropy': Array(0.20786643, dtype=float32)}


  3%|▎         | 26499/1000000 [1:13:04<35:32:24,  7.61it/s]

{'loss': Array(0.21557823, dtype=float32), 'loss_cross_entropy': Array(0.19781642, dtype=float32)}


  3%|▎         | 26509/1000000 [1:13:12<89:44:55,  3.01it/s] 

{'loss': Array(0.22025347, dtype=float32), 'loss_cross_entropy': Array(0.20249358, dtype=float32)}


  3%|▎         | 26519/1000000 [1:13:14<45:42:43,  5.92it/s]

{'loss': Array(0.21926676, dtype=float32), 'loss_cross_entropy': Array(0.20188776, dtype=float32)}


  3%|▎         | 26529/1000000 [1:13:15<28:34:44,  9.46it/s]

{'loss': Array(0.23509589, dtype=float32), 'loss_cross_entropy': Array(0.2159649, dtype=float32)}


  3%|▎         | 26539/1000000 [1:13:17<26:57:31, 10.03it/s]

{'loss': Array(0.2302501, dtype=float32), 'loss_cross_entropy': Array(0.2124803, dtype=float32)}


  3%|▎         | 26549/1000000 [1:13:18<32:26:50,  8.33it/s]

{'loss': Array(0.22308016, dtype=float32), 'loss_cross_entropy': Array(0.20513435, dtype=float32)}


  3%|▎         | 26559/1000000 [1:13:20<26:06:55, 10.35it/s]

{'loss': Array(0.21801214, dtype=float32), 'loss_cross_entropy': Array(0.2000402, dtype=float32)}


  3%|▎         | 26569/1000000 [1:13:21<24:54:31, 10.86it/s]

{'loss': Array(0.21804844, dtype=float32), 'loss_cross_entropy': Array(0.20018302, dtype=float32)}


  3%|▎         | 26579/1000000 [1:13:23<28:23:45,  9.52it/s]

{'loss': Array(0.2157321, dtype=float32), 'loss_cross_entropy': Array(0.19829099, dtype=float32)}


  3%|▎         | 26589/1000000 [1:13:24<25:51:04, 10.46it/s]

{'loss': Array(0.20928298, dtype=float32), 'loss_cross_entropy': Array(0.19153467, dtype=float32)}


  3%|▎         | 26599/1000000 [1:13:26<32:21:39,  8.36it/s]

{'loss': Array(0.20166555, dtype=float32), 'loss_cross_entropy': Array(0.1843259, dtype=float32)}


  3%|▎         | 26609/1000000 [1:13:27<26:02:25, 10.38it/s]

{'loss': Array(0.20365036, dtype=float32), 'loss_cross_entropy': Array(0.18698014, dtype=float32)}


  3%|▎         | 26619/1000000 [1:13:29<35:04:35,  7.71it/s]

{'loss': Array(0.19533876, dtype=float32), 'loss_cross_entropy': Array(0.1791565, dtype=float32)}


  3%|▎         | 26629/1000000 [1:13:30<26:41:02, 10.13it/s]

{'loss': Array(0.22153413, dtype=float32), 'loss_cross_entropy': Array(0.20372057, dtype=float32)}


  3%|▎         | 26639/1000000 [1:13:32<25:12:20, 10.73it/s]

{'loss': Array(0.23028067, dtype=float32), 'loss_cross_entropy': Array(0.21127538, dtype=float32)}


  3%|▎         | 26649/1000000 [1:13:33<32:24:29,  8.34it/s]

{'loss': Array(0.21985178, dtype=float32), 'loss_cross_entropy': Array(0.20180503, dtype=float32)}


  3%|▎         | 26659/1000000 [1:13:35<26:43:14, 10.12it/s]

{'loss': Array(0.22031164, dtype=float32), 'loss_cross_entropy': Array(0.20247686, dtype=float32)}


  3%|▎         | 26669/1000000 [1:13:36<26:13:44, 10.31it/s]

{'loss': Array(0.20516865, dtype=float32), 'loss_cross_entropy': Array(0.18743853, dtype=float32)}


  3%|▎         | 26679/1000000 [1:13:38<28:26:52,  9.50it/s]

{'loss': Array(0.22478227, dtype=float32), 'loss_cross_entropy': Array(0.20706804, dtype=float32)}


  3%|▎         | 26689/1000000 [1:13:39<26:11:50, 10.32it/s]

{'loss': Array(0.21629725, dtype=float32), 'loss_cross_entropy': Array(0.19931541, dtype=float32)}


  3%|▎         | 26699/1000000 [1:13:41<32:23:22,  8.35it/s]

{'loss': Array(0.21455745, dtype=float32), 'loss_cross_entropy': Array(0.19646572, dtype=float32)}


  3%|▎         | 26709/1000000 [1:13:42<26:09:27, 10.34it/s]

{'loss': Array(0.2195581, dtype=float32), 'loss_cross_entropy': Array(0.20099063, dtype=float32)}


  3%|▎         | 26719/1000000 [1:13:44<39:43:15,  6.81it/s]

{'loss': Array(0.23292723, dtype=float32), 'loss_cross_entropy': Array(0.21496192, dtype=float32)}


  3%|▎         | 26729/1000000 [1:13:45<27:44:19,  9.75it/s]

{'loss': Array(0.21774057, dtype=float32), 'loss_cross_entropy': Array(0.20029247, dtype=float32)}


  3%|▎         | 26739/1000000 [1:13:47<25:24:16, 10.64it/s]

{'loss': Array(0.22154367, dtype=float32), 'loss_cross_entropy': Array(0.20465134, dtype=float32)}


  3%|▎         | 26749/1000000 [1:13:48<35:47:01,  7.56it/s]

{'loss': Array(0.22720388, dtype=float32), 'loss_cross_entropy': Array(0.20871992, dtype=float32)}


  3%|▎         | 26759/1000000 [1:13:50<26:45:36, 10.10it/s]

{'loss': Array(0.22579455, dtype=float32), 'loss_cross_entropy': Array(0.20664184, dtype=float32)}


  3%|▎         | 26769/1000000 [1:13:51<25:28:38, 10.61it/s]

{'loss': Array(0.24348202, dtype=float32), 'loss_cross_entropy': Array(0.22353522, dtype=float32)}


  3%|▎         | 26779/1000000 [1:13:53<30:17:49,  8.92it/s]

{'loss': Array(0.20343888, dtype=float32), 'loss_cross_entropy': Array(0.18515185, dtype=float32)}


  3%|▎         | 26789/1000000 [1:13:54<25:48:15, 10.48it/s]

{'loss': Array(0.22412553, dtype=float32), 'loss_cross_entropy': Array(0.20563908, dtype=float32)}


  3%|▎         | 26799/1000000 [1:13:56<32:16:30,  8.38it/s]

{'loss': Array(0.21465182, dtype=float32), 'loss_cross_entropy': Array(0.19696236, dtype=float32)}


  3%|▎         | 26809/1000000 [1:13:57<26:05:00, 10.36it/s]

{'loss': Array(0.22741123, dtype=float32), 'loss_cross_entropy': Array(0.2092191, dtype=float32)}


  3%|▎         | 26819/1000000 [1:13:59<26:19:12, 10.27it/s]

{'loss': Array(0.21490645, dtype=float32), 'loss_cross_entropy': Array(0.19685352, dtype=float32)}


  3%|▎         | 26829/1000000 [1:14:00<30:09:55,  8.96it/s]

{'loss': Array(0.21317191, dtype=float32), 'loss_cross_entropy': Array(0.19532391, dtype=float32)}


  3%|▎         | 26839/1000000 [1:14:02<25:53:58, 10.44it/s]

{'loss': Array(0.22824264, dtype=float32), 'loss_cross_entropy': Array(0.20998685, dtype=float32)}


  3%|▎         | 26849/1000000 [1:14:04<35:30:17,  7.61it/s]

{'loss': Array(0.1935441, dtype=float32), 'loss_cross_entropy': Array(0.17568433, dtype=float32)}


  3%|▎         | 26859/1000000 [1:14:05<26:26:20, 10.22it/s]

{'loss': Array(0.2273045, dtype=float32), 'loss_cross_entropy': Array(0.20959203, dtype=float32)}


  3%|▎         | 26869/1000000 [1:14:06<25:05:47, 10.77it/s]

{'loss': Array(0.20496598, dtype=float32), 'loss_cross_entropy': Array(0.18766026, dtype=float32)}


  3%|▎         | 26879/1000000 [1:14:08<28:46:42,  9.39it/s]

{'loss': Array(0.22691822, dtype=float32), 'loss_cross_entropy': Array(0.20909157, dtype=float32)}


  3%|▎         | 26889/1000000 [1:14:09<25:42:24, 10.52it/s]

{'loss': Array(0.2075218, dtype=float32), 'loss_cross_entropy': Array(0.19077168, dtype=float32)}


  3%|▎         | 26899/1000000 [1:14:11<29:38:56,  9.12it/s]

{'loss': Array(0.22569668, dtype=float32), 'loss_cross_entropy': Array(0.20783429, dtype=float32)}


  3%|▎         | 26909/1000000 [1:14:12<25:53:25, 10.44it/s]

{'loss': Array(0.22793806, dtype=float32), 'loss_cross_entropy': Array(0.20978932, dtype=float32)}


  3%|▎         | 26919/1000000 [1:14:14<39:45:18,  6.80it/s]

{'loss': Array(0.2100214, dtype=float32), 'loss_cross_entropy': Array(0.19184025, dtype=float32)}


  3%|▎         | 26929/1000000 [1:14:15<27:27:53,  9.84it/s]

{'loss': Array(0.22230683, dtype=float32), 'loss_cross_entropy': Array(0.20444481, dtype=float32)}


  3%|▎         | 26939/1000000 [1:14:17<25:21:01, 10.66it/s]

{'loss': Array(0.22824891, dtype=float32), 'loss_cross_entropy': Array(0.20967348, dtype=float32)}


  3%|▎         | 26949/1000000 [1:14:18<32:09:26,  8.41it/s]

{'loss': Array(0.22590294, dtype=float32), 'loss_cross_entropy': Array(0.20777296, dtype=float32)}


  3%|▎         | 26959/1000000 [1:14:20<26:16:53, 10.28it/s]

{'loss': Array(0.20709613, dtype=float32), 'loss_cross_entropy': Array(0.19000582, dtype=float32)}


  3%|▎         | 26969/1000000 [1:14:21<35:32:02,  7.61it/s]

{'loss': Array(0.22973335, dtype=float32), 'loss_cross_entropy': Array(0.21057625, dtype=float32)}


  3%|▎         | 26979/1000000 [1:14:23<26:30:48, 10.19it/s]

{'loss': Array(0.21228527, dtype=float32), 'loss_cross_entropy': Array(0.19660841, dtype=float32)}


  3%|▎         | 26989/1000000 [1:14:25<36:57:42,  7.31it/s]

{'loss': Array(0.22245407, dtype=float32), 'loss_cross_entropy': Array(0.2045552, dtype=float32)}


  3%|▎         | 26999/1000000 [1:14:26<28:12:38,  9.58it/s]

{'loss': Array(0.24407724, dtype=float32), 'loss_cross_entropy': Array(0.2229346, dtype=float32)}


  3%|▎         | 27009/1000000 [1:14:34<88:19:39,  3.06it/s] 

{'loss': Array(0.21608806, dtype=float32), 'loss_cross_entropy': Array(0.199257, dtype=float32)}


  3%|▎         | 27019/1000000 [1:14:36<40:37:39,  6.65it/s]

{'loss': Array(0.21306038, dtype=float32), 'loss_cross_entropy': Array(0.1953061, dtype=float32)}


  3%|▎         | 27029/1000000 [1:14:37<27:23:10,  9.87it/s]

{'loss': Array(0.23344909, dtype=float32), 'loss_cross_entropy': Array(0.21600838, dtype=float32)}


  3%|▎         | 27039/1000000 [1:14:39<35:50:32,  7.54it/s]

{'loss': Array(0.21833487, dtype=float32), 'loss_cross_entropy': Array(0.200047, dtype=float32)}


  3%|▎         | 27049/1000000 [1:14:40<26:46:16, 10.10it/s]

{'loss': Array(0.228439, dtype=float32), 'loss_cross_entropy': Array(0.21033083, dtype=float32)}


  3%|▎         | 27059/1000000 [1:14:42<39:43:38,  6.80it/s]

{'loss': Array(0.2073469, dtype=float32), 'loss_cross_entropy': Array(0.19006653, dtype=float32)}


  3%|▎         | 27069/1000000 [1:14:43<27:33:51,  9.80it/s]

{'loss': Array(0.21906547, dtype=float32), 'loss_cross_entropy': Array(0.20099874, dtype=float32)}


  3%|▎         | 27079/1000000 [1:14:44<25:57:54, 10.41it/s]

{'loss': Array(0.2122315, dtype=float32), 'loss_cross_entropy': Array(0.19504848, dtype=float32)}


  3%|▎         | 27089/1000000 [1:14:46<32:36:33,  8.29it/s]

{'loss': Array(0.23562907, dtype=float32), 'loss_cross_entropy': Array(0.21680549, dtype=float32)}


  3%|▎         | 27099/1000000 [1:14:48<26:12:29, 10.31it/s]

{'loss': Array(0.20156816, dtype=float32), 'loss_cross_entropy': Array(0.18465708, dtype=float32)}


  3%|▎         | 27109/1000000 [1:14:49<35:03:38,  7.71it/s]

{'loss': Array(0.22393362, dtype=float32), 'loss_cross_entropy': Array(0.20589928, dtype=float32)}


  3%|▎         | 27119/1000000 [1:14:51<26:22:21, 10.25it/s]

{'loss': Array(0.20950823, dtype=float32), 'loss_cross_entropy': Array(0.19261602, dtype=float32)}


  3%|▎         | 27129/1000000 [1:14:52<25:09:36, 10.74it/s]

{'loss': Array(0.23164323, dtype=float32), 'loss_cross_entropy': Array(0.21412908, dtype=float32)}


  3%|▎         | 27139/1000000 [1:14:54<28:29:57,  9.48it/s]

{'loss': Array(0.22921582, dtype=float32), 'loss_cross_entropy': Array(0.2112395, dtype=float32)}


  3%|▎         | 27149/1000000 [1:14:55<26:23:13, 10.24it/s]

{'loss': Array(0.21633565, dtype=float32), 'loss_cross_entropy': Array(0.19921635, dtype=float32)}


  3%|▎         | 27159/1000000 [1:14:57<39:39:22,  6.81it/s]

{'loss': Array(0.21052766, dtype=float32), 'loss_cross_entropy': Array(0.19401899, dtype=float32)}


  3%|▎         | 27169/1000000 [1:14:58<27:27:42,  9.84it/s]

{'loss': Array(0.22116263, dtype=float32), 'loss_cross_entropy': Array(0.20265308, dtype=float32)}


  3%|▎         | 27179/1000000 [1:14:59<26:49:02, 10.08it/s]

{'loss': Array(0.23915505, dtype=float32), 'loss_cross_entropy': Array(0.22048561, dtype=float32)}


  3%|▎         | 27189/1000000 [1:15:01<33:22:45,  8.10it/s]

{'loss': Array(0.23038547, dtype=float32), 'loss_cross_entropy': Array(0.2120624, dtype=float32)}


  3%|▎         | 27199/1000000 [1:15:03<27:07:49,  9.96it/s]

{'loss': Array(0.19157271, dtype=float32), 'loss_cross_entropy': Array(0.17467146, dtype=float32)}


  3%|▎         | 27209/1000000 [1:15:04<35:35:18,  7.59it/s]

{'loss': Array(0.2314518, dtype=float32), 'loss_cross_entropy': Array(0.21264803, dtype=float32)}


  3%|▎         | 27219/1000000 [1:15:06<26:47:12, 10.09it/s]

{'loss': Array(0.21907775, dtype=float32), 'loss_cross_entropy': Array(0.20130284, dtype=float32)}


  3%|▎         | 27229/1000000 [1:15:07<35:00:35,  7.72it/s]

{'loss': Array(0.19365303, dtype=float32), 'loss_cross_entropy': Array(0.1768369, dtype=float32)}


  3%|▎         | 27239/1000000 [1:15:09<26:05:46, 10.35it/s]

{'loss': Array(0.2195855, dtype=float32), 'loss_cross_entropy': Array(0.20205696, dtype=float32)}


  3%|▎         | 27249/1000000 [1:15:10<25:00:18, 10.81it/s]

{'loss': Array(0.23593254, dtype=float32), 'loss_cross_entropy': Array(0.21683626, dtype=float32)}


  3%|▎         | 27259/1000000 [1:15:12<30:33:41,  8.84it/s]

{'loss': Array(0.19128788, dtype=float32), 'loss_cross_entropy': Array(0.17485632, dtype=float32)}


  3%|▎         | 27269/1000000 [1:15:13<26:09:08, 10.33it/s]

{'loss': Array(0.22522984, dtype=float32), 'loss_cross_entropy': Array(0.2074066, dtype=float32)}


  3%|▎         | 27279/1000000 [1:15:15<31:48:29,  8.49it/s]

{'loss': Array(0.19436155, dtype=float32), 'loss_cross_entropy': Array(0.17905302, dtype=float32)}


  3%|▎         | 27289/1000000 [1:15:16<26:12:41, 10.31it/s]

{'loss': Array(0.23105252, dtype=float32), 'loss_cross_entropy': Array(0.21229625, dtype=float32)}


  3%|▎         | 27299/1000000 [1:15:18<39:28:28,  6.84it/s]

{'loss': Array(0.19014362, dtype=float32), 'loss_cross_entropy': Array(0.17365547, dtype=float32)}


  3%|▎         | 27309/1000000 [1:15:19<27:59:37,  9.65it/s]

{'loss': Array(0.20451784, dtype=float32), 'loss_cross_entropy': Array(0.18777071, dtype=float32)}


  3%|▎         | 27319/1000000 [1:15:21<40:04:49,  6.74it/s]

{'loss': Array(0.20898163, dtype=float32), 'loss_cross_entropy': Array(0.19171898, dtype=float32)}


  3%|▎         | 27329/1000000 [1:15:22<27:23:41,  9.86it/s]

{'loss': Array(0.21976052, dtype=float32), 'loss_cross_entropy': Array(0.201858, dtype=float32)}


  3%|▎         | 27339/1000000 [1:15:24<24:59:21, 10.81it/s]

{'loss': Array(0.23033498, dtype=float32), 'loss_cross_entropy': Array(0.21206145, dtype=float32)}


  3%|▎         | 27349/1000000 [1:15:25<35:08:05,  7.69it/s]

{'loss': Array(0.22405739, dtype=float32), 'loss_cross_entropy': Array(0.20598888, dtype=float32)}


  3%|▎         | 27359/1000000 [1:15:27<26:51:45, 10.06it/s]

{'loss': Array(0.2237918, dtype=float32), 'loss_cross_entropy': Array(0.20616975, dtype=float32)}


  3%|▎         | 27369/1000000 [1:15:28<40:36:56,  6.65it/s]

{'loss': Array(0.20936048, dtype=float32), 'loss_cross_entropy': Array(0.1921129, dtype=float32)}


  3%|▎         | 27379/1000000 [1:15:30<27:47:21,  9.72it/s]

{'loss': Array(0.21611801, dtype=float32), 'loss_cross_entropy': Array(0.19883202, dtype=float32)}


  3%|▎         | 27389/1000000 [1:15:31<25:37:03, 10.55it/s]

{'loss': Array(0.2117065, dtype=float32), 'loss_cross_entropy': Array(0.19426808, dtype=float32)}


  3%|▎         | 27399/1000000 [1:15:33<28:52:57,  9.35it/s]

{'loss': Array(0.216579, dtype=float32), 'loss_cross_entropy': Array(0.19794019, dtype=float32)}


  3%|▎         | 27409/1000000 [1:15:34<25:05:27, 10.77it/s]

{'loss': Array(0.21743552, dtype=float32), 'loss_cross_entropy': Array(0.19909158, dtype=float32)}


  3%|▎         | 27419/1000000 [1:15:36<28:37:10,  9.44it/s]

{'loss': Array(0.194746, dtype=float32), 'loss_cross_entropy': Array(0.17871878, dtype=float32)}


  3%|▎         | 27429/1000000 [1:15:37<26:00:38, 10.39it/s]

{'loss': Array(0.223789, dtype=float32), 'loss_cross_entropy': Array(0.20557128, dtype=float32)}


  3%|▎         | 27439/1000000 [1:15:39<39:43:10,  6.80it/s]

{'loss': Array(0.19842963, dtype=float32), 'loss_cross_entropy': Array(0.18239687, dtype=float32)}


  3%|▎         | 27449/1000000 [1:15:41<27:53:18,  9.69it/s]

{'loss': Array(0.2139262, dtype=float32), 'loss_cross_entropy': Array(0.19726507, dtype=float32)}


  3%|▎         | 27459/1000000 [1:15:42<25:37:55, 10.54it/s]

{'loss': Array(0.21163176, dtype=float32), 'loss_cross_entropy': Array(0.19407669, dtype=float32)}


  3%|▎         | 27469/1000000 [1:15:44<32:00:01,  8.44it/s]

{'loss': Array(0.21325436, dtype=float32), 'loss_cross_entropy': Array(0.19583769, dtype=float32)}


  3%|▎         | 27479/1000000 [1:15:45<25:52:51, 10.44it/s]

{'loss': Array(0.22982179, dtype=float32), 'loss_cross_entropy': Array(0.21250406, dtype=float32)}


  3%|▎         | 27489/1000000 [1:15:47<39:13:51,  6.89it/s]

{'loss': Array(0.23703995, dtype=float32), 'loss_cross_entropy': Array(0.21786478, dtype=float32)}


  3%|▎         | 27499/1000000 [1:15:48<27:00:35, 10.00it/s]

{'loss': Array(0.20830026, dtype=float32), 'loss_cross_entropy': Array(0.19130187, dtype=float32)}


  3%|▎         | 27509/1000000 [1:15:56<86:16:00,  3.13it/s] 

{'loss': Array(0.21594651, dtype=float32), 'loss_cross_entropy': Array(0.19769408, dtype=float32)}


  3%|▎         | 27519/1000000 [1:15:57<39:47:47,  6.79it/s]

{'loss': Array(0.2156469, dtype=float32), 'loss_cross_entropy': Array(0.19849221, dtype=float32)}


  3%|▎         | 27529/1000000 [1:15:59<28:03:32,  9.63it/s]

{'loss': Array(0.20025419, dtype=float32), 'loss_cross_entropy': Array(0.18359987, dtype=float32)}


  3%|▎         | 27539/1000000 [1:16:00<34:05:08,  7.92it/s]

{'loss': Array(0.2133495, dtype=float32), 'loss_cross_entropy': Array(0.19688186, dtype=float32)}


  3%|▎         | 27549/1000000 [1:16:02<27:48:43,  9.71it/s]

{'loss': Array(0.20650618, dtype=float32), 'loss_cross_entropy': Array(0.19052522, dtype=float32)}


  3%|▎         | 27559/1000000 [1:16:04<39:43:13,  6.80it/s]

{'loss': Array(0.21315621, dtype=float32), 'loss_cross_entropy': Array(0.19580369, dtype=float32)}


  3%|▎         | 27569/1000000 [1:16:05<26:50:28, 10.06it/s]

{'loss': Array(0.22191815, dtype=float32), 'loss_cross_entropy': Array(0.20416749, dtype=float32)}


  3%|▎         | 27579/1000000 [1:16:06<25:19:09, 10.67it/s]

{'loss': Array(0.21368776, dtype=float32), 'loss_cross_entropy': Array(0.19578026, dtype=float32)}


  3%|▎         | 27589/1000000 [1:16:08<28:03:50,  9.62it/s]

{'loss': Array(0.23337498, dtype=float32), 'loss_cross_entropy': Array(0.21459602, dtype=float32)}


  3%|▎         | 27599/1000000 [1:16:09<25:39:56, 10.52it/s]

{'loss': Array(0.2416555, dtype=float32), 'loss_cross_entropy': Array(0.22234301, dtype=float32)}


  3%|▎         | 27609/1000000 [1:16:11<39:47:44,  6.79it/s]

{'loss': Array(0.21861255, dtype=float32), 'loss_cross_entropy': Array(0.20107053, dtype=float32)}


  3%|▎         | 27619/1000000 [1:16:12<27:51:44,  9.69it/s]

{'loss': Array(0.20619746, dtype=float32), 'loss_cross_entropy': Array(0.18923812, dtype=float32)}


  3%|▎         | 27629/1000000 [1:16:14<25:07:37, 10.75it/s]

{'loss': Array(0.21506391, dtype=float32), 'loss_cross_entropy': Array(0.19681953, dtype=float32)}


  3%|▎         | 27639/1000000 [1:16:15<29:36:54,  9.12it/s]

{'loss': Array(0.21680765, dtype=float32), 'loss_cross_entropy': Array(0.19899817, dtype=float32)}


  3%|▎         | 27649/1000000 [1:16:17<25:37:04, 10.54it/s]

{'loss': Array(0.21707486, dtype=float32), 'loss_cross_entropy': Array(0.19955969, dtype=float32)}


  3%|▎         | 27659/1000000 [1:16:18<35:30:06,  7.61it/s]

{'loss': Array(0.22963038, dtype=float32), 'loss_cross_entropy': Array(0.21214774, dtype=float32)}


  3%|▎         | 27669/1000000 [1:16:20<26:32:39, 10.18it/s]

{'loss': Array(0.20784114, dtype=float32), 'loss_cross_entropy': Array(0.19060563, dtype=float32)}


  3%|▎         | 27679/1000000 [1:16:21<39:55:22,  6.77it/s]

{'loss': Array(0.20372072, dtype=float32), 'loss_cross_entropy': Array(0.18643256, dtype=float32)}


  3%|▎         | 27689/1000000 [1:16:23<27:44:33,  9.74it/s]

{'loss': Array(0.23483698, dtype=float32), 'loss_cross_entropy': Array(0.21637933, dtype=float32)}


  3%|▎         | 27699/1000000 [1:16:24<25:44:24, 10.49it/s]

{'loss': Array(0.19911598, dtype=float32), 'loss_cross_entropy': Array(0.18294959, dtype=float32)}


  3%|▎         | 27709/1000000 [1:16:26<32:41:46,  8.26it/s]

{'loss': Array(0.20697665, dtype=float32), 'loss_cross_entropy': Array(0.18946506, dtype=float32)}


  3%|▎         | 27719/1000000 [1:16:27<26:13:32, 10.30it/s]

{'loss': Array(0.18719617, dtype=float32), 'loss_cross_entropy': Array(0.1711515, dtype=float32)}


  3%|▎         | 27729/1000000 [1:16:29<35:26:13,  7.62it/s]

{'loss': Array(0.22140431, dtype=float32), 'loss_cross_entropy': Array(0.20266894, dtype=float32)}


  3%|▎         | 27739/1000000 [1:16:30<27:19:24,  9.88it/s]

{'loss': Array(0.22701, dtype=float32), 'loss_cross_entropy': Array(0.20870209, dtype=float32)}


  3%|▎         | 27749/1000000 [1:16:32<25:52:46, 10.44it/s]

{'loss': Array(0.21030064, dtype=float32), 'loss_cross_entropy': Array(0.19276214, dtype=float32)}


  3%|▎         | 27759/1000000 [1:16:34<28:28:32,  9.48it/s]

{'loss': Array(0.21561265, dtype=float32), 'loss_cross_entropy': Array(0.1983472, dtype=float32)}


  3%|▎         | 27769/1000000 [1:16:35<25:37:34, 10.54it/s]

{'loss': Array(0.20038795, dtype=float32), 'loss_cross_entropy': Array(0.18425713, dtype=float32)}


  3%|▎         | 27779/1000000 [1:16:37<40:06:50,  6.73it/s]

{'loss': Array(0.19602282, dtype=float32), 'loss_cross_entropy': Array(0.18085666, dtype=float32)}


  3%|▎         | 27789/1000000 [1:16:38<27:40:34,  9.76it/s]

{'loss': Array(0.23544331, dtype=float32), 'loss_cross_entropy': Array(0.21787556, dtype=float32)}


  3%|▎         | 27799/1000000 [1:16:39<25:28:52, 10.60it/s]

{'loss': Array(0.20203121, dtype=float32), 'loss_cross_entropy': Array(0.18582492, dtype=float32)}


  3%|▎         | 27809/1000000 [1:16:41<31:49:34,  8.49it/s]

{'loss': Array(0.23441313, dtype=float32), 'loss_cross_entropy': Array(0.21496986, dtype=float32)}


  3%|▎         | 27819/1000000 [1:16:42<26:30:47, 10.19it/s]

{'loss': Array(0.2317705, dtype=float32), 'loss_cross_entropy': Array(0.21347384, dtype=float32)}


  3%|▎         | 27829/1000000 [1:16:44<35:10:44,  7.68it/s]

{'loss': Array(0.21543919, dtype=float32), 'loss_cross_entropy': Array(0.19708584, dtype=float32)}


  3%|▎         | 27839/1000000 [1:16:46<27:26:54,  9.84it/s]

{'loss': Array(0.2420976, dtype=float32), 'loss_cross_entropy': Array(0.2231142, dtype=float32)}


  3%|▎         | 27849/1000000 [1:16:47<35:39:12,  7.57it/s]

{'loss': Array(0.21276717, dtype=float32), 'loss_cross_entropy': Array(0.19544056, dtype=float32)}


  3%|▎         | 27859/1000000 [1:16:49<26:44:42, 10.10it/s]

{'loss': Array(0.20341173, dtype=float32), 'loss_cross_entropy': Array(0.18604185, dtype=float32)}


  3%|▎         | 27869/1000000 [1:16:50<25:02:34, 10.78it/s]

{'loss': Array(0.2224444, dtype=float32), 'loss_cross_entropy': Array(0.20577006, dtype=float32)}


  3%|▎         | 27879/1000000 [1:16:52<29:45:16,  9.08it/s]

{'loss': Array(0.22543333, dtype=float32), 'loss_cross_entropy': Array(0.20727094, dtype=float32)}


  3%|▎         | 27889/1000000 [1:16:53<25:36:46, 10.54it/s]

{'loss': Array(0.22709174, dtype=float32), 'loss_cross_entropy': Array(0.20943172, dtype=float32)}


  3%|▎         | 27899/1000000 [1:16:55<32:02:20,  8.43it/s]

{'loss': Array(0.20073672, dtype=float32), 'loss_cross_entropy': Array(0.1844555, dtype=float32)}


  3%|▎         | 27909/1000000 [1:16:56<26:11:42, 10.31it/s]

{'loss': Array(0.24913536, dtype=float32), 'loss_cross_entropy': Array(0.2290596, dtype=float32)}


  3%|▎         | 27919/1000000 [1:16:58<40:16:06,  6.71it/s]

{'loss': Array(0.20336416, dtype=float32), 'loss_cross_entropy': Array(0.18627803, dtype=float32)}


  3%|▎         | 27929/1000000 [1:16:59<28:58:34,  9.32it/s]

{'loss': Array(0.20527704, dtype=float32), 'loss_cross_entropy': Array(0.18863872, dtype=float32)}


  3%|▎         | 27939/1000000 [1:17:01<41:49:38,  6.46it/s]

{'loss': Array(0.20942445, dtype=float32), 'loss_cross_entropy': Array(0.19377346, dtype=float32)}


  3%|▎         | 27949/1000000 [1:17:02<27:18:40,  9.89it/s]

{'loss': Array(0.21174686, dtype=float32), 'loss_cross_entropy': Array(0.19432202, dtype=float32)}


  3%|▎         | 27959/1000000 [1:17:04<24:50:49, 10.87it/s]

{'loss': Array(0.22667086, dtype=float32), 'loss_cross_entropy': Array(0.20853722, dtype=float32)}


  3%|▎         | 27969/1000000 [1:17:05<34:33:51,  7.81it/s]

{'loss': Array(0.19147694, dtype=float32), 'loss_cross_entropy': Array(0.17538579, dtype=float32)}


  3%|▎         | 27979/1000000 [1:17:07<26:36:24, 10.15it/s]

{'loss': Array(0.23696494, dtype=float32), 'loss_cross_entropy': Array(0.21816741, dtype=float32)}


  3%|▎         | 27989/1000000 [1:17:08<40:07:04,  6.73it/s]

{'loss': Array(0.23309925, dtype=float32), 'loss_cross_entropy': Array(0.21434636, dtype=float32)}


  3%|▎         | 27999/1000000 [1:17:10<27:00:48, 10.00it/s]

{'loss': Array(0.23964219, dtype=float32), 'loss_cross_entropy': Array(0.22132507, dtype=float32)}


  3%|▎         | 28009/1000000 [1:17:18<89:47:45,  3.01it/s] 

{'loss': Array(0.21768124, dtype=float32), 'loss_cross_entropy': Array(0.20013945, dtype=float32)}


  3%|▎         | 28019/1000000 [1:17:19<40:01:49,  6.74it/s] 

{'loss': Array(0.21592481, dtype=float32), 'loss_cross_entropy': Array(0.19924228, dtype=float32)}


  3%|▎         | 28029/1000000 [1:17:21<28:24:19,  9.50it/s]

{'loss': Array(0.2357575, dtype=float32), 'loss_cross_entropy': Array(0.21666162, dtype=float32)}


  3%|▎         | 28039/1000000 [1:17:23<30:03:41,  8.98it/s]

{'loss': Array(0.22155683, dtype=float32), 'loss_cross_entropy': Array(0.2041631, dtype=float32)}


  3%|▎         | 28049/1000000 [1:17:24<25:57:48, 10.40it/s]

{'loss': Array(0.20018499, dtype=float32), 'loss_cross_entropy': Array(0.18349174, dtype=float32)}


  3%|▎         | 28059/1000000 [1:17:26<39:09:05,  6.90it/s]

{'loss': Array(0.22447923, dtype=float32), 'loss_cross_entropy': Array(0.20687352, dtype=float32)}


  3%|▎         | 28069/1000000 [1:17:27<27:20:54,  9.87it/s]

{'loss': Array(0.19923256, dtype=float32), 'loss_cross_entropy': Array(0.1826043, dtype=float32)}


  3%|▎         | 28079/1000000 [1:17:28<25:13:16, 10.70it/s]

{'loss': Array(0.22010337, dtype=float32), 'loss_cross_entropy': Array(0.20174077, dtype=float32)}


  3%|▎         | 28089/1000000 [1:17:30<31:43:00,  8.51it/s]

{'loss': Array(0.21824406, dtype=float32), 'loss_cross_entropy': Array(0.19987059, dtype=float32)}


  3%|▎         | 28099/1000000 [1:17:31<26:48:43, 10.07it/s]

{'loss': Array(0.2346222, dtype=float32), 'loss_cross_entropy': Array(0.21525502, dtype=float32)}


  3%|▎         | 28109/1000000 [1:17:33<40:02:55,  6.74it/s]

{'loss': Array(0.2071737, dtype=float32), 'loss_cross_entropy': Array(0.19034493, dtype=float32)}


  3%|▎         | 28119/1000000 [1:17:35<28:11:08,  9.58it/s]

{'loss': Array(0.21406554, dtype=float32), 'loss_cross_entropy': Array(0.19632511, dtype=float32)}


  3%|▎         | 28129/1000000 [1:17:36<25:47:01, 10.47it/s]

{'loss': Array(0.20069519, dtype=float32), 'loss_cross_entropy': Array(0.18506874, dtype=float32)}


  3%|▎         | 28139/1000000 [1:17:38<30:21:31,  8.89it/s]

{'loss': Array(0.20964025, dtype=float32), 'loss_cross_entropy': Array(0.19151257, dtype=float32)}


  3%|▎         | 28149/1000000 [1:17:39<25:31:07, 10.58it/s]

{'loss': Array(0.21253598, dtype=float32), 'loss_cross_entropy': Array(0.19499975, dtype=float32)}


  3%|▎         | 28159/1000000 [1:17:41<34:49:19,  7.75it/s]

{'loss': Array(0.21851097, dtype=float32), 'loss_cross_entropy': Array(0.20123056, dtype=float32)}


  3%|▎         | 28169/1000000 [1:17:42<26:08:16, 10.33it/s]

{'loss': Array(0.2043275, dtype=float32), 'loss_cross_entropy': Array(0.18815804, dtype=float32)}


  3%|▎         | 28179/1000000 [1:17:43<25:05:47, 10.76it/s]

{'loss': Array(0.18819599, dtype=float32), 'loss_cross_entropy': Array(0.17215928, dtype=float32)}


  3%|▎         | 28189/1000000 [1:17:45<31:31:05,  8.56it/s]

{'loss': Array(0.20977283, dtype=float32), 'loss_cross_entropy': Array(0.19201271, dtype=float32)}


  3%|▎         | 28199/1000000 [1:17:46<25:53:42, 10.42it/s]

{'loss': Array(0.21039908, dtype=float32), 'loss_cross_entropy': Array(0.1936853, dtype=float32)}


  3%|▎         | 28209/1000000 [1:17:48<26:12:58, 10.30it/s]

{'loss': Array(0.205408, dtype=float32), 'loss_cross_entropy': Array(0.18763733, dtype=float32)}


  3%|▎         | 28219/1000000 [1:17:50<28:35:05,  9.44it/s]

{'loss': Array(0.21156478, dtype=float32), 'loss_cross_entropy': Array(0.19462015, dtype=float32)}


  3%|▎         | 28229/1000000 [1:17:51<25:30:04, 10.59it/s]

{'loss': Array(0.20755468, dtype=float32), 'loss_cross_entropy': Array(0.19077678, dtype=float32)}


  3%|▎         | 28239/1000000 [1:17:53<32:10:18,  8.39it/s]

{'loss': Array(0.21931162, dtype=float32), 'loss_cross_entropy': Array(0.20131993, dtype=float32)}


  3%|▎         | 28249/1000000 [1:17:54<26:25:56, 10.21it/s]

{'loss': Array(0.21881373, dtype=float32), 'loss_cross_entropy': Array(0.20095256, dtype=float32)}


  3%|▎         | 28259/1000000 [1:17:56<39:34:01,  6.82it/s]

{'loss': Array(0.21393132, dtype=float32), 'loss_cross_entropy': Array(0.19702941, dtype=float32)}


  3%|▎         | 28269/1000000 [1:17:57<27:20:53,  9.87it/s]

{'loss': Array(0.19716546, dtype=float32), 'loss_cross_entropy': Array(0.18098202, dtype=float32)}


  3%|▎         | 28279/1000000 [1:17:59<41:54:58,  6.44it/s]

{'loss': Array(0.20790875, dtype=float32), 'loss_cross_entropy': Array(0.19012487, dtype=float32)}


  3%|▎         | 28289/1000000 [1:18:00<28:54:51,  9.34it/s]

{'loss': Array(0.19659191, dtype=float32), 'loss_cross_entropy': Array(0.18125375, dtype=float32)}


  3%|▎         | 28299/1000000 [1:18:02<26:36:31, 10.14it/s]

{'loss': Array(0.20827416, dtype=float32), 'loss_cross_entropy': Array(0.19131796, dtype=float32)}


  3%|▎         | 28309/1000000 [1:18:03<34:56:32,  7.72it/s]

{'loss': Array(0.21649542, dtype=float32), 'loss_cross_entropy': Array(0.20047462, dtype=float32)}


  3%|▎         | 28319/1000000 [1:18:05<26:17:15, 10.27it/s]

{'loss': Array(0.2194324, dtype=float32), 'loss_cross_entropy': Array(0.20236614, dtype=float32)}


  3%|▎         | 28329/1000000 [1:18:06<40:15:54,  6.70it/s]

{'loss': Array(0.21071327, dtype=float32), 'loss_cross_entropy': Array(0.19486658, dtype=float32)}


  3%|▎         | 28339/1000000 [1:18:08<27:01:53,  9.98it/s]

{'loss': Array(0.19226998, dtype=float32), 'loss_cross_entropy': Array(0.17687021, dtype=float32)}


  3%|▎         | 28349/1000000 [1:18:09<25:41:03, 10.51it/s]

{'loss': Array(0.2315805, dtype=float32), 'loss_cross_entropy': Array(0.21321629, dtype=float32)}


  3%|▎         | 28359/1000000 [1:18:11<29:03:09,  9.29it/s]

{'loss': Array(0.22816947, dtype=float32), 'loss_cross_entropy': Array(0.20934825, dtype=float32)}


  3%|▎         | 28369/1000000 [1:18:12<25:36:18, 10.54it/s]

{'loss': Array(0.17823431, dtype=float32), 'loss_cross_entropy': Array(0.16263975, dtype=float32)}


  3%|▎         | 28379/1000000 [1:18:14<28:34:50,  9.44it/s]

{'loss': Array(0.24725221, dtype=float32), 'loss_cross_entropy': Array(0.22913532, dtype=float32)}


  3%|▎         | 28389/1000000 [1:18:15<25:26:31, 10.61it/s]

{'loss': Array(0.22000961, dtype=float32), 'loss_cross_entropy': Array(0.20287953, dtype=float32)}


  3%|▎         | 28399/1000000 [1:18:17<39:45:15,  6.79it/s]

{'loss': Array(0.22586158, dtype=float32), 'loss_cross_entropy': Array(0.20748346, dtype=float32)}


  3%|▎         | 28409/1000000 [1:18:18<27:16:21,  9.90it/s]

{'loss': Array(0.20424612, dtype=float32), 'loss_cross_entropy': Array(0.1871518, dtype=float32)}


  3%|▎         | 28419/1000000 [1:18:20<25:22:26, 10.64it/s]

{'loss': Array(0.21652459, dtype=float32), 'loss_cross_entropy': Array(0.1994869, dtype=float32)}


  3%|▎         | 28429/1000000 [1:18:21<31:55:46,  8.45it/s]

{'loss': Array(0.19103344, dtype=float32), 'loss_cross_entropy': Array(0.17575914, dtype=float32)}


  3%|▎         | 28439/1000000 [1:18:23<26:24:15, 10.22it/s]

{'loss': Array(0.19742924, dtype=float32), 'loss_cross_entropy': Array(0.1824836, dtype=float32)}


  3%|▎         | 28449/1000000 [1:18:25<39:27:52,  6.84it/s]

{'loss': Array(0.21651392, dtype=float32), 'loss_cross_entropy': Array(0.19770704, dtype=float32)}


  3%|▎         | 28459/1000000 [1:18:26<26:50:50, 10.05it/s]

{'loss': Array(0.19716884, dtype=float32), 'loss_cross_entropy': Array(0.18071227, dtype=float32)}


  3%|▎         | 28469/1000000 [1:18:27<25:47:39, 10.46it/s]

{'loss': Array(0.19047531, dtype=float32), 'loss_cross_entropy': Array(0.17476243, dtype=float32)}


  3%|▎         | 28479/1000000 [1:18:29<29:26:26,  9.17it/s]

{'loss': Array(0.23306742, dtype=float32), 'loss_cross_entropy': Array(0.21447256, dtype=float32)}


  3%|▎         | 28489/1000000 [1:18:30<25:24:02, 10.62it/s]

{'loss': Array(0.18316798, dtype=float32), 'loss_cross_entropy': Array(0.16688482, dtype=float32)}


  3%|▎         | 28499/1000000 [1:18:32<35:11:44,  7.67it/s]

{'loss': Array(0.21681134, dtype=float32), 'loss_cross_entropy': Array(0.20059636, dtype=float32)}


  3%|▎         | 28509/1000000 [1:18:40<88:45:56,  3.04it/s] 

{'loss': Array(0.23447049, dtype=float32), 'loss_cross_entropy': Array(0.21675728, dtype=float32)}


  3%|▎         | 28519/1000000 [1:18:41<45:50:08,  5.89it/s]

{'loss': Array(0.22971332, dtype=float32), 'loss_cross_entropy': Array(0.21205567, dtype=float32)}


  3%|▎         | 28529/1000000 [1:18:43<28:35:05,  9.44it/s]

{'loss': Array(0.2019995, dtype=float32), 'loss_cross_entropy': Array(0.18465206, dtype=float32)}


  3%|▎         | 28539/1000000 [1:18:44<25:40:39, 10.51it/s]

{'loss': Array(0.1913002, dtype=float32), 'loss_cross_entropy': Array(0.17463374, dtype=float32)}


  3%|▎         | 28549/1000000 [1:18:46<32:33:17,  8.29it/s]

{'loss': Array(0.22287686, dtype=float32), 'loss_cross_entropy': Array(0.20490754, dtype=float32)}


  3%|▎         | 28559/1000000 [1:18:47<26:21:57, 10.23it/s]

{'loss': Array(0.24256635, dtype=float32), 'loss_cross_entropy': Array(0.22371154, dtype=float32)}


  3%|▎         | 28569/1000000 [1:18:49<26:07:49, 10.33it/s]

{'loss': Array(0.20681964, dtype=float32), 'loss_cross_entropy': Array(0.18993677, dtype=float32)}


  3%|▎         | 28579/1000000 [1:18:50<28:37:47,  9.43it/s]

{'loss': Array(0.21251185, dtype=float32), 'loss_cross_entropy': Array(0.19579911, dtype=float32)}


  3%|▎         | 28589/1000000 [1:18:52<25:32:08, 10.57it/s]

{'loss': Array(0.20049517, dtype=float32), 'loss_cross_entropy': Array(0.18245886, dtype=float32)}


  3%|▎         | 28599/1000000 [1:18:53<32:04:26,  8.41it/s]

{'loss': Array(0.20832923, dtype=float32), 'loss_cross_entropy': Array(0.1918282, dtype=float32)}


  3%|▎         | 28609/1000000 [1:18:55<25:49:50, 10.45it/s]

{'loss': Array(0.19201036, dtype=float32), 'loss_cross_entropy': Array(0.17586805, dtype=float32)}


  3%|▎         | 28619/1000000 [1:18:56<35:18:28,  7.64it/s]

{'loss': Array(0.22314678, dtype=float32), 'loss_cross_entropy': Array(0.2053361, dtype=float32)}


  3%|▎         | 28629/1000000 [1:18:58<27:15:09,  9.90it/s]

{'loss': Array(0.24410282, dtype=float32), 'loss_cross_entropy': Array(0.22473729, dtype=float32)}


  3%|▎         | 28639/1000000 [1:18:59<26:45:38, 10.08it/s]

{'loss': Array(0.22327106, dtype=float32), 'loss_cross_entropy': Array(0.20585124, dtype=float32)}


  3%|▎         | 28649/1000000 [1:19:01<33:49:37,  7.98it/s]

{'loss': Array(0.22654815, dtype=float32), 'loss_cross_entropy': Array(0.20890844, dtype=float32)}


  3%|▎         | 28659/1000000 [1:19:02<26:40:33, 10.11it/s]

{'loss': Array(0.21342957, dtype=float32), 'loss_cross_entropy': Array(0.19677003, dtype=float32)}


  3%|▎         | 28669/1000000 [1:19:04<24:56:19, 10.82it/s]

{'loss': Array(0.22835886, dtype=float32), 'loss_cross_entropy': Array(0.21061313, dtype=float32)}


  3%|▎         | 28679/1000000 [1:19:05<28:48:06,  9.37it/s]

{'loss': Array(0.21566032, dtype=float32), 'loss_cross_entropy': Array(0.19904274, dtype=float32)}


  3%|▎         | 28689/1000000 [1:19:07<25:26:43, 10.60it/s]

{'loss': Array(0.21094814, dtype=float32), 'loss_cross_entropy': Array(0.19419235, dtype=float32)}


  3%|▎         | 28699/1000000 [1:19:08<32:00:29,  8.43it/s]

{'loss': Array(0.2356426, dtype=float32), 'loss_cross_entropy': Array(0.21740122, dtype=float32)}


  3%|▎         | 28709/1000000 [1:19:10<26:06:36, 10.33it/s]

{'loss': Array(0.21137285, dtype=float32), 'loss_cross_entropy': Array(0.19463705, dtype=float32)}


  3%|▎         | 28719/1000000 [1:19:12<40:10:59,  6.71it/s]

{'loss': Array(0.22756544, dtype=float32), 'loss_cross_entropy': Array(0.20939088, dtype=float32)}


  3%|▎         | 28729/1000000 [1:19:13<28:08:31,  9.59it/s]

{'loss': Array(0.20621529, dtype=float32), 'loss_cross_entropy': Array(0.1892468, dtype=float32)}


  3%|▎         | 28739/1000000 [1:19:14<25:30:14, 10.58it/s]

{'loss': Array(0.22030242, dtype=float32), 'loss_cross_entropy': Array(0.20232819, dtype=float32)}


  3%|▎         | 28749/1000000 [1:19:16<35:57:11,  7.50it/s]

{'loss': Array(0.19144098, dtype=float32), 'loss_cross_entropy': Array(0.17588516, dtype=float32)}


  3%|▎         | 28759/1000000 [1:19:17<26:29:40, 10.18it/s]

{'loss': Array(0.19277875, dtype=float32), 'loss_cross_entropy': Array(0.17655651, dtype=float32)}


  3%|▎         | 28769/1000000 [1:19:19<25:20:08, 10.65it/s]

{'loss': Array(0.20537737, dtype=float32), 'loss_cross_entropy': Array(0.18785384, dtype=float32)}


  3%|▎         | 28779/1000000 [1:19:21<30:25:15,  8.87it/s]

{'loss': Array(0.22693829, dtype=float32), 'loss_cross_entropy': Array(0.20864724, dtype=float32)}


  3%|▎         | 28789/1000000 [1:19:22<25:32:14, 10.56it/s]

{'loss': Array(0.20103835, dtype=float32), 'loss_cross_entropy': Array(0.18525767, dtype=float32)}


  3%|▎         | 28799/1000000 [1:19:24<32:16:02,  8.36it/s]

{'loss': Array(0.20468266, dtype=float32), 'loss_cross_entropy': Array(0.18809173, dtype=float32)}


  3%|▎         | 28809/1000000 [1:19:25<25:41:28, 10.50it/s]

{'loss': Array(0.21465574, dtype=float32), 'loss_cross_entropy': Array(0.19771741, dtype=float32)}


  3%|▎         | 28819/1000000 [1:19:26<24:33:06, 10.99it/s]

{'loss': Array(0.23392823, dtype=float32), 'loss_cross_entropy': Array(0.21533859, dtype=float32)}


  3%|▎         | 28829/1000000 [1:19:28<28:11:37,  9.57it/s]

{'loss': Array(0.18583599, dtype=float32), 'loss_cross_entropy': Array(0.17054915, dtype=float32)}


  3%|▎         | 28839/1000000 [1:19:29<25:06:07, 10.75it/s]

{'loss': Array(0.20419462, dtype=float32), 'loss_cross_entropy': Array(0.1887263, dtype=float32)}


  3%|▎         | 28849/1000000 [1:19:31<34:40:42,  7.78it/s]

{'loss': Array(0.20655528, dtype=float32), 'loss_cross_entropy': Array(0.19016747, dtype=float32)}


  3%|▎         | 28859/1000000 [1:19:32<26:20:57, 10.24it/s]

{'loss': Array(0.21769981, dtype=float32), 'loss_cross_entropy': Array(0.20039912, dtype=float32)}


  3%|▎         | 28869/1000000 [1:19:34<24:54:17, 10.83it/s]

{'loss': Array(0.20154034, dtype=float32), 'loss_cross_entropy': Array(0.1864022, dtype=float32)}


  3%|▎         | 28879/1000000 [1:19:35<28:03:40,  9.61it/s]

{'loss': Array(0.20650057, dtype=float32), 'loss_cross_entropy': Array(0.18977319, dtype=float32)}


  3%|▎         | 28889/1000000 [1:19:37<25:20:59, 10.64it/s]

{'loss': Array(0.20248811, dtype=float32), 'loss_cross_entropy': Array(0.18674275, dtype=float32)}


  3%|▎         | 28899/1000000 [1:19:38<29:42:47,  9.08it/s]

{'loss': Array(0.2004786, dtype=float32), 'loss_cross_entropy': Array(0.18278807, dtype=float32)}


  3%|▎         | 28909/1000000 [1:19:40<25:28:55, 10.59it/s]

{'loss': Array(0.20494632, dtype=float32), 'loss_cross_entropy': Array(0.18861783, dtype=float32)}


  3%|▎         | 28919/1000000 [1:19:41<39:11:19,  6.88it/s]

{'loss': Array(0.21337382, dtype=float32), 'loss_cross_entropy': Array(0.196286, dtype=float32)}


  3%|▎         | 28929/1000000 [1:19:43<27:14:35,  9.90it/s]

{'loss': Array(0.22404397, dtype=float32), 'loss_cross_entropy': Array(0.20722623, dtype=float32)}


  3%|▎         | 28939/1000000 [1:19:44<25:00:18, 10.79it/s]

{'loss': Array(0.23004337, dtype=float32), 'loss_cross_entropy': Array(0.21270768, dtype=float32)}


  3%|▎         | 28949/1000000 [1:19:46<31:56:31,  8.44it/s]

{'loss': Array(0.21499208, dtype=float32), 'loss_cross_entropy': Array(0.19854808, dtype=float32)}


  3%|▎         | 28959/1000000 [1:19:47<25:57:15, 10.39it/s]

{'loss': Array(0.19172275, dtype=float32), 'loss_cross_entropy': Array(0.17591497, dtype=float32)}


  3%|▎         | 28969/1000000 [1:19:49<35:08:47,  7.67it/s]

{'loss': Array(0.22332887, dtype=float32), 'loss_cross_entropy': Array(0.20583875, dtype=float32)}


  3%|▎         | 28979/1000000 [1:19:50<26:52:17, 10.04it/s]

{'loss': Array(0.19638763, dtype=float32), 'loss_cross_entropy': Array(0.18088214, dtype=float32)}


  3%|▎         | 28989/1000000 [1:19:52<35:16:26,  7.65it/s]

{'loss': Array(0.21415584, dtype=float32), 'loss_cross_entropy': Array(0.1964765, dtype=float32)}


  3%|▎         | 28999/1000000 [1:19:53<26:07:37, 10.32it/s]

{'loss': Array(0.21683493, dtype=float32), 'loss_cross_entropy': Array(0.19971524, dtype=float32)}


  3%|▎         | 29009/1000000 [1:20:01<89:06:26,  3.03it/s] 

{'loss': Array(0.20825927, dtype=float32), 'loss_cross_entropy': Array(0.19137679, dtype=float32)}


  3%|▎         | 29019/1000000 [1:20:03<41:28:30,  6.50it/s] 

{'loss': Array(0.21040416, dtype=float32), 'loss_cross_entropy': Array(0.19279669, dtype=float32)}


  3%|▎         | 29029/1000000 [1:20:04<27:54:28,  9.66it/s]

{'loss': Array(0.21325555, dtype=float32), 'loss_cross_entropy': Array(0.19596721, dtype=float32)}


  3%|▎         | 29039/1000000 [1:20:06<35:55:10,  7.51it/s]

{'loss': Array(0.23475038, dtype=float32), 'loss_cross_entropy': Array(0.2171955, dtype=float32)}


  3%|▎         | 29049/1000000 [1:20:07<27:11:50,  9.92it/s]

{'loss': Array(0.20470682, dtype=float32), 'loss_cross_entropy': Array(0.18808575, dtype=float32)}


  3%|▎         | 29059/1000000 [1:20:09<40:54:19,  6.59it/s]

{'loss': Array(0.21692441, dtype=float32), 'loss_cross_entropy': Array(0.200015, dtype=float32)}


  3%|▎         | 29069/1000000 [1:20:10<28:03:05,  9.61it/s]

{'loss': Array(0.19828096, dtype=float32), 'loss_cross_entropy': Array(0.18299274, dtype=float32)}


  3%|▎         | 29079/1000000 [1:20:12<25:36:42, 10.53it/s]

{'loss': Array(0.2192112, dtype=float32), 'loss_cross_entropy': Array(0.20117831, dtype=float32)}


  3%|▎         | 29089/1000000 [1:20:13<32:02:44,  8.42it/s]

{'loss': Array(0.21186236, dtype=float32), 'loss_cross_entropy': Array(0.1954534, dtype=float32)}


  3%|▎         | 29099/1000000 [1:20:15<25:41:30, 10.50it/s]

{'loss': Array(0.23365231, dtype=float32), 'loss_cross_entropy': Array(0.21604028, dtype=float32)}


  3%|▎         | 29109/1000000 [1:20:17<35:57:53,  7.50it/s]

{'loss': Array(0.20015566, dtype=float32), 'loss_cross_entropy': Array(0.18450814, dtype=float32)}


  3%|▎         | 29119/1000000 [1:20:18<27:03:00,  9.97it/s]

{'loss': Array(0.2186445, dtype=float32), 'loss_cross_entropy': Array(0.20073035, dtype=float32)}


  3%|▎         | 29129/1000000 [1:20:19<25:20:12, 10.64it/s]

{'loss': Array(0.2109437, dtype=float32), 'loss_cross_entropy': Array(0.19361241, dtype=float32)}


  3%|▎         | 29139/1000000 [1:20:21<28:20:17,  9.52it/s]

{'loss': Array(0.21300219, dtype=float32), 'loss_cross_entropy': Array(0.19677596, dtype=float32)}


  3%|▎         | 29149/1000000 [1:20:22<25:18:42, 10.65it/s]

{'loss': Array(0.20689085, dtype=float32), 'loss_cross_entropy': Array(0.19126232, dtype=float32)}


  3%|▎         | 29159/1000000 [1:20:24<39:13:02,  6.88it/s]

{'loss': Array(0.19470264, dtype=float32), 'loss_cross_entropy': Array(0.17783034, dtype=float32)}


  3%|▎         | 29169/1000000 [1:20:25<27:20:11,  9.86it/s]

{'loss': Array(0.24277599, dtype=float32), 'loss_cross_entropy': Array(0.22481702, dtype=float32)}


  3%|▎         | 29179/1000000 [1:20:27<25:22:52, 10.62it/s]

{'loss': Array(0.19896676, dtype=float32), 'loss_cross_entropy': Array(0.18352823, dtype=float32)}


  3%|▎         | 29189/1000000 [1:20:28<32:16:59,  8.35it/s]

{'loss': Array(0.20236385, dtype=float32), 'loss_cross_entropy': Array(0.18624759, dtype=float32)}


  3%|▎         | 29199/1000000 [1:20:30<26:29:50, 10.18it/s]

{'loss': Array(0.22702134, dtype=float32), 'loss_cross_entropy': Array(0.20981887, dtype=float32)}


  3%|▎         | 29209/1000000 [1:20:32<35:47:57,  7.53it/s]

{'loss': Array(0.23428333, dtype=float32), 'loss_cross_entropy': Array(0.21459596, dtype=float32)}


  3%|▎         | 29219/1000000 [1:20:33<26:55:29, 10.02it/s]

{'loss': Array(0.2052149, dtype=float32), 'loss_cross_entropy': Array(0.18821673, dtype=float32)}


  3%|▎         | 29229/1000000 [1:20:35<35:20:55,  7.63it/s]

{'loss': Array(0.21084987, dtype=float32), 'loss_cross_entropy': Array(0.19403568, dtype=float32)}


  3%|▎         | 29239/1000000 [1:20:36<26:33:27, 10.15it/s]

{'loss': Array(0.2092111, dtype=float32), 'loss_cross_entropy': Array(0.19214006, dtype=float32)}


  3%|▎         | 29249/1000000 [1:20:37<24:56:17, 10.81it/s]

{'loss': Array(0.16523951, dtype=float32), 'loss_cross_entropy': Array(0.15105315, dtype=float32)}


  3%|▎         | 29259/1000000 [1:20:39<30:06:17,  8.96it/s]

{'loss': Array(0.21803784, dtype=float32), 'loss_cross_entropy': Array(0.19924398, dtype=float32)}


  3%|▎         | 29269/1000000 [1:20:40<26:14:25, 10.28it/s]

{'loss': Array(0.22776675, dtype=float32), 'loss_cross_entropy': Array(0.21079068, dtype=float32)}


  3%|▎         | 29279/1000000 [1:20:42<32:11:47,  8.37it/s]

{'loss': Array(0.21498652, dtype=float32), 'loss_cross_entropy': Array(0.1980488, dtype=float32)}


  3%|▎         | 29289/1000000 [1:20:44<25:58:17, 10.38it/s]

{'loss': Array(0.2042063, dtype=float32), 'loss_cross_entropy': Array(0.18829493, dtype=float32)}


  3%|▎         | 29299/1000000 [1:20:45<39:06:40,  6.89it/s]

{'loss': Array(0.22542171, dtype=float32), 'loss_cross_entropy': Array(0.20726681, dtype=float32)}


  3%|▎         | 29309/1000000 [1:20:47<27:08:03,  9.94it/s]

{'loss': Array(0.20794384, dtype=float32), 'loss_cross_entropy': Array(0.1919744, dtype=float32)}


  3%|▎         | 29319/1000000 [1:20:48<40:16:40,  6.69it/s]

{'loss': Array(0.21974562, dtype=float32), 'loss_cross_entropy': Array(0.2017773, dtype=float32)}


  3%|▎         | 29329/1000000 [1:20:50<27:52:35,  9.67it/s]

{'loss': Array(0.20423369, dtype=float32), 'loss_cross_entropy': Array(0.18750484, dtype=float32)}


  3%|▎         | 29339/1000000 [1:20:51<25:34:04, 10.55it/s]

{'loss': Array(0.22028092, dtype=float32), 'loss_cross_entropy': Array(0.20340677, dtype=float32)}


  3%|▎         | 29349/1000000 [1:20:53<35:18:20,  7.64it/s]

{'loss': Array(0.22167902, dtype=float32), 'loss_cross_entropy': Array(0.2052532, dtype=float32)}


  3%|▎         | 29359/1000000 [1:20:54<26:28:36, 10.18it/s]

{'loss': Array(0.21477394, dtype=float32), 'loss_cross_entropy': Array(0.1992053, dtype=float32)}


  3%|▎         | 29369/1000000 [1:20:56<39:31:02,  6.82it/s]

{'loss': Array(0.22219157, dtype=float32), 'loss_cross_entropy': Array(0.20531039, dtype=float32)}


  3%|▎         | 29379/1000000 [1:20:57<26:52:23, 10.03it/s]

{'loss': Array(0.21980779, dtype=float32), 'loss_cross_entropy': Array(0.20246904, dtype=float32)}


  3%|▎         | 29389/1000000 [1:20:59<25:38:17, 10.52it/s]

{'loss': Array(0.22201793, dtype=float32), 'loss_cross_entropy': Array(0.20385826, dtype=float32)}


  3%|▎         | 29399/1000000 [1:21:00<30:19:18,  8.89it/s]

{'loss': Array(0.21493898, dtype=float32), 'loss_cross_entropy': Array(0.19790985, dtype=float32)}


  3%|▎         | 29409/1000000 [1:21:02<26:02:55, 10.35it/s]

{'loss': Array(0.2236413, dtype=float32), 'loss_cross_entropy': Array(0.2062443, dtype=float32)}


  3%|▎         | 29419/1000000 [1:21:03<29:02:42,  9.28it/s]

{'loss': Array(0.24210139, dtype=float32), 'loss_cross_entropy': Array(0.22467068, dtype=float32)}


  3%|▎         | 29429/1000000 [1:21:05<25:48:22, 10.45it/s]

{'loss': Array(0.21798292, dtype=float32), 'loss_cross_entropy': Array(0.20062767, dtype=float32)}


  3%|▎         | 29439/1000000 [1:21:07<38:57:57,  6.92it/s]

{'loss': Array(0.23144332, dtype=float32), 'loss_cross_entropy': Array(0.21301325, dtype=float32)}


  3%|▎         | 29449/1000000 [1:21:08<26:46:49, 10.07it/s]

{'loss': Array(0.21977599, dtype=float32), 'loss_cross_entropy': Array(0.20159356, dtype=float32)}


  3%|▎         | 29459/1000000 [1:21:09<25:34:21, 10.54it/s]

{'loss': Array(0.20238157, dtype=float32), 'loss_cross_entropy': Array(0.18607472, dtype=float32)}


  3%|▎         | 29469/1000000 [1:21:11<32:41:53,  8.24it/s]

{'loss': Array(0.21804467, dtype=float32), 'loss_cross_entropy': Array(0.20072809, dtype=float32)}


  3%|▎         | 29479/1000000 [1:21:12<25:53:25, 10.41it/s]

{'loss': Array(0.19005536, dtype=float32), 'loss_cross_entropy': Array(0.1750774, dtype=float32)}


  3%|▎         | 29489/1000000 [1:21:14<39:30:26,  6.82it/s]

{'loss': Array(0.21538524, dtype=float32), 'loss_cross_entropy': Array(0.19713211, dtype=float32)}


  3%|▎         | 29499/1000000 [1:21:15<27:26:15,  9.83it/s]

{'loss': Array(0.21256857, dtype=float32), 'loss_cross_entropy': Array(0.1956042, dtype=float32)}


  3%|▎         | 29509/1000000 [1:21:23<87:56:42,  3.07it/s] 

{'loss': Array(0.21378498, dtype=float32), 'loss_cross_entropy': Array(0.19733906, dtype=float32)}


  3%|▎         | 29519/1000000 [1:21:25<40:28:06,  6.66it/s]

{'loss': Array(0.21594946, dtype=float32), 'loss_cross_entropy': Array(0.19888896, dtype=float32)}


  3%|▎         | 29529/1000000 [1:21:26<28:42:12,  9.39it/s]

{'loss': Array(0.2041254, dtype=float32), 'loss_cross_entropy': Array(0.18769343, dtype=float32)}


  3%|▎         | 29539/1000000 [1:21:28<32:14:50,  8.36it/s]

{'loss': Array(0.2310729, dtype=float32), 'loss_cross_entropy': Array(0.21333471, dtype=float32)}


  3%|▎         | 29549/1000000 [1:21:29<26:20:05, 10.24it/s]

{'loss': Array(0.22942105, dtype=float32), 'loss_cross_entropy': Array(0.21151069, dtype=float32)}


  3%|▎         | 29559/1000000 [1:21:31<40:26:42,  6.66it/s]

{'loss': Array(0.21897511, dtype=float32), 'loss_cross_entropy': Array(0.20147109, dtype=float32)}


  3%|▎         | 29569/1000000 [1:21:32<28:07:07,  9.59it/s]

{'loss': Array(0.21511248, dtype=float32), 'loss_cross_entropy': Array(0.19943495, dtype=float32)}


  3%|▎         | 29579/1000000 [1:21:34<26:03:44, 10.34it/s]

{'loss': Array(0.21855319, dtype=float32), 'loss_cross_entropy': Array(0.20080554, dtype=float32)}


  3%|▎         | 29589/1000000 [1:21:36<28:46:46,  9.37it/s]

{'loss': Array(0.20781572, dtype=float32), 'loss_cross_entropy': Array(0.19207299, dtype=float32)}


  3%|▎         | 29599/1000000 [1:21:37<25:12:02, 10.70it/s]

{'loss': Array(0.20700398, dtype=float32), 'loss_cross_entropy': Array(0.19018264, dtype=float32)}


  3%|▎         | 29609/1000000 [1:21:39<39:31:47,  6.82it/s]

{'loss': Array(0.21395122, dtype=float32), 'loss_cross_entropy': Array(0.19553679, dtype=float32)}


  3%|▎         | 29619/1000000 [1:21:40<27:29:12,  9.81it/s]

{'loss': Array(0.20504265, dtype=float32), 'loss_cross_entropy': Array(0.18829729, dtype=float32)}


  3%|▎         | 29629/1000000 [1:21:41<26:06:32, 10.32it/s]

{'loss': Array(0.18636626, dtype=float32), 'loss_cross_entropy': Array(0.17156671, dtype=float32)}


  3%|▎         | 29639/1000000 [1:21:43<29:55:56,  9.01it/s]

{'loss': Array(0.2111363, dtype=float32), 'loss_cross_entropy': Array(0.19499303, dtype=float32)}


  3%|▎         | 29649/1000000 [1:21:45<25:58:10, 10.38it/s]

{'loss': Array(0.19117786, dtype=float32), 'loss_cross_entropy': Array(0.17478567, dtype=float32)}


  3%|▎         | 29659/1000000 [1:21:46<34:58:06,  7.71it/s]

{'loss': Array(0.20349422, dtype=float32), 'loss_cross_entropy': Array(0.18736158, dtype=float32)}


  3%|▎         | 29669/1000000 [1:21:48<26:08:18, 10.31it/s]

{'loss': Array(0.1903488, dtype=float32), 'loss_cross_entropy': Array(0.17575705, dtype=float32)}


  3%|▎         | 29679/1000000 [1:21:49<39:55:09,  6.75it/s]

{'loss': Array(0.20703612, dtype=float32), 'loss_cross_entropy': Array(0.19009508, dtype=float32)}


  3%|▎         | 29689/1000000 [1:21:51<27:57:55,  9.64it/s]

{'loss': Array(0.20418067, dtype=float32), 'loss_cross_entropy': Array(0.18725891, dtype=float32)}


  3%|▎         | 29699/1000000 [1:21:52<25:45:33, 10.46it/s]

{'loss': Array(0.22508834, dtype=float32), 'loss_cross_entropy': Array(0.20844801, dtype=float32)}


  3%|▎         | 29709/1000000 [1:21:54<32:01:25,  8.42it/s]

{'loss': Array(0.21284814, dtype=float32), 'loss_cross_entropy': Array(0.19637755, dtype=float32)}


  3%|▎         | 29719/1000000 [1:21:55<27:03:34,  9.96it/s]

{'loss': Array(0.22731443, dtype=float32), 'loss_cross_entropy': Array(0.2106365, dtype=float32)}


  3%|▎         | 29729/1000000 [1:21:57<35:31:02,  7.59it/s]

{'loss': Array(0.20042299, dtype=float32), 'loss_cross_entropy': Array(0.18392201, dtype=float32)}


  3%|▎         | 29739/1000000 [1:21:58<26:31:38, 10.16it/s]

{'loss': Array(0.19303147, dtype=float32), 'loss_cross_entropy': Array(0.1760345, dtype=float32)}


  3%|▎         | 29749/1000000 [1:22:00<26:08:43, 10.31it/s]

{'loss': Array(0.22785573, dtype=float32), 'loss_cross_entropy': Array(0.21067475, dtype=float32)}


  3%|▎         | 29759/1000000 [1:22:01<29:16:40,  9.21it/s]

{'loss': Array(0.2039351, dtype=float32), 'loss_cross_entropy': Array(0.18829572, dtype=float32)}


  3%|▎         | 29769/1000000 [1:22:03<25:50:29, 10.43it/s]

{'loss': Array(0.19230418, dtype=float32), 'loss_cross_entropy': Array(0.17745522, dtype=float32)}


  3%|▎         | 29779/1000000 [1:22:04<39:08:27,  6.89it/s]

{'loss': Array(0.2072386, dtype=float32), 'loss_cross_entropy': Array(0.19041002, dtype=float32)}


  3%|▎         | 29789/1000000 [1:22:06<27:05:19,  9.95it/s]

{'loss': Array(0.19335161, dtype=float32), 'loss_cross_entropy': Array(0.17758843, dtype=float32)}


  3%|▎         | 29799/1000000 [1:22:07<25:18:47, 10.65it/s]

{'loss': Array(0.20667289, dtype=float32), 'loss_cross_entropy': Array(0.18997535, dtype=float32)}


  3%|▎         | 29809/1000000 [1:22:09<31:34:13,  8.54it/s]

{'loss': Array(0.1999571, dtype=float32), 'loss_cross_entropy': Array(0.18361866, dtype=float32)}


  3%|▎         | 29819/1000000 [1:22:10<25:44:18, 10.47it/s]

{'loss': Array(0.21002798, dtype=float32), 'loss_cross_entropy': Array(0.19316006, dtype=float32)}


  3%|▎         | 29829/1000000 [1:22:12<35:26:54,  7.60it/s]

{'loss': Array(0.23031859, dtype=float32), 'loss_cross_entropy': Array(0.21223019, dtype=float32)}


  3%|▎         | 29839/1000000 [1:22:13<26:30:01, 10.17it/s]

{'loss': Array(0.23361945, dtype=float32), 'loss_cross_entropy': Array(0.2146169, dtype=float32)}


  3%|▎         | 29849/1000000 [1:22:15<35:11:44,  7.66it/s]

{'loss': Array(0.21584737, dtype=float32), 'loss_cross_entropy': Array(0.19893545, dtype=float32)}


  3%|▎         | 29859/1000000 [1:22:16<27:11:35,  9.91it/s]

{'loss': Array(0.20439796, dtype=float32), 'loss_cross_entropy': Array(0.18834706, dtype=float32)}


  3%|▎         | 29869/1000000 [1:22:18<25:03:14, 10.76it/s]

{'loss': Array(0.18133472, dtype=float32), 'loss_cross_entropy': Array(0.16565542, dtype=float32)}


  3%|▎         | 29879/1000000 [1:22:19<29:42:23,  9.07it/s]

{'loss': Array(0.21411131, dtype=float32), 'loss_cross_entropy': Array(0.19671321, dtype=float32)}


  3%|▎         | 29889/1000000 [1:22:21<25:37:07, 10.52it/s]

{'loss': Array(0.22418164, dtype=float32), 'loss_cross_entropy': Array(0.2074158, dtype=float32)}


  3%|▎         | 29899/1000000 [1:22:22<31:50:43,  8.46it/s]

{'loss': Array(0.20296274, dtype=float32), 'loss_cross_entropy': Array(0.18617629, dtype=float32)}


  3%|▎         | 29909/1000000 [1:22:24<26:09:31, 10.30it/s]

{'loss': Array(0.20800538, dtype=float32), 'loss_cross_entropy': Array(0.190864, dtype=float32)}


  3%|▎         | 29919/1000000 [1:22:25<39:33:26,  6.81it/s]

{'loss': Array(0.21519749, dtype=float32), 'loss_cross_entropy': Array(0.19889906, dtype=float32)}


  3%|▎         | 29929/1000000 [1:22:27<27:19:12,  9.86it/s]

{'loss': Array(0.20719121, dtype=float32), 'loss_cross_entropy': Array(0.18955107, dtype=float32)}


  3%|▎         | 29939/1000000 [1:22:28<39:29:19,  6.82it/s]

{'loss': Array(0.20696333, dtype=float32), 'loss_cross_entropy': Array(0.19051248, dtype=float32)}


  3%|▎         | 29949/1000000 [1:22:30<26:45:54, 10.07it/s]

{'loss': Array(0.20607176, dtype=float32), 'loss_cross_entropy': Array(0.18921654, dtype=float32)}


  3%|▎         | 29959/1000000 [1:22:31<24:47:40, 10.87it/s]

{'loss': Array(0.2053858, dtype=float32), 'loss_cross_entropy': Array(0.1897464, dtype=float32)}


  3%|▎         | 29969/1000000 [1:22:33<35:20:46,  7.62it/s]

{'loss': Array(0.23134123, dtype=float32), 'loss_cross_entropy': Array(0.21357189, dtype=float32)}


  3%|▎         | 29979/1000000 [1:22:34<26:24:36, 10.20it/s]

{'loss': Array(0.22717194, dtype=float32), 'loss_cross_entropy': Array(0.20935173, dtype=float32)}


  3%|▎         | 29989/1000000 [1:22:36<39:44:08,  6.78it/s]

{'loss': Array(0.22382021, dtype=float32), 'loss_cross_entropy': Array(0.20667946, dtype=float32)}


  3%|▎         | 29999/1000000 [1:22:37<27:42:20,  9.73it/s]

{'loss': Array(0.20674276, dtype=float32), 'loss_cross_entropy': Array(0.19040024, dtype=float32)}


  3%|▎         | 30009/1000000 [1:22:45<90:28:38,  2.98it/s] 

{'loss': Array(0.17947719, dtype=float32), 'loss_cross_entropy': Array(0.16475664, dtype=float32)}


  3%|▎         | 30019/1000000 [1:22:47<40:00:30,  6.73it/s] 

{'loss': Array(0.19586913, dtype=float32), 'loss_cross_entropy': Array(0.18090014, dtype=float32)}


  3%|▎         | 30029/1000000 [1:22:48<28:12:16,  9.55it/s]

{'loss': Array(0.22209035, dtype=float32), 'loss_cross_entropy': Array(0.20453882, dtype=float32)}


  3%|▎         | 30039/1000000 [1:22:50<28:49:28,  9.35it/s]

{'loss': Array(0.19297346, dtype=float32), 'loss_cross_entropy': Array(0.178251, dtype=float32)}


  3%|▎         | 30049/1000000 [1:22:52<25:25:28, 10.60it/s]

{'loss': Array(0.21898846, dtype=float32), 'loss_cross_entropy': Array(0.20130268, dtype=float32)}


  3%|▎         | 30059/1000000 [1:22:53<39:06:51,  6.89it/s]

{'loss': Array(0.20798707, dtype=float32), 'loss_cross_entropy': Array(0.19136111, dtype=float32)}


  3%|▎         | 30069/1000000 [1:22:55<27:01:59,  9.97it/s]

{'loss': Array(0.20099857, dtype=float32), 'loss_cross_entropy': Array(0.1857488, dtype=float32)}


  3%|▎         | 30079/1000000 [1:22:56<25:07:19, 10.72it/s]

{'loss': Array(0.22012456, dtype=float32), 'loss_cross_entropy': Array(0.20290387, dtype=float32)}


  3%|▎         | 30089/1000000 [1:22:58<32:34:48,  8.27it/s]

{'loss': Array(0.23561905, dtype=float32), 'loss_cross_entropy': Array(0.2173466, dtype=float32)}


  3%|▎         | 30099/1000000 [1:22:59<26:51:04, 10.03it/s]

{'loss': Array(0.20716834, dtype=float32), 'loss_cross_entropy': Array(0.18992251, dtype=float32)}


  3%|▎         | 30109/1000000 [1:23:01<41:24:57,  6.51it/s]

{'loss': Array(0.20668773, dtype=float32), 'loss_cross_entropy': Array(0.19014321, dtype=float32)}


  3%|▎         | 30119/1000000 [1:23:02<27:40:04,  9.74it/s]

{'loss': Array(0.23051448, dtype=float32), 'loss_cross_entropy': Array(0.21276478, dtype=float32)}


  3%|▎         | 30129/1000000 [1:23:03<25:14:28, 10.67it/s]

{'loss': Array(0.2147781, dtype=float32), 'loss_cross_entropy': Array(0.19775331, dtype=float32)}


  3%|▎         | 30139/1000000 [1:23:05<29:22:03,  9.17it/s]

{'loss': Array(0.19957352, dtype=float32), 'loss_cross_entropy': Array(0.18321232, dtype=float32)}


  3%|▎         | 30149/1000000 [1:23:07<26:34:16, 10.14it/s]

{'loss': Array(0.19400446, dtype=float32), 'loss_cross_entropy': Array(0.1777841, dtype=float32)}


  3%|▎         | 30159/1000000 [1:23:08<36:06:13,  7.46it/s]

{'loss': Array(0.20757632, dtype=float32), 'loss_cross_entropy': Array(0.19064252, dtype=float32)}


  3%|▎         | 30169/1000000 [1:23:10<26:52:20, 10.03it/s]

{'loss': Array(0.20902148, dtype=float32), 'loss_cross_entropy': Array(0.19303769, dtype=float32)}


  3%|▎         | 30179/1000000 [1:23:11<25:19:22, 10.64it/s]

{'loss': Array(0.21801, dtype=float32), 'loss_cross_entropy': Array(0.20140433, dtype=float32)}


  3%|▎         | 30189/1000000 [1:23:13<32:13:54,  8.36it/s]

{'loss': Array(0.21840663, dtype=float32), 'loss_cross_entropy': Array(0.20149334, dtype=float32)}


  3%|▎         | 30199/1000000 [1:23:14<25:46:21, 10.45it/s]

{'loss': Array(0.21218172, dtype=float32), 'loss_cross_entropy': Array(0.19518483, dtype=float32)}


  3%|▎         | 30209/1000000 [1:23:15<24:50:38, 10.84it/s]

{'loss': Array(0.22201538, dtype=float32), 'loss_cross_entropy': Array(0.20457672, dtype=float32)}


  3%|▎         | 30219/1000000 [1:23:17<28:23:54,  9.49it/s]

{'loss': Array(0.18467735, dtype=float32), 'loss_cross_entropy': Array(0.16937563, dtype=float32)}


  3%|▎         | 30229/1000000 [1:23:19<25:48:30, 10.44it/s]

{'loss': Array(0.22742581, dtype=float32), 'loss_cross_entropy': Array(0.20927007, dtype=float32)}


  3%|▎         | 30239/1000000 [1:23:20<32:04:44,  8.40it/s]

{'loss': Array(0.2125672, dtype=float32), 'loss_cross_entropy': Array(0.19578253, dtype=float32)}


  3%|▎         | 30249/1000000 [1:23:22<26:18:22, 10.24it/s]

{'loss': Array(0.22298141, dtype=float32), 'loss_cross_entropy': Array(0.20577596, dtype=float32)}


  3%|▎         | 30259/1000000 [1:23:23<39:18:00,  6.85it/s]

{'loss': Array(0.22142982, dtype=float32), 'loss_cross_entropy': Array(0.20473611, dtype=float32)}


  3%|▎         | 30269/1000000 [1:23:25<27:20:57,  9.85it/s]

{'loss': Array(0.21973728, dtype=float32), 'loss_cross_entropy': Array(0.20244868, dtype=float32)}


  3%|▎         | 30279/1000000 [1:23:26<40:17:38,  6.69it/s]

{'loss': Array(0.20794843, dtype=float32), 'loss_cross_entropy': Array(0.19190687, dtype=float32)}


  3%|▎         | 30289/1000000 [1:23:28<28:06:00,  9.59it/s]

{'loss': Array(0.216396, dtype=float32), 'loss_cross_entropy': Array(0.19930324, dtype=float32)}


  3%|▎         | 30299/1000000 [1:23:29<25:46:44, 10.45it/s]

{'loss': Array(0.19605128, dtype=float32), 'loss_cross_entropy': Array(0.17957895, dtype=float32)}


  3%|▎         | 30309/1000000 [1:23:31<35:27:00,  7.60it/s]

{'loss': Array(0.2062444, dtype=float32), 'loss_cross_entropy': Array(0.18986787, dtype=float32)}


  3%|▎         | 30319/1000000 [1:23:32<26:29:57, 10.16it/s]

{'loss': Array(0.20649293, dtype=float32), 'loss_cross_entropy': Array(0.19044027, dtype=float32)}


  3%|▎         | 30329/1000000 [1:23:34<39:46:26,  6.77it/s]

{'loss': Array(0.19617744, dtype=float32), 'loss_cross_entropy': Array(0.1806337, dtype=float32)}


  3%|▎         | 30339/1000000 [1:23:35<27:46:21,  9.70it/s]

{'loss': Array(0.19486716, dtype=float32), 'loss_cross_entropy': Array(0.17965329, dtype=float32)}


  3%|▎         | 30349/1000000 [1:23:37<25:11:02, 10.70it/s]

{'loss': Array(0.23338759, dtype=float32), 'loss_cross_entropy': Array(0.21495247, dtype=float32)}


  3%|▎         | 30359/1000000 [1:23:38<29:02:13,  9.28it/s]

{'loss': Array(0.19289356, dtype=float32), 'loss_cross_entropy': Array(0.17688403, dtype=float32)}


  3%|▎         | 30369/1000000 [1:23:40<25:23:34, 10.61it/s]

{'loss': Array(0.21028204, dtype=float32), 'loss_cross_entropy': Array(0.19310892, dtype=float32)}


  3%|▎         | 30379/1000000 [1:23:41<28:47:19,  9.36it/s]

{'loss': Array(0.20004873, dtype=float32), 'loss_cross_entropy': Array(0.18474051, dtype=float32)}


  3%|▎         | 30389/1000000 [1:23:43<25:00:21, 10.77it/s]

{'loss': Array(0.21393691, dtype=float32), 'loss_cross_entropy': Array(0.19758031, dtype=float32)}


  3%|▎         | 30399/1000000 [1:23:44<38:57:48,  6.91it/s]

{'loss': Array(0.20568247, dtype=float32), 'loss_cross_entropy': Array(0.18886456, dtype=float32)}


  3%|▎         | 30409/1000000 [1:23:46<27:09:23,  9.92it/s]

{'loss': Array(0.21969144, dtype=float32), 'loss_cross_entropy': Array(0.20271623, dtype=float32)}


  3%|▎         | 30419/1000000 [1:23:47<25:17:28, 10.65it/s]

{'loss': Array(0.19347313, dtype=float32), 'loss_cross_entropy': Array(0.1786295, dtype=float32)}


  3%|▎         | 30429/1000000 [1:23:49<32:41:11,  8.24it/s]

{'loss': Array(0.21087961, dtype=float32), 'loss_cross_entropy': Array(0.1952192, dtype=float32)}


  3%|▎         | 30439/1000000 [1:23:50<26:20:47, 10.22it/s]

{'loss': Array(0.2198764, dtype=float32), 'loss_cross_entropy': Array(0.20409353, dtype=float32)}


  3%|▎         | 30449/1000000 [1:23:52<39:17:43,  6.85it/s]

{'loss': Array(0.20202914, dtype=float32), 'loss_cross_entropy': Array(0.18597677, dtype=float32)}


  3%|▎         | 30459/1000000 [1:23:53<27:46:53,  9.69it/s]

{'loss': Array(0.19645734, dtype=float32), 'loss_cross_entropy': Array(0.18146019, dtype=float32)}


  3%|▎         | 30469/1000000 [1:23:55<25:44:14, 10.46it/s]

{'loss': Array(0.19747327, dtype=float32), 'loss_cross_entropy': Array(0.18181531, dtype=float32)}


  3%|▎         | 30479/1000000 [1:23:56<29:44:42,  9.05it/s]

{'loss': Array(0.21850203, dtype=float32), 'loss_cross_entropy': Array(0.20201218, dtype=float32)}


  3%|▎         | 30489/1000000 [1:23:58<25:51:39, 10.41it/s]

{'loss': Array(0.22499943, dtype=float32), 'loss_cross_entropy': Array(0.20750915, dtype=float32)}


  3%|▎         | 30499/1000000 [1:24:00<37:00:33,  7.28it/s]

{'loss': Array(0.21833853, dtype=float32), 'loss_cross_entropy': Array(0.2009418, dtype=float32)}


  3%|▎         | 30509/1000000 [1:24:07<88:36:47,  3.04it/s] 

{'loss': Array(0.21100424, dtype=float32), 'loss_cross_entropy': Array(0.19433473, dtype=float32)}


  3%|▎         | 30519/1000000 [1:24:09<45:41:07,  5.89it/s]

{'loss': Array(0.21428834, dtype=float32), 'loss_cross_entropy': Array(0.19761108, dtype=float32)}


  3%|▎         | 30529/1000000 [1:24:10<28:46:56,  9.36it/s]

{'loss': Array(0.20881274, dtype=float32), 'loss_cross_entropy': Array(0.19281733, dtype=float32)}


  3%|▎         | 30539/1000000 [1:24:12<25:40:34, 10.49it/s]

{'loss': Array(0.21596444, dtype=float32), 'loss_cross_entropy': Array(0.20011334, dtype=float32)}


  3%|▎         | 30549/1000000 [1:24:14<32:49:57,  8.20it/s]

{'loss': Array(0.21454597, dtype=float32), 'loss_cross_entropy': Array(0.19758041, dtype=float32)}


  3%|▎         | 30559/1000000 [1:24:15<27:01:30,  9.96it/s]

{'loss': Array(0.20384304, dtype=float32), 'loss_cross_entropy': Array(0.18779857, dtype=float32)}


  3%|▎         | 30569/1000000 [1:24:16<25:21:07, 10.62it/s]

{'loss': Array(0.19008128, dtype=float32), 'loss_cross_entropy': Array(0.17578082, dtype=float32)}


  3%|▎         | 30579/1000000 [1:24:18<28:09:31,  9.56it/s]

{'loss': Array(0.20100859, dtype=float32), 'loss_cross_entropy': Array(0.18658185, dtype=float32)}


  3%|▎         | 30589/1000000 [1:24:19<25:57:19, 10.37it/s]

{'loss': Array(0.2151221, dtype=float32), 'loss_cross_entropy': Array(0.19947647, dtype=float32)}


  3%|▎         | 30599/1000000 [1:24:21<32:57:48,  8.17it/s]

{'loss': Array(0.19511542, dtype=float32), 'loss_cross_entropy': Array(0.17986989, dtype=float32)}


  3%|▎         | 30609/1000000 [1:24:23<27:05:32,  9.94it/s]

{'loss': Array(0.20660172, dtype=float32), 'loss_cross_entropy': Array(0.19106764, dtype=float32)}


  3%|▎         | 30619/1000000 [1:24:24<36:00:42,  7.48it/s]

{'loss': Array(0.18882537, dtype=float32), 'loss_cross_entropy': Array(0.17267033, dtype=float32)}


  3%|▎         | 30629/1000000 [1:24:26<26:42:13, 10.08it/s]

{'loss': Array(0.1950906, dtype=float32), 'loss_cross_entropy': Array(0.17887297, dtype=float32)}


  3%|▎         | 30639/1000000 [1:24:27<25:27:06, 10.58it/s]

{'loss': Array(0.2067924, dtype=float32), 'loss_cross_entropy': Array(0.19101024, dtype=float32)}


  3%|▎         | 30649/1000000 [1:24:29<32:03:46,  8.40it/s]

{'loss': Array(0.19589595, dtype=float32), 'loss_cross_entropy': Array(0.17961968, dtype=float32)}


  3%|▎         | 30659/1000000 [1:24:30<25:58:10, 10.37it/s]

{'loss': Array(0.19139887, dtype=float32), 'loss_cross_entropy': Array(0.17645505, dtype=float32)}


  3%|▎         | 30669/1000000 [1:24:32<25:05:17, 10.73it/s]

{'loss': Array(0.19778588, dtype=float32), 'loss_cross_entropy': Array(0.18221192, dtype=float32)}


  3%|▎         | 30679/1000000 [1:24:33<28:34:49,  9.42it/s]

{'loss': Array(0.2147672, dtype=float32), 'loss_cross_entropy': Array(0.1977624, dtype=float32)}


  3%|▎         | 30689/1000000 [1:24:35<25:58:32, 10.37it/s]

{'loss': Array(0.2270185, dtype=float32), 'loss_cross_entropy': Array(0.20948003, dtype=float32)}


  3%|▎         | 30699/1000000 [1:24:36<32:38:56,  8.25it/s]

{'loss': Array(0.22075294, dtype=float32), 'loss_cross_entropy': Array(0.20270498, dtype=float32)}


  3%|▎         | 30709/1000000 [1:24:38<27:01:45,  9.96it/s]

{'loss': Array(0.20153964, dtype=float32), 'loss_cross_entropy': Array(0.18676351, dtype=float32)}


  3%|▎         | 30719/1000000 [1:24:40<40:34:29,  6.64it/s]

{'loss': Array(0.20073485, dtype=float32), 'loss_cross_entropy': Array(0.1859216, dtype=float32)}


  3%|▎         | 30729/1000000 [1:24:41<27:01:48,  9.96it/s]

{'loss': Array(0.21582606, dtype=float32), 'loss_cross_entropy': Array(0.1994359, dtype=float32)}


  3%|▎         | 30739/1000000 [1:24:42<24:57:08, 10.79it/s]

{'loss': Array(0.20606184, dtype=float32), 'loss_cross_entropy': Array(0.190167, dtype=float32)}


  3%|▎         | 30749/1000000 [1:24:44<35:32:44,  7.57it/s]

{'loss': Array(0.19112882, dtype=float32), 'loss_cross_entropy': Array(0.1755201, dtype=float32)}


  3%|▎         | 30759/1000000 [1:24:45<26:47:37, 10.05it/s]

{'loss': Array(0.19678544, dtype=float32), 'loss_cross_entropy': Array(0.18104467, dtype=float32)}


  3%|▎         | 30769/1000000 [1:24:47<25:38:30, 10.50it/s]

{'loss': Array(0.2066716, dtype=float32), 'loss_cross_entropy': Array(0.19027027, dtype=float32)}


  3%|▎         | 30779/1000000 [1:24:48<30:43:08,  8.76it/s]

{'loss': Array(0.20600648, dtype=float32), 'loss_cross_entropy': Array(0.19122057, dtype=float32)}


  3%|▎         | 30789/1000000 [1:24:50<26:19:01, 10.23it/s]

{'loss': Array(0.20788693, dtype=float32), 'loss_cross_entropy': Array(0.19164243, dtype=float32)}


  3%|▎         | 30799/1000000 [1:24:52<32:14:24,  8.35it/s]

{'loss': Array(0.21012326, dtype=float32), 'loss_cross_entropy': Array(0.19398525, dtype=float32)}


  3%|▎         | 30809/1000000 [1:24:53<25:59:25, 10.36it/s]

{'loss': Array(0.20732284, dtype=float32), 'loss_cross_entropy': Array(0.19151805, dtype=float32)}


  3%|▎         | 30819/1000000 [1:24:54<25:37:16, 10.51it/s]

{'loss': Array(0.2140255, dtype=float32), 'loss_cross_entropy': Array(0.19651048, dtype=float32)}


  3%|▎         | 30829/1000000 [1:24:56<28:22:24,  9.49it/s]

{'loss': Array(0.2082759, dtype=float32), 'loss_cross_entropy': Array(0.19195057, dtype=float32)}


  3%|▎         | 30839/1000000 [1:24:57<26:14:12, 10.26it/s]

{'loss': Array(0.20781587, dtype=float32), 'loss_cross_entropy': Array(0.19147335, dtype=float32)}


  3%|▎         | 30849/1000000 [1:24:59<36:41:15,  7.34it/s]

{'loss': Array(0.21136987, dtype=float32), 'loss_cross_entropy': Array(0.19519897, dtype=float32)}


  3%|▎         | 30859/1000000 [1:25:01<28:02:44,  9.60it/s]

{'loss': Array(0.20450893, dtype=float32), 'loss_cross_entropy': Array(0.18837748, dtype=float32)}


  3%|▎         | 30869/1000000 [1:25:02<25:45:29, 10.45it/s]

{'loss': Array(0.19812171, dtype=float32), 'loss_cross_entropy': Array(0.18180671, dtype=float32)}


  3%|▎         | 30879/1000000 [1:25:04<28:01:46,  9.60it/s]

{'loss': Array(0.19811893, dtype=float32), 'loss_cross_entropy': Array(0.18317267, dtype=float32)}


  3%|▎         | 30889/1000000 [1:25:05<25:01:54, 10.75it/s]

{'loss': Array(0.17859831, dtype=float32), 'loss_cross_entropy': Array(0.1648217, dtype=float32)}


  3%|▎         | 30899/1000000 [1:25:07<29:57:32,  8.99it/s]

{'loss': Array(0.23616461, dtype=float32), 'loss_cross_entropy': Array(0.21847978, dtype=float32)}


  3%|▎         | 30909/1000000 [1:25:08<26:02:36, 10.34it/s]

{'loss': Array(0.193857, dtype=float32), 'loss_cross_entropy': Array(0.17852476, dtype=float32)}


  3%|▎         | 30919/1000000 [1:25:10<39:47:25,  6.77it/s]

{'loss': Array(0.24015203, dtype=float32), 'loss_cross_entropy': Array(0.22227068, dtype=float32)}


  3%|▎         | 30929/1000000 [1:25:11<27:36:10,  9.75it/s]

{'loss': Array(0.20863461, dtype=float32), 'loss_cross_entropy': Array(0.19178556, dtype=float32)}


  3%|▎         | 30939/1000000 [1:25:13<25:38:07, 10.50it/s]

{'loss': Array(0.20091067, dtype=float32), 'loss_cross_entropy': Array(0.18631251, dtype=float32)}


  3%|▎         | 30949/1000000 [1:25:14<31:43:39,  8.48it/s]

{'loss': Array(0.21954194, dtype=float32), 'loss_cross_entropy': Array(0.20310454, dtype=float32)}


  3%|▎         | 30959/1000000 [1:25:16<25:45:27, 10.45it/s]

{'loss': Array(0.20914768, dtype=float32), 'loss_cross_entropy': Array(0.19286679, dtype=float32)}


  3%|▎         | 30969/1000000 [1:25:17<34:52:43,  7.72it/s]

{'loss': Array(0.209196, dtype=float32), 'loss_cross_entropy': Array(0.1926382, dtype=float32)}


  3%|▎         | 30979/1000000 [1:25:19<26:31:24, 10.15it/s]

{'loss': Array(0.20673685, dtype=float32), 'loss_cross_entropy': Array(0.19014621, dtype=float32)}


  3%|▎         | 30989/1000000 [1:25:20<35:18:21,  7.62it/s]

{'loss': Array(0.20409231, dtype=float32), 'loss_cross_entropy': Array(0.18806723, dtype=float32)}


  3%|▎         | 30999/1000000 [1:25:22<26:54:24, 10.00it/s]

{'loss': Array(0.19855025, dtype=float32), 'loss_cross_entropy': Array(0.18318921, dtype=float32)}


  3%|▎         | 31009/1000000 [1:25:30<88:00:51,  3.06it/s] 

{'loss': Array(0.21267597, dtype=float32), 'loss_cross_entropy': Array(0.19581787, dtype=float32)}


  3%|▎         | 31019/1000000 [1:25:31<40:12:28,  6.69it/s]

{'loss': Array(0.21105753, dtype=float32), 'loss_cross_entropy': Array(0.19344704, dtype=float32)}


  3%|▎         | 31029/1000000 [1:25:33<27:34:43,  9.76it/s]

{'loss': Array(0.19219461, dtype=float32), 'loss_cross_entropy': Array(0.1768418, dtype=float32)}


  3%|▎         | 31039/1000000 [1:25:34<35:53:29,  7.50it/s]

{'loss': Array(0.20847633, dtype=float32), 'loss_cross_entropy': Array(0.19220635, dtype=float32)}


  3%|▎         | 31049/1000000 [1:25:36<26:36:47, 10.11it/s]

{'loss': Array(0.20853443, dtype=float32), 'loss_cross_entropy': Array(0.19215725, dtype=float32)}


  3%|▎         | 31059/1000000 [1:25:37<39:58:09,  6.73it/s]

{'loss': Array(0.21188946, dtype=float32), 'loss_cross_entropy': Array(0.1949939, dtype=float32)}


  3%|▎         | 31069/1000000 [1:25:39<28:04:15,  9.59it/s]

{'loss': Array(0.21698879, dtype=float32), 'loss_cross_entropy': Array(0.20004502, dtype=float32)}


  3%|▎         | 31079/1000000 [1:25:40<26:02:10, 10.34it/s]

{'loss': Array(0.20893751, dtype=float32), 'loss_cross_entropy': Array(0.19194135, dtype=float32)}


  3%|▎         | 31089/1000000 [1:25:42<32:50:07,  8.20it/s]

{'loss': Array(0.20962606, dtype=float32), 'loss_cross_entropy': Array(0.19358647, dtype=float32)}


  3%|▎         | 31099/1000000 [1:25:43<26:26:01, 10.18it/s]

{'loss': Array(0.22049391, dtype=float32), 'loss_cross_entropy': Array(0.20323761, dtype=float32)}


  3%|▎         | 31109/1000000 [1:25:45<35:15:52,  7.63it/s]

{'loss': Array(0.20370372, dtype=float32), 'loss_cross_entropy': Array(0.188151, dtype=float32)}


  3%|▎         | 31119/1000000 [1:25:46<27:05:47,  9.93it/s]

{'loss': Array(0.20707051, dtype=float32), 'loss_cross_entropy': Array(0.19133283, dtype=float32)}


  3%|▎         | 31129/1000000 [1:25:48<25:25:06, 10.59it/s]

{'loss': Array(0.19933403, dtype=float32), 'loss_cross_entropy': Array(0.18410407, dtype=float32)}


  3%|▎         | 31139/1000000 [1:25:49<28:19:04,  9.50it/s]

{'loss': Array(0.18725945, dtype=float32), 'loss_cross_entropy': Array(0.17169724, dtype=float32)}


  3%|▎         | 31149/1000000 [1:25:51<25:32:43, 10.54it/s]

{'loss': Array(0.21239455, dtype=float32), 'loss_cross_entropy': Array(0.1965422, dtype=float32)}


  3%|▎         | 31159/1000000 [1:25:53<39:35:03,  6.80it/s]

{'loss': Array(0.2088652, dtype=float32), 'loss_cross_entropy': Array(0.19157463, dtype=float32)}


  3%|▎         | 31169/1000000 [1:25:54<27:28:53,  9.79it/s]

{'loss': Array(0.20979324, dtype=float32), 'loss_cross_entropy': Array(0.19354182, dtype=float32)}


  3%|▎         | 31179/1000000 [1:25:55<25:16:02, 10.65it/s]

{'loss': Array(0.21870954, dtype=float32), 'loss_cross_entropy': Array(0.20136714, dtype=float32)}


  3%|▎         | 31189/1000000 [1:25:57<31:29:08,  8.55it/s]

{'loss': Array(0.18532334, dtype=float32), 'loss_cross_entropy': Array(0.17136888, dtype=float32)}


  3%|▎         | 31199/1000000 [1:25:58<26:09:52, 10.29it/s]

{'loss': Array(0.21457855, dtype=float32), 'loss_cross_entropy': Array(0.19753726, dtype=float32)}


  3%|▎         | 31209/1000000 [1:26:00<35:43:10,  7.53it/s]

{'loss': Array(0.20636559, dtype=float32), 'loss_cross_entropy': Array(0.19086193, dtype=float32)}


  3%|▎         | 31219/1000000 [1:26:01<27:36:55,  9.74it/s]

{'loss': Array(0.20796521, dtype=float32), 'loss_cross_entropy': Array(0.1915521, dtype=float32)}


  3%|▎         | 31229/1000000 [1:26:03<36:16:10,  7.42it/s]

{'loss': Array(0.1877154, dtype=float32), 'loss_cross_entropy': Array(0.17182261, dtype=float32)}


  3%|▎         | 31239/1000000 [1:26:05<26:57:23,  9.98it/s]

{'loss': Array(0.19006263, dtype=float32), 'loss_cross_entropy': Array(0.17536186, dtype=float32)}


  3%|▎         | 31249/1000000 [1:26:06<24:54:54, 10.80it/s]

{'loss': Array(0.17629525, dtype=float32), 'loss_cross_entropy': Array(0.16204116, dtype=float32)}


  3%|▎         | 31259/1000000 [1:26:08<30:05:32,  8.94it/s]

{'loss': Array(0.2029624, dtype=float32), 'loss_cross_entropy': Array(0.18751197, dtype=float32)}


  3%|▎         | 31269/1000000 [1:26:09<25:36:55, 10.51it/s]

{'loss': Array(0.19722293, dtype=float32), 'loss_cross_entropy': Array(0.1808439, dtype=float32)}


  3%|▎         | 31279/1000000 [1:26:11<32:07:12,  8.38it/s]

{'loss': Array(0.20864879, dtype=float32), 'loss_cross_entropy': Array(0.19305384, dtype=float32)}


  3%|▎         | 31289/1000000 [1:26:12<26:19:13, 10.22it/s]

{'loss': Array(0.18159743, dtype=float32), 'loss_cross_entropy': Array(0.16722508, dtype=float32)}


  3%|▎         | 31299/1000000 [1:26:14<40:09:28,  6.70it/s]

{'loss': Array(0.2328795, dtype=float32), 'loss_cross_entropy': Array(0.2164632, dtype=float32)}


  3%|▎         | 31309/1000000 [1:26:15<27:19:48,  9.85it/s]

{'loss': Array(0.22534214, dtype=float32), 'loss_cross_entropy': Array(0.20725308, dtype=float32)}


  3%|▎         | 31319/1000000 [1:26:17<39:29:47,  6.81it/s]

{'loss': Array(0.20350817, dtype=float32), 'loss_cross_entropy': Array(0.18709034, dtype=float32)}


  3%|▎         | 31329/1000000 [1:26:18<26:54:18, 10.00it/s]

{'loss': Array(0.19999902, dtype=float32), 'loss_cross_entropy': Array(0.18508378, dtype=float32)}


  3%|▎         | 31339/1000000 [1:26:20<26:01:59, 10.34it/s]

{'loss': Array(0.21278377, dtype=float32), 'loss_cross_entropy': Array(0.19650428, dtype=float32)}


  3%|▎         | 31349/1000000 [1:26:21<34:32:09,  7.79it/s]

{'loss': Array(0.22702757, dtype=float32), 'loss_cross_entropy': Array(0.20979977, dtype=float32)}


  3%|▎         | 31359/1000000 [1:26:23<26:12:50, 10.26it/s]

{'loss': Array(0.20272854, dtype=float32), 'loss_cross_entropy': Array(0.18618867, dtype=float32)}


  3%|▎         | 31369/1000000 [1:26:24<39:15:22,  6.85it/s]

{'loss': Array(0.20953682, dtype=float32), 'loss_cross_entropy': Array(0.19277576, dtype=float32)}


  3%|▎         | 31379/1000000 [1:26:26<27:17:39,  9.86it/s]

{'loss': Array(0.19249855, dtype=float32), 'loss_cross_entropy': Array(0.17680655, dtype=float32)}


  3%|▎         | 31389/1000000 [1:26:27<25:36:39, 10.51it/s]

{'loss': Array(0.20496984, dtype=float32), 'loss_cross_entropy': Array(0.18889922, dtype=float32)}


  3%|▎         | 31399/1000000 [1:26:29<27:59:21,  9.61it/s]

{'loss': Array(0.19645497, dtype=float32), 'loss_cross_entropy': Array(0.18093608, dtype=float32)}


  3%|▎         | 31409/1000000 [1:26:30<25:16:09, 10.65it/s]

{'loss': Array(0.19965339, dtype=float32), 'loss_cross_entropy': Array(0.18420708, dtype=float32)}


  3%|▎         | 31419/1000000 [1:26:32<28:06:35,  9.57it/s]

{'loss': Array(0.19888753, dtype=float32), 'loss_cross_entropy': Array(0.18340044, dtype=float32)}


  3%|▎         | 31429/1000000 [1:26:33<25:41:55, 10.47it/s]

{'loss': Array(0.23013122, dtype=float32), 'loss_cross_entropy': Array(0.21283399, dtype=float32)}


  3%|▎         | 31439/1000000 [1:26:35<39:25:53,  6.82it/s]

{'loss': Array(0.20777526, dtype=float32), 'loss_cross_entropy': Array(0.19240093, dtype=float32)}


  3%|▎         | 31449/1000000 [1:26:36<27:49:44,  9.67it/s]

{'loss': Array(0.18704925, dtype=float32), 'loss_cross_entropy': Array(0.1726326, dtype=float32)}


  3%|▎         | 31459/1000000 [1:26:38<25:55:02, 10.38it/s]

{'loss': Array(0.1883692, dtype=float32), 'loss_cross_entropy': Array(0.17353335, dtype=float32)}


  3%|▎         | 31469/1000000 [1:26:39<32:13:24,  8.35it/s]

{'loss': Array(0.21234262, dtype=float32), 'loss_cross_entropy': Array(0.19503228, dtype=float32)}


  3%|▎         | 31479/1000000 [1:26:41<26:57:22,  9.98it/s]

{'loss': Array(0.20064993, dtype=float32), 'loss_cross_entropy': Array(0.18411806, dtype=float32)}


  3%|▎         | 31489/1000000 [1:26:42<39:50:40,  6.75it/s]

{'loss': Array(0.20041595, dtype=float32), 'loss_cross_entropy': Array(0.18427324, dtype=float32)}


  3%|▎         | 31499/1000000 [1:26:44<27:28:52,  9.79it/s]

{'loss': Array(0.22744527, dtype=float32), 'loss_cross_entropy': Array(0.21101804, dtype=float32)}


  3%|▎         | 31509/1000000 [1:26:52<87:55:34,  3.06it/s] 

{'loss': Array(0.21929522, dtype=float32), 'loss_cross_entropy': Array(0.20301063, dtype=float32)}


  3%|▎         | 31519/1000000 [1:26:53<40:37:31,  6.62it/s]

{'loss': Array(0.19284998, dtype=float32), 'loss_cross_entropy': Array(0.17803709, dtype=float32)}


  3%|▎         | 31529/1000000 [1:26:55<27:35:57,  9.75it/s]

{'loss': Array(0.20756173, dtype=float32), 'loss_cross_entropy': Array(0.19109401, dtype=float32)}


  3%|▎         | 31539/1000000 [1:26:56<32:24:23,  8.30it/s]

{'loss': Array(0.2343422, dtype=float32), 'loss_cross_entropy': Array(0.21709032, dtype=float32)}


  3%|▎         | 31549/1000000 [1:26:58<26:01:22, 10.34it/s]

{'loss': Array(0.22630392, dtype=float32), 'loss_cross_entropy': Array(0.20964499, dtype=float32)}


  3%|▎         | 31559/1000000 [1:27:00<40:42:59,  6.61it/s]

{'loss': Array(0.20848148, dtype=float32), 'loss_cross_entropy': Array(0.19299501, dtype=float32)}


  3%|▎         | 31569/1000000 [1:27:01<28:31:00,  9.43it/s]

{'loss': Array(0.19500558, dtype=float32), 'loss_cross_entropy': Array(0.1799613, dtype=float32)}


  3%|▎         | 31579/1000000 [1:27:02<25:38:32, 10.49it/s]

{'loss': Array(0.20579444, dtype=float32), 'loss_cross_entropy': Array(0.18945312, dtype=float32)}


  3%|▎         | 31589/1000000 [1:27:04<28:29:26,  9.44it/s]

{'loss': Array(0.20149398, dtype=float32), 'loss_cross_entropy': Array(0.18591265, dtype=float32)}


  3%|▎         | 31599/1000000 [1:27:05<25:19:22, 10.62it/s]

{'loss': Array(0.2053943, dtype=float32), 'loss_cross_entropy': Array(0.19018297, dtype=float32)}


  3%|▎         | 31609/1000000 [1:27:07<39:16:52,  6.85it/s]

{'loss': Array(0.21204911, dtype=float32), 'loss_cross_entropy': Array(0.1963428, dtype=float32)}


  3%|▎         | 31619/1000000 [1:27:08<28:03:06,  9.59it/s]

{'loss': Array(0.1714144, dtype=float32), 'loss_cross_entropy': Array(0.15728898, dtype=float32)}


  3%|▎         | 31629/1000000 [1:27:10<26:08:50, 10.29it/s]

{'loss': Array(0.19854598, dtype=float32), 'loss_cross_entropy': Array(0.18308567, dtype=float32)}


  3%|▎         | 31639/1000000 [1:27:12<30:06:57,  8.93it/s]

{'loss': Array(0.21787278, dtype=float32), 'loss_cross_entropy': Array(0.20125924, dtype=float32)}


  3%|▎         | 31649/1000000 [1:27:13<25:52:12, 10.40it/s]

{'loss': Array(0.20591794, dtype=float32), 'loss_cross_entropy': Array(0.19002984, dtype=float32)}


  3%|▎         | 31659/1000000 [1:27:15<35:16:00,  7.63it/s]

{'loss': Array(0.17154066, dtype=float32), 'loss_cross_entropy': Array(0.15655942, dtype=float32)}


  3%|▎         | 31669/1000000 [1:27:16<26:33:50, 10.13it/s]

{'loss': Array(0.20319195, dtype=float32), 'loss_cross_entropy': Array(0.18842772, dtype=float32)}


  3%|▎         | 31679/1000000 [1:27:18<39:42:04,  6.78it/s]

{'loss': Array(0.18325596, dtype=float32), 'loss_cross_entropy': Array(0.16904537, dtype=float32)}


  3%|▎         | 31689/1000000 [1:27:19<27:33:01,  9.76it/s]

{'loss': Array(0.20954394, dtype=float32), 'loss_cross_entropy': Array(0.19259025, dtype=float32)}


  3%|▎         | 31699/1000000 [1:27:21<25:46:53, 10.43it/s]

{'loss': Array(0.20570433, dtype=float32), 'loss_cross_entropy': Array(0.18905066, dtype=float32)}


  3%|▎         | 31709/1000000 [1:27:22<32:07:07,  8.37it/s]

{'loss': Array(0.19655697, dtype=float32), 'loss_cross_entropy': Array(0.18145202, dtype=float32)}


  3%|▎         | 31719/1000000 [1:27:24<25:58:27, 10.36it/s]

{'loss': Array(0.18440096, dtype=float32), 'loss_cross_entropy': Array(0.17046754, dtype=float32)}


  3%|▎         | 31729/1000000 [1:27:25<35:30:09,  7.58it/s]

{'loss': Array(0.20727551, dtype=float32), 'loss_cross_entropy': Array(0.19082396, dtype=float32)}


  3%|▎         | 31739/1000000 [1:27:27<26:45:57, 10.05it/s]

{'loss': Array(0.22160459, dtype=float32), 'loss_cross_entropy': Array(0.20512772, dtype=float32)}


  3%|▎         | 31749/1000000 [1:27:28<25:15:00, 10.65it/s]

{'loss': Array(0.23571156, dtype=float32), 'loss_cross_entropy': Array(0.21835636, dtype=float32)}


  3%|▎         | 31759/1000000 [1:27:30<28:11:18,  9.54it/s]

{'loss': Array(0.22418964, dtype=float32), 'loss_cross_entropy': Array(0.20666401, dtype=float32)}


  3%|▎         | 31769/1000000 [1:27:31<25:39:16, 10.48it/s]

{'loss': Array(0.20078047, dtype=float32), 'loss_cross_entropy': Array(0.18482327, dtype=float32)}


  3%|▎         | 31779/1000000 [1:27:33<39:12:17,  6.86it/s]

{'loss': Array(0.21192129, dtype=float32), 'loss_cross_entropy': Array(0.19558565, dtype=float32)}


  3%|▎         | 31789/1000000 [1:27:34<27:04:43,  9.93it/s]

{'loss': Array(0.18449861, dtype=float32), 'loss_cross_entropy': Array(0.169479, dtype=float32)}


  3%|▎         | 31799/1000000 [1:27:35<25:24:24, 10.59it/s]

{'loss': Array(0.20566653, dtype=float32), 'loss_cross_entropy': Array(0.19007628, dtype=float32)}


  3%|▎         | 31809/1000000 [1:27:37<32:17:08,  8.33it/s]

{'loss': Array(0.2296572, dtype=float32), 'loss_cross_entropy': Array(0.21199445, dtype=float32)}


  3%|▎         | 31819/1000000 [1:27:39<26:25:15, 10.18it/s]

{'loss': Array(0.19418076, dtype=float32), 'loss_cross_entropy': Array(0.17840539, dtype=float32)}


  3%|▎         | 31829/1000000 [1:27:40<35:21:04,  7.61it/s]

{'loss': Array(0.21091738, dtype=float32), 'loss_cross_entropy': Array(0.19413197, dtype=float32)}


  3%|▎         | 31839/1000000 [1:27:42<27:30:17,  9.78it/s]

{'loss': Array(0.21409965, dtype=float32), 'loss_cross_entropy': Array(0.19770882, dtype=float32)}


  3%|▎         | 31849/1000000 [1:27:43<35:07:24,  7.66it/s]

{'loss': Array(0.20418458, dtype=float32), 'loss_cross_entropy': Array(0.18783702, dtype=float32)}


  3%|▎         | 31859/1000000 [1:27:45<26:58:53,  9.97it/s]

{'loss': Array(0.16335836, dtype=float32), 'loss_cross_entropy': Array(0.15014033, dtype=float32)}


  3%|▎         | 31869/1000000 [1:27:46<25:01:10, 10.75it/s]

{'loss': Array(0.21049087, dtype=float32), 'loss_cross_entropy': Array(0.19485019, dtype=float32)}


  3%|▎         | 31879/1000000 [1:27:48<29:33:33,  9.10it/s]

{'loss': Array(0.1902597, dtype=float32), 'loss_cross_entropy': Array(0.17451139, dtype=float32)}


  3%|▎         | 31889/1000000 [1:27:49<25:39:03, 10.48it/s]

{'loss': Array(0.20837103, dtype=float32), 'loss_cross_entropy': Array(0.19262214, dtype=float32)}


  3%|▎         | 31899/1000000 [1:27:51<31:35:16,  8.51it/s]

{'loss': Array(0.22108023, dtype=float32), 'loss_cross_entropy': Array(0.2040555, dtype=float32)}


  3%|▎         | 31909/1000000 [1:27:52<26:01:24, 10.33it/s]

{'loss': Array(0.19642717, dtype=float32), 'loss_cross_entropy': Array(0.18184279, dtype=float32)}


  3%|▎         | 31919/1000000 [1:27:54<39:58:28,  6.73it/s]

{'loss': Array(0.1799681, dtype=float32), 'loss_cross_entropy': Array(0.16516674, dtype=float32)}


  3%|▎         | 31929/1000000 [1:27:55<27:37:44,  9.73it/s]

{'loss': Array(0.20765172, dtype=float32), 'loss_cross_entropy': Array(0.19112639, dtype=float32)}


  3%|▎         | 31939/1000000 [1:27:57<40:33:14,  6.63it/s]

{'loss': Array(0.20338614, dtype=float32), 'loss_cross_entropy': Array(0.18767224, dtype=float32)}


  3%|▎         | 31949/1000000 [1:27:58<27:49:48,  9.66it/s]

{'loss': Array(0.1892201, dtype=float32), 'loss_cross_entropy': Array(0.17444879, dtype=float32)}


  3%|▎         | 31959/1000000 [1:28:00<26:35:40, 10.11it/s]

{'loss': Array(0.21314275, dtype=float32), 'loss_cross_entropy': Array(0.19722667, dtype=float32)}


  3%|▎         | 31969/1000000 [1:28:02<36:23:03,  7.39it/s]

{'loss': Array(0.21493542, dtype=float32), 'loss_cross_entropy': Array(0.19901654, dtype=float32)}


  3%|▎         | 31979/1000000 [1:28:03<26:55:59,  9.98it/s]

{'loss': Array(0.19874126, dtype=float32), 'loss_cross_entropy': Array(0.18264495, dtype=float32)}


  3%|▎         | 31989/1000000 [1:28:05<40:16:53,  6.68it/s]

{'loss': Array(0.18844084, dtype=float32), 'loss_cross_entropy': Array(0.17351627, dtype=float32)}


  3%|▎         | 31999/1000000 [1:28:06<28:03:09,  9.59it/s]

{'loss': Array(0.20165668, dtype=float32), 'loss_cross_entropy': Array(0.18516526, dtype=float32)}


  3%|▎         | 32009/1000000 [1:28:14<88:36:13,  3.03it/s] 

{'loss': Array(0.19699308, dtype=float32), 'loss_cross_entropy': Array(0.18230781, dtype=float32)}


  3%|▎         | 32019/1000000 [1:28:16<39:07:51,  6.87it/s] 

{'loss': Array(0.19378999, dtype=float32), 'loss_cross_entropy': Array(0.17868133, dtype=float32)}


  3%|▎         | 32029/1000000 [1:28:17<27:17:44,  9.85it/s]

{'loss': Array(0.1888578, dtype=float32), 'loss_cross_entropy': Array(0.17419493, dtype=float32)}


  3%|▎         | 32039/1000000 [1:28:19<29:23:39,  9.15it/s]

{'loss': Array(0.20960693, dtype=float32), 'loss_cross_entropy': Array(0.19350791, dtype=float32)}


  3%|▎         | 32049/1000000 [1:28:20<26:40:34, 10.08it/s]

{'loss': Array(0.18707104, dtype=float32), 'loss_cross_entropy': Array(0.1723082, dtype=float32)}


  3%|▎         | 32059/1000000 [1:28:22<40:00:21,  6.72it/s]

{'loss': Array(0.18786377, dtype=float32), 'loss_cross_entropy': Array(0.1730091, dtype=float32)}


  3%|▎         | 32069/1000000 [1:28:23<27:12:14,  9.88it/s]

{'loss': Array(0.19400142, dtype=float32), 'loss_cross_entropy': Array(0.17818183, dtype=float32)}


  3%|▎         | 32079/1000000 [1:28:25<25:07:13, 10.70it/s]

{'loss': Array(0.18443501, dtype=float32), 'loss_cross_entropy': Array(0.16981885, dtype=float32)}


  3%|▎         | 32089/1000000 [1:28:26<31:46:46,  8.46it/s]

{'loss': Array(0.20386814, dtype=float32), 'loss_cross_entropy': Array(0.18834476, dtype=float32)}


  3%|▎         | 32099/1000000 [1:28:28<26:24:08, 10.18it/s]

{'loss': Array(0.22101417, dtype=float32), 'loss_cross_entropy': Array(0.20424739, dtype=float32)}


  3%|▎         | 32109/1000000 [1:28:29<39:41:22,  6.77it/s]

{'loss': Array(0.18726884, dtype=float32), 'loss_cross_entropy': Array(0.17235921, dtype=float32)}


  3%|▎         | 32119/1000000 [1:28:31<27:34:44,  9.75it/s]

{'loss': Array(0.20660539, dtype=float32), 'loss_cross_entropy': Array(0.19007537, dtype=float32)}


  3%|▎         | 32129/1000000 [1:28:32<25:49:48, 10.41it/s]

{'loss': Array(0.2041372, dtype=float32), 'loss_cross_entropy': Array(0.18697448, dtype=float32)}


  3%|▎         | 32139/1000000 [1:28:34<30:36:44,  8.78it/s]

{'loss': Array(0.20827103, dtype=float32), 'loss_cross_entropy': Array(0.192149, dtype=float32)}


  3%|▎         | 32149/1000000 [1:28:35<26:13:40, 10.25it/s]

{'loss': Array(0.21778965, dtype=float32), 'loss_cross_entropy': Array(0.1997821, dtype=float32)}


  3%|▎         | 32159/1000000 [1:28:37<35:16:31,  7.62it/s]

{'loss': Array(0.2117651, dtype=float32), 'loss_cross_entropy': Array(0.19586809, dtype=float32)}


  3%|▎         | 32169/1000000 [1:28:38<26:45:04, 10.05it/s]

{'loss': Array(0.18598658, dtype=float32), 'loss_cross_entropy': Array(0.17053561, dtype=float32)}


  3%|▎         | 32179/1000000 [1:28:40<25:21:50, 10.60it/s]

{'loss': Array(0.17799935, dtype=float32), 'loss_cross_entropy': Array(0.16396506, dtype=float32)}


  3%|▎         | 32189/1000000 [1:28:42<33:06:08,  8.12it/s]

{'loss': Array(0.22717069, dtype=float32), 'loss_cross_entropy': Array(0.2099535, dtype=float32)}


  3%|▎         | 32199/1000000 [1:28:43<26:31:58, 10.13it/s]

{'loss': Array(0.20862702, dtype=float32), 'loss_cross_entropy': Array(0.19260219, dtype=float32)}


  3%|▎         | 32209/1000000 [1:28:44<24:54:45, 10.79it/s]

{'loss': Array(0.2139579, dtype=float32), 'loss_cross_entropy': Array(0.19729193, dtype=float32)}


  3%|▎         | 32219/1000000 [1:28:46<28:16:58,  9.50it/s]

{'loss': Array(0.18496709, dtype=float32), 'loss_cross_entropy': Array(0.17038123, dtype=float32)}


  3%|▎         | 32229/1000000 [1:28:47<25:51:06, 10.40it/s]

{'loss': Array(0.20388904, dtype=float32), 'loss_cross_entropy': Array(0.18847036, dtype=float32)}


  3%|▎         | 32239/1000000 [1:28:49<31:48:38,  8.45it/s]

{'loss': Array(0.2018106, dtype=float32), 'loss_cross_entropy': Array(0.18613581, dtype=float32)}


  3%|▎         | 32249/1000000 [1:28:50<26:13:41, 10.25it/s]

{'loss': Array(0.19697784, dtype=float32), 'loss_cross_entropy': Array(0.18144359, dtype=float32)}


  3%|▎         | 32259/1000000 [1:28:52<39:51:30,  6.74it/s]

{'loss': Array(0.21471511, dtype=float32), 'loss_cross_entropy': Array(0.19737704, dtype=float32)}


  3%|▎         | 32269/1000000 [1:28:54<27:25:51,  9.80it/s]

{'loss': Array(0.19703773, dtype=float32), 'loss_cross_entropy': Array(0.18239366, dtype=float32)}


  3%|▎         | 32279/1000000 [1:28:55<40:16:24,  6.67it/s]

{'loss': Array(0.19484206, dtype=float32), 'loss_cross_entropy': Array(0.1796827, dtype=float32)}


  3%|▎         | 32289/1000000 [1:28:57<27:49:36,  9.66it/s]

{'loss': Array(0.22425234, dtype=float32), 'loss_cross_entropy': Array(0.20823394, dtype=float32)}


  3%|▎         | 32299/1000000 [1:28:58<25:23:00, 10.59it/s]

{'loss': Array(0.2034161, dtype=float32), 'loss_cross_entropy': Array(0.18701275, dtype=float32)}


  3%|▎         | 32309/1000000 [1:29:00<36:49:51,  7.30it/s]

{'loss': Array(0.20123754, dtype=float32), 'loss_cross_entropy': Array(0.18552187, dtype=float32)}


  3%|▎         | 32319/1000000 [1:29:01<28:53:14,  9.31it/s]

{'loss': Array(0.19260065, dtype=float32), 'loss_cross_entropy': Array(0.17803805, dtype=float32)}


  3%|▎         | 32329/1000000 [1:29:03<40:04:52,  6.71it/s]

{'loss': Array(0.2070223, dtype=float32), 'loss_cross_entropy': Array(0.19182095, dtype=float32)}


  3%|▎         | 32339/1000000 [1:29:04<27:35:02,  9.74it/s]

{'loss': Array(0.20244358, dtype=float32), 'loss_cross_entropy': Array(0.18773985, dtype=float32)}


  3%|▎         | 32349/1000000 [1:29:06<25:20:58, 10.60it/s]

{'loss': Array(0.2122833, dtype=float32), 'loss_cross_entropy': Array(0.19597942, dtype=float32)}


  3%|▎         | 32359/1000000 [1:29:08<28:50:48,  9.32it/s]

{'loss': Array(0.20353301, dtype=float32), 'loss_cross_entropy': Array(0.18893349, dtype=float32)}


  3%|▎         | 32369/1000000 [1:29:09<25:09:03, 10.69it/s]

{'loss': Array(0.18977724, dtype=float32), 'loss_cross_entropy': Array(0.17545232, dtype=float32)}


  3%|▎         | 32379/1000000 [1:29:11<28:23:52,  9.46it/s]

{'loss': Array(0.18991925, dtype=float32), 'loss_cross_entropy': Array(0.17496495, dtype=float32)}


  3%|▎         | 32389/1000000 [1:29:12<25:12:44, 10.66it/s]

{'loss': Array(0.1775846, dtype=float32), 'loss_cross_entropy': Array(0.16389802, dtype=float32)}


  3%|▎         | 32399/1000000 [1:29:14<38:43:35,  6.94it/s]

{'loss': Array(0.2096591, dtype=float32), 'loss_cross_entropy': Array(0.19353206, dtype=float32)}


  3%|▎         | 32409/1000000 [1:29:15<26:50:07, 10.02it/s]

{'loss': Array(0.18447831, dtype=float32), 'loss_cross_entropy': Array(0.16981117, dtype=float32)}


  3%|▎         | 32419/1000000 [1:29:16<25:03:36, 10.73it/s]

{'loss': Array(0.20275073, dtype=float32), 'loss_cross_entropy': Array(0.18678379, dtype=float32)}


  3%|▎         | 32429/1000000 [1:29:18<32:00:58,  8.39it/s]

{'loss': Array(0.1964653, dtype=float32), 'loss_cross_entropy': Array(0.18118961, dtype=float32)}


  3%|▎         | 32439/1000000 [1:29:19<26:27:49, 10.16it/s]

{'loss': Array(0.21391527, dtype=float32), 'loss_cross_entropy': Array(0.19793761, dtype=float32)}


  3%|▎         | 32449/1000000 [1:29:21<39:23:50,  6.82it/s]

{'loss': Array(0.2123119, dtype=float32), 'loss_cross_entropy': Array(0.19652806, dtype=float32)}


  3%|▎         | 32459/1000000 [1:29:22<27:17:04,  9.85it/s]

{'loss': Array(0.20195055, dtype=float32), 'loss_cross_entropy': Array(0.18738674, dtype=float32)}


  3%|▎         | 32469/1000000 [1:29:24<25:26:27, 10.56it/s]

{'loss': Array(0.21439181, dtype=float32), 'loss_cross_entropy': Array(0.19777112, dtype=float32)}


  3%|▎         | 32479/1000000 [1:29:25<29:54:31,  8.99it/s]

{'loss': Array(0.20112626, dtype=float32), 'loss_cross_entropy': Array(0.18589373, dtype=float32)}


  3%|▎         | 32489/1000000 [1:29:27<26:03:35, 10.31it/s]

{'loss': Array(0.18913445, dtype=float32), 'loss_cross_entropy': Array(0.17335421, dtype=float32)}


  3%|▎         | 32499/1000000 [1:29:29<35:27:35,  7.58it/s]

{'loss': Array(0.21221209, dtype=float32), 'loss_cross_entropy': Array(0.19696426, dtype=float32)}


  3%|▎         | 32509/1000000 [1:29:36<87:40:25,  3.07it/s] 

{'loss': Array(0.18434873, dtype=float32), 'loss_cross_entropy': Array(0.1702109, dtype=float32)}


  3%|▎         | 32519/1000000 [1:29:38<45:12:50,  5.94it/s]

{'loss': Array(0.23182765, dtype=float32), 'loss_cross_entropy': Array(0.21440986, dtype=float32)}


  3%|▎         | 32529/1000000 [1:29:39<28:06:46,  9.56it/s]

{'loss': Array(0.21414156, dtype=float32), 'loss_cross_entropy': Array(0.19752568, dtype=float32)}


  3%|▎         | 32539/1000000 [1:29:41<25:10:30, 10.67it/s]

{'loss': Array(0.2114098, dtype=float32), 'loss_cross_entropy': Array(0.19595167, dtype=float32)}


  3%|▎         | 32549/1000000 [1:29:42<31:37:16,  8.50it/s]

{'loss': Array(0.21003394, dtype=float32), 'loss_cross_entropy': Array(0.19313104, dtype=float32)}


  3%|▎         | 32559/1000000 [1:29:44<26:09:43, 10.27it/s]

{'loss': Array(0.20346366, dtype=float32), 'loss_cross_entropy': Array(0.18837984, dtype=float32)}


  3%|▎         | 32569/1000000 [1:29:45<25:03:52, 10.72it/s]

{'loss': Array(0.2045253, dtype=float32), 'loss_cross_entropy': Array(0.18941443, dtype=float32)}


  3%|▎         | 32579/1000000 [1:29:47<28:38:03,  9.38it/s]

{'loss': Array(0.2053829, dtype=float32), 'loss_cross_entropy': Array(0.19004993, dtype=float32)}


  3%|▎         | 32589/1000000 [1:29:48<25:58:51, 10.34it/s]

{'loss': Array(0.20831393, dtype=float32), 'loss_cross_entropy': Array(0.1924213, dtype=float32)}


  3%|▎         | 32599/1000000 [1:29:50<32:43:06,  8.21it/s]

{'loss': Array(0.20436303, dtype=float32), 'loss_cross_entropy': Array(0.18870726, dtype=float32)}


  3%|▎         | 32609/1000000 [1:29:51<25:55:15, 10.37it/s]

{'loss': Array(0.19810961, dtype=float32), 'loss_cross_entropy': Array(0.18208702, dtype=float32)}


  3%|▎         | 32619/1000000 [1:29:53<34:42:07,  7.74it/s]

{'loss': Array(0.22012173, dtype=float32), 'loss_cross_entropy': Array(0.20302473, dtype=float32)}


  3%|▎         | 32629/1000000 [1:29:54<26:22:33, 10.19it/s]

{'loss': Array(0.18907763, dtype=float32), 'loss_cross_entropy': Array(0.17410378, dtype=float32)}


  3%|▎         | 32639/1000000 [1:29:56<24:44:07, 10.86it/s]

{'loss': Array(0.21847537, dtype=float32), 'loss_cross_entropy': Array(0.20188618, dtype=float32)}


  3%|▎         | 32649/1000000 [1:29:57<31:30:13,  8.53it/s]

{'loss': Array(0.18695062, dtype=float32), 'loss_cross_entropy': Array(0.1709874, dtype=float32)}


  3%|▎         | 32659/1000000 [1:29:59<26:06:37, 10.29it/s]

{'loss': Array(0.20391805, dtype=float32), 'loss_cross_entropy': Array(0.18795183, dtype=float32)}


  3%|▎         | 32669/1000000 [1:30:00<25:25:16, 10.57it/s]

{'loss': Array(0.18949847, dtype=float32), 'loss_cross_entropy': Array(0.17478715, dtype=float32)}


  3%|▎         | 32679/1000000 [1:30:02<28:43:10,  9.36it/s]

{'loss': Array(0.1829773, dtype=float32), 'loss_cross_entropy': Array(0.16854618, dtype=float32)}


  3%|▎         | 32689/1000000 [1:30:03<25:48:46, 10.41it/s]

{'loss': Array(0.21266079, dtype=float32), 'loss_cross_entropy': Array(0.19686274, dtype=float32)}


  3%|▎         | 32699/1000000 [1:30:05<32:05:51,  8.37it/s]

{'loss': Array(0.18024151, dtype=float32), 'loss_cross_entropy': Array(0.16627656, dtype=float32)}


  3%|▎         | 32709/1000000 [1:30:06<26:19:37, 10.21it/s]

{'loss': Array(0.21842717, dtype=float32), 'loss_cross_entropy': Array(0.20230584, dtype=float32)}


  3%|▎         | 32719/1000000 [1:30:08<40:02:47,  6.71it/s]

{'loss': Array(0.23453204, dtype=float32), 'loss_cross_entropy': Array(0.21677831, dtype=float32)}


  3%|▎         | 32729/1000000 [1:30:09<27:31:11,  9.76it/s]

{'loss': Array(0.21983106, dtype=float32), 'loss_cross_entropy': Array(0.20247708, dtype=float32)}


  3%|▎         | 32739/1000000 [1:30:11<26:17:46, 10.22it/s]

{'loss': Array(0.19915581, dtype=float32), 'loss_cross_entropy': Array(0.18394355, dtype=float32)}


  3%|▎         | 32749/1000000 [1:30:12<35:13:48,  7.63it/s]

{'loss': Array(0.20045304, dtype=float32), 'loss_cross_entropy': Array(0.18506055, dtype=float32)}


  3%|▎         | 32759/1000000 [1:30:14<26:15:10, 10.23it/s]

{'loss': Array(0.19453564, dtype=float32), 'loss_cross_entropy': Array(0.1799117, dtype=float32)}


  3%|▎         | 32769/1000000 [1:30:15<25:06:55, 10.70it/s]

{'loss': Array(0.21022415, dtype=float32), 'loss_cross_entropy': Array(0.1952535, dtype=float32)}


  3%|▎         | 32779/1000000 [1:30:17<29:34:53,  9.08it/s]

{'loss': Array(0.19051866, dtype=float32), 'loss_cross_entropy': Array(0.17601357, dtype=float32)}


  3%|▎         | 32789/1000000 [1:30:18<25:50:15, 10.40it/s]

{'loss': Array(0.22178002, dtype=float32), 'loss_cross_entropy': Array(0.20541981, dtype=float32)}


  3%|▎         | 32799/1000000 [1:30:20<32:00:46,  8.39it/s]

{'loss': Array(0.20490551, dtype=float32), 'loss_cross_entropy': Array(0.19009607, dtype=float32)}


  3%|▎         | 32809/1000000 [1:30:21<26:37:37, 10.09it/s]

{'loss': Array(0.17066859, dtype=float32), 'loss_cross_entropy': Array(0.15761822, dtype=float32)}


  3%|▎         | 32819/1000000 [1:30:23<25:39:17, 10.47it/s]

{'loss': Array(0.1909477, dtype=float32), 'loss_cross_entropy': Array(0.17616056, dtype=float32)}


  3%|▎         | 32829/1000000 [1:30:24<28:21:24,  9.47it/s]

{'loss': Array(0.21551314, dtype=float32), 'loss_cross_entropy': Array(0.198663, dtype=float32)}


  3%|▎         | 32839/1000000 [1:30:26<25:10:35, 10.67it/s]

{'loss': Array(0.20896752, dtype=float32), 'loss_cross_entropy': Array(0.19350211, dtype=float32)}


  3%|▎         | 32849/1000000 [1:30:27<35:14:02,  7.62it/s]

{'loss': Array(0.18991339, dtype=float32), 'loss_cross_entropy': Array(0.17557278, dtype=float32)}


  3%|▎         | 32859/1000000 [1:30:29<26:36:38, 10.10it/s]

{'loss': Array(0.19168206, dtype=float32), 'loss_cross_entropy': Array(0.17704256, dtype=float32)}


  3%|▎         | 32869/1000000 [1:30:30<25:06:52, 10.70it/s]

{'loss': Array(0.19648743, dtype=float32), 'loss_cross_entropy': Array(0.18097223, dtype=float32)}


  3%|▎         | 32879/1000000 [1:30:32<28:32:40,  9.41it/s]

{'loss': Array(0.1766557, dtype=float32), 'loss_cross_entropy': Array(0.16258867, dtype=float32)}


  3%|▎         | 32889/1000000 [1:30:33<25:08:13, 10.69it/s]

{'loss': Array(0.20208089, dtype=float32), 'loss_cross_entropy': Array(0.18710719, dtype=float32)}


  3%|▎         | 32899/1000000 [1:30:35<31:26:52,  8.54it/s]

{'loss': Array(0.17487134, dtype=float32), 'loss_cross_entropy': Array(0.16100334, dtype=float32)}


  3%|▎         | 32909/1000000 [1:30:36<26:08:06, 10.28it/s]

{'loss': Array(0.21172126, dtype=float32), 'loss_cross_entropy': Array(0.19512253, dtype=float32)}


  3%|▎         | 32919/1000000 [1:30:38<39:54:13,  6.73it/s]

{'loss': Array(0.215233, dtype=float32), 'loss_cross_entropy': Array(0.2001339, dtype=float32)}


  3%|▎         | 32929/1000000 [1:30:39<27:27:30,  9.78it/s]

{'loss': Array(0.19169219, dtype=float32), 'loss_cross_entropy': Array(0.17650175, dtype=float32)}


  3%|▎         | 32939/1000000 [1:30:41<25:16:58, 10.62it/s]

{'loss': Array(0.1987439, dtype=float32), 'loss_cross_entropy': Array(0.18409716, dtype=float32)}


  3%|▎         | 32949/1000000 [1:30:43<32:14:53,  8.33it/s]

{'loss': Array(0.19273816, dtype=float32), 'loss_cross_entropy': Array(0.17803438, dtype=float32)}


  3%|▎         | 32959/1000000 [1:30:44<25:48:43, 10.41it/s]

{'loss': Array(0.19932328, dtype=float32), 'loss_cross_entropy': Array(0.1843468, dtype=float32)}


  3%|▎         | 32969/1000000 [1:30:46<35:13:13,  7.63it/s]

{'loss': Array(0.22250906, dtype=float32), 'loss_cross_entropy': Array(0.20614855, dtype=float32)}


  3%|▎         | 32979/1000000 [1:30:47<27:05:25,  9.92it/s]

{'loss': Array(0.20054941, dtype=float32), 'loss_cross_entropy': Array(0.1855727, dtype=float32)}


  3%|▎         | 32989/1000000 [1:30:49<35:08:19,  7.64it/s]

{'loss': Array(0.21306646, dtype=float32), 'loss_cross_entropy': Array(0.19650811, dtype=float32)}


  3%|▎         | 32999/1000000 [1:30:50<26:09:25, 10.27it/s]

{'loss': Array(0.2137336, dtype=float32), 'loss_cross_entropy': Array(0.19831233, dtype=float32)}


  3%|▎         | 33009/1000000 [1:30:58<86:52:06,  3.09it/s] 

{'loss': Array(0.18972602, dtype=float32), 'loss_cross_entropy': Array(0.17423847, dtype=float32)}


  3%|▎         | 33019/1000000 [1:31:00<41:12:07,  6.52it/s]

{'loss': Array(0.21525192, dtype=float32), 'loss_cross_entropy': Array(0.19900993, dtype=float32)}


  3%|▎         | 33029/1000000 [1:31:01<28:52:49,  9.30it/s]

{'loss': Array(0.18780617, dtype=float32), 'loss_cross_entropy': Array(0.17354903, dtype=float32)}


  3%|▎         | 33039/1000000 [1:31:03<36:02:27,  7.45it/s]

{'loss': Array(0.21043566, dtype=float32), 'loss_cross_entropy': Array(0.1943919, dtype=float32)}


  3%|▎         | 33049/1000000 [1:31:04<26:09:17, 10.27it/s]

{'loss': Array(0.20063289, dtype=float32), 'loss_cross_entropy': Array(0.18406586, dtype=float32)}


  3%|▎         | 33059/1000000 [1:31:06<39:27:33,  6.81it/s]

{'loss': Array(0.21113597, dtype=float32), 'loss_cross_entropy': Array(0.19540767, dtype=float32)}


  3%|▎         | 33069/1000000 [1:31:07<27:10:38,  9.88it/s]

{'loss': Array(0.18622501, dtype=float32), 'loss_cross_entropy': Array(0.17241232, dtype=float32)}


  3%|▎         | 33079/1000000 [1:31:08<25:11:54, 10.66it/s]

{'loss': Array(0.20085447, dtype=float32), 'loss_cross_entropy': Array(0.18539384, dtype=float32)}


  3%|▎         | 33089/1000000 [1:31:10<32:04:19,  8.37it/s]

{'loss': Array(0.19352365, dtype=float32), 'loss_cross_entropy': Array(0.178062, dtype=float32)}


  3%|▎         | 33099/1000000 [1:31:11<26:45:41, 10.04it/s]

{'loss': Array(0.20967834, dtype=float32), 'loss_cross_entropy': Array(0.1936598, dtype=float32)}


  3%|▎         | 33109/1000000 [1:31:13<35:12:08,  7.63it/s]

{'loss': Array(0.2061822, dtype=float32), 'loss_cross_entropy': Array(0.19106594, dtype=float32)}


  3%|▎         | 33119/1000000 [1:31:15<26:34:04, 10.11it/s]

{'loss': Array(0.18671037, dtype=float32), 'loss_cross_entropy': Array(0.17286791, dtype=float32)}


  3%|▎         | 33129/1000000 [1:31:16<25:00:32, 10.74it/s]

{'loss': Array(0.212458, dtype=float32), 'loss_cross_entropy': Array(0.19583926, dtype=float32)}


  3%|▎         | 33139/1000000 [1:31:18<28:53:17,  9.30it/s]

{'loss': Array(0.20881727, dtype=float32), 'loss_cross_entropy': Array(0.19413209, dtype=float32)}


  3%|▎         | 33149/1000000 [1:31:19<25:27:28, 10.55it/s]

{'loss': Array(0.21766491, dtype=float32), 'loss_cross_entropy': Array(0.20096575, dtype=float32)}


  3%|▎         | 33159/1000000 [1:31:21<39:26:04,  6.81it/s]

{'loss': Array(0.19420156, dtype=float32), 'loss_cross_entropy': Array(0.17858033, dtype=float32)}


  3%|▎         | 33169/1000000 [1:31:22<27:18:23,  9.84it/s]

{'loss': Array(0.18906704, dtype=float32), 'loss_cross_entropy': Array(0.17390433, dtype=float32)}


  3%|▎         | 33179/1000000 [1:31:23<25:18:08, 10.61it/s]

{'loss': Array(0.18363355, dtype=float32), 'loss_cross_entropy': Array(0.16845708, dtype=float32)}


  3%|▎         | 33189/1000000 [1:31:25<31:28:48,  8.53it/s]

{'loss': Array(0.18931322, dtype=float32), 'loss_cross_entropy': Array(0.1739991, dtype=float32)}


  3%|▎         | 33199/1000000 [1:31:26<26:00:40, 10.32it/s]

{'loss': Array(0.20104554, dtype=float32), 'loss_cross_entropy': Array(0.18456094, dtype=float32)}


  3%|▎         | 33209/1000000 [1:31:28<35:16:42,  7.61it/s]

{'loss': Array(0.21751319, dtype=float32), 'loss_cross_entropy': Array(0.20147185, dtype=float32)}


  3%|▎         | 33219/1000000 [1:31:30<26:53:17,  9.99it/s]

{'loss': Array(0.20396677, dtype=float32), 'loss_cross_entropy': Array(0.18874808, dtype=float32)}


  3%|▎         | 33229/1000000 [1:31:31<35:31:04,  7.56it/s]

{'loss': Array(0.19486424, dtype=float32), 'loss_cross_entropy': Array(0.1800606, dtype=float32)}


  3%|▎         | 33239/1000000 [1:31:33<26:40:01, 10.07it/s]

{'loss': Array(0.2157079, dtype=float32), 'loss_cross_entropy': Array(0.20029242, dtype=float32)}


  3%|▎         | 33249/1000000 [1:31:34<25:06:19, 10.70it/s]

{'loss': Array(0.19396411, dtype=float32), 'loss_cross_entropy': Array(0.17961392, dtype=float32)}


  3%|▎         | 33259/1000000 [1:31:36<29:51:33,  8.99it/s]

{'loss': Array(0.23304777, dtype=float32), 'loss_cross_entropy': Array(0.21547706, dtype=float32)}


  3%|▎         | 33269/1000000 [1:31:37<25:36:27, 10.49it/s]

{'loss': Array(0.19372956, dtype=float32), 'loss_cross_entropy': Array(0.17924736, dtype=float32)}


  3%|▎         | 33279/1000000 [1:31:39<31:49:15,  8.44it/s]

{'loss': Array(0.18197651, dtype=float32), 'loss_cross_entropy': Array(0.16833653, dtype=float32)}


  3%|▎         | 33289/1000000 [1:31:40<26:40:50, 10.06it/s]

{'loss': Array(0.18592317, dtype=float32), 'loss_cross_entropy': Array(0.17125873, dtype=float32)}


  3%|▎         | 33299/1000000 [1:31:42<39:17:28,  6.83it/s]

{'loss': Array(0.20738147, dtype=float32), 'loss_cross_entropy': Array(0.19205318, dtype=float32)}


  3%|▎         | 33309/1000000 [1:31:43<27:29:35,  9.77it/s]

{'loss': Array(0.20698643, dtype=float32), 'loss_cross_entropy': Array(0.19205065, dtype=float32)}


  3%|▎         | 33319/1000000 [1:31:45<39:59:56,  6.71it/s]

{'loss': Array(0.19524114, dtype=float32), 'loss_cross_entropy': Array(0.18017133, dtype=float32)}


  3%|▎         | 33329/1000000 [1:31:46<27:17:36,  9.84it/s]

{'loss': Array(0.21102045, dtype=float32), 'loss_cross_entropy': Array(0.19423094, dtype=float32)}


  3%|▎         | 33339/1000000 [1:31:48<25:55:13, 10.36it/s]

{'loss': Array(0.21998392, dtype=float32), 'loss_cross_entropy': Array(0.2027349, dtype=float32)}


  3%|▎         | 33349/1000000 [1:31:49<35:14:23,  7.62it/s]

{'loss': Array(0.21214199, dtype=float32), 'loss_cross_entropy': Array(0.19720238, dtype=float32)}


  3%|▎         | 33359/1000000 [1:31:51<26:20:54, 10.19it/s]

{'loss': Array(0.19134788, dtype=float32), 'loss_cross_entropy': Array(0.17653151, dtype=float32)}


  3%|▎         | 33369/1000000 [1:31:52<39:17:11,  6.83it/s]

{'loss': Array(0.20172441, dtype=float32), 'loss_cross_entropy': Array(0.18567774, dtype=float32)}


  3%|▎         | 33379/1000000 [1:31:54<27:30:27,  9.76it/s]

{'loss': Array(0.19383714, dtype=float32), 'loss_cross_entropy': Array(0.17907558, dtype=float32)}


  3%|▎         | 33389/1000000 [1:31:55<25:12:51, 10.65it/s]

{'loss': Array(0.18030114, dtype=float32), 'loss_cross_entropy': Array(0.16669397, dtype=float32)}


  3%|▎         | 33399/1000000 [1:31:57<28:34:07,  9.40it/s]

{'loss': Array(0.18816684, dtype=float32), 'loss_cross_entropy': Array(0.17362686, dtype=float32)}


  3%|▎         | 33409/1000000 [1:31:58<25:17:02, 10.62it/s]

{'loss': Array(0.20311196, dtype=float32), 'loss_cross_entropy': Array(0.1880509, dtype=float32)}


  3%|▎         | 33419/1000000 [1:32:00<29:39:29,  9.05it/s]

{'loss': Array(0.20856969, dtype=float32), 'loss_cross_entropy': Array(0.19301014, dtype=float32)}


  3%|▎         | 33429/1000000 [1:32:01<27:05:18,  9.91it/s]

{'loss': Array(0.17051403, dtype=float32), 'loss_cross_entropy': Array(0.15673009, dtype=float32)}


  3%|▎         | 33439/1000000 [1:32:03<39:38:11,  6.77it/s]

{'loss': Array(0.18297242, dtype=float32), 'loss_cross_entropy': Array(0.16882794, dtype=float32)}


  3%|▎         | 33449/1000000 [1:32:04<27:00:45,  9.94it/s]

{'loss': Array(0.18727687, dtype=float32), 'loss_cross_entropy': Array(0.17239557, dtype=float32)}


  3%|▎         | 33459/1000000 [1:32:06<24:55:42, 10.77it/s]

{'loss': Array(0.19951569, dtype=float32), 'loss_cross_entropy': Array(0.18343978, dtype=float32)}


  3%|▎         | 33469/1000000 [1:32:08<32:14:15,  8.33it/s]

{'loss': Array(0.20807175, dtype=float32), 'loss_cross_entropy': Array(0.19263063, dtype=float32)}


  3%|▎         | 33479/1000000 [1:32:09<25:57:22, 10.34it/s]

{'loss': Array(0.19572365, dtype=float32), 'loss_cross_entropy': Array(0.18118532, dtype=float32)}


  3%|▎         | 33489/1000000 [1:32:11<39:32:53,  6.79it/s]

{'loss': Array(0.18127695, dtype=float32), 'loss_cross_entropy': Array(0.1672654, dtype=float32)}


  3%|▎         | 33499/1000000 [1:32:12<27:28:14,  9.77it/s]

{'loss': Array(0.19161484, dtype=float32), 'loss_cross_entropy': Array(0.17626686, dtype=float32)}


  3%|▎         | 33509/1000000 [1:32:20<88:11:37,  3.04it/s] 

{'loss': Array(0.22139914, dtype=float32), 'loss_cross_entropy': Array(0.20442128, dtype=float32)}


  3%|▎         | 33519/1000000 [1:32:22<40:40:31,  6.60it/s]

{'loss': Array(0.19949381, dtype=float32), 'loss_cross_entropy': Array(0.1843689, dtype=float32)}


  3%|▎         | 33529/1000000 [1:32:23<27:21:29,  9.81it/s]

{'loss': Array(0.19643731, dtype=float32), 'loss_cross_entropy': Array(0.18204236, dtype=float32)}


  3%|▎         | 33539/1000000 [1:32:25<32:20:57,  8.30it/s]

{'loss': Array(0.19428645, dtype=float32), 'loss_cross_entropy': Array(0.1797858, dtype=float32)}


  3%|▎         | 33549/1000000 [1:32:26<25:56:44, 10.35it/s]

{'loss': Array(0.21140221, dtype=float32), 'loss_cross_entropy': Array(0.19601756, dtype=float32)}


  3%|▎         | 33559/1000000 [1:32:28<39:37:40,  6.77it/s]

{'loss': Array(0.1994117, dtype=float32), 'loss_cross_entropy': Array(0.18458681, dtype=float32)}


  3%|▎         | 33569/1000000 [1:32:29<27:07:02,  9.90it/s]

{'loss': Array(0.20512593, dtype=float32), 'loss_cross_entropy': Array(0.18915954, dtype=float32)}


  3%|▎         | 33579/1000000 [1:32:30<25:48:05, 10.40it/s]

{'loss': Array(0.2036182, dtype=float32), 'loss_cross_entropy': Array(0.18841222, dtype=float32)}


  3%|▎         | 33589/1000000 [1:32:32<28:56:21,  9.28it/s]

{'loss': Array(0.17466561, dtype=float32), 'loss_cross_entropy': Array(0.16061603, dtype=float32)}


  3%|▎         | 33599/1000000 [1:32:34<26:10:17, 10.26it/s]

{'loss': Array(0.19829106, dtype=float32), 'loss_cross_entropy': Array(0.18296894, dtype=float32)}


  3%|▎         | 33609/1000000 [1:32:35<39:31:42,  6.79it/s]

{'loss': Array(0.20543371, dtype=float32), 'loss_cross_entropy': Array(0.1905006, dtype=float32)}


  3%|▎         | 33619/1000000 [1:32:37<27:43:26,  9.68it/s]

{'loss': Array(0.21356185, dtype=float32), 'loss_cross_entropy': Array(0.19770816, dtype=float32)}


  3%|▎         | 33629/1000000 [1:32:38<25:16:40, 10.62it/s]

{'loss': Array(0.1925593, dtype=float32), 'loss_cross_entropy': Array(0.17788823, dtype=float32)}


  3%|▎         | 33639/1000000 [1:32:40<29:46:40,  9.01it/s]

{'loss': Array(0.20490156, dtype=float32), 'loss_cross_entropy': Array(0.19017267, dtype=float32)}


  3%|▎         | 33649/1000000 [1:32:41<25:40:39, 10.45it/s]

{'loss': Array(0.212422, dtype=float32), 'loss_cross_entropy': Array(0.19575343, dtype=float32)}


  3%|▎         | 33659/1000000 [1:32:43<35:16:50,  7.61it/s]

{'loss': Array(0.19414859, dtype=float32), 'loss_cross_entropy': Array(0.17946951, dtype=float32)}


  3%|▎         | 33669/1000000 [1:32:44<26:40:10, 10.06it/s]

{'loss': Array(0.20574044, dtype=float32), 'loss_cross_entropy': Array(0.19043152, dtype=float32)}


  3%|▎         | 33679/1000000 [1:32:46<39:45:00,  6.75it/s]

{'loss': Array(0.1822743, dtype=float32), 'loss_cross_entropy': Array(0.16757523, dtype=float32)}


  3%|▎         | 33689/1000000 [1:32:47<27:45:13,  9.67it/s]

{'loss': Array(0.19139884, dtype=float32), 'loss_cross_entropy': Array(0.17703983, dtype=float32)}


  3%|▎         | 33699/1000000 [1:32:49<25:19:54, 10.60it/s]

{'loss': Array(0.19261871, dtype=float32), 'loss_cross_entropy': Array(0.17808414, dtype=float32)}


  3%|▎         | 33709/1000000 [1:32:50<31:42:51,  8.46it/s]

{'loss': Array(0.21178268, dtype=float32), 'loss_cross_entropy': Array(0.19550776, dtype=float32)}


  3%|▎         | 33719/1000000 [1:32:52<25:44:16, 10.43it/s]

{'loss': Array(0.19744588, dtype=float32), 'loss_cross_entropy': Array(0.18253198, dtype=float32)}


  3%|▎         | 33729/1000000 [1:32:53<34:59:17,  7.67it/s]

{'loss': Array(0.19682711, dtype=float32), 'loss_cross_entropy': Array(0.18259194, dtype=float32)}


  3%|▎         | 33739/1000000 [1:32:55<26:08:49, 10.27it/s]

{'loss': Array(0.18676649, dtype=float32), 'loss_cross_entropy': Array(0.17190544, dtype=float32)}


  3%|▎         | 33749/1000000 [1:32:56<25:10:06, 10.66it/s]

{'loss': Array(0.22022982, dtype=float32), 'loss_cross_entropy': Array(0.20430437, dtype=float32)}


  3%|▎         | 33759/1000000 [1:32:58<28:18:56,  9.48it/s]

{'loss': Array(0.16757312, dtype=float32), 'loss_cross_entropy': Array(0.1555959, dtype=float32)}


  3%|▎         | 33769/1000000 [1:32:59<26:23:14, 10.17it/s]

{'loss': Array(0.2184465, dtype=float32), 'loss_cross_entropy': Array(0.20249434, dtype=float32)}


  3%|▎         | 33779/1000000 [1:33:01<40:37:11,  6.61it/s]

{'loss': Array(0.1968395, dtype=float32), 'loss_cross_entropy': Array(0.18204026, dtype=float32)}


  3%|▎         | 33789/1000000 [1:33:02<28:07:23,  9.54it/s]

{'loss': Array(0.20909739, dtype=float32), 'loss_cross_entropy': Array(0.19311285, dtype=float32)}


  3%|▎         | 33799/1000000 [1:33:04<25:31:33, 10.51it/s]

{'loss': Array(0.1833184, dtype=float32), 'loss_cross_entropy': Array(0.16950354, dtype=float32)}


  3%|▎         | 33809/1000000 [1:33:05<31:52:55,  8.42it/s]

{'loss': Array(0.18416679, dtype=float32), 'loss_cross_entropy': Array(0.1690139, dtype=float32)}


  3%|▎         | 33819/1000000 [1:33:07<25:32:51, 10.51it/s]

{'loss': Array(0.1875767, dtype=float32), 'loss_cross_entropy': Array(0.17262797, dtype=float32)}


  3%|▎         | 33829/1000000 [1:33:08<35:38:43,  7.53it/s]

{'loss': Array(0.19025339, dtype=float32), 'loss_cross_entropy': Array(0.17586349, dtype=float32)}


  3%|▎         | 33839/1000000 [1:33:10<27:07:46,  9.89it/s]

{'loss': Array(0.17733216, dtype=float32), 'loss_cross_entropy': Array(0.16332847, dtype=float32)}


  3%|▎         | 33849/1000000 [1:33:12<36:22:10,  7.38it/s]

{'loss': Array(0.20355654, dtype=float32), 'loss_cross_entropy': Array(0.18800248, dtype=float32)}


  3%|▎         | 33859/1000000 [1:33:13<26:52:10,  9.99it/s]

{'loss': Array(0.20017247, dtype=float32), 'loss_cross_entropy': Array(0.18449719, dtype=float32)}


  3%|▎         | 33869/1000000 [1:33:14<25:36:57, 10.48it/s]

{'loss': Array(0.186702, dtype=float32), 'loss_cross_entropy': Array(0.17217484, dtype=float32)}


  3%|▎         | 33879/1000000 [1:33:16<29:47:54,  9.01it/s]

{'loss': Array(0.17958508, dtype=float32), 'loss_cross_entropy': Array(0.16493705, dtype=float32)}


  3%|▎         | 33889/1000000 [1:33:17<26:11:56, 10.24it/s]

{'loss': Array(0.20558445, dtype=float32), 'loss_cross_entropy': Array(0.19097148, dtype=float32)}


  3%|▎         | 33899/1000000 [1:33:19<32:05:26,  8.36it/s]

{'loss': Array(0.2162851, dtype=float32), 'loss_cross_entropy': Array(0.20036049, dtype=float32)}


  3%|▎         | 33909/1000000 [1:33:20<26:24:28, 10.16it/s]

{'loss': Array(0.20212483, dtype=float32), 'loss_cross_entropy': Array(0.18740807, dtype=float32)}


  3%|▎         | 33919/1000000 [1:33:22<39:26:18,  6.80it/s]

{'loss': Array(0.19520615, dtype=float32), 'loss_cross_entropy': Array(0.18113212, dtype=float32)}


  3%|▎         | 33929/1000000 [1:33:24<27:58:33,  9.59it/s]

{'loss': Array(0.19175838, dtype=float32), 'loss_cross_entropy': Array(0.1776091, dtype=float32)}


  3%|▎         | 33939/1000000 [1:33:25<40:17:52,  6.66it/s]

{'loss': Array(0.18394957, dtype=float32), 'loss_cross_entropy': Array(0.16994868, dtype=float32)}


  3%|▎         | 33949/1000000 [1:33:27<27:29:27,  9.76it/s]

{'loss': Array(0.1993431, dtype=float32), 'loss_cross_entropy': Array(0.18374543, dtype=float32)}


  3%|▎         | 33959/1000000 [1:33:28<25:24:42, 10.56it/s]

{'loss': Array(0.1942538, dtype=float32), 'loss_cross_entropy': Array(0.18022397, dtype=float32)}


  3%|▎         | 33969/1000000 [1:33:30<34:21:12,  7.81it/s]

{'loss': Array(0.20373125, dtype=float32), 'loss_cross_entropy': Array(0.18812831, dtype=float32)}


  3%|▎         | 33979/1000000 [1:33:31<26:36:23, 10.09it/s]

{'loss': Array(0.19490336, dtype=float32), 'loss_cross_entropy': Array(0.17974769, dtype=float32)}


  3%|▎         | 33989/1000000 [1:33:33<38:56:35,  6.89it/s]

{'loss': Array(0.19294493, dtype=float32), 'loss_cross_entropy': Array(0.1775298, dtype=float32)}


  3%|▎         | 33999/1000000 [1:33:34<26:52:31,  9.98it/s]

{'loss': Array(0.19333903, dtype=float32), 'loss_cross_entropy': Array(0.17841154, dtype=float32)}


  3%|▎         | 34009/1000000 [1:33:42<88:39:47,  3.03it/s] 

{'loss': Array(0.20650266, dtype=float32), 'loss_cross_entropy': Array(0.1901611, dtype=float32)}


  3%|▎         | 34019/1000000 [1:33:44<39:24:46,  6.81it/s] 

{'loss': Array(0.21272634, dtype=float32), 'loss_cross_entropy': Array(0.19652444, dtype=float32)}


  3%|▎         | 34029/1000000 [1:33:45<27:06:17,  9.90it/s]

{'loss': Array(0.18983942, dtype=float32), 'loss_cross_entropy': Array(0.17462113, dtype=float32)}


  3%|▎         | 34039/1000000 [1:33:47<28:14:06,  9.50it/s]

{'loss': Array(0.18712063, dtype=float32), 'loss_cross_entropy': Array(0.17315452, dtype=float32)}


  3%|▎         | 34049/1000000 [1:33:48<25:29:47, 10.52it/s]

{'loss': Array(0.19924973, dtype=float32), 'loss_cross_entropy': Array(0.18423362, dtype=float32)}


  3%|▎         | 34059/1000000 [1:33:50<39:57:34,  6.71it/s]

{'loss': Array(0.18353193, dtype=float32), 'loss_cross_entropy': Array(0.16983752, dtype=float32)}


  3%|▎         | 34069/1000000 [1:33:51<27:02:43,  9.92it/s]

{'loss': Array(0.21475, dtype=float32), 'loss_cross_entropy': Array(0.19893591, dtype=float32)}


  3%|▎         | 34079/1000000 [1:33:53<25:06:50, 10.68it/s]

{'loss': Array(0.1926823, dtype=float32), 'loss_cross_entropy': Array(0.17743354, dtype=float32)}


  3%|▎         | 34089/1000000 [1:33:54<31:40:37,  8.47it/s]

{'loss': Array(0.18758301, dtype=float32), 'loss_cross_entropy': Array(0.17393024, dtype=float32)}


  3%|▎         | 34099/1000000 [1:33:56<25:40:56, 10.45it/s]

{'loss': Array(0.20102353, dtype=float32), 'loss_cross_entropy': Array(0.18621735, dtype=float32)}


  3%|▎         | 34109/1000000 [1:33:57<39:11:41,  6.85it/s]

{'loss': Array(0.19121082, dtype=float32), 'loss_cross_entropy': Array(0.17655125, dtype=float32)}


  3%|▎         | 34119/1000000 [1:33:59<27:36:36,  9.72it/s]

{'loss': Array(0.20596693, dtype=float32), 'loss_cross_entropy': Array(0.19089751, dtype=float32)}


  3%|▎         | 34129/1000000 [1:34:00<26:22:10, 10.17it/s]

{'loss': Array(0.19990347, dtype=float32), 'loss_cross_entropy': Array(0.18515694, dtype=float32)}


  3%|▎         | 34139/1000000 [1:34:02<30:18:23,  8.85it/s]

{'loss': Array(0.20222838, dtype=float32), 'loss_cross_entropy': Array(0.18660818, dtype=float32)}


  3%|▎         | 34149/1000000 [1:34:03<25:31:28, 10.51it/s]

{'loss': Array(0.22060286, dtype=float32), 'loss_cross_entropy': Array(0.20406687, dtype=float32)}


  3%|▎         | 34159/1000000 [1:34:05<35:29:05,  7.56it/s]

{'loss': Array(0.20760047, dtype=float32), 'loss_cross_entropy': Array(0.19286834, dtype=float32)}


  3%|▎         | 34169/1000000 [1:34:06<27:06:48,  9.89it/s]

{'loss': Array(0.20190907, dtype=float32), 'loss_cross_entropy': Array(0.18564463, dtype=float32)}


  3%|▎         | 34179/1000000 [1:34:08<25:26:28, 10.55it/s]

{'loss': Array(0.20005165, dtype=float32), 'loss_cross_entropy': Array(0.18597221, dtype=float32)}


  3%|▎         | 34189/1000000 [1:34:09<31:44:27,  8.45it/s]

{'loss': Array(0.19905977, dtype=float32), 'loss_cross_entropy': Array(0.1844172, dtype=float32)}


  3%|▎         | 34199/1000000 [1:34:11<25:51:07, 10.38it/s]

{'loss': Array(0.19419894, dtype=float32), 'loss_cross_entropy': Array(0.18015586, dtype=float32)}


  3%|▎         | 34209/1000000 [1:34:12<24:52:33, 10.78it/s]

{'loss': Array(0.21395056, dtype=float32), 'loss_cross_entropy': Array(0.19806646, dtype=float32)}


  3%|▎         | 34219/1000000 [1:34:14<28:02:47,  9.57it/s]

{'loss': Array(0.21435516, dtype=float32), 'loss_cross_entropy': Array(0.19877201, dtype=float32)}


  3%|▎         | 34229/1000000 [1:34:15<25:39:54, 10.45it/s]

{'loss': Array(0.20835887, dtype=float32), 'loss_cross_entropy': Array(0.19323221, dtype=float32)}


  3%|▎         | 34239/1000000 [1:34:17<32:26:10,  8.27it/s]

{'loss': Array(0.20175326, dtype=float32), 'loss_cross_entropy': Array(0.18668549, dtype=float32)}


  3%|▎         | 34249/1000000 [1:34:18<26:52:39,  9.98it/s]

{'loss': Array(0.2009828, dtype=float32), 'loss_cross_entropy': Array(0.18636642, dtype=float32)}


  3%|▎         | 34259/1000000 [1:34:20<40:14:14,  6.67it/s]

{'loss': Array(0.20378259, dtype=float32), 'loss_cross_entropy': Array(0.18888119, dtype=float32)}


  3%|▎         | 34269/1000000 [1:34:21<27:12:57,  9.86it/s]

{'loss': Array(0.18556629, dtype=float32), 'loss_cross_entropy': Array(0.1718108, dtype=float32)}


  3%|▎         | 34279/1000000 [1:34:23<39:20:02,  6.82it/s]

{'loss': Array(0.21846211, dtype=float32), 'loss_cross_entropy': Array(0.20177038, dtype=float32)}


  3%|▎         | 34289/1000000 [1:34:24<27:04:56,  9.91it/s]

{'loss': Array(0.21318734, dtype=float32), 'loss_cross_entropy': Array(0.19784164, dtype=float32)}


  3%|▎         | 34299/1000000 [1:34:26<25:24:31, 10.56it/s]

{'loss': Array(0.20174284, dtype=float32), 'loss_cross_entropy': Array(0.18680261, dtype=float32)}


  3%|▎         | 34309/1000000 [1:34:27<34:42:34,  7.73it/s]

{'loss': Array(0.18722795, dtype=float32), 'loss_cross_entropy': Array(0.17293958, dtype=float32)}


  3%|▎         | 34319/1000000 [1:34:29<26:33:44, 10.10it/s]

{'loss': Array(0.21321361, dtype=float32), 'loss_cross_entropy': Array(0.19719434, dtype=float32)}


  3%|▎         | 34329/1000000 [1:34:31<40:03:25,  6.70it/s]

{'loss': Array(0.18419293, dtype=float32), 'loss_cross_entropy': Array(0.1700556, dtype=float32)}


  3%|▎         | 34339/1000000 [1:34:32<27:52:13,  9.62it/s]

{'loss': Array(0.19619992, dtype=float32), 'loss_cross_entropy': Array(0.18178193, dtype=float32)}


  3%|▎         | 34349/1000000 [1:34:33<26:04:50, 10.28it/s]

{'loss': Array(0.20299032, dtype=float32), 'loss_cross_entropy': Array(0.1885653, dtype=float32)}


  3%|▎         | 34359/1000000 [1:34:35<28:08:40,  9.53it/s]

{'loss': Array(0.19399513, dtype=float32), 'loss_cross_entropy': Array(0.17903607, dtype=float32)}


  3%|▎         | 34369/1000000 [1:34:36<25:49:33, 10.39it/s]

{'loss': Array(0.23044877, dtype=float32), 'loss_cross_entropy': Array(0.21438423, dtype=float32)}


  3%|▎         | 34379/1000000 [1:34:38<28:06:03,  9.55it/s]

{'loss': Array(0.19659576, dtype=float32), 'loss_cross_entropy': Array(0.18208373, dtype=float32)}


  3%|▎         | 34389/1000000 [1:34:39<24:53:31, 10.78it/s]

{'loss': Array(0.20418075, dtype=float32), 'loss_cross_entropy': Array(0.1884539, dtype=float32)}


  3%|▎         | 34399/1000000 [1:34:41<39:13:47,  6.84it/s]

{'loss': Array(0.1998494, dtype=float32), 'loss_cross_entropy': Array(0.1840481, dtype=float32)}


  3%|▎         | 34409/1000000 [1:34:43<27:47:32,  9.65it/s]

{'loss': Array(0.1806181, dtype=float32), 'loss_cross_entropy': Array(0.16626559, dtype=float32)}


  3%|▎         | 34419/1000000 [1:34:44<25:52:09, 10.37it/s]

{'loss': Array(0.2026712, dtype=float32), 'loss_cross_entropy': Array(0.18836035, dtype=float32)}


  3%|▎         | 34429/1000000 [1:34:46<31:57:01,  8.39it/s]

{'loss': Array(0.18785773, dtype=float32), 'loss_cross_entropy': Array(0.17338741, dtype=float32)}


  3%|▎         | 34439/1000000 [1:34:47<25:57:55, 10.33it/s]

{'loss': Array(0.19203688, dtype=float32), 'loss_cross_entropy': Array(0.17652218, dtype=float32)}


  3%|▎         | 34449/1000000 [1:34:49<39:26:56,  6.80it/s]

{'loss': Array(0.20406833, dtype=float32), 'loss_cross_entropy': Array(0.18924361, dtype=float32)}


  3%|▎         | 34459/1000000 [1:34:50<27:26:04,  9.78it/s]

{'loss': Array(0.20294967, dtype=float32), 'loss_cross_entropy': Array(0.1881092, dtype=float32)}


  3%|▎         | 34469/1000000 [1:34:51<25:40:29, 10.45it/s]

{'loss': Array(0.20134802, dtype=float32), 'loss_cross_entropy': Array(0.1858599, dtype=float32)}


  3%|▎         | 34479/1000000 [1:34:53<29:23:46,  9.12it/s]

{'loss': Array(0.19690122, dtype=float32), 'loss_cross_entropy': Array(0.18184412, dtype=float32)}


  3%|▎         | 34489/1000000 [1:34:55<25:49:05, 10.39it/s]

{'loss': Array(0.15375778, dtype=float32), 'loss_cross_entropy': Array(0.14244467, dtype=float32)}


  3%|▎         | 34499/1000000 [1:34:56<35:05:43,  7.64it/s]

{'loss': Array(0.21593602, dtype=float32), 'loss_cross_entropy': Array(0.20031062, dtype=float32)}


  3%|▎         | 34509/1000000 [1:35:04<89:04:24,  3.01it/s] 

{'loss': Array(0.21090744, dtype=float32), 'loss_cross_entropy': Array(0.19540411, dtype=float32)}


  3%|▎         | 34519/1000000 [1:35:06<46:49:32,  5.73it/s] 

{'loss': Array(0.20847896, dtype=float32), 'loss_cross_entropy': Array(0.19240049, dtype=float32)}


  3%|▎         | 34529/1000000 [1:35:07<29:24:27,  9.12it/s]

{'loss': Array(0.18174146, dtype=float32), 'loss_cross_entropy': Array(0.16788949, dtype=float32)}


  3%|▎         | 34539/1000000 [1:35:09<26:19:59, 10.18it/s]

{'loss': Array(0.19021963, dtype=float32), 'loss_cross_entropy': Array(0.17502342, dtype=float32)}


  3%|▎         | 34549/1000000 [1:35:10<32:02:32,  8.37it/s]

{'loss': Array(0.18749768, dtype=float32), 'loss_cross_entropy': Array(0.17266391, dtype=float32)}


  3%|▎         | 34559/1000000 [1:35:12<26:17:42, 10.20it/s]

{'loss': Array(0.1838953, dtype=float32), 'loss_cross_entropy': Array(0.16966468, dtype=float32)}


  3%|▎         | 34569/1000000 [1:35:13<25:07:51, 10.67it/s]

{'loss': Array(0.20740397, dtype=float32), 'loss_cross_entropy': Array(0.19141856, dtype=float32)}


  3%|▎         | 34579/1000000 [1:35:15<28:18:13,  9.47it/s]

{'loss': Array(0.21820875, dtype=float32), 'loss_cross_entropy': Array(0.20146659, dtype=float32)}


  3%|▎         | 34589/1000000 [1:35:16<25:27:33, 10.53it/s]

{'loss': Array(0.19152851, dtype=float32), 'loss_cross_entropy': Array(0.17781961, dtype=float32)}


  3%|▎         | 34599/1000000 [1:35:18<31:58:25,  8.39it/s]

{'loss': Array(0.19045094, dtype=float32), 'loss_cross_entropy': Array(0.1762958, dtype=float32)}


  3%|▎         | 34609/1000000 [1:35:19<26:56:25,  9.95it/s]

{'loss': Array(0.19806342, dtype=float32), 'loss_cross_entropy': Array(0.18346284, dtype=float32)}


  3%|▎         | 34619/1000000 [1:35:21<35:46:12,  7.50it/s]

{'loss': Array(0.22291596, dtype=float32), 'loss_cross_entropy': Array(0.20663524, dtype=float32)}


  3%|▎         | 34629/1000000 [1:35:23<27:21:30,  9.80it/s]

{'loss': Array(0.193456, dtype=float32), 'loss_cross_entropy': Array(0.17841934, dtype=float32)}


  3%|▎         | 34639/1000000 [1:35:24<25:09:09, 10.66it/s]

{'loss': Array(0.19642289, dtype=float32), 'loss_cross_entropy': Array(0.18137033, dtype=float32)}


  3%|▎         | 34649/1000000 [1:35:26<32:18:08,  8.30it/s]

{'loss': Array(0.17847292, dtype=float32), 'loss_cross_entropy': Array(0.1645485, dtype=float32)}


  3%|▎         | 34659/1000000 [1:35:27<25:46:02, 10.41it/s]

{'loss': Array(0.18707566, dtype=float32), 'loss_cross_entropy': Array(0.17325118, dtype=float32)}


  3%|▎         | 34669/1000000 [1:35:28<24:54:00, 10.77it/s]

{'loss': Array(0.22030869, dtype=float32), 'loss_cross_entropy': Array(0.20434277, dtype=float32)}


  3%|▎         | 34679/1000000 [1:35:30<28:07:50,  9.53it/s]

{'loss': Array(0.19011739, dtype=float32), 'loss_cross_entropy': Array(0.17566307, dtype=float32)}


  3%|▎         | 34689/1000000 [1:35:31<27:26:01,  9.77it/s]

{'loss': Array(0.19025505, dtype=float32), 'loss_cross_entropy': Array(0.1757282, dtype=float32)}


  3%|▎         | 34699/1000000 [1:35:33<31:44:17,  8.45it/s]

{'loss': Array(0.22008872, dtype=float32), 'loss_cross_entropy': Array(0.20383191, dtype=float32)}


  3%|▎         | 34709/1000000 [1:35:35<26:06:05, 10.27it/s]

{'loss': Array(0.21688297, dtype=float32), 'loss_cross_entropy': Array(0.20143037, dtype=float32)}


  3%|▎         | 34719/1000000 [1:35:36<40:18:26,  6.65it/s]

{'loss': Array(0.18971291, dtype=float32), 'loss_cross_entropy': Array(0.17591715, dtype=float32)}


  3%|▎         | 34729/1000000 [1:35:38<27:33:35,  9.73it/s]

{'loss': Array(0.19591014, dtype=float32), 'loss_cross_entropy': Array(0.18262553, dtype=float32)}


  3%|▎         | 34739/1000000 [1:35:39<25:05:25, 10.69it/s]

{'loss': Array(0.21750963, dtype=float32), 'loss_cross_entropy': Array(0.20134735, dtype=float32)}


  3%|▎         | 34749/1000000 [1:35:41<34:30:05,  7.77it/s]

{'loss': Array(0.21021938, dtype=float32), 'loss_cross_entropy': Array(0.19491102, dtype=float32)}


  3%|▎         | 34759/1000000 [1:35:42<26:02:11, 10.30it/s]

{'loss': Array(0.18176877, dtype=float32), 'loss_cross_entropy': Array(0.16820513, dtype=float32)}


  3%|▎         | 34769/1000000 [1:35:43<24:50:02, 10.80it/s]

{'loss': Array(0.17440107, dtype=float32), 'loss_cross_entropy': Array(0.16017246, dtype=float32)}


  3%|▎         | 34779/1000000 [1:35:45<29:19:40,  9.14it/s]

{'loss': Array(0.21827121, dtype=float32), 'loss_cross_entropy': Array(0.20166035, dtype=float32)}


  3%|▎         | 34789/1000000 [1:35:46<25:15:25, 10.62it/s]

{'loss': Array(0.19972385, dtype=float32), 'loss_cross_entropy': Array(0.18449119, dtype=float32)}


  3%|▎         | 34799/1000000 [1:35:48<32:01:51,  8.37it/s]

{'loss': Array(0.19663993, dtype=float32), 'loss_cross_entropy': Array(0.18110566, dtype=float32)}


  3%|▎         | 34809/1000000 [1:35:49<26:30:34, 10.11it/s]

{'loss': Array(0.20873843, dtype=float32), 'loss_cross_entropy': Array(0.19280419, dtype=float32)}


  3%|▎         | 34819/1000000 [1:35:51<25:00:25, 10.72it/s]

{'loss': Array(0.20051073, dtype=float32), 'loss_cross_entropy': Array(0.1857888, dtype=float32)}


  3%|▎         | 34829/1000000 [1:35:52<28:25:56,  9.43it/s]

{'loss': Array(0.19621876, dtype=float32), 'loss_cross_entropy': Array(0.18089108, dtype=float32)}


  3%|▎         | 34839/1000000 [1:35:54<25:38:15, 10.46it/s]

{'loss': Array(0.20327392, dtype=float32), 'loss_cross_entropy': Array(0.18795978, dtype=float32)}


  3%|▎         | 34849/1000000 [1:35:56<35:03:21,  7.65it/s]

{'loss': Array(0.20547791, dtype=float32), 'loss_cross_entropy': Array(0.19035819, dtype=float32)}


  3%|▎         | 34859/1000000 [1:35:57<26:23:51, 10.16it/s]

{'loss': Array(0.19398625, dtype=float32), 'loss_cross_entropy': Array(0.1784705, dtype=float32)}


  3%|▎         | 34869/1000000 [1:35:58<25:54:55, 10.34it/s]

{'loss': Array(0.19949383, dtype=float32), 'loss_cross_entropy': Array(0.18423977, dtype=float32)}


  3%|▎         | 34879/1000000 [1:36:00<29:19:22,  9.14it/s]

{'loss': Array(0.21996827, dtype=float32), 'loss_cross_entropy': Array(0.20458367, dtype=float32)}


  3%|▎         | 34889/1000000 [1:36:02<26:24:02, 10.15it/s]

{'loss': Array(0.20333126, dtype=float32), 'loss_cross_entropy': Array(0.18826392, dtype=float32)}


  3%|▎         | 34899/1000000 [1:36:03<30:33:33,  8.77it/s]

{'loss': Array(0.20645201, dtype=float32), 'loss_cross_entropy': Array(0.1910997, dtype=float32)}


  3%|▎         | 34909/1000000 [1:36:05<26:29:22, 10.12it/s]

{'loss': Array(0.20535974, dtype=float32), 'loss_cross_entropy': Array(0.18988557, dtype=float32)}


  3%|▎         | 34919/1000000 [1:36:06<39:30:47,  6.78it/s]

{'loss': Array(0.18295579, dtype=float32), 'loss_cross_entropy': Array(0.1689382, dtype=float32)}


  3%|▎         | 34929/1000000 [1:36:08<27:40:00,  9.69it/s]

{'loss': Array(0.1946816, dtype=float32), 'loss_cross_entropy': Array(0.18040493, dtype=float32)}


  3%|▎         | 34939/1000000 [1:36:09<25:49:21, 10.38it/s]

{'loss': Array(0.2234954, dtype=float32), 'loss_cross_entropy': Array(0.20804901, dtype=float32)}


  3%|▎         | 34949/1000000 [1:36:11<31:22:47,  8.54it/s]

{'loss': Array(0.19246082, dtype=float32), 'loss_cross_entropy': Array(0.17820232, dtype=float32)}


  3%|▎         | 34959/1000000 [1:36:12<25:25:08, 10.55it/s]

{'loss': Array(0.17035556, dtype=float32), 'loss_cross_entropy': Array(0.15700714, dtype=float32)}


  3%|▎         | 34969/1000000 [1:36:14<34:50:54,  7.69it/s]

{'loss': Array(0.20248292, dtype=float32), 'loss_cross_entropy': Array(0.18695356, dtype=float32)}


  3%|▎         | 34979/1000000 [1:36:15<26:23:58, 10.15it/s]

{'loss': Array(0.19692364, dtype=float32), 'loss_cross_entropy': Array(0.18153292, dtype=float32)}


  3%|▎         | 34989/1000000 [1:36:17<34:46:17,  7.71it/s]

{'loss': Array(0.19226198, dtype=float32), 'loss_cross_entropy': Array(0.17664713, dtype=float32)}


  3%|▎         | 34999/1000000 [1:36:18<26:17:54, 10.19it/s]

{'loss': Array(0.17912234, dtype=float32), 'loss_cross_entropy': Array(0.16587323, dtype=float32)}


  4%|▎         | 35009/1000000 [1:36:26<89:03:34,  3.01it/s] 

{'loss': Array(0.21642132, dtype=float32), 'loss_cross_entropy': Array(0.20152672, dtype=float32)}


  4%|▎         | 35019/1000000 [1:36:28<41:28:20,  6.46it/s]

{'loss': Array(0.19614863, dtype=float32), 'loss_cross_entropy': Array(0.18250078, dtype=float32)}


  4%|▎         | 35029/1000000 [1:36:29<28:25:49,  9.43it/s]

{'loss': Array(0.20220374, dtype=float32), 'loss_cross_entropy': Array(0.18748379, dtype=float32)}


  4%|▎         | 35039/1000000 [1:36:31<35:48:54,  7.48it/s]

{'loss': Array(0.1776161, dtype=float32), 'loss_cross_entropy': Array(0.16406377, dtype=float32)}


  4%|▎         | 35049/1000000 [1:36:33<26:27:47, 10.13it/s]

{'loss': Array(0.19506428, dtype=float32), 'loss_cross_entropy': Array(0.1804045, dtype=float32)}


  4%|▎         | 35059/1000000 [1:36:34<40:20:02,  6.65it/s]

{'loss': Array(0.21524723, dtype=float32), 'loss_cross_entropy': Array(0.20032358, dtype=float32)}


  4%|▎         | 35069/1000000 [1:36:36<27:32:46,  9.73it/s]

{'loss': Array(0.21028844, dtype=float32), 'loss_cross_entropy': Array(0.19582902, dtype=float32)}


  4%|▎         | 35079/1000000 [1:36:37<25:41:16, 10.43it/s]

{'loss': Array(0.17724347, dtype=float32), 'loss_cross_entropy': Array(0.16359244, dtype=float32)}


  4%|▎         | 35089/1000000 [1:36:39<33:11:26,  8.08it/s]

{'loss': Array(0.21143293, dtype=float32), 'loss_cross_entropy': Array(0.19436859, dtype=float32)}


  4%|▎         | 35099/1000000 [1:36:40<26:36:36, 10.07it/s]

{'loss': Array(0.19244711, dtype=float32), 'loss_cross_entropy': Array(0.17799321, dtype=float32)}


  4%|▎         | 35109/1000000 [1:36:42<35:35:47,  7.53it/s]

{'loss': Array(0.19888249, dtype=float32), 'loss_cross_entropy': Array(0.18406044, dtype=float32)}


  4%|▎         | 35119/1000000 [1:36:43<26:36:26, 10.07it/s]

{'loss': Array(0.19698219, dtype=float32), 'loss_cross_entropy': Array(0.18194209, dtype=float32)}


  4%|▎         | 35129/1000000 [1:36:45<26:06:22, 10.27it/s]

{'loss': Array(0.20595042, dtype=float32), 'loss_cross_entropy': Array(0.19032668, dtype=float32)}


  4%|▎         | 35139/1000000 [1:36:47<28:41:06,  9.34it/s]

{'loss': Array(0.213491, dtype=float32), 'loss_cross_entropy': Array(0.19897893, dtype=float32)}


  4%|▎         | 35149/1000000 [1:36:48<25:26:14, 10.54it/s]

{'loss': Array(0.20561425, dtype=float32), 'loss_cross_entropy': Array(0.19074555, dtype=float32)}


  4%|▎         | 35159/1000000 [1:36:50<39:05:02,  6.86it/s]

{'loss': Array(0.20703118, dtype=float32), 'loss_cross_entropy': Array(0.1919153, dtype=float32)}


  4%|▎         | 35169/1000000 [1:36:51<27:23:42,  9.78it/s]

{'loss': Array(0.21228217, dtype=float32), 'loss_cross_entropy': Array(0.19784556, dtype=float32)}


  4%|▎         | 35179/1000000 [1:36:52<25:43:48, 10.42it/s]

{'loss': Array(0.20584038, dtype=float32), 'loss_cross_entropy': Array(0.19015153, dtype=float32)}


  4%|▎         | 35189/1000000 [1:36:54<31:48:21,  8.43it/s]

{'loss': Array(0.18270491, dtype=float32), 'loss_cross_entropy': Array(0.16834544, dtype=float32)}


  4%|▎         | 35199/1000000 [1:36:55<25:33:10, 10.49it/s]

{'loss': Array(0.1784965, dtype=float32), 'loss_cross_entropy': Array(0.16554847, dtype=float32)}


  4%|▎         | 35209/1000000 [1:36:57<34:54:26,  7.68it/s]

{'loss': Array(0.17628829, dtype=float32), 'loss_cross_entropy': Array(0.1631475, dtype=float32)}


  4%|▎         | 35219/1000000 [1:36:58<27:05:18,  9.89it/s]

{'loss': Array(0.19427846, dtype=float32), 'loss_cross_entropy': Array(0.17898235, dtype=float32)}


  4%|▎         | 35229/1000000 [1:37:00<36:21:18,  7.37it/s]

{'loss': Array(0.18603031, dtype=float32), 'loss_cross_entropy': Array(0.17249517, dtype=float32)}


  4%|▎         | 35239/1000000 [1:37:02<27:32:03,  9.73it/s]

{'loss': Array(0.1747097, dtype=float32), 'loss_cross_entropy': Array(0.16106144, dtype=float32)}


  4%|▎         | 35249/1000000 [1:37:03<26:07:41, 10.26it/s]

{'loss': Array(0.19692841, dtype=float32), 'loss_cross_entropy': Array(0.18123217, dtype=float32)}


  4%|▎         | 35259/1000000 [1:37:05<30:16:48,  8.85it/s]

{'loss': Array(0.1895727, dtype=float32), 'loss_cross_entropy': Array(0.176032, dtype=float32)}


  4%|▎         | 35269/1000000 [1:37:06<26:09:52, 10.24it/s]

{'loss': Array(0.21078837, dtype=float32), 'loss_cross_entropy': Array(0.19468151, dtype=float32)}


  4%|▎         | 35279/1000000 [1:37:08<33:14:03,  8.06it/s]

{'loss': Array(0.20749752, dtype=float32), 'loss_cross_entropy': Array(0.19189647, dtype=float32)}


  4%|▎         | 35289/1000000 [1:37:09<26:28:58, 10.12it/s]

{'loss': Array(0.17585775, dtype=float32), 'loss_cross_entropy': Array(0.16137111, dtype=float32)}


  4%|▎         | 35299/1000000 [1:37:11<39:30:39,  6.78it/s]

{'loss': Array(0.19221629, dtype=float32), 'loss_cross_entropy': Array(0.17756872, dtype=float32)}


  4%|▎         | 35309/1000000 [1:37:12<27:23:06,  9.79it/s]

{'loss': Array(0.1835629, dtype=float32), 'loss_cross_entropy': Array(0.16950245, dtype=float32)}


  4%|▎         | 35319/1000000 [1:37:14<39:44:36,  6.74it/s]

{'loss': Array(0.18599324, dtype=float32), 'loss_cross_entropy': Array(0.17228429, dtype=float32)}


  4%|▎         | 35329/1000000 [1:37:15<27:10:00,  9.86it/s]

{'loss': Array(0.19108087, dtype=float32), 'loss_cross_entropy': Array(0.17738405, dtype=float32)}


  4%|▎         | 35339/1000000 [1:37:17<24:58:02, 10.73it/s]

{'loss': Array(0.18667535, dtype=float32), 'loss_cross_entropy': Array(0.17214154, dtype=float32)}


  4%|▎         | 35349/1000000 [1:37:18<34:48:44,  7.70it/s]

{'loss': Array(0.20563637, dtype=float32), 'loss_cross_entropy': Array(0.18967526, dtype=float32)}


  4%|▎         | 35359/1000000 [1:37:20<26:12:57, 10.22it/s]

{'loss': Array(0.20490408, dtype=float32), 'loss_cross_entropy': Array(0.19079152, dtype=float32)}


  4%|▎         | 35369/1000000 [1:37:21<39:26:11,  6.79it/s]

{'loss': Array(0.18041794, dtype=float32), 'loss_cross_entropy': Array(0.16653283, dtype=float32)}


  4%|▎         | 35379/1000000 [1:37:23<27:02:37,  9.91it/s]

{'loss': Array(0.1812051, dtype=float32), 'loss_cross_entropy': Array(0.16712594, dtype=float32)}


  4%|▎         | 35389/1000000 [1:37:24<24:59:54, 10.72it/s]

{'loss': Array(0.21463735, dtype=float32), 'loss_cross_entropy': Array(0.19961064, dtype=float32)}


  4%|▎         | 35399/1000000 [1:37:26<28:22:19,  9.44it/s]

{'loss': Array(0.2035877, dtype=float32), 'loss_cross_entropy': Array(0.18865062, dtype=float32)}


  4%|▎         | 35409/1000000 [1:37:27<25:28:59, 10.51it/s]

{'loss': Array(0.19161643, dtype=float32), 'loss_cross_entropy': Array(0.1767233, dtype=float32)}


  4%|▎         | 35419/1000000 [1:37:29<28:25:38,  9.43it/s]

{'loss': Array(0.1959371, dtype=float32), 'loss_cross_entropy': Array(0.18069525, dtype=float32)}


  4%|▎         | 35429/1000000 [1:37:30<25:41:46, 10.43it/s]

{'loss': Array(0.18744056, dtype=float32), 'loss_cross_entropy': Array(0.17234762, dtype=float32)}


  4%|▎         | 35439/1000000 [1:37:32<38:53:56,  6.89it/s]

{'loss': Array(0.20801131, dtype=float32), 'loss_cross_entropy': Array(0.19291781, dtype=float32)}


  4%|▎         | 35449/1000000 [1:37:33<27:27:45,  9.76it/s]

{'loss': Array(0.20305704, dtype=float32), 'loss_cross_entropy': Array(0.18711041, dtype=float32)}


  4%|▎         | 35459/1000000 [1:37:35<25:37:17, 10.46it/s]

{'loss': Array(0.17965129, dtype=float32), 'loss_cross_entropy': Array(0.16592613, dtype=float32)}


  4%|▎         | 35469/1000000 [1:37:37<33:17:09,  8.05it/s]

{'loss': Array(0.18761401, dtype=float32), 'loss_cross_entropy': Array(0.1741933, dtype=float32)}


  4%|▎         | 35479/1000000 [1:37:38<25:48:42, 10.38it/s]

{'loss': Array(0.20024009, dtype=float32), 'loss_cross_entropy': Array(0.18499117, dtype=float32)}


  4%|▎         | 35489/1000000 [1:37:40<39:18:00,  6.82it/s]

{'loss': Array(0.21214895, dtype=float32), 'loss_cross_entropy': Array(0.19677296, dtype=float32)}


  4%|▎         | 35499/1000000 [1:37:41<27:28:18,  9.75it/s]

{'loss': Array(0.19929634, dtype=float32), 'loss_cross_entropy': Array(0.18434997, dtype=float32)}


  4%|▎         | 35509/1000000 [1:37:49<87:57:03,  3.05it/s] 

{'loss': Array(0.19522355, dtype=float32), 'loss_cross_entropy': Array(0.18017605, dtype=float32)}


  4%|▎         | 35519/1000000 [1:37:51<40:28:46,  6.62it/s]

{'loss': Array(0.1995307, dtype=float32), 'loss_cross_entropy': Array(0.18382816, dtype=float32)}


  4%|▎         | 35529/1000000 [1:37:52<27:21:41,  9.79it/s]

{'loss': Array(0.2013202, dtype=float32), 'loss_cross_entropy': Array(0.18619, dtype=float32)}


  4%|▎         | 35539/1000000 [1:37:54<32:46:50,  8.17it/s]

{'loss': Array(0.19517715, dtype=float32), 'loss_cross_entropy': Array(0.17983852, dtype=float32)}


  4%|▎         | 35549/1000000 [1:37:55<26:11:18, 10.23it/s]

{'loss': Array(0.1966782, dtype=float32), 'loss_cross_entropy': Array(0.18233262, dtype=float32)}


  4%|▎         | 35559/1000000 [1:37:57<39:58:02,  6.70it/s]

{'loss': Array(0.1971315, dtype=float32), 'loss_cross_entropy': Array(0.18282863, dtype=float32)}


  4%|▎         | 35569/1000000 [1:37:58<27:21:03,  9.79it/s]

{'loss': Array(0.17416474, dtype=float32), 'loss_cross_entropy': Array(0.16169833, dtype=float32)}


  4%|▎         | 35579/1000000 [1:38:00<25:58:42, 10.31it/s]

{'loss': Array(0.19878285, dtype=float32), 'loss_cross_entropy': Array(0.1846572, dtype=float32)}


  4%|▎         | 35589/1000000 [1:38:01<29:17:10,  9.15it/s]

{'loss': Array(0.21334644, dtype=float32), 'loss_cross_entropy': Array(0.19791164, dtype=float32)}


  4%|▎         | 35599/1000000 [1:38:03<26:24:25, 10.14it/s]

{'loss': Array(0.1887356, dtype=float32), 'loss_cross_entropy': Array(0.17396037, dtype=float32)}


  4%|▎         | 35609/1000000 [1:38:04<39:59:31,  6.70it/s]

{'loss': Array(0.19206305, dtype=float32), 'loss_cross_entropy': Array(0.177881, dtype=float32)}


  4%|▎         | 35619/1000000 [1:38:06<27:03:53,  9.90it/s]

{'loss': Array(0.18640006, dtype=float32), 'loss_cross_entropy': Array(0.17198929, dtype=float32)}


  4%|▎         | 35629/1000000 [1:38:07<24:51:20, 10.78it/s]

{'loss': Array(0.18628933, dtype=float32), 'loss_cross_entropy': Array(0.17266957, dtype=float32)}


  4%|▎         | 35639/1000000 [1:38:09<29:38:00,  9.04it/s]

{'loss': Array(0.20305388, dtype=float32), 'loss_cross_entropy': Array(0.18841486, dtype=float32)}


  4%|▎         | 35649/1000000 [1:38:10<25:59:36, 10.31it/s]

{'loss': Array(0.18922947, dtype=float32), 'loss_cross_entropy': Array(0.1760634, dtype=float32)}


  4%|▎         | 35659/1000000 [1:38:12<34:32:13,  7.76it/s]

{'loss': Array(0.192996, dtype=float32), 'loss_cross_entropy': Array(0.17878176, dtype=float32)}


  4%|▎         | 35669/1000000 [1:38:13<26:08:40, 10.25it/s]

{'loss': Array(0.2010438, dtype=float32), 'loss_cross_entropy': Array(0.18652026, dtype=float32)}


  4%|▎         | 35679/1000000 [1:38:15<39:13:57,  6.83it/s]

{'loss': Array(0.20513311, dtype=float32), 'loss_cross_entropy': Array(0.19134764, dtype=float32)}


  4%|▎         | 35689/1000000 [1:38:16<26:53:28,  9.96it/s]

{'loss': Array(0.18964331, dtype=float32), 'loss_cross_entropy': Array(0.17559691, dtype=float32)}


  4%|▎         | 35699/1000000 [1:38:18<25:23:34, 10.55it/s]

{'loss': Array(0.19843027, dtype=float32), 'loss_cross_entropy': Array(0.18355778, dtype=float32)}


  4%|▎         | 35709/1000000 [1:38:19<31:56:38,  8.39it/s]

{'loss': Array(0.19728991, dtype=float32), 'loss_cross_entropy': Array(0.18260983, dtype=float32)}


  4%|▎         | 35719/1000000 [1:38:21<27:01:39,  9.91it/s]

{'loss': Array(0.21828353, dtype=float32), 'loss_cross_entropy': Array(0.20288329, dtype=float32)}


  4%|▎         | 35729/1000000 [1:38:22<35:26:30,  7.56it/s]

{'loss': Array(0.18888013, dtype=float32), 'loss_cross_entropy': Array(0.17421179, dtype=float32)}


  4%|▎         | 35739/1000000 [1:38:24<26:38:55, 10.05it/s]

{'loss': Array(0.22630969, dtype=float32), 'loss_cross_entropy': Array(0.20966314, dtype=float32)}


  4%|▎         | 35749/1000000 [1:38:25<25:39:47, 10.44it/s]

{'loss': Array(0.17933641, dtype=float32), 'loss_cross_entropy': Array(0.1664635, dtype=float32)}


  4%|▎         | 35759/1000000 [1:38:27<28:54:00,  9.27it/s]

{'loss': Array(0.19005814, dtype=float32), 'loss_cross_entropy': Array(0.1754269, dtype=float32)}


  4%|▎         | 35769/1000000 [1:38:28<26:27:00, 10.13it/s]

{'loss': Array(0.196871, dtype=float32), 'loss_cross_entropy': Array(0.18206146, dtype=float32)}


  4%|▎         | 35779/1000000 [1:38:30<39:33:02,  6.77it/s]

{'loss': Array(0.20325287, dtype=float32), 'loss_cross_entropy': Array(0.18863937, dtype=float32)}


  4%|▎         | 35789/1000000 [1:38:32<27:35:50,  9.71it/s]

{'loss': Array(0.18186122, dtype=float32), 'loss_cross_entropy': Array(0.16818039, dtype=float32)}


  4%|▎         | 35799/1000000 [1:38:33<25:05:12, 10.68it/s]

{'loss': Array(0.20298253, dtype=float32), 'loss_cross_entropy': Array(0.18847089, dtype=float32)}


  4%|▎         | 35809/1000000 [1:38:35<31:26:45,  8.52it/s]

{'loss': Array(0.18486242, dtype=float32), 'loss_cross_entropy': Array(0.17104763, dtype=float32)}


  4%|▎         | 35819/1000000 [1:38:36<25:49:54, 10.37it/s]

{'loss': Array(0.1780939, dtype=float32), 'loss_cross_entropy': Array(0.1646219, dtype=float32)}


  4%|▎         | 35829/1000000 [1:38:38<34:55:44,  7.67it/s]

{'loss': Array(0.20104504, dtype=float32), 'loss_cross_entropy': Array(0.18676318, dtype=float32)}


  4%|▎         | 35839/1000000 [1:38:39<26:19:54, 10.17it/s]

{'loss': Array(0.20900488, dtype=float32), 'loss_cross_entropy': Array(0.19325195, dtype=float32)}


  4%|▎         | 35849/1000000 [1:38:41<35:15:06,  7.60it/s]

{'loss': Array(0.20770334, dtype=float32), 'loss_cross_entropy': Array(0.19311157, dtype=float32)}


  4%|▎         | 35859/1000000 [1:38:42<26:44:15, 10.02it/s]

{'loss': Array(0.18712871, dtype=float32), 'loss_cross_entropy': Array(0.17327379, dtype=float32)}


  4%|▎         | 35869/1000000 [1:38:44<25:42:20, 10.42it/s]

{'loss': Array(0.1777239, dtype=float32), 'loss_cross_entropy': Array(0.16420004, dtype=float32)}


  4%|▎         | 35879/1000000 [1:38:45<29:51:32,  8.97it/s]

{'loss': Array(0.20790516, dtype=float32), 'loss_cross_entropy': Array(0.19333847, dtype=float32)}


  4%|▎         | 35889/1000000 [1:38:47<27:21:44,  9.79it/s]

{'loss': Array(0.19230597, dtype=float32), 'loss_cross_entropy': Array(0.17814396, dtype=float32)}


  4%|▎         | 35899/1000000 [1:38:48<32:19:55,  8.28it/s]

{'loss': Array(0.19787838, dtype=float32), 'loss_cross_entropy': Array(0.18354587, dtype=float32)}


  4%|▎         | 35909/1000000 [1:38:50<26:13:36, 10.21it/s]

{'loss': Array(0.2145311, dtype=float32), 'loss_cross_entropy': Array(0.19865255, dtype=float32)}


  4%|▎         | 35919/1000000 [1:38:51<39:40:43,  6.75it/s]

{'loss': Array(0.18636163, dtype=float32), 'loss_cross_entropy': Array(0.17176779, dtype=float32)}


  4%|▎         | 35929/1000000 [1:38:53<27:15:02,  9.83it/s]

{'loss': Array(0.19237898, dtype=float32), 'loss_cross_entropy': Array(0.17763022, dtype=float32)}


  4%|▎         | 35939/1000000 [1:38:55<39:57:04,  6.70it/s]

{'loss': Array(0.18772498, dtype=float32), 'loss_cross_entropy': Array(0.1735098, dtype=float32)}


  4%|▎         | 35949/1000000 [1:38:56<27:45:38,  9.65it/s]

{'loss': Array(0.20119047, dtype=float32), 'loss_cross_entropy': Array(0.18601806, dtype=float32)}


  4%|▎         | 35959/1000000 [1:38:57<25:05:15, 10.67it/s]

{'loss': Array(0.16576652, dtype=float32), 'loss_cross_entropy': Array(0.15197203, dtype=float32)}


  4%|▎         | 35969/1000000 [1:38:59<36:14:05,  7.39it/s]

{'loss': Array(0.18543963, dtype=float32), 'loss_cross_entropy': Array(0.17307447, dtype=float32)}


  4%|▎         | 35979/1000000 [1:39:00<28:18:46,  9.46it/s]

{'loss': Array(0.19589853, dtype=float32), 'loss_cross_entropy': Array(0.18106718, dtype=float32)}


  4%|▎         | 35989/1000000 [1:39:02<41:27:35,  6.46it/s]

{'loss': Array(0.19523863, dtype=float32), 'loss_cross_entropy': Array(0.1809213, dtype=float32)}


  4%|▎         | 35999/1000000 [1:39:04<27:37:34,  9.69it/s]

{'loss': Array(0.21376717, dtype=float32), 'loss_cross_entropy': Array(0.19869213, dtype=float32)}


  4%|▎         | 36009/1000000 [1:39:12<91:19:35,  2.93it/s] 

{'loss': Array(0.18112794, dtype=float32), 'loss_cross_entropy': Array(0.16795118, dtype=float32)}


  4%|▎         | 36019/1000000 [1:39:14<39:53:34,  6.71it/s] 

{'loss': Array(0.17585585, dtype=float32), 'loss_cross_entropy': Array(0.16221154, dtype=float32)}


  4%|▎         | 36029/1000000 [1:39:15<28:10:17,  9.51it/s]

{'loss': Array(0.18098474, dtype=float32), 'loss_cross_entropy': Array(0.1681308, dtype=float32)}


  4%|▎         | 36039/1000000 [1:39:17<29:37:16,  9.04it/s]

{'loss': Array(0.20165487, dtype=float32), 'loss_cross_entropy': Array(0.18692924, dtype=float32)}


  4%|▎         | 36049/1000000 [1:39:18<25:37:58, 10.45it/s]

{'loss': Array(0.19084102, dtype=float32), 'loss_cross_entropy': Array(0.1777482, dtype=float32)}


  4%|▎         | 36059/1000000 [1:39:20<39:40:01,  6.75it/s]

{'loss': Array(0.18816963, dtype=float32), 'loss_cross_entropy': Array(0.1740876, dtype=float32)}


  4%|▎         | 36069/1000000 [1:39:21<27:22:31,  9.78it/s]

{'loss': Array(0.20683733, dtype=float32), 'loss_cross_entropy': Array(0.19175936, dtype=float32)}


  4%|▎         | 36079/1000000 [1:39:23<25:17:44, 10.59it/s]

{'loss': Array(0.20123608, dtype=float32), 'loss_cross_entropy': Array(0.18648612, dtype=float32)}


  4%|▎         | 36089/1000000 [1:39:24<32:11:25,  8.32it/s]

{'loss': Array(0.21204372, dtype=float32), 'loss_cross_entropy': Array(0.19608389, dtype=float32)}


  4%|▎         | 36099/1000000 [1:39:26<26:03:10, 10.28it/s]

{'loss': Array(0.18991876, dtype=float32), 'loss_cross_entropy': Array(0.17551054, dtype=float32)}


  4%|▎         | 36109/1000000 [1:39:27<39:22:14,  6.80it/s]

{'loss': Array(0.21589525, dtype=float32), 'loss_cross_entropy': Array(0.2002356, dtype=float32)}


  4%|▎         | 36119/1000000 [1:39:29<27:00:20,  9.91it/s]

{'loss': Array(0.19020294, dtype=float32), 'loss_cross_entropy': Array(0.17629993, dtype=float32)}


  4%|▎         | 36129/1000000 [1:39:30<24:48:41, 10.79it/s]

{'loss': Array(0.18541442, dtype=float32), 'loss_cross_entropy': Array(0.1719843, dtype=float32)}


  4%|▎         | 36139/1000000 [1:39:32<29:57:08,  8.94it/s]

{'loss': Array(0.20003211, dtype=float32), 'loss_cross_entropy': Array(0.18522252, dtype=float32)}


  4%|▎         | 36149/1000000 [1:39:33<25:10:40, 10.63it/s]

{'loss': Array(0.18730165, dtype=float32), 'loss_cross_entropy': Array(0.17360933, dtype=float32)}


  4%|▎         | 36159/1000000 [1:39:35<34:44:40,  7.71it/s]

{'loss': Array(0.1884238, dtype=float32), 'loss_cross_entropy': Array(0.17413686, dtype=float32)}


  4%|▎         | 36169/1000000 [1:39:36<26:09:04, 10.24it/s]

{'loss': Array(0.18490694, dtype=float32), 'loss_cross_entropy': Array(0.17139487, dtype=float32)}


  4%|▎         | 36179/1000000 [1:39:38<25:57:12, 10.32it/s]

{'loss': Array(0.15854032, dtype=float32), 'loss_cross_entropy': Array(0.14591621, dtype=float32)}


  4%|▎         | 36189/1000000 [1:39:39<31:46:29,  8.43it/s]

{'loss': Array(0.2135953, dtype=float32), 'loss_cross_entropy': Array(0.19855492, dtype=float32)}


  4%|▎         | 36199/1000000 [1:39:41<26:23:04, 10.15it/s]

{'loss': Array(0.19762202, dtype=float32), 'loss_cross_entropy': Array(0.18292339, dtype=float32)}


  4%|▎         | 36209/1000000 [1:39:42<25:35:31, 10.46it/s]

{'loss': Array(0.18033598, dtype=float32), 'loss_cross_entropy': Array(0.16676505, dtype=float32)}


  4%|▎         | 36219/1000000 [1:39:44<28:05:03,  9.53it/s]

{'loss': Array(0.18693407, dtype=float32), 'loss_cross_entropy': Array(0.17239647, dtype=float32)}


  4%|▎         | 36229/1000000 [1:39:45<24:59:16, 10.71it/s]

{'loss': Array(0.18821494, dtype=float32), 'loss_cross_entropy': Array(0.17363095, dtype=float32)}


  4%|▎         | 36239/1000000 [1:39:47<31:49:13,  8.41it/s]

{'loss': Array(0.2011477, dtype=float32), 'loss_cross_entropy': Array(0.1866509, dtype=float32)}


  4%|▎         | 36249/1000000 [1:39:48<25:52:57, 10.34it/s]

{'loss': Array(0.19266652, dtype=float32), 'loss_cross_entropy': Array(0.17822349, dtype=float32)}


  4%|▎         | 36259/1000000 [1:39:50<39:31:35,  6.77it/s]

{'loss': Array(0.20174251, dtype=float32), 'loss_cross_entropy': Array(0.1866916, dtype=float32)}


  4%|▎         | 36269/1000000 [1:39:51<27:03:27,  9.89it/s]

{'loss': Array(0.18821347, dtype=float32), 'loss_cross_entropy': Array(0.17435895, dtype=float32)}


  4%|▎         | 36279/1000000 [1:39:53<39:23:15,  6.80it/s]

{'loss': Array(0.2102202, dtype=float32), 'loss_cross_entropy': Array(0.19518, dtype=float32)}


  4%|▎         | 36289/1000000 [1:39:54<27:07:04,  9.87it/s]

{'loss': Array(0.19171421, dtype=float32), 'loss_cross_entropy': Array(0.17758286, dtype=float32)}


  4%|▎         | 36299/1000000 [1:39:56<24:53:37, 10.75it/s]

{'loss': Array(0.18988694, dtype=float32), 'loss_cross_entropy': Array(0.17563562, dtype=float32)}


  4%|▎         | 36309/1000000 [1:39:57<35:05:13,  7.63it/s]

{'loss': Array(0.18632948, dtype=float32), 'loss_cross_entropy': Array(0.17247455, dtype=float32)}


  4%|▎         | 36319/1000000 [1:39:59<28:26:41,  9.41it/s]

{'loss': Array(0.20390134, dtype=float32), 'loss_cross_entropy': Array(0.18942617, dtype=float32)}


  4%|▎         | 36329/1000000 [1:40:01<41:59:00,  6.38it/s]

{'loss': Array(0.18939488, dtype=float32), 'loss_cross_entropy': Array(0.17392156, dtype=float32)}


  4%|▎         | 36339/1000000 [1:40:02<28:32:42,  9.38it/s]

{'loss': Array(0.20780896, dtype=float32), 'loss_cross_entropy': Array(0.19228773, dtype=float32)}


  4%|▎         | 36349/1000000 [1:40:03<25:30:53, 10.49it/s]

{'loss': Array(0.19655296, dtype=float32), 'loss_cross_entropy': Array(0.18211828, dtype=float32)}


  4%|▎         | 36359/1000000 [1:40:05<28:12:09,  9.49it/s]

{'loss': Array(0.18768626, dtype=float32), 'loss_cross_entropy': Array(0.17392263, dtype=float32)}


  4%|▎         | 36369/1000000 [1:40:06<25:15:15, 10.60it/s]

{'loss': Array(0.18504193, dtype=float32), 'loss_cross_entropy': Array(0.17170799, dtype=float32)}


  4%|▎         | 36379/1000000 [1:40:08<28:22:03,  9.44it/s]

{'loss': Array(0.20718768, dtype=float32), 'loss_cross_entropy': Array(0.19173852, dtype=float32)}


  4%|▎         | 36389/1000000 [1:40:10<25:32:48, 10.48it/s]

{'loss': Array(0.19664, dtype=float32), 'loss_cross_entropy': Array(0.18263909, dtype=float32)}


  4%|▎         | 36399/1000000 [1:40:11<39:28:30,  6.78it/s]

{'loss': Array(0.18047644, dtype=float32), 'loss_cross_entropy': Array(0.16681473, dtype=float32)}


  4%|▎         | 36409/1000000 [1:40:13<27:31:08,  9.73it/s]

{'loss': Array(0.18781435, dtype=float32), 'loss_cross_entropy': Array(0.17311555, dtype=float32)}


  4%|▎         | 36419/1000000 [1:40:14<24:50:49, 10.77it/s]

{'loss': Array(0.1804561, dtype=float32), 'loss_cross_entropy': Array(0.16717672, dtype=float32)}


  4%|▎         | 36429/1000000 [1:40:16<31:37:50,  8.46it/s]

{'loss': Array(0.16864295, dtype=float32), 'loss_cross_entropy': Array(0.1556286, dtype=float32)}


  4%|▎         | 36439/1000000 [1:40:17<25:32:24, 10.48it/s]

{'loss': Array(0.20514889, dtype=float32), 'loss_cross_entropy': Array(0.18974847, dtype=float32)}


  4%|▎         | 36449/1000000 [1:40:19<38:57:45,  6.87it/s]

{'loss': Array(0.18788324, dtype=float32), 'loss_cross_entropy': Array(0.17386362, dtype=float32)}


  4%|▎         | 36459/1000000 [1:40:20<26:42:21, 10.02it/s]

{'loss': Array(0.21284576, dtype=float32), 'loss_cross_entropy': Array(0.19778693, dtype=float32)}


  4%|▎         | 36469/1000000 [1:40:21<25:03:30, 10.68it/s]

{'loss': Array(0.19804724, dtype=float32), 'loss_cross_entropy': Array(0.18337636, dtype=float32)}


  4%|▎         | 36479/1000000 [1:40:23<29:34:15,  9.05it/s]

{'loss': Array(0.18893377, dtype=float32), 'loss_cross_entropy': Array(0.17488448, dtype=float32)}


  4%|▎         | 36489/1000000 [1:40:24<26:00:56, 10.29it/s]

{'loss': Array(0.18299375, dtype=float32), 'loss_cross_entropy': Array(0.16953027, dtype=float32)}


  4%|▎         | 36499/1000000 [1:40:26<35:26:31,  7.55it/s]

{'loss': Array(0.20199247, dtype=float32), 'loss_cross_entropy': Array(0.18741043, dtype=float32)}


  4%|▎         | 36509/1000000 [1:40:34<89:52:17,  2.98it/s] 

{'loss': Array(0.20898655, dtype=float32), 'loss_cross_entropy': Array(0.1941351, dtype=float32)}


  4%|▎         | 36519/1000000 [1:40:36<46:21:07,  5.77it/s]

{'loss': Array(0.1830558, dtype=float32), 'loss_cross_entropy': Array(0.16922206, dtype=float32)}


  4%|▎         | 36529/1000000 [1:40:37<28:13:22,  9.48it/s]

{'loss': Array(0.17141719, dtype=float32), 'loss_cross_entropy': Array(0.15813619, dtype=float32)}


  4%|▎         | 36539/1000000 [1:40:39<25:20:42, 10.56it/s]

{'loss': Array(0.19184162, dtype=float32), 'loss_cross_entropy': Array(0.17784786, dtype=float32)}


  4%|▎         | 36549/1000000 [1:40:40<31:53:02,  8.39it/s]

{'loss': Array(0.19929563, dtype=float32), 'loss_cross_entropy': Array(0.18449165, dtype=float32)}


  4%|▎         | 36559/1000000 [1:40:42<26:10:43, 10.22it/s]

{'loss': Array(0.1828369, dtype=float32), 'loss_cross_entropy': Array(0.16888778, dtype=float32)}


  4%|▎         | 36569/1000000 [1:40:43<24:54:47, 10.74it/s]

{'loss': Array(0.20124009, dtype=float32), 'loss_cross_entropy': Array(0.18643185, dtype=float32)}


  4%|▎         | 36579/1000000 [1:40:45<27:50:40,  9.61it/s]

{'loss': Array(0.19163144, dtype=float32), 'loss_cross_entropy': Array(0.17673087, dtype=float32)}


  4%|▎         | 36589/1000000 [1:40:46<25:14:53, 10.60it/s]

{'loss': Array(0.20629862, dtype=float32), 'loss_cross_entropy': Array(0.19110487, dtype=float32)}


  4%|▎         | 36599/1000000 [1:40:48<31:44:58,  8.43it/s]

{'loss': Array(0.18438953, dtype=float32), 'loss_cross_entropy': Array(0.17046688, dtype=float32)}


  4%|▎         | 36609/1000000 [1:40:49<25:56:52, 10.31it/s]

{'loss': Array(0.18697153, dtype=float32), 'loss_cross_entropy': Array(0.17404756, dtype=float32)}


  4%|▎         | 36619/1000000 [1:40:51<35:01:40,  7.64it/s]

{'loss': Array(0.21919857, dtype=float32), 'loss_cross_entropy': Array(0.20357108, dtype=float32)}


  4%|▎         | 36629/1000000 [1:40:52<26:18:19, 10.17it/s]

{'loss': Array(0.18853071, dtype=float32), 'loss_cross_entropy': Array(0.17508957, dtype=float32)}


  4%|▎         | 36639/1000000 [1:40:54<25:26:31, 10.52it/s]

{'loss': Array(0.19600384, dtype=float32), 'loss_cross_entropy': Array(0.18173875, dtype=float32)}


  4%|▎         | 36649/1000000 [1:40:55<31:57:44,  8.37it/s]

{'loss': Array(0.19854118, dtype=float32), 'loss_cross_entropy': Array(0.18362653, dtype=float32)}


  4%|▎         | 36659/1000000 [1:40:57<25:52:43, 10.34it/s]

{'loss': Array(0.18048336, dtype=float32), 'loss_cross_entropy': Array(0.16695231, dtype=float32)}


  4%|▎         | 36669/1000000 [1:40:58<24:41:36, 10.84it/s]

{'loss': Array(0.19265631, dtype=float32), 'loss_cross_entropy': Array(0.17923528, dtype=float32)}


  4%|▎         | 36679/1000000 [1:41:00<29:25:22,  9.09it/s]

{'loss': Array(0.18352114, dtype=float32), 'loss_cross_entropy': Array(0.17024182, dtype=float32)}


  4%|▎         | 36689/1000000 [1:41:01<26:24:55, 10.13it/s]

{'loss': Array(0.18725117, dtype=float32), 'loss_cross_entropy': Array(0.17220841, dtype=float32)}


  4%|▎         | 36699/1000000 [1:41:03<33:25:53,  8.00it/s]

{'loss': Array(0.20311795, dtype=float32), 'loss_cross_entropy': Array(0.18718123, dtype=float32)}


  4%|▎         | 36709/1000000 [1:41:04<26:31:57, 10.09it/s]

{'loss': Array(0.19971858, dtype=float32), 'loss_cross_entropy': Array(0.18554424, dtype=float32)}


  4%|▎         | 36719/1000000 [1:41:06<40:02:04,  6.68it/s]

{'loss': Array(0.19268535, dtype=float32), 'loss_cross_entropy': Array(0.17868544, dtype=float32)}


  4%|▎         | 36729/1000000 [1:41:07<27:43:21,  9.65it/s]

{'loss': Array(0.19807471, dtype=float32), 'loss_cross_entropy': Array(0.1831756, dtype=float32)}


  4%|▎         | 36739/1000000 [1:41:09<25:39:04, 10.43it/s]

{'loss': Array(0.20416856, dtype=float32), 'loss_cross_entropy': Array(0.18935168, dtype=float32)}


  4%|▎         | 36749/1000000 [1:41:11<35:09:20,  7.61it/s]

{'loss': Array(0.17846239, dtype=float32), 'loss_cross_entropy': Array(0.16513726, dtype=float32)}


  4%|▎         | 36759/1000000 [1:41:12<26:20:20, 10.16it/s]

{'loss': Array(0.18748279, dtype=float32), 'loss_cross_entropy': Array(0.17392011, dtype=float32)}


  4%|▎         | 36769/1000000 [1:41:13<25:06:59, 10.65it/s]

{'loss': Array(0.1904621, dtype=float32), 'loss_cross_entropy': Array(0.17717078, dtype=float32)}


  4%|▎         | 36779/1000000 [1:41:15<29:49:14,  8.97it/s]

{'loss': Array(0.19114538, dtype=float32), 'loss_cross_entropy': Array(0.17647986, dtype=float32)}


  4%|▎         | 36789/1000000 [1:41:16<25:28:31, 10.50it/s]

{'loss': Array(0.17034881, dtype=float32), 'loss_cross_entropy': Array(0.15733325, dtype=float32)}


  4%|▎         | 36799/1000000 [1:41:18<31:25:33,  8.51it/s]

{'loss': Array(0.21980746, dtype=float32), 'loss_cross_entropy': Array(0.20420392, dtype=float32)}


  4%|▎         | 36809/1000000 [1:41:19<26:08:33, 10.23it/s]

{'loss': Array(0.1988569, dtype=float32), 'loss_cross_entropy': Array(0.18390234, dtype=float32)}


  4%|▎         | 36819/1000000 [1:41:21<24:32:55, 10.90it/s]

{'loss': Array(0.175372, dtype=float32), 'loss_cross_entropy': Array(0.1624071, dtype=float32)}


  4%|▎         | 36829/1000000 [1:41:22<28:35:40,  9.36it/s]

{'loss': Array(0.2156072, dtype=float32), 'loss_cross_entropy': Array(0.20027173, dtype=float32)}


  4%|▎         | 36839/1000000 [1:41:24<25:09:47, 10.63it/s]

{'loss': Array(0.17655003, dtype=float32), 'loss_cross_entropy': Array(0.16380554, dtype=float32)}


  4%|▎         | 36849/1000000 [1:41:25<34:50:25,  7.68it/s]

{'loss': Array(0.18377799, dtype=float32), 'loss_cross_entropy': Array(0.16934815, dtype=float32)}


  4%|▎         | 36859/1000000 [1:41:27<26:22:37, 10.14it/s]

{'loss': Array(0.195663, dtype=float32), 'loss_cross_entropy': Array(0.1818457, dtype=float32)}


  4%|▎         | 36869/1000000 [1:41:28<25:20:13, 10.56it/s]

{'loss': Array(0.21274607, dtype=float32), 'loss_cross_entropy': Array(0.19731148, dtype=float32)}


  4%|▎         | 36879/1000000 [1:41:30<27:51:07,  9.61it/s]

{'loss': Array(0.20638238, dtype=float32), 'loss_cross_entropy': Array(0.19133203, dtype=float32)}


  4%|▎         | 36889/1000000 [1:41:31<25:37:32, 10.44it/s]

{'loss': Array(0.20853801, dtype=float32), 'loss_cross_entropy': Array(0.19448477, dtype=float32)}


  4%|▎         | 36899/1000000 [1:41:33<29:54:21,  8.95it/s]

{'loss': Array(0.21061634, dtype=float32), 'loss_cross_entropy': Array(0.19542925, dtype=float32)}


  4%|▎         | 36909/1000000 [1:41:34<26:38:11, 10.04it/s]

{'loss': Array(0.201358, dtype=float32), 'loss_cross_entropy': Array(0.18643618, dtype=float32)}


  4%|▎         | 36919/1000000 [1:41:36<39:11:28,  6.83it/s]

{'loss': Array(0.19172546, dtype=float32), 'loss_cross_entropy': Array(0.1768531, dtype=float32)}


  4%|▎         | 36929/1000000 [1:41:37<27:10:34,  9.84it/s]

{'loss': Array(0.17485762, dtype=float32), 'loss_cross_entropy': Array(0.16154216, dtype=float32)}


  4%|▎         | 36939/1000000 [1:41:39<25:25:33, 10.52it/s]

{'loss': Array(0.18997224, dtype=float32), 'loss_cross_entropy': Array(0.17645101, dtype=float32)}


  4%|▎         | 36949/1000000 [1:41:41<32:10:26,  8.31it/s]

{'loss': Array(0.21392524, dtype=float32), 'loss_cross_entropy': Array(0.19835575, dtype=float32)}


  4%|▎         | 36959/1000000 [1:41:42<25:34:27, 10.46it/s]

{'loss': Array(0.19698966, dtype=float32), 'loss_cross_entropy': Array(0.18303491, dtype=float32)}


  4%|▎         | 36969/1000000 [1:41:44<34:42:47,  7.71it/s]

{'loss': Array(0.19378759, dtype=float32), 'loss_cross_entropy': Array(0.17872559, dtype=float32)}


  4%|▎         | 36979/1000000 [1:41:45<26:09:17, 10.23it/s]

{'loss': Array(0.18958405, dtype=float32), 'loss_cross_entropy': Array(0.17513847, dtype=float32)}


  4%|▎         | 36989/1000000 [1:41:47<35:18:23,  7.58it/s]

{'loss': Array(0.20182924, dtype=float32), 'loss_cross_entropy': Array(0.18745837, dtype=float32)}


  4%|▎         | 36999/1000000 [1:41:48<26:37:11, 10.05it/s]

{'loss': Array(0.21400571, dtype=float32), 'loss_cross_entropy': Array(0.1983253, dtype=float32)}


  4%|▎         | 37009/1000000 [1:41:56<88:51:31,  3.01it/s] 

{'loss': Array(0.20993958, dtype=float32), 'loss_cross_entropy': Array(0.19469391, dtype=float32)}


  4%|▎         | 37019/1000000 [1:41:58<40:26:57,  6.61it/s]

{'loss': Array(0.19132707, dtype=float32), 'loss_cross_entropy': Array(0.17780088, dtype=float32)}


  4%|▎         | 37029/1000000 [1:41:59<28:32:37,  9.37it/s]

{'loss': Array(0.19094333, dtype=float32), 'loss_cross_entropy': Array(0.17739938, dtype=float32)}


  4%|▎         | 37039/1000000 [1:42:01<35:46:02,  7.48it/s]

{'loss': Array(0.18258093, dtype=float32), 'loss_cross_entropy': Array(0.16791245, dtype=float32)}


  4%|▎         | 37049/1000000 [1:42:02<27:17:17,  9.80it/s]

{'loss': Array(0.18176125, dtype=float32), 'loss_cross_entropy': Array(0.16802852, dtype=float32)}


  4%|▎         | 37059/1000000 [1:42:04<40:00:22,  6.69it/s]

{'loss': Array(0.18094848, dtype=float32), 'loss_cross_entropy': Array(0.16809471, dtype=float32)}


  4%|▎         | 37069/1000000 [1:42:05<27:00:42,  9.90it/s]

{'loss': Array(0.18872803, dtype=float32), 'loss_cross_entropy': Array(0.17404425, dtype=float32)}


  4%|▎         | 37079/1000000 [1:42:07<24:52:40, 10.75it/s]

{'loss': Array(0.17240606, dtype=float32), 'loss_cross_entropy': Array(0.16020368, dtype=float32)}


  4%|▎         | 37089/1000000 [1:42:08<31:33:28,  8.48it/s]

{'loss': Array(0.19104987, dtype=float32), 'loss_cross_entropy': Array(0.17701316, dtype=float32)}


  4%|▎         | 37099/1000000 [1:42:10<25:34:36, 10.46it/s]

{'loss': Array(0.18191986, dtype=float32), 'loss_cross_entropy': Array(0.16803446, dtype=float32)}


  4%|▎         | 37109/1000000 [1:42:11<35:08:12,  7.61it/s]

{'loss': Array(0.18645974, dtype=float32), 'loss_cross_entropy': Array(0.17292273, dtype=float32)}


  4%|▎         | 37119/1000000 [1:42:13<26:32:02, 10.08it/s]

{'loss': Array(0.22062483, dtype=float32), 'loss_cross_entropy': Array(0.20390971, dtype=float32)}


  4%|▎         | 37129/1000000 [1:42:14<25:12:58, 10.61it/s]

{'loss': Array(0.20598474, dtype=float32), 'loss_cross_entropy': Array(0.19107676, dtype=float32)}


  4%|▎         | 37139/1000000 [1:42:16<28:12:56,  9.48it/s]

{'loss': Array(0.19046974, dtype=float32), 'loss_cross_entropy': Array(0.17580122, dtype=float32)}


  4%|▎         | 37149/1000000 [1:42:17<25:55:24, 10.32it/s]

{'loss': Array(0.18992136, dtype=float32), 'loss_cross_entropy': Array(0.17661881, dtype=float32)}


  4%|▎         | 37159/1000000 [1:42:19<38:58:53,  6.86it/s]

{'loss': Array(0.18437089, dtype=float32), 'loss_cross_entropy': Array(0.17071925, dtype=float32)}


  4%|▎         | 37169/1000000 [1:42:20<26:57:43,  9.92it/s]

{'loss': Array(0.20999312, dtype=float32), 'loss_cross_entropy': Array(0.1951555, dtype=float32)}


  4%|▎         | 37179/1000000 [1:42:22<25:16:04, 10.58it/s]

{'loss': Array(0.20954548, dtype=float32), 'loss_cross_entropy': Array(0.19530661, dtype=float32)}


  4%|▎         | 37189/1000000 [1:42:23<32:02:18,  8.35it/s]

{'loss': Array(0.18726279, dtype=float32), 'loss_cross_entropy': Array(0.17361715, dtype=float32)}


  4%|▎         | 37199/1000000 [1:42:25<26:18:33, 10.17it/s]

{'loss': Array(0.19026391, dtype=float32), 'loss_cross_entropy': Array(0.17589447, dtype=float32)}


  4%|▎         | 37209/1000000 [1:42:26<34:58:38,  7.65it/s]

{'loss': Array(0.2163025, dtype=float32), 'loss_cross_entropy': Array(0.20115244, dtype=float32)}


  4%|▎         | 37219/1000000 [1:42:28<25:59:13, 10.29it/s]

{'loss': Array(0.17166865, dtype=float32), 'loss_cross_entropy': Array(0.15908103, dtype=float32)}


  4%|▎         | 37229/1000000 [1:42:29<35:04:22,  7.63it/s]

{'loss': Array(0.20624526, dtype=float32), 'loss_cross_entropy': Array(0.19147243, dtype=float32)}


  4%|▎         | 37239/1000000 [1:42:31<26:48:06,  9.98it/s]

{'loss': Array(0.2017393, dtype=float32), 'loss_cross_entropy': Array(0.18802737, dtype=float32)}


  4%|▎         | 37249/1000000 [1:42:32<24:41:11, 10.83it/s]

{'loss': Array(0.20159984, dtype=float32), 'loss_cross_entropy': Array(0.18608388, dtype=float32)}


  4%|▎         | 37259/1000000 [1:42:34<29:07:47,  9.18it/s]

{'loss': Array(0.18191198, dtype=float32), 'loss_cross_entropy': Array(0.168582, dtype=float32)}


  4%|▎         | 37269/1000000 [1:42:35<25:06:48, 10.65it/s]

{'loss': Array(0.19404317, dtype=float32), 'loss_cross_entropy': Array(0.17992815, dtype=float32)}


  4%|▎         | 37279/1000000 [1:42:37<31:25:06,  8.51it/s]

{'loss': Array(0.20942794, dtype=float32), 'loss_cross_entropy': Array(0.19444422, dtype=float32)}


  4%|▎         | 37289/1000000 [1:42:38<25:45:31, 10.38it/s]

{'loss': Array(0.17868826, dtype=float32), 'loss_cross_entropy': Array(0.16530803, dtype=float32)}


  4%|▎         | 37299/1000000 [1:42:40<39:19:01,  6.80it/s]

{'loss': Array(0.19469628, dtype=float32), 'loss_cross_entropy': Array(0.18119366, dtype=float32)}


  4%|▎         | 37309/1000000 [1:42:41<28:16:25,  9.46it/s]

{'loss': Array(0.20168726, dtype=float32), 'loss_cross_entropy': Array(0.18706787, dtype=float32)}


  4%|▎         | 37319/1000000 [1:42:43<39:14:57,  6.81it/s]

{'loss': Array(0.17797609, dtype=float32), 'loss_cross_entropy': Array(0.16496284, dtype=float32)}


  4%|▎         | 37329/1000000 [1:42:44<27:20:37,  9.78it/s]

{'loss': Array(0.18789618, dtype=float32), 'loss_cross_entropy': Array(0.17497073, dtype=float32)}


  4%|▎         | 37339/1000000 [1:42:46<25:23:43, 10.53it/s]

{'loss': Array(0.20810333, dtype=float32), 'loss_cross_entropy': Array(0.19362204, dtype=float32)}


  4%|▎         | 37349/1000000 [1:42:47<34:52:07,  7.67it/s]

{'loss': Array(0.18052135, dtype=float32), 'loss_cross_entropy': Array(0.16699217, dtype=float32)}


  4%|▎         | 37359/1000000 [1:42:49<26:31:25, 10.08it/s]

{'loss': Array(0.20887749, dtype=float32), 'loss_cross_entropy': Array(0.1934086, dtype=float32)}


  4%|▎         | 37369/1000000 [1:42:50<39:11:06,  6.82it/s]

{'loss': Array(0.18610625, dtype=float32), 'loss_cross_entropy': Array(0.1725411, dtype=float32)}


  4%|▎         | 37379/1000000 [1:42:52<26:57:20,  9.92it/s]

{'loss': Array(0.20110933, dtype=float32), 'loss_cross_entropy': Array(0.18809126, dtype=float32)}


  4%|▎         | 37389/1000000 [1:42:53<25:08:49, 10.63it/s]

{'loss': Array(0.17273057, dtype=float32), 'loss_cross_entropy': Array(0.16024868, dtype=float32)}


  4%|▎         | 37399/1000000 [1:42:55<28:04:44,  9.52it/s]

{'loss': Array(0.1699621, dtype=float32), 'loss_cross_entropy': Array(0.15721038, dtype=float32)}


  4%|▎         | 37409/1000000 [1:42:56<25:16:32, 10.58it/s]

{'loss': Array(0.20559683, dtype=float32), 'loss_cross_entropy': Array(0.19081712, dtype=float32)}


  4%|▎         | 37419/1000000 [1:42:58<29:04:39,  9.20it/s]

{'loss': Array(0.18893841, dtype=float32), 'loss_cross_entropy': Array(0.175428, dtype=float32)}


  4%|▎         | 37429/1000000 [1:42:59<26:29:06, 10.10it/s]

{'loss': Array(0.18213487, dtype=float32), 'loss_cross_entropy': Array(0.16863577, dtype=float32)}


  4%|▎         | 37439/1000000 [1:43:01<40:26:51,  6.61it/s]

{'loss': Array(0.18962917, dtype=float32), 'loss_cross_entropy': Array(0.17538826, dtype=float32)}


  4%|▎         | 37449/1000000 [1:43:02<28:48:55,  9.28it/s]

{'loss': Array(0.19301863, dtype=float32), 'loss_cross_entropy': Array(0.17913388, dtype=float32)}


  4%|▎         | 37459/1000000 [1:43:04<25:10:31, 10.62it/s]

{'loss': Array(0.1844384, dtype=float32), 'loss_cross_entropy': Array(0.1714447, dtype=float32)}


  4%|▎         | 37469/1000000 [1:43:05<31:53:24,  8.38it/s]

{'loss': Array(0.1941129, dtype=float32), 'loss_cross_entropy': Array(0.18020202, dtype=float32)}


  4%|▎         | 37479/1000000 [1:43:07<26:19:50, 10.15it/s]

{'loss': Array(0.1776374, dtype=float32), 'loss_cross_entropy': Array(0.16424799, dtype=float32)}


  4%|▎         | 37489/1000000 [1:43:09<39:08:47,  6.83it/s]

{'loss': Array(0.18822752, dtype=float32), 'loss_cross_entropy': Array(0.17494993, dtype=float32)}


  4%|▎         | 37499/1000000 [1:43:10<27:09:05,  9.85it/s]

{'loss': Array(0.1941027, dtype=float32), 'loss_cross_entropy': Array(0.17984633, dtype=float32)}


  4%|▍         | 37509/1000000 [1:43:17<84:54:40,  3.15it/s] 

{'loss': Array(0.19367397, dtype=float32), 'loss_cross_entropy': Array(0.1797978, dtype=float32)}


  4%|▍         | 37519/1000000 [1:43:19<39:27:12,  6.78it/s]

{'loss': Array(0.18964563, dtype=float32), 'loss_cross_entropy': Array(0.17591943, dtype=float32)}


  4%|▍         | 37529/1000000 [1:43:20<26:57:32,  9.92it/s]

{'loss': Array(0.17957097, dtype=float32), 'loss_cross_entropy': Array(0.16664639, dtype=float32)}


  4%|▍         | 37539/1000000 [1:43:22<32:14:25,  8.29it/s]

{'loss': Array(0.20448112, dtype=float32), 'loss_cross_entropy': Array(0.18918173, dtype=float32)}


  4%|▍         | 37549/1000000 [1:43:23<25:51:53, 10.34it/s]

{'loss': Array(0.17108661, dtype=float32), 'loss_cross_entropy': Array(0.15799049, dtype=float32)}


  4%|▍         | 37559/1000000 [1:43:25<40:29:19,  6.60it/s]

{'loss': Array(0.18817893, dtype=float32), 'loss_cross_entropy': Array(0.17412448, dtype=float32)}


  4%|▍         | 37569/1000000 [1:43:27<27:33:05,  9.70it/s]

{'loss': Array(0.16658658, dtype=float32), 'loss_cross_entropy': Array(0.15390809, dtype=float32)}


  4%|▍         | 37579/1000000 [1:43:28<24:57:47, 10.71it/s]

{'loss': Array(0.22430237, dtype=float32), 'loss_cross_entropy': Array(0.2088248, dtype=float32)}


  4%|▍         | 37589/1000000 [1:43:30<27:55:21,  9.57it/s]

{'loss': Array(0.18174948, dtype=float32), 'loss_cross_entropy': Array(0.16817759, dtype=float32)}


  4%|▍         | 37599/1000000 [1:43:31<24:51:35, 10.75it/s]

{'loss': Array(0.17148542, dtype=float32), 'loss_cross_entropy': Array(0.15786429, dtype=float32)}


  4%|▍         | 37609/1000000 [1:43:33<39:17:17,  6.80it/s]

{'loss': Array(0.19362938, dtype=float32), 'loss_cross_entropy': Array(0.1796069, dtype=float32)}


  4%|▍         | 37619/1000000 [1:43:34<27:33:33,  9.70it/s]

{'loss': Array(0.19733502, dtype=float32), 'loss_cross_entropy': Array(0.18364392, dtype=float32)}


  4%|▍         | 37629/1000000 [1:43:35<25:15:02, 10.59it/s]

{'loss': Array(0.19529282, dtype=float32), 'loss_cross_entropy': Array(0.18151353, dtype=float32)}


  4%|▍         | 37639/1000000 [1:43:37<29:22:42,  9.10it/s]

{'loss': Array(0.19145048, dtype=float32), 'loss_cross_entropy': Array(0.17717074, dtype=float32)}


  4%|▍         | 37649/1000000 [1:43:38<26:23:45, 10.13it/s]

{'loss': Array(0.18571846, dtype=float32), 'loss_cross_entropy': Array(0.17172568, dtype=float32)}


  4%|▍         | 37659/1000000 [1:43:40<35:30:59,  7.53it/s]

{'loss': Array(0.18242799, dtype=float32), 'loss_cross_entropy': Array(0.1700154, dtype=float32)}


  4%|▍         | 37669/1000000 [1:43:42<26:18:24, 10.16it/s]

{'loss': Array(0.19710234, dtype=float32), 'loss_cross_entropy': Array(0.18315282, dtype=float32)}


  4%|▍         | 37679/1000000 [1:43:43<38:53:19,  6.87it/s]

{'loss': Array(0.21749902, dtype=float32), 'loss_cross_entropy': Array(0.20317884, dtype=float32)}


  4%|▍         | 37689/1000000 [1:43:45<26:47:39,  9.98it/s]

{'loss': Array(0.20233849, dtype=float32), 'loss_cross_entropy': Array(0.1879901, dtype=float32)}


  4%|▍         | 37699/1000000 [1:43:46<24:59:01, 10.70it/s]

{'loss': Array(0.1822476, dtype=float32), 'loss_cross_entropy': Array(0.16910367, dtype=float32)}


  4%|▍         | 37709/1000000 [1:43:48<31:31:49,  8.48it/s]

{'loss': Array(0.2012825, dtype=float32), 'loss_cross_entropy': Array(0.18655965, dtype=float32)}


  4%|▍         | 37719/1000000 [1:43:49<26:03:21, 10.26it/s]

{'loss': Array(0.19519998, dtype=float32), 'loss_cross_entropy': Array(0.1813128, dtype=float32)}


  4%|▍         | 37729/1000000 [1:43:51<34:53:50,  7.66it/s]

{'loss': Array(0.19138287, dtype=float32), 'loss_cross_entropy': Array(0.17764106, dtype=float32)}


  4%|▍         | 37739/1000000 [1:43:52<26:26:27, 10.11it/s]

{'loss': Array(0.17715742, dtype=float32), 'loss_cross_entropy': Array(0.16447024, dtype=float32)}


  4%|▍         | 37749/1000000 [1:43:53<25:21:21, 10.54it/s]

{'loss': Array(0.18123794, dtype=float32), 'loss_cross_entropy': Array(0.16843833, dtype=float32)}


  4%|▍         | 37759/1000000 [1:43:55<28:48:49,  9.28it/s]

{'loss': Array(0.19811139, dtype=float32), 'loss_cross_entropy': Array(0.18363295, dtype=float32)}


  4%|▍         | 37769/1000000 [1:43:57<25:48:16, 10.36it/s]

{'loss': Array(0.18891717, dtype=float32), 'loss_cross_entropy': Array(0.17543738, dtype=float32)}


  4%|▍         | 37779/1000000 [1:43:58<39:04:21,  6.84it/s]

{'loss': Array(0.18750964, dtype=float32), 'loss_cross_entropy': Array(0.17392252, dtype=float32)}


  4%|▍         | 37789/1000000 [1:44:00<27:35:25,  9.69it/s]

{'loss': Array(0.180122, dtype=float32), 'loss_cross_entropy': Array(0.16713502, dtype=float32)}


  4%|▍         | 37799/1000000 [1:44:01<25:18:32, 10.56it/s]

{'loss': Array(0.18853413, dtype=float32), 'loss_cross_entropy': Array(0.17526321, dtype=float32)}


  4%|▍         | 37809/1000000 [1:44:03<32:45:25,  8.16it/s]

{'loss': Array(0.16933477, dtype=float32), 'loss_cross_entropy': Array(0.15622054, dtype=float32)}


  4%|▍         | 37819/1000000 [1:44:04<26:26:20, 10.11it/s]

{'loss': Array(0.17531352, dtype=float32), 'loss_cross_entropy': Array(0.1627455, dtype=float32)}


  4%|▍         | 37829/1000000 [1:44:06<35:26:20,  7.54it/s]

{'loss': Array(0.19597125, dtype=float32), 'loss_cross_entropy': Array(0.18142393, dtype=float32)}


  4%|▍         | 37839/1000000 [1:44:07<27:06:44,  9.86it/s]

{'loss': Array(0.20110829, dtype=float32), 'loss_cross_entropy': Array(0.18648203, dtype=float32)}


  4%|▍         | 37849/1000000 [1:44:09<35:25:15,  7.55it/s]

{'loss': Array(0.20614146, dtype=float32), 'loss_cross_entropy': Array(0.19178544, dtype=float32)}


  4%|▍         | 37859/1000000 [1:44:10<27:04:26,  9.87it/s]

{'loss': Array(0.17651162, dtype=float32), 'loss_cross_entropy': Array(0.16380447, dtype=float32)}


  4%|▍         | 37869/1000000 [1:44:12<25:38:53, 10.42it/s]

{'loss': Array(0.16878168, dtype=float32), 'loss_cross_entropy': Array(0.15586792, dtype=float32)}


  4%|▍         | 37879/1000000 [1:44:13<29:39:53,  9.01it/s]

{'loss': Array(0.19420092, dtype=float32), 'loss_cross_entropy': Array(0.18061066, dtype=float32)}


  4%|▍         | 37889/1000000 [1:44:15<25:23:07, 10.53it/s]

{'loss': Array(0.18203895, dtype=float32), 'loss_cross_entropy': Array(0.16859458, dtype=float32)}


  4%|▍         | 37899/1000000 [1:44:17<32:50:23,  8.14it/s]

{'loss': Array(0.1854705, dtype=float32), 'loss_cross_entropy': Array(0.17109261, dtype=float32)}


  4%|▍         | 37909/1000000 [1:44:18<26:15:43, 10.18it/s]

{'loss': Array(0.19131202, dtype=float32), 'loss_cross_entropy': Array(0.17730568, dtype=float32)}


  4%|▍         | 37919/1000000 [1:44:20<39:01:36,  6.85it/s]

{'loss': Array(0.17400983, dtype=float32), 'loss_cross_entropy': Array(0.16108425, dtype=float32)}


  4%|▍         | 37929/1000000 [1:44:21<26:59:53,  9.90it/s]

{'loss': Array(0.18148492, dtype=float32), 'loss_cross_entropy': Array(0.16906494, dtype=float32)}


  4%|▍         | 37939/1000000 [1:44:23<39:12:52,  6.81it/s]

{'loss': Array(0.19620024, dtype=float32), 'loss_cross_entropy': Array(0.18229115, dtype=float32)}


  4%|▍         | 37949/1000000 [1:44:24<26:46:10,  9.98it/s]

{'loss': Array(0.18013175, dtype=float32), 'loss_cross_entropy': Array(0.16647488, dtype=float32)}


  4%|▍         | 37959/1000000 [1:44:25<24:56:10, 10.72it/s]

{'loss': Array(0.18795156, dtype=float32), 'loss_cross_entropy': Array(0.17410587, dtype=float32)}


  4%|▍         | 37969/1000000 [1:44:27<34:42:31,  7.70it/s]

{'loss': Array(0.19759059, dtype=float32), 'loss_cross_entropy': Array(0.18356033, dtype=float32)}


  4%|▍         | 37979/1000000 [1:44:28<26:28:00, 10.10it/s]

{'loss': Array(0.1940377, dtype=float32), 'loss_cross_entropy': Array(0.17949314, dtype=float32)}


  4%|▍         | 37989/1000000 [1:44:30<39:38:51,  6.74it/s]

{'loss': Array(0.18390034, dtype=float32), 'loss_cross_entropy': Array(0.17067026, dtype=float32)}


  4%|▍         | 37999/1000000 [1:44:31<27:20:29,  9.77it/s]

{'loss': Array(0.21281087, dtype=float32), 'loss_cross_entropy': Array(0.1970223, dtype=float32)}


  4%|▍         | 38009/1000000 [1:44:40<89:43:07,  2.98it/s] 

{'loss': Array(0.20361304, dtype=float32), 'loss_cross_entropy': Array(0.18892653, dtype=float32)}


  4%|▍         | 38019/1000000 [1:44:41<39:31:16,  6.76it/s] 

{'loss': Array(0.17523468, dtype=float32), 'loss_cross_entropy': Array(0.16223197, dtype=float32)}


  4%|▍         | 38029/1000000 [1:44:43<27:45:40,  9.63it/s]

{'loss': Array(0.19617043, dtype=float32), 'loss_cross_entropy': Array(0.18206431, dtype=float32)}


  4%|▍         | 38039/1000000 [1:44:44<28:54:08,  9.25it/s]

{'loss': Array(0.18619536, dtype=float32), 'loss_cross_entropy': Array(0.17221616, dtype=float32)}


  4%|▍         | 38049/1000000 [1:44:46<24:57:49, 10.70it/s]

{'loss': Array(0.19653587, dtype=float32), 'loss_cross_entropy': Array(0.18256545, dtype=float32)}


  4%|▍         | 38059/1000000 [1:44:47<39:04:42,  6.84it/s]

{'loss': Array(0.2180671, dtype=float32), 'loss_cross_entropy': Array(0.20310168, dtype=float32)}


  4%|▍         | 38069/1000000 [1:44:49<26:41:22, 10.01it/s]

{'loss': Array(0.19106197, dtype=float32), 'loss_cross_entropy': Array(0.17763881, dtype=float32)}


  4%|▍         | 38079/1000000 [1:44:50<24:37:07, 10.85it/s]

{'loss': Array(0.18202148, dtype=float32), 'loss_cross_entropy': Array(0.1684257, dtype=float32)}


  4%|▍         | 38089/1000000 [1:44:52<31:22:38,  8.52it/s]

{'loss': Array(0.18414009, dtype=float32), 'loss_cross_entropy': Array(0.1713059, dtype=float32)}


  4%|▍         | 38099/1000000 [1:44:53<26:03:57, 10.25it/s]

{'loss': Array(0.19312723, dtype=float32), 'loss_cross_entropy': Array(0.17940766, dtype=float32)}


  4%|▍         | 38109/1000000 [1:44:55<40:42:07,  6.56it/s]

{'loss': Array(0.19132186, dtype=float32), 'loss_cross_entropy': Array(0.17667097, dtype=float32)}


  4%|▍         | 38119/1000000 [1:44:56<27:17:50,  9.79it/s]

{'loss': Array(0.1925842, dtype=float32), 'loss_cross_entropy': Array(0.17867999, dtype=float32)}


  4%|▍         | 38129/1000000 [1:44:58<25:04:55, 10.65it/s]

{'loss': Array(0.16918223, dtype=float32), 'loss_cross_entropy': Array(0.1568969, dtype=float32)}


  4%|▍         | 38139/1000000 [1:44:59<30:59:42,  8.62it/s]

{'loss': Array(0.19562812, dtype=float32), 'loss_cross_entropy': Array(0.1818562, dtype=float32)}


  4%|▍         | 38149/1000000 [1:45:01<25:58:54, 10.28it/s]

{'loss': Array(0.20574987, dtype=float32), 'loss_cross_entropy': Array(0.19135635, dtype=float32)}


  4%|▍         | 38159/1000000 [1:45:03<35:39:23,  7.49it/s]

{'loss': Array(0.20408423, dtype=float32), 'loss_cross_entropy': Array(0.18937685, dtype=float32)}


  4%|▍         | 38169/1000000 [1:45:04<26:41:42, 10.01it/s]

{'loss': Array(0.19192295, dtype=float32), 'loss_cross_entropy': Array(0.17821066, dtype=float32)}


  4%|▍         | 38179/1000000 [1:45:05<25:29:51, 10.48it/s]

{'loss': Array(0.18310443, dtype=float32), 'loss_cross_entropy': Array(0.16961479, dtype=float32)}


  4%|▍         | 38189/1000000 [1:45:07<31:59:58,  8.35it/s]

{'loss': Array(0.19231144, dtype=float32), 'loss_cross_entropy': Array(0.17874269, dtype=float32)}


  4%|▍         | 38199/1000000 [1:45:08<26:29:50, 10.08it/s]

{'loss': Array(0.16672361, dtype=float32), 'loss_cross_entropy': Array(0.15433274, dtype=float32)}


  4%|▍         | 38209/1000000 [1:45:10<24:51:35, 10.75it/s]

{'loss': Array(0.18606772, dtype=float32), 'loss_cross_entropy': Array(0.17283191, dtype=float32)}


  4%|▍         | 38219/1000000 [1:45:11<28:30:41,  9.37it/s]

{'loss': Array(0.21252178, dtype=float32), 'loss_cross_entropy': Array(0.19759843, dtype=float32)}


  4%|▍         | 38229/1000000 [1:45:13<25:58:03, 10.29it/s]

{'loss': Array(0.17657553, dtype=float32), 'loss_cross_entropy': Array(0.1639511, dtype=float32)}


  4%|▍         | 38239/1000000 [1:45:15<32:16:13,  8.28it/s]

{'loss': Array(0.18522242, dtype=float32), 'loss_cross_entropy': Array(0.17187469, dtype=float32)}


  4%|▍         | 38249/1000000 [1:45:16<26:52:30,  9.94it/s]

{'loss': Array(0.1920865, dtype=float32), 'loss_cross_entropy': Array(0.17998475, dtype=float32)}


  4%|▍         | 38259/1000000 [1:45:18<39:14:50,  6.81it/s]

{'loss': Array(0.19081585, dtype=float32), 'loss_cross_entropy': Array(0.17759798, dtype=float32)}


  4%|▍         | 38269/1000000 [1:45:19<26:43:08, 10.00it/s]

{'loss': Array(0.18873303, dtype=float32), 'loss_cross_entropy': Array(0.1751138, dtype=float32)}


  4%|▍         | 38279/1000000 [1:45:21<40:15:23,  6.64it/s]

{'loss': Array(0.18354182, dtype=float32), 'loss_cross_entropy': Array(0.17015624, dtype=float32)}


  4%|▍         | 38289/1000000 [1:45:22<27:29:16,  9.72it/s]

{'loss': Array(0.19454835, dtype=float32), 'loss_cross_entropy': Array(0.18057267, dtype=float32)}


  4%|▍         | 38299/1000000 [1:45:24<25:03:01, 10.66it/s]

{'loss': Array(0.1806125, dtype=float32), 'loss_cross_entropy': Array(0.16842493, dtype=float32)}


  4%|▍         | 38309/1000000 [1:45:25<35:09:46,  7.60it/s]

{'loss': Array(0.18656869, dtype=float32), 'loss_cross_entropy': Array(0.17300284, dtype=float32)}


  4%|▍         | 38319/1000000 [1:45:27<26:12:00, 10.20it/s]

{'loss': Array(0.18927574, dtype=float32), 'loss_cross_entropy': Array(0.1761365, dtype=float32)}


  4%|▍         | 38329/1000000 [1:45:28<39:09:53,  6.82it/s]

{'loss': Array(0.16846465, dtype=float32), 'loss_cross_entropy': Array(0.15590015, dtype=float32)}


  4%|▍         | 38339/1000000 [1:45:30<26:52:25,  9.94it/s]

{'loss': Array(0.20086585, dtype=float32), 'loss_cross_entropy': Array(0.1865923, dtype=float32)}


  4%|▍         | 38349/1000000 [1:45:31<24:37:56, 10.84it/s]

{'loss': Array(0.19312133, dtype=float32), 'loss_cross_entropy': Array(0.1786573, dtype=float32)}


  4%|▍         | 38359/1000000 [1:45:33<27:49:35,  9.60it/s]

{'loss': Array(0.19452731, dtype=float32), 'loss_cross_entropy': Array(0.18032333, dtype=float32)}


  4%|▍         | 38369/1000000 [1:45:34<24:44:01, 10.80it/s]

{'loss': Array(0.19645014, dtype=float32), 'loss_cross_entropy': Array(0.18207793, dtype=float32)}


  4%|▍         | 38379/1000000 [1:45:36<28:06:27,  9.50it/s]

{'loss': Array(0.20095284, dtype=float32), 'loss_cross_entropy': Array(0.1880059, dtype=float32)}


  4%|▍         | 38389/1000000 [1:45:37<25:07:17, 10.63it/s]

{'loss': Array(0.19190565, dtype=float32), 'loss_cross_entropy': Array(0.17783703, dtype=float32)}


  4%|▍         | 38399/1000000 [1:45:39<38:56:55,  6.86it/s]

{'loss': Array(0.1873554, dtype=float32), 'loss_cross_entropy': Array(0.17390019, dtype=float32)}


  4%|▍         | 38409/1000000 [1:45:40<27:00:05,  9.89it/s]

{'loss': Array(0.18184344, dtype=float32), 'loss_cross_entropy': Array(0.1679025, dtype=float32)}


  4%|▍         | 38419/1000000 [1:45:41<25:27:05, 10.49it/s]

{'loss': Array(0.17573011, dtype=float32), 'loss_cross_entropy': Array(0.16247891, dtype=float32)}


  4%|▍         | 38429/1000000 [1:45:43<31:42:55,  8.42it/s]

{'loss': Array(0.19846879, dtype=float32), 'loss_cross_entropy': Array(0.18424813, dtype=float32)}


  4%|▍         | 38439/1000000 [1:45:45<26:40:42, 10.01it/s]

{'loss': Array(0.18130128, dtype=float32), 'loss_cross_entropy': Array(0.16768935, dtype=float32)}


  4%|▍         | 38449/1000000 [1:45:46<39:07:21,  6.83it/s]

{'loss': Array(0.16885585, dtype=float32), 'loss_cross_entropy': Array(0.15693934, dtype=float32)}


  4%|▍         | 38459/1000000 [1:45:48<27:11:53,  9.82it/s]

{'loss': Array(0.19411415, dtype=float32), 'loss_cross_entropy': Array(0.17965378, dtype=float32)}


  4%|▍         | 38469/1000000 [1:45:49<24:57:38, 10.70it/s]

{'loss': Array(0.19190797, dtype=float32), 'loss_cross_entropy': Array(0.17847703, dtype=float32)}


  4%|▍         | 38479/1000000 [1:45:51<30:21:30,  8.80it/s]

{'loss': Array(0.18280928, dtype=float32), 'loss_cross_entropy': Array(0.16901116, dtype=float32)}


  4%|▍         | 38489/1000000 [1:45:52<26:02:55, 10.25it/s]

{'loss': Array(0.19456728, dtype=float32), 'loss_cross_entropy': Array(0.18060027, dtype=float32)}


  4%|▍         | 38499/1000000 [1:45:54<35:20:58,  7.56it/s]

{'loss': Array(0.1979162, dtype=float32), 'loss_cross_entropy': Array(0.18369994, dtype=float32)}


  4%|▍         | 38509/1000000 [1:46:02<91:27:42,  2.92it/s] 

{'loss': Array(0.1715027, dtype=float32), 'loss_cross_entropy': Array(0.15852793, dtype=float32)}


  4%|▍         | 38519/1000000 [1:46:04<46:55:36,  5.69it/s]

{'loss': Array(0.20374547, dtype=float32), 'loss_cross_entropy': Array(0.1889401, dtype=float32)}


  4%|▍         | 38529/1000000 [1:46:05<28:27:20,  9.39it/s]

{'loss': Array(0.18250707, dtype=float32), 'loss_cross_entropy': Array(0.16873334, dtype=float32)}


  4%|▍         | 38539/1000000 [1:46:06<25:18:15, 10.55it/s]

{'loss': Array(0.18642388, dtype=float32), 'loss_cross_entropy': Array(0.1726229, dtype=float32)}


  4%|▍         | 38549/1000000 [1:46:08<32:08:44,  8.31it/s]

{'loss': Array(0.18681212, dtype=float32), 'loss_cross_entropy': Array(0.1735881, dtype=float32)}


  4%|▍         | 38559/1000000 [1:46:09<26:05:54, 10.23it/s]

{'loss': Array(0.19903411, dtype=float32), 'loss_cross_entropy': Array(0.18525569, dtype=float32)}


  4%|▍         | 38569/1000000 [1:46:11<24:46:55, 10.78it/s]

{'loss': Array(0.1867151, dtype=float32), 'loss_cross_entropy': Array(0.17318532, dtype=float32)}


  4%|▍         | 38579/1000000 [1:46:12<28:28:08,  9.38it/s]

{'loss': Array(0.19764827, dtype=float32), 'loss_cross_entropy': Array(0.18343394, dtype=float32)}


  4%|▍         | 38589/1000000 [1:46:14<25:25:43, 10.50it/s]

{'loss': Array(0.19321083, dtype=float32), 'loss_cross_entropy': Array(0.17931223, dtype=float32)}


  4%|▍         | 38599/1000000 [1:46:16<31:57:33,  8.36it/s]

{'loss': Array(0.19035597, dtype=float32), 'loss_cross_entropy': Array(0.1760131, dtype=float32)}


  4%|▍         | 38609/1000000 [1:46:17<25:48:51, 10.35it/s]

{'loss': Array(0.18530309, dtype=float32), 'loss_cross_entropy': Array(0.17269523, dtype=float32)}


  4%|▍         | 38619/1000000 [1:46:19<34:47:13,  7.68it/s]

{'loss': Array(0.20280953, dtype=float32), 'loss_cross_entropy': Array(0.187957, dtype=float32)}


  4%|▍         | 38629/1000000 [1:46:20<26:12:46, 10.19it/s]

{'loss': Array(0.19473529, dtype=float32), 'loss_cross_entropy': Array(0.18132898, dtype=float32)}


  4%|▍         | 38639/1000000 [1:46:21<24:51:38, 10.74it/s]

{'loss': Array(0.19191031, dtype=float32), 'loss_cross_entropy': Array(0.17809099, dtype=float32)}


  4%|▍         | 38649/1000000 [1:46:23<31:15:39,  8.54it/s]

{'loss': Array(0.19084461, dtype=float32), 'loss_cross_entropy': Array(0.17758429, dtype=float32)}


  4%|▍         | 38659/1000000 [1:46:24<26:01:26, 10.26it/s]

{'loss': Array(0.17491353, dtype=float32), 'loss_cross_entropy': Array(0.16197953, dtype=float32)}


  4%|▍         | 38669/1000000 [1:46:26<25:04:18, 10.65it/s]

{'loss': Array(0.1873114, dtype=float32), 'loss_cross_entropy': Array(0.17375286, dtype=float32)}


  4%|▍         | 38679/1000000 [1:46:27<28:49:01,  9.27it/s]

{'loss': Array(0.18263566, dtype=float32), 'loss_cross_entropy': Array(0.16963123, dtype=float32)}


  4%|▍         | 38689/1000000 [1:46:29<25:14:34, 10.58it/s]

{'loss': Array(0.20136006, dtype=float32), 'loss_cross_entropy': Array(0.18658713, dtype=float32)}


  4%|▍         | 38699/1000000 [1:46:31<31:59:54,  8.35it/s]

{'loss': Array(0.16181211, dtype=float32), 'loss_cross_entropy': Array(0.14908086, dtype=float32)}


  4%|▍         | 38709/1000000 [1:46:32<25:42:53, 10.38it/s]

{'loss': Array(0.1807531, dtype=float32), 'loss_cross_entropy': Array(0.16803427, dtype=float32)}


  4%|▍         | 38719/1000000 [1:46:34<39:19:40,  6.79it/s]

{'loss': Array(0.1915304, dtype=float32), 'loss_cross_entropy': Array(0.17853037, dtype=float32)}


  4%|▍         | 38729/1000000 [1:46:35<27:21:38,  9.76it/s]

{'loss': Array(0.18296324, dtype=float32), 'loss_cross_entropy': Array(0.16981846, dtype=float32)}


  4%|▍         | 38739/1000000 [1:46:36<26:12:09, 10.19it/s]

{'loss': Array(0.16196787, dtype=float32), 'loss_cross_entropy': Array(0.15044355, dtype=float32)}


  4%|▍         | 38749/1000000 [1:46:38<35:21:41,  7.55it/s]

{'loss': Array(0.20444103, dtype=float32), 'loss_cross_entropy': Array(0.19055407, dtype=float32)}


  4%|▍         | 38759/1000000 [1:46:39<26:32:16, 10.06it/s]

{'loss': Array(0.20133834, dtype=float32), 'loss_cross_entropy': Array(0.18698005, dtype=float32)}


  4%|▍         | 38769/1000000 [1:46:41<25:51:14, 10.33it/s]

{'loss': Array(0.1942605, dtype=float32), 'loss_cross_entropy': Array(0.1801949, dtype=float32)}


  4%|▍         | 38779/1000000 [1:46:43<29:42:56,  8.99it/s]

{'loss': Array(0.19227958, dtype=float32), 'loss_cross_entropy': Array(0.17859015, dtype=float32)}


  4%|▍         | 38789/1000000 [1:46:44<25:19:45, 10.54it/s]

{'loss': Array(0.19988692, dtype=float32), 'loss_cross_entropy': Array(0.18553124, dtype=float32)}


  4%|▍         | 38799/1000000 [1:46:46<31:56:41,  8.36it/s]

{'loss': Array(0.19299465, dtype=float32), 'loss_cross_entropy': Array(0.17897175, dtype=float32)}


  4%|▍         | 38809/1000000 [1:46:47<25:42:02, 10.39it/s]

{'loss': Array(0.1846192, dtype=float32), 'loss_cross_entropy': Array(0.17076083, dtype=float32)}


  4%|▍         | 38819/1000000 [1:46:48<25:41:03, 10.40it/s]

{'loss': Array(0.16798179, dtype=float32), 'loss_cross_entropy': Array(0.15534227, dtype=float32)}


  4%|▍         | 38829/1000000 [1:46:50<28:28:39,  9.38it/s]

{'loss': Array(0.20669861, dtype=float32), 'loss_cross_entropy': Array(0.19162835, dtype=float32)}


  4%|▍         | 38839/1000000 [1:46:52<25:55:05, 10.30it/s]

{'loss': Array(0.1933212, dtype=float32), 'loss_cross_entropy': Array(0.1792048, dtype=float32)}


  4%|▍         | 38849/1000000 [1:46:53<35:24:52,  7.54it/s]

{'loss': Array(0.17885928, dtype=float32), 'loss_cross_entropy': Array(0.16457152, dtype=float32)}


  4%|▍         | 38859/1000000 [1:46:55<27:13:32,  9.81it/s]

{'loss': Array(0.17684731, dtype=float32), 'loss_cross_entropy': Array(0.16334508, dtype=float32)}


  4%|▍         | 38869/1000000 [1:46:56<24:56:45, 10.70it/s]

{'loss': Array(0.1928503, dtype=float32), 'loss_cross_entropy': Array(0.17918834, dtype=float32)}


  4%|▍         | 38879/1000000 [1:46:58<28:23:53,  9.40it/s]

{'loss': Array(0.19175155, dtype=float32), 'loss_cross_entropy': Array(0.17798875, dtype=float32)}


  4%|▍         | 38889/1000000 [1:46:59<26:12:59, 10.18it/s]

{'loss': Array(0.18052858, dtype=float32), 'loss_cross_entropy': Array(0.16761294, dtype=float32)}


  4%|▍         | 38899/1000000 [1:47:01<31:08:45,  8.57it/s]

{'loss': Array(0.17523745, dtype=float32), 'loss_cross_entropy': Array(0.16232064, dtype=float32)}


  4%|▍         | 38909/1000000 [1:47:02<26:30:51, 10.07it/s]

{'loss': Array(0.17259783, dtype=float32), 'loss_cross_entropy': Array(0.16048606, dtype=float32)}


  4%|▍         | 38919/1000000 [1:47:04<39:35:02,  6.74it/s]

{'loss': Array(0.17992315, dtype=float32), 'loss_cross_entropy': Array(0.16626474, dtype=float32)}


  4%|▍         | 38929/1000000 [1:47:05<27:18:41,  9.77it/s]

{'loss': Array(0.18568468, dtype=float32), 'loss_cross_entropy': Array(0.17212547, dtype=float32)}


  4%|▍         | 38939/1000000 [1:47:07<25:25:22, 10.50it/s]

{'loss': Array(0.21481477, dtype=float32), 'loss_cross_entropy': Array(0.20025086, dtype=float32)}


  4%|▍         | 38949/1000000 [1:47:09<32:07:13,  8.31it/s]

{'loss': Array(0.19811185, dtype=float32), 'loss_cross_entropy': Array(0.18413736, dtype=float32)}


  4%|▍         | 38959/1000000 [1:47:10<26:42:41,  9.99it/s]

{'loss': Array(0.19283497, dtype=float32), 'loss_cross_entropy': Array(0.17918447, dtype=float32)}


  4%|▍         | 38969/1000000 [1:47:12<34:58:25,  7.63it/s]

{'loss': Array(0.21294561, dtype=float32), 'loss_cross_entropy': Array(0.19850907, dtype=float32)}


  4%|▍         | 38979/1000000 [1:47:13<26:51:01,  9.94it/s]

{'loss': Array(0.1936351, dtype=float32), 'loss_cross_entropy': Array(0.17958872, dtype=float32)}


  4%|▍         | 38989/1000000 [1:47:15<35:32:17,  7.51it/s]

{'loss': Array(0.1937325, dtype=float32), 'loss_cross_entropy': Array(0.17865436, dtype=float32)}


  4%|▍         | 38999/1000000 [1:47:16<26:38:57, 10.02it/s]

{'loss': Array(0.20234194, dtype=float32), 'loss_cross_entropy': Array(0.18710789, dtype=float32)}


  4%|▍         | 39009/1000000 [1:47:24<87:12:42,  3.06it/s] 

{'loss': Array(0.20343988, dtype=float32), 'loss_cross_entropy': Array(0.18913852, dtype=float32)}


  4%|▍         | 39019/1000000 [1:47:26<40:08:14,  6.65it/s]

{'loss': Array(0.17768957, dtype=float32), 'loss_cross_entropy': Array(0.16445608, dtype=float32)}


  4%|▍         | 39029/1000000 [1:47:27<27:07:55,  9.84it/s]

{'loss': Array(0.17808013, dtype=float32), 'loss_cross_entropy': Array(0.16488872, dtype=float32)}


  4%|▍         | 39039/1000000 [1:47:29<34:46:49,  7.67it/s]

{'loss': Array(0.17274195, dtype=float32), 'loss_cross_entropy': Array(0.16002907, dtype=float32)}


  4%|▍         | 39049/1000000 [1:47:30<26:30:52, 10.07it/s]

{'loss': Array(0.17178704, dtype=float32), 'loss_cross_entropy': Array(0.15882461, dtype=float32)}


  4%|▍         | 39059/1000000 [1:47:32<39:26:20,  6.77it/s]

{'loss': Array(0.17328374, dtype=float32), 'loss_cross_entropy': Array(0.1603446, dtype=float32)}


  4%|▍         | 39069/1000000 [1:47:33<27:24:38,  9.74it/s]

{'loss': Array(0.18793735, dtype=float32), 'loss_cross_entropy': Array(0.174696, dtype=float32)}


  4%|▍         | 39079/1000000 [1:47:34<24:53:07, 10.73it/s]

{'loss': Array(0.19007352, dtype=float32), 'loss_cross_entropy': Array(0.1763073, dtype=float32)}


  4%|▍         | 39089/1000000 [1:47:36<31:34:53,  8.45it/s]

{'loss': Array(0.16599758, dtype=float32), 'loss_cross_entropy': Array(0.15353332, dtype=float32)}


  4%|▍         | 39099/1000000 [1:47:37<25:45:09, 10.36it/s]

{'loss': Array(0.16724788, dtype=float32), 'loss_cross_entropy': Array(0.15524247, dtype=float32)}


  4%|▍         | 39109/1000000 [1:47:39<35:11:19,  7.59it/s]

{'loss': Array(0.19692871, dtype=float32), 'loss_cross_entropy': Array(0.18211643, dtype=float32)}


  4%|▍         | 39119/1000000 [1:47:41<26:23:15, 10.12it/s]

{'loss': Array(0.1909886, dtype=float32), 'loss_cross_entropy': Array(0.17781651, dtype=float32)}


  4%|▍         | 39129/1000000 [1:47:42<24:42:20, 10.80it/s]

{'loss': Array(0.18836057, dtype=float32), 'loss_cross_entropy': Array(0.17526345, dtype=float32)}


  4%|▍         | 39139/1000000 [1:47:44<28:22:19,  9.41it/s]

{'loss': Array(0.1959135, dtype=float32), 'loss_cross_entropy': Array(0.18201135, dtype=float32)}


  4%|▍         | 39149/1000000 [1:47:45<25:04:31, 10.64it/s]

{'loss': Array(0.18326873, dtype=float32), 'loss_cross_entropy': Array(0.16975874, dtype=float32)}


  4%|▍         | 39159/1000000 [1:47:47<38:52:04,  6.87it/s]

{'loss': Array(0.19988525, dtype=float32), 'loss_cross_entropy': Array(0.18602417, dtype=float32)}


  4%|▍         | 39169/1000000 [1:47:48<26:50:30,  9.94it/s]

{'loss': Array(0.19297476, dtype=float32), 'loss_cross_entropy': Array(0.17802586, dtype=float32)}


  4%|▍         | 39179/1000000 [1:47:49<24:40:50, 10.81it/s]

{'loss': Array(0.19845648, dtype=float32), 'loss_cross_entropy': Array(0.18425524, dtype=float32)}


  4%|▍         | 39189/1000000 [1:47:51<32:21:36,  8.25it/s]

{'loss': Array(0.20963912, dtype=float32), 'loss_cross_entropy': Array(0.1938605, dtype=float32)}


  4%|▍         | 39199/1000000 [1:47:52<26:11:29, 10.19it/s]

{'loss': Array(0.21421646, dtype=float32), 'loss_cross_entropy': Array(0.1994319, dtype=float32)}


  4%|▍         | 39209/1000000 [1:47:54<35:14:56,  7.57it/s]

{'loss': Array(0.18907583, dtype=float32), 'loss_cross_entropy': Array(0.17552751, dtype=float32)}


  4%|▍         | 39219/1000000 [1:47:56<26:46:45,  9.97it/s]

{'loss': Array(0.19015898, dtype=float32), 'loss_cross_entropy': Array(0.17733802, dtype=float32)}


  4%|▍         | 39229/1000000 [1:47:57<35:37:45,  7.49it/s]

{'loss': Array(0.18682055, dtype=float32), 'loss_cross_entropy': Array(0.17312865, dtype=float32)}


  4%|▍         | 39239/1000000 [1:47:59<27:36:59,  9.66it/s]

{'loss': Array(0.18271133, dtype=float32), 'loss_cross_entropy': Array(0.16918473, dtype=float32)}


  4%|▍         | 39249/1000000 [1:48:00<25:53:58, 10.30it/s]

{'loss': Array(0.16827832, dtype=float32), 'loss_cross_entropy': Array(0.15518314, dtype=float32)}


  4%|▍         | 39259/1000000 [1:48:02<30:29:46,  8.75it/s]

{'loss': Array(0.18527701, dtype=float32), 'loss_cross_entropy': Array(0.17193262, dtype=float32)}


  4%|▍         | 39269/1000000 [1:48:03<26:24:29, 10.11it/s]

{'loss': Array(0.19951539, dtype=float32), 'loss_cross_entropy': Array(0.18520588, dtype=float32)}


  4%|▍         | 39279/1000000 [1:48:05<31:30:00,  8.47it/s]

{'loss': Array(0.19087668, dtype=float32), 'loss_cross_entropy': Array(0.17754875, dtype=float32)}


  4%|▍         | 39289/1000000 [1:48:06<26:01:11, 10.26it/s]

{'loss': Array(0.17876002, dtype=float32), 'loss_cross_entropy': Array(0.16571103, dtype=float32)}


  4%|▍         | 39299/1000000 [1:48:08<38:46:00,  6.88it/s]

{'loss': Array(0.20128882, dtype=float32), 'loss_cross_entropy': Array(0.18747622, dtype=float32)}


  4%|▍         | 39309/1000000 [1:48:09<27:06:24,  9.84it/s]

{'loss': Array(0.17779325, dtype=float32), 'loss_cross_entropy': Array(0.16361864, dtype=float32)}


  4%|▍         | 39319/1000000 [1:48:11<39:19:22,  6.79it/s]

{'loss': Array(0.1772096, dtype=float32), 'loss_cross_entropy': Array(0.16398756, dtype=float32)}


  4%|▍         | 39329/1000000 [1:48:12<27:09:00,  9.83it/s]

{'loss': Array(0.18963772, dtype=float32), 'loss_cross_entropy': Array(0.17651013, dtype=float32)}


  4%|▍         | 39339/1000000 [1:48:14<24:39:41, 10.82it/s]

{'loss': Array(0.18059205, dtype=float32), 'loss_cross_entropy': Array(0.16695826, dtype=float32)}


  4%|▍         | 39349/1000000 [1:48:15<34:29:37,  7.74it/s]

{'loss': Array(0.18692558, dtype=float32), 'loss_cross_entropy': Array(0.1728229, dtype=float32)}


  4%|▍         | 39359/1000000 [1:48:17<26:00:35, 10.26it/s]

{'loss': Array(0.19988517, dtype=float32), 'loss_cross_entropy': Array(0.18588421, dtype=float32)}


  4%|▍         | 39369/1000000 [1:48:18<38:47:03,  6.88it/s]

{'loss': Array(0.188898, dtype=float32), 'loss_cross_entropy': Array(0.17509528, dtype=float32)}


  4%|▍         | 39379/1000000 [1:48:20<26:40:27, 10.00it/s]

{'loss': Array(0.17619237, dtype=float32), 'loss_cross_entropy': Array(0.1642103, dtype=float32)}


  4%|▍         | 39389/1000000 [1:48:21<25:12:40, 10.58it/s]

{'loss': Array(0.18679872, dtype=float32), 'loss_cross_entropy': Array(0.17279495, dtype=float32)}


  4%|▍         | 39399/1000000 [1:48:23<28:06:53,  9.49it/s]

{'loss': Array(0.18689369, dtype=float32), 'loss_cross_entropy': Array(0.17325766, dtype=float32)}


  4%|▍         | 39409/1000000 [1:48:24<25:13:58, 10.57it/s]

{'loss': Array(0.18430524, dtype=float32), 'loss_cross_entropy': Array(0.1715173, dtype=float32)}


  4%|▍         | 39419/1000000 [1:48:26<28:06:33,  9.49it/s]

{'loss': Array(0.18061799, dtype=float32), 'loss_cross_entropy': Array(0.16748376, dtype=float32)}


  4%|▍         | 39429/1000000 [1:48:27<25:50:06, 10.33it/s]

{'loss': Array(0.17816252, dtype=float32), 'loss_cross_entropy': Array(0.16541675, dtype=float32)}


  4%|▍         | 39439/1000000 [1:48:29<39:01:33,  6.84it/s]

{'loss': Array(0.19133125, dtype=float32), 'loss_cross_entropy': Array(0.1770582, dtype=float32)}


  4%|▍         | 39449/1000000 [1:48:30<27:07:40,  9.84it/s]

{'loss': Array(0.18705986, dtype=float32), 'loss_cross_entropy': Array(0.17344445, dtype=float32)}


  4%|▍         | 39459/1000000 [1:48:32<24:46:30, 10.77it/s]

{'loss': Array(0.17742378, dtype=float32), 'loss_cross_entropy': Array(0.16487268, dtype=float32)}


  4%|▍         | 39469/1000000 [1:48:33<31:02:17,  8.60it/s]

{'loss': Array(0.19726197, dtype=float32), 'loss_cross_entropy': Array(0.18331908, dtype=float32)}


  4%|▍         | 39479/1000000 [1:48:35<25:30:08, 10.46it/s]

{'loss': Array(0.18595861, dtype=float32), 'loss_cross_entropy': Array(0.17309882, dtype=float32)}


  4%|▍         | 39489/1000000 [1:48:36<38:50:56,  6.87it/s]

{'loss': Array(0.18051825, dtype=float32), 'loss_cross_entropy': Array(0.16737066, dtype=float32)}


  4%|▍         | 39499/1000000 [1:48:38<27:38:30,  9.65it/s]

{'loss': Array(0.21309803, dtype=float32), 'loss_cross_entropy': Array(0.19833402, dtype=float32)}


  4%|▍         | 39509/1000000 [1:48:45<86:23:42,  3.09it/s] 

{'loss': Array(0.17836104, dtype=float32), 'loss_cross_entropy': Array(0.16552171, dtype=float32)}


  4%|▍         | 39519/1000000 [1:48:47<39:57:52,  6.68it/s]

{'loss': Array(0.17431197, dtype=float32), 'loss_cross_entropy': Array(0.16185345, dtype=float32)}


  4%|▍         | 39529/1000000 [1:48:48<27:29:36,  9.70it/s]

{'loss': Array(0.18019842, dtype=float32), 'loss_cross_entropy': Array(0.1668158, dtype=float32)}


  4%|▍         | 39539/1000000 [1:48:50<32:56:34,  8.10it/s]

{'loss': Array(0.18194507, dtype=float32), 'loss_cross_entropy': Array(0.16831549, dtype=float32)}


  4%|▍         | 39549/1000000 [1:48:52<26:21:14, 10.12it/s]

{'loss': Array(0.19461952, dtype=float32), 'loss_cross_entropy': Array(0.18067847, dtype=float32)}


  4%|▍         | 39559/1000000 [1:48:53<39:43:47,  6.72it/s]

{'loss': Array(0.2046148, dtype=float32), 'loss_cross_entropy': Array(0.18993214, dtype=float32)}


  4%|▍         | 39569/1000000 [1:48:55<27:16:42,  9.78it/s]

{'loss': Array(0.21148153, dtype=float32), 'loss_cross_entropy': Array(0.19657002, dtype=float32)}


  4%|▍         | 39579/1000000 [1:48:56<24:45:51, 10.77it/s]

{'loss': Array(0.17228377, dtype=float32), 'loss_cross_entropy': Array(0.15862344, dtype=float32)}


  4%|▍         | 39589/1000000 [1:48:58<28:09:07,  9.48it/s]

{'loss': Array(0.19194202, dtype=float32), 'loss_cross_entropy': Array(0.17855309, dtype=float32)}


  4%|▍         | 39599/1000000 [1:48:59<26:04:31, 10.23it/s]

{'loss': Array(0.16634734, dtype=float32), 'loss_cross_entropy': Array(0.15438846, dtype=float32)}


  4%|▍         | 39609/1000000 [1:49:01<40:11:56,  6.64it/s]

{'loss': Array(0.17842154, dtype=float32), 'loss_cross_entropy': Array(0.16545978, dtype=float32)}


  4%|▍         | 39619/1000000 [1:49:02<28:40:42,  9.30it/s]

{'loss': Array(0.19393705, dtype=float32), 'loss_cross_entropy': Array(0.1809155, dtype=float32)}


  4%|▍         | 39629/1000000 [1:49:04<25:19:32, 10.53it/s]

{'loss': Array(0.1754487, dtype=float32), 'loss_cross_entropy': Array(0.16298115, dtype=float32)}


  4%|▍         | 39639/1000000 [1:49:05<29:54:21,  8.92it/s]

{'loss': Array(0.18944989, dtype=float32), 'loss_cross_entropy': Array(0.1762306, dtype=float32)}


  4%|▍         | 39649/1000000 [1:49:07<26:06:49, 10.22it/s]

{'loss': Array(0.18850832, dtype=float32), 'loss_cross_entropy': Array(0.1757196, dtype=float32)}


  4%|▍         | 39659/1000000 [1:49:09<35:50:16,  7.44it/s]

{'loss': Array(0.19849332, dtype=float32), 'loss_cross_entropy': Array(0.18420745, dtype=float32)}


  4%|▍         | 39669/1000000 [1:49:10<27:18:53,  9.77it/s]

{'loss': Array(0.18015213, dtype=float32), 'loss_cross_entropy': Array(0.16733746, dtype=float32)}


  4%|▍         | 39679/1000000 [1:49:12<39:26:00,  6.76it/s]

{'loss': Array(0.19658177, dtype=float32), 'loss_cross_entropy': Array(0.18322156, dtype=float32)}


  4%|▍         | 39689/1000000 [1:49:13<27:18:26,  9.77it/s]

{'loss': Array(0.19142815, dtype=float32), 'loss_cross_entropy': Array(0.17778715, dtype=float32)}


  4%|▍         | 39699/1000000 [1:49:14<24:48:34, 10.75it/s]

{'loss': Array(0.1918465, dtype=float32), 'loss_cross_entropy': Array(0.1784003, dtype=float32)}


  4%|▍         | 39709/1000000 [1:49:16<31:21:14,  8.51it/s]

{'loss': Array(0.20894201, dtype=float32), 'loss_cross_entropy': Array(0.19487453, dtype=float32)}


  4%|▍         | 39719/1000000 [1:49:17<25:46:55, 10.35it/s]

{'loss': Array(0.1950721, dtype=float32), 'loss_cross_entropy': Array(0.18079647, dtype=float32)}


  4%|▍         | 39729/1000000 [1:49:19<34:50:11,  7.66it/s]

{'loss': Array(0.1847053, dtype=float32), 'loss_cross_entropy': Array(0.17223997, dtype=float32)}


  4%|▍         | 39739/1000000 [1:49:20<26:59:25,  9.88it/s]

{'loss': Array(0.18666601, dtype=float32), 'loss_cross_entropy': Array(0.17284414, dtype=float32)}


  4%|▍         | 39749/1000000 [1:49:22<25:23:17, 10.51it/s]

{'loss': Array(0.19006401, dtype=float32), 'loss_cross_entropy': Array(0.1766957, dtype=float32)}


  4%|▍         | 39759/1000000 [1:49:24<28:31:09,  9.35it/s]

{'loss': Array(0.16537704, dtype=float32), 'loss_cross_entropy': Array(0.15338188, dtype=float32)}


  4%|▍         | 39769/1000000 [1:49:25<25:19:40, 10.53it/s]

{'loss': Array(0.1911854, dtype=float32), 'loss_cross_entropy': Array(0.17737862, dtype=float32)}


  4%|▍         | 39779/1000000 [1:49:27<39:03:55,  6.83it/s]

{'loss': Array(0.18373577, dtype=float32), 'loss_cross_entropy': Array(0.17052461, dtype=float32)}


  4%|▍         | 39789/1000000 [1:49:28<27:41:25,  9.63it/s]

{'loss': Array(0.17793332, dtype=float32), 'loss_cross_entropy': Array(0.16493091, dtype=float32)}


  4%|▍         | 39799/1000000 [1:49:29<25:29:19, 10.46it/s]

{'loss': Array(0.18764904, dtype=float32), 'loss_cross_entropy': Array(0.17339121, dtype=float32)}


  4%|▍         | 39809/1000000 [1:49:31<32:02:15,  8.33it/s]

{'loss': Array(0.18657082, dtype=float32), 'loss_cross_entropy': Array(0.17274928, dtype=float32)}


  4%|▍         | 39819/1000000 [1:49:33<25:53:40, 10.30it/s]

{'loss': Array(0.17081451, dtype=float32), 'loss_cross_entropy': Array(0.15781909, dtype=float32)}


  4%|▍         | 39829/1000000 [1:49:34<35:24:58,  7.53it/s]

{'loss': Array(0.20458129, dtype=float32), 'loss_cross_entropy': Array(0.19025755, dtype=float32)}


  4%|▍         | 39839/1000000 [1:49:36<26:39:36, 10.00it/s]

{'loss': Array(0.18932776, dtype=float32), 'loss_cross_entropy': Array(0.1753444, dtype=float32)}


  4%|▍         | 39849/1000000 [1:49:37<35:22:34,  7.54it/s]

{'loss': Array(0.17845498, dtype=float32), 'loss_cross_entropy': Array(0.16556449, dtype=float32)}


  4%|▍         | 39859/1000000 [1:49:39<27:06:04,  9.84it/s]

{'loss': Array(0.1953064, dtype=float32), 'loss_cross_entropy': Array(0.18209794, dtype=float32)}


  4%|▍         | 39869/1000000 [1:49:40<24:57:20, 10.69it/s]

{'loss': Array(0.1956646, dtype=float32), 'loss_cross_entropy': Array(0.18144892, dtype=float32)}


  4%|▍         | 39879/1000000 [1:49:42<29:37:44,  9.00it/s]

{'loss': Array(0.1893272, dtype=float32), 'loss_cross_entropy': Array(0.17661284, dtype=float32)}


  4%|▍         | 39889/1000000 [1:49:43<26:00:06, 10.26it/s]

{'loss': Array(0.16449715, dtype=float32), 'loss_cross_entropy': Array(0.1526518, dtype=float32)}


  4%|▍         | 39899/1000000 [1:49:45<32:55:53,  8.10it/s]

{'loss': Array(0.20165674, dtype=float32), 'loss_cross_entropy': Array(0.18704389, dtype=float32)}


  4%|▍         | 39909/1000000 [1:49:46<26:33:08, 10.04it/s]

{'loss': Array(0.17704044, dtype=float32), 'loss_cross_entropy': Array(0.1645763, dtype=float32)}


  4%|▍         | 39919/1000000 [1:49:48<40:09:14,  6.64it/s]

{'loss': Array(0.19600101, dtype=float32), 'loss_cross_entropy': Array(0.18161295, dtype=float32)}


  4%|▍         | 39929/1000000 [1:49:50<27:20:15,  9.76it/s]

{'loss': Array(0.20811431, dtype=float32), 'loss_cross_entropy': Array(0.19319029, dtype=float32)}


  4%|▍         | 39939/1000000 [1:49:51<39:56:58,  6.68it/s]

{'loss': Array(0.20066492, dtype=float32), 'loss_cross_entropy': Array(0.1871491, dtype=float32)}


  4%|▍         | 39949/1000000 [1:49:53<27:36:20,  9.66it/s]

{'loss': Array(0.19809453, dtype=float32), 'loss_cross_entropy': Array(0.182981, dtype=float32)}


  4%|▍         | 39959/1000000 [1:49:54<24:40:37, 10.81it/s]

{'loss': Array(0.16892003, dtype=float32), 'loss_cross_entropy': Array(0.1563168, dtype=float32)}


  4%|▍         | 39969/1000000 [1:49:56<34:24:10,  7.75it/s]

{'loss': Array(0.18170796, dtype=float32), 'loss_cross_entropy': Array(0.16873418, dtype=float32)}


  4%|▍         | 39979/1000000 [1:49:57<26:10:54, 10.19it/s]

{'loss': Array(0.18143845, dtype=float32), 'loss_cross_entropy': Array(0.16926348, dtype=float32)}


  4%|▍         | 39989/1000000 [1:49:59<40:06:20,  6.65it/s]

{'loss': Array(0.18950723, dtype=float32), 'loss_cross_entropy': Array(0.17599379, dtype=float32)}


  4%|▍         | 39999/1000000 [1:50:00<28:30:23,  9.35it/s]

{'loss': Array(0.18315068, dtype=float32), 'loss_cross_entropy': Array(0.17095219, dtype=float32)}


  4%|▍         | 40009/1000000 [1:50:08<89:27:42,  2.98it/s] 

{'loss': Array(0.18977545, dtype=float32), 'loss_cross_entropy': Array(0.17615314, dtype=float32)}


  4%|▍         | 40019/1000000 [1:50:10<38:56:26,  6.85it/s] 

{'loss': Array(0.19478281, dtype=float32), 'loss_cross_entropy': Array(0.18112825, dtype=float32)}


  4%|▍         | 40029/1000000 [1:50:11<26:41:24,  9.99it/s]

{'loss': Array(0.18227616, dtype=float32), 'loss_cross_entropy': Array(0.16854718, dtype=float32)}


  4%|▍         | 40039/1000000 [1:50:13<28:55:28,  9.22it/s]

{'loss': Array(0.20258148, dtype=float32), 'loss_cross_entropy': Array(0.18904842, dtype=float32)}


  4%|▍         | 40049/1000000 [1:50:14<25:24:43, 10.49it/s]

{'loss': Array(0.19940336, dtype=float32), 'loss_cross_entropy': Array(0.18582484, dtype=float32)}


  4%|▍         | 40059/1000000 [1:50:16<39:03:57,  6.83it/s]

{'loss': Array(0.18724065, dtype=float32), 'loss_cross_entropy': Array(0.17433898, dtype=float32)}


  4%|▍         | 40069/1000000 [1:50:17<26:57:16,  9.89it/s]

{'loss': Array(0.18117395, dtype=float32), 'loss_cross_entropy': Array(0.16843817, dtype=float32)}


  4%|▍         | 40079/1000000 [1:50:19<25:13:43, 10.57it/s]

{'loss': Array(0.18983689, dtype=float32), 'loss_cross_entropy': Array(0.17648886, dtype=float32)}


  4%|▍         | 40089/1000000 [1:50:21<31:32:29,  8.45it/s]

{'loss': Array(0.19423802, dtype=float32), 'loss_cross_entropy': Array(0.18032256, dtype=float32)}


  4%|▍         | 40099/1000000 [1:50:22<25:59:07, 10.26it/s]

{'loss': Array(0.18509956, dtype=float32), 'loss_cross_entropy': Array(0.17090835, dtype=float32)}


  4%|▍         | 40109/1000000 [1:50:24<39:00:28,  6.84it/s]

{'loss': Array(0.19438136, dtype=float32), 'loss_cross_entropy': Array(0.18114138, dtype=float32)}


  4%|▍         | 40119/1000000 [1:50:25<26:49:48,  9.94it/s]

{'loss': Array(0.17797028, dtype=float32), 'loss_cross_entropy': Array(0.16526344, dtype=float32)}


  4%|▍         | 40129/1000000 [1:50:26<25:08:57, 10.60it/s]

{'loss': Array(0.19752856, dtype=float32), 'loss_cross_entropy': Array(0.18343846, dtype=float32)}


  4%|▍         | 40139/1000000 [1:50:28<29:18:56,  9.10it/s]

{'loss': Array(0.19516203, dtype=float32), 'loss_cross_entropy': Array(0.18060122, dtype=float32)}


  4%|▍         | 40149/1000000 [1:50:29<26:15:37, 10.15it/s]

{'loss': Array(0.1707299, dtype=float32), 'loss_cross_entropy': Array(0.15882437, dtype=float32)}


  4%|▍         | 40159/1000000 [1:50:31<35:01:39,  7.61it/s]

{'loss': Array(0.19261162, dtype=float32), 'loss_cross_entropy': Array(0.17975897, dtype=float32)}


  4%|▍         | 40169/1000000 [1:50:32<26:52:05,  9.92it/s]

{'loss': Array(0.17211856, dtype=float32), 'loss_cross_entropy': Array(0.1599434, dtype=float32)}


  4%|▍         | 40179/1000000 [1:50:34<24:55:02, 10.70it/s]

{'loss': Array(0.17096335, dtype=float32), 'loss_cross_entropy': Array(0.15845938, dtype=float32)}


  4%|▍         | 40189/1000000 [1:50:35<31:23:03,  8.50it/s]

{'loss': Array(0.18829973, dtype=float32), 'loss_cross_entropy': Array(0.17503287, dtype=float32)}


  4%|▍         | 40199/1000000 [1:50:37<25:37:16, 10.41it/s]

{'loss': Array(0.20227806, dtype=float32), 'loss_cross_entropy': Array(0.18756364, dtype=float32)}


  4%|▍         | 40209/1000000 [1:50:38<24:31:44, 10.87it/s]

{'loss': Array(0.1836436, dtype=float32), 'loss_cross_entropy': Array(0.16972795, dtype=float32)}


  4%|▍         | 40219/1000000 [1:50:40<28:06:33,  9.48it/s]

{'loss': Array(0.1863638, dtype=float32), 'loss_cross_entropy': Array(0.17299353, dtype=float32)}


  4%|▍         | 40229/1000000 [1:50:41<25:18:53, 10.53it/s]

{'loss': Array(0.20605719, dtype=float32), 'loss_cross_entropy': Array(0.19248186, dtype=float32)}


  4%|▍         | 40239/1000000 [1:50:43<31:31:16,  8.46it/s]

{'loss': Array(0.19626777, dtype=float32), 'loss_cross_entropy': Array(0.18154056, dtype=float32)}


  4%|▍         | 40249/1000000 [1:50:44<25:22:53, 10.50it/s]

{'loss': Array(0.19177443, dtype=float32), 'loss_cross_entropy': Array(0.1788085, dtype=float32)}


  4%|▍         | 40259/1000000 [1:50:46<38:33:44,  6.91it/s]

{'loss': Array(0.188501, dtype=float32), 'loss_cross_entropy': Array(0.17443746, dtype=float32)}


  4%|▍         | 40269/1000000 [1:50:47<27:01:47,  9.86it/s]

{'loss': Array(0.19964984, dtype=float32), 'loss_cross_entropy': Array(0.18516494, dtype=float32)}


  4%|▍         | 40279/1000000 [1:50:49<39:42:55,  6.71it/s]

{'loss': Array(0.16921589, dtype=float32), 'loss_cross_entropy': Array(0.15651445, dtype=float32)}


  4%|▍         | 40289/1000000 [1:50:50<27:22:53,  9.74it/s]

{'loss': Array(0.19711976, dtype=float32), 'loss_cross_entropy': Array(0.18377936, dtype=float32)}


  4%|▍         | 40299/1000000 [1:50:52<25:26:35, 10.48it/s]

{'loss': Array(0.19357716, dtype=float32), 'loss_cross_entropy': Array(0.18033932, dtype=float32)}


  4%|▍         | 40309/1000000 [1:50:53<35:08:51,  7.58it/s]

{'loss': Array(0.18393147, dtype=float32), 'loss_cross_entropy': Array(0.17101626, dtype=float32)}


  4%|▍         | 40319/1000000 [1:50:55<26:04:10, 10.23it/s]

{'loss': Array(0.19057687, dtype=float32), 'loss_cross_entropy': Array(0.17721152, dtype=float32)}


  4%|▍         | 40329/1000000 [1:50:56<38:53:05,  6.86it/s]

{'loss': Array(0.17899188, dtype=float32), 'loss_cross_entropy': Array(0.16590829, dtype=float32)}


  4%|▍         | 40339/1000000 [1:50:58<27:29:30,  9.70it/s]

{'loss': Array(0.18193905, dtype=float32), 'loss_cross_entropy': Array(0.16829148, dtype=float32)}


  4%|▍         | 40349/1000000 [1:50:59<25:38:29, 10.40it/s]

{'loss': Array(0.1879035, dtype=float32), 'loss_cross_entropy': Array(0.17474262, dtype=float32)}


  4%|▍         | 40359/1000000 [1:51:01<29:51:33,  8.93it/s]

{'loss': Array(0.18318686, dtype=float32), 'loss_cross_entropy': Array(0.17008604, dtype=float32)}


  4%|▍         | 40369/1000000 [1:51:02<26:05:31, 10.22it/s]

{'loss': Array(0.18129943, dtype=float32), 'loss_cross_entropy': Array(0.1680362, dtype=float32)}


  4%|▍         | 40379/1000000 [1:51:04<28:46:36,  9.26it/s]

{'loss': Array(0.19345087, dtype=float32), 'loss_cross_entropy': Array(0.17936757, dtype=float32)}


  4%|▍         | 40389/1000000 [1:51:06<25:52:29, 10.30it/s]

{'loss': Array(0.18319559, dtype=float32), 'loss_cross_entropy': Array(0.16918808, dtype=float32)}


  4%|▍         | 40399/1000000 [1:51:07<38:55:01,  6.85it/s]

{'loss': Array(0.18582246, dtype=float32), 'loss_cross_entropy': Array(0.17189007, dtype=float32)}


  4%|▍         | 40409/1000000 [1:51:09<27:10:04,  9.81it/s]

{'loss': Array(0.16738005, dtype=float32), 'loss_cross_entropy': Array(0.15515062, dtype=float32)}


  4%|▍         | 40419/1000000 [1:51:10<24:45:47, 10.76it/s]

{'loss': Array(0.19334285, dtype=float32), 'loss_cross_entropy': Array(0.17931147, dtype=float32)}


  4%|▍         | 40429/1000000 [1:51:12<31:36:35,  8.43it/s]

{'loss': Array(0.19126415, dtype=float32), 'loss_cross_entropy': Array(0.17801178, dtype=float32)}


  4%|▍         | 40439/1000000 [1:51:13<25:44:00, 10.36it/s]

{'loss': Array(0.18104036, dtype=float32), 'loss_cross_entropy': Array(0.16901709, dtype=float32)}


  4%|▍         | 40449/1000000 [1:51:15<39:08:01,  6.81it/s]

{'loss': Array(0.18733673, dtype=float32), 'loss_cross_entropy': Array(0.17450015, dtype=float32)}


  4%|▍         | 40459/1000000 [1:51:16<26:59:23,  9.88it/s]

{'loss': Array(0.18312882, dtype=float32), 'loss_cross_entropy': Array(0.17029585, dtype=float32)}


  4%|▍         | 40469/1000000 [1:51:17<25:06:27, 10.62it/s]

{'loss': Array(0.20633855, dtype=float32), 'loss_cross_entropy': Array(0.19183184, dtype=float32)}


  4%|▍         | 40479/1000000 [1:51:19<29:00:22,  9.19it/s]

{'loss': Array(0.1764361, dtype=float32), 'loss_cross_entropy': Array(0.16452476, dtype=float32)}


  4%|▍         | 40489/1000000 [1:51:20<25:02:08, 10.65it/s]

{'loss': Array(0.18506144, dtype=float32), 'loss_cross_entropy': Array(0.17267838, dtype=float32)}


  4%|▍         | 40499/1000000 [1:51:22<34:34:24,  7.71it/s]

{'loss': Array(0.17467825, dtype=float32), 'loss_cross_entropy': Array(0.1616045, dtype=float32)}


  4%|▍         | 40509/1000000 [1:51:30<87:54:30,  3.03it/s] 

{'loss': Array(0.19379814, dtype=float32), 'loss_cross_entropy': Array(0.1807356, dtype=float32)}


  4%|▍         | 40519/1000000 [1:51:32<45:25:26,  5.87it/s]

{'loss': Array(0.18873043, dtype=float32), 'loss_cross_entropy': Array(0.1747289, dtype=float32)}


  4%|▍         | 40529/1000000 [1:51:33<28:05:36,  9.49it/s]

{'loss': Array(0.17055212, dtype=float32), 'loss_cross_entropy': Array(0.15801048, dtype=float32)}


  4%|▍         | 40539/1000000 [1:51:34<25:54:29, 10.29it/s]

{'loss': Array(0.1806084, dtype=float32), 'loss_cross_entropy': Array(0.16772656, dtype=float32)}


  4%|▍         | 40549/1000000 [1:51:36<31:49:35,  8.37it/s]

{'loss': Array(0.18377674, dtype=float32), 'loss_cross_entropy': Array(0.17031829, dtype=float32)}


  4%|▍         | 40559/1000000 [1:51:38<27:03:03,  9.85it/s]

{'loss': Array(0.17759632, dtype=float32), 'loss_cross_entropy': Array(0.16460083, dtype=float32)}


  4%|▍         | 40569/1000000 [1:51:39<25:12:07, 10.57it/s]

{'loss': Array(0.20503123, dtype=float32), 'loss_cross_entropy': Array(0.19063914, dtype=float32)}


  4%|▍         | 40579/1000000 [1:51:41<28:13:17,  9.44it/s]

{'loss': Array(0.17884584, dtype=float32), 'loss_cross_entropy': Array(0.16639839, dtype=float32)}


  4%|▍         | 40589/1000000 [1:51:42<25:28:15, 10.46it/s]

{'loss': Array(0.18237534, dtype=float32), 'loss_cross_entropy': Array(0.16950452, dtype=float32)}


  4%|▍         | 40599/1000000 [1:51:44<31:31:18,  8.45it/s]

{'loss': Array(0.17989428, dtype=float32), 'loss_cross_entropy': Array(0.16623206, dtype=float32)}


  4%|▍         | 40609/1000000 [1:51:45<25:20:07, 10.52it/s]

{'loss': Array(0.18026799, dtype=float32), 'loss_cross_entropy': Array(0.16802089, dtype=float32)}


  4%|▍         | 40619/1000000 [1:51:47<34:39:38,  7.69it/s]

{'loss': Array(0.19143763, dtype=float32), 'loss_cross_entropy': Array(0.1788999, dtype=float32)}


  4%|▍         | 40629/1000000 [1:51:48<26:06:21, 10.21it/s]

{'loss': Array(0.17372657, dtype=float32), 'loss_cross_entropy': Array(0.16085759, dtype=float32)}


  4%|▍         | 40639/1000000 [1:51:49<25:17:51, 10.53it/s]

{'loss': Array(0.1762831, dtype=float32), 'loss_cross_entropy': Array(0.16441348, dtype=float32)}


  4%|▍         | 40649/1000000 [1:51:51<31:36:45,  8.43it/s]

{'loss': Array(0.17117201, dtype=float32), 'loss_cross_entropy': Array(0.15844528, dtype=float32)}


  4%|▍         | 40659/1000000 [1:51:53<26:02:52, 10.23it/s]

{'loss': Array(0.18328615, dtype=float32), 'loss_cross_entropy': Array(0.17080806, dtype=float32)}


  4%|▍         | 40669/1000000 [1:51:54<25:37:22, 10.40it/s]

{'loss': Array(0.18838234, dtype=float32), 'loss_cross_entropy': Array(0.17430271, dtype=float32)}


  4%|▍         | 40679/1000000 [1:51:56<28:30:53,  9.35it/s]

{'loss': Array(0.18297434, dtype=float32), 'loss_cross_entropy': Array(0.16980243, dtype=float32)}


  4%|▍         | 40689/1000000 [1:51:57<25:24:27, 10.49it/s]

{'loss': Array(0.17464781, dtype=float32), 'loss_cross_entropy': Array(0.16163576, dtype=float32)}


  4%|▍         | 40699/1000000 [1:51:59<33:01:16,  8.07it/s]

{'loss': Array(0.19207804, dtype=float32), 'loss_cross_entropy': Array(0.17891882, dtype=float32)}


  4%|▍         | 40709/1000000 [1:52:00<26:41:50,  9.98it/s]

{'loss': Array(0.19495878, dtype=float32), 'loss_cross_entropy': Array(0.18029395, dtype=float32)}


  4%|▍         | 40719/1000000 [1:52:02<40:23:37,  6.60it/s]

{'loss': Array(0.17634852, dtype=float32), 'loss_cross_entropy': Array(0.16463117, dtype=float32)}


  4%|▍         | 40729/1000000 [1:52:03<28:19:05,  9.41it/s]

{'loss': Array(0.21579638, dtype=float32), 'loss_cross_entropy': Array(0.20141993, dtype=float32)}


  4%|▍         | 40739/1000000 [1:52:05<26:02:37, 10.23it/s]

{'loss': Array(0.18889232, dtype=float32), 'loss_cross_entropy': Array(0.17585497, dtype=float32)}


  4%|▍         | 40749/1000000 [1:52:06<34:55:21,  7.63it/s]

{'loss': Array(0.19479486, dtype=float32), 'loss_cross_entropy': Array(0.18199605, dtype=float32)}


  4%|▍         | 40759/1000000 [1:52:08<26:09:28, 10.19it/s]

{'loss': Array(0.17189609, dtype=float32), 'loss_cross_entropy': Array(0.15887846, dtype=float32)}


  4%|▍         | 40769/1000000 [1:52:09<25:15:28, 10.55it/s]

{'loss': Array(0.16888683, dtype=float32), 'loss_cross_entropy': Array(0.15757672, dtype=float32)}


  4%|▍         | 40779/1000000 [1:52:11<29:24:08,  9.06it/s]

{'loss': Array(0.16133796, dtype=float32), 'loss_cross_entropy': Array(0.15005845, dtype=float32)}


  4%|▍         | 40789/1000000 [1:52:12<25:41:05, 10.37it/s]

{'loss': Array(0.18380131, dtype=float32), 'loss_cross_entropy': Array(0.17073396, dtype=float32)}


  4%|▍         | 40799/1000000 [1:52:14<32:08:13,  8.29it/s]

{'loss': Array(0.18686715, dtype=float32), 'loss_cross_entropy': Array(0.1745469, dtype=float32)}


  4%|▍         | 40809/1000000 [1:52:15<26:35:50, 10.02it/s]

{'loss': Array(0.19324887, dtype=float32), 'loss_cross_entropy': Array(0.18051822, dtype=float32)}


  4%|▍         | 40819/1000000 [1:52:17<24:40:55, 10.79it/s]

{'loss': Array(0.17267625, dtype=float32), 'loss_cross_entropy': Array(0.16006964, dtype=float32)}


  4%|▍         | 40829/1000000 [1:52:18<28:43:48,  9.27it/s]

{'loss': Array(0.18657903, dtype=float32), 'loss_cross_entropy': Array(0.17310145, dtype=float32)}


  4%|▍         | 40839/1000000 [1:52:20<25:09:00, 10.59it/s]

{'loss': Array(0.15997405, dtype=float32), 'loss_cross_entropy': Array(0.14836648, dtype=float32)}


  4%|▍         | 40849/1000000 [1:52:22<34:55:32,  7.63it/s]

{'loss': Array(0.1772526, dtype=float32), 'loss_cross_entropy': Array(0.16427505, dtype=float32)}


  4%|▍         | 40859/1000000 [1:52:23<26:13:54, 10.16it/s]

{'loss': Array(0.17765227, dtype=float32), 'loss_cross_entropy': Array(0.16485588, dtype=float32)}


  4%|▍         | 40869/1000000 [1:52:24<25:17:44, 10.53it/s]

{'loss': Array(0.20343292, dtype=float32), 'loss_cross_entropy': Array(0.19005504, dtype=float32)}


  4%|▍         | 40879/1000000 [1:52:26<28:07:01,  9.48it/s]

{'loss': Array(0.18661068, dtype=float32), 'loss_cross_entropy': Array(0.17391524, dtype=float32)}


  4%|▍         | 40889/1000000 [1:52:27<25:09:27, 10.59it/s]

{'loss': Array(0.18876444, dtype=float32), 'loss_cross_entropy': Array(0.17539568, dtype=float32)}


  4%|▍         | 40899/1000000 [1:52:29<29:42:52,  8.97it/s]

{'loss': Array(0.17936097, dtype=float32), 'loss_cross_entropy': Array(0.16720675, dtype=float32)}


  4%|▍         | 40909/1000000 [1:52:30<25:43:18, 10.36it/s]

{'loss': Array(0.19429918, dtype=float32), 'loss_cross_entropy': Array(0.18099152, dtype=float32)}


  4%|▍         | 40919/1000000 [1:52:32<39:11:39,  6.80it/s]

{'loss': Array(0.19988182, dtype=float32), 'loss_cross_entropy': Array(0.18661971, dtype=float32)}


  4%|▍         | 40929/1000000 [1:52:34<27:56:25,  9.53it/s]

{'loss': Array(0.2192192, dtype=float32), 'loss_cross_entropy': Array(0.2051399, dtype=float32)}


  4%|▍         | 40939/1000000 [1:52:35<25:19:40, 10.52it/s]

{'loss': Array(0.18147811, dtype=float32), 'loss_cross_entropy': Array(0.16818698, dtype=float32)}


  4%|▍         | 40949/1000000 [1:52:37<32:07:07,  8.29it/s]

{'loss': Array(0.17331062, dtype=float32), 'loss_cross_entropy': Array(0.16200623, dtype=float32)}


  4%|▍         | 40959/1000000 [1:52:38<25:55:31, 10.28it/s]

{'loss': Array(0.1980429, dtype=float32), 'loss_cross_entropy': Array(0.1838972, dtype=float32)}


  4%|▍         | 40969/1000000 [1:52:40<34:39:00,  7.69it/s]

{'loss': Array(0.17290176, dtype=float32), 'loss_cross_entropy': Array(0.15999292, dtype=float32)}


  4%|▍         | 40979/1000000 [1:52:41<26:05:42, 10.21it/s]

{'loss': Array(0.1833864, dtype=float32), 'loss_cross_entropy': Array(0.17001633, dtype=float32)}


  4%|▍         | 40989/1000000 [1:52:43<35:29:39,  7.51it/s]

{'loss': Array(0.17722572, dtype=float32), 'loss_cross_entropy': Array(0.16408734, dtype=float32)}


  4%|▍         | 40999/1000000 [1:52:44<26:37:56, 10.00it/s]

{'loss': Array(0.15684152, dtype=float32), 'loss_cross_entropy': Array(0.14523475, dtype=float32)}


  4%|▍         | 41009/1000000 [1:52:52<84:51:49,  3.14it/s] 

{'loss': Array(0.19437149, dtype=float32), 'loss_cross_entropy': Array(0.18059379, dtype=float32)}


  4%|▍         | 41019/1000000 [1:52:53<39:39:44,  6.72it/s]

{'loss': Array(0.19228928, dtype=float32), 'loss_cross_entropy': Array(0.17873947, dtype=float32)}


  4%|▍         | 41029/1000000 [1:52:55<26:53:50,  9.90it/s]

{'loss': Array(0.18745905, dtype=float32), 'loss_cross_entropy': Array(0.17434049, dtype=float32)}


  4%|▍         | 41039/1000000 [1:52:56<34:58:56,  7.61it/s]

{'loss': Array(0.17513222, dtype=float32), 'loss_cross_entropy': Array(0.16315256, dtype=float32)}


  4%|▍         | 41049/1000000 [1:52:58<26:30:58, 10.05it/s]

{'loss': Array(0.17844628, dtype=float32), 'loss_cross_entropy': Array(0.16586865, dtype=float32)}


  4%|▍         | 41059/1000000 [1:53:00<41:02:43,  6.49it/s]

{'loss': Array(0.17888926, dtype=float32), 'loss_cross_entropy': Array(0.16670398, dtype=float32)}


  4%|▍         | 41069/1000000 [1:53:01<28:25:12,  9.37it/s]

{'loss': Array(0.18476115, dtype=float32), 'loss_cross_entropy': Array(0.17134999, dtype=float32)}


  4%|▍         | 41079/1000000 [1:53:02<26:27:21, 10.07it/s]

{'loss': Array(0.17549151, dtype=float32), 'loss_cross_entropy': Array(0.16276442, dtype=float32)}


  4%|▍         | 41089/1000000 [1:53:04<31:21:54,  8.49it/s]

{'loss': Array(0.18392988, dtype=float32), 'loss_cross_entropy': Array(0.17030716, dtype=float32)}


  4%|▍         | 41099/1000000 [1:53:05<25:41:06, 10.37it/s]

{'loss': Array(0.20348601, dtype=float32), 'loss_cross_entropy': Array(0.18926063, dtype=float32)}


  4%|▍         | 41109/1000000 [1:53:07<34:29:11,  7.72it/s]

{'loss': Array(0.19134636, dtype=float32), 'loss_cross_entropy': Array(0.17722213, dtype=float32)}


  4%|▍         | 41119/1000000 [1:53:09<26:16:10, 10.14it/s]

{'loss': Array(0.17201664, dtype=float32), 'loss_cross_entropy': Array(0.1601436, dtype=float32)}


  4%|▍         | 41129/1000000 [1:53:10<25:38:38, 10.39it/s]

{'loss': Array(0.1959285, dtype=float32), 'loss_cross_entropy': Array(0.18151139, dtype=float32)}


  4%|▍         | 41139/1000000 [1:53:12<28:23:40,  9.38it/s]

{'loss': Array(0.17703561, dtype=float32), 'loss_cross_entropy': Array(0.16431098, dtype=float32)}


  4%|▍         | 41149/1000000 [1:53:13<25:09:45, 10.59it/s]

{'loss': Array(0.15089351, dtype=float32), 'loss_cross_entropy': Array(0.1395484, dtype=float32)}


  4%|▍         | 41159/1000000 [1:53:15<39:07:18,  6.81it/s]

{'loss': Array(0.17697322, dtype=float32), 'loss_cross_entropy': Array(0.16389307, dtype=float32)}


  4%|▍         | 41169/1000000 [1:53:16<26:50:03,  9.93it/s]

{'loss': Array(0.17478949, dtype=float32), 'loss_cross_entropy': Array(0.16230823, dtype=float32)}


  4%|▍         | 41179/1000000 [1:53:17<25:02:36, 10.64it/s]

{'loss': Array(0.19995527, dtype=float32), 'loss_cross_entropy': Array(0.18642317, dtype=float32)}


  4%|▍         | 41189/1000000 [1:53:19<31:35:58,  8.43it/s]

{'loss': Array(0.20440558, dtype=float32), 'loss_cross_entropy': Array(0.18999194, dtype=float32)}


  4%|▍         | 41199/1000000 [1:53:20<26:19:32, 10.12it/s]

{'loss': Array(0.19186807, dtype=float32), 'loss_cross_entropy': Array(0.17786823, dtype=float32)}


  4%|▍         | 41209/1000000 [1:53:22<35:20:47,  7.53it/s]

{'loss': Array(0.18414515, dtype=float32), 'loss_cross_entropy': Array(0.17147069, dtype=float32)}


  4%|▍         | 41219/1000000 [1:53:24<26:30:04, 10.05it/s]

{'loss': Array(0.18462045, dtype=float32), 'loss_cross_entropy': Array(0.17158438, dtype=float32)}


  4%|▍         | 41229/1000000 [1:53:25<37:22:27,  7.13it/s]

{'loss': Array(0.18622847, dtype=float32), 'loss_cross_entropy': Array(0.17244972, dtype=float32)}


  4%|▍         | 41239/1000000 [1:53:27<26:34:33, 10.02it/s]

{'loss': Array(0.16358216, dtype=float32), 'loss_cross_entropy': Array(0.15131412, dtype=float32)}


  4%|▍         | 41249/1000000 [1:53:28<25:38:04, 10.39it/s]

{'loss': Array(0.17370696, dtype=float32), 'loss_cross_entropy': Array(0.1611877, dtype=float32)}


  4%|▍         | 41259/1000000 [1:53:30<29:38:34,  8.98it/s]

{'loss': Array(0.2037404, dtype=float32), 'loss_cross_entropy': Array(0.18970972, dtype=float32)}


  4%|▍         | 41269/1000000 [1:53:31<25:06:36, 10.61it/s]

{'loss': Array(0.16868556, dtype=float32), 'loss_cross_entropy': Array(0.15619165, dtype=float32)}


  4%|▍         | 41279/1000000 [1:53:33<31:24:56,  8.48it/s]

{'loss': Array(0.18743281, dtype=float32), 'loss_cross_entropy': Array(0.174045, dtype=float32)}


  4%|▍         | 41289/1000000 [1:53:34<25:17:47, 10.53it/s]

{'loss': Array(0.19669174, dtype=float32), 'loss_cross_entropy': Array(0.18336168, dtype=float32)}


  4%|▍         | 41299/1000000 [1:53:36<39:16:46,  6.78it/s]

{'loss': Array(0.19054829, dtype=float32), 'loss_cross_entropy': Array(0.17678212, dtype=float32)}


  4%|▍         | 41309/1000000 [1:53:37<26:54:46,  9.89it/s]

{'loss': Array(0.1805625, dtype=float32), 'loss_cross_entropy': Array(0.16761135, dtype=float32)}


  4%|▍         | 41319/1000000 [1:53:39<39:59:06,  6.66it/s]

{'loss': Array(0.18113343, dtype=float32), 'loss_cross_entropy': Array(0.16810122, dtype=float32)}


  4%|▍         | 41329/1000000 [1:53:40<27:10:45,  9.80it/s]

{'loss': Array(0.19864705, dtype=float32), 'loss_cross_entropy': Array(0.18427956, dtype=float32)}


  4%|▍         | 41339/1000000 [1:53:42<25:17:10, 10.53it/s]

{'loss': Array(0.1894078, dtype=float32), 'loss_cross_entropy': Array(0.17633699, dtype=float32)}


  4%|▍         | 41349/1000000 [1:53:43<34:50:01,  7.64it/s]

{'loss': Array(0.17911443, dtype=float32), 'loss_cross_entropy': Array(0.16676408, dtype=float32)}


  4%|▍         | 41359/1000000 [1:53:45<26:16:23, 10.14it/s]

{'loss': Array(0.15364233, dtype=float32), 'loss_cross_entropy': Array(0.14191307, dtype=float32)}


  4%|▍         | 41369/1000000 [1:53:46<38:46:04,  6.87it/s]

{'loss': Array(0.16077152, dtype=float32), 'loss_cross_entropy': Array(0.1490621, dtype=float32)}


  4%|▍         | 41379/1000000 [1:53:48<26:39:09,  9.99it/s]

{'loss': Array(0.19358745, dtype=float32), 'loss_cross_entropy': Array(0.17922866, dtype=float32)}


  4%|▍         | 41389/1000000 [1:53:49<25:25:58, 10.47it/s]

{'loss': Array(0.19551723, dtype=float32), 'loss_cross_entropy': Array(0.18113096, dtype=float32)}


  4%|▍         | 41399/1000000 [1:53:51<28:29:07,  9.35it/s]

{'loss': Array(0.18747392, dtype=float32), 'loss_cross_entropy': Array(0.17456363, dtype=float32)}


  4%|▍         | 41409/1000000 [1:53:52<25:16:16, 10.54it/s]

{'loss': Array(0.18793608, dtype=float32), 'loss_cross_entropy': Array(0.17498545, dtype=float32)}


  4%|▍         | 41419/1000000 [1:53:54<28:17:54,  9.41it/s]

{'loss': Array(0.18722177, dtype=float32), 'loss_cross_entropy': Array(0.17363416, dtype=float32)}


  4%|▍         | 41429/1000000 [1:53:55<25:07:03, 10.60it/s]

{'loss': Array(0.18317105, dtype=float32), 'loss_cross_entropy': Array(0.16986685, dtype=float32)}


  4%|▍         | 41439/1000000 [1:53:57<39:21:16,  6.77it/s]

{'loss': Array(0.19120981, dtype=float32), 'loss_cross_entropy': Array(0.17760043, dtype=float32)}


  4%|▍         | 41449/1000000 [1:53:58<27:16:46,  9.76it/s]

{'loss': Array(0.1850226, dtype=float32), 'loss_cross_entropy': Array(0.17255233, dtype=float32)}


  4%|▍         | 41459/1000000 [1:54:00<25:36:09, 10.40it/s]

{'loss': Array(0.18412831, dtype=float32), 'loss_cross_entropy': Array(0.1707899, dtype=float32)}


  4%|▍         | 41469/1000000 [1:54:02<33:43:16,  7.90it/s]

{'loss': Array(0.18530229, dtype=float32), 'loss_cross_entropy': Array(0.17198776, dtype=float32)}


  4%|▍         | 41479/1000000 [1:54:03<26:59:02,  9.87it/s]

{'loss': Array(0.17657205, dtype=float32), 'loss_cross_entropy': Array(0.16402347, dtype=float32)}


  4%|▍         | 41489/1000000 [1:54:05<39:57:05,  6.66it/s]

{'loss': Array(0.17169243, dtype=float32), 'loss_cross_entropy': Array(0.15988915, dtype=float32)}


  4%|▍         | 41499/1000000 [1:54:06<26:45:31,  9.95it/s]

{'loss': Array(0.16718785, dtype=float32), 'loss_cross_entropy': Array(0.15506966, dtype=float32)}


  4%|▍         | 41509/1000000 [1:54:14<85:11:04,  3.13it/s] 

{'loss': Array(0.18894942, dtype=float32), 'loss_cross_entropy': Array(0.17585695, dtype=float32)}


  4%|▍         | 41519/1000000 [1:54:15<39:59:05,  6.66it/s]

{'loss': Array(0.19212559, dtype=float32), 'loss_cross_entropy': Array(0.17872079, dtype=float32)}


  4%|▍         | 41529/1000000 [1:54:17<27:16:49,  9.76it/s]

{'loss': Array(0.18046735, dtype=float32), 'loss_cross_entropy': Array(0.16747755, dtype=float32)}


  4%|▍         | 41539/1000000 [1:54:19<32:14:57,  8.26it/s]

{'loss': Array(0.17542817, dtype=float32), 'loss_cross_entropy': Array(0.16274081, dtype=float32)}


  4%|▍         | 41549/1000000 [1:54:20<25:29:42, 10.44it/s]

{'loss': Array(0.18199521, dtype=float32), 'loss_cross_entropy': Array(0.16882084, dtype=float32)}


  4%|▍         | 41559/1000000 [1:54:22<39:22:05,  6.76it/s]

{'loss': Array(0.17981501, dtype=float32), 'loss_cross_entropy': Array(0.16645136, dtype=float32)}


  4%|▍         | 41569/1000000 [1:54:23<26:47:00,  9.94it/s]

{'loss': Array(0.19258696, dtype=float32), 'loss_cross_entropy': Array(0.17906795, dtype=float32)}


  4%|▍         | 41579/1000000 [1:54:24<24:46:26, 10.75it/s]

{'loss': Array(0.17904958, dtype=float32), 'loss_cross_entropy': Array(0.16668591, dtype=float32)}


  4%|▍         | 41589/1000000 [1:54:26<28:05:30,  9.48it/s]

{'loss': Array(0.18549669, dtype=float32), 'loss_cross_entropy': Array(0.17219883, dtype=float32)}


  4%|▍         | 41599/1000000 [1:54:27<25:52:59, 10.29it/s]

{'loss': Array(0.1934462, dtype=float32), 'loss_cross_entropy': Array(0.1808941, dtype=float32)}


  4%|▍         | 41609/1000000 [1:54:29<39:36:17,  6.72it/s]

{'loss': Array(0.18503183, dtype=float32), 'loss_cross_entropy': Array(0.17202085, dtype=float32)}


  4%|▍         | 41619/1000000 [1:54:30<27:01:51,  9.85it/s]

{'loss': Array(0.18288694, dtype=float32), 'loss_cross_entropy': Array(0.17025436, dtype=float32)}


  4%|▍         | 41629/1000000 [1:54:32<24:46:02, 10.75it/s]

{'loss': Array(0.18040666, dtype=float32), 'loss_cross_entropy': Array(0.16893445, dtype=float32)}


  4%|▍         | 41639/1000000 [1:54:33<29:33:06,  9.01it/s]

{'loss': Array(0.17388348, dtype=float32), 'loss_cross_entropy': Array(0.16116323, dtype=float32)}


  4%|▍         | 41649/1000000 [1:54:35<26:08:50, 10.18it/s]

{'loss': Array(0.18243079, dtype=float32), 'loss_cross_entropy': Array(0.16949287, dtype=float32)}


  4%|▍         | 41659/1000000 [1:54:37<34:46:14,  7.66it/s]

{'loss': Array(0.18886895, dtype=float32), 'loss_cross_entropy': Array(0.17509797, dtype=float32)}


  4%|▍         | 41669/1000000 [1:54:38<26:05:22, 10.20it/s]

{'loss': Array(0.19882706, dtype=float32), 'loss_cross_entropy': Array(0.18444239, dtype=float32)}


  4%|▍         | 41679/1000000 [1:54:40<39:03:43,  6.81it/s]

{'loss': Array(0.16872235, dtype=float32), 'loss_cross_entropy': Array(0.15725866, dtype=float32)}


  4%|▍         | 41689/1000000 [1:54:41<27:12:26,  9.78it/s]

{'loss': Array(0.18032728, dtype=float32), 'loss_cross_entropy': Array(0.1676791, dtype=float32)}


  4%|▍         | 41699/1000000 [1:54:42<25:05:00, 10.61it/s]

{'loss': Array(0.18556942, dtype=float32), 'loss_cross_entropy': Array(0.17260452, dtype=float32)}


  4%|▍         | 41709/1000000 [1:54:44<31:57:56,  8.33it/s]

{'loss': Array(0.192501, dtype=float32), 'loss_cross_entropy': Array(0.17925079, dtype=float32)}


  4%|▍         | 41719/1000000 [1:54:45<26:15:29, 10.14it/s]

{'loss': Array(0.19662495, dtype=float32), 'loss_cross_entropy': Array(0.1822604, dtype=float32)}


  4%|▍         | 41729/1000000 [1:54:47<34:30:22,  7.71it/s]

{'loss': Array(0.19839981, dtype=float32), 'loss_cross_entropy': Array(0.18418097, dtype=float32)}


  4%|▍         | 41739/1000000 [1:54:48<26:10:01, 10.17it/s]

{'loss': Array(0.17275374, dtype=float32), 'loss_cross_entropy': Array(0.16057688, dtype=float32)}


  4%|▍         | 41749/1000000 [1:54:50<24:45:35, 10.75it/s]

{'loss': Array(0.18685059, dtype=float32), 'loss_cross_entropy': Array(0.17359586, dtype=float32)}


  4%|▍         | 41759/1000000 [1:54:52<28:14:47,  9.42it/s]

{'loss': Array(0.17713319, dtype=float32), 'loss_cross_entropy': Array(0.16516195, dtype=float32)}


  4%|▍         | 41769/1000000 [1:54:53<24:55:38, 10.68it/s]

{'loss': Array(0.19448586, dtype=float32), 'loss_cross_entropy': Array(0.18144071, dtype=float32)}


  4%|▍         | 41779/1000000 [1:54:55<38:56:18,  6.84it/s]

{'loss': Array(0.17271641, dtype=float32), 'loss_cross_entropy': Array(0.16004781, dtype=float32)}


  4%|▍         | 41789/1000000 [1:54:56<26:52:49,  9.90it/s]

{'loss': Array(0.20438223, dtype=float32), 'loss_cross_entropy': Array(0.19045214, dtype=float32)}


  4%|▍         | 41799/1000000 [1:54:57<24:59:05, 10.65it/s]

{'loss': Array(0.16976391, dtype=float32), 'loss_cross_entropy': Array(0.157553, dtype=float32)}


  4%|▍         | 41809/1000000 [1:54:59<32:03:15,  8.30it/s]

{'loss': Array(0.17573641, dtype=float32), 'loss_cross_entropy': Array(0.16320309, dtype=float32)}


  4%|▍         | 41819/1000000 [1:55:00<26:13:53, 10.15it/s]

{'loss': Array(0.17374562, dtype=float32), 'loss_cross_entropy': Array(0.16253288, dtype=float32)}


  4%|▍         | 41829/1000000 [1:55:02<35:21:31,  7.53it/s]

{'loss': Array(0.1910566, dtype=float32), 'loss_cross_entropy': Array(0.17735985, dtype=float32)}


  4%|▍         | 41839/1000000 [1:55:03<26:45:32,  9.95it/s]

{'loss': Array(0.18869038, dtype=float32), 'loss_cross_entropy': Array(0.17545235, dtype=float32)}


  4%|▍         | 41849/1000000 [1:55:05<35:47:35,  7.44it/s]

{'loss': Array(0.18330671, dtype=float32), 'loss_cross_entropy': Array(0.16987123, dtype=float32)}


  4%|▍         | 41859/1000000 [1:55:07<26:38:35,  9.99it/s]

{'loss': Array(0.19038942, dtype=float32), 'loss_cross_entropy': Array(0.1766148, dtype=float32)}


  4%|▍         | 41869/1000000 [1:55:08<24:46:28, 10.74it/s]

{'loss': Array(0.18584831, dtype=float32), 'loss_cross_entropy': Array(0.17313194, dtype=float32)}


  4%|▍         | 41879/1000000 [1:55:10<29:52:37,  8.91it/s]

{'loss': Array(0.17816646, dtype=float32), 'loss_cross_entropy': Array(0.1667898, dtype=float32)}


  4%|▍         | 41889/1000000 [1:55:11<25:17:33, 10.52it/s]

{'loss': Array(0.17920779, dtype=float32), 'loss_cross_entropy': Array(0.16591333, dtype=float32)}


  4%|▍         | 41899/1000000 [1:55:13<31:34:36,  8.43it/s]

{'loss': Array(0.17796896, dtype=float32), 'loss_cross_entropy': Array(0.1656684, dtype=float32)}


  4%|▍         | 41909/1000000 [1:55:14<25:33:04, 10.42it/s]

{'loss': Array(0.19151619, dtype=float32), 'loss_cross_entropy': Array(0.17811792, dtype=float32)}


  4%|▍         | 41919/1000000 [1:55:16<38:42:51,  6.87it/s]

{'loss': Array(0.18811487, dtype=float32), 'loss_cross_entropy': Array(0.17430927, dtype=float32)}


  4%|▍         | 41929/1000000 [1:55:17<26:50:34,  9.91it/s]

{'loss': Array(0.18812813, dtype=float32), 'loss_cross_entropy': Array(0.17576794, dtype=float32)}


  4%|▍         | 41939/1000000 [1:55:19<39:24:58,  6.75it/s]

{'loss': Array(0.19252563, dtype=float32), 'loss_cross_entropy': Array(0.17911409, dtype=float32)}


  4%|▍         | 41949/1000000 [1:55:20<27:09:43,  9.80it/s]

{'loss': Array(0.18767588, dtype=float32), 'loss_cross_entropy': Array(0.1744133, dtype=float32)}


  4%|▍         | 41959/1000000 [1:55:21<25:08:45, 10.58it/s]

{'loss': Array(0.18078138, dtype=float32), 'loss_cross_entropy': Array(0.16809745, dtype=float32)}


  4%|▍         | 41969/1000000 [1:55:23<34:41:38,  7.67it/s]

{'loss': Array(0.18643573, dtype=float32), 'loss_cross_entropy': Array(0.17383124, dtype=float32)}


  4%|▍         | 41979/1000000 [1:55:25<26:27:26, 10.06it/s]

{'loss': Array(0.18144585, dtype=float32), 'loss_cross_entropy': Array(0.16865754, dtype=float32)}


  4%|▍         | 41989/1000000 [1:55:26<38:56:56,  6.83it/s]

{'loss': Array(0.18431501, dtype=float32), 'loss_cross_entropy': Array(0.17114201, dtype=float32)}


  4%|▍         | 41999/1000000 [1:55:28<26:55:38,  9.88it/s]

{'loss': Array(0.20108195, dtype=float32), 'loss_cross_entropy': Array(0.18666887, dtype=float32)}


  4%|▍         | 42009/1000000 [1:55:35<87:25:05,  3.04it/s] 

{'loss': Array(0.19586326, dtype=float32), 'loss_cross_entropy': Array(0.18178008, dtype=float32)}


  4%|▍         | 42019/1000000 [1:55:37<39:06:26,  6.80it/s] 

{'loss': Array(0.20894146, dtype=float32), 'loss_cross_entropy': Array(0.19367789, dtype=float32)}


  4%|▍         | 42029/1000000 [1:55:39<28:10:37,  9.44it/s]

{'loss': Array(0.19251144, dtype=float32), 'loss_cross_entropy': Array(0.17908336, dtype=float32)}


  4%|▍         | 42039/1000000 [1:55:40<29:10:05,  9.12it/s]

{'loss': Array(0.1694881, dtype=float32), 'loss_cross_entropy': Array(0.15716122, dtype=float32)}


  4%|▍         | 42049/1000000 [1:55:42<26:10:53, 10.16it/s]

{'loss': Array(0.17941833, dtype=float32), 'loss_cross_entropy': Array(0.16752852, dtype=float32)}


  4%|▍         | 42059/1000000 [1:55:43<38:54:07,  6.84it/s]

{'loss': Array(0.18921608, dtype=float32), 'loss_cross_entropy': Array(0.17596216, dtype=float32)}


  4%|▍         | 42069/1000000 [1:55:45<27:18:52,  9.74it/s]

{'loss': Array(0.19186857, dtype=float32), 'loss_cross_entropy': Array(0.17868742, dtype=float32)}


  4%|▍         | 42079/1000000 [1:55:46<25:01:48, 10.63it/s]

{'loss': Array(0.1994873, dtype=float32), 'loss_cross_entropy': Array(0.18613878, dtype=float32)}


  4%|▍         | 42089/1000000 [1:55:48<31:09:49,  8.54it/s]

{'loss': Array(0.1837289, dtype=float32), 'loss_cross_entropy': Array(0.17109905, dtype=float32)}


  4%|▍         | 42099/1000000 [1:55:49<25:19:43, 10.51it/s]

{'loss': Array(0.1674141, dtype=float32), 'loss_cross_entropy': Array(0.15565096, dtype=float32)}


  4%|▍         | 42109/1000000 [1:55:51<38:36:58,  6.89it/s]

{'loss': Array(0.16697113, dtype=float32), 'loss_cross_entropy': Array(0.15450405, dtype=float32)}


  4%|▍         | 42119/1000000 [1:55:52<27:24:09,  9.71it/s]

{'loss': Array(0.18831195, dtype=float32), 'loss_cross_entropy': Array(0.17480537, dtype=float32)}


  4%|▍         | 42129/1000000 [1:55:54<24:48:32, 10.72it/s]

{'loss': Array(0.176939, dtype=float32), 'loss_cross_entropy': Array(0.16396649, dtype=float32)}


  4%|▍         | 42139/1000000 [1:55:55<29:00:07,  9.17it/s]

{'loss': Array(0.16941433, dtype=float32), 'loss_cross_entropy': Array(0.15714793, dtype=float32)}


  4%|▍         | 42149/1000000 [1:55:57<25:19:03, 10.51it/s]

{'loss': Array(0.18321119, dtype=float32), 'loss_cross_entropy': Array(0.17082837, dtype=float32)}


  4%|▍         | 42159/1000000 [1:55:58<35:05:33,  7.58it/s]

{'loss': Array(0.19771521, dtype=float32), 'loss_cross_entropy': Array(0.18376623, dtype=float32)}


  4%|▍         | 42169/1000000 [1:56:00<27:51:29,  9.55it/s]

{'loss': Array(0.1786866, dtype=float32), 'loss_cross_entropy': Array(0.16623321, dtype=float32)}


  4%|▍         | 42179/1000000 [1:56:01<27:05:47,  9.82it/s]

{'loss': Array(0.17325917, dtype=float32), 'loss_cross_entropy': Array(0.1598262, dtype=float32)}


  4%|▍         | 42189/1000000 [1:56:03<33:11:20,  8.02it/s]

{'loss': Array(0.19538634, dtype=float32), 'loss_cross_entropy': Array(0.18194862, dtype=float32)}


  4%|▍         | 42199/1000000 [1:56:05<27:58:58,  9.51it/s]

{'loss': Array(0.17152199, dtype=float32), 'loss_cross_entropy': Array(0.15908752, dtype=float32)}


  4%|▍         | 42209/1000000 [1:56:06<25:17:00, 10.52it/s]

{'loss': Array(0.15025632, dtype=float32), 'loss_cross_entropy': Array(0.13894655, dtype=float32)}


  4%|▍         | 42219/1000000 [1:56:08<28:43:40,  9.26it/s]

{'loss': Array(0.17129916, dtype=float32), 'loss_cross_entropy': Array(0.15871295, dtype=float32)}


  4%|▍         | 42229/1000000 [1:56:09<25:36:55, 10.39it/s]

{'loss': Array(0.18219133, dtype=float32), 'loss_cross_entropy': Array(0.16925962, dtype=float32)}


  4%|▍         | 42239/1000000 [1:56:11<31:36:15,  8.42it/s]

{'loss': Array(0.17331275, dtype=float32), 'loss_cross_entropy': Array(0.16128898, dtype=float32)}


  4%|▍         | 42249/1000000 [1:56:12<25:30:03, 10.43it/s]

{'loss': Array(0.17710565, dtype=float32), 'loss_cross_entropy': Array(0.16418706, dtype=float32)}


  4%|▍         | 42259/1000000 [1:56:14<38:51:04,  6.85it/s]

{'loss': Array(0.19561993, dtype=float32), 'loss_cross_entropy': Array(0.18286489, dtype=float32)}


  4%|▍         | 42269/1000000 [1:56:15<26:42:53,  9.96it/s]

{'loss': Array(0.17903392, dtype=float32), 'loss_cross_entropy': Array(0.16563854, dtype=float32)}


  4%|▍         | 42279/1000000 [1:56:17<38:57:27,  6.83it/s]

{'loss': Array(0.18362533, dtype=float32), 'loss_cross_entropy': Array(0.17116633, dtype=float32)}


  4%|▍         | 42289/1000000 [1:56:18<26:51:14,  9.91it/s]

{'loss': Array(0.17063493, dtype=float32), 'loss_cross_entropy': Array(0.15804444, dtype=float32)}


  4%|▍         | 42299/1000000 [1:56:19<25:44:32, 10.33it/s]

{'loss': Array(0.17556645, dtype=float32), 'loss_cross_entropy': Array(0.16413473, dtype=float32)}


  4%|▍         | 42309/1000000 [1:56:21<34:55:41,  7.62it/s]

{'loss': Array(0.17507875, dtype=float32), 'loss_cross_entropy': Array(0.16176002, dtype=float32)}


  4%|▍         | 42319/1000000 [1:56:23<26:22:25, 10.09it/s]

{'loss': Array(0.16660339, dtype=float32), 'loss_cross_entropy': Array(0.15505396, dtype=float32)}


  4%|▍         | 42329/1000000 [1:56:24<39:15:25,  6.78it/s]

{'loss': Array(0.17234, dtype=float32), 'loss_cross_entropy': Array(0.16010217, dtype=float32)}


  4%|▍         | 42339/1000000 [1:56:26<26:58:21,  9.86it/s]

{'loss': Array(0.17611967, dtype=float32), 'loss_cross_entropy': Array(0.16384521, dtype=float32)}


  4%|▍         | 42349/1000000 [1:56:27<24:50:08, 10.71it/s]

{'loss': Array(0.19231243, dtype=float32), 'loss_cross_entropy': Array(0.17858572, dtype=float32)}


  4%|▍         | 42359/1000000 [1:56:29<28:11:12,  9.44it/s]

{'loss': Array(0.18390511, dtype=float32), 'loss_cross_entropy': Array(0.1710604, dtype=float32)}


  4%|▍         | 42369/1000000 [1:56:30<25:50:50, 10.29it/s]

{'loss': Array(0.17633042, dtype=float32), 'loss_cross_entropy': Array(0.16336964, dtype=float32)}


  4%|▍         | 42379/1000000 [1:56:32<28:38:21,  9.29it/s]

{'loss': Array(0.19333556, dtype=float32), 'loss_cross_entropy': Array(0.17952721, dtype=float32)}


  4%|▍         | 42389/1000000 [1:56:33<25:48:26, 10.31it/s]

{'loss': Array(0.1658027, dtype=float32), 'loss_cross_entropy': Array(0.15421802, dtype=float32)}


  4%|▍         | 42399/1000000 [1:56:35<39:09:12,  6.79it/s]

{'loss': Array(0.18788768, dtype=float32), 'loss_cross_entropy': Array(0.17447068, dtype=float32)}


  4%|▍         | 42409/1000000 [1:56:36<27:05:56,  9.82it/s]

{'loss': Array(0.19491825, dtype=float32), 'loss_cross_entropy': Array(0.18145977, dtype=float32)}


  4%|▍         | 42419/1000000 [1:56:38<24:48:09, 10.72it/s]

{'loss': Array(0.19423772, dtype=float32), 'loss_cross_entropy': Array(0.18025552, dtype=float32)}


  4%|▍         | 42429/1000000 [1:56:39<31:35:43,  8.42it/s]

{'loss': Array(0.16649507, dtype=float32), 'loss_cross_entropy': Array(0.15517054, dtype=float32)}


  4%|▍         | 42439/1000000 [1:56:41<26:03:54, 10.20it/s]

{'loss': Array(0.20612209, dtype=float32), 'loss_cross_entropy': Array(0.19157559, dtype=float32)}


  4%|▍         | 42449/1000000 [1:56:42<38:45:55,  6.86it/s]

{'loss': Array(0.17406754, dtype=float32), 'loss_cross_entropy': Array(0.16142517, dtype=float32)}


  4%|▍         | 42459/1000000 [1:56:44<29:35:16,  8.99it/s]

{'loss': Array(0.17301778, dtype=float32), 'loss_cross_entropy': Array(0.16097225, dtype=float32)}


  4%|▍         | 42469/1000000 [1:56:45<25:30:06, 10.43it/s]

{'loss': Array(0.16331808, dtype=float32), 'loss_cross_entropy': Array(0.15122648, dtype=float32)}


  4%|▍         | 42479/1000000 [1:56:47<29:03:58,  9.15it/s]

{'loss': Array(0.17828055, dtype=float32), 'loss_cross_entropy': Array(0.16493316, dtype=float32)}


  4%|▍         | 42489/1000000 [1:56:48<25:07:51, 10.58it/s]

{'loss': Array(0.18089549, dtype=float32), 'loss_cross_entropy': Array(0.1688666, dtype=float32)}


  4%|▍         | 42499/1000000 [1:56:50<34:57:46,  7.61it/s]

{'loss': Array(0.16839851, dtype=float32), 'loss_cross_entropy': Array(0.15588592, dtype=float32)}


  4%|▍         | 42509/1000000 [1:56:58<89:33:18,  2.97it/s] 

{'loss': Array(0.18572035, dtype=float32), 'loss_cross_entropy': Array(0.17294572, dtype=float32)}


  4%|▍         | 42519/1000000 [1:57:00<46:51:42,  5.68it/s] 

{'loss': Array(0.18794684, dtype=float32), 'loss_cross_entropy': Array(0.17514096, dtype=float32)}


  4%|▍         | 42529/1000000 [1:57:01<30:38:02,  8.68it/s]

{'loss': Array(0.19371222, dtype=float32), 'loss_cross_entropy': Array(0.17994519, dtype=float32)}


  4%|▍         | 42539/1000000 [1:57:03<26:33:19, 10.02it/s]

{'loss': Array(0.2089258, dtype=float32), 'loss_cross_entropy': Array(0.19584273, dtype=float32)}


  4%|▍         | 42549/1000000 [1:57:04<32:23:10,  8.21it/s]

{'loss': Array(0.18485443, dtype=float32), 'loss_cross_entropy': Array(0.1714321, dtype=float32)}


  4%|▍         | 42559/1000000 [1:57:06<25:53:49, 10.27it/s]

{'loss': Array(0.18633358, dtype=float32), 'loss_cross_entropy': Array(0.17339234, dtype=float32)}


  4%|▍         | 42569/1000000 [1:57:07<24:20:55, 10.92it/s]

{'loss': Array(0.18141888, dtype=float32), 'loss_cross_entropy': Array(0.16855042, dtype=float32)}


  4%|▍         | 42579/1000000 [1:57:09<27:53:09,  9.54it/s]

{'loss': Array(0.18004219, dtype=float32), 'loss_cross_entropy': Array(0.16721122, dtype=float32)}


  4%|▍         | 42589/1000000 [1:57:10<25:21:10, 10.49it/s]

{'loss': Array(0.17749225, dtype=float32), 'loss_cross_entropy': Array(0.16442065, dtype=float32)}


  4%|▍         | 42599/1000000 [1:57:12<32:21:26,  8.22it/s]

{'loss': Array(0.17312033, dtype=float32), 'loss_cross_entropy': Array(0.16039117, dtype=float32)}


  4%|▍         | 42609/1000000 [1:57:13<26:30:49, 10.03it/s]

{'loss': Array(0.17457081, dtype=float32), 'loss_cross_entropy': Array(0.16236888, dtype=float32)}


  4%|▍         | 42619/1000000 [1:57:15<35:26:33,  7.50it/s]

{'loss': Array(0.16251823, dtype=float32), 'loss_cross_entropy': Array(0.15069441, dtype=float32)}


  4%|▍         | 42629/1000000 [1:57:16<26:24:04, 10.07it/s]

{'loss': Array(0.16746734, dtype=float32), 'loss_cross_entropy': Array(0.15446346, dtype=float32)}


  4%|▍         | 42639/1000000 [1:57:18<24:39:17, 10.79it/s]

{'loss': Array(0.18073069, dtype=float32), 'loss_cross_entropy': Array(0.16764031, dtype=float32)}


  4%|▍         | 42649/1000000 [1:57:19<31:39:00,  8.40it/s]

{'loss': Array(0.16079094, dtype=float32), 'loss_cross_entropy': Array(0.14986111, dtype=float32)}


  4%|▍         | 42659/1000000 [1:57:21<25:34:21, 10.40it/s]

{'loss': Array(0.20424333, dtype=float32), 'loss_cross_entropy': Array(0.19089262, dtype=float32)}


  4%|▍         | 42669/1000000 [1:57:22<24:58:40, 10.65it/s]

{'loss': Array(0.19432822, dtype=float32), 'loss_cross_entropy': Array(0.18123937, dtype=float32)}


  4%|▍         | 42679/1000000 [1:57:24<28:16:33,  9.40it/s]

{'loss': Array(0.18287463, dtype=float32), 'loss_cross_entropy': Array(0.17022312, dtype=float32)}


  4%|▍         | 42689/1000000 [1:57:25<25:15:39, 10.53it/s]

{'loss': Array(0.16358383, dtype=float32), 'loss_cross_entropy': Array(0.15184422, dtype=float32)}


  4%|▍         | 42699/1000000 [1:57:27<31:21:32,  8.48it/s]

{'loss': Array(0.19557321, dtype=float32), 'loss_cross_entropy': Array(0.18136863, dtype=float32)}


  4%|▍         | 42709/1000000 [1:57:28<26:01:34, 10.22it/s]

{'loss': Array(0.16099592, dtype=float32), 'loss_cross_entropy': Array(0.14927351, dtype=float32)}


  4%|▍         | 42719/1000000 [1:57:30<39:01:38,  6.81it/s]

{'loss': Array(0.16336097, dtype=float32), 'loss_cross_entropy': Array(0.15124458, dtype=float32)}


  4%|▍         | 42729/1000000 [1:57:31<26:54:22,  9.88it/s]

{'loss': Array(0.17881374, dtype=float32), 'loss_cross_entropy': Array(0.16621302, dtype=float32)}


  4%|▍         | 42739/1000000 [1:57:33<24:43:05, 10.76it/s]

{'loss': Array(0.1738458, dtype=float32), 'loss_cross_entropy': Array(0.16204256, dtype=float32)}


  4%|▍         | 42749/1000000 [1:57:34<35:33:29,  7.48it/s]

{'loss': Array(0.1821481, dtype=float32), 'loss_cross_entropy': Array(0.1685047, dtype=float32)}


  4%|▍         | 42759/1000000 [1:57:36<26:09:48, 10.16it/s]

{'loss': Array(0.17645581, dtype=float32), 'loss_cross_entropy': Array(0.16425447, dtype=float32)}


  4%|▍         | 42769/1000000 [1:57:37<24:59:55, 10.64it/s]

{'loss': Array(0.16324073, dtype=float32), 'loss_cross_entropy': Array(0.15138452, dtype=float32)}


  4%|▍         | 42779/1000000 [1:57:39<29:39:02,  8.97it/s]

{'loss': Array(0.1565961, dtype=float32), 'loss_cross_entropy': Array(0.1452456, dtype=float32)}


  4%|▍         | 42789/1000000 [1:57:40<25:27:28, 10.44it/s]

{'loss': Array(0.19379608, dtype=float32), 'loss_cross_entropy': Array(0.18066208, dtype=float32)}


  4%|▍         | 42799/1000000 [1:57:42<32:21:28,  8.22it/s]

{'loss': Array(0.17100461, dtype=float32), 'loss_cross_entropy': Array(0.15918092, dtype=float32)}


  4%|▍         | 42809/1000000 [1:57:43<25:40:02, 10.36it/s]

{'loss': Array(0.17186646, dtype=float32), 'loss_cross_entropy': Array(0.1595236, dtype=float32)}


  4%|▍         | 42819/1000000 [1:57:45<24:43:30, 10.75it/s]

{'loss': Array(0.17412731, dtype=float32), 'loss_cross_entropy': Array(0.16174243, dtype=float32)}


  4%|▍         | 42829/1000000 [1:57:46<27:58:51,  9.50it/s]

{'loss': Array(0.17910993, dtype=float32), 'loss_cross_entropy': Array(0.16607086, dtype=float32)}


  4%|▍         | 42839/1000000 [1:57:48<25:05:14, 10.60it/s]

{'loss': Array(0.2140349, dtype=float32), 'loss_cross_entropy': Array(0.2002251, dtype=float32)}


  4%|▍         | 42849/1000000 [1:57:49<34:56:40,  7.61it/s]

{'loss': Array(0.16424094, dtype=float32), 'loss_cross_entropy': Array(0.15253487, dtype=float32)}


  4%|▍         | 42859/1000000 [1:57:51<26:19:37, 10.10it/s]

{'loss': Array(0.19134954, dtype=float32), 'loss_cross_entropy': Array(0.17839602, dtype=float32)}


  4%|▍         | 42869/1000000 [1:57:52<24:39:15, 10.78it/s]

{'loss': Array(0.17775422, dtype=float32), 'loss_cross_entropy': Array(0.16394211, dtype=float32)}


  4%|▍         | 42879/1000000 [1:57:54<27:29:43,  9.67it/s]

{'loss': Array(0.16838159, dtype=float32), 'loss_cross_entropy': Array(0.15656215, dtype=float32)}


  4%|▍         | 42889/1000000 [1:57:55<25:28:41, 10.43it/s]

{'loss': Array(0.18950364, dtype=float32), 'loss_cross_entropy': Array(0.17682649, dtype=float32)}


  4%|▍         | 42899/1000000 [1:57:57<30:13:31,  8.80it/s]

{'loss': Array(0.18817768, dtype=float32), 'loss_cross_entropy': Array(0.17548843, dtype=float32)}


  4%|▍         | 42909/1000000 [1:57:58<25:48:37, 10.30it/s]

{'loss': Array(0.18712048, dtype=float32), 'loss_cross_entropy': Array(0.17486465, dtype=float32)}


  4%|▍         | 42919/1000000 [1:58:00<40:04:38,  6.63it/s]

{'loss': Array(0.18361245, dtype=float32), 'loss_cross_entropy': Array(0.17088638, dtype=float32)}


  4%|▍         | 42929/1000000 [1:58:01<27:54:32,  9.53it/s]

{'loss': Array(0.1784405, dtype=float32), 'loss_cross_entropy': Array(0.16515611, dtype=float32)}


  4%|▍         | 42939/1000000 [1:58:03<28:08:27,  9.45it/s]

{'loss': Array(0.17594661, dtype=float32), 'loss_cross_entropy': Array(0.163644, dtype=float32)}


  4%|▍         | 42949/1000000 [1:58:05<33:45:20,  7.88it/s]

{'loss': Array(0.2026879, dtype=float32), 'loss_cross_entropy': Array(0.18999581, dtype=float32)}


  4%|▍         | 42959/1000000 [1:58:06<27:16:39,  9.75it/s]

{'loss': Array(0.15121296, dtype=float32), 'loss_cross_entropy': Array(0.14016418, dtype=float32)}


  4%|▍         | 42969/1000000 [1:58:08<35:44:48,  7.44it/s]

{'loss': Array(0.17956004, dtype=float32), 'loss_cross_entropy': Array(0.16700098, dtype=float32)}


  4%|▍         | 42979/1000000 [1:58:09<26:31:48, 10.02it/s]

{'loss': Array(0.18215899, dtype=float32), 'loss_cross_entropy': Array(0.16951233, dtype=float32)}


  4%|▍         | 42989/1000000 [1:58:11<34:42:08,  7.66it/s]

{'loss': Array(0.1940894, dtype=float32), 'loss_cross_entropy': Array(0.18074727, dtype=float32)}


  4%|▍         | 42999/1000000 [1:58:12<26:38:18,  9.98it/s]

{'loss': Array(0.1774949, dtype=float32), 'loss_cross_entropy': Array(0.16437404, dtype=float32)}


  4%|▍         | 43009/1000000 [1:58:20<85:29:39,  3.11it/s] 

{'loss': Array(0.18771869, dtype=float32), 'loss_cross_entropy': Array(0.1749379, dtype=float32)}


  4%|▍         | 43019/1000000 [1:58:22<39:33:16,  6.72it/s]

{'loss': Array(0.17523775, dtype=float32), 'loss_cross_entropy': Array(0.16318916, dtype=float32)}


  4%|▍         | 43029/1000000 [1:58:23<26:53:15,  9.89it/s]

{'loss': Array(0.16378962, dtype=float32), 'loss_cross_entropy': Array(0.15193282, dtype=float32)}


  4%|▍         | 43039/1000000 [1:58:25<34:40:12,  7.67it/s]

{'loss': Array(0.16138157, dtype=float32), 'loss_cross_entropy': Array(0.15016446, dtype=float32)}


  4%|▍         | 43049/1000000 [1:58:26<26:16:47, 10.11it/s]

{'loss': Array(0.19326699, dtype=float32), 'loss_cross_entropy': Array(0.17984293, dtype=float32)}


  4%|▍         | 43059/1000000 [1:58:28<39:24:12,  6.75it/s]

{'loss': Array(0.19502111, dtype=float32), 'loss_cross_entropy': Array(0.18152644, dtype=float32)}


  4%|▍         | 43069/1000000 [1:58:29<27:22:05,  9.71it/s]

{'loss': Array(0.18674688, dtype=float32), 'loss_cross_entropy': Array(0.17393893, dtype=float32)}


  4%|▍         | 43079/1000000 [1:58:31<25:24:07, 10.46it/s]

{'loss': Array(0.2022794, dtype=float32), 'loss_cross_entropy': Array(0.18922228, dtype=float32)}


  4%|▍         | 43089/1000000 [1:58:32<31:15:10,  8.51it/s]

{'loss': Array(0.17521688, dtype=float32), 'loss_cross_entropy': Array(0.16362694, dtype=float32)}


  4%|▍         | 43099/1000000 [1:58:34<25:26:02, 10.45it/s]

{'loss': Array(0.18694142, dtype=float32), 'loss_cross_entropy': Array(0.17425787, dtype=float32)}


  4%|▍         | 43109/1000000 [1:58:35<34:55:45,  7.61it/s]

{'loss': Array(0.17519583, dtype=float32), 'loss_cross_entropy': Array(0.16317677, dtype=float32)}


  4%|▍         | 43119/1000000 [1:58:37<26:13:44, 10.13it/s]

{'loss': Array(0.17706995, dtype=float32), 'loss_cross_entropy': Array(0.16525355, dtype=float32)}


  4%|▍         | 43129/1000000 [1:58:38<24:47:03, 10.72it/s]

{'loss': Array(0.20070548, dtype=float32), 'loss_cross_entropy': Array(0.18730192, dtype=float32)}


  4%|▍         | 43139/1000000 [1:58:40<28:02:02,  9.48it/s]

{'loss': Array(0.16899736, dtype=float32), 'loss_cross_entropy': Array(0.15719108, dtype=float32)}


  4%|▍         | 43149/1000000 [1:58:41<25:47:04, 10.31it/s]

{'loss': Array(0.17653151, dtype=float32), 'loss_cross_entropy': Array(0.16479908, dtype=float32)}


  4%|▍         | 43159/1000000 [1:58:43<38:55:08,  6.83it/s]

{'loss': Array(0.21638194, dtype=float32), 'loss_cross_entropy': Array(0.20183535, dtype=float32)}


  4%|▍         | 43169/1000000 [1:58:44<26:38:28,  9.98it/s]

{'loss': Array(0.16720451, dtype=float32), 'loss_cross_entropy': Array(0.15570995, dtype=float32)}


  4%|▍         | 43179/1000000 [1:58:46<24:57:34, 10.65it/s]

{'loss': Array(0.17129393, dtype=float32), 'loss_cross_entropy': Array(0.15950458, dtype=float32)}


  4%|▍         | 43189/1000000 [1:58:47<31:37:51,  8.40it/s]

{'loss': Array(0.18478945, dtype=float32), 'loss_cross_entropy': Array(0.17240791, dtype=float32)}


  4%|▍         | 43199/1000000 [1:58:49<26:01:39, 10.21it/s]

{'loss': Array(0.18058898, dtype=float32), 'loss_cross_entropy': Array(0.16730331, dtype=float32)}


  4%|▍         | 43209/1000000 [1:58:50<35:04:25,  7.58it/s]

{'loss': Array(0.16478801, dtype=float32), 'loss_cross_entropy': Array(0.15255895, dtype=float32)}


  4%|▍         | 43219/1000000 [1:58:52<26:21:39, 10.08it/s]

{'loss': Array(0.16288029, dtype=float32), 'loss_cross_entropy': Array(0.15230356, dtype=float32)}


  4%|▍         | 43229/1000000 [1:58:53<35:39:46,  7.45it/s]

{'loss': Array(0.18918253, dtype=float32), 'loss_cross_entropy': Array(0.17657794, dtype=float32)}


  4%|▍         | 43239/1000000 [1:58:55<26:36:51,  9.99it/s]

{'loss': Array(0.17601392, dtype=float32), 'loss_cross_entropy': Array(0.1640683, dtype=float32)}


  4%|▍         | 43249/1000000 [1:58:56<25:15:00, 10.53it/s]

{'loss': Array(0.19141111, dtype=float32), 'loss_cross_entropy': Array(0.17904465, dtype=float32)}


  4%|▍         | 43259/1000000 [1:58:58<29:56:50,  8.87it/s]

{'loss': Array(0.17968021, dtype=float32), 'loss_cross_entropy': Array(0.16686949, dtype=float32)}


  4%|▍         | 43269/1000000 [1:58:59<26:32:14, 10.01it/s]

{'loss': Array(0.19094914, dtype=float32), 'loss_cross_entropy': Array(0.17772311, dtype=float32)}


  4%|▍         | 43279/1000000 [1:59:01<33:13:55,  8.00it/s]

{'loss': Array(0.16072851, dtype=float32), 'loss_cross_entropy': Array(0.14818451, dtype=float32)}


  4%|▍         | 43289/1000000 [1:59:03<26:55:53,  9.87it/s]

{'loss': Array(0.20034514, dtype=float32), 'loss_cross_entropy': Array(0.18647806, dtype=float32)}


  4%|▍         | 43299/1000000 [1:59:04<39:57:47,  6.65it/s]

{'loss': Array(0.21279004, dtype=float32), 'loss_cross_entropy': Array(0.19787817, dtype=float32)}


  4%|▍         | 43309/1000000 [1:59:06<26:48:23,  9.91it/s]

{'loss': Array(0.19212413, dtype=float32), 'loss_cross_entropy': Array(0.17868492, dtype=float32)}


  4%|▍         | 43319/1000000 [1:59:07<39:26:02,  6.74it/s]

{'loss': Array(0.17762785, dtype=float32), 'loss_cross_entropy': Array(0.16549699, dtype=float32)}


  4%|▍         | 43329/1000000 [1:59:09<27:30:30,  9.66it/s]

{'loss': Array(0.19025938, dtype=float32), 'loss_cross_entropy': Array(0.1776912, dtype=float32)}


  4%|▍         | 43339/1000000 [1:59:10<24:48:29, 10.71it/s]

{'loss': Array(0.17332232, dtype=float32), 'loss_cross_entropy': Array(0.16008781, dtype=float32)}


  4%|▍         | 43349/1000000 [1:59:12<34:13:19,  7.77it/s]

{'loss': Array(0.1764491, dtype=float32), 'loss_cross_entropy': Array(0.1639401, dtype=float32)}


  4%|▍         | 43359/1000000 [1:59:13<25:46:48, 10.31it/s]

{'loss': Array(0.17396224, dtype=float32), 'loss_cross_entropy': Array(0.16175607, dtype=float32)}


  4%|▍         | 43369/1000000 [1:59:15<41:02:16,  6.48it/s]

{'loss': Array(0.18552254, dtype=float32), 'loss_cross_entropy': Array(0.17251287, dtype=float32)}


  4%|▍         | 43379/1000000 [1:59:16<27:15:01,  9.75it/s]

{'loss': Array(0.1824425, dtype=float32), 'loss_cross_entropy': Array(0.17090169, dtype=float32)}


  4%|▍         | 43389/1000000 [1:59:18<25:24:23, 10.46it/s]

{'loss': Array(0.17661293, dtype=float32), 'loss_cross_entropy': Array(0.16467069, dtype=float32)}


  4%|▍         | 43399/1000000 [1:59:19<28:31:38,  9.31it/s]

{'loss': Array(0.18604693, dtype=float32), 'loss_cross_entropy': Array(0.17329167, dtype=float32)}


  4%|▍         | 43409/1000000 [1:59:21<25:23:03, 10.47it/s]

{'loss': Array(0.17565092, dtype=float32), 'loss_cross_entropy': Array(0.16327773, dtype=float32)}


  4%|▍         | 43419/1000000 [1:59:22<27:40:51,  9.60it/s]

{'loss': Array(0.19857419, dtype=float32), 'loss_cross_entropy': Array(0.18509798, dtype=float32)}


  4%|▍         | 43429/1000000 [1:59:24<24:31:42, 10.83it/s]

{'loss': Array(0.18778288, dtype=float32), 'loss_cross_entropy': Array(0.17482975, dtype=float32)}


  4%|▍         | 43439/1000000 [1:59:25<38:11:08,  6.96it/s]

{'loss': Array(0.1875077, dtype=float32), 'loss_cross_entropy': Array(0.17443222, dtype=float32)}


  4%|▍         | 43449/1000000 [1:59:27<26:44:38,  9.94it/s]

{'loss': Array(0.15700369, dtype=float32), 'loss_cross_entropy': Array(0.14581783, dtype=float32)}


  4%|▍         | 43459/1000000 [1:59:28<24:39:21, 10.78it/s]

{'loss': Array(0.18884303, dtype=float32), 'loss_cross_entropy': Array(0.17525876, dtype=float32)}


  4%|▍         | 43469/1000000 [1:59:30<31:23:44,  8.46it/s]

{'loss': Array(0.16872562, dtype=float32), 'loss_cross_entropy': Array(0.15646006, dtype=float32)}


  4%|▍         | 43479/1000000 [1:59:31<25:42:27, 10.34it/s]

{'loss': Array(0.17421256, dtype=float32), 'loss_cross_entropy': Array(0.16139598, dtype=float32)}


  4%|▍         | 43489/1000000 [1:59:33<38:31:16,  6.90it/s]

{'loss': Array(0.1675638, dtype=float32), 'loss_cross_entropy': Array(0.1566057, dtype=float32)}


  4%|▍         | 43499/1000000 [1:59:34<27:05:19,  9.81it/s]

{'loss': Array(0.17738433, dtype=float32), 'loss_cross_entropy': Array(0.16537145, dtype=float32)}


  4%|▍         | 43509/1000000 [1:59:42<87:02:31,  3.05it/s] 

{'loss': Array(0.18820451, dtype=float32), 'loss_cross_entropy': Array(0.1751627, dtype=float32)}


  4%|▍         | 43519/1000000 [1:59:44<40:46:30,  6.52it/s]

{'loss': Array(0.18080786, dtype=float32), 'loss_cross_entropy': Array(0.16746168, dtype=float32)}


  4%|▍         | 43529/1000000 [1:59:45<27:12:20,  9.77it/s]

{'loss': Array(0.1624239, dtype=float32), 'loss_cross_entropy': Array(0.15065412, dtype=float32)}


  4%|▍         | 43539/1000000 [1:59:47<31:59:27,  8.30it/s]

{'loss': Array(0.18066691, dtype=float32), 'loss_cross_entropy': Array(0.16679214, dtype=float32)}


  4%|▍         | 43549/1000000 [1:59:48<25:56:44, 10.24it/s]

{'loss': Array(0.16619115, dtype=float32), 'loss_cross_entropy': Array(0.15502238, dtype=float32)}


  4%|▍         | 43559/1000000 [1:59:50<39:16:26,  6.76it/s]

{'loss': Array(0.1939035, dtype=float32), 'loss_cross_entropy': Array(0.18073823, dtype=float32)}


  4%|▍         | 43569/1000000 [1:59:51<26:37:28,  9.98it/s]

{'loss': Array(0.18210296, dtype=float32), 'loss_cross_entropy': Array(0.16924702, dtype=float32)}


  4%|▍         | 43579/1000000 [1:59:53<25:04:27, 10.60it/s]

{'loss': Array(0.1778354, dtype=float32), 'loss_cross_entropy': Array(0.16516638, dtype=float32)}


  4%|▍         | 43589/1000000 [1:59:54<28:16:49,  9.39it/s]

{'loss': Array(0.1632648, dtype=float32), 'loss_cross_entropy': Array(0.15109196, dtype=float32)}


  4%|▍         | 43599/1000000 [1:59:56<25:26:43, 10.44it/s]

{'loss': Array(0.18674104, dtype=float32), 'loss_cross_entropy': Array(0.17347835, dtype=float32)}


  4%|▍         | 43609/1000000 [1:59:57<38:55:48,  6.82it/s]

{'loss': Array(0.19043052, dtype=float32), 'loss_cross_entropy': Array(0.17833298, dtype=float32)}


  4%|▍         | 43619/1000000 [1:59:59<28:47:59,  9.22it/s]

{'loss': Array(0.18402885, dtype=float32), 'loss_cross_entropy': Array(0.17265, dtype=float32)}


  4%|▍         | 43629/1000000 [2:00:00<25:14:27, 10.52it/s]

{'loss': Array(0.17795363, dtype=float32), 'loss_cross_entropy': Array(0.16605484, dtype=float32)}


  4%|▍         | 43639/1000000 [2:00:02<30:52:45,  8.60it/s]

{'loss': Array(0.1909337, dtype=float32), 'loss_cross_entropy': Array(0.17850351, dtype=float32)}


  4%|▍         | 43649/1000000 [2:00:03<26:34:06, 10.00it/s]

{'loss': Array(0.18864825, dtype=float32), 'loss_cross_entropy': Array(0.17548953, dtype=float32)}


  4%|▍         | 43659/1000000 [2:00:05<34:57:12,  7.60it/s]

{'loss': Array(0.19502617, dtype=float32), 'loss_cross_entropy': Array(0.18166254, dtype=float32)}


  4%|▍         | 43669/1000000 [2:00:06<26:07:08, 10.17it/s]

{'loss': Array(0.17105787, dtype=float32), 'loss_cross_entropy': Array(0.15912943, dtype=float32)}


  4%|▍         | 43679/1000000 [2:00:08<39:32:36,  6.72it/s]

{'loss': Array(0.17387927, dtype=float32), 'loss_cross_entropy': Array(0.16200654, dtype=float32)}


  4%|▍         | 43689/1000000 [2:00:10<27:39:57,  9.60it/s]

{'loss': Array(0.1885822, dtype=float32), 'loss_cross_entropy': Array(0.17583655, dtype=float32)}


  4%|▍         | 43699/1000000 [2:00:11<25:04:49, 10.59it/s]

{'loss': Array(0.1887047, dtype=float32), 'loss_cross_entropy': Array(0.17543511, dtype=float32)}


  4%|▍         | 43709/1000000 [2:00:13<31:51:10,  8.34it/s]

{'loss': Array(0.1807652, dtype=float32), 'loss_cross_entropy': Array(0.1687486, dtype=float32)}


  4%|▍         | 43719/1000000 [2:00:14<25:29:15, 10.42it/s]

{'loss': Array(0.17546223, dtype=float32), 'loss_cross_entropy': Array(0.16324507, dtype=float32)}


  4%|▍         | 43729/1000000 [2:00:16<34:55:39,  7.61it/s]

{'loss': Array(0.20412914, dtype=float32), 'loss_cross_entropy': Array(0.19000874, dtype=float32)}


  4%|▍         | 43739/1000000 [2:00:17<26:06:13, 10.18it/s]

{'loss': Array(0.17814443, dtype=float32), 'loss_cross_entropy': Array(0.16523117, dtype=float32)}


  4%|▍         | 43749/1000000 [2:00:18<25:00:10, 10.62it/s]

{'loss': Array(0.1935706, dtype=float32), 'loss_cross_entropy': Array(0.18062507, dtype=float32)}


  4%|▍         | 43759/1000000 [2:00:20<27:54:10,  9.52it/s]

{'loss': Array(0.15858872, dtype=float32), 'loss_cross_entropy': Array(0.1467266, dtype=float32)}


  4%|▍         | 43769/1000000 [2:00:22<27:24:52,  9.69it/s]

{'loss': Array(0.17248636, dtype=float32), 'loss_cross_entropy': Array(0.16088466, dtype=float32)}


  4%|▍         | 43779/1000000 [2:00:23<38:57:27,  6.82it/s]

{'loss': Array(0.17860864, dtype=float32), 'loss_cross_entropy': Array(0.16618368, dtype=float32)}


  4%|▍         | 43789/1000000 [2:00:25<26:50:00,  9.90it/s]

{'loss': Array(0.1698684, dtype=float32), 'loss_cross_entropy': Array(0.1582387, dtype=float32)}


  4%|▍         | 43799/1000000 [2:00:26<25:28:09, 10.43it/s]

{'loss': Array(0.16050704, dtype=float32), 'loss_cross_entropy': Array(0.14916062, dtype=float32)}


  4%|▍         | 43809/1000000 [2:00:28<31:45:41,  8.36it/s]

{'loss': Array(0.18714826, dtype=float32), 'loss_cross_entropy': Array(0.17434143, dtype=float32)}


  4%|▍         | 43819/1000000 [2:00:29<25:27:00, 10.44it/s]

{'loss': Array(0.18176322, dtype=float32), 'loss_cross_entropy': Array(0.16977656, dtype=float32)}


  4%|▍         | 43829/1000000 [2:00:31<34:55:36,  7.60it/s]

{'loss': Array(0.17809562, dtype=float32), 'loss_cross_entropy': Array(0.16619943, dtype=float32)}


  4%|▍         | 43839/1000000 [2:00:32<26:37:44,  9.97it/s]

{'loss': Array(0.19143617, dtype=float32), 'loss_cross_entropy': Array(0.17834699, dtype=float32)}


  4%|▍         | 43849/1000000 [2:00:34<35:39:59,  7.45it/s]

{'loss': Array(0.20960715, dtype=float32), 'loss_cross_entropy': Array(0.1965179, dtype=float32)}


  4%|▍         | 43859/1000000 [2:00:35<26:57:00,  9.86it/s]

{'loss': Array(0.19340841, dtype=float32), 'loss_cross_entropy': Array(0.18047293, dtype=float32)}


  4%|▍         | 43869/1000000 [2:00:37<25:13:31, 10.53it/s]

{'loss': Array(0.18104698, dtype=float32), 'loss_cross_entropy': Array(0.16853018, dtype=float32)}


  4%|▍         | 43879/1000000 [2:00:38<29:48:26,  8.91it/s]

{'loss': Array(0.15996769, dtype=float32), 'loss_cross_entropy': Array(0.14837614, dtype=float32)}


  4%|▍         | 43889/1000000 [2:00:40<25:17:27, 10.50it/s]

{'loss': Array(0.17772195, dtype=float32), 'loss_cross_entropy': Array(0.16591592, dtype=float32)}


  4%|▍         | 43899/1000000 [2:00:41<31:37:43,  8.40it/s]

{'loss': Array(0.16787311, dtype=float32), 'loss_cross_entropy': Array(0.155949, dtype=float32)}


  4%|▍         | 43909/1000000 [2:00:43<25:36:10, 10.37it/s]

{'loss': Array(0.18155025, dtype=float32), 'loss_cross_entropy': Array(0.16941829, dtype=float32)}


  4%|▍         | 43919/1000000 [2:00:44<38:50:10,  6.84it/s]

{'loss': Array(0.15829636, dtype=float32), 'loss_cross_entropy': Array(0.14726359, dtype=float32)}


  4%|▍         | 43929/1000000 [2:00:46<26:32:56, 10.00it/s]

{'loss': Array(0.17840287, dtype=float32), 'loss_cross_entropy': Array(0.16571067, dtype=float32)}


  4%|▍         | 43939/1000000 [2:00:48<39:13:26,  6.77it/s]

{'loss': Array(0.18548284, dtype=float32), 'loss_cross_entropy': Array(0.17245871, dtype=float32)}


  4%|▍         | 43949/1000000 [2:00:49<27:00:09,  9.83it/s]

{'loss': Array(0.1626573, dtype=float32), 'loss_cross_entropy': Array(0.15037434, dtype=float32)}


  4%|▍         | 43959/1000000 [2:00:50<24:48:39, 10.70it/s]

{'loss': Array(0.19998956, dtype=float32), 'loss_cross_entropy': Array(0.18611424, dtype=float32)}


  4%|▍         | 43969/1000000 [2:00:52<33:56:04,  7.83it/s]

{'loss': Array(0.1768049, dtype=float32), 'loss_cross_entropy': Array(0.16506433, dtype=float32)}


  4%|▍         | 43979/1000000 [2:00:53<26:10:39, 10.14it/s]

{'loss': Array(0.17225818, dtype=float32), 'loss_cross_entropy': Array(0.16020457, dtype=float32)}


  4%|▍         | 43989/1000000 [2:00:55<39:52:48,  6.66it/s]

{'loss': Array(0.18833965, dtype=float32), 'loss_cross_entropy': Array(0.17655568, dtype=float32)}


  4%|▍         | 43999/1000000 [2:00:56<27:08:37,  9.78it/s]

{'loss': Array(0.17696153, dtype=float32), 'loss_cross_entropy': Array(0.16467571, dtype=float32)}


  4%|▍         | 44009/1000000 [2:01:04<89:16:14,  2.97it/s] 

{'loss': Array(0.17536257, dtype=float32), 'loss_cross_entropy': Array(0.16332844, dtype=float32)}


  4%|▍         | 44019/1000000 [2:01:06<39:52:12,  6.66it/s] 

{'loss': Array(0.17737897, dtype=float32), 'loss_cross_entropy': Array(0.16518006, dtype=float32)}


  4%|▍         | 44029/1000000 [2:01:08<28:50:44,  9.21it/s]

{'loss': Array(0.17490086, dtype=float32), 'loss_cross_entropy': Array(0.16323195, dtype=float32)}


  4%|▍         | 44039/1000000 [2:01:09<29:37:09,  8.97it/s]

{'loss': Array(0.1777235, dtype=float32), 'loss_cross_entropy': Array(0.16563344, dtype=float32)}


  4%|▍         | 44049/1000000 [2:01:11<26:16:10, 10.11it/s]

{'loss': Array(0.19282398, dtype=float32), 'loss_cross_entropy': Array(0.18011563, dtype=float32)}


  4%|▍         | 44059/1000000 [2:01:13<39:07:41,  6.79it/s]

{'loss': Array(0.19964176, dtype=float32), 'loss_cross_entropy': Array(0.18595628, dtype=float32)}


  4%|▍         | 44069/1000000 [2:01:14<27:03:13,  9.82it/s]

{'loss': Array(0.1808581, dtype=float32), 'loss_cross_entropy': Array(0.16817383, dtype=float32)}


  4%|▍         | 44079/1000000 [2:01:15<24:40:35, 10.76it/s]

{'loss': Array(0.15589496, dtype=float32), 'loss_cross_entropy': Array(0.14492472, dtype=float32)}


  4%|▍         | 44089/1000000 [2:01:17<31:54:56,  8.32it/s]

{'loss': Array(0.19762866, dtype=float32), 'loss_cross_entropy': Array(0.18387783, dtype=float32)}


  4%|▍         | 44099/1000000 [2:01:18<26:01:12, 10.20it/s]

{'loss': Array(0.19127552, dtype=float32), 'loss_cross_entropy': Array(0.17840843, dtype=float32)}


  4%|▍         | 44109/1000000 [2:01:20<38:42:46,  6.86it/s]

{'loss': Array(0.18385226, dtype=float32), 'loss_cross_entropy': Array(0.17137472, dtype=float32)}


  4%|▍         | 44119/1000000 [2:01:21<27:21:21,  9.71it/s]

{'loss': Array(0.18404989, dtype=float32), 'loss_cross_entropy': Array(0.17111403, dtype=float32)}


  4%|▍         | 44129/1000000 [2:01:23<24:35:53, 10.79it/s]

{'loss': Array(0.15895633, dtype=float32), 'loss_cross_entropy': Array(0.1476184, dtype=float32)}


  4%|▍         | 44139/1000000 [2:01:24<29:19:20,  9.06it/s]

{'loss': Array(0.1772522, dtype=float32), 'loss_cross_entropy': Array(0.16469039, dtype=float32)}


  4%|▍         | 44149/1000000 [2:01:26<25:07:58, 10.56it/s]

{'loss': Array(0.1681529, dtype=float32), 'loss_cross_entropy': Array(0.15637979, dtype=float32)}


  4%|▍         | 44159/1000000 [2:01:28<35:10:45,  7.55it/s]

{'loss': Array(0.16554965, dtype=float32), 'loss_cross_entropy': Array(0.15294607, dtype=float32)}


  4%|▍         | 44169/1000000 [2:01:29<27:40:58,  9.59it/s]

{'loss': Array(0.19157529, dtype=float32), 'loss_cross_entropy': Array(0.17841606, dtype=float32)}


  4%|▍         | 44179/1000000 [2:01:30<25:42:08, 10.33it/s]

{'loss': Array(0.17280786, dtype=float32), 'loss_cross_entropy': Array(0.16175476, dtype=float32)}


  4%|▍         | 44189/1000000 [2:01:32<31:57:09,  8.31it/s]

{'loss': Array(0.17189313, dtype=float32), 'loss_cross_entropy': Array(0.16016135, dtype=float32)}


  4%|▍         | 44199/1000000 [2:01:33<25:46:42, 10.30it/s]

{'loss': Array(0.1921045, dtype=float32), 'loss_cross_entropy': Array(0.17853099, dtype=float32)}


  4%|▍         | 44209/1000000 [2:01:35<24:32:36, 10.82it/s]

{'loss': Array(0.1583061, dtype=float32), 'loss_cross_entropy': Array(0.14739005, dtype=float32)}


  4%|▍         | 44219/1000000 [2:01:36<28:25:36,  9.34it/s]

{'loss': Array(0.1720096, dtype=float32), 'loss_cross_entropy': Array(0.16003951, dtype=float32)}


  4%|▍         | 44229/1000000 [2:01:38<25:19:58, 10.48it/s]

{'loss': Array(0.18856458, dtype=float32), 'loss_cross_entropy': Array(0.1763721, dtype=float32)}


  4%|▍         | 44239/1000000 [2:01:40<32:38:24,  8.13it/s]

{'loss': Array(0.1811419, dtype=float32), 'loss_cross_entropy': Array(0.16852568, dtype=float32)}


  4%|▍         | 44249/1000000 [2:01:41<25:45:06, 10.31it/s]

{'loss': Array(0.17930673, dtype=float32), 'loss_cross_entropy': Array(0.16732155, dtype=float32)}


  4%|▍         | 44259/1000000 [2:01:43<39:06:25,  6.79it/s]

{'loss': Array(0.18019216, dtype=float32), 'loss_cross_entropy': Array(0.168031, dtype=float32)}


  4%|▍         | 44269/1000000 [2:01:44<26:51:17,  9.89it/s]

{'loss': Array(0.18291749, dtype=float32), 'loss_cross_entropy': Array(0.17006007, dtype=float32)}


  4%|▍         | 44279/1000000 [2:01:46<39:53:14,  6.66it/s]

{'loss': Array(0.16261677, dtype=float32), 'loss_cross_entropy': Array(0.15110096, dtype=float32)}


  4%|▍         | 44289/1000000 [2:01:47<27:43:02,  9.58it/s]

{'loss': Array(0.18188165, dtype=float32), 'loss_cross_entropy': Array(0.16959995, dtype=float32)}


  4%|▍         | 44299/1000000 [2:01:49<25:00:30, 10.62it/s]

{'loss': Array(0.1954547, dtype=float32), 'loss_cross_entropy': Array(0.18279639, dtype=float32)}


  4%|▍         | 44309/1000000 [2:01:50<34:23:07,  7.72it/s]

{'loss': Array(0.1799962, dtype=float32), 'loss_cross_entropy': Array(0.16766271, dtype=float32)}


  4%|▍         | 44319/1000000 [2:01:52<26:04:55, 10.18it/s]

{'loss': Array(0.19296457, dtype=float32), 'loss_cross_entropy': Array(0.17985019, dtype=float32)}


  4%|▍         | 44329/1000000 [2:01:53<38:50:33,  6.83it/s]

{'loss': Array(0.17050593, dtype=float32), 'loss_cross_entropy': Array(0.15861201, dtype=float32)}


  4%|▍         | 44339/1000000 [2:01:55<26:34:24,  9.99it/s]

{'loss': Array(0.1584975, dtype=float32), 'loss_cross_entropy': Array(0.14759797, dtype=float32)}


  4%|▍         | 44349/1000000 [2:01:56<25:09:03, 10.55it/s]

{'loss': Array(0.18052058, dtype=float32), 'loss_cross_entropy': Array(0.16849817, dtype=float32)}


  4%|▍         | 44359/1000000 [2:01:58<28:22:00,  9.36it/s]

{'loss': Array(0.14334437, dtype=float32), 'loss_cross_entropy': Array(0.13201514, dtype=float32)}


  4%|▍         | 44369/1000000 [2:01:59<26:32:11, 10.00it/s]

{'loss': Array(0.20070732, dtype=float32), 'loss_cross_entropy': Array(0.18811752, dtype=float32)}


  4%|▍         | 44379/1000000 [2:02:01<28:08:43,  9.43it/s]

{'loss': Array(0.16663319, dtype=float32), 'loss_cross_entropy': Array(0.15500581, dtype=float32)}


  4%|▍         | 44389/1000000 [2:02:02<24:58:33, 10.63it/s]

{'loss': Array(0.15105999, dtype=float32), 'loss_cross_entropy': Array(0.14028189, dtype=float32)}


  4%|▍         | 44399/1000000 [2:02:04<39:07:26,  6.78it/s]

{'loss': Array(0.17718343, dtype=float32), 'loss_cross_entropy': Array(0.16522239, dtype=float32)}


  4%|▍         | 44409/1000000 [2:02:05<27:14:29,  9.74it/s]

{'loss': Array(0.15934709, dtype=float32), 'loss_cross_entropy': Array(0.14835675, dtype=float32)}


  4%|▍         | 44419/1000000 [2:02:07<25:22:59, 10.46it/s]

{'loss': Array(0.17231706, dtype=float32), 'loss_cross_entropy': Array(0.15931635, dtype=float32)}


  4%|▍         | 44429/1000000 [2:02:08<32:25:56,  8.18it/s]

{'loss': Array(0.18723033, dtype=float32), 'loss_cross_entropy': Array(0.1743241, dtype=float32)}


  4%|▍         | 44439/1000000 [2:02:10<27:30:58,  9.65it/s]

{'loss': Array(0.15574971, dtype=float32), 'loss_cross_entropy': Array(0.14420295, dtype=float32)}


  4%|▍         | 44449/1000000 [2:02:12<39:56:33,  6.65it/s]

{'loss': Array(0.1953304, dtype=float32), 'loss_cross_entropy': Array(0.18279211, dtype=float32)}


  4%|▍         | 44459/1000000 [2:02:13<27:20:48,  9.71it/s]

{'loss': Array(0.18130665, dtype=float32), 'loss_cross_entropy': Array(0.16833563, dtype=float32)}


  4%|▍         | 44469/1000000 [2:02:14<25:34:31, 10.38it/s]

{'loss': Array(0.17828052, dtype=float32), 'loss_cross_entropy': Array(0.16638939, dtype=float32)}


  4%|▍         | 44479/1000000 [2:02:16<29:02:15,  9.14it/s]

{'loss': Array(0.16806376, dtype=float32), 'loss_cross_entropy': Array(0.156652, dtype=float32)}


  4%|▍         | 44489/1000000 [2:02:17<25:27:03, 10.43it/s]

{'loss': Array(0.19290939, dtype=float32), 'loss_cross_entropy': Array(0.18000901, dtype=float32)}


  4%|▍         | 44499/1000000 [2:02:19<35:17:32,  7.52it/s]

{'loss': Array(0.15819937, dtype=float32), 'loss_cross_entropy': Array(0.14676936, dtype=float32)}


  4%|▍         | 44509/1000000 [2:02:27<87:43:41,  3.03it/s] 

{'loss': Array(0.19249134, dtype=float32), 'loss_cross_entropy': Array(0.17948411, dtype=float32)}


  4%|▍         | 44519/1000000 [2:02:29<45:20:44,  5.85it/s]

{'loss': Array(0.19798319, dtype=float32), 'loss_cross_entropy': Array(0.18438111, dtype=float32)}


  4%|▍         | 44529/1000000 [2:02:30<27:41:49,  9.58it/s]

{'loss': Array(0.17273965, dtype=float32), 'loss_cross_entropy': Array(0.16072498, dtype=float32)}


  4%|▍         | 44539/1000000 [2:02:31<24:57:38, 10.63it/s]

{'loss': Array(0.19567083, dtype=float32), 'loss_cross_entropy': Array(0.18315743, dtype=float32)}


  4%|▍         | 44549/1000000 [2:02:33<32:48:28,  8.09it/s]

{'loss': Array(0.18822564, dtype=float32), 'loss_cross_entropy': Array(0.17562073, dtype=float32)}


  4%|▍         | 44559/1000000 [2:02:34<25:59:29, 10.21it/s]

{'loss': Array(0.18027778, dtype=float32), 'loss_cross_entropy': Array(0.16735105, dtype=float32)}


  4%|▍         | 44569/1000000 [2:02:36<24:34:29, 10.80it/s]

{'loss': Array(0.1818856, dtype=float32), 'loss_cross_entropy': Array(0.16906203, dtype=float32)}


  4%|▍         | 44579/1000000 [2:02:38<28:46:10,  9.22it/s]

{'loss': Array(0.18145843, dtype=float32), 'loss_cross_entropy': Array(0.16863565, dtype=float32)}


  4%|▍         | 44589/1000000 [2:02:39<25:29:38, 10.41it/s]

{'loss': Array(0.18313733, dtype=float32), 'loss_cross_entropy': Array(0.17096324, dtype=float32)}


  4%|▍         | 44599/1000000 [2:02:41<32:20:34,  8.21it/s]

{'loss': Array(0.16271184, dtype=float32), 'loss_cross_entropy': Array(0.1523691, dtype=float32)}


  4%|▍         | 44609/1000000 [2:02:42<25:40:04, 10.34it/s]

{'loss': Array(0.17124006, dtype=float32), 'loss_cross_entropy': Array(0.1594228, dtype=float32)}


  4%|▍         | 44619/1000000 [2:02:44<34:18:45,  7.73it/s]

{'loss': Array(0.1880847, dtype=float32), 'loss_cross_entropy': Array(0.17582409, dtype=float32)}


  4%|▍         | 44629/1000000 [2:02:45<25:51:29, 10.26it/s]

{'loss': Array(0.18777756, dtype=float32), 'loss_cross_entropy': Array(0.1742284, dtype=float32)}


  4%|▍         | 44639/1000000 [2:02:46<24:53:12, 10.66it/s]

{'loss': Array(0.18938534, dtype=float32), 'loss_cross_entropy': Array(0.176704, dtype=float32)}


  4%|▍         | 44649/1000000 [2:02:48<31:59:15,  8.30it/s]

{'loss': Array(0.18983975, dtype=float32), 'loss_cross_entropy': Array(0.17725131, dtype=float32)}


  4%|▍         | 44659/1000000 [2:02:49<25:47:16, 10.29it/s]

{'loss': Array(0.19936621, dtype=float32), 'loss_cross_entropy': Array(0.18520294, dtype=float32)}


  4%|▍         | 44669/1000000 [2:02:51<25:08:21, 10.56it/s]

{'loss': Array(0.16921459, dtype=float32), 'loss_cross_entropy': Array(0.15795673, dtype=float32)}


  4%|▍         | 44679/1000000 [2:02:53<27:57:11,  9.49it/s]

{'loss': Array(0.19703662, dtype=float32), 'loss_cross_entropy': Array(0.1837417, dtype=float32)}


  4%|▍         | 44689/1000000 [2:02:54<25:17:50, 10.49it/s]

{'loss': Array(0.190491, dtype=float32), 'loss_cross_entropy': Array(0.17658149, dtype=float32)}


  4%|▍         | 44699/1000000 [2:02:56<31:15:37,  8.49it/s]

{'loss': Array(0.1669086, dtype=float32), 'loss_cross_entropy': Array(0.15509796, dtype=float32)}


  4%|▍         | 44709/1000000 [2:02:57<25:45:29, 10.30it/s]

{'loss': Array(0.20289253, dtype=float32), 'loss_cross_entropy': Array(0.18950681, dtype=float32)}


  4%|▍         | 44719/1000000 [2:02:59<39:43:17,  6.68it/s]

{'loss': Array(0.15704857, dtype=float32), 'loss_cross_entropy': Array(0.14562382, dtype=float32)}


  4%|▍         | 44729/1000000 [2:03:00<28:39:16,  9.26it/s]

{'loss': Array(0.16976996, dtype=float32), 'loss_cross_entropy': Array(0.1578339, dtype=float32)}


  4%|▍         | 44739/1000000 [2:03:02<26:37:46,  9.96it/s]

{'loss': Array(0.16586064, dtype=float32), 'loss_cross_entropy': Array(0.1536688, dtype=float32)}


  4%|▍         | 44749/1000000 [2:03:03<35:16:35,  7.52it/s]

{'loss': Array(0.18006442, dtype=float32), 'loss_cross_entropy': Array(0.16814813, dtype=float32)}


  4%|▍         | 44759/1000000 [2:03:05<26:26:25, 10.04it/s]

{'loss': Array(0.16705033, dtype=float32), 'loss_cross_entropy': Array(0.1546882, dtype=float32)}


  4%|▍         | 44769/1000000 [2:03:06<24:53:11, 10.66it/s]

{'loss': Array(0.16466384, dtype=float32), 'loss_cross_entropy': Array(0.15331429, dtype=float32)}


  4%|▍         | 44779/1000000 [2:03:08<29:52:55,  8.88it/s]

{'loss': Array(0.16782105, dtype=float32), 'loss_cross_entropy': Array(0.15591854, dtype=float32)}


  4%|▍         | 44789/1000000 [2:03:09<25:35:53, 10.37it/s]

{'loss': Array(0.170385, dtype=float32), 'loss_cross_entropy': Array(0.15914209, dtype=float32)}


  4%|▍         | 44799/1000000 [2:03:11<31:44:04,  8.36it/s]

{'loss': Array(0.19752435, dtype=float32), 'loss_cross_entropy': Array(0.18429103, dtype=float32)}


  4%|▍         | 44809/1000000 [2:03:12<25:43:19, 10.32it/s]

{'loss': Array(0.16451529, dtype=float32), 'loss_cross_entropy': Array(0.15346885, dtype=float32)}


  4%|▍         | 44819/1000000 [2:03:14<25:10:35, 10.54it/s]

{'loss': Array(0.1718479, dtype=float32), 'loss_cross_entropy': Array(0.15998869, dtype=float32)}


  4%|▍         | 44829/1000000 [2:03:15<28:17:36,  9.38it/s]

{'loss': Array(0.1820087, dtype=float32), 'loss_cross_entropy': Array(0.16976294, dtype=float32)}


  4%|▍         | 44839/1000000 [2:03:17<25:20:22, 10.47it/s]

{'loss': Array(0.18184777, dtype=float32), 'loss_cross_entropy': Array(0.16958387, dtype=float32)}


  4%|▍         | 44849/1000000 [2:03:18<34:44:08,  7.64it/s]

{'loss': Array(0.18163131, dtype=float32), 'loss_cross_entropy': Array(0.16910678, dtype=float32)}


  4%|▍         | 44859/1000000 [2:03:20<25:59:15, 10.21it/s]

{'loss': Array(0.18432654, dtype=float32), 'loss_cross_entropy': Array(0.17131798, dtype=float32)}


  4%|▍         | 44869/1000000 [2:03:21<24:35:07, 10.79it/s]

{'loss': Array(0.16949475, dtype=float32), 'loss_cross_entropy': Array(0.15790151, dtype=float32)}


  4%|▍         | 44879/1000000 [2:03:23<27:32:24,  9.63it/s]

{'loss': Array(0.17425172, dtype=float32), 'loss_cross_entropy': Array(0.16188689, dtype=float32)}


  4%|▍         | 44889/1000000 [2:03:24<25:02:54, 10.59it/s]

{'loss': Array(0.18716855, dtype=float32), 'loss_cross_entropy': Array(0.17487147, dtype=float32)}


  4%|▍         | 44899/1000000 [2:03:26<29:39:22,  8.95it/s]

{'loss': Array(0.18142503, dtype=float32), 'loss_cross_entropy': Array(0.1698667, dtype=float32)}


  4%|▍         | 44909/1000000 [2:03:27<25:18:01, 10.49it/s]

{'loss': Array(0.16633984, dtype=float32), 'loss_cross_entropy': Array(0.1556556, dtype=float32)}


  4%|▍         | 44919/1000000 [2:03:29<39:34:35,  6.70it/s]

{'loss': Array(0.17245732, dtype=float32), 'loss_cross_entropy': Array(0.16121878, dtype=float32)}


  4%|▍         | 44929/1000000 [2:03:30<26:57:58,  9.84it/s]

{'loss': Array(0.17811517, dtype=float32), 'loss_cross_entropy': Array(0.16630478, dtype=float32)}


  4%|▍         | 44939/1000000 [2:03:32<24:51:45, 10.67it/s]

{'loss': Array(0.19637637, dtype=float32), 'loss_cross_entropy': Array(0.18341807, dtype=float32)}


  4%|▍         | 44949/1000000 [2:03:33<31:14:50,  8.49it/s]

{'loss': Array(0.1784146, dtype=float32), 'loss_cross_entropy': Array(0.167326, dtype=float32)}


  4%|▍         | 44959/1000000 [2:03:35<25:51:09, 10.26it/s]

{'loss': Array(0.18361787, dtype=float32), 'loss_cross_entropy': Array(0.1712533, dtype=float32)}


  4%|▍         | 44969/1000000 [2:03:36<35:09:17,  7.55it/s]

{'loss': Array(0.17986074, dtype=float32), 'loss_cross_entropy': Array(0.16729437, dtype=float32)}


  4%|▍         | 44979/1000000 [2:03:38<26:54:14,  9.86it/s]

{'loss': Array(0.17376769, dtype=float32), 'loss_cross_entropy': Array(0.16162574, dtype=float32)}


  4%|▍         | 44989/1000000 [2:03:39<34:33:37,  7.68it/s]

{'loss': Array(0.18035595, dtype=float32), 'loss_cross_entropy': Array(0.16884057, dtype=float32)}


  4%|▍         | 44999/1000000 [2:03:41<25:51:56, 10.26it/s]

{'loss': Array(0.18332101, dtype=float32), 'loss_cross_entropy': Array(0.17022751, dtype=float32)}


  5%|▍         | 45009/1000000 [2:03:49<87:55:58,  3.02it/s] 

{'loss': Array(0.16818097, dtype=float32), 'loss_cross_entropy': Array(0.15610251, dtype=float32)}


  5%|▍         | 45019/1000000 [2:03:50<40:59:42,  6.47it/s]

{'loss': Array(0.18185188, dtype=float32), 'loss_cross_entropy': Array(0.1698848, dtype=float32)}


  5%|▍         | 45029/1000000 [2:03:52<26:55:06,  9.85it/s]

{'loss': Array(0.17980345, dtype=float32), 'loss_cross_entropy': Array(0.16734816, dtype=float32)}


  5%|▍         | 45039/1000000 [2:03:54<34:36:21,  7.67it/s]

{'loss': Array(0.20012572, dtype=float32), 'loss_cross_entropy': Array(0.18692665, dtype=float32)}


  5%|▍         | 45049/1000000 [2:03:55<26:43:01,  9.93it/s]

{'loss': Array(0.16996704, dtype=float32), 'loss_cross_entropy': Array(0.157508, dtype=float32)}


  5%|▍         | 45059/1000000 [2:03:57<39:44:24,  6.67it/s]

{'loss': Array(0.16880552, dtype=float32), 'loss_cross_entropy': Array(0.15675636, dtype=float32)}


  5%|▍         | 45069/1000000 [2:03:58<26:47:18,  9.90it/s]

{'loss': Array(0.17448573, dtype=float32), 'loss_cross_entropy': Array(0.1629331, dtype=float32)}


  5%|▍         | 45079/1000000 [2:03:59<25:59:29, 10.21it/s]

{'loss': Array(0.1618472, dtype=float32), 'loss_cross_entropy': Array(0.15052186, dtype=float32)}


  5%|▍         | 45089/1000000 [2:04:01<33:30:47,  7.91it/s]

{'loss': Array(0.18720333, dtype=float32), 'loss_cross_entropy': Array(0.17443886, dtype=float32)}


  5%|▍         | 45099/1000000 [2:04:03<27:38:26,  9.60it/s]

{'loss': Array(0.19336067, dtype=float32), 'loss_cross_entropy': Array(0.18075445, dtype=float32)}


  5%|▍         | 45109/1000000 [2:04:04<35:30:45,  7.47it/s]

{'loss': Array(0.1558586, dtype=float32), 'loss_cross_entropy': Array(0.14435743, dtype=float32)}


  5%|▍         | 45119/1000000 [2:04:06<26:27:53, 10.02it/s]

{'loss': Array(0.1886113, dtype=float32), 'loss_cross_entropy': Array(0.17588665, dtype=float32)}


  5%|▍         | 45129/1000000 [2:04:07<24:57:48, 10.63it/s]

{'loss': Array(0.17166127, dtype=float32), 'loss_cross_entropy': Array(0.16021481, dtype=float32)}


  5%|▍         | 45139/1000000 [2:04:09<27:51:18,  9.52it/s]

{'loss': Array(0.18943785, dtype=float32), 'loss_cross_entropy': Array(0.17551878, dtype=float32)}


  5%|▍         | 45149/1000000 [2:04:10<25:04:36, 10.58it/s]

{'loss': Array(0.18984203, dtype=float32), 'loss_cross_entropy': Array(0.17705297, dtype=float32)}


  5%|▍         | 45159/1000000 [2:04:12<39:30:26,  6.71it/s]

{'loss': Array(0.1787442, dtype=float32), 'loss_cross_entropy': Array(0.16665225, dtype=float32)}


  5%|▍         | 45169/1000000 [2:04:13<27:10:57,  9.76it/s]

{'loss': Array(0.1911068, dtype=float32), 'loss_cross_entropy': Array(0.17886204, dtype=float32)}


  5%|▍         | 45179/1000000 [2:04:15<25:13:07, 10.52it/s]

{'loss': Array(0.18652408, dtype=float32), 'loss_cross_entropy': Array(0.17390487, dtype=float32)}


  5%|▍         | 45189/1000000 [2:04:16<31:35:06,  8.40it/s]

{'loss': Array(0.18792675, dtype=float32), 'loss_cross_entropy': Array(0.1757978, dtype=float32)}


  5%|▍         | 45199/1000000 [2:04:18<25:33:08, 10.38it/s]

{'loss': Array(0.1730572, dtype=float32), 'loss_cross_entropy': Array(0.16099386, dtype=float32)}


  5%|▍         | 45209/1000000 [2:04:19<34:54:21,  7.60it/s]

{'loss': Array(0.17685114, dtype=float32), 'loss_cross_entropy': Array(0.16384716, dtype=float32)}


  5%|▍         | 45219/1000000 [2:04:21<26:16:40, 10.09it/s]

{'loss': Array(0.15097192, dtype=float32), 'loss_cross_entropy': Array(0.13955119, dtype=float32)}


  5%|▍         | 45229/1000000 [2:04:23<35:39:21,  7.44it/s]

{'loss': Array(0.14977166, dtype=float32), 'loss_cross_entropy': Array(0.13894601, dtype=float32)}


  5%|▍         | 45239/1000000 [2:04:24<27:07:32,  9.78it/s]

{'loss': Array(0.1673041, dtype=float32), 'loss_cross_entropy': Array(0.1558864, dtype=float32)}


  5%|▍         | 45249/1000000 [2:04:25<24:37:38, 10.77it/s]

{'loss': Array(0.17436573, dtype=float32), 'loss_cross_entropy': Array(0.1618432, dtype=float32)}


  5%|▍         | 45259/1000000 [2:04:27<29:04:31,  9.12it/s]

{'loss': Array(0.17549685, dtype=float32), 'loss_cross_entropy': Array(0.16364022, dtype=float32)}


  5%|▍         | 45269/1000000 [2:04:28<25:12:17, 10.52it/s]

{'loss': Array(0.19524448, dtype=float32), 'loss_cross_entropy': Array(0.18244588, dtype=float32)}


  5%|▍         | 45279/1000000 [2:04:30<31:18:05,  8.47it/s]

{'loss': Array(0.17791545, dtype=float32), 'loss_cross_entropy': Array(0.16539282, dtype=float32)}


  5%|▍         | 45289/1000000 [2:04:31<26:16:18, 10.09it/s]

{'loss': Array(0.16934626, dtype=float32), 'loss_cross_entropy': Array(0.15712626, dtype=float32)}


  5%|▍         | 45299/1000000 [2:04:33<38:43:28,  6.85it/s]

{'loss': Array(0.17672342, dtype=float32), 'loss_cross_entropy': Array(0.16478878, dtype=float32)}


  5%|▍         | 45309/1000000 [2:04:34<27:05:21,  9.79it/s]

{'loss': Array(0.18516102, dtype=float32), 'loss_cross_entropy': Array(0.17275581, dtype=float32)}


  5%|▍         | 45319/1000000 [2:04:36<38:32:26,  6.88it/s]

{'loss': Array(0.17237236, dtype=float32), 'loss_cross_entropy': Array(0.1603081, dtype=float32)}


  5%|▍         | 45329/1000000 [2:04:38<27:06:26,  9.78it/s]

{'loss': Array(0.18676133, dtype=float32), 'loss_cross_entropy': Array(0.17456137, dtype=float32)}


  5%|▍         | 45339/1000000 [2:04:39<24:56:08, 10.63it/s]

{'loss': Array(0.19233568, dtype=float32), 'loss_cross_entropy': Array(0.17995417, dtype=float32)}


  5%|▍         | 45349/1000000 [2:04:41<34:41:59,  7.64it/s]

{'loss': Array(0.17820214, dtype=float32), 'loss_cross_entropy': Array(0.16673096, dtype=float32)}


  5%|▍         | 45359/1000000 [2:04:42<26:05:24, 10.16it/s]

{'loss': Array(0.17899415, dtype=float32), 'loss_cross_entropy': Array(0.16618894, dtype=float32)}


  5%|▍         | 45369/1000000 [2:04:44<38:55:14,  6.81it/s]

{'loss': Array(0.18196134, dtype=float32), 'loss_cross_entropy': Array(0.16921674, dtype=float32)}


  5%|▍         | 45379/1000000 [2:04:45<26:56:23,  9.84it/s]

{'loss': Array(0.1806598, dtype=float32), 'loss_cross_entropy': Array(0.16823211, dtype=float32)}


  5%|▍         | 45389/1000000 [2:04:46<25:18:24, 10.48it/s]

{'loss': Array(0.17046487, dtype=float32), 'loss_cross_entropy': Array(0.15840583, dtype=float32)}


  5%|▍         | 45399/1000000 [2:04:48<27:51:59,  9.52it/s]

{'loss': Array(0.18857126, dtype=float32), 'loss_cross_entropy': Array(0.1767571, dtype=float32)}


  5%|▍         | 45409/1000000 [2:04:49<25:19:18, 10.47it/s]

{'loss': Array(0.17613257, dtype=float32), 'loss_cross_entropy': Array(0.16425686, dtype=float32)}


  5%|▍         | 45419/1000000 [2:04:51<28:30:26,  9.30it/s]

{'loss': Array(0.17201357, dtype=float32), 'loss_cross_entropy': Array(0.16044019, dtype=float32)}


  5%|▍         | 45429/1000000 [2:04:53<25:26:13, 10.42it/s]

{'loss': Array(0.18387245, dtype=float32), 'loss_cross_entropy': Array(0.17233132, dtype=float32)}


  5%|▍         | 45439/1000000 [2:04:54<38:37:53,  6.86it/s]

{'loss': Array(0.17382447, dtype=float32), 'loss_cross_entropy': Array(0.16147795, dtype=float32)}


  5%|▍         | 45449/1000000 [2:04:56<27:10:51,  9.76it/s]

{'loss': Array(0.16888101, dtype=float32), 'loss_cross_entropy': Array(0.15739062, dtype=float32)}


  5%|▍         | 45459/1000000 [2:04:57<25:09:53, 10.54it/s]

{'loss': Array(0.19155656, dtype=float32), 'loss_cross_entropy': Array(0.17877284, dtype=float32)}


  5%|▍         | 45469/1000000 [2:04:59<33:01:19,  8.03it/s]

{'loss': Array(0.171946, dtype=float32), 'loss_cross_entropy': Array(0.16040692, dtype=float32)}


  5%|▍         | 45479/1000000 [2:05:00<27:07:49,  9.77it/s]

{'loss': Array(0.17929924, dtype=float32), 'loss_cross_entropy': Array(0.16711257, dtype=float32)}


  5%|▍         | 45489/1000000 [2:05:02<40:32:06,  6.54it/s]

{'loss': Array(0.18450023, dtype=float32), 'loss_cross_entropy': Array(0.17248979, dtype=float32)}


  5%|▍         | 45499/1000000 [2:05:03<28:27:57,  9.31it/s]

{'loss': Array(0.1782872, dtype=float32), 'loss_cross_entropy': Array(0.16568291, dtype=float32)}


  5%|▍         | 45509/1000000 [2:05:11<84:37:33,  3.13it/s] 

{'loss': Array(0.19125201, dtype=float32), 'loss_cross_entropy': Array(0.1783097, dtype=float32)}


  5%|▍         | 45519/1000000 [2:05:13<39:15:12,  6.75it/s]

{'loss': Array(0.18457645, dtype=float32), 'loss_cross_entropy': Array(0.17204264, dtype=float32)}


  5%|▍         | 45529/1000000 [2:05:14<27:01:09,  9.81it/s]

{'loss': Array(0.1783469, dtype=float32), 'loss_cross_entropy': Array(0.16677062, dtype=float32)}


  5%|▍         | 45539/1000000 [2:05:16<31:39:57,  8.37it/s]

{'loss': Array(0.17209294, dtype=float32), 'loss_cross_entropy': Array(0.15977441, dtype=float32)}


  5%|▍         | 45549/1000000 [2:05:17<25:42:09, 10.32it/s]

{'loss': Array(0.15845671, dtype=float32), 'loss_cross_entropy': Array(0.14761558, dtype=float32)}


  5%|▍         | 45559/1000000 [2:05:19<39:44:04,  6.67it/s]

{'loss': Array(0.17768393, dtype=float32), 'loss_cross_entropy': Array(0.16482238, dtype=float32)}


  5%|▍         | 45569/1000000 [2:05:20<27:51:50,  9.51it/s]

{'loss': Array(0.17094164, dtype=float32), 'loss_cross_entropy': Array(0.15906201, dtype=float32)}


  5%|▍         | 45579/1000000 [2:05:22<24:41:41, 10.74it/s]

{'loss': Array(0.18180583, dtype=float32), 'loss_cross_entropy': Array(0.16879815, dtype=float32)}


  5%|▍         | 45589/1000000 [2:05:23<27:36:01,  9.61it/s]

{'loss': Array(0.16011538, dtype=float32), 'loss_cross_entropy': Array(0.1487869, dtype=float32)}


  5%|▍         | 45599/1000000 [2:05:25<24:58:14, 10.62it/s]

{'loss': Array(0.16070989, dtype=float32), 'loss_cross_entropy': Array(0.14958893, dtype=float32)}


  5%|▍         | 45609/1000000 [2:05:26<38:26:09,  6.90it/s]

{'loss': Array(0.18890148, dtype=float32), 'loss_cross_entropy': Array(0.1755505, dtype=float32)}


  5%|▍         | 45619/1000000 [2:05:28<27:13:29,  9.74it/s]

{'loss': Array(0.19101262, dtype=float32), 'loss_cross_entropy': Array(0.17841147, dtype=float32)}


  5%|▍         | 45629/1000000 [2:05:29<25:00:52, 10.60it/s]

{'loss': Array(0.17715396, dtype=float32), 'loss_cross_entropy': Array(0.16403952, dtype=float32)}


  5%|▍         | 45639/1000000 [2:05:31<29:54:43,  8.86it/s]

{'loss': Array(0.1769494, dtype=float32), 'loss_cross_entropy': Array(0.16426429, dtype=float32)}


  5%|▍         | 45649/1000000 [2:05:32<25:17:20, 10.48it/s]

{'loss': Array(0.19271517, dtype=float32), 'loss_cross_entropy': Array(0.17978144, dtype=float32)}


  5%|▍         | 45659/1000000 [2:05:34<34:14:58,  7.74it/s]

{'loss': Array(0.19764404, dtype=float32), 'loss_cross_entropy': Array(0.18476097, dtype=float32)}


  5%|▍         | 45669/1000000 [2:05:35<25:54:11, 10.23it/s]

{'loss': Array(0.16734095, dtype=float32), 'loss_cross_entropy': Array(0.15535672, dtype=float32)}


  5%|▍         | 45679/1000000 [2:05:37<38:52:44,  6.82it/s]

{'loss': Array(0.1861999, dtype=float32), 'loss_cross_entropy': Array(0.17413546, dtype=float32)}


  5%|▍         | 45689/1000000 [2:05:38<27:09:43,  9.76it/s]

{'loss': Array(0.18924713, dtype=float32), 'loss_cross_entropy': Array(0.17642991, dtype=float32)}


  5%|▍         | 45699/1000000 [2:05:39<25:12:43, 10.51it/s]

{'loss': Array(0.17549884, dtype=float32), 'loss_cross_entropy': Array(0.1635145, dtype=float32)}


  5%|▍         | 45709/1000000 [2:05:41<31:24:02,  8.44it/s]

{'loss': Array(0.16910818, dtype=float32), 'loss_cross_entropy': Array(0.15861236, dtype=float32)}


  5%|▍         | 45719/1000000 [2:05:43<25:42:52, 10.31it/s]

{'loss': Array(0.1542602, dtype=float32), 'loss_cross_entropy': Array(0.14358199, dtype=float32)}


  5%|▍         | 45729/1000000 [2:05:44<34:32:11,  7.68it/s]

{'loss': Array(0.17365001, dtype=float32), 'loss_cross_entropy': Array(0.16249561, dtype=float32)}


  5%|▍         | 45739/1000000 [2:05:46<25:49:58, 10.26it/s]

{'loss': Array(0.16695252, dtype=float32), 'loss_cross_entropy': Array(0.15568784, dtype=float32)}


  5%|▍         | 45749/1000000 [2:05:47<24:48:16, 10.69it/s]

{'loss': Array(0.18555734, dtype=float32), 'loss_cross_entropy': Array(0.17261477, dtype=float32)}


  5%|▍         | 45759/1000000 [2:05:49<27:27:34,  9.65it/s]

{'loss': Array(0.16758794, dtype=float32), 'loss_cross_entropy': Array(0.1549711, dtype=float32)}


  5%|▍         | 45769/1000000 [2:05:50<24:53:31, 10.65it/s]

{'loss': Array(0.18799774, dtype=float32), 'loss_cross_entropy': Array(0.17595972, dtype=float32)}


  5%|▍         | 45779/1000000 [2:05:52<39:15:21,  6.75it/s]

{'loss': Array(0.16975167, dtype=float32), 'loss_cross_entropy': Array(0.15782757, dtype=float32)}


  5%|▍         | 45789/1000000 [2:05:53<26:48:29,  9.89it/s]

{'loss': Array(0.18017605, dtype=float32), 'loss_cross_entropy': Array(0.16811197, dtype=float32)}


  5%|▍         | 45799/1000000 [2:05:54<24:55:56, 10.63it/s]

{'loss': Array(0.17894933, dtype=float32), 'loss_cross_entropy': Array(0.16683917, dtype=float32)}


  5%|▍         | 45809/1000000 [2:05:56<32:16:22,  8.21it/s]

{'loss': Array(0.16359554, dtype=float32), 'loss_cross_entropy': Array(0.1518001, dtype=float32)}


  5%|▍         | 45819/1000000 [2:05:57<26:00:03, 10.19it/s]

{'loss': Array(0.16916493, dtype=float32), 'loss_cross_entropy': Array(0.1577525, dtype=float32)}


  5%|▍         | 45829/1000000 [2:05:59<35:18:00,  7.51it/s]

{'loss': Array(0.16579847, dtype=float32), 'loss_cross_entropy': Array(0.15350913, dtype=float32)}


  5%|▍         | 45839/1000000 [2:06:01<27:15:39,  9.72it/s]

{'loss': Array(0.17528503, dtype=float32), 'loss_cross_entropy': Array(0.16381155, dtype=float32)}


  5%|▍         | 45849/1000000 [2:06:02<36:01:33,  7.36it/s]

{'loss': Array(0.17418286, dtype=float32), 'loss_cross_entropy': Array(0.16176264, dtype=float32)}


  5%|▍         | 45859/1000000 [2:06:04<26:33:06,  9.98it/s]

{'loss': Array(0.17556258, dtype=float32), 'loss_cross_entropy': Array(0.16343896, dtype=float32)}


  5%|▍         | 45869/1000000 [2:06:05<25:34:59, 10.36it/s]

{'loss': Array(0.17250167, dtype=float32), 'loss_cross_entropy': Array(0.16057177, dtype=float32)}


  5%|▍         | 45879/1000000 [2:06:07<30:46:11,  8.61it/s]

{'loss': Array(0.1815925, dtype=float32), 'loss_cross_entropy': Array(0.16957006, dtype=float32)}


  5%|▍         | 45889/1000000 [2:06:08<25:55:02, 10.23it/s]

{'loss': Array(0.17318532, dtype=float32), 'loss_cross_entropy': Array(0.16061448, dtype=float32)}


  5%|▍         | 45899/1000000 [2:06:10<31:26:48,  8.43it/s]

{'loss': Array(0.16444607, dtype=float32), 'loss_cross_entropy': Array(0.15342534, dtype=float32)}


  5%|▍         | 45909/1000000 [2:06:11<26:06:38, 10.15it/s]

{'loss': Array(0.16064407, dtype=float32), 'loss_cross_entropy': Array(0.14938782, dtype=float32)}


  5%|▍         | 45919/1000000 [2:06:13<38:28:45,  6.89it/s]

{'loss': Array(0.18319553, dtype=float32), 'loss_cross_entropy': Array(0.17121977, dtype=float32)}


  5%|▍         | 45929/1000000 [2:06:14<27:17:08,  9.71it/s]

{'loss': Array(0.17675684, dtype=float32), 'loss_cross_entropy': Array(0.16513577, dtype=float32)}


  5%|▍         | 45939/1000000 [2:06:16<39:20:01,  6.74it/s]

{'loss': Array(0.16004907, dtype=float32), 'loss_cross_entropy': Array(0.14866015, dtype=float32)}


  5%|▍         | 45949/1000000 [2:06:18<27:16:12,  9.72it/s]

{'loss': Array(0.15447453, dtype=float32), 'loss_cross_entropy': Array(0.1441158, dtype=float32)}


  5%|▍         | 45959/1000000 [2:06:19<25:47:31, 10.27it/s]

{'loss': Array(0.16816415, dtype=float32), 'loss_cross_entropy': Array(0.15615965, dtype=float32)}


  5%|▍         | 45969/1000000 [2:06:21<34:26:00,  7.70it/s]

{'loss': Array(0.16625689, dtype=float32), 'loss_cross_entropy': Array(0.15426259, dtype=float32)}


  5%|▍         | 45979/1000000 [2:06:22<25:50:54, 10.25it/s]

{'loss': Array(0.1771718, dtype=float32), 'loss_cross_entropy': Array(0.16453956, dtype=float32)}


  5%|▍         | 45989/1000000 [2:06:24<39:26:33,  6.72it/s]

{'loss': Array(0.18347947, dtype=float32), 'loss_cross_entropy': Array(0.1703646, dtype=float32)}


  5%|▍         | 45999/1000000 [2:06:25<27:33:36,  9.62it/s]

{'loss': Array(0.157758, dtype=float32), 'loss_cross_entropy': Array(0.14703557, dtype=float32)}


  5%|▍         | 46009/1000000 [2:06:33<89:21:12,  2.97it/s] 

{'loss': Array(0.1820478, dtype=float32), 'loss_cross_entropy': Array(0.16977836, dtype=float32)}


  5%|▍         | 46019/1000000 [2:06:35<38:44:35,  6.84it/s] 

{'loss': Array(0.17386585, dtype=float32), 'loss_cross_entropy': Array(0.16240416, dtype=float32)}


  5%|▍         | 46029/1000000 [2:06:36<26:38:57,  9.94it/s]

{'loss': Array(0.17029007, dtype=float32), 'loss_cross_entropy': Array(0.15903276, dtype=float32)}


  5%|▍         | 46039/1000000 [2:06:38<28:18:06,  9.36it/s]

{'loss': Array(0.16535388, dtype=float32), 'loss_cross_entropy': Array(0.15422462, dtype=float32)}


  5%|▍         | 46049/1000000 [2:06:39<25:24:39, 10.43it/s]

{'loss': Array(0.16992772, dtype=float32), 'loss_cross_entropy': Array(0.15764552, dtype=float32)}


  5%|▍         | 46059/1000000 [2:06:41<39:23:24,  6.73it/s]

{'loss': Array(0.16478609, dtype=float32), 'loss_cross_entropy': Array(0.15284649, dtype=float32)}


  5%|▍         | 46069/1000000 [2:06:42<27:36:59,  9.60it/s]

{'loss': Array(0.15501621, dtype=float32), 'loss_cross_entropy': Array(0.14421035, dtype=float32)}


  5%|▍         | 46079/1000000 [2:06:44<25:15:51, 10.49it/s]

{'loss': Array(0.17006083, dtype=float32), 'loss_cross_entropy': Array(0.15861557, dtype=float32)}


  5%|▍         | 46089/1000000 [2:06:46<31:55:33,  8.30it/s]

{'loss': Array(0.17622286, dtype=float32), 'loss_cross_entropy': Array(0.16459574, dtype=float32)}


  5%|▍         | 46099/1000000 [2:06:47<26:11:01, 10.12it/s]

{'loss': Array(0.14999191, dtype=float32), 'loss_cross_entropy': Array(0.13955413, dtype=float32)}


  5%|▍         | 46109/1000000 [2:06:49<39:31:19,  6.70it/s]

{'loss': Array(0.17549138, dtype=float32), 'loss_cross_entropy': Array(0.16282731, dtype=float32)}


  5%|▍         | 46119/1000000 [2:06:50<27:19:54,  9.69it/s]

{'loss': Array(0.172026, dtype=float32), 'loss_cross_entropy': Array(0.15999624, dtype=float32)}


  5%|▍         | 46129/1000000 [2:06:51<25:12:09, 10.51it/s]

{'loss': Array(0.17512086, dtype=float32), 'loss_cross_entropy': Array(0.16250011, dtype=float32)}


  5%|▍         | 46139/1000000 [2:06:53<29:05:03,  9.11it/s]

{'loss': Array(0.18340573, dtype=float32), 'loss_cross_entropy': Array(0.17046021, dtype=float32)}


  5%|▍         | 46149/1000000 [2:06:54<25:12:35, 10.51it/s]

{'loss': Array(0.15310656, dtype=float32), 'loss_cross_entropy': Array(0.14257443, dtype=float32)}


  5%|▍         | 46159/1000000 [2:06:56<34:17:27,  7.73it/s]

{'loss': Array(0.14638117, dtype=float32), 'loss_cross_entropy': Array(0.1358104, dtype=float32)}


  5%|▍         | 46169/1000000 [2:06:57<26:10:26, 10.12it/s]

{'loss': Array(0.19220184, dtype=float32), 'loss_cross_entropy': Array(0.17969418, dtype=float32)}


  5%|▍         | 46179/1000000 [2:06:59<24:49:03, 10.68it/s]

{'loss': Array(0.19022322, dtype=float32), 'loss_cross_entropy': Array(0.17797366, dtype=float32)}


  5%|▍         | 46189/1000000 [2:07:01<32:06:01,  8.25it/s]

{'loss': Array(0.17468789, dtype=float32), 'loss_cross_entropy': Array(0.16344194, dtype=float32)}


  5%|▍         | 46199/1000000 [2:07:02<25:47:37, 10.27it/s]

{'loss': Array(0.16907412, dtype=float32), 'loss_cross_entropy': Array(0.15761144, dtype=float32)}


  5%|▍         | 46209/1000000 [2:07:03<24:58:17, 10.61it/s]

{'loss': Array(0.16777766, dtype=float32), 'loss_cross_entropy': Array(0.15644036, dtype=float32)}


  5%|▍         | 46219/1000000 [2:07:05<28:19:35,  9.35it/s]

{'loss': Array(0.19946007, dtype=float32), 'loss_cross_entropy': Array(0.1857551, dtype=float32)}


  5%|▍         | 46229/1000000 [2:07:06<25:37:02, 10.34it/s]

{'loss': Array(0.1843653, dtype=float32), 'loss_cross_entropy': Array(0.17199185, dtype=float32)}


  5%|▍         | 46239/1000000 [2:07:08<31:36:04,  8.38it/s]

{'loss': Array(0.20113361, dtype=float32), 'loss_cross_entropy': Array(0.18807885, dtype=float32)}


  5%|▍         | 46249/1000000 [2:07:10<26:26:13, 10.02it/s]

{'loss': Array(0.19206749, dtype=float32), 'loss_cross_entropy': Array(0.17910959, dtype=float32)}


  5%|▍         | 46259/1000000 [2:07:11<39:55:16,  6.64it/s]

{'loss': Array(0.17492118, dtype=float32), 'loss_cross_entropy': Array(0.16306768, dtype=float32)}


  5%|▍         | 46269/1000000 [2:07:13<28:20:50,  9.35it/s]

{'loss': Array(0.17262252, dtype=float32), 'loss_cross_entropy': Array(0.1607074, dtype=float32)}


  5%|▍         | 46279/1000000 [2:07:14<39:19:20,  6.74it/s]

{'loss': Array(0.18975331, dtype=float32), 'loss_cross_entropy': Array(0.17665736, dtype=float32)}


  5%|▍         | 46289/1000000 [2:07:16<26:57:13,  9.83it/s]

{'loss': Array(0.16896829, dtype=float32), 'loss_cross_entropy': Array(0.15735132, dtype=float32)}


  5%|▍         | 46299/1000000 [2:07:17<24:49:11, 10.67it/s]

{'loss': Array(0.18616468, dtype=float32), 'loss_cross_entropy': Array(0.17343102, dtype=float32)}


  5%|▍         | 46309/1000000 [2:07:19<35:25:11,  7.48it/s]

{'loss': Array(0.18352018, dtype=float32), 'loss_cross_entropy': Array(0.1712532, dtype=float32)}


  5%|▍         | 46319/1000000 [2:07:20<26:49:35,  9.87it/s]

{'loss': Array(0.16810456, dtype=float32), 'loss_cross_entropy': Array(0.15714166, dtype=float32)}


  5%|▍         | 46329/1000000 [2:07:22<39:18:23,  6.74it/s]

{'loss': Array(0.16186167, dtype=float32), 'loss_cross_entropy': Array(0.15043545, dtype=float32)}


  5%|▍         | 46339/1000000 [2:07:23<27:07:28,  9.77it/s]

{'loss': Array(0.16523029, dtype=float32), 'loss_cross_entropy': Array(0.15375033, dtype=float32)}


  5%|▍         | 46349/1000000 [2:07:25<24:30:21, 10.81it/s]

{'loss': Array(0.16884325, dtype=float32), 'loss_cross_entropy': Array(0.15674715, dtype=float32)}


  5%|▍         | 46359/1000000 [2:07:26<27:42:20,  9.56it/s]

{'loss': Array(0.17103904, dtype=float32), 'loss_cross_entropy': Array(0.15991327, dtype=float32)}


  5%|▍         | 46369/1000000 [2:07:28<24:41:51, 10.73it/s]

{'loss': Array(0.16390239, dtype=float32), 'loss_cross_entropy': Array(0.15275277, dtype=float32)}


  5%|▍         | 46379/1000000 [2:07:29<28:37:57,  9.25it/s]

{'loss': Array(0.16512859, dtype=float32), 'loss_cross_entropy': Array(0.15326688, dtype=float32)}


  5%|▍         | 46389/1000000 [2:07:31<24:54:01, 10.64it/s]

{'loss': Array(0.18433014, dtype=float32), 'loss_cross_entropy': Array(0.17182255, dtype=float32)}


  5%|▍         | 46399/1000000 [2:07:33<38:33:35,  6.87it/s]

{'loss': Array(0.19079109, dtype=float32), 'loss_cross_entropy': Array(0.17839803, dtype=float32)}


  5%|▍         | 46409/1000000 [2:07:34<26:32:05,  9.98it/s]

{'loss': Array(0.15918949, dtype=float32), 'loss_cross_entropy': Array(0.14763792, dtype=float32)}


  5%|▍         | 46419/1000000 [2:07:35<25:02:42, 10.58it/s]

{'loss': Array(0.16312401, dtype=float32), 'loss_cross_entropy': Array(0.1524478, dtype=float32)}


  5%|▍         | 46429/1000000 [2:07:37<31:50:34,  8.32it/s]

{'loss': Array(0.1814457, dtype=float32), 'loss_cross_entropy': Array(0.16964273, dtype=float32)}


  5%|▍         | 46439/1000000 [2:07:38<26:09:25, 10.13it/s]

{'loss': Array(0.17725675, dtype=float32), 'loss_cross_entropy': Array(0.16499506, dtype=float32)}


  5%|▍         | 46449/1000000 [2:07:40<38:51:08,  6.82it/s]

{'loss': Array(0.17971486, dtype=float32), 'loss_cross_entropy': Array(0.16687359, dtype=float32)}


  5%|▍         | 46459/1000000 [2:07:41<27:02:35,  9.79it/s]

{'loss': Array(0.16957635, dtype=float32), 'loss_cross_entropy': Array(0.15708719, dtype=float32)}


  5%|▍         | 46469/1000000 [2:07:43<25:08:23, 10.54it/s]

{'loss': Array(0.17722695, dtype=float32), 'loss_cross_entropy': Array(0.16565333, dtype=float32)}


  5%|▍         | 46479/1000000 [2:07:44<29:31:55,  8.97it/s]

{'loss': Array(0.18070862, dtype=float32), 'loss_cross_entropy': Array(0.16839133, dtype=float32)}


  5%|▍         | 46489/1000000 [2:07:46<25:17:58, 10.47it/s]

{'loss': Array(0.18495725, dtype=float32), 'loss_cross_entropy': Array(0.17275102, dtype=float32)}


  5%|▍         | 46499/1000000 [2:07:48<35:16:17,  7.51it/s]

{'loss': Array(0.15825117, dtype=float32), 'loss_cross_entropy': Array(0.14733219, dtype=float32)}


  5%|▍         | 46509/1000000 [2:07:55<88:52:18,  2.98it/s] 

{'loss': Array(0.19470654, dtype=float32), 'loss_cross_entropy': Array(0.18093203, dtype=float32)}


  5%|▍         | 46519/1000000 [2:07:57<45:25:16,  5.83it/s]

{'loss': Array(0.15085544, dtype=float32), 'loss_cross_entropy': Array(0.13974902, dtype=float32)}


  5%|▍         | 46529/1000000 [2:07:59<28:05:33,  9.43it/s]

{'loss': Array(0.1860737, dtype=float32), 'loss_cross_entropy': Array(0.17456342, dtype=float32)}


  5%|▍         | 46539/1000000 [2:08:00<25:23:46, 10.43it/s]

{'loss': Array(0.19336204, dtype=float32), 'loss_cross_entropy': Array(0.18116644, dtype=float32)}


  5%|▍         | 46549/1000000 [2:08:02<31:52:23,  8.31it/s]

{'loss': Array(0.17029487, dtype=float32), 'loss_cross_entropy': Array(0.15795003, dtype=float32)}


  5%|▍         | 46559/1000000 [2:08:03<25:25:56, 10.41it/s]

{'loss': Array(0.18371475, dtype=float32), 'loss_cross_entropy': Array(0.1719002, dtype=float32)}


  5%|▍         | 46569/1000000 [2:08:04<24:24:34, 10.85it/s]

{'loss': Array(0.1504097, dtype=float32), 'loss_cross_entropy': Array(0.14049911, dtype=float32)}


  5%|▍         | 46579/1000000 [2:08:06<28:19:01,  9.35it/s]

{'loss': Array(0.17055127, dtype=float32), 'loss_cross_entropy': Array(0.15910912, dtype=float32)}


  5%|▍         | 46589/1000000 [2:08:08<26:08:48, 10.13it/s]

{'loss': Array(0.17986463, dtype=float32), 'loss_cross_entropy': Array(0.16805631, dtype=float32)}


  5%|▍         | 46599/1000000 [2:08:09<32:11:40,  8.23it/s]

{'loss': Array(0.17665525, dtype=float32), 'loss_cross_entropy': Array(0.16499342, dtype=float32)}


  5%|▍         | 46609/1000000 [2:08:11<27:08:19,  9.76it/s]

{'loss': Array(0.16696078, dtype=float32), 'loss_cross_entropy': Array(0.15480055, dtype=float32)}


  5%|▍         | 46619/1000000 [2:08:12<36:35:51,  7.24it/s]

{'loss': Array(0.16007142, dtype=float32), 'loss_cross_entropy': Array(0.14884141, dtype=float32)}


  5%|▍         | 46629/1000000 [2:08:14<26:50:08,  9.87it/s]

{'loss': Array(0.15678059, dtype=float32), 'loss_cross_entropy': Array(0.14489035, dtype=float32)}


  5%|▍         | 46639/1000000 [2:08:15<24:28:54, 10.82it/s]

{'loss': Array(0.18332414, dtype=float32), 'loss_cross_entropy': Array(0.17060685, dtype=float32)}


  5%|▍         | 46649/1000000 [2:08:17<31:14:49,  8.48it/s]

{'loss': Array(0.171841, dtype=float32), 'loss_cross_entropy': Array(0.1604644, dtype=float32)}


  5%|▍         | 46659/1000000 [2:08:18<25:29:38, 10.39it/s]

{'loss': Array(0.17640261, dtype=float32), 'loss_cross_entropy': Array(0.16440876, dtype=float32)}


  5%|▍         | 46669/1000000 [2:08:20<24:33:43, 10.78it/s]

{'loss': Array(0.191967, dtype=float32), 'loss_cross_entropy': Array(0.17888258, dtype=float32)}


  5%|▍         | 46679/1000000 [2:08:21<27:57:47,  9.47it/s]

{'loss': Array(0.17641999, dtype=float32), 'loss_cross_entropy': Array(0.1639089, dtype=float32)}


  5%|▍         | 46689/1000000 [2:08:23<24:50:55, 10.66it/s]

{'loss': Array(0.19127631, dtype=float32), 'loss_cross_entropy': Array(0.17896546, dtype=float32)}


  5%|▍         | 46699/1000000 [2:08:24<31:50:23,  8.32it/s]

{'loss': Array(0.16828571, dtype=float32), 'loss_cross_entropy': Array(0.15751939, dtype=float32)}


  5%|▍         | 46709/1000000 [2:08:26<25:47:36, 10.27it/s]

{'loss': Array(0.17547779, dtype=float32), 'loss_cross_entropy': Array(0.16374683, dtype=float32)}


  5%|▍         | 46719/1000000 [2:08:27<40:37:22,  6.52it/s]

{'loss': Array(0.18993339, dtype=float32), 'loss_cross_entropy': Array(0.17712688, dtype=float32)}


  5%|▍         | 46729/1000000 [2:08:29<26:59:39,  9.81it/s]

{'loss': Array(0.17705432, dtype=float32), 'loss_cross_entropy': Array(0.16480213, dtype=float32)}


  5%|▍         | 46739/1000000 [2:08:30<24:48:41, 10.67it/s]

{'loss': Array(0.16694789, dtype=float32), 'loss_cross_entropy': Array(0.15530531, dtype=float32)}


  5%|▍         | 46749/1000000 [2:08:32<34:26:43,  7.69it/s]

{'loss': Array(0.18775532, dtype=float32), 'loss_cross_entropy': Array(0.17627592, dtype=float32)}


  5%|▍         | 46759/1000000 [2:08:33<26:11:58, 10.11it/s]

{'loss': Array(0.19003527, dtype=float32), 'loss_cross_entropy': Array(0.17750792, dtype=float32)}


  5%|▍         | 46769/1000000 [2:08:35<24:40:39, 10.73it/s]

{'loss': Array(0.1953818, dtype=float32), 'loss_cross_entropy': Array(0.1825062, dtype=float32)}


  5%|▍         | 46779/1000000 [2:08:36<29:09:34,  9.08it/s]

{'loss': Array(0.17087804, dtype=float32), 'loss_cross_entropy': Array(0.1589722, dtype=float32)}


  5%|▍         | 46789/1000000 [2:08:38<25:16:47, 10.47it/s]

{'loss': Array(0.16470675, dtype=float32), 'loss_cross_entropy': Array(0.15302049, dtype=float32)}


  5%|▍         | 46799/1000000 [2:08:39<32:00:10,  8.27it/s]

{'loss': Array(0.18112925, dtype=float32), 'loss_cross_entropy': Array(0.16866186, dtype=float32)}


  5%|▍         | 46809/1000000 [2:08:41<26:19:53, 10.06it/s]

{'loss': Array(0.18671702, dtype=float32), 'loss_cross_entropy': Array(0.1743727, dtype=float32)}


  5%|▍         | 46819/1000000 [2:08:42<25:22:07, 10.44it/s]

{'loss': Array(0.19203337, dtype=float32), 'loss_cross_entropy': Array(0.17961143, dtype=float32)}


  5%|▍         | 46829/1000000 [2:08:44<28:35:50,  9.26it/s]

{'loss': Array(0.15140246, dtype=float32), 'loss_cross_entropy': Array(0.14063983, dtype=float32)}


  5%|▍         | 46839/1000000 [2:08:45<25:49:40, 10.25it/s]

{'loss': Array(0.19502273, dtype=float32), 'loss_cross_entropy': Array(0.18165432, dtype=float32)}


  5%|▍         | 46849/1000000 [2:08:47<35:02:48,  7.55it/s]

{'loss': Array(0.16884659, dtype=float32), 'loss_cross_entropy': Array(0.15816799, dtype=float32)}


  5%|▍         | 46859/1000000 [2:08:49<26:51:38,  9.86it/s]

{'loss': Array(0.17210113, dtype=float32), 'loss_cross_entropy': Array(0.1603117, dtype=float32)}


  5%|▍         | 46869/1000000 [2:08:50<24:52:17, 10.65it/s]

{'loss': Array(0.17682941, dtype=float32), 'loss_cross_entropy': Array(0.16491218, dtype=float32)}


  5%|▍         | 46879/1000000 [2:08:52<28:23:53,  9.32it/s]

{'loss': Array(0.17624024, dtype=float32), 'loss_cross_entropy': Array(0.16393854, dtype=float32)}


  5%|▍         | 46889/1000000 [2:08:53<25:11:37, 10.51it/s]

{'loss': Array(0.16288732, dtype=float32), 'loss_cross_entropy': Array(0.15174058, dtype=float32)}


  5%|▍         | 46899/1000000 [2:08:55<30:22:09,  8.72it/s]

{'loss': Array(0.1628634, dtype=float32), 'loss_cross_entropy': Array(0.15182936, dtype=float32)}


  5%|▍         | 46909/1000000 [2:08:56<26:02:00, 10.17it/s]

{'loss': Array(0.1970402, dtype=float32), 'loss_cross_entropy': Array(0.18451865, dtype=float32)}


  5%|▍         | 46919/1000000 [2:08:58<39:16:01,  6.74it/s]

{'loss': Array(0.1664214, dtype=float32), 'loss_cross_entropy': Array(0.15542369, dtype=float32)}


  5%|▍         | 46929/1000000 [2:08:59<28:07:13,  9.41it/s]

{'loss': Array(0.17647776, dtype=float32), 'loss_cross_entropy': Array(0.16422065, dtype=float32)}


  5%|▍         | 46939/1000000 [2:09:01<24:55:55, 10.62it/s]

{'loss': Array(0.17295569, dtype=float32), 'loss_cross_entropy': Array(0.16111909, dtype=float32)}


  5%|▍         | 46949/1000000 [2:09:02<32:29:51,  8.15it/s]

{'loss': Array(0.17226534, dtype=float32), 'loss_cross_entropy': Array(0.16028583, dtype=float32)}


  5%|▍         | 46959/1000000 [2:09:04<26:06:45, 10.14it/s]

{'loss': Array(0.18544208, dtype=float32), 'loss_cross_entropy': Array(0.17331086, dtype=float32)}


  5%|▍         | 46969/1000000 [2:09:06<36:01:49,  7.35it/s]

{'loss': Array(0.18066172, dtype=float32), 'loss_cross_entropy': Array(0.1690403, dtype=float32)}


  5%|▍         | 46979/1000000 [2:09:07<27:13:55,  9.72it/s]

{'loss': Array(0.18515204, dtype=float32), 'loss_cross_entropy': Array(0.17267226, dtype=float32)}


  5%|▍         | 46989/1000000 [2:09:09<34:59:11,  7.57it/s]

{'loss': Array(0.17023502, dtype=float32), 'loss_cross_entropy': Array(0.15828587, dtype=float32)}


  5%|▍         | 46999/1000000 [2:09:10<26:05:44, 10.14it/s]

{'loss': Array(0.16534525, dtype=float32), 'loss_cross_entropy': Array(0.15382591, dtype=float32)}


  5%|▍         | 47009/1000000 [2:09:18<85:40:56,  3.09it/s] 

{'loss': Array(0.17054711, dtype=float32), 'loss_cross_entropy': Array(0.1586553, dtype=float32)}


  5%|▍         | 47019/1000000 [2:09:19<39:39:19,  6.68it/s]

{'loss': Array(0.17422675, dtype=float32), 'loss_cross_entropy': Array(0.16293092, dtype=float32)}


  5%|▍         | 47029/1000000 [2:09:21<27:41:12,  9.56it/s]

{'loss': Array(0.1618781, dtype=float32), 'loss_cross_entropy': Array(0.15095904, dtype=float32)}


  5%|▍         | 47039/1000000 [2:09:23<34:49:20,  7.60it/s]

{'loss': Array(0.18040694, dtype=float32), 'loss_cross_entropy': Array(0.16823865, dtype=float32)}


  5%|▍         | 47049/1000000 [2:09:24<25:48:43, 10.26it/s]

{'loss': Array(0.1848969, dtype=float32), 'loss_cross_entropy': Array(0.17170915, dtype=float32)}


  5%|▍         | 47059/1000000 [2:09:26<38:52:48,  6.81it/s]

{'loss': Array(0.17618613, dtype=float32), 'loss_cross_entropy': Array(0.16502644, dtype=float32)}


  5%|▍         | 47069/1000000 [2:09:27<26:29:39,  9.99it/s]

{'loss': Array(0.17157231, dtype=float32), 'loss_cross_entropy': Array(0.16007447, dtype=float32)}


  5%|▍         | 47079/1000000 [2:09:28<24:40:28, 10.73it/s]

{'loss': Array(0.19411707, dtype=float32), 'loss_cross_entropy': Array(0.18207946, dtype=float32)}


  5%|▍         | 47089/1000000 [2:09:30<31:23:16,  8.43it/s]

{'loss': Array(0.16192186, dtype=float32), 'loss_cross_entropy': Array(0.15149365, dtype=float32)}


  5%|▍         | 47099/1000000 [2:09:31<25:40:07, 10.31it/s]

{'loss': Array(0.16519624, dtype=float32), 'loss_cross_entropy': Array(0.15426402, dtype=float32)}


  5%|▍         | 47109/1000000 [2:09:33<35:32:03,  7.45it/s]

{'loss': Array(0.17629185, dtype=float32), 'loss_cross_entropy': Array(0.1644258, dtype=float32)}


  5%|▍         | 47119/1000000 [2:09:34<26:12:08, 10.10it/s]

{'loss': Array(0.17488639, dtype=float32), 'loss_cross_entropy': Array(0.16257517, dtype=float32)}


  5%|▍         | 47129/1000000 [2:09:36<24:44:32, 10.70it/s]

{'loss': Array(0.1603912, dtype=float32), 'loss_cross_entropy': Array(0.14960174, dtype=float32)}


  5%|▍         | 47139/1000000 [2:09:37<28:42:27,  9.22it/s]

{'loss': Array(0.17049645, dtype=float32), 'loss_cross_entropy': Array(0.15872647, dtype=float32)}


  5%|▍         | 47149/1000000 [2:09:39<25:03:43, 10.56it/s]

{'loss': Array(0.16778651, dtype=float32), 'loss_cross_entropy': Array(0.15616374, dtype=float32)}


  5%|▍         | 47159/1000000 [2:09:41<38:58:59,  6.79it/s]

{'loss': Array(0.1835231, dtype=float32), 'loss_cross_entropy': Array(0.17074463, dtype=float32)}


  5%|▍         | 47169/1000000 [2:09:42<26:54:18,  9.84it/s]

{'loss': Array(0.17463689, dtype=float32), 'loss_cross_entropy': Array(0.16321829, dtype=float32)}


  5%|▍         | 47179/1000000 [2:09:43<25:14:13, 10.49it/s]

{'loss': Array(0.18173753, dtype=float32), 'loss_cross_entropy': Array(0.1694542, dtype=float32)}


  5%|▍         | 47189/1000000 [2:09:45<33:09:07,  7.98it/s]

{'loss': Array(0.18130372, dtype=float32), 'loss_cross_entropy': Array(0.16853729, dtype=float32)}


  5%|▍         | 47199/1000000 [2:09:46<26:42:46,  9.91it/s]

{'loss': Array(0.17090212, dtype=float32), 'loss_cross_entropy': Array(0.15967913, dtype=float32)}


  5%|▍         | 47209/1000000 [2:09:48<35:00:08,  7.56it/s]

{'loss': Array(0.17203131, dtype=float32), 'loss_cross_entropy': Array(0.16047943, dtype=float32)}


  5%|▍         | 47219/1000000 [2:09:50<26:12:11, 10.10it/s]

{'loss': Array(0.16775392, dtype=float32), 'loss_cross_entropy': Array(0.15611278, dtype=float32)}


  5%|▍         | 47229/1000000 [2:09:51<35:11:55,  7.52it/s]

{'loss': Array(0.15594296, dtype=float32), 'loss_cross_entropy': Array(0.14494406, dtype=float32)}


  5%|▍         | 47239/1000000 [2:09:53<26:59:41,  9.80it/s]

{'loss': Array(0.16724813, dtype=float32), 'loss_cross_entropy': Array(0.15597998, dtype=float32)}


  5%|▍         | 47249/1000000 [2:09:54<24:39:33, 10.73it/s]

{'loss': Array(0.17078464, dtype=float32), 'loss_cross_entropy': Array(0.15913323, dtype=float32)}


  5%|▍         | 47259/1000000 [2:09:56<29:17:34,  9.03it/s]

{'loss': Array(0.16072997, dtype=float32), 'loss_cross_entropy': Array(0.15002637, dtype=float32)}


  5%|▍         | 47269/1000000 [2:09:57<25:27:41, 10.39it/s]

{'loss': Array(0.1705481, dtype=float32), 'loss_cross_entropy': Array(0.15924239, dtype=float32)}


  5%|▍         | 47279/1000000 [2:09:59<32:20:46,  8.18it/s]

{'loss': Array(0.16319035, dtype=float32), 'loss_cross_entropy': Array(0.15187386, dtype=float32)}


  5%|▍         | 47289/1000000 [2:10:00<26:35:36,  9.95it/s]

{'loss': Array(0.1498822, dtype=float32), 'loss_cross_entropy': Array(0.13991523, dtype=float32)}


  5%|▍         | 47299/1000000 [2:10:02<40:21:21,  6.56it/s]

{'loss': Array(0.17674664, dtype=float32), 'loss_cross_entropy': Array(0.16405378, dtype=float32)}


  5%|▍         | 47309/1000000 [2:10:03<28:09:18,  9.40it/s]

{'loss': Array(0.1732851, dtype=float32), 'loss_cross_entropy': Array(0.16160111, dtype=float32)}


  5%|▍         | 47319/1000000 [2:10:05<39:44:01,  6.66it/s]

{'loss': Array(0.16496482, dtype=float32), 'loss_cross_entropy': Array(0.15338008, dtype=float32)}


  5%|▍         | 47329/1000000 [2:10:07<27:27:36,  9.64it/s]

{'loss': Array(0.1917261, dtype=float32), 'loss_cross_entropy': Array(0.17860185, dtype=float32)}


  5%|▍         | 47339/1000000 [2:10:08<25:21:16, 10.44it/s]

{'loss': Array(0.18005686, dtype=float32), 'loss_cross_entropy': Array(0.1685155, dtype=float32)}


  5%|▍         | 47349/1000000 [2:10:10<35:19:20,  7.49it/s]

{'loss': Array(0.16874318, dtype=float32), 'loss_cross_entropy': Array(0.15756606, dtype=float32)}


  5%|▍         | 47359/1000000 [2:10:11<26:48:37,  9.87it/s]

{'loss': Array(0.18750532, dtype=float32), 'loss_cross_entropy': Array(0.1749422, dtype=float32)}


  5%|▍         | 47369/1000000 [2:10:13<39:19:03,  6.73it/s]

{'loss': Array(0.18864603, dtype=float32), 'loss_cross_entropy': Array(0.17551973, dtype=float32)}


  5%|▍         | 47379/1000000 [2:10:14<26:50:39,  9.86it/s]

{'loss': Array(0.15891157, dtype=float32), 'loss_cross_entropy': Array(0.14748113, dtype=float32)}


  5%|▍         | 47389/1000000 [2:10:15<24:50:27, 10.65it/s]

{'loss': Array(0.16960461, dtype=float32), 'loss_cross_entropy': Array(0.15842722, dtype=float32)}


  5%|▍         | 47399/1000000 [2:10:17<27:20:32,  9.68it/s]

{'loss': Array(0.17650555, dtype=float32), 'loss_cross_entropy': Array(0.16473632, dtype=float32)}


  5%|▍         | 47409/1000000 [2:10:18<24:46:04, 10.68it/s]

{'loss': Array(0.18043838, dtype=float32), 'loss_cross_entropy': Array(0.16872726, dtype=float32)}


  5%|▍         | 47419/1000000 [2:10:20<27:12:41,  9.72it/s]

{'loss': Array(0.16015609, dtype=float32), 'loss_cross_entropy': Array(0.14908862, dtype=float32)}


  5%|▍         | 47429/1000000 [2:10:22<25:41:12, 10.30it/s]

{'loss': Array(0.16723119, dtype=float32), 'loss_cross_entropy': Array(0.15544078, dtype=float32)}


  5%|▍         | 47439/1000000 [2:10:23<38:48:19,  6.82it/s]

{'loss': Array(0.17851067, dtype=float32), 'loss_cross_entropy': Array(0.16626695, dtype=float32)}


  5%|▍         | 47449/1000000 [2:10:25<26:50:58,  9.85it/s]

{'loss': Array(0.16012226, dtype=float32), 'loss_cross_entropy': Array(0.14877538, dtype=float32)}


  5%|▍         | 47459/1000000 [2:10:26<25:06:34, 10.54it/s]

{'loss': Array(0.14983419, dtype=float32), 'loss_cross_entropy': Array(0.13921495, dtype=float32)}


  5%|▍         | 47469/1000000 [2:10:28<31:41:53,  8.35it/s]

{'loss': Array(0.17563812, dtype=float32), 'loss_cross_entropy': Array(0.16360697, dtype=float32)}


  5%|▍         | 47479/1000000 [2:10:29<25:59:33, 10.18it/s]

{'loss': Array(0.14212173, dtype=float32), 'loss_cross_entropy': Array(0.13163328, dtype=float32)}


  5%|▍         | 47489/1000000 [2:10:31<38:59:32,  6.79it/s]

{'loss': Array(0.1763398, dtype=float32), 'loss_cross_entropy': Array(0.16415219, dtype=float32)}


  5%|▍         | 47499/1000000 [2:10:32<26:44:39,  9.89it/s]

{'loss': Array(0.17425871, dtype=float32), 'loss_cross_entropy': Array(0.1618133, dtype=float32)}


  5%|▍         | 47509/1000000 [2:10:40<89:39:56,  2.95it/s] 

{'loss': Array(0.15812802, dtype=float32), 'loss_cross_entropy': Array(0.14722775, dtype=float32)}


  5%|▍         | 47519/1000000 [2:10:42<40:24:03,  6.55it/s]

{'loss': Array(0.15533316, dtype=float32), 'loss_cross_entropy': Array(0.14447115, dtype=float32)}


  5%|▍         | 47529/1000000 [2:10:43<27:06:09,  9.76it/s]

{'loss': Array(0.15948732, dtype=float32), 'loss_cross_entropy': Array(0.1484584, dtype=float32)}


  5%|▍         | 47539/1000000 [2:10:45<32:26:55,  8.15it/s]

{'loss': Array(0.17624032, dtype=float32), 'loss_cross_entropy': Array(0.16417122, dtype=float32)}


  5%|▍         | 47549/1000000 [2:10:46<26:05:00, 10.14it/s]

{'loss': Array(0.16893038, dtype=float32), 'loss_cross_entropy': Array(0.15854497, dtype=float32)}


  5%|▍         | 47559/1000000 [2:10:48<39:24:07,  6.71it/s]

{'loss': Array(0.18427947, dtype=float32), 'loss_cross_entropy': Array(0.17199181, dtype=float32)}


  5%|▍         | 47569/1000000 [2:10:49<28:10:23,  9.39it/s]

{'loss': Array(0.16855761, dtype=float32), 'loss_cross_entropy': Array(0.15654655, dtype=float32)}


  5%|▍         | 47579/1000000 [2:10:51<25:37:33, 10.32it/s]

{'loss': Array(0.16990976, dtype=float32), 'loss_cross_entropy': Array(0.15822633, dtype=float32)}


  5%|▍         | 47589/1000000 [2:10:53<28:18:09,  9.35it/s]

{'loss': Array(0.17534885, dtype=float32), 'loss_cross_entropy': Array(0.16322286, dtype=float32)}


  5%|▍         | 47599/1000000 [2:10:54<25:30:36, 10.37it/s]

{'loss': Array(0.16734819, dtype=float32), 'loss_cross_entropy': Array(0.15592073, dtype=float32)}


  5%|▍         | 47609/1000000 [2:10:56<39:24:41,  6.71it/s]

{'loss': Array(0.15265691, dtype=float32), 'loss_cross_entropy': Array(0.1418101, dtype=float32)}


  5%|▍         | 47619/1000000 [2:10:57<27:29:41,  9.62it/s]

{'loss': Array(0.16071098, dtype=float32), 'loss_cross_entropy': Array(0.14907958, dtype=float32)}


  5%|▍         | 47629/1000000 [2:10:59<26:01:04, 10.17it/s]

{'loss': Array(0.17963327, dtype=float32), 'loss_cross_entropy': Array(0.16704583, dtype=float32)}


  5%|▍         | 47639/1000000 [2:11:00<31:15:22,  8.46it/s]

{'loss': Array(0.18125662, dtype=float32), 'loss_cross_entropy': Array(0.1686775, dtype=float32)}


  5%|▍         | 47649/1000000 [2:11:02<26:54:42,  9.83it/s]

{'loss': Array(0.16684659, dtype=float32), 'loss_cross_entropy': Array(0.15612803, dtype=float32)}


  5%|▍         | 47659/1000000 [2:11:04<37:15:44,  7.10it/s]

{'loss': Array(0.15564282, dtype=float32), 'loss_cross_entropy': Array(0.1447502, dtype=float32)}


  5%|▍         | 47669/1000000 [2:11:05<26:43:53,  9.90it/s]

{'loss': Array(0.16830459, dtype=float32), 'loss_cross_entropy': Array(0.15633686, dtype=float32)}


  5%|▍         | 47679/1000000 [2:11:07<39:41:38,  6.66it/s]

{'loss': Array(0.18401138, dtype=float32), 'loss_cross_entropy': Array(0.17124656, dtype=float32)}


  5%|▍         | 47689/1000000 [2:11:08<27:05:44,  9.76it/s]

{'loss': Array(0.1737928, dtype=float32), 'loss_cross_entropy': Array(0.16201241, dtype=float32)}


  5%|▍         | 47699/1000000 [2:11:10<25:16:01, 10.47it/s]

{'loss': Array(0.17600025, dtype=float32), 'loss_cross_entropy': Array(0.16424437, dtype=float32)}


  5%|▍         | 47709/1000000 [2:11:11<31:42:34,  8.34it/s]

{'loss': Array(0.18289532, dtype=float32), 'loss_cross_entropy': Array(0.17030865, dtype=float32)}


  5%|▍         | 47719/1000000 [2:11:13<26:15:30, 10.07it/s]

{'loss': Array(0.18534249, dtype=float32), 'loss_cross_entropy': Array(0.17340577, dtype=float32)}


  5%|▍         | 47729/1000000 [2:11:14<35:22:54,  7.48it/s]

{'loss': Array(0.15921065, dtype=float32), 'loss_cross_entropy': Array(0.14779139, dtype=float32)}


  5%|▍         | 47739/1000000 [2:11:16<26:15:01, 10.08it/s]

{'loss': Array(0.18858252, dtype=float32), 'loss_cross_entropy': Array(0.17618625, dtype=float32)}


  5%|▍         | 47749/1000000 [2:11:17<24:51:21, 10.64it/s]

{'loss': Array(0.19969913, dtype=float32), 'loss_cross_entropy': Array(0.18540679, dtype=float32)}


  5%|▍         | 47759/1000000 [2:11:19<27:41:21,  9.55it/s]

{'loss': Array(0.1733949, dtype=float32), 'loss_cross_entropy': Array(0.16236255, dtype=float32)}


  5%|▍         | 47769/1000000 [2:11:20<25:33:51, 10.35it/s]

{'loss': Array(0.17942128, dtype=float32), 'loss_cross_entropy': Array(0.16735338, dtype=float32)}


  5%|▍         | 47779/1000000 [2:11:22<38:19:37,  6.90it/s]

{'loss': Array(0.1640067, dtype=float32), 'loss_cross_entropy': Array(0.15255384, dtype=float32)}


  5%|▍         | 47789/1000000 [2:11:23<26:49:53,  9.86it/s]

{'loss': Array(0.15832418, dtype=float32), 'loss_cross_entropy': Array(0.1477785, dtype=float32)}


  5%|▍         | 47799/1000000 [2:11:25<24:56:48, 10.60it/s]

{'loss': Array(0.15460989, dtype=float32), 'loss_cross_entropy': Array(0.14387468, dtype=float32)}


  5%|▍         | 47809/1000000 [2:11:26<30:48:54,  8.58it/s]

{'loss': Array(0.16111393, dtype=float32), 'loss_cross_entropy': Array(0.1495903, dtype=float32)}


  5%|▍         | 47819/1000000 [2:11:28<25:30:44, 10.37it/s]

{'loss': Array(0.1615611, dtype=float32), 'loss_cross_entropy': Array(0.15023981, dtype=float32)}


  5%|▍         | 47829/1000000 [2:11:29<34:03:05,  7.77it/s]

{'loss': Array(0.1475807, dtype=float32), 'loss_cross_entropy': Array(0.13706036, dtype=float32)}


  5%|▍         | 47839/1000000 [2:11:31<26:02:30, 10.16it/s]

{'loss': Array(0.16404893, dtype=float32), 'loss_cross_entropy': Array(0.1524861, dtype=float32)}


  5%|▍         | 47849/1000000 [2:11:32<34:17:22,  7.71it/s]

{'loss': Array(0.19380485, dtype=float32), 'loss_cross_entropy': Array(0.18091923, dtype=float32)}


  5%|▍         | 47859/1000000 [2:11:34<26:33:08,  9.96it/s]

{'loss': Array(0.17609422, dtype=float32), 'loss_cross_entropy': Array(0.16477005, dtype=float32)}


  5%|▍         | 47869/1000000 [2:11:35<25:26:13, 10.40it/s]

{'loss': Array(0.19750835, dtype=float32), 'loss_cross_entropy': Array(0.18411681, dtype=float32)}


  5%|▍         | 47879/1000000 [2:11:37<29:22:27,  9.00it/s]

{'loss': Array(0.17188333, dtype=float32), 'loss_cross_entropy': Array(0.15997167, dtype=float32)}


  5%|▍         | 47889/1000000 [2:11:38<25:28:55, 10.38it/s]

{'loss': Array(0.16307348, dtype=float32), 'loss_cross_entropy': Array(0.1522222, dtype=float32)}


  5%|▍         | 47899/1000000 [2:11:40<31:36:06,  8.37it/s]

{'loss': Array(0.19955249, dtype=float32), 'loss_cross_entropy': Array(0.18714754, dtype=float32)}


  5%|▍         | 47909/1000000 [2:11:41<25:27:00, 10.39it/s]

{'loss': Array(0.17722301, dtype=float32), 'loss_cross_entropy': Array(0.16499381, dtype=float32)}


  5%|▍         | 47919/1000000 [2:11:43<38:47:45,  6.82it/s]

{'loss': Array(0.17432547, dtype=float32), 'loss_cross_entropy': Array(0.16230348, dtype=float32)}


  5%|▍         | 47929/1000000 [2:11:44<27:16:02,  9.70it/s]

{'loss': Array(0.17385323, dtype=float32), 'loss_cross_entropy': Array(0.1625339, dtype=float32)}


  5%|▍         | 47939/1000000 [2:11:46<39:44:33,  6.65it/s]

{'loss': Array(0.17268814, dtype=float32), 'loss_cross_entropy': Array(0.16102004, dtype=float32)}


  5%|▍         | 47949/1000000 [2:11:48<27:22:46,  9.66it/s]

{'loss': Array(0.18961814, dtype=float32), 'loss_cross_entropy': Array(0.17706232, dtype=float32)}


  5%|▍         | 47959/1000000 [2:11:49<25:00:26, 10.58it/s]

{'loss': Array(0.17575936, dtype=float32), 'loss_cross_entropy': Array(0.16336015, dtype=float32)}


  5%|▍         | 47969/1000000 [2:11:51<34:50:22,  7.59it/s]

{'loss': Array(0.19248803, dtype=float32), 'loss_cross_entropy': Array(0.17920473, dtype=float32)}


  5%|▍         | 47979/1000000 [2:11:52<25:56:16, 10.20it/s]

{'loss': Array(0.17874406, dtype=float32), 'loss_cross_entropy': Array(0.16633114, dtype=float32)}


  5%|▍         | 47989/1000000 [2:11:54<38:50:49,  6.81it/s]

{'loss': Array(0.16653682, dtype=float32), 'loss_cross_entropy': Array(0.1560372, dtype=float32)}


  5%|▍         | 47999/1000000 [2:11:55<26:24:21, 10.01it/s]

{'loss': Array(0.16328499, dtype=float32), 'loss_cross_entropy': Array(0.15153193, dtype=float32)}


  5%|▍         | 48009/1000000 [2:12:03<88:11:56,  3.00it/s] 

{'loss': Array(0.18453352, dtype=float32), 'loss_cross_entropy': Array(0.17264073, dtype=float32)}


  5%|▍         | 48019/1000000 [2:12:05<39:20:37,  6.72it/s] 

{'loss': Array(0.17711201, dtype=float32), 'loss_cross_entropy': Array(0.16663806, dtype=float32)}


  5%|▍         | 48029/1000000 [2:12:06<27:14:09,  9.71it/s]

{'loss': Array(0.158145, dtype=float32), 'loss_cross_entropy': Array(0.1476868, dtype=float32)}


  5%|▍         | 48039/1000000 [2:12:08<28:36:41,  9.24it/s]

{'loss': Array(0.17514583, dtype=float32), 'loss_cross_entropy': Array(0.16348875, dtype=float32)}


  5%|▍         | 48049/1000000 [2:12:09<25:34:53, 10.34it/s]

{'loss': Array(0.1865954, dtype=float32), 'loss_cross_entropy': Array(0.1739758, dtype=float32)}


  5%|▍         | 48059/1000000 [2:12:11<39:21:37,  6.72it/s]

{'loss': Array(0.1820738, dtype=float32), 'loss_cross_entropy': Array(0.17071661, dtype=float32)}


  5%|▍         | 48069/1000000 [2:12:12<27:16:18,  9.70it/s]

{'loss': Array(0.17653142, dtype=float32), 'loss_cross_entropy': Array(0.16444904, dtype=float32)}


  5%|▍         | 48079/1000000 [2:12:14<24:42:37, 10.70it/s]

{'loss': Array(0.16752315, dtype=float32), 'loss_cross_entropy': Array(0.15557663, dtype=float32)}


  5%|▍         | 48089/1000000 [2:12:15<31:38:26,  8.36it/s]

{'loss': Array(0.17400394, dtype=float32), 'loss_cross_entropy': Array(0.16222648, dtype=float32)}


  5%|▍         | 48099/1000000 [2:12:17<26:08:57, 10.11it/s]

{'loss': Array(0.18381292, dtype=float32), 'loss_cross_entropy': Array(0.17191051, dtype=float32)}


  5%|▍         | 48109/1000000 [2:12:19<39:22:08,  6.72it/s]

{'loss': Array(0.19357975, dtype=float32), 'loss_cross_entropy': Array(0.18078779, dtype=float32)}


  5%|▍         | 48119/1000000 [2:12:20<27:02:11,  9.78it/s]

{'loss': Array(0.18815951, dtype=float32), 'loss_cross_entropy': Array(0.17544077, dtype=float32)}


  5%|▍         | 48129/1000000 [2:12:21<25:01:25, 10.57it/s]

{'loss': Array(0.17051439, dtype=float32), 'loss_cross_entropy': Array(0.15796165, dtype=float32)}


  5%|▍         | 48139/1000000 [2:12:23<29:44:29,  8.89it/s]

{'loss': Array(0.20364812, dtype=float32), 'loss_cross_entropy': Array(0.19019166, dtype=float32)}


  5%|▍         | 48149/1000000 [2:12:24<25:31:38, 10.36it/s]

{'loss': Array(0.15550032, dtype=float32), 'loss_cross_entropy': Array(0.1450712, dtype=float32)}


  5%|▍         | 48159/1000000 [2:12:26<34:30:12,  7.66it/s]

{'loss': Array(0.17309897, dtype=float32), 'loss_cross_entropy': Array(0.16122077, dtype=float32)}


  5%|▍         | 48169/1000000 [2:12:27<26:11:42, 10.09it/s]

{'loss': Array(0.16589749, dtype=float32), 'loss_cross_entropy': Array(0.15469755, dtype=float32)}


  5%|▍         | 48179/1000000 [2:12:29<24:23:03, 10.84it/s]

{'loss': Array(0.17669904, dtype=float32), 'loss_cross_entropy': Array(0.1652464, dtype=float32)}


  5%|▍         | 48189/1000000 [2:12:30<31:33:33,  8.38it/s]

{'loss': Array(0.17356926, dtype=float32), 'loss_cross_entropy': Array(0.16258569, dtype=float32)}


  5%|▍         | 48199/1000000 [2:12:32<25:46:14, 10.26it/s]

{'loss': Array(0.18727282, dtype=float32), 'loss_cross_entropy': Array(0.17510447, dtype=float32)}


  5%|▍         | 48209/1000000 [2:12:33<25:12:54, 10.49it/s]

{'loss': Array(0.177941, dtype=float32), 'loss_cross_entropy': Array(0.16713402, dtype=float32)}


  5%|▍         | 48219/1000000 [2:12:35<28:16:18,  9.35it/s]

{'loss': Array(0.179033, dtype=float32), 'loss_cross_entropy': Array(0.16714723, dtype=float32)}


  5%|▍         | 48229/1000000 [2:12:36<24:59:20, 10.58it/s]

{'loss': Array(0.16970408, dtype=float32), 'loss_cross_entropy': Array(0.15880477, dtype=float32)}


  5%|▍         | 48239/1000000 [2:12:38<32:09:34,  8.22it/s]

{'loss': Array(0.16362777, dtype=float32), 'loss_cross_entropy': Array(0.15202291, dtype=float32)}


  5%|▍         | 48249/1000000 [2:12:39<25:41:01, 10.29it/s]

{'loss': Array(0.15457736, dtype=float32), 'loss_cross_entropy': Array(0.14381824, dtype=float32)}


  5%|▍         | 48259/1000000 [2:12:41<39:00:41,  6.78it/s]

{'loss': Array(0.17406116, dtype=float32), 'loss_cross_entropy': Array(0.16178213, dtype=float32)}


  5%|▍         | 48269/1000000 [2:12:43<27:11:45,  9.72it/s]

{'loss': Array(0.17993014, dtype=float32), 'loss_cross_entropy': Array(0.16760878, dtype=float32)}


  5%|▍         | 48279/1000000 [2:12:44<39:07:51,  6.76it/s]

{'loss': Array(0.16030498, dtype=float32), 'loss_cross_entropy': Array(0.14978723, dtype=float32)}


  5%|▍         | 48289/1000000 [2:12:46<26:58:18,  9.80it/s]

{'loss': Array(0.17591487, dtype=float32), 'loss_cross_entropy': Array(0.16395561, dtype=float32)}


  5%|▍         | 48299/1000000 [2:12:47<24:38:02, 10.73it/s]

{'loss': Array(0.16284744, dtype=float32), 'loss_cross_entropy': Array(0.15305562, dtype=float32)}


  5%|▍         | 48309/1000000 [2:12:49<34:16:14,  7.71it/s]

{'loss': Array(0.19007683, dtype=float32), 'loss_cross_entropy': Array(0.17773095, dtype=float32)}


  5%|▍         | 48319/1000000 [2:12:50<25:44:18, 10.27it/s]

{'loss': Array(0.16085409, dtype=float32), 'loss_cross_entropy': Array(0.14931522, dtype=float32)}


  5%|▍         | 48329/1000000 [2:12:52<38:40:22,  6.84it/s]

{'loss': Array(0.18051113, dtype=float32), 'loss_cross_entropy': Array(0.16802017, dtype=float32)}


  5%|▍         | 48339/1000000 [2:12:53<26:54:24,  9.82it/s]

{'loss': Array(0.18185861, dtype=float32), 'loss_cross_entropy': Array(0.16945997, dtype=float32)}


  5%|▍         | 48349/1000000 [2:12:54<25:34:23, 10.34it/s]

{'loss': Array(0.16410871, dtype=float32), 'loss_cross_entropy': Array(0.1530012, dtype=float32)}


  5%|▍         | 48359/1000000 [2:12:56<29:05:48,  9.09it/s]

{'loss': Array(0.1680604, dtype=float32), 'loss_cross_entropy': Array(0.15661159, dtype=float32)}


  5%|▍         | 48369/1000000 [2:12:57<25:25:38, 10.40it/s]

{'loss': Array(0.19139434, dtype=float32), 'loss_cross_entropy': Array(0.17868392, dtype=float32)}


  5%|▍         | 48379/1000000 [2:12:59<28:14:20,  9.36it/s]

{'loss': Array(0.16874956, dtype=float32), 'loss_cross_entropy': Array(0.15743189, dtype=float32)}


  5%|▍         | 48389/1000000 [2:13:01<25:34:14, 10.34it/s]

{'loss': Array(0.19828247, dtype=float32), 'loss_cross_entropy': Array(0.18463688, dtype=float32)}


  5%|▍         | 48399/1000000 [2:13:02<39:29:29,  6.69it/s]

{'loss': Array(0.17559876, dtype=float32), 'loss_cross_entropy': Array(0.16338687, dtype=float32)}


  5%|▍         | 48409/1000000 [2:13:04<27:00:43,  9.79it/s]

{'loss': Array(0.18372779, dtype=float32), 'loss_cross_entropy': Array(0.1719771, dtype=float32)}


  5%|▍         | 48419/1000000 [2:13:05<26:37:06,  9.93it/s]

{'loss': Array(0.17038299, dtype=float32), 'loss_cross_entropy': Array(0.1586604, dtype=float32)}


  5%|▍         | 48429/1000000 [2:13:07<32:46:19,  8.07it/s]

{'loss': Array(0.15622048, dtype=float32), 'loss_cross_entropy': Array(0.14563899, dtype=float32)}


  5%|▍         | 48439/1000000 [2:13:08<25:30:31, 10.36it/s]

{'loss': Array(0.14758077, dtype=float32), 'loss_cross_entropy': Array(0.13725476, dtype=float32)}


  5%|▍         | 48449/1000000 [2:13:10<38:58:29,  6.78it/s]

{'loss': Array(0.18080011, dtype=float32), 'loss_cross_entropy': Array(0.1693, dtype=float32)}


  5%|▍         | 48459/1000000 [2:13:11<27:14:12,  9.70it/s]

{'loss': Array(0.16320638, dtype=float32), 'loss_cross_entropy': Array(0.15207386, dtype=float32)}


  5%|▍         | 48469/1000000 [2:13:13<25:14:07, 10.47it/s]

{'loss': Array(0.18538636, dtype=float32), 'loss_cross_entropy': Array(0.17271458, dtype=float32)}


  5%|▍         | 48479/1000000 [2:13:14<29:01:01,  9.11it/s]

{'loss': Array(0.1809573, dtype=float32), 'loss_cross_entropy': Array(0.16868953, dtype=float32)}


  5%|▍         | 48489/1000000 [2:13:16<25:11:22, 10.49it/s]

{'loss': Array(0.1769266, dtype=float32), 'loss_cross_entropy': Array(0.16529845, dtype=float32)}


  5%|▍         | 48499/1000000 [2:13:17<34:25:43,  7.68it/s]

{'loss': Array(0.16223018, dtype=float32), 'loss_cross_entropy': Array(0.15133919, dtype=float32)}


  5%|▍         | 48509/1000000 [2:13:25<86:26:35,  3.06it/s] 

{'loss': Array(0.17977683, dtype=float32), 'loss_cross_entropy': Array(0.16751634, dtype=float32)}


  5%|▍         | 48519/1000000 [2:13:27<44:44:15,  5.91it/s]

{'loss': Array(0.16754581, dtype=float32), 'loss_cross_entropy': Array(0.1559784, dtype=float32)}


  5%|▍         | 48529/1000000 [2:13:28<28:24:54,  9.30it/s]

{'loss': Array(0.16819935, dtype=float32), 'loss_cross_entropy': Array(0.15728937, dtype=float32)}


  5%|▍         | 48539/1000000 [2:13:30<25:03:15, 10.55it/s]

{'loss': Array(0.17176798, dtype=float32), 'loss_cross_entropy': Array(0.1600179, dtype=float32)}


  5%|▍         | 48549/1000000 [2:13:31<31:15:35,  8.45it/s]

{'loss': Array(0.17577456, dtype=float32), 'loss_cross_entropy': Array(0.16333358, dtype=float32)}


  5%|▍         | 48559/1000000 [2:13:33<25:22:10, 10.42it/s]

{'loss': Array(0.17576274, dtype=float32), 'loss_cross_entropy': Array(0.16412699, dtype=float32)}


  5%|▍         | 48569/1000000 [2:13:34<24:03:44, 10.98it/s]

{'loss': Array(0.1535092, dtype=float32), 'loss_cross_entropy': Array(0.14236031, dtype=float32)}


  5%|▍         | 48579/1000000 [2:13:36<27:24:33,  9.64it/s]

{'loss': Array(0.18791434, dtype=float32), 'loss_cross_entropy': Array(0.17609559, dtype=float32)}


  5%|▍         | 48589/1000000 [2:13:37<24:52:22, 10.63it/s]

{'loss': Array(0.17030594, dtype=float32), 'loss_cross_entropy': Array(0.15865867, dtype=float32)}


  5%|▍         | 48599/1000000 [2:13:39<31:31:49,  8.38it/s]

{'loss': Array(0.17751132, dtype=float32), 'loss_cross_entropy': Array(0.16528784, dtype=float32)}


  5%|▍         | 48609/1000000 [2:13:40<25:12:48, 10.48it/s]

{'loss': Array(0.17273276, dtype=float32), 'loss_cross_entropy': Array(0.16180286, dtype=float32)}


  5%|▍         | 48619/1000000 [2:13:42<34:17:53,  7.71it/s]

{'loss': Array(0.1512578, dtype=float32), 'loss_cross_entropy': Array(0.14073907, dtype=float32)}


  5%|▍         | 48629/1000000 [2:13:43<25:56:43, 10.19it/s]

{'loss': Array(0.16429482, dtype=float32), 'loss_cross_entropy': Array(0.15350276, dtype=float32)}


  5%|▍         | 48639/1000000 [2:13:44<24:41:07, 10.71it/s]

{'loss': Array(0.15665948, dtype=float32), 'loss_cross_entropy': Array(0.14544363, dtype=float32)}


  5%|▍         | 48649/1000000 [2:13:46<31:52:31,  8.29it/s]

{'loss': Array(0.17940526, dtype=float32), 'loss_cross_entropy': Array(0.1675495, dtype=float32)}


  5%|▍         | 48659/1000000 [2:13:47<25:48:35, 10.24it/s]

{'loss': Array(0.1684898, dtype=float32), 'loss_cross_entropy': Array(0.15721323, dtype=float32)}


  5%|▍         | 48669/1000000 [2:13:49<24:25:05, 10.82it/s]

{'loss': Array(0.16377194, dtype=float32), 'loss_cross_entropy': Array(0.15264103, dtype=float32)}


  5%|▍         | 48679/1000000 [2:13:50<27:37:01,  9.57it/s]

{'loss': Array(0.15899602, dtype=float32), 'loss_cross_entropy': Array(0.14827396, dtype=float32)}


  5%|▍         | 48689/1000000 [2:13:52<24:43:47, 10.69it/s]

{'loss': Array(0.18926156, dtype=float32), 'loss_cross_entropy': Array(0.17676812, dtype=float32)}


  5%|▍         | 48699/1000000 [2:13:53<31:24:18,  8.41it/s]

{'loss': Array(0.17884561, dtype=float32), 'loss_cross_entropy': Array(0.16747092, dtype=float32)}


  5%|▍         | 48709/1000000 [2:13:55<25:20:33, 10.43it/s]

{'loss': Array(0.17250076, dtype=float32), 'loss_cross_entropy': Array(0.16113608, dtype=float32)}


  5%|▍         | 48719/1000000 [2:13:57<38:46:15,  6.82it/s]

{'loss': Array(0.1869245, dtype=float32), 'loss_cross_entropy': Array(0.17448911, dtype=float32)}


  5%|▍         | 48729/1000000 [2:13:58<26:42:46,  9.89it/s]

{'loss': Array(0.16046798, dtype=float32), 'loss_cross_entropy': Array(0.14965105, dtype=float32)}


  5%|▍         | 48739/1000000 [2:13:59<25:35:11, 10.33it/s]

{'loss': Array(0.1856977, dtype=float32), 'loss_cross_entropy': Array(0.17331229, dtype=float32)}


  5%|▍         | 48749/1000000 [2:14:01<35:55:26,  7.36it/s]

{'loss': Array(0.19795655, dtype=float32), 'loss_cross_entropy': Array(0.18588986, dtype=float32)}


  5%|▍         | 48759/1000000 [2:14:02<27:55:27,  9.46it/s]

{'loss': Array(0.18171938, dtype=float32), 'loss_cross_entropy': Array(0.16950761, dtype=float32)}


  5%|▍         | 48769/1000000 [2:14:04<26:13:14, 10.08it/s]

{'loss': Array(0.1824209, dtype=float32), 'loss_cross_entropy': Array(0.16987833, dtype=float32)}


  5%|▍         | 48779/1000000 [2:14:06<29:45:49,  8.88it/s]

{'loss': Array(0.17804638, dtype=float32), 'loss_cross_entropy': Array(0.1661904, dtype=float32)}


  5%|▍         | 48789/1000000 [2:14:07<25:02:56, 10.55it/s]

{'loss': Array(0.13586555, dtype=float32), 'loss_cross_entropy': Array(0.12655334, dtype=float32)}


  5%|▍         | 48799/1000000 [2:14:09<32:02:55,  8.24it/s]

{'loss': Array(0.18389912, dtype=float32), 'loss_cross_entropy': Array(0.1723287, dtype=float32)}


  5%|▍         | 48809/1000000 [2:14:10<26:05:10, 10.13it/s]

{'loss': Array(0.1753509, dtype=float32), 'loss_cross_entropy': Array(0.16443872, dtype=float32)}


  5%|▍         | 48819/1000000 [2:14:11<24:45:57, 10.67it/s]

{'loss': Array(0.17688003, dtype=float32), 'loss_cross_entropy': Array(0.16555344, dtype=float32)}


  5%|▍         | 48829/1000000 [2:14:13<28:21:04,  9.32it/s]

{'loss': Array(0.18210499, dtype=float32), 'loss_cross_entropy': Array(0.1708125, dtype=float32)}


  5%|▍         | 48839/1000000 [2:14:15<25:27:48, 10.38it/s]

{'loss': Array(0.17084941, dtype=float32), 'loss_cross_entropy': Array(0.15980278, dtype=float32)}


  5%|▍         | 48849/1000000 [2:14:16<34:52:48,  7.57it/s]

{'loss': Array(0.17745548, dtype=float32), 'loss_cross_entropy': Array(0.16586336, dtype=float32)}


  5%|▍         | 48859/1000000 [2:14:18<26:41:49,  9.90it/s]

{'loss': Array(0.16457637, dtype=float32), 'loss_cross_entropy': Array(0.1530222, dtype=float32)}


  5%|▍         | 48869/1000000 [2:14:19<25:06:31, 10.52it/s]

{'loss': Array(0.16157201, dtype=float32), 'loss_cross_entropy': Array(0.15084545, dtype=float32)}


  5%|▍         | 48879/1000000 [2:14:21<28:17:22,  9.34it/s]

{'loss': Array(0.18313739, dtype=float32), 'loss_cross_entropy': Array(0.17088519, dtype=float32)}


  5%|▍         | 48889/1000000 [2:14:22<24:52:45, 10.62it/s]

{'loss': Array(0.16244583, dtype=float32), 'loss_cross_entropy': Array(0.15119295, dtype=float32)}


  5%|▍         | 48899/1000000 [2:14:24<29:10:53,  9.05it/s]

{'loss': Array(0.17166431, dtype=float32), 'loss_cross_entropy': Array(0.15911642, dtype=float32)}


  5%|▍         | 48909/1000000 [2:14:25<24:55:31, 10.60it/s]

{'loss': Array(0.17497909, dtype=float32), 'loss_cross_entropy': Array(0.16288619, dtype=float32)}


  5%|▍         | 48919/1000000 [2:14:27<38:39:14,  6.83it/s]

{'loss': Array(0.15093248, dtype=float32), 'loss_cross_entropy': Array(0.14009915, dtype=float32)}


  5%|▍         | 48929/1000000 [2:14:28<26:29:51,  9.97it/s]

{'loss': Array(0.1492242, dtype=float32), 'loss_cross_entropy': Array(0.13853255, dtype=float32)}


  5%|▍         | 48939/1000000 [2:14:30<24:55:13, 10.60it/s]

{'loss': Array(0.15913849, dtype=float32), 'loss_cross_entropy': Array(0.14826563, dtype=float32)}


  5%|▍         | 48949/1000000 [2:14:31<31:35:28,  8.36it/s]

{'loss': Array(0.16841862, dtype=float32), 'loss_cross_entropy': Array(0.15682536, dtype=float32)}


  5%|▍         | 48959/1000000 [2:14:33<25:53:40, 10.20it/s]

{'loss': Array(0.16357978, dtype=float32), 'loss_cross_entropy': Array(0.15282291, dtype=float32)}


  5%|▍         | 48969/1000000 [2:14:34<34:51:44,  7.58it/s]

{'loss': Array(0.16923448, dtype=float32), 'loss_cross_entropy': Array(0.15863399, dtype=float32)}


  5%|▍         | 48979/1000000 [2:14:36<25:47:06, 10.25it/s]

{'loss': Array(0.1880254, dtype=float32), 'loss_cross_entropy': Array(0.17569892, dtype=float32)}


  5%|▍         | 48989/1000000 [2:14:37<34:52:13,  7.58it/s]

{'loss': Array(0.16545707, dtype=float32), 'loss_cross_entropy': Array(0.15358643, dtype=float32)}


  5%|▍         | 48999/1000000 [2:14:39<25:50:08, 10.22it/s]

{'loss': Array(0.16337392, dtype=float32), 'loss_cross_entropy': Array(0.15249504, dtype=float32)}


  5%|▍         | 49009/1000000 [2:14:46<84:54:13,  3.11it/s] 

{'loss': Array(0.17574218, dtype=float32), 'loss_cross_entropy': Array(0.16401686, dtype=float32)}


  5%|▍         | 49019/1000000 [2:14:48<39:18:37,  6.72it/s]

{'loss': Array(0.15206097, dtype=float32), 'loss_cross_entropy': Array(0.14179741, dtype=float32)}


  5%|▍         | 49029/1000000 [2:14:50<27:12:16,  9.71it/s]

{'loss': Array(0.18320148, dtype=float32), 'loss_cross_entropy': Array(0.17068076, dtype=float32)}


  5%|▍         | 49039/1000000 [2:14:51<34:59:17,  7.55it/s]

{'loss': Array(0.17626157, dtype=float32), 'loss_cross_entropy': Array(0.16422367, dtype=float32)}


  5%|▍         | 49049/1000000 [2:14:53<26:14:44, 10.06it/s]

{'loss': Array(0.172008, dtype=float32), 'loss_cross_entropy': Array(0.16010764, dtype=float32)}


  5%|▍         | 49059/1000000 [2:14:54<38:43:51,  6.82it/s]

{'loss': Array(0.16508153, dtype=float32), 'loss_cross_entropy': Array(0.15406938, dtype=float32)}


  5%|▍         | 49069/1000000 [2:14:56<26:33:42,  9.94it/s]

{'loss': Array(0.17275965, dtype=float32), 'loss_cross_entropy': Array(0.16065256, dtype=float32)}


  5%|▍         | 49079/1000000 [2:14:57<24:25:54, 10.81it/s]

{'loss': Array(0.16277435, dtype=float32), 'loss_cross_entropy': Array(0.15166408, dtype=float32)}


  5%|▍         | 49089/1000000 [2:14:59<32:51:06,  8.04it/s]

{'loss': Array(0.16971868, dtype=float32), 'loss_cross_entropy': Array(0.15823291, dtype=float32)}


  5%|▍         | 49099/1000000 [2:15:00<26:32:29,  9.95it/s]

{'loss': Array(0.1784093, dtype=float32), 'loss_cross_entropy': Array(0.16720977, dtype=float32)}


  5%|▍         | 49109/1000000 [2:15:02<36:47:24,  7.18it/s]

{'loss': Array(0.17598222, dtype=float32), 'loss_cross_entropy': Array(0.1648897, dtype=float32)}


  5%|▍         | 49119/1000000 [2:15:03<27:29:47,  9.61it/s]

{'loss': Array(0.17592257, dtype=float32), 'loss_cross_entropy': Array(0.16424277, dtype=float32)}


  5%|▍         | 49129/1000000 [2:15:05<25:32:29, 10.34it/s]

{'loss': Array(0.16298632, dtype=float32), 'loss_cross_entropy': Array(0.153204, dtype=float32)}


  5%|▍         | 49139/1000000 [2:15:06<28:14:49,  9.35it/s]

{'loss': Array(0.17070583, dtype=float32), 'loss_cross_entropy': Array(0.15964971, dtype=float32)}


  5%|▍         | 49149/1000000 [2:15:08<25:37:27, 10.31it/s]

{'loss': Array(0.1798245, dtype=float32), 'loss_cross_entropy': Array(0.16773716, dtype=float32)}


  5%|▍         | 49159/1000000 [2:15:10<39:35:05,  6.67it/s]

{'loss': Array(0.17825451, dtype=float32), 'loss_cross_entropy': Array(0.1658986, dtype=float32)}


  5%|▍         | 49169/1000000 [2:15:11<27:19:22,  9.67it/s]

{'loss': Array(0.17845264, dtype=float32), 'loss_cross_entropy': Array(0.16654466, dtype=float32)}


  5%|▍         | 49179/1000000 [2:15:12<24:58:01, 10.58it/s]

{'loss': Array(0.15332298, dtype=float32), 'loss_cross_entropy': Array(0.14204082, dtype=float32)}


  5%|▍         | 49189/1000000 [2:15:14<31:42:16,  8.33it/s]

{'loss': Array(0.1679823, dtype=float32), 'loss_cross_entropy': Array(0.15699045, dtype=float32)}


  5%|▍         | 49199/1000000 [2:15:15<26:09:17, 10.10it/s]

{'loss': Array(0.1737045, dtype=float32), 'loss_cross_entropy': Array(0.16191351, dtype=float32)}


  5%|▍         | 49209/1000000 [2:15:17<34:57:14,  7.56it/s]

{'loss': Array(0.16011302, dtype=float32), 'loss_cross_entropy': Array(0.14861195, dtype=float32)}


  5%|▍         | 49219/1000000 [2:15:19<27:48:41,  9.50it/s]

{'loss': Array(0.1850941, dtype=float32), 'loss_cross_entropy': Array(0.17262559, dtype=float32)}


  5%|▍         | 49229/1000000 [2:15:20<35:10:44,  7.51it/s]

{'loss': Array(0.15826888, dtype=float32), 'loss_cross_entropy': Array(0.14794421, dtype=float32)}


  5%|▍         | 49239/1000000 [2:15:22<26:26:49,  9.99it/s]

{'loss': Array(0.17381527, dtype=float32), 'loss_cross_entropy': Array(0.1617412, dtype=float32)}


  5%|▍         | 49249/1000000 [2:15:23<24:42:01, 10.69it/s]

{'loss': Array(0.16400549, dtype=float32), 'loss_cross_entropy': Array(0.15260659, dtype=float32)}


  5%|▍         | 49259/1000000 [2:15:25<29:29:14,  8.96it/s]

{'loss': Array(0.1752389, dtype=float32), 'loss_cross_entropy': Array(0.1641616, dtype=float32)}


  5%|▍         | 49269/1000000 [2:15:26<25:28:05, 10.37it/s]

{'loss': Array(0.17062499, dtype=float32), 'loss_cross_entropy': Array(0.15929177, dtype=float32)}


  5%|▍         | 49279/1000000 [2:15:28<31:22:58,  8.42it/s]

{'loss': Array(0.17760767, dtype=float32), 'loss_cross_entropy': Array(0.16587938, dtype=float32)}


  5%|▍         | 49289/1000000 [2:15:29<25:25:01, 10.39it/s]

{'loss': Array(0.1915022, dtype=float32), 'loss_cross_entropy': Array(0.17910157, dtype=float32)}


  5%|▍         | 49299/1000000 [2:15:31<38:30:10,  6.86it/s]

{'loss': Array(0.17287494, dtype=float32), 'loss_cross_entropy': Array(0.16113828, dtype=float32)}


  5%|▍         | 49309/1000000 [2:15:32<26:34:06,  9.94it/s]

{'loss': Array(0.18130164, dtype=float32), 'loss_cross_entropy': Array(0.17023359, dtype=float32)}


  5%|▍         | 49319/1000000 [2:15:34<39:09:28,  6.74it/s]

{'loss': Array(0.18173157, dtype=float32), 'loss_cross_entropy': Array(0.16943686, dtype=float32)}


  5%|▍         | 49329/1000000 [2:15:35<26:26:45,  9.99it/s]

{'loss': Array(0.1815722, dtype=float32), 'loss_cross_entropy': Array(0.16959512, dtype=float32)}


  5%|▍         | 49339/1000000 [2:15:37<24:27:21, 10.80it/s]

{'loss': Array(0.16409479, dtype=float32), 'loss_cross_entropy': Array(0.15322128, dtype=float32)}


  5%|▍         | 49349/1000000 [2:15:38<34:18:55,  7.70it/s]

{'loss': Array(0.18778382, dtype=float32), 'loss_cross_entropy': Array(0.17547642, dtype=float32)}


  5%|▍         | 49359/1000000 [2:15:40<26:10:20, 10.09it/s]

{'loss': Array(0.17650996, dtype=float32), 'loss_cross_entropy': Array(0.1646991, dtype=float32)}


  5%|▍         | 49369/1000000 [2:15:41<38:29:42,  6.86it/s]

{'loss': Array(0.18320684, dtype=float32), 'loss_cross_entropy': Array(0.17124699, dtype=float32)}


  5%|▍         | 49379/1000000 [2:15:43<26:54:12,  9.82it/s]

{'loss': Array(0.17361115, dtype=float32), 'loss_cross_entropy': Array(0.16187948, dtype=float32)}


  5%|▍         | 49389/1000000 [2:15:44<24:51:33, 10.62it/s]

{'loss': Array(0.16216846, dtype=float32), 'loss_cross_entropy': Array(0.15102565, dtype=float32)}


  5%|▍         | 49399/1000000 [2:15:46<27:54:55,  9.46it/s]

{'loss': Array(0.17219119, dtype=float32), 'loss_cross_entropy': Array(0.16074654, dtype=float32)}


  5%|▍         | 49409/1000000 [2:15:47<25:19:57, 10.42it/s]

{'loss': Array(0.1742338, dtype=float32), 'loss_cross_entropy': Array(0.16226538, dtype=float32)}


  5%|▍         | 49419/1000000 [2:15:49<28:27:47,  9.28it/s]

{'loss': Array(0.16508059, dtype=float32), 'loss_cross_entropy': Array(0.15478513, dtype=float32)}


  5%|▍         | 49429/1000000 [2:15:50<25:17:17, 10.44it/s]

{'loss': Array(0.16763948, dtype=float32), 'loss_cross_entropy': Array(0.15686773, dtype=float32)}


  5%|▍         | 49439/1000000 [2:15:52<38:24:34,  6.87it/s]

{'loss': Array(0.17557168, dtype=float32), 'loss_cross_entropy': Array(0.16365801, dtype=float32)}


  5%|▍         | 49449/1000000 [2:15:53<26:43:18,  9.88it/s]

{'loss': Array(0.15885843, dtype=float32), 'loss_cross_entropy': Array(0.14821574, dtype=float32)}


  5%|▍         | 49459/1000000 [2:15:55<25:16:35, 10.45it/s]

{'loss': Array(0.1714242, dtype=float32), 'loss_cross_entropy': Array(0.15988763, dtype=float32)}


  5%|▍         | 49469/1000000 [2:15:56<31:35:29,  8.36it/s]

{'loss': Array(0.1810159, dtype=float32), 'loss_cross_entropy': Array(0.16938965, dtype=float32)}


  5%|▍         | 49479/1000000 [2:15:58<26:05:45, 10.12it/s]

{'loss': Array(0.17796038, dtype=float32), 'loss_cross_entropy': Array(0.16670595, dtype=float32)}


  5%|▍         | 49489/1000000 [2:16:00<39:57:47,  6.61it/s]

{'loss': Array(0.17173176, dtype=float32), 'loss_cross_entropy': Array(0.16028857, dtype=float32)}


  5%|▍         | 49499/1000000 [2:16:01<27:57:09,  9.45it/s]

{'loss': Array(0.17025082, dtype=float32), 'loss_cross_entropy': Array(0.1589834, dtype=float32)}


  5%|▍         | 49509/1000000 [2:16:09<87:36:07,  3.01it/s] 

{'loss': Array(0.16703247, dtype=float32), 'loss_cross_entropy': Array(0.15513995, dtype=float32)}


  5%|▍         | 49519/1000000 [2:16:11<39:53:19,  6.62it/s]

{'loss': Array(0.19382794, dtype=float32), 'loss_cross_entropy': Array(0.18221554, dtype=float32)}


  5%|▍         | 49529/1000000 [2:16:12<27:21:22,  9.65it/s]

{'loss': Array(0.19821641, dtype=float32), 'loss_cross_entropy': Array(0.18552943, dtype=float32)}


  5%|▍         | 49539/1000000 [2:16:14<31:43:18,  8.32it/s]

{'loss': Array(0.15798774, dtype=float32), 'loss_cross_entropy': Array(0.14690943, dtype=float32)}


  5%|▍         | 49549/1000000 [2:16:15<25:40:39, 10.28it/s]

{'loss': Array(0.19420992, dtype=float32), 'loss_cross_entropy': Array(0.18208568, dtype=float32)}


  5%|▍         | 49559/1000000 [2:16:17<38:43:22,  6.82it/s]

{'loss': Array(0.16092095, dtype=float32), 'loss_cross_entropy': Array(0.15013897, dtype=float32)}


  5%|▍         | 49569/1000000 [2:16:18<27:00:59,  9.77it/s]

{'loss': Array(0.16934095, dtype=float32), 'loss_cross_entropy': Array(0.15802133, dtype=float32)}


  5%|▍         | 49579/1000000 [2:16:19<24:58:02, 10.57it/s]

{'loss': Array(0.18166251, dtype=float32), 'loss_cross_entropy': Array(0.16971004, dtype=float32)}


  5%|▍         | 49589/1000000 [2:16:21<27:41:40,  9.53it/s]

{'loss': Array(0.1764544, dtype=float32), 'loss_cross_entropy': Array(0.16419175, dtype=float32)}


  5%|▍         | 49599/1000000 [2:16:23<25:25:53, 10.38it/s]

{'loss': Array(0.18583046, dtype=float32), 'loss_cross_entropy': Array(0.17366314, dtype=float32)}


  5%|▍         | 49609/1000000 [2:16:24<40:02:21,  6.59it/s]

{'loss': Array(0.18945834, dtype=float32), 'loss_cross_entropy': Array(0.17804812, dtype=float32)}


  5%|▍         | 49619/1000000 [2:16:26<26:46:42,  9.86it/s]

{'loss': Array(0.17351834, dtype=float32), 'loss_cross_entropy': Array(0.16211675, dtype=float32)}


  5%|▍         | 49629/1000000 [2:16:27<24:42:05, 10.69it/s]

{'loss': Array(0.16148289, dtype=float32), 'loss_cross_entropy': Array(0.15089189, dtype=float32)}


  5%|▍         | 49639/1000000 [2:16:29<29:17:50,  9.01it/s]

{'loss': Array(0.170471, dtype=float32), 'loss_cross_entropy': Array(0.15911336, dtype=float32)}


  5%|▍         | 49649/1000000 [2:16:30<24:53:44, 10.60it/s]

{'loss': Array(0.19222337, dtype=float32), 'loss_cross_entropy': Array(0.17914642, dtype=float32)}


  5%|▍         | 49659/1000000 [2:16:32<34:06:08,  7.74it/s]

{'loss': Array(0.17282985, dtype=float32), 'loss_cross_entropy': Array(0.16112949, dtype=float32)}


  5%|▍         | 49669/1000000 [2:16:33<26:00:38, 10.15it/s]

{'loss': Array(0.1781713, dtype=float32), 'loss_cross_entropy': Array(0.16664614, dtype=float32)}


  5%|▍         | 49679/1000000 [2:16:35<39:15:46,  6.72it/s]

{'loss': Array(0.17996202, dtype=float32), 'loss_cross_entropy': Array(0.16790949, dtype=float32)}


  5%|▍         | 49689/1000000 [2:16:36<26:46:18,  9.86it/s]

{'loss': Array(0.16085908, dtype=float32), 'loss_cross_entropy': Array(0.14957324, dtype=float32)}


  5%|▍         | 49699/1000000 [2:16:38<25:07:58, 10.50it/s]

{'loss': Array(0.1464467, dtype=float32), 'loss_cross_entropy': Array(0.13548274, dtype=float32)}


  5%|▍         | 49709/1000000 [2:16:39<31:41:46,  8.33it/s]

{'loss': Array(0.18482845, dtype=float32), 'loss_cross_entropy': Array(0.17334954, dtype=float32)}


  5%|▍         | 49719/1000000 [2:16:41<25:19:11, 10.43it/s]

{'loss': Array(0.16550837, dtype=float32), 'loss_cross_entropy': Array(0.1547801, dtype=float32)}


  5%|▍         | 49729/1000000 [2:16:42<34:19:13,  7.69it/s]

{'loss': Array(0.1696098, dtype=float32), 'loss_cross_entropy': Array(0.15837605, dtype=float32)}


  5%|▍         | 49739/1000000 [2:16:44<25:57:17, 10.17it/s]

{'loss': Array(0.190045, dtype=float32), 'loss_cross_entropy': Array(0.17835496, dtype=float32)}


  5%|▍         | 49749/1000000 [2:16:45<24:50:23, 10.63it/s]

{'loss': Array(0.1811503, dtype=float32), 'loss_cross_entropy': Array(0.16869807, dtype=float32)}


  5%|▍         | 49759/1000000 [2:16:47<28:03:01,  9.41it/s]

{'loss': Array(0.17823339, dtype=float32), 'loss_cross_entropy': Array(0.16626208, dtype=float32)}


  5%|▍         | 49769/1000000 [2:16:48<25:10:42, 10.48it/s]

{'loss': Array(0.17290922, dtype=float32), 'loss_cross_entropy': Array(0.1616233, dtype=float32)}


  5%|▍         | 49779/1000000 [2:16:50<38:59:44,  6.77it/s]

{'loss': Array(0.15039057, dtype=float32), 'loss_cross_entropy': Array(0.14033306, dtype=float32)}


  5%|▍         | 49789/1000000 [2:16:51<26:36:52,  9.92it/s]

{'loss': Array(0.1696749, dtype=float32), 'loss_cross_entropy': Array(0.15816747, dtype=float32)}


  5%|▍         | 49799/1000000 [2:16:53<24:27:45, 10.79it/s]

{'loss': Array(0.17605837, dtype=float32), 'loss_cross_entropy': Array(0.1641107, dtype=float32)}


  5%|▍         | 49809/1000000 [2:16:54<31:24:27,  8.40it/s]

{'loss': Array(0.17811005, dtype=float32), 'loss_cross_entropy': Array(0.1667141, dtype=float32)}


  5%|▍         | 49819/1000000 [2:16:56<25:47:53, 10.23it/s]

{'loss': Array(0.18274854, dtype=float32), 'loss_cross_entropy': Array(0.17057341, dtype=float32)}


  5%|▍         | 49829/1000000 [2:16:57<34:33:05,  7.64it/s]

{'loss': Array(0.18234037, dtype=float32), 'loss_cross_entropy': Array(0.17000516, dtype=float32)}


  5%|▍         | 49839/1000000 [2:16:59<26:44:56,  9.87it/s]

{'loss': Array(0.16754882, dtype=float32), 'loss_cross_entropy': Array(0.15635207, dtype=float32)}


  5%|▍         | 49849/1000000 [2:17:00<35:26:10,  7.45it/s]

{'loss': Array(0.16534746, dtype=float32), 'loss_cross_entropy': Array(0.15486366, dtype=float32)}


  5%|▍         | 49859/1000000 [2:17:02<27:03:21,  9.75it/s]

{'loss': Array(0.18807966, dtype=float32), 'loss_cross_entropy': Array(0.1754653, dtype=float32)}


  5%|▍         | 49869/1000000 [2:17:03<25:22:42, 10.40it/s]

{'loss': Array(0.15917262, dtype=float32), 'loss_cross_entropy': Array(0.14883164, dtype=float32)}


  5%|▍         | 49879/1000000 [2:17:05<30:22:22,  8.69it/s]

{'loss': Array(0.16991766, dtype=float32), 'loss_cross_entropy': Array(0.15823576, dtype=float32)}


  5%|▍         | 49889/1000000 [2:17:06<25:26:15, 10.38it/s]

{'loss': Array(0.18515408, dtype=float32), 'loss_cross_entropy': Array(0.17357789, dtype=float32)}


  5%|▍         | 49899/1000000 [2:17:08<31:07:38,  8.48it/s]

{'loss': Array(0.15464476, dtype=float32), 'loss_cross_entropy': Array(0.14463033, dtype=float32)}


  5%|▍         | 49909/1000000 [2:17:09<25:38:34, 10.29it/s]

{'loss': Array(0.181845, dtype=float32), 'loss_cross_entropy': Array(0.16985457, dtype=float32)}


  5%|▍         | 49919/1000000 [2:17:11<39:04:50,  6.75it/s]

{'loss': Array(0.15575974, dtype=float32), 'loss_cross_entropy': Array(0.14533205, dtype=float32)}


  5%|▍         | 49929/1000000 [2:17:12<26:54:26,  9.81it/s]

{'loss': Array(0.17174686, dtype=float32), 'loss_cross_entropy': Array(0.16003315, dtype=float32)}


  5%|▍         | 49939/1000000 [2:17:14<38:55:22,  6.78it/s]

{'loss': Array(0.16777746, dtype=float32), 'loss_cross_entropy': Array(0.15627013, dtype=float32)}


  5%|▍         | 49949/1000000 [2:17:15<26:57:21,  9.79it/s]

{'loss': Array(0.1680167, dtype=float32), 'loss_cross_entropy': Array(0.15686063, dtype=float32)}


  5%|▍         | 49959/1000000 [2:17:17<24:52:34, 10.61it/s]

{'loss': Array(0.18263061, dtype=float32), 'loss_cross_entropy': Array(0.17075185, dtype=float32)}


  5%|▍         | 49969/1000000 [2:17:19<35:31:47,  7.43it/s]

{'loss': Array(0.14609815, dtype=float32), 'loss_cross_entropy': Array(0.13570966, dtype=float32)}


  5%|▍         | 49979/1000000 [2:17:20<26:01:20, 10.14it/s]

{'loss': Array(0.15900505, dtype=float32), 'loss_cross_entropy': Array(0.1486355, dtype=float32)}


  5%|▍         | 49989/1000000 [2:17:22<39:29:25,  6.68it/s]

{'loss': Array(0.17635529, dtype=float32), 'loss_cross_entropy': Array(0.16403113, dtype=float32)}


  5%|▍         | 49999/1000000 [2:17:23<27:32:53,  9.58it/s]

{'loss': Array(0.17539664, dtype=float32), 'loss_cross_entropy': Array(0.16361833, dtype=float32)}


  5%|▌         | 50009/1000000 [2:17:31<91:39:39,  2.88it/s] 

{'loss': Array(0.16400947, dtype=float32), 'loss_cross_entropy': Array(0.15266155, dtype=float32)}


  5%|▌         | 50019/1000000 [2:17:33<39:36:02,  6.66it/s] 

{'loss': Array(0.16172425, dtype=float32), 'loss_cross_entropy': Array(0.15026997, dtype=float32)}


  5%|▌         | 50029/1000000 [2:17:34<26:47:16,  9.85it/s]

{'loss': Array(0.18260527, dtype=float32), 'loss_cross_entropy': Array(0.17168646, dtype=float32)}


  5%|▌         | 50039/1000000 [2:17:36<27:55:59,  9.45it/s]

{'loss': Array(0.15913676, dtype=float32), 'loss_cross_entropy': Array(0.14922032, dtype=float32)}


  5%|▌         | 50049/1000000 [2:17:38<25:30:11, 10.35it/s]

{'loss': Array(0.16236475, dtype=float32), 'loss_cross_entropy': Array(0.15076464, dtype=float32)}


  5%|▌         | 50059/1000000 [2:17:39<38:25:29,  6.87it/s]

{'loss': Array(0.15776499, dtype=float32), 'loss_cross_entropy': Array(0.14745148, dtype=float32)}


  5%|▌         | 50069/1000000 [2:17:41<26:53:42,  9.81it/s]

{'loss': Array(0.16613682, dtype=float32), 'loss_cross_entropy': Array(0.15472572, dtype=float32)}


  5%|▌         | 50079/1000000 [2:17:42<24:38:59, 10.70it/s]

{'loss': Array(0.17257003, dtype=float32), 'loss_cross_entropy': Array(0.16174953, dtype=float32)}


  5%|▌         | 50089/1000000 [2:17:44<31:34:42,  8.36it/s]

{'loss': Array(0.1840704, dtype=float32), 'loss_cross_entropy': Array(0.17160311, dtype=float32)}


  5%|▌         | 50099/1000000 [2:17:45<25:07:02, 10.51it/s]

{'loss': Array(0.17850147, dtype=float32), 'loss_cross_entropy': Array(0.16606645, dtype=float32)}


  5%|▌         | 50109/1000000 [2:17:47<38:28:07,  6.86it/s]

{'loss': Array(0.1729321, dtype=float32), 'loss_cross_entropy': Array(0.1617089, dtype=float32)}


  5%|▌         | 50119/1000000 [2:17:48<26:46:40,  9.85it/s]

{'loss': Array(0.19349127, dtype=float32), 'loss_cross_entropy': Array(0.1809842, dtype=float32)}


  5%|▌         | 50129/1000000 [2:17:49<25:15:18, 10.45it/s]

{'loss': Array(0.17641841, dtype=float32), 'loss_cross_entropy': Array(0.16498147, dtype=float32)}


  5%|▌         | 50139/1000000 [2:17:51<29:30:13,  8.94it/s]

{'loss': Array(0.17348586, dtype=float32), 'loss_cross_entropy': Array(0.16161856, dtype=float32)}


  5%|▌         | 50149/1000000 [2:17:52<25:34:30, 10.32it/s]

{'loss': Array(0.16076738, dtype=float32), 'loss_cross_entropy': Array(0.14923944, dtype=float32)}


  5%|▌         | 50159/1000000 [2:17:54<34:15:38,  7.70it/s]

{'loss': Array(0.16282327, dtype=float32), 'loss_cross_entropy': Array(0.15142411, dtype=float32)}


  5%|▌         | 50169/1000000 [2:17:56<26:17:30, 10.04it/s]

{'loss': Array(0.15104264, dtype=float32), 'loss_cross_entropy': Array(0.14080863, dtype=float32)}


  5%|▌         | 50179/1000000 [2:17:57<24:58:12, 10.57it/s]

{'loss': Array(0.155196, dtype=float32), 'loss_cross_entropy': Array(0.14414287, dtype=float32)}


  5%|▌         | 50189/1000000 [2:17:59<31:43:39,  8.32it/s]

{'loss': Array(0.18948346, dtype=float32), 'loss_cross_entropy': Array(0.17717001, dtype=float32)}


  5%|▌         | 50199/1000000 [2:18:00<25:59:06, 10.15it/s]

{'loss': Array(0.17419325, dtype=float32), 'loss_cross_entropy': Array(0.16270617, dtype=float32)}


  5%|▌         | 50209/1000000 [2:18:01<25:56:40, 10.17it/s]

{'loss': Array(0.15717873, dtype=float32), 'loss_cross_entropy': Array(0.14666985, dtype=float32)}


  5%|▌         | 50219/1000000 [2:18:03<28:39:50,  9.20it/s]

{'loss': Array(0.17512456, dtype=float32), 'loss_cross_entropy': Array(0.16380528, dtype=float32)}


  5%|▌         | 50229/1000000 [2:18:05<25:41:52, 10.27it/s]

{'loss': Array(0.17336659, dtype=float32), 'loss_cross_entropy': Array(0.16255055, dtype=float32)}


  5%|▌         | 50239/1000000 [2:18:06<31:59:06,  8.25it/s]

{'loss': Array(0.16469823, dtype=float32), 'loss_cross_entropy': Array(0.15386687, dtype=float32)}


  5%|▌         | 50249/1000000 [2:18:08<26:22:34, 10.00it/s]

{'loss': Array(0.17820504, dtype=float32), 'loss_cross_entropy': Array(0.16605991, dtype=float32)}


  5%|▌         | 50259/1000000 [2:18:09<39:07:56,  6.74it/s]

{'loss': Array(0.16975327, dtype=float32), 'loss_cross_entropy': Array(0.15812182, dtype=float32)}


  5%|▌         | 50269/1000000 [2:18:11<26:34:48,  9.93it/s]

{'loss': Array(0.17016277, dtype=float32), 'loss_cross_entropy': Array(0.15913527, dtype=float32)}


  5%|▌         | 50279/1000000 [2:18:12<38:40:50,  6.82it/s]

{'loss': Array(0.17054582, dtype=float32), 'loss_cross_entropy': Array(0.1589339, dtype=float32)}


  5%|▌         | 50289/1000000 [2:18:14<26:25:15,  9.98it/s]

{'loss': Array(0.16398697, dtype=float32), 'loss_cross_entropy': Array(0.15230393, dtype=float32)}


  5%|▌         | 50299/1000000 [2:18:15<24:29:35, 10.77it/s]

{'loss': Array(0.201098, dtype=float32), 'loss_cross_entropy': Array(0.18795341, dtype=float32)}


  5%|▌         | 50309/1000000 [2:18:17<34:08:47,  7.73it/s]

{'loss': Array(0.1827544, dtype=float32), 'loss_cross_entropy': Array(0.17061818, dtype=float32)}


  5%|▌         | 50319/1000000 [2:18:18<26:13:55, 10.06it/s]

{'loss': Array(0.15530916, dtype=float32), 'loss_cross_entropy': Array(0.1447975, dtype=float32)}


  5%|▌         | 50329/1000000 [2:18:20<38:47:48,  6.80it/s]

{'loss': Array(0.1550951, dtype=float32), 'loss_cross_entropy': Array(0.14466166, dtype=float32)}


  5%|▌         | 50339/1000000 [2:18:21<26:35:59,  9.92it/s]

{'loss': Array(0.1744906, dtype=float32), 'loss_cross_entropy': Array(0.16262017, dtype=float32)}


  5%|▌         | 50349/1000000 [2:18:23<24:55:38, 10.58it/s]

{'loss': Array(0.14241952, dtype=float32), 'loss_cross_entropy': Array(0.1318115, dtype=float32)}


  5%|▌         | 50359/1000000 [2:18:24<27:40:47,  9.53it/s]

{'loss': Array(0.19241141, dtype=float32), 'loss_cross_entropy': Array(0.18016236, dtype=float32)}


  5%|▌         | 50369/1000000 [2:18:26<25:38:53, 10.28it/s]

{'loss': Array(0.17049015, dtype=float32), 'loss_cross_entropy': Array(0.15923825, dtype=float32)}


  5%|▌         | 50379/1000000 [2:18:27<27:39:32,  9.54it/s]

{'loss': Array(0.15568143, dtype=float32), 'loss_cross_entropy': Array(0.14548327, dtype=float32)}


  5%|▌         | 50389/1000000 [2:18:29<25:04:23, 10.52it/s]

{'loss': Array(0.17307745, dtype=float32), 'loss_cross_entropy': Array(0.16214839, dtype=float32)}


  5%|▌         | 50399/1000000 [2:18:30<38:56:42,  6.77it/s]

{'loss': Array(0.18601672, dtype=float32), 'loss_cross_entropy': Array(0.1734159, dtype=float32)}


  5%|▌         | 50409/1000000 [2:18:32<26:47:45,  9.84it/s]

{'loss': Array(0.19795994, dtype=float32), 'loss_cross_entropy': Array(0.18701561, dtype=float32)}


  5%|▌         | 50419/1000000 [2:18:33<24:24:57, 10.80it/s]

{'loss': Array(0.16859868, dtype=float32), 'loss_cross_entropy': Array(0.15737574, dtype=float32)}


  5%|▌         | 50429/1000000 [2:18:35<30:52:59,  8.54it/s]

{'loss': Array(0.17076646, dtype=float32), 'loss_cross_entropy': Array(0.1595393, dtype=float32)}


  5%|▌         | 50439/1000000 [2:18:36<25:35:46, 10.30it/s]

{'loss': Array(0.16565843, dtype=float32), 'loss_cross_entropy': Array(0.15461926, dtype=float32)}


  5%|▌         | 50449/1000000 [2:18:38<39:25:40,  6.69it/s]

{'loss': Array(0.18151577, dtype=float32), 'loss_cross_entropy': Array(0.16919161, dtype=float32)}


  5%|▌         | 50459/1000000 [2:18:39<26:32:27,  9.94it/s]

{'loss': Array(0.14650524, dtype=float32), 'loss_cross_entropy': Array(0.13656749, dtype=float32)}


  5%|▌         | 50469/1000000 [2:18:41<24:53:09, 10.60it/s]

{'loss': Array(0.18247452, dtype=float32), 'loss_cross_entropy': Array(0.17108461, dtype=float32)}


  5%|▌         | 50479/1000000 [2:18:42<30:30:59,  8.64it/s]

{'loss': Array(0.18485077, dtype=float32), 'loss_cross_entropy': Array(0.17226416, dtype=float32)}


  5%|▌         | 50489/1000000 [2:18:44<25:33:58, 10.32it/s]

{'loss': Array(0.1851213, dtype=float32), 'loss_cross_entropy': Array(0.17382164, dtype=float32)}


  5%|▌         | 50499/1000000 [2:18:45<34:53:01,  7.56it/s]

{'loss': Array(0.17774554, dtype=float32), 'loss_cross_entropy': Array(0.16673066, dtype=float32)}


  5%|▌         | 50509/1000000 [2:18:53<86:33:52,  3.05it/s] 

{'loss': Array(0.17073877, dtype=float32), 'loss_cross_entropy': Array(0.159579, dtype=float32)}


  5%|▌         | 50519/1000000 [2:18:55<44:46:13,  5.89it/s]

{'loss': Array(0.15079938, dtype=float32), 'loss_cross_entropy': Array(0.14053556, dtype=float32)}


  5%|▌         | 50529/1000000 [2:18:56<27:31:08,  9.58it/s]

{'loss': Array(0.17699777, dtype=float32), 'loss_cross_entropy': Array(0.16491784, dtype=float32)}


  5%|▌         | 50539/1000000 [2:18:58<24:49:13, 10.63it/s]

{'loss': Array(0.19580303, dtype=float32), 'loss_cross_entropy': Array(0.18290041, dtype=float32)}


  5%|▌         | 50549/1000000 [2:18:59<31:54:45,  8.26it/s]

{'loss': Array(0.19816099, dtype=float32), 'loss_cross_entropy': Array(0.18515652, dtype=float32)}


  5%|▌         | 50559/1000000 [2:19:01<26:16:14, 10.04it/s]

{'loss': Array(0.16169652, dtype=float32), 'loss_cross_entropy': Array(0.1511458, dtype=float32)}


  5%|▌         | 50569/1000000 [2:19:02<26:07:49, 10.09it/s]

{'loss': Array(0.15977381, dtype=float32), 'loss_cross_entropy': Array(0.14865826, dtype=float32)}


  5%|▌         | 50579/1000000 [2:19:04<28:50:17,  9.15it/s]

{'loss': Array(0.18768932, dtype=float32), 'loss_cross_entropy': Array(0.17611091, dtype=float32)}


  5%|▌         | 50589/1000000 [2:19:05<25:16:50, 10.43it/s]

{'loss': Array(0.17326955, dtype=float32), 'loss_cross_entropy': Array(0.16206156, dtype=float32)}


  5%|▌         | 50599/1000000 [2:19:07<32:05:22,  8.22it/s]

{'loss': Array(0.17651175, dtype=float32), 'loss_cross_entropy': Array(0.16511434, dtype=float32)}


  5%|▌         | 50609/1000000 [2:19:08<25:25:30, 10.37it/s]

{'loss': Array(0.16905634, dtype=float32), 'loss_cross_entropy': Array(0.15816282, dtype=float32)}


  5%|▌         | 50619/1000000 [2:19:10<34:27:55,  7.65it/s]

{'loss': Array(0.15944165, dtype=float32), 'loss_cross_entropy': Array(0.14843571, dtype=float32)}


  5%|▌         | 50629/1000000 [2:19:11<25:58:23, 10.15it/s]

{'loss': Array(0.17149298, dtype=float32), 'loss_cross_entropy': Array(0.15988477, dtype=float32)}


  5%|▌         | 50639/1000000 [2:19:13<24:35:37, 10.72it/s]

{'loss': Array(0.16324653, dtype=float32), 'loss_cross_entropy': Array(0.15310964, dtype=float32)}


  5%|▌         | 50649/1000000 [2:19:14<31:37:14,  8.34it/s]

{'loss': Array(0.17749195, dtype=float32), 'loss_cross_entropy': Array(0.16550721, dtype=float32)}


  5%|▌         | 50659/1000000 [2:19:16<26:01:16, 10.13it/s]

{'loss': Array(0.17465806, dtype=float32), 'loss_cross_entropy': Array(0.16291673, dtype=float32)}


  5%|▌         | 50669/1000000 [2:19:17<24:49:34, 10.62it/s]

{'loss': Array(0.17118758, dtype=float32), 'loss_cross_entropy': Array(0.15945671, dtype=float32)}


  5%|▌         | 50679/1000000 [2:19:19<28:09:22,  9.37it/s]

{'loss': Array(0.1784553, dtype=float32), 'loss_cross_entropy': Array(0.16661218, dtype=float32)}


  5%|▌         | 50689/1000000 [2:19:20<25:14:14, 10.45it/s]

{'loss': Array(0.17753218, dtype=float32), 'loss_cross_entropy': Array(0.16551448, dtype=float32)}


  5%|▌         | 50699/1000000 [2:19:22<29:06:35,  9.06it/s]

{'loss': Array(0.18127108, dtype=float32), 'loss_cross_entropy': Array(0.16946827, dtype=float32)}


  5%|▌         | 50709/1000000 [2:19:23<25:26:18, 10.37it/s]

{'loss': Array(0.17649195, dtype=float32), 'loss_cross_entropy': Array(0.16421877, dtype=float32)}


  5%|▌         | 50719/1000000 [2:19:25<34:41:05,  7.60it/s]

{'loss': Array(0.15944219, dtype=float32), 'loss_cross_entropy': Array(0.1488701, dtype=float32)}


  5%|▌         | 50729/1000000 [2:19:26<26:10:53, 10.07it/s]

{'loss': Array(0.17458922, dtype=float32), 'loss_cross_entropy': Array(0.16331674, dtype=float32)}


  5%|▌         | 50739/1000000 [2:19:28<38:24:28,  6.87it/s]

{'loss': Array(0.16302015, dtype=float32), 'loss_cross_entropy': Array(0.1526681, dtype=float32)}


  5%|▌         | 50749/1000000 [2:19:29<26:41:21,  9.88it/s]

{'loss': Array(0.17406288, dtype=float32), 'loss_cross_entropy': Array(0.16289303, dtype=float32)}


  5%|▌         | 50759/1000000 [2:19:31<25:08:27, 10.49it/s]

{'loss': Array(0.15744226, dtype=float32), 'loss_cross_entropy': Array(0.14633305, dtype=float32)}


  5%|▌         | 50769/1000000 [2:19:33<35:01:33,  7.53it/s]

{'loss': Array(0.16960686, dtype=float32), 'loss_cross_entropy': Array(0.15828933, dtype=float32)}


  5%|▌         | 50779/1000000 [2:19:34<26:33:31,  9.93it/s]

{'loss': Array(0.16225359, dtype=float32), 'loss_cross_entropy': Array(0.15218219, dtype=float32)}


  5%|▌         | 50789/1000000 [2:19:35<24:54:57, 10.58it/s]

{'loss': Array(0.15922165, dtype=float32), 'loss_cross_entropy': Array(0.14911582, dtype=float32)}


  5%|▌         | 50799/1000000 [2:19:37<29:10:30,  9.04it/s]

{'loss': Array(0.14752288, dtype=float32), 'loss_cross_entropy': Array(0.13700853, dtype=float32)}


  5%|▌         | 50809/1000000 [2:19:38<25:25:32, 10.37it/s]

{'loss': Array(0.18226469, dtype=float32), 'loss_cross_entropy': Array(0.17058846, dtype=float32)}


  5%|▌         | 50819/1000000 [2:19:40<35:25:37,  7.44it/s]

{'loss': Array(0.17736427, dtype=float32), 'loss_cross_entropy': Array(0.1655962, dtype=float32)}


  5%|▌         | 50829/1000000 [2:19:42<26:44:15,  9.86it/s]

{'loss': Array(0.17464328, dtype=float32), 'loss_cross_entropy': Array(0.1633993, dtype=float32)}


  5%|▌         | 50839/1000000 [2:19:43<24:57:38, 10.56it/s]

{'loss': Array(0.1671185, dtype=float32), 'loss_cross_entropy': Array(0.15722223, dtype=float32)}


  5%|▌         | 50849/1000000 [2:19:45<28:18:00,  9.32it/s]

{'loss': Array(0.1845731, dtype=float32), 'loss_cross_entropy': Array(0.17228429, dtype=float32)}


  5%|▌         | 50859/1000000 [2:19:46<25:06:52, 10.50it/s]

{'loss': Array(0.15523414, dtype=float32), 'loss_cross_entropy': Array(0.14435951, dtype=float32)}


  5%|▌         | 50869/1000000 [2:19:47<25:45:51, 10.23it/s]

{'loss': Array(0.16117205, dtype=float32), 'loss_cross_entropy': Array(0.14989631, dtype=float32)}


  5%|▌         | 50879/1000000 [2:19:49<27:50:56,  9.47it/s]

{'loss': Array(0.18008015, dtype=float32), 'loss_cross_entropy': Array(0.16877465, dtype=float32)}


  5%|▌         | 50889/1000000 [2:19:51<24:55:41, 10.58it/s]

{'loss': Array(0.17171876, dtype=float32), 'loss_cross_entropy': Array(0.16017304, dtype=float32)}


  5%|▌         | 50899/1000000 [2:19:52<35:21:55,  7.45it/s]

{'loss': Array(0.16597104, dtype=float32), 'loss_cross_entropy': Array(0.15490267, dtype=float32)}


  5%|▌         | 50909/1000000 [2:19:54<26:15:54, 10.04it/s]

{'loss': Array(0.17420115, dtype=float32), 'loss_cross_entropy': Array(0.16245063, dtype=float32)}


  5%|▌         | 50919/1000000 [2:19:55<38:41:16,  6.81it/s]

{'loss': Array(0.17288025, dtype=float32), 'loss_cross_entropy': Array(0.16124785, dtype=float32)}


  5%|▌         | 50929/1000000 [2:19:57<26:28:52,  9.96it/s]

{'loss': Array(0.17955945, dtype=float32), 'loss_cross_entropy': Array(0.16820672, dtype=float32)}


  5%|▌         | 50939/1000000 [2:19:58<24:31:33, 10.75it/s]

{'loss': Array(0.16755067, dtype=float32), 'loss_cross_entropy': Array(0.15619482, dtype=float32)}


  5%|▌         | 50949/1000000 [2:20:00<28:51:58,  9.13it/s]

{'loss': Array(0.17245482, dtype=float32), 'loss_cross_entropy': Array(0.16107182, dtype=float32)}


  5%|▌         | 50959/1000000 [2:20:01<26:24:02,  9.99it/s]

{'loss': Array(0.15607868, dtype=float32), 'loss_cross_entropy': Array(0.14569592, dtype=float32)}


  5%|▌         | 50969/1000000 [2:20:03<41:00:54,  6.43it/s]

{'loss': Array(0.1676217, dtype=float32), 'loss_cross_entropy': Array(0.15581597, dtype=float32)}


  5%|▌         | 50979/1000000 [2:20:04<28:36:52,  9.21it/s]

{'loss': Array(0.18397944, dtype=float32), 'loss_cross_entropy': Array(0.17265494, dtype=float32)}


  5%|▌         | 50989/1000000 [2:20:06<25:17:58, 10.42it/s]

{'loss': Array(0.14953864, dtype=float32), 'loss_cross_entropy': Array(0.13870019, dtype=float32)}


  5%|▌         | 50999/1000000 [2:20:08<29:40:26,  8.88it/s]

{'loss': Array(0.18909514, dtype=float32), 'loss_cross_entropy': Array(0.1760299, dtype=float32)}


  5%|▌         | 51009/1000000 [2:20:15<85:15:26,  3.09it/s] 

{'loss': Array(0.19642673, dtype=float32), 'loss_cross_entropy': Array(0.18343893, dtype=float32)}


  5%|▌         | 51019/1000000 [2:20:17<48:18:02,  5.46it/s]

{'loss': Array(0.1602794, dtype=float32), 'loss_cross_entropy': Array(0.1499756, dtype=float32)}


  5%|▌         | 51029/1000000 [2:20:18<28:38:54,  9.20it/s]

{'loss': Array(0.175319, dtype=float32), 'loss_cross_entropy': Array(0.16450588, dtype=float32)}


  5%|▌         | 51039/1000000 [2:20:20<25:11:29, 10.46it/s]

{'loss': Array(0.17948426, dtype=float32), 'loss_cross_entropy': Array(0.16701376, dtype=float32)}


  5%|▌         | 51049/1000000 [2:20:21<28:13:07,  9.34it/s]

{'loss': Array(0.1694442, dtype=float32), 'loss_cross_entropy': Array(0.15874872, dtype=float32)}


  5%|▌         | 51059/1000000 [2:20:23<24:45:36, 10.65it/s]

{'loss': Array(0.17927699, dtype=float32), 'loss_cross_entropy': Array(0.16809987, dtype=float32)}


  5%|▌         | 51069/1000000 [2:20:24<38:33:07,  6.84it/s]

{'loss': Array(0.16817668, dtype=float32), 'loss_cross_entropy': Array(0.15682423, dtype=float32)}


  5%|▌         | 51079/1000000 [2:20:26<27:14:06,  9.68it/s]

{'loss': Array(0.1735297, dtype=float32), 'loss_cross_entropy': Array(0.1624078, dtype=float32)}


  5%|▌         | 51089/1000000 [2:20:27<24:40:46, 10.68it/s]

{'loss': Array(0.17017685, dtype=float32), 'loss_cross_entropy': Array(0.15837963, dtype=float32)}


  5%|▌         | 51099/1000000 [2:20:29<34:45:33,  7.58it/s]

{'loss': Array(0.18020676, dtype=float32), 'loss_cross_entropy': Array(0.16804102, dtype=float32)}


  5%|▌         | 51109/1000000 [2:20:30<26:23:38,  9.99it/s]

{'loss': Array(0.18608133, dtype=float32), 'loss_cross_entropy': Array(0.17375694, dtype=float32)}


  5%|▌         | 51119/1000000 [2:20:32<25:04:27, 10.51it/s]

{'loss': Array(0.17047608, dtype=float32), 'loss_cross_entropy': Array(0.15986465, dtype=float32)}


  5%|▌         | 51129/1000000 [2:20:33<29:19:16,  8.99it/s]

{'loss': Array(0.18203562, dtype=float32), 'loss_cross_entropy': Array(0.17105931, dtype=float32)}


  5%|▌         | 51139/1000000 [2:20:35<25:13:06, 10.45it/s]

{'loss': Array(0.16569969, dtype=float32), 'loss_cross_entropy': Array(0.15525459, dtype=float32)}


  5%|▌         | 51149/1000000 [2:20:36<31:24:04,  8.39it/s]

{'loss': Array(0.17686622, dtype=float32), 'loss_cross_entropy': Array(0.16498648, dtype=float32)}


  5%|▌         | 51159/1000000 [2:20:38<25:27:49, 10.35it/s]

{'loss': Array(0.1840206, dtype=float32), 'loss_cross_entropy': Array(0.17164244, dtype=float32)}


  5%|▌         | 51169/1000000 [2:20:39<24:37:39, 10.70it/s]

{'loss': Array(0.16600256, dtype=float32), 'loss_cross_entropy': Array(0.15426037, dtype=float32)}


  5%|▌         | 51179/1000000 [2:20:41<27:16:28,  9.66it/s]

{'loss': Array(0.1727936, dtype=float32), 'loss_cross_entropy': Array(0.16135012, dtype=float32)}


  5%|▌         | 51189/1000000 [2:20:42<24:30:47, 10.75it/s]

{'loss': Array(0.17117898, dtype=float32), 'loss_cross_entropy': Array(0.15983973, dtype=float32)}


  5%|▌         | 51199/1000000 [2:20:44<33:39:52,  7.83it/s]

{'loss': Array(0.186184, dtype=float32), 'loss_cross_entropy': Array(0.1736995, dtype=float32)}


  5%|▌         | 51209/1000000 [2:20:45<25:48:35, 10.21it/s]

{'loss': Array(0.17004746, dtype=float32), 'loss_cross_entropy': Array(0.15847127, dtype=float32)}


  5%|▌         | 51219/1000000 [2:20:46<25:05:16, 10.51it/s]

{'loss': Array(0.17036307, dtype=float32), 'loss_cross_entropy': Array(0.15902393, dtype=float32)}


  5%|▌         | 51229/1000000 [2:20:48<28:34:47,  9.22it/s]

{'loss': Array(0.16312008, dtype=float32), 'loss_cross_entropy': Array(0.15263747, dtype=float32)}


  5%|▌         | 51239/1000000 [2:20:50<25:20:07, 10.40it/s]

{'loss': Array(0.17069082, dtype=float32), 'loss_cross_entropy': Array(0.1599351, dtype=float32)}


  5%|▌         | 51249/1000000 [2:20:51<29:02:34,  9.07it/s]

{'loss': Array(0.14925277, dtype=float32), 'loss_cross_entropy': Array(0.1386744, dtype=float32)}


  5%|▌         | 51259/1000000 [2:20:53<25:43:03, 10.25it/s]

{'loss': Array(0.17015925, dtype=float32), 'loss_cross_entropy': Array(0.15882047, dtype=float32)}


  5%|▌         | 51269/1000000 [2:20:54<38:32:13,  6.84it/s]

{'loss': Array(0.17731343, dtype=float32), 'loss_cross_entropy': Array(0.16524868, dtype=float32)}


  5%|▌         | 51279/1000000 [2:20:56<26:43:06,  9.86it/s]

{'loss': Array(0.17219652, dtype=float32), 'loss_cross_entropy': Array(0.16132478, dtype=float32)}


  5%|▌         | 51289/1000000 [2:20:57<24:29:10, 10.76it/s]

{'loss': Array(0.15303184, dtype=float32), 'loss_cross_entropy': Array(0.14286447, dtype=float32)}


  5%|▌         | 51299/1000000 [2:20:59<32:01:09,  8.23it/s]

{'loss': Array(0.166001, dtype=float32), 'loss_cross_entropy': Array(0.15491195, dtype=float32)}


  5%|▌         | 51309/1000000 [2:21:00<26:31:53,  9.93it/s]

{'loss': Array(0.18885833, dtype=float32), 'loss_cross_entropy': Array(0.17719065, dtype=float32)}


  5%|▌         | 51319/1000000 [2:21:02<35:53:12,  7.34it/s]

{'loss': Array(0.16557126, dtype=float32), 'loss_cross_entropy': Array(0.15424298, dtype=float32)}


  5%|▌         | 51329/1000000 [2:21:03<27:03:51,  9.74it/s]

{'loss': Array(0.17007558, dtype=float32), 'loss_cross_entropy': Array(0.15905449, dtype=float32)}


  5%|▌         | 51339/1000000 [2:21:05<36:06:32,  7.30it/s]

{'loss': Array(0.16592689, dtype=float32), 'loss_cross_entropy': Array(0.15483111, dtype=float32)}


  5%|▌         | 51349/1000000 [2:21:06<26:54:54,  9.79it/s]

{'loss': Array(0.16678739, dtype=float32), 'loss_cross_entropy': Array(0.15593015, dtype=float32)}


  5%|▌         | 51359/1000000 [2:21:08<25:09:42, 10.47it/s]

{'loss': Array(0.15270875, dtype=float32), 'loss_cross_entropy': Array(0.14192776, dtype=float32)}


  5%|▌         | 51369/1000000 [2:21:10<30:03:41,  8.77it/s]

{'loss': Array(0.17070667, dtype=float32), 'loss_cross_entropy': Array(0.15966716, dtype=float32)}


  5%|▌         | 51379/1000000 [2:21:11<25:00:52, 10.53it/s]

{'loss': Array(0.18599518, dtype=float32), 'loss_cross_entropy': Array(0.17424695, dtype=float32)}


  5%|▌         | 51389/1000000 [2:21:13<31:05:39,  8.47it/s]

{'loss': Array(0.18094774, dtype=float32), 'loss_cross_entropy': Array(0.17005554, dtype=float32)}


  5%|▌         | 51399/1000000 [2:21:14<25:51:54, 10.19it/s]

{'loss': Array(0.18049537, dtype=float32), 'loss_cross_entropy': Array(0.169098, dtype=float32)}


  5%|▌         | 51409/1000000 [2:21:16<39:12:29,  6.72it/s]

{'loss': Array(0.18750916, dtype=float32), 'loss_cross_entropy': Array(0.17548327, dtype=float32)}


  5%|▌         | 51419/1000000 [2:21:17<26:41:57,  9.87it/s]

{'loss': Array(0.17855875, dtype=float32), 'loss_cross_entropy': Array(0.16727108, dtype=float32)}


  5%|▌         | 51429/1000000 [2:21:19<39:20:15,  6.70it/s]

{'loss': Array(0.17287792, dtype=float32), 'loss_cross_entropy': Array(0.16162024, dtype=float32)}


  5%|▌         | 51439/1000000 [2:21:20<26:53:21,  9.80it/s]

{'loss': Array(0.1603997, dtype=float32), 'loss_cross_entropy': Array(0.15004228, dtype=float32)}


  5%|▌         | 51449/1000000 [2:21:22<24:44:09, 10.65it/s]

{'loss': Array(0.18680008, dtype=float32), 'loss_cross_entropy': Array(0.17458032, dtype=float32)}


  5%|▌         | 51459/1000000 [2:21:23<34:22:15,  7.67it/s]

{'loss': Array(0.18333316, dtype=float32), 'loss_cross_entropy': Array(0.17234918, dtype=float32)}


  5%|▌         | 51469/1000000 [2:21:25<26:08:00, 10.08it/s]

{'loss': Array(0.16221608, dtype=float32), 'loss_cross_entropy': Array(0.15102349, dtype=float32)}


  5%|▌         | 51479/1000000 [2:21:26<39:03:20,  6.75it/s]

{'loss': Array(0.15110928, dtype=float32), 'loss_cross_entropy': Array(0.14083493, dtype=float32)}


  5%|▌         | 51489/1000000 [2:21:28<26:43:13,  9.86it/s]

{'loss': Array(0.1735071, dtype=float32), 'loss_cross_entropy': Array(0.16201846, dtype=float32)}


  5%|▌         | 51499/1000000 [2:21:29<24:52:19, 10.59it/s]

{'loss': Array(0.16505823, dtype=float32), 'loss_cross_entropy': Array(0.15374681, dtype=float32)}


  5%|▌         | 51509/1000000 [2:21:37<87:25:48,  3.01it/s] 

{'loss': Array(0.17703204, dtype=float32), 'loss_cross_entropy': Array(0.16598001, dtype=float32)}


  5%|▌         | 51519/1000000 [2:21:38<35:08:46,  7.50it/s]

{'loss': Array(0.15699105, dtype=float32), 'loss_cross_entropy': Array(0.14606842, dtype=float32)}


  5%|▌         | 51529/1000000 [2:21:40<29:46:34,  8.85it/s]

{'loss': Array(0.16910367, dtype=float32), 'loss_cross_entropy': Array(0.15819348, dtype=float32)}


  5%|▌         | 51539/1000000 [2:21:42<25:27:27, 10.35it/s]

{'loss': Array(0.17042178, dtype=float32), 'loss_cross_entropy': Array(0.15927425, dtype=float32)}


  5%|▌         | 51549/1000000 [2:21:43<24:40:33, 10.68it/s]

{'loss': Array(0.15561984, dtype=float32), 'loss_cross_entropy': Array(0.14519605, dtype=float32)}


  5%|▌         | 51559/1000000 [2:21:45<27:44:45,  9.50it/s]

{'loss': Array(0.18484765, dtype=float32), 'loss_cross_entropy': Array(0.17235856, dtype=float32)}


  5%|▌         | 51569/1000000 [2:21:46<25:15:26, 10.43it/s]

{'loss': Array(0.16130751, dtype=float32), 'loss_cross_entropy': Array(0.15037724, dtype=float32)}


  5%|▌         | 51579/1000000 [2:21:48<31:19:52,  8.41it/s]

{'loss': Array(0.17007719, dtype=float32), 'loss_cross_entropy': Array(0.15907091, dtype=float32)}


  5%|▌         | 51589/1000000 [2:21:49<26:08:23, 10.08it/s]

{'loss': Array(0.16860984, dtype=float32), 'loss_cross_entropy': Array(0.15765269, dtype=float32)}


  5%|▌         | 51599/1000000 [2:21:51<38:55:47,  6.77it/s]

{'loss': Array(0.17652217, dtype=float32), 'loss_cross_entropy': Array(0.16432635, dtype=float32)}


  5%|▌         | 51609/1000000 [2:21:52<26:38:38,  9.89it/s]

{'loss': Array(0.1615182, dtype=float32), 'loss_cross_entropy': Array(0.15091603, dtype=float32)}


  5%|▌         | 51619/1000000 [2:21:53<25:12:35, 10.45it/s]

{'loss': Array(0.16629739, dtype=float32), 'loss_cross_entropy': Array(0.15526344, dtype=float32)}


  5%|▌         | 51629/1000000 [2:21:55<28:27:31,  9.26it/s]

{'loss': Array(0.18705752, dtype=float32), 'loss_cross_entropy': Array(0.17543738, dtype=float32)}


  5%|▌         | 51639/1000000 [2:21:57<24:51:08, 10.60it/s]

{'loss': Array(0.17907742, dtype=float32), 'loss_cross_entropy': Array(0.16693634, dtype=float32)}


  5%|▌         | 51649/1000000 [2:21:58<31:00:22,  8.50it/s]

{'loss': Array(0.16205879, dtype=float32), 'loss_cross_entropy': Array(0.15102449, dtype=float32)}


  5%|▌         | 51659/1000000 [2:22:00<26:51:42,  9.81it/s]

{'loss': Array(0.1638618, dtype=float32), 'loss_cross_entropy': Array(0.15310457, dtype=float32)}


  5%|▌         | 51669/1000000 [2:22:01<26:00:11, 10.13it/s]

{'loss': Array(0.1828091, dtype=float32), 'loss_cross_entropy': Array(0.17074686, dtype=float32)}


  5%|▌         | 51679/1000000 [2:22:03<29:48:21,  8.84it/s]

{'loss': Array(0.15684904, dtype=float32), 'loss_cross_entropy': Array(0.146286, dtype=float32)}


  5%|▌         | 51689/1000000 [2:22:04<26:58:09,  9.77it/s]

{'loss': Array(0.17628084, dtype=float32), 'loss_cross_entropy': Array(0.1651709, dtype=float32)}


  5%|▌         | 51699/1000000 [2:22:06<39:38:44,  6.64it/s]

{'loss': Array(0.15931468, dtype=float32), 'loss_cross_entropy': Array(0.14816199, dtype=float32)}


  5%|▌         | 51709/1000000 [2:22:07<26:48:32,  9.83it/s]

{'loss': Array(0.16984232, dtype=float32), 'loss_cross_entropy': Array(0.15776764, dtype=float32)}


  5%|▌         | 51719/1000000 [2:22:09<24:34:40, 10.72it/s]

{'loss': Array(0.15971135, dtype=float32), 'loss_cross_entropy': Array(0.14993474, dtype=float32)}


  5%|▌         | 51729/1000000 [2:22:10<31:20:49,  8.40it/s]

{'loss': Array(0.163114, dtype=float32), 'loss_cross_entropy': Array(0.1522458, dtype=float32)}


  5%|▌         | 51739/1000000 [2:22:12<25:24:47, 10.36it/s]

{'loss': Array(0.15159723, dtype=float32), 'loss_cross_entropy': Array(0.1408559, dtype=float32)}


  5%|▌         | 51749/1000000 [2:22:14<34:34:09,  7.62it/s]

{'loss': Array(0.16729918, dtype=float32), 'loss_cross_entropy': Array(0.15617202, dtype=float32)}


  5%|▌         | 51759/1000000 [2:22:15<27:45:11,  9.49it/s]

{'loss': Array(0.14732616, dtype=float32), 'loss_cross_entropy': Array(0.13743411, dtype=float32)}


  5%|▌         | 51769/1000000 [2:22:17<35:13:32,  7.48it/s]

{'loss': Array(0.15921797, dtype=float32), 'loss_cross_entropy': Array(0.14845124, dtype=float32)}


  5%|▌         | 51779/1000000 [2:22:18<25:46:11, 10.22it/s]

{'loss': Array(0.16569844, dtype=float32), 'loss_cross_entropy': Array(0.15398394, dtype=float32)}


  5%|▌         | 51789/1000000 [2:22:19<24:48:39, 10.62it/s]

{'loss': Array(0.16053504, dtype=float32), 'loss_cross_entropy': Array(0.15005045, dtype=float32)}


  5%|▌         | 51799/1000000 [2:22:21<29:32:41,  8.91it/s]

{'loss': Array(0.1740378, dtype=float32), 'loss_cross_entropy': Array(0.16202699, dtype=float32)}


  5%|▌         | 51809/1000000 [2:22:22<25:16:28, 10.42it/s]

{'loss': Array(0.15904024, dtype=float32), 'loss_cross_entropy': Array(0.1486167, dtype=float32)}


  5%|▌         | 51819/1000000 [2:22:24<30:48:58,  8.55it/s]

{'loss': Array(0.1596468, dtype=float32), 'loss_cross_entropy': Array(0.14897059, dtype=float32)}


  5%|▌         | 51829/1000000 [2:22:25<25:23:14, 10.37it/s]

{'loss': Array(0.16592696, dtype=float32), 'loss_cross_entropy': Array(0.15551592, dtype=float32)}


  5%|▌         | 51839/1000000 [2:22:27<38:08:38,  6.90it/s]

{'loss': Array(0.16499181, dtype=float32), 'loss_cross_entropy': Array(0.15365897, dtype=float32)}


  5%|▌         | 51849/1000000 [2:22:28<26:33:08,  9.92it/s]

{'loss': Array(0.1830596, dtype=float32), 'loss_cross_entropy': Array(0.17242469, dtype=float32)}


  5%|▌         | 51859/1000000 [2:22:30<38:28:24,  6.85it/s]

{'loss': Array(0.20022897, dtype=float32), 'loss_cross_entropy': Array(0.18733059, dtype=float32)}


  5%|▌         | 51869/1000000 [2:22:32<26:40:40,  9.87it/s]

{'loss': Array(0.16214682, dtype=float32), 'loss_cross_entropy': Array(0.1512331, dtype=float32)}


  5%|▌         | 51879/1000000 [2:22:33<24:33:35, 10.72it/s]

{'loss': Array(0.14532702, dtype=float32), 'loss_cross_entropy': Array(0.13492513, dtype=float32)}


  5%|▌         | 51889/1000000 [2:22:35<34:39:29,  7.60it/s]

{'loss': Array(0.16356693, dtype=float32), 'loss_cross_entropy': Array(0.15196675, dtype=float32)}


  5%|▌         | 51899/1000000 [2:22:36<26:09:13, 10.07it/s]

{'loss': Array(0.1685801, dtype=float32), 'loss_cross_entropy': Array(0.15736082, dtype=float32)}


  5%|▌         | 51909/1000000 [2:22:38<39:32:18,  6.66it/s]

{'loss': Array(0.1532799, dtype=float32), 'loss_cross_entropy': Array(0.14301552, dtype=float32)}


  5%|▌         | 51919/1000000 [2:22:39<27:03:57,  9.73it/s]

{'loss': Array(0.18175577, dtype=float32), 'loss_cross_entropy': Array(0.170009, dtype=float32)}


  5%|▌         | 51929/1000000 [2:22:40<24:55:14, 10.57it/s]

{'loss': Array(0.18069664, dtype=float32), 'loss_cross_entropy': Array(0.16828291, dtype=float32)}


  5%|▌         | 51939/1000000 [2:22:42<27:21:27,  9.63it/s]

{'loss': Array(0.16376422, dtype=float32), 'loss_cross_entropy': Array(0.15255733, dtype=float32)}


  5%|▌         | 51949/1000000 [2:22:43<24:50:00, 10.60it/s]

{'loss': Array(0.15737972, dtype=float32), 'loss_cross_entropy': Array(0.1465394, dtype=float32)}


  5%|▌         | 51959/1000000 [2:22:45<27:35:01,  9.55it/s]

{'loss': Array(0.15735723, dtype=float32), 'loss_cross_entropy': Array(0.14739601, dtype=float32)}


  5%|▌         | 51969/1000000 [2:22:46<24:47:32, 10.62it/s]

{'loss': Array(0.16227536, dtype=float32), 'loss_cross_entropy': Array(0.15116443, dtype=float32)}


  5%|▌         | 51979/1000000 [2:22:48<38:10:13,  6.90it/s]

{'loss': Array(0.17320709, dtype=float32), 'loss_cross_entropy': Array(0.16189869, dtype=float32)}


  5%|▌         | 51989/1000000 [2:22:50<27:10:42,  9.69it/s]

{'loss': Array(0.14577666, dtype=float32), 'loss_cross_entropy': Array(0.13558613, dtype=float32)}


  5%|▌         | 51999/1000000 [2:22:51<24:48:03, 10.62it/s]

{'loss': Array(0.17751043, dtype=float32), 'loss_cross_entropy': Array(0.16583718, dtype=float32)}


  5%|▌         | 52009/1000000 [2:22:59<96:21:37,  2.73it/s] 

{'loss': Array(0.1970945, dtype=float32), 'loss_cross_entropy': Array(0.1835119, dtype=float32)}


  5%|▌         | 52019/1000000 [2:23:01<36:57:04,  7.13it/s] 

{'loss': Array(0.18225391, dtype=float32), 'loss_cross_entropy': Array(0.17036755, dtype=float32)}


  5%|▌         | 52029/1000000 [2:23:03<41:13:37,  6.39it/s]

{'loss': Array(0.17799403, dtype=float32), 'loss_cross_entropy': Array(0.16652337, dtype=float32)}


  5%|▌         | 52039/1000000 [2:23:04<27:09:53,  9.69it/s]

{'loss': Array(0.17137228, dtype=float32), 'loss_cross_entropy': Array(0.16029571, dtype=float32)}


  5%|▌         | 52049/1000000 [2:23:05<25:05:24, 10.49it/s]

{'loss': Array(0.17842643, dtype=float32), 'loss_cross_entropy': Array(0.16675723, dtype=float32)}


  5%|▌         | 52059/1000000 [2:23:07<34:49:18,  7.56it/s]

{'loss': Array(0.17393182, dtype=float32), 'loss_cross_entropy': Array(0.1623221, dtype=float32)}


  5%|▌         | 52069/1000000 [2:23:08<27:21:11,  9.63it/s]

{'loss': Array(0.16578579, dtype=float32), 'loss_cross_entropy': Array(0.15479974, dtype=float32)}


  5%|▌         | 52079/1000000 [2:23:10<25:45:32, 10.22it/s]

{'loss': Array(0.15623516, dtype=float32), 'loss_cross_entropy': Array(0.14601336, dtype=float32)}


  5%|▌         | 52089/1000000 [2:23:12<30:18:46,  8.69it/s]

{'loss': Array(0.14888899, dtype=float32), 'loss_cross_entropy': Array(0.13888521, dtype=float32)}


  5%|▌         | 52099/1000000 [2:23:13<25:35:05, 10.29it/s]

{'loss': Array(0.16955088, dtype=float32), 'loss_cross_entropy': Array(0.1582455, dtype=float32)}


  5%|▌         | 52109/1000000 [2:23:15<31:44:56,  8.29it/s]

{'loss': Array(0.18188082, dtype=float32), 'loss_cross_entropy': Array(0.17010316, dtype=float32)}


  5%|▌         | 52119/1000000 [2:23:16<25:28:36, 10.33it/s]

{'loss': Array(0.17805985, dtype=float32), 'loss_cross_entropy': Array(0.16660564, dtype=float32)}


  5%|▌         | 52129/1000000 [2:23:17<25:03:59, 10.50it/s]

{'loss': Array(0.14268175, dtype=float32), 'loss_cross_entropy': Array(0.13281587, dtype=float32)}


  5%|▌         | 52139/1000000 [2:23:19<28:03:24,  9.38it/s]

{'loss': Array(0.16646455, dtype=float32), 'loss_cross_entropy': Array(0.15549059, dtype=float32)}


  5%|▌         | 52149/1000000 [2:23:21<26:28:41,  9.94it/s]

{'loss': Array(0.16742274, dtype=float32), 'loss_cross_entropy': Array(0.1553881, dtype=float32)}


  5%|▌         | 52159/1000000 [2:23:22<34:44:03,  7.58it/s]

{'loss': Array(0.17842868, dtype=float32), 'loss_cross_entropy': Array(0.1671691, dtype=float32)}


  5%|▌         | 52169/1000000 [2:23:24<26:52:46,  9.80it/s]

{'loss': Array(0.17238414, dtype=float32), 'loss_cross_entropy': Array(0.16158131, dtype=float32)}


  5%|▌         | 52179/1000000 [2:23:25<24:47:23, 10.62it/s]

{'loss': Array(0.1582775, dtype=float32), 'loss_cross_entropy': Array(0.14805733, dtype=float32)}


  5%|▌         | 52189/1000000 [2:23:27<27:22:02,  9.62it/s]

{'loss': Array(0.16093664, dtype=float32), 'loss_cross_entropy': Array(0.15080516, dtype=float32)}


  5%|▌         | 52199/1000000 [2:23:28<24:39:09, 10.68it/s]

{'loss': Array(0.16893259, dtype=float32), 'loss_cross_entropy': Array(0.15817113, dtype=float32)}


  5%|▌         | 52209/1000000 [2:23:30<29:19:30,  8.98it/s]

{'loss': Array(0.17953609, dtype=float32), 'loss_cross_entropy': Array(0.16791679, dtype=float32)}


  5%|▌         | 52219/1000000 [2:23:31<24:54:17, 10.57it/s]

{'loss': Array(0.16311666, dtype=float32), 'loss_cross_entropy': Array(0.15226743, dtype=float32)}


  5%|▌         | 52229/1000000 [2:23:33<38:43:43,  6.80it/s]

{'loss': Array(0.16806419, dtype=float32), 'loss_cross_entropy': Array(0.15737078, dtype=float32)}


  5%|▌         | 52239/1000000 [2:23:34<26:57:42,  9.76it/s]

{'loss': Array(0.15687372, dtype=float32), 'loss_cross_entropy': Array(0.1467007, dtype=float32)}


  5%|▌         | 52249/1000000 [2:23:36<24:44:11, 10.64it/s]

{'loss': Array(0.17769225, dtype=float32), 'loss_cross_entropy': Array(0.16516523, dtype=float32)}


  5%|▌         | 52259/1000000 [2:23:37<31:39:17,  8.32it/s]

{'loss': Array(0.17330913, dtype=float32), 'loss_cross_entropy': Array(0.16161527, dtype=float32)}


  5%|▌         | 52269/1000000 [2:23:39<25:40:01, 10.26it/s]

{'loss': Array(0.15378474, dtype=float32), 'loss_cross_entropy': Array(0.14266916, dtype=float32)}


  5%|▌         | 52279/1000000 [2:23:40<34:32:12,  7.62it/s]

{'loss': Array(0.15382385, dtype=float32), 'loss_cross_entropy': Array(0.14327411, dtype=float32)}


  5%|▌         | 52289/1000000 [2:23:42<25:36:20, 10.28it/s]

{'loss': Array(0.15949886, dtype=float32), 'loss_cross_entropy': Array(0.14819987, dtype=float32)}


  5%|▌         | 52299/1000000 [2:23:43<34:23:17,  7.66it/s]

{'loss': Array(0.17038937, dtype=float32), 'loss_cross_entropy': Array(0.15933302, dtype=float32)}


  5%|▌         | 52309/1000000 [2:23:45<25:49:32, 10.19it/s]

{'loss': Array(0.16188744, dtype=float32), 'loss_cross_entropy': Array(0.15141426, dtype=float32)}


  5%|▌         | 52319/1000000 [2:23:46<24:54:06, 10.57it/s]

{'loss': Array(0.15427022, dtype=float32), 'loss_cross_entropy': Array(0.1436152, dtype=float32)}


  5%|▌         | 52329/1000000 [2:23:48<28:56:58,  9.09it/s]

{'loss': Array(0.17412189, dtype=float32), 'loss_cross_entropy': Array(0.16316499, dtype=float32)}


  5%|▌         | 52339/1000000 [2:23:49<25:20:58, 10.38it/s]

{'loss': Array(0.19554289, dtype=float32), 'loss_cross_entropy': Array(0.18266857, dtype=float32)}


  5%|▌         | 52349/1000000 [2:23:51<31:46:54,  8.28it/s]

{'loss': Array(0.16708851, dtype=float32), 'loss_cross_entropy': Array(0.15635893, dtype=float32)}


  5%|▌         | 52359/1000000 [2:23:52<25:40:50, 10.25it/s]

{'loss': Array(0.16861583, dtype=float32), 'loss_cross_entropy': Array(0.1577106, dtype=float32)}


  5%|▌         | 52369/1000000 [2:23:54<38:55:28,  6.76it/s]

{'loss': Array(0.17688094, dtype=float32), 'loss_cross_entropy': Array(0.16560559, dtype=float32)}


  5%|▌         | 52379/1000000 [2:23:55<26:56:19,  9.77it/s]

{'loss': Array(0.16040431, dtype=float32), 'loss_cross_entropy': Array(0.14911254, dtype=float32)}


  5%|▌         | 52389/1000000 [2:23:57<39:12:37,  6.71it/s]

{'loss': Array(0.16581158, dtype=float32), 'loss_cross_entropy': Array(0.15447246, dtype=float32)}


  5%|▌         | 52399/1000000 [2:23:58<27:20:23,  9.63it/s]

{'loss': Array(0.15327916, dtype=float32), 'loss_cross_entropy': Array(0.14289796, dtype=float32)}


  5%|▌         | 52409/1000000 [2:24:00<25:43:39, 10.23it/s]

{'loss': Array(0.17690444, dtype=float32), 'loss_cross_entropy': Array(0.16579294, dtype=float32)}


  5%|▌         | 52419/1000000 [2:24:02<35:35:17,  7.40it/s]

{'loss': Array(0.15281929, dtype=float32), 'loss_cross_entropy': Array(0.14270318, dtype=float32)}


  5%|▌         | 52429/1000000 [2:24:03<27:40:45,  9.51it/s]

{'loss': Array(0.1610316, dtype=float32), 'loss_cross_entropy': Array(0.15078664, dtype=float32)}


  5%|▌         | 52439/1000000 [2:24:05<40:26:48,  6.51it/s]

{'loss': Array(0.18663853, dtype=float32), 'loss_cross_entropy': Array(0.17508285, dtype=float32)}


  5%|▌         | 52449/1000000 [2:24:06<27:08:33,  9.70it/s]

{'loss': Array(0.1638454, dtype=float32), 'loss_cross_entropy': Array(0.15317053, dtype=float32)}


  5%|▌         | 52459/1000000 [2:24:08<24:54:37, 10.57it/s]

{'loss': Array(0.15946084, dtype=float32), 'loss_cross_entropy': Array(0.14903836, dtype=float32)}


  5%|▌         | 52469/1000000 [2:24:09<27:59:29,  9.40it/s]

{'loss': Array(0.16673368, dtype=float32), 'loss_cross_entropy': Array(0.15550931, dtype=float32)}


  5%|▌         | 52479/1000000 [2:24:11<25:03:06, 10.51it/s]

{'loss': Array(0.18133584, dtype=float32), 'loss_cross_entropy': Array(0.1700596, dtype=float32)}


  5%|▌         | 52489/1000000 [2:24:12<28:02:08,  9.39it/s]

{'loss': Array(0.15871663, dtype=float32), 'loss_cross_entropy': Array(0.1481479, dtype=float32)}


  5%|▌         | 52499/1000000 [2:24:14<25:19:53, 10.39it/s]

{'loss': Array(0.16933368, dtype=float32), 'loss_cross_entropy': Array(0.15789372, dtype=float32)}


  5%|▌         | 52509/1000000 [2:24:22<98:55:04,  2.66it/s] 

{'loss': Array(0.16882496, dtype=float32), 'loss_cross_entropy': Array(0.15841666, dtype=float32)}


  5%|▌         | 52519/1000000 [2:24:23<37:38:30,  6.99it/s] 

{'loss': Array(0.1814134, dtype=float32), 'loss_cross_entropy': Array(0.16946253, dtype=float32)}


  5%|▌         | 52529/1000000 [2:24:25<27:26:12,  9.59it/s]

{'loss': Array(0.1684402, dtype=float32), 'loss_cross_entropy': Array(0.15744932, dtype=float32)}


  5%|▌         | 52539/1000000 [2:24:26<35:18:41,  7.45it/s]

{'loss': Array(0.16306335, dtype=float32), 'loss_cross_entropy': Array(0.15232871, dtype=float32)}


  5%|▌         | 52549/1000000 [2:24:28<26:08:03, 10.07it/s]

{'loss': Array(0.15399888, dtype=float32), 'loss_cross_entropy': Array(0.14384134, dtype=float32)}


  5%|▌         | 52559/1000000 [2:24:29<24:44:30, 10.64it/s]

{'loss': Array(0.1783043, dtype=float32), 'loss_cross_entropy': Array(0.16653119, dtype=float32)}


  5%|▌         | 52569/1000000 [2:24:31<28:27:03,  9.25it/s]

{'loss': Array(0.17030112, dtype=float32), 'loss_cross_entropy': Array(0.15862255, dtype=float32)}


  5%|▌         | 52579/1000000 [2:24:32<24:48:03, 10.61it/s]

{'loss': Array(0.15494578, dtype=float32), 'loss_cross_entropy': Array(0.14357117, dtype=float32)}


  5%|▌         | 52589/1000000 [2:24:34<31:44:32,  8.29it/s]

{'loss': Array(0.15981063, dtype=float32), 'loss_cross_entropy': Array(0.15023182, dtype=float32)}


  5%|▌         | 52599/1000000 [2:24:35<26:00:34, 10.12it/s]

{'loss': Array(0.16938715, dtype=float32), 'loss_cross_entropy': Array(0.15815896, dtype=float32)}


  5%|▌         | 52609/1000000 [2:24:37<35:11:35,  7.48it/s]

{'loss': Array(0.16761184, dtype=float32), 'loss_cross_entropy': Array(0.15610489, dtype=float32)}


  5%|▌         | 52619/1000000 [2:24:38<25:59:27, 10.13it/s]

{'loss': Array(0.18719263, dtype=float32), 'loss_cross_entropy': Array(0.17532077, dtype=float32)}


  5%|▌         | 52629/1000000 [2:24:40<24:16:11, 10.84it/s]

{'loss': Array(0.17712083, dtype=float32), 'loss_cross_entropy': Array(0.16509764, dtype=float32)}


  5%|▌         | 52639/1000000 [2:24:42<31:40:55,  8.31it/s]

{'loss': Array(0.1792205, dtype=float32), 'loss_cross_entropy': Array(0.1681386, dtype=float32)}


  5%|▌         | 52649/1000000 [2:24:43<26:01:09, 10.11it/s]

{'loss': Array(0.17083769, dtype=float32), 'loss_cross_entropy': Array(0.15970631, dtype=float32)}


  5%|▌         | 52659/1000000 [2:24:44<24:57:05, 10.55it/s]

{'loss': Array(0.15146597, dtype=float32), 'loss_cross_entropy': Array(0.14091195, dtype=float32)}


  5%|▌         | 52669/1000000 [2:24:46<28:15:51,  9.31it/s]

{'loss': Array(0.17580925, dtype=float32), 'loss_cross_entropy': Array(0.16426028, dtype=float32)}


  5%|▌         | 52679/1000000 [2:24:47<25:09:13, 10.46it/s]

{'loss': Array(0.17963599, dtype=float32), 'loss_cross_entropy': Array(0.16736086, dtype=float32)}


  5%|▌         | 52689/1000000 [2:24:49<31:15:48,  8.42it/s]

{'loss': Array(0.16333081, dtype=float32), 'loss_cross_entropy': Array(0.1520693, dtype=float32)}


  5%|▌         | 52699/1000000 [2:24:51<26:10:54, 10.05it/s]

{'loss': Array(0.1649758, dtype=float32), 'loss_cross_entropy': Array(0.15483448, dtype=float32)}


  5%|▌         | 52709/1000000 [2:24:52<38:42:52,  6.80it/s]

{'loss': Array(0.1654962, dtype=float32), 'loss_cross_entropy': Array(0.15490793, dtype=float32)}


  5%|▌         | 52719/1000000 [2:24:54<27:02:26,  9.73it/s]

{'loss': Array(0.16563627, dtype=float32), 'loss_cross_entropy': Array(0.15450397, dtype=float32)}


  5%|▌         | 52729/1000000 [2:24:55<24:28:15, 10.75it/s]

{'loss': Array(0.17055218, dtype=float32), 'loss_cross_entropy': Array(0.1592121, dtype=float32)}


  5%|▌         | 52739/1000000 [2:24:57<33:42:44,  7.81it/s]

{'loss': Array(0.1556838, dtype=float32), 'loss_cross_entropy': Array(0.14527957, dtype=float32)}


  5%|▌         | 52749/1000000 [2:24:58<25:46:30, 10.21it/s]

{'loss': Array(0.19077514, dtype=float32), 'loss_cross_entropy': Array(0.17830744, dtype=float32)}


  5%|▌         | 52759/1000000 [2:24:59<25:25:50, 10.35it/s]

{'loss': Array(0.1542533, dtype=float32), 'loss_cross_entropy': Array(0.14336853, dtype=float32)}


  5%|▌         | 52769/1000000 [2:25:01<30:12:34,  8.71it/s]

{'loss': Array(0.15651067, dtype=float32), 'loss_cross_entropy': Array(0.14619762, dtype=float32)}


  5%|▌         | 52779/1000000 [2:25:03<26:51:19,  9.80it/s]

{'loss': Array(0.17022924, dtype=float32), 'loss_cross_entropy': Array(0.15958223, dtype=float32)}


  5%|▌         | 52789/1000000 [2:25:04<32:52:30,  8.00it/s]

{'loss': Array(0.18274811, dtype=float32), 'loss_cross_entropy': Array(0.17146568, dtype=float32)}


  5%|▌         | 52799/1000000 [2:25:06<26:22:42,  9.97it/s]

{'loss': Array(0.16027145, dtype=float32), 'loss_cross_entropy': Array(0.14943634, dtype=float32)}


  5%|▌         | 52809/1000000 [2:25:07<24:29:44, 10.74it/s]

{'loss': Array(0.15669213, dtype=float32), 'loss_cross_entropy': Array(0.14605363, dtype=float32)}


  5%|▌         | 52819/1000000 [2:25:09<28:08:15,  9.35it/s]

{'loss': Array(0.16048357, dtype=float32), 'loss_cross_entropy': Array(0.1497291, dtype=float32)}


  5%|▌         | 52829/1000000 [2:25:10<25:04:39, 10.49it/s]

{'loss': Array(0.17083298, dtype=float32), 'loss_cross_entropy': Array(0.16031606, dtype=float32)}


  5%|▌         | 52839/1000000 [2:25:12<35:10:28,  7.48it/s]

{'loss': Array(0.16300084, dtype=float32), 'loss_cross_entropy': Array(0.15284993, dtype=float32)}


  5%|▌         | 52849/1000000 [2:25:13<26:24:53,  9.96it/s]

{'loss': Array(0.17726223, dtype=float32), 'loss_cross_entropy': Array(0.16590767, dtype=float32)}


  5%|▌         | 52859/1000000 [2:25:15<25:07:09, 10.47it/s]

{'loss': Array(0.18381763, dtype=float32), 'loss_cross_entropy': Array(0.17215143, dtype=float32)}


  5%|▌         | 52869/1000000 [2:25:16<28:10:23,  9.34it/s]

{'loss': Array(0.15378283, dtype=float32), 'loss_cross_entropy': Array(0.14435066, dtype=float32)}


  5%|▌         | 52879/1000000 [2:25:18<24:44:48, 10.63it/s]

{'loss': Array(0.1686436, dtype=float32), 'loss_cross_entropy': Array(0.15785122, dtype=float32)}


  5%|▌         | 52889/1000000 [2:25:20<29:48:33,  8.83it/s]

{'loss': Array(0.18404865, dtype=float32), 'loss_cross_entropy': Array(0.17280617, dtype=float32)}


  5%|▌         | 52899/1000000 [2:25:21<26:03:18, 10.10it/s]

{'loss': Array(0.17123488, dtype=float32), 'loss_cross_entropy': Array(0.15988092, dtype=float32)}


  5%|▌         | 52909/1000000 [2:25:23<38:59:10,  6.75it/s]

{'loss': Array(0.18417878, dtype=float32), 'loss_cross_entropy': Array(0.17284174, dtype=float32)}


  5%|▌         | 52919/1000000 [2:25:24<26:26:37,  9.95it/s]

{'loss': Array(0.15138578, dtype=float32), 'loss_cross_entropy': Array(0.14169572, dtype=float32)}


  5%|▌         | 52929/1000000 [2:25:25<24:54:17, 10.56it/s]

{'loss': Array(0.17616294, dtype=float32), 'loss_cross_entropy': Array(0.16486603, dtype=float32)}


  5%|▌         | 52939/1000000 [2:25:27<31:08:58,  8.45it/s]

{'loss': Array(0.15772384, dtype=float32), 'loss_cross_entropy': Array(0.14745057, dtype=float32)}


  5%|▌         | 52949/1000000 [2:25:28<25:21:55, 10.37it/s]

{'loss': Array(0.16084132, dtype=float32), 'loss_cross_entropy': Array(0.14989412, dtype=float32)}


  5%|▌         | 52959/1000000 [2:25:30<34:27:29,  7.63it/s]

{'loss': Array(0.1682999, dtype=float32), 'loss_cross_entropy': Array(0.15712665, dtype=float32)}


  5%|▌         | 52969/1000000 [2:25:31<26:23:25,  9.97it/s]

{'loss': Array(0.1508102, dtype=float32), 'loss_cross_entropy': Array(0.14129627, dtype=float32)}


  5%|▌         | 52979/1000000 [2:25:33<34:21:07,  7.66it/s]

{'loss': Array(0.15264577, dtype=float32), 'loss_cross_entropy': Array(0.14243641, dtype=float32)}


  5%|▌         | 52989/1000000 [2:25:34<25:47:04, 10.20it/s]

{'loss': Array(0.16921695, dtype=float32), 'loss_cross_entropy': Array(0.15788089, dtype=float32)}


  5%|▌         | 52999/1000000 [2:25:36<24:20:32, 10.81it/s]

{'loss': Array(0.17762059, dtype=float32), 'loss_cross_entropy': Array(0.16583438, dtype=float32)}


  5%|▌         | 53009/1000000 [2:25:44<91:19:55,  2.88it/s] 

{'loss': Array(0.17395894, dtype=float32), 'loss_cross_entropy': Array(0.16276067, dtype=float32)}


  5%|▌         | 53019/1000000 [2:25:45<35:52:40,  7.33it/s]

{'loss': Array(0.15885933, dtype=float32), 'loss_cross_entropy': Array(0.14825472, dtype=float32)}


  5%|▌         | 53029/1000000 [2:25:47<33:20:38,  7.89it/s]

{'loss': Array(0.17838427, dtype=float32), 'loss_cross_entropy': Array(0.16715996, dtype=float32)}


  5%|▌         | 53039/1000000 [2:25:49<26:17:42, 10.00it/s]

{'loss': Array(0.17893338, dtype=float32), 'loss_cross_entropy': Array(0.16712955, dtype=float32)}


  5%|▌         | 53049/1000000 [2:25:50<39:42:48,  6.62it/s]

{'loss': Array(0.1931151, dtype=float32), 'loss_cross_entropy': Array(0.18079379, dtype=float32)}


  5%|▌         | 53059/1000000 [2:25:52<26:39:03,  9.87it/s]

{'loss': Array(0.17647317, dtype=float32), 'loss_cross_entropy': Array(0.16501139, dtype=float32)}


  5%|▌         | 53069/1000000 [2:25:53<38:23:49,  6.85it/s]

{'loss': Array(0.1621416, dtype=float32), 'loss_cross_entropy': Array(0.15173618, dtype=float32)}


  5%|▌         | 53079/1000000 [2:25:55<26:32:18,  9.91it/s]

{'loss': Array(0.1762095, dtype=float32), 'loss_cross_entropy': Array(0.16526523, dtype=float32)}


  5%|▌         | 53089/1000000 [2:25:56<24:31:18, 10.73it/s]

{'loss': Array(0.1773303, dtype=float32), 'loss_cross_entropy': Array(0.16485572, dtype=float32)}


  5%|▌         | 53099/1000000 [2:25:58<34:32:20,  7.62it/s]

{'loss': Array(0.17090623, dtype=float32), 'loss_cross_entropy': Array(0.15953302, dtype=float32)}


  5%|▌         | 53109/1000000 [2:25:59<26:30:39,  9.92it/s]

{'loss': Array(0.17017044, dtype=float32), 'loss_cross_entropy': Array(0.15932362, dtype=float32)}


  5%|▌         | 53119/1000000 [2:26:01<40:02:50,  6.57it/s]

{'loss': Array(0.16225497, dtype=float32), 'loss_cross_entropy': Array(0.15193391, dtype=float32)}


  5%|▌         | 53129/1000000 [2:26:02<27:37:42,  9.52it/s]

{'loss': Array(0.16532199, dtype=float32), 'loss_cross_entropy': Array(0.15479052, dtype=float32)}


  5%|▌         | 53139/1000000 [2:26:04<25:57:55, 10.13it/s]

{'loss': Array(0.18357383, dtype=float32), 'loss_cross_entropy': Array(0.17203371, dtype=float32)}


  5%|▌         | 53149/1000000 [2:26:06<29:20:23,  8.96it/s]

{'loss': Array(0.1746003, dtype=float32), 'loss_cross_entropy': Array(0.16343741, dtype=float32)}


  5%|▌         | 53159/1000000 [2:26:07<25:22:05, 10.37it/s]

{'loss': Array(0.15734361, dtype=float32), 'loss_cross_entropy': Array(0.14650331, dtype=float32)}


  5%|▌         | 53169/1000000 [2:26:09<28:08:56,  9.34it/s]

{'loss': Array(0.17279987, dtype=float32), 'loss_cross_entropy': Array(0.1623677, dtype=float32)}


  5%|▌         | 53179/1000000 [2:26:10<24:46:19, 10.62it/s]

{'loss': Array(0.15317082, dtype=float32), 'loss_cross_entropy': Array(0.14307933, dtype=float32)}


  5%|▌         | 53189/1000000 [2:26:12<38:48:48,  6.78it/s]

{'loss': Array(0.17634797, dtype=float32), 'loss_cross_entropy': Array(0.1653919, dtype=float32)}


  5%|▌         | 53199/1000000 [2:26:13<26:31:23,  9.92it/s]

{'loss': Array(0.16551475, dtype=float32), 'loss_cross_entropy': Array(0.15509252, dtype=float32)}


  5%|▌         | 53209/1000000 [2:26:14<25:03:52, 10.49it/s]

{'loss': Array(0.18252376, dtype=float32), 'loss_cross_entropy': Array(0.17037, dtype=float32)}


  5%|▌         | 53219/1000000 [2:26:16<31:09:55,  8.44it/s]

{'loss': Array(0.17179051, dtype=float32), 'loss_cross_entropy': Array(0.16040328, dtype=float32)}


  5%|▌         | 53229/1000000 [2:26:18<26:31:25,  9.92it/s]

{'loss': Array(0.16425273, dtype=float32), 'loss_cross_entropy': Array(0.1537361, dtype=float32)}


  5%|▌         | 53239/1000000 [2:26:19<38:44:56,  6.79it/s]

{'loss': Array(0.18055776, dtype=float32), 'loss_cross_entropy': Array(0.16845076, dtype=float32)}


  5%|▌         | 53249/1000000 [2:26:21<26:28:08,  9.94it/s]

{'loss': Array(0.16075355, dtype=float32), 'loss_cross_entropy': Array(0.15085149, dtype=float32)}


  5%|▌         | 53259/1000000 [2:26:22<24:46:54, 10.61it/s]

{'loss': Array(0.19183935, dtype=float32), 'loss_cross_entropy': Array(0.179418, dtype=float32)}


  5%|▌         | 53269/1000000 [2:26:24<29:16:16,  8.98it/s]

{'loss': Array(0.15599193, dtype=float32), 'loss_cross_entropy': Array(0.14591071, dtype=float32)}


  5%|▌         | 53279/1000000 [2:26:25<25:20:05, 10.38it/s]

{'loss': Array(0.17411283, dtype=float32), 'loss_cross_entropy': Array(0.16288923, dtype=float32)}


  5%|▌         | 53289/1000000 [2:26:27<34:43:47,  7.57it/s]

{'loss': Array(0.16498783, dtype=float32), 'loss_cross_entropy': Array(0.15464906, dtype=float32)}


  5%|▌         | 53299/1000000 [2:26:28<26:00:57, 10.11it/s]

{'loss': Array(0.16398641, dtype=float32), 'loss_cross_entropy': Array(0.15351208, dtype=float32)}


  5%|▌         | 53309/1000000 [2:26:29<24:47:46, 10.61it/s]

{'loss': Array(0.17193143, dtype=float32), 'loss_cross_entropy': Array(0.16100836, dtype=float32)}


  5%|▌         | 53319/1000000 [2:26:31<31:08:53,  8.44it/s]

{'loss': Array(0.17275447, dtype=float32), 'loss_cross_entropy': Array(0.16138314, dtype=float32)}


  5%|▌         | 53329/1000000 [2:26:32<25:28:12, 10.32it/s]

{'loss': Array(0.17985903, dtype=float32), 'loss_cross_entropy': Array(0.16793795, dtype=float32)}


  5%|▌         | 53339/1000000 [2:26:34<24:26:13, 10.76it/s]

{'loss': Array(0.1505748, dtype=float32), 'loss_cross_entropy': Array(0.14048977, dtype=float32)}


  5%|▌         | 53349/1000000 [2:26:36<28:21:59,  9.27it/s]

{'loss': Array(0.18544827, dtype=float32), 'loss_cross_entropy': Array(0.1751211, dtype=float32)}


  5%|▌         | 53359/1000000 [2:26:37<25:16:51, 10.40it/s]

{'loss': Array(0.16849379, dtype=float32), 'loss_cross_entropy': Array(0.15739349, dtype=float32)}


  5%|▌         | 53369/1000000 [2:26:39<31:20:01,  8.39it/s]

{'loss': Array(0.17196508, dtype=float32), 'loss_cross_entropy': Array(0.1607086, dtype=float32)}


  5%|▌         | 53379/1000000 [2:26:40<25:26:43, 10.33it/s]

{'loss': Array(0.16808783, dtype=float32), 'loss_cross_entropy': Array(0.15742807, dtype=float32)}


  5%|▌         | 53389/1000000 [2:26:42<37:57:03,  6.93it/s]

{'loss': Array(0.15859179, dtype=float32), 'loss_cross_entropy': Array(0.14837007, dtype=float32)}


  5%|▌         | 53399/1000000 [2:26:43<26:20:24,  9.98it/s]

{'loss': Array(0.1603036, dtype=float32), 'loss_cross_entropy': Array(0.1494893, dtype=float32)}


  5%|▌         | 53409/1000000 [2:26:45<39:24:37,  6.67it/s]

{'loss': Array(0.1578762, dtype=float32), 'loss_cross_entropy': Array(0.14795019, dtype=float32)}


  5%|▌         | 53419/1000000 [2:26:46<27:16:42,  9.64it/s]

{'loss': Array(0.1603761, dtype=float32), 'loss_cross_entropy': Array(0.15028863, dtype=float32)}


  5%|▌         | 53429/1000000 [2:26:48<25:11:23, 10.44it/s]

{'loss': Array(0.17964894, dtype=float32), 'loss_cross_entropy': Array(0.16767631, dtype=float32)}


  5%|▌         | 53439/1000000 [2:26:49<33:59:08,  7.74it/s]

{'loss': Array(0.16246603, dtype=float32), 'loss_cross_entropy': Array(0.15184794, dtype=float32)}


  5%|▌         | 53449/1000000 [2:26:51<25:48:25, 10.19it/s]

{'loss': Array(0.15945624, dtype=float32), 'loss_cross_entropy': Array(0.14976561, dtype=float32)}


  5%|▌         | 53459/1000000 [2:26:52<40:21:53,  6.51it/s]

{'loss': Array(0.17586663, dtype=float32), 'loss_cross_entropy': Array(0.16478743, dtype=float32)}


  5%|▌         | 53469/1000000 [2:26:54<26:54:01,  9.77it/s]

{'loss': Array(0.18061705, dtype=float32), 'loss_cross_entropy': Array(0.16877137, dtype=float32)}


  5%|▌         | 53479/1000000 [2:26:55<25:09:01, 10.45it/s]

{'loss': Array(0.17162055, dtype=float32), 'loss_cross_entropy': Array(0.16068384, dtype=float32)}


  5%|▌         | 53489/1000000 [2:26:57<28:00:11,  9.39it/s]

{'loss': Array(0.1618047, dtype=float32), 'loss_cross_entropy': Array(0.15139256, dtype=float32)}


  5%|▌         | 53499/1000000 [2:26:58<24:46:49, 10.61it/s]

{'loss': Array(0.13612205, dtype=float32), 'loss_cross_entropy': Array(0.12685835, dtype=float32)}


  5%|▌         | 53509/1000000 [2:27:07<90:52:05,  2.89it/s] 

{'loss': Array(0.177225, dtype=float32), 'loss_cross_entropy': Array(0.16605726, dtype=float32)}


  5%|▌         | 53519/1000000 [2:27:08<36:02:45,  7.29it/s]

{'loss': Array(0.16480507, dtype=float32), 'loss_cross_entropy': Array(0.15409283, dtype=float32)}


  5%|▌         | 53529/1000000 [2:27:10<34:21:11,  7.65it/s]

{'loss': Array(0.16754007, dtype=float32), 'loss_cross_entropy': Array(0.15707278, dtype=float32)}


  5%|▌         | 53539/1000000 [2:27:11<26:07:42, 10.06it/s]

{'loss': Array(0.17647426, dtype=float32), 'loss_cross_entropy': Array(0.16475594, dtype=float32)}


  5%|▌         | 53549/1000000 [2:27:13<39:03:31,  6.73it/s]

{'loss': Array(0.17660587, dtype=float32), 'loss_cross_entropy': Array(0.16577303, dtype=float32)}


  5%|▌         | 53559/1000000 [2:27:14<27:27:34,  9.57it/s]

{'loss': Array(0.17369835, dtype=float32), 'loss_cross_entropy': Array(0.16308837, dtype=float32)}


  5%|▌         | 53569/1000000 [2:27:16<25:26:35, 10.33it/s]

{'loss': Array(0.16128655, dtype=float32), 'loss_cross_entropy': Array(0.15002425, dtype=float32)}


  5%|▌         | 53579/1000000 [2:27:17<28:24:31,  9.25it/s]

{'loss': Array(0.17998734, dtype=float32), 'loss_cross_entropy': Array(0.16865206, dtype=float32)}


  5%|▌         | 53589/1000000 [2:27:19<25:02:11, 10.50it/s]

{'loss': Array(0.17485349, dtype=float32), 'loss_cross_entropy': Array(0.1634883, dtype=float32)}


  5%|▌         | 53599/1000000 [2:27:20<38:39:59,  6.80it/s]

{'loss': Array(0.16672917, dtype=float32), 'loss_cross_entropy': Array(0.15686888, dtype=float32)}


  5%|▌         | 53609/1000000 [2:27:22<27:10:32,  9.67it/s]

{'loss': Array(0.16218749, dtype=float32), 'loss_cross_entropy': Array(0.15201794, dtype=float32)}


  5%|▌         | 53619/1000000 [2:27:23<24:39:58, 10.66it/s]

{'loss': Array(0.17623097, dtype=float32), 'loss_cross_entropy': Array(0.16528046, dtype=float32)}


  5%|▌         | 53629/1000000 [2:27:25<29:10:13,  9.01it/s]

{'loss': Array(0.15597843, dtype=float32), 'loss_cross_entropy': Array(0.14581403, dtype=float32)}


  5%|▌         | 53639/1000000 [2:27:26<25:21:36, 10.37it/s]

{'loss': Array(0.16571108, dtype=float32), 'loss_cross_entropy': Array(0.15522781, dtype=float32)}


  5%|▌         | 53649/1000000 [2:27:28<34:10:59,  7.69it/s]

{'loss': Array(0.19611774, dtype=float32), 'loss_cross_entropy': Array(0.18407957, dtype=float32)}


  5%|▌         | 53659/1000000 [2:27:29<26:06:06, 10.07it/s]

{'loss': Array(0.18291692, dtype=float32), 'loss_cross_entropy': Array(0.17071973, dtype=float32)}


  5%|▌         | 53669/1000000 [2:27:31<38:13:20,  6.88it/s]

{'loss': Array(0.15890382, dtype=float32), 'loss_cross_entropy': Array(0.14793506, dtype=float32)}


  5%|▌         | 53679/1000000 [2:27:32<26:13:12, 10.03it/s]

{'loss': Array(0.16721348, dtype=float32), 'loss_cross_entropy': Array(0.15606605, dtype=float32)}


  5%|▌         | 53689/1000000 [2:27:34<24:52:55, 10.56it/s]

{'loss': Array(0.14528438, dtype=float32), 'loss_cross_entropy': Array(0.13605952, dtype=float32)}


  5%|▌         | 53699/1000000 [2:27:35<31:32:39,  8.33it/s]

{'loss': Array(0.16686986, dtype=float32), 'loss_cross_entropy': Array(0.15562502, dtype=float32)}


  5%|▌         | 53709/1000000 [2:27:37<25:00:49, 10.51it/s]

{'loss': Array(0.17386936, dtype=float32), 'loss_cross_entropy': Array(0.1624614, dtype=float32)}


  5%|▌         | 53719/1000000 [2:27:38<34:22:08,  7.65it/s]

{'loss': Array(0.1763447, dtype=float32), 'loss_cross_entropy': Array(0.16500263, dtype=float32)}


  5%|▌         | 53729/1000000 [2:27:40<26:29:38,  9.92it/s]

{'loss': Array(0.16701868, dtype=float32), 'loss_cross_entropy': Array(0.15621437, dtype=float32)}


  5%|▌         | 53739/1000000 [2:27:41<25:36:20, 10.27it/s]

{'loss': Array(0.17166345, dtype=float32), 'loss_cross_entropy': Array(0.160841, dtype=float32)}


  5%|▌         | 53749/1000000 [2:27:43<28:32:56,  9.21it/s]

{'loss': Array(0.16891865, dtype=float32), 'loss_cross_entropy': Array(0.1579795, dtype=float32)}


  5%|▌         | 53759/1000000 [2:27:44<25:13:50, 10.42it/s]

{'loss': Array(0.15251103, dtype=float32), 'loss_cross_entropy': Array(0.14195642, dtype=float32)}


  5%|▌         | 53769/1000000 [2:27:46<38:08:00,  6.89it/s]

{'loss': Array(0.15980437, dtype=float32), 'loss_cross_entropy': Array(0.14894979, dtype=float32)}


  5%|▌         | 53779/1000000 [2:27:47<26:44:45,  9.83it/s]

{'loss': Array(0.18838249, dtype=float32), 'loss_cross_entropy': Array(0.17630415, dtype=float32)}


  5%|▌         | 53789/1000000 [2:27:49<24:20:20, 10.80it/s]

{'loss': Array(0.16712026, dtype=float32), 'loss_cross_entropy': Array(0.15638079, dtype=float32)}


  5%|▌         | 53799/1000000 [2:27:51<31:58:16,  8.22it/s]

{'loss': Array(0.16266961, dtype=float32), 'loss_cross_entropy': Array(0.15182418, dtype=float32)}


  5%|▌         | 53809/1000000 [2:27:52<25:33:40, 10.28it/s]

{'loss': Array(0.15648475, dtype=float32), 'loss_cross_entropy': Array(0.14641403, dtype=float32)}


  5%|▌         | 53819/1000000 [2:27:54<35:03:48,  7.50it/s]

{'loss': Array(0.14787288, dtype=float32), 'loss_cross_entropy': Array(0.13808566, dtype=float32)}


  5%|▌         | 53829/1000000 [2:27:55<26:25:31,  9.95it/s]

{'loss': Array(0.1751932, dtype=float32), 'loss_cross_entropy': Array(0.16384245, dtype=float32)}


  5%|▌         | 53839/1000000 [2:27:57<34:47:17,  7.55it/s]

{'loss': Array(0.17579435, dtype=float32), 'loss_cross_entropy': Array(0.16489777, dtype=float32)}


  5%|▌         | 53849/1000000 [2:27:58<26:24:59,  9.95it/s]

{'loss': Array(0.17152789, dtype=float32), 'loss_cross_entropy': Array(0.16046236, dtype=float32)}


  5%|▌         | 53859/1000000 [2:28:00<26:09:10, 10.05it/s]

{'loss': Array(0.17801206, dtype=float32), 'loss_cross_entropy': Array(0.16716777, dtype=float32)}


  5%|▌         | 53869/1000000 [2:28:01<30:08:28,  8.72it/s]

{'loss': Array(0.1845433, dtype=float32), 'loss_cross_entropy': Array(0.17280565, dtype=float32)}


  5%|▌         | 53879/1000000 [2:28:03<26:13:36, 10.02it/s]

{'loss': Array(0.15721071, dtype=float32), 'loss_cross_entropy': Array(0.14651182, dtype=float32)}


  5%|▌         | 53889/1000000 [2:28:04<32:13:14,  8.16it/s]

{'loss': Array(0.14863525, dtype=float32), 'loss_cross_entropy': Array(0.13871497, dtype=float32)}


  5%|▌         | 53899/1000000 [2:28:06<25:41:00, 10.23it/s]

{'loss': Array(0.18681918, dtype=float32), 'loss_cross_entropy': Array(0.174989, dtype=float32)}


  5%|▌         | 53909/1000000 [2:28:08<38:40:12,  6.80it/s]

{'loss': Array(0.17391185, dtype=float32), 'loss_cross_entropy': Array(0.16289444, dtype=float32)}


  5%|▌         | 53919/1000000 [2:28:09<26:38:37,  9.86it/s]

{'loss': Array(0.15700622, dtype=float32), 'loss_cross_entropy': Array(0.14626138, dtype=float32)}


  5%|▌         | 53929/1000000 [2:28:11<39:24:02,  6.67it/s]

{'loss': Array(0.18270384, dtype=float32), 'loss_cross_entropy': Array(0.1708151, dtype=float32)}


  5%|▌         | 53939/1000000 [2:28:12<26:30:15,  9.92it/s]

{'loss': Array(0.16203465, dtype=float32), 'loss_cross_entropy': Array(0.15147133, dtype=float32)}


  5%|▌         | 53949/1000000 [2:28:13<24:59:08, 10.52it/s]

{'loss': Array(0.17745712, dtype=float32), 'loss_cross_entropy': Array(0.16650103, dtype=float32)}


  5%|▌         | 53959/1000000 [2:28:15<34:17:43,  7.66it/s]

{'loss': Array(0.16954175, dtype=float32), 'loss_cross_entropy': Array(0.15926945, dtype=float32)}


  5%|▌         | 53969/1000000 [2:28:16<26:03:10, 10.09it/s]

{'loss': Array(0.18613815, dtype=float32), 'loss_cross_entropy': Array(0.174812, dtype=float32)}


  5%|▌         | 53979/1000000 [2:28:18<38:39:50,  6.80it/s]

{'loss': Array(0.15280996, dtype=float32), 'loss_cross_entropy': Array(0.14222442, dtype=float32)}


  5%|▌         | 53989/1000000 [2:28:20<27:09:28,  9.68it/s]

{'loss': Array(0.18381988, dtype=float32), 'loss_cross_entropy': Array(0.17280917, dtype=float32)}


  5%|▌         | 53999/1000000 [2:28:21<25:02:40, 10.49it/s]

{'loss': Array(0.1830995, dtype=float32), 'loss_cross_entropy': Array(0.17133944, dtype=float32)}


  5%|▌         | 54009/1000000 [2:28:29<90:57:20,  2.89it/s] 

{'loss': Array(0.15313813, dtype=float32), 'loss_cross_entropy': Array(0.14279772, dtype=float32)}


  5%|▌         | 54019/1000000 [2:28:31<35:37:44,  7.38it/s]

{'loss': Array(0.16909042, dtype=float32), 'loss_cross_entropy': Array(0.15888742, dtype=float32)}


  5%|▌         | 54029/1000000 [2:28:32<33:28:32,  7.85it/s]

{'loss': Array(0.1909148, dtype=float32), 'loss_cross_entropy': Array(0.17941052, dtype=float32)}


  5%|▌         | 54039/1000000 [2:28:34<26:11:54, 10.03it/s]

{'loss': Array(0.17160243, dtype=float32), 'loss_cross_entropy': Array(0.16057675, dtype=float32)}


  5%|▌         | 54049/1000000 [2:28:36<39:23:04,  6.67it/s]

{'loss': Array(0.16987465, dtype=float32), 'loss_cross_entropy': Array(0.15921533, dtype=float32)}


  5%|▌         | 54059/1000000 [2:28:37<26:58:51,  9.74it/s]

{'loss': Array(0.16023682, dtype=float32), 'loss_cross_entropy': Array(0.14983194, dtype=float32)}


  5%|▌         | 54069/1000000 [2:28:38<25:06:17, 10.47it/s]

{'loss': Array(0.15992911, dtype=float32), 'loss_cross_entropy': Array(0.14874864, dtype=float32)}


  5%|▌         | 54079/1000000 [2:28:40<27:54:23,  9.42it/s]

{'loss': Array(0.16939823, dtype=float32), 'loss_cross_entropy': Array(0.1589344, dtype=float32)}


  5%|▌         | 54089/1000000 [2:28:41<25:30:59, 10.30it/s]

{'loss': Array(0.17960978, dtype=float32), 'loss_cross_entropy': Array(0.16844703, dtype=float32)}


  5%|▌         | 54099/1000000 [2:28:43<39:09:57,  6.71it/s]

{'loss': Array(0.16292046, dtype=float32), 'loss_cross_entropy': Array(0.1521734, dtype=float32)}


  5%|▌         | 54109/1000000 [2:28:45<27:12:50,  9.65it/s]

{'loss': Array(0.18050002, dtype=float32), 'loss_cross_entropy': Array(0.16861765, dtype=float32)}


  5%|▌         | 54119/1000000 [2:28:46<24:51:37, 10.57it/s]

{'loss': Array(0.15239592, dtype=float32), 'loss_cross_entropy': Array(0.14288878, dtype=float32)}


  5%|▌         | 54129/1000000 [2:28:48<28:47:58,  9.12it/s]

{'loss': Array(0.17655912, dtype=float32), 'loss_cross_entropy': Array(0.16507165, dtype=float32)}


  5%|▌         | 54139/1000000 [2:28:49<24:58:51, 10.52it/s]

{'loss': Array(0.16140074, dtype=float32), 'loss_cross_entropy': Array(0.15126516, dtype=float32)}


  5%|▌         | 54149/1000000 [2:28:51<34:41:00,  7.58it/s]

{'loss': Array(0.1636263, dtype=float32), 'loss_cross_entropy': Array(0.15320298, dtype=float32)}


  5%|▌         | 54159/1000000 [2:28:52<25:42:03, 10.22it/s]

{'loss': Array(0.16747527, dtype=float32), 'loss_cross_entropy': Array(0.15698199, dtype=float32)}


  5%|▌         | 54169/1000000 [2:28:54<38:32:50,  6.82it/s]

{'loss': Array(0.18627524, dtype=float32), 'loss_cross_entropy': Array(0.17438112, dtype=float32)}


  5%|▌         | 54179/1000000 [2:28:55<26:30:26,  9.91it/s]

{'loss': Array(0.15775411, dtype=float32), 'loss_cross_entropy': Array(0.14769796, dtype=float32)}


  5%|▌         | 54189/1000000 [2:28:56<25:36:53, 10.26it/s]

{'loss': Array(0.18308814, dtype=float32), 'loss_cross_entropy': Array(0.17202616, dtype=float32)}


  5%|▌         | 54199/1000000 [2:28:58<31:07:59,  8.44it/s]

{'loss': Array(0.15837105, dtype=float32), 'loss_cross_entropy': Array(0.1474015, dtype=float32)}


  5%|▌         | 54209/1000000 [2:29:00<26:28:14,  9.92it/s]

{'loss': Array(0.1712936, dtype=float32), 'loss_cross_entropy': Array(0.16041896, dtype=float32)}


  5%|▌         | 54219/1000000 [2:29:01<36:43:48,  7.15it/s]

{'loss': Array(0.16641344, dtype=float32), 'loss_cross_entropy': Array(0.15603147, dtype=float32)}


  5%|▌         | 54229/1000000 [2:29:03<26:31:37,  9.90it/s]

{'loss': Array(0.1498831, dtype=float32), 'loss_cross_entropy': Array(0.1391874, dtype=float32)}


  5%|▌         | 54239/1000000 [2:29:04<25:32:13, 10.29it/s]

{'loss': Array(0.14935943, dtype=float32), 'loss_cross_entropy': Array(0.13923855, dtype=float32)}


  5%|▌         | 54249/1000000 [2:29:06<29:19:13,  8.96it/s]

{'loss': Array(0.16137646, dtype=float32), 'loss_cross_entropy': Array(0.15040809, dtype=float32)}


  5%|▌         | 54259/1000000 [2:29:07<25:22:36, 10.35it/s]

{'loss': Array(0.16208716, dtype=float32), 'loss_cross_entropy': Array(0.15213345, dtype=float32)}


  5%|▌         | 54269/1000000 [2:29:09<38:22:58,  6.84it/s]

{'loss': Array(0.17253642, dtype=float32), 'loss_cross_entropy': Array(0.16150449, dtype=float32)}


  5%|▌         | 54279/1000000 [2:29:10<26:24:14,  9.95it/s]

{'loss': Array(0.17634329, dtype=float32), 'loss_cross_entropy': Array(0.16532902, dtype=float32)}


  5%|▌         | 54289/1000000 [2:29:12<24:30:04, 10.72it/s]

{'loss': Array(0.16140923, dtype=float32), 'loss_cross_entropy': Array(0.15099669, dtype=float32)}


  5%|▌         | 54299/1000000 [2:29:13<31:28:21,  8.35it/s]

{'loss': Array(0.17562886, dtype=float32), 'loss_cross_entropy': Array(0.16434418, dtype=float32)}


  5%|▌         | 54309/1000000 [2:29:15<25:59:56, 10.10it/s]

{'loss': Array(0.17196392, dtype=float32), 'loss_cross_entropy': Array(0.16116045, dtype=float32)}


  5%|▌         | 54319/1000000 [2:29:16<34:43:30,  7.56it/s]

{'loss': Array(0.17052974, dtype=float32), 'loss_cross_entropy': Array(0.15985453, dtype=float32)}


  5%|▌         | 54329/1000000 [2:29:18<26:25:41,  9.94it/s]

{'loss': Array(0.15213543, dtype=float32), 'loss_cross_entropy': Array(0.1418868, dtype=float32)}


  5%|▌         | 54339/1000000 [2:29:20<34:36:38,  7.59it/s]

{'loss': Array(0.17786606, dtype=float32), 'loss_cross_entropy': Array(0.16593245, dtype=float32)}


  5%|▌         | 54349/1000000 [2:29:21<25:36:24, 10.26it/s]

{'loss': Array(0.1761501, dtype=float32), 'loss_cross_entropy': Array(0.16457407, dtype=float32)}


  5%|▌         | 54359/1000000 [2:29:22<24:39:27, 10.65it/s]

{'loss': Array(0.16955958, dtype=float32), 'loss_cross_entropy': Array(0.1592554, dtype=float32)}


  5%|▌         | 54369/1000000 [2:29:24<28:56:30,  9.08it/s]

{'loss': Array(0.14937608, dtype=float32), 'loss_cross_entropy': Array(0.13913333, dtype=float32)}


  5%|▌         | 54379/1000000 [2:29:25<24:33:27, 10.70it/s]

{'loss': Array(0.16921483, dtype=float32), 'loss_cross_entropy': Array(0.15815668, dtype=float32)}


  5%|▌         | 54389/1000000 [2:29:27<31:13:03,  8.41it/s]

{'loss': Array(0.15854017, dtype=float32), 'loss_cross_entropy': Array(0.14781566, dtype=float32)}


  5%|▌         | 54399/1000000 [2:29:28<25:38:33, 10.24it/s]

{'loss': Array(0.16427532, dtype=float32), 'loss_cross_entropy': Array(0.15392783, dtype=float32)}


  5%|▌         | 54409/1000000 [2:29:30<39:06:14,  6.72it/s]

{'loss': Array(0.16162105, dtype=float32), 'loss_cross_entropy': Array(0.1508447, dtype=float32)}


  5%|▌         | 54419/1000000 [2:29:31<27:18:33,  9.62it/s]

{'loss': Array(0.1755951, dtype=float32), 'loss_cross_entropy': Array(0.16455059, dtype=float32)}


  5%|▌         | 54429/1000000 [2:29:33<39:23:22,  6.67it/s]

{'loss': Array(0.15957847, dtype=float32), 'loss_cross_entropy': Array(0.1492731, dtype=float32)}


  5%|▌         | 54439/1000000 [2:29:35<27:19:04,  9.61it/s]

{'loss': Array(0.16009459, dtype=float32), 'loss_cross_entropy': Array(0.14883263, dtype=float32)}


  5%|▌         | 54449/1000000 [2:29:36<24:49:47, 10.58it/s]

{'loss': Array(0.16572075, dtype=float32), 'loss_cross_entropy': Array(0.15538754, dtype=float32)}


  5%|▌         | 54459/1000000 [2:29:38<34:09:10,  7.69it/s]

{'loss': Array(0.15338245, dtype=float32), 'loss_cross_entropy': Array(0.14329599, dtype=float32)}


  5%|▌         | 54469/1000000 [2:29:39<26:00:25, 10.10it/s]

{'loss': Array(0.15221132, dtype=float32), 'loss_cross_entropy': Array(0.14155923, dtype=float32)}


  5%|▌         | 54479/1000000 [2:29:41<38:46:08,  6.77it/s]

{'loss': Array(0.18164098, dtype=float32), 'loss_cross_entropy': Array(0.16984208, dtype=float32)}


  5%|▌         | 54489/1000000 [2:29:42<26:21:44,  9.96it/s]

{'loss': Array(0.1534336, dtype=float32), 'loss_cross_entropy': Array(0.14331095, dtype=float32)}


  5%|▌         | 54499/1000000 [2:29:43<24:53:04, 10.55it/s]

{'loss': Array(0.18139923, dtype=float32), 'loss_cross_entropy': Array(0.17006418, dtype=float32)}


  5%|▌         | 54509/1000000 [2:29:52<89:39:30,  2.93it/s] 

{'loss': Array(0.18257895, dtype=float32), 'loss_cross_entropy': Array(0.17120302, dtype=float32)}


  5%|▌         | 54519/1000000 [2:29:53<35:16:50,  7.44it/s]

{'loss': Array(0.15973866, dtype=float32), 'loss_cross_entropy': Array(0.14874098, dtype=float32)}


  5%|▌         | 54529/1000000 [2:29:55<29:35:18,  8.88it/s]

{'loss': Array(0.16215274, dtype=float32), 'loss_cross_entropy': Array(0.15071236, dtype=float32)}


  5%|▌         | 54539/1000000 [2:29:56<25:15:58, 10.39it/s]

{'loss': Array(0.15844844, dtype=float32), 'loss_cross_entropy': Array(0.14731374, dtype=float32)}


  5%|▌         | 54549/1000000 [2:29:58<25:32:43, 10.28it/s]

{'loss': Array(0.16944078, dtype=float32), 'loss_cross_entropy': Array(0.15841804, dtype=float32)}


  5%|▌         | 54559/1000000 [2:29:59<28:29:22,  9.22it/s]

{'loss': Array(0.16885722, dtype=float32), 'loss_cross_entropy': Array(0.1583573, dtype=float32)}


  5%|▌         | 54569/1000000 [2:30:01<25:16:42, 10.39it/s]

{'loss': Array(0.17099668, dtype=float32), 'loss_cross_entropy': Array(0.16060339, dtype=float32)}


  5%|▌         | 54579/1000000 [2:30:02<32:01:45,  8.20it/s]

{'loss': Array(0.13707231, dtype=float32), 'loss_cross_entropy': Array(0.12718438, dtype=float32)}


  5%|▌         | 54589/1000000 [2:30:04<27:50:20,  9.43it/s]

{'loss': Array(0.15940271, dtype=float32), 'loss_cross_entropy': Array(0.14932041, dtype=float32)}


  5%|▌         | 54599/1000000 [2:30:06<40:14:58,  6.52it/s]

{'loss': Array(0.1518539, dtype=float32), 'loss_cross_entropy': Array(0.14182493, dtype=float32)}


  5%|▌         | 54609/1000000 [2:30:07<27:24:04,  9.58it/s]

{'loss': Array(0.1722928, dtype=float32), 'loss_cross_entropy': Array(0.16137663, dtype=float32)}


  5%|▌         | 54619/1000000 [2:30:08<24:56:56, 10.53it/s]

{'loss': Array(0.16978413, dtype=float32), 'loss_cross_entropy': Array(0.15845609, dtype=float32)}


  5%|▌         | 54629/1000000 [2:30:10<27:26:07,  9.57it/s]

{'loss': Array(0.15880919, dtype=float32), 'loss_cross_entropy': Array(0.14846824, dtype=float32)}


  5%|▌         | 54639/1000000 [2:30:11<25:11:47, 10.42it/s]

{'loss': Array(0.16856961, dtype=float32), 'loss_cross_entropy': Array(0.15802531, dtype=float32)}


  5%|▌         | 54649/1000000 [2:30:13<31:30:34,  8.33it/s]

{'loss': Array(0.14397644, dtype=float32), 'loss_cross_entropy': Array(0.13454218, dtype=float32)}


  5%|▌         | 54659/1000000 [2:30:15<25:25:51, 10.33it/s]

{'loss': Array(0.1568029, dtype=float32), 'loss_cross_entropy': Array(0.1467386, dtype=float32)}


  5%|▌         | 54669/1000000 [2:30:16<24:17:40, 10.81it/s]

{'loss': Array(0.1697465, dtype=float32), 'loss_cross_entropy': Array(0.15905158, dtype=float32)}


  5%|▌         | 54679/1000000 [2:30:18<29:06:46,  9.02it/s]

{'loss': Array(0.16343844, dtype=float32), 'loss_cross_entropy': Array(0.15236202, dtype=float32)}


  5%|▌         | 54689/1000000 [2:30:19<25:40:50, 10.23it/s]

{'loss': Array(0.17748706, dtype=float32), 'loss_cross_entropy': Array(0.16565025, dtype=float32)}


  5%|▌         | 54699/1000000 [2:30:21<39:11:31,  6.70it/s]

{'loss': Array(0.16794376, dtype=float32), 'loss_cross_entropy': Array(0.15732332, dtype=float32)}


  5%|▌         | 54709/1000000 [2:30:22<26:45:43,  9.81it/s]

{'loss': Array(0.13415255, dtype=float32), 'loss_cross_entropy': Array(0.125152, dtype=float32)}


  5%|▌         | 54719/1000000 [2:30:23<24:46:05, 10.60it/s]

{'loss': Array(0.16737567, dtype=float32), 'loss_cross_entropy': Array(0.15645014, dtype=float32)}


  5%|▌         | 54729/1000000 [2:30:25<30:54:20,  8.50it/s]

{'loss': Array(0.1666938, dtype=float32), 'loss_cross_entropy': Array(0.15627988, dtype=float32)}


  5%|▌         | 54739/1000000 [2:30:26<25:17:51, 10.38it/s]

{'loss': Array(0.16836257, dtype=float32), 'loss_cross_entropy': Array(0.1575062, dtype=float32)}


  5%|▌         | 54749/1000000 [2:30:28<34:27:44,  7.62it/s]

{'loss': Array(0.15679167, dtype=float32), 'loss_cross_entropy': Array(0.14613664, dtype=float32)}


  5%|▌         | 54759/1000000 [2:30:30<25:32:53, 10.28it/s]

{'loss': Array(0.15408295, dtype=float32), 'loss_cross_entropy': Array(0.14402498, dtype=float32)}


  5%|▌         | 54769/1000000 [2:30:31<34:25:35,  7.63it/s]

{'loss': Array(0.15857343, dtype=float32), 'loss_cross_entropy': Array(0.1484271, dtype=float32)}


  5%|▌         | 54779/1000000 [2:30:33<25:36:56, 10.25it/s]

{'loss': Array(0.14922376, dtype=float32), 'loss_cross_entropy': Array(0.1387766, dtype=float32)}


  5%|▌         | 54789/1000000 [2:30:34<24:36:37, 10.67it/s]

{'loss': Array(0.18161671, dtype=float32), 'loss_cross_entropy': Array(0.16997571, dtype=float32)}


  5%|▌         | 54799/1000000 [2:30:36<29:18:32,  8.96it/s]

{'loss': Array(0.16908012, dtype=float32), 'loss_cross_entropy': Array(0.15797083, dtype=float32)}


  5%|▌         | 54809/1000000 [2:30:37<24:41:20, 10.63it/s]

{'loss': Array(0.16076489, dtype=float32), 'loss_cross_entropy': Array(0.1501565, dtype=float32)}


  5%|▌         | 54819/1000000 [2:30:39<31:05:54,  8.44it/s]

{'loss': Array(0.15725486, dtype=float32), 'loss_cross_entropy': Array(0.14639202, dtype=float32)}


  5%|▌         | 54829/1000000 [2:30:40<25:37:32, 10.25it/s]

{'loss': Array(0.16727258, dtype=float32), 'loss_cross_entropy': Array(0.15557167, dtype=float32)}


  5%|▌         | 54839/1000000 [2:30:42<38:27:29,  6.83it/s]

{'loss': Array(0.16092345, dtype=float32), 'loss_cross_entropy': Array(0.15010339, dtype=float32)}


  5%|▌         | 54849/1000000 [2:30:43<26:36:04,  9.87it/s]

{'loss': Array(0.16592486, dtype=float32), 'loss_cross_entropy': Array(0.15528949, dtype=float32)}


  5%|▌         | 54859/1000000 [2:30:45<39:08:55,  6.71it/s]

{'loss': Array(0.1534887, dtype=float32), 'loss_cross_entropy': Array(0.14350018, dtype=float32)}


  5%|▌         | 54869/1000000 [2:30:46<27:06:09,  9.69it/s]

{'loss': Array(0.14813253, dtype=float32), 'loss_cross_entropy': Array(0.13899569, dtype=float32)}


  5%|▌         | 54879/1000000 [2:30:48<24:28:46, 10.72it/s]

{'loss': Array(0.16087495, dtype=float32), 'loss_cross_entropy': Array(0.15079024, dtype=float32)}


  5%|▌         | 54889/1000000 [2:30:49<34:10:34,  7.68it/s]

{'loss': Array(0.15776516, dtype=float32), 'loss_cross_entropy': Array(0.14745316, dtype=float32)}


  5%|▌         | 54899/1000000 [2:30:51<25:49:54, 10.16it/s]

{'loss': Array(0.15837102, dtype=float32), 'loss_cross_entropy': Array(0.14747792, dtype=float32)}


  5%|▌         | 54909/1000000 [2:30:52<39:08:29,  6.71it/s]

{'loss': Array(0.16732743, dtype=float32), 'loss_cross_entropy': Array(0.15671088, dtype=float32)}


  5%|▌         | 54919/1000000 [2:30:54<27:04:04,  9.70it/s]

{'loss': Array(0.17662406, dtype=float32), 'loss_cross_entropy': Array(0.16522942, dtype=float32)}


  5%|▌         | 54929/1000000 [2:30:55<24:55:50, 10.53it/s]

{'loss': Array(0.17098491, dtype=float32), 'loss_cross_entropy': Array(0.15957992, dtype=float32)}


  5%|▌         | 54939/1000000 [2:30:57<27:56:09,  9.40it/s]

{'loss': Array(0.170052, dtype=float32), 'loss_cross_entropy': Array(0.15872951, dtype=float32)}


  5%|▌         | 54949/1000000 [2:30:58<25:01:38, 10.49it/s]

{'loss': Array(0.17858899, dtype=float32), 'loss_cross_entropy': Array(0.16762644, dtype=float32)}


  5%|▌         | 54959/1000000 [2:31:00<28:52:22,  9.09it/s]

{'loss': Array(0.1562514, dtype=float32), 'loss_cross_entropy': Array(0.14576447, dtype=float32)}


  5%|▌         | 54969/1000000 [2:31:01<25:18:10, 10.37it/s]

{'loss': Array(0.15511923, dtype=float32), 'loss_cross_entropy': Array(0.14471878, dtype=float32)}


  5%|▌         | 54979/1000000 [2:31:03<38:59:40,  6.73it/s]

{'loss': Array(0.15747452, dtype=float32), 'loss_cross_entropy': Array(0.1468911, dtype=float32)}


  5%|▌         | 54989/1000000 [2:31:04<26:50:58,  9.78it/s]

{'loss': Array(0.16637866, dtype=float32), 'loss_cross_entropy': Array(0.1557989, dtype=float32)}


  5%|▌         | 54999/1000000 [2:31:06<25:39:32, 10.23it/s]

{'loss': Array(0.16694042, dtype=float32), 'loss_cross_entropy': Array(0.15636885, dtype=float32)}


  6%|▌         | 55009/1000000 [2:31:14<93:51:22,  2.80it/s] 

{'loss': Array(0.17432828, dtype=float32), 'loss_cross_entropy': Array(0.16289388, dtype=float32)}


  6%|▌         | 55019/1000000 [2:31:15<36:41:49,  7.15it/s] 

{'loss': Array(0.16878878, dtype=float32), 'loss_cross_entropy': Array(0.15762381, dtype=float32)}


  6%|▌         | 55029/1000000 [2:31:17<40:45:25,  6.44it/s]

{'loss': Array(0.1684633, dtype=float32), 'loss_cross_entropy': Array(0.15716018, dtype=float32)}


  6%|▌         | 55039/1000000 [2:31:19<27:19:03,  9.61it/s]

{'loss': Array(0.15877607, dtype=float32), 'loss_cross_entropy': Array(0.1488552, dtype=float32)}


  6%|▌         | 55049/1000000 [2:31:20<24:49:48, 10.57it/s]

{'loss': Array(0.16974758, dtype=float32), 'loss_cross_entropy': Array(0.15879345, dtype=float32)}


  6%|▌         | 55059/1000000 [2:31:22<29:18:45,  8.95it/s]

{'loss': Array(0.15871559, dtype=float32), 'loss_cross_entropy': Array(0.14850955, dtype=float32)}


  6%|▌         | 55069/1000000 [2:31:23<25:22:27, 10.34it/s]

{'loss': Array(0.15362279, dtype=float32), 'loss_cross_entropy': Array(0.1435956, dtype=float32)}


  6%|▌         | 55079/1000000 [2:31:25<33:55:25,  7.74it/s]

{'loss': Array(0.17211096, dtype=float32), 'loss_cross_entropy': Array(0.16137351, dtype=float32)}


  6%|▌         | 55089/1000000 [2:31:26<25:27:12, 10.31it/s]

{'loss': Array(0.14734277, dtype=float32), 'loss_cross_entropy': Array(0.13768043, dtype=float32)}


  6%|▌         | 55099/1000000 [2:31:27<24:15:39, 10.82it/s]

{'loss': Array(0.14923932, dtype=float32), 'loss_cross_entropy': Array(0.13982305, dtype=float32)}


  6%|▌         | 55109/1000000 [2:31:29<31:21:07,  8.37it/s]

{'loss': Array(0.17932037, dtype=float32), 'loss_cross_entropy': Array(0.16823384, dtype=float32)}


  6%|▌         | 55119/1000000 [2:31:30<25:35:34, 10.26it/s]

{'loss': Array(0.15372749, dtype=float32), 'loss_cross_entropy': Array(0.14330132, dtype=float32)}


  6%|▌         | 55129/1000000 [2:31:32<25:06:43, 10.45it/s]

{'loss': Array(0.16627625, dtype=float32), 'loss_cross_entropy': Array(0.15592383, dtype=float32)}


  6%|▌         | 55139/1000000 [2:31:34<27:53:37,  9.41it/s]

{'loss': Array(0.1649745, dtype=float32), 'loss_cross_entropy': Array(0.15452278, dtype=float32)}


  6%|▌         | 55149/1000000 [2:31:35<25:10:28, 10.43it/s]

{'loss': Array(0.17204773, dtype=float32), 'loss_cross_entropy': Array(0.1616379, dtype=float32)}


  6%|▌         | 55159/1000000 [2:31:37<31:24:52,  8.35it/s]

{'loss': Array(0.1710671, dtype=float32), 'loss_cross_entropy': Array(0.15987535, dtype=float32)}


  6%|▌         | 55169/1000000 [2:31:38<25:40:35, 10.22it/s]

{'loss': Array(0.21066825, dtype=float32), 'loss_cross_entropy': Array(0.19801159, dtype=float32)}


  6%|▌         | 55179/1000000 [2:31:40<38:56:55,  6.74it/s]

{'loss': Array(0.16542557, dtype=float32), 'loss_cross_entropy': Array(0.15483885, dtype=float32)}


  6%|▌         | 55189/1000000 [2:31:41<26:34:27,  9.88it/s]

{'loss': Array(0.16159698, dtype=float32), 'loss_cross_entropy': Array(0.15083703, dtype=float32)}


  6%|▌         | 55199/1000000 [2:31:43<38:47:00,  6.77it/s]

{'loss': Array(0.16760616, dtype=float32), 'loss_cross_entropy': Array(0.15720691, dtype=float32)}


  6%|▌         | 55209/1000000 [2:31:44<26:18:23,  9.98it/s]

{'loss': Array(0.18654029, dtype=float32), 'loss_cross_entropy': Array(0.17440514, dtype=float32)}


  6%|▌         | 55219/1000000 [2:31:45<24:24:30, 10.75it/s]

{'loss': Array(0.17372362, dtype=float32), 'loss_cross_entropy': Array(0.16254498, dtype=float32)}


  6%|▌         | 55229/1000000 [2:31:47<33:38:12,  7.80it/s]

{'loss': Array(0.16099437, dtype=float32), 'loss_cross_entropy': Array(0.15030958, dtype=float32)}


  6%|▌         | 55239/1000000 [2:31:49<26:19:06,  9.97it/s]

{'loss': Array(0.14636283, dtype=float32), 'loss_cross_entropy': Array(0.13684414, dtype=float32)}


  6%|▌         | 55249/1000000 [2:31:50<38:21:07,  6.84it/s]

{'loss': Array(0.18655807, dtype=float32), 'loss_cross_entropy': Array(0.17531495, dtype=float32)}


  6%|▌         | 55259/1000000 [2:31:52<26:55:55,  9.74it/s]

{'loss': Array(0.1690505, dtype=float32), 'loss_cross_entropy': Array(0.15844536, dtype=float32)}


  6%|▌         | 55269/1000000 [2:31:53<24:28:44, 10.72it/s]

{'loss': Array(0.16336875, dtype=float32), 'loss_cross_entropy': Array(0.15276866, dtype=float32)}


  6%|▌         | 55279/1000000 [2:31:55<27:59:06,  9.38it/s]

{'loss': Array(0.17235878, dtype=float32), 'loss_cross_entropy': Array(0.16080499, dtype=float32)}


  6%|▌         | 55289/1000000 [2:31:56<25:15:04, 10.39it/s]

{'loss': Array(0.16074, dtype=float32), 'loss_cross_entropy': Array(0.15052252, dtype=float32)}


  6%|▌         | 55299/1000000 [2:31:58<28:11:20,  9.31it/s]

{'loss': Array(0.15468888, dtype=float32), 'loss_cross_entropy': Array(0.1443789, dtype=float32)}


  6%|▌         | 55309/1000000 [2:31:59<25:05:13, 10.46it/s]

{'loss': Array(0.16294806, dtype=float32), 'loss_cross_entropy': Array(0.15276188, dtype=float32)}


  6%|▌         | 55319/1000000 [2:32:01<41:10:33,  6.37it/s]

{'loss': Array(0.15502399, dtype=float32), 'loss_cross_entropy': Array(0.14498101, dtype=float32)}


  6%|▌         | 55329/1000000 [2:32:02<28:53:29,  9.08it/s]

{'loss': Array(0.17346324, dtype=float32), 'loss_cross_entropy': Array(0.162265, dtype=float32)}


  6%|▌         | 55339/1000000 [2:32:04<26:08:04, 10.04it/s]

{'loss': Array(0.18705538, dtype=float32), 'loss_cross_entropy': Array(0.17561194, dtype=float32)}


  6%|▌         | 55349/1000000 [2:32:06<32:18:57,  8.12it/s]

{'loss': Array(0.19101518, dtype=float32), 'loss_cross_entropy': Array(0.17910348, dtype=float32)}


  6%|▌         | 55359/1000000 [2:32:07<25:56:36, 10.11it/s]

{'loss': Array(0.1512327, dtype=float32), 'loss_cross_entropy': Array(0.14104675, dtype=float32)}


  6%|▌         | 55369/1000000 [2:32:09<39:13:02,  6.69it/s]

{'loss': Array(0.15250821, dtype=float32), 'loss_cross_entropy': Array(0.14257501, dtype=float32)}


  6%|▌         | 55379/1000000 [2:32:10<26:20:36,  9.96it/s]

{'loss': Array(0.16431402, dtype=float32), 'loss_cross_entropy': Array(0.15337811, dtype=float32)}


  6%|▌         | 55389/1000000 [2:32:12<25:07:18, 10.44it/s]

{'loss': Array(0.15889029, dtype=float32), 'loss_cross_entropy': Array(0.14912762, dtype=float32)}


  6%|▌         | 55399/1000000 [2:32:13<29:26:28,  8.91it/s]

{'loss': Array(0.18632151, dtype=float32), 'loss_cross_entropy': Array(0.1745053, dtype=float32)}


  6%|▌         | 55409/1000000 [2:32:15<24:55:31, 10.53it/s]

{'loss': Array(0.16080391, dtype=float32), 'loss_cross_entropy': Array(0.15056923, dtype=float32)}


  6%|▌         | 55419/1000000 [2:32:16<34:41:32,  7.56it/s]

{'loss': Array(0.15544023, dtype=float32), 'loss_cross_entropy': Array(0.14467755, dtype=float32)}


  6%|▌         | 55429/1000000 [2:32:18<26:02:17, 10.08it/s]

{'loss': Array(0.16183516, dtype=float32), 'loss_cross_entropy': Array(0.1511592, dtype=float32)}


  6%|▌         | 55439/1000000 [2:32:19<24:25:39, 10.74it/s]

{'loss': Array(0.15990996, dtype=float32), 'loss_cross_entropy': Array(0.14905976, dtype=float32)}


  6%|▌         | 55449/1000000 [2:32:21<30:58:22,  8.47it/s]

{'loss': Array(0.16360871, dtype=float32), 'loss_cross_entropy': Array(0.15338713, dtype=float32)}


  6%|▌         | 55459/1000000 [2:32:22<25:32:48, 10.27it/s]

{'loss': Array(0.17219768, dtype=float32), 'loss_cross_entropy': Array(0.1615324, dtype=float32)}


  6%|▌         | 55469/1000000 [2:32:23<25:28:44, 10.30it/s]

{'loss': Array(0.17822084, dtype=float32), 'loss_cross_entropy': Array(0.16647191, dtype=float32)}


  6%|▌         | 55479/1000000 [2:32:25<28:27:36,  9.22it/s]

{'loss': Array(0.15349515, dtype=float32), 'loss_cross_entropy': Array(0.14316498, dtype=float32)}


  6%|▌         | 55489/1000000 [2:32:27<25:07:52, 10.44it/s]

{'loss': Array(0.17476249, dtype=float32), 'loss_cross_entropy': Array(0.16354994, dtype=float32)}


  6%|▌         | 55499/1000000 [2:32:28<31:27:10,  8.34it/s]

{'loss': Array(0.15132211, dtype=float32), 'loss_cross_entropy': Array(0.14131324, dtype=float32)}


  6%|▌         | 55509/1000000 [2:32:36<88:12:00,  2.97it/s] 

{'loss': Array(0.1718468, dtype=float32), 'loss_cross_entropy': Array(0.16177614, dtype=float32)}


  6%|▌         | 55519/1000000 [2:32:38<35:08:00,  7.47it/s]

{'loss': Array(0.14181513, dtype=float32), 'loss_cross_entropy': Array(0.13199864, dtype=float32)}


  6%|▌         | 55529/1000000 [2:32:39<29:05:49,  9.02it/s]

{'loss': Array(0.17128561, dtype=float32), 'loss_cross_entropy': Array(0.16036601, dtype=float32)}


  6%|▌         | 55539/1000000 [2:32:41<25:02:04, 10.48it/s]

{'loss': Array(0.18330805, dtype=float32), 'loss_cross_entropy': Array(0.17134355, dtype=float32)}


  6%|▌         | 55549/1000000 [2:32:42<29:28:20,  8.90it/s]

{'loss': Array(0.18369082, dtype=float32), 'loss_cross_entropy': Array(0.17289756, dtype=float32)}


  6%|▌         | 55559/1000000 [2:32:44<24:52:14, 10.55it/s]

{'loss': Array(0.17814289, dtype=float32), 'loss_cross_entropy': Array(0.1670873, dtype=float32)}


  6%|▌         | 55569/1000000 [2:32:45<34:31:26,  7.60it/s]

{'loss': Array(0.18472712, dtype=float32), 'loss_cross_entropy': Array(0.17335963, dtype=float32)}


  6%|▌         | 55579/1000000 [2:32:47<26:07:13, 10.04it/s]

{'loss': Array(0.1927491, dtype=float32), 'loss_cross_entropy': Array(0.18062733, dtype=float32)}


  6%|▌         | 55589/1000000 [2:32:49<34:52:24,  7.52it/s]

{'loss': Array(0.16831377, dtype=float32), 'loss_cross_entropy': Array(0.15785955, dtype=float32)}


  6%|▌         | 55599/1000000 [2:32:50<26:07:27, 10.04it/s]

{'loss': Array(0.16690283, dtype=float32), 'loss_cross_entropy': Array(0.15624698, dtype=float32)}


  6%|▌         | 55609/1000000 [2:32:51<24:24:08, 10.75it/s]

{'loss': Array(0.17400496, dtype=float32), 'loss_cross_entropy': Array(0.16292231, dtype=float32)}


  6%|▌         | 55619/1000000 [2:32:53<30:42:48,  8.54it/s]

{'loss': Array(0.16886291, dtype=float32), 'loss_cross_entropy': Array(0.15734139, dtype=float32)}


  6%|▌         | 55629/1000000 [2:32:54<25:04:48, 10.46it/s]

{'loss': Array(0.15941116, dtype=float32), 'loss_cross_entropy': Array(0.14878726, dtype=float32)}


  6%|▌         | 55639/1000000 [2:32:56<24:52:08, 10.55it/s]

{'loss': Array(0.17645109, dtype=float32), 'loss_cross_entropy': Array(0.16569589, dtype=float32)}


  6%|▌         | 55649/1000000 [2:32:57<28:00:39,  9.36it/s]

{'loss': Array(0.17474364, dtype=float32), 'loss_cross_entropy': Array(0.16367601, dtype=float32)}


  6%|▌         | 55659/1000000 [2:32:59<25:42:12, 10.21it/s]

{'loss': Array(0.16604437, dtype=float32), 'loss_cross_entropy': Array(0.15567538, dtype=float32)}


  6%|▌         | 55669/1000000 [2:33:01<32:59:12,  7.95it/s]

{'loss': Array(0.15276496, dtype=float32), 'loss_cross_entropy': Array(0.14290825, dtype=float32)}


  6%|▌         | 55679/1000000 [2:33:02<26:38:31,  9.85it/s]

{'loss': Array(0.16859941, dtype=float32), 'loss_cross_entropy': Array(0.1583282, dtype=float32)}


  6%|▌         | 55689/1000000 [2:33:04<36:18:55,  7.22it/s]

{'loss': Array(0.18411647, dtype=float32), 'loss_cross_entropy': Array(0.17322801, dtype=float32)}


  6%|▌         | 55699/1000000 [2:33:05<27:20:51,  9.59it/s]

{'loss': Array(0.16453223, dtype=float32), 'loss_cross_entropy': Array(0.15414928, dtype=float32)}


  6%|▌         | 55709/1000000 [2:33:07<24:58:52, 10.50it/s]

{'loss': Array(0.17338978, dtype=float32), 'loss_cross_entropy': Array(0.16248457, dtype=float32)}


  6%|▌         | 55719/1000000 [2:33:08<31:11:24,  8.41it/s]

{'loss': Array(0.180622, dtype=float32), 'loss_cross_entropy': Array(0.16848297, dtype=float32)}


  6%|▌         | 55729/1000000 [2:33:10<25:10:30, 10.42it/s]

{'loss': Array(0.16444361, dtype=float32), 'loss_cross_entropy': Array(0.15375404, dtype=float32)}


  6%|▌         | 55739/1000000 [2:33:11<23:55:11, 10.97it/s]

{'loss': Array(0.15886764, dtype=float32), 'loss_cross_entropy': Array(0.14818887, dtype=float32)}


  6%|▌         | 55749/1000000 [2:33:13<27:16:32,  9.62it/s]

{'loss': Array(0.14785786, dtype=float32), 'loss_cross_entropy': Array(0.13791354, dtype=float32)}


  6%|▌         | 55759/1000000 [2:33:14<24:51:35, 10.55it/s]

{'loss': Array(0.1586174, dtype=float32), 'loss_cross_entropy': Array(0.1479791, dtype=float32)}


  6%|▌         | 55769/1000000 [2:33:16<31:34:49,  8.31it/s]

{'loss': Array(0.17291941, dtype=float32), 'loss_cross_entropy': Array(0.16188149, dtype=float32)}


  6%|▌         | 55779/1000000 [2:33:17<25:22:42, 10.33it/s]

{'loss': Array(0.1764708, dtype=float32), 'loss_cross_entropy': Array(0.16500953, dtype=float32)}


  6%|▌         | 55789/1000000 [2:33:19<38:48:30,  6.76it/s]

{'loss': Array(0.16067682, dtype=float32), 'loss_cross_entropy': Array(0.14980845, dtype=float32)}


  6%|▌         | 55799/1000000 [2:33:20<26:57:11,  9.73it/s]

{'loss': Array(0.15771097, dtype=float32), 'loss_cross_entropy': Array(0.14752486, dtype=float32)}


  6%|▌         | 55809/1000000 [2:33:22<25:08:34, 10.43it/s]

{'loss': Array(0.14041966, dtype=float32), 'loss_cross_entropy': Array(0.13074325, dtype=float32)}


  6%|▌         | 55819/1000000 [2:33:23<35:02:42,  7.48it/s]

{'loss': Array(0.16165435, dtype=float32), 'loss_cross_entropy': Array(0.15203081, dtype=float32)}


  6%|▌         | 55829/1000000 [2:33:25<26:22:15,  9.95it/s]

{'loss': Array(0.19158135, dtype=float32), 'loss_cross_entropy': Array(0.18026924, dtype=float32)}


  6%|▌         | 55839/1000000 [2:33:26<24:44:46, 10.60it/s]

{'loss': Array(0.1670269, dtype=float32), 'loss_cross_entropy': Array(0.15658227, dtype=float32)}


  6%|▌         | 55849/1000000 [2:33:28<29:22:53,  8.93it/s]

{'loss': Array(0.1827916, dtype=float32), 'loss_cross_entropy': Array(0.17159301, dtype=float32)}


  6%|▌         | 55859/1000000 [2:33:29<25:45:14, 10.18it/s]

{'loss': Array(0.16984688, dtype=float32), 'loss_cross_entropy': Array(0.15937096, dtype=float32)}


  6%|▌         | 55869/1000000 [2:33:31<31:08:57,  8.42it/s]

{'loss': Array(0.16832305, dtype=float32), 'loss_cross_entropy': Array(0.15759407, dtype=float32)}


  6%|▌         | 55879/1000000 [2:33:32<25:09:52, 10.42it/s]

{'loss': Array(0.17312844, dtype=float32), 'loss_cross_entropy': Array(0.16140546, dtype=float32)}


  6%|▌         | 55889/1000000 [2:33:33<24:26:02, 10.73it/s]

{'loss': Array(0.15516727, dtype=float32), 'loss_cross_entropy': Array(0.1452434, dtype=float32)}


  6%|▌         | 55899/1000000 [2:33:35<27:28:22,  9.55it/s]

{'loss': Array(0.13533589, dtype=float32), 'loss_cross_entropy': Array(0.12589021, dtype=float32)}


  6%|▌         | 55909/1000000 [2:33:37<24:41:59, 10.62it/s]

{'loss': Array(0.1560099, dtype=float32), 'loss_cross_entropy': Array(0.14577182, dtype=float32)}


  6%|▌         | 55919/1000000 [2:33:38<33:57:35,  7.72it/s]

{'loss': Array(0.16967516, dtype=float32), 'loss_cross_entropy': Array(0.15860148, dtype=float32)}


  6%|▌         | 55929/1000000 [2:33:40<26:42:25,  9.82it/s]

{'loss': Array(0.15161292, dtype=float32), 'loss_cross_entropy': Array(0.14257066, dtype=float32)}


  6%|▌         | 55939/1000000 [2:33:41<24:45:41, 10.59it/s]

{'loss': Array(0.1519089, dtype=float32), 'loss_cross_entropy': Array(0.14232796, dtype=float32)}


  6%|▌         | 55949/1000000 [2:33:43<27:43:38,  9.46it/s]

{'loss': Array(0.15553819, dtype=float32), 'loss_cross_entropy': Array(0.14565563, dtype=float32)}


  6%|▌         | 55959/1000000 [2:33:44<25:09:37, 10.42it/s]

{'loss': Array(0.16014346, dtype=float32), 'loss_cross_entropy': Array(0.15032178, dtype=float32)}


  6%|▌         | 55969/1000000 [2:33:46<29:45:00,  8.81it/s]

{'loss': Array(0.16582619, dtype=float32), 'loss_cross_entropy': Array(0.15471165, dtype=float32)}


  6%|▌         | 55979/1000000 [2:33:47<25:03:47, 10.46it/s]

{'loss': Array(0.18100749, dtype=float32), 'loss_cross_entropy': Array(0.16999589, dtype=float32)}


  6%|▌         | 55989/1000000 [2:33:49<38:34:03,  6.80it/s]

{'loss': Array(0.16302697, dtype=float32), 'loss_cross_entropy': Array(0.15190162, dtype=float32)}


  6%|▌         | 55999/1000000 [2:33:50<26:40:12,  9.83it/s]

{'loss': Array(0.16299285, dtype=float32), 'loss_cross_entropy': Array(0.15274438, dtype=float32)}


  6%|▌         | 56009/1000000 [2:33:58<85:41:00,  3.06it/s] 

{'loss': Array(0.17803752, dtype=float32), 'loss_cross_entropy': Array(0.16739242, dtype=float32)}


  6%|▌         | 56019/1000000 [2:34:00<40:19:00,  6.50it/s]

{'loss': Array(0.18546389, dtype=float32), 'loss_cross_entropy': Array(0.17416935, dtype=float32)}


  6%|▌         | 56029/1000000 [2:34:01<27:59:00,  9.37it/s]

{'loss': Array(0.15317264, dtype=float32), 'loss_cross_entropy': Array(0.14339918, dtype=float32)}


  6%|▌         | 56039/1000000 [2:34:03<35:27:08,  7.40it/s]

{'loss': Array(0.16256557, dtype=float32), 'loss_cross_entropy': Array(0.15208179, dtype=float32)}


  6%|▌         | 56049/1000000 [2:34:04<27:54:21,  9.40it/s]

{'loss': Array(0.13503297, dtype=float32), 'loss_cross_entropy': Array(0.12651943, dtype=float32)}


  6%|▌         | 56059/1000000 [2:34:06<38:54:15,  6.74it/s]

{'loss': Array(0.17834853, dtype=float32), 'loss_cross_entropy': Array(0.16749309, dtype=float32)}


  6%|▌         | 56069/1000000 [2:34:07<26:28:54,  9.90it/s]

{'loss': Array(0.1629091, dtype=float32), 'loss_cross_entropy': Array(0.15244655, dtype=float32)}


  6%|▌         | 56079/1000000 [2:34:09<24:20:44, 10.77it/s]

{'loss': Array(0.18137366, dtype=float32), 'loss_cross_entropy': Array(0.17003591, dtype=float32)}


  6%|▌         | 56089/1000000 [2:34:11<34:24:01,  7.62it/s]

{'loss': Array(0.18699168, dtype=float32), 'loss_cross_entropy': Array(0.17583174, dtype=float32)}


  6%|▌         | 56099/1000000 [2:34:12<26:34:25,  9.87it/s]

{'loss': Array(0.16337885, dtype=float32), 'loss_cross_entropy': Array(0.15244892, dtype=float32)}


  6%|▌         | 56109/1000000 [2:34:13<25:56:16, 10.11it/s]

{'loss': Array(0.1849929, dtype=float32), 'loss_cross_entropy': Array(0.17343937, dtype=float32)}


  6%|▌         | 56119/1000000 [2:34:15<29:34:42,  8.86it/s]

{'loss': Array(0.17317279, dtype=float32), 'loss_cross_entropy': Array(0.16257326, dtype=float32)}


  6%|▌         | 56129/1000000 [2:34:17<26:05:07, 10.05it/s]

{'loss': Array(0.14726306, dtype=float32), 'loss_cross_entropy': Array(0.13732508, dtype=float32)}


  6%|▌         | 56139/1000000 [2:34:18<35:47:45,  7.32it/s]

{'loss': Array(0.17966919, dtype=float32), 'loss_cross_entropy': Array(0.16834307, dtype=float32)}


  6%|▌         | 56149/1000000 [2:34:20<26:33:21,  9.87it/s]

{'loss': Array(0.17616782, dtype=float32), 'loss_cross_entropy': Array(0.16516492, dtype=float32)}


  6%|▌         | 56159/1000000 [2:34:21<24:35:47, 10.66it/s]

{'loss': Array(0.14337783, dtype=float32), 'loss_cross_entropy': Array(0.1336479, dtype=float32)}


  6%|▌         | 56169/1000000 [2:34:23<27:25:47,  9.56it/s]

{'loss': Array(0.18574786, dtype=float32), 'loss_cross_entropy': Array(0.17369299, dtype=float32)}


  6%|▌         | 56179/1000000 [2:34:24<24:52:02, 10.54it/s]

{'loss': Array(0.15153621, dtype=float32), 'loss_cross_entropy': Array(0.14163749, dtype=float32)}


  6%|▌         | 56189/1000000 [2:34:25<24:06:07, 10.88it/s]

{'loss': Array(0.17910509, dtype=float32), 'loss_cross_entropy': Array(0.16733451, dtype=float32)}


  6%|▌         | 56199/1000000 [2:34:27<27:33:44,  9.51it/s]

{'loss': Array(0.17320083, dtype=float32), 'loss_cross_entropy': Array(0.1618921, dtype=float32)}


  6%|▌         | 56209/1000000 [2:34:29<24:45:50, 10.59it/s]

{'loss': Array(0.16262437, dtype=float32), 'loss_cross_entropy': Array(0.15269479, dtype=float32)}


  6%|▌         | 56219/1000000 [2:34:30<33:51:58,  7.74it/s]

{'loss': Array(0.15613185, dtype=float32), 'loss_cross_entropy': Array(0.14606695, dtype=float32)}


  6%|▌         | 56229/1000000 [2:34:32<25:28:43, 10.29it/s]

{'loss': Array(0.15306357, dtype=float32), 'loss_cross_entropy': Array(0.1427471, dtype=float32)}


  6%|▌         | 56239/1000000 [2:34:33<38:00:59,  6.90it/s]

{'loss': Array(0.1657688, dtype=float32), 'loss_cross_entropy': Array(0.15533772, dtype=float32)}


  6%|▌         | 56249/1000000 [2:34:35<27:07:13,  9.67it/s]

{'loss': Array(0.1680067, dtype=float32), 'loss_cross_entropy': Array(0.15770416, dtype=float32)}


  6%|▌         | 56259/1000000 [2:34:36<24:35:17, 10.66it/s]

{'loss': Array(0.16611357, dtype=float32), 'loss_cross_entropy': Array(0.15480654, dtype=float32)}


  6%|▌         | 56269/1000000 [2:34:38<27:44:39,  9.45it/s]

{'loss': Array(0.17540926, dtype=float32), 'loss_cross_entropy': Array(0.16385591, dtype=float32)}


  6%|▌         | 56279/1000000 [2:34:39<24:48:06, 10.57it/s]

{'loss': Array(0.18609701, dtype=float32), 'loss_cross_entropy': Array(0.17443073, dtype=float32)}


  6%|▌         | 56289/1000000 [2:34:41<40:20:20,  6.50it/s]

{'loss': Array(0.16548434, dtype=float32), 'loss_cross_entropy': Array(0.15524161, dtype=float32)}


  6%|▌         | 56299/1000000 [2:34:42<27:18:07,  9.60it/s]

{'loss': Array(0.16496526, dtype=float32), 'loss_cross_entropy': Array(0.15474415, dtype=float32)}


  6%|▌         | 56309/1000000 [2:34:44<25:28:25, 10.29it/s]

{'loss': Array(0.1686548, dtype=float32), 'loss_cross_entropy': Array(0.15752023, dtype=float32)}


  6%|▌         | 56319/1000000 [2:34:45<29:26:58,  8.90it/s]

{'loss': Array(0.18063174, dtype=float32), 'loss_cross_entropy': Array(0.16949843, dtype=float32)}


  6%|▌         | 56329/1000000 [2:34:47<26:04:04, 10.06it/s]

{'loss': Array(0.16454649, dtype=float32), 'loss_cross_entropy': Array(0.15402943, dtype=float32)}


  6%|▌         | 56339/1000000 [2:34:48<33:52:49,  7.74it/s]

{'loss': Array(0.17576326, dtype=float32), 'loss_cross_entropy': Array(0.16494893, dtype=float32)}


  6%|▌         | 56349/1000000 [2:34:50<25:30:19, 10.28it/s]

{'loss': Array(0.16045086, dtype=float32), 'loss_cross_entropy': Array(0.15047228, dtype=float32)}


  6%|▌         | 56359/1000000 [2:34:52<39:12:36,  6.69it/s]

{'loss': Array(0.16070046, dtype=float32), 'loss_cross_entropy': Array(0.15048687, dtype=float32)}


  6%|▌         | 56369/1000000 [2:34:53<26:13:11, 10.00it/s]

{'loss': Array(0.15331076, dtype=float32), 'loss_cross_entropy': Array(0.14328688, dtype=float32)}


  6%|▌         | 56379/1000000 [2:34:54<24:57:16, 10.50it/s]

{'loss': Array(0.17355923, dtype=float32), 'loss_cross_entropy': Array(0.1628519, dtype=float32)}


  6%|▌         | 56389/1000000 [2:34:56<31:44:03,  8.26it/s]

{'loss': Array(0.16309492, dtype=float32), 'loss_cross_entropy': Array(0.15293565, dtype=float32)}


  6%|▌         | 56399/1000000 [2:34:57<25:55:00, 10.11it/s]

{'loss': Array(0.16595934, dtype=float32), 'loss_cross_entropy': Array(0.15505403, dtype=float32)}


  6%|▌         | 56409/1000000 [2:34:59<35:23:29,  7.41it/s]

{'loss': Array(0.16787267, dtype=float32), 'loss_cross_entropy': Array(0.15672159, dtype=float32)}


  6%|▌         | 56419/1000000 [2:35:00<26:49:50,  9.77it/s]

{'loss': Array(0.15254629, dtype=float32), 'loss_cross_entropy': Array(0.14259474, dtype=float32)}


  6%|▌         | 56429/1000000 [2:35:02<26:03:11, 10.06it/s]

{'loss': Array(0.15801857, dtype=float32), 'loss_cross_entropy': Array(0.14817779, dtype=float32)}


  6%|▌         | 56439/1000000 [2:35:04<29:05:55,  9.01it/s]

{'loss': Array(0.17417845, dtype=float32), 'loss_cross_entropy': Array(0.16309023, dtype=float32)}


  6%|▌         | 56449/1000000 [2:35:05<26:06:49, 10.04it/s]

{'loss': Array(0.15496999, dtype=float32), 'loss_cross_entropy': Array(0.14534871, dtype=float32)}


  6%|▌         | 56459/1000000 [2:35:07<39:08:17,  6.70it/s]

{'loss': Array(0.15528558, dtype=float32), 'loss_cross_entropy': Array(0.1457476, dtype=float32)}


  6%|▌         | 56469/1000000 [2:35:08<27:04:22,  9.68it/s]

{'loss': Array(0.1574235, dtype=float32), 'loss_cross_entropy': Array(0.14760034, dtype=float32)}


  6%|▌         | 56479/1000000 [2:35:10<24:22:36, 10.75it/s]

{'loss': Array(0.1625457, dtype=float32), 'loss_cross_entropy': Array(0.15200587, dtype=float32)}


  6%|▌         | 56489/1000000 [2:35:11<30:40:12,  8.55it/s]

{'loss': Array(0.16359802, dtype=float32), 'loss_cross_entropy': Array(0.15387763, dtype=float32)}


  6%|▌         | 56499/1000000 [2:35:13<25:31:00, 10.27it/s]

{'loss': Array(0.15125833, dtype=float32), 'loss_cross_entropy': Array(0.14138679, dtype=float32)}


  6%|▌         | 56509/1000000 [2:35:21<96:25:29,  2.72it/s] 

{'loss': Array(0.15959711, dtype=float32), 'loss_cross_entropy': Array(0.14940022, dtype=float32)}


  6%|▌         | 56519/1000000 [2:35:22<36:30:42,  7.18it/s] 

{'loss': Array(0.18024279, dtype=float32), 'loss_cross_entropy': Array(0.16915214, dtype=float32)}


  6%|▌         | 56529/1000000 [2:35:24<40:33:57,  6.46it/s]

{'loss': Array(0.15991428, dtype=float32), 'loss_cross_entropy': Array(0.14928237, dtype=float32)}


  6%|▌         | 56539/1000000 [2:35:25<26:43:40,  9.81it/s]

{'loss': Array(0.15955532, dtype=float32), 'loss_cross_entropy': Array(0.14887069, dtype=float32)}


  6%|▌         | 56549/1000000 [2:35:27<38:37:45,  6.78it/s]

{'loss': Array(0.16869788, dtype=float32), 'loss_cross_entropy': Array(0.15815105, dtype=float32)}


  6%|▌         | 56559/1000000 [2:35:28<26:53:54,  9.74it/s]

{'loss': Array(0.17663649, dtype=float32), 'loss_cross_entropy': Array(0.16560256, dtype=float32)}


  6%|▌         | 56569/1000000 [2:35:30<24:17:12, 10.79it/s]

{'loss': Array(0.15054879, dtype=float32), 'loss_cross_entropy': Array(0.14031132, dtype=float32)}


  6%|▌         | 56579/1000000 [2:35:31<31:00:23,  8.45it/s]

{'loss': Array(0.18324175, dtype=float32), 'loss_cross_entropy': Array(0.17142735, dtype=float32)}


  6%|▌         | 56589/1000000 [2:35:33<25:09:58, 10.41it/s]

{'loss': Array(0.1863768, dtype=float32), 'loss_cross_entropy': Array(0.17482029, dtype=float32)}


  6%|▌         | 56599/1000000 [2:35:34<39:07:38,  6.70it/s]

{'loss': Array(0.16169523, dtype=float32), 'loss_cross_entropy': Array(0.15154754, dtype=float32)}


  6%|▌         | 56609/1000000 [2:35:36<26:43:40,  9.80it/s]

{'loss': Array(0.17830707, dtype=float32), 'loss_cross_entropy': Array(0.1665283, dtype=float32)}


  6%|▌         | 56619/1000000 [2:35:37<25:28:56, 10.28it/s]

{'loss': Array(0.14350772, dtype=float32), 'loss_cross_entropy': Array(0.13437243, dtype=float32)}


  6%|▌         | 56629/1000000 [2:35:39<34:34:57,  7.58it/s]

{'loss': Array(0.17432204, dtype=float32), 'loss_cross_entropy': Array(0.16442703, dtype=float32)}


  6%|▌         | 56639/1000000 [2:35:40<25:50:06, 10.14it/s]

{'loss': Array(0.16464002, dtype=float32), 'loss_cross_entropy': Array(0.15495948, dtype=float32)}


  6%|▌         | 56649/1000000 [2:35:42<24:17:06, 10.79it/s]

{'loss': Array(0.16032496, dtype=float32), 'loss_cross_entropy': Array(0.15072739, dtype=float32)}


  6%|▌         | 56659/1000000 [2:35:43<28:34:14,  9.17it/s]

{'loss': Array(0.16520014, dtype=float32), 'loss_cross_entropy': Array(0.15482256, dtype=float32)}


  6%|▌         | 56669/1000000 [2:35:45<24:53:58, 10.52it/s]

{'loss': Array(0.17348443, dtype=float32), 'loss_cross_entropy': Array(0.16204844, dtype=float32)}


  6%|▌         | 56679/1000000 [2:35:46<31:24:20,  8.34it/s]

{'loss': Array(0.15522896, dtype=float32), 'loss_cross_entropy': Array(0.14532249, dtype=float32)}


  6%|▌         | 56689/1000000 [2:35:48<26:27:09,  9.91it/s]

{'loss': Array(0.1300198, dtype=float32), 'loss_cross_entropy': Array(0.12128337, dtype=float32)}


  6%|▌         | 56699/1000000 [2:35:49<24:39:15, 10.63it/s]

{'loss': Array(0.14904095, dtype=float32), 'loss_cross_entropy': Array(0.13921407, dtype=float32)}


  6%|▌         | 56709/1000000 [2:35:51<27:20:06,  9.59it/s]

{'loss': Array(0.16974117, dtype=float32), 'loss_cross_entropy': Array(0.15929022, dtype=float32)}


  6%|▌         | 56719/1000000 [2:35:52<25:09:22, 10.42it/s]

{'loss': Array(0.16944776, dtype=float32), 'loss_cross_entropy': Array(0.15839627, dtype=float32)}


  6%|▌         | 56729/1000000 [2:35:54<34:41:47,  7.55it/s]

{'loss': Array(0.17257674, dtype=float32), 'loss_cross_entropy': Array(0.16148572, dtype=float32)}


  6%|▌         | 56739/1000000 [2:35:55<26:11:06, 10.01it/s]

{'loss': Array(0.17026477, dtype=float32), 'loss_cross_entropy': Array(0.15994005, dtype=float32)}


  6%|▌         | 56749/1000000 [2:35:57<24:09:41, 10.84it/s]

{'loss': Array(0.14966379, dtype=float32), 'loss_cross_entropy': Array(0.139993, dtype=float32)}


  6%|▌         | 56759/1000000 [2:35:58<28:11:30,  9.29it/s]

{'loss': Array(0.14956735, dtype=float32), 'loss_cross_entropy': Array(0.13936022, dtype=float32)}


  6%|▌         | 56769/1000000 [2:36:00<25:25:49, 10.30it/s]

{'loss': Array(0.16999106, dtype=float32), 'loss_cross_entropy': Array(0.15932779, dtype=float32)}


  6%|▌         | 56779/1000000 [2:36:02<30:37:57,  8.55it/s]

{'loss': Array(0.16331442, dtype=float32), 'loss_cross_entropy': Array(0.15264733, dtype=float32)}


  6%|▌         | 56789/1000000 [2:36:03<27:38:22,  9.48it/s]

{'loss': Array(0.16660817, dtype=float32), 'loss_cross_entropy': Array(0.15638612, dtype=float32)}


  6%|▌         | 56799/1000000 [2:36:05<39:48:31,  6.58it/s]

{'loss': Array(0.15248546, dtype=float32), 'loss_cross_entropy': Array(0.14275005, dtype=float32)}


  6%|▌         | 56809/1000000 [2:36:06<26:51:42,  9.75it/s]

{'loss': Array(0.17715834, dtype=float32), 'loss_cross_entropy': Array(0.16574799, dtype=float32)}


  6%|▌         | 56819/1000000 [2:36:08<24:58:25, 10.49it/s]

{'loss': Array(0.17091882, dtype=float32), 'loss_cross_entropy': Array(0.1603766, dtype=float32)}


  6%|▌         | 56829/1000000 [2:36:09<31:13:29,  8.39it/s]

{'loss': Array(0.17634675, dtype=float32), 'loss_cross_entropy': Array(0.16575356, dtype=float32)}


  6%|▌         | 56839/1000000 [2:36:11<25:59:11, 10.08it/s]

{'loss': Array(0.16427238, dtype=float32), 'loss_cross_entropy': Array(0.15385933, dtype=float32)}


  6%|▌         | 56849/1000000 [2:36:12<34:54:22,  7.51it/s]

{'loss': Array(0.16104154, dtype=float32), 'loss_cross_entropy': Array(0.15116084, dtype=float32)}


  6%|▌         | 56859/1000000 [2:36:14<26:05:34, 10.04it/s]

{'loss': Array(0.1533726, dtype=float32), 'loss_cross_entropy': Array(0.14320973, dtype=float32)}


  6%|▌         | 56869/1000000 [2:36:15<34:38:11,  7.56it/s]

{'loss': Array(0.17417572, dtype=float32), 'loss_cross_entropy': Array(0.16368887, dtype=float32)}


  6%|▌         | 56879/1000000 [2:36:17<25:38:40, 10.22it/s]

{'loss': Array(0.17703475, dtype=float32), 'loss_cross_entropy': Array(0.16623014, dtype=float32)}


  6%|▌         | 56889/1000000 [2:36:18<24:12:57, 10.82it/s]

{'loss': Array(0.16100381, dtype=float32), 'loss_cross_entropy': Array(0.15108843, dtype=float32)}


  6%|▌         | 56899/1000000 [2:36:20<29:38:55,  8.84it/s]

{'loss': Array(0.16105595, dtype=float32), 'loss_cross_entropy': Array(0.15110485, dtype=float32)}


  6%|▌         | 56909/1000000 [2:36:21<25:39:53, 10.21it/s]

{'loss': Array(0.16614406, dtype=float32), 'loss_cross_entropy': Array(0.15548134, dtype=float32)}


  6%|▌         | 56919/1000000 [2:36:23<30:51:33,  8.49it/s]

{'loss': Array(0.15786587, dtype=float32), 'loss_cross_entropy': Array(0.14692019, dtype=float32)}


  6%|▌         | 56929/1000000 [2:36:24<25:12:16, 10.39it/s]

{'loss': Array(0.18403976, dtype=float32), 'loss_cross_entropy': Array(0.17271681, dtype=float32)}


  6%|▌         | 56939/1000000 [2:36:26<38:34:39,  6.79it/s]

{'loss': Array(0.16457076, dtype=float32), 'loss_cross_entropy': Array(0.15392375, dtype=float32)}


  6%|▌         | 56949/1000000 [2:36:27<26:44:01,  9.80it/s]

{'loss': Array(0.1617874, dtype=float32), 'loss_cross_entropy': Array(0.15188435, dtype=float32)}


  6%|▌         | 56959/1000000 [2:36:29<38:44:34,  6.76it/s]

{'loss': Array(0.17052779, dtype=float32), 'loss_cross_entropy': Array(0.15946549, dtype=float32)}


  6%|▌         | 56969/1000000 [2:36:31<26:47:54,  9.77it/s]

{'loss': Array(0.17393613, dtype=float32), 'loss_cross_entropy': Array(0.1624271, dtype=float32)}


  6%|▌         | 56979/1000000 [2:36:32<24:33:59, 10.66it/s]

{'loss': Array(0.15685846, dtype=float32), 'loss_cross_entropy': Array(0.14692001, dtype=float32)}


  6%|▌         | 56989/1000000 [2:36:34<34:00:36,  7.70it/s]

{'loss': Array(0.17906718, dtype=float32), 'loss_cross_entropy': Array(0.16756813, dtype=float32)}


  6%|▌         | 56999/1000000 [2:36:35<25:54:25, 10.11it/s]

{'loss': Array(0.15894514, dtype=float32), 'loss_cross_entropy': Array(0.14884876, dtype=float32)}


  6%|▌         | 57009/1000000 [2:36:43<84:46:16,  3.09it/s] 

{'loss': Array(0.17369969, dtype=float32), 'loss_cross_entropy': Array(0.16281883, dtype=float32)}


  6%|▌         | 57019/1000000 [2:36:44<38:02:37,  6.89it/s] 

{'loss': Array(0.1444211, dtype=float32), 'loss_cross_entropy': Array(0.13464443, dtype=float32)}


  6%|▌         | 57029/1000000 [2:36:46<26:18:43,  9.95it/s]

{'loss': Array(0.1745709, dtype=float32), 'loss_cross_entropy': Array(0.16374941, dtype=float32)}


  6%|▌         | 57039/1000000 [2:36:47<31:24:01,  8.34it/s]

{'loss': Array(0.16683647, dtype=float32), 'loss_cross_entropy': Array(0.15656745, dtype=float32)}


  6%|▌         | 57049/1000000 [2:36:49<26:07:25, 10.03it/s]

{'loss': Array(0.16154894, dtype=float32), 'loss_cross_entropy': Array(0.1514819, dtype=float32)}


  6%|▌         | 57059/1000000 [2:36:51<34:43:16,  7.54it/s]

{'loss': Array(0.18359965, dtype=float32), 'loss_cross_entropy': Array(0.1715113, dtype=float32)}


  6%|▌         | 57069/1000000 [2:36:52<25:34:04, 10.24it/s]

{'loss': Array(0.17069052, dtype=float32), 'loss_cross_entropy': Array(0.160234, dtype=float32)}


  6%|▌         | 57079/1000000 [2:36:54<38:41:33,  6.77it/s]

{'loss': Array(0.1692509, dtype=float32), 'loss_cross_entropy': Array(0.15873258, dtype=float32)}


  6%|▌         | 57089/1000000 [2:36:55<26:39:47,  9.82it/s]

{'loss': Array(0.16613075, dtype=float32), 'loss_cross_entropy': Array(0.15489225, dtype=float32)}


  6%|▌         | 57099/1000000 [2:36:56<24:43:39, 10.59it/s]

{'loss': Array(0.149309, dtype=float32), 'loss_cross_entropy': Array(0.13934074, dtype=float32)}


  6%|▌         | 57109/1000000 [2:36:58<34:18:29,  7.63it/s]

{'loss': Array(0.16917676, dtype=float32), 'loss_cross_entropy': Array(0.15939486, dtype=float32)}


  6%|▌         | 57119/1000000 [2:36:59<27:10:51,  9.64it/s]

{'loss': Array(0.16825871, dtype=float32), 'loss_cross_entropy': Array(0.15700823, dtype=float32)}


  6%|▌         | 57129/1000000 [2:37:01<25:48:12, 10.15it/s]

{'loss': Array(0.1694309, dtype=float32), 'loss_cross_entropy': Array(0.15845339, dtype=float32)}


  6%|▌         | 57139/1000000 [2:37:03<28:09:50,  9.30it/s]

{'loss': Array(0.1523757, dtype=float32), 'loss_cross_entropy': Array(0.14247684, dtype=float32)}


  6%|▌         | 57149/1000000 [2:37:04<25:20:21, 10.34it/s]

{'loss': Array(0.17534266, dtype=float32), 'loss_cross_entropy': Array(0.16396686, dtype=float32)}


  6%|▌         | 57159/1000000 [2:37:06<31:09:29,  8.41it/s]

{'loss': Array(0.15031621, dtype=float32), 'loss_cross_entropy': Array(0.14005816, dtype=float32)}


  6%|▌         | 57169/1000000 [2:37:07<27:15:59,  9.61it/s]

{'loss': Array(0.15806861, dtype=float32), 'loss_cross_entropy': Array(0.14714305, dtype=float32)}


  6%|▌         | 57179/1000000 [2:37:09<38:50:22,  6.74it/s]

{'loss': Array(0.15556028, dtype=float32), 'loss_cross_entropy': Array(0.14577678, dtype=float32)}


  6%|▌         | 57189/1000000 [2:37:10<27:04:47,  9.67it/s]

{'loss': Array(0.15744635, dtype=float32), 'loss_cross_entropy': Array(0.14670281, dtype=float32)}


  6%|▌         | 57199/1000000 [2:37:12<25:00:25, 10.47it/s]

{'loss': Array(0.18632813, dtype=float32), 'loss_cross_entropy': Array(0.17446253, dtype=float32)}


  6%|▌         | 57209/1000000 [2:37:13<30:57:29,  8.46it/s]

{'loss': Array(0.13701324, dtype=float32), 'loss_cross_entropy': Array(0.12795497, dtype=float32)}


  6%|▌         | 57219/1000000 [2:37:15<25:16:38, 10.36it/s]

{'loss': Array(0.16349737, dtype=float32), 'loss_cross_entropy': Array(0.15302812, dtype=float32)}


  6%|▌         | 57229/1000000 [2:37:16<38:28:36,  6.81it/s]

{'loss': Array(0.15709764, dtype=float32), 'loss_cross_entropy': Array(0.14704005, dtype=float32)}


  6%|▌         | 57239/1000000 [2:37:18<26:34:26,  9.85it/s]

{'loss': Array(0.15582274, dtype=float32), 'loss_cross_entropy': Array(0.14581679, dtype=float32)}


  6%|▌         | 57249/1000000 [2:37:19<24:24:29, 10.73it/s]

{'loss': Array(0.16334742, dtype=float32), 'loss_cross_entropy': Array(0.15237407, dtype=float32)}


  6%|▌         | 57259/1000000 [2:37:21<28:49:35,  9.08it/s]

{'loss': Array(0.18726473, dtype=float32), 'loss_cross_entropy': Array(0.1763617, dtype=float32)}


  6%|▌         | 57269/1000000 [2:37:22<25:26:49, 10.29it/s]

{'loss': Array(0.17353581, dtype=float32), 'loss_cross_entropy': Array(0.1629575, dtype=float32)}


  6%|▌         | 57279/1000000 [2:37:24<34:38:33,  7.56it/s]

{'loss': Array(0.14766888, dtype=float32), 'loss_cross_entropy': Array(0.13734232, dtype=float32)}


  6%|▌         | 57289/1000000 [2:37:25<25:49:51, 10.14it/s]

{'loss': Array(0.16996565, dtype=float32), 'loss_cross_entropy': Array(0.15953156, dtype=float32)}


  6%|▌         | 57299/1000000 [2:37:27<24:49:30, 10.55it/s]

{'loss': Array(0.16953616, dtype=float32), 'loss_cross_entropy': Array(0.15847175, dtype=float32)}


  6%|▌         | 57309/1000000 [2:37:28<31:15:22,  8.38it/s]

{'loss': Array(0.20456521, dtype=float32), 'loss_cross_entropy': Array(0.19187239, dtype=float32)}


  6%|▌         | 57319/1000000 [2:37:30<25:15:58, 10.36it/s]

{'loss': Array(0.16105312, dtype=float32), 'loss_cross_entropy': Array(0.15110119, dtype=float32)}


  6%|▌         | 57329/1000000 [2:37:31<24:11:40, 10.82it/s]

{'loss': Array(0.15801017, dtype=float32), 'loss_cross_entropy': Array(0.14754175, dtype=float32)}


  6%|▌         | 57339/1000000 [2:37:33<27:32:56,  9.50it/s]

{'loss': Array(0.1572958, dtype=float32), 'loss_cross_entropy': Array(0.14670004, dtype=float32)}


  6%|▌         | 57349/1000000 [2:37:34<24:24:41, 10.73it/s]

{'loss': Array(0.16621554, dtype=float32), 'loss_cross_entropy': Array(0.15491663, dtype=float32)}


  6%|▌         | 57359/1000000 [2:37:36<31:15:32,  8.38it/s]

{'loss': Array(0.14356092, dtype=float32), 'loss_cross_entropy': Array(0.13391556, dtype=float32)}


  6%|▌         | 57369/1000000 [2:37:37<26:17:59,  9.96it/s]

{'loss': Array(0.17075478, dtype=float32), 'loss_cross_entropy': Array(0.15969191, dtype=float32)}


  6%|▌         | 57379/1000000 [2:37:39<39:11:49,  6.68it/s]

{'loss': Array(0.15807973, dtype=float32), 'loss_cross_entropy': Array(0.14858745, dtype=float32)}


  6%|▌         | 57389/1000000 [2:37:40<27:11:14,  9.63it/s]

{'loss': Array(0.17219876, dtype=float32), 'loss_cross_entropy': Array(0.16144513, dtype=float32)}


  6%|▌         | 57399/1000000 [2:37:42<38:49:03,  6.75it/s]

{'loss': Array(0.15931062, dtype=float32), 'loss_cross_entropy': Array(0.14927022, dtype=float32)}


  6%|▌         | 57409/1000000 [2:37:43<26:53:08,  9.74it/s]

{'loss': Array(0.1611462, dtype=float32), 'loss_cross_entropy': Array(0.15111618, dtype=float32)}


  6%|▌         | 57419/1000000 [2:37:45<24:47:12, 10.56it/s]

{'loss': Array(0.14044541, dtype=float32), 'loss_cross_entropy': Array(0.1317643, dtype=float32)}


  6%|▌         | 57429/1000000 [2:37:47<34:59:16,  7.48it/s]

{'loss': Array(0.16437338, dtype=float32), 'loss_cross_entropy': Array(0.15397325, dtype=float32)}


  6%|▌         | 57439/1000000 [2:37:48<26:07:28, 10.02it/s]

{'loss': Array(0.14951177, dtype=float32), 'loss_cross_entropy': Array(0.14022523, dtype=float32)}


  6%|▌         | 57449/1000000 [2:37:50<38:14:15,  6.85it/s]

{'loss': Array(0.1636881, dtype=float32), 'loss_cross_entropy': Array(0.15336452, dtype=float32)}


  6%|▌         | 57459/1000000 [2:37:51<26:18:31,  9.95it/s]

{'loss': Array(0.17728092, dtype=float32), 'loss_cross_entropy': Array(0.16677849, dtype=float32)}


  6%|▌         | 57469/1000000 [2:37:52<24:30:52, 10.68it/s]

{'loss': Array(0.18289962, dtype=float32), 'loss_cross_entropy': Array(0.17168352, dtype=float32)}


  6%|▌         | 57479/1000000 [2:37:54<27:39:09,  9.47it/s]

{'loss': Array(0.15260027, dtype=float32), 'loss_cross_entropy': Array(0.14286827, dtype=float32)}


  6%|▌         | 57489/1000000 [2:37:55<24:45:33, 10.57it/s]

{'loss': Array(0.17893992, dtype=float32), 'loss_cross_entropy': Array(0.16802803, dtype=float32)}


  6%|▌         | 57499/1000000 [2:37:57<27:35:09,  9.49it/s]

{'loss': Array(0.18384357, dtype=float32), 'loss_cross_entropy': Array(0.17183124, dtype=float32)}


  6%|▌         | 57509/1000000 [2:38:05<88:58:18,  2.94it/s] 

{'loss': Array(0.17941676, dtype=float32), 'loss_cross_entropy': Array(0.16822796, dtype=float32)}


  6%|▌         | 57519/1000000 [2:38:07<42:19:51,  6.18it/s]

{'loss': Array(0.15787116, dtype=float32), 'loss_cross_entropy': Array(0.14810108, dtype=float32)}


  6%|▌         | 57529/1000000 [2:38:08<27:21:29,  9.57it/s]

{'loss': Array(0.17166805, dtype=float32), 'loss_cross_entropy': Array(0.16035922, dtype=float32)}


  6%|▌         | 57539/1000000 [2:38:10<24:48:27, 10.55it/s]

{'loss': Array(0.14908904, dtype=float32), 'loss_cross_entropy': Array(0.13944678, dtype=float32)}


  6%|▌         | 57549/1000000 [2:38:11<29:16:36,  8.94it/s]

{'loss': Array(0.15207468, dtype=float32), 'loss_cross_entropy': Array(0.14215484, dtype=float32)}


  6%|▌         | 57559/1000000 [2:38:13<26:58:17,  9.71it/s]

{'loss': Array(0.1525052, dtype=float32), 'loss_cross_entropy': Array(0.1424767, dtype=float32)}


  6%|▌         | 57569/1000000 [2:38:14<38:45:44,  6.75it/s]

{'loss': Array(0.14832693, dtype=float32), 'loss_cross_entropy': Array(0.13867937, dtype=float32)}


  6%|▌         | 57579/1000000 [2:38:16<26:40:52,  9.81it/s]

{'loss': Array(0.18081968, dtype=float32), 'loss_cross_entropy': Array(0.16934167, dtype=float32)}


  6%|▌         | 57589/1000000 [2:38:17<24:42:13, 10.60it/s]

{'loss': Array(0.14891502, dtype=float32), 'loss_cross_entropy': Array(0.13908273, dtype=float32)}


  6%|▌         | 57599/1000000 [2:38:19<31:09:54,  8.40it/s]

{'loss': Array(0.16545995, dtype=float32), 'loss_cross_entropy': Array(0.1551148, dtype=float32)}


  6%|▌         | 57609/1000000 [2:38:20<25:27:59, 10.28it/s]

{'loss': Array(0.1629281, dtype=float32), 'loss_cross_entropy': Array(0.15287483, dtype=float32)}


  6%|▌         | 57619/1000000 [2:38:22<34:42:49,  7.54it/s]

{'loss': Array(0.17117716, dtype=float32), 'loss_cross_entropy': Array(0.16088003, dtype=float32)}


  6%|▌         | 57629/1000000 [2:38:23<26:21:56,  9.93it/s]

{'loss': Array(0.17655961, dtype=float32), 'loss_cross_entropy': Array(0.16543083, dtype=float32)}


  6%|▌         | 57639/1000000 [2:38:25<34:36:20,  7.56it/s]

{'loss': Array(0.18343365, dtype=float32), 'loss_cross_entropy': Array(0.17222683, dtype=float32)}


  6%|▌         | 57649/1000000 [2:38:26<26:10:31, 10.00it/s]

{'loss': Array(0.18096893, dtype=float32), 'loss_cross_entropy': Array(0.17046297, dtype=float32)}


  6%|▌         | 57659/1000000 [2:38:28<24:22:18, 10.74it/s]

{'loss': Array(0.15451886, dtype=float32), 'loss_cross_entropy': Array(0.14439586, dtype=float32)}


  6%|▌         | 57669/1000000 [2:38:30<28:54:32,  9.05it/s]

{'loss': Array(0.1470472, dtype=float32), 'loss_cross_entropy': Array(0.13738889, dtype=float32)}


  6%|▌         | 57679/1000000 [2:38:31<25:03:52, 10.44it/s]

{'loss': Array(0.15333371, dtype=float32), 'loss_cross_entropy': Array(0.14362521, dtype=float32)}


  6%|▌         | 57689/1000000 [2:38:33<31:33:02,  8.30it/s]

{'loss': Array(0.1641362, dtype=float32), 'loss_cross_entropy': Array(0.15363961, dtype=float32)}


  6%|▌         | 57699/1000000 [2:38:34<25:45:53, 10.16it/s]

{'loss': Array(0.1795714, dtype=float32), 'loss_cross_entropy': Array(0.16857114, dtype=float32)}


  6%|▌         | 57709/1000000 [2:38:36<39:05:32,  6.70it/s]

{'loss': Array(0.15773039, dtype=float32), 'loss_cross_entropy': Array(0.1469706, dtype=float32)}


  6%|▌         | 57719/1000000 [2:38:37<26:54:21,  9.73it/s]

{'loss': Array(0.17279108, dtype=float32), 'loss_cross_entropy': Array(0.1626052, dtype=float32)}


  6%|▌         | 57729/1000000 [2:38:39<39:02:01,  6.71it/s]

{'loss': Array(0.15864675, dtype=float32), 'loss_cross_entropy': Array(0.14896643, dtype=float32)}


  6%|▌         | 57739/1000000 [2:38:40<26:53:28,  9.73it/s]

{'loss': Array(0.17273091, dtype=float32), 'loss_cross_entropy': Array(0.16146134, dtype=float32)}


  6%|▌         | 57749/1000000 [2:38:42<24:52:31, 10.52it/s]

{'loss': Array(0.1741883, dtype=float32), 'loss_cross_entropy': Array(0.162911, dtype=float32)}


  6%|▌         | 57759/1000000 [2:38:43<34:24:15,  7.61it/s]

{'loss': Array(0.14594296, dtype=float32), 'loss_cross_entropy': Array(0.13640581, dtype=float32)}


  6%|▌         | 57769/1000000 [2:38:45<26:15:39,  9.97it/s]

{'loss': Array(0.15548871, dtype=float32), 'loss_cross_entropy': Array(0.14531367, dtype=float32)}


  6%|▌         | 57779/1000000 [2:38:46<39:17:37,  6.66it/s]

{'loss': Array(0.1613162, dtype=float32), 'loss_cross_entropy': Array(0.15138511, dtype=float32)}


  6%|▌         | 57789/1000000 [2:38:48<26:45:02,  9.78it/s]

{'loss': Array(0.12491416, dtype=float32), 'loss_cross_entropy': Array(0.11679502, dtype=float32)}


  6%|▌         | 57799/1000000 [2:38:49<25:16:29, 10.36it/s]

{'loss': Array(0.16532227, dtype=float32), 'loss_cross_entropy': Array(0.15541703, dtype=float32)}


  6%|▌         | 57809/1000000 [2:38:51<28:02:07,  9.34it/s]

{'loss': Array(0.17196658, dtype=float32), 'loss_cross_entropy': Array(0.16179107, dtype=float32)}


  6%|▌         | 57819/1000000 [2:38:52<25:17:39, 10.35it/s]

{'loss': Array(0.17254621, dtype=float32), 'loss_cross_entropy': Array(0.16125564, dtype=float32)}


  6%|▌         | 57829/1000000 [2:38:54<27:37:29,  9.47it/s]

{'loss': Array(0.16159157, dtype=float32), 'loss_cross_entropy': Array(0.15142356, dtype=float32)}


  6%|▌         | 57839/1000000 [2:38:55<25:25:23, 10.29it/s]

{'loss': Array(0.19031821, dtype=float32), 'loss_cross_entropy': Array(0.17833309, dtype=float32)}


  6%|▌         | 57849/1000000 [2:38:57<39:09:59,  6.68it/s]

{'loss': Array(0.17150761, dtype=float32), 'loss_cross_entropy': Array(0.16120094, dtype=float32)}


  6%|▌         | 57859/1000000 [2:38:59<27:40:39,  9.46it/s]

{'loss': Array(0.1526771, dtype=float32), 'loss_cross_entropy': Array(0.14326043, dtype=float32)}


  6%|▌         | 57869/1000000 [2:39:00<26:02:34, 10.05it/s]

{'loss': Array(0.17234759, dtype=float32), 'loss_cross_entropy': Array(0.16147234, dtype=float32)}


  6%|▌         | 57879/1000000 [2:39:02<33:09:22,  7.89it/s]

{'loss': Array(0.17429526, dtype=float32), 'loss_cross_entropy': Array(0.16339634, dtype=float32)}


  6%|▌         | 57889/1000000 [2:39:03<27:16:22,  9.60it/s]

{'loss': Array(0.14894754, dtype=float32), 'loss_cross_entropy': Array(0.13901259, dtype=float32)}


  6%|▌         | 57899/1000000 [2:39:05<40:14:19,  6.50it/s]

{'loss': Array(0.15572038, dtype=float32), 'loss_cross_entropy': Array(0.14589988, dtype=float32)}


  6%|▌         | 57909/1000000 [2:39:07<27:33:03,  9.50it/s]

{'loss': Array(0.16570126, dtype=float32), 'loss_cross_entropy': Array(0.15639667, dtype=float32)}


  6%|▌         | 57919/1000000 [2:39:08<25:02:19, 10.45it/s]

{'loss': Array(0.15963374, dtype=float32), 'loss_cross_entropy': Array(0.14967322, dtype=float32)}


  6%|▌         | 57929/1000000 [2:39:10<29:51:08,  8.77it/s]

{'loss': Array(0.17497724, dtype=float32), 'loss_cross_entropy': Array(0.16400103, dtype=float32)}


  6%|▌         | 57939/1000000 [2:39:11<25:33:06, 10.24it/s]

{'loss': Array(0.18357144, dtype=float32), 'loss_cross_entropy': Array(0.1718002, dtype=float32)}


  6%|▌         | 57949/1000000 [2:39:13<35:00:52,  7.47it/s]

{'loss': Array(0.20621657, dtype=float32), 'loss_cross_entropy': Array(0.19411801, dtype=float32)}


  6%|▌         | 57959/1000000 [2:39:14<26:29:11,  9.88it/s]

{'loss': Array(0.15713514, dtype=float32), 'loss_cross_entropy': Array(0.14805983, dtype=float32)}


  6%|▌         | 57969/1000000 [2:39:16<24:32:56, 10.66it/s]

{'loss': Array(0.16545892, dtype=float32), 'loss_cross_entropy': Array(0.1539477, dtype=float32)}


  6%|▌         | 57979/1000000 [2:39:17<31:27:22,  8.32it/s]

{'loss': Array(0.16989566, dtype=float32), 'loss_cross_entropy': Array(0.15976013, dtype=float32)}


  6%|▌         | 57989/1000000 [2:39:19<26:03:11, 10.04it/s]

{'loss': Array(0.16264054, dtype=float32), 'loss_cross_entropy': Array(0.15231511, dtype=float32)}


  6%|▌         | 57999/1000000 [2:39:20<24:42:09, 10.59it/s]

{'loss': Array(0.16538958, dtype=float32), 'loss_cross_entropy': Array(0.1551141, dtype=float32)}


  6%|▌         | 58009/1000000 [2:39:28<89:18:40,  2.93it/s] 

{'loss': Array(0.16236615, dtype=float32), 'loss_cross_entropy': Array(0.15205532, dtype=float32)}


  6%|▌         | 58019/1000000 [2:39:30<35:15:24,  7.42it/s]

{'loss': Array(0.18170772, dtype=float32), 'loss_cross_entropy': Array(0.17089358, dtype=float32)}


  6%|▌         | 58029/1000000 [2:39:31<35:59:59,  7.27it/s]

{'loss': Array(0.14915, dtype=float32), 'loss_cross_entropy': Array(0.13934804, dtype=float32)}


  6%|▌         | 58039/1000000 [2:39:33<26:00:24, 10.06it/s]

{'loss': Array(0.17700163, dtype=float32), 'loss_cross_entropy': Array(0.16625848, dtype=float32)}


  6%|▌         | 58049/1000000 [2:39:35<39:04:35,  6.70it/s]

{'loss': Array(0.1537239, dtype=float32), 'loss_cross_entropy': Array(0.14341556, dtype=float32)}


  6%|▌         | 58059/1000000 [2:39:36<26:45:44,  9.78it/s]

{'loss': Array(0.1656916, dtype=float32), 'loss_cross_entropy': Array(0.15527247, dtype=float32)}


  6%|▌         | 58069/1000000 [2:39:37<24:42:43, 10.59it/s]

{'loss': Array(0.15581436, dtype=float32), 'loss_cross_entropy': Array(0.14640383, dtype=float32)}


  6%|▌         | 58079/1000000 [2:39:39<28:48:35,  9.08it/s]

{'loss': Array(0.14599924, dtype=float32), 'loss_cross_entropy': Array(0.13661335, dtype=float32)}


  6%|▌         | 58089/1000000 [2:39:40<24:37:32, 10.62it/s]

{'loss': Array(0.18414408, dtype=float32), 'loss_cross_entropy': Array(0.17211805, dtype=float32)}


  6%|▌         | 58099/1000000 [2:39:42<23:56:52, 10.93it/s]

{'loss': Array(0.17907147, dtype=float32), 'loss_cross_entropy': Array(0.1685115, dtype=float32)}


  6%|▌         | 58109/1000000 [2:39:43<27:26:09,  9.54it/s]

{'loss': Array(0.17049423, dtype=float32), 'loss_cross_entropy': Array(0.15934643, dtype=float32)}


  6%|▌         | 58119/1000000 [2:39:45<25:06:38, 10.42it/s]

{'loss': Array(0.15925881, dtype=float32), 'loss_cross_entropy': Array(0.14869857, dtype=float32)}


  6%|▌         | 58129/1000000 [2:39:46<38:21:41,  6.82it/s]

{'loss': Array(0.17376086, dtype=float32), 'loss_cross_entropy': Array(0.1621517, dtype=float32)}


  6%|▌         | 58139/1000000 [2:39:48<26:22:09,  9.92it/s]

{'loss': Array(0.17345218, dtype=float32), 'loss_cross_entropy': Array(0.16229066, dtype=float32)}


  6%|▌         | 58149/1000000 [2:39:49<24:34:39, 10.64it/s]

{'loss': Array(0.17097984, dtype=float32), 'loss_cross_entropy': Array(0.16064632, dtype=float32)}


  6%|▌         | 58159/1000000 [2:39:51<29:21:06,  8.91it/s]

{'loss': Array(0.16020814, dtype=float32), 'loss_cross_entropy': Array(0.14939561, dtype=float32)}


  6%|▌         | 58169/1000000 [2:39:52<25:05:44, 10.42it/s]

{'loss': Array(0.16752115, dtype=float32), 'loss_cross_entropy': Array(0.15627542, dtype=float32)}


  6%|▌         | 58179/1000000 [2:39:54<31:01:25,  8.43it/s]

{'loss': Array(0.1590043, dtype=float32), 'loss_cross_entropy': Array(0.14826386, dtype=float32)}


  6%|▌         | 58189/1000000 [2:39:55<25:10:04, 10.39it/s]

{'loss': Array(0.1583458, dtype=float32), 'loss_cross_entropy': Array(0.14793468, dtype=float32)}


  6%|▌         | 58199/1000000 [2:39:57<38:31:33,  6.79it/s]

{'loss': Array(0.18023185, dtype=float32), 'loss_cross_entropy': Array(0.16826263, dtype=float32)}


  6%|▌         | 58209/1000000 [2:39:58<26:20:38,  9.93it/s]

{'loss': Array(0.17445305, dtype=float32), 'loss_cross_entropy': Array(0.16337878, dtype=float32)}


  6%|▌         | 58219/1000000 [2:40:00<40:21:28,  6.48it/s]

{'loss': Array(0.16655485, dtype=float32), 'loss_cross_entropy': Array(0.15587513, dtype=float32)}


  6%|▌         | 58229/1000000 [2:40:02<28:44:51,  9.10it/s]

{'loss': Array(0.16496217, dtype=float32), 'loss_cross_entropy': Array(0.15432289, dtype=float32)}


  6%|▌         | 58239/1000000 [2:40:03<25:12:55, 10.37it/s]

{'loss': Array(0.17629063, dtype=float32), 'loss_cross_entropy': Array(0.16519926, dtype=float32)}


  6%|▌         | 58249/1000000 [2:40:05<35:51:46,  7.29it/s]

{'loss': Array(0.15322073, dtype=float32), 'loss_cross_entropy': Array(0.14286707, dtype=float32)}


  6%|▌         | 58259/1000000 [2:40:06<26:48:40,  9.76it/s]

{'loss': Array(0.16668688, dtype=float32), 'loss_cross_entropy': Array(0.15620737, dtype=float32)}


  6%|▌         | 58269/1000000 [2:40:08<38:53:57,  6.72it/s]

{'loss': Array(0.14867194, dtype=float32), 'loss_cross_entropy': Array(0.1390387, dtype=float32)}


  6%|▌         | 58279/1000000 [2:40:09<27:14:38,  9.60it/s]

{'loss': Array(0.14197803, dtype=float32), 'loss_cross_entropy': Array(0.13256426, dtype=float32)}


  6%|▌         | 58289/1000000 [2:40:11<25:01:31, 10.45it/s]

{'loss': Array(0.17251317, dtype=float32), 'loss_cross_entropy': Array(0.16149762, dtype=float32)}


  6%|▌         | 58299/1000000 [2:40:12<27:51:50,  9.39it/s]

{'loss': Array(0.15550724, dtype=float32), 'loss_cross_entropy': Array(0.14550683, dtype=float32)}


  6%|▌         | 58309/1000000 [2:40:14<24:54:30, 10.50it/s]

{'loss': Array(0.16799569, dtype=float32), 'loss_cross_entropy': Array(0.15751675, dtype=float32)}


  6%|▌         | 58319/1000000 [2:40:15<27:36:51,  9.47it/s]

{'loss': Array(0.18285494, dtype=float32), 'loss_cross_entropy': Array(0.17206153, dtype=float32)}


  6%|▌         | 58329/1000000 [2:40:17<24:46:10, 10.56it/s]

{'loss': Array(0.15510534, dtype=float32), 'loss_cross_entropy': Array(0.14573877, dtype=float32)}


  6%|▌         | 58339/1000000 [2:40:19<38:54:00,  6.72it/s]

{'loss': Array(0.1514069, dtype=float32), 'loss_cross_entropy': Array(0.14156565, dtype=float32)}


  6%|▌         | 58349/1000000 [2:40:20<26:48:24,  9.76it/s]

{'loss': Array(0.18226378, dtype=float32), 'loss_cross_entropy': Array(0.17154543, dtype=float32)}


  6%|▌         | 58359/1000000 [2:40:21<26:45:18,  9.78it/s]

{'loss': Array(0.16339166, dtype=float32), 'loss_cross_entropy': Array(0.1534067, dtype=float32)}


  6%|▌         | 58369/1000000 [2:40:23<31:22:39,  8.34it/s]

{'loss': Array(0.13901024, dtype=float32), 'loss_cross_entropy': Array(0.13055567, dtype=float32)}


  6%|▌         | 58379/1000000 [2:40:24<25:13:13, 10.37it/s]

{'loss': Array(0.16333663, dtype=float32), 'loss_cross_entropy': Array(0.15327232, dtype=float32)}


  6%|▌         | 58389/1000000 [2:40:26<38:10:04,  6.85it/s]

{'loss': Array(0.15686934, dtype=float32), 'loss_cross_entropy': Array(0.14717525, dtype=float32)}


  6%|▌         | 58399/1000000 [2:40:27<26:51:31,  9.74it/s]

{'loss': Array(0.149255, dtype=float32), 'loss_cross_entropy': Array(0.13968031, dtype=float32)}


  6%|▌         | 58409/1000000 [2:40:29<24:49:15, 10.54it/s]

{'loss': Array(0.1698649, dtype=float32), 'loss_cross_entropy': Array(0.15891199, dtype=float32)}


  6%|▌         | 58419/1000000 [2:40:30<28:58:16,  9.03it/s]

{'loss': Array(0.17388175, dtype=float32), 'loss_cross_entropy': Array(0.16361506, dtype=float32)}


  6%|▌         | 58429/1000000 [2:40:32<25:02:29, 10.44it/s]

{'loss': Array(0.15472712, dtype=float32), 'loss_cross_entropy': Array(0.14456674, dtype=float32)}


  6%|▌         | 58439/1000000 [2:40:34<34:51:27,  7.50it/s]

{'loss': Array(0.18820861, dtype=float32), 'loss_cross_entropy': Array(0.17767976, dtype=float32)}


  6%|▌         | 58449/1000000 [2:40:35<25:38:17, 10.20it/s]

{'loss': Array(0.17240274, dtype=float32), 'loss_cross_entropy': Array(0.16133028, dtype=float32)}


  6%|▌         | 58459/1000000 [2:40:36<23:57:03, 10.92it/s]

{'loss': Array(0.15809621, dtype=float32), 'loss_cross_entropy': Array(0.1480681, dtype=float32)}


  6%|▌         | 58469/1000000 [2:40:38<31:18:05,  8.36it/s]

{'loss': Array(0.16577125, dtype=float32), 'loss_cross_entropy': Array(0.15486756, dtype=float32)}


  6%|▌         | 58479/1000000 [2:40:39<25:20:38, 10.32it/s]

{'loss': Array(0.16924933, dtype=float32), 'loss_cross_entropy': Array(0.15853973, dtype=float32)}


  6%|▌         | 58489/1000000 [2:40:41<24:09:21, 10.83it/s]

{'loss': Array(0.1763788, dtype=float32), 'loss_cross_entropy': Array(0.16524895, dtype=float32)}


  6%|▌         | 58499/1000000 [2:40:42<27:35:26,  9.48it/s]

{'loss': Array(0.1486094, dtype=float32), 'loss_cross_entropy': Array(0.13940749, dtype=float32)}


  6%|▌         | 58509/1000000 [2:40:50<84:28:11,  3.10it/s] 

{'loss': Array(0.18356809, dtype=float32), 'loss_cross_entropy': Array(0.17230782, dtype=float32)}


  6%|▌         | 58519/1000000 [2:40:52<41:23:33,  6.32it/s]

{'loss': Array(0.14878011, dtype=float32), 'loss_cross_entropy': Array(0.13909772, dtype=float32)}


  6%|▌         | 58529/1000000 [2:40:53<27:04:40,  9.66it/s]

{'loss': Array(0.16621785, dtype=float32), 'loss_cross_entropy': Array(0.15584993, dtype=float32)}


  6%|▌         | 58539/1000000 [2:40:55<35:17:16,  7.41it/s]

{'loss': Array(0.18020271, dtype=float32), 'loss_cross_entropy': Array(0.16918582, dtype=float32)}


  6%|▌         | 58549/1000000 [2:40:56<26:11:31,  9.98it/s]

{'loss': Array(0.1718733, dtype=float32), 'loss_cross_entropy': Array(0.16092925, dtype=float32)}


  6%|▌         | 58559/1000000 [2:40:58<24:24:01, 10.72it/s]

{'loss': Array(0.16155104, dtype=float32), 'loss_cross_entropy': Array(0.15091707, dtype=float32)}


  6%|▌         | 58569/1000000 [2:40:59<28:55:26,  9.04it/s]

{'loss': Array(0.16470018, dtype=float32), 'loss_cross_entropy': Array(0.15394965, dtype=float32)}


  6%|▌         | 58579/1000000 [2:41:01<25:11:22, 10.38it/s]

{'loss': Array(0.16711676, dtype=float32), 'loss_cross_entropy': Array(0.15713684, dtype=float32)}


  6%|▌         | 58589/1000000 [2:41:02<38:59:35,  6.71it/s]

{'loss': Array(0.16946068, dtype=float32), 'loss_cross_entropy': Array(0.15898538, dtype=float32)}


  6%|▌         | 58599/1000000 [2:41:04<26:53:52,  9.72it/s]

{'loss': Array(0.15316486, dtype=float32), 'loss_cross_entropy': Array(0.14268859, dtype=float32)}


  6%|▌         | 58609/1000000 [2:41:05<25:45:48, 10.15it/s]

{'loss': Array(0.14668684, dtype=float32), 'loss_cross_entropy': Array(0.13730104, dtype=float32)}


  6%|▌         | 58619/1000000 [2:41:07<32:07:24,  8.14it/s]

{'loss': Array(0.15333387, dtype=float32), 'loss_cross_entropy': Array(0.1435211, dtype=float32)}


  6%|▌         | 58629/1000000 [2:41:08<26:10:20,  9.99it/s]

{'loss': Array(0.1598119, dtype=float32), 'loss_cross_entropy': Array(0.14961651, dtype=float32)}


  6%|▌         | 58639/1000000 [2:41:10<35:13:17,  7.42it/s]

{'loss': Array(0.16209109, dtype=float32), 'loss_cross_entropy': Array(0.15194045, dtype=float32)}


  6%|▌         | 58649/1000000 [2:41:12<26:16:57,  9.95it/s]

{'loss': Array(0.16592808, dtype=float32), 'loss_cross_entropy': Array(0.15531014, dtype=float32)}


  6%|▌         | 58659/1000000 [2:41:13<24:38:19, 10.61it/s]

{'loss': Array(0.1492881, dtype=float32), 'loss_cross_entropy': Array(0.13988267, dtype=float32)}


  6%|▌         | 58669/1000000 [2:41:15<27:27:29,  9.52it/s]

{'loss': Array(0.16979584, dtype=float32), 'loss_cross_entropy': Array(0.15930574, dtype=float32)}


  6%|▌         | 58679/1000000 [2:41:16<24:26:42, 10.70it/s]

{'loss': Array(0.15616125, dtype=float32), 'loss_cross_entropy': Array(0.14601661, dtype=float32)}


  6%|▌         | 58689/1000000 [2:41:18<38:30:58,  6.79it/s]

{'loss': Array(0.17616515, dtype=float32), 'loss_cross_entropy': Array(0.16581161, dtype=float32)}


  6%|▌         | 58699/1000000 [2:41:19<27:13:05,  9.61it/s]

{'loss': Array(0.16565892, dtype=float32), 'loss_cross_entropy': Array(0.15523456, dtype=float32)}


  6%|▌         | 58709/1000000 [2:41:20<24:59:02, 10.47it/s]

{'loss': Array(0.16841839, dtype=float32), 'loss_cross_entropy': Array(0.15806185, dtype=float32)}


  6%|▌         | 58719/1000000 [2:41:22<30:45:56,  8.50it/s]

{'loss': Array(0.19108005, dtype=float32), 'loss_cross_entropy': Array(0.18003704, dtype=float32)}


  6%|▌         | 58729/1000000 [2:41:23<25:08:21, 10.40it/s]

{'loss': Array(0.1765714, dtype=float32), 'loss_cross_entropy': Array(0.16538244, dtype=float32)}


  6%|▌         | 58739/1000000 [2:41:25<33:43:16,  7.75it/s]

{'loss': Array(0.16438532, dtype=float32), 'loss_cross_entropy': Array(0.15418129, dtype=float32)}


  6%|▌         | 58749/1000000 [2:41:26<25:24:29, 10.29it/s]

{'loss': Array(0.16549675, dtype=float32), 'loss_cross_entropy': Array(0.15512352, dtype=float32)}


  6%|▌         | 58759/1000000 [2:41:28<33:59:26,  7.69it/s]

{'loss': Array(0.14930038, dtype=float32), 'loss_cross_entropy': Array(0.13992527, dtype=float32)}


  6%|▌         | 58769/1000000 [2:41:29<26:23:41,  9.91it/s]

{'loss': Array(0.15938784, dtype=float32), 'loss_cross_entropy': Array(0.14895579, dtype=float32)}


  6%|▌         | 58779/1000000 [2:41:31<24:24:50, 10.71it/s]

{'loss': Array(0.14879692, dtype=float32), 'loss_cross_entropy': Array(0.1388961, dtype=float32)}


  6%|▌         | 58789/1000000 [2:41:33<29:56:22,  8.73it/s]

{'loss': Array(0.17769778, dtype=float32), 'loss_cross_entropy': Array(0.16656552, dtype=float32)}


  6%|▌         | 58799/1000000 [2:41:34<25:09:07, 10.39it/s]

{'loss': Array(0.16480432, dtype=float32), 'loss_cross_entropy': Array(0.15461223, dtype=float32)}


  6%|▌         | 58809/1000000 [2:41:36<31:51:00,  8.21it/s]

{'loss': Array(0.17428708, dtype=float32), 'loss_cross_entropy': Array(0.16411771, dtype=float32)}


  6%|▌         | 58819/1000000 [2:41:37<25:15:29, 10.35it/s]

{'loss': Array(0.16192994, dtype=float32), 'loss_cross_entropy': Array(0.15150858, dtype=float32)}


  6%|▌         | 58829/1000000 [2:41:39<38:48:05,  6.74it/s]

{'loss': Array(0.15948631, dtype=float32), 'loss_cross_entropy': Array(0.14865382, dtype=float32)}


  6%|▌         | 58839/1000000 [2:41:40<27:01:55,  9.67it/s]

{'loss': Array(0.16988747, dtype=float32), 'loss_cross_entropy': Array(0.15892667, dtype=float32)}


  6%|▌         | 58849/1000000 [2:41:42<38:43:45,  6.75it/s]

{'loss': Array(0.17692144, dtype=float32), 'loss_cross_entropy': Array(0.16515082, dtype=float32)}


  6%|▌         | 58859/1000000 [2:41:43<26:11:36,  9.98it/s]

{'loss': Array(0.1714422, dtype=float32), 'loss_cross_entropy': Array(0.16019818, dtype=float32)}


  6%|▌         | 58869/1000000 [2:41:44<24:19:06, 10.75it/s]

{'loss': Array(0.16205238, dtype=float32), 'loss_cross_entropy': Array(0.15258749, dtype=float32)}


  6%|▌         | 58879/1000000 [2:41:46<34:03:10,  7.68it/s]

{'loss': Array(0.15409638, dtype=float32), 'loss_cross_entropy': Array(0.14430459, dtype=float32)}


  6%|▌         | 58889/1000000 [2:41:47<25:35:55, 10.21it/s]

{'loss': Array(0.16102104, dtype=float32), 'loss_cross_entropy': Array(0.15086518, dtype=float32)}


  6%|▌         | 58899/1000000 [2:41:49<38:25:47,  6.80it/s]

{'loss': Array(0.16138195, dtype=float32), 'loss_cross_entropy': Array(0.15129422, dtype=float32)}


  6%|▌         | 58909/1000000 [2:41:51<26:38:15,  9.81it/s]

{'loss': Array(0.1711191, dtype=float32), 'loss_cross_entropy': Array(0.16069244, dtype=float32)}


  6%|▌         | 58919/1000000 [2:41:52<24:35:00, 10.63it/s]

{'loss': Array(0.16064589, dtype=float32), 'loss_cross_entropy': Array(0.1499757, dtype=float32)}


  6%|▌         | 58929/1000000 [2:41:54<27:14:48,  9.59it/s]

{'loss': Array(0.1514596, dtype=float32), 'loss_cross_entropy': Array(0.14218761, dtype=float32)}


  6%|▌         | 58939/1000000 [2:41:55<24:12:31, 10.80it/s]

{'loss': Array(0.148916, dtype=float32), 'loss_cross_entropy': Array(0.13901193, dtype=float32)}


  6%|▌         | 58949/1000000 [2:41:57<28:07:47,  9.29it/s]

{'loss': Array(0.17302848, dtype=float32), 'loss_cross_entropy': Array(0.16194563, dtype=float32)}


  6%|▌         | 58959/1000000 [2:41:58<24:50:54, 10.52it/s]

{'loss': Array(0.1484486, dtype=float32), 'loss_cross_entropy': Array(0.13839225, dtype=float32)}


  6%|▌         | 58969/1000000 [2:42:00<39:44:14,  6.58it/s]

{'loss': Array(0.14473516, dtype=float32), 'loss_cross_entropy': Array(0.13629514, dtype=float32)}


  6%|▌         | 58979/1000000 [2:42:01<28:11:53,  9.27it/s]

{'loss': Array(0.17940684, dtype=float32), 'loss_cross_entropy': Array(0.16842934, dtype=float32)}


  6%|▌         | 58989/1000000 [2:42:03<26:12:44,  9.97it/s]

{'loss': Array(0.1361123, dtype=float32), 'loss_cross_entropy': Array(0.1271587, dtype=float32)}


  6%|▌         | 58999/1000000 [2:42:04<32:37:24,  8.01it/s]

{'loss': Array(0.15262641, dtype=float32), 'loss_cross_entropy': Array(0.14252535, dtype=float32)}


  6%|▌         | 59009/1000000 [2:42:12<85:08:16,  3.07it/s] 

{'loss': Array(0.16848293, dtype=float32), 'loss_cross_entropy': Array(0.15796688, dtype=float32)}


  6%|▌         | 59019/1000000 [2:42:13<34:41:21,  7.53it/s]

{'loss': Array(0.18265341, dtype=float32), 'loss_cross_entropy': Array(0.17162974, dtype=float32)}


  6%|▌         | 59029/1000000 [2:42:15<30:50:38,  8.47it/s]

{'loss': Array(0.15323809, dtype=float32), 'loss_cross_entropy': Array(0.14288399, dtype=float32)}


  6%|▌         | 59039/1000000 [2:42:17<25:31:19, 10.24it/s]

{'loss': Array(0.18379654, dtype=float32), 'loss_cross_entropy': Array(0.17258112, dtype=float32)}


  6%|▌         | 59049/1000000 [2:42:18<34:21:44,  7.61it/s]

{'loss': Array(0.15576288, dtype=float32), 'loss_cross_entropy': Array(0.14553173, dtype=float32)}


  6%|▌         | 59059/1000000 [2:42:20<25:51:39, 10.11it/s]

{'loss': Array(0.16101329, dtype=float32), 'loss_cross_entropy': Array(0.15122707, dtype=float32)}


  6%|▌         | 59069/1000000 [2:42:21<38:42:48,  6.75it/s]

{'loss': Array(0.16103789, dtype=float32), 'loss_cross_entropy': Array(0.15152775, dtype=float32)}


  6%|▌         | 59079/1000000 [2:42:23<26:51:50,  9.73it/s]

{'loss': Array(0.15385747, dtype=float32), 'loss_cross_entropy': Array(0.1432088, dtype=float32)}


  6%|▌         | 59089/1000000 [2:42:24<24:49:07, 10.53it/s]

{'loss': Array(0.16378818, dtype=float32), 'loss_cross_entropy': Array(0.1535541, dtype=float32)}


  6%|▌         | 59099/1000000 [2:42:26<38:10:19,  6.85it/s]

{'loss': Array(0.20138223, dtype=float32), 'loss_cross_entropy': Array(0.18894725, dtype=float32)}


  6%|▌         | 59109/1000000 [2:42:27<26:39:29,  9.80it/s]

{'loss': Array(0.18086533, dtype=float32), 'loss_cross_entropy': Array(0.16975413, dtype=float32)}


  6%|▌         | 59119/1000000 [2:42:29<25:05:17, 10.42it/s]

{'loss': Array(0.14454913, dtype=float32), 'loss_cross_entropy': Array(0.13540135, dtype=float32)}


  6%|▌         | 59129/1000000 [2:42:30<31:05:21,  8.41it/s]

{'loss': Array(0.16152547, dtype=float32), 'loss_cross_entropy': Array(0.15023689, dtype=float32)}


  6%|▌         | 59139/1000000 [2:42:32<25:45:14, 10.15it/s]

{'loss': Array(0.16029476, dtype=float32), 'loss_cross_entropy': Array(0.1506456, dtype=float32)}


  6%|▌         | 59149/1000000 [2:42:33<38:15:30,  6.83it/s]

{'loss': Array(0.16206357, dtype=float32), 'loss_cross_entropy': Array(0.15233119, dtype=float32)}


  6%|▌         | 59159/1000000 [2:42:35<26:46:09,  9.76it/s]

{'loss': Array(0.16686746, dtype=float32), 'loss_cross_entropy': Array(0.15601622, dtype=float32)}


  6%|▌         | 59169/1000000 [2:42:36<24:35:01, 10.63it/s]

{'loss': Array(0.15825267, dtype=float32), 'loss_cross_entropy': Array(0.14843899, dtype=float32)}


  6%|▌         | 59179/1000000 [2:42:38<29:32:14,  8.85it/s]

{'loss': Array(0.161723, dtype=float32), 'loss_cross_entropy': Array(0.15224336, dtype=float32)}


  6%|▌         | 59189/1000000 [2:42:39<25:24:03, 10.29it/s]

{'loss': Array(0.14594479, dtype=float32), 'loss_cross_entropy': Array(0.13658054, dtype=float32)}


  6%|▌         | 59199/1000000 [2:42:41<35:01:19,  7.46it/s]

{'loss': Array(0.1636993, dtype=float32), 'loss_cross_entropy': Array(0.15248452, dtype=float32)}


  6%|▌         | 59209/1000000 [2:42:42<25:54:41, 10.09it/s]

{'loss': Array(0.1555476, dtype=float32), 'loss_cross_entropy': Array(0.14504114, dtype=float32)}


  6%|▌         | 59219/1000000 [2:42:44<24:15:33, 10.77it/s]

{'loss': Array(0.1519418, dtype=float32), 'loss_cross_entropy': Array(0.14193283, dtype=float32)}


  6%|▌         | 59229/1000000 [2:42:45<30:45:51,  8.49it/s]

{'loss': Array(0.16775848, dtype=float32), 'loss_cross_entropy': Array(0.15719111, dtype=float32)}


  6%|▌         | 59239/1000000 [2:42:47<25:05:56, 10.41it/s]

{'loss': Array(0.1693614, dtype=float32), 'loss_cross_entropy': Array(0.15889803, dtype=float32)}


  6%|▌         | 59249/1000000 [2:42:48<24:02:49, 10.87it/s]

{'loss': Array(0.17488118, dtype=float32), 'loss_cross_entropy': Array(0.16417377, dtype=float32)}


  6%|▌         | 59259/1000000 [2:42:50<27:58:41,  9.34it/s]

{'loss': Array(0.16234681, dtype=float32), 'loss_cross_entropy': Array(0.15185778, dtype=float32)}


  6%|▌         | 59269/1000000 [2:42:51<25:17:10, 10.33it/s]

{'loss': Array(0.17818853, dtype=float32), 'loss_cross_entropy': Array(0.16756886, dtype=float32)}


  6%|▌         | 59279/1000000 [2:42:53<31:06:36,  8.40it/s]

{'loss': Array(0.16885173, dtype=float32), 'loss_cross_entropy': Array(0.15865342, dtype=float32)}


  6%|▌         | 59289/1000000 [2:42:54<25:12:44, 10.36it/s]

{'loss': Array(0.16465096, dtype=float32), 'loss_cross_entropy': Array(0.15421702, dtype=float32)}


  6%|▌         | 59299/1000000 [2:42:56<38:40:16,  6.76it/s]

{'loss': Array(0.15520506, dtype=float32), 'loss_cross_entropy': Array(0.14507839, dtype=float32)}


  6%|▌         | 59309/1000000 [2:42:57<26:51:17,  9.73it/s]

{'loss': Array(0.16846791, dtype=float32), 'loss_cross_entropy': Array(0.1575941, dtype=float32)}


  6%|▌         | 59319/1000000 [2:42:59<39:40:08,  6.59it/s]

{'loss': Array(0.14660238, dtype=float32), 'loss_cross_entropy': Array(0.1376531, dtype=float32)}


  6%|▌         | 59329/1000000 [2:43:00<27:53:14,  9.37it/s]

{'loss': Array(0.16143611, dtype=float32), 'loss_cross_entropy': Array(0.15170878, dtype=float32)}


  6%|▌         | 59339/1000000 [2:43:02<26:08:44,  9.99it/s]

{'loss': Array(0.14828077, dtype=float32), 'loss_cross_entropy': Array(0.13805173, dtype=float32)}


  6%|▌         | 59349/1000000 [2:43:04<34:56:59,  7.48it/s]

{'loss': Array(0.16154109, dtype=float32), 'loss_cross_entropy': Array(0.15146734, dtype=float32)}


  6%|▌         | 59359/1000000 [2:43:05<26:33:57,  9.84it/s]

{'loss': Array(0.16878486, dtype=float32), 'loss_cross_entropy': Array(0.15860955, dtype=float32)}


  6%|▌         | 59369/1000000 [2:43:07<39:35:06,  6.60it/s]

{'loss': Array(0.17000555, dtype=float32), 'loss_cross_entropy': Array(0.16027637, dtype=float32)}


  6%|▌         | 59379/1000000 [2:43:08<26:43:18,  9.78it/s]

{'loss': Array(0.15973718, dtype=float32), 'loss_cross_entropy': Array(0.15009235, dtype=float32)}


  6%|▌         | 59389/1000000 [2:43:09<24:35:00, 10.63it/s]

{'loss': Array(0.17479132, dtype=float32), 'loss_cross_entropy': Array(0.16397123, dtype=float32)}


  6%|▌         | 59399/1000000 [2:43:11<26:57:00,  9.69it/s]

{'loss': Array(0.15549265, dtype=float32), 'loss_cross_entropy': Array(0.14566784, dtype=float32)}


  6%|▌         | 59409/1000000 [2:43:12<24:53:23, 10.50it/s]

{'loss': Array(0.13285537, dtype=float32), 'loss_cross_entropy': Array(0.12398165, dtype=float32)}


  6%|▌         | 59419/1000000 [2:43:14<27:44:29,  9.42it/s]

{'loss': Array(0.16659397, dtype=float32), 'loss_cross_entropy': Array(0.15605257, dtype=float32)}


  6%|▌         | 59429/1000000 [2:43:15<24:46:00, 10.55it/s]

{'loss': Array(0.17645115, dtype=float32), 'loss_cross_entropy': Array(0.16500895, dtype=float32)}


  6%|▌         | 59439/1000000 [2:43:17<38:38:56,  6.76it/s]

{'loss': Array(0.18327151, dtype=float32), 'loss_cross_entropy': Array(0.1720011, dtype=float32)}


  6%|▌         | 59449/1000000 [2:43:19<26:49:02,  9.74it/s]

{'loss': Array(0.16127592, dtype=float32), 'loss_cross_entropy': Array(0.15075205, dtype=float32)}


  6%|▌         | 59459/1000000 [2:43:20<24:33:46, 10.64it/s]

{'loss': Array(0.17854056, dtype=float32), 'loss_cross_entropy': Array(0.16774985, dtype=float32)}


  6%|▌         | 59469/1000000 [2:43:22<30:52:27,  8.46it/s]

{'loss': Array(0.16981144, dtype=float32), 'loss_cross_entropy': Array(0.15863328, dtype=float32)}


  6%|▌         | 59479/1000000 [2:43:23<25:13:15, 10.36it/s]

{'loss': Array(0.16139081, dtype=float32), 'loss_cross_entropy': Array(0.15098245, dtype=float32)}


  6%|▌         | 59489/1000000 [2:43:25<37:53:09,  6.90it/s]

{'loss': Array(0.16449109, dtype=float32), 'loss_cross_entropy': Array(0.15387529, dtype=float32)}


  6%|▌         | 59499/1000000 [2:43:26<26:15:37,  9.95it/s]

{'loss': Array(0.16312194, dtype=float32), 'loss_cross_entropy': Array(0.15277603, dtype=float32)}


  6%|▌         | 59509/1000000 [2:43:34<86:22:09,  3.02it/s] 

{'loss': Array(0.16059823, dtype=float32), 'loss_cross_entropy': Array(0.15024154, dtype=float32)}


  6%|▌         | 59519/1000000 [2:43:36<39:43:40,  6.58it/s]

{'loss': Array(0.16970204, dtype=float32), 'loss_cross_entropy': Array(0.15919633, dtype=float32)}


  6%|▌         | 59529/1000000 [2:43:37<26:36:29,  9.82it/s]

{'loss': Array(0.16912296, dtype=float32), 'loss_cross_entropy': Array(0.15884976, dtype=float32)}


  6%|▌         | 59539/1000000 [2:43:39<31:20:32,  8.34it/s]

{'loss': Array(0.1791787, dtype=float32), 'loss_cross_entropy': Array(0.16856098, dtype=float32)}


  6%|▌         | 59549/1000000 [2:43:40<24:59:47, 10.45it/s]

{'loss': Array(0.17556025, dtype=float32), 'loss_cross_entropy': Array(0.16458623, dtype=float32)}


  6%|▌         | 59559/1000000 [2:43:42<40:08:20,  6.51it/s]

{'loss': Array(0.14960307, dtype=float32), 'loss_cross_entropy': Array(0.13936637, dtype=float32)}


  6%|▌         | 59569/1000000 [2:43:43<26:58:51,  9.68it/s]

{'loss': Array(0.16085778, dtype=float32), 'loss_cross_entropy': Array(0.1518137, dtype=float32)}


  6%|▌         | 59579/1000000 [2:43:45<24:46:49, 10.54it/s]

{'loss': Array(0.1412486, dtype=float32), 'loss_cross_entropy': Array(0.13193142, dtype=float32)}


  6%|▌         | 59589/1000000 [2:43:46<27:19:14,  9.56it/s]

{'loss': Array(0.15908836, dtype=float32), 'loss_cross_entropy': Array(0.14841226, dtype=float32)}


  6%|▌         | 59599/1000000 [2:43:48<24:25:36, 10.69it/s]

{'loss': Array(0.13857432, dtype=float32), 'loss_cross_entropy': Array(0.12882493, dtype=float32)}


  6%|▌         | 59609/1000000 [2:43:49<38:05:50,  6.86it/s]

{'loss': Array(0.1658417, dtype=float32), 'loss_cross_entropy': Array(0.15596184, dtype=float32)}


  6%|▌         | 59619/1000000 [2:43:51<26:42:43,  9.78it/s]

{'loss': Array(0.16153449, dtype=float32), 'loss_cross_entropy': Array(0.1515765, dtype=float32)}


  6%|▌         | 59629/1000000 [2:43:52<24:41:21, 10.58it/s]

{'loss': Array(0.16934185, dtype=float32), 'loss_cross_entropy': Array(0.1586955, dtype=float32)}


  6%|▌         | 59639/1000000 [2:43:54<29:25:19,  8.88it/s]

{'loss': Array(0.16082998, dtype=float32), 'loss_cross_entropy': Array(0.15066142, dtype=float32)}


  6%|▌         | 59649/1000000 [2:43:55<24:55:50, 10.48it/s]

{'loss': Array(0.15429251, dtype=float32), 'loss_cross_entropy': Array(0.14495409, dtype=float32)}


  6%|▌         | 59659/1000000 [2:43:57<34:13:29,  7.63it/s]

{'loss': Array(0.16058616, dtype=float32), 'loss_cross_entropy': Array(0.15059726, dtype=float32)}


  6%|▌         | 59669/1000000 [2:43:58<25:54:21, 10.08it/s]

{'loss': Array(0.16058189, dtype=float32), 'loss_cross_entropy': Array(0.15051544, dtype=float32)}


  6%|▌         | 59679/1000000 [2:44:00<40:58:50,  6.37it/s]

{'loss': Array(0.14190881, dtype=float32), 'loss_cross_entropy': Array(0.13254763, dtype=float32)}


  6%|▌         | 59689/1000000 [2:44:01<27:57:05,  9.34it/s]

{'loss': Array(0.1502541, dtype=float32), 'loss_cross_entropy': Array(0.14066721, dtype=float32)}


  6%|▌         | 59699/1000000 [2:44:03<26:20:04,  9.92it/s]

{'loss': Array(0.15046567, dtype=float32), 'loss_cross_entropy': Array(0.14154108, dtype=float32)}


  6%|▌         | 59709/1000000 [2:44:05<32:28:51,  8.04it/s]

{'loss': Array(0.16097905, dtype=float32), 'loss_cross_entropy': Array(0.15090989, dtype=float32)}


  6%|▌         | 59719/1000000 [2:44:06<25:31:20, 10.23it/s]

{'loss': Array(0.13235982, dtype=float32), 'loss_cross_entropy': Array(0.12394569, dtype=float32)}


  6%|▌         | 59729/1000000 [2:44:08<34:36:56,  7.55it/s]

{'loss': Array(0.15694718, dtype=float32), 'loss_cross_entropy': Array(0.14652179, dtype=float32)}


  6%|▌         | 59739/1000000 [2:44:09<26:04:08, 10.02it/s]

{'loss': Array(0.15594427, dtype=float32), 'loss_cross_entropy': Array(0.14617132, dtype=float32)}


  6%|▌         | 59749/1000000 [2:44:10<25:10:37, 10.37it/s]

{'loss': Array(0.1671494, dtype=float32), 'loss_cross_entropy': Array(0.15631838, dtype=float32)}


  6%|▌         | 59759/1000000 [2:44:12<27:40:07,  9.44it/s]

{'loss': Array(0.1308492, dtype=float32), 'loss_cross_entropy': Array(0.12196314, dtype=float32)}


  6%|▌         | 59769/1000000 [2:44:14<24:54:29, 10.49it/s]

{'loss': Array(0.17062323, dtype=float32), 'loss_cross_entropy': Array(0.1600957, dtype=float32)}


  6%|▌         | 59779/1000000 [2:44:15<39:05:12,  6.68it/s]

{'loss': Array(0.1685267, dtype=float32), 'loss_cross_entropy': Array(0.15823099, dtype=float32)}


  6%|▌         | 59789/1000000 [2:44:17<26:47:02,  9.75it/s]

{'loss': Array(0.17329161, dtype=float32), 'loss_cross_entropy': Array(0.16276616, dtype=float32)}


  6%|▌         | 59799/1000000 [2:44:18<24:42:00, 10.57it/s]

{'loss': Array(0.17392485, dtype=float32), 'loss_cross_entropy': Array(0.16327962, dtype=float32)}


  6%|▌         | 59809/1000000 [2:44:20<31:29:31,  8.29it/s]

{'loss': Array(0.15226774, dtype=float32), 'loss_cross_entropy': Array(0.1423949, dtype=float32)}


  6%|▌         | 59819/1000000 [2:44:21<25:20:36, 10.30it/s]

{'loss': Array(0.18119016, dtype=float32), 'loss_cross_entropy': Array(0.17040436, dtype=float32)}


  6%|▌         | 59829/1000000 [2:44:23<34:24:05,  7.59it/s]

{'loss': Array(0.17650655, dtype=float32), 'loss_cross_entropy': Array(0.16556565, dtype=float32)}


  6%|▌         | 59839/1000000 [2:44:24<26:11:58,  9.97it/s]

{'loss': Array(0.15097886, dtype=float32), 'loss_cross_entropy': Array(0.14205879, dtype=float32)}


  6%|▌         | 59849/1000000 [2:44:26<34:43:16,  7.52it/s]

{'loss': Array(0.14037535, dtype=float32), 'loss_cross_entropy': Array(0.13097154, dtype=float32)}


  6%|▌         | 59859/1000000 [2:44:27<25:56:30, 10.07it/s]

{'loss': Array(0.16490765, dtype=float32), 'loss_cross_entropy': Array(0.15458342, dtype=float32)}


  6%|▌         | 59869/1000000 [2:44:29<24:17:28, 10.75it/s]

{'loss': Array(0.16015433, dtype=float32), 'loss_cross_entropy': Array(0.14987421, dtype=float32)}


  6%|▌         | 59879/1000000 [2:44:30<29:09:21,  8.96it/s]

{'loss': Array(0.14828153, dtype=float32), 'loss_cross_entropy': Array(0.13885956, dtype=float32)}


  6%|▌         | 59889/1000000 [2:44:32<25:02:05, 10.43it/s]

{'loss': Array(0.17267112, dtype=float32), 'loss_cross_entropy': Array(0.16183233, dtype=float32)}


  6%|▌         | 59899/1000000 [2:44:33<31:26:45,  8.30it/s]

{'loss': Array(0.14978373, dtype=float32), 'loss_cross_entropy': Array(0.13974428, dtype=float32)}


  6%|▌         | 59909/1000000 [2:44:35<25:33:30, 10.22it/s]

{'loss': Array(0.16433334, dtype=float32), 'loss_cross_entropy': Array(0.15368043, dtype=float32)}


  6%|▌         | 59919/1000000 [2:44:37<39:20:26,  6.64it/s]

{'loss': Array(0.16859964, dtype=float32), 'loss_cross_entropy': Array(0.15806134, dtype=float32)}


  6%|▌         | 59929/1000000 [2:44:38<26:54:40,  9.70it/s]

{'loss': Array(0.16360536, dtype=float32), 'loss_cross_entropy': Array(0.15259442, dtype=float32)}


  6%|▌         | 59939/1000000 [2:44:40<38:34:29,  6.77it/s]

{'loss': Array(0.1643471, dtype=float32), 'loss_cross_entropy': Array(0.15315436, dtype=float32)}


  6%|▌         | 59949/1000000 [2:44:41<26:38:42,  9.80it/s]

{'loss': Array(0.18799984, dtype=float32), 'loss_cross_entropy': Array(0.17723225, dtype=float32)}


  6%|▌         | 59959/1000000 [2:44:42<25:17:56, 10.32it/s]

{'loss': Array(0.1743753, dtype=float32), 'loss_cross_entropy': Array(0.16337048, dtype=float32)}


  6%|▌         | 59969/1000000 [2:44:44<34:00:17,  7.68it/s]

{'loss': Array(0.16974017, dtype=float32), 'loss_cross_entropy': Array(0.15878282, dtype=float32)}


  6%|▌         | 59979/1000000 [2:44:46<25:47:24, 10.12it/s]

{'loss': Array(0.16499582, dtype=float32), 'loss_cross_entropy': Array(0.15523775, dtype=float32)}


  6%|▌         | 59989/1000000 [2:44:47<38:11:53,  6.84it/s]

{'loss': Array(0.17938851, dtype=float32), 'loss_cross_entropy': Array(0.16874817, dtype=float32)}


  6%|▌         | 59999/1000000 [2:44:49<26:07:53,  9.99it/s]

{'loss': Array(0.15269576, dtype=float32), 'loss_cross_entropy': Array(0.14263201, dtype=float32)}


  6%|▌         | 60009/1000000 [2:44:57<86:15:03,  3.03it/s] 

{'loss': Array(0.15338413, dtype=float32), 'loss_cross_entropy': Array(0.143068, dtype=float32)}


  6%|▌         | 60019/1000000 [2:44:58<37:43:33,  6.92it/s] 

{'loss': Array(0.14968416, dtype=float32), 'loss_cross_entropy': Array(0.14053763, dtype=float32)}


  6%|▌         | 60029/1000000 [2:45:00<27:45:04,  9.41it/s]

{'loss': Array(0.17483732, dtype=float32), 'loss_cross_entropy': Array(0.1644018, dtype=float32)}


  6%|▌         | 60039/1000000 [2:45:01<29:36:11,  8.82it/s]

{'loss': Array(0.17068458, dtype=float32), 'loss_cross_entropy': Array(0.15883373, dtype=float32)}


  6%|▌         | 60049/1000000 [2:45:03<26:20:11,  9.91it/s]

{'loss': Array(0.15591101, dtype=float32), 'loss_cross_entropy': Array(0.14560005, dtype=float32)}


  6%|▌         | 60059/1000000 [2:45:05<39:55:25,  6.54it/s]

{'loss': Array(0.14803992, dtype=float32), 'loss_cross_entropy': Array(0.13891254, dtype=float32)}


  6%|▌         | 60069/1000000 [2:45:06<26:55:26,  9.70it/s]

{'loss': Array(0.14677231, dtype=float32), 'loss_cross_entropy': Array(0.13711856, dtype=float32)}


  6%|▌         | 60079/1000000 [2:45:07<24:43:07, 10.56it/s]

{'loss': Array(0.16912888, dtype=float32), 'loss_cross_entropy': Array(0.15919797, dtype=float32)}


  6%|▌         | 60089/1000000 [2:45:09<31:20:26,  8.33it/s]

{'loss': Array(0.16831283, dtype=float32), 'loss_cross_entropy': Array(0.15794551, dtype=float32)}


  6%|▌         | 60099/1000000 [2:45:10<25:13:51, 10.35it/s]

{'loss': Array(0.1579466, dtype=float32), 'loss_cross_entropy': Array(0.14794855, dtype=float32)}


  6%|▌         | 60109/1000000 [2:45:12<38:46:10,  6.73it/s]

{'loss': Array(0.13815333, dtype=float32), 'loss_cross_entropy': Array(0.12929738, dtype=float32)}


  6%|▌         | 60119/1000000 [2:45:13<27:10:28,  9.61it/s]

{'loss': Array(0.16182037, dtype=float32), 'loss_cross_entropy': Array(0.1513354, dtype=float32)}


  6%|▌         | 60129/1000000 [2:45:15<24:27:11, 10.68it/s]

{'loss': Array(0.15270156, dtype=float32), 'loss_cross_entropy': Array(0.14347394, dtype=float32)}


  6%|▌         | 60139/1000000 [2:45:17<28:58:00,  9.01it/s]

{'loss': Array(0.16425759, dtype=float32), 'loss_cross_entropy': Array(0.15428856, dtype=float32)}


  6%|▌         | 60149/1000000 [2:45:18<24:44:57, 10.55it/s]

{'loss': Array(0.15072787, dtype=float32), 'loss_cross_entropy': Array(0.14099294, dtype=float32)}


  6%|▌         | 60159/1000000 [2:45:20<33:57:49,  7.69it/s]

{'loss': Array(0.1751406, dtype=float32), 'loss_cross_entropy': Array(0.16379844, dtype=float32)}


  6%|▌         | 60169/1000000 [2:45:21<25:31:47, 10.23it/s]

{'loss': Array(0.15626077, dtype=float32), 'loss_cross_entropy': Array(0.14662348, dtype=float32)}


  6%|▌         | 60179/1000000 [2:45:22<24:31:34, 10.64it/s]

{'loss': Array(0.14693661, dtype=float32), 'loss_cross_entropy': Array(0.13706498, dtype=float32)}


  6%|▌         | 60189/1000000 [2:45:24<31:42:04,  8.23it/s]

{'loss': Array(0.1658849, dtype=float32), 'loss_cross_entropy': Array(0.15538225, dtype=float32)}


  6%|▌         | 60199/1000000 [2:45:25<25:56:22, 10.06it/s]

{'loss': Array(0.1676611, dtype=float32), 'loss_cross_entropy': Array(0.15759192, dtype=float32)}


  6%|▌         | 60209/1000000 [2:45:27<24:31:06, 10.65it/s]

{'loss': Array(0.14701645, dtype=float32), 'loss_cross_entropy': Array(0.13749234, dtype=float32)}


  6%|▌         | 60219/1000000 [2:45:28<27:48:35,  9.39it/s]

{'loss': Array(0.16424838, dtype=float32), 'loss_cross_entropy': Array(0.15446642, dtype=float32)}


  6%|▌         | 60229/1000000 [2:45:30<24:49:54, 10.51it/s]

{'loss': Array(0.16933587, dtype=float32), 'loss_cross_entropy': Array(0.15859722, dtype=float32)}


  6%|▌         | 60239/1000000 [2:45:32<30:55:12,  8.44it/s]

{'loss': Array(0.16600142, dtype=float32), 'loss_cross_entropy': Array(0.15507638, dtype=float32)}


  6%|▌         | 60249/1000000 [2:45:33<25:13:35, 10.35it/s]

{'loss': Array(0.13084929, dtype=float32), 'loss_cross_entropy': Array(0.12218126, dtype=float32)}


  6%|▌         | 60259/1000000 [2:45:35<38:03:19,  6.86it/s]

{'loss': Array(0.17559576, dtype=float32), 'loss_cross_entropy': Array(0.16437297, dtype=float32)}


  6%|▌         | 60269/1000000 [2:45:36<26:06:00, 10.00it/s]

{'loss': Array(0.17012636, dtype=float32), 'loss_cross_entropy': Array(0.16055611, dtype=float32)}


  6%|▌         | 60279/1000000 [2:45:38<39:15:18,  6.65it/s]

{'loss': Array(0.16743493, dtype=float32), 'loss_cross_entropy': Array(0.15767093, dtype=float32)}


  6%|▌         | 60289/1000000 [2:45:39<26:30:45,  9.85it/s]

{'loss': Array(0.16779079, dtype=float32), 'loss_cross_entropy': Array(0.15747474, dtype=float32)}


  6%|▌         | 60299/1000000 [2:45:40<24:59:13, 10.45it/s]

{'loss': Array(0.16217461, dtype=float32), 'loss_cross_entropy': Array(0.15161042, dtype=float32)}


  6%|▌         | 60309/1000000 [2:45:42<33:41:19,  7.75it/s]

{'loss': Array(0.12914228, dtype=float32), 'loss_cross_entropy': Array(0.12081879, dtype=float32)}


  6%|▌         | 60319/1000000 [2:45:43<26:24:32,  9.88it/s]

{'loss': Array(0.1579435, dtype=float32), 'loss_cross_entropy': Array(0.1477872, dtype=float32)}


  6%|▌         | 60329/1000000 [2:45:45<38:22:17,  6.80it/s]

{'loss': Array(0.17386535, dtype=float32), 'loss_cross_entropy': Array(0.16375685, dtype=float32)}


  6%|▌         | 60339/1000000 [2:45:46<26:26:26,  9.87it/s]

{'loss': Array(0.16287659, dtype=float32), 'loss_cross_entropy': Array(0.1525605, dtype=float32)}


  6%|▌         | 60349/1000000 [2:45:48<24:12:09, 10.78it/s]

{'loss': Array(0.16767626, dtype=float32), 'loss_cross_entropy': Array(0.15677096, dtype=float32)}


  6%|▌         | 60359/1000000 [2:45:49<27:09:40,  9.61it/s]

{'loss': Array(0.14767867, dtype=float32), 'loss_cross_entropy': Array(0.13752995, dtype=float32)}


  6%|▌         | 60369/1000000 [2:45:51<24:45:50, 10.54it/s]

{'loss': Array(0.15901512, dtype=float32), 'loss_cross_entropy': Array(0.14939165, dtype=float32)}


  6%|▌         | 60379/1000000 [2:45:53<27:47:22,  9.39it/s]

{'loss': Array(0.15951774, dtype=float32), 'loss_cross_entropy': Array(0.14994404, dtype=float32)}


  6%|▌         | 60389/1000000 [2:45:54<25:35:52, 10.20it/s]

{'loss': Array(0.1552487, dtype=float32), 'loss_cross_entropy': Array(0.14508133, dtype=float32)}


  6%|▌         | 60399/1000000 [2:45:56<39:21:34,  6.63it/s]

{'loss': Array(0.15981226, dtype=float32), 'loss_cross_entropy': Array(0.14952493, dtype=float32)}


  6%|▌         | 60409/1000000 [2:45:57<26:38:54,  9.79it/s]

{'loss': Array(0.16662814, dtype=float32), 'loss_cross_entropy': Array(0.1560583, dtype=float32)}


  6%|▌         | 60419/1000000 [2:45:58<24:40:01, 10.58it/s]

{'loss': Array(0.15599652, dtype=float32), 'loss_cross_entropy': Array(0.14591546, dtype=float32)}


  6%|▌         | 60429/1000000 [2:46:00<32:06:40,  8.13it/s]

{'loss': Array(0.18382616, dtype=float32), 'loss_cross_entropy': Array(0.17228417, dtype=float32)}


  6%|▌         | 60439/1000000 [2:46:01<25:45:04, 10.13it/s]

{'loss': Array(0.16803233, dtype=float32), 'loss_cross_entropy': Array(0.15766905, dtype=float32)}


  6%|▌         | 60449/1000000 [2:46:03<38:44:50,  6.74it/s]

{'loss': Array(0.16385381, dtype=float32), 'loss_cross_entropy': Array(0.15322267, dtype=float32)}


  6%|▌         | 60459/1000000 [2:46:05<27:25:27,  9.52it/s]

{'loss': Array(0.15961044, dtype=float32), 'loss_cross_entropy': Array(0.14972906, dtype=float32)}


  6%|▌         | 60469/1000000 [2:46:06<25:06:33, 10.39it/s]

{'loss': Array(0.16020595, dtype=float32), 'loss_cross_entropy': Array(0.15023433, dtype=float32)}


  6%|▌         | 60479/1000000 [2:46:08<29:35:28,  8.82it/s]

{'loss': Array(0.16893607, dtype=float32), 'loss_cross_entropy': Array(0.15883629, dtype=float32)}


  6%|▌         | 60489/1000000 [2:46:09<26:25:29,  9.88it/s]

{'loss': Array(0.16336907, dtype=float32), 'loss_cross_entropy': Array(0.15361544, dtype=float32)}


  6%|▌         | 60499/1000000 [2:46:11<36:03:53,  7.24it/s]

{'loss': Array(0.15158318, dtype=float32), 'loss_cross_entropy': Array(0.14212821, dtype=float32)}


  6%|▌         | 60509/1000000 [2:46:19<87:59:40,  2.97it/s] 

{'loss': Array(0.15800938, dtype=float32), 'loss_cross_entropy': Array(0.14824253, dtype=float32)}


  6%|▌         | 60519/1000000 [2:46:21<44:42:07,  5.84it/s]

{'loss': Array(0.16704221, dtype=float32), 'loss_cross_entropy': Array(0.15659402, dtype=float32)}


  6%|▌         | 60529/1000000 [2:46:22<27:10:05,  9.61it/s]

{'loss': Array(0.1445312, dtype=float32), 'loss_cross_entropy': Array(0.13579068, dtype=float32)}


  6%|▌         | 60539/1000000 [2:46:23<24:22:26, 10.71it/s]

{'loss': Array(0.1376786, dtype=float32), 'loss_cross_entropy': Array(0.12867361, dtype=float32)}


  6%|▌         | 60549/1000000 [2:46:25<30:45:39,  8.48it/s]

{'loss': Array(0.1492284, dtype=float32), 'loss_cross_entropy': Array(0.13961743, dtype=float32)}


  6%|▌         | 60559/1000000 [2:46:26<24:49:36, 10.51it/s]

{'loss': Array(0.17175032, dtype=float32), 'loss_cross_entropy': Array(0.16113241, dtype=float32)}


  6%|▌         | 60569/1000000 [2:46:28<23:54:45, 10.91it/s]

{'loss': Array(0.17434597, dtype=float32), 'loss_cross_entropy': Array(0.16397452, dtype=float32)}


  6%|▌         | 60579/1000000 [2:46:29<27:13:18,  9.59it/s]

{'loss': Array(0.17819415, dtype=float32), 'loss_cross_entropy': Array(0.16726649, dtype=float32)}


  6%|▌         | 60589/1000000 [2:46:31<24:31:17, 10.64it/s]

{'loss': Array(0.17069377, dtype=float32), 'loss_cross_entropy': Array(0.1599965, dtype=float32)}


  6%|▌         | 60599/1000000 [2:46:32<30:30:44,  8.55it/s]

{'loss': Array(0.17161433, dtype=float32), 'loss_cross_entropy': Array(0.16074762, dtype=float32)}


  6%|▌         | 60609/1000000 [2:46:34<24:38:39, 10.59it/s]

{'loss': Array(0.14510022, dtype=float32), 'loss_cross_entropy': Array(0.13543591, dtype=float32)}


  6%|▌         | 60619/1000000 [2:46:35<33:22:49,  7.82it/s]

{'loss': Array(0.17414995, dtype=float32), 'loss_cross_entropy': Array(0.16377667, dtype=float32)}


  6%|▌         | 60629/1000000 [2:46:37<25:48:48, 10.11it/s]

{'loss': Array(0.1679546, dtype=float32), 'loss_cross_entropy': Array(0.1579048, dtype=float32)}


  6%|▌         | 60639/1000000 [2:46:38<24:18:19, 10.74it/s]

{'loss': Array(0.17031762, dtype=float32), 'loss_cross_entropy': Array(0.15963186, dtype=float32)}


  6%|▌         | 60649/1000000 [2:46:40<30:58:19,  8.42it/s]

{'loss': Array(0.17117563, dtype=float32), 'loss_cross_entropy': Array(0.16018808, dtype=float32)}


  6%|▌         | 60659/1000000 [2:46:41<25:16:43, 10.32it/s]

{'loss': Array(0.17700581, dtype=float32), 'loss_cross_entropy': Array(0.16598779, dtype=float32)}


  6%|▌         | 60669/1000000 [2:46:42<24:45:05, 10.54it/s]

{'loss': Array(0.18006465, dtype=float32), 'loss_cross_entropy': Array(0.1692564, dtype=float32)}


  6%|▌         | 60679/1000000 [2:46:44<27:13:54,  9.58it/s]

{'loss': Array(0.1667265, dtype=float32), 'loss_cross_entropy': Array(0.15600526, dtype=float32)}


  6%|▌         | 60689/1000000 [2:46:45<24:58:57, 10.44it/s]

{'loss': Array(0.1579474, dtype=float32), 'loss_cross_entropy': Array(0.14891367, dtype=float32)}


  6%|▌         | 60699/1000000 [2:46:47<31:19:27,  8.33it/s]

{'loss': Array(0.171333, dtype=float32), 'loss_cross_entropy': Array(0.16013996, dtype=float32)}


  6%|▌         | 60709/1000000 [2:46:49<26:47:42,  9.74it/s]

{'loss': Array(0.15653674, dtype=float32), 'loss_cross_entropy': Array(0.14632614, dtype=float32)}


  6%|▌         | 60719/1000000 [2:46:50<39:31:34,  6.60it/s]

{'loss': Array(0.17225349, dtype=float32), 'loss_cross_entropy': Array(0.16180968, dtype=float32)}


  6%|▌         | 60729/1000000 [2:46:52<26:13:29,  9.95it/s]

{'loss': Array(0.16441923, dtype=float32), 'loss_cross_entropy': Array(0.15449032, dtype=float32)}


  6%|▌         | 60739/1000000 [2:46:53<24:37:02, 10.60it/s]

{'loss': Array(0.16456646, dtype=float32), 'loss_cross_entropy': Array(0.15438998, dtype=float32)}


  6%|▌         | 60749/1000000 [2:46:55<34:39:14,  7.53it/s]

{'loss': Array(0.16370177, dtype=float32), 'loss_cross_entropy': Array(0.15381373, dtype=float32)}


  6%|▌         | 60759/1000000 [2:46:56<26:01:19, 10.03it/s]

{'loss': Array(0.16518109, dtype=float32), 'loss_cross_entropy': Array(0.15393572, dtype=float32)}


  6%|▌         | 60769/1000000 [2:46:58<24:54:42, 10.47it/s]

{'loss': Array(0.15726463, dtype=float32), 'loss_cross_entropy': Array(0.14742759, dtype=float32)}


  6%|▌         | 60779/1000000 [2:46:59<29:36:13,  8.81it/s]

{'loss': Array(0.18621491, dtype=float32), 'loss_cross_entropy': Array(0.17512934, dtype=float32)}


  6%|▌         | 60789/1000000 [2:47:01<25:53:40, 10.08it/s]

{'loss': Array(0.16234013, dtype=float32), 'loss_cross_entropy': Array(0.15152197, dtype=float32)}


  6%|▌         | 60799/1000000 [2:47:03<31:34:25,  8.26it/s]

{'loss': Array(0.16990414, dtype=float32), 'loss_cross_entropy': Array(0.15940724, dtype=float32)}


  6%|▌         | 60809/1000000 [2:47:04<25:10:54, 10.36it/s]

{'loss': Array(0.15579306, dtype=float32), 'loss_cross_entropy': Array(0.1450942, dtype=float32)}


  6%|▌         | 60819/1000000 [2:47:05<24:20:32, 10.72it/s]

{'loss': Array(0.15071242, dtype=float32), 'loss_cross_entropy': Array(0.14116263, dtype=float32)}


  6%|▌         | 60829/1000000 [2:47:07<27:37:43,  9.44it/s]

{'loss': Array(0.1648645, dtype=float32), 'loss_cross_entropy': Array(0.15485173, dtype=float32)}


  6%|▌         | 60839/1000000 [2:47:08<24:54:04, 10.48it/s]

{'loss': Array(0.16707055, dtype=float32), 'loss_cross_entropy': Array(0.15708469, dtype=float32)}


  6%|▌         | 60849/1000000 [2:47:10<34:43:02,  7.51it/s]

{'loss': Array(0.15293233, dtype=float32), 'loss_cross_entropy': Array(0.14283165, dtype=float32)}


  6%|▌         | 60859/1000000 [2:47:11<26:50:14,  9.72it/s]

{'loss': Array(0.1658635, dtype=float32), 'loss_cross_entropy': Array(0.15669222, dtype=float32)}


  6%|▌         | 60869/1000000 [2:47:13<25:33:05, 10.21it/s]

{'loss': Array(0.1562628, dtype=float32), 'loss_cross_entropy': Array(0.1468669, dtype=float32)}


  6%|▌         | 60879/1000000 [2:47:15<28:22:50,  9.19it/s]

{'loss': Array(0.14827906, dtype=float32), 'loss_cross_entropy': Array(0.13924636, dtype=float32)}


  6%|▌         | 60889/1000000 [2:47:16<25:06:05, 10.39it/s]

{'loss': Array(0.15435237, dtype=float32), 'loss_cross_entropy': Array(0.14484267, dtype=float32)}


  6%|▌         | 60899/1000000 [2:47:18<29:04:37,  8.97it/s]

{'loss': Array(0.16092382, dtype=float32), 'loss_cross_entropy': Array(0.15112835, dtype=float32)}


  6%|▌         | 60909/1000000 [2:47:19<24:46:11, 10.53it/s]

{'loss': Array(0.15819076, dtype=float32), 'loss_cross_entropy': Array(0.14855507, dtype=float32)}


  6%|▌         | 60919/1000000 [2:47:21<38:46:51,  6.73it/s]

{'loss': Array(0.1557571, dtype=float32), 'loss_cross_entropy': Array(0.14613806, dtype=float32)}


  6%|▌         | 60929/1000000 [2:47:22<26:50:02,  9.72it/s]

{'loss': Array(0.16257118, dtype=float32), 'loss_cross_entropy': Array(0.15183036, dtype=float32)}


  6%|▌         | 60939/1000000 [2:47:23<24:42:05, 10.56it/s]

{'loss': Array(0.14665088, dtype=float32), 'loss_cross_entropy': Array(0.13753055, dtype=float32)}


  6%|▌         | 60949/1000000 [2:47:25<31:31:16,  8.28it/s]

{'loss': Array(0.1530376, dtype=float32), 'loss_cross_entropy': Array(0.14384721, dtype=float32)}


  6%|▌         | 60959/1000000 [2:47:27<26:40:55,  9.78it/s]

{'loss': Array(0.15932772, dtype=float32), 'loss_cross_entropy': Array(0.14926709, dtype=float32)}


  6%|▌         | 60969/1000000 [2:47:28<34:46:39,  7.50it/s]

{'loss': Array(0.14075749, dtype=float32), 'loss_cross_entropy': Array(0.1319263, dtype=float32)}


  6%|▌         | 60979/1000000 [2:47:30<25:41:21, 10.15it/s]

{'loss': Array(0.1586881, dtype=float32), 'loss_cross_entropy': Array(0.14930059, dtype=float32)}


  6%|▌         | 60989/1000000 [2:47:32<34:48:11,  7.49it/s]

{'loss': Array(0.16364506, dtype=float32), 'loss_cross_entropy': Array(0.15303834, dtype=float32)}


  6%|▌         | 60999/1000000 [2:47:33<26:38:05,  9.79it/s]

{'loss': Array(0.15254281, dtype=float32), 'loss_cross_entropy': Array(0.14197923, dtype=float32)}


  6%|▌         | 61009/1000000 [2:47:41<85:18:39,  3.06it/s] 

{'loss': Array(0.1616627, dtype=float32), 'loss_cross_entropy': Array(0.15192704, dtype=float32)}


  6%|▌         | 61019/1000000 [2:47:42<38:46:13,  6.73it/s]

{'loss': Array(0.16766791, dtype=float32), 'loss_cross_entropy': Array(0.15711363, dtype=float32)}


  6%|▌         | 61029/1000000 [2:47:44<26:25:29,  9.87it/s]

{'loss': Array(0.15782706, dtype=float32), 'loss_cross_entropy': Array(0.14720486, dtype=float32)}


  6%|▌         | 61039/1000000 [2:47:45<34:20:33,  7.59it/s]

{'loss': Array(0.17311294, dtype=float32), 'loss_cross_entropy': Array(0.16285668, dtype=float32)}


  6%|▌         | 61049/1000000 [2:47:47<25:46:10, 10.12it/s]

{'loss': Array(0.14992636, dtype=float32), 'loss_cross_entropy': Array(0.13987863, dtype=float32)}


  6%|▌         | 61059/1000000 [2:47:48<38:08:43,  6.84it/s]

{'loss': Array(0.1732227, dtype=float32), 'loss_cross_entropy': Array(0.16300917, dtype=float32)}


  6%|▌         | 61069/1000000 [2:47:50<26:13:46,  9.94it/s]

{'loss': Array(0.16787708, dtype=float32), 'loss_cross_entropy': Array(0.15800105, dtype=float32)}


  6%|▌         | 61079/1000000 [2:47:51<24:02:59, 10.84it/s]

{'loss': Array(0.15800612, dtype=float32), 'loss_cross_entropy': Array(0.14753316, dtype=float32)}


  6%|▌         | 61089/1000000 [2:47:53<31:24:13,  8.31it/s]

{'loss': Array(0.17269, dtype=float32), 'loss_cross_entropy': Array(0.16222316, dtype=float32)}


  6%|▌         | 61099/1000000 [2:47:54<25:09:53, 10.36it/s]

{'loss': Array(0.14406368, dtype=float32), 'loss_cross_entropy': Array(0.13543883, dtype=float32)}


  6%|▌         | 61109/1000000 [2:47:56<34:19:39,  7.60it/s]

{'loss': Array(0.1567912, dtype=float32), 'loss_cross_entropy': Array(0.14717184, dtype=float32)}


  6%|▌         | 61119/1000000 [2:47:57<25:43:30, 10.14it/s]

{'loss': Array(0.17261927, dtype=float32), 'loss_cross_entropy': Array(0.16271862, dtype=float32)}


  6%|▌         | 61129/1000000 [2:47:59<24:39:49, 10.57it/s]

{'loss': Array(0.15178333, dtype=float32), 'loss_cross_entropy': Array(0.14206646, dtype=float32)}


  6%|▌         | 61139/1000000 [2:48:00<29:01:29,  8.99it/s]

{'loss': Array(0.15456845, dtype=float32), 'loss_cross_entropy': Array(0.14423336, dtype=float32)}


  6%|▌         | 61149/1000000 [2:48:02<25:20:45, 10.29it/s]

{'loss': Array(0.15626399, dtype=float32), 'loss_cross_entropy': Array(0.14621727, dtype=float32)}


  6%|▌         | 61159/1000000 [2:48:04<40:39:31,  6.41it/s]

{'loss': Array(0.1447399, dtype=float32), 'loss_cross_entropy': Array(0.13532105, dtype=float32)}


  6%|▌         | 61169/1000000 [2:48:05<27:54:26,  9.34it/s]

{'loss': Array(0.14481895, dtype=float32), 'loss_cross_entropy': Array(0.13483691, dtype=float32)}


  6%|▌         | 61179/1000000 [2:48:06<25:04:46, 10.40it/s]

{'loss': Array(0.15901332, dtype=float32), 'loss_cross_entropy': Array(0.14892866, dtype=float32)}


  6%|▌         | 61189/1000000 [2:48:08<30:54:56,  8.44it/s]

{'loss': Array(0.1511607, dtype=float32), 'loss_cross_entropy': Array(0.14138614, dtype=float32)}


  6%|▌         | 61199/1000000 [2:48:09<25:20:11, 10.29it/s]

{'loss': Array(0.16415946, dtype=float32), 'loss_cross_entropy': Array(0.15407917, dtype=float32)}


  6%|▌         | 61209/1000000 [2:48:11<33:59:22,  7.67it/s]

{'loss': Array(0.17536299, dtype=float32), 'loss_cross_entropy': Array(0.16478486, dtype=float32)}


  6%|▌         | 61219/1000000 [2:48:12<25:39:56, 10.16it/s]

{'loss': Array(0.15845208, dtype=float32), 'loss_cross_entropy': Array(0.14872098, dtype=float32)}


  6%|▌         | 61229/1000000 [2:48:14<33:59:24,  7.67it/s]

{'loss': Array(0.15736869, dtype=float32), 'loss_cross_entropy': Array(0.14782614, dtype=float32)}


  6%|▌         | 61239/1000000 [2:48:15<25:59:39, 10.03it/s]

{'loss': Array(0.1657861, dtype=float32), 'loss_cross_entropy': Array(0.15543154, dtype=float32)}


  6%|▌         | 61249/1000000 [2:48:17<24:39:47, 10.57it/s]

{'loss': Array(0.14667343, dtype=float32), 'loss_cross_entropy': Array(0.1375411, dtype=float32)}


  6%|▌         | 61259/1000000 [2:48:19<29:13:28,  8.92it/s]

{'loss': Array(0.15814671, dtype=float32), 'loss_cross_entropy': Array(0.14844434, dtype=float32)}


  6%|▌         | 61269/1000000 [2:48:20<24:53:06, 10.48it/s]

{'loss': Array(0.16450292, dtype=float32), 'loss_cross_entropy': Array(0.15455826, dtype=float32)}


  6%|▌         | 61279/1000000 [2:48:22<31:04:45,  8.39it/s]

{'loss': Array(0.15846483, dtype=float32), 'loss_cross_entropy': Array(0.14821802, dtype=float32)}


  6%|▌         | 61289/1000000 [2:48:23<25:13:19, 10.34it/s]

{'loss': Array(0.17278187, dtype=float32), 'loss_cross_entropy': Array(0.1620134, dtype=float32)}


  6%|▌         | 61299/1000000 [2:48:25<38:01:28,  6.86it/s]

{'loss': Array(0.15046303, dtype=float32), 'loss_cross_entropy': Array(0.14110307, dtype=float32)}


  6%|▌         | 61309/1000000 [2:48:26<26:04:10, 10.00it/s]

{'loss': Array(0.15812758, dtype=float32), 'loss_cross_entropy': Array(0.14844139, dtype=float32)}


  6%|▌         | 61319/1000000 [2:48:28<38:29:32,  6.77it/s]

{'loss': Array(0.15530649, dtype=float32), 'loss_cross_entropy': Array(0.14505516, dtype=float32)}


  6%|▌         | 61329/1000000 [2:48:29<26:23:38,  9.88it/s]

{'loss': Array(0.17360003, dtype=float32), 'loss_cross_entropy': Array(0.1631236, dtype=float32)}


  6%|▌         | 61339/1000000 [2:48:30<24:35:16, 10.60it/s]

{'loss': Array(0.16291004, dtype=float32), 'loss_cross_entropy': Array(0.15267108, dtype=float32)}


  6%|▌         | 61349/1000000 [2:48:32<34:05:44,  7.65it/s]

{'loss': Array(0.15669928, dtype=float32), 'loss_cross_entropy': Array(0.14692786, dtype=float32)}


  6%|▌         | 61359/1000000 [2:48:33<26:02:27, 10.01it/s]

{'loss': Array(0.16056046, dtype=float32), 'loss_cross_entropy': Array(0.15081482, dtype=float32)}


  6%|▌         | 61369/1000000 [2:48:35<38:26:43,  6.78it/s]

{'loss': Array(0.15349334, dtype=float32), 'loss_cross_entropy': Array(0.14387512, dtype=float32)}


  6%|▌         | 61379/1000000 [2:48:37<26:47:34,  9.73it/s]

{'loss': Array(0.19438894, dtype=float32), 'loss_cross_entropy': Array(0.18286869, dtype=float32)}


  6%|▌         | 61389/1000000 [2:48:38<24:42:45, 10.55it/s]

{'loss': Array(0.1580231, dtype=float32), 'loss_cross_entropy': Array(0.14814627, dtype=float32)}


  6%|▌         | 61399/1000000 [2:48:40<27:14:59,  9.57it/s]

{'loss': Array(0.15196298, dtype=float32), 'loss_cross_entropy': Array(0.14160186, dtype=float32)}


  6%|▌         | 61409/1000000 [2:48:41<24:25:26, 10.67it/s]

{'loss': Array(0.15822035, dtype=float32), 'loss_cross_entropy': Array(0.14844278, dtype=float32)}


  6%|▌         | 61419/1000000 [2:48:43<27:50:02,  9.37it/s]

{'loss': Array(0.15380435, dtype=float32), 'loss_cross_entropy': Array(0.14461553, dtype=float32)}


  6%|▌         | 61429/1000000 [2:48:44<24:57:10, 10.45it/s]

{'loss': Array(0.1542841, dtype=float32), 'loss_cross_entropy': Array(0.14437138, dtype=float32)}


  6%|▌         | 61439/1000000 [2:48:46<38:36:05,  6.75it/s]

{'loss': Array(0.14828363, dtype=float32), 'loss_cross_entropy': Array(0.13850279, dtype=float32)}


  6%|▌         | 61449/1000000 [2:48:47<26:35:21,  9.81it/s]

{'loss': Array(0.16966821, dtype=float32), 'loss_cross_entropy': Array(0.15957487, dtype=float32)}


  6%|▌         | 61459/1000000 [2:48:49<24:47:06, 10.52it/s]

{'loss': Array(0.16311607, dtype=float32), 'loss_cross_entropy': Array(0.15327767, dtype=float32)}


  6%|▌         | 61469/1000000 [2:48:50<31:24:44,  8.30it/s]

{'loss': Array(0.17155714, dtype=float32), 'loss_cross_entropy': Array(0.16100544, dtype=float32)}


  6%|▌         | 61479/1000000 [2:48:52<25:06:20, 10.38it/s]

{'loss': Array(0.18246906, dtype=float32), 'loss_cross_entropy': Array(0.17109373, dtype=float32)}


  6%|▌         | 61489/1000000 [2:48:53<38:51:01,  6.71it/s]

{'loss': Array(0.16800681, dtype=float32), 'loss_cross_entropy': Array(0.15736543, dtype=float32)}


  6%|▌         | 61499/1000000 [2:48:55<27:31:47,  9.47it/s]

{'loss': Array(0.15231189, dtype=float32), 'loss_cross_entropy': Array(0.1421442, dtype=float32)}


  6%|▌         | 61509/1000000 [2:49:03<86:24:33,  3.02it/s] 

{'loss': Array(0.16853388, dtype=float32), 'loss_cross_entropy': Array(0.15780976, dtype=float32)}


  6%|▌         | 61519/1000000 [2:49:04<40:09:40,  6.49it/s]

{'loss': Array(0.16058247, dtype=float32), 'loss_cross_entropy': Array(0.15066604, dtype=float32)}


  6%|▌         | 61529/1000000 [2:49:06<28:48:46,  9.05it/s]

{'loss': Array(0.13320234, dtype=float32), 'loss_cross_entropy': Array(0.12478336, dtype=float32)}


  6%|▌         | 61539/1000000 [2:49:08<33:30:51,  7.78it/s]

{'loss': Array(0.14885949, dtype=float32), 'loss_cross_entropy': Array(0.13967319, dtype=float32)}


  6%|▌         | 61549/1000000 [2:49:09<26:37:48,  9.79it/s]

{'loss': Array(0.16500458, dtype=float32), 'loss_cross_entropy': Array(0.15506771, dtype=float32)}


  6%|▌         | 61559/1000000 [2:49:11<38:26:38,  6.78it/s]

{'loss': Array(0.16324285, dtype=float32), 'loss_cross_entropy': Array(0.15339506, dtype=float32)}


  6%|▌         | 61569/1000000 [2:49:12<26:16:41,  9.92it/s]

{'loss': Array(0.15971708, dtype=float32), 'loss_cross_entropy': Array(0.1493078, dtype=float32)}


  6%|▌         | 61579/1000000 [2:49:14<24:30:13, 10.64it/s]

{'loss': Array(0.15438242, dtype=float32), 'loss_cross_entropy': Array(0.14513181, dtype=float32)}


  6%|▌         | 61589/1000000 [2:49:15<27:36:29,  9.44it/s]

{'loss': Array(0.1652333, dtype=float32), 'loss_cross_entropy': Array(0.1553411, dtype=float32)}


  6%|▌         | 61599/1000000 [2:49:17<25:03:35, 10.40it/s]

{'loss': Array(0.16889189, dtype=float32), 'loss_cross_entropy': Array(0.15870292, dtype=float32)}


  6%|▌         | 61609/1000000 [2:49:18<39:08:14,  6.66it/s]

{'loss': Array(0.14685138, dtype=float32), 'loss_cross_entropy': Array(0.13729198, dtype=float32)}


  6%|▌         | 61619/1000000 [2:49:20<26:38:56,  9.78it/s]

{'loss': Array(0.14697708, dtype=float32), 'loss_cross_entropy': Array(0.13828702, dtype=float32)}


  6%|▌         | 61629/1000000 [2:49:21<24:21:52, 10.70it/s]

{'loss': Array(0.15454149, dtype=float32), 'loss_cross_entropy': Array(0.14508551, dtype=float32)}


  6%|▌         | 61639/1000000 [2:49:23<29:19:32,  8.89it/s]

{'loss': Array(0.16645265, dtype=float32), 'loss_cross_entropy': Array(0.15681028, dtype=float32)}


  6%|▌         | 61649/1000000 [2:49:24<25:07:23, 10.38it/s]

{'loss': Array(0.16494794, dtype=float32), 'loss_cross_entropy': Array(0.15481819, dtype=float32)}


  6%|▌         | 61659/1000000 [2:49:26<33:48:23,  7.71it/s]

{'loss': Array(0.14027813, dtype=float32), 'loss_cross_entropy': Array(0.13143604, dtype=float32)}


  6%|▌         | 61669/1000000 [2:49:27<25:17:21, 10.31it/s]

{'loss': Array(0.13676094, dtype=float32), 'loss_cross_entropy': Array(0.12795316, dtype=float32)}


  6%|▌         | 61679/1000000 [2:49:29<38:31:14,  6.77it/s]

{'loss': Array(0.1696447, dtype=float32), 'loss_cross_entropy': Array(0.15945038, dtype=float32)}


  6%|▌         | 61689/1000000 [2:49:30<26:14:26,  9.93it/s]

{'loss': Array(0.17087959, dtype=float32), 'loss_cross_entropy': Array(0.1607096, dtype=float32)}


  6%|▌         | 61699/1000000 [2:49:32<24:28:45, 10.65it/s]

{'loss': Array(0.17244393, dtype=float32), 'loss_cross_entropy': Array(0.16182213, dtype=float32)}


  6%|▌         | 61709/1000000 [2:49:33<31:07:00,  8.38it/s]

{'loss': Array(0.16191319, dtype=float32), 'loss_cross_entropy': Array(0.15224734, dtype=float32)}


  6%|▌         | 61719/1000000 [2:49:35<25:04:40, 10.39it/s]

{'loss': Array(0.18461542, dtype=float32), 'loss_cross_entropy': Array(0.17388706, dtype=float32)}


  6%|▌         | 61729/1000000 [2:49:36<34:42:26,  7.51it/s]

{'loss': Array(0.16579469, dtype=float32), 'loss_cross_entropy': Array(0.15516113, dtype=float32)}


  6%|▌         | 61739/1000000 [2:49:38<25:39:05, 10.16it/s]

{'loss': Array(0.1770156, dtype=float32), 'loss_cross_entropy': Array(0.1671047, dtype=float32)}


  6%|▌         | 61749/1000000 [2:49:39<24:25:41, 10.67it/s]

{'loss': Array(0.17315252, dtype=float32), 'loss_cross_entropy': Array(0.16268706, dtype=float32)}


  6%|▌         | 61759/1000000 [2:49:41<27:18:12,  9.55it/s]

{'loss': Array(0.15061067, dtype=float32), 'loss_cross_entropy': Array(0.1413848, dtype=float32)}


  6%|▌         | 61769/1000000 [2:49:42<24:23:21, 10.69it/s]

{'loss': Array(0.15717204, dtype=float32), 'loss_cross_entropy': Array(0.14821844, dtype=float32)}


  6%|▌         | 61779/1000000 [2:49:44<38:16:06,  6.81it/s]

{'loss': Array(0.15854554, dtype=float32), 'loss_cross_entropy': Array(0.14827953, dtype=float32)}


  6%|▌         | 61789/1000000 [2:49:45<26:45:19,  9.74it/s]

{'loss': Array(0.16801, dtype=float32), 'loss_cross_entropy': Array(0.15855499, dtype=float32)}


  6%|▌         | 61799/1000000 [2:49:47<24:27:08, 10.66it/s]

{'loss': Array(0.15288185, dtype=float32), 'loss_cross_entropy': Array(0.14309709, dtype=float32)}


  6%|▌         | 61809/1000000 [2:49:48<30:50:42,  8.45it/s]

{'loss': Array(0.16452637, dtype=float32), 'loss_cross_entropy': Array(0.15384756, dtype=float32)}


  6%|▌         | 61819/1000000 [2:49:50<26:03:13, 10.00it/s]

{'loss': Array(0.19896191, dtype=float32), 'loss_cross_entropy': Array(0.18709362, dtype=float32)}


  6%|▌         | 61829/1000000 [2:49:51<35:01:59,  7.44it/s]

{'loss': Array(0.14714327, dtype=float32), 'loss_cross_entropy': Array(0.13758075, dtype=float32)}


  6%|▌         | 61839/1000000 [2:49:53<26:37:30,  9.79it/s]

{'loss': Array(0.17448176, dtype=float32), 'loss_cross_entropy': Array(0.16343622, dtype=float32)}


  6%|▌         | 61849/1000000 [2:49:55<34:48:09,  7.49it/s]

{'loss': Array(0.14651339, dtype=float32), 'loss_cross_entropy': Array(0.13705818, dtype=float32)}


  6%|▌         | 61859/1000000 [2:49:56<25:54:12, 10.06it/s]

{'loss': Array(0.18138549, dtype=float32), 'loss_cross_entropy': Array(0.17047359, dtype=float32)}


  6%|▌         | 61869/1000000 [2:49:57<24:15:10, 10.74it/s]

{'loss': Array(0.15508945, dtype=float32), 'loss_cross_entropy': Array(0.14511184, dtype=float32)}


  6%|▌         | 61879/1000000 [2:49:59<30:28:05,  8.55it/s]

{'loss': Array(0.15890597, dtype=float32), 'loss_cross_entropy': Array(0.14856347, dtype=float32)}


  6%|▌         | 61889/1000000 [2:50:01<26:34:51,  9.80it/s]

{'loss': Array(0.15554059, dtype=float32), 'loss_cross_entropy': Array(0.14515658, dtype=float32)}


  6%|▌         | 61899/1000000 [2:50:02<32:27:43,  8.03it/s]

{'loss': Array(0.16304003, dtype=float32), 'loss_cross_entropy': Array(0.15295258, dtype=float32)}


  6%|▌         | 61909/1000000 [2:50:04<26:26:29,  9.85it/s]

{'loss': Array(0.1599453, dtype=float32), 'loss_cross_entropy': Array(0.15027063, dtype=float32)}


  6%|▌         | 61919/1000000 [2:50:05<39:35:04,  6.58it/s]

{'loss': Array(0.14683177, dtype=float32), 'loss_cross_entropy': Array(0.1369603, dtype=float32)}


  6%|▌         | 61929/1000000 [2:50:07<26:51:12,  9.70it/s]

{'loss': Array(0.15764906, dtype=float32), 'loss_cross_entropy': Array(0.14767268, dtype=float32)}


  6%|▌         | 61939/1000000 [2:50:09<39:15:01,  6.64it/s]

{'loss': Array(0.14155912, dtype=float32), 'loss_cross_entropy': Array(0.13225749, dtype=float32)}


  6%|▌         | 61949/1000000 [2:50:10<27:04:05,  9.63it/s]

{'loss': Array(0.1630737, dtype=float32), 'loss_cross_entropy': Array(0.15280646, dtype=float32)}


  6%|▌         | 61959/1000000 [2:50:11<24:48:13, 10.51it/s]

{'loss': Array(0.13848643, dtype=float32), 'loss_cross_entropy': Array(0.12977746, dtype=float32)}


  6%|▌         | 61969/1000000 [2:50:13<34:00:15,  7.66it/s]

{'loss': Array(0.17960726, dtype=float32), 'loss_cross_entropy': Array(0.16941391, dtype=float32)}


  6%|▌         | 61979/1000000 [2:50:14<25:50:41, 10.08it/s]

{'loss': Array(0.153176, dtype=float32), 'loss_cross_entropy': Array(0.14332163, dtype=float32)}


  6%|▌         | 61989/1000000 [2:50:16<39:06:44,  6.66it/s]

{'loss': Array(0.16522966, dtype=float32), 'loss_cross_entropy': Array(0.15390134, dtype=float32)}


  6%|▌         | 61999/1000000 [2:50:18<27:32:30,  9.46it/s]

{'loss': Array(0.13616799, dtype=float32), 'loss_cross_entropy': Array(0.127573, dtype=float32)}


  6%|▌         | 62009/1000000 [2:50:26<88:01:16,  2.96it/s] 

{'loss': Array(0.16742444, dtype=float32), 'loss_cross_entropy': Array(0.15804832, dtype=float32)}


  6%|▌         | 62019/1000000 [2:50:27<38:22:03,  6.79it/s] 

{'loss': Array(0.16548811, dtype=float32), 'loss_cross_entropy': Array(0.15449503, dtype=float32)}


  6%|▌         | 62029/1000000 [2:50:29<27:18:36,  9.54it/s]

{'loss': Array(0.16061892, dtype=float32), 'loss_cross_entropy': Array(0.14987908, dtype=float32)}


  6%|▌         | 62039/1000000 [2:50:31<28:22:26,  9.18it/s]

{'loss': Array(0.17166291, dtype=float32), 'loss_cross_entropy': Array(0.160694, dtype=float32)}


  6%|▌         | 62049/1000000 [2:50:32<24:26:53, 10.66it/s]

{'loss': Array(0.16984816, dtype=float32), 'loss_cross_entropy': Array(0.16008757, dtype=float32)}


  6%|▌         | 62059/1000000 [2:50:34<37:37:54,  6.92it/s]

{'loss': Array(0.14386868, dtype=float32), 'loss_cross_entropy': Array(0.13467833, dtype=float32)}


  6%|▌         | 62069/1000000 [2:50:35<26:17:49,  9.91it/s]

{'loss': Array(0.16316709, dtype=float32), 'loss_cross_entropy': Array(0.15287615, dtype=float32)}


  6%|▌         | 62079/1000000 [2:50:36<24:33:02, 10.61it/s]

{'loss': Array(0.15739398, dtype=float32), 'loss_cross_entropy': Array(0.14722143, dtype=float32)}


  6%|▌         | 62089/1000000 [2:50:38<31:28:42,  8.28it/s]

{'loss': Array(0.15763004, dtype=float32), 'loss_cross_entropy': Array(0.14804678, dtype=float32)}


  6%|▌         | 62099/1000000 [2:50:39<26:02:42, 10.00it/s]

{'loss': Array(0.1729191, dtype=float32), 'loss_cross_entropy': Array(0.16327326, dtype=float32)}


  6%|▌         | 62109/1000000 [2:50:41<38:13:57,  6.81it/s]

{'loss': Array(0.17198397, dtype=float32), 'loss_cross_entropy': Array(0.16078664, dtype=float32)}


  6%|▌         | 62119/1000000 [2:50:42<26:41:26,  9.76it/s]

{'loss': Array(0.1529788, dtype=float32), 'loss_cross_entropy': Array(0.14334178, dtype=float32)}


  6%|▌         | 62129/1000000 [2:50:44<24:38:19, 10.57it/s]

{'loss': Array(0.15947013, dtype=float32), 'loss_cross_entropy': Array(0.1485347, dtype=float32)}


  6%|▌         | 62139/1000000 [2:50:46<29:06:01,  8.95it/s]

{'loss': Array(0.1701964, dtype=float32), 'loss_cross_entropy': Array(0.16015296, dtype=float32)}


  6%|▌         | 62149/1000000 [2:50:47<25:15:29, 10.31it/s]

{'loss': Array(0.16221766, dtype=float32), 'loss_cross_entropy': Array(0.15189506, dtype=float32)}


  6%|▌         | 62159/1000000 [2:50:49<34:30:34,  7.55it/s]

{'loss': Array(0.15643466, dtype=float32), 'loss_cross_entropy': Array(0.14715253, dtype=float32)}


  6%|▌         | 62169/1000000 [2:50:50<25:52:25, 10.07it/s]

{'loss': Array(0.16574973, dtype=float32), 'loss_cross_entropy': Array(0.15506148, dtype=float32)}


  6%|▌         | 62179/1000000 [2:50:51<24:30:07, 10.63it/s]

{'loss': Array(0.16266026, dtype=float32), 'loss_cross_entropy': Array(0.15375574, dtype=float32)}


  6%|▌         | 62189/1000000 [2:50:53<31:38:53,  8.23it/s]

{'loss': Array(0.17068109, dtype=float32), 'loss_cross_entropy': Array(0.16059081, dtype=float32)}


  6%|▌         | 62199/1000000 [2:50:55<25:29:35, 10.22it/s]

{'loss': Array(0.15832232, dtype=float32), 'loss_cross_entropy': Array(0.14903557, dtype=float32)}


  6%|▌         | 62209/1000000 [2:50:56<24:30:46, 10.63it/s]

{'loss': Array(0.15158199, dtype=float32), 'loss_cross_entropy': Array(0.14216655, dtype=float32)}


  6%|▌         | 62219/1000000 [2:50:58<27:46:30,  9.38it/s]

{'loss': Array(0.14808086, dtype=float32), 'loss_cross_entropy': Array(0.13807839, dtype=float32)}


  6%|▌         | 62229/1000000 [2:50:59<26:09:14,  9.96it/s]

{'loss': Array(0.15130179, dtype=float32), 'loss_cross_entropy': Array(0.14181475, dtype=float32)}


  6%|▌         | 62239/1000000 [2:51:01<32:13:21,  8.08it/s]

{'loss': Array(0.15053938, dtype=float32), 'loss_cross_entropy': Array(0.14085104, dtype=float32)}


  6%|▌         | 62249/1000000 [2:51:02<26:33:41,  9.81it/s]

{'loss': Array(0.19369344, dtype=float32), 'loss_cross_entropy': Array(0.1814912, dtype=float32)}


  6%|▌         | 62259/1000000 [2:51:04<39:52:54,  6.53it/s]

{'loss': Array(0.1468209, dtype=float32), 'loss_cross_entropy': Array(0.13694702, dtype=float32)}


  6%|▌         | 62269/1000000 [2:51:05<28:01:56,  9.29it/s]

{'loss': Array(0.16857746, dtype=float32), 'loss_cross_entropy': Array(0.157468, dtype=float32)}


  6%|▌         | 62279/1000000 [2:51:07<40:24:08,  6.45it/s]

{'loss': Array(0.15966465, dtype=float32), 'loss_cross_entropy': Array(0.14969572, dtype=float32)}


  6%|▌         | 62289/1000000 [2:51:09<27:15:43,  9.55it/s]

{'loss': Array(0.14227724, dtype=float32), 'loss_cross_entropy': Array(0.1326518, dtype=float32)}


  6%|▌         | 62299/1000000 [2:51:10<24:49:53, 10.49it/s]

{'loss': Array(0.15779144, dtype=float32), 'loss_cross_entropy': Array(0.14758818, dtype=float32)}


  6%|▌         | 62309/1000000 [2:51:12<34:35:02,  7.53it/s]

{'loss': Array(0.17345496, dtype=float32), 'loss_cross_entropy': Array(0.16262037, dtype=float32)}


  6%|▌         | 62319/1000000 [2:51:13<27:36:13,  9.44it/s]

{'loss': Array(0.15761834, dtype=float32), 'loss_cross_entropy': Array(0.14790992, dtype=float32)}


  6%|▌         | 62329/1000000 [2:51:15<38:42:08,  6.73it/s]

{'loss': Array(0.1518332, dtype=float32), 'loss_cross_entropy': Array(0.14218332, dtype=float32)}


  6%|▌         | 62339/1000000 [2:51:16<26:19:04,  9.90it/s]

{'loss': Array(0.16343346, dtype=float32), 'loss_cross_entropy': Array(0.15357779, dtype=float32)}


  6%|▌         | 62349/1000000 [2:51:18<24:16:13, 10.73it/s]

{'loss': Array(0.16598205, dtype=float32), 'loss_cross_entropy': Array(0.1556115, dtype=float32)}


  6%|▌         | 62359/1000000 [2:51:19<27:34:26,  9.45it/s]

{'loss': Array(0.17009762, dtype=float32), 'loss_cross_entropy': Array(0.16017914, dtype=float32)}


  6%|▌         | 62369/1000000 [2:51:21<25:08:09, 10.36it/s]

{'loss': Array(0.14711161, dtype=float32), 'loss_cross_entropy': Array(0.13766001, dtype=float32)}


  6%|▌         | 62379/1000000 [2:51:22<28:00:40,  9.30it/s]

{'loss': Array(0.15890229, dtype=float32), 'loss_cross_entropy': Array(0.14935353, dtype=float32)}


  6%|▌         | 62389/1000000 [2:51:24<25:10:07, 10.35it/s]

{'loss': Array(0.16372214, dtype=float32), 'loss_cross_entropy': Array(0.15433185, dtype=float32)}


  6%|▌         | 62399/1000000 [2:51:26<37:49:34,  6.89it/s]

{'loss': Array(0.14569953, dtype=float32), 'loss_cross_entropy': Array(0.13657343, dtype=float32)}


  6%|▌         | 62409/1000000 [2:51:27<26:14:17,  9.93it/s]

{'loss': Array(0.15966946, dtype=float32), 'loss_cross_entropy': Array(0.15031497, dtype=float32)}


  6%|▌         | 62419/1000000 [2:51:28<25:23:17, 10.26it/s]

{'loss': Array(0.16158211, dtype=float32), 'loss_cross_entropy': Array(0.15129575, dtype=float32)}


  6%|▌         | 62429/1000000 [2:51:30<31:04:03,  8.38it/s]

{'loss': Array(0.147709, dtype=float32), 'loss_cross_entropy': Array(0.13823842, dtype=float32)}


  6%|▌         | 62439/1000000 [2:51:31<24:57:36, 10.43it/s]

{'loss': Array(0.17084645, dtype=float32), 'loss_cross_entropy': Array(0.15998426, dtype=float32)}


  6%|▌         | 62449/1000000 [2:51:33<38:23:37,  6.78it/s]

{'loss': Array(0.18209974, dtype=float32), 'loss_cross_entropy': Array(0.17157862, dtype=float32)}


  6%|▌         | 62459/1000000 [2:51:34<26:50:43,  9.70it/s]

{'loss': Array(0.1600293, dtype=float32), 'loss_cross_entropy': Array(0.15011035, dtype=float32)}


  6%|▌         | 62469/1000000 [2:51:36<25:01:13, 10.41it/s]

{'loss': Array(0.13654256, dtype=float32), 'loss_cross_entropy': Array(0.12763813, dtype=float32)}


  6%|▌         | 62479/1000000 [2:51:38<29:44:33,  8.76it/s]

{'loss': Array(0.1591696, dtype=float32), 'loss_cross_entropy': Array(0.14887394, dtype=float32)}


  6%|▌         | 62489/1000000 [2:51:39<25:04:57, 10.38it/s]

{'loss': Array(0.15045188, dtype=float32), 'loss_cross_entropy': Array(0.14130916, dtype=float32)}


  6%|▌         | 62499/1000000 [2:51:41<34:18:07,  7.59it/s]

{'loss': Array(0.15376928, dtype=float32), 'loss_cross_entropy': Array(0.14427696, dtype=float32)}


  6%|▋         | 62509/1000000 [2:51:48<86:24:33,  3.01it/s] 

{'loss': Array(0.16073415, dtype=float32), 'loss_cross_entropy': Array(0.15049891, dtype=float32)}


  6%|▋         | 62519/1000000 [2:51:50<44:03:39,  5.91it/s]

{'loss': Array(0.16717266, dtype=float32), 'loss_cross_entropy': Array(0.15608667, dtype=float32)}


  6%|▋         | 62529/1000000 [2:51:51<27:20:38,  9.52it/s]

{'loss': Array(0.15663867, dtype=float32), 'loss_cross_entropy': Array(0.14733903, dtype=float32)}


  6%|▋         | 62539/1000000 [2:51:53<24:40:38, 10.55it/s]

{'loss': Array(0.16095318, dtype=float32), 'loss_cross_entropy': Array(0.15140377, dtype=float32)}


  6%|▋         | 62549/1000000 [2:51:55<31:12:00,  8.35it/s]

{'loss': Array(0.15765232, dtype=float32), 'loss_cross_entropy': Array(0.14725453, dtype=float32)}


  6%|▋         | 62559/1000000 [2:51:56<25:46:07, 10.11it/s]

{'loss': Array(0.15782166, dtype=float32), 'loss_cross_entropy': Array(0.14838408, dtype=float32)}


  6%|▋         | 62569/1000000 [2:51:57<24:39:51, 10.56it/s]

{'loss': Array(0.16244784, dtype=float32), 'loss_cross_entropy': Array(0.15254307, dtype=float32)}


  6%|▋         | 62579/1000000 [2:51:59<29:08:14,  8.94it/s]

{'loss': Array(0.14775005, dtype=float32), 'loss_cross_entropy': Array(0.13880597, dtype=float32)}


  6%|▋         | 62589/1000000 [2:52:01<25:50:53, 10.07it/s]

{'loss': Array(0.17407088, dtype=float32), 'loss_cross_entropy': Array(0.16279949, dtype=float32)}


  6%|▋         | 62599/1000000 [2:52:02<32:37:51,  7.98it/s]

{'loss': Array(0.16096368, dtype=float32), 'loss_cross_entropy': Array(0.15022197, dtype=float32)}


  6%|▋         | 62609/1000000 [2:52:04<27:03:30,  9.62it/s]

{'loss': Array(0.18600534, dtype=float32), 'loss_cross_entropy': Array(0.17465876, dtype=float32)}


  6%|▋         | 62619/1000000 [2:52:06<35:36:14,  7.31it/s]

{'loss': Array(0.17031577, dtype=float32), 'loss_cross_entropy': Array(0.16019209, dtype=float32)}


  6%|▋         | 62629/1000000 [2:52:07<26:08:44,  9.96it/s]

{'loss': Array(0.15687922, dtype=float32), 'loss_cross_entropy': Array(0.14732407, dtype=float32)}


  6%|▋         | 62639/1000000 [2:52:08<24:38:53, 10.56it/s]

{'loss': Array(0.17423944, dtype=float32), 'loss_cross_entropy': Array(0.16394839, dtype=float32)}


  6%|▋         | 62649/1000000 [2:52:10<31:55:50,  8.15it/s]

{'loss': Array(0.160982, dtype=float32), 'loss_cross_entropy': Array(0.15019995, dtype=float32)}


  6%|▋         | 62659/1000000 [2:52:11<25:21:11, 10.27it/s]

{'loss': Array(0.1478995, dtype=float32), 'loss_cross_entropy': Array(0.138741, dtype=float32)}


  6%|▋         | 62669/1000000 [2:52:13<24:20:53, 10.69it/s]

{'loss': Array(0.16968651, dtype=float32), 'loss_cross_entropy': Array(0.15900008, dtype=float32)}


  6%|▋         | 62679/1000000 [2:52:14<27:25:59,  9.49it/s]

{'loss': Array(0.1462351, dtype=float32), 'loss_cross_entropy': Array(0.13707417, dtype=float32)}


  6%|▋         | 62689/1000000 [2:52:16<24:44:58, 10.52it/s]

{'loss': Array(0.14299856, dtype=float32), 'loss_cross_entropy': Array(0.1338853, dtype=float32)}


  6%|▋         | 62699/1000000 [2:52:18<31:36:47,  8.24it/s]

{'loss': Array(0.15221284, dtype=float32), 'loss_cross_entropy': Array(0.14325665, dtype=float32)}


  6%|▋         | 62709/1000000 [2:52:19<25:17:49, 10.29it/s]

{'loss': Array(0.14369309, dtype=float32), 'loss_cross_entropy': Array(0.13483614, dtype=float32)}


  6%|▋         | 62719/1000000 [2:52:21<38:36:44,  6.74it/s]

{'loss': Array(0.1738299, dtype=float32), 'loss_cross_entropy': Array(0.16314937, dtype=float32)}


  6%|▋         | 62729/1000000 [2:52:22<26:06:34,  9.97it/s]

{'loss': Array(0.16144486, dtype=float32), 'loss_cross_entropy': Array(0.15149215, dtype=float32)}


  6%|▋         | 62739/1000000 [2:52:23<25:01:57, 10.40it/s]

{'loss': Array(0.1573225, dtype=float32), 'loss_cross_entropy': Array(0.14783649, dtype=float32)}


  6%|▋         | 62749/1000000 [2:52:25<34:34:50,  7.53it/s]

{'loss': Array(0.16059956, dtype=float32), 'loss_cross_entropy': Array(0.15143907, dtype=float32)}


  6%|▋         | 62759/1000000 [2:52:26<25:53:35, 10.05it/s]

{'loss': Array(0.16328791, dtype=float32), 'loss_cross_entropy': Array(0.1534014, dtype=float32)}


  6%|▋         | 62769/1000000 [2:52:28<24:25:44, 10.66it/s]

{'loss': Array(0.14778766, dtype=float32), 'loss_cross_entropy': Array(0.139041, dtype=float32)}


  6%|▋         | 62779/1000000 [2:52:30<29:28:12,  8.83it/s]

{'loss': Array(0.16924664, dtype=float32), 'loss_cross_entropy': Array(0.159012, dtype=float32)}


  6%|▋         | 62789/1000000 [2:52:31<25:23:23, 10.25it/s]

{'loss': Array(0.16522032, dtype=float32), 'loss_cross_entropy': Array(0.15533859, dtype=float32)}


  6%|▋         | 62799/1000000 [2:52:33<30:53:47,  8.43it/s]

{'loss': Array(0.15925692, dtype=float32), 'loss_cross_entropy': Array(0.14920107, dtype=float32)}


  6%|▋         | 62809/1000000 [2:52:34<25:13:11, 10.32it/s]

{'loss': Array(0.15380812, dtype=float32), 'loss_cross_entropy': Array(0.14410566, dtype=float32)}


  6%|▋         | 62819/1000000 [2:52:35<23:55:54, 10.88it/s]

{'loss': Array(0.17628728, dtype=float32), 'loss_cross_entropy': Array(0.16610667, dtype=float32)}


  6%|▋         | 62829/1000000 [2:52:37<27:28:41,  9.47it/s]

{'loss': Array(0.1639514, dtype=float32), 'loss_cross_entropy': Array(0.15403448, dtype=float32)}


  6%|▋         | 62839/1000000 [2:52:38<25:13:03, 10.32it/s]

{'loss': Array(0.15991198, dtype=float32), 'loss_cross_entropy': Array(0.14931251, dtype=float32)}


  6%|▋         | 62849/1000000 [2:52:40<33:36:46,  7.74it/s]

{'loss': Array(0.14860591, dtype=float32), 'loss_cross_entropy': Array(0.13912886, dtype=float32)}


  6%|▋         | 62859/1000000 [2:52:42<25:54:42, 10.05it/s]

{'loss': Array(0.15419133, dtype=float32), 'loss_cross_entropy': Array(0.14350529, dtype=float32)}


  6%|▋         | 62869/1000000 [2:52:43<24:07:52, 10.79it/s]

{'loss': Array(0.16791132, dtype=float32), 'loss_cross_entropy': Array(0.15799518, dtype=float32)}


  6%|▋         | 62879/1000000 [2:52:45<27:12:57,  9.56it/s]

{'loss': Array(0.15899375, dtype=float32), 'loss_cross_entropy': Array(0.14914201, dtype=float32)}


  6%|▋         | 62889/1000000 [2:52:46<24:32:37, 10.61it/s]

{'loss': Array(0.14822048, dtype=float32), 'loss_cross_entropy': Array(0.1392038, dtype=float32)}


  6%|▋         | 62899/1000000 [2:52:48<29:29:15,  8.83it/s]

{'loss': Array(0.1490471, dtype=float32), 'loss_cross_entropy': Array(0.13980493, dtype=float32)}


  6%|▋         | 62909/1000000 [2:52:49<25:16:52, 10.30it/s]

{'loss': Array(0.14789426, dtype=float32), 'loss_cross_entropy': Array(0.13879754, dtype=float32)}


  6%|▋         | 62919/1000000 [2:52:51<39:02:25,  6.67it/s]

{'loss': Array(0.15919428, dtype=float32), 'loss_cross_entropy': Array(0.14973108, dtype=float32)}


  6%|▋         | 62929/1000000 [2:52:52<26:53:52,  9.68it/s]

{'loss': Array(0.15697482, dtype=float32), 'loss_cross_entropy': Array(0.14762037, dtype=float32)}


  6%|▋         | 62939/1000000 [2:52:54<25:08:31, 10.35it/s]

{'loss': Array(0.15297541, dtype=float32), 'loss_cross_entropy': Array(0.14329137, dtype=float32)}


  6%|▋         | 62949/1000000 [2:52:55<30:50:28,  8.44it/s]

{'loss': Array(0.17427178, dtype=float32), 'loss_cross_entropy': Array(0.16355534, dtype=float32)}


  6%|▋         | 62959/1000000 [2:52:57<25:13:12, 10.32it/s]

{'loss': Array(0.17844456, dtype=float32), 'loss_cross_entropy': Array(0.1670653, dtype=float32)}


  6%|▋         | 62969/1000000 [2:52:58<33:52:25,  7.68it/s]

{'loss': Array(0.16178837, dtype=float32), 'loss_cross_entropy': Array(0.1519756, dtype=float32)}


  6%|▋         | 62979/1000000 [2:53:00<26:45:18,  9.73it/s]

{'loss': Array(0.16954629, dtype=float32), 'loss_cross_entropy': Array(0.15900223, dtype=float32)}


  6%|▋         | 62989/1000000 [2:53:02<36:27:54,  7.14it/s]

{'loss': Array(0.14767024, dtype=float32), 'loss_cross_entropy': Array(0.13822642, dtype=float32)}


  6%|▋         | 62999/1000000 [2:53:03<27:28:28,  9.47it/s]

{'loss': Array(0.17686342, dtype=float32), 'loss_cross_entropy': Array(0.16627161, dtype=float32)}


  6%|▋         | 63009/1000000 [2:53:11<86:19:38,  3.01it/s] 

{'loss': Array(0.141174, dtype=float32), 'loss_cross_entropy': Array(0.13152687, dtype=float32)}


  6%|▋         | 63019/1000000 [2:53:13<39:52:21,  6.53it/s]

{'loss': Array(0.16283253, dtype=float32), 'loss_cross_entropy': Array(0.152307, dtype=float32)}


  6%|▋         | 63029/1000000 [2:53:14<27:02:00,  9.63it/s]

{'loss': Array(0.14688526, dtype=float32), 'loss_cross_entropy': Array(0.13786986, dtype=float32)}


  6%|▋         | 63039/1000000 [2:53:16<35:07:15,  7.41it/s]

{'loss': Array(0.15091507, dtype=float32), 'loss_cross_entropy': Array(0.14087471, dtype=float32)}


  6%|▋         | 63049/1000000 [2:53:17<26:14:53,  9.92it/s]

{'loss': Array(0.1654421, dtype=float32), 'loss_cross_entropy': Array(0.15461312, dtype=float32)}


  6%|▋         | 63059/1000000 [2:53:19<38:48:00,  6.71it/s]

{'loss': Array(0.16294329, dtype=float32), 'loss_cross_entropy': Array(0.15316446, dtype=float32)}


  6%|▋         | 63069/1000000 [2:53:20<26:35:45,  9.79it/s]

{'loss': Array(0.16373211, dtype=float32), 'loss_cross_entropy': Array(0.15326308, dtype=float32)}


  6%|▋         | 63079/1000000 [2:53:22<24:58:49, 10.42it/s]

{'loss': Array(0.17156373, dtype=float32), 'loss_cross_entropy': Array(0.16087857, dtype=float32)}


  6%|▋         | 63089/1000000 [2:53:23<30:54:04,  8.42it/s]

{'loss': Array(0.15652132, dtype=float32), 'loss_cross_entropy': Array(0.14704339, dtype=float32)}


  6%|▋         | 63099/1000000 [2:53:25<24:54:02, 10.45it/s]

{'loss': Array(0.15091078, dtype=float32), 'loss_cross_entropy': Array(0.14216082, dtype=float32)}


  6%|▋         | 63109/1000000 [2:53:26<33:50:37,  7.69it/s]

{'loss': Array(0.15221098, dtype=float32), 'loss_cross_entropy': Array(0.14279474, dtype=float32)}


  6%|▋         | 63119/1000000 [2:53:28<25:22:11, 10.26it/s]

{'loss': Array(0.15965907, dtype=float32), 'loss_cross_entropy': Array(0.15002255, dtype=float32)}


  6%|▋         | 63129/1000000 [2:53:29<24:22:48, 10.67it/s]

{'loss': Array(0.15913299, dtype=float32), 'loss_cross_entropy': Array(0.14879033, dtype=float32)}


  6%|▋         | 63139/1000000 [2:53:31<27:14:05,  9.56it/s]

{'loss': Array(0.1751128, dtype=float32), 'loss_cross_entropy': Array(0.16476636, dtype=float32)}


  6%|▋         | 63149/1000000 [2:53:32<24:28:54, 10.63it/s]

{'loss': Array(0.15025531, dtype=float32), 'loss_cross_entropy': Array(0.14064558, dtype=float32)}


  6%|▋         | 63159/1000000 [2:53:34<38:08:07,  6.82it/s]

{'loss': Array(0.15822344, dtype=float32), 'loss_cross_entropy': Array(0.14810161, dtype=float32)}


  6%|▋         | 63169/1000000 [2:53:35<26:12:58,  9.93it/s]

{'loss': Array(0.16963379, dtype=float32), 'loss_cross_entropy': Array(0.15909049, dtype=float32)}


  6%|▋         | 63179/1000000 [2:53:37<24:21:34, 10.68it/s]

{'loss': Array(0.16212265, dtype=float32), 'loss_cross_entropy': Array(0.15266234, dtype=float32)}


  6%|▋         | 63189/1000000 [2:53:38<30:30:13,  8.53it/s]

{'loss': Array(0.16480829, dtype=float32), 'loss_cross_entropy': Array(0.15425916, dtype=float32)}


  6%|▋         | 63199/1000000 [2:53:40<25:21:08, 10.26it/s]

{'loss': Array(0.15759681, dtype=float32), 'loss_cross_entropy': Array(0.14834315, dtype=float32)}


  6%|▋         | 63209/1000000 [2:53:41<33:31:44,  7.76it/s]

{'loss': Array(0.14906257, dtype=float32), 'loss_cross_entropy': Array(0.14020114, dtype=float32)}


  6%|▋         | 63219/1000000 [2:53:43<25:25:22, 10.24it/s]

{'loss': Array(0.15133294, dtype=float32), 'loss_cross_entropy': Array(0.14181338, dtype=float32)}


  6%|▋         | 63229/1000000 [2:53:44<34:30:17,  7.54it/s]

{'loss': Array(0.15557697, dtype=float32), 'loss_cross_entropy': Array(0.14583515, dtype=float32)}


  6%|▋         | 63239/1000000 [2:53:46<25:51:06, 10.07it/s]

{'loss': Array(0.14513054, dtype=float32), 'loss_cross_entropy': Array(0.13642694, dtype=float32)}


  6%|▋         | 63249/1000000 [2:53:47<24:03:30, 10.82it/s]

{'loss': Array(0.15645997, dtype=float32), 'loss_cross_entropy': Array(0.14664601, dtype=float32)}


  6%|▋         | 63259/1000000 [2:53:49<28:59:46,  8.97it/s]

{'loss': Array(0.14308476, dtype=float32), 'loss_cross_entropy': Array(0.13421385, dtype=float32)}


  6%|▋         | 63269/1000000 [2:53:50<24:34:54, 10.59it/s]

{'loss': Array(0.16314907, dtype=float32), 'loss_cross_entropy': Array(0.15280464, dtype=float32)}


  6%|▋         | 63279/1000000 [2:53:52<30:51:15,  8.43it/s]

{'loss': Array(0.16130348, dtype=float32), 'loss_cross_entropy': Array(0.1513954, dtype=float32)}


  6%|▋         | 63289/1000000 [2:53:53<24:50:51, 10.47it/s]

{'loss': Array(0.16527596, dtype=float32), 'loss_cross_entropy': Array(0.15443239, dtype=float32)}


  6%|▋         | 63299/1000000 [2:53:55<38:01:43,  6.84it/s]

{'loss': Array(0.1489954, dtype=float32), 'loss_cross_entropy': Array(0.13926552, dtype=float32)}


  6%|▋         | 63309/1000000 [2:53:56<26:30:05,  9.82it/s]

{'loss': Array(0.16645548, dtype=float32), 'loss_cross_entropy': Array(0.15607755, dtype=float32)}


  6%|▋         | 63319/1000000 [2:53:58<38:21:54,  6.78it/s]

{'loss': Array(0.15296116, dtype=float32), 'loss_cross_entropy': Array(0.14307456, dtype=float32)}


  6%|▋         | 63329/1000000 [2:53:59<27:24:19,  9.49it/s]

{'loss': Array(0.16792789, dtype=float32), 'loss_cross_entropy': Array(0.1573035, dtype=float32)}


  6%|▋         | 63339/1000000 [2:54:01<25:57:33, 10.02it/s]

{'loss': Array(0.14990063, dtype=float32), 'loss_cross_entropy': Array(0.14085875, dtype=float32)}


  6%|▋         | 63349/1000000 [2:54:02<36:16:54,  7.17it/s]

{'loss': Array(0.16542375, dtype=float32), 'loss_cross_entropy': Array(0.1546943, dtype=float32)}


  6%|▋         | 63359/1000000 [2:54:04<26:54:26,  9.67it/s]

{'loss': Array(0.17281781, dtype=float32), 'loss_cross_entropy': Array(0.1623137, dtype=float32)}


  6%|▋         | 63369/1000000 [2:54:06<39:09:14,  6.64it/s]

{'loss': Array(0.15966663, dtype=float32), 'loss_cross_entropy': Array(0.1497411, dtype=float32)}


  6%|▋         | 63379/1000000 [2:54:07<26:38:29,  9.77it/s]

{'loss': Array(0.16037233, dtype=float32), 'loss_cross_entropy': Array(0.1508724, dtype=float32)}


  6%|▋         | 63389/1000000 [2:54:08<24:46:09, 10.50it/s]

{'loss': Array(0.16625376, dtype=float32), 'loss_cross_entropy': Array(0.15612392, dtype=float32)}


  6%|▋         | 63399/1000000 [2:54:10<27:36:53,  9.42it/s]

{'loss': Array(0.149049, dtype=float32), 'loss_cross_entropy': Array(0.13933785, dtype=float32)}


  6%|▋         | 63409/1000000 [2:54:11<25:03:13, 10.38it/s]

{'loss': Array(0.157572, dtype=float32), 'loss_cross_entropy': Array(0.14715067, dtype=float32)}


  6%|▋         | 63419/1000000 [2:54:13<27:38:28,  9.41it/s]

{'loss': Array(0.16415195, dtype=float32), 'loss_cross_entropy': Array(0.15411277, dtype=float32)}


  6%|▋         | 63429/1000000 [2:54:14<24:52:33, 10.46it/s]

{'loss': Array(0.15642123, dtype=float32), 'loss_cross_entropy': Array(0.14687197, dtype=float32)}


  6%|▋         | 63439/1000000 [2:54:16<38:11:39,  6.81it/s]

{'loss': Array(0.17004016, dtype=float32), 'loss_cross_entropy': Array(0.15932335, dtype=float32)}


  6%|▋         | 63449/1000000 [2:54:18<27:23:45,  9.50it/s]

{'loss': Array(0.16082314, dtype=float32), 'loss_cross_entropy': Array(0.15134393, dtype=float32)}


  6%|▋         | 63459/1000000 [2:54:19<24:42:47, 10.53it/s]

{'loss': Array(0.15827054, dtype=float32), 'loss_cross_entropy': Array(0.14844917, dtype=float32)}


  6%|▋         | 63469/1000000 [2:54:21<30:50:51,  8.43it/s]

{'loss': Array(0.1529532, dtype=float32), 'loss_cross_entropy': Array(0.14328985, dtype=float32)}


  6%|▋         | 63479/1000000 [2:54:22<25:21:29, 10.26it/s]

{'loss': Array(0.19658153, dtype=float32), 'loss_cross_entropy': Array(0.18518762, dtype=float32)}


  6%|▋         | 63489/1000000 [2:54:24<38:58:45,  6.67it/s]

{'loss': Array(0.16756009, dtype=float32), 'loss_cross_entropy': Array(0.15727942, dtype=float32)}


  6%|▋         | 63499/1000000 [2:54:25<26:43:00,  9.74it/s]

{'loss': Array(0.1583865, dtype=float32), 'loss_cross_entropy': Array(0.148557, dtype=float32)}


  6%|▋         | 63509/1000000 [2:54:33<85:01:32,  3.06it/s] 

{'loss': Array(0.1518139, dtype=float32), 'loss_cross_entropy': Array(0.14234568, dtype=float32)}


  6%|▋         | 63519/1000000 [2:54:35<39:08:01,  6.65it/s]

{'loss': Array(0.1400234, dtype=float32), 'loss_cross_entropy': Array(0.13109592, dtype=float32)}


  6%|▋         | 63529/1000000 [2:54:36<27:14:12,  9.55it/s]

{'loss': Array(0.16904207, dtype=float32), 'loss_cross_entropy': Array(0.15853429, dtype=float32)}


  6%|▋         | 63539/1000000 [2:54:38<31:29:57,  8.26it/s]

{'loss': Array(0.16801392, dtype=float32), 'loss_cross_entropy': Array(0.15787373, dtype=float32)}


  6%|▋         | 63549/1000000 [2:54:39<25:00:53, 10.40it/s]

{'loss': Array(0.16228081, dtype=float32), 'loss_cross_entropy': Array(0.15220927, dtype=float32)}


  6%|▋         | 63559/1000000 [2:54:41<38:13:49,  6.80it/s]

{'loss': Array(0.17369097, dtype=float32), 'loss_cross_entropy': Array(0.1630813, dtype=float32)}


  6%|▋         | 63569/1000000 [2:54:42<26:05:09,  9.97it/s]

{'loss': Array(0.16802822, dtype=float32), 'loss_cross_entropy': Array(0.15737616, dtype=float32)}


  6%|▋         | 63579/1000000 [2:54:44<24:03:52, 10.81it/s]

{'loss': Array(0.17162253, dtype=float32), 'loss_cross_entropy': Array(0.16123438, dtype=float32)}


  6%|▋         | 63589/1000000 [2:54:45<27:41:38,  9.39it/s]

{'loss': Array(0.16023217, dtype=float32), 'loss_cross_entropy': Array(0.15059061, dtype=float32)}


  6%|▋         | 63599/1000000 [2:54:47<25:26:23, 10.22it/s]

{'loss': Array(0.14940362, dtype=float32), 'loss_cross_entropy': Array(0.1400336, dtype=float32)}


  6%|▋         | 63609/1000000 [2:54:48<38:44:57,  6.71it/s]

{'loss': Array(0.16174321, dtype=float32), 'loss_cross_entropy': Array(0.1518463, dtype=float32)}


  6%|▋         | 63619/1000000 [2:54:50<26:37:52,  9.77it/s]

{'loss': Array(0.155968, dtype=float32), 'loss_cross_entropy': Array(0.14580889, dtype=float32)}


  6%|▋         | 63629/1000000 [2:54:51<24:21:38, 10.68it/s]

{'loss': Array(0.16576813, dtype=float32), 'loss_cross_entropy': Array(0.15553321, dtype=float32)}


  6%|▋         | 63639/1000000 [2:54:53<28:52:30,  9.01it/s]

{'loss': Array(0.14061595, dtype=float32), 'loss_cross_entropy': Array(0.13077174, dtype=float32)}


  6%|▋         | 63649/1000000 [2:54:54<25:34:21, 10.17it/s]

{'loss': Array(0.15393248, dtype=float32), 'loss_cross_entropy': Array(0.1447526, dtype=float32)}


  6%|▋         | 63659/1000000 [2:54:56<34:44:06,  7.49it/s]

{'loss': Array(0.1627128, dtype=float32), 'loss_cross_entropy': Array(0.15300432, dtype=float32)}


  6%|▋         | 63669/1000000 [2:54:57<26:03:43,  9.98it/s]

{'loss': Array(0.17812829, dtype=float32), 'loss_cross_entropy': Array(0.16730526, dtype=float32)}


  6%|▋         | 63679/1000000 [2:54:59<39:56:35,  6.51it/s]

{'loss': Array(0.1655197, dtype=float32), 'loss_cross_entropy': Array(0.15591338, dtype=float32)}


  6%|▋         | 63689/1000000 [2:55:01<27:42:57,  9.38it/s]

{'loss': Array(0.17066231, dtype=float32), 'loss_cross_entropy': Array(0.16002262, dtype=float32)}


  6%|▋         | 63699/1000000 [2:55:02<24:56:55, 10.42it/s]

{'loss': Array(0.17352574, dtype=float32), 'loss_cross_entropy': Array(0.16255723, dtype=float32)}


  6%|▋         | 63709/1000000 [2:55:04<31:12:27,  8.33it/s]

{'loss': Array(0.15947007, dtype=float32), 'loss_cross_entropy': Array(0.14955525, dtype=float32)}


  6%|▋         | 63719/1000000 [2:55:05<25:40:29, 10.13it/s]

{'loss': Array(0.16891912, dtype=float32), 'loss_cross_entropy': Array(0.15831582, dtype=float32)}


  6%|▋         | 63729/1000000 [2:55:07<34:44:48,  7.48it/s]

{'loss': Array(0.16061571, dtype=float32), 'loss_cross_entropy': Array(0.15109293, dtype=float32)}


  6%|▋         | 63739/1000000 [2:55:08<26:27:17,  9.83it/s]

{'loss': Array(0.16103725, dtype=float32), 'loss_cross_entropy': Array(0.15095088, dtype=float32)}


  6%|▋         | 63749/1000000 [2:55:10<25:23:50, 10.24it/s]

{'loss': Array(0.15808527, dtype=float32), 'loss_cross_entropy': Array(0.14818572, dtype=float32)}


  6%|▋         | 63759/1000000 [2:55:11<28:18:25,  9.19it/s]

{'loss': Array(0.15227376, dtype=float32), 'loss_cross_entropy': Array(0.14285813, dtype=float32)}


  6%|▋         | 63769/1000000 [2:55:13<26:42:32,  9.74it/s]

{'loss': Array(0.17866996, dtype=float32), 'loss_cross_entropy': Array(0.16803132, dtype=float32)}


  6%|▋         | 63779/1000000 [2:55:15<39:54:15,  6.52it/s]

{'loss': Array(0.18676099, dtype=float32), 'loss_cross_entropy': Array(0.17514189, dtype=float32)}


  6%|▋         | 63789/1000000 [2:55:16<27:38:23,  9.41it/s]

{'loss': Array(0.17419447, dtype=float32), 'loss_cross_entropy': Array(0.16355228, dtype=float32)}


  6%|▋         | 63799/1000000 [2:55:17<25:31:53, 10.19it/s]

{'loss': Array(0.1621304, dtype=float32), 'loss_cross_entropy': Array(0.15220807, dtype=float32)}


  6%|▋         | 63809/1000000 [2:55:19<31:15:53,  8.32it/s]

{'loss': Array(0.1413176, dtype=float32), 'loss_cross_entropy': Array(0.13273422, dtype=float32)}


  6%|▋         | 63819/1000000 [2:55:21<25:22:55, 10.25it/s]

{'loss': Array(0.16874762, dtype=float32), 'loss_cross_entropy': Array(0.15848947, dtype=float32)}


  6%|▋         | 63829/1000000 [2:55:22<33:51:17,  7.68it/s]

{'loss': Array(0.15889524, dtype=float32), 'loss_cross_entropy': Array(0.14864352, dtype=float32)}


  6%|▋         | 63839/1000000 [2:55:24<25:16:07, 10.29it/s]

{'loss': Array(0.1689754, dtype=float32), 'loss_cross_entropy': Array(0.15869366, dtype=float32)}


  6%|▋         | 63849/1000000 [2:55:25<33:57:59,  7.66it/s]

{'loss': Array(0.14017461, dtype=float32), 'loss_cross_entropy': Array(0.1311786, dtype=float32)}


  6%|▋         | 63859/1000000 [2:55:27<25:56:47, 10.02it/s]

{'loss': Array(0.17262436, dtype=float32), 'loss_cross_entropy': Array(0.16155274, dtype=float32)}


  6%|▋         | 63869/1000000 [2:55:28<24:33:37, 10.59it/s]

{'loss': Array(0.18027611, dtype=float32), 'loss_cross_entropy': Array(0.1693302, dtype=float32)}


  6%|▋         | 63879/1000000 [2:55:30<29:09:41,  8.92it/s]

{'loss': Array(0.15019202, dtype=float32), 'loss_cross_entropy': Array(0.1413462, dtype=float32)}


  6%|▋         | 63889/1000000 [2:55:31<25:21:22, 10.26it/s]

{'loss': Array(0.14671004, dtype=float32), 'loss_cross_entropy': Array(0.13720857, dtype=float32)}


  6%|▋         | 63899/1000000 [2:55:33<31:28:20,  8.26it/s]

{'loss': Array(0.1580598, dtype=float32), 'loss_cross_entropy': Array(0.14794932, dtype=float32)}


  6%|▋         | 63909/1000000 [2:55:34<26:04:17,  9.97it/s]

{'loss': Array(0.16689421, dtype=float32), 'loss_cross_entropy': Array(0.1571282, dtype=float32)}


  6%|▋         | 63919/1000000 [2:55:36<39:29:56,  6.58it/s]

{'loss': Array(0.14847694, dtype=float32), 'loss_cross_entropy': Array(0.13898744, dtype=float32)}


  6%|▋         | 63929/1000000 [2:55:37<26:55:10,  9.66it/s]

{'loss': Array(0.16004671, dtype=float32), 'loss_cross_entropy': Array(0.15065804, dtype=float32)}


  6%|▋         | 63939/1000000 [2:55:39<38:59:47,  6.67it/s]

{'loss': Array(0.18192878, dtype=float32), 'loss_cross_entropy': Array(0.17159534, dtype=float32)}


  6%|▋         | 63949/1000000 [2:55:41<27:06:10,  9.59it/s]

{'loss': Array(0.15936087, dtype=float32), 'loss_cross_entropy': Array(0.15002383, dtype=float32)}


  6%|▋         | 63959/1000000 [2:55:42<24:08:51, 10.77it/s]

{'loss': Array(0.18200254, dtype=float32), 'loss_cross_entropy': Array(0.17120099, dtype=float32)}


  6%|▋         | 63969/1000000 [2:55:44<33:40:06,  7.72it/s]

{'loss': Array(0.1565438, dtype=float32), 'loss_cross_entropy': Array(0.14646025, dtype=float32)}


  6%|▋         | 63979/1000000 [2:55:45<25:50:54, 10.06it/s]

{'loss': Array(0.15876059, dtype=float32), 'loss_cross_entropy': Array(0.14853565, dtype=float32)}


  6%|▋         | 63989/1000000 [2:55:47<39:22:27,  6.60it/s]

{'loss': Array(0.16219272, dtype=float32), 'loss_cross_entropy': Array(0.15221958, dtype=float32)}


  6%|▋         | 63999/1000000 [2:55:48<27:40:48,  9.39it/s]

{'loss': Array(0.15608697, dtype=float32), 'loss_cross_entropy': Array(0.14655384, dtype=float32)}


  6%|▋         | 64009/1000000 [2:55:56<86:55:25,  2.99it/s] 

{'loss': Array(0.17470732, dtype=float32), 'loss_cross_entropy': Array(0.16379784, dtype=float32)}


  6%|▋         | 64019/1000000 [2:55:58<37:59:26,  6.84it/s] 

{'loss': Array(0.15731697, dtype=float32), 'loss_cross_entropy': Array(0.1478787, dtype=float32)}


  6%|▋         | 64029/1000000 [2:55:59<27:59:33,  9.29it/s]

{'loss': Array(0.16831148, dtype=float32), 'loss_cross_entropy': Array(0.15864608, dtype=float32)}


  6%|▋         | 64039/1000000 [2:56:01<29:00:48,  8.96it/s]

{'loss': Array(0.15962718, dtype=float32), 'loss_cross_entropy': Array(0.15004985, dtype=float32)}


  6%|▋         | 64049/1000000 [2:56:02<25:52:36, 10.05it/s]

{'loss': Array(0.15748735, dtype=float32), 'loss_cross_entropy': Array(0.14744656, dtype=float32)}


  6%|▋         | 64059/1000000 [2:56:04<39:19:05,  6.61it/s]

{'loss': Array(0.14504074, dtype=float32), 'loss_cross_entropy': Array(0.13630207, dtype=float32)}


  6%|▋         | 64069/1000000 [2:56:06<27:46:26,  9.36it/s]

{'loss': Array(0.14848422, dtype=float32), 'loss_cross_entropy': Array(0.13846779, dtype=float32)}


  6%|▋         | 64079/1000000 [2:56:07<24:57:56, 10.41it/s]

{'loss': Array(0.16901255, dtype=float32), 'loss_cross_entropy': Array(0.15896165, dtype=float32)}


  6%|▋         | 64089/1000000 [2:56:09<30:49:53,  8.43it/s]

{'loss': Array(0.15422857, dtype=float32), 'loss_cross_entropy': Array(0.14492737, dtype=float32)}


  6%|▋         | 64099/1000000 [2:56:10<25:21:18, 10.25it/s]

{'loss': Array(0.13591471, dtype=float32), 'loss_cross_entropy': Array(0.12722921, dtype=float32)}


  6%|▋         | 64109/1000000 [2:56:12<37:47:02,  6.88it/s]

{'loss': Array(0.16075855, dtype=float32), 'loss_cross_entropy': Array(0.15080416, dtype=float32)}


  6%|▋         | 64119/1000000 [2:56:13<26:19:04,  9.88it/s]

{'loss': Array(0.15754907, dtype=float32), 'loss_cross_entropy': Array(0.14739475, dtype=float32)}


  6%|▋         | 64129/1000000 [2:56:15<25:49:46, 10.06it/s]

{'loss': Array(0.17222089, dtype=float32), 'loss_cross_entropy': Array(0.16195583, dtype=float32)}


  6%|▋         | 64139/1000000 [2:56:16<29:16:03,  8.88it/s]

{'loss': Array(0.144816, dtype=float32), 'loss_cross_entropy': Array(0.13619809, dtype=float32)}


  6%|▋         | 64149/1000000 [2:56:18<24:56:17, 10.42it/s]

{'loss': Array(0.13649212, dtype=float32), 'loss_cross_entropy': Array(0.12725012, dtype=float32)}


  6%|▋         | 64159/1000000 [2:56:19<34:22:27,  7.56it/s]

{'loss': Array(0.14605331, dtype=float32), 'loss_cross_entropy': Array(0.13621485, dtype=float32)}


  6%|▋         | 64169/1000000 [2:56:21<25:56:31, 10.02it/s]

{'loss': Array(0.15874739, dtype=float32), 'loss_cross_entropy': Array(0.14914347, dtype=float32)}


  6%|▋         | 64179/1000000 [2:56:22<24:30:40, 10.61it/s]

{'loss': Array(0.17185135, dtype=float32), 'loss_cross_entropy': Array(0.16194153, dtype=float32)}


  6%|▋         | 64189/1000000 [2:56:24<30:57:52,  8.39it/s]

{'loss': Array(0.1411374, dtype=float32), 'loss_cross_entropy': Array(0.13241963, dtype=float32)}


  6%|▋         | 64199/1000000 [2:56:25<24:56:15, 10.42it/s]

{'loss': Array(0.15619053, dtype=float32), 'loss_cross_entropy': Array(0.14689748, dtype=float32)}


  6%|▋         | 64209/1000000 [2:56:27<23:54:42, 10.87it/s]

{'loss': Array(0.16014747, dtype=float32), 'loss_cross_entropy': Array(0.1493116, dtype=float32)}


  6%|▋         | 64219/1000000 [2:56:28<27:06:38,  9.59it/s]

{'loss': Array(0.16094628, dtype=float32), 'loss_cross_entropy': Array(0.15153308, dtype=float32)}


  6%|▋         | 64229/1000000 [2:56:30<24:50:46, 10.46it/s]

{'loss': Array(0.1606954, dtype=float32), 'loss_cross_entropy': Array(0.15132035, dtype=float32)}


  6%|▋         | 64239/1000000 [2:56:31<31:11:23,  8.33it/s]

{'loss': Array(0.1449341, dtype=float32), 'loss_cross_entropy': Array(0.13557674, dtype=float32)}


  6%|▋         | 64249/1000000 [2:56:33<25:21:44, 10.25it/s]

{'loss': Array(0.15525013, dtype=float32), 'loss_cross_entropy': Array(0.14516738, dtype=float32)}


  6%|▋         | 64259/1000000 [2:56:34<38:16:28,  6.79it/s]

{'loss': Array(0.17756076, dtype=float32), 'loss_cross_entropy': Array(0.1661656, dtype=float32)}


  6%|▋         | 64269/1000000 [2:56:36<26:09:09,  9.94it/s]

{'loss': Array(0.14826931, dtype=float32), 'loss_cross_entropy': Array(0.13874933, dtype=float32)}


  6%|▋         | 64279/1000000 [2:56:37<38:44:25,  6.71it/s]

{'loss': Array(0.14125744, dtype=float32), 'loss_cross_entropy': Array(0.13194594, dtype=float32)}


  6%|▋         | 64289/1000000 [2:56:39<26:16:33,  9.89it/s]

{'loss': Array(0.15379414, dtype=float32), 'loss_cross_entropy': Array(0.14427954, dtype=float32)}


  6%|▋         | 64299/1000000 [2:56:40<24:27:19, 10.63it/s]

{'loss': Array(0.17594819, dtype=float32), 'loss_cross_entropy': Array(0.16485544, dtype=float32)}


  6%|▋         | 64309/1000000 [2:56:42<33:42:27,  7.71it/s]

{'loss': Array(0.1722803, dtype=float32), 'loss_cross_entropy': Array(0.16232143, dtype=float32)}


  6%|▋         | 64319/1000000 [2:56:43<25:48:19, 10.07it/s]

{'loss': Array(0.15982589, dtype=float32), 'loss_cross_entropy': Array(0.15048319, dtype=float32)}


  6%|▋         | 64329/1000000 [2:56:45<38:00:36,  6.84it/s]

{'loss': Array(0.16067947, dtype=float32), 'loss_cross_entropy': Array(0.15049858, dtype=float32)}


  6%|▋         | 64339/1000000 [2:56:46<26:27:28,  9.82it/s]

{'loss': Array(0.17978446, dtype=float32), 'loss_cross_entropy': Array(0.16885464, dtype=float32)}


  6%|▋         | 64349/1000000 [2:56:48<24:51:36, 10.45it/s]

{'loss': Array(0.15203938, dtype=float32), 'loss_cross_entropy': Array(0.14317423, dtype=float32)}


  6%|▋         | 64359/1000000 [2:56:49<28:13:23,  9.21it/s]

{'loss': Array(0.1419131, dtype=float32), 'loss_cross_entropy': Array(0.13284336, dtype=float32)}


  6%|▋         | 64369/1000000 [2:56:51<24:34:46, 10.57it/s]

{'loss': Array(0.1889773, dtype=float32), 'loss_cross_entropy': Array(0.17836648, dtype=float32)}


  6%|▋         | 64379/1000000 [2:56:52<27:15:39,  9.53it/s]

{'loss': Array(0.14723678, dtype=float32), 'loss_cross_entropy': Array(0.13768607, dtype=float32)}


  6%|▋         | 64389/1000000 [2:56:54<24:19:39, 10.68it/s]

{'loss': Array(0.16599284, dtype=float32), 'loss_cross_entropy': Array(0.15564139, dtype=float32)}


  6%|▋         | 64399/1000000 [2:56:56<38:42:22,  6.71it/s]

{'loss': Array(0.1571914, dtype=float32), 'loss_cross_entropy': Array(0.14725637, dtype=float32)}


  6%|▋         | 64409/1000000 [2:56:57<26:49:45,  9.69it/s]

{'loss': Array(0.16426115, dtype=float32), 'loss_cross_entropy': Array(0.15406027, dtype=float32)}


  6%|▋         | 64419/1000000 [2:56:58<24:47:23, 10.48it/s]

{'loss': Array(0.170112, dtype=float32), 'loss_cross_entropy': Array(0.15959018, dtype=float32)}


  6%|▋         | 64429/1000000 [2:57:00<32:17:36,  8.05it/s]

{'loss': Array(0.18470632, dtype=float32), 'loss_cross_entropy': Array(0.17440121, dtype=float32)}


  6%|▋         | 64439/1000000 [2:57:02<26:13:58,  9.91it/s]

{'loss': Array(0.15038808, dtype=float32), 'loss_cross_entropy': Array(0.14042224, dtype=float32)}


  6%|▋         | 64449/1000000 [2:57:03<39:32:41,  6.57it/s]

{'loss': Array(0.17611562, dtype=float32), 'loss_cross_entropy': Array(0.16453679, dtype=float32)}


  6%|▋         | 64459/1000000 [2:57:05<27:47:58,  9.35it/s]

{'loss': Array(0.16828202, dtype=float32), 'loss_cross_entropy': Array(0.15788566, dtype=float32)}


  6%|▋         | 64469/1000000 [2:57:06<25:43:13, 10.10it/s]

{'loss': Array(0.15260774, dtype=float32), 'loss_cross_entropy': Array(0.1427521, dtype=float32)}


  6%|▋         | 64479/1000000 [2:57:08<29:27:06,  8.82it/s]

{'loss': Array(0.18693182, dtype=float32), 'loss_cross_entropy': Array(0.17651354, dtype=float32)}


  6%|▋         | 64489/1000000 [2:57:09<25:57:09, 10.01it/s]

{'loss': Array(0.16917534, dtype=float32), 'loss_cross_entropy': Array(0.15911911, dtype=float32)}


  6%|▋         | 64499/1000000 [2:57:11<34:38:53,  7.50it/s]

{'loss': Array(0.14219372, dtype=float32), 'loss_cross_entropy': Array(0.13274826, dtype=float32)}


  6%|▋         | 64509/1000000 [2:57:19<86:37:02,  3.00it/s] 

{'loss': Array(0.17167924, dtype=float32), 'loss_cross_entropy': Array(0.16148901, dtype=float32)}


  6%|▋         | 64519/1000000 [2:57:21<44:07:59,  5.89it/s]

{'loss': Array(0.15529704, dtype=float32), 'loss_cross_entropy': Array(0.14604162, dtype=float32)}


  6%|▋         | 64529/1000000 [2:57:22<27:41:40,  9.38it/s]

{'loss': Array(0.14992152, dtype=float32), 'loss_cross_entropy': Array(0.14052793, dtype=float32)}


  6%|▋         | 64539/1000000 [2:57:23<25:22:25, 10.24it/s]

{'loss': Array(0.15304427, dtype=float32), 'loss_cross_entropy': Array(0.14366876, dtype=float32)}


  6%|▋         | 64549/1000000 [2:57:25<31:29:25,  8.25it/s]

{'loss': Array(0.16153944, dtype=float32), 'loss_cross_entropy': Array(0.15182205, dtype=float32)}


  6%|▋         | 64559/1000000 [2:57:27<25:35:40, 10.15it/s]

{'loss': Array(0.18118273, dtype=float32), 'loss_cross_entropy': Array(0.16958174, dtype=float32)}


  6%|▋         | 64569/1000000 [2:57:28<24:21:30, 10.67it/s]

{'loss': Array(0.17192893, dtype=float32), 'loss_cross_entropy': Array(0.16130845, dtype=float32)}


  6%|▋         | 64579/1000000 [2:57:30<27:13:07,  9.55it/s]

{'loss': Array(0.17025988, dtype=float32), 'loss_cross_entropy': Array(0.1602951, dtype=float32)}


  6%|▋         | 64589/1000000 [2:57:31<24:25:14, 10.64it/s]

{'loss': Array(0.17458981, dtype=float32), 'loss_cross_entropy': Array(0.16413377, dtype=float32)}


  6%|▋         | 64599/1000000 [2:57:33<31:18:43,  8.30it/s]

{'loss': Array(0.16624531, dtype=float32), 'loss_cross_entropy': Array(0.1561372, dtype=float32)}


  6%|▋         | 64609/1000000 [2:57:34<25:42:50, 10.10it/s]

{'loss': Array(0.17130177, dtype=float32), 'loss_cross_entropy': Array(0.1602356, dtype=float32)}


  6%|▋         | 64619/1000000 [2:57:36<33:44:01,  7.70it/s]

{'loss': Array(0.15999082, dtype=float32), 'loss_cross_entropy': Array(0.15025163, dtype=float32)}


  6%|▋         | 64629/1000000 [2:57:37<25:32:25, 10.17it/s]

{'loss': Array(0.16487859, dtype=float32), 'loss_cross_entropy': Array(0.15385674, dtype=float32)}


  6%|▋         | 64639/1000000 [2:57:38<24:27:26, 10.62it/s]

{'loss': Array(0.17406127, dtype=float32), 'loss_cross_entropy': Array(0.16344768, dtype=float32)}


  6%|▋         | 64649/1000000 [2:57:40<30:32:43,  8.51it/s]

{'loss': Array(0.17245181, dtype=float32), 'loss_cross_entropy': Array(0.16221428, dtype=float32)}


  6%|▋         | 64659/1000000 [2:57:41<25:11:28, 10.31it/s]

{'loss': Array(0.15276209, dtype=float32), 'loss_cross_entropy': Array(0.14326255, dtype=float32)}


  6%|▋         | 64669/1000000 [2:57:43<24:12:33, 10.73it/s]

{'loss': Array(0.17940794, dtype=float32), 'loss_cross_entropy': Array(0.16877003, dtype=float32)}


  6%|▋         | 64679/1000000 [2:57:45<27:19:48,  9.51it/s]

{'loss': Array(0.17717569, dtype=float32), 'loss_cross_entropy': Array(0.1665667, dtype=float32)}


  6%|▋         | 64689/1000000 [2:57:46<24:18:53, 10.69it/s]

{'loss': Array(0.17883338, dtype=float32), 'loss_cross_entropy': Array(0.16784099, dtype=float32)}


  6%|▋         | 64699/1000000 [2:57:48<30:38:43,  8.48it/s]

{'loss': Array(0.14012948, dtype=float32), 'loss_cross_entropy': Array(0.1315145, dtype=float32)}


  6%|▋         | 64709/1000000 [2:57:49<25:30:30, 10.18it/s]

{'loss': Array(0.13917165, dtype=float32), 'loss_cross_entropy': Array(0.13004582, dtype=float32)}


  6%|▋         | 64719/1000000 [2:57:51<38:07:42,  6.81it/s]

{'loss': Array(0.15916288, dtype=float32), 'loss_cross_entropy': Array(0.14906417, dtype=float32)}


  6%|▋         | 64729/1000000 [2:57:52<26:20:06,  9.87it/s]

{'loss': Array(0.15504053, dtype=float32), 'loss_cross_entropy': Array(0.14508823, dtype=float32)}


  6%|▋         | 64739/1000000 [2:57:53<24:22:15, 10.66it/s]

{'loss': Array(0.16582234, dtype=float32), 'loss_cross_entropy': Array(0.15550242, dtype=float32)}


  6%|▋         | 64749/1000000 [2:57:55<34:00:40,  7.64it/s]

{'loss': Array(0.13163134, dtype=float32), 'loss_cross_entropy': Array(0.12359202, dtype=float32)}


  6%|▋         | 64759/1000000 [2:57:56<25:41:23, 10.11it/s]

{'loss': Array(0.1717749, dtype=float32), 'loss_cross_entropy': Array(0.16246173, dtype=float32)}


  6%|▋         | 64769/1000000 [2:57:58<24:28:33, 10.61it/s]

{'loss': Array(0.14591296, dtype=float32), 'loss_cross_entropy': Array(0.13637327, dtype=float32)}


  6%|▋         | 64779/1000000 [2:57:59<30:04:15,  8.64it/s]

{'loss': Array(0.15367503, dtype=float32), 'loss_cross_entropy': Array(0.14410172, dtype=float32)}


  6%|▋         | 64789/1000000 [2:58:01<26:07:38,  9.94it/s]

{'loss': Array(0.15473084, dtype=float32), 'loss_cross_entropy': Array(0.1456214, dtype=float32)}


  6%|▋         | 64799/1000000 [2:58:03<32:12:09,  8.07it/s]

{'loss': Array(0.14733498, dtype=float32), 'loss_cross_entropy': Array(0.13789049, dtype=float32)}


  6%|▋         | 64809/1000000 [2:58:04<26:39:54,  9.74it/s]

{'loss': Array(0.14521745, dtype=float32), 'loss_cross_entropy': Array(0.13589476, dtype=float32)}


  6%|▋         | 64819/1000000 [2:58:05<24:50:17, 10.46it/s]

{'loss': Array(0.15338741, dtype=float32), 'loss_cross_entropy': Array(0.14401026, dtype=float32)}


  6%|▋         | 64829/1000000 [2:58:07<27:11:30,  9.55it/s]

{'loss': Array(0.13945031, dtype=float32), 'loss_cross_entropy': Array(0.13062754, dtype=float32)}


  6%|▋         | 64839/1000000 [2:58:09<24:51:32, 10.45it/s]

{'loss': Array(0.1729341, dtype=float32), 'loss_cross_entropy': Array(0.1616617, dtype=float32)}


  6%|▋         | 64849/1000000 [2:58:10<33:55:19,  7.66it/s]

{'loss': Array(0.16719747, dtype=float32), 'loss_cross_entropy': Array(0.15686089, dtype=float32)}


  6%|▋         | 64859/1000000 [2:58:12<25:46:00, 10.08it/s]

{'loss': Array(0.16385882, dtype=float32), 'loss_cross_entropy': Array(0.15425526, dtype=float32)}


  6%|▋         | 64869/1000000 [2:58:13<24:09:39, 10.75it/s]

{'loss': Array(0.14180309, dtype=float32), 'loss_cross_entropy': Array(0.1323946, dtype=float32)}


  6%|▋         | 64879/1000000 [2:58:15<27:19:54,  9.50it/s]

{'loss': Array(0.16607028, dtype=float32), 'loss_cross_entropy': Array(0.1555926, dtype=float32)}


  6%|▋         | 64889/1000000 [2:58:16<24:30:59, 10.60it/s]

{'loss': Array(0.15359922, dtype=float32), 'loss_cross_entropy': Array(0.14404225, dtype=float32)}


  6%|▋         | 64899/1000000 [2:58:18<29:05:38,  8.93it/s]

{'loss': Array(0.14672732, dtype=float32), 'loss_cross_entropy': Array(0.13743997, dtype=float32)}


  6%|▋         | 64909/1000000 [2:58:19<25:30:28, 10.18it/s]

{'loss': Array(0.18058664, dtype=float32), 'loss_cross_entropy': Array(0.1702776, dtype=float32)}


  6%|▋         | 64919/1000000 [2:58:21<38:13:50,  6.79it/s]

{'loss': Array(0.14826046, dtype=float32), 'loss_cross_entropy': Array(0.13848846, dtype=float32)}


  6%|▋         | 64929/1000000 [2:58:22<26:15:13,  9.89it/s]

{'loss': Array(0.16413222, dtype=float32), 'loss_cross_entropy': Array(0.15337937, dtype=float32)}


  6%|▋         | 64939/1000000 [2:58:24<24:17:20, 10.69it/s]

{'loss': Array(0.15029697, dtype=float32), 'loss_cross_entropy': Array(0.14108728, dtype=float32)}


  6%|▋         | 64949/1000000 [2:58:25<30:55:28,  8.40it/s]

{'loss': Array(0.15674882, dtype=float32), 'loss_cross_entropy': Array(0.14638913, dtype=float32)}


  6%|▋         | 64959/1000000 [2:58:27<25:23:41, 10.23it/s]

{'loss': Array(0.16842276, dtype=float32), 'loss_cross_entropy': Array(0.15818796, dtype=float32)}


  6%|▋         | 64969/1000000 [2:58:28<33:54:17,  7.66it/s]

{'loss': Array(0.17203134, dtype=float32), 'loss_cross_entropy': Array(0.16137724, dtype=float32)}


  6%|▋         | 64979/1000000 [2:58:30<25:51:37, 10.04it/s]

{'loss': Array(0.14924952, dtype=float32), 'loss_cross_entropy': Array(0.14045365, dtype=float32)}


  6%|▋         | 64989/1000000 [2:58:31<34:39:58,  7.49it/s]

{'loss': Array(0.15922424, dtype=float32), 'loss_cross_entropy': Array(0.1504073, dtype=float32)}


  6%|▋         | 64999/1000000 [2:58:33<26:17:50,  9.88it/s]

{'loss': Array(0.15332499, dtype=float32), 'loss_cross_entropy': Array(0.14387764, dtype=float32)}


  7%|▋         | 65009/1000000 [2:58:40<83:16:43,  3.12it/s] 

{'loss': Array(0.18449704, dtype=float32), 'loss_cross_entropy': Array(0.1742614, dtype=float32)}


  7%|▋         | 65019/1000000 [2:58:42<39:18:28,  6.61it/s]

{'loss': Array(0.1687377, dtype=float32), 'loss_cross_entropy': Array(0.15839492, dtype=float32)}


  7%|▋         | 65029/1000000 [2:58:44<27:00:47,  9.61it/s]

{'loss': Array(0.16175215, dtype=float32), 'loss_cross_entropy': Array(0.1521125, dtype=float32)}


  7%|▋         | 65039/1000000 [2:58:45<34:39:03,  7.50it/s]

{'loss': Array(0.17197293, dtype=float32), 'loss_cross_entropy': Array(0.16152833, dtype=float32)}


  7%|▋         | 65049/1000000 [2:58:47<26:14:58,  9.89it/s]

{'loss': Array(0.1496449, dtype=float32), 'loss_cross_entropy': Array(0.14063053, dtype=float32)}


  7%|▋         | 65059/1000000 [2:58:48<38:47:35,  6.69it/s]

{'loss': Array(0.16661511, dtype=float32), 'loss_cross_entropy': Array(0.15719198, dtype=float32)}


  7%|▋         | 65069/1000000 [2:58:50<27:07:03,  9.58it/s]

{'loss': Array(0.16133621, dtype=float32), 'loss_cross_entropy': Array(0.15186976, dtype=float32)}


  7%|▋         | 65079/1000000 [2:58:51<24:45:10, 10.49it/s]

{'loss': Array(0.14313959, dtype=float32), 'loss_cross_entropy': Array(0.13477588, dtype=float32)}


  7%|▋         | 65089/1000000 [2:58:53<30:39:43,  8.47it/s]

{'loss': Array(0.16501911, dtype=float32), 'loss_cross_entropy': Array(0.15464199, dtype=float32)}


  7%|▋         | 65099/1000000 [2:58:54<25:04:37, 10.36it/s]

{'loss': Array(0.13931303, dtype=float32), 'loss_cross_entropy': Array(0.13036764, dtype=float32)}


  7%|▋         | 65109/1000000 [2:58:56<34:47:44,  7.46it/s]

{'loss': Array(0.17280672, dtype=float32), 'loss_cross_entropy': Array(0.1624915, dtype=float32)}


  7%|▋         | 65119/1000000 [2:58:57<26:03:08,  9.97it/s]

{'loss': Array(0.13321267, dtype=float32), 'loss_cross_entropy': Array(0.1249444, dtype=float32)}


  7%|▋         | 65129/1000000 [2:58:59<25:07:37, 10.33it/s]

{'loss': Array(0.15593167, dtype=float32), 'loss_cross_entropy': Array(0.14595622, dtype=float32)}


  7%|▋         | 65139/1000000 [2:59:01<28:44:58,  9.03it/s]

{'loss': Array(0.17664364, dtype=float32), 'loss_cross_entropy': Array(0.16587432, dtype=float32)}


  7%|▋         | 65149/1000000 [2:59:02<24:34:17, 10.57it/s]

{'loss': Array(0.17193459, dtype=float32), 'loss_cross_entropy': Array(0.16203566, dtype=float32)}


  7%|▋         | 65159/1000000 [2:59:04<40:22:08,  6.43it/s]

{'loss': Array(0.18215969, dtype=float32), 'loss_cross_entropy': Array(0.17163949, dtype=float32)}


  7%|▋         | 65169/1000000 [2:59:05<27:42:49,  9.37it/s]

{'loss': Array(0.15399978, dtype=float32), 'loss_cross_entropy': Array(0.14449847, dtype=float32)}


  7%|▋         | 65179/1000000 [2:59:07<25:52:55, 10.03it/s]

{'loss': Array(0.14376771, dtype=float32), 'loss_cross_entropy': Array(0.13443972, dtype=float32)}


  7%|▋         | 65189/1000000 [2:59:08<33:30:04,  7.75it/s]

{'loss': Array(0.16001932, dtype=float32), 'loss_cross_entropy': Array(0.150308, dtype=float32)}


  7%|▋         | 65199/1000000 [2:59:10<25:49:39, 10.05it/s]

{'loss': Array(0.16386364, dtype=float32), 'loss_cross_entropy': Array(0.15396644, dtype=float32)}


  7%|▋         | 65209/1000000 [2:59:12<34:36:56,  7.50it/s]

{'loss': Array(0.15639202, dtype=float32), 'loss_cross_entropy': Array(0.14763027, dtype=float32)}


  7%|▋         | 65219/1000000 [2:59:13<26:11:49,  9.91it/s]

{'loss': Array(0.15812463, dtype=float32), 'loss_cross_entropy': Array(0.148952, dtype=float32)}


  7%|▋         | 65229/1000000 [2:59:15<33:57:05,  7.65it/s]

{'loss': Array(0.16622192, dtype=float32), 'loss_cross_entropy': Array(0.15592031, dtype=float32)}


  7%|▋         | 65239/1000000 [2:59:16<26:25:16,  9.83it/s]

{'loss': Array(0.16789196, dtype=float32), 'loss_cross_entropy': Array(0.1569555, dtype=float32)}


  7%|▋         | 65249/1000000 [2:59:17<25:02:57, 10.37it/s]

{'loss': Array(0.17420268, dtype=float32), 'loss_cross_entropy': Array(0.16345078, dtype=float32)}


  7%|▋         | 65259/1000000 [2:59:19<29:01:54,  8.94it/s]

{'loss': Array(0.1459482, dtype=float32), 'loss_cross_entropy': Array(0.13637035, dtype=float32)}


  7%|▋         | 65269/1000000 [2:59:20<24:55:53, 10.41it/s]

{'loss': Array(0.16312625, dtype=float32), 'loss_cross_entropy': Array(0.15318799, dtype=float32)}


  7%|▋         | 65279/1000000 [2:59:22<31:05:30,  8.35it/s]

{'loss': Array(0.14922117, dtype=float32), 'loss_cross_entropy': Array(0.14012158, dtype=float32)}


  7%|▋         | 65289/1000000 [2:59:24<25:21:08, 10.24it/s]

{'loss': Array(0.16627824, dtype=float32), 'loss_cross_entropy': Array(0.15644291, dtype=float32)}


  7%|▋         | 65299/1000000 [2:59:25<38:23:01,  6.76it/s]

{'loss': Array(0.1863498, dtype=float32), 'loss_cross_entropy': Array(0.1753841, dtype=float32)}


  7%|▋         | 65309/1000000 [2:59:27<26:52:00,  9.66it/s]

{'loss': Array(0.14503182, dtype=float32), 'loss_cross_entropy': Array(0.13580248, dtype=float32)}


  7%|▋         | 65319/1000000 [2:59:28<38:46:56,  6.69it/s]

{'loss': Array(0.15984586, dtype=float32), 'loss_cross_entropy': Array(0.14994323, dtype=float32)}


  7%|▋         | 65329/1000000 [2:59:30<26:20:35,  9.86it/s]

{'loss': Array(0.15451746, dtype=float32), 'loss_cross_entropy': Array(0.14444807, dtype=float32)}


  7%|▋         | 65339/1000000 [2:59:31<24:11:12, 10.73it/s]

{'loss': Array(0.15132761, dtype=float32), 'loss_cross_entropy': Array(0.1421092, dtype=float32)}


  7%|▋         | 65349/1000000 [2:59:33<33:24:46,  7.77it/s]

{'loss': Array(0.14113808, dtype=float32), 'loss_cross_entropy': Array(0.13222641, dtype=float32)}


  7%|▋         | 65359/1000000 [2:59:34<25:20:56, 10.24it/s]

{'loss': Array(0.16214648, dtype=float32), 'loss_cross_entropy': Array(0.15269248, dtype=float32)}


  7%|▋         | 65369/1000000 [2:59:36<38:28:55,  6.75it/s]

{'loss': Array(0.16058944, dtype=float32), 'loss_cross_entropy': Array(0.15129423, dtype=float32)}


  7%|▋         | 65379/1000000 [2:59:37<26:30:59,  9.79it/s]

{'loss': Array(0.14379016, dtype=float32), 'loss_cross_entropy': Array(0.1351844, dtype=float32)}


  7%|▋         | 65389/1000000 [2:59:39<24:52:24, 10.44it/s]

{'loss': Array(0.1543141, dtype=float32), 'loss_cross_entropy': Array(0.14439045, dtype=float32)}


  7%|▋         | 65399/1000000 [2:59:40<27:30:26,  9.44it/s]

{'loss': Array(0.14900559, dtype=float32), 'loss_cross_entropy': Array(0.13975829, dtype=float32)}


  7%|▋         | 65409/1000000 [2:59:42<25:05:27, 10.35it/s]

{'loss': Array(0.15985236, dtype=float32), 'loss_cross_entropy': Array(0.14970672, dtype=float32)}


  7%|▋         | 65419/1000000 [2:59:43<27:40:55,  9.38it/s]

{'loss': Array(0.16440569, dtype=float32), 'loss_cross_entropy': Array(0.15367286, dtype=float32)}


  7%|▋         | 65429/1000000 [2:59:45<24:40:26, 10.52it/s]

{'loss': Array(0.15982477, dtype=float32), 'loss_cross_entropy': Array(0.15017068, dtype=float32)}


  7%|▋         | 65439/1000000 [2:59:46<37:43:33,  6.88it/s]

{'loss': Array(0.16402161, dtype=float32), 'loss_cross_entropy': Array(0.15340887, dtype=float32)}


  7%|▋         | 65449/1000000 [2:59:48<26:14:42,  9.89it/s]

{'loss': Array(0.15175134, dtype=float32), 'loss_cross_entropy': Array(0.14235942, dtype=float32)}


  7%|▋         | 65459/1000000 [2:59:49<24:08:08, 10.76it/s]

{'loss': Array(0.15532903, dtype=float32), 'loss_cross_entropy': Array(0.1455743, dtype=float32)}


  7%|▋         | 65469/1000000 [2:59:51<30:52:18,  8.41it/s]

{'loss': Array(0.16066091, dtype=float32), 'loss_cross_entropy': Array(0.15078688, dtype=float32)}


  7%|▋         | 65479/1000000 [2:59:52<25:40:00, 10.11it/s]

{'loss': Array(0.14898004, dtype=float32), 'loss_cross_entropy': Array(0.13973819, dtype=float32)}


  7%|▋         | 65489/1000000 [2:59:54<38:23:46,  6.76it/s]

{'loss': Array(0.16731714, dtype=float32), 'loss_cross_entropy': Array(0.15702407, dtype=float32)}


  7%|▋         | 65499/1000000 [2:59:55<26:35:33,  9.76it/s]

{'loss': Array(0.15468173, dtype=float32), 'loss_cross_entropy': Array(0.1445797, dtype=float32)}


  7%|▋         | 65509/1000000 [3:00:03<83:32:17,  3.11it/s] 

{'loss': Array(0.16104169, dtype=float32), 'loss_cross_entropy': Array(0.15113674, dtype=float32)}


  7%|▋         | 65519/1000000 [3:00:05<39:12:20,  6.62it/s]

{'loss': Array(0.16939071, dtype=float32), 'loss_cross_entropy': Array(0.15982874, dtype=float32)}


  7%|▋         | 65529/1000000 [3:00:06<27:35:37,  9.41it/s]

{'loss': Array(0.16490726, dtype=float32), 'loss_cross_entropy': Array(0.15470348, dtype=float32)}


  7%|▋         | 65539/1000000 [3:00:08<32:00:15,  8.11it/s]

{'loss': Array(0.15217052, dtype=float32), 'loss_cross_entropy': Array(0.14366962, dtype=float32)}


  7%|▋         | 65549/1000000 [3:00:09<25:26:44, 10.20it/s]

{'loss': Array(0.14768484, dtype=float32), 'loss_cross_entropy': Array(0.13839486, dtype=float32)}


  7%|▋         | 65559/1000000 [3:00:11<38:23:16,  6.76it/s]

{'loss': Array(0.16975252, dtype=float32), 'loss_cross_entropy': Array(0.15975152, dtype=float32)}


  7%|▋         | 65569/1000000 [3:00:12<26:25:45,  9.82it/s]

{'loss': Array(0.14884453, dtype=float32), 'loss_cross_entropy': Array(0.13924566, dtype=float32)}


  7%|▋         | 65579/1000000 [3:00:13<24:28:13, 10.61it/s]

{'loss': Array(0.15207483, dtype=float32), 'loss_cross_entropy': Array(0.14273006, dtype=float32)}


  7%|▋         | 65589/1000000 [3:00:15<27:20:52,  9.49it/s]

{'loss': Array(0.15441339, dtype=float32), 'loss_cross_entropy': Array(0.1445645, dtype=float32)}


  7%|▋         | 65599/1000000 [3:00:17<24:56:02, 10.41it/s]

{'loss': Array(0.18117957, dtype=float32), 'loss_cross_entropy': Array(0.17060193, dtype=float32)}


  7%|▋         | 65609/1000000 [3:00:18<38:30:53,  6.74it/s]

{'loss': Array(0.13972317, dtype=float32), 'loss_cross_entropy': Array(0.13027655, dtype=float32)}


  7%|▋         | 65619/1000000 [3:00:20<26:48:52,  9.68it/s]

{'loss': Array(0.16141523, dtype=float32), 'loss_cross_entropy': Array(0.1508385, dtype=float32)}


  7%|▋         | 65629/1000000 [3:00:21<24:35:10, 10.56it/s]

{'loss': Array(0.15825637, dtype=float32), 'loss_cross_entropy': Array(0.1483677, dtype=float32)}


  7%|▋         | 65639/1000000 [3:00:23<29:27:09,  8.81it/s]

{'loss': Array(0.16578852, dtype=float32), 'loss_cross_entropy': Array(0.15631583, dtype=float32)}


  7%|▋         | 65649/1000000 [3:00:24<25:42:20, 10.10it/s]

{'loss': Array(0.14785767, dtype=float32), 'loss_cross_entropy': Array(0.13865173, dtype=float32)}


  7%|▋         | 65659/1000000 [3:00:26<35:24:52,  7.33it/s]

{'loss': Array(0.16142344, dtype=float32), 'loss_cross_entropy': Array(0.15176047, dtype=float32)}


  7%|▋         | 65669/1000000 [3:00:27<26:38:39,  9.74it/s]

{'loss': Array(0.151841, dtype=float32), 'loss_cross_entropy': Array(0.14248863, dtype=float32)}


  7%|▋         | 65679/1000000 [3:00:29<39:21:01,  6.60it/s]

{'loss': Array(0.14715116, dtype=float32), 'loss_cross_entropy': Array(0.1377308, dtype=float32)}


  7%|▋         | 65689/1000000 [3:00:30<27:19:33,  9.50it/s]

{'loss': Array(0.15037742, dtype=float32), 'loss_cross_entropy': Array(0.14106882, dtype=float32)}


  7%|▋         | 65699/1000000 [3:00:32<25:06:51, 10.33it/s]

{'loss': Array(0.16307887, dtype=float32), 'loss_cross_entropy': Array(0.15238215, dtype=float32)}


  7%|▋         | 65709/1000000 [3:00:34<31:29:23,  8.24it/s]

{'loss': Array(0.1407785, dtype=float32), 'loss_cross_entropy': Array(0.13149287, dtype=float32)}


  7%|▋         | 65719/1000000 [3:00:35<25:52:42, 10.03it/s]

{'loss': Array(0.13926172, dtype=float32), 'loss_cross_entropy': Array(0.13052653, dtype=float32)}


  7%|▋         | 65729/1000000 [3:00:37<33:42:55,  7.70it/s]

{'loss': Array(0.14810042, dtype=float32), 'loss_cross_entropy': Array(0.1387371, dtype=float32)}


  7%|▋         | 65739/1000000 [3:00:38<25:38:05, 10.12it/s]

{'loss': Array(0.16447358, dtype=float32), 'loss_cross_entropy': Array(0.15350953, dtype=float32)}


  7%|▋         | 65749/1000000 [3:00:39<24:34:28, 10.56it/s]

{'loss': Array(0.15434654, dtype=float32), 'loss_cross_entropy': Array(0.14478691, dtype=float32)}


  7%|▋         | 65759/1000000 [3:00:41<27:48:06,  9.33it/s]

{'loss': Array(0.13832663, dtype=float32), 'loss_cross_entropy': Array(0.12960856, dtype=float32)}


  7%|▋         | 65769/1000000 [3:00:43<25:17:23, 10.26it/s]

{'loss': Array(0.17082648, dtype=float32), 'loss_cross_entropy': Array(0.16075496, dtype=float32)}


  7%|▋         | 65779/1000000 [3:00:44<39:07:20,  6.63it/s]

{'loss': Array(0.16492139, dtype=float32), 'loss_cross_entropy': Array(0.1548215, dtype=float32)}


  7%|▋         | 65789/1000000 [3:00:46<27:50:06,  9.32it/s]

{'loss': Array(0.16659, dtype=float32), 'loss_cross_entropy': Array(0.15682907, dtype=float32)}


  7%|▋         | 65799/1000000 [3:00:47<25:02:48, 10.36it/s]

{'loss': Array(0.14276832, dtype=float32), 'loss_cross_entropy': Array(0.13394974, dtype=float32)}


  7%|▋         | 65809/1000000 [3:00:49<31:05:15,  8.35it/s]

{'loss': Array(0.15005825, dtype=float32), 'loss_cross_entropy': Array(0.14120902, dtype=float32)}


  7%|▋         | 65819/1000000 [3:00:50<25:20:52, 10.24it/s]

{'loss': Array(0.15004306, dtype=float32), 'loss_cross_entropy': Array(0.14075488, dtype=float32)}


  7%|▋         | 65829/1000000 [3:00:52<33:51:55,  7.66it/s]

{'loss': Array(0.15665679, dtype=float32), 'loss_cross_entropy': Array(0.14697449, dtype=float32)}


  7%|▋         | 65839/1000000 [3:00:53<25:44:34, 10.08it/s]

{'loss': Array(0.15687934, dtype=float32), 'loss_cross_entropy': Array(0.14756864, dtype=float32)}


  7%|▋         | 65849/1000000 [3:00:55<34:02:30,  7.62it/s]

{'loss': Array(0.16075133, dtype=float32), 'loss_cross_entropy': Array(0.15121213, dtype=float32)}


  7%|▋         | 65859/1000000 [3:00:56<26:18:28,  9.86it/s]

{'loss': Array(0.15484376, dtype=float32), 'loss_cross_entropy': Array(0.14554971, dtype=float32)}


  7%|▋         | 65869/1000000 [3:00:58<24:55:57, 10.41it/s]

{'loss': Array(0.16155046, dtype=float32), 'loss_cross_entropy': Array(0.15157263, dtype=float32)}


  7%|▋         | 65879/1000000 [3:00:59<28:31:41,  9.10it/s]

{'loss': Array(0.1753424, dtype=float32), 'loss_cross_entropy': Array(0.16425756, dtype=float32)}


  7%|▋         | 65889/1000000 [3:01:01<25:06:34, 10.33it/s]

{'loss': Array(0.13749859, dtype=float32), 'loss_cross_entropy': Array(0.12837102, dtype=float32)}


  7%|▋         | 65899/1000000 [3:01:02<30:51:02,  8.41it/s]

{'loss': Array(0.15538022, dtype=float32), 'loss_cross_entropy': Array(0.14658956, dtype=float32)}


  7%|▋         | 65909/1000000 [3:01:04<25:49:22, 10.05it/s]

{'loss': Array(0.17192675, dtype=float32), 'loss_cross_entropy': Array(0.1614406, dtype=float32)}


  7%|▋         | 65919/1000000 [3:01:06<38:08:18,  6.80it/s]

{'loss': Array(0.15073682, dtype=float32), 'loss_cross_entropy': Array(0.14107321, dtype=float32)}


  7%|▋         | 65929/1000000 [3:01:07<26:26:55,  9.81it/s]

{'loss': Array(0.1640253, dtype=float32), 'loss_cross_entropy': Array(0.15345271, dtype=float32)}


  7%|▋         | 65939/1000000 [3:01:09<37:42:40,  6.88it/s]

{'loss': Array(0.16720025, dtype=float32), 'loss_cross_entropy': Array(0.15777676, dtype=float32)}


  7%|▋         | 65949/1000000 [3:01:10<26:08:43,  9.92it/s]

{'loss': Array(0.15905337, dtype=float32), 'loss_cross_entropy': Array(0.14979766, dtype=float32)}


  7%|▋         | 65959/1000000 [3:01:11<24:19:27, 10.67it/s]

{'loss': Array(0.15199697, dtype=float32), 'loss_cross_entropy': Array(0.14253056, dtype=float32)}


  7%|▋         | 65969/1000000 [3:01:13<33:58:09,  7.64it/s]

{'loss': Array(0.16298293, dtype=float32), 'loss_cross_entropy': Array(0.15334761, dtype=float32)}


  7%|▋         | 65979/1000000 [3:01:14<26:04:38,  9.95it/s]

{'loss': Array(0.16972314, dtype=float32), 'loss_cross_entropy': Array(0.15988708, dtype=float32)}


  7%|▋         | 65989/1000000 [3:01:16<37:59:42,  6.83it/s]

{'loss': Array(0.14255667, dtype=float32), 'loss_cross_entropy': Array(0.13382037, dtype=float32)}


  7%|▋         | 65999/1000000 [3:01:17<26:53:48,  9.65it/s]

{'loss': Array(0.15364115, dtype=float32), 'loss_cross_entropy': Array(0.14399512, dtype=float32)}


  7%|▋         | 66009/1000000 [3:01:25<85:04:08,  3.05it/s] 

{'loss': Array(0.155896, dtype=float32), 'loss_cross_entropy': Array(0.14587545, dtype=float32)}


  7%|▋         | 66019/1000000 [3:01:27<37:56:05,  6.84it/s] 

{'loss': Array(0.15991513, dtype=float32), 'loss_cross_entropy': Array(0.14989577, dtype=float32)}


  7%|▋         | 66029/1000000 [3:01:28<26:43:20,  9.71it/s]

{'loss': Array(0.16555469, dtype=float32), 'loss_cross_entropy': Array(0.15572529, dtype=float32)}


  7%|▋         | 66039/1000000 [3:01:30<27:55:55,  9.29it/s]

{'loss': Array(0.16822883, dtype=float32), 'loss_cross_entropy': Array(0.15757228, dtype=float32)}


  7%|▋         | 66049/1000000 [3:01:32<25:11:25, 10.30it/s]

{'loss': Array(0.14128248, dtype=float32), 'loss_cross_entropy': Array(0.13226898, dtype=float32)}


  7%|▋         | 66059/1000000 [3:01:33<38:06:52,  6.81it/s]

{'loss': Array(0.1515852, dtype=float32), 'loss_cross_entropy': Array(0.14211647, dtype=float32)}


  7%|▋         | 66069/1000000 [3:01:35<26:54:50,  9.64it/s]

{'loss': Array(0.14978813, dtype=float32), 'loss_cross_entropy': Array(0.1402372, dtype=float32)}


  7%|▋         | 66079/1000000 [3:01:36<26:13:40,  9.89it/s]

{'loss': Array(0.16777597, dtype=float32), 'loss_cross_entropy': Array(0.1573572, dtype=float32)}


  7%|▋         | 66089/1000000 [3:01:38<31:03:17,  8.35it/s]

{'loss': Array(0.15768741, dtype=float32), 'loss_cross_entropy': Array(0.1481937, dtype=float32)}


  7%|▋         | 66099/1000000 [3:01:39<24:55:04, 10.41it/s]

{'loss': Array(0.16780566, dtype=float32), 'loss_cross_entropy': Array(0.15752187, dtype=float32)}


  7%|▋         | 66109/1000000 [3:01:41<38:09:57,  6.80it/s]

{'loss': Array(0.1592725, dtype=float32), 'loss_cross_entropy': Array(0.14953938, dtype=float32)}


  7%|▋         | 66119/1000000 [3:01:42<26:51:54,  9.66it/s]

{'loss': Array(0.16087027, dtype=float32), 'loss_cross_entropy': Array(0.15138929, dtype=float32)}


  7%|▋         | 66129/1000000 [3:01:44<24:16:13, 10.69it/s]

{'loss': Array(0.15393178, dtype=float32), 'loss_cross_entropy': Array(0.14469706, dtype=float32)}


  7%|▋         | 66139/1000000 [3:01:45<28:51:21,  8.99it/s]

{'loss': Array(0.13051458, dtype=float32), 'loss_cross_entropy': Array(0.12169706, dtype=float32)}


  7%|▋         | 66149/1000000 [3:01:47<25:16:26, 10.26it/s]

{'loss': Array(0.16884276, dtype=float32), 'loss_cross_entropy': Array(0.1593927, dtype=float32)}


  7%|▋         | 66159/1000000 [3:01:48<33:44:23,  7.69it/s]

{'loss': Array(0.15262759, dtype=float32), 'loss_cross_entropy': Array(0.14326142, dtype=float32)}


  7%|▋         | 66169/1000000 [3:01:50<25:40:48, 10.10it/s]

{'loss': Array(0.1660576, dtype=float32), 'loss_cross_entropy': Array(0.1560782, dtype=float32)}


  7%|▋         | 66179/1000000 [3:01:51<24:27:27, 10.61it/s]

{'loss': Array(0.13891235, dtype=float32), 'loss_cross_entropy': Array(0.13010637, dtype=float32)}


  7%|▋         | 66189/1000000 [3:01:53<30:45:33,  8.43it/s]

{'loss': Array(0.15832005, dtype=float32), 'loss_cross_entropy': Array(0.14808927, dtype=float32)}


  7%|▋         | 66199/1000000 [3:01:54<25:10:34, 10.30it/s]

{'loss': Array(0.16375338, dtype=float32), 'loss_cross_entropy': Array(0.15407866, dtype=float32)}


  7%|▋         | 66209/1000000 [3:01:55<24:37:07, 10.54it/s]

{'loss': Array(0.16473487, dtype=float32), 'loss_cross_entropy': Array(0.15512945, dtype=float32)}


  7%|▋         | 66219/1000000 [3:01:57<27:46:16,  9.34it/s]

{'loss': Array(0.15664338, dtype=float32), 'loss_cross_entropy': Array(0.14627972, dtype=float32)}


  7%|▋         | 66229/1000000 [3:01:59<25:05:13, 10.34it/s]

{'loss': Array(0.15559201, dtype=float32), 'loss_cross_entropy': Array(0.14572556, dtype=float32)}


  7%|▋         | 66239/1000000 [3:02:00<31:20:36,  8.28it/s]

{'loss': Array(0.15645336, dtype=float32), 'loss_cross_entropy': Array(0.14708702, dtype=float32)}


  7%|▋         | 66249/1000000 [3:02:02<26:08:40,  9.92it/s]

{'loss': Array(0.15535383, dtype=float32), 'loss_cross_entropy': Array(0.14603493, dtype=float32)}


  7%|▋         | 66259/1000000 [3:02:03<39:44:21,  6.53it/s]

{'loss': Array(0.1558082, dtype=float32), 'loss_cross_entropy': Array(0.14591703, dtype=float32)}


  7%|▋         | 66269/1000000 [3:02:05<27:20:21,  9.49it/s]

{'loss': Array(0.16519813, dtype=float32), 'loss_cross_entropy': Array(0.15505324, dtype=float32)}


  7%|▋         | 66279/1000000 [3:02:07<40:19:36,  6.43it/s]

{'loss': Array(0.14853536, dtype=float32), 'loss_cross_entropy': Array(0.1395878, dtype=float32)}


  7%|▋         | 66289/1000000 [3:02:08<27:54:21,  9.29it/s]

{'loss': Array(0.15800323, dtype=float32), 'loss_cross_entropy': Array(0.14797026, dtype=float32)}


  7%|▋         | 66299/1000000 [3:02:10<25:11:20, 10.30it/s]

{'loss': Array(0.14128523, dtype=float32), 'loss_cross_entropy': Array(0.13220064, dtype=float32)}


  7%|▋         | 66309/1000000 [3:02:11<34:21:13,  7.55it/s]

{'loss': Array(0.14363019, dtype=float32), 'loss_cross_entropy': Array(0.13398322, dtype=float32)}


  7%|▋         | 66319/1000000 [3:02:13<26:16:47,  9.87it/s]

{'loss': Array(0.14586966, dtype=float32), 'loss_cross_entropy': Array(0.13715906, dtype=float32)}


  7%|▋         | 66329/1000000 [3:02:14<38:23:01,  6.76it/s]

{'loss': Array(0.14536887, dtype=float32), 'loss_cross_entropy': Array(0.13634764, dtype=float32)}


  7%|▋         | 66339/1000000 [3:02:16<26:11:27,  9.90it/s]

{'loss': Array(0.15160637, dtype=float32), 'loss_cross_entropy': Array(0.14158314, dtype=float32)}


  7%|▋         | 66349/1000000 [3:02:17<24:12:43, 10.71it/s]

{'loss': Array(0.1486521, dtype=float32), 'loss_cross_entropy': Array(0.13945809, dtype=float32)}


  7%|▋         | 66359/1000000 [3:02:19<27:09:28,  9.55it/s]

{'loss': Array(0.15895577, dtype=float32), 'loss_cross_entropy': Array(0.14996414, dtype=float32)}


  7%|▋         | 66369/1000000 [3:02:20<24:24:54, 10.62it/s]

{'loss': Array(0.14890409, dtype=float32), 'loss_cross_entropy': Array(0.13920596, dtype=float32)}


  7%|▋         | 66379/1000000 [3:02:22<27:38:12,  9.38it/s]

{'loss': Array(0.16019467, dtype=float32), 'loss_cross_entropy': Array(0.15026887, dtype=float32)}


  7%|▋         | 66389/1000000 [3:02:23<24:39:04, 10.52it/s]

{'loss': Array(0.15094154, dtype=float32), 'loss_cross_entropy': Array(0.14164104, dtype=float32)}


  7%|▋         | 66399/1000000 [3:02:25<37:58:01,  6.83it/s]

{'loss': Array(0.15063532, dtype=float32), 'loss_cross_entropy': Array(0.14198887, dtype=float32)}


  7%|▋         | 66409/1000000 [3:02:26<26:03:11,  9.95it/s]

{'loss': Array(0.1643713, dtype=float32), 'loss_cross_entropy': Array(0.15453921, dtype=float32)}


  7%|▋         | 66419/1000000 [3:02:28<23:55:03, 10.84it/s]

{'loss': Array(0.16917834, dtype=float32), 'loss_cross_entropy': Array(0.15977821, dtype=float32)}


  7%|▋         | 66429/1000000 [3:02:29<30:37:17,  8.47it/s]

{'loss': Array(0.17132601, dtype=float32), 'loss_cross_entropy': Array(0.16116682, dtype=float32)}


  7%|▋         | 66439/1000000 [3:02:31<25:14:10, 10.28it/s]

{'loss': Array(0.14450264, dtype=float32), 'loss_cross_entropy': Array(0.13564758, dtype=float32)}


  7%|▋         | 66449/1000000 [3:02:32<38:20:22,  6.76it/s]

{'loss': Array(0.15338157, dtype=float32), 'loss_cross_entropy': Array(0.14312144, dtype=float32)}


  7%|▋         | 66459/1000000 [3:02:34<27:00:04,  9.60it/s]

{'loss': Array(0.13780968, dtype=float32), 'loss_cross_entropy': Array(0.12844525, dtype=float32)}


  7%|▋         | 66469/1000000 [3:02:35<25:02:22, 10.36it/s]

{'loss': Array(0.15300302, dtype=float32), 'loss_cross_entropy': Array(0.14435503, dtype=float32)}


  7%|▋         | 66479/1000000 [3:02:37<28:45:20,  9.02it/s]

{'loss': Array(0.15656357, dtype=float32), 'loss_cross_entropy': Array(0.14692082, dtype=float32)}


  7%|▋         | 66489/1000000 [3:02:38<24:41:56, 10.50it/s]

{'loss': Array(0.15988745, dtype=float32), 'loss_cross_entropy': Array(0.15045463, dtype=float32)}


  7%|▋         | 66499/1000000 [3:02:40<34:43:23,  7.47it/s]

{'loss': Array(0.15946566, dtype=float32), 'loss_cross_entropy': Array(0.14994608, dtype=float32)}


  7%|▋         | 66509/1000000 [3:02:48<86:21:48,  3.00it/s] 

{'loss': Array(0.15872176, dtype=float32), 'loss_cross_entropy': Array(0.14937429, dtype=float32)}


  7%|▋         | 66519/1000000 [3:02:49<44:10:05,  5.87it/s]

{'loss': Array(0.1622568, dtype=float32), 'loss_cross_entropy': Array(0.15293495, dtype=float32)}


  7%|▋         | 66529/1000000 [3:02:51<27:40:54,  9.37it/s]

{'loss': Array(0.15213537, dtype=float32), 'loss_cross_entropy': Array(0.14319837, dtype=float32)}


  7%|▋         | 66539/1000000 [3:02:52<24:35:33, 10.54it/s]

{'loss': Array(0.15882152, dtype=float32), 'loss_cross_entropy': Array(0.14919423, dtype=float32)}


  7%|▋         | 66549/1000000 [3:02:54<31:32:11,  8.22it/s]

{'loss': Array(0.16384016, dtype=float32), 'loss_cross_entropy': Array(0.15475416, dtype=float32)}


  7%|▋         | 66559/1000000 [3:02:55<25:14:21, 10.27it/s]

{'loss': Array(0.15142034, dtype=float32), 'loss_cross_entropy': Array(0.1423044, dtype=float32)}


  7%|▋         | 66569/1000000 [3:02:57<23:49:49, 10.88it/s]

{'loss': Array(0.14049043, dtype=float32), 'loss_cross_entropy': Array(0.13160779, dtype=float32)}


  7%|▋         | 66579/1000000 [3:02:58<27:41:40,  9.36it/s]

{'loss': Array(0.17255019, dtype=float32), 'loss_cross_entropy': Array(0.16173972, dtype=float32)}


  7%|▋         | 66589/1000000 [3:03:00<25:41:28, 10.09it/s]

{'loss': Array(0.17357092, dtype=float32), 'loss_cross_entropy': Array(0.16304015, dtype=float32)}


  7%|▋         | 66599/1000000 [3:03:02<31:48:51,  8.15it/s]

{'loss': Array(0.14644448, dtype=float32), 'loss_cross_entropy': Array(0.13688578, dtype=float32)}


  7%|▋         | 66609/1000000 [3:03:03<25:38:47, 10.11it/s]

{'loss': Array(0.14169954, dtype=float32), 'loss_cross_entropy': Array(0.13296725, dtype=float32)}


  7%|▋         | 66619/1000000 [3:03:05<35:04:32,  7.39it/s]

{'loss': Array(0.14670594, dtype=float32), 'loss_cross_entropy': Array(0.13797581, dtype=float32)}


  7%|▋         | 66629/1000000 [3:03:06<26:04:44,  9.94it/s]

{'loss': Array(0.1572494, dtype=float32), 'loss_cross_entropy': Array(0.1469084, dtype=float32)}


  7%|▋         | 66639/1000000 [3:03:07<25:40:55, 10.10it/s]

{'loss': Array(0.14203061, dtype=float32), 'loss_cross_entropy': Array(0.13290107, dtype=float32)}


  7%|▋         | 66649/1000000 [3:03:09<32:16:00,  8.04it/s]

{'loss': Array(0.14869474, dtype=float32), 'loss_cross_entropy': Array(0.13910054, dtype=float32)}


  7%|▋         | 66659/1000000 [3:03:11<26:07:54,  9.92it/s]

{'loss': Array(0.14137004, dtype=float32), 'loss_cross_entropy': Array(0.1324553, dtype=float32)}


  7%|▋         | 66669/1000000 [3:03:12<24:43:05, 10.49it/s]

{'loss': Array(0.15767448, dtype=float32), 'loss_cross_entropy': Array(0.14821576, dtype=float32)}


  7%|▋         | 66679/1000000 [3:03:14<27:30:21,  9.43it/s]

{'loss': Array(0.15793696, dtype=float32), 'loss_cross_entropy': Array(0.1477326, dtype=float32)}


  7%|▋         | 66689/1000000 [3:03:15<24:19:12, 10.66it/s]

{'loss': Array(0.15947853, dtype=float32), 'loss_cross_entropy': Array(0.14975083, dtype=float32)}


  7%|▋         | 66699/1000000 [3:03:17<30:41:30,  8.45it/s]

{'loss': Array(0.15431671, dtype=float32), 'loss_cross_entropy': Array(0.14461763, dtype=float32)}


  7%|▋         | 66709/1000000 [3:03:18<25:16:20, 10.26it/s]

{'loss': Array(0.17271255, dtype=float32), 'loss_cross_entropy': Array(0.16235588, dtype=float32)}


  7%|▋         | 66719/1000000 [3:03:20<39:19:11,  6.59it/s]

{'loss': Array(0.17723985, dtype=float32), 'loss_cross_entropy': Array(0.16621678, dtype=float32)}


  7%|▋         | 66729/1000000 [3:03:21<26:32:29,  9.77it/s]

{'loss': Array(0.14286327, dtype=float32), 'loss_cross_entropy': Array(0.1337943, dtype=float32)}


  7%|▋         | 66739/1000000 [3:03:23<24:19:40, 10.66it/s]

{'loss': Array(0.15411478, dtype=float32), 'loss_cross_entropy': Array(0.14461274, dtype=float32)}


  7%|▋         | 66749/1000000 [3:03:24<34:21:27,  7.55it/s]

{'loss': Array(0.14966013, dtype=float32), 'loss_cross_entropy': Array(0.14068852, dtype=float32)}


  7%|▋         | 66759/1000000 [3:03:26<25:35:50, 10.13it/s]

{'loss': Array(0.13777255, dtype=float32), 'loss_cross_entropy': Array(0.12872495, dtype=float32)}


  7%|▋         | 66769/1000000 [3:03:27<24:43:14, 10.49it/s]

{'loss': Array(0.17298993, dtype=float32), 'loss_cross_entropy': Array(0.16269796, dtype=float32)}


  7%|▋         | 66779/1000000 [3:03:29<29:17:20,  8.85it/s]

{'loss': Array(0.18000455, dtype=float32), 'loss_cross_entropy': Array(0.16945639, dtype=float32)}


  7%|▋         | 66789/1000000 [3:03:30<24:34:37, 10.55it/s]

{'loss': Array(0.15873681, dtype=float32), 'loss_cross_entropy': Array(0.14872088, dtype=float32)}


  7%|▋         | 66799/1000000 [3:03:32<30:56:25,  8.38it/s]

{'loss': Array(0.16843624, dtype=float32), 'loss_cross_entropy': Array(0.15809143, dtype=float32)}


  7%|▋         | 66809/1000000 [3:03:33<25:11:19, 10.29it/s]

{'loss': Array(0.17199396, dtype=float32), 'loss_cross_entropy': Array(0.16158275, dtype=float32)}


  7%|▋         | 66819/1000000 [3:03:35<24:11:31, 10.71it/s]

{'loss': Array(0.15871589, dtype=float32), 'loss_cross_entropy': Array(0.1492898, dtype=float32)}


  7%|▋         | 66829/1000000 [3:03:36<27:23:01,  9.47it/s]

{'loss': Array(0.16723506, dtype=float32), 'loss_cross_entropy': Array(0.15730439, dtype=float32)}


  7%|▋         | 66839/1000000 [3:03:38<24:16:54, 10.68it/s]

{'loss': Array(0.1739218, dtype=float32), 'loss_cross_entropy': Array(0.16320238, dtype=float32)}


  7%|▋         | 66849/1000000 [3:03:39<34:14:22,  7.57it/s]

{'loss': Array(0.13769788, dtype=float32), 'loss_cross_entropy': Array(0.12903307, dtype=float32)}


  7%|▋         | 66859/1000000 [3:03:41<26:22:26,  9.83it/s]

{'loss': Array(0.16559276, dtype=float32), 'loss_cross_entropy': Array(0.15607508, dtype=float32)}


  7%|▋         | 66869/1000000 [3:03:42<24:25:43, 10.61it/s]

{'loss': Array(0.16407578, dtype=float32), 'loss_cross_entropy': Array(0.15418105, dtype=float32)}


  7%|▋         | 66879/1000000 [3:03:44<26:59:44,  9.60it/s]

{'loss': Array(0.16250752, dtype=float32), 'loss_cross_entropy': Array(0.15278399, dtype=float32)}


  7%|▋         | 66889/1000000 [3:03:45<24:46:49, 10.46it/s]

{'loss': Array(0.15750669, dtype=float32), 'loss_cross_entropy': Array(0.14772628, dtype=float32)}


  7%|▋         | 66899/1000000 [3:03:47<29:06:58,  8.90it/s]

{'loss': Array(0.139967, dtype=float32), 'loss_cross_entropy': Array(0.13130172, dtype=float32)}


  7%|▋         | 66909/1000000 [3:03:48<25:05:49, 10.33it/s]

{'loss': Array(0.159411, dtype=float32), 'loss_cross_entropy': Array(0.14966819, dtype=float32)}


  7%|▋         | 66919/1000000 [3:03:50<38:28:02,  6.74it/s]

{'loss': Array(0.16424277, dtype=float32), 'loss_cross_entropy': Array(0.15444215, dtype=float32)}


  7%|▋         | 66929/1000000 [3:03:51<26:28:47,  9.79it/s]

{'loss': Array(0.15486729, dtype=float32), 'loss_cross_entropy': Array(0.14534354, dtype=float32)}


  7%|▋         | 66939/1000000 [3:03:53<24:21:30, 10.64it/s]

{'loss': Array(0.17101435, dtype=float32), 'loss_cross_entropy': Array(0.16050997, dtype=float32)}


  7%|▋         | 66949/1000000 [3:03:55<30:51:33,  8.40it/s]

{'loss': Array(0.16307521, dtype=float32), 'loss_cross_entropy': Array(0.15346482, dtype=float32)}


  7%|▋         | 66959/1000000 [3:03:56<25:10:30, 10.30it/s]

{'loss': Array(0.17701764, dtype=float32), 'loss_cross_entropy': Array(0.1666798, dtype=float32)}


  7%|▋         | 66969/1000000 [3:03:58<34:02:36,  7.61it/s]

{'loss': Array(0.1546488, dtype=float32), 'loss_cross_entropy': Array(0.14518003, dtype=float32)}


  7%|▋         | 66979/1000000 [3:03:59<26:35:43,  9.74it/s]

{'loss': Array(0.17603324, dtype=float32), 'loss_cross_entropy': Array(0.16527095, dtype=float32)}


  7%|▋         | 66989/1000000 [3:04:01<35:02:29,  7.40it/s]

{'loss': Array(0.14643276, dtype=float32), 'loss_cross_entropy': Array(0.13722461, dtype=float32)}


  7%|▋         | 66999/1000000 [3:04:02<26:39:55,  9.72it/s]

{'loss': Array(0.15549366, dtype=float32), 'loss_cross_entropy': Array(0.14632653, dtype=float32)}


  7%|▋         | 67009/1000000 [3:04:10<85:18:46,  3.04it/s] 

{'loss': Array(0.15804978, dtype=float32), 'loss_cross_entropy': Array(0.1487213, dtype=float32)}


  7%|▋         | 67019/1000000 [3:04:12<39:13:05,  6.61it/s]

{'loss': Array(0.1574429, dtype=float32), 'loss_cross_entropy': Array(0.14707498, dtype=float32)}


  7%|▋         | 67029/1000000 [3:04:13<26:31:26,  9.77it/s]

{'loss': Array(0.16559891, dtype=float32), 'loss_cross_entropy': Array(0.15581961, dtype=float32)}


  7%|▋         | 67039/1000000 [3:04:15<34:42:29,  7.47it/s]

{'loss': Array(0.16314846, dtype=float32), 'loss_cross_entropy': Array(0.15259111, dtype=float32)}


  7%|▋         | 67049/1000000 [3:04:16<26:04:25,  9.94it/s]

{'loss': Array(0.16057743, dtype=float32), 'loss_cross_entropy': Array(0.15122484, dtype=float32)}


  7%|▋         | 67059/1000000 [3:04:18<38:39:41,  6.70it/s]

{'loss': Array(0.15002637, dtype=float32), 'loss_cross_entropy': Array(0.14077495, dtype=float32)}


  7%|▋         | 67069/1000000 [3:04:19<26:21:53,  9.83it/s]

{'loss': Array(0.14110115, dtype=float32), 'loss_cross_entropy': Array(0.13280207, dtype=float32)}


  7%|▋         | 67079/1000000 [3:04:21<25:25:05, 10.20it/s]

{'loss': Array(0.14044543, dtype=float32), 'loss_cross_entropy': Array(0.13140516, dtype=float32)}


  7%|▋         | 67089/1000000 [3:04:22<31:04:34,  8.34it/s]

{'loss': Array(0.1422367, dtype=float32), 'loss_cross_entropy': Array(0.13276124, dtype=float32)}


  7%|▋         | 67099/1000000 [3:04:24<25:03:32, 10.34it/s]

{'loss': Array(0.13991569, dtype=float32), 'loss_cross_entropy': Array(0.13125552, dtype=float32)}


  7%|▋         | 67109/1000000 [3:04:26<34:39:27,  7.48it/s]

{'loss': Array(0.1505057, dtype=float32), 'loss_cross_entropy': Array(0.14139836, dtype=float32)}


  7%|▋         | 67119/1000000 [3:04:27<26:23:06,  9.82it/s]

{'loss': Array(0.15651555, dtype=float32), 'loss_cross_entropy': Array(0.1472023, dtype=float32)}


  7%|▋         | 67129/1000000 [3:04:28<24:10:21, 10.72it/s]

{'loss': Array(0.16414945, dtype=float32), 'loss_cross_entropy': Array(0.15452829, dtype=float32)}


  7%|▋         | 67139/1000000 [3:04:30<27:16:28,  9.50it/s]

{'loss': Array(0.17027853, dtype=float32), 'loss_cross_entropy': Array(0.16020544, dtype=float32)}


  7%|▋         | 67149/1000000 [3:04:31<24:56:45, 10.39it/s]

{'loss': Array(0.1512516, dtype=float32), 'loss_cross_entropy': Array(0.14222772, dtype=float32)}


  7%|▋         | 67159/1000000 [3:04:33<39:41:56,  6.53it/s]

{'loss': Array(0.14326583, dtype=float32), 'loss_cross_entropy': Array(0.13482046, dtype=float32)}


  7%|▋         | 67169/1000000 [3:04:34<26:51:20,  9.65it/s]

{'loss': Array(0.13397878, dtype=float32), 'loss_cross_entropy': Array(0.12514327, dtype=float32)}


  7%|▋         | 67179/1000000 [3:04:36<24:35:22, 10.54it/s]

{'loss': Array(0.15016674, dtype=float32), 'loss_cross_entropy': Array(0.1403651, dtype=float32)}


  7%|▋         | 67189/1000000 [3:04:38<31:30:02,  8.23it/s]

{'loss': Array(0.15034573, dtype=float32), 'loss_cross_entropy': Array(0.14083019, dtype=float32)}


  7%|▋         | 67199/1000000 [3:04:39<25:25:47, 10.19it/s]

{'loss': Array(0.15477864, dtype=float32), 'loss_cross_entropy': Array(0.14523478, dtype=float32)}


  7%|▋         | 67209/1000000 [3:04:41<34:10:32,  7.58it/s]

{'loss': Array(0.17343192, dtype=float32), 'loss_cross_entropy': Array(0.16303293, dtype=float32)}


  7%|▋         | 67219/1000000 [3:04:42<25:36:54, 10.12it/s]

{'loss': Array(0.1535079, dtype=float32), 'loss_cross_entropy': Array(0.143958, dtype=float32)}


  7%|▋         | 67229/1000000 [3:04:44<34:05:22,  7.60it/s]

{'loss': Array(0.15240632, dtype=float32), 'loss_cross_entropy': Array(0.14283572, dtype=float32)}


  7%|▋         | 67239/1000000 [3:04:45<25:13:58, 10.27it/s]

{'loss': Array(0.17621882, dtype=float32), 'loss_cross_entropy': Array(0.16628893, dtype=float32)}


  7%|▋         | 67249/1000000 [3:04:46<24:01:03, 10.79it/s]

{'loss': Array(0.16669028, dtype=float32), 'loss_cross_entropy': Array(0.1571335, dtype=float32)}


  7%|▋         | 67259/1000000 [3:04:48<28:12:25,  9.19it/s]

{'loss': Array(0.15448426, dtype=float32), 'loss_cross_entropy': Array(0.14457557, dtype=float32)}


  7%|▋         | 67269/1000000 [3:04:49<25:07:52, 10.31it/s]

{'loss': Array(0.14416113, dtype=float32), 'loss_cross_entropy': Array(0.1348276, dtype=float32)}


  7%|▋         | 67279/1000000 [3:04:51<30:28:05,  8.50it/s]

{'loss': Array(0.16728882, dtype=float32), 'loss_cross_entropy': Array(0.15810136, dtype=float32)}


  7%|▋         | 67289/1000000 [3:04:52<24:47:13, 10.45it/s]

{'loss': Array(0.15680562, dtype=float32), 'loss_cross_entropy': Array(0.14747831, dtype=float32)}


  7%|▋         | 67299/1000000 [3:04:54<37:49:17,  6.85it/s]

{'loss': Array(0.14133875, dtype=float32), 'loss_cross_entropy': Array(0.13229091, dtype=float32)}


  7%|▋         | 67309/1000000 [3:04:55<25:56:56,  9.98it/s]

{'loss': Array(0.16463952, dtype=float32), 'loss_cross_entropy': Array(0.15482698, dtype=float32)}


  7%|▋         | 67319/1000000 [3:04:57<38:03:14,  6.81it/s]

{'loss': Array(0.1363183, dtype=float32), 'loss_cross_entropy': Array(0.12839897, dtype=float32)}


  7%|▋         | 67329/1000000 [3:04:59<26:57:47,  9.61it/s]

{'loss': Array(0.16217636, dtype=float32), 'loss_cross_entropy': Array(0.15330203, dtype=float32)}


  7%|▋         | 67339/1000000 [3:05:00<24:32:10, 10.56it/s]

{'loss': Array(0.1725844, dtype=float32), 'loss_cross_entropy': Array(0.16199598, dtype=float32)}


  7%|▋         | 67349/1000000 [3:05:02<34:45:25,  7.45it/s]

{'loss': Array(0.15707046, dtype=float32), 'loss_cross_entropy': Array(0.14755665, dtype=float32)}


  7%|▋         | 67359/1000000 [3:05:03<26:50:33,  9.65it/s]

{'loss': Array(0.17168598, dtype=float32), 'loss_cross_entropy': Array(0.1614096, dtype=float32)}


  7%|▋         | 67369/1000000 [3:05:05<40:53:33,  6.34it/s]

{'loss': Array(0.15960567, dtype=float32), 'loss_cross_entropy': Array(0.1500138, dtype=float32)}


  7%|▋         | 67379/1000000 [3:05:06<28:37:21,  9.05it/s]

{'loss': Array(0.18363944, dtype=float32), 'loss_cross_entropy': Array(0.17220688, dtype=float32)}


  7%|▋         | 67389/1000000 [3:05:08<25:28:35, 10.17it/s]

{'loss': Array(0.15178381, dtype=float32), 'loss_cross_entropy': Array(0.14311552, dtype=float32)}


  7%|▋         | 67399/1000000 [3:05:10<27:35:05,  9.39it/s]

{'loss': Array(0.1589159, dtype=float32), 'loss_cross_entropy': Array(0.15014333, dtype=float32)}


  7%|▋         | 67409/1000000 [3:05:11<24:09:32, 10.72it/s]

{'loss': Array(0.13579072, dtype=float32), 'loss_cross_entropy': Array(0.12693769, dtype=float32)}


  7%|▋         | 67419/1000000 [3:05:13<27:11:09,  9.53it/s]

{'loss': Array(0.15736637, dtype=float32), 'loss_cross_entropy': Array(0.14753218, dtype=float32)}


  7%|▋         | 67429/1000000 [3:05:14<24:08:50, 10.73it/s]

{'loss': Array(0.1614605, dtype=float32), 'loss_cross_entropy': Array(0.15168248, dtype=float32)}


  7%|▋         | 67439/1000000 [3:05:16<37:37:20,  6.89it/s]

{'loss': Array(0.1288857, dtype=float32), 'loss_cross_entropy': Array(0.12068551, dtype=float32)}


  7%|▋         | 67449/1000000 [3:05:17<25:51:06, 10.02it/s]

{'loss': Array(0.13514529, dtype=float32), 'loss_cross_entropy': Array(0.12675123, dtype=float32)}


  7%|▋         | 67459/1000000 [3:05:18<24:15:47, 10.68it/s]

{'loss': Array(0.15393151, dtype=float32), 'loss_cross_entropy': Array(0.14421616, dtype=float32)}


  7%|▋         | 67469/1000000 [3:05:20<31:13:39,  8.30it/s]

{'loss': Array(0.13726795, dtype=float32), 'loss_cross_entropy': Array(0.12837721, dtype=float32)}


  7%|▋         | 67479/1000000 [3:05:21<25:04:32, 10.33it/s]

{'loss': Array(0.16345842, dtype=float32), 'loss_cross_entropy': Array(0.1535617, dtype=float32)}


  7%|▋         | 67489/1000000 [3:05:23<37:52:16,  6.84it/s]

{'loss': Array(0.15942769, dtype=float32), 'loss_cross_entropy': Array(0.149641, dtype=float32)}


  7%|▋         | 67499/1000000 [3:05:24<26:45:34,  9.68it/s]

{'loss': Array(0.14066236, dtype=float32), 'loss_cross_entropy': Array(0.13201372, dtype=float32)}


  7%|▋         | 67509/1000000 [3:05:32<85:00:41,  3.05it/s] 

{'loss': Array(0.13123773, dtype=float32), 'loss_cross_entropy': Array(0.12320188, dtype=float32)}


  7%|▋         | 67519/1000000 [3:05:34<38:32:11,  6.72it/s]

{'loss': Array(0.14929563, dtype=float32), 'loss_cross_entropy': Array(0.13997363, dtype=float32)}


  7%|▋         | 67529/1000000 [3:05:35<26:27:24,  9.79it/s]

{'loss': Array(0.16148967, dtype=float32), 'loss_cross_entropy': Array(0.15152436, dtype=float32)}


  7%|▋         | 67539/1000000 [3:05:37<31:03:35,  8.34it/s]

{'loss': Array(0.15348458, dtype=float32), 'loss_cross_entropy': Array(0.14402327, dtype=float32)}


  7%|▋         | 67549/1000000 [3:05:38<27:05:47,  9.56it/s]

{'loss': Array(0.14635585, dtype=float32), 'loss_cross_entropy': Array(0.1375418, dtype=float32)}


  7%|▋         | 67559/1000000 [3:05:40<38:38:30,  6.70it/s]

{'loss': Array(0.16864808, dtype=float32), 'loss_cross_entropy': Array(0.15882969, dtype=float32)}


  7%|▋         | 67569/1000000 [3:05:41<26:31:50,  9.76it/s]

{'loss': Array(0.14880833, dtype=float32), 'loss_cross_entropy': Array(0.13943425, dtype=float32)}


  7%|▋         | 67579/1000000 [3:05:43<25:03:35, 10.34it/s]

{'loss': Array(0.14426814, dtype=float32), 'loss_cross_entropy': Array(0.13478674, dtype=float32)}


  7%|▋         | 67589/1000000 [3:05:45<27:22:48,  9.46it/s]

{'loss': Array(0.16749609, dtype=float32), 'loss_cross_entropy': Array(0.15758996, dtype=float32)}


  7%|▋         | 67599/1000000 [3:05:46<24:18:59, 10.65it/s]

{'loss': Array(0.1554381, dtype=float32), 'loss_cross_entropy': Array(0.14547883, dtype=float32)}


  7%|▋         | 67609/1000000 [3:05:48<38:13:09,  6.78it/s]

{'loss': Array(0.16619143, dtype=float32), 'loss_cross_entropy': Array(0.15691808, dtype=float32)}


  7%|▋         | 67619/1000000 [3:05:49<27:02:17,  9.58it/s]

{'loss': Array(0.14514439, dtype=float32), 'loss_cross_entropy': Array(0.13643396, dtype=float32)}


  7%|▋         | 67629/1000000 [3:05:50<24:23:34, 10.62it/s]

{'loss': Array(0.1602458, dtype=float32), 'loss_cross_entropy': Array(0.15038751, dtype=float32)}


  7%|▋         | 67639/1000000 [3:05:52<28:37:11,  9.05it/s]

{'loss': Array(0.15888214, dtype=float32), 'loss_cross_entropy': Array(0.14935534, dtype=float32)}


  7%|▋         | 67649/1000000 [3:05:53<25:02:13, 10.34it/s]

{'loss': Array(0.17764215, dtype=float32), 'loss_cross_entropy': Array(0.16687153, dtype=float32)}


  7%|▋         | 67659/1000000 [3:05:55<33:39:23,  7.69it/s]

{'loss': Array(0.14309147, dtype=float32), 'loss_cross_entropy': Array(0.13390234, dtype=float32)}


  7%|▋         | 67669/1000000 [3:05:56<26:02:14,  9.95it/s]

{'loss': Array(0.14212835, dtype=float32), 'loss_cross_entropy': Array(0.1331467, dtype=float32)}


  7%|▋         | 67679/1000000 [3:05:58<38:33:11,  6.72it/s]

{'loss': Array(0.15856922, dtype=float32), 'loss_cross_entropy': Array(0.14851253, dtype=float32)}


  7%|▋         | 67689/1000000 [3:06:00<27:14:06,  9.51it/s]

{'loss': Array(0.16151817, dtype=float32), 'loss_cross_entropy': Array(0.15205877, dtype=float32)}


  7%|▋         | 67699/1000000 [3:06:01<25:33:03, 10.14it/s]

{'loss': Array(0.13756965, dtype=float32), 'loss_cross_entropy': Array(0.12904298, dtype=float32)}


  7%|▋         | 67709/1000000 [3:06:03<32:37:42,  7.94it/s]

{'loss': Array(0.17419776, dtype=float32), 'loss_cross_entropy': Array(0.16402648, dtype=float32)}


  7%|▋         | 67719/1000000 [3:06:04<27:11:53,  9.52it/s]

{'loss': Array(0.14626116, dtype=float32), 'loss_cross_entropy': Array(0.13785456, dtype=float32)}


  7%|▋         | 67729/1000000 [3:06:06<35:44:43,  7.24it/s]

{'loss': Array(0.14120553, dtype=float32), 'loss_cross_entropy': Array(0.13265096, dtype=float32)}


  7%|▋         | 67739/1000000 [3:06:07<25:42:28, 10.07it/s]

{'loss': Array(0.17233752, dtype=float32), 'loss_cross_entropy': Array(0.16135669, dtype=float32)}


  7%|▋         | 67749/1000000 [3:06:09<24:03:56, 10.76it/s]

{'loss': Array(0.15944211, dtype=float32), 'loss_cross_entropy': Array(0.1493827, dtype=float32)}


  7%|▋         | 67759/1000000 [3:06:10<27:38:16,  9.37it/s]

{'loss': Array(0.17073219, dtype=float32), 'loss_cross_entropy': Array(0.1601799, dtype=float32)}


  7%|▋         | 67769/1000000 [3:06:12<24:27:02, 10.59it/s]

{'loss': Array(0.14700916, dtype=float32), 'loss_cross_entropy': Array(0.1373121, dtype=float32)}


  7%|▋         | 67779/1000000 [3:06:14<38:07:39,  6.79it/s]

{'loss': Array(0.14888859, dtype=float32), 'loss_cross_entropy': Array(0.14009416, dtype=float32)}


  7%|▋         | 67789/1000000 [3:06:15<26:23:25,  9.81it/s]

{'loss': Array(0.15386364, dtype=float32), 'loss_cross_entropy': Array(0.14415275, dtype=float32)}


  7%|▋         | 67799/1000000 [3:06:16<24:17:39, 10.66it/s]

{'loss': Array(0.14877044, dtype=float32), 'loss_cross_entropy': Array(0.14047259, dtype=float32)}


  7%|▋         | 67809/1000000 [3:06:18<30:52:38,  8.39it/s]

{'loss': Array(0.14523004, dtype=float32), 'loss_cross_entropy': Array(0.13666354, dtype=float32)}


  7%|▋         | 67819/1000000 [3:06:19<25:01:39, 10.35it/s]

{'loss': Array(0.17650287, dtype=float32), 'loss_cross_entropy': Array(0.16662143, dtype=float32)}


  7%|▋         | 67829/1000000 [3:06:21<34:54:40,  7.42it/s]

{'loss': Array(0.14610001, dtype=float32), 'loss_cross_entropy': Array(0.13682134, dtype=float32)}


  7%|▋         | 67839/1000000 [3:06:22<26:02:56,  9.94it/s]

{'loss': Array(0.15868825, dtype=float32), 'loss_cross_entropy': Array(0.14901091, dtype=float32)}


  7%|▋         | 67849/1000000 [3:06:24<33:36:59,  7.70it/s]

{'loss': Array(0.15302938, dtype=float32), 'loss_cross_entropy': Array(0.14395726, dtype=float32)}


  7%|▋         | 67859/1000000 [3:06:26<25:54:47,  9.99it/s]

{'loss': Array(0.15346542, dtype=float32), 'loss_cross_entropy': Array(0.14391701, dtype=float32)}


  7%|▋         | 67869/1000000 [3:06:27<24:31:20, 10.56it/s]

{'loss': Array(0.13817327, dtype=float32), 'loss_cross_entropy': Array(0.13037078, dtype=float32)}


  7%|▋         | 67879/1000000 [3:06:29<29:16:58,  8.84it/s]

{'loss': Array(0.16784273, dtype=float32), 'loss_cross_entropy': Array(0.15818556, dtype=float32)}


  7%|▋         | 67889/1000000 [3:06:30<25:13:51, 10.26it/s]

{'loss': Array(0.15580848, dtype=float32), 'loss_cross_entropy': Array(0.14585972, dtype=float32)}


  7%|▋         | 67899/1000000 [3:06:32<31:21:44,  8.26it/s]

{'loss': Array(0.1443194, dtype=float32), 'loss_cross_entropy': Array(0.13576473, dtype=float32)}


  7%|▋         | 67909/1000000 [3:06:33<25:21:38, 10.21it/s]

{'loss': Array(0.14921078, dtype=float32), 'loss_cross_entropy': Array(0.13979268, dtype=float32)}


  7%|▋         | 67919/1000000 [3:06:35<38:30:35,  6.72it/s]

{'loss': Array(0.14136542, dtype=float32), 'loss_cross_entropy': Array(0.13275293, dtype=float32)}


  7%|▋         | 67929/1000000 [3:06:36<26:13:54,  9.87it/s]

{'loss': Array(0.1399961, dtype=float32), 'loss_cross_entropy': Array(0.1312661, dtype=float32)}


  7%|▋         | 67939/1000000 [3:06:38<38:17:32,  6.76it/s]

{'loss': Array(0.17301755, dtype=float32), 'loss_cross_entropy': Array(0.16296837, dtype=float32)}


  7%|▋         | 67949/1000000 [3:06:39<25:57:07,  9.98it/s]

{'loss': Array(0.16046397, dtype=float32), 'loss_cross_entropy': Array(0.15060101, dtype=float32)}


  7%|▋         | 67959/1000000 [3:06:41<24:23:49, 10.61it/s]

{'loss': Array(0.15252803, dtype=float32), 'loss_cross_entropy': Array(0.14366293, dtype=float32)}


  7%|▋         | 67969/1000000 [3:06:42<33:49:50,  7.65it/s]

{'loss': Array(0.14672697, dtype=float32), 'loss_cross_entropy': Array(0.13740756, dtype=float32)}


  7%|▋         | 67979/1000000 [3:06:44<25:19:24, 10.22it/s]

{'loss': Array(0.14708804, dtype=float32), 'loss_cross_entropy': Array(0.137852, dtype=float32)}


  7%|▋         | 67989/1000000 [3:06:45<38:25:09,  6.74it/s]

{'loss': Array(0.1695754, dtype=float32), 'loss_cross_entropy': Array(0.15971878, dtype=float32)}


  7%|▋         | 67999/1000000 [3:06:47<26:42:56,  9.69it/s]

{'loss': Array(0.14985773, dtype=float32), 'loss_cross_entropy': Array(0.13979836, dtype=float32)}


  7%|▋         | 68009/1000000 [3:06:55<84:19:32,  3.07it/s] 

{'loss': Array(0.15513293, dtype=float32), 'loss_cross_entropy': Array(0.14572333, dtype=float32)}


  7%|▋         | 68019/1000000 [3:06:56<36:47:28,  7.04it/s] 

{'loss': Array(0.17442583, dtype=float32), 'loss_cross_entropy': Array(0.16378011, dtype=float32)}


  7%|▋         | 68029/1000000 [3:06:58<26:36:24,  9.73it/s]

{'loss': Array(0.16221572, dtype=float32), 'loss_cross_entropy': Array(0.15227753, dtype=float32)}


  7%|▋         | 68039/1000000 [3:06:59<28:44:21,  9.01it/s]

{'loss': Array(0.15086772, dtype=float32), 'loss_cross_entropy': Array(0.14130874, dtype=float32)}


  7%|▋         | 68049/1000000 [3:07:01<25:23:33, 10.19it/s]

{'loss': Array(0.16974472, dtype=float32), 'loss_cross_entropy': Array(0.15898223, dtype=float32)}


  7%|▋         | 68059/1000000 [3:07:03<38:47:38,  6.67it/s]

{'loss': Array(0.16550668, dtype=float32), 'loss_cross_entropy': Array(0.15496647, dtype=float32)}


  7%|▋         | 68069/1000000 [3:07:04<27:14:16,  9.50it/s]

{'loss': Array(0.14880155, dtype=float32), 'loss_cross_entropy': Array(0.13912033, dtype=float32)}


  7%|▋         | 68079/1000000 [3:07:05<26:18:27,  9.84it/s]

{'loss': Array(0.15278094, dtype=float32), 'loss_cross_entropy': Array(0.14290066, dtype=float32)}


  7%|▋         | 68089/1000000 [3:07:07<31:40:41,  8.17it/s]

{'loss': Array(0.14920068, dtype=float32), 'loss_cross_entropy': Array(0.13992041, dtype=float32)}


  7%|▋         | 68099/1000000 [3:07:08<25:10:10, 10.28it/s]

{'loss': Array(0.15339524, dtype=float32), 'loss_cross_entropy': Array(0.14466032, dtype=float32)}


  7%|▋         | 68109/1000000 [3:07:10<37:33:57,  6.89it/s]

{'loss': Array(0.16025922, dtype=float32), 'loss_cross_entropy': Array(0.15098514, dtype=float32)}


  7%|▋         | 68119/1000000 [3:07:12<26:10:57,  9.89it/s]

{'loss': Array(0.17289722, dtype=float32), 'loss_cross_entropy': Array(0.16257693, dtype=float32)}


  7%|▋         | 68129/1000000 [3:07:13<24:22:44, 10.62it/s]

{'loss': Array(0.1583029, dtype=float32), 'loss_cross_entropy': Array(0.14874731, dtype=float32)}


  7%|▋         | 68139/1000000 [3:07:15<28:53:00,  8.96it/s]

{'loss': Array(0.14352182, dtype=float32), 'loss_cross_entropy': Array(0.13404389, dtype=float32)}


  7%|▋         | 68149/1000000 [3:07:16<24:20:02, 10.64it/s]

{'loss': Array(0.14952682, dtype=float32), 'loss_cross_entropy': Array(0.13987146, dtype=float32)}


  7%|▋         | 68159/1000000 [3:07:18<34:06:14,  7.59it/s]

{'loss': Array(0.15879403, dtype=float32), 'loss_cross_entropy': Array(0.14876172, dtype=float32)}


  7%|▋         | 68169/1000000 [3:07:19<25:28:30, 10.16it/s]

{'loss': Array(0.16305749, dtype=float32), 'loss_cross_entropy': Array(0.15279865, dtype=float32)}


  7%|▋         | 68179/1000000 [3:07:20<23:46:33, 10.89it/s]

{'loss': Array(0.15716599, dtype=float32), 'loss_cross_entropy': Array(0.14718506, dtype=float32)}


  7%|▋         | 68189/1000000 [3:07:22<30:27:06,  8.50it/s]

{'loss': Array(0.15228616, dtype=float32), 'loss_cross_entropy': Array(0.14319074, dtype=float32)}


  7%|▋         | 68199/1000000 [3:07:23<24:59:02, 10.36it/s]

{'loss': Array(0.16754912, dtype=float32), 'loss_cross_entropy': Array(0.15692458, dtype=float32)}


  7%|▋         | 68209/1000000 [3:07:25<24:12:11, 10.69it/s]

{'loss': Array(0.1750795, dtype=float32), 'loss_cross_entropy': Array(0.16408055, dtype=float32)}


  7%|▋         | 68219/1000000 [3:07:26<27:22:37,  9.45it/s]

{'loss': Array(0.16005914, dtype=float32), 'loss_cross_entropy': Array(0.1504728, dtype=float32)}


  7%|▋         | 68229/1000000 [3:07:28<24:27:57, 10.58it/s]

{'loss': Array(0.13702749, dtype=float32), 'loss_cross_entropy': Array(0.1277961, dtype=float32)}


  7%|▋         | 68239/1000000 [3:07:29<30:59:15,  8.35it/s]

{'loss': Array(0.17249057, dtype=float32), 'loss_cross_entropy': Array(0.16227823, dtype=float32)}


  7%|▋         | 68249/1000000 [3:07:31<25:11:50, 10.27it/s]

{'loss': Array(0.17692716, dtype=float32), 'loss_cross_entropy': Array(0.16684365, dtype=float32)}


  7%|▋         | 68259/1000000 [3:07:33<38:13:47,  6.77it/s]

{'loss': Array(0.15463515, dtype=float32), 'loss_cross_entropy': Array(0.14490433, dtype=float32)}


  7%|▋         | 68269/1000000 [3:07:34<26:17:04,  9.85it/s]

{'loss': Array(0.1456301, dtype=float32), 'loss_cross_entropy': Array(0.13695727, dtype=float32)}


  7%|▋         | 68279/1000000 [3:07:36<38:36:31,  6.70it/s]

{'loss': Array(0.16013871, dtype=float32), 'loss_cross_entropy': Array(0.15126276, dtype=float32)}


  7%|▋         | 68289/1000000 [3:07:37<26:44:53,  9.68it/s]

{'loss': Array(0.14871255, dtype=float32), 'loss_cross_entropy': Array(0.13979843, dtype=float32)}


  7%|▋         | 68299/1000000 [3:07:38<24:18:30, 10.65it/s]

{'loss': Array(0.14294907, dtype=float32), 'loss_cross_entropy': Array(0.13327168, dtype=float32)}


  7%|▋         | 68309/1000000 [3:07:40<33:42:04,  7.68it/s]

{'loss': Array(0.15077052, dtype=float32), 'loss_cross_entropy': Array(0.14170441, dtype=float32)}


  7%|▋         | 68319/1000000 [3:07:41<26:34:26,  9.74it/s]

{'loss': Array(0.1656872, dtype=float32), 'loss_cross_entropy': Array(0.15560107, dtype=float32)}


  7%|▋         | 68329/1000000 [3:07:43<38:13:27,  6.77it/s]

{'loss': Array(0.13927552, dtype=float32), 'loss_cross_entropy': Array(0.13054244, dtype=float32)}


  7%|▋         | 68339/1000000 [3:07:45<26:23:07,  9.81it/s]

{'loss': Array(0.16413707, dtype=float32), 'loss_cross_entropy': Array(0.15439133, dtype=float32)}


  7%|▋         | 68349/1000000 [3:07:46<24:59:44, 10.35it/s]

{'loss': Array(0.16912727, dtype=float32), 'loss_cross_entropy': Array(0.15885127, dtype=float32)}


  7%|▋         | 68359/1000000 [3:07:48<27:32:53,  9.39it/s]

{'loss': Array(0.1625623, dtype=float32), 'loss_cross_entropy': Array(0.15246391, dtype=float32)}


  7%|▋         | 68369/1000000 [3:07:49<24:44:12, 10.46it/s]

{'loss': Array(0.1639298, dtype=float32), 'loss_cross_entropy': Array(0.15368743, dtype=float32)}


  7%|▋         | 68379/1000000 [3:07:51<27:24:05,  9.44it/s]

{'loss': Array(0.17966579, dtype=float32), 'loss_cross_entropy': Array(0.16955608, dtype=float32)}


  7%|▋         | 68389/1000000 [3:07:52<24:55:53, 10.38it/s]

{'loss': Array(0.14976828, dtype=float32), 'loss_cross_entropy': Array(0.14010121, dtype=float32)}


  7%|▋         | 68399/1000000 [3:07:54<38:16:23,  6.76it/s]

{'loss': Array(0.16820478, dtype=float32), 'loss_cross_entropy': Array(0.15783788, dtype=float32)}


  7%|▋         | 68409/1000000 [3:07:55<26:58:51,  9.59it/s]

{'loss': Array(0.1631081, dtype=float32), 'loss_cross_entropy': Array(0.15324917, dtype=float32)}


  7%|▋         | 68419/1000000 [3:07:57<24:06:19, 10.74it/s]

{'loss': Array(0.17260891, dtype=float32), 'loss_cross_entropy': Array(0.16222401, dtype=float32)}


  7%|▋         | 68429/1000000 [3:07:58<31:33:17,  8.20it/s]

{'loss': Array(0.14591953, dtype=float32), 'loss_cross_entropy': Array(0.13678217, dtype=float32)}


  7%|▋         | 68439/1000000 [3:08:00<26:26:56,  9.78it/s]

{'loss': Array(0.16032977, dtype=float32), 'loss_cross_entropy': Array(0.1505804, dtype=float32)}


  7%|▋         | 68449/1000000 [3:08:02<39:35:52,  6.53it/s]

{'loss': Array(0.16228798, dtype=float32), 'loss_cross_entropy': Array(0.15236802, dtype=float32)}


  7%|▋         | 68459/1000000 [3:08:03<27:57:26,  9.26it/s]

{'loss': Array(0.16311799, dtype=float32), 'loss_cross_entropy': Array(0.15294881, dtype=float32)}


  7%|▋         | 68469/1000000 [3:08:04<26:12:39,  9.87it/s]

{'loss': Array(0.1525822, dtype=float32), 'loss_cross_entropy': Array(0.14343101, dtype=float32)}


  7%|▋         | 68479/1000000 [3:08:06<30:14:32,  8.56it/s]

{'loss': Array(0.17240493, dtype=float32), 'loss_cross_entropy': Array(0.16209225, dtype=float32)}


  7%|▋         | 68489/1000000 [3:08:08<26:20:00,  9.83it/s]

{'loss': Array(0.14187466, dtype=float32), 'loss_cross_entropy': Array(0.13345937, dtype=float32)}


  7%|▋         | 68499/1000000 [3:08:09<35:19:47,  7.32it/s]

{'loss': Array(0.16710044, dtype=float32), 'loss_cross_entropy': Array(0.15740386, dtype=float32)}


  7%|▋         | 68509/1000000 [3:08:17<84:19:48,  3.07it/s] 

{'loss': Array(0.14247803, dtype=float32), 'loss_cross_entropy': Array(0.13333887, dtype=float32)}


  7%|▋         | 68519/1000000 [3:08:19<43:19:28,  5.97it/s]

{'loss': Array(0.14045583, dtype=float32), 'loss_cross_entropy': Array(0.13176718, dtype=float32)}


  7%|▋         | 68529/1000000 [3:08:20<27:05:28,  9.55it/s]

{'loss': Array(0.1375011, dtype=float32), 'loss_cross_entropy': Array(0.12860894, dtype=float32)}


  7%|▋         | 68539/1000000 [3:08:21<24:39:44, 10.49it/s]

{'loss': Array(0.1842642, dtype=float32), 'loss_cross_entropy': Array(0.1741097, dtype=float32)}


  7%|▋         | 68549/1000000 [3:08:23<30:48:16,  8.40it/s]

{'loss': Array(0.1617436, dtype=float32), 'loss_cross_entropy': Array(0.15228218, dtype=float32)}


  7%|▋         | 68559/1000000 [3:08:24<24:58:39, 10.36it/s]

{'loss': Array(0.18170506, dtype=float32), 'loss_cross_entropy': Array(0.1712604, dtype=float32)}


  7%|▋         | 68569/1000000 [3:08:26<24:39:24, 10.49it/s]

{'loss': Array(0.1535776, dtype=float32), 'loss_cross_entropy': Array(0.14381087, dtype=float32)}


  7%|▋         | 68579/1000000 [3:08:28<27:31:54,  9.40it/s]

{'loss': Array(0.15139034, dtype=float32), 'loss_cross_entropy': Array(0.14131764, dtype=float32)}


  7%|▋         | 68589/1000000 [3:08:29<24:19:22, 10.64it/s]

{'loss': Array(0.1713554, dtype=float32), 'loss_cross_entropy': Array(0.16199426, dtype=float32)}


  7%|▋         | 68599/1000000 [3:08:31<30:49:04,  8.40it/s]

{'loss': Array(0.16406977, dtype=float32), 'loss_cross_entropy': Array(0.15404163, dtype=float32)}


  7%|▋         | 68609/1000000 [3:08:32<24:47:42, 10.43it/s]

{'loss': Array(0.16934407, dtype=float32), 'loss_cross_entropy': Array(0.16003744, dtype=float32)}


  7%|▋         | 68619/1000000 [3:08:34<34:26:01,  7.51it/s]

{'loss': Array(0.16447362, dtype=float32), 'loss_cross_entropy': Array(0.15472724, dtype=float32)}


  7%|▋         | 68629/1000000 [3:08:35<25:56:28,  9.97it/s]

{'loss': Array(0.15829654, dtype=float32), 'loss_cross_entropy': Array(0.14865461, dtype=float32)}


  7%|▋         | 68639/1000000 [3:08:37<24:54:26, 10.39it/s]

{'loss': Array(0.161259, dtype=float32), 'loss_cross_entropy': Array(0.15164773, dtype=float32)}


  7%|▋         | 68649/1000000 [3:08:38<30:43:22,  8.42it/s]

{'loss': Array(0.18022648, dtype=float32), 'loss_cross_entropy': Array(0.16987175, dtype=float32)}


  7%|▋         | 68659/1000000 [3:08:40<24:53:07, 10.40it/s]

{'loss': Array(0.16100757, dtype=float32), 'loss_cross_entropy': Array(0.15124634, dtype=float32)}


  7%|▋         | 68669/1000000 [3:08:41<23:56:11, 10.81it/s]

{'loss': Array(0.16485569, dtype=float32), 'loss_cross_entropy': Array(0.1540193, dtype=float32)}


  7%|▋         | 68679/1000000 [3:08:43<27:04:35,  9.55it/s]

{'loss': Array(0.14949588, dtype=float32), 'loss_cross_entropy': Array(0.1402824, dtype=float32)}


  7%|▋         | 68689/1000000 [3:08:44<24:13:39, 10.68it/s]

{'loss': Array(0.14130113, dtype=float32), 'loss_cross_entropy': Array(0.13249426, dtype=float32)}


  7%|▋         | 68699/1000000 [3:08:46<30:25:55,  8.50it/s]

{'loss': Array(0.16327396, dtype=float32), 'loss_cross_entropy': Array(0.15297715, dtype=float32)}


  7%|▋         | 68709/1000000 [3:08:47<25:12:14, 10.26it/s]

{'loss': Array(0.14570986, dtype=float32), 'loss_cross_entropy': Array(0.13628867, dtype=float32)}


  7%|▋         | 68719/1000000 [3:08:49<38:21:42,  6.74it/s]

{'loss': Array(0.1560856, dtype=float32), 'loss_cross_entropy': Array(0.1467571, dtype=float32)}


  7%|▋         | 68729/1000000 [3:08:50<26:33:21,  9.74it/s]

{'loss': Array(0.15524189, dtype=float32), 'loss_cross_entropy': Array(0.14587857, dtype=float32)}


  7%|▋         | 68739/1000000 [3:08:51<24:38:09, 10.50it/s]

{'loss': Array(0.16020904, dtype=float32), 'loss_cross_entropy': Array(0.14987122, dtype=float32)}


  7%|▋         | 68749/1000000 [3:08:53<34:09:20,  7.57it/s]

{'loss': Array(0.13833071, dtype=float32), 'loss_cross_entropy': Array(0.13032031, dtype=float32)}


  7%|▋         | 68759/1000000 [3:08:55<26:00:11,  9.95it/s]

{'loss': Array(0.15725654, dtype=float32), 'loss_cross_entropy': Array(0.14678942, dtype=float32)}


  7%|▋         | 68769/1000000 [3:08:56<24:54:07, 10.39it/s]

{'loss': Array(0.14477226, dtype=float32), 'loss_cross_entropy': Array(0.135694, dtype=float32)}


  7%|▋         | 68779/1000000 [3:08:58<28:53:52,  8.95it/s]

{'loss': Array(0.15134881, dtype=float32), 'loss_cross_entropy': Array(0.14229123, dtype=float32)}


  7%|▋         | 68789/1000000 [3:08:59<25:54:10,  9.99it/s]

{'loss': Array(0.1604286, dtype=float32), 'loss_cross_entropy': Array(0.15108117, dtype=float32)}


  7%|▋         | 68799/1000000 [3:09:01<32:30:25,  7.96it/s]

{'loss': Array(0.15561168, dtype=float32), 'loss_cross_entropy': Array(0.14592938, dtype=float32)}


  7%|▋         | 68809/1000000 [3:09:02<26:09:50,  9.89it/s]

{'loss': Array(0.14851564, dtype=float32), 'loss_cross_entropy': Array(0.13859859, dtype=float32)}


  7%|▋         | 68819/1000000 [3:09:04<25:03:08, 10.32it/s]

{'loss': Array(0.154995, dtype=float32), 'loss_cross_entropy': Array(0.14538448, dtype=float32)}


  7%|▋         | 68829/1000000 [3:09:05<29:06:09,  8.89it/s]

{'loss': Array(0.1542717, dtype=float32), 'loss_cross_entropy': Array(0.144908, dtype=float32)}


  7%|▋         | 68839/1000000 [3:09:07<26:00:16,  9.95it/s]

{'loss': Array(0.15617725, dtype=float32), 'loss_cross_entropy': Array(0.14659514, dtype=float32)}


  7%|▋         | 68849/1000000 [3:09:09<34:54:16,  7.41it/s]

{'loss': Array(0.1462132, dtype=float32), 'loss_cross_entropy': Array(0.1372317, dtype=float32)}


  7%|▋         | 68859/1000000 [3:09:10<25:30:13, 10.14it/s]

{'loss': Array(0.15709017, dtype=float32), 'loss_cross_entropy': Array(0.14716017, dtype=float32)}


  7%|▋         | 68869/1000000 [3:09:11<23:59:28, 10.78it/s]

{'loss': Array(0.15900595, dtype=float32), 'loss_cross_entropy': Array(0.14987706, dtype=float32)}


  7%|▋         | 68879/1000000 [3:09:13<26:55:12,  9.61it/s]

{'loss': Array(0.16106957, dtype=float32), 'loss_cross_entropy': Array(0.15168028, dtype=float32)}


  7%|▋         | 68889/1000000 [3:09:14<24:20:03, 10.63it/s]

{'loss': Array(0.1351632, dtype=float32), 'loss_cross_entropy': Array(0.12718724, dtype=float32)}


  7%|▋         | 68899/1000000 [3:09:16<28:23:37,  9.11it/s]

{'loss': Array(0.14093833, dtype=float32), 'loss_cross_entropy': Array(0.13166504, dtype=float32)}


  7%|▋         | 68909/1000000 [3:09:17<24:43:10, 10.46it/s]

{'loss': Array(0.13827719, dtype=float32), 'loss_cross_entropy': Array(0.12999171, dtype=float32)}


  7%|▋         | 68919/1000000 [3:09:19<37:57:04,  6.81it/s]

{'loss': Array(0.1521876, dtype=float32), 'loss_cross_entropy': Array(0.14273365, dtype=float32)}


  7%|▋         | 68929/1000000 [3:09:21<26:39:52,  9.70it/s]

{'loss': Array(0.14713421, dtype=float32), 'loss_cross_entropy': Array(0.13769405, dtype=float32)}


  7%|▋         | 68939/1000000 [3:09:22<24:12:06, 10.69it/s]

{'loss': Array(0.1671136, dtype=float32), 'loss_cross_entropy': Array(0.15593527, dtype=float32)}


  7%|▋         | 68949/1000000 [3:09:24<30:47:29,  8.40it/s]

{'loss': Array(0.1696551, dtype=float32), 'loss_cross_entropy': Array(0.15928905, dtype=float32)}


  7%|▋         | 68959/1000000 [3:09:25<25:21:24, 10.20it/s]

{'loss': Array(0.1526265, dtype=float32), 'loss_cross_entropy': Array(0.1443316, dtype=float32)}


  7%|▋         | 68969/1000000 [3:09:27<34:08:24,  7.58it/s]

{'loss': Array(0.1732883, dtype=float32), 'loss_cross_entropy': Array(0.16313104, dtype=float32)}


  7%|▋         | 68979/1000000 [3:09:28<25:53:30,  9.99it/s]

{'loss': Array(0.11892058, dtype=float32), 'loss_cross_entropy': Array(0.11110754, dtype=float32)}


  7%|▋         | 68989/1000000 [3:09:30<34:44:14,  7.44it/s]

{'loss': Array(0.17777811, dtype=float32), 'loss_cross_entropy': Array(0.16715713, dtype=float32)}


  7%|▋         | 68999/1000000 [3:09:31<25:26:49, 10.16it/s]

{'loss': Array(0.15744133, dtype=float32), 'loss_cross_entropy': Array(0.14798383, dtype=float32)}


  7%|▋         | 69009/1000000 [3:09:39<84:06:39,  3.07it/s] 

{'loss': Array(0.16370796, dtype=float32), 'loss_cross_entropy': Array(0.15392621, dtype=float32)}


  7%|▋         | 69019/1000000 [3:09:41<38:45:31,  6.67it/s]

{'loss': Array(0.15034068, dtype=float32), 'loss_cross_entropy': Array(0.1402224, dtype=float32)}


  7%|▋         | 69029/1000000 [3:09:42<26:31:52,  9.75it/s]

{'loss': Array(0.16494639, dtype=float32), 'loss_cross_entropy': Array(0.15536557, dtype=float32)}


  7%|▋         | 69039/1000000 [3:09:44<34:43:45,  7.45it/s]

{'loss': Array(0.15086308, dtype=float32), 'loss_cross_entropy': Array(0.14159559, dtype=float32)}


  7%|▋         | 69049/1000000 [3:09:45<26:19:34,  9.82it/s]

{'loss': Array(0.1535891, dtype=float32), 'loss_cross_entropy': Array(0.14513643, dtype=float32)}


  7%|▋         | 69059/1000000 [3:09:47<38:27:31,  6.72it/s]

{'loss': Array(0.15160476, dtype=float32), 'loss_cross_entropy': Array(0.14184357, dtype=float32)}


  7%|▋         | 69069/1000000 [3:09:48<26:12:40,  9.87it/s]

{'loss': Array(0.15282798, dtype=float32), 'loss_cross_entropy': Array(0.14271896, dtype=float32)}


  7%|▋         | 69079/1000000 [3:09:50<23:58:33, 10.79it/s]

{'loss': Array(0.15113083, dtype=float32), 'loss_cross_entropy': Array(0.14205746, dtype=float32)}


  7%|▋         | 69089/1000000 [3:09:51<30:46:09,  8.40it/s]

{'loss': Array(0.15897714, dtype=float32), 'loss_cross_entropy': Array(0.14898059, dtype=float32)}


  7%|▋         | 69099/1000000 [3:09:53<25:44:48, 10.04it/s]

{'loss': Array(0.15801457, dtype=float32), 'loss_cross_entropy': Array(0.14886077, dtype=float32)}


  7%|▋         | 69109/1000000 [3:09:54<34:37:19,  7.47it/s]

{'loss': Array(0.16462384, dtype=float32), 'loss_cross_entropy': Array(0.1545193, dtype=float32)}


  7%|▋         | 69119/1000000 [3:09:56<25:33:51, 10.11it/s]

{'loss': Array(0.14825536, dtype=float32), 'loss_cross_entropy': Array(0.13937281, dtype=float32)}


  7%|▋         | 69129/1000000 [3:09:57<24:37:49, 10.50it/s]

{'loss': Array(0.15366763, dtype=float32), 'loss_cross_entropy': Array(0.1447324, dtype=float32)}


  7%|▋         | 69139/1000000 [3:09:59<28:15:28,  9.15it/s]

{'loss': Array(0.15167484, dtype=float32), 'loss_cross_entropy': Array(0.14289947, dtype=float32)}


  7%|▋         | 69149/1000000 [3:10:00<25:27:30, 10.16it/s]

{'loss': Array(0.1526259, dtype=float32), 'loss_cross_entropy': Array(0.14345932, dtype=float32)}


  7%|▋         | 69159/1000000 [3:10:02<39:42:17,  6.51it/s]

{'loss': Array(0.1625287, dtype=float32), 'loss_cross_entropy': Array(0.15263325, dtype=float32)}


  7%|▋         | 69169/1000000 [3:10:04<28:16:09,  9.15it/s]

{'loss': Array(0.15249737, dtype=float32), 'loss_cross_entropy': Array(0.14318736, dtype=float32)}


  7%|▋         | 69179/1000000 [3:10:05<27:03:57,  9.55it/s]

{'loss': Array(0.17302038, dtype=float32), 'loss_cross_entropy': Array(0.16261716, dtype=float32)}


  7%|▋         | 69189/1000000 [3:10:07<32:50:16,  7.87it/s]

{'loss': Array(0.16492504, dtype=float32), 'loss_cross_entropy': Array(0.15475284, dtype=float32)}


  7%|▋         | 69199/1000000 [3:10:08<25:09:35, 10.28it/s]

{'loss': Array(0.14239644, dtype=float32), 'loss_cross_entropy': Array(0.13309935, dtype=float32)}


  7%|▋         | 69209/1000000 [3:10:10<33:35:08,  7.70it/s]

{'loss': Array(0.1557471, dtype=float32), 'loss_cross_entropy': Array(0.1457343, dtype=float32)}


  7%|▋         | 69219/1000000 [3:10:11<25:09:08, 10.28it/s]

{'loss': Array(0.15136288, dtype=float32), 'loss_cross_entropy': Array(0.14155822, dtype=float32)}


  7%|▋         | 69229/1000000 [3:10:13<33:55:21,  7.62it/s]

{'loss': Array(0.16494302, dtype=float32), 'loss_cross_entropy': Array(0.15517557, dtype=float32)}


  7%|▋         | 69239/1000000 [3:10:14<25:32:57, 10.12it/s]

{'loss': Array(0.14833756, dtype=float32), 'loss_cross_entropy': Array(0.13916181, dtype=float32)}


  7%|▋         | 69249/1000000 [3:10:16<24:04:24, 10.74it/s]

{'loss': Array(0.15636064, dtype=float32), 'loss_cross_entropy': Array(0.147087, dtype=float32)}


  7%|▋         | 69259/1000000 [3:10:17<28:16:13,  9.15it/s]

{'loss': Array(0.14703557, dtype=float32), 'loss_cross_entropy': Array(0.13758604, dtype=float32)}


  7%|▋         | 69269/1000000 [3:10:19<24:36:55, 10.50it/s]

{'loss': Array(0.14072423, dtype=float32), 'loss_cross_entropy': Array(0.1321395, dtype=float32)}


  7%|▋         | 69279/1000000 [3:10:20<31:09:56,  8.30it/s]

{'loss': Array(0.15520711, dtype=float32), 'loss_cross_entropy': Array(0.14571786, dtype=float32)}


  7%|▋         | 69289/1000000 [3:10:22<25:22:15, 10.19it/s]

{'loss': Array(0.15208474, dtype=float32), 'loss_cross_entropy': Array(0.14317395, dtype=float32)}


  7%|▋         | 69299/1000000 [3:10:23<37:58:50,  6.81it/s]

{'loss': Array(0.15767618, dtype=float32), 'loss_cross_entropy': Array(0.14806686, dtype=float32)}


  7%|▋         | 69309/1000000 [3:10:25<26:03:18,  9.92it/s]

{'loss': Array(0.15537989, dtype=float32), 'loss_cross_entropy': Array(0.14612865, dtype=float32)}


  7%|▋         | 69319/1000000 [3:10:27<38:18:14,  6.75it/s]

{'loss': Array(0.15459049, dtype=float32), 'loss_cross_entropy': Array(0.14545384, dtype=float32)}


  7%|▋         | 69329/1000000 [3:10:28<26:29:55,  9.76it/s]

{'loss': Array(0.16760091, dtype=float32), 'loss_cross_entropy': Array(0.15778661, dtype=float32)}


  7%|▋         | 69339/1000000 [3:10:29<24:21:40, 10.61it/s]

{'loss': Array(0.16939454, dtype=float32), 'loss_cross_entropy': Array(0.16010728, dtype=float32)}


  7%|▋         | 69349/1000000 [3:10:31<34:11:53,  7.56it/s]

{'loss': Array(0.15168606, dtype=float32), 'loss_cross_entropy': Array(0.14239445, dtype=float32)}


  7%|▋         | 69359/1000000 [3:10:32<25:29:04, 10.14it/s]

{'loss': Array(0.1483057, dtype=float32), 'loss_cross_entropy': Array(0.13942873, dtype=float32)}


  7%|▋         | 69369/1000000 [3:10:34<38:14:27,  6.76it/s]

{'loss': Array(0.1564168, dtype=float32), 'loss_cross_entropy': Array(0.14702336, dtype=float32)}


  7%|▋         | 69379/1000000 [3:10:35<26:59:56,  9.57it/s]

{'loss': Array(0.15558635, dtype=float32), 'loss_cross_entropy': Array(0.14651191, dtype=float32)}


  7%|▋         | 69389/1000000 [3:10:37<24:37:42, 10.50it/s]

{'loss': Array(0.16150145, dtype=float32), 'loss_cross_entropy': Array(0.15137146, dtype=float32)}


  7%|▋         | 69399/1000000 [3:10:39<27:43:09,  9.33it/s]

{'loss': Array(0.14222267, dtype=float32), 'loss_cross_entropy': Array(0.13352306, dtype=float32)}


  7%|▋         | 69409/1000000 [3:10:40<24:20:17, 10.62it/s]

{'loss': Array(0.14134227, dtype=float32), 'loss_cross_entropy': Array(0.13286969, dtype=float32)}


  7%|▋         | 69419/1000000 [3:10:42<26:58:16,  9.58it/s]

{'loss': Array(0.15695195, dtype=float32), 'loss_cross_entropy': Array(0.14748588, dtype=float32)}


  7%|▋         | 69429/1000000 [3:10:43<24:09:13, 10.70it/s]

{'loss': Array(0.15222129, dtype=float32), 'loss_cross_entropy': Array(0.14267512, dtype=float32)}


  7%|▋         | 69439/1000000 [3:10:45<37:31:57,  6.89it/s]

{'loss': Array(0.14205205, dtype=float32), 'loss_cross_entropy': Array(0.1331923, dtype=float32)}


  7%|▋         | 69449/1000000 [3:10:46<26:26:24,  9.78it/s]

{'loss': Array(0.14838012, dtype=float32), 'loss_cross_entropy': Array(0.13857111, dtype=float32)}


  7%|▋         | 69459/1000000 [3:10:47<24:55:21, 10.37it/s]

{'loss': Array(0.16349626, dtype=float32), 'loss_cross_entropy': Array(0.15394795, dtype=float32)}


  7%|▋         | 69469/1000000 [3:10:49<30:52:17,  8.37it/s]

{'loss': Array(0.14256918, dtype=float32), 'loss_cross_entropy': Array(0.13427563, dtype=float32)}


  7%|▋         | 69479/1000000 [3:10:50<25:03:49, 10.31it/s]

{'loss': Array(0.16674127, dtype=float32), 'loss_cross_entropy': Array(0.15705898, dtype=float32)}


  7%|▋         | 69489/1000000 [3:10:52<37:36:49,  6.87it/s]

{'loss': Array(0.1374804, dtype=float32), 'loss_cross_entropy': Array(0.1281838, dtype=float32)}


  7%|▋         | 69499/1000000 [3:10:53<26:00:05,  9.94it/s]

{'loss': Array(0.14280568, dtype=float32), 'loss_cross_entropy': Array(0.13374321, dtype=float32)}


  7%|▋         | 69509/1000000 [3:11:01<85:36:34,  3.02it/s] 

{'loss': Array(0.15509227, dtype=float32), 'loss_cross_entropy': Array(0.14557257, dtype=float32)}


  7%|▋         | 69519/1000000 [3:11:03<39:48:41,  6.49it/s]

{'loss': Array(0.1512395, dtype=float32), 'loss_cross_entropy': Array(0.142277, dtype=float32)}


  7%|▋         | 69529/1000000 [3:11:04<27:38:12,  9.35it/s]

{'loss': Array(0.16455571, dtype=float32), 'loss_cross_entropy': Array(0.15431444, dtype=float32)}


  7%|▋         | 69539/1000000 [3:11:06<34:50:10,  7.42it/s]

{'loss': Array(0.16108827, dtype=float32), 'loss_cross_entropy': Array(0.15160258, dtype=float32)}


  7%|▋         | 69549/1000000 [3:11:08<27:10:31,  9.51it/s]

{'loss': Array(0.16563042, dtype=float32), 'loss_cross_entropy': Array(0.15557627, dtype=float32)}


  7%|▋         | 69559/1000000 [3:11:09<38:49:35,  6.66it/s]

{'loss': Array(0.16231243, dtype=float32), 'loss_cross_entropy': Array(0.15234318, dtype=float32)}


  7%|▋         | 69569/1000000 [3:11:11<26:39:18,  9.70it/s]

{'loss': Array(0.16573869, dtype=float32), 'loss_cross_entropy': Array(0.15545519, dtype=float32)}


  7%|▋         | 69579/1000000 [3:11:12<25:09:08, 10.28it/s]

{'loss': Array(0.17181031, dtype=float32), 'loss_cross_entropy': Array(0.16134794, dtype=float32)}


  7%|▋         | 69589/1000000 [3:11:14<27:21:07,  9.45it/s]

{'loss': Array(0.13762964, dtype=float32), 'loss_cross_entropy': Array(0.12860432, dtype=float32)}


  7%|▋         | 69599/1000000 [3:11:15<24:49:31, 10.41it/s]

{'loss': Array(0.15170068, dtype=float32), 'loss_cross_entropy': Array(0.14251298, dtype=float32)}


  7%|▋         | 69609/1000000 [3:11:17<38:45:47,  6.67it/s]

{'loss': Array(0.15635762, dtype=float32), 'loss_cross_entropy': Array(0.1473578, dtype=float32)}


  7%|▋         | 69619/1000000 [3:11:18<27:00:14,  9.57it/s]

{'loss': Array(0.1606648, dtype=float32), 'loss_cross_entropy': Array(0.15120235, dtype=float32)}


  7%|▋         | 69629/1000000 [3:11:20<24:18:51, 10.63it/s]

{'loss': Array(0.16695523, dtype=float32), 'loss_cross_entropy': Array(0.15703598, dtype=float32)}


  7%|▋         | 69639/1000000 [3:11:22<29:19:39,  8.81it/s]

{'loss': Array(0.15697697, dtype=float32), 'loss_cross_entropy': Array(0.14717554, dtype=float32)}


  7%|▋         | 69649/1000000 [3:11:23<24:36:50, 10.50it/s]

{'loss': Array(0.14525223, dtype=float32), 'loss_cross_entropy': Array(0.136938, dtype=float32)}


  7%|▋         | 69659/1000000 [3:11:25<33:26:05,  7.73it/s]

{'loss': Array(0.15222616, dtype=float32), 'loss_cross_entropy': Array(0.14285795, dtype=float32)}


  7%|▋         | 69669/1000000 [3:11:26<25:26:30, 10.16it/s]

{'loss': Array(0.15236893, dtype=float32), 'loss_cross_entropy': Array(0.14259078, dtype=float32)}


  7%|▋         | 69679/1000000 [3:11:28<37:42:20,  6.85it/s]

{'loss': Array(0.14595741, dtype=float32), 'loss_cross_entropy': Array(0.13685676, dtype=float32)}


  7%|▋         | 69689/1000000 [3:11:29<26:07:30,  9.89it/s]

{'loss': Array(0.16156524, dtype=float32), 'loss_cross_entropy': Array(0.151704, dtype=float32)}


  7%|▋         | 69699/1000000 [3:11:30<24:46:20, 10.43it/s]

{'loss': Array(0.15873773, dtype=float32), 'loss_cross_entropy': Array(0.14972572, dtype=float32)}


  7%|▋         | 69709/1000000 [3:11:32<30:51:38,  8.37it/s]

{'loss': Array(0.15305197, dtype=float32), 'loss_cross_entropy': Array(0.14379136, dtype=float32)}


  7%|▋         | 69719/1000000 [3:11:34<25:45:17, 10.03it/s]

{'loss': Array(0.1484414, dtype=float32), 'loss_cross_entropy': Array(0.1392247, dtype=float32)}


  7%|▋         | 69729/1000000 [3:11:35<33:44:16,  7.66it/s]

{'loss': Array(0.13756907, dtype=float32), 'loss_cross_entropy': Array(0.12925605, dtype=float32)}


  7%|▋         | 69739/1000000 [3:11:37<25:51:31,  9.99it/s]

{'loss': Array(0.1715542, dtype=float32), 'loss_cross_entropy': Array(0.1612776, dtype=float32)}


  7%|▋         | 69749/1000000 [3:11:38<24:27:41, 10.56it/s]

{'loss': Array(0.15823951, dtype=float32), 'loss_cross_entropy': Array(0.14880893, dtype=float32)}


  7%|▋         | 69759/1000000 [3:11:40<26:49:56,  9.63it/s]

{'loss': Array(0.14827971, dtype=float32), 'loss_cross_entropy': Array(0.13885173, dtype=float32)}


  7%|▋         | 69769/1000000 [3:11:41<24:40:09, 10.47it/s]

{'loss': Array(0.14741738, dtype=float32), 'loss_cross_entropy': Array(0.13856424, dtype=float32)}


  7%|▋         | 69779/1000000 [3:11:43<37:54:21,  6.82it/s]

{'loss': Array(0.15009783, dtype=float32), 'loss_cross_entropy': Array(0.14069007, dtype=float32)}


  7%|▋         | 69789/1000000 [3:11:44<26:40:15,  9.69it/s]

{'loss': Array(0.1409159, dtype=float32), 'loss_cross_entropy': Array(0.13206005, dtype=float32)}


  7%|▋         | 69799/1000000 [3:11:45<24:57:38, 10.35it/s]

{'loss': Array(0.15033278, dtype=float32), 'loss_cross_entropy': Array(0.1403198, dtype=float32)}


  7%|▋         | 69809/1000000 [3:11:47<30:51:03,  8.38it/s]

{'loss': Array(0.1475687, dtype=float32), 'loss_cross_entropy': Array(0.13804124, dtype=float32)}


  7%|▋         | 69819/1000000 [3:11:49<25:12:41, 10.25it/s]

{'loss': Array(0.14882158, dtype=float32), 'loss_cross_entropy': Array(0.14027019, dtype=float32)}


  7%|▋         | 69829/1000000 [3:11:50<34:47:36,  7.43it/s]

{'loss': Array(0.1371785, dtype=float32), 'loss_cross_entropy': Array(0.12856671, dtype=float32)}


  7%|▋         | 69839/1000000 [3:11:52<26:13:37,  9.85it/s]

{'loss': Array(0.15567622, dtype=float32), 'loss_cross_entropy': Array(0.14606388, dtype=float32)}


  7%|▋         | 69849/1000000 [3:11:53<34:18:38,  7.53it/s]

{'loss': Array(0.14502648, dtype=float32), 'loss_cross_entropy': Array(0.13635527, dtype=float32)}


  7%|▋         | 69859/1000000 [3:11:55<25:27:29, 10.15it/s]

{'loss': Array(0.16288196, dtype=float32), 'loss_cross_entropy': Array(0.15293412, dtype=float32)}


  7%|▋         | 69869/1000000 [3:11:56<24:21:25, 10.61it/s]

{'loss': Array(0.15994275, dtype=float32), 'loss_cross_entropy': Array(0.15039898, dtype=float32)}


  7%|▋         | 69879/1000000 [3:11:58<29:17:49,  8.82it/s]

{'loss': Array(0.14967796, dtype=float32), 'loss_cross_entropy': Array(0.1401495, dtype=float32)}


  7%|▋         | 69889/1000000 [3:11:59<26:38:51,  9.70it/s]

{'loss': Array(0.16011332, dtype=float32), 'loss_cross_entropy': Array(0.14984067, dtype=float32)}


  7%|▋         | 69899/1000000 [3:12:01<32:27:18,  7.96it/s]

{'loss': Array(0.15311247, dtype=float32), 'loss_cross_entropy': Array(0.14346398, dtype=float32)}


  7%|▋         | 69909/1000000 [3:12:03<26:08:46,  9.88it/s]

{'loss': Array(0.17518827, dtype=float32), 'loss_cross_entropy': Array(0.16431914, dtype=float32)}


  7%|▋         | 69919/1000000 [3:12:04<38:50:58,  6.65it/s]

{'loss': Array(0.16137607, dtype=float32), 'loss_cross_entropy': Array(0.15149856, dtype=float32)}


  7%|▋         | 69929/1000000 [3:12:06<27:10:12,  9.51it/s]

{'loss': Array(0.16817248, dtype=float32), 'loss_cross_entropy': Array(0.15853983, dtype=float32)}


  7%|▋         | 69939/1000000 [3:12:08<39:29:58,  6.54it/s]

{'loss': Array(0.12934648, dtype=float32), 'loss_cross_entropy': Array(0.1214084, dtype=float32)}


  7%|▋         | 69949/1000000 [3:12:09<26:22:59,  9.79it/s]

{'loss': Array(0.13893007, dtype=float32), 'loss_cross_entropy': Array(0.130021, dtype=float32)}


  7%|▋         | 69959/1000000 [3:12:10<24:51:19, 10.39it/s]

{'loss': Array(0.14505284, dtype=float32), 'loss_cross_entropy': Array(0.13550065, dtype=float32)}


  7%|▋         | 69969/1000000 [3:12:12<33:53:09,  7.62it/s]

{'loss': Array(0.1677824, dtype=float32), 'loss_cross_entropy': Array(0.15831429, dtype=float32)}


  7%|▋         | 69979/1000000 [3:12:13<25:35:09, 10.10it/s]

{'loss': Array(0.16658734, dtype=float32), 'loss_cross_entropy': Array(0.15684931, dtype=float32)}


  7%|▋         | 69989/1000000 [3:12:15<38:21:56,  6.73it/s]

{'loss': Array(0.1295868, dtype=float32), 'loss_cross_entropy': Array(0.12123199, dtype=float32)}


  7%|▋         | 69999/1000000 [3:12:17<27:08:56,  9.52it/s]

{'loss': Array(0.13595419, dtype=float32), 'loss_cross_entropy': Array(0.1273752, dtype=float32)}


  7%|▋         | 70009/1000000 [3:12:25<86:39:15,  2.98it/s] 

{'loss': Array(0.1431208, dtype=float32), 'loss_cross_entropy': Array(0.13528691, dtype=float32)}


  7%|▋         | 70019/1000000 [3:12:26<38:08:44,  6.77it/s] 

{'loss': Array(0.17906329, dtype=float32), 'loss_cross_entropy': Array(0.16894351, dtype=float32)}


  7%|▋         | 70029/1000000 [3:12:28<26:18:33,  9.82it/s]

{'loss': Array(0.15086813, dtype=float32), 'loss_cross_entropy': Array(0.14144865, dtype=float32)}


  7%|▋         | 70039/1000000 [3:12:29<27:58:59,  9.23it/s]

{'loss': Array(0.15472382, dtype=float32), 'loss_cross_entropy': Array(0.14552502, dtype=float32)}


  7%|▋         | 70049/1000000 [3:12:31<24:47:13, 10.42it/s]

{'loss': Array(0.15014814, dtype=float32), 'loss_cross_entropy': Array(0.14219712, dtype=float32)}


  7%|▋         | 70059/1000000 [3:12:32<38:03:48,  6.79it/s]

{'loss': Array(0.1533623, dtype=float32), 'loss_cross_entropy': Array(0.1437863, dtype=float32)}


  7%|▋         | 70069/1000000 [3:12:34<26:38:30,  9.70it/s]

{'loss': Array(0.13045032, dtype=float32), 'loss_cross_entropy': Array(0.121999, dtype=float32)}


  7%|▋         | 70079/1000000 [3:12:35<24:24:31, 10.58it/s]

{'loss': Array(0.15631819, dtype=float32), 'loss_cross_entropy': Array(0.1470444, dtype=float32)}


  7%|▋         | 70089/1000000 [3:12:37<31:05:13,  8.31it/s]

{'loss': Array(0.17272699, dtype=float32), 'loss_cross_entropy': Array(0.16280635, dtype=float32)}


  7%|▋         | 70099/1000000 [3:12:38<25:01:04, 10.32it/s]

{'loss': Array(0.14892352, dtype=float32), 'loss_cross_entropy': Array(0.14043908, dtype=float32)}


  7%|▋         | 70109/1000000 [3:12:40<37:56:59,  6.81it/s]

{'loss': Array(0.16840534, dtype=float32), 'loss_cross_entropy': Array(0.1580293, dtype=float32)}


  7%|▋         | 70119/1000000 [3:12:41<26:39:48,  9.69it/s]

{'loss': Array(0.15354294, dtype=float32), 'loss_cross_entropy': Array(0.14342801, dtype=float32)}


  7%|▋         | 70129/1000000 [3:12:43<24:12:55, 10.67it/s]

{'loss': Array(0.15624736, dtype=float32), 'loss_cross_entropy': Array(0.14664014, dtype=float32)}


  7%|▋         | 70139/1000000 [3:12:44<29:05:54,  8.88it/s]

{'loss': Array(0.14594774, dtype=float32), 'loss_cross_entropy': Array(0.1374498, dtype=float32)}


  7%|▋         | 70149/1000000 [3:12:46<25:07:12, 10.28it/s]

{'loss': Array(0.16324733, dtype=float32), 'loss_cross_entropy': Array(0.15377776, dtype=float32)}


  7%|▋         | 70159/1000000 [3:12:48<34:01:02,  7.59it/s]

{'loss': Array(0.15911824, dtype=float32), 'loss_cross_entropy': Array(0.1501429, dtype=float32)}


  7%|▋         | 70169/1000000 [3:12:49<25:16:44, 10.22it/s]

{'loss': Array(0.15046844, dtype=float32), 'loss_cross_entropy': Array(0.14176223, dtype=float32)}


  7%|▋         | 70179/1000000 [3:12:50<23:49:00, 10.84it/s]

{'loss': Array(0.15274422, dtype=float32), 'loss_cross_entropy': Array(0.1433963, dtype=float32)}


  7%|▋         | 70189/1000000 [3:12:52<30:27:38,  8.48it/s]

{'loss': Array(0.16257195, dtype=float32), 'loss_cross_entropy': Array(0.1524558, dtype=float32)}


  7%|▋         | 70199/1000000 [3:12:53<24:52:13, 10.38it/s]

{'loss': Array(0.16165595, dtype=float32), 'loss_cross_entropy': Array(0.15134683, dtype=float32)}


  7%|▋         | 70209/1000000 [3:12:55<24:15:20, 10.65it/s]

{'loss': Array(0.14929798, dtype=float32), 'loss_cross_entropy': Array(0.13991933, dtype=float32)}


  7%|▋         | 70219/1000000 [3:12:56<27:18:43,  9.46it/s]

{'loss': Array(0.15470813, dtype=float32), 'loss_cross_entropy': Array(0.14568256, dtype=float32)}


  7%|▋         | 70229/1000000 [3:12:58<25:13:53, 10.24it/s]

{'loss': Array(0.15839337, dtype=float32), 'loss_cross_entropy': Array(0.14852314, dtype=float32)}


  7%|▋         | 70239/1000000 [3:12:59<31:32:21,  8.19it/s]

{'loss': Array(0.15261434, dtype=float32), 'loss_cross_entropy': Array(0.14276782, dtype=float32)}


  7%|▋         | 70249/1000000 [3:13:01<26:32:55,  9.73it/s]

{'loss': Array(0.1512066, dtype=float32), 'loss_cross_entropy': Array(0.14211506, dtype=float32)}


  7%|▋         | 70259/1000000 [3:13:03<39:16:14,  6.58it/s]

{'loss': Array(0.14502884, dtype=float32), 'loss_cross_entropy': Array(0.13596952, dtype=float32)}


  7%|▋         | 70269/1000000 [3:13:04<27:38:38,  9.34it/s]

{'loss': Array(0.16603105, dtype=float32), 'loss_cross_entropy': Array(0.15652144, dtype=float32)}


  7%|▋         | 70279/1000000 [3:13:06<40:33:40,  6.37it/s]

{'loss': Array(0.13646695, dtype=float32), 'loss_cross_entropy': Array(0.12759776, dtype=float32)}


  7%|▋         | 70289/1000000 [3:13:07<27:26:09,  9.41it/s]

{'loss': Array(0.1522213, dtype=float32), 'loss_cross_entropy': Array(0.14355369, dtype=float32)}


  7%|▋         | 70299/1000000 [3:13:09<24:54:23, 10.37it/s]

{'loss': Array(0.15166426, dtype=float32), 'loss_cross_entropy': Array(0.14169376, dtype=float32)}


  7%|▋         | 70309/1000000 [3:13:10<34:41:34,  7.44it/s]

{'loss': Array(0.13883096, dtype=float32), 'loss_cross_entropy': Array(0.12959345, dtype=float32)}


  7%|▋         | 70319/1000000 [3:13:12<26:04:25,  9.90it/s]

{'loss': Array(0.1546392, dtype=float32), 'loss_cross_entropy': Array(0.1456064, dtype=float32)}


  7%|▋         | 70329/1000000 [3:13:14<38:06:40,  6.78it/s]

{'loss': Array(0.17989267, dtype=float32), 'loss_cross_entropy': Array(0.16937204, dtype=float32)}


  7%|▋         | 70339/1000000 [3:13:15<26:18:30,  9.82it/s]

{'loss': Array(0.14851587, dtype=float32), 'loss_cross_entropy': Array(0.13935241, dtype=float32)}


  7%|▋         | 70349/1000000 [3:13:16<24:27:35, 10.56it/s]

{'loss': Array(0.1619051, dtype=float32), 'loss_cross_entropy': Array(0.15218817, dtype=float32)}


  7%|▋         | 70359/1000000 [3:13:18<27:30:23,  9.39it/s]

{'loss': Array(0.16877185, dtype=float32), 'loss_cross_entropy': Array(0.15890603, dtype=float32)}


  7%|▋         | 70369/1000000 [3:13:19<24:25:10, 10.57it/s]

{'loss': Array(0.13702635, dtype=float32), 'loss_cross_entropy': Array(0.12808725, dtype=float32)}


  7%|▋         | 70379/1000000 [3:13:21<27:23:55,  9.42it/s]

{'loss': Array(0.14979152, dtype=float32), 'loss_cross_entropy': Array(0.13992684, dtype=float32)}


  7%|▋         | 70389/1000000 [3:13:22<25:14:26, 10.23it/s]

{'loss': Array(0.1708404, dtype=float32), 'loss_cross_entropy': Array(0.16081738, dtype=float32)}


  7%|▋         | 70399/1000000 [3:13:24<37:53:05,  6.82it/s]

{'loss': Array(0.15558757, dtype=float32), 'loss_cross_entropy': Array(0.1456887, dtype=float32)}


  7%|▋         | 70409/1000000 [3:13:25<27:04:11,  9.54it/s]

{'loss': Array(0.14841092, dtype=float32), 'loss_cross_entropy': Array(0.13909097, dtype=float32)}


  7%|▋         | 70419/1000000 [3:13:27<24:12:23, 10.67it/s]

{'loss': Array(0.15831845, dtype=float32), 'loss_cross_entropy': Array(0.148607, dtype=float32)}


  7%|▋         | 70429/1000000 [3:13:29<31:32:16,  8.19it/s]

{'loss': Array(0.15506612, dtype=float32), 'loss_cross_entropy': Array(0.14546324, dtype=float32)}


  7%|▋         | 70439/1000000 [3:13:30<25:36:06, 10.09it/s]

{'loss': Array(0.16323395, dtype=float32), 'loss_cross_entropy': Array(0.15374632, dtype=float32)}


  7%|▋         | 70449/1000000 [3:13:32<38:10:09,  6.76it/s]

{'loss': Array(0.15142156, dtype=float32), 'loss_cross_entropy': Array(0.14232656, dtype=float32)}


  7%|▋         | 70459/1000000 [3:13:33<26:05:19,  9.90it/s]

{'loss': Array(0.15593283, dtype=float32), 'loss_cross_entropy': Array(0.14632261, dtype=float32)}


  7%|▋         | 70469/1000000 [3:13:34<24:19:21, 10.62it/s]

{'loss': Array(0.1665793, dtype=float32), 'loss_cross_entropy': Array(0.15624547, dtype=float32)}


  7%|▋         | 70479/1000000 [3:13:36<28:40:32,  9.00it/s]

{'loss': Array(0.17169182, dtype=float32), 'loss_cross_entropy': Array(0.16154206, dtype=float32)}


  7%|▋         | 70489/1000000 [3:13:38<25:42:43, 10.04it/s]

{'loss': Array(0.15738595, dtype=float32), 'loss_cross_entropy': Array(0.14774472, dtype=float32)}


  7%|▋         | 70499/1000000 [3:13:39<34:21:29,  7.51it/s]

{'loss': Array(0.1513515, dtype=float32), 'loss_cross_entropy': Array(0.14160517, dtype=float32)}


  7%|▋         | 70509/1000000 [3:13:47<85:01:18,  3.04it/s] 

{'loss': Array(0.15164608, dtype=float32), 'loss_cross_entropy': Array(0.14231521, dtype=float32)}


  7%|▋         | 70519/1000000 [3:13:49<44:32:10,  5.80it/s]

{'loss': Array(0.15807904, dtype=float32), 'loss_cross_entropy': Array(0.14792918, dtype=float32)}


  7%|▋         | 70529/1000000 [3:13:50<27:17:49,  9.46it/s]

{'loss': Array(0.15049879, dtype=float32), 'loss_cross_entropy': Array(0.14186318, dtype=float32)}


  7%|▋         | 70539/1000000 [3:13:51<24:23:42, 10.58it/s]

{'loss': Array(0.17051034, dtype=float32), 'loss_cross_entropy': Array(0.15989523, dtype=float32)}


  7%|▋         | 70549/1000000 [3:13:53<30:44:48,  8.40it/s]

{'loss': Array(0.17185552, dtype=float32), 'loss_cross_entropy': Array(0.16186248, dtype=float32)}


  7%|▋         | 70559/1000000 [3:13:55<25:07:08, 10.28it/s]

{'loss': Array(0.13981156, dtype=float32), 'loss_cross_entropy': Array(0.13185431, dtype=float32)}


  7%|▋         | 70569/1000000 [3:13:56<24:05:22, 10.72it/s]

{'loss': Array(0.15011506, dtype=float32), 'loss_cross_entropy': Array(0.14054763, dtype=float32)}


  7%|▋         | 70579/1000000 [3:13:58<26:56:28,  9.58it/s]

{'loss': Array(0.1634331, dtype=float32), 'loss_cross_entropy': Array(0.15324657, dtype=float32)}


  7%|▋         | 70589/1000000 [3:13:59<26:08:45,  9.87it/s]

{'loss': Array(0.14623277, dtype=float32), 'loss_cross_entropy': Array(0.13759758, dtype=float32)}


  7%|▋         | 70599/1000000 [3:14:01<32:27:19,  7.95it/s]

{'loss': Array(0.16057563, dtype=float32), 'loss_cross_entropy': Array(0.15136847, dtype=float32)}


  7%|▋         | 70609/1000000 [3:14:02<26:46:19,  9.64it/s]

{'loss': Array(0.14197998, dtype=float32), 'loss_cross_entropy': Array(0.13369405, dtype=float32)}


  7%|▋         | 70619/1000000 [3:14:04<34:50:04,  7.41it/s]

{'loss': Array(0.15679954, dtype=float32), 'loss_cross_entropy': Array(0.1473711, dtype=float32)}


  7%|▋         | 70629/1000000 [3:14:05<27:05:04,  9.53it/s]

{'loss': Array(0.16001748, dtype=float32), 'loss_cross_entropy': Array(0.15124655, dtype=float32)}


  7%|▋         | 70639/1000000 [3:14:07<24:39:57, 10.47it/s]

{'loss': Array(0.15007345, dtype=float32), 'loss_cross_entropy': Array(0.14084768, dtype=float32)}


  7%|▋         | 70649/1000000 [3:14:08<30:50:36,  8.37it/s]

{'loss': Array(0.14255202, dtype=float32), 'loss_cross_entropy': Array(0.13374403, dtype=float32)}


  7%|▋         | 70659/1000000 [3:14:10<24:57:08, 10.35it/s]

{'loss': Array(0.13215603, dtype=float32), 'loss_cross_entropy': Array(0.1232734, dtype=float32)}


  7%|▋         | 70669/1000000 [3:14:11<23:52:34, 10.81it/s]

{'loss': Array(0.16373669, dtype=float32), 'loss_cross_entropy': Array(0.15406542, dtype=float32)}


  7%|▋         | 70679/1000000 [3:14:13<27:03:44,  9.54it/s]

{'loss': Array(0.14563519, dtype=float32), 'loss_cross_entropy': Array(0.13719635, dtype=float32)}


  7%|▋         | 70689/1000000 [3:14:14<25:03:50, 10.30it/s]

{'loss': Array(0.15956554, dtype=float32), 'loss_cross_entropy': Array(0.15051548, dtype=float32)}


  7%|▋         | 70699/1000000 [3:14:16<30:42:27,  8.41it/s]

{'loss': Array(0.14947644, dtype=float32), 'loss_cross_entropy': Array(0.14005952, dtype=float32)}


  7%|▋         | 70709/1000000 [3:14:17<25:11:36, 10.25it/s]

{'loss': Array(0.16754015, dtype=float32), 'loss_cross_entropy': Array(0.15825409, dtype=float32)}


  7%|▋         | 70719/1000000 [3:14:19<38:03:50,  6.78it/s]

{'loss': Array(0.15847583, dtype=float32), 'loss_cross_entropy': Array(0.14888883, dtype=float32)}


  7%|▋         | 70729/1000000 [3:14:20<26:13:54,  9.84it/s]

{'loss': Array(0.15762226, dtype=float32), 'loss_cross_entropy': Array(0.1479326, dtype=float32)}


  7%|▋         | 70739/1000000 [3:14:22<24:27:25, 10.55it/s]

{'loss': Array(0.15768173, dtype=float32), 'loss_cross_entropy': Array(0.14829168, dtype=float32)}


  7%|▋         | 70749/1000000 [3:14:24<34:04:23,  7.58it/s]

{'loss': Array(0.14922044, dtype=float32), 'loss_cross_entropy': Array(0.14028473, dtype=float32)}


  7%|▋         | 70759/1000000 [3:14:25<26:15:46,  9.83it/s]

{'loss': Array(0.16494666, dtype=float32), 'loss_cross_entropy': Array(0.1555018, dtype=float32)}


  7%|▋         | 70769/1000000 [3:14:26<24:05:24, 10.71it/s]

{'loss': Array(0.14652972, dtype=float32), 'loss_cross_entropy': Array(0.1371197, dtype=float32)}


  7%|▋         | 70779/1000000 [3:14:28<28:06:27,  9.18it/s]

{'loss': Array(0.15512496, dtype=float32), 'loss_cross_entropy': Array(0.14563516, dtype=float32)}


  7%|▋         | 70789/1000000 [3:14:29<25:01:03, 10.32it/s]

{'loss': Array(0.1515082, dtype=float32), 'loss_cross_entropy': Array(0.14188522, dtype=float32)}


  7%|▋         | 70799/1000000 [3:14:31<31:12:10,  8.27it/s]

{'loss': Array(0.1421762, dtype=float32), 'loss_cross_entropy': Array(0.13299601, dtype=float32)}


  7%|▋         | 70809/1000000 [3:14:32<25:10:07, 10.26it/s]

{'loss': Array(0.1432148, dtype=float32), 'loss_cross_entropy': Array(0.13405675, dtype=float32)}


  7%|▋         | 70819/1000000 [3:14:34<24:41:32, 10.45it/s]

{'loss': Array(0.150761, dtype=float32), 'loss_cross_entropy': Array(0.14064784, dtype=float32)}


  7%|▋         | 70829/1000000 [3:14:36<27:38:24,  9.34it/s]

{'loss': Array(0.1509171, dtype=float32), 'loss_cross_entropy': Array(0.1421641, dtype=float32)}


  7%|▋         | 70839/1000000 [3:14:37<24:15:27, 10.64it/s]

{'loss': Array(0.12952392, dtype=float32), 'loss_cross_entropy': Array(0.12142056, dtype=float32)}


  7%|▋         | 70849/1000000 [3:14:39<33:48:16,  7.64it/s]

{'loss': Array(0.17004661, dtype=float32), 'loss_cross_entropy': Array(0.15986484, dtype=float32)}


  7%|▋         | 70859/1000000 [3:14:40<25:57:54,  9.94it/s]

{'loss': Array(0.15840748, dtype=float32), 'loss_cross_entropy': Array(0.14861527, dtype=float32)}


  7%|▋         | 70869/1000000 [3:14:41<24:36:54, 10.49it/s]

{'loss': Array(0.15974513, dtype=float32), 'loss_cross_entropy': Array(0.149935, dtype=float32)}


  7%|▋         | 70879/1000000 [3:14:43<27:52:09,  9.26it/s]

{'loss': Array(0.15504916, dtype=float32), 'loss_cross_entropy': Array(0.14599997, dtype=float32)}


  7%|▋         | 70889/1000000 [3:14:45<24:34:09, 10.50it/s]

{'loss': Array(0.17462291, dtype=float32), 'loss_cross_entropy': Array(0.16458617, dtype=float32)}


  7%|▋         | 70899/1000000 [3:14:46<28:32:27,  9.04it/s]

{'loss': Array(0.16228557, dtype=float32), 'loss_cross_entropy': Array(0.15261862, dtype=float32)}


  7%|▋         | 70909/1000000 [3:14:48<24:44:22, 10.43it/s]

{'loss': Array(0.18119499, dtype=float32), 'loss_cross_entropy': Array(0.17073712, dtype=float32)}


  7%|▋         | 70919/1000000 [3:14:49<38:06:57,  6.77it/s]

{'loss': Array(0.16837752, dtype=float32), 'loss_cross_entropy': Array(0.15817253, dtype=float32)}


  7%|▋         | 70929/1000000 [3:14:51<26:24:19,  9.77it/s]

{'loss': Array(0.16536252, dtype=float32), 'loss_cross_entropy': Array(0.1552852, dtype=float32)}


  7%|▋         | 70939/1000000 [3:14:52<24:06:50, 10.70it/s]

{'loss': Array(0.15223323, dtype=float32), 'loss_cross_entropy': Array(0.14301208, dtype=float32)}


  7%|▋         | 70949/1000000 [3:14:54<31:08:23,  8.29it/s]

{'loss': Array(0.14433736, dtype=float32), 'loss_cross_entropy': Array(0.13507245, dtype=float32)}


  7%|▋         | 70959/1000000 [3:14:55<25:16:43, 10.21it/s]

{'loss': Array(0.15615007, dtype=float32), 'loss_cross_entropy': Array(0.14632788, dtype=float32)}


  7%|▋         | 70969/1000000 [3:14:57<34:24:23,  7.50it/s]

{'loss': Array(0.16592136, dtype=float32), 'loss_cross_entropy': Array(0.15547793, dtype=float32)}


  7%|▋         | 70979/1000000 [3:14:58<25:35:28, 10.08it/s]

{'loss': Array(0.14942351, dtype=float32), 'loss_cross_entropy': Array(0.14050467, dtype=float32)}


  7%|▋         | 70989/1000000 [3:15:00<35:39:42,  7.24it/s]

{'loss': Array(0.14701831, dtype=float32), 'loss_cross_entropy': Array(0.13806477, dtype=float32)}


  7%|▋         | 70999/1000000 [3:15:02<27:21:28,  9.43it/s]

{'loss': Array(0.14846581, dtype=float32), 'loss_cross_entropy': Array(0.1397191, dtype=float32)}


  7%|▋         | 71009/1000000 [3:15:10<87:13:03,  2.96it/s] 

{'loss': Array(0.16201368, dtype=float32), 'loss_cross_entropy': Array(0.15264216, dtype=float32)}


  7%|▋         | 71019/1000000 [3:15:11<39:26:57,  6.54it/s]

{'loss': Array(0.15207605, dtype=float32), 'loss_cross_entropy': Array(0.14301167, dtype=float32)}


  7%|▋         | 71029/1000000 [3:15:13<27:23:36,  9.42it/s]

{'loss': Array(0.13888282, dtype=float32), 'loss_cross_entropy': Array(0.13024664, dtype=float32)}


  7%|▋         | 71039/1000000 [3:15:14<34:09:23,  7.55it/s]

{'loss': Array(0.16609482, dtype=float32), 'loss_cross_entropy': Array(0.15648897, dtype=float32)}


  7%|▋         | 71049/1000000 [3:15:16<25:55:10,  9.96it/s]

{'loss': Array(0.1861854, dtype=float32), 'loss_cross_entropy': Array(0.17501022, dtype=float32)}


  7%|▋         | 71059/1000000 [3:15:18<37:36:38,  6.86it/s]

{'loss': Array(0.15667327, dtype=float32), 'loss_cross_entropy': Array(0.14666688, dtype=float32)}


  7%|▋         | 71069/1000000 [3:15:19<25:42:16, 10.04it/s]

{'loss': Array(0.15170832, dtype=float32), 'loss_cross_entropy': Array(0.14251411, dtype=float32)}


  7%|▋         | 71079/1000000 [3:15:20<24:39:15, 10.47it/s]

{'loss': Array(0.14837325, dtype=float32), 'loss_cross_entropy': Array(0.13928883, dtype=float32)}


  7%|▋         | 71089/1000000 [3:15:22<31:02:29,  8.31it/s]

{'loss': Array(0.15699223, dtype=float32), 'loss_cross_entropy': Array(0.14804165, dtype=float32)}


  7%|▋         | 71099/1000000 [3:15:23<25:33:03, 10.10it/s]

{'loss': Array(0.14231083, dtype=float32), 'loss_cross_entropy': Array(0.13344005, dtype=float32)}


  7%|▋         | 71109/1000000 [3:15:25<33:59:43,  7.59it/s]

{'loss': Array(0.16233861, dtype=float32), 'loss_cross_entropy': Array(0.15253463, dtype=float32)}


  7%|▋         | 71119/1000000 [3:15:26<25:42:28, 10.04it/s]

{'loss': Array(0.17460589, dtype=float32), 'loss_cross_entropy': Array(0.16453671, dtype=float32)}


  7%|▋         | 71129/1000000 [3:15:28<23:40:45, 10.90it/s]

{'loss': Array(0.16354631, dtype=float32), 'loss_cross_entropy': Array(0.15396337, dtype=float32)}


  7%|▋         | 71139/1000000 [3:15:29<27:04:54,  9.53it/s]

{'loss': Array(0.16358154, dtype=float32), 'loss_cross_entropy': Array(0.15389955, dtype=float32)}


  7%|▋         | 71149/1000000 [3:15:31<25:07:24, 10.27it/s]

{'loss': Array(0.1353268, dtype=float32), 'loss_cross_entropy': Array(0.12664616, dtype=float32)}


  7%|▋         | 71159/1000000 [3:15:33<38:23:29,  6.72it/s]

{'loss': Array(0.16717674, dtype=float32), 'loss_cross_entropy': Array(0.15720022, dtype=float32)}


  7%|▋         | 71169/1000000 [3:15:34<26:01:02,  9.92it/s]

{'loss': Array(0.1374441, dtype=float32), 'loss_cross_entropy': Array(0.12861669, dtype=float32)}


  7%|▋         | 71179/1000000 [3:15:35<24:20:58, 10.60it/s]

{'loss': Array(0.17097734, dtype=float32), 'loss_cross_entropy': Array(0.16096845, dtype=float32)}


  7%|▋         | 71189/1000000 [3:15:37<30:17:53,  8.52it/s]

{'loss': Array(0.16015801, dtype=float32), 'loss_cross_entropy': Array(0.15068097, dtype=float32)}


  7%|▋         | 71199/1000000 [3:15:38<26:05:49,  9.89it/s]

{'loss': Array(0.13991813, dtype=float32), 'loss_cross_entropy': Array(0.13123222, dtype=float32)}


  7%|▋         | 71209/1000000 [3:15:40<33:38:05,  7.67it/s]

{'loss': Array(0.15831192, dtype=float32), 'loss_cross_entropy': Array(0.14827763, dtype=float32)}


  7%|▋         | 71219/1000000 [3:15:41<25:55:28,  9.95it/s]

{'loss': Array(0.17267019, dtype=float32), 'loss_cross_entropy': Array(0.16240685, dtype=float32)}


  7%|▋         | 71229/1000000 [3:15:43<34:17:14,  7.52it/s]

{'loss': Array(0.14872196, dtype=float32), 'loss_cross_entropy': Array(0.1392126, dtype=float32)}


  7%|▋         | 71239/1000000 [3:15:45<25:05:54, 10.28it/s]

{'loss': Array(0.16231248, dtype=float32), 'loss_cross_entropy': Array(0.15271664, dtype=float32)}


  7%|▋         | 71249/1000000 [3:15:46<23:43:20, 10.88it/s]

{'loss': Array(0.158385, dtype=float32), 'loss_cross_entropy': Array(0.1489101, dtype=float32)}


  7%|▋         | 71259/1000000 [3:15:48<28:55:44,  8.92it/s]

{'loss': Array(0.16025862, dtype=float32), 'loss_cross_entropy': Array(0.1506949, dtype=float32)}


  7%|▋         | 71269/1000000 [3:15:49<24:40:50, 10.45it/s]

{'loss': Array(0.1526712, dtype=float32), 'loss_cross_entropy': Array(0.14373614, dtype=float32)}


  7%|▋         | 71279/1000000 [3:15:51<31:03:03,  8.31it/s]

{'loss': Array(0.17074461, dtype=float32), 'loss_cross_entropy': Array(0.1607458, dtype=float32)}


  7%|▋         | 71289/1000000 [3:15:52<25:29:39, 10.12it/s]

{'loss': Array(0.13415329, dtype=float32), 'loss_cross_entropy': Array(0.12545581, dtype=float32)}


  7%|▋         | 71299/1000000 [3:15:54<38:44:33,  6.66it/s]

{'loss': Array(0.16960524, dtype=float32), 'loss_cross_entropy': Array(0.15907882, dtype=float32)}


  7%|▋         | 71309/1000000 [3:15:55<26:21:07,  9.79it/s]

{'loss': Array(0.14569013, dtype=float32), 'loss_cross_entropy': Array(0.13657705, dtype=float32)}


  7%|▋         | 71319/1000000 [3:15:57<38:16:44,  6.74it/s]

{'loss': Array(0.14927499, dtype=float32), 'loss_cross_entropy': Array(0.14066754, dtype=float32)}


  7%|▋         | 71329/1000000 [3:15:58<26:12:42,  9.84it/s]

{'loss': Array(0.14339396, dtype=float32), 'loss_cross_entropy': Array(0.1353514, dtype=float32)}


  7%|▋         | 71339/1000000 [3:16:00<25:25:21, 10.15it/s]

{'loss': Array(0.13832699, dtype=float32), 'loss_cross_entropy': Array(0.12990725, dtype=float32)}


  7%|▋         | 71349/1000000 [3:16:01<35:18:05,  7.31it/s]

{'loss': Array(0.16299736, dtype=float32), 'loss_cross_entropy': Array(0.1533947, dtype=float32)}


  7%|▋         | 71359/1000000 [3:16:03<26:32:08,  9.72it/s]

{'loss': Array(0.13092494, dtype=float32), 'loss_cross_entropy': Array(0.12275245, dtype=float32)}


  7%|▋         | 71369/1000000 [3:16:05<39:35:02,  6.52it/s]

{'loss': Array(0.15834886, dtype=float32), 'loss_cross_entropy': Array(0.14868036, dtype=float32)}


  7%|▋         | 71379/1000000 [3:16:06<27:41:04,  9.32it/s]

{'loss': Array(0.15103279, dtype=float32), 'loss_cross_entropy': Array(0.14189486, dtype=float32)}


  7%|▋         | 71389/1000000 [3:16:07<24:51:07, 10.38it/s]

{'loss': Array(0.15927894, dtype=float32), 'loss_cross_entropy': Array(0.15011963, dtype=float32)}


  7%|▋         | 71399/1000000 [3:16:09<27:25:14,  9.41it/s]

{'loss': Array(0.15928544, dtype=float32), 'loss_cross_entropy': Array(0.1501212, dtype=float32)}


  7%|▋         | 71409/1000000 [3:16:10<24:39:10, 10.46it/s]

{'loss': Array(0.15122035, dtype=float32), 'loss_cross_entropy': Array(0.1420481, dtype=float32)}


  7%|▋         | 71419/1000000 [3:16:12<27:21:06,  9.43it/s]

{'loss': Array(0.16781148, dtype=float32), 'loss_cross_entropy': Array(0.1582421, dtype=float32)}


  7%|▋         | 71429/1000000 [3:16:14<24:22:01, 10.59it/s]

{'loss': Array(0.16390662, dtype=float32), 'loss_cross_entropy': Array(0.1549881, dtype=float32)}


  7%|▋         | 71439/1000000 [3:16:15<38:03:06,  6.78it/s]

{'loss': Array(0.12662984, dtype=float32), 'loss_cross_entropy': Array(0.11886578, dtype=float32)}


  7%|▋         | 71449/1000000 [3:16:17<25:51:07,  9.98it/s]

{'loss': Array(0.16006856, dtype=float32), 'loss_cross_entropy': Array(0.14995964, dtype=float32)}


  7%|▋         | 71459/1000000 [3:16:18<24:31:54, 10.51it/s]

{'loss': Array(0.16030766, dtype=float32), 'loss_cross_entropy': Array(0.15030967, dtype=float32)}


  7%|▋         | 71469/1000000 [3:16:20<30:26:21,  8.47it/s]

{'loss': Array(0.16010803, dtype=float32), 'loss_cross_entropy': Array(0.15125771, dtype=float32)}


  7%|▋         | 71479/1000000 [3:16:21<24:41:23, 10.45it/s]

{'loss': Array(0.14662029, dtype=float32), 'loss_cross_entropy': Array(0.13792746, dtype=float32)}


  7%|▋         | 71489/1000000 [3:16:23<38:06:39,  6.77it/s]

{'loss': Array(0.16641521, dtype=float32), 'loss_cross_entropy': Array(0.15616913, dtype=float32)}


  7%|▋         | 71499/1000000 [3:16:24<26:17:22,  9.81it/s]

{'loss': Array(0.16808236, dtype=float32), 'loss_cross_entropy': Array(0.15874143, dtype=float32)}


  7%|▋         | 71509/1000000 [3:16:32<83:51:09,  3.08it/s] 

{'loss': Array(0.16951975, dtype=float32), 'loss_cross_entropy': Array(0.15947096, dtype=float32)}


  7%|▋         | 71519/1000000 [3:16:34<38:58:05,  6.62it/s]

{'loss': Array(0.1641383, dtype=float32), 'loss_cross_entropy': Array(0.15364741, dtype=float32)}


  7%|▋         | 71529/1000000 [3:16:35<26:22:47,  9.78it/s]

{'loss': Array(0.14369307, dtype=float32), 'loss_cross_entropy': Array(0.13504921, dtype=float32)}


  7%|▋         | 71539/1000000 [3:16:37<31:11:40,  8.27it/s]

{'loss': Array(0.14847146, dtype=float32), 'loss_cross_entropy': Array(0.13920169, dtype=float32)}


  7%|▋         | 71549/1000000 [3:16:38<25:30:46, 10.11it/s]

{'loss': Array(0.16306198, dtype=float32), 'loss_cross_entropy': Array(0.15332925, dtype=float32)}


  7%|▋         | 71559/1000000 [3:16:40<37:59:10,  6.79it/s]

{'loss': Array(0.1512886, dtype=float32), 'loss_cross_entropy': Array(0.14221509, dtype=float32)}


  7%|▋         | 71569/1000000 [3:16:41<26:13:44,  9.83it/s]

{'loss': Array(0.15084547, dtype=float32), 'loss_cross_entropy': Array(0.14143549, dtype=float32)}


  7%|▋         | 71579/1000000 [3:16:42<24:47:41, 10.40it/s]

{'loss': Array(0.1676912, dtype=float32), 'loss_cross_entropy': Array(0.15777443, dtype=float32)}


  7%|▋         | 71589/1000000 [3:16:44<27:01:04,  9.55it/s]

{'loss': Array(0.15576561, dtype=float32), 'loss_cross_entropy': Array(0.14666341, dtype=float32)}


  7%|▋         | 71599/1000000 [3:16:46<24:32:36, 10.51it/s]

{'loss': Array(0.17346548, dtype=float32), 'loss_cross_entropy': Array(0.16365413, dtype=float32)}


  7%|▋         | 71609/1000000 [3:16:47<37:54:08,  6.80it/s]

{'loss': Array(0.14302179, dtype=float32), 'loss_cross_entropy': Array(0.13471706, dtype=float32)}


  7%|▋         | 71619/1000000 [3:16:49<26:18:43,  9.80it/s]

{'loss': Array(0.15921013, dtype=float32), 'loss_cross_entropy': Array(0.1490349, dtype=float32)}


  7%|▋         | 71629/1000000 [3:16:50<24:39:36, 10.46it/s]

{'loss': Array(0.15216349, dtype=float32), 'loss_cross_entropy': Array(0.14277628, dtype=float32)}


  7%|▋         | 71639/1000000 [3:16:52<28:37:30,  9.01it/s]

{'loss': Array(0.16176204, dtype=float32), 'loss_cross_entropy': Array(0.15211849, dtype=float32)}


  7%|▋         | 71649/1000000 [3:16:53<24:20:40, 10.59it/s]

{'loss': Array(0.16635385, dtype=float32), 'loss_cross_entropy': Array(0.15671, dtype=float32)}


  7%|▋         | 71659/1000000 [3:16:55<33:19:40,  7.74it/s]

{'loss': Array(0.1445694, dtype=float32), 'loss_cross_entropy': Array(0.13535978, dtype=float32)}


  7%|▋         | 71669/1000000 [3:16:56<25:20:01, 10.18it/s]

{'loss': Array(0.15395427, dtype=float32), 'loss_cross_entropy': Array(0.14443915, dtype=float32)}


  7%|▋         | 71679/1000000 [3:16:58<37:36:12,  6.86it/s]

{'loss': Array(0.15091297, dtype=float32), 'loss_cross_entropy': Array(0.14162238, dtype=float32)}


  7%|▋         | 71689/1000000 [3:16:59<27:10:20,  9.49it/s]

{'loss': Array(0.15642394, dtype=float32), 'loss_cross_entropy': Array(0.14728527, dtype=float32)}


  7%|▋         | 71699/1000000 [3:17:01<25:52:29,  9.97it/s]

{'loss': Array(0.1453357, dtype=float32), 'loss_cross_entropy': Array(0.13600242, dtype=float32)}


  7%|▋         | 71709/1000000 [3:17:02<32:39:22,  7.90it/s]

{'loss': Array(0.1645498, dtype=float32), 'loss_cross_entropy': Array(0.15483126, dtype=float32)}


  7%|▋         | 71719/1000000 [3:17:04<26:49:40,  9.61it/s]

{'loss': Array(0.1475613, dtype=float32), 'loss_cross_entropy': Array(0.13799924, dtype=float32)}


  7%|▋         | 71729/1000000 [3:17:06<35:58:54,  7.17it/s]

{'loss': Array(0.14235377, dtype=float32), 'loss_cross_entropy': Array(0.13383788, dtype=float32)}


  7%|▋         | 71739/1000000 [3:17:07<26:21:22,  9.78it/s]

{'loss': Array(0.15749846, dtype=float32), 'loss_cross_entropy': Array(0.14820307, dtype=float32)}


  7%|▋         | 71749/1000000 [3:17:08<24:03:56, 10.71it/s]

{'loss': Array(0.13824156, dtype=float32), 'loss_cross_entropy': Array(0.12950899, dtype=float32)}


  7%|▋         | 71759/1000000 [3:17:10<27:28:57,  9.38it/s]

{'loss': Array(0.1542224, dtype=float32), 'loss_cross_entropy': Array(0.14469571, dtype=float32)}


  7%|▋         | 71769/1000000 [3:17:11<24:45:39, 10.41it/s]

{'loss': Array(0.15988252, dtype=float32), 'loss_cross_entropy': Array(0.15056269, dtype=float32)}


  7%|▋         | 71779/1000000 [3:17:13<38:02:38,  6.78it/s]

{'loss': Array(0.1540979, dtype=float32), 'loss_cross_entropy': Array(0.14414953, dtype=float32)}


  7%|▋         | 71789/1000000 [3:17:15<26:22:01,  9.78it/s]

{'loss': Array(0.14881036, dtype=float32), 'loss_cross_entropy': Array(0.13898015, dtype=float32)}


  7%|▋         | 71799/1000000 [3:17:16<24:22:06, 10.58it/s]

{'loss': Array(0.14505693, dtype=float32), 'loss_cross_entropy': Array(0.13645917, dtype=float32)}


  7%|▋         | 71809/1000000 [3:17:18<30:56:10,  8.33it/s]

{'loss': Array(0.13749732, dtype=float32), 'loss_cross_entropy': Array(0.12929897, dtype=float32)}


  7%|▋         | 71819/1000000 [3:17:19<24:54:38, 10.35it/s]

{'loss': Array(0.1683116, dtype=float32), 'loss_cross_entropy': Array(0.15784688, dtype=float32)}


  7%|▋         | 71829/1000000 [3:17:21<33:26:11,  7.71it/s]

{'loss': Array(0.15809752, dtype=float32), 'loss_cross_entropy': Array(0.14917053, dtype=float32)}


  7%|▋         | 71839/1000000 [3:17:22<25:06:01, 10.27it/s]

{'loss': Array(0.1773786, dtype=float32), 'loss_cross_entropy': Array(0.16664557, dtype=float32)}


  7%|▋         | 71849/1000000 [3:17:24<33:33:05,  7.68it/s]

{'loss': Array(0.16406225, dtype=float32), 'loss_cross_entropy': Array(0.15467261, dtype=float32)}


  7%|▋         | 71859/1000000 [3:17:25<25:44:30, 10.02it/s]

{'loss': Array(0.14973736, dtype=float32), 'loss_cross_entropy': Array(0.14032279, dtype=float32)}


  7%|▋         | 71869/1000000 [3:17:26<24:32:40, 10.50it/s]

{'loss': Array(0.15928915, dtype=float32), 'loss_cross_entropy': Array(0.1493223, dtype=float32)}


  7%|▋         | 71879/1000000 [3:17:28<28:44:28,  8.97it/s]

{'loss': Array(0.15539305, dtype=float32), 'loss_cross_entropy': Array(0.1462473, dtype=float32)}


  7%|▋         | 71889/1000000 [3:17:30<24:29:04, 10.53it/s]

{'loss': Array(0.14989798, dtype=float32), 'loss_cross_entropy': Array(0.14052467, dtype=float32)}


  7%|▋         | 71899/1000000 [3:17:31<30:57:17,  8.33it/s]

{'loss': Array(0.16227143, dtype=float32), 'loss_cross_entropy': Array(0.1515852, dtype=float32)}


  7%|▋         | 71909/1000000 [3:17:33<24:51:47, 10.37it/s]

{'loss': Array(0.15936849, dtype=float32), 'loss_cross_entropy': Array(0.15009275, dtype=float32)}


  7%|▋         | 71919/1000000 [3:17:34<37:41:06,  6.84it/s]

{'loss': Array(0.17255068, dtype=float32), 'loss_cross_entropy': Array(0.1624738, dtype=float32)}


  7%|▋         | 71929/1000000 [3:17:36<26:31:57,  9.72it/s]

{'loss': Array(0.16423352, dtype=float32), 'loss_cross_entropy': Array(0.15423462, dtype=float32)}


  7%|▋         | 71939/1000000 [3:17:37<37:56:35,  6.79it/s]

{'loss': Array(0.14698051, dtype=float32), 'loss_cross_entropy': Array(0.13818997, dtype=float32)}


  7%|▋         | 71949/1000000 [3:17:39<26:15:40,  9.82it/s]

{'loss': Array(0.14883237, dtype=float32), 'loss_cross_entropy': Array(0.13941988, dtype=float32)}


  7%|▋         | 71959/1000000 [3:17:40<24:05:36, 10.70it/s]

{'loss': Array(0.15970854, dtype=float32), 'loss_cross_entropy': Array(0.15035833, dtype=float32)}


  7%|▋         | 71969/1000000 [3:17:42<33:36:52,  7.67it/s]

{'loss': Array(0.15498748, dtype=float32), 'loss_cross_entropy': Array(0.14507024, dtype=float32)}


  7%|▋         | 71979/1000000 [3:17:43<25:36:36, 10.07it/s]

{'loss': Array(0.14739665, dtype=float32), 'loss_cross_entropy': Array(0.13849925, dtype=float32)}


  7%|▋         | 71989/1000000 [3:17:45<37:47:53,  6.82it/s]

{'loss': Array(0.12981078, dtype=float32), 'loss_cross_entropy': Array(0.12156913, dtype=float32)}


  7%|▋         | 71999/1000000 [3:17:46<26:21:16,  9.78it/s]

{'loss': Array(0.16459037, dtype=float32), 'loss_cross_entropy': Array(0.15513174, dtype=float32)}


  7%|▋         | 72009/1000000 [3:17:54<85:51:31,  3.00it/s] 

{'loss': Array(0.17774886, dtype=float32), 'loss_cross_entropy': Array(0.16802572, dtype=float32)}


  7%|▋         | 72019/1000000 [3:17:56<37:15:16,  6.92it/s] 

{'loss': Array(0.15536879, dtype=float32), 'loss_cross_entropy': Array(0.14609697, dtype=float32)}


  7%|▋         | 72029/1000000 [3:17:57<26:31:20,  9.72it/s]

{'loss': Array(0.1674002, dtype=float32), 'loss_cross_entropy': Array(0.15755369, dtype=float32)}


  7%|▋         | 72039/1000000 [3:17:59<29:05:46,  8.86it/s]

{'loss': Array(0.15248445, dtype=float32), 'loss_cross_entropy': Array(0.14354266, dtype=float32)}


  7%|▋         | 72049/1000000 [3:18:00<24:45:49, 10.41it/s]

{'loss': Array(0.15336274, dtype=float32), 'loss_cross_entropy': Array(0.14413284, dtype=float32)}


  7%|▋         | 72059/1000000 [3:18:02<38:47:05,  6.65it/s]

{'loss': Array(0.16983457, dtype=float32), 'loss_cross_entropy': Array(0.15930954, dtype=float32)}


  7%|▋         | 72069/1000000 [3:18:04<26:54:26,  9.58it/s]

{'loss': Array(0.15887506, dtype=float32), 'loss_cross_entropy': Array(0.14932409, dtype=float32)}


  7%|▋         | 72079/1000000 [3:18:05<25:03:14, 10.29it/s]

{'loss': Array(0.15836804, dtype=float32), 'loss_cross_entropy': Array(0.14897649, dtype=float32)}


  7%|▋         | 72089/1000000 [3:18:07<31:47:56,  8.11it/s]

{'loss': Array(0.14600943, dtype=float32), 'loss_cross_entropy': Array(0.13708635, dtype=float32)}


  7%|▋         | 72099/1000000 [3:18:08<25:47:43,  9.99it/s]

{'loss': Array(0.16146643, dtype=float32), 'loss_cross_entropy': Array(0.15177552, dtype=float32)}


  7%|▋         | 72109/1000000 [3:18:10<38:58:11,  6.61it/s]

{'loss': Array(0.15707216, dtype=float32), 'loss_cross_entropy': Array(0.14756668, dtype=float32)}


  7%|▋         | 72119/1000000 [3:18:11<27:26:13,  9.39it/s]

{'loss': Array(0.14676967, dtype=float32), 'loss_cross_entropy': Array(0.13759364, dtype=float32)}


  7%|▋         | 72129/1000000 [3:18:13<25:51:42,  9.97it/s]

{'loss': Array(0.14587802, dtype=float32), 'loss_cross_entropy': Array(0.1369759, dtype=float32)}


  7%|▋         | 72139/1000000 [3:18:15<29:09:19,  8.84it/s]

{'loss': Array(0.15562049, dtype=float32), 'loss_cross_entropy': Array(0.14615671, dtype=float32)}


  7%|▋         | 72149/1000000 [3:18:16<24:48:33, 10.39it/s]

{'loss': Array(0.14927718, dtype=float32), 'loss_cross_entropy': Array(0.14028545, dtype=float32)}


  7%|▋         | 72159/1000000 [3:18:18<34:30:29,  7.47it/s]

{'loss': Array(0.14876617, dtype=float32), 'loss_cross_entropy': Array(0.1394141, dtype=float32)}


  7%|▋         | 72169/1000000 [3:18:19<25:52:29,  9.96it/s]

{'loss': Array(0.16352956, dtype=float32), 'loss_cross_entropy': Array(0.15365739, dtype=float32)}


  7%|▋         | 72179/1000000 [3:18:20<24:25:37, 10.55it/s]

{'loss': Array(0.16046906, dtype=float32), 'loss_cross_entropy': Array(0.1511103, dtype=float32)}


  7%|▋         | 72189/1000000 [3:18:22<30:48:49,  8.36it/s]

{'loss': Array(0.13669325, dtype=float32), 'loss_cross_entropy': Array(0.12841083, dtype=float32)}


  7%|▋         | 72199/1000000 [3:18:23<25:09:54, 10.24it/s]

{'loss': Array(0.14607315, dtype=float32), 'loss_cross_entropy': Array(0.13696055, dtype=float32)}


  7%|▋         | 72209/1000000 [3:18:25<23:54:54, 10.78it/s]

{'loss': Array(0.14789806, dtype=float32), 'loss_cross_entropy': Array(0.13937767, dtype=float32)}


  7%|▋         | 72219/1000000 [3:18:27<26:55:26,  9.57it/s]

{'loss': Array(0.15107243, dtype=float32), 'loss_cross_entropy': Array(0.14217918, dtype=float32)}


  7%|▋         | 72229/1000000 [3:18:28<24:27:38, 10.54it/s]

{'loss': Array(0.1583421, dtype=float32), 'loss_cross_entropy': Array(0.14900748, dtype=float32)}


  7%|▋         | 72239/1000000 [3:18:30<31:14:07,  8.25it/s]

{'loss': Array(0.15634868, dtype=float32), 'loss_cross_entropy': Array(0.14679536, dtype=float32)}


  7%|▋         | 72249/1000000 [3:18:31<24:53:54, 10.35it/s]

{'loss': Array(0.15998167, dtype=float32), 'loss_cross_entropy': Array(0.15061788, dtype=float32)}


  7%|▋         | 72259/1000000 [3:18:33<38:07:31,  6.76it/s]

{'loss': Array(0.14754413, dtype=float32), 'loss_cross_entropy': Array(0.13880609, dtype=float32)}


  7%|▋         | 72269/1000000 [3:18:34<26:52:40,  9.59it/s]

{'loss': Array(0.15557878, dtype=float32), 'loss_cross_entropy': Array(0.14591649, dtype=float32)}


  7%|▋         | 72279/1000000 [3:18:36<38:26:33,  6.70it/s]

{'loss': Array(0.15614483, dtype=float32), 'loss_cross_entropy': Array(0.14732362, dtype=float32)}


  7%|▋         | 72289/1000000 [3:18:37<26:27:24,  9.74it/s]

{'loss': Array(0.15117903, dtype=float32), 'loss_cross_entropy': Array(0.14222765, dtype=float32)}


  7%|▋         | 72299/1000000 [3:18:39<24:26:33, 10.54it/s]

{'loss': Array(0.15067819, dtype=float32), 'loss_cross_entropy': Array(0.14151484, dtype=float32)}


  7%|▋         | 72309/1000000 [3:18:40<33:22:12,  7.72it/s]

{'loss': Array(0.14269312, dtype=float32), 'loss_cross_entropy': Array(0.13415296, dtype=float32)}


  7%|▋         | 72319/1000000 [3:18:42<25:14:40, 10.21it/s]

{'loss': Array(0.15176128, dtype=float32), 'loss_cross_entropy': Array(0.14310695, dtype=float32)}


  7%|▋         | 72329/1000000 [3:18:43<38:37:45,  6.67it/s]

{'loss': Array(0.16017103, dtype=float32), 'loss_cross_entropy': Array(0.15049444, dtype=float32)}


  7%|▋         | 72339/1000000 [3:18:45<26:07:46,  9.86it/s]

{'loss': Array(0.14255725, dtype=float32), 'loss_cross_entropy': Array(0.13451093, dtype=float32)}


  7%|▋         | 72349/1000000 [3:18:46<24:01:41, 10.72it/s]

{'loss': Array(0.15954576, dtype=float32), 'loss_cross_entropy': Array(0.1502747, dtype=float32)}


  7%|▋         | 72359/1000000 [3:18:48<26:43:40,  9.64it/s]

{'loss': Array(0.13672312, dtype=float32), 'loss_cross_entropy': Array(0.12851809, dtype=float32)}


  7%|▋         | 72369/1000000 [3:18:49<24:50:42, 10.37it/s]

{'loss': Array(0.14775796, dtype=float32), 'loss_cross_entropy': Array(0.13872683, dtype=float32)}


  7%|▋         | 72379/1000000 [3:18:51<27:30:39,  9.37it/s]

{'loss': Array(0.14090869, dtype=float32), 'loss_cross_entropy': Array(0.1326229, dtype=float32)}


  7%|▋         | 72389/1000000 [3:18:52<24:52:40, 10.36it/s]

{'loss': Array(0.16133985, dtype=float32), 'loss_cross_entropy': Array(0.15210211, dtype=float32)}


  7%|▋         | 72399/1000000 [3:18:54<38:03:10,  6.77it/s]

{'loss': Array(0.15293609, dtype=float32), 'loss_cross_entropy': Array(0.14370474, dtype=float32)}


  7%|▋         | 72409/1000000 [3:18:55<26:29:25,  9.73it/s]

{'loss': Array(0.16092663, dtype=float32), 'loss_cross_entropy': Array(0.15133749, dtype=float32)}


  7%|▋         | 72419/1000000 [3:18:57<24:23:04, 10.57it/s]

{'loss': Array(0.1521447, dtype=float32), 'loss_cross_entropy': Array(0.14312151, dtype=float32)}


  7%|▋         | 72429/1000000 [3:18:58<31:49:53,  8.09it/s]

{'loss': Array(0.1334501, dtype=float32), 'loss_cross_entropy': Array(0.12482681, dtype=float32)}


  7%|▋         | 72439/1000000 [3:19:00<26:34:56,  9.69it/s]

{'loss': Array(0.16384368, dtype=float32), 'loss_cross_entropy': Array(0.15427604, dtype=float32)}


  7%|▋         | 72449/1000000 [3:19:02<39:52:43,  6.46it/s]

{'loss': Array(0.1600053, dtype=float32), 'loss_cross_entropy': Array(0.15011826, dtype=float32)}


  7%|▋         | 72459/1000000 [3:19:03<28:03:06,  9.18it/s]

{'loss': Array(0.13560803, dtype=float32), 'loss_cross_entropy': Array(0.12706867, dtype=float32)}


  7%|▋         | 72469/1000000 [3:19:05<25:51:47,  9.96it/s]

{'loss': Array(0.14280751, dtype=float32), 'loss_cross_entropy': Array(0.13366754, dtype=float32)}


  7%|▋         | 72479/1000000 [3:19:06<30:36:44,  8.42it/s]

{'loss': Array(0.16900223, dtype=float32), 'loss_cross_entropy': Array(0.15919998, dtype=float32)}


  7%|▋         | 72489/1000000 [3:19:08<26:19:20,  9.79it/s]

{'loss': Array(0.13361786, dtype=float32), 'loss_cross_entropy': Array(0.12551004, dtype=float32)}


  7%|▋         | 72499/1000000 [3:19:10<34:03:13,  7.57it/s]

{'loss': Array(0.17047095, dtype=float32), 'loss_cross_entropy': Array(0.16010009, dtype=float32)}


  7%|▋         | 72509/1000000 [3:19:17<85:18:45,  3.02it/s] 

{'loss': Array(0.15533505, dtype=float32), 'loss_cross_entropy': Array(0.14531173, dtype=float32)}


  7%|▋         | 72519/1000000 [3:19:19<44:12:57,  5.83it/s]

{'loss': Array(0.15925205, dtype=float32), 'loss_cross_entropy': Array(0.14946924, dtype=float32)}


  7%|▋         | 72529/1000000 [3:19:20<27:28:26,  9.38it/s]

{'loss': Array(0.15538438, dtype=float32), 'loss_cross_entropy': Array(0.1461954, dtype=float32)}


  7%|▋         | 72539/1000000 [3:19:22<24:15:55, 10.62it/s]

{'loss': Array(0.14292948, dtype=float32), 'loss_cross_entropy': Array(0.1339999, dtype=float32)}


  7%|▋         | 72549/1000000 [3:19:23<30:38:09,  8.41it/s]

{'loss': Array(0.15288332, dtype=float32), 'loss_cross_entropy': Array(0.14342895, dtype=float32)}


  7%|▋         | 72559/1000000 [3:19:25<24:44:10, 10.41it/s]

{'loss': Array(0.14427224, dtype=float32), 'loss_cross_entropy': Array(0.13554473, dtype=float32)}


  7%|▋         | 72569/1000000 [3:19:26<24:14:08, 10.63it/s]

{'loss': Array(0.15287556, dtype=float32), 'loss_cross_entropy': Array(0.14338343, dtype=float32)}


  7%|▋         | 72579/1000000 [3:19:28<27:02:02,  9.53it/s]

{'loss': Array(0.1621485, dtype=float32), 'loss_cross_entropy': Array(0.15215708, dtype=float32)}


  7%|▋         | 72589/1000000 [3:19:29<24:30:12, 10.51it/s]

{'loss': Array(0.15264454, dtype=float32), 'loss_cross_entropy': Array(0.14310661, dtype=float32)}


  7%|▋         | 72599/1000000 [3:19:31<30:51:48,  8.35it/s]

{'loss': Array(0.14396484, dtype=float32), 'loss_cross_entropy': Array(0.13454092, dtype=float32)}


  7%|▋         | 72609/1000000 [3:19:32<24:39:11, 10.45it/s]

{'loss': Array(0.14016387, dtype=float32), 'loss_cross_entropy': Array(0.13101141, dtype=float32)}


  7%|▋         | 72619/1000000 [3:19:34<34:00:33,  7.57it/s]

{'loss': Array(0.14240451, dtype=float32), 'loss_cross_entropy': Array(0.13378148, dtype=float32)}


  7%|▋         | 72629/1000000 [3:19:35<25:24:09, 10.14it/s]

{'loss': Array(0.1607149, dtype=float32), 'loss_cross_entropy': Array(0.15138078, dtype=float32)}


  7%|▋         | 72639/1000000 [3:19:37<24:16:30, 10.61it/s]

{'loss': Array(0.1317723, dtype=float32), 'loss_cross_entropy': Array(0.1233158, dtype=float32)}


  7%|▋         | 72649/1000000 [3:19:39<31:20:07,  8.22it/s]

{'loss': Array(0.14068888, dtype=float32), 'loss_cross_entropy': Array(0.13186035, dtype=float32)}


  7%|▋         | 72659/1000000 [3:19:40<26:02:24,  9.89it/s]

{'loss': Array(0.15489441, dtype=float32), 'loss_cross_entropy': Array(0.14511304, dtype=float32)}


  7%|▋         | 72669/1000000 [3:19:41<24:10:00, 10.66it/s]

{'loss': Array(0.1486341, dtype=float32), 'loss_cross_entropy': Array(0.13865148, dtype=float32)}


  7%|▋         | 72679/1000000 [3:19:43<27:13:14,  9.46it/s]

{'loss': Array(0.15494457, dtype=float32), 'loss_cross_entropy': Array(0.14555646, dtype=float32)}


  7%|▋         | 72689/1000000 [3:19:44<24:39:49, 10.44it/s]

{'loss': Array(0.14671125, dtype=float32), 'loss_cross_entropy': Array(0.13784133, dtype=float32)}


  7%|▋         | 72699/1000000 [3:19:46<31:32:00,  8.17it/s]

{'loss': Array(0.1651783, dtype=float32), 'loss_cross_entropy': Array(0.15555082, dtype=float32)}


  7%|▋         | 72709/1000000 [3:19:47<25:02:31, 10.29it/s]

{'loss': Array(0.13842924, dtype=float32), 'loss_cross_entropy': Array(0.13024521, dtype=float32)}


  7%|▋         | 72719/1000000 [3:19:49<38:03:00,  6.77it/s]

{'loss': Array(0.15257137, dtype=float32), 'loss_cross_entropy': Array(0.14361888, dtype=float32)}


  7%|▋         | 72729/1000000 [3:19:50<26:31:57,  9.71it/s]

{'loss': Array(0.14705434, dtype=float32), 'loss_cross_entropy': Array(0.13806653, dtype=float32)}


  7%|▋         | 72739/1000000 [3:19:52<23:49:01, 10.81it/s]

{'loss': Array(0.12519518, dtype=float32), 'loss_cross_entropy': Array(0.11715841, dtype=float32)}


  7%|▋         | 72749/1000000 [3:19:54<33:30:12,  7.69it/s]

{'loss': Array(0.15121135, dtype=float32), 'loss_cross_entropy': Array(0.14250676, dtype=float32)}


  7%|▋         | 72759/1000000 [3:19:55<25:29:59, 10.10it/s]

{'loss': Array(0.1502199, dtype=float32), 'loss_cross_entropy': Array(0.1415992, dtype=float32)}


  7%|▋         | 72769/1000000 [3:19:56<23:43:19, 10.86it/s]

{'loss': Array(0.1436458, dtype=float32), 'loss_cross_entropy': Array(0.13459352, dtype=float32)}


  7%|▋         | 72779/1000000 [3:19:58<28:10:26,  9.14it/s]

{'loss': Array(0.15881146, dtype=float32), 'loss_cross_entropy': Array(0.14964828, dtype=float32)}


  7%|▋         | 72789/1000000 [3:19:59<24:57:41, 10.32it/s]

{'loss': Array(0.1526787, dtype=float32), 'loss_cross_entropy': Array(0.14303774, dtype=float32)}


  7%|▋         | 72799/1000000 [3:20:01<32:12:21,  8.00it/s]

{'loss': Array(0.14199458, dtype=float32), 'loss_cross_entropy': Array(0.13329868, dtype=float32)}


  7%|▋         | 72809/1000000 [3:20:02<25:44:25, 10.01it/s]

{'loss': Array(0.16111518, dtype=float32), 'loss_cross_entropy': Array(0.15162352, dtype=float32)}


  7%|▋         | 72819/1000000 [3:20:04<25:25:12, 10.13it/s]

{'loss': Array(0.16052477, dtype=float32), 'loss_cross_entropy': Array(0.15107985, dtype=float32)}


  7%|▋         | 72829/1000000 [3:20:06<28:45:44,  8.95it/s]

{'loss': Array(0.16071501, dtype=float32), 'loss_cross_entropy': Array(0.15142004, dtype=float32)}


  7%|▋         | 72839/1000000 [3:20:07<26:11:22,  9.83it/s]

{'loss': Array(0.15670471, dtype=float32), 'loss_cross_entropy': Array(0.14738724, dtype=float32)}


  7%|▋         | 72849/1000000 [3:20:09<34:11:38,  7.53it/s]

{'loss': Array(0.13157639, dtype=float32), 'loss_cross_entropy': Array(0.12370073, dtype=float32)}


  7%|▋         | 72859/1000000 [3:20:10<25:31:41, 10.09it/s]

{'loss': Array(0.14473896, dtype=float32), 'loss_cross_entropy': Array(0.13578664, dtype=float32)}


  7%|▋         | 72869/1000000 [3:20:12<23:52:57, 10.78it/s]

{'loss': Array(0.16052479, dtype=float32), 'loss_cross_entropy': Array(0.15113811, dtype=float32)}


  7%|▋         | 72879/1000000 [3:20:13<27:16:05,  9.44it/s]

{'loss': Array(0.1643663, dtype=float32), 'loss_cross_entropy': Array(0.15416308, dtype=float32)}


  7%|▋         | 72889/1000000 [3:20:15<25:03:09, 10.28it/s]

{'loss': Array(0.16685914, dtype=float32), 'loss_cross_entropy': Array(0.15661497, dtype=float32)}


  7%|▋         | 72899/1000000 [3:20:16<29:04:55,  8.86it/s]

{'loss': Array(0.156246, dtype=float32), 'loss_cross_entropy': Array(0.14677157, dtype=float32)}


  7%|▋         | 72909/1000000 [3:20:18<25:05:42, 10.26it/s]

{'loss': Array(0.17183453, dtype=float32), 'loss_cross_entropy': Array(0.16287307, dtype=float32)}


  7%|▋         | 72919/1000000 [3:20:20<38:09:45,  6.75it/s]

{'loss': Array(0.16233596, dtype=float32), 'loss_cross_entropy': Array(0.15257211, dtype=float32)}


  7%|▋         | 72929/1000000 [3:20:21<26:00:27,  9.90it/s]

{'loss': Array(0.16032147, dtype=float32), 'loss_cross_entropy': Array(0.15043549, dtype=float32)}


  7%|▋         | 72939/1000000 [3:20:22<24:34:35, 10.48it/s]

{'loss': Array(0.16685887, dtype=float32), 'loss_cross_entropy': Array(0.15666723, dtype=float32)}


  7%|▋         | 72949/1000000 [3:20:24<31:05:15,  8.28it/s]

{'loss': Array(0.14687997, dtype=float32), 'loss_cross_entropy': Array(0.13740425, dtype=float32)}


  7%|▋         | 72959/1000000 [3:20:25<24:55:44, 10.33it/s]

{'loss': Array(0.15075223, dtype=float32), 'loss_cross_entropy': Array(0.141759, dtype=float32)}


  7%|▋         | 72969/1000000 [3:20:27<33:45:11,  7.63it/s]

{'loss': Array(0.14264157, dtype=float32), 'loss_cross_entropy': Array(0.13311158, dtype=float32)}


  7%|▋         | 72979/1000000 [3:20:28<25:57:06,  9.92it/s]

{'loss': Array(0.16380677, dtype=float32), 'loss_cross_entropy': Array(0.15333194, dtype=float32)}


  7%|▋         | 72989/1000000 [3:20:30<34:12:19,  7.53it/s]

{'loss': Array(0.16487037, dtype=float32), 'loss_cross_entropy': Array(0.15571123, dtype=float32)}


  7%|▋         | 72999/1000000 [3:20:32<26:10:46,  9.84it/s]

{'loss': Array(0.15446341, dtype=float32), 'loss_cross_entropy': Array(0.14595352, dtype=float32)}


  7%|▋         | 73009/1000000 [3:20:39<84:27:14,  3.05it/s] 

{'loss': Array(0.14380613, dtype=float32), 'loss_cross_entropy': Array(0.13486886, dtype=float32)}


  7%|▋         | 73019/1000000 [3:20:41<38:37:48,  6.67it/s]

{'loss': Array(0.15630974, dtype=float32), 'loss_cross_entropy': Array(0.14684993, dtype=float32)}


  7%|▋         | 73029/1000000 [3:20:42<26:25:23,  9.74it/s]

{'loss': Array(0.15278396, dtype=float32), 'loss_cross_entropy': Array(0.14368398, dtype=float32)}


  7%|▋         | 73039/1000000 [3:20:44<34:44:14,  7.41it/s]

{'loss': Array(0.15665968, dtype=float32), 'loss_cross_entropy': Array(0.1473367, dtype=float32)}


  7%|▋         | 73049/1000000 [3:20:46<25:37:32, 10.05it/s]

{'loss': Array(0.14665192, dtype=float32), 'loss_cross_entropy': Array(0.13746826, dtype=float32)}


  7%|▋         | 73059/1000000 [3:20:47<38:03:55,  6.76it/s]

{'loss': Array(0.16179085, dtype=float32), 'loss_cross_entropy': Array(0.15167782, dtype=float32)}


  7%|▋         | 73069/1000000 [3:20:49<25:56:50,  9.92it/s]

{'loss': Array(0.14209425, dtype=float32), 'loss_cross_entropy': Array(0.13358764, dtype=float32)}


  7%|▋         | 73079/1000000 [3:20:50<23:51:12, 10.79it/s]

{'loss': Array(0.15803769, dtype=float32), 'loss_cross_entropy': Array(0.14905235, dtype=float32)}


  7%|▋         | 73089/1000000 [3:20:52<30:40:07,  8.40it/s]

{'loss': Array(0.1619738, dtype=float32), 'loss_cross_entropy': Array(0.15274635, dtype=float32)}


  7%|▋         | 73099/1000000 [3:20:53<24:53:51, 10.34it/s]

{'loss': Array(0.1484807, dtype=float32), 'loss_cross_entropy': Array(0.13923101, dtype=float32)}


  7%|▋         | 73109/1000000 [3:20:55<33:27:18,  7.70it/s]

{'loss': Array(0.1604017, dtype=float32), 'loss_cross_entropy': Array(0.15091746, dtype=float32)}


  7%|▋         | 73119/1000000 [3:20:56<25:19:44, 10.16it/s]

{'loss': Array(0.16453631, dtype=float32), 'loss_cross_entropy': Array(0.15462618, dtype=float32)}


  7%|▋         | 73129/1000000 [3:20:57<24:22:39, 10.56it/s]

{'loss': Array(0.15895186, dtype=float32), 'loss_cross_entropy': Array(0.14906524, dtype=float32)}


  7%|▋         | 73139/1000000 [3:20:59<28:37:31,  8.99it/s]

{'loss': Array(0.1499474, dtype=float32), 'loss_cross_entropy': Array(0.14071462, dtype=float32)}


  7%|▋         | 73149/1000000 [3:21:01<25:42:44, 10.01it/s]

{'loss': Array(0.16994387, dtype=float32), 'loss_cross_entropy': Array(0.16041645, dtype=float32)}


  7%|▋         | 73159/1000000 [3:21:02<39:15:24,  6.56it/s]

{'loss': Array(0.15033814, dtype=float32), 'loss_cross_entropy': Array(0.14161925, dtype=float32)}


  7%|▋         | 73169/1000000 [3:21:04<27:48:55,  9.26it/s]

{'loss': Array(0.16343439, dtype=float32), 'loss_cross_entropy': Array(0.1535025, dtype=float32)}


  7%|▋         | 73179/1000000 [3:21:05<26:03:49,  9.88it/s]

{'loss': Array(0.1528819, dtype=float32), 'loss_cross_entropy': Array(0.14360519, dtype=float32)}


  7%|▋         | 73189/1000000 [3:21:07<32:19:02,  7.97it/s]

{'loss': Array(0.17095704, dtype=float32), 'loss_cross_entropy': Array(0.16094425, dtype=float32)}


  7%|▋         | 73199/1000000 [3:21:08<26:39:21,  9.66it/s]

{'loss': Array(0.1496933, dtype=float32), 'loss_cross_entropy': Array(0.14091888, dtype=float32)}


  7%|▋         | 73209/1000000 [3:21:10<34:38:39,  7.43it/s]

{'loss': Array(0.13904177, dtype=float32), 'loss_cross_entropy': Array(0.13047658, dtype=float32)}


  7%|▋         | 73219/1000000 [3:21:12<25:54:55,  9.93it/s]

{'loss': Array(0.14656739, dtype=float32), 'loss_cross_entropy': Array(0.13797002, dtype=float32)}


  7%|▋         | 73229/1000000 [3:21:13<33:56:33,  7.58it/s]

{'loss': Array(0.14000486, dtype=float32), 'loss_cross_entropy': Array(0.13105334, dtype=float32)}


  7%|▋         | 73239/1000000 [3:21:15<25:59:54,  9.90it/s]

{'loss': Array(0.14641844, dtype=float32), 'loss_cross_entropy': Array(0.13773313, dtype=float32)}


  7%|▋         | 73249/1000000 [3:21:16<24:04:31, 10.69it/s]

{'loss': Array(0.1547457, dtype=float32), 'loss_cross_entropy': Array(0.14595388, dtype=float32)}


  7%|▋         | 73259/1000000 [3:21:18<28:56:47,  8.89it/s]

{'loss': Array(0.15529709, dtype=float32), 'loss_cross_entropy': Array(0.14578702, dtype=float32)}


  7%|▋         | 73269/1000000 [3:21:19<25:15:28, 10.19it/s]

{'loss': Array(0.15292113, dtype=float32), 'loss_cross_entropy': Array(0.14369735, dtype=float32)}


  7%|▋         | 73279/1000000 [3:21:21<30:19:11,  8.49it/s]

{'loss': Array(0.16357148, dtype=float32), 'loss_cross_entropy': Array(0.15328535, dtype=float32)}


  7%|▋         | 73289/1000000 [3:21:22<24:26:55, 10.53it/s]

{'loss': Array(0.16005425, dtype=float32), 'loss_cross_entropy': Array(0.15032776, dtype=float32)}


  7%|▋         | 73299/1000000 [3:21:24<38:15:25,  6.73it/s]

{'loss': Array(0.15787032, dtype=float32), 'loss_cross_entropy': Array(0.14830063, dtype=float32)}


  7%|▋         | 73309/1000000 [3:21:25<26:37:13,  9.67it/s]

{'loss': Array(0.1492034, dtype=float32), 'loss_cross_entropy': Array(0.140209, dtype=float32)}


  7%|▋         | 73319/1000000 [3:21:27<37:55:55,  6.79it/s]

{'loss': Array(0.14325452, dtype=float32), 'loss_cross_entropy': Array(0.13480563, dtype=float32)}


  7%|▋         | 73329/1000000 [3:21:28<26:43:33,  9.63it/s]

{'loss': Array(0.15047826, dtype=float32), 'loss_cross_entropy': Array(0.14141001, dtype=float32)}


  7%|▋         | 73339/1000000 [3:21:30<24:20:29, 10.57it/s]

{'loss': Array(0.14468734, dtype=float32), 'loss_cross_entropy': Array(0.13566402, dtype=float32)}


  7%|▋         | 73349/1000000 [3:21:31<33:58:39,  7.58it/s]

{'loss': Array(0.15170796, dtype=float32), 'loss_cross_entropy': Array(0.14246446, dtype=float32)}


  7%|▋         | 73359/1000000 [3:21:33<25:29:36, 10.10it/s]

{'loss': Array(0.1493408, dtype=float32), 'loss_cross_entropy': Array(0.14065544, dtype=float32)}


  7%|▋         | 73369/1000000 [3:21:35<38:02:41,  6.77it/s]

{'loss': Array(0.1630923, dtype=float32), 'loss_cross_entropy': Array(0.15404914, dtype=float32)}


  7%|▋         | 73379/1000000 [3:21:36<26:13:00,  9.82it/s]

{'loss': Array(0.17617415, dtype=float32), 'loss_cross_entropy': Array(0.1665357, dtype=float32)}


  7%|▋         | 73389/1000000 [3:21:37<24:53:19, 10.34it/s]

{'loss': Array(0.15823819, dtype=float32), 'loss_cross_entropy': Array(0.148312, dtype=float32)}


  7%|▋         | 73399/1000000 [3:21:39<27:21:37,  9.41it/s]

{'loss': Array(0.14999992, dtype=float32), 'loss_cross_entropy': Array(0.14202613, dtype=float32)}


  7%|▋         | 73409/1000000 [3:21:40<24:47:51, 10.38it/s]

{'loss': Array(0.16889967, dtype=float32), 'loss_cross_entropy': Array(0.15892386, dtype=float32)}


  7%|▋         | 73419/1000000 [3:21:42<27:49:55,  9.25it/s]

{'loss': Array(0.1545702, dtype=float32), 'loss_cross_entropy': Array(0.14543499, dtype=float32)}


  7%|▋         | 73429/1000000 [3:21:44<25:04:18, 10.27it/s]

{'loss': Array(0.14750245, dtype=float32), 'loss_cross_entropy': Array(0.13790996, dtype=float32)}


  7%|▋         | 73439/1000000 [3:21:45<37:48:10,  6.81it/s]

{'loss': Array(0.1568603, dtype=float32), 'loss_cross_entropy': Array(0.1471769, dtype=float32)}


  7%|▋         | 73449/1000000 [3:21:47<26:02:02,  9.89it/s]

{'loss': Array(0.1368929, dtype=float32), 'loss_cross_entropy': Array(0.12903325, dtype=float32)}


  7%|▋         | 73459/1000000 [3:21:48<24:25:57, 10.53it/s]

{'loss': Array(0.1506254, dtype=float32), 'loss_cross_entropy': Array(0.140916, dtype=float32)}


  7%|▋         | 73469/1000000 [3:21:50<30:26:31,  8.45it/s]

{'loss': Array(0.14691903, dtype=float32), 'loss_cross_entropy': Array(0.13784873, dtype=float32)}


  7%|▋         | 73479/1000000 [3:21:51<25:01:10, 10.29it/s]

{'loss': Array(0.13628924, dtype=float32), 'loss_cross_entropy': Array(0.12792513, dtype=float32)}


  7%|▋         | 73489/1000000 [3:21:53<38:01:19,  6.77it/s]

{'loss': Array(0.16094443, dtype=float32), 'loss_cross_entropy': Array(0.15158194, dtype=float32)}


  7%|▋         | 73499/1000000 [3:21:54<25:47:52,  9.98it/s]

{'loss': Array(0.13887905, dtype=float32), 'loss_cross_entropy': Array(0.13034646, dtype=float32)}


  7%|▋         | 73509/1000000 [3:22:02<85:11:46,  3.02it/s] 

{'loss': Array(0.15832607, dtype=float32), 'loss_cross_entropy': Array(0.14877722, dtype=float32)}


  7%|▋         | 73519/1000000 [3:22:04<40:00:29,  6.43it/s]

{'loss': Array(0.1528057, dtype=float32), 'loss_cross_entropy': Array(0.14372794, dtype=float32)}


  7%|▋         | 73529/1000000 [3:22:05<27:56:01,  9.21it/s]

{'loss': Array(0.13490437, dtype=float32), 'loss_cross_entropy': Array(0.1267197, dtype=float32)}


  7%|▋         | 73539/1000000 [3:22:07<31:40:09,  8.13it/s]

{'loss': Array(0.1649537, dtype=float32), 'loss_cross_entropy': Array(0.1549077, dtype=float32)}


  7%|▋         | 73549/1000000 [3:22:08<26:47:44,  9.60it/s]

{'loss': Array(0.16661805, dtype=float32), 'loss_cross_entropy': Array(0.15678874, dtype=float32)}


  7%|▋         | 73559/1000000 [3:22:10<38:38:17,  6.66it/s]

{'loss': Array(0.16177805, dtype=float32), 'loss_cross_entropy': Array(0.15166114, dtype=float32)}


  7%|▋         | 73569/1000000 [3:22:11<26:20:01,  9.77it/s]

{'loss': Array(0.15909176, dtype=float32), 'loss_cross_entropy': Array(0.14976798, dtype=float32)}


  7%|▋         | 73579/1000000 [3:22:13<24:19:00, 10.58it/s]

{'loss': Array(0.16319819, dtype=float32), 'loss_cross_entropy': Array(0.15345308, dtype=float32)}


  7%|▋         | 73589/1000000 [3:22:15<28:00:24,  9.19it/s]

{'loss': Array(0.14255123, dtype=float32), 'loss_cross_entropy': Array(0.1338765, dtype=float32)}


  7%|▋         | 73599/1000000 [3:22:16<25:22:18, 10.14it/s]

{'loss': Array(0.1415529, dtype=float32), 'loss_cross_entropy': Array(0.13267392, dtype=float32)}


  7%|▋         | 73609/1000000 [3:22:18<38:57:08,  6.61it/s]

{'loss': Array(0.15963584, dtype=float32), 'loss_cross_entropy': Array(0.15038379, dtype=float32)}


  7%|▋         | 73619/1000000 [3:22:19<27:06:23,  9.49it/s]

{'loss': Array(0.140255, dtype=float32), 'loss_cross_entropy': Array(0.13178183, dtype=float32)}


  7%|▋         | 73629/1000000 [3:22:21<24:54:14, 10.33it/s]

{'loss': Array(0.14922962, dtype=float32), 'loss_cross_entropy': Array(0.14002162, dtype=float32)}


  7%|▋         | 73639/1000000 [3:22:22<29:20:23,  8.77it/s]

{'loss': Array(0.164286, dtype=float32), 'loss_cross_entropy': Array(0.15374874, dtype=float32)}


  7%|▋         | 73649/1000000 [3:22:24<24:56:51, 10.31it/s]

{'loss': Array(0.16131242, dtype=float32), 'loss_cross_entropy': Array(0.1515899, dtype=float32)}


  7%|▋         | 73659/1000000 [3:22:25<33:46:24,  7.62it/s]

{'loss': Array(0.163576, dtype=float32), 'loss_cross_entropy': Array(0.1546051, dtype=float32)}


  7%|▋         | 73669/1000000 [3:22:27<25:20:09, 10.16it/s]

{'loss': Array(0.13894594, dtype=float32), 'loss_cross_entropy': Array(0.13065729, dtype=float32)}


  7%|▋         | 73679/1000000 [3:22:29<38:31:21,  6.68it/s]

{'loss': Array(0.16723149, dtype=float32), 'loss_cross_entropy': Array(0.15781626, dtype=float32)}


  7%|▋         | 73689/1000000 [3:22:30<26:22:08,  9.76it/s]

{'loss': Array(0.17077613, dtype=float32), 'loss_cross_entropy': Array(0.16141869, dtype=float32)}


  7%|▋         | 73699/1000000 [3:22:31<24:04:52, 10.68it/s]

{'loss': Array(0.1460291, dtype=float32), 'loss_cross_entropy': Array(0.1375932, dtype=float32)}


  7%|▋         | 73709/1000000 [3:22:33<30:26:58,  8.45it/s]

{'loss': Array(0.14770322, dtype=float32), 'loss_cross_entropy': Array(0.13856015, dtype=float32)}


  7%|▋         | 73719/1000000 [3:22:34<24:44:43, 10.40it/s]

{'loss': Array(0.14419924, dtype=float32), 'loss_cross_entropy': Array(0.13495833, dtype=float32)}


  7%|▋         | 73729/1000000 [3:22:36<33:25:01,  7.70it/s]

{'loss': Array(0.15042478, dtype=float32), 'loss_cross_entropy': Array(0.14128374, dtype=float32)}


  7%|▋         | 73739/1000000 [3:22:37<25:33:43, 10.07it/s]

{'loss': Array(0.14251629, dtype=float32), 'loss_cross_entropy': Array(0.13350277, dtype=float32)}


  7%|▋         | 73749/1000000 [3:22:39<25:09:11, 10.23it/s]

{'loss': Array(0.1394244, dtype=float32), 'loss_cross_entropy': Array(0.1305012, dtype=float32)}


  7%|▋         | 73759/1000000 [3:22:40<27:14:54,  9.44it/s]

{'loss': Array(0.15313621, dtype=float32), 'loss_cross_entropy': Array(0.1445993, dtype=float32)}


  7%|▋         | 73769/1000000 [3:22:42<24:18:45, 10.58it/s]

{'loss': Array(0.15381095, dtype=float32), 'loss_cross_entropy': Array(0.14447205, dtype=float32)}


  7%|▋         | 73779/1000000 [3:22:44<38:17:14,  6.72it/s]

{'loss': Array(0.16747068, dtype=float32), 'loss_cross_entropy': Array(0.15770356, dtype=float32)}


  7%|▋         | 73789/1000000 [3:22:45<26:17:02,  9.79it/s]

{'loss': Array(0.1462311, dtype=float32), 'loss_cross_entropy': Array(0.13772088, dtype=float32)}


  7%|▋         | 73799/1000000 [3:22:46<24:14:09, 10.62it/s]

{'loss': Array(0.1487287, dtype=float32), 'loss_cross_entropy': Array(0.14021665, dtype=float32)}


  7%|▋         | 73809/1000000 [3:22:48<30:34:08,  8.42it/s]

{'loss': Array(0.14332008, dtype=float32), 'loss_cross_entropy': Array(0.13432772, dtype=float32)}


  7%|▋         | 73819/1000000 [3:22:49<25:26:41, 10.11it/s]

{'loss': Array(0.15734534, dtype=float32), 'loss_cross_entropy': Array(0.14768206, dtype=float32)}


  7%|▋         | 73829/1000000 [3:22:51<34:28:42,  7.46it/s]

{'loss': Array(0.15985458, dtype=float32), 'loss_cross_entropy': Array(0.15055583, dtype=float32)}


  7%|▋         | 73839/1000000 [3:22:52<25:39:51, 10.02it/s]

{'loss': Array(0.15791628, dtype=float32), 'loss_cross_entropy': Array(0.14871626, dtype=float32)}


  7%|▋         | 73849/1000000 [3:22:54<33:35:18,  7.66it/s]

{'loss': Array(0.15422264, dtype=float32), 'loss_cross_entropy': Array(0.14430976, dtype=float32)}


  7%|▋         | 73859/1000000 [3:22:56<25:31:58, 10.08it/s]

{'loss': Array(0.18136899, dtype=float32), 'loss_cross_entropy': Array(0.17089437, dtype=float32)}


  7%|▋         | 73869/1000000 [3:22:57<23:54:30, 10.76it/s]

{'loss': Array(0.13874528, dtype=float32), 'loss_cross_entropy': Array(0.13037413, dtype=float32)}


  7%|▋         | 73879/1000000 [3:22:59<28:34:42,  9.00it/s]

{'loss': Array(0.12535058, dtype=float32), 'loss_cross_entropy': Array(0.11754779, dtype=float32)}


  7%|▋         | 73889/1000000 [3:23:00<26:03:07,  9.87it/s]

{'loss': Array(0.14527081, dtype=float32), 'loss_cross_entropy': Array(0.13613501, dtype=float32)}


  7%|▋         | 73899/1000000 [3:23:02<31:09:41,  8.26it/s]

{'loss': Array(0.17336859, dtype=float32), 'loss_cross_entropy': Array(0.16318153, dtype=float32)}


  7%|▋         | 73909/1000000 [3:23:03<25:31:50, 10.08it/s]

{'loss': Array(0.16260743, dtype=float32), 'loss_cross_entropy': Array(0.15234944, dtype=float32)}


  7%|▋         | 73919/1000000 [3:23:05<38:41:52,  6.65it/s]

{'loss': Array(0.14063163, dtype=float32), 'loss_cross_entropy': Array(0.13216329, dtype=float32)}


  7%|▋         | 73929/1000000 [3:23:06<26:41:55,  9.63it/s]

{'loss': Array(0.17921428, dtype=float32), 'loss_cross_entropy': Array(0.16837612, dtype=float32)}


  7%|▋         | 73939/1000000 [3:23:08<40:14:34,  6.39it/s]

{'loss': Array(0.14522074, dtype=float32), 'loss_cross_entropy': Array(0.13596836, dtype=float32)}


  7%|▋         | 73949/1000000 [3:23:09<27:49:03,  9.25it/s]

{'loss': Array(0.16017434, dtype=float32), 'loss_cross_entropy': Array(0.15043767, dtype=float32)}


  7%|▋         | 73959/1000000 [3:23:11<24:29:50, 10.50it/s]

{'loss': Array(0.16958414, dtype=float32), 'loss_cross_entropy': Array(0.15960257, dtype=float32)}


  7%|▋         | 73969/1000000 [3:23:13<34:06:11,  7.54it/s]

{'loss': Array(0.14918633, dtype=float32), 'loss_cross_entropy': Array(0.14065234, dtype=float32)}


  7%|▋         | 73979/1000000 [3:23:14<25:36:02, 10.05it/s]

{'loss': Array(0.15653844, dtype=float32), 'loss_cross_entropy': Array(0.14748202, dtype=float32)}


  7%|▋         | 73989/1000000 [3:23:16<38:14:09,  6.73it/s]

{'loss': Array(0.1435474, dtype=float32), 'loss_cross_entropy': Array(0.13485508, dtype=float32)}


  7%|▋         | 73999/1000000 [3:23:17<25:48:49,  9.96it/s]

{'loss': Array(0.16538076, dtype=float32), 'loss_cross_entropy': Array(0.15638204, dtype=float32)}


  7%|▋         | 74009/1000000 [3:23:25<84:34:56,  3.04it/s] 

{'loss': Array(0.15110323, dtype=float32), 'loss_cross_entropy': Array(0.1418753, dtype=float32)}


  7%|▋         | 74019/1000000 [3:23:27<37:07:38,  6.93it/s] 

{'loss': Array(0.14565231, dtype=float32), 'loss_cross_entropy': Array(0.13674746, dtype=float32)}


  7%|▋         | 74029/1000000 [3:23:28<25:56:07,  9.92it/s]

{'loss': Array(0.16413002, dtype=float32), 'loss_cross_entropy': Array(0.1538917, dtype=float32)}


  7%|▋         | 74039/1000000 [3:23:30<27:08:07,  9.48it/s]

{'loss': Array(0.17405741, dtype=float32), 'loss_cross_entropy': Array(0.1640116, dtype=float32)}


  7%|▋         | 74049/1000000 [3:23:31<23:58:31, 10.73it/s]

{'loss': Array(0.1414619, dtype=float32), 'loss_cross_entropy': Array(0.13334219, dtype=float32)}


  7%|▋         | 74059/1000000 [3:23:33<37:58:29,  6.77it/s]

{'loss': Array(0.17159848, dtype=float32), 'loss_cross_entropy': Array(0.16181949, dtype=float32)}


  7%|▋         | 74069/1000000 [3:23:34<25:39:45, 10.02it/s]

{'loss': Array(0.14724447, dtype=float32), 'loss_cross_entropy': Array(0.13844478, dtype=float32)}


  7%|▋         | 74079/1000000 [3:23:35<24:03:06, 10.69it/s]

{'loss': Array(0.1443838, dtype=float32), 'loss_cross_entropy': Array(0.13547224, dtype=float32)}


  7%|▋         | 74089/1000000 [3:23:37<31:00:29,  8.29it/s]

{'loss': Array(0.14851682, dtype=float32), 'loss_cross_entropy': Array(0.1397267, dtype=float32)}


  7%|▋         | 74099/1000000 [3:23:38<25:55:51,  9.92it/s]

{'loss': Array(0.13764586, dtype=float32), 'loss_cross_entropy': Array(0.12965721, dtype=float32)}


  7%|▋         | 74109/1000000 [3:23:40<37:24:06,  6.88it/s]

{'loss': Array(0.13725607, dtype=float32), 'loss_cross_entropy': Array(0.12853245, dtype=float32)}


  7%|▋         | 74119/1000000 [3:23:41<25:51:37,  9.95it/s]

{'loss': Array(0.15370265, dtype=float32), 'loss_cross_entropy': Array(0.1443457, dtype=float32)}


  7%|▋         | 74129/1000000 [3:23:43<24:05:26, 10.68it/s]

{'loss': Array(0.15551908, dtype=float32), 'loss_cross_entropy': Array(0.14553973, dtype=float32)}


  7%|▋         | 74139/1000000 [3:23:44<28:47:00,  8.94it/s]

{'loss': Array(0.15117584, dtype=float32), 'loss_cross_entropy': Array(0.1424544, dtype=float32)}


  7%|▋         | 74149/1000000 [3:23:46<24:23:47, 10.54it/s]

{'loss': Array(0.15066448, dtype=float32), 'loss_cross_entropy': Array(0.14213876, dtype=float32)}


  7%|▋         | 74159/1000000 [3:23:48<33:37:57,  7.65it/s]

{'loss': Array(0.13831426, dtype=float32), 'loss_cross_entropy': Array(0.13007121, dtype=float32)}


  7%|▋         | 74169/1000000 [3:23:49<25:57:12,  9.91it/s]

{'loss': Array(0.1665481, dtype=float32), 'loss_cross_entropy': Array(0.15708712, dtype=float32)}


  7%|▋         | 74179/1000000 [3:23:50<24:04:04, 10.69it/s]

{'loss': Array(0.15393618, dtype=float32), 'loss_cross_entropy': Array(0.14495006, dtype=float32)}


  7%|▋         | 74189/1000000 [3:23:52<30:12:28,  8.51it/s]

{'loss': Array(0.15374969, dtype=float32), 'loss_cross_entropy': Array(0.14463866, dtype=float32)}


  7%|▋         | 74199/1000000 [3:23:53<24:50:07, 10.35it/s]

{'loss': Array(0.15741788, dtype=float32), 'loss_cross_entropy': Array(0.14781511, dtype=float32)}


  7%|▋         | 74209/1000000 [3:23:55<24:22:22, 10.55it/s]

{'loss': Array(0.14569317, dtype=float32), 'loss_cross_entropy': Array(0.13712652, dtype=float32)}


  7%|▋         | 74219/1000000 [3:23:56<27:44:58,  9.27it/s]

{'loss': Array(0.15603612, dtype=float32), 'loss_cross_entropy': Array(0.14702392, dtype=float32)}


  7%|▋         | 74229/1000000 [3:23:58<24:13:29, 10.62it/s]

{'loss': Array(0.15683968, dtype=float32), 'loss_cross_entropy': Array(0.14707644, dtype=float32)}


  7%|▋         | 74239/1000000 [3:23:59<30:51:47,  8.33it/s]

{'loss': Array(0.15462433, dtype=float32), 'loss_cross_entropy': Array(0.14549227, dtype=float32)}


  7%|▋         | 74249/1000000 [3:24:01<25:26:18, 10.11it/s]

{'loss': Array(0.14741395, dtype=float32), 'loss_cross_entropy': Array(0.13831596, dtype=float32)}


  7%|▋         | 74259/1000000 [3:24:03<38:18:26,  6.71it/s]

{'loss': Array(0.16328202, dtype=float32), 'loss_cross_entropy': Array(0.15357642, dtype=float32)}


  7%|▋         | 74269/1000000 [3:24:04<26:29:37,  9.71it/s]

{'loss': Array(0.14433543, dtype=float32), 'loss_cross_entropy': Array(0.13595606, dtype=float32)}


  7%|▋         | 74279/1000000 [3:24:06<39:54:08,  6.44it/s]

{'loss': Array(0.14031677, dtype=float32), 'loss_cross_entropy': Array(0.13187414, dtype=float32)}


  7%|▋         | 74289/1000000 [3:24:07<27:24:18,  9.38it/s]

{'loss': Array(0.17892455, dtype=float32), 'loss_cross_entropy': Array(0.16873239, dtype=float32)}


  7%|▋         | 74299/1000000 [3:24:09<26:09:16,  9.83it/s]

{'loss': Array(0.13834499, dtype=float32), 'loss_cross_entropy': Array(0.1299769, dtype=float32)}


  7%|▋         | 74309/1000000 [3:24:10<34:50:02,  7.38it/s]

{'loss': Array(0.17582247, dtype=float32), 'loss_cross_entropy': Array(0.16569993, dtype=float32)}


  7%|▋         | 74319/1000000 [3:24:12<25:12:46, 10.20it/s]

{'loss': Array(0.15784465, dtype=float32), 'loss_cross_entropy': Array(0.14870666, dtype=float32)}


  7%|▋         | 74329/1000000 [3:24:13<38:24:28,  6.69it/s]

{'loss': Array(0.15358934, dtype=float32), 'loss_cross_entropy': Array(0.14397098, dtype=float32)}


  7%|▋         | 74339/1000000 [3:24:15<26:29:12,  9.71it/s]

{'loss': Array(0.14216116, dtype=float32), 'loss_cross_entropy': Array(0.13313358, dtype=float32)}


  7%|▋         | 74349/1000000 [3:24:16<24:48:53, 10.36it/s]

{'loss': Array(0.14742249, dtype=float32), 'loss_cross_entropy': Array(0.13784683, dtype=float32)}


  7%|▋         | 74359/1000000 [3:24:18<27:34:55,  9.32it/s]

{'loss': Array(0.1553063, dtype=float32), 'loss_cross_entropy': Array(0.14654435, dtype=float32)}


  7%|▋         | 74369/1000000 [3:24:19<24:12:28, 10.62it/s]

{'loss': Array(0.15745862, dtype=float32), 'loss_cross_entropy': Array(0.14797127, dtype=float32)}


  7%|▋         | 74379/1000000 [3:24:21<27:06:11,  9.49it/s]

{'loss': Array(0.13738708, dtype=float32), 'loss_cross_entropy': Array(0.12813047, dtype=float32)}


  7%|▋         | 74389/1000000 [3:24:22<24:21:41, 10.55it/s]

{'loss': Array(0.15030275, dtype=float32), 'loss_cross_entropy': Array(0.14106016, dtype=float32)}


  7%|▋         | 74399/1000000 [3:24:24<37:20:25,  6.89it/s]

{'loss': Array(0.13472125, dtype=float32), 'loss_cross_entropy': Array(0.1265829, dtype=float32)}


  7%|▋         | 74409/1000000 [3:24:25<26:24:19,  9.74it/s]

{'loss': Array(0.16758247, dtype=float32), 'loss_cross_entropy': Array(0.15832964, dtype=float32)}


  7%|▋         | 74419/1000000 [3:24:27<23:54:20, 10.76it/s]

{'loss': Array(0.13909955, dtype=float32), 'loss_cross_entropy': Array(0.13068253, dtype=float32)}


  7%|▋         | 74429/1000000 [3:24:28<30:49:29,  8.34it/s]

{'loss': Array(0.15381412, dtype=float32), 'loss_cross_entropy': Array(0.14446475, dtype=float32)}


  7%|▋         | 74439/1000000 [3:24:30<25:06:22, 10.24it/s]

{'loss': Array(0.1419117, dtype=float32), 'loss_cross_entropy': Array(0.1329068, dtype=float32)}


  7%|▋         | 74449/1000000 [3:24:32<37:31:39,  6.85it/s]

{'loss': Array(0.14199363, dtype=float32), 'loss_cross_entropy': Array(0.13314097, dtype=float32)}


  7%|▋         | 74459/1000000 [3:24:33<25:44:43,  9.99it/s]

{'loss': Array(0.15262341, dtype=float32), 'loss_cross_entropy': Array(0.14357214, dtype=float32)}


  7%|▋         | 74469/1000000 [3:24:34<24:13:22, 10.61it/s]

{'loss': Array(0.13927004, dtype=float32), 'loss_cross_entropy': Array(0.13083513, dtype=float32)}


  7%|▋         | 74479/1000000 [3:24:36<28:58:09,  8.87it/s]

{'loss': Array(0.14083196, dtype=float32), 'loss_cross_entropy': Array(0.13216436, dtype=float32)}


  7%|▋         | 74489/1000000 [3:24:37<24:39:36, 10.43it/s]

{'loss': Array(0.13605773, dtype=float32), 'loss_cross_entropy': Array(0.12773226, dtype=float32)}


  7%|▋         | 74499/1000000 [3:24:39<34:00:04,  7.56it/s]

{'loss': Array(0.1898375, dtype=float32), 'loss_cross_entropy': Array(0.17919049, dtype=float32)}


  7%|▋         | 74509/1000000 [3:24:47<84:06:42,  3.06it/s] 

{'loss': Array(0.13730371, dtype=float32), 'loss_cross_entropy': Array(0.12864658, dtype=float32)}


  7%|▋         | 74519/1000000 [3:24:48<43:21:18,  5.93it/s]

{'loss': Array(0.14793704, dtype=float32), 'loss_cross_entropy': Array(0.13889723, dtype=float32)}


  7%|▋         | 74529/1000000 [3:24:50<26:44:38,  9.61it/s]

{'loss': Array(0.17444472, dtype=float32), 'loss_cross_entropy': Array(0.16488653, dtype=float32)}


  7%|▋         | 74539/1000000 [3:24:51<24:01:32, 10.70it/s]

{'loss': Array(0.1408819, dtype=float32), 'loss_cross_entropy': Array(0.13209641, dtype=float32)}


  7%|▋         | 74549/1000000 [3:24:53<30:26:11,  8.45it/s]

{'loss': Array(0.14881206, dtype=float32), 'loss_cross_entropy': Array(0.14004742, dtype=float32)}


  7%|▋         | 74559/1000000 [3:24:54<24:41:06, 10.41it/s]

{'loss': Array(0.15923212, dtype=float32), 'loss_cross_entropy': Array(0.14927754, dtype=float32)}


  7%|▋         | 74569/1000000 [3:24:55<24:02:12, 10.69it/s]

{'loss': Array(0.13518257, dtype=float32), 'loss_cross_entropy': Array(0.12672536, dtype=float32)}


  7%|▋         | 74579/1000000 [3:24:57<27:02:46,  9.50it/s]

{'loss': Array(0.14837253, dtype=float32), 'loss_cross_entropy': Array(0.13916108, dtype=float32)}


  7%|▋         | 74589/1000000 [3:24:59<24:57:29, 10.30it/s]

{'loss': Array(0.1476063, dtype=float32), 'loss_cross_entropy': Array(0.13892244, dtype=float32)}


  7%|▋         | 74599/1000000 [3:25:00<31:49:28,  8.08it/s]

{'loss': Array(0.1554737, dtype=float32), 'loss_cross_entropy': Array(0.14559223, dtype=float32)}


  7%|▋         | 74609/1000000 [3:25:02<26:41:32,  9.63it/s]

{'loss': Array(0.16002892, dtype=float32), 'loss_cross_entropy': Array(0.1505992, dtype=float32)}


  7%|▋         | 74619/1000000 [3:25:04<35:04:37,  7.33it/s]

{'loss': Array(0.15218624, dtype=float32), 'loss_cross_entropy': Array(0.14333205, dtype=float32)}


  7%|▋         | 74629/1000000 [3:25:05<26:26:42,  9.72it/s]

{'loss': Array(0.15297273, dtype=float32), 'loss_cross_entropy': Array(0.143624, dtype=float32)}


  7%|▋         | 74639/1000000 [3:25:06<24:57:19, 10.30it/s]

{'loss': Array(0.16760546, dtype=float32), 'loss_cross_entropy': Array(0.15764736, dtype=float32)}


  7%|▋         | 74649/1000000 [3:25:08<32:22:17,  7.94it/s]

{'loss': Array(0.16650948, dtype=float32), 'loss_cross_entropy': Array(0.1567239, dtype=float32)}


  7%|▋         | 74659/1000000 [3:25:10<26:06:14,  9.85it/s]

{'loss': Array(0.14151396, dtype=float32), 'loss_cross_entropy': Array(0.1323056, dtype=float32)}


  7%|▋         | 74669/1000000 [3:25:11<23:46:28, 10.81it/s]

{'loss': Array(0.14101307, dtype=float32), 'loss_cross_entropy': Array(0.1321337, dtype=float32)}


  7%|▋         | 74679/1000000 [3:25:13<26:54:55,  9.55it/s]

{'loss': Array(0.16887847, dtype=float32), 'loss_cross_entropy': Array(0.15823568, dtype=float32)}


  7%|▋         | 74689/1000000 [3:25:14<24:18:48, 10.57it/s]

{'loss': Array(0.14077924, dtype=float32), 'loss_cross_entropy': Array(0.1325819, dtype=float32)}


  7%|▋         | 74699/1000000 [3:25:16<31:02:36,  8.28it/s]

{'loss': Array(0.15341547, dtype=float32), 'loss_cross_entropy': Array(0.14415869, dtype=float32)}


  7%|▋         | 74709/1000000 [3:25:17<24:49:03, 10.36it/s]

{'loss': Array(0.16540672, dtype=float32), 'loss_cross_entropy': Array(0.15604804, dtype=float32)}


  7%|▋         | 74719/1000000 [3:25:19<37:20:48,  6.88it/s]

{'loss': Array(0.15545808, dtype=float32), 'loss_cross_entropy': Array(0.14636074, dtype=float32)}


  7%|▋         | 74729/1000000 [3:25:20<26:32:57,  9.68it/s]

{'loss': Array(0.14658588, dtype=float32), 'loss_cross_entropy': Array(0.13782434, dtype=float32)}


  7%|▋         | 74739/1000000 [3:25:22<24:40:56, 10.41it/s]

{'loss': Array(0.15649705, dtype=float32), 'loss_cross_entropy': Array(0.14758933, dtype=float32)}


  7%|▋         | 74749/1000000 [3:25:23<33:20:03,  7.71it/s]

{'loss': Array(0.1421828, dtype=float32), 'loss_cross_entropy': Array(0.13405614, dtype=float32)}


  7%|▋         | 74759/1000000 [3:25:25<25:20:14, 10.14it/s]

{'loss': Array(0.1568144, dtype=float32), 'loss_cross_entropy': Array(0.14780353, dtype=float32)}


  7%|▋         | 74769/1000000 [3:25:26<24:00:57, 10.70it/s]

{'loss': Array(0.14444624, dtype=float32), 'loss_cross_entropy': Array(0.1354421, dtype=float32)}


  7%|▋         | 74779/1000000 [3:25:28<29:06:57,  8.83it/s]

{'loss': Array(0.15067254, dtype=float32), 'loss_cross_entropy': Array(0.14151306, dtype=float32)}


  7%|▋         | 74789/1000000 [3:25:29<24:32:34, 10.47it/s]

{'loss': Array(0.17933056, dtype=float32), 'loss_cross_entropy': Array(0.16882059, dtype=float32)}


  7%|▋         | 74799/1000000 [3:25:31<30:18:50,  8.48it/s]

{'loss': Array(0.16738848, dtype=float32), 'loss_cross_entropy': Array(0.15834653, dtype=float32)}


  7%|▋         | 74809/1000000 [3:25:32<25:02:43, 10.26it/s]

{'loss': Array(0.1523746, dtype=float32), 'loss_cross_entropy': Array(0.14246595, dtype=float32)}


  7%|▋         | 74819/1000000 [3:25:33<24:06:02, 10.66it/s]

{'loss': Array(0.15957771, dtype=float32), 'loss_cross_entropy': Array(0.14979874, dtype=float32)}


  7%|▋         | 74829/1000000 [3:25:35<26:34:24,  9.67it/s]

{'loss': Array(0.1310105, dtype=float32), 'loss_cross_entropy': Array(0.12298838, dtype=float32)}


  7%|▋         | 74839/1000000 [3:25:36<23:55:56, 10.74it/s]

{'loss': Array(0.14938734, dtype=float32), 'loss_cross_entropy': Array(0.14065737, dtype=float32)}


  7%|▋         | 74849/1000000 [3:25:38<33:30:59,  7.67it/s]

{'loss': Array(0.15014027, dtype=float32), 'loss_cross_entropy': Array(0.14111255, dtype=float32)}


  7%|▋         | 74859/1000000 [3:25:40<25:51:30,  9.94it/s]

{'loss': Array(0.15601915, dtype=float32), 'loss_cross_entropy': Array(0.1469255, dtype=float32)}


  7%|▋         | 74869/1000000 [3:25:41<24:48:13, 10.36it/s]

{'loss': Array(0.1632152, dtype=float32), 'loss_cross_entropy': Array(0.15410726, dtype=float32)}


  7%|▋         | 74879/1000000 [3:25:43<27:24:14,  9.38it/s]

{'loss': Array(0.16382024, dtype=float32), 'loss_cross_entropy': Array(0.15398395, dtype=float32)}


  7%|▋         | 74889/1000000 [3:25:44<25:00:25, 10.28it/s]

{'loss': Array(0.1643071, dtype=float32), 'loss_cross_entropy': Array(0.15422541, dtype=float32)}


  7%|▋         | 74899/1000000 [3:25:46<29:06:44,  8.83it/s]

{'loss': Array(0.15593038, dtype=float32), 'loss_cross_entropy': Array(0.14647444, dtype=float32)}


  7%|▋         | 74909/1000000 [3:25:47<25:24:27, 10.11it/s]

{'loss': Array(0.15516874, dtype=float32), 'loss_cross_entropy': Array(0.14599173, dtype=float32)}


  7%|▋         | 74919/1000000 [3:25:49<37:59:11,  6.76it/s]

{'loss': Array(0.15527175, dtype=float32), 'loss_cross_entropy': Array(0.14632455, dtype=float32)}


  7%|▋         | 74929/1000000 [3:25:50<26:27:53,  9.71it/s]

{'loss': Array(0.14062062, dtype=float32), 'loss_cross_entropy': Array(0.13219343, dtype=float32)}


  7%|▋         | 74939/1000000 [3:25:52<24:39:22, 10.42it/s]

{'loss': Array(0.15011361, dtype=float32), 'loss_cross_entropy': Array(0.14146267, dtype=float32)}


  7%|▋         | 74949/1000000 [3:25:53<31:18:18,  8.21it/s]

{'loss': Array(0.15185605, dtype=float32), 'loss_cross_entropy': Array(0.14263016, dtype=float32)}


  7%|▋         | 74959/1000000 [3:25:55<24:45:03, 10.38it/s]

{'loss': Array(0.14866225, dtype=float32), 'loss_cross_entropy': Array(0.13924651, dtype=float32)}


  7%|▋         | 74969/1000000 [3:25:56<33:47:12,  7.61it/s]

{'loss': Array(0.15058406, dtype=float32), 'loss_cross_entropy': Array(0.14159049, dtype=float32)}


  7%|▋         | 74979/1000000 [3:25:58<25:57:09,  9.90it/s]

{'loss': Array(0.15681781, dtype=float32), 'loss_cross_entropy': Array(0.14751972, dtype=float32)}


  7%|▋         | 74989/1000000 [3:26:00<35:31:08,  7.23it/s]

{'loss': Array(0.14704262, dtype=float32), 'loss_cross_entropy': Array(0.13802896, dtype=float32)}


  7%|▋         | 74999/1000000 [3:26:01<26:21:46,  9.75it/s]

{'loss': Array(0.16697098, dtype=float32), 'loss_cross_entropy': Array(0.15721081, dtype=float32)}


  8%|▊         | 75009/1000000 [3:26:09<84:32:26,  3.04it/s] 

{'loss': Array(0.15643856, dtype=float32), 'loss_cross_entropy': Array(0.14740257, dtype=float32)}


  8%|▊         | 75019/1000000 [3:26:11<39:16:31,  6.54it/s]

{'loss': Array(0.14560285, dtype=float32), 'loss_cross_entropy': Array(0.13754092, dtype=float32)}


  8%|▊         | 75029/1000000 [3:26:12<25:59:21,  9.89it/s]

{'loss': Array(0.14840361, dtype=float32), 'loss_cross_entropy': Array(0.13947825, dtype=float32)}


  8%|▊         | 75039/1000000 [3:26:14<33:46:30,  7.61it/s]

{'loss': Array(0.1315362, dtype=float32), 'loss_cross_entropy': Array(0.12335368, dtype=float32)}


  8%|▊         | 75049/1000000 [3:26:15<26:11:01,  9.81it/s]

{'loss': Array(0.13427825, dtype=float32), 'loss_cross_entropy': Array(0.12538978, dtype=float32)}


  8%|▊         | 75059/1000000 [3:26:17<38:21:16,  6.70it/s]

{'loss': Array(0.16130121, dtype=float32), 'loss_cross_entropy': Array(0.15159096, dtype=float32)}


  8%|▊         | 75069/1000000 [3:26:18<26:14:59,  9.79it/s]

{'loss': Array(0.1647498, dtype=float32), 'loss_cross_entropy': Array(0.15494429, dtype=float32)}


  8%|▊         | 75079/1000000 [3:26:19<24:39:09, 10.42it/s]

{'loss': Array(0.14861058, dtype=float32), 'loss_cross_entropy': Array(0.13969117, dtype=float32)}


  8%|▊         | 75089/1000000 [3:26:21<31:18:43,  8.21it/s]

{'loss': Array(0.15662901, dtype=float32), 'loss_cross_entropy': Array(0.14743915, dtype=float32)}


  8%|▊         | 75099/1000000 [3:26:23<25:26:14, 10.10it/s]

{'loss': Array(0.13582811, dtype=float32), 'loss_cross_entropy': Array(0.12760249, dtype=float32)}


  8%|▊         | 75109/1000000 [3:26:24<34:04:39,  7.54it/s]

{'loss': Array(0.15114766, dtype=float32), 'loss_cross_entropy': Array(0.14124398, dtype=float32)}


  8%|▊         | 75119/1000000 [3:26:26<25:04:09, 10.25it/s]

{'loss': Array(0.13639812, dtype=float32), 'loss_cross_entropy': Array(0.1276729, dtype=float32)}


  8%|▊         | 75129/1000000 [3:26:27<23:25:04, 10.97it/s]

{'loss': Array(0.15473294, dtype=float32), 'loss_cross_entropy': Array(0.14525405, dtype=float32)}


  8%|▊         | 75139/1000000 [3:26:29<26:30:45,  9.69it/s]

{'loss': Array(0.1492171, dtype=float32), 'loss_cross_entropy': Array(0.14079203, dtype=float32)}


  8%|▊         | 75149/1000000 [3:26:30<23:56:35, 10.73it/s]

{'loss': Array(0.19086106, dtype=float32), 'loss_cross_entropy': Array(0.1802787, dtype=float32)}


  8%|▊         | 75159/1000000 [3:26:32<37:39:43,  6.82it/s]

{'loss': Array(0.16405736, dtype=float32), 'loss_cross_entropy': Array(0.15431096, dtype=float32)}


  8%|▊         | 75169/1000000 [3:26:33<26:22:08,  9.74it/s]

{'loss': Array(0.1479617, dtype=float32), 'loss_cross_entropy': Array(0.13914113, dtype=float32)}


  8%|▊         | 75179/1000000 [3:26:34<24:54:07, 10.32it/s]

{'loss': Array(0.15656434, dtype=float32), 'loss_cross_entropy': Array(0.14740188, dtype=float32)}


  8%|▊         | 75189/1000000 [3:26:36<30:40:12,  8.38it/s]

{'loss': Array(0.17498608, dtype=float32), 'loss_cross_entropy': Array(0.16502227, dtype=float32)}


  8%|▊         | 75199/1000000 [3:26:38<25:02:33, 10.26it/s]

{'loss': Array(0.13975136, dtype=float32), 'loss_cross_entropy': Array(0.13133, dtype=float32)}


  8%|▊         | 75209/1000000 [3:26:39<33:34:31,  7.65it/s]

{'loss': Array(0.15917902, dtype=float32), 'loss_cross_entropy': Array(0.14996478, dtype=float32)}


  8%|▊         | 75219/1000000 [3:26:41<25:31:32, 10.06it/s]

{'loss': Array(0.1572058, dtype=float32), 'loss_cross_entropy': Array(0.14767905, dtype=float32)}


  8%|▊         | 75229/1000000 [3:26:42<33:47:36,  7.60it/s]

{'loss': Array(0.1539925, dtype=float32), 'loss_cross_entropy': Array(0.14528507, dtype=float32)}


  8%|▊         | 75239/1000000 [3:26:44<25:24:16, 10.11it/s]

{'loss': Array(0.13385224, dtype=float32), 'loss_cross_entropy': Array(0.12598939, dtype=float32)}


  8%|▊         | 75249/1000000 [3:26:45<23:52:22, 10.76it/s]

{'loss': Array(0.14581951, dtype=float32), 'loss_cross_entropy': Array(0.1372305, dtype=float32)}


  8%|▊         | 75259/1000000 [3:26:47<29:00:47,  8.85it/s]

{'loss': Array(0.15141399, dtype=float32), 'loss_cross_entropy': Array(0.14253423, dtype=float32)}


  8%|▊         | 75269/1000000 [3:26:48<25:00:10, 10.27it/s]

{'loss': Array(0.15161324, dtype=float32), 'loss_cross_entropy': Array(0.14237903, dtype=float32)}


  8%|▊         | 75279/1000000 [3:26:50<30:39:02,  8.38it/s]

{'loss': Array(0.16496325, dtype=float32), 'loss_cross_entropy': Array(0.15537703, dtype=float32)}


  8%|▊         | 75289/1000000 [3:26:51<24:56:20, 10.30it/s]

{'loss': Array(0.15124285, dtype=float32), 'loss_cross_entropy': Array(0.14229701, dtype=float32)}


  8%|▊         | 75299/1000000 [3:26:53<38:32:54,  6.66it/s]

{'loss': Array(0.15546536, dtype=float32), 'loss_cross_entropy': Array(0.14587438, dtype=float32)}


  8%|▊         | 75309/1000000 [3:26:54<25:48:23,  9.95it/s]

{'loss': Array(0.14332363, dtype=float32), 'loss_cross_entropy': Array(0.13454956, dtype=float32)}


  8%|▊         | 75319/1000000 [3:26:56<37:54:26,  6.78it/s]

{'loss': Array(0.15666853, dtype=float32), 'loss_cross_entropy': Array(0.14818875, dtype=float32)}


  8%|▊         | 75329/1000000 [3:26:57<25:57:19,  9.90it/s]

{'loss': Array(0.15276404, dtype=float32), 'loss_cross_entropy': Array(0.14365919, dtype=float32)}


  8%|▊         | 75339/1000000 [3:26:59<24:57:26, 10.29it/s]

{'loss': Array(0.15604897, dtype=float32), 'loss_cross_entropy': Array(0.14711529, dtype=float32)}


  8%|▊         | 75349/1000000 [3:27:01<34:53:30,  7.36it/s]

{'loss': Array(0.13793173, dtype=float32), 'loss_cross_entropy': Array(0.12963825, dtype=float32)}


  8%|▊         | 75359/1000000 [3:27:02<25:58:18,  9.89it/s]

{'loss': Array(0.13068885, dtype=float32), 'loss_cross_entropy': Array(0.12289822, dtype=float32)}


  8%|▊         | 75369/1000000 [3:27:04<38:59:59,  6.59it/s]

{'loss': Array(0.16212289, dtype=float32), 'loss_cross_entropy': Array(0.15194319, dtype=float32)}


  8%|▊         | 75379/1000000 [3:27:05<27:08:30,  9.46it/s]

{'loss': Array(0.15708774, dtype=float32), 'loss_cross_entropy': Array(0.14854278, dtype=float32)}


  8%|▊         | 75389/1000000 [3:27:07<25:41:46, 10.00it/s]

{'loss': Array(0.15923814, dtype=float32), 'loss_cross_entropy': Array(0.14947958, dtype=float32)}


  8%|▊         | 75399/1000000 [3:27:08<28:40:44,  8.96it/s]

{'loss': Array(0.1596441, dtype=float32), 'loss_cross_entropy': Array(0.14996481, dtype=float32)}


  8%|▊         | 75409/1000000 [3:27:10<25:02:39, 10.26it/s]

{'loss': Array(0.15279935, dtype=float32), 'loss_cross_entropy': Array(0.14409061, dtype=float32)}


  8%|▊         | 75419/1000000 [3:27:11<27:49:44,  9.23it/s]

{'loss': Array(0.17183757, dtype=float32), 'loss_cross_entropy': Array(0.16199905, dtype=float32)}


  8%|▊         | 75429/1000000 [3:27:13<24:31:11, 10.47it/s]

{'loss': Array(0.13400836, dtype=float32), 'loss_cross_entropy': Array(0.12581001, dtype=float32)}


  8%|▊         | 75439/1000000 [3:27:15<37:43:18,  6.81it/s]

{'loss': Array(0.17898974, dtype=float32), 'loss_cross_entropy': Array(0.1685484, dtype=float32)}


  8%|▊         | 75449/1000000 [3:27:16<25:45:59,  9.97it/s]

{'loss': Array(0.14545798, dtype=float32), 'loss_cross_entropy': Array(0.13640833, dtype=float32)}


  8%|▊         | 75459/1000000 [3:27:17<24:17:52, 10.57it/s]

{'loss': Array(0.1553285, dtype=float32), 'loss_cross_entropy': Array(0.14601068, dtype=float32)}


  8%|▊         | 75469/1000000 [3:27:19<30:19:47,  8.47it/s]

{'loss': Array(0.15099381, dtype=float32), 'loss_cross_entropy': Array(0.1410322, dtype=float32)}


  8%|▊         | 75479/1000000 [3:27:20<26:02:34,  9.86it/s]

{'loss': Array(0.16136669, dtype=float32), 'loss_cross_entropy': Array(0.15236716, dtype=float32)}


  8%|▊         | 75489/1000000 [3:27:22<37:28:06,  6.85it/s]

{'loss': Array(0.1531589, dtype=float32), 'loss_cross_entropy': Array(0.14425676, dtype=float32)}


  8%|▊         | 75499/1000000 [3:27:23<26:25:56,  9.72it/s]

{'loss': Array(0.14971992, dtype=float32), 'loss_cross_entropy': Array(0.1406495, dtype=float32)}


  8%|▊         | 75509/1000000 [3:27:31<85:56:43,  2.99it/s] 

{'loss': Array(0.14444017, dtype=float32), 'loss_cross_entropy': Array(0.13551094, dtype=float32)}


  8%|▊         | 75519/1000000 [3:27:33<39:20:23,  6.53it/s]

{'loss': Array(0.1602531, dtype=float32), 'loss_cross_entropy': Array(0.15074386, dtype=float32)}


  8%|▊         | 75529/1000000 [3:27:35<26:57:22,  9.53it/s]

{'loss': Array(0.16574447, dtype=float32), 'loss_cross_entropy': Array(0.15602106, dtype=float32)}


  8%|▊         | 75539/1000000 [3:27:36<31:08:28,  8.25it/s]

{'loss': Array(0.16754018, dtype=float32), 'loss_cross_entropy': Array(0.15783712, dtype=float32)}


  8%|▊         | 75549/1000000 [3:27:38<25:33:38, 10.05it/s]

{'loss': Array(0.1627811, dtype=float32), 'loss_cross_entropy': Array(0.15241475, dtype=float32)}


  8%|▊         | 75559/1000000 [3:27:39<38:00:49,  6.76it/s]

{'loss': Array(0.15908176, dtype=float32), 'loss_cross_entropy': Array(0.14983277, dtype=float32)}


  8%|▊         | 75569/1000000 [3:27:41<26:11:47,  9.80it/s]

{'loss': Array(0.15967955, dtype=float32), 'loss_cross_entropy': Array(0.15003954, dtype=float32)}


  8%|▊         | 75579/1000000 [3:27:42<24:00:17, 10.70it/s]

{'loss': Array(0.15650856, dtype=float32), 'loss_cross_entropy': Array(0.14728226, dtype=float32)}


  8%|▊         | 75589/1000000 [3:27:44<27:31:38,  9.33it/s]

{'loss': Array(0.1671075, dtype=float32), 'loss_cross_entropy': Array(0.15712552, dtype=float32)}


  8%|▊         | 75599/1000000 [3:27:45<24:34:20, 10.45it/s]

{'loss': Array(0.16005382, dtype=float32), 'loss_cross_entropy': Array(0.15116231, dtype=float32)}


  8%|▊         | 75609/1000000 [3:27:47<38:01:16,  6.75it/s]

{'loss': Array(0.13380907, dtype=float32), 'loss_cross_entropy': Array(0.12601659, dtype=float32)}


  8%|▊         | 75619/1000000 [3:27:48<26:04:03,  9.85it/s]

{'loss': Array(0.1609441, dtype=float32), 'loss_cross_entropy': Array(0.15118533, dtype=float32)}


  8%|▊         | 75629/1000000 [3:27:50<25:06:22, 10.23it/s]

{'loss': Array(0.1462426, dtype=float32), 'loss_cross_entropy': Array(0.1378647, dtype=float32)}


  8%|▊         | 75639/1000000 [3:27:51<28:22:03,  9.05it/s]

{'loss': Array(0.13216253, dtype=float32), 'loss_cross_entropy': Array(0.1245473, dtype=float32)}


  8%|▊         | 75649/1000000 [3:27:53<25:02:44, 10.25it/s]

{'loss': Array(0.15055493, dtype=float32), 'loss_cross_entropy': Array(0.14123052, dtype=float32)}


  8%|▊         | 75659/1000000 [3:27:54<33:52:24,  7.58it/s]

{'loss': Array(0.15757227, dtype=float32), 'loss_cross_entropy': Array(0.14878257, dtype=float32)}


  8%|▊         | 75669/1000000 [3:27:56<25:25:03, 10.10it/s]

{'loss': Array(0.13565671, dtype=float32), 'loss_cross_entropy': Array(0.12679933, dtype=float32)}


  8%|▊         | 75679/1000000 [3:27:58<38:01:24,  6.75it/s]

{'loss': Array(0.1554877, dtype=float32), 'loss_cross_entropy': Array(0.14661522, dtype=float32)}


  8%|▊         | 75689/1000000 [3:27:59<28:14:58,  9.09it/s]

{'loss': Array(0.14880273, dtype=float32), 'loss_cross_entropy': Array(0.13956434, dtype=float32)}


  8%|▊         | 75699/1000000 [3:28:00<26:16:51,  9.77it/s]

{'loss': Array(0.17774777, dtype=float32), 'loss_cross_entropy': Array(0.16755368, dtype=float32)}


  8%|▊         | 75709/1000000 [3:28:02<31:39:10,  8.11it/s]

{'loss': Array(0.16382344, dtype=float32), 'loss_cross_entropy': Array(0.15401337, dtype=float32)}


  8%|▊         | 75719/1000000 [3:28:04<25:56:12,  9.90it/s]

{'loss': Array(0.15614262, dtype=float32), 'loss_cross_entropy': Array(0.14672369, dtype=float32)}


  8%|▊         | 75729/1000000 [3:28:05<37:26:58,  6.86it/s]

{'loss': Array(0.15117764, dtype=float32), 'loss_cross_entropy': Array(0.14153014, dtype=float32)}


  8%|▊         | 75739/1000000 [3:28:07<27:02:30,  9.49it/s]

{'loss': Array(0.17628363, dtype=float32), 'loss_cross_entropy': Array(0.16601488, dtype=float32)}


  8%|▊         | 75749/1000000 [3:28:08<25:35:07, 10.03it/s]

{'loss': Array(0.13821448, dtype=float32), 'loss_cross_entropy': Array(0.13015525, dtype=float32)}


  8%|▊         | 75759/1000000 [3:28:10<27:34:47,  9.31it/s]

{'loss': Array(0.14674078, dtype=float32), 'loss_cross_entropy': Array(0.13798952, dtype=float32)}


  8%|▊         | 75769/1000000 [3:28:11<25:05:55, 10.23it/s]

{'loss': Array(0.16988467, dtype=float32), 'loss_cross_entropy': Array(0.15927489, dtype=float32)}


  8%|▊         | 75779/1000000 [3:28:13<37:58:41,  6.76it/s]

{'loss': Array(0.1286689, dtype=float32), 'loss_cross_entropy': Array(0.12053796, dtype=float32)}


  8%|▊         | 75789/1000000 [3:28:15<26:29:42,  9.69it/s]

{'loss': Array(0.15244596, dtype=float32), 'loss_cross_entropy': Array(0.14299987, dtype=float32)}


  8%|▊         | 75799/1000000 [3:28:16<24:14:57, 10.59it/s]

{'loss': Array(0.14916015, dtype=float32), 'loss_cross_entropy': Array(0.14072418, dtype=float32)}


  8%|▊         | 75809/1000000 [3:28:18<31:16:09,  8.21it/s]

{'loss': Array(0.13813584, dtype=float32), 'loss_cross_entropy': Array(0.13009517, dtype=float32)}


  8%|▊         | 75819/1000000 [3:28:19<25:01:55, 10.26it/s]

{'loss': Array(0.14966275, dtype=float32), 'loss_cross_entropy': Array(0.14090645, dtype=float32)}


  8%|▊         | 75829/1000000 [3:28:21<33:20:24,  7.70it/s]

{'loss': Array(0.14227502, dtype=float32), 'loss_cross_entropy': Array(0.13335626, dtype=float32)}


  8%|▊         | 75839/1000000 [3:28:22<25:18:30, 10.14it/s]

{'loss': Array(0.14072932, dtype=float32), 'loss_cross_entropy': Array(0.13235985, dtype=float32)}


  8%|▊         | 75849/1000000 [3:28:24<33:31:37,  7.66it/s]

{'loss': Array(0.14521845, dtype=float32), 'loss_cross_entropy': Array(0.13618267, dtype=float32)}


  8%|▊         | 75859/1000000 [3:28:25<25:11:52, 10.19it/s]

{'loss': Array(0.14117204, dtype=float32), 'loss_cross_entropy': Array(0.13197981, dtype=float32)}


  8%|▊         | 75869/1000000 [3:28:26<23:55:08, 10.73it/s]

{'loss': Array(0.13666013, dtype=float32), 'loss_cross_entropy': Array(0.12814014, dtype=float32)}


  8%|▊         | 75879/1000000 [3:28:28<28:13:40,  9.09it/s]

{'loss': Array(0.15440653, dtype=float32), 'loss_cross_entropy': Array(0.14527832, dtype=float32)}


  8%|▊         | 75889/1000000 [3:28:29<24:27:11, 10.50it/s]

{'loss': Array(0.15926473, dtype=float32), 'loss_cross_entropy': Array(0.15038757, dtype=float32)}


  8%|▊         | 75899/1000000 [3:28:31<30:27:08,  8.43it/s]

{'loss': Array(0.16553481, dtype=float32), 'loss_cross_entropy': Array(0.15591459, dtype=float32)}


  8%|▊         | 75909/1000000 [3:28:33<24:57:39, 10.28it/s]

{'loss': Array(0.18291555, dtype=float32), 'loss_cross_entropy': Array(0.17218274, dtype=float32)}


  8%|▊         | 75919/1000000 [3:28:34<37:53:21,  6.77it/s]

{'loss': Array(0.15348257, dtype=float32), 'loss_cross_entropy': Array(0.1450276, dtype=float32)}


  8%|▊         | 75929/1000000 [3:28:36<26:50:24,  9.56it/s]

{'loss': Array(0.1624495, dtype=float32), 'loss_cross_entropy': Array(0.1527199, dtype=float32)}


  8%|▊         | 75939/1000000 [3:28:37<37:45:30,  6.80it/s]

{'loss': Array(0.14778166, dtype=float32), 'loss_cross_entropy': Array(0.13825901, dtype=float32)}


  8%|▊         | 75949/1000000 [3:28:39<26:18:53,  9.75it/s]

{'loss': Array(0.15066274, dtype=float32), 'loss_cross_entropy': Array(0.14175908, dtype=float32)}


  8%|▊         | 75959/1000000 [3:28:40<24:05:58, 10.65it/s]

{'loss': Array(0.1433941, dtype=float32), 'loss_cross_entropy': Array(0.13418813, dtype=float32)}


  8%|▊         | 75969/1000000 [3:28:42<33:33:26,  7.65it/s]

{'loss': Array(0.16640688, dtype=float32), 'loss_cross_entropy': Array(0.15677178, dtype=float32)}


  8%|▊         | 75979/1000000 [3:28:43<25:25:02, 10.10it/s]

{'loss': Array(0.15950108, dtype=float32), 'loss_cross_entropy': Array(0.1499824, dtype=float32)}


  8%|▊         | 75989/1000000 [3:28:45<37:55:34,  6.77it/s]

{'loss': Array(0.14732522, dtype=float32), 'loss_cross_entropy': Array(0.1384944, dtype=float32)}


  8%|▊         | 75999/1000000 [3:28:46<25:51:08,  9.93it/s]

{'loss': Array(0.15104832, dtype=float32), 'loss_cross_entropy': Array(0.14254896, dtype=float32)}


  8%|▊         | 76009/1000000 [3:28:54<85:07:28,  3.02it/s] 

{'loss': Array(0.14804476, dtype=float32), 'loss_cross_entropy': Array(0.1392631, dtype=float32)}


  8%|▊         | 76019/1000000 [3:28:56<37:00:09,  6.94it/s] 

{'loss': Array(0.1271905, dtype=float32), 'loss_cross_entropy': Array(0.11974937, dtype=float32)}


  8%|▊         | 76029/1000000 [3:28:57<26:15:25,  9.77it/s]

{'loss': Array(0.14974383, dtype=float32), 'loss_cross_entropy': Array(0.14063983, dtype=float32)}


  8%|▊         | 76039/1000000 [3:28:59<28:31:56,  9.00it/s]

{'loss': Array(0.14283308, dtype=float32), 'loss_cross_entropy': Array(0.13432972, dtype=float32)}


  8%|▊         | 76049/1000000 [3:29:00<25:33:32, 10.04it/s]

{'loss': Array(0.14863454, dtype=float32), 'loss_cross_entropy': Array(0.13960914, dtype=float32)}


  8%|▊         | 76059/1000000 [3:29:02<39:05:23,  6.57it/s]

{'loss': Array(0.14344887, dtype=float32), 'loss_cross_entropy': Array(0.13517682, dtype=float32)}


  8%|▊         | 76069/1000000 [3:29:04<26:41:00,  9.62it/s]

{'loss': Array(0.15323994, dtype=float32), 'loss_cross_entropy': Array(0.14468707, dtype=float32)}


  8%|▊         | 76079/1000000 [3:29:05<24:47:08, 10.35it/s]

{'loss': Array(0.15387288, dtype=float32), 'loss_cross_entropy': Array(0.14503013, dtype=float32)}


  8%|▊         | 76089/1000000 [3:29:07<31:37:53,  8.11it/s]

{'loss': Array(0.13867827, dtype=float32), 'loss_cross_entropy': Array(0.13028947, dtype=float32)}


  8%|▊         | 76099/1000000 [3:29:08<25:30:14, 10.06it/s]

{'loss': Array(0.18333481, dtype=float32), 'loss_cross_entropy': Array(0.17321639, dtype=float32)}


  8%|▊         | 76109/1000000 [3:29:10<38:02:52,  6.75it/s]

{'loss': Array(0.15367958, dtype=float32), 'loss_cross_entropy': Array(0.14436205, dtype=float32)}


  8%|▊         | 76119/1000000 [3:29:11<26:06:54,  9.83it/s]

{'loss': Array(0.14766686, dtype=float32), 'loss_cross_entropy': Array(0.1391302, dtype=float32)}


  8%|▊         | 76129/1000000 [3:29:13<24:37:44, 10.42it/s]

{'loss': Array(0.14572923, dtype=float32), 'loss_cross_entropy': Array(0.13661598, dtype=float32)}


  8%|▊         | 76139/1000000 [3:29:14<28:51:09,  8.89it/s]

{'loss': Array(0.16105501, dtype=float32), 'loss_cross_entropy': Array(0.15197307, dtype=float32)}


  8%|▊         | 76149/1000000 [3:29:16<25:03:35, 10.24it/s]

{'loss': Array(0.15322421, dtype=float32), 'loss_cross_entropy': Array(0.14431976, dtype=float32)}


  8%|▊         | 76159/1000000 [3:29:17<34:14:46,  7.49it/s]

{'loss': Array(0.1512272, dtype=float32), 'loss_cross_entropy': Array(0.14251141, dtype=float32)}


  8%|▊         | 76169/1000000 [3:29:19<26:01:45,  9.86it/s]

{'loss': Array(0.14336795, dtype=float32), 'loss_cross_entropy': Array(0.13478296, dtype=float32)}


  8%|▊         | 76179/1000000 [3:29:20<24:49:23, 10.34it/s]

{'loss': Array(0.16166703, dtype=float32), 'loss_cross_entropy': Array(0.15260065, dtype=float32)}


  8%|▊         | 76189/1000000 [3:29:22<30:07:03,  8.52it/s]

{'loss': Array(0.14490366, dtype=float32), 'loss_cross_entropy': Array(0.13653247, dtype=float32)}


  8%|▊         | 76199/1000000 [3:29:23<24:46:13, 10.36it/s]

{'loss': Array(0.15713541, dtype=float32), 'loss_cross_entropy': Array(0.14806311, dtype=float32)}


  8%|▊         | 76209/1000000 [3:29:25<23:51:48, 10.75it/s]

{'loss': Array(0.16825475, dtype=float32), 'loss_cross_entropy': Array(0.15826236, dtype=float32)}


  8%|▊         | 76219/1000000 [3:29:26<27:34:11,  9.31it/s]

{'loss': Array(0.16308789, dtype=float32), 'loss_cross_entropy': Array(0.153922, dtype=float32)}


  8%|▊         | 76229/1000000 [3:29:28<25:17:08, 10.15it/s]

{'loss': Array(0.16265564, dtype=float32), 'loss_cross_entropy': Array(0.15323424, dtype=float32)}


  8%|▊         | 76239/1000000 [3:29:30<31:31:59,  8.14it/s]

{'loss': Array(0.1430987, dtype=float32), 'loss_cross_entropy': Array(0.1349053, dtype=float32)}


  8%|▊         | 76249/1000000 [3:29:31<25:02:36, 10.25it/s]

{'loss': Array(0.15220349, dtype=float32), 'loss_cross_entropy': Array(0.14305845, dtype=float32)}


  8%|▊         | 76259/1000000 [3:29:33<37:49:32,  6.78it/s]

{'loss': Array(0.15987815, dtype=float32), 'loss_cross_entropy': Array(0.15051107, dtype=float32)}


  8%|▊         | 76269/1000000 [3:29:34<26:57:00,  9.52it/s]

{'loss': Array(0.14780007, dtype=float32), 'loss_cross_entropy': Array(0.13868445, dtype=float32)}


  8%|▊         | 76279/1000000 [3:29:36<38:39:44,  6.64it/s]

{'loss': Array(0.14279775, dtype=float32), 'loss_cross_entropy': Array(0.13421802, dtype=float32)}


  8%|▊         | 76289/1000000 [3:29:37<26:15:58,  9.77it/s]

{'loss': Array(0.16314915, dtype=float32), 'loss_cross_entropy': Array(0.15325528, dtype=float32)}


  8%|▊         | 76299/1000000 [3:29:38<24:02:34, 10.67it/s]

{'loss': Array(0.14412522, dtype=float32), 'loss_cross_entropy': Array(0.13588184, dtype=float32)}


  8%|▊         | 76309/1000000 [3:29:40<32:56:44,  7.79it/s]

{'loss': Array(0.13977806, dtype=float32), 'loss_cross_entropy': Array(0.13091795, dtype=float32)}


  8%|▊         | 76319/1000000 [3:29:41<25:07:32, 10.21it/s]

{'loss': Array(0.16162013, dtype=float32), 'loss_cross_entropy': Array(0.15184407, dtype=float32)}


  8%|▊         | 76329/1000000 [3:29:43<37:40:28,  6.81it/s]

{'loss': Array(0.14327443, dtype=float32), 'loss_cross_entropy': Array(0.13423051, dtype=float32)}


  8%|▊         | 76339/1000000 [3:29:45<25:56:59,  9.89it/s]

{'loss': Array(0.17480175, dtype=float32), 'loss_cross_entropy': Array(0.16458997, dtype=float32)}


  8%|▊         | 76349/1000000 [3:29:46<24:09:04, 10.62it/s]

{'loss': Array(0.15584652, dtype=float32), 'loss_cross_entropy': Array(0.14644645, dtype=float32)}


  8%|▊         | 76359/1000000 [3:29:48<26:39:47,  9.62it/s]

{'loss': Array(0.14727613, dtype=float32), 'loss_cross_entropy': Array(0.13849716, dtype=float32)}


  8%|▊         | 76369/1000000 [3:29:49<23:51:11, 10.76it/s]

{'loss': Array(0.15339725, dtype=float32), 'loss_cross_entropy': Array(0.14442135, dtype=float32)}


  8%|▊         | 76379/1000000 [3:29:51<27:34:58,  9.30it/s]

{'loss': Array(0.1358674, dtype=float32), 'loss_cross_entropy': Array(0.12745146, dtype=float32)}


  8%|▊         | 76389/1000000 [3:29:52<23:59:36, 10.69it/s]

{'loss': Array(0.14705443, dtype=float32), 'loss_cross_entropy': Array(0.1379803, dtype=float32)}


  8%|▊         | 76399/1000000 [3:29:54<37:05:57,  6.92it/s]

{'loss': Array(0.13272539, dtype=float32), 'loss_cross_entropy': Array(0.12397792, dtype=float32)}


  8%|▊         | 76409/1000000 [3:29:55<25:50:31,  9.93it/s]

{'loss': Array(0.17556542, dtype=float32), 'loss_cross_entropy': Array(0.16543558, dtype=float32)}


  8%|▊         | 76419/1000000 [3:29:56<24:07:50, 10.63it/s]

{'loss': Array(0.16742174, dtype=float32), 'loss_cross_entropy': Array(0.15804915, dtype=float32)}


  8%|▊         | 76429/1000000 [3:29:58<31:01:56,  8.27it/s]

{'loss': Array(0.1519324, dtype=float32), 'loss_cross_entropy': Array(0.14319584, dtype=float32)}


  8%|▊         | 76439/1000000 [3:29:59<25:58:13,  9.88it/s]

{'loss': Array(0.15496922, dtype=float32), 'loss_cross_entropy': Array(0.14468537, dtype=float32)}


  8%|▊         | 76449/1000000 [3:30:01<39:50:44,  6.44it/s]

{'loss': Array(0.14425781, dtype=float32), 'loss_cross_entropy': Array(0.13577501, dtype=float32)}


  8%|▊         | 76459/1000000 [3:30:03<27:41:16,  9.27it/s]

{'loss': Array(0.1412675, dtype=float32), 'loss_cross_entropy': Array(0.13325001, dtype=float32)}


  8%|▊         | 76469/1000000 [3:30:04<24:56:24, 10.29it/s]

{'loss': Array(0.17590046, dtype=float32), 'loss_cross_entropy': Array(0.16524222, dtype=float32)}


  8%|▊         | 76479/1000000 [3:30:06<29:56:45,  8.57it/s]

{'loss': Array(0.15127419, dtype=float32), 'loss_cross_entropy': Array(0.14192879, dtype=float32)}


  8%|▊         | 76489/1000000 [3:30:07<26:13:44,  9.78it/s]

{'loss': Array(0.12893437, dtype=float32), 'loss_cross_entropy': Array(0.12160207, dtype=float32)}


  8%|▊         | 76499/1000000 [3:30:09<34:55:18,  7.35it/s]

{'loss': Array(0.15810883, dtype=float32), 'loss_cross_entropy': Array(0.14882815, dtype=float32)}


  8%|▊         | 76509/1000000 [3:30:17<85:02:40,  3.02it/s] 

{'loss': Array(0.13835089, dtype=float32), 'loss_cross_entropy': Array(0.12979715, dtype=float32)}


  8%|▊         | 76519/1000000 [3:30:19<43:27:52,  5.90it/s]

{'loss': Array(0.16170014, dtype=float32), 'loss_cross_entropy': Array(0.15216881, dtype=float32)}


  8%|▊         | 76529/1000000 [3:30:20<26:46:13,  9.58it/s]

{'loss': Array(0.1444148, dtype=float32), 'loss_cross_entropy': Array(0.13542823, dtype=float32)}


  8%|▊         | 76539/1000000 [3:30:21<24:27:24, 10.49it/s]

{'loss': Array(0.15127444, dtype=float32), 'loss_cross_entropy': Array(0.14230885, dtype=float32)}


  8%|▊         | 76549/1000000 [3:30:23<30:48:36,  8.33it/s]

{'loss': Array(0.1482323, dtype=float32), 'loss_cross_entropy': Array(0.13940515, dtype=float32)}


  8%|▊         | 76559/1000000 [3:30:24<25:04:37, 10.23it/s]

{'loss': Array(0.13641497, dtype=float32), 'loss_cross_entropy': Array(0.1281817, dtype=float32)}


  8%|▊         | 76569/1000000 [3:30:26<23:59:14, 10.69it/s]

{'loss': Array(0.16221982, dtype=float32), 'loss_cross_entropy': Array(0.15315713, dtype=float32)}


  8%|▊         | 76579/1000000 [3:30:27<27:35:23,  9.30it/s]

{'loss': Array(0.13797772, dtype=float32), 'loss_cross_entropy': Array(0.12938012, dtype=float32)}


  8%|▊         | 76589/1000000 [3:30:29<24:19:47, 10.54it/s]

{'loss': Array(0.14199224, dtype=float32), 'loss_cross_entropy': Array(0.13349177, dtype=float32)}


  8%|▊         | 76599/1000000 [3:30:30<30:30:57,  8.41it/s]

{'loss': Array(0.14195777, dtype=float32), 'loss_cross_entropy': Array(0.13367325, dtype=float32)}


  8%|▊         | 76609/1000000 [3:30:32<24:40:48, 10.39it/s]

{'loss': Array(0.15218064, dtype=float32), 'loss_cross_entropy': Array(0.1436437, dtype=float32)}


  8%|▊         | 76619/1000000 [3:30:34<34:33:02,  7.42it/s]

{'loss': Array(0.1647927, dtype=float32), 'loss_cross_entropy': Array(0.15562597, dtype=float32)}


  8%|▊         | 76629/1000000 [3:30:35<25:30:20, 10.06it/s]

{'loss': Array(0.15723158, dtype=float32), 'loss_cross_entropy': Array(0.14778928, dtype=float32)}


  8%|▊         | 76639/1000000 [3:30:36<24:05:13, 10.65it/s]

{'loss': Array(0.15772942, dtype=float32), 'loss_cross_entropy': Array(0.14824526, dtype=float32)}


  8%|▊         | 76649/1000000 [3:30:38<33:11:12,  7.73it/s]

{'loss': Array(0.1569735, dtype=float32), 'loss_cross_entropy': Array(0.14723869, dtype=float32)}


  8%|▊         | 76659/1000000 [3:30:39<25:05:15, 10.22it/s]

{'loss': Array(0.15610123, dtype=float32), 'loss_cross_entropy': Array(0.14699833, dtype=float32)}


  8%|▊         | 76669/1000000 [3:30:41<23:46:22, 10.79it/s]

{'loss': Array(0.14732943, dtype=float32), 'loss_cross_entropy': Array(0.13836807, dtype=float32)}


  8%|▊         | 76679/1000000 [3:30:42<27:17:41,  9.40it/s]

{'loss': Array(0.1471462, dtype=float32), 'loss_cross_entropy': Array(0.13760856, dtype=float32)}


  8%|▊         | 76689/1000000 [3:30:44<24:10:45, 10.61it/s]

{'loss': Array(0.15921144, dtype=float32), 'loss_cross_entropy': Array(0.14993161, dtype=float32)}


  8%|▊         | 76699/1000000 [3:30:45<30:55:09,  8.29it/s]

{'loss': Array(0.17922185, dtype=float32), 'loss_cross_entropy': Array(0.1695331, dtype=float32)}


  8%|▊         | 76709/1000000 [3:30:47<25:04:49, 10.23it/s]

{'loss': Array(0.14299886, dtype=float32), 'loss_cross_entropy': Array(0.13475351, dtype=float32)}


  8%|▊         | 76719/1000000 [3:30:49<38:17:39,  6.70it/s]

{'loss': Array(0.1499851, dtype=float32), 'loss_cross_entropy': Array(0.1411764, dtype=float32)}


  8%|▊         | 76729/1000000 [3:30:50<26:00:50,  9.86it/s]

{'loss': Array(0.17104042, dtype=float32), 'loss_cross_entropy': Array(0.16098343, dtype=float32)}


  8%|▊         | 76739/1000000 [3:30:51<24:16:27, 10.57it/s]

{'loss': Array(0.14259918, dtype=float32), 'loss_cross_entropy': Array(0.13370393, dtype=float32)}


  8%|▊         | 76749/1000000 [3:30:53<33:22:05,  7.69it/s]

{'loss': Array(0.14728776, dtype=float32), 'loss_cross_entropy': Array(0.13813125, dtype=float32)}


  8%|▊         | 76759/1000000 [3:30:54<25:24:01, 10.10it/s]

{'loss': Array(0.147025, dtype=float32), 'loss_cross_entropy': Array(0.1384264, dtype=float32)}


  8%|▊         | 76769/1000000 [3:30:56<23:44:05, 10.80it/s]

{'loss': Array(0.15253656, dtype=float32), 'loss_cross_entropy': Array(0.1433644, dtype=float32)}


  8%|▊         | 76779/1000000 [3:30:57<28:17:51,  9.06it/s]

{'loss': Array(0.15930183, dtype=float32), 'loss_cross_entropy': Array(0.14980195, dtype=float32)}


  8%|▊         | 76789/1000000 [3:30:59<25:05:14, 10.22it/s]

{'loss': Array(0.15414584, dtype=float32), 'loss_cross_entropy': Array(0.1451566, dtype=float32)}


  8%|▊         | 76799/1000000 [3:31:01<31:51:42,  8.05it/s]

{'loss': Array(0.15760645, dtype=float32), 'loss_cross_entropy': Array(0.14882615, dtype=float32)}


  8%|▊         | 76809/1000000 [3:31:02<25:57:26,  9.88it/s]

{'loss': Array(0.15540963, dtype=float32), 'loss_cross_entropy': Array(0.14623341, dtype=float32)}


  8%|▊         | 76819/1000000 [3:31:03<25:09:47, 10.19it/s]

{'loss': Array(0.15312794, dtype=float32), 'loss_cross_entropy': Array(0.14426224, dtype=float32)}


  8%|▊         | 76829/1000000 [3:31:05<28:08:57,  9.11it/s]

{'loss': Array(0.13760202, dtype=float32), 'loss_cross_entropy': Array(0.12816639, dtype=float32)}


  8%|▊         | 76839/1000000 [3:31:07<25:35:03, 10.02it/s]

{'loss': Array(0.15447049, dtype=float32), 'loss_cross_entropy': Array(0.14491089, dtype=float32)}


  8%|▊         | 76849/1000000 [3:31:08<34:57:07,  7.34it/s]

{'loss': Array(0.13628611, dtype=float32), 'loss_cross_entropy': Array(0.12852298, dtype=float32)}


  8%|▊         | 76859/1000000 [3:31:10<25:51:27,  9.92it/s]

{'loss': Array(0.15080263, dtype=float32), 'loss_cross_entropy': Array(0.14180304, dtype=float32)}


  8%|▊         | 76869/1000000 [3:31:11<24:07:48, 10.63it/s]

{'loss': Array(0.13547693, dtype=float32), 'loss_cross_entropy': Array(0.12760496, dtype=float32)}


  8%|▊         | 76879/1000000 [3:31:13<26:36:23,  9.64it/s]

{'loss': Array(0.18439512, dtype=float32), 'loss_cross_entropy': Array(0.17403637, dtype=float32)}


  8%|▊         | 76889/1000000 [3:31:14<24:23:50, 10.51it/s]

{'loss': Array(0.17897467, dtype=float32), 'loss_cross_entropy': Array(0.16914289, dtype=float32)}


  8%|▊         | 76899/1000000 [3:31:16<30:35:40,  8.38it/s]

{'loss': Array(0.16831818, dtype=float32), 'loss_cross_entropy': Array(0.1583006, dtype=float32)}


  8%|▊         | 76909/1000000 [3:31:17<24:47:54, 10.34it/s]

{'loss': Array(0.14319703, dtype=float32), 'loss_cross_entropy': Array(0.13435203, dtype=float32)}


  8%|▊         | 76919/1000000 [3:31:19<37:45:47,  6.79it/s]

{'loss': Array(0.14627947, dtype=float32), 'loss_cross_entropy': Array(0.13698842, dtype=float32)}


  8%|▊         | 76929/1000000 [3:31:20<25:47:52,  9.94it/s]

{'loss': Array(0.14657693, dtype=float32), 'loss_cross_entropy': Array(0.13810737, dtype=float32)}


  8%|▊         | 76939/1000000 [3:31:22<24:09:29, 10.61it/s]

{'loss': Array(0.17135993, dtype=float32), 'loss_cross_entropy': Array(0.16145916, dtype=float32)}


  8%|▊         | 76949/1000000 [3:31:23<30:58:03,  8.28it/s]

{'loss': Array(0.16063988, dtype=float32), 'loss_cross_entropy': Array(0.15084134, dtype=float32)}


  8%|▊         | 76959/1000000 [3:31:25<25:05:03, 10.22it/s]

{'loss': Array(0.15766747, dtype=float32), 'loss_cross_entropy': Array(0.1482193, dtype=float32)}


  8%|▊         | 76969/1000000 [3:31:26<34:10:28,  7.50it/s]

{'loss': Array(0.15338705, dtype=float32), 'loss_cross_entropy': Array(0.14462374, dtype=float32)}


  8%|▊         | 76979/1000000 [3:31:28<25:20:07, 10.12it/s]

{'loss': Array(0.14759578, dtype=float32), 'loss_cross_entropy': Array(0.13843565, dtype=float32)}


  8%|▊         | 76989/1000000 [3:31:30<34:27:21,  7.44it/s]

{'loss': Array(0.1613322, dtype=float32), 'loss_cross_entropy': Array(0.15156195, dtype=float32)}


  8%|▊         | 76999/1000000 [3:31:31<25:54:41,  9.89it/s]

{'loss': Array(0.154683, dtype=float32), 'loss_cross_entropy': Array(0.14470012, dtype=float32)}


  8%|▊         | 77009/1000000 [3:31:39<84:21:33,  3.04it/s] 

{'loss': Array(0.15505347, dtype=float32), 'loss_cross_entropy': Array(0.14577416, dtype=float32)}


  8%|▊         | 77019/1000000 [3:31:41<38:55:16,  6.59it/s]

{'loss': Array(0.17138426, dtype=float32), 'loss_cross_entropy': Array(0.16138205, dtype=float32)}


  8%|▊         | 77029/1000000 [3:31:42<26:01:34,  9.85it/s]

{'loss': Array(0.15094405, dtype=float32), 'loss_cross_entropy': Array(0.14207803, dtype=float32)}


  8%|▊         | 77039/1000000 [3:31:44<34:16:04,  7.48it/s]

{'loss': Array(0.1489065, dtype=float32), 'loss_cross_entropy': Array(0.14015523, dtype=float32)}


  8%|▊         | 77049/1000000 [3:31:45<25:03:19, 10.23it/s]

{'loss': Array(0.15497737, dtype=float32), 'loss_cross_entropy': Array(0.14604805, dtype=float32)}


  8%|▊         | 77059/1000000 [3:31:47<37:23:36,  6.86it/s]

{'loss': Array(0.13756149, dtype=float32), 'loss_cross_entropy': Array(0.12894551, dtype=float32)}


  8%|▊         | 77069/1000000 [3:31:48<25:57:28,  9.88it/s]

{'loss': Array(0.15351255, dtype=float32), 'loss_cross_entropy': Array(0.14483671, dtype=float32)}


  8%|▊         | 77079/1000000 [3:31:49<24:33:43, 10.44it/s]

{'loss': Array(0.15501171, dtype=float32), 'loss_cross_entropy': Array(0.14609663, dtype=float32)}


  8%|▊         | 77089/1000000 [3:31:51<31:02:36,  8.26it/s]

{'loss': Array(0.1645772, dtype=float32), 'loss_cross_entropy': Array(0.1547176, dtype=float32)}


  8%|▊         | 77099/1000000 [3:31:52<24:49:22, 10.33it/s]

{'loss': Array(0.15679097, dtype=float32), 'loss_cross_entropy': Array(0.14829595, dtype=float32)}


  8%|▊         | 77109/1000000 [3:31:54<33:21:27,  7.69it/s]

{'loss': Array(0.16242301, dtype=float32), 'loss_cross_entropy': Array(0.15270036, dtype=float32)}


  8%|▊         | 77119/1000000 [3:31:56<25:54:40,  9.89it/s]

{'loss': Array(0.14830582, dtype=float32), 'loss_cross_entropy': Array(0.13992405, dtype=float32)}


  8%|▊         | 77129/1000000 [3:31:57<24:34:58, 10.43it/s]

{'loss': Array(0.15592466, dtype=float32), 'loss_cross_entropy': Array(0.147125, dtype=float32)}


  8%|▊         | 77139/1000000 [3:31:59<26:52:27,  9.54it/s]

{'loss': Array(0.15240093, dtype=float32), 'loss_cross_entropy': Array(0.14426027, dtype=float32)}


  8%|▊         | 77149/1000000 [3:32:00<25:03:21, 10.23it/s]

{'loss': Array(0.15952232, dtype=float32), 'loss_cross_entropy': Array(0.1500696, dtype=float32)}


  8%|▊         | 77159/1000000 [3:32:02<38:29:39,  6.66it/s]

{'loss': Array(0.14002524, dtype=float32), 'loss_cross_entropy': Array(0.13211034, dtype=float32)}


  8%|▊         | 77169/1000000 [3:32:03<27:04:09,  9.47it/s]

{'loss': Array(0.15045963, dtype=float32), 'loss_cross_entropy': Array(0.14127506, dtype=float32)}


  8%|▊         | 77179/1000000 [3:32:05<25:22:42, 10.10it/s]

{'loss': Array(0.13802247, dtype=float32), 'loss_cross_entropy': Array(0.13002302, dtype=float32)}


  8%|▊         | 77189/1000000 [3:32:06<31:50:17,  8.05it/s]

{'loss': Array(0.16567259, dtype=float32), 'loss_cross_entropy': Array(0.15608008, dtype=float32)}


  8%|▊         | 77199/1000000 [3:32:08<26:20:03,  9.73it/s]

{'loss': Array(0.16007952, dtype=float32), 'loss_cross_entropy': Array(0.15020376, dtype=float32)}


  8%|▊         | 77209/1000000 [3:32:10<34:49:01,  7.36it/s]

{'loss': Array(0.14667171, dtype=float32), 'loss_cross_entropy': Array(0.13771403, dtype=float32)}


  8%|▊         | 77219/1000000 [3:32:11<26:17:32,  9.75it/s]

{'loss': Array(0.15508954, dtype=float32), 'loss_cross_entropy': Array(0.14576031, dtype=float32)}


  8%|▊         | 77229/1000000 [3:32:13<34:31:30,  7.42it/s]

{'loss': Array(0.16451831, dtype=float32), 'loss_cross_entropy': Array(0.15464254, dtype=float32)}


  8%|▊         | 77239/1000000 [3:32:14<25:20:56, 10.11it/s]

{'loss': Array(0.15086794, dtype=float32), 'loss_cross_entropy': Array(0.14171286, dtype=float32)}


  8%|▊         | 77249/1000000 [3:32:15<24:13:47, 10.58it/s]

{'loss': Array(0.14208046, dtype=float32), 'loss_cross_entropy': Array(0.1339367, dtype=float32)}


  8%|▊         | 77259/1000000 [3:32:17<28:53:40,  8.87it/s]

{'loss': Array(0.16729303, dtype=float32), 'loss_cross_entropy': Array(0.15728071, dtype=float32)}


  8%|▊         | 77269/1000000 [3:32:19<24:52:40, 10.30it/s]

{'loss': Array(0.14470048, dtype=float32), 'loss_cross_entropy': Array(0.13599724, dtype=float32)}


  8%|▊         | 77279/1000000 [3:32:20<30:46:34,  8.33it/s]

{'loss': Array(0.17251945, dtype=float32), 'loss_cross_entropy': Array(0.16218634, dtype=float32)}


  8%|▊         | 77289/1000000 [3:32:22<25:19:14, 10.12it/s]

{'loss': Array(0.1604567, dtype=float32), 'loss_cross_entropy': Array(0.15107468, dtype=float32)}


  8%|▊         | 77299/1000000 [3:32:23<37:52:15,  6.77it/s]

{'loss': Array(0.15868586, dtype=float32), 'loss_cross_entropy': Array(0.14953244, dtype=float32)}


  8%|▊         | 77309/1000000 [3:32:25<25:47:16,  9.94it/s]

{'loss': Array(0.16675544, dtype=float32), 'loss_cross_entropy': Array(0.15700136, dtype=float32)}


  8%|▊         | 77319/1000000 [3:32:26<37:48:21,  6.78it/s]

{'loss': Array(0.15089753, dtype=float32), 'loss_cross_entropy': Array(0.14195244, dtype=float32)}


  8%|▊         | 77329/1000000 [3:32:28<26:38:13,  9.62it/s]

{'loss': Array(0.1372385, dtype=float32), 'loss_cross_entropy': Array(0.12835646, dtype=float32)}


  8%|▊         | 77339/1000000 [3:32:29<24:14:06, 10.58it/s]

{'loss': Array(0.15400825, dtype=float32), 'loss_cross_entropy': Array(0.14480706, dtype=float32)}


  8%|▊         | 77349/1000000 [3:32:31<32:56:13,  7.78it/s]

{'loss': Array(0.15515625, dtype=float32), 'loss_cross_entropy': Array(0.14591388, dtype=float32)}


  8%|▊         | 77359/1000000 [3:32:32<25:22:07, 10.10it/s]

{'loss': Array(0.1535704, dtype=float32), 'loss_cross_entropy': Array(0.14401926, dtype=float32)}


  8%|▊         | 77369/1000000 [3:32:34<37:44:30,  6.79it/s]

{'loss': Array(0.16360985, dtype=float32), 'loss_cross_entropy': Array(0.1539764, dtype=float32)}


  8%|▊         | 77379/1000000 [3:32:35<26:25:43,  9.70it/s]

{'loss': Array(0.16668245, dtype=float32), 'loss_cross_entropy': Array(0.15688081, dtype=float32)}


  8%|▊         | 77389/1000000 [3:32:37<24:07:11, 10.63it/s]

{'loss': Array(0.16729794, dtype=float32), 'loss_cross_entropy': Array(0.15756123, dtype=float32)}


  8%|▊         | 77399/1000000 [3:32:38<27:07:16,  9.45it/s]

{'loss': Array(0.16192421, dtype=float32), 'loss_cross_entropy': Array(0.15235086, dtype=float32)}


  8%|▊         | 77409/1000000 [3:32:40<24:28:21, 10.47it/s]

{'loss': Array(0.15227102, dtype=float32), 'loss_cross_entropy': Array(0.1425777, dtype=float32)}


  8%|▊         | 77419/1000000 [3:32:41<27:06:35,  9.45it/s]

{'loss': Array(0.14590953, dtype=float32), 'loss_cross_entropy': Array(0.13758992, dtype=float32)}


  8%|▊         | 77429/1000000 [3:32:43<24:02:57, 10.66it/s]

{'loss': Array(0.14178638, dtype=float32), 'loss_cross_entropy': Array(0.13372976, dtype=float32)}


  8%|▊         | 77439/1000000 [3:32:44<37:44:32,  6.79it/s]

{'loss': Array(0.14642398, dtype=float32), 'loss_cross_entropy': Array(0.13773537, dtype=float32)}


  8%|▊         | 77449/1000000 [3:32:46<25:48:56,  9.93it/s]

{'loss': Array(0.14362405, dtype=float32), 'loss_cross_entropy': Array(0.13449572, dtype=float32)}


  8%|▊         | 77459/1000000 [3:32:47<24:07:46, 10.62it/s]

{'loss': Array(0.15891282, dtype=float32), 'loss_cross_entropy': Array(0.14967464, dtype=float32)}


  8%|▊         | 77469/1000000 [3:32:49<30:57:48,  8.28it/s]

{'loss': Array(0.1291676, dtype=float32), 'loss_cross_entropy': Array(0.12174205, dtype=float32)}


  8%|▊         | 77479/1000000 [3:32:50<25:05:40, 10.21it/s]

{'loss': Array(0.15510857, dtype=float32), 'loss_cross_entropy': Array(0.1465985, dtype=float32)}


  8%|▊         | 77489/1000000 [3:32:52<37:53:03,  6.76it/s]

{'loss': Array(0.1431818, dtype=float32), 'loss_cross_entropy': Array(0.13461973, dtype=float32)}


  8%|▊         | 77499/1000000 [3:32:53<27:08:26,  9.44it/s]

{'loss': Array(0.14020085, dtype=float32), 'loss_cross_entropy': Array(0.13186452, dtype=float32)}


  8%|▊         | 77509/1000000 [3:33:01<85:48:39,  2.99it/s] 

{'loss': Array(0.13687882, dtype=float32), 'loss_cross_entropy': Array(0.12863675, dtype=float32)}


  8%|▊         | 77519/1000000 [3:33:03<39:38:39,  6.46it/s]

{'loss': Array(0.1533473, dtype=float32), 'loss_cross_entropy': Array(0.14446582, dtype=float32)}


  8%|▊         | 77529/1000000 [3:33:04<27:19:31,  9.38it/s]

{'loss': Array(0.15220849, dtype=float32), 'loss_cross_entropy': Array(0.14284907, dtype=float32)}


  8%|▊         | 77539/1000000 [3:33:06<32:04:14,  7.99it/s]

{'loss': Array(0.15528496, dtype=float32), 'loss_cross_entropy': Array(0.1465536, dtype=float32)}


  8%|▊         | 77549/1000000 [3:33:08<27:25:42,  9.34it/s]

{'loss': Array(0.17821203, dtype=float32), 'loss_cross_entropy': Array(0.16841617, dtype=float32)}


  8%|▊         | 77559/1000000 [3:33:09<39:31:22,  6.48it/s]

{'loss': Array(0.15530321, dtype=float32), 'loss_cross_entropy': Array(0.14654276, dtype=float32)}


  8%|▊         | 77569/1000000 [3:33:11<26:03:51,  9.83it/s]

{'loss': Array(0.12440001, dtype=float32), 'loss_cross_entropy': Array(0.1165874, dtype=float32)}


  8%|▊         | 77579/1000000 [3:33:12<24:07:37, 10.62it/s]

{'loss': Array(0.1338816, dtype=float32), 'loss_cross_entropy': Array(0.12551373, dtype=float32)}


  8%|▊         | 77589/1000000 [3:33:14<26:53:10,  9.53it/s]

{'loss': Array(0.17140289, dtype=float32), 'loss_cross_entropy': Array(0.16152315, dtype=float32)}


  8%|▊         | 77599/1000000 [3:33:15<23:50:51, 10.74it/s]

{'loss': Array(0.14788543, dtype=float32), 'loss_cross_entropy': Array(0.13906014, dtype=float32)}


  8%|▊         | 77609/1000000 [3:33:17<38:09:55,  6.71it/s]

{'loss': Array(0.13974188, dtype=float32), 'loss_cross_entropy': Array(0.13127196, dtype=float32)}


  8%|▊         | 77619/1000000 [3:33:18<26:25:40,  9.69it/s]

{'loss': Array(0.14010867, dtype=float32), 'loss_cross_entropy': Array(0.13120596, dtype=float32)}


  8%|▊         | 77629/1000000 [3:33:20<24:07:35, 10.62it/s]

{'loss': Array(0.15356289, dtype=float32), 'loss_cross_entropy': Array(0.14442785, dtype=float32)}


  8%|▊         | 77639/1000000 [3:33:21<28:10:01,  9.10it/s]

{'loss': Array(0.16664176, dtype=float32), 'loss_cross_entropy': Array(0.15729605, dtype=float32)}


  8%|▊         | 77649/1000000 [3:33:23<24:10:52, 10.60it/s]

{'loss': Array(0.16741335, dtype=float32), 'loss_cross_entropy': Array(0.15748312, dtype=float32)}


  8%|▊         | 77659/1000000 [3:33:24<32:55:25,  7.78it/s]

{'loss': Array(0.16480252, dtype=float32), 'loss_cross_entropy': Array(0.15529658, dtype=float32)}


  8%|▊         | 77669/1000000 [3:33:26<25:21:05, 10.11it/s]

{'loss': Array(0.16181402, dtype=float32), 'loss_cross_entropy': Array(0.15258615, dtype=float32)}


  8%|▊         | 77679/1000000 [3:33:27<38:49:26,  6.60it/s]

{'loss': Array(0.1674763, dtype=float32), 'loss_cross_entropy': Array(0.15772438, dtype=float32)}


  8%|▊         | 77689/1000000 [3:33:29<26:21:31,  9.72it/s]

{'loss': Array(0.13888286, dtype=float32), 'loss_cross_entropy': Array(0.13062663, dtype=float32)}


  8%|▊         | 77699/1000000 [3:33:30<23:55:25, 10.71it/s]

{'loss': Array(0.17178781, dtype=float32), 'loss_cross_entropy': Array(0.16134588, dtype=float32)}


  8%|▊         | 77709/1000000 [3:33:32<30:15:17,  8.47it/s]

{'loss': Array(0.17339765, dtype=float32), 'loss_cross_entropy': Array(0.16353236, dtype=float32)}


  8%|▊         | 77719/1000000 [3:33:33<24:44:02, 10.36it/s]

{'loss': Array(0.14761369, dtype=float32), 'loss_cross_entropy': Array(0.13940339, dtype=float32)}


  8%|▊         | 77729/1000000 [3:33:35<33:02:45,  7.75it/s]

{'loss': Array(0.15118414, dtype=float32), 'loss_cross_entropy': Array(0.14190553, dtype=float32)}


  8%|▊         | 77739/1000000 [3:33:36<25:04:09, 10.22it/s]

{'loss': Array(0.15115444, dtype=float32), 'loss_cross_entropy': Array(0.14223431, dtype=float32)}


  8%|▊         | 77749/1000000 [3:33:38<24:37:19, 10.40it/s]

{'loss': Array(0.15069439, dtype=float32), 'loss_cross_entropy': Array(0.141757, dtype=float32)}


  8%|▊         | 77759/1000000 [3:33:39<27:02:06,  9.48it/s]

{'loss': Array(0.17988005, dtype=float32), 'loss_cross_entropy': Array(0.16980468, dtype=float32)}


  8%|▊         | 77769/1000000 [3:33:41<23:52:21, 10.73it/s]

{'loss': Array(0.14763546, dtype=float32), 'loss_cross_entropy': Array(0.13891934, dtype=float32)}


  8%|▊         | 77779/1000000 [3:33:42<37:05:32,  6.91it/s]

{'loss': Array(0.15741585, dtype=float32), 'loss_cross_entropy': Array(0.14885032, dtype=float32)}


  8%|▊         | 77789/1000000 [3:33:44<26:41:06,  9.60it/s]

{'loss': Array(0.16014789, dtype=float32), 'loss_cross_entropy': Array(0.15010394, dtype=float32)}


  8%|▊         | 77799/1000000 [3:33:45<24:04:19, 10.64it/s]

{'loss': Array(0.1587964, dtype=float32), 'loss_cross_entropy': Array(0.14974968, dtype=float32)}


  8%|▊         | 77809/1000000 [3:33:47<30:45:25,  8.33it/s]

{'loss': Array(0.14447956, dtype=float32), 'loss_cross_entropy': Array(0.13532518, dtype=float32)}


  8%|▊         | 77819/1000000 [3:33:48<25:16:22, 10.14it/s]

{'loss': Array(0.15035689, dtype=float32), 'loss_cross_entropy': Array(0.14132537, dtype=float32)}


  8%|▊         | 77829/1000000 [3:33:50<33:36:49,  7.62it/s]

{'loss': Array(0.12888823, dtype=float32), 'loss_cross_entropy': Array(0.12062395, dtype=float32)}


  8%|▊         | 77839/1000000 [3:33:51<25:00:28, 10.24it/s]

{'loss': Array(0.16433333, dtype=float32), 'loss_cross_entropy': Array(0.15491498, dtype=float32)}


  8%|▊         | 77849/1000000 [3:33:53<33:33:31,  7.63it/s]

{'loss': Array(0.15633826, dtype=float32), 'loss_cross_entropy': Array(0.14748488, dtype=float32)}


  8%|▊         | 77859/1000000 [3:33:54<25:28:36, 10.05it/s]

{'loss': Array(0.16446419, dtype=float32), 'loss_cross_entropy': Array(0.1550541, dtype=float32)}


  8%|▊         | 77869/1000000 [3:33:56<26:43:53,  9.58it/s]

{'loss': Array(0.15275693, dtype=float32), 'loss_cross_entropy': Array(0.14399762, dtype=float32)}


  8%|▊         | 77879/1000000 [3:33:57<29:00:29,  8.83it/s]

{'loss': Array(0.18410897, dtype=float32), 'loss_cross_entropy': Array(0.17336427, dtype=float32)}


  8%|▊         | 77889/1000000 [3:33:59<25:41:12,  9.97it/s]

{'loss': Array(0.15969048, dtype=float32), 'loss_cross_entropy': Array(0.14998402, dtype=float32)}


  8%|▊         | 77899/1000000 [3:34:01<32:20:55,  7.92it/s]

{'loss': Array(0.14050886, dtype=float32), 'loss_cross_entropy': Array(0.13237973, dtype=float32)}


  8%|▊         | 77909/1000000 [3:34:02<26:02:13,  9.84it/s]

{'loss': Array(0.1574261, dtype=float32), 'loss_cross_entropy': Array(0.14776237, dtype=float32)}


  8%|▊         | 77919/1000000 [3:34:04<39:12:52,  6.53it/s]

{'loss': Array(0.16533318, dtype=float32), 'loss_cross_entropy': Array(0.15508653, dtype=float32)}


  8%|▊         | 77929/1000000 [3:34:05<27:49:21,  9.21it/s]

{'loss': Array(0.15993658, dtype=float32), 'loss_cross_entropy': Array(0.15122512, dtype=float32)}


  8%|▊         | 77939/1000000 [3:34:07<40:04:56,  6.39it/s]

{'loss': Array(0.14469098, dtype=float32), 'loss_cross_entropy': Array(0.13541095, dtype=float32)}


  8%|▊         | 77949/1000000 [3:34:09<27:16:37,  9.39it/s]

{'loss': Array(0.16116945, dtype=float32), 'loss_cross_entropy': Array(0.15151837, dtype=float32)}


  8%|▊         | 77959/1000000 [3:34:10<24:58:55, 10.25it/s]

{'loss': Array(0.14288679, dtype=float32), 'loss_cross_entropy': Array(0.13410935, dtype=float32)}


  8%|▊         | 77969/1000000 [3:34:12<33:22:58,  7.67it/s]

{'loss': Array(0.15790138, dtype=float32), 'loss_cross_entropy': Array(0.14848922, dtype=float32)}


  8%|▊         | 77979/1000000 [3:34:13<25:14:37, 10.15it/s]

{'loss': Array(0.15929621, dtype=float32), 'loss_cross_entropy': Array(0.15029152, dtype=float32)}


  8%|▊         | 77989/1000000 [3:34:15<37:30:32,  6.83it/s]

{'loss': Array(0.1238713, dtype=float32), 'loss_cross_entropy': Array(0.11619818, dtype=float32)}


  8%|▊         | 77999/1000000 [3:34:16<26:07:09,  9.81it/s]

{'loss': Array(0.17514887, dtype=float32), 'loss_cross_entropy': Array(0.16524442, dtype=float32)}


  8%|▊         | 78009/1000000 [3:34:24<86:36:08,  2.96it/s] 

{'loss': Array(0.1310402, dtype=float32), 'loss_cross_entropy': Array(0.12301972, dtype=float32)}


  8%|▊         | 78019/1000000 [3:34:26<37:38:20,  6.80it/s] 

{'loss': Array(0.14707556, dtype=float32), 'loss_cross_entropy': Array(0.13804547, dtype=float32)}


  8%|▊         | 78029/1000000 [3:34:27<25:38:28,  9.99it/s]

{'loss': Array(0.140526, dtype=float32), 'loss_cross_entropy': Array(0.1322131, dtype=float32)}


  8%|▊         | 78039/1000000 [3:34:29<26:53:10,  9.53it/s]

{'loss': Array(0.1612513, dtype=float32), 'loss_cross_entropy': Array(0.15241902, dtype=float32)}


  8%|▊         | 78049/1000000 [3:34:30<24:14:15, 10.57it/s]

{'loss': Array(0.15770124, dtype=float32), 'loss_cross_entropy': Array(0.14847495, dtype=float32)}


  8%|▊         | 78059/1000000 [3:34:32<37:56:46,  6.75it/s]

{'loss': Array(0.14516388, dtype=float32), 'loss_cross_entropy': Array(0.13641071, dtype=float32)}


  8%|▊         | 78069/1000000 [3:34:33<26:24:05,  9.70it/s]

{'loss': Array(0.13607097, dtype=float32), 'loss_cross_entropy': Array(0.12781289, dtype=float32)}


  8%|▊         | 78079/1000000 [3:34:35<23:53:52, 10.72it/s]

{'loss': Array(0.14294007, dtype=float32), 'loss_cross_entropy': Array(0.13488397, dtype=float32)}


  8%|▊         | 78089/1000000 [3:34:36<30:40:25,  8.35it/s]

{'loss': Array(0.15945527, dtype=float32), 'loss_cross_entropy': Array(0.14985196, dtype=float32)}


  8%|▊         | 78099/1000000 [3:34:38<25:05:02, 10.21it/s]

{'loss': Array(0.14919701, dtype=float32), 'loss_cross_entropy': Array(0.14089482, dtype=float32)}


  8%|▊         | 78109/1000000 [3:34:40<37:28:53,  6.83it/s]

{'loss': Array(0.13866499, dtype=float32), 'loss_cross_entropy': Array(0.13038993, dtype=float32)}


  8%|▊         | 78119/1000000 [3:34:41<25:48:04,  9.93it/s]

{'loss': Array(0.1580455, dtype=float32), 'loss_cross_entropy': Array(0.14872594, dtype=float32)}


  8%|▊         | 78129/1000000 [3:34:42<23:54:59, 10.71it/s]

{'loss': Array(0.15852123, dtype=float32), 'loss_cross_entropy': Array(0.14941281, dtype=float32)}


  8%|▊         | 78139/1000000 [3:34:44<28:19:19,  9.04it/s]

{'loss': Array(0.14864142, dtype=float32), 'loss_cross_entropy': Array(0.14016457, dtype=float32)}


  8%|▊         | 78149/1000000 [3:34:45<24:08:44, 10.61it/s]

{'loss': Array(0.15088753, dtype=float32), 'loss_cross_entropy': Array(0.1416987, dtype=float32)}


  8%|▊         | 78159/1000000 [3:34:47<33:14:42,  7.70it/s]

{'loss': Array(0.13229942, dtype=float32), 'loss_cross_entropy': Array(0.12454257, dtype=float32)}


  8%|▊         | 78169/1000000 [3:34:48<25:10:59, 10.17it/s]

{'loss': Array(0.16224752, dtype=float32), 'loss_cross_entropy': Array(0.15283144, dtype=float32)}


  8%|▊         | 78179/1000000 [3:34:50<24:03:03, 10.65it/s]

{'loss': Array(0.159701, dtype=float32), 'loss_cross_entropy': Array(0.15050423, dtype=float32)}


  8%|▊         | 78189/1000000 [3:34:51<30:19:27,  8.44it/s]

{'loss': Array(0.16097394, dtype=float32), 'loss_cross_entropy': Array(0.15187901, dtype=float32)}


  8%|▊         | 78199/1000000 [3:34:53<24:56:31, 10.27it/s]

{'loss': Array(0.16036908, dtype=float32), 'loss_cross_entropy': Array(0.1522026, dtype=float32)}


  8%|▊         | 78209/1000000 [3:34:54<24:05:35, 10.63it/s]

{'loss': Array(0.13959123, dtype=float32), 'loss_cross_entropy': Array(0.13116315, dtype=float32)}


  8%|▊         | 78219/1000000 [3:34:56<27:13:08,  9.41it/s]

{'loss': Array(0.16667467, dtype=float32), 'loss_cross_entropy': Array(0.15682527, dtype=float32)}


  8%|▊         | 78229/1000000 [3:34:57<24:09:31, 10.60it/s]

{'loss': Array(0.13679747, dtype=float32), 'loss_cross_entropy': Array(0.12864588, dtype=float32)}


  8%|▊         | 78239/1000000 [3:34:59<30:36:29,  8.37it/s]

{'loss': Array(0.16151755, dtype=float32), 'loss_cross_entropy': Array(0.15228985, dtype=float32)}


  8%|▊         | 78249/1000000 [3:35:00<25:42:18,  9.96it/s]

{'loss': Array(0.13291338, dtype=float32), 'loss_cross_entropy': Array(0.12459821, dtype=float32)}


  8%|▊         | 78259/1000000 [3:35:02<40:03:33,  6.39it/s]

{'loss': Array(0.13969631, dtype=float32), 'loss_cross_entropy': Array(0.1317976, dtype=float32)}


  8%|▊         | 78269/1000000 [3:35:04<27:40:37,  9.25it/s]

{'loss': Array(0.14208232, dtype=float32), 'loss_cross_entropy': Array(0.13377906, dtype=float32)}


  8%|▊         | 78279/1000000 [3:35:05<38:46:04,  6.60it/s]

{'loss': Array(0.1324707, dtype=float32), 'loss_cross_entropy': Array(0.12472292, dtype=float32)}


  8%|▊         | 78289/1000000 [3:35:07<27:15:32,  9.39it/s]

{'loss': Array(0.15053019, dtype=float32), 'loss_cross_entropy': Array(0.14135353, dtype=float32)}


  8%|▊         | 78299/1000000 [3:35:08<25:19:46, 10.11it/s]

{'loss': Array(0.14857273, dtype=float32), 'loss_cross_entropy': Array(0.13966696, dtype=float32)}


  8%|▊         | 78309/1000000 [3:35:10<33:44:34,  7.59it/s]

{'loss': Array(0.13698152, dtype=float32), 'loss_cross_entropy': Array(0.12824476, dtype=float32)}


  8%|▊         | 78319/1000000 [3:35:11<25:39:44,  9.98it/s]

{'loss': Array(0.15264194, dtype=float32), 'loss_cross_entropy': Array(0.14391686, dtype=float32)}


  8%|▊         | 78329/1000000 [3:35:13<38:05:37,  6.72it/s]

{'loss': Array(0.15497111, dtype=float32), 'loss_cross_entropy': Array(0.14586918, dtype=float32)}


  8%|▊         | 78339/1000000 [3:35:14<25:43:19,  9.95it/s]

{'loss': Array(0.15989794, dtype=float32), 'loss_cross_entropy': Array(0.15034358, dtype=float32)}


  8%|▊         | 78349/1000000 [3:35:16<23:59:29, 10.67it/s]

{'loss': Array(0.1618847, dtype=float32), 'loss_cross_entropy': Array(0.15242118, dtype=float32)}


  8%|▊         | 78359/1000000 [3:35:17<26:33:24,  9.64it/s]

{'loss': Array(0.16282356, dtype=float32), 'loss_cross_entropy': Array(0.15284407, dtype=float32)}


  8%|▊         | 78369/1000000 [3:35:19<25:06:11, 10.20it/s]

{'loss': Array(0.16512525, dtype=float32), 'loss_cross_entropy': Array(0.155507, dtype=float32)}


  8%|▊         | 78379/1000000 [3:35:20<26:38:35,  9.61it/s]

{'loss': Array(0.16570656, dtype=float32), 'loss_cross_entropy': Array(0.1554545, dtype=float32)}


  8%|▊         | 78389/1000000 [3:35:22<23:42:55, 10.79it/s]

{'loss': Array(0.132538, dtype=float32), 'loss_cross_entropy': Array(0.12351614, dtype=float32)}


  8%|▊         | 78399/1000000 [3:35:23<37:43:32,  6.79it/s]

{'loss': Array(0.14401378, dtype=float32), 'loss_cross_entropy': Array(0.13510099, dtype=float32)}


  8%|▊         | 78409/1000000 [3:35:25<26:39:28,  9.60it/s]

{'loss': Array(0.1562517, dtype=float32), 'loss_cross_entropy': Array(0.14730231, dtype=float32)}


  8%|▊         | 78419/1000000 [3:35:26<25:13:39, 10.15it/s]

{'loss': Array(0.15035501, dtype=float32), 'loss_cross_entropy': Array(0.14225338, dtype=float32)}


  8%|▊         | 78429/1000000 [3:35:28<30:34:20,  8.37it/s]

{'loss': Array(0.17385207, dtype=float32), 'loss_cross_entropy': Array(0.16378616, dtype=float32)}


  8%|▊         | 78439/1000000 [3:35:29<24:24:10, 10.49it/s]

{'loss': Array(0.156542, dtype=float32), 'loss_cross_entropy': Array(0.14783067, dtype=float32)}


  8%|▊         | 78449/1000000 [3:35:31<37:14:39,  6.87it/s]

{'loss': Array(0.15019466, dtype=float32), 'loss_cross_entropy': Array(0.14162864, dtype=float32)}


  8%|▊         | 78459/1000000 [3:35:32<26:59:25,  9.48it/s]

{'loss': Array(0.14980245, dtype=float32), 'loss_cross_entropy': Array(0.14112748, dtype=float32)}


  8%|▊         | 78469/1000000 [3:35:34<24:31:10, 10.44it/s]

{'loss': Array(0.15550463, dtype=float32), 'loss_cross_entropy': Array(0.1465917, dtype=float32)}


  8%|▊         | 78479/1000000 [3:35:35<28:20:33,  9.03it/s]

{'loss': Array(0.16292553, dtype=float32), 'loss_cross_entropy': Array(0.15345351, dtype=float32)}


  8%|▊         | 78489/1000000 [3:35:37<24:06:25, 10.62it/s]

{'loss': Array(0.15518807, dtype=float32), 'loss_cross_entropy': Array(0.14589284, dtype=float32)}


  8%|▊         | 78499/1000000 [3:35:38<33:52:18,  7.56it/s]

{'loss': Array(0.1657763, dtype=float32), 'loss_cross_entropy': Array(0.15616597, dtype=float32)}


  8%|▊         | 78509/1000000 [3:35:46<86:18:45,  2.97it/s] 

{'loss': Array(0.16436882, dtype=float32), 'loss_cross_entropy': Array(0.1547371, dtype=float32)}


  8%|▊         | 78519/1000000 [3:35:48<43:39:10,  5.86it/s]

{'loss': Array(0.12699375, dtype=float32), 'loss_cross_entropy': Array(0.11933768, dtype=float32)}


  8%|▊         | 78529/1000000 [3:35:49<27:22:11,  9.35it/s]

{'loss': Array(0.15170729, dtype=float32), 'loss_cross_entropy': Array(0.14224169, dtype=float32)}


  8%|▊         | 78539/1000000 [3:35:51<24:06:34, 10.62it/s]

{'loss': Array(0.14480136, dtype=float32), 'loss_cross_entropy': Array(0.13630678, dtype=float32)}


  8%|▊         | 78549/1000000 [3:35:53<31:01:04,  8.25it/s]

{'loss': Array(0.15634814, dtype=float32), 'loss_cross_entropy': Array(0.14696588, dtype=float32)}


  8%|▊         | 78559/1000000 [3:35:54<24:51:06, 10.30it/s]

{'loss': Array(0.15923728, dtype=float32), 'loss_cross_entropy': Array(0.15010898, dtype=float32)}


  8%|▊         | 78569/1000000 [3:35:55<23:38:26, 10.83it/s]

{'loss': Array(0.15824217, dtype=float32), 'loss_cross_entropy': Array(0.149386, dtype=float32)}


  8%|▊         | 78579/1000000 [3:35:57<27:42:01,  9.24it/s]

{'loss': Array(0.1446708, dtype=float32), 'loss_cross_entropy': Array(0.1364494, dtype=float32)}


  8%|▊         | 78589/1000000 [3:35:58<24:59:09, 10.24it/s]

{'loss': Array(0.13110831, dtype=float32), 'loss_cross_entropy': Array(0.12328339, dtype=float32)}


  8%|▊         | 78599/1000000 [3:36:00<32:05:09,  7.98it/s]

{'loss': Array(0.14864887, dtype=float32), 'loss_cross_entropy': Array(0.14006402, dtype=float32)}


  8%|▊         | 78609/1000000 [3:36:02<25:19:35, 10.11it/s]

{'loss': Array(0.14438777, dtype=float32), 'loss_cross_entropy': Array(0.13558064, dtype=float32)}


  8%|▊         | 78619/1000000 [3:36:03<34:53:26,  7.34it/s]

{'loss': Array(0.13858213, dtype=float32), 'loss_cross_entropy': Array(0.12990288, dtype=float32)}


  8%|▊         | 78629/1000000 [3:36:05<26:10:39,  9.78it/s]

{'loss': Array(0.14000723, dtype=float32), 'loss_cross_entropy': Array(0.1307122, dtype=float32)}


  8%|▊         | 78639/1000000 [3:36:06<24:58:37, 10.25it/s]

{'loss': Array(0.14678924, dtype=float32), 'loss_cross_entropy': Array(0.13864154, dtype=float32)}


  8%|▊         | 78649/1000000 [3:36:08<32:04:07,  7.98it/s]

{'loss': Array(0.1633793, dtype=float32), 'loss_cross_entropy': Array(0.15364936, dtype=float32)}


  8%|▊         | 78659/1000000 [3:36:09<25:57:33,  9.86it/s]

{'loss': Array(0.14516939, dtype=float32), 'loss_cross_entropy': Array(0.13674465, dtype=float32)}


  8%|▊         | 78669/1000000 [3:36:11<24:43:29, 10.35it/s]

{'loss': Array(0.14769955, dtype=float32), 'loss_cross_entropy': Array(0.13879894, dtype=float32)}


  8%|▊         | 78679/1000000 [3:36:12<27:02:16,  9.47it/s]

{'loss': Array(0.15988588, dtype=float32), 'loss_cross_entropy': Array(0.1508293, dtype=float32)}


  8%|▊         | 78689/1000000 [3:36:14<23:58:13, 10.68it/s]

{'loss': Array(0.15014216, dtype=float32), 'loss_cross_entropy': Array(0.14116596, dtype=float32)}


  8%|▊         | 78699/1000000 [3:36:15<30:37:12,  8.36it/s]

{'loss': Array(0.13149957, dtype=float32), 'loss_cross_entropy': Array(0.12314285, dtype=float32)}


  8%|▊         | 78709/1000000 [3:36:17<25:15:04, 10.13it/s]

{'loss': Array(0.15562603, dtype=float32), 'loss_cross_entropy': Array(0.14661886, dtype=float32)}


  8%|▊         | 78719/1000000 [3:36:19<38:26:43,  6.66it/s]

{'loss': Array(0.15564755, dtype=float32), 'loss_cross_entropy': Array(0.14679232, dtype=float32)}


  8%|▊         | 78729/1000000 [3:36:20<26:09:15,  9.78it/s]

{'loss': Array(0.15782043, dtype=float32), 'loss_cross_entropy': Array(0.14891095, dtype=float32)}


  8%|▊         | 78739/1000000 [3:36:21<24:45:21, 10.34it/s]

{'loss': Array(0.1268228, dtype=float32), 'loss_cross_entropy': Array(0.1191486, dtype=float32)}


  8%|▊         | 78749/1000000 [3:36:23<34:01:53,  7.52it/s]

{'loss': Array(0.15855816, dtype=float32), 'loss_cross_entropy': Array(0.14878517, dtype=float32)}


  8%|▊         | 78759/1000000 [3:36:25<25:45:53,  9.93it/s]

{'loss': Array(0.15413992, dtype=float32), 'loss_cross_entropy': Array(0.14555721, dtype=float32)}


  8%|▊         | 78769/1000000 [3:36:26<23:48:11, 10.75it/s]

{'loss': Array(0.15775223, dtype=float32), 'loss_cross_entropy': Array(0.14878215, dtype=float32)}


  8%|▊         | 78779/1000000 [3:36:28<28:41:59,  8.92it/s]

{'loss': Array(0.15499137, dtype=float32), 'loss_cross_entropy': Array(0.14557166, dtype=float32)}


  8%|▊         | 78789/1000000 [3:36:29<25:01:06, 10.23it/s]

{'loss': Array(0.1487205, dtype=float32), 'loss_cross_entropy': Array(0.14023395, dtype=float32)}


  8%|▊         | 78799/1000000 [3:36:31<31:02:55,  8.24it/s]

{'loss': Array(0.15078124, dtype=float32), 'loss_cross_entropy': Array(0.14198764, dtype=float32)}


  8%|▊         | 78809/1000000 [3:36:32<24:49:43, 10.31it/s]

{'loss': Array(0.13729422, dtype=float32), 'loss_cross_entropy': Array(0.12842552, dtype=float32)}


  8%|▊         | 78819/1000000 [3:36:33<24:23:15, 10.49it/s]

{'loss': Array(0.15323694, dtype=float32), 'loss_cross_entropy': Array(0.14404213, dtype=float32)}


  8%|▊         | 78829/1000000 [3:36:35<26:56:29,  9.50it/s]

{'loss': Array(0.14051221, dtype=float32), 'loss_cross_entropy': Array(0.1326498, dtype=float32)}


  8%|▊         | 78839/1000000 [3:36:37<24:39:48, 10.37it/s]

{'loss': Array(0.15408796, dtype=float32), 'loss_cross_entropy': Array(0.14521377, dtype=float32)}


  8%|▊         | 78849/1000000 [3:36:38<33:42:42,  7.59it/s]

{'loss': Array(0.16227272, dtype=float32), 'loss_cross_entropy': Array(0.15399957, dtype=float32)}


  8%|▊         | 78859/1000000 [3:36:40<24:58:21, 10.25it/s]

{'loss': Array(0.11513581, dtype=float32), 'loss_cross_entropy': Array(0.10764536, dtype=float32)}


  8%|▊         | 78869/1000000 [3:36:41<24:07:40, 10.60it/s]

{'loss': Array(0.15044224, dtype=float32), 'loss_cross_entropy': Array(0.14146939, dtype=float32)}


  8%|▊         | 78879/1000000 [3:36:43<28:08:14,  9.09it/s]

{'loss': Array(0.1440571, dtype=float32), 'loss_cross_entropy': Array(0.13560067, dtype=float32)}


  8%|▊         | 78889/1000000 [3:36:44<24:18:58, 10.52it/s]

{'loss': Array(0.16918012, dtype=float32), 'loss_cross_entropy': Array(0.15945671, dtype=float32)}


  8%|▊         | 78899/1000000 [3:36:46<28:16:33,  9.05it/s]

{'loss': Array(0.12915984, dtype=float32), 'loss_cross_entropy': Array(0.1215634, dtype=float32)}


  8%|▊         | 78909/1000000 [3:36:47<24:41:41, 10.36it/s]

{'loss': Array(0.16127019, dtype=float32), 'loss_cross_entropy': Array(0.15227869, dtype=float32)}


  8%|▊         | 78919/1000000 [3:36:49<38:52:20,  6.58it/s]

{'loss': Array(0.15588783, dtype=float32), 'loss_cross_entropy': Array(0.14677146, dtype=float32)}


  8%|▊         | 78929/1000000 [3:36:50<25:51:42,  9.89it/s]

{'loss': Array(0.16564278, dtype=float32), 'loss_cross_entropy': Array(0.15590371, dtype=float32)}


  8%|▊         | 78939/1000000 [3:36:52<23:44:13, 10.78it/s]

{'loss': Array(0.14679816, dtype=float32), 'loss_cross_entropy': Array(0.13823487, dtype=float32)}


  8%|▊         | 78949/1000000 [3:36:53<30:06:40,  8.50it/s]

{'loss': Array(0.1509248, dtype=float32), 'loss_cross_entropy': Array(0.14262834, dtype=float32)}


  8%|▊         | 78959/1000000 [3:36:55<24:53:14, 10.28it/s]

{'loss': Array(0.16163625, dtype=float32), 'loss_cross_entropy': Array(0.15196536, dtype=float32)}


  8%|▊         | 78969/1000000 [3:36:56<33:41:01,  7.60it/s]

{'loss': Array(0.15490225, dtype=float32), 'loss_cross_entropy': Array(0.1459332, dtype=float32)}


  8%|▊         | 78979/1000000 [3:36:58<25:35:00, 10.00it/s]

{'loss': Array(0.16167693, dtype=float32), 'loss_cross_entropy': Array(0.15237753, dtype=float32)}


  8%|▊         | 78989/1000000 [3:37:00<35:23:39,  7.23it/s]

{'loss': Array(0.1636064, dtype=float32), 'loss_cross_entropy': Array(0.1542959, dtype=float32)}


  8%|▊         | 78999/1000000 [3:37:01<26:06:23,  9.80it/s]

{'loss': Array(0.12872034, dtype=float32), 'loss_cross_entropy': Array(0.12085525, dtype=float32)}


  8%|▊         | 79009/1000000 [3:37:09<85:54:41,  2.98it/s] 

{'loss': Array(0.14451657, dtype=float32), 'loss_cross_entropy': Array(0.13576472, dtype=float32)}


  8%|▊         | 79019/1000000 [3:37:11<39:46:02,  6.43it/s]

{'loss': Array(0.13515049, dtype=float32), 'loss_cross_entropy': Array(0.12689692, dtype=float32)}


  8%|▊         | 79029/1000000 [3:37:12<26:16:53,  9.73it/s]

{'loss': Array(0.15925033, dtype=float32), 'loss_cross_entropy': Array(0.14956902, dtype=float32)}


  8%|▊         | 79039/1000000 [3:37:14<33:58:52,  7.53it/s]

{'loss': Array(0.16572209, dtype=float32), 'loss_cross_entropy': Array(0.15647115, dtype=float32)}


  8%|▊         | 79049/1000000 [3:37:15<25:19:52, 10.10it/s]

{'loss': Array(0.14561856, dtype=float32), 'loss_cross_entropy': Array(0.13658987, dtype=float32)}


  8%|▊         | 79059/1000000 [3:37:17<38:17:20,  6.68it/s]

{'loss': Array(0.14294994, dtype=float32), 'loss_cross_entropy': Array(0.13410918, dtype=float32)}


  8%|▊         | 79069/1000000 [3:37:18<26:31:21,  9.65it/s]

{'loss': Array(0.13467781, dtype=float32), 'loss_cross_entropy': Array(0.12630787, dtype=float32)}


  8%|▊         | 79079/1000000 [3:37:20<23:44:49, 10.77it/s]

{'loss': Array(0.13714333, dtype=float32), 'loss_cross_entropy': Array(0.12821917, dtype=float32)}


  8%|▊         | 79089/1000000 [3:37:21<30:18:53,  8.44it/s]

{'loss': Array(0.15674971, dtype=float32), 'loss_cross_entropy': Array(0.14821227, dtype=float32)}


  8%|▊         | 79099/1000000 [3:37:23<25:18:54, 10.10it/s]

{'loss': Array(0.15022673, dtype=float32), 'loss_cross_entropy': Array(0.14106102, dtype=float32)}


  8%|▊         | 79109/1000000 [3:37:24<33:33:29,  7.62it/s]

{'loss': Array(0.15158121, dtype=float32), 'loss_cross_entropy': Array(0.14262934, dtype=float32)}


  8%|▊         | 79119/1000000 [3:37:26<25:36:55,  9.99it/s]

{'loss': Array(0.16159643, dtype=float32), 'loss_cross_entropy': Array(0.15203822, dtype=float32)}


  8%|▊         | 79129/1000000 [3:37:27<24:20:27, 10.51it/s]

{'loss': Array(0.14454298, dtype=float32), 'loss_cross_entropy': Array(0.13596678, dtype=float32)}


  8%|▊         | 79139/1000000 [3:37:29<26:56:12,  9.50it/s]

{'loss': Array(0.14287351, dtype=float32), 'loss_cross_entropy': Array(0.13497762, dtype=float32)}


  8%|▊         | 79149/1000000 [3:37:30<24:36:37, 10.39it/s]

{'loss': Array(0.14808595, dtype=float32), 'loss_cross_entropy': Array(0.13880812, dtype=float32)}


  8%|▊         | 79159/1000000 [3:37:32<37:35:57,  6.80it/s]

{'loss': Array(0.141779, dtype=float32), 'loss_cross_entropy': Array(0.13303016, dtype=float32)}


  8%|▊         | 79169/1000000 [3:37:33<26:34:42,  9.62it/s]

{'loss': Array(0.13392775, dtype=float32), 'loss_cross_entropy': Array(0.12475152, dtype=float32)}


  8%|▊         | 79179/1000000 [3:37:35<23:43:19, 10.78it/s]

{'loss': Array(0.1450869, dtype=float32), 'loss_cross_entropy': Array(0.13633764, dtype=float32)}


  8%|▊         | 79189/1000000 [3:37:36<30:36:11,  8.36it/s]

{'loss': Array(0.13605382, dtype=float32), 'loss_cross_entropy': Array(0.12799335, dtype=float32)}


  8%|▊         | 79199/1000000 [3:37:38<25:14:31, 10.13it/s]

{'loss': Array(0.1427533, dtype=float32), 'loss_cross_entropy': Array(0.13440888, dtype=float32)}


  8%|▊         | 79209/1000000 [3:37:39<33:43:00,  7.59it/s]

{'loss': Array(0.14358042, dtype=float32), 'loss_cross_entropy': Array(0.13492407, dtype=float32)}


  8%|▊         | 79219/1000000 [3:37:41<24:58:45, 10.24it/s]

{'loss': Array(0.16172591, dtype=float32), 'loss_cross_entropy': Array(0.15238826, dtype=float32)}


  8%|▊         | 79229/1000000 [3:37:43<34:04:25,  7.51it/s]

{'loss': Array(0.15248516, dtype=float32), 'loss_cross_entropy': Array(0.14353234, dtype=float32)}


  8%|▊         | 79239/1000000 [3:37:44<25:21:01, 10.09it/s]

{'loss': Array(0.13545984, dtype=float32), 'loss_cross_entropy': Array(0.12684762, dtype=float32)}


  8%|▊         | 79249/1000000 [3:37:45<24:02:30, 10.64it/s]

{'loss': Array(0.16069482, dtype=float32), 'loss_cross_entropy': Array(0.1515077, dtype=float32)}


  8%|▊         | 79259/1000000 [3:37:47<28:28:08,  8.98it/s]

{'loss': Array(0.150699, dtype=float32), 'loss_cross_entropy': Array(0.14194766, dtype=float32)}


  8%|▊         | 79269/1000000 [3:37:48<24:35:40, 10.40it/s]

{'loss': Array(0.12934473, dtype=float32), 'loss_cross_entropy': Array(0.12188639, dtype=float32)}


  8%|▊         | 79279/1000000 [3:37:50<30:25:56,  8.40it/s]

{'loss': Array(0.17429332, dtype=float32), 'loss_cross_entropy': Array(0.16431642, dtype=float32)}


  8%|▊         | 79289/1000000 [3:37:51<24:51:37, 10.29it/s]

{'loss': Array(0.15117745, dtype=float32), 'loss_cross_entropy': Array(0.14215961, dtype=float32)}


  8%|▊         | 79299/1000000 [3:37:53<37:09:37,  6.88it/s]

{'loss': Array(0.12781249, dtype=float32), 'loss_cross_entropy': Array(0.11933654, dtype=float32)}


  8%|▊         | 79309/1000000 [3:37:54<26:20:48,  9.71it/s]

{'loss': Array(0.15561183, dtype=float32), 'loss_cross_entropy': Array(0.14696638, dtype=float32)}


  8%|▊         | 79319/1000000 [3:37:56<37:39:30,  6.79it/s]

{'loss': Array(0.15312372, dtype=float32), 'loss_cross_entropy': Array(0.14395739, dtype=float32)}


  8%|▊         | 79329/1000000 [3:37:58<26:24:55,  9.68it/s]

{'loss': Array(0.15891498, dtype=float32), 'loss_cross_entropy': Array(0.14984037, dtype=float32)}


  8%|▊         | 79339/1000000 [3:37:59<25:04:12, 10.20it/s]

{'loss': Array(0.15165032, dtype=float32), 'loss_cross_entropy': Array(0.1429868, dtype=float32)}


  8%|▊         | 79349/1000000 [3:38:01<34:41:19,  7.37it/s]

{'loss': Array(0.1637337, dtype=float32), 'loss_cross_entropy': Array(0.15368907, dtype=float32)}


  8%|▊         | 79359/1000000 [3:38:02<26:05:54,  9.80it/s]

{'loss': Array(0.16932903, dtype=float32), 'loss_cross_entropy': Array(0.15895079, dtype=float32)}


  8%|▊         | 79369/1000000 [3:38:04<39:01:54,  6.55it/s]

{'loss': Array(0.14463075, dtype=float32), 'loss_cross_entropy': Array(0.13579944, dtype=float32)}


  8%|▊         | 79379/1000000 [3:38:05<27:32:58,  9.28it/s]

{'loss': Array(0.16582443, dtype=float32), 'loss_cross_entropy': Array(0.15653461, dtype=float32)}


  8%|▊         | 79389/1000000 [3:38:07<25:55:03,  9.87it/s]

{'loss': Array(0.13971299, dtype=float32), 'loss_cross_entropy': Array(0.13190465, dtype=float32)}


  8%|▊         | 79399/1000000 [3:38:08<27:37:35,  9.26it/s]

{'loss': Array(0.154655, dtype=float32), 'loss_cross_entropy': Array(0.14565082, dtype=float32)}


  8%|▊         | 79409/1000000 [3:38:10<24:13:03, 10.56it/s]

{'loss': Array(0.14205973, dtype=float32), 'loss_cross_entropy': Array(0.1334169, dtype=float32)}


  8%|▊         | 79419/1000000 [3:38:12<27:28:40,  9.31it/s]

{'loss': Array(0.13970803, dtype=float32), 'loss_cross_entropy': Array(0.1313194, dtype=float32)}


  8%|▊         | 79429/1000000 [3:38:13<24:28:19, 10.45it/s]

{'loss': Array(0.17226057, dtype=float32), 'loss_cross_entropy': Array(0.16316108, dtype=float32)}


  8%|▊         | 79439/1000000 [3:38:15<39:05:57,  6.54it/s]

{'loss': Array(0.15820567, dtype=float32), 'loss_cross_entropy': Array(0.14876477, dtype=float32)}


  8%|▊         | 79449/1000000 [3:38:16<26:00:42,  9.83it/s]

{'loss': Array(0.15034717, dtype=float32), 'loss_cross_entropy': Array(0.14188991, dtype=float32)}


  8%|▊         | 79459/1000000 [3:38:17<24:24:05, 10.48it/s]

{'loss': Array(0.13883126, dtype=float32), 'loss_cross_entropy': Array(0.13000832, dtype=float32)}


  8%|▊         | 79469/1000000 [3:38:19<30:52:33,  8.28it/s]

{'loss': Array(0.15080935, dtype=float32), 'loss_cross_entropy': Array(0.14120522, dtype=float32)}


  8%|▊         | 79479/1000000 [3:38:21<25:22:02, 10.08it/s]

{'loss': Array(0.16188177, dtype=float32), 'loss_cross_entropy': Array(0.15228575, dtype=float32)}


  8%|▊         | 79489/1000000 [3:38:22<37:24:05,  6.84it/s]

{'loss': Array(0.16692002, dtype=float32), 'loss_cross_entropy': Array(0.15783058, dtype=float32)}


  8%|▊         | 79499/1000000 [3:38:24<26:28:15,  9.66it/s]

{'loss': Array(0.16669817, dtype=float32), 'loss_cross_entropy': Array(0.15728107, dtype=float32)}


  8%|▊         | 79509/1000000 [3:38:31<84:19:52,  3.03it/s] 

{'loss': Array(0.14986831, dtype=float32), 'loss_cross_entropy': Array(0.14094245, dtype=float32)}


  8%|▊         | 79519/1000000 [3:38:33<38:40:27,  6.61it/s]

{'loss': Array(0.14874096, dtype=float32), 'loss_cross_entropy': Array(0.13947988, dtype=float32)}


  8%|▊         | 79529/1000000 [3:38:35<26:07:22,  9.79it/s]

{'loss': Array(0.15927179, dtype=float32), 'loss_cross_entropy': Array(0.15043195, dtype=float32)}


  8%|▊         | 79539/1000000 [3:38:36<31:02:11,  8.24it/s]

{'loss': Array(0.15936081, dtype=float32), 'loss_cross_entropy': Array(0.1498086, dtype=float32)}


  8%|▊         | 79549/1000000 [3:38:38<24:48:45, 10.30it/s]

{'loss': Array(0.15774262, dtype=float32), 'loss_cross_entropy': Array(0.14852206, dtype=float32)}


  8%|▊         | 79559/1000000 [3:38:39<37:45:41,  6.77it/s]

{'loss': Array(0.15476868, dtype=float32), 'loss_cross_entropy': Array(0.14592172, dtype=float32)}


  8%|▊         | 79569/1000000 [3:38:41<25:42:03,  9.95it/s]

{'loss': Array(0.14637497, dtype=float32), 'loss_cross_entropy': Array(0.13763182, dtype=float32)}


  8%|▊         | 79579/1000000 [3:38:42<24:02:38, 10.63it/s]

{'loss': Array(0.14398663, dtype=float32), 'loss_cross_entropy': Array(0.13496034, dtype=float32)}


  8%|▊         | 79589/1000000 [3:38:44<27:20:56,  9.35it/s]

{'loss': Array(0.1373803, dtype=float32), 'loss_cross_entropy': Array(0.12940101, dtype=float32)}


  8%|▊         | 79599/1000000 [3:38:45<25:07:21, 10.18it/s]

{'loss': Array(0.1538635, dtype=float32), 'loss_cross_entropy': Array(0.14521693, dtype=float32)}


  8%|▊         | 79609/1000000 [3:38:47<38:03:06,  6.72it/s]

{'loss': Array(0.15855503, dtype=float32), 'loss_cross_entropy': Array(0.14942493, dtype=float32)}


  8%|▊         | 79619/1000000 [3:38:48<26:17:16,  9.73it/s]

{'loss': Array(0.16051984, dtype=float32), 'loss_cross_entropy': Array(0.15165167, dtype=float32)}


  8%|▊         | 79629/1000000 [3:38:50<23:55:34, 10.69it/s]

{'loss': Array(0.17316917, dtype=float32), 'loss_cross_entropy': Array(0.16394795, dtype=float32)}


  8%|▊         | 79639/1000000 [3:38:51<28:43:25,  8.90it/s]

{'loss': Array(0.14831907, dtype=float32), 'loss_cross_entropy': Array(0.13946676, dtype=float32)}


  8%|▊         | 79649/1000000 [3:38:53<24:32:56, 10.41it/s]

{'loss': Array(0.15033819, dtype=float32), 'loss_cross_entropy': Array(0.14154115, dtype=float32)}


  8%|▊         | 79659/1000000 [3:38:54<33:20:29,  7.67it/s]

{'loss': Array(0.13135028, dtype=float32), 'loss_cross_entropy': Array(0.12280817, dtype=float32)}


  8%|▊         | 79669/1000000 [3:38:56<25:05:41, 10.19it/s]

{'loss': Array(0.12470818, dtype=float32), 'loss_cross_entropy': Array(0.11747053, dtype=float32)}


  8%|▊         | 79679/1000000 [3:38:57<37:24:47,  6.83it/s]

{'loss': Array(0.16943529, dtype=float32), 'loss_cross_entropy': Array(0.1601873, dtype=float32)}


  8%|▊         | 79689/1000000 [3:38:59<26:06:55,  9.79it/s]

{'loss': Array(0.1515428, dtype=float32), 'loss_cross_entropy': Array(0.14262562, dtype=float32)}


  8%|▊         | 79699/1000000 [3:39:00<24:49:00, 10.30it/s]

{'loss': Array(0.15235594, dtype=float32), 'loss_cross_entropy': Array(0.14347643, dtype=float32)}


  8%|▊         | 79709/1000000 [3:39:02<32:04:40,  7.97it/s]

{'loss': Array(0.15887259, dtype=float32), 'loss_cross_entropy': Array(0.14934641, dtype=float32)}


  8%|▊         | 79719/1000000 [3:39:03<25:21:21, 10.08it/s]

{'loss': Array(0.15543905, dtype=float32), 'loss_cross_entropy': Array(0.14621423, dtype=float32)}


  8%|▊         | 79729/1000000 [3:39:05<35:29:42,  7.20it/s]

{'loss': Array(0.13928081, dtype=float32), 'loss_cross_entropy': Array(0.13070361, dtype=float32)}


  8%|▊         | 79739/1000000 [3:39:07<27:32:52,  9.28it/s]

{'loss': Array(0.11611153, dtype=float32), 'loss_cross_entropy': Array(0.10838639, dtype=float32)}


  8%|▊         | 79749/1000000 [3:39:08<25:12:21, 10.14it/s]

{'loss': Array(0.14822105, dtype=float32), 'loss_cross_entropy': Array(0.13959765, dtype=float32)}


  8%|▊         | 79759/1000000 [3:39:10<27:55:28,  9.15it/s]

{'loss': Array(0.14780664, dtype=float32), 'loss_cross_entropy': Array(0.13909559, dtype=float32)}


  8%|▊         | 79769/1000000 [3:39:11<24:21:45, 10.49it/s]

{'loss': Array(0.14179733, dtype=float32), 'loss_cross_entropy': Array(0.132743, dtype=float32)}


  8%|▊         | 79779/1000000 [3:39:13<37:23:32,  6.84it/s]

{'loss': Array(0.17628896, dtype=float32), 'loss_cross_entropy': Array(0.16625272, dtype=float32)}


  8%|▊         | 79789/1000000 [3:39:14<25:55:51,  9.86it/s]

{'loss': Array(0.16252483, dtype=float32), 'loss_cross_entropy': Array(0.15307255, dtype=float32)}


  8%|▊         | 79799/1000000 [3:39:16<24:22:03, 10.49it/s]

{'loss': Array(0.18061544, dtype=float32), 'loss_cross_entropy': Array(0.17044716, dtype=float32)}


  8%|▊         | 79809/1000000 [3:39:17<30:25:00,  8.40it/s]

{'loss': Array(0.14687827, dtype=float32), 'loss_cross_entropy': Array(0.13854548, dtype=float32)}


  8%|▊         | 79819/1000000 [3:39:19<25:20:02, 10.09it/s]

{'loss': Array(0.16573721, dtype=float32), 'loss_cross_entropy': Array(0.15558274, dtype=float32)}


  8%|▊         | 79829/1000000 [3:39:20<33:51:14,  7.55it/s]

{'loss': Array(0.1517105, dtype=float32), 'loss_cross_entropy': Array(0.14294323, dtype=float32)}


  8%|▊         | 79839/1000000 [3:39:22<25:15:38, 10.12it/s]

{'loss': Array(0.15127075, dtype=float32), 'loss_cross_entropy': Array(0.14238828, dtype=float32)}


  8%|▊         | 79849/1000000 [3:39:23<34:26:15,  7.42it/s]

{'loss': Array(0.1370893, dtype=float32), 'loss_cross_entropy': Array(0.12860131, dtype=float32)}


  8%|▊         | 79859/1000000 [3:39:25<25:10:34, 10.15it/s]

{'loss': Array(0.13922326, dtype=float32), 'loss_cross_entropy': Array(0.13058415, dtype=float32)}


  8%|▊         | 79869/1000000 [3:39:26<23:59:15, 10.66it/s]

{'loss': Array(0.13735595, dtype=float32), 'loss_cross_entropy': Array(0.12925504, dtype=float32)}


  8%|▊         | 79879/1000000 [3:39:28<28:59:01,  8.82it/s]

{'loss': Array(0.14593542, dtype=float32), 'loss_cross_entropy': Array(0.13721852, dtype=float32)}


  8%|▊         | 79889/1000000 [3:39:29<24:33:21, 10.41it/s]

{'loss': Array(0.15108112, dtype=float32), 'loss_cross_entropy': Array(0.14214616, dtype=float32)}


  8%|▊         | 79899/1000000 [3:39:31<30:12:43,  8.46it/s]

{'loss': Array(0.1570631, dtype=float32), 'loss_cross_entropy': Array(0.14821663, dtype=float32)}


  8%|▊         | 79909/1000000 [3:39:32<24:41:44, 10.35it/s]

{'loss': Array(0.16445999, dtype=float32), 'loss_cross_entropy': Array(0.15444963, dtype=float32)}


  8%|▊         | 79919/1000000 [3:39:34<37:40:35,  6.78it/s]

{'loss': Array(0.1621766, dtype=float32), 'loss_cross_entropy': Array(0.15249407, dtype=float32)}


  8%|▊         | 79929/1000000 [3:39:35<26:01:29,  9.82it/s]

{'loss': Array(0.14700836, dtype=float32), 'loss_cross_entropy': Array(0.13836502, dtype=float32)}


  8%|▊         | 79939/1000000 [3:39:37<38:00:28,  6.72it/s]

{'loss': Array(0.15686528, dtype=float32), 'loss_cross_entropy': Array(0.14832497, dtype=float32)}


  8%|▊         | 79949/1000000 [3:39:38<26:17:13,  9.72it/s]

{'loss': Array(0.16941366, dtype=float32), 'loss_cross_entropy': Array(0.16018377, dtype=float32)}


  8%|▊         | 79959/1000000 [3:39:40<23:43:30, 10.77it/s]

{'loss': Array(0.14714226, dtype=float32), 'loss_cross_entropy': Array(0.13826796, dtype=float32)}


  8%|▊         | 79969/1000000 [3:39:42<33:26:31,  7.64it/s]

{'loss': Array(0.14757934, dtype=float32), 'loss_cross_entropy': Array(0.13969314, dtype=float32)}


  8%|▊         | 79979/1000000 [3:39:43<24:52:00, 10.28it/s]

{'loss': Array(0.1504562, dtype=float32), 'loss_cross_entropy': Array(0.14203276, dtype=float32)}


  8%|▊         | 79989/1000000 [3:39:45<37:36:55,  6.79it/s]

{'loss': Array(0.14057346, dtype=float32), 'loss_cross_entropy': Array(0.13202225, dtype=float32)}


  8%|▊         | 79999/1000000 [3:39:46<26:13:14,  9.75it/s]

{'loss': Array(0.13718067, dtype=float32), 'loss_cross_entropy': Array(0.12906371, dtype=float32)}


  8%|▊         | 80009/1000000 [3:39:54<84:20:33,  3.03it/s] 

{'loss': Array(0.1477331, dtype=float32), 'loss_cross_entropy': Array(0.13867721, dtype=float32)}


  8%|▊         | 80019/1000000 [3:39:55<36:59:06,  6.91it/s] 

{'loss': Array(0.15105791, dtype=float32), 'loss_cross_entropy': Array(0.1424258, dtype=float32)}


  8%|▊         | 80029/1000000 [3:39:57<25:47:39,  9.91it/s]

{'loss': Array(0.14745325, dtype=float32), 'loss_cross_entropy': Array(0.13909546, dtype=float32)}


  8%|▊         | 80039/1000000 [3:39:59<28:13:48,  9.05it/s]

{'loss': Array(0.15569757, dtype=float32), 'loss_cross_entropy': Array(0.14629017, dtype=float32)}


  8%|▊         | 80049/1000000 [3:40:00<25:53:16,  9.87it/s]

{'loss': Array(0.14904489, dtype=float32), 'loss_cross_entropy': Array(0.13945673, dtype=float32)}


  8%|▊         | 80059/1000000 [3:40:02<38:57:17,  6.56it/s]

{'loss': Array(0.12880012, dtype=float32), 'loss_cross_entropy': Array(0.12062418, dtype=float32)}


  8%|▊         | 80069/1000000 [3:40:03<27:05:36,  9.43it/s]

{'loss': Array(0.16202901, dtype=float32), 'loss_cross_entropy': Array(0.15291263, dtype=float32)}


  8%|▊         | 80079/1000000 [3:40:05<25:33:51, 10.00it/s]

{'loss': Array(0.14914261, dtype=float32), 'loss_cross_entropy': Array(0.14052798, dtype=float32)}


  8%|▊         | 80089/1000000 [3:40:06<32:03:00,  7.97it/s]

{'loss': Array(0.13795048, dtype=float32), 'loss_cross_entropy': Array(0.12993126, dtype=float32)}


  8%|▊         | 80099/1000000 [3:40:08<26:31:51,  9.63it/s]

{'loss': Array(0.14473073, dtype=float32), 'loss_cross_entropy': Array(0.13603903, dtype=float32)}


  8%|▊         | 80109/1000000 [3:40:10<38:24:31,  6.65it/s]

{'loss': Array(0.16215552, dtype=float32), 'loss_cross_entropy': Array(0.15302299, dtype=float32)}


  8%|▊         | 80119/1000000 [3:40:11<25:52:53,  9.87it/s]

{'loss': Array(0.16644724, dtype=float32), 'loss_cross_entropy': Array(0.15695028, dtype=float32)}


  8%|▊         | 80129/1000000 [3:40:12<24:20:40, 10.50it/s]

{'loss': Array(0.15463051, dtype=float32), 'loss_cross_entropy': Array(0.1454842, dtype=float32)}


  8%|▊         | 80139/1000000 [3:40:14<28:30:34,  8.96it/s]

{'loss': Array(0.15091366, dtype=float32), 'loss_cross_entropy': Array(0.1416832, dtype=float32)}


  8%|▊         | 80149/1000000 [3:40:15<24:45:30, 10.32it/s]

{'loss': Array(0.16436034, dtype=float32), 'loss_cross_entropy': Array(0.15500787, dtype=float32)}


  8%|▊         | 80159/1000000 [3:40:17<33:23:00,  7.65it/s]

{'loss': Array(0.16330375, dtype=float32), 'loss_cross_entropy': Array(0.15383743, dtype=float32)}


  8%|▊         | 80169/1000000 [3:40:19<25:43:55,  9.93it/s]

{'loss': Array(0.15531904, dtype=float32), 'loss_cross_entropy': Array(0.14669167, dtype=float32)}


  8%|▊         | 80179/1000000 [3:40:20<24:03:27, 10.62it/s]

{'loss': Array(0.15034553, dtype=float32), 'loss_cross_entropy': Array(0.14152849, dtype=float32)}


  8%|▊         | 80189/1000000 [3:40:22<30:25:18,  8.40it/s]

{'loss': Array(0.16684, dtype=float32), 'loss_cross_entropy': Array(0.15731786, dtype=float32)}


  8%|▊         | 80199/1000000 [3:40:23<24:56:58, 10.24it/s]

{'loss': Array(0.1488227, dtype=float32), 'loss_cross_entropy': Array(0.13986741, dtype=float32)}


  8%|▊         | 80209/1000000 [3:40:24<24:01:03, 10.64it/s]

{'loss': Array(0.13010958, dtype=float32), 'loss_cross_entropy': Array(0.12233018, dtype=float32)}


  8%|▊         | 80219/1000000 [3:40:26<26:49:15,  9.53it/s]

{'loss': Array(0.16334175, dtype=float32), 'loss_cross_entropy': Array(0.15392408, dtype=float32)}


  8%|▊         | 80229/1000000 [3:40:27<24:23:46, 10.47it/s]

{'loss': Array(0.16018258, dtype=float32), 'loss_cross_entropy': Array(0.15108083, dtype=float32)}


  8%|▊         | 80239/1000000 [3:40:29<30:15:12,  8.44it/s]

{'loss': Array(0.15205947, dtype=float32), 'loss_cross_entropy': Array(0.14305101, dtype=float32)}


  8%|▊         | 80249/1000000 [3:40:31<25:19:15, 10.09it/s]

{'loss': Array(0.14549616, dtype=float32), 'loss_cross_entropy': Array(0.13705234, dtype=float32)}


  8%|▊         | 80259/1000000 [3:40:32<38:28:16,  6.64it/s]

{'loss': Array(0.14709602, dtype=float32), 'loss_cross_entropy': Array(0.13876596, dtype=float32)}


  8%|▊         | 80269/1000000 [3:40:34<25:55:53,  9.85it/s]

{'loss': Array(0.14365974, dtype=float32), 'loss_cross_entropy': Array(0.134593, dtype=float32)}


  8%|▊         | 80279/1000000 [3:40:35<37:47:08,  6.76it/s]

{'loss': Array(0.13567834, dtype=float32), 'loss_cross_entropy': Array(0.1278497, dtype=float32)}


  8%|▊         | 80289/1000000 [3:40:37<26:13:58,  9.74it/s]

{'loss': Array(0.1335773, dtype=float32), 'loss_cross_entropy': Array(0.12572485, dtype=float32)}


  8%|▊         | 80299/1000000 [3:40:38<24:11:25, 10.56it/s]

{'loss': Array(0.13955665, dtype=float32), 'loss_cross_entropy': Array(0.13116744, dtype=float32)}


  8%|▊         | 80309/1000000 [3:40:40<33:16:57,  7.68it/s]

{'loss': Array(0.15174745, dtype=float32), 'loss_cross_entropy': Array(0.14269976, dtype=float32)}


  8%|▊         | 80319/1000000 [3:40:41<24:50:21, 10.28it/s]

{'loss': Array(0.16282815, dtype=float32), 'loss_cross_entropy': Array(0.15365484, dtype=float32)}


  8%|▊         | 80329/1000000 [3:40:43<37:43:51,  6.77it/s]

{'loss': Array(0.14560421, dtype=float32), 'loss_cross_entropy': Array(0.13704173, dtype=float32)}


  8%|▊         | 80339/1000000 [3:40:44<25:45:25,  9.92it/s]

{'loss': Array(0.14468166, dtype=float32), 'loss_cross_entropy': Array(0.13591404, dtype=float32)}


  8%|▊         | 80349/1000000 [3:40:45<23:45:11, 10.75it/s]

{'loss': Array(0.15787253, dtype=float32), 'loss_cross_entropy': Array(0.14889298, dtype=float32)}


  8%|▊         | 80359/1000000 [3:40:47<26:29:38,  9.64it/s]

{'loss': Array(0.1497023, dtype=float32), 'loss_cross_entropy': Array(0.1406308, dtype=float32)}


  8%|▊         | 80369/1000000 [3:40:49<24:14:33, 10.54it/s]

{'loss': Array(0.14572576, dtype=float32), 'loss_cross_entropy': Array(0.13675748, dtype=float32)}


  8%|▊         | 80379/1000000 [3:40:50<26:53:27,  9.50it/s]

{'loss': Array(0.13706307, dtype=float32), 'loss_cross_entropy': Array(0.12872066, dtype=float32)}


  8%|▊         | 80389/1000000 [3:40:52<24:10:30, 10.57it/s]

{'loss': Array(0.15190482, dtype=float32), 'loss_cross_entropy': Array(0.14325961, dtype=float32)}


  8%|▊         | 80399/1000000 [3:40:53<37:21:25,  6.84it/s]

{'loss': Array(0.16034248, dtype=float32), 'loss_cross_entropy': Array(0.15140174, dtype=float32)}


  8%|▊         | 80409/1000000 [3:40:55<26:02:11,  9.81it/s]

{'loss': Array(0.13947104, dtype=float32), 'loss_cross_entropy': Array(0.13065758, dtype=float32)}


  8%|▊         | 80419/1000000 [3:40:56<23:52:40, 10.70it/s]

{'loss': Array(0.15727608, dtype=float32), 'loss_cross_entropy': Array(0.14814477, dtype=float32)}


  8%|▊         | 80429/1000000 [3:40:58<30:07:12,  8.48it/s]

{'loss': Array(0.14415629, dtype=float32), 'loss_cross_entropy': Array(0.13603668, dtype=float32)}


  8%|▊         | 80439/1000000 [3:40:59<25:12:00, 10.14it/s]

{'loss': Array(0.15136473, dtype=float32), 'loss_cross_entropy': Array(0.14244688, dtype=float32)}


  8%|▊         | 80449/1000000 [3:41:01<38:17:36,  6.67it/s]

{'loss': Array(0.15366103, dtype=float32), 'loss_cross_entropy': Array(0.14477225, dtype=float32)}


  8%|▊         | 80459/1000000 [3:41:02<27:38:24,  9.24it/s]

{'loss': Array(0.13690022, dtype=float32), 'loss_cross_entropy': Array(0.1285014, dtype=float32)}


  8%|▊         | 80469/1000000 [3:41:04<25:18:59, 10.09it/s]

{'loss': Array(0.15834789, dtype=float32), 'loss_cross_entropy': Array(0.14906184, dtype=float32)}


  8%|▊         | 80479/1000000 [3:41:05<29:51:56,  8.55it/s]

{'loss': Array(0.15367602, dtype=float32), 'loss_cross_entropy': Array(0.14479004, dtype=float32)}


  8%|▊         | 80489/1000000 [3:41:07<25:53:26,  9.87it/s]

{'loss': Array(0.15966573, dtype=float32), 'loss_cross_entropy': Array(0.15046196, dtype=float32)}


  8%|▊         | 80499/1000000 [3:41:09<34:46:12,  7.35it/s]

{'loss': Array(0.15035973, dtype=float32), 'loss_cross_entropy': Array(0.14154245, dtype=float32)}


  8%|▊         | 80509/1000000 [3:41:16<84:10:46,  3.03it/s] 

{'loss': Array(0.17275988, dtype=float32), 'loss_cross_entropy': Array(0.16275825, dtype=float32)}


  8%|▊         | 80519/1000000 [3:41:18<43:50:21,  5.83it/s]

{'loss': Array(0.14295904, dtype=float32), 'loss_cross_entropy': Array(0.13468204, dtype=float32)}


  8%|▊         | 80529/1000000 [3:41:19<26:57:10,  9.48it/s]

{'loss': Array(0.12780724, dtype=float32), 'loss_cross_entropy': Array(0.11972296, dtype=float32)}


  8%|▊         | 80539/1000000 [3:41:21<24:07:34, 10.59it/s]

{'loss': Array(0.14395663, dtype=float32), 'loss_cross_entropy': Array(0.13509835, dtype=float32)}


  8%|▊         | 80549/1000000 [3:41:22<30:16:27,  8.44it/s]

{'loss': Array(0.16917728, dtype=float32), 'loss_cross_entropy': Array(0.15949045, dtype=float32)}


  8%|▊         | 80559/1000000 [3:41:24<24:27:41, 10.44it/s]

{'loss': Array(0.1759519, dtype=float32), 'loss_cross_entropy': Array(0.1662239, dtype=float32)}


  8%|▊         | 80569/1000000 [3:41:25<23:58:32, 10.65it/s]

{'loss': Array(0.1418174, dtype=float32), 'loss_cross_entropy': Array(0.1336049, dtype=float32)}


  8%|▊         | 80579/1000000 [3:41:27<27:06:26,  9.42it/s]

{'loss': Array(0.15739843, dtype=float32), 'loss_cross_entropy': Array(0.14755192, dtype=float32)}


  8%|▊         | 80589/1000000 [3:41:28<24:45:18, 10.32it/s]

{'loss': Array(0.15111227, dtype=float32), 'loss_cross_entropy': Array(0.14183074, dtype=float32)}


  8%|▊         | 80599/1000000 [3:41:30<31:12:34,  8.18it/s]

{'loss': Array(0.13386269, dtype=float32), 'loss_cross_entropy': Array(0.12572943, dtype=float32)}


  8%|▊         | 80609/1000000 [3:41:31<25:21:57, 10.07it/s]

{'loss': Array(0.152808, dtype=float32), 'loss_cross_entropy': Array(0.14386144, dtype=float32)}


  8%|▊         | 80619/1000000 [3:41:33<33:25:47,  7.64it/s]

{'loss': Array(0.16344388, dtype=float32), 'loss_cross_entropy': Array(0.15493296, dtype=float32)}


  8%|▊         | 80629/1000000 [3:41:35<25:07:51, 10.16it/s]

{'loss': Array(0.15147248, dtype=float32), 'loss_cross_entropy': Array(0.1430225, dtype=float32)}


  8%|▊         | 80639/1000000 [3:41:36<23:44:05, 10.76it/s]

{'loss': Array(0.14190593, dtype=float32), 'loss_cross_entropy': Array(0.13427804, dtype=float32)}


  8%|▊         | 80649/1000000 [3:41:38<31:12:52,  8.18it/s]

{'loss': Array(0.12678258, dtype=float32), 'loss_cross_entropy': Array(0.11878365, dtype=float32)}


  8%|▊         | 80659/1000000 [3:41:39<25:18:26, 10.09it/s]

{'loss': Array(0.19014657, dtype=float32), 'loss_cross_entropy': Array(0.17925327, dtype=float32)}


  8%|▊         | 80669/1000000 [3:41:40<23:54:46, 10.68it/s]

{'loss': Array(0.15308292, dtype=float32), 'loss_cross_entropy': Array(0.1442705, dtype=float32)}


  8%|▊         | 80679/1000000 [3:41:42<26:32:32,  9.62it/s]

{'loss': Array(0.14797585, dtype=float32), 'loss_cross_entropy': Array(0.13966344, dtype=float32)}


  8%|▊         | 80689/1000000 [3:41:43<24:52:33, 10.27it/s]

{'loss': Array(0.14716266, dtype=float32), 'loss_cross_entropy': Array(0.13766564, dtype=float32)}


  8%|▊         | 80699/1000000 [3:41:45<30:58:38,  8.24it/s]

{'loss': Array(0.15084991, dtype=float32), 'loss_cross_entropy': Array(0.14206287, dtype=float32)}


  8%|▊         | 80709/1000000 [3:41:47<25:09:17, 10.15it/s]

{'loss': Array(0.16365673, dtype=float32), 'loss_cross_entropy': Array(0.1532536, dtype=float32)}


  8%|▊         | 80719/1000000 [3:41:48<37:58:09,  6.73it/s]

{'loss': Array(0.16121274, dtype=float32), 'loss_cross_entropy': Array(0.15212469, dtype=float32)}


  8%|▊         | 80729/1000000 [3:41:50<26:33:28,  9.61it/s]

{'loss': Array(0.14812422, dtype=float32), 'loss_cross_entropy': Array(0.13983649, dtype=float32)}


  8%|▊         | 80739/1000000 [3:41:51<24:14:05, 10.54it/s]

{'loss': Array(0.14714095, dtype=float32), 'loss_cross_entropy': Array(0.1381558, dtype=float32)}


  8%|▊         | 80749/1000000 [3:41:53<33:16:16,  7.67it/s]

{'loss': Array(0.14412172, dtype=float32), 'loss_cross_entropy': Array(0.13547063, dtype=float32)}


  8%|▊         | 80759/1000000 [3:41:54<25:13:00, 10.13it/s]

{'loss': Array(0.17290549, dtype=float32), 'loss_cross_entropy': Array(0.16298263, dtype=float32)}


  8%|▊         | 80769/1000000 [3:41:55<24:13:04, 10.54it/s]

{'loss': Array(0.15094571, dtype=float32), 'loss_cross_entropy': Array(0.14179038, dtype=float32)}


  8%|▊         | 80779/1000000 [3:41:57<28:32:01,  8.95it/s]

{'loss': Array(0.14274512, dtype=float32), 'loss_cross_entropy': Array(0.13451354, dtype=float32)}


  8%|▊         | 80789/1000000 [3:41:59<24:26:50, 10.44it/s]

{'loss': Array(0.15046337, dtype=float32), 'loss_cross_entropy': Array(0.14201666, dtype=float32)}


  8%|▊         | 80799/1000000 [3:42:00<31:39:30,  8.07it/s]

{'loss': Array(0.14304209, dtype=float32), 'loss_cross_entropy': Array(0.13497928, dtype=float32)}


  8%|▊         | 80809/1000000 [3:42:02<26:20:40,  9.69it/s]

{'loss': Array(0.1375194, dtype=float32), 'loss_cross_entropy': Array(0.12959042, dtype=float32)}


  8%|▊         | 80819/1000000 [3:42:03<25:38:19,  9.96it/s]

{'loss': Array(0.16106775, dtype=float32), 'loss_cross_entropy': Array(0.1514322, dtype=float32)}


  8%|▊         | 80829/1000000 [3:42:05<28:22:59,  9.00it/s]

{'loss': Array(0.1558989, dtype=float32), 'loss_cross_entropy': Array(0.14623001, dtype=float32)}


  8%|▊         | 80839/1000000 [3:42:06<26:17:03,  9.71it/s]

{'loss': Array(0.14099321, dtype=float32), 'loss_cross_entropy': Array(0.13253377, dtype=float32)}


  8%|▊         | 80849/1000000 [3:42:08<34:46:43,  7.34it/s]

{'loss': Array(0.15565453, dtype=float32), 'loss_cross_entropy': Array(0.14642406, dtype=float32)}


  8%|▊         | 80859/1000000 [3:42:10<25:28:46, 10.02it/s]

{'loss': Array(0.15741475, dtype=float32), 'loss_cross_entropy': Array(0.14827213, dtype=float32)}


  8%|▊         | 80869/1000000 [3:42:11<23:32:18, 10.85it/s]

{'loss': Array(0.15118659, dtype=float32), 'loss_cross_entropy': Array(0.1418445, dtype=float32)}


  8%|▊         | 80879/1000000 [3:42:13<26:46:49,  9.53it/s]

{'loss': Array(0.14517274, dtype=float32), 'loss_cross_entropy': Array(0.13657902, dtype=float32)}


  8%|▊         | 80889/1000000 [3:42:14<24:21:21, 10.48it/s]

{'loss': Array(0.12278222, dtype=float32), 'loss_cross_entropy': Array(0.11513672, dtype=float32)}


  8%|▊         | 80899/1000000 [3:42:16<28:36:01,  8.93it/s]

{'loss': Array(0.15000084, dtype=float32), 'loss_cross_entropy': Array(0.14123003, dtype=float32)}


  8%|▊         | 80909/1000000 [3:42:17<24:34:17, 10.39it/s]

{'loss': Array(0.16145061, dtype=float32), 'loss_cross_entropy': Array(0.15321845, dtype=float32)}


  8%|▊         | 80919/1000000 [3:42:19<37:50:51,  6.75it/s]

{'loss': Array(0.1636845, dtype=float32), 'loss_cross_entropy': Array(0.15403672, dtype=float32)}


  8%|▊         | 80929/1000000 [3:42:20<26:06:52,  9.78it/s]

{'loss': Array(0.14724071, dtype=float32), 'loss_cross_entropy': Array(0.13887322, dtype=float32)}


  8%|▊         | 80939/1000000 [3:42:22<24:30:59, 10.41it/s]

{'loss': Array(0.14772387, dtype=float32), 'loss_cross_entropy': Array(0.13887535, dtype=float32)}


  8%|▊         | 80949/1000000 [3:42:23<30:11:58,  8.45it/s]

{'loss': Array(0.14810972, dtype=float32), 'loss_cross_entropy': Array(0.13948336, dtype=float32)}


  8%|▊         | 80959/1000000 [3:42:25<25:15:43, 10.11it/s]

{'loss': Array(0.15227965, dtype=float32), 'loss_cross_entropy': Array(0.14366569, dtype=float32)}


  8%|▊         | 80969/1000000 [3:42:26<33:51:33,  7.54it/s]

{'loss': Array(0.16403155, dtype=float32), 'loss_cross_entropy': Array(0.1542896, dtype=float32)}


  8%|▊         | 80979/1000000 [3:42:28<25:43:40,  9.92it/s]

{'loss': Array(0.15664014, dtype=float32), 'loss_cross_entropy': Array(0.14703803, dtype=float32)}


  8%|▊         | 80989/1000000 [3:42:30<33:42:46,  7.57it/s]

{'loss': Array(0.15573213, dtype=float32), 'loss_cross_entropy': Array(0.14674017, dtype=float32)}


  8%|▊         | 80999/1000000 [3:42:31<24:54:12, 10.25it/s]

{'loss': Array(0.15454833, dtype=float32), 'loss_cross_entropy': Array(0.14559501, dtype=float32)}


  8%|▊         | 81009/1000000 [3:42:38<81:40:23,  3.13it/s] 

{'loss': Array(0.11974925, dtype=float32), 'loss_cross_entropy': Array(0.11192854, dtype=float32)}


  8%|▊         | 81019/1000000 [3:42:40<37:51:41,  6.74it/s]

{'loss': Array(0.14748333, dtype=float32), 'loss_cross_entropy': Array(0.13865538, dtype=float32)}


  8%|▊         | 81029/1000000 [3:42:42<25:56:24,  9.84it/s]

{'loss': Array(0.16423504, dtype=float32), 'loss_cross_entropy': Array(0.1550308, dtype=float32)}


  8%|▊         | 81039/1000000 [3:42:43<33:38:40,  7.59it/s]

{'loss': Array(0.14305595, dtype=float32), 'loss_cross_entropy': Array(0.13433175, dtype=float32)}


  8%|▊         | 81049/1000000 [3:42:45<24:58:00, 10.22it/s]

{'loss': Array(0.15662515, dtype=float32), 'loss_cross_entropy': Array(0.1473694, dtype=float32)}


  8%|▊         | 81059/1000000 [3:42:46<38:12:06,  6.68it/s]

{'loss': Array(0.14575839, dtype=float32), 'loss_cross_entropy': Array(0.1364038, dtype=float32)}


  8%|▊         | 81069/1000000 [3:42:48<25:42:59,  9.93it/s]

{'loss': Array(0.15492842, dtype=float32), 'loss_cross_entropy': Array(0.14527313, dtype=float32)}


  8%|▊         | 81079/1000000 [3:42:49<24:02:46, 10.62it/s]

{'loss': Array(0.13182226, dtype=float32), 'loss_cross_entropy': Array(0.12447961, dtype=float32)}


  8%|▊         | 81089/1000000 [3:42:51<30:22:35,  8.40it/s]

{'loss': Array(0.15937766, dtype=float32), 'loss_cross_entropy': Array(0.1503981, dtype=float32)}


  8%|▊         | 81099/1000000 [3:42:52<25:01:59, 10.20it/s]

{'loss': Array(0.16071263, dtype=float32), 'loss_cross_entropy': Array(0.15135054, dtype=float32)}


  8%|▊         | 81109/1000000 [3:42:54<34:20:11,  7.43it/s]

{'loss': Array(0.18031643, dtype=float32), 'loss_cross_entropy': Array(0.17046879, dtype=float32)}


  8%|▊         | 81119/1000000 [3:42:55<25:46:21,  9.90it/s]

{'loss': Array(0.16166413, dtype=float32), 'loss_cross_entropy': Array(0.15200876, dtype=float32)}


  8%|▊         | 81129/1000000 [3:42:57<24:53:01, 10.26it/s]

{'loss': Array(0.13530625, dtype=float32), 'loss_cross_entropy': Array(0.1274757, dtype=float32)}


  8%|▊         | 81139/1000000 [3:42:58<27:56:49,  9.13it/s]

{'loss': Array(0.15275191, dtype=float32), 'loss_cross_entropy': Array(0.14423111, dtype=float32)}


  8%|▊         | 81149/1000000 [3:43:00<25:15:08, 10.11it/s]

{'loss': Array(0.1670776, dtype=float32), 'loss_cross_entropy': Array(0.1578018, dtype=float32)}


  8%|▊         | 81159/1000000 [3:43:02<39:21:40,  6.48it/s]

{'loss': Array(0.1451869, dtype=float32), 'loss_cross_entropy': Array(0.13663189, dtype=float32)}


  8%|▊         | 81169/1000000 [3:43:03<26:26:41,  9.65it/s]

{'loss': Array(0.14275001, dtype=float32), 'loss_cross_entropy': Array(0.13469657, dtype=float32)}


  8%|▊         | 81179/1000000 [3:43:04<25:04:13, 10.18it/s]

{'loss': Array(0.13798748, dtype=float32), 'loss_cross_entropy': Array(0.12957613, dtype=float32)}


  8%|▊         | 81189/1000000 [3:43:06<31:34:00,  8.09it/s]

{'loss': Array(0.15138814, dtype=float32), 'loss_cross_entropy': Array(0.14210182, dtype=float32)}


  8%|▊         | 81199/1000000 [3:43:08<25:58:34,  9.83it/s]

{'loss': Array(0.13958333, dtype=float32), 'loss_cross_entropy': Array(0.13154395, dtype=float32)}


  8%|▊         | 81209/1000000 [3:43:09<33:59:47,  7.51it/s]

{'loss': Array(0.1501214, dtype=float32), 'loss_cross_entropy': Array(0.14138657, dtype=float32)}


  8%|▊         | 81219/1000000 [3:43:11<25:12:51, 10.12it/s]

{'loss': Array(0.1475384, dtype=float32), 'loss_cross_entropy': Array(0.13837217, dtype=float32)}


  8%|▊         | 81229/1000000 [3:43:12<33:00:51,  7.73it/s]

{'loss': Array(0.17183298, dtype=float32), 'loss_cross_entropy': Array(0.1623774, dtype=float32)}


  8%|▊         | 81239/1000000 [3:43:14<25:34:16,  9.98it/s]

{'loss': Array(0.14307208, dtype=float32), 'loss_cross_entropy': Array(0.13395372, dtype=float32)}


  8%|▊         | 81249/1000000 [3:43:15<23:49:42, 10.71it/s]

{'loss': Array(0.15114968, dtype=float32), 'loss_cross_entropy': Array(0.14256029, dtype=float32)}


  8%|▊         | 81259/1000000 [3:43:17<28:24:21,  8.98it/s]

{'loss': Array(0.14493701, dtype=float32), 'loss_cross_entropy': Array(0.13635378, dtype=float32)}


  8%|▊         | 81269/1000000 [3:43:18<24:48:16, 10.29it/s]

{'loss': Array(0.14783733, dtype=float32), 'loss_cross_entropy': Array(0.13863955, dtype=float32)}


  8%|▊         | 81279/1000000 [3:43:20<30:45:13,  8.30it/s]

{'loss': Array(0.15050969, dtype=float32), 'loss_cross_entropy': Array(0.14188813, dtype=float32)}


  8%|▊         | 81289/1000000 [3:43:21<24:33:04, 10.39it/s]

{'loss': Array(0.15091784, dtype=float32), 'loss_cross_entropy': Array(0.142064, dtype=float32)}


  8%|▊         | 81299/1000000 [3:43:23<37:17:49,  6.84it/s]

{'loss': Array(0.14410852, dtype=float32), 'loss_cross_entropy': Array(0.13526192, dtype=float32)}


  8%|▊         | 81309/1000000 [3:43:24<25:43:52,  9.92it/s]

{'loss': Array(0.14766417, dtype=float32), 'loss_cross_entropy': Array(0.13932502, dtype=float32)}


  8%|▊         | 81319/1000000 [3:43:26<37:54:12,  6.73it/s]

{'loss': Array(0.14289701, dtype=float32), 'loss_cross_entropy': Array(0.13484389, dtype=float32)}


  8%|▊         | 81329/1000000 [3:43:27<25:56:37,  9.84it/s]

{'loss': Array(0.1420733, dtype=float32), 'loss_cross_entropy': Array(0.13343465, dtype=float32)}


  8%|▊         | 81339/1000000 [3:43:29<23:50:02, 10.71it/s]

{'loss': Array(0.1481051, dtype=float32), 'loss_cross_entropy': Array(0.13879831, dtype=float32)}


  8%|▊         | 81349/1000000 [3:43:30<32:40:34,  7.81it/s]

{'loss': Array(0.14776416, dtype=float32), 'loss_cross_entropy': Array(0.13929051, dtype=float32)}


  8%|▊         | 81359/1000000 [3:43:32<24:52:21, 10.26it/s]

{'loss': Array(0.13779955, dtype=float32), 'loss_cross_entropy': Array(0.12923352, dtype=float32)}


  8%|▊         | 81369/1000000 [3:43:33<36:48:49,  6.93it/s]

{'loss': Array(0.16156682, dtype=float32), 'loss_cross_entropy': Array(0.15269725, dtype=float32)}


  8%|▊         | 81379/1000000 [3:43:35<25:52:18,  9.86it/s]

{'loss': Array(0.15639098, dtype=float32), 'loss_cross_entropy': Array(0.14742759, dtype=float32)}


  8%|▊         | 81389/1000000 [3:43:36<24:03:14, 10.61it/s]

{'loss': Array(0.16305566, dtype=float32), 'loss_cross_entropy': Array(0.1533389, dtype=float32)}


  8%|▊         | 81399/1000000 [3:43:38<26:59:54,  9.45it/s]

{'loss': Array(0.16789131, dtype=float32), 'loss_cross_entropy': Array(0.15883161, dtype=float32)}


  8%|▊         | 81409/1000000 [3:43:39<24:07:20, 10.58it/s]

{'loss': Array(0.16724175, dtype=float32), 'loss_cross_entropy': Array(0.15788142, dtype=float32)}


  8%|▊         | 81419/1000000 [3:43:41<26:57:55,  9.46it/s]

{'loss': Array(0.1467282, dtype=float32), 'loss_cross_entropy': Array(0.13798629, dtype=float32)}


  8%|▊         | 81429/1000000 [3:43:42<23:51:27, 10.70it/s]

{'loss': Array(0.15270405, dtype=float32), 'loss_cross_entropy': Array(0.1435934, dtype=float32)}


  8%|▊         | 81439/1000000 [3:43:44<38:00:51,  6.71it/s]

{'loss': Array(0.14835258, dtype=float32), 'loss_cross_entropy': Array(0.13990669, dtype=float32)}


  8%|▊         | 81449/1000000 [3:43:45<26:02:24,  9.80it/s]

{'loss': Array(0.13746439, dtype=float32), 'loss_cross_entropy': Array(0.12940606, dtype=float32)}


  8%|▊         | 81459/1000000 [3:43:47<24:30:25, 10.41it/s]

{'loss': Array(0.13030551, dtype=float32), 'loss_cross_entropy': Array(0.12237401, dtype=float32)}


  8%|▊         | 81469/1000000 [3:43:48<31:05:24,  8.21it/s]

{'loss': Array(0.13299544, dtype=float32), 'loss_cross_entropy': Array(0.1255133, dtype=float32)}


  8%|▊         | 81479/1000000 [3:43:50<25:13:23, 10.12it/s]

{'loss': Array(0.14918289, dtype=float32), 'loss_cross_entropy': Array(0.14011638, dtype=float32)}


  8%|▊         | 81489/1000000 [3:43:51<37:25:33,  6.82it/s]

{'loss': Array(0.15003338, dtype=float32), 'loss_cross_entropy': Array(0.14172322, dtype=float32)}


  8%|▊         | 81499/1000000 [3:43:53<25:57:18,  9.83it/s]

{'loss': Array(0.1538241, dtype=float32), 'loss_cross_entropy': Array(0.14502797, dtype=float32)}


  8%|▊         | 81509/1000000 [3:44:00<82:17:08,  3.10it/s] 

{'loss': Array(0.16599195, dtype=float32), 'loss_cross_entropy': Array(0.15632507, dtype=float32)}


  8%|▊         | 81519/1000000 [3:44:02<38:58:42,  6.55it/s]

{'loss': Array(0.14951257, dtype=float32), 'loss_cross_entropy': Array(0.14104304, dtype=float32)}


  8%|▊         | 81529/1000000 [3:44:04<27:13:42,  9.37it/s]

{'loss': Array(0.17587404, dtype=float32), 'loss_cross_entropy': Array(0.16683772, dtype=float32)}


  8%|▊         | 81539/1000000 [3:44:05<31:57:47,  7.98it/s]

{'loss': Array(0.14927031, dtype=float32), 'loss_cross_entropy': Array(0.14015484, dtype=float32)}


  8%|▊         | 81549/1000000 [3:44:07<26:23:32,  9.67it/s]

{'loss': Array(0.15154521, dtype=float32), 'loss_cross_entropy': Array(0.14234559, dtype=float32)}


  8%|▊         | 81559/1000000 [3:44:09<39:13:25,  6.50it/s]

{'loss': Array(0.13486622, dtype=float32), 'loss_cross_entropy': Array(0.1262027, dtype=float32)}


  8%|▊         | 81569/1000000 [3:44:10<26:27:28,  9.64it/s]

{'loss': Array(0.14068511, dtype=float32), 'loss_cross_entropy': Array(0.13202393, dtype=float32)}


  8%|▊         | 81579/1000000 [3:44:11<24:24:02, 10.46it/s]

{'loss': Array(0.13290481, dtype=float32), 'loss_cross_entropy': Array(0.12481821, dtype=float32)}


  8%|▊         | 81589/1000000 [3:44:13<27:08:35,  9.40it/s]

{'loss': Array(0.15829305, dtype=float32), 'loss_cross_entropy': Array(0.14967138, dtype=float32)}


  8%|▊         | 81599/1000000 [3:44:14<24:41:16, 10.33it/s]

{'loss': Array(0.14079854, dtype=float32), 'loss_cross_entropy': Array(0.13211529, dtype=float32)}


  8%|▊         | 81609/1000000 [3:44:16<36:52:42,  6.92it/s]

{'loss': Array(0.1293554, dtype=float32), 'loss_cross_entropy': Array(0.12158918, dtype=float32)}


  8%|▊         | 81619/1000000 [3:44:17<26:08:49,  9.76it/s]

{'loss': Array(0.14853431, dtype=float32), 'loss_cross_entropy': Array(0.1398544, dtype=float32)}


  8%|▊         | 81629/1000000 [3:44:19<24:06:56, 10.58it/s]

{'loss': Array(0.14287208, dtype=float32), 'loss_cross_entropy': Array(0.13505803, dtype=float32)}


  8%|▊         | 81639/1000000 [3:44:21<28:44:18,  8.88it/s]

{'loss': Array(0.15640038, dtype=float32), 'loss_cross_entropy': Array(0.14761375, dtype=float32)}


  8%|▊         | 81649/1000000 [3:44:22<24:19:40, 10.49it/s]

{'loss': Array(0.1538908, dtype=float32), 'loss_cross_entropy': Array(0.14501569, dtype=float32)}


  8%|▊         | 81659/1000000 [3:44:24<33:55:32,  7.52it/s]

{'loss': Array(0.14380282, dtype=float32), 'loss_cross_entropy': Array(0.13554911, dtype=float32)}


  8%|▊         | 81669/1000000 [3:44:25<25:41:41,  9.93it/s]

{'loss': Array(0.15894406, dtype=float32), 'loss_cross_entropy': Array(0.14924443, dtype=float32)}


  8%|▊         | 81679/1000000 [3:44:27<38:02:54,  6.70it/s]

{'loss': Array(0.13607626, dtype=float32), 'loss_cross_entropy': Array(0.12783007, dtype=float32)}


  8%|▊         | 81689/1000000 [3:44:28<25:39:35,  9.94it/s]

{'loss': Array(0.16310973, dtype=float32), 'loss_cross_entropy': Array(0.15338549, dtype=float32)}


  8%|▊         | 81699/1000000 [3:44:29<24:03:25, 10.60it/s]

{'loss': Array(0.15001932, dtype=float32), 'loss_cross_entropy': Array(0.141718, dtype=float32)}


  8%|▊         | 81709/1000000 [3:44:31<30:34:26,  8.34it/s]

{'loss': Array(0.16530679, dtype=float32), 'loss_cross_entropy': Array(0.15596241, dtype=float32)}


  8%|▊         | 81719/1000000 [3:44:33<24:40:16, 10.34it/s]

{'loss': Array(0.14499258, dtype=float32), 'loss_cross_entropy': Array(0.13582027, dtype=float32)}


  8%|▊         | 81729/1000000 [3:44:34<33:28:07,  7.62it/s]

{'loss': Array(0.13776349, dtype=float32), 'loss_cross_entropy': Array(0.12966791, dtype=float32)}


  8%|▊         | 81739/1000000 [3:44:36<25:50:51,  9.87it/s]

{'loss': Array(0.16919589, dtype=float32), 'loss_cross_entropy': Array(0.15963821, dtype=float32)}


  8%|▊         | 81749/1000000 [3:44:37<23:58:52, 10.64it/s]

{'loss': Array(0.17353433, dtype=float32), 'loss_cross_entropy': Array(0.16357593, dtype=float32)}


  8%|▊         | 81759/1000000 [3:44:39<26:46:26,  9.53it/s]

{'loss': Array(0.15998265, dtype=float32), 'loss_cross_entropy': Array(0.15065476, dtype=float32)}


  8%|▊         | 81769/1000000 [3:44:40<24:08:26, 10.57it/s]

{'loss': Array(0.14163883, dtype=float32), 'loss_cross_entropy': Array(0.13324097, dtype=float32)}


  8%|▊         | 81779/1000000 [3:44:42<37:29:14,  6.80it/s]

{'loss': Array(0.14326541, dtype=float32), 'loss_cross_entropy': Array(0.1347044, dtype=float32)}


  8%|▊         | 81789/1000000 [3:44:43<25:49:31,  9.88it/s]

{'loss': Array(0.1330169, dtype=float32), 'loss_cross_entropy': Array(0.12436426, dtype=float32)}


  8%|▊         | 81799/1000000 [3:44:44<24:24:57, 10.45it/s]

{'loss': Array(0.15035187, dtype=float32), 'loss_cross_entropy': Array(0.14149684, dtype=float32)}


  8%|▊         | 81809/1000000 [3:44:46<30:25:16,  8.38it/s]

{'loss': Array(0.1338494, dtype=float32), 'loss_cross_entropy': Array(0.1259157, dtype=float32)}


  8%|▊         | 81819/1000000 [3:44:48<25:22:39, 10.05it/s]

{'loss': Array(0.15506516, dtype=float32), 'loss_cross_entropy': Array(0.14663771, dtype=float32)}


  8%|▊         | 81829/1000000 [3:44:49<33:51:22,  7.53it/s]

{'loss': Array(0.16464956, dtype=float32), 'loss_cross_entropy': Array(0.15482257, dtype=float32)}


  8%|▊         | 81839/1000000 [3:44:51<25:22:05, 10.05it/s]

{'loss': Array(0.15249422, dtype=float32), 'loss_cross_entropy': Array(0.14368327, dtype=float32)}


  8%|▊         | 81849/1000000 [3:44:52<33:37:08,  7.59it/s]

{'loss': Array(0.15317796, dtype=float32), 'loss_cross_entropy': Array(0.14438426, dtype=float32)}


  8%|▊         | 81859/1000000 [3:44:54<25:05:35, 10.16it/s]

{'loss': Array(0.14905773, dtype=float32), 'loss_cross_entropy': Array(0.1403455, dtype=float32)}


  8%|▊         | 81869/1000000 [3:44:55<24:13:14, 10.53it/s]

{'loss': Array(0.15964226, dtype=float32), 'loss_cross_entropy': Array(0.15049215, dtype=float32)}


  8%|▊         | 81879/1000000 [3:44:57<28:24:48,  8.98it/s]

{'loss': Array(0.15602693, dtype=float32), 'loss_cross_entropy': Array(0.14717142, dtype=float32)}


  8%|▊         | 81889/1000000 [3:44:58<24:52:34, 10.25it/s]

{'loss': Array(0.12933922, dtype=float32), 'loss_cross_entropy': Array(0.12126457, dtype=float32)}


  8%|▊         | 81899/1000000 [3:45:00<31:37:51,  8.06it/s]

{'loss': Array(0.15464021, dtype=float32), 'loss_cross_entropy': Array(0.14556651, dtype=float32)}


  8%|▊         | 81909/1000000 [3:45:01<25:25:03, 10.03it/s]

{'loss': Array(0.1365471, dtype=float32), 'loss_cross_entropy': Array(0.12831382, dtype=float32)}


  8%|▊         | 81919/1000000 [3:45:03<38:56:19,  6.55it/s]

{'loss': Array(0.16623922, dtype=float32), 'loss_cross_entropy': Array(0.15760969, dtype=float32)}


  8%|▊         | 81929/1000000 [3:45:05<28:04:24,  9.08it/s]

{'loss': Array(0.15964852, dtype=float32), 'loss_cross_entropy': Array(0.15003586, dtype=float32)}


  8%|▊         | 81939/1000000 [3:45:06<39:34:37,  6.44it/s]

{'loss': Array(0.1501899, dtype=float32), 'loss_cross_entropy': Array(0.14139558, dtype=float32)}


  8%|▊         | 81949/1000000 [3:45:08<27:31:32,  9.26it/s]

{'loss': Array(0.14131096, dtype=float32), 'loss_cross_entropy': Array(0.13260268, dtype=float32)}


  8%|▊         | 81959/1000000 [3:45:09<25:40:28,  9.93it/s]

{'loss': Array(0.15986869, dtype=float32), 'loss_cross_entropy': Array(0.15105624, dtype=float32)}


  8%|▊         | 81969/1000000 [3:45:11<34:53:31,  7.31it/s]

{'loss': Array(0.15762489, dtype=float32), 'loss_cross_entropy': Array(0.14837663, dtype=float32)}


  8%|▊         | 81979/1000000 [3:45:12<25:45:58,  9.90it/s]

{'loss': Array(0.1598105, dtype=float32), 'loss_cross_entropy': Array(0.15051837, dtype=float32)}


  8%|▊         | 81989/1000000 [3:45:14<37:42:58,  6.76it/s]

{'loss': Array(0.15791416, dtype=float32), 'loss_cross_entropy': Array(0.14856221, dtype=float32)}


  8%|▊         | 81999/1000000 [3:45:16<26:06:32,  9.77it/s]

{'loss': Array(0.15695478, dtype=float32), 'loss_cross_entropy': Array(0.14810576, dtype=float32)}


  8%|▊         | 82009/1000000 [3:45:23<83:09:46,  3.07it/s] 

{'loss': Array(0.15341361, dtype=float32), 'loss_cross_entropy': Array(0.1444887, dtype=float32)}


  8%|▊         | 82019/1000000 [3:45:25<36:35:49,  6.97it/s] 

{'loss': Array(0.15229253, dtype=float32), 'loss_cross_entropy': Array(0.1433671, dtype=float32)}


  8%|▊         | 82029/1000000 [3:45:26<26:15:14,  9.71it/s]

{'loss': Array(0.1394385, dtype=float32), 'loss_cross_entropy': Array(0.13093498, dtype=float32)}


  8%|▊         | 82039/1000000 [3:45:28<27:19:20,  9.33it/s]

{'loss': Array(0.16563807, dtype=float32), 'loss_cross_entropy': Array(0.15695494, dtype=float32)}


  8%|▊         | 82049/1000000 [3:45:29<24:25:59, 10.44it/s]

{'loss': Array(0.15980807, dtype=float32), 'loss_cross_entropy': Array(0.15114406, dtype=float32)}


  8%|▊         | 82059/1000000 [3:45:31<37:23:11,  6.82it/s]

{'loss': Array(0.14980619, dtype=float32), 'loss_cross_entropy': Array(0.14076903, dtype=float32)}


  8%|▊         | 82069/1000000 [3:45:33<26:45:52,  9.53it/s]

{'loss': Array(0.16157943, dtype=float32), 'loss_cross_entropy': Array(0.15275024, dtype=float32)}


  8%|▊         | 82079/1000000 [3:45:34<23:51:33, 10.69it/s]

{'loss': Array(0.15340744, dtype=float32), 'loss_cross_entropy': Array(0.14408326, dtype=float32)}


  8%|▊         | 82089/1000000 [3:45:36<30:18:33,  8.41it/s]

{'loss': Array(0.13972424, dtype=float32), 'loss_cross_entropy': Array(0.13112669, dtype=float32)}


  8%|▊         | 82099/1000000 [3:45:37<24:42:10, 10.32it/s]

{'loss': Array(0.17647478, dtype=float32), 'loss_cross_entropy': Array(0.16683926, dtype=float32)}


  8%|▊         | 82109/1000000 [3:45:39<37:48:45,  6.74it/s]

{'loss': Array(0.15208988, dtype=float32), 'loss_cross_entropy': Array(0.14331338, dtype=float32)}


  8%|▊         | 82119/1000000 [3:45:40<26:03:21,  9.79it/s]

{'loss': Array(0.16924676, dtype=float32), 'loss_cross_entropy': Array(0.15983385, dtype=float32)}


  8%|▊         | 82129/1000000 [3:45:42<24:16:12, 10.51it/s]

{'loss': Array(0.15643843, dtype=float32), 'loss_cross_entropy': Array(0.14745073, dtype=float32)}


  8%|▊         | 82139/1000000 [3:45:43<28:16:36,  9.02it/s]

{'loss': Array(0.15665193, dtype=float32), 'loss_cross_entropy': Array(0.1476454, dtype=float32)}


  8%|▊         | 82149/1000000 [3:45:45<24:17:04, 10.50it/s]

{'loss': Array(0.1436704, dtype=float32), 'loss_cross_entropy': Array(0.13547556, dtype=float32)}


  8%|▊         | 82159/1000000 [3:45:46<32:59:50,  7.73it/s]

{'loss': Array(0.1435471, dtype=float32), 'loss_cross_entropy': Array(0.13482872, dtype=float32)}


  8%|▊         | 82169/1000000 [3:45:48<25:47:02,  9.89it/s]

{'loss': Array(0.13162725, dtype=float32), 'loss_cross_entropy': Array(0.12435285, dtype=float32)}


  8%|▊         | 82179/1000000 [3:45:49<24:06:41, 10.57it/s]

{'loss': Array(0.13917534, dtype=float32), 'loss_cross_entropy': Array(0.1308508, dtype=float32)}


  8%|▊         | 82189/1000000 [3:45:51<30:07:48,  8.46it/s]

{'loss': Array(0.15561567, dtype=float32), 'loss_cross_entropy': Array(0.14638828, dtype=float32)}


  8%|▊         | 82199/1000000 [3:45:52<24:45:10, 10.30it/s]

{'loss': Array(0.1570746, dtype=float32), 'loss_cross_entropy': Array(0.14896668, dtype=float32)}


  8%|▊         | 82209/1000000 [3:45:53<24:02:36, 10.60it/s]

{'loss': Array(0.14234357, dtype=float32), 'loss_cross_entropy': Array(0.13390453, dtype=float32)}


  8%|▊         | 82219/1000000 [3:45:55<26:35:58,  9.58it/s]

{'loss': Array(0.15046908, dtype=float32), 'loss_cross_entropy': Array(0.14168708, dtype=float32)}


  8%|▊         | 82229/1000000 [3:45:56<24:18:40, 10.49it/s]

{'loss': Array(0.15624015, dtype=float32), 'loss_cross_entropy': Array(0.14756611, dtype=float32)}


  8%|▊         | 82239/1000000 [3:45:58<30:13:09,  8.44it/s]

{'loss': Array(0.14913659, dtype=float32), 'loss_cross_entropy': Array(0.14117555, dtype=float32)}


  8%|▊         | 82249/1000000 [3:46:00<25:41:13,  9.92it/s]

{'loss': Array(0.14567594, dtype=float32), 'loss_cross_entropy': Array(0.1373486, dtype=float32)}


  8%|▊         | 82259/1000000 [3:46:01<38:44:04,  6.58it/s]

{'loss': Array(0.1308013, dtype=float32), 'loss_cross_entropy': Array(0.12258506, dtype=float32)}


  8%|▊         | 82269/1000000 [3:46:03<27:19:44,  9.33it/s]

{'loss': Array(0.14041233, dtype=float32), 'loss_cross_entropy': Array(0.13177222, dtype=float32)}


  8%|▊         | 82279/1000000 [3:46:04<39:38:44,  6.43it/s]

{'loss': Array(0.14061765, dtype=float32), 'loss_cross_entropy': Array(0.13226418, dtype=float32)}


  8%|▊         | 82289/1000000 [3:46:06<27:22:12,  9.31it/s]

{'loss': Array(0.13891412, dtype=float32), 'loss_cross_entropy': Array(0.13071458, dtype=float32)}


  8%|▊         | 82299/1000000 [3:46:07<25:20:24, 10.06it/s]

{'loss': Array(0.16190402, dtype=float32), 'loss_cross_entropy': Array(0.15329032, dtype=float32)}


  8%|▊         | 82309/1000000 [3:46:09<34:38:44,  7.36it/s]

{'loss': Array(0.15568183, dtype=float32), 'loss_cross_entropy': Array(0.14692345, dtype=float32)}


  8%|▊         | 82319/1000000 [3:46:11<26:52:31,  9.48it/s]

{'loss': Array(0.16144542, dtype=float32), 'loss_cross_entropy': Array(0.15270732, dtype=float32)}


  8%|▊         | 82329/1000000 [3:46:12<37:39:15,  6.77it/s]

{'loss': Array(0.1525871, dtype=float32), 'loss_cross_entropy': Array(0.14392477, dtype=float32)}


  8%|▊         | 82339/1000000 [3:46:14<26:23:53,  9.66it/s]

{'loss': Array(0.14022158, dtype=float32), 'loss_cross_entropy': Array(0.13185465, dtype=float32)}


  8%|▊         | 82349/1000000 [3:46:15<24:53:13, 10.24it/s]

{'loss': Array(0.14152029, dtype=float32), 'loss_cross_entropy': Array(0.13351972, dtype=float32)}


  8%|▊         | 82359/1000000 [3:46:17<27:10:50,  9.38it/s]

{'loss': Array(0.16237213, dtype=float32), 'loss_cross_entropy': Array(0.15313885, dtype=float32)}


  8%|▊         | 82369/1000000 [3:46:18<24:28:06, 10.42it/s]

{'loss': Array(0.15629801, dtype=float32), 'loss_cross_entropy': Array(0.14670162, dtype=float32)}


  8%|▊         | 82379/1000000 [3:46:20<27:19:27,  9.33it/s]

{'loss': Array(0.16267525, dtype=float32), 'loss_cross_entropy': Array(0.15353778, dtype=float32)}


  8%|▊         | 82389/1000000 [3:46:21<24:30:32, 10.40it/s]

{'loss': Array(0.14835642, dtype=float32), 'loss_cross_entropy': Array(0.13979594, dtype=float32)}


  8%|▊         | 82399/1000000 [3:46:23<37:02:21,  6.88it/s]

{'loss': Array(0.14394759, dtype=float32), 'loss_cross_entropy': Array(0.13545258, dtype=float32)}


  8%|▊         | 82409/1000000 [3:46:24<25:38:58,  9.94it/s]

{'loss': Array(0.13783945, dtype=float32), 'loss_cross_entropy': Array(0.1300834, dtype=float32)}


  8%|▊         | 82419/1000000 [3:46:26<24:10:07, 10.55it/s]

{'loss': Array(0.1458293, dtype=float32), 'loss_cross_entropy': Array(0.13712184, dtype=float32)}


  8%|▊         | 82429/1000000 [3:46:27<30:28:31,  8.36it/s]

{'loss': Array(0.17038384, dtype=float32), 'loss_cross_entropy': Array(0.16052406, dtype=float32)}


  8%|▊         | 82439/1000000 [3:46:29<25:11:51, 10.12it/s]

{'loss': Array(0.1391568, dtype=float32), 'loss_cross_entropy': Array(0.13045555, dtype=float32)}


  8%|▊         | 82449/1000000 [3:46:30<37:56:27,  6.72it/s]

{'loss': Array(0.14639394, dtype=float32), 'loss_cross_entropy': Array(0.13783231, dtype=float32)}


  8%|▊         | 82459/1000000 [3:46:32<26:32:25,  9.60it/s]

{'loss': Array(0.13892366, dtype=float32), 'loss_cross_entropy': Array(0.13063167, dtype=float32)}


  8%|▊         | 82469/1000000 [3:46:33<23:57:21, 10.64it/s]

{'loss': Array(0.14730473, dtype=float32), 'loss_cross_entropy': Array(0.13874394, dtype=float32)}


  8%|▊         | 82479/1000000 [3:46:35<28:06:39,  9.07it/s]

{'loss': Array(0.14626823, dtype=float32), 'loss_cross_entropy': Array(0.13740069, dtype=float32)}


  8%|▊         | 82489/1000000 [3:46:36<24:27:02, 10.42it/s]

{'loss': Array(0.15425603, dtype=float32), 'loss_cross_entropy': Array(0.14519374, dtype=float32)}


  8%|▊         | 82499/1000000 [3:46:38<33:44:05,  7.55it/s]

{'loss': Array(0.15261625, dtype=float32), 'loss_cross_entropy': Array(0.14372325, dtype=float32)}


  8%|▊         | 82509/1000000 [3:46:46<84:58:53,  3.00it/s] 

{'loss': Array(0.1484237, dtype=float32), 'loss_cross_entropy': Array(0.1386431, dtype=float32)}


  8%|▊         | 82519/1000000 [3:46:48<43:52:57,  5.81it/s]

{'loss': Array(0.15034918, dtype=float32), 'loss_cross_entropy': Array(0.1412159, dtype=float32)}


  8%|▊         | 82529/1000000 [3:46:49<27:25:46,  9.29it/s]

{'loss': Array(0.13672571, dtype=float32), 'loss_cross_entropy': Array(0.12863196, dtype=float32)}


  8%|▊         | 82539/1000000 [3:46:50<24:22:21, 10.46it/s]

{'loss': Array(0.16169247, dtype=float32), 'loss_cross_entropy': Array(0.15279095, dtype=float32)}


  8%|▊         | 82549/1000000 [3:46:52<30:16:19,  8.42it/s]

{'loss': Array(0.15672618, dtype=float32), 'loss_cross_entropy': Array(0.14768745, dtype=float32)}


  8%|▊         | 82559/1000000 [3:46:53<25:06:02, 10.15it/s]

{'loss': Array(0.17336778, dtype=float32), 'loss_cross_entropy': Array(0.16364335, dtype=float32)}


  8%|▊         | 82569/1000000 [3:46:55<24:13:37, 10.52it/s]

{'loss': Array(0.14847519, dtype=float32), 'loss_cross_entropy': Array(0.13921414, dtype=float32)}


  8%|▊         | 82579/1000000 [3:46:57<26:41:23,  9.55it/s]

{'loss': Array(0.1452078, dtype=float32), 'loss_cross_entropy': Array(0.13691941, dtype=float32)}


  8%|▊         | 82589/1000000 [3:46:58<23:53:55, 10.66it/s]

{'loss': Array(0.17818603, dtype=float32), 'loss_cross_entropy': Array(0.16786198, dtype=float32)}


  8%|▊         | 82599/1000000 [3:47:00<30:56:27,  8.24it/s]

{'loss': Array(0.13977252, dtype=float32), 'loss_cross_entropy': Array(0.13177298, dtype=float32)}


  8%|▊         | 82609/1000000 [3:47:01<26:31:17,  9.61it/s]

{'loss': Array(0.14886352, dtype=float32), 'loss_cross_entropy': Array(0.14016962, dtype=float32)}


  8%|▊         | 82619/1000000 [3:47:03<35:20:25,  7.21it/s]

{'loss': Array(0.15959997, dtype=float32), 'loss_cross_entropy': Array(0.15126844, dtype=float32)}


  8%|▊         | 82629/1000000 [3:47:04<26:17:45,  9.69it/s]

{'loss': Array(0.16198657, dtype=float32), 'loss_cross_entropy': Array(0.15232556, dtype=float32)}


  8%|▊         | 82639/1000000 [3:47:06<25:12:24, 10.11it/s]

{'loss': Array(0.16948548, dtype=float32), 'loss_cross_entropy': Array(0.1598122, dtype=float32)}


  8%|▊         | 82649/1000000 [3:47:08<32:10:28,  7.92it/s]

{'loss': Array(0.15279053, dtype=float32), 'loss_cross_entropy': Array(0.14444971, dtype=float32)}


  8%|▊         | 82659/1000000 [3:47:09<27:01:02,  9.43it/s]

{'loss': Array(0.14291206, dtype=float32), 'loss_cross_entropy': Array(0.13431972, dtype=float32)}


  8%|▊         | 82669/1000000 [3:47:10<25:04:17, 10.16it/s]

{'loss': Array(0.1503061, dtype=float32), 'loss_cross_entropy': Array(0.14180596, dtype=float32)}


  8%|▊         | 82679/1000000 [3:47:12<27:35:08,  9.24it/s]

{'loss': Array(0.14985423, dtype=float32), 'loss_cross_entropy': Array(0.14057608, dtype=float32)}


  8%|▊         | 82689/1000000 [3:47:14<24:26:46, 10.42it/s]

{'loss': Array(0.14840262, dtype=float32), 'loss_cross_entropy': Array(0.13928948, dtype=float32)}


  8%|▊         | 82699/1000000 [3:47:15<30:24:12,  8.38it/s]

{'loss': Array(0.13526061, dtype=float32), 'loss_cross_entropy': Array(0.12713467, dtype=float32)}


  8%|▊         | 82709/1000000 [3:47:17<25:05:46, 10.15it/s]

{'loss': Array(0.14894806, dtype=float32), 'loss_cross_entropy': Array(0.14046101, dtype=float32)}


  8%|▊         | 82719/1000000 [3:47:18<38:14:21,  6.66it/s]

{'loss': Array(0.12330842, dtype=float32), 'loss_cross_entropy': Array(0.11570527, dtype=float32)}


  8%|▊         | 82729/1000000 [3:47:20<25:37:26,  9.94it/s]

{'loss': Array(0.13946907, dtype=float32), 'loss_cross_entropy': Array(0.13119668, dtype=float32)}


  8%|▊         | 82739/1000000 [3:47:21<23:36:53, 10.79it/s]

{'loss': Array(0.15058671, dtype=float32), 'loss_cross_entropy': Array(0.1414508, dtype=float32)}


  8%|▊         | 82749/1000000 [3:47:23<33:26:04,  7.62it/s]

{'loss': Array(0.1460542, dtype=float32), 'loss_cross_entropy': Array(0.13721496, dtype=float32)}


  8%|▊         | 82759/1000000 [3:47:24<25:13:08, 10.10it/s]

{'loss': Array(0.1572573, dtype=float32), 'loss_cross_entropy': Array(0.14875726, dtype=float32)}


  8%|▊         | 82769/1000000 [3:47:25<24:06:08, 10.57it/s]

{'loss': Array(0.15387018, dtype=float32), 'loss_cross_entropy': Array(0.14443709, dtype=float32)}


  8%|▊         | 82779/1000000 [3:47:27<28:15:01,  9.02it/s]

{'loss': Array(0.15456302, dtype=float32), 'loss_cross_entropy': Array(0.14563207, dtype=float32)}


  8%|▊         | 82789/1000000 [3:47:29<24:44:19, 10.30it/s]

{'loss': Array(0.13625908, dtype=float32), 'loss_cross_entropy': Array(0.12812994, dtype=float32)}


  8%|▊         | 82799/1000000 [3:47:30<30:23:33,  8.38it/s]

{'loss': Array(0.17311236, dtype=float32), 'loss_cross_entropy': Array(0.16363679, dtype=float32)}


  8%|▊         | 82809/1000000 [3:47:32<24:30:56, 10.39it/s]

{'loss': Array(0.14716025, dtype=float32), 'loss_cross_entropy': Array(0.1379808, dtype=float32)}


  8%|▊         | 82819/1000000 [3:47:33<23:41:57, 10.75it/s]

{'loss': Array(0.16756399, dtype=float32), 'loss_cross_entropy': Array(0.15816438, dtype=float32)}


  8%|▊         | 82829/1000000 [3:47:35<27:01:04,  9.43it/s]

{'loss': Array(0.14052539, dtype=float32), 'loss_cross_entropy': Array(0.13172768, dtype=float32)}


  8%|▊         | 82839/1000000 [3:47:36<24:11:52, 10.53it/s]

{'loss': Array(0.15440875, dtype=float32), 'loss_cross_entropy': Array(0.14522497, dtype=float32)}


  8%|▊         | 82849/1000000 [3:47:38<33:41:57,  7.56it/s]

{'loss': Array(0.17191124, dtype=float32), 'loss_cross_entropy': Array(0.16250454, dtype=float32)}


  8%|▊         | 82859/1000000 [3:47:39<25:36:52,  9.95it/s]

{'loss': Array(0.1515448, dtype=float32), 'loss_cross_entropy': Array(0.14286907, dtype=float32)}


  8%|▊         | 82869/1000000 [3:47:41<24:10:48, 10.54it/s]

{'loss': Array(0.14780581, dtype=float32), 'loss_cross_entropy': Array(0.13912395, dtype=float32)}


  8%|▊         | 82879/1000000 [3:47:42<26:46:57,  9.51it/s]

{'loss': Array(0.14529265, dtype=float32), 'loss_cross_entropy': Array(0.13632692, dtype=float32)}


  8%|▊         | 82889/1000000 [3:47:44<23:57:09, 10.64it/s]

{'loss': Array(0.15153815, dtype=float32), 'loss_cross_entropy': Array(0.14232373, dtype=float32)}


  8%|▊         | 82899/1000000 [3:47:45<28:13:37,  9.03it/s]

{'loss': Array(0.16513953, dtype=float32), 'loss_cross_entropy': Array(0.15564382, dtype=float32)}


  8%|▊         | 82909/1000000 [3:47:47<24:32:59, 10.38it/s]

{'loss': Array(0.14875396, dtype=float32), 'loss_cross_entropy': Array(0.14032403, dtype=float32)}


  8%|▊         | 82919/1000000 [3:47:48<37:42:47,  6.75it/s]

{'loss': Array(0.1494988, dtype=float32), 'loss_cross_entropy': Array(0.14048965, dtype=float32)}


  8%|▊         | 82929/1000000 [3:47:50<25:45:01,  9.89it/s]

{'loss': Array(0.16027379, dtype=float32), 'loss_cross_entropy': Array(0.15081531, dtype=float32)}


  8%|▊         | 82939/1000000 [3:47:51<24:06:55, 10.56it/s]

{'loss': Array(0.14301011, dtype=float32), 'loss_cross_entropy': Array(0.13524047, dtype=float32)}


  8%|▊         | 82949/1000000 [3:47:53<30:14:09,  8.42it/s]

{'loss': Array(0.13909698, dtype=float32), 'loss_cross_entropy': Array(0.13149472, dtype=float32)}


  8%|▊         | 82959/1000000 [3:47:54<26:01:48,  9.79it/s]

{'loss': Array(0.15750472, dtype=float32), 'loss_cross_entropy': Array(0.14828677, dtype=float32)}


  8%|▊         | 82969/1000000 [3:47:56<33:51:55,  7.52it/s]

{'loss': Array(0.15086676, dtype=float32), 'loss_cross_entropy': Array(0.14178304, dtype=float32)}


  8%|▊         | 82979/1000000 [3:47:57<25:52:39,  9.84it/s]

{'loss': Array(0.13522393, dtype=float32), 'loss_cross_entropy': Array(0.12753806, dtype=float32)}


  8%|▊         | 82989/1000000 [3:47:59<35:27:38,  7.18it/s]

{'loss': Array(0.15165673, dtype=float32), 'loss_cross_entropy': Array(0.14256813, dtype=float32)}


  8%|▊         | 82999/1000000 [3:48:01<26:04:52,  9.77it/s]

{'loss': Array(0.15697335, dtype=float32), 'loss_cross_entropy': Array(0.1475039, dtype=float32)}


  8%|▊         | 83009/1000000 [3:48:08<84:30:14,  3.01it/s] 

{'loss': Array(0.16416639, dtype=float32), 'loss_cross_entropy': Array(0.15458433, dtype=float32)}


  8%|▊         | 83019/1000000 [3:48:10<40:15:15,  6.33it/s]

{'loss': Array(0.14569592, dtype=float32), 'loss_cross_entropy': Array(0.1375787, dtype=float32)}


  8%|▊         | 83029/1000000 [3:48:12<26:03:30,  9.77it/s]

{'loss': Array(0.1530042, dtype=float32), 'loss_cross_entropy': Array(0.14414407, dtype=float32)}


  8%|▊         | 83039/1000000 [3:48:13<33:33:14,  7.59it/s]

{'loss': Array(0.14351198, dtype=float32), 'loss_cross_entropy': Array(0.13497095, dtype=float32)}


  8%|▊         | 83049/1000000 [3:48:15<25:41:19,  9.92it/s]

{'loss': Array(0.17166062, dtype=float32), 'loss_cross_entropy': Array(0.16233425, dtype=float32)}


  8%|▊         | 83059/1000000 [3:48:16<38:14:59,  6.66it/s]

{'loss': Array(0.16375287, dtype=float32), 'loss_cross_entropy': Array(0.154364, dtype=float32)}


  8%|▊         | 83069/1000000 [3:48:18<25:54:28,  9.83it/s]

{'loss': Array(0.13889204, dtype=float32), 'loss_cross_entropy': Array(0.1305925, dtype=float32)}


  8%|▊         | 83079/1000000 [3:48:19<23:36:55, 10.79it/s]

{'loss': Array(0.15786098, dtype=float32), 'loss_cross_entropy': Array(0.14838855, dtype=float32)}


  8%|▊         | 83089/1000000 [3:48:21<30:36:54,  8.32it/s]

{'loss': Array(0.14829393, dtype=float32), 'loss_cross_entropy': Array(0.13982461, dtype=float32)}


  8%|▊         | 83099/1000000 [3:48:22<24:42:13, 10.31it/s]

{'loss': Array(0.16464838, dtype=float32), 'loss_cross_entropy': Array(0.15514669, dtype=float32)}


  8%|▊         | 83109/1000000 [3:48:24<33:43:32,  7.55it/s]

{'loss': Array(0.15450811, dtype=float32), 'loss_cross_entropy': Array(0.14539452, dtype=float32)}


  8%|▊         | 83119/1000000 [3:48:25<25:28:45, 10.00it/s]

{'loss': Array(0.14666502, dtype=float32), 'loss_cross_entropy': Array(0.13797514, dtype=float32)}


  8%|▊         | 83129/1000000 [3:48:27<23:58:28, 10.62it/s]

{'loss': Array(0.1264985, dtype=float32), 'loss_cross_entropy': Array(0.11868823, dtype=float32)}


  8%|▊         | 83139/1000000 [3:48:28<26:36:10,  9.57it/s]

{'loss': Array(0.15025038, dtype=float32), 'loss_cross_entropy': Array(0.14138532, dtype=float32)}


  8%|▊         | 83149/1000000 [3:48:30<24:02:17, 10.59it/s]

{'loss': Array(0.13401146, dtype=float32), 'loss_cross_entropy': Array(0.12624033, dtype=float32)}


  8%|▊         | 83159/1000000 [3:48:31<37:01:46,  6.88it/s]

{'loss': Array(0.15444124, dtype=float32), 'loss_cross_entropy': Array(0.14566168, dtype=float32)}


  8%|▊         | 83169/1000000 [3:48:33<25:59:13,  9.80it/s]

{'loss': Array(0.17078696, dtype=float32), 'loss_cross_entropy': Array(0.16124077, dtype=float32)}


  8%|▊         | 83179/1000000 [3:48:34<23:55:15, 10.65it/s]

{'loss': Array(0.14393078, dtype=float32), 'loss_cross_entropy': Array(0.13621114, dtype=float32)}


  8%|▊         | 83189/1000000 [3:48:36<30:23:55,  8.38it/s]

{'loss': Array(0.15208949, dtype=float32), 'loss_cross_entropy': Array(0.1433001, dtype=float32)}


  8%|▊         | 83199/1000000 [3:48:37<24:48:42, 10.26it/s]

{'loss': Array(0.16126506, dtype=float32), 'loss_cross_entropy': Array(0.1526708, dtype=float32)}


  8%|▊         | 83209/1000000 [3:48:39<33:01:25,  7.71it/s]

{'loss': Array(0.13619688, dtype=float32), 'loss_cross_entropy': Array(0.12782878, dtype=float32)}


  8%|▊         | 83219/1000000 [3:48:40<25:13:33, 10.10it/s]

{'loss': Array(0.14834568, dtype=float32), 'loss_cross_entropy': Array(0.13939615, dtype=float32)}


  8%|▊         | 83229/1000000 [3:48:42<33:31:10,  7.60it/s]

{'loss': Array(0.13545938, dtype=float32), 'loss_cross_entropy': Array(0.12725815, dtype=float32)}


  8%|▊         | 83239/1000000 [3:48:43<25:28:26, 10.00it/s]

{'loss': Array(0.15128419, dtype=float32), 'loss_cross_entropy': Array(0.14305535, dtype=float32)}


  8%|▊         | 83249/1000000 [3:48:45<23:57:28, 10.63it/s]

{'loss': Array(0.1509989, dtype=float32), 'loss_cross_entropy': Array(0.14254688, dtype=float32)}


  8%|▊         | 83259/1000000 [3:48:46<27:59:30,  9.10it/s]

{'loss': Array(0.13943252, dtype=float32), 'loss_cross_entropy': Array(0.13104708, dtype=float32)}


  8%|▊         | 83269/1000000 [3:48:48<24:29:47, 10.40it/s]

{'loss': Array(0.14107141, dtype=float32), 'loss_cross_entropy': Array(0.13253151, dtype=float32)}


  8%|▊         | 83279/1000000 [3:48:49<30:08:30,  8.45it/s]

{'loss': Array(0.16654932, dtype=float32), 'loss_cross_entropy': Array(0.15706562, dtype=float32)}


  8%|▊         | 83289/1000000 [3:48:51<24:39:39, 10.33it/s]

{'loss': Array(0.14549541, dtype=float32), 'loss_cross_entropy': Array(0.13730638, dtype=float32)}


  8%|▊         | 83299/1000000 [3:48:53<37:36:16,  6.77it/s]

{'loss': Array(0.15030558, dtype=float32), 'loss_cross_entropy': Array(0.14137757, dtype=float32)}


  8%|▊         | 83309/1000000 [3:48:54<26:32:40,  9.59it/s]

{'loss': Array(0.1461109, dtype=float32), 'loss_cross_entropy': Array(0.13772695, dtype=float32)}


  8%|▊         | 83319/1000000 [3:48:56<37:44:02,  6.75it/s]

{'loss': Array(0.1539251, dtype=float32), 'loss_cross_entropy': Array(0.14477247, dtype=float32)}


  8%|▊         | 83329/1000000 [3:48:57<25:50:22,  9.85it/s]

{'loss': Array(0.16295591, dtype=float32), 'loss_cross_entropy': Array(0.15325142, dtype=float32)}


  8%|▊         | 83339/1000000 [3:48:58<24:16:02, 10.49it/s]

{'loss': Array(0.15713823, dtype=float32), 'loss_cross_entropy': Array(0.14750032, dtype=float32)}


  8%|▊         | 83349/1000000 [3:49:00<33:43:59,  7.55it/s]

{'loss': Array(0.16370784, dtype=float32), 'loss_cross_entropy': Array(0.15428129, dtype=float32)}


  8%|▊         | 83359/1000000 [3:49:01<25:47:44,  9.87it/s]

{'loss': Array(0.14577076, dtype=float32), 'loss_cross_entropy': Array(0.13686952, dtype=float32)}


  8%|▊         | 83369/1000000 [3:49:03<38:58:06,  6.53it/s]

{'loss': Array(0.14420392, dtype=float32), 'loss_cross_entropy': Array(0.13568051, dtype=float32)}


  8%|▊         | 83379/1000000 [3:49:05<27:26:25,  9.28it/s]

{'loss': Array(0.1543111, dtype=float32), 'loss_cross_entropy': Array(0.14494495, dtype=float32)}


  8%|▊         | 83389/1000000 [3:49:06<25:05:31, 10.15it/s]

{'loss': Array(0.1568171, dtype=float32), 'loss_cross_entropy': Array(0.14750747, dtype=float32)}


  8%|▊         | 83399/1000000 [3:49:08<28:00:25,  9.09it/s]

{'loss': Array(0.15648514, dtype=float32), 'loss_cross_entropy': Array(0.14733689, dtype=float32)}


  8%|▊         | 83409/1000000 [3:49:09<25:00:32, 10.18it/s]

{'loss': Array(0.13294385, dtype=float32), 'loss_cross_entropy': Array(0.1250488, dtype=float32)}


  8%|▊         | 83419/1000000 [3:49:11<28:10:24,  9.04it/s]

{'loss': Array(0.14917362, dtype=float32), 'loss_cross_entropy': Array(0.14004873, dtype=float32)}


  8%|▊         | 83429/1000000 [3:49:12<24:58:00, 10.20it/s]

{'loss': Array(0.1387064, dtype=float32), 'loss_cross_entropy': Array(0.13025571, dtype=float32)}


  8%|▊         | 83439/1000000 [3:49:14<37:41:28,  6.75it/s]

{'loss': Array(0.14958754, dtype=float32), 'loss_cross_entropy': Array(0.14147608, dtype=float32)}


  8%|▊         | 83449/1000000 [3:49:16<26:06:11,  9.75it/s]

{'loss': Array(0.14225289, dtype=float32), 'loss_cross_entropy': Array(0.13382812, dtype=float32)}


  8%|▊         | 83459/1000000 [3:49:17<23:44:57, 10.72it/s]

{'loss': Array(0.14479105, dtype=float32), 'loss_cross_entropy': Array(0.13645935, dtype=float32)}


  8%|▊         | 83469/1000000 [3:49:19<30:10:49,  8.44it/s]

{'loss': Array(0.15691717, dtype=float32), 'loss_cross_entropy': Array(0.14773205, dtype=float32)}


  8%|▊         | 83479/1000000 [3:49:20<24:29:52, 10.39it/s]

{'loss': Array(0.1498798, dtype=float32), 'loss_cross_entropy': Array(0.14076424, dtype=float32)}


  8%|▊         | 83489/1000000 [3:49:22<37:21:07,  6.82it/s]

{'loss': Array(0.14400749, dtype=float32), 'loss_cross_entropy': Array(0.1357371, dtype=float32)}


  8%|▊         | 83499/1000000 [3:49:23<26:13:13,  9.71it/s]

{'loss': Array(0.15826988, dtype=float32), 'loss_cross_entropy': Array(0.14853328, dtype=float32)}


  8%|▊         | 83509/1000000 [3:49:31<82:56:05,  3.07it/s] 

{'loss': Array(0.16617386, dtype=float32), 'loss_cross_entropy': Array(0.15680222, dtype=float32)}


  8%|▊         | 83519/1000000 [3:49:33<39:04:34,  6.51it/s]

{'loss': Array(0.14194624, dtype=float32), 'loss_cross_entropy': Array(0.13376683, dtype=float32)}


  8%|▊         | 83529/1000000 [3:49:34<26:10:07,  9.73it/s]

{'loss': Array(0.13895099, dtype=float32), 'loss_cross_entropy': Array(0.13106136, dtype=float32)}


  8%|▊         | 83539/1000000 [3:49:36<30:36:12,  8.32it/s]

{'loss': Array(0.15659925, dtype=float32), 'loss_cross_entropy': Array(0.14770037, dtype=float32)}


  8%|▊         | 83549/1000000 [3:49:37<24:48:48, 10.26it/s]

{'loss': Array(0.146618, dtype=float32), 'loss_cross_entropy': Array(0.13854973, dtype=float32)}


  8%|▊         | 83559/1000000 [3:49:39<37:18:41,  6.82it/s]

{'loss': Array(0.15279959, dtype=float32), 'loss_cross_entropy': Array(0.14361945, dtype=float32)}


  8%|▊         | 83569/1000000 [3:49:40<25:46:10,  9.88it/s]

{'loss': Array(0.13018729, dtype=float32), 'loss_cross_entropy': Array(0.12161572, dtype=float32)}


  8%|▊         | 83579/1000000 [3:49:41<24:03:18, 10.58it/s]

{'loss': Array(0.13720992, dtype=float32), 'loss_cross_entropy': Array(0.12906718, dtype=float32)}


  8%|▊         | 83589/1000000 [3:49:43<27:11:31,  9.36it/s]

{'loss': Array(0.16952579, dtype=float32), 'loss_cross_entropy': Array(0.1609448, dtype=float32)}


  8%|▊         | 83599/1000000 [3:49:45<24:16:41, 10.48it/s]

{'loss': Array(0.17105049, dtype=float32), 'loss_cross_entropy': Array(0.16168885, dtype=float32)}


  8%|▊         | 83609/1000000 [3:49:46<37:35:55,  6.77it/s]

{'loss': Array(0.1326686, dtype=float32), 'loss_cross_entropy': Array(0.12516414, dtype=float32)}


  8%|▊         | 83619/1000000 [3:49:48<25:43:50,  9.89it/s]

{'loss': Array(0.158684, dtype=float32), 'loss_cross_entropy': Array(0.14926098, dtype=float32)}


  8%|▊         | 83629/1000000 [3:49:49<24:13:18, 10.51it/s]

{'loss': Array(0.14448877, dtype=float32), 'loss_cross_entropy': Array(0.13642827, dtype=float32)}


  8%|▊         | 83639/1000000 [3:49:51<28:41:35,  8.87it/s]

{'loss': Array(0.15378499, dtype=float32), 'loss_cross_entropy': Array(0.14452386, dtype=float32)}


  8%|▊         | 83649/1000000 [3:49:52<24:04:42, 10.57it/s]

{'loss': Array(0.15682247, dtype=float32), 'loss_cross_entropy': Array(0.14736468, dtype=float32)}


  8%|▊         | 83659/1000000 [3:49:54<33:49:45,  7.52it/s]

{'loss': Array(0.15758619, dtype=float32), 'loss_cross_entropy': Array(0.14796697, dtype=float32)}


  8%|▊         | 83669/1000000 [3:49:55<25:13:54, 10.09it/s]

{'loss': Array(0.15140086, dtype=float32), 'loss_cross_entropy': Array(0.14256206, dtype=float32)}


  8%|▊         | 83679/1000000 [3:49:57<37:22:05,  6.81it/s]

{'loss': Array(0.14600731, dtype=float32), 'loss_cross_entropy': Array(0.13725291, dtype=float32)}


  8%|▊         | 83689/1000000 [3:49:58<26:09:09,  9.73it/s]

{'loss': Array(0.14209433, dtype=float32), 'loss_cross_entropy': Array(0.1338132, dtype=float32)}


  8%|▊         | 83699/1000000 [3:50:00<25:06:36, 10.14it/s]

{'loss': Array(0.16140284, dtype=float32), 'loss_cross_entropy': Array(0.15199961, dtype=float32)}


  8%|▊         | 83709/1000000 [3:50:01<31:49:24,  8.00it/s]

{'loss': Array(0.16879013, dtype=float32), 'loss_cross_entropy': Array(0.15886219, dtype=float32)}


  8%|▊         | 83719/1000000 [3:50:03<26:09:47,  9.73it/s]

{'loss': Array(0.14935844, dtype=float32), 'loss_cross_entropy': Array(0.14018512, dtype=float32)}


  8%|▊         | 83729/1000000 [3:50:05<34:44:25,  7.33it/s]

{'loss': Array(0.16544501, dtype=float32), 'loss_cross_entropy': Array(0.1560298, dtype=float32)}


  8%|▊         | 83739/1000000 [3:50:06<25:45:33,  9.88it/s]

{'loss': Array(0.15109275, dtype=float32), 'loss_cross_entropy': Array(0.14200221, dtype=float32)}


  8%|▊         | 83749/1000000 [3:50:07<25:02:32, 10.16it/s]

{'loss': Array(0.14788692, dtype=float32), 'loss_cross_entropy': Array(0.1390266, dtype=float32)}


  8%|▊         | 83759/1000000 [3:50:09<28:14:11,  9.01it/s]

{'loss': Array(0.15845828, dtype=float32), 'loss_cross_entropy': Array(0.14947711, dtype=float32)}


  8%|▊         | 83769/1000000 [3:50:11<24:55:54, 10.21it/s]

{'loss': Array(0.15755695, dtype=float32), 'loss_cross_entropy': Array(0.14869688, dtype=float32)}


  8%|▊         | 83779/1000000 [3:50:12<36:51:13,  6.91it/s]

{'loss': Array(0.12872395, dtype=float32), 'loss_cross_entropy': Array(0.12033272, dtype=float32)}


  8%|▊         | 83789/1000000 [3:50:14<26:25:17,  9.63it/s]

{'loss': Array(0.15455417, dtype=float32), 'loss_cross_entropy': Array(0.14636394, dtype=float32)}


  8%|▊         | 83799/1000000 [3:50:15<24:22:53, 10.44it/s]

{'loss': Array(0.155266, dtype=float32), 'loss_cross_entropy': Array(0.14636147, dtype=float32)}


  8%|▊         | 83809/1000000 [3:50:17<31:12:52,  8.15it/s]

{'loss': Array(0.13636594, dtype=float32), 'loss_cross_entropy': Array(0.12854883, dtype=float32)}


  8%|▊         | 83819/1000000 [3:50:18<25:12:38, 10.09it/s]

{'loss': Array(0.13362195, dtype=float32), 'loss_cross_entropy': Array(0.12545842, dtype=float32)}


  8%|▊         | 83829/1000000 [3:50:20<33:58:47,  7.49it/s]

{'loss': Array(0.1467413, dtype=float32), 'loss_cross_entropy': Array(0.13846588, dtype=float32)}


  8%|▊         | 83839/1000000 [3:50:21<25:41:47,  9.90it/s]

{'loss': Array(0.17047966, dtype=float32), 'loss_cross_entropy': Array(0.16076525, dtype=float32)}


  8%|▊         | 83849/1000000 [3:50:23<33:24:35,  7.62it/s]

{'loss': Array(0.170029, dtype=float32), 'loss_cross_entropy': Array(0.15975867, dtype=float32)}


  8%|▊         | 83859/1000000 [3:50:24<25:22:50, 10.03it/s]

{'loss': Array(0.13769174, dtype=float32), 'loss_cross_entropy': Array(0.12970917, dtype=float32)}


  8%|▊         | 83869/1000000 [3:50:26<24:15:49, 10.49it/s]

{'loss': Array(0.14465079, dtype=float32), 'loss_cross_entropy': Array(0.13603641, dtype=float32)}


  8%|▊         | 83879/1000000 [3:50:28<29:04:46,  8.75it/s]

{'loss': Array(0.16129299, dtype=float32), 'loss_cross_entropy': Array(0.15172216, dtype=float32)}


  8%|▊         | 83889/1000000 [3:50:29<24:14:07, 10.50it/s]

{'loss': Array(0.14537922, dtype=float32), 'loss_cross_entropy': Array(0.13703373, dtype=float32)}


  8%|▊         | 83899/1000000 [3:50:31<30:12:46,  8.42it/s]

{'loss': Array(0.15596138, dtype=float32), 'loss_cross_entropy': Array(0.14688864, dtype=float32)}


  8%|▊         | 83909/1000000 [3:50:32<24:42:13, 10.30it/s]

{'loss': Array(0.15311871, dtype=float32), 'loss_cross_entropy': Array(0.14466378, dtype=float32)}


  8%|▊         | 83919/1000000 [3:50:34<37:08:51,  6.85it/s]

{'loss': Array(0.15575035, dtype=float32), 'loss_cross_entropy': Array(0.14667259, dtype=float32)}


  8%|▊         | 83929/1000000 [3:50:35<25:38:00,  9.93it/s]

{'loss': Array(0.14720936, dtype=float32), 'loss_cross_entropy': Array(0.13799949, dtype=float32)}


  8%|▊         | 83939/1000000 [3:50:37<38:13:12,  6.66it/s]

{'loss': Array(0.13372551, dtype=float32), 'loss_cross_entropy': Array(0.12548406, dtype=float32)}


  8%|▊         | 83949/1000000 [3:50:38<26:30:47,  9.60it/s]

{'loss': Array(0.16150932, dtype=float32), 'loss_cross_entropy': Array(0.15206353, dtype=float32)}


  8%|▊         | 83959/1000000 [3:50:39<23:55:57, 10.63it/s]

{'loss': Array(0.1358474, dtype=float32), 'loss_cross_entropy': Array(0.12780423, dtype=float32)}


  8%|▊         | 83969/1000000 [3:50:41<33:05:44,  7.69it/s]

{'loss': Array(0.15386038, dtype=float32), 'loss_cross_entropy': Array(0.14498472, dtype=float32)}


  8%|▊         | 83979/1000000 [3:50:43<25:32:08,  9.96it/s]

{'loss': Array(0.14970125, dtype=float32), 'loss_cross_entropy': Array(0.14155087, dtype=float32)}


  8%|▊         | 83989/1000000 [3:50:44<38:02:56,  6.69it/s]

{'loss': Array(0.13637182, dtype=float32), 'loss_cross_entropy': Array(0.12828623, dtype=float32)}


  8%|▊         | 83999/1000000 [3:50:46<26:16:38,  9.68it/s]

{'loss': Array(0.15298909, dtype=float32), 'loss_cross_entropy': Array(0.14374836, dtype=float32)}


  8%|▊         | 84009/1000000 [3:50:54<84:38:12,  3.01it/s] 

{'loss': Array(0.1451609, dtype=float32), 'loss_cross_entropy': Array(0.13706607, dtype=float32)}


  8%|▊         | 84019/1000000 [3:50:55<36:58:29,  6.88it/s] 

{'loss': Array(0.14684774, dtype=float32), 'loss_cross_entropy': Array(0.1373347, dtype=float32)}


  8%|▊         | 84029/1000000 [3:50:57<25:44:57,  9.88it/s]

{'loss': Array(0.14318067, dtype=float32), 'loss_cross_entropy': Array(0.13425194, dtype=float32)}


  8%|▊         | 84039/1000000 [3:50:58<27:38:30,  9.20it/s]

{'loss': Array(0.16387673, dtype=float32), 'loss_cross_entropy': Array(0.15512878, dtype=float32)}


  8%|▊         | 84049/1000000 [3:51:00<25:00:36, 10.17it/s]

{'loss': Array(0.14904018, dtype=float32), 'loss_cross_entropy': Array(0.1402807, dtype=float32)}


  8%|▊         | 84059/1000000 [3:51:02<38:11:38,  6.66it/s]

{'loss': Array(0.15981458, dtype=float32), 'loss_cross_entropy': Array(0.1504623, dtype=float32)}


  8%|▊         | 84069/1000000 [3:51:03<26:33:02,  9.58it/s]

{'loss': Array(0.14799564, dtype=float32), 'loss_cross_entropy': Array(0.13933264, dtype=float32)}


  8%|▊         | 84079/1000000 [3:51:04<24:33:50, 10.36it/s]

{'loss': Array(0.16581732, dtype=float32), 'loss_cross_entropy': Array(0.1565129, dtype=float32)}


  8%|▊         | 84089/1000000 [3:51:06<32:22:15,  7.86it/s]

{'loss': Array(0.14352357, dtype=float32), 'loss_cross_entropy': Array(0.13460922, dtype=float32)}


  8%|▊         | 84099/1000000 [3:51:08<26:06:06,  9.75it/s]

{'loss': Array(0.16166851, dtype=float32), 'loss_cross_entropy': Array(0.15237342, dtype=float32)}


  8%|▊         | 84109/1000000 [3:51:09<39:06:27,  6.51it/s]

{'loss': Array(0.15228772, dtype=float32), 'loss_cross_entropy': Array(0.14265403, dtype=float32)}


  8%|▊         | 84119/1000000 [3:51:11<26:17:11,  9.68it/s]

{'loss': Array(0.13373417, dtype=float32), 'loss_cross_entropy': Array(0.12557591, dtype=float32)}


  8%|▊         | 84129/1000000 [3:51:12<24:17:39, 10.47it/s]

{'loss': Array(0.13887472, dtype=float32), 'loss_cross_entropy': Array(0.13100418, dtype=float32)}


  8%|▊         | 84139/1000000 [3:51:14<28:36:16,  8.89it/s]

{'loss': Array(0.15251018, dtype=float32), 'loss_cross_entropy': Array(0.14328378, dtype=float32)}


  8%|▊         | 84149/1000000 [3:51:15<24:39:26, 10.32it/s]

{'loss': Array(0.15785985, dtype=float32), 'loss_cross_entropy': Array(0.14824769, dtype=float32)}


  8%|▊         | 84159/1000000 [3:51:17<33:50:44,  7.52it/s]

{'loss': Array(0.15626863, dtype=float32), 'loss_cross_entropy': Array(0.147458, dtype=float32)}


  8%|▊         | 84169/1000000 [3:51:18<25:55:43,  9.81it/s]

{'loss': Array(0.1527575, dtype=float32), 'loss_cross_entropy': Array(0.14335681, dtype=float32)}


  8%|▊         | 84179/1000000 [3:51:20<24:22:05, 10.44it/s]

{'loss': Array(0.15319294, dtype=float32), 'loss_cross_entropy': Array(0.14389113, dtype=float32)}


  8%|▊         | 84189/1000000 [3:51:21<31:00:41,  8.20it/s]

{'loss': Array(0.14629956, dtype=float32), 'loss_cross_entropy': Array(0.13757895, dtype=float32)}


  8%|▊         | 84199/1000000 [3:51:23<25:12:46, 10.09it/s]

{'loss': Array(0.14516793, dtype=float32), 'loss_cross_entropy': Array(0.13642639, dtype=float32)}


  8%|▊         | 84209/1000000 [3:51:24<23:53:43, 10.65it/s]

{'loss': Array(0.14052865, dtype=float32), 'loss_cross_entropy': Array(0.13241951, dtype=float32)}


  8%|▊         | 84219/1000000 [3:51:26<27:03:21,  9.40it/s]

{'loss': Array(0.17786932, dtype=float32), 'loss_cross_entropy': Array(0.16783027, dtype=float32)}


  8%|▊         | 84229/1000000 [3:51:27<24:44:25, 10.28it/s]

{'loss': Array(0.16713588, dtype=float32), 'loss_cross_entropy': Array(0.1576899, dtype=float32)}


  8%|▊         | 84239/1000000 [3:51:29<30:35:28,  8.32it/s]

{'loss': Array(0.1527135, dtype=float32), 'loss_cross_entropy': Array(0.14422606, dtype=float32)}


  8%|▊         | 84249/1000000 [3:51:30<24:58:52, 10.18it/s]

{'loss': Array(0.15288308, dtype=float32), 'loss_cross_entropy': Array(0.14405946, dtype=float32)}


  8%|▊         | 84259/1000000 [3:51:32<37:25:03,  6.80it/s]

{'loss': Array(0.14979719, dtype=float32), 'loss_cross_entropy': Array(0.13998292, dtype=float32)}


  8%|▊         | 84269/1000000 [3:51:34<26:07:01,  9.74it/s]

{'loss': Array(0.1405087, dtype=float32), 'loss_cross_entropy': Array(0.13282351, dtype=float32)}


  8%|▊         | 84279/1000000 [3:51:35<38:13:01,  6.66it/s]

{'loss': Array(0.1349756, dtype=float32), 'loss_cross_entropy': Array(0.12654696, dtype=float32)}


  8%|▊         | 84289/1000000 [3:51:37<26:28:25,  9.61it/s]

{'loss': Array(0.14672944, dtype=float32), 'loss_cross_entropy': Array(0.13840583, dtype=float32)}


  8%|▊         | 84299/1000000 [3:51:38<24:02:58, 10.58it/s]

{'loss': Array(0.14554156, dtype=float32), 'loss_cross_entropy': Array(0.1369574, dtype=float32)}


  8%|▊         | 84309/1000000 [3:51:40<33:22:51,  7.62it/s]

{'loss': Array(0.14455573, dtype=float32), 'loss_cross_entropy': Array(0.13603757, dtype=float32)}


  8%|▊         | 84319/1000000 [3:51:41<24:59:23, 10.18it/s]

{'loss': Array(0.1479707, dtype=float32), 'loss_cross_entropy': Array(0.13899718, dtype=float32)}


  8%|▊         | 84329/1000000 [3:51:43<37:26:11,  6.79it/s]

{'loss': Array(0.17104605, dtype=float32), 'loss_cross_entropy': Array(0.16155797, dtype=float32)}


  8%|▊         | 84339/1000000 [3:51:44<25:43:37,  9.89it/s]

{'loss': Array(0.14167845, dtype=float32), 'loss_cross_entropy': Array(0.13324134, dtype=float32)}


  8%|▊         | 84349/1000000 [3:51:46<23:47:18, 10.69it/s]

{'loss': Array(0.16438888, dtype=float32), 'loss_cross_entropy': Array(0.1545463, dtype=float32)}


  8%|▊         | 84359/1000000 [3:51:47<27:22:44,  9.29it/s]

{'loss': Array(0.16649607, dtype=float32), 'loss_cross_entropy': Array(0.15690109, dtype=float32)}


  8%|▊         | 84369/1000000 [3:51:49<23:55:52, 10.63it/s]

{'loss': Array(0.15470135, dtype=float32), 'loss_cross_entropy': Array(0.14554365, dtype=float32)}


  8%|▊         | 84379/1000000 [3:51:50<27:35:01,  9.22it/s]

{'loss': Array(0.14695396, dtype=float32), 'loss_cross_entropy': Array(0.13819799, dtype=float32)}


  8%|▊         | 84389/1000000 [3:51:52<24:02:50, 10.58it/s]

{'loss': Array(0.14407879, dtype=float32), 'loss_cross_entropy': Array(0.13551061, dtype=float32)}


  8%|▊         | 84399/1000000 [3:51:53<37:11:32,  6.84it/s]

{'loss': Array(0.15451229, dtype=float32), 'loss_cross_entropy': Array(0.14495735, dtype=float32)}


  8%|▊         | 84409/1000000 [3:51:55<25:58:15,  9.79it/s]

{'loss': Array(0.15314703, dtype=float32), 'loss_cross_entropy': Array(0.14383268, dtype=float32)}


  8%|▊         | 84419/1000000 [3:51:56<24:44:33, 10.28it/s]

{'loss': Array(0.15832731, dtype=float32), 'loss_cross_entropy': Array(0.14943098, dtype=float32)}


  8%|▊         | 84429/1000000 [3:51:58<31:08:44,  8.17it/s]

{'loss': Array(0.16520701, dtype=float32), 'loss_cross_entropy': Array(0.15595274, dtype=float32)}


  8%|▊         | 84439/1000000 [3:51:59<25:43:43,  9.88it/s]

{'loss': Array(0.16093384, dtype=float32), 'loss_cross_entropy': Array(0.15158898, dtype=float32)}


  8%|▊         | 84449/1000000 [3:52:01<38:25:15,  6.62it/s]

{'loss': Array(0.14674704, dtype=float32), 'loss_cross_entropy': Array(0.13759199, dtype=float32)}


  8%|▊         | 84459/1000000 [3:52:03<27:07:02,  9.38it/s]

{'loss': Array(0.15392555, dtype=float32), 'loss_cross_entropy': Array(0.144958, dtype=float32)}


  8%|▊         | 84469/1000000 [3:52:04<24:58:51, 10.18it/s]

{'loss': Array(0.15367484, dtype=float32), 'loss_cross_entropy': Array(0.14469337, dtype=float32)}


  8%|▊         | 84479/1000000 [3:52:06<29:17:06,  8.68it/s]

{'loss': Array(0.17556182, dtype=float32), 'loss_cross_entropy': Array(0.16657446, dtype=float32)}


  8%|▊         | 84489/1000000 [3:52:07<25:03:40, 10.15it/s]

{'loss': Array(0.14668863, dtype=float32), 'loss_cross_entropy': Array(0.13810733, dtype=float32)}


  8%|▊         | 84499/1000000 [3:52:09<34:20:03,  7.41it/s]

{'loss': Array(0.15393762, dtype=float32), 'loss_cross_entropy': Array(0.14484513, dtype=float32)}


  8%|▊         | 84509/1000000 [3:52:17<85:12:32,  2.98it/s] 

{'loss': Array(0.14690714, dtype=float32), 'loss_cross_entropy': Array(0.13819659, dtype=float32)}


  8%|▊         | 84519/1000000 [3:52:19<43:34:24,  5.84it/s]

{'loss': Array(0.13709863, dtype=float32), 'loss_cross_entropy': Array(0.12952195, dtype=float32)}


  8%|▊         | 84529/1000000 [3:52:20<27:05:48,  9.38it/s]

{'loss': Array(0.15497935, dtype=float32), 'loss_cross_entropy': Array(0.14584175, dtype=float32)}


  8%|▊         | 84539/1000000 [3:52:21<24:18:49, 10.46it/s]

{'loss': Array(0.15112555, dtype=float32), 'loss_cross_entropy': Array(0.14196964, dtype=float32)}


  8%|▊         | 84549/1000000 [3:52:23<30:27:59,  8.35it/s]

{'loss': Array(0.15033269, dtype=float32), 'loss_cross_entropy': Array(0.14180724, dtype=float32)}


  8%|▊         | 84559/1000000 [3:52:24<24:26:38, 10.40it/s]

{'loss': Array(0.13981615, dtype=float32), 'loss_cross_entropy': Array(0.13190292, dtype=float32)}


  8%|▊         | 84569/1000000 [3:52:26<23:30:24, 10.82it/s]

{'loss': Array(0.14831948, dtype=float32), 'loss_cross_entropy': Array(0.13983743, dtype=float32)}


  8%|▊         | 84579/1000000 [3:52:27<26:45:40,  9.50it/s]

{'loss': Array(0.1545836, dtype=float32), 'loss_cross_entropy': Array(0.14513014, dtype=float32)}


  8%|▊         | 84589/1000000 [3:52:29<23:55:06, 10.63it/s]

{'loss': Array(0.14849769, dtype=float32), 'loss_cross_entropy': Array(0.1401204, dtype=float32)}


  8%|▊         | 84599/1000000 [3:52:30<31:18:51,  8.12it/s]

{'loss': Array(0.1453529, dtype=float32), 'loss_cross_entropy': Array(0.13698898, dtype=float32)}


  8%|▊         | 84609/1000000 [3:52:32<24:45:58, 10.27it/s]

{'loss': Array(0.148206, dtype=float32), 'loss_cross_entropy': Array(0.13932894, dtype=float32)}


  8%|▊         | 84619/1000000 [3:52:33<32:47:06,  7.76it/s]

{'loss': Array(0.15061876, dtype=float32), 'loss_cross_entropy': Array(0.14219695, dtype=float32)}


  8%|▊         | 84629/1000000 [3:52:35<24:59:52, 10.17it/s]

{'loss': Array(0.1409489, dtype=float32), 'loss_cross_entropy': Array(0.13249752, dtype=float32)}


  8%|▊         | 84639/1000000 [3:52:36<23:48:15, 10.68it/s]

{'loss': Array(0.16973607, dtype=float32), 'loss_cross_entropy': Array(0.16033843, dtype=float32)}


  8%|▊         | 84649/1000000 [3:52:38<30:36:29,  8.31it/s]

{'loss': Array(0.14063023, dtype=float32), 'loss_cross_entropy': Array(0.1319657, dtype=float32)}


  8%|▊         | 84659/1000000 [3:52:39<25:18:23, 10.05it/s]

{'loss': Array(0.15417174, dtype=float32), 'loss_cross_entropy': Array(0.14516498, dtype=float32)}


  8%|▊         | 84669/1000000 [3:52:41<24:20:38, 10.44it/s]

{'loss': Array(0.15095341, dtype=float32), 'loss_cross_entropy': Array(0.14278854, dtype=float32)}


  8%|▊         | 84679/1000000 [3:52:43<27:36:56,  9.21it/s]

{'loss': Array(0.14233774, dtype=float32), 'loss_cross_entropy': Array(0.13417266, dtype=float32)}


  8%|▊         | 84689/1000000 [3:52:44<24:05:39, 10.55it/s]

{'loss': Array(0.1668656, dtype=float32), 'loss_cross_entropy': Array(0.15759903, dtype=float32)}


  8%|▊         | 84699/1000000 [3:52:46<30:16:20,  8.40it/s]

{'loss': Array(0.15702651, dtype=float32), 'loss_cross_entropy': Array(0.1476829, dtype=float32)}


  8%|▊         | 84709/1000000 [3:52:47<25:07:53, 10.12it/s]

{'loss': Array(0.15439783, dtype=float32), 'loss_cross_entropy': Array(0.14519629, dtype=float32)}


  8%|▊         | 84719/1000000 [3:52:49<37:38:04,  6.76it/s]

{'loss': Array(0.15335107, dtype=float32), 'loss_cross_entropy': Array(0.14413598, dtype=float32)}


  8%|▊         | 84729/1000000 [3:52:50<25:52:08,  9.83it/s]

{'loss': Array(0.13684998, dtype=float32), 'loss_cross_entropy': Array(0.12903595, dtype=float32)}


  8%|▊         | 84739/1000000 [3:52:51<24:06:45, 10.54it/s]

{'loss': Array(0.1552546, dtype=float32), 'loss_cross_entropy': Array(0.14643416, dtype=float32)}


  8%|▊         | 84749/1000000 [3:52:53<33:40:15,  7.55it/s]

{'loss': Array(0.16351645, dtype=float32), 'loss_cross_entropy': Array(0.15452306, dtype=float32)}


  8%|▊         | 84759/1000000 [3:52:55<25:13:10, 10.08it/s]

{'loss': Array(0.1594736, dtype=float32), 'loss_cross_entropy': Array(0.15029624, dtype=float32)}


  8%|▊         | 84769/1000000 [3:52:56<23:50:44, 10.66it/s]

{'loss': Array(0.1629703, dtype=float32), 'loss_cross_entropy': Array(0.15384473, dtype=float32)}


  8%|▊         | 84779/1000000 [3:52:58<28:34:24,  8.90it/s]

{'loss': Array(0.14158908, dtype=float32), 'loss_cross_entropy': Array(0.13337639, dtype=float32)}


  8%|▊         | 84789/1000000 [3:52:59<25:56:53,  9.80it/s]

{'loss': Array(0.15557824, dtype=float32), 'loss_cross_entropy': Array(0.147043, dtype=float32)}


  8%|▊         | 84799/1000000 [3:53:01<31:59:01,  7.95it/s]

{'loss': Array(0.16699612, dtype=float32), 'loss_cross_entropy': Array(0.15733229, dtype=float32)}


  8%|▊         | 84809/1000000 [3:53:02<26:30:59,  9.59it/s]

{'loss': Array(0.13746898, dtype=float32), 'loss_cross_entropy': Array(0.12911697, dtype=float32)}


  8%|▊         | 84819/1000000 [3:53:04<25:44:46,  9.87it/s]

{'loss': Array(0.1478439, dtype=float32), 'loss_cross_entropy': Array(0.13966355, dtype=float32)}


  8%|▊         | 84829/1000000 [3:53:05<28:04:18,  9.06it/s]

{'loss': Array(0.1509089, dtype=float32), 'loss_cross_entropy': Array(0.14257018, dtype=float32)}


  8%|▊         | 84839/1000000 [3:53:07<25:13:24, 10.08it/s]

{'loss': Array(0.15327355, dtype=float32), 'loss_cross_entropy': Array(0.14438267, dtype=float32)}


  8%|▊         | 84849/1000000 [3:53:09<34:52:03,  7.29it/s]

{'loss': Array(0.15290205, dtype=float32), 'loss_cross_entropy': Array(0.14395188, dtype=float32)}


  8%|▊         | 84859/1000000 [3:53:10<25:16:18, 10.06it/s]

{'loss': Array(0.1536676, dtype=float32), 'loss_cross_entropy': Array(0.14495942, dtype=float32)}


  8%|▊         | 84869/1000000 [3:53:11<23:44:57, 10.70it/s]

{'loss': Array(0.16664813, dtype=float32), 'loss_cross_entropy': Array(0.15707925, dtype=float32)}


  8%|▊         | 84879/1000000 [3:53:13<27:27:47,  9.26it/s]

{'loss': Array(0.15758036, dtype=float32), 'loss_cross_entropy': Array(0.14842188, dtype=float32)}


  8%|▊         | 84889/1000000 [3:53:15<24:38:04, 10.32it/s]

{'loss': Array(0.14200325, dtype=float32), 'loss_cross_entropy': Array(0.13416979, dtype=float32)}


  8%|▊         | 84899/1000000 [3:53:16<28:44:16,  8.85it/s]

{'loss': Array(0.1552865, dtype=float32), 'loss_cross_entropy': Array(0.14661412, dtype=float32)}


  8%|▊         | 84909/1000000 [3:53:18<25:00:13, 10.17it/s]

{'loss': Array(0.14528303, dtype=float32), 'loss_cross_entropy': Array(0.13699317, dtype=float32)}


  8%|▊         | 84919/1000000 [3:53:19<37:14:37,  6.83it/s]

{'loss': Array(0.16003141, dtype=float32), 'loss_cross_entropy': Array(0.15083921, dtype=float32)}


  8%|▊         | 84929/1000000 [3:53:21<25:48:58,  9.85it/s]

{'loss': Array(0.16313405, dtype=float32), 'loss_cross_entropy': Array(0.15365617, dtype=float32)}


  8%|▊         | 84939/1000000 [3:53:22<23:24:12, 10.86it/s]

{'loss': Array(0.13744323, dtype=float32), 'loss_cross_entropy': Array(0.12971179, dtype=float32)}


  8%|▊         | 84949/1000000 [3:53:24<29:56:36,  8.49it/s]

{'loss': Array(0.14844836, dtype=float32), 'loss_cross_entropy': Array(0.1392787, dtype=float32)}


  8%|▊         | 84959/1000000 [3:53:25<24:38:21, 10.32it/s]

{'loss': Array(0.13308686, dtype=float32), 'loss_cross_entropy': Array(0.12533414, dtype=float32)}


  8%|▊         | 84969/1000000 [3:53:27<33:17:05,  7.64it/s]

{'loss': Array(0.13809672, dtype=float32), 'loss_cross_entropy': Array(0.1301315, dtype=float32)}


  8%|▊         | 84979/1000000 [3:53:28<25:09:02, 10.11it/s]

{'loss': Array(0.16566263, dtype=float32), 'loss_cross_entropy': Array(0.15680394, dtype=float32)}


  8%|▊         | 84989/1000000 [3:53:30<33:38:39,  7.55it/s]

{'loss': Array(0.14896396, dtype=float32), 'loss_cross_entropy': Array(0.13961741, dtype=float32)}


  8%|▊         | 84999/1000000 [3:53:31<26:03:46,  9.75it/s]

{'loss': Array(0.14495386, dtype=float32), 'loss_cross_entropy': Array(0.13594799, dtype=float32)}


  9%|▊         | 85009/1000000 [3:53:39<83:28:35,  3.04it/s] 

{'loss': Array(0.15743881, dtype=float32), 'loss_cross_entropy': Array(0.14855535, dtype=float32)}


  9%|▊         | 85019/1000000 [3:53:41<38:18:36,  6.63it/s]

{'loss': Array(0.15476637, dtype=float32), 'loss_cross_entropy': Array(0.1461747, dtype=float32)}


  9%|▊         | 85029/1000000 [3:53:42<25:58:01,  9.79it/s]

{'loss': Array(0.14740783, dtype=float32), 'loss_cross_entropy': Array(0.13950102, dtype=float32)}


  9%|▊         | 85039/1000000 [3:53:44<33:59:24,  7.48it/s]

{'loss': Array(0.15508384, dtype=float32), 'loss_cross_entropy': Array(0.14673123, dtype=float32)}


  9%|▊         | 85049/1000000 [3:53:45<25:24:40, 10.00it/s]

{'loss': Array(0.16805808, dtype=float32), 'loss_cross_entropy': Array(0.1583391, dtype=float32)}


  9%|▊         | 85059/1000000 [3:53:47<37:44:00,  6.74it/s]

{'loss': Array(0.16582398, dtype=float32), 'loss_cross_entropy': Array(0.1561067, dtype=float32)}


  9%|▊         | 85069/1000000 [3:53:48<25:38:09,  9.91it/s]

{'loss': Array(0.1397986, dtype=float32), 'loss_cross_entropy': Array(0.13184492, dtype=float32)}


  9%|▊         | 85079/1000000 [3:53:50<23:45:47, 10.69it/s]

{'loss': Array(0.14888868, dtype=float32), 'loss_cross_entropy': Array(0.1407355, dtype=float32)}


  9%|▊         | 85089/1000000 [3:53:51<30:09:32,  8.43it/s]

{'loss': Array(0.13813834, dtype=float32), 'loss_cross_entropy': Array(0.13026415, dtype=float32)}


  9%|▊         | 85099/1000000 [3:53:53<24:56:12, 10.19it/s]

{'loss': Array(0.14943588, dtype=float32), 'loss_cross_entropy': Array(0.14124937, dtype=float32)}


  9%|▊         | 85109/1000000 [3:53:54<33:23:28,  7.61it/s]

{'loss': Array(0.16037671, dtype=float32), 'loss_cross_entropy': Array(0.15143171, dtype=float32)}


  9%|▊         | 85119/1000000 [3:53:56<25:10:33, 10.09it/s]

{'loss': Array(0.15049072, dtype=float32), 'loss_cross_entropy': Array(0.14163072, dtype=float32)}


  9%|▊         | 85129/1000000 [3:53:57<24:23:56, 10.42it/s]

{'loss': Array(0.15103953, dtype=float32), 'loss_cross_entropy': Array(0.1416233, dtype=float32)}


  9%|▊         | 85139/1000000 [3:53:59<27:58:50,  9.08it/s]

{'loss': Array(0.15990236, dtype=float32), 'loss_cross_entropy': Array(0.1509638, dtype=float32)}


  9%|▊         | 85149/1000000 [3:54:00<25:31:35,  9.96it/s]

{'loss': Array(0.16598228, dtype=float32), 'loss_cross_entropy': Array(0.15632951, dtype=float32)}


  9%|▊         | 85159/1000000 [3:54:02<38:42:14,  6.57it/s]

{'loss': Array(0.14279516, dtype=float32), 'loss_cross_entropy': Array(0.13405176, dtype=float32)}


  9%|▊         | 85169/1000000 [3:54:04<27:32:08,  9.23it/s]

{'loss': Array(0.15506394, dtype=float32), 'loss_cross_entropy': Array(0.14637376, dtype=float32)}


  9%|▊         | 85179/1000000 [3:54:05<25:27:18,  9.98it/s]

{'loss': Array(0.16119142, dtype=float32), 'loss_cross_entropy': Array(0.15192059, dtype=float32)}


  9%|▊         | 85189/1000000 [3:54:07<31:47:17,  7.99it/s]

{'loss': Array(0.142731, dtype=float32), 'loss_cross_entropy': Array(0.13509761, dtype=float32)}


  9%|▊         | 85199/1000000 [3:54:08<25:58:02,  9.79it/s]

{'loss': Array(0.13576728, dtype=float32), 'loss_cross_entropy': Array(0.12793389, dtype=float32)}


  9%|▊         | 85209/1000000 [3:54:10<34:17:05,  7.41it/s]

{'loss': Array(0.15315048, dtype=float32), 'loss_cross_entropy': Array(0.14359221, dtype=float32)}


  9%|▊         | 85219/1000000 [3:54:11<25:00:08, 10.16it/s]

{'loss': Array(0.14953546, dtype=float32), 'loss_cross_entropy': Array(0.1402297, dtype=float32)}


  9%|▊         | 85229/1000000 [3:54:13<33:26:06,  7.60it/s]

{'loss': Array(0.14909771, dtype=float32), 'loss_cross_entropy': Array(0.1408201, dtype=float32)}


  9%|▊         | 85239/1000000 [3:54:14<27:00:03,  9.41it/s]

{'loss': Array(0.14522964, dtype=float32), 'loss_cross_entropy': Array(0.13723326, dtype=float32)}


  9%|▊         | 85249/1000000 [3:54:16<24:02:10, 10.57it/s]

{'loss': Array(0.150001, dtype=float32), 'loss_cross_entropy': Array(0.14113174, dtype=float32)}


  9%|▊         | 85259/1000000 [3:54:18<28:41:11,  8.86it/s]

{'loss': Array(0.14977014, dtype=float32), 'loss_cross_entropy': Array(0.1408311, dtype=float32)}


  9%|▊         | 85269/1000000 [3:54:19<24:47:32, 10.25it/s]

{'loss': Array(0.13615112, dtype=float32), 'loss_cross_entropy': Array(0.12813681, dtype=float32)}


  9%|▊         | 85279/1000000 [3:54:21<30:27:05,  8.34it/s]

{'loss': Array(0.15273166, dtype=float32), 'loss_cross_entropy': Array(0.14377737, dtype=float32)}


  9%|▊         | 85289/1000000 [3:54:22<25:00:12, 10.16it/s]

{'loss': Array(0.1385082, dtype=float32), 'loss_cross_entropy': Array(0.13040423, dtype=float32)}


  9%|▊         | 85299/1000000 [3:54:24<37:52:29,  6.71it/s]

{'loss': Array(0.12976919, dtype=float32), 'loss_cross_entropy': Array(0.12166567, dtype=float32)}


  9%|▊         | 85309/1000000 [3:54:25<27:02:13,  9.40it/s]

{'loss': Array(0.13339074, dtype=float32), 'loss_cross_entropy': Array(0.12587295, dtype=float32)}


  9%|▊         | 85319/1000000 [3:54:27<37:49:53,  6.72it/s]

{'loss': Array(0.13712446, dtype=float32), 'loss_cross_entropy': Array(0.12963164, dtype=float32)}


  9%|▊         | 85329/1000000 [3:54:28<26:00:21,  9.77it/s]

{'loss': Array(0.15713115, dtype=float32), 'loss_cross_entropy': Array(0.14791228, dtype=float32)}


  9%|▊         | 85339/1000000 [3:54:30<23:44:05, 10.70it/s]

{'loss': Array(0.14271645, dtype=float32), 'loss_cross_entropy': Array(0.13369662, dtype=float32)}


  9%|▊         | 85349/1000000 [3:54:31<32:59:07,  7.70it/s]

{'loss': Array(0.1406439, dtype=float32), 'loss_cross_entropy': Array(0.13201182, dtype=float32)}


  9%|▊         | 85359/1000000 [3:54:33<24:57:27, 10.18it/s]

{'loss': Array(0.14578238, dtype=float32), 'loss_cross_entropy': Array(0.13725385, dtype=float32)}


  9%|▊         | 85369/1000000 [3:54:34<37:24:33,  6.79it/s]

{'loss': Array(0.13508897, dtype=float32), 'loss_cross_entropy': Array(0.12707286, dtype=float32)}


  9%|▊         | 85379/1000000 [3:54:36<25:52:46,  9.82it/s]

{'loss': Array(0.16161631, dtype=float32), 'loss_cross_entropy': Array(0.15240054, dtype=float32)}


  9%|▊         | 85389/1000000 [3:54:37<23:40:20, 10.73it/s]

{'loss': Array(0.16015136, dtype=float32), 'loss_cross_entropy': Array(0.1511739, dtype=float32)}


  9%|▊         | 85399/1000000 [3:54:39<26:43:36,  9.51it/s]

{'loss': Array(0.15675257, dtype=float32), 'loss_cross_entropy': Array(0.14803189, dtype=float32)}


  9%|▊         | 85409/1000000 [3:54:40<23:44:30, 10.70it/s]

{'loss': Array(0.16464864, dtype=float32), 'loss_cross_entropy': Array(0.15516447, dtype=float32)}


  9%|▊         | 85419/1000000 [3:54:42<26:46:08,  9.49it/s]

{'loss': Array(0.14807688, dtype=float32), 'loss_cross_entropy': Array(0.13988864, dtype=float32)}


  9%|▊         | 85429/1000000 [3:54:43<24:01:40, 10.57it/s]

{'loss': Array(0.1425102, dtype=float32), 'loss_cross_entropy': Array(0.13353361, dtype=float32)}


  9%|▊         | 85439/1000000 [3:54:45<36:44:31,  6.91it/s]

{'loss': Array(0.15509598, dtype=float32), 'loss_cross_entropy': Array(0.14660756, dtype=float32)}


  9%|▊         | 85449/1000000 [3:54:46<25:26:20,  9.99it/s]

{'loss': Array(0.16175096, dtype=float32), 'loss_cross_entropy': Array(0.15271011, dtype=float32)}


  9%|▊         | 85459/1000000 [3:54:47<24:06:13, 10.54it/s]

{'loss': Array(0.15171914, dtype=float32), 'loss_cross_entropy': Array(0.14295514, dtype=float32)}


  9%|▊         | 85469/1000000 [3:54:49<30:40:27,  8.28it/s]

{'loss': Array(0.14036307, dtype=float32), 'loss_cross_entropy': Array(0.13260804, dtype=float32)}


  9%|▊         | 85479/1000000 [3:54:51<24:21:57, 10.43it/s]

{'loss': Array(0.16752408, dtype=float32), 'loss_cross_entropy': Array(0.15869015, dtype=float32)}


  9%|▊         | 85489/1000000 [3:54:52<38:05:38,  6.67it/s]

{'loss': Array(0.16436215, dtype=float32), 'loss_cross_entropy': Array(0.1549937, dtype=float32)}


  9%|▊         | 85499/1000000 [3:54:54<25:46:45,  9.85it/s]

{'loss': Array(0.15374455, dtype=float32), 'loss_cross_entropy': Array(0.14528146, dtype=float32)}


  9%|▊         | 85509/1000000 [3:55:01<84:23:58,  3.01it/s] 

{'loss': Array(0.14551719, dtype=float32), 'loss_cross_entropy': Array(0.13717477, dtype=float32)}


  9%|▊         | 85519/1000000 [3:55:03<38:42:40,  6.56it/s]

{'loss': Array(0.1707195, dtype=float32), 'loss_cross_entropy': Array(0.16136014, dtype=float32)}


  9%|▊         | 85529/1000000 [3:55:05<26:48:13,  9.48it/s]

{'loss': Array(0.1344241, dtype=float32), 'loss_cross_entropy': Array(0.1261769, dtype=float32)}


  9%|▊         | 85539/1000000 [3:55:06<32:28:20,  7.82it/s]

{'loss': Array(0.1489585, dtype=float32), 'loss_cross_entropy': Array(0.14041136, dtype=float32)}


  9%|▊         | 85549/1000000 [3:55:08<26:40:55,  9.52it/s]

{'loss': Array(0.16034204, dtype=float32), 'loss_cross_entropy': Array(0.15140884, dtype=float32)}


  9%|▊         | 85559/1000000 [3:55:10<39:22:47,  6.45it/s]

{'loss': Array(0.13477482, dtype=float32), 'loss_cross_entropy': Array(0.12622482, dtype=float32)}


  9%|▊         | 85569/1000000 [3:55:11<26:17:14,  9.66it/s]

{'loss': Array(0.15673657, dtype=float32), 'loss_cross_entropy': Array(0.14824869, dtype=float32)}


  9%|▊         | 85579/1000000 [3:55:12<23:51:59, 10.64it/s]

{'loss': Array(0.14981174, dtype=float32), 'loss_cross_entropy': Array(0.14124738, dtype=float32)}


  9%|▊         | 85589/1000000 [3:55:14<26:52:18,  9.45it/s]

{'loss': Array(0.15718336, dtype=float32), 'loss_cross_entropy': Array(0.14813605, dtype=float32)}


  9%|▊         | 85599/1000000 [3:55:15<24:18:15, 10.45it/s]

{'loss': Array(0.15289068, dtype=float32), 'loss_cross_entropy': Array(0.1440181, dtype=float32)}


  9%|▊         | 85609/1000000 [3:55:17<37:40:02,  6.74it/s]

{'loss': Array(0.1448775, dtype=float32), 'loss_cross_entropy': Array(0.1359482, dtype=float32)}


  9%|▊         | 85619/1000000 [3:55:19<25:49:18,  9.84it/s]

{'loss': Array(0.16154452, dtype=float32), 'loss_cross_entropy': Array(0.15248327, dtype=float32)}


  9%|▊         | 85629/1000000 [3:55:20<23:36:53, 10.76it/s]

{'loss': Array(0.14378388, dtype=float32), 'loss_cross_entropy': Array(0.13539387, dtype=float32)}


  9%|▊         | 85639/1000000 [3:55:22<28:11:57,  9.01it/s]

{'loss': Array(0.13860516, dtype=float32), 'loss_cross_entropy': Array(0.13027124, dtype=float32)}


  9%|▊         | 85649/1000000 [3:55:23<24:33:46, 10.34it/s]

{'loss': Array(0.13476042, dtype=float32), 'loss_cross_entropy': Array(0.12702106, dtype=float32)}


  9%|▊         | 85659/1000000 [3:55:25<33:46:12,  7.52it/s]

{'loss': Array(0.13527633, dtype=float32), 'loss_cross_entropy': Array(0.12695168, dtype=float32)}


  9%|▊         | 85669/1000000 [3:55:26<25:34:02,  9.93it/s]

{'loss': Array(0.12408362, dtype=float32), 'loss_cross_entropy': Array(0.11599763, dtype=float32)}


  9%|▊         | 85679/1000000 [3:55:28<37:38:50,  6.75it/s]

{'loss': Array(0.15715502, dtype=float32), 'loss_cross_entropy': Array(0.1483213, dtype=float32)}


  9%|▊         | 85689/1000000 [3:55:29<26:46:02,  9.49it/s]

{'loss': Array(0.14876394, dtype=float32), 'loss_cross_entropy': Array(0.1401169, dtype=float32)}


  9%|▊         | 85699/1000000 [3:55:31<24:41:48, 10.28it/s]

{'loss': Array(0.1458143, dtype=float32), 'loss_cross_entropy': Array(0.13732351, dtype=float32)}


  9%|▊         | 85709/1000000 [3:55:32<30:22:32,  8.36it/s]

{'loss': Array(0.13616638, dtype=float32), 'loss_cross_entropy': Array(0.12772144, dtype=float32)}


  9%|▊         | 85719/1000000 [3:55:34<24:31:16, 10.36it/s]

{'loss': Array(0.13981964, dtype=float32), 'loss_cross_entropy': Array(0.13154356, dtype=float32)}


  9%|▊         | 85729/1000000 [3:55:35<33:24:38,  7.60it/s]

{'loss': Array(0.13018104, dtype=float32), 'loss_cross_entropy': Array(0.12224908, dtype=float32)}


  9%|▊         | 85739/1000000 [3:55:37<24:37:57, 10.31it/s]

{'loss': Array(0.1474388, dtype=float32), 'loss_cross_entropy': Array(0.13889354, dtype=float32)}


  9%|▊         | 85749/1000000 [3:55:38<23:42:18, 10.71it/s]

{'loss': Array(0.1438366, dtype=float32), 'loss_cross_entropy': Array(0.1357526, dtype=float32)}


  9%|▊         | 85759/1000000 [3:55:40<27:24:37,  9.26it/s]

{'loss': Array(0.14658807, dtype=float32), 'loss_cross_entropy': Array(0.13781825, dtype=float32)}


  9%|▊         | 85769/1000000 [3:55:41<25:00:01, 10.16it/s]

{'loss': Array(0.14737657, dtype=float32), 'loss_cross_entropy': Array(0.13897517, dtype=float32)}


  9%|▊         | 85779/1000000 [3:55:43<37:00:52,  6.86it/s]

{'loss': Array(0.14034763, dtype=float32), 'loss_cross_entropy': Array(0.13220493, dtype=float32)}


  9%|▊         | 85789/1000000 [3:55:44<25:47:59,  9.84it/s]

{'loss': Array(0.15766612, dtype=float32), 'loss_cross_entropy': Array(0.1486232, dtype=float32)}


  9%|▊         | 85799/1000000 [3:55:46<23:40:32, 10.73it/s]

{'loss': Array(0.14691763, dtype=float32), 'loss_cross_entropy': Array(0.13831785, dtype=float32)}


  9%|▊         | 85809/1000000 [3:55:47<30:40:20,  8.28it/s]

{'loss': Array(0.14570166, dtype=float32), 'loss_cross_entropy': Array(0.13727503, dtype=float32)}


  9%|▊         | 85819/1000000 [3:55:49<24:33:04, 10.34it/s]

{'loss': Array(0.1421725, dtype=float32), 'loss_cross_entropy': Array(0.13431536, dtype=float32)}


  9%|▊         | 85829/1000000 [3:55:50<33:17:50,  7.63it/s]

{'loss': Array(0.15658107, dtype=float32), 'loss_cross_entropy': Array(0.14753489, dtype=float32)}


  9%|▊         | 85839/1000000 [3:55:52<25:23:37, 10.00it/s]

{'loss': Array(0.1574661, dtype=float32), 'loss_cross_entropy': Array(0.14858532, dtype=float32)}


  9%|▊         | 85849/1000000 [3:55:54<33:43:58,  7.53it/s]

{'loss': Array(0.14091477, dtype=float32), 'loss_cross_entropy': Array(0.13237517, dtype=float32)}


  9%|▊         | 85859/1000000 [3:55:55<25:04:14, 10.13it/s]

{'loss': Array(0.15794855, dtype=float32), 'loss_cross_entropy': Array(0.14854644, dtype=float32)}


  9%|▊         | 85869/1000000 [3:55:56<23:24:45, 10.85it/s]

{'loss': Array(0.12512715, dtype=float32), 'loss_cross_entropy': Array(0.11800299, dtype=float32)}


  9%|▊         | 85879/1000000 [3:55:58<28:11:33,  9.01it/s]

{'loss': Array(0.15198384, dtype=float32), 'loss_cross_entropy': Array(0.14316371, dtype=float32)}


  9%|▊         | 85889/1000000 [3:55:59<24:47:21, 10.24it/s]

{'loss': Array(0.1590038, dtype=float32), 'loss_cross_entropy': Array(0.14944203, dtype=float32)}


  9%|▊         | 85899/1000000 [3:56:01<31:05:52,  8.17it/s]

{'loss': Array(0.15185618, dtype=float32), 'loss_cross_entropy': Array(0.1431307, dtype=float32)}


  9%|▊         | 85909/1000000 [3:56:03<26:18:17,  9.65it/s]

{'loss': Array(0.13966143, dtype=float32), 'loss_cross_entropy': Array(0.13133232, dtype=float32)}


  9%|▊         | 85919/1000000 [3:56:04<38:46:14,  6.55it/s]

{'loss': Array(0.14910533, dtype=float32), 'loss_cross_entropy': Array(0.14075184, dtype=float32)}


  9%|▊         | 85929/1000000 [3:56:06<27:38:54,  9.18it/s]

{'loss': Array(0.16684511, dtype=float32), 'loss_cross_entropy': Array(0.15760629, dtype=float32)}


  9%|▊         | 85939/1000000 [3:56:08<39:41:29,  6.40it/s]

{'loss': Array(0.1326005, dtype=float32), 'loss_cross_entropy': Array(0.12433034, dtype=float32)}


  9%|▊         | 85949/1000000 [3:56:09<27:30:14,  9.23it/s]

{'loss': Array(0.15432225, dtype=float32), 'loss_cross_entropy': Array(0.14599867, dtype=float32)}


  9%|▊         | 85959/1000000 [3:56:10<24:14:50, 10.47it/s]

{'loss': Array(0.14764012, dtype=float32), 'loss_cross_entropy': Array(0.13945423, dtype=float32)}


  9%|▊         | 85969/1000000 [3:56:12<33:32:44,  7.57it/s]

{'loss': Array(0.16262186, dtype=float32), 'loss_cross_entropy': Array(0.15351243, dtype=float32)}


  9%|▊         | 85979/1000000 [3:56:13<25:42:40,  9.87it/s]

{'loss': Array(0.13763805, dtype=float32), 'loss_cross_entropy': Array(0.12929305, dtype=float32)}


  9%|▊         | 85989/1000000 [3:56:15<37:54:24,  6.70it/s]

{'loss': Array(0.1455326, dtype=float32), 'loss_cross_entropy': Array(0.13763677, dtype=float32)}


  9%|▊         | 85999/1000000 [3:56:17<26:14:10,  9.68it/s]

{'loss': Array(0.14196211, dtype=float32), 'loss_cross_entropy': Array(0.13383846, dtype=float32)}


  9%|▊         | 86009/1000000 [3:56:24<83:30:42,  3.04it/s] 

{'loss': Array(0.14900325, dtype=float32), 'loss_cross_entropy': Array(0.14057952, dtype=float32)}


  9%|▊         | 86019/1000000 [3:56:26<37:23:22,  6.79it/s] 

{'loss': Array(0.1515048, dtype=float32), 'loss_cross_entropy': Array(0.14247082, dtype=float32)}


  9%|▊         | 86029/1000000 [3:56:28<26:33:45,  9.56it/s]

{'loss': Array(0.16641262, dtype=float32), 'loss_cross_entropy': Array(0.15660451, dtype=float32)}


  9%|▊         | 86039/1000000 [3:56:29<27:45:21,  9.15it/s]

{'loss': Array(0.15535453, dtype=float32), 'loss_cross_entropy': Array(0.14636137, dtype=float32)}


  9%|▊         | 86049/1000000 [3:56:31<24:27:30, 10.38it/s]

{'loss': Array(0.15817226, dtype=float32), 'loss_cross_entropy': Array(0.14913738, dtype=float32)}


  9%|▊         | 86059/1000000 [3:56:32<37:00:03,  6.86it/s]

{'loss': Array(0.16324936, dtype=float32), 'loss_cross_entropy': Array(0.15402268, dtype=float32)}


  9%|▊         | 86069/1000000 [3:56:34<25:57:30,  9.78it/s]

{'loss': Array(0.14050186, dtype=float32), 'loss_cross_entropy': Array(0.13231944, dtype=float32)}


  9%|▊         | 86079/1000000 [3:56:35<23:49:20, 10.66it/s]

{'loss': Array(0.14315127, dtype=float32), 'loss_cross_entropy': Array(0.13500986, dtype=float32)}


  9%|▊         | 86089/1000000 [3:56:37<30:11:26,  8.41it/s]

{'loss': Array(0.15705113, dtype=float32), 'loss_cross_entropy': Array(0.14806645, dtype=float32)}


  9%|▊         | 86099/1000000 [3:56:38<24:28:20, 10.37it/s]

{'loss': Array(0.16200443, dtype=float32), 'loss_cross_entropy': Array(0.15213957, dtype=float32)}


  9%|▊         | 86109/1000000 [3:56:40<37:51:23,  6.71it/s]

{'loss': Array(0.1387112, dtype=float32), 'loss_cross_entropy': Array(0.13056152, dtype=float32)}


  9%|▊         | 86119/1000000 [3:56:41<26:52:46,  9.44it/s]

{'loss': Array(0.15914266, dtype=float32), 'loss_cross_entropy': Array(0.14951365, dtype=float32)}


  9%|▊         | 86129/1000000 [3:56:43<24:39:15, 10.30it/s]

{'loss': Array(0.13761158, dtype=float32), 'loss_cross_entropy': Array(0.1297792, dtype=float32)}


  9%|▊         | 86139/1000000 [3:56:45<28:49:40,  8.81it/s]

{'loss': Array(0.16348888, dtype=float32), 'loss_cross_entropy': Array(0.15399958, dtype=float32)}


  9%|▊         | 86149/1000000 [3:56:46<24:28:22, 10.37it/s]

{'loss': Array(0.14532651, dtype=float32), 'loss_cross_entropy': Array(0.13672584, dtype=float32)}


  9%|▊         | 86159/1000000 [3:56:48<33:21:18,  7.61it/s]

{'loss': Array(0.15587394, dtype=float32), 'loss_cross_entropy': Array(0.14739725, dtype=float32)}


  9%|▊         | 86169/1000000 [3:56:49<25:20:53, 10.01it/s]

{'loss': Array(0.14281467, dtype=float32), 'loss_cross_entropy': Array(0.13467586, dtype=float32)}


  9%|▊         | 86179/1000000 [3:56:50<24:04:35, 10.54it/s]

{'loss': Array(0.1408149, dtype=float32), 'loss_cross_entropy': Array(0.13276628, dtype=float32)}


  9%|▊         | 86189/1000000 [3:56:52<30:21:28,  8.36it/s]

{'loss': Array(0.14224449, dtype=float32), 'loss_cross_entropy': Array(0.1340055, dtype=float32)}


  9%|▊         | 86199/1000000 [3:56:53<25:10:13, 10.08it/s]

{'loss': Array(0.16679998, dtype=float32), 'loss_cross_entropy': Array(0.15685534, dtype=float32)}


  9%|▊         | 86209/1000000 [3:56:55<23:27:15, 10.82it/s]

{'loss': Array(0.14502391, dtype=float32), 'loss_cross_entropy': Array(0.13672678, dtype=float32)}


  9%|▊         | 86219/1000000 [3:56:57<27:04:22,  9.38it/s]

{'loss': Array(0.15457067, dtype=float32), 'loss_cross_entropy': Array(0.14527442, dtype=float32)}


  9%|▊         | 86229/1000000 [3:56:58<24:25:55, 10.39it/s]

{'loss': Array(0.1420074, dtype=float32), 'loss_cross_entropy': Array(0.13341919, dtype=float32)}


  9%|▊         | 86239/1000000 [3:57:00<31:15:06,  8.12it/s]

{'loss': Array(0.14198634, dtype=float32), 'loss_cross_entropy': Array(0.13412014, dtype=float32)}


  9%|▊         | 86249/1000000 [3:57:01<25:44:55,  9.86it/s]

{'loss': Array(0.13968293, dtype=float32), 'loss_cross_entropy': Array(0.13203642, dtype=float32)}


  9%|▊         | 86259/1000000 [3:57:03<37:58:14,  6.68it/s]

{'loss': Array(0.14310351, dtype=float32), 'loss_cross_entropy': Array(0.13458961, dtype=float32)}


  9%|▊         | 86269/1000000 [3:57:04<26:45:24,  9.49it/s]

{'loss': Array(0.12810929, dtype=float32), 'loss_cross_entropy': Array(0.12019523, dtype=float32)}


  9%|▊         | 86279/1000000 [3:57:06<38:57:40,  6.51it/s]

{'loss': Array(0.14994454, dtype=float32), 'loss_cross_entropy': Array(0.14108562, dtype=float32)}


  9%|▊         | 86289/1000000 [3:57:07<26:32:48,  9.56it/s]

{'loss': Array(0.14034705, dtype=float32), 'loss_cross_entropy': Array(0.13203382, dtype=float32)}


  9%|▊         | 86299/1000000 [3:57:09<25:11:54, 10.07it/s]

{'loss': Array(0.14346723, dtype=float32), 'loss_cross_entropy': Array(0.13514139, dtype=float32)}


  9%|▊         | 86309/1000000 [3:57:11<34:34:27,  7.34it/s]

{'loss': Array(0.14631724, dtype=float32), 'loss_cross_entropy': Array(0.13786821, dtype=float32)}


  9%|▊         | 86319/1000000 [3:57:12<25:42:59,  9.87it/s]

{'loss': Array(0.1362208, dtype=float32), 'loss_cross_entropy': Array(0.12749137, dtype=float32)}


  9%|▊         | 86329/1000000 [3:57:14<37:00:25,  6.86it/s]

{'loss': Array(0.13998453, dtype=float32), 'loss_cross_entropy': Array(0.13119072, dtype=float32)}


  9%|▊         | 86339/1000000 [3:57:15<25:30:47,  9.95it/s]

{'loss': Array(0.15108895, dtype=float32), 'loss_cross_entropy': Array(0.14229259, dtype=float32)}


  9%|▊         | 86349/1000000 [3:57:16<23:40:18, 10.72it/s]

{'loss': Array(0.13275048, dtype=float32), 'loss_cross_entropy': Array(0.12504072, dtype=float32)}


  9%|▊         | 86359/1000000 [3:57:18<26:26:16,  9.60it/s]

{'loss': Array(0.13571922, dtype=float32), 'loss_cross_entropy': Array(0.12796143, dtype=float32)}


  9%|▊         | 86369/1000000 [3:57:19<24:30:10, 10.36it/s]

{'loss': Array(0.18825586, dtype=float32), 'loss_cross_entropy': Array(0.17807722, dtype=float32)}


  9%|▊         | 86379/1000000 [3:57:21<27:35:52,  9.20it/s]

{'loss': Array(0.1460521, dtype=float32), 'loss_cross_entropy': Array(0.13758549, dtype=float32)}


  9%|▊         | 86389/1000000 [3:57:22<24:06:48, 10.52it/s]

{'loss': Array(0.14667392, dtype=float32), 'loss_cross_entropy': Array(0.13786869, dtype=float32)}


  9%|▊         | 86399/1000000 [3:57:24<37:12:16,  6.82it/s]

{'loss': Array(0.14327954, dtype=float32), 'loss_cross_entropy': Array(0.13479422, dtype=float32)}


  9%|▊         | 86409/1000000 [3:57:26<26:12:14,  9.68it/s]

{'loss': Array(0.14930855, dtype=float32), 'loss_cross_entropy': Array(0.14116289, dtype=float32)}


  9%|▊         | 86419/1000000 [3:57:27<23:49:05, 10.65it/s]

{'loss': Array(0.14458196, dtype=float32), 'loss_cross_entropy': Array(0.13618241, dtype=float32)}


  9%|▊         | 86429/1000000 [3:57:29<30:18:10,  8.37it/s]

{'loss': Array(0.16320994, dtype=float32), 'loss_cross_entropy': Array(0.1539994, dtype=float32)}


  9%|▊         | 86439/1000000 [3:57:30<24:22:55, 10.41it/s]

{'loss': Array(0.13420272, dtype=float32), 'loss_cross_entropy': Array(0.1263002, dtype=float32)}


  9%|▊         | 86449/1000000 [3:57:32<36:52:09,  6.88it/s]

{'loss': Array(0.14572406, dtype=float32), 'loss_cross_entropy': Array(0.13686103, dtype=float32)}


  9%|▊         | 86459/1000000 [3:57:33<25:15:23, 10.05it/s]

{'loss': Array(0.15887578, dtype=float32), 'loss_cross_entropy': Array(0.14942096, dtype=float32)}


  9%|▊         | 86469/1000000 [3:57:34<23:22:54, 10.85it/s]

{'loss': Array(0.14814566, dtype=float32), 'loss_cross_entropy': Array(0.13988249, dtype=float32)}


  9%|▊         | 86479/1000000 [3:57:36<28:10:30,  9.01it/s]

{'loss': Array(0.14584397, dtype=float32), 'loss_cross_entropy': Array(0.13732497, dtype=float32)}


  9%|▊         | 86489/1000000 [3:57:37<24:23:46, 10.40it/s]

{'loss': Array(0.13403024, dtype=float32), 'loss_cross_entropy': Array(0.12646316, dtype=float32)}


  9%|▊         | 86499/1000000 [3:57:39<33:34:31,  7.56it/s]

{'loss': Array(0.13542546, dtype=float32), 'loss_cross_entropy': Array(0.12778465, dtype=float32)}


  9%|▊         | 86509/1000000 [3:57:47<84:53:01,  2.99it/s] 

{'loss': Array(0.14825001, dtype=float32), 'loss_cross_entropy': Array(0.13967231, dtype=float32)}


  9%|▊         | 86519/1000000 [3:57:49<43:02:58,  5.89it/s]

{'loss': Array(0.15647025, dtype=float32), 'loss_cross_entropy': Array(0.14669895, dtype=float32)}


  9%|▊         | 86529/1000000 [3:57:50<26:27:01,  9.59it/s]

{'loss': Array(0.13615869, dtype=float32), 'loss_cross_entropy': Array(0.12802884, dtype=float32)}


  9%|▊         | 86539/1000000 [3:57:51<24:32:07, 10.34it/s]

{'loss': Array(0.14971276, dtype=float32), 'loss_cross_entropy': Array(0.14088474, dtype=float32)}


  9%|▊         | 86549/1000000 [3:57:53<30:51:08,  8.22it/s]

{'loss': Array(0.14809541, dtype=float32), 'loss_cross_entropy': Array(0.13936141, dtype=float32)}


  9%|▊         | 86559/1000000 [3:57:55<24:48:05, 10.23it/s]

{'loss': Array(0.14933021, dtype=float32), 'loss_cross_entropy': Array(0.14054987, dtype=float32)}


  9%|▊         | 86569/1000000 [3:57:56<23:32:27, 10.78it/s]

{'loss': Array(0.14139077, dtype=float32), 'loss_cross_entropy': Array(0.13224427, dtype=float32)}


  9%|▊         | 86579/1000000 [3:57:58<26:31:53,  9.56it/s]

{'loss': Array(0.12908699, dtype=float32), 'loss_cross_entropy': Array(0.12114231, dtype=float32)}


  9%|▊         | 86589/1000000 [3:57:59<24:02:40, 10.55it/s]

{'loss': Array(0.14351705, dtype=float32), 'loss_cross_entropy': Array(0.1351553, dtype=float32)}


  9%|▊         | 86599/1000000 [3:58:01<30:46:16,  8.25it/s]

{'loss': Array(0.16430332, dtype=float32), 'loss_cross_entropy': Array(0.15543675, dtype=float32)}


  9%|▊         | 86609/1000000 [3:58:02<24:40:16, 10.28it/s]

{'loss': Array(0.16448191, dtype=float32), 'loss_cross_entropy': Array(0.15523973, dtype=float32)}


  9%|▊         | 86619/1000000 [3:58:04<33:09:28,  7.65it/s]

{'loss': Array(0.14974312, dtype=float32), 'loss_cross_entropy': Array(0.14130056, dtype=float32)}


  9%|▊         | 86629/1000000 [3:58:05<25:17:45, 10.03it/s]

{'loss': Array(0.15568778, dtype=float32), 'loss_cross_entropy': Array(0.14682367, dtype=float32)}


  9%|▊         | 86639/1000000 [3:58:07<24:46:19, 10.24it/s]

{'loss': Array(0.16574238, dtype=float32), 'loss_cross_entropy': Array(0.15595707, dtype=float32)}


  9%|▊         | 86649/1000000 [3:58:08<30:30:07,  8.32it/s]

{'loss': Array(0.14438382, dtype=float32), 'loss_cross_entropy': Array(0.1363434, dtype=float32)}


  9%|▊         | 86659/1000000 [3:58:10<24:52:29, 10.20it/s]

{'loss': Array(0.1428247, dtype=float32), 'loss_cross_entropy': Array(0.13525559, dtype=float32)}


  9%|▊         | 86669/1000000 [3:58:11<24:55:51, 10.18it/s]

{'loss': Array(0.14052817, dtype=float32), 'loss_cross_entropy': Array(0.13220707, dtype=float32)}


  9%|▊         | 86679/1000000 [3:58:13<27:51:15,  9.11it/s]

{'loss': Array(0.15259108, dtype=float32), 'loss_cross_entropy': Array(0.14385743, dtype=float32)}


  9%|▊         | 86689/1000000 [3:58:14<25:37:16,  9.90it/s]

{'loss': Array(0.15657039, dtype=float32), 'loss_cross_entropy': Array(0.14785393, dtype=float32)}


  9%|▊         | 86699/1000000 [3:58:16<31:39:18,  8.01it/s]

{'loss': Array(0.12735985, dtype=float32), 'loss_cross_entropy': Array(0.11967968, dtype=float32)}


  9%|▊         | 86709/1000000 [3:58:17<26:15:32,  9.66it/s]

{'loss': Array(0.15693709, dtype=float32), 'loss_cross_entropy': Array(0.14760357, dtype=float32)}


  9%|▊         | 86719/1000000 [3:58:19<38:17:40,  6.62it/s]

{'loss': Array(0.14423414, dtype=float32), 'loss_cross_entropy': Array(0.13603516, dtype=float32)}


  9%|▊         | 86729/1000000 [3:58:21<25:43:17,  9.86it/s]

{'loss': Array(0.13817105, dtype=float32), 'loss_cross_entropy': Array(0.12984161, dtype=float32)}


  9%|▊         | 86739/1000000 [3:58:22<23:44:39, 10.68it/s]

{'loss': Array(0.15364233, dtype=float32), 'loss_cross_entropy': Array(0.14485377, dtype=float32)}


  9%|▊         | 86749/1000000 [3:58:24<33:00:47,  7.68it/s]

{'loss': Array(0.1522817, dtype=float32), 'loss_cross_entropy': Array(0.14357111, dtype=float32)}


  9%|▊         | 86759/1000000 [3:58:25<24:53:06, 10.19it/s]

{'loss': Array(0.14902751, dtype=float32), 'loss_cross_entropy': Array(0.14022575, dtype=float32)}


  9%|▊         | 86769/1000000 [3:58:26<23:54:02, 10.61it/s]

{'loss': Array(0.14841442, dtype=float32), 'loss_cross_entropy': Array(0.13954113, dtype=float32)}


  9%|▊         | 86779/1000000 [3:58:28<28:49:31,  8.80it/s]

{'loss': Array(0.1480705, dtype=float32), 'loss_cross_entropy': Array(0.1394103, dtype=float32)}


  9%|▊         | 86789/1000000 [3:58:30<25:13:11, 10.06it/s]

{'loss': Array(0.14994295, dtype=float32), 'loss_cross_entropy': Array(0.1416608, dtype=float32)}


  9%|▊         | 86799/1000000 [3:58:31<30:28:35,  8.32it/s]

{'loss': Array(0.16200136, dtype=float32), 'loss_cross_entropy': Array(0.15187576, dtype=float32)}


  9%|▊         | 86809/1000000 [3:58:33<24:46:42, 10.24it/s]

{'loss': Array(0.15334186, dtype=float32), 'loss_cross_entropy': Array(0.144802, dtype=float32)}


  9%|▊         | 86819/1000000 [3:58:34<23:42:34, 10.70it/s]

{'loss': Array(0.16043948, dtype=float32), 'loss_cross_entropy': Array(0.15128995, dtype=float32)}


  9%|▊         | 86829/1000000 [3:58:36<26:44:27,  9.49it/s]

{'loss': Array(0.1736245, dtype=float32), 'loss_cross_entropy': Array(0.16380045, dtype=float32)}


  9%|▊         | 86839/1000000 [3:58:37<23:55:23, 10.60it/s]

{'loss': Array(0.15402226, dtype=float32), 'loss_cross_entropy': Array(0.14584, dtype=float32)}


  9%|▊         | 86849/1000000 [3:58:39<33:06:08,  7.66it/s]

{'loss': Array(0.14423499, dtype=float32), 'loss_cross_entropy': Array(0.1359949, dtype=float32)}


  9%|▊         | 86859/1000000 [3:58:40<25:20:59, 10.01it/s]

{'loss': Array(0.15023021, dtype=float32), 'loss_cross_entropy': Array(0.1418442, dtype=float32)}


  9%|▊         | 86869/1000000 [3:58:41<24:28:10, 10.37it/s]

{'loss': Array(0.16841102, dtype=float32), 'loss_cross_entropy': Array(0.15922792, dtype=float32)}


  9%|▊         | 86879/1000000 [3:58:43<27:35:56,  9.19it/s]

{'loss': Array(0.14016438, dtype=float32), 'loss_cross_entropy': Array(0.13232253, dtype=float32)}


  9%|▊         | 86889/1000000 [3:58:45<24:02:27, 10.55it/s]

{'loss': Array(0.1399845, dtype=float32), 'loss_cross_entropy': Array(0.13147591, dtype=float32)}


  9%|▊         | 86899/1000000 [3:58:46<28:51:36,  8.79it/s]

{'loss': Array(0.13797271, dtype=float32), 'loss_cross_entropy': Array(0.12972008, dtype=float32)}


  9%|▊         | 86909/1000000 [3:58:48<24:20:23, 10.42it/s]

{'loss': Array(0.14498678, dtype=float32), 'loss_cross_entropy': Array(0.13662992, dtype=float32)}


  9%|▊         | 86919/1000000 [3:58:49<37:34:22,  6.75it/s]

{'loss': Array(0.15515904, dtype=float32), 'loss_cross_entropy': Array(0.14645736, dtype=float32)}


  9%|▊         | 86929/1000000 [3:58:51<25:55:50,  9.78it/s]

{'loss': Array(0.13767146, dtype=float32), 'loss_cross_entropy': Array(0.12987041, dtype=float32)}


  9%|▊         | 86939/1000000 [3:58:52<24:07:39, 10.51it/s]

{'loss': Array(0.15559702, dtype=float32), 'loss_cross_entropy': Array(0.14697985, dtype=float32)}


  9%|▊         | 86949/1000000 [3:58:54<30:13:14,  8.39it/s]

{'loss': Array(0.17539297, dtype=float32), 'loss_cross_entropy': Array(0.1657446, dtype=float32)}


  9%|▊         | 86959/1000000 [3:58:55<25:11:11, 10.07it/s]

{'loss': Array(0.15425436, dtype=float32), 'loss_cross_entropy': Array(0.14596222, dtype=float32)}


  9%|▊         | 86969/1000000 [3:58:57<33:44:25,  7.52it/s]

{'loss': Array(0.13033122, dtype=float32), 'loss_cross_entropy': Array(0.12198087, dtype=float32)}


  9%|▊         | 86979/1000000 [3:58:58<25:44:38,  9.85it/s]

{'loss': Array(0.12351654, dtype=float32), 'loss_cross_entropy': Array(0.11677568, dtype=float32)}


  9%|▊         | 86989/1000000 [3:59:00<33:47:41,  7.50it/s]

{'loss': Array(0.13800834, dtype=float32), 'loss_cross_entropy': Array(0.1298226, dtype=float32)}


  9%|▊         | 86999/1000000 [3:59:02<26:08:28,  9.70it/s]

{'loss': Array(0.1624351, dtype=float32), 'loss_cross_entropy': Array(0.1538831, dtype=float32)}


  9%|▊         | 87009/1000000 [3:59:10<84:30:12,  3.00it/s] 

{'loss': Array(0.13987596, dtype=float32), 'loss_cross_entropy': Array(0.13148497, dtype=float32)}


  9%|▊         | 87019/1000000 [3:59:11<39:14:41,  6.46it/s]

{'loss': Array(0.15060663, dtype=float32), 'loss_cross_entropy': Array(0.14223437, dtype=float32)}


  9%|▊         | 87029/1000000 [3:59:13<26:35:06,  9.54it/s]

{'loss': Array(0.15570527, dtype=float32), 'loss_cross_entropy': Array(0.14634305, dtype=float32)}


  9%|▊         | 87039/1000000 [3:59:14<35:06:35,  7.22it/s]

{'loss': Array(0.15666094, dtype=float32), 'loss_cross_entropy': Array(0.1473697, dtype=float32)}


  9%|▊         | 87049/1000000 [3:59:16<26:53:16,  9.43it/s]

{'loss': Array(0.15268321, dtype=float32), 'loss_cross_entropy': Array(0.14385583, dtype=float32)}


  9%|▊         | 87059/1000000 [3:59:18<37:29:03,  6.77it/s]

{'loss': Array(0.1392515, dtype=float32), 'loss_cross_entropy': Array(0.13113494, dtype=float32)}


  9%|▊         | 87069/1000000 [3:59:19<25:34:28,  9.92it/s]

{'loss': Array(0.15313482, dtype=float32), 'loss_cross_entropy': Array(0.14387994, dtype=float32)}


  9%|▊         | 87079/1000000 [3:59:20<24:04:57, 10.53it/s]

{'loss': Array(0.14611961, dtype=float32), 'loss_cross_entropy': Array(0.1367902, dtype=float32)}


  9%|▊         | 87089/1000000 [3:59:22<30:36:38,  8.28it/s]

{'loss': Array(0.14007127, dtype=float32), 'loss_cross_entropy': Array(0.13199127, dtype=float32)}


  9%|▊         | 87099/1000000 [3:59:23<24:38:29, 10.29it/s]

{'loss': Array(0.15150893, dtype=float32), 'loss_cross_entropy': Array(0.14286399, dtype=float32)}


  9%|▊         | 87109/1000000 [3:59:25<33:57:50,  7.47it/s]

{'loss': Array(0.1477593, dtype=float32), 'loss_cross_entropy': Array(0.13896559, dtype=float32)}


  9%|▊         | 87119/1000000 [3:59:27<25:45:42,  9.84it/s]

{'loss': Array(0.14529909, dtype=float32), 'loss_cross_entropy': Array(0.1367927, dtype=float32)}


  9%|▊         | 87129/1000000 [3:59:28<24:02:21, 10.55it/s]

{'loss': Array(0.14933407, dtype=float32), 'loss_cross_entropy': Array(0.14100632, dtype=float32)}


  9%|▊         | 87139/1000000 [3:59:30<26:48:04,  9.46it/s]

{'loss': Array(0.15425803, dtype=float32), 'loss_cross_entropy': Array(0.14539932, dtype=float32)}


  9%|▊         | 87149/1000000 [3:59:31<24:05:32, 10.52it/s]

{'loss': Array(0.12971726, dtype=float32), 'loss_cross_entropy': Array(0.1212907, dtype=float32)}


  9%|▊         | 87159/1000000 [3:59:33<37:29:31,  6.76it/s]

{'loss': Array(0.14390917, dtype=float32), 'loss_cross_entropy': Array(0.13534826, dtype=float32)}


  9%|▊         | 87169/1000000 [3:59:34<26:00:15,  9.75it/s]

{'loss': Array(0.15354225, dtype=float32), 'loss_cross_entropy': Array(0.1440538, dtype=float32)}


  9%|▊         | 87179/1000000 [3:59:35<23:36:21, 10.74it/s]

{'loss': Array(0.12900011, dtype=float32), 'loss_cross_entropy': Array(0.12153716, dtype=float32)}


  9%|▊         | 87189/1000000 [3:59:37<29:52:28,  8.49it/s]

{'loss': Array(0.13533404, dtype=float32), 'loss_cross_entropy': Array(0.12753563, dtype=float32)}


  9%|▊         | 87199/1000000 [3:59:38<24:54:25, 10.18it/s]

{'loss': Array(0.15610182, dtype=float32), 'loss_cross_entropy': Array(0.14732455, dtype=float32)}


  9%|▊         | 87209/1000000 [3:59:40<34:32:43,  7.34it/s]

{'loss': Array(0.14146009, dtype=float32), 'loss_cross_entropy': Array(0.13371058, dtype=float32)}


  9%|▊         | 87219/1000000 [3:59:42<25:59:36,  9.75it/s]

{'loss': Array(0.15613678, dtype=float32), 'loss_cross_entropy': Array(0.14720951, dtype=float32)}


  9%|▊         | 87229/1000000 [3:59:43<34:02:33,  7.45it/s]

{'loss': Array(0.13766661, dtype=float32), 'loss_cross_entropy': Array(0.12943764, dtype=float32)}


  9%|▊         | 87239/1000000 [3:59:45<25:49:44,  9.82it/s]

{'loss': Array(0.12367254, dtype=float32), 'loss_cross_entropy': Array(0.11610015, dtype=float32)}


  9%|▊         | 87249/1000000 [3:59:46<24:23:41, 10.39it/s]

{'loss': Array(0.15988412, dtype=float32), 'loss_cross_entropy': Array(0.1511426, dtype=float32)}


  9%|▊         | 87259/1000000 [3:59:48<28:35:42,  8.87it/s]

{'loss': Array(0.14485072, dtype=float32), 'loss_cross_entropy': Array(0.13665807, dtype=float32)}


  9%|▊         | 87269/1000000 [3:59:49<24:09:35, 10.49it/s]

{'loss': Array(0.12235375, dtype=float32), 'loss_cross_entropy': Array(0.11476966, dtype=float32)}


  9%|▊         | 87279/1000000 [3:59:51<30:54:24,  8.20it/s]

{'loss': Array(0.14197566, dtype=float32), 'loss_cross_entropy': Array(0.13350998, dtype=float32)}


  9%|▊         | 87289/1000000 [3:59:52<25:36:29,  9.90it/s]

{'loss': Array(0.12519185, dtype=float32), 'loss_cross_entropy': Array(0.11757185, dtype=float32)}


  9%|▊         | 87299/1000000 [3:59:54<37:25:06,  6.78it/s]

{'loss': Array(0.14405699, dtype=float32), 'loss_cross_entropy': Array(0.13599965, dtype=float32)}


  9%|▊         | 87309/1000000 [3:59:56<25:43:23,  9.86it/s]

{'loss': Array(0.13282564, dtype=float32), 'loss_cross_entropy': Array(0.12431987, dtype=float32)}


  9%|▊         | 87319/1000000 [3:59:57<37:30:45,  6.76it/s]

{'loss': Array(0.14658569, dtype=float32), 'loss_cross_entropy': Array(0.13783585, dtype=float32)}


  9%|▊         | 87329/1000000 [3:59:59<26:12:17,  9.67it/s]

{'loss': Array(0.14492741, dtype=float32), 'loss_cross_entropy': Array(0.13634147, dtype=float32)}


  9%|▊         | 87339/1000000 [4:00:00<25:01:18, 10.13it/s]

{'loss': Array(0.1352022, dtype=float32), 'loss_cross_entropy': Array(0.12708378, dtype=float32)}


  9%|▊         | 87349/1000000 [4:00:02<34:12:40,  7.41it/s]

{'loss': Array(0.14149654, dtype=float32), 'loss_cross_entropy': Array(0.13345744, dtype=float32)}


  9%|▊         | 87359/1000000 [4:00:03<26:12:47,  9.67it/s]

{'loss': Array(0.1608443, dtype=float32), 'loss_cross_entropy': Array(0.15128854, dtype=float32)}


  9%|▊         | 87369/1000000 [4:00:05<38:46:29,  6.54it/s]

{'loss': Array(0.14536805, dtype=float32), 'loss_cross_entropy': Array(0.13646984, dtype=float32)}


  9%|▊         | 87379/1000000 [4:00:06<27:14:09,  9.31it/s]

{'loss': Array(0.14964393, dtype=float32), 'loss_cross_entropy': Array(0.14107418, dtype=float32)}


  9%|▊         | 87389/1000000 [4:00:08<25:26:02,  9.97it/s]

{'loss': Array(0.14190957, dtype=float32), 'loss_cross_entropy': Array(0.13407739, dtype=float32)}


  9%|▊         | 87399/1000000 [4:00:10<28:47:33,  8.80it/s]

{'loss': Array(0.13704467, dtype=float32), 'loss_cross_entropy': Array(0.12923904, dtype=float32)}


  9%|▊         | 87409/1000000 [4:00:11<24:48:40, 10.22it/s]

{'loss': Array(0.162201, dtype=float32), 'loss_cross_entropy': Array(0.15309785, dtype=float32)}


  9%|▊         | 87419/1000000 [4:00:13<27:08:36,  9.34it/s]

{'loss': Array(0.13918601, dtype=float32), 'loss_cross_entropy': Array(0.13095264, dtype=float32)}


  9%|▊         | 87429/1000000 [4:00:14<23:52:26, 10.62it/s]

{'loss': Array(0.1494445, dtype=float32), 'loss_cross_entropy': Array(0.14135985, dtype=float32)}


  9%|▊         | 87439/1000000 [4:00:16<37:01:50,  6.85it/s]

{'loss': Array(0.14116292, dtype=float32), 'loss_cross_entropy': Array(0.13259558, dtype=float32)}


  9%|▊         | 87449/1000000 [4:00:17<25:30:05,  9.94it/s]

{'loss': Array(0.14221452, dtype=float32), 'loss_cross_entropy': Array(0.13390753, dtype=float32)}


  9%|▊         | 87459/1000000 [4:00:19<23:59:24, 10.57it/s]

{'loss': Array(0.13837151, dtype=float32), 'loss_cross_entropy': Array(0.13022964, dtype=float32)}


  9%|▊         | 87469/1000000 [4:00:20<30:18:23,  8.36it/s]

{'loss': Array(0.15151875, dtype=float32), 'loss_cross_entropy': Array(0.14234875, dtype=float32)}


  9%|▊         | 87479/1000000 [4:00:22<25:03:57, 10.11it/s]

{'loss': Array(0.1401469, dtype=float32), 'loss_cross_entropy': Array(0.13182114, dtype=float32)}


  9%|▊         | 87489/1000000 [4:00:23<37:22:42,  6.78it/s]

{'loss': Array(0.1538298, dtype=float32), 'loss_cross_entropy': Array(0.14478968, dtype=float32)}


  9%|▊         | 87499/1000000 [4:00:25<25:25:01,  9.97it/s]

{'loss': Array(0.133552, dtype=float32), 'loss_cross_entropy': Array(0.1252268, dtype=float32)}


  9%|▉         | 87509/1000000 [4:00:32<82:14:05,  3.08it/s] 

{'loss': Array(0.15893286, dtype=float32), 'loss_cross_entropy': Array(0.14979275, dtype=float32)}


  9%|▉         | 87519/1000000 [4:00:34<37:28:16,  6.76it/s]

{'loss': Array(0.14075617, dtype=float32), 'loss_cross_entropy': Array(0.13289927, dtype=float32)}


  9%|▉         | 87529/1000000 [4:00:36<26:31:23,  9.56it/s]

{'loss': Array(0.16038981, dtype=float32), 'loss_cross_entropy': Array(0.15110148, dtype=float32)}


  9%|▉         | 87539/1000000 [4:00:37<30:55:08,  8.20it/s]

{'loss': Array(0.15326479, dtype=float32), 'loss_cross_entropy': Array(0.14343502, dtype=float32)}


  9%|▉         | 87549/1000000 [4:00:39<24:43:09, 10.25it/s]

{'loss': Array(0.13658176, dtype=float32), 'loss_cross_entropy': Array(0.12806249, dtype=float32)}


  9%|▉         | 87559/1000000 [4:00:40<37:12:29,  6.81it/s]

{'loss': Array(0.15015166, dtype=float32), 'loss_cross_entropy': Array(0.14062616, dtype=float32)}


  9%|▉         | 87569/1000000 [4:00:42<25:48:05,  9.82it/s]

{'loss': Array(0.15325415, dtype=float32), 'loss_cross_entropy': Array(0.14440499, dtype=float32)}


  9%|▉         | 87579/1000000 [4:00:43<23:43:24, 10.68it/s]

{'loss': Array(0.1528865, dtype=float32), 'loss_cross_entropy': Array(0.14395376, dtype=float32)}


  9%|▉         | 87589/1000000 [4:00:45<26:41:42,  9.49it/s]

{'loss': Array(0.14743128, dtype=float32), 'loss_cross_entropy': Array(0.13913795, dtype=float32)}


  9%|▉         | 87599/1000000 [4:00:46<23:49:24, 10.64it/s]

{'loss': Array(0.14083098, dtype=float32), 'loss_cross_entropy': Array(0.13263123, dtype=float32)}


  9%|▉         | 87609/1000000 [4:00:48<38:15:14,  6.63it/s]

{'loss': Array(0.15783408, dtype=float32), 'loss_cross_entropy': Array(0.148694, dtype=float32)}


  9%|▉         | 87619/1000000 [4:00:49<26:20:26,  9.62it/s]

{'loss': Array(0.14952266, dtype=float32), 'loss_cross_entropy': Array(0.1409544, dtype=float32)}


  9%|▉         | 87629/1000000 [4:00:51<24:30:45, 10.34it/s]

{'loss': Array(0.1534102, dtype=float32), 'loss_cross_entropy': Array(0.14471228, dtype=float32)}


  9%|▉         | 87639/1000000 [4:00:52<28:43:58,  8.82it/s]

{'loss': Array(0.13602315, dtype=float32), 'loss_cross_entropy': Array(0.1281009, dtype=float32)}


  9%|▉         | 87649/1000000 [4:00:54<25:58:32,  9.76it/s]

{'loss': Array(0.11948144, dtype=float32), 'loss_cross_entropy': Array(0.11210116, dtype=float32)}


  9%|▉         | 87659/1000000 [4:00:55<33:15:56,  7.62it/s]

{'loss': Array(0.13982923, dtype=float32), 'loss_cross_entropy': Array(0.13132176, dtype=float32)}


  9%|▉         | 87669/1000000 [4:00:57<25:03:14, 10.12it/s]

{'loss': Array(0.1373298, dtype=float32), 'loss_cross_entropy': Array(0.1291881, dtype=float32)}


  9%|▉         | 87679/1000000 [4:00:59<37:56:29,  6.68it/s]

{'loss': Array(0.13778968, dtype=float32), 'loss_cross_entropy': Array(0.12906133, dtype=float32)}


  9%|▉         | 87689/1000000 [4:01:00<26:48:39,  9.45it/s]

{'loss': Array(0.14421563, dtype=float32), 'loss_cross_entropy': Array(0.13590573, dtype=float32)}


  9%|▉         | 87699/1000000 [4:01:01<25:32:25,  9.92it/s]

{'loss': Array(0.1728315, dtype=float32), 'loss_cross_entropy': Array(0.16282779, dtype=float32)}


  9%|▉         | 87709/1000000 [4:01:03<31:07:29,  8.14it/s]

{'loss': Array(0.16028081, dtype=float32), 'loss_cross_entropy': Array(0.15149955, dtype=float32)}


  9%|▉         | 87719/1000000 [4:01:05<25:50:23,  9.81it/s]

{'loss': Array(0.14861324, dtype=float32), 'loss_cross_entropy': Array(0.1400458, dtype=float32)}


  9%|▉         | 87729/1000000 [4:01:06<34:33:45,  7.33it/s]

{'loss': Array(0.1459014, dtype=float32), 'loss_cross_entropy': Array(0.13754158, dtype=float32)}


  9%|▉         | 87739/1000000 [4:01:08<26:13:15,  9.66it/s]

{'loss': Array(0.16148293, dtype=float32), 'loss_cross_entropy': Array(0.15182696, dtype=float32)}


  9%|▉         | 87749/1000000 [4:01:09<25:01:33, 10.13it/s]

{'loss': Array(0.14807232, dtype=float32), 'loss_cross_entropy': Array(0.14019118, dtype=float32)}


  9%|▉         | 87759/1000000 [4:01:11<27:22:28,  9.26it/s]

{'loss': Array(0.15547743, dtype=float32), 'loss_cross_entropy': Array(0.14634044, dtype=float32)}


  9%|▉         | 87769/1000000 [4:01:12<24:33:04, 10.32it/s]

{'loss': Array(0.13098137, dtype=float32), 'loss_cross_entropy': Array(0.12322807, dtype=float32)}


  9%|▉         | 87779/1000000 [4:01:14<38:18:38,  6.61it/s]

{'loss': Array(0.1344867, dtype=float32), 'loss_cross_entropy': Array(0.12630728, dtype=float32)}


  9%|▉         | 87789/1000000 [4:01:15<25:59:14,  9.75it/s]

{'loss': Array(0.15164626, dtype=float32), 'loss_cross_entropy': Array(0.14279053, dtype=float32)}


  9%|▉         | 87799/1000000 [4:01:17<23:50:47, 10.63it/s]

{'loss': Array(0.145197, dtype=float32), 'loss_cross_entropy': Array(0.13611825, dtype=float32)}


  9%|▉         | 87809/1000000 [4:01:19<30:50:50,  8.21it/s]

{'loss': Array(0.16137911, dtype=float32), 'loss_cross_entropy': Array(0.15213186, dtype=float32)}


  9%|▉         | 87819/1000000 [4:01:20<25:19:02, 10.01it/s]

{'loss': Array(0.13308354, dtype=float32), 'loss_cross_entropy': Array(0.12543778, dtype=float32)}


  9%|▉         | 87829/1000000 [4:01:22<33:12:33,  7.63it/s]

{'loss': Array(0.15531544, dtype=float32), 'loss_cross_entropy': Array(0.14618348, dtype=float32)}


  9%|▉         | 87839/1000000 [4:01:23<24:45:43, 10.23it/s]

{'loss': Array(0.14729737, dtype=float32), 'loss_cross_entropy': Array(0.13896471, dtype=float32)}


  9%|▉         | 87849/1000000 [4:01:25<33:17:09,  7.61it/s]

{'loss': Array(0.15258925, dtype=float32), 'loss_cross_entropy': Array(0.14354944, dtype=float32)}


  9%|▉         | 87859/1000000 [4:01:26<24:57:47, 10.15it/s]

{'loss': Array(0.12631145, dtype=float32), 'loss_cross_entropy': Array(0.1184034, dtype=float32)}


  9%|▉         | 87869/1000000 [4:01:27<23:33:04, 10.76it/s]

{'loss': Array(0.14068048, dtype=float32), 'loss_cross_entropy': Array(0.13238733, dtype=float32)}


  9%|▉         | 87879/1000000 [4:01:29<27:43:00,  9.14it/s]

{'loss': Array(0.1622665, dtype=float32), 'loss_cross_entropy': Array(0.15319394, dtype=float32)}


  9%|▉         | 87889/1000000 [4:01:30<24:26:38, 10.37it/s]

{'loss': Array(0.15031637, dtype=float32), 'loss_cross_entropy': Array(0.14166915, dtype=float32)}


  9%|▉         | 87899/1000000 [4:01:32<30:15:25,  8.37it/s]

{'loss': Array(0.14596549, dtype=float32), 'loss_cross_entropy': Array(0.13816795, dtype=float32)}


  9%|▉         | 87909/1000000 [4:01:33<25:01:17, 10.13it/s]

{'loss': Array(0.15449184, dtype=float32), 'loss_cross_entropy': Array(0.14568236, dtype=float32)}


  9%|▉         | 87919/1000000 [4:01:35<37:32:40,  6.75it/s]

{'loss': Array(0.14932783, dtype=float32), 'loss_cross_entropy': Array(0.14023961, dtype=float32)}


  9%|▉         | 87929/1000000 [4:01:37<25:43:01,  9.85it/s]

{'loss': Array(0.15649058, dtype=float32), 'loss_cross_entropy': Array(0.14813371, dtype=float32)}


  9%|▉         | 87939/1000000 [4:01:38<37:13:31,  6.81it/s]

{'loss': Array(0.16068068, dtype=float32), 'loss_cross_entropy': Array(0.15168688, dtype=float32)}


  9%|▉         | 87949/1000000 [4:01:40<25:33:33,  9.91it/s]

{'loss': Array(0.14524658, dtype=float32), 'loss_cross_entropy': Array(0.13671558, dtype=float32)}


  9%|▉         | 87959/1000000 [4:01:41<24:11:20, 10.47it/s]

{'loss': Array(0.14064768, dtype=float32), 'loss_cross_entropy': Array(0.13222913, dtype=float32)}


  9%|▉         | 87969/1000000 [4:01:43<33:44:56,  7.51it/s]

{'loss': Array(0.1346586, dtype=float32), 'loss_cross_entropy': Array(0.12600325, dtype=float32)}


  9%|▉         | 87979/1000000 [4:01:44<25:09:49, 10.07it/s]

{'loss': Array(0.14368765, dtype=float32), 'loss_cross_entropy': Array(0.13541527, dtype=float32)}


  9%|▉         | 87989/1000000 [4:01:46<37:36:46,  6.74it/s]

{'loss': Array(0.15548931, dtype=float32), 'loss_cross_entropy': Array(0.14607643, dtype=float32)}


  9%|▉         | 87999/1000000 [4:01:47<26:44:23,  9.47it/s]

{'loss': Array(0.14580555, dtype=float32), 'loss_cross_entropy': Array(0.13746873, dtype=float32)}


  9%|▉         | 88009/1000000 [4:01:55<85:01:03,  2.98it/s] 

{'loss': Array(0.15060304, dtype=float32), 'loss_cross_entropy': Array(0.14208418, dtype=float32)}


  9%|▉         | 88019/1000000 [4:01:57<39:19:41,  6.44it/s] 

{'loss': Array(0.15827587, dtype=float32), 'loss_cross_entropy': Array(0.1490268, dtype=float32)}


  9%|▉         | 88029/1000000 [4:01:58<26:32:19,  9.55it/s]

{'loss': Array(0.1365405, dtype=float32), 'loss_cross_entropy': Array(0.12841842, dtype=float32)}


  9%|▉         | 88039/1000000 [4:02:00<28:27:56,  8.90it/s]

{'loss': Array(0.13200496, dtype=float32), 'loss_cross_entropy': Array(0.1249847, dtype=float32)}


  9%|▉         | 88049/1000000 [4:02:02<25:30:01,  9.93it/s]

{'loss': Array(0.16530299, dtype=float32), 'loss_cross_entropy': Array(0.15578401, dtype=float32)}


  9%|▉         | 88059/1000000 [4:02:03<38:43:26,  6.54it/s]

{'loss': Array(0.15628119, dtype=float32), 'loss_cross_entropy': Array(0.1468917, dtype=float32)}


  9%|▉         | 88069/1000000 [4:02:05<26:56:59,  9.40it/s]

{'loss': Array(0.14198653, dtype=float32), 'loss_cross_entropy': Array(0.13395338, dtype=float32)}


  9%|▉         | 88079/1000000 [4:02:06<25:08:52, 10.07it/s]

{'loss': Array(0.14643738, dtype=float32), 'loss_cross_entropy': Array(0.13789938, dtype=float32)}


  9%|▉         | 88089/1000000 [4:02:08<31:23:37,  8.07it/s]

{'loss': Array(0.14954841, dtype=float32), 'loss_cross_entropy': Array(0.1406142, dtype=float32)}


  9%|▉         | 88099/1000000 [4:02:09<25:13:28, 10.04it/s]

{'loss': Array(0.17291377, dtype=float32), 'loss_cross_entropy': Array(0.16316551, dtype=float32)}


  9%|▉         | 88109/1000000 [4:02:11<37:29:45,  6.76it/s]

{'loss': Array(0.16026415, dtype=float32), 'loss_cross_entropy': Array(0.15106589, dtype=float32)}


  9%|▉         | 88119/1000000 [4:02:12<25:28:33,  9.94it/s]

{'loss': Array(0.15740716, dtype=float32), 'loss_cross_entropy': Array(0.148445, dtype=float32)}


  9%|▉         | 88129/1000000 [4:02:14<23:35:35, 10.74it/s]

{'loss': Array(0.15392485, dtype=float32), 'loss_cross_entropy': Array(0.14509843, dtype=float32)}


  9%|▉         | 88139/1000000 [4:02:15<27:35:27,  9.18it/s]

{'loss': Array(0.15789744, dtype=float32), 'loss_cross_entropy': Array(0.14924948, dtype=float32)}


  9%|▉         | 88149/1000000 [4:02:17<23:39:24, 10.71it/s]

{'loss': Array(0.14637707, dtype=float32), 'loss_cross_entropy': Array(0.13780878, dtype=float32)}


  9%|▉         | 88159/1000000 [4:02:18<32:44:09,  7.74it/s]

{'loss': Array(0.1486481, dtype=float32), 'loss_cross_entropy': Array(0.14032021, dtype=float32)}


  9%|▉         | 88169/1000000 [4:02:20<24:55:45, 10.16it/s]

{'loss': Array(0.13634942, dtype=float32), 'loss_cross_entropy': Array(0.12861328, dtype=float32)}


  9%|▉         | 88179/1000000 [4:02:21<23:35:21, 10.74it/s]

{'loss': Array(0.15792543, dtype=float32), 'loss_cross_entropy': Array(0.14942454, dtype=float32)}


  9%|▉         | 88189/1000000 [4:02:23<29:37:02,  8.55it/s]

{'loss': Array(0.13018963, dtype=float32), 'loss_cross_entropy': Array(0.12249877, dtype=float32)}


  9%|▉         | 88199/1000000 [4:02:24<24:06:28, 10.51it/s]

{'loss': Array(0.14601158, dtype=float32), 'loss_cross_entropy': Array(0.1368813, dtype=float32)}


  9%|▉         | 88209/1000000 [4:02:26<23:57:06, 10.57it/s]

{'loss': Array(0.13781093, dtype=float32), 'loss_cross_entropy': Array(0.12930869, dtype=float32)}


  9%|▉         | 88219/1000000 [4:02:27<26:23:16,  9.60it/s]

{'loss': Array(0.16020742, dtype=float32), 'loss_cross_entropy': Array(0.15066114, dtype=float32)}


  9%|▉         | 88229/1000000 [4:02:29<24:09:38, 10.48it/s]

{'loss': Array(0.13100232, dtype=float32), 'loss_cross_entropy': Array(0.1230446, dtype=float32)}


  9%|▉         | 88239/1000000 [4:02:30<29:59:27,  8.44it/s]

{'loss': Array(0.17033455, dtype=float32), 'loss_cross_entropy': Array(0.16078213, dtype=float32)}


  9%|▉         | 88249/1000000 [4:02:32<24:29:12, 10.34it/s]

{'loss': Array(0.14655839, dtype=float32), 'loss_cross_entropy': Array(0.13797966, dtype=float32)}


  9%|▉         | 88259/1000000 [4:02:33<36:54:55,  6.86it/s]

{'loss': Array(0.14667323, dtype=float32), 'loss_cross_entropy': Array(0.13818054, dtype=float32)}


  9%|▉         | 88269/1000000 [4:02:35<25:24:15,  9.97it/s]

{'loss': Array(0.14578684, dtype=float32), 'loss_cross_entropy': Array(0.13734357, dtype=float32)}


  9%|▉         | 88279/1000000 [4:02:36<37:19:11,  6.79it/s]

{'loss': Array(0.1568697, dtype=float32), 'loss_cross_entropy': Array(0.14792359, dtype=float32)}


  9%|▉         | 88289/1000000 [4:02:38<26:01:22,  9.73it/s]

{'loss': Array(0.14698024, dtype=float32), 'loss_cross_entropy': Array(0.13853608, dtype=float32)}


  9%|▉         | 88299/1000000 [4:02:39<23:46:00, 10.66it/s]

{'loss': Array(0.14843832, dtype=float32), 'loss_cross_entropy': Array(0.1394157, dtype=float32)}


  9%|▉         | 88309/1000000 [4:02:41<33:10:04,  7.64it/s]

{'loss': Array(0.15318084, dtype=float32), 'loss_cross_entropy': Array(0.14433923, dtype=float32)}


  9%|▉         | 88319/1000000 [4:02:42<24:56:37, 10.15it/s]

{'loss': Array(0.14761893, dtype=float32), 'loss_cross_entropy': Array(0.13886411, dtype=float32)}


  9%|▉         | 88329/1000000 [4:02:44<37:58:57,  6.67it/s]

{'loss': Array(0.16549662, dtype=float32), 'loss_cross_entropy': Array(0.15618156, dtype=float32)}


  9%|▉         | 88339/1000000 [4:02:45<25:54:45,  9.77it/s]

{'loss': Array(0.15262619, dtype=float32), 'loss_cross_entropy': Array(0.14453755, dtype=float32)}


  9%|▉         | 88349/1000000 [4:02:47<23:48:40, 10.64it/s]

{'loss': Array(0.16556051, dtype=float32), 'loss_cross_entropy': Array(0.15641914, dtype=float32)}


  9%|▉         | 88359/1000000 [4:02:48<26:13:53,  9.65it/s]

{'loss': Array(0.15358736, dtype=float32), 'loss_cross_entropy': Array(0.1449876, dtype=float32)}


  9%|▉         | 88369/1000000 [4:02:50<24:15:49, 10.44it/s]

{'loss': Array(0.14578475, dtype=float32), 'loss_cross_entropy': Array(0.13741203, dtype=float32)}


  9%|▉         | 88379/1000000 [4:02:51<26:49:44,  9.44it/s]

{'loss': Array(0.1391261, dtype=float32), 'loss_cross_entropy': Array(0.13077849, dtype=float32)}


  9%|▉         | 88389/1000000 [4:02:53<24:18:42, 10.42it/s]

{'loss': Array(0.14127894, dtype=float32), 'loss_cross_entropy': Array(0.13314421, dtype=float32)}


  9%|▉         | 88399/1000000 [4:02:55<37:52:47,  6.68it/s]

{'loss': Array(0.16367875, dtype=float32), 'loss_cross_entropy': Array(0.15455897, dtype=float32)}


  9%|▉         | 88409/1000000 [4:02:56<26:06:10,  9.70it/s]

{'loss': Array(0.13987526, dtype=float32), 'loss_cross_entropy': Array(0.13183664, dtype=float32)}


  9%|▉         | 88419/1000000 [4:02:57<23:48:04, 10.64it/s]

{'loss': Array(0.13844849, dtype=float32), 'loss_cross_entropy': Array(0.12995298, dtype=float32)}


  9%|▉         | 88429/1000000 [4:02:59<29:51:06,  8.48it/s]

{'loss': Array(0.16604656, dtype=float32), 'loss_cross_entropy': Array(0.15660278, dtype=float32)}


  9%|▉         | 88439/1000000 [4:03:00<25:01:16, 10.12it/s]

{'loss': Array(0.15493242, dtype=float32), 'loss_cross_entropy': Array(0.14631802, dtype=float32)}


  9%|▉         | 88449/1000000 [4:03:02<38:10:44,  6.63it/s]

{'loss': Array(0.15680616, dtype=float32), 'loss_cross_entropy': Array(0.14807184, dtype=float32)}


  9%|▉         | 88459/1000000 [4:03:03<27:04:01,  9.35it/s]

{'loss': Array(0.15347904, dtype=float32), 'loss_cross_entropy': Array(0.1447227, dtype=float32)}


  9%|▉         | 88469/1000000 [4:03:05<25:42:43,  9.85it/s]

{'loss': Array(0.14610215, dtype=float32), 'loss_cross_entropy': Array(0.13752747, dtype=float32)}


  9%|▉         | 88479/1000000 [4:03:07<30:15:54,  8.37it/s]

{'loss': Array(0.1507236, dtype=float32), 'loss_cross_entropy': Array(0.14217685, dtype=float32)}


  9%|▉         | 88489/1000000 [4:03:08<26:00:32,  9.73it/s]

{'loss': Array(0.14898288, dtype=float32), 'loss_cross_entropy': Array(0.13995719, dtype=float32)}


  9%|▉         | 88499/1000000 [4:03:10<34:10:35,  7.41it/s]

{'loss': Array(0.1465684, dtype=float32), 'loss_cross_entropy': Array(0.13809161, dtype=float32)}


  9%|▉         | 88509/1000000 [4:03:18<83:29:14,  3.03it/s] 

{'loss': Array(0.15105264, dtype=float32), 'loss_cross_entropy': Array(0.14237283, dtype=float32)}


  9%|▉         | 88519/1000000 [4:03:19<43:26:32,  5.83it/s]

{'loss': Array(0.16746852, dtype=float32), 'loss_cross_entropy': Array(0.15770343, dtype=float32)}


  9%|▉         | 88529/1000000 [4:03:21<26:26:26,  9.58it/s]

{'loss': Array(0.14675522, dtype=float32), 'loss_cross_entropy': Array(0.13812767, dtype=float32)}


  9%|▉         | 88539/1000000 [4:03:22<24:14:42, 10.44it/s]

{'loss': Array(0.15420626, dtype=float32), 'loss_cross_entropy': Array(0.14548443, dtype=float32)}


  9%|▉         | 88549/1000000 [4:03:24<30:33:45,  8.28it/s]

{'loss': Array(0.15571229, dtype=float32), 'loss_cross_entropy': Array(0.1469788, dtype=float32)}


  9%|▉         | 88559/1000000 [4:03:25<25:42:58,  9.85it/s]

{'loss': Array(0.15897627, dtype=float32), 'loss_cross_entropy': Array(0.1497335, dtype=float32)}


  9%|▉         | 88569/1000000 [4:03:27<23:35:30, 10.73it/s]

{'loss': Array(0.15308605, dtype=float32), 'loss_cross_entropy': Array(0.14432509, dtype=float32)}


  9%|▉         | 88579/1000000 [4:03:28<26:45:58,  9.46it/s]

{'loss': Array(0.14836071, dtype=float32), 'loss_cross_entropy': Array(0.13984747, dtype=float32)}


  9%|▉         | 88589/1000000 [4:03:30<24:05:09, 10.51it/s]

{'loss': Array(0.12320179, dtype=float32), 'loss_cross_entropy': Array(0.11572184, dtype=float32)}


  9%|▉         | 88599/1000000 [4:03:31<30:06:41,  8.41it/s]

{'loss': Array(0.14144655, dtype=float32), 'loss_cross_entropy': Array(0.13310914, dtype=float32)}


  9%|▉         | 88609/1000000 [4:03:33<24:17:05, 10.42it/s]

{'loss': Array(0.1541432, dtype=float32), 'loss_cross_entropy': Array(0.14503233, dtype=float32)}


  9%|▉         | 88619/1000000 [4:03:34<33:10:24,  7.63it/s]

{'loss': Array(0.1585799, dtype=float32), 'loss_cross_entropy': Array(0.14900671, dtype=float32)}


  9%|▉         | 88629/1000000 [4:03:36<25:25:13,  9.96it/s]

{'loss': Array(0.14660196, dtype=float32), 'loss_cross_entropy': Array(0.13798337, dtype=float32)}


  9%|▉         | 88639/1000000 [4:03:37<23:52:59, 10.60it/s]

{'loss': Array(0.14634363, dtype=float32), 'loss_cross_entropy': Array(0.1383019, dtype=float32)}


  9%|▉         | 88649/1000000 [4:03:39<30:43:17,  8.24it/s]

{'loss': Array(0.15143049, dtype=float32), 'loss_cross_entropy': Array(0.14235413, dtype=float32)}


  9%|▉         | 88659/1000000 [4:03:40<24:58:54, 10.13it/s]

{'loss': Array(0.14366053, dtype=float32), 'loss_cross_entropy': Array(0.13538566, dtype=float32)}


  9%|▉         | 88669/1000000 [4:03:42<23:52:46, 10.60it/s]

{'loss': Array(0.15995118, dtype=float32), 'loss_cross_entropy': Array(0.15126285, dtype=float32)}


  9%|▉         | 88679/1000000 [4:03:43<26:33:36,  9.53it/s]

{'loss': Array(0.14892735, dtype=float32), 'loss_cross_entropy': Array(0.14068909, dtype=float32)}


  9%|▉         | 88689/1000000 [4:03:45<23:39:46, 10.70it/s]

{'loss': Array(0.14802168, dtype=float32), 'loss_cross_entropy': Array(0.1393503, dtype=float32)}


  9%|▉         | 88699/1000000 [4:03:46<30:23:21,  8.33it/s]

{'loss': Array(0.14849396, dtype=float32), 'loss_cross_entropy': Array(0.14031918, dtype=float32)}


  9%|▉         | 88709/1000000 [4:03:48<24:48:58, 10.20it/s]

{'loss': Array(0.14694598, dtype=float32), 'loss_cross_entropy': Array(0.13782312, dtype=float32)}


  9%|▉         | 88719/1000000 [4:03:50<37:29:18,  6.75it/s]

{'loss': Array(0.16499801, dtype=float32), 'loss_cross_entropy': Array(0.15642495, dtype=float32)}


  9%|▉         | 88729/1000000 [4:03:51<26:24:12,  9.59it/s]

{'loss': Array(0.15377776, dtype=float32), 'loss_cross_entropy': Array(0.14478545, dtype=float32)}


  9%|▉         | 88739/1000000 [4:03:52<23:30:36, 10.77it/s]

{'loss': Array(0.14723776, dtype=float32), 'loss_cross_entropy': Array(0.13897727, dtype=float32)}


  9%|▉         | 88749/1000000 [4:03:54<32:32:23,  7.78it/s]

{'loss': Array(0.14310597, dtype=float32), 'loss_cross_entropy': Array(0.13447267, dtype=float32)}


  9%|▉         | 88759/1000000 [4:03:55<24:45:05, 10.23it/s]

{'loss': Array(0.14448848, dtype=float32), 'loss_cross_entropy': Array(0.13668597, dtype=float32)}


  9%|▉         | 88769/1000000 [4:03:57<23:27:07, 10.79it/s]

{'loss': Array(0.15027401, dtype=float32), 'loss_cross_entropy': Array(0.14134972, dtype=float32)}


  9%|▉         | 88779/1000000 [4:03:58<28:52:41,  8.76it/s]

{'loss': Array(0.1347013, dtype=float32), 'loss_cross_entropy': Array(0.12675153, dtype=float32)}


  9%|▉         | 88789/1000000 [4:04:00<25:06:56, 10.08it/s]

{'loss': Array(0.13734548, dtype=float32), 'loss_cross_entropy': Array(0.12895294, dtype=float32)}


  9%|▉         | 88799/1000000 [4:04:02<32:02:30,  7.90it/s]

{'loss': Array(0.15294279, dtype=float32), 'loss_cross_entropy': Array(0.14360629, dtype=float32)}


  9%|▉         | 88809/1000000 [4:04:03<25:35:26,  9.89it/s]

{'loss': Array(0.12573731, dtype=float32), 'loss_cross_entropy': Array(0.11788604, dtype=float32)}


  9%|▉         | 88819/1000000 [4:04:05<25:14:52, 10.02it/s]

{'loss': Array(0.13841516, dtype=float32), 'loss_cross_entropy': Array(0.13006845, dtype=float32)}


  9%|▉         | 88829/1000000 [4:04:06<28:18:19,  8.94it/s]

{'loss': Array(0.13642293, dtype=float32), 'loss_cross_entropy': Array(0.12793481, dtype=float32)}


  9%|▉         | 88839/1000000 [4:04:08<25:47:16,  9.81it/s]

{'loss': Array(0.15370345, dtype=float32), 'loss_cross_entropy': Array(0.14494999, dtype=float32)}


  9%|▉         | 88849/1000000 [4:04:10<34:34:34,  7.32it/s]

{'loss': Array(0.15085562, dtype=float32), 'loss_cross_entropy': Array(0.1422601, dtype=float32)}


  9%|▉         | 88859/1000000 [4:04:11<26:39:03,  9.50it/s]

{'loss': Array(0.13674915, dtype=float32), 'loss_cross_entropy': Array(0.12866712, dtype=float32)}


  9%|▉         | 88869/1000000 [4:04:12<24:19:39, 10.40it/s]

{'loss': Array(0.15428385, dtype=float32), 'loss_cross_entropy': Array(0.1461217, dtype=float32)}


  9%|▉         | 88879/1000000 [4:04:14<26:45:36,  9.46it/s]

{'loss': Array(0.1463414, dtype=float32), 'loss_cross_entropy': Array(0.13821918, dtype=float32)}


  9%|▉         | 88889/1000000 [4:04:15<24:12:11, 10.46it/s]

{'loss': Array(0.12992406, dtype=float32), 'loss_cross_entropy': Array(0.12160318, dtype=float32)}


  9%|▉         | 88899/1000000 [4:04:17<28:06:33,  9.00it/s]

{'loss': Array(0.14762239, dtype=float32), 'loss_cross_entropy': Array(0.1386183, dtype=float32)}


  9%|▉         | 88909/1000000 [4:04:18<24:09:38, 10.47it/s]

{'loss': Array(0.13891348, dtype=float32), 'loss_cross_entropy': Array(0.13067167, dtype=float32)}


  9%|▉         | 88919/1000000 [4:04:20<38:06:16,  6.64it/s]

{'loss': Array(0.14795017, dtype=float32), 'loss_cross_entropy': Array(0.1390747, dtype=float32)}


  9%|▉         | 88929/1000000 [4:04:22<26:24:14,  9.58it/s]

{'loss': Array(0.13937533, dtype=float32), 'loss_cross_entropy': Array(0.13152297, dtype=float32)}


  9%|▉         | 88939/1000000 [4:04:23<24:05:40, 10.50it/s]

{'loss': Array(0.12835836, dtype=float32), 'loss_cross_entropy': Array(0.12078726, dtype=float32)}


  9%|▉         | 88949/1000000 [4:04:25<30:30:35,  8.29it/s]

{'loss': Array(0.14418866, dtype=float32), 'loss_cross_entropy': Array(0.13554053, dtype=float32)}


  9%|▉         | 88959/1000000 [4:04:26<24:30:12, 10.33it/s]

{'loss': Array(0.1631052, dtype=float32), 'loss_cross_entropy': Array(0.15345752, dtype=float32)}


  9%|▉         | 88969/1000000 [4:04:28<33:01:32,  7.66it/s]

{'loss': Array(0.1491747, dtype=float32), 'loss_cross_entropy': Array(0.14078104, dtype=float32)}


  9%|▉         | 88979/1000000 [4:04:29<25:03:34, 10.10it/s]

{'loss': Array(0.16817017, dtype=float32), 'loss_cross_entropy': Array(0.15873809, dtype=float32)}


  9%|▉         | 88989/1000000 [4:04:31<33:02:31,  7.66it/s]

{'loss': Array(0.14804964, dtype=float32), 'loss_cross_entropy': Array(0.13931516, dtype=float32)}


  9%|▉         | 88999/1000000 [4:04:32<25:15:44, 10.02it/s]

{'loss': Array(0.15705031, dtype=float32), 'loss_cross_entropy': Array(0.14786129, dtype=float32)}


  9%|▉         | 89009/1000000 [4:04:40<81:26:57,  3.11it/s] 

{'loss': Array(0.14917992, dtype=float32), 'loss_cross_entropy': Array(0.14065187, dtype=float32)}


  9%|▉         | 89019/1000000 [4:04:42<38:50:31,  6.51it/s]

{'loss': Array(0.15310808, dtype=float32), 'loss_cross_entropy': Array(0.14424892, dtype=float32)}


  9%|▉         | 89029/1000000 [4:04:43<25:53:36,  9.77it/s]

{'loss': Array(0.14945485, dtype=float32), 'loss_cross_entropy': Array(0.14072989, dtype=float32)}


  9%|▉         | 89039/1000000 [4:04:45<33:44:34,  7.50it/s]

{'loss': Array(0.16352956, dtype=float32), 'loss_cross_entropy': Array(0.15474834, dtype=float32)}


  9%|▉         | 89049/1000000 [4:04:46<26:08:09,  9.68it/s]

{'loss': Array(0.14322841, dtype=float32), 'loss_cross_entropy': Array(0.13445878, dtype=float32)}


  9%|▉         | 89059/1000000 [4:04:48<37:29:13,  6.75it/s]

{'loss': Array(0.1604704, dtype=float32), 'loss_cross_entropy': Array(0.15223311, dtype=float32)}


  9%|▉         | 89069/1000000 [4:04:49<26:10:10,  9.67it/s]

{'loss': Array(0.14502661, dtype=float32), 'loss_cross_entropy': Array(0.13608575, dtype=float32)}


  9%|▉         | 89079/1000000 [4:04:51<23:55:23, 10.58it/s]

{'loss': Array(0.14997968, dtype=float32), 'loss_cross_entropy': Array(0.14096588, dtype=float32)}


  9%|▉         | 89089/1000000 [4:04:52<30:48:34,  8.21it/s]

{'loss': Array(0.15955743, dtype=float32), 'loss_cross_entropy': Array(0.15070532, dtype=float32)}


  9%|▉         | 89099/1000000 [4:04:54<24:58:53, 10.13it/s]

{'loss': Array(0.16362837, dtype=float32), 'loss_cross_entropy': Array(0.15446806, dtype=float32)}


  9%|▉         | 89109/1000000 [4:04:56<33:35:57,  7.53it/s]

{'loss': Array(0.15098488, dtype=float32), 'loss_cross_entropy': Array(0.14221562, dtype=float32)}


  9%|▉         | 89119/1000000 [4:04:57<24:58:02, 10.13it/s]

{'loss': Array(0.15587732, dtype=float32), 'loss_cross_entropy': Array(0.14650238, dtype=float32)}


  9%|▉         | 89129/1000000 [4:04:58<24:00:08, 10.54it/s]

{'loss': Array(0.15638296, dtype=float32), 'loss_cross_entropy': Array(0.14725338, dtype=float32)}


  9%|▉         | 89139/1000000 [4:05:00<28:04:56,  9.01it/s]

{'loss': Array(0.14337651, dtype=float32), 'loss_cross_entropy': Array(0.13466237, dtype=float32)}


  9%|▉         | 89149/1000000 [4:05:01<25:10:45, 10.05it/s]

{'loss': Array(0.14863698, dtype=float32), 'loss_cross_entropy': Array(0.14011152, dtype=float32)}


  9%|▉         | 89159/1000000 [4:05:03<38:34:57,  6.56it/s]

{'loss': Array(0.1371602, dtype=float32), 'loss_cross_entropy': Array(0.12961297, dtype=float32)}


  9%|▉         | 89169/1000000 [4:05:05<26:15:54,  9.63it/s]

{'loss': Array(0.14505422, dtype=float32), 'loss_cross_entropy': Array(0.13706899, dtype=float32)}


  9%|▉         | 89179/1000000 [4:05:06<24:56:39, 10.14it/s]

{'loss': Array(0.14432089, dtype=float32), 'loss_cross_entropy': Array(0.13616748, dtype=float32)}


  9%|▉         | 89189/1000000 [4:05:08<31:48:31,  7.95it/s]

{'loss': Array(0.14237815, dtype=float32), 'loss_cross_entropy': Array(0.13401924, dtype=float32)}


  9%|▉         | 89199/1000000 [4:05:09<26:12:13,  9.66it/s]

{'loss': Array(0.15901826, dtype=float32), 'loss_cross_entropy': Array(0.15070485, dtype=float32)}


  9%|▉         | 89209/1000000 [4:05:11<33:57:47,  7.45it/s]

{'loss': Array(0.13553743, dtype=float32), 'loss_cross_entropy': Array(0.12756948, dtype=float32)}


  9%|▉         | 89219/1000000 [4:05:12<25:19:14,  9.99it/s]

{'loss': Array(0.14529994, dtype=float32), 'loss_cross_entropy': Array(0.13745047, dtype=float32)}


  9%|▉         | 89229/1000000 [4:05:14<33:14:02,  7.61it/s]

{'loss': Array(0.13498913, dtype=float32), 'loss_cross_entropy': Array(0.12708993, dtype=float32)}


  9%|▉         | 89239/1000000 [4:05:15<25:44:41,  9.83it/s]

{'loss': Array(0.16465317, dtype=float32), 'loss_cross_entropy': Array(0.15518379, dtype=float32)}


  9%|▉         | 89249/1000000 [4:05:17<23:42:26, 10.67it/s]

{'loss': Array(0.15229402, dtype=float32), 'loss_cross_entropy': Array(0.14423385, dtype=float32)}


  9%|▉         | 89259/1000000 [4:05:19<28:14:16,  8.96it/s]

{'loss': Array(0.14573607, dtype=float32), 'loss_cross_entropy': Array(0.13751051, dtype=float32)}


  9%|▉         | 89269/1000000 [4:05:20<24:01:47, 10.53it/s]

{'loss': Array(0.134057, dtype=float32), 'loss_cross_entropy': Array(0.12623827, dtype=float32)}


  9%|▉         | 89279/1000000 [4:05:22<30:16:31,  8.36it/s]

{'loss': Array(0.12871194, dtype=float32), 'loss_cross_entropy': Array(0.12044108, dtype=float32)}


  9%|▉         | 89289/1000000 [4:05:23<24:53:10, 10.17it/s]

{'loss': Array(0.14502399, dtype=float32), 'loss_cross_entropy': Array(0.13652717, dtype=float32)}


  9%|▉         | 89299/1000000 [4:05:25<37:18:15,  6.78it/s]

{'loss': Array(0.16807276, dtype=float32), 'loss_cross_entropy': Array(0.15888643, dtype=float32)}


  9%|▉         | 89309/1000000 [4:05:26<25:36:35,  9.88it/s]

{'loss': Array(0.12641206, dtype=float32), 'loss_cross_entropy': Array(0.11903741, dtype=float32)}


  9%|▉         | 89319/1000000 [4:05:28<37:32:35,  6.74it/s]

{'loss': Array(0.14486088, dtype=float32), 'loss_cross_entropy': Array(0.13635746, dtype=float32)}


  9%|▉         | 89329/1000000 [4:05:29<25:51:23,  9.78it/s]

{'loss': Array(0.14758752, dtype=float32), 'loss_cross_entropy': Array(0.13899876, dtype=float32)}


  9%|▉         | 89339/1000000 [4:05:31<23:37:51, 10.70it/s]

{'loss': Array(0.13179271, dtype=float32), 'loss_cross_entropy': Array(0.12362597, dtype=float32)}


  9%|▉         | 89349/1000000 [4:05:32<32:55:23,  7.68it/s]

{'loss': Array(0.12754714, dtype=float32), 'loss_cross_entropy': Array(0.11967515, dtype=float32)}


  9%|▉         | 89359/1000000 [4:05:34<25:08:24, 10.06it/s]

{'loss': Array(0.15901542, dtype=float32), 'loss_cross_entropy': Array(0.15022638, dtype=float32)}


  9%|▉         | 89369/1000000 [4:05:35<37:40:33,  6.71it/s]

{'loss': Array(0.14588225, dtype=float32), 'loss_cross_entropy': Array(0.13706855, dtype=float32)}


  9%|▉         | 89379/1000000 [4:05:37<25:53:51,  9.77it/s]

{'loss': Array(0.1599979, dtype=float32), 'loss_cross_entropy': Array(0.15053822, dtype=float32)}


  9%|▉         | 89389/1000000 [4:05:38<24:19:40, 10.40it/s]

{'loss': Array(0.1540239, dtype=float32), 'loss_cross_entropy': Array(0.14521195, dtype=float32)}


  9%|▉         | 89399/1000000 [4:05:40<27:12:03,  9.30it/s]

{'loss': Array(0.1371114, dtype=float32), 'loss_cross_entropy': Array(0.12938333, dtype=float32)}


  9%|▉         | 89409/1000000 [4:05:41<23:29:19, 10.77it/s]

{'loss': Array(0.13306393, dtype=float32), 'loss_cross_entropy': Array(0.12549226, dtype=float32)}


  9%|▉         | 89419/1000000 [4:05:43<26:24:23,  9.58it/s]

{'loss': Array(0.13549228, dtype=float32), 'loss_cross_entropy': Array(0.12766121, dtype=float32)}


  9%|▉         | 89429/1000000 [4:05:44<23:55:56, 10.57it/s]

{'loss': Array(0.14791782, dtype=float32), 'loss_cross_entropy': Array(0.1387607, dtype=float32)}


  9%|▉         | 89439/1000000 [4:05:46<37:38:25,  6.72it/s]

{'loss': Array(0.13981532, dtype=float32), 'loss_cross_entropy': Array(0.13146327, dtype=float32)}


  9%|▉         | 89449/1000000 [4:05:47<25:56:16,  9.75it/s]

{'loss': Array(0.15398282, dtype=float32), 'loss_cross_entropy': Array(0.14550535, dtype=float32)}


  9%|▉         | 89459/1000000 [4:05:49<23:33:35, 10.74it/s]

{'loss': Array(0.14668201, dtype=float32), 'loss_cross_entropy': Array(0.1386212, dtype=float32)}


  9%|▉         | 89469/1000000 [4:05:50<30:05:41,  8.40it/s]

{'loss': Array(0.1296858, dtype=float32), 'loss_cross_entropy': Array(0.12198774, dtype=float32)}


  9%|▉         | 89479/1000000 [4:05:52<24:32:35, 10.31it/s]

{'loss': Array(0.1574456, dtype=float32), 'loss_cross_entropy': Array(0.14875004, dtype=float32)}


  9%|▉         | 89489/1000000 [4:05:53<37:28:30,  6.75it/s]

{'loss': Array(0.15363415, dtype=float32), 'loss_cross_entropy': Array(0.1451823, dtype=float32)}


  9%|▉         | 89499/1000000 [4:05:55<25:55:35,  9.76it/s]

{'loss': Array(0.1440991, dtype=float32), 'loss_cross_entropy': Array(0.13560976, dtype=float32)}


  9%|▉         | 89509/1000000 [4:06:03<84:39:10,  2.99it/s] 

{'loss': Array(0.15680906, dtype=float32), 'loss_cross_entropy': Array(0.1476851, dtype=float32)}


  9%|▉         | 89519/1000000 [4:06:05<38:56:12,  6.50it/s]

{'loss': Array(0.1505699, dtype=float32), 'loss_cross_entropy': Array(0.14163272, dtype=float32)}


  9%|▉         | 89529/1000000 [4:06:06<26:39:43,  9.49it/s]

{'loss': Array(0.14499912, dtype=float32), 'loss_cross_entropy': Array(0.13678357, dtype=float32)}


  9%|▉         | 89539/1000000 [4:06:08<31:46:47,  7.96it/s]

{'loss': Array(0.14281793, dtype=float32), 'loss_cross_entropy': Array(0.13437554, dtype=float32)}


  9%|▉         | 89549/1000000 [4:06:09<25:57:47,  9.74it/s]

{'loss': Array(0.14100859, dtype=float32), 'loss_cross_entropy': Array(0.13354604, dtype=float32)}


  9%|▉         | 89559/1000000 [4:06:11<38:58:50,  6.49it/s]

{'loss': Array(0.12508248, dtype=float32), 'loss_cross_entropy': Array(0.1179653, dtype=float32)}


  9%|▉         | 89569/1000000 [4:06:12<26:35:54,  9.51it/s]

{'loss': Array(0.15878549, dtype=float32), 'loss_cross_entropy': Array(0.14927277, dtype=float32)}


  9%|▉         | 89579/1000000 [4:06:14<24:52:27, 10.17it/s]

{'loss': Array(0.14813994, dtype=float32), 'loss_cross_entropy': Array(0.13958812, dtype=float32)}


  9%|▉         | 89589/1000000 [4:06:16<27:55:23,  9.06it/s]

{'loss': Array(0.1454979, dtype=float32), 'loss_cross_entropy': Array(0.13690695, dtype=float32)}


  9%|▉         | 89599/1000000 [4:06:17<24:52:40, 10.17it/s]

{'loss': Array(0.14144933, dtype=float32), 'loss_cross_entropy': Array(0.13271724, dtype=float32)}


  9%|▉         | 89609/1000000 [4:06:19<37:11:05,  6.80it/s]

{'loss': Array(0.141299, dtype=float32), 'loss_cross_entropy': Array(0.13292341, dtype=float32)}


  9%|▉         | 89619/1000000 [4:06:20<25:42:39,  9.84it/s]

{'loss': Array(0.1298001, dtype=float32), 'loss_cross_entropy': Array(0.12217438, dtype=float32)}


  9%|▉         | 89629/1000000 [4:06:21<23:44:32, 10.65it/s]

{'loss': Array(0.16064055, dtype=float32), 'loss_cross_entropy': Array(0.1519769, dtype=float32)}


  9%|▉         | 89639/1000000 [4:06:23<28:03:34,  9.01it/s]

{'loss': Array(0.15108025, dtype=float32), 'loss_cross_entropy': Array(0.14227907, dtype=float32)}


  9%|▉         | 89649/1000000 [4:06:25<25:40:36,  9.85it/s]

{'loss': Array(0.146335, dtype=float32), 'loss_cross_entropy': Array(0.13736951, dtype=float32)}


  9%|▉         | 89659/1000000 [4:06:26<33:12:11,  7.62it/s]

{'loss': Array(0.15558372, dtype=float32), 'loss_cross_entropy': Array(0.14708938, dtype=float32)}


  9%|▉         | 89669/1000000 [4:06:28<25:01:02, 10.11it/s]

{'loss': Array(0.15629232, dtype=float32), 'loss_cross_entropy': Array(0.14740017, dtype=float32)}


  9%|▉         | 89679/1000000 [4:06:29<37:33:19,  6.73it/s]

{'loss': Array(0.17020579, dtype=float32), 'loss_cross_entropy': Array(0.16090104, dtype=float32)}


  9%|▉         | 89689/1000000 [4:06:31<25:35:38,  9.88it/s]

{'loss': Array(0.14657067, dtype=float32), 'loss_cross_entropy': Array(0.13795435, dtype=float32)}


  9%|▉         | 89699/1000000 [4:06:32<24:15:55, 10.42it/s]

{'loss': Array(0.17978217, dtype=float32), 'loss_cross_entropy': Array(0.17002313, dtype=float32)}


  9%|▉         | 89709/1000000 [4:06:34<29:58:19,  8.44it/s]

{'loss': Array(0.17096142, dtype=float32), 'loss_cross_entropy': Array(0.16182338, dtype=float32)}


  9%|▉         | 89719/1000000 [4:06:35<24:21:31, 10.38it/s]

{'loss': Array(0.1507893, dtype=float32), 'loss_cross_entropy': Array(0.14225684, dtype=float32)}


  9%|▉         | 89729/1000000 [4:06:37<33:44:33,  7.49it/s]

{'loss': Array(0.15020144, dtype=float32), 'loss_cross_entropy': Array(0.14187117, dtype=float32)}


  9%|▉         | 89739/1000000 [4:06:38<25:17:36, 10.00it/s]

{'loss': Array(0.16087686, dtype=float32), 'loss_cross_entropy': Array(0.15134838, dtype=float32)}


  9%|▉         | 89749/1000000 [4:06:40<23:44:50, 10.65it/s]

{'loss': Array(0.14948958, dtype=float32), 'loss_cross_entropy': Array(0.14085548, dtype=float32)}


  9%|▉         | 89759/1000000 [4:06:41<26:39:17,  9.49it/s]

{'loss': Array(0.15583886, dtype=float32), 'loss_cross_entropy': Array(0.14668606, dtype=float32)}


  9%|▉         | 89769/1000000 [4:06:43<23:58:16, 10.55it/s]

{'loss': Array(0.13891889, dtype=float32), 'loss_cross_entropy': Array(0.13065664, dtype=float32)}


  9%|▉         | 89779/1000000 [4:06:44<36:49:49,  6.86it/s]

{'loss': Array(0.1536609, dtype=float32), 'loss_cross_entropy': Array(0.14520861, dtype=float32)}


  9%|▉         | 89789/1000000 [4:06:46<25:31:25,  9.91it/s]

{'loss': Array(0.15052693, dtype=float32), 'loss_cross_entropy': Array(0.14267328, dtype=float32)}


  9%|▉         | 89799/1000000 [4:06:47<23:58:11, 10.55it/s]

{'loss': Array(0.14529414, dtype=float32), 'loss_cross_entropy': Array(0.13624883, dtype=float32)}


  9%|▉         | 89809/1000000 [4:06:49<29:44:45,  8.50it/s]

{'loss': Array(0.14272375, dtype=float32), 'loss_cross_entropy': Array(0.13410294, dtype=float32)}


  9%|▉         | 89819/1000000 [4:06:50<24:09:51, 10.46it/s]

{'loss': Array(0.14385657, dtype=float32), 'loss_cross_entropy': Array(0.13543074, dtype=float32)}


  9%|▉         | 89829/1000000 [4:06:52<33:09:51,  7.62it/s]

{'loss': Array(0.14314501, dtype=float32), 'loss_cross_entropy': Array(0.13457413, dtype=float32)}


  9%|▉         | 89839/1000000 [4:06:53<25:00:36, 10.11it/s]

{'loss': Array(0.14854376, dtype=float32), 'loss_cross_entropy': Array(0.13989402, dtype=float32)}


  9%|▉         | 89849/1000000 [4:06:55<33:06:50,  7.63it/s]

{'loss': Array(0.13689093, dtype=float32), 'loss_cross_entropy': Array(0.1285448, dtype=float32)}


  9%|▉         | 89859/1000000 [4:06:56<24:48:30, 10.19it/s]

{'loss': Array(0.15723442, dtype=float32), 'loss_cross_entropy': Array(0.14808516, dtype=float32)}


  9%|▉         | 89869/1000000 [4:06:57<23:49:32, 10.61it/s]

{'loss': Array(0.1283377, dtype=float32), 'loss_cross_entropy': Array(0.12050837, dtype=float32)}


  9%|▉         | 89879/1000000 [4:06:59<28:23:56,  8.90it/s]

{'loss': Array(0.13825561, dtype=float32), 'loss_cross_entropy': Array(0.13052976, dtype=float32)}


  9%|▉         | 89889/1000000 [4:07:01<24:42:56, 10.23it/s]

{'loss': Array(0.15983118, dtype=float32), 'loss_cross_entropy': Array(0.15110178, dtype=float32)}


  9%|▉         | 89899/1000000 [4:07:02<29:43:29,  8.50it/s]

{'loss': Array(0.14869149, dtype=float32), 'loss_cross_entropy': Array(0.14061688, dtype=float32)}


  9%|▉         | 89909/1000000 [4:07:04<24:15:11, 10.42it/s]

{'loss': Array(0.13399185, dtype=float32), 'loss_cross_entropy': Array(0.12651782, dtype=float32)}


  9%|▉         | 89919/1000000 [4:07:05<37:20:48,  6.77it/s]

{'loss': Array(0.13852476, dtype=float32), 'loss_cross_entropy': Array(0.13022842, dtype=float32)}


  9%|▉         | 89929/1000000 [4:07:07<25:44:23,  9.82it/s]

{'loss': Array(0.15558739, dtype=float32), 'loss_cross_entropy': Array(0.14729625, dtype=float32)}


  9%|▉         | 89939/1000000 [4:07:08<37:33:05,  6.73it/s]

{'loss': Array(0.1458993, dtype=float32), 'loss_cross_entropy': Array(0.13744448, dtype=float32)}


  9%|▉         | 89949/1000000 [4:07:10<26:20:34,  9.60it/s]

{'loss': Array(0.13798515, dtype=float32), 'loss_cross_entropy': Array(0.1304462, dtype=float32)}


  9%|▉         | 89959/1000000 [4:07:11<24:48:50, 10.19it/s]

{'loss': Array(0.15051943, dtype=float32), 'loss_cross_entropy': Array(0.14205892, dtype=float32)}


  9%|▉         | 89969/1000000 [4:07:13<34:49:22,  7.26it/s]

{'loss': Array(0.14828728, dtype=float32), 'loss_cross_entropy': Array(0.13954604, dtype=float32)}


  9%|▉         | 89979/1000000 [4:07:14<26:09:34,  9.66it/s]

{'loss': Array(0.15762763, dtype=float32), 'loss_cross_entropy': Array(0.1483732, dtype=float32)}


  9%|▉         | 89989/1000000 [4:07:16<38:59:27,  6.48it/s]

{'loss': Array(0.1450415, dtype=float32), 'loss_cross_entropy': Array(0.13725045, dtype=float32)}


  9%|▉         | 89999/1000000 [4:07:18<27:29:21,  9.20it/s]

{'loss': Array(0.16806363, dtype=float32), 'loss_cross_entropy': Array(0.1586306, dtype=float32)}


  9%|▉         | 90009/1000000 [4:07:26<83:57:04,  3.01it/s] 

{'loss': Array(0.13894469, dtype=float32), 'loss_cross_entropy': Array(0.13044737, dtype=float32)}


  9%|▉         | 90019/1000000 [4:07:27<36:44:21,  6.88it/s] 

{'loss': Array(0.13982663, dtype=float32), 'loss_cross_entropy': Array(0.13193743, dtype=float32)}


  9%|▉         | 90029/1000000 [4:07:29<25:59:06,  9.73it/s]

{'loss': Array(0.13657457, dtype=float32), 'loss_cross_entropy': Array(0.1286463, dtype=float32)}


  9%|▉         | 90039/1000000 [4:07:30<27:56:06,  9.05it/s]

{'loss': Array(0.1536351, dtype=float32), 'loss_cross_entropy': Array(0.14476743, dtype=float32)}


  9%|▉         | 90049/1000000 [4:07:32<24:53:41, 10.15it/s]

{'loss': Array(0.13553308, dtype=float32), 'loss_cross_entropy': Array(0.12776127, dtype=float32)}


  9%|▉         | 90059/1000000 [4:07:34<36:57:40,  6.84it/s]

{'loss': Array(0.1452531, dtype=float32), 'loss_cross_entropy': Array(0.13672976, dtype=float32)}


  9%|▉         | 90069/1000000 [4:07:35<25:39:52,  9.85it/s]

{'loss': Array(0.15345278, dtype=float32), 'loss_cross_entropy': Array(0.14491569, dtype=float32)}


  9%|▉         | 90079/1000000 [4:07:36<24:16:07, 10.41it/s]

{'loss': Array(0.13094412, dtype=float32), 'loss_cross_entropy': Array(0.12305764, dtype=float32)}


  9%|▉         | 90089/1000000 [4:07:38<30:10:14,  8.38it/s]

{'loss': Array(0.13653663, dtype=float32), 'loss_cross_entropy': Array(0.12814069, dtype=float32)}


  9%|▉         | 90099/1000000 [4:07:39<24:26:53, 10.34it/s]

{'loss': Array(0.14229453, dtype=float32), 'loss_cross_entropy': Array(0.13415836, dtype=float32)}


  9%|▉         | 90109/1000000 [4:07:41<37:10:47,  6.80it/s]

{'loss': Array(0.14955018, dtype=float32), 'loss_cross_entropy': Array(0.14090915, dtype=float32)}


  9%|▉         | 90119/1000000 [4:07:42<25:40:26,  9.84it/s]

{'loss': Array(0.1547996, dtype=float32), 'loss_cross_entropy': Array(0.14638151, dtype=float32)}


  9%|▉         | 90129/1000000 [4:07:44<23:31:12, 10.75it/s]

{'loss': Array(0.15158835, dtype=float32), 'loss_cross_entropy': Array(0.14312394, dtype=float32)}


  9%|▉         | 90139/1000000 [4:07:45<28:01:00,  9.02it/s]

{'loss': Array(0.14946574, dtype=float32), 'loss_cross_entropy': Array(0.14055184, dtype=float32)}


  9%|▉         | 90149/1000000 [4:07:47<24:17:34, 10.40it/s]

{'loss': Array(0.1626096, dtype=float32), 'loss_cross_entropy': Array(0.15353315, dtype=float32)}


  9%|▉         | 90159/1000000 [4:07:48<32:56:42,  7.67it/s]

{'loss': Array(0.13170055, dtype=float32), 'loss_cross_entropy': Array(0.12339928, dtype=float32)}


  9%|▉         | 90169/1000000 [4:07:50<24:29:42, 10.32it/s]

{'loss': Array(0.15752968, dtype=float32), 'loss_cross_entropy': Array(0.14837019, dtype=float32)}


  9%|▉         | 90179/1000000 [4:07:51<23:15:59, 10.86it/s]

{'loss': Array(0.14492884, dtype=float32), 'loss_cross_entropy': Array(0.13625078, dtype=float32)}


  9%|▉         | 90189/1000000 [4:07:53<30:27:20,  8.30it/s]

{'loss': Array(0.14580417, dtype=float32), 'loss_cross_entropy': Array(0.13712119, dtype=float32)}


  9%|▉         | 90199/1000000 [4:07:54<24:12:53, 10.44it/s]

{'loss': Array(0.13070619, dtype=float32), 'loss_cross_entropy': Array(0.12272318, dtype=float32)}


  9%|▉         | 90209/1000000 [4:07:55<23:20:04, 10.83it/s]

{'loss': Array(0.14616552, dtype=float32), 'loss_cross_entropy': Array(0.1383446, dtype=float32)}


  9%|▉         | 90219/1000000 [4:07:57<26:39:58,  9.48it/s]

{'loss': Array(0.12054918, dtype=float32), 'loss_cross_entropy': Array(0.11344486, dtype=float32)}


  9%|▉         | 90229/1000000 [4:07:59<24:45:40, 10.21it/s]

{'loss': Array(0.1474094, dtype=float32), 'loss_cross_entropy': Array(0.13833766, dtype=float32)}


  9%|▉         | 90239/1000000 [4:08:00<31:25:09,  8.04it/s]

{'loss': Array(0.15819934, dtype=float32), 'loss_cross_entropy': Array(0.14930081, dtype=float32)}


  9%|▉         | 90249/1000000 [4:08:02<24:34:18, 10.28it/s]

{'loss': Array(0.14487684, dtype=float32), 'loss_cross_entropy': Array(0.13610466, dtype=float32)}


  9%|▉         | 90259/1000000 [4:08:03<37:22:34,  6.76it/s]

{'loss': Array(0.15931843, dtype=float32), 'loss_cross_entropy': Array(0.15006715, dtype=float32)}


  9%|▉         | 90269/1000000 [4:08:05<26:41:52,  9.47it/s]

{'loss': Array(0.13976014, dtype=float32), 'loss_cross_entropy': Array(0.13152462, dtype=float32)}


  9%|▉         | 90279/1000000 [4:08:07<37:32:21,  6.73it/s]

{'loss': Array(0.14907704, dtype=float32), 'loss_cross_entropy': Array(0.13993809, dtype=float32)}


  9%|▉         | 90289/1000000 [4:08:08<26:36:14,  9.50it/s]

{'loss': Array(0.14895728, dtype=float32), 'loss_cross_entropy': Array(0.1408069, dtype=float32)}


  9%|▉         | 90299/1000000 [4:08:09<24:33:54, 10.29it/s]

{'loss': Array(0.14893328, dtype=float32), 'loss_cross_entropy': Array(0.14008206, dtype=float32)}


  9%|▉         | 90309/1000000 [4:08:11<34:39:17,  7.29it/s]

{'loss': Array(0.14974074, dtype=float32), 'loss_cross_entropy': Array(0.14026886, dtype=float32)}


  9%|▉         | 90319/1000000 [4:08:13<26:32:37,  9.52it/s]

{'loss': Array(0.1298687, dtype=float32), 'loss_cross_entropy': Array(0.12227523, dtype=float32)}


  9%|▉         | 90329/1000000 [4:08:14<38:55:49,  6.49it/s]

{'loss': Array(0.1332346, dtype=float32), 'loss_cross_entropy': Array(0.12525482, dtype=float32)}


  9%|▉         | 90339/1000000 [4:08:16<27:44:47,  9.11it/s]

{'loss': Array(0.15549856, dtype=float32), 'loss_cross_entropy': Array(0.14661896, dtype=float32)}


  9%|▉         | 90349/1000000 [4:08:17<25:04:11, 10.08it/s]

{'loss': Array(0.14009264, dtype=float32), 'loss_cross_entropy': Array(0.13156468, dtype=float32)}


  9%|▉         | 90359/1000000 [4:08:19<26:59:10,  9.36it/s]

{'loss': Array(0.15316129, dtype=float32), 'loss_cross_entropy': Array(0.14448568, dtype=float32)}


  9%|▉         | 90369/1000000 [4:08:20<23:57:34, 10.55it/s]

{'loss': Array(0.1648792, dtype=float32), 'loss_cross_entropy': Array(0.15542895, dtype=float32)}


  9%|▉         | 90379/1000000 [4:08:22<26:30:49,  9.53it/s]

{'loss': Array(0.12904808, dtype=float32), 'loss_cross_entropy': Array(0.12088555, dtype=float32)}


  9%|▉         | 90389/1000000 [4:08:23<24:02:54, 10.51it/s]

{'loss': Array(0.16154175, dtype=float32), 'loss_cross_entropy': Array(0.15245037, dtype=float32)}


  9%|▉         | 90399/1000000 [4:08:25<36:57:40,  6.84it/s]

{'loss': Array(0.1477079, dtype=float32), 'loss_cross_entropy': Array(0.13906547, dtype=float32)}


  9%|▉         | 90409/1000000 [4:08:26<25:47:19,  9.80it/s]

{'loss': Array(0.15036526, dtype=float32), 'loss_cross_entropy': Array(0.14159617, dtype=float32)}


  9%|▉         | 90419/1000000 [4:08:28<23:23:43, 10.80it/s]

{'loss': Array(0.16819863, dtype=float32), 'loss_cross_entropy': Array(0.15888266, dtype=float32)}


  9%|▉         | 90429/1000000 [4:08:29<30:12:29,  8.36it/s]

{'loss': Array(0.1316751, dtype=float32), 'loss_cross_entropy': Array(0.12360598, dtype=float32)}


  9%|▉         | 90439/1000000 [4:08:31<24:49:18, 10.18it/s]

{'loss': Array(0.14147975, dtype=float32), 'loss_cross_entropy': Array(0.13338178, dtype=float32)}


  9%|▉         | 90449/1000000 [4:08:33<36:39:48,  6.89it/s]

{'loss': Array(0.14419045, dtype=float32), 'loss_cross_entropy': Array(0.13619213, dtype=float32)}


  9%|▉         | 90459/1000000 [4:08:34<25:28:34,  9.92it/s]

{'loss': Array(0.12863807, dtype=float32), 'loss_cross_entropy': Array(0.12087768, dtype=float32)}


  9%|▉         | 90469/1000000 [4:08:35<23:42:32, 10.66it/s]

{'loss': Array(0.12957557, dtype=float32), 'loss_cross_entropy': Array(0.12187707, dtype=float32)}


  9%|▉         | 90479/1000000 [4:08:37<29:40:55,  8.51it/s]

{'loss': Array(0.15468712, dtype=float32), 'loss_cross_entropy': Array(0.1465562, dtype=float32)}


  9%|▉         | 90489/1000000 [4:08:38<25:22:10,  9.96it/s]

{'loss': Array(0.15055571, dtype=float32), 'loss_cross_entropy': Array(0.14207211, dtype=float32)}


  9%|▉         | 90499/1000000 [4:08:40<33:07:21,  7.63it/s]

{'loss': Array(0.14293706, dtype=float32), 'loss_cross_entropy': Array(0.13434653, dtype=float32)}


  9%|▉         | 90509/1000000 [4:08:48<82:54:35,  3.05it/s] 

{'loss': Array(0.1506857, dtype=float32), 'loss_cross_entropy': Array(0.14204824, dtype=float32)}


  9%|▉         | 90519/1000000 [4:08:50<42:48:23,  5.90it/s]

{'loss': Array(0.13120484, dtype=float32), 'loss_cross_entropy': Array(0.124267, dtype=float32)}


  9%|▉         | 90529/1000000 [4:08:51<26:58:10,  9.37it/s]

{'loss': Array(0.15250866, dtype=float32), 'loss_cross_entropy': Array(0.14289957, dtype=float32)}


  9%|▉         | 90539/1000000 [4:08:52<23:48:44, 10.61it/s]

{'loss': Array(0.1572375, dtype=float32), 'loss_cross_entropy': Array(0.14870813, dtype=float32)}


  9%|▉         | 90549/1000000 [4:08:54<30:47:43,  8.20it/s]

{'loss': Array(0.15417354, dtype=float32), 'loss_cross_entropy': Array(0.14455508, dtype=float32)}


  9%|▉         | 90559/1000000 [4:08:55<25:04:45, 10.07it/s]

{'loss': Array(0.1516588, dtype=float32), 'loss_cross_entropy': Array(0.14290063, dtype=float32)}


  9%|▉         | 90569/1000000 [4:08:57<24:02:16, 10.51it/s]

{'loss': Array(0.15980159, dtype=float32), 'loss_cross_entropy': Array(0.15065388, dtype=float32)}


  9%|▉         | 90579/1000000 [4:08:59<26:53:56,  9.39it/s]

{'loss': Array(0.155552, dtype=float32), 'loss_cross_entropy': Array(0.14712352, dtype=float32)}


  9%|▉         | 90589/1000000 [4:09:00<24:35:41, 10.27it/s]

{'loss': Array(0.15753976, dtype=float32), 'loss_cross_entropy': Array(0.14847116, dtype=float32)}


  9%|▉         | 90599/1000000 [4:09:02<30:57:57,  8.16it/s]

{'loss': Array(0.15820463, dtype=float32), 'loss_cross_entropy': Array(0.15035903, dtype=float32)}


  9%|▉         | 90609/1000000 [4:09:03<25:25:31,  9.94it/s]

{'loss': Array(0.13645636, dtype=float32), 'loss_cross_entropy': Array(0.12892525, dtype=float32)}


  9%|▉         | 90619/1000000 [4:09:05<33:28:32,  7.55it/s]

{'loss': Array(0.15200226, dtype=float32), 'loss_cross_entropy': Array(0.14312509, dtype=float32)}


  9%|▉         | 90629/1000000 [4:09:06<25:22:17,  9.96it/s]

{'loss': Array(0.13748704, dtype=float32), 'loss_cross_entropy': Array(0.12967426, dtype=float32)}


  9%|▉         | 90639/1000000 [4:09:08<24:07:10, 10.47it/s]

{'loss': Array(0.15005583, dtype=float32), 'loss_cross_entropy': Array(0.14122826, dtype=float32)}


  9%|▉         | 90649/1000000 [4:09:09<31:29:29,  8.02it/s]

{'loss': Array(0.16019994, dtype=float32), 'loss_cross_entropy': Array(0.15170507, dtype=float32)}


  9%|▉         | 90659/1000000 [4:09:11<25:31:53,  9.89it/s]

{'loss': Array(0.14259286, dtype=float32), 'loss_cross_entropy': Array(0.1346412, dtype=float32)}


  9%|▉         | 90669/1000000 [4:09:12<24:42:49, 10.22it/s]

{'loss': Array(0.1355037, dtype=float32), 'loss_cross_entropy': Array(0.12757851, dtype=float32)}


  9%|▉         | 90679/1000000 [4:09:14<28:01:25,  9.01it/s]

{'loss': Array(0.14263755, dtype=float32), 'loss_cross_entropy': Array(0.13438001, dtype=float32)}


  9%|▉         | 90689/1000000 [4:09:16<25:33:16,  9.88it/s]

{'loss': Array(0.16435155, dtype=float32), 'loss_cross_entropy': Array(0.1549316, dtype=float32)}


  9%|▉         | 90699/1000000 [4:09:17<31:16:25,  8.08it/s]

{'loss': Array(0.16776586, dtype=float32), 'loss_cross_entropy': Array(0.15867536, dtype=float32)}


  9%|▉         | 90709/1000000 [4:09:19<25:52:52,  9.76it/s]

{'loss': Array(0.14172818, dtype=float32), 'loss_cross_entropy': Array(0.13365443, dtype=float32)}


  9%|▉         | 90719/1000000 [4:09:20<38:03:29,  6.64it/s]

{'loss': Array(0.15136349, dtype=float32), 'loss_cross_entropy': Array(0.14262745, dtype=float32)}


  9%|▉         | 90729/1000000 [4:09:22<26:08:12,  9.66it/s]

{'loss': Array(0.14224799, dtype=float32), 'loss_cross_entropy': Array(0.13431601, dtype=float32)}


  9%|▉         | 90739/1000000 [4:09:23<23:48:32, 10.61it/s]

{'loss': Array(0.14329372, dtype=float32), 'loss_cross_entropy': Array(0.13515161, dtype=float32)}


  9%|▉         | 90749/1000000 [4:09:25<32:51:41,  7.69it/s]

{'loss': Array(0.15659812, dtype=float32), 'loss_cross_entropy': Array(0.14839634, dtype=float32)}


  9%|▉         | 90759/1000000 [4:09:26<25:23:22,  9.95it/s]

{'loss': Array(0.17323174, dtype=float32), 'loss_cross_entropy': Array(0.1633984, dtype=float32)}


  9%|▉         | 90769/1000000 [4:09:28<23:42:12, 10.66it/s]

{'loss': Array(0.15675561, dtype=float32), 'loss_cross_entropy': Array(0.14854656, dtype=float32)}


  9%|▉         | 90779/1000000 [4:09:29<28:20:50,  8.91it/s]

{'loss': Array(0.15495051, dtype=float32), 'loss_cross_entropy': Array(0.14716308, dtype=float32)}


  9%|▉         | 90789/1000000 [4:09:31<24:14:53, 10.42it/s]

{'loss': Array(0.12784244, dtype=float32), 'loss_cross_entropy': Array(0.12012716, dtype=float32)}


  9%|▉         | 90799/1000000 [4:09:32<30:22:50,  8.31it/s]

{'loss': Array(0.158405, dtype=float32), 'loss_cross_entropy': Array(0.14916332, dtype=float32)}


  9%|▉         | 90809/1000000 [4:09:34<24:34:26, 10.28it/s]

{'loss': Array(0.14890061, dtype=float32), 'loss_cross_entropy': Array(0.14041992, dtype=float32)}


  9%|▉         | 90819/1000000 [4:09:35<23:27:26, 10.77it/s]

{'loss': Array(0.16333018, dtype=float32), 'loss_cross_entropy': Array(0.1540034, dtype=float32)}


  9%|▉         | 90829/1000000 [4:09:37<26:05:39,  9.68it/s]

{'loss': Array(0.14316879, dtype=float32), 'loss_cross_entropy': Array(0.1349874, dtype=float32)}


  9%|▉         | 90839/1000000 [4:09:38<23:30:42, 10.74it/s]

{'loss': Array(0.16344106, dtype=float32), 'loss_cross_entropy': Array(0.15402128, dtype=float32)}


  9%|▉         | 90849/1000000 [4:09:40<32:50:33,  7.69it/s]

{'loss': Array(0.11783709, dtype=float32), 'loss_cross_entropy': Array(0.11047071, dtype=float32)}


  9%|▉         | 90859/1000000 [4:09:41<25:07:55, 10.05it/s]

{'loss': Array(0.14990495, dtype=float32), 'loss_cross_entropy': Array(0.14128421, dtype=float32)}


  9%|▉         | 90869/1000000 [4:09:43<23:46:57, 10.62it/s]

{'loss': Array(0.14918807, dtype=float32), 'loss_cross_entropy': Array(0.14056356, dtype=float32)}


  9%|▉         | 90879/1000000 [4:09:44<26:23:13,  9.57it/s]

{'loss': Array(0.14029889, dtype=float32), 'loss_cross_entropy': Array(0.13161215, dtype=float32)}


  9%|▉         | 90889/1000000 [4:09:46<23:40:53, 10.66it/s]

{'loss': Array(0.1418529, dtype=float32), 'loss_cross_entropy': Array(0.13341744, dtype=float32)}


  9%|▉         | 90899/1000000 [4:09:47<27:32:46,  9.17it/s]

{'loss': Array(0.15263842, dtype=float32), 'loss_cross_entropy': Array(0.14411986, dtype=float32)}


  9%|▉         | 90909/1000000 [4:09:49<23:46:28, 10.62it/s]

{'loss': Array(0.12841506, dtype=float32), 'loss_cross_entropy': Array(0.12062763, dtype=float32)}


  9%|▉         | 90919/1000000 [4:09:50<36:33:23,  6.91it/s]

{'loss': Array(0.15784541, dtype=float32), 'loss_cross_entropy': Array(0.14913985, dtype=float32)}


  9%|▉         | 90929/1000000 [4:09:52<26:10:28,  9.65it/s]

{'loss': Array(0.14713497, dtype=float32), 'loss_cross_entropy': Array(0.13894928, dtype=float32)}


  9%|▉         | 90939/1000000 [4:09:53<23:48:55, 10.60it/s]

{'loss': Array(0.14161162, dtype=float32), 'loss_cross_entropy': Array(0.13345172, dtype=float32)}


  9%|▉         | 90949/1000000 [4:09:55<30:12:34,  8.36it/s]

{'loss': Array(0.14630851, dtype=float32), 'loss_cross_entropy': Array(0.13837026, dtype=float32)}


  9%|▉         | 90959/1000000 [4:09:56<24:40:38, 10.23it/s]

{'loss': Array(0.14867006, dtype=float32), 'loss_cross_entropy': Array(0.14029641, dtype=float32)}


  9%|▉         | 90969/1000000 [4:09:58<33:12:49,  7.60it/s]

{'loss': Array(0.16126117, dtype=float32), 'loss_cross_entropy': Array(0.15272732, dtype=float32)}


  9%|▉         | 90979/1000000 [4:09:59<26:47:44,  9.42it/s]

{'loss': Array(0.15095726, dtype=float32), 'loss_cross_entropy': Array(0.14222196, dtype=float32)}


  9%|▉         | 90989/1000000 [4:10:01<33:42:12,  7.49it/s]

{'loss': Array(0.14015429, dtype=float32), 'loss_cross_entropy': Array(0.13213275, dtype=float32)}


  9%|▉         | 90999/1000000 [4:10:02<25:28:51,  9.91it/s]

{'loss': Array(0.16917154, dtype=float32), 'loss_cross_entropy': Array(0.15965645, dtype=float32)}


  9%|▉         | 91009/1000000 [4:10:10<83:23:01,  3.03it/s] 

{'loss': Array(0.1556649, dtype=float32), 'loss_cross_entropy': Array(0.14686958, dtype=float32)}


  9%|▉         | 91019/1000000 [4:10:12<39:06:10,  6.46it/s]

{'loss': Array(0.11585287, dtype=float32), 'loss_cross_entropy': Array(0.10851724, dtype=float32)}


  9%|▉         | 91029/1000000 [4:10:13<26:23:40,  9.57it/s]

{'loss': Array(0.12747149, dtype=float32), 'loss_cross_entropy': Array(0.12023105, dtype=float32)}


  9%|▉         | 91039/1000000 [4:10:15<34:00:55,  7.42it/s]

{'loss': Array(0.13292335, dtype=float32), 'loss_cross_entropy': Array(0.12560542, dtype=float32)}


  9%|▉         | 91049/1000000 [4:10:17<26:27:14,  9.54it/s]

{'loss': Array(0.16420051, dtype=float32), 'loss_cross_entropy': Array(0.15480888, dtype=float32)}


  9%|▉         | 91059/1000000 [4:10:18<38:51:43,  6.50it/s]

{'loss': Array(0.15068935, dtype=float32), 'loss_cross_entropy': Array(0.14205128, dtype=float32)}


  9%|▉         | 91069/1000000 [4:10:20<27:21:03,  9.23it/s]

{'loss': Array(0.1368057, dtype=float32), 'loss_cross_entropy': Array(0.12869047, dtype=float32)}


  9%|▉         | 91079/1000000 [4:10:21<25:05:41, 10.06it/s]

{'loss': Array(0.17701463, dtype=float32), 'loss_cross_entropy': Array(0.16682151, dtype=float32)}


  9%|▉         | 91089/1000000 [4:10:23<30:17:02,  8.34it/s]

{'loss': Array(0.15717065, dtype=float32), 'loss_cross_entropy': Array(0.14783217, dtype=float32)}


  9%|▉         | 91099/1000000 [4:10:24<24:28:32, 10.32it/s]

{'loss': Array(0.15529242, dtype=float32), 'loss_cross_entropy': Array(0.14656663, dtype=float32)}


  9%|▉         | 91109/1000000 [4:10:26<32:47:08,  7.70it/s]

{'loss': Array(0.14722376, dtype=float32), 'loss_cross_entropy': Array(0.13873674, dtype=float32)}


  9%|▉         | 91119/1000000 [4:10:27<25:00:11, 10.10it/s]

{'loss': Array(0.14318708, dtype=float32), 'loss_cross_entropy': Array(0.13502899, dtype=float32)}


  9%|▉         | 91129/1000000 [4:10:29<23:30:59, 10.74it/s]

{'loss': Array(0.14970233, dtype=float32), 'loss_cross_entropy': Array(0.14114574, dtype=float32)}


  9%|▉         | 91139/1000000 [4:10:30<26:33:30,  9.51it/s]

{'loss': Array(0.1352317, dtype=float32), 'loss_cross_entropy': Array(0.12728508, dtype=float32)}


  9%|▉         | 91149/1000000 [4:10:32<24:36:40, 10.26it/s]

{'loss': Array(0.14853977, dtype=float32), 'loss_cross_entropy': Array(0.13944851, dtype=float32)}


  9%|▉         | 91159/1000000 [4:10:34<36:57:27,  6.83it/s]

{'loss': Array(0.15688483, dtype=float32), 'loss_cross_entropy': Array(0.14783661, dtype=float32)}


  9%|▉         | 91169/1000000 [4:10:35<25:33:29,  9.88it/s]

{'loss': Array(0.14986812, dtype=float32), 'loss_cross_entropy': Array(0.14063816, dtype=float32)}


  9%|▉         | 91179/1000000 [4:10:36<24:08:15, 10.46it/s]

{'loss': Array(0.15311083, dtype=float32), 'loss_cross_entropy': Array(0.14431903, dtype=float32)}


  9%|▉         | 91189/1000000 [4:10:38<30:20:41,  8.32it/s]

{'loss': Array(0.14865737, dtype=float32), 'loss_cross_entropy': Array(0.13958953, dtype=float32)}


  9%|▉         | 91199/1000000 [4:10:39<24:34:28, 10.27it/s]

{'loss': Array(0.15061322, dtype=float32), 'loss_cross_entropy': Array(0.14218049, dtype=float32)}


  9%|▉         | 91209/1000000 [4:10:41<33:17:00,  7.58it/s]

{'loss': Array(0.13690285, dtype=float32), 'loss_cross_entropy': Array(0.12977862, dtype=float32)}


  9%|▉         | 91219/1000000 [4:10:42<25:01:08, 10.09it/s]

{'loss': Array(0.13090815, dtype=float32), 'loss_cross_entropy': Array(0.12316012, dtype=float32)}


  9%|▉         | 91229/1000000 [4:10:44<33:03:44,  7.64it/s]

{'loss': Array(0.14089262, dtype=float32), 'loss_cross_entropy': Array(0.13207607, dtype=float32)}


  9%|▉         | 91239/1000000 [4:10:46<24:40:47, 10.23it/s]

{'loss': Array(0.16547309, dtype=float32), 'loss_cross_entropy': Array(0.15622358, dtype=float32)}


  9%|▉         | 91249/1000000 [4:10:47<23:31:20, 10.73it/s]

{'loss': Array(0.14325778, dtype=float32), 'loss_cross_entropy': Array(0.13456783, dtype=float32)}


  9%|▉         | 91259/1000000 [4:10:49<27:53:42,  9.05it/s]

{'loss': Array(0.16419894, dtype=float32), 'loss_cross_entropy': Array(0.15468797, dtype=float32)}


  9%|▉         | 91269/1000000 [4:10:50<23:54:42, 10.56it/s]

{'loss': Array(0.15140437, dtype=float32), 'loss_cross_entropy': Array(0.14231014, dtype=float32)}


  9%|▉         | 91279/1000000 [4:10:52<29:49:43,  8.46it/s]

{'loss': Array(0.15137696, dtype=float32), 'loss_cross_entropy': Array(0.14247048, dtype=float32)}


  9%|▉         | 91289/1000000 [4:10:53<24:08:36, 10.45it/s]

{'loss': Array(0.13970335, dtype=float32), 'loss_cross_entropy': Array(0.13136643, dtype=float32)}


  9%|▉         | 91299/1000000 [4:10:55<37:28:39,  6.74it/s]

{'loss': Array(0.16150586, dtype=float32), 'loss_cross_entropy': Array(0.15264301, dtype=float32)}


  9%|▉         | 91309/1000000 [4:10:56<26:06:53,  9.67it/s]

{'loss': Array(0.13771665, dtype=float32), 'loss_cross_entropy': Array(0.1297829, dtype=float32)}


  9%|▉         | 91319/1000000 [4:10:58<38:06:52,  6.62it/s]

{'loss': Array(0.15371633, dtype=float32), 'loss_cross_entropy': Array(0.14549945, dtype=float32)}


  9%|▉         | 91329/1000000 [4:10:59<27:11:43,  9.28it/s]

{'loss': Array(0.16072078, dtype=float32), 'loss_cross_entropy': Array(0.15193777, dtype=float32)}


  9%|▉         | 91339/1000000 [4:11:01<24:42:07, 10.22it/s]

{'loss': Array(0.17587931, dtype=float32), 'loss_cross_entropy': Array(0.1663227, dtype=float32)}


  9%|▉         | 91349/1000000 [4:11:02<34:34:38,  7.30it/s]

{'loss': Array(0.15489401, dtype=float32), 'loss_cross_entropy': Array(0.14613937, dtype=float32)}


  9%|▉         | 91359/1000000 [4:11:04<25:59:47,  9.71it/s]

{'loss': Array(0.15549634, dtype=float32), 'loss_cross_entropy': Array(0.14728425, dtype=float32)}


  9%|▉         | 91369/1000000 [4:11:06<39:08:09,  6.45it/s]

{'loss': Array(0.15149316, dtype=float32), 'loss_cross_entropy': Array(0.14299336, dtype=float32)}


  9%|▉         | 91379/1000000 [4:11:07<27:56:04,  9.04it/s]

{'loss': Array(0.15647963, dtype=float32), 'loss_cross_entropy': Array(0.14793104, dtype=float32)}


  9%|▉         | 91389/1000000 [4:11:09<26:04:06,  9.68it/s]

{'loss': Array(0.15317701, dtype=float32), 'loss_cross_entropy': Array(0.14414737, dtype=float32)}


  9%|▉         | 91399/1000000 [4:11:10<27:38:34,  9.13it/s]

{'loss': Array(0.14499037, dtype=float32), 'loss_cross_entropy': Array(0.13742629, dtype=float32)}


  9%|▉         | 91409/1000000 [4:11:12<24:00:37, 10.51it/s]

{'loss': Array(0.17243166, dtype=float32), 'loss_cross_entropy': Array(0.16313265, dtype=float32)}


  9%|▉         | 91419/1000000 [4:11:13<27:13:45,  9.27it/s]

{'loss': Array(0.15194272, dtype=float32), 'loss_cross_entropy': Array(0.14279093, dtype=float32)}


  9%|▉         | 91429/1000000 [4:11:15<24:16:32, 10.40it/s]

{'loss': Array(0.14084284, dtype=float32), 'loss_cross_entropy': Array(0.1321828, dtype=float32)}


  9%|▉         | 91439/1000000 [4:11:16<37:07:13,  6.80it/s]

{'loss': Array(0.1404421, dtype=float32), 'loss_cross_entropy': Array(0.13245265, dtype=float32)}


  9%|▉         | 91449/1000000 [4:11:18<26:56:43,  9.37it/s]

{'loss': Array(0.13675343, dtype=float32), 'loss_cross_entropy': Array(0.12866509, dtype=float32)}


  9%|▉         | 91459/1000000 [4:11:19<25:18:14,  9.97it/s]

{'loss': Array(0.15400296, dtype=float32), 'loss_cross_entropy': Array(0.14529058, dtype=float32)}


  9%|▉         | 91469/1000000 [4:11:21<30:31:51,  8.27it/s]

{'loss': Array(0.15870722, dtype=float32), 'loss_cross_entropy': Array(0.1505455, dtype=float32)}


  9%|▉         | 91479/1000000 [4:11:22<25:04:23, 10.07it/s]

{'loss': Array(0.15453973, dtype=float32), 'loss_cross_entropy': Array(0.14581731, dtype=float32)}


  9%|▉         | 91489/1000000 [4:11:24<36:55:59,  6.83it/s]

{'loss': Array(0.16305979, dtype=float32), 'loss_cross_entropy': Array(0.15398328, dtype=float32)}


  9%|▉         | 91499/1000000 [4:11:25<25:25:35,  9.93it/s]

{'loss': Array(0.13952842, dtype=float32), 'loss_cross_entropy': Array(0.13166897, dtype=float32)}


  9%|▉         | 91509/1000000 [4:11:33<82:12:35,  3.07it/s] 

{'loss': Array(0.16633205, dtype=float32), 'loss_cross_entropy': Array(0.15768974, dtype=float32)}


  9%|▉         | 91519/1000000 [4:11:35<38:06:07,  6.62it/s]

{'loss': Array(0.14170924, dtype=float32), 'loss_cross_entropy': Array(0.13349792, dtype=float32)}


  9%|▉         | 91529/1000000 [4:11:36<26:19:36,  9.59it/s]

{'loss': Array(0.1445085, dtype=float32), 'loss_cross_entropy': Array(0.1363287, dtype=float32)}


  9%|▉         | 91539/1000000 [4:11:38<32:07:01,  7.86it/s]

{'loss': Array(0.1412697, dtype=float32), 'loss_cross_entropy': Array(0.13362153, dtype=float32)}


  9%|▉         | 91549/1000000 [4:11:40<24:57:04, 10.11it/s]

{'loss': Array(0.13678749, dtype=float32), 'loss_cross_entropy': Array(0.12908828, dtype=float32)}


  9%|▉         | 91559/1000000 [4:11:41<37:24:10,  6.75it/s]

{'loss': Array(0.14988194, dtype=float32), 'loss_cross_entropy': Array(0.14117198, dtype=float32)}


  9%|▉         | 91569/1000000 [4:11:43<26:01:07,  9.70it/s]

{'loss': Array(0.12877955, dtype=float32), 'loss_cross_entropy': Array(0.12121476, dtype=float32)}


  9%|▉         | 91579/1000000 [4:11:44<23:32:31, 10.72it/s]

{'loss': Array(0.12735105, dtype=float32), 'loss_cross_entropy': Array(0.11946813, dtype=float32)}


  9%|▉         | 91589/1000000 [4:11:46<26:28:21,  9.53it/s]

{'loss': Array(0.1424188, dtype=float32), 'loss_cross_entropy': Array(0.13385539, dtype=float32)}


  9%|▉         | 91599/1000000 [4:11:47<23:36:49, 10.69it/s]

{'loss': Array(0.15812442, dtype=float32), 'loss_cross_entropy': Array(0.14924978, dtype=float32)}


  9%|▉         | 91609/1000000 [4:11:49<36:53:18,  6.84it/s]

{'loss': Array(0.1655227, dtype=float32), 'loss_cross_entropy': Array(0.1553818, dtype=float32)}


  9%|▉         | 91619/1000000 [4:11:50<25:22:51,  9.94it/s]

{'loss': Array(0.15513815, dtype=float32), 'loss_cross_entropy': Array(0.14599662, dtype=float32)}


  9%|▉         | 91629/1000000 [4:11:51<23:45:11, 10.62it/s]

{'loss': Array(0.14406121, dtype=float32), 'loss_cross_entropy': Array(0.13636851, dtype=float32)}


  9%|▉         | 91639/1000000 [4:11:53<28:32:58,  8.84it/s]

{'loss': Array(0.14690627, dtype=float32), 'loss_cross_entropy': Array(0.13880831, dtype=float32)}


  9%|▉         | 91649/1000000 [4:11:55<24:46:58, 10.18it/s]

{'loss': Array(0.14335556, dtype=float32), 'loss_cross_entropy': Array(0.13500474, dtype=float32)}


  9%|▉         | 91659/1000000 [4:11:56<34:07:00,  7.40it/s]

{'loss': Array(0.1426555, dtype=float32), 'loss_cross_entropy': Array(0.13469386, dtype=float32)}


  9%|▉         | 91669/1000000 [4:11:58<25:21:13,  9.95it/s]

{'loss': Array(0.14664738, dtype=float32), 'loss_cross_entropy': Array(0.13878138, dtype=float32)}


  9%|▉         | 91679/1000000 [4:11:59<38:32:25,  6.55it/s]

{'loss': Array(0.14332807, dtype=float32), 'loss_cross_entropy': Array(0.13495934, dtype=float32)}


  9%|▉         | 91689/1000000 [4:12:01<26:28:06,  9.53it/s]

{'loss': Array(0.14492345, dtype=float32), 'loss_cross_entropy': Array(0.13627051, dtype=float32)}


  9%|▉         | 91699/1000000 [4:12:02<25:07:13, 10.04it/s]

{'loss': Array(0.1503478, dtype=float32), 'loss_cross_entropy': Array(0.14133732, dtype=float32)}


  9%|▉         | 91709/1000000 [4:12:04<30:54:43,  8.16it/s]

{'loss': Array(0.15719163, dtype=float32), 'loss_cross_entropy': Array(0.14757706, dtype=float32)}


  9%|▉         | 91719/1000000 [4:12:05<25:30:47,  9.89it/s]

{'loss': Array(0.1475604, dtype=float32), 'loss_cross_entropy': Array(0.13924901, dtype=float32)}


  9%|▉         | 91729/1000000 [4:12:07<34:38:17,  7.28it/s]

{'loss': Array(0.15552284, dtype=float32), 'loss_cross_entropy': Array(0.1468068, dtype=float32)}


  9%|▉         | 91739/1000000 [4:12:09<26:12:36,  9.63it/s]

{'loss': Array(0.14702746, dtype=float32), 'loss_cross_entropy': Array(0.13812387, dtype=float32)}


  9%|▉         | 91749/1000000 [4:12:10<25:11:18, 10.02it/s]

{'loss': Array(0.14739162, dtype=float32), 'loss_cross_entropy': Array(0.13900232, dtype=float32)}


  9%|▉         | 91759/1000000 [4:12:12<27:22:45,  9.21it/s]

{'loss': Array(0.15021023, dtype=float32), 'loss_cross_entropy': Array(0.14209107, dtype=float32)}


  9%|▉         | 91769/1000000 [4:12:13<24:14:36, 10.41it/s]

{'loss': Array(0.16183688, dtype=float32), 'loss_cross_entropy': Array(0.15308271, dtype=float32)}


  9%|▉         | 91779/1000000 [4:12:15<37:43:45,  6.69it/s]

{'loss': Array(0.1458856, dtype=float32), 'loss_cross_entropy': Array(0.13765533, dtype=float32)}


  9%|▉         | 91789/1000000 [4:12:16<25:54:59,  9.73it/s]

{'loss': Array(0.15674391, dtype=float32), 'loss_cross_entropy': Array(0.14829595, dtype=float32)}


  9%|▉         | 91799/1000000 [4:12:18<23:45:11, 10.62it/s]

{'loss': Array(0.16318637, dtype=float32), 'loss_cross_entropy': Array(0.15380959, dtype=float32)}


  9%|▉         | 91809/1000000 [4:12:19<29:59:21,  8.41it/s]

{'loss': Array(0.13396919, dtype=float32), 'loss_cross_entropy': Array(0.12582025, dtype=float32)}


  9%|▉         | 91819/1000000 [4:12:21<23:59:50, 10.51it/s]

{'loss': Array(0.15818416, dtype=float32), 'loss_cross_entropy': Array(0.1490665, dtype=float32)}


  9%|▉         | 91829/1000000 [4:12:22<32:56:54,  7.66it/s]

{'loss': Array(0.1526229, dtype=float32), 'loss_cross_entropy': Array(0.14349036, dtype=float32)}


  9%|▉         | 91839/1000000 [4:12:24<24:40:08, 10.23it/s]

{'loss': Array(0.16391025, dtype=float32), 'loss_cross_entropy': Array(0.15499857, dtype=float32)}


  9%|▉         | 91849/1000000 [4:12:25<33:04:32,  7.63it/s]

{'loss': Array(0.15395012, dtype=float32), 'loss_cross_entropy': Array(0.14473626, dtype=float32)}


  9%|▉         | 91859/1000000 [4:12:27<25:08:36, 10.03it/s]

{'loss': Array(0.13918872, dtype=float32), 'loss_cross_entropy': Array(0.1310151, dtype=float32)}


  9%|▉         | 91869/1000000 [4:12:28<23:30:01, 10.73it/s]

{'loss': Array(0.14100488, dtype=float32), 'loss_cross_entropy': Array(0.13266185, dtype=float32)}


  9%|▉         | 91879/1000000 [4:12:30<27:34:03,  9.15it/s]

{'loss': Array(0.1596744, dtype=float32), 'loss_cross_entropy': Array(0.15093146, dtype=float32)}


  9%|▉         | 91889/1000000 [4:12:31<23:36:39, 10.68it/s]

{'loss': Array(0.15780182, dtype=float32), 'loss_cross_entropy': Array(0.1493343, dtype=float32)}


  9%|▉         | 91899/1000000 [4:12:33<30:47:30,  8.19it/s]

{'loss': Array(0.13722372, dtype=float32), 'loss_cross_entropy': Array(0.1296622, dtype=float32)}


  9%|▉         | 91909/1000000 [4:12:34<24:27:08, 10.32it/s]

{'loss': Array(0.13135816, dtype=float32), 'loss_cross_entropy': Array(0.12373324, dtype=float32)}


  9%|▉         | 91919/1000000 [4:12:36<37:41:13,  6.69it/s]

{'loss': Array(0.15196662, dtype=float32), 'loss_cross_entropy': Array(0.14380275, dtype=float32)}


  9%|▉         | 91929/1000000 [4:12:37<25:40:23,  9.83it/s]

{'loss': Array(0.1468098, dtype=float32), 'loss_cross_entropy': Array(0.1382045, dtype=float32)}


  9%|▉         | 91939/1000000 [4:12:39<37:13:32,  6.78it/s]

{'loss': Array(0.16947137, dtype=float32), 'loss_cross_entropy': Array(0.16009434, dtype=float32)}


  9%|▉         | 91949/1000000 [4:12:40<25:58:37,  9.71it/s]

{'loss': Array(0.13956556, dtype=float32), 'loss_cross_entropy': Array(0.131182, dtype=float32)}


  9%|▉         | 91959/1000000 [4:12:42<23:35:56, 10.69it/s]

{'loss': Array(0.14605065, dtype=float32), 'loss_cross_entropy': Array(0.1380384, dtype=float32)}


  9%|▉         | 91969/1000000 [4:12:43<32:21:35,  7.79it/s]

{'loss': Array(0.14452761, dtype=float32), 'loss_cross_entropy': Array(0.13641323, dtype=float32)}


  9%|▉         | 91979/1000000 [4:12:45<25:14:42,  9.99it/s]

{'loss': Array(0.17046498, dtype=float32), 'loss_cross_entropy': Array(0.1612541, dtype=float32)}


  9%|▉         | 91989/1000000 [4:12:46<36:53:11,  6.84it/s]

{'loss': Array(0.16077645, dtype=float32), 'loss_cross_entropy': Array(0.15223174, dtype=float32)}


  9%|▉         | 91999/1000000 [4:12:48<25:29:54,  9.89it/s]

{'loss': Array(0.16087478, dtype=float32), 'loss_cross_entropy': Array(0.15198854, dtype=float32)}


  9%|▉         | 92009/1000000 [4:12:56<84:18:54,  2.99it/s] 

{'loss': Array(0.15679805, dtype=float32), 'loss_cross_entropy': Array(0.14831243, dtype=float32)}


  9%|▉         | 92019/1000000 [4:12:58<37:28:36,  6.73it/s] 

{'loss': Array(0.13959329, dtype=float32), 'loss_cross_entropy': Array(0.1319379, dtype=float32)}


  9%|▉         | 92029/1000000 [4:12:59<26:08:52,  9.65it/s]

{'loss': Array(0.1622529, dtype=float32), 'loss_cross_entropy': Array(0.15296708, dtype=float32)}


  9%|▉         | 92039/1000000 [4:13:01<27:32:26,  9.16it/s]

{'loss': Array(0.13568437, dtype=float32), 'loss_cross_entropy': Array(0.12758899, dtype=float32)}


  9%|▉         | 92049/1000000 [4:13:02<24:31:35, 10.28it/s]

{'loss': Array(0.15367633, dtype=float32), 'loss_cross_entropy': Array(0.14441264, dtype=float32)}


  9%|▉         | 92059/1000000 [4:13:04<38:40:53,  6.52it/s]

{'loss': Array(0.14147204, dtype=float32), 'loss_cross_entropy': Array(0.13303863, dtype=float32)}


  9%|▉         | 92069/1000000 [4:13:05<26:58:37,  9.35it/s]

{'loss': Array(0.14980543, dtype=float32), 'loss_cross_entropy': Array(0.14080803, dtype=float32)}


  9%|▉         | 92079/1000000 [4:13:07<25:06:10, 10.05it/s]

{'loss': Array(0.13209113, dtype=float32), 'loss_cross_entropy': Array(0.12400664, dtype=float32)}


  9%|▉         | 92089/1000000 [4:13:09<31:48:11,  7.93it/s]

{'loss': Array(0.15551004, dtype=float32), 'loss_cross_entropy': Array(0.1463599, dtype=float32)}


  9%|▉         | 92099/1000000 [4:13:10<26:11:45,  9.63it/s]

{'loss': Array(0.13606136, dtype=float32), 'loss_cross_entropy': Array(0.12777992, dtype=float32)}


  9%|▉         | 92109/1000000 [4:13:12<37:43:53,  6.68it/s]

{'loss': Array(0.1447708, dtype=float32), 'loss_cross_entropy': Array(0.13629602, dtype=float32)}


  9%|▉         | 92119/1000000 [4:13:13<26:06:11,  9.66it/s]

{'loss': Array(0.1520535, dtype=float32), 'loss_cross_entropy': Array(0.14321834, dtype=float32)}


  9%|▉         | 92129/1000000 [4:13:15<23:58:44, 10.52it/s]

{'loss': Array(0.13991646, dtype=float32), 'loss_cross_entropy': Array(0.13198148, dtype=float32)}


  9%|▉         | 92139/1000000 [4:13:16<28:25:46,  8.87it/s]

{'loss': Array(0.14259392, dtype=float32), 'loss_cross_entropy': Array(0.13415828, dtype=float32)}


  9%|▉         | 92149/1000000 [4:13:18<24:37:57, 10.24it/s]

{'loss': Array(0.14193307, dtype=float32), 'loss_cross_entropy': Array(0.13290022, dtype=float32)}


  9%|▉         | 92159/1000000 [4:13:19<33:43:37,  7.48it/s]

{'loss': Array(0.14685157, dtype=float32), 'loss_cross_entropy': Array(0.13866265, dtype=float32)}


  9%|▉         | 92169/1000000 [4:13:21<25:02:14, 10.07it/s]

{'loss': Array(0.14810836, dtype=float32), 'loss_cross_entropy': Array(0.13887441, dtype=float32)}


  9%|▉         | 92179/1000000 [4:13:22<23:32:29, 10.71it/s]

{'loss': Array(0.1567647, dtype=float32), 'loss_cross_entropy': Array(0.1483655, dtype=float32)}


  9%|▉         | 92189/1000000 [4:13:24<29:38:09,  8.51it/s]

{'loss': Array(0.15295154, dtype=float32), 'loss_cross_entropy': Array(0.14418346, dtype=float32)}


  9%|▉         | 92199/1000000 [4:13:25<25:04:07, 10.06it/s]

{'loss': Array(0.15567435, dtype=float32), 'loss_cross_entropy': Array(0.14646135, dtype=float32)}


  9%|▉         | 92209/1000000 [4:13:27<23:55:30, 10.54it/s]

{'loss': Array(0.13212165, dtype=float32), 'loss_cross_entropy': Array(0.12431552, dtype=float32)}


  9%|▉         | 92219/1000000 [4:13:28<26:32:24,  9.50it/s]

{'loss': Array(0.13438794, dtype=float32), 'loss_cross_entropy': Array(0.12652688, dtype=float32)}


  9%|▉         | 92229/1000000 [4:13:30<23:58:46, 10.52it/s]

{'loss': Array(0.15290691, dtype=float32), 'loss_cross_entropy': Array(0.144595, dtype=float32)}


  9%|▉         | 92239/1000000 [4:13:31<30:41:40,  8.21it/s]

{'loss': Array(0.14503185, dtype=float32), 'loss_cross_entropy': Array(0.13634004, dtype=float32)}


  9%|▉         | 92249/1000000 [4:13:33<24:24:43, 10.33it/s]

{'loss': Array(0.14940059, dtype=float32), 'loss_cross_entropy': Array(0.1410812, dtype=float32)}


  9%|▉         | 92259/1000000 [4:13:35<37:29:23,  6.73it/s]

{'loss': Array(0.1378121, dtype=float32), 'loss_cross_entropy': Array(0.12933062, dtype=float32)}


  9%|▉         | 92269/1000000 [4:13:36<25:56:48,  9.72it/s]

{'loss': Array(0.14816904, dtype=float32), 'loss_cross_entropy': Array(0.13935529, dtype=float32)}


  9%|▉         | 92279/1000000 [4:13:38<37:08:37,  6.79it/s]

{'loss': Array(0.14528768, dtype=float32), 'loss_cross_entropy': Array(0.1366576, dtype=float32)}


  9%|▉         | 92289/1000000 [4:13:39<26:00:29,  9.69it/s]

{'loss': Array(0.12910627, dtype=float32), 'loss_cross_entropy': Array(0.12119608, dtype=float32)}


  9%|▉         | 92299/1000000 [4:13:40<23:41:05, 10.65it/s]

{'loss': Array(0.15063919, dtype=float32), 'loss_cross_entropy': Array(0.1422981, dtype=float32)}


  9%|▉         | 92309/1000000 [4:13:42<32:47:18,  7.69it/s]

{'loss': Array(0.13451062, dtype=float32), 'loss_cross_entropy': Array(0.12675388, dtype=float32)}


  9%|▉         | 92319/1000000 [4:13:43<25:15:45,  9.98it/s]

{'loss': Array(0.1472828, dtype=float32), 'loss_cross_entropy': Array(0.13914183, dtype=float32)}


  9%|▉         | 92329/1000000 [4:13:45<37:10:09,  6.78it/s]

{'loss': Array(0.14426385, dtype=float32), 'loss_cross_entropy': Array(0.13603696, dtype=float32)}


  9%|▉         | 92339/1000000 [4:13:46<25:51:55,  9.75it/s]

{'loss': Array(0.1348692, dtype=float32), 'loss_cross_entropy': Array(0.12749436, dtype=float32)}


  9%|▉         | 92349/1000000 [4:13:48<24:07:03, 10.45it/s]

{'loss': Array(0.16890684, dtype=float32), 'loss_cross_entropy': Array(0.16008863, dtype=float32)}


  9%|▉         | 92359/1000000 [4:13:50<26:35:22,  9.48it/s]

{'loss': Array(0.15450783, dtype=float32), 'loss_cross_entropy': Array(0.14550348, dtype=float32)}


  9%|▉         | 92369/1000000 [4:13:51<23:26:27, 10.76it/s]

{'loss': Array(0.15918525, dtype=float32), 'loss_cross_entropy': Array(0.14978425, dtype=float32)}


  9%|▉         | 92379/1000000 [4:13:53<26:45:35,  9.42it/s]

{'loss': Array(0.13489753, dtype=float32), 'loss_cross_entropy': Array(0.12682468, dtype=float32)}


  9%|▉         | 92389/1000000 [4:13:54<23:46:05, 10.61it/s]

{'loss': Array(0.15105091, dtype=float32), 'loss_cross_entropy': Array(0.1426317, dtype=float32)}


  9%|▉         | 92399/1000000 [4:13:56<36:53:44,  6.83it/s]

{'loss': Array(0.14190479, dtype=float32), 'loss_cross_entropy': Array(0.13370366, dtype=float32)}


  9%|▉         | 92409/1000000 [4:13:57<25:13:02, 10.00it/s]

{'loss': Array(0.16648458, dtype=float32), 'loss_cross_entropy': Array(0.15712136, dtype=float32)}


  9%|▉         | 92419/1000000 [4:13:58<24:30:30, 10.29it/s]

{'loss': Array(0.16126865, dtype=float32), 'loss_cross_entropy': Array(0.15241249, dtype=float32)}


  9%|▉         | 92429/1000000 [4:14:00<30:45:30,  8.20it/s]

{'loss': Array(0.1498851, dtype=float32), 'loss_cross_entropy': Array(0.14137825, dtype=float32)}


  9%|▉         | 92439/1000000 [4:14:02<24:53:19, 10.13it/s]

{'loss': Array(0.15193935, dtype=float32), 'loss_cross_entropy': Array(0.14398502, dtype=float32)}


  9%|▉         | 92449/1000000 [4:14:03<38:51:41,  6.49it/s]

{'loss': Array(0.1513983, dtype=float32), 'loss_cross_entropy': Array(0.1430343, dtype=float32)}


  9%|▉         | 92459/1000000 [4:14:05<27:00:58,  9.33it/s]

{'loss': Array(0.13667516, dtype=float32), 'loss_cross_entropy': Array(0.12879659, dtype=float32)}


  9%|▉         | 92469/1000000 [4:14:06<25:07:01, 10.04it/s]

{'loss': Array(0.12706779, dtype=float32), 'loss_cross_entropy': Array(0.11960306, dtype=float32)}


  9%|▉         | 92479/1000000 [4:14:08<29:06:29,  8.66it/s]

{'loss': Array(0.15291022, dtype=float32), 'loss_cross_entropy': Array(0.1443911, dtype=float32)}


  9%|▉         | 92489/1000000 [4:14:09<25:48:47,  9.77it/s]

{'loss': Array(0.14445591, dtype=float32), 'loss_cross_entropy': Array(0.13671963, dtype=float32)}


  9%|▉         | 92499/1000000 [4:14:11<33:49:41,  7.45it/s]

{'loss': Array(0.13936986, dtype=float32), 'loss_cross_entropy': Array(0.1313936, dtype=float32)}


  9%|▉         | 92509/1000000 [4:14:19<84:16:19,  2.99it/s] 

{'loss': Array(0.17050342, dtype=float32), 'loss_cross_entropy': Array(0.16070664, dtype=float32)}


  9%|▉         | 92519/1000000 [4:14:21<43:08:01,  5.84it/s]

{'loss': Array(0.1487495, dtype=float32), 'loss_cross_entropy': Array(0.14028324, dtype=float32)}


  9%|▉         | 92529/1000000 [4:14:22<26:15:00,  9.60it/s]

{'loss': Array(0.12968867, dtype=float32), 'loss_cross_entropy': Array(0.12210389, dtype=float32)}


  9%|▉         | 92539/1000000 [4:14:23<24:11:24, 10.42it/s]

{'loss': Array(0.1677029, dtype=float32), 'loss_cross_entropy': Array(0.15790756, dtype=float32)}


  9%|▉         | 92549/1000000 [4:14:25<30:42:35,  8.21it/s]

{'loss': Array(0.15666525, dtype=float32), 'loss_cross_entropy': Array(0.14774801, dtype=float32)}


  9%|▉         | 92559/1000000 [4:14:27<24:28:55, 10.30it/s]

{'loss': Array(0.15365045, dtype=float32), 'loss_cross_entropy': Array(0.14511578, dtype=float32)}


  9%|▉         | 92569/1000000 [4:14:28<23:14:14, 10.85it/s]

{'loss': Array(0.15395945, dtype=float32), 'loss_cross_entropy': Array(0.1447161, dtype=float32)}


  9%|▉         | 92579/1000000 [4:14:30<27:02:32,  9.32it/s]

{'loss': Array(0.1388539, dtype=float32), 'loss_cross_entropy': Array(0.13068609, dtype=float32)}


  9%|▉         | 92589/1000000 [4:14:31<23:48:50, 10.58it/s]

{'loss': Array(0.1569462, dtype=float32), 'loss_cross_entropy': Array(0.14781328, dtype=float32)}


  9%|▉         | 92599/1000000 [4:14:33<29:46:46,  8.46it/s]

{'loss': Array(0.12226365, dtype=float32), 'loss_cross_entropy': Array(0.11465517, dtype=float32)}


  9%|▉         | 92609/1000000 [4:14:34<24:24:00, 10.33it/s]

{'loss': Array(0.13492025, dtype=float32), 'loss_cross_entropy': Array(0.1272995, dtype=float32)}


  9%|▉         | 92619/1000000 [4:14:36<33:36:15,  7.50it/s]

{'loss': Array(0.14774632, dtype=float32), 'loss_cross_entropy': Array(0.13942164, dtype=float32)}


  9%|▉         | 92629/1000000 [4:14:37<25:09:58, 10.02it/s]

{'loss': Array(0.15174936, dtype=float32), 'loss_cross_entropy': Array(0.14256571, dtype=float32)}


  9%|▉         | 92639/1000000 [4:14:39<24:01:41, 10.49it/s]

{'loss': Array(0.1683062, dtype=float32), 'loss_cross_entropy': Array(0.15869339, dtype=float32)}


  9%|▉         | 92649/1000000 [4:14:40<29:52:20,  8.44it/s]

{'loss': Array(0.13641518, dtype=float32), 'loss_cross_entropy': Array(0.12819996, dtype=float32)}


  9%|▉         | 92659/1000000 [4:14:42<25:09:33, 10.02it/s]

{'loss': Array(0.1678652, dtype=float32), 'loss_cross_entropy': Array(0.15845767, dtype=float32)}


  9%|▉         | 92669/1000000 [4:14:43<23:20:10, 10.80it/s]

{'loss': Array(0.14100823, dtype=float32), 'loss_cross_entropy': Array(0.1332904, dtype=float32)}


  9%|▉         | 92679/1000000 [4:14:45<26:10:29,  9.63it/s]

{'loss': Array(0.13967092, dtype=float32), 'loss_cross_entropy': Array(0.13115668, dtype=float32)}


  9%|▉         | 92689/1000000 [4:14:46<23:49:16, 10.58it/s]

{'loss': Array(0.14217809, dtype=float32), 'loss_cross_entropy': Array(0.13386476, dtype=float32)}


  9%|▉         | 92699/1000000 [4:14:48<29:43:43,  8.48it/s]

{'loss': Array(0.1587351, dtype=float32), 'loss_cross_entropy': Array(0.15007825, dtype=float32)}


  9%|▉         | 92709/1000000 [4:14:49<24:17:55, 10.37it/s]

{'loss': Array(0.15906084, dtype=float32), 'loss_cross_entropy': Array(0.14980936, dtype=float32)}


  9%|▉         | 92719/1000000 [4:14:51<36:58:58,  6.81it/s]

{'loss': Array(0.15782595, dtype=float32), 'loss_cross_entropy': Array(0.14911237, dtype=float32)}


  9%|▉         | 92729/1000000 [4:14:52<25:57:48,  9.71it/s]

{'loss': Array(0.15513141, dtype=float32), 'loss_cross_entropy': Array(0.14578344, dtype=float32)}


  9%|▉         | 92739/1000000 [4:14:53<24:05:42, 10.46it/s]

{'loss': Array(0.15236475, dtype=float32), 'loss_cross_entropy': Array(0.14375107, dtype=float32)}


  9%|▉         | 92749/1000000 [4:14:55<32:47:01,  7.69it/s]

{'loss': Array(0.15248589, dtype=float32), 'loss_cross_entropy': Array(0.14403893, dtype=float32)}


  9%|▉         | 92759/1000000 [4:14:57<24:32:23, 10.27it/s]

{'loss': Array(0.14126475, dtype=float32), 'loss_cross_entropy': Array(0.1334277, dtype=float32)}


  9%|▉         | 92769/1000000 [4:14:58<23:02:13, 10.94it/s]

{'loss': Array(0.14655556, dtype=float32), 'loss_cross_entropy': Array(0.13782154, dtype=float32)}


  9%|▉         | 92779/1000000 [4:15:00<28:27:21,  8.86it/s]

{'loss': Array(0.1560049, dtype=float32), 'loss_cross_entropy': Array(0.14763822, dtype=float32)}


  9%|▉         | 92789/1000000 [4:15:01<24:58:03, 10.09it/s]

{'loss': Array(0.12755796, dtype=float32), 'loss_cross_entropy': Array(0.11969481, dtype=float32)}


  9%|▉         | 92799/1000000 [4:15:03<30:05:01,  8.38it/s]

{'loss': Array(0.14353345, dtype=float32), 'loss_cross_entropy': Array(0.13567546, dtype=float32)}


  9%|▉         | 92809/1000000 [4:15:04<24:18:55, 10.36it/s]

{'loss': Array(0.1402437, dtype=float32), 'loss_cross_entropy': Array(0.13237749, dtype=float32)}


  9%|▉         | 92819/1000000 [4:15:05<24:14:13, 10.40it/s]

{'loss': Array(0.14248982, dtype=float32), 'loss_cross_entropy': Array(0.13479853, dtype=float32)}


  9%|▉         | 92829/1000000 [4:15:07<27:07:53,  9.29it/s]

{'loss': Array(0.1555717, dtype=float32), 'loss_cross_entropy': Array(0.14655519, dtype=float32)}


  9%|▉         | 92839/1000000 [4:15:09<24:52:57, 10.13it/s]

{'loss': Array(0.14343756, dtype=float32), 'loss_cross_entropy': Array(0.13531801, dtype=float32)}


  9%|▉         | 92849/1000000 [4:15:10<34:16:46,  7.35it/s]

{'loss': Array(0.13617508, dtype=float32), 'loss_cross_entropy': Array(0.12842993, dtype=float32)}


  9%|▉         | 92859/1000000 [4:15:12<26:22:29,  9.55it/s]

{'loss': Array(0.13986568, dtype=float32), 'loss_cross_entropy': Array(0.1314755, dtype=float32)}


  9%|▉         | 92869/1000000 [4:15:13<25:06:59, 10.03it/s]

{'loss': Array(0.15153919, dtype=float32), 'loss_cross_entropy': Array(0.14265658, dtype=float32)}


  9%|▉         | 92879/1000000 [4:15:15<28:26:32,  8.86it/s]

{'loss': Array(0.13403656, dtype=float32), 'loss_cross_entropy': Array(0.12628947, dtype=float32)}


  9%|▉         | 92889/1000000 [4:15:16<24:47:39, 10.16it/s]

{'loss': Array(0.14136069, dtype=float32), 'loss_cross_entropy': Array(0.13337263, dtype=float32)}


  9%|▉         | 92899/1000000 [4:15:18<28:21:47,  8.88it/s]

{'loss': Array(0.1393568, dtype=float32), 'loss_cross_entropy': Array(0.13104653, dtype=float32)}


  9%|▉         | 92909/1000000 [4:15:19<23:44:31, 10.61it/s]

{'loss': Array(0.15087473, dtype=float32), 'loss_cross_entropy': Array(0.14254573, dtype=float32)}


  9%|▉         | 92919/1000000 [4:15:21<36:51:36,  6.84it/s]

{'loss': Array(0.1457483, dtype=float32), 'loss_cross_entropy': Array(0.13738261, dtype=float32)}


  9%|▉         | 92929/1000000 [4:15:23<25:26:56,  9.90it/s]

{'loss': Array(0.15629739, dtype=float32), 'loss_cross_entropy': Array(0.1475917, dtype=float32)}


  9%|▉         | 92939/1000000 [4:15:24<23:54:25, 10.54it/s]

{'loss': Array(0.15042531, dtype=float32), 'loss_cross_entropy': Array(0.14186107, dtype=float32)}


  9%|▉         | 92949/1000000 [4:15:26<30:03:49,  8.38it/s]

{'loss': Array(0.14878075, dtype=float32), 'loss_cross_entropy': Array(0.14070442, dtype=float32)}


  9%|▉         | 92959/1000000 [4:15:27<24:11:19, 10.42it/s]

{'loss': Array(0.17228217, dtype=float32), 'loss_cross_entropy': Array(0.16267315, dtype=float32)}


  9%|▉         | 92969/1000000 [4:15:29<33:18:07,  7.57it/s]

{'loss': Array(0.1347488, dtype=float32), 'loss_cross_entropy': Array(0.12688386, dtype=float32)}


  9%|▉         | 92979/1000000 [4:15:30<25:02:36, 10.06it/s]

{'loss': Array(0.13203415, dtype=float32), 'loss_cross_entropy': Array(0.12414568, dtype=float32)}


  9%|▉         | 92989/1000000 [4:15:32<33:08:52,  7.60it/s]

{'loss': Array(0.13754492, dtype=float32), 'loss_cross_entropy': Array(0.12961198, dtype=float32)}


  9%|▉         | 92999/1000000 [4:15:33<24:44:19, 10.18it/s]

{'loss': Array(0.14888488, dtype=float32), 'loss_cross_entropy': Array(0.14016268, dtype=float32)}


  9%|▉         | 93009/1000000 [4:15:41<80:11:11,  3.14it/s] 

{'loss': Array(0.15489823, dtype=float32), 'loss_cross_entropy': Array(0.14619596, dtype=float32)}


  9%|▉         | 93019/1000000 [4:15:42<37:51:29,  6.65it/s]

{'loss': Array(0.15385714, dtype=float32), 'loss_cross_entropy': Array(0.14505388, dtype=float32)}


  9%|▉         | 93029/1000000 [4:15:44<25:52:48,  9.73it/s]

{'loss': Array(0.15014696, dtype=float32), 'loss_cross_entropy': Array(0.14218867, dtype=float32)}


  9%|▉         | 93039/1000000 [4:15:46<33:05:01,  7.62it/s]

{'loss': Array(0.14593543, dtype=float32), 'loss_cross_entropy': Array(0.13769852, dtype=float32)}


  9%|▉         | 93049/1000000 [4:15:47<24:48:23, 10.16it/s]

{'loss': Array(0.13647225, dtype=float32), 'loss_cross_entropy': Array(0.12891045, dtype=float32)}


  9%|▉         | 93059/1000000 [4:15:49<37:27:51,  6.72it/s]

{'loss': Array(0.13777657, dtype=float32), 'loss_cross_entropy': Array(0.1299284, dtype=float32)}


  9%|▉         | 93069/1000000 [4:15:50<26:06:52,  9.65it/s]

{'loss': Array(0.14603142, dtype=float32), 'loss_cross_entropy': Array(0.13750748, dtype=float32)}


  9%|▉         | 93079/1000000 [4:15:51<24:46:44, 10.17it/s]

{'loss': Array(0.13978305, dtype=float32), 'loss_cross_entropy': Array(0.13143575, dtype=float32)}


  9%|▉         | 93089/1000000 [4:15:53<30:08:20,  8.36it/s]

{'loss': Array(0.14104827, dtype=float32), 'loss_cross_entropy': Array(0.13247123, dtype=float32)}


  9%|▉         | 93099/1000000 [4:15:55<24:30:44, 10.28it/s]

{'loss': Array(0.1490767, dtype=float32), 'loss_cross_entropy': Array(0.14050107, dtype=float32)}


  9%|▉         | 93109/1000000 [4:15:56<32:59:40,  7.63it/s]

{'loss': Array(0.14656904, dtype=float32), 'loss_cross_entropy': Array(0.13808806, dtype=float32)}


  9%|▉         | 93119/1000000 [4:15:58<25:09:55, 10.01it/s]

{'loss': Array(0.15029582, dtype=float32), 'loss_cross_entropy': Array(0.1419534, dtype=float32)}


  9%|▉         | 93129/1000000 [4:15:59<23:45:48, 10.60it/s]

{'loss': Array(0.1337177, dtype=float32), 'loss_cross_entropy': Array(0.1257991, dtype=float32)}


  9%|▉         | 93139/1000000 [4:16:01<27:38:39,  9.11it/s]

{'loss': Array(0.17128174, dtype=float32), 'loss_cross_entropy': Array(0.16229829, dtype=float32)}


  9%|▉         | 93149/1000000 [4:16:02<24:52:28, 10.13it/s]

{'loss': Array(0.13574195, dtype=float32), 'loss_cross_entropy': Array(0.12734398, dtype=float32)}


  9%|▉         | 93159/1000000 [4:16:04<37:23:01,  6.74it/s]

{'loss': Array(0.14066377, dtype=float32), 'loss_cross_entropy': Array(0.1322213, dtype=float32)}


  9%|▉         | 93169/1000000 [4:16:05<26:12:43,  9.61it/s]

{'loss': Array(0.14359881, dtype=float32), 'loss_cross_entropy': Array(0.13531378, dtype=float32)}


  9%|▉         | 93179/1000000 [4:16:07<24:24:44, 10.32it/s]

{'loss': Array(0.15605508, dtype=float32), 'loss_cross_entropy': Array(0.14719282, dtype=float32)}


  9%|▉         | 93189/1000000 [4:16:08<31:00:35,  8.12it/s]

{'loss': Array(0.13583703, dtype=float32), 'loss_cross_entropy': Array(0.12771578, dtype=float32)}


  9%|▉         | 93199/1000000 [4:16:10<26:10:06,  9.63it/s]

{'loss': Array(0.13327748, dtype=float32), 'loss_cross_entropy': Array(0.12517706, dtype=float32)}


  9%|▉         | 93209/1000000 [4:16:12<34:48:23,  7.24it/s]

{'loss': Array(0.13992737, dtype=float32), 'loss_cross_entropy': Array(0.13136223, dtype=float32)}


  9%|▉         | 93219/1000000 [4:16:13<26:09:19,  9.63it/s]

{'loss': Array(0.14123425, dtype=float32), 'loss_cross_entropy': Array(0.13337962, dtype=float32)}


  9%|▉         | 93229/1000000 [4:16:15<33:47:30,  7.45it/s]

{'loss': Array(0.13507788, dtype=float32), 'loss_cross_entropy': Array(0.12717338, dtype=float32)}


  9%|▉         | 93239/1000000 [4:16:16<25:14:54,  9.98it/s]

{'loss': Array(0.15200672, dtype=float32), 'loss_cross_entropy': Array(0.14379592, dtype=float32)}


  9%|▉         | 93249/1000000 [4:16:18<24:22:58, 10.33it/s]

{'loss': Array(0.13771515, dtype=float32), 'loss_cross_entropy': Array(0.1292678, dtype=float32)}


  9%|▉         | 93259/1000000 [4:16:19<28:40:33,  8.78it/s]

{'loss': Array(0.14947121, dtype=float32), 'loss_cross_entropy': Array(0.14098416, dtype=float32)}


  9%|▉         | 93269/1000000 [4:16:21<24:01:56, 10.48it/s]

{'loss': Array(0.17300388, dtype=float32), 'loss_cross_entropy': Array(0.16338892, dtype=float32)}


  9%|▉         | 93279/1000000 [4:16:22<30:32:57,  8.24it/s]

{'loss': Array(0.14849487, dtype=float32), 'loss_cross_entropy': Array(0.1404346, dtype=float32)}


  9%|▉         | 93289/1000000 [4:16:24<24:30:05, 10.28it/s]

{'loss': Array(0.14417659, dtype=float32), 'loss_cross_entropy': Array(0.13628505, dtype=float32)}


  9%|▉         | 93299/1000000 [4:16:26<37:11:33,  6.77it/s]

{'loss': Array(0.12740219, dtype=float32), 'loss_cross_entropy': Array(0.12045822, dtype=float32)}


  9%|▉         | 93309/1000000 [4:16:27<25:42:40,  9.80it/s]

{'loss': Array(0.15903254, dtype=float32), 'loss_cross_entropy': Array(0.15005957, dtype=float32)}


  9%|▉         | 93319/1000000 [4:16:29<37:14:17,  6.76it/s]

{'loss': Array(0.15640108, dtype=float32), 'loss_cross_entropy': Array(0.14699082, dtype=float32)}


  9%|▉         | 93329/1000000 [4:16:30<25:47:49,  9.76it/s]

{'loss': Array(0.15715149, dtype=float32), 'loss_cross_entropy': Array(0.14818096, dtype=float32)}


  9%|▉         | 93339/1000000 [4:16:31<23:56:28, 10.52it/s]

{'loss': Array(0.14376493, dtype=float32), 'loss_cross_entropy': Array(0.1356784, dtype=float32)}


  9%|▉         | 93349/1000000 [4:16:33<32:39:34,  7.71it/s]

{'loss': Array(0.16055924, dtype=float32), 'loss_cross_entropy': Array(0.15114574, dtype=float32)}


  9%|▉         | 93359/1000000 [4:16:34<25:17:46,  9.96it/s]

{'loss': Array(0.1342955, dtype=float32), 'loss_cross_entropy': Array(0.12644346, dtype=float32)}


  9%|▉         | 93369/1000000 [4:16:36<36:56:14,  6.82it/s]

{'loss': Array(0.15434712, dtype=float32), 'loss_cross_entropy': Array(0.14579712, dtype=float32)}


  9%|▉         | 93379/1000000 [4:16:37<26:17:24,  9.58it/s]

{'loss': Array(0.17687905, dtype=float32), 'loss_cross_entropy': Array(0.16721635, dtype=float32)}


  9%|▉         | 93389/1000000 [4:16:39<24:33:22, 10.26it/s]

{'loss': Array(0.15263627, dtype=float32), 'loss_cross_entropy': Array(0.14342153, dtype=float32)}


  9%|▉         | 93399/1000000 [4:16:41<27:03:55,  9.30it/s]

{'loss': Array(0.13673554, dtype=float32), 'loss_cross_entropy': Array(0.12862802, dtype=float32)}


  9%|▉         | 93409/1000000 [4:16:42<23:28:53, 10.72it/s]

{'loss': Array(0.15598868, dtype=float32), 'loss_cross_entropy': Array(0.14754798, dtype=float32)}


  9%|▉         | 93419/1000000 [4:16:44<26:28:45,  9.51it/s]

{'loss': Array(0.1283155, dtype=float32), 'loss_cross_entropy': Array(0.12040837, dtype=float32)}


  9%|▉         | 93429/1000000 [4:16:45<23:55:50, 10.52it/s]

{'loss': Array(0.16344567, dtype=float32), 'loss_cross_entropy': Array(0.1537955, dtype=float32)}


  9%|▉         | 93439/1000000 [4:16:47<37:41:34,  6.68it/s]

{'loss': Array(0.13784987, dtype=float32), 'loss_cross_entropy': Array(0.12938511, dtype=float32)}


  9%|▉         | 93449/1000000 [4:16:48<26:00:55,  9.68it/s]

{'loss': Array(0.13380657, dtype=float32), 'loss_cross_entropy': Array(0.12592615, dtype=float32)}


  9%|▉         | 93459/1000000 [4:16:49<23:42:01, 10.62it/s]

{'loss': Array(0.14539562, dtype=float32), 'loss_cross_entropy': Array(0.13666195, dtype=float32)}


  9%|▉         | 93469/1000000 [4:16:51<30:35:10,  8.23it/s]

{'loss': Array(0.15773217, dtype=float32), 'loss_cross_entropy': Array(0.14882727, dtype=float32)}


  9%|▉         | 93479/1000000 [4:16:53<24:29:32, 10.28it/s]

{'loss': Array(0.14409806, dtype=float32), 'loss_cross_entropy': Array(0.13659017, dtype=float32)}


  9%|▉         | 93489/1000000 [4:16:54<36:45:04,  6.85it/s]

{'loss': Array(0.15186009, dtype=float32), 'loss_cross_entropy': Array(0.14327262, dtype=float32)}


  9%|▉         | 93499/1000000 [4:16:56<26:22:30,  9.55it/s]

{'loss': Array(0.1346493, dtype=float32), 'loss_cross_entropy': Array(0.12640955, dtype=float32)}


  9%|▉         | 93509/1000000 [4:17:04<83:53:59,  3.00it/s] 

{'loss': Array(0.13058156, dtype=float32), 'loss_cross_entropy': Array(0.12334951, dtype=float32)}


  9%|▉         | 93519/1000000 [4:17:05<38:31:12,  6.54it/s]

{'loss': Array(0.14753865, dtype=float32), 'loss_cross_entropy': Array(0.13894232, dtype=float32)}


  9%|▉         | 93529/1000000 [4:17:07<26:20:25,  9.56it/s]

{'loss': Array(0.13973984, dtype=float32), 'loss_cross_entropy': Array(0.13113217, dtype=float32)}


  9%|▉         | 93539/1000000 [4:17:08<31:14:33,  8.06it/s]

{'loss': Array(0.12968005, dtype=float32), 'loss_cross_entropy': Array(0.12185042, dtype=float32)}


  9%|▉         | 93549/1000000 [4:17:10<25:25:12,  9.91it/s]

{'loss': Array(0.14663176, dtype=float32), 'loss_cross_entropy': Array(0.13828646, dtype=float32)}


  9%|▉         | 93559/1000000 [4:17:12<38:29:24,  6.54it/s]

{'loss': Array(0.1361011, dtype=float32), 'loss_cross_entropy': Array(0.12809914, dtype=float32)}


  9%|▉         | 93569/1000000 [4:17:13<27:10:06,  9.27it/s]

{'loss': Array(0.14507847, dtype=float32), 'loss_cross_entropy': Array(0.13701217, dtype=float32)}


  9%|▉         | 93579/1000000 [4:17:15<25:03:16, 10.05it/s]

{'loss': Array(0.129313, dtype=float32), 'loss_cross_entropy': Array(0.1215549, dtype=float32)}


  9%|▉         | 93589/1000000 [4:17:16<26:46:15,  9.40it/s]

{'loss': Array(0.144767, dtype=float32), 'loss_cross_entropy': Array(0.13724683, dtype=float32)}


  9%|▉         | 93599/1000000 [4:17:18<23:42:28, 10.62it/s]

{'loss': Array(0.14310421, dtype=float32), 'loss_cross_entropy': Array(0.13504255, dtype=float32)}


  9%|▉         | 93609/1000000 [4:17:19<37:19:49,  6.74it/s]

{'loss': Array(0.13611938, dtype=float32), 'loss_cross_entropy': Array(0.12813945, dtype=float32)}


  9%|▉         | 93619/1000000 [4:17:21<26:13:10,  9.60it/s]

{'loss': Array(0.15124713, dtype=float32), 'loss_cross_entropy': Array(0.14243205, dtype=float32)}


  9%|▉         | 93629/1000000 [4:17:22<24:18:43, 10.36it/s]

{'loss': Array(0.14424317, dtype=float32), 'loss_cross_entropy': Array(0.13593191, dtype=float32)}


  9%|▉         | 93639/1000000 [4:17:24<28:42:34,  8.77it/s]

{'loss': Array(0.14485632, dtype=float32), 'loss_cross_entropy': Array(0.1364556, dtype=float32)}


  9%|▉         | 93649/1000000 [4:17:25<24:19:33, 10.35it/s]

{'loss': Array(0.13340352, dtype=float32), 'loss_cross_entropy': Array(0.12511377, dtype=float32)}


  9%|▉         | 93659/1000000 [4:17:27<33:07:48,  7.60it/s]

{'loss': Array(0.15061568, dtype=float32), 'loss_cross_entropy': Array(0.14211354, dtype=float32)}


  9%|▉         | 93669/1000000 [4:17:28<25:07:05, 10.02it/s]

{'loss': Array(0.15784945, dtype=float32), 'loss_cross_entropy': Array(0.1486402, dtype=float32)}


  9%|▉         | 93679/1000000 [4:17:30<37:31:43,  6.71it/s]

{'loss': Array(0.14018758, dtype=float32), 'loss_cross_entropy': Array(0.13211818, dtype=float32)}


  9%|▉         | 93689/1000000 [4:17:31<25:26:45,  9.89it/s]

{'loss': Array(0.14433709, dtype=float32), 'loss_cross_entropy': Array(0.13675249, dtype=float32)}


  9%|▉         | 93699/1000000 [4:17:33<23:30:18, 10.71it/s]

{'loss': Array(0.15491755, dtype=float32), 'loss_cross_entropy': Array(0.14579387, dtype=float32)}


  9%|▉         | 93709/1000000 [4:17:34<30:05:14,  8.37it/s]

{'loss': Array(0.1541528, dtype=float32), 'loss_cross_entropy': Array(0.14544673, dtype=float32)}


  9%|▉         | 93719/1000000 [4:17:36<24:05:34, 10.45it/s]

{'loss': Array(0.14381252, dtype=float32), 'loss_cross_entropy': Array(0.13607888, dtype=float32)}


  9%|▉         | 93729/1000000 [4:17:37<33:29:52,  7.52it/s]

{'loss': Array(0.14040829, dtype=float32), 'loss_cross_entropy': Array(0.13237606, dtype=float32)}


  9%|▉         | 93739/1000000 [4:17:39<24:58:50, 10.08it/s]

{'loss': Array(0.13966735, dtype=float32), 'loss_cross_entropy': Array(0.13144915, dtype=float32)}


  9%|▉         | 93749/1000000 [4:17:40<23:43:07, 10.61it/s]

{'loss': Array(0.14236072, dtype=float32), 'loss_cross_entropy': Array(0.13391314, dtype=float32)}


  9%|▉         | 93759/1000000 [4:17:42<26:34:55,  9.47it/s]

{'loss': Array(0.153055, dtype=float32), 'loss_cross_entropy': Array(0.1447569, dtype=float32)}


  9%|▉         | 93769/1000000 [4:17:43<23:32:54, 10.69it/s]

{'loss': Array(0.12863488, dtype=float32), 'loss_cross_entropy': Array(0.12096643, dtype=float32)}


  9%|▉         | 93779/1000000 [4:17:45<36:49:55,  6.83it/s]

{'loss': Array(0.14108239, dtype=float32), 'loss_cross_entropy': Array(0.13334553, dtype=float32)}


  9%|▉         | 93789/1000000 [4:17:46<26:06:22,  9.64it/s]

{'loss': Array(0.11334091, dtype=float32), 'loss_cross_entropy': Array(0.10598554, dtype=float32)}


  9%|▉         | 93799/1000000 [4:17:48<24:11:57, 10.40it/s]

{'loss': Array(0.15908754, dtype=float32), 'loss_cross_entropy': Array(0.15056431, dtype=float32)}


  9%|▉         | 93809/1000000 [4:17:49<30:37:25,  8.22it/s]

{'loss': Array(0.15383478, dtype=float32), 'loss_cross_entropy': Array(0.14512084, dtype=float32)}


  9%|▉         | 93819/1000000 [4:17:51<24:34:57, 10.24it/s]

{'loss': Array(0.12843953, dtype=float32), 'loss_cross_entropy': Array(0.12151233, dtype=float32)}


  9%|▉         | 93829/1000000 [4:17:53<32:53:05,  7.65it/s]

{'loss': Array(0.14952023, dtype=float32), 'loss_cross_entropy': Array(0.14134562, dtype=float32)}


  9%|▉         | 93839/1000000 [4:17:54<24:27:13, 10.29it/s]

{'loss': Array(0.15269232, dtype=float32), 'loss_cross_entropy': Array(0.14427446, dtype=float32)}


  9%|▉         | 93849/1000000 [4:17:56<33:04:31,  7.61it/s]

{'loss': Array(0.13544981, dtype=float32), 'loss_cross_entropy': Array(0.1278234, dtype=float32)}


  9%|▉         | 93859/1000000 [4:17:57<25:11:45,  9.99it/s]

{'loss': Array(0.13641772, dtype=float32), 'loss_cross_entropy': Array(0.12838946, dtype=float32)}


  9%|▉         | 93869/1000000 [4:17:58<24:03:23, 10.46it/s]

{'loss': Array(0.1535925, dtype=float32), 'loss_cross_entropy': Array(0.14458717, dtype=float32)}


  9%|▉         | 93879/1000000 [4:18:00<28:58:12,  8.69it/s]

{'loss': Array(0.13911955, dtype=float32), 'loss_cross_entropy': Array(0.13103639, dtype=float32)}


  9%|▉         | 93889/1000000 [4:18:02<24:47:57, 10.15it/s]

{'loss': Array(0.1493387, dtype=float32), 'loss_cross_entropy': Array(0.14032646, dtype=float32)}


  9%|▉         | 93899/1000000 [4:18:03<31:18:41,  8.04it/s]

{'loss': Array(0.15401557, dtype=float32), 'loss_cross_entropy': Array(0.14562091, dtype=float32)}


  9%|▉         | 93909/1000000 [4:18:05<25:43:46,  9.78it/s]

{'loss': Array(0.1568406, dtype=float32), 'loss_cross_entropy': Array(0.14761427, dtype=float32)}


  9%|▉         | 93919/1000000 [4:18:07<39:24:31,  6.39it/s]

{'loss': Array(0.16344896, dtype=float32), 'loss_cross_entropy': Array(0.15399256, dtype=float32)}


  9%|▉         | 93929/1000000 [4:18:08<27:14:16,  9.24it/s]

{'loss': Array(0.16541478, dtype=float32), 'loss_cross_entropy': Array(0.15648352, dtype=float32)}


  9%|▉         | 93939/1000000 [4:18:10<39:09:04,  6.43it/s]

{'loss': Array(0.14813603, dtype=float32), 'loss_cross_entropy': Array(0.14003268, dtype=float32)}


  9%|▉         | 93949/1000000 [4:18:11<27:04:42,  9.29it/s]

{'loss': Array(0.14224458, dtype=float32), 'loss_cross_entropy': Array(0.13418601, dtype=float32)}


  9%|▉         | 93959/1000000 [4:18:13<25:10:00, 10.00it/s]

{'loss': Array(0.14955364, dtype=float32), 'loss_cross_entropy': Array(0.14100991, dtype=float32)}


  9%|▉         | 93969/1000000 [4:18:14<34:35:57,  7.27it/s]

{'loss': Array(0.1381668, dtype=float32), 'loss_cross_entropy': Array(0.13026185, dtype=float32)}


  9%|▉         | 93979/1000000 [4:18:16<25:59:42,  9.68it/s]

{'loss': Array(0.16589199, dtype=float32), 'loss_cross_entropy': Array(0.1574123, dtype=float32)}


  9%|▉         | 93989/1000000 [4:18:18<38:50:02,  6.48it/s]

{'loss': Array(0.1340245, dtype=float32), 'loss_cross_entropy': Array(0.12650274, dtype=float32)}


  9%|▉         | 93999/1000000 [4:18:19<26:31:18,  9.49it/s]

{'loss': Array(0.16438422, dtype=float32), 'loss_cross_entropy': Array(0.15466058, dtype=float32)}


  9%|▉         | 94009/1000000 [4:18:27<85:26:20,  2.95it/s] 

{'loss': Array(0.12961863, dtype=float32), 'loss_cross_entropy': Array(0.12249418, dtype=float32)}


  9%|▉         | 94019/1000000 [4:18:29<36:48:52,  6.84it/s] 

{'loss': Array(0.16050093, dtype=float32), 'loss_cross_entropy': Array(0.15212958, dtype=float32)}


  9%|▉         | 94029/1000000 [4:18:30<26:05:15,  9.65it/s]

{'loss': Array(0.13809629, dtype=float32), 'loss_cross_entropy': Array(0.1306053, dtype=float32)}


  9%|▉         | 94039/1000000 [4:18:32<27:38:38,  9.10it/s]

{'loss': Array(0.16718714, dtype=float32), 'loss_cross_entropy': Array(0.15814836, dtype=float32)}


  9%|▉         | 94049/1000000 [4:18:33<24:29:53, 10.27it/s]

{'loss': Array(0.13941276, dtype=float32), 'loss_cross_entropy': Array(0.13088779, dtype=float32)}


  9%|▉         | 94059/1000000 [4:18:35<37:12:21,  6.76it/s]

{'loss': Array(0.14322487, dtype=float32), 'loss_cross_entropy': Array(0.13495165, dtype=float32)}


  9%|▉         | 94069/1000000 [4:18:36<26:26:29,  9.52it/s]

{'loss': Array(0.13598904, dtype=float32), 'loss_cross_entropy': Array(0.12824567, dtype=float32)}


  9%|▉         | 94079/1000000 [4:18:38<24:32:06, 10.26it/s]

{'loss': Array(0.14261399, dtype=float32), 'loss_cross_entropy': Array(0.13422622, dtype=float32)}


  9%|▉         | 94089/1000000 [4:18:40<30:23:55,  8.28it/s]

{'loss': Array(0.15045741, dtype=float32), 'loss_cross_entropy': Array(0.14223479, dtype=float32)}


  9%|▉         | 94099/1000000 [4:18:41<24:22:20, 10.32it/s]

{'loss': Array(0.15259624, dtype=float32), 'loss_cross_entropy': Array(0.14444937, dtype=float32)}


  9%|▉         | 94109/1000000 [4:18:43<37:12:42,  6.76it/s]

{'loss': Array(0.13881476, dtype=float32), 'loss_cross_entropy': Array(0.1308113, dtype=float32)}


  9%|▉         | 94119/1000000 [4:18:44<25:38:44,  9.81it/s]

{'loss': Array(0.14212331, dtype=float32), 'loss_cross_entropy': Array(0.13411546, dtype=float32)}


  9%|▉         | 94129/1000000 [4:18:45<24:14:20, 10.38it/s]

{'loss': Array(0.14204542, dtype=float32), 'loss_cross_entropy': Array(0.13425416, dtype=float32)}


  9%|▉         | 94139/1000000 [4:18:47<28:03:19,  8.97it/s]

{'loss': Array(0.13149823, dtype=float32), 'loss_cross_entropy': Array(0.12334787, dtype=float32)}


  9%|▉         | 94149/1000000 [4:18:48<24:20:19, 10.34it/s]

{'loss': Array(0.14040281, dtype=float32), 'loss_cross_entropy': Array(0.13222484, dtype=float32)}


  9%|▉         | 94159/1000000 [4:18:50<33:11:39,  7.58it/s]

{'loss': Array(0.14967825, dtype=float32), 'loss_cross_entropy': Array(0.14153634, dtype=float32)}


  9%|▉         | 94169/1000000 [4:18:52<25:12:38,  9.98it/s]

{'loss': Array(0.13681786, dtype=float32), 'loss_cross_entropy': Array(0.12893839, dtype=float32)}


  9%|▉         | 94179/1000000 [4:18:53<23:32:20, 10.69it/s]

{'loss': Array(0.13436747, dtype=float32), 'loss_cross_entropy': Array(0.1262718, dtype=float32)}


  9%|▉         | 94189/1000000 [4:18:55<30:20:25,  8.29it/s]

{'loss': Array(0.14891918, dtype=float32), 'loss_cross_entropy': Array(0.14053749, dtype=float32)}


  9%|▉         | 94199/1000000 [4:18:56<24:48:25, 10.14it/s]

{'loss': Array(0.13236117, dtype=float32), 'loss_cross_entropy': Array(0.12414191, dtype=float32)}


  9%|▉         | 94209/1000000 [4:18:57<23:54:16, 10.53it/s]

{'loss': Array(0.1568034, dtype=float32), 'loss_cross_entropy': Array(0.14759998, dtype=float32)}


  9%|▉         | 94219/1000000 [4:18:59<26:21:13,  9.55it/s]

{'loss': Array(0.13895729, dtype=float32), 'loss_cross_entropy': Array(0.13106616, dtype=float32)}


  9%|▉         | 94229/1000000 [4:19:00<24:19:22, 10.34it/s]

{'loss': Array(0.1598432, dtype=float32), 'loss_cross_entropy': Array(0.15058695, dtype=float32)}


  9%|▉         | 94239/1000000 [4:19:02<30:08:22,  8.35it/s]

{'loss': Array(0.14050989, dtype=float32), 'loss_cross_entropy': Array(0.131936, dtype=float32)}


  9%|▉         | 94249/1000000 [4:19:04<25:11:08,  9.99it/s]

{'loss': Array(0.14646815, dtype=float32), 'loss_cross_entropy': Array(0.13856152, dtype=float32)}


  9%|▉         | 94259/1000000 [4:19:05<36:52:14,  6.82it/s]

{'loss': Array(0.15183616, dtype=float32), 'loss_cross_entropy': Array(0.14300497, dtype=float32)}


  9%|▉         | 94269/1000000 [4:19:07<26:00:35,  9.67it/s]

{'loss': Array(0.15952086, dtype=float32), 'loss_cross_entropy': Array(0.15056096, dtype=float32)}


  9%|▉         | 94279/1000000 [4:19:08<38:39:18,  6.51it/s]

{'loss': Array(0.13656257, dtype=float32), 'loss_cross_entropy': Array(0.12820393, dtype=float32)}


  9%|▉         | 94289/1000000 [4:19:10<26:13:32,  9.59it/s]

{'loss': Array(0.15805064, dtype=float32), 'loss_cross_entropy': Array(0.1490831, dtype=float32)}


  9%|▉         | 94299/1000000 [4:19:11<23:58:48, 10.49it/s]

{'loss': Array(0.15787663, dtype=float32), 'loss_cross_entropy': Array(0.14891493, dtype=float32)}


  9%|▉         | 94309/1000000 [4:19:13<32:42:23,  7.69it/s]

{'loss': Array(0.124013, dtype=float32), 'loss_cross_entropy': Array(0.11605235, dtype=float32)}


  9%|▉         | 94319/1000000 [4:19:14<24:52:20, 10.11it/s]

{'loss': Array(0.15184253, dtype=float32), 'loss_cross_entropy': Array(0.14342599, dtype=float32)}


  9%|▉         | 94329/1000000 [4:19:16<37:22:31,  6.73it/s]

{'loss': Array(0.14195968, dtype=float32), 'loss_cross_entropy': Array(0.13395135, dtype=float32)}


  9%|▉         | 94339/1000000 [4:19:17<26:00:19,  9.67it/s]

{'loss': Array(0.15547758, dtype=float32), 'loss_cross_entropy': Array(0.14710127, dtype=float32)}


  9%|▉         | 94349/1000000 [4:19:19<24:13:47, 10.38it/s]

{'loss': Array(0.13766517, dtype=float32), 'loss_cross_entropy': Array(0.1295598, dtype=float32)}


  9%|▉         | 94359/1000000 [4:19:20<26:23:34,  9.53it/s]

{'loss': Array(0.1537539, dtype=float32), 'loss_cross_entropy': Array(0.14508526, dtype=float32)}


  9%|▉         | 94369/1000000 [4:19:22<23:58:10, 10.50it/s]

{'loss': Array(0.14177483, dtype=float32), 'loss_cross_entropy': Array(0.13347256, dtype=float32)}


  9%|▉         | 94379/1000000 [4:19:23<26:35:57,  9.46it/s]

{'loss': Array(0.15393005, dtype=float32), 'loss_cross_entropy': Array(0.14539634, dtype=float32)}


  9%|▉         | 94389/1000000 [4:19:25<24:31:40, 10.26it/s]

{'loss': Array(0.14468415, dtype=float32), 'loss_cross_entropy': Array(0.1369192, dtype=float32)}


  9%|▉         | 94399/1000000 [4:19:26<37:01:26,  6.79it/s]

{'loss': Array(0.14812419, dtype=float32), 'loss_cross_entropy': Array(0.13997887, dtype=float32)}


  9%|▉         | 94409/1000000 [4:19:28<25:54:03,  9.71it/s]

{'loss': Array(0.14905894, dtype=float32), 'loss_cross_entropy': Array(0.14067984, dtype=float32)}


  9%|▉         | 94419/1000000 [4:19:29<23:59:09, 10.49it/s]

{'loss': Array(0.13335536, dtype=float32), 'loss_cross_entropy': Array(0.12584311, dtype=float32)}


  9%|▉         | 94429/1000000 [4:19:31<30:18:14,  8.30it/s]

{'loss': Array(0.1437936, dtype=float32), 'loss_cross_entropy': Array(0.13524036, dtype=float32)}


  9%|▉         | 94439/1000000 [4:19:32<24:21:53, 10.32it/s]

{'loss': Array(0.1511415, dtype=float32), 'loss_cross_entropy': Array(0.14251237, dtype=float32)}


  9%|▉         | 94449/1000000 [4:19:34<37:01:55,  6.79it/s]

{'loss': Array(0.14829074, dtype=float32), 'loss_cross_entropy': Array(0.14007853, dtype=float32)}


  9%|▉         | 94459/1000000 [4:19:35<26:06:50,  9.63it/s]

{'loss': Array(0.13482176, dtype=float32), 'loss_cross_entropy': Array(0.12693283, dtype=float32)}


  9%|▉         | 94469/1000000 [4:19:37<24:08:53, 10.42it/s]

{'loss': Array(0.14661434, dtype=float32), 'loss_cross_entropy': Array(0.1380418, dtype=float32)}


  9%|▉         | 94479/1000000 [4:19:38<28:24:57,  8.85it/s]

{'loss': Array(0.15256512, dtype=float32), 'loss_cross_entropy': Array(0.14399977, dtype=float32)}


  9%|▉         | 94489/1000000 [4:19:40<24:41:39, 10.19it/s]

{'loss': Array(0.13729349, dtype=float32), 'loss_cross_entropy': Array(0.12957388, dtype=float32)}


  9%|▉         | 94499/1000000 [4:19:42<32:57:49,  7.63it/s]

{'loss': Array(0.14496557, dtype=float32), 'loss_cross_entropy': Array(0.13624151, dtype=float32)}


  9%|▉         | 94509/1000000 [4:19:49<82:59:23,  3.03it/s] 

{'loss': Array(0.13916557, dtype=float32), 'loss_cross_entropy': Array(0.13068376, dtype=float32)}


  9%|▉         | 94519/1000000 [4:19:51<43:25:58,  5.79it/s]

{'loss': Array(0.12716804, dtype=float32), 'loss_cross_entropy': Array(0.11944069, dtype=float32)}


  9%|▉         | 94529/1000000 [4:19:52<27:24:21,  9.18it/s]

{'loss': Array(0.17032053, dtype=float32), 'loss_cross_entropy': Array(0.1608038, dtype=float32)}


  9%|▉         | 94539/1000000 [4:19:54<24:52:31, 10.11it/s]

{'loss': Array(0.14519288, dtype=float32), 'loss_cross_entropy': Array(0.13670526, dtype=float32)}


  9%|▉         | 94549/1000000 [4:19:56<30:23:30,  8.28it/s]

{'loss': Array(0.16030942, dtype=float32), 'loss_cross_entropy': Array(0.15121238, dtype=float32)}


  9%|▉         | 94559/1000000 [4:19:57<25:24:44,  9.90it/s]

{'loss': Array(0.14892085, dtype=float32), 'loss_cross_entropy': Array(0.14066507, dtype=float32)}


  9%|▉         | 94569/1000000 [4:19:58<24:51:58, 10.11it/s]

{'loss': Array(0.14105992, dtype=float32), 'loss_cross_entropy': Array(0.13277619, dtype=float32)}


  9%|▉         | 94579/1000000 [4:20:00<27:17:12,  9.22it/s]

{'loss': Array(0.14622776, dtype=float32), 'loss_cross_entropy': Array(0.13761508, dtype=float32)}


  9%|▉         | 94589/1000000 [4:20:02<24:21:25, 10.33it/s]

{'loss': Array(0.15826103, dtype=float32), 'loss_cross_entropy': Array(0.14952868, dtype=float32)}


  9%|▉         | 94599/1000000 [4:20:03<30:32:12,  8.24it/s]

{'loss': Array(0.15629609, dtype=float32), 'loss_cross_entropy': Array(0.14683682, dtype=float32)}


  9%|▉         | 94609/1000000 [4:20:05<24:03:36, 10.45it/s]

{'loss': Array(0.15195982, dtype=float32), 'loss_cross_entropy': Array(0.14342554, dtype=float32)}


  9%|▉         | 94619/1000000 [4:20:06<33:10:58,  7.58it/s]

{'loss': Array(0.12749024, dtype=float32), 'loss_cross_entropy': Array(0.11993309, dtype=float32)}


  9%|▉         | 94629/1000000 [4:20:08<25:17:29,  9.94it/s]

{'loss': Array(0.16193585, dtype=float32), 'loss_cross_entropy': Array(0.15292643, dtype=float32)}


  9%|▉         | 94639/1000000 [4:20:09<23:26:23, 10.73it/s]

{'loss': Array(0.15609865, dtype=float32), 'loss_cross_entropy': Array(0.14774798, dtype=float32)}


  9%|▉         | 94649/1000000 [4:20:11<29:25:44,  8.55it/s]

{'loss': Array(0.15388986, dtype=float32), 'loss_cross_entropy': Array(0.14518724, dtype=float32)}


  9%|▉         | 94659/1000000 [4:20:12<24:16:05, 10.36it/s]

{'loss': Array(0.1485086, dtype=float32), 'loss_cross_entropy': Array(0.14019544, dtype=float32)}


  9%|▉         | 94669/1000000 [4:20:13<23:20:44, 10.77it/s]

{'loss': Array(0.13546817, dtype=float32), 'loss_cross_entropy': Array(0.12764612, dtype=float32)}


  9%|▉         | 94679/1000000 [4:20:15<26:12:51,  9.59it/s]

{'loss': Array(0.15118937, dtype=float32), 'loss_cross_entropy': Array(0.14319569, dtype=float32)}


  9%|▉         | 94689/1000000 [4:20:17<23:38:14, 10.64it/s]

{'loss': Array(0.13305412, dtype=float32), 'loss_cross_entropy': Array(0.12465656, dtype=float32)}


  9%|▉         | 94699/1000000 [4:20:18<29:55:17,  8.40it/s]

{'loss': Array(0.15219502, dtype=float32), 'loss_cross_entropy': Array(0.1433826, dtype=float32)}


  9%|▉         | 94709/1000000 [4:20:20<24:04:25, 10.45it/s]

{'loss': Array(0.15885095, dtype=float32), 'loss_cross_entropy': Array(0.15006879, dtype=float32)}


  9%|▉         | 94719/1000000 [4:20:21<37:44:17,  6.66it/s]

{'loss': Array(0.13253744, dtype=float32), 'loss_cross_entropy': Array(0.1246167, dtype=float32)}


  9%|▉         | 94729/1000000 [4:20:23<25:48:35,  9.74it/s]

{'loss': Array(0.14045678, dtype=float32), 'loss_cross_entropy': Array(0.13276744, dtype=float32)}


  9%|▉         | 94739/1000000 [4:20:24<23:39:26, 10.63it/s]

{'loss': Array(0.14454971, dtype=float32), 'loss_cross_entropy': Array(0.13598631, dtype=float32)}


  9%|▉         | 94749/1000000 [4:20:26<32:39:27,  7.70it/s]

{'loss': Array(0.15560187, dtype=float32), 'loss_cross_entropy': Array(0.1468509, dtype=float32)}


  9%|▉         | 94759/1000000 [4:20:27<24:32:32, 10.25it/s]

{'loss': Array(0.15763713, dtype=float32), 'loss_cross_entropy': Array(0.14899804, dtype=float32)}


  9%|▉         | 94769/1000000 [4:20:28<24:08:28, 10.42it/s]

{'loss': Array(0.16144283, dtype=float32), 'loss_cross_entropy': Array(0.15248273, dtype=float32)}


  9%|▉         | 94779/1000000 [4:20:30<27:36:00,  9.11it/s]

{'loss': Array(0.15817527, dtype=float32), 'loss_cross_entropy': Array(0.1496892, dtype=float32)}


  9%|▉         | 94789/1000000 [4:20:31<24:17:56, 10.35it/s]

{'loss': Array(0.14250799, dtype=float32), 'loss_cross_entropy': Array(0.13429365, dtype=float32)}


  9%|▉         | 94799/1000000 [4:20:33<30:06:38,  8.35it/s]

{'loss': Array(0.14922711, dtype=float32), 'loss_cross_entropy': Array(0.14026506, dtype=float32)}


  9%|▉         | 94809/1000000 [4:20:35<23:59:39, 10.48it/s]

{'loss': Array(0.13370429, dtype=float32), 'loss_cross_entropy': Array(0.12614316, dtype=float32)}


  9%|▉         | 94819/1000000 [4:20:36<23:41:41, 10.61it/s]

{'loss': Array(0.15217559, dtype=float32), 'loss_cross_entropy': Array(0.14385365, dtype=float32)}


  9%|▉         | 94829/1000000 [4:20:38<26:57:22,  9.33it/s]

{'loss': Array(0.13368642, dtype=float32), 'loss_cross_entropy': Array(0.12589985, dtype=float32)}


  9%|▉         | 94839/1000000 [4:20:39<24:11:56, 10.39it/s]

{'loss': Array(0.13651727, dtype=float32), 'loss_cross_entropy': Array(0.12848875, dtype=float32)}


  9%|▉         | 94849/1000000 [4:20:41<32:50:43,  7.65it/s]

{'loss': Array(0.16845, dtype=float32), 'loss_cross_entropy': Array(0.15956393, dtype=float32)}


  9%|▉         | 94859/1000000 [4:20:42<25:00:44, 10.05it/s]

{'loss': Array(0.15305154, dtype=float32), 'loss_cross_entropy': Array(0.14385356, dtype=float32)}


  9%|▉         | 94869/1000000 [4:20:44<24:21:35, 10.32it/s]

{'loss': Array(0.14897677, dtype=float32), 'loss_cross_entropy': Array(0.14078562, dtype=float32)}


  9%|▉         | 94879/1000000 [4:20:45<27:44:35,  9.06it/s]

{'loss': Array(0.13684542, dtype=float32), 'loss_cross_entropy': Array(0.12861271, dtype=float32)}


  9%|▉         | 94889/1000000 [4:20:47<23:46:43, 10.57it/s]

{'loss': Array(0.14219005, dtype=float32), 'loss_cross_entropy': Array(0.13436131, dtype=float32)}


  9%|▉         | 94899/1000000 [4:20:48<28:14:42,  8.90it/s]

{'loss': Array(0.15840082, dtype=float32), 'loss_cross_entropy': Array(0.14969854, dtype=float32)}


  9%|▉         | 94909/1000000 [4:20:50<24:27:31, 10.28it/s]

{'loss': Array(0.15515935, dtype=float32), 'loss_cross_entropy': Array(0.14638233, dtype=float32)}


  9%|▉         | 94919/1000000 [4:20:51<37:16:19,  6.75it/s]

{'loss': Array(0.15412551, dtype=float32), 'loss_cross_entropy': Array(0.14589551, dtype=float32)}


  9%|▉         | 94929/1000000 [4:20:53<25:36:40,  9.82it/s]

{'loss': Array(0.15131205, dtype=float32), 'loss_cross_entropy': Array(0.14304195, dtype=float32)}


  9%|▉         | 94939/1000000 [4:20:54<23:51:23, 10.54it/s]

{'loss': Array(0.15855491, dtype=float32), 'loss_cross_entropy': Array(0.1499676, dtype=float32)}


  9%|▉         | 94949/1000000 [4:20:56<30:09:52,  8.33it/s]

{'loss': Array(0.15986244, dtype=float32), 'loss_cross_entropy': Array(0.1506962, dtype=float32)}


  9%|▉         | 94959/1000000 [4:20:57<24:26:45, 10.28it/s]

{'loss': Array(0.12922819, dtype=float32), 'loss_cross_entropy': Array(0.12123399, dtype=float32)}


  9%|▉         | 94969/1000000 [4:20:59<33:24:49,  7.52it/s]

{'loss': Array(0.15443386, dtype=float32), 'loss_cross_entropy': Array(0.14606969, dtype=float32)}


  9%|▉         | 94979/1000000 [4:21:00<25:59:31,  9.67it/s]

{'loss': Array(0.14752665, dtype=float32), 'loss_cross_entropy': Array(0.139318, dtype=float32)}


  9%|▉         | 94989/1000000 [4:21:02<34:48:50,  7.22it/s]

{'loss': Array(0.1582631, dtype=float32), 'loss_cross_entropy': Array(0.15022555, dtype=float32)}


  9%|▉         | 94999/1000000 [4:21:04<26:44:23,  9.40it/s]

{'loss': Array(0.1421117, dtype=float32), 'loss_cross_entropy': Array(0.13361336, dtype=float32)}


 10%|▉         | 95009/1000000 [4:21:12<83:52:49,  3.00it/s] 

{'loss': Array(0.15543877, dtype=float32), 'loss_cross_entropy': Array(0.14719085, dtype=float32)}


 10%|▉         | 95019/1000000 [4:21:13<38:30:44,  6.53it/s]

{'loss': Array(0.1253858, dtype=float32), 'loss_cross_entropy': Array(0.11771511, dtype=float32)}


 10%|▉         | 95029/1000000 [4:21:15<25:57:18,  9.69it/s]

{'loss': Array(0.15456712, dtype=float32), 'loss_cross_entropy': Array(0.1455829, dtype=float32)}


 10%|▉         | 95039/1000000 [4:21:16<33:25:08,  7.52it/s]

{'loss': Array(0.15017772, dtype=float32), 'loss_cross_entropy': Array(0.1419739, dtype=float32)}


 10%|▉         | 95049/1000000 [4:21:18<25:12:14,  9.97it/s]

{'loss': Array(0.14904255, dtype=float32), 'loss_cross_entropy': Array(0.14031622, dtype=float32)}


 10%|▉         | 95059/1000000 [4:21:20<36:46:46,  6.83it/s]

{'loss': Array(0.14157525, dtype=float32), 'loss_cross_entropy': Array(0.13285372, dtype=float32)}


 10%|▉         | 95069/1000000 [4:21:21<25:28:10,  9.87it/s]

{'loss': Array(0.1497771, dtype=float32), 'loss_cross_entropy': Array(0.14057419, dtype=float32)}


 10%|▉         | 95079/1000000 [4:21:22<23:52:07, 10.53it/s]

{'loss': Array(0.14312199, dtype=float32), 'loss_cross_entropy': Array(0.13510424, dtype=float32)}


 10%|▉         | 95089/1000000 [4:21:24<29:43:56,  8.45it/s]

{'loss': Array(0.131455, dtype=float32), 'loss_cross_entropy': Array(0.12332258, dtype=float32)}


 10%|▉         | 95099/1000000 [4:21:25<24:03:45, 10.45it/s]

{'loss': Array(0.13569796, dtype=float32), 'loss_cross_entropy': Array(0.12821247, dtype=float32)}


 10%|▉         | 95109/1000000 [4:21:27<33:06:24,  7.59it/s]

{'loss': Array(0.1512898, dtype=float32), 'loss_cross_entropy': Array(0.14256059, dtype=float32)}


 10%|▉         | 95119/1000000 [4:21:28<25:03:47, 10.03it/s]

{'loss': Array(0.14560992, dtype=float32), 'loss_cross_entropy': Array(0.13717787, dtype=float32)}


 10%|▉         | 95129/1000000 [4:21:30<23:31:51, 10.68it/s]

{'loss': Array(0.14506921, dtype=float32), 'loss_cross_entropy': Array(0.13709937, dtype=float32)}


 10%|▉         | 95139/1000000 [4:21:31<26:34:07,  9.46it/s]

{'loss': Array(0.16046631, dtype=float32), 'loss_cross_entropy': Array(0.15171508, dtype=float32)}


 10%|▉         | 95149/1000000 [4:21:33<23:45:44, 10.58it/s]

{'loss': Array(0.13249716, dtype=float32), 'loss_cross_entropy': Array(0.12501822, dtype=float32)}


 10%|▉         | 95159/1000000 [4:21:34<36:49:38,  6.82it/s]

{'loss': Array(0.1344711, dtype=float32), 'loss_cross_entropy': Array(0.12662065, dtype=float32)}


 10%|▉         | 95169/1000000 [4:21:36<25:28:02,  9.87it/s]

{'loss': Array(0.16017391, dtype=float32), 'loss_cross_entropy': Array(0.15122062, dtype=float32)}


 10%|▉         | 95179/1000000 [4:21:37<23:41:02, 10.61it/s]

{'loss': Array(0.15200119, dtype=float32), 'loss_cross_entropy': Array(0.14353547, dtype=float32)}


 10%|▉         | 95189/1000000 [4:21:39<29:48:15,  8.43it/s]

{'loss': Array(0.14507174, dtype=float32), 'loss_cross_entropy': Array(0.1367806, dtype=float32)}


 10%|▉         | 95199/1000000 [4:21:40<24:33:40, 10.23it/s]

{'loss': Array(0.13416336, dtype=float32), 'loss_cross_entropy': Array(0.12647401, dtype=float32)}


 10%|▉         | 95209/1000000 [4:21:42<33:21:56,  7.53it/s]

{'loss': Array(0.13957639, dtype=float32), 'loss_cross_entropy': Array(0.13144684, dtype=float32)}


 10%|▉         | 95219/1000000 [4:21:43<25:21:21,  9.91it/s]

{'loss': Array(0.15052639, dtype=float32), 'loss_cross_entropy': Array(0.14211635, dtype=float32)}


 10%|▉         | 95229/1000000 [4:21:45<32:52:45,  7.64it/s]

{'loss': Array(0.14318866, dtype=float32), 'loss_cross_entropy': Array(0.13453408, dtype=float32)}


 10%|▉         | 95239/1000000 [4:21:46<24:34:24, 10.23it/s]

{'loss': Array(0.16067319, dtype=float32), 'loss_cross_entropy': Array(0.15156578, dtype=float32)}


 10%|▉         | 95249/1000000 [4:21:48<23:27:15, 10.72it/s]

{'loss': Array(0.15321286, dtype=float32), 'loss_cross_entropy': Array(0.1446278, dtype=float32)}


 10%|▉         | 95259/1000000 [4:21:50<28:07:39,  8.93it/s]

{'loss': Array(0.14958213, dtype=float32), 'loss_cross_entropy': Array(0.1411091, dtype=float32)}


 10%|▉         | 95269/1000000 [4:21:51<24:19:51, 10.33it/s]

{'loss': Array(0.13928545, dtype=float32), 'loss_cross_entropy': Array(0.13150808, dtype=float32)}


 10%|▉         | 95279/1000000 [4:21:53<29:51:28,  8.42it/s]

{'loss': Array(0.14989167, dtype=float32), 'loss_cross_entropy': Array(0.14168012, dtype=float32)}


 10%|▉         | 95289/1000000 [4:21:54<24:04:58, 10.44it/s]

{'loss': Array(0.14666438, dtype=float32), 'loss_cross_entropy': Array(0.13827077, dtype=float32)}


 10%|▉         | 95299/1000000 [4:21:56<37:02:20,  6.78it/s]

{'loss': Array(0.15694456, dtype=float32), 'loss_cross_entropy': Array(0.14792149, dtype=float32)}


 10%|▉         | 95309/1000000 [4:21:57<25:05:46, 10.01it/s]

{'loss': Array(0.15568022, dtype=float32), 'loss_cross_entropy': Array(0.14686243, dtype=float32)}


 10%|▉         | 95319/1000000 [4:21:59<37:42:40,  6.66it/s]

{'loss': Array(0.15267599, dtype=float32), 'loss_cross_entropy': Array(0.14437354, dtype=float32)}


 10%|▉         | 95329/1000000 [4:22:00<26:18:39,  9.55it/s]

{'loss': Array(0.15975074, dtype=float32), 'loss_cross_entropy': Array(0.15117946, dtype=float32)}


 10%|▉         | 95339/1000000 [4:22:02<25:05:34, 10.01it/s]

{'loss': Array(0.14084469, dtype=float32), 'loss_cross_entropy': Array(0.13279791, dtype=float32)}


 10%|▉         | 95349/1000000 [4:22:03<34:04:42,  7.37it/s]

{'loss': Array(0.13770722, dtype=float32), 'loss_cross_entropy': Array(0.12984751, dtype=float32)}


 10%|▉         | 95359/1000000 [4:22:05<25:24:56,  9.89it/s]

{'loss': Array(0.14387317, dtype=float32), 'loss_cross_entropy': Array(0.13586347, dtype=float32)}


 10%|▉         | 95369/1000000 [4:22:06<38:48:49,  6.47it/s]

{'loss': Array(0.13136007, dtype=float32), 'loss_cross_entropy': Array(0.12425121, dtype=float32)}


 10%|▉         | 95379/1000000 [4:22:08<27:13:52,  9.23it/s]

{'loss': Array(0.14064103, dtype=float32), 'loss_cross_entropy': Array(0.13278472, dtype=float32)}


 10%|▉         | 95389/1000000 [4:22:09<25:10:28,  9.98it/s]

{'loss': Array(0.13655105, dtype=float32), 'loss_cross_entropy': Array(0.12867348, dtype=float32)}


 10%|▉         | 95399/1000000 [4:22:11<27:09:27,  9.25it/s]

{'loss': Array(0.1551324, dtype=float32), 'loss_cross_entropy': Array(0.14678104, dtype=float32)}


 10%|▉         | 95409/1000000 [4:22:12<24:15:50, 10.36it/s]

{'loss': Array(0.14430693, dtype=float32), 'loss_cross_entropy': Array(0.13522077, dtype=float32)}


 10%|▉         | 95419/1000000 [4:22:14<26:26:00,  9.51it/s]

{'loss': Array(0.13435642, dtype=float32), 'loss_cross_entropy': Array(0.12615025, dtype=float32)}


 10%|▉         | 95429/1000000 [4:22:15<23:41:22, 10.61it/s]

{'loss': Array(0.13695814, dtype=float32), 'loss_cross_entropy': Array(0.12930332, dtype=float32)}


 10%|▉         | 95439/1000000 [4:22:17<37:01:56,  6.79it/s]

{'loss': Array(0.12366382, dtype=float32), 'loss_cross_entropy': Array(0.11600976, dtype=float32)}


 10%|▉         | 95449/1000000 [4:22:19<25:57:48,  9.68it/s]

{'loss': Array(0.1428946, dtype=float32), 'loss_cross_entropy': Array(0.13502434, dtype=float32)}


 10%|▉         | 95459/1000000 [4:22:20<23:38:28, 10.63it/s]

{'loss': Array(0.15753363, dtype=float32), 'loss_cross_entropy': Array(0.14960685, dtype=float32)}


 10%|▉         | 95469/1000000 [4:22:22<29:47:07,  8.44it/s]

{'loss': Array(0.14242293, dtype=float32), 'loss_cross_entropy': Array(0.13373165, dtype=float32)}


 10%|▉         | 95479/1000000 [4:22:23<24:17:28, 10.34it/s]

{'loss': Array(0.13393112, dtype=float32), 'loss_cross_entropy': Array(0.12579757, dtype=float32)}


 10%|▉         | 95489/1000000 [4:22:25<36:53:06,  6.81it/s]

{'loss': Array(0.16332412, dtype=float32), 'loss_cross_entropy': Array(0.15411289, dtype=float32)}


 10%|▉         | 95499/1000000 [4:22:26<25:22:51,  9.90it/s]

{'loss': Array(0.14958017, dtype=float32), 'loss_cross_entropy': Array(0.14191844, dtype=float32)}


 10%|▉         | 95509/1000000 [4:22:34<80:31:46,  3.12it/s] 

{'loss': Array(0.14860243, dtype=float32), 'loss_cross_entropy': Array(0.13986884, dtype=float32)}


 10%|▉         | 95519/1000000 [4:22:35<37:27:03,  6.71it/s]

{'loss': Array(0.15754372, dtype=float32), 'loss_cross_entropy': Array(0.14856601, dtype=float32)}


 10%|▉         | 95529/1000000 [4:22:37<26:24:23,  9.51it/s]

{'loss': Array(0.1236225, dtype=float32), 'loss_cross_entropy': Array(0.11639362, dtype=float32)}


 10%|▉         | 95539/1000000 [4:22:39<31:02:37,  8.09it/s]

{'loss': Array(0.16412748, dtype=float32), 'loss_cross_entropy': Array(0.15473251, dtype=float32)}


 10%|▉         | 95549/1000000 [4:22:40<24:54:38, 10.09it/s]

{'loss': Array(0.15730105, dtype=float32), 'loss_cross_entropy': Array(0.14893274, dtype=float32)}


 10%|▉         | 95559/1000000 [4:22:42<37:16:55,  6.74it/s]

{'loss': Array(0.14836167, dtype=float32), 'loss_cross_entropy': Array(0.14005886, dtype=float32)}


 10%|▉         | 95569/1000000 [4:22:43<25:43:56,  9.76it/s]

{'loss': Array(0.15081406, dtype=float32), 'loss_cross_entropy': Array(0.14219992, dtype=float32)}


 10%|▉         | 95579/1000000 [4:22:44<23:21:52, 10.75it/s]

{'loss': Array(0.15137246, dtype=float32), 'loss_cross_entropy': Array(0.14267252, dtype=float32)}


 10%|▉         | 95589/1000000 [4:22:46<26:14:30,  9.57it/s]

{'loss': Array(0.13964973, dtype=float32), 'loss_cross_entropy': Array(0.13150667, dtype=float32)}


 10%|▉         | 95599/1000000 [4:22:47<23:45:25, 10.57it/s]

{'loss': Array(0.14258896, dtype=float32), 'loss_cross_entropy': Array(0.13390873, dtype=float32)}


 10%|▉         | 95609/1000000 [4:22:49<36:59:07,  6.79it/s]

{'loss': Array(0.16493666, dtype=float32), 'loss_cross_entropy': Array(0.15644406, dtype=float32)}


 10%|▉         | 95619/1000000 [4:22:50<25:21:03,  9.91it/s]

{'loss': Array(0.13588546, dtype=float32), 'loss_cross_entropy': Array(0.12776771, dtype=float32)}


 10%|▉         | 95629/1000000 [4:22:52<23:11:53, 10.83it/s]

{'loss': Array(0.12691188, dtype=float32), 'loss_cross_entropy': Array(0.11933634, dtype=float32)}


 10%|▉         | 95639/1000000 [4:22:53<27:29:33,  9.14it/s]

{'loss': Array(0.14109366, dtype=float32), 'loss_cross_entropy': Array(0.13319428, dtype=float32)}


 10%|▉         | 95649/1000000 [4:22:55<23:44:01, 10.58it/s]

{'loss': Array(0.15615384, dtype=float32), 'loss_cross_entropy': Array(0.14717974, dtype=float32)}


 10%|▉         | 95659/1000000 [4:22:57<33:03:57,  7.60it/s]

{'loss': Array(0.12987252, dtype=float32), 'loss_cross_entropy': Array(0.12238335, dtype=float32)}


 10%|▉         | 95669/1000000 [4:22:58<24:58:26, 10.06it/s]

{'loss': Array(0.13883781, dtype=float32), 'loss_cross_entropy': Array(0.13049619, dtype=float32)}


 10%|▉         | 95679/1000000 [4:23:00<38:29:52,  6.53it/s]

{'loss': Array(0.17066428, dtype=float32), 'loss_cross_entropy': Array(0.1614753, dtype=float32)}


 10%|▉         | 95689/1000000 [4:23:01<26:43:21,  9.40it/s]

{'loss': Array(0.15683734, dtype=float32), 'loss_cross_entropy': Array(0.14829369, dtype=float32)}


 10%|▉         | 95699/1000000 [4:23:03<24:34:49, 10.22it/s]

{'loss': Array(0.11429872, dtype=float32), 'loss_cross_entropy': Array(0.10748076, dtype=float32)}


 10%|▉         | 95709/1000000 [4:23:04<31:10:55,  8.06it/s]

{'loss': Array(0.16253226, dtype=float32), 'loss_cross_entropy': Array(0.15318339, dtype=float32)}


 10%|▉         | 95719/1000000 [4:23:06<25:16:14,  9.94it/s]

{'loss': Array(0.13254113, dtype=float32), 'loss_cross_entropy': Array(0.12515979, dtype=float32)}


 10%|▉         | 95729/1000000 [4:23:07<34:51:55,  7.20it/s]

{'loss': Array(0.15018506, dtype=float32), 'loss_cross_entropy': Array(0.14181446, dtype=float32)}


 10%|▉         | 95739/1000000 [4:23:09<25:54:02,  9.70it/s]

{'loss': Array(0.13003804, dtype=float32), 'loss_cross_entropy': Array(0.12219965, dtype=float32)}


 10%|▉         | 95749/1000000 [4:23:10<24:04:30, 10.43it/s]

{'loss': Array(0.14923628, dtype=float32), 'loss_cross_entropy': Array(0.14068542, dtype=float32)}


 10%|▉         | 95759/1000000 [4:23:12<26:37:40,  9.43it/s]

{'loss': Array(0.14892006, dtype=float32), 'loss_cross_entropy': Array(0.14055504, dtype=float32)}


 10%|▉         | 95769/1000000 [4:23:13<23:24:01, 10.73it/s]

{'loss': Array(0.1558476, dtype=float32), 'loss_cross_entropy': Array(0.14670368, dtype=float32)}


 10%|▉         | 95779/1000000 [4:23:15<36:37:47,  6.86it/s]

{'loss': Array(0.12873957, dtype=float32), 'loss_cross_entropy': Array(0.12087657, dtype=float32)}


 10%|▉         | 95789/1000000 [4:23:16<25:09:08,  9.99it/s]

{'loss': Array(0.14752996, dtype=float32), 'loss_cross_entropy': Array(0.13899663, dtype=float32)}


 10%|▉         | 95799/1000000 [4:23:18<24:01:11, 10.46it/s]

{'loss': Array(0.15024386, dtype=float32), 'loss_cross_entropy': Array(0.14142548, dtype=float32)}


 10%|▉         | 95809/1000000 [4:23:19<30:13:44,  8.31it/s]

{'loss': Array(0.13018267, dtype=float32), 'loss_cross_entropy': Array(0.12232476, dtype=float32)}


 10%|▉         | 95819/1000000 [4:23:21<24:27:35, 10.27it/s]

{'loss': Array(0.15020293, dtype=float32), 'loss_cross_entropy': Array(0.14219652, dtype=float32)}


 10%|▉         | 95829/1000000 [4:23:22<33:14:41,  7.55it/s]

{'loss': Array(0.15787445, dtype=float32), 'loss_cross_entropy': Array(0.14895134, dtype=float32)}


 10%|▉         | 95839/1000000 [4:23:24<25:01:33, 10.04it/s]

{'loss': Array(0.14656377, dtype=float32), 'loss_cross_entropy': Array(0.13843258, dtype=float32)}


 10%|▉         | 95849/1000000 [4:23:26<32:45:54,  7.67it/s]

{'loss': Array(0.16399094, dtype=float32), 'loss_cross_entropy': Array(0.15440547, dtype=float32)}


 10%|▉         | 95859/1000000 [4:23:27<25:00:22, 10.04it/s]

{'loss': Array(0.14414178, dtype=float32), 'loss_cross_entropy': Array(0.13568388, dtype=float32)}


 10%|▉         | 95869/1000000 [4:23:28<23:28:27, 10.70it/s]

{'loss': Array(0.15821819, dtype=float32), 'loss_cross_entropy': Array(0.1489018, dtype=float32)}


 10%|▉         | 95879/1000000 [4:23:30<28:00:19,  8.97it/s]

{'loss': Array(0.12978332, dtype=float32), 'loss_cross_entropy': Array(0.12209272, dtype=float32)}


 10%|▉         | 95889/1000000 [4:23:31<24:48:22, 10.12it/s]

{'loss': Array(0.16616488, dtype=float32), 'loss_cross_entropy': Array(0.15705933, dtype=float32)}


 10%|▉         | 95899/1000000 [4:23:33<30:05:13,  8.35it/s]

{'loss': Array(0.12266284, dtype=float32), 'loss_cross_entropy': Array(0.11516163, dtype=float32)}


 10%|▉         | 95909/1000000 [4:23:34<24:20:18, 10.32it/s]

{'loss': Array(0.14508055, dtype=float32), 'loss_cross_entropy': Array(0.1373476, dtype=float32)}


 10%|▉         | 95919/1000000 [4:23:36<36:45:45,  6.83it/s]

{'loss': Array(0.14942257, dtype=float32), 'loss_cross_entropy': Array(0.14004633, dtype=float32)}


 10%|▉         | 95929/1000000 [4:23:37<25:29:21,  9.85it/s]

{'loss': Array(0.13774392, dtype=float32), 'loss_cross_entropy': Array(0.13032497, dtype=float32)}


 10%|▉         | 95939/1000000 [4:23:39<37:21:18,  6.72it/s]

{'loss': Array(0.14806536, dtype=float32), 'loss_cross_entropy': Array(0.14024982, dtype=float32)}


 10%|▉         | 95949/1000000 [4:23:41<25:39:01,  9.79it/s]

{'loss': Array(0.13080236, dtype=float32), 'loss_cross_entropy': Array(0.12300495, dtype=float32)}


 10%|▉         | 95959/1000000 [4:23:42<23:49:41, 10.54it/s]

{'loss': Array(0.14018248, dtype=float32), 'loss_cross_entropy': Array(0.13212475, dtype=float32)}


 10%|▉         | 95969/1000000 [4:23:44<33:05:36,  7.59it/s]

{'loss': Array(0.14726369, dtype=float32), 'loss_cross_entropy': Array(0.13832763, dtype=float32)}


 10%|▉         | 95979/1000000 [4:23:45<24:49:31, 10.12it/s]

{'loss': Array(0.1462211, dtype=float32), 'loss_cross_entropy': Array(0.13834098, dtype=float32)}


 10%|▉         | 95989/1000000 [4:23:47<37:10:51,  6.75it/s]

{'loss': Array(0.14469926, dtype=float32), 'loss_cross_entropy': Array(0.13624726, dtype=float32)}


 10%|▉         | 95999/1000000 [4:23:48<25:31:21,  9.84it/s]

{'loss': Array(0.14984736, dtype=float32), 'loss_cross_entropy': Array(0.14108454, dtype=float32)}


 10%|▉         | 96009/1000000 [4:23:56<82:54:04,  3.03it/s] 

{'loss': Array(0.14725347, dtype=float32), 'loss_cross_entropy': Array(0.13916245, dtype=float32)}


 10%|▉         | 96019/1000000 [4:23:58<36:09:42,  6.94it/s] 

{'loss': Array(0.15723483, dtype=float32), 'loss_cross_entropy': Array(0.1484378, dtype=float32)}


 10%|▉         | 96029/1000000 [4:23:59<26:45:57,  9.38it/s]

{'loss': Array(0.14363976, dtype=float32), 'loss_cross_entropy': Array(0.13517515, dtype=float32)}


 10%|▉         | 96039/1000000 [4:24:01<27:27:13,  9.15it/s]

{'loss': Array(0.14874506, dtype=float32), 'loss_cross_entropy': Array(0.14057939, dtype=float32)}


 10%|▉         | 96049/1000000 [4:24:02<25:08:20,  9.99it/s]

{'loss': Array(0.13065131, dtype=float32), 'loss_cross_entropy': Array(0.12294779, dtype=float32)}


 10%|▉         | 96059/1000000 [4:24:04<37:52:48,  6.63it/s]

{'loss': Array(0.14057712, dtype=float32), 'loss_cross_entropy': Array(0.13338815, dtype=float32)}


 10%|▉         | 96069/1000000 [4:24:05<26:59:54,  9.30it/s]

{'loss': Array(0.14535362, dtype=float32), 'loss_cross_entropy': Array(0.13667034, dtype=float32)}


 10%|▉         | 96079/1000000 [4:24:07<25:04:03, 10.02it/s]

{'loss': Array(0.15452366, dtype=float32), 'loss_cross_entropy': Array(0.14568548, dtype=float32)}


 10%|▉         | 96089/1000000 [4:24:09<31:38:27,  7.94it/s]

{'loss': Array(0.14591572, dtype=float32), 'loss_cross_entropy': Array(0.13779856, dtype=float32)}


 10%|▉         | 96099/1000000 [4:24:10<25:36:10,  9.81it/s]

{'loss': Array(0.15844627, dtype=float32), 'loss_cross_entropy': Array(0.14945321, dtype=float32)}


 10%|▉         | 96109/1000000 [4:24:12<38:03:58,  6.60it/s]

{'loss': Array(0.14071319, dtype=float32), 'loss_cross_entropy': Array(0.13276993, dtype=float32)}


 10%|▉         | 96119/1000000 [4:24:13<26:31:57,  9.46it/s]

{'loss': Array(0.13924542, dtype=float32), 'loss_cross_entropy': Array(0.13156064, dtype=float32)}


 10%|▉         | 96129/1000000 [4:24:15<23:41:00, 10.60it/s]

{'loss': Array(0.12503178, dtype=float32), 'loss_cross_entropy': Array(0.11774613, dtype=float32)}


 10%|▉         | 96139/1000000 [4:24:16<28:16:31,  8.88it/s]

{'loss': Array(0.14648636, dtype=float32), 'loss_cross_entropy': Array(0.13810986, dtype=float32)}


 10%|▉         | 96149/1000000 [4:24:18<24:35:16, 10.21it/s]

{'loss': Array(0.13963975, dtype=float32), 'loss_cross_entropy': Array(0.13147947, dtype=float32)}


 10%|▉         | 96159/1000000 [4:24:19<32:34:40,  7.71it/s]

{'loss': Array(0.13999802, dtype=float32), 'loss_cross_entropy': Array(0.1319206, dtype=float32)}


 10%|▉         | 96169/1000000 [4:24:21<24:22:15, 10.30it/s]

{'loss': Array(0.14842635, dtype=float32), 'loss_cross_entropy': Array(0.14028864, dtype=float32)}


 10%|▉         | 96179/1000000 [4:24:22<23:19:17, 10.77it/s]

{'loss': Array(0.14313087, dtype=float32), 'loss_cross_entropy': Array(0.13544755, dtype=float32)}


 10%|▉         | 96189/1000000 [4:24:24<29:16:52,  8.57it/s]

{'loss': Array(0.1457789, dtype=float32), 'loss_cross_entropy': Array(0.13742577, dtype=float32)}


 10%|▉         | 96199/1000000 [4:24:25<23:56:44, 10.48it/s]

{'loss': Array(0.15975478, dtype=float32), 'loss_cross_entropy': Array(0.15109114, dtype=float32)}


 10%|▉         | 96209/1000000 [4:24:26<23:20:58, 10.75it/s]

{'loss': Array(0.14909957, dtype=float32), 'loss_cross_entropy': Array(0.1402543, dtype=float32)}


 10%|▉         | 96219/1000000 [4:24:28<26:16:39,  9.55it/s]

{'loss': Array(0.15799074, dtype=float32), 'loss_cross_entropy': Array(0.1494987, dtype=float32)}


 10%|▉         | 96229/1000000 [4:24:29<23:45:58, 10.56it/s]

{'loss': Array(0.13133922, dtype=float32), 'loss_cross_entropy': Array(0.12404775, dtype=float32)}


 10%|▉         | 96239/1000000 [4:24:31<29:30:42,  8.51it/s]

{'loss': Array(0.16569112, dtype=float32), 'loss_cross_entropy': Array(0.15696938, dtype=float32)}


 10%|▉         | 96249/1000000 [4:24:33<24:43:49, 10.15it/s]

{'loss': Array(0.14540403, dtype=float32), 'loss_cross_entropy': Array(0.13678633, dtype=float32)}


 10%|▉         | 96259/1000000 [4:24:34<37:18:30,  6.73it/s]

{'loss': Array(0.14707598, dtype=float32), 'loss_cross_entropy': Array(0.13847093, dtype=float32)}


 10%|▉         | 96269/1000000 [4:24:36<25:35:05,  9.81it/s]

{'loss': Array(0.13988025, dtype=float32), 'loss_cross_entropy': Array(0.1318635, dtype=float32)}


 10%|▉         | 96279/1000000 [4:24:37<37:36:11,  6.68it/s]

{'loss': Array(0.1416692, dtype=float32), 'loss_cross_entropy': Array(0.13339601, dtype=float32)}


 10%|▉         | 96289/1000000 [4:24:39<25:59:19,  9.66it/s]

{'loss': Array(0.13502482, dtype=float32), 'loss_cross_entropy': Array(0.12762524, dtype=float32)}


 10%|▉         | 96299/1000000 [4:24:40<23:55:48, 10.49it/s]

{'loss': Array(0.15634252, dtype=float32), 'loss_cross_entropy': Array(0.14774156, dtype=float32)}


 10%|▉         | 96309/1000000 [4:24:42<32:42:01,  7.68it/s]

{'loss': Array(0.13860874, dtype=float32), 'loss_cross_entropy': Array(0.13085306, dtype=float32)}


 10%|▉         | 96319/1000000 [4:24:43<25:05:12, 10.01it/s]

{'loss': Array(0.15259837, dtype=float32), 'loss_cross_entropy': Array(0.14357854, dtype=float32)}


 10%|▉         | 96329/1000000 [4:24:45<37:00:26,  6.78it/s]

{'loss': Array(0.14661624, dtype=float32), 'loss_cross_entropy': Array(0.13820557, dtype=float32)}


 10%|▉         | 96339/1000000 [4:24:46<26:00:22,  9.65it/s]

{'loss': Array(0.12969923, dtype=float32), 'loss_cross_entropy': Array(0.1226089, dtype=float32)}


 10%|▉         | 96349/1000000 [4:24:48<23:31:35, 10.67it/s]

{'loss': Array(0.15138642, dtype=float32), 'loss_cross_entropy': Array(0.14310408, dtype=float32)}


 10%|▉         | 96359/1000000 [4:24:49<26:02:08,  9.64it/s]

{'loss': Array(0.1481552, dtype=float32), 'loss_cross_entropy': Array(0.13962285, dtype=float32)}


 10%|▉         | 96369/1000000 [4:24:51<23:36:29, 10.63it/s]

{'loss': Array(0.16134287, dtype=float32), 'loss_cross_entropy': Array(0.15209018, dtype=float32)}


 10%|▉         | 96379/1000000 [4:24:52<27:05:13,  9.27it/s]

{'loss': Array(0.15194504, dtype=float32), 'loss_cross_entropy': Array(0.14345303, dtype=float32)}


 10%|▉         | 96389/1000000 [4:24:54<23:35:49, 10.64it/s]

{'loss': Array(0.13995641, dtype=float32), 'loss_cross_entropy': Array(0.13149282, dtype=float32)}


 10%|▉         | 96399/1000000 [4:24:55<36:21:09,  6.90it/s]

{'loss': Array(0.12225957, dtype=float32), 'loss_cross_entropy': Array(0.11478741, dtype=float32)}


 10%|▉         | 96409/1000000 [4:24:57<25:23:55,  9.88it/s]

{'loss': Array(0.13326482, dtype=float32), 'loss_cross_entropy': Array(0.1259619, dtype=float32)}


 10%|▉         | 96419/1000000 [4:24:58<23:58:15, 10.47it/s]

{'loss': Array(0.12778658, dtype=float32), 'loss_cross_entropy': Array(0.1202905, dtype=float32)}


 10%|▉         | 96429/1000000 [4:25:00<31:38:40,  7.93it/s]

{'loss': Array(0.163236, dtype=float32), 'loss_cross_entropy': Array(0.1540266, dtype=float32)}


 10%|▉         | 96439/1000000 [4:25:01<25:10:41,  9.97it/s]

{'loss': Array(0.13079023, dtype=float32), 'loss_cross_entropy': Array(0.12397017, dtype=float32)}


 10%|▉         | 96449/1000000 [4:25:03<38:53:39,  6.45it/s]

{'loss': Array(0.15882877, dtype=float32), 'loss_cross_entropy': Array(0.1500316, dtype=float32)}


 10%|▉         | 96459/1000000 [4:25:05<26:52:30,  9.34it/s]

{'loss': Array(0.13600878, dtype=float32), 'loss_cross_entropy': Array(0.12866597, dtype=float32)}


 10%|▉         | 96469/1000000 [4:25:06<24:40:46, 10.17it/s]

{'loss': Array(0.15306602, dtype=float32), 'loss_cross_entropy': Array(0.14341974, dtype=float32)}


 10%|▉         | 96479/1000000 [4:25:08<29:25:26,  8.53it/s]

{'loss': Array(0.158716, dtype=float32), 'loss_cross_entropy': Array(0.149943, dtype=float32)}


 10%|▉         | 96489/1000000 [4:25:09<25:48:54,  9.72it/s]

{'loss': Array(0.15433756, dtype=float32), 'loss_cross_entropy': Array(0.14530088, dtype=float32)}


 10%|▉         | 96499/1000000 [4:25:11<34:43:11,  7.23it/s]

{'loss': Array(0.14673671, dtype=float32), 'loss_cross_entropy': Array(0.138346, dtype=float32)}


 10%|▉         | 96509/1000000 [4:25:19<84:35:27,  2.97it/s] 

{'loss': Array(0.15245894, dtype=float32), 'loss_cross_entropy': Array(0.14368428, dtype=float32)}


 10%|▉         | 96519/1000000 [4:25:21<42:59:19,  5.84it/s]

{'loss': Array(0.14639436, dtype=float32), 'loss_cross_entropy': Array(0.1379206, dtype=float32)}


 10%|▉         | 96529/1000000 [4:25:22<26:19:30,  9.53it/s]

{'loss': Array(0.13964657, dtype=float32), 'loss_cross_entropy': Array(0.13181722, dtype=float32)}


 10%|▉         | 96539/1000000 [4:25:23<24:12:49, 10.36it/s]

{'loss': Array(0.12792, dtype=float32), 'loss_cross_entropy': Array(0.12037919, dtype=float32)}


 10%|▉         | 96549/1000000 [4:25:25<29:55:31,  8.39it/s]

{'loss': Array(0.13565187, dtype=float32), 'loss_cross_entropy': Array(0.12766226, dtype=float32)}


 10%|▉         | 96559/1000000 [4:25:27<24:19:49, 10.31it/s]

{'loss': Array(0.13130496, dtype=float32), 'loss_cross_entropy': Array(0.12350167, dtype=float32)}


 10%|▉         | 96569/1000000 [4:25:28<22:59:49, 10.91it/s]

{'loss': Array(0.15152359, dtype=float32), 'loss_cross_entropy': Array(0.14285864, dtype=float32)}


 10%|▉         | 96579/1000000 [4:25:30<26:58:15,  9.30it/s]

{'loss': Array(0.14418368, dtype=float32), 'loss_cross_entropy': Array(0.13562985, dtype=float32)}


 10%|▉         | 96589/1000000 [4:25:31<24:23:55, 10.29it/s]

{'loss': Array(0.15596138, dtype=float32), 'loss_cross_entropy': Array(0.14726925, dtype=float32)}


 10%|▉         | 96599/1000000 [4:25:33<29:47:49,  8.42it/s]

{'loss': Array(0.1530634, dtype=float32), 'loss_cross_entropy': Array(0.14433993, dtype=float32)}


 10%|▉         | 96609/1000000 [4:25:34<24:32:12, 10.23it/s]

{'loss': Array(0.12980649, dtype=float32), 'loss_cross_entropy': Array(0.12190337, dtype=float32)}


 10%|▉         | 96619/1000000 [4:25:36<33:17:12,  7.54it/s]

{'loss': Array(0.16539851, dtype=float32), 'loss_cross_entropy': Array(0.15614308, dtype=float32)}


 10%|▉         | 96629/1000000 [4:25:37<25:05:20, 10.00it/s]

{'loss': Array(0.14239089, dtype=float32), 'loss_cross_entropy': Array(0.13418226, dtype=float32)}


 10%|▉         | 96639/1000000 [4:25:39<23:35:40, 10.64it/s]

{'loss': Array(0.15583871, dtype=float32), 'loss_cross_entropy': Array(0.1465209, dtype=float32)}


 10%|▉         | 96649/1000000 [4:25:40<29:52:47,  8.40it/s]

{'loss': Array(0.14755702, dtype=float32), 'loss_cross_entropy': Array(0.13913333, dtype=float32)}


 10%|▉         | 96659/1000000 [4:25:42<24:28:44, 10.25it/s]

{'loss': Array(0.16220115, dtype=float32), 'loss_cross_entropy': Array(0.15302825, dtype=float32)}


 10%|▉         | 96669/1000000 [4:25:43<23:04:59, 10.87it/s]

{'loss': Array(0.15799932, dtype=float32), 'loss_cross_entropy': Array(0.14915556, dtype=float32)}


 10%|▉         | 96679/1000000 [4:25:45<27:06:58,  9.25it/s]

{'loss': Array(0.14129195, dtype=float32), 'loss_cross_entropy': Array(0.13289489, dtype=float32)}


 10%|▉         | 96689/1000000 [4:25:46<24:12:01, 10.37it/s]

{'loss': Array(0.13436906, dtype=float32), 'loss_cross_entropy': Array(0.12605102, dtype=float32)}


 10%|▉         | 96699/1000000 [4:25:48<30:17:27,  8.28it/s]

{'loss': Array(0.15878582, dtype=float32), 'loss_cross_entropy': Array(0.14978954, dtype=float32)}


 10%|▉         | 96709/1000000 [4:25:49<24:45:18, 10.14it/s]

{'loss': Array(0.1135756, dtype=float32), 'loss_cross_entropy': Array(0.106308, dtype=float32)}


 10%|▉         | 96719/1000000 [4:25:51<36:46:37,  6.82it/s]

{'loss': Array(0.15027998, dtype=float32), 'loss_cross_entropy': Array(0.14140491, dtype=float32)}


 10%|▉         | 96729/1000000 [4:25:52<25:17:34,  9.92it/s]

{'loss': Array(0.14858256, dtype=float32), 'loss_cross_entropy': Array(0.1398841, dtype=float32)}


 10%|▉         | 96739/1000000 [4:25:54<23:36:04, 10.63it/s]

{'loss': Array(0.14176618, dtype=float32), 'loss_cross_entropy': Array(0.13355596, dtype=float32)}


 10%|▉         | 96749/1000000 [4:25:55<32:21:47,  7.75it/s]

{'loss': Array(0.16270691, dtype=float32), 'loss_cross_entropy': Array(0.15347163, dtype=float32)}


 10%|▉         | 96759/1000000 [4:25:57<24:40:49, 10.17it/s]

{'loss': Array(0.14042199, dtype=float32), 'loss_cross_entropy': Array(0.13204917, dtype=float32)}


 10%|▉         | 96769/1000000 [4:25:58<23:59:04, 10.46it/s]

{'loss': Array(0.13485327, dtype=float32), 'loss_cross_entropy': Array(0.1266899, dtype=float32)}


 10%|▉         | 96779/1000000 [4:26:00<28:59:31,  8.65it/s]

{'loss': Array(0.12774238, dtype=float32), 'loss_cross_entropy': Array(0.12080478, dtype=float32)}


 10%|▉         | 96789/1000000 [4:26:01<25:02:56, 10.02it/s]

{'loss': Array(0.14701681, dtype=float32), 'loss_cross_entropy': Array(0.13836224, dtype=float32)}


 10%|▉         | 96799/1000000 [4:26:03<30:33:25,  8.21it/s]

{'loss': Array(0.13768397, dtype=float32), 'loss_cross_entropy': Array(0.13004802, dtype=float32)}


 10%|▉         | 96809/1000000 [4:26:04<25:41:28,  9.77it/s]

{'loss': Array(0.13959686, dtype=float32), 'loss_cross_entropy': Array(0.13195784, dtype=float32)}


 10%|▉         | 96819/1000000 [4:26:06<25:13:01,  9.95it/s]

{'loss': Array(0.12889256, dtype=float32), 'loss_cross_entropy': Array(0.12181081, dtype=float32)}


 10%|▉         | 96829/1000000 [4:26:08<27:27:56,  9.13it/s]

{'loss': Array(0.13530031, dtype=float32), 'loss_cross_entropy': Array(0.12742816, dtype=float32)}


 10%|▉         | 96839/1000000 [4:26:09<24:37:49, 10.19it/s]

{'loss': Array(0.1439154, dtype=float32), 'loss_cross_entropy': Array(0.13533282, dtype=float32)}


 10%|▉         | 96849/1000000 [4:26:11<33:26:08,  7.50it/s]

{'loss': Array(0.15756145, dtype=float32), 'loss_cross_entropy': Array(0.14892939, dtype=float32)}


 10%|▉         | 96859/1000000 [4:26:12<25:39:27,  9.78it/s]

{'loss': Array(0.1400409, dtype=float32), 'loss_cross_entropy': Array(0.13176052, dtype=float32)}


 10%|▉         | 96869/1000000 [4:26:14<24:07:52, 10.40it/s]

{'loss': Array(0.15718302, dtype=float32), 'loss_cross_entropy': Array(0.1478893, dtype=float32)}


 10%|▉         | 96879/1000000 [4:26:15<27:02:49,  9.28it/s]

{'loss': Array(0.12918453, dtype=float32), 'loss_cross_entropy': Array(0.12180034, dtype=float32)}


 10%|▉         | 96889/1000000 [4:26:17<23:47:13, 10.55it/s]

{'loss': Array(0.13038537, dtype=float32), 'loss_cross_entropy': Array(0.12314103, dtype=float32)}


 10%|▉         | 96899/1000000 [4:26:18<27:52:16,  9.00it/s]

{'loss': Array(0.14463033, dtype=float32), 'loss_cross_entropy': Array(0.13678613, dtype=float32)}


 10%|▉         | 96909/1000000 [4:26:20<23:51:15, 10.52it/s]

{'loss': Array(0.15571915, dtype=float32), 'loss_cross_entropy': Array(0.14694093, dtype=float32)}


 10%|▉         | 96919/1000000 [4:26:21<36:41:51,  6.84it/s]

{'loss': Array(0.15338722, dtype=float32), 'loss_cross_entropy': Array(0.14462556, dtype=float32)}


 10%|▉         | 96929/1000000 [4:26:23<25:32:22,  9.82it/s]

{'loss': Array(0.14089839, dtype=float32), 'loss_cross_entropy': Array(0.1325938, dtype=float32)}


 10%|▉         | 96939/1000000 [4:26:24<23:53:53, 10.50it/s]

{'loss': Array(0.14600098, dtype=float32), 'loss_cross_entropy': Array(0.13779055, dtype=float32)}


 10%|▉         | 96949/1000000 [4:26:26<30:10:45,  8.31it/s]

{'loss': Array(0.14198126, dtype=float32), 'loss_cross_entropy': Array(0.13393646, dtype=float32)}


 10%|▉         | 96959/1000000 [4:26:27<24:48:39, 10.11it/s]

{'loss': Array(0.1589144, dtype=float32), 'loss_cross_entropy': Array(0.15005152, dtype=float32)}


 10%|▉         | 96969/1000000 [4:26:29<33:46:17,  7.43it/s]

{'loss': Array(0.14509206, dtype=float32), 'loss_cross_entropy': Array(0.13709964, dtype=float32)}


 10%|▉         | 96979/1000000 [4:26:30<24:46:56, 10.12it/s]

{'loss': Array(0.13009809, dtype=float32), 'loss_cross_entropy': Array(0.12248774, dtype=float32)}


 10%|▉         | 96989/1000000 [4:26:32<32:58:58,  7.61it/s]

{'loss': Array(0.1553462, dtype=float32), 'loss_cross_entropy': Array(0.14655603, dtype=float32)}


 10%|▉         | 96999/1000000 [4:26:33<24:49:54, 10.10it/s]

{'loss': Array(0.15567745, dtype=float32), 'loss_cross_entropy': Array(0.1470638, dtype=float32)}


 10%|▉         | 97009/1000000 [4:26:41<80:54:35,  3.10it/s] 

{'loss': Array(0.1599009, dtype=float32), 'loss_cross_entropy': Array(0.15070823, dtype=float32)}


 10%|▉         | 97019/1000000 [4:26:43<37:24:56,  6.70it/s]

{'loss': Array(0.1617624, dtype=float32), 'loss_cross_entropy': Array(0.15297747, dtype=float32)}


 10%|▉         | 97029/1000000 [4:26:44<25:32:24,  9.82it/s]

{'loss': Array(0.1568367, dtype=float32), 'loss_cross_entropy': Array(0.14732236, dtype=float32)}


 10%|▉         | 97039/1000000 [4:26:46<33:28:04,  7.49it/s]

{'loss': Array(0.14437549, dtype=float32), 'loss_cross_entropy': Array(0.13630648, dtype=float32)}


 10%|▉         | 97049/1000000 [4:26:47<24:42:49, 10.15it/s]

{'loss': Array(0.13625012, dtype=float32), 'loss_cross_entropy': Array(0.12823331, dtype=float32)}


 10%|▉         | 97059/1000000 [4:26:49<37:26:55,  6.70it/s]

{'loss': Array(0.14809215, dtype=float32), 'loss_cross_entropy': Array(0.13894801, dtype=float32)}


 10%|▉         | 97069/1000000 [4:26:50<25:29:27,  9.84it/s]

{'loss': Array(0.12344225, dtype=float32), 'loss_cross_entropy': Array(0.11590154, dtype=float32)}


 10%|▉         | 97079/1000000 [4:26:52<23:38:52, 10.61it/s]

{'loss': Array(0.14734225, dtype=float32), 'loss_cross_entropy': Array(0.13886574, dtype=float32)}


 10%|▉         | 97089/1000000 [4:26:53<29:55:38,  8.38it/s]

{'loss': Array(0.14140928, dtype=float32), 'loss_cross_entropy': Array(0.13354085, dtype=float32)}


 10%|▉         | 97099/1000000 [4:26:55<24:19:59, 10.31it/s]

{'loss': Array(0.15559971, dtype=float32), 'loss_cross_entropy': Array(0.14663939, dtype=float32)}


 10%|▉         | 97109/1000000 [4:26:57<33:10:23,  7.56it/s]

{'loss': Array(0.12993363, dtype=float32), 'loss_cross_entropy': Array(0.12221356, dtype=float32)}


 10%|▉         | 97119/1000000 [4:26:58<25:40:14,  9.77it/s]

{'loss': Array(0.13349617, dtype=float32), 'loss_cross_entropy': Array(0.12586085, dtype=float32)}


 10%|▉         | 97129/1000000 [4:26:59<24:55:08, 10.06it/s]

{'loss': Array(0.14604217, dtype=float32), 'loss_cross_entropy': Array(0.13807285, dtype=float32)}


 10%|▉         | 97139/1000000 [4:27:01<27:47:43,  9.02it/s]

{'loss': Array(0.13592581, dtype=float32), 'loss_cross_entropy': Array(0.12811323, dtype=float32)}


 10%|▉         | 97149/1000000 [4:27:03<25:19:46,  9.90it/s]

{'loss': Array(0.15686107, dtype=float32), 'loss_cross_entropy': Array(0.14901909, dtype=float32)}


 10%|▉         | 97159/1000000 [4:27:04<38:30:35,  6.51it/s]

{'loss': Array(0.14667726, dtype=float32), 'loss_cross_entropy': Array(0.13831855, dtype=float32)}


 10%|▉         | 97169/1000000 [4:27:06<26:34:35,  9.44it/s]

{'loss': Array(0.13111314, dtype=float32), 'loss_cross_entropy': Array(0.12347817, dtype=float32)}


 10%|▉         | 97179/1000000 [4:27:07<24:59:59, 10.03it/s]

{'loss': Array(0.1420433, dtype=float32), 'loss_cross_entropy': Array(0.13429427, dtype=float32)}


 10%|▉         | 97189/1000000 [4:27:09<30:58:18,  8.10it/s]

{'loss': Array(0.14221524, dtype=float32), 'loss_cross_entropy': Array(0.13424496, dtype=float32)}


 10%|▉         | 97199/1000000 [4:27:10<25:40:06,  9.77it/s]

{'loss': Array(0.14309989, dtype=float32), 'loss_cross_entropy': Array(0.13508312, dtype=float32)}


 10%|▉         | 97209/1000000 [4:27:12<33:37:20,  7.46it/s]

{'loss': Array(0.16453509, dtype=float32), 'loss_cross_entropy': Array(0.15603726, dtype=float32)}


 10%|▉         | 97219/1000000 [4:27:13<24:59:32, 10.03it/s]

{'loss': Array(0.13065656, dtype=float32), 'loss_cross_entropy': Array(0.12309444, dtype=float32)}


 10%|▉         | 97229/1000000 [4:27:15<33:13:21,  7.55it/s]

{'loss': Array(0.13178845, dtype=float32), 'loss_cross_entropy': Array(0.12454897, dtype=float32)}


 10%|▉         | 97239/1000000 [4:27:17<24:45:57, 10.13it/s]

{'loss': Array(0.13830155, dtype=float32), 'loss_cross_entropy': Array(0.13001236, dtype=float32)}


 10%|▉         | 97249/1000000 [4:27:18<22:56:37, 10.93it/s]

{'loss': Array(0.14258966, dtype=float32), 'loss_cross_entropy': Array(0.13431065, dtype=float32)}


 10%|▉         | 97259/1000000 [4:27:20<27:40:43,  9.06it/s]

{'loss': Array(0.13412333, dtype=float32), 'loss_cross_entropy': Array(0.12647681, dtype=float32)}


 10%|▉         | 97269/1000000 [4:27:21<23:33:00, 10.65it/s]

{'loss': Array(0.14256454, dtype=float32), 'loss_cross_entropy': Array(0.13451694, dtype=float32)}


 10%|▉         | 97279/1000000 [4:27:23<30:23:20,  8.25it/s]

{'loss': Array(0.12639928, dtype=float32), 'loss_cross_entropy': Array(0.11877143, dtype=float32)}


 10%|▉         | 97289/1000000 [4:27:24<24:58:15, 10.04it/s]

{'loss': Array(0.15503204, dtype=float32), 'loss_cross_entropy': Array(0.14633824, dtype=float32)}


 10%|▉         | 97299/1000000 [4:27:26<36:39:41,  6.84it/s]

{'loss': Array(0.16022094, dtype=float32), 'loss_cross_entropy': Array(0.15190142, dtype=float32)}


 10%|▉         | 97309/1000000 [4:27:27<25:01:31, 10.02it/s]

{'loss': Array(0.14078505, dtype=float32), 'loss_cross_entropy': Array(0.13262019, dtype=float32)}


 10%|▉         | 97319/1000000 [4:27:29<36:44:18,  6.83it/s]

{'loss': Array(0.1376676, dtype=float32), 'loss_cross_entropy': Array(0.12981956, dtype=float32)}


 10%|▉         | 97329/1000000 [4:27:30<25:27:09,  9.85it/s]

{'loss': Array(0.14396627, dtype=float32), 'loss_cross_entropy': Array(0.1356021, dtype=float32)}


 10%|▉         | 97339/1000000 [4:27:32<24:28:42, 10.24it/s]

{'loss': Array(0.14329466, dtype=float32), 'loss_cross_entropy': Array(0.13481025, dtype=float32)}


 10%|▉         | 97349/1000000 [4:27:33<33:15:46,  7.54it/s]

{'loss': Array(0.14024644, dtype=float32), 'loss_cross_entropy': Array(0.13237204, dtype=float32)}


 10%|▉         | 97359/1000000 [4:27:35<25:22:56,  9.88it/s]

{'loss': Array(0.16017489, dtype=float32), 'loss_cross_entropy': Array(0.15143262, dtype=float32)}


 10%|▉         | 97369/1000000 [4:27:36<37:10:09,  6.75it/s]

{'loss': Array(0.14257014, dtype=float32), 'loss_cross_entropy': Array(0.13413201, dtype=float32)}


 10%|▉         | 97379/1000000 [4:27:38<25:41:57,  9.76it/s]

{'loss': Array(0.14893523, dtype=float32), 'loss_cross_entropy': Array(0.14024775, dtype=float32)}


 10%|▉         | 97389/1000000 [4:27:39<23:28:50, 10.68it/s]

{'loss': Array(0.12663464, dtype=float32), 'loss_cross_entropy': Array(0.11870071, dtype=float32)}


 10%|▉         | 97399/1000000 [4:27:41<26:12:35,  9.57it/s]

{'loss': Array(0.13219236, dtype=float32), 'loss_cross_entropy': Array(0.12450111, dtype=float32)}


 10%|▉         | 97409/1000000 [4:27:42<23:23:20, 10.72it/s]

{'loss': Array(0.1601152, dtype=float32), 'loss_cross_entropy': Array(0.15155326, dtype=float32)}


 10%|▉         | 97419/1000000 [4:27:44<26:05:34,  9.61it/s]

{'loss': Array(0.13625869, dtype=float32), 'loss_cross_entropy': Array(0.12842125, dtype=float32)}


 10%|▉         | 97429/1000000 [4:27:45<23:49:11, 10.53it/s]

{'loss': Array(0.1367271, dtype=float32), 'loss_cross_entropy': Array(0.12941962, dtype=float32)}


 10%|▉         | 97439/1000000 [4:27:47<36:55:12,  6.79it/s]

{'loss': Array(0.13383785, dtype=float32), 'loss_cross_entropy': Array(0.12618767, dtype=float32)}


 10%|▉         | 97449/1000000 [4:27:48<25:18:05,  9.91it/s]

{'loss': Array(0.1444636, dtype=float32), 'loss_cross_entropy': Array(0.1361257, dtype=float32)}


 10%|▉         | 97459/1000000 [4:27:50<23:17:31, 10.76it/s]

{'loss': Array(0.13464199, dtype=float32), 'loss_cross_entropy': Array(0.12621051, dtype=float32)}


 10%|▉         | 97469/1000000 [4:27:51<29:38:03,  8.46it/s]

{'loss': Array(0.13771658, dtype=float32), 'loss_cross_entropy': Array(0.12981045, dtype=float32)}


 10%|▉         | 97479/1000000 [4:27:53<24:08:40, 10.38it/s]

{'loss': Array(0.13233732, dtype=float32), 'loss_cross_entropy': Array(0.12493335, dtype=float32)}


 10%|▉         | 97489/1000000 [4:27:54<36:42:36,  6.83it/s]

{'loss': Array(0.13840209, dtype=float32), 'loss_cross_entropy': Array(0.13019723, dtype=float32)}


 10%|▉         | 97499/1000000 [4:27:56<25:15:38,  9.92it/s]

{'loss': Array(0.14356863, dtype=float32), 'loss_cross_entropy': Array(0.13564263, dtype=float32)}


 10%|▉         | 97509/1000000 [4:28:04<82:38:46,  3.03it/s] 

{'loss': Array(0.15251577, dtype=float32), 'loss_cross_entropy': Array(0.14480118, dtype=float32)}


 10%|▉         | 97519/1000000 [4:28:05<38:49:18,  6.46it/s]

{'loss': Array(0.16008924, dtype=float32), 'loss_cross_entropy': Array(0.15093909, dtype=float32)}


 10%|▉         | 97529/1000000 [4:28:07<27:11:12,  9.22it/s]

{'loss': Array(0.13543211, dtype=float32), 'loss_cross_entropy': Array(0.12807275, dtype=float32)}


 10%|▉         | 97539/1000000 [4:28:09<31:31:29,  7.95it/s]

{'loss': Array(0.15415265, dtype=float32), 'loss_cross_entropy': Array(0.14479294, dtype=float32)}


 10%|▉         | 97549/1000000 [4:28:10<25:12:58,  9.94it/s]

{'loss': Array(0.14312467, dtype=float32), 'loss_cross_entropy': Array(0.13510914, dtype=float32)}


 10%|▉         | 97559/1000000 [4:28:12<38:58:21,  6.43it/s]

{'loss': Array(0.1304797, dtype=float32), 'loss_cross_entropy': Array(0.12279397, dtype=float32)}


 10%|▉         | 97569/1000000 [4:28:13<26:28:20,  9.47it/s]

{'loss': Array(0.15305656, dtype=float32), 'loss_cross_entropy': Array(0.14467247, dtype=float32)}


 10%|▉         | 97579/1000000 [4:28:15<24:39:54, 10.16it/s]

{'loss': Array(0.13395576, dtype=float32), 'loss_cross_entropy': Array(0.12645702, dtype=float32)}


 10%|▉         | 97589/1000000 [4:28:16<26:49:10,  9.35it/s]

{'loss': Array(0.14310612, dtype=float32), 'loss_cross_entropy': Array(0.1346515, dtype=float32)}


 10%|▉         | 97599/1000000 [4:28:18<24:56:43, 10.05it/s]

{'loss': Array(0.1418901, dtype=float32), 'loss_cross_entropy': Array(0.13367593, dtype=float32)}


 10%|▉         | 97609/1000000 [4:28:20<38:43:10,  6.47it/s]

{'loss': Array(0.14172678, dtype=float32), 'loss_cross_entropy': Array(0.13391308, dtype=float32)}


 10%|▉         | 97619/1000000 [4:28:21<25:47:48,  9.72it/s]

{'loss': Array(0.1313014, dtype=float32), 'loss_cross_entropy': Array(0.12347464, dtype=float32)}


 10%|▉         | 97629/1000000 [4:28:22<23:40:39, 10.59it/s]

{'loss': Array(0.13241844, dtype=float32), 'loss_cross_entropy': Array(0.12535957, dtype=float32)}


 10%|▉         | 97639/1000000 [4:28:24<28:08:54,  8.90it/s]

{'loss': Array(0.14268194, dtype=float32), 'loss_cross_entropy': Array(0.13446411, dtype=float32)}


 10%|▉         | 97649/1000000 [4:28:25<24:23:56, 10.27it/s]

{'loss': Array(0.1618144, dtype=float32), 'loss_cross_entropy': Array(0.15270123, dtype=float32)}


 10%|▉         | 97659/1000000 [4:28:27<32:46:40,  7.65it/s]

{'loss': Array(0.13056223, dtype=float32), 'loss_cross_entropy': Array(0.12254494, dtype=float32)}


 10%|▉         | 97669/1000000 [4:28:28<24:46:00, 10.12it/s]

{'loss': Array(0.1397216, dtype=float32), 'loss_cross_entropy': Array(0.13136928, dtype=float32)}


 10%|▉         | 97679/1000000 [4:28:30<37:05:17,  6.76it/s]

{'loss': Array(0.14607774, dtype=float32), 'loss_cross_entropy': Array(0.13745886, dtype=float32)}


 10%|▉         | 97689/1000000 [4:28:31<25:15:31,  9.92it/s]

{'loss': Array(0.14157538, dtype=float32), 'loss_cross_entropy': Array(0.13335685, dtype=float32)}


 10%|▉         | 97699/1000000 [4:28:33<23:08:40, 10.83it/s]

{'loss': Array(0.12818971, dtype=float32), 'loss_cross_entropy': Array(0.12047261, dtype=float32)}


 10%|▉         | 97709/1000000 [4:28:35<29:58:09,  8.36it/s]

{'loss': Array(0.14099488, dtype=float32), 'loss_cross_entropy': Array(0.13303624, dtype=float32)}


 10%|▉         | 97719/1000000 [4:28:36<24:22:55, 10.28it/s]

{'loss': Array(0.1351671, dtype=float32), 'loss_cross_entropy': Array(0.1271629, dtype=float32)}


 10%|▉         | 97729/1000000 [4:28:38<32:52:52,  7.62it/s]

{'loss': Array(0.13375875, dtype=float32), 'loss_cross_entropy': Array(0.12582563, dtype=float32)}


 10%|▉         | 97739/1000000 [4:28:39<24:44:38, 10.13it/s]

{'loss': Array(0.14993559, dtype=float32), 'loss_cross_entropy': Array(0.14160328, dtype=float32)}


 10%|▉         | 97749/1000000 [4:28:40<23:35:30, 10.62it/s]

{'loss': Array(0.11918472, dtype=float32), 'loss_cross_entropy': Array(0.11245846, dtype=float32)}


 10%|▉         | 97759/1000000 [4:28:42<26:26:05,  9.48it/s]

{'loss': Array(0.15470013, dtype=float32), 'loss_cross_entropy': Array(0.14578335, dtype=float32)}


 10%|▉         | 97769/1000000 [4:28:43<24:09:25, 10.37it/s]

{'loss': Array(0.12816902, dtype=float32), 'loss_cross_entropy': Array(0.1209574, dtype=float32)}


 10%|▉         | 97779/1000000 [4:28:45<36:56:28,  6.78it/s]

{'loss': Array(0.14563718, dtype=float32), 'loss_cross_entropy': Array(0.13768041, dtype=float32)}


 10%|▉         | 97789/1000000 [4:28:47<25:47:04,  9.72it/s]

{'loss': Array(0.15110372, dtype=float32), 'loss_cross_entropy': Array(0.14283693, dtype=float32)}


 10%|▉         | 97799/1000000 [4:28:48<23:44:56, 10.55it/s]

{'loss': Array(0.1579491, dtype=float32), 'loss_cross_entropy': Array(0.15001082, dtype=float32)}


 10%|▉         | 97809/1000000 [4:28:50<30:19:45,  8.26it/s]

{'loss': Array(0.15281598, dtype=float32), 'loss_cross_entropy': Array(0.14416659, dtype=float32)}


 10%|▉         | 97819/1000000 [4:28:51<24:41:41, 10.15it/s]

{'loss': Array(0.13563591, dtype=float32), 'loss_cross_entropy': Array(0.12777844, dtype=float32)}


 10%|▉         | 97829/1000000 [4:28:53<33:11:45,  7.55it/s]

{'loss': Array(0.13947923, dtype=float32), 'loss_cross_entropy': Array(0.13170974, dtype=float32)}


 10%|▉         | 97839/1000000 [4:28:54<24:41:30, 10.15it/s]

{'loss': Array(0.15021679, dtype=float32), 'loss_cross_entropy': Array(0.14221677, dtype=float32)}


 10%|▉         | 97849/1000000 [4:28:56<32:53:05,  7.62it/s]

{'loss': Array(0.15101646, dtype=float32), 'loss_cross_entropy': Array(0.1426546, dtype=float32)}


 10%|▉         | 97859/1000000 [4:28:57<25:23:16,  9.87it/s]

{'loss': Array(0.15104531, dtype=float32), 'loss_cross_entropy': Array(0.1424876, dtype=float32)}


 10%|▉         | 97869/1000000 [4:28:59<24:04:29, 10.41it/s]

{'loss': Array(0.14212304, dtype=float32), 'loss_cross_entropy': Array(0.13385287, dtype=float32)}


 10%|▉         | 97879/1000000 [4:29:00<28:13:06,  8.88it/s]

{'loss': Array(0.13814233, dtype=float32), 'loss_cross_entropy': Array(0.12992755, dtype=float32)}


 10%|▉         | 97889/1000000 [4:29:02<25:14:00,  9.93it/s]

{'loss': Array(0.14928104, dtype=float32), 'loss_cross_entropy': Array(0.14092772, dtype=float32)}


 10%|▉         | 97899/1000000 [4:29:04<31:11:11,  8.04it/s]

{'loss': Array(0.13675539, dtype=float32), 'loss_cross_entropy': Array(0.12888838, dtype=float32)}


 10%|▉         | 97909/1000000 [4:29:05<25:47:16,  9.72it/s]

{'loss': Array(0.12575316, dtype=float32), 'loss_cross_entropy': Array(0.11842872, dtype=float32)}


 10%|▉         | 97919/1000000 [4:29:07<38:11:47,  6.56it/s]

{'loss': Array(0.14128423, dtype=float32), 'loss_cross_entropy': Array(0.13312651, dtype=float32)}


 10%|▉         | 97929/1000000 [4:29:08<26:17:18,  9.53it/s]

{'loss': Array(0.14073661, dtype=float32), 'loss_cross_entropy': Array(0.13188204, dtype=float32)}


 10%|▉         | 97939/1000000 [4:29:10<38:27:45,  6.51it/s]

{'loss': Array(0.14519048, dtype=float32), 'loss_cross_entropy': Array(0.13645963, dtype=float32)}


 10%|▉         | 97949/1000000 [4:29:11<27:12:43,  9.21it/s]

{'loss': Array(0.12325037, dtype=float32), 'loss_cross_entropy': Array(0.1156763, dtype=float32)}


 10%|▉         | 97959/1000000 [4:29:13<24:11:31, 10.36it/s]

{'loss': Array(0.1537128, dtype=float32), 'loss_cross_entropy': Array(0.14538632, dtype=float32)}


 10%|▉         | 97969/1000000 [4:29:14<32:47:36,  7.64it/s]

{'loss': Array(0.14583094, dtype=float32), 'loss_cross_entropy': Array(0.13738473, dtype=float32)}


 10%|▉         | 97979/1000000 [4:29:16<24:45:12, 10.12it/s]

{'loss': Array(0.14614724, dtype=float32), 'loss_cross_entropy': Array(0.13762473, dtype=float32)}


 10%|▉         | 97989/1000000 [4:29:18<37:10:59,  6.74it/s]

{'loss': Array(0.15269007, dtype=float32), 'loss_cross_entropy': Array(0.14415042, dtype=float32)}


 10%|▉         | 97999/1000000 [4:29:19<25:38:32,  9.77it/s]

{'loss': Array(0.15310104, dtype=float32), 'loss_cross_entropy': Array(0.1447296, dtype=float32)}


 10%|▉         | 98009/1000000 [4:29:27<83:00:49,  3.02it/s] 

{'loss': Array(0.15557961, dtype=float32), 'loss_cross_entropy': Array(0.14709395, dtype=float32)}


 10%|▉         | 98019/1000000 [4:29:29<36:13:31,  6.92it/s] 

{'loss': Array(0.14636229, dtype=float32), 'loss_cross_entropy': Array(0.13794199, dtype=float32)}


 10%|▉         | 98029/1000000 [4:29:30<25:11:05,  9.95it/s]

{'loss': Array(0.14015894, dtype=float32), 'loss_cross_entropy': Array(0.13200006, dtype=float32)}


 10%|▉         | 98039/1000000 [4:29:32<27:06:13,  9.24it/s]

{'loss': Array(0.12781173, dtype=float32), 'loss_cross_entropy': Array(0.11961434, dtype=float32)}


 10%|▉         | 98049/1000000 [4:29:33<23:27:18, 10.68it/s]

{'loss': Array(0.15779915, dtype=float32), 'loss_cross_entropy': Array(0.14955984, dtype=float32)}


 10%|▉         | 98059/1000000 [4:29:35<37:03:14,  6.76it/s]

{'loss': Array(0.14635044, dtype=float32), 'loss_cross_entropy': Array(0.13831507, dtype=float32)}


 10%|▉         | 98069/1000000 [4:29:36<25:32:35,  9.81it/s]

{'loss': Array(0.15716009, dtype=float32), 'loss_cross_entropy': Array(0.14810126, dtype=float32)}


 10%|▉         | 98079/1000000 [4:29:37<23:24:35, 10.70it/s]

{'loss': Array(0.1515981, dtype=float32), 'loss_cross_entropy': Array(0.14284112, dtype=float32)}


 10%|▉         | 98089/1000000 [4:29:39<29:47:33,  8.41it/s]

{'loss': Array(0.15059404, dtype=float32), 'loss_cross_entropy': Array(0.142596, dtype=float32)}


 10%|▉         | 98099/1000000 [4:29:40<24:27:08, 10.25it/s]

{'loss': Array(0.14851886, dtype=float32), 'loss_cross_entropy': Array(0.14029644, dtype=float32)}


 10%|▉         | 98109/1000000 [4:29:42<37:14:11,  6.73it/s]

{'loss': Array(0.16142206, dtype=float32), 'loss_cross_entropy': Array(0.15280901, dtype=float32)}


 10%|▉         | 98119/1000000 [4:29:44<25:28:19,  9.84it/s]

{'loss': Array(0.1458167, dtype=float32), 'loss_cross_entropy': Array(0.13719492, dtype=float32)}


 10%|▉         | 98129/1000000 [4:29:45<23:15:20, 10.77it/s]

{'loss': Array(0.1433106, dtype=float32), 'loss_cross_entropy': Array(0.13496234, dtype=float32)}


 10%|▉         | 98139/1000000 [4:29:47<27:47:04,  9.02it/s]

{'loss': Array(0.14824021, dtype=float32), 'loss_cross_entropy': Array(0.13993491, dtype=float32)}


 10%|▉         | 98149/1000000 [4:29:48<23:42:30, 10.57it/s]

{'loss': Array(0.11521202, dtype=float32), 'loss_cross_entropy': Array(0.10794543, dtype=float32)}


 10%|▉         | 98159/1000000 [4:29:50<32:52:18,  7.62it/s]

{'loss': Array(0.13807188, dtype=float32), 'loss_cross_entropy': Array(0.13013111, dtype=float32)}


 10%|▉         | 98169/1000000 [4:29:51<24:44:03, 10.13it/s]

{'loss': Array(0.14063492, dtype=float32), 'loss_cross_entropy': Array(0.13303211, dtype=float32)}


 10%|▉         | 98179/1000000 [4:29:52<23:44:55, 10.55it/s]

{'loss': Array(0.15130082, dtype=float32), 'loss_cross_entropy': Array(0.1428748, dtype=float32)}


 10%|▉         | 98189/1000000 [4:29:54<29:53:42,  8.38it/s]

{'loss': Array(0.14223084, dtype=float32), 'loss_cross_entropy': Array(0.13485354, dtype=float32)}


 10%|▉         | 98199/1000000 [4:29:55<24:39:10, 10.16it/s]

{'loss': Array(0.16354303, dtype=float32), 'loss_cross_entropy': Array(0.15439762, dtype=float32)}


 10%|▉         | 98209/1000000 [4:29:57<23:52:49, 10.49it/s]

{'loss': Array(0.13811441, dtype=float32), 'loss_cross_entropy': Array(0.13028833, dtype=float32)}


 10%|▉         | 98219/1000000 [4:29:59<26:42:14,  9.38it/s]

{'loss': Array(0.14121532, dtype=float32), 'loss_cross_entropy': Array(0.13269858, dtype=float32)}


 10%|▉         | 98229/1000000 [4:30:00<24:03:14, 10.41it/s]

{'loss': Array(0.14282776, dtype=float32), 'loss_cross_entropy': Array(0.13436556, dtype=float32)}


 10%|▉         | 98239/1000000 [4:30:02<30:57:55,  8.09it/s]

{'loss': Array(0.14469944, dtype=float32), 'loss_cross_entropy': Array(0.13675444, dtype=float32)}


 10%|▉         | 98249/1000000 [4:30:03<26:03:12,  9.61it/s]

{'loss': Array(0.16051781, dtype=float32), 'loss_cross_entropy': Array(0.15165584, dtype=float32)}


 10%|▉         | 98259/1000000 [4:30:05<38:28:58,  6.51it/s]

{'loss': Array(0.14355247, dtype=float32), 'loss_cross_entropy': Array(0.13528034, dtype=float32)}


 10%|▉         | 98269/1000000 [4:30:06<27:31:32,  9.10it/s]

{'loss': Array(0.14580193, dtype=float32), 'loss_cross_entropy': Array(0.13734916, dtype=float32)}


 10%|▉         | 98279/1000000 [4:30:08<38:54:38,  6.44it/s]

{'loss': Array(0.1393121, dtype=float32), 'loss_cross_entropy': Array(0.1303432, dtype=float32)}


 10%|▉         | 98289/1000000 [4:30:10<26:44:19,  9.37it/s]

{'loss': Array(0.14989452, dtype=float32), 'loss_cross_entropy': Array(0.142204, dtype=float32)}


 10%|▉         | 98299/1000000 [4:30:11<24:25:51, 10.25it/s]

{'loss': Array(0.15232877, dtype=float32), 'loss_cross_entropy': Array(0.1440404, dtype=float32)}


 10%|▉         | 98309/1000000 [4:30:13<32:39:40,  7.67it/s]

{'loss': Array(0.16390541, dtype=float32), 'loss_cross_entropy': Array(0.1551718, dtype=float32)}


 10%|▉         | 98319/1000000 [4:30:14<25:09:17,  9.96it/s]

{'loss': Array(0.14768633, dtype=float32), 'loss_cross_entropy': Array(0.13971083, dtype=float32)}


 10%|▉         | 98329/1000000 [4:30:16<36:58:17,  6.77it/s]

{'loss': Array(0.16175918, dtype=float32), 'loss_cross_entropy': Array(0.15297046, dtype=float32)}


 10%|▉         | 98339/1000000 [4:30:17<25:39:30,  9.76it/s]

{'loss': Array(0.14155495, dtype=float32), 'loss_cross_entropy': Array(0.13355954, dtype=float32)}


 10%|▉         | 98349/1000000 [4:30:19<24:28:06, 10.24it/s]

{'loss': Array(0.16154343, dtype=float32), 'loss_cross_entropy': Array(0.15283339, dtype=float32)}


 10%|▉         | 98359/1000000 [4:30:20<26:36:25,  9.41it/s]

{'loss': Array(0.1545814, dtype=float32), 'loss_cross_entropy': Array(0.14609121, dtype=float32)}


 10%|▉         | 98369/1000000 [4:30:22<23:33:36, 10.63it/s]

{'loss': Array(0.14371322, dtype=float32), 'loss_cross_entropy': Array(0.13480072, dtype=float32)}


 10%|▉         | 98379/1000000 [4:30:23<26:01:16,  9.62it/s]

{'loss': Array(0.14847828, dtype=float32), 'loss_cross_entropy': Array(0.13933803, dtype=float32)}


 10%|▉         | 98389/1000000 [4:30:25<23:23:01, 10.71it/s]

{'loss': Array(0.12633632, dtype=float32), 'loss_cross_entropy': Array(0.11885587, dtype=float32)}


 10%|▉         | 98399/1000000 [4:30:26<36:27:26,  6.87it/s]

{'loss': Array(0.1544416, dtype=float32), 'loss_cross_entropy': Array(0.14554733, dtype=float32)}


 10%|▉         | 98409/1000000 [4:30:28<25:15:44,  9.91it/s]

{'loss': Array(0.13914432, dtype=float32), 'loss_cross_entropy': Array(0.13124302, dtype=float32)}


 10%|▉         | 98419/1000000 [4:30:29<23:50:15, 10.51it/s]

{'loss': Array(0.13887376, dtype=float32), 'loss_cross_entropy': Array(0.13062066, dtype=float32)}


 10%|▉         | 98429/1000000 [4:30:31<29:47:45,  8.41it/s]

{'loss': Array(0.14255576, dtype=float32), 'loss_cross_entropy': Array(0.13473003, dtype=float32)}


 10%|▉         | 98439/1000000 [4:30:32<26:23:30,  9.49it/s]

{'loss': Array(0.13628255, dtype=float32), 'loss_cross_entropy': Array(0.12795897, dtype=float32)}


 10%|▉         | 98449/1000000 [4:30:34<37:21:35,  6.70it/s]

{'loss': Array(0.1474825, dtype=float32), 'loss_cross_entropy': Array(0.13926482, dtype=float32)}


 10%|▉         | 98459/1000000 [4:30:35<25:14:02,  9.92it/s]

{'loss': Array(0.14550656, dtype=float32), 'loss_cross_entropy': Array(0.13745789, dtype=float32)}


 10%|▉         | 98469/1000000 [4:30:37<23:21:42, 10.72it/s]

{'loss': Array(0.1520308, dtype=float32), 'loss_cross_entropy': Array(0.14301035, dtype=float32)}


 10%|▉         | 98479/1000000 [4:30:38<28:01:15,  8.94it/s]

{'loss': Array(0.14847158, dtype=float32), 'loss_cross_entropy': Array(0.14001366, dtype=float32)}


 10%|▉         | 98489/1000000 [4:30:40<23:58:26, 10.45it/s]

{'loss': Array(0.14079848, dtype=float32), 'loss_cross_entropy': Array(0.13271146, dtype=float32)}


 10%|▉         | 98499/1000000 [4:30:41<33:03:59,  7.57it/s]

{'loss': Array(0.15093672, dtype=float32), 'loss_cross_entropy': Array(0.14245069, dtype=float32)}


 10%|▉         | 98509/1000000 [4:30:49<82:35:58,  3.03it/s] 

{'loss': Array(0.16584867, dtype=float32), 'loss_cross_entropy': Array(0.15716878, dtype=float32)}


 10%|▉         | 98519/1000000 [4:30:51<42:54:58,  5.83it/s]

{'loss': Array(0.12907283, dtype=float32), 'loss_cross_entropy': Array(0.12134568, dtype=float32)}


 10%|▉         | 98529/1000000 [4:30:52<26:42:01,  9.38it/s]

{'loss': Array(0.14306897, dtype=float32), 'loss_cross_entropy': Array(0.13539477, dtype=float32)}


 10%|▉         | 98539/1000000 [4:30:54<23:54:21, 10.47it/s]

{'loss': Array(0.1341507, dtype=float32), 'loss_cross_entropy': Array(0.12627932, dtype=float32)}


 10%|▉         | 98549/1000000 [4:30:55<29:53:04,  8.38it/s]

{'loss': Array(0.1284517, dtype=float32), 'loss_cross_entropy': Array(0.12098689, dtype=float32)}


 10%|▉         | 98559/1000000 [4:30:57<24:32:00, 10.21it/s]

{'loss': Array(0.14512455, dtype=float32), 'loss_cross_entropy': Array(0.13710473, dtype=float32)}


 10%|▉         | 98569/1000000 [4:30:58<23:05:57, 10.84it/s]

{'loss': Array(0.16160771, dtype=float32), 'loss_cross_entropy': Array(0.15314126, dtype=float32)}


 10%|▉         | 98579/1000000 [4:31:00<27:45:19,  9.02it/s]

{'loss': Array(0.1314662, dtype=float32), 'loss_cross_entropy': Array(0.12436211, dtype=float32)}


 10%|▉         | 98589/1000000 [4:31:01<25:11:23,  9.94it/s]

{'loss': Array(0.1468089, dtype=float32), 'loss_cross_entropy': Array(0.13873744, dtype=float32)}


 10%|▉         | 98599/1000000 [4:31:03<30:52:05,  8.11it/s]

{'loss': Array(0.14266989, dtype=float32), 'loss_cross_entropy': Array(0.13456535, dtype=float32)}


 10%|▉         | 98609/1000000 [4:31:04<25:52:24,  9.68it/s]

{'loss': Array(0.14530449, dtype=float32), 'loss_cross_entropy': Array(0.13706705, dtype=float32)}


 10%|▉         | 98619/1000000 [4:31:06<34:02:37,  7.35it/s]

{'loss': Array(0.1420386, dtype=float32), 'loss_cross_entropy': Array(0.13360408, dtype=float32)}


 10%|▉         | 98629/1000000 [4:31:08<26:10:32,  9.57it/s]

{'loss': Array(0.15149902, dtype=float32), 'loss_cross_entropy': Array(0.14279267, dtype=float32)}


 10%|▉         | 98639/1000000 [4:31:09<25:21:50,  9.87it/s]

{'loss': Array(0.12991594, dtype=float32), 'loss_cross_entropy': Array(0.12278463, dtype=float32)}


 10%|▉         | 98649/1000000 [4:31:11<30:19:19,  8.26it/s]

{'loss': Array(0.14421748, dtype=float32), 'loss_cross_entropy': Array(0.13576655, dtype=float32)}


 10%|▉         | 98659/1000000 [4:31:12<24:29:24, 10.22it/s]

{'loss': Array(0.14950517, dtype=float32), 'loss_cross_entropy': Array(0.14089964, dtype=float32)}


 10%|▉         | 98669/1000000 [4:31:14<23:18:34, 10.74it/s]

{'loss': Array(0.1594892, dtype=float32), 'loss_cross_entropy': Array(0.15058695, dtype=float32)}


 10%|▉         | 98679/1000000 [4:31:15<26:38:19,  9.40it/s]

{'loss': Array(0.14222024, dtype=float32), 'loss_cross_entropy': Array(0.13388148, dtype=float32)}


 10%|▉         | 98689/1000000 [4:31:17<23:36:51, 10.60it/s]

{'loss': Array(0.14268295, dtype=float32), 'loss_cross_entropy': Array(0.13414769, dtype=float32)}


 10%|▉         | 98699/1000000 [4:31:18<29:48:07,  8.40it/s]

{'loss': Array(0.16107284, dtype=float32), 'loss_cross_entropy': Array(0.1519749, dtype=float32)}


 10%|▉         | 98709/1000000 [4:31:20<24:14:35, 10.33it/s]

{'loss': Array(0.14577055, dtype=float32), 'loss_cross_entropy': Array(0.1368427, dtype=float32)}


 10%|▉         | 98719/1000000 [4:31:21<36:40:34,  6.83it/s]

{'loss': Array(0.13912605, dtype=float32), 'loss_cross_entropy': Array(0.13145642, dtype=float32)}


 10%|▉         | 98729/1000000 [4:31:23<26:05:06,  9.60it/s]

{'loss': Array(0.16139388, dtype=float32), 'loss_cross_entropy': Array(0.15231772, dtype=float32)}


 10%|▉         | 98739/1000000 [4:31:24<23:16:39, 10.75it/s]

{'loss': Array(0.1580331, dtype=float32), 'loss_cross_entropy': Array(0.1491694, dtype=float32)}


 10%|▉         | 98749/1000000 [4:31:26<32:33:33,  7.69it/s]

{'loss': Array(0.13810062, dtype=float32), 'loss_cross_entropy': Array(0.13025974, dtype=float32)}


 10%|▉         | 98759/1000000 [4:31:27<25:09:13,  9.95it/s]

{'loss': Array(0.14980061, dtype=float32), 'loss_cross_entropy': Array(0.14208136, dtype=float32)}


 10%|▉         | 98769/1000000 [4:31:28<23:13:58, 10.78it/s]

{'loss': Array(0.14417106, dtype=float32), 'loss_cross_entropy': Array(0.13574721, dtype=float32)}


 10%|▉         | 98779/1000000 [4:31:30<27:32:56,  9.09it/s]

{'loss': Array(0.14478219, dtype=float32), 'loss_cross_entropy': Array(0.13671094, dtype=float32)}


 10%|▉         | 98789/1000000 [4:31:31<23:49:44, 10.51it/s]

{'loss': Array(0.14222175, dtype=float32), 'loss_cross_entropy': Array(0.13406514, dtype=float32)}


 10%|▉         | 98799/1000000 [4:31:33<30:26:28,  8.22it/s]

{'loss': Array(0.13905433, dtype=float32), 'loss_cross_entropy': Array(0.13125102, dtype=float32)}


 10%|▉         | 98809/1000000 [4:31:35<24:18:17, 10.30it/s]

{'loss': Array(0.14872451, dtype=float32), 'loss_cross_entropy': Array(0.14034335, dtype=float32)}


 10%|▉         | 98819/1000000 [4:31:36<24:03:54, 10.40it/s]

{'loss': Array(0.14497882, dtype=float32), 'loss_cross_entropy': Array(0.13678522, dtype=float32)}


 10%|▉         | 98829/1000000 [4:31:38<26:28:56,  9.45it/s]

{'loss': Array(0.1351284, dtype=float32), 'loss_cross_entropy': Array(0.12760317, dtype=float32)}


 10%|▉         | 98839/1000000 [4:31:39<23:29:38, 10.65it/s]

{'loss': Array(0.15537012, dtype=float32), 'loss_cross_entropy': Array(0.14652602, dtype=float32)}


 10%|▉         | 98849/1000000 [4:31:41<32:02:36,  7.81it/s]

{'loss': Array(0.13889533, dtype=float32), 'loss_cross_entropy': Array(0.13072744, dtype=float32)}


 10%|▉         | 98859/1000000 [4:31:42<24:38:28, 10.16it/s]

{'loss': Array(0.14395976, dtype=float32), 'loss_cross_entropy': Array(0.13603641, dtype=float32)}


 10%|▉         | 98869/1000000 [4:31:43<23:43:18, 10.55it/s]

{'loss': Array(0.12998147, dtype=float32), 'loss_cross_entropy': Array(0.12290021, dtype=float32)}


 10%|▉         | 98879/1000000 [4:31:45<26:52:41,  9.31it/s]

{'loss': Array(0.14602336, dtype=float32), 'loss_cross_entropy': Array(0.13776632, dtype=float32)}


 10%|▉         | 98889/1000000 [4:31:47<23:48:31, 10.51it/s]

{'loss': Array(0.13515034, dtype=float32), 'loss_cross_entropy': Array(0.12753747, dtype=float32)}


 10%|▉         | 98899/1000000 [4:31:48<27:38:19,  9.06it/s]

{'loss': Array(0.16046837, dtype=float32), 'loss_cross_entropy': Array(0.15206757, dtype=float32)}


 10%|▉         | 98909/1000000 [4:31:50<23:58:20, 10.44it/s]

{'loss': Array(0.14039393, dtype=float32), 'loss_cross_entropy': Array(0.13221923, dtype=float32)}


 10%|▉         | 98919/1000000 [4:31:51<36:29:55,  6.86it/s]

{'loss': Array(0.13200638, dtype=float32), 'loss_cross_entropy': Array(0.12373959, dtype=float32)}


 10%|▉         | 98929/1000000 [4:31:53<25:56:29,  9.65it/s]

{'loss': Array(0.14677435, dtype=float32), 'loss_cross_entropy': Array(0.13753812, dtype=float32)}


 10%|▉         | 98939/1000000 [4:31:54<23:37:17, 10.60it/s]

{'loss': Array(0.14708848, dtype=float32), 'loss_cross_entropy': Array(0.13840948, dtype=float32)}


 10%|▉         | 98949/1000000 [4:31:56<29:41:12,  8.43it/s]

{'loss': Array(0.13625662, dtype=float32), 'loss_cross_entropy': Array(0.12901689, dtype=float32)}


 10%|▉         | 98959/1000000 [4:31:57<24:01:38, 10.42it/s]

{'loss': Array(0.14843719, dtype=float32), 'loss_cross_entropy': Array(0.14021128, dtype=float32)}


 10%|▉         | 98969/1000000 [4:31:59<32:34:54,  7.68it/s]

{'loss': Array(0.1603346, dtype=float32), 'loss_cross_entropy': Array(0.1516579, dtype=float32)}


 10%|▉         | 98979/1000000 [4:32:00<25:43:46,  9.73it/s]

{'loss': Array(0.14878641, dtype=float32), 'loss_cross_entropy': Array(0.1402472, dtype=float32)}


 10%|▉         | 98989/1000000 [4:32:02<33:28:07,  7.48it/s]

{'loss': Array(0.14755319, dtype=float32), 'loss_cross_entropy': Array(0.13908558, dtype=float32)}


 10%|▉         | 98999/1000000 [4:32:03<26:10:29,  9.56it/s]

{'loss': Array(0.1542868, dtype=float32), 'loss_cross_entropy': Array(0.14546797, dtype=float32)}


 10%|▉         | 99009/1000000 [4:32:12<85:27:58,  2.93it/s] 

{'loss': Array(0.15017247, dtype=float32), 'loss_cross_entropy': Array(0.14163296, dtype=float32)}


 10%|▉         | 99019/1000000 [4:32:13<38:22:28,  6.52it/s]

{'loss': Array(0.15428053, dtype=float32), 'loss_cross_entropy': Array(0.14593059, dtype=float32)}


 10%|▉         | 99029/1000000 [4:32:15<26:26:58,  9.46it/s]

{'loss': Array(0.15306197, dtype=float32), 'loss_cross_entropy': Array(0.14484482, dtype=float32)}


 10%|▉         | 99039/1000000 [4:32:16<33:45:21,  7.41it/s]

{'loss': Array(0.15171383, dtype=float32), 'loss_cross_entropy': Array(0.14241022, dtype=float32)}


 10%|▉         | 99049/1000000 [4:32:18<25:02:55,  9.99it/s]

{'loss': Array(0.1521613, dtype=float32), 'loss_cross_entropy': Array(0.14287339, dtype=float32)}


 10%|▉         | 99059/1000000 [4:32:19<36:26:48,  6.87it/s]

{'loss': Array(0.14635846, dtype=float32), 'loss_cross_entropy': Array(0.13880353, dtype=float32)}


 10%|▉         | 99069/1000000 [4:32:21<25:21:56,  9.87it/s]

{'loss': Array(0.1523531, dtype=float32), 'loss_cross_entropy': Array(0.14380643, dtype=float32)}


 10%|▉         | 99079/1000000 [4:32:22<23:17:26, 10.74it/s]

{'loss': Array(0.14788505, dtype=float32), 'loss_cross_entropy': Array(0.13868377, dtype=float32)}


 10%|▉         | 99089/1000000 [4:32:24<30:16:56,  8.26it/s]

{'loss': Array(0.14459234, dtype=float32), 'loss_cross_entropy': Array(0.13662514, dtype=float32)}


 10%|▉         | 99099/1000000 [4:32:25<24:10:31, 10.35it/s]

{'loss': Array(0.14899163, dtype=float32), 'loss_cross_entropy': Array(0.14064799, dtype=float32)}


 10%|▉         | 99109/1000000 [4:32:27<32:31:05,  7.70it/s]

{'loss': Array(0.1427182, dtype=float32), 'loss_cross_entropy': Array(0.13450585, dtype=float32)}


 10%|▉         | 99119/1000000 [4:32:28<24:18:03, 10.30it/s]

{'loss': Array(0.16392647, dtype=float32), 'loss_cross_entropy': Array(0.15520073, dtype=float32)}


 10%|▉         | 99129/1000000 [4:32:30<23:13:14, 10.78it/s]

{'loss': Array(0.14158572, dtype=float32), 'loss_cross_entropy': Array(0.1329451, dtype=float32)}


 10%|▉         | 99139/1000000 [4:32:31<26:36:14,  9.41it/s]

{'loss': Array(0.16540278, dtype=float32), 'loss_cross_entropy': Array(0.15664749, dtype=float32)}


 10%|▉         | 99149/1000000 [4:32:33<23:25:42, 10.68it/s]

{'loss': Array(0.12830131, dtype=float32), 'loss_cross_entropy': Array(0.12085509, dtype=float32)}


 10%|▉         | 99159/1000000 [4:32:34<36:34:13,  6.84it/s]

{'loss': Array(0.14105766, dtype=float32), 'loss_cross_entropy': Array(0.13340245, dtype=float32)}


 10%|▉         | 99169/1000000 [4:32:36<25:06:24,  9.97it/s]

{'loss': Array(0.13194686, dtype=float32), 'loss_cross_entropy': Array(0.12395795, dtype=float32)}


 10%|▉         | 99179/1000000 [4:32:37<23:51:49, 10.49it/s]

{'loss': Array(0.15144925, dtype=float32), 'loss_cross_entropy': Array(0.14276938, dtype=float32)}


 10%|▉         | 99189/1000000 [4:32:39<29:54:26,  8.37it/s]

{'loss': Array(0.13033848, dtype=float32), 'loss_cross_entropy': Array(0.12299564, dtype=float32)}


 10%|▉         | 99199/1000000 [4:32:40<24:02:35, 10.41it/s]

{'loss': Array(0.12745796, dtype=float32), 'loss_cross_entropy': Array(0.11977897, dtype=float32)}


 10%|▉         | 99209/1000000 [4:32:42<33:37:14,  7.44it/s]

{'loss': Array(0.15530135, dtype=float32), 'loss_cross_entropy': Array(0.1472704, dtype=float32)}


 10%|▉         | 99219/1000000 [4:32:43<24:18:23, 10.29it/s]

{'loss': Array(0.15190144, dtype=float32), 'loss_cross_entropy': Array(0.14362289, dtype=float32)}


 10%|▉         | 99229/1000000 [4:32:45<32:36:57,  7.67it/s]

{'loss': Array(0.13958459, dtype=float32), 'loss_cross_entropy': Array(0.13184969, dtype=float32)}


 10%|▉         | 99239/1000000 [4:32:46<24:44:32, 10.11it/s]

{'loss': Array(0.13917737, dtype=float32), 'loss_cross_entropy': Array(0.13101254, dtype=float32)}


 10%|▉         | 99249/1000000 [4:32:48<24:17:05, 10.30it/s]

{'loss': Array(0.16500396, dtype=float32), 'loss_cross_entropy': Array(0.15629326, dtype=float32)}


 10%|▉         | 99259/1000000 [4:32:49<27:48:38,  9.00it/s]

{'loss': Array(0.14846793, dtype=float32), 'loss_cross_entropy': Array(0.14038761, dtype=float32)}


 10%|▉         | 99269/1000000 [4:32:51<23:48:54, 10.51it/s]

{'loss': Array(0.1727922, dtype=float32), 'loss_cross_entropy': Array(0.16355668, dtype=float32)}


 10%|▉         | 99279/1000000 [4:32:52<29:39:32,  8.44it/s]

{'loss': Array(0.15177034, dtype=float32), 'loss_cross_entropy': Array(0.14329264, dtype=float32)}


 10%|▉         | 99289/1000000 [4:32:54<23:43:27, 10.55it/s]

{'loss': Array(0.12453447, dtype=float32), 'loss_cross_entropy': Array(0.11686267, dtype=float32)}


 10%|▉         | 99299/1000000 [4:32:55<36:47:00,  6.80it/s]

{'loss': Array(0.14394385, dtype=float32), 'loss_cross_entropy': Array(0.13638736, dtype=float32)}


 10%|▉         | 99309/1000000 [4:32:57<25:28:39,  9.82it/s]

{'loss': Array(0.12970439, dtype=float32), 'loss_cross_entropy': Array(0.12266518, dtype=float32)}


 10%|▉         | 99319/1000000 [4:32:59<37:50:27,  6.61it/s]

{'loss': Array(0.14153437, dtype=float32), 'loss_cross_entropy': Array(0.13362376, dtype=float32)}


 10%|▉         | 99329/1000000 [4:33:00<26:27:44,  9.45it/s]

{'loss': Array(0.1572868, dtype=float32), 'loss_cross_entropy': Array(0.14817123, dtype=float32)}


 10%|▉         | 99339/1000000 [4:33:01<24:38:05, 10.16it/s]

{'loss': Array(0.12622671, dtype=float32), 'loss_cross_entropy': Array(0.11834103, dtype=float32)}


 10%|▉         | 99349/1000000 [4:33:03<34:52:40,  7.17it/s]

{'loss': Array(0.15747093, dtype=float32), 'loss_cross_entropy': Array(0.14852749, dtype=float32)}


 10%|▉         | 99359/1000000 [4:33:05<26:58:23,  9.28it/s]

{'loss': Array(0.15611215, dtype=float32), 'loss_cross_entropy': Array(0.14740942, dtype=float32)}


 10%|▉         | 99369/1000000 [4:33:06<39:47:09,  6.29it/s]

{'loss': Array(0.15871303, dtype=float32), 'loss_cross_entropy': Array(0.15013866, dtype=float32)}


 10%|▉         | 99379/1000000 [4:33:08<27:00:53,  9.26it/s]

{'loss': Array(0.14428063, dtype=float32), 'loss_cross_entropy': Array(0.1362061, dtype=float32)}


 10%|▉         | 99389/1000000 [4:33:09<24:59:26, 10.01it/s]

{'loss': Array(0.12718958, dtype=float32), 'loss_cross_entropy': Array(0.11995282, dtype=float32)}


 10%|▉         | 99399/1000000 [4:33:11<26:23:10,  9.48it/s]

{'loss': Array(0.14691104, dtype=float32), 'loss_cross_entropy': Array(0.13811229, dtype=float32)}


 10%|▉         | 99409/1000000 [4:33:12<23:31:56, 10.63it/s]

{'loss': Array(0.14812672, dtype=float32), 'loss_cross_entropy': Array(0.13961582, dtype=float32)}


 10%|▉         | 99419/1000000 [4:33:14<26:29:37,  9.44it/s]

{'loss': Array(0.14602157, dtype=float32), 'loss_cross_entropy': Array(0.13760895, dtype=float32)}


 10%|▉         | 99429/1000000 [4:33:15<24:00:52, 10.42it/s]

{'loss': Array(0.1435147, dtype=float32), 'loss_cross_entropy': Array(0.13514344, dtype=float32)}


 10%|▉         | 99439/1000000 [4:33:17<37:07:30,  6.74it/s]

{'loss': Array(0.14744042, dtype=float32), 'loss_cross_entropy': Array(0.13967218, dtype=float32)}


 10%|▉         | 99449/1000000 [4:33:19<26:25:13,  9.47it/s]

{'loss': Array(0.1365734, dtype=float32), 'loss_cross_entropy': Array(0.12866344, dtype=float32)}


 10%|▉         | 99459/1000000 [4:33:20<23:24:05, 10.69it/s]

{'loss': Array(0.15501009, dtype=float32), 'loss_cross_entropy': Array(0.14683007, dtype=float32)}


 10%|▉         | 99469/1000000 [4:33:22<30:05:00,  8.32it/s]

{'loss': Array(0.13344558, dtype=float32), 'loss_cross_entropy': Array(0.12596737, dtype=float32)}


 10%|▉         | 99479/1000000 [4:33:23<24:09:58, 10.35it/s]

{'loss': Array(0.13327715, dtype=float32), 'loss_cross_entropy': Array(0.12574671, dtype=float32)}


 10%|▉         | 99489/1000000 [4:33:25<37:11:03,  6.73it/s]

{'loss': Array(0.14577171, dtype=float32), 'loss_cross_entropy': Array(0.13696313, dtype=float32)}


 10%|▉         | 99499/1000000 [4:33:26<25:42:07,  9.73it/s]

{'loss': Array(0.15632795, dtype=float32), 'loss_cross_entropy': Array(0.14810462, dtype=float32)}


 10%|▉         | 99509/1000000 [4:33:34<80:47:06,  3.10it/s] 

{'loss': Array(0.16114196, dtype=float32), 'loss_cross_entropy': Array(0.15293388, dtype=float32)}


 10%|▉         | 99519/1000000 [4:33:36<37:26:06,  6.68it/s]

{'loss': Array(0.14449984, dtype=float32), 'loss_cross_entropy': Array(0.13608806, dtype=float32)}


 10%|▉         | 99529/1000000 [4:33:37<25:22:10,  9.86it/s]

{'loss': Array(0.14194477, dtype=float32), 'loss_cross_entropy': Array(0.13416958, dtype=float32)}


 10%|▉         | 99539/1000000 [4:33:39<29:56:10,  8.36it/s]

{'loss': Array(0.12572916, dtype=float32), 'loss_cross_entropy': Array(0.1185648, dtype=float32)}


 10%|▉         | 99549/1000000 [4:33:40<24:10:14, 10.35it/s]

{'loss': Array(0.16121726, dtype=float32), 'loss_cross_entropy': Array(0.15321298, dtype=float32)}


 10%|▉         | 99559/1000000 [4:33:42<36:55:21,  6.77it/s]

{'loss': Array(0.13759284, dtype=float32), 'loss_cross_entropy': Array(0.12960243, dtype=float32)}


 10%|▉         | 99569/1000000 [4:33:43<25:26:56,  9.83it/s]

{'loss': Array(0.15761968, dtype=float32), 'loss_cross_entropy': Array(0.1489888, dtype=float32)}


 10%|▉         | 99579/1000000 [4:33:44<23:40:42, 10.56it/s]

{'loss': Array(0.13494246, dtype=float32), 'loss_cross_entropy': Array(0.12718265, dtype=float32)}


 10%|▉         | 99589/1000000 [4:33:46<26:32:34,  9.42it/s]

{'loss': Array(0.15433608, dtype=float32), 'loss_cross_entropy': Array(0.1455761, dtype=float32)}


 10%|▉         | 99599/1000000 [4:33:47<24:06:01, 10.38it/s]

{'loss': Array(0.14551924, dtype=float32), 'loss_cross_entropy': Array(0.13809569, dtype=float32)}


 10%|▉         | 99609/1000000 [4:33:49<36:40:45,  6.82it/s]

{'loss': Array(0.15971927, dtype=float32), 'loss_cross_entropy': Array(0.15048987, dtype=float32)}


 10%|▉         | 99619/1000000 [4:33:51<25:26:02,  9.83it/s]

{'loss': Array(0.15550701, dtype=float32), 'loss_cross_entropy': Array(0.14635108, dtype=float32)}


 10%|▉         | 99629/1000000 [4:33:52<23:35:28, 10.60it/s]

{'loss': Array(0.13327084, dtype=float32), 'loss_cross_entropy': Array(0.12556909, dtype=float32)}


 10%|▉         | 99639/1000000 [4:33:54<27:45:59,  9.01it/s]

{'loss': Array(0.15299092, dtype=float32), 'loss_cross_entropy': Array(0.14503843, dtype=float32)}


 10%|▉         | 99649/1000000 [4:33:55<23:41:48, 10.55it/s]

{'loss': Array(0.1435195, dtype=float32), 'loss_cross_entropy': Array(0.13604335, dtype=float32)}


 10%|▉         | 99659/1000000 [4:33:57<32:48:33,  7.62it/s]

{'loss': Array(0.1541417, dtype=float32), 'loss_cross_entropy': Array(0.14470527, dtype=float32)}


 10%|▉         | 99669/1000000 [4:33:58<24:39:34, 10.14it/s]

{'loss': Array(0.15704075, dtype=float32), 'loss_cross_entropy': Array(0.14823481, dtype=float32)}


 10%|▉         | 99679/1000000 [4:34:00<38:01:29,  6.58it/s]

{'loss': Array(0.1593228, dtype=float32), 'loss_cross_entropy': Array(0.15041918, dtype=float32)}


 10%|▉         | 99689/1000000 [4:34:01<26:44:02,  9.35it/s]

{'loss': Array(0.14848341, dtype=float32), 'loss_cross_entropy': Array(0.14046995, dtype=float32)}


 10%|▉         | 99699/1000000 [4:34:03<25:15:10,  9.90it/s]

{'loss': Array(0.14839035, dtype=float32), 'loss_cross_entropy': Array(0.14011262, dtype=float32)}


 10%|▉         | 99709/1000000 [4:34:04<31:15:38,  8.00it/s]

{'loss': Array(0.14586405, dtype=float32), 'loss_cross_entropy': Array(0.1376857, dtype=float32)}


 10%|▉         | 99719/1000000 [4:34:06<25:05:28,  9.97it/s]

{'loss': Array(0.15069525, dtype=float32), 'loss_cross_entropy': Array(0.14206252, dtype=float32)}


 10%|▉         | 99729/1000000 [4:34:08<34:41:50,  7.21it/s]

{'loss': Array(0.15111904, dtype=float32), 'loss_cross_entropy': Array(0.14278047, dtype=float32)}


 10%|▉         | 99739/1000000 [4:34:09<25:49:51,  9.68it/s]

{'loss': Array(0.14150472, dtype=float32), 'loss_cross_entropy': Array(0.13350007, dtype=float32)}


 10%|▉         | 99749/1000000 [4:34:10<24:42:32, 10.12it/s]

{'loss': Array(0.16268736, dtype=float32), 'loss_cross_entropy': Array(0.15393232, dtype=float32)}


 10%|▉         | 99759/1000000 [4:34:12<26:36:26,  9.40it/s]

{'loss': Array(0.14596076, dtype=float32), 'loss_cross_entropy': Array(0.13750292, dtype=float32)}


 10%|▉         | 99769/1000000 [4:34:13<23:44:08, 10.54it/s]

{'loss': Array(0.14758389, dtype=float32), 'loss_cross_entropy': Array(0.13878365, dtype=float32)}


 10%|▉         | 99779/1000000 [4:34:15<36:24:29,  6.87it/s]

{'loss': Array(0.1550395, dtype=float32), 'loss_cross_entropy': Array(0.14612107, dtype=float32)}


 10%|▉         | 99789/1000000 [4:34:16<25:02:57,  9.98it/s]

{'loss': Array(0.17476325, dtype=float32), 'loss_cross_entropy': Array(0.16536616, dtype=float32)}


 10%|▉         | 99799/1000000 [4:34:18<23:23:51, 10.69it/s]

{'loss': Array(0.16207637, dtype=float32), 'loss_cross_entropy': Array(0.15269427, dtype=float32)}


 10%|▉         | 99809/1000000 [4:34:20<29:45:09,  8.40it/s]

{'loss': Array(0.13950083, dtype=float32), 'loss_cross_entropy': Array(0.13158219, dtype=float32)}


 10%|▉         | 99819/1000000 [4:34:21<24:15:39, 10.31it/s]

{'loss': Array(0.14721747, dtype=float32), 'loss_cross_entropy': Array(0.13897201, dtype=float32)}


 10%|▉         | 99829/1000000 [4:34:23<33:05:04,  7.56it/s]

{'loss': Array(0.15187657, dtype=float32), 'loss_cross_entropy': Array(0.1435944, dtype=float32)}


 10%|▉         | 99839/1000000 [4:34:24<24:56:35, 10.02it/s]

{'loss': Array(0.13442528, dtype=float32), 'loss_cross_entropy': Array(0.1270847, dtype=float32)}


 10%|▉         | 99849/1000000 [4:34:26<32:39:17,  7.66it/s]

{'loss': Array(0.16205172, dtype=float32), 'loss_cross_entropy': Array(0.1527508, dtype=float32)}


 10%|▉         | 99859/1000000 [4:34:27<24:49:12, 10.07it/s]

{'loss': Array(0.13812035, dtype=float32), 'loss_cross_entropy': Array(0.13009886, dtype=float32)}


 10%|▉         | 99869/1000000 [4:34:28<23:51:56, 10.48it/s]

{'loss': Array(0.1450675, dtype=float32), 'loss_cross_entropy': Array(0.13652548, dtype=float32)}


 10%|▉         | 99879/1000000 [4:34:30<28:19:39,  8.83it/s]

{'loss': Array(0.1180253, dtype=float32), 'loss_cross_entropy': Array(0.11149865, dtype=float32)}


 10%|▉         | 99889/1000000 [4:34:32<24:05:24, 10.38it/s]

{'loss': Array(0.14361568, dtype=float32), 'loss_cross_entropy': Array(0.13486348, dtype=float32)}


 10%|▉         | 99899/1000000 [4:34:33<29:57:09,  8.35it/s]

{'loss': Array(0.13953887, dtype=float32), 'loss_cross_entropy': Array(0.1316712, dtype=float32)}


 10%|▉         | 99909/1000000 [4:34:35<25:15:17,  9.90it/s]

{'loss': Array(0.13778831, dtype=float32), 'loss_cross_entropy': Array(0.13031575, dtype=float32)}


 10%|▉         | 99919/1000000 [4:34:36<37:01:56,  6.75it/s]

{'loss': Array(0.14806752, dtype=float32), 'loss_cross_entropy': Array(0.13994975, dtype=float32)}


 10%|▉         | 99929/1000000 [4:34:38<25:33:52,  9.78it/s]

{'loss': Array(0.1563109, dtype=float32), 'loss_cross_entropy': Array(0.14778996, dtype=float32)}


 10%|▉         | 99939/1000000 [4:34:40<36:41:30,  6.81it/s]

{'loss': Array(0.1440215, dtype=float32), 'loss_cross_entropy': Array(0.13551836, dtype=float32)}


 10%|▉         | 99949/1000000 [4:34:41<25:21:23,  9.86it/s]

{'loss': Array(0.14014868, dtype=float32), 'loss_cross_entropy': Array(0.13227724, dtype=float32)}


 10%|▉         | 99959/1000000 [4:34:42<23:04:40, 10.83it/s]

{'loss': Array(0.16293482, dtype=float32), 'loss_cross_entropy': Array(0.15403356, dtype=float32)}


 10%|▉         | 99969/1000000 [4:34:44<32:11:36,  7.77it/s]

{'loss': Array(0.14582697, dtype=float32), 'loss_cross_entropy': Array(0.13736968, dtype=float32)}


 10%|▉         | 99979/1000000 [4:34:45<24:33:54, 10.18it/s]

{'loss': Array(0.13120024, dtype=float32), 'loss_cross_entropy': Array(0.12341306, dtype=float32)}


 10%|▉         | 99989/1000000 [4:34:47<36:41:53,  6.81it/s]

{'loss': Array(0.15583484, dtype=float32), 'loss_cross_entropy': Array(0.1470683, dtype=float32)}


 10%|▉         | 99999/1000000 [4:34:48<25:20:33,  9.86it/s]

{'loss': Array(0.13103841, dtype=float32), 'loss_cross_entropy': Array(0.12367175, dtype=float32)}


 10%|█         | 100009/1000000 [4:34:56<83:09:30,  3.01it/s] 

{'loss': Array(0.15946032, dtype=float32), 'loss_cross_entropy': Array(0.15071744, dtype=float32)}


 10%|█         | 100019/1000000 [4:34:58<36:27:55,  6.86it/s] 

{'loss': Array(0.14867786, dtype=float32), 'loss_cross_entropy': Array(0.13985927, dtype=float32)}


 10%|█         | 100029/1000000 [4:34:59<26:30:44,  9.43it/s]

{'loss': Array(0.14095204, dtype=float32), 'loss_cross_entropy': Array(0.13347767, dtype=float32)}


 10%|█         | 100039/1000000 [4:35:01<28:04:45,  8.90it/s]

{'loss': Array(0.14520104, dtype=float32), 'loss_cross_entropy': Array(0.13777626, dtype=float32)}


 10%|█         | 100049/1000000 [4:35:03<25:02:39,  9.98it/s]

{'loss': Array(0.14341043, dtype=float32), 'loss_cross_entropy': Array(0.13566294, dtype=float32)}


 10%|█         | 100059/1000000 [4:35:04<38:51:26,  6.43it/s]

{'loss': Array(0.1405178, dtype=float32), 'loss_cross_entropy': Array(0.13254899, dtype=float32)}


 10%|█         | 100069/1000000 [4:35:06<27:15:18,  9.17it/s]

{'loss': Array(0.15188399, dtype=float32), 'loss_cross_entropy': Array(0.1436551, dtype=float32)}


 10%|█         | 100079/1000000 [4:35:07<25:31:37,  9.79it/s]

{'loss': Array(0.15940432, dtype=float32), 'loss_cross_entropy': Array(0.15009952, dtype=float32)}


 10%|█         | 100089/1000000 [4:35:09<31:05:06,  8.04it/s]

{'loss': Array(0.13720691, dtype=float32), 'loss_cross_entropy': Array(0.1293308, dtype=float32)}


 10%|█         | 100099/1000000 [4:35:10<25:27:27,  9.82it/s]

{'loss': Array(0.15744105, dtype=float32), 'loss_cross_entropy': Array(0.14872909, dtype=float32)}


 10%|█         | 100109/1000000 [4:35:12<36:59:37,  6.76it/s]

{'loss': Array(0.14622615, dtype=float32), 'loss_cross_entropy': Array(0.13830939, dtype=float32)}


 10%|█         | 100119/1000000 [4:35:14<26:02:08,  9.60it/s]

{'loss': Array(0.15657975, dtype=float32), 'loss_cross_entropy': Array(0.14776841, dtype=float32)}


 10%|█         | 100129/1000000 [4:35:15<24:00:17, 10.41it/s]

{'loss': Array(0.14641167, dtype=float32), 'loss_cross_entropy': Array(0.13863777, dtype=float32)}


 10%|█         | 100139/1000000 [4:35:17<28:06:53,  8.89it/s]

{'loss': Array(0.15377997, dtype=float32), 'loss_cross_entropy': Array(0.14473452, dtype=float32)}


 10%|█         | 100149/1000000 [4:35:18<24:13:18, 10.32it/s]

{'loss': Array(0.1349825, dtype=float32), 'loss_cross_entropy': Array(0.12752739, dtype=float32)}


 10%|█         | 100159/1000000 [4:35:20<32:22:13,  7.72it/s]

{'loss': Array(0.13591585, dtype=float32), 'loss_cross_entropy': Array(0.1280834, dtype=float32)}


 10%|█         | 100169/1000000 [4:35:21<25:06:19,  9.96it/s]

{'loss': Array(0.13283327, dtype=float32), 'loss_cross_entropy': Array(0.12477056, dtype=float32)}


 10%|█         | 100179/1000000 [4:35:23<24:05:11, 10.38it/s]

{'loss': Array(0.1333475, dtype=float32), 'loss_cross_entropy': Array(0.12489327, dtype=float32)}


 10%|█         | 100189/1000000 [4:35:24<29:35:20,  8.45it/s]

{'loss': Array(0.1377668, dtype=float32), 'loss_cross_entropy': Array(0.12995404, dtype=float32)}


 10%|█         | 100199/1000000 [4:35:26<24:39:04, 10.14it/s]

{'loss': Array(0.15856649, dtype=float32), 'loss_cross_entropy': Array(0.15022366, dtype=float32)}


 10%|█         | 100209/1000000 [4:35:27<23:02:27, 10.85it/s]

{'loss': Array(0.14995733, dtype=float32), 'loss_cross_entropy': Array(0.14148352, dtype=float32)}


 10%|█         | 100219/1000000 [4:35:29<26:22:55,  9.47it/s]

{'loss': Array(0.14199883, dtype=float32), 'loss_cross_entropy': Array(0.1345273, dtype=float32)}


 10%|█         | 100229/1000000 [4:35:30<24:03:49, 10.39it/s]

{'loss': Array(0.14121324, dtype=float32), 'loss_cross_entropy': Array(0.13355443, dtype=float32)}


 10%|█         | 100239/1000000 [4:35:32<29:35:12,  8.45it/s]

{'loss': Array(0.13788687, dtype=float32), 'loss_cross_entropy': Array(0.1304659, dtype=float32)}


 10%|█         | 100249/1000000 [4:35:33<23:48:36, 10.50it/s]

{'loss': Array(0.14847706, dtype=float32), 'loss_cross_entropy': Array(0.13936496, dtype=float32)}


 10%|█         | 100259/1000000 [4:35:35<36:43:47,  6.80it/s]

{'loss': Array(0.14375043, dtype=float32), 'loss_cross_entropy': Array(0.13544612, dtype=float32)}


 10%|█         | 100269/1000000 [4:35:36<26:13:47,  9.53it/s]

{'loss': Array(0.1507661, dtype=float32), 'loss_cross_entropy': Array(0.14152735, dtype=float32)}


 10%|█         | 100279/1000000 [4:35:38<37:57:01,  6.59it/s]

{'loss': Array(0.14923398, dtype=float32), 'loss_cross_entropy': Array(0.14176385, dtype=float32)}


 10%|█         | 100289/1000000 [4:35:39<25:42:25,  9.72it/s]

{'loss': Array(0.15590827, dtype=float32), 'loss_cross_entropy': Array(0.14733385, dtype=float32)}


 10%|█         | 100299/1000000 [4:35:41<23:41:18, 10.55it/s]

{'loss': Array(0.13888887, dtype=float32), 'loss_cross_entropy': Array(0.13137987, dtype=float32)}


 10%|█         | 100309/1000000 [4:35:42<32:32:11,  7.68it/s]

{'loss': Array(0.15423146, dtype=float32), 'loss_cross_entropy': Array(0.14507209, dtype=float32)}


 10%|█         | 100319/1000000 [4:35:44<24:35:35, 10.16it/s]

{'loss': Array(0.1496428, dtype=float32), 'loss_cross_entropy': Array(0.14150448, dtype=float32)}


 10%|█         | 100329/1000000 [4:35:46<37:20:04,  6.69it/s]

{'loss': Array(0.1531819, dtype=float32), 'loss_cross_entropy': Array(0.14401706, dtype=float32)}


 10%|█         | 100339/1000000 [4:35:47<26:06:51,  9.57it/s]

{'loss': Array(0.14985953, dtype=float32), 'loss_cross_entropy': Array(0.14145076, dtype=float32)}


 10%|█         | 100349/1000000 [4:35:48<23:44:20, 10.53it/s]

{'loss': Array(0.14347833, dtype=float32), 'loss_cross_entropy': Array(0.13553, dtype=float32)}


 10%|█         | 100359/1000000 [4:35:50<26:35:58,  9.39it/s]

{'loss': Array(0.15037242, dtype=float32), 'loss_cross_entropy': Array(0.14117552, dtype=float32)}


 10%|█         | 100369/1000000 [4:35:51<23:38:41, 10.57it/s]

{'loss': Array(0.14169781, dtype=float32), 'loss_cross_entropy': Array(0.13363357, dtype=float32)}


 10%|█         | 100379/1000000 [4:35:53<25:59:54,  9.61it/s]

{'loss': Array(0.13999097, dtype=float32), 'loss_cross_entropy': Array(0.1319739, dtype=float32)}


 10%|█         | 100389/1000000 [4:35:54<23:17:17, 10.73it/s]

{'loss': Array(0.14845648, dtype=float32), 'loss_cross_entropy': Array(0.13985409, dtype=float32)}


 10%|█         | 100399/1000000 [4:35:56<36:13:52,  6.90it/s]

{'loss': Array(0.14056005, dtype=float32), 'loss_cross_entropy': Array(0.13310575, dtype=float32)}


 10%|█         | 100409/1000000 [4:35:57<26:10:36,  9.55it/s]

{'loss': Array(0.15241854, dtype=float32), 'loss_cross_entropy': Array(0.14420836, dtype=float32)}


 10%|█         | 100419/1000000 [4:35:59<23:26:32, 10.66it/s]

{'loss': Array(0.13543901, dtype=float32), 'loss_cross_entropy': Array(0.12782967, dtype=float32)}


 10%|█         | 100429/1000000 [4:36:01<30:15:01,  8.26it/s]

{'loss': Array(0.1517492, dtype=float32), 'loss_cross_entropy': Array(0.14334525, dtype=float32)}


 10%|█         | 100439/1000000 [4:36:02<24:37:15, 10.15it/s]

{'loss': Array(0.12389553, dtype=float32), 'loss_cross_entropy': Array(0.11638786, dtype=float32)}


 10%|█         | 100449/1000000 [4:36:04<36:57:20,  6.76it/s]

{'loss': Array(0.14798997, dtype=float32), 'loss_cross_entropy': Array(0.13921145, dtype=float32)}


 10%|█         | 100459/1000000 [4:36:05<26:10:59,  9.54it/s]

{'loss': Array(0.15568112, dtype=float32), 'loss_cross_entropy': Array(0.14702177, dtype=float32)}


 10%|█         | 100469/1000000 [4:36:06<24:06:07, 10.37it/s]

{'loss': Array(0.13775052, dtype=float32), 'loss_cross_entropy': Array(0.129414, dtype=float32)}


 10%|█         | 100479/1000000 [4:36:08<28:36:31,  8.73it/s]

{'loss': Array(0.1395854, dtype=float32), 'loss_cross_entropy': Array(0.1320777, dtype=float32)}


 10%|█         | 100489/1000000 [4:36:10<24:38:12, 10.14it/s]

{'loss': Array(0.15385975, dtype=float32), 'loss_cross_entropy': Array(0.14516841, dtype=float32)}


 10%|█         | 100499/1000000 [4:36:11<34:29:07,  7.25it/s]

{'loss': Array(0.1349201, dtype=float32), 'loss_cross_entropy': Array(0.12671803, dtype=float32)}


 10%|█         | 100509/1000000 [4:36:19<83:59:20,  2.97it/s] 

{'loss': Array(0.14524095, dtype=float32), 'loss_cross_entropy': Array(0.13700528, dtype=float32)}


 10%|█         | 100519/1000000 [4:36:21<42:27:52,  5.88it/s]

{'loss': Array(0.15546954, dtype=float32), 'loss_cross_entropy': Array(0.14708517, dtype=float32)}


 10%|█         | 100529/1000000 [4:36:22<26:04:43,  9.58it/s]

{'loss': Array(0.1352147, dtype=float32), 'loss_cross_entropy': Array(0.12718573, dtype=float32)}


 10%|█         | 100539/1000000 [4:36:24<24:29:41, 10.20it/s]

{'loss': Array(0.15190767, dtype=float32), 'loss_cross_entropy': Array(0.14353059, dtype=float32)}


 10%|█         | 100549/1000000 [4:36:25<29:39:59,  8.42it/s]

{'loss': Array(0.14769362, dtype=float32), 'loss_cross_entropy': Array(0.13955627, dtype=float32)}


 10%|█         | 100559/1000000 [4:36:27<24:06:08, 10.37it/s]

{'loss': Array(0.13851708, dtype=float32), 'loss_cross_entropy': Array(0.1304671, dtype=float32)}


 10%|█         | 100569/1000000 [4:36:28<23:20:32, 10.70it/s]

{'loss': Array(0.13103385, dtype=float32), 'loss_cross_entropy': Array(0.12340488, dtype=float32)}


 10%|█         | 100579/1000000 [4:36:30<27:13:30,  9.18it/s]

{'loss': Array(0.14841099, dtype=float32), 'loss_cross_entropy': Array(0.13964513, dtype=float32)}


 10%|█         | 100589/1000000 [4:36:31<23:26:39, 10.66it/s]

{'loss': Array(0.14993298, dtype=float32), 'loss_cross_entropy': Array(0.14164627, dtype=float32)}


 10%|█         | 100599/1000000 [4:36:33<29:47:35,  8.39it/s]

{'loss': Array(0.14730741, dtype=float32), 'loss_cross_entropy': Array(0.13940509, dtype=float32)}


 10%|█         | 100609/1000000 [4:36:34<24:29:17, 10.20it/s]

{'loss': Array(0.13082878, dtype=float32), 'loss_cross_entropy': Array(0.12350345, dtype=float32)}


 10%|█         | 100619/1000000 [4:36:36<33:00:27,  7.57it/s]

{'loss': Array(0.1655159, dtype=float32), 'loss_cross_entropy': Array(0.15645903, dtype=float32)}


 10%|█         | 100629/1000000 [4:36:37<25:35:03,  9.76it/s]

{'loss': Array(0.15730278, dtype=float32), 'loss_cross_entropy': Array(0.14815816, dtype=float32)}


 10%|█         | 100639/1000000 [4:36:39<23:38:47, 10.56it/s]

{'loss': Array(0.12983532, dtype=float32), 'loss_cross_entropy': Array(0.12256844, dtype=float32)}


 10%|█         | 100649/1000000 [4:36:41<30:45:39,  8.12it/s]

{'loss': Array(0.13282838, dtype=float32), 'loss_cross_entropy': Array(0.12532252, dtype=float32)}


 10%|█         | 100659/1000000 [4:36:42<24:23:21, 10.24it/s]

{'loss': Array(0.12837797, dtype=float32), 'loss_cross_entropy': Array(0.12127163, dtype=float32)}


 10%|█         | 100669/1000000 [4:36:43<23:40:13, 10.55it/s]

{'loss': Array(0.14349952, dtype=float32), 'loss_cross_entropy': Array(0.13539357, dtype=float32)}


 10%|█         | 100679/1000000 [4:36:45<27:07:44,  9.21it/s]

{'loss': Array(0.14666468, dtype=float32), 'loss_cross_entropy': Array(0.13860796, dtype=float32)}


 10%|█         | 100689/1000000 [4:36:47<24:10:41, 10.33it/s]

{'loss': Array(0.14573936, dtype=float32), 'loss_cross_entropy': Array(0.13754785, dtype=float32)}


 10%|█         | 100699/1000000 [4:36:48<29:35:13,  8.44it/s]

{'loss': Array(0.13556321, dtype=float32), 'loss_cross_entropy': Array(0.12755291, dtype=float32)}


 10%|█         | 100709/1000000 [4:36:50<23:50:03, 10.48it/s]

{'loss': Array(0.15616499, dtype=float32), 'loss_cross_entropy': Array(0.14760083, dtype=float32)}


 10%|█         | 100719/1000000 [4:36:51<36:50:25,  6.78it/s]

{'loss': Array(0.14291394, dtype=float32), 'loss_cross_entropy': Array(0.13437894, dtype=float32)}


 10%|█         | 100729/1000000 [4:36:53<25:16:55,  9.88it/s]

{'loss': Array(0.12039055, dtype=float32), 'loss_cross_entropy': Array(0.11363518, dtype=float32)}


 10%|█         | 100739/1000000 [4:36:54<23:19:47, 10.71it/s]

{'loss': Array(0.1733151, dtype=float32), 'loss_cross_entropy': Array(0.16368721, dtype=float32)}


 10%|█         | 100749/1000000 [4:36:56<33:04:43,  7.55it/s]

{'loss': Array(0.13588409, dtype=float32), 'loss_cross_entropy': Array(0.12845334, dtype=float32)}


 10%|█         | 100759/1000000 [4:36:57<24:26:29, 10.22it/s]

{'loss': Array(0.1440899, dtype=float32), 'loss_cross_entropy': Array(0.13580193, dtype=float32)}


 10%|█         | 100769/1000000 [4:36:58<23:41:53, 10.54it/s]

{'loss': Array(0.13217077, dtype=float32), 'loss_cross_entropy': Array(0.12426931, dtype=float32)}


 10%|█         | 100779/1000000 [4:37:00<28:10:29,  8.87it/s]

{'loss': Array(0.1574652, dtype=float32), 'loss_cross_entropy': Array(0.14844094, dtype=float32)}


 10%|█         | 100789/1000000 [4:37:02<24:00:03, 10.41it/s]

{'loss': Array(0.13369776, dtype=float32), 'loss_cross_entropy': Array(0.1258034, dtype=float32)}


 10%|█         | 100799/1000000 [4:37:03<29:40:57,  8.41it/s]

{'loss': Array(0.12250303, dtype=float32), 'loss_cross_entropy': Array(0.11580502, dtype=float32)}


 10%|█         | 100809/1000000 [4:37:05<23:57:18, 10.43it/s]

{'loss': Array(0.13486694, dtype=float32), 'loss_cross_entropy': Array(0.12671612, dtype=float32)}


 10%|█         | 100819/1000000 [4:37:06<23:24:36, 10.67it/s]

{'loss': Array(0.13469791, dtype=float32), 'loss_cross_entropy': Array(0.12702444, dtype=float32)}


 10%|█         | 100829/1000000 [4:37:08<26:12:57,  9.53it/s]

{'loss': Array(0.12406405, dtype=float32), 'loss_cross_entropy': Array(0.11683591, dtype=float32)}


 10%|█         | 100839/1000000 [4:37:09<24:02:30, 10.39it/s]

{'loss': Array(0.14298062, dtype=float32), 'loss_cross_entropy': Array(0.1345932, dtype=float32)}


 10%|█         | 100849/1000000 [4:37:11<33:34:35,  7.44it/s]

{'loss': Array(0.17367077, dtype=float32), 'loss_cross_entropy': Array(0.16388391, dtype=float32)}


 10%|█         | 100859/1000000 [4:37:12<25:08:24,  9.93it/s]

{'loss': Array(0.14331262, dtype=float32), 'loss_cross_entropy': Array(0.13568912, dtype=float32)}


 10%|█         | 100869/1000000 [4:37:14<25:07:34,  9.94it/s]

{'loss': Array(0.13304608, dtype=float32), 'loss_cross_entropy': Array(0.1258299, dtype=float32)}


 10%|█         | 100879/1000000 [4:37:15<27:04:47,  9.22it/s]

{'loss': Array(0.134428, dtype=float32), 'loss_cross_entropy': Array(0.12658909, dtype=float32)}


 10%|█         | 100889/1000000 [4:37:17<25:25:28,  9.82it/s]

{'loss': Array(0.13393877, dtype=float32), 'loss_cross_entropy': Array(0.12543504, dtype=float32)}


 10%|█         | 100899/1000000 [4:37:19<29:13:53,  8.54it/s]

{'loss': Array(0.13405737, dtype=float32), 'loss_cross_entropy': Array(0.12653832, dtype=float32)}


 10%|█         | 100909/1000000 [4:37:20<25:07:15,  9.94it/s]

{'loss': Array(0.14916198, dtype=float32), 'loss_cross_entropy': Array(0.14070849, dtype=float32)}


 10%|█         | 100919/1000000 [4:37:22<38:41:40,  6.45it/s]

{'loss': Array(0.15760599, dtype=float32), 'loss_cross_entropy': Array(0.14861085, dtype=float32)}


 10%|█         | 100929/1000000 [4:37:23<25:45:19,  9.70it/s]

{'loss': Array(0.15373066, dtype=float32), 'loss_cross_entropy': Array(0.14441513, dtype=float32)}


 10%|█         | 100939/1000000 [4:37:25<23:58:42, 10.42it/s]

{'loss': Array(0.1533582, dtype=float32), 'loss_cross_entropy': Array(0.14575851, dtype=float32)}


 10%|█         | 100949/1000000 [4:37:26<29:52:40,  8.36it/s]

{'loss': Array(0.14178504, dtype=float32), 'loss_cross_entropy': Array(0.13353212, dtype=float32)}


 10%|█         | 100959/1000000 [4:37:28<24:31:43, 10.18it/s]

{'loss': Array(0.15581386, dtype=float32), 'loss_cross_entropy': Array(0.14752118, dtype=float32)}


 10%|█         | 100969/1000000 [4:37:29<33:16:23,  7.51it/s]

{'loss': Array(0.15104373, dtype=float32), 'loss_cross_entropy': Array(0.14232941, dtype=float32)}


 10%|█         | 100979/1000000 [4:37:31<25:03:27,  9.97it/s]

{'loss': Array(0.15453883, dtype=float32), 'loss_cross_entropy': Array(0.14610808, dtype=float32)}


 10%|█         | 100989/1000000 [4:37:32<33:15:05,  7.51it/s]

{'loss': Array(0.14465539, dtype=float32), 'loss_cross_entropy': Array(0.13595243, dtype=float32)}


 10%|█         | 100999/1000000 [4:37:34<25:40:17,  9.73it/s]

{'loss': Array(0.11415362, dtype=float32), 'loss_cross_entropy': Array(0.10769313, dtype=float32)}


 10%|█         | 101009/1000000 [4:37:42<83:24:39,  2.99it/s] 

{'loss': Array(0.13918018, dtype=float32), 'loss_cross_entropy': Array(0.13134837, dtype=float32)}


 10%|█         | 101019/1000000 [4:37:44<38:20:10,  6.51it/s]

{'loss': Array(0.14862101, dtype=float32), 'loss_cross_entropy': Array(0.14052406, dtype=float32)}


 10%|█         | 101029/1000000 [4:37:45<25:56:31,  9.63it/s]

{'loss': Array(0.16488151, dtype=float32), 'loss_cross_entropy': Array(0.15605095, dtype=float32)}


 10%|█         | 101039/1000000 [4:37:47<33:10:47,  7.53it/s]

{'loss': Array(0.1512188, dtype=float32), 'loss_cross_entropy': Array(0.14326568, dtype=float32)}


 10%|█         | 101049/1000000 [4:37:48<24:33:06, 10.17it/s]

{'loss': Array(0.1288683, dtype=float32), 'loss_cross_entropy': Array(0.12177091, dtype=float32)}


 10%|█         | 101059/1000000 [4:37:50<36:42:04,  6.80it/s]

{'loss': Array(0.13516335, dtype=float32), 'loss_cross_entropy': Array(0.12783511, dtype=float32)}


 10%|█         | 101069/1000000 [4:37:51<25:31:14,  9.78it/s]

{'loss': Array(0.14289062, dtype=float32), 'loss_cross_entropy': Array(0.13500014, dtype=float32)}


 10%|█         | 101079/1000000 [4:37:53<23:09:41, 10.78it/s]

{'loss': Array(0.130402, dtype=float32), 'loss_cross_entropy': Array(0.12295084, dtype=float32)}


 10%|█         | 101089/1000000 [4:37:54<29:33:18,  8.45it/s]

{'loss': Array(0.11988857, dtype=float32), 'loss_cross_entropy': Array(0.11314607, dtype=float32)}


 10%|█         | 101099/1000000 [4:37:56<24:29:02, 10.20it/s]

{'loss': Array(0.14633648, dtype=float32), 'loss_cross_entropy': Array(0.13788746, dtype=float32)}


 10%|█         | 101109/1000000 [4:37:57<33:48:53,  7.38it/s]

{'loss': Array(0.15415916, dtype=float32), 'loss_cross_entropy': Array(0.14580646, dtype=float32)}


 10%|█         | 101119/1000000 [4:37:59<25:34:07,  9.77it/s]

{'loss': Array(0.14155751, dtype=float32), 'loss_cross_entropy': Array(0.13341025, dtype=float32)}


 10%|█         | 101129/1000000 [4:38:00<24:04:10, 10.37it/s]

{'loss': Array(0.13937812, dtype=float32), 'loss_cross_entropy': Array(0.13143662, dtype=float32)}


 10%|█         | 101139/1000000 [4:38:02<27:46:52,  8.99it/s]

{'loss': Array(0.16549133, dtype=float32), 'loss_cross_entropy': Array(0.156827, dtype=float32)}


 10%|█         | 101149/1000000 [4:38:03<24:45:41, 10.08it/s]

{'loss': Array(0.15733945, dtype=float32), 'loss_cross_entropy': Array(0.1489951, dtype=float32)}


 10%|█         | 101159/1000000 [4:38:05<38:19:40,  6.51it/s]

{'loss': Array(0.1436295, dtype=float32), 'loss_cross_entropy': Array(0.13499485, dtype=float32)}


 10%|█         | 101169/1000000 [4:38:07<27:03:16,  9.23it/s]

{'loss': Array(0.13350861, dtype=float32), 'loss_cross_entropy': Array(0.12497943, dtype=float32)}


 10%|█         | 101179/1000000 [4:38:08<24:47:54, 10.07it/s]

{'loss': Array(0.14294975, dtype=float32), 'loss_cross_entropy': Array(0.13463901, dtype=float32)}


 10%|█         | 101189/1000000 [4:38:10<30:57:10,  8.07it/s]

{'loss': Array(0.14109507, dtype=float32), 'loss_cross_entropy': Array(0.13296358, dtype=float32)}


 10%|█         | 101199/1000000 [4:38:11<24:34:06, 10.16it/s]

{'loss': Array(0.15070291, dtype=float32), 'loss_cross_entropy': Array(0.14297272, dtype=float32)}


 10%|█         | 101209/1000000 [4:38:13<33:19:48,  7.49it/s]

{'loss': Array(0.1393268, dtype=float32), 'loss_cross_entropy': Array(0.13098691, dtype=float32)}


 10%|█         | 101219/1000000 [4:38:14<25:04:19,  9.96it/s]

{'loss': Array(0.15917067, dtype=float32), 'loss_cross_entropy': Array(0.15014137, dtype=float32)}


 10%|█         | 101229/1000000 [4:38:16<32:20:48,  7.72it/s]

{'loss': Array(0.1615173, dtype=float32), 'loss_cross_entropy': Array(0.15280618, dtype=float32)}


 10%|█         | 101239/1000000 [4:38:17<24:49:26, 10.06it/s]

{'loss': Array(0.15195523, dtype=float32), 'loss_cross_entropy': Array(0.14337957, dtype=float32)}


 10%|█         | 101249/1000000 [4:38:19<23:37:31, 10.57it/s]

{'loss': Array(0.14647514, dtype=float32), 'loss_cross_entropy': Array(0.13844092, dtype=float32)}


 10%|█         | 101259/1000000 [4:38:20<27:49:22,  8.97it/s]

{'loss': Array(0.13574456, dtype=float32), 'loss_cross_entropy': Array(0.12779176, dtype=float32)}


 10%|█         | 101269/1000000 [4:38:22<23:34:03, 10.59it/s]

{'loss': Array(0.17316847, dtype=float32), 'loss_cross_entropy': Array(0.16379671, dtype=float32)}


 10%|█         | 101279/1000000 [4:38:24<30:18:01,  8.24it/s]

{'loss': Array(0.1435575, dtype=float32), 'loss_cross_entropy': Array(0.13629192, dtype=float32)}


 10%|█         | 101289/1000000 [4:38:25<24:07:34, 10.35it/s]

{'loss': Array(0.1435634, dtype=float32), 'loss_cross_entropy': Array(0.13568075, dtype=float32)}


 10%|█         | 101299/1000000 [4:38:27<36:27:06,  6.85it/s]

{'loss': Array(0.15261492, dtype=float32), 'loss_cross_entropy': Array(0.14377484, dtype=float32)}


 10%|█         | 101309/1000000 [4:38:28<25:27:58,  9.80it/s]

{'loss': Array(0.13946377, dtype=float32), 'loss_cross_entropy': Array(0.13172965, dtype=float32)}


 10%|█         | 101319/1000000 [4:38:30<37:04:15,  6.73it/s]

{'loss': Array(0.1521955, dtype=float32), 'loss_cross_entropy': Array(0.14355992, dtype=float32)}


 10%|█         | 101329/1000000 [4:38:31<25:38:46,  9.73it/s]

{'loss': Array(0.15212058, dtype=float32), 'loss_cross_entropy': Array(0.14397304, dtype=float32)}


 10%|█         | 101339/1000000 [4:38:32<23:24:03, 10.67it/s]

{'loss': Array(0.13562636, dtype=float32), 'loss_cross_entropy': Array(0.12764478, dtype=float32)}


 10%|█         | 101349/1000000 [4:38:34<33:00:28,  7.56it/s]

{'loss': Array(0.14405274, dtype=float32), 'loss_cross_entropy': Array(0.13598149, dtype=float32)}


 10%|█         | 101359/1000000 [4:38:35<24:17:01, 10.28it/s]

{'loss': Array(0.15365492, dtype=float32), 'loss_cross_entropy': Array(0.14479926, dtype=float32)}


 10%|█         | 101369/1000000 [4:38:37<36:25:54,  6.85it/s]

{'loss': Array(0.15111004, dtype=float32), 'loss_cross_entropy': Array(0.14294668, dtype=float32)}


 10%|█         | 101379/1000000 [4:38:39<26:25:07,  9.45it/s]

{'loss': Array(0.12396468, dtype=float32), 'loss_cross_entropy': Array(0.11694308, dtype=float32)}


 10%|█         | 101389/1000000 [4:38:40<23:46:17, 10.50it/s]

{'loss': Array(0.14066012, dtype=float32), 'loss_cross_entropy': Array(0.13266602, dtype=float32)}


 10%|█         | 101399/1000000 [4:38:42<26:01:54,  9.59it/s]

{'loss': Array(0.14140604, dtype=float32), 'loss_cross_entropy': Array(0.13360961, dtype=float32)}


 10%|█         | 101409/1000000 [4:38:43<23:20:07, 10.70it/s]

{'loss': Array(0.1374539, dtype=float32), 'loss_cross_entropy': Array(0.12922303, dtype=float32)}


 10%|█         | 101419/1000000 [4:38:45<26:18:04,  9.49it/s]

{'loss': Array(0.16442434, dtype=float32), 'loss_cross_entropy': Array(0.15561734, dtype=float32)}


 10%|█         | 101429/1000000 [4:38:46<23:17:41, 10.71it/s]

{'loss': Array(0.14266111, dtype=float32), 'loss_cross_entropy': Array(0.13433842, dtype=float32)}


 10%|█         | 101439/1000000 [4:38:48<36:14:14,  6.89it/s]

{'loss': Array(0.13150834, dtype=float32), 'loss_cross_entropy': Array(0.12407744, dtype=float32)}


 10%|█         | 101449/1000000 [4:38:49<25:00:59,  9.98it/s]

{'loss': Array(0.13825305, dtype=float32), 'loss_cross_entropy': Array(0.13089664, dtype=float32)}


 10%|█         | 101459/1000000 [4:38:50<23:26:47, 10.65it/s]

{'loss': Array(0.1454653, dtype=float32), 'loss_cross_entropy': Array(0.13722865, dtype=float32)}


 10%|█         | 101469/1000000 [4:38:52<29:28:41,  8.47it/s]

{'loss': Array(0.13473599, dtype=float32), 'loss_cross_entropy': Array(0.1268791, dtype=float32)}


 10%|█         | 101479/1000000 [4:38:53<24:12:20, 10.31it/s]

{'loss': Array(0.14709345, dtype=float32), 'loss_cross_entropy': Array(0.1389283, dtype=float32)}


 10%|█         | 101489/1000000 [4:38:55<36:31:17,  6.83it/s]

{'loss': Array(0.17710535, dtype=float32), 'loss_cross_entropy': Array(0.16788624, dtype=float32)}


 10%|█         | 101499/1000000 [4:38:56<25:33:56,  9.76it/s]

{'loss': Array(0.13640487, dtype=float32), 'loss_cross_entropy': Array(0.12887716, dtype=float32)}


 10%|█         | 101509/1000000 [4:39:04<83:25:36,  2.99it/s] 

{'loss': Array(0.15494333, dtype=float32), 'loss_cross_entropy': Array(0.14594299, dtype=float32)}


 10%|█         | 101519/1000000 [4:39:06<39:00:48,  6.40it/s]

{'loss': Array(0.14345555, dtype=float32), 'loss_cross_entropy': Array(0.13528602, dtype=float32)}


 10%|█         | 101529/1000000 [4:39:08<26:22:26,  9.46it/s]

{'loss': Array(0.13842647, dtype=float32), 'loss_cross_entropy': Array(0.13107246, dtype=float32)}


 10%|█         | 101539/1000000 [4:39:09<31:03:06,  8.04it/s]

{'loss': Array(0.14638527, dtype=float32), 'loss_cross_entropy': Array(0.13812059, dtype=float32)}


 10%|█         | 101549/1000000 [4:39:11<25:28:25,  9.80it/s]

{'loss': Array(0.14095889, dtype=float32), 'loss_cross_entropy': Array(0.13289236, dtype=float32)}


 10%|█         | 101559/1000000 [4:39:13<37:45:30,  6.61it/s]

{'loss': Array(0.13778739, dtype=float32), 'loss_cross_entropy': Array(0.12938745, dtype=float32)}


 10%|█         | 101569/1000000 [4:39:14<26:31:17,  9.41it/s]

{'loss': Array(0.14511375, dtype=float32), 'loss_cross_entropy': Array(0.13715523, dtype=float32)}


 10%|█         | 101579/1000000 [4:39:15<24:57:38, 10.00it/s]

{'loss': Array(0.14427257, dtype=float32), 'loss_cross_entropy': Array(0.13606264, dtype=float32)}


 10%|█         | 101589/1000000 [4:39:17<27:38:11,  9.03it/s]

{'loss': Array(0.13332775, dtype=float32), 'loss_cross_entropy': Array(0.12582453, dtype=float32)}


 10%|█         | 101599/1000000 [4:39:19<24:34:30, 10.15it/s]

{'loss': Array(0.1397201, dtype=float32), 'loss_cross_entropy': Array(0.1322991, dtype=float32)}


 10%|█         | 101609/1000000 [4:39:20<37:13:46,  6.70it/s]

{'loss': Array(0.16076553, dtype=float32), 'loss_cross_entropy': Array(0.15228075, dtype=float32)}


 10%|█         | 101619/1000000 [4:39:22<25:15:39,  9.88it/s]

{'loss': Array(0.15480755, dtype=float32), 'loss_cross_entropy': Array(0.14642727, dtype=float32)}


 10%|█         | 101629/1000000 [4:39:23<23:21:05, 10.69it/s]

{'loss': Array(0.14244615, dtype=float32), 'loss_cross_entropy': Array(0.13518238, dtype=float32)}


 10%|█         | 101639/1000000 [4:39:25<27:52:15,  8.95it/s]

{'loss': Array(0.15156451, dtype=float32), 'loss_cross_entropy': Array(0.1429564, dtype=float32)}


 10%|█         | 101649/1000000 [4:39:26<23:37:24, 10.56it/s]

{'loss': Array(0.1308276, dtype=float32), 'loss_cross_entropy': Array(0.12322059, dtype=float32)}


 10%|█         | 101659/1000000 [4:39:28<32:17:25,  7.73it/s]

{'loss': Array(0.16290146, dtype=float32), 'loss_cross_entropy': Array(0.1541475, dtype=float32)}


 10%|█         | 101669/1000000 [4:39:29<24:15:27, 10.29it/s]

{'loss': Array(0.15444784, dtype=float32), 'loss_cross_entropy': Array(0.14597099, dtype=float32)}


 10%|█         | 101679/1000000 [4:39:31<36:21:01,  6.86it/s]

{'loss': Array(0.16086793, dtype=float32), 'loss_cross_entropy': Array(0.15161052, dtype=float32)}


 10%|█         | 101689/1000000 [4:39:32<25:37:24,  9.74it/s]

{'loss': Array(0.17603846, dtype=float32), 'loss_cross_entropy': Array(0.166738, dtype=float32)}


 10%|█         | 101699/1000000 [4:39:34<23:42:55, 10.52it/s]

{'loss': Array(0.15372978, dtype=float32), 'loss_cross_entropy': Array(0.14531119, dtype=float32)}


 10%|█         | 101709/1000000 [4:39:35<29:16:56,  8.52it/s]

{'loss': Array(0.14714943, dtype=float32), 'loss_cross_entropy': Array(0.13892066, dtype=float32)}


 10%|█         | 101719/1000000 [4:39:37<23:52:51, 10.45it/s]

{'loss': Array(0.1446173, dtype=float32), 'loss_cross_entropy': Array(0.1367994, dtype=float32)}


 10%|█         | 101729/1000000 [4:39:38<32:34:31,  7.66it/s]

{'loss': Array(0.15917264, dtype=float32), 'loss_cross_entropy': Array(0.15035425, dtype=float32)}


 10%|█         | 101739/1000000 [4:39:40<24:20:45, 10.25it/s]

{'loss': Array(0.13834602, dtype=float32), 'loss_cross_entropy': Array(0.13071647, dtype=float32)}


 10%|█         | 101749/1000000 [4:39:41<22:56:53, 10.87it/s]

{'loss': Array(0.12719177, dtype=float32), 'loss_cross_entropy': Array(0.12001017, dtype=float32)}


 10%|█         | 101759/1000000 [4:39:43<26:46:02,  9.32it/s]

{'loss': Array(0.13859908, dtype=float32), 'loss_cross_entropy': Array(0.13032481, dtype=float32)}


 10%|█         | 101769/1000000 [4:39:44<23:53:57, 10.44it/s]

{'loss': Array(0.13701947, dtype=float32), 'loss_cross_entropy': Array(0.12919669, dtype=float32)}


 10%|█         | 101779/1000000 [4:39:46<36:37:19,  6.81it/s]

{'loss': Array(0.17526744, dtype=float32), 'loss_cross_entropy': Array(0.16581488, dtype=float32)}


 10%|█         | 101789/1000000 [4:39:47<25:24:11,  9.82it/s]

{'loss': Array(0.15440245, dtype=float32), 'loss_cross_entropy': Array(0.14567189, dtype=float32)}


 10%|█         | 101799/1000000 [4:39:48<23:14:43, 10.73it/s]

{'loss': Array(0.13289818, dtype=float32), 'loss_cross_entropy': Array(0.12508434, dtype=float32)}


 10%|█         | 101809/1000000 [4:39:50<29:17:59,  8.52it/s]

{'loss': Array(0.14915664, dtype=float32), 'loss_cross_entropy': Array(0.14103118, dtype=float32)}


 10%|█         | 101819/1000000 [4:39:51<23:59:32, 10.40it/s]

{'loss': Array(0.15383981, dtype=float32), 'loss_cross_entropy': Array(0.1454865, dtype=float32)}


 10%|█         | 101829/1000000 [4:39:53<32:08:38,  7.76it/s]

{'loss': Array(0.14997075, dtype=float32), 'loss_cross_entropy': Array(0.14155851, dtype=float32)}


 10%|█         | 101839/1000000 [4:39:54<24:28:55, 10.19it/s]

{'loss': Array(0.11323617, dtype=float32), 'loss_cross_entropy': Array(0.10612115, dtype=float32)}


 10%|█         | 101849/1000000 [4:39:56<32:40:25,  7.64it/s]

{'loss': Array(0.13466166, dtype=float32), 'loss_cross_entropy': Array(0.12716286, dtype=float32)}


 10%|█         | 101859/1000000 [4:39:58<25:01:45,  9.97it/s]

{'loss': Array(0.13683583, dtype=float32), 'loss_cross_entropy': Array(0.12897256, dtype=float32)}


 10%|█         | 101869/1000000 [4:39:59<23:27:44, 10.63it/s]

{'loss': Array(0.1376922, dtype=float32), 'loss_cross_entropy': Array(0.12987816, dtype=float32)}


 10%|█         | 101879/1000000 [4:40:01<28:41:11,  8.70it/s]

{'loss': Array(0.16354169, dtype=float32), 'loss_cross_entropy': Array(0.15450768, dtype=float32)}


 10%|█         | 101889/1000000 [4:40:02<24:56:17, 10.00it/s]

{'loss': Array(0.13723235, dtype=float32), 'loss_cross_entropy': Array(0.13000895, dtype=float32)}


 10%|█         | 101899/1000000 [4:40:04<31:13:15,  7.99it/s]

{'loss': Array(0.15816982, dtype=float32), 'loss_cross_entropy': Array(0.14938083, dtype=float32)}


 10%|█         | 101909/1000000 [4:40:05<25:31:31,  9.77it/s]

{'loss': Array(0.15528369, dtype=float32), 'loss_cross_entropy': Array(0.14692788, dtype=float32)}


 10%|█         | 101919/1000000 [4:40:07<38:20:58,  6.51it/s]

{'loss': Array(0.15491088, dtype=float32), 'loss_cross_entropy': Array(0.14680368, dtype=float32)}


 10%|█         | 101929/1000000 [4:40:09<27:50:30,  8.96it/s]

{'loss': Array(0.14718434, dtype=float32), 'loss_cross_entropy': Array(0.13887426, dtype=float32)}


 10%|█         | 101939/1000000 [4:40:10<38:52:43,  6.42it/s]

{'loss': Array(0.15836962, dtype=float32), 'loss_cross_entropy': Array(0.1500785, dtype=float32)}


 10%|█         | 101949/1000000 [4:40:12<25:55:51,  9.62it/s]

{'loss': Array(0.14541028, dtype=float32), 'loss_cross_entropy': Array(0.13689874, dtype=float32)}


 10%|█         | 101959/1000000 [4:40:13<23:54:19, 10.44it/s]

{'loss': Array(0.14813015, dtype=float32), 'loss_cross_entropy': Array(0.13928631, dtype=float32)}


 10%|█         | 101969/1000000 [4:40:15<32:28:57,  7.68it/s]

{'loss': Array(0.13357809, dtype=float32), 'loss_cross_entropy': Array(0.12549074, dtype=float32)}


 10%|█         | 101979/1000000 [4:40:16<24:27:36, 10.20it/s]

{'loss': Array(0.1378989, dtype=float32), 'loss_cross_entropy': Array(0.13056543, dtype=float32)}


 10%|█         | 101989/1000000 [4:40:18<36:26:05,  6.85it/s]

{'loss': Array(0.1386971, dtype=float32), 'loss_cross_entropy': Array(0.13091496, dtype=float32)}


 10%|█         | 101999/1000000 [4:40:19<25:24:44,  9.82it/s]

{'loss': Array(0.16446061, dtype=float32), 'loss_cross_entropy': Array(0.15530966, dtype=float32)}


 10%|█         | 102009/1000000 [4:40:27<80:50:44,  3.09it/s] 

{'loss': Array(0.15554166, dtype=float32), 'loss_cross_entropy': Array(0.14756575, dtype=float32)}


 10%|█         | 102019/1000000 [4:40:29<35:53:17,  6.95it/s] 

{'loss': Array(0.1347889, dtype=float32), 'loss_cross_entropy': Array(0.12728433, dtype=float32)}


 10%|█         | 102029/1000000 [4:40:30<25:13:40,  9.89it/s]

{'loss': Array(0.16743632, dtype=float32), 'loss_cross_entropy': Array(0.15784648, dtype=float32)}


 10%|█         | 102039/1000000 [4:40:32<26:24:07,  9.45it/s]

{'loss': Array(0.15516062, dtype=float32), 'loss_cross_entropy': Array(0.14629203, dtype=float32)}


 10%|█         | 102049/1000000 [4:40:33<23:15:22, 10.73it/s]

{'loss': Array(0.16593122, dtype=float32), 'loss_cross_entropy': Array(0.15666893, dtype=float32)}


 10%|█         | 102059/1000000 [4:40:35<36:26:36,  6.84it/s]

{'loss': Array(0.14257376, dtype=float32), 'loss_cross_entropy': Array(0.13448279, dtype=float32)}


 10%|█         | 102069/1000000 [4:40:36<25:56:44,  9.61it/s]

{'loss': Array(0.14499043, dtype=float32), 'loss_cross_entropy': Array(0.13670418, dtype=float32)}


 10%|█         | 102079/1000000 [4:40:37<23:43:54, 10.51it/s]

{'loss': Array(0.14479685, dtype=float32), 'loss_cross_entropy': Array(0.13669181, dtype=float32)}


 10%|█         | 102089/1000000 [4:40:39<29:29:59,  8.45it/s]

{'loss': Array(0.15144531, dtype=float32), 'loss_cross_entropy': Array(0.1433224, dtype=float32)}


 10%|█         | 102099/1000000 [4:40:40<24:17:07, 10.27it/s]

{'loss': Array(0.14033376, dtype=float32), 'loss_cross_entropy': Array(0.13225582, dtype=float32)}


 10%|█         | 102109/1000000 [4:40:42<36:26:51,  6.84it/s]

{'loss': Array(0.1619219, dtype=float32), 'loss_cross_entropy': Array(0.15315321, dtype=float32)}


 10%|█         | 102119/1000000 [4:40:44<25:21:18,  9.84it/s]

{'loss': Array(0.1462436, dtype=float32), 'loss_cross_entropy': Array(0.13807891, dtype=float32)}


 10%|█         | 102129/1000000 [4:40:45<23:08:45, 10.78it/s]

{'loss': Array(0.14306153, dtype=float32), 'loss_cross_entropy': Array(0.1352788, dtype=float32)}


 10%|█         | 102139/1000000 [4:40:47<27:57:12,  8.92it/s]

{'loss': Array(0.1323574, dtype=float32), 'loss_cross_entropy': Array(0.12500583, dtype=float32)}


 10%|█         | 102149/1000000 [4:40:48<23:45:44, 10.50it/s]

{'loss': Array(0.1474804, dtype=float32), 'loss_cross_entropy': Array(0.13918619, dtype=float32)}


 10%|█         | 102159/1000000 [4:40:50<32:50:22,  7.59it/s]

{'loss': Array(0.14823492, dtype=float32), 'loss_cross_entropy': Array(0.13973038, dtype=float32)}


 10%|█         | 102169/1000000 [4:40:51<25:04:50,  9.94it/s]

{'loss': Array(0.15581785, dtype=float32), 'loss_cross_entropy': Array(0.14768748, dtype=float32)}


 10%|█         | 102179/1000000 [4:40:52<23:11:20, 10.75it/s]

{'loss': Array(0.14353135, dtype=float32), 'loss_cross_entropy': Array(0.1355262, dtype=float32)}


 10%|█         | 102189/1000000 [4:40:54<29:35:18,  8.43it/s]

{'loss': Array(0.14286612, dtype=float32), 'loss_cross_entropy': Array(0.13480419, dtype=float32)}


 10%|█         | 102199/1000000 [4:40:55<24:17:50, 10.26it/s]

{'loss': Array(0.14589377, dtype=float32), 'loss_cross_entropy': Array(0.13769658, dtype=float32)}


 10%|█         | 102209/1000000 [4:40:57<23:12:24, 10.75it/s]

{'loss': Array(0.1199716, dtype=float32), 'loss_cross_entropy': Array(0.11274512, dtype=float32)}


 10%|█         | 102219/1000000 [4:40:58<26:09:30,  9.53it/s]

{'loss': Array(0.13806215, dtype=float32), 'loss_cross_entropy': Array(0.13028042, dtype=float32)}


 10%|█         | 102229/1000000 [4:41:00<24:26:33, 10.20it/s]

{'loss': Array(0.13100916, dtype=float32), 'loss_cross_entropy': Array(0.1236065, dtype=float32)}


 10%|█         | 102239/1000000 [4:41:02<31:12:49,  7.99it/s]

{'loss': Array(0.14574039, dtype=float32), 'loss_cross_entropy': Array(0.13717884, dtype=float32)}


 10%|█         | 102249/1000000 [4:41:03<25:08:06,  9.92it/s]

{'loss': Array(0.14747925, dtype=float32), 'loss_cross_entropy': Array(0.13936226, dtype=float32)}


 10%|█         | 102259/1000000 [4:41:05<37:58:44,  6.57it/s]

{'loss': Array(0.15283905, dtype=float32), 'loss_cross_entropy': Array(0.14437057, dtype=float32)}


 10%|█         | 102269/1000000 [4:41:06<26:46:59,  9.31it/s]

{'loss': Array(0.14881693, dtype=float32), 'loss_cross_entropy': Array(0.14080966, dtype=float32)}


 10%|█         | 102279/1000000 [4:41:08<38:44:10,  6.44it/s]

{'loss': Array(0.14571434, dtype=float32), 'loss_cross_entropy': Array(0.13741998, dtype=float32)}


 10%|█         | 102289/1000000 [4:41:09<26:46:43,  9.31it/s]

{'loss': Array(0.14376786, dtype=float32), 'loss_cross_entropy': Array(0.13598041, dtype=float32)}


 10%|█         | 102299/1000000 [4:41:11<25:05:40,  9.94it/s]

{'loss': Array(0.12036234, dtype=float32), 'loss_cross_entropy': Array(0.11283933, dtype=float32)}


 10%|█         | 102309/1000000 [4:41:13<32:31:15,  7.67it/s]

{'loss': Array(0.146035, dtype=float32), 'loss_cross_entropy': Array(0.13824753, dtype=float32)}


 10%|█         | 102319/1000000 [4:41:14<24:30:50, 10.17it/s]

{'loss': Array(0.15263774, dtype=float32), 'loss_cross_entropy': Array(0.14424552, dtype=float32)}


 10%|█         | 102329/1000000 [4:41:16<37:15:28,  6.69it/s]

{'loss': Array(0.14843169, dtype=float32), 'loss_cross_entropy': Array(0.13970615, dtype=float32)}


 10%|█         | 102339/1000000 [4:41:17<26:14:50,  9.50it/s]

{'loss': Array(0.16446613, dtype=float32), 'loss_cross_entropy': Array(0.1555881, dtype=float32)}


 10%|█         | 102349/1000000 [4:41:18<24:21:29, 10.24it/s]

{'loss': Array(0.13872932, dtype=float32), 'loss_cross_entropy': Array(0.13043502, dtype=float32)}


 10%|█         | 102359/1000000 [4:41:20<26:42:24,  9.34it/s]

{'loss': Array(0.13457988, dtype=float32), 'loss_cross_entropy': Array(0.12670276, dtype=float32)}


 10%|█         | 102369/1000000 [4:41:22<24:18:26, 10.26it/s]

{'loss': Array(0.137522, dtype=float32), 'loss_cross_entropy': Array(0.12986635, dtype=float32)}


 10%|█         | 102379/1000000 [4:41:23<27:30:02,  9.07it/s]

{'loss': Array(0.14927317, dtype=float32), 'loss_cross_entropy': Array(0.14181323, dtype=float32)}


 10%|█         | 102389/1000000 [4:41:25<23:46:17, 10.49it/s]

{'loss': Array(0.13490547, dtype=float32), 'loss_cross_entropy': Array(0.12778763, dtype=float32)}


 10%|█         | 102399/1000000 [4:41:26<36:28:36,  6.84it/s]

{'loss': Array(0.13826735, dtype=float32), 'loss_cross_entropy': Array(0.13087401, dtype=float32)}


 10%|█         | 102409/1000000 [4:41:28<24:51:12, 10.03it/s]

{'loss': Array(0.15236181, dtype=float32), 'loss_cross_entropy': Array(0.14464721, dtype=float32)}


 10%|█         | 102419/1000000 [4:41:29<23:33:04, 10.59it/s]

{'loss': Array(0.13976659, dtype=float32), 'loss_cross_entropy': Array(0.13181388, dtype=float32)}


 10%|█         | 102429/1000000 [4:41:31<30:11:13,  8.26it/s]

{'loss': Array(0.13914305, dtype=float32), 'loss_cross_entropy': Array(0.13132998, dtype=float32)}


 10%|█         | 102439/1000000 [4:41:32<24:17:38, 10.26it/s]

{'loss': Array(0.14448832, dtype=float32), 'loss_cross_entropy': Array(0.13618915, dtype=float32)}


 10%|█         | 102449/1000000 [4:41:34<36:26:07,  6.84it/s]

{'loss': Array(0.14587753, dtype=float32), 'loss_cross_entropy': Array(0.13787043, dtype=float32)}


 10%|█         | 102459/1000000 [4:41:35<24:55:02, 10.01it/s]

{'loss': Array(0.14151934, dtype=float32), 'loss_cross_entropy': Array(0.13358279, dtype=float32)}


 10%|█         | 102469/1000000 [4:41:37<23:02:24, 10.82it/s]

{'loss': Array(0.14492764, dtype=float32), 'loss_cross_entropy': Array(0.13653013, dtype=float32)}


 10%|█         | 102479/1000000 [4:41:38<27:43:07,  8.99it/s]

{'loss': Array(0.12989713, dtype=float32), 'loss_cross_entropy': Array(0.12297716, dtype=float32)}


 10%|█         | 102489/1000000 [4:41:40<23:55:42, 10.42it/s]

{'loss': Array(0.13095431, dtype=float32), 'loss_cross_entropy': Array(0.12342554, dtype=float32)}


 10%|█         | 102499/1000000 [4:41:41<32:31:30,  7.67it/s]

{'loss': Array(0.13073604, dtype=float32), 'loss_cross_entropy': Array(0.12306892, dtype=float32)}


 10%|█         | 102509/1000000 [4:41:49<82:38:17,  3.02it/s] 

{'loss': Array(0.14356565, dtype=float32), 'loss_cross_entropy': Array(0.1355531, dtype=float32)}


 10%|█         | 102519/1000000 [4:41:51<42:19:09,  5.89it/s]

{'loss': Array(0.14992985, dtype=float32), 'loss_cross_entropy': Array(0.14157043, dtype=float32)}


 10%|█         | 102529/1000000 [4:41:52<26:11:40,  9.52it/s]

{'loss': Array(0.16134857, dtype=float32), 'loss_cross_entropy': Array(0.15189414, dtype=float32)}


 10%|█         | 102539/1000000 [4:41:54<23:42:28, 10.52it/s]

{'loss': Array(0.1400129, dtype=float32), 'loss_cross_entropy': Array(0.13212594, dtype=float32)}


 10%|█         | 102549/1000000 [4:41:55<29:37:48,  8.41it/s]

{'loss': Array(0.1431058, dtype=float32), 'loss_cross_entropy': Array(0.13449712, dtype=float32)}


 10%|█         | 102559/1000000 [4:41:57<24:08:19, 10.33it/s]

{'loss': Array(0.15605429, dtype=float32), 'loss_cross_entropy': Array(0.14743328, dtype=float32)}


 10%|█         | 102569/1000000 [4:41:58<23:18:13, 10.70it/s]

{'loss': Array(0.13679557, dtype=float32), 'loss_cross_entropy': Array(0.12869243, dtype=float32)}


 10%|█         | 102579/1000000 [4:42:00<26:32:37,  9.39it/s]

{'loss': Array(0.14666913, dtype=float32), 'loss_cross_entropy': Array(0.1389491, dtype=float32)}


 10%|█         | 102589/1000000 [4:42:01<24:57:44,  9.99it/s]

{'loss': Array(0.15390824, dtype=float32), 'loss_cross_entropy': Array(0.14498855, dtype=float32)}


 10%|█         | 102599/1000000 [4:42:03<31:03:17,  8.03it/s]

{'loss': Array(0.1473705, dtype=float32), 'loss_cross_entropy': Array(0.13882993, dtype=float32)}


 10%|█         | 102609/1000000 [4:42:04<25:19:57,  9.84it/s]

{'loss': Array(0.13657828, dtype=float32), 'loss_cross_entropy': Array(0.12845968, dtype=float32)}


 10%|█         | 102619/1000000 [4:42:06<34:10:57,  7.29it/s]

{'loss': Array(0.14982723, dtype=float32), 'loss_cross_entropy': Array(0.14170186, dtype=float32)}


 10%|█         | 102629/1000000 [4:42:07<26:18:00,  9.48it/s]

{'loss': Array(0.15628475, dtype=float32), 'loss_cross_entropy': Array(0.14720233, dtype=float32)}


 10%|█         | 102639/1000000 [4:42:09<24:51:10, 10.03it/s]

{'loss': Array(0.12749673, dtype=float32), 'loss_cross_entropy': Array(0.12023498, dtype=float32)}


 10%|█         | 102649/1000000 [4:42:11<30:50:26,  8.08it/s]

{'loss': Array(0.13663761, dtype=float32), 'loss_cross_entropy': Array(0.12891057, dtype=float32)}


 10%|█         | 102659/1000000 [4:42:12<24:31:14, 10.17it/s]

{'loss': Array(0.16811092, dtype=float32), 'loss_cross_entropy': Array(0.15893196, dtype=float32)}


 10%|█         | 102669/1000000 [4:42:13<23:39:14, 10.54it/s]

{'loss': Array(0.15200658, dtype=float32), 'loss_cross_entropy': Array(0.14292563, dtype=float32)}


 10%|█         | 102679/1000000 [4:42:15<26:56:47,  9.25it/s]

{'loss': Array(0.12372489, dtype=float32), 'loss_cross_entropy': Array(0.11604786, dtype=float32)}


 10%|█         | 102689/1000000 [4:42:17<23:49:44, 10.46it/s]

{'loss': Array(0.14600785, dtype=float32), 'loss_cross_entropy': Array(0.13783072, dtype=float32)}


 10%|█         | 102699/1000000 [4:42:18<29:48:16,  8.36it/s]

{'loss': Array(0.15421511, dtype=float32), 'loss_cross_entropy': Array(0.14596812, dtype=float32)}


 10%|█         | 102709/1000000 [4:42:20<24:14:53, 10.28it/s]

{'loss': Array(0.15116654, dtype=float32), 'loss_cross_entropy': Array(0.14255834, dtype=float32)}


 10%|█         | 102719/1000000 [4:42:21<37:33:52,  6.64it/s]

{'loss': Array(0.13411143, dtype=float32), 'loss_cross_entropy': Array(0.1261702, dtype=float32)}


 10%|█         | 102729/1000000 [4:42:23<25:17:03,  9.86it/s]

{'loss': Array(0.13145874, dtype=float32), 'loss_cross_entropy': Array(0.12410531, dtype=float32)}


 10%|█         | 102739/1000000 [4:42:24<23:56:49, 10.41it/s]

{'loss': Array(0.15406404, dtype=float32), 'loss_cross_entropy': Array(0.14502814, dtype=float32)}


 10%|█         | 102749/1000000 [4:42:26<32:25:09,  7.69it/s]

{'loss': Array(0.1565803, dtype=float32), 'loss_cross_entropy': Array(0.14830272, dtype=float32)}


 10%|█         | 102759/1000000 [4:42:27<25:24:20,  9.81it/s]

{'loss': Array(0.14308459, dtype=float32), 'loss_cross_entropy': Array(0.13503055, dtype=float32)}


 10%|█         | 102769/1000000 [4:42:29<24:21:46, 10.23it/s]

{'loss': Array(0.1576145, dtype=float32), 'loss_cross_entropy': Array(0.1486958, dtype=float32)}


 10%|█         | 102779/1000000 [4:42:30<27:54:34,  8.93it/s]

{'loss': Array(0.12976953, dtype=float32), 'loss_cross_entropy': Array(0.12213279, dtype=float32)}


 10%|█         | 102789/1000000 [4:42:32<23:27:13, 10.63it/s]

{'loss': Array(0.13217731, dtype=float32), 'loss_cross_entropy': Array(0.12499326, dtype=float32)}


 10%|█         | 102799/1000000 [4:42:33<30:30:30,  8.17it/s]

{'loss': Array(0.13612114, dtype=float32), 'loss_cross_entropy': Array(0.12844113, dtype=float32)}


 10%|█         | 102809/1000000 [4:42:35<24:21:41, 10.23it/s]

{'loss': Array(0.1490009, dtype=float32), 'loss_cross_entropy': Array(0.14012264, dtype=float32)}


 10%|█         | 102819/1000000 [4:42:36<23:25:37, 10.64it/s]

{'loss': Array(0.13234039, dtype=float32), 'loss_cross_entropy': Array(0.12445926, dtype=float32)}


 10%|█         | 102829/1000000 [4:42:38<26:20:33,  9.46it/s]

{'loss': Array(0.14913557, dtype=float32), 'loss_cross_entropy': Array(0.14064963, dtype=float32)}


 10%|█         | 102839/1000000 [4:42:39<23:33:48, 10.58it/s]

{'loss': Array(0.1426137, dtype=float32), 'loss_cross_entropy': Array(0.13488871, dtype=float32)}


 10%|█         | 102849/1000000 [4:42:41<32:47:16,  7.60it/s]

{'loss': Array(0.15266578, dtype=float32), 'loss_cross_entropy': Array(0.14410037, dtype=float32)}


 10%|█         | 102859/1000000 [4:42:42<25:11:01,  9.90it/s]

{'loss': Array(0.15632169, dtype=float32), 'loss_cross_entropy': Array(0.14688335, dtype=float32)}


 10%|█         | 102869/1000000 [4:42:44<23:36:06, 10.56it/s]

{'loss': Array(0.16319218, dtype=float32), 'loss_cross_entropy': Array(0.15460184, dtype=float32)}


 10%|█         | 102879/1000000 [4:42:45<26:01:42,  9.57it/s]

{'loss': Array(0.15322779, dtype=float32), 'loss_cross_entropy': Array(0.14409633, dtype=float32)}


 10%|█         | 102889/1000000 [4:42:47<23:12:57, 10.73it/s]

{'loss': Array(0.16382861, dtype=float32), 'loss_cross_entropy': Array(0.15481727, dtype=float32)}


 10%|█         | 102899/1000000 [4:42:48<27:47:52,  8.96it/s]

{'loss': Array(0.13633536, dtype=float32), 'loss_cross_entropy': Array(0.1283416, dtype=float32)}


 10%|█         | 102909/1000000 [4:42:50<23:25:49, 10.64it/s]

{'loss': Array(0.13745221, dtype=float32), 'loss_cross_entropy': Array(0.12922394, dtype=float32)}


 10%|█         | 102919/1000000 [4:42:51<36:15:26,  6.87it/s]

{'loss': Array(0.13678516, dtype=float32), 'loss_cross_entropy': Array(0.12932788, dtype=float32)}


 10%|█         | 102929/1000000 [4:42:53<24:55:58,  9.99it/s]

{'loss': Array(0.15639962, dtype=float32), 'loss_cross_entropy': Array(0.14775714, dtype=float32)}


 10%|█         | 102939/1000000 [4:42:54<23:12:59, 10.73it/s]

{'loss': Array(0.14324592, dtype=float32), 'loss_cross_entropy': Array(0.13460188, dtype=float32)}


 10%|█         | 102949/1000000 [4:42:56<29:12:19,  8.53it/s]

{'loss': Array(0.15878193, dtype=float32), 'loss_cross_entropy': Array(0.14967684, dtype=float32)}


 10%|█         | 102959/1000000 [4:42:57<23:54:24, 10.42it/s]

{'loss': Array(0.1436738, dtype=float32), 'loss_cross_entropy': Array(0.1358044, dtype=float32)}


 10%|█         | 102969/1000000 [4:42:59<32:08:15,  7.75it/s]

{'loss': Array(0.15610224, dtype=float32), 'loss_cross_entropy': Array(0.14745268, dtype=float32)}


 10%|█         | 102979/1000000 [4:43:00<25:12:23,  9.89it/s]

{'loss': Array(0.15384588, dtype=float32), 'loss_cross_entropy': Array(0.14560401, dtype=float32)}


 10%|█         | 102989/1000000 [4:43:02<33:14:45,  7.49it/s]

{'loss': Array(0.14392781, dtype=float32), 'loss_cross_entropy': Array(0.13638367, dtype=float32)}


 10%|█         | 102999/1000000 [4:43:03<25:12:20,  9.89it/s]

{'loss': Array(0.12179603, dtype=float32), 'loss_cross_entropy': Array(0.11462169, dtype=float32)}


 10%|█         | 103009/1000000 [4:43:11<83:22:22,  2.99it/s] 

{'loss': Array(0.14544466, dtype=float32), 'loss_cross_entropy': Array(0.1373362, dtype=float32)}


 10%|█         | 103019/1000000 [4:43:13<38:31:45,  6.47it/s]

{'loss': Array(0.1441922, dtype=float32), 'loss_cross_entropy': Array(0.13592277, dtype=float32)}


 10%|█         | 103029/1000000 [4:43:14<26:31:59,  9.39it/s]

{'loss': Array(0.14281796, dtype=float32), 'loss_cross_entropy': Array(0.13502303, dtype=float32)}


 10%|█         | 103039/1000000 [4:43:16<34:37:08,  7.20it/s]

{'loss': Array(0.14557773, dtype=float32), 'loss_cross_entropy': Array(0.13762356, dtype=float32)}


 10%|█         | 103049/1000000 [4:43:18<25:10:38,  9.90it/s]

{'loss': Array(0.14322579, dtype=float32), 'loss_cross_entropy': Array(0.13522546, dtype=float32)}


 10%|█         | 103059/1000000 [4:43:19<37:41:41,  6.61it/s]

{'loss': Array(0.15034781, dtype=float32), 'loss_cross_entropy': Array(0.14147508, dtype=float32)}


 10%|█         | 103069/1000000 [4:43:21<25:59:43,  9.58it/s]

{'loss': Array(0.13241582, dtype=float32), 'loss_cross_entropy': Array(0.12491967, dtype=float32)}


 10%|█         | 103079/1000000 [4:43:22<23:23:53, 10.65it/s]

{'loss': Array(0.14010648, dtype=float32), 'loss_cross_entropy': Array(0.13254726, dtype=float32)}


 10%|█         | 103089/1000000 [4:43:24<29:12:22,  8.53it/s]

{'loss': Array(0.1360421, dtype=float32), 'loss_cross_entropy': Array(0.128412, dtype=float32)}


 10%|█         | 103099/1000000 [4:43:25<24:04:03, 10.35it/s]

{'loss': Array(0.1457287, dtype=float32), 'loss_cross_entropy': Array(0.13717161, dtype=float32)}


 10%|█         | 103109/1000000 [4:43:27<32:29:29,  7.67it/s]

{'loss': Array(0.16187488, dtype=float32), 'loss_cross_entropy': Array(0.15243323, dtype=float32)}


 10%|█         | 103119/1000000 [4:43:28<24:14:57, 10.27it/s]

{'loss': Array(0.15701364, dtype=float32), 'loss_cross_entropy': Array(0.1481045, dtype=float32)}


 10%|█         | 103129/1000000 [4:43:29<23:17:59, 10.69it/s]

{'loss': Array(0.13478616, dtype=float32), 'loss_cross_entropy': Array(0.12741391, dtype=float32)}


 10%|█         | 103139/1000000 [4:43:31<26:05:42,  9.55it/s]

{'loss': Array(0.14661878, dtype=float32), 'loss_cross_entropy': Array(0.1383109, dtype=float32)}


 10%|█         | 103149/1000000 [4:43:33<23:45:24, 10.49it/s]

{'loss': Array(0.14848268, dtype=float32), 'loss_cross_entropy': Array(0.1403956, dtype=float32)}


 10%|█         | 103159/1000000 [4:43:34<36:11:11,  6.88it/s]

{'loss': Array(0.14256413, dtype=float32), 'loss_cross_entropy': Array(0.1342293, dtype=float32)}


 10%|█         | 103169/1000000 [4:43:36<25:32:19,  9.75it/s]

{'loss': Array(0.14699434, dtype=float32), 'loss_cross_entropy': Array(0.13880175, dtype=float32)}


 10%|█         | 103179/1000000 [4:43:37<23:12:42, 10.73it/s]

{'loss': Array(0.13367172, dtype=float32), 'loss_cross_entropy': Array(0.12621944, dtype=float32)}


 10%|█         | 103189/1000000 [4:43:39<29:45:23,  8.37it/s]

{'loss': Array(0.14455509, dtype=float32), 'loss_cross_entropy': Array(0.13660888, dtype=float32)}


 10%|█         | 103199/1000000 [4:43:40<24:48:34, 10.04it/s]

{'loss': Array(0.14078961, dtype=float32), 'loss_cross_entropy': Array(0.1323864, dtype=float32)}


 10%|█         | 103209/1000000 [4:43:42<32:38:39,  7.63it/s]

{'loss': Array(0.14818214, dtype=float32), 'loss_cross_entropy': Array(0.14053582, dtype=float32)}


 10%|█         | 103219/1000000 [4:43:43<24:20:59, 10.23it/s]

{'loss': Array(0.149012, dtype=float32), 'loss_cross_entropy': Array(0.140657, dtype=float32)}


 10%|█         | 103229/1000000 [4:43:45<32:26:18,  7.68it/s]

{'loss': Array(0.13909379, dtype=float32), 'loss_cross_entropy': Array(0.13116148, dtype=float32)}


 10%|█         | 103239/1000000 [4:43:46<24:42:38, 10.08it/s]

{'loss': Array(0.13824774, dtype=float32), 'loss_cross_entropy': Array(0.13064972, dtype=float32)}


 10%|█         | 103249/1000000 [4:43:48<23:39:24, 10.53it/s]

{'loss': Array(0.14582752, dtype=float32), 'loss_cross_entropy': Array(0.1376786, dtype=float32)}


 10%|█         | 103259/1000000 [4:43:49<27:52:43,  8.93it/s]

{'loss': Array(0.13241588, dtype=float32), 'loss_cross_entropy': Array(0.12513998, dtype=float32)}


 10%|█         | 103269/1000000 [4:43:51<23:46:15, 10.48it/s]

{'loss': Array(0.12506232, dtype=float32), 'loss_cross_entropy': Array(0.1179587, dtype=float32)}


 10%|█         | 103279/1000000 [4:43:52<30:01:27,  8.30it/s]

{'loss': Array(0.14644091, dtype=float32), 'loss_cross_entropy': Array(0.13801982, dtype=float32)}


 10%|█         | 103289/1000000 [4:43:54<23:43:26, 10.50it/s]

{'loss': Array(0.14663605, dtype=float32), 'loss_cross_entropy': Array(0.13814686, dtype=float32)}


 10%|█         | 103299/1000000 [4:43:55<36:39:11,  6.80it/s]

{'loss': Array(0.15598848, dtype=float32), 'loss_cross_entropy': Array(0.14742294, dtype=float32)}


 10%|█         | 103309/1000000 [4:43:57<25:31:07,  9.76it/s]

{'loss': Array(0.13754469, dtype=float32), 'loss_cross_entropy': Array(0.13001753, dtype=float32)}


 10%|█         | 103319/1000000 [4:43:58<36:49:00,  6.77it/s]

{'loss': Array(0.14335036, dtype=float32), 'loss_cross_entropy': Array(0.13576138, dtype=float32)}


 10%|█         | 103329/1000000 [4:44:00<25:58:39,  9.59it/s]

{'loss': Array(0.15098576, dtype=float32), 'loss_cross_entropy': Array(0.14253427, dtype=float32)}


 10%|█         | 103339/1000000 [4:44:01<24:54:54, 10.00it/s]

{'loss': Array(0.14649366, dtype=float32), 'loss_cross_entropy': Array(0.1390922, dtype=float32)}


 10%|█         | 103349/1000000 [4:44:03<33:46:43,  7.37it/s]

{'loss': Array(0.13805144, dtype=float32), 'loss_cross_entropy': Array(0.13059743, dtype=float32)}


 10%|█         | 103359/1000000 [4:44:04<26:01:03,  9.57it/s]

{'loss': Array(0.15126783, dtype=float32), 'loss_cross_entropy': Array(0.14272702, dtype=float32)}


 10%|█         | 103369/1000000 [4:44:06<37:46:00,  6.59it/s]

{'loss': Array(0.12776813, dtype=float32), 'loss_cross_entropy': Array(0.12030939, dtype=float32)}


 10%|█         | 103379/1000000 [4:44:08<27:02:53,  9.21it/s]

{'loss': Array(0.1373847, dtype=float32), 'loss_cross_entropy': Array(0.12947509, dtype=float32)}


 10%|█         | 103389/1000000 [4:44:09<24:56:16,  9.99it/s]

{'loss': Array(0.12509711, dtype=float32), 'loss_cross_entropy': Array(0.11798406, dtype=float32)}


 10%|█         | 103399/1000000 [4:44:11<28:15:14,  8.81it/s]

{'loss': Array(0.16054729, dtype=float32), 'loss_cross_entropy': Array(0.15180878, dtype=float32)}


 10%|█         | 103409/1000000 [4:44:12<24:14:23, 10.27it/s]

{'loss': Array(0.13328877, dtype=float32), 'loss_cross_entropy': Array(0.12558186, dtype=float32)}


 10%|█         | 103419/1000000 [4:44:14<26:39:38,  9.34it/s]

{'loss': Array(0.15362166, dtype=float32), 'loss_cross_entropy': Array(0.14486668, dtype=float32)}


 10%|█         | 103429/1000000 [4:44:15<23:45:03, 10.49it/s]

{'loss': Array(0.16024029, dtype=float32), 'loss_cross_entropy': Array(0.1517043, dtype=float32)}


 10%|█         | 103439/1000000 [4:44:17<36:56:50,  6.74it/s]

{'loss': Array(0.15969454, dtype=float32), 'loss_cross_entropy': Array(0.15140335, dtype=float32)}


 10%|█         | 103449/1000000 [4:44:19<25:52:36,  9.62it/s]

{'loss': Array(0.16731794, dtype=float32), 'loss_cross_entropy': Array(0.15828608, dtype=float32)}


 10%|█         | 103459/1000000 [4:44:20<22:55:44, 10.86it/s]

{'loss': Array(0.14290504, dtype=float32), 'loss_cross_entropy': Array(0.13469116, dtype=float32)}


 10%|█         | 103469/1000000 [4:44:22<29:45:14,  8.37it/s]

{'loss': Array(0.14534728, dtype=float32), 'loss_cross_entropy': Array(0.13755237, dtype=float32)}


 10%|█         | 103479/1000000 [4:44:23<24:01:55, 10.36it/s]

{'loss': Array(0.16442814, dtype=float32), 'loss_cross_entropy': Array(0.15548633, dtype=float32)}


 10%|█         | 103489/1000000 [4:44:25<37:52:52,  6.57it/s]

{'loss': Array(0.13438474, dtype=float32), 'loss_cross_entropy': Array(0.12675913, dtype=float32)}


 10%|█         | 103499/1000000 [4:44:26<25:36:19,  9.73it/s]

{'loss': Array(0.15423742, dtype=float32), 'loss_cross_entropy': Array(0.1450728, dtype=float32)}


 10%|█         | 103509/1000000 [4:44:34<81:17:40,  3.06it/s] 

{'loss': Array(0.1570211, dtype=float32), 'loss_cross_entropy': Array(0.14779982, dtype=float32)}


 10%|█         | 103519/1000000 [4:44:36<37:31:10,  6.64it/s]

{'loss': Array(0.13537781, dtype=float32), 'loss_cross_entropy': Array(0.12809847, dtype=float32)}


 10%|█         | 103529/1000000 [4:44:37<25:30:09,  9.76it/s]

{'loss': Array(0.15049943, dtype=float32), 'loss_cross_entropy': Array(0.14196287, dtype=float32)}


 10%|█         | 103539/1000000 [4:44:39<30:26:18,  8.18it/s]

{'loss': Array(0.1558823, dtype=float32), 'loss_cross_entropy': Array(0.14700527, dtype=float32)}


 10%|█         | 103549/1000000 [4:44:40<24:40:39, 10.09it/s]

{'loss': Array(0.16806957, dtype=float32), 'loss_cross_entropy': Array(0.15873784, dtype=float32)}


 10%|█         | 103559/1000000 [4:44:42<37:40:03,  6.61it/s]

{'loss': Array(0.14138524, dtype=float32), 'loss_cross_entropy': Array(0.13326842, dtype=float32)}


 10%|█         | 103569/1000000 [4:44:43<25:25:25,  9.79it/s]

{'loss': Array(0.15046819, dtype=float32), 'loss_cross_entropy': Array(0.1420813, dtype=float32)}


 10%|█         | 103579/1000000 [4:44:45<23:20:27, 10.67it/s]

{'loss': Array(0.14014015, dtype=float32), 'loss_cross_entropy': Array(0.13265063, dtype=float32)}


 10%|█         | 103589/1000000 [4:44:46<26:03:53,  9.55it/s]

{'loss': Array(0.13145272, dtype=float32), 'loss_cross_entropy': Array(0.12358111, dtype=float32)}


 10%|█         | 103599/1000000 [4:44:48<23:38:31, 10.53it/s]

{'loss': Array(0.12990648, dtype=float32), 'loss_cross_entropy': Array(0.12244246, dtype=float32)}


 10%|█         | 103609/1000000 [4:44:49<37:00:12,  6.73it/s]

{'loss': Array(0.1404526, dtype=float32), 'loss_cross_entropy': Array(0.13240688, dtype=float32)}


 10%|█         | 103619/1000000 [4:44:51<25:44:28,  9.67it/s]

{'loss': Array(0.14240664, dtype=float32), 'loss_cross_entropy': Array(0.13423128, dtype=float32)}


 10%|█         | 103629/1000000 [4:44:52<23:14:21, 10.71it/s]

{'loss': Array(0.13074327, dtype=float32), 'loss_cross_entropy': Array(0.12307389, dtype=float32)}


 10%|█         | 103639/1000000 [4:44:54<27:54:24,  8.92it/s]

{'loss': Array(0.14646104, dtype=float32), 'loss_cross_entropy': Array(0.13883585, dtype=float32)}


 10%|█         | 103649/1000000 [4:44:55<24:10:39, 10.30it/s]

{'loss': Array(0.11825003, dtype=float32), 'loss_cross_entropy': Array(0.11141638, dtype=float32)}


 10%|█         | 103659/1000000 [4:44:57<32:25:47,  7.68it/s]

{'loss': Array(0.15047179, dtype=float32), 'loss_cross_entropy': Array(0.14254743, dtype=float32)}


 10%|█         | 103669/1000000 [4:44:58<25:38:27,  9.71it/s]

{'loss': Array(0.1344693, dtype=float32), 'loss_cross_entropy': Array(0.12707359, dtype=float32)}


 10%|█         | 103679/1000000 [4:45:00<37:06:53,  6.71it/s]

{'loss': Array(0.14121121, dtype=float32), 'loss_cross_entropy': Array(0.13358645, dtype=float32)}


 10%|█         | 103689/1000000 [4:45:02<26:20:30,  9.45it/s]

{'loss': Array(0.16256583, dtype=float32), 'loss_cross_entropy': Array(0.15349789, dtype=float32)}


 10%|█         | 103699/1000000 [4:45:03<23:37:13, 10.54it/s]

{'loss': Array(0.14337136, dtype=float32), 'loss_cross_entropy': Array(0.13513845, dtype=float32)}


 10%|█         | 103709/1000000 [4:45:05<29:46:09,  8.36it/s]

{'loss': Array(0.14837317, dtype=float32), 'loss_cross_entropy': Array(0.13969819, dtype=float32)}


 10%|█         | 103719/1000000 [4:45:06<24:05:03, 10.34it/s]

{'loss': Array(0.15104721, dtype=float32), 'loss_cross_entropy': Array(0.14283183, dtype=float32)}


 10%|█         | 103729/1000000 [4:45:08<32:42:04,  7.61it/s]

{'loss': Array(0.14530003, dtype=float32), 'loss_cross_entropy': Array(0.13753192, dtype=float32)}


 10%|█         | 103739/1000000 [4:45:09<24:48:27, 10.04it/s]

{'loss': Array(0.13530456, dtype=float32), 'loss_cross_entropy': Array(0.12752746, dtype=float32)}


 10%|█         | 103749/1000000 [4:45:10<23:47:17, 10.47it/s]

{'loss': Array(0.16969714, dtype=float32), 'loss_cross_entropy': Array(0.1604357, dtype=float32)}


 10%|█         | 103759/1000000 [4:45:12<26:28:56,  9.40it/s]

{'loss': Array(0.14805408, dtype=float32), 'loss_cross_entropy': Array(0.1403274, dtype=float32)}


 10%|█         | 103769/1000000 [4:45:14<24:20:54, 10.22it/s]

{'loss': Array(0.15883912, dtype=float32), 'loss_cross_entropy': Array(0.15033738, dtype=float32)}


 10%|█         | 103779/1000000 [4:45:15<37:06:37,  6.71it/s]

{'loss': Array(0.1591558, dtype=float32), 'loss_cross_entropy': Array(0.15038222, dtype=float32)}


 10%|█         | 103789/1000000 [4:45:17<25:40:15,  9.70it/s]

{'loss': Array(0.13863274, dtype=float32), 'loss_cross_entropy': Array(0.13076364, dtype=float32)}


 10%|█         | 103799/1000000 [4:45:18<24:19:29, 10.23it/s]

{'loss': Array(0.15039395, dtype=float32), 'loss_cross_entropy': Array(0.14210437, dtype=float32)}


 10%|█         | 103809/1000000 [4:45:20<30:22:50,  8.19it/s]

{'loss': Array(0.1457966, dtype=float32), 'loss_cross_entropy': Array(0.13734989, dtype=float32)}


 10%|█         | 103819/1000000 [4:45:21<25:00:45,  9.95it/s]

{'loss': Array(0.14502549, dtype=float32), 'loss_cross_entropy': Array(0.13689531, dtype=float32)}


 10%|█         | 103829/1000000 [4:45:23<33:32:23,  7.42it/s]

{'loss': Array(0.159715, dtype=float32), 'loss_cross_entropy': Array(0.15063652, dtype=float32)}


 10%|█         | 103839/1000000 [4:45:24<25:21:43,  9.82it/s]

{'loss': Array(0.15541355, dtype=float32), 'loss_cross_entropy': Array(0.14694227, dtype=float32)}


 10%|█         | 103849/1000000 [4:45:26<32:51:01,  7.58it/s]

{'loss': Array(0.14178748, dtype=float32), 'loss_cross_entropy': Array(0.13337818, dtype=float32)}


 10%|█         | 103859/1000000 [4:45:27<24:21:20, 10.22it/s]

{'loss': Array(0.14305304, dtype=float32), 'loss_cross_entropy': Array(0.13515313, dtype=float32)}


 10%|█         | 103869/1000000 [4:45:29<23:10:09, 10.74it/s]

{'loss': Array(0.16002491, dtype=float32), 'loss_cross_entropy': Array(0.15076193, dtype=float32)}


 10%|█         | 103879/1000000 [4:45:30<27:41:06,  8.99it/s]

{'loss': Array(0.15448338, dtype=float32), 'loss_cross_entropy': Array(0.14541495, dtype=float32)}


 10%|█         | 103889/1000000 [4:45:32<23:59:28, 10.38it/s]

{'loss': Array(0.1352883, dtype=float32), 'loss_cross_entropy': Array(0.12800914, dtype=float32)}


 10%|█         | 103899/1000000 [4:45:33<29:41:19,  8.38it/s]

{'loss': Array(0.13279317, dtype=float32), 'loss_cross_entropy': Array(0.12515672, dtype=float32)}


 10%|█         | 103909/1000000 [4:45:35<24:36:54, 10.11it/s]

{'loss': Array(0.14922224, dtype=float32), 'loss_cross_entropy': Array(0.14086154, dtype=float32)}


 10%|█         | 103919/1000000 [4:45:37<36:45:57,  6.77it/s]

{'loss': Array(0.1504836, dtype=float32), 'loss_cross_entropy': Array(0.14215374, dtype=float32)}


 10%|█         | 103929/1000000 [4:45:38<25:40:30,  9.69it/s]

{'loss': Array(0.15000491, dtype=float32), 'loss_cross_entropy': Array(0.14127049, dtype=float32)}


 10%|█         | 103939/1000000 [4:45:40<36:47:10,  6.77it/s]

{'loss': Array(0.16267107, dtype=float32), 'loss_cross_entropy': Array(0.15362698, dtype=float32)}


 10%|█         | 103949/1000000 [4:45:41<25:09:13,  9.90it/s]

{'loss': Array(0.14728914, dtype=float32), 'loss_cross_entropy': Array(0.1395922, dtype=float32)}


 10%|█         | 103959/1000000 [4:45:42<23:21:06, 10.66it/s]

{'loss': Array(0.15102921, dtype=float32), 'loss_cross_entropy': Array(0.14251296, dtype=float32)}


 10%|█         | 103969/1000000 [4:45:44<32:23:30,  7.68it/s]

{'loss': Array(0.15420416, dtype=float32), 'loss_cross_entropy': Array(0.14627494, dtype=float32)}


 10%|█         | 103979/1000000 [4:45:45<24:38:41, 10.10it/s]

{'loss': Array(0.13998696, dtype=float32), 'loss_cross_entropy': Array(0.13179319, dtype=float32)}


 10%|█         | 103989/1000000 [4:45:47<37:01:33,  6.72it/s]

{'loss': Array(0.14679103, dtype=float32), 'loss_cross_entropy': Array(0.13835613, dtype=float32)}


 10%|█         | 103999/1000000 [4:45:48<25:17:38,  9.84it/s]

{'loss': Array(0.13781217, dtype=float32), 'loss_cross_entropy': Array(0.12984839, dtype=float32)}


 10%|█         | 104009/1000000 [4:45:57<83:16:09,  2.99it/s] 

{'loss': Array(0.12314614, dtype=float32), 'loss_cross_entropy': Array(0.11601056, dtype=float32)}


 10%|█         | 104019/1000000 [4:45:58<37:18:15,  6.67it/s] 

{'loss': Array(0.13571732, dtype=float32), 'loss_cross_entropy': Array(0.12811098, dtype=float32)}


 10%|█         | 104029/1000000 [4:46:00<26:06:14,  9.53it/s]

{'loss': Array(0.13351893, dtype=float32), 'loss_cross_entropy': Array(0.12593114, dtype=float32)}


 10%|█         | 104039/1000000 [4:46:01<27:25:25,  9.08it/s]

{'loss': Array(0.13393293, dtype=float32), 'loss_cross_entropy': Array(0.12612367, dtype=float32)}


 10%|█         | 104049/1000000 [4:46:03<25:06:06,  9.91it/s]

{'loss': Array(0.14718546, dtype=float32), 'loss_cross_entropy': Array(0.13865197, dtype=float32)}


 10%|█         | 104059/1000000 [4:46:05<37:26:18,  6.65it/s]

{'loss': Array(0.13532443, dtype=float32), 'loss_cross_entropy': Array(0.1275258, dtype=float32)}


 10%|█         | 104069/1000000 [4:46:06<26:27:04,  9.41it/s]

{'loss': Array(0.14408804, dtype=float32), 'loss_cross_entropy': Array(0.13588761, dtype=float32)}


 10%|█         | 104079/1000000 [4:46:07<24:45:58, 10.05it/s]

{'loss': Array(0.14580828, dtype=float32), 'loss_cross_entropy': Array(0.13745165, dtype=float32)}


 10%|█         | 104089/1000000 [4:46:09<30:49:37,  8.07it/s]

{'loss': Array(0.14941117, dtype=float32), 'loss_cross_entropy': Array(0.14077401, dtype=float32)}


 10%|█         | 104099/1000000 [4:46:11<25:21:23,  9.81it/s]

{'loss': Array(0.13480909, dtype=float32), 'loss_cross_entropy': Array(0.12740026, dtype=float32)}


 10%|█         | 104109/1000000 [4:46:12<37:02:02,  6.72it/s]

{'loss': Array(0.14191885, dtype=float32), 'loss_cross_entropy': Array(0.13409907, dtype=float32)}


 10%|█         | 104119/1000000 [4:46:14<25:31:26,  9.75it/s]

{'loss': Array(0.13676162, dtype=float32), 'loss_cross_entropy': Array(0.129406, dtype=float32)}


 10%|█         | 104129/1000000 [4:46:15<23:01:07, 10.81it/s]

{'loss': Array(0.13364446, dtype=float32), 'loss_cross_entropy': Array(0.12603764, dtype=float32)}


 10%|█         | 104139/1000000 [4:46:17<27:46:21,  8.96it/s]

{'loss': Array(0.1369898, dtype=float32), 'loss_cross_entropy': Array(0.12909572, dtype=float32)}


 10%|█         | 104149/1000000 [4:46:18<24:05:37, 10.33it/s]

{'loss': Array(0.13362198, dtype=float32), 'loss_cross_entropy': Array(0.12582658, dtype=float32)}


 10%|█         | 104159/1000000 [4:46:20<32:32:48,  7.65it/s]

{'loss': Array(0.15279154, dtype=float32), 'loss_cross_entropy': Array(0.1441795, dtype=float32)}


 10%|█         | 104169/1000000 [4:46:21<24:46:02, 10.05it/s]

{'loss': Array(0.14927201, dtype=float32), 'loss_cross_entropy': Array(0.14166592, dtype=float32)}


 10%|█         | 104179/1000000 [4:46:23<24:01:41, 10.36it/s]

{'loss': Array(0.14021671, dtype=float32), 'loss_cross_entropy': Array(0.13210979, dtype=float32)}


 10%|█         | 104189/1000000 [4:46:24<29:18:36,  8.49it/s]

{'loss': Array(0.13987754, dtype=float32), 'loss_cross_entropy': Array(0.13261963, dtype=float32)}


 10%|█         | 104199/1000000 [4:46:26<24:18:06, 10.24it/s]

{'loss': Array(0.15141733, dtype=float32), 'loss_cross_entropy': Array(0.14258738, dtype=float32)}


 10%|█         | 104209/1000000 [4:46:27<23:17:48, 10.68it/s]

{'loss': Array(0.15315743, dtype=float32), 'loss_cross_entropy': Array(0.14462425, dtype=float32)}


 10%|█         | 104219/1000000 [4:46:29<26:44:36,  9.30it/s]

{'loss': Array(0.13635127, dtype=float32), 'loss_cross_entropy': Array(0.12883727, dtype=float32)}


 10%|█         | 104229/1000000 [4:46:30<23:13:58, 10.71it/s]

{'loss': Array(0.12947157, dtype=float32), 'loss_cross_entropy': Array(0.12204375, dtype=float32)}


 10%|█         | 104239/1000000 [4:46:32<29:53:37,  8.32it/s]

{'loss': Array(0.16025145, dtype=float32), 'loss_cross_entropy': Array(0.15168242, dtype=float32)}


 10%|█         | 104249/1000000 [4:46:33<24:15:08, 10.26it/s]

{'loss': Array(0.15722813, dtype=float32), 'loss_cross_entropy': Array(0.1488902, dtype=float32)}


 10%|█         | 104259/1000000 [4:46:35<37:04:23,  6.71it/s]

{'loss': Array(0.15807526, dtype=float32), 'loss_cross_entropy': Array(0.14954959, dtype=float32)}


 10%|█         | 104269/1000000 [4:46:36<25:35:10,  9.72it/s]

{'loss': Array(0.13674057, dtype=float32), 'loss_cross_entropy': Array(0.12892744, dtype=float32)}


 10%|█         | 104279/1000000 [4:46:38<36:53:57,  6.74it/s]

{'loss': Array(0.15458153, dtype=float32), 'loss_cross_entropy': Array(0.14644478, dtype=float32)}


 10%|█         | 104289/1000000 [4:46:39<26:10:51,  9.50it/s]

{'loss': Array(0.14158364, dtype=float32), 'loss_cross_entropy': Array(0.1339613, dtype=float32)}


 10%|█         | 104299/1000000 [4:46:41<23:16:09, 10.69it/s]

{'loss': Array(0.17943615, dtype=float32), 'loss_cross_entropy': Array(0.16930103, dtype=float32)}


 10%|█         | 104309/1000000 [4:46:43<32:43:45,  7.60it/s]

{'loss': Array(0.16422354, dtype=float32), 'loss_cross_entropy': Array(0.15475427, dtype=float32)}


 10%|█         | 104319/1000000 [4:46:44<24:49:16, 10.02it/s]

{'loss': Array(0.13576077, dtype=float32), 'loss_cross_entropy': Array(0.12807785, dtype=float32)}


 10%|█         | 104329/1000000 [4:46:46<37:33:08,  6.63it/s]

{'loss': Array(0.15361902, dtype=float32), 'loss_cross_entropy': Array(0.144793, dtype=float32)}


 10%|█         | 104339/1000000 [4:46:47<25:19:47,  9.82it/s]

{'loss': Array(0.14566003, dtype=float32), 'loss_cross_entropy': Array(0.13709061, dtype=float32)}


 10%|█         | 104349/1000000 [4:46:48<24:00:41, 10.36it/s]

{'loss': Array(0.15164869, dtype=float32), 'loss_cross_entropy': Array(0.14326298, dtype=float32)}


 10%|█         | 104359/1000000 [4:46:50<26:53:04,  9.25it/s]

{'loss': Array(0.13804783, dtype=float32), 'loss_cross_entropy': Array(0.12985213, dtype=float32)}


 10%|█         | 104369/1000000 [4:46:52<24:32:39, 10.14it/s]

{'loss': Array(0.15019898, dtype=float32), 'loss_cross_entropy': Array(0.1418312, dtype=float32)}


 10%|█         | 104379/1000000 [4:46:53<26:46:31,  9.29it/s]

{'loss': Array(0.14259474, dtype=float32), 'loss_cross_entropy': Array(0.13468258, dtype=float32)}


 10%|█         | 104389/1000000 [4:46:55<23:32:17, 10.57it/s]

{'loss': Array(0.15351678, dtype=float32), 'loss_cross_entropy': Array(0.14570932, dtype=float32)}


 10%|█         | 104399/1000000 [4:46:56<36:41:56,  6.78it/s]

{'loss': Array(0.12421981, dtype=float32), 'loss_cross_entropy': Array(0.1172063, dtype=float32)}


 10%|█         | 104409/1000000 [4:46:58<25:07:32,  9.90it/s]

{'loss': Array(0.1685084, dtype=float32), 'loss_cross_entropy': Array(0.15894382, dtype=float32)}


 10%|█         | 104419/1000000 [4:46:59<23:13:48, 10.71it/s]

{'loss': Array(0.16104095, dtype=float32), 'loss_cross_entropy': Array(0.15269399, dtype=float32)}


 10%|█         | 104429/1000000 [4:47:01<30:21:30,  8.19it/s]

{'loss': Array(0.15489377, dtype=float32), 'loss_cross_entropy': Array(0.14620213, dtype=float32)}


 10%|█         | 104439/1000000 [4:47:02<25:08:05,  9.90it/s]

{'loss': Array(0.14892997, dtype=float32), 'loss_cross_entropy': Array(0.14081089, dtype=float32)}


 10%|█         | 104449/1000000 [4:47:04<39:17:46,  6.33it/s]

{'loss': Array(0.13432081, dtype=float32), 'loss_cross_entropy': Array(0.12704735, dtype=float32)}


 10%|█         | 104459/1000000 [4:47:06<27:15:20,  9.13it/s]

{'loss': Array(0.1298461, dtype=float32), 'loss_cross_entropy': Array(0.12226103, dtype=float32)}


 10%|█         | 104469/1000000 [4:47:07<24:54:05,  9.99it/s]

{'loss': Array(0.13109021, dtype=float32), 'loss_cross_entropy': Array(0.12363125, dtype=float32)}


 10%|█         | 104479/1000000 [4:47:09<28:44:45,  8.65it/s]

{'loss': Array(0.16097823, dtype=float32), 'loss_cross_entropy': Array(0.15204361, dtype=float32)}


 10%|█         | 104489/1000000 [4:47:10<25:07:11,  9.90it/s]

{'loss': Array(0.12398901, dtype=float32), 'loss_cross_entropy': Array(0.11704308, dtype=float32)}


 10%|█         | 104499/1000000 [4:47:12<34:09:22,  7.28it/s]

{'loss': Array(0.16054386, dtype=float32), 'loss_cross_entropy': Array(0.15179123, dtype=float32)}


 10%|█         | 104509/1000000 [4:47:20<81:49:34,  3.04it/s] 

{'loss': Array(0.13319145, dtype=float32), 'loss_cross_entropy': Array(0.12540007, dtype=float32)}


 10%|█         | 104519/1000000 [4:47:21<42:30:17,  5.85it/s]

{'loss': Array(0.12485393, dtype=float32), 'loss_cross_entropy': Array(0.11780081, dtype=float32)}


 10%|█         | 104529/1000000 [4:47:23<26:25:04,  9.42it/s]

{'loss': Array(0.14626661, dtype=float32), 'loss_cross_entropy': Array(0.13737316, dtype=float32)}


 10%|█         | 104539/1000000 [4:47:24<23:42:54, 10.49it/s]

{'loss': Array(0.1396884, dtype=float32), 'loss_cross_entropy': Array(0.13243613, dtype=float32)}


 10%|█         | 104549/1000000 [4:47:26<30:08:08,  8.25it/s]

{'loss': Array(0.13804945, dtype=float32), 'loss_cross_entropy': Array(0.1304261, dtype=float32)}


 10%|█         | 104559/1000000 [4:47:27<24:27:10, 10.17it/s]

{'loss': Array(0.1414535, dtype=float32), 'loss_cross_entropy': Array(0.13334775, dtype=float32)}


 10%|█         | 104569/1000000 [4:47:29<23:31:57, 10.57it/s]

{'loss': Array(0.1596084, dtype=float32), 'loss_cross_entropy': Array(0.15162344, dtype=float32)}


 10%|█         | 104579/1000000 [4:47:30<26:41:49,  9.32it/s]

{'loss': Array(0.13527654, dtype=float32), 'loss_cross_entropy': Array(0.12741841, dtype=float32)}


 10%|█         | 104589/1000000 [4:47:32<23:28:17, 10.60it/s]

{'loss': Array(0.15828294, dtype=float32), 'loss_cross_entropy': Array(0.14974318, dtype=float32)}


 10%|█         | 104599/1000000 [4:47:33<29:26:51,  8.45it/s]

{'loss': Array(0.12959437, dtype=float32), 'loss_cross_entropy': Array(0.12207282, dtype=float32)}


 10%|█         | 104609/1000000 [4:47:35<24:22:12, 10.21it/s]

{'loss': Array(0.14364131, dtype=float32), 'loss_cross_entropy': Array(0.13536185, dtype=float32)}


 10%|█         | 104619/1000000 [4:47:37<33:07:24,  7.51it/s]

{'loss': Array(0.13293105, dtype=float32), 'loss_cross_entropy': Array(0.12517008, dtype=float32)}


 10%|█         | 104629/1000000 [4:47:38<24:24:44, 10.19it/s]

{'loss': Array(0.1427493, dtype=float32), 'loss_cross_entropy': Array(0.13406982, dtype=float32)}


 10%|█         | 104639/1000000 [4:47:39<23:27:47, 10.60it/s]

{'loss': Array(0.16466299, dtype=float32), 'loss_cross_entropy': Array(0.15584543, dtype=float32)}


 10%|█         | 104649/1000000 [4:47:41<29:55:20,  8.31it/s]

{'loss': Array(0.1398506, dtype=float32), 'loss_cross_entropy': Array(0.13247032, dtype=float32)}


 10%|█         | 104659/1000000 [4:47:42<24:28:27, 10.16it/s]

{'loss': Array(0.15090679, dtype=float32), 'loss_cross_entropy': Array(0.14265996, dtype=float32)}


 10%|█         | 104669/1000000 [4:47:44<23:25:42, 10.62it/s]

{'loss': Array(0.15448013, dtype=float32), 'loss_cross_entropy': Array(0.14554651, dtype=float32)}


 10%|█         | 104679/1000000 [4:47:45<26:42:15,  9.31it/s]

{'loss': Array(0.15448056, dtype=float32), 'loss_cross_entropy': Array(0.14551249, dtype=float32)}


 10%|█         | 104689/1000000 [4:47:47<23:18:45, 10.67it/s]

{'loss': Array(0.15832472, dtype=float32), 'loss_cross_entropy': Array(0.14903519, dtype=float32)}


 10%|█         | 104699/1000000 [4:47:49<29:52:21,  8.33it/s]

{'loss': Array(0.13959134, dtype=float32), 'loss_cross_entropy': Array(0.1311453, dtype=float32)}


 10%|█         | 104709/1000000 [4:47:50<24:19:49, 10.22it/s]

{'loss': Array(0.16188599, dtype=float32), 'loss_cross_entropy': Array(0.15260144, dtype=float32)}


 10%|█         | 104719/1000000 [4:47:52<37:09:53,  6.69it/s]

{'loss': Array(0.14551888, dtype=float32), 'loss_cross_entropy': Array(0.13717268, dtype=float32)}


 10%|█         | 104729/1000000 [4:47:53<25:00:20,  9.95it/s]

{'loss': Array(0.13767575, dtype=float32), 'loss_cross_entropy': Array(0.13027577, dtype=float32)}


 10%|█         | 104739/1000000 [4:47:54<23:09:08, 10.74it/s]

{'loss': Array(0.15717736, dtype=float32), 'loss_cross_entropy': Array(0.14891933, dtype=float32)}


 10%|█         | 104749/1000000 [4:47:56<32:42:05,  7.60it/s]

{'loss': Array(0.13700756, dtype=float32), 'loss_cross_entropy': Array(0.12958314, dtype=float32)}


 10%|█         | 104759/1000000 [4:47:57<24:39:03, 10.09it/s]

{'loss': Array(0.15448596, dtype=float32), 'loss_cross_entropy': Array(0.14594392, dtype=float32)}


 10%|█         | 104769/1000000 [4:47:59<23:21:37, 10.65it/s]

{'loss': Array(0.13720348, dtype=float32), 'loss_cross_entropy': Array(0.12907363, dtype=float32)}


 10%|█         | 104779/1000000 [4:48:01<28:31:19,  8.72it/s]

{'loss': Array(0.13888946, dtype=float32), 'loss_cross_entropy': Array(0.13100818, dtype=float32)}


 10%|█         | 104789/1000000 [4:48:02<25:25:55,  9.78it/s]

{'loss': Array(0.15860397, dtype=float32), 'loss_cross_entropy': Array(0.14933987, dtype=float32)}


 10%|█         | 104799/1000000 [4:48:04<30:06:14,  8.26it/s]

{'loss': Array(0.13318895, dtype=float32), 'loss_cross_entropy': Array(0.12555847, dtype=float32)}


 10%|█         | 104809/1000000 [4:48:05<24:46:28, 10.04it/s]

{'loss': Array(0.14429587, dtype=float32), 'loss_cross_entropy': Array(0.1367682, dtype=float32)}


 10%|█         | 104819/1000000 [4:48:07<26:31:16,  9.38it/s]

{'loss': Array(0.14492795, dtype=float32), 'loss_cross_entropy': Array(0.13690002, dtype=float32)}


 10%|█         | 104829/1000000 [4:48:08<27:52:34,  8.92it/s]

{'loss': Array(0.1439708, dtype=float32), 'loss_cross_entropy': Array(0.13619898, dtype=float32)}


 10%|█         | 104839/1000000 [4:48:10<24:30:10, 10.15it/s]

{'loss': Array(0.1609296, dtype=float32), 'loss_cross_entropy': Array(0.15202264, dtype=float32)}


 10%|█         | 104849/1000000 [4:48:11<33:42:37,  7.38it/s]

{'loss': Array(0.12874463, dtype=float32), 'loss_cross_entropy': Array(0.12154224, dtype=float32)}


 10%|█         | 104859/1000000 [4:48:13<25:25:06,  9.78it/s]

{'loss': Array(0.14482532, dtype=float32), 'loss_cross_entropy': Array(0.13692364, dtype=float32)}


 10%|█         | 104869/1000000 [4:48:14<23:35:11, 10.54it/s]

{'loss': Array(0.13895285, dtype=float32), 'loss_cross_entropy': Array(0.13076669, dtype=float32)}


 10%|█         | 104879/1000000 [4:48:16<26:29:38,  9.38it/s]

{'loss': Array(0.13303155, dtype=float32), 'loss_cross_entropy': Array(0.12507503, dtype=float32)}


 10%|█         | 104889/1000000 [4:48:17<23:22:50, 10.63it/s]

{'loss': Array(0.16475749, dtype=float32), 'loss_cross_entropy': Array(0.15605412, dtype=float32)}


 10%|█         | 104899/1000000 [4:48:19<27:21:41,  9.09it/s]

{'loss': Array(0.13546138, dtype=float32), 'loss_cross_entropy': Array(0.12783429, dtype=float32)}


 10%|█         | 104909/1000000 [4:48:20<23:29:54, 10.58it/s]

{'loss': Array(0.15708193, dtype=float32), 'loss_cross_entropy': Array(0.14803404, dtype=float32)}


 10%|█         | 104919/1000000 [4:48:22<36:36:29,  6.79it/s]

{'loss': Array(0.14468853, dtype=float32), 'loss_cross_entropy': Array(0.13636194, dtype=float32)}


 10%|█         | 104929/1000000 [4:48:23<26:16:43,  9.46it/s]

{'loss': Array(0.14881513, dtype=float32), 'loss_cross_entropy': Array(0.14031653, dtype=float32)}


 10%|█         | 104939/1000000 [4:48:25<23:13:34, 10.70it/s]

{'loss': Array(0.14816101, dtype=float32), 'loss_cross_entropy': Array(0.13956557, dtype=float32)}


 10%|█         | 104949/1000000 [4:48:26<29:43:50,  8.36it/s]

{'loss': Array(0.14178254, dtype=float32), 'loss_cross_entropy': Array(0.13416994, dtype=float32)}


 10%|█         | 104959/1000000 [4:48:28<24:34:49, 10.11it/s]

{'loss': Array(0.14229326, dtype=float32), 'loss_cross_entropy': Array(0.13461982, dtype=float32)}


 10%|█         | 104969/1000000 [4:48:30<33:36:12,  7.40it/s]

{'loss': Array(0.12488341, dtype=float32), 'loss_cross_entropy': Array(0.11756527, dtype=float32)}


 10%|█         | 104979/1000000 [4:48:31<25:02:34,  9.93it/s]

{'loss': Array(0.1452396, dtype=float32), 'loss_cross_entropy': Array(0.1367569, dtype=float32)}


 10%|█         | 104989/1000000 [4:48:33<32:25:08,  7.67it/s]

{'loss': Array(0.15706833, dtype=float32), 'loss_cross_entropy': Array(0.1481066, dtype=float32)}


 10%|█         | 104999/1000000 [4:48:34<24:28:57, 10.15it/s]

{'loss': Array(0.17534064, dtype=float32), 'loss_cross_entropy': Array(0.16590357, dtype=float32)}


 11%|█         | 105009/1000000 [4:48:42<80:07:53,  3.10it/s] 

{'loss': Array(0.14079241, dtype=float32), 'loss_cross_entropy': Array(0.1322351, dtype=float32)}


 11%|█         | 105019/1000000 [4:48:44<37:24:39,  6.65it/s]

{'loss': Array(0.13051489, dtype=float32), 'loss_cross_entropy': Array(0.12277889, dtype=float32)}


 11%|█         | 105029/1000000 [4:48:45<25:18:07,  9.83it/s]

{'loss': Array(0.11832545, dtype=float32), 'loss_cross_entropy': Array(0.11083378, dtype=float32)}


 11%|█         | 105039/1000000 [4:48:47<32:45:09,  7.59it/s]

{'loss': Array(0.1643593, dtype=float32), 'loss_cross_entropy': Array(0.15518616, dtype=float32)}


 11%|█         | 105049/1000000 [4:48:48<25:01:02,  9.94it/s]

{'loss': Array(0.15375414, dtype=float32), 'loss_cross_entropy': Array(0.14495753, dtype=float32)}


 11%|█         | 105059/1000000 [4:48:50<36:47:46,  6.76it/s]

{'loss': Array(0.15380722, dtype=float32), 'loss_cross_entropy': Array(0.1452792, dtype=float32)}


 11%|█         | 105069/1000000 [4:48:51<25:15:03,  9.84it/s]

{'loss': Array(0.15092854, dtype=float32), 'loss_cross_entropy': Array(0.14267191, dtype=float32)}


 11%|█         | 105079/1000000 [4:48:52<23:31:30, 10.57it/s]

{'loss': Array(0.14860153, dtype=float32), 'loss_cross_entropy': Array(0.14002363, dtype=float32)}


 11%|█         | 105089/1000000 [4:48:54<29:44:20,  8.36it/s]

{'loss': Array(0.15471578, dtype=float32), 'loss_cross_entropy': Array(0.14625329, dtype=float32)}


 11%|█         | 105099/1000000 [4:48:56<24:43:51, 10.05it/s]

{'loss': Array(0.16453902, dtype=float32), 'loss_cross_entropy': Array(0.1551132, dtype=float32)}


 11%|█         | 105109/1000000 [4:48:57<32:56:22,  7.55it/s]

{'loss': Array(0.1430139, dtype=float32), 'loss_cross_entropy': Array(0.13476242, dtype=float32)}


 11%|█         | 105119/1000000 [4:48:59<25:24:59,  9.78it/s]

{'loss': Array(0.13628972, dtype=float32), 'loss_cross_entropy': Array(0.12870745, dtype=float32)}


 11%|█         | 105129/1000000 [4:49:00<24:35:19, 10.11it/s]

{'loss': Array(0.15617345, dtype=float32), 'loss_cross_entropy': Array(0.1478468, dtype=float32)}


 11%|█         | 105139/1000000 [4:49:02<27:46:51,  8.95it/s]

{'loss': Array(0.15263058, dtype=float32), 'loss_cross_entropy': Array(0.14359333, dtype=float32)}


 11%|█         | 105149/1000000 [4:49:03<25:12:15,  9.86it/s]

{'loss': Array(0.15095913, dtype=float32), 'loss_cross_entropy': Array(0.14275962, dtype=float32)}


 11%|█         | 105159/1000000 [4:49:05<37:19:39,  6.66it/s]

{'loss': Array(0.13798054, dtype=float32), 'loss_cross_entropy': Array(0.1293726, dtype=float32)}


 11%|█         | 105169/1000000 [4:49:07<27:11:49,  9.14it/s]

{'loss': Array(0.13582347, dtype=float32), 'loss_cross_entropy': Array(0.12738834, dtype=float32)}


 11%|█         | 105179/1000000 [4:49:08<25:26:04,  9.77it/s]

{'loss': Array(0.13406251, dtype=float32), 'loss_cross_entropy': Array(0.1266831, dtype=float32)}


 11%|█         | 105189/1000000 [4:49:10<31:48:42,  7.81it/s]

{'loss': Array(0.14460872, dtype=float32), 'loss_cross_entropy': Array(0.13664275, dtype=float32)}


 11%|█         | 105199/1000000 [4:49:11<25:53:36,  9.60it/s]

{'loss': Array(0.14900836, dtype=float32), 'loss_cross_entropy': Array(0.14049006, dtype=float32)}


 11%|█         | 105209/1000000 [4:49:13<33:43:21,  7.37it/s]

{'loss': Array(0.13139348, dtype=float32), 'loss_cross_entropy': Array(0.12404896, dtype=float32)}


 11%|█         | 105219/1000000 [4:49:14<25:06:55,  9.90it/s]

{'loss': Array(0.15726416, dtype=float32), 'loss_cross_entropy': Array(0.14825335, dtype=float32)}


 11%|█         | 105229/1000000 [4:49:16<32:29:52,  7.65it/s]

{'loss': Array(0.1496176, dtype=float32), 'loss_cross_entropy': Array(0.1410028, dtype=float32)}


 11%|█         | 105239/1000000 [4:49:17<24:31:06, 10.14it/s]

{'loss': Array(0.14132877, dtype=float32), 'loss_cross_entropy': Array(0.1332786, dtype=float32)}


 11%|█         | 105249/1000000 [4:49:19<22:48:14, 10.90it/s]

{'loss': Array(0.15056585, dtype=float32), 'loss_cross_entropy': Array(0.1422668, dtype=float32)}


 11%|█         | 105259/1000000 [4:49:21<27:57:00,  8.89it/s]

{'loss': Array(0.16319552, dtype=float32), 'loss_cross_entropy': Array(0.15425219, dtype=float32)}


 11%|█         | 105269/1000000 [4:49:22<24:05:12, 10.32it/s]

{'loss': Array(0.13778937, dtype=float32), 'loss_cross_entropy': Array(0.13044828, dtype=float32)}


 11%|█         | 105279/1000000 [4:49:24<29:26:17,  8.44it/s]

{'loss': Array(0.13190563, dtype=float32), 'loss_cross_entropy': Array(0.12401319, dtype=float32)}


 11%|█         | 105289/1000000 [4:49:25<23:59:52, 10.36it/s]

{'loss': Array(0.15599158, dtype=float32), 'loss_cross_entropy': Array(0.14754553, dtype=float32)}


 11%|█         | 105299/1000000 [4:49:27<36:12:23,  6.86it/s]

{'loss': Array(0.15269291, dtype=float32), 'loss_cross_entropy': Array(0.14417516, dtype=float32)}


 11%|█         | 105309/1000000 [4:49:28<25:40:07,  9.68it/s]

{'loss': Array(0.13831866, dtype=float32), 'loss_cross_entropy': Array(0.1306244, dtype=float32)}


 11%|█         | 105319/1000000 [4:49:30<36:42:23,  6.77it/s]

{'loss': Array(0.16028199, dtype=float32), 'loss_cross_entropy': Array(0.15205008, dtype=float32)}


 11%|█         | 105329/1000000 [4:49:31<25:10:15,  9.87it/s]

{'loss': Array(0.16508654, dtype=float32), 'loss_cross_entropy': Array(0.1562567, dtype=float32)}


 11%|█         | 105339/1000000 [4:49:32<23:22:52, 10.63it/s]

{'loss': Array(0.14818658, dtype=float32), 'loss_cross_entropy': Array(0.14001201, dtype=float32)}


 11%|█         | 105349/1000000 [4:49:34<32:01:27,  7.76it/s]

{'loss': Array(0.1618658, dtype=float32), 'loss_cross_entropy': Array(0.15305263, dtype=float32)}


 11%|█         | 105359/1000000 [4:49:35<24:32:27, 10.13it/s]

{'loss': Array(0.14597557, dtype=float32), 'loss_cross_entropy': Array(0.13767187, dtype=float32)}


 11%|█         | 105369/1000000 [4:49:37<36:52:49,  6.74it/s]

{'loss': Array(0.14419582, dtype=float32), 'loss_cross_entropy': Array(0.13636099, dtype=float32)}


 11%|█         | 105379/1000000 [4:49:39<25:42:37,  9.67it/s]

{'loss': Array(0.15786229, dtype=float32), 'loss_cross_entropy': Array(0.14917655, dtype=float32)}


 11%|█         | 105389/1000000 [4:49:40<23:00:37, 10.80it/s]

{'loss': Array(0.1466947, dtype=float32), 'loss_cross_entropy': Array(0.1380175, dtype=float32)}


 11%|█         | 105399/1000000 [4:49:42<26:22:12,  9.42it/s]

{'loss': Array(0.13542444, dtype=float32), 'loss_cross_entropy': Array(0.12779257, dtype=float32)}


 11%|█         | 105409/1000000 [4:49:43<24:21:51, 10.20it/s]

{'loss': Array(0.14429444, dtype=float32), 'loss_cross_entropy': Array(0.13658646, dtype=float32)}


 11%|█         | 105419/1000000 [4:49:45<26:16:46,  9.46it/s]

{'loss': Array(0.14034417, dtype=float32), 'loss_cross_entropy': Array(0.13222852, dtype=float32)}


 11%|█         | 105429/1000000 [4:49:46<23:31:07, 10.57it/s]

{'loss': Array(0.148201, dtype=float32), 'loss_cross_entropy': Array(0.14044067, dtype=float32)}


 11%|█         | 105439/1000000 [4:49:48<36:07:41,  6.88it/s]

{'loss': Array(0.15644942, dtype=float32), 'loss_cross_entropy': Array(0.14805794, dtype=float32)}


 11%|█         | 105449/1000000 [4:49:49<25:23:41,  9.78it/s]

{'loss': Array(0.1457787, dtype=float32), 'loss_cross_entropy': Array(0.13804512, dtype=float32)}


 11%|█         | 105459/1000000 [4:49:50<23:28:12, 10.59it/s]

{'loss': Array(0.14901067, dtype=float32), 'loss_cross_entropy': Array(0.14005105, dtype=float32)}


 11%|█         | 105469/1000000 [4:49:52<29:29:36,  8.42it/s]

{'loss': Array(0.14179634, dtype=float32), 'loss_cross_entropy': Array(0.13367374, dtype=float32)}


 11%|█         | 105479/1000000 [4:49:54<24:53:35,  9.98it/s]

{'loss': Array(0.139147, dtype=float32), 'loss_cross_entropy': Array(0.13152008, dtype=float32)}


 11%|█         | 105489/1000000 [4:49:55<36:45:31,  6.76it/s]

{'loss': Array(0.12571621, dtype=float32), 'loss_cross_entropy': Array(0.11842704, dtype=float32)}


 11%|█         | 105499/1000000 [4:49:57<24:57:51,  9.95it/s]

{'loss': Array(0.14543599, dtype=float32), 'loss_cross_entropy': Array(0.13745126, dtype=float32)}


 11%|█         | 105509/1000000 [4:50:04<81:16:55,  3.06it/s] 

{'loss': Array(0.15107636, dtype=float32), 'loss_cross_entropy': Array(0.14282446, dtype=float32)}


 11%|█         | 105519/1000000 [4:50:06<38:14:28,  6.50it/s]

{'loss': Array(0.15120783, dtype=float32), 'loss_cross_entropy': Array(0.14226002, dtype=float32)}


 11%|█         | 105529/1000000 [4:50:08<26:29:09,  9.38it/s]

{'loss': Array(0.13357072, dtype=float32), 'loss_cross_entropy': Array(0.12539317, dtype=float32)}


 11%|█         | 105539/1000000 [4:50:09<30:58:21,  8.02it/s]

{'loss': Array(0.13716221, dtype=float32), 'loss_cross_entropy': Array(0.12887327, dtype=float32)}


 11%|█         | 105549/1000000 [4:50:11<25:03:44,  9.91it/s]

{'loss': Array(0.14706926, dtype=float32), 'loss_cross_entropy': Array(0.13904537, dtype=float32)}


 11%|█         | 105559/1000000 [4:50:12<37:26:47,  6.63it/s]

{'loss': Array(0.13691959, dtype=float32), 'loss_cross_entropy': Array(0.12934732, dtype=float32)}


 11%|█         | 105569/1000000 [4:50:14<26:52:35,  9.24it/s]

{'loss': Array(0.14839639, dtype=float32), 'loss_cross_entropy': Array(0.14025192, dtype=float32)}


 11%|█         | 105579/1000000 [4:50:15<24:29:42, 10.14it/s]

{'loss': Array(0.15668888, dtype=float32), 'loss_cross_entropy': Array(0.14808404, dtype=float32)}


 11%|█         | 105589/1000000 [4:50:17<26:29:40,  9.38it/s]

{'loss': Array(0.15532495, dtype=float32), 'loss_cross_entropy': Array(0.14693002, dtype=float32)}


 11%|█         | 105599/1000000 [4:50:18<23:34:47, 10.54it/s]

{'loss': Array(0.13307899, dtype=float32), 'loss_cross_entropy': Array(0.12572496, dtype=float32)}


 11%|█         | 105609/1000000 [4:50:20<35:56:36,  6.91it/s]

{'loss': Array(0.14683054, dtype=float32), 'loss_cross_entropy': Array(0.13839006, dtype=float32)}


 11%|█         | 105619/1000000 [4:50:21<24:51:57,  9.99it/s]

{'loss': Array(0.15705803, dtype=float32), 'loss_cross_entropy': Array(0.14855841, dtype=float32)}


 11%|█         | 105629/1000000 [4:50:23<22:49:23, 10.89it/s]

{'loss': Array(0.14605422, dtype=float32), 'loss_cross_entropy': Array(0.13782664, dtype=float32)}


 11%|█         | 105639/1000000 [4:50:24<27:18:40,  9.10it/s]

{'loss': Array(0.15451209, dtype=float32), 'loss_cross_entropy': Array(0.14647429, dtype=float32)}


 11%|█         | 105649/1000000 [4:50:26<24:48:47, 10.01it/s]

{'loss': Array(0.14963572, dtype=float32), 'loss_cross_entropy': Array(0.14152348, dtype=float32)}


 11%|█         | 105659/1000000 [4:50:27<32:47:39,  7.58it/s]

{'loss': Array(0.15059428, dtype=float32), 'loss_cross_entropy': Array(0.14200513, dtype=float32)}


 11%|█         | 105669/1000000 [4:50:29<24:09:09, 10.29it/s]

{'loss': Array(0.15420538, dtype=float32), 'loss_cross_entropy': Array(0.14594275, dtype=float32)}


 11%|█         | 105679/1000000 [4:50:30<36:26:05,  6.82it/s]

{'loss': Array(0.12960464, dtype=float32), 'loss_cross_entropy': Array(0.12185337, dtype=float32)}


 11%|█         | 105689/1000000 [4:50:32<25:08:39,  9.88it/s]

{'loss': Array(0.13429426, dtype=float32), 'loss_cross_entropy': Array(0.12735341, dtype=float32)}


 11%|█         | 105699/1000000 [4:50:33<23:53:30, 10.40it/s]

{'loss': Array(0.13715103, dtype=float32), 'loss_cross_entropy': Array(0.12936242, dtype=float32)}


 11%|█         | 105709/1000000 [4:50:35<29:17:40,  8.48it/s]

{'loss': Array(0.13236712, dtype=float32), 'loss_cross_entropy': Array(0.12482568, dtype=float32)}


 11%|█         | 105719/1000000 [4:50:36<24:14:17, 10.25it/s]

{'loss': Array(0.14366245, dtype=float32), 'loss_cross_entropy': Array(0.13572378, dtype=float32)}


 11%|█         | 105729/1000000 [4:50:38<33:02:52,  7.52it/s]

{'loss': Array(0.14382382, dtype=float32), 'loss_cross_entropy': Array(0.13656776, dtype=float32)}


 11%|█         | 105739/1000000 [4:50:39<24:24:33, 10.18it/s]

{'loss': Array(0.15379293, dtype=float32), 'loss_cross_entropy': Array(0.14549744, dtype=float32)}


 11%|█         | 105749/1000000 [4:50:41<23:26:16, 10.60it/s]

{'loss': Array(0.14209753, dtype=float32), 'loss_cross_entropy': Array(0.13389185, dtype=float32)}


 11%|█         | 105759/1000000 [4:50:42<26:24:44,  9.40it/s]

{'loss': Array(0.14712816, dtype=float32), 'loss_cross_entropy': Array(0.13861433, dtype=float32)}


 11%|█         | 105769/1000000 [4:50:44<23:07:16, 10.74it/s]

{'loss': Array(0.1364012, dtype=float32), 'loss_cross_entropy': Array(0.12818602, dtype=float32)}


 11%|█         | 105779/1000000 [4:50:45<35:58:12,  6.91it/s]

{'loss': Array(0.1312868, dtype=float32), 'loss_cross_entropy': Array(0.12355208, dtype=float32)}


 11%|█         | 105789/1000000 [4:50:47<25:43:14,  9.66it/s]

{'loss': Array(0.15067196, dtype=float32), 'loss_cross_entropy': Array(0.142013, dtype=float32)}


 11%|█         | 105799/1000000 [4:50:48<23:35:21, 10.53it/s]

{'loss': Array(0.14217101, dtype=float32), 'loss_cross_entropy': Array(0.13416241, dtype=float32)}


 11%|█         | 105809/1000000 [4:50:50<29:26:54,  8.43it/s]

{'loss': Array(0.1581859, dtype=float32), 'loss_cross_entropy': Array(0.1489913, dtype=float32)}


 11%|█         | 105819/1000000 [4:50:51<24:34:39, 10.11it/s]

{'loss': Array(0.13749467, dtype=float32), 'loss_cross_entropy': Array(0.12958711, dtype=float32)}


 11%|█         | 105829/1000000 [4:50:53<33:46:39,  7.35it/s]

{'loss': Array(0.14667973, dtype=float32), 'loss_cross_entropy': Array(0.13864043, dtype=float32)}


 11%|█         | 105839/1000000 [4:50:54<24:47:03, 10.02it/s]

{'loss': Array(0.15188144, dtype=float32), 'loss_cross_entropy': Array(0.14395492, dtype=float32)}


 11%|█         | 105849/1000000 [4:50:56<32:37:17,  7.61it/s]

{'loss': Array(0.14729309, dtype=float32), 'loss_cross_entropy': Array(0.13915367, dtype=float32)}


 11%|█         | 105859/1000000 [4:50:58<25:06:05,  9.89it/s]

{'loss': Array(0.15412064, dtype=float32), 'loss_cross_entropy': Array(0.14580004, dtype=float32)}


 11%|█         | 105869/1000000 [4:50:59<23:36:37, 10.52it/s]

{'loss': Array(0.13680626, dtype=float32), 'loss_cross_entropy': Array(0.12858751, dtype=float32)}


 11%|█         | 105879/1000000 [4:51:01<28:42:49,  8.65it/s]

{'loss': Array(0.14737962, dtype=float32), 'loss_cross_entropy': Array(0.13998064, dtype=float32)}


 11%|█         | 105889/1000000 [4:51:02<24:43:24, 10.05it/s]

{'loss': Array(0.13247636, dtype=float32), 'loss_cross_entropy': Array(0.12487584, dtype=float32)}


 11%|█         | 105899/1000000 [4:51:04<30:22:55,  8.17it/s]

{'loss': Array(0.12469771, dtype=float32), 'loss_cross_entropy': Array(0.11741602, dtype=float32)}


 11%|█         | 105909/1000000 [4:51:05<24:53:27,  9.98it/s]

{'loss': Array(0.13739653, dtype=float32), 'loss_cross_entropy': Array(0.12938617, dtype=float32)}


 11%|█         | 105919/1000000 [4:51:07<37:26:12,  6.63it/s]

{'loss': Array(0.14655167, dtype=float32), 'loss_cross_entropy': Array(0.13883297, dtype=float32)}


 11%|█         | 105929/1000000 [4:51:08<25:34:21,  9.71it/s]

{'loss': Array(0.13707393, dtype=float32), 'loss_cross_entropy': Array(0.129619, dtype=float32)}


 11%|█         | 105939/1000000 [4:51:10<37:54:18,  6.55it/s]

{'loss': Array(0.13854153, dtype=float32), 'loss_cross_entropy': Array(0.13073976, dtype=float32)}


 11%|█         | 105949/1000000 [4:51:12<26:59:14,  9.20it/s]

{'loss': Array(0.15114428, dtype=float32), 'loss_cross_entropy': Array(0.14314991, dtype=float32)}


 11%|█         | 105959/1000000 [4:51:13<25:25:47,  9.77it/s]

{'loss': Array(0.13793162, dtype=float32), 'loss_cross_entropy': Array(0.13032769, dtype=float32)}


 11%|█         | 105969/1000000 [4:51:15<35:44:16,  6.95it/s]

{'loss': Array(0.14411573, dtype=float32), 'loss_cross_entropy': Array(0.1362812, dtype=float32)}


 11%|█         | 105979/1000000 [4:51:16<26:24:14,  9.41it/s]

{'loss': Array(0.14980811, dtype=float32), 'loss_cross_entropy': Array(0.14173554, dtype=float32)}


 11%|█         | 105989/1000000 [4:51:18<37:43:46,  6.58it/s]

{'loss': Array(0.13238089, dtype=float32), 'loss_cross_entropy': Array(0.12548243, dtype=float32)}


 11%|█         | 105999/1000000 [4:51:20<25:51:46,  9.60it/s]

{'loss': Array(0.14839984, dtype=float32), 'loss_cross_entropy': Array(0.14094411, dtype=float32)}


 11%|█         | 106009/1000000 [4:51:27<80:38:26,  3.08it/s] 

{'loss': Array(0.13964988, dtype=float32), 'loss_cross_entropy': Array(0.13148135, dtype=float32)}


 11%|█         | 106019/1000000 [4:51:29<35:56:51,  6.91it/s] 

{'loss': Array(0.1431822, dtype=float32), 'loss_cross_entropy': Array(0.13517554, dtype=float32)}


 11%|█         | 106029/1000000 [4:51:30<25:48:23,  9.62it/s]

{'loss': Array(0.15737166, dtype=float32), 'loss_cross_entropy': Array(0.14854692, dtype=float32)}


 11%|█         | 106039/1000000 [4:51:32<26:51:14,  9.25it/s]

{'loss': Array(0.12794486, dtype=float32), 'loss_cross_entropy': Array(0.12025698, dtype=float32)}


 11%|█         | 106049/1000000 [4:51:34<23:57:27, 10.36it/s]

{'loss': Array(0.1332977, dtype=float32), 'loss_cross_entropy': Array(0.12516338, dtype=float32)}


 11%|█         | 106059/1000000 [4:51:35<35:59:11,  6.90it/s]

{'loss': Array(0.13573046, dtype=float32), 'loss_cross_entropy': Array(0.12751107, dtype=float32)}


 11%|█         | 106069/1000000 [4:51:37<25:02:18,  9.92it/s]

{'loss': Array(0.13866945, dtype=float32), 'loss_cross_entropy': Array(0.13077827, dtype=float32)}


 11%|█         | 106079/1000000 [4:51:38<23:00:53, 10.79it/s]

{'loss': Array(0.14106262, dtype=float32), 'loss_cross_entropy': Array(0.13353454, dtype=float32)}


 11%|█         | 106089/1000000 [4:51:40<29:28:32,  8.42it/s]

{'loss': Array(0.15393496, dtype=float32), 'loss_cross_entropy': Array(0.14602636, dtype=float32)}


 11%|█         | 106099/1000000 [4:51:41<23:59:30, 10.35it/s]

{'loss': Array(0.1424785, dtype=float32), 'loss_cross_entropy': Array(0.13484451, dtype=float32)}


 11%|█         | 106109/1000000 [4:51:43<37:03:11,  6.70it/s]

{'loss': Array(0.13595693, dtype=float32), 'loss_cross_entropy': Array(0.12771702, dtype=float32)}


 11%|█         | 106119/1000000 [4:51:44<25:31:04,  9.73it/s]

{'loss': Array(0.138858, dtype=float32), 'loss_cross_entropy': Array(0.1310767, dtype=float32)}


 11%|█         | 106129/1000000 [4:51:45<23:33:36, 10.54it/s]

{'loss': Array(0.15609308, dtype=float32), 'loss_cross_entropy': Array(0.14712468, dtype=float32)}


 11%|█         | 106139/1000000 [4:51:47<28:13:15,  8.80it/s]

{'loss': Array(0.14242874, dtype=float32), 'loss_cross_entropy': Array(0.1344794, dtype=float32)}


 11%|█         | 106149/1000000 [4:51:49<24:08:38, 10.28it/s]

{'loss': Array(0.14138387, dtype=float32), 'loss_cross_entropy': Array(0.13362925, dtype=float32)}


 11%|█         | 106159/1000000 [4:51:50<31:58:20,  7.77it/s]

{'loss': Array(0.14858998, dtype=float32), 'loss_cross_entropy': Array(0.14063802, dtype=float32)}


 11%|█         | 106169/1000000 [4:51:52<24:32:06, 10.12it/s]

{'loss': Array(0.14076357, dtype=float32), 'loss_cross_entropy': Array(0.13220125, dtype=float32)}


 11%|█         | 106179/1000000 [4:51:53<23:16:19, 10.67it/s]

{'loss': Array(0.14525495, dtype=float32), 'loss_cross_entropy': Array(0.13769719, dtype=float32)}


 11%|█         | 106189/1000000 [4:51:55<29:35:42,  8.39it/s]

{'loss': Array(0.15058973, dtype=float32), 'loss_cross_entropy': Array(0.1417449, dtype=float32)}


 11%|█         | 106199/1000000 [4:51:56<23:51:48, 10.40it/s]

{'loss': Array(0.13018565, dtype=float32), 'loss_cross_entropy': Array(0.12294989, dtype=float32)}


 11%|█         | 106209/1000000 [4:51:57<24:29:28, 10.14it/s]

{'loss': Array(0.14134371, dtype=float32), 'loss_cross_entropy': Array(0.1336463, dtype=float32)}


 11%|█         | 106219/1000000 [4:51:59<28:39:23,  8.66it/s]

{'loss': Array(0.16789882, dtype=float32), 'loss_cross_entropy': Array(0.15835012, dtype=float32)}


 11%|█         | 106229/1000000 [4:52:01<25:34:21,  9.71it/s]

{'loss': Array(0.14222997, dtype=float32), 'loss_cross_entropy': Array(0.13481325, dtype=float32)}


 11%|█         | 106239/1000000 [4:52:03<31:42:59,  7.83it/s]

{'loss': Array(0.14795084, dtype=float32), 'loss_cross_entropy': Array(0.13990389, dtype=float32)}


 11%|█         | 106249/1000000 [4:52:04<25:05:27,  9.89it/s]

{'loss': Array(0.14056244, dtype=float32), 'loss_cross_entropy': Array(0.13296638, dtype=float32)}


 11%|█         | 106259/1000000 [4:52:06<38:13:05,  6.50it/s]

{'loss': Array(0.14067186, dtype=float32), 'loss_cross_entropy': Array(0.13295786, dtype=float32)}


 11%|█         | 106269/1000000 [4:52:07<26:37:38,  9.32it/s]

{'loss': Array(0.12407248, dtype=float32), 'loss_cross_entropy': Array(0.11622985, dtype=float32)}


 11%|█         | 106279/1000000 [4:52:09<37:25:09,  6.63it/s]

{'loss': Array(0.14426063, dtype=float32), 'loss_cross_entropy': Array(0.1362819, dtype=float32)}


 11%|█         | 106289/1000000 [4:52:10<26:12:18,  9.47it/s]

{'loss': Array(0.12840764, dtype=float32), 'loss_cross_entropy': Array(0.1208412, dtype=float32)}


 11%|█         | 106299/1000000 [4:52:12<24:32:33, 10.12it/s]

{'loss': Array(0.14693277, dtype=float32), 'loss_cross_entropy': Array(0.13822322, dtype=float32)}


 11%|█         | 106309/1000000 [4:52:13<32:37:04,  7.61it/s]

{'loss': Array(0.15654083, dtype=float32), 'loss_cross_entropy': Array(0.14831041, dtype=float32)}


 11%|█         | 106319/1000000 [4:52:15<24:11:15, 10.26it/s]

{'loss': Array(0.15351227, dtype=float32), 'loss_cross_entropy': Array(0.14532101, dtype=float32)}


 11%|█         | 106329/1000000 [4:52:16<36:21:45,  6.83it/s]

{'loss': Array(0.13296251, dtype=float32), 'loss_cross_entropy': Array(0.12477183, dtype=float32)}


 11%|█         | 106339/1000000 [4:52:18<24:50:54,  9.99it/s]

{'loss': Array(0.13341929, dtype=float32), 'loss_cross_entropy': Array(0.12561965, dtype=float32)}


 11%|█         | 106349/1000000 [4:52:19<24:03:10, 10.32it/s]

{'loss': Array(0.14641887, dtype=float32), 'loss_cross_entropy': Array(0.13842706, dtype=float32)}


 11%|█         | 106359/1000000 [4:52:21<26:15:02,  9.46it/s]

{'loss': Array(0.14399177, dtype=float32), 'loss_cross_entropy': Array(0.1356051, dtype=float32)}


 11%|█         | 106369/1000000 [4:52:22<23:21:00, 10.63it/s]

{'loss': Array(0.16829613, dtype=float32), 'loss_cross_entropy': Array(0.15893039, dtype=float32)}


 11%|█         | 106379/1000000 [4:52:24<25:57:31,  9.56it/s]

{'loss': Array(0.13251036, dtype=float32), 'loss_cross_entropy': Array(0.12491629, dtype=float32)}


 11%|█         | 106389/1000000 [4:52:25<23:07:46, 10.73it/s]

{'loss': Array(0.13869785, dtype=float32), 'loss_cross_entropy': Array(0.13061263, dtype=float32)}


 11%|█         | 106399/1000000 [4:52:27<36:07:02,  6.87it/s]

{'loss': Array(0.14398327, dtype=float32), 'loss_cross_entropy': Array(0.13578203, dtype=float32)}


 11%|█         | 106409/1000000 [4:52:28<25:13:37,  9.84it/s]

{'loss': Array(0.15531915, dtype=float32), 'loss_cross_entropy': Array(0.14696439, dtype=float32)}


 11%|█         | 106419/1000000 [4:52:30<23:12:04, 10.70it/s]

{'loss': Array(0.13977425, dtype=float32), 'loss_cross_entropy': Array(0.13168673, dtype=float32)}


 11%|█         | 106429/1000000 [4:52:31<29:51:37,  8.31it/s]

{'loss': Array(0.14449179, dtype=float32), 'loss_cross_entropy': Array(0.13623877, dtype=float32)}


 11%|█         | 106439/1000000 [4:52:33<23:34:48, 10.53it/s]

{'loss': Array(0.13677251, dtype=float32), 'loss_cross_entropy': Array(0.12882134, dtype=float32)}


 11%|█         | 106449/1000000 [4:52:34<36:31:52,  6.79it/s]

{'loss': Array(0.12497616, dtype=float32), 'loss_cross_entropy': Array(0.1181648, dtype=float32)}


 11%|█         | 106459/1000000 [4:52:36<25:01:47,  9.92it/s]

{'loss': Array(0.13425134, dtype=float32), 'loss_cross_entropy': Array(0.12688717, dtype=float32)}


 11%|█         | 106469/1000000 [4:52:37<23:19:32, 10.64it/s]

{'loss': Array(0.15525556, dtype=float32), 'loss_cross_entropy': Array(0.14658666, dtype=float32)}


 11%|█         | 106479/1000000 [4:52:39<27:33:14,  9.01it/s]

{'loss': Array(0.16296545, dtype=float32), 'loss_cross_entropy': Array(0.15408926, dtype=float32)}


 11%|█         | 106489/1000000 [4:52:40<23:27:01, 10.58it/s]

{'loss': Array(0.13590038, dtype=float32), 'loss_cross_entropy': Array(0.12833457, dtype=float32)}


 11%|█         | 106499/1000000 [4:52:42<32:09:49,  7.72it/s]

{'loss': Array(0.13945392, dtype=float32), 'loss_cross_entropy': Array(0.13137482, dtype=float32)}


 11%|█         | 106509/1000000 [4:52:50<81:30:48,  3.04it/s] 

{'loss': Array(0.12229516, dtype=float32), 'loss_cross_entropy': Array(0.11501216, dtype=float32)}


 11%|█         | 106519/1000000 [4:52:51<42:39:42,  5.82it/s]

{'loss': Array(0.15056089, dtype=float32), 'loss_cross_entropy': Array(0.14154235, dtype=float32)}


 11%|█         | 106529/1000000 [4:52:53<25:51:49,  9.60it/s]

{'loss': Array(0.14894894, dtype=float32), 'loss_cross_entropy': Array(0.14089118, dtype=float32)}


 11%|█         | 106539/1000000 [4:52:54<23:29:51, 10.56it/s]

{'loss': Array(0.1466466, dtype=float32), 'loss_cross_entropy': Array(0.13888258, dtype=float32)}


 11%|█         | 106549/1000000 [4:52:56<30:12:45,  8.21it/s]

{'loss': Array(0.14452778, dtype=float32), 'loss_cross_entropy': Array(0.13626377, dtype=float32)}


 11%|█         | 106559/1000000 [4:52:57<24:27:43, 10.15it/s]

{'loss': Array(0.14717981, dtype=float32), 'loss_cross_entropy': Array(0.13938218, dtype=float32)}


 11%|█         | 106569/1000000 [4:52:59<23:33:41, 10.53it/s]

{'loss': Array(0.15424214, dtype=float32), 'loss_cross_entropy': Array(0.14605345, dtype=float32)}


 11%|█         | 106579/1000000 [4:53:00<27:05:57,  9.16it/s]

{'loss': Array(0.12166169, dtype=float32), 'loss_cross_entropy': Array(0.11451133, dtype=float32)}


 11%|█         | 106589/1000000 [4:53:02<25:03:06,  9.91it/s]

{'loss': Array(0.15016124, dtype=float32), 'loss_cross_entropy': Array(0.14222889, dtype=float32)}


 11%|█         | 106599/1000000 [4:53:04<30:53:28,  8.03it/s]

{'loss': Array(0.15066549, dtype=float32), 'loss_cross_entropy': Array(0.14304827, dtype=float32)}


 11%|█         | 106609/1000000 [4:53:05<25:42:18,  9.65it/s]

{'loss': Array(0.15031986, dtype=float32), 'loss_cross_entropy': Array(0.14233373, dtype=float32)}


 11%|█         | 106619/1000000 [4:53:07<34:15:01,  7.25it/s]

{'loss': Array(0.14619453, dtype=float32), 'loss_cross_entropy': Array(0.13797192, dtype=float32)}


 11%|█         | 106629/1000000 [4:53:08<25:55:52,  9.57it/s]

{'loss': Array(0.1578454, dtype=float32), 'loss_cross_entropy': Array(0.14864, dtype=float32)}


 11%|█         | 106639/1000000 [4:53:10<24:38:40, 10.07it/s]

{'loss': Array(0.14105797, dtype=float32), 'loss_cross_entropy': Array(0.13292411, dtype=float32)}


 11%|█         | 106649/1000000 [4:53:11<31:18:04,  7.93it/s]

{'loss': Array(0.1504343, dtype=float32), 'loss_cross_entropy': Array(0.14209709, dtype=float32)}


 11%|█         | 106659/1000000 [4:53:13<24:27:13, 10.15it/s]

{'loss': Array(0.14248173, dtype=float32), 'loss_cross_entropy': Array(0.13437532, dtype=float32)}


 11%|█         | 106669/1000000 [4:53:14<23:09:22, 10.72it/s]

{'loss': Array(0.14973806, dtype=float32), 'loss_cross_entropy': Array(0.14137162, dtype=float32)}


 11%|█         | 106679/1000000 [4:53:16<26:31:17,  9.36it/s]

{'loss': Array(0.1516639, dtype=float32), 'loss_cross_entropy': Array(0.14309874, dtype=float32)}


 11%|█         | 106689/1000000 [4:53:17<23:29:33, 10.56it/s]

{'loss': Array(0.1462389, dtype=float32), 'loss_cross_entropy': Array(0.13828592, dtype=float32)}


 11%|█         | 106699/1000000 [4:53:19<29:27:28,  8.42it/s]

{'loss': Array(0.13604914, dtype=float32), 'loss_cross_entropy': Array(0.12844221, dtype=float32)}


 11%|█         | 106709/1000000 [4:53:20<24:36:59, 10.08it/s]

{'loss': Array(0.12495656, dtype=float32), 'loss_cross_entropy': Array(0.11758902, dtype=float32)}


 11%|█         | 106719/1000000 [4:53:22<36:38:02,  6.77it/s]

{'loss': Array(0.14647406, dtype=float32), 'loss_cross_entropy': Array(0.13792917, dtype=float32)}


 11%|█         | 106729/1000000 [4:53:23<25:42:11,  9.65it/s]

{'loss': Array(0.1523615, dtype=float32), 'loss_cross_entropy': Array(0.14344554, dtype=float32)}


 11%|█         | 106739/1000000 [4:53:25<23:13:22, 10.68it/s]

{'loss': Array(0.15567224, dtype=float32), 'loss_cross_entropy': Array(0.1467254, dtype=float32)}


 11%|█         | 106749/1000000 [4:53:27<32:54:35,  7.54it/s]

{'loss': Array(0.13659213, dtype=float32), 'loss_cross_entropy': Array(0.12905587, dtype=float32)}


 11%|█         | 106759/1000000 [4:53:28<24:25:06, 10.16it/s]

{'loss': Array(0.13082384, dtype=float32), 'loss_cross_entropy': Array(0.12362913, dtype=float32)}


 11%|█         | 106769/1000000 [4:53:29<23:20:38, 10.63it/s]

{'loss': Array(0.1610109, dtype=float32), 'loss_cross_entropy': Array(0.15270661, dtype=float32)}


 11%|█         | 106779/1000000 [4:53:31<27:47:02,  8.93it/s]

{'loss': Array(0.13209939, dtype=float32), 'loss_cross_entropy': Array(0.12494534, dtype=float32)}


 11%|█         | 106789/1000000 [4:53:32<23:45:12, 10.45it/s]

{'loss': Array(0.14073275, dtype=float32), 'loss_cross_entropy': Array(0.1332886, dtype=float32)}


 11%|█         | 106799/1000000 [4:53:34<29:29:03,  8.42it/s]

{'loss': Array(0.13147642, dtype=float32), 'loss_cross_entropy': Array(0.12368388, dtype=float32)}


 11%|█         | 106809/1000000 [4:53:35<23:39:12, 10.49it/s]

{'loss': Array(0.14661644, dtype=float32), 'loss_cross_entropy': Array(0.13857506, dtype=float32)}


 11%|█         | 106819/1000000 [4:53:37<22:53:45, 10.84it/s]

{'loss': Array(0.14752357, dtype=float32), 'loss_cross_entropy': Array(0.13912885, dtype=float32)}


 11%|█         | 106829/1000000 [4:53:38<26:44:09,  9.28it/s]

{'loss': Array(0.12624013, dtype=float32), 'loss_cross_entropy': Array(0.11861167, dtype=float32)}


 11%|█         | 106839/1000000 [4:53:40<23:13:18, 10.68it/s]

{'loss': Array(0.13817208, dtype=float32), 'loss_cross_entropy': Array(0.12973236, dtype=float32)}


 11%|█         | 106849/1000000 [4:53:41<32:26:21,  7.65it/s]

{'loss': Array(0.15225415, dtype=float32), 'loss_cross_entropy': Array(0.1435465, dtype=float32)}


 11%|█         | 106859/1000000 [4:53:43<24:48:30, 10.00it/s]

{'loss': Array(0.17110403, dtype=float32), 'loss_cross_entropy': Array(0.16223, dtype=float32)}


 11%|█         | 106869/1000000 [4:53:44<22:47:34, 10.88it/s]

{'loss': Array(0.13643156, dtype=float32), 'loss_cross_entropy': Array(0.12871242, dtype=float32)}


 11%|█         | 106879/1000000 [4:53:46<26:05:16,  9.51it/s]

{'loss': Array(0.14147937, dtype=float32), 'loss_cross_entropy': Array(0.13327232, dtype=float32)}


 11%|█         | 106889/1000000 [4:53:47<23:42:52, 10.46it/s]

{'loss': Array(0.14726858, dtype=float32), 'loss_cross_entropy': Array(0.13918737, dtype=float32)}


 11%|█         | 106899/1000000 [4:53:49<27:37:01,  8.98it/s]

{'loss': Array(0.14608929, dtype=float32), 'loss_cross_entropy': Array(0.137831, dtype=float32)}


 11%|█         | 106909/1000000 [4:53:50<23:37:37, 10.50it/s]

{'loss': Array(0.14254765, dtype=float32), 'loss_cross_entropy': Array(0.13416052, dtype=float32)}


 11%|█         | 106919/1000000 [4:53:52<36:45:07,  6.75it/s]

{'loss': Array(0.11879297, dtype=float32), 'loss_cross_entropy': Array(0.1114211, dtype=float32)}


 11%|█         | 106929/1000000 [4:53:53<25:37:15,  9.68it/s]

{'loss': Array(0.16163827, dtype=float32), 'loss_cross_entropy': Array(0.15289024, dtype=float32)}


 11%|█         | 106939/1000000 [4:53:55<23:53:12, 10.39it/s]

{'loss': Array(0.1457614, dtype=float32), 'loss_cross_entropy': Array(0.13760738, dtype=float32)}


 11%|█         | 106949/1000000 [4:53:57<29:39:56,  8.36it/s]

{'loss': Array(0.1435399, dtype=float32), 'loss_cross_entropy': Array(0.13534434, dtype=float32)}


 11%|█         | 106959/1000000 [4:53:58<23:46:16, 10.44it/s]

{'loss': Array(0.14867394, dtype=float32), 'loss_cross_entropy': Array(0.14033818, dtype=float32)}


 11%|█         | 106969/1000000 [4:54:00<33:41:42,  7.36it/s]

{'loss': Array(0.14998104, dtype=float32), 'loss_cross_entropy': Array(0.14118002, dtype=float32)}


 11%|█         | 106979/1000000 [4:54:01<25:48:10,  9.61it/s]

{'loss': Array(0.16315249, dtype=float32), 'loss_cross_entropy': Array(0.15435944, dtype=float32)}


 11%|█         | 106989/1000000 [4:54:03<33:56:00,  7.31it/s]

{'loss': Array(0.11712398, dtype=float32), 'loss_cross_entropy': Array(0.11010601, dtype=float32)}


 11%|█         | 106999/1000000 [4:54:04<25:33:25,  9.71it/s]

{'loss': Array(0.14274974, dtype=float32), 'loss_cross_entropy': Array(0.13427523, dtype=float32)}


 11%|█         | 107009/1000000 [4:54:12<83:00:27,  2.99it/s] 

{'loss': Array(0.13454147, dtype=float32), 'loss_cross_entropy': Array(0.1269387, dtype=float32)}


 11%|█         | 107019/1000000 [4:54:14<37:29:26,  6.62it/s]

{'loss': Array(0.14926672, dtype=float32), 'loss_cross_entropy': Array(0.14144382, dtype=float32)}


 11%|█         | 107029/1000000 [4:54:15<25:20:16,  9.79it/s]

{'loss': Array(0.13787882, dtype=float32), 'loss_cross_entropy': Array(0.12992415, dtype=float32)}


 11%|█         | 107039/1000000 [4:54:17<33:14:00,  7.46it/s]

{'loss': Array(0.14919476, dtype=float32), 'loss_cross_entropy': Array(0.1419214, dtype=float32)}


 11%|█         | 107049/1000000 [4:54:18<25:37:10,  9.68it/s]

{'loss': Array(0.16607042, dtype=float32), 'loss_cross_entropy': Array(0.15718013, dtype=float32)}


 11%|█         | 107059/1000000 [4:54:20<37:01:57,  6.70it/s]

{'loss': Array(0.1628945, dtype=float32), 'loss_cross_entropy': Array(0.15425806, dtype=float32)}


 11%|█         | 107069/1000000 [4:54:22<25:19:33,  9.79it/s]

{'loss': Array(0.13905185, dtype=float32), 'loss_cross_entropy': Array(0.13128768, dtype=float32)}


 11%|█         | 107079/1000000 [4:54:23<23:18:36, 10.64it/s]

{'loss': Array(0.15731509, dtype=float32), 'loss_cross_entropy': Array(0.14912422, dtype=float32)}


 11%|█         | 107089/1000000 [4:54:25<29:35:33,  8.38it/s]

{'loss': Array(0.14901839, dtype=float32), 'loss_cross_entropy': Array(0.1408527, dtype=float32)}


 11%|█         | 107099/1000000 [4:54:26<24:10:02, 10.26it/s]

{'loss': Array(0.1522846, dtype=float32), 'loss_cross_entropy': Array(0.14381817, dtype=float32)}


 11%|█         | 107109/1000000 [4:54:28<32:54:23,  7.54it/s]

{'loss': Array(0.14171202, dtype=float32), 'loss_cross_entropy': Array(0.13340549, dtype=float32)}


 11%|█         | 107119/1000000 [4:54:29<24:17:25, 10.21it/s]

{'loss': Array(0.15740006, dtype=float32), 'loss_cross_entropy': Array(0.14863676, dtype=float32)}


 11%|█         | 107129/1000000 [4:54:30<23:09:42, 10.71it/s]

{'loss': Array(0.16024712, dtype=float32), 'loss_cross_entropy': Array(0.15180336, dtype=float32)}


 11%|█         | 107139/1000000 [4:54:32<26:21:44,  9.41it/s]

{'loss': Array(0.13985673, dtype=float32), 'loss_cross_entropy': Array(0.13165484, dtype=float32)}


 11%|█         | 107149/1000000 [4:54:34<23:47:57, 10.42it/s]

{'loss': Array(0.13630866, dtype=float32), 'loss_cross_entropy': Array(0.12838775, dtype=float32)}


 11%|█         | 107159/1000000 [4:54:35<36:37:43,  6.77it/s]

{'loss': Array(0.1421152, dtype=float32), 'loss_cross_entropy': Array(0.13371007, dtype=float32)}


 11%|█         | 107169/1000000 [4:54:37<25:21:12,  9.78it/s]

{'loss': Array(0.14625733, dtype=float32), 'loss_cross_entropy': Array(0.13800536, dtype=float32)}


 11%|█         | 107179/1000000 [4:54:38<23:09:44, 10.71it/s]

{'loss': Array(0.13277966, dtype=float32), 'loss_cross_entropy': Array(0.12508427, dtype=float32)}


 11%|█         | 107189/1000000 [4:54:40<29:42:21,  8.35it/s]

{'loss': Array(0.14664535, dtype=float32), 'loss_cross_entropy': Array(0.13890234, dtype=float32)}


 11%|█         | 107199/1000000 [4:54:41<24:01:02, 10.33it/s]

{'loss': Array(0.14405708, dtype=float32), 'loss_cross_entropy': Array(0.13597627, dtype=float32)}


 11%|█         | 107209/1000000 [4:54:43<32:40:25,  7.59it/s]

{'loss': Array(0.149859, dtype=float32), 'loss_cross_entropy': Array(0.14165299, dtype=float32)}


 11%|█         | 107219/1000000 [4:54:44<24:27:25, 10.14it/s]

{'loss': Array(0.15260029, dtype=float32), 'loss_cross_entropy': Array(0.14410959, dtype=float32)}


 11%|█         | 107229/1000000 [4:54:46<32:57:40,  7.52it/s]

{'loss': Array(0.15177725, dtype=float32), 'loss_cross_entropy': Array(0.14401393, dtype=float32)}


 11%|█         | 107239/1000000 [4:54:47<25:13:04,  9.83it/s]

{'loss': Array(0.13637044, dtype=float32), 'loss_cross_entropy': Array(0.12818035, dtype=float32)}


 11%|█         | 107249/1000000 [4:54:49<24:03:26, 10.31it/s]

{'loss': Array(0.14006975, dtype=float32), 'loss_cross_entropy': Array(0.1321122, dtype=float32)}


 11%|█         | 107259/1000000 [4:54:50<27:38:57,  8.97it/s]

{'loss': Array(0.13141549, dtype=float32), 'loss_cross_entropy': Array(0.12393063, dtype=float32)}


 11%|█         | 107269/1000000 [4:54:52<23:44:57, 10.44it/s]

{'loss': Array(0.13816155, dtype=float32), 'loss_cross_entropy': Array(0.1304703, dtype=float32)}


 11%|█         | 107279/1000000 [4:54:54<29:48:12,  8.32it/s]

{'loss': Array(0.13746846, dtype=float32), 'loss_cross_entropy': Array(0.129822, dtype=float32)}


 11%|█         | 107289/1000000 [4:54:55<24:26:41, 10.14it/s]

{'loss': Array(0.14120789, dtype=float32), 'loss_cross_entropy': Array(0.13330077, dtype=float32)}


 11%|█         | 107299/1000000 [4:54:57<36:31:23,  6.79it/s]

{'loss': Array(0.13273822, dtype=float32), 'loss_cross_entropy': Array(0.12475499, dtype=float32)}


 11%|█         | 107309/1000000 [4:54:58<25:28:45,  9.73it/s]

{'loss': Array(0.15159945, dtype=float32), 'loss_cross_entropy': Array(0.14308634, dtype=float32)}


 11%|█         | 107319/1000000 [4:55:00<38:41:05,  6.41it/s]

{'loss': Array(0.132697, dtype=float32), 'loss_cross_entropy': Array(0.12533604, dtype=float32)}


 11%|█         | 107329/1000000 [4:55:01<26:42:44,  9.28it/s]

{'loss': Array(0.12071115, dtype=float32), 'loss_cross_entropy': Array(0.1133193, dtype=float32)}


 11%|█         | 107339/1000000 [4:55:03<25:27:05,  9.74it/s]

{'loss': Array(0.14386515, dtype=float32), 'loss_cross_entropy': Array(0.13582623, dtype=float32)}


 11%|█         | 107349/1000000 [4:55:05<34:31:12,  7.18it/s]

{'loss': Array(0.13480788, dtype=float32), 'loss_cross_entropy': Array(0.12676243, dtype=float32)}


 11%|█         | 107359/1000000 [4:55:06<26:35:19,  9.33it/s]

{'loss': Array(0.13364449, dtype=float32), 'loss_cross_entropy': Array(0.12606393, dtype=float32)}


 11%|█         | 107369/1000000 [4:55:08<38:28:30,  6.44it/s]

{'loss': Array(0.14468217, dtype=float32), 'loss_cross_entropy': Array(0.13631083, dtype=float32)}


 11%|█         | 107379/1000000 [4:55:09<26:35:40,  9.32it/s]

{'loss': Array(0.12816225, dtype=float32), 'loss_cross_entropy': Array(0.12113905, dtype=float32)}


 11%|█         | 107389/1000000 [4:55:11<25:09:12,  9.86it/s]

{'loss': Array(0.15054445, dtype=float32), 'loss_cross_entropy': Array(0.1423437, dtype=float32)}


 11%|█         | 107399/1000000 [4:55:12<26:51:14,  9.23it/s]

{'loss': Array(0.15761924, dtype=float32), 'loss_cross_entropy': Array(0.14906172, dtype=float32)}


 11%|█         | 107409/1000000 [4:55:14<23:47:14, 10.42it/s]

{'loss': Array(0.14012675, dtype=float32), 'loss_cross_entropy': Array(0.1321489, dtype=float32)}


 11%|█         | 107419/1000000 [4:55:16<26:24:26,  9.39it/s]

{'loss': Array(0.14671047, dtype=float32), 'loss_cross_entropy': Array(0.13831282, dtype=float32)}


 11%|█         | 107429/1000000 [4:55:17<23:57:05, 10.35it/s]

{'loss': Array(0.15378456, dtype=float32), 'loss_cross_entropy': Array(0.14498065, dtype=float32)}


 11%|█         | 107439/1000000 [4:55:19<36:11:06,  6.85it/s]

{'loss': Array(0.13816804, dtype=float32), 'loss_cross_entropy': Array(0.13064139, dtype=float32)}


 11%|█         | 107449/1000000 [4:55:20<24:49:22,  9.99it/s]

{'loss': Array(0.131703, dtype=float32), 'loss_cross_entropy': Array(0.1239198, dtype=float32)}


 11%|█         | 107459/1000000 [4:55:21<23:24:51, 10.59it/s]

{'loss': Array(0.15584524, dtype=float32), 'loss_cross_entropy': Array(0.14771394, dtype=float32)}


 11%|█         | 107469/1000000 [4:55:23<29:18:10,  8.46it/s]

{'loss': Array(0.12530375, dtype=float32), 'loss_cross_entropy': Array(0.11856854, dtype=float32)}


 11%|█         | 107479/1000000 [4:55:24<24:07:45, 10.27it/s]

{'loss': Array(0.15580247, dtype=float32), 'loss_cross_entropy': Array(0.14709733, dtype=float32)}


 11%|█         | 107489/1000000 [4:55:26<36:18:37,  6.83it/s]

{'loss': Array(0.13184671, dtype=float32), 'loss_cross_entropy': Array(0.12424871, dtype=float32)}


 11%|█         | 107499/1000000 [4:55:27<25:40:02,  9.66it/s]

{'loss': Array(0.13572402, dtype=float32), 'loss_cross_entropy': Array(0.12855563, dtype=float32)}


 11%|█         | 107509/1000000 [4:55:35<80:03:53,  3.10it/s] 

{'loss': Array(0.11564807, dtype=float32), 'loss_cross_entropy': Array(0.10917468, dtype=float32)}


 11%|█         | 107519/1000000 [4:55:37<37:41:10,  6.58it/s]

{'loss': Array(0.14928359, dtype=float32), 'loss_cross_entropy': Array(0.14058422, dtype=float32)}


 11%|█         | 107529/1000000 [4:55:38<25:29:42,  9.72it/s]

{'loss': Array(0.14518951, dtype=float32), 'loss_cross_entropy': Array(0.13726875, dtype=float32)}


 11%|█         | 107539/1000000 [4:55:40<29:39:10,  8.36it/s]

{'loss': Array(0.13156505, dtype=float32), 'loss_cross_entropy': Array(0.12431794, dtype=float32)}


 11%|█         | 107549/1000000 [4:55:41<23:51:24, 10.39it/s]

{'loss': Array(0.13947357, dtype=float32), 'loss_cross_entropy': Array(0.13068525, dtype=float32)}


 11%|█         | 107559/1000000 [4:55:43<37:07:13,  6.68it/s]

{'loss': Array(0.1412302, dtype=float32), 'loss_cross_entropy': Array(0.13339184, dtype=float32)}


 11%|█         | 107569/1000000 [4:55:44<26:19:11,  9.42it/s]

{'loss': Array(0.14383946, dtype=float32), 'loss_cross_entropy': Array(0.13514833, dtype=float32)}


 11%|█         | 107579/1000000 [4:55:46<23:20:25, 10.62it/s]

{'loss': Array(0.14909287, dtype=float32), 'loss_cross_entropy': Array(0.14119364, dtype=float32)}


 11%|█         | 107589/1000000 [4:55:48<25:55:24,  9.56it/s]

{'loss': Array(0.13769895, dtype=float32), 'loss_cross_entropy': Array(0.1296875, dtype=float32)}


 11%|█         | 107599/1000000 [4:55:49<23:30:22, 10.55it/s]

{'loss': Array(0.1271815, dtype=float32), 'loss_cross_entropy': Array(0.11989234, dtype=float32)}


 11%|█         | 107609/1000000 [4:55:51<36:41:40,  6.76it/s]

{'loss': Array(0.17297086, dtype=float32), 'loss_cross_entropy': Array(0.1640685, dtype=float32)}


 11%|█         | 107619/1000000 [4:55:52<25:23:01,  9.77it/s]

{'loss': Array(0.13841176, dtype=float32), 'loss_cross_entropy': Array(0.13063805, dtype=float32)}


 11%|█         | 107629/1000000 [4:55:53<23:48:13, 10.41it/s]

{'loss': Array(0.13291226, dtype=float32), 'loss_cross_entropy': Array(0.12509885, dtype=float32)}


 11%|█         | 107639/1000000 [4:55:55<28:09:51,  8.80it/s]

{'loss': Array(0.14977051, dtype=float32), 'loss_cross_entropy': Array(0.14132066, dtype=float32)}


 11%|█         | 107649/1000000 [4:55:56<23:59:06, 10.33it/s]

{'loss': Array(0.14025258, dtype=float32), 'loss_cross_entropy': Array(0.13205586, dtype=float32)}


 11%|█         | 107659/1000000 [4:55:58<32:47:16,  7.56it/s]

{'loss': Array(0.14150456, dtype=float32), 'loss_cross_entropy': Array(0.13381815, dtype=float32)}


 11%|█         | 107669/1000000 [4:56:00<25:53:27,  9.57it/s]

{'loss': Array(0.1488897, dtype=float32), 'loss_cross_entropy': Array(0.14114071, dtype=float32)}


 11%|█         | 107679/1000000 [4:56:01<38:19:57,  6.47it/s]

{'loss': Array(0.15777472, dtype=float32), 'loss_cross_entropy': Array(0.14930214, dtype=float32)}


 11%|█         | 107689/1000000 [4:56:03<26:08:31,  9.48it/s]

{'loss': Array(0.14861973, dtype=float32), 'loss_cross_entropy': Array(0.14077947, dtype=float32)}


 11%|█         | 107699/1000000 [4:56:04<24:26:42, 10.14it/s]

{'loss': Array(0.16645658, dtype=float32), 'loss_cross_entropy': Array(0.15718262, dtype=float32)}


 11%|█         | 107709/1000000 [4:56:06<30:53:03,  8.03it/s]

{'loss': Array(0.14057916, dtype=float32), 'loss_cross_entropy': Array(0.13295625, dtype=float32)}


 11%|█         | 107719/1000000 [4:56:07<25:28:01,  9.73it/s]

{'loss': Array(0.13067035, dtype=float32), 'loss_cross_entropy': Array(0.122398, dtype=float32)}


 11%|█         | 107729/1000000 [4:56:09<34:06:43,  7.27it/s]

{'loss': Array(0.13410299, dtype=float32), 'loss_cross_entropy': Array(0.12657301, dtype=float32)}


 11%|█         | 107739/1000000 [4:56:11<26:01:05,  9.53it/s]

{'loss': Array(0.15263453, dtype=float32), 'loss_cross_entropy': Array(0.14431064, dtype=float32)}


 11%|█         | 107749/1000000 [4:56:12<23:51:26, 10.39it/s]

{'loss': Array(0.14370582, dtype=float32), 'loss_cross_entropy': Array(0.13508539, dtype=float32)}


 11%|█         | 107759/1000000 [4:56:14<26:21:23,  9.40it/s]

{'loss': Array(0.1397727, dtype=float32), 'loss_cross_entropy': Array(0.13143417, dtype=float32)}


 11%|█         | 107769/1000000 [4:56:15<23:41:53, 10.46it/s]

{'loss': Array(0.15454967, dtype=float32), 'loss_cross_entropy': Array(0.14651509, dtype=float32)}


 11%|█         | 107779/1000000 [4:56:17<36:36:49,  6.77it/s]

{'loss': Array(0.14655766, dtype=float32), 'loss_cross_entropy': Array(0.13882518, dtype=float32)}


 11%|█         | 107789/1000000 [4:56:18<24:58:18,  9.92it/s]

{'loss': Array(0.153385, dtype=float32), 'loss_cross_entropy': Array(0.145061, dtype=float32)}


 11%|█         | 107799/1000000 [4:56:20<23:45:14, 10.43it/s]

{'loss': Array(0.13870259, dtype=float32), 'loss_cross_entropy': Array(0.1310481, dtype=float32)}


 11%|█         | 107809/1000000 [4:56:21<29:57:35,  8.27it/s]

{'loss': Array(0.13665774, dtype=float32), 'loss_cross_entropy': Array(0.12870075, dtype=float32)}


 11%|█         | 107819/1000000 [4:56:23<23:52:34, 10.38it/s]

{'loss': Array(0.14667584, dtype=float32), 'loss_cross_entropy': Array(0.13738929, dtype=float32)}


 11%|█         | 107829/1000000 [4:56:24<32:32:08,  7.62it/s]

{'loss': Array(0.14029185, dtype=float32), 'loss_cross_entropy': Array(0.13253045, dtype=float32)}


 11%|█         | 107839/1000000 [4:56:26<24:20:58, 10.18it/s]

{'loss': Array(0.12391337, dtype=float32), 'loss_cross_entropy': Array(0.11648662, dtype=float32)}


 11%|█         | 107849/1000000 [4:56:27<33:06:29,  7.49it/s]

{'loss': Array(0.15325074, dtype=float32), 'loss_cross_entropy': Array(0.1447833, dtype=float32)}


 11%|█         | 107859/1000000 [4:56:29<24:55:48,  9.94it/s]

{'loss': Array(0.13010336, dtype=float32), 'loss_cross_entropy': Array(0.12228785, dtype=float32)}


 11%|█         | 107869/1000000 [4:56:30<23:10:44, 10.69it/s]

{'loss': Array(0.14963993, dtype=float32), 'loss_cross_entropy': Array(0.14149146, dtype=float32)}


 11%|█         | 107879/1000000 [4:56:32<27:14:58,  9.09it/s]

{'loss': Array(0.15201674, dtype=float32), 'loss_cross_entropy': Array(0.14339131, dtype=float32)}


 11%|█         | 107889/1000000 [4:56:33<23:26:15, 10.57it/s]

{'loss': Array(0.13359094, dtype=float32), 'loss_cross_entropy': Array(0.12661462, dtype=float32)}


 11%|█         | 107899/1000000 [4:56:35<29:38:16,  8.36it/s]

{'loss': Array(0.14369492, dtype=float32), 'loss_cross_entropy': Array(0.13542321, dtype=float32)}


 11%|█         | 107909/1000000 [4:56:36<24:28:34, 10.12it/s]

{'loss': Array(0.14125745, dtype=float32), 'loss_cross_entropy': Array(0.1330756, dtype=float32)}


 11%|█         | 107919/1000000 [4:56:38<37:17:34,  6.64it/s]

{'loss': Array(0.15307891, dtype=float32), 'loss_cross_entropy': Array(0.14445122, dtype=float32)}


 11%|█         | 107929/1000000 [4:56:39<25:15:46,  9.81it/s]

{'loss': Array(0.14339632, dtype=float32), 'loss_cross_entropy': Array(0.13504033, dtype=float32)}


 11%|█         | 107939/1000000 [4:56:41<36:21:15,  6.82it/s]

{'loss': Array(0.1377056, dtype=float32), 'loss_cross_entropy': Array(0.12980235, dtype=float32)}


 11%|█         | 107949/1000000 [4:56:42<25:09:39,  9.85it/s]

{'loss': Array(0.12728819, dtype=float32), 'loss_cross_entropy': Array(0.11995085, dtype=float32)}


 11%|█         | 107959/1000000 [4:56:44<22:55:17, 10.81it/s]

{'loss': Array(0.15750043, dtype=float32), 'loss_cross_entropy': Array(0.14831439, dtype=float32)}


 11%|█         | 107969/1000000 [4:56:46<32:16:35,  7.68it/s]

{'loss': Array(0.16018656, dtype=float32), 'loss_cross_entropy': Array(0.15209478, dtype=float32)}


 11%|█         | 107979/1000000 [4:56:47<24:24:05, 10.15it/s]

{'loss': Array(0.14978251, dtype=float32), 'loss_cross_entropy': Array(0.141524, dtype=float32)}


 11%|█         | 107989/1000000 [4:56:49<36:17:18,  6.83it/s]

{'loss': Array(0.13162749, dtype=float32), 'loss_cross_entropy': Array(0.12459699, dtype=float32)}


 11%|█         | 107999/1000000 [4:56:50<25:11:29,  9.84it/s]

{'loss': Array(0.13132301, dtype=float32), 'loss_cross_entropy': Array(0.12374252, dtype=float32)}


 11%|█         | 108009/1000000 [4:56:58<81:51:13,  3.03it/s] 

{'loss': Array(0.15043715, dtype=float32), 'loss_cross_entropy': Array(0.1421639, dtype=float32)}


 11%|█         | 108019/1000000 [4:57:00<36:53:34,  6.72it/s] 

{'loss': Array(0.12918654, dtype=float32), 'loss_cross_entropy': Array(0.12180965, dtype=float32)}


 11%|█         | 108029/1000000 [4:57:01<25:20:57,  9.77it/s]

{'loss': Array(0.13532591, dtype=float32), 'loss_cross_entropy': Array(0.12705445, dtype=float32)}


 11%|█         | 108039/1000000 [4:57:03<27:45:51,  8.92it/s]

{'loss': Array(0.16302933, dtype=float32), 'loss_cross_entropy': Array(0.15348527, dtype=float32)}


 11%|█         | 108049/1000000 [4:57:04<24:04:34, 10.29it/s]

{'loss': Array(0.14420207, dtype=float32), 'loss_cross_entropy': Array(0.13612682, dtype=float32)}


 11%|█         | 108059/1000000 [4:57:06<37:35:01,  6.59it/s]

{'loss': Array(0.13261285, dtype=float32), 'loss_cross_entropy': Array(0.12521577, dtype=float32)}


 11%|█         | 108069/1000000 [4:57:07<25:50:17,  9.59it/s]

{'loss': Array(0.154578, dtype=float32), 'loss_cross_entropy': Array(0.14629292, dtype=float32)}


 11%|█         | 108079/1000000 [4:57:09<24:28:45, 10.12it/s]

{'loss': Array(0.13686736, dtype=float32), 'loss_cross_entropy': Array(0.12925605, dtype=float32)}


 11%|█         | 108089/1000000 [4:57:10<30:59:28,  7.99it/s]

{'loss': Array(0.13960254, dtype=float32), 'loss_cross_entropy': Array(0.13148807, dtype=float32)}


 11%|█         | 108099/1000000 [4:57:12<25:23:54,  9.75it/s]

{'loss': Array(0.13672593, dtype=float32), 'loss_cross_entropy': Array(0.12883785, dtype=float32)}


 11%|█         | 108109/1000000 [4:57:14<37:33:10,  6.60it/s]

{'loss': Array(0.13497362, dtype=float32), 'loss_cross_entropy': Array(0.12723601, dtype=float32)}


 11%|█         | 108119/1000000 [4:57:15<25:20:04,  9.78it/s]

{'loss': Array(0.16066483, dtype=float32), 'loss_cross_entropy': Array(0.15158115, dtype=float32)}


 11%|█         | 108129/1000000 [4:57:16<23:50:27, 10.39it/s]

{'loss': Array(0.15296648, dtype=float32), 'loss_cross_entropy': Array(0.14475635, dtype=float32)}


 11%|█         | 108139/1000000 [4:57:18<27:36:05,  8.98it/s]

{'loss': Array(0.1501534, dtype=float32), 'loss_cross_entropy': Array(0.14182727, dtype=float32)}


 11%|█         | 108149/1000000 [4:57:19<23:42:44, 10.45it/s]

{'loss': Array(0.16164933, dtype=float32), 'loss_cross_entropy': Array(0.15284288, dtype=float32)}


 11%|█         | 108159/1000000 [4:57:21<32:50:57,  7.54it/s]

{'loss': Array(0.16255255, dtype=float32), 'loss_cross_entropy': Array(0.15379965, dtype=float32)}


 11%|█         | 108169/1000000 [4:57:23<25:04:12,  9.88it/s]

{'loss': Array(0.16007318, dtype=float32), 'loss_cross_entropy': Array(0.15200645, dtype=float32)}


 11%|█         | 108179/1000000 [4:57:24<23:17:03, 10.64it/s]

{'loss': Array(0.14604655, dtype=float32), 'loss_cross_entropy': Array(0.13842414, dtype=float32)}


 11%|█         | 108189/1000000 [4:57:26<29:34:01,  8.38it/s]

{'loss': Array(0.14742254, dtype=float32), 'loss_cross_entropy': Array(0.13880534, dtype=float32)}


 11%|█         | 108199/1000000 [4:57:27<23:44:27, 10.43it/s]

{'loss': Array(0.1489625, dtype=float32), 'loss_cross_entropy': Array(0.14024203, dtype=float32)}


 11%|█         | 108209/1000000 [4:57:28<23:05:45, 10.73it/s]

{'loss': Array(0.1467462, dtype=float32), 'loss_cross_entropy': Array(0.13841265, dtype=float32)}


 11%|█         | 108219/1000000 [4:57:30<26:11:00,  9.46it/s]

{'loss': Array(0.12577683, dtype=float32), 'loss_cross_entropy': Array(0.11849089, dtype=float32)}


 11%|█         | 108229/1000000 [4:57:31<23:35:27, 10.50it/s]

{'loss': Array(0.14653996, dtype=float32), 'loss_cross_entropy': Array(0.13913706, dtype=float32)}


 11%|█         | 108239/1000000 [4:57:33<29:00:44,  8.54it/s]

{'loss': Array(0.15955909, dtype=float32), 'loss_cross_entropy': Array(0.15093133, dtype=float32)}


 11%|█         | 108249/1000000 [4:57:34<24:18:09, 10.19it/s]

{'loss': Array(0.14441383, dtype=float32), 'loss_cross_entropy': Array(0.13673112, dtype=float32)}


 11%|█         | 108259/1000000 [4:57:36<36:45:27,  6.74it/s]

{'loss': Array(0.11117826, dtype=float32), 'loss_cross_entropy': Array(0.10473173, dtype=float32)}


 11%|█         | 108269/1000000 [4:57:38<25:01:59,  9.90it/s]

{'loss': Array(0.12750714, dtype=float32), 'loss_cross_entropy': Array(0.12050065, dtype=float32)}


 11%|█         | 108279/1000000 [4:57:39<35:56:09,  6.89it/s]

{'loss': Array(0.1133595, dtype=float32), 'loss_cross_entropy': Array(0.10708608, dtype=float32)}


 11%|█         | 108289/1000000 [4:57:41<25:17:52,  9.79it/s]

{'loss': Array(0.14410894, dtype=float32), 'loss_cross_entropy': Array(0.13580666, dtype=float32)}


 11%|█         | 108299/1000000 [4:57:42<23:39:34, 10.47it/s]

{'loss': Array(0.1550054, dtype=float32), 'loss_cross_entropy': Array(0.14644714, dtype=float32)}


 11%|█         | 108309/1000000 [4:57:44<32:43:47,  7.57it/s]

{'loss': Array(0.14097863, dtype=float32), 'loss_cross_entropy': Array(0.13313128, dtype=float32)}


 11%|█         | 108319/1000000 [4:57:45<24:28:34, 10.12it/s]

{'loss': Array(0.14156893, dtype=float32), 'loss_cross_entropy': Array(0.13390176, dtype=float32)}


 11%|█         | 108329/1000000 [4:57:47<37:10:55,  6.66it/s]

{'loss': Array(0.14269836, dtype=float32), 'loss_cross_entropy': Array(0.13480373, dtype=float32)}


 11%|█         | 108339/1000000 [4:57:48<25:14:36,  9.81it/s]

{'loss': Array(0.13493629, dtype=float32), 'loss_cross_entropy': Array(0.12768565, dtype=float32)}


 11%|█         | 108349/1000000 [4:57:50<23:25:03, 10.58it/s]

{'loss': Array(0.13730593, dtype=float32), 'loss_cross_entropy': Array(0.12959073, dtype=float32)}


 11%|█         | 108359/1000000 [4:57:51<26:25:48,  9.37it/s]

{'loss': Array(0.1426009, dtype=float32), 'loss_cross_entropy': Array(0.1345181, dtype=float32)}


 11%|█         | 108369/1000000 [4:57:53<23:25:11, 10.58it/s]

{'loss': Array(0.13998704, dtype=float32), 'loss_cross_entropy': Array(0.13260077, dtype=float32)}


 11%|█         | 108379/1000000 [4:57:54<26:16:55,  9.42it/s]

{'loss': Array(0.13456373, dtype=float32), 'loss_cross_entropy': Array(0.12706946, dtype=float32)}


 11%|█         | 108389/1000000 [4:57:56<23:46:38, 10.42it/s]

{'loss': Array(0.14281437, dtype=float32), 'loss_cross_entropy': Array(0.13491467, dtype=float32)}


 11%|█         | 108399/1000000 [4:57:57<36:40:00,  6.75it/s]

{'loss': Array(0.15737164, dtype=float32), 'loss_cross_entropy': Array(0.14833033, dtype=float32)}


 11%|█         | 108409/1000000 [4:57:59<25:00:02,  9.91it/s]

{'loss': Array(0.12981832, dtype=float32), 'loss_cross_entropy': Array(0.1219574, dtype=float32)}


 11%|█         | 108419/1000000 [4:58:00<24:11:48, 10.24it/s]

{'loss': Array(0.13056865, dtype=float32), 'loss_cross_entropy': Array(0.12247627, dtype=float32)}


 11%|█         | 108429/1000000 [4:58:02<30:08:59,  8.21it/s]

{'loss': Array(0.14393681, dtype=float32), 'loss_cross_entropy': Array(0.13613704, dtype=float32)}


 11%|█         | 108439/1000000 [4:58:03<24:35:51, 10.07it/s]

{'loss': Array(0.14427815, dtype=float32), 'loss_cross_entropy': Array(0.1363841, dtype=float32)}


 11%|█         | 108449/1000000 [4:58:05<37:39:43,  6.58it/s]

{'loss': Array(0.14233907, dtype=float32), 'loss_cross_entropy': Array(0.13416749, dtype=float32)}


 11%|█         | 108459/1000000 [4:58:07<26:09:01,  9.47it/s]

{'loss': Array(0.1479957, dtype=float32), 'loss_cross_entropy': Array(0.13997827, dtype=float32)}


 11%|█         | 108469/1000000 [4:58:08<24:09:25, 10.25it/s]

{'loss': Array(0.145434, dtype=float32), 'loss_cross_entropy': Array(0.13758935, dtype=float32)}


 11%|█         | 108479/1000000 [4:58:10<28:34:58,  8.66it/s]

{'loss': Array(0.13885035, dtype=float32), 'loss_cross_entropy': Array(0.13115275, dtype=float32)}


 11%|█         | 108489/1000000 [4:58:11<24:57:53,  9.92it/s]

{'loss': Array(0.13619828, dtype=float32), 'loss_cross_entropy': Array(0.12835307, dtype=float32)}


 11%|█         | 108499/1000000 [4:58:13<33:38:50,  7.36it/s]

{'loss': Array(0.14853998, dtype=float32), 'loss_cross_entropy': Array(0.14054738, dtype=float32)}


 11%|█         | 108509/1000000 [4:58:21<83:39:04,  2.96it/s] 

{'loss': Array(0.13578537, dtype=float32), 'loss_cross_entropy': Array(0.12859118, dtype=float32)}


 11%|█         | 108519/1000000 [4:58:22<42:11:31,  5.87it/s]

{'loss': Array(0.14612074, dtype=float32), 'loss_cross_entropy': Array(0.138007, dtype=float32)}


 11%|█         | 108529/1000000 [4:58:24<25:40:35,  9.64it/s]

{'loss': Array(0.13746186, dtype=float32), 'loss_cross_entropy': Array(0.12895225, dtype=float32)}


 11%|█         | 108539/1000000 [4:58:25<23:59:30, 10.32it/s]

{'loss': Array(0.14038102, dtype=float32), 'loss_cross_entropy': Array(0.13249426, dtype=float32)}


 11%|█         | 108549/1000000 [4:58:27<29:45:20,  8.32it/s]

{'loss': Array(0.1429624, dtype=float32), 'loss_cross_entropy': Array(0.13485512, dtype=float32)}


 11%|█         | 108559/1000000 [4:58:28<24:04:40, 10.28it/s]

{'loss': Array(0.1498051, dtype=float32), 'loss_cross_entropy': Array(0.14164871, dtype=float32)}


 11%|█         | 108569/1000000 [4:58:30<23:34:22, 10.50it/s]

{'loss': Array(0.13755228, dtype=float32), 'loss_cross_entropy': Array(0.1302578, dtype=float32)}


 11%|█         | 108579/1000000 [4:58:31<26:21:50,  9.39it/s]

{'loss': Array(0.14537321, dtype=float32), 'loss_cross_entropy': Array(0.13678579, dtype=float32)}


 11%|█         | 108589/1000000 [4:58:33<23:16:40, 10.64it/s]

{'loss': Array(0.14253068, dtype=float32), 'loss_cross_entropy': Array(0.13380034, dtype=float32)}


 11%|█         | 108599/1000000 [4:58:34<29:23:18,  8.43it/s]

{'loss': Array(0.13166665, dtype=float32), 'loss_cross_entropy': Array(0.12428931, dtype=float32)}


 11%|█         | 108609/1000000 [4:58:36<23:51:51, 10.38it/s]

{'loss': Array(0.15163386, dtype=float32), 'loss_cross_entropy': Array(0.14286628, dtype=float32)}


 11%|█         | 108619/1000000 [4:58:37<32:12:55,  7.69it/s]

{'loss': Array(0.15096484, dtype=float32), 'loss_cross_entropy': Array(0.14281207, dtype=float32)}


 11%|█         | 108629/1000000 [4:58:39<24:39:11, 10.04it/s]

{'loss': Array(0.14614621, dtype=float32), 'loss_cross_entropy': Array(0.13773645, dtype=float32)}


 11%|█         | 108639/1000000 [4:58:40<23:12:31, 10.67it/s]

{'loss': Array(0.13763084, dtype=float32), 'loss_cross_entropy': Array(0.1303677, dtype=float32)}


 11%|█         | 108649/1000000 [4:58:42<29:21:38,  8.43it/s]

{'loss': Array(0.12833674, dtype=float32), 'loss_cross_entropy': Array(0.12108977, dtype=float32)}


 11%|█         | 108659/1000000 [4:58:43<24:11:25, 10.24it/s]

{'loss': Array(0.12760271, dtype=float32), 'loss_cross_entropy': Array(0.1197824, dtype=float32)}


 11%|█         | 108669/1000000 [4:58:45<23:54:51, 10.35it/s]

{'loss': Array(0.1414712, dtype=float32), 'loss_cross_entropy': Array(0.13373767, dtype=float32)}


 11%|█         | 108679/1000000 [4:58:46<26:18:40,  9.41it/s]

{'loss': Array(0.1508045, dtype=float32), 'loss_cross_entropy': Array(0.1424288, dtype=float32)}


 11%|█         | 108689/1000000 [4:58:48<23:12:17, 10.67it/s]

{'loss': Array(0.13524638, dtype=float32), 'loss_cross_entropy': Array(0.12815894, dtype=float32)}


 11%|█         | 108699/1000000 [4:58:49<29:10:09,  8.49it/s]

{'loss': Array(0.13958752, dtype=float32), 'loss_cross_entropy': Array(0.13205272, dtype=float32)}


 11%|█         | 108709/1000000 [4:58:51<24:03:59, 10.29it/s]

{'loss': Array(0.15141185, dtype=float32), 'loss_cross_entropy': Array(0.14315255, dtype=float32)}


 11%|█         | 108719/1000000 [4:58:52<36:50:52,  6.72it/s]

{'loss': Array(0.1469931, dtype=float32), 'loss_cross_entropy': Array(0.13834706, dtype=float32)}


 11%|█         | 108729/1000000 [4:58:54<24:55:55,  9.93it/s]

{'loss': Array(0.14319333, dtype=float32), 'loss_cross_entropy': Array(0.1352872, dtype=float32)}


 11%|█         | 108739/1000000 [4:58:55<23:09:40, 10.69it/s]

{'loss': Array(0.1402223, dtype=float32), 'loss_cross_entropy': Array(0.13242093, dtype=float32)}


 11%|█         | 108749/1000000 [4:58:57<32:36:02,  7.59it/s]

{'loss': Array(0.13888611, dtype=float32), 'loss_cross_entropy': Array(0.1318205, dtype=float32)}


 11%|█         | 108759/1000000 [4:58:58<24:18:52, 10.18it/s]

{'loss': Array(0.13041425, dtype=float32), 'loss_cross_entropy': Array(0.12325015, dtype=float32)}


 11%|█         | 108769/1000000 [4:59:00<24:01:48, 10.30it/s]

{'loss': Array(0.12591577, dtype=float32), 'loss_cross_entropy': Array(0.11800563, dtype=float32)}


 11%|█         | 108779/1000000 [4:59:01<29:12:40,  8.47it/s]

{'loss': Array(0.15244572, dtype=float32), 'loss_cross_entropy': Array(0.14421113, dtype=float32)}


 11%|█         | 108789/1000000 [4:59:03<24:45:45, 10.00it/s]

{'loss': Array(0.13397482, dtype=float32), 'loss_cross_entropy': Array(0.1263965, dtype=float32)}


 11%|█         | 108799/1000000 [4:59:05<30:44:49,  8.05it/s]

{'loss': Array(0.14982398, dtype=float32), 'loss_cross_entropy': Array(0.14175941, dtype=float32)}


 11%|█         | 108809/1000000 [4:59:06<25:17:35,  9.79it/s]

{'loss': Array(0.13401592, dtype=float32), 'loss_cross_entropy': Array(0.12631199, dtype=float32)}


 11%|█         | 108819/1000000 [4:59:07<24:26:53, 10.13it/s]

{'loss': Array(0.14406325, dtype=float32), 'loss_cross_entropy': Array(0.13608223, dtype=float32)}


 11%|█         | 108829/1000000 [4:59:09<27:30:56,  9.00it/s]

{'loss': Array(0.15248396, dtype=float32), 'loss_cross_entropy': Array(0.14428043, dtype=float32)}


 11%|█         | 108839/1000000 [4:59:11<24:41:20, 10.03it/s]

{'loss': Array(0.13334674, dtype=float32), 'loss_cross_entropy': Array(0.12585957, dtype=float32)}


 11%|█         | 108849/1000000 [4:59:12<32:27:52,  7.62it/s]

{'loss': Array(0.13424511, dtype=float32), 'loss_cross_entropy': Array(0.12658778, dtype=float32)}


 11%|█         | 108859/1000000 [4:59:14<24:28:56, 10.11it/s]

{'loss': Array(0.16272119, dtype=float32), 'loss_cross_entropy': Array(0.1537823, dtype=float32)}


 11%|█         | 108869/1000000 [4:59:15<23:30:24, 10.53it/s]

{'loss': Array(0.14480357, dtype=float32), 'loss_cross_entropy': Array(0.13637921, dtype=float32)}


 11%|█         | 108879/1000000 [4:59:17<26:26:08,  9.36it/s]

{'loss': Array(0.15088673, dtype=float32), 'loss_cross_entropy': Array(0.14220224, dtype=float32)}


 11%|█         | 108889/1000000 [4:59:18<23:53:34, 10.36it/s]

{'loss': Array(0.13678156, dtype=float32), 'loss_cross_entropy': Array(0.12894247, dtype=float32)}


 11%|█         | 108899/1000000 [4:59:20<28:05:17,  8.81it/s]

{'loss': Array(0.13641374, dtype=float32), 'loss_cross_entropy': Array(0.12854557, dtype=float32)}


 11%|█         | 108909/1000000 [4:59:21<23:52:48, 10.37it/s]

{'loss': Array(0.1429833, dtype=float32), 'loss_cross_entropy': Array(0.13508368, dtype=float32)}


 11%|█         | 108919/1000000 [4:59:23<36:28:02,  6.79it/s]

{'loss': Array(0.13406882, dtype=float32), 'loss_cross_entropy': Array(0.12612112, dtype=float32)}


 11%|█         | 108929/1000000 [4:59:24<25:19:58,  9.77it/s]

{'loss': Array(0.15524738, dtype=float32), 'loss_cross_entropy': Array(0.14666162, dtype=float32)}


 11%|█         | 108939/1000000 [4:59:26<23:18:04, 10.62it/s]

{'loss': Array(0.17734599, dtype=float32), 'loss_cross_entropy': Array(0.16838188, dtype=float32)}


 11%|█         | 108949/1000000 [4:59:27<29:45:49,  8.32it/s]

{'loss': Array(0.14914134, dtype=float32), 'loss_cross_entropy': Array(0.140889, dtype=float32)}


 11%|█         | 108959/1000000 [4:59:29<23:51:19, 10.38it/s]

{'loss': Array(0.15294568, dtype=float32), 'loss_cross_entropy': Array(0.14464216, dtype=float32)}


 11%|█         | 108969/1000000 [4:59:31<32:37:14,  7.59it/s]

{'loss': Array(0.14581598, dtype=float32), 'loss_cross_entropy': Array(0.13839616, dtype=float32)}


 11%|█         | 108979/1000000 [4:59:32<24:19:46, 10.17it/s]

{'loss': Array(0.14029305, dtype=float32), 'loss_cross_entropy': Array(0.13256411, dtype=float32)}


 11%|█         | 108989/1000000 [4:59:34<32:09:31,  7.70it/s]

{'loss': Array(0.1422035, dtype=float32), 'loss_cross_entropy': Array(0.13478139, dtype=float32)}


 11%|█         | 108999/1000000 [4:59:35<24:43:05, 10.01it/s]

{'loss': Array(0.14823912, dtype=float32), 'loss_cross_entropy': Array(0.13963464, dtype=float32)}


 11%|█         | 109009/1000000 [4:59:43<79:49:28,  3.10it/s] 

{'loss': Array(0.13263878, dtype=float32), 'loss_cross_entropy': Array(0.12471782, dtype=float32)}


 11%|█         | 109019/1000000 [4:59:44<37:10:27,  6.66it/s]

{'loss': Array(0.15299831, dtype=float32), 'loss_cross_entropy': Array(0.144509, dtype=float32)}


 11%|█         | 109029/1000000 [4:59:46<25:31:20,  9.70it/s]

{'loss': Array(0.13817412, dtype=float32), 'loss_cross_entropy': Array(0.1308754, dtype=float32)}


 11%|█         | 109039/1000000 [4:59:47<32:26:49,  7.63it/s]

{'loss': Array(0.13303268, dtype=float32), 'loss_cross_entropy': Array(0.12559922, dtype=float32)}


 11%|█         | 109049/1000000 [4:59:49<24:04:27, 10.28it/s]

{'loss': Array(0.13454221, dtype=float32), 'loss_cross_entropy': Array(0.12729463, dtype=float32)}


 11%|█         | 109059/1000000 [4:59:50<36:46:27,  6.73it/s]

{'loss': Array(0.13163239, dtype=float32), 'loss_cross_entropy': Array(0.12438522, dtype=float32)}


 11%|█         | 109069/1000000 [4:59:52<25:26:47,  9.73it/s]

{'loss': Array(0.13354252, dtype=float32), 'loss_cross_entropy': Array(0.12543046, dtype=float32)}


 11%|█         | 109079/1000000 [4:59:53<23:34:12, 10.50it/s]

{'loss': Array(0.14940558, dtype=float32), 'loss_cross_entropy': Array(0.14088586, dtype=float32)}


 11%|█         | 109089/1000000 [4:59:55<29:09:38,  8.49it/s]

{'loss': Array(0.13566963, dtype=float32), 'loss_cross_entropy': Array(0.12842685, dtype=float32)}


 11%|█         | 109099/1000000 [4:59:56<24:46:01,  9.99it/s]

{'loss': Array(0.13675883, dtype=float32), 'loss_cross_entropy': Array(0.12919202, dtype=float32)}


 11%|█         | 109109/1000000 [4:59:58<32:20:53,  7.65it/s]

{'loss': Array(0.1534696, dtype=float32), 'loss_cross_entropy': Array(0.14492399, dtype=float32)}


 11%|█         | 109119/1000000 [4:59:59<25:28:58,  9.71it/s]

{'loss': Array(0.12692282, dtype=float32), 'loss_cross_entropy': Array(0.1192348, dtype=float32)}


 11%|█         | 109129/1000000 [5:00:01<23:24:57, 10.57it/s]

{'loss': Array(0.1456949, dtype=float32), 'loss_cross_entropy': Array(0.13733393, dtype=float32)}


 11%|█         | 109139/1000000 [5:00:02<25:57:15,  9.53it/s]

{'loss': Array(0.14272156, dtype=float32), 'loss_cross_entropy': Array(0.13477501, dtype=float32)}


 11%|█         | 109149/1000000 [5:00:04<23:28:42, 10.54it/s]

{'loss': Array(0.13447782, dtype=float32), 'loss_cross_entropy': Array(0.12673862, dtype=float32)}


 11%|█         | 109159/1000000 [5:00:06<36:08:17,  6.85it/s]

{'loss': Array(0.14913529, dtype=float32), 'loss_cross_entropy': Array(0.14045274, dtype=float32)}


 11%|█         | 109169/1000000 [5:00:07<25:08:40,  9.84it/s]

{'loss': Array(0.13382617, dtype=float32), 'loss_cross_entropy': Array(0.12627655, dtype=float32)}


 11%|█         | 109179/1000000 [5:00:08<23:52:31, 10.36it/s]

{'loss': Array(0.13632797, dtype=float32), 'loss_cross_entropy': Array(0.12873356, dtype=float32)}


 11%|█         | 109189/1000000 [5:00:10<30:06:21,  8.22it/s]

{'loss': Array(0.14207998, dtype=float32), 'loss_cross_entropy': Array(0.13462538, dtype=float32)}


 11%|█         | 109199/1000000 [5:00:11<25:53:04,  9.56it/s]

{'loss': Array(0.1401824, dtype=float32), 'loss_cross_entropy': Array(0.13257085, dtype=float32)}


 11%|█         | 109209/1000000 [5:00:13<34:23:16,  7.20it/s]

{'loss': Array(0.1589217, dtype=float32), 'loss_cross_entropy': Array(0.15101373, dtype=float32)}


 11%|█         | 109219/1000000 [5:00:15<25:34:11,  9.68it/s]

{'loss': Array(0.13719395, dtype=float32), 'loss_cross_entropy': Array(0.12894616, dtype=float32)}


 11%|█         | 109229/1000000 [5:00:17<34:01:20,  7.27it/s]

{'loss': Array(0.13845621, dtype=float32), 'loss_cross_entropy': Array(0.13076071, dtype=float32)}


 11%|█         | 109239/1000000 [5:00:18<25:59:02,  9.52it/s]

{'loss': Array(0.1450453, dtype=float32), 'loss_cross_entropy': Array(0.13737845, dtype=float32)}


 11%|█         | 109249/1000000 [5:00:19<25:14:18,  9.80it/s]

{'loss': Array(0.12923019, dtype=float32), 'loss_cross_entropy': Array(0.12203275, dtype=float32)}


 11%|█         | 109259/1000000 [5:00:21<27:47:30,  8.90it/s]

{'loss': Array(0.16674836, dtype=float32), 'loss_cross_entropy': Array(0.15806878, dtype=float32)}


 11%|█         | 109269/1000000 [5:00:23<23:55:44, 10.34it/s]

{'loss': Array(0.12423984, dtype=float32), 'loss_cross_entropy': Array(0.1165685, dtype=float32)}


 11%|█         | 109279/1000000 [5:00:24<29:44:12,  8.32it/s]

{'loss': Array(0.13140239, dtype=float32), 'loss_cross_entropy': Array(0.12405463, dtype=float32)}


 11%|█         | 109289/1000000 [5:00:26<24:15:48, 10.20it/s]

{'loss': Array(0.15196799, dtype=float32), 'loss_cross_entropy': Array(0.14351015, dtype=float32)}


 11%|█         | 109299/1000000 [5:00:27<36:12:16,  6.83it/s]

{'loss': Array(0.12914653, dtype=float32), 'loss_cross_entropy': Array(0.12086672, dtype=float32)}


 11%|█         | 109309/1000000 [5:00:29<25:23:27,  9.74it/s]

{'loss': Array(0.12332135, dtype=float32), 'loss_cross_entropy': Array(0.11627012, dtype=float32)}


 11%|█         | 109319/1000000 [5:00:30<36:40:23,  6.75it/s]

{'loss': Array(0.15710963, dtype=float32), 'loss_cross_entropy': Array(0.14846112, dtype=float32)}


 11%|█         | 109329/1000000 [5:00:32<25:06:23,  9.85it/s]

{'loss': Array(0.13104334, dtype=float32), 'loss_cross_entropy': Array(0.12377421, dtype=float32)}


 11%|█         | 109339/1000000 [5:00:33<23:51:04, 10.37it/s]

{'loss': Array(0.13390192, dtype=float32), 'loss_cross_entropy': Array(0.1266825, dtype=float32)}


 11%|█         | 109349/1000000 [5:00:35<32:47:26,  7.54it/s]

{'loss': Array(0.14008527, dtype=float32), 'loss_cross_entropy': Array(0.13218229, dtype=float32)}


 11%|█         | 109359/1000000 [5:00:36<24:43:32, 10.01it/s]

{'loss': Array(0.1391684, dtype=float32), 'loss_cross_entropy': Array(0.13190877, dtype=float32)}


 11%|█         | 109369/1000000 [5:00:38<36:23:46,  6.80it/s]

{'loss': Array(0.14767776, dtype=float32), 'loss_cross_entropy': Array(0.13950221, dtype=float32)}


 11%|█         | 109379/1000000 [5:00:39<25:38:10,  9.65it/s]

{'loss': Array(0.14588419, dtype=float32), 'loss_cross_entropy': Array(0.13743366, dtype=float32)}


 11%|█         | 109389/1000000 [5:00:41<23:00:49, 10.75it/s]

{'loss': Array(0.13772525, dtype=float32), 'loss_cross_entropy': Array(0.12976077, dtype=float32)}


 11%|█         | 109399/1000000 [5:00:42<26:08:58,  9.46it/s]

{'loss': Array(0.13145265, dtype=float32), 'loss_cross_entropy': Array(0.12388581, dtype=float32)}


 11%|█         | 109409/1000000 [5:00:44<23:08:24, 10.69it/s]

{'loss': Array(0.14809744, dtype=float32), 'loss_cross_entropy': Array(0.13970235, dtype=float32)}


 11%|█         | 109419/1000000 [5:00:45<25:55:03,  9.54it/s]

{'loss': Array(0.1374838, dtype=float32), 'loss_cross_entropy': Array(0.12921011, dtype=float32)}


 11%|█         | 109429/1000000 [5:00:47<23:23:11, 10.58it/s]

{'loss': Array(0.11953074, dtype=float32), 'loss_cross_entropy': Array(0.11298225, dtype=float32)}


 11%|█         | 109439/1000000 [5:00:48<36:07:22,  6.85it/s]

{'loss': Array(0.14638545, dtype=float32), 'loss_cross_entropy': Array(0.13834964, dtype=float32)}


 11%|█         | 109449/1000000 [5:00:50<25:04:32,  9.87it/s]

{'loss': Array(0.1454592, dtype=float32), 'loss_cross_entropy': Array(0.13796492, dtype=float32)}


 11%|█         | 109459/1000000 [5:00:51<23:20:06, 10.60it/s]

{'loss': Array(0.13780366, dtype=float32), 'loss_cross_entropy': Array(0.13072915, dtype=float32)}


 11%|█         | 109469/1000000 [5:00:53<29:15:07,  8.46it/s]

{'loss': Array(0.14791387, dtype=float32), 'loss_cross_entropy': Array(0.13993256, dtype=float32)}


 11%|█         | 109479/1000000 [5:00:54<24:25:05, 10.13it/s]

{'loss': Array(0.13549908, dtype=float32), 'loss_cross_entropy': Array(0.12745573, dtype=float32)}


 11%|█         | 109489/1000000 [5:00:56<36:57:57,  6.69it/s]

{'loss': Array(0.15610091, dtype=float32), 'loss_cross_entropy': Array(0.14697862, dtype=float32)}


 11%|█         | 109499/1000000 [5:00:57<26:08:42,  9.46it/s]

{'loss': Array(0.13807325, dtype=float32), 'loss_cross_entropy': Array(0.13046907, dtype=float32)}


 11%|█         | 109509/1000000 [5:01:05<83:41:40,  2.96it/s] 

{'loss': Array(0.1558013, dtype=float32), 'loss_cross_entropy': Array(0.1471539, dtype=float32)}


 11%|█         | 109519/1000000 [5:01:07<39:37:15,  6.24it/s]

{'loss': Array(0.12277631, dtype=float32), 'loss_cross_entropy': Array(0.11586676, dtype=float32)}


 11%|█         | 109529/1000000 [5:01:09<26:50:04,  9.22it/s]

{'loss': Array(0.12664352, dtype=float32), 'loss_cross_entropy': Array(0.1196798, dtype=float32)}


 11%|█         | 109539/1000000 [5:01:11<31:03:13,  7.97it/s]

{'loss': Array(0.13331732, dtype=float32), 'loss_cross_entropy': Array(0.12600248, dtype=float32)}


 11%|█         | 109549/1000000 [5:01:12<25:23:38,  9.74it/s]

{'loss': Array(0.13018058, dtype=float32), 'loss_cross_entropy': Array(0.12280568, dtype=float32)}


 11%|█         | 109559/1000000 [5:01:14<38:10:09,  6.48it/s]

{'loss': Array(0.14162515, dtype=float32), 'loss_cross_entropy': Array(0.13380678, dtype=float32)}


In [23]:

def sampling_model(key, model, sample_eval, nb_batch_explore=128, nb_step=100, verbose=True):
    """
    Sample future states from the model by iteratively denoising Dirichlet noise.

    Starting from uniform-Dirichlet noise over the last axis (6 classes), the
    function runs `nb_step` explicit update steps. At each step the model is
    queried with the past states, the current noisy future and a context made
    of (target reward, normalised time t = step / nb_step); the current sample
    is then moved towards the predicted class probabilities with a step size
    of (1 / nb_step) / (1 - t + 1e-4).

    Args:
        key: JAX PRNG key used to draw the initial noise.
        model: callable `model(state_past, noisy_future, context)` returning a
            pair `(logits_past, logits_future)`; only `logits_future` is used.
        sample_eval: mapping providing "state_past" (forwarded to the model
            unchanged) and "state_future" (only `shape[1]`, the number of
            future steps, is read). It is NOT modified by this function.
        nb_batch_explore: number of trajectories sampled in parallel. Each one
            is conditioned on a different reward, evenly spaced in [-0.5, 0.5].
            NOTE(review): "state_past" presumably needs this same leading
            batch size — confirm against the model.
        nb_step: number of integration steps.
        verbose: when True (default), print after every step the sum over the
            class axis of the first 10 trajectories as a sanity check that the
            sample stays normalised (values should remain close to 1).

    Returns:
        Array of shape (nb_batch_explore, seq_len_future, 54, 6) holding the
        final sample (per-position class probabilities).
    """
    seq_len_future = sample_eval["state_future"].shape[1]

    # Initial noise: one point on the 6-class simplex per (trajectory, step, position).
    noise_future = jax.random.dirichlet(key, jnp.ones(6), (nb_batch_explore, seq_len_future, 54))

    # One target reward per explored trajectory. Kept local so that the
    # caller's sample dictionary is left untouched.
    reward = jnp.linspace(start=-0.5, stop=0.5, num=nb_batch_explore)[:, None]

    for t_step in range(nb_step):
        # Normalised time in [0, 1), shaped to broadcast against noise_future.
        t_step_array = jnp.ones((nb_batch_explore, 1, 1, 1)) * float(t_step / nb_step)
        context = jnp.concatenate([reward, t_step_array[:, :, 0, 0]], axis=1)

        _, estimation_logits_future = model(sample_eval["state_past"], noise_future, context)
        estimation_proba_future = jax.nn.softmax(estimation_logits_future, axis=-1)

        # Step towards the predicted distribution; the 0.0001 term keeps the
        # 1 / (1 - t) factor finite as t approaches 1.
        noise_future = noise_future + float(1. / nb_step) * 1./ (1. - t_step_array + 0.0001) * (estimation_proba_future - noise_future)

        if verbose:
            # Report the running sample itself (not a notebook-level global).
            print(noise_future.sum(axis=-1)[:10])

    return noise_future



In [48]:
# Advance the notebook-wide PRNG state and derive one independent key per
# random consumer: a JAX key must not be passed to two different calls,
# otherwise their random draws are correlated.
key, subkey, reshape_key = jax.random.split(config.jax_key, 3)
config.jax_key = key

# Draw a batch from the replay buffer, then put it in the diffusion layout.
sample = buffer.sample(buffer_list, subkey)
sample = reshape_diffusion_setup(sample, reshape_key)

In [49]:
# Advance the notebook-wide PRNG state and derive one independent key per
# random consumer (buffer sampling, diffusion reshaping, generation).
# `config.jax_key` itself is only ever split, never consumed: handing it to
# `sampling_model` would correlate the generation noise with every key
# derived from it afterwards.
key, subkey, reshape_key, sampling_key = jax.random.split(config.jax_key, 4)
config.jax_key = key

sample = buffer.sample(buffer_list, subkey)
sample = reshape_diffusion_setup(sample, reshape_key)


# Generate 128 candidate futures for the sampled past states.
result = sampling_model(key=sampling_key, model=transformer, sample_eval=sample, nb_batch_explore=128, nb_step=100)
result

[[[1.         1.         1.         ... 0.9999999  1.         1.        ]
  [1.         1.0000001  1.         ... 1.         0.99999994 1.        ]
  [0.9999999  1.0000001  1.0000002  ... 0.99999994 0.9999999  0.99999994]
  ...
  [1.0000001  1.0000001  1.         ... 0.99999994 1.0000001  0.99999994]
  [1.         1.         1.0000001  ... 1.0000001  0.99999994 1.        ]
  [1.0000001  1.         1.         ... 0.99999994 1.         1.        ]]

 [[1.         1.         1.         ... 0.99999994 1.         1.0000001 ]
  [1.         1.         1.         ... 1.         0.9999999  1.0000001 ]
  [0.9999999  1.         0.99999994 ... 0.9999999  1.         0.99999994]
  ...
  [1.         0.99999994 1.         ... 0.9999998  1.         0.99999994]
  [0.99999994 1.         1.0000001  ... 1.         0.99999994 0.99999994]
  [0.99999994 1.         1.         ... 1.         0.99999994 1.        ]]

 [[1.         1.         1.0000001  ... 0.9999999  1.0000001  0.99999994]
  [1.         1.      

Array([[[[1.40354387e-05, 2.78135994e-05, 1.83428638e-05,
          1.19304750e-05, 9.99907136e-01, 2.07419507e-05],
         [5.35143045e-07, 9.99951959e-01, 2.02469528e-06,
          5.87257091e-06, 1.66611280e-05, 2.29480211e-05],
         [3.64724547e-05, 5.65210357e-05, 9.99861240e-01,
          1.56062742e-06, 2.86730938e-05, 1.55076850e-05],
         ...,
         [4.27688065e-07, 2.31107697e-05, 9.99951243e-01,
          9.32649709e-06, 8.20209971e-06, 7.77299283e-06],
         [1.63635705e-05, 9.99928594e-01, 1.97293703e-05,
          1.52112916e-05, 4.17535193e-06, 1.60298077e-05],
         [2.83019617e-05, 8.48784111e-06, 2.48276629e-05,
          1.59980264e-05, 3.40319239e-05, 9.99888361e-01]],

        [[6.94335205e-04, 3.26467765e-04, 2.68780859e-05,
          1.24215148e-05, 3.04284040e-05, 9.98909414e-01],
         [2.33147875e-06, 9.99922991e-01, 9.13714757e-06,
          1.21784396e-05, 3.90338246e-05, 1.43673969e-05],
         [3.58729763e-03, 9.76185441e-01, 1.8595

In [61]:
index_batch = 63

# Class index (argmax over the last axis) of the last past step for the chosen
# trajectory, viewed as (128, 8, 6, 3, 3) — presumably
# (batch, past step, face, row, column); TODO confirm.
jnp.reshape(jnp.argmax(sample["state_past"], axis=-1), (128, 8, 6, 3, 3))[index_batch, -1]

Array([[[5, 2, 2],
        [0, 0, 0],
        [3, 1, 1]],

       [[4, 2, 4],
        [1, 1, 3],
        [0, 1, 2]],

       [[5, 2, 3],
        [2, 2, 4],
        [3, 1, 1]],

       [[0, 5, 2],
        [3, 3, 4],
        [0, 3, 0]],

       [[1, 3, 5],
        [5, 4, 5],
        [4, 4, 4]],

       [[1, 4, 5],
        [0, 5, 0],
        [3, 5, 2]]], dtype=int32)

In [56]:
# Class index (argmax over the last axis) of generated future step 0 for the
# chosen trajectory, viewed as (128, 24, 6, 3, 3).
jnp.reshape(jnp.argmax(result, axis=-1), (128, 24, 6, 3, 3))[index_batch, 0]

Array([[[3, 4, 1],
        [0, 0, 0],
        [3, 1, 1]],

       [[4, 2, 4],
        [1, 1, 3],
        [0, 1, 2]],

       [[5, 2, 2],
        [2, 2, 5],
        [3, 1, 3]],

       [[0, 3, 0],
        [3, 3, 5],
        [0, 4, 2]],

       [[2, 3, 5],
        [2, 4, 5],
        [5, 4, 4]],

       [[1, 4, 5],
        [0, 5, 0],
        [1, 5, 4]]], dtype=int32)

In [55]:
# Class index (argmax over the last axis) of generated future step 1 for the
# chosen trajectory, viewed as (128, 24, 6, 3, 3).
jnp.reshape(jnp.argmax(result, axis=-1), (128, 24, 6, 3, 3))[index_batch, 1]

Array([[[3, 4, 1],
        [0, 0, 0],
        [3, 1, 1]],

       [[4, 2, 4],
        [1, 1, 3],
        [0, 1, 2]],

       [[5, 2, 2],
        [2, 2, 5],
        [3, 1, 3]],

       [[0, 3, 0],
        [3, 3, 5],
        [0, 4, 2]],

       [[2, 3, 5],
        [2, 4, 5],
        [5, 4, 4]],

       [[1, 4, 5],
        [0, 5, 0],
        [1, 5, 4]]], dtype=int32)

In [None]:
# Split a fresh subkey instead of reusing the one already consumed by an
# earlier cell: reusing a JAX key replays the same random stream.
key, subkey = jax.random.split(config.jax_key)
config.jax_key = key

# Draw a batch from the evaluation buffer and reshape it with reshape_sample.
sample = buffer_eval.sample(buffer_list_eval, subkey)
sample = reshape_sample(sample)

TrajectoryBufferSample(experience={'action': Array([[[1.32556781e-01, 7.96739519e-01, 5.36718592e-02, ...,
         3.91646661e-03, 4.48901858e-03, 9.91594553e-01],
        [3.49070907e-01, 4.57749265e-04, 4.38157976e-01, ...,
         7.23136306e-01, 1.23497941e-01, 1.53365776e-01],
        [6.12441264e-03, 2.50436477e-02, 1.35732419e-03, ...,
         3.82237613e-01, 5.98694921e-01, 1.90675538e-02],
        ...,
        [1.41329234e-04, 2.44877161e-03, 8.43136787e-01, ...,
         2.33344346e-01, 6.42170012e-01, 1.24485560e-01],
        [6.32655225e-04, 1.77795421e-02, 9.65278149e-01, ...,
         1.25269741e-02, 3.21629345e-01, 6.65843725e-01],
        [9.08881542e-04, 1.04175135e-01, 7.50824576e-04, ...,
         9.99683421e-03, 7.89827347e-01, 2.00175866e-01]],

       [[2.03237548e-01, 7.00179100e-01, 3.63819454e-05, ...,
         9.96583939e-01, 2.39940570e-03, 1.01662707e-03],
        [7.63220847e-01, 1.11325733e-01, 3.15520242e-02, ...,
         5.45369804e-01, 4.54322606e-0

AttributeError: 'RubikTransformer' object has no attribute 'state_mapping'

In [14]:
# Extract the transformer's state with nnx.state — presumably the first step
# towards pickling it.
# NOTE(review): the original intent was to save buffer / buffer_list with
# pickle, but nothing is written to disk in this cell and `pickle` is not
# used here — confirm where the actual save happens.
import pickle

state_weight = nnx.state(transformer)

In [15]:
# Display the extracted state.
# NOTE(review): this renders every parameter array in full in the cell
# output; consider showing only a summary (e.g. shapes) before sharing.
state_weight

State({
  'action_mapping': {
    'bias': VariableState(
      type=Param,
      value=Array([ 4.04955857e-02,  2.82790326e-02, -7.85927773e-02,  7.52160996e-02,
             -6.11112965e-03,  6.96982583e-03, -1.17343664e-02, -1.74523471e-03,
              1.28632234e-02, -7.83682019e-02,  2.75444221e-02, -4.00350802e-02,
              1.79233290e-02,  8.38570073e-02,  2.03401130e-02,  4.92124483e-02,
              8.69528428e-02,  2.20998153e-02, -3.42875794e-02, -6.76687211e-02,
              2.17811018e-02,  8.36544111e-02, -3.08539756e-02, -8.56901798e-03,
             -6.66830465e-02,  1.15918748e-01,  5.94779989e-03,  1.72799546e-02,
             -1.16014622e-01, -6.75882176e-02, -6.16184436e-02, -5.52975051e-02,
              4.17982265e-02, -5.43787293e-02,  1.19193546e-01, -9.40112211e-03,
             -4.03175130e-02, -3.47817354e-02, -6.77642366e-03,  8.85512680e-02,
              4.83243428e-02,  6.59283325e-02, -5.58541063e-03,  3.46533172e-02,
             -6.63223118e-02