In [1]:
from google.colab import drive
drive.mount('/content/drive')

import os
# Use the absolute path under the mount point: a relative "drive/..." path only
# resolves while the working directory is still /content, so re-running this
# cell after the first chdir would raise FileNotFoundError.
os.chdir("/content/drive/MyDrive/Colab Notebooks/VQ-Dynamics")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!ls

autoregressive_patch_transformer_checkpoints  images_jax
autoregressive_prediction.mp4		      __pycache__
Dynamics.ipynb				      vqvae_checkpoints
encode.py				      vqvae.mp4
failed_mdtransformer_checkpoints	      vqvae.py
failed_transformer_checkpoints


In [None]:
!nvidia-smi

Tue Jul 22 09:56:28 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   57C    P0             21W /   72W |       0MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import jax
import jax.numpy as jnp
import flax.linen as nn
from flax.training import train_state, checkpoints
import optax
import numpy as np
import os
from functools import partial
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from vqvae import (
    generate_image as generate_image_numpy,
    VQVAETrainState,
    create_dataset,
    generate_image,
    eval_step,
    train_step as vq_train_step,
    VQVAE,
)

In [None]:
# Shared VQ-VAE settings. load_vq() builds the model from these values and the
# guarded training cell forwards them to vq_training().
vq_latent_dim = 32  # passed as VQVAE(latent_dim=...)
vq_num_embeddings = 512  # passed as VQVAE(num_embeddings=...) and TokenTransformer(num_embeddings=...)
vq_commitment_cost = 0.25  # passed as VQVAE(commitment_cost=...)
npix = 128  # image side length; model init uses a (1, npix, npix, 1) dummy input
train_vq = False  # when True, the training cell below calls vq_training(...)

## Vector-Quantised Variational Auto-Encoder Training

In [None]:
def vq_training(
    batch_size=32,
    num_epochs=10,
    lr=1e-3,
    latent_dim=32,
    num_embeddings=512,
    commitment_cost=0.25,
    npix=128,
    vq_directory='./vqvae_checkpoints',
    dataset_size=10000,
):
    """Train (or resume training) the VQ-VAE and checkpoint it after every epoch.

    Args:
        batch_size: Batch size handed to ``create_dataset``.
        num_epochs: Number of passes; a fresh data generator is created per epoch.
        lr: Adam learning rate.
        latent_dim: Forwarded to ``VQVAE(latent_dim=...)``.
        num_embeddings: Forwarded to ``VQVAE(num_embeddings=...)``.
        commitment_cost: Forwarded to ``VQVAE(commitment_cost=...)``.
        npix: Image side length; parameters are initialised with a
            ``(1, npix, npix, 1)`` dummy input.
        vq_directory: Checkpoint directory (created if missing).
        dataset_size: ``size`` argument for ``create_dataset``; also used to
            size the progress bar (``dataset_size // batch_size``).

    Returns:
        The train state after the last epoch (the restored/initial state if
        ``num_epochs`` is 0).
    """
    # --- Define an absolute directory for checkpoints ---
    ckpt_dir = os.path.abspath(vq_directory)
    os.makedirs(ckpt_dir, exist_ok=True)

    # --- Initialization ---
    key = jax.random.PRNGKey(42)
    # The third key is unused, but the 3-way split is kept so that model_key
    # (and therefore the initial weights) is the same as before.
    key, model_key, _ = jax.random.split(key, 3)

    model = VQVAE(
        latent_dim=latent_dim,
        num_embeddings=num_embeddings,
        commitment_cost=commitment_cost
    )

    # Initialize model parameters
    dummy_input = jnp.ones((1, npix, npix, 1))
    params = model.init(model_key, dummy_input)['params']

    # Initialize optimizer and state
    optimizer = optax.adam(learning_rate=lr)
    state = VQVAETrainState.create(apply_fn=model.apply, params=params, tx=optimizer)

    # --- Restore checkpoint if one exists ---
    # restore_checkpoint RETURNS the restored state (nothing is mutated in
    # place); when the directory holds no checkpoint it returns `target` as-is.
    state = checkpoints.restore_checkpoint(ckpt_dir=ckpt_dir, target=state)
    print(f"Resuming training from step: {int(state.step)}")

    # --- Training Loop ---
    print("Starting VQ-VAE training...")
    batches_per_epoch = dataset_size // batch_size
    for epoch in range(num_epochs):
        # Create a new data generator for each epoch
        train_loader = create_dataset(npix, batch_size, size=dataset_size)

        # Use tqdm for a progress bar
        pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}", total=batches_per_epoch)

        epoch_metrics = { 'loss': [], 'recon_loss': [], 'vq_loss': [], 'perplexity': [] }

        for batch in pbar:
            key, train_key = jax.random.split(key)
            state, metrics = vq_train_step(state, batch, train_key, model)
            for k, v in metrics.items():
                epoch_metrics[k].append(v)

            pbar.set_postfix(
                loss=f"{metrics['loss']:.4f}",
                recon_loss=f"{metrics['recon_loss']:.4f}",
                perp=f"{metrics['perplexity']:.2f}"
            )

        # --- Save checkpoint at the end of the epoch ---
        checkpoints.save_checkpoint(
            ckpt_dir=ckpt_dir,
            target=state,
            step=int(state.step),  # plain int for the checkpoint file name
            overwrite=False, # Set to True to save only the latest checkpoint
            keep=3          # Keep the 3 most recent checkpoints
        )

        # Print average metrics for the epoch
        averages = {
            name: np.mean([float(value) for value in values])
            for name, values in epoch_metrics.items()
        }
        avg_loss = averages['loss']
        avg_recon_loss = averages['recon_loss']
        avg_vq_loss = averages['vq_loss']
        avg_perp = averages['perplexity']

        print(f"Epoch {epoch + 1} Avg: Loss={avg_loss:.4f}, Recon Loss={avg_recon_loss:.4f}, VQ Loss={avg_vq_loss:.4f}, Perplexity={avg_perp:.4f}")

    return state


def vq_reconstruction_animation(state, model):
    """Render a bouncing-point sequence and its VQ-VAE reconstruction to video.

    Every frame is generated with ``generate_image``, passed through
    ``eval_step(state, frame, model)`` and written to ``images_jax/`` as PNG
    (original and reconstruction). A three-panel animation (original,
    reconstruction, RGB overlay) is then saved to ``vqvae.mp4``.

    Reads the notebook-level ``npix``. Returns nothing.
    """
    # --- Animation Generation ---
    print("\n🎥 Generating animation...")
    os.makedirs('images_jax', exist_ok=True)

    num_points, num_frames, sigma = 200, 100, 0.01
    pixel_centres = np.linspace(0, 1, npix, endpoint=False) + 1 / (2 * npix)
    xx, yy = np.meshgrid(pixel_centres, pixel_centres)

    positions = np.random.uniform(0, 1, (num_points, 2))
    speeds = np.random.uniform(-0.005, 0.005, (num_points, 2))

    originals, reconstructions = [], []

    for idx in tqdm(range(num_frames), desc="Generating frames"):
        truth = generate_image(positions, xx, yy, sigma, npix)
        originals.append(truth)
        plt.imsave(f'images_jax/orig_frame_{idx:04d}.png', truth, cmap='gray')

        # Model input carries a leading and a trailing singleton axis: (1, H, W, 1)
        batched = jnp.array(truth)[jnp.newaxis, ..., jnp.newaxis]
        decoded = eval_step(state, batched, model)

        # Back to NumPy so matplotlib can consume it
        decoded_img = np.array(decoded.squeeze())
        reconstructions.append(decoded_img)
        plt.imsave(f'images_jax/recon_frame_{idx:04d}.png', decoded_img, cmap='gray')

        # Advance the points, then reverse the velocity component of any point
        # that has left the unit square along that axis.
        positions += speeds
        for axis in (0, 1):
            escaped = (positions[:, axis] < 0) | (positions[:, axis] > 1)
            speeds[escaped, axis] *= -1

    # Side-by-side figure: original | reconstruction | colour overlay
    fig, panels = plt.subplots(1, 3, figsize=(15, 5))
    panel_titles = ('Original', 'Reconstructed', 'Original (R) Reconstructed (G, B)')
    for panel, title in zip(panels, panel_titles):
        panel.set_title(title)
        panel.axis('off')
    plt.tight_layout()

    def overlay(i):
        # Original in the red channel, reconstruction in green and blue.
        return np.stack([originals[i], reconstructions[i], reconstructions[i]], axis=-1)

    artists = [
        panels[0].imshow(originals[0], cmap='gray', animated=True),
        panels[1].imshow(reconstructions[0], cmap='gray', animated=True),
        panels[2].imshow(overlay(0), cmap='gray', animated=True),
    ]
    frame_sources = (originals.__getitem__, reconstructions.__getitem__, overlay)

    def update(frame):
        for artist, fetch in zip(artists, frame_sources):
            artist.set_array(fetch(frame))
        return artists

    ani = animation.FuncAnimation(fig, update, frames=num_frames, interval=50, blit=True)
    # Alternative GIF export: ani.save('vqvae_animation_jax.gif', writer='pillow')
    ani.save('vqvae.mp4', writer='ffmpeg', fps=20)
    plt.close(fig)

In [None]:
# Optional (re)training of the VQ-VAE; skipped unless train_vq is True.
if train_vq:
  vq_training(
      batch_size=32,
      num_epochs=20,
      lr=1e-3,
      latent_dim=vq_latent_dim,
      num_embeddings=vq_num_embeddings,
      commitment_cost=vq_commitment_cost,
      # Use the shared constant so training matches the resolution that
      # load_vq() and the data generators use.
      npix=npix,
      vq_directory='./vqvae_checkpoints',
  )

## Shared Functions

In [None]:
def load_vq():
    """Build the VQ-VAE and restore its weights from ``./vqvae_checkpoints``.

    The model is configured from the notebook-level ``vq_latent_dim``,
    ``vq_num_embeddings``, ``vq_commitment_cost`` and ``npix``.

    Returns:
        ``(vqvae_state, vqvae_model)``: the restored train state and the model.

    Raises:
        FileNotFoundError: if the checkpoint directory holds no checkpoint.
    """
    print("🧊 Loading pre-trained VQ-VAE model...")
    vq_ckpt_dir = os.path.abspath('./vqvae_checkpoints')
    # restore_checkpoint silently returns `target` unchanged when nothing is
    # found, so a missing checkpoint must be detected explicitly; otherwise
    # randomly initialised weights would be reported as "loaded".
    if checkpoints.latest_checkpoint(vq_ckpt_dir) is None:
        raise FileNotFoundError(f"No VQ-VAE checkpoint found in {vq_ckpt_dir}")

    vqvae_model = VQVAE(
        latent_dim=vq_latent_dim, num_embeddings=vq_num_embeddings, commitment_cost=vq_commitment_cost
    )
    key = jax.random.PRNGKey(42)
    key, vq_key = jax.random.split(key)
    dummy_vq_input = jnp.ones((1, npix, npix, 1))
    vq_params = vqvae_model.init(vq_key, dummy_vq_input)['params']
    # The freshly initialised state only serves as the structural template
    # ("target") that the checkpoint is restored into.
    vqvae_state = train_state.TrainState.create(
        apply_fn=vqvae_model.apply, params=vq_params, tx=optax.adam(1e-3)
    )
    vqvae_state = checkpoints.restore_checkpoint(ckpt_dir=vq_ckpt_dir, target=vqvae_state)
    print("✅ VQ-VAE model loaded successfully.")
    return vqvae_state, vqvae_model

# Load the VQ-VAE once at notebook scope; later cells pass these objects to the
# animation, tokeniser and transformer training code.
vqvae_state, vqvae_model = load_vq()

🧊 Loading pre-trained VQ-VAE model...




✅ VQ-VAE model loaded successfully.


In [None]:
# Sanity-check the tokenizer visually: writes PNG frames to images_jax/ and the
# side-by-side video to vqvae.mp4.
vq_reconstruction_animation(vqvae_state, vqvae_model)

In [None]:
def get_trajectory(num_frames):
    """Simulate one random bouncing-point trajectory and render every frame.

    Reads the notebook-level ``npix`` and ``num_points``. Positions and
    velocities are drawn from NumPy's global random state.

    Returns:
        ``(all_frames, orig_imgs)``: a JAX array stacking the frames along a
        new leading axis with a trailing singleton axis appended, and the
        Python list of the individual rendered frames.
    """
    pixel_centres = np.linspace(0, 1, npix, endpoint=False) + .5 / npix
    grid_x, grid_y = np.meshgrid(pixel_centres, pixel_centres)
    positions = np.random.uniform(0, 1, (num_points, 2))
    speeds = np.random.uniform(-0.005, 0.005, (num_points, 2))

    rendered = []
    for _ in range(num_frames):
        rendered.append(
            generate_image_numpy(positions, grid_x, grid_y, sigma=0.01, npix=npix)
        )
        positions += speeds
        # Reverse the velocity component of any point outside [0, 1] on that axis.
        for axis in (0, 1):
            escaped = (positions[:, axis] < 0) | (positions[:, axis] > 1)
            speeds[escaped, axis] *= -1

    stacked = np.stack(rendered, axis=0)
    return jnp.array(stacked[..., np.newaxis]), rendered


def get_tokeniser_fns(vqvae_state):
    """Return jitted ``(get_tokens_fn, decode_fn)`` closures over ``vqvae_state``.

    ``get_tokens_fn(imgs)`` applies ``VQVAE.encode_to_indices`` and flattens
    the result to ``(imgs.shape[0], -1)``. ``decode_fn(indices)`` applies
    ``VQVAE.decode_from_indices``.
    """

    @jax.jit
    def get_tokens_fn(imgs):
        indices = vqvae_state.apply_fn(
            {'params': vqvae_state.params},
            imgs,
            method=VQVAE.encode_to_indices,
        )
        # One flat row of token ids per input image.
        return indices.reshape(imgs.shape[0], -1)

    @jax.jit
    def decode_fn(indices):
        return vqvae_state.apply_fn(
            {'params': vqvae_state.params},
            indices,
            method=VQVAE.decode_from_indices,
        )

    return get_tokens_fn, decode_fn


def animate_prediction(
    animation_path, orig_imgs, all_frames, all_tokens, predicted_token_grids, get_tokens_fn, decode_fn,
    history_len,
):
    """Decode predicted tokens and save a 2x2 comparison animation.

    Panels: ground truth, transformer prediction (the first ``history_len``
    frames are copied from the ground truth), an RGB overlay of the two, and
    the encode-decode round trip of the ground-truth tokens.

    Args:
        animation_path: Output video path (written with the ffmpeg writer).
        orig_imgs: List of ground-truth frames (it is concatenated with a list).
        all_frames: Sequence of frames; only its length is used here.
        all_tokens: Tokens of every frame, reshapeable to ``(num_frames, 16, 16)``.
        predicted_token_grids: Predicted tokens, reshapeable to
            ``(num_frames - history_len, 16, 16)``.
        get_tokens_fn: Unused; kept so existing call sites keep working.
        decode_fn: Maps one token grid to an image; it is vmapped over frames.
        history_len: Number of leading ground-truth frames used as context.

    Returns:
        ``(predicted_frames, check_decoder)``: the decoded predictions and the
        decoded ground-truth tokens.
    """
    print("🎥 Generating prediction and creating animation...")
    num_frames = len(all_frames)
    num_predictions = num_frames - history_len
    grid_side = 16  # each frame is a 16x16 token grid (tokens_per_frame = 16 * 16)

    # Decode all predicted token grids into images in a single batch
    vmapped_decode_fn = jax.vmap(decode_fn)
    predicted_frames = vmapped_decode_fn(
        predicted_token_grids.reshape(num_predictions, grid_side, grid_side)
    )

    # Round trip of the true tokens, to separate tokenizer error from
    # transformer error.
    check_decoder = vmapped_decode_fn(all_tokens.reshape(num_frames, grid_side, grid_side))

    # Combine history with predictions
    pred_imgs = orig_imgs[:history_len] + [np.array(frame.squeeze()) for frame in predicted_frames]

    # matplotlib requires float RGB data in [0, 1]; clip explicitly (this is
    # what imshow would do anyway, but with a warning on every frame).
    overlay = np.clip(np.stack([orig_imgs, pred_imgs, pred_imgs], axis=-1), 0.0, 1.0)

    # --- Create and save the final animation ---
    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
    titles = [
        'Ground Truth',
        'Transformer Prediction',
        'Truth (R), Prediction (G, B)',
        'Encode-Decode',
    ]
    im_data = [
        orig_imgs,
        pred_imgs,
        overlay,
        check_decoder,
    ]
    ims = []
    for ax, title, x in zip(axes.flatten(), titles, im_data):
        ax.set_title(title)
        ax.axis('off')
        ims.append(ax.imshow(x[0], cmap='gray', animated=True, vmin=0, vmax=1))
    plt.tight_layout()

    def update(frame):
        for im, data in zip(ims, im_data):
            im.set_array(data[frame])
        return ims

    ani = animation.FuncAnimation(fig, update, frames=num_frames, interval=50, blit=True)
    ani.save(animation_path, writer='ffmpeg', fps=20, dpi=100)
    plt.close(fig)
    print(f"✅ Animation saved to '{animation_path}'")
    return predicted_frames, check_decoder


def metrics(predicted_tokens, all_tokens, decoded_imgs_pred, decoded_imgs_gt):
    """Compute and print per-frame token accuracy and image MSE.

    The two token sequences, and the two image sequences, are paired
    positionally with ``zip``, so the caller must pass them already aligned.

    Args:
        predicted_tokens: Iterable of predicted token arrays, one per frame.
        all_tokens: Iterable of reference token arrays.
        decoded_imgs_pred: Iterable of decoded predicted images.
        decoded_imgs_gt: Iterable of reference images.

    Returns:
        Dict with ``"Tokens Correct (%)"`` (percentage of matching tokens per
        frame) and ``"Frame Losses"`` (mean squared error per frame), each as
        a NumPy array.
    """
    perc_correct = [
        (pred == gt).mean().item() * 100
        for pred, gt in zip(predicted_tokens, all_tokens)
    ]
    image_losses = [
        np.mean((pred - gt) ** 2)
        for pred, gt in zip(decoded_imgs_pred, decoded_imgs_gt)
    ]
    out = {"Tokens Correct (%)": np.array(perc_correct), "Frame Losses": np.array(image_losses)}
    for k, vs in out.items():
        # Only percentage metrics get a "%" suffix; frame losses are plain MSE.
        unit = "%" if k.endswith("(%)") else ""
        for i, v in enumerate(vs):
            print(f"{k} in the {i}th predicted frame: {v}{unit}")
    return out


def get_batch(points, velocities, history_len, vqvae_state):
    """Simulate, render and tokenise ``history_len + 1`` frames per sample.

    Starting from ``points`` / ``velocities`` (batched as produced by
    ``create_point_cloud_dataset``), the points are advanced with bouncing at
    the unit-square walls, each step is rendered with
    ``generate_images_from_points_jax`` and encoded with
    ``VQVAE.encode_to_indices``. Reads the notebook-level ``npix``.

    Returns:
        Token array reshaped to ``(batch, -1)``: all frames of a sample
        flattened into a single sequence.
    """
    def advance(carry, _):
        pos, vel = carry
        new_pos = pos + vel
        # Flip the velocity component wherever the new position is out of [0, 1].
        outside = (new_pos < 0) | (new_pos > 1)
        new_vel = jnp.where(outside, -vel, vel)
        return (new_pos, new_vel), new_pos

    _, trajectory = jax.lax.scan(advance, (points, velocities), None, length=history_len + 1)
    # scan stacks the per-step outputs on the leading axis; swap it with the batch axis.
    trajectory = jnp.transpose(trajectory, (1, 0, 2, 3))

    pixel_centres = jnp.linspace(0, 1, npix, endpoint=False) + .5 / npix
    xx, yy = jnp.meshgrid(pixel_centres, pixel_centres)

    # Vectorise the single-frame renderer over the two leading axes.
    render_frames = jax.vmap(generate_images_from_points_jax, in_axes=(0, None, None, None))
    render_batch = jax.vmap(render_frames, in_axes=(0, None, None, None))
    frames = render_batch(trajectory, xx, yy, 0.01)[..., jnp.newaxis]

    def encode(images):
        return vqvae_state.apply_fn({'params': vqvae_state.params}, images, method=VQVAE.encode_to_indices)

    tokens = jax.vmap(jax.vmap(encode))(frames)
    return tokens.reshape(tokens.shape[0], -1)


def generate_images_from_points_jax(points, xx, yy, sigma):
    """Render points as a sum of isotropic Gaussians on a pixel grid (JAX).

    Written for a single frame so that it can be vectorised with ``jax.vmap``.

    Args:
        points: ``(num_points, 2)`` array of coordinates.
        xx, yy: 2-D coordinate grids of equal shape (e.g. from ``jnp.meshgrid``).
        sigma: Standard deviation of each Gaussian.

    Returns:
        Image with the shape of ``xx``, divided by its maximum when that
        maximum is positive.
    """
    grid = jnp.stack((xx, yy), axis=-1)[jnp.newaxis, ...]
    centres = points[:, jnp.newaxis, jnp.newaxis, :]
    # Squared distance from every point to every pixel.
    sq_dist = jnp.square(grid - centres).sum(axis=-1)
    image = jnp.exp(-sq_dist / (2 * sigma**2)).sum(axis=0)
    peak = image.max()
    # Leave an all-zero image untouched instead of dividing by zero.
    return jnp.where(peak > 0, image / peak, image)


def create_point_cloud_dataset(batch_size, history_len, num_points, num_samples=5000):
    """Yield ``num_samples // batch_size`` batches of random initial conditions.

    Each item is a ``(points, velocities)`` pair of JAX arrays of shape
    ``(batch_size, num_points, 2)``; points are uniform on [0, 1) and
    velocities uniform on [-0.005, 0.005), drawn from NumPy's global random
    state. ``history_len`` is accepted but not used by the generator.
    """
    batch_shape = (batch_size, num_points, 2)
    remaining = num_samples // batch_size
    while remaining > 0:
        start_positions = np.random.uniform(0, 1, batch_shape)
        start_velocities = np.random.uniform(-0.005, 0.005, batch_shape)
        yield jnp.array(start_positions), jnp.array(start_velocities)
        remaining -= 1

In [None]:
# Transformer / data-generation settings shared by the cells below.
num_points = 200  # points per simulated cloud; arrays are shaped (..., num_points, 2)
history_len = 4  # get_batch() produces history_len + 1 frames per training sample
tokens_per_frame = 16 * 16  # must agree with the 16x16 token grid assumed in animate_prediction()
max_seq_len = (history_len + 1) * tokens_per_frame  # tokens in one flattened training sequence
transformer_embed_dim = 256
transformer_num_heads = 4
transformer_num_layers = 4
transformer_lr = 1e-4  # Adam learning rate for the transformer
transformer_batch_size = 256
transformer_epochs = 1000
top_k_sampling = 50  # presumably consumed by sampling code outside the cells shown here; verify
sampling_temperature = 1.2  # presumably consumed by sampling code outside the cells shown here; verify
key = jax.random.PRNGKey(42)  # root PRNG key; later cells split it and rebind `key`

## Auto-Regressive Patch Transformer

In [None]:
class TokenTransformer(nn.Module):
    """Causal transformer over token ids, producing next-token logits.

    Token embeddings plus a learned positional embedding are fed through
    ``num_layers`` blocks, each applying LayerNorm before causal
    self-attention and before a 4x-wide GELU MLP, with residual additions,
    followed by a final LayerNorm and a projection to ``num_embeddings`` logits.

    NOTE(review): sub-modules are created inline under ``@nn.compact``, so
    their auto-generated parameter names presumably depend on creation order;
    reordering these calls may break existing checkpoints. Confirm before
    refactoring.
    """
    # Vocabulary size: number of embedding rows and width of the output logits.
    num_embeddings: int
    # Number of attention heads per block.
    num_heads: int
    # Number of transformer blocks.
    num_layers: int
    # Width of the token/positional embeddings and of the residual stream.
    embed_dim: int
    # Longest supported sequence; sizes the positional-embedding table.
    max_len: int

    @nn.compact
    def __call__(self, x, training: bool):
        """Return logits for integer token ids ``x``.

        Args:
            x: Integer token ids; axis 1 is treated as the sequence axis
                (assumed shape (batch, seq_len) — TODO confirm).
            training: Enables dropout when True.

        Returns:
            Logits whose last axis has size ``num_embeddings``.
        """
        causal_mask = nn.make_causal_mask(x)
        tok_emb = nn.Embed(num_embeddings=self.num_embeddings, features=self.embed_dim)(x)
        pos_emb = self.param('pos_embedding', nn.initializers.normal(stddev=0.02), (1, self.max_len, self.embed_dim))
        # Only the first x.shape[1] positions are used, so shorter sequences work too.
        x = tok_emb + pos_emb[:, :x.shape[1], :]
        x = nn.Dropout(rate=0.1)(x, deterministic=not training)

        for _ in range(self.num_layers):
            # Attention sub-layer: norm -> causal self-attention -> dropout -> residual.
            x_norm = nn.LayerNorm()(x)
            attn_output = nn.SelfAttention(
                num_heads=self.num_heads,
                dtype=jnp.bfloat16 # attention is computed in bfloat16 (mixed precision) for speed
            )(x_norm, mask=causal_mask)
            attn_output = nn.Dropout(rate=0.1)(attn_output, deterministic=not training)
            x = x + attn_output
            # Feed-forward sub-layer: norm -> Dense(4x) -> GELU -> Dense -> dropout -> residual.
            x_norm = nn.LayerNorm()(x)
            y = nn.Dense(features=self.embed_dim * 4)(x_norm)
            y = nn.gelu(y)
            ff_output = nn.Dense(features=self.embed_dim)(y)
            ff_output = nn.Dropout(rate=0.1)(ff_output, deterministic=not training)
            x = x + ff_output
        x = nn.LayerNorm()(x)
        logits = nn.Dense(features=self.num_embeddings)(x)
        return logits


@partial(jax.jit, static_argnums=(5, 6, 7, 8))
def train_step(transformer_state, vqvae_state, points, velocities, key, model, history_len, npix, num_points):
    """One jitted optimisation step; token data is generated inside the step.

    ``get_batch`` turns the initial ``points`` / ``velocities`` into token
    sequences, and the transformer is trained to predict each token from the
    ones before it (inputs are the sequence without its last token, targets
    the sequence without its first). ``model``, ``history_len``, ``npix`` and
    ``num_points`` are static arguments; ``npix`` and ``num_points`` are not
    referenced in the body.

    Returns:
        ``(updated transformer_state, mean cross-entropy loss)``.
    """
    # Second half of the split is unused; the dropout key is additionally
    # folded with the step counter.
    step_key, _ = jax.random.split(key)
    step_key = jax.random.fold_in(step_key, transformer_state.step)

    token_batch = get_batch(points, velocities, history_len, vqvae_state)
    context, next_tokens = token_batch[:, :-1], token_batch[:, 1:]

    def cross_entropy(params):
        logits = model.apply({'params': params}, context, training=True, rngs={'dropout': step_key})
        per_token = optax.softmax_cross_entropy_with_integer_labels(
            logits=logits, labels=next_tokens
        )
        return per_token.mean()

    loss, grads = jax.value_and_grad(cross_entropy)(transformer_state.params)
    return transformer_state.apply_gradients(grads=grads), loss

In [None]:
# --- Initialize Transformer Model ---
print("🤖 Initializing Transformer model...")
transformer_model = TokenTransformer(
    num_embeddings=vq_num_embeddings, num_heads=transformer_num_heads, num_layers=transformer_num_layers,
    embed_dim=transformer_embed_dim, max_len=max_seq_len
)
# NOTE: this rebinds the notebook-level `key`, so re-running the cell yields
# different initial parameters.
key, transformer_key, dropout_key = jax.random.split(key, 3)
# Training inputs drop the last token of each sequence, hence max_seq_len - 1.
dummy_transformer_input = jnp.ones((1, max_seq_len - 1), dtype=jnp.int32)
transformer_params = transformer_model.init(
    {'params': transformer_key, 'dropout': dropout_key}, dummy_transformer_input, training=False
)['params']
transformer_state = train_state.TrainState.create(
    apply_fn=transformer_model.apply, params=transformer_params, tx=optax.adam(learning_rate=transformer_lr)
)

# --- Define an absolute directory for checkpoints ---
ckpt_dir = os.path.abspath('./autoregressive_patch_transformer_checkpoints')
os.makedirs(ckpt_dir, exist_ok=True)

# --- Restore checkpoint if one exists ---
# restore_checkpoint returns the restored state (nothing is updated in place),
# so the result must be assigned back. If no checkpoint exists, it returns the
# `target` object unchanged and training starts from step 0.
transformer_state = checkpoints.restore_checkpoint(ckpt_dir=ckpt_dir, target=transformer_state)
print(f"Resuming training from step: {int(transformer_state.step)}")

🤖 Initializing Transformer model...




Resuming training from step: 18069


In [None]:
# # Check the checkpoint has loaded:
# data_loader = create_point_cloud_dataset(128, history_len, num_points)
# pbar = tqdm(data_loader)
# total_loss = 0
# for i, (points, velocities) in enumerate(pbar):
#     key, train_key = jax.random.split(key)
#     transformer_state, loss = train_step(
#         transformer_state, vqvae_state, points, velocities, train_key, transformer_model,
#         history_len, npix, num_points
#     )
#     total_loss += loss
# print(f"Avg Loss: {total_loss / (i+1):.4f}")

39it [01:14,  1.91s/it]


Avg Loss: 1.8119


In [None]:
# --- Phase 3: Train the Transformer ---
# Every epoch draws fresh random initial conditions, so there is no fixed
# training set. Checkpoints are written every 50th epoch and after the final
# epoch, so the last stretch of training is not lost.
print("🚀 Starting Transformer training...")
for epoch in range(transformer_epochs):
    data_loader = create_point_cloud_dataset(
        transformer_batch_size, history_len, num_points
    )
    pbar = tqdm(data_loader, desc=f"Epoch {epoch + 1}/{transformer_epochs}")
    total_loss = 0
    num_batches = 0
    for points, velocities in pbar:
        key, train_key = jax.random.split(key)
        transformer_state, loss = train_step(
            transformer_state, vqvae_state, points, velocities, train_key, transformer_model,
            history_len, npix, num_points
        )
        total_loss += loss
        num_batches += 1
        pbar.set_postfix(loss=f"{loss:.4f}")

    if num_batches == 0:
        # Previously this surfaced as a NameError on the loop index.
        raise RuntimeError(
            "create_point_cloud_dataset yielded no batches; "
            "transformer_batch_size is larger than the number of samples per epoch."
        )

    is_last_epoch = epoch == transformer_epochs - 1
    if epoch % 50 == 0 or is_last_epoch:
        # --- Save checkpoint at the end of the epoch ---
        checkpoints.save_checkpoint(
            ckpt_dir=ckpt_dir,
            target=transformer_state,
            step=int(transformer_state.step),  # plain int for the checkpoint file name
            overwrite=False, # Set to True to save only the latest checkpoint
            keep=3          # Keep the 3 most recent checkpoints
        )
    print(f"Epoch {epoch + 1} Avg Loss: {total_loss / num_batches:.4f}")


🚀 Starting Transformer training...


Epoch 1/1000: 19it [00:46,  2.47s/it, loss=5.0022]


Epoch 1 Avg Loss: 5.4344


Epoch 2/1000: 19it [00:07,  2.45it/s, loss=4.7136]


Epoch 2 Avg Loss: 4.8299


Epoch 3/1000: 19it [00:07,  2.45it/s, loss=4.5732]


Epoch 3 Avg Loss: 4.6356


Epoch 4/1000: 19it [00:07,  2.45it/s, loss=4.4915]


Epoch 4 Avg Loss: 4.5275


Epoch 5/1000: 19it [00:07,  2.45it/s, loss=4.4357]


Epoch 5 Avg Loss: 4.4555


Epoch 6/1000: 19it [00:07,  2.45it/s, loss=4.3890]


Epoch 6 Avg Loss: 4.3991


Epoch 7/1000: 19it [00:07,  2.45it/s, loss=4.3542]


Epoch 7 Avg Loss: 4.3642


Epoch 8/1000: 19it [00:07,  2.45it/s, loss=4.3240]


Epoch 8 Avg Loss: 4.3428


Epoch 9/1000: 19it [00:07,  2.45it/s, loss=4.3148]


Epoch 9 Avg Loss: 4.3247


Epoch 10/1000: 19it [00:07,  2.44it/s, loss=4.3073]


Epoch 10 Avg Loss: 4.3157


Epoch 11/1000: 19it [00:07,  2.44it/s, loss=4.3003]


Epoch 11 Avg Loss: 4.3035


Epoch 12/1000: 19it [00:07,  2.44it/s, loss=4.3002]


Epoch 12 Avg Loss: 4.2987


Epoch 13/1000: 19it [00:07,  2.44it/s, loss=4.2859]


Epoch 13 Avg Loss: 4.2930


Epoch 14/1000: 19it [00:07,  2.44it/s, loss=4.2801]


Epoch 14 Avg Loss: 4.2852


Epoch 15/1000: 19it [00:07,  2.44it/s, loss=4.2716]


Epoch 15 Avg Loss: 4.2810


Epoch 16/1000: 19it [00:07,  2.44it/s, loss=4.2770]


Epoch 16 Avg Loss: 4.2787


Epoch 17/1000: 19it [00:07,  2.44it/s, loss=4.2732]


Epoch 17 Avg Loss: 4.2746


Epoch 18/1000: 19it [00:07,  2.45it/s, loss=4.2699]


Epoch 18 Avg Loss: 4.2707


Epoch 19/1000: 19it [00:07,  2.44it/s, loss=4.2731]


Epoch 19 Avg Loss: 4.2690


Epoch 20/1000: 19it [00:07,  2.44it/s, loss=4.2622]


Epoch 20 Avg Loss: 4.2646


Epoch 21/1000: 19it [00:07,  2.44it/s, loss=4.2559]


Epoch 21 Avg Loss: 4.2631


Epoch 22/1000: 19it [00:07,  2.44it/s, loss=4.2619]


Epoch 22 Avg Loss: 4.2601


Epoch 23/1000: 19it [00:07,  2.44it/s, loss=4.2491]


Epoch 23 Avg Loss: 4.2568


Epoch 24/1000: 19it [00:07,  2.45it/s, loss=4.2635]


Epoch 24 Avg Loss: 4.2556


Epoch 25/1000: 19it [00:07,  2.44it/s, loss=4.2583]


Epoch 25 Avg Loss: 4.2545


Epoch 26/1000: 19it [00:07,  2.45it/s, loss=4.2589]


Epoch 26 Avg Loss: 4.2534


Epoch 27/1000: 19it [00:07,  2.44it/s, loss=4.2470]


Epoch 27 Avg Loss: 4.2523


Epoch 28/1000: 19it [00:07,  2.44it/s, loss=4.2583]


Epoch 28 Avg Loss: 4.2504


Epoch 29/1000: 19it [00:07,  2.44it/s, loss=4.2364]


Epoch 29 Avg Loss: 4.2478


Epoch 30/1000: 19it [00:07,  2.44it/s, loss=4.2471]


Epoch 30 Avg Loss: 4.2451


Epoch 31/1000: 19it [00:07,  2.44it/s, loss=4.2530]


Epoch 31 Avg Loss: 4.2439


Epoch 32/1000: 19it [00:07,  2.45it/s, loss=4.2455]


Epoch 32 Avg Loss: 4.2450


Epoch 33/1000: 19it [00:07,  2.44it/s, loss=4.2581]


Epoch 33 Avg Loss: 4.2443


Epoch 34/1000: 19it [00:07,  2.44it/s, loss=4.2452]


Epoch 34 Avg Loss: 4.2443


Epoch 35/1000: 19it [00:07,  2.44it/s, loss=4.2429]


Epoch 35 Avg Loss: 4.2422


Epoch 36/1000: 19it [00:07,  2.45it/s, loss=4.2362]


Epoch 36 Avg Loss: 4.2404


Epoch 37/1000: 19it [00:07,  2.45it/s, loss=4.2451]


Epoch 37 Avg Loss: 4.2389


Epoch 38/1000: 19it [00:07,  2.45it/s, loss=4.2339]


Epoch 38 Avg Loss: 4.2390


Epoch 39/1000: 19it [00:07,  2.45it/s, loss=4.2425]


Epoch 39 Avg Loss: 4.2349


Epoch 40/1000: 19it [00:07,  2.45it/s, loss=4.2323]


Epoch 40 Avg Loss: 4.2352


Epoch 41/1000: 19it [00:07,  2.45it/s, loss=4.2286]


Epoch 41 Avg Loss: 4.2350


Epoch 42/1000: 19it [00:07,  2.45it/s, loss=4.2274]


Epoch 42 Avg Loss: 4.2327


Epoch 43/1000: 19it [00:07,  2.45it/s, loss=4.2268]


Epoch 43 Avg Loss: 4.2348


Epoch 44/1000: 19it [00:07,  2.45it/s, loss=4.2336]


Epoch 44 Avg Loss: 4.2349


Epoch 45/1000: 19it [00:07,  2.45it/s, loss=4.2225]


Epoch 45 Avg Loss: 4.2317


Epoch 46/1000: 19it [00:07,  2.45it/s, loss=4.2435]


Epoch 46 Avg Loss: 4.2314


Epoch 47/1000: 19it [00:07,  2.45it/s, loss=4.2362]


Epoch 47 Avg Loss: 4.2313


Epoch 48/1000: 19it [00:07,  2.45it/s, loss=4.2312]


Epoch 48 Avg Loss: 4.2305


Epoch 49/1000: 19it [00:07,  2.45it/s, loss=4.2410]


Epoch 49 Avg Loss: 4.2292


Epoch 50/1000: 19it [00:07,  2.45it/s, loss=4.2325]


Epoch 50 Avg Loss: 4.2277


Epoch 51/1000: 19it [00:07,  2.45it/s, loss=4.2192]


Epoch 51 Avg Loss: 4.2265


Epoch 52/1000: 19it [00:07,  2.45it/s, loss=4.2256]


Epoch 52 Avg Loss: 4.2269


Epoch 53/1000: 19it [00:07,  2.45it/s, loss=4.2232]


Epoch 53 Avg Loss: 4.2262


Epoch 54/1000: 19it [00:07,  2.45it/s, loss=4.2218]


Epoch 54 Avg Loss: 4.2244


Epoch 55/1000: 19it [00:07,  2.45it/s, loss=4.2199]


Epoch 55 Avg Loss: 4.2247


Epoch 56/1000: 19it [00:07,  2.45it/s, loss=4.2268]


Epoch 56 Avg Loss: 4.2233


Epoch 57/1000: 19it [00:07,  2.45it/s, loss=4.2277]


Epoch 57 Avg Loss: 4.2236


Epoch 58/1000: 19it [00:07,  2.45it/s, loss=4.2356]


Epoch 58 Avg Loss: 4.2218


Epoch 59/1000: 19it [00:07,  2.45it/s, loss=4.2241]


Epoch 59 Avg Loss: 4.2244


Epoch 60/1000: 19it [00:07,  2.45it/s, loss=4.2230]


Epoch 60 Avg Loss: 4.2195


Epoch 61/1000: 19it [00:07,  2.45it/s, loss=4.2170]


Epoch 61 Avg Loss: 4.2207


Epoch 62/1000: 19it [00:07,  2.45it/s, loss=4.2175]


Epoch 62 Avg Loss: 4.2181


Epoch 63/1000: 19it [00:07,  2.45it/s, loss=4.2170]


Epoch 63 Avg Loss: 4.2200


Epoch 64/1000: 19it [00:07,  2.45it/s, loss=4.2194]


Epoch 64 Avg Loss: 4.2180


Epoch 65/1000: 19it [00:07,  2.44it/s, loss=4.2151]


Epoch 65 Avg Loss: 4.2164


Epoch 66/1000: 19it [00:07,  2.44it/s, loss=4.2251]


Epoch 66 Avg Loss: 4.2173


Epoch 67/1000: 19it [00:07,  2.44it/s, loss=4.2102]


Epoch 67 Avg Loss: 4.2174


Epoch 68/1000: 19it [00:07,  2.44it/s, loss=4.2130]


Epoch 68 Avg Loss: 4.2164


Epoch 69/1000: 19it [00:07,  2.44it/s, loss=4.2111]


Epoch 69 Avg Loss: 4.2168


Epoch 70/1000: 19it [00:07,  2.45it/s, loss=4.2167]


Epoch 70 Avg Loss: 4.2161


Epoch 71/1000: 19it [00:07,  2.45it/s, loss=4.2151]


Epoch 71 Avg Loss: 4.2133


Epoch 72/1000: 19it [00:07,  2.45it/s, loss=4.2172]


Epoch 72 Avg Loss: 4.2149


Epoch 73/1000: 19it [00:07,  2.45it/s, loss=4.2220]


Epoch 73 Avg Loss: 4.2175


Epoch 74/1000: 19it [00:07,  2.45it/s, loss=4.2244]


Epoch 74 Avg Loss: 4.2145


Epoch 75/1000: 19it [00:07,  2.45it/s, loss=4.2124]


Epoch 75 Avg Loss: 4.2140


Epoch 76/1000: 19it [00:07,  2.45it/s, loss=4.2166]


Epoch 76 Avg Loss: 4.2121


Epoch 77/1000: 19it [00:07,  2.45it/s, loss=4.2103]


Epoch 77 Avg Loss: 4.2101


Epoch 78/1000: 19it [00:07,  2.45it/s, loss=4.2178]


Epoch 78 Avg Loss: 4.2130


Epoch 79/1000: 19it [00:07,  2.45it/s, loss=4.2078]


Epoch 79 Avg Loss: 4.2105


Epoch 80/1000: 19it [00:07,  2.45it/s, loss=4.2094]


Epoch 80 Avg Loss: 4.2106


Epoch 81/1000: 19it [00:07,  2.45it/s, loss=4.2071]


Epoch 81 Avg Loss: 4.2083


Epoch 82/1000: 19it [00:07,  2.45it/s, loss=4.2195]


Epoch 82 Avg Loss: 4.2131


Epoch 83/1000: 19it [00:07,  2.45it/s, loss=4.2055]


Epoch 83 Avg Loss: 4.2094


Epoch 84/1000: 19it [00:07,  2.45it/s, loss=4.2069]


Epoch 84 Avg Loss: 4.2100


Epoch 85/1000: 19it [00:07,  2.45it/s, loss=4.2185]


Epoch 85 Avg Loss: 4.2099


Epoch 86/1000: 19it [00:07,  2.45it/s, loss=4.2004]


Epoch 86 Avg Loss: 4.2076


Epoch 87/1000: 19it [00:07,  2.45it/s, loss=4.2148]


Epoch 87 Avg Loss: 4.2083


Epoch 88/1000: 19it [00:07,  2.45it/s, loss=4.2065]


Epoch 88 Avg Loss: 4.2081


Epoch 89/1000: 19it [00:07,  2.45it/s, loss=4.2132]


Epoch 89 Avg Loss: 4.2083


Epoch 90/1000: 19it [00:07,  2.45it/s, loss=4.2133]


Epoch 90 Avg Loss: 4.2069


Epoch 91/1000: 19it [00:07,  2.45it/s, loss=4.2040]


Epoch 91 Avg Loss: 4.2075


Epoch 92/1000: 19it [00:07,  2.45it/s, loss=4.2025]


Epoch 92 Avg Loss: 4.2075


Epoch 93/1000: 19it [00:07,  2.45it/s, loss=4.1976]


Epoch 93 Avg Loss: 4.2054


Epoch 94/1000: 19it [00:07,  2.45it/s, loss=4.2057]


Epoch 94 Avg Loss: 4.2059


Epoch 95/1000: 19it [00:07,  2.45it/s, loss=4.1968]


Epoch 95 Avg Loss: 4.2054


Epoch 96/1000: 19it [00:07,  2.45it/s, loss=4.2150]


Epoch 96 Avg Loss: 4.2067


Epoch 97/1000: 19it [00:07,  2.45it/s, loss=4.2026]


Epoch 97 Avg Loss: 4.2061


Epoch 98/1000: 19it [00:07,  2.45it/s, loss=4.1947]


Epoch 98 Avg Loss: 4.2039


Epoch 99/1000: 19it [00:07,  2.45it/s, loss=4.2074]


Epoch 99 Avg Loss: 4.2050


Epoch 100/1000: 19it [00:07,  2.45it/s, loss=4.2039]


Epoch 100 Avg Loss: 4.2038


Epoch 101/1000: 19it [00:07,  2.45it/s, loss=4.2042]


Epoch 101 Avg Loss: 4.2044


Epoch 102/1000: 19it [00:07,  2.45it/s, loss=4.2037]


Epoch 102 Avg Loss: 4.2014


Epoch 103/1000: 19it [00:07,  2.45it/s, loss=4.2013]


Epoch 103 Avg Loss: 4.2032


Epoch 104/1000: 19it [00:07,  2.45it/s, loss=4.1967]


Epoch 104 Avg Loss: 4.2025


Epoch 105/1000: 19it [00:07,  2.45it/s, loss=4.1934]


Epoch 105 Avg Loss: 4.2028


Epoch 106/1000: 19it [00:07,  2.45it/s, loss=4.2155]


Epoch 106 Avg Loss: 4.2036


Epoch 107/1000: 19it [00:07,  2.45it/s, loss=4.2089]


Epoch 107 Avg Loss: 4.2020


Epoch 108/1000: 19it [00:07,  2.46it/s, loss=4.1990]


Epoch 108 Avg Loss: 4.2047


Epoch 109/1000: 19it [00:07,  2.45it/s, loss=4.2067]


Epoch 109 Avg Loss: 4.2025


Epoch 110/1000: 19it [00:07,  2.45it/s, loss=4.2030]


Epoch 110 Avg Loss: 4.1996


Epoch 111/1000: 19it [00:07,  2.45it/s, loss=4.2027]


Epoch 111 Avg Loss: 4.1995


Epoch 112/1000: 19it [00:07,  2.45it/s, loss=4.1999]


Epoch 112 Avg Loss: 4.1990


Epoch 113/1000: 19it [00:07,  2.45it/s, loss=4.1978]


Epoch 113 Avg Loss: 4.1969


Epoch 114/1000: 19it [00:07,  2.45it/s, loss=4.2086]


Epoch 114 Avg Loss: 4.1979


Epoch 115/1000: 19it [00:07,  2.45it/s, loss=4.1981]


Epoch 115 Avg Loss: 4.2007


Epoch 116/1000: 19it [00:07,  2.45it/s, loss=4.1928]


Epoch 116 Avg Loss: 4.1945


Epoch 117/1000: 19it [00:07,  2.45it/s, loss=4.2058]


Epoch 117 Avg Loss: 4.1975


Epoch 118/1000: 19it [00:07,  2.45it/s, loss=4.1886]


Epoch 118 Avg Loss: 4.1943


Epoch 119/1000: 19it [00:07,  2.45it/s, loss=4.1925]


Epoch 119 Avg Loss: 4.1969


Epoch 120/1000: 19it [00:07,  2.45it/s, loss=4.1888]


Epoch 120 Avg Loss: 4.1936


Epoch 121/1000: 19it [00:07,  2.45it/s, loss=4.1877]


Epoch 121 Avg Loss: 4.1931


Epoch 122/1000: 19it [00:07,  2.45it/s, loss=4.1875]


Epoch 122 Avg Loss: 4.1919


Epoch 123/1000: 19it [00:07,  2.45it/s, loss=4.1670]


Epoch 123 Avg Loss: 4.1855


Epoch 124/1000: 19it [00:07,  2.45it/s, loss=4.1770]


Epoch 124 Avg Loss: 4.1803


Epoch 125/1000: 19it [00:07,  2.45it/s, loss=4.1709]


Epoch 125 Avg Loss: 4.1741


Epoch 126/1000: 19it [00:07,  2.45it/s, loss=4.1616]


Epoch 126 Avg Loss: 4.1649


Epoch 127/1000: 19it [00:07,  2.45it/s, loss=4.1414]


Epoch 127 Avg Loss: 4.1476


Epoch 128/1000: 19it [00:07,  2.45it/s, loss=4.1238]


Epoch 128 Avg Loss: 4.1298


Epoch 129/1000: 19it [00:07,  2.45it/s, loss=4.1099]


Epoch 129 Avg Loss: 4.1160


Epoch 130/1000: 19it [00:07,  2.45it/s, loss=4.0894]


Epoch 130 Avg Loss: 4.0978


Epoch 131/1000: 19it [00:07,  2.45it/s, loss=4.0584]


Epoch 131 Avg Loss: 4.0740


Epoch 132/1000: 19it [00:07,  2.45it/s, loss=4.0339]


Epoch 132 Avg Loss: 4.0548


Epoch 133/1000: 19it [00:07,  2.45it/s, loss=4.0286]


Epoch 133 Avg Loss: 4.0343


Epoch 134/1000: 19it [00:07,  2.45it/s, loss=3.9992]


Epoch 134 Avg Loss: 4.0111


Epoch 135/1000: 19it [00:07,  2.45it/s, loss=3.9777]


Epoch 135 Avg Loss: 3.9893


Epoch 136/1000: 19it [00:07,  2.45it/s, loss=3.9422]


Epoch 136 Avg Loss: 3.9572


Epoch 137/1000: 19it [00:07,  2.45it/s, loss=3.8990]


Epoch 137 Avg Loss: 3.9176


Epoch 138/1000: 19it [00:07,  2.45it/s, loss=3.8300]


Epoch 138 Avg Loss: 3.8671


Epoch 139/1000: 19it [00:07,  2.45it/s, loss=3.7839]


Epoch 139 Avg Loss: 3.8099


Epoch 140/1000: 19it [00:07,  2.45it/s, loss=3.7447]


Epoch 140 Avg Loss: 3.7647


Epoch 141/1000: 19it [00:07,  2.45it/s, loss=3.6938]


Epoch 141 Avg Loss: 3.7203


Epoch 142/1000: 19it [00:07,  2.45it/s, loss=3.6034]


Epoch 142 Avg Loss: 3.6498


Epoch 143/1000: 19it [00:07,  2.45it/s, loss=3.4870]


Epoch 143 Avg Loss: 3.5394


Epoch 144/1000: 19it [00:07,  2.45it/s, loss=3.2709]


Epoch 144 Avg Loss: 3.3797


Epoch 145/1000: 19it [00:07,  2.45it/s, loss=2.9652]


Epoch 145 Avg Loss: 3.1216


Epoch 146/1000: 19it [00:07,  2.45it/s, loss=2.6528]


Epoch 146 Avg Loss: 2.7952


Epoch 147/1000: 19it [00:07,  2.45it/s, loss=2.4512]


Epoch 147 Avg Loss: 2.5428


Epoch 148/1000: 19it [00:07,  2.45it/s, loss=2.3429]


Epoch 148 Avg Loss: 2.3911


Epoch 149/1000: 19it [00:07,  2.45it/s, loss=2.2777]


Epoch 149 Avg Loss: 2.3095


Epoch 150/1000: 19it [00:07,  2.45it/s, loss=2.2549]


Epoch 150 Avg Loss: 2.2648


Epoch 151/1000: 19it [00:07,  2.45it/s, loss=2.2294]


Epoch 151 Avg Loss: 2.2391


Epoch 152/1000: 19it [00:07,  2.45it/s, loss=2.2106]


Epoch 152 Avg Loss: 2.2220


Epoch 153/1000: 19it [00:07,  2.45it/s, loss=2.2041]


Epoch 153 Avg Loss: 2.2119


Epoch 154/1000: 19it [00:07,  2.45it/s, loss=2.1999]


Epoch 154 Avg Loss: 2.2031


Epoch 155/1000: 19it [00:07,  2.44it/s, loss=2.1868]


Epoch 155 Avg Loss: 2.1916


Epoch 156/1000: 19it [00:07,  2.45it/s, loss=2.1739]


Epoch 156 Avg Loss: 2.1791


Epoch 157/1000: 19it [00:07,  2.45it/s, loss=2.1625]


Epoch 157 Avg Loss: 2.1720


Epoch 158/1000: 19it [00:07,  2.45it/s, loss=2.1640]


Epoch 158 Avg Loss: 2.1623


Epoch 159/1000: 19it [00:07,  2.45it/s, loss=2.1477]


Epoch 159 Avg Loss: 2.1532


Epoch 160/1000: 19it [00:07,  2.45it/s, loss=2.1368]


Epoch 160 Avg Loss: 2.1479


Epoch 161/1000: 19it [00:07,  2.45it/s, loss=2.1361]


Epoch 161 Avg Loss: 2.1428


Epoch 162/1000: 19it [00:07,  2.45it/s, loss=2.1372]


Epoch 162 Avg Loss: 2.1391


Epoch 163/1000: 19it [00:07,  2.45it/s, loss=2.1351]


Epoch 163 Avg Loss: 2.1345


Epoch 164/1000: 19it [00:07,  2.45it/s, loss=2.1293]


Epoch 164 Avg Loss: 2.1315


Epoch 165/1000: 19it [00:07,  2.45it/s, loss=2.1256]


Epoch 165 Avg Loss: 2.1272


Epoch 166/1000: 19it [00:07,  2.45it/s, loss=2.1146]


Epoch 166 Avg Loss: 2.1225


Epoch 167/1000: 19it [00:07,  2.45it/s, loss=2.1192]


Epoch 167 Avg Loss: 2.1188


Epoch 168/1000: 19it [00:07,  2.45it/s, loss=2.1159]


Epoch 168 Avg Loss: 2.1156


Epoch 169/1000: 19it [00:07,  2.44it/s, loss=2.1083]


Epoch 169 Avg Loss: 2.1134


Epoch 170/1000: 19it [00:07,  2.45it/s, loss=2.1108]


Epoch 170 Avg Loss: 2.1111


Epoch 171/1000: 19it [00:07,  2.45it/s, loss=2.1077]


Epoch 171 Avg Loss: 2.1077


Epoch 172/1000: 19it [00:07,  2.45it/s, loss=2.1000]


Epoch 172 Avg Loss: 2.1041


Epoch 173/1000: 19it [00:07,  2.45it/s, loss=2.1003]


Epoch 173 Avg Loss: 2.1004


Epoch 174/1000: 19it [00:07,  2.45it/s, loss=2.0966]


Epoch 174 Avg Loss: 2.0977


Epoch 175/1000: 19it [00:07,  2.45it/s, loss=2.0923]


Epoch 175 Avg Loss: 2.0950


Epoch 176/1000: 19it [00:07,  2.44it/s, loss=2.0902]


Epoch 176 Avg Loss: 2.0939


Epoch 177/1000: 19it [00:07,  2.45it/s, loss=2.0946]


Epoch 177 Avg Loss: 2.0941


Epoch 178/1000: 19it [00:07,  2.45it/s, loss=2.0950]


Epoch 178 Avg Loss: 2.0914


Epoch 179/1000: 19it [00:07,  2.45it/s, loss=2.0949]


Epoch 179 Avg Loss: 2.0889


Epoch 180/1000: 19it [00:07,  2.45it/s, loss=2.0903]


Epoch 180 Avg Loss: 2.0890


Epoch 181/1000: 19it [00:07,  2.45it/s, loss=2.0901]


Epoch 181 Avg Loss: 2.0882


Epoch 182/1000: 19it [00:07,  2.45it/s, loss=2.0834]


Epoch 182 Avg Loss: 2.0875


Epoch 183/1000: 19it [00:07,  2.45it/s, loss=2.0809]


Epoch 183 Avg Loss: 2.0851


Epoch 184/1000: 19it [00:07,  2.45it/s, loss=2.0843]


Epoch 184 Avg Loss: 2.0842


Epoch 185/1000: 19it [00:07,  2.45it/s, loss=2.0822]


Epoch 185 Avg Loss: 2.0830


Epoch 186/1000: 19it [00:07,  2.45it/s, loss=2.0806]


Epoch 186 Avg Loss: 2.0810


Epoch 187/1000: 19it [00:07,  2.45it/s, loss=2.0799]


Epoch 187 Avg Loss: 2.0802


Epoch 188/1000: 19it [00:07,  2.45it/s, loss=2.0797]


Epoch 188 Avg Loss: 2.0789


Epoch 189/1000: 19it [00:07,  2.45it/s, loss=2.0758]


Epoch 189 Avg Loss: 2.0781


Epoch 190/1000: 19it [00:07,  2.45it/s, loss=2.0796]


Epoch 190 Avg Loss: 2.0786


Epoch 191/1000: 19it [00:07,  2.45it/s, loss=2.0768]


Epoch 191 Avg Loss: 2.0767


Epoch 192/1000: 19it [00:07,  2.45it/s, loss=2.0766]


Epoch 192 Avg Loss: 2.0766


Epoch 193/1000: 19it [00:07,  2.45it/s, loss=2.0726]


Epoch 193 Avg Loss: 2.0732


Epoch 194/1000: 19it [00:07,  2.45it/s, loss=2.0750]


Epoch 194 Avg Loss: 2.0743


Epoch 195/1000: 19it [00:07,  2.45it/s, loss=2.0736]


Epoch 195 Avg Loss: 2.0734


Epoch 196/1000: 19it [00:07,  2.45it/s, loss=2.0736]


Epoch 196 Avg Loss: 2.0743


Epoch 197/1000: 19it [00:07,  2.45it/s, loss=2.0762]


Epoch 197 Avg Loss: 2.0723


Epoch 198/1000: 19it [00:07,  2.45it/s, loss=2.0642]


Epoch 198 Avg Loss: 2.0703


Epoch 199/1000: 19it [00:07,  2.45it/s, loss=2.0702]


Epoch 199 Avg Loss: 2.0710


Epoch 200/1000: 19it [00:07,  2.45it/s, loss=2.0674]


Epoch 200 Avg Loss: 2.0703


Epoch 201/1000: 19it [00:07,  2.45it/s, loss=2.0692]


Epoch 201 Avg Loss: 2.0710


Epoch 202/1000: 19it [00:07,  2.45it/s, loss=2.0604]


Epoch 202 Avg Loss: 2.0668


Epoch 203/1000: 19it [00:07,  2.45it/s, loss=2.0676]


Epoch 203 Avg Loss: 2.0671


Epoch 204/1000: 19it [00:07,  2.45it/s, loss=2.0661]


Epoch 204 Avg Loss: 2.0652


Epoch 205/1000: 19it [00:07,  2.45it/s, loss=2.0651]


Epoch 205 Avg Loss: 2.0651


Epoch 206/1000: 19it [00:07,  2.45it/s, loss=2.0608]


Epoch 206 Avg Loss: 2.0658


Epoch 207/1000: 19it [00:07,  2.45it/s, loss=2.0571]


Epoch 207 Avg Loss: 2.0630


Epoch 208/1000: 19it [00:07,  2.45it/s, loss=2.0619]


Epoch 208 Avg Loss: 2.0623


Epoch 209/1000: 19it [00:07,  2.45it/s, loss=2.0585]


Epoch 209 Avg Loss: 2.0622


Epoch 210/1000: 19it [00:07,  2.45it/s, loss=2.0579]


Epoch 210 Avg Loss: 2.0609


Epoch 211/1000: 19it [00:07,  2.45it/s, loss=2.0565]


Epoch 211 Avg Loss: 2.0612


Epoch 212/1000: 19it [00:07,  2.45it/s, loss=2.0607]


Epoch 212 Avg Loss: 2.0601


Epoch 213/1000: 19it [00:07,  2.45it/s, loss=2.0620]


Epoch 213 Avg Loss: 2.0600


Epoch 214/1000: 19it [00:07,  2.45it/s, loss=2.0573]


Epoch 214 Avg Loss: 2.0603


Epoch 215/1000: 19it [00:07,  2.45it/s, loss=2.0520]


Epoch 215 Avg Loss: 2.0599


Epoch 216/1000: 19it [00:07,  2.45it/s, loss=2.0646]


Epoch 216 Avg Loss: 2.0598


Epoch 217/1000: 19it [00:07,  2.45it/s, loss=2.0590]


Epoch 217 Avg Loss: 2.0581


Epoch 218/1000: 19it [00:07,  2.45it/s, loss=2.0525]


Epoch 218 Avg Loss: 2.0577


Epoch 219/1000: 19it [00:07,  2.45it/s, loss=2.0587]


Epoch 219 Avg Loss: 2.0582


Epoch 220/1000: 19it [00:07,  2.45it/s, loss=2.0597]


Epoch 220 Avg Loss: 2.0570


Epoch 221/1000: 19it [00:07,  2.45it/s, loss=2.0548]


Epoch 221 Avg Loss: 2.0579


Epoch 222/1000: 19it [00:07,  2.45it/s, loss=2.0541]


Epoch 222 Avg Loss: 2.0570


Epoch 223/1000: 19it [00:07,  2.45it/s, loss=2.0577]


Epoch 223 Avg Loss: 2.0570


Epoch 224/1000: 19it [00:07,  2.45it/s, loss=2.0625]


Epoch 224 Avg Loss: 2.0566


Epoch 225/1000: 19it [00:07,  2.45it/s, loss=2.0545]


Epoch 225 Avg Loss: 2.0545


Epoch 226/1000: 19it [00:07,  2.45it/s, loss=2.0522]


Epoch 226 Avg Loss: 2.0561


Epoch 227/1000: 19it [00:07,  2.45it/s, loss=2.0519]


Epoch 227 Avg Loss: 2.0570


Epoch 228/1000: 19it [00:07,  2.45it/s, loss=2.0549]


Epoch 228 Avg Loss: 2.0553


Epoch 229/1000: 19it [00:07,  2.45it/s, loss=2.0535]


Epoch 229 Avg Loss: 2.0536


Epoch 230/1000: 19it [00:07,  2.45it/s, loss=2.0473]


Epoch 230 Avg Loss: 2.0538


Epoch 231/1000: 19it [00:07,  2.46it/s, loss=2.0530]


Epoch 231 Avg Loss: 2.0534


Epoch 232/1000: 19it [00:07,  2.45it/s, loss=2.0577]


Epoch 232 Avg Loss: 2.0536


Epoch 233/1000: 19it [00:07,  2.45it/s, loss=2.0491]


Epoch 233 Avg Loss: 2.0512


Epoch 234/1000: 19it [00:07,  2.45it/s, loss=2.0560]


Epoch 234 Avg Loss: 2.0500


Epoch 235/1000: 19it [00:07,  2.44it/s, loss=2.0511]


Epoch 235 Avg Loss: 2.0513


Epoch 236/1000: 19it [00:07,  2.44it/s, loss=2.0517]


Epoch 236 Avg Loss: 2.0524


Epoch 237/1000: 19it [00:07,  2.44it/s, loss=2.0546]


Epoch 237 Avg Loss: 2.0519


Epoch 238/1000: 19it [00:07,  2.44it/s, loss=2.0497]


Epoch 238 Avg Loss: 2.0510


Epoch 239/1000: 19it [00:07,  2.44it/s, loss=2.0460]


Epoch 239 Avg Loss: 2.0484


Epoch 240/1000: 19it [00:07,  2.45it/s, loss=2.0518]


Epoch 240 Avg Loss: 2.0500


Epoch 241/1000: 19it [00:07,  2.45it/s, loss=2.0499]


Epoch 241 Avg Loss: 2.0501


Epoch 242/1000: 19it [00:07,  2.45it/s, loss=2.0492]


Epoch 242 Avg Loss: 2.0500


Epoch 243/1000: 19it [00:07,  2.45it/s, loss=2.0424]


Epoch 243 Avg Loss: 2.0479


Epoch 244/1000: 19it [00:07,  2.45it/s, loss=2.0410]


Epoch 244 Avg Loss: 2.0485


Epoch 245/1000: 19it [00:07,  2.45it/s, loss=2.0456]


Epoch 245 Avg Loss: 2.0496


Epoch 246/1000: 19it [00:07,  2.44it/s, loss=2.0488]


Epoch 246 Avg Loss: 2.0485


Epoch 247/1000: 19it [00:07,  2.44it/s, loss=2.0505]


Epoch 247 Avg Loss: 2.0490


Epoch 248/1000: 19it [00:07,  2.45it/s, loss=2.0528]


Epoch 248 Avg Loss: 2.0485


Epoch 249/1000: 19it [00:07,  2.45it/s, loss=2.0443]


Epoch 249 Avg Loss: 2.0491


Epoch 250/1000: 19it [00:07,  2.45it/s, loss=2.0512]


Epoch 250 Avg Loss: 2.0478


Epoch 251/1000: 19it [00:07,  2.45it/s, loss=2.0470]


Epoch 251 Avg Loss: 2.0481


Epoch 252/1000: 19it [00:07,  2.45it/s, loss=2.0476]


Epoch 252 Avg Loss: 2.0470


Epoch 253/1000: 19it [00:07,  2.45it/s, loss=2.0477]


Epoch 253 Avg Loss: 2.0487


Epoch 254/1000: 19it [00:07,  2.44it/s, loss=2.0475]


Epoch 254 Avg Loss: 2.0477


Epoch 255/1000: 19it [00:07,  2.44it/s, loss=2.0476]


Epoch 255 Avg Loss: 2.0471


Epoch 256/1000: 19it [00:07,  2.44it/s, loss=2.0516]


Epoch 256 Avg Loss: 2.0472


Epoch 257/1000: 19it [00:07,  2.44it/s, loss=2.0453]


Epoch 257 Avg Loss: 2.0467


Epoch 258/1000: 19it [00:07,  2.45it/s, loss=2.0451]


Epoch 258 Avg Loss: 2.0450


Epoch 259/1000: 19it [00:07,  2.44it/s, loss=2.0430]


Epoch 259 Avg Loss: 2.0474


Epoch 260/1000: 19it [00:07,  2.44it/s, loss=2.0435]


Epoch 260 Avg Loss: 2.0464


Epoch 261/1000: 19it [00:07,  2.45it/s, loss=2.0459]


Epoch 261 Avg Loss: 2.0441


Epoch 262/1000: 19it [00:07,  2.44it/s, loss=2.0512]


Epoch 262 Avg Loss: 2.0460


Epoch 263/1000: 19it [00:07,  2.45it/s, loss=2.0410]


Epoch 263 Avg Loss: 2.0459


Epoch 264/1000: 19it [00:07,  2.45it/s, loss=2.0479]


Epoch 264 Avg Loss: 2.0458


Epoch 265/1000: 19it [00:07,  2.44it/s, loss=2.0457]


Epoch 265 Avg Loss: 2.0455


Epoch 266/1000: 19it [00:07,  2.44it/s, loss=2.0496]


Epoch 266 Avg Loss: 2.0448


Epoch 267/1000: 19it [00:07,  2.44it/s, loss=2.0400]


Epoch 267 Avg Loss: 2.0450


Epoch 268/1000: 19it [00:07,  2.44it/s, loss=2.0475]


Epoch 268 Avg Loss: 2.0443


Epoch 269/1000: 19it [00:07,  2.45it/s, loss=2.0458]


Epoch 269 Avg Loss: 2.0426


Epoch 270/1000: 19it [00:07,  2.45it/s, loss=2.0436]


Epoch 270 Avg Loss: 2.0428


Epoch 271/1000: 19it [00:07,  2.45it/s, loss=2.0412]


Epoch 271 Avg Loss: 2.0435


Epoch 272/1000: 19it [00:07,  2.45it/s, loss=2.0404]


Epoch 272 Avg Loss: 2.0434


Epoch 273/1000: 19it [00:07,  2.45it/s, loss=2.0485]


Epoch 273 Avg Loss: 2.0412


Epoch 274/1000: 19it [00:07,  2.45it/s, loss=2.0446]


Epoch 274 Avg Loss: 2.0423


Epoch 275/1000: 19it [00:07,  2.45it/s, loss=2.0465]


Epoch 275 Avg Loss: 2.0430


Epoch 276/1000: 19it [00:07,  2.45it/s, loss=2.0402]


Epoch 276 Avg Loss: 2.0408


Epoch 277/1000: 19it [00:07,  2.45it/s, loss=2.0450]


Epoch 277 Avg Loss: 2.0418


Epoch 278/1000: 19it [00:07,  2.45it/s, loss=2.0389]


Epoch 278 Avg Loss: 2.0420


Epoch 279/1000: 19it [00:07,  2.45it/s, loss=2.0434]


Epoch 279 Avg Loss: 2.0419


Epoch 280/1000: 19it [00:07,  2.44it/s, loss=2.0409]


Epoch 280 Avg Loss: 2.0399


Epoch 281/1000: 19it [00:07,  2.44it/s, loss=2.0359]


Epoch 281 Avg Loss: 2.0397


Epoch 282/1000: 19it [00:07,  2.45it/s, loss=2.0403]


Epoch 282 Avg Loss: 2.0402


Epoch 283/1000: 19it [00:07,  2.44it/s, loss=2.0405]


Epoch 283 Avg Loss: 2.0390


Epoch 284/1000: 19it [00:07,  2.45it/s, loss=2.0328]


Epoch 284 Avg Loss: 2.0383


Epoch 285/1000: 19it [00:07,  2.45it/s, loss=2.0395]


Epoch 285 Avg Loss: 2.0373


Epoch 286/1000: 19it [00:07,  2.45it/s, loss=2.0393]


Epoch 286 Avg Loss: 2.0372


Epoch 287/1000: 19it [00:07,  2.45it/s, loss=2.0369]


Epoch 287 Avg Loss: 2.0376


Epoch 288/1000: 19it [00:07,  2.45it/s, loss=2.0406]


Epoch 288 Avg Loss: 2.0384


Epoch 289/1000: 19it [00:07,  2.45it/s, loss=2.0387]


Epoch 289 Avg Loss: 2.0383


Epoch 290/1000: 19it [00:07,  2.44it/s, loss=2.0337]


Epoch 290 Avg Loss: 2.0369


Epoch 291/1000: 19it [00:07,  2.44it/s, loss=2.0302]


Epoch 291 Avg Loss: 2.0349


Epoch 292/1000: 19it [00:07,  2.44it/s, loss=2.0365]


Epoch 292 Avg Loss: 2.0359


Epoch 293/1000: 19it [00:07,  2.44it/s, loss=2.0374]


Epoch 293 Avg Loss: 2.0361


Epoch 294/1000: 19it [00:07,  2.44it/s, loss=2.0352]


Epoch 294 Avg Loss: 2.0351


Epoch 295/1000: 19it [00:07,  2.44it/s, loss=2.0389]


Epoch 295 Avg Loss: 2.0354


Epoch 296/1000: 19it [00:07,  2.45it/s, loss=2.0342]


Epoch 296 Avg Loss: 2.0364


Epoch 297/1000: 19it [00:07,  2.45it/s, loss=2.0367]


Epoch 297 Avg Loss: 2.0351


Epoch 298/1000: 19it [00:07,  2.45it/s, loss=2.0382]


Epoch 298 Avg Loss: 2.0360


Epoch 299/1000: 19it [00:07,  2.45it/s, loss=2.0310]


Epoch 299 Avg Loss: 2.0340


Epoch 300/1000: 19it [00:07,  2.45it/s, loss=2.0346]


Epoch 300 Avg Loss: 2.0334


Epoch 301/1000: 19it [00:07,  2.45it/s, loss=2.0353]


Epoch 301 Avg Loss: 2.0336


Epoch 302/1000: 19it [00:07,  2.45it/s, loss=2.0314]


Epoch 302 Avg Loss: 2.0323


Epoch 303/1000: 19it [00:07,  2.45it/s, loss=2.0265]


Epoch 303 Avg Loss: 2.0329


Epoch 304/1000: 19it [00:07,  2.45it/s, loss=2.0282]


Epoch 304 Avg Loss: 2.0313


Epoch 305/1000: 19it [00:07,  2.45it/s, loss=2.0370]


Epoch 305 Avg Loss: 2.0324


Epoch 306/1000: 19it [00:07,  2.44it/s, loss=2.0345]


Epoch 306 Avg Loss: 2.0318


Epoch 307/1000: 19it [00:07,  2.44it/s, loss=2.0308]


Epoch 307 Avg Loss: 2.0326


Epoch 308/1000: 19it [00:07,  2.45it/s, loss=2.0329]


Epoch 308 Avg Loss: 2.0308


Epoch 309/1000: 19it [00:07,  2.45it/s, loss=2.0272]


Epoch 309 Avg Loss: 2.0313


Epoch 310/1000: 19it [00:07,  2.45it/s, loss=2.0322]


Epoch 310 Avg Loss: 2.0305


Epoch 311/1000: 19it [00:07,  2.45it/s, loss=2.0282]


Epoch 311 Avg Loss: 2.0308


Epoch 312/1000: 19it [00:07,  2.45it/s, loss=2.0294]


Epoch 312 Avg Loss: 2.0316


Epoch 313/1000: 19it [00:07,  2.45it/s, loss=2.0321]


Epoch 313 Avg Loss: 2.0314


Epoch 314/1000: 19it [00:07,  2.45it/s, loss=2.0338]


Epoch 314 Avg Loss: 2.0315


Epoch 315/1000: 19it [00:07,  2.45it/s, loss=2.0271]


Epoch 315 Avg Loss: 2.0308


Epoch 316/1000: 19it [00:07,  2.45it/s, loss=2.0267]


Epoch 316 Avg Loss: 2.0315


Epoch 317/1000: 19it [00:07,  2.44it/s, loss=2.0342]


Epoch 317 Avg Loss: 2.0305


Epoch 318/1000: 19it [00:07,  2.45it/s, loss=2.0306]


Epoch 318 Avg Loss: 2.0322


Epoch 319/1000: 19it [00:07,  2.44it/s, loss=2.0273]


Epoch 319 Avg Loss: 2.0296


Epoch 320/1000: 19it [00:07,  2.45it/s, loss=2.0307]


Epoch 320 Avg Loss: 2.0302


Epoch 321/1000: 19it [00:07,  2.45it/s, loss=2.0323]


Epoch 321 Avg Loss: 2.0303


Epoch 322/1000: 19it [00:07,  2.45it/s, loss=2.0281]


Epoch 322 Avg Loss: 2.0279


Epoch 323/1000: 19it [00:07,  2.45it/s, loss=2.0245]


Epoch 323 Avg Loss: 2.0284


Epoch 324/1000: 19it [00:07,  2.44it/s, loss=2.0281]


Epoch 324 Avg Loss: 2.0289


Epoch 325/1000: 19it [00:07,  2.45it/s, loss=2.0268]


Epoch 325 Avg Loss: 2.0268


Epoch 326/1000: 19it [00:07,  2.45it/s, loss=2.0230]


Epoch 326 Avg Loss: 2.0275


Epoch 327/1000: 19it [00:07,  2.44it/s, loss=2.0297]


Epoch 327 Avg Loss: 2.0297


Epoch 328/1000: 19it [00:07,  2.45it/s, loss=2.0210]


Epoch 328 Avg Loss: 2.0271


Epoch 329/1000: 19it [00:07,  2.45it/s, loss=2.0260]


Epoch 329 Avg Loss: 2.0283


Epoch 330/1000: 19it [00:07,  2.45it/s, loss=2.0255]


Epoch 330 Avg Loss: 2.0263


Epoch 331/1000: 19it [00:07,  2.45it/s, loss=2.0266]


Epoch 331 Avg Loss: 2.0289


Epoch 332/1000: 19it [00:07,  2.44it/s, loss=2.0301]


Epoch 332 Avg Loss: 2.0271


Epoch 333/1000: 19it [00:07,  2.45it/s, loss=2.0281]


Epoch 333 Avg Loss: 2.0278


Epoch 334/1000: 19it [00:07,  2.45it/s, loss=2.0287]


Epoch 334 Avg Loss: 2.0281


Epoch 335/1000: 19it [00:07,  2.45it/s, loss=2.0293]


Epoch 335 Avg Loss: 2.0271


Epoch 336/1000: 19it [00:07,  2.44it/s, loss=2.0245]


Epoch 336 Avg Loss: 2.0261


Epoch 337/1000: 19it [00:07,  2.45it/s, loss=2.0314]


Epoch 337 Avg Loss: 2.0262


Epoch 338/1000: 19it [00:07,  2.45it/s, loss=2.0219]


Epoch 338 Avg Loss: 2.0264


Epoch 339/1000: 19it [00:07,  2.45it/s, loss=2.0283]


Epoch 339 Avg Loss: 2.0273


Epoch 340/1000: 19it [00:07,  2.45it/s, loss=2.0297]


Epoch 340 Avg Loss: 2.0285


Epoch 341/1000: 19it [00:07,  2.45it/s, loss=2.0204]


Epoch 341 Avg Loss: 2.0269


Epoch 342/1000: 19it [00:07,  2.44it/s, loss=2.0262]


Epoch 342 Avg Loss: 2.0258


Epoch 343/1000: 19it [00:07,  2.45it/s, loss=2.0235]


Epoch 343 Avg Loss: 2.0258


Epoch 344/1000: 19it [00:07,  2.44it/s, loss=2.0270]


Epoch 344 Avg Loss: 2.0259


Epoch 345/1000: 19it [00:07,  2.44it/s, loss=2.0242]


Epoch 345 Avg Loss: 2.0256


Epoch 346/1000: 19it [00:07,  2.45it/s, loss=2.0248]


Epoch 346 Avg Loss: 2.0269


Epoch 347/1000: 19it [00:07,  2.45it/s, loss=2.0302]


Epoch 347 Avg Loss: 2.0249


Epoch 348/1000: 19it [00:07,  2.45it/s, loss=2.0179]


Epoch 348 Avg Loss: 2.0260


Epoch 349/1000: 19it [00:07,  2.45it/s, loss=2.0260]


Epoch 349 Avg Loss: 2.0256


Epoch 350/1000: 19it [00:07,  2.45it/s, loss=2.0256]


Epoch 350 Avg Loss: 2.0260


Epoch 351/1000: 19it [00:07,  2.45it/s, loss=2.0279]


Epoch 351 Avg Loss: 2.0253


Epoch 352/1000: 19it [00:07,  2.44it/s, loss=2.0323]


Epoch 352 Avg Loss: 2.0273


Epoch 353/1000: 19it [00:07,  2.45it/s, loss=2.0248]


Epoch 353 Avg Loss: 2.0254


Epoch 354/1000: 19it [00:07,  2.45it/s, loss=2.0260]


Epoch 354 Avg Loss: 2.0270


Epoch 355/1000: 19it [00:07,  2.45it/s, loss=2.0321]


Epoch 355 Avg Loss: 2.0267


Epoch 356/1000: 19it [00:07,  2.45it/s, loss=2.0252]


Epoch 356 Avg Loss: 2.0250


Epoch 357/1000: 19it [00:07,  2.45it/s, loss=2.0298]


Epoch 357 Avg Loss: 2.0262


Epoch 358/1000: 19it [00:07,  2.45it/s, loss=2.0245]


Epoch 358 Avg Loss: 2.0259


Epoch 359/1000: 19it [00:07,  2.45it/s, loss=2.0214]


Epoch 359 Avg Loss: 2.0249


Epoch 360/1000: 19it [00:07,  2.45it/s, loss=2.0220]


Epoch 360 Avg Loss: 2.0235


Epoch 361/1000: 19it [00:07,  2.45it/s, loss=2.0255]


Epoch 361 Avg Loss: 2.0234


Epoch 362/1000: 19it [00:07,  2.45it/s, loss=2.0208]


Epoch 362 Avg Loss: 2.0209


Epoch 363/1000: 19it [00:07,  2.45it/s, loss=2.0233]


Epoch 363 Avg Loss: 2.0217


Epoch 364/1000: 19it [00:07,  2.45it/s, loss=2.0184]


Epoch 364 Avg Loss: 2.0214


Epoch 365/1000: 19it [00:07,  2.45it/s, loss=2.0230]


Epoch 365 Avg Loss: 2.0228


Epoch 366/1000: 19it [00:07,  2.44it/s, loss=2.0228]


Epoch 366 Avg Loss: 2.0215


Epoch 367/1000: 19it [00:07,  2.44it/s, loss=2.0259]


Epoch 367 Avg Loss: 2.0225


Epoch 368/1000: 19it [00:07,  2.44it/s, loss=2.0207]


Epoch 368 Avg Loss: 2.0223


Epoch 369/1000: 19it [00:07,  2.45it/s, loss=2.0199]


Epoch 369 Avg Loss: 2.0206


Epoch 370/1000: 19it [00:07,  2.45it/s, loss=2.0214]


Epoch 370 Avg Loss: 2.0215


Epoch 371/1000: 19it [00:07,  2.45it/s, loss=2.0218]


Epoch 371 Avg Loss: 2.0215


Epoch 372/1000: 19it [00:07,  2.44it/s, loss=2.0209]


Epoch 372 Avg Loss: 2.0202


Epoch 373/1000: 19it [00:07,  2.44it/s, loss=2.0222]


Epoch 373 Avg Loss: 2.0210


Epoch 374/1000: 19it [00:07,  2.45it/s, loss=2.0215]


Epoch 374 Avg Loss: 2.0216


Epoch 375/1000: 19it [00:07,  2.44it/s, loss=2.0212]


Epoch 375 Avg Loss: 2.0195


Epoch 376/1000: 19it [00:07,  2.45it/s, loss=2.0222]


Epoch 376 Avg Loss: 2.0216


Epoch 377/1000: 19it [00:07,  2.45it/s, loss=2.0190]


Epoch 377 Avg Loss: 2.0209


Epoch 378/1000: 19it [00:07,  2.45it/s, loss=2.0220]


Epoch 378 Avg Loss: 2.0205


Epoch 379/1000: 19it [00:07,  2.44it/s, loss=2.0194]


Epoch 379 Avg Loss: 2.0209


Epoch 380/1000: 19it [00:07,  2.44it/s, loss=2.0201]


Epoch 380 Avg Loss: 2.0191


Epoch 381/1000: 19it [00:07,  2.44it/s, loss=2.0221]


Epoch 381 Avg Loss: 2.0204


Epoch 382/1000: 19it [00:07,  2.44it/s, loss=2.0226]


Epoch 382 Avg Loss: 2.0186


Epoch 383/1000: 19it [00:07,  2.44it/s, loss=2.0139]


Epoch 383 Avg Loss: 2.0190


Epoch 384/1000: 19it [00:07,  2.45it/s, loss=2.0186]


Epoch 384 Avg Loss: 2.0181


Epoch 385/1000: 19it [00:07,  2.45it/s, loss=2.0140]


Epoch 385 Avg Loss: 2.0192


Epoch 386/1000: 19it [00:07,  2.44it/s, loss=2.0223]


Epoch 386 Avg Loss: 2.0186


Epoch 387/1000: 19it [00:07,  2.44it/s, loss=2.0247]


Epoch 387 Avg Loss: 2.0192


Epoch 388/1000: 19it [00:07,  2.44it/s, loss=2.0176]


Epoch 388 Avg Loss: 2.0182


Epoch 389/1000: 19it [00:07,  2.44it/s, loss=2.0203]


Epoch 389 Avg Loss: 2.0207


Epoch 390/1000: 19it [00:07,  2.44it/s, loss=2.0178]


Epoch 390 Avg Loss: 2.0180


Epoch 391/1000: 19it [00:07,  2.44it/s, loss=2.0192]


Epoch 391 Avg Loss: 2.0176


Epoch 392/1000: 19it [00:07,  2.44it/s, loss=2.0204]


Epoch 392 Avg Loss: 2.0185


Epoch 393/1000: 19it [00:07,  2.45it/s, loss=2.0166]


Epoch 393 Avg Loss: 2.0182


Epoch 394/1000: 19it [00:07,  2.45it/s, loss=2.0166]


Epoch 394 Avg Loss: 2.0170


Epoch 395/1000: 19it [00:07,  2.45it/s, loss=2.0199]


Epoch 395 Avg Loss: 2.0171


Epoch 396/1000: 19it [00:07,  2.45it/s, loss=2.0208]


Epoch 396 Avg Loss: 2.0170


Epoch 397/1000: 19it [00:07,  2.45it/s, loss=2.0188]


Epoch 397 Avg Loss: 2.0170


Epoch 398/1000: 19it [00:07,  2.45it/s, loss=2.0183]


Epoch 398 Avg Loss: 2.0169


Epoch 399/1000: 19it [00:07,  2.45it/s, loss=2.0150]


Epoch 399 Avg Loss: 2.0173


Epoch 400/1000: 19it [00:07,  2.45it/s, loss=2.0118]


Epoch 400 Avg Loss: 2.0160


Epoch 401/1000: 19it [00:07,  2.45it/s, loss=2.0183]


Epoch 401 Avg Loss: 2.0167


Epoch 402/1000: 19it [00:07,  2.45it/s, loss=2.0129]


Epoch 402 Avg Loss: 2.0163


Epoch 403/1000: 19it [00:07,  2.45it/s, loss=2.0092]


Epoch 403 Avg Loss: 2.0155


Epoch 404/1000: 19it [00:07,  2.44it/s, loss=2.0211]


Epoch 404 Avg Loss: 2.0159


Epoch 405/1000: 19it [00:07,  2.44it/s, loss=2.0203]


Epoch 405 Avg Loss: 2.0167


Epoch 406/1000: 19it [00:07,  2.45it/s, loss=2.0196]


Epoch 406 Avg Loss: 2.0135


Epoch 407/1000: 19it [00:07,  2.45it/s, loss=2.0153]


Epoch 407 Avg Loss: 2.0132


Epoch 408/1000: 19it [00:07,  2.45it/s, loss=2.0144]


Epoch 408 Avg Loss: 2.0139


Epoch 409/1000: 19it [00:07,  2.45it/s, loss=2.0161]


Epoch 409 Avg Loss: 2.0139


Epoch 410/1000: 19it [00:07,  2.45it/s, loss=2.0123]


Epoch 410 Avg Loss: 2.0116


Epoch 411/1000: 19it [00:07,  2.45it/s, loss=2.0130]


Epoch 411 Avg Loss: 2.0140


Epoch 412/1000: 19it [00:07,  2.45it/s, loss=2.0124]


Epoch 412 Avg Loss: 2.0127


Epoch 413/1000: 19it [00:07,  2.44it/s, loss=2.0077]


Epoch 413 Avg Loss: 2.0129


Epoch 414/1000: 19it [00:07,  2.44it/s, loss=2.0141]


Epoch 414 Avg Loss: 2.0108


Epoch 415/1000: 19it [00:07,  2.44it/s, loss=2.0164]


Epoch 415 Avg Loss: 2.0111


Epoch 416/1000: 19it [00:07,  2.44it/s, loss=2.0199]


Epoch 416 Avg Loss: 2.0117


Epoch 417/1000: 19it [00:07,  2.45it/s, loss=2.0106]


Epoch 417 Avg Loss: 2.0110


Epoch 418/1000: 19it [00:07,  2.45it/s, loss=2.0096]


Epoch 418 Avg Loss: 2.0099


Epoch 419/1000: 19it [00:07,  2.45it/s, loss=2.0118]


Epoch 419 Avg Loss: 2.0090


Epoch 420/1000: 19it [00:07,  2.45it/s, loss=2.0090]


Epoch 420 Avg Loss: 2.0099


Epoch 421/1000: 19it [00:07,  2.45it/s, loss=2.0058]


Epoch 421 Avg Loss: 2.0084


Epoch 422/1000: 19it [00:07,  2.45it/s, loss=2.0066]


Epoch 422 Avg Loss: 2.0087


Epoch 423/1000: 19it [00:07,  2.44it/s, loss=2.0091]


Epoch 423 Avg Loss: 2.0082


Epoch 424/1000: 19it [00:07,  2.45it/s, loss=2.0040]


Epoch 424 Avg Loss: 2.0065


Epoch 425/1000: 19it [00:07,  2.44it/s, loss=2.0085]


Epoch 425 Avg Loss: 2.0070


Epoch 426/1000: 19it [00:07,  2.44it/s, loss=2.0135]


Epoch 426 Avg Loss: 2.0070


Epoch 427/1000: 19it [00:07,  2.45it/s, loss=2.0029]


Epoch 427 Avg Loss: 2.0056


Epoch 428/1000: 19it [00:07,  2.44it/s, loss=2.0050]


Epoch 428 Avg Loss: 2.0069


Epoch 429/1000: 19it [00:07,  2.44it/s, loss=2.0112]


Epoch 429 Avg Loss: 2.0051


Epoch 430/1000: 19it [00:07,  2.44it/s, loss=2.0043]


Epoch 430 Avg Loss: 2.0054


Epoch 431/1000: 19it [00:07,  2.44it/s, loss=2.0011]


Epoch 431 Avg Loss: 2.0041


Epoch 432/1000: 19it [00:07,  2.44it/s, loss=2.0060]


Epoch 432 Avg Loss: 2.0038


Epoch 433/1000: 19it [00:07,  2.44it/s, loss=2.0061]


Epoch 433 Avg Loss: 2.0041


Epoch 434/1000: 19it [00:07,  2.44it/s, loss=2.0042]


Epoch 434 Avg Loss: 2.0029


Epoch 435/1000: 19it [00:07,  2.44it/s, loss=2.0010]


Epoch 435 Avg Loss: 2.0027


Epoch 436/1000: 19it [00:07,  2.45it/s, loss=2.0014]


Epoch 436 Avg Loss: 2.0021


Epoch 437/1000: 19it [00:07,  2.44it/s, loss=2.0018]


Epoch 437 Avg Loss: 2.0026


Epoch 438/1000: 19it [00:07,  2.44it/s, loss=1.9992]


Epoch 438 Avg Loss: 2.0009


Epoch 439/1000: 19it [00:07,  2.44it/s, loss=1.9985]


Epoch 439 Avg Loss: 1.9997


Epoch 440/1000: 19it [00:07,  2.44it/s, loss=1.9974]


Epoch 440 Avg Loss: 2.0001


Epoch 441/1000: 19it [00:07,  2.44it/s, loss=1.9933]


Epoch 441 Avg Loss: 1.9980


Epoch 442/1000: 19it [00:07,  2.44it/s, loss=1.9987]


Epoch 442 Avg Loss: 1.9992


Epoch 443/1000: 19it [00:07,  2.44it/s, loss=1.9944]


Epoch 443 Avg Loss: 1.9986


Epoch 444/1000: 19it [00:07,  2.44it/s, loss=1.9937]


Epoch 444 Avg Loss: 1.9982


Epoch 445/1000: 19it [00:07,  2.44it/s, loss=1.9974]


Epoch 445 Avg Loss: 1.9974


Epoch 446/1000: 19it [00:07,  2.44it/s, loss=1.9961]


Epoch 446 Avg Loss: 1.9971


Epoch 447/1000: 19it [00:07,  2.45it/s, loss=1.9908]


Epoch 447 Avg Loss: 1.9948


Epoch 448/1000: 19it [00:07,  2.44it/s, loss=1.9895]


Epoch 448 Avg Loss: 1.9949


Epoch 449/1000: 19it [00:07,  2.44it/s, loss=1.9909]


Epoch 449 Avg Loss: 1.9943


Epoch 450/1000: 19it [00:07,  2.44it/s, loss=1.9991]


Epoch 450 Avg Loss: 1.9939


Epoch 451/1000: 19it [00:07,  2.45it/s, loss=1.9977]


Epoch 451 Avg Loss: 1.9936


Epoch 452/1000: 19it [00:07,  2.44it/s, loss=1.9938]


Epoch 452 Avg Loss: 1.9929


Epoch 453/1000: 19it [00:07,  2.45it/s, loss=1.9964]


Epoch 453 Avg Loss: 1.9928


Epoch 454/1000: 19it [00:07,  2.44it/s, loss=1.9901]


Epoch 454 Avg Loss: 1.9918


Epoch 455/1000: 19it [00:07,  2.44it/s, loss=1.9952]


Epoch 455 Avg Loss: 1.9921


Epoch 456/1000: 19it [00:07,  2.44it/s, loss=1.9862]


Epoch 456 Avg Loss: 1.9908


Epoch 457/1000: 19it [00:07,  2.45it/s, loss=1.9914]


Epoch 457 Avg Loss: 1.9914


Epoch 458/1000: 19it [00:07,  2.45it/s, loss=1.9896]


Epoch 458 Avg Loss: 1.9901


Epoch 459/1000: 19it [00:07,  2.45it/s, loss=1.9882]


Epoch 459 Avg Loss: 1.9889


Epoch 460/1000: 19it [00:07,  2.44it/s, loss=1.9888]


Epoch 460 Avg Loss: 1.9878


Epoch 461/1000: 19it [00:07,  2.45it/s, loss=1.9850]


Epoch 461 Avg Loss: 1.9876


Epoch 462/1000: 19it [00:07,  2.44it/s, loss=1.9875]


Epoch 462 Avg Loss: 1.9866


Epoch 463/1000: 19it [00:07,  2.44it/s, loss=1.9833]


Epoch 463 Avg Loss: 1.9861


Epoch 464/1000: 19it [00:07,  2.44it/s, loss=1.9831]


Epoch 464 Avg Loss: 1.9857


Epoch 465/1000: 19it [00:07,  2.44it/s, loss=1.9793]


Epoch 465 Avg Loss: 1.9863


Epoch 466/1000: 19it [00:07,  2.45it/s, loss=1.9867]


Epoch 466 Avg Loss: 1.9864


Epoch 467/1000: 19it [00:07,  2.45it/s, loss=1.9839]


Epoch 467 Avg Loss: 1.9856


Epoch 468/1000: 19it [00:07,  2.45it/s, loss=1.9886]


Epoch 468 Avg Loss: 1.9844


Epoch 469/1000: 19it [00:07,  2.45it/s, loss=1.9865]


Epoch 469 Avg Loss: 1.9859


Epoch 470/1000: 19it [00:07,  2.45it/s, loss=1.9851]


Epoch 470 Avg Loss: 1.9838


Epoch 471/1000: 19it [00:07,  2.45it/s, loss=1.9798]


Epoch 471 Avg Loss: 1.9835


Epoch 472/1000: 19it [00:07,  2.45it/s, loss=1.9805]


Epoch 472 Avg Loss: 1.9821


Epoch 473/1000: 19it [00:07,  2.45it/s, loss=1.9856]


Epoch 473 Avg Loss: 1.9818


Epoch 474/1000: 19it [00:07,  2.45it/s, loss=1.9813]


Epoch 474 Avg Loss: 1.9825


Epoch 475/1000: 19it [00:07,  2.45it/s, loss=1.9792]


Epoch 475 Avg Loss: 1.9825


Epoch 476/1000: 19it [00:07,  2.45it/s, loss=1.9783]


Epoch 476 Avg Loss: 1.9810


Epoch 477/1000: 19it [00:07,  2.45it/s, loss=1.9759]


Epoch 477 Avg Loss: 1.9812


Epoch 478/1000: 19it [00:07,  2.45it/s, loss=1.9807]


Epoch 478 Avg Loss: 1.9794


Epoch 479/1000: 19it [00:07,  2.45it/s, loss=1.9801]


Epoch 479 Avg Loss: 1.9785


Epoch 480/1000: 19it [00:07,  2.46it/s, loss=1.9815]


Epoch 480 Avg Loss: 1.9773


Epoch 481/1000: 19it [00:07,  2.45it/s, loss=1.9765]


Epoch 481 Avg Loss: 1.9793


Epoch 482/1000: 19it [00:07,  2.45it/s, loss=1.9766]


Epoch 482 Avg Loss: 1.9776


Epoch 483/1000: 19it [00:07,  2.44it/s, loss=1.9827]


Epoch 483 Avg Loss: 1.9776


Epoch 484/1000: 19it [00:07,  2.44it/s, loss=1.9750]


Epoch 484 Avg Loss: 1.9767


Epoch 485/1000: 19it [00:07,  2.44it/s, loss=1.9740]


Epoch 485 Avg Loss: 1.9761


Epoch 486/1000: 19it [00:07,  2.45it/s, loss=1.9731]


Epoch 486 Avg Loss: 1.9757


Epoch 487/1000: 19it [00:07,  2.44it/s, loss=1.9764]


Epoch 487 Avg Loss: 1.9761


Epoch 488/1000: 19it [00:07,  2.44it/s, loss=1.9718]


Epoch 488 Avg Loss: 1.9746


Epoch 489/1000: 19it [00:07,  2.45it/s, loss=1.9790]


Epoch 489 Avg Loss: 1.9739


Epoch 490/1000: 19it [00:07,  2.45it/s, loss=1.9699]


Epoch 490 Avg Loss: 1.9741


Epoch 491/1000: 19it [00:07,  2.45it/s, loss=1.9734]


Epoch 491 Avg Loss: 1.9728


Epoch 492/1000: 19it [00:07,  2.45it/s, loss=1.9753]


Epoch 492 Avg Loss: 1.9714


Epoch 493/1000: 19it [00:07,  2.44it/s, loss=1.9715]


Epoch 493 Avg Loss: 1.9733


Epoch 494/1000: 19it [00:07,  2.44it/s, loss=1.9632]


Epoch 494 Avg Loss: 1.9707


Epoch 495/1000: 19it [00:07,  2.45it/s, loss=1.9663]


Epoch 495 Avg Loss: 1.9715


Epoch 496/1000: 19it [00:07,  2.44it/s, loss=1.9780]


Epoch 496 Avg Loss: 1.9731


Epoch 497/1000: 19it [00:07,  2.44it/s, loss=1.9673]


Epoch 497 Avg Loss: 1.9710


Epoch 498/1000: 19it [00:07,  2.44it/s, loss=1.9717]


Epoch 498 Avg Loss: 1.9703


Epoch 499/1000: 19it [00:07,  2.44it/s, loss=1.9674]


Epoch 499 Avg Loss: 1.9683


Epoch 500/1000: 19it [00:07,  2.45it/s, loss=1.9688]


Epoch 500 Avg Loss: 1.9687


Epoch 501/1000: 19it [00:07,  2.44it/s, loss=1.9684]


Epoch 501 Avg Loss: 1.9691


Epoch 502/1000: 19it [00:07,  2.45it/s, loss=1.9722]


Epoch 502 Avg Loss: 1.9698


Epoch 503/1000: 19it [00:07,  2.44it/s, loss=1.9589]


Epoch 503 Avg Loss: 1.9650


Epoch 504/1000: 19it [00:07,  2.45it/s, loss=1.9683]


Epoch 504 Avg Loss: 1.9668


Epoch 505/1000: 19it [00:07,  2.45it/s, loss=1.9671]


Epoch 505 Avg Loss: 1.9668


Epoch 506/1000: 19it [00:07,  2.45it/s, loss=1.9631]


Epoch 506 Avg Loss: 1.9664


Epoch 507/1000: 19it [00:07,  2.44it/s, loss=1.9714]


Epoch 507 Avg Loss: 1.9651


Epoch 508/1000: 19it [00:07,  2.44it/s, loss=1.9664]


Epoch 508 Avg Loss: 1.9653


Epoch 509/1000: 19it [00:07,  2.44it/s, loss=1.9646]


Epoch 509 Avg Loss: 1.9649


Epoch 510/1000: 19it [00:07,  2.44it/s, loss=1.9664]


Epoch 510 Avg Loss: 1.9641


Epoch 511/1000: 19it [00:07,  2.44it/s, loss=1.9623]


Epoch 511 Avg Loss: 1.9646


Epoch 512/1000: 19it [00:07,  2.45it/s, loss=1.9628]


Epoch 512 Avg Loss: 1.9628


Epoch 513/1000: 19it [00:07,  2.44it/s, loss=1.9667]


Epoch 513 Avg Loss: 1.9620


Epoch 514/1000: 19it [00:07,  2.44it/s, loss=1.9639]


Epoch 514 Avg Loss: 1.9619


Epoch 515/1000: 19it [00:07,  2.44it/s, loss=1.9656]


Epoch 515 Avg Loss: 1.9613


Epoch 516/1000: 19it [00:07,  2.44it/s, loss=1.9561]


Epoch 516 Avg Loss: 1.9602


Epoch 517/1000: 19it [00:07,  2.44it/s, loss=1.9659]


Epoch 517 Avg Loss: 1.9605


Epoch 518/1000: 19it [00:07,  2.44it/s, loss=1.9593]


Epoch 518 Avg Loss: 1.9617


Epoch 519/1000: 19it [00:07,  2.44it/s, loss=1.9592]


Epoch 519 Avg Loss: 1.9600


Epoch 520/1000: 19it [00:07,  2.44it/s, loss=1.9709]


Epoch 520 Avg Loss: 1.9604


Epoch 521/1000: 19it [00:07,  2.44it/s, loss=1.9625]


Epoch 521 Avg Loss: 1.9595


Epoch 522/1000: 19it [00:07,  2.44it/s, loss=1.9618]


Epoch 522 Avg Loss: 1.9583


Epoch 523/1000: 19it [00:07,  2.44it/s, loss=1.9618]


Epoch 523 Avg Loss: 1.9575


Epoch 524/1000: 19it [00:07,  2.45it/s, loss=1.9587]


Epoch 524 Avg Loss: 1.9577


Epoch 525/1000: 19it [00:07,  2.45it/s, loss=1.9588]


Epoch 525 Avg Loss: 1.9567


Epoch 526/1000: 19it [00:07,  2.45it/s, loss=1.9562]


Epoch 526 Avg Loss: 1.9558


Epoch 527/1000: 19it [00:07,  2.45it/s, loss=1.9550]


Epoch 527 Avg Loss: 1.9555


Epoch 528/1000: 19it [00:07,  2.45it/s, loss=1.9541]


Epoch 528 Avg Loss: 1.9568


Epoch 529/1000: 19it [00:07,  2.44it/s, loss=1.9548]


Epoch 529 Avg Loss: 1.9544


Epoch 530/1000: 19it [00:07,  2.45it/s, loss=1.9503]


Epoch 530 Avg Loss: 1.9547


Epoch 531/1000: 19it [00:07,  2.45it/s, loss=1.9550]


Epoch 531 Avg Loss: 1.9546


Epoch 532/1000: 19it [00:07,  2.45it/s, loss=1.9533]


Epoch 532 Avg Loss: 1.9533


Epoch 533/1000: 19it [00:07,  2.45it/s, loss=1.9557]


Epoch 533 Avg Loss: 1.9535


Epoch 534/1000: 19it [00:07,  2.45it/s, loss=1.9507]


Epoch 534 Avg Loss: 1.9523


Epoch 535/1000: 19it [00:07,  2.44it/s, loss=1.9500]


Epoch 535 Avg Loss: 1.9523


Epoch 536/1000: 19it [00:07,  2.44it/s, loss=1.9503]


Epoch 536 Avg Loss: 1.9524


Epoch 537/1000: 19it [00:07,  2.44it/s, loss=1.9555]


Epoch 537 Avg Loss: 1.9514


Epoch 538/1000: 19it [00:07,  2.44it/s, loss=1.9522]


Epoch 538 Avg Loss: 1.9510


Epoch 539/1000: 19it [00:07,  2.44it/s, loss=1.9492]


Epoch 539 Avg Loss: 1.9499


Epoch 540/1000: 19it [00:07,  2.45it/s, loss=1.9540]


Epoch 540 Avg Loss: 1.9509


Epoch 541/1000: 19it [00:07,  2.44it/s, loss=1.9468]


Epoch 541 Avg Loss: 1.9487


Epoch 542/1000: 19it [00:07,  2.44it/s, loss=1.9451]


Epoch 542 Avg Loss: 1.9478


Epoch 543/1000: 19it [00:07,  2.44it/s, loss=1.9529]


Epoch 543 Avg Loss: 1.9479


Epoch 544/1000: 19it [00:07,  2.44it/s, loss=1.9426]


Epoch 544 Avg Loss: 1.9469


Epoch 545/1000: 19it [00:07,  2.44it/s, loss=1.9423]


Epoch 545 Avg Loss: 1.9471


Epoch 546/1000: 19it [00:07,  2.44it/s, loss=1.9495]


Epoch 546 Avg Loss: 1.9481


Epoch 547/1000: 19it [00:07,  2.44it/s, loss=1.9483]


Epoch 547 Avg Loss: 1.9447


Epoch 548/1000: 19it [00:07,  2.45it/s, loss=1.9462]


Epoch 548 Avg Loss: 1.9464


Epoch 549/1000: 19it [00:07,  2.45it/s, loss=1.9446]


Epoch 549 Avg Loss: 1.9439


Epoch 550/1000: 19it [00:07,  2.44it/s, loss=1.9447]


Epoch 550 Avg Loss: 1.9437


Epoch 551/1000: 19it [00:07,  2.44it/s, loss=1.9439]


Epoch 551 Avg Loss: 1.9432


Epoch 552/1000: 19it [00:07,  2.45it/s, loss=1.9451]


Epoch 552 Avg Loss: 1.9426


Epoch 553/1000: 19it [00:07,  2.45it/s, loss=1.9390]


Epoch 553 Avg Loss: 1.9432


Epoch 554/1000: 19it [00:07,  2.45it/s, loss=1.9458]


Epoch 554 Avg Loss: 1.9435


Epoch 555/1000: 19it [00:07,  2.45it/s, loss=1.9412]


Epoch 555 Avg Loss: 1.9424


Epoch 556/1000: 19it [00:07,  2.45it/s, loss=1.9355]


Epoch 556 Avg Loss: 1.9418


Epoch 557/1000: 19it [00:07,  2.45it/s, loss=1.9359]


Epoch 557 Avg Loss: 1.9399


Epoch 558/1000: 19it [00:07,  2.45it/s, loss=1.9419]


Epoch 558 Avg Loss: 1.9401


Epoch 559/1000: 19it [00:07,  2.44it/s, loss=1.9380]


Epoch 559 Avg Loss: 1.9403


Epoch 560/1000: 19it [00:07,  2.45it/s, loss=1.9410]


Epoch 560 Avg Loss: 1.9411


Epoch 561/1000: 19it [00:07,  2.45it/s, loss=1.9411]


Epoch 561 Avg Loss: 1.9394


Epoch 562/1000: 19it [00:07,  2.45it/s, loss=1.9359]


Epoch 562 Avg Loss: 1.9385


Epoch 563/1000: 19it [00:07,  2.45it/s, loss=1.9389]


Epoch 563 Avg Loss: 1.9382


Epoch 564/1000: 19it [00:07,  2.45it/s, loss=1.9356]


Epoch 564 Avg Loss: 1.9372


Epoch 565/1000: 19it [00:07,  2.45it/s, loss=1.9366]


Epoch 565 Avg Loss: 1.9373


Epoch 566/1000: 19it [00:07,  2.45it/s, loss=1.9359]


Epoch 566 Avg Loss: 1.9381


Epoch 567/1000: 19it [00:07,  2.45it/s, loss=1.9383]


Epoch 567 Avg Loss: 1.9369


Epoch 568/1000: 19it [00:07,  2.45it/s, loss=1.9404]


Epoch 568 Avg Loss: 1.9359


Epoch 569/1000: 19it [00:07,  2.44it/s, loss=1.9380]


Epoch 569 Avg Loss: 1.9352


Epoch 570/1000: 19it [00:07,  2.45it/s, loss=1.9397]


Epoch 570 Avg Loss: 1.9365


Epoch 571/1000: 19it [00:07,  2.45it/s, loss=1.9394]


Epoch 571 Avg Loss: 1.9357


Epoch 572/1000: 19it [00:07,  2.44it/s, loss=1.9325]


Epoch 572 Avg Loss: 1.9349


Epoch 573/1000: 19it [00:07,  2.45it/s, loss=1.9369]


Epoch 573 Avg Loss: 1.9348


Epoch 574/1000: 19it [00:07,  2.44it/s, loss=1.9286]


Epoch 574 Avg Loss: 1.9332


Epoch 575/1000: 19it [00:07,  2.44it/s, loss=1.9330]


Epoch 575 Avg Loss: 1.9319


Epoch 576/1000: 19it [00:07,  2.45it/s, loss=1.9362]


Epoch 576 Avg Loss: 1.9315


Epoch 577/1000: 19it [00:07,  2.45it/s, loss=1.9354]


Epoch 577 Avg Loss: 1.9326


Epoch 578/1000: 19it [00:07,  2.45it/s, loss=1.9343]


Epoch 578 Avg Loss: 1.9332


Epoch 579/1000: 19it [00:07,  2.44it/s, loss=1.9299]


Epoch 579 Avg Loss: 1.9323


Epoch 580/1000: 19it [00:07,  2.45it/s, loss=1.9354]


Epoch 580 Avg Loss: 1.9304


Epoch 581/1000: 19it [00:07,  2.44it/s, loss=1.9288]


Epoch 581 Avg Loss: 1.9303


Epoch 582/1000: 19it [00:07,  2.44it/s, loss=1.9252]


Epoch 582 Avg Loss: 1.9294


Epoch 583/1000: 19it [00:07,  2.44it/s, loss=1.9328]


Epoch 583 Avg Loss: 1.9289


Epoch 584/1000: 19it [00:07,  2.45it/s, loss=1.9254]


Epoch 584 Avg Loss: 1.9283


Epoch 585/1000: 19it [00:07,  2.45it/s, loss=1.9258]


Epoch 585 Avg Loss: 1.9270


Epoch 586/1000: 19it [00:07,  2.45it/s, loss=1.9258]


Epoch 586 Avg Loss: 1.9262


Epoch 587/1000: 19it [00:07,  2.44it/s, loss=1.9311]


Epoch 587 Avg Loss: 1.9267


Epoch 588/1000: 19it [00:07,  2.45it/s, loss=1.9292]


Epoch 588 Avg Loss: 1.9258


Epoch 589/1000: 19it [00:07,  2.45it/s, loss=1.9288]


Epoch 589 Avg Loss: 1.9265


Epoch 590/1000: 19it [00:07,  2.44it/s, loss=1.9225]


Epoch 590 Avg Loss: 1.9252


Epoch 591/1000: 19it [00:07,  2.45it/s, loss=1.9266]


Epoch 591 Avg Loss: 1.9228


Epoch 592/1000: 19it [00:07,  2.45it/s, loss=1.9245]


Epoch 592 Avg Loss: 1.9249


Epoch 593/1000: 19it [00:07,  2.44it/s, loss=1.9190]


Epoch 593 Avg Loss: 1.9237


Epoch 594/1000: 19it [00:07,  2.45it/s, loss=1.9258]


Epoch 594 Avg Loss: 1.9220


Epoch 595/1000: 19it [00:07,  2.45it/s, loss=1.9219]


Epoch 595 Avg Loss: 1.9217


Epoch 596/1000: 19it [00:07,  2.45it/s, loss=1.9210]


Epoch 596 Avg Loss: 1.9194


Epoch 597/1000: 19it [00:07,  2.44it/s, loss=1.9198]


Epoch 597 Avg Loss: 1.9195


Epoch 598/1000: 19it [00:07,  2.44it/s, loss=1.9166]


Epoch 598 Avg Loss: 1.9186


Epoch 599/1000: 19it [00:07,  2.44it/s, loss=1.9164]


Epoch 599 Avg Loss: 1.9182


Epoch 600/1000: 19it [00:07,  2.44it/s, loss=1.9153]


Epoch 600 Avg Loss: 1.9166


Epoch 601/1000: 19it [00:07,  2.44it/s, loss=1.9178]


Epoch 601 Avg Loss: 1.9173


Epoch 602/1000: 19it [00:07,  2.44it/s, loss=1.9181]


Epoch 602 Avg Loss: 1.9153


Epoch 603/1000: 19it [00:07,  2.44it/s, loss=1.9110]


Epoch 603 Avg Loss: 1.9145


Epoch 604/1000: 19it [00:07,  2.45it/s, loss=1.9152]


Epoch 604 Avg Loss: 1.9127


Epoch 605/1000: 19it [00:07,  2.45it/s, loss=1.9089]


Epoch 605 Avg Loss: 1.9119


Epoch 606/1000: 19it [00:07,  2.44it/s, loss=1.9103]


Epoch 606 Avg Loss: 1.9125


Epoch 607/1000: 19it [00:07,  2.44it/s, loss=1.9140]


Epoch 607 Avg Loss: 1.9103


Epoch 608/1000: 19it [00:07,  2.44it/s, loss=1.9114]


Epoch 608 Avg Loss: 1.9103


Epoch 609/1000: 19it [00:07,  2.45it/s, loss=1.9155]


Epoch 609 Avg Loss: 1.9110


Epoch 610/1000: 19it [00:07,  2.44it/s, loss=1.9124]


Epoch 610 Avg Loss: 1.9109


Epoch 611/1000: 19it [00:07,  2.44it/s, loss=1.9094]


Epoch 611 Avg Loss: 1.9121


Epoch 612/1000: 19it [00:07,  2.44it/s, loss=1.9137]


Epoch 612 Avg Loss: 1.9092


Epoch 613/1000: 19it [00:07,  2.45it/s, loss=1.9122]


Epoch 613 Avg Loss: 1.9083


Epoch 614/1000: 19it [00:07,  2.44it/s, loss=1.9127]


Epoch 614 Avg Loss: 1.9065


Epoch 615/1000: 19it [00:07,  2.44it/s, loss=1.9012]


Epoch 615 Avg Loss: 1.9070


Epoch 616/1000: 19it [00:07,  2.44it/s, loss=1.9089]


Epoch 616 Avg Loss: 1.9064


Epoch 617/1000: 19it [00:07,  2.45it/s, loss=1.9054]


Epoch 617 Avg Loss: 1.9059


Epoch 618/1000: 19it [00:07,  2.44it/s, loss=1.9085]


Epoch 618 Avg Loss: 1.9044


Epoch 619/1000: 19it [00:07,  2.45it/s, loss=1.9098]


Epoch 619 Avg Loss: 1.9049


Epoch 620/1000: 19it [00:07,  2.45it/s, loss=1.9054]


Epoch 620 Avg Loss: 1.9043


Epoch 621/1000: 19it [00:07,  2.45it/s, loss=1.9028]


Epoch 621 Avg Loss: 1.9037


Epoch 622/1000: 19it [00:07,  2.44it/s, loss=1.9056]


Epoch 622 Avg Loss: 1.9027


Epoch 623/1000: 19it [00:07,  2.44it/s, loss=1.9011]


Epoch 623 Avg Loss: 1.9019


Epoch 624/1000: 19it [00:07,  2.45it/s, loss=1.8956]


Epoch 624 Avg Loss: 1.9002


Epoch 625/1000: 19it [00:07,  2.44it/s, loss=1.8999]


Epoch 625 Avg Loss: 1.8992


Epoch 626/1000: 19it [00:07,  2.45it/s, loss=1.8940]


Epoch 626 Avg Loss: 1.8981


Epoch 627/1000: 19it [00:07,  2.45it/s, loss=1.8970]


Epoch 627 Avg Loss: 1.8990


Epoch 628/1000: 19it [00:07,  2.45it/s, loss=1.8965]


Epoch 628 Avg Loss: 1.8996


Epoch 629/1000: 19it [00:07,  2.45it/s, loss=1.9033]


Epoch 629 Avg Loss: 1.8986


Epoch 630/1000: 19it [00:07,  2.45it/s, loss=1.8950]


Epoch 630 Avg Loss: 1.8977


Epoch 631/1000: 19it [00:07,  2.45it/s, loss=1.8943]


Epoch 631 Avg Loss: 1.8974


Epoch 632/1000: 19it [00:07,  2.45it/s, loss=1.8993]


Epoch 632 Avg Loss: 1.8961


Epoch 633/1000: 19it [00:07,  2.45it/s, loss=1.8931]


Epoch 633 Avg Loss: 1.8957


Epoch 634/1000: 19it [00:07,  2.45it/s, loss=1.8931]


Epoch 634 Avg Loss: 1.8956


Epoch 635/1000: 19it [00:07,  2.44it/s, loss=1.8977]


Epoch 635 Avg Loss: 1.8945


Epoch 636/1000: 19it [00:07,  2.45it/s, loss=1.8967]


Epoch 636 Avg Loss: 1.8931


Epoch 637/1000: 19it [00:07,  2.45it/s, loss=1.8974]


Epoch 637 Avg Loss: 1.8944


Epoch 638/1000: 19it [00:07,  2.45it/s, loss=1.8893]


Epoch 638 Avg Loss: 1.8927


Epoch 639/1000: 19it [00:07,  2.45it/s, loss=1.8949]


Epoch 639 Avg Loss: 1.8931


Epoch 640/1000: 19it [00:07,  2.45it/s, loss=1.8945]


Epoch 640 Avg Loss: 1.8926


Epoch 641/1000: 19it [00:07,  2.45it/s, loss=1.8850]


Epoch 641 Avg Loss: 1.8915


Epoch 642/1000: 19it [00:07,  2.45it/s, loss=1.8925]


Epoch 642 Avg Loss: 1.8921


Epoch 643/1000: 19it [00:07,  2.45it/s, loss=1.8899]


Epoch 643 Avg Loss: 1.8914


Epoch 644/1000: 19it [00:07,  2.45it/s, loss=1.8892]


Epoch 644 Avg Loss: 1.8907


Epoch 645/1000: 19it [00:07,  2.45it/s, loss=1.8964]


Epoch 645 Avg Loss: 1.8913


Epoch 646/1000: 19it [00:07,  2.45it/s, loss=1.8903]


Epoch 646 Avg Loss: 1.8889


Epoch 647/1000: 19it [00:07,  2.45it/s, loss=1.8933]


Epoch 647 Avg Loss: 1.8894


Epoch 648/1000: 19it [00:07,  2.45it/s, loss=1.8868]


Epoch 648 Avg Loss: 1.8875


Epoch 649/1000: 19it [00:07,  2.45it/s, loss=1.8883]


Epoch 649 Avg Loss: 1.8869


Epoch 650/1000: 19it [00:07,  2.45it/s, loss=1.8881]


Epoch 650 Avg Loss: 1.8862


Epoch 651/1000: 19it [00:07,  2.45it/s, loss=1.8873]


Epoch 651 Avg Loss: 1.8868


Epoch 652/1000: 19it [00:07,  2.44it/s, loss=1.8845]


Epoch 652 Avg Loss: 1.8858


Epoch 653/1000: 19it [00:07,  2.44it/s, loss=1.8885]


Epoch 653 Avg Loss: 1.8855


Epoch 654/1000: 19it [00:07,  2.44it/s, loss=1.8879]


Epoch 654 Avg Loss: 1.8849


Epoch 655/1000: 19it [00:07,  2.45it/s, loss=1.8834]


Epoch 655 Avg Loss: 1.8839


Epoch 656/1000: 19it [00:07,  2.45it/s, loss=1.8804]


Epoch 656 Avg Loss: 1.8842


Epoch 657/1000: 19it [00:07,  2.44it/s, loss=1.8816]


Epoch 657 Avg Loss: 1.8827


Epoch 658/1000: 19it [00:07,  2.45it/s, loss=1.8844]


Epoch 658 Avg Loss: 1.8830


Epoch 659/1000: 19it [00:07,  2.45it/s, loss=1.8802]


Epoch 659 Avg Loss: 1.8818


Epoch 660/1000: 19it [00:07,  2.45it/s, loss=1.8788]


Epoch 660 Avg Loss: 1.8817


Epoch 661/1000: 19it [00:07,  2.45it/s, loss=1.8808]


Epoch 661 Avg Loss: 1.8801


Epoch 662/1000: 19it [00:07,  2.45it/s, loss=1.8785]


Epoch 662 Avg Loss: 1.8802


Epoch 663/1000: 19it [00:07,  2.45it/s, loss=1.8801]


Epoch 663 Avg Loss: 1.8804


Epoch 664/1000: 19it [00:07,  2.45it/s, loss=1.8805]


Epoch 664 Avg Loss: 1.8794


Epoch 665/1000: 19it [00:07,  2.45it/s, loss=1.8801]


Epoch 665 Avg Loss: 1.8787


Epoch 666/1000: 19it [00:07,  2.45it/s, loss=1.8765]


Epoch 666 Avg Loss: 1.8773


Epoch 667/1000: 19it [00:07,  2.45it/s, loss=1.8737]


Epoch 667 Avg Loss: 1.8771


Epoch 668/1000: 19it [00:07,  2.45it/s, loss=1.8763]


Epoch 668 Avg Loss: 1.8762


Epoch 669/1000: 19it [00:07,  2.45it/s, loss=1.8762]


Epoch 669 Avg Loss: 1.8768


Epoch 670/1000: 19it [00:07,  2.45it/s, loss=1.8758]


Epoch 670 Avg Loss: 1.8750


Epoch 671/1000: 19it [00:07,  2.45it/s, loss=1.8751]


Epoch 671 Avg Loss: 1.8756


Epoch 672/1000: 19it [00:07,  2.44it/s, loss=1.8778]


Epoch 672 Avg Loss: 1.8749


Epoch 673/1000: 19it [00:07,  2.45it/s, loss=1.8715]


Epoch 673 Avg Loss: 1.8745


Epoch 674/1000: 19it [00:07,  2.45it/s, loss=1.8724]


Epoch 674 Avg Loss: 1.8736


Epoch 675/1000: 19it [00:07,  2.45it/s, loss=1.8718]


Epoch 675 Avg Loss: 1.8738


Epoch 676/1000: 19it [00:07,  2.45it/s, loss=1.8695]


Epoch 676 Avg Loss: 1.8732


Epoch 677/1000: 19it [00:07,  2.45it/s, loss=1.8738]


Epoch 677 Avg Loss: 1.8725


Epoch 678/1000: 19it [00:07,  2.45it/s, loss=1.8729]


Epoch 678 Avg Loss: 1.8726


Epoch 679/1000: 19it [00:07,  2.45it/s, loss=1.8721]


Epoch 679 Avg Loss: 1.8710


Epoch 680/1000: 19it [00:07,  2.45it/s, loss=1.8702]


Epoch 680 Avg Loss: 1.8730


Epoch 681/1000: 19it [00:07,  2.45it/s, loss=1.8676]


Epoch 681 Avg Loss: 1.8694


Epoch 682/1000: 19it [00:07,  2.44it/s, loss=1.8697]


Epoch 682 Avg Loss: 1.8691


Epoch 683/1000: 19it [00:07,  2.45it/s, loss=1.8682]


Epoch 683 Avg Loss: 1.8697


Epoch 684/1000: 19it [00:07,  2.44it/s, loss=1.8699]


Epoch 684 Avg Loss: 1.8692


Epoch 685/1000: 19it [00:07,  2.45it/s, loss=1.8688]


Epoch 685 Avg Loss: 1.8694


Epoch 686/1000: 19it [00:07,  2.44it/s, loss=1.8694]


Epoch 686 Avg Loss: 1.8683


Epoch 687/1000: 19it [00:07,  2.45it/s, loss=1.8662]


Epoch 687 Avg Loss: 1.8680


Epoch 688/1000: 19it [00:07,  2.45it/s, loss=1.8665]


Epoch 688 Avg Loss: 1.8680


Epoch 689/1000: 19it [00:07,  2.45it/s, loss=1.8673]


Epoch 689 Avg Loss: 1.8668


Epoch 690/1000: 19it [00:07,  2.45it/s, loss=1.8706]


Epoch 690 Avg Loss: 1.8678


Epoch 691/1000: 19it [00:07,  2.45it/s, loss=1.8661]


Epoch 691 Avg Loss: 1.8655


Epoch 692/1000: 19it [00:07,  2.45it/s, loss=1.8647]


Epoch 692 Avg Loss: 1.8659


Epoch 693/1000: 19it [00:07,  2.45it/s, loss=1.8661]


Epoch 693 Avg Loss: 1.8661


Epoch 694/1000: 19it [00:07,  2.45it/s, loss=1.8625]


Epoch 694 Avg Loss: 1.8645


Epoch 695/1000: 19it [00:07,  2.45it/s, loss=1.8669]


Epoch 695 Avg Loss: 1.8646


Epoch 696/1000: 19it [00:07,  2.45it/s, loss=1.8633]


Epoch 696 Avg Loss: 1.8646


Epoch 697/1000: 19it [00:07,  2.45it/s, loss=1.8635]


Epoch 697 Avg Loss: 1.8652


Epoch 698/1000: 19it [00:07,  2.45it/s, loss=1.8639]


Epoch 698 Avg Loss: 1.8638


Epoch 699/1000: 19it [00:07,  2.45it/s, loss=1.8666]


Epoch 699 Avg Loss: 1.8630


Epoch 700/1000: 19it [00:07,  2.45it/s, loss=1.8631]


Epoch 700 Avg Loss: 1.8629


Epoch 701/1000: 19it [00:07,  2.44it/s, loss=1.8638]


Epoch 701 Avg Loss: 1.8635


Epoch 702/1000: 19it [00:07,  2.44it/s, loss=1.8627]


Epoch 702 Avg Loss: 1.8629


Epoch 703/1000: 19it [00:07,  2.44it/s, loss=1.8594]


Epoch 703 Avg Loss: 1.8628


Epoch 704/1000: 19it [00:07,  2.44it/s, loss=1.8588]


Epoch 704 Avg Loss: 1.8609


Epoch 705/1000: 19it [00:07,  2.45it/s, loss=1.8656]


Epoch 705 Avg Loss: 1.8612


Epoch 706/1000: 19it [00:07,  2.45it/s, loss=1.8629]


Epoch 706 Avg Loss: 1.8612


Epoch 707/1000: 19it [00:07,  2.45it/s, loss=1.8599]


Epoch 707 Avg Loss: 1.8611


Epoch 708/1000: 19it [00:07,  2.45it/s, loss=1.8623]


Epoch 708 Avg Loss: 1.8616


Epoch 709/1000: 19it [00:07,  2.45it/s, loss=1.8660]


Epoch 709 Avg Loss: 1.8614


Epoch 710/1000: 19it [00:07,  2.45it/s, loss=1.8560]


Epoch 710 Avg Loss: 1.8598


Epoch 711/1000: 19it [00:07,  2.45it/s, loss=1.8603]


Epoch 711 Avg Loss: 1.8619


Epoch 712/1000: 19it [00:07,  2.45it/s, loss=1.8611]


Epoch 712 Avg Loss: 1.8595


Epoch 713/1000: 19it [00:07,  2.45it/s, loss=1.8598]


Epoch 713 Avg Loss: 1.8592


Epoch 714/1000: 19it [00:07,  2.45it/s, loss=1.8593]


Epoch 714 Avg Loss: 1.8597


Epoch 715/1000: 19it [00:07,  2.45it/s, loss=1.8626]


Epoch 715 Avg Loss: 1.8595


Epoch 716/1000: 19it [00:07,  2.45it/s, loss=1.8548]


Epoch 716 Avg Loss: 1.8584


Epoch 717/1000: 19it [00:07,  2.45it/s, loss=1.8514]


Epoch 717 Avg Loss: 1.8574


Epoch 718/1000: 19it [00:07,  2.45it/s, loss=1.8606]


Epoch 718 Avg Loss: 1.8593


Epoch 719/1000: 19it [00:07,  2.45it/s, loss=1.8572]


Epoch 719 Avg Loss: 1.8577


Epoch 720/1000: 19it [00:07,  2.45it/s, loss=1.8533]


Epoch 720 Avg Loss: 1.8566


Epoch 721/1000: 19it [00:07,  2.45it/s, loss=1.8649]


Epoch 721 Avg Loss: 1.8569


Epoch 722/1000: 19it [00:07,  2.46it/s, loss=1.8596]


Epoch 722 Avg Loss: 1.8567


Epoch 723/1000: 19it [00:07,  2.46it/s, loss=1.8576]


Epoch 723 Avg Loss: 1.8571


Epoch 724/1000: 19it [00:07,  2.45it/s, loss=1.8524]


Epoch 724 Avg Loss: 1.8567


Epoch 725/1000: 19it [00:07,  2.46it/s, loss=1.8629]


Epoch 725 Avg Loss: 1.8570


Epoch 726/1000: 19it [00:07,  2.46it/s, loss=1.8563]


Epoch 726 Avg Loss: 1.8565


Epoch 727/1000: 19it [00:07,  2.45it/s, loss=1.8560]


Epoch 727 Avg Loss: 1.8559


Epoch 728/1000: 19it [00:07,  2.45it/s, loss=1.8535]


Epoch 728 Avg Loss: 1.8562


Epoch 729/1000: 19it [00:07,  2.45it/s, loss=1.8578]


Epoch 729 Avg Loss: 1.8557


Epoch 730/1000: 19it [00:07,  2.45it/s, loss=1.8598]


Epoch 730 Avg Loss: 1.8551


Epoch 731/1000: 19it [00:07,  2.45it/s, loss=1.8557]


Epoch 731 Avg Loss: 1.8553


Epoch 732/1000: 19it [00:07,  2.45it/s, loss=1.8542]


Epoch 732 Avg Loss: 1.8541


Epoch 733/1000: 19it [00:07,  2.45it/s, loss=1.8557]


Epoch 733 Avg Loss: 1.8534


Epoch 734/1000: 19it [00:07,  2.45it/s, loss=1.8584]


Epoch 734 Avg Loss: 1.8541


Epoch 735/1000: 19it [00:07,  2.45it/s, loss=1.8534]


Epoch 735 Avg Loss: 1.8544


Epoch 736/1000: 19it [00:07,  2.45it/s, loss=1.8549]


Epoch 736 Avg Loss: 1.8537


Epoch 737/1000: 19it [00:07,  2.45it/s, loss=1.8523]


Epoch 737 Avg Loss: 1.8532


Epoch 738/1000: 19it [00:07,  2.45it/s, loss=1.8499]


Epoch 738 Avg Loss: 1.8524


Epoch 739/1000: 19it [00:07,  2.45it/s, loss=1.8505]


Epoch 739 Avg Loss: 1.8519


Epoch 740/1000: 19it [00:07,  2.45it/s, loss=1.8513]


Epoch 740 Avg Loss: 1.8524


Epoch 741/1000: 19it [00:07,  2.44it/s, loss=1.8527]


Epoch 741 Avg Loss: 1.8523


Epoch 742/1000: 19it [00:07,  2.44it/s, loss=1.8521]


Epoch 742 Avg Loss: 1.8522


Epoch 743/1000: 19it [00:07,  2.44it/s, loss=1.8538]


Epoch 743 Avg Loss: 1.8526


Epoch 744/1000: 19it [00:07,  2.45it/s, loss=1.8496]


Epoch 744 Avg Loss: 1.8520


Epoch 745/1000: 19it [00:07,  2.45it/s, loss=1.8523]


Epoch 745 Avg Loss: 1.8513


Epoch 746/1000: 19it [00:07,  2.45it/s, loss=1.8504]


Epoch 746 Avg Loss: 1.8522


Epoch 747/1000: 19it [00:07,  2.45it/s, loss=1.8574]


Epoch 747 Avg Loss: 1.8517


Epoch 748/1000: 19it [00:07,  2.45it/s, loss=1.8521]


Epoch 748 Avg Loss: 1.8505


Epoch 749/1000: 19it [00:07,  2.45it/s, loss=1.8499]


Epoch 749 Avg Loss: 1.8503


Epoch 750/1000: 19it [00:07,  2.45it/s, loss=1.8516]


Epoch 750 Avg Loss: 1.8514


Epoch 751/1000: 19it [00:07,  2.45it/s, loss=1.8504]


Epoch 751 Avg Loss: 1.8524


Epoch 752/1000: 19it [00:07,  2.45it/s, loss=1.8504]


Epoch 752 Avg Loss: 1.8502


Epoch 753/1000: 19it [00:07,  2.45it/s, loss=1.8484]


Epoch 753 Avg Loss: 1.8508


Epoch 754/1000: 19it [00:07,  2.45it/s, loss=1.8550]


Epoch 754 Avg Loss: 1.8507


Epoch 755/1000: 19it [00:07,  2.45it/s, loss=1.8463]


Epoch 755 Avg Loss: 1.8494


Epoch 756/1000: 19it [00:07,  2.45it/s, loss=1.8496]


Epoch 756 Avg Loss: 1.8494


Epoch 757/1000: 19it [00:07,  2.45it/s, loss=1.8449]


Epoch 757 Avg Loss: 1.8499


Epoch 758/1000: 19it [00:07,  2.45it/s, loss=1.8463]


Epoch 758 Avg Loss: 1.8487


Epoch 759/1000: 19it [00:07,  2.45it/s, loss=1.8504]


Epoch 759 Avg Loss: 1.8481


Epoch 760/1000: 19it [00:07,  2.45it/s, loss=1.8476]


Epoch 760 Avg Loss: 1.8479


Epoch 761/1000: 19it [00:07,  2.45it/s, loss=1.8476]


Epoch 761 Avg Loss: 1.8485


Epoch 762/1000: 19it [00:07,  2.45it/s, loss=1.8498]


Epoch 762 Avg Loss: 1.8482


Epoch 763/1000: 19it [00:07,  2.45it/s, loss=1.8426]


Epoch 763 Avg Loss: 1.8488


Epoch 764/1000: 19it [00:07,  2.45it/s, loss=1.8446]


Epoch 764 Avg Loss: 1.8487


Epoch 765/1000: 19it [00:07,  2.45it/s, loss=1.8500]


Epoch 765 Avg Loss: 1.8476


Epoch 766/1000: 19it [00:07,  2.45it/s, loss=1.8481]


Epoch 766 Avg Loss: 1.8465


Epoch 767/1000: 19it [00:07,  2.45it/s, loss=1.8476]


Epoch 767 Avg Loss: 1.8475


Epoch 768/1000: 19it [00:07,  2.44it/s, loss=1.8438]


Epoch 768 Avg Loss: 1.8470


Epoch 769/1000: 19it [00:07,  2.44it/s, loss=1.8396]


Epoch 769 Avg Loss: 1.8474


Epoch 770/1000: 19it [00:07,  2.44it/s, loss=1.8475]


Epoch 770 Avg Loss: 1.8464


Epoch 771/1000: 19it [00:07,  2.44it/s, loss=1.8473]


Epoch 771 Avg Loss: 1.8463


Epoch 772/1000: 19it [00:07,  2.44it/s, loss=1.8447]


Epoch 772 Avg Loss: 1.8461


Epoch 773/1000: 19it [00:07,  2.44it/s, loss=1.8417]


Epoch 773 Avg Loss: 1.8451


Epoch 774/1000: 19it [00:07,  2.45it/s, loss=1.8452]


Epoch 774 Avg Loss: 1.8459


Epoch 775/1000: 19it [00:07,  2.45it/s, loss=1.8449]


Epoch 775 Avg Loss: 1.8447


Epoch 776/1000: 19it [00:07,  2.45it/s, loss=1.8419]


Epoch 776 Avg Loss: 1.8459


Epoch 777/1000: 19it [00:07,  2.45it/s, loss=1.8471]


Epoch 777 Avg Loss: 1.8452


Epoch 778/1000: 19it [00:07,  2.45it/s, loss=1.8449]


Epoch 778 Avg Loss: 1.8460


Epoch 779/1000: 19it [00:07,  2.46it/s, loss=1.8444]


Epoch 779 Avg Loss: 1.8446


Epoch 780/1000: 19it [00:07,  2.45it/s, loss=1.8439]


Epoch 780 Avg Loss: 1.8457


Epoch 781/1000: 19it [00:07,  2.45it/s, loss=1.8416]


Epoch 781 Avg Loss: 1.8448


Epoch 782/1000: 19it [00:07,  2.45it/s, loss=1.8447]


Epoch 782 Avg Loss: 1.8446


Epoch 783/1000: 19it [00:07,  2.45it/s, loss=1.8424]


Epoch 783 Avg Loss: 1.8450


Epoch 784/1000: 19it [00:07,  2.45it/s, loss=1.8451]


Epoch 784 Avg Loss: 1.8442


Epoch 785/1000: 19it [00:07,  2.44it/s, loss=1.8419]


Epoch 785 Avg Loss: 1.8457


Epoch 786/1000: 19it [00:07,  2.44it/s, loss=1.8493]


Epoch 786 Avg Loss: 1.8452


Epoch 787/1000: 19it [00:07,  2.44it/s, loss=1.8526]


Epoch 787 Avg Loss: 1.8444


Epoch 788/1000: 19it [00:07,  2.45it/s, loss=1.8479]


Epoch 788 Avg Loss: 1.8435


Epoch 789/1000: 19it [00:07,  2.44it/s, loss=1.8447]


Epoch 789 Avg Loss: 1.8441


Epoch 790/1000: 19it [00:07,  2.44it/s, loss=1.8492]


Epoch 790 Avg Loss: 1.8441


Epoch 791/1000: 19it [00:07,  2.45it/s, loss=1.8421]


Epoch 791 Avg Loss: 1.8456


Epoch 792/1000: 19it [00:07,  2.45it/s, loss=1.8416]


Epoch 792 Avg Loss: 1.8430


Epoch 793/1000: 19it [00:07,  2.45it/s, loss=1.8442]


Epoch 793 Avg Loss: 1.8421


Epoch 794/1000: 19it [00:07,  2.45it/s, loss=1.8459]


Epoch 794 Avg Loss: 1.8431


Epoch 795/1000: 19it [00:07,  2.44it/s, loss=1.8404]


Epoch 795 Avg Loss: 1.8443


Epoch 796/1000: 19it [00:07,  2.45it/s, loss=1.8451]


Epoch 796 Avg Loss: 1.8429


Epoch 797/1000: 19it [00:07,  2.44it/s, loss=1.8398]


Epoch 797 Avg Loss: 1.8421


Epoch 798/1000: 19it [00:07,  2.45it/s, loss=1.8380]


Epoch 798 Avg Loss: 1.8413


Epoch 799/1000: 19it [00:07,  2.45it/s, loss=1.8429]


Epoch 799 Avg Loss: 1.8427


Epoch 800/1000: 19it [00:07,  2.44it/s, loss=1.8423]


Epoch 800 Avg Loss: 1.8427


Epoch 801/1000: 19it [00:07,  2.44it/s, loss=1.8456]


Epoch 801 Avg Loss: 1.8413


Epoch 802/1000: 19it [00:07,  2.44it/s, loss=1.8434]


Epoch 802 Avg Loss: 1.8422


Epoch 803/1000: 19it [00:07,  2.44it/s, loss=1.8436]


Epoch 803 Avg Loss: 1.8414


Epoch 804/1000: 19it [00:07,  2.44it/s, loss=1.8447]


Epoch 804 Avg Loss: 1.8415


Epoch 805/1000: 19it [00:07,  2.45it/s, loss=1.8399]


Epoch 805 Avg Loss: 1.8427


Epoch 806/1000: 19it [00:07,  2.44it/s, loss=1.8416]


Epoch 806 Avg Loss: 1.8411


Epoch 807/1000: 19it [00:07,  2.45it/s, loss=1.8399]


Epoch 807 Avg Loss: 1.8415


Epoch 808/1000: 19it [00:07,  2.44it/s, loss=1.8412]


Epoch 808 Avg Loss: 1.8402


Epoch 809/1000: 19it [00:07,  2.44it/s, loss=1.8412]


Epoch 809 Avg Loss: 1.8413


Epoch 810/1000: 19it [00:07,  2.44it/s, loss=1.8362]


Epoch 810 Avg Loss: 1.8409


Epoch 811/1000: 19it [00:07,  2.44it/s, loss=1.8381]


Epoch 811 Avg Loss: 1.8406


Epoch 812/1000: 19it [00:07,  2.44it/s, loss=1.8392]


Epoch 812 Avg Loss: 1.8406


Epoch 813/1000: 19it [00:07,  2.44it/s, loss=1.8464]


Epoch 813 Avg Loss: 1.8405


Epoch 814/1000: 19it [00:07,  2.44it/s, loss=1.8420]


Epoch 814 Avg Loss: 1.8402


Epoch 815/1000: 19it [00:07,  2.44it/s, loss=1.8415]


Epoch 815 Avg Loss: 1.8410


Epoch 816/1000: 19it [00:07,  2.44it/s, loss=1.8412]


Epoch 816 Avg Loss: 1.8402


Epoch 817/1000: 19it [00:07,  2.44it/s, loss=1.8358]


Epoch 817 Avg Loss: 1.8385


Epoch 818/1000: 19it [00:07,  2.44it/s, loss=1.8394]


Epoch 818 Avg Loss: 1.8398


Epoch 819/1000: 19it [00:07,  2.44it/s, loss=1.8401]


Epoch 819 Avg Loss: 1.8396


Epoch 820/1000: 19it [00:07,  2.44it/s, loss=1.8368]


Epoch 820 Avg Loss: 1.8385


Epoch 821/1000: 19it [00:07,  2.44it/s, loss=1.8404]


Epoch 821 Avg Loss: 1.8389


Epoch 822/1000: 19it [00:07,  2.44it/s, loss=1.8370]


Epoch 822 Avg Loss: 1.8380


Epoch 823/1000: 19it [00:07,  2.44it/s, loss=1.8348]


Epoch 823 Avg Loss: 1.8396


Epoch 824/1000: 19it [00:07,  2.44it/s, loss=1.8373]


Epoch 824 Avg Loss: 1.8384


Epoch 825/1000: 19it [00:07,  2.45it/s, loss=1.8381]


Epoch 825 Avg Loss: 1.8387


Epoch 826/1000: 19it [00:07,  2.45it/s, loss=1.8387]


Epoch 826 Avg Loss: 1.8387


Epoch 827/1000: 19it [00:07,  2.45it/s, loss=1.8379]


Epoch 827 Avg Loss: 1.8382


Epoch 828/1000: 19it [00:07,  2.45it/s, loss=1.8378]


Epoch 828 Avg Loss: 1.8386


Epoch 829/1000: 19it [00:07,  2.45it/s, loss=1.8361]


Epoch 829 Avg Loss: 1.8379


Epoch 830/1000: 19it [00:07,  2.45it/s, loss=1.8414]


Epoch 830 Avg Loss: 1.8367


Epoch 831/1000: 19it [00:07,  2.45it/s, loss=1.8354]


Epoch 831 Avg Loss: 1.8374


Epoch 832/1000: 19it [00:07,  2.45it/s, loss=1.8410]


Epoch 832 Avg Loss: 1.8366


Epoch 833/1000: 19it [00:07,  2.45it/s, loss=1.8388]


Epoch 833 Avg Loss: 1.8377


Epoch 834/1000: 19it [00:07,  2.45it/s, loss=1.8389]


Epoch 834 Avg Loss: 1.8365


Epoch 835/1000: 19it [00:07,  2.45it/s, loss=1.8411]


Epoch 835 Avg Loss: 1.8368


Epoch 836/1000: 19it [00:07,  2.45it/s, loss=1.8366]


Epoch 836 Avg Loss: 1.8363


Epoch 837/1000: 19it [00:07,  2.45it/s, loss=1.8352]


Epoch 837 Avg Loss: 1.8380


Epoch 838/1000: 19it [00:07,  2.45it/s, loss=1.8346]


Epoch 838 Avg Loss: 1.8370


Epoch 839/1000: 19it [00:07,  2.45it/s, loss=1.8384]


Epoch 839 Avg Loss: 1.8367


Epoch 840/1000: 19it [00:07,  2.45it/s, loss=1.8379]


Epoch 840 Avg Loss: 1.8362


Epoch 841/1000: 19it [00:07,  2.45it/s, loss=1.8390]


Epoch 841 Avg Loss: 1.8369


Epoch 842/1000: 19it [00:07,  2.45it/s, loss=1.8367]


Epoch 842 Avg Loss: 1.8366


Epoch 843/1000: 19it [00:07,  2.45it/s, loss=1.8380]


Epoch 843 Avg Loss: 1.8358


Epoch 844/1000: 19it [00:07,  2.45it/s, loss=1.8343]


Epoch 844 Avg Loss: 1.8359


Epoch 845/1000: 19it [00:07,  2.44it/s, loss=1.8355]


Epoch 845 Avg Loss: 1.8359


Epoch 846/1000: 19it [00:07,  2.45it/s, loss=1.8352]


Epoch 846 Avg Loss: 1.8361


Epoch 847/1000: 19it [00:07,  2.45it/s, loss=1.8332]


Epoch 847 Avg Loss: 1.8337


Epoch 848/1000: 19it [00:07,  2.45it/s, loss=1.8322]


Epoch 848 Avg Loss: 1.8353


Epoch 849/1000: 19it [00:07,  2.45it/s, loss=1.8364]


Epoch 849 Avg Loss: 1.8341


Epoch 850/1000: 19it [00:07,  2.45it/s, loss=1.8371]


Epoch 850 Avg Loss: 1.8347


Epoch 851/1000: 19it [00:07,  2.45it/s, loss=1.8426]


Epoch 851 Avg Loss: 1.8353


Epoch 852/1000: 19it [00:07,  2.45it/s, loss=1.8366]


Epoch 852 Avg Loss: 1.8349


Epoch 853/1000: 19it [00:07,  2.45it/s, loss=1.8374]


Epoch 853 Avg Loss: 1.8336


Epoch 854/1000: 19it [00:07,  2.45it/s, loss=1.8323]


Epoch 854 Avg Loss: 1.8352


Epoch 855/1000: 19it [00:07,  2.45it/s, loss=1.8325]


Epoch 855 Avg Loss: 1.8346


Epoch 856/1000: 19it [00:07,  2.44it/s, loss=1.8328]


Epoch 856 Avg Loss: 1.8338


Epoch 857/1000: 19it [00:07,  2.44it/s, loss=1.8375]


Epoch 857 Avg Loss: 1.8334


Epoch 858/1000: 19it [00:07,  2.44it/s, loss=1.8354]


Epoch 858 Avg Loss: 1.8346


Epoch 859/1000: 19it [00:07,  2.45it/s, loss=1.8274]


Epoch 859 Avg Loss: 1.8325


Epoch 860/1000: 19it [00:07,  2.45it/s, loss=1.8271]


Epoch 860 Avg Loss: 1.8336


Epoch 861/1000: 19it [00:07,  2.45it/s, loss=1.8371]


Epoch 861 Avg Loss: 1.8332


Epoch 862/1000: 19it [00:07,  2.44it/s, loss=1.8371]


Epoch 862 Avg Loss: 1.8336


Epoch 863/1000: 19it [00:07,  2.45it/s, loss=1.8339]


Epoch 863 Avg Loss: 1.8325


Epoch 864/1000: 19it [00:07,  2.45it/s, loss=1.8371]


Epoch 864 Avg Loss: 1.8335


Epoch 865/1000: 19it [00:07,  2.45it/s, loss=1.8254]


Epoch 865 Avg Loss: 1.8325


Epoch 866/1000: 19it [00:07,  2.44it/s, loss=1.8307]


Epoch 866 Avg Loss: 1.8324


Epoch 867/1000: 19it [00:07,  2.45it/s, loss=1.8324]


Epoch 867 Avg Loss: 1.8320


Epoch 868/1000: 19it [00:07,  2.44it/s, loss=1.8331]


Epoch 868 Avg Loss: 1.8328


Epoch 869/1000: 19it [00:07,  2.45it/s, loss=1.8289]


Epoch 869 Avg Loss: 1.8307


Epoch 870/1000: 19it [00:07,  2.44it/s, loss=1.8303]


Epoch 870 Avg Loss: 1.8322


Epoch 871/1000: 19it [00:07,  2.45it/s, loss=1.8268]


Epoch 871 Avg Loss: 1.8322


Epoch 872/1000: 19it [00:07,  2.45it/s, loss=1.8317]


Epoch 872 Avg Loss: 1.8312


Epoch 873/1000: 19it [00:07,  2.44it/s, loss=1.8321]


Epoch 873 Avg Loss: 1.8310


Epoch 874/1000: 19it [00:07,  2.45it/s, loss=1.8275]


Epoch 874 Avg Loss: 1.8315


Epoch 875/1000: 19it [00:07,  2.45it/s, loss=1.8285]


Epoch 875 Avg Loss: 1.8302


Epoch 876/1000: 19it [00:07,  2.45it/s, loss=1.8270]


Epoch 876 Avg Loss: 1.8303


Epoch 877/1000: 19it [00:07,  2.45it/s, loss=1.8286]


Epoch 877 Avg Loss: 1.8301


Epoch 878/1000: 19it [00:07,  2.45it/s, loss=1.8280]


Epoch 878 Avg Loss: 1.8304


Epoch 879/1000: 19it [00:07,  2.45it/s, loss=1.8242]


Epoch 879 Avg Loss: 1.8296


Epoch 880/1000: 19it [00:07,  2.45it/s, loss=1.8283]


Epoch 880 Avg Loss: 1.8304


Epoch 881/1000: 19it [00:07,  2.45it/s, loss=1.8287]


Epoch 881 Avg Loss: 1.8313


Epoch 882/1000: 19it [00:07,  2.45it/s, loss=1.8326]


Epoch 882 Avg Loss: 1.8293


Epoch 883/1000: 19it [00:07,  2.45it/s, loss=1.8255]


Epoch 883 Avg Loss: 1.8297


Epoch 884/1000: 19it [00:07,  2.44it/s, loss=1.8278]


Epoch 884 Avg Loss: 1.8293


Epoch 885/1000: 19it [00:07,  2.44it/s, loss=1.8316]


Epoch 885 Avg Loss: 1.8296


Epoch 886/1000: 19it [00:07,  2.44it/s, loss=1.8243]


Epoch 886 Avg Loss: 1.8287


Epoch 887/1000: 19it [00:07,  2.44it/s, loss=1.8309]


Epoch 887 Avg Loss: 1.8295


Epoch 888/1000: 19it [00:07,  2.44it/s, loss=1.8301]


Epoch 888 Avg Loss: 1.8282


Epoch 889/1000: 19it [00:07,  2.44it/s, loss=1.8262]


Epoch 889 Avg Loss: 1.8291


Epoch 890/1000: 19it [00:07,  2.44it/s, loss=1.8276]


Epoch 890 Avg Loss: 1.8291


Epoch 891/1000: 19it [00:07,  2.45it/s, loss=1.8291]


Epoch 891 Avg Loss: 1.8278


Epoch 892/1000: 19it [00:07,  2.45it/s, loss=1.8280]


Epoch 892 Avg Loss: 1.8276


Epoch 893/1000: 19it [00:07,  2.44it/s, loss=1.8250]


Epoch 893 Avg Loss: 1.8277


Epoch 894/1000: 19it [00:07,  2.44it/s, loss=1.8288]


Epoch 894 Avg Loss: 1.8278


Epoch 895/1000: 19it [00:07,  2.44it/s, loss=1.8226]


Epoch 895 Avg Loss: 1.8274


Epoch 896/1000: 19it [00:07,  2.44it/s, loss=1.8254]


Epoch 896 Avg Loss: 1.8273


Epoch 897/1000: 19it [00:07,  2.44it/s, loss=1.8291]


Epoch 897 Avg Loss: 1.8272


Epoch 898/1000: 19it [00:07,  2.44it/s, loss=1.8273]


Epoch 898 Avg Loss: 1.8264


Epoch 899/1000: 19it [00:07,  2.44it/s, loss=1.8226]


Epoch 899 Avg Loss: 1.8264


Epoch 900/1000: 19it [00:07,  2.44it/s, loss=1.8285]


Epoch 900 Avg Loss: 1.8264


Epoch 901/1000: 19it [00:07,  2.44it/s, loss=1.8248]


Epoch 901 Avg Loss: 1.8260


Epoch 902/1000: 19it [00:07,  2.44it/s, loss=1.8267]


Epoch 902 Avg Loss: 1.8256


Epoch 903/1000: 19it [00:07,  2.44it/s, loss=1.8290]


Epoch 903 Avg Loss: 1.8262


Epoch 904/1000: 19it [00:07,  2.44it/s, loss=1.8217]


Epoch 904 Avg Loss: 1.8245


Epoch 905/1000: 19it [00:07,  2.44it/s, loss=1.8267]


Epoch 905 Avg Loss: 1.8253


Epoch 906/1000: 19it [00:07,  2.44it/s, loss=1.8250]


Epoch 906 Avg Loss: 1.8246


Epoch 907/1000: 19it [00:07,  2.44it/s, loss=1.8219]


Epoch 907 Avg Loss: 1.8236


Epoch 908/1000: 19it [00:07,  2.44it/s, loss=1.8234]


Epoch 908 Avg Loss: 1.8237


Epoch 909/1000: 19it [00:07,  2.44it/s, loss=1.8292]


Epoch 909 Avg Loss: 1.8240


Epoch 910/1000: 19it [00:07,  2.44it/s, loss=1.8238]


Epoch 910 Avg Loss: 1.8238


Epoch 911/1000: 19it [00:07,  2.44it/s, loss=1.8221]


Epoch 911 Avg Loss: 1.8236


Epoch 912/1000: 19it [00:07,  2.44it/s, loss=1.8217]


Epoch 912 Avg Loss: 1.8233


Epoch 913/1000: 19it [00:07,  2.44it/s, loss=1.8231]


Epoch 913 Avg Loss: 1.8230


Epoch 914/1000: 19it [00:07,  2.44it/s, loss=1.8197]


Epoch 914 Avg Loss: 1.8227


Epoch 915/1000: 19it [00:07,  2.44it/s, loss=1.8237]


Epoch 915 Avg Loss: 1.8221


Epoch 916/1000: 19it [00:07,  2.44it/s, loss=1.8199]


Epoch 916 Avg Loss: 1.8220


Epoch 917/1000: 19it [00:07,  2.44it/s, loss=1.8219]


Epoch 917 Avg Loss: 1.8213


Epoch 918/1000: 19it [00:07,  2.44it/s, loss=1.8212]


Epoch 918 Avg Loss: 1.8212


Epoch 919/1000: 19it [00:07,  2.44it/s, loss=1.8216]


Epoch 919 Avg Loss: 1.8202


Epoch 920/1000: 19it [00:07,  2.44it/s, loss=1.8239]


Epoch 920 Avg Loss: 1.8207


Epoch 921/1000: 19it [00:07,  2.44it/s, loss=1.8232]


Epoch 921 Avg Loss: 1.8210


Epoch 922/1000: 19it [00:07,  2.44it/s, loss=1.8183]


Epoch 922 Avg Loss: 1.8190


Epoch 923/1000: 19it [00:07,  2.45it/s, loss=1.8257]


Epoch 923 Avg Loss: 1.8218


Epoch 924/1000: 19it [00:07,  2.45it/s, loss=1.8174]


Epoch 924 Avg Loss: 1.8197


Epoch 925/1000: 19it [00:07,  2.45it/s, loss=1.8202]


Epoch 925 Avg Loss: 1.8196


Epoch 926/1000: 19it [00:07,  2.45it/s, loss=1.8220]


Epoch 926 Avg Loss: 1.8182


Epoch 927/1000: 19it [00:07,  2.45it/s, loss=1.8200]


Epoch 927 Avg Loss: 1.8193


Epoch 928/1000: 19it [00:07,  2.45it/s, loss=1.8155]


Epoch 928 Avg Loss: 1.8178


Epoch 929/1000: 19it [00:07,  2.45it/s, loss=1.8206]


Epoch 929 Avg Loss: 1.8184


Epoch 930/1000: 19it [00:07,  2.44it/s, loss=1.8230]


Epoch 930 Avg Loss: 1.8187


Epoch 931/1000: 19it [00:07,  2.44it/s, loss=1.8162]


Epoch 931 Avg Loss: 1.8177


Epoch 932/1000: 19it [00:07,  2.44it/s, loss=1.8181]


Epoch 932 Avg Loss: 1.8172


Epoch 933/1000: 19it [00:07,  2.44it/s, loss=1.8166]


Epoch 933 Avg Loss: 1.8173


Epoch 934/1000: 19it [00:07,  2.44it/s, loss=1.8201]


Epoch 934 Avg Loss: 1.8165


Epoch 935/1000: 19it [00:07,  2.44it/s, loss=1.8188]


Epoch 935 Avg Loss: 1.8161


Epoch 936/1000: 19it [00:07,  2.44it/s, loss=1.8190]


Epoch 936 Avg Loss: 1.8152


Epoch 937/1000: 19it [00:07,  2.44it/s, loss=1.8153]


Epoch 937 Avg Loss: 1.8156


Epoch 938/1000: 19it [00:07,  2.44it/s, loss=1.8144]


Epoch 938 Avg Loss: 1.8148


Epoch 939/1000: 19it [00:07,  2.44it/s, loss=1.8121]


Epoch 939 Avg Loss: 1.8149


Epoch 940/1000: 19it [00:07,  2.44it/s, loss=1.8101]


Epoch 940 Avg Loss: 1.8154


Epoch 941/1000: 19it [00:07,  2.44it/s, loss=1.8167]


Epoch 941 Avg Loss: 1.8146


Epoch 942/1000: 19it [00:07,  2.45it/s, loss=1.8177]


Epoch 942 Avg Loss: 1.8152


Epoch 943/1000: 19it [00:07,  2.44it/s, loss=1.8160]


Epoch 943 Avg Loss: 1.8144


Epoch 944/1000: 19it [00:07,  2.45it/s, loss=1.8146]


Epoch 944 Avg Loss: 1.8140


Epoch 945/1000: 19it [00:07,  2.44it/s, loss=1.8115]


Epoch 945 Avg Loss: 1.8132


Epoch 946/1000: 19it [00:07,  2.45it/s, loss=1.8127]


Epoch 946 Avg Loss: 1.8133


Epoch 947/1000: 19it [00:07,  2.44it/s, loss=1.8144]


Epoch 947 Avg Loss: 1.8126


Epoch 948/1000: 19it [00:07,  2.44it/s, loss=1.8123]


Epoch 948 Avg Loss: 1.8126


Epoch 949/1000: 19it [00:07,  2.44it/s, loss=1.8109]


Epoch 949 Avg Loss: 1.8114


Epoch 950/1000: 19it [00:07,  2.44it/s, loss=1.8082]


Epoch 950 Avg Loss: 1.8120


Epoch 951/1000: 19it [00:07,  2.44it/s, loss=1.8098]


Epoch 951 Avg Loss: 1.8109


Epoch 952/1000: 19it [00:07,  2.45it/s, loss=1.8117]


Epoch 952 Avg Loss: 1.8123


Epoch 953/1000: 19it [00:07,  2.44it/s, loss=1.8088]


Epoch 953 Avg Loss: 1.8119


Epoch 954/1000: 19it [00:07,  2.44it/s, loss=1.8114]


Epoch 954 Avg Loss: 1.8115


Epoch 955/1000: 19it [00:07,  2.44it/s, loss=1.8053]


Epoch 955 Avg Loss: 1.8117


Epoch 956/1000: 19it [00:07,  2.44it/s, loss=1.8116]


Epoch 956 Avg Loss: 1.8116


Epoch 957/1000: 19it [00:07,  2.45it/s, loss=1.8124]


Epoch 957 Avg Loss: 1.8104


Epoch 958/1000: 19it [00:07,  2.44it/s, loss=1.8075]


Epoch 958 Avg Loss: 1.8106


Epoch 959/1000: 19it [00:07,  2.44it/s, loss=1.8098]


Epoch 959 Avg Loss: 1.8104


Epoch 960/1000: 19it [00:07,  2.45it/s, loss=1.8112]


Epoch 960 Avg Loss: 1.8101


Epoch 961/1000: 19it [00:07,  2.45it/s, loss=1.8056]


Epoch 961 Avg Loss: 1.8105


Epoch 962/1000: 19it [00:07,  2.45it/s, loss=1.8127]


Epoch 962 Avg Loss: 1.8110


Epoch 963/1000: 19it [00:07,  2.45it/s, loss=1.8100]


Epoch 963 Avg Loss: 1.8098


Epoch 964/1000: 19it [00:07,  2.44it/s, loss=1.8111]


Epoch 964 Avg Loss: 1.8093


Epoch 965/1000: 19it [00:07,  2.44it/s, loss=1.8134]


Epoch 965 Avg Loss: 1.8100


Epoch 966/1000: 19it [00:07,  2.44it/s, loss=1.8071]


Epoch 966 Avg Loss: 1.8084


Epoch 967/1000: 19it [00:07,  2.45it/s, loss=1.8090]


Epoch 967 Avg Loss: 1.8086


Epoch 968/1000: 19it [00:07,  2.45it/s, loss=1.8087]


Epoch 968 Avg Loss: 1.8093


Epoch 969/1000: 19it [00:07,  2.45it/s, loss=1.8059]


Epoch 969 Avg Loss: 1.8075


Epoch 970/1000: 19it [00:07,  2.45it/s, loss=1.8078]


Epoch 970 Avg Loss: 1.8085


Epoch 971/1000: 19it [00:07,  2.45it/s, loss=1.8046]


Epoch 971 Avg Loss: 1.8074


Epoch 972/1000: 19it [00:07,  2.45it/s, loss=1.8071]


Epoch 972 Avg Loss: 1.8076


Epoch 973/1000: 19it [00:07,  2.44it/s, loss=1.8048]


Epoch 973 Avg Loss: 1.8081


Epoch 974/1000: 19it [00:07,  2.44it/s, loss=1.8058]


Epoch 974 Avg Loss: 1.8072


Epoch 975/1000: 19it [00:07,  2.44it/s, loss=1.8052]


Epoch 975 Avg Loss: 1.8084


Epoch 976/1000: 19it [00:07,  2.45it/s, loss=1.8085]


Epoch 976 Avg Loss: 1.8070


Epoch 977/1000: 19it [00:07,  2.45it/s, loss=1.8070]


Epoch 977 Avg Loss: 1.8074


Epoch 978/1000: 19it [00:07,  2.44it/s, loss=1.8098]


Epoch 978 Avg Loss: 1.8078


Epoch 979/1000: 19it [00:07,  2.44it/s, loss=1.8087]


Epoch 979 Avg Loss: 1.8065


Epoch 980/1000: 19it [00:07,  2.44it/s, loss=1.8091]


Epoch 980 Avg Loss: 1.8063


Epoch 981/1000: 19it [00:07,  2.44it/s, loss=1.8063]


Epoch 981 Avg Loss: 1.8063


Epoch 982/1000: 19it [00:07,  2.45it/s, loss=1.8097]


Epoch 982 Avg Loss: 1.8069


Epoch 983/1000: 19it [00:07,  2.45it/s, loss=1.8104]


Epoch 983 Avg Loss: 1.8052


Epoch 984/1000: 19it [00:07,  2.44it/s, loss=1.8056]


Epoch 984 Avg Loss: 1.8063


Epoch 985/1000: 19it [00:07,  2.45it/s, loss=1.8046]


Epoch 985 Avg Loss: 1.8061


Epoch 986/1000: 19it [00:07,  2.44it/s, loss=1.8050]


Epoch 986 Avg Loss: 1.8050


Epoch 987/1000: 19it [00:07,  2.44it/s, loss=1.8023]


Epoch 987 Avg Loss: 1.8047


Epoch 988/1000: 19it [00:07,  2.44it/s, loss=1.8046]


Epoch 988 Avg Loss: 1.8052


Epoch 989/1000: 19it [00:07,  2.44it/s, loss=1.8030]


Epoch 989 Avg Loss: 1.8054


Epoch 990/1000: 19it [00:07,  2.45it/s, loss=1.8048]


Epoch 990 Avg Loss: 1.8052


Epoch 991/1000: 19it [00:07,  2.45it/s, loss=1.8089]


Epoch 991 Avg Loss: 1.8046


Epoch 992/1000: 19it [00:07,  2.45it/s, loss=1.8054]


Epoch 992 Avg Loss: 1.8035


Epoch 993/1000: 19it [00:07,  2.45it/s, loss=1.8081]


Epoch 993 Avg Loss: 1.8041


Epoch 994/1000: 19it [00:07,  2.44it/s, loss=1.8039]


Epoch 994 Avg Loss: 1.8033


Epoch 995/1000: 19it [00:07,  2.44it/s, loss=1.8062]


Epoch 995 Avg Loss: 1.8029


Epoch 996/1000: 19it [00:07,  2.44it/s, loss=1.8035]


Epoch 996 Avg Loss: 1.8039


Epoch 997/1000: 19it [00:07,  2.44it/s, loss=1.8054]


Epoch 997 Avg Loss: 1.8028


Epoch 998/1000: 19it [00:07,  2.44it/s, loss=1.8011]


Epoch 998 Avg Loss: 1.8032


Epoch 999/1000: 19it [00:07,  2.44it/s, loss=1.8004]


Epoch 999 Avg Loss: 1.8034


Epoch 1000/1000: 19it [00:07,  2.45it/s, loss=1.8035]

Epoch 1000 Avg Loss: 1.8019





In [None]:
@partial(jax.jit, static_argnums=(2, 3, 4, 6, 7))
def generate_patch_autoregressive_trajectory(
    transformer_state, initial_history_tokens, num_predictions, history_len, tokens_per_frame, key, top_k, temperature):
    """Roll out `num_predictions` frames, sampling each frame one token at a time.

    Two nested `jax.lax.scan` loops are used: the outer one slides a
    frame-level history window forward, the inner one fills in the tokens of
    the next frame left to right.

    Args:
        transformer_state: object exposing `apply_fn` and `params`; `apply_fn`
            is called as `apply_fn({'params': ...}, tokens, training=False, rngs=...)`
            and must return logits indexed as (batch, position, vocabulary).
        initial_history_tokens: 1-D token array holding the flattened history;
            the index arithmetic below expects `history_len * tokens_per_frame`
            entries (assumed int32 so the scan carry keeps a stable dtype).
        num_predictions: number of frames to generate (static).
        history_len: number of frames in the history window (static).
        tokens_per_frame: number of tokens that make up one frame (static).
        key: PRNG key; split once per frame and folded once per token.
        top_k: number of highest logits kept before sampling (static).
        temperature: divisor applied to the logits before sampling (static).

    Returns:
        Array of shape (num_predictions, tokens_per_frame) with the sampled
        tokens of every generated frame.
    """
    # Number of tokens occupied by the history window.
    context_len = history_len * tokens_per_frame
    # Positions inside the frame that the inner scan walks over.
    token_offsets = jnp.arange(tokens_per_frame)

    def sample_frame(context_tokens, frame_key):
        """Sample every token of the frame that follows `context_tokens`."""

        def fill_slot(frame_buffer, offset):
            # One key per token position, derived from the frame key.
            token_key = jax.random.fold_in(frame_key, offset)

            # The model always sees the buffer minus its last slot, so the
            # input length is the same at every step.
            logits = transformer_state.apply_fn(
                {'params': transformer_state.params},
                frame_buffer[:-1][jnp.newaxis, :],
                training=False, rngs={'dropout': token_key}
            )

            # The logits at position p predict the token at position p + 1,
            # so slot `context_len + offset` is read from one step earlier.
            source_pos = context_len + offset - 1
            scaled = logits[:, source_pos, :] / temperature

            # Top-k filtering: everything below the k-th largest logit is
            # pushed to -inf before drawing the sample.
            kept_values, _ = jax.lax.top_k(scaled, k=top_k)
            cutoff = jnp.min(kept_values)
            filtered = jnp.where(scaled < cutoff, -jnp.inf, scaled)
            sampled = jax.random.categorical(token_key, filtered)

            # Write the sample into its slot; the buffer shape never changes,
            # and the buffer itself is the only thing carried forward.
            return frame_buffer.at[source_pos + 1].set(sampled[0]), None

        # History followed by zero placeholders for the frame being generated.
        blank_frame = jnp.zeros(tokens_per_frame, dtype=jnp.int32)
        filled_buffer, _ = jax.lax.scan(
            fill_slot,
            jnp.concatenate([context_tokens, blank_frame]),
            token_offsets
        )

        # Only the freshly generated tail is of interest.
        return filled_buffer[-tokens_per_frame:]

    def advance_window(context_tokens, frame_key):
        """Generate one frame and slide the history window forward by it."""
        new_frame = sample_frame(context_tokens, frame_key)
        # Drop the oldest frame, append the newest.
        slid_window = jnp.concatenate([context_tokens[tokens_per_frame:], new_frame])
        return slid_window, new_frame

    # One independent key per generated frame.
    frame_keys = jax.random.split(key, num_predictions)
    _, generated_frames = jax.lax.scan(advance_window, initial_history_tokens, frame_keys)

    return generated_frames

In [None]:
# Build a 100-frame reference trajectory, tokenise it with the VQ-VAE, and let
# the autoregressive transformer predict everything after the first
# `history_len` frames. The exact contents of `all_frames` / `orig_imgs` are
# defined by `get_trajectory` elsewhere in the notebook.
all_frames, orig_imgs = get_trajectory(num_frames=100)

get_tokens_fn, decode_fn = get_tokeniser_fns(vqvae_state)

all_tokens = get_tokens_fn(all_frames)

# Split off a dedicated key for sampling instead of consuming the notebook-level
# `key` directly: later cells split `key` again, and JAX keys must not be used
# more than once or the resulting random streams are correlated.
key, generation_key = jax.random.split(key)

# The first `history_len` token grids are flattened into the initial context;
# one frame is predicted for each remaining ground-truth frame.
autoregressive_predicted_token_grids = generate_patch_autoregressive_trajectory(
    transformer_state, all_tokens[:history_len].flatten(), len(all_tokens)-history_len,
    history_len, tokens_per_frame, generation_key, top_k_sampling, sampling_temperature
)

# Render the prediction to an mp4 file; the two return values are reused by the
# metrics cell below.
predicted_frames, check_decoder = animate_prediction(
    'autoregressive_predicted_trajectory.mp4',
    orig_imgs,
    all_frames,
    all_tokens,
    autoregressive_predicted_token_grids,
    get_tokens_fn,
    decode_fn,
    history_len,
)

🎥 Generating prediction and creating animation...
✅ Animation saved to 'autoregressive_predicted_trajectory.mp4'


In [None]:
# Score the autoregressive rollout against ground truth. Only frames after the
# initial `history_len` context frames are compared, hence the slicing of
# `all_tokens` and `check_decoder`.
# NOTE(review): the argument order and the structure of the returned mapping are
# defined by `metrics`, which lives outside this cell -- confirm there.
autoregressive_metrics = metrics(
    autoregressive_predicted_token_grids,
    all_tokens[history_len:],
    predicted_frames,
    check_decoder[history_len:],
)
# Persist every entry of the metrics mapping as a named array in a compressed
# .npz archive written to the current working directory.
np.savez_compressed("autoregressive_metrics.npz", **autoregressive_metrics)

Tokens Correct (%) in the 0th predicted frame: 39.0625%
Tokens Correct (%) in the 1th predicted frame: 28.515625%
Tokens Correct (%) in the 2th predicted frame: 22.265625%
Tokens Correct (%) in the 3th predicted frame: 18.75%
Tokens Correct (%) in the 4th predicted frame: 14.453125%
Tokens Correct (%) in the 5th predicted frame: 10.546875%
Tokens Correct (%) in the 6th predicted frame: 7.03125%
Tokens Correct (%) in the 7th predicted frame: 5.078125%
Tokens Correct (%) in the 8th predicted frame: 5.46875%
Tokens Correct (%) in the 9th predicted frame: 5.078125%
Tokens Correct (%) in the 10th predicted frame: 3.90625%
Tokens Correct (%) in the 11th predicted frame: 5.078125%
Tokens Correct (%) in the 12th predicted frame: 4.6875%
Tokens Correct (%) in the 13th predicted frame: 4.296875%
Tokens Correct (%) in the 14th predicted frame: 2.734375%
Tokens Correct (%) in the 15th predicted frame: 1.953125%
Tokens Correct (%) in the 16th predicted frame: 2.734375%
Tokens Correct (%) in the 17t

In [None]:
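# Optional: uncomment the two lines below to release the Colab runtime once the
# cells above have finished (all in-memory notebook state is lost).
# from google.colab import runtime
# runtime.unassign()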

## Encoder-Decoder Transformer

In [None]:
class OneshotFrameTransformer(nn.Module):
    """Predicts the logits of every token of the next frame in one forward pass.

    A set of learned per-position target queries attends to the embedded
    history tokens (plus a projection of the target positions themselves) and
    is then projected to vocabulary logits. No normalisation or dropout layers
    are created by this module.

    NOTE(review): submodules are auto-named by Flax in the order they are
    constructed inside `__call__`, so reordering the nested `Dense` expressions
    below would presumably change parameter names and invalidate saved
    checkpoints -- confirm before refactoring.
    """
    # Vocabulary size: width of the token embedding table and of the output logits.
    num_embeddings: int
    # Number of heads in every attention layer.
    num_heads: int
    # Number of attention + feed-forward iterations.
    num_layers: int
    # Embedding width; the feed-forward layers expand to 4x this size.
    embed_dim: int
    # Number of tokens predicted per frame (number of target positions).
    tokens_per_frame: int

    @nn.compact
    def __call__(self, history_tokens):
        """Compute next-frame logits from a batch of flattened history tokens.

        Args:
            history_tokens: integer token ids indexed as (batch, sequence).

        Returns:
            Logits of shape (batch, tokens_per_frame, num_embeddings).
        """
        # --- HISTORY EMBEDDING ---
        # Token embedding plus a learned positional embedding. There is no
        # separate encoder stack: the sum is used directly as attention context.
        # The positional table is sized from the input's sequence length, so the
        # length presumably has to match the one used at init time.
        history_pos_len = history_tokens.shape[-1]

        history_emb = nn.Embed(num_embeddings=self.num_embeddings, features=self.embed_dim)(history_tokens)
        history_pos_emb = self.param('history_pos_embedding', nn.initializers.normal(stddev=0.02), (1, history_pos_len, self.embed_dim))
        encoder_input = history_emb + history_pos_emb

        # --- TARGET QUERIES ---
        # One learned vector per target position, tiled across the batch.
        target_pe = self.param('target_pos_embedding', nn.initializers.normal(stddev=0.02), (1, self.tokens_per_frame, self.embed_dim))
        target_pe = target_pe.repeat(history_tokens.shape[0], axis=0)
        # Two separately parameterised MLPs over the same positional input:
        # `target_context` joins the key/value sequence, `act` is the initial
        # query stream that the layers below refine.
        target_context = nn.Dense(self.embed_dim)(nn.gelu(nn.Dense(self.embed_dim * 4)(target_pe)))
        act = nn.Dense(self.embed_dim)(nn.gelu(nn.Dense(self.embed_dim * 4)(target_pe)))

        # Keys/values: history positions followed by the target-position context.
        # This is computed once and is not updated inside the layer loop.
        context = jnp.concatenate([encoder_input, target_context], axis=1)

        for _ in range(self.num_layers):
            # Residual attention: the target stream `act` provides the queries,
            # `context` provides keys and values. The attention module is
            # configured with dtype=bfloat16.
            act += nn.MultiHeadDotProductAttention(
                num_heads=self.num_heads, dtype=jnp.bfloat16
            )(inputs_q=act, inputs_kv=context)
            # Residual feed-forward layer (4x expansion with GELU)
            act += nn.Dense(self.embed_dim)(nn.gelu(nn.Dense(self.embed_dim * 4)(act)))

        # Project every target position to vocabulary logits.
        logits = nn.Dense(features=self.num_embeddings)(act)
        return logits


@partial(jax.jit, static_argnums=(5, 6))
def train_step(transformer_state, vqvae_state, points, velocities, key, model, history_len):
    """Run one optimisation step of the one-shot frame model.

    The batch is tokenised with `get_batch`, split into history and target
    tokens, and the mean cross-entropy between the predicted logits and the
    target tokens is minimised.

    Args:
        transformer_state: train state holding `params`, `step` and the optimiser.
        vqvae_state: state forwarded to `get_batch` for tokenisation.
        points: batch input forwarded to `get_batch`.
        velocities: batch input forwarded to `get_batch`.
        key: PRNG key; folded with the current step to derive the dropout rng.
        model: module whose `apply` is differentiated (static).
        history_len: number of history frames in the model input (static).

    Returns:
        Tuple `(updated_state, loss)`.

    Note:
        `tokens_per_frame` and `get_batch` are read from the notebook's global
        namespace rather than passed in.
    """
    # Column at which the flattened token sequence switches from history to target.
    split_at = history_len * tokens_per_frame

    token_batch = get_batch(points, velocities, history_len, vqvae_state)
    context_tokens = token_batch[:, :split_at]
    next_frame_tokens = token_batch[:, split_at:]

    # A distinct rng for every optimisation step.
    step_rng = jax.random.fold_in(key, transformer_state.step)

    def mean_cross_entropy(params):
        # The model only sees the history; its output covers the whole target frame.
        predicted_logits = model.apply(
            {'params': params},
            context_tokens,
            rngs={'dropout': step_rng}
        )
        per_token_loss = optax.softmax_cross_entropy_with_integer_labels(
            logits=predicted_logits, labels=next_frame_tokens
        )
        return per_token_loss.mean()

    loss, grads = jax.value_and_grad(mean_cross_entropy)(transformer_state.params)
    return transformer_state.apply_gradients(grads=grads), loss


@partial(jax.jit, static_argnums=(1, 2, 5))
def generate_trajectory(transformer_state, history_len, tokens_per_frame, key, initial_history_tokens, num_predictions):
    """Roll out `num_predictions` frames, sampling one whole frame per model call.

    Args:
        transformer_state: object exposing `apply_fn` and `params`; `apply_fn`
            is called as `apply_fn({'params': ...}, tokens, rngs=...)` and must
            return logits indexed as (batch, frame position, vocabulary).
        history_len: accepted for interface compatibility; not read here (static).
        tokens_per_frame: number of tokens dropped from the front of the history
            after every step (static).
        key: PRNG key; split into one key per generated frame.
        initial_history_tokens: 1-D token array used as the first model input.
        num_predictions: number of frames to generate (static).

    Returns:
        Array with the sampled frames stacked along the first axis.
    """
    def rollout_step(window, step_key):
        # Single forward pass over the current window (batch dimension added).
        frame_logits = transformer_state.apply_fn(
            {'params': transformer_state.params},
            jnp.expand_dims(window, axis=0),
            rngs={'dropout': step_key}
        )

        # Draw every token of the frame independently from its own distribution.
        sampled = jax.random.categorical(step_key, frame_logits, axis=-1)
        new_frame = jnp.squeeze(sampled, axis=0)

        # Slide the window: drop the oldest frame, append the new one.
        next_window = jnp.concatenate([window[tokens_per_frame:], new_frame])
        return next_window, new_frame

    # One key per frame; scan stacks the per-step frames into the result.
    _, generated_frames = jax.lax.scan(
        rollout_step,
        initial_history_tokens,
        jax.random.split(key, num_predictions)
    )

    return generated_frames

In [None]:
# --- Initialize Transformer Model ---
# Builds the one-shot frame model, initialises its parameters from a dummy
# input, wraps them in a TrainState with an Adam optimiser, and resumes from the
# newest checkpoint when one is available.
print("🤖 Initializing Transformer model...")
oneshot_model = OneshotFrameTransformer(
    num_embeddings=vq_num_embeddings, num_heads=transformer_num_heads, num_layers=transformer_num_layers,
    embed_dim=transformer_embed_dim, tokens_per_frame=tokens_per_frame
)
# Advance the notebook-level key and derive separate keys for parameter
# initialisation and for the 'dropout' rng stream.
key, transformer_key, dropout_key = jax.random.split(key, 3)
# A single all-ones sequence of history_len * tokens_per_frame token ids is
# enough to trace the parameter shapes.
dummy_transformer_input = jnp.ones((1, history_len * tokens_per_frame), dtype=jnp.int32)
oneshot_params = oneshot_model.init(
    {'params': transformer_key, 'dropout': dropout_key}, dummy_transformer_input
)['params']
oneshot_state = train_state.TrainState.create(
    apply_fn=oneshot_model.apply, params=oneshot_params, tx=optax.adam(learning_rate=transformer_lr)
)

# --- Define an absolute directory for checkpoints ---
# The relative path is resolved against the current working directory before
# being made absolute; the directory is created if it is missing.
os_ckpt_dir = os.path.abspath('./oneshot_transformer_checkpoints')
os.makedirs(os_ckpt_dir, exist_ok=True)

# --- Restore checkpoint if one exists ---
# restore_checkpoint does not modify its target: it returns a state restored
# from the latest checkpoint, which is rebound to `oneshot_state` here.
# If no checkpoint exists, it returns the original state object.
oneshot_state = checkpoints.restore_checkpoint(ckpt_dir=os_ckpt_dir, target=oneshot_state)
print(f"Resuming training from step: {int(oneshot_state.step)}")

🤖 Initializing Transformer model...
Resuming training from step: 0


In [None]:
# --- Phase: Train the Transformer ---
# Trains the one-shot model for `transformer_epochs` epochs. A fresh data loader
# is created every epoch, the running loss is shown in the progress bar, and a
# checkpoint is written every 50 epochs and after the final epoch.
print("🚀 Starting Transformer training...")
for epoch in range(transformer_epochs):
    data_loader = create_point_cloud_dataset(
        transformer_batch_size, history_len, num_points
    )
    pbar = tqdm(data_loader, desc=f"Epoch {epoch + 1}/{transformer_epochs}")
    total_loss = 0
    # Counted explicitly so the average below never depends on a loop variable
    # that is undefined (or stale from an earlier epoch) when the loader is empty.
    num_batches = 0
    for num_batches, (points, velocities) in enumerate(pbar, start=1):
        # Fresh key per step; the notebook-level key is advanced each time.
        key, train_key = jax.random.split(key)
        oneshot_state, loss = train_step(
            oneshot_state, vqvae_state, points, velocities, train_key, oneshot_model,
            history_len
        )
        total_loss += loss
        pbar.set_postfix(loss=f"{loss:.4f}")

    # Also checkpoint after the last epoch: with the periodic rule alone the
    # final stretch of training (up to 49 epochs) would never be saved.
    is_final_epoch = epoch == transformer_epochs - 1
    if epoch % 50 == 0 or is_final_epoch:
        # --- Save checkpoint at the end of the epoch ---
        checkpoints.save_checkpoint(
            ckpt_dir=os_ckpt_dir,
            target=oneshot_state,
            step=int(oneshot_state.step),  # plain int rather than a device array
            overwrite=False, # Set to True to save only the latest checkpoint
            keep=3          # Keep the 3 most recent checkpoints
        )
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch + 1} Avg Loss: {avg_loss:.4f}")

🚀 Starting Transformer training...


Epoch 1/1000: 39it [00:16,  2.32it/s, loss=4.4526]


Epoch 1 Avg Loss: 4.4385


Epoch 2/1000: 39it [00:08,  4.55it/s, loss=4.4381]


Epoch 2 Avg Loss: 4.4399


Epoch 3/1000: 39it [00:08,  4.54it/s, loss=4.4610]


Epoch 3 Avg Loss: 4.4403


Epoch 4/1000: 39it [00:08,  4.53it/s, loss=4.4318]


Epoch 4 Avg Loss: 4.4366


Epoch 5/1000: 39it [00:08,  4.53it/s, loss=4.4308]


Epoch 5 Avg Loss: 4.4399


Epoch 6/1000: 39it [00:08,  4.52it/s, loss=4.4304]


Epoch 6 Avg Loss: 4.4380


Epoch 7/1000: 39it [00:08,  4.51it/s, loss=4.4285]


Epoch 7 Avg Loss: 4.4373


Epoch 8/1000: 39it [00:08,  4.51it/s, loss=4.4375]


Epoch 8 Avg Loss: 4.4391


Epoch 9/1000: 39it [00:08,  4.50it/s, loss=4.4360]


Epoch 9 Avg Loss: 4.4419


Epoch 10/1000: 39it [00:08,  4.50it/s, loss=4.4257]


Epoch 10 Avg Loss: 4.4414


Epoch 11/1000: 39it [00:08,  4.49it/s, loss=4.4484]


Epoch 11 Avg Loss: 4.4402


Epoch 12/1000: 39it [00:08,  4.49it/s, loss=4.4408]


Epoch 12 Avg Loss: 4.4395


Epoch 13/1000: 39it [00:08,  4.48it/s, loss=4.4375]


Epoch 13 Avg Loss: 4.4395


Epoch 14/1000: 39it [00:08,  4.48it/s, loss=4.4496]


Epoch 14 Avg Loss: 4.4382


Epoch 15/1000: 39it [00:08,  4.48it/s, loss=4.4299]


Epoch 15 Avg Loss: 4.4389


Epoch 16/1000: 39it [00:08,  4.48it/s, loss=4.4392]


Epoch 16 Avg Loss: 4.4376


Epoch 17/1000: 39it [00:08,  4.48it/s, loss=4.4223]


Epoch 17 Avg Loss: 4.4357


Epoch 18/1000: 39it [00:08,  4.49it/s, loss=4.4348]


Epoch 18 Avg Loss: 4.4324


Epoch 19/1000: 39it [00:08,  4.49it/s, loss=4.4348]


Epoch 19 Avg Loss: 4.4354


Epoch 20/1000: 39it [00:08,  4.49it/s, loss=4.4458]


Epoch 20 Avg Loss: 4.4391


Epoch 21/1000: 39it [00:08,  4.49it/s, loss=4.4333]


Epoch 21 Avg Loss: 4.4353


Epoch 22/1000: 39it [00:08,  4.49it/s, loss=4.4089]


Epoch 22 Avg Loss: 4.4336


Epoch 23/1000: 39it [00:08,  4.49it/s, loss=4.4374]


Epoch 23 Avg Loss: 4.4314


Epoch 24/1000: 39it [00:08,  4.48it/s, loss=4.4216]


Epoch 24 Avg Loss: 4.4318


Epoch 25/1000: 39it [00:08,  4.48it/s, loss=4.4141]


Epoch 25 Avg Loss: 4.4281


Epoch 26/1000: 39it [00:08,  4.48it/s, loss=4.4234]


Epoch 26 Avg Loss: 4.4297


Epoch 27/1000: 39it [00:08,  4.48it/s, loss=4.4472]


Epoch 27 Avg Loss: 4.4330


Epoch 28/1000: 39it [00:08,  4.48it/s, loss=4.4322]


Epoch 28 Avg Loss: 4.4294


Epoch 29/1000: 39it [00:08,  4.48it/s, loss=4.4323]


Epoch 29 Avg Loss: 4.4280


Epoch 30/1000: 39it [00:08,  4.48it/s, loss=4.4244]


Epoch 30 Avg Loss: 4.4254


Epoch 31/1000: 39it [00:08,  4.48it/s, loss=4.4395]


Epoch 31 Avg Loss: 4.4310


Epoch 32/1000: 39it [00:08,  4.49it/s, loss=4.4283]


Epoch 32 Avg Loss: 4.4256


Epoch 33/1000: 39it [00:08,  4.49it/s, loss=4.4391]


Epoch 33 Avg Loss: 4.4278


Epoch 34/1000: 39it [00:08,  4.49it/s, loss=4.4221]


Epoch 34 Avg Loss: 4.4262


Epoch 35/1000: 39it [00:08,  4.48it/s, loss=4.4272]


Epoch 35 Avg Loss: 4.4282


Epoch 36/1000: 39it [00:08,  4.49it/s, loss=4.4217]


Epoch 36 Avg Loss: 4.4241


Epoch 37/1000: 39it [00:08,  4.48it/s, loss=4.3992]


Epoch 37 Avg Loss: 4.4268


Epoch 38/1000: 39it [00:08,  4.49it/s, loss=4.4298]


Epoch 38 Avg Loss: 4.4244


Epoch 39/1000: 39it [00:08,  4.49it/s, loss=4.4415]


Epoch 39 Avg Loss: 4.4226


Epoch 40/1000: 39it [00:08,  4.48it/s, loss=4.4369]


Epoch 40 Avg Loss: 4.4213


Epoch 41/1000: 39it [00:08,  4.48it/s, loss=4.4030]


Epoch 41 Avg Loss: 4.4207


Epoch 42/1000: 39it [00:08,  4.48it/s, loss=4.4231]


Epoch 42 Avg Loss: 4.4203


Epoch 43/1000: 39it [00:08,  4.49it/s, loss=4.4207]


Epoch 43 Avg Loss: 4.4154


Epoch 44/1000: 39it [00:08,  4.48it/s, loss=4.4142]


Epoch 44 Avg Loss: 4.4156


Epoch 45/1000: 39it [00:08,  4.48it/s, loss=4.4158]


Epoch 45 Avg Loss: 4.4151


Epoch 46/1000: 39it [00:08,  4.48it/s, loss=4.4129]


Epoch 46 Avg Loss: 4.4148


Epoch 47/1000: 39it [00:08,  4.48it/s, loss=4.4131]


Epoch 47 Avg Loss: 4.4147


Epoch 48/1000: 39it [00:08,  4.49it/s, loss=4.4107]


Epoch 48 Avg Loss: 4.4127


Epoch 49/1000: 39it [00:08,  4.48it/s, loss=4.4069]


Epoch 49 Avg Loss: 4.4118


Epoch 50/1000: 39it [00:08,  4.48it/s, loss=4.4003]


Epoch 50 Avg Loss: 4.4057


Epoch 51/1000: 39it [00:08,  4.48it/s, loss=4.4091]


Epoch 51 Avg Loss: 4.4075


Epoch 52/1000: 39it [00:08,  4.48it/s, loss=4.4016]


Epoch 52 Avg Loss: 4.4087


Epoch 53/1000: 39it [00:08,  4.48it/s, loss=4.3996]


Epoch 53 Avg Loss: 4.4017


Epoch 54/1000: 39it [00:08,  4.48it/s, loss=4.4152]


Epoch 54 Avg Loss: 4.3985


Epoch 55/1000: 39it [00:08,  4.48it/s, loss=4.3905]


Epoch 55 Avg Loss: 4.3953


Epoch 56/1000: 39it [00:08,  4.48it/s, loss=4.4076]


Epoch 56 Avg Loss: 4.3991


Epoch 57/1000: 39it [00:08,  4.49it/s, loss=4.3947]


Epoch 57 Avg Loss: 4.3927


Epoch 58/1000: 39it [00:08,  4.48it/s, loss=4.3793]


Epoch 58 Avg Loss: 4.3856


Epoch 59/1000: 39it [00:08,  4.49it/s, loss=4.3798]


Epoch 59 Avg Loss: 4.3748


Epoch 60/1000: 39it [00:08,  4.49it/s, loss=4.3465]


Epoch 60 Avg Loss: 4.3623


Epoch 61/1000: 39it [00:08,  4.48it/s, loss=4.3309]


Epoch 61 Avg Loss: 4.3498


Epoch 62/1000: 39it [00:08,  4.48it/s, loss=4.3219]


Epoch 62 Avg Loss: 4.3287


Epoch 63/1000: 39it [00:08,  4.48it/s, loss=4.2961]


Epoch 63 Avg Loss: 4.3109


Epoch 64/1000: 39it [00:08,  4.48it/s, loss=4.2602]


Epoch 64 Avg Loss: 4.2846


Epoch 65/1000: 39it [00:08,  4.48it/s, loss=4.2443]


Epoch 65 Avg Loss: 4.2543


Epoch 66/1000: 39it [00:08,  4.48it/s, loss=4.2617]


Epoch 66 Avg Loss: 4.2360


Epoch 67/1000: 39it [00:08,  4.48it/s, loss=4.1820]


Epoch 67 Avg Loss: 4.2049


Epoch 68/1000: 39it [00:08,  4.48it/s, loss=4.1360]


Epoch 68 Avg Loss: 4.1668


Epoch 69/1000: 39it [00:08,  4.49it/s, loss=4.1022]


Epoch 69 Avg Loss: 4.1365


Epoch 70/1000: 39it [00:08,  4.49it/s, loss=4.0552]


Epoch 70 Avg Loss: 4.0919


Epoch 71/1000: 39it [00:08,  4.48it/s, loss=4.0805]


Epoch 71 Avg Loss: 4.1631


Epoch 72/1000: 39it [00:08,  4.48it/s, loss=3.9595]


Epoch 72 Avg Loss: 4.0149


Epoch 73/1000: 39it [00:08,  4.49it/s, loss=3.8620]


Epoch 73 Avg Loss: 3.8983


Epoch 74/1000: 39it [00:08,  4.49it/s, loss=4.1856]


Epoch 74 Avg Loss: 3.7631


Epoch 75/1000: 39it [00:08,  4.48it/s, loss=3.4544]


Epoch 75 Avg Loss: 3.7313


Epoch 76/1000: 39it [00:08,  4.48it/s, loss=3.2642]


Epoch 76 Avg Loss: 3.3031


Epoch 77/1000: 39it [00:08,  4.48it/s, loss=2.9355]


Epoch 77 Avg Loss: 3.0822


Epoch 78/1000: 39it [00:08,  4.49it/s, loss=2.6638]


Epoch 78 Avg Loss: 3.0087


Epoch 79/1000: 39it [00:08,  4.49it/s, loss=2.6058]


Epoch 79 Avg Loss: 2.7090


Epoch 80/1000: 39it [00:08,  4.49it/s, loss=2.2441]


Epoch 80 Avg Loss: 2.3755


Epoch 81/1000: 39it [00:08,  4.49it/s, loss=2.1894]


Epoch 81 Avg Loss: 2.2362


Epoch 82/1000: 39it [00:08,  4.49it/s, loss=2.4102]


Epoch 82 Avg Loss: 2.8212


Epoch 83/1000: 39it [00:08,  4.48it/s, loss=2.0238]


Epoch 83 Avg Loss: 2.1465


Epoch 84/1000: 39it [00:08,  4.48it/s, loss=1.9840]


Epoch 84 Avg Loss: 2.0063


Epoch 85/1000: 39it [00:08,  4.49it/s, loss=1.9401]


Epoch 85 Avg Loss: 1.9632


Epoch 86/1000: 39it [00:08,  4.49it/s, loss=1.9141]


Epoch 86 Avg Loss: 1.9255


Epoch 87/1000: 39it [00:08,  4.49it/s, loss=1.8884]


Epoch 87 Avg Loss: 1.8991


Epoch 88/1000: 39it [00:08,  4.49it/s, loss=1.8690]


Epoch 88 Avg Loss: 1.8859


Epoch 89/1000: 39it [00:08,  4.49it/s, loss=1.8751]


Epoch 89 Avg Loss: 1.8741


Epoch 90/1000: 39it [00:08,  4.49it/s, loss=1.8928]


Epoch 90 Avg Loss: 1.8875


Epoch 91/1000: 39it [00:08,  4.49it/s, loss=1.8515]


Epoch 91 Avg Loss: 1.8740


Epoch 92/1000: 39it [00:08,  4.49it/s, loss=1.8537]


Epoch 92 Avg Loss: 1.8474


Epoch 93/1000: 39it [00:08,  4.49it/s, loss=1.8107]


Epoch 93 Avg Loss: 1.8477


Epoch 94/1000: 39it [00:08,  4.49it/s, loss=1.8018]


Epoch 94 Avg Loss: 1.8188


Epoch 95/1000: 39it [00:08,  4.49it/s, loss=1.7956]


Epoch 95 Avg Loss: 1.8061


Epoch 96/1000: 39it [00:08,  4.49it/s, loss=1.7665]


Epoch 96 Avg Loss: 1.7931


Epoch 97/1000: 39it [00:08,  4.49it/s, loss=1.7670]


Epoch 97 Avg Loss: 1.7853


Epoch 98/1000: 39it [00:08,  4.49it/s, loss=1.7791]


Epoch 98 Avg Loss: 1.7789


Epoch 99/1000: 39it [00:08,  4.49it/s, loss=1.7579]


Epoch 99 Avg Loss: 1.7664


Epoch 100/1000: 39it [00:08,  4.49it/s, loss=1.7518]


Epoch 100 Avg Loss: 1.7537


Epoch 101/1000: 39it [00:08,  4.49it/s, loss=1.7518]


Epoch 101 Avg Loss: 1.7456


Epoch 102/1000: 39it [00:08,  4.49it/s, loss=1.7282]


Epoch 102 Avg Loss: 1.7394


Epoch 103/1000: 39it [00:08,  4.49it/s, loss=1.7451]


Epoch 103 Avg Loss: 1.7332


Epoch 104/1000: 39it [00:08,  4.49it/s, loss=1.7282]


Epoch 104 Avg Loss: 1.7297


Epoch 105/1000: 39it [00:08,  4.49it/s, loss=1.7255]


Epoch 105 Avg Loss: 1.7270


Epoch 106/1000: 39it [00:08,  4.48it/s, loss=1.7259]


Epoch 106 Avg Loss: 1.7213


Epoch 107/1000: 39it [00:08,  4.49it/s, loss=1.7076]


Epoch 107 Avg Loss: 1.7178


Epoch 108/1000: 39it [00:08,  4.49it/s, loss=1.6926]


Epoch 108 Avg Loss: 1.7125


Epoch 109/1000: 39it [00:08,  4.49it/s, loss=1.7202]


Epoch 109 Avg Loss: 1.7071


Epoch 110/1000: 39it [00:08,  4.49it/s, loss=1.7185]


Epoch 110 Avg Loss: 1.7044


Epoch 111/1000: 39it [00:08,  4.49it/s, loss=1.6935]


Epoch 111 Avg Loss: 1.7000


Epoch 112/1000: 39it [00:08,  4.49it/s, loss=1.7099]


Epoch 112 Avg Loss: 1.6985


Epoch 113/1000: 39it [00:08,  4.49it/s, loss=1.7081]


Epoch 113 Avg Loss: 1.6984


Epoch 114/1000: 39it [00:08,  4.49it/s, loss=1.6885]


Epoch 114 Avg Loss: 1.6957


Epoch 115/1000: 39it [00:08,  4.49it/s, loss=1.6902]


Epoch 115 Avg Loss: 1.6929


Epoch 116/1000: 39it [00:08,  4.49it/s, loss=1.6905]


Epoch 116 Avg Loss: 1.6962


Epoch 117/1000: 39it [00:08,  4.49it/s, loss=1.6985]


Epoch 117 Avg Loss: 1.6893


Epoch 118/1000: 39it [00:08,  4.48it/s, loss=1.6810]


Epoch 118 Avg Loss: 1.6857


Epoch 119/1000: 39it [00:08,  4.49it/s, loss=1.6828]


Epoch 119 Avg Loss: 1.6830


Epoch 120/1000: 39it [00:08,  4.49it/s, loss=1.6878]


Epoch 120 Avg Loss: 1.6801


Epoch 121/1000: 39it [00:08,  4.49it/s, loss=1.6692]


Epoch 121 Avg Loss: 1.6766


Epoch 122/1000: 39it [00:08,  4.48it/s, loss=1.6828]


Epoch 122 Avg Loss: 1.6783


Epoch 123/1000: 39it [00:08,  4.49it/s, loss=1.6805]


Epoch 123 Avg Loss: 1.6734


Epoch 124/1000: 39it [00:08,  4.49it/s, loss=1.6670]


Epoch 124 Avg Loss: 1.6723


Epoch 125/1000: 39it [00:08,  4.48it/s, loss=1.6831]


Epoch 125 Avg Loss: 1.6719


Epoch 126/1000: 39it [00:08,  4.48it/s, loss=1.6785]


Epoch 126 Avg Loss: 1.6666


Epoch 127/1000: 39it [00:08,  4.49it/s, loss=1.6651]


Epoch 127 Avg Loss: 1.6675


Epoch 128/1000: 39it [00:08,  4.49it/s, loss=1.6742]


Epoch 128 Avg Loss: 1.6659


Epoch 129/1000: 39it [00:08,  4.49it/s, loss=1.6661]


Epoch 129 Avg Loss: 1.6669


Epoch 130/1000: 39it [00:08,  4.49it/s, loss=1.6611]


Epoch 130 Avg Loss: 1.6594


Epoch 131/1000: 39it [00:08,  4.49it/s, loss=1.6477]


Epoch 131 Avg Loss: 1.6560


Epoch 132/1000: 39it [00:08,  4.49it/s, loss=1.6574]


Epoch 132 Avg Loss: 1.6531


Epoch 133/1000: 39it [00:08,  4.49it/s, loss=1.6590]


Epoch 133 Avg Loss: 1.6526


Epoch 134/1000: 39it [00:08,  4.49it/s, loss=1.6434]


Epoch 134 Avg Loss: 1.6521


Epoch 135/1000: 39it [00:08,  4.49it/s, loss=1.6587]


Epoch 135 Avg Loss: 1.6535


Epoch 136/1000: 39it [00:08,  4.48it/s, loss=1.6434]


Epoch 136 Avg Loss: 1.6521


Epoch 137/1000: 39it [00:08,  4.49it/s, loss=1.6329]


Epoch 137 Avg Loss: 1.6487


Epoch 138/1000: 39it [00:08,  4.49it/s, loss=1.6387]


Epoch 138 Avg Loss: 1.6471


Epoch 139/1000: 39it [00:08,  4.48it/s, loss=1.8753]


Epoch 139 Avg Loss: 1.6521


Epoch 140/1000: 39it [00:08,  4.49it/s, loss=2.4667]


Epoch 140 Avg Loss: 3.6911


Epoch 141/1000: 39it [00:08,  4.48it/s, loss=2.2128]


Epoch 141 Avg Loss: 2.3032


Epoch 142/1000: 39it [00:08,  4.48it/s, loss=2.1969]


Epoch 142 Avg Loss: 2.2067


Epoch 143/1000: 39it [00:08,  4.49it/s, loss=2.1300]


Epoch 143 Avg Loss: 2.1523


Epoch 144/1000: 39it [00:08,  4.48it/s, loss=2.0880]


Epoch 144 Avg Loss: 2.1038


Epoch 145/1000: 39it [00:08,  4.48it/s, loss=2.0603]


Epoch 145 Avg Loss: 2.0809


Epoch 146/1000: 39it [00:08,  4.49it/s, loss=2.0424]


Epoch 146 Avg Loss: 2.0559


Epoch 147/1000: 39it [00:08,  4.49it/s, loss=2.0084]


Epoch 147 Avg Loss: 2.0276


Epoch 148/1000: 39it [00:08,  4.49it/s, loss=2.0008]


Epoch 148 Avg Loss: 2.0177


Epoch 149/1000: 39it [00:08,  4.49it/s, loss=2.0233]


Epoch 149 Avg Loss: 2.0106


Epoch 150/1000: 39it [00:08,  4.49it/s, loss=2.0058]


Epoch 150 Avg Loss: 2.0034


Epoch 151/1000: 39it [00:08,  4.49it/s, loss=2.0017]


Epoch 151 Avg Loss: 2.0026


Epoch 152/1000: 39it [00:08,  4.49it/s, loss=1.9973]


Epoch 152 Avg Loss: 1.9983


Epoch 153/1000: 39it [00:08,  4.49it/s, loss=2.0055]


Epoch 153 Avg Loss: 1.9977


Epoch 154/1000: 39it [00:08,  4.48it/s, loss=1.9940]


Epoch 154 Avg Loss: 1.9956


Epoch 155/1000: 39it [00:08,  4.49it/s, loss=1.9677]


Epoch 155 Avg Loss: 1.9896


Epoch 156/1000: 39it [00:08,  4.48it/s, loss=1.9535]


Epoch 156 Avg Loss: 1.9647


Epoch 157/1000: 39it [00:08,  4.49it/s, loss=1.9661]


Epoch 157 Avg Loss: 1.9582


Epoch 158/1000: 39it [00:08,  4.48it/s, loss=1.9479]


Epoch 158 Avg Loss: 1.9539


Epoch 159/1000: 39it [00:08,  4.49it/s, loss=1.9522]


Epoch 159 Avg Loss: 1.9515


Epoch 160/1000: 39it [00:08,  4.48it/s, loss=1.9468]


Epoch 160 Avg Loss: 1.9506


Epoch 161/1000: 39it [00:08,  4.49it/s, loss=1.9350]


Epoch 161 Avg Loss: 1.9503


Epoch 162/1000: 39it [00:08,  4.49it/s, loss=1.9401]


Epoch 162 Avg Loss: 1.9441


Epoch 163/1000: 39it [00:08,  4.49it/s, loss=1.9496]


Epoch 163 Avg Loss: 1.9481


Epoch 164/1000: 39it [00:08,  4.49it/s, loss=1.9639]


Epoch 164 Avg Loss: 1.9443


Epoch 165/1000: 39it [00:08,  4.48it/s, loss=1.9186]


Epoch 165 Avg Loss: 1.9405


Epoch 166/1000: 39it [00:08,  4.49it/s, loss=1.9426]


Epoch 166 Avg Loss: 1.9384


Epoch 167/1000: 39it [00:08,  4.48it/s, loss=1.9275]


Epoch 167 Avg Loss: 1.9263


Epoch 168/1000: 39it [00:08,  4.48it/s, loss=1.9181]


Epoch 168 Avg Loss: 1.9215


Epoch 169/1000: 39it [00:08,  4.48it/s, loss=1.9185]


Epoch 169 Avg Loss: 1.9192


Epoch 170/1000: 39it [00:08,  4.48it/s, loss=1.9186]


Epoch 170 Avg Loss: 1.9226


Epoch 171/1000: 39it [00:08,  4.48it/s, loss=1.9110]


Epoch 171 Avg Loss: 1.9184


Epoch 172/1000: 39it [00:08,  4.48it/s, loss=1.8837]


Epoch 172 Avg Loss: 1.9080


Epoch 173/1000: 39it [00:08,  4.48it/s, loss=1.8696]


Epoch 173 Avg Loss: 1.8923


Epoch 174/1000: 39it [00:08,  4.48it/s, loss=1.8812]


Epoch 174 Avg Loss: 1.8892


Epoch 175/1000: 39it [00:08,  4.48it/s, loss=1.8872]


Epoch 175 Avg Loss: 1.8857


Epoch 176/1000: 39it [00:08,  4.48it/s, loss=1.8707]


Epoch 176 Avg Loss: 1.8857


Epoch 177/1000: 39it [00:08,  4.48it/s, loss=1.8977]


Epoch 177 Avg Loss: 1.8830


Epoch 178/1000: 39it [00:08,  4.48it/s, loss=1.8613]


Epoch 178 Avg Loss: 1.8824


Epoch 179/1000: 39it [00:08,  4.48it/s, loss=1.8823]


Epoch 179 Avg Loss: 1.8840


Epoch 180/1000: 39it [00:08,  4.48it/s, loss=1.8818]


Epoch 180 Avg Loss: 1.8786


Epoch 181/1000: 39it [00:08,  4.48it/s, loss=1.8785]


Epoch 181 Avg Loss: 1.8703


Epoch 182/1000: 39it [00:08,  4.48it/s, loss=1.8685]


Epoch 182 Avg Loss: 1.8681


Epoch 183/1000: 39it [00:08,  4.48it/s, loss=1.8643]


Epoch 183 Avg Loss: 1.8677


Epoch 184/1000: 39it [00:08,  4.48it/s, loss=1.8592]


Epoch 184 Avg Loss: 1.8639


Epoch 185/1000: 39it [00:08,  4.48it/s, loss=1.8501]


Epoch 185 Avg Loss: 1.8633


Epoch 186/1000: 39it [00:08,  4.48it/s, loss=1.8552]


Epoch 186 Avg Loss: 1.8604


Epoch 187/1000: 39it [00:08,  4.48it/s, loss=1.8575]


Epoch 187 Avg Loss: 1.8596


Epoch 188/1000: 39it [00:08,  4.48it/s, loss=1.8196]


Epoch 188 Avg Loss: 1.8471


Epoch 189/1000: 39it [00:08,  4.48it/s, loss=1.8286]


Epoch 189 Avg Loss: 1.8396


Epoch 190/1000: 39it [00:08,  4.48it/s, loss=1.8279]


Epoch 190 Avg Loss: 1.8350


Epoch 191/1000: 39it [00:08,  4.48it/s, loss=1.8385]


Epoch 191 Avg Loss: 1.8308


Epoch 192/1000: 39it [00:08,  4.48it/s, loss=1.8009]


Epoch 192 Avg Loss: 1.8282


Epoch 193/1000: 39it [00:08,  4.48it/s, loss=1.8312]


Epoch 193 Avg Loss: 1.8279


Epoch 194/1000: 39it [00:08,  4.48it/s, loss=1.8272]


Epoch 194 Avg Loss: 1.8248


Epoch 195/1000: 39it [00:08,  4.48it/s, loss=1.7990]


Epoch 195 Avg Loss: 1.8223


Epoch 196/1000: 39it [00:08,  4.48it/s, loss=1.8086]


Epoch 196 Avg Loss: 1.8230


Epoch 197/1000: 39it [00:08,  4.48it/s, loss=1.8331]


Epoch 197 Avg Loss: 1.8203


Epoch 198/1000: 39it [00:08,  4.48it/s, loss=1.8067]


Epoch 198 Avg Loss: 1.8119


Epoch 199/1000: 39it [00:08,  4.48it/s, loss=1.7964]


Epoch 199 Avg Loss: 1.8095


Epoch 200/1000: 39it [00:08,  4.48it/s, loss=1.8097]


Epoch 200 Avg Loss: 1.8049


Epoch 201/1000: 39it [00:08,  4.48it/s, loss=1.7856]


Epoch 201 Avg Loss: 1.7984


Epoch 202/1000: 39it [00:08,  4.49it/s, loss=1.7905]


Epoch 202 Avg Loss: 1.7871


Epoch 203/1000: 39it [00:08,  4.48it/s, loss=1.7799]


Epoch 203 Avg Loss: 1.7863


Epoch 204/1000: 39it [00:08,  4.48it/s, loss=1.7659]


Epoch 204 Avg Loss: 1.7809


Epoch 205/1000: 39it [00:08,  4.48it/s, loss=1.7578]


Epoch 205 Avg Loss: 1.7709


Epoch 206/1000: 39it [00:08,  4.48it/s, loss=1.7596]


Epoch 206 Avg Loss: 1.7623


Epoch 207/1000: 39it [00:08,  4.48it/s, loss=1.7334]


Epoch 207 Avg Loss: 1.7532


Epoch 208/1000: 39it [00:08,  4.48it/s, loss=1.7500]


Epoch 208 Avg Loss: 1.7540


Epoch 209/1000: 39it [00:08,  4.48it/s, loss=1.7395]


Epoch 209 Avg Loss: 1.7491


Epoch 210/1000: 39it [00:08,  4.48it/s, loss=1.7439]


Epoch 210 Avg Loss: 1.7435


Epoch 211/1000: 39it [00:08,  4.48it/s, loss=1.7408]


Epoch 211 Avg Loss: 1.7411


Epoch 212/1000: 39it [00:08,  4.49it/s, loss=1.7570]


Epoch 212 Avg Loss: 1.7461


Epoch 213/1000: 39it [00:08,  4.48it/s, loss=1.7061]


Epoch 213 Avg Loss: 1.7375


Epoch 214/1000: 39it [00:08,  4.48it/s, loss=1.7303]


Epoch 214 Avg Loss: 1.7243


Epoch 215/1000: 39it [00:08,  4.48it/s, loss=1.7042]


Epoch 215 Avg Loss: 1.7137


Epoch 216/1000: 39it [00:08,  4.49it/s, loss=1.7278]


Epoch 216 Avg Loss: 1.7138


Epoch 217/1000: 39it [00:08,  4.48it/s, loss=1.7110]


Epoch 217 Avg Loss: 1.7112


Epoch 218/1000: 39it [00:08,  4.48it/s, loss=1.7102]


Epoch 218 Avg Loss: 1.7080


Epoch 219/1000: 39it [00:08,  4.48it/s, loss=1.7106]


Epoch 219 Avg Loss: 1.7056


Epoch 220/1000: 39it [00:08,  4.48it/s, loss=1.7045]


Epoch 220 Avg Loss: 1.7035


Epoch 221/1000: 39it [00:08,  4.48it/s, loss=1.6944]


Epoch 221 Avg Loss: 1.6992


Epoch 222/1000: 39it [00:08,  4.48it/s, loss=1.6977]


Epoch 222 Avg Loss: 1.7004


Epoch 223/1000: 39it [00:08,  4.48it/s, loss=1.6874]


Epoch 223 Avg Loss: 1.6929


Epoch 224/1000: 39it [00:08,  4.48it/s, loss=1.7012]


Epoch 224 Avg Loss: 1.6863


Epoch 225/1000: 39it [00:08,  4.48it/s, loss=1.6658]


Epoch 225 Avg Loss: 1.6800


Epoch 226/1000: 39it [00:08,  4.48it/s, loss=1.6889]


Epoch 226 Avg Loss: 1.6769


Epoch 227/1000: 39it [00:08,  4.48it/s, loss=1.6721]


Epoch 227 Avg Loss: 1.6733


Epoch 228/1000: 39it [00:08,  4.48it/s, loss=1.6793]


Epoch 228 Avg Loss: 1.6724


Epoch 229/1000: 39it [00:08,  4.48it/s, loss=1.6582]


Epoch 229 Avg Loss: 1.6659


Epoch 230/1000: 39it [00:08,  4.48it/s, loss=1.6638]


Epoch 230 Avg Loss: 1.6684


Epoch 231/1000: 39it [00:08,  4.48it/s, loss=1.6498]


Epoch 231 Avg Loss: 1.6636


Epoch 232/1000: 39it [00:08,  4.47it/s, loss=1.6749]


Epoch 232 Avg Loss: 1.6675


Epoch 233/1000: 39it [00:08,  4.48it/s, loss=1.6689]


Epoch 233 Avg Loss: 1.6697


Epoch 234/1000: 39it [00:08,  4.48it/s, loss=1.6588]


Epoch 234 Avg Loss: 1.6561


Epoch 235/1000: 39it [00:08,  4.47it/s, loss=1.6397]


Epoch 235 Avg Loss: 1.6520


Epoch 236/1000: 39it [00:08,  4.48it/s, loss=1.6533]


Epoch 236 Avg Loss: 1.6518


Epoch 237/1000: 39it [00:08,  4.48it/s, loss=1.6478]


Epoch 237 Avg Loss: 1.6502


Epoch 238/1000: 39it [00:08,  4.48it/s, loss=1.6525]


Epoch 238 Avg Loss: 1.6443


Epoch 239/1000: 39it [00:08,  4.48it/s, loss=1.6342]


Epoch 239 Avg Loss: 1.6468


Epoch 240/1000: 39it [00:08,  4.47it/s, loss=1.6299]


Epoch 240 Avg Loss: 1.6371


Epoch 241/1000: 39it [00:08,  4.48it/s, loss=1.6308]


Epoch 241 Avg Loss: 1.6350


Epoch 242/1000: 39it [00:08,  4.48it/s, loss=1.6343]


Epoch 242 Avg Loss: 1.6304


Epoch 243/1000: 39it [00:08,  4.48it/s, loss=1.6067]


Epoch 243 Avg Loss: 1.6263


Epoch 244/1000: 39it [00:08,  4.48it/s, loss=1.6182]


Epoch 244 Avg Loss: 1.6211


Epoch 245/1000: 39it [00:08,  4.48it/s, loss=1.6304]


Epoch 245 Avg Loss: 1.6196


Epoch 246/1000: 39it [00:08,  4.47it/s, loss=1.6197]


Epoch 246 Avg Loss: 1.6152


Epoch 247/1000: 39it [00:08,  4.48it/s, loss=1.6035]


Epoch 247 Avg Loss: 1.6151


Epoch 248/1000: 39it [00:08,  4.48it/s, loss=1.6125]


Epoch 248 Avg Loss: 1.6130


Epoch 249/1000: 39it [00:08,  4.48it/s, loss=1.6185]


Epoch 249 Avg Loss: 1.6099


Epoch 250/1000: 39it [00:08,  4.48it/s, loss=1.6274]


Epoch 250 Avg Loss: 1.6075


Epoch 251/1000: 39it [00:08,  4.48it/s, loss=1.6012]


Epoch 251 Avg Loss: 1.6063


Epoch 252/1000: 39it [00:08,  4.48it/s, loss=1.5981]


Epoch 252 Avg Loss: 1.6041


Epoch 253/1000: 39it [00:08,  4.48it/s, loss=1.6069]


Epoch 253 Avg Loss: 1.6068


Epoch 254/1000: 39it [00:08,  4.47it/s, loss=1.6123]


Epoch 254 Avg Loss: 1.6046


Epoch 255/1000: 39it [00:08,  4.48it/s, loss=1.5884]


Epoch 255 Avg Loss: 1.6012


Epoch 256/1000: 39it [00:08,  4.48it/s, loss=1.5911]


Epoch 256 Avg Loss: 1.6017


Epoch 257/1000: 39it [00:08,  4.47it/s, loss=1.6024]


Epoch 257 Avg Loss: 1.6002


Epoch 258/1000: 39it [00:08,  4.48it/s, loss=1.6100]


Epoch 258 Avg Loss: 1.5979


Epoch 259/1000: 39it [00:08,  4.48it/s, loss=1.6044]


Epoch 259 Avg Loss: 1.6001


Epoch 260/1000: 39it [00:08,  4.48it/s, loss=1.6087]


Epoch 260 Avg Loss: 1.5996


Epoch 261/1000: 39it [00:08,  4.48it/s, loss=1.5998]


Epoch 261 Avg Loss: 1.5984


Epoch 262/1000: 39it [00:08,  4.48it/s, loss=1.6006]


Epoch 262 Avg Loss: 1.5984


Epoch 263/1000: 39it [00:08,  4.48it/s, loss=1.6008]


Epoch 263 Avg Loss: 1.5939


Epoch 264/1000: 39it [00:08,  4.47it/s, loss=1.6061]


Epoch 264 Avg Loss: 1.5953


Epoch 265/1000: 39it [00:08,  4.48it/s, loss=1.5859]


Epoch 265 Avg Loss: 1.5950


Epoch 266/1000: 39it [00:08,  4.48it/s, loss=1.6027]


Epoch 266 Avg Loss: 1.5951


Epoch 267/1000: 39it [00:08,  4.48it/s, loss=1.5914]


Epoch 267 Avg Loss: 1.5961


Epoch 268/1000: 39it [00:08,  4.48it/s, loss=1.5874]


Epoch 268 Avg Loss: 1.5953


Epoch 269/1000: 39it [00:08,  4.48it/s, loss=1.5829]


Epoch 269 Avg Loss: 1.5974


Epoch 270/1000: 39it [00:08,  4.48it/s, loss=1.5932]


Epoch 270 Avg Loss: 1.5937


Epoch 271/1000: 39it [00:08,  4.48it/s, loss=1.5938]


Epoch 271 Avg Loss: 1.5928


Epoch 272/1000: 39it [00:08,  4.48it/s, loss=1.5809]


Epoch 272 Avg Loss: 1.5897


Epoch 273/1000: 39it [00:08,  4.47it/s, loss=1.5810]


Epoch 273 Avg Loss: 1.5949


Epoch 274/1000: 39it [00:08,  4.48it/s, loss=1.6052]


Epoch 274 Avg Loss: 1.5936


Epoch 275/1000: 39it [00:08,  4.47it/s, loss=1.5952]


Epoch 275 Avg Loss: 1.5909


Epoch 276/1000: 39it [00:08,  4.48it/s, loss=1.5882]


Epoch 276 Avg Loss: 1.5885


Epoch 277/1000: 39it [00:08,  4.48it/s, loss=1.5750]


Epoch 277 Avg Loss: 1.5879


Epoch 278/1000: 39it [00:08,  4.48it/s, loss=1.5748]


Epoch 278 Avg Loss: 1.5908


Epoch 279/1000: 39it [00:08,  4.48it/s, loss=1.5774]


Epoch 279 Avg Loss: 1.5916


Epoch 280/1000: 39it [00:08,  4.48it/s, loss=1.5955]


Epoch 280 Avg Loss: 1.5893


Epoch 281/1000: 39it [00:08,  4.48it/s, loss=1.5949]


Epoch 281 Avg Loss: 1.5900


Epoch 282/1000: 39it [00:08,  4.48it/s, loss=1.5810]


Epoch 282 Avg Loss: 1.5861


Epoch 283/1000: 39it [00:08,  4.47it/s, loss=1.5856]


Epoch 283 Avg Loss: 1.5866


Epoch 284/1000: 39it [00:08,  4.47it/s, loss=1.5942]


Epoch 284 Avg Loss: 1.5882


Epoch 285/1000: 39it [00:08,  4.48it/s, loss=1.5828]


Epoch 285 Avg Loss: 1.5864


Epoch 286/1000: 39it [00:08,  4.48it/s, loss=1.5901]


Epoch 286 Avg Loss: 1.5849


Epoch 287/1000: 39it [00:08,  4.48it/s, loss=1.5671]


Epoch 287 Avg Loss: 1.5850


Epoch 288/1000: 39it [00:08,  4.48it/s, loss=1.5797]


Epoch 288 Avg Loss: 1.5885


Epoch 289/1000: 39it [00:08,  4.48it/s, loss=2.5505]


Epoch 289 Avg Loss: 3.5205


Epoch 290/1000: 39it [00:08,  4.47it/s, loss=2.2726]


Epoch 290 Avg Loss: 2.3322


Epoch 291/1000: 39it [00:08,  4.48it/s, loss=2.2062]


Epoch 291 Avg Loss: 2.2232


Epoch 292/1000: 39it [00:08,  4.47it/s, loss=2.1853]


Epoch 292 Avg Loss: 2.1977


Epoch 293/1000: 39it [00:08,  4.47it/s, loss=2.1655]


Epoch 293 Avg Loss: 2.1648


Epoch 294/1000: 39it [00:08,  4.47it/s, loss=2.1226]


Epoch 294 Avg Loss: 2.1460


Epoch 295/1000: 39it [00:08,  4.48it/s, loss=2.1081]


Epoch 295 Avg Loss: 2.1284


Epoch 296/1000: 39it [00:08,  4.47it/s, loss=2.1074]


Epoch 296 Avg Loss: 2.1177


Epoch 297/1000: 39it [00:08,  4.47it/s, loss=2.1021]


Epoch 297 Avg Loss: 2.1155


Epoch 298/1000: 39it [00:08,  4.48it/s, loss=2.1162]


Epoch 298 Avg Loss: 2.1085


Epoch 299/1000: 39it [00:08,  4.47it/s, loss=2.0827]


Epoch 299 Avg Loss: 2.0985


Epoch 300/1000: 39it [00:08,  4.47it/s, loss=2.0834]


Epoch 300 Avg Loss: 2.0932


Epoch 301/1000: 39it [00:08,  4.47it/s, loss=2.0747]


Epoch 301 Avg Loss: 2.0777


Epoch 302/1000: 39it [00:08,  4.48it/s, loss=2.0590]


Epoch 302 Avg Loss: 2.0725


Epoch 303/1000: 39it [00:08,  4.47it/s, loss=2.0551]


Epoch 303 Avg Loss: 2.0629


Epoch 304/1000: 39it [00:08,  4.47it/s, loss=2.0678]


Epoch 304 Avg Loss: 2.0560


Epoch 305/1000: 39it [00:08,  4.47it/s, loss=2.0429]


Epoch 305 Avg Loss: 2.0508


Epoch 306/1000: 39it [00:08,  4.47it/s, loss=2.0450]


Epoch 306 Avg Loss: 2.0429


Epoch 307/1000: 39it [00:08,  4.47it/s, loss=2.0334]


Epoch 307 Avg Loss: 2.0354


Epoch 308/1000: 39it [00:08,  4.47it/s, loss=2.0054]


Epoch 308 Avg Loss: 2.0211


Epoch 309/1000: 39it [00:08,  4.47it/s, loss=1.9949]


Epoch 309 Avg Loss: 2.0066


Epoch 310/1000: 39it [00:08,  4.47it/s, loss=1.9919]


Epoch 310 Avg Loss: 1.9874


Epoch 311/1000: 39it [00:08,  4.47it/s, loss=1.9577]


Epoch 311 Avg Loss: 1.9714


Epoch 312/1000: 39it [00:08,  4.48it/s, loss=1.9414]


Epoch 312 Avg Loss: 1.9548


Epoch 313/1000: 39it [00:08,  4.47it/s, loss=1.9224]


Epoch 313 Avg Loss: 1.9357


Epoch 314/1000: 39it [00:08,  4.47it/s, loss=1.9016]


Epoch 314 Avg Loss: 1.9109


Epoch 315/1000: 39it [00:08,  4.47it/s, loss=1.8465]


Epoch 315 Avg Loss: 1.8783


Epoch 316/1000: 39it [00:08,  4.47it/s, loss=1.8466]


Epoch 316 Avg Loss: 1.8574


Epoch 317/1000: 39it [00:08,  4.47it/s, loss=1.8375]


Epoch 317 Avg Loss: 1.8403


Epoch 318/1000: 39it [00:08,  4.47it/s, loss=1.8156]


Epoch 318 Avg Loss: 1.8185


Epoch 319/1000: 39it [00:08,  4.47it/s, loss=1.8049]


Epoch 319 Avg Loss: 1.7920


Epoch 320/1000: 39it [00:08,  4.47it/s, loss=1.7662]


Epoch 320 Avg Loss: 1.7751


Epoch 321/1000: 39it [00:08,  4.47it/s, loss=1.7586]


Epoch 321 Avg Loss: 1.7697


Epoch 322/1000: 39it [00:08,  4.47it/s, loss=1.7680]


Epoch 322 Avg Loss: 1.7683


Epoch 323/1000: 39it [00:08,  4.48it/s, loss=1.7575]


Epoch 323 Avg Loss: 1.7644


Epoch 324/1000: 39it [00:08,  4.47it/s, loss=1.7576]


Epoch 324 Avg Loss: 1.7540


Epoch 325/1000: 39it [00:08,  4.47it/s, loss=1.7459]


Epoch 325 Avg Loss: 1.7484


Epoch 326/1000: 39it [00:08,  4.48it/s, loss=1.7581]


Epoch 326 Avg Loss: 1.7465


Epoch 327/1000: 39it [00:08,  4.47it/s, loss=1.7351]


Epoch 327 Avg Loss: 1.7434


Epoch 328/1000: 39it [00:08,  4.47it/s, loss=1.7308]


Epoch 328 Avg Loss: 1.7405


Epoch 329/1000: 39it [00:08,  4.47it/s, loss=1.7426]


Epoch 329 Avg Loss: 1.7380


Epoch 330/1000: 39it [00:08,  4.47it/s, loss=1.7397]


Epoch 330 Avg Loss: 1.7373


Epoch 331/1000: 39it [00:08,  4.47it/s, loss=1.7389]


Epoch 331 Avg Loss: 1.7347


Epoch 332/1000: 39it [00:08,  4.47it/s, loss=1.7308]


Epoch 332 Avg Loss: 1.7340


Epoch 333/1000: 39it [00:08,  4.47it/s, loss=1.7308]


Epoch 333 Avg Loss: 1.7301


Epoch 334/1000: 39it [00:08,  4.47it/s, loss=1.7318]


Epoch 334 Avg Loss: 1.7310


Epoch 335/1000: 39it [00:08,  4.47it/s, loss=1.7152]


Epoch 335 Avg Loss: 1.7287


Epoch 336/1000: 39it [00:08,  4.47it/s, loss=1.7410]


Epoch 336 Avg Loss: 1.7292


Epoch 337/1000: 39it [00:08,  4.48it/s, loss=1.7409]


Epoch 337 Avg Loss: 1.7272


Epoch 338/1000: 39it [00:08,  4.48it/s, loss=1.7115]


Epoch 338 Avg Loss: 1.7284


Epoch 339/1000: 39it [00:08,  4.48it/s, loss=1.7199]


Epoch 339 Avg Loss: 1.7293


Epoch 340/1000: 39it [00:08,  4.47it/s, loss=1.7250]


Epoch 340 Avg Loss: 1.7270


Epoch 341/1000: 39it [00:08,  4.48it/s, loss=1.7299]


Epoch 341 Avg Loss: 1.7246


Epoch 342/1000: 39it [00:08,  4.47it/s, loss=1.7338]


Epoch 342 Avg Loss: 1.7236


Epoch 343/1000: 39it [00:08,  4.47it/s, loss=1.7141]


Epoch 343 Avg Loss: 1.7218


Epoch 344/1000: 39it [00:08,  4.48it/s, loss=1.7201]


Epoch 344 Avg Loss: 1.7212


Epoch 345/1000: 39it [00:08,  4.47it/s, loss=1.7181]


Epoch 345 Avg Loss: 1.7229


Epoch 346/1000: 39it [00:08,  4.47it/s, loss=1.7121]


Epoch 346 Avg Loss: 1.7213


Epoch 347/1000: 39it [00:08,  4.47it/s, loss=1.7160]


Epoch 347 Avg Loss: 1.7162


Epoch 348/1000: 39it [00:08,  4.47it/s, loss=1.7079]


Epoch 348 Avg Loss: 1.7202


Epoch 349/1000: 39it [00:08,  4.48it/s, loss=1.7050]


Epoch 349 Avg Loss: 1.7170


Epoch 350/1000: 39it [00:08,  4.47it/s, loss=1.7073]


Epoch 350 Avg Loss: 1.7152


Epoch 351/1000: 39it [00:08,  4.47it/s, loss=1.7141]


Epoch 351 Avg Loss: 1.7204


Epoch 352/1000: 39it [00:08,  4.48it/s, loss=1.7080]


Epoch 352 Avg Loss: 1.7178


Epoch 353/1000: 39it [00:08,  4.48it/s, loss=1.7207]


Epoch 353 Avg Loss: 1.7166


Epoch 354/1000: 39it [00:08,  4.48it/s, loss=1.7274]


Epoch 354 Avg Loss: 1.7153


Epoch 355/1000: 39it [00:08,  4.48it/s, loss=1.7008]


Epoch 355 Avg Loss: 1.7127


Epoch 356/1000: 39it [00:08,  4.48it/s, loss=1.7207]


Epoch 356 Avg Loss: 1.7132


Epoch 357/1000: 39it [00:08,  4.48it/s, loss=1.6901]


Epoch 357 Avg Loss: 1.7101


Epoch 358/1000: 39it [00:08,  4.48it/s, loss=1.7186]


Epoch 358 Avg Loss: 1.7125


Epoch 359/1000: 39it [00:08,  4.48it/s, loss=1.6998]


Epoch 359 Avg Loss: 1.7133


Epoch 360/1000: 39it [00:08,  4.48it/s, loss=1.7049]


Epoch 360 Avg Loss: 1.7104


Epoch 361/1000: 39it [00:08,  4.47it/s, loss=1.6951]


Epoch 361 Avg Loss: 1.7123


Epoch 362/1000: 39it [00:08,  4.48it/s, loss=1.7118]


Epoch 362 Avg Loss: 1.7095


Epoch 363/1000: 39it [00:08,  4.48it/s, loss=1.7148]


Epoch 363 Avg Loss: 1.7086


Epoch 364/1000: 39it [00:08,  4.48it/s, loss=1.6958]


Epoch 364 Avg Loss: 1.7095


Epoch 365/1000: 39it [00:08,  4.47it/s, loss=1.7122]


Epoch 365 Avg Loss: 1.7090


Epoch 366/1000: 39it [00:08,  4.48it/s, loss=1.7158]


Epoch 366 Avg Loss: 1.7065


Epoch 367/1000: 39it [00:08,  4.48it/s, loss=1.7123]


Epoch 367 Avg Loss: 1.7084


Epoch 368/1000: 39it [00:08,  4.48it/s, loss=1.6990]


Epoch 368 Avg Loss: 1.7066


Epoch 369/1000: 39it [00:08,  4.48it/s, loss=1.7003]


Epoch 369 Avg Loss: 1.7033


Epoch 370/1000: 39it [00:08,  4.48it/s, loss=1.6974]


Epoch 370 Avg Loss: 1.7046


Epoch 371/1000: 39it [00:08,  4.47it/s, loss=1.7145]


Epoch 371 Avg Loss: 1.7069


Epoch 372/1000: 39it [00:08,  4.48it/s, loss=1.7074]


Epoch 372 Avg Loss: 1.7067


Epoch 373/1000: 39it [00:08,  4.48it/s, loss=1.7053]


Epoch 373 Avg Loss: 1.7051


Epoch 374/1000: 39it [00:08,  4.47it/s, loss=1.7110]


Epoch 374 Avg Loss: 1.7046


Epoch 375/1000: 39it [00:08,  4.48it/s, loss=1.6969]


Epoch 375 Avg Loss: 1.7039


Epoch 376/1000: 39it [00:08,  4.48it/s, loss=1.6943]


Epoch 376 Avg Loss: 1.6984


Epoch 377/1000: 39it [00:08,  4.48it/s, loss=1.6920]


Epoch 377 Avg Loss: 1.6972


Epoch 378/1000: 39it [00:08,  4.48it/s, loss=1.6901]


Epoch 378 Avg Loss: 1.6959


Epoch 379/1000: 39it [00:08,  4.48it/s, loss=1.6839]


Epoch 379 Avg Loss: 1.6956


Epoch 380/1000: 39it [00:08,  4.48it/s, loss=1.6927]


Epoch 380 Avg Loss: 1.6931


Epoch 381/1000: 39it [00:08,  4.48it/s, loss=1.7010]


Epoch 381 Avg Loss: 1.6943


Epoch 382/1000: 39it [00:08,  4.48it/s, loss=1.7039]


Epoch 382 Avg Loss: 1.6939


Epoch 383/1000: 39it [00:08,  4.48it/s, loss=1.6992]


Epoch 383 Avg Loss: 1.6942


Epoch 384/1000: 39it [00:08,  4.48it/s, loss=1.6802]


Epoch 384 Avg Loss: 1.6915


Epoch 385/1000: 39it [00:08,  4.48it/s, loss=1.6814]


Epoch 385 Avg Loss: 1.6902


Epoch 386/1000: 39it [00:08,  4.48it/s, loss=1.6960]


Epoch 386 Avg Loss: 1.6925


Epoch 387/1000: 39it [00:08,  4.48it/s, loss=1.6839]


Epoch 387 Avg Loss: 1.6902


Epoch 388/1000: 39it [00:08,  4.48it/s, loss=1.6964]


Epoch 388 Avg Loss: 1.6869


Epoch 389/1000: 39it [00:08,  4.48it/s, loss=1.6807]


Epoch 389 Avg Loss: 1.6919


Epoch 390/1000: 39it [00:08,  4.47it/s, loss=1.6787]


Epoch 390 Avg Loss: 1.6893


Epoch 391/1000: 39it [00:08,  4.48it/s, loss=1.6917]


Epoch 391 Avg Loss: 1.6910


Epoch 392/1000: 39it [00:08,  4.48it/s, loss=1.6814]


Epoch 392 Avg Loss: 1.6898


Epoch 393/1000: 39it [00:08,  4.48it/s, loss=1.6867]


Epoch 393 Avg Loss: 1.6891


Epoch 394/1000: 39it [00:08,  4.48it/s, loss=1.7084]


Epoch 394 Avg Loss: 1.6885


Epoch 395/1000: 39it [00:08,  4.48it/s, loss=1.6803]


Epoch 395 Avg Loss: 1.6904


Epoch 396/1000: 39it [00:08,  4.48it/s, loss=1.7009]


Epoch 396 Avg Loss: 1.6873


Epoch 397/1000: 39it [00:08,  4.47it/s, loss=1.6800]


Epoch 397 Avg Loss: 1.6841


Epoch 398/1000: 39it [00:08,  4.48it/s, loss=1.6789]


Epoch 398 Avg Loss: 1.6860


Epoch 399/1000: 39it [00:08,  4.48it/s, loss=1.6978]


Epoch 399 Avg Loss: 1.6884


Epoch 400/1000: 39it [00:08,  4.47it/s, loss=1.6823]


Epoch 400 Avg Loss: 1.6881


Epoch 401/1000: 39it [00:08,  4.47it/s, loss=1.6965]


Epoch 401 Avg Loss: 1.6879


Epoch 402/1000: 39it [00:08,  4.48it/s, loss=1.6995]


Epoch 402 Avg Loss: 1.6873


Epoch 403/1000: 39it [00:08,  4.47it/s, loss=1.6888]


Epoch 403 Avg Loss: 1.6860


Epoch 404/1000: 39it [00:08,  4.47it/s, loss=1.6854]


Epoch 404 Avg Loss: 1.6857


Epoch 405/1000: 39it [00:08,  4.48it/s, loss=1.6718]


Epoch 405 Avg Loss: 1.6808


Epoch 406/1000: 39it [00:08,  4.48it/s, loss=1.6809]


Epoch 406 Avg Loss: 1.6808


Epoch 407/1000: 39it [00:08,  4.48it/s, loss=1.6994]


Epoch 407 Avg Loss: 1.6836


Epoch 408/1000: 39it [00:08,  4.48it/s, loss=1.6824]


Epoch 408 Avg Loss: 1.6853


Epoch 409/1000: 39it [00:08,  4.48it/s, loss=1.6920]


Epoch 409 Avg Loss: 1.6852


Epoch 410/1000: 39it [00:08,  4.47it/s, loss=1.6772]


Epoch 410 Avg Loss: 1.6808


Epoch 411/1000: 39it [00:08,  4.48it/s, loss=1.6587]


Epoch 411 Avg Loss: 1.6704


Epoch 412/1000: 39it [00:08,  4.48it/s, loss=1.6841]


Epoch 412 Avg Loss: 1.6685


Epoch 413/1000: 39it [00:08,  4.48it/s, loss=1.6610]


Epoch 413 Avg Loss: 1.6709


Epoch 414/1000: 39it [00:08,  4.48it/s, loss=1.6677]


Epoch 414 Avg Loss: 1.6721


Epoch 415/1000: 39it [00:08,  4.47it/s, loss=1.6703]


Epoch 415 Avg Loss: 1.6716


Epoch 416/1000: 39it [00:08,  4.47it/s, loss=1.6835]


Epoch 416 Avg Loss: 1.6732


Epoch 417/1000: 39it [00:08,  4.48it/s, loss=1.6801]


Epoch 417 Avg Loss: 1.6682


Epoch 418/1000: 39it [00:08,  4.47it/s, loss=1.6917]


Epoch 418 Avg Loss: 1.6698


Epoch 419/1000: 39it [00:08,  4.48it/s, loss=1.6773]


Epoch 419 Avg Loss: 1.6689


Epoch 420/1000: 39it [00:08,  4.47it/s, loss=1.6914]


Epoch 420 Avg Loss: 1.6698


Epoch 421/1000: 39it [00:08,  4.48it/s, loss=1.6632]


Epoch 421 Avg Loss: 1.6691


Epoch 422/1000: 39it [00:08,  4.48it/s, loss=1.6768]


Epoch 422 Avg Loss: 1.6657


Epoch 423/1000: 39it [00:08,  4.48it/s, loss=1.6802]


Epoch 423 Avg Loss: 1.6680


Epoch 424/1000: 39it [00:08,  4.47it/s, loss=1.6695]


Epoch 424 Avg Loss: 1.6678


Epoch 425/1000: 39it [00:08,  4.47it/s, loss=1.6682]


Epoch 425 Avg Loss: 1.6659


Epoch 426/1000: 39it [00:08,  4.47it/s, loss=1.6676]


Epoch 426 Avg Loss: 1.6640


Epoch 427/1000: 39it [00:08,  4.47it/s, loss=1.6720]


Epoch 427 Avg Loss: 1.6656


Epoch 428/1000: 39it [00:08,  4.47it/s, loss=1.6605]


Epoch 428 Avg Loss: 1.6677


Epoch 429/1000: 39it [00:08,  4.47it/s, loss=1.6639]


Epoch 429 Avg Loss: 1.6665


Epoch 430/1000: 39it [00:08,  4.47it/s, loss=1.6554]


Epoch 430 Avg Loss: 1.6658


Epoch 431/1000: 39it [00:08,  4.47it/s, loss=1.6714]


Epoch 431 Avg Loss: 1.6644


Epoch 432/1000: 39it [00:08,  4.47it/s, loss=1.6548]


Epoch 432 Avg Loss: 1.6635


Epoch 433/1000: 39it [00:08,  4.47it/s, loss=1.6690]


Epoch 433 Avg Loss: 1.6665


Epoch 434/1000: 39it [00:08,  4.47it/s, loss=1.6611]


Epoch 434 Avg Loss: 1.6650


Epoch 435/1000: 39it [00:08,  4.48it/s, loss=1.6648]


Epoch 435 Avg Loss: 1.6643


Epoch 436/1000: 39it [00:08,  4.47it/s, loss=1.6621]


Epoch 436 Avg Loss: 1.6624


Epoch 437/1000: 39it [00:08,  4.47it/s, loss=1.6699]


Epoch 437 Avg Loss: 1.6643


Epoch 438/1000: 39it [00:08,  4.47it/s, loss=1.6755]


Epoch 438 Avg Loss: 1.6646


Epoch 439/1000: 39it [00:08,  4.47it/s, loss=1.6619]


Epoch 439 Avg Loss: 1.6594


Epoch 440/1000: 39it [00:08,  4.47it/s, loss=1.6664]


Epoch 440 Avg Loss: 1.6630


Epoch 441/1000: 39it [00:08,  4.47it/s, loss=1.6477]


Epoch 441 Avg Loss: 1.6612


Epoch 442/1000: 39it [00:08,  4.47it/s, loss=1.6798]


Epoch 442 Avg Loss: 1.6632


Epoch 443/1000: 39it [00:08,  4.47it/s, loss=1.6629]


Epoch 443 Avg Loss: 1.6621


Epoch 444/1000: 39it [00:08,  4.47it/s, loss=1.6565]


Epoch 444 Avg Loss: 1.6582


Epoch 445/1000: 39it [00:08,  4.47it/s, loss=1.8100]


Epoch 445 Avg Loss: 1.9646


Epoch 446/1000: 39it [00:08,  4.47it/s, loss=1.7261]


Epoch 446 Avg Loss: 1.7548


Epoch 447/1000: 39it [00:08,  4.47it/s, loss=1.7232]


Epoch 447 Avg Loss: 1.7211


Epoch 448/1000: 39it [00:08,  4.47it/s, loss=1.6975]


Epoch 448 Avg Loss: 1.7066


Epoch 449/1000: 39it [00:08,  4.47it/s, loss=1.6981]


Epoch 449 Avg Loss: 1.6976


Epoch 450/1000: 39it [00:08,  4.47it/s, loss=1.6809]


Epoch 450 Avg Loss: 1.6889


Epoch 451/1000: 39it [00:08,  4.48it/s, loss=1.6708]


Epoch 451 Avg Loss: 1.6817


Epoch 452/1000: 39it [00:08,  4.48it/s, loss=1.6710]


Epoch 452 Avg Loss: 1.6749


Epoch 453/1000: 39it [00:08,  4.47it/s, loss=1.6511]


Epoch 453 Avg Loss: 1.6602


Epoch 454/1000: 39it [00:08,  4.48it/s, loss=1.6663]


Epoch 454 Avg Loss: 1.6529


Epoch 455/1000: 39it [00:08,  4.48it/s, loss=1.6354]


Epoch 455 Avg Loss: 1.6428


Epoch 456/1000: 39it [00:08,  4.48it/s, loss=1.6380]


Epoch 456 Avg Loss: 1.6391


Epoch 457/1000: 39it [00:08,  4.47it/s, loss=1.6384]


Epoch 457 Avg Loss: 1.6354


Epoch 458/1000: 39it [00:08,  4.47it/s, loss=1.6246]


Epoch 458 Avg Loss: 1.6247


Epoch 459/1000: 39it [00:08,  4.48it/s, loss=1.6253]


Epoch 459 Avg Loss: 1.6208


Epoch 460/1000: 39it [00:08,  4.47it/s, loss=1.6057]


Epoch 460 Avg Loss: 1.6078


Epoch 461/1000: 39it [00:08,  4.47it/s, loss=1.6014]


Epoch 461 Avg Loss: 1.6002


Epoch 462/1000: 39it [00:08,  4.47it/s, loss=1.6065]


Epoch 462 Avg Loss: 1.6034


Epoch 463/1000: 39it [00:08,  4.47it/s, loss=1.5980]


Epoch 463 Avg Loss: 1.5968


Epoch 464/1000: 39it [00:08,  4.47it/s, loss=1.6171]


Epoch 464 Avg Loss: 1.5929


Epoch 465/1000: 39it [00:08,  4.48it/s, loss=1.5903]


Epoch 465 Avg Loss: 1.5962


Epoch 466/1000: 39it [00:08,  4.48it/s, loss=1.5968]


Epoch 466 Avg Loss: 1.5939


Epoch 467/1000: 39it [00:08,  4.48it/s, loss=1.5930]


Epoch 467 Avg Loss: 1.5919


Epoch 468/1000: 39it [00:08,  4.48it/s, loss=1.5879]


Epoch 468 Avg Loss: 1.5901


Epoch 469/1000: 39it [00:08,  4.47it/s, loss=1.5863]


Epoch 469 Avg Loss: 1.5938


Epoch 470/1000: 39it [00:08,  4.47it/s, loss=1.5852]


Epoch 470 Avg Loss: 1.5900


Epoch 471/1000: 39it [00:08,  4.47it/s, loss=1.5783]


Epoch 471 Avg Loss: 1.5900


Epoch 472/1000: 39it [00:08,  4.47it/s, loss=1.5806]


Epoch 472 Avg Loss: 1.5881


Epoch 473/1000: 39it [00:08,  4.47it/s, loss=1.5793]


Epoch 473 Avg Loss: 1.5893


Epoch 474/1000: 39it [00:08,  4.47it/s, loss=1.5991]


Epoch 474 Avg Loss: 1.5861


Epoch 475/1000: 39it [00:08,  4.47it/s, loss=1.5851]


Epoch 475 Avg Loss: 1.5881


Epoch 476/1000: 39it [00:08,  4.47it/s, loss=1.5907]


Epoch 476 Avg Loss: 1.5859


Epoch 477/1000: 39it [00:08,  4.48it/s, loss=1.5889]


Epoch 477 Avg Loss: 1.5865


Epoch 478/1000: 39it [00:08,  4.48it/s, loss=1.5846]


Epoch 478 Avg Loss: 1.5855


Epoch 479/1000: 39it [00:08,  4.47it/s, loss=1.5868]


Epoch 479 Avg Loss: 1.5832


Epoch 480/1000: 39it [00:08,  4.48it/s, loss=1.5767]


Epoch 480 Avg Loss: 1.5865


Epoch 481/1000: 39it [00:08,  4.48it/s, loss=1.5851]


Epoch 481 Avg Loss: 1.5838


Epoch 482/1000: 39it [00:08,  4.47it/s, loss=1.5829]


Epoch 482 Avg Loss: 1.5852


Epoch 483/1000: 39it [00:08,  4.48it/s, loss=1.5754]


Epoch 483 Avg Loss: 1.5823


Epoch 484/1000: 39it [00:08,  4.48it/s, loss=1.5916]


Epoch 484 Avg Loss: 1.5834


Epoch 485/1000: 39it [00:08,  4.48it/s, loss=1.5759]


Epoch 485 Avg Loss: 1.5839


Epoch 486/1000: 39it [00:08,  4.47it/s, loss=1.5835]


Epoch 486 Avg Loss: 1.5840


Epoch 487/1000: 39it [00:08,  4.47it/s, loss=1.5892]


Epoch 487 Avg Loss: 1.5821


Epoch 488/1000: 39it [00:08,  4.47it/s, loss=1.5817]


Epoch 488 Avg Loss: 1.5850


Epoch 489/1000: 39it [00:08,  4.47it/s, loss=1.5978]


Epoch 489 Avg Loss: 1.5841


Epoch 490/1000: 39it [00:08,  4.47it/s, loss=1.5697]


Epoch 490 Avg Loss: 1.5838


Epoch 491/1000: 39it [00:08,  4.47it/s, loss=1.5787]


Epoch 491 Avg Loss: 1.5818


Epoch 492/1000: 39it [00:08,  4.47it/s, loss=1.5756]


Epoch 492 Avg Loss: 1.5816


Epoch 493/1000: 39it [00:08,  4.47it/s, loss=1.5960]


Epoch 493 Avg Loss: 1.5848


Epoch 494/1000: 39it [00:08,  4.47it/s, loss=1.5743]


Epoch 494 Avg Loss: 1.5816


Epoch 495/1000: 39it [00:08,  4.47it/s, loss=1.5701]


Epoch 495 Avg Loss: 1.5823


Epoch 496/1000: 39it [00:08,  4.47it/s, loss=1.5835]


Epoch 496 Avg Loss: 1.5824


Epoch 497/1000: 39it [00:08,  4.47it/s, loss=1.5816]


Epoch 497 Avg Loss: 1.5832


Epoch 498/1000: 39it [00:08,  4.47it/s, loss=1.5835]


Epoch 498 Avg Loss: 1.5811


Epoch 499/1000: 39it [00:08,  4.47it/s, loss=1.5901]


Epoch 499 Avg Loss: 1.5830


Epoch 500/1000: 39it [00:08,  4.47it/s, loss=1.5899]


Epoch 500 Avg Loss: 1.5813


Epoch 501/1000: 39it [00:08,  4.47it/s, loss=1.6010]


Epoch 501 Avg Loss: 1.5824


Epoch 502/1000: 39it [00:08,  4.47it/s, loss=1.5921]


Epoch 502 Avg Loss: 1.5806


Epoch 503/1000: 39it [00:08,  4.47it/s, loss=1.5769]


Epoch 503 Avg Loss: 1.5823


Epoch 504/1000: 39it [00:08,  4.48it/s, loss=1.5946]


Epoch 504 Avg Loss: 1.5818


Epoch 505/1000: 39it [00:08,  4.48it/s, loss=1.5852]


Epoch 505 Avg Loss: 1.5830


Epoch 506/1000: 39it [00:08,  4.48it/s, loss=1.5907]


Epoch 506 Avg Loss: 1.5805


Epoch 507/1000: 39it [00:08,  4.48it/s, loss=1.5837]


Epoch 507 Avg Loss: 1.5795


Epoch 508/1000: 39it [00:08,  4.47it/s, loss=1.5750]


Epoch 508 Avg Loss: 1.5794


Epoch 509/1000: 39it [00:08,  4.47it/s, loss=1.5850]


Epoch 509 Avg Loss: 1.5798


Epoch 510/1000: 39it [00:08,  4.47it/s, loss=1.5742]


Epoch 510 Avg Loss: 1.5800


Epoch 511/1000: 39it [00:08,  4.47it/s, loss=1.5721]


Epoch 511 Avg Loss: 1.5805


Epoch 512/1000: 39it [00:08,  4.47it/s, loss=1.5807]


Epoch 512 Avg Loss: 1.5821


Epoch 513/1000: 39it [00:08,  4.47it/s, loss=1.5601]


Epoch 513 Avg Loss: 1.5783


Epoch 514/1000: 39it [00:08,  4.47it/s, loss=1.5824]


Epoch 514 Avg Loss: 1.5761


Epoch 515/1000: 39it [00:08,  4.47it/s, loss=1.5731]


Epoch 515 Avg Loss: 1.5758


Epoch 516/1000: 39it [00:08,  4.47it/s, loss=1.5760]


Epoch 516 Avg Loss: 1.5801


Epoch 517/1000: 39it [00:08,  4.48it/s, loss=1.5726]


Epoch 517 Avg Loss: 1.5790


Epoch 518/1000: 39it [00:08,  4.47it/s, loss=1.5890]


Epoch 518 Avg Loss: 1.5796


Epoch 519/1000: 39it [00:08,  4.48it/s, loss=1.5705]


Epoch 519 Avg Loss: 1.5748


Epoch 520/1000: 39it [00:08,  4.48it/s, loss=1.5661]


Epoch 520 Avg Loss: 1.5753


Epoch 521/1000: 39it [00:08,  4.48it/s, loss=1.5729]


Epoch 521 Avg Loss: 1.5759


Epoch 522/1000: 39it [00:08,  4.48it/s, loss=1.5686]


Epoch 522 Avg Loss: 1.5746


Epoch 523/1000: 39it [00:08,  4.48it/s, loss=1.5712]


Epoch 523 Avg Loss: 1.5753


Epoch 524/1000: 39it [00:08,  4.48it/s, loss=1.5805]


Epoch 524 Avg Loss: 1.5746


Epoch 525/1000: 39it [00:08,  4.47it/s, loss=1.5692]


Epoch 525 Avg Loss: 1.5760


Epoch 526/1000: 39it [00:08,  4.47it/s, loss=1.5917]


Epoch 526 Avg Loss: 1.5727


Epoch 527/1000: 39it [00:08,  4.47it/s, loss=1.5737]


Epoch 527 Avg Loss: 1.5734


Epoch 528/1000: 39it [00:08,  4.47it/s, loss=1.5787]


Epoch 528 Avg Loss: 1.5744


Epoch 529/1000: 39it [00:08,  4.47it/s, loss=1.5879]


Epoch 529 Avg Loss: 1.5766


Epoch 530/1000: 39it [00:08,  4.47it/s, loss=1.5699]


Epoch 530 Avg Loss: 1.5739


Epoch 531/1000: 39it [00:08,  4.48it/s, loss=1.5809]


Epoch 531 Avg Loss: 1.5747


Epoch 532/1000: 39it [00:08,  4.47it/s, loss=1.5740]


Epoch 532 Avg Loss: 1.5732


Epoch 533/1000: 39it [00:08,  4.47it/s, loss=1.5679]


Epoch 533 Avg Loss: 1.5754


Epoch 534/1000: 39it [00:08,  4.47it/s, loss=1.5659]


Epoch 534 Avg Loss: 1.5723


Epoch 535/1000: 39it [00:08,  4.47it/s, loss=1.5838]


Epoch 535 Avg Loss: 1.5743


Epoch 536/1000: 39it [00:08,  4.47it/s, loss=1.5769]


Epoch 536 Avg Loss: 1.5750


Epoch 537/1000: 39it [00:08,  4.47it/s, loss=1.5592]


Epoch 537 Avg Loss: 1.5710


Epoch 538/1000: 39it [00:08,  4.47it/s, loss=1.5786]


Epoch 538 Avg Loss: 1.5698


Epoch 539/1000: 39it [00:08,  4.48it/s, loss=1.5834]


Epoch 539 Avg Loss: 1.5760


Epoch 540/1000: 39it [00:08,  4.48it/s, loss=1.5774]


Epoch 540 Avg Loss: 1.5741


Epoch 541/1000: 39it [00:08,  4.47it/s, loss=1.5737]


Epoch 541 Avg Loss: 1.5735


Epoch 542/1000: 39it [00:08,  4.47it/s, loss=1.5894]


Epoch 542 Avg Loss: 1.5722


Epoch 543/1000: 39it [00:08,  4.48it/s, loss=1.5636]


Epoch 543 Avg Loss: 1.5721


Epoch 544/1000: 39it [00:08,  4.48it/s, loss=1.5778]


Epoch 544 Avg Loss: 1.5716


Epoch 545/1000: 39it [00:08,  4.48it/s, loss=1.5653]


Epoch 545 Avg Loss: 1.5733


Epoch 546/1000: 39it [00:08,  4.47it/s, loss=1.5792]


Epoch 546 Avg Loss: 1.5715


Epoch 547/1000: 39it [00:08,  4.48it/s, loss=1.5662]


Epoch 547 Avg Loss: 1.5724


Epoch 548/1000: 39it [00:08,  4.48it/s, loss=1.5623]


Epoch 548 Avg Loss: 1.5723


Epoch 549/1000: 39it [00:08,  4.48it/s, loss=1.5710]


Epoch 549 Avg Loss: 1.5722


Epoch 550/1000: 39it [00:08,  4.47it/s, loss=1.5584]


Epoch 550 Avg Loss: 1.5721


Epoch 551/1000: 39it [00:08,  4.47it/s, loss=1.5694]


Epoch 551 Avg Loss: 1.5714


Epoch 552/1000: 39it [00:08,  4.47it/s, loss=1.5717]


Epoch 552 Avg Loss: 1.5715


Epoch 553/1000: 39it [00:08,  4.47it/s, loss=1.5860]


Epoch 553 Avg Loss: 1.5703


Epoch 554/1000: 39it [00:08,  4.47it/s, loss=1.5677]


Epoch 554 Avg Loss: 1.5708


Epoch 555/1000: 39it [00:08,  4.47it/s, loss=1.5719]


Epoch 555 Avg Loss: 1.5692


Epoch 556/1000: 39it [00:08,  4.48it/s, loss=1.5779]


Epoch 556 Avg Loss: 1.5703


Epoch 557/1000: 39it [00:08,  4.48it/s, loss=1.5753]


Epoch 557 Avg Loss: 1.5751


Epoch 558/1000: 39it [00:08,  4.48it/s, loss=1.5623]


Epoch 558 Avg Loss: 1.5737


Epoch 559/1000: 39it [00:08,  4.47it/s, loss=1.5789]


Epoch 559 Avg Loss: 1.5739


Epoch 560/1000: 39it [00:08,  4.47it/s, loss=1.5677]


Epoch 560 Avg Loss: 1.5720


Epoch 561/1000: 39it [00:08,  4.47it/s, loss=1.5858]


Epoch 561 Avg Loss: 1.5723


Epoch 562/1000: 39it [00:08,  4.48it/s, loss=1.5557]


Epoch 562 Avg Loss: 1.5737


Epoch 563/1000: 39it [00:08,  4.47it/s, loss=1.5776]


Epoch 563 Avg Loss: 1.5687


Epoch 564/1000: 39it [00:08,  4.47it/s, loss=1.5549]


Epoch 564 Avg Loss: 1.5709


Epoch 565/1000: 39it [00:08,  4.47it/s, loss=1.5730]


Epoch 565 Avg Loss: 1.5705


Epoch 566/1000: 39it [00:08,  4.47it/s, loss=1.5790]


Epoch 566 Avg Loss: 1.5728


Epoch 567/1000: 39it [00:08,  4.47it/s, loss=1.5700]


Epoch 567 Avg Loss: 1.5705


Epoch 568/1000: 39it [00:08,  4.47it/s, loss=1.5471]


Epoch 568 Avg Loss: 1.5687


Epoch 569/1000: 39it [00:08,  4.47it/s, loss=1.5738]


Epoch 569 Avg Loss: 1.5705


Epoch 570/1000: 39it [00:08,  4.48it/s, loss=1.5871]


Epoch 570 Avg Loss: 1.5702


Epoch 571/1000: 39it [00:08,  4.48it/s, loss=1.5694]


Epoch 571 Avg Loss: 1.5697


Epoch 572/1000: 39it [00:08,  4.47it/s, loss=1.5666]


Epoch 572 Avg Loss: 1.5689


Epoch 573/1000: 39it [00:08,  4.47it/s, loss=1.5811]


Epoch 573 Avg Loss: 1.5695


Epoch 574/1000: 39it [00:08,  4.47it/s, loss=1.5618]


Epoch 574 Avg Loss: 1.5695


Epoch 575/1000: 39it [00:08,  4.48it/s, loss=1.5788]


Epoch 575 Avg Loss: 1.5722


Epoch 576/1000: 39it [00:08,  4.47it/s, loss=1.5549]


Epoch 576 Avg Loss: 1.5690


Epoch 577/1000: 39it [00:08,  4.48it/s, loss=1.5721]


Epoch 577 Avg Loss: 1.5701


Epoch 578/1000: 39it [00:08,  4.47it/s, loss=1.5705]


Epoch 578 Avg Loss: 1.5713


Epoch 579/1000: 39it [00:08,  4.48it/s, loss=1.5699]


Epoch 579 Avg Loss: 1.5699


Epoch 580/1000: 39it [00:08,  4.48it/s, loss=1.5586]


Epoch 580 Avg Loss: 1.5725


Epoch 581/1000: 39it [00:08,  4.47it/s, loss=1.5662]


Epoch 581 Avg Loss: 1.5670


Epoch 582/1000: 39it [00:08,  4.47it/s, loss=1.5538]


Epoch 582 Avg Loss: 1.5697


Epoch 583/1000: 39it [00:08,  4.47it/s, loss=1.5658]


Epoch 583 Avg Loss: 1.5683


Epoch 584/1000: 39it [00:08,  4.47it/s, loss=1.5721]


Epoch 584 Avg Loss: 1.5696


Epoch 585/1000: 39it [00:08,  4.47it/s, loss=1.5681]


Epoch 585 Avg Loss: 1.5675


Epoch 586/1000: 39it [00:08,  4.47it/s, loss=1.5669]


Epoch 586 Avg Loss: 1.5705


Epoch 587/1000: 39it [00:08,  4.47it/s, loss=1.5652]


Epoch 587 Avg Loss: 1.5694


Epoch 588/1000: 39it [00:08,  4.48it/s, loss=1.5623]


Epoch 588 Avg Loss: 1.5696


Epoch 589/1000: 39it [00:08,  4.48it/s, loss=1.5729]


Epoch 589 Avg Loss: 1.5720


Epoch 590/1000: 39it [00:08,  4.48it/s, loss=1.5834]


Epoch 590 Avg Loss: 1.5685


Epoch 591/1000: 39it [00:08,  4.47it/s, loss=1.5639]


Epoch 591 Avg Loss: 1.5699


Epoch 592/1000: 39it [00:08,  4.47it/s, loss=1.5779]


Epoch 592 Avg Loss: 1.5699


Epoch 593/1000: 39it [00:08,  4.48it/s, loss=1.5685]


Epoch 593 Avg Loss: 1.5710


Epoch 594/1000: 39it [00:08,  4.47it/s, loss=1.5616]


Epoch 594 Avg Loss: 1.5708


Epoch 595/1000: 39it [00:08,  4.47it/s, loss=1.5632]


Epoch 595 Avg Loss: 1.5670


Epoch 596/1000: 39it [00:08,  4.47it/s, loss=1.5661]


Epoch 596 Avg Loss: 1.5691


Epoch 597/1000: 39it [00:08,  4.47it/s, loss=1.5743]


Epoch 597 Avg Loss: 1.5710


Epoch 598/1000: 39it [00:08,  4.47it/s, loss=1.5725]


Epoch 598 Avg Loss: 1.5678


Epoch 599/1000: 39it [00:08,  4.47it/s, loss=1.5670]


Epoch 599 Avg Loss: 1.5672


Epoch 600/1000: 39it [00:08,  4.47it/s, loss=1.5608]


Epoch 600 Avg Loss: 1.5694


Epoch 601/1000: 39it [00:08,  4.47it/s, loss=1.5764]


Epoch 601 Avg Loss: 1.5670


Epoch 602/1000: 39it [00:08,  4.47it/s, loss=1.5587]


Epoch 602 Avg Loss: 1.5675


Epoch 603/1000: 39it [00:08,  4.47it/s, loss=1.5604]


Epoch 603 Avg Loss: 1.5685


Epoch 604/1000: 39it [00:08,  4.48it/s, loss=1.5697]


Epoch 604 Avg Loss: 1.5677


Epoch 605/1000: 39it [00:08,  4.47it/s, loss=1.5688]


Epoch 605 Avg Loss: 1.5682


Epoch 606/1000: 39it [00:08,  4.47it/s, loss=1.5707]


Epoch 606 Avg Loss: 1.5654


Epoch 607/1000: 39it [00:08,  4.48it/s, loss=1.5637]


Epoch 607 Avg Loss: 1.5659


Epoch 608/1000: 39it [00:08,  4.47it/s, loss=1.5597]


Epoch 608 Avg Loss: 1.5665


Epoch 609/1000: 39it [00:08,  4.47it/s, loss=1.5646]


Epoch 609 Avg Loss: 1.5661


Epoch 610/1000: 39it [00:08,  4.47it/s, loss=1.5718]


Epoch 610 Avg Loss: 1.5667


Epoch 611/1000: 39it [00:08,  4.47it/s, loss=1.5922]


Epoch 611 Avg Loss: 1.5676


Epoch 612/1000: 39it [00:08,  4.47it/s, loss=1.5686]


Epoch 612 Avg Loss: 1.5658


Epoch 613/1000: 39it [00:08,  4.47it/s, loss=1.5708]


Epoch 613 Avg Loss: 1.5659


Epoch 614/1000: 39it [00:08,  4.47it/s, loss=1.5650]


Epoch 614 Avg Loss: 1.5651


Epoch 615/1000: 39it [00:08,  4.47it/s, loss=1.5656]


Epoch 615 Avg Loss: 1.5666


Epoch 616/1000: 39it [00:08,  4.47it/s, loss=1.5845]


Epoch 616 Avg Loss: 1.5680


Epoch 617/1000: 39it [00:08,  4.47it/s, loss=1.5570]


Epoch 617 Avg Loss: 1.5641


Epoch 618/1000: 39it [00:08,  4.47it/s, loss=1.5762]


Epoch 618 Avg Loss: 1.5638


Epoch 619/1000: 39it [00:08,  4.47it/s, loss=1.5642]


Epoch 619 Avg Loss: 1.5667


Epoch 620/1000: 39it [00:08,  4.48it/s, loss=1.5643]


Epoch 620 Avg Loss: 1.5647


Epoch 621/1000: 39it [00:08,  4.48it/s, loss=1.5721]


Epoch 621 Avg Loss: 1.5650


Epoch 622/1000: 39it [00:08,  4.48it/s, loss=1.5703]


Epoch 622 Avg Loss: 1.5641


Epoch 623/1000: 39it [00:08,  4.47it/s, loss=1.5641]


Epoch 623 Avg Loss: 1.5646


Epoch 624/1000: 39it [00:08,  4.48it/s, loss=1.5682]


Epoch 624 Avg Loss: 1.5599


Epoch 625/1000: 39it [00:08,  4.47it/s, loss=1.5611]


Epoch 625 Avg Loss: 1.5610


Epoch 626/1000: 39it [00:08,  4.47it/s, loss=1.6925]


Epoch 626 Avg Loss: 1.8535


Epoch 627/1000: 39it [00:08,  4.47it/s, loss=1.6007]


Epoch 627 Avg Loss: 1.6342


Epoch 628/1000: 39it [00:08,  4.48it/s, loss=1.5956]


Epoch 628 Avg Loss: 1.6120


Epoch 629/1000: 39it [00:08,  4.47it/s, loss=1.6076]


Epoch 629 Avg Loss: 1.6049


Epoch 630/1000: 39it [00:08,  4.47it/s, loss=1.5991]


Epoch 630 Avg Loss: 1.5953


Epoch 631/1000: 39it [00:08,  4.47it/s, loss=1.5921]


Epoch 631 Avg Loss: 1.5932


Epoch 632/1000: 39it [00:08,  4.47it/s, loss=1.5823]


Epoch 632 Avg Loss: 1.5951


Epoch 633/1000: 39it [00:08,  4.48it/s, loss=1.5854]


Epoch 633 Avg Loss: 1.5849


Epoch 634/1000: 39it [00:08,  4.48it/s, loss=1.5778]


Epoch 634 Avg Loss: 1.5782


Epoch 635/1000: 39it [00:08,  4.47it/s, loss=1.5841]


Epoch 635 Avg Loss: 1.5743


Epoch 636/1000: 39it [00:08,  4.48it/s, loss=1.5606]


Epoch 636 Avg Loss: 1.5674


Epoch 637/1000: 39it [00:08,  4.48it/s, loss=1.5679]


Epoch 637 Avg Loss: 1.5651


Epoch 638/1000: 39it [00:08,  4.47it/s, loss=1.5509]


Epoch 638 Avg Loss: 1.5612


Epoch 639/1000: 39it [00:08,  4.47it/s, loss=1.5676]


Epoch 639 Avg Loss: 1.5622


Epoch 640/1000: 39it [00:08,  4.48it/s, loss=1.5577]


Epoch 640 Avg Loss: 1.5582


Epoch 641/1000: 39it [00:08,  4.47it/s, loss=1.5588]


Epoch 641 Avg Loss: 1.5579


Epoch 642/1000: 39it [00:08,  4.48it/s, loss=1.5565]


Epoch 642 Avg Loss: 1.5603


Epoch 643/1000: 39it [00:08,  4.48it/s, loss=1.5504]


Epoch 643 Avg Loss: 1.5582


Epoch 644/1000: 39it [00:08,  4.48it/s, loss=1.5850]


Epoch 644 Avg Loss: 1.5587


Epoch 645/1000: 39it [00:08,  4.48it/s, loss=1.5606]


Epoch 645 Avg Loss: 1.5599


Epoch 646/1000: 39it [00:08,  4.47it/s, loss=1.5528]


Epoch 646 Avg Loss: 1.5585


Epoch 647/1000: 39it [00:08,  4.47it/s, loss=1.5497]


Epoch 647 Avg Loss: 1.5552


Epoch 648/1000: 39it [00:08,  4.47it/s, loss=1.5525]


Epoch 648 Avg Loss: 1.5605


Epoch 649/1000: 39it [00:08,  4.48it/s, loss=1.5666]


Epoch 649 Avg Loss: 1.5581


Epoch 650/1000: 39it [00:08,  4.47it/s, loss=1.5560]


Epoch 650 Avg Loss: 1.5582


Epoch 651/1000: 39it [00:08,  4.48it/s, loss=1.5783]


Epoch 651 Avg Loss: 1.5605


Epoch 652/1000: 39it [00:08,  4.48it/s, loss=1.5570]


Epoch 652 Avg Loss: 1.5576


Epoch 653/1000: 39it [00:08,  4.47it/s, loss=1.5754]


Epoch 653 Avg Loss: 1.5574


Epoch 654/1000: 39it [00:08,  4.47it/s, loss=1.5651]


Epoch 654 Avg Loss: 1.5570


Epoch 655/1000: 39it [00:08,  4.48it/s, loss=1.5563]


Epoch 655 Avg Loss: 1.5564


Epoch 656/1000: 39it [00:08,  4.48it/s, loss=1.5550]


Epoch 656 Avg Loss: 1.5561


Epoch 657/1000: 39it [00:08,  4.47it/s, loss=1.5781]


Epoch 657 Avg Loss: 1.5584


Epoch 658/1000: 39it [00:08,  4.47it/s, loss=1.5514]


Epoch 658 Avg Loss: 1.5575


Epoch 659/1000: 39it [00:08,  4.47it/s, loss=1.5561]


Epoch 659 Avg Loss: 1.5574


Epoch 660/1000: 39it [00:08,  4.47it/s, loss=1.5900]


Epoch 660 Avg Loss: 1.5591


Epoch 661/1000: 39it [00:08,  4.47it/s, loss=1.5571]


Epoch 661 Avg Loss: 1.5574


Epoch 662/1000: 39it [00:08,  4.47it/s, loss=1.5618]


Epoch 662 Avg Loss: 1.5554


Epoch 663/1000: 39it [00:08,  4.48it/s, loss=1.5479]


Epoch 663 Avg Loss: 1.5567


Epoch 664/1000: 39it [00:08,  4.47it/s, loss=1.5755]


Epoch 664 Avg Loss: 1.5581


Epoch 665/1000: 39it [00:08,  4.48it/s, loss=1.5371]


Epoch 665 Avg Loss: 1.5567


Epoch 666/1000: 39it [00:08,  4.47it/s, loss=1.5642]


Epoch 666 Avg Loss: 1.5570


Epoch 667/1000: 39it [00:08,  4.47it/s, loss=1.5672]


Epoch 667 Avg Loss: 1.5580


Epoch 668/1000: 39it [00:08,  4.47it/s, loss=1.5521]


Epoch 668 Avg Loss: 1.5590


Epoch 669/1000: 39it [00:08,  4.47it/s, loss=1.5578]


Epoch 669 Avg Loss: 1.5581


Epoch 670/1000: 39it [00:08,  4.48it/s, loss=1.5564]


Epoch 670 Avg Loss: 1.5588


Epoch 671/1000: 39it [00:08,  4.47it/s, loss=1.5629]


Epoch 671 Avg Loss: 1.5592


Epoch 672/1000: 39it [00:08,  4.47it/s, loss=1.5543]


Epoch 672 Avg Loss: 1.5558


Epoch 673/1000: 39it [00:08,  4.47it/s, loss=1.5542]


Epoch 673 Avg Loss: 1.5569


Epoch 674/1000: 39it [00:08,  4.48it/s, loss=1.5558]


Epoch 674 Avg Loss: 1.5566


Epoch 675/1000: 39it [00:08,  4.47it/s, loss=1.5603]


Epoch 675 Avg Loss: 1.5570


Epoch 676/1000: 39it [00:08,  4.47it/s, loss=1.5530]


Epoch 676 Avg Loss: 1.5570


Epoch 677/1000: 39it [00:08,  4.47it/s, loss=1.5668]


Epoch 677 Avg Loss: 1.5585


Epoch 678/1000: 39it [00:08,  4.47it/s, loss=1.5495]


Epoch 678 Avg Loss: 1.5582


Epoch 679/1000: 39it [00:08,  4.47it/s, loss=1.5641]


Epoch 679 Avg Loss: 1.5579


Epoch 680/1000: 39it [00:08,  4.48it/s, loss=1.5643]


Epoch 680 Avg Loss: 1.5575


Epoch 681/1000: 39it [00:08,  4.47it/s, loss=1.5621]


Epoch 681 Avg Loss: 1.5553


Epoch 682/1000: 39it [00:08,  4.48it/s, loss=1.5569]


Epoch 682 Avg Loss: 1.5580


Epoch 683/1000: 39it [00:08,  4.47it/s, loss=1.5490]


Epoch 683 Avg Loss: 1.5574


Epoch 684/1000: 39it [00:08,  4.47it/s, loss=1.5639]


Epoch 684 Avg Loss: 1.5580


Epoch 685/1000: 39it [00:08,  4.47it/s, loss=1.5604]


Epoch 685 Avg Loss: 1.5596


Epoch 686/1000: 39it [00:08,  4.47it/s, loss=1.5443]


Epoch 686 Avg Loss: 1.5574


Epoch 687/1000: 39it [00:08,  4.47it/s, loss=1.5619]


Epoch 687 Avg Loss: 1.5596


Epoch 688/1000: 39it [00:08,  4.48it/s, loss=1.5439]


Epoch 688 Avg Loss: 1.5583


Epoch 689/1000: 39it [00:08,  4.47it/s, loss=1.5586]


Epoch 689 Avg Loss: 1.5589


Epoch 690/1000: 39it [00:08,  4.47it/s, loss=1.5631]


Epoch 690 Avg Loss: 1.5556


Epoch 691/1000: 39it [00:08,  4.48it/s, loss=1.5525]


Epoch 691 Avg Loss: 1.5564


Epoch 692/1000: 39it [00:08,  4.48it/s, loss=1.5613]


Epoch 692 Avg Loss: 1.5614


Epoch 693/1000: 39it [00:08,  4.47it/s, loss=1.5617]


Epoch 693 Avg Loss: 1.5576


Epoch 694/1000: 39it [00:08,  4.47it/s, loss=1.5718]


Epoch 694 Avg Loss: 1.5579


Epoch 695/1000: 39it [00:08,  4.48it/s, loss=1.5530]


Epoch 695 Avg Loss: 1.5553


Epoch 696/1000: 39it [00:08,  4.47it/s, loss=1.5477]


Epoch 696 Avg Loss: 1.5556


Epoch 697/1000: 39it [00:08,  4.47it/s, loss=1.5586]


Epoch 697 Avg Loss: 1.5565


Epoch 698/1000: 39it [00:08,  4.47it/s, loss=1.5680]


Epoch 698 Avg Loss: 1.5570


Epoch 699/1000: 39it [00:08,  4.48it/s, loss=1.5572]


Epoch 699 Avg Loss: 1.5545


Epoch 700/1000: 39it [00:08,  4.47it/s, loss=1.5629]


Epoch 700 Avg Loss: 1.5582


Epoch 701/1000: 39it [00:08,  4.47it/s, loss=1.5605]


Epoch 701 Avg Loss: 1.5546


Epoch 702/1000: 39it [00:08,  4.47it/s, loss=1.5500]


Epoch 702 Avg Loss: 1.5560


Epoch 703/1000: 39it [00:08,  4.47it/s, loss=1.5813]


Epoch 703 Avg Loss: 1.5560


Epoch 704/1000: 39it [00:08,  4.47it/s, loss=1.5696]


Epoch 704 Avg Loss: 1.5568


Epoch 705/1000: 39it [00:08,  4.48it/s, loss=1.5470]


Epoch 705 Avg Loss: 1.5593


Epoch 706/1000: 39it [00:08,  4.48it/s, loss=1.5446]


Epoch 706 Avg Loss: 1.5542


Epoch 707/1000: 39it [00:08,  4.47it/s, loss=1.5523]


Epoch 707 Avg Loss: 1.5558


Epoch 708/1000: 39it [00:08,  4.47it/s, loss=1.5620]


Epoch 708 Avg Loss: 1.5583


Epoch 709/1000: 39it [00:08,  4.47it/s, loss=1.5511]


Epoch 709 Avg Loss: 1.5589


Epoch 710/1000: 39it [00:08,  4.47it/s, loss=1.5522]


Epoch 710 Avg Loss: 1.5575


Epoch 711/1000: 39it [00:08,  4.47it/s, loss=1.5584]


Epoch 711 Avg Loss: 1.5610


Epoch 712/1000: 39it [00:08,  4.47it/s, loss=1.5563]


Epoch 712 Avg Loss: 1.5582


Epoch 713/1000: 39it [00:08,  4.48it/s, loss=1.5546]


Epoch 713 Avg Loss: 1.5590


Epoch 714/1000: 39it [00:08,  4.47it/s, loss=1.5728]


Epoch 714 Avg Loss: 1.5591


Epoch 715/1000: 39it [00:08,  4.48it/s, loss=1.5671]


Epoch 715 Avg Loss: 1.5566


Epoch 716/1000: 39it [00:08,  4.48it/s, loss=1.5433]


Epoch 716 Avg Loss: 1.5568


Epoch 717/1000: 39it [00:08,  4.48it/s, loss=1.5531]


Epoch 717 Avg Loss: 1.5555


Epoch 718/1000: 39it [00:08,  4.47it/s, loss=1.5505]


Epoch 718 Avg Loss: 1.5559


Epoch 719/1000: 39it [00:08,  4.48it/s, loss=1.5569]


Epoch 719 Avg Loss: 1.5547


Epoch 720/1000: 39it [00:08,  4.48it/s, loss=1.5554]


Epoch 720 Avg Loss: 1.5542


Epoch 721/1000: 39it [00:08,  4.48it/s, loss=1.5771]


Epoch 721 Avg Loss: 1.5585


Epoch 722/1000: 39it [00:08,  4.47it/s, loss=1.5619]


Epoch 722 Avg Loss: 1.5601


Epoch 723/1000: 39it [00:08,  4.47it/s, loss=1.5511]


Epoch 723 Avg Loss: 1.5550


Epoch 724/1000: 39it [00:08,  4.47it/s, loss=1.5625]


Epoch 724 Avg Loss: 1.5540


Epoch 725/1000: 39it [00:08,  4.47it/s, loss=1.5526]


Epoch 725 Avg Loss: 1.5578


Epoch 726/1000: 39it [00:08,  4.47it/s, loss=1.5520]


Epoch 726 Avg Loss: 1.5550


Epoch 727/1000: 39it [00:08,  4.47it/s, loss=1.5639]


Epoch 727 Avg Loss: 1.5552


Epoch 728/1000: 39it [00:08,  4.48it/s, loss=1.5431]


Epoch 728 Avg Loss: 1.5552


Epoch 729/1000: 39it [00:08,  4.48it/s, loss=1.5583]


Epoch 729 Avg Loss: 1.5556


Epoch 730/1000: 39it [00:08,  4.47it/s, loss=1.5567]


Epoch 730 Avg Loss: 1.5552


Epoch 731/1000: 39it [00:08,  4.48it/s, loss=1.5487]


Epoch 731 Avg Loss: 1.5552


Epoch 732/1000: 39it [00:08,  4.48it/s, loss=1.5614]


Epoch 732 Avg Loss: 1.5579


Epoch 733/1000: 39it [00:08,  4.48it/s, loss=1.5524]


Epoch 733 Avg Loss: 1.5588


Epoch 734/1000: 39it [00:08,  4.48it/s, loss=1.5377]


Epoch 734 Avg Loss: 1.5555


Epoch 735/1000: 39it [00:08,  4.48it/s, loss=1.5534]


Epoch 735 Avg Loss: 1.5581


Epoch 736/1000: 39it [00:08,  4.48it/s, loss=1.5679]


Epoch 736 Avg Loss: 1.5571


Epoch 737/1000: 39it [00:08,  4.47it/s, loss=1.5608]


Epoch 737 Avg Loss: 1.5576


Epoch 738/1000: 39it [00:08,  4.48it/s, loss=1.5590]


Epoch 738 Avg Loss: 1.5565


Epoch 739/1000: 39it [00:08,  4.48it/s, loss=1.5571]


Epoch 739 Avg Loss: 1.5579


Epoch 740/1000: 39it [00:08,  4.48it/s, loss=1.5591]


Epoch 740 Avg Loss: 1.5582


Epoch 741/1000: 39it [00:08,  4.48it/s, loss=1.5602]


Epoch 741 Avg Loss: 1.5553


Epoch 742/1000: 39it [00:08,  4.47it/s, loss=1.5597]


Epoch 742 Avg Loss: 1.5571


Epoch 743/1000: 39it [00:08,  4.48it/s, loss=1.5704]


Epoch 743 Avg Loss: 1.5570


Epoch 744/1000: 39it [00:08,  4.47it/s, loss=1.5554]


Epoch 744 Avg Loss: 1.5546


Epoch 745/1000: 39it [00:08,  4.48it/s, loss=1.5516]


Epoch 745 Avg Loss: 1.5571


Epoch 746/1000: 39it [00:08,  4.47it/s, loss=1.5598]


Epoch 746 Avg Loss: 1.5556


Epoch 747/1000: 39it [00:08,  4.47it/s, loss=1.5508]


Epoch 747 Avg Loss: 1.5555


Epoch 748/1000: 39it [00:08,  4.48it/s, loss=1.5526]


Epoch 748 Avg Loss: 1.5549


Epoch 749/1000: 39it [00:08,  4.47it/s, loss=1.5584]


Epoch 749 Avg Loss: 1.5528


Epoch 750/1000: 39it [00:08,  4.48it/s, loss=1.5556]


Epoch 750 Avg Loss: 1.5536


Epoch 751/1000: 39it [00:08,  4.48it/s, loss=1.5532]


Epoch 751 Avg Loss: 1.5556


Epoch 752/1000: 39it [00:08,  4.47it/s, loss=1.5563]


Epoch 752 Avg Loss: 1.5572


Epoch 753/1000: 39it [00:08,  4.47it/s, loss=1.5480]


Epoch 753 Avg Loss: 1.5551


Epoch 754/1000: 39it [00:08,  4.48it/s, loss=1.5652]


Epoch 754 Avg Loss: 1.5569


Epoch 755/1000: 39it [00:08,  4.47it/s, loss=1.5616]


Epoch 755 Avg Loss: 1.5547


Epoch 756/1000: 39it [00:08,  4.48it/s, loss=1.5528]


Epoch 756 Avg Loss: 1.5552


Epoch 757/1000: 39it [00:08,  4.47it/s, loss=1.5592]


Epoch 757 Avg Loss: 1.5561


Epoch 758/1000: 39it [00:08,  4.47it/s, loss=1.5477]


Epoch 758 Avg Loss: 1.5560


Epoch 759/1000: 39it [00:08,  4.48it/s, loss=1.5473]


Epoch 759 Avg Loss: 1.5528


Epoch 760/1000: 39it [00:08,  4.47it/s, loss=1.5622]


Epoch 760 Avg Loss: 1.5545


Epoch 761/1000: 39it [00:08,  4.47it/s, loss=1.5661]


Epoch 761 Avg Loss: 1.5531


Epoch 762/1000: 39it [00:08,  4.47it/s, loss=1.5512]


Epoch 762 Avg Loss: 1.5522


Epoch 763/1000: 39it [00:08,  4.47it/s, loss=1.5455]


Epoch 763 Avg Loss: 1.5575


Epoch 764/1000: 39it [00:08,  4.47it/s, loss=1.5356]


Epoch 764 Avg Loss: 1.5560


Epoch 765/1000: 39it [00:08,  4.47it/s, loss=1.5484]


Epoch 765 Avg Loss: 1.5568


Epoch 766/1000: 39it [00:08,  4.47it/s, loss=1.5533]


Epoch 766 Avg Loss: 1.5540


Epoch 767/1000: 39it [00:08,  4.47it/s, loss=1.5572]


Epoch 767 Avg Loss: 1.5560


Epoch 768/1000: 39it [00:08,  4.47it/s, loss=1.5678]


Epoch 768 Avg Loss: 1.5541


Epoch 769/1000: 39it [00:08,  4.47it/s, loss=1.5536]


Epoch 769 Avg Loss: 1.5548


Epoch 770/1000: 39it [00:08,  4.47it/s, loss=1.9480]


Epoch 770 Avg Loss: 1.6596


Epoch 771/1000: 39it [00:08,  4.47it/s, loss=1.5957]


Epoch 771 Avg Loss: 1.6506


Epoch 772/1000: 39it [00:08,  4.47it/s, loss=1.5676]


Epoch 772 Avg Loss: 1.5761


Epoch 773/1000: 39it [00:08,  4.48it/s, loss=1.5756]


Epoch 773 Avg Loss: 1.5684


Epoch 774/1000: 39it [00:08,  4.48it/s, loss=1.5724]


Epoch 774 Avg Loss: 1.5684


Epoch 775/1000: 39it [00:08,  4.48it/s, loss=1.5655]


Epoch 775 Avg Loss: 1.5665


Epoch 776/1000: 39it [00:08,  4.47it/s, loss=1.5529]


Epoch 776 Avg Loss: 1.5588


Epoch 777/1000: 39it [00:08,  4.47it/s, loss=1.5486]


Epoch 777 Avg Loss: 1.5571


Epoch 778/1000: 39it [00:08,  4.47it/s, loss=1.5706]


Epoch 778 Avg Loss: 1.5551


Epoch 779/1000: 39it [00:08,  4.47it/s, loss=1.5544]


Epoch 779 Avg Loss: 1.5544


Epoch 780/1000: 39it [00:08,  4.48it/s, loss=1.5551]


Epoch 780 Avg Loss: 1.5540


Epoch 781/1000: 39it [00:08,  4.48it/s, loss=1.5610]


Epoch 781 Avg Loss: 1.5510


Epoch 782/1000: 39it [00:08,  4.47it/s, loss=1.5506]


Epoch 782 Avg Loss: 1.5568


Epoch 783/1000: 39it [00:08,  4.47it/s, loss=1.5431]


Epoch 783 Avg Loss: 1.5533


Epoch 784/1000: 39it [00:08,  4.47it/s, loss=1.5564]


Epoch 784 Avg Loss: 1.5542


Epoch 785/1000: 39it [00:08,  4.48it/s, loss=1.5486]


Epoch 785 Avg Loss: 1.5529


Epoch 786/1000: 39it [00:08,  4.48it/s, loss=1.5474]


Epoch 786 Avg Loss: 1.5554


Epoch 787/1000: 39it [00:08,  4.47it/s, loss=1.5672]


Epoch 787 Avg Loss: 1.5535


Epoch 788/1000: 39it [00:08,  4.48it/s, loss=1.5413]


Epoch 788 Avg Loss: 1.5544


Epoch 789/1000: 39it [00:08,  4.47it/s, loss=1.5538]


Epoch 789 Avg Loss: 1.5572


Epoch 790/1000: 39it [00:08,  4.47it/s, loss=1.5603]


Epoch 790 Avg Loss: 1.5575


Epoch 791/1000: 39it [00:08,  4.48it/s, loss=1.5526]


Epoch 791 Avg Loss: 1.5509


Epoch 792/1000: 39it [00:08,  4.47it/s, loss=1.5498]


Epoch 792 Avg Loss: 1.5529


Epoch 793/1000: 39it [00:08,  4.47it/s, loss=1.5508]


Epoch 793 Avg Loss: 1.5546


Epoch 794/1000: 39it [00:08,  4.47it/s, loss=1.5558]


Epoch 794 Avg Loss: 1.5537


Epoch 795/1000: 39it [00:08,  4.47it/s, loss=1.5710]


Epoch 795 Avg Loss: 1.5532


Epoch 796/1000: 39it [00:08,  4.48it/s, loss=1.5722]


Epoch 796 Avg Loss: 1.5557


Epoch 797/1000: 39it [00:08,  4.47it/s, loss=1.5299]


Epoch 797 Avg Loss: 1.5522


Epoch 798/1000: 39it [00:08,  4.47it/s, loss=1.5502]


Epoch 798 Avg Loss: 1.5533


Epoch 799/1000: 39it [00:08,  4.48it/s, loss=1.5505]


Epoch 799 Avg Loss: 1.5527


Epoch 800/1000: 39it [00:08,  4.48it/s, loss=1.5471]


Epoch 800 Avg Loss: 1.5529


Epoch 801/1000: 39it [00:08,  4.48it/s, loss=1.5327]


Epoch 801 Avg Loss: 1.5518


Epoch 802/1000: 39it [00:08,  4.48it/s, loss=1.5625]


Epoch 802 Avg Loss: 1.5564


Epoch 803/1000: 39it [00:08,  4.48it/s, loss=1.5558]


Epoch 803 Avg Loss: 1.5521


Epoch 804/1000: 39it [00:08,  4.47it/s, loss=1.5370]


Epoch 804 Avg Loss: 1.5531


Epoch 805/1000: 39it [00:08,  4.48it/s, loss=1.5454]


Epoch 805 Avg Loss: 1.5517


Epoch 806/1000: 39it [00:08,  4.48it/s, loss=1.5499]


Epoch 806 Avg Loss: 1.5536


Epoch 807/1000: 39it [00:08,  4.48it/s, loss=1.5593]


Epoch 807 Avg Loss: 1.5530


Epoch 808/1000: 39it [00:08,  4.47it/s, loss=1.5507]


Epoch 808 Avg Loss: 1.5528


Epoch 809/1000: 39it [00:08,  4.48it/s, loss=1.5576]


Epoch 809 Avg Loss: 1.5534


Epoch 810/1000: 39it [00:08,  4.48it/s, loss=1.5639]


Epoch 810 Avg Loss: 1.5553


Epoch 811/1000: 39it [00:08,  4.47it/s, loss=1.5467]


Epoch 811 Avg Loss: 1.5539


Epoch 812/1000: 39it [00:08,  4.47it/s, loss=1.5506]


Epoch 812 Avg Loss: 1.5530


Epoch 813/1000: 39it [00:08,  4.47it/s, loss=1.5659]


Epoch 813 Avg Loss: 1.5537


Epoch 814/1000: 39it [00:08,  4.47it/s, loss=1.5445]


Epoch 814 Avg Loss: 1.5549


Epoch 815/1000: 39it [00:08,  4.47it/s, loss=1.5520]


Epoch 815 Avg Loss: 1.5552


Epoch 816/1000: 39it [00:08,  4.47it/s, loss=1.5483]


Epoch 816 Avg Loss: 1.5534


Epoch 817/1000: 39it [00:08,  4.48it/s, loss=1.5417]


Epoch 817 Avg Loss: 1.5518


Epoch 818/1000: 39it [00:08,  4.47it/s, loss=1.5442]


Epoch 818 Avg Loss: 1.5513


Epoch 819/1000: 39it [00:08,  4.48it/s, loss=1.5566]


Epoch 819 Avg Loss: 1.5546


Epoch 820/1000: 39it [00:08,  4.47it/s, loss=1.5536]


Epoch 820 Avg Loss: 1.5534


Epoch 821/1000: 39it [00:08,  4.48it/s, loss=1.5428]


Epoch 821 Avg Loss: 1.5500


Epoch 822/1000: 39it [00:08,  4.47it/s, loss=1.5596]


Epoch 822 Avg Loss: 1.5531


Epoch 823/1000: 39it [00:08,  4.47it/s, loss=1.5374]


Epoch 823 Avg Loss: 1.5534


Epoch 824/1000: 39it [00:08,  4.47it/s, loss=1.5590]


Epoch 824 Avg Loss: 1.5546


Epoch 825/1000: 39it [00:08,  4.48it/s, loss=1.5624]


Epoch 825 Avg Loss: 1.5530


Epoch 826/1000: 39it [00:08,  4.47it/s, loss=1.5391]


Epoch 826 Avg Loss: 1.5539


Epoch 827/1000: 39it [00:08,  4.48it/s, loss=1.5632]


Epoch 827 Avg Loss: 1.5524


Epoch 828/1000: 39it [00:08,  4.48it/s, loss=1.5530]


Epoch 828 Avg Loss: 1.5542


Epoch 829/1000: 39it [00:08,  4.47it/s, loss=1.5361]


Epoch 829 Avg Loss: 1.5527


Epoch 830/1000: 39it [00:08,  4.47it/s, loss=1.5554]


Epoch 830 Avg Loss: 1.5529


Epoch 831/1000: 39it [00:08,  4.47it/s, loss=1.5472]


Epoch 831 Avg Loss: 1.5538


Epoch 832/1000: 39it [00:08,  4.47it/s, loss=1.5425]


Epoch 832 Avg Loss: 1.5525


Epoch 833/1000: 39it [00:08,  4.47it/s, loss=1.5428]


Epoch 833 Avg Loss: 1.5525


Epoch 834/1000: 39it [00:08,  4.47it/s, loss=1.5505]


Epoch 834 Avg Loss: 1.5516


Epoch 835/1000: 39it [00:08,  4.48it/s, loss=1.5530]


Epoch 835 Avg Loss: 1.5523


Epoch 836/1000: 39it [00:08,  4.48it/s, loss=1.5631]


Epoch 836 Avg Loss: 1.5521


Epoch 837/1000: 39it [00:08,  4.47it/s, loss=1.5424]


Epoch 837 Avg Loss: 1.5511


Epoch 838/1000: 39it [00:08,  4.48it/s, loss=1.5571]


Epoch 838 Avg Loss: 1.5541


Epoch 839/1000: 39it [00:08,  4.48it/s, loss=1.5508]


Epoch 839 Avg Loss: 1.5525


Epoch 840/1000: 39it [00:08,  4.47it/s, loss=1.5588]


Epoch 840 Avg Loss: 1.5541


Epoch 841/1000: 39it [00:08,  4.47it/s, loss=1.5636]


Epoch 841 Avg Loss: 1.5543


Epoch 842/1000: 39it [00:08,  4.47it/s, loss=1.5479]


Epoch 842 Avg Loss: 1.5517


Epoch 843/1000: 39it [00:08,  4.47it/s, loss=1.5500]


Epoch 843 Avg Loss: 1.5556


Epoch 844/1000: 39it [00:08,  4.47it/s, loss=1.5357]


Epoch 844 Avg Loss: 1.5527


Epoch 845/1000: 39it [00:08,  4.47it/s, loss=1.5433]


Epoch 845 Avg Loss: 1.5526


Epoch 846/1000: 39it [00:08,  4.48it/s, loss=1.5378]


Epoch 846 Avg Loss: 1.5524


Epoch 847/1000: 39it [00:08,  4.47it/s, loss=1.5533]


Epoch 847 Avg Loss: 1.5532


Epoch 848/1000: 39it [00:08,  4.47it/s, loss=1.5617]


Epoch 848 Avg Loss: 1.5530


Epoch 849/1000: 39it [00:08,  4.47it/s, loss=1.5593]


Epoch 849 Avg Loss: 1.5549


Epoch 850/1000: 39it [00:08,  4.47it/s, loss=1.5594]


Epoch 850 Avg Loss: 1.5516


Epoch 851/1000: 39it [00:08,  4.47it/s, loss=1.5571]


Epoch 851 Avg Loss: 1.5529


Epoch 852/1000: 39it [00:08,  4.48it/s, loss=1.5601]


Epoch 852 Avg Loss: 1.5512


Epoch 853/1000: 39it [00:08,  4.48it/s, loss=1.5563]


Epoch 853 Avg Loss: 1.5510


Epoch 854/1000: 39it [00:08,  4.47it/s, loss=1.5506]


Epoch 854 Avg Loss: 1.5515


Epoch 855/1000: 39it [00:08,  4.47it/s, loss=1.5472]


Epoch 855 Avg Loss: 1.5508


Epoch 856/1000: 39it [00:08,  4.47it/s, loss=1.5532]


Epoch 856 Avg Loss: 1.5520


Epoch 857/1000: 39it [00:08,  4.48it/s, loss=1.5731]


Epoch 857 Avg Loss: 1.5525


Epoch 858/1000: 39it [00:08,  4.47it/s, loss=1.5503]


Epoch 858 Avg Loss: 1.5497


Epoch 859/1000: 39it [00:08,  4.47it/s, loss=1.5651]


Epoch 859 Avg Loss: 1.5528


Epoch 860/1000: 39it [00:08,  4.47it/s, loss=1.5391]


Epoch 860 Avg Loss: 1.5537


Epoch 861/1000: 39it [00:08,  4.47it/s, loss=1.5447]


Epoch 861 Avg Loss: 1.5502


Epoch 862/1000: 39it [00:08,  4.47it/s, loss=1.5483]


Epoch 862 Avg Loss: 1.5516


Epoch 863/1000: 39it [00:08,  4.47it/s, loss=1.5448]


Epoch 863 Avg Loss: 1.5521


Epoch 864/1000: 39it [00:08,  4.47it/s, loss=1.5632]


Epoch 864 Avg Loss: 1.5525


Epoch 865/1000: 39it [00:08,  4.47it/s, loss=1.5537]


Epoch 865 Avg Loss: 1.5527


Epoch 866/1000: 39it [00:08,  4.47it/s, loss=1.5514]


Epoch 866 Avg Loss: 1.5507


Epoch 867/1000: 39it [00:08,  4.47it/s, loss=1.5536]


Epoch 867 Avg Loss: 1.5533


Epoch 868/1000: 39it [00:08,  4.47it/s, loss=1.5459]


Epoch 868 Avg Loss: 1.5491


Epoch 869/1000: 39it [00:08,  4.48it/s, loss=1.5579]


Epoch 869 Avg Loss: 1.5532


Epoch 870/1000: 39it [00:08,  4.48it/s, loss=1.5322]


Epoch 870 Avg Loss: 1.5505


Epoch 871/1000: 39it [00:08,  4.47it/s, loss=1.5549]


Epoch 871 Avg Loss: 1.5500


Epoch 872/1000: 39it [00:08,  4.48it/s, loss=1.5552]


Epoch 872 Avg Loss: 1.5483


Epoch 873/1000: 39it [00:08,  4.47it/s, loss=1.5472]


Epoch 873 Avg Loss: 1.5532


Epoch 874/1000: 39it [00:08,  4.47it/s, loss=1.5355]


Epoch 874 Avg Loss: 1.5505


Epoch 875/1000: 39it [00:08,  4.47it/s, loss=1.5443]


Epoch 875 Avg Loss: 1.5505


Epoch 876/1000: 39it [00:08,  4.47it/s, loss=1.5491]


Epoch 876 Avg Loss: 1.5512


Epoch 877/1000: 39it [00:08,  4.47it/s, loss=1.5541]


Epoch 877 Avg Loss: 1.5515


Epoch 878/1000: 39it [00:08,  4.48it/s, loss=1.5529]


Epoch 878 Avg Loss: 1.5519


Epoch 879/1000: 39it [00:08,  4.47it/s, loss=1.5415]


Epoch 879 Avg Loss: 1.5484


Epoch 880/1000: 39it [00:08,  4.48it/s, loss=1.5613]


Epoch 880 Avg Loss: 1.5489


Epoch 881/1000: 39it [00:08,  4.47it/s, loss=1.5473]


Epoch 881 Avg Loss: 1.5531


Epoch 882/1000: 39it [00:08,  4.48it/s, loss=1.5433]


Epoch 882 Avg Loss: 1.5521


Epoch 883/1000: 39it [00:08,  4.47it/s, loss=1.5494]


Epoch 883 Avg Loss: 1.5494


Epoch 884/1000: 39it [00:08,  4.47it/s, loss=1.5496]


Epoch 884 Avg Loss: 1.5533


Epoch 885/1000: 39it [00:08,  4.47it/s, loss=1.5627]


Epoch 885 Avg Loss: 1.5512


Epoch 886/1000: 39it [00:08,  4.47it/s, loss=1.5535]


Epoch 886 Avg Loss: 1.5530


Epoch 887/1000: 39it [00:08,  4.47it/s, loss=1.5405]


Epoch 887 Avg Loss: 1.5514


Epoch 888/1000: 39it [00:08,  4.48it/s, loss=1.5614]


Epoch 888 Avg Loss: 1.5514


Epoch 889/1000: 39it [00:08,  4.47it/s, loss=1.5576]


Epoch 889 Avg Loss: 1.5514


Epoch 890/1000: 39it [00:08,  4.47it/s, loss=1.5474]


Epoch 890 Avg Loss: 1.5517


Epoch 891/1000: 39it [00:08,  4.48it/s, loss=1.5534]


Epoch 891 Avg Loss: 1.5492


Epoch 892/1000: 39it [00:08,  4.48it/s, loss=1.5564]


Epoch 892 Avg Loss: 1.5527


Epoch 893/1000: 39it [00:08,  4.48it/s, loss=1.5367]


Epoch 893 Avg Loss: 1.5486


Epoch 894/1000: 39it [00:08,  4.47it/s, loss=1.5420]


Epoch 894 Avg Loss: 1.5508


Epoch 895/1000: 39it [00:08,  4.47it/s, loss=1.5395]


Epoch 895 Avg Loss: 1.5516


Epoch 896/1000: 39it [00:08,  4.47it/s, loss=1.5491]


Epoch 896 Avg Loss: 1.5521


Epoch 897/1000: 39it [00:08,  4.48it/s, loss=1.5362]


Epoch 897 Avg Loss: 1.5476


Epoch 898/1000: 39it [00:08,  4.48it/s, loss=1.5715]


Epoch 898 Avg Loss: 1.5526


Epoch 899/1000: 39it [00:08,  4.48it/s, loss=1.5416]


Epoch 899 Avg Loss: 1.5504


Epoch 900/1000: 39it [00:08,  4.48it/s, loss=1.5405]


Epoch 900 Avg Loss: 1.5499


Epoch 901/1000: 39it [00:08,  4.47it/s, loss=1.5464]


Epoch 901 Avg Loss: 1.5502


Epoch 902/1000: 39it [00:08,  4.47it/s, loss=1.5512]


Epoch 902 Avg Loss: 1.5514


Epoch 903/1000: 39it [00:08,  4.48it/s, loss=1.5464]


Epoch 903 Avg Loss: 1.5511


Epoch 904/1000: 39it [00:08,  4.47it/s, loss=1.5638]


Epoch 904 Avg Loss: 1.5510


Epoch 905/1000: 39it [00:08,  4.47it/s, loss=1.5419]


Epoch 905 Avg Loss: 1.5511


Epoch 906/1000: 39it [00:08,  4.47it/s, loss=1.5514]


Epoch 906 Avg Loss: 1.5511


Epoch 907/1000: 39it [00:08,  4.47it/s, loss=1.5402]


Epoch 907 Avg Loss: 1.5521


Epoch 908/1000: 39it [00:08,  4.48it/s, loss=1.5481]


Epoch 908 Avg Loss: 1.5500


Epoch 909/1000: 39it [00:08,  4.48it/s, loss=1.5490]


Epoch 909 Avg Loss: 1.5492


Epoch 910/1000: 39it [00:08,  4.48it/s, loss=1.5567]


Epoch 910 Avg Loss: 1.5504


Epoch 911/1000: 39it [00:08,  4.47it/s, loss=1.5546]


Epoch 911 Avg Loss: 1.5483


Epoch 912/1000: 39it [00:08,  4.48it/s, loss=1.5424]


Epoch 912 Avg Loss: 1.5483


Epoch 913/1000: 39it [00:08,  4.48it/s, loss=1.5482]


Epoch 913 Avg Loss: 1.5502


Epoch 914/1000: 39it [00:08,  4.48it/s, loss=1.5582]


Epoch 914 Avg Loss: 1.5524


Epoch 915/1000: 39it [00:08,  4.47it/s, loss=1.5636]


Epoch 915 Avg Loss: 1.5501


Epoch 916/1000: 39it [00:08,  4.47it/s, loss=1.5393]


Epoch 916 Avg Loss: 1.5492


Epoch 917/1000: 39it [00:08,  4.48it/s, loss=1.5571]


Epoch 917 Avg Loss: 1.5514


Epoch 918/1000: 39it [00:08,  4.48it/s, loss=1.5473]


Epoch 918 Avg Loss: 1.5505


Epoch 919/1000: 39it [00:08,  4.48it/s, loss=1.5383]


Epoch 919 Avg Loss: 1.5488


Epoch 920/1000: 39it [00:08,  4.47it/s, loss=1.5496]


Epoch 920 Avg Loss: 1.5502


Epoch 921/1000: 39it [00:08,  4.48it/s, loss=1.5531]


Epoch 921 Avg Loss: 1.5515


Epoch 922/1000: 39it [00:08,  4.47it/s, loss=1.5440]


Epoch 922 Avg Loss: 1.5469


Epoch 923/1000: 39it [00:08,  4.48it/s, loss=1.5597]


Epoch 923 Avg Loss: 1.5520


Epoch 924/1000: 39it [00:08,  4.47it/s, loss=1.5346]


Epoch 924 Avg Loss: 1.5473


Epoch 925/1000: 39it [00:08,  4.48it/s, loss=1.5416]


Epoch 925 Avg Loss: 1.5489


Epoch 926/1000: 39it [00:08,  4.47it/s, loss=1.5441]


Epoch 926 Avg Loss: 1.5518


Epoch 927/1000: 39it [00:08,  4.47it/s, loss=1.5395]


Epoch 927 Avg Loss: 1.5480


Epoch 928/1000: 39it [00:08,  4.47it/s, loss=1.5503]


Epoch 928 Avg Loss: 1.5492


Epoch 929/1000: 39it [00:08,  4.47it/s, loss=1.5624]


Epoch 929 Avg Loss: 1.5482


Epoch 930/1000: 39it [00:08,  4.47it/s, loss=1.5473]


Epoch 930 Avg Loss: 1.5502


Epoch 931/1000: 39it [00:08,  4.47it/s, loss=1.5405]


Epoch 931 Avg Loss: 1.5479


Epoch 932/1000: 39it [00:08,  4.47it/s, loss=1.5532]


Epoch 932 Avg Loss: 1.5497


Epoch 933/1000: 39it [00:08,  4.47it/s, loss=1.5593]


Epoch 933 Avg Loss: 1.5505


Epoch 934/1000: 39it [00:08,  4.47it/s, loss=1.5504]


Epoch 934 Avg Loss: 1.5493


Epoch 935/1000: 39it [00:08,  4.47it/s, loss=1.5412]


Epoch 935 Avg Loss: 1.5483


Epoch 936/1000: 39it [00:08,  4.47it/s, loss=1.5532]


Epoch 936 Avg Loss: 1.5497


Epoch 937/1000: 39it [00:08,  4.47it/s, loss=1.5611]


Epoch 937 Avg Loss: 1.5522


Epoch 938/1000: 39it [00:08,  4.47it/s, loss=1.5525]


Epoch 938 Avg Loss: 1.5489


Epoch 939/1000: 39it [00:08,  4.47it/s, loss=1.5432]


Epoch 939 Avg Loss: 1.5495


Epoch 940/1000: 39it [00:08,  4.47it/s, loss=1.5429]


Epoch 940 Avg Loss: 1.5474


Epoch 941/1000: 39it [00:08,  4.47it/s, loss=1.5417]


Epoch 941 Avg Loss: 1.5476


Epoch 942/1000: 39it [00:08,  4.47it/s, loss=1.5483]


Epoch 942 Avg Loss: 1.5495


Epoch 943/1000: 39it [00:08,  4.48it/s, loss=1.5355]


Epoch 943 Avg Loss: 1.5471


Epoch 944/1000: 39it [00:08,  4.47it/s, loss=1.5587]


Epoch 944 Avg Loss: 1.5456


Epoch 945/1000: 39it [00:08,  4.47it/s, loss=1.5466]


Epoch 945 Avg Loss: 1.5516


Epoch 946/1000: 39it [00:08,  4.48it/s, loss=1.5553]


Epoch 946 Avg Loss: 1.5472


Epoch 947/1000: 39it [00:08,  4.47it/s, loss=1.5323]


Epoch 947 Avg Loss: 1.5480


Epoch 948/1000: 39it [00:08,  4.48it/s, loss=1.5477]


Epoch 948 Avg Loss: 1.5484


Epoch 949/1000: 39it [00:08,  4.48it/s, loss=1.5307]


Epoch 949 Avg Loss: 1.5456


Epoch 950/1000: 39it [00:08,  4.48it/s, loss=1.5342]


Epoch 950 Avg Loss: 1.5487


Epoch 951/1000: 39it [00:08,  4.47it/s, loss=1.5460]


Epoch 951 Avg Loss: 1.5490


Epoch 952/1000: 39it [00:08,  4.48it/s, loss=1.5530]


Epoch 952 Avg Loss: 1.5505


Epoch 953/1000: 39it [00:08,  4.47it/s, loss=1.5541]


Epoch 953 Avg Loss: 1.5471


Epoch 954/1000: 39it [00:08,  4.47it/s, loss=1.5527]


Epoch 954 Avg Loss: 1.5489


Epoch 955/1000: 39it [00:08,  4.47it/s, loss=1.5568]


Epoch 955 Avg Loss: 1.5508


Epoch 956/1000: 39it [00:08,  4.47it/s, loss=1.5369]


Epoch 956 Avg Loss: 1.5484


Epoch 957/1000: 39it [00:08,  4.47it/s, loss=1.5416]


Epoch 957 Avg Loss: 1.5494


Epoch 958/1000: 39it [00:08,  4.47it/s, loss=1.5341]


Epoch 958 Avg Loss: 1.5479


Epoch 959/1000: 39it [00:08,  4.47it/s, loss=1.5537]


Epoch 959 Avg Loss: 1.5516


Epoch 960/1000: 39it [00:08,  4.47it/s, loss=1.5389]


Epoch 960 Avg Loss: 1.5515


Epoch 961/1000: 39it [00:08,  4.47it/s, loss=1.5610]


Epoch 961 Avg Loss: 1.5459


Epoch 962/1000: 39it [00:08,  4.47it/s, loss=1.5604]


Epoch 962 Avg Loss: 1.5498


Epoch 963/1000: 39it [00:08,  4.47it/s, loss=1.5376]


Epoch 963 Avg Loss: 1.5480


Epoch 964/1000: 39it [00:08,  4.47it/s, loss=1.5507]


Epoch 964 Avg Loss: 1.5503


Epoch 965/1000: 39it [00:08,  4.47it/s, loss=1.5487]


Epoch 965 Avg Loss: 1.5501


Epoch 966/1000: 39it [00:08,  4.47it/s, loss=1.5333]


Epoch 966 Avg Loss: 1.5479


Epoch 967/1000: 39it [00:08,  4.47it/s, loss=1.5429]


Epoch 967 Avg Loss: 1.5468


Epoch 968/1000: 39it [00:08,  4.47it/s, loss=1.5508]


Epoch 968 Avg Loss: 1.5462


Epoch 969/1000: 39it [00:08,  4.47it/s, loss=1.5544]


Epoch 969 Avg Loss: 1.5487


Epoch 970/1000: 39it [00:08,  4.47it/s, loss=1.5553]


Epoch 970 Avg Loss: 1.5496


Epoch 971/1000: 39it [00:08,  4.48it/s, loss=1.5596]


Epoch 971 Avg Loss: 1.5480


Epoch 972/1000: 39it [00:08,  4.47it/s, loss=1.5540]


Epoch 972 Avg Loss: 1.5481


Epoch 973/1000: 39it [00:08,  4.47it/s, loss=1.5386]


Epoch 973 Avg Loss: 1.5504


Epoch 974/1000: 39it [00:08,  4.47it/s, loss=1.5575]


Epoch 974 Avg Loss: 1.5473


Epoch 975/1000: 39it [00:08,  4.47it/s, loss=1.5363]


Epoch 975 Avg Loss: 1.5495


Epoch 976/1000: 39it [00:08,  4.47it/s, loss=1.5485]


Epoch 976 Avg Loss: 1.5483


Epoch 977/1000: 39it [00:08,  4.47it/s, loss=1.5578]


Epoch 977 Avg Loss: 1.5464


Epoch 978/1000: 39it [00:08,  4.47it/s, loss=1.5387]


Epoch 978 Avg Loss: 1.5488


Epoch 979/1000: 39it [00:08,  4.47it/s, loss=1.5345]


Epoch 979 Avg Loss: 1.5475


Epoch 980/1000: 39it [00:08,  4.48it/s, loss=1.5532]


Epoch 980 Avg Loss: 1.5489


Epoch 981/1000: 39it [00:08,  4.48it/s, loss=1.5527]


Epoch 981 Avg Loss: 1.5482


Epoch 982/1000: 39it [00:08,  4.47it/s, loss=1.5405]


Epoch 982 Avg Loss: 1.5476


Epoch 983/1000: 39it [00:08,  4.47it/s, loss=1.5311]


Epoch 983 Avg Loss: 1.5461


Epoch 984/1000: 39it [00:08,  4.47it/s, loss=1.7654]


Epoch 984 Avg Loss: 1.8612


Epoch 985/1000: 39it [00:08,  4.47it/s, loss=1.6261]


Epoch 985 Avg Loss: 1.6576


Epoch 986/1000: 39it [00:08,  4.47it/s, loss=1.6315]


Epoch 986 Avg Loss: 1.6203


Epoch 987/1000: 39it [00:08,  4.47it/s, loss=1.6059]


Epoch 987 Avg Loss: 1.6029


Epoch 988/1000: 39it [00:08,  4.47it/s, loss=1.5799]


Epoch 988 Avg Loss: 1.5941


Epoch 989/1000: 39it [00:08,  4.47it/s, loss=1.5902]


Epoch 989 Avg Loss: 1.5913


Epoch 990/1000: 39it [00:08,  4.47it/s, loss=1.5765]


Epoch 990 Avg Loss: 1.5828


Epoch 991/1000: 39it [00:08,  4.47it/s, loss=1.5771]


Epoch 991 Avg Loss: 1.5803


Epoch 992/1000: 39it [00:08,  4.47it/s, loss=1.5762]


Epoch 992 Avg Loss: 1.5709


Epoch 993/1000: 39it [00:08,  4.48it/s, loss=1.5672]


Epoch 993 Avg Loss: 1.5642


Epoch 994/1000: 39it [00:08,  4.47it/s, loss=1.5520]


Epoch 994 Avg Loss: 1.5530


Epoch 995/1000: 39it [00:08,  4.48it/s, loss=1.5505]


Epoch 995 Avg Loss: 1.5497


Epoch 996/1000: 39it [00:08,  4.47it/s, loss=1.5464]


Epoch 996 Avg Loss: 1.5515


Epoch 997/1000: 39it [00:08,  4.47it/s, loss=1.5639]


Epoch 997 Avg Loss: 1.5491


Epoch 998/1000: 39it [00:08,  4.47it/s, loss=1.5566]


Epoch 998 Avg Loss: 1.5465


Epoch 999/1000: 39it [00:08,  4.47it/s, loss=1.5553]


Epoch 999 Avg Loss: 1.5498


Epoch 1000/1000: 39it [00:08,  4.48it/s, loss=1.5361]

Epoch 1000 Avg Loss: 1.5475





In [91]:
# Seed the rollout with the tokens of the first `history_len` frames, flattened
# into one sequence. The final argument is the number of frames left in
# `all_tokens` after that history window (presumably the number of frames to
# generate - confirm against generate_trajectory's signature).
seed_tokens = all_tokens[:history_len].flatten()
num_remaining_frames = len(all_tokens) - history_len
oneshot_predicted_token_grids = generate_trajectory(
    oneshot_state,
    history_len,
    tokens_per_frame,
    key,
    seed_tokens,
    num_remaining_frames,
)

# Render the predicted trajectory to an .mp4. The call returns a pair that the
# metrics cell consumes as `predicted_frames` and `check_decoder`.
# NOTE(review): the output name says "onehot" while everything else in this
# section says "oneshot" - confirm whether the file name is a typo.
animation_outputs = animate_prediction(
    'onehot_predicted_traj.mp4',
    orig_imgs,
    all_frames,
    all_tokens,
    oneshot_predicted_token_grids,
    get_tokens_fn,
    decode_fn,
    history_len,
)
predicted_frames, check_decoder = animation_outputs

🎥 Generating prediction and creating animation...
✅ Animation saved to 'onehot_predicted_traj.mp4'


In [92]:
# Compare predictions against the references from frame `history_len` onward;
# the first `history_len` entries of `all_tokens` / `check_decoder` are sliced
# off so both sides start at the same frame.
reference_tokens = all_tokens[history_len:]
reference_frames = check_decoder[history_len:]
oneshot_metrics = metrics(
    oneshot_predicted_token_grids,
    reference_tokens,
    predicted_frames,
    reference_frames,
)

# Persist the metrics so the plotting cell can pick them up via the
# "*_metrics.npz" naming pattern.
# NOTE(review): the one-shot model's metrics are stored under the
# "frame_autoregressive" name - confirm this label is intended.
np.savez_compressed("frame_autoregressive_metrics.npz", **oneshot_metrics)

Tokens Correct (%) in the 0th predicted frame: 41.40625%
Tokens Correct (%) in the 1th predicted frame: 20.3125%
Tokens Correct (%) in the 2th predicted frame: 16.40625%
Tokens Correct (%) in the 3th predicted frame: 13.671875%
Tokens Correct (%) in the 4th predicted frame: 12.890625%
Tokens Correct (%) in the 5th predicted frame: 11.71875%
Tokens Correct (%) in the 6th predicted frame: 8.984375%
Tokens Correct (%) in the 7th predicted frame: 7.421875%
Tokens Correct (%) in the 8th predicted frame: 7.03125%
Tokens Correct (%) in the 9th predicted frame: 6.640625%
Tokens Correct (%) in the 10th predicted frame: 6.25%
Tokens Correct (%) in the 11th predicted frame: 6.25%
Tokens Correct (%) in the 12th predicted frame: 5.859375%
Tokens Correct (%) in the 13th predicted frame: 4.6875%
Tokens Correct (%) in the 14th predicted frame: 5.078125%
Tokens Correct (%) in the 15th predicted frame: 4.6875%
Tokens Correct (%) in the 16th predicted frame: 4.6875%
Tokens Correct (%) in the 17th predict

### Plot

In [93]:
# Collect every "<model>_metrics.npz" file in the current working directory
# into {model_name: {metric_name: ndarray}}.
transformer_metrics = {}
# os.listdir() returns entries in arbitrary order; sorting keeps the legend
# order, line colors and the choice of "first" model stable across runs.
for fn in sorted(os.listdir()):
    if fn.endswith("_metrics.npz"):
        # np.load() on an .npz returns a lazy NpzFile that holds an open file
        # handle. Materialize the arrays inside `with` so the handle is closed.
        with np.load(fn) as npz_file:
            transformer_metrics[fn.split("_metrics.npz")[0]] = dict(npz_file)

# Fail with an actionable message instead of an IndexError further down.
if not transformer_metrics:
    raise FileNotFoundError(
        f'No "*_metrics.npz" files found in {os.getcwd()!r}; '
        "run the metrics cells first."
    )

# 1. Get the structure of the dictionary
outer_keys = list(transformer_metrics.keys())
# Get inner keys from the first entry, assuming the structure is consistent
# (every model's file is expected to contain the same metric names).
inner_keys = list(transformer_metrics[outer_keys[0]].keys())
num_subplots = len(inner_keys)

# 2. Create a figure with one subplot for each inner key.
# squeeze=False always yields a 2-D array of axes (even for one subplot), so
# no special-casing is needed before iterating.
fig, axes = plt.subplots(
    1, num_subplots, figsize=(8 * num_subplots, 5), squeeze=False
)
axes = axes.ravel()

# 3. Iterate through each metric (e.g., 'Tokens Correct') to create a subplot
for ax, inner_key in zip(axes, inner_keys):

    # 4. On each subplot, iterate through each model type (e.g., 'autoregressive') to plot its line
    for outer_key in outer_keys:
        # Get the data array for the current line
        data = transformer_metrics[outer_key][inner_key]
        x_axis = np.arange(len(data))

        # Plot the data, using the outer key as the label for the legend
        ax.plot(x_axis, data, label=outer_key.capitalize(), marker='o', linestyle='-')

    # 5. Customize the subplot
    ax.set_title(f'Metric: "{inner_key}"', fontsize=14)
    ax.set_xlabel('Frame #', fontsize=12)
    ax.set_ylabel('Value', fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend(title="Model Type")

# 6. Final adjustments and save
fig.suptitle('Comparison of Transformer Metrics', fontsize=16, y=1.02)
fig.tight_layout()
# The suptitle sits at y=1.02, i.e. above the figure canvas; without
# bbox_inches="tight" savefig crops to the canvas and cuts the title off.
fig.savefig("metrics.png", bbox_inches="tight")
# Close explicitly so the figure is released and not rendered inline.
plt.close(fig)

In [None]:
# from google.colab import runtime
# runtime.unassign()

In [79]:
# Write `oneshot_state` to `os_ckpt_dir`, tagged with the state's own step
# counter. Left as the cell's last expression so the returned checkpoint path
# is displayed.
checkpoints.save_checkpoint(
    ckpt_dir=os_ckpt_dir,
    target=oneshot_state,
    step=oneshot_state.step,
    # Per the Flax docs, overwrite=False refuses to replace an existing
    # checkpoint at this or a later step; it does not control retention.
    overwrite=False,
    # Retention is governed by `keep`: only the 3 most recent checkpoints stay.
    keep=3,
)

'/content/drive/MyDrive/Colab Notebooks/VQ-Dynamics/oneshot_transformer_checkpoints/checkpoint_40399'

In [3]:
# !zip -r all_files.zip .

  adding: encode.py (deflated 69%)
  adding: __pycache__/ (stored 0%)
  adding: __pycache__/vqvae.cpython-311.pyc (deflated 53%)
  adding: images_jax/ (stored 0%)
  adding: images_jax/orig_frame_0000.png (deflated 4%)
  adding: images_jax/recon_frame_0000.png (deflated 3%)
  adding: images_jax/recon_frame_0002.png (deflated 4%)
  adding: images_jax/orig_frame_0001.png (deflated 3%)
  adding: images_jax/orig_frame_0003.png (deflated 2%)
  adding: images_jax/recon_frame_0001.png (deflated 4%)
  adding: images_jax/orig_frame_0002.png (deflated 3%)
  adding: images_jax/recon_frame_0003.png (deflated 3%)
  adding: images_jax/orig_frame_0004.png (deflated 2%)
  adding: images_jax/recon_frame_0004.png (deflated 3%)
  adding: images_jax/recon_frame_0005.png (deflated 4%)
  adding: images_jax/orig_frame_0005.png (deflated 3%)
  adding: images_jax/orig_frame_0006.png (deflated 2%)
  adding: images_jax/recon_frame_0006.png (deflated 4%)
  adding: images_jax/orig_frame_0007.png (deflated 2%)
  add

In [None]:
# from google.colab import files
# files.download('all_files.zip')