In [1]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Alternative kept for reference: use %reload_ext when the extension has
# already been loaded in this kernel session.
# %reload_ext autoreload
# Mode 2: reload all (non-excluded) modules before executing each cell.
%autoreload 2


In [2]:
from datetime import datetime

import torch
from torch.utils.tensorboard import SummaryWriter

from methylVA.mnist.model import VAE
from methylVA.mnist.training import train, test


# --- Experiment configuration ------------------------------------------------
# NOTE(review): `batch_size` is not referenced by any cell visible in this
# notebook (the loader factory is called without arguments) — confirm it
# matches the loaders' actual batch size or pass it through explicitly.
batch_size = 128
learning_rate = 1e-3
weight_decay = 1e-2
num_epochs = 50
latent_dim = 2
hidden_dim = 512
name = 'VAE_MNIST_UNIFORM_NOISE'
seed = 0

# Seed torch's global RNG before the model is constructed so that repeated
# "Restart & Run All" executions start from the same torch random state.
torch.manual_seed(seed)

# One timestamp shared by both writers: calling datetime.now() once per writer
# can yield two different directory names when a second boundary is crossed,
# which makes the train/test logs of a single run hard to pair up.
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VAE(input_dim=784, latent_dim=latent_dim, hidden_dim=hidden_dim).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Separate TensorBoard writers for training and evaluation, written under
# ../experiments/<name>/{train,test}/<run_stamp>.
writer_train = SummaryWriter(f'../experiments/{name}/train/{run_stamp}')
writer_test = SummaryWriter(f'../experiments/{name}/test/{run_stamp}')


In [3]:
# Build the train and test data loaders used by the training loop below.
# NOTE(review): the config value `batch_size` is not passed here, so the
# loaders use whatever get_uniform_data_loaders defaults to. The captured log
# ("N samples: 12,800" at step 100) implies 128 samples per step — confirm the
# default and consider wiring `batch_size` through.
from methylVA.mnist.dataset import get_uniform_data_loaders
train_loader, test_loader = get_uniform_data_loaders()

In [4]:
# Same names as the import in the setup cell; repeated here so this cell can be
# re-run on its own.
from methylVA.mnist.training import train, test


# Running count of optimizer updates. train() returns the updated count, which
# is fed back in on the next epoch and handed to test() for logging.
# NOTE(review): re-running this cell resets the counter to 0 while `model`
# keeps its current weights, so logged steps would restart from 0 — re-run the
# setup cell first for a clean run.
prev_updates = 0
try:
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        prev_updates = train(model, train_loader, optimizer, prev_updates, writer=writer_train)
        test(model, test_loader, prev_updates, writer=writer_test)
finally:
    # Push buffered TensorBoard events to disk even when the run is interrupted
    # or fails part-way; flush() leaves the writers usable for later cells.
    writer_train.flush()
    writer_test.flush()

Epoch 1/50


  3%|▎         | 13/469 [00:00<00:14, 31.85it/s]

Step 0, (N samples: 0), Loss: 543.7794, (Recon: 543.5255, KL: 0.2538), Gradient norm: 1.0683


 26%|██▌       | 121/469 [00:01<00:03, 113.58it/s]

Step 100, (N samples: 12,800), Loss: 543.4340, (Recon: 543.4337, KL: 0.0003), Gradient norm: 0.7749


 46%|████▋     | 217/469 [00:02<00:02, 117.36it/s]

Step 200, (N samples: 25,600), Loss: 543.4432, (Recon: 543.4429, KL: 0.0003), Gradient norm: 0.7441


 67%|██████▋   | 313/469 [00:03<00:01, 117.92it/s]

Step 300, (N samples: 38,400), Loss: 543.4442, (Recon: 543.4440, KL: 0.0002), Gradient norm: 0.7770


 90%|████████▉ | 421/469 [00:03<00:00, 116.27it/s]

Step 400, (N samples: 51,200), Loss: 543.4294, (Recon: 543.4293, KL: 0.0001), Gradient norm: 0.7470


100%|██████████| 469/469 [00:04<00:00, 106.32it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 157.75it/s]


====> Test set loss: 543.4394, (BCE: 543.4393, KLD: 0.0001)
Epoch 2/50


 12%|█▏        | 55/469 [00:00<00:03, 111.43it/s]

Step 500, (N samples: 64,000), Loss: 543.4510, (Recon: 543.4509, KL: 0.0001), Gradient norm: 0.7928


 32%|███▏      | 151/469 [00:01<00:02, 118.03it/s]

Step 600, (N samples: 76,800), Loss: 543.4564, (Recon: 543.4563, KL: 0.0001), Gradient norm: 0.8236


 53%|█████▎    | 247/469 [00:02<00:01, 117.72it/s]

Step 700, (N samples: 89,600), Loss: 543.4474, (Recon: 543.4473, KL: 0.0001), Gradient norm: 0.7959


 76%|███████▌  | 355/469 [00:03<00:00, 118.04it/s]

Step 800, (N samples: 102,400), Loss: 543.4265, (Recon: 543.4263, KL: 0.0001), Gradient norm: 0.7684


 96%|█████████▌| 451/469 [00:03<00:00, 118.12it/s]

Step 900, (N samples: 115,200), Loss: 543.4423, (Recon: 543.4423, KL: 0.0001), Gradient norm: 0.7947


100%|██████████| 469/469 [00:04<00:00, 116.02it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.64it/s]


====> Test set loss: 543.4357, (BCE: 543.4357, KLD: 0.0001)
Epoch 3/50


 17%|█▋        | 79/469 [00:00<00:03, 115.81it/s]

Step 1,000, (N samples: 128,000), Loss: 543.4374, (Recon: 543.4374, KL: 0.0001), Gradient norm: 0.7990


 37%|███▋      | 175/469 [00:01<00:02, 117.18it/s]

Step 1,100, (N samples: 140,800), Loss: 543.4415, (Recon: 543.4414, KL: 0.0000), Gradient norm: 0.7842


 60%|██████    | 283/469 [00:02<00:01, 118.62it/s]

Step 1,200, (N samples: 153,600), Loss: 543.4367, (Recon: 543.4366, KL: 0.0001), Gradient norm: 0.7636


 81%|████████  | 379/469 [00:03<00:00, 118.63it/s]

Step 1,300, (N samples: 166,400), Loss: 543.4348, (Recon: 543.4348, KL: 0.0000), Gradient norm: 0.8176


100%|██████████| 469/469 [00:04<00:00, 116.69it/s]


Step 1,400, (N samples: 179,200), Loss: 543.4302, (Recon: 543.4302, KL: 0.0000), Gradient norm: 0.7509


Testing: 100%|██████████| 79/79 [00:00<00:00, 156.26it/s]


====> Test set loss: 543.4392, (BCE: 543.4392, KLD: 0.0000)
Epoch 4/50


 25%|██▍       | 115/469 [00:01<00:03, 116.85it/s]

Step 1,500, (N samples: 192,000), Loss: 543.4357, (Recon: 543.4357, KL: 0.0000), Gradient norm: 0.7572


 45%|████▍     | 211/469 [00:01<00:02, 116.60it/s]

Step 1,600, (N samples: 204,800), Loss: 543.4271, (Recon: 543.4270, KL: 0.0001), Gradient norm: 0.7549


 65%|██████▌   | 307/469 [00:02<00:01, 115.99it/s]

Step 1,700, (N samples: 217,600), Loss: 543.4346, (Recon: 543.4346, KL: 0.0000), Gradient norm: 0.7584


 88%|████████▊ | 415/469 [00:03<00:00, 116.73it/s]

Step 1,800, (N samples: 230,400), Loss: 543.4356, (Recon: 543.4355, KL: 0.0000), Gradient norm: 0.7509


100%|██████████| 469/469 [00:04<00:00, 115.44it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.58it/s]


====> Test set loss: 543.4354, (BCE: 543.4353, KLD: 0.0001)
Epoch 5/50


  9%|▉         | 43/469 [00:00<00:03, 107.74it/s]

Step 1,900, (N samples: 243,200), Loss: 543.4265, (Recon: 543.4265, KL: 0.0001), Gradient norm: 0.7673


 30%|██▉       | 139/469 [00:01<00:02, 117.38it/s]

Step 2,000, (N samples: 256,000), Loss: 543.4405, (Recon: 543.4404, KL: 0.0000), Gradient norm: 0.7507


 53%|█████▎    | 247/469 [00:02<00:01, 118.55it/s]

Step 2,100, (N samples: 268,800), Loss: 543.4329, (Recon: 543.4329, KL: 0.0001), Gradient norm: 0.7522


 73%|███████▎  | 343/469 [00:02<00:01, 118.22it/s]

Step 2,200, (N samples: 281,600), Loss: 543.4463, (Recon: 543.4462, KL: 0.0001), Gradient norm: 0.7812


 94%|█████████▎| 439/469 [00:03<00:00, 118.41it/s]

Step 2,300, (N samples: 294,400), Loss: 543.4428, (Recon: 543.4428, KL: 0.0000), Gradient norm: 0.8073


100%|██████████| 469/469 [00:04<00:00, 116.20it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.36it/s]


====> Test set loss: 543.4365, (BCE: 543.4364, KLD: 0.0000)
Epoch 6/50


 17%|█▋        | 79/469 [00:00<00:03, 114.05it/s]

Step 2,400, (N samples: 307,200), Loss: 543.4368, (Recon: 543.4368, KL: 0.0000), Gradient norm: 0.7415


 37%|███▋      | 175/469 [00:01<00:02, 118.07it/s]

Step 2,500, (N samples: 320,000), Loss: 543.4437, (Recon: 543.4437, KL: 0.0000), Gradient norm: 0.7647


 58%|█████▊    | 271/469 [00:02<00:01, 117.93it/s]

Step 2,600, (N samples: 332,800), Loss: 543.4250, (Recon: 543.4250, KL: 0.0000), Gradient norm: 0.7882


 81%|████████  | 379/469 [00:03<00:00, 118.26it/s]

Step 2,700, (N samples: 345,600), Loss: 543.4344, (Recon: 543.4344, KL: 0.0000), Gradient norm: 0.7600


100%|██████████| 469/469 [00:04<00:00, 116.19it/s]


Step 2,800, (N samples: 358,400), Loss: 543.4482, (Recon: 543.4482, KL: 0.0000), Gradient norm: 0.7923


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.61it/s]


====> Test set loss: 543.4338, (BCE: 543.4338, KLD: 0.0000)
Epoch 7/50


 22%|██▏       | 103/469 [00:00<00:03, 116.69it/s]

Step 2,900, (N samples: 371,200), Loss: 543.4324, (Recon: 543.4324, KL: 0.0000), Gradient norm: 0.7594


 42%|████▏     | 199/469 [00:01<00:02, 116.69it/s]

Step 3,000, (N samples: 384,000), Loss: 543.4334, (Recon: 543.4334, KL: 0.0000), Gradient norm: 0.7465


 65%|██████▌   | 307/469 [00:02<00:01, 117.65it/s]

Step 3,100, (N samples: 396,800), Loss: 543.4365, (Recon: 543.4365, KL: 0.0000), Gradient norm: 0.7863


 86%|████████▌ | 403/469 [00:03<00:00, 118.36it/s]

Step 3,200, (N samples: 409,600), Loss: 543.4339, (Recon: 543.4339, KL: 0.0000), Gradient norm: 0.8137


100%|██████████| 469/469 [00:04<00:00, 116.16it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.00it/s]


====> Test set loss: 543.4351, (BCE: 543.4351, KLD: 0.0000)
Epoch 8/50


  6%|▋         | 30/469 [00:00<00:04, 100.78it/s]

Step 3,300, (N samples: 422,400), Loss: 543.4303, (Recon: 543.4303, KL: 0.0000), Gradient norm: 0.7819


 29%|██▉       | 138/469 [00:01<00:02, 116.00it/s]

Step 3,400, (N samples: 435,200), Loss: 543.4336, (Recon: 543.4336, KL: 0.0000), Gradient norm: 0.8116


 50%|████▉     | 234/469 [00:02<00:02, 116.64it/s]

Step 3,500, (N samples: 448,000), Loss: 543.4289, (Recon: 543.4289, KL: 0.0000), Gradient norm: 0.8281


 70%|███████   | 330/469 [00:02<00:01, 117.87it/s]

Step 3,600, (N samples: 460,800), Loss: 543.4372, (Recon: 543.4371, KL: 0.0000), Gradient norm: 0.8008


 93%|█████████▎| 438/469 [00:03<00:00, 118.20it/s]

Step 3,700, (N samples: 473,600), Loss: 543.4410, (Recon: 543.4410, KL: 0.0000), Gradient norm: 0.7765


100%|██████████| 469/469 [00:04<00:00, 115.71it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 157.40it/s]


====> Test set loss: 543.4357, (BCE: 543.4357, KLD: 0.0000)
Epoch 9/50


 14%|█▍        | 65/469 [00:00<00:03, 112.01it/s]

Step 3,800, (N samples: 486,400), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 0.8046


 34%|███▍      | 161/469 [00:01<00:02, 114.22it/s]

Step 3,900, (N samples: 499,200), Loss: 543.4448, (Recon: 543.4448, KL: 0.0000), Gradient norm: 0.8392


 57%|█████▋    | 269/469 [00:02<00:01, 117.54it/s]

Step 4,000, (N samples: 512,000), Loss: 543.4235, (Recon: 543.4235, KL: 0.0000), Gradient norm: 0.7875


 78%|███████▊  | 365/469 [00:03<00:00, 117.95it/s]

Step 4,100, (N samples: 524,800), Loss: 543.4406, (Recon: 543.4405, KL: 0.0001), Gradient norm: 0.8199


100%|██████████| 469/469 [00:04<00:00, 115.11it/s]


Step 4,200, (N samples: 537,600), Loss: 543.4417, (Recon: 543.4417, KL: 0.0000), Gradient norm: 0.8122


Testing: 100%|██████████| 79/79 [00:00<00:00, 157.99it/s]


====> Test set loss: 543.4340, (BCE: 543.4340, KLD: 0.0000)
Epoch 10/50


 22%|██▏       | 103/469 [00:00<00:03, 117.31it/s]

Step 4,300, (N samples: 550,400), Loss: 543.4324, (Recon: 543.4324, KL: 0.0000), Gradient norm: 0.7531


 42%|████▏     | 199/469 [00:01<00:02, 118.21it/s]

Step 4,400, (N samples: 563,200), Loss: 543.4240, (Recon: 543.4240, KL: 0.0000), Gradient norm: 0.7873


 63%|██████▎   | 295/469 [00:02<00:01, 117.17it/s]

Step 4,500, (N samples: 576,000), Loss: 543.4376, (Recon: 543.4376, KL: 0.0000), Gradient norm: 0.7875


 86%|████████▌ | 403/469 [00:03<00:00, 117.28it/s]

Step 4,600, (N samples: 588,800), Loss: 543.4367, (Recon: 543.4366, KL: 0.0000), Gradient norm: 0.7995


100%|██████████| 469/469 [00:04<00:00, 116.22it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.83it/s]


====> Test set loss: 543.4355, (BCE: 543.4355, KLD: 0.0000)
Epoch 11/50


  2%|▏         | 9/469 [00:00<00:05, 83.97it/s]

Step 4,700, (N samples: 601,600), Loss: 543.4393, (Recon: 543.4393, KL: 0.0000), Gradient norm: 0.7739


 27%|██▋       | 126/469 [00:01<00:02, 117.63it/s]

Step 4,800, (N samples: 614,400), Loss: 543.4340, (Recon: 543.4340, KL: 0.0000), Gradient norm: 0.8352


 50%|████▉     | 234/469 [00:02<00:02, 117.42it/s]

Step 4,900, (N samples: 627,200), Loss: 543.4312, (Recon: 543.4312, KL: 0.0000), Gradient norm: 0.8597


 70%|███████   | 330/469 [00:02<00:01, 117.69it/s]

Step 5,000, (N samples: 640,000), Loss: 543.4393, (Recon: 543.4392, KL: 0.0000), Gradient norm: 0.8398


 91%|█████████ | 426/469 [00:03<00:00, 117.12it/s]

Step 5,100, (N samples: 652,800), Loss: 543.4363, (Recon: 543.4363, KL: 0.0000), Gradient norm: 0.8154


100%|██████████| 469/469 [00:04<00:00, 115.65it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.56it/s]


====> Test set loss: 543.4352, (BCE: 543.4352, KLD: 0.0001)
Epoch 12/50


 12%|█▏        | 55/469 [00:00<00:03, 112.12it/s]

Step 5,200, (N samples: 665,600), Loss: 543.4233, (Recon: 543.4233, KL: 0.0000), Gradient norm: 0.8395


 35%|███▍      | 163/469 [00:01<00:02, 117.05it/s]

Step 5,300, (N samples: 678,400), Loss: 543.4321, (Recon: 543.4321, KL: 0.0000), Gradient norm: 0.8141


 55%|█████▌    | 259/469 [00:02<00:01, 117.75it/s]

Step 5,400, (N samples: 691,200), Loss: 543.4338, (Recon: 543.4338, KL: 0.0000), Gradient norm: 0.8280


 76%|███████▌  | 355/469 [00:03<00:00, 118.06it/s]

Step 5,500, (N samples: 704,000), Loss: 543.4320, (Recon: 543.4320, KL: 0.0000), Gradient norm: 0.7784


 99%|█████████▊| 463/469 [00:03<00:00, 116.76it/s]

Step 5,600, (N samples: 716,800), Loss: 543.4337, (Recon: 543.4337, KL: 0.0000), Gradient norm: 0.8701


100%|██████████| 469/469 [00:04<00:00, 115.73it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.64it/s]


====> Test set loss: 543.4361, (BCE: 543.4361, KLD: 0.0000)
Epoch 13/50


 19%|█▉        | 91/469 [00:00<00:03, 116.64it/s]

Step 5,700, (N samples: 729,600), Loss: 543.4223, (Recon: 543.4223, KL: 0.0000), Gradient norm: 0.8387


 40%|███▉      | 187/469 [00:01<00:02, 118.59it/s]

Step 5,800, (N samples: 742,400), Loss: 543.4503, (Recon: 543.4501, KL: 0.0001), Gradient norm: 0.8790


 63%|██████▎   | 295/469 [00:02<00:01, 118.80it/s]

Step 5,900, (N samples: 755,200), Loss: 543.4368, (Recon: 543.4368, KL: 0.0000), Gradient norm: 0.8615


 83%|████████▎ | 391/469 [00:03<00:00, 117.45it/s]

Step 6,000, (N samples: 768,000), Loss: 543.4236, (Recon: 543.4236, KL: 0.0001), Gradient norm: 0.8435


100%|██████████| 469/469 [00:04<00:00, 116.64it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 157.17it/s]


====> Test set loss: 543.4359, (BCE: 543.4359, KLD: 0.0000)
Epoch 14/50


  2%|▏         | 9/469 [00:00<00:05, 82.87it/s]

Step 6,100, (N samples: 780,800), Loss: 543.4372, (Recon: 543.4372, KL: 0.0000), Gradient norm: 0.8545


 27%|██▋       | 127/469 [00:01<00:02, 117.65it/s]

Step 6,200, (N samples: 793,600), Loss: 543.4401, (Recon: 543.4401, KL: 0.0000), Gradient norm: 0.8389


 48%|████▊     | 223/469 [00:01<00:02, 118.60it/s]

Step 6,300, (N samples: 806,400), Loss: 543.4314, (Recon: 543.4314, KL: 0.0000), Gradient norm: 0.8855


 68%|██████▊   | 319/469 [00:02<00:01, 118.66it/s]

Step 6,400, (N samples: 819,200), Loss: 543.4372, (Recon: 543.4371, KL: 0.0001), Gradient norm: 0.8481


 91%|█████████ | 427/469 [00:03<00:00, 118.28it/s]

Step 6,500, (N samples: 832,000), Loss: 543.4346, (Recon: 543.4346, KL: 0.0000), Gradient norm: 0.8742


100%|██████████| 469/469 [00:04<00:00, 116.68it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 157.10it/s]


====> Test set loss: 543.4371, (BCE: 543.4370, KLD: 0.0001)
Epoch 15/50


 12%|█▏        | 55/469 [00:00<00:03, 111.19it/s]

Step 6,600, (N samples: 844,800), Loss: 543.4364, (Recon: 543.4364, KL: 0.0000), Gradient norm: 0.9076


 32%|███▏      | 151/469 [00:01<00:02, 118.20it/s]

Step 6,700, (N samples: 857,600), Loss: 543.4344, (Recon: 543.4343, KL: 0.0001), Gradient norm: 0.8546


 53%|█████▎    | 247/469 [00:02<00:01, 118.67it/s]

Step 6,800, (N samples: 870,400), Loss: 543.4332, (Recon: 543.4331, KL: 0.0001), Gradient norm: 0.8282


 76%|███████▌  | 355/469 [00:03<00:00, 118.09it/s]

Step 6,900, (N samples: 883,200), Loss: 543.4319, (Recon: 543.4319, KL: 0.0000), Gradient norm: 0.8147


 96%|█████████▌| 451/469 [00:03<00:00, 117.87it/s]

Step 7,000, (N samples: 896,000), Loss: 543.4388, (Recon: 543.4388, KL: 0.0000), Gradient norm: 0.8158


100%|██████████| 469/469 [00:04<00:00, 116.45it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 112.58it/s]


====> Test set loss: 543.4355, (BCE: 543.4355, KLD: 0.0000)
Epoch 16/50


 18%|█▊        | 84/469 [00:00<00:03, 103.57it/s]

Step 7,100, (N samples: 908,800), Loss: 543.4440, (Recon: 543.4440, KL: 0.0000), Gradient norm: 0.8222


 38%|███▊      | 180/469 [00:01<00:02, 117.41it/s]

Step 7,200, (N samples: 921,600), Loss: 543.4491, (Recon: 543.4491, KL: 0.0000), Gradient norm: 0.8217


 61%|██████▏   | 288/469 [00:02<00:01, 117.78it/s]

Step 7,300, (N samples: 934,400), Loss: 543.4427, (Recon: 543.4426, KL: 0.0001), Gradient norm: 0.8760


 82%|████████▏ | 384/469 [00:03<00:00, 118.58it/s]

Step 7,400, (N samples: 947,200), Loss: 543.4419, (Recon: 543.4419, KL: 0.0000), Gradient norm: 0.8451


100%|██████████| 469/469 [00:04<00:00, 111.31it/s]


Step 7,500, (N samples: 960,000), Loss: 543.4275, (Recon: 543.4275, KL: 0.0000), Gradient norm: 0.8779


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.70it/s]


====> Test set loss: 543.4368, (BCE: 543.4368, KLD: 0.0000)
Epoch 17/50


 25%|██▍       | 115/469 [00:01<00:03, 117.32it/s]

Step 7,600, (N samples: 972,800), Loss: 543.4515, (Recon: 543.4515, KL: 0.0000), Gradient norm: 0.8859


 45%|████▍     | 211/469 [00:01<00:02, 118.11it/s]

Step 7,700, (N samples: 985,600), Loss: 543.4368, (Recon: 543.4368, KL: 0.0001), Gradient norm: 0.8477


 68%|██████▊   | 319/469 [00:02<00:01, 118.62it/s]

Step 7,800, (N samples: 998,400), Loss: 543.4301, (Recon: 543.4301, KL: 0.0000), Gradient norm: 0.8253


 88%|████████▊ | 415/469 [00:03<00:00, 114.66it/s]

Step 7,900, (N samples: 1,011,200), Loss: 543.4404, (Recon: 543.4404, KL: 0.0000), Gradient norm: 0.8751


100%|██████████| 469/469 [00:04<00:00, 116.02it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 159.04it/s]


====> Test set loss: 543.4376, (BCE: 543.4376, KLD: 0.0000)
Epoch 18/50


  9%|▉         | 43/469 [00:00<00:04, 106.39it/s]

Step 8,000, (N samples: 1,024,000), Loss: 543.4418, (Recon: 543.4418, KL: 0.0000), Gradient norm: 0.8267


 32%|███▏      | 151/469 [00:01<00:02, 116.35it/s]

Step 8,100, (N samples: 1,036,800), Loss: 543.4243, (Recon: 543.4243, KL: 0.0000), Gradient norm: 0.8554


 53%|█████▎    | 247/469 [00:02<00:01, 117.85it/s]

Step 8,200, (N samples: 1,049,600), Loss: 543.4362, (Recon: 543.4362, KL: 0.0000), Gradient norm: 0.8690


 73%|███████▎  | 343/469 [00:02<00:01, 117.95it/s]

Step 8,300, (N samples: 1,062,400), Loss: 543.4329, (Recon: 543.4324, KL: 0.0006), Gradient norm: 0.8851


 96%|█████████▌| 451/469 [00:03<00:00, 118.60it/s]

Step 8,400, (N samples: 1,075,200), Loss: 543.4354, (Recon: 543.4354, KL: 0.0000), Gradient norm: 0.8488


100%|██████████| 469/469 [00:04<00:00, 115.92it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.68it/s]


====> Test set loss: 543.4362, (BCE: 543.4362, KLD: 0.0000)
Epoch 19/50


 17%|█▋        | 79/469 [00:00<00:03, 115.56it/s]

Step 8,500, (N samples: 1,088,000), Loss: 543.4500, (Recon: 543.4500, KL: 0.0000), Gradient norm: 0.8618


 37%|███▋      | 175/469 [00:01<00:02, 118.36it/s]

Step 8,600, (N samples: 1,100,800), Loss: 543.4263, (Recon: 543.4263, KL: 0.0000), Gradient norm: 0.8422


 58%|█████▊    | 271/469 [00:02<00:01, 118.43it/s]

Step 8,700, (N samples: 1,113,600), Loss: 543.4363, (Recon: 543.4363, KL: 0.0000), Gradient norm: 0.8683


 81%|████████  | 379/469 [00:03<00:00, 118.46it/s]

Step 8,800, (N samples: 1,126,400), Loss: 543.4340, (Recon: 543.4340, KL: 0.0000), Gradient norm: 0.8053


100%|██████████| 469/469 [00:04<00:00, 116.70it/s]


Step 8,900, (N samples: 1,139,200), Loss: 543.4443, (Recon: 543.4443, KL: 0.0000), Gradient norm: 0.8909


Testing: 100%|██████████| 79/79 [00:00<00:00, 159.14it/s]


====> Test set loss: 543.4354, (BCE: 543.4354, KLD: 0.0000)
Epoch 20/50


 22%|██▏       | 103/469 [00:00<00:03, 117.26it/s]

Step 9,000, (N samples: 1,152,000), Loss: 543.4402, (Recon: 543.4402, KL: 0.0000), Gradient norm: 0.7998


 45%|████▍     | 211/469 [00:01<00:02, 118.46it/s]

Step 9,100, (N samples: 1,164,800), Loss: 543.4340, (Recon: 543.4340, KL: 0.0000), Gradient norm: 0.8462


 65%|██████▌   | 307/469 [00:02<00:01, 118.43it/s]

Step 9,200, (N samples: 1,177,600), Loss: 543.4327, (Recon: 543.4327, KL: 0.0000), Gradient norm: 0.8589


 86%|████████▌ | 403/469 [00:03<00:00, 118.62it/s]

Step 9,300, (N samples: 1,190,400), Loss: 543.4387, (Recon: 543.4387, KL: 0.0000), Gradient norm: 0.8810


100%|██████████| 469/469 [00:04<00:00, 116.85it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.93it/s]


====> Test set loss: 543.4365, (BCE: 543.4365, KLD: 0.0000)
Epoch 21/50


  9%|▉         | 43/469 [00:00<00:03, 108.46it/s]

Step 9,400, (N samples: 1,203,200), Loss: 543.4474, (Recon: 543.4474, KL: 0.0000), Gradient norm: 0.8570


 30%|██▉       | 139/469 [00:01<00:02, 118.22it/s]

Step 9,500, (N samples: 1,216,000), Loss: 543.4393, (Recon: 543.4393, KL: 0.0000), Gradient norm: 0.8939


 50%|█████     | 235/469 [00:02<00:01, 118.38it/s]

Step 9,600, (N samples: 1,228,800), Loss: 543.4313, (Recon: 543.4313, KL: 0.0000), Gradient norm: 0.8975


 73%|███████▎  | 343/469 [00:02<00:01, 118.48it/s]

Step 9,700, (N samples: 1,241,600), Loss: 543.4329, (Recon: 543.4329, KL: 0.0000), Gradient norm: 0.8574


 94%|█████████▎| 439/469 [00:03<00:00, 118.64it/s]

Step 9,800, (N samples: 1,254,400), Loss: 543.4366, (Recon: 543.4366, KL: 0.0000), Gradient norm: 0.8874


100%|██████████| 469/469 [00:04<00:00, 116.88it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.52it/s]


====> Test set loss: 543.4357, (BCE: 543.4357, KLD: 0.0000)
Epoch 22/50


 14%|█▍        | 67/469 [00:00<00:03, 114.10it/s]

Step 9,900, (N samples: 1,267,200), Loss: 543.4341, (Recon: 543.4341, KL: 0.0000), Gradient norm: 0.8780


 37%|███▋      | 175/469 [00:01<00:02, 118.37it/s]

Step 10,000, (N samples: 1,280,000), Loss: 543.4362, (Recon: 543.4362, KL: 0.0000), Gradient norm: 0.8810


 58%|█████▊    | 271/469 [00:02<00:01, 118.58it/s]

Step 10,100, (N samples: 1,292,800), Loss: 543.4390, (Recon: 543.4390, KL: 0.0000), Gradient norm: 0.8286


 78%|███████▊  | 367/469 [00:03<00:00, 118.50it/s]

Step 10,200, (N samples: 1,305,600), Loss: 543.4402, (Recon: 543.4402, KL: 0.0000), Gradient norm: 0.8897


100%|██████████| 469/469 [00:04<00:00, 116.77it/s]


Step 10,300, (N samples: 1,318,400), Loss: 543.4280, (Recon: 543.4280, KL: 0.0000), Gradient norm: 0.8536


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.86it/s]


====> Test set loss: 543.4364, (BCE: 543.4364, KLD: 0.0000)
Epoch 23/50


 22%|██▏       | 103/469 [00:00<00:03, 116.70it/s]

Step 10,400, (N samples: 1,331,200), Loss: 543.4327, (Recon: 543.4327, KL: 0.0000), Gradient norm: 0.8860


 42%|████▏     | 199/469 [00:01<00:02, 118.50it/s]

Step 10,500, (N samples: 1,344,000), Loss: 543.4246, (Recon: 543.4246, KL: 0.0000), Gradient norm: 0.8575


 63%|██████▎   | 295/469 [00:02<00:01, 118.69it/s]

Step 10,600, (N samples: 1,356,800), Loss: 543.4368, (Recon: 543.4368, KL: 0.0000), Gradient norm: 0.8605


 86%|████████▌ | 403/469 [00:03<00:00, 118.46it/s]

Step 10,700, (N samples: 1,369,600), Loss: 543.4252, (Recon: 543.4252, KL: 0.0000), Gradient norm: 0.8464


100%|██████████| 469/469 [00:04<00:00, 116.72it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 159.02it/s]


====> Test set loss: 543.4362, (BCE: 543.4362, KLD: 0.0000)
Epoch 24/50


  2%|▏         | 9/469 [00:00<00:05, 84.22it/s]

Step 10,800, (N samples: 1,382,400), Loss: 543.4493, (Recon: 543.4493, KL: 0.0000), Gradient norm: 0.9000


 27%|██▋       | 126/469 [00:01<00:02, 118.09it/s]

Step 10,900, (N samples: 1,395,200), Loss: 543.4293, (Recon: 543.4293, KL: 0.0000), Gradient norm: 0.8655


 50%|████▉     | 234/469 [00:02<00:01, 118.60it/s]

Step 11,000, (N samples: 1,408,000), Loss: 543.4413, (Recon: 543.4413, KL: 0.0000), Gradient norm: 0.8931


 70%|███████   | 330/469 [00:02<00:01, 118.66it/s]

Step 11,100, (N samples: 1,420,800), Loss: 543.4366, (Recon: 543.4366, KL: 0.0000), Gradient norm: 0.8964


 91%|█████████ | 426/469 [00:03<00:00, 118.23it/s]

Step 11,200, (N samples: 1,433,600), Loss: 543.4304, (Recon: 543.4304, KL: 0.0000), Gradient norm: 0.8916


100%|██████████| 469/469 [00:04<00:00, 116.89it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.12it/s]


====> Test set loss: 543.4375, (BCE: 543.4375, KLD: 0.0000)
Epoch 25/50


 14%|█▍        | 67/469 [00:00<00:03, 113.25it/s]

Step 11,300, (N samples: 1,446,400), Loss: 543.4337, (Recon: 543.4337, KL: 0.0000), Gradient norm: 0.8584


 35%|███▍      | 163/469 [00:01<00:02, 118.18it/s]

Step 11,400, (N samples: 1,459,200), Loss: 543.4434, (Recon: 543.4434, KL: 0.0000), Gradient norm: 0.9268


 55%|█████▌    | 259/469 [00:02<00:01, 118.27it/s]

Step 11,500, (N samples: 1,472,000), Loss: 543.4371, (Recon: 543.4371, KL: 0.0000), Gradient norm: 0.8725


 78%|███████▊  | 367/469 [00:03<00:00, 118.62it/s]

Step 11,600, (N samples: 1,484,800), Loss: 543.4283, (Recon: 543.4283, KL: 0.0000), Gradient norm: 0.8409


 99%|█████████▊| 463/469 [00:03<00:00, 118.53it/s]

Step 11,700, (N samples: 1,497,600), Loss: 543.4456, (Recon: 543.4456, KL: 0.0000), Gradient norm: 0.8868


100%|██████████| 469/469 [00:04<00:00, 116.60it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.80it/s]


====> Test set loss: 543.4357, (BCE: 543.4357, KLD: 0.0000)
Epoch 26/50


 19%|█▉        | 91/469 [00:00<00:03, 116.33it/s]

Step 11,800, (N samples: 1,510,400), Loss: 543.4316, (Recon: 543.4316, KL: 0.0000), Gradient norm: 0.8737


 42%|████▏     | 199/469 [00:01<00:02, 118.59it/s]

Step 11,900, (N samples: 1,523,200), Loss: 543.4292, (Recon: 543.4292, KL: 0.0000), Gradient norm: 0.8795


 63%|██████▎   | 295/469 [00:02<00:01, 118.38it/s]

Step 12,000, (N samples: 1,536,000), Loss: 543.4466, (Recon: 543.4466, KL: 0.0000), Gradient norm: 0.9019


 83%|████████▎ | 391/469 [00:03<00:00, 118.62it/s]

Step 12,100, (N samples: 1,548,800), Loss: 543.4333, (Recon: 543.4333, KL: 0.0000), Gradient norm: 0.8568


100%|██████████| 469/469 [00:04<00:00, 116.87it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.47it/s]


====> Test set loss: 543.4355, (BCE: 543.4355, KLD: 0.0000)
Epoch 27/50


  2%|▏         | 9/469 [00:00<00:05, 82.39it/s]

Step 12,200, (N samples: 1,561,600), Loss: 543.4452, (Recon: 543.4452, KL: 0.0000), Gradient norm: 0.8973


 27%|██▋       | 127/469 [00:01<00:02, 117.71it/s]

Step 12,300, (N samples: 1,574,400), Loss: 543.4387, (Recon: 543.4387, KL: 0.0000), Gradient norm: 0.9123


 48%|████▊     | 223/469 [00:01<00:02, 118.57it/s]

Step 12,400, (N samples: 1,587,200), Loss: 543.4410, (Recon: 543.4410, KL: 0.0000), Gradient norm: 0.9245


 68%|██████▊   | 319/469 [00:02<00:01, 118.57it/s]

Step 12,500, (N samples: 1,600,000), Loss: 543.4349, (Recon: 543.4349, KL: 0.0000), Gradient norm: 0.9137


 91%|█████████ | 427/469 [00:03<00:00, 118.12it/s]

Step 12,600, (N samples: 1,612,800), Loss: 543.4396, (Recon: 543.4396, KL: 0.0000), Gradient norm: 0.8790


100%|██████████| 469/469 [00:04<00:00, 116.66it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 159.06it/s]


====> Test set loss: 543.4371, (BCE: 543.4371, KLD: 0.0000)
Epoch 28/50


 12%|█▏        | 55/469 [00:00<00:03, 112.15it/s]

Step 12,700, (N samples: 1,625,600), Loss: 543.4403, (Recon: 543.4403, KL: 0.0000), Gradient norm: 0.9600


 32%|███▏      | 151/469 [00:01<00:02, 118.26it/s]

Step 12,800, (N samples: 1,638,400), Loss: 543.4468, (Recon: 543.4468, KL: 0.0000), Gradient norm: 0.9198


 55%|█████▌    | 259/469 [00:02<00:01, 118.49it/s]

Step 12,900, (N samples: 1,651,200), Loss: 543.4374, (Recon: 543.4374, KL: 0.0000), Gradient norm: 0.9416


 76%|███████▌  | 355/469 [00:03<00:00, 118.68it/s]

Step 13,000, (N samples: 1,664,000), Loss: 543.4232, (Recon: 543.4232, KL: 0.0000), Gradient norm: 0.9235


 96%|█████████▌| 451/469 [00:03<00:00, 118.42it/s]

Step 13,100, (N samples: 1,676,800), Loss: 543.4366, (Recon: 543.4366, KL: 0.0000), Gradient norm: 0.8365


100%|██████████| 469/469 [00:04<00:00, 116.80it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.81it/s]


====> Test set loss: 543.4371, (BCE: 543.4371, KLD: 0.0000)
Epoch 29/50


 19%|█▉        | 91/469 [00:00<00:03, 116.60it/s]

Step 13,200, (N samples: 1,689,600), Loss: 543.4431, (Recon: 543.4431, KL: 0.0000), Gradient norm: 0.8748


 40%|███▉      | 187/469 [00:01<00:02, 118.13it/s]

Step 13,300, (N samples: 1,702,400), Loss: 543.4451, (Recon: 543.4451, KL: 0.0000), Gradient norm: 0.8941


 60%|██████    | 283/469 [00:02<00:01, 118.38it/s]

Step 13,400, (N samples: 1,715,200), Loss: 543.4330, (Recon: 543.4330, KL: 0.0000), Gradient norm: 0.8429


 83%|████████▎ | 391/469 [00:03<00:00, 118.61it/s]

Step 13,500, (N samples: 1,728,000), Loss: 543.4341, (Recon: 543.4341, KL: 0.0000), Gradient norm: 0.9109


100%|██████████| 469/469 [00:04<00:00, 116.75it/s]


Step 13,600, (N samples: 1,740,800), Loss: 543.4441, (Recon: 543.4441, KL: 0.0000), Gradient norm: 1.0443


Testing: 100%|██████████| 79/79 [00:00<00:00, 159.25it/s]


====> Test set loss: 543.4374, (BCE: 543.4374, KLD: 0.0000)
Epoch 30/50


 25%|██▍       | 115/469 [00:01<00:03, 117.68it/s]

Step 13,700, (N samples: 1,753,600), Loss: 543.4158, (Recon: 543.4158, KL: 0.0000), Gradient norm: 0.8651


 48%|████▊     | 223/469 [00:01<00:02, 118.16it/s]

Step 13,800, (N samples: 1,766,400), Loss: 543.4442, (Recon: 543.4442, KL: 0.0000), Gradient norm: 0.9239


 68%|██████▊   | 319/469 [00:02<00:01, 118.39it/s]

Step 13,900, (N samples: 1,779,200), Loss: 543.4465, (Recon: 543.4465, KL: 0.0000), Gradient norm: 0.9564


 88%|████████▊ | 415/469 [00:03<00:00, 118.51it/s]

Step 14,000, (N samples: 1,792,000), Loss: 543.4471, (Recon: 543.4471, KL: 0.0000), Gradient norm: 0.9225


100%|██████████| 469/469 [00:04<00:00, 116.83it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.82it/s]


====> Test set loss: 543.4378, (BCE: 543.4378, KLD: 0.0000)
Epoch 31/50


  9%|▉         | 43/469 [00:00<00:03, 108.20it/s]

Step 14,100, (N samples: 1,804,800), Loss: 543.4316, (Recon: 543.4316, KL: 0.0000), Gradient norm: 0.9923


 32%|███▏      | 151/469 [00:01<00:02, 117.64it/s]

Step 14,200, (N samples: 1,817,600), Loss: 543.4361, (Recon: 543.4361, KL: 0.0000), Gradient norm: 0.8902


 53%|█████▎    | 247/469 [00:02<00:01, 117.95it/s]

Step 14,300, (N samples: 1,830,400), Loss: 543.4358, (Recon: 543.4358, KL: 0.0000), Gradient norm: 0.9068


 73%|███████▎  | 343/469 [00:02<00:01, 117.86it/s]

Step 14,400, (N samples: 1,843,200), Loss: 543.4396, (Recon: 543.4396, KL: 0.0000), Gradient norm: 0.9011


 96%|█████████▌| 451/469 [00:03<00:00, 118.46it/s]

Step 14,500, (N samples: 1,856,000), Loss: 543.4398, (Recon: 543.4398, KL: 0.0000), Gradient norm: 0.9157


100%|██████████| 469/469 [00:04<00:00, 116.12it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.75it/s]


====> Test set loss: 543.4355, (BCE: 543.4355, KLD: 0.0000)
Epoch 32/50


 17%|█▋        | 79/469 [00:00<00:03, 115.75it/s]

Step 14,600, (N samples: 1,868,800), Loss: 543.4467, (Recon: 543.4467, KL: 0.0000), Gradient norm: 0.9304


 37%|███▋      | 175/469 [00:01<00:02, 118.44it/s]

Step 14,700, (N samples: 1,881,600), Loss: 543.4332, (Recon: 543.4332, KL: 0.0000), Gradient norm: 0.9011


 60%|██████    | 283/469 [00:02<00:01, 118.43it/s]

Step 14,800, (N samples: 1,894,400), Loss: 543.4373, (Recon: 543.4373, KL: 0.0000), Gradient norm: 0.9293


 81%|████████  | 379/469 [00:03<00:00, 118.40it/s]

Step 14,900, (N samples: 1,907,200), Loss: 543.4269, (Recon: 543.4269, KL: 0.0000), Gradient norm: 0.9079


100%|██████████| 469/469 [00:04<00:00, 116.80it/s]


Step 15,000, (N samples: 1,920,000), Loss: 543.4407, (Recon: 543.4407, KL: 0.0000), Gradient norm: 0.9163


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.23it/s]


====> Test set loss: 543.4374, (BCE: 543.4374, KLD: 0.0000)
Epoch 33/50


 25%|██▍       | 115/469 [00:01<00:03, 117.66it/s]

Step 15,100, (N samples: 1,932,800), Loss: 543.4470, (Recon: 543.4470, KL: 0.0000), Gradient norm: 0.9021


 45%|████▍     | 211/469 [00:01<00:02, 118.40it/s]

Step 15,200, (N samples: 1,945,600), Loss: 543.4335, (Recon: 543.4335, KL: 0.0000), Gradient norm: 0.9056


 65%|██████▌   | 307/469 [00:02<00:01, 118.52it/s]

Step 15,300, (N samples: 1,958,400), Loss: 543.4418, (Recon: 543.4418, KL: 0.0000), Gradient norm: 0.9270


 88%|████████▊ | 415/469 [00:03<00:00, 118.11it/s]

Step 15,400, (N samples: 1,971,200), Loss: 543.4424, (Recon: 543.4424, KL: 0.0000), Gradient norm: 0.9284


100%|██████████| 469/469 [00:04<00:00, 116.77it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.58it/s]


====> Test set loss: 543.4358, (BCE: 543.4358, KLD: 0.0000)
Epoch 34/50


  9%|▉         | 43/469 [00:00<00:03, 108.44it/s]

Step 15,500, (N samples: 1,984,000), Loss: 543.4299, (Recon: 543.4299, KL: 0.0000), Gradient norm: 0.9061


 30%|██▉       | 139/469 [00:01<00:02, 118.13it/s]

Step 15,600, (N samples: 1,996,800), Loss: 543.4385, (Recon: 543.4385, KL: 0.0000), Gradient norm: 0.9164


 53%|█████▎    | 247/469 [00:02<00:01, 118.65it/s]

Step 15,700, (N samples: 2,009,600), Loss: 543.4344, (Recon: 543.4344, KL: 0.0000), Gradient norm: 0.9166


 73%|███████▎  | 343/469 [00:02<00:01, 118.63it/s]

Step 15,800, (N samples: 2,022,400), Loss: 543.4386, (Recon: 543.4386, KL: 0.0000), Gradient norm: 0.9592


 94%|█████████▎| 439/469 [00:03<00:00, 118.65it/s]

Step 15,900, (N samples: 2,035,200), Loss: 543.4293, (Recon: 543.4293, KL: 0.0000), Gradient norm: 0.9382


100%|██████████| 469/469 [00:04<00:00, 116.86it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.56it/s]


====> Test set loss: 543.4372, (BCE: 543.4372, KLD: 0.0000)
Epoch 35/50


 14%|█▍        | 67/469 [00:00<00:03, 113.51it/s]

Step 16,000, (N samples: 2,048,000), Loss: 543.4427, (Recon: 543.4427, KL: 0.0000), Gradient norm: 0.9341


 37%|███▋      | 175/469 [00:01<00:02, 118.35it/s]

Step 16,100, (N samples: 2,060,800), Loss: 543.4340, (Recon: 543.4340, KL: 0.0000), Gradient norm: 0.9146


 58%|█████▊    | 271/469 [00:02<00:01, 118.68it/s]

Step 16,200, (N samples: 2,073,600), Loss: 543.4392, (Recon: 543.4392, KL: 0.0000), Gradient norm: 0.9207


 78%|███████▊  | 367/469 [00:03<00:00, 118.39it/s]

Step 16,300, (N samples: 2,086,400), Loss: 543.4331, (Recon: 543.4331, KL: 0.0000), Gradient norm: 0.9238


100%|██████████| 469/469 [00:04<00:00, 116.74it/s]


Step 16,400, (N samples: 2,099,200), Loss: 543.4339, (Recon: 543.4339, KL: 0.0000), Gradient norm: 0.9126


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.88it/s]


====> Test set loss: 543.4360, (BCE: 543.4360, KLD: 0.0000)
Epoch 36/50


 22%|██▏       | 103/469 [00:00<00:03, 117.25it/s]

Step 16,500, (N samples: 2,112,000), Loss: 543.4398, (Recon: 543.4398, KL: 0.0000), Gradient norm: 0.9300


 42%|████▏     | 199/469 [00:01<00:02, 118.56it/s]

Step 16,600, (N samples: 2,124,800), Loss: 543.4384, (Recon: 543.4384, KL: 0.0000), Gradient norm: 0.9990


 65%|██████▌   | 307/469 [00:02<00:01, 118.67it/s]

Step 16,700, (N samples: 2,137,600), Loss: 543.4348, (Recon: 543.4348, KL: 0.0000), Gradient norm: 0.8937


 86%|████████▌ | 403/469 [00:03<00:00, 118.55it/s]

Step 16,800, (N samples: 2,150,400), Loss: 543.4338, (Recon: 543.4338, KL: 0.0000), Gradient norm: 0.8857


100%|██████████| 469/469 [00:04<00:00, 116.88it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.84it/s]


====> Test set loss: 543.4344, (BCE: 543.4344, KLD: 0.0000)
Epoch 37/50


  6%|▋         | 30/469 [00:00<00:04, 101.69it/s]

Step 16,900, (N samples: 2,163,200), Loss: 543.4386, (Recon: 543.4386, KL: 0.0000), Gradient norm: 0.9342


 29%|██▉       | 138/469 [00:01<00:02, 118.10it/s]

Step 17,000, (N samples: 2,176,000), Loss: 543.4395, (Recon: 543.4395, KL: 0.0000), Gradient norm: 0.9449


 50%|████▉     | 234/469 [00:02<00:01, 118.56it/s]

Step 17,100, (N samples: 2,188,800), Loss: 543.4455, (Recon: 543.4455, KL: 0.0000), Gradient norm: 0.9383


 70%|███████   | 330/469 [00:02<00:01, 118.62it/s]

Step 17,200, (N samples: 2,201,600), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 0.8999


 93%|█████████▎| 438/469 [00:03<00:00, 118.67it/s]

Step 17,300, (N samples: 2,214,400), Loss: 543.4242, (Recon: 543.4242, KL: 0.0000), Gradient norm: 0.9073


100%|██████████| 469/469 [00:04<00:00, 116.86it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 159.04it/s]


====> Test set loss: 543.4366, (BCE: 543.4366, KLD: 0.0000)
Epoch 38/50


 14%|█▍        | 67/469 [00:00<00:03, 114.29it/s]

Step 17,400, (N samples: 2,227,200), Loss: 543.4362, (Recon: 543.4362, KL: 0.0000), Gradient norm: 0.9328


 35%|███▍      | 163/469 [00:01<00:02, 118.19it/s]

Step 17,500, (N samples: 2,240,000), Loss: 543.4456, (Recon: 543.4456, KL: 0.0000), Gradient norm: 1.0206


 58%|█████▊    | 271/469 [00:02<00:01, 118.24it/s]

Step 17,600, (N samples: 2,252,800), Loss: 543.4318, (Recon: 543.4318, KL: 0.0000), Gradient norm: 0.9246


 78%|███████▊  | 367/469 [00:03<00:00, 118.57it/s]

Step 17,700, (N samples: 2,265,600), Loss: 543.4349, (Recon: 543.4349, KL: 0.0000), Gradient norm: 0.9621


100%|██████████| 469/469 [00:04<00:00, 116.80it/s]


Step 17,800, (N samples: 2,278,400), Loss: 543.4451, (Recon: 543.4451, KL: 0.0000), Gradient norm: 0.9491


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.96it/s]


====> Test set loss: 543.4364, (BCE: 543.4364, KLD: 0.0000)
Epoch 39/50


 19%|█▉        | 91/469 [00:00<00:03, 116.50it/s]

Step 17,900, (N samples: 2,291,200), Loss: 543.4414, (Recon: 543.4414, KL: 0.0000), Gradient norm: 0.9839


 42%|████▏     | 199/469 [00:01<00:02, 118.50it/s]

Step 18,000, (N samples: 2,304,000), Loss: 543.4373, (Recon: 543.4373, KL: 0.0000), Gradient norm: 0.9083


 63%|██████▎   | 295/469 [00:02<00:01, 118.53it/s]

Step 18,100, (N samples: 2,316,800), Loss: 543.4342, (Recon: 543.4342, KL: 0.0000), Gradient norm: 0.9225


 83%|████████▎ | 391/469 [00:03<00:00, 118.54it/s]

Step 18,200, (N samples: 2,329,600), Loss: 543.4352, (Recon: 543.4352, KL: 0.0000), Gradient norm: 0.9249


100%|██████████| 469/469 [00:04<00:00, 116.79it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.74it/s]


====> Test set loss: 543.4383, (BCE: 543.4383, KLD: 0.0000)
Epoch 40/50


  2%|▏         | 9/469 [00:00<00:05, 84.30it/s]

Step 18,300, (N samples: 2,342,400), Loss: 543.4375, (Recon: 543.4375, KL: 0.0000), Gradient norm: 0.9214


 27%|██▋       | 126/469 [00:01<00:02, 117.74it/s]

Step 18,400, (N samples: 2,355,200), Loss: 543.4341, (Recon: 543.4341, KL: 0.0000), Gradient norm: 0.9790


 47%|████▋     | 222/469 [00:01<00:02, 118.55it/s]

Step 18,500, (N samples: 2,368,000), Loss: 543.4304, (Recon: 543.4304, KL: 0.0000), Gradient norm: 0.9547


 70%|███████   | 330/469 [00:02<00:01, 118.53it/s]

Step 18,600, (N samples: 2,380,800), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 0.9280


 91%|█████████ | 426/469 [00:03<00:00, 118.49it/s]

Step 18,700, (N samples: 2,393,600), Loss: 543.4371, (Recon: 543.4371, KL: 0.0000), Gradient norm: 0.9275


100%|██████████| 469/469 [00:04<00:00, 116.81it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.28it/s]


====> Test set loss: 543.4371, (BCE: 543.4371, KLD: 0.0000)
Epoch 41/50


 12%|█▏        | 55/469 [00:00<00:03, 111.53it/s]

Step 18,800, (N samples: 2,406,400), Loss: 543.4286, (Recon: 543.4286, KL: 0.0000), Gradient norm: 0.8478


 35%|███▍      | 163/469 [00:01<00:02, 117.93it/s]

Step 18,900, (N samples: 2,419,200), Loss: 543.4440, (Recon: 543.4440, KL: 0.0000), Gradient norm: 0.9158


 55%|█████▌    | 259/469 [00:02<00:01, 118.49it/s]

Step 19,000, (N samples: 2,432,000), Loss: 543.4272, (Recon: 543.4272, KL: 0.0000), Gradient norm: 0.9333


 76%|███████▌  | 355/469 [00:03<00:00, 118.30it/s]

Step 19,100, (N samples: 2,444,800), Loss: 543.4326, (Recon: 543.4326, KL: 0.0000), Gradient norm: 0.9690


 99%|█████████▊| 463/469 [00:03<00:00, 118.23it/s]

Step 19,200, (N samples: 2,457,600), Loss: 543.4451, (Recon: 543.4451, KL: 0.0000), Gradient norm: 0.9318


100%|██████████| 469/469 [00:04<00:00, 116.58it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.77it/s]


====> Test set loss: 543.4374, (BCE: 543.4374, KLD: 0.0000)
Epoch 42/50


 19%|█▉        | 91/469 [00:00<00:03, 116.71it/s]

Step 19,300, (N samples: 2,470,400), Loss: 543.4313, (Recon: 543.4313, KL: 0.0000), Gradient norm: 0.9468


 40%|███▉      | 187/469 [00:01<00:02, 118.56it/s]

Step 19,400, (N samples: 2,483,200), Loss: 543.4437, (Recon: 543.4437, KL: 0.0000), Gradient norm: 0.9435


 63%|██████▎   | 295/469 [00:02<00:01, 118.42it/s]

Step 19,500, (N samples: 2,496,000), Loss: 543.4357, (Recon: 543.4357, KL: 0.0000), Gradient norm: 0.9063


 83%|████████▎ | 391/469 [00:03<00:00, 118.49it/s]

Step 19,600, (N samples: 2,508,800), Loss: 543.4341, (Recon: 543.4341, KL: 0.0000), Gradient norm: 0.9620


100%|██████████| 469/469 [00:04<00:00, 116.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.24it/s]


====> Test set loss: 543.4367, (BCE: 543.4367, KLD: 0.0000)
Epoch 43/50


  2%|▏         | 9/469 [00:00<00:05, 82.26it/s]

Step 19,700, (N samples: 2,521,600), Loss: 543.4375, (Recon: 543.4375, KL: 0.0000), Gradient norm: 0.9756


 25%|██▍       | 115/469 [00:01<00:03, 117.68it/s]

Step 19,800, (N samples: 2,534,400), Loss: 543.4350, (Recon: 543.4350, KL: 0.0000), Gradient norm: 0.8800


 48%|████▊     | 223/469 [00:01<00:02, 118.54it/s]

Step 19,900, (N samples: 2,547,200), Loss: 543.4373, (Recon: 543.4373, KL: 0.0000), Gradient norm: 1.0425


 68%|██████▊   | 319/469 [00:02<00:01, 118.36it/s]

Step 20,000, (N samples: 2,560,000), Loss: 543.4449, (Recon: 543.4449, KL: 0.0000), Gradient norm: 0.9811


 88%|████████▊ | 415/469 [00:03<00:00, 118.63it/s]

Step 20,100, (N samples: 2,572,800), Loss: 543.4373, (Recon: 543.4373, KL: 0.0000), Gradient norm: 0.9910


100%|██████████| 469/469 [00:04<00:00, 116.86it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.69it/s]


====> Test set loss: 543.4353, (BCE: 543.4353, KLD: 0.0000)
Epoch 44/50


 12%|█▏        | 55/469 [00:00<00:03, 111.46it/s]

Step 20,200, (N samples: 2,585,600), Loss: 543.4385, (Recon: 543.4385, KL: 0.0000), Gradient norm: 0.8840


 32%|███▏      | 151/469 [00:01<00:02, 118.15it/s]

Step 20,300, (N samples: 2,598,400), Loss: 543.4458, (Recon: 543.4458, KL: 0.0000), Gradient norm: 0.9539


 53%|█████▎    | 247/469 [00:02<00:01, 118.57it/s]

Step 20,400, (N samples: 2,611,200), Loss: 543.4393, (Recon: 543.4393, KL: 0.0000), Gradient norm: 0.9128


 76%|███████▌  | 355/469 [00:03<00:00, 118.53it/s]

Step 20,500, (N samples: 2,624,000), Loss: 543.4238, (Recon: 543.4238, KL: 0.0000), Gradient norm: 0.9297


 96%|█████████▌| 451/469 [00:03<00:00, 118.46it/s]

Step 20,600, (N samples: 2,636,800), Loss: 543.4354, (Recon: 543.4354, KL: 0.0000), Gradient norm: 0.9283


100%|██████████| 469/469 [00:04<00:00, 116.68it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 159.48it/s]


====> Test set loss: 543.4361, (BCE: 543.4361, KLD: 0.0000)
Epoch 45/50


 17%|█▋        | 79/469 [00:00<00:03, 115.71it/s]

Step 20,700, (N samples: 2,649,600), Loss: 543.4438, (Recon: 543.4438, KL: 0.0000), Gradient norm: 0.9921


 40%|███▉      | 187/469 [00:01<00:02, 118.48it/s]

Step 20,800, (N samples: 2,662,400), Loss: 543.4335, (Recon: 543.4335, KL: 0.0000), Gradient norm: 0.9285


 60%|██████    | 283/469 [00:02<00:01, 118.65it/s]

Step 20,900, (N samples: 2,675,200), Loss: 543.4390, (Recon: 543.4390, KL: 0.0000), Gradient norm: 0.9288


 81%|████████  | 379/469 [00:03<00:00, 118.70it/s]

Step 21,000, (N samples: 2,688,000), Loss: 543.4479, (Recon: 543.4479, KL: 0.0000), Gradient norm: 1.0118


100%|██████████| 469/469 [00:04<00:00, 116.80it/s]


Step 21,100, (N samples: 2,700,800), Loss: 543.4404, (Recon: 543.4404, KL: 0.0000), Gradient norm: 0.9870


Testing: 100%|██████████| 79/79 [00:00<00:00, 158.55it/s]


====> Test set loss: 543.4376, (BCE: 543.4376, KLD: 0.0000)
Epoch 46/50


 25%|██▍       | 115/469 [00:01<00:03, 117.54it/s]

Step 21,200, (N samples: 2,713,600), Loss: 543.4419, (Recon: 543.4419, KL: 0.0000), Gradient norm: 0.9424


 45%|████▍     | 211/469 [00:01<00:02, 118.53it/s]

Step 21,300, (N samples: 2,726,400), Loss: 543.4406, (Recon: 543.4406, KL: 0.0000), Gradient norm: 0.9029


 68%|██████▊   | 319/469 [00:02<00:01, 118.58it/s]

Step 21,400, (N samples: 2,739,200), Loss: 543.4361, (Recon: 543.4361, KL: 0.0000), Gradient norm: 0.9735


 88%|████████▊ | 415/469 [00:03<00:00, 118.57it/s]

Step 21,500, (N samples: 2,752,000), Loss: 543.4400, (Recon: 543.4400, KL: 0.0000), Gradient norm: 0.9197


100%|██████████| 469/469 [00:04<00:00, 116.83it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.13it/s]


====> Test set loss: 543.4364, (BCE: 543.4364, KLD: 0.0000)
Epoch 47/50


  9%|▉         | 43/469 [00:00<00:03, 108.28it/s]

Step 21,600, (N samples: 2,764,800), Loss: 543.4470, (Recon: 543.4470, KL: 0.0000), Gradient norm: 0.9083


 30%|██▉       | 139/469 [00:01<00:02, 117.71it/s]

Step 21,700, (N samples: 2,777,600), Loss: 543.4443, (Recon: 543.4443, KL: 0.0000), Gradient norm: 0.9346


 53%|█████▎    | 247/469 [00:02<00:01, 118.58it/s]

Step 21,800, (N samples: 2,790,400), Loss: 543.4421, (Recon: 543.4421, KL: 0.0000), Gradient norm: 0.8975


 73%|███████▎  | 343/469 [00:02<00:01, 118.62it/s]

Step 21,900, (N samples: 2,803,200), Loss: 543.4315, (Recon: 543.4314, KL: 0.0000), Gradient norm: 0.9983


 94%|█████████▎| 439/469 [00:03<00:00, 118.64it/s]

Step 22,000, (N samples: 2,816,000), Loss: 543.4199, (Recon: 543.4199, KL: 0.0000), Gradient norm: 0.9832


100%|██████████| 469/469 [00:04<00:00, 116.82it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.45it/s]


====> Test set loss: 543.4362, (BCE: 543.4362, KLD: 0.0000)
Epoch 48/50


 17%|█▋        | 79/469 [00:00<00:03, 115.79it/s]

Step 22,100, (N samples: 2,828,800), Loss: 543.4442, (Recon: 543.4442, KL: 0.0000), Gradient norm: 0.8779


 37%|███▋      | 175/469 [00:01<00:02, 118.47it/s]

Step 22,200, (N samples: 2,841,600), Loss: 543.4415, (Recon: 543.4415, KL: 0.0000), Gradient norm: 0.9602


 58%|█████▊    | 271/469 [00:02<00:01, 118.60it/s]

Step 22,300, (N samples: 2,854,400), Loss: 543.4352, (Recon: 543.4352, KL: 0.0000), Gradient norm: 0.9921


 81%|████████  | 379/469 [00:03<00:00, 118.73it/s]

Step 22,400, (N samples: 2,867,200), Loss: 543.4449, (Recon: 543.4449, KL: 0.0000), Gradient norm: 0.9016


100%|██████████| 469/469 [00:04<00:00, 116.88it/s]


Step 22,500, (N samples: 2,880,000), Loss: 543.4452, (Recon: 543.4452, KL: 0.0000), Gradient norm: 0.8984


Testing: 100%|██████████| 79/79 [00:00<00:00, 159.75it/s]


====> Test set loss: 543.4361, (BCE: 543.4361, KLD: 0.0000)
Epoch 49/50


 22%|██▏       | 103/469 [00:00<00:03, 117.18it/s]

Step 22,600, (N samples: 2,892,800), Loss: 543.4399, (Recon: 543.4399, KL: 0.0000), Gradient norm: 0.9097


 45%|████▍     | 211/469 [00:01<00:02, 118.45it/s]

Step 22,700, (N samples: 2,905,600), Loss: 543.4352, (Recon: 543.4352, KL: 0.0000), Gradient norm: 0.8840


 65%|██████▌   | 307/469 [00:02<00:01, 118.46it/s]

Step 22,800, (N samples: 2,918,400), Loss: 543.4294, (Recon: 543.4294, KL: 0.0000), Gradient norm: 0.9386


 86%|████████▌ | 403/469 [00:03<00:00, 118.61it/s]

Step 22,900, (N samples: 2,931,200), Loss: 543.4454, (Recon: 543.4454, KL: 0.0000), Gradient norm: 0.9530


100%|██████████| 469/469 [00:04<00:00, 116.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.95it/s]


====> Test set loss: 543.4388, (BCE: 543.4388, KLD: 0.0000)
Epoch 50/50


  9%|▉         | 43/469 [00:00<00:03, 108.02it/s]

Step 23,000, (N samples: 2,944,000), Loss: 543.4315, (Recon: 543.4314, KL: 0.0001), Gradient norm: 0.9703


 30%|██▉       | 139/469 [00:01<00:02, 117.85it/s]

Step 23,100, (N samples: 2,956,800), Loss: 543.4360, (Recon: 543.4360, KL: 0.0000), Gradient norm: 0.8947


 50%|█████     | 235/469 [00:02<00:01, 118.19it/s]

Step 23,200, (N samples: 2,969,600), Loss: 543.4272, (Recon: 543.4272, KL: 0.0000), Gradient norm: 0.9126


 73%|███████▎  | 343/469 [00:02<00:01, 118.23it/s]

Step 23,300, (N samples: 2,982,400), Loss: 543.4387, (Recon: 543.4387, KL: 0.0000), Gradient norm: 0.9439


 94%|█████████▎| 439/469 [00:03<00:00, 118.51it/s]

Step 23,400, (N samples: 2,995,200), Loss: 543.4240, (Recon: 543.4240, KL: 0.0000), Gradient norm: 0.9210


100%|██████████| 469/469 [00:04<00:00, 116.65it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 158.98it/s]

====> Test set loss: 543.4358, (BCE: 543.4358, KLD: 0.0000)





In [15]:
# Push any buffered TensorBoard events to disk for both the train and the
# test writer, in that order, so the dashboard can read the complete run.
for _tb_writer in (writer_train, writer_test):
    _tb_writer.flush()

In [16]:
# Load the TensorBoard notebook extension; it provides the `%tensorboard`
# line magic used to embed the dashboard in this notebook.
%load_ext tensorboard

In [23]:
%tensorboard --logdir ../experiments/VAE_MNIST/20241028-004306/

Reusing TensorBoard on port 6011 (pid 1110344), started 0:00:12 ago. (Use '!kill 1110344' to kill it.)