In [1]:
# Load IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Alternative to the line above when re-running this cell in a live kernel
# where the extension has already been loaded:
# %reload_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before
# executing each cell.
%autoreload 2


In [2]:
from datetime import datetime

import torch
from torch.utils.tensorboard import SummaryWriter

from methylVA.mnist.model import VAE
from methylVA.mnist.training import train, test


# --- Hyperparameters --------------------------------------------------------
# NOTE(review): `batch_size` is not referenced again in the visible cells
# (the data-loader factory is called without arguments) -- confirm whether it
# should be forwarded there.
batch_size = 128
learning_rate = 1e-3
weight_decay = 1e-2
num_epochs = 50
latent_dim = 2
hidden_dim = 512
# Experiment name; used as the sub-directory for the TensorBoard logs below.
name = 'VAE_MNIST_GAUSSIAN_NOISE'

# NOTE(review): no RNG seed is set in this notebook, so weight initialisation
# (and anything else drawing from torch's global RNG) differs between runs
# unless the imported project code seeds it -- consider torch.manual_seed(...).

# --- Model / optimizer ------------------------------------------------------
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# input_dim=784 is presumably a flattened 28x28 image -- TODO confirm against
# the data loaders.
model = VAE(input_dim=784, latent_dim=latent_dim, hidden_dim=hidden_dim).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# --- TensorBoard writers ----------------------------------------------------
# Take the timestamp exactly once so the train and test writers always share
# the same run id. Calling datetime.now() separately for each writer can
# straddle a second boundary and yield mismatched directory names.
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
writer_train = SummaryWriter(f'../experiments/{name}/train/{run_id}')
writer_test = SummaryWriter(f'../experiments/{name}/test/{run_id}')


In [3]:
# Build the train/test data loaders consumed by the training loop.
# NOTE(review): the factory is called with no arguments, so the `batch_size`
# defined in the configuration cell is not forwarded here. The recorded
# progress bars show 469 training and 79 test batches per epoch, which would
# be consistent with a default batch size of 128 on MNIST-sized splits --
# TODO confirm against get_gaussian_data_loaders.
from methylVA.mnist.dataset import get_gaussian_data_loaders
train_loader, test_loader = get_gaussian_data_loaders()

In [4]:
# Train for `num_epochs` epochs, evaluating on the test loader after each one.
# `train` and `test` are already imported in the configuration cell, so they
# are not re-imported here.
#
# NOTE(review): in the output recorded for this cell the loss stays at
# ~543.44 from the first logged step onwards and the KL term decays to ~0,
# i.e. the run shows no measurable learning. Check whether that is expected
# for these inputs before relying on the trained model.

# The value returned by `train` is fed back in on the next epoch and passed to
# `test`; presumably it is the cumulative optimizer-step count used as the
# logging step (the logged "Step" counter keeps increasing across epochs).
prev_updates = 0
try:
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        prev_updates = train(model, train_loader, optimizer, prev_updates, writer=writer_train)
        test(model, test_loader, prev_updates, writer=writer_test)
finally:
    # Push buffered events to disk even if the loop is interrupted (e.g. via
    # "Interrupt Kernel"). flush() rather than close() keeps the writers
    # usable if this cell is run again.
    writer_train.flush()
    writer_test.flush()

Epoch 1/50


  3%|▎         | 14/469 [00:00<00:13, 33.24it/s]

Step 0, (N samples: 0), Loss: 543.7075, (Recon: 543.4746, KL: 0.2328), Gradient norm: 1.3653


 25%|██▌       | 118/469 [00:01<00:02, 118.44it/s]

Step 100, (N samples: 12,800), Loss: 543.4623, (Recon: 543.4617, KL: 0.0007), Gradient norm: 1.1996


 47%|████▋     | 222/469 [00:02<00:01, 123.84it/s]

Step 200, (N samples: 25,600), Loss: 543.4163, (Recon: 543.4157, KL: 0.0006), Gradient norm: 1.1119


 70%|██████▉   | 326/469 [00:03<00:01, 124.73it/s]

Step 300, (N samples: 38,400), Loss: 543.4635, (Recon: 543.4630, KL: 0.0005), Gradient norm: 1.2039


 89%|████████▉ | 417/469 [00:03<00:00, 124.74it/s]

Step 400, (N samples: 51,200), Loss: 543.4306, (Recon: 543.4302, KL: 0.0004), Gradient norm: 1.1770


100%|██████████| 469/469 [00:04<00:00, 111.72it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 166.00it/s]


====> Test set loss: 543.4437, (BCE: 543.4432, KLD: 0.0005)
Epoch 2/50


 10%|▉         | 45/469 [00:00<00:03, 113.52it/s]

Step 500, (N samples: 64,000), Loss: 543.4627, (Recon: 543.4623, KL: 0.0004), Gradient norm: 1.1297


 32%|███▏      | 149/469 [00:01<00:02, 124.41it/s]

Step 600, (N samples: 76,800), Loss: 543.4493, (Recon: 543.4492, KL: 0.0001), Gradient norm: 1.1507


 54%|█████▍    | 253/469 [00:02<00:01, 124.85it/s]

Step 700, (N samples: 89,600), Loss: 543.4301, (Recon: 543.4299, KL: 0.0002), Gradient norm: 1.1657


 76%|███████▌  | 357/469 [00:02<00:00, 123.46it/s]

Step 800, (N samples: 102,400), Loss: 543.4391, (Recon: 543.4388, KL: 0.0003), Gradient norm: 1.1251


 96%|█████████▌| 448/469 [00:03<00:00, 124.71it/s]

Step 900, (N samples: 115,200), Loss: 543.4354, (Recon: 543.4349, KL: 0.0004), Gradient norm: 1.1962


100%|██████████| 469/469 [00:03<00:00, 122.36it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.58it/s]


====> Test set loss: 543.4462, (BCE: 543.4458, KLD: 0.0004)
Epoch 3/50


 18%|█▊        | 83/469 [00:00<00:03, 120.33it/s]

Step 1,000, (N samples: 128,000), Loss: 543.4416, (Recon: 543.4415, KL: 0.0001), Gradient norm: 1.1965


 40%|███▉      | 187/469 [00:01<00:02, 124.62it/s]

Step 1,100, (N samples: 140,800), Loss: 543.4474, (Recon: 543.4468, KL: 0.0006), Gradient norm: 1.2097


 59%|█████▉    | 278/469 [00:02<00:01, 112.46it/s]

Step 1,200, (N samples: 153,600), Loss: 543.4426, (Recon: 543.4424, KL: 0.0001), Gradient norm: 1.1720


 79%|███████▉  | 370/469 [00:03<00:01, 90.84it/s] 

Step 1,300, (N samples: 166,400), Loss: 543.4407, (Recon: 543.4407, KL: 0.0001), Gradient norm: 1.1399


100%|██████████| 469/469 [00:04<00:00, 110.54it/s]


Step 1,400, (N samples: 179,200), Loss: 543.4351, (Recon: 543.4349, KL: 0.0002), Gradient norm: 1.1383


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.44it/s]


====> Test set loss: 543.4395, (BCE: 543.4393, KLD: 0.0002)
Epoch 4/50


 23%|██▎       | 110/469 [00:00<00:02, 122.21it/s]

Step 1,500, (N samples: 192,000), Loss: 543.4489, (Recon: 543.4487, KL: 0.0001), Gradient norm: 1.1907


 46%|████▌     | 214/469 [00:01<00:02, 124.92it/s]

Step 1,600, (N samples: 204,800), Loss: 543.4455, (Recon: 543.4454, KL: 0.0001), Gradient norm: 1.0947


 68%|██████▊   | 318/469 [00:02<00:01, 124.40it/s]

Step 1,700, (N samples: 217,600), Loss: 543.4409, (Recon: 543.4404, KL: 0.0004), Gradient norm: 1.2042


 87%|████████▋ | 409/469 [00:03<00:00, 124.97it/s]

Step 1,800, (N samples: 230,400), Loss: 543.4491, (Recon: 543.4490, KL: 0.0001), Gradient norm: 1.1814


100%|██████████| 469/469 [00:03<00:00, 122.52it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.41it/s]


====> Test set loss: 543.4380, (BCE: 543.4378, KLD: 0.0002)
Epoch 5/50


 10%|▉         | 45/469 [00:00<00:03, 114.06it/s]

Step 1,900, (N samples: 243,200), Loss: 543.4489, (Recon: 543.4489, KL: 0.0001), Gradient norm: 1.1554


 32%|███▏      | 149/469 [00:01<00:02, 124.37it/s]

Step 2,000, (N samples: 256,000), Loss: 543.4340, (Recon: 543.4340, KL: 0.0001), Gradient norm: 1.1737


 51%|█████     | 240/469 [00:01<00:01, 124.98it/s]

Step 2,100, (N samples: 268,800), Loss: 543.4568, (Recon: 543.4567, KL: 0.0002), Gradient norm: 1.1374


 73%|███████▎  | 344/469 [00:02<00:00, 125.09it/s]

Step 2,200, (N samples: 281,600), Loss: 543.4350, (Recon: 543.4349, KL: 0.0001), Gradient norm: 1.1955


 96%|█████████▌| 448/469 [00:03<00:00, 124.77it/s]

Step 2,300, (N samples: 294,400), Loss: 543.4411, (Recon: 543.4409, KL: 0.0002), Gradient norm: 1.1511


100%|██████████| 469/469 [00:03<00:00, 123.01it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.80it/s]


====> Test set loss: 543.4395, (BCE: 543.4395, KLD: 0.0001)
Epoch 6/50


 15%|█▌        | 71/469 [00:00<00:03, 120.36it/s]

Step 2,400, (N samples: 307,200), Loss: 543.4250, (Recon: 543.4249, KL: 0.0001), Gradient norm: 1.1590


 37%|███▋      | 175/469 [00:01<00:02, 124.90it/s]

Step 2,500, (N samples: 320,000), Loss: 543.4327, (Recon: 543.4316, KL: 0.0011), Gradient norm: 1.1742


 59%|█████▉    | 279/469 [00:02<00:01, 123.28it/s]

Step 2,600, (N samples: 332,800), Loss: 543.4335, (Recon: 543.4334, KL: 0.0001), Gradient norm: 1.1812


 79%|███████▉  | 370/469 [00:03<00:00, 125.00it/s]

Step 2,700, (N samples: 345,600), Loss: 543.4312, (Recon: 543.4311, KL: 0.0001), Gradient norm: 1.1933


100%|██████████| 469/469 [00:03<00:00, 122.80it/s]


Step 2,800, (N samples: 358,400), Loss: 543.4611, (Recon: 543.4609, KL: 0.0001), Gradient norm: 1.2528


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.21it/s]


====> Test set loss: 543.4420, (BCE: 543.4416, KLD: 0.0004)
Epoch 7/50


 23%|██▎       | 110/469 [00:00<00:02, 122.45it/s]

Step 2,900, (N samples: 371,200), Loss: 543.4423, (Recon: 543.4421, KL: 0.0003), Gradient norm: 1.2227


 43%|████▎     | 201/469 [00:01<00:02, 124.88it/s]

Step 3,000, (N samples: 384,000), Loss: 543.4320, (Recon: 543.4319, KL: 0.0001), Gradient norm: 1.2006


 65%|██████▌   | 305/469 [00:02<00:01, 125.03it/s]

Step 3,100, (N samples: 396,800), Loss: 543.4354, (Recon: 543.4353, KL: 0.0001), Gradient norm: 1.1770


 87%|████████▋ | 409/469 [00:03<00:00, 125.05it/s]

Step 3,200, (N samples: 409,600), Loss: 543.4720, (Recon: 543.4717, KL: 0.0003), Gradient norm: 1.2478


100%|██████████| 469/469 [00:03<00:00, 122.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 168.68it/s]


====> Test set loss: 543.4414, (BCE: 543.4412, KLD: 0.0001)
Epoch 8/50


  7%|▋         | 32/469 [00:00<00:04, 105.44it/s]

Step 3,300, (N samples: 422,400), Loss: 543.4501, (Recon: 543.4498, KL: 0.0003), Gradient norm: 1.2099


 29%|██▉       | 136/469 [00:01<00:02, 124.19it/s]

Step 3,400, (N samples: 435,200), Loss: 543.4290, (Recon: 543.4288, KL: 0.0002), Gradient norm: 1.2681


 51%|█████     | 240/469 [00:01<00:01, 124.87it/s]

Step 3,500, (N samples: 448,000), Loss: 543.4312, (Recon: 543.4310, KL: 0.0002), Gradient norm: 1.3032


 71%|███████   | 331/469 [00:02<00:01, 124.93it/s]

Step 3,600, (N samples: 460,800), Loss: 543.4346, (Recon: 543.4344, KL: 0.0002), Gradient norm: 1.1997


 93%|█████████▎| 435/469 [00:03<00:00, 125.08it/s]

Step 3,700, (N samples: 473,600), Loss: 543.4294, (Recon: 543.4292, KL: 0.0002), Gradient norm: 1.1953


100%|██████████| 469/469 [00:03<00:00, 122.56it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.30it/s]


====> Test set loss: 543.4412, (BCE: 543.4410, KLD: 0.0002)
Epoch 9/50


 15%|█▌        | 71/469 [00:00<00:03, 120.56it/s]

Step 3,800, (N samples: 486,400), Loss: 543.4338, (Recon: 543.4337, KL: 0.0002), Gradient norm: 1.1916


 35%|███▍      | 162/469 [00:01<00:02, 124.68it/s]

Step 3,900, (N samples: 499,200), Loss: 543.4532, (Recon: 543.4531, KL: 0.0001), Gradient norm: 1.2569


 57%|█████▋    | 266/469 [00:02<00:01, 124.97it/s]

Step 4,000, (N samples: 512,000), Loss: 543.4341, (Recon: 543.4341, KL: 0.0000), Gradient norm: 1.2108


 79%|███████▉  | 370/469 [00:03<00:00, 125.04it/s]

Step 4,100, (N samples: 524,800), Loss: 543.4521, (Recon: 543.4517, KL: 0.0004), Gradient norm: 1.1920


100%|██████████| 469/469 [00:03<00:00, 122.88it/s]


Step 4,200, (N samples: 537,600), Loss: 543.4179, (Recon: 543.4177, KL: 0.0002), Gradient norm: 1.1642


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.64it/s]


====> Test set loss: 543.4376, (BCE: 543.4375, KLD: 0.0000)
Epoch 10/50


 20%|██        | 96/469 [00:00<00:03, 122.79it/s]

Step 4,300, (N samples: 550,400), Loss: 543.4185, (Recon: 543.4179, KL: 0.0006), Gradient norm: 1.1778


 43%|████▎     | 200/469 [00:01<00:02, 125.05it/s]

Step 4,400, (N samples: 563,200), Loss: 543.4684, (Recon: 543.4680, KL: 0.0004), Gradient norm: 1.2418


 65%|██████▍   | 304/469 [00:02<00:01, 124.95it/s]

Step 4,500, (N samples: 576,000), Loss: 543.4243, (Recon: 543.4242, KL: 0.0001), Gradient norm: 1.2406


 84%|████████▍ | 395/469 [00:03<00:00, 125.24it/s]

Step 4,600, (N samples: 588,800), Loss: 543.4290, (Recon: 543.4287, KL: 0.0003), Gradient norm: 1.1798


100%|██████████| 469/469 [00:03<00:00, 122.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.15it/s]


====> Test set loss: 543.4370, (BCE: 543.4370, KLD: 0.0000)
Epoch 11/50


  2%|▏         | 9/469 [00:00<00:05, 87.82it/s]

Step 4,700, (N samples: 601,600), Loss: 543.4525, (Recon: 543.4524, KL: 0.0000), Gradient norm: 1.2250


 29%|██▉       | 135/469 [00:01<00:02, 123.78it/s]

Step 4,800, (N samples: 614,400), Loss: 543.4368, (Recon: 543.4367, KL: 0.0000), Gradient norm: 1.2452


 48%|████▊     | 226/469 [00:01<00:01, 125.26it/s]

Step 4,900, (N samples: 627,200), Loss: 543.4593, (Recon: 543.4593, KL: 0.0000), Gradient norm: 1.2459


 70%|███████   | 330/469 [00:02<00:01, 125.32it/s]

Step 5,000, (N samples: 640,000), Loss: 543.4431, (Recon: 543.4430, KL: 0.0000), Gradient norm: 1.2323


 93%|█████████▎| 434/469 [00:03<00:00, 125.40it/s]

Step 5,100, (N samples: 652,800), Loss: 543.4240, (Recon: 543.4240, KL: 0.0000), Gradient norm: 1.2564


100%|██████████| 469/469 [00:03<00:00, 122.93it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.93it/s]


====> Test set loss: 543.4420, (BCE: 543.4420, KLD: 0.0000)
Epoch 12/50


 12%|█▏        | 58/469 [00:00<00:03, 118.22it/s]

Step 5,200, (N samples: 665,600), Loss: 543.4377, (Recon: 543.4377, KL: 0.0000), Gradient norm: 1.2419


 35%|███▍      | 162/469 [00:01<00:02, 125.14it/s]

Step 5,300, (N samples: 678,400), Loss: 543.4526, (Recon: 543.4526, KL: 0.0000), Gradient norm: 1.2480


 57%|█████▋    | 266/469 [00:02<00:01, 125.14it/s]

Step 5,400, (N samples: 691,200), Loss: 543.4461, (Recon: 543.4460, KL: 0.0000), Gradient norm: 1.3234


 76%|███████▌  | 357/469 [00:02<00:00, 125.39it/s]

Step 5,500, (N samples: 704,000), Loss: 543.4382, (Recon: 543.4382, KL: 0.0000), Gradient norm: 1.2297


 98%|█████████▊| 461/469 [00:03<00:00, 125.10it/s]

Step 5,600, (N samples: 716,800), Loss: 543.4360, (Recon: 543.4360, KL: 0.0000), Gradient norm: 1.2590


100%|██████████| 469/469 [00:03<00:00, 123.18it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.97it/s]


====> Test set loss: 543.4409, (BCE: 543.4409, KLD: 0.0000)
Epoch 13/50


 21%|██        | 97/469 [00:00<00:03, 123.15it/s]

Step 5,700, (N samples: 729,600), Loss: 543.4535, (Recon: 543.4535, KL: 0.0000), Gradient norm: 1.2862


 40%|████      | 188/469 [00:01<00:02, 125.32it/s]

Step 5,800, (N samples: 742,400), Loss: 543.4572, (Recon: 543.4572, KL: 0.0000), Gradient norm: 1.2137


 62%|██████▏   | 292/469 [00:02<00:01, 123.74it/s]

Step 5,900, (N samples: 755,200), Loss: 543.4463, (Recon: 543.4463, KL: 0.0000), Gradient norm: 1.2392


 84%|████████▍ | 396/469 [00:03<00:00, 125.29it/s]

Step 6,000, (N samples: 768,000), Loss: 543.4674, (Recon: 543.4674, KL: 0.0000), Gradient norm: 1.2449


100%|██████████| 469/469 [00:03<00:00, 123.17it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.89it/s]


====> Test set loss: 543.4419, (BCE: 543.4419, KLD: 0.0000)
Epoch 14/50


  2%|▏         | 9/469 [00:00<00:05, 86.66it/s]

Step 6,100, (N samples: 780,800), Loss: 543.4531, (Recon: 543.4531, KL: 0.0000), Gradient norm: 1.1670


 26%|██▌       | 123/469 [00:01<00:02, 122.95it/s]

Step 6,200, (N samples: 793,600), Loss: 543.4615, (Recon: 543.4615, KL: 0.0000), Gradient norm: 1.2303


 48%|████▊     | 227/469 [00:01<00:01, 125.21it/s]

Step 6,300, (N samples: 806,400), Loss: 543.4268, (Recon: 543.4268, KL: 0.0000), Gradient norm: 1.2701


 68%|██████▊   | 318/469 [00:02<00:01, 125.51it/s]

Step 6,400, (N samples: 819,200), Loss: 543.4082, (Recon: 543.4082, KL: 0.0000), Gradient norm: 1.2363


 90%|████████▉ | 422/469 [00:03<00:00, 125.29it/s]

Step 6,500, (N samples: 832,000), Loss: 543.4295, (Recon: 543.4294, KL: 0.0000), Gradient norm: 1.3022


100%|██████████| 469/469 [00:03<00:00, 123.11it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.20it/s]


====> Test set loss: 543.4406, (BCE: 543.4406, KLD: 0.0000)
Epoch 15/50


 12%|█▏        | 58/469 [00:00<00:03, 117.64it/s]

Step 6,600, (N samples: 844,800), Loss: 543.4468, (Recon: 543.4468, KL: 0.0000), Gradient norm: 1.2732


 32%|███▏      | 149/469 [00:01<00:02, 124.20it/s]

Step 6,700, (N samples: 857,600), Loss: 543.4328, (Recon: 543.4328, KL: 0.0000), Gradient norm: 1.2034


 54%|█████▍    | 253/469 [00:02<00:01, 125.42it/s]

Step 6,800, (N samples: 870,400), Loss: 543.4360, (Recon: 543.4360, KL: 0.0000), Gradient norm: 1.2948


 76%|███████▌  | 357/469 [00:02<00:00, 125.23it/s]

Step 6,900, (N samples: 883,200), Loss: 543.4616, (Recon: 543.4616, KL: 0.0000), Gradient norm: 1.4094


 96%|█████████▌| 448/469 [00:03<00:00, 124.01it/s]

Step 7,000, (N samples: 896,000), Loss: 543.4431, (Recon: 543.4431, KL: 0.0000), Gradient norm: 1.2276


100%|██████████| 469/469 [00:03<00:00, 122.49it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 168.73it/s]


====> Test set loss: 543.4442, (BCE: 543.4442, KLD: 0.0000)
Epoch 16/50


 18%|█▊        | 84/469 [00:00<00:03, 119.76it/s]

Step 7,100, (N samples: 908,800), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 1.2404


 40%|███▉      | 187/469 [00:01<00:02, 122.81it/s]

Step 7,200, (N samples: 921,600), Loss: 543.4431, (Recon: 543.4431, KL: 0.0000), Gradient norm: 1.3038


 59%|█████▉    | 278/469 [00:02<00:01, 124.42it/s]

Step 7,300, (N samples: 934,400), Loss: 543.4362, (Recon: 543.4362, KL: 0.0000), Gradient norm: 1.3244


 81%|████████▏ | 382/469 [00:03<00:00, 124.99it/s]

Step 7,400, (N samples: 947,200), Loss: 543.4407, (Recon: 543.4407, KL: 0.0000), Gradient norm: 1.2890


100%|██████████| 469/469 [00:03<00:00, 121.57it/s]


Step 7,500, (N samples: 960,000), Loss: 543.4532, (Recon: 543.4532, KL: 0.0000), Gradient norm: 1.3619


Testing: 100%|██████████| 79/79 [00:00<00:00, 170.06it/s]


====> Test set loss: 543.4424, (BCE: 543.4424, KLD: 0.0000)
Epoch 17/50


 23%|██▎       | 110/469 [00:00<00:02, 123.78it/s]

Step 7,600, (N samples: 972,800), Loss: 543.4378, (Recon: 543.4377, KL: 0.0000), Gradient norm: 1.2545


 46%|████▌     | 214/469 [00:01<00:02, 125.22it/s]

Step 7,700, (N samples: 985,600), Loss: 543.4564, (Recon: 543.4564, KL: 0.0000), Gradient norm: 1.2838


 68%|██████▊   | 318/469 [00:02<00:01, 123.43it/s]

Step 7,800, (N samples: 998,400), Loss: 543.4442, (Recon: 543.4442, KL: 0.0000), Gradient norm: 1.2779


 87%|████████▋ | 409/469 [00:03<00:00, 124.49it/s]

Step 7,900, (N samples: 1,011,200), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 1.3462


100%|██████████| 469/469 [00:03<00:00, 122.92it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.92it/s]


====> Test set loss: 543.4418, (BCE: 543.4418, KLD: 0.0000)
Epoch 18/50


 10%|▉         | 45/469 [00:00<00:03, 113.50it/s]

Step 8,000, (N samples: 1,024,000), Loss: 543.4590, (Recon: 543.4590, KL: 0.0000), Gradient norm: 1.4040


 32%|███▏      | 149/469 [00:01<00:02, 124.62it/s]

Step 8,100, (N samples: 1,036,800), Loss: 543.4366, (Recon: 543.4365, KL: 0.0000), Gradient norm: 1.2691


 54%|█████▍    | 253/469 [00:02<00:01, 124.43it/s]

Step 8,200, (N samples: 1,049,600), Loss: 543.4567, (Recon: 543.4567, KL: 0.0000), Gradient norm: 1.3735


 73%|███████▎  | 344/469 [00:02<00:01, 123.90it/s]

Step 8,300, (N samples: 1,062,400), Loss: 543.4424, (Recon: 543.4424, KL: 0.0000), Gradient norm: 1.3252


 96%|█████████▌| 448/469 [00:03<00:00, 125.04it/s]

Step 8,400, (N samples: 1,075,200), Loss: 543.4375, (Recon: 543.4375, KL: 0.0000), Gradient norm: 1.2999


100%|██████████| 469/469 [00:03<00:00, 122.46it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.22it/s]


====> Test set loss: 543.4417, (BCE: 543.4417, KLD: 0.0000)
Epoch 19/50


 18%|█▊        | 84/469 [00:00<00:03, 122.02it/s]

Step 8,500, (N samples: 1,088,000), Loss: 543.4221, (Recon: 543.4221, KL: 0.0000), Gradient norm: 1.2515


 37%|███▋      | 175/469 [00:01<00:02, 125.31it/s]

Step 8,600, (N samples: 1,100,800), Loss: 543.4358, (Recon: 543.4358, KL: 0.0000), Gradient norm: 1.3050


 59%|█████▉    | 279/469 [00:02<00:01, 124.62it/s]

Step 8,700, (N samples: 1,113,600), Loss: 543.4381, (Recon: 543.4381, KL: 0.0000), Gradient norm: 1.2413


 82%|████████▏ | 383/469 [00:03<00:00, 124.72it/s]

Step 8,800, (N samples: 1,126,400), Loss: 543.4341, (Recon: 543.4341, KL: 0.0000), Gradient norm: 1.3592


100%|██████████| 469/469 [00:03<00:00, 122.95it/s]


Step 8,900, (N samples: 1,139,200), Loss: 543.4749, (Recon: 543.4749, KL: 0.0000), Gradient norm: 1.3219


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.11it/s]


====> Test set loss: 543.4403, (BCE: 543.4403, KLD: 0.0000)
Epoch 20/50


 23%|██▎       | 110/469 [00:00<00:02, 124.01it/s]

Step 9,000, (N samples: 1,152,000), Loss: 543.4285, (Recon: 543.4285, KL: 0.0000), Gradient norm: 1.2639


 46%|████▌     | 214/469 [00:01<00:02, 124.59it/s]

Step 9,100, (N samples: 1,164,800), Loss: 543.4271, (Recon: 543.4271, KL: 0.0000), Gradient norm: 1.3013


 65%|██████▌   | 305/469 [00:02<00:01, 124.65it/s]

Step 9,200, (N samples: 1,177,600), Loss: 543.4370, (Recon: 543.4370, KL: 0.0000), Gradient norm: 1.3589


 87%|████████▋ | 409/469 [00:03<00:00, 125.08it/s]

Step 9,300, (N samples: 1,190,400), Loss: 543.4410, (Recon: 543.4410, KL: 0.0000), Gradient norm: 1.2791


100%|██████████| 469/469 [00:03<00:00, 123.03it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 167.20it/s]


====> Test set loss: 543.4418, (BCE: 543.4418, KLD: 0.0000)
Epoch 21/50


 10%|▉         | 45/469 [00:00<00:03, 113.99it/s]

Step 9,400, (N samples: 1,203,200), Loss: 543.4281, (Recon: 543.4281, KL: 0.0000), Gradient norm: 1.3090


 29%|██▉       | 136/469 [00:01<00:02, 122.78it/s]

Step 9,500, (N samples: 1,216,000), Loss: 543.4326, (Recon: 543.4326, KL: 0.0000), Gradient norm: 1.3421


 51%|█████     | 240/469 [00:01<00:01, 122.30it/s]

Step 9,600, (N samples: 1,228,800), Loss: 543.4343, (Recon: 543.4343, KL: 0.0000), Gradient norm: 1.3025


 73%|███████▎  | 344/469 [00:02<00:01, 124.25it/s]

Step 9,700, (N samples: 1,241,600), Loss: 543.4413, (Recon: 543.4413, KL: 0.0000), Gradient norm: 1.4288


 93%|█████████▎| 435/469 [00:03<00:00, 125.31it/s]

Step 9,800, (N samples: 1,254,400), Loss: 543.4288, (Recon: 543.4287, KL: 0.0001), Gradient norm: 1.3259


100%|██████████| 469/469 [00:03<00:00, 122.42it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 168.64it/s]


====> Test set loss: 543.4414, (BCE: 543.4413, KLD: 0.0001)
Epoch 22/50


 15%|█▌        | 71/469 [00:00<00:03, 119.36it/s]

Step 9,900, (N samples: 1,267,200), Loss: 543.4423, (Recon: 543.4423, KL: 0.0000), Gradient norm: 1.5060


 37%|███▋      | 175/469 [00:01<00:02, 124.10it/s]

Step 10,000, (N samples: 1,280,000), Loss: 543.4449, (Recon: 543.4449, KL: 0.0000), Gradient norm: 1.3877


 57%|█████▋    | 266/469 [00:02<00:01, 123.76it/s]

Step 10,100, (N samples: 1,292,800), Loss: 543.4480, (Recon: 543.4480, KL: 0.0000), Gradient norm: 1.3972


 79%|███████▉  | 370/469 [00:03<00:00, 125.16it/s]

Step 10,200, (N samples: 1,305,600), Loss: 543.4220, (Recon: 543.4219, KL: 0.0000), Gradient norm: 1.3181


100%|██████████| 469/469 [00:03<00:00, 122.37it/s]


Step 10,300, (N samples: 1,318,400), Loss: 543.4301, (Recon: 543.4301, KL: 0.0000), Gradient norm: 1.3718


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.32it/s]


====> Test set loss: 543.4402, (BCE: 543.4402, KLD: 0.0000)
Epoch 23/50


 21%|██        | 97/469 [00:00<00:03, 120.97it/s]

Step 10,400, (N samples: 1,331,200), Loss: 543.4377, (Recon: 543.4376, KL: 0.0001), Gradient norm: 1.3689


 43%|████▎     | 201/469 [00:01<00:02, 122.84it/s]

Step 10,500, (N samples: 1,344,000), Loss: 543.4562, (Recon: 543.4559, KL: 0.0003), Gradient norm: 1.4350


 65%|██████▌   | 305/469 [00:02<00:01, 121.76it/s]

Step 10,600, (N samples: 1,356,800), Loss: 543.4465, (Recon: 543.4465, KL: 0.0001), Gradient norm: 1.3762


 84%|████████▍ | 396/469 [00:03<00:00, 124.74it/s]

Step 10,700, (N samples: 1,369,600), Loss: 543.4459, (Recon: 543.4458, KL: 0.0001), Gradient norm: 1.4217


100%|██████████| 469/469 [00:03<00:00, 121.41it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.60it/s]


====> Test set loss: 543.4430, (BCE: 543.4429, KLD: 0.0001)
Epoch 24/50


  2%|▏         | 9/469 [00:00<00:05, 82.12it/s]

Step 10,800, (N samples: 1,382,400), Loss: 543.4216, (Recon: 543.4214, KL: 0.0003), Gradient norm: 1.3398


 29%|██▉       | 136/469 [00:01<00:02, 124.66it/s]

Step 10,900, (N samples: 1,395,200), Loss: 543.4354, (Recon: 543.4354, KL: 0.0000), Gradient norm: 1.3027


 48%|████▊     | 227/469 [00:01<00:01, 124.59it/s]

Step 11,000, (N samples: 1,408,000), Loss: 543.4540, (Recon: 543.4540, KL: 0.0000), Gradient norm: 1.3232


 71%|███████   | 331/469 [00:02<00:01, 124.97it/s]

Step 11,100, (N samples: 1,420,800), Loss: 543.4384, (Recon: 543.4381, KL: 0.0003), Gradient norm: 1.2548


 93%|█████████▎| 435/469 [00:03<00:00, 124.27it/s]

Step 11,200, (N samples: 1,433,600), Loss: 543.4578, (Recon: 543.4578, KL: 0.0000), Gradient norm: 1.4608


100%|██████████| 469/469 [00:03<00:00, 122.81it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.06it/s]


====> Test set loss: 543.4408, (BCE: 543.4408, KLD: 0.0000)
Epoch 25/50


 12%|█▏        | 58/469 [00:00<00:03, 117.84it/s]

Step 11,300, (N samples: 1,446,400), Loss: 543.4209, (Recon: 543.4209, KL: 0.0000), Gradient norm: 1.4282


 35%|███▍      | 162/469 [00:01<00:02, 124.20it/s]

Step 11,400, (N samples: 1,459,200), Loss: 543.4444, (Recon: 543.4444, KL: 0.0000), Gradient norm: 1.3980


 57%|█████▋    | 266/469 [00:02<00:01, 124.19it/s]

Step 11,500, (N samples: 1,472,000), Loss: 543.4358, (Recon: 543.4358, KL: 0.0000), Gradient norm: 1.3038


 79%|███████▉  | 370/469 [00:03<00:00, 124.67it/s]

Step 11,600, (N samples: 1,484,800), Loss: 543.4744, (Recon: 543.4744, KL: 0.0000), Gradient norm: 1.3918


 98%|█████████▊| 461/469 [00:03<00:00, 125.03it/s]

Step 11,700, (N samples: 1,497,600), Loss: 543.4645, (Recon: 543.4645, KL: 0.0000), Gradient norm: 1.4702


100%|██████████| 469/469 [00:03<00:00, 122.32it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.44it/s]


====> Test set loss: 543.4373, (BCE: 543.4372, KLD: 0.0001)
Epoch 26/50


 21%|██        | 97/469 [00:00<00:03, 122.47it/s]

Step 11,800, (N samples: 1,510,400), Loss: 543.4443, (Recon: 543.4443, KL: 0.0000), Gradient norm: 1.3480


 43%|████▎     | 201/469 [00:01<00:02, 124.45it/s]

Step 11,900, (N samples: 1,523,200), Loss: 543.4478, (Recon: 543.4478, KL: 0.0000), Gradient norm: 1.3175


 62%|██████▏   | 292/469 [00:02<00:01, 124.54it/s]

Step 12,000, (N samples: 1,536,000), Loss: 543.4438, (Recon: 543.4438, KL: 0.0000), Gradient norm: 1.3855


 84%|████████▍ | 396/469 [00:03<00:00, 125.02it/s]

Step 12,100, (N samples: 1,548,800), Loss: 543.4536, (Recon: 543.4536, KL: 0.0000), Gradient norm: 1.4090


100%|██████████| 469/469 [00:03<00:00, 122.80it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.12it/s]


====> Test set loss: 543.4399, (BCE: 543.4398, KLD: 0.0000)
Epoch 27/50


  2%|▏         | 9/469 [00:00<00:05, 86.17it/s]

Step 12,200, (N samples: 1,561,600), Loss: 543.4451, (Recon: 543.4451, KL: 0.0000), Gradient norm: 1.3640


 26%|██▌       | 123/469 [00:01<00:02, 123.13it/s]

Step 12,300, (N samples: 1,574,400), Loss: 543.4405, (Recon: 543.4405, KL: 0.0000), Gradient norm: 1.4434


 48%|████▊     | 227/469 [00:01<00:01, 124.60it/s]

Step 12,400, (N samples: 1,587,200), Loss: 543.4301, (Recon: 543.4301, KL: 0.0000), Gradient norm: 1.3373


 71%|███████   | 331/469 [00:02<00:01, 124.52it/s]

Step 12,500, (N samples: 1,600,000), Loss: 543.4570, (Recon: 543.4570, KL: 0.0000), Gradient norm: 1.3371


 90%|████████▉ | 422/469 [00:03<00:00, 123.77it/s]

Step 12,600, (N samples: 1,612,800), Loss: 543.4554, (Recon: 543.4554, KL: 0.0000), Gradient norm: 1.4967


100%|██████████| 469/469 [00:03<00:00, 122.25it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 167.81it/s]


====> Test set loss: 543.4434, (BCE: 543.4433, KLD: 0.0001)
Epoch 28/50


 12%|█▏        | 58/469 [00:00<00:03, 117.54it/s]

Step 12,700, (N samples: 1,625,600), Loss: 543.4556, (Recon: 543.4555, KL: 0.0001), Gradient norm: 1.4381


 35%|███▍      | 162/469 [00:01<00:02, 121.25it/s]

Step 12,800, (N samples: 1,638,400), Loss: 543.4404, (Recon: 543.4404, KL: 0.0000), Gradient norm: 1.3456


 54%|█████▍    | 253/469 [00:02<00:01, 123.56it/s]

Step 12,900, (N samples: 1,651,200), Loss: 543.4575, (Recon: 543.4575, KL: 0.0000), Gradient norm: 1.3727


 76%|███████▌  | 357/469 [00:02<00:00, 124.69it/s]

Step 13,000, (N samples: 1,664,000), Loss: 543.4405, (Recon: 543.4404, KL: 0.0001), Gradient norm: 1.4549


 98%|█████████▊| 461/469 [00:03<00:00, 124.90it/s]

Step 13,100, (N samples: 1,676,800), Loss: 543.4362, (Recon: 543.4362, KL: 0.0000), Gradient norm: 1.4243


100%|██████████| 469/469 [00:03<00:00, 121.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.80it/s]


====> Test set loss: 543.4406, (BCE: 543.4406, KLD: 0.0001)
Epoch 29/50


 18%|█▊        | 84/469 [00:00<00:03, 122.27it/s]

Step 13,200, (N samples: 1,689,600), Loss: 543.4346, (Recon: 543.4345, KL: 0.0000), Gradient norm: 1.4054


 40%|████      | 188/469 [00:01<00:02, 125.20it/s]

Step 13,300, (N samples: 1,702,400), Loss: 543.4433, (Recon: 543.4433, KL: 0.0000), Gradient norm: 1.4401


 62%|██████▏   | 292/469 [00:02<00:01, 125.06it/s]

Step 13,400, (N samples: 1,715,200), Loss: 543.4615, (Recon: 543.4615, KL: 0.0000), Gradient norm: 1.4304


 82%|████████▏ | 383/469 [00:03<00:00, 125.14it/s]

Step 13,500, (N samples: 1,728,000), Loss: 543.4299, (Recon: 543.4298, KL: 0.0001), Gradient norm: 1.3003


100%|██████████| 469/469 [00:03<00:00, 123.19it/s]


Step 13,600, (N samples: 1,740,800), Loss: 543.4335, (Recon: 543.4335, KL: 0.0000), Gradient norm: 1.7276


Testing: 100%|██████████| 79/79 [00:00<00:00, 170.10it/s]


====> Test set loss: 543.4396, (BCE: 543.4395, KLD: 0.0000)
Epoch 30/50


 26%|██▌       | 123/469 [00:01<00:02, 124.38it/s]

Step 13,700, (N samples: 1,753,600), Loss: 543.4627, (Recon: 543.4624, KL: 0.0003), Gradient norm: 1.4660


 46%|████▌     | 214/469 [00:01<00:02, 124.77it/s]

Step 13,800, (N samples: 1,766,400), Loss: 543.4414, (Recon: 543.4414, KL: 0.0000), Gradient norm: 1.5301


 68%|██████▊   | 318/469 [00:02<00:01, 123.63it/s]

Step 13,900, (N samples: 1,779,200), Loss: 543.4291, (Recon: 543.4291, KL: 0.0000), Gradient norm: 1.3774


 90%|████████▉ | 422/469 [00:03<00:00, 124.98it/s]

Step 14,000, (N samples: 1,792,000), Loss: 543.4551, (Recon: 543.4551, KL: 0.0000), Gradient norm: 1.3515


100%|██████████| 469/469 [00:03<00:00, 122.79it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.08it/s]


====> Test set loss: 543.4402, (BCE: 543.4402, KLD: 0.0000)
Epoch 31/50


 10%|▉         | 45/469 [00:00<00:03, 113.86it/s]

Step 14,100, (N samples: 1,804,800), Loss: 543.4318, (Recon: 543.4318, KL: 0.0000), Gradient norm: 1.2990


 32%|███▏      | 149/469 [00:01<00:02, 124.93it/s]

Step 14,200, (N samples: 1,817,600), Loss: 543.4339, (Recon: 543.4338, KL: 0.0000), Gradient norm: 1.4106


 54%|█████▍    | 253/469 [00:02<00:01, 125.48it/s]

Step 14,300, (N samples: 1,830,400), Loss: 543.4171, (Recon: 543.4171, KL: 0.0000), Gradient norm: 1.3172


 73%|███████▎  | 344/469 [00:02<00:00, 125.03it/s]

Step 14,400, (N samples: 1,843,200), Loss: 543.4210, (Recon: 543.4210, KL: 0.0000), Gradient norm: 1.3510


 94%|█████████▍| 442/469 [00:03<00:00, 90.03it/s] 

Step 14,500, (N samples: 1,856,000), Loss: 543.4211, (Recon: 543.4211, KL: 0.0000), Gradient norm: 1.4715


100%|██████████| 469/469 [00:04<00:00, 113.57it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 150.37it/s]


====> Test set loss: 543.4406, (BCE: 543.4405, KLD: 0.0001)
Epoch 32/50


 18%|█▊        | 84/469 [00:00<00:03, 122.31it/s]

Step 14,600, (N samples: 1,868,800), Loss: 543.4337, (Recon: 543.4335, KL: 0.0002), Gradient norm: 1.4802


 37%|███▋      | 175/469 [00:01<00:02, 125.13it/s]

Step 14,700, (N samples: 1,881,600), Loss: 543.4389, (Recon: 543.4389, KL: 0.0000), Gradient norm: 1.4360


 59%|█████▉    | 279/469 [00:02<00:01, 125.38it/s]

Step 14,800, (N samples: 1,894,400), Loss: 543.4252, (Recon: 543.4252, KL: 0.0000), Gradient norm: 1.4167


 82%|████████▏ | 383/469 [00:03<00:00, 125.16it/s]

Step 14,900, (N samples: 1,907,200), Loss: 543.4574, (Recon: 543.4573, KL: 0.0001), Gradient norm: 1.4648


100%|██████████| 469/469 [00:03<00:00, 122.95it/s]


Step 15,000, (N samples: 1,920,000), Loss: 543.4620, (Recon: 543.4620, KL: 0.0000), Gradient norm: 1.4401


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.54it/s]


====> Test set loss: 543.4402, (BCE: 543.4402, KLD: 0.0000)
Epoch 33/50


 23%|██▎       | 110/469 [00:00<00:02, 124.03it/s]

Step 15,100, (N samples: 1,932,800), Loss: 543.4513, (Recon: 543.4501, KL: 0.0012), Gradient norm: 1.4309


 46%|████▌     | 214/469 [00:01<00:02, 125.53it/s]

Step 15,200, (N samples: 1,945,600), Loss: 543.4279, (Recon: 543.4279, KL: 0.0000), Gradient norm: 1.3523


 68%|██████▊   | 318/469 [00:02<00:01, 125.50it/s]

Step 15,300, (N samples: 1,958,400), Loss: 543.4495, (Recon: 543.4495, KL: 0.0000), Gradient norm: 1.3535


 87%|████████▋ | 409/469 [00:03<00:00, 125.41it/s]

Step 15,400, (N samples: 1,971,200), Loss: 543.4408, (Recon: 543.4408, KL: 0.0000), Gradient norm: 1.3430


100%|██████████| 469/469 [00:03<00:00, 123.51it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.84it/s]


====> Test set loss: 543.4445, (BCE: 543.4445, KLD: 0.0000)
Epoch 34/50


 10%|▉         | 45/469 [00:00<00:03, 113.79it/s]

Step 15,500, (N samples: 1,984,000), Loss: 543.4301, (Recon: 543.4301, KL: 0.0000), Gradient norm: 1.3613


 32%|███▏      | 149/469 [00:01<00:02, 124.87it/s]

Step 15,600, (N samples: 1,996,800), Loss: 543.4277, (Recon: 543.4277, KL: 0.0000), Gradient norm: 1.3747


 51%|█████     | 240/469 [00:01<00:01, 125.59it/s]

Step 15,700, (N samples: 2,009,600), Loss: 543.4565, (Recon: 543.4564, KL: 0.0001), Gradient norm: 1.3840


 73%|███████▎  | 344/469 [00:02<00:00, 125.72it/s]

Step 15,800, (N samples: 2,022,400), Loss: 543.4409, (Recon: 543.4409, KL: 0.0000), Gradient norm: 1.2952


 96%|█████████▌| 448/469 [00:03<00:00, 125.11it/s]

Step 15,900, (N samples: 2,035,200), Loss: 543.4495, (Recon: 543.4495, KL: 0.0000), Gradient norm: 1.3585


100%|██████████| 469/469 [00:03<00:00, 123.25it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 168.98it/s]


====> Test set loss: 543.4399, (BCE: 543.4399, KLD: 0.0000)
Epoch 35/50


 15%|█▌        | 71/469 [00:00<00:03, 120.28it/s]

Step 16,000, (N samples: 2,048,000), Loss: 543.4316, (Recon: 543.4316, KL: 0.0000), Gradient norm: 1.3804


 37%|███▋      | 175/469 [00:01<00:02, 124.78it/s]

Step 16,100, (N samples: 2,060,800), Loss: 543.4723, (Recon: 543.4723, KL: 0.0000), Gradient norm: 1.4238


 59%|█████▉    | 279/469 [00:02<00:01, 125.42it/s]

Step 16,200, (N samples: 2,073,600), Loss: 543.4528, (Recon: 543.4528, KL: 0.0000), Gradient norm: 1.4630


 79%|███████▉  | 370/469 [00:03<00:00, 125.44it/s]

Step 16,300, (N samples: 2,086,400), Loss: 543.4502, (Recon: 543.4502, KL: 0.0000), Gradient norm: 1.3662


100%|██████████| 469/469 [00:03<00:00, 123.11it/s]


Step 16,400, (N samples: 2,099,200), Loss: 543.4451, (Recon: 543.4451, KL: 0.0000), Gradient norm: 1.4249


Testing: 100%|██████████| 79/79 [00:00<00:00, 170.26it/s]


====> Test set loss: 543.4387, (BCE: 543.4387, KLD: 0.0000)
Epoch 36/50


 23%|██▎       | 110/469 [00:00<00:02, 123.99it/s]

Step 16,500, (N samples: 2,112,000), Loss: 543.4327, (Recon: 543.4327, KL: 0.0000), Gradient norm: 1.4160


 43%|████▎     | 201/469 [00:01<00:02, 125.12it/s]

Step 16,600, (N samples: 2,124,800), Loss: 543.4424, (Recon: 543.4424, KL: 0.0000), Gradient norm: 1.4534


 65%|██████▌   | 305/469 [00:02<00:01, 124.95it/s]

Step 16,700, (N samples: 2,137,600), Loss: 543.4497, (Recon: 543.4497, KL: 0.0000), Gradient norm: 1.4859


 87%|████████▋ | 409/469 [00:03<00:00, 125.48it/s]

Step 16,800, (N samples: 2,150,400), Loss: 543.4324, (Recon: 543.4324, KL: 0.0000), Gradient norm: 1.5260


100%|██████████| 469/469 [00:03<00:00, 123.25it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.74it/s]


====> Test set loss: 543.4455, (BCE: 543.4455, KLD: 0.0000)
Epoch 37/50


  7%|▋         | 32/469 [00:00<00:04, 106.72it/s]

Step 16,900, (N samples: 2,163,200), Loss: 543.4227, (Recon: 543.4227, KL: 0.0000), Gradient norm: 1.5144


 29%|██▉       | 136/469 [00:01<00:02, 122.54it/s]

Step 17,000, (N samples: 2,176,000), Loss: 543.4625, (Recon: 543.4625, KL: 0.0000), Gradient norm: 1.4427


 51%|█████     | 240/469 [00:01<00:01, 125.27it/s]

Step 17,100, (N samples: 2,188,800), Loss: 543.4495, (Recon: 543.4495, KL: 0.0000), Gradient norm: 1.4061


 71%|███████   | 331/469 [00:02<00:01, 124.91it/s]

Step 17,200, (N samples: 2,201,600), Loss: 543.4390, (Recon: 543.4390, KL: 0.0000), Gradient norm: 1.5117


 93%|█████████▎| 435/469 [00:03<00:00, 125.11it/s]

Step 17,300, (N samples: 2,214,400), Loss: 543.4380, (Recon: 543.4380, KL: 0.0000), Gradient norm: 1.3983


100%|██████████| 469/469 [00:03<00:00, 122.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.95it/s]


====> Test set loss: 543.4386, (BCE: 543.4386, KLD: 0.0000)
Epoch 38/50


 15%|█▌        | 71/469 [00:00<00:03, 120.77it/s]

Step 17,400, (N samples: 2,227,200), Loss: 543.4644, (Recon: 543.4644, KL: 0.0000), Gradient norm: 1.3888


 35%|███▍      | 162/469 [00:01<00:02, 125.19it/s]

Step 17,500, (N samples: 2,240,000), Loss: 543.4397, (Recon: 543.4397, KL: 0.0000), Gradient norm: 1.4508


 57%|█████▋    | 266/469 [00:02<00:01, 125.50it/s]

Step 17,600, (N samples: 2,252,800), Loss: 543.4452, (Recon: 543.4452, KL: 0.0000), Gradient norm: 1.4498


 79%|███████▉  | 370/469 [00:03<00:00, 124.68it/s]

Step 17,700, (N samples: 2,265,600), Loss: 543.4478, (Recon: 543.4478, KL: 0.0000), Gradient norm: 1.4189


100%|██████████| 469/469 [00:03<00:00, 123.34it/s]


Step 17,800, (N samples: 2,278,400), Loss: 543.4310, (Recon: 543.4310, KL: 0.0000), Gradient norm: 1.5421


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.03it/s]


====> Test set loss: 543.4413, (BCE: 543.4413, KLD: 0.0000)
Epoch 39/50


 21%|██        | 97/469 [00:00<00:03, 123.21it/s]

Step 17,900, (N samples: 2,291,200), Loss: 543.4374, (Recon: 543.4374, KL: 0.0000), Gradient norm: 1.4431


 43%|████▎     | 201/469 [00:01<00:02, 125.32it/s]

Step 18,000, (N samples: 2,304,000), Loss: 543.4224, (Recon: 543.4224, KL: 0.0000), Gradient norm: 1.4470


 62%|██████▏   | 292/469 [00:02<00:01, 125.50it/s]

Step 18,100, (N samples: 2,316,800), Loss: 543.4388, (Recon: 543.4388, KL: 0.0000), Gradient norm: 1.3778


 84%|████████▍ | 396/469 [00:03<00:00, 125.05it/s]

Step 18,200, (N samples: 2,329,600), Loss: 543.4374, (Recon: 543.4374, KL: 0.0000), Gradient norm: 1.3987


100%|██████████| 469/469 [00:03<00:00, 122.93it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.72it/s]


====> Test set loss: 543.4408, (BCE: 543.4408, KLD: 0.0000)
Epoch 40/50


  2%|▏         | 9/469 [00:00<00:05, 83.01it/s]

Step 18,300, (N samples: 2,342,400), Loss: 543.4421, (Recon: 543.4421, KL: 0.0000), Gradient norm: 1.3945


 26%|██▌       | 123/469 [00:01<00:02, 124.21it/s]

Step 18,400, (N samples: 2,355,200), Loss: 543.4491, (Recon: 543.4491, KL: 0.0000), Gradient norm: 1.3783


 48%|████▊     | 227/469 [00:01<00:01, 125.42it/s]

Step 18,500, (N samples: 2,368,000), Loss: 543.4209, (Recon: 543.4209, KL: 0.0000), Gradient norm: 1.5492


 71%|███████   | 331/469 [00:02<00:01, 125.17it/s]

Step 18,600, (N samples: 2,380,800), Loss: 543.4380, (Recon: 543.4380, KL: 0.0000), Gradient norm: 1.4170


 93%|█████████▎| 435/469 [00:03<00:00, 125.45it/s]

Step 18,700, (N samples: 2,393,600), Loss: 543.4221, (Recon: 543.4221, KL: 0.0000), Gradient norm: 1.3923


100%|██████████| 469/469 [00:03<00:00, 123.15it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.59it/s]


====> Test set loss: 543.4416, (BCE: 543.4416, KLD: 0.0000)
Epoch 41/50


 12%|█▏        | 58/469 [00:00<00:03, 118.02it/s]

Step 18,800, (N samples: 2,406,400), Loss: 543.4540, (Recon: 543.4540, KL: 0.0000), Gradient norm: 1.4411


 35%|███▍      | 162/469 [00:01<00:02, 124.30it/s]

Step 18,900, (N samples: 2,419,200), Loss: 543.4404, (Recon: 543.4404, KL: 0.0000), Gradient norm: 1.4293


 57%|█████▋    | 266/469 [00:02<00:01, 125.30it/s]

Step 19,000, (N samples: 2,432,000), Loss: 543.4396, (Recon: 543.4396, KL: 0.0000), Gradient norm: 1.4401


 76%|███████▌  | 357/469 [00:02<00:00, 125.41it/s]

Step 19,100, (N samples: 2,444,800), Loss: 543.4501, (Recon: 543.4501, KL: 0.0000), Gradient norm: 1.5253


 98%|█████████▊| 461/469 [00:03<00:00, 125.43it/s]

Step 19,200, (N samples: 2,457,600), Loss: 543.4400, (Recon: 543.4400, KL: 0.0000), Gradient norm: 1.5272


100%|██████████| 469/469 [00:03<00:00, 123.26it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 167.56it/s]


====> Test set loss: 543.4421, (BCE: 543.4421, KLD: 0.0000)
Epoch 42/50


 21%|██        | 97/469 [00:00<00:03, 123.15it/s]

Step 19,300, (N samples: 2,470,400), Loss: 543.4078, (Recon: 543.4077, KL: 0.0000), Gradient norm: 1.4046


 40%|████      | 188/469 [00:01<00:02, 124.84it/s]

Step 19,400, (N samples: 2,483,200), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 1.4708


 62%|██████▏   | 292/469 [00:02<00:01, 125.06it/s]

Step 19,500, (N samples: 2,496,000), Loss: 543.4501, (Recon: 543.4501, KL: 0.0000), Gradient norm: 1.4922


 84%|████████▍ | 396/469 [00:03<00:00, 125.37it/s]

Step 19,600, (N samples: 2,508,800), Loss: 543.4365, (Recon: 543.4365, KL: 0.0000), Gradient norm: 1.3724


100%|██████████| 469/469 [00:03<00:00, 123.03it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.84it/s]


====> Test set loss: 543.4411, (BCE: 543.4411, KLD: 0.0000)
Epoch 43/50


  2%|▏         | 9/469 [00:00<00:05, 86.48it/s]

Step 19,700, (N samples: 2,521,600), Loss: 543.4363, (Recon: 543.4363, KL: 0.0000), Gradient norm: 1.3977


 26%|██▌       | 123/469 [00:01<00:02, 124.45it/s]

Step 19,800, (N samples: 2,534,400), Loss: 543.4113, (Recon: 543.4113, KL: 0.0000), Gradient norm: 1.3475


 48%|████▊     | 227/469 [00:01<00:01, 125.12it/s]

Step 19,900, (N samples: 2,547,200), Loss: 543.4410, (Recon: 543.4410, KL: 0.0000), Gradient norm: 1.4294


 68%|██████▊   | 318/469 [00:02<00:01, 124.96it/s]

Step 20,000, (N samples: 2,560,000), Loss: 543.4454, (Recon: 543.4454, KL: 0.0000), Gradient norm: 1.5043


 90%|████████▉ | 422/469 [00:03<00:00, 125.26it/s]

Step 20,100, (N samples: 2,572,800), Loss: 543.4432, (Recon: 543.4432, KL: 0.0000), Gradient norm: 1.5511


100%|██████████| 469/469 [00:03<00:00, 123.23it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.71it/s]


====> Test set loss: 543.4386, (BCE: 543.4385, KLD: 0.0001)
Epoch 44/50


 12%|█▏        | 55/469 [00:00<00:03, 108.37it/s]

Step 20,200, (N samples: 2,585,600), Loss: 543.4316, (Recon: 543.4316, KL: 0.0001), Gradient norm: 1.3668


 34%|███▍      | 159/469 [00:01<00:02, 124.41it/s]

Step 20,300, (N samples: 2,598,400), Loss: 543.4582, (Recon: 543.4581, KL: 0.0000), Gradient norm: 1.4650


 53%|█████▎    | 250/469 [00:02<00:01, 125.19it/s]

Step 20,400, (N samples: 2,611,200), Loss: 543.4522, (Recon: 543.4521, KL: 0.0001), Gradient norm: 1.3086


 75%|███████▌  | 354/469 [00:02<00:00, 124.72it/s]

Step 20,500, (N samples: 2,624,000), Loss: 543.4166, (Recon: 543.4164, KL: 0.0001), Gradient norm: 1.5370


 98%|█████████▊| 458/469 [00:03<00:00, 124.67it/s]

Step 20,600, (N samples: 2,636,800), Loss: 543.4373, (Recon: 543.4373, KL: 0.0000), Gradient norm: 1.4435


100%|██████████| 469/469 [00:03<00:00, 122.02it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.35it/s]


====> Test set loss: 543.4413, (BCE: 543.4413, KLD: 0.0000)
Epoch 45/50


 18%|█▊        | 84/469 [00:00<00:03, 122.28it/s]

Step 20,700, (N samples: 2,649,600), Loss: 543.4337, (Recon: 543.4337, KL: 0.0000), Gradient norm: 1.4821


 40%|████      | 188/469 [00:01<00:02, 125.20it/s]

Step 20,800, (N samples: 2,662,400), Loss: 543.4486, (Recon: 543.4486, KL: 0.0000), Gradient norm: 1.5654


 59%|█████▉    | 279/469 [00:02<00:01, 125.43it/s]

Step 20,900, (N samples: 2,675,200), Loss: 543.4460, (Recon: 543.4460, KL: 0.0000), Gradient norm: 1.4216


 82%|████████▏ | 383/469 [00:03<00:00, 125.44it/s]

Step 21,000, (N samples: 2,688,000), Loss: 543.4421, (Recon: 543.4421, KL: 0.0000), Gradient norm: 1.4545


100%|██████████| 469/469 [00:03<00:00, 123.32it/s]


Step 21,100, (N samples: 2,700,800), Loss: 543.4644, (Recon: 543.4644, KL: 0.0000), Gradient norm: 1.5077


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.82it/s]


====> Test set loss: 543.4404, (BCE: 543.4404, KLD: 0.0000)
Epoch 46/50


 23%|██▎       | 110/469 [00:00<00:02, 123.81it/s]

Step 21,200, (N samples: 2,713,600), Loss: 543.4617, (Recon: 543.4617, KL: 0.0000), Gradient norm: 1.4946


 46%|████▌     | 214/469 [00:01<00:02, 125.45it/s]

Step 21,300, (N samples: 2,726,400), Loss: 543.4478, (Recon: 543.4478, KL: 0.0000), Gradient norm: 1.5065


 68%|██████▊   | 318/469 [00:02<00:01, 125.21it/s]

Step 21,400, (N samples: 2,739,200), Loss: 543.4540, (Recon: 543.4540, KL: 0.0000), Gradient norm: 1.3853


 87%|████████▋ | 409/469 [00:03<00:00, 125.46it/s]

Step 21,500, (N samples: 2,752,000), Loss: 543.4330, (Recon: 543.4330, KL: 0.0000), Gradient norm: 1.4034


100%|██████████| 469/469 [00:03<00:00, 123.08it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.42it/s]


====> Test set loss: 543.4388, (BCE: 543.4388, KLD: 0.0000)
Epoch 47/50


 10%|▉         | 45/469 [00:00<00:03, 114.23it/s]

Step 21,600, (N samples: 2,764,800), Loss: 543.4365, (Recon: 543.4365, KL: 0.0000), Gradient norm: 1.4131


 32%|███▏      | 149/469 [00:01<00:02, 121.58it/s]

Step 21,700, (N samples: 2,777,600), Loss: 543.4703, (Recon: 543.4703, KL: 0.0001), Gradient norm: 1.5292


 51%|█████     | 240/469 [00:01<00:01, 123.83it/s]

Step 21,800, (N samples: 2,790,400), Loss: 543.4100, (Recon: 543.4099, KL: 0.0001), Gradient norm: 1.5273


 73%|███████▎  | 344/469 [00:02<00:00, 125.25it/s]

Step 21,900, (N samples: 2,803,200), Loss: 543.4535, (Recon: 543.4535, KL: 0.0000), Gradient norm: 1.4813


 96%|█████████▌| 448/469 [00:03<00:00, 125.46it/s]

Step 22,000, (N samples: 2,816,000), Loss: 543.4363, (Recon: 543.4363, KL: 0.0000), Gradient norm: 1.4598


100%|██████████| 469/469 [00:03<00:00, 122.71it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.96it/s]


====> Test set loss: 543.4440, (BCE: 543.4440, KLD: 0.0000)
Epoch 48/50


 15%|█▌        | 71/469 [00:00<00:03, 120.73it/s]

Step 22,100, (N samples: 2,828,800), Loss: 543.4491, (Recon: 543.4491, KL: 0.0000), Gradient norm: 1.4993


 37%|███▋      | 175/469 [00:01<00:02, 125.04it/s]

Step 22,200, (N samples: 2,841,600), Loss: 543.4290, (Recon: 543.4290, KL: 0.0000), Gradient norm: 1.4406


 59%|█████▉    | 279/469 [00:02<00:01, 125.27it/s]

Step 22,300, (N samples: 2,854,400), Loss: 543.4507, (Recon: 543.4507, KL: 0.0000), Gradient norm: 1.4957


 82%|████████▏ | 383/469 [00:03<00:00, 125.45it/s]

Step 22,400, (N samples: 2,867,200), Loss: 543.4510, (Recon: 543.4510, KL: 0.0000), Gradient norm: 1.5331


100%|██████████| 469/469 [00:03<00:00, 123.31it/s]


Step 22,500, (N samples: 2,880,000), Loss: 543.4473, (Recon: 543.4473, KL: 0.0000), Gradient norm: 1.4721


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.58it/s]


====> Test set loss: 543.4387, (BCE: 543.4387, KLD: 0.0000)
Epoch 49/50


 23%|██▎       | 110/469 [00:00<00:02, 123.82it/s]

Step 22,600, (N samples: 2,892,800), Loss: 543.4646, (Recon: 543.4646, KL: 0.0000), Gradient norm: 1.5138


 46%|████▌     | 214/469 [00:01<00:02, 124.75it/s]

Step 22,700, (N samples: 2,905,600), Loss: 543.4323, (Recon: 543.4323, KL: 0.0000), Gradient norm: 1.4784


 65%|██████▌   | 305/469 [00:02<00:01, 122.77it/s]

Step 22,800, (N samples: 2,918,400), Loss: 543.4458, (Recon: 543.4458, KL: 0.0000), Gradient norm: 1.3977


 87%|████████▋ | 409/469 [00:03<00:00, 125.30it/s]

Step 22,900, (N samples: 2,931,200), Loss: 543.4482, (Recon: 543.4482, KL: 0.0000), Gradient norm: 1.4483


100%|██████████| 469/469 [00:03<00:00, 122.90it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.93it/s]


====> Test set loss: 543.4423, (BCE: 543.4423, KLD: 0.0000)
Epoch 50/50


 10%|▉         | 45/469 [00:00<00:03, 114.39it/s]

Step 23,000, (N samples: 2,944,000), Loss: 543.4447, (Recon: 543.4446, KL: 0.0001), Gradient norm: 1.4759


 29%|██▉       | 136/469 [00:01<00:02, 124.83it/s]

Step 23,100, (N samples: 2,956,800), Loss: 543.4316, (Recon: 543.4316, KL: 0.0000), Gradient norm: 1.4538


 51%|█████     | 240/469 [00:01<00:01, 125.40it/s]

Step 23,200, (N samples: 2,969,600), Loss: 543.4241, (Recon: 543.4241, KL: 0.0000), Gradient norm: 1.4278


 73%|███████▎  | 344/469 [00:02<00:00, 125.36it/s]

Step 23,300, (N samples: 2,982,400), Loss: 543.4580, (Recon: 543.4580, KL: 0.0000), Gradient norm: 1.4720


 93%|█████████▎| 435/469 [00:03<00:00, 125.06it/s]

Step 23,400, (N samples: 2,995,200), Loss: 543.4489, (Recon: 543.4489, KL: 0.0000), Gradient norm: 1.4946


100%|██████████| 469/469 [00:03<00:00, 123.32it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.24it/s]

====> Test set loss: 543.4416, (BCE: 543.4415, KLD: 0.0001)





In [15]:
# Flush both TensorBoard summary writers so pending events are written out
# before the logs are inspected.
for summary_writer in (writer_train, writer_test):
    summary_writer.flush()

In [16]:
# Load the TensorBoard notebook extension; this makes the `%tensorboard` line
# magic used in the next cell available in this kernel.
%load_ext tensorboard

In [23]:
%tensorboard --logdir ../experiments/VAE_MNIST/20241028-004306/

Reusing TensorBoard on port 6011 (pid 1110344), started 0:00:12 ago. (Use '!kill 1110344' to kill it.)