In [7]:
%load_ext autoreload
# %reload_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
from datetime import datetime

import torch
from torch.utils.tensorboard import SummaryWriter

from methylVA.mnist.model import VAE
from methylVA.mnist.training import train, test


# Hyperparameters for this run.
batch_size = 128
learning_rate = 1e-3
weight_decay = 1e-2
num_epochs = 50
latent_dim = 2
hidden_dim = 512
seed = 42

# Experiment label, also used as the TensorBoard log sub-directory. The latent
# size is interpolated so the label cannot drift away from `latent_dim`.
name = f'AE_MNIST_GAUSSIAN_NOISE_latent_{latent_dim}_kl_1'

# Seed torch's RNG before the model is built so weight initialisation (and any
# later torch-driven sampling/shuffling) is repeatable between kernel restarts.
torch.manual_seed(seed)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VAE(input_dim=784, latent_dim=latent_dim, hidden_dim=hidden_dim).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# One timestamp per run: evaluating datetime.now() once per writer can straddle
# a second boundary and file the train and test logs of the same run under
# different run directories.
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
writer_train = SummaryWriter(f'../experiments/{name}/train/{run_id}')
writer_test = SummaryWriter(f'../experiments/{name}/test/{run_id}')

In [9]:
# Display the module tree as a sanity check of the layer sizes.
# NOTE(review): the saved output lists a 20-feature decoder input and a
# 40-feature encoder output, which does not line up with latent_dim = 2 from the
# config cell — presumably the output is stale or VAE derives its latent size
# differently; confirm.
model

VAE(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): SiLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): SiLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): SiLU()
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): SiLU()
    (8): Linear(in_features=64, out_features=40, bias=True)
  )
  (softplus): Softplus(beta=1.0, threshold=20.0)
  (decoder): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): SiLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): SiLU()
    (4): Linear(in_features=128, out_features=256, bias=True)
    (5): SiLU()
    (6): Linear(in_features=256, out_features=512, bias=True)
    (7): SiLU()
    (8): Linear(in_features=512, out_features=784, bias=True)
    (9): Sigmoid()
  )
)

In [10]:
# Build the train/test data loaders consumed by the training loop.
# NOTE(review): `batch_size` from the config cell is not passed here, so the
# loaders presumably fall back to their own default — confirm the two agree.
from methylVA.mnist.dataset import get_gaussian_data_loaders
train_loader, test_loader = get_gaussian_data_loaders()

In [11]:
# Print the kernel's working directory; the relative '../experiments/...' log
# paths given to the SummaryWriters resolve against it.
!pwd

/fast/AG_Ohler/ekarimi/projects/methylVA/notebooks


In [12]:
# `train` and `test` are already imported in the setup cell, so they are not
# re-imported here.
#
# NOTE(review): the saved output of this cell shows the loss pinned at ~543.44
# with KLD ~ 0 from the first epoch onwards — the model does not appear to be
# learning; check the loss and the data pipeline.

# Running step counter: train() receives the count so far and returns the
# updated one, so logged steps continue across epochs instead of restarting.
prev_updates = 0
try:
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        prev_updates = train(model, train_loader, optimizer, prev_updates, writer=writer_train)
        test(model, test_loader, prev_updates, writer=writer_test)
finally:
    # Push buffered TensorBoard events to disk even when the run is interrupted
    # or fails part-way; flush() leaves the writers usable for a re-run.
    writer_train.flush()
    writer_test.flush()

Epoch 1/50


  2%|▏         | 9/469 [00:00<00:05, 82.39it/s]

Step 0, (N samples: 0), Loss: 545.7476, (Recon: 543.4945, KLD: 2.2531), Gradient norm: 2.2111


 26%|██▌       | 123/469 [00:01<00:02, 123.36it/s]

Step 100, (N samples: 12,800), Loss: 543.4627, (Recon: 543.4623, KLD: 0.0004), Gradient norm: 1.1757


 46%|████▌     | 214/469 [00:01<00:02, 124.34it/s]

Step 200, (N samples: 25,600), Loss: 543.4492, (Recon: 543.4491, KLD: 0.0001), Gradient norm: 1.1525


 68%|██████▊   | 318/469 [00:02<00:01, 124.73it/s]

Step 300, (N samples: 38,400), Loss: 543.4585, (Recon: 543.4584, KLD: 0.0001), Gradient norm: 1.2042


 90%|████████▉ | 422/469 [00:03<00:00, 124.36it/s]

Step 400, (N samples: 51,200), Loss: 543.4618, (Recon: 543.4616, KLD: 0.0002), Gradient norm: 1.1472


100%|██████████| 469/469 [00:03<00:00, 122.63it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.56it/s]


====> Test set loss: 543.4468, (BCE: 543.4466, KLD: 0.0002)
Epoch 2/50


 10%|▉         | 46/469 [00:00<00:03, 114.22it/s]

Step 500, (N samples: 64,000), Loss: 543.4559, (Recon: 543.4556, KLD: 0.0002), Gradient norm: 1.1379


 32%|███▏      | 150/469 [00:01<00:02, 122.38it/s]

Step 600, (N samples: 76,800), Loss: 543.4441, (Recon: 543.4438, KLD: 0.0003), Gradient norm: 1.2115


 54%|█████▍    | 254/469 [00:02<00:01, 124.57it/s]

Step 700, (N samples: 89,600), Loss: 543.4637, (Recon: 543.4631, KLD: 0.0006), Gradient norm: 1.2167


 74%|███████▎  | 345/469 [00:02<00:00, 124.65it/s]

Step 800, (N samples: 102,400), Loss: 543.4418, (Recon: 543.4413, KLD: 0.0005), Gradient norm: 1.1802


 96%|█████████▌| 449/469 [00:03<00:00, 124.73it/s]

Step 900, (N samples: 115,200), Loss: 543.4424, (Recon: 543.4418, KLD: 0.0006), Gradient norm: 1.1417


100%|██████████| 469/469 [00:03<00:00, 122.32it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 169.87it/s]


====> Test set loss: 543.4452, (BCE: 543.4445, KLD: 0.0007)
Epoch 3/50


 18%|█▊        | 84/469 [00:00<00:03, 121.40it/s]

Step 1,000, (N samples: 128,000), Loss: 543.4564, (Recon: 543.4554, KLD: 0.0010), Gradient norm: 1.2067


 37%|███▋      | 175/469 [00:01<00:02, 124.26it/s]

Step 1,100, (N samples: 140,800), Loss: 543.4268, (Recon: 543.4255, KLD: 0.0013), Gradient norm: 1.1744


 59%|█████▉    | 279/469 [00:02<00:01, 124.67it/s]

Step 1,200, (N samples: 153,600), Loss: 543.4436, (Recon: 543.4429, KLD: 0.0007), Gradient norm: 1.1870


 82%|████████▏ | 383/469 [00:03<00:00, 124.75it/s]

Step 1,300, (N samples: 166,400), Loss: 543.4442, (Recon: 543.4435, KLD: 0.0007), Gradient norm: 1.2323


100%|██████████| 469/469 [00:03<00:00, 122.77it/s]


Step 1,400, (N samples: 179,200), Loss: 543.4218, (Recon: 543.4210, KLD: 0.0008), Gradient norm: 1.1824


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.86it/s]


====> Test set loss: 543.4430, (BCE: 543.4422, KLD: 0.0008)
Epoch 4/50


 23%|██▎       | 110/469 [00:00<00:02, 123.36it/s]

Step 1,500, (N samples: 192,000), Loss: 543.4329, (Recon: 543.4316, KLD: 0.0013), Gradient norm: 1.1802


 46%|████▌     | 214/469 [00:01<00:02, 124.51it/s]

Step 1,600, (N samples: 204,800), Loss: 543.4504, (Recon: 543.4496, KLD: 0.0008), Gradient norm: 1.1779


 65%|██████▌   | 305/469 [00:02<00:01, 123.69it/s]

Step 1,700, (N samples: 217,600), Loss: 543.4605, (Recon: 543.4595, KLD: 0.0010), Gradient norm: 1.2018


 87%|████████▋ | 409/469 [00:03<00:00, 124.33it/s]

Step 1,800, (N samples: 230,400), Loss: 543.4343, (Recon: 543.4330, KLD: 0.0013), Gradient norm: 1.1511


100%|██████████| 469/469 [00:03<00:00, 122.60it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.50it/s]


====> Test set loss: 543.4405, (BCE: 543.4394, KLD: 0.0012)
Epoch 5/50


 10%|▉         | 45/469 [00:00<00:03, 114.75it/s]

Step 1,900, (N samples: 243,200), Loss: 543.4371, (Recon: 543.4359, KLD: 0.0012), Gradient norm: 1.1705


 32%|███▏      | 149/469 [00:01<00:02, 123.39it/s]

Step 2,000, (N samples: 256,000), Loss: 543.4350, (Recon: 543.4340, KLD: 0.0010), Gradient norm: 1.1778


 51%|█████     | 240/469 [00:01<00:01, 124.81it/s]

Step 2,100, (N samples: 268,800), Loss: 543.4532, (Recon: 543.4521, KLD: 0.0011), Gradient norm: 1.1732


 73%|███████▎  | 344/469 [00:02<00:01, 124.85it/s]

Step 2,200, (N samples: 281,600), Loss: 543.4503, (Recon: 543.4494, KLD: 0.0009), Gradient norm: 1.1721


 96%|█████████▌| 448/469 [00:03<00:00, 124.77it/s]

Step 2,300, (N samples: 294,400), Loss: 543.4201, (Recon: 543.4192, KLD: 0.0009), Gradient norm: 1.1752


100%|██████████| 469/469 [00:03<00:00, 122.77it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.53it/s]


====> Test set loss: 543.4383, (BCE: 543.4374, KLD: 0.0008)
Epoch 6/50


 15%|█▌        | 71/469 [00:00<00:03, 120.49it/s]

Step 2,400, (N samples: 307,200), Loss: 543.4437, (Recon: 543.4432, KLD: 0.0006), Gradient norm: 1.2081


 37%|███▋      | 175/469 [00:01<00:02, 124.53it/s]

Step 2,500, (N samples: 320,000), Loss: 543.4374, (Recon: 543.4365, KLD: 0.0009), Gradient norm: 1.1458


 59%|█████▉    | 279/469 [00:02<00:01, 124.76it/s]

Step 2,600, (N samples: 332,800), Loss: 543.4131, (Recon: 543.4125, KLD: 0.0006), Gradient norm: 1.1419


 79%|███████▉  | 370/469 [00:03<00:00, 124.71it/s]

Step 2,700, (N samples: 345,600), Loss: 543.4267, (Recon: 543.4260, KLD: 0.0007), Gradient norm: 1.1593


100%|██████████| 469/469 [00:03<00:00, 122.94it/s]


Step 2,800, (N samples: 358,400), Loss: 543.4579, (Recon: 543.4564, KLD: 0.0014), Gradient norm: 1.1659


Testing: 100%|██████████| 79/79 [00:00<00:00, 169.04it/s]


====> Test set loss: 543.4389, (BCE: 543.4376, KLD: 0.0013)
Epoch 7/50


 23%|██▎       | 110/469 [00:00<00:02, 123.58it/s]

Step 2,900, (N samples: 371,200), Loss: 543.4420, (Recon: 543.4401, KLD: 0.0019), Gradient norm: 1.2299


 43%|████▎     | 201/469 [00:01<00:02, 124.63it/s]

Step 3,000, (N samples: 384,000), Loss: 543.4403, (Recon: 543.4395, KLD: 0.0009), Gradient norm: 1.1717


 65%|██████▌   | 305/469 [00:02<00:01, 122.80it/s]

Step 3,100, (N samples: 396,800), Loss: 543.4523, (Recon: 543.4517, KLD: 0.0006), Gradient norm: 1.1912


 87%|████████▋ | 409/469 [00:03<00:00, 124.75it/s]

Step 3,200, (N samples: 409,600), Loss: 543.4225, (Recon: 543.4213, KLD: 0.0013), Gradient norm: 1.2156


100%|██████████| 469/469 [00:03<00:00, 122.83it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.64it/s]


====> Test set loss: 543.4394, (BCE: 543.4383, KLD: 0.0011)
Epoch 8/50


  7%|▋         | 32/469 [00:00<00:04, 107.93it/s]

Step 3,300, (N samples: 422,400), Loss: 543.4424, (Recon: 543.4413, KLD: 0.0011), Gradient norm: 1.1998


 29%|██▉       | 136/469 [00:01<00:02, 124.33it/s]

Step 3,400, (N samples: 435,200), Loss: 543.4595, (Recon: 543.4583, KLD: 0.0012), Gradient norm: 1.2292


 51%|█████     | 240/469 [00:01<00:01, 124.89it/s]

Step 3,500, (N samples: 448,000), Loss: 543.4302, (Recon: 543.4289, KLD: 0.0013), Gradient norm: 1.2066


 71%|███████   | 331/469 [00:02<00:01, 124.68it/s]

Step 3,600, (N samples: 460,800), Loss: 543.4340, (Recon: 543.4333, KLD: 0.0006), Gradient norm: 1.2075


 93%|█████████▎| 435/469 [00:03<00:00, 124.67it/s]

Step 3,700, (N samples: 473,600), Loss: 543.4510, (Recon: 543.4502, KLD: 0.0008), Gradient norm: 1.1426


100%|██████████| 469/469 [00:03<00:00, 122.94it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.69it/s]


====> Test set loss: 543.4423, (BCE: 543.4414, KLD: 0.0009)
Epoch 9/50


 15%|█▌        | 71/469 [00:00<00:03, 120.63it/s]

Step 3,800, (N samples: 486,400), Loss: 543.4373, (Recon: 543.4367, KLD: 0.0006), Gradient norm: 1.2135


 35%|███▍      | 162/469 [00:01<00:02, 122.48it/s]

Step 3,900, (N samples: 499,200), Loss: 543.4467, (Recon: 543.4451, KLD: 0.0016), Gradient norm: 1.2210


 57%|█████▋    | 266/469 [00:02<00:01, 124.71it/s]

Step 4,000, (N samples: 512,000), Loss: 543.4126, (Recon: 543.4111, KLD: 0.0015), Gradient norm: 1.2529


 79%|███████▉  | 370/469 [00:03<00:00, 124.81it/s]

Step 4,100, (N samples: 524,800), Loss: 543.4569, (Recon: 543.4553, KLD: 0.0016), Gradient norm: 1.2062


100%|██████████| 469/469 [00:03<00:00, 122.72it/s]


Step 4,200, (N samples: 537,600), Loss: 543.4301, (Recon: 543.4285, KLD: 0.0016), Gradient norm: 1.2464


Testing: 100%|██████████| 79/79 [00:00<00:00, 170.90it/s]


====> Test set loss: 543.4388, (BCE: 543.4371, KLD: 0.0016)
Epoch 10/50


 21%|██        | 97/469 [00:00<00:03, 122.61it/s]

Step 4,300, (N samples: 550,400), Loss: 543.4775, (Recon: 543.4761, KLD: 0.0015), Gradient norm: 1.2932


 43%|████▎     | 201/469 [00:01<00:02, 124.73it/s]

Step 4,400, (N samples: 563,200), Loss: 543.4379, (Recon: 543.4364, KLD: 0.0015), Gradient norm: 1.2284


 65%|██████▌   | 305/469 [00:02<00:01, 124.78it/s]

Step 4,500, (N samples: 576,000), Loss: 543.4339, (Recon: 543.4326, KLD: 0.0013), Gradient norm: 1.2141


 84%|████████▍ | 396/469 [00:03<00:00, 124.83it/s]

Step 4,600, (N samples: 588,800), Loss: 543.4315, (Recon: 543.4310, KLD: 0.0005), Gradient norm: 1.2556


100%|██████████| 469/469 [00:03<00:00, 123.00it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.72it/s]


====> Test set loss: 543.4407, (BCE: 543.4401, KLD: 0.0006)
Epoch 11/50


  2%|▏         | 9/469 [00:00<00:05, 88.02it/s]

Step 4,700, (N samples: 601,600), Loss: 543.4528, (Recon: 543.4520, KLD: 0.0008), Gradient norm: 1.2182


 29%|██▉       | 136/469 [00:01<00:02, 124.30it/s]

Step 4,800, (N samples: 614,400), Loss: 543.4216, (Recon: 543.4205, KLD: 0.0011), Gradient norm: 1.1863


 48%|████▊     | 227/469 [00:01<00:01, 124.82it/s]

Step 4,900, (N samples: 627,200), Loss: 543.4130, (Recon: 543.4113, KLD: 0.0017), Gradient norm: 1.1843


 71%|███████   | 331/469 [00:02<00:01, 122.15it/s]

Step 5,000, (N samples: 640,000), Loss: 543.4282, (Recon: 543.4271, KLD: 0.0012), Gradient norm: 1.2152


 93%|█████████▎| 435/469 [00:03<00:00, 124.54it/s]

Step 5,100, (N samples: 652,800), Loss: 543.4652, (Recon: 543.4645, KLD: 0.0007), Gradient norm: 1.2885


100%|██████████| 469/469 [00:03<00:00, 122.76it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.54it/s]


====> Test set loss: 543.4403, (BCE: 543.4399, KLD: 0.0004)
Epoch 12/50


 12%|█▏        | 58/469 [00:00<00:03, 118.60it/s]

Step 5,200, (N samples: 665,600), Loss: 543.4186, (Recon: 543.4183, KLD: 0.0003), Gradient norm: 1.2338


 35%|███▍      | 162/469 [00:01<00:02, 123.64it/s]

Step 5,300, (N samples: 678,400), Loss: 543.4816, (Recon: 543.4810, KLD: 0.0006), Gradient norm: 1.3273


 57%|█████▋    | 266/469 [00:02<00:01, 124.72it/s]

Step 5,400, (N samples: 691,200), Loss: 543.4325, (Recon: 543.4312, KLD: 0.0013), Gradient norm: 1.2463


 78%|███████▊  | 364/469 [00:03<00:00, 107.84it/s]

Step 5,500, (N samples: 704,000), Loss: 543.4392, (Recon: 543.4381, KLD: 0.0011), Gradient norm: 1.2362


100%|█████████▉| 467/469 [00:04<00:00, 107.60it/s]

Step 5,600, (N samples: 716,800), Loss: 543.4325, (Recon: 543.4315, KLD: 0.0010), Gradient norm: 1.1843


100%|██████████| 469/469 [00:04<00:00, 110.96it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.65it/s]


====> Test set loss: 543.4379, (BCE: 543.4370, KLD: 0.0008)
Epoch 13/50


 21%|██        | 97/469 [00:00<00:03, 122.98it/s]

Step 5,700, (N samples: 729,600), Loss: 543.4326, (Recon: 543.4313, KLD: 0.0012), Gradient norm: 1.2128


 40%|████      | 188/469 [00:01<00:02, 124.77it/s]

Step 5,800, (N samples: 742,400), Loss: 543.4301, (Recon: 543.4293, KLD: 0.0008), Gradient norm: 1.2475


 62%|██████▏   | 292/469 [00:02<00:01, 124.85it/s]

Step 5,900, (N samples: 755,200), Loss: 543.4506, (Recon: 543.4495, KLD: 0.0011), Gradient norm: 1.2561


 84%|████████▍ | 396/469 [00:03<00:00, 124.98it/s]

Step 6,000, (N samples: 768,000), Loss: 543.4468, (Recon: 543.4460, KLD: 0.0007), Gradient norm: 1.2942


100%|██████████| 469/469 [00:03<00:00, 122.86it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 172.02it/s]


====> Test set loss: 543.4417, (BCE: 543.4408, KLD: 0.0009)
Epoch 14/50


  2%|▏         | 9/469 [00:00<00:05, 86.60it/s]

Step 6,100, (N samples: 780,800), Loss: 543.4421, (Recon: 543.4413, KLD: 0.0009), Gradient norm: 1.2899


 26%|██▌       | 123/469 [00:01<00:02, 124.30it/s]

Step 6,200, (N samples: 793,600), Loss: 543.4453, (Recon: 543.4438, KLD: 0.0015), Gradient norm: 1.2828


 48%|████▊     | 227/469 [00:01<00:01, 124.66it/s]

Step 6,300, (N samples: 806,400), Loss: 543.4426, (Recon: 543.4416, KLD: 0.0010), Gradient norm: 1.2180


 68%|██████▊   | 318/469 [00:02<00:01, 123.66it/s]

Step 6,400, (N samples: 819,200), Loss: 543.4380, (Recon: 543.4371, KLD: 0.0008), Gradient norm: 1.2960


 90%|████████▉ | 422/469 [00:03<00:00, 125.15it/s]

Step 6,500, (N samples: 832,000), Loss: 543.4442, (Recon: 543.4430, KLD: 0.0011), Gradient norm: 1.3349


100%|██████████| 469/469 [00:03<00:00, 122.41it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 172.82it/s]


====> Test set loss: 543.4436, (BCE: 543.4426, KLD: 0.0009)
Epoch 15/50


 12%|█▏        | 58/469 [00:00<00:03, 118.33it/s]

Step 6,600, (N samples: 844,800), Loss: 543.4564, (Recon: 543.4549, KLD: 0.0015), Gradient norm: 1.3403


 32%|███▏      | 149/469 [00:01<00:02, 124.69it/s]

Step 6,700, (N samples: 857,600), Loss: 543.4294, (Recon: 543.4288, KLD: 0.0006), Gradient norm: 1.2810


 54%|█████▍    | 253/469 [00:02<00:01, 125.03it/s]

Step 6,800, (N samples: 870,400), Loss: 543.4263, (Recon: 543.4252, KLD: 0.0011), Gradient norm: 1.1924


 76%|███████▌  | 357/469 [00:02<00:00, 125.03it/s]

Step 6,900, (N samples: 883,200), Loss: 543.4418, (Recon: 543.4415, KLD: 0.0003), Gradient norm: 1.3229


 96%|█████████▌| 448/469 [00:03<00:00, 125.07it/s]

Step 7,000, (N samples: 896,000), Loss: 543.4387, (Recon: 543.4380, KLD: 0.0006), Gradient norm: 1.3183


100%|██████████| 469/469 [00:03<00:00, 123.14it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 173.19it/s]


====> Test set loss: 543.4417, (BCE: 543.4409, KLD: 0.0008)
Epoch 16/50


 18%|█▊        | 84/469 [00:00<00:03, 122.38it/s]

Step 7,100, (N samples: 908,800), Loss: 543.4346, (Recon: 543.4332, KLD: 0.0013), Gradient norm: 1.2258


 40%|████      | 188/469 [00:01<00:02, 125.13it/s]

Step 7,200, (N samples: 921,600), Loss: 543.4370, (Recon: 543.4360, KLD: 0.0009), Gradient norm: 1.2152


 59%|█████▉    | 279/469 [00:02<00:01, 125.19it/s]

Step 7,300, (N samples: 934,400), Loss: 543.4351, (Recon: 543.4335, KLD: 0.0016), Gradient norm: 1.2533


 82%|████████▏ | 383/469 [00:03<00:00, 125.25it/s]

Step 7,400, (N samples: 947,200), Loss: 543.4424, (Recon: 543.4300, KLD: 0.0125), Gradient norm: 1.3465


100%|██████████| 469/469 [00:03<00:00, 122.94it/s]


Step 7,500, (N samples: 960,000), Loss: 543.4402, (Recon: 543.4399, KLD: 0.0003), Gradient norm: 1.2926


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.94it/s]


====> Test set loss: 543.4386, (BCE: 543.4382, KLD: 0.0004)
Epoch 17/50


 23%|██▎       | 110/469 [00:00<00:02, 124.03it/s]

Step 7,600, (N samples: 972,800), Loss: 543.4329, (Recon: 543.4325, KLD: 0.0004), Gradient norm: 1.2522


 46%|████▌     | 214/469 [00:01<00:02, 125.16it/s]

Step 7,700, (N samples: 985,600), Loss: 543.4397, (Recon: 543.4396, KLD: 0.0001), Gradient norm: 1.2823


 68%|██████▊   | 318/469 [00:02<00:01, 125.14it/s]

Step 7,800, (N samples: 998,400), Loss: 543.4214, (Recon: 543.4214, KLD: 0.0001), Gradient norm: 1.2640


 90%|████████▉ | 422/469 [00:03<00:00, 125.43it/s]

Step 7,900, (N samples: 1,011,200), Loss: 543.4210, (Recon: 543.4209, KLD: 0.0001), Gradient norm: 1.2769


100%|██████████| 469/469 [00:03<00:00, 123.46it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 172.10it/s]


====> Test set loss: 543.4404, (BCE: 543.4403, KLD: 0.0001)
Epoch 18/50


 10%|▉         | 45/469 [00:00<00:03, 115.14it/s]

Step 8,000, (N samples: 1,024,000), Loss: 543.4244, (Recon: 543.4243, KLD: 0.0001), Gradient norm: 1.3269


 32%|███▏      | 149/469 [00:01<00:02, 124.76it/s]

Step 8,100, (N samples: 1,036,800), Loss: 543.4376, (Recon: 543.4375, KLD: 0.0000), Gradient norm: 1.1892


 54%|█████▍    | 253/469 [00:02<00:01, 125.23it/s]

Step 8,200, (N samples: 1,049,600), Loss: 543.4386, (Recon: 543.4385, KLD: 0.0001), Gradient norm: 1.3393


 73%|███████▎  | 344/469 [00:02<00:00, 125.41it/s]

Step 8,300, (N samples: 1,062,400), Loss: 543.4397, (Recon: 543.4396, KLD: 0.0001), Gradient norm: 1.2975


 96%|█████████▌| 448/469 [00:03<00:00, 125.38it/s]

Step 8,400, (N samples: 1,075,200), Loss: 543.4223, (Recon: 543.4222, KLD: 0.0001), Gradient norm: 1.3264


100%|██████████| 469/469 [00:03<00:00, 123.42it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 172.36it/s]


====> Test set loss: 543.4383, (BCE: 543.4382, KLD: 0.0001)
Epoch 19/50


 18%|█▊        | 84/469 [00:00<00:03, 122.37it/s]

Step 8,500, (N samples: 1,088,000), Loss: 543.4348, (Recon: 543.4347, KLD: 0.0001), Gradient norm: 1.2831


 37%|███▋      | 175/469 [00:01<00:02, 124.23it/s]

Step 8,600, (N samples: 1,100,800), Loss: 543.4579, (Recon: 543.4578, KLD: 0.0000), Gradient norm: 1.3916


 59%|█████▉    | 279/469 [00:02<00:01, 124.74it/s]

Step 8,700, (N samples: 1,113,600), Loss: 543.4625, (Recon: 543.4625, KLD: 0.0000), Gradient norm: 1.3782


 82%|████████▏ | 383/469 [00:03<00:00, 124.82it/s]

Step 8,800, (N samples: 1,126,400), Loss: 543.4172, (Recon: 543.4171, KLD: 0.0001), Gradient norm: 1.2670


100%|██████████| 469/469 [00:03<00:00, 122.72it/s]


Step 8,900, (N samples: 1,139,200), Loss: 543.4466, (Recon: 543.4465, KLD: 0.0000), Gradient norm: 1.3492


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.54it/s]


====> Test set loss: 543.4369, (BCE: 543.4369, KLD: 0.0001)
Epoch 20/50


 23%|██▎       | 110/469 [00:00<00:02, 123.47it/s]

Step 9,000, (N samples: 1,152,000), Loss: 543.4462, (Recon: 543.4462, KLD: 0.0000), Gradient norm: 1.2639


 46%|████▌     | 214/469 [00:01<00:02, 124.94it/s]

Step 9,100, (N samples: 1,164,800), Loss: 543.4477, (Recon: 543.4476, KLD: 0.0000), Gradient norm: 1.3411


 65%|██████▌   | 305/469 [00:02<00:01, 124.87it/s]

Step 9,200, (N samples: 1,177,600), Loss: 543.4295, (Recon: 543.4294, KLD: 0.0000), Gradient norm: 1.2938


 87%|████████▋ | 409/469 [00:03<00:00, 124.95it/s]

Step 9,300, (N samples: 1,190,400), Loss: 543.4419, (Recon: 543.4419, KLD: 0.0000), Gradient norm: 1.3157


100%|██████████| 469/469 [00:03<00:00, 123.13it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.85it/s]


====> Test set loss: 543.4377, (BCE: 543.4376, KLD: 0.0000)
Epoch 21/50


 10%|▉         | 45/469 [00:00<00:03, 114.76it/s]

Step 9,400, (N samples: 1,203,200), Loss: 543.4619, (Recon: 543.4619, KLD: 0.0001), Gradient norm: 1.2606


 29%|██▉       | 136/469 [00:01<00:02, 123.71it/s]

Step 9,500, (N samples: 1,216,000), Loss: 543.4533, (Recon: 543.4532, KLD: 0.0000), Gradient norm: 1.3507


 51%|█████     | 240/469 [00:01<00:01, 124.51it/s]

Step 9,600, (N samples: 1,228,800), Loss: 543.4628, (Recon: 543.4628, KLD: 0.0000), Gradient norm: 1.3690


 73%|███████▎  | 344/469 [00:02<00:01, 123.55it/s]

Step 9,700, (N samples: 1,241,600), Loss: 543.4489, (Recon: 543.4489, KLD: 0.0000), Gradient norm: 1.3683


 93%|█████████▎| 435/469 [00:03<00:00, 124.70it/s]

Step 9,800, (N samples: 1,254,400), Loss: 543.4416, (Recon: 543.4416, KLD: 0.0000), Gradient norm: 1.3499


100%|██████████| 469/469 [00:03<00:00, 122.26it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.74it/s]


====> Test set loss: 543.4399, (BCE: 543.4398, KLD: 0.0001)
Epoch 22/50


 15%|█▌        | 71/469 [00:00<00:03, 120.52it/s]

Step 9,900, (N samples: 1,267,200), Loss: 543.4612, (Recon: 543.4612, KLD: 0.0001), Gradient norm: 1.2316


 37%|███▋      | 175/469 [00:01<00:02, 124.59it/s]

Step 10,000, (N samples: 1,280,000), Loss: 543.4556, (Recon: 543.4556, KLD: 0.0000), Gradient norm: 1.3304


 57%|█████▋    | 266/469 [00:02<00:01, 124.78it/s]

Step 10,100, (N samples: 1,292,800), Loss: 543.4081, (Recon: 543.4081, KLD: 0.0000), Gradient norm: 1.4525


 79%|███████▉  | 370/469 [00:03<00:00, 124.94it/s]

Step 10,200, (N samples: 1,305,600), Loss: 543.4338, (Recon: 543.4337, KLD: 0.0000), Gradient norm: 1.3705


100%|██████████| 469/469 [00:03<00:00, 123.03it/s]


Step 10,300, (N samples: 1,318,400), Loss: 543.4500, (Recon: 543.4500, KLD: 0.0000), Gradient norm: 1.3921


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.67it/s]


====> Test set loss: 543.4394, (BCE: 543.4394, KLD: 0.0000)
Epoch 23/50


 21%|██        | 97/469 [00:00<00:03, 123.06it/s]

Step 10,400, (N samples: 1,331,200), Loss: 543.4251, (Recon: 543.4251, KLD: 0.0000), Gradient norm: 1.3231


 43%|████▎     | 201/469 [00:01<00:02, 124.72it/s]

Step 10,500, (N samples: 1,344,000), Loss: 543.4673, (Recon: 543.4673, KLD: 0.0000), Gradient norm: 1.3615


 65%|██████▌   | 305/469 [00:02<00:01, 124.78it/s]

Step 10,600, (N samples: 1,356,800), Loss: 543.4399, (Recon: 543.4399, KLD: 0.0000), Gradient norm: 1.3682


 84%|████████▍ | 396/469 [00:03<00:00, 124.80it/s]

Step 10,700, (N samples: 1,369,600), Loss: 543.4537, (Recon: 543.4537, KLD: 0.0000), Gradient norm: 1.4134


100%|██████████| 469/469 [00:03<00:00, 122.91it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.39it/s]


====> Test set loss: 543.4409, (BCE: 543.4409, KLD: 0.0000)
Epoch 24/50


  2%|▏         | 9/469 [00:00<00:05, 87.02it/s]

Step 10,800, (N samples: 1,382,400), Loss: 543.4584, (Recon: 543.4584, KLD: 0.0000), Gradient norm: 1.3912


 29%|██▉       | 136/469 [00:01<00:02, 124.31it/s]

Step 10,900, (N samples: 1,395,200), Loss: 543.4623, (Recon: 543.4623, KLD: 0.0000), Gradient norm: 1.3794


 48%|████▊     | 227/469 [00:01<00:01, 124.61it/s]

Step 11,000, (N samples: 1,408,000), Loss: 543.4385, (Recon: 543.4385, KLD: 0.0001), Gradient norm: 1.2789


 71%|███████   | 331/469 [00:02<00:01, 124.83it/s]

Step 11,100, (N samples: 1,420,800), Loss: 543.4398, (Recon: 543.4398, KLD: 0.0000), Gradient norm: 1.3995


 93%|█████████▎| 435/469 [00:03<00:00, 124.88it/s]

Step 11,200, (N samples: 1,433,600), Loss: 543.4417, (Recon: 543.4417, KLD: 0.0000), Gradient norm: 1.2272


100%|██████████| 469/469 [00:03<00:00, 122.96it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.90it/s]


====> Test set loss: 543.4403, (BCE: 543.4402, KLD: 0.0000)
Epoch 25/50


 12%|█▏        | 58/469 [00:00<00:03, 118.26it/s]

Step 11,300, (N samples: 1,446,400), Loss: 543.4510, (Recon: 543.4509, KLD: 0.0000), Gradient norm: 1.3727


 35%|███▍      | 162/469 [00:01<00:02, 124.34it/s]

Step 11,400, (N samples: 1,459,200), Loss: 543.4337, (Recon: 543.4336, KLD: 0.0000), Gradient norm: 1.3423


 57%|█████▋    | 266/469 [00:02<00:01, 124.61it/s]

Step 11,500, (N samples: 1,472,000), Loss: 543.4251, (Recon: 543.4250, KLD: 0.0000), Gradient norm: 1.3252


 79%|███████▉  | 370/469 [00:03<00:00, 124.95it/s]

Step 11,600, (N samples: 1,484,800), Loss: 543.4460, (Recon: 543.4460, KLD: 0.0000), Gradient norm: 1.3493


100%|██████████| 469/469 [00:03<00:00, 122.93it/s]

Step 11,700, (N samples: 1,497,600), Loss: 543.4568, (Recon: 543.4568, KLD: 0.0001), Gradient norm: 1.4032



Testing: 100%|██████████| 79/79 [00:00<00:00, 171.95it/s]


====> Test set loss: 543.4399, (BCE: 543.4398, KLD: 0.0001)
Epoch 26/50


 21%|██        | 97/469 [00:00<00:03, 122.71it/s]

Step 11,800, (N samples: 1,510,400), Loss: 543.4363, (Recon: 543.4362, KLD: 0.0001), Gradient norm: 1.4124


 43%|████▎     | 201/469 [00:01<00:02, 124.19it/s]

Step 11,900, (N samples: 1,523,200), Loss: 543.4427, (Recon: 543.4427, KLD: 0.0000), Gradient norm: 1.3544


 62%|██████▏   | 292/469 [00:02<00:01, 124.58it/s]

Step 12,000, (N samples: 1,536,000), Loss: 543.4443, (Recon: 543.4443, KLD: 0.0000), Gradient norm: 1.3140


 84%|████████▍ | 396/469 [00:03<00:00, 124.13it/s]

Step 12,100, (N samples: 1,548,800), Loss: 543.4287, (Recon: 543.4287, KLD: 0.0000), Gradient norm: 1.2125


100%|██████████| 469/469 [00:03<00:00, 122.51it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.77it/s]


====> Test set loss: 543.4436, (BCE: 543.4435, KLD: 0.0000)
Epoch 27/50


  2%|▏         | 9/469 [00:00<00:05, 86.19it/s]

Step 12,200, (N samples: 1,561,600), Loss: 543.4216, (Recon: 543.4216, KLD: 0.0000), Gradient norm: 1.3442


 26%|██▌       | 123/469 [00:01<00:02, 123.95it/s]

Step 12,300, (N samples: 1,574,400), Loss: 543.4423, (Recon: 543.4422, KLD: 0.0000), Gradient norm: 1.3940


 48%|████▊     | 227/469 [00:01<00:01, 124.85it/s]

Step 12,400, (N samples: 1,587,200), Loss: 543.4573, (Recon: 543.4573, KLD: 0.0000), Gradient norm: 1.3681


 71%|███████   | 331/469 [00:02<00:01, 124.95it/s]

Step 12,500, (N samples: 1,600,000), Loss: 543.4560, (Recon: 543.4559, KLD: 0.0000), Gradient norm: 1.4042


 90%|████████▉ | 422/469 [00:03<00:00, 125.03it/s]

Step 12,600, (N samples: 1,612,800), Loss: 543.4420, (Recon: 543.4420, KLD: 0.0000), Gradient norm: 1.3257


100%|██████████| 469/469 [00:03<00:00, 123.07it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.55it/s]


====> Test set loss: 543.4424, (BCE: 543.4424, KLD: 0.0000)
Epoch 28/50


 12%|█▏        | 58/469 [00:00<00:03, 118.34it/s]

Step 12,700, (N samples: 1,625,600), Loss: 543.4462, (Recon: 543.4462, KLD: 0.0000), Gradient norm: 1.4037


 35%|███▍      | 162/469 [00:01<00:02, 124.70it/s]

Step 12,800, (N samples: 1,638,400), Loss: 543.4333, (Recon: 543.4333, KLD: 0.0000), Gradient norm: 1.2799


 54%|█████▍    | 253/469 [00:02<00:01, 125.07it/s]

Step 12,900, (N samples: 1,651,200), Loss: 543.4556, (Recon: 543.4556, KLD: 0.0000), Gradient norm: 1.3470


 76%|███████▌  | 357/469 [00:02<00:00, 122.86it/s]

Step 13,000, (N samples: 1,664,000), Loss: 543.4372, (Recon: 543.4371, KLD: 0.0001), Gradient norm: 1.3113


 98%|█████████▊| 461/469 [00:03<00:00, 124.81it/s]

Step 13,100, (N samples: 1,676,800), Loss: 543.4451, (Recon: 543.4451, KLD: 0.0000), Gradient norm: 1.3695


100%|██████████| 469/469 [00:03<00:00, 122.58it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.96it/s]


====> Test set loss: 543.4424, (BCE: 543.4424, KLD: 0.0000)
Epoch 29/50


 18%|█▊        | 84/469 [00:00<00:03, 122.22it/s]

Step 13,200, (N samples: 1,689,600), Loss: 543.4545, (Recon: 543.4545, KLD: 0.0000), Gradient norm: 1.3958


 40%|████      | 188/469 [00:01<00:02, 124.68it/s]

Step 13,300, (N samples: 1,702,400), Loss: 543.4296, (Recon: 543.4296, KLD: 0.0000), Gradient norm: 1.3300


 62%|██████▏   | 292/469 [00:02<00:01, 124.97it/s]

Step 13,400, (N samples: 1,715,200), Loss: 543.4379, (Recon: 543.4379, KLD: 0.0000), Gradient norm: 1.4748


 82%|████████▏ | 383/469 [00:03<00:00, 124.94it/s]

Step 13,500, (N samples: 1,728,000), Loss: 543.4168, (Recon: 543.4168, KLD: 0.0000), Gradient norm: 1.3712


100%|██████████| 469/469 [00:03<00:00, 123.08it/s]


Step 13,600, (N samples: 1,740,800), Loss: 543.4537, (Recon: 543.4537, KLD: 0.0000), Gradient norm: 1.5990


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.51it/s]


====> Test set loss: 543.4443, (BCE: 543.4443, KLD: 0.0000)
Epoch 30/50


 26%|██▌       | 123/469 [00:01<00:02, 123.93it/s]

Step 13,700, (N samples: 1,753,600), Loss: 543.4462, (Recon: 543.4462, KLD: 0.0000), Gradient norm: 1.3090


 46%|████▌     | 214/469 [00:01<00:02, 124.92it/s]

Step 13,800, (N samples: 1,766,400), Loss: 543.4479, (Recon: 543.4478, KLD: 0.0000), Gradient norm: 1.3814


 68%|██████▊   | 318/469 [00:02<00:01, 125.05it/s]

Step 13,900, (N samples: 1,779,200), Loss: 543.4402, (Recon: 543.4401, KLD: 0.0001), Gradient norm: 1.3909


 90%|████████▉ | 422/469 [00:03<00:00, 125.00it/s]

Step 14,000, (N samples: 1,792,000), Loss: 543.4421, (Recon: 543.4421, KLD: 0.0000), Gradient norm: 1.3787


100%|██████████| 469/469 [00:03<00:00, 123.09it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 167.70it/s]


====> Test set loss: 543.4417, (BCE: 543.4417, KLD: 0.0000)
Epoch 31/50


 10%|▉         | 46/469 [00:00<00:03, 114.29it/s]

Step 14,100, (N samples: 1,804,800), Loss: 543.4259, (Recon: 543.4259, KLD: 0.0000), Gradient norm: 1.3027


 32%|███▏      | 150/469 [00:01<00:02, 124.53it/s]

Step 14,200, (N samples: 1,817,600), Loss: 543.4332, (Recon: 543.4332, KLD: 0.0000), Gradient norm: 1.3377


 54%|█████▍    | 254/469 [00:02<00:01, 124.96it/s]

Step 14,300, (N samples: 1,830,400), Loss: 543.4381, (Recon: 543.4381, KLD: 0.0000), Gradient norm: 1.3295


 74%|███████▎  | 345/469 [00:02<00:00, 125.02it/s]

Step 14,400, (N samples: 1,843,200), Loss: 543.4486, (Recon: 543.4486, KLD: 0.0000), Gradient norm: 1.3362


 96%|█████████▌| 449/469 [00:03<00:00, 124.95it/s]

Step 14,500, (N samples: 1,856,000), Loss: 543.4517, (Recon: 543.4517, KLD: 0.0000), Gradient norm: 1.3659


100%|██████████| 469/469 [00:03<00:00, 122.95it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 172.70it/s]


====> Test set loss: 543.4401, (BCE: 543.4401, KLD: 0.0000)
Epoch 32/50


 18%|█▊        | 84/469 [00:00<00:03, 122.10it/s]

Step 14,600, (N samples: 1,868,800), Loss: 543.4315, (Recon: 543.4315, KLD: 0.0000), Gradient norm: 1.3693


 37%|███▋      | 175/469 [00:01<00:02, 124.70it/s]

Step 14,700, (N samples: 1,881,600), Loss: 543.4271, (Recon: 543.4271, KLD: 0.0000), Gradient norm: 1.3630


 59%|█████▉    | 279/469 [00:02<00:01, 125.03it/s]

Step 14,800, (N samples: 1,894,400), Loss: 543.4417, (Recon: 543.4417, KLD: 0.0001), Gradient norm: 1.3851


 82%|████████▏ | 383/469 [00:03<00:00, 125.06it/s]

Step 14,900, (N samples: 1,907,200), Loss: 543.4417, (Recon: 543.4416, KLD: 0.0001), Gradient norm: 1.3822


100%|██████████| 469/469 [00:03<00:00, 123.14it/s]


Step 15,000, (N samples: 1,920,000), Loss: 543.4354, (Recon: 543.4353, KLD: 0.0001), Gradient norm: 1.4072


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.68it/s]


====> Test set loss: 543.4436, (BCE: 543.4435, KLD: 0.0001)
Epoch 33/50


 23%|██▎       | 110/469 [00:00<00:02, 123.79it/s]

Step 15,100, (N samples: 1,932,800), Loss: 543.4598, (Recon: 543.4597, KLD: 0.0001), Gradient norm: 1.3593


 46%|████▌     | 214/469 [00:01<00:02, 123.60it/s]

Step 15,200, (N samples: 1,945,600), Loss: 543.4550, (Recon: 543.4546, KLD: 0.0003), Gradient norm: 1.3952


 68%|██████▊   | 318/469 [00:02<00:01, 124.89it/s]

Step 15,300, (N samples: 1,958,400), Loss: 543.4687, (Recon: 543.4686, KLD: 0.0000), Gradient norm: 1.4264


 87%|████████▋ | 409/469 [00:03<00:00, 124.98it/s]

Step 15,400, (N samples: 1,971,200), Loss: 543.4454, (Recon: 543.4454, KLD: 0.0000), Gradient norm: 1.4565


100%|██████████| 469/469 [00:03<00:00, 122.69it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.53it/s]


====> Test set loss: 543.4429, (BCE: 543.4429, KLD: 0.0000)
Epoch 34/50


 10%|▉         | 45/469 [00:00<00:03, 114.99it/s]

Step 15,500, (N samples: 1,984,000), Loss: 543.4121, (Recon: 543.4121, KLD: 0.0000), Gradient norm: 1.3592


 32%|███▏      | 149/469 [00:01<00:02, 124.50it/s]

Step 15,600, (N samples: 1,996,800), Loss: 543.4381, (Recon: 543.4380, KLD: 0.0000), Gradient norm: 1.4144


 51%|█████     | 240/469 [00:01<00:01, 124.93it/s]

Step 15,700, (N samples: 2,009,600), Loss: 543.4410, (Recon: 543.4408, KLD: 0.0002), Gradient norm: 1.4182


 73%|███████▎  | 344/469 [00:02<00:01, 124.90it/s]

Step 15,800, (N samples: 2,022,400), Loss: 543.4446, (Recon: 543.4444, KLD: 0.0002), Gradient norm: 1.2870


 96%|█████████▌| 448/469 [00:03<00:00, 124.86it/s]

Step 15,900, (N samples: 2,035,200), Loss: 543.4419, (Recon: 543.4418, KLD: 0.0001), Gradient norm: 1.3595


100%|██████████| 469/469 [00:03<00:00, 123.06it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.61it/s]


====> Test set loss: 543.4427, (BCE: 543.4426, KLD: 0.0001)
Epoch 35/50


 15%|█▌        | 71/469 [00:00<00:03, 120.89it/s]

Step 16,000, (N samples: 2,048,000), Loss: 543.4399, (Recon: 543.4399, KLD: 0.0000), Gradient norm: 1.4618


 37%|███▋      | 175/469 [00:01<00:02, 124.52it/s]

Step 16,100, (N samples: 2,060,800), Loss: 543.4466, (Recon: 543.4466, KLD: 0.0000), Gradient norm: 1.4237


 59%|█████▉    | 279/469 [00:02<00:01, 124.81it/s]

Step 16,200, (N samples: 2,073,600), Loss: 543.4399, (Recon: 543.4398, KLD: 0.0001), Gradient norm: 1.3838


 79%|███████▉  | 370/469 [00:03<00:00, 122.44it/s]

Step 16,300, (N samples: 2,086,400), Loss: 543.4435, (Recon: 543.4435, KLD: 0.0001), Gradient norm: 1.4110


100%|██████████| 469/469 [00:03<00:00, 122.62it/s]


Step 16,400, (N samples: 2,099,200), Loss: 543.4479, (Recon: 543.4478, KLD: 0.0001), Gradient norm: 1.4246


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.14it/s]


====> Test set loss: 543.4396, (BCE: 543.4393, KLD: 0.0003)
Epoch 36/50


 23%|██▎       | 110/469 [00:00<00:02, 123.51it/s]

Step 16,500, (N samples: 2,112,000), Loss: 543.4047, (Recon: 543.4047, KLD: 0.0001), Gradient norm: 1.3531


 43%|████▎     | 201/469 [00:01<00:02, 124.69it/s]

Step 16,600, (N samples: 2,124,800), Loss: 543.4415, (Recon: 543.4414, KLD: 0.0001), Gradient norm: 1.5022


 65%|██████▌   | 305/469 [00:02<00:01, 124.77it/s]

Step 16,700, (N samples: 2,137,600), Loss: 543.4734, (Recon: 543.4733, KLD: 0.0002), Gradient norm: 1.3776


 87%|████████▋ | 409/469 [00:03<00:00, 124.78it/s]

Step 16,800, (N samples: 2,150,400), Loss: 543.4313, (Recon: 543.4313, KLD: 0.0000), Gradient norm: 1.4050


100%|██████████| 469/469 [00:03<00:00, 123.04it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.54it/s]


====> Test set loss: 543.4388, (BCE: 543.4386, KLD: 0.0001)
Epoch 37/50


  7%|▋         | 32/469 [00:00<00:04, 108.04it/s]

Step 16,900, (N samples: 2,163,200), Loss: 543.4395, (Recon: 543.4394, KLD: 0.0001), Gradient norm: 1.3229


 29%|██▉       | 136/469 [00:01<00:02, 124.45it/s]

Step 17,000, (N samples: 2,176,000), Loss: 543.4156, (Recon: 543.4155, KLD: 0.0001), Gradient norm: 1.4373


 51%|█████     | 240/469 [00:01<00:01, 124.95it/s]

Step 17,100, (N samples: 2,188,800), Loss: 543.4518, (Recon: 543.4517, KLD: 0.0002), Gradient norm: 1.4682


 71%|███████   | 331/469 [00:02<00:01, 124.88it/s]

Step 17,200, (N samples: 2,201,600), Loss: 543.4443, (Recon: 543.4442, KLD: 0.0001), Gradient norm: 1.4250


 93%|█████████▎| 435/469 [00:03<00:00, 124.93it/s]

Step 17,300, (N samples: 2,214,400), Loss: 543.4393, (Recon: 543.4390, KLD: 0.0004), Gradient norm: 1.3693


100%|██████████| 469/469 [00:03<00:00, 123.08it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 170.02it/s]


====> Test set loss: 543.4441, (BCE: 543.4441, KLD: 0.0000)
Epoch 38/50


 15%|█▌        | 72/469 [00:00<00:03, 120.52it/s]

Step 17,400, (N samples: 2,227,200), Loss: 543.4252, (Recon: 543.4250, KLD: 0.0002), Gradient norm: 1.3420


 35%|███▍      | 163/469 [00:01<00:02, 124.74it/s]

Step 17,500, (N samples: 2,240,000), Loss: 543.4535, (Recon: 543.4535, KLD: 0.0000), Gradient norm: 1.3852


 57%|█████▋    | 267/469 [00:02<00:01, 125.05it/s]

Step 17,600, (N samples: 2,252,800), Loss: 543.4481, (Recon: 543.4481, KLD: 0.0000), Gradient norm: 1.4808


 79%|███████▉  | 371/469 [00:03<00:00, 125.01it/s]

Step 17,700, (N samples: 2,265,600), Loss: 543.4347, (Recon: 543.4347, KLD: 0.0000), Gradient norm: 1.4432


100%|██████████| 469/469 [00:03<00:00, 123.01it/s]


Step 17,800, (N samples: 2,278,400), Loss: 543.4311, (Recon: 543.4311, KLD: 0.0000), Gradient norm: 1.4092


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.74it/s]


====> Test set loss: 543.4480, (BCE: 543.4480, KLD: 0.0000)
Epoch 39/50


 21%|██        | 97/469 [00:00<00:03, 122.81it/s]

Step 17,900, (N samples: 2,291,200), Loss: 543.4471, (Recon: 543.4471, KLD: 0.0000), Gradient norm: 1.2983


 43%|████▎     | 201/469 [00:01<00:02, 124.51it/s]

Step 18,000, (N samples: 2,304,000), Loss: 543.4280, (Recon: 543.4280, KLD: 0.0000), Gradient norm: 1.3906


 62%|██████▏   | 292/469 [00:02<00:01, 124.92it/s]

Step 18,100, (N samples: 2,316,800), Loss: 543.4444, (Recon: 543.4443, KLD: 0.0000), Gradient norm: 1.3784


 84%|████████▍ | 396/469 [00:03<00:00, 124.90it/s]

Step 18,200, (N samples: 2,329,600), Loss: 543.4549, (Recon: 543.4549, KLD: 0.0000), Gradient norm: 1.3839


100%|██████████| 469/469 [00:03<00:00, 123.06it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.69it/s]


====> Test set loss: 543.4416, (BCE: 543.4416, KLD: 0.0000)
Epoch 40/50


  2%|▏         | 9/469 [00:00<00:05, 87.05it/s]

Step 18,300, (N samples: 2,342,400), Loss: 543.4511, (Recon: 543.4511, KLD: 0.0000), Gradient norm: 1.3656


 26%|██▌       | 123/469 [00:01<00:02, 124.00it/s]

Step 18,400, (N samples: 2,355,200), Loss: 543.4448, (Recon: 543.4448, KLD: 0.0000), Gradient norm: 1.4314


 48%|████▊     | 227/469 [00:01<00:01, 123.47it/s]

Step 18,500, (N samples: 2,368,000), Loss: 543.4512, (Recon: 543.4512, KLD: 0.0000), Gradient norm: 1.3716


 71%|███████   | 331/469 [00:02<00:01, 124.74it/s]

Step 18,600, (N samples: 2,380,800), Loss: 543.4322, (Recon: 543.4322, KLD: 0.0000), Gradient norm: 1.3965


 93%|█████████▎| 435/469 [00:03<00:00, 124.86it/s]

Step 18,700, (N samples: 2,393,600), Loss: 543.4312, (Recon: 543.4312, KLD: 0.0000), Gradient norm: 1.3997


100%|██████████| 469/469 [00:03<00:00, 122.52it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 111.40it/s]


====> Test set loss: 543.4450, (BCE: 543.4450, KLD: 0.0000)
Epoch 41/50


 12%|█▏        | 57/469 [00:00<00:04, 95.98it/s]

Step 18,800, (N samples: 2,406,400), Loss: 543.4496, (Recon: 543.4496, KLD: 0.0000), Gradient norm: 1.3685


 34%|███▍      | 161/469 [00:01<00:02, 123.22it/s]

Step 18,900, (N samples: 2,419,200), Loss: 543.4492, (Recon: 543.4492, KLD: 0.0000), Gradient norm: 1.2888


 57%|█████▋    | 265/469 [00:02<00:01, 124.65it/s]

Step 19,000, (N samples: 2,432,000), Loss: 543.4427, (Recon: 543.4426, KLD: 0.0000), Gradient norm: 1.4400


 76%|███████▌  | 356/469 [00:03<00:00, 124.89it/s]

Step 19,100, (N samples: 2,444,800), Loss: 543.4337, (Recon: 543.4337, KLD: 0.0000), Gradient norm: 1.3323


 98%|█████████▊| 460/469 [00:03<00:00, 124.97it/s]

Step 19,200, (N samples: 2,457,600), Loss: 543.4509, (Recon: 543.4509, KLD: 0.0000), Gradient norm: 1.4010


100%|██████████| 469/469 [00:03<00:00, 118.71it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.87it/s]


====> Test set loss: 543.4412, (BCE: 543.4412, KLD: 0.0000)
Epoch 42/50


 21%|██        | 97/469 [00:00<00:03, 122.82it/s]

Step 19,300, (N samples: 2,470,400), Loss: 543.4453, (Recon: 543.4452, KLD: 0.0001), Gradient norm: 1.5341


 40%|████      | 188/469 [00:01<00:02, 119.09it/s]

Step 19,400, (N samples: 2,483,200), Loss: 543.4477, (Recon: 543.4476, KLD: 0.0001), Gradient norm: 1.3865


 62%|██████▏   | 291/469 [00:02<00:01, 122.77it/s]

Step 19,500, (N samples: 2,496,000), Loss: 543.4484, (Recon: 543.4484, KLD: 0.0000), Gradient norm: 1.3854


 84%|████████▍ | 395/469 [00:03<00:00, 124.64it/s]

Step 19,600, (N samples: 2,508,800), Loss: 543.4419, (Recon: 543.4419, KLD: 0.0000), Gradient norm: 1.4312


100%|██████████| 469/469 [00:03<00:00, 121.84it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.65it/s]


====> Test set loss: 543.4426, (BCE: 543.4426, KLD: 0.0000)
Epoch 43/50


  2%|▏         | 9/469 [00:00<00:05, 85.44it/s]

Step 19,700, (N samples: 2,521,600), Loss: 543.4382, (Recon: 543.4382, KLD: 0.0000), Gradient norm: 1.3654


 26%|██▌       | 123/469 [00:01<00:02, 123.95it/s]

Step 19,800, (N samples: 2,534,400), Loss: 543.4551, (Recon: 543.4551, KLD: 0.0000), Gradient norm: 1.3826


 48%|████▊     | 227/469 [00:01<00:01, 124.76it/s]

Step 19,900, (N samples: 2,547,200), Loss: 543.4351, (Recon: 543.4351, KLD: 0.0001), Gradient norm: 1.5057


 68%|██████▊   | 318/469 [00:02<00:01, 124.75it/s]

Step 20,000, (N samples: 2,560,000), Loss: 543.4466, (Recon: 543.4464, KLD: 0.0002), Gradient norm: 1.4534


 90%|████████▉ | 422/469 [00:03<00:00, 124.90it/s]

Step 20,100, (N samples: 2,572,800), Loss: 543.4479, (Recon: 543.4479, KLD: 0.0000), Gradient norm: 1.4084


100%|██████████| 469/469 [00:03<00:00, 122.94it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.30it/s]


====> Test set loss: 543.4392, (BCE: 543.4392, KLD: 0.0000)
Epoch 44/50


 12%|█▏        | 58/469 [00:00<00:03, 118.40it/s]

Step 20,200, (N samples: 2,585,600), Loss: 543.4229, (Recon: 543.4229, KLD: 0.0001), Gradient norm: 1.4677


 32%|███▏      | 149/469 [00:01<00:02, 124.43it/s]

Step 20,300, (N samples: 2,598,400), Loss: 543.4378, (Recon: 543.4378, KLD: 0.0000), Gradient norm: 1.4892


 54%|█████▍    | 253/469 [00:02<00:01, 125.01it/s]

Step 20,400, (N samples: 2,611,200), Loss: 543.4380, (Recon: 543.4380, KLD: 0.0000), Gradient norm: 1.4041


 76%|███████▌  | 357/469 [00:02<00:00, 125.04it/s]

Step 20,500, (N samples: 2,624,000), Loss: 543.4399, (Recon: 543.4399, KLD: 0.0000), Gradient norm: 1.3549


 96%|█████████▌| 448/469 [00:03<00:00, 123.46it/s]

Step 20,600, (N samples: 2,636,800), Loss: 543.4334, (Recon: 543.4334, KLD: 0.0000), Gradient norm: 1.4124


100%|██████████| 469/469 [00:03<00:00, 122.44it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.33it/s]


====> Test set loss: 543.4426, (BCE: 543.4426, KLD: 0.0000)
Epoch 45/50


 18%|█▊        | 84/469 [00:00<00:03, 122.18it/s]

Step 20,700, (N samples: 2,649,600), Loss: 543.4584, (Recon: 543.4584, KLD: 0.0000), Gradient norm: 1.3994


 40%|████      | 188/469 [00:01<00:02, 124.75it/s]

Step 20,800, (N samples: 2,662,400), Loss: 543.4468, (Recon: 543.4467, KLD: 0.0001), Gradient norm: 1.3411


 59%|█████▉    | 279/469 [00:02<00:01, 124.94it/s]

Step 20,900, (N samples: 2,675,200), Loss: 543.4314, (Recon: 543.4313, KLD: 0.0001), Gradient norm: 1.4543


 82%|████████▏ | 383/469 [00:03<00:00, 125.04it/s]

Step 21,000, (N samples: 2,688,000), Loss: 543.4385, (Recon: 543.4384, KLD: 0.0001), Gradient norm: 1.4496


100%|██████████| 469/469 [00:03<00:00, 123.09it/s]


Step 21,100, (N samples: 2,700,800), Loss: 543.4388, (Recon: 543.4388, KLD: 0.0000), Gradient norm: 1.4566


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.61it/s]


====> Test set loss: 543.4406, (BCE: 543.4405, KLD: 0.0000)
Epoch 46/50


 23%|██▎       | 110/469 [00:00<00:02, 123.52it/s]

Step 21,200, (N samples: 2,713,600), Loss: 543.4576, (Recon: 543.4576, KLD: 0.0001), Gradient norm: 1.4539


 46%|████▌     | 214/469 [00:01<00:02, 124.91it/s]

Step 21,300, (N samples: 2,726,400), Loss: 543.4526, (Recon: 543.4526, KLD: 0.0000), Gradient norm: 1.3860


 68%|██████▊   | 318/469 [00:02<00:01, 124.96it/s]

Step 21,400, (N samples: 2,739,200), Loss: 543.4251, (Recon: 543.4251, KLD: 0.0000), Gradient norm: 1.4769


 87%|████████▋ | 409/469 [00:03<00:00, 124.93it/s]

Step 21,500, (N samples: 2,752,000), Loss: 543.4407, (Recon: 543.4407, KLD: 0.0000), Gradient norm: 1.3447


100%|██████████| 469/469 [00:03<00:00, 123.11it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.40it/s]


====> Test set loss: 543.4444, (BCE: 543.4443, KLD: 0.0000)
Epoch 47/50


  9%|▉         | 44/469 [00:00<00:03, 111.90it/s]

Step 21,600, (N samples: 2,764,800), Loss: 543.4365, (Recon: 543.4365, KLD: 0.0000), Gradient norm: 1.4076


 32%|███▏      | 148/469 [00:01<00:02, 123.50it/s]

Step 21,700, (N samples: 2,777,600), Loss: 543.4168, (Recon: 543.4167, KLD: 0.0001), Gradient norm: 1.3256


 54%|█████▎    | 252/469 [00:02<00:01, 124.78it/s]

Step 21,800, (N samples: 2,790,400), Loss: 543.4401, (Recon: 543.4400, KLD: 0.0002), Gradient norm: 1.4699


 73%|███████▎  | 343/469 [00:02<00:01, 124.96it/s]

Step 21,900, (N samples: 2,803,200), Loss: 543.4514, (Recon: 543.4513, KLD: 0.0001), Gradient norm: 1.3664


 95%|█████████▌| 447/469 [00:03<00:00, 124.93it/s]

Step 22,000, (N samples: 2,816,000), Loss: 543.4232, (Recon: 543.4231, KLD: 0.0000), Gradient norm: 1.3899


100%|██████████| 469/469 [00:03<00:00, 122.54it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.67it/s]


====> Test set loss: 543.4389, (BCE: 543.4387, KLD: 0.0002)
Epoch 48/50


 15%|█▌        | 71/469 [00:00<00:03, 120.65it/s]

Step 22,100, (N samples: 2,828,800), Loss: 543.4420, (Recon: 543.4420, KLD: 0.0000), Gradient norm: 1.4110


 37%|███▋      | 175/469 [00:01<00:02, 124.59it/s]

Step 22,200, (N samples: 2,841,600), Loss: 543.4475, (Recon: 543.4475, KLD: 0.0000), Gradient norm: 1.4603


 59%|█████▉    | 279/469 [00:02<00:01, 124.73it/s]

Step 22,300, (N samples: 2,854,400), Loss: 543.4493, (Recon: 543.4492, KLD: 0.0001), Gradient norm: 1.4820


 82%|████████▏ | 383/469 [00:03<00:00, 124.89it/s]

Step 22,400, (N samples: 2,867,200), Loss: 543.4332, (Recon: 543.4329, KLD: 0.0003), Gradient norm: 1.4004


100%|██████████| 469/469 [00:03<00:00, 123.02it/s]


Step 22,500, (N samples: 2,880,000), Loss: 543.4561, (Recon: 543.4561, KLD: 0.0000), Gradient norm: 1.4081


Testing: 100%|██████████| 79/79 [00:00<00:00, 171.49it/s]


====> Test set loss: 543.4380, (BCE: 543.4380, KLD: 0.0000)
Epoch 49/50


 23%|██▎       | 110/469 [00:00<00:02, 123.34it/s]

Step 22,600, (N samples: 2,892,800), Loss: 543.4319, (Recon: 543.4319, KLD: 0.0000), Gradient norm: 1.4150


 43%|████▎     | 201/469 [00:01<00:02, 124.40it/s]

Step 22,700, (N samples: 2,905,600), Loss: 543.4300, (Recon: 543.4300, KLD: 0.0000), Gradient norm: 1.4121


 65%|██████▌   | 305/469 [00:02<00:01, 123.15it/s]

Step 22,800, (N samples: 2,918,400), Loss: 543.4312, (Recon: 543.4312, KLD: 0.0000), Gradient norm: 1.5312


 87%|████████▋ | 409/469 [00:03<00:00, 124.65it/s]

Step 22,900, (N samples: 2,931,200), Loss: 543.4642, (Recon: 543.4642, KLD: 0.0000), Gradient norm: 1.5258


100%|██████████| 469/469 [00:03<00:00, 122.50it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.68it/s]


====> Test set loss: 543.4426, (BCE: 543.4426, KLD: 0.0000)
Epoch 50/50


 10%|▉         | 45/469 [00:00<00:03, 115.03it/s]

Step 23,000, (N samples: 2,944,000), Loss: 543.4356, (Recon: 543.4355, KLD: 0.0000), Gradient norm: 1.4339


 29%|██▉       | 136/469 [00:01<00:02, 124.24it/s]

Step 23,100, (N samples: 2,956,800), Loss: 543.4478, (Recon: 543.4476, KLD: 0.0002), Gradient norm: 1.5217


 51%|█████     | 240/469 [00:01<00:01, 124.79it/s]

Step 23,200, (N samples: 2,969,600), Loss: 543.4511, (Recon: 543.4510, KLD: 0.0001), Gradient norm: 1.4697


 73%|███████▎  | 344/469 [00:02<00:00, 125.00it/s]

Step 23,300, (N samples: 2,982,400), Loss: 543.4398, (Recon: 543.4398, KLD: 0.0000), Gradient norm: 1.5957


 93%|█████████▎| 435/469 [00:03<00:00, 124.94it/s]

Step 23,400, (N samples: 2,995,200), Loss: 543.4418, (Recon: 543.4418, KLD: 0.0001), Gradient norm: 1.4518


100%|██████████| 469/469 [00:03<00:00, 123.08it/s]
Testing: 100%|██████████| 79/79 [00:00<00:00, 171.73it/s]

====> Test set loss: 543.4399, (BCE: 543.4399, KLD: 0.0000)





In [15]:
# Flush both TensorBoard writers created in the setup cell so that every pending
# event is on disk before the dashboard is opened. The single `writer` object is
# commented out in the setup cell, so referencing it fails on a fresh kernel.
for summary_writer in (writer_train, writer_test):
    summary_writer.flush()

In [16]:
# Load the TensorBoard notebook extension; this registers the `%tensorboard` line magic used below.
%load_ext tensorboard

In [23]:
%tensorboard --logdir ../experiments/VAE_MNIST/20241028-004306/

Reusing TensorBoard on port 6011 (pid 1110344), started 0:00:12 ago. (Use '!kill 1110344' to kill it.)