In [3]:
%load_ext autoreload
# %reload_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
from methylVA.mnist.dataset import get_methyl_data_loaders

# Identifier of the filtered feature set; it is interpolated into the data file names.
data_id = 0.1
# Mini-batch size passed to the data loaders.
batch_size = 128

# All four pickles live in the same directory, so the directory is spelled once.
data_dir = "../data/dimension_reduction/highly_variable_features"

# The data files depend on data_id; the metadata files do not.
train_data_path = f"{data_dir}/train_data_filtered_{data_id}.pkl"
train_metadata_path = f"{data_dir}/train_metadata_with_labels.pkl"
test_data_path = f"{data_dir}/test_data_filtered_{data_id}.pkl"
test_metadata_path = f"{data_dir}/test_metadata_with_labels.pkl"


# Positional arguments for the loader factory, in the order it is called with:
# train data, train metadata, test data, test metadata.
loader_paths = (
    train_data_path,
    train_metadata_path,
    test_data_path,
    test_metadata_path,
)

# Build both loaders in one call.
# NOTE(review): the recorded output of this cell reports NaN values in the data after
# conversion (printed twice) — confirm they are handled before the loss is computed.
train_loader, test_loader = get_methyl_data_loaders(*loader_paths, batch_size=batch_size)


Found NaN values in the data after conversion.
Found NaN values in the data after conversion.


In [20]:
# Draw a single batch only to read off the feature dimension; the second element of
# the batch is discarded here.
data_batch, _ = next(iter(train_loader))

# Row counts come from the datasets behind the loaders, not from the number of batches.
num_train_rows = len(train_loader.dataset)
num_test_rows = len(test_loader.dataset)

# Report the three sizes, one per line.
for caption, count in (
    ("Number of features in each dataset:", data_batch.shape[1]),
    ("Number of rows in the training dataset:", num_train_rows),
    ("Number of rows in the test dataset:", num_test_rows),
):
    print(caption, count)

Number of features in each dataset: 2605
Number of rows in the training dataset: 33360
Number of rows in the test dataset: 3707


In [21]:
from datetime import datetime

import torch
from torch.utils.tensorboard import SummaryWriter

from methylVA.mnist.model import VAE
from methylVA.mnist.training import train, test

# --- Model size ---
# The input width is read from the sample batch, so it follows the loaded feature file.
input_dim = data_batch.shape[1]
hidden_dim, latent_dim = 2048, 32

# --- Optimisation ---
num_epochs = 100
learning_rate, weight_decay = 1e-3, 1e-2

# --- Loss weighting and run naming ---
# NOTE(review): in the cells shown in this notebook kl_weight only feeds the run name;
# it is not passed to VAE(...), train(...) or test(...) — confirm the loss uses it.
kl_weight = 1.0
# Experiment name; used below as the TensorBoard log sub-directory.
name = f'VAE_methyl_data_{data_id}_latent_{latent_dim}_kl_{kl_weight}'



In [22]:

# Take the timestamp once so the train and test writers always share the same run
# stamp; two separate datetime.now() calls can fall on either side of a second boundary
# and produce mismatched directory names for the same run.
run_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

# One TensorBoard writer per split, under ../experiments/<name>/<split>/<timestamp>.
writer_train = SummaryWriter(f'../experiments/{name}/train/{run_timestamp}')
writer_test = SummaryWriter(f'../experiments/{name}/test/{run_timestamp}')

In [23]:
# Seed torch's global RNG before the model is built so that weight initialisation, and
# any later draws from that generator, repeat across kernel restarts.
seed = 42
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the VAE with the configured sizes and move its parameters to the chosen device.
model = VAE(input_dim=input_dim, latent_dim=latent_dim, hidden_dim=hidden_dim).to(device)

# AdamW over all model parameters, with the configured learning rate and weight decay.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [24]:
# Display the module tree to sanity-check the layer sizes. In the recorded output the
# encoder's last layer emits 64 values (2 x latent_dim) while the decoder takes 32 —
# presumably the encoder output is split into two latent_dim-sized halves; confirm in
# the VAE class.
model

VAE(
  (encoder): Sequential(
    (0): Linear(in_features=2605, out_features=2048, bias=True)
    (1): SiLU()
    (2): Linear(in_features=2048, out_features=1024, bias=True)
    (3): SiLU()
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): SiLU()
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): SiLU()
    (8): Linear(in_features=256, out_features=64, bias=True)
  )
  (softplus): Softplus(beta=1.0, threshold=20.0)
  (decoder): Sequential(
    (0): Linear(in_features=32, out_features=256, bias=True)
    (1): SiLU()
    (2): Linear(in_features=256, out_features=512, bias=True)
    (3): SiLU()
    (4): Linear(in_features=512, out_features=1024, bias=True)
    (5): SiLU()
    (6): Linear(in_features=1024, out_features=2048, bias=True)
    (7): SiLU()
    (8): Linear(in_features=2048, out_features=2605, bias=True)
    (9): Sigmoid()
  )
)

In [25]:
from methylVA.mnist.training import train, test


# Running counter handed to train() and replaced by its return value each epoch, so it
# carries across epochs (the logged "Step" numbers keep increasing between epochs).
prev_updates = 0
try:
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        # One pass over the training data, then an evaluation pass on the test data.
        prev_updates = train(model, train_loader, optimizer, prev_updates, writer=writer_train)
        test(model, test_loader, prev_updates, writer=writer_test)
finally:
    # SummaryWriter buffers events; flush so the latest scalars are on disk even if the
    # loop is interrupted. The writers stay open, so the cell can be re-run.
    writer_train.flush()
    writer_test.flush()

Epoch 1/100


  1%|          | 3/261 [00:00<00:12, 21.27it/s]

Step 0, (N samples: 0), Loss: 1809.1572, (Recon: 1805.5723, KLD: 3.5849), Gradient norm: 6.9216


 39%|███▉      | 103/261 [00:03<00:08, 19.30it/s]

Step 100, (N samples: 12,800), Loss: 1168.3179, (Recon: 1163.1570, KLD: 5.1609), Gradient norm: 204.8409


 78%|███████▊  | 204/261 [00:07<00:01, 31.32it/s]

Step 200, (N samples: 25,600), Loss: 1220.0104, (Recon: 1213.8040, KLD: 6.2064), Gradient norm: 280.6808


100%|██████████| 261/261 [00:08<00:00, 29.41it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.45it/s]


====> Test set loss: 1167.8905, (BCE: 1160.9062, KLD: 6.9843)
Epoch 2/100


 18%|█▊        | 46/261 [00:01<00:06, 31.08it/s]

Step 300, (N samples: 38,400), Loss: 1179.2756, (Recon: 1171.7842, KLD: 7.4915), Gradient norm: 131.2437


 56%|█████▌    | 146/261 [00:04<00:03, 31.57it/s]

Step 400, (N samples: 51,200), Loss: 1169.5092, (Recon: 1162.5339, KLD: 6.9752), Gradient norm: 251.3571


 94%|█████████▍| 246/261 [00:07<00:00, 31.63it/s]

Step 500, (N samples: 64,000), Loss: 1117.8185, (Recon: 1110.4697, KLD: 7.3488), Gradient norm: 172.2022


100%|██████████| 261/261 [00:08<00:00, 31.28it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.26it/s]


====> Test set loss: 1141.0466, (BCE: 1132.9405, KLD: 8.1061)
Epoch 3/100


 31%|███▏      | 82/261 [00:02<00:05, 31.53it/s]

Step 600, (N samples: 76,800), Loss: 1156.1937, (Recon: 1148.7102, KLD: 7.4836), Gradient norm: 148.1809


 70%|██████▉   | 182/261 [00:05<00:02, 31.52it/s]

Step 700, (N samples: 89,600), Loss: 1093.3738, (Recon: 1086.1687, KLD: 7.2050), Gradient norm: 102.6541


100%|██████████| 261/261 [00:08<00:00, 31.37it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.97it/s]


====> Test set loss: 1115.7492, (BCE: 1108.2022, KLD: 7.5469)
Epoch 4/100


  8%|▊         | 22/261 [00:00<00:07, 30.55it/s]

Step 800, (N samples: 102,400), Loss: 1140.9163, (Recon: 1133.2953, KLD: 7.6210), Gradient norm: 94.7164


 47%|████▋     | 122/261 [00:03<00:04, 31.36it/s]

Step 900, (N samples: 115,200), Loss: 1071.1378, (Recon: 1063.4312, KLD: 7.7067), Gradient norm: 104.0897


 85%|████████▌ | 222/261 [00:07<00:01, 31.16it/s]

Step 1,000, (N samples: 128,000), Loss: 1083.3619, (Recon: 1075.4352, KLD: 7.9268), Gradient norm: 208.1615


100%|██████████| 261/261 [00:08<00:00, 31.24it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.20it/s]


====> Test set loss: 1106.3598, (BCE: 1098.4138, KLD: 7.9460)
Epoch 5/100


 24%|██▍       | 62/261 [00:02<00:06, 31.55it/s]

Step 1,100, (N samples: 140,800), Loss: 1126.1216, (Recon: 1118.2339, KLD: 7.8877), Gradient norm: 109.9891


 62%|██████▏   | 162/261 [00:05<00:03, 31.53it/s]

Step 1,200, (N samples: 153,600), Loss: 1073.7318, (Recon: 1065.5826, KLD: 8.1491), Gradient norm: 131.4223


100%|██████████| 261/261 [00:08<00:00, 31.42it/s]


Step 1,300, (N samples: 166,400), Loss: 1093.5249, (Recon: 1085.8110, KLD: 7.7138), Gradient norm: 131.8011


Testing: 100%|██████████| 29/29 [00:00<00:00, 38.41it/s]


====> Test set loss: 1101.3809, (BCE: 1093.3983, KLD: 7.9826)
Epoch 6/100


 39%|███▉      | 102/261 [00:03<00:05, 31.75it/s]

Step 1,400, (N samples: 179,200), Loss: 1128.7625, (Recon: 1120.6244, KLD: 8.1381), Gradient norm: 161.0731


 77%|███████▋  | 202/261 [00:06<00:01, 31.61it/s]

Step 1,500, (N samples: 192,000), Loss: 1126.5209, (Recon: 1118.2032, KLD: 8.3176), Gradient norm: 88.2990


100%|██████████| 261/261 [00:08<00:00, 31.61it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.38it/s]


====> Test set loss: 1096.7442, (BCE: 1088.4582, KLD: 8.2860)
Epoch 7/100


 15%|█▍        | 38/261 [00:01<00:07, 31.58it/s]

Step 1,600, (N samples: 204,800), Loss: 1058.5165, (Recon: 1050.1010, KLD: 8.4156), Gradient norm: 134.3873


 54%|█████▍    | 141/261 [00:04<00:03, 31.56it/s]

Step 1,700, (N samples: 217,600), Loss: 1058.0903, (Recon: 1050.2942, KLD: 7.7962), Gradient norm: 126.5860


 92%|█████████▏| 241/261 [00:07<00:00, 31.57it/s]

Step 1,800, (N samples: 230,400), Loss: 1082.5463, (Recon: 1074.7535, KLD: 7.7927), Gradient norm: 83.7865


100%|██████████| 261/261 [00:08<00:00, 31.20it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.89it/s]


====> Test set loss: 1094.0930, (BCE: 1085.7232, KLD: 8.3698)
Epoch 8/100


 30%|██▉       | 78/261 [00:02<00:05, 31.10it/s]

Step 1,900, (N samples: 243,200), Loss: 1102.7714, (Recon: 1094.1199, KLD: 8.6515), Gradient norm: 121.3478


 68%|██████▊   | 178/261 [00:05<00:02, 31.24it/s]

Step 2,000, (N samples: 256,000), Loss: 1170.2620, (Recon: 1162.1674, KLD: 8.0946), Gradient norm: 175.2609


100%|██████████| 261/261 [00:08<00:00, 31.29it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.55it/s]


====> Test set loss: 1091.4953, (BCE: 1083.2587, KLD: 8.2366)
Epoch 9/100


  7%|▋         | 18/261 [00:00<00:08, 30.20it/s]

Step 2,100, (N samples: 268,800), Loss: 1143.3378, (Recon: 1134.5703, KLD: 8.7674), Gradient norm: 115.5322


 45%|████▌     | 118/261 [00:03<00:04, 30.91it/s]

Step 2,200, (N samples: 281,600), Loss: 1126.7393, (Recon: 1117.9347, KLD: 8.8046), Gradient norm: 106.0162


 84%|████████▎ | 218/261 [00:06<00:01, 31.52it/s]

Step 2,300, (N samples: 294,400), Loss: 1120.9407, (Recon: 1112.9109, KLD: 8.0297), Gradient norm: 104.2832


100%|██████████| 261/261 [00:08<00:00, 31.33it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.41it/s]


====> Test set loss: 1092.4299, (BCE: 1084.1846, KLD: 8.2453)
Epoch 10/100


 22%|██▏       | 58/261 [00:01<00:06, 31.47it/s]

Step 2,400, (N samples: 307,200), Loss: 1094.4446, (Recon: 1085.9006, KLD: 8.5439), Gradient norm: 153.3119


 59%|█████▉    | 154/261 [00:04<00:03, 30.27it/s]

Step 2,500, (N samples: 320,000), Loss: 1133.4290, (Recon: 1124.7162, KLD: 8.7128), Gradient norm: 222.5391


 99%|█████████▉| 258/261 [00:08<00:00, 31.64it/s]

Step 2,600, (N samples: 332,800), Loss: 1113.7899, (Recon: 1105.3323, KLD: 8.4577), Gradient norm: 127.2454


100%|██████████| 261/261 [00:08<00:00, 31.40it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.52it/s]


====> Test set loss: 1088.8875, (BCE: 1080.4217, KLD: 8.4658)
Epoch 11/100


 36%|███▌      | 94/261 [00:03<00:05, 31.54it/s]

Step 2,700, (N samples: 345,600), Loss: 1054.6350, (Recon: 1046.1560, KLD: 8.4789), Gradient norm: 137.3794


 74%|███████▍  | 194/261 [00:06<00:02, 31.01it/s]

Step 2,800, (N samples: 358,400), Loss: 1134.0989, (Recon: 1125.1045, KLD: 8.9944), Gradient norm: 169.8556


100%|██████████| 261/261 [00:08<00:00, 31.36it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.32it/s]


====> Test set loss: 1086.3861, (BCE: 1077.8109, KLD: 8.5752)
Epoch 12/100


 13%|█▎        | 34/261 [00:01<00:07, 31.35it/s]

Step 2,900, (N samples: 371,200), Loss: 1063.5616, (Recon: 1055.5088, KLD: 8.0529), Gradient norm: 81.4569


 51%|█████▏    | 134/261 [00:04<00:04, 31.56it/s]

Step 3,000, (N samples: 384,000), Loss: 1107.1980, (Recon: 1098.5322, KLD: 8.6658), Gradient norm: 138.8624


 90%|████████▉ | 234/261 [00:07<00:00, 30.88it/s]

Step 3,100, (N samples: 396,800), Loss: 1093.7804, (Recon: 1085.1411, KLD: 8.6393), Gradient norm: 177.1222


100%|██████████| 261/261 [00:08<00:00, 31.19it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.34it/s]


====> Test set loss: 1085.4086, (BCE: 1076.7050, KLD: 8.7036)
Epoch 13/100


 28%|██▊       | 74/261 [00:02<00:05, 31.68it/s]

Step 3,200, (N samples: 409,600), Loss: 1076.2301, (Recon: 1067.9253, KLD: 8.3049), Gradient norm: 143.3295


 67%|██████▋   | 174/261 [00:05<00:02, 31.53it/s]

Step 3,300, (N samples: 422,400), Loss: 1114.9391, (Recon: 1105.9880, KLD: 8.9511), Gradient norm: 133.6549


100%|██████████| 261/261 [00:08<00:00, 30.58it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.25it/s]


====> Test set loss: 1084.1403, (BCE: 1075.6116, KLD: 8.5287)
Epoch 14/100


  5%|▌         | 14/261 [00:00<00:08, 29.43it/s]

Step 3,400, (N samples: 435,200), Loss: 1125.0308, (Recon: 1116.2493, KLD: 8.7815), Gradient norm: 157.4598


 44%|████▎     | 114/261 [00:03<00:04, 31.09it/s]

Step 3,500, (N samples: 448,000), Loss: 1070.5236, (Recon: 1062.3853, KLD: 8.1383), Gradient norm: 106.2171


 81%|████████  | 211/261 [00:07<00:02, 23.86it/s]

Step 3,600, (N samples: 460,800), Loss: 1050.9250, (Recon: 1042.2568, KLD: 8.6682), Gradient norm: 137.4900


100%|██████████| 261/261 [00:08<00:00, 29.39it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.17it/s]


====> Test set loss: 1082.2359, (BCE: 1073.7271, KLD: 8.5088)
Epoch 15/100


 19%|█▉        | 50/261 [00:01<00:06, 30.94it/s]

Step 3,700, (N samples: 473,600), Loss: 1121.9867, (Recon: 1113.0779, KLD: 8.9089), Gradient norm: 168.4321


 57%|█████▋    | 150/261 [00:04<00:03, 31.32it/s]

Step 3,800, (N samples: 486,400), Loss: 1063.0007, (Recon: 1054.2515, KLD: 8.7492), Gradient norm: 106.8783


 96%|█████████▌| 250/261 [00:08<00:00, 31.28it/s]

Step 3,900, (N samples: 499,200), Loss: 1094.9872, (Recon: 1086.1370, KLD: 8.8502), Gradient norm: 158.2167


100%|██████████| 261/261 [00:08<00:00, 31.16it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.72it/s]


====> Test set loss: 1083.5439, (BCE: 1074.9386, KLD: 8.6053)
Epoch 16/100


 34%|███▍      | 90/261 [00:02<00:05, 31.50it/s]

Step 4,000, (N samples: 512,000), Loss: 1091.4827, (Recon: 1082.9028, KLD: 8.5798), Gradient norm: 165.6600


 73%|███████▎  | 190/261 [00:06<00:02, 30.73it/s]

Step 4,100, (N samples: 524,800), Loss: 1034.6656, (Recon: 1026.2515, KLD: 8.4142), Gradient norm: 126.8770


100%|██████████| 261/261 [00:08<00:00, 31.27it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.03it/s]


====> Test set loss: 1080.1107, (BCE: 1071.4877, KLD: 8.6230)
Epoch 17/100


 11%|█▏        | 30/261 [00:01<00:07, 30.90it/s]

Step 4,200, (N samples: 537,600), Loss: 1083.7133, (Recon: 1075.2078, KLD: 8.5055), Gradient norm: 144.7211


 50%|████▉     | 130/261 [00:04<00:04, 31.56it/s]

Step 4,300, (N samples: 550,400), Loss: 1068.7358, (Recon: 1059.7996, KLD: 8.9363), Gradient norm: 175.2900


 88%|████████▊ | 230/261 [00:07<00:01, 30.90it/s]

Step 4,400, (N samples: 563,200), Loss: 1065.3818, (Recon: 1056.8896, KLD: 8.4921), Gradient norm: 169.1721


100%|██████████| 261/261 [00:08<00:00, 31.24it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.51it/s]


====> Test set loss: 1080.0066, (BCE: 1071.4110, KLD: 8.5956)
Epoch 18/100


 27%|██▋       | 70/261 [00:02<00:06, 31.33it/s]

Step 4,500, (N samples: 576,000), Loss: 1062.1595, (Recon: 1053.6067, KLD: 8.5528), Gradient norm: 145.6790


 65%|██████▌   | 170/261 [00:05<00:02, 31.43it/s]

Step 4,600, (N samples: 588,800), Loss: 1082.0735, (Recon: 1072.9144, KLD: 9.1590), Gradient norm: 157.5896


100%|██████████| 261/261 [00:08<00:00, 31.23it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.49it/s]


====> Test set loss: 1080.3656, (BCE: 1071.5805, KLD: 8.7852)
Epoch 19/100


  1%|          | 3/261 [00:00<00:11, 22.72it/s]

Step 4,700, (N samples: 601,600), Loss: 1082.4465, (Recon: 1073.7677, KLD: 8.6789), Gradient norm: 156.9509


 41%|████      | 106/261 [00:03<00:05, 30.62it/s]

Step 4,800, (N samples: 614,400), Loss: 1149.8552, (Recon: 1140.7566, KLD: 9.0986), Gradient norm: 177.6912


 79%|███████▉  | 206/261 [00:06<00:01, 31.39it/s]

Step 4,900, (N samples: 627,200), Loss: 1080.3167, (Recon: 1071.4030, KLD: 8.9137), Gradient norm: 194.2326


100%|██████████| 261/261 [00:08<00:00, 31.16it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.59it/s]


====> Test set loss: 1079.9387, (BCE: 1071.2789, KLD: 8.6598)
Epoch 20/100


 18%|█▊        | 46/261 [00:01<00:06, 31.44it/s]

Step 5,000, (N samples: 640,000), Loss: 1062.3660, (Recon: 1053.6763, KLD: 8.6897), Gradient norm: 159.3118


 56%|█████▌    | 146/261 [00:04<00:03, 31.44it/s]

Step 5,100, (N samples: 652,800), Loss: 1134.2296, (Recon: 1125.0588, KLD: 9.1707), Gradient norm: 258.4303


 94%|█████████▍| 246/261 [00:07<00:00, 31.49it/s]

Step 5,200, (N samples: 665,600), Loss: 1079.6414, (Recon: 1070.9518, KLD: 8.6895), Gradient norm: 349.0124


100%|██████████| 261/261 [00:08<00:00, 31.30it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.14it/s]


====> Test set loss: 1078.8735, (BCE: 1070.0527, KLD: 8.8208)
Epoch 21/100


 33%|███▎      | 86/261 [00:02<00:05, 31.19it/s]

Step 5,300, (N samples: 678,400), Loss: 1095.0828, (Recon: 1086.2427, KLD: 8.8401), Gradient norm: 188.6577


 71%|███████▏  | 186/261 [00:05<00:02, 31.41it/s]

Step 5,400, (N samples: 691,200), Loss: 1138.2004, (Recon: 1128.8762, KLD: 9.3242), Gradient norm: 191.7232


100%|██████████| 261/261 [00:08<00:00, 31.10it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.90it/s]


====> Test set loss: 1078.8437, (BCE: 1070.2223, KLD: 8.6214)
Epoch 22/100


 10%|▉         | 26/261 [00:00<00:07, 30.93it/s]

Step 5,500, (N samples: 704,000), Loss: 1015.4706, (Recon: 1006.9929, KLD: 8.4777), Gradient norm: 123.2572


 48%|████▊     | 126/261 [00:04<00:04, 31.53it/s]

Step 5,600, (N samples: 716,800), Loss: 1079.4805, (Recon: 1070.8848, KLD: 8.5958), Gradient norm: 133.9992


 87%|████████▋ | 226/261 [00:07<00:01, 31.36it/s]

Step 5,700, (N samples: 729,600), Loss: 1061.6908, (Recon: 1052.9983, KLD: 8.6925), Gradient norm: 131.9116


100%|██████████| 261/261 [00:08<00:00, 31.27it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.11it/s]


====> Test set loss: 1077.9591, (BCE: 1069.0294, KLD: 8.9297)
Epoch 23/100


 24%|██▍       | 62/261 [00:02<00:06, 31.44it/s]

Step 5,800, (N samples: 742,400), Loss: 1034.4750, (Recon: 1025.7285, KLD: 8.7465), Gradient norm: 143.7333


 62%|██████▏   | 162/261 [00:05<00:03, 31.44it/s]

Step 5,900, (N samples: 755,200), Loss: 1075.8610, (Recon: 1066.9573, KLD: 8.9037), Gradient norm: 165.8539


100%|██████████| 261/261 [00:08<00:00, 31.29it/s]


Step 6,000, (N samples: 768,000), Loss: 1081.9385, (Recon: 1073.0367, KLD: 8.9017), Gradient norm: 154.5281


Testing: 100%|██████████| 29/29 [00:00<00:00, 38.18it/s]


====> Test set loss: 1076.6333, (BCE: 1067.8926, KLD: 8.7407)
Epoch 24/100


 39%|███▉      | 102/261 [00:03<00:05, 31.52it/s]

Step 6,100, (N samples: 780,800), Loss: 1064.7803, (Recon: 1055.9069, KLD: 8.8734), Gradient norm: 134.4588


 77%|███████▋  | 202/261 [00:06<00:01, 31.37it/s]

Step 6,200, (N samples: 793,600), Loss: 1072.4777, (Recon: 1063.6882, KLD: 8.7894), Gradient norm: 127.5489


100%|██████████| 261/261 [00:08<00:00, 31.31it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.97it/s]


====> Test set loss: 1077.1404, (BCE: 1068.4681, KLD: 8.6724)
Epoch 25/100


 16%|█▌        | 42/261 [00:01<00:06, 31.34it/s]

Step 6,300, (N samples: 806,400), Loss: 1083.3615, (Recon: 1074.6865, KLD: 8.6750), Gradient norm: 118.6567


 54%|█████▍    | 142/261 [00:04<00:03, 31.14it/s]

Step 6,400, (N samples: 819,200), Loss: 1099.3170, (Recon: 1090.7715, KLD: 8.5455), Gradient norm: 117.2943


 93%|█████████▎| 242/261 [00:07<00:00, 31.28it/s]

Step 6,500, (N samples: 832,000), Loss: 1087.1757, (Recon: 1078.1555, KLD: 9.0202), Gradient norm: 208.7946


100%|██████████| 261/261 [00:08<00:00, 31.15it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.95it/s]


====> Test set loss: 1075.2398, (BCE: 1066.3230, KLD: 8.9168)
Epoch 26/100


 31%|███▏      | 82/261 [00:02<00:05, 31.54it/s]

Step 6,600, (N samples: 844,800), Loss: 1123.6592, (Recon: 1114.9773, KLD: 8.6818), Gradient norm: 135.2407


 70%|██████▉   | 182/261 [00:05<00:02, 31.50it/s]

Step 6,700, (N samples: 857,600), Loss: 1029.0223, (Recon: 1020.3071, KLD: 8.7152), Gradient norm: 188.9063


100%|██████████| 261/261 [00:08<00:00, 31.28it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.04it/s]


====> Test set loss: 1076.9790, (BCE: 1068.3863, KLD: 8.5927)
Epoch 27/100


  7%|▋         | 18/261 [00:00<00:08, 30.02it/s]

Step 6,800, (N samples: 870,400), Loss: 1050.1948, (Recon: 1040.9764, KLD: 9.2183), Gradient norm: 124.4450


 45%|████▌     | 118/261 [00:03<00:04, 31.35it/s]

Step 6,900, (N samples: 883,200), Loss: 1097.0212, (Recon: 1088.3668, KLD: 8.6545), Gradient norm: 189.9164


 84%|████████▎ | 218/261 [00:06<00:01, 31.14it/s]

Step 7,000, (N samples: 896,000), Loss: 1093.5616, (Recon: 1084.5164, KLD: 9.0453), Gradient norm: 240.5787


100%|██████████| 261/261 [00:08<00:00, 31.17it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 29.67it/s]


====> Test set loss: 1076.1869, (BCE: 1067.4197, KLD: 8.7672)
Epoch 28/100


 23%|██▎       | 60/261 [00:02<00:06, 30.62it/s]

Step 7,100, (N samples: 908,800), Loss: 1074.5874, (Recon: 1065.9526, KLD: 8.6348), Gradient norm: 168.8787


 61%|██████▏   | 160/261 [00:05<00:03, 31.53it/s]

Step 7,200, (N samples: 921,600), Loss: 1085.0354, (Recon: 1076.1240, KLD: 8.9114), Gradient norm: 151.5828


100%|█████████▉| 260/261 [00:08<00:00, 31.43it/s]

Step 7,300, (N samples: 934,400), Loss: 1058.9860, (Recon: 1050.2024, KLD: 8.7835), Gradient norm: 284.1536


100%|██████████| 261/261 [00:08<00:00, 30.10it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.97it/s]


====> Test set loss: 1077.6349, (BCE: 1068.7244, KLD: 8.9106)
Epoch 29/100


 38%|███▊      | 98/261 [00:03<00:05, 31.38it/s]

Step 7,400, (N samples: 947,200), Loss: 1073.6097, (Recon: 1064.9856, KLD: 8.6241), Gradient norm: 237.7992


 76%|███████▌  | 198/261 [00:06<00:02, 31.48it/s]

Step 7,500, (N samples: 960,000), Loss: 1073.7776, (Recon: 1065.0701, KLD: 8.7075), Gradient norm: 173.2011


100%|██████████| 261/261 [00:08<00:00, 31.27it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.86it/s]


====> Test set loss: 1074.7634, (BCE: 1065.6926, KLD: 9.0708)
Epoch 30/100


 15%|█▍        | 38/261 [00:01<00:07, 31.38it/s]

Step 7,600, (N samples: 972,800), Loss: 1046.8098, (Recon: 1037.6785, KLD: 9.1313), Gradient norm: 207.0502


 53%|█████▎    | 138/261 [00:04<00:03, 31.56it/s]

Step 7,700, (N samples: 985,600), Loss: 1093.5007, (Recon: 1084.6780, KLD: 8.8227), Gradient norm: 134.4449


 91%|█████████ | 238/261 [00:07<00:00, 31.48it/s]

Step 7,800, (N samples: 998,400), Loss: 1037.4868, (Recon: 1028.6185, KLD: 8.8683), Gradient norm: 173.2502


100%|██████████| 261/261 [00:08<00:00, 31.28it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.12it/s]


====> Test set loss: 1074.9559, (BCE: 1066.2116, KLD: 8.7443)
Epoch 31/100


 28%|██▊       | 74/261 [00:02<00:05, 31.23it/s]

Step 7,900, (N samples: 1,011,200), Loss: 1118.9884, (Recon: 1110.1232, KLD: 8.8652), Gradient norm: 198.5859


 67%|██████▋   | 174/261 [00:05<00:02, 31.53it/s]

Step 8,000, (N samples: 1,024,000), Loss: 1035.0068, (Recon: 1026.4512, KLD: 8.5557), Gradient norm: 94.9239


100%|██████████| 261/261 [00:08<00:00, 31.29it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.59it/s]


====> Test set loss: 1107.4487, (BCE: 1096.9598, KLD: 10.4888)
Epoch 32/100


  5%|▌         | 14/261 [00:00<00:08, 29.39it/s]

Step 8,100, (N samples: 1,036,800), Loss: 1059.3860, (Recon: 1050.1331, KLD: 9.2529), Gradient norm: 233.0101


 44%|████▎     | 114/261 [00:03<00:04, 31.50it/s]

Step 8,200, (N samples: 1,049,600), Loss: 1061.2740, (Recon: 1052.1919, KLD: 9.0821), Gradient norm: 167.9744


 82%|████████▏ | 214/261 [00:06<00:01, 31.19it/s]

Step 8,300, (N samples: 1,062,400), Loss: 1067.4137, (Recon: 1058.5806, KLD: 8.8332), Gradient norm: 219.1262


100%|██████████| 261/261 [00:08<00:00, 31.21it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.94it/s]


====> Test set loss: 1074.8198, (BCE: 1066.0955, KLD: 8.7243)
Epoch 33/100


 21%|██        | 54/261 [00:01<00:06, 31.36it/s]

Step 8,400, (N samples: 1,075,200), Loss: 1088.9962, (Recon: 1079.7751, KLD: 9.2210), Gradient norm: 209.5831


 59%|█████▉    | 154/261 [00:04<00:03, 31.48it/s]

Step 8,500, (N samples: 1,088,000), Loss: 1053.6646, (Recon: 1044.7319, KLD: 8.9327), Gradient norm: 214.9158


 97%|█████████▋| 254/261 [00:08<00:00, 31.40it/s]

Step 8,600, (N samples: 1,100,800), Loss: 1118.3293, (Recon: 1109.2483, KLD: 9.0811), Gradient norm: 214.5871


100%|██████████| 261/261 [00:08<00:00, 31.19it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.35it/s]


====> Test set loss: 1073.3426, (BCE: 1064.4176, KLD: 8.9250)
Epoch 34/100


 36%|███▌      | 94/261 [00:03<00:05, 31.51it/s]

Step 8,700, (N samples: 1,113,600), Loss: 1113.6716, (Recon: 1104.7377, KLD: 8.9340), Gradient norm: 123.2053


 74%|███████▍  | 194/261 [00:06<00:02, 31.06it/s]

Step 8,800, (N samples: 1,126,400), Loss: 1025.1622, (Recon: 1016.4007, KLD: 8.7616), Gradient norm: 168.2626


100%|██████████| 261/261 [00:08<00:00, 31.13it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.23it/s]


====> Test set loss: 1073.2445, (BCE: 1064.3830, KLD: 8.8614)
Epoch 35/100


 11%|█▏        | 30/261 [00:01<00:07, 30.64it/s]

Step 8,900, (N samples: 1,139,200), Loss: 1029.7968, (Recon: 1021.0803, KLD: 8.7164), Gradient norm: 166.5217


 50%|████▉     | 130/261 [00:04<00:04, 31.36it/s]

Step 9,000, (N samples: 1,152,000), Loss: 1098.6366, (Recon: 1089.3320, KLD: 9.3045), Gradient norm: 199.1933


 88%|████████▊ | 230/261 [00:07<00:00, 31.44it/s]

Step 9,100, (N samples: 1,164,800), Loss: 1053.7069, (Recon: 1044.8088, KLD: 8.8981), Gradient norm: 134.9921


100%|██████████| 261/261 [00:08<00:00, 31.21it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.36it/s]


====> Test set loss: 1072.6797, (BCE: 1063.7748, KLD: 8.9049)
Epoch 36/100


 27%|██▋       | 70/261 [00:02<00:06, 31.58it/s]

Step 9,200, (N samples: 1,177,600), Loss: 1036.1221, (Recon: 1027.2451, KLD: 8.8769), Gradient norm: 153.8218


 65%|██████▌   | 170/261 [00:05<00:02, 31.38it/s]

Step 9,300, (N samples: 1,190,400), Loss: 1095.3282, (Recon: 1086.0542, KLD: 9.2740), Gradient norm: 219.1880


100%|██████████| 261/261 [00:08<00:00, 31.27it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.87it/s]


====> Test set loss: 1071.4338, (BCE: 1062.3796, KLD: 9.0542)
Epoch 37/100


  4%|▍         | 10/261 [00:00<00:08, 27.91it/s]

Step 9,400, (N samples: 1,203,200), Loss: 1054.5913, (Recon: 1045.5901, KLD: 9.0012), Gradient norm: 179.0856


 42%|████▏     | 110/261 [00:03<00:04, 31.36it/s]

Step 9,500, (N samples: 1,216,000), Loss: 1099.1421, (Recon: 1090.2886, KLD: 8.8536), Gradient norm: 149.1445


 80%|████████  | 209/261 [00:06<00:01, 30.62it/s]

Step 9,600, (N samples: 1,228,800), Loss: 1062.9987, (Recon: 1054.2059, KLD: 8.7927), Gradient norm: 149.5907


100%|██████████| 261/261 [00:08<00:00, 30.38it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.58it/s]


====> Test set loss: 1073.1065, (BCE: 1064.2895, KLD: 8.8170)
Epoch 38/100


 19%|█▉        | 50/261 [00:01<00:06, 31.04it/s]

Step 9,700, (N samples: 1,241,600), Loss: 1063.9205, (Recon: 1054.7802, KLD: 9.1404), Gradient norm: 160.4893


 57%|█████▋    | 149/261 [00:04<00:03, 29.80it/s]

Step 9,800, (N samples: 1,254,400), Loss: 1071.1671, (Recon: 1062.4326, KLD: 8.7345), Gradient norm: 104.4958


 95%|█████████▌| 249/261 [00:08<00:00, 31.27it/s]

Step 9,900, (N samples: 1,267,200), Loss: 1105.8696, (Recon: 1096.7207, KLD: 9.1490), Gradient norm: 130.2281


100%|██████████| 261/261 [00:08<00:00, 30.97it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.22it/s]


====> Test set loss: 1071.0133, (BCE: 1062.0869, KLD: 8.9264)
Epoch 39/100


 33%|███▎      | 86/261 [00:02<00:05, 31.45it/s]

Step 10,000, (N samples: 1,280,000), Loss: 1080.5525, (Recon: 1071.5538, KLD: 8.9987), Gradient norm: 149.2818


 71%|███████▏  | 186/261 [00:05<00:02, 30.96it/s]

Step 10,100, (N samples: 1,292,800), Loss: 1117.0852, (Recon: 1108.0381, KLD: 9.0472), Gradient norm: 235.1518


100%|██████████| 261/261 [00:08<00:00, 31.14it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.25it/s]


====> Test set loss: 1071.6432, (BCE: 1062.6273, KLD: 9.0159)
Epoch 40/100


 10%|▉         | 26/261 [00:00<00:07, 30.75it/s]

Step 10,200, (N samples: 1,305,600), Loss: 1057.8396, (Recon: 1049.2117, KLD: 8.6279), Gradient norm: 120.2996


 48%|████▊     | 126/261 [00:04<00:04, 31.29it/s]

Step 10,300, (N samples: 1,318,400), Loss: 1041.1121, (Recon: 1032.0305, KLD: 9.0815), Gradient norm: 179.5936


 87%|████████▋ | 226/261 [00:07<00:01, 31.48it/s]

Step 10,400, (N samples: 1,331,200), Loss: 1106.2954, (Recon: 1097.6174, KLD: 8.6780), Gradient norm: 179.4251


100%|██████████| 261/261 [00:08<00:00, 31.13it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.28it/s]


====> Test set loss: 1071.9611, (BCE: 1062.9431, KLD: 9.0180)
Epoch 41/100


 25%|██▌       | 66/261 [00:02<00:06, 30.86it/s]

Step 10,500, (N samples: 1,344,000), Loss: 1084.2131, (Recon: 1075.2815, KLD: 8.9317), Gradient norm: 211.4803


 64%|██████▍   | 167/261 [00:05<00:02, 31.43it/s]

Step 10,600, (N samples: 1,356,800), Loss: 1049.3403, (Recon: 1040.3767, KLD: 8.9636), Gradient norm: 155.9580


100%|██████████| 261/261 [00:08<00:00, 29.48it/s]


Step 10,700, (N samples: 1,369,600), Loss: 1082.3907, (Recon: 1073.5739, KLD: 8.8169), Gradient norm: 214.4733


Testing: 100%|██████████| 29/29 [00:00<00:00, 38.39it/s]


====> Test set loss: 1072.3486, (BCE: 1063.3369, KLD: 9.0117)
Epoch 42/100


 41%|████      | 106/261 [00:03<00:04, 31.38it/s]

Step 10,800, (N samples: 1,382,400), Loss: 1074.9124, (Recon: 1065.8619, KLD: 9.0504), Gradient norm: 153.4894


 79%|███████▉  | 206/261 [00:06<00:01, 31.57it/s]

Step 10,900, (N samples: 1,395,200), Loss: 1086.6499, (Recon: 1077.8131, KLD: 8.8368), Gradient norm: 221.0065


100%|██████████| 261/261 [00:08<00:00, 31.26it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.22it/s]


====> Test set loss: 1071.3339, (BCE: 1062.4675, KLD: 8.8664)
Epoch 43/100


 16%|█▌        | 42/261 [00:01<00:07, 31.28it/s]

Step 11,000, (N samples: 1,408,000), Loss: 1062.5845, (Recon: 1053.5341, KLD: 9.0504), Gradient norm: 336.6474


 54%|█████▍    | 142/261 [00:04<00:03, 31.53it/s]

Step 11,100, (N samples: 1,420,800), Loss: 1080.3380, (Recon: 1071.2161, KLD: 9.1220), Gradient norm: 178.5773


 93%|█████████▎| 242/261 [00:07<00:00, 31.48it/s]

Step 11,200, (N samples: 1,433,600), Loss: 1064.4117, (Recon: 1055.5703, KLD: 8.8415), Gradient norm: 149.8337


100%|██████████| 261/261 [00:08<00:00, 31.32it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.46it/s]


====> Test set loss: 1071.5709, (BCE: 1062.7923, KLD: 8.7786)
Epoch 44/100


 31%|███▏      | 82/261 [00:02<00:05, 31.43it/s]

Step 11,300, (N samples: 1,446,400), Loss: 1061.6002, (Recon: 1052.8044, KLD: 8.7957), Gradient norm: 159.8651


 70%|██████▉   | 182/261 [00:05<00:02, 31.44it/s]

Step 11,400, (N samples: 1,459,200), Loss: 1051.2928, (Recon: 1042.2290, KLD: 9.0639), Gradient norm: 148.6543


100%|██████████| 261/261 [00:08<00:00, 31.30it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.28it/s]


====> Test set loss: 1071.2862, (BCE: 1062.3637, KLD: 8.9225)
Epoch 45/100


  8%|▊         | 22/261 [00:00<00:08, 29.78it/s]

Step 11,500, (N samples: 1,472,000), Loss: 1071.3258, (Recon: 1062.1831, KLD: 9.1427), Gradient norm: 245.6937


 47%|████▋     | 122/261 [00:03<00:04, 31.15it/s]

Step 11,600, (N samples: 1,484,800), Loss: 1092.0398, (Recon: 1082.9783, KLD: 9.0616), Gradient norm: 170.6289


 85%|████████▌ | 222/261 [00:07<00:01, 31.43it/s]

Step 11,700, (N samples: 1,497,600), Loss: 1041.5243, (Recon: 1032.3247, KLD: 9.1996), Gradient norm: 207.8140


100%|██████████| 261/261 [00:08<00:00, 31.18it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.82it/s]


====> Test set loss: 1071.3855, (BCE: 1062.4920, KLD: 8.8936)
Epoch 46/100


 24%|██▍       | 62/261 [00:02<00:06, 31.08it/s]

Step 11,800, (N samples: 1,510,400), Loss: 1105.5908, (Recon: 1096.6309, KLD: 8.9599), Gradient norm: 243.3134


 62%|██████▏   | 162/261 [00:05<00:03, 31.35it/s]

Step 11,900, (N samples: 1,523,200), Loss: 1114.3364, (Recon: 1105.2733, KLD: 9.0631), Gradient norm: 299.5562


100%|██████████| 261/261 [00:08<00:00, 31.22it/s]


Step 12,000, (N samples: 1,536,000), Loss: 1012.6628, (Recon: 1003.9020, KLD: 8.7608), Gradient norm: 132.7134


Testing: 100%|██████████| 29/29 [00:00<00:00, 38.03it/s]


====> Test set loss: 1070.8443, (BCE: 1061.6966, KLD: 9.1477)
Epoch 47/100


 38%|███▊      | 98/261 [00:03<00:05, 31.25it/s]

Step 12,100, (N samples: 1,548,800), Loss: 1055.8795, (Recon: 1046.9122, KLD: 8.9673), Gradient norm: 233.2383


 76%|███████▌  | 198/261 [00:06<00:02, 31.42it/s]

Step 12,200, (N samples: 1,561,600), Loss: 1015.7254, (Recon: 1006.5616, KLD: 9.1638), Gradient norm: 308.6812


100%|██████████| 261/261 [00:08<00:00, 31.23it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.14it/s]


====> Test set loss: 1071.8799, (BCE: 1063.0262, KLD: 8.8537)
Epoch 48/100


 15%|█▍        | 38/261 [00:01<00:07, 31.45it/s]

Step 12,300, (N samples: 1,574,400), Loss: 1067.4574, (Recon: 1058.8062, KLD: 8.6512), Gradient norm: 165.0149


 53%|█████▎    | 138/261 [00:04<00:03, 31.39it/s]

Step 12,400, (N samples: 1,587,200), Loss: 1034.2133, (Recon: 1025.2596, KLD: 8.9536), Gradient norm: 139.7691


 91%|█████████ | 238/261 [00:07<00:00, 31.51it/s]

Step 12,500, (N samples: 1,600,000), Loss: 1062.3953, (Recon: 1053.7056, KLD: 8.6897), Gradient norm: 214.5389


100%|██████████| 261/261 [00:08<00:00, 31.30it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.74it/s]


====> Test set loss: 1069.4216, (BCE: 1060.4722, KLD: 8.9493)
Epoch 49/100


 30%|██▉       | 78/261 [00:02<00:05, 31.51it/s]

Step 12,600, (N samples: 1,612,800), Loss: 1104.0009, (Recon: 1094.7533, KLD: 9.2476), Gradient norm: 192.3011


 68%|██████▊   | 178/261 [00:05<00:02, 31.29it/s]

Step 12,700, (N samples: 1,625,600), Loss: 1030.5590, (Recon: 1021.5864, KLD: 8.9727), Gradient norm: 121.8728


100%|██████████| 261/261 [00:08<00:00, 31.28it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.04it/s]


====> Test set loss: 1070.4624, (BCE: 1061.4566, KLD: 9.0058)
Epoch 50/100


  7%|▋         | 18/261 [00:00<00:08, 30.17it/s]

Step 12,800, (N samples: 1,638,400), Loss: 1051.3407, (Recon: 1042.5424, KLD: 8.7983), Gradient norm: 182.0860


 45%|████▌     | 118/261 [00:03<00:04, 31.40it/s]

Step 12,900, (N samples: 1,651,200), Loss: 1127.0150, (Recon: 1117.7053, KLD: 9.3096), Gradient norm: 214.1032


 84%|████████▎ | 218/261 [00:06<00:01, 31.36it/s]

Step 13,000, (N samples: 1,664,000), Loss: 1065.5532, (Recon: 1056.0618, KLD: 9.4914), Gradient norm: 185.6852


100%|██████████| 261/261 [00:08<00:00, 31.26it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.01it/s]


====> Test set loss: 1070.5706, (BCE: 1061.4958, KLD: 9.0748)
Epoch 51/100


 21%|██        | 54/261 [00:01<00:06, 31.56it/s]

Step 13,100, (N samples: 1,676,800), Loss: 1079.9207, (Recon: 1070.8152, KLD: 9.1055), Gradient norm: 161.9404


 59%|█████▉    | 154/261 [00:04<00:03, 31.77it/s]

Step 13,200, (N samples: 1,689,600), Loss: 1080.6163, (Recon: 1071.7014, KLD: 8.9150), Gradient norm: 174.2395


 97%|█████████▋| 254/261 [00:08<00:00, 31.65it/s]

Step 13,300, (N samples: 1,702,400), Loss: 1093.9315, (Recon: 1083.9531, KLD: 9.9784), Gradient norm: 259.3858


100%|██████████| 261/261 [00:08<00:00, 31.42it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.43it/s]


====> Test set loss: 1070.8390, (BCE: 1061.9524, KLD: 8.8866)
Epoch 52/100


 36%|███▌      | 94/261 [00:03<00:05, 31.38it/s]

Step 13,400, (N samples: 1,715,200), Loss: 1078.2576, (Recon: 1069.4250, KLD: 8.8326), Gradient norm: 180.4454


 74%|███████▍  | 194/261 [00:06<00:02, 31.51it/s]

Step 13,500, (N samples: 1,728,000), Loss: 1041.1298, (Recon: 1031.9686, KLD: 9.1611), Gradient norm: 149.7320


100%|██████████| 261/261 [00:08<00:00, 31.25it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.38it/s]


====> Test set loss: 1069.4526, (BCE: 1060.3122, KLD: 9.1404)
Epoch 53/100


 13%|█▎        | 34/261 [00:01<00:07, 31.13it/s]

Step 13,600, (N samples: 1,740,800), Loss: 1074.2219, (Recon: 1064.9609, KLD: 9.2610), Gradient norm: 356.6015


 51%|█████▏    | 134/261 [00:04<00:04, 31.55it/s]

Step 13,700, (N samples: 1,753,600), Loss: 1033.6113, (Recon: 1024.4658, KLD: 9.1455), Gradient norm: 138.7329


 90%|████████▉ | 234/261 [00:07<00:00, 31.44it/s]

Step 13,800, (N samples: 1,766,400), Loss: 1064.1631, (Recon: 1054.9287, KLD: 9.2344), Gradient norm: 217.2176


100%|██████████| 261/261 [00:08<00:00, 31.22it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.75it/s]


====> Test set loss: 1070.5515, (BCE: 1061.5393, KLD: 9.0122)
Epoch 54/100


 28%|██▊       | 74/261 [00:02<00:05, 31.27it/s]

Step 13,900, (N samples: 1,779,200), Loss: 1068.3827, (Recon: 1059.0435, KLD: 9.3393), Gradient norm: 139.2760


 65%|██████▌   | 170/261 [00:05<00:03, 27.97it/s]

Step 14,000, (N samples: 1,792,000), Loss: 1017.8892, (Recon: 1008.6530, KLD: 9.2363), Gradient norm: 255.4413


100%|██████████| 261/261 [00:08<00:00, 29.39it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.81it/s]


====> Test set loss: 1070.5168, (BCE: 1061.3538, KLD: 9.1630)
Epoch 55/100


  4%|▍         | 10/261 [00:00<00:08, 27.90it/s]

Step 14,100, (N samples: 1,804,800), Loss: 1086.4250, (Recon: 1077.0881, KLD: 9.3369), Gradient norm: 158.3594


 42%|████▏     | 110/261 [00:03<00:04, 31.35it/s]

Step 14,200, (N samples: 1,817,600), Loss: 1074.2421, (Recon: 1065.0422, KLD: 9.1998), Gradient norm: 213.8213


 80%|████████  | 210/261 [00:06<00:01, 31.47it/s]

Step 14,300, (N samples: 1,830,400), Loss: 1070.6367, (Recon: 1061.2537, KLD: 9.3830), Gradient norm: 237.9184


100%|██████████| 261/261 [00:08<00:00, 31.25it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.67it/s]


====> Test set loss: 1070.2586, (BCE: 1060.9050, KLD: 9.3537)
Epoch 56/100


 19%|█▉        | 50/261 [00:01<00:06, 31.56it/s]

Step 14,400, (N samples: 1,843,200), Loss: 1046.7305, (Recon: 1037.3256, KLD: 9.4049), Gradient norm: 140.6837


 57%|█████▋    | 150/261 [00:04<00:03, 31.44it/s]

Step 14,500, (N samples: 1,856,000), Loss: 997.4464, (Recon: 988.5430, KLD: 8.9035), Gradient norm: 460.1921


 96%|█████████▌| 250/261 [00:07<00:00, 31.55it/s]

Step 14,600, (N samples: 1,868,800), Loss: 1109.0135, (Recon: 1099.9788, KLD: 9.0347), Gradient norm: 317.5809


100%|██████████| 261/261 [00:08<00:00, 31.40it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.41it/s]


====> Test set loss: 1069.2650, (BCE: 1060.1234, KLD: 9.1416)
Epoch 57/100


 34%|███▍      | 90/261 [00:02<00:05, 31.51it/s]

Step 14,700, (N samples: 1,881,600), Loss: 1082.6010, (Recon: 1073.1643, KLD: 9.4367), Gradient norm: 182.0049


 73%|███████▎  | 190/261 [00:06<00:02, 31.10it/s]

Step 14,800, (N samples: 1,894,400), Loss: 1029.6229, (Recon: 1020.7715, KLD: 8.8514), Gradient norm: 165.8395


100%|██████████| 261/261 [00:08<00:00, 31.32it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.34it/s]


====> Test set loss: 1070.9858, (BCE: 1061.8286, KLD: 9.1572)
Epoch 58/100


 11%|█▏        | 30/261 [00:01<00:07, 30.98it/s]

Step 14,900, (N samples: 1,907,200), Loss: 1039.6942, (Recon: 1030.6875, KLD: 9.0067), Gradient norm: 130.9507


 50%|████▉     | 130/261 [00:04<00:04, 31.40it/s]

Step 15,000, (N samples: 1,920,000), Loss: 1074.1719, (Recon: 1065.0930, KLD: 9.0789), Gradient norm: 138.0458


 88%|████████▊ | 230/261 [00:07<00:00, 31.19it/s]

Step 15,100, (N samples: 1,932,800), Loss: 1040.7311, (Recon: 1031.2610, KLD: 9.4701), Gradient norm: 180.8780


100%|██████████| 261/261 [00:08<00:00, 31.22it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.53it/s]


====> Test set loss: 1071.8564, (BCE: 1062.6674, KLD: 9.1891)
Epoch 59/100


 25%|██▌       | 66/261 [00:02<00:06, 30.80it/s]

Step 15,200, (N samples: 1,945,600), Loss: 1031.2922, (Recon: 1022.4740, KLD: 8.8183), Gradient norm: 214.3222


 64%|██████▎   | 166/261 [00:05<00:03, 31.45it/s]

Step 15,300, (N samples: 1,958,400), Loss: 1102.4812, (Recon: 1092.8579, KLD: 9.6233), Gradient norm: 253.6626


100%|██████████| 261/261 [00:08<00:00, 31.19it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.29it/s]


====> Test set loss: 1069.5784, (BCE: 1060.4159, KLD: 9.1625)
Epoch 60/100


  1%|          | 3/261 [00:00<00:11, 22.73it/s]

Step 15,400, (N samples: 1,971,200), Loss: 1054.8262, (Recon: 1045.6403, KLD: 9.1859), Gradient norm: 253.6879


 41%|████      | 106/261 [00:03<00:04, 31.66it/s]

Step 15,500, (N samples: 1,984,000), Loss: 1056.7571, (Recon: 1047.4620, KLD: 9.2951), Gradient norm: 202.2869


 79%|███████▉  | 206/261 [00:06<00:01, 31.83it/s]

Step 15,600, (N samples: 1,996,800), Loss: 1067.1071, (Recon: 1057.7100, KLD: 9.3971), Gradient norm: 131.0557


100%|██████████| 261/261 [00:08<00:00, 31.46it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.99it/s]


====> Test set loss: 1069.1941, (BCE: 1059.8682, KLD: 9.3259)
Epoch 61/100


 17%|█▋        | 45/261 [00:01<00:07, 30.10it/s]

Step 15,700, (N samples: 2,009,600), Loss: 1042.1442, (Recon: 1033.1975, KLD: 8.9466), Gradient norm: 181.5280


 56%|█████▌    | 145/261 [00:04<00:03, 31.13it/s]

Step 15,800, (N samples: 2,022,400), Loss: 1057.6559, (Recon: 1048.2188, KLD: 9.4372), Gradient norm: 260.7413


 94%|█████████▍| 245/261 [00:08<00:00, 31.59it/s]

Step 15,900, (N samples: 2,035,200), Loss: 1072.8610, (Recon: 1063.8646, KLD: 8.9963), Gradient norm: 183.5808


100%|██████████| 261/261 [00:08<00:00, 30.44it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.95it/s]


====> Test set loss: 1070.7943, (BCE: 1061.6700, KLD: 9.1243)
Epoch 62/100


 33%|███▎      | 85/261 [00:02<00:05, 31.42it/s]

Step 16,000, (N samples: 2,048,000), Loss: 1018.1015, (Recon: 1008.6879, KLD: 9.4136), Gradient norm: 249.9329


 71%|███████   | 185/261 [00:05<00:02, 30.97it/s]

Step 16,100, (N samples: 2,060,800), Loss: 1064.2113, (Recon: 1054.6277, KLD: 9.5836), Gradient norm: 145.0670


100%|██████████| 261/261 [00:08<00:00, 31.22it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.00it/s]


====> Test set loss: 1068.5817, (BCE: 1059.4529, KLD: 9.1288)
Epoch 63/100


  8%|▊         | 22/261 [00:00<00:07, 30.57it/s]

Step 16,200, (N samples: 2,073,600), Loss: 1047.5256, (Recon: 1038.4849, KLD: 9.0407), Gradient norm: 177.4379


 47%|████▋     | 122/261 [00:03<00:04, 31.44it/s]

Step 16,300, (N samples: 2,086,400), Loss: 1088.3370, (Recon: 1078.5205, KLD: 9.8165), Gradient norm: 131.9844


 85%|████████▌ | 222/261 [00:07<00:01, 31.09it/s]

Step 16,400, (N samples: 2,099,200), Loss: 1124.7511, (Recon: 1115.4680, KLD: 9.2831), Gradient norm: 218.9980


100%|██████████| 261/261 [00:08<00:00, 31.19it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.23it/s]


====> Test set loss: 1068.4394, (BCE: 1059.0845, KLD: 9.3549)
Epoch 64/100


 24%|██▍       | 62/261 [00:02<00:06, 31.55it/s]

Step 16,500, (N samples: 2,112,000), Loss: 1074.5692, (Recon: 1065.4298, KLD: 9.1394), Gradient norm: 384.5443


 62%|██████▏   | 162/261 [00:05<00:03, 31.48it/s]

Step 16,600, (N samples: 2,124,800), Loss: 1073.5613, (Recon: 1064.4869, KLD: 9.0743), Gradient norm: 242.8929


100%|██████████| 261/261 [00:08<00:00, 31.15it/s]


Step 16,700, (N samples: 2,137,600), Loss: 1038.5427, (Recon: 1029.5840, KLD: 8.9588), Gradient norm: 139.6359


Testing: 100%|██████████| 29/29 [00:00<00:00, 37.88it/s]


====> Test set loss: 1068.6538, (BCE: 1059.4539, KLD: 9.1999)
Epoch 65/100


 39%|███▉      | 102/261 [00:03<00:05, 30.87it/s]

Step 16,800, (N samples: 2,150,400), Loss: 1054.9205, (Recon: 1045.5189, KLD: 9.4016), Gradient norm: 162.0762


 77%|███████▋  | 202/261 [00:06<00:01, 31.39it/s]

Step 16,900, (N samples: 2,163,200), Loss: 1130.5638, (Recon: 1121.1448, KLD: 9.4191), Gradient norm: 300.5985


100%|██████████| 261/261 [00:08<00:00, 31.11it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.96it/s]


====> Test set loss: 1067.8930, (BCE: 1058.7168, KLD: 9.1762)
Epoch 66/100


 16%|█▌        | 42/261 [00:01<00:06, 31.41it/s]

Step 17,000, (N samples: 2,176,000), Loss: 1048.8000, (Recon: 1039.6035, KLD: 9.1965), Gradient norm: 163.1243


 54%|█████▍    | 142/261 [00:04<00:03, 31.04it/s]

Step 17,100, (N samples: 2,188,800), Loss: 1067.9926, (Recon: 1058.5645, KLD: 9.4281), Gradient norm: 162.5352


 93%|█████████▎| 242/261 [00:07<00:00, 31.41it/s]

Step 17,200, (N samples: 2,201,600), Loss: 1054.2797, (Recon: 1045.0334, KLD: 9.2463), Gradient norm: 253.9695


100%|██████████| 261/261 [00:08<00:00, 31.26it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.62it/s]


====> Test set loss: 1067.5312, (BCE: 1058.3671, KLD: 9.1641)
Epoch 67/100


 30%|██▉       | 78/261 [00:02<00:05, 31.19it/s]

Step 17,300, (N samples: 2,214,400), Loss: 1044.4720, (Recon: 1034.9498, KLD: 9.5222), Gradient norm: 260.2538


 68%|██████▊   | 178/261 [00:05<00:02, 30.79it/s]

Step 17,400, (N samples: 2,227,200), Loss: 1077.3246, (Recon: 1067.5522, KLD: 9.7724), Gradient norm: 126.7486


100%|██████████| 261/261 [00:08<00:00, 30.97it/s]
Testing: 100%|██████████| 29/29 [00:01<00:00, 25.62it/s]


====> Test set loss: 1068.8818, (BCE: 1059.5204, KLD: 9.3614)
Epoch 68/100


  7%|▋         | 17/261 [00:00<00:08, 28.41it/s]

Step 17,500, (N samples: 2,240,000), Loss: 1044.9728, (Recon: 1035.4084, KLD: 9.5644), Gradient norm: 194.9265


 45%|████▍     | 117/261 [00:03<00:04, 30.89it/s]

Step 17,600, (N samples: 2,252,800), Loss: 1021.9785, (Recon: 1012.8612, KLD: 9.1173), Gradient norm: 168.6702


 83%|████████▎ | 217/261 [00:07<00:01, 30.78it/s]

Step 17,700, (N samples: 2,265,600), Loss: 1079.4121, (Recon: 1070.0239, KLD: 9.3882), Gradient norm: 293.5509


100%|██████████| 261/261 [00:08<00:00, 30.60it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.62it/s]


====> Test set loss: 1069.4059, (BCE: 1060.2122, KLD: 9.1937)
Epoch 69/100


 22%|██▏       | 58/261 [00:01<00:06, 31.35it/s]

Step 17,800, (N samples: 2,278,400), Loss: 1054.2921, (Recon: 1044.6344, KLD: 9.6577), Gradient norm: 284.0092


 61%|██████    | 158/261 [00:05<00:03, 31.31it/s]

Step 17,900, (N samples: 2,291,200), Loss: 1066.7430, (Recon: 1057.3962, KLD: 9.3468), Gradient norm: 192.2247


 99%|█████████▉| 258/261 [00:08<00:00, 31.38it/s]

Step 18,000, (N samples: 2,304,000), Loss: 1054.4825, (Recon: 1045.3611, KLD: 9.1215), Gradient norm: 295.3014


100%|██████████| 261/261 [00:08<00:00, 31.17it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.99it/s]


====> Test set loss: 1067.9147, (BCE: 1058.6810, KLD: 9.2337)
Epoch 70/100


 38%|███▊      | 98/261 [00:03<00:05, 31.36it/s]

Step 18,100, (N samples: 2,316,800), Loss: 1035.9443, (Recon: 1026.7314, KLD: 9.2129), Gradient norm: 167.7050


 76%|███████▌  | 198/261 [00:06<00:02, 31.45it/s]

Step 18,200, (N samples: 2,329,600), Loss: 1007.5361, (Recon: 998.6040, KLD: 8.9320), Gradient norm: 225.0410


100%|██████████| 261/261 [00:08<00:00, 31.18it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.92it/s]


====> Test set loss: 1067.7145, (BCE: 1058.2773, KLD: 9.4372)
Epoch 71/100


 13%|█▎        | 34/261 [00:01<00:07, 31.43it/s]

Step 18,300, (N samples: 2,342,400), Loss: 1099.1266, (Recon: 1089.8607, KLD: 9.2659), Gradient norm: 133.4936


 51%|█████▏    | 134/261 [00:04<00:04, 31.53it/s]

Step 18,400, (N samples: 2,355,200), Loss: 1050.9584, (Recon: 1041.1272, KLD: 9.8312), Gradient norm: 231.8824


 90%|████████▉ | 234/261 [00:07<00:00, 31.44it/s]

Step 18,500, (N samples: 2,368,000), Loss: 1026.9778, (Recon: 1017.9348, KLD: 9.0429), Gradient norm: 139.1944


100%|██████████| 261/261 [00:08<00:00, 31.12it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.46it/s]


====> Test set loss: 1066.9871, (BCE: 1057.6575, KLD: 9.3296)
Epoch 72/100


 28%|██▊       | 74/261 [00:02<00:06, 29.51it/s]

Step 18,600, (N samples: 2,380,800), Loss: 1028.4117, (Recon: 1019.0987, KLD: 9.3131), Gradient norm: 213.9840


 67%|██████▋   | 176/261 [00:05<00:02, 30.71it/s]

Step 18,700, (N samples: 2,393,600), Loss: 1097.8967, (Recon: 1088.0773, KLD: 9.8195), Gradient norm: 234.9558


100%|██████████| 261/261 [00:08<00:00, 30.42it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.45it/s]


====> Test set loss: 1067.1703, (BCE: 1057.8204, KLD: 9.3499)
Epoch 73/100


  5%|▌         | 14/261 [00:00<00:08, 29.05it/s]

Step 18,800, (N samples: 2,406,400), Loss: 1061.9172, (Recon: 1052.5354, KLD: 9.3819), Gradient norm: 132.1034


 43%|████▎     | 112/261 [00:03<00:05, 29.68it/s]

Step 18,900, (N samples: 2,419,200), Loss: 1045.2057, (Recon: 1036.2057, KLD: 8.9999), Gradient norm: 110.3025


 82%|████████▏ | 213/261 [00:07<00:01, 31.34it/s]

Step 19,000, (N samples: 2,432,000), Loss: 1052.9005, (Recon: 1043.9004, KLD: 9.0001), Gradient norm: 210.4558


100%|██████████| 261/261 [00:08<00:00, 30.38it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.42it/s]


====> Test set loss: 1067.8706, (BCE: 1058.6989, KLD: 9.1717)
Epoch 74/100


 21%|██        | 54/261 [00:01<00:06, 31.17it/s]

Step 19,100, (N samples: 2,444,800), Loss: 1024.4725, (Recon: 1014.8340, KLD: 9.6384), Gradient norm: 247.3155


 59%|█████▉    | 154/261 [00:04<00:03, 30.35it/s]

Step 19,200, (N samples: 2,457,600), Loss: 1022.7223, (Recon: 1013.3409, KLD: 9.3813), Gradient norm: 142.4637


 97%|█████████▋| 254/261 [00:08<00:00, 31.47it/s]

Step 19,300, (N samples: 2,470,400), Loss: 1074.9755, (Recon: 1065.4048, KLD: 9.5706), Gradient norm: 209.5877


100%|██████████| 261/261 [00:08<00:00, 31.08it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.16it/s]


====> Test set loss: 1066.6413, (BCE: 1057.2952, KLD: 9.3461)
Epoch 75/100


 34%|███▍      | 90/261 [00:02<00:05, 31.05it/s]

Step 19,400, (N samples: 2,483,200), Loss: 1029.2356, (Recon: 1020.2147, KLD: 9.0209), Gradient norm: 100.5970


 73%|███████▎  | 190/261 [00:06<00:02, 31.22it/s]

Step 19,500, (N samples: 2,496,000), Loss: 1072.7938, (Recon: 1063.2903, KLD: 9.5035), Gradient norm: 308.2255


100%|██████████| 261/261 [00:08<00:00, 30.79it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 35.87it/s]


====> Test set loss: 1066.8627, (BCE: 1057.5477, KLD: 9.3150)
Epoch 76/100


 11%|█▏        | 30/261 [00:01<00:07, 29.10it/s]

Step 19,600, (N samples: 2,508,800), Loss: 1113.6442, (Recon: 1104.0603, KLD: 9.5839), Gradient norm: 183.1640


 49%|████▉     | 129/261 [00:04<00:04, 29.32it/s]

Step 19,700, (N samples: 2,521,600), Loss: 1066.1240, (Recon: 1056.7310, KLD: 9.3930), Gradient norm: 198.5414


 89%|████████▊ | 231/261 [00:07<00:01, 29.24it/s]

Step 19,800, (N samples: 2,534,400), Loss: 1084.8513, (Recon: 1075.3855, KLD: 9.4658), Gradient norm: 149.6220


100%|██████████| 261/261 [00:08<00:00, 29.21it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 36.22it/s]


====> Test set loss: 1067.8662, (BCE: 1058.4127, KLD: 9.4535)
Epoch 77/100


 27%|██▋       | 70/261 [00:02<00:06, 29.83it/s]

Step 19,900, (N samples: 2,547,200), Loss: 1058.9858, (Recon: 1049.1544, KLD: 9.8314), Gradient norm: 213.2110


 64%|██████▍   | 168/261 [00:05<00:03, 30.75it/s]

Step 20,000, (N samples: 2,560,000), Loss: 1055.4359, (Recon: 1045.9586, KLD: 9.4772), Gradient norm: 182.6432


100%|██████████| 261/261 [00:08<00:00, 30.03it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.70it/s]


====> Test set loss: 1066.8034, (BCE: 1057.2269, KLD: 9.5766)
Epoch 78/100


  1%|          | 3/261 [00:00<00:11, 22.43it/s]

Step 20,100, (N samples: 2,572,800), Loss: 1057.3495, (Recon: 1047.9098, KLD: 9.4397), Gradient norm: 216.2008


 42%|████▏     | 110/261 [00:03<00:04, 30.98it/s]

Step 20,200, (N samples: 2,585,600), Loss: 1052.7524, (Recon: 1043.4077, KLD: 9.3447), Gradient norm: 251.0658


 80%|████████  | 210/261 [00:06<00:01, 30.95it/s]

Step 20,300, (N samples: 2,598,400), Loss: 1035.8558, (Recon: 1026.5972, KLD: 9.2587), Gradient norm: 161.2698


100%|██████████| 261/261 [00:08<00:00, 30.81it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.80it/s]


====> Test set loss: 1065.9717, (BCE: 1056.5080, KLD: 9.4637)
Epoch 79/100


 18%|█▊        | 46/261 [00:01<00:06, 31.20it/s]

Step 20,400, (N samples: 2,611,200), Loss: 1093.0662, (Recon: 1083.1199, KLD: 9.9463), Gradient norm: 193.3128


 56%|█████▌    | 146/261 [00:04<00:03, 31.31it/s]

Step 20,500, (N samples: 2,624,000), Loss: 1038.2990, (Recon: 1029.2330, KLD: 9.0659), Gradient norm: 233.4300


 93%|█████████▎| 242/261 [00:07<00:00, 31.08it/s]

Step 20,600, (N samples: 2,636,800), Loss: 1092.3763, (Recon: 1082.4607, KLD: 9.9157), Gradient norm: 229.0868


100%|██████████| 261/261 [00:08<00:00, 31.02it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.21it/s]


====> Test set loss: 1067.2561, (BCE: 1057.9289, KLD: 9.3271)
Epoch 80/100


 33%|███▎      | 86/261 [00:02<00:05, 31.36it/s]

Step 20,700, (N samples: 2,649,600), Loss: 1002.0297, (Recon: 992.6617, KLD: 9.3679), Gradient norm: 145.4687


 71%|███████▏  | 186/261 [00:05<00:02, 31.29it/s]

Step 20,800, (N samples: 2,662,400), Loss: 1039.1217, (Recon: 1029.4838, KLD: 9.6379), Gradient norm: 184.5368


100%|██████████| 261/261 [00:08<00:00, 31.13it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.55it/s]


====> Test set loss: 1066.8787, (BCE: 1057.3375, KLD: 9.5413)
Epoch 81/100


  9%|▉         | 23/261 [00:01<00:12, 18.56it/s]

Step 20,900, (N samples: 2,675,200), Loss: 1087.7510, (Recon: 1078.3324, KLD: 9.4186), Gradient norm: 187.3658


 48%|████▊     | 124/261 [00:04<00:04, 31.32it/s]

Step 21,000, (N samples: 2,688,000), Loss: 1062.7346, (Recon: 1052.7288, KLD: 10.0059), Gradient norm: 153.3689


 86%|████████▌ | 224/261 [00:07<00:01, 31.37it/s]

Step 21,100, (N samples: 2,700,800), Loss: 1022.8179, (Recon: 1013.6932, KLD: 9.1247), Gradient norm: 234.7427


100%|██████████| 261/261 [00:08<00:00, 29.32it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.10it/s]


====> Test set loss: 1066.4093, (BCE: 1056.9072, KLD: 9.5021)
Epoch 82/100


 25%|██▌       | 66/261 [00:02<00:06, 31.32it/s]

Step 21,200, (N samples: 2,713,600), Loss: 1075.4545, (Recon: 1065.6660, KLD: 9.7885), Gradient norm: 140.1308


 64%|██████▎   | 166/261 [00:05<00:03, 31.42it/s]

Step 21,300, (N samples: 2,726,400), Loss: 1108.6006, (Recon: 1098.4810, KLD: 10.1197), Gradient norm: 176.2182


100%|██████████| 261/261 [00:08<00:00, 31.02it/s]


Step 21,400, (N samples: 2,739,200), Loss: 1049.2458, (Recon: 1039.9370, KLD: 9.3089), Gradient norm: 154.5305


Testing: 100%|██████████| 29/29 [00:00<00:00, 38.35it/s]


====> Test set loss: 1065.0874, (BCE: 1055.7724, KLD: 9.3149)
Epoch 83/100


 39%|███▉      | 102/261 [00:03<00:05, 31.54it/s]

Step 21,500, (N samples: 2,752,000), Loss: 1083.8081, (Recon: 1074.1733, KLD: 9.6348), Gradient norm: 185.0691


 77%|███████▋  | 202/261 [00:06<00:01, 31.40it/s]

Step 21,600, (N samples: 2,764,800), Loss: 1040.1774, (Recon: 1031.0868, KLD: 9.0906), Gradient norm: 452.1056


100%|██████████| 261/261 [00:08<00:00, 31.19it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.23it/s]


====> Test set loss: 1066.2831, (BCE: 1056.7126, KLD: 9.5704)
Epoch 84/100


 16%|█▌        | 42/261 [00:01<00:07, 31.28it/s]

Step 21,700, (N samples: 2,777,600), Loss: 1062.4625, (Recon: 1052.8829, KLD: 9.5796), Gradient norm: 195.2305


 54%|█████▍    | 142/261 [00:04<00:03, 31.41it/s]

Step 21,800, (N samples: 2,790,400), Loss: 1058.5287, (Recon: 1049.1812, KLD: 9.3476), Gradient norm: 307.4223


 93%|█████████▎| 242/261 [00:07<00:00, 31.21it/s]

Step 21,900, (N samples: 2,803,200), Loss: 1035.6031, (Recon: 1026.6160, KLD: 8.9872), Gradient norm: 173.3415


100%|██████████| 261/261 [00:08<00:00, 30.48it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.06it/s]


====> Test set loss: 1065.7677, (BCE: 1056.2717, KLD: 9.4960)
Epoch 85/100


 31%|███▏      | 82/261 [00:02<00:05, 31.79it/s]

Step 22,000, (N samples: 2,816,000), Loss: 1037.7642, (Recon: 1028.5380, KLD: 9.2262), Gradient norm: 185.2268


 70%|██████▉   | 182/261 [00:05<00:02, 31.04it/s]

Step 22,100, (N samples: 2,828,800), Loss: 1067.0277, (Recon: 1057.6453, KLD: 9.3825), Gradient norm: 249.6347


100%|██████████| 261/261 [00:08<00:00, 31.46it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.66it/s]


====> Test set loss: 1065.9057, (BCE: 1056.3448, KLD: 9.5609)
Epoch 86/100


  8%|▊         | 21/261 [00:00<00:07, 30.19it/s]

Step 22,200, (N samples: 2,841,600), Loss: 1050.8418, (Recon: 1041.2444, KLD: 9.5974), Gradient norm: 150.6844


 46%|████▋     | 121/261 [00:03<00:04, 31.48it/s]

Step 22,300, (N samples: 2,854,400), Loss: 1027.3197, (Recon: 1017.9136, KLD: 9.4061), Gradient norm: 190.4625


 85%|████████▍ | 221/261 [00:07<00:01, 31.47it/s]

Step 22,400, (N samples: 2,867,200), Loss: 1039.7129, (Recon: 1029.9760, KLD: 9.7369), Gradient norm: 247.9904


100%|██████████| 261/261 [00:08<00:00, 31.09it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.35it/s]


====> Test set loss: 1066.3003, (BCE: 1056.9088, KLD: 9.3915)
Epoch 87/100


 23%|██▎       | 61/261 [00:02<00:06, 31.48it/s]

Step 22,500, (N samples: 2,880,000), Loss: 1034.9893, (Recon: 1025.4216, KLD: 9.5676), Gradient norm: 394.2894


 62%|██████▏   | 161/261 [00:05<00:03, 31.40it/s]

Step 22,600, (N samples: 2,892,800), Loss: 1100.2441, (Recon: 1090.5939, KLD: 9.6502), Gradient norm: 159.9343


100%|██████████| 261/261 [00:08<00:00, 29.40it/s]


Step 22,700, (N samples: 2,905,600), Loss: 1039.7020, (Recon: 1030.3298, KLD: 9.3723), Gradient norm: 194.6890


Testing: 100%|██████████| 29/29 [00:00<00:00, 37.84it/s]


====> Test set loss: 1066.4527, (BCE: 1057.2085, KLD: 9.2442)
Epoch 88/100


 38%|███▊      | 98/261 [00:03<00:05, 31.47it/s]

Step 22,800, (N samples: 2,918,400), Loss: 1075.0419, (Recon: 1065.2275, KLD: 9.8143), Gradient norm: 253.4802


 76%|███████▌  | 198/261 [00:06<00:02, 31.35it/s]

Step 22,900, (N samples: 2,931,200), Loss: 1021.9479, (Recon: 1012.3274, KLD: 9.6205), Gradient norm: 223.8285


100%|██████████| 261/261 [00:08<00:00, 31.06it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.92it/s]


====> Test set loss: 1065.8099, (BCE: 1056.4726, KLD: 9.3373)
Epoch 89/100


 15%|█▍        | 38/261 [00:01<00:07, 31.27it/s]

Step 23,000, (N samples: 2,944,000), Loss: 1079.8339, (Recon: 1070.3838, KLD: 9.4501), Gradient norm: 120.1865


 53%|█████▎    | 138/261 [00:04<00:03, 31.35it/s]

Step 23,100, (N samples: 2,956,800), Loss: 1062.4662, (Recon: 1052.7783, KLD: 9.6879), Gradient norm: 151.9714


 91%|█████████ | 238/261 [00:07<00:00, 31.40it/s]

Step 23,200, (N samples: 2,969,600), Loss: 1121.6824, (Recon: 1112.2985, KLD: 9.3839), Gradient norm: 183.5584


100%|██████████| 261/261 [00:08<00:00, 31.26it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 31.83it/s]


====> Test set loss: 1065.7209, (BCE: 1056.2123, KLD: 9.5086)
Epoch 90/100


 30%|██▉       | 78/261 [00:02<00:05, 31.69it/s]

Step 23,300, (N samples: 2,982,400), Loss: 1034.2646, (Recon: 1024.8035, KLD: 9.4612), Gradient norm: 187.1985


 67%|██████▋   | 174/261 [00:05<00:02, 31.50it/s]

Step 23,400, (N samples: 2,995,200), Loss: 1124.7010, (Recon: 1114.7927, KLD: 9.9083), Gradient norm: 378.1465


100%|██████████| 261/261 [00:08<00:00, 31.33it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.34it/s]


====> Test set loss: 1065.8329, (BCE: 1056.3777, KLD: 9.4552)
Epoch 91/100


  5%|▌         | 14/261 [00:00<00:08, 29.39it/s]

Step 23,500, (N samples: 3,008,000), Loss: 1096.2639, (Recon: 1086.5701, KLD: 9.6939), Gradient norm: 213.7633


 44%|████▎     | 114/261 [00:03<00:04, 30.89it/s]

Step 23,600, (N samples: 3,020,800), Loss: 1112.8013, (Recon: 1103.0215, KLD: 9.7798), Gradient norm: 187.5519


 83%|████████▎ | 216/261 [00:07<00:01, 31.18it/s]

Step 23,700, (N samples: 3,033,600), Loss: 1061.0710, (Recon: 1051.4360, KLD: 9.6351), Gradient norm: 216.7994


100%|██████████| 261/261 [00:08<00:00, 30.36it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.15it/s]


====> Test set loss: 1066.1744, (BCE: 1056.6179, KLD: 9.5565)
Epoch 92/100


 21%|██        | 54/261 [00:01<00:06, 31.21it/s]

Step 23,800, (N samples: 3,046,400), Loss: 1080.4596, (Recon: 1070.6042, KLD: 9.8554), Gradient norm: 267.1924


 59%|█████▉    | 154/261 [00:04<00:03, 31.25it/s]

Step 23,900, (N samples: 3,059,200), Loss: 995.1465, (Recon: 985.7106, KLD: 9.4360), Gradient norm: 190.4041


 97%|█████████▋| 254/261 [00:08<00:00, 31.37it/s]

Step 24,000, (N samples: 3,072,000), Loss: 1045.7043, (Recon: 1036.1603, KLD: 9.5441), Gradient norm: 246.8070


100%|██████████| 261/261 [00:08<00:00, 31.22it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.79it/s]


====> Test set loss: 1066.4737, (BCE: 1056.9791, KLD: 9.4946)
Epoch 93/100


 36%|███▌      | 94/261 [00:03<00:05, 31.26it/s]

Step 24,100, (N samples: 3,084,800), Loss: 1016.2634, (Recon: 1006.5672, KLD: 9.6962), Gradient norm: 270.3608


 74%|███████▍  | 194/261 [00:06<00:02, 31.67it/s]

Step 24,200, (N samples: 3,097,600), Loss: 1082.4779, (Recon: 1072.9583, KLD: 9.5196), Gradient norm: 166.7588


100%|██████████| 261/261 [00:08<00:00, 31.28it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.41it/s]


====> Test set loss: 1065.3815, (BCE: 1056.0543, KLD: 9.3272)
Epoch 94/100


 13%|█▎        | 34/261 [00:01<00:07, 31.22it/s]

Step 24,300, (N samples: 3,110,400), Loss: 1046.8563, (Recon: 1037.8724, KLD: 8.9838), Gradient norm: 131.9840


 51%|█████     | 133/261 [00:04<00:04, 31.29it/s]

Step 24,400, (N samples: 3,123,200), Loss: 1028.5980, (Recon: 1019.2269, KLD: 9.3712), Gradient norm: 143.2115


 89%|████████▉ | 233/261 [00:07<00:00, 31.36it/s]

Step 24,500, (N samples: 3,136,000), Loss: 1039.4156, (Recon: 1030.1836, KLD: 9.2320), Gradient norm: 112.2952


100%|██████████| 261/261 [00:08<00:00, 29.51it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.07it/s]


====> Test set loss: 1065.2486, (BCE: 1055.8666, KLD: 9.3819)
Epoch 95/100


 27%|██▋       | 70/261 [00:02<00:06, 31.52it/s]

Step 24,600, (N samples: 3,148,800), Loss: 1004.5821, (Recon: 995.7504, KLD: 8.8317), Gradient norm: 232.4142


 65%|██████▌   | 170/261 [00:05<00:02, 31.38it/s]

Step 24,700, (N samples: 3,161,600), Loss: 1061.2281, (Recon: 1051.0684, KLD: 10.1598), Gradient norm: 214.6119


100%|██████████| 261/261 [00:08<00:00, 31.21it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.90it/s]


====> Test set loss: 1065.7015, (BCE: 1056.0798, KLD: 9.6217)
Epoch 96/100


  4%|▍         | 10/261 [00:00<00:08, 28.25it/s]

Step 24,800, (N samples: 3,174,400), Loss: 1026.3525, (Recon: 1016.6666, KLD: 9.6860), Gradient norm: 143.4374


 42%|████▏     | 110/261 [00:03<00:04, 31.51it/s]

Step 24,900, (N samples: 3,187,200), Loss: 1014.6790, (Recon: 1005.3979, KLD: 9.2810), Gradient norm: 147.6167


 80%|████████  | 209/261 [00:06<00:01, 31.28it/s]

Step 25,000, (N samples: 3,200,000), Loss: 1051.4159, (Recon: 1042.1060, KLD: 9.3100), Gradient norm: 210.1334


100%|██████████| 261/261 [00:08<00:00, 31.14it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.32it/s]


====> Test set loss: 1065.3590, (BCE: 1055.8105, KLD: 9.5485)
Epoch 97/100


 19%|█▉        | 50/261 [00:01<00:06, 30.67it/s]

Step 25,100, (N samples: 3,212,800), Loss: 1092.2280, (Recon: 1082.7244, KLD: 9.5036), Gradient norm: 140.0014


 57%|█████▋    | 150/261 [00:04<00:03, 31.53it/s]

Step 25,200, (N samples: 3,225,600), Loss: 1081.8362, (Recon: 1072.3406, KLD: 9.4956), Gradient norm: 138.4265


 96%|█████████▌| 250/261 [00:08<00:00, 31.26it/s]

Step 25,300, (N samples: 3,238,400), Loss: 1109.5361, (Recon: 1099.6051, KLD: 9.9310), Gradient norm: 160.8927


100%|██████████| 261/261 [00:08<00:00, 31.13it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.68it/s]


====> Test set loss: 1066.3500, (BCE: 1056.9834, KLD: 9.3666)
Epoch 98/100


 34%|███▍      | 90/261 [00:02<00:05, 31.49it/s]

Step 25,400, (N samples: 3,251,200), Loss: 1067.8793, (Recon: 1058.3983, KLD: 9.4809), Gradient norm: 254.5201


 73%|███████▎  | 190/261 [00:06<00:02, 31.49it/s]

Step 25,500, (N samples: 3,264,000), Loss: 1058.2118, (Recon: 1048.7568, KLD: 9.4550), Gradient norm: 136.1355


100%|██████████| 261/261 [00:08<00:00, 31.12it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.88it/s]


====> Test set loss: 1065.0090, (BCE: 1055.5007, KLD: 9.5083)
Epoch 99/100


 11%|█         | 29/261 [00:00<00:07, 30.93it/s]

Step 25,600, (N samples: 3,276,800), Loss: 995.9751, (Recon: 986.6624, KLD: 9.3127), Gradient norm: 177.5466


 49%|████▉     | 129/261 [00:04<00:04, 31.39it/s]

Step 25,700, (N samples: 3,289,600), Loss: 1084.5120, (Recon: 1075.5258, KLD: 8.9863), Gradient norm: 185.4473


 88%|████████▊ | 229/261 [00:07<00:01, 31.26it/s]

Step 25,800, (N samples: 3,302,400), Loss: 1064.9865, (Recon: 1055.3191, KLD: 9.6673), Gradient norm: 343.2795


100%|██████████| 261/261 [00:08<00:00, 31.16it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 38.33it/s]


====> Test set loss: 1066.2461, (BCE: 1056.8018, KLD: 9.4442)
Epoch 100/100


 25%|██▌       | 66/261 [00:02<00:06, 31.38it/s]

Step 25,900, (N samples: 3,315,200), Loss: 1025.2896, (Recon: 1016.0814, KLD: 9.2081), Gradient norm: 162.4217


 64%|██████▎   | 166/261 [00:05<00:03, 31.55it/s]

Step 26,000, (N samples: 3,328,000), Loss: 1053.9532, (Recon: 1044.2761, KLD: 9.6771), Gradient norm: 222.4961


100%|██████████| 261/261 [00:08<00:00, 31.28it/s]
Testing: 100%|██████████| 29/29 [00:00<00:00, 37.48it/s]

====> Test set loss: 1066.0442, (BCE: 1056.5937, KLD: 9.4505)





In [15]:
# Flush pending TensorBoard events to disk so the dashboard shows the full run.
# This notebook creates `writer_train` and `writer_test`; a bare `writer` does not
# exist on a fresh kernel, so flushing it would raise NameError on Restart & Run All.
writer_train.flush()
writer_test.flush()

In [16]:
# Load the TensorBoard notebook extension; it provides the %tensorboard magic used below.
%load_ext tensorboard

In [23]:
%tensorboard --logdir ../experiments/VAE_MNIST/20241028-004306/

Reusing TensorBoard on port 6011 (pid 1110344), started 0:00:12 ago. (Use '!kill 1110344' to kill it.)