In [10]:
%load_ext autoreload
# %reload_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from methylVA.mnist.dataset import get_methyl_data_loaders

# Dataset dimensions; they are embedded in the pickle file names below.
n_features = 30579
n_samples_train = 33360
n_samples_test = 3707

# Run-level settings. data_id is not used to build the paths in this cell.
data_id = 0.05
batch_size = 128

# Every split has a data pickle and a metadata pickle that share the same
# "<n_samples>_<n_features>.pkl" suffix and live in the same directory.
data_root = "../data/random_data"
train_suffix = f"{n_samples_train}_{n_features}.pkl"
test_suffix = f"{n_samples_test}_{n_features}.pkl"

train_data_path = f"{data_root}/train_data_{train_suffix}"
test_data_path = f"{data_root}/test_data_{test_suffix}"

train_metadata_path = f"{data_root}/train_metadata_{train_suffix}"
test_metadata_path = f"{data_root}/test_metadata_{test_suffix}"

# Positional order expected by the call: train data, train metadata,
# test data, test metadata.
loader_paths = (
    train_data_path,
    train_metadata_path,
    test_data_path,
    test_metadata_path,
)
train_loader, test_loader = get_methyl_data_loaders(*loader_paths, batch_size=batch_size)


In [3]:
# Pull a single batch from the training loader; its second axis gives the
# feature count that is reported below.
data_batch, _ = next(iter(train_loader))

# Row counts come from the datasets wrapped by each loader.
num_train_rows, num_test_rows = len(train_loader.dataset), len(test_loader.dataset)

print(f"Number of features in each dataset: {data_batch.shape[1]}")
print(f"Number of rows in the training dataset: {num_train_rows}")
print(f"Number of rows in the test dataset: {num_test_rows}")

Number of features in each dataset: 30579
Number of rows in the training dataset: 33360
Number of rows in the test dataset: 3707


In [5]:
from datetime import datetime

import torch
from torch.utils.tensorboard import SummaryWriter

from methylVA.mnist.model import VAE
from methylVA.mnist.training import train, test

# Model shape: the input width is read from the sample batch rather than
# hard-coded, so it follows whatever the loader yields.
input_dim = data_batch.shape[1]
hidden_dim = 2048
latent_dim = 32

# Optimisation settings.
num_epochs = 100
learning_rate = 1e-3
weight_decay = 1e-2

# Weight of the KL term; it is only used in the experiment name within this cell.
kl_weight = 1.0

# Experiment identifier, later used as the TensorBoard log sub-directory.
name = f'VAE_random_data_{data_id}_latent_{latent_dim}_kl_{kl_weight}'



In [6]:

# Compute the run timestamp once. Calling datetime.now() separately for each
# writer can straddle a second boundary, which would put the train and test
# logs of the same run under different timestamp directories.
run_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

# One TensorBoard writer per split, both keyed by the same run timestamp.
writer_train = SummaryWriter(f'../experiments/{name}/train/{run_timestamp}')
writer_test = SummaryWriter(f'../experiments/{name}/test/{run_timestamp}')

In [7]:
# Seed PyTorch's global RNG before the model is built so that parameter
# initialisation is repeatable when the notebook is re-run from this cell.
# NOTE(review): this does not by itself guarantee bit-identical training on
# CUDA; see the PyTorch reproducibility notes if that is required.
seed = 0
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the VAE from the hyper-parameters defined earlier and move it to the
# selected device before the optimizer captures its parameters.
model = VAE(input_dim=input_dim, latent_dim=latent_dim, hidden_dim=hidden_dim).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [8]:
# Display the module repr to check the layer sizes against the hyper-parameters.
# NOTE(review): the encoder's last layer emits 64 features while latent_dim is
# 32 — presumably two values per latent dimension; confirm in the VAE class.
model

VAE(
  (encoder): Sequential(
    (0): Linear(in_features=30579, out_features=2048, bias=True)
    (1): SiLU()
    (2): Linear(in_features=2048, out_features=1024, bias=True)
    (3): SiLU()
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): SiLU()
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): SiLU()
    (8): Linear(in_features=256, out_features=64, bias=True)
  )
  (softplus): Softplus(beta=1.0, threshold=20.0)
  (decoder): Sequential(
    (0): Linear(in_features=32, out_features=256, bias=True)
    (1): SiLU()
    (2): Linear(in_features=256, out_features=512, bias=True)
    (3): SiLU()
    (4): Linear(in_features=512, out_features=1024, bias=True)
    (5): SiLU()
    (6): Linear(in_features=1024, out_features=2048, bias=True)
    (7): SiLU()
    (8): Linear(in_features=2048, out_features=30579, bias=True)
    (9): Sigmoid()
  )
)

In [None]:
from methylVA.mnist.training import train, test


# train() returns a running update count that is fed back in on the next epoch
# and also passed to test() (presumably as the logging step — confirm in
# methylVA.mnist.training).
prev_updates = 0
try:
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        prev_updates = train(model, train_loader, optimizer, prev_updates, writer=writer_train)
        test(model, test_loader, prev_updates, writer=writer_test)
finally:
    # Push buffered TensorBoard events to disk even if the run is interrupted
    # or fails part-way. flush() rather than close() keeps the writers usable
    # when this cell is re-run.
    writer_train.flush()
    writer_test.flush()

Epoch 1/100


  0%|          | 1/261 [00:00<03:21,  1.29it/s]

Step 0, (N samples: 0), Loss: 21200.0684, (Recon: 21196.5742, KLD: 3.4940), Gradient norm: 5.8527


 39%|███▉      | 102/261 [00:20<00:31,  5.07it/s]

Step 100, (N samples: 12,800), Loss: 21196.1973, (Recon: 21196.1875, KLD: 0.0088), Gradient norm: 5.2876


 77%|███████▋  | 202/261 [00:39<00:11,  5.28it/s]

Step 200, (N samples: 25,600), Loss: 21196.0742, (Recon: 21196.0684, KLD: 0.0056), Gradient norm: 5.1652


100%|██████████| 261/261 [00:50<00:00,  5.18it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.80it/s]


====> Test set loss: 21196.1219, (BCE: 21196.1160, KLD: 0.0054)
Epoch 2/100


 15%|█▌        | 40/261 [00:07<00:42,  5.22it/s]

Step 300, (N samples: 38,400), Loss: 21195.9375, (Recon: 21195.9258, KLD: 0.0125), Gradient norm: 5.5406


 54%|█████▍    | 141/261 [00:26<00:23,  5.22it/s]

Step 400, (N samples: 51,200), Loss: 21196.2500, (Recon: 21196.2461, KLD: 0.0045), Gradient norm: 5.5989


 92%|█████████▏| 241/261 [00:45<00:03,  5.19it/s]

Step 500, (N samples: 64,000), Loss: 21196.1348, (Recon: 21196.1328, KLD: 0.0027), Gradient norm: 5.5419


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.91it/s]


====> Test set loss: 21196.0702, (BCE: 21196.0682, KLD: 0.0023)
Epoch 3/100


 31%|███       | 80/261 [00:15<00:35,  5.10it/s]

Step 600, (N samples: 76,800), Loss: 21195.9688, (Recon: 21195.9648, KLD: 0.0040), Gradient norm: 5.8960


 69%|██████▉   | 180/261 [00:34<00:15,  5.11it/s]

Step 700, (N samples: 89,600), Loss: 21196.0664, (Recon: 21196.0625, KLD: 0.0030), Gradient norm: 5.6485


100%|██████████| 261/261 [00:50<00:00,  5.20it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.11it/s]


====> Test set loss: 21196.0580, (BCE: 21196.0560, KLD: 0.0025)
Epoch 4/100


  7%|▋         | 19/261 [00:03<00:47,  5.14it/s]

Step 800, (N samples: 102,400), Loss: 21195.8535, (Recon: 21195.8496, KLD: 0.0032), Gradient norm: 5.9645


 46%|████▌     | 119/261 [00:22<00:27,  5.16it/s]

Step 900, (N samples: 115,200), Loss: 21196.0840, (Recon: 21196.0820, KLD: 0.0020), Gradient norm: 5.7642


 84%|████████▍ | 219/261 [00:41<00:08,  5.11it/s]

Step 1,000, (N samples: 128,000), Loss: 21196.0371, (Recon: 21196.0352, KLD: 0.0015), Gradient norm: 6.1588


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.05it/s]


====> Test set loss: 21196.0473, (BCE: 21196.0454, KLD: 0.0020)
Epoch 5/100


 22%|██▏       | 58/261 [00:11<00:38,  5.24it/s]

Step 1,100, (N samples: 140,800), Loss: 21195.9629, (Recon: 21195.9590, KLD: 0.0036), Gradient norm: 6.3869


 61%|██████    | 158/261 [00:30<00:19,  5.22it/s]

Step 1,200, (N samples: 153,600), Loss: 21195.9629, (Recon: 21195.9609, KLD: 0.0017), Gradient norm: 6.2176


 99%|█████████▉| 258/261 [00:49<00:00,  5.28it/s]

Step 1,300, (N samples: 166,400), Loss: 21195.9961, (Recon: 21195.9941, KLD: 0.0015), Gradient norm: 6.0945


100%|██████████| 261/261 [00:50<00:00,  5.20it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.92it/s]


====> Test set loss: 21196.0263, (BCE: 21196.0244, KLD: 0.0014)
Epoch 6/100


 37%|███▋      | 96/261 [00:18<00:31,  5.16it/s]

Step 1,400, (N samples: 179,200), Loss: 21195.9648, (Recon: 21195.9629, KLD: 0.0016), Gradient norm: 6.4603


 75%|███████▌  | 197/261 [00:37<00:12,  5.29it/s]

Step 1,500, (N samples: 192,000), Loss: 21195.9746, (Recon: 21195.9727, KLD: 0.0012), Gradient norm: 6.4093


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.13it/s]


====> Test set loss: 21196.0398, (BCE: 21196.0384, KLD: 0.0010)
Epoch 7/100


 14%|█▍        | 36/261 [00:06<00:43,  5.19it/s]

Step 1,600, (N samples: 204,800), Loss: 21195.8945, (Recon: 21195.8926, KLD: 0.0015), Gradient norm: 6.8816


 52%|█████▏    | 135/261 [00:25<00:23,  5.32it/s]

Step 1,700, (N samples: 217,600), Loss: 21195.9824, (Recon: 21195.9824, KLD: 0.0009), Gradient norm: 6.6986


 90%|█████████ | 236/261 [00:45<00:04,  5.29it/s]

Step 1,800, (N samples: 230,400), Loss: 21195.9766, (Recon: 21195.9766, KLD: 0.0007), Gradient norm: 6.7761


100%|██████████| 261/261 [00:50<00:00,  5.21it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.56it/s]


====> Test set loss: 21196.0244, (BCE: 21196.0225, KLD: 0.0011)
Epoch 8/100


 29%|██▊       | 75/261 [00:14<00:35,  5.30it/s]

Step 1,900, (N samples: 243,200), Loss: 21195.9160, (Recon: 21195.9141, KLD: 0.0015), Gradient norm: 6.9751


 67%|██████▋   | 175/261 [00:33<00:16,  5.23it/s]

Step 2,000, (N samples: 256,000), Loss: 21195.9785, (Recon: 21195.9785, KLD: 0.0009), Gradient norm: 6.7564


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.99it/s]


====> Test set loss: 21196.0369, (BCE: 21196.0369, KLD: 0.0007)
Epoch 9/100


  5%|▌         | 14/261 [00:02<00:47,  5.20it/s]

Step 2,100, (N samples: 268,800), Loss: 21195.8184, (Recon: 21195.8164, KLD: 0.0010), Gradient norm: 6.6933


 44%|████▎     | 114/261 [00:21<00:27,  5.28it/s]

Step 2,200, (N samples: 281,600), Loss: 21196.1055, (Recon: 21196.1055, KLD: 0.0008), Gradient norm: 7.0591


 82%|████████▏ | 214/261 [00:40<00:08,  5.26it/s]

Step 2,300, (N samples: 294,400), Loss: 21196.0449, (Recon: 21196.0449, KLD: 0.0006), Gradient norm: 7.1856


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:04<00:00,  7.24it/s]


====> Test set loss: 21196.0337, (BCE: 21196.0337, KLD: 0.0007)
Epoch 10/100


 20%|██        | 53/261 [00:10<00:39,  5.24it/s]

Step 2,400, (N samples: 307,200), Loss: 21195.9434, (Recon: 21195.9414, KLD: 0.0012), Gradient norm: 7.3994


 58%|█████▊    | 152/261 [00:29<00:20,  5.21it/s]

Step 2,500, (N samples: 320,000), Loss: 21195.9844, (Recon: 21195.9844, KLD: 0.0007), Gradient norm: 7.3251


 97%|█████████▋| 253/261 [00:48<00:01,  5.21it/s]

Step 2,600, (N samples: 332,800), Loss: 21195.9883, (Recon: 21195.9883, KLD: 0.0007), Gradient norm: 6.8733


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.0334, (BCE: 21196.0334, KLD: 0.0007)
Epoch 11/100


 35%|███▌      | 92/261 [00:17<00:32,  5.23it/s]

Step 2,700, (N samples: 345,600), Loss: 21195.8867, (Recon: 21195.8867, KLD: 0.0008), Gradient norm: 7.1772


 74%|███████▎  | 192/261 [00:36<00:13,  5.19it/s]

Step 2,800, (N samples: 358,400), Loss: 21195.9609, (Recon: 21195.9609, KLD: 0.0007), Gradient norm: 6.9796


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.97it/s]


====> Test set loss: 21196.0463, (BCE: 21196.0463, KLD: 0.0005)
Epoch 12/100


 12%|█▏        | 31/261 [00:05<00:43,  5.28it/s]

Step 2,900, (N samples: 371,200), Loss: 21195.7695, (Recon: 21195.7695, KLD: 0.0005), Gradient norm: 7.8457


 50%|█████     | 131/261 [00:24<00:24,  5.27it/s]

Step 3,000, (N samples: 384,000), Loss: 21195.9570, (Recon: 21195.9570, KLD: 0.0006), Gradient norm: 7.4180


 88%|████████▊ | 230/261 [00:43<00:05,  5.21it/s]

Step 3,100, (N samples: 396,800), Loss: 21195.9531, (Recon: 21195.9531, KLD: 0.0005), Gradient norm: 7.7513


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.85it/s]


====> Test set loss: 21196.0327, (BCE: 21196.0327, KLD: 0.0007)
Epoch 13/100


 27%|██▋       | 70/261 [00:13<00:36,  5.29it/s]

Step 3,200, (N samples: 409,600), Loss: 21195.9023, (Recon: 21195.9023, KLD: 0.0005), Gradient norm: 7.8295


 65%|██████▌   | 170/261 [00:33<00:17,  5.19it/s]

Step 3,300, (N samples: 422,400), Loss: 21195.9961, (Recon: 21195.9961, KLD: 0.0005), Gradient norm: 7.5984


100%|██████████| 261/261 [00:50<00:00,  5.18it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.03it/s]


====> Test set loss: 21196.0245, (BCE: 21196.0245, KLD: 0.0004)
Epoch 14/100


  3%|▎         | 9/261 [00:01<00:48,  5.23it/s]

Step 3,400, (N samples: 435,200), Loss: 21195.7500, (Recon: 21195.7500, KLD: 0.0004), Gradient norm: 8.0290


 42%|████▏     | 109/261 [00:20<00:28,  5.30it/s]

Step 3,500, (N samples: 448,000), Loss: 21195.9941, (Recon: 21195.9941, KLD: 0.0006), Gradient norm: 8.9429


 80%|████████  | 209/261 [00:39<00:09,  5.21it/s]

Step 3,600, (N samples: 460,800), Loss: 21195.9688, (Recon: 21195.9688, KLD: 0.0006), Gradient norm: 7.9326


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.75it/s]


====> Test set loss: 21196.0360, (BCE: 21196.0360, KLD: 0.0007)
Epoch 15/100


 18%|█▊        | 48/261 [00:09<00:40,  5.22it/s]

Step 3,700, (N samples: 473,600), Loss: 21195.8359, (Recon: 21195.8359, KLD: 0.0008), Gradient norm: 8.1817


 57%|█████▋    | 148/261 [00:28<00:21,  5.24it/s]

Step 3,800, (N samples: 486,400), Loss: 21196.0020, (Recon: 21196.0000, KLD: 0.0022), Gradient norm: 7.9755


 95%|█████████▌| 248/261 [00:47<00:02,  5.30it/s]

Step 3,900, (N samples: 499,200), Loss: 21196.0898, (Recon: 21196.0781, KLD: 0.0122), Gradient norm: 8.3672


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.97it/s]


====> Test set loss: 21196.0320, (BCE: 21196.0299, KLD: 0.0022)
Epoch 16/100


 33%|███▎      | 87/261 [00:16<00:33,  5.27it/s]

Step 4,000, (N samples: 512,000), Loss: 21195.9609, (Recon: 21195.9531, KLD: 0.0084), Gradient norm: 8.7127


 72%|███████▏  | 187/261 [00:35<00:14,  5.23it/s]

Step 4,100, (N samples: 524,800), Loss: 21196.0137, (Recon: 21196.0117, KLD: 0.0025), Gradient norm: 8.4351


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.0446, (BCE: 21196.0426, KLD: 0.0013)
Epoch 17/100


 10%|▉         | 26/261 [00:04<00:44,  5.29it/s]

Step 4,200, (N samples: 537,600), Loss: 21195.9258, (Recon: 21195.9219, KLD: 0.0048), Gradient norm: 9.1987


 48%|████▊     | 126/261 [00:24<00:26,  5.15it/s]

Step 4,300, (N samples: 550,400), Loss: 21196.0938, (Recon: 21196.0918, KLD: 0.0027), Gradient norm: 9.4280


 87%|████████▋ | 226/261 [00:43<00:06,  5.17it/s]

Step 4,400, (N samples: 563,200), Loss: 21196.0410, (Recon: 21196.0352, KLD: 0.0063), Gradient norm: 9.2923


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.00it/s]


====> Test set loss: 21196.0523, (BCE: 21196.0515, KLD: 0.0011)
Epoch 18/100


 25%|██▍       | 65/261 [00:12<00:37,  5.27it/s]

Step 4,500, (N samples: 576,000), Loss: 21195.8125, (Recon: 21195.8105, KLD: 0.0024), Gradient norm: 8.6051


 63%|██████▎   | 165/261 [00:31<00:18,  5.26it/s]

Step 4,600, (N samples: 588,800), Loss: 21196.0312, (Recon: 21196.0293, KLD: 0.0025), Gradient norm: 9.4213


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.10it/s]


====> Test set loss: 21196.0625, (BCE: 21196.0625, KLD: 0.0007)
Epoch 19/100


  2%|▏         | 4/261 [00:00<00:49,  5.15it/s]

Step 4,700, (N samples: 601,600), Loss: 21195.7441, (Recon: 21195.7422, KLD: 0.0010), Gradient norm: 9.1803


 39%|███▉      | 103/261 [00:19<00:30,  5.19it/s]

Step 4,800, (N samples: 614,400), Loss: 21195.9688, (Recon: 21195.9688, KLD: 0.0009), Gradient norm: 8.9704


 78%|███████▊  | 204/261 [00:38<00:10,  5.25it/s]

Step 4,900, (N samples: 627,200), Loss: 21196.0273, (Recon: 21196.0273, KLD: 0.0004), Gradient norm: 9.2126


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.87it/s]


====> Test set loss: 21196.0427, (BCE: 21196.0420, KLD: 0.0009)
Epoch 20/100


 16%|█▋        | 43/261 [00:08<00:42,  5.10it/s]

Step 5,000, (N samples: 640,000), Loss: 21195.7344, (Recon: 21195.7344, KLD: 0.0007), Gradient norm: 8.4004


 55%|█████▍    | 143/261 [00:27<00:22,  5.20it/s]

Step 5,100, (N samples: 652,800), Loss: 21196.0020, (Recon: 21196.0020, KLD: 0.0009), Gradient norm: 8.6005


 93%|█████████▎| 243/261 [00:46<00:03,  5.23it/s]

Step 5,200, (N samples: 665,600), Loss: 21195.8945, (Recon: 21195.8945, KLD: 0.0003), Gradient norm: 8.8776


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.00it/s]


====> Test set loss: 21196.0519, (BCE: 21196.0519, KLD: 0.0006)
Epoch 21/100


 31%|███▏      | 82/261 [00:15<00:33,  5.30it/s]

Step 5,300, (N samples: 678,400), Loss: 21195.9434, (Recon: 21195.9434, KLD: 0.0003), Gradient norm: 9.3382


 70%|██████▉   | 182/261 [00:34<00:15,  5.24it/s]

Step 5,400, (N samples: 691,200), Loss: 21196.0234, (Recon: 21196.0234, KLD: 0.0003), Gradient norm: 10.5623


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.83it/s]


====> Test set loss: 21196.0569, (BCE: 21196.0569, KLD: 0.0008)
Epoch 22/100


  8%|▊         | 21/261 [00:04<00:46,  5.14it/s]

Step 5,500, (N samples: 704,000), Loss: 21195.8281, (Recon: 21195.8281, KLD: 0.0006), Gradient norm: 9.4860


 46%|████▋     | 121/261 [00:23<00:26,  5.23it/s]

Step 5,600, (N samples: 716,800), Loss: 21196.0195, (Recon: 21196.0195, KLD: 0.0009), Gradient norm: 10.3921


 85%|████████▍ | 221/261 [00:41<00:07,  5.23it/s]

Step 5,700, (N samples: 729,600), Loss: 21196.0410, (Recon: 21196.0391, KLD: 0.0011), Gradient norm: 9.5869


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.09it/s]


====> Test set loss: 21196.0797, (BCE: 21196.0797, KLD: 0.0003)
Epoch 23/100


 23%|██▎       | 60/261 [00:11<00:37,  5.30it/s]

Step 5,800, (N samples: 742,400), Loss: 21195.9941, (Recon: 21195.9922, KLD: 0.0015), Gradient norm: 10.1069


 61%|██████▏   | 160/261 [00:30<00:19,  5.22it/s]

Step 5,900, (N samples: 755,200), Loss: 21196.0176, (Recon: 21196.0176, KLD: 0.0004), Gradient norm: 9.5403


100%|█████████▉| 260/261 [00:49<00:00,  5.26it/s]

Step 6,000, (N samples: 768,000), Loss: 21195.9746, (Recon: 21195.9746, KLD: 0.0007), Gradient norm: 9.6691


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.02it/s]


====> Test set loss: 21196.0393, (BCE: 21196.0393, KLD: 0.0006)
Epoch 24/100


 38%|███▊      | 99/261 [00:18<00:31,  5.18it/s]

Step 6,100, (N samples: 780,800), Loss: 21196.0410, (Recon: 21196.0391, KLD: 0.0011), Gradient norm: 9.7902


 76%|███████▌  | 199/261 [00:37<00:11,  5.24it/s]

Step 6,200, (N samples: 793,600), Loss: 21196.0723, (Recon: 21196.0723, KLD: 0.0006), Gradient norm: 10.1451


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.00it/s]


====> Test set loss: 21196.0580, (BCE: 21196.0560, KLD: 0.0012)
Epoch 25/100


 15%|█▍        | 38/261 [00:07<00:43,  5.17it/s]

Step 6,300, (N samples: 806,400), Loss: 21195.8945, (Recon: 21195.8945, KLD: 0.0005), Gradient norm: 10.6235


 53%|█████▎    | 138/261 [00:26<00:23,  5.20it/s]

Step 6,400, (N samples: 819,200), Loss: 21196.0801, (Recon: 21196.0645, KLD: 0.0156), Gradient norm: 10.8515


 91%|█████████ | 238/261 [00:45<00:04,  5.26it/s]

Step 6,500, (N samples: 832,000), Loss: 21195.9785, (Recon: 21195.9766, KLD: 0.0014), Gradient norm: 10.4138


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.14it/s]


====> Test set loss: 21196.0781, (BCE: 21196.0781, KLD: 0.0007)
Epoch 26/100


 30%|██▉       | 77/261 [00:14<00:35,  5.12it/s]

Step 6,600, (N samples: 844,800), Loss: 21195.9688, (Recon: 21195.9668, KLD: 0.0017), Gradient norm: 10.7801


 68%|██████▊   | 177/261 [00:33<00:16,  5.23it/s]

Step 6,700, (N samples: 857,600), Loss: 21196.1680, (Recon: 21196.1680, KLD: 0.0007), Gradient norm: 10.4769


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.01it/s]


====> Test set loss: 21196.0562, (BCE: 21196.0542, KLD: 0.0012)
Epoch 27/100


  6%|▌         | 16/261 [00:03<00:46,  5.26it/s]

Step 6,800, (N samples: 870,400), Loss: 21195.7852, (Recon: 21195.7832, KLD: 0.0027), Gradient norm: 10.2564


 44%|████▍     | 116/261 [00:22<00:27,  5.19it/s]

Step 6,900, (N samples: 883,200), Loss: 21195.9453, (Recon: 21195.9453, KLD: 0.0009), Gradient norm: 10.8597


 83%|████████▎ | 216/261 [00:41<00:08,  5.26it/s]

Step 7,000, (N samples: 896,000), Loss: 21196.0352, (Recon: 21196.0332, KLD: 0.0011), Gradient norm: 11.2917


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.08it/s]


====> Test set loss: 21196.0742, (BCE: 21196.0735, KLD: 0.0010)
Epoch 28/100


 21%|██        | 55/261 [00:10<00:39,  5.22it/s]

Step 7,100, (N samples: 908,800), Loss: 21195.8613, (Recon: 21195.8613, KLD: 0.0008), Gradient norm: 10.8586


 59%|█████▉    | 155/261 [00:29<00:20,  5.29it/s]

Step 7,200, (N samples: 921,600), Loss: 21196.0586, (Recon: 21196.0586, KLD: 0.0007), Gradient norm: 11.2720


 98%|█████████▊| 255/261 [00:48<00:01,  5.22it/s]

Step 7,300, (N samples: 934,400), Loss: 21196.0215, (Recon: 21196.0215, KLD: 0.0007), Gradient norm: 11.5482


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.79it/s]


====> Test set loss: 21196.0722, (BCE: 21196.0718, KLD: 0.0009)
Epoch 29/100


 36%|███▌      | 94/261 [00:17<00:31,  5.26it/s]

Step 7,400, (N samples: 947,200), Loss: 21196.0254, (Recon: 21196.0234, KLD: 0.0012), Gradient norm: 11.7941


 74%|███████▍  | 194/261 [00:36<00:12,  5.17it/s]

Step 7,500, (N samples: 960,000), Loss: 21196.1035, (Recon: 21196.1035, KLD: 0.0007), Gradient norm: 11.7737


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.97it/s]


====> Test set loss: 21196.0897, (BCE: 21196.0897, KLD: 0.0005)
Epoch 30/100


 13%|█▎        | 33/261 [00:06<00:44,  5.07it/s]

Step 7,600, (N samples: 972,800), Loss: 21195.9043, (Recon: 21195.9023, KLD: 0.0014), Gradient norm: 12.0207


 51%|█████     | 133/261 [00:25<00:24,  5.28it/s]

Step 7,700, (N samples: 985,600), Loss: 21196.0371, (Recon: 21196.0352, KLD: 0.0011), Gradient norm: 10.8955


 89%|████████▉ | 233/261 [00:44<00:05,  5.26it/s]

Step 7,800, (N samples: 998,400), Loss: 21196.1914, (Recon: 21196.1914, KLD: 0.0004), Gradient norm: 11.3998


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.1001, (BCE: 21196.0981, KLD: 0.0012)
Epoch 31/100


 28%|██▊       | 72/261 [00:13<00:36,  5.15it/s]

Step 7,900, (N samples: 1,011,200), Loss: 21195.9688, (Recon: 21195.9688, KLD: 0.0005), Gradient norm: 11.8226


 66%|██████▌   | 172/261 [00:32<00:16,  5.29it/s]

Step 8,000, (N samples: 1,024,000), Loss: 21195.9395, (Recon: 21195.9395, KLD: 0.0007), Gradient norm: 12.0596


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.95it/s]


====> Test set loss: 21196.1066, (BCE: 21196.1065, KLD: 0.0008)
Epoch 32/100


  4%|▍         | 11/261 [00:02<00:47,  5.25it/s]

Step 8,100, (N samples: 1,036,800), Loss: 21195.8750, (Recon: 21195.8750, KLD: 0.0008), Gradient norm: 12.5070


 43%|████▎     | 111/261 [00:21<00:28,  5.31it/s]

Step 8,200, (N samples: 1,049,600), Loss: 21196.0508, (Recon: 21196.0508, KLD: 0.0004), Gradient norm: 12.1060


 81%|████████  | 211/261 [00:39<00:09,  5.29it/s]

Step 8,300, (N samples: 1,062,400), Loss: 21196.0605, (Recon: 21196.0605, KLD: 0.0003), Gradient norm: 11.9554


100%|██████████| 261/261 [00:49<00:00,  5.30it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.0961, (BCE: 21196.0961, KLD: 0.0008)
Epoch 33/100


 19%|█▉        | 50/261 [00:09<00:40,  5.21it/s]

Step 8,400, (N samples: 1,075,200), Loss: 21195.9512, (Recon: 21195.9512, KLD: 0.0006), Gradient norm: 12.1922


 57%|█████▋    | 150/261 [00:28<00:21,  5.27it/s]

Step 8,500, (N samples: 1,088,000), Loss: 21196.0312, (Recon: 21196.0312, KLD: 0.0005), Gradient norm: 12.9119


 96%|█████████▌| 250/261 [00:47<00:02,  5.20it/s]

Step 8,600, (N samples: 1,100,800), Loss: 21196.0820, (Recon: 21196.0820, KLD: 0.0004), Gradient norm: 12.4108


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.02it/s]


====> Test set loss: 21196.1148, (BCE: 21196.1148, KLD: 0.0004)
Epoch 34/100


 34%|███▍      | 89/261 [00:16<00:32,  5.25it/s]

Step 8,700, (N samples: 1,113,600), Loss: 21196.0332, (Recon: 21196.0332, KLD: 0.0005), Gradient norm: 12.8315


 72%|███████▏  | 189/261 [00:35<00:14,  5.14it/s]

Step 8,800, (N samples: 1,126,400), Loss: 21196.0742, (Recon: 21196.0742, KLD: 0.0003), Gradient norm: 11.5502


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.07it/s]


====> Test set loss: 21196.1065, (BCE: 21196.1065, KLD: 0.0008)
Epoch 35/100


 11%|█         | 28/261 [00:05<00:44,  5.29it/s]

Step 8,900, (N samples: 1,139,200), Loss: 21195.8789, (Recon: 21195.8789, KLD: 0.0008), Gradient norm: 12.9802


 49%|████▉     | 128/261 [00:24<00:25,  5.13it/s]

Step 9,000, (N samples: 1,152,000), Loss: 21195.9844, (Recon: 21195.9844, KLD: 0.0004), Gradient norm: 12.7932


 87%|████████▋ | 228/261 [00:43<00:06,  5.12it/s]

Step 9,100, (N samples: 1,164,800), Loss: 21196.0840, (Recon: 21196.0840, KLD: 0.0003), Gradient norm: 12.1718


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.87it/s]


====> Test set loss: 21196.0988, (BCE: 21196.0988, KLD: 0.0004)
Epoch 36/100


 26%|██▌       | 67/261 [00:12<00:37,  5.11it/s]

Step 9,200, (N samples: 1,177,600), Loss: 21195.9492, (Recon: 21195.9492, KLD: 0.0007), Gradient norm: 12.6486


 64%|██████▍   | 167/261 [00:31<00:18,  5.06it/s]

Step 9,300, (N samples: 1,190,400), Loss: 21196.0586, (Recon: 21196.0586, KLD: 0.0005), Gradient norm: 12.2297


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.99it/s]


====> Test set loss: 21196.1018, (BCE: 21196.1018, KLD: 0.0002)
Epoch 37/100


  2%|▏         | 6/261 [00:01<00:49,  5.17it/s]

Step 9,400, (N samples: 1,203,200), Loss: 21195.8008, (Recon: 21195.8008, KLD: 0.0002), Gradient norm: 12.5689


 41%|████      | 106/261 [00:20<00:29,  5.20it/s]

Step 9,500, (N samples: 1,216,000), Loss: 21195.9531, (Recon: 21195.9531, KLD: 0.0002), Gradient norm: 12.9930


 79%|███████▉  | 206/261 [00:39<00:10,  5.06it/s]

Step 9,600, (N samples: 1,228,800), Loss: 21196.0820, (Recon: 21196.0820, KLD: 0.0004), Gradient norm: 13.0625


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.1162, (BCE: 21196.1156, KLD: 0.0008)
Epoch 38/100


 17%|█▋        | 45/261 [00:08<00:40,  5.31it/s]

Step 9,700, (N samples: 1,241,600), Loss: 21195.8711, (Recon: 21195.8711, KLD: 0.0004), Gradient norm: 12.9568


 56%|█████▌    | 145/261 [00:27<00:22,  5.25it/s]

Step 9,800, (N samples: 1,254,400), Loss: 21196.1367, (Recon: 21196.1367, KLD: 0.0003), Gradient norm: 13.6225


 94%|█████████▍| 245/261 [00:46<00:03,  5.22it/s]

Step 9,900, (N samples: 1,267,200), Loss: 21196.0879, (Recon: 21196.0879, KLD: 0.0005), Gradient norm: 12.6902


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.92it/s]


====> Test set loss: 21196.1115, (BCE: 21196.1115, KLD: 0.0003)
Epoch 39/100


 32%|███▏      | 84/261 [00:16<00:34,  5.19it/s]

Step 10,000, (N samples: 1,280,000), Loss: 21195.9395, (Recon: 21195.9395, KLD: 0.0003), Gradient norm: 13.1318


 70%|███████   | 184/261 [00:34<00:14,  5.21it/s]

Step 10,100, (N samples: 1,292,800), Loss: 21196.0469, (Recon: 21196.0469, KLD: 0.0007), Gradient norm: 13.3102


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.0919, (BCE: 21196.0919, KLD: 0.0003)
Epoch 40/100


  9%|▉         | 23/261 [00:04<00:45,  5.26it/s]

Step 10,200, (N samples: 1,305,600), Loss: 21195.8867, (Recon: 21195.8867, KLD: 0.0005), Gradient norm: 13.7385


 47%|████▋     | 123/261 [00:23<00:26,  5.23it/s]

Step 10,300, (N samples: 1,318,400), Loss: 21196.0410, (Recon: 21196.0410, KLD: 0.0003), Gradient norm: 13.2323


 85%|████████▌ | 223/261 [00:42<00:07,  5.21it/s]

Step 10,400, (N samples: 1,331,200), Loss: 21195.9844, (Recon: 21195.9844, KLD: 0.0007), Gradient norm: 12.7658


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.95it/s]


====> Test set loss: 21196.1228, (BCE: 21196.1228, KLD: 0.0004)
Epoch 41/100


 24%|██▍       | 62/261 [00:11<00:38,  5.18it/s]

Step 10,500, (N samples: 1,344,000), Loss: 21195.9824, (Recon: 21195.9805, KLD: 0.0010), Gradient norm: 14.0869


 62%|██████▏   | 162/261 [00:30<00:18,  5.27it/s]

Step 10,600, (N samples: 1,356,800), Loss: 21196.1094, (Recon: 21196.1094, KLD: 0.0005), Gradient norm: 13.1030


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]


Step 10,700, (N samples: 1,369,600), Loss: 21196.0840, (Recon: 21196.0723, KLD: 0.0126), Gradient norm: 17.1831


Testing: 100%|██████████| 29/29 [00:03<00:00,  7.91it/s]


====> Test set loss: 21196.0972, (BCE: 21196.0886, KLD: 0.0086)
Epoch 42/100


 39%|███▊      | 101/261 [00:19<00:30,  5.24it/s]

Step 10,800, (N samples: 1,382,400), Loss: 21196.2480, (Recon: 21196.2461, KLD: 0.0013), Gradient norm: 14.5522


 77%|███████▋  | 201/261 [00:37<00:11,  5.31it/s]

Step 10,900, (N samples: 1,395,200), Loss: 21196.1152, (Recon: 21196.1133, KLD: 0.0012), Gradient norm: 13.8344


100%|██████████| 261/261 [00:49<00:00,  5.30it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.78it/s]


====> Test set loss: 21196.1331, (BCE: 21196.1331, KLD: 0.0006)
Epoch 43/100


 15%|█▌        | 40/261 [00:07<00:41,  5.31it/s]

Step 11,000, (N samples: 1,408,000), Loss: 21195.9473, (Recon: 21195.9453, KLD: 0.0010), Gradient norm: 14.4212


 54%|█████▎    | 140/261 [00:26<00:23,  5.22it/s]

Step 11,100, (N samples: 1,420,800), Loss: 21196.0762, (Recon: 21196.0742, KLD: 0.0014), Gradient norm: 13.9071


 92%|█████████▏| 240/261 [00:45<00:03,  5.29it/s]

Step 11,200, (N samples: 1,433,600), Loss: 21196.1406, (Recon: 21196.1406, KLD: 0.0003), Gradient norm: 13.9628


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.1380, (BCE: 21196.1380, KLD: 0.0005)
Epoch 44/100


 30%|███       | 79/261 [00:14<00:34,  5.32it/s]

Step 11,300, (N samples: 1,446,400), Loss: 21196.0469, (Recon: 21196.0469, KLD: 0.0008), Gradient norm: 12.8806


 69%|██████▊   | 179/261 [00:33<00:15,  5.30it/s]

Step 11,400, (N samples: 1,459,200), Loss: 21196.0957, (Recon: 21196.0957, KLD: 0.0005), Gradient norm: 14.6375


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.07it/s]


====> Test set loss: 21196.1265, (BCE: 21196.1245, KLD: 0.0016)
Epoch 45/100


  7%|▋         | 18/261 [00:03<00:45,  5.30it/s]

Step 11,500, (N samples: 1,472,000), Loss: 21195.9121, (Recon: 21195.9102, KLD: 0.0015), Gradient norm: 14.1970


 45%|████▌     | 118/261 [00:22<00:27,  5.22it/s]

Step 11,600, (N samples: 1,484,800), Loss: 21196.1172, (Recon: 21196.1172, KLD: 0.0005), Gradient norm: 15.1527


 84%|████████▎ | 218/261 [00:41<00:08,  5.20it/s]

Step 11,700, (N samples: 1,497,600), Loss: 21196.0684, (Recon: 21196.0684, KLD: 0.0003), Gradient norm: 14.1866


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.0917, (BCE: 21196.0917, KLD: 0.0006)
Epoch 46/100


 22%|██▏       | 57/261 [00:10<00:38,  5.28it/s]

Step 11,800, (N samples: 1,510,400), Loss: 21195.9219, (Recon: 21195.9219, KLD: 0.0005), Gradient norm: 14.5673


 60%|██████    | 157/261 [00:29<00:19,  5.33it/s]

Step 11,900, (N samples: 1,523,200), Loss: 21196.0547, (Recon: 21196.0547, KLD: 0.0007), Gradient norm: 14.5408


 98%|█████████▊| 257/261 [00:48<00:00,  5.26it/s]

Step 12,000, (N samples: 1,536,000), Loss: 21195.9883, (Recon: 21195.9883, KLD: 0.0005), Gradient norm: 13.9212


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.01it/s]


====> Test set loss: 21196.1365, (BCE: 21196.1365, KLD: 0.0005)
Epoch 47/100


 37%|███▋      | 96/261 [00:18<00:31,  5.23it/s]

Step 12,100, (N samples: 1,548,800), Loss: 21196.1074, (Recon: 21196.1035, KLD: 0.0043), Gradient norm: 15.0693


 75%|███████▌  | 196/261 [00:37<00:12,  5.25it/s]

Step 12,200, (N samples: 1,561,600), Loss: 21196.1523, (Recon: 21196.1523, KLD: 0.0006), Gradient norm: 14.1978


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.00it/s]


====> Test set loss: 21196.1441, (BCE: 21196.1441, KLD: 0.0008)
Epoch 48/100


 13%|█▎        | 35/261 [00:06<00:43,  5.19it/s]

Step 12,300, (N samples: 1,574,400), Loss: 21195.9883, (Recon: 21195.9883, KLD: 0.0010), Gradient norm: 14.9834


 52%|█████▏    | 135/261 [00:25<00:24,  5.22it/s]

Step 12,400, (N samples: 1,587,200), Loss: 21196.1074, (Recon: 21196.1074, KLD: 0.0007), Gradient norm: 15.0015


 90%|█████████ | 235/261 [00:44<00:04,  5.25it/s]

Step 12,500, (N samples: 1,600,000), Loss: 21196.0801, (Recon: 21196.0781, KLD: 0.0010), Gradient norm: 14.5049


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.90it/s]


====> Test set loss: 21196.1491, (BCE: 21196.1491, KLD: 0.0004)
Epoch 49/100


 28%|██▊       | 74/261 [00:14<00:35,  5.20it/s]

Step 12,600, (N samples: 1,612,800), Loss: 21196.0000, (Recon: 21196.0000, KLD: 0.0005), Gradient norm: 14.7403


 67%|██████▋   | 174/261 [00:33<00:16,  5.23it/s]

Step 12,700, (N samples: 1,625,600), Loss: 21196.1641, (Recon: 21196.1641, KLD: 0.0004), Gradient norm: 15.7267


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.81it/s]


====> Test set loss: 21196.1369, (BCE: 21196.1350, KLD: 0.0015)
Epoch 50/100


  5%|▍         | 13/261 [00:02<00:47,  5.20it/s]

Step 12,800, (N samples: 1,638,400), Loss: 21195.8965, (Recon: 21195.8945, KLD: 0.0011), Gradient norm: 14.8858


 43%|████▎     | 113/261 [00:21<00:28,  5.19it/s]

Step 12,900, (N samples: 1,651,200), Loss: 21196.1074, (Recon: 21196.1055, KLD: 0.0020), Gradient norm: 14.7059


 82%|████████▏ | 213/261 [00:40<00:09,  5.31it/s]

Step 13,000, (N samples: 1,664,000), Loss: 21196.0938, (Recon: 21196.0938, KLD: 0.0006), Gradient norm: 15.4542


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.1371, (BCE: 21196.1371, KLD: 0.0004)
Epoch 51/100


 20%|█▉        | 52/261 [00:09<00:39,  5.28it/s]

Step 13,100, (N samples: 1,676,800), Loss: 21195.8418, (Recon: 21195.8418, KLD: 0.0003), Gradient norm: 14.3722


 58%|█████▊    | 152/261 [00:28<00:20,  5.20it/s]

Step 13,200, (N samples: 1,689,600), Loss: 21196.1230, (Recon: 21196.1230, KLD: 0.0004), Gradient norm: 14.5198


 97%|█████████▋| 252/261 [00:47<00:01,  5.26it/s]

Step 13,300, (N samples: 1,702,400), Loss: 21196.1465, (Recon: 21196.1465, KLD: 0.0006), Gradient norm: 15.5805


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.00it/s]


====> Test set loss: 21196.1350, (BCE: 21196.1350, KLD: 0.0005)
Epoch 52/100


 35%|███▍      | 91/261 [00:17<00:32,  5.31it/s]

Step 13,400, (N samples: 1,715,200), Loss: 21196.0117, (Recon: 21196.0117, KLD: 0.0006), Gradient norm: 14.1496


 73%|███████▎  | 191/261 [00:36<00:13,  5.26it/s]

Step 13,500, (N samples: 1,728,000), Loss: 21196.1504, (Recon: 21196.1504, KLD: 0.0004), Gradient norm: 14.1706


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.1738, (BCE: 21196.1738, KLD: 0.0003)
Epoch 53/100


 11%|█▏        | 30/261 [00:05<00:44,  5.19it/s]

Step 13,600, (N samples: 1,740,800), Loss: 21195.9219, (Recon: 21195.9219, KLD: 0.0004), Gradient norm: 14.9125


 50%|████▉     | 130/261 [00:24<00:25,  5.21it/s]

Step 13,700, (N samples: 1,753,600), Loss: 21196.0781, (Recon: 21196.0781, KLD: 0.0005), Gradient norm: 15.3705


 88%|████████▊ | 230/261 [00:43<00:05,  5.30it/s]

Step 13,800, (N samples: 1,766,400), Loss: 21196.0781, (Recon: 21196.0781, KLD: 0.0005), Gradient norm: 14.7102


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.1260, (BCE: 21196.1260, KLD: 0.0003)
Epoch 54/100


 26%|██▋       | 69/261 [00:13<00:36,  5.25it/s]

Step 13,900, (N samples: 1,779,200), Loss: 21195.9609, (Recon: 21195.9609, KLD: 0.0003), Gradient norm: 15.2619


 65%|██████▍   | 169/261 [00:32<00:17,  5.20it/s]

Step 14,000, (N samples: 1,792,000), Loss: 21195.9961, (Recon: 21195.9961, KLD: 0.0002), Gradient norm: 14.1679


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.1458, (BCE: 21196.1458, KLD: 0.0005)
Epoch 55/100


  3%|▎         | 8/261 [00:01<00:48,  5.19it/s]

Step 14,100, (N samples: 1,804,800), Loss: 21195.7305, (Recon: 21195.7305, KLD: 0.0005), Gradient norm: 14.5450


 41%|████▏     | 108/261 [00:20<00:29,  5.26it/s]

Step 14,200, (N samples: 1,817,600), Loss: 21196.1133, (Recon: 21196.1133, KLD: 0.0003), Gradient norm: 15.7275


 80%|███████▉  | 208/261 [00:39<00:10,  5.25it/s]

Step 14,300, (N samples: 1,830,400), Loss: 21196.0762, (Recon: 21196.0762, KLD: 0.0003), Gradient norm: 16.1217


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.1395, (BCE: 21196.1395, KLD: 0.0003)
Epoch 56/100


 18%|█▊        | 47/261 [00:08<00:40,  5.26it/s]

Step 14,400, (N samples: 1,843,200), Loss: 21195.8984, (Recon: 21195.8984, KLD: 0.0004), Gradient norm: 15.3913


 56%|█████▋    | 147/261 [00:27<00:21,  5.23it/s]

Step 14,500, (N samples: 1,856,000), Loss: 21195.9844, (Recon: 21195.9844, KLD: 0.0004), Gradient norm: 15.5957


 95%|█████████▍| 247/261 [00:46<00:02,  5.28it/s]

Step 14,600, (N samples: 1,868,800), Loss: 21196.1055, (Recon: 21196.1055, KLD: 0.0004), Gradient norm: 16.3203


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.70it/s]


====> Test set loss: 21196.1282, (BCE: 21196.1282, KLD: 0.0005)
Epoch 57/100


 33%|███▎      | 86/261 [00:16<00:33,  5.22it/s]

Step 14,700, (N samples: 1,881,600), Loss: 21195.9902, (Recon: 21195.9902, KLD: 0.0002), Gradient norm: 16.1399


 71%|███████▏  | 186/261 [00:35<00:14,  5.17it/s]

Step 14,800, (N samples: 1,894,400), Loss: 21196.0781, (Recon: 21196.0781, KLD: 0.0004), Gradient norm: 14.6541


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.1371, (BCE: 21196.1371, KLD: 0.0004)
Epoch 58/100


 10%|▉         | 25/261 [00:04<00:45,  5.22it/s]

Step 14,900, (N samples: 1,907,200), Loss: 21196.0000, (Recon: 21196.0000, KLD: 0.0004), Gradient norm: 15.9988


 48%|████▊     | 125/261 [00:23<00:26,  5.07it/s]

Step 15,000, (N samples: 1,920,000), Loss: 21196.2148, (Recon: 21196.2148, KLD: 0.0003), Gradient norm: 17.2229


 86%|████████▌ | 225/261 [00:42<00:06,  5.22it/s]

Step 15,100, (N samples: 1,932,800), Loss: 21196.1289, (Recon: 21196.1289, KLD: 0.0005), Gradient norm: 16.1285


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.85it/s]


====> Test set loss: 21196.1642, (BCE: 21196.1642, KLD: 0.0007)
Epoch 59/100


 25%|██▍       | 64/261 [00:12<00:37,  5.24it/s]

Step 15,200, (N samples: 1,945,600), Loss: 21195.9434, (Recon: 21195.9434, KLD: 0.0003), Gradient norm: 14.5479


 63%|██████▎   | 164/261 [00:31<00:18,  5.23it/s]

Step 15,300, (N samples: 1,958,400), Loss: 21196.0742, (Recon: 21196.0742, KLD: 0.0004), Gradient norm: 16.1262


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.06it/s]


====> Test set loss: 21196.1582, (BCE: 21196.1582, KLD: 0.0003)
Epoch 60/100


  1%|          | 3/261 [00:00<00:51,  5.03it/s]

Step 15,400, (N samples: 1,971,200), Loss: 21195.9688, (Recon: 21195.9688, KLD: 0.0003), Gradient norm: 17.3639


 39%|███▉      | 103/261 [00:19<00:30,  5.22it/s]

Step 15,500, (N samples: 1,984,000), Loss: 21196.0469, (Recon: 21196.0469, KLD: 0.0005), Gradient norm: 17.3409


 78%|███████▊  | 203/261 [00:38<00:11,  5.10it/s]

Step 15,600, (N samples: 1,996,800), Loss: 21196.1367, (Recon: 21196.1367, KLD: 0.0004), Gradient norm: 15.1001


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.92it/s]


====> Test set loss: 21196.1441, (BCE: 21196.1441, KLD: 0.0002)
Epoch 61/100


 16%|█▌        | 42/261 [00:08<00:43,  5.00it/s]

Step 15,700, (N samples: 2,009,600), Loss: 21195.9141, (Recon: 21195.9141, KLD: 0.0007), Gradient norm: 16.9918


 54%|█████▍    | 142/261 [00:27<00:22,  5.20it/s]

Step 15,800, (N samples: 2,022,400), Loss: 21196.0234, (Recon: 21196.0234, KLD: 0.0003), Gradient norm: 16.1925


 93%|█████████▎| 242/261 [00:46<00:03,  5.21it/s]

Step 15,900, (N samples: 2,035,200), Loss: 21196.2148, (Recon: 21196.2148, KLD: 0.0004), Gradient norm: 16.4758


100%|██████████| 261/261 [00:50<00:00,  5.21it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.1390, (BCE: 21196.1390, KLD: 0.0002)
Epoch 62/100


 31%|███       | 81/261 [00:15<00:34,  5.17it/s]

Step 16,000, (N samples: 2,048,000), Loss: 21196.0586, (Recon: 21196.0586, KLD: 0.0003), Gradient norm: 16.1029


 69%|██████▉   | 181/261 [00:34<00:15,  5.25it/s]

Step 16,100, (N samples: 2,060,800), Loss: 21196.1484, (Recon: 21196.1484, KLD: 0.0007), Gradient norm: 16.2547


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.99it/s]


====> Test set loss: 21196.1491, (BCE: 21196.1491, KLD: 0.0003)
Epoch 63/100


  8%|▊         | 20/261 [00:03<00:46,  5.22it/s]

Step 16,200, (N samples: 2,073,600), Loss: 21195.8203, (Recon: 21195.8203, KLD: 0.0003), Gradient norm: 15.8700


 46%|████▌     | 120/261 [00:22<00:26,  5.26it/s]

Step 16,300, (N samples: 2,086,400), Loss: 21196.1172, (Recon: 21196.1172, KLD: 0.0003), Gradient norm: 17.0391


 84%|████████▍ | 220/261 [00:41<00:07,  5.26it/s]

Step 16,400, (N samples: 2,099,200), Loss: 21196.1270, (Recon: 21196.1270, KLD: 0.0002), Gradient norm: 16.2940


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.07it/s]


====> Test set loss: 21196.1519, (BCE: 21196.1519, KLD: 0.0005)
Epoch 64/100


 23%|██▎       | 59/261 [00:11<00:38,  5.27it/s]

Step 16,500, (N samples: 2,112,000), Loss: 21196.0039, (Recon: 21196.0039, KLD: 0.0004), Gradient norm: 17.1520


 61%|██████    | 159/261 [00:30<00:19,  5.21it/s]

Step 16,600, (N samples: 2,124,800), Loss: 21196.1152, (Recon: 21196.1152, KLD: 0.0003), Gradient norm: 17.3216


 99%|█████████▉| 258/261 [00:49<00:00,  5.20it/s]

Step 16,700, (N samples: 2,137,600), Loss: 21196.0977, (Recon: 21196.0977, KLD: 0.0004), Gradient norm: 17.0857


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.91it/s]


====> Test set loss: 21196.1596, (BCE: 21196.1596, KLD: 0.0004)
Epoch 65/100


 38%|███▊      | 98/261 [00:18<00:30,  5.30it/s]

Step 16,800, (N samples: 2,150,400), Loss: 21196.1094, (Recon: 21196.1094, KLD: 0.0002), Gradient norm: 17.6969


 76%|███████▌  | 198/261 [00:37<00:12,  5.19it/s]

Step 16,900, (N samples: 2,163,200), Loss: 21196.1484, (Recon: 21196.1484, KLD: 0.0002), Gradient norm: 17.2363


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.01it/s]


====> Test set loss: 21196.1744, (BCE: 21196.1743, KLD: 0.0007)
Epoch 66/100


 14%|█▍        | 37/261 [00:07<00:42,  5.21it/s]

Step 17,000, (N samples: 2,176,000), Loss: 21196.0117, (Recon: 21196.0117, KLD: 0.0003), Gradient norm: 17.4059


 52%|█████▏    | 137/261 [00:26<00:24,  5.05it/s]

Step 17,100, (N samples: 2,188,800), Loss: 21196.1426, (Recon: 21196.1426, KLD: 0.0003), Gradient norm: 17.5443


 91%|█████████ | 237/261 [00:45<00:04,  5.20it/s]

Step 17,200, (N samples: 2,201,600), Loss: 21196.0859, (Recon: 21196.0859, KLD: 0.0005), Gradient norm: 16.2196


100%|██████████| 261/261 [00:50<00:00,  5.21it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.54it/s]


====> Test set loss: 21196.1203, (BCE: 21196.1203, KLD: 0.0004)
Epoch 67/100


 29%|██▉       | 76/261 [00:14<00:35,  5.21it/s]

Step 17,300, (N samples: 2,214,400), Loss: 21195.9219, (Recon: 21195.9219, KLD: 0.0007), Gradient norm: 17.9396


 67%|██████▋   | 176/261 [00:33<00:16,  5.26it/s]

Step 17,400, (N samples: 2,227,200), Loss: 21196.1191, (Recon: 21196.1191, KLD: 0.0004), Gradient norm: 16.2277


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.83it/s]


====> Test set loss: 21196.1542, (BCE: 21196.1542, KLD: 0.0007)
Epoch 68/100


  6%|▌         | 15/261 [00:02<00:47,  5.20it/s]

Step 17,500, (N samples: 2,240,000), Loss: 21195.9180, (Recon: 21195.9180, KLD: 0.0006), Gradient norm: 16.9421


 44%|████▍     | 115/261 [00:21<00:27,  5.22it/s]

Step 17,600, (N samples: 2,252,800), Loss: 21196.2383, (Recon: 21196.2383, KLD: 0.0003), Gradient norm: 18.9152


 82%|████████▏ | 215/261 [00:40<00:08,  5.24it/s]

Step 17,700, (N samples: 2,265,600), Loss: 21196.2383, (Recon: 21196.2383, KLD: 0.0002), Gradient norm: 17.9228


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.08it/s]


====> Test set loss: 21196.1523, (BCE: 21196.1523, KLD: 0.0002)
Epoch 69/100


 21%|██        | 54/261 [00:10<00:39,  5.22it/s]

Step 17,800, (N samples: 2,278,400), Loss: 21195.9609, (Recon: 21195.9609, KLD: 0.0005), Gradient norm: 17.6716


 59%|█████▉    | 154/261 [00:29<00:20,  5.22it/s]

Step 17,900, (N samples: 2,291,200), Loss: 21196.1797, (Recon: 21196.1797, KLD: 0.0002), Gradient norm: 17.2766


 97%|█████████▋| 254/261 [00:48<00:01,  5.30it/s]

Step 18,000, (N samples: 2,304,000), Loss: 21196.1250, (Recon: 21196.1250, KLD: 0.0002), Gradient norm: 17.3606


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.1670, (BCE: 21196.1670, KLD: 0.0004)
Epoch 70/100


 36%|███▌      | 93/261 [00:17<00:32,  5.21it/s]

Step 18,100, (N samples: 2,316,800), Loss: 21196.0586, (Recon: 21196.0586, KLD: 0.0002), Gradient norm: 16.9990


 74%|███████▍  | 193/261 [00:36<00:13,  5.14it/s]

Step 18,200, (N samples: 2,329,600), Loss: 21196.1387, (Recon: 21196.1387, KLD: 0.0003), Gradient norm: 17.4962


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.98it/s]


====> Test set loss: 21196.1827, (BCE: 21196.1827, KLD: 0.0003)
Epoch 71/100


 12%|█▏        | 32/261 [00:06<00:43,  5.22it/s]

Step 18,300, (N samples: 2,342,400), Loss: 21196.0117, (Recon: 21196.0117, KLD: 0.0003), Gradient norm: 17.5932


 51%|█████     | 132/261 [00:25<00:24,  5.31it/s]

Step 18,400, (N samples: 2,355,200), Loss: 21196.0156, (Recon: 21196.0156, KLD: 0.0003), Gradient norm: 16.5831


 89%|████████▉ | 232/261 [00:44<00:05,  5.08it/s]

Step 18,500, (N samples: 2,368,000), Loss: 21196.1523, (Recon: 21196.1523, KLD: 0.0008), Gradient norm: 16.6996


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.1923, (BCE: 21196.1816, KLD: 0.0109)
Epoch 72/100


 27%|██▋       | 71/261 [00:13<00:36,  5.21it/s]

Step 18,600, (N samples: 2,380,800), Loss: 21195.9551, (Recon: 21195.9492, KLD: 0.0067), Gradient norm: 16.7287


 66%|██████▌   | 171/261 [00:32<00:17,  5.28it/s]

Step 18,700, (N samples: 2,393,600), Loss: 21196.0508, (Recon: 21196.0430, KLD: 0.0073), Gradient norm: 17.2777


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.2044, (BCE: 21196.2019, KLD: 0.0026)
Epoch 73/100


  4%|▍         | 10/261 [00:02<00:48,  5.21it/s]

Step 18,800, (N samples: 2,406,400), Loss: 21195.9551, (Recon: 21195.9492, KLD: 0.0065), Gradient norm: 18.2090


 42%|████▏     | 110/261 [00:20<00:29,  5.17it/s]

Step 18,900, (N samples: 2,419,200), Loss: 21196.1016, (Recon: 21196.0957, KLD: 0.0067), Gradient norm: 17.7642


 80%|████████  | 210/261 [00:40<00:09,  5.26it/s]

Step 19,000, (N samples: 2,432,000), Loss: 21196.1836, (Recon: 21196.1836, KLD: 0.0006), Gradient norm: 18.4619


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.96it/s]


====> Test set loss: 21196.1370, (BCE: 21196.1350, KLD: 0.0020)
Epoch 74/100


 19%|█▉        | 49/261 [00:09<00:40,  5.22it/s]

Step 19,100, (N samples: 2,444,800), Loss: 21196.0625, (Recon: 21196.0625, KLD: 0.0010), Gradient norm: 18.1352


 57%|█████▋    | 149/261 [00:28<00:21,  5.28it/s]

Step 19,200, (N samples: 2,457,600), Loss: 21196.1895, (Recon: 21196.1855, KLD: 0.0033), Gradient norm: 19.2878


 95%|█████████▌| 249/261 [00:47<00:02,  5.24it/s]

Step 19,300, (N samples: 2,470,400), Loss: 21196.1406, (Recon: 21196.1406, KLD: 0.0006), Gradient norm: 18.1847


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.89it/s]


====> Test set loss: 21196.1373, (BCE: 21196.1373, KLD: 0.0007)
Epoch 75/100


 34%|███▎      | 88/261 [00:16<00:32,  5.27it/s]

Step 19,400, (N samples: 2,483,200), Loss: 21196.0312, (Recon: 21196.0293, KLD: 0.0013), Gradient norm: 18.2904


 72%|███████▏  | 188/261 [00:35<00:13,  5.25it/s]

Step 19,500, (N samples: 2,496,000), Loss: 21196.0449, (Recon: 21196.0430, KLD: 0.0012), Gradient norm: 16.7288


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.99it/s]


====> Test set loss: 21196.1550, (BCE: 21196.1531, KLD: 0.0024)
Epoch 76/100


 10%|█         | 27/261 [00:05<00:45,  5.20it/s]

Step 19,600, (N samples: 2,508,800), Loss: 21195.9570, (Recon: 21195.9570, KLD: 0.0006), Gradient norm: 17.7120


 49%|████▊     | 127/261 [00:24<00:25,  5.22it/s]

Step 19,700, (N samples: 2,521,600), Loss: 21196.1699, (Recon: 21196.1699, KLD: 0.0008), Gradient norm: 19.0114


 87%|████████▋ | 227/261 [00:43<00:06,  5.29it/s]

Step 19,800, (N samples: 2,534,400), Loss: 21196.1348, (Recon: 21196.1348, KLD: 0.0010), Gradient norm: 18.2009


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.93it/s]


====> Test set loss: 21196.1892, (BCE: 21196.1892, KLD: 0.0007)
Epoch 77/100


 25%|██▌       | 66/261 [00:12<00:37,  5.18it/s]

Step 19,900, (N samples: 2,547,200), Loss: 21196.1172, (Recon: 21196.1172, KLD: 0.0007), Gradient norm: 19.0546


 64%|██████▎   | 166/261 [00:31<00:18,  5.26it/s]

Step 20,000, (N samples: 2,560,000), Loss: 21196.0527, (Recon: 21196.0527, KLD: 0.0007), Gradient norm: 18.3772


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.02it/s]


====> Test set loss: 21196.1654, (BCE: 21196.1654, KLD: 0.0003)
Epoch 78/100


  2%|▏         | 5/261 [00:01<00:52,  4.92it/s]

Step 20,100, (N samples: 2,572,800), Loss: 21195.9297, (Recon: 21195.9297, KLD: 0.0004), Gradient norm: 17.5008


 40%|████      | 105/261 [00:20<00:30,  5.15it/s]

Step 20,200, (N samples: 2,585,600), Loss: 21196.2070, (Recon: 21196.2070, KLD: 0.0003), Gradient norm: 18.4357


 79%|███████▊  | 205/261 [00:39<00:10,  5.24it/s]

Step 20,300, (N samples: 2,598,400), Loss: 21196.1836, (Recon: 21196.1836, KLD: 0.0003), Gradient norm: 18.2722


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.66it/s]


====> Test set loss: 21196.1953, (BCE: 21196.1953, KLD: 0.0003)
Epoch 79/100


 17%|█▋        | 44/261 [00:08<00:41,  5.19it/s]

Step 20,400, (N samples: 2,611,200), Loss: 21196.0938, (Recon: 21196.0938, KLD: 0.0006), Gradient norm: 18.5000


 55%|█████▌    | 144/261 [00:27<00:22,  5.21it/s]

Step 20,500, (N samples: 2,624,000), Loss: 21196.1738, (Recon: 21196.1738, KLD: 0.0006), Gradient norm: 18.8424


 93%|█████████▎| 244/261 [00:46<00:03,  5.08it/s]

Step 20,600, (N samples: 2,636,800), Loss: 21196.1797, (Recon: 21196.1797, KLD: 0.0002), Gradient norm: 18.8308


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.91it/s]


====> Test set loss: 21196.1779, (BCE: 21196.1779, KLD: 0.0001)
Epoch 80/100


 32%|███▏      | 83/261 [00:15<00:34,  5.21it/s]

Step 20,700, (N samples: 2,649,600), Loss: 21196.1328, (Recon: 21196.1328, KLD: 0.0002), Gradient norm: 18.8325


 70%|███████   | 183/261 [00:35<00:15,  5.20it/s]

Step 20,800, (N samples: 2,662,400), Loss: 21196.0723, (Recon: 21196.0723, KLD: 0.0003), Gradient norm: 18.2050


100%|██████████| 261/261 [00:50<00:00,  5.19it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.07it/s]


====> Test set loss: 21196.1493, (BCE: 21196.1493, KLD: 0.0003)
Epoch 81/100


  8%|▊         | 22/261 [00:04<00:45,  5.24it/s]

Step 20,900, (N samples: 2,675,200), Loss: 21195.8984, (Recon: 21195.8984, KLD: 0.0003), Gradient norm: 18.8613


 47%|████▋     | 122/261 [00:23<00:26,  5.27it/s]

Step 21,000, (N samples: 2,688,000), Loss: 21196.0215, (Recon: 21196.0195, KLD: 0.0024), Gradient norm: 17.4422


 85%|████████▌ | 222/261 [00:42<00:07,  5.22it/s]

Step 21,100, (N samples: 2,700,800), Loss: 21196.1797, (Recon: 21196.1797, KLD: 0.0002), Gradient norm: 19.5936


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.10it/s]


====> Test set loss: 21196.2039, (BCE: 21196.2039, KLD: 0.0003)
Epoch 82/100


 23%|██▎       | 61/261 [00:11<00:39,  5.07it/s]

Step 21,200, (N samples: 2,713,600), Loss: 21196.0234, (Recon: 21196.0234, KLD: 0.0003), Gradient norm: 18.2795


 62%|██████▏   | 161/261 [00:30<00:18,  5.28it/s]

Step 21,300, (N samples: 2,726,400), Loss: 21196.1250, (Recon: 21196.1250, KLD: 0.0003), Gradient norm: 17.2642


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]


Step 21,400, (N samples: 2,739,200), Loss: 21196.1602, (Recon: 21196.1602, KLD: 0.0003), Gradient norm: 18.8528


Testing: 100%|██████████| 29/29 [00:03<00:00,  7.91it/s]


====> Test set loss: 21196.1881, (BCE: 21196.1881, KLD: 0.0003)
Epoch 83/100


 38%|███▊      | 100/261 [00:19<00:30,  5.26it/s]

Step 21,500, (N samples: 2,752,000), Loss: 21196.1465, (Recon: 21196.1426, KLD: 0.0031), Gradient norm: 19.2564


 77%|███████▋  | 200/261 [00:38<00:11,  5.28it/s]

Step 21,600, (N samples: 2,764,800), Loss: 21196.0840, (Recon: 21196.0840, KLD: 0.0004), Gradient norm: 18.7330


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.95it/s]


====> Test set loss: 21196.1643, (BCE: 21196.1643, KLD: 0.0002)
Epoch 84/100


 15%|█▍        | 39/261 [00:07<00:42,  5.26it/s]

Step 21,700, (N samples: 2,777,600), Loss: 21196.0059, (Recon: 21196.0059, KLD: 0.0003), Gradient norm: 18.2513


 53%|█████▎    | 139/261 [00:26<00:23,  5.09it/s]

Step 21,800, (N samples: 2,790,400), Loss: 21196.0527, (Recon: 21196.0527, KLD: 0.0001), Gradient norm: 17.4276


 92%|█████████▏| 239/261 [00:45<00:04,  5.24it/s]

Step 21,900, (N samples: 2,803,200), Loss: 21196.0938, (Recon: 21196.0938, KLD: 0.0004), Gradient norm: 18.9974


100%|██████████| 261/261 [00:49<00:00,  5.26it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.04it/s]


====> Test set loss: 21196.1819, (BCE: 21196.1819, KLD: 0.0002)
Epoch 85/100


 30%|██▉       | 78/261 [00:14<00:35,  5.23it/s]

Step 22,000, (N samples: 2,816,000), Loss: 21196.0703, (Recon: 21196.0703, KLD: 0.0002), Gradient norm: 19.1083


 68%|██████▊   | 178/261 [00:33<00:15,  5.21it/s]

Step 22,100, (N samples: 2,828,800), Loss: 21196.2949, (Recon: 21196.2949, KLD: 0.0002), Gradient norm: 18.3481


100%|██████████| 261/261 [00:49<00:00,  5.25it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.77it/s]


====> Test set loss: 21196.1950, (BCE: 21196.1950, KLD: 0.0003)
Epoch 86/100


  7%|▋         | 17/261 [00:03<00:46,  5.25it/s]

Step 22,200, (N samples: 2,841,600), Loss: 21195.9375, (Recon: 21195.9375, KLD: 0.0003), Gradient norm: 18.5593


 45%|████▍     | 117/261 [00:22<00:27,  5.24it/s]

Step 22,300, (N samples: 2,854,400), Loss: 21196.0898, (Recon: 21196.0898, KLD: 0.0002), Gradient norm: 19.5190


 83%|████████▎ | 217/261 [00:41<00:08,  5.20it/s]

Step 22,400, (N samples: 2,867,200), Loss: 21196.1602, (Recon: 21196.1602, KLD: 0.0002), Gradient norm: 17.9095


100%|██████████| 261/261 [00:49<00:00,  5.28it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.13it/s]


====> Test set loss: 21196.1759, (BCE: 21196.1759, KLD: 0.0003)
Epoch 87/100


 21%|██▏       | 56/261 [00:10<00:39,  5.21it/s]

Step 22,500, (N samples: 2,880,000), Loss: 21196.0898, (Recon: 21196.0898, KLD: 0.0005), Gradient norm: 19.2708


 60%|█████▉    | 156/261 [00:29<00:20,  5.20it/s]

Step 22,600, (N samples: 2,892,800), Loss: 21196.0703, (Recon: 21196.0703, KLD: 0.0003), Gradient norm: 17.7748


 98%|█████████▊| 256/261 [00:48<00:00,  5.25it/s]

Step 22,700, (N samples: 2,905,600), Loss: 21196.1641, (Recon: 21196.1641, KLD: 0.0003), Gradient norm: 18.6374


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.88it/s]


====> Test set loss: 21196.1890, (BCE: 21196.1890, KLD: 0.0003)
Epoch 88/100


 36%|███▋      | 95/261 [00:18<00:31,  5.28it/s]

Step 22,800, (N samples: 2,918,400), Loss: 21196.1680, (Recon: 21196.1680, KLD: 0.0003), Gradient norm: 18.4575


 75%|███████▍  | 195/261 [00:37<00:12,  5.21it/s]

Step 22,900, (N samples: 2,931,200), Loss: 21196.2812, (Recon: 21196.2812, KLD: 0.0004), Gradient norm: 20.1705


100%|██████████| 261/261 [00:49<00:00,  5.27it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  8.07it/s]


====> Test set loss: 21196.1750, (BCE: 21196.1750, KLD: 0.0002)
Epoch 89/100


 13%|█▎        | 34/261 [00:06<00:43,  5.16it/s]

Step 23,000, (N samples: 2,944,000), Loss: 21195.9727, (Recon: 21195.9727, KLD: 0.0002), Gradient norm: 19.0862


 51%|█████▏    | 134/261 [00:25<00:24,  5.16it/s]

Step 23,100, (N samples: 2,956,800), Loss: 21196.1816, (Recon: 21196.1816, KLD: 0.0003), Gradient norm: 18.5551


 90%|████████▉ | 234/261 [00:44<00:05,  5.20it/s]

Step 23,200, (N samples: 2,969,600), Loss: 21196.2539, (Recon: 21196.2539, KLD: 0.0002), Gradient norm: 18.5842


100%|██████████| 261/261 [00:49<00:00,  5.24it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.94it/s]


====> Test set loss: 21196.1701, (BCE: 21196.1699, KLD: 0.0002)
Epoch 90/100


 28%|██▊       | 73/261 [00:14<00:36,  5.19it/s]

Step 23,300, (N samples: 2,982,400), Loss: 21195.9766, (Recon: 21195.9746, KLD: 0.0012), Gradient norm: 19.0983


 66%|██████▋   | 173/261 [00:33<00:16,  5.18it/s]

Step 23,400, (N samples: 2,995,200), Loss: 21196.1465, (Recon: 21196.1445, KLD: 0.0019), Gradient norm: 19.2525


100%|██████████| 261/261 [00:49<00:00,  5.23it/s]
Testing: 100%|██████████| 29/29 [00:03<00:00,  7.93it/s]


====> Test set loss: 21196.1764, (BCE: 21196.1750, KLD: 0.0012)
Epoch 91/100


  5%|▍         | 12/261 [00:02<00:47,  5.22it/s]

Step 23,500, (N samples: 3,008,000), Loss: 21195.9023, (Recon: 21195.8984, KLD: 0.0031), Gradient norm: 18.2855


 43%|████▎     | 112/261 [00:21<00:28,  5.27it/s]

Step 23,600, (N samples: 3,020,800), Loss: 21196.1816, (Recon: 21196.1816, KLD: 0.0001), Gradient norm: 20.8328


 81%|████████  | 212/261 [00:40<00:09,  5.22it/s]

Step 23,700, (N samples: 3,033,600), Loss: 21196.2305, (Recon: 21196.2305, KLD: 0.0001), Gradient norm: 19.7289


 97%|█████████▋| 252/261 [00:48<00:01,  5.21it/s]

In [12]:
def pearson_correlation(original_x, x_hat, mask=None):
    """Return the Pearson correlation coefficient between two tensors.

    Both tensors are reduced over *all* of their elements, so the result is a
    single scalar regardless of the input shape.

    Args:
        original_x: Reference tensor (floating point).
        x_hat: Tensor compared against ``original_x``; must be broadcastable
            to the same shape.
        mask: Accepted for interface compatibility only; it is currently NOT
            applied to the computation.
            NOTE(review): confirm the intended mask semantics before wiring it in.

    Returns:
        The correlation as a Python ``float``, nominally in ``[-1, 1]``.
        ``nan`` is returned when either input has zero variance, because the
        correlation is undefined (0 / 0) in that case.
    """
    # Centre both signals once and reuse the deviations for every moment below.
    dev_x = original_x - torch.mean(original_x)
    dev_x_hat = x_hat - torch.mean(x_hat)

    # Covariance with the population (1/N) normalisation.
    covariance = torch.mean(dev_x * dev_x_hat)

    # The standard deviations must use the SAME 1/N normalisation as the
    # covariance. torch.std defaults to the Bessel-corrected (N-1) estimator,
    # which would scale the result by (N-1)/N and keep perfectly correlated
    # inputs from ever reaching 1.0.
    std_x = torch.sqrt(torch.mean(dev_x * dev_x))
    std_x_hat = torch.sqrt(torch.mean(dev_x_hat * dev_x_hat))

    correlation = covariance / (std_x * std_x_hat)

    return correlation.item()


# Per-sample Pearson correlation between each training input and its
# reconstruction, collected over the whole training loader.
correlations = []

# Evaluate in inference mode so that any train-only layers (dropout, batch
# norm, ...) behave deterministically, then put the model back in whatever
# mode it was in so later cells are unaffected.
was_training = model.training
model.eval()
try:
    # Gradients are not needed for a metric pass; skipping them saves memory.
    with torch.no_grad():
        for batch in train_loader:
            # Each batch is an (inputs, second item) pair; only the inputs are used.
            original_x, _ = batch
            original_x = original_x.to(device)  # same device as the model

            # Forward pass for the whole batch; the reconstruction is read
            # from the `x_recon` attribute of the model output.
            out = model(original_x)
            x_hat_batch = out.x_recon

            # One correlation per sample (row) of the batch.
            correlations.extend(
                pearson_correlation(original_sample, x_hat_sample)
                for original_sample, x_hat_sample in zip(original_x, x_hat_batch)
            )
finally:
    model.train(was_training)

# Mean over all samples; an empty loader yields NaN instead of raising
# ZeroDivisionError.
average_correlation = (
    sum(correlations) / len(correlations) if correlations else float("nan")
)
print("Average Pearson Correlation across all samples:", average_correlation)


Average Pearson Correlation across all samples: 0.003047886254953311
