In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

class VAE(nn.Module):
    def __init__(self, input_dim=46, latent_dim=16):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 24),
            nn.ReLU(),
            nn.Linear(24, latent_dim),
            nn.ReLU(),
        )
        
        self.mu = nn.Linear(16, latent_dim)
        self.log_var = nn.Linear(16, latent_dim)
        
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 24),
            nn.ReLU(),
            nn.Linear(24, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim),
            nn.Sigmoid()  # Use Sigmoid if input values are normalized to [0, 1]
        )
        
    def encode(self, x):
        x = self.encoder(x)
        mu = self.mu(x)
        log_var = self.log_var(x)
        return mu, log_var

    # reparameterize trick
    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    def decode(self, z):
        return self.decoder(z)
    
    def forward(self, x):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        reconstructed = self.decode(z)
        return reconstructed, mu, log_var

def loss_function(reconstructed, original, mu, log_var):
    reconstruction_loss = nn.MSELoss()(reconstructed, original)
    # KL Divergence loss
    kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return reconstruction_loss + kl_loss




In [2]:
from tqdm import tqdm
# training loop
def train_vae(model, dataloader, device, epochs=20,  learning_rate=1e-3):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    
    for epoch in range(epochs):
        total_loss = 0
        for batch_idx, (data_entries_tensors) in tqdm(enumerate(dataloader), desc=f"Epoch {epoch+1}/{epochs}"):
            optimizer.zero_grad()
            reconstructed, mu, log_var = model(data_entries_tensors)
            loss = loss_function(reconstructed, data_entries_tensors, mu, log_var)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(dataloader)}")


In [3]:
import pandas as pd
import torch
from torch.utils.data import Dataset

class CasasVAEDataset(Dataset):
    def __init__(self, parquet_file):
        # Load the DataFrame from the Parquet file
        self.df = pd.read_parquet(parquet_file).astype(float)

    def __len__(self):
        # Return the total number of samples
        return len(self.df)

    def __getitem__(self, idx):
        # Get the row at the specified index
        row = self.df.iloc[idx]
        sequence = torch.tensor(row, dtype=torch.float32)
        return sequence



In [4]:
###### import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split

# Instantiate and train
vae = VAE(input_dim=53, latent_dim=16)

# Data loaders
parquet_file = './data/vae/combined_sampled_df.parquet'
dataset = CasasVAEDataset(parquet_file)
train_ratio = 0.8
eval_ratio = 0.2

# Wrap them in DataLoaders
train_dataset, eval_dataset = random_split(dataset, [0.8, 0.2])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=64, shuffle=False)
train_vae(vae, train_loader, device, epochs=50)

  sequence = torch.tensor(row, dtype=torch.float32)
Epoch 1/50: 21it [00:00, 101.43it/s]

tensor(110.2939, grad_fn=<MseLossBackward0>)
tensor(68.4847, grad_fn=<MulBackward0>)
tensor(104.7356, grad_fn=<MseLossBackward0>)
tensor(42.0248, grad_fn=<MulBackward0>)
tensor(100.1768, grad_fn=<MseLossBackward0>)
tensor(26.7463, grad_fn=<MulBackward0>)
tensor(109.2446, grad_fn=<MseLossBackward0>)
tensor(22.3226, grad_fn=<MulBackward0>)
tensor(103.8855, grad_fn=<MseLossBackward0>)
tensor(16.4739, grad_fn=<MulBackward0>)
tensor(107.6348, grad_fn=<MseLossBackward0>)
tensor(13.0607, grad_fn=<MulBackward0>)
tensor(113.3737, grad_fn=<MseLossBackward0>)
tensor(12.2381, grad_fn=<MulBackward0>)
tensor(158.2360, grad_fn=<MseLossBackward0>)
tensor(30.8448, grad_fn=<MulBackward0>)
tensor(111.6309, grad_fn=<MseLossBackward0>)
tensor(9.7245, grad_fn=<MulBackward0>)
tensor(101.9161, grad_fn=<MseLossBackward0>)
tensor(8.9782, grad_fn=<MulBackward0>)
tensor(102.6994, grad_fn=<MseLossBackward0>)
tensor(8.2571, grad_fn=<MulBackward0>)
tensor(110.4846, grad_fn=<MseLossBackward0>)
tensor(8.0274, grad_fn=

Epoch 1/50: 44it [00:00, 106.91it/s]

tensor(102.5788, grad_fn=<MseLossBackward0>)
tensor(6.1496, grad_fn=<MulBackward0>)
tensor(106.9641, grad_fn=<MseLossBackward0>)
tensor(5.9037, grad_fn=<MulBackward0>)
tensor(108.6403, grad_fn=<MseLossBackward0>)
tensor(5.8231, grad_fn=<MulBackward0>)
tensor(103.7907, grad_fn=<MseLossBackward0>)
tensor(5.6469, grad_fn=<MulBackward0>)
tensor(110.7134, grad_fn=<MseLossBackward0>)
tensor(5.7015, grad_fn=<MulBackward0>)
tensor(101.1598, grad_fn=<MseLossBackward0>)
tensor(5.3418, grad_fn=<MulBackward0>)
tensor(177.4348, grad_fn=<MseLossBackward0>)
tensor(5.7410, grad_fn=<MulBackward0>)
tensor(108.8206, grad_fn=<MseLossBackward0>)
tensor(5.3285, grad_fn=<MulBackward0>)
tensor(123.1406, grad_fn=<MseLossBackward0>)
tensor(5.1852, grad_fn=<MulBackward0>)
tensor(111.4211, grad_fn=<MseLossBackward0>)
tensor(5.0719, grad_fn=<MulBackward0>)
tensor(264.7565, grad_fn=<MseLossBackward0>)
tensor(5.2753, grad_fn=<MulBackward0>)
tensor(105.3523, grad_fn=<MseLossBackward0>)
tensor(4.8129, grad_fn=<MulBack

Epoch 1/50: 60it [00:00, 104.14it/s]


tensor(105.3411, grad_fn=<MseLossBackward0>)
tensor(3.3649, grad_fn=<MulBackward0>)
tensor(103.6637, grad_fn=<MseLossBackward0>)
tensor(3.4664, grad_fn=<MulBackward0>)
tensor(110.9106, grad_fn=<MseLossBackward0>)
tensor(3.4058, grad_fn=<MulBackward0>)
tensor(110.8233, grad_fn=<MseLossBackward0>)
tensor(3.2241, grad_fn=<MulBackward0>)
tensor(102.4326, grad_fn=<MseLossBackward0>)
tensor(3.0973, grad_fn=<MulBackward0>)
tensor(102.1194, grad_fn=<MseLossBackward0>)
tensor(2.9505, grad_fn=<MulBackward0>)
tensor(417.5230, grad_fn=<MseLossBackward0>)
tensor(3.0241, grad_fn=<MulBackward0>)
tensor(105.4875, grad_fn=<MseLossBackward0>)
tensor(2.8348, grad_fn=<MulBackward0>)
tensor(109.4281, grad_fn=<MseLossBackward0>)
tensor(2.7650, grad_fn=<MulBackward0>)
tensor(108.9666, grad_fn=<MseLossBackward0>)
tensor(2.5743, grad_fn=<MulBackward0>)
tensor(105.2532, grad_fn=<MseLossBackward0>)
tensor(2.6196, grad_fn=<MulBackward0>)
tensor(99.0712, grad_fn=<MseLossBackward0>)
tensor(2.4320, grad_fn=<MulBackw

Epoch 2/50: 0it [00:00, ?it/s]

tensor(109.5041, grad_fn=<MseLossBackward0>)
tensor(2.2471, grad_fn=<MulBackward0>)
tensor(134.4711, grad_fn=<MseLossBackward0>)
tensor(2.2844, grad_fn=<MulBackward0>)
tensor(120.2353, grad_fn=<MseLossBackward0>)
tensor(2.1404, grad_fn=<MulBackward0>)
tensor(107.0847, grad_fn=<MseLossBackward0>)
tensor(2.0240, grad_fn=<MulBackward0>)
tensor(101.0943, grad_fn=<MseLossBackward0>)
tensor(2.0221, grad_fn=<MulBackward0>)
tensor(107.9279, grad_fn=<MseLossBackward0>)
tensor(1.9166, grad_fn=<MulBackward0>)
tensor(105.9431, grad_fn=<MseLossBackward0>)
tensor(1.8238, grad_fn=<MulBackward0>)


Epoch 2/50: 11it [00:00, 104.11it/s]

tensor(99.2418, grad_fn=<MseLossBackward0>)
tensor(1.8809, grad_fn=<MulBackward0>)
tensor(101.6832, grad_fn=<MseLossBackward0>)
tensor(1.9284, grad_fn=<MulBackward0>)
tensor(100.9931, grad_fn=<MseLossBackward0>)
tensor(1.7038, grad_fn=<MulBackward0>)
tensor(120.7282, grad_fn=<MseLossBackward0>)
tensor(1.6837, grad_fn=<MulBackward0>)
tensor(152.7138, grad_fn=<MseLossBackward0>)
tensor(1.8016, grad_fn=<MulBackward0>)
tensor(259.0256, grad_fn=<MseLossBackward0>)
tensor(1.6993, grad_fn=<MulBackward0>)
tensor(116.4335, grad_fn=<MseLossBackward0>)
tensor(1.5245, grad_fn=<MulBackward0>)
tensor(1127.3370, grad_fn=<MseLossBackward0>)
tensor(1.6546, grad_fn=<MulBackward0>)
tensor(108.9050, grad_fn=<MseLossBackward0>)
tensor(1.6208, grad_fn=<MulBackward0>)
tensor(107.2848, grad_fn=<MseLossBackward0>)
tensor(1.5284, grad_fn=<MulBackward0>)
tensor(107.7592, grad_fn=<MseLossBackward0>)
tensor(1.2943, grad_fn=<MulBackward0>)
tensor(106.5386, grad_fn=<MseLossBackward0>)
tensor(1.3318, grad_fn=<MulBack

Epoch 2/50: 23it [00:00, 108.75it/s]

tensor(95.9318, grad_fn=<MseLossBackward0>)
tensor(1.0773, grad_fn=<MulBackward0>)
tensor(105.9696, grad_fn=<MseLossBackward0>)
tensor(1.1028, grad_fn=<MulBackward0>)
tensor(99.4398, grad_fn=<MseLossBackward0>)
tensor(1.1180, grad_fn=<MulBackward0>)
tensor(102.6987, grad_fn=<MseLossBackward0>)
tensor(0.9767, grad_fn=<MulBackward0>)
tensor(111.6461, grad_fn=<MseLossBackward0>)
tensor(1.0293, grad_fn=<MulBackward0>)
tensor(108.6842, grad_fn=<MseLossBackward0>)
tensor(1.0168, grad_fn=<MulBackward0>)


Epoch 2/50: 34it [00:00, 108.07it/s]

tensor(102.5139, grad_fn=<MseLossBackward0>)
tensor(0.9703, grad_fn=<MulBackward0>)
tensor(103.2789, grad_fn=<MseLossBackward0>)
tensor(0.9090, grad_fn=<MulBackward0>)
tensor(103.9121, grad_fn=<MseLossBackward0>)
tensor(0.9925, grad_fn=<MulBackward0>)
tensor(97.8621, grad_fn=<MseLossBackward0>)
tensor(0.8889, grad_fn=<MulBackward0>)
tensor(98.3540, grad_fn=<MseLossBackward0>)
tensor(0.8638, grad_fn=<MulBackward0>)
tensor(95.4453, grad_fn=<MseLossBackward0>)
tensor(0.8129, grad_fn=<MulBackward0>)
tensor(101.8479, grad_fn=<MseLossBackward0>)
tensor(0.8894, grad_fn=<MulBackward0>)
tensor(102.4314, grad_fn=<MseLossBackward0>)
tensor(0.8019, grad_fn=<MulBackward0>)
tensor(106.9769, grad_fn=<MseLossBackward0>)
tensor(0.7919, grad_fn=<MulBackward0>)
tensor(102.0011, grad_fn=<MseLossBackward0>)
tensor(0.7492, grad_fn=<MulBackward0>)
tensor(109.1520, grad_fn=<MseLossBackward0>)
tensor(0.7824, grad_fn=<MulBackward0>)
tensor(181.5715, grad_fn=<MseLossBackward0>)
tensor(0.7768, grad_fn=<MulBackwar

Epoch 2/50: 45it [00:00, 103.97it/s]

tensor(0.6361, grad_fn=<MulBackward0>)
tensor(105.4411, grad_fn=<MseLossBackward0>)
tensor(0.6176, grad_fn=<MulBackward0>)
tensor(103.2786, grad_fn=<MseLossBackward0>)
tensor(0.6424, grad_fn=<MulBackward0>)
tensor(97.0852, grad_fn=<MseLossBackward0>)
tensor(0.5690, grad_fn=<MulBackward0>)
tensor(101.7501, grad_fn=<MseLossBackward0>)
tensor(0.5453, grad_fn=<MulBackward0>)
tensor(500.8199, grad_fn=<MseLossBackward0>)
tensor(0.6524, grad_fn=<MulBackward0>)


Epoch 2/50: 60it [00:00, 103.74it/s]


tensor(99.0605, grad_fn=<MseLossBackward0>)
tensor(0.5914, grad_fn=<MulBackward0>)
tensor(104.9125, grad_fn=<MseLossBackward0>)
tensor(0.5461, grad_fn=<MulBackward0>)
tensor(103.0190, grad_fn=<MseLossBackward0>)
tensor(0.5236, grad_fn=<MulBackward0>)
tensor(312.4845, grad_fn=<MseLossBackward0>)
tensor(0.5595, grad_fn=<MulBackward0>)
tensor(104.6017, grad_fn=<MseLossBackward0>)
tensor(0.4607, grad_fn=<MulBackward0>)
tensor(108.7681, grad_fn=<MseLossBackward0>)
tensor(0.6619, grad_fn=<MulBackward0>)
tensor(106.6504, grad_fn=<MseLossBackward0>)
tensor(0.4792, grad_fn=<MulBackward0>)
tensor(96.9089, grad_fn=<MseLossBackward0>)
tensor(0.4497, grad_fn=<MulBackward0>)
tensor(382.6190, grad_fn=<MseLossBackward0>)
tensor(0.5899, grad_fn=<MulBackward0>)
tensor(100.2576, grad_fn=<MseLossBackward0>)
tensor(0.4083, grad_fn=<MulBackward0>)
Epoch 2, Loss: 143.70475578308105


Epoch 3/50: 0it [00:00, ?it/s]

tensor(101.6539, grad_fn=<MseLossBackward0>)
tensor(0.3838, grad_fn=<MulBackward0>)
tensor(99.4996, grad_fn=<MseLossBackward0>)
tensor(0.3846, grad_fn=<MulBackward0>)
tensor(505.1465, grad_fn=<MseLossBackward0>)
tensor(0.4799, grad_fn=<MulBackward0>)
tensor(103.2698, grad_fn=<MseLossBackward0>)
tensor(0.3742, grad_fn=<MulBackward0>)
tensor(152.3688, grad_fn=<MseLossBackward0>)
tensor(0.4698, grad_fn=<MulBackward0>)


Epoch 3/50: 11it [00:00, 102.04it/s]

tensor(104.4585, grad_fn=<MseLossBackward0>)
tensor(0.4137, grad_fn=<MulBackward0>)
tensor(176.1014, grad_fn=<MseLossBackward0>)
tensor(0.4451, grad_fn=<MulBackward0>)
tensor(100.4225, grad_fn=<MseLossBackward0>)
tensor(0.3397, grad_fn=<MulBackward0>)
tensor(103.9963, grad_fn=<MseLossBackward0>)
tensor(0.4057, grad_fn=<MulBackward0>)
tensor(119.6284, grad_fn=<MseLossBackward0>)
tensor(0.3942, grad_fn=<MulBackward0>)
tensor(100.4113, grad_fn=<MseLossBackward0>)
tensor(0.3691, grad_fn=<MulBackward0>)
tensor(102.9629, grad_fn=<MseLossBackward0>)
tensor(0.3152, grad_fn=<MulBackward0>)
tensor(305.1294, grad_fn=<MseLossBackward0>)
tensor(0.4375, grad_fn=<MulBackward0>)
tensor(120.6398, grad_fn=<MseLossBackward0>)
tensor(0.4522, grad_fn=<MulBackward0>)
tensor(1410.2520, grad_fn=<MseLossBackward0>)
tensor(0.4290, grad_fn=<MulBackward0>)
tensor(104.1224, grad_fn=<MseLossBackward0>)
tensor(0.3203, grad_fn=<MulBackward0>)
tensor(96.2128, grad_fn=<MseLossBackward0>)
tensor(0.2861, grad_fn=<MulBack

Epoch 3/50: 22it [00:00, 99.40it/s] 

tensor(101.8137, grad_fn=<MseLossBackward0>)
tensor(0.3011, grad_fn=<MulBackward0>)
tensor(107.6876, grad_fn=<MseLossBackward0>)
tensor(0.2438, grad_fn=<MulBackward0>)
tensor(102.0911, grad_fn=<MseLossBackward0>)
tensor(0.2719, grad_fn=<MulBackward0>)
tensor(106.5086, grad_fn=<MseLossBackward0>)
tensor(0.2713, grad_fn=<MulBackward0>)
tensor(98.9144, grad_fn=<MseLossBackward0>)
tensor(0.2568, grad_fn=<MulBackward0>)
tensor(106.5704, grad_fn=<MseLossBackward0>)
tensor(0.2543, grad_fn=<MulBackward0>)
tensor(105.0033, grad_fn=<MseLossBackward0>)
tensor(0.3023, grad_fn=<MulBackward0>)
tensor(106.0377, grad_fn=<MseLossBackward0>)
tensor(0.2396, grad_fn=<MulBackward0>)
tensor(260.6747, grad_fn=<MseLossBackward0>)
tensor(0.2837, grad_fn=<MulBackward0>)
tensor(102.5366, grad_fn=<MseLossBackward0>)
tensor(0.2467, grad_fn=<MulBackward0>)


Epoch 3/50: 32it [00:00, 94.71it/s]

tensor(99.1984, grad_fn=<MseLossBackward0>)
tensor(0.2538, grad_fn=<MulBackward0>)
tensor(103.0724, grad_fn=<MseLossBackward0>)
tensor(0.2434, grad_fn=<MulBackward0>)
tensor(109.5838, grad_fn=<MseLossBackward0>)
tensor(0.3002, grad_fn=<MulBackward0>)
tensor(100.4754, grad_fn=<MseLossBackward0>)
tensor(0.1891, grad_fn=<MulBackward0>)
tensor(101.1441, grad_fn=<MseLossBackward0>)
tensor(0.2083, grad_fn=<MulBackward0>)
tensor(108.8299, grad_fn=<MseLossBackward0>)
tensor(0.2383, grad_fn=<MulBackward0>)
tensor(102.4114, grad_fn=<MseLossBackward0>)
tensor(0.2079, grad_fn=<MulBackward0>)
tensor(107.4583, grad_fn=<MseLossBackward0>)
tensor(0.2025, grad_fn=<MulBackward0>)
tensor(102.9735, grad_fn=<MseLossBackward0>)
tensor(0.1696, grad_fn=<MulBackward0>)


Epoch 3/50: 42it [00:00, 95.12it/s]

tensor(101.8066, grad_fn=<MseLossBackward0>)
tensor(0.2581, grad_fn=<MulBackward0>)
tensor(109.0375, grad_fn=<MseLossBackward0>)
tensor(0.3109, grad_fn=<MulBackward0>)
tensor(101.2541, grad_fn=<MseLossBackward0>)
tensor(0.1966, grad_fn=<MulBackward0>)
tensor(102.2407, grad_fn=<MseLossBackward0>)
tensor(0.1712, grad_fn=<MulBackward0>)
tensor(130.2232, grad_fn=<MseLossBackward0>)
tensor(0.2511, grad_fn=<MulBackward0>)
tensor(100.3435, grad_fn=<MseLossBackward0>)
tensor(0.1796, grad_fn=<MulBackward0>)
tensor(106.2479, grad_fn=<MseLossBackward0>)
tensor(0.1611, grad_fn=<MulBackward0>)
tensor(99.4161, grad_fn=<MseLossBackward0>)
tensor(0.1403, grad_fn=<MulBackward0>)
tensor(105.1931, grad_fn=<MseLossBackward0>)
tensor(0.1846, grad_fn=<MulBackward0>)
tensor(118.8996, grad_fn=<MseLossBackward0>)
tensor(0.1886, grad_fn=<MulBackward0>)
tensor(104.2903, grad_fn=<MseLossBackward0>)
tensor(0.1386, grad_fn=<MulBackward0>)


Epoch 3/50: 60it [00:00, 97.63it/s]


tensor(99.3021, grad_fn=<MseLossBackward0>)
tensor(0.1519, grad_fn=<MulBackward0>)
tensor(97.2448, grad_fn=<MseLossBackward0>)
tensor(0.1325, grad_fn=<MulBackward0>)
tensor(104.9206, grad_fn=<MseLossBackward0>)
tensor(0.1724, grad_fn=<MulBackward0>)
tensor(98.9710, grad_fn=<MseLossBackward0>)
tensor(0.1627, grad_fn=<MulBackward0>)
tensor(96.3686, grad_fn=<MseLossBackward0>)
tensor(0.1367, grad_fn=<MulBackward0>)
tensor(114.9002, grad_fn=<MseLossBackward0>)
tensor(0.1482, grad_fn=<MulBackward0>)
tensor(99.1570, grad_fn=<MseLossBackward0>)
tensor(0.1412, grad_fn=<MulBackward0>)
tensor(176.2305, grad_fn=<MseLossBackward0>)
tensor(0.2390, grad_fn=<MulBackward0>)
tensor(102.2568, grad_fn=<MseLossBackward0>)
tensor(0.1648, grad_fn=<MulBackward0>)
Epoch 3, Loss: 142.40712966918946


Epoch 4/50: 0it [00:00, ?it/s]

tensor(115.0498, grad_fn=<MseLossBackward0>)
tensor(0.1888, grad_fn=<MulBackward0>)
tensor(102.5302, grad_fn=<MseLossBackward0>)
tensor(0.1037, grad_fn=<MulBackward0>)
tensor(102.3244, grad_fn=<MseLossBackward0>)
tensor(0.1704, grad_fn=<MulBackward0>)
tensor(99.9919, grad_fn=<MseLossBackward0>)
tensor(0.1075, grad_fn=<MulBackward0>)
tensor(103.4627, grad_fn=<MseLossBackward0>)
tensor(0.1320, grad_fn=<MulBackward0>)


Epoch 4/50: 10it [00:00, 97.67it/s]

tensor(99.7318, grad_fn=<MseLossBackward0>)
tensor(0.1300, grad_fn=<MulBackward0>)
tensor(100.7672, grad_fn=<MseLossBackward0>)
tensor(0.1137, grad_fn=<MulBackward0>)
tensor(99.1631, grad_fn=<MseLossBackward0>)
tensor(0.1268, grad_fn=<MulBackward0>)
tensor(99.1770, grad_fn=<MseLossBackward0>)
tensor(0.1147, grad_fn=<MulBackward0>)
tensor(101.2967, grad_fn=<MseLossBackward0>)
tensor(0.1382, grad_fn=<MulBackward0>)
tensor(105.0977, grad_fn=<MseLossBackward0>)
tensor(0.1168, grad_fn=<MulBackward0>)


Epoch 4/50: 20it [00:00, 96.06it/s]

tensor(104.4350, grad_fn=<MseLossBackward0>)
tensor(0.1243, grad_fn=<MulBackward0>)
tensor(109.4107, grad_fn=<MseLossBackward0>)
tensor(0.1332, grad_fn=<MulBackward0>)
tensor(104.5036, grad_fn=<MseLossBackward0>)
tensor(0.0925, grad_fn=<MulBackward0>)
tensor(101.7380, grad_fn=<MseLossBackward0>)
tensor(0.0885, grad_fn=<MulBackward0>)
tensor(95.5283, grad_fn=<MseLossBackward0>)
tensor(0.0753, grad_fn=<MulBackward0>)
tensor(508.7593, grad_fn=<MseLossBackward0>)
tensor(0.1618, grad_fn=<MulBackward0>)
tensor(153.1136, grad_fn=<MseLossBackward0>)
tensor(0.2252, grad_fn=<MulBackward0>)
tensor(106.0413, grad_fn=<MseLossBackward0>)
tensor(0.1156, grad_fn=<MulBackward0>)
tensor(107.1633, grad_fn=<MseLossBackward0>)
tensor(0.1194, grad_fn=<MulBackward0>)
tensor(106.9062, grad_fn=<MseLossBackward0>)
tensor(0.1260, grad_fn=<MulBackward0>)
tensor(108.3602, grad_fn=<MseLossBackward0>)
tensor(0.1146, grad_fn=<MulBackward0>)
tensor(102.3074, grad_fn=<MseLossBackward0>)
tensor(0.0933, grad_fn=<MulBackw

Epoch 4/50: 30it [00:00, 96.80it/s]

tensor(186.6012, grad_fn=<MseLossBackward0>)
tensor(0.1527, grad_fn=<MulBackward0>)
tensor(111.5453, grad_fn=<MseLossBackward0>)
tensor(0.0841, grad_fn=<MulBackward0>)
tensor(101.7258, grad_fn=<MseLossBackward0>)
tensor(0.0717, grad_fn=<MulBackward0>)
tensor(105.6039, grad_fn=<MseLossBackward0>)
tensor(0.1106, grad_fn=<MulBackward0>)
tensor(99.6794, grad_fn=<MseLossBackward0>)
tensor(0.1139, grad_fn=<MulBackward0>)
tensor(111.1858, grad_fn=<MseLossBackward0>)
tensor(0.1185, grad_fn=<MulBackward0>)
tensor(111.3195, grad_fn=<MseLossBackward0>)
tensor(0.1109, grad_fn=<MulBackward0>)


Epoch 4/50: 41it [00:00, 100.61it/s]

tensor(115.1008, grad_fn=<MseLossBackward0>)
tensor(0.1356, grad_fn=<MulBackward0>)
tensor(98.3500, grad_fn=<MseLossBackward0>)
tensor(0.0967, grad_fn=<MulBackward0>)
tensor(303.3210, grad_fn=<MseLossBackward0>)
tensor(0.0838, grad_fn=<MulBackward0>)
tensor(103.8033, grad_fn=<MseLossBackward0>)
tensor(0.0906, grad_fn=<MulBackward0>)
tensor(115.1111, grad_fn=<MseLossBackward0>)
tensor(0.1050, grad_fn=<MulBackward0>)
tensor(105.7287, grad_fn=<MseLossBackward0>)
tensor(0.0676, grad_fn=<MulBackward0>)
tensor(128.7082, grad_fn=<MseLossBackward0>)
tensor(0.1151, grad_fn=<MulBackward0>)
tensor(106.2237, grad_fn=<MseLossBackward0>)
tensor(0.1147, grad_fn=<MulBackward0>)
tensor(102.4979, grad_fn=<MseLossBackward0>)
tensor(0.0575, grad_fn=<MulBackward0>)
tensor(98.7881, grad_fn=<MseLossBackward0>)
tensor(0.0550, grad_fn=<MulBackward0>)
tensor(105.4126, grad_fn=<MseLossBackward0>)
tensor(0.0578, grad_fn=<MulBackward0>)
tensor(97.0206, grad_fn=<MseLossBackward0>)
tensor(0.0639, grad_fn=<MulBackwar

Epoch 4/50: 60it [00:00, 97.46it/s] 


tensor(97.1101, grad_fn=<MseLossBackward0>)
tensor(0.1024, grad_fn=<MulBackward0>)
tensor(103.3003, grad_fn=<MseLossBackward0>)
tensor(0.0724, grad_fn=<MulBackward0>)
tensor(255.5465, grad_fn=<MseLossBackward0>)
tensor(0.1403, grad_fn=<MulBackward0>)
tensor(99.1873, grad_fn=<MseLossBackward0>)
tensor(0.1008, grad_fn=<MulBackward0>)
tensor(104.0926, grad_fn=<MseLossBackward0>)
tensor(0.1082, grad_fn=<MulBackward0>)
tensor(104.3868, grad_fn=<MseLossBackward0>)
tensor(0.1694, grad_fn=<MulBackward0>)
tensor(378.3488, grad_fn=<MseLossBackward0>)
tensor(0.1603, grad_fn=<MulBackward0>)
tensor(175.7797, grad_fn=<MseLossBackward0>)
tensor(0.1440, grad_fn=<MulBackward0>)
tensor(102.3553, grad_fn=<MseLossBackward0>)
tensor(0.0529, grad_fn=<MulBackward0>)
tensor(108.6760, grad_fn=<MseLossBackward0>)
tensor(0.0775, grad_fn=<MulBackward0>)
Epoch 4, Loss: 142.26189181009929


Epoch 5/50: 0it [00:00, ?it/s]

tensor(106.1083, grad_fn=<MseLossBackward0>)
tensor(0.1475, grad_fn=<MulBackward0>)
tensor(100.4322, grad_fn=<MseLossBackward0>)
tensor(0.0640, grad_fn=<MulBackward0>)
tensor(94.3092, grad_fn=<MseLossBackward0>)
tensor(0.0940, grad_fn=<MulBackward0>)
tensor(1128.5349, grad_fn=<MseLossBackward0>)
tensor(0.1445, grad_fn=<MulBackward0>)
tensor(100.1246, grad_fn=<MseLossBackward0>)
tensor(0.0581, grad_fn=<MulBackward0>)
tensor(305.9156, grad_fn=<MseLossBackward0>)
tensor(0.1164, grad_fn=<MulBackward0>)


Epoch 5/50: 12it [00:00, 110.73it/s]

tensor(104.5586, grad_fn=<MseLossBackward0>)
tensor(0.1038, grad_fn=<MulBackward0>)
tensor(104.4079, grad_fn=<MseLossBackward0>)
tensor(0.0734, grad_fn=<MulBackward0>)
tensor(507.8704, grad_fn=<MseLossBackward0>)
tensor(0.1291, grad_fn=<MulBackward0>)
tensor(102.3542, grad_fn=<MseLossBackward0>)
tensor(0.1227, grad_fn=<MulBackward0>)
tensor(103.5399, grad_fn=<MseLossBackward0>)
tensor(0.0918, grad_fn=<MulBackward0>)
tensor(99.5180, grad_fn=<MseLossBackward0>)
tensor(0.0424, grad_fn=<MulBackward0>)
tensor(98.0791, grad_fn=<MseLossBackward0>)
tensor(0.0538, grad_fn=<MulBackward0>)
tensor(103.7489, grad_fn=<MseLossBackward0>)
tensor(0.0503, grad_fn=<MulBackward0>)
tensor(109.1049, grad_fn=<MseLossBackward0>)
tensor(0.1383, grad_fn=<MulBackward0>)
tensor(102.9834, grad_fn=<MseLossBackward0>)
tensor(0.1143, grad_fn=<MulBackward0>)
tensor(106.5576, grad_fn=<MseLossBackward0>)
tensor(0.0944, grad_fn=<MulBackward0>)
tensor(101.5855, grad_fn=<MseLossBackward0>)
tensor(0.0549, grad_fn=<MulBackwa

Epoch 5/50: 24it [00:00, 102.26it/s]

tensor(0.1450, grad_fn=<MulBackward0>)
tensor(107.0280, grad_fn=<MseLossBackward0>)
tensor(0.0459, grad_fn=<MulBackward0>)
tensor(99.8477, grad_fn=<MseLossBackward0>)
tensor(0.0466, grad_fn=<MulBackward0>)
tensor(96.5236, grad_fn=<MseLossBackward0>)
tensor(0.0599, grad_fn=<MulBackward0>)
tensor(102.9252, grad_fn=<MseLossBackward0>)
tensor(0.0685, grad_fn=<MulBackward0>)
tensor(102.9365, grad_fn=<MseLossBackward0>)
tensor(0.0523, grad_fn=<MulBackward0>)
tensor(97.8040, grad_fn=<MseLossBackward0>)
tensor(0.0627, grad_fn=<MulBackward0>)
tensor(100.6175, grad_fn=<MseLossBackward0>)
tensor(0.0580, grad_fn=<MulBackward0>)
tensor(103.1696, grad_fn=<MseLossBackward0>)
tensor(0.0814, grad_fn=<MulBackward0>)
tensor(107.1831, grad_fn=<MseLossBackward0>)
tensor(0.1098, grad_fn=<MulBackward0>)
tensor(101.6667, grad_fn=<MseLossBackward0>)
tensor(0.0825, grad_fn=<MulBackward0>)


Epoch 5/50: 35it [00:00, 100.69it/s]

tensor(103.1874, grad_fn=<MseLossBackward0>)
tensor(0.0391, grad_fn=<MulBackward0>)
tensor(103.3868, grad_fn=<MseLossBackward0>)
tensor(0.0650, grad_fn=<MulBackward0>)
tensor(102.3680, grad_fn=<MseLossBackward0>)
tensor(0.0503, grad_fn=<MulBackward0>)
tensor(96.3609, grad_fn=<MseLossBackward0>)
tensor(0.0392, grad_fn=<MulBackward0>)
tensor(127.7490, grad_fn=<MseLossBackward0>)
tensor(0.1111, grad_fn=<MulBackward0>)
tensor(111.1661, grad_fn=<MseLossBackward0>)
tensor(0.1585, grad_fn=<MulBackward0>)
tensor(103.8745, grad_fn=<MseLossBackward0>)
tensor(0.0574, grad_fn=<MulBackward0>)
tensor(143.8287, grad_fn=<MseLossBackward0>)
tensor(0.0973, grad_fn=<MulBackward0>)
tensor(99.9180, grad_fn=<MseLossBackward0>)
tensor(0.0428, grad_fn=<MulBackward0>)
tensor(121.3947, grad_fn=<MseLossBackward0>)
tensor(0.1403, grad_fn=<MulBackward0>)


Epoch 5/50: 47it [00:00, 104.64it/s]

tensor(98.4024, grad_fn=<MseLossBackward0>)
tensor(0.0392, grad_fn=<MulBackward0>)
tensor(107.0376, grad_fn=<MseLossBackward0>)
tensor(0.0761, grad_fn=<MulBackward0>)
tensor(99.0799, grad_fn=<MseLossBackward0>)
tensor(0.0583, grad_fn=<MulBackward0>)
tensor(97.6366, grad_fn=<MseLossBackward0>)
tensor(0.0357, grad_fn=<MulBackward0>)
tensor(103.6702, grad_fn=<MseLossBackward0>)
tensor(0.0362, grad_fn=<MulBackward0>)
tensor(99.5264, grad_fn=<MseLossBackward0>)
tensor(0.0530, grad_fn=<MulBackward0>)
tensor(107.7150, grad_fn=<MseLossBackward0>)
tensor(0.0562, grad_fn=<MulBackward0>)
tensor(193.4001, grad_fn=<MseLossBackward0>)
tensor(0.1800, grad_fn=<MulBackward0>)
tensor(102.7298, grad_fn=<MseLossBackward0>)
tensor(0.0388, grad_fn=<MulBackward0>)
tensor(177.0309, grad_fn=<MseLossBackward0>)
tensor(0.1585, grad_fn=<MulBackward0>)
tensor(101.3378, grad_fn=<MseLossBackward0>)
tensor(0.0450, grad_fn=<MulBackward0>)
tensor(103.5058, grad_fn=<MseLossBackward0>)
tensor(0.0343, grad_fn=<MulBackward

Epoch 5/50: 60it [00:00, 103.58it/s]


tensor(385.5684, grad_fn=<MseLossBackward0>)
tensor(0.1151, grad_fn=<MulBackward0>)
tensor(103.9219, grad_fn=<MseLossBackward0>)
tensor(0.0516, grad_fn=<MulBackward0>)
tensor(105.1735, grad_fn=<MseLossBackward0>)
tensor(0.0309, grad_fn=<MulBackward0>)
tensor(113.2200, grad_fn=<MseLossBackward0>)
tensor(0.1121, grad_fn=<MulBackward0>)
tensor(107.2768, grad_fn=<MseLossBackward0>)
tensor(0.0861, grad_fn=<MulBackward0>)
tensor(113.3597, grad_fn=<MseLossBackward0>)
tensor(0.1042, grad_fn=<MulBackward0>)
Epoch 5, Loss: 142.20309499104818


Epoch 6/50: 0it [00:00, ?it/s]

tensor(108.6965, grad_fn=<MseLossBackward0>)
tensor(0.1080, grad_fn=<MulBackward0>)
tensor(102.4236, grad_fn=<MseLossBackward0>)
tensor(0.0552, grad_fn=<MulBackward0>)
tensor(104.2342, grad_fn=<MseLossBackward0>)
tensor(0.0425, grad_fn=<MulBackward0>)
tensor(98.0708, grad_fn=<MseLossBackward0>)
tensor(0.0409, grad_fn=<MulBackward0>)
tensor(104.1237, grad_fn=<MseLossBackward0>)
tensor(0.0363, grad_fn=<MulBackward0>)
tensor(99.1646, grad_fn=<MseLossBackward0>)
tensor(0.0269, grad_fn=<MulBackward0>)
tensor(179.2240, grad_fn=<MseLossBackward0>)
tensor(0.0972, grad_fn=<MulBackward0>)
tensor(97.4820, grad_fn=<MseLossBackward0>)
tensor(0.0337, grad_fn=<MulBackward0>)
tensor(97.9622, grad_fn=<MseLossBackward0>)
tensor(0.0420, grad_fn=<MulBackward0>)
tensor(103.1847, grad_fn=<MseLossBackward0>)
tensor(0.0588, grad_fn=<MulBackward0>)


Epoch 6/50: 11it [00:00, 97.06it/s]

tensor(123.2903, grad_fn=<MseLossBackward0>)
tensor(0.0985, grad_fn=<MulBackward0>)
tensor(102.3340, grad_fn=<MseLossBackward0>)
tensor(0.0387, grad_fn=<MulBackward0>)
tensor(105.5390, grad_fn=<MseLossBackward0>)
tensor(0.0315, grad_fn=<MulBackward0>)
tensor(254.4674, grad_fn=<MseLossBackward0>)
tensor(0.1029, grad_fn=<MulBackward0>)
tensor(124.8032, grad_fn=<MseLossBackward0>)
tensor(0.1030, grad_fn=<MulBackward0>)
tensor(512.3323, grad_fn=<MseLossBackward0>)
tensor(0.0985, grad_fn=<MulBackward0>)
tensor(99.9046, grad_fn=<MseLossBackward0>)
tensor(0.0288, grad_fn=<MulBackward0>)
tensor(103.4058, grad_fn=<MseLossBackward0>)
tensor(0.0449, grad_fn=<MulBackward0>)
tensor(107.7579, grad_fn=<MseLossBackward0>)
tensor(0.0956, grad_fn=<MulBackward0>)


Epoch 6/50: 21it [00:00, 90.09it/s]

tensor(107.2738, grad_fn=<MseLossBackward0>)
tensor(0.1000, grad_fn=<MulBackward0>)
tensor(1130.2856, grad_fn=<MseLossBackward0>)
tensor(0.0840, grad_fn=<MulBackward0>)
tensor(101.7101, grad_fn=<MseLossBackward0>)
tensor(0.0510, grad_fn=<MulBackward0>)
tensor(106.3361, grad_fn=<MseLossBackward0>)
tensor(0.0922, grad_fn=<MulBackward0>)
tensor(110.1064, grad_fn=<MseLossBackward0>)
tensor(0.0478, grad_fn=<MulBackward0>)
tensor(111.2263, grad_fn=<MseLossBackward0>)
tensor(0.0895, grad_fn=<MulBackward0>)
tensor(167.0773, grad_fn=<MseLossBackward0>)
tensor(0.1086, grad_fn=<MulBackward0>)
tensor(107.0054, grad_fn=<MseLossBackward0>)
tensor(0.0408, grad_fn=<MulBackward0>)
tensor(121.5650, grad_fn=<MseLossBackward0>)
tensor(0.1478, grad_fn=<MulBackward0>)


Epoch 6/50: 31it [00:00, 89.31it/s]

tensor(98.4952, grad_fn=<MseLossBackward0>)
tensor(0.0331, grad_fn=<MulBackward0>)
tensor(112.3974, grad_fn=<MseLossBackward0>)
tensor(0.0544, grad_fn=<MulBackward0>)
tensor(98.5099, grad_fn=<MseLossBackward0>)
tensor(0.0298, grad_fn=<MulBackward0>)
tensor(106.5406, grad_fn=<MseLossBackward0>)
tensor(0.0741, grad_fn=<MulBackward0>)
tensor(99.8965, grad_fn=<MseLossBackward0>)
tensor(0.0253, grad_fn=<MulBackward0>)
tensor(101.5974, grad_fn=<MseLossBackward0>)
tensor(0.0334, grad_fn=<MulBackward0>)
tensor(100.5205, grad_fn=<MseLossBackward0>)
tensor(0.0470, grad_fn=<MulBackward0>)
tensor(102.8258, grad_fn=<MseLossBackward0>)
tensor(0.0436, grad_fn=<MulBackward0>)
tensor(102.1930, grad_fn=<MseLossBackward0>)
tensor(0.0363, grad_fn=<MulBackward0>)
tensor(104.8225, grad_fn=<MseLossBackward0>)
tensor(0.0568, grad_fn=<MulBackward0>)
tensor(100.4773, grad_fn=<MseLossBackward0>)
tensor(0.0267, grad_fn=<MulBackward0>)


Epoch 6/50: 40it [00:00, 86.94it/s]

tensor(98.2328, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)
tensor(96.9429, grad_fn=<MseLossBackward0>)
tensor(0.0298, grad_fn=<MulBackward0>)
tensor(105.1904, grad_fn=<MseLossBackward0>)
tensor(0.0344, grad_fn=<MulBackward0>)
tensor(102.7857, grad_fn=<MseLossBackward0>)
tensor(0.0555, grad_fn=<MulBackward0>)
tensor(99.7554, grad_fn=<MseLossBackward0>)
tensor(0.0368, grad_fn=<MulBackward0>)
tensor(102.4241, grad_fn=<MseLossBackward0>)
tensor(0.0313, grad_fn=<MulBackward0>)
tensor(102.3174, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)
tensor(99.3531, grad_fn=<MseLossBackward0>)
tensor(0.0337, grad_fn=<MulBackward0>)
tensor(100.5368, grad_fn=<MseLossBackward0>)
tensor(0.0304, grad_fn=<MulBackward0>)
tensor(107.7066, grad_fn=<MseLossBackward0>)
tensor(0.0406, grad_fn=<MulBackward0>)
tensor(102.8642, grad_fn=<MseLossBackward0>)
tensor(0.0507, grad_fn=<MulBackward0>)
tensor(148.0714, grad_fn=<MseLossBackward0>)
tensor(0.0936, grad_fn=<MulBackward

Epoch 6/50: 51it [00:00, 93.25it/s]

tensor(100.6543, grad_fn=<MseLossBackward0>)
tensor(0.0312, grad_fn=<MulBackward0>)
tensor(330.4429, grad_fn=<MseLossBackward0>)
tensor(0.1462, grad_fn=<MulBackward0>)
tensor(97.6747, grad_fn=<MseLossBackward0>)
tensor(0.0330, grad_fn=<MulBackward0>)
tensor(106.4187, grad_fn=<MseLossBackward0>)
tensor(0.0364, grad_fn=<MulBackward0>)
tensor(100.6882, grad_fn=<MseLossBackward0>)
tensor(0.0362, grad_fn=<MulBackward0>)
tensor(388.2073, grad_fn=<MseLossBackward0>)
tensor(0.0933, grad_fn=<MulBackward0>)
tensor(112.3351, grad_fn=<MseLossBackward0>)
tensor(0.0456, grad_fn=<MulBackward0>)


Epoch 6/50: 60it [00:00, 92.63it/s]


tensor(102.4443, grad_fn=<MseLossBackward0>)
tensor(0.0353, grad_fn=<MulBackward0>)
tensor(102.8681, grad_fn=<MseLossBackward0>)
tensor(0.0453, grad_fn=<MulBackward0>)
Epoch 6, Loss: 142.1765079498291


Epoch 7/50: 0it [00:00, ?it/s]

tensor(112.5716, grad_fn=<MseLossBackward0>)
tensor(0.0929, grad_fn=<MulBackward0>)
tensor(513.5315, grad_fn=<MseLossBackward0>)
tensor(0.1470, grad_fn=<MulBackward0>)
tensor(105.6051, grad_fn=<MseLossBackward0>)
tensor(0.0272, grad_fn=<MulBackward0>)
tensor(109.1380, grad_fn=<MseLossBackward0>)
tensor(0.0466, grad_fn=<MulBackward0>)
tensor(106.9823, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)
tensor(101.2283, grad_fn=<MseLossBackward0>)
tensor(0.0288, grad_fn=<MulBackward0>)
tensor(1130.6274, grad_fn=<MseLossBackward0>)
tensor(0.0889, grad_fn=<MulBackward0>)


Epoch 7/50: 10it [00:00, 96.57it/s]

tensor(99.9230, grad_fn=<MseLossBackward0>)
tensor(0.0253, grad_fn=<MulBackward0>)
tensor(105.8620, grad_fn=<MseLossBackward0>)
tensor(0.0330, grad_fn=<MulBackward0>)
tensor(120.9397, grad_fn=<MseLossBackward0>)
tensor(0.1054, grad_fn=<MulBackward0>)
tensor(105.4508, grad_fn=<MseLossBackward0>)
tensor(0.0321, grad_fn=<MulBackward0>)
tensor(107.5912, grad_fn=<MseLossBackward0>)
tensor(0.0379, grad_fn=<MulBackward0>)
tensor(97.9709, grad_fn=<MseLossBackward0>)
tensor(0.0441, grad_fn=<MulBackward0>)
tensor(213.5350, grad_fn=<MseLossBackward0>)
tensor(0.1421, grad_fn=<MulBackward0>)
tensor(108.3072, grad_fn=<MseLossBackward0>)
tensor(0.0579, grad_fn=<MulBackward0>)
tensor(95.7278, grad_fn=<MseLossBackward0>)
tensor(0.0254, grad_fn=<MulBackward0>)
tensor(105.1064, grad_fn=<MseLossBackward0>)
tensor(0.0300, grad_fn=<MulBackward0>)
tensor(104.5419, grad_fn=<MseLossBackward0>)
tensor(0.0336, grad_fn=<MulBackward0>)


Epoch 7/50: 20it [00:00, 97.85it/s]

tensor(112.6801, grad_fn=<MseLossBackward0>)
tensor(0.0667, grad_fn=<MulBackward0>)
tensor(99.4495, grad_fn=<MseLossBackward0>)
tensor(0.0240, grad_fn=<MulBackward0>)
tensor(100.8891, grad_fn=<MseLossBackward0>)
tensor(0.0290, grad_fn=<MulBackward0>)
tensor(105.4158, grad_fn=<MseLossBackward0>)
tensor(0.0264, grad_fn=<MulBackward0>)
tensor(134.0125, grad_fn=<MseLossBackward0>)
tensor(0.0888, grad_fn=<MulBackward0>)
tensor(102.6337, grad_fn=<MseLossBackward0>)
tensor(0.0237, grad_fn=<MulBackward0>)
tensor(101.0735, grad_fn=<MseLossBackward0>)
tensor(0.0427, grad_fn=<MulBackward0>)
tensor(98.2176, grad_fn=<MseLossBackward0>)
tensor(0.0339, grad_fn=<MulBackward0>)
tensor(98.7830, grad_fn=<MseLossBackward0>)
tensor(0.0194, grad_fn=<MulBackward0>)


Epoch 7/50: 30it [00:00, 90.78it/s]

tensor(93.1959, grad_fn=<MseLossBackward0>)
tensor(0.0356, grad_fn=<MulBackward0>)
tensor(305.3354, grad_fn=<MseLossBackward0>)
tensor(0.0978, grad_fn=<MulBackward0>)
tensor(103.7057, grad_fn=<MseLossBackward0>)
tensor(0.0408, grad_fn=<MulBackward0>)
tensor(381.4315, grad_fn=<MseLossBackward0>)
tensor(0.0847, grad_fn=<MulBackward0>)
tensor(100.3336, grad_fn=<MseLossBackward0>)
tensor(0.0299, grad_fn=<MulBackward0>)
tensor(101.1526, grad_fn=<MseLossBackward0>)
tensor(0.0362, grad_fn=<MulBackward0>)
tensor(101.1149, grad_fn=<MseLossBackward0>)
tensor(0.0331, grad_fn=<MulBackward0>)
tensor(102.1333, grad_fn=<MseLossBackward0>)
tensor(0.0181, grad_fn=<MulBackward0>)
tensor(105.9157, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)
tensor(101.6147, grad_fn=<MseLossBackward0>)
tensor(0.0341, grad_fn=<MulBackward0>)
tensor(100.6827, grad_fn=<MseLossBackward0>)
tensor(0.0370, grad_fn=<MulBackward0>)


Epoch 7/50: 40it [00:00, 91.11it/s]

tensor(99.7951, grad_fn=<MseLossBackward0>)
tensor(0.0299, grad_fn=<MulBackward0>)
tensor(181.0909, grad_fn=<MseLossBackward0>)
tensor(0.0866, grad_fn=<MulBackward0>)
tensor(105.6268, grad_fn=<MseLossBackward0>)
tensor(0.0228, grad_fn=<MulBackward0>)
tensor(100.8043, grad_fn=<MseLossBackward0>)
tensor(0.0409, grad_fn=<MulBackward0>)
tensor(101.5609, grad_fn=<MseLossBackward0>)
tensor(0.0284, grad_fn=<MulBackward0>)
tensor(98.6505, grad_fn=<MseLossBackward0>)
tensor(0.0316, grad_fn=<MulBackward0>)
tensor(104.5572, grad_fn=<MseLossBackward0>)
tensor(0.0331, grad_fn=<MulBackward0>)
tensor(108.3481, grad_fn=<MseLossBackward0>)
tensor(0.0300, grad_fn=<MulBackward0>)
tensor(101.6764, grad_fn=<MseLossBackward0>)
tensor(0.0261, grad_fn=<MulBackward0>)


Epoch 7/50: 51it [00:00, 97.25it/s]

tensor(101.9288, grad_fn=<MseLossBackward0>)
tensor(0.0201, grad_fn=<MulBackward0>)
tensor(113.7423, grad_fn=<MseLossBackward0>)
tensor(0.0639, grad_fn=<MulBackward0>)
tensor(111.3371, grad_fn=<MseLossBackward0>)
tensor(0.0474, grad_fn=<MulBackward0>)
tensor(105.5711, grad_fn=<MseLossBackward0>)
tensor(0.0352, grad_fn=<MulBackward0>)
tensor(107.3016, grad_fn=<MseLossBackward0>)
tensor(0.0271, grad_fn=<MulBackward0>)
tensor(107.6025, grad_fn=<MseLossBackward0>)
tensor(0.0392, grad_fn=<MulBackward0>)
tensor(118.7859, grad_fn=<MseLossBackward0>)
tensor(0.0783, grad_fn=<MulBackward0>)
tensor(112.9347, grad_fn=<MseLossBackward0>)
tensor(0.0872, grad_fn=<MulBackward0>)
tensor(101.3073, grad_fn=<MseLossBackward0>)
tensor(0.0330, grad_fn=<MulBackward0>)
tensor(93.5630, grad_fn=<MseLossBackward0>)
tensor(0.0299, grad_fn=<MulBackward0>)
tensor(98.8137, grad_fn=<MseLossBackward0>)
tensor(0.0214, grad_fn=<MulBackward0>)
tensor(256.9194, grad_fn=<MseLossBackward0>)
tensor(0.0861, grad_fn=<MulBackwa

Epoch 7/50: 60it [00:00, 95.38it/s]


tensor(100.8040, grad_fn=<MseLossBackward0>)
tensor(0.0423, grad_fn=<MulBackward0>)
Epoch 7, Loss: 142.16568145751953


Epoch 8/50: 0it [00:00, ?it/s]

tensor(104.8288, grad_fn=<MseLossBackward0>)
tensor(0.0219, grad_fn=<MulBackward0>)
tensor(94.8362, grad_fn=<MseLossBackward0>)
tensor(0.0220, grad_fn=<MulBackward0>)
tensor(102.1439, grad_fn=<MseLossBackward0>)
tensor(0.0268, grad_fn=<MulBackward0>)
tensor(97.0175, grad_fn=<MseLossBackward0>)
tensor(0.0279, grad_fn=<MulBackward0>)
tensor(106.9517, grad_fn=<MseLossBackward0>)
tensor(0.0794, grad_fn=<MulBackward0>)
tensor(102.4788, grad_fn=<MseLossBackward0>)
tensor(0.0282, grad_fn=<MulBackward0>)
tensor(1133.4574, grad_fn=<MseLossBackward0>)
tensor(0.1174, grad_fn=<MulBackward0>)


Epoch 8/50: 11it [00:00, 102.47it/s]

tensor(102.5118, grad_fn=<MseLossBackward0>)
tensor(0.0214, grad_fn=<MulBackward0>)
tensor(101.4333, grad_fn=<MseLossBackward0>)
tensor(0.0255, grad_fn=<MulBackward0>)
tensor(105.8687, grad_fn=<MseLossBackward0>)
tensor(0.0303, grad_fn=<MulBackward0>)
tensor(116.6551, grad_fn=<MseLossBackward0>)
tensor(0.0768, grad_fn=<MulBackward0>)
tensor(101.4329, grad_fn=<MseLossBackward0>)
tensor(0.0249, grad_fn=<MulBackward0>)
tensor(97.4344, grad_fn=<MseLossBackward0>)
tensor(0.0271, grad_fn=<MulBackward0>)
tensor(145.0359, grad_fn=<MseLossBackward0>)
tensor(0.0751, grad_fn=<MulBackward0>)
tensor(104.5386, grad_fn=<MseLossBackward0>)
tensor(0.0309, grad_fn=<MulBackward0>)
tensor(508.0020, grad_fn=<MseLossBackward0>)
tensor(0.0788, grad_fn=<MulBackward0>)
tensor(107.1033, grad_fn=<MseLossBackward0>)
tensor(0.0234, grad_fn=<MulBackward0>)
tensor(105.6728, grad_fn=<MseLossBackward0>)
tensor(0.0270, grad_fn=<MulBackward0>)
tensor(100.6643, grad_fn=<MseLossBackward0>)
tensor(0.0310, grad_fn=<MulBackw

Epoch 8/50: 22it [00:00, 95.17it/s] 

tensor(102.3613, grad_fn=<MseLossBackward0>)
tensor(0.0315, grad_fn=<MulBackward0>)
tensor(97.2824, grad_fn=<MseLossBackward0>)
tensor(0.0258, grad_fn=<MulBackward0>)
tensor(98.9636, grad_fn=<MseLossBackward0>)
tensor(0.0288, grad_fn=<MulBackward0>)
tensor(102.7422, grad_fn=<MseLossBackward0>)
tensor(0.0479, grad_fn=<MulBackward0>)
tensor(99.4925, grad_fn=<MseLossBackward0>)
tensor(0.0223, grad_fn=<MulBackward0>)
tensor(103.8669, grad_fn=<MseLossBackward0>)
tensor(0.0248, grad_fn=<MulBackward0>)
tensor(593.1960, grad_fn=<MseLossBackward0>)
tensor(0.1550, grad_fn=<MulBackward0>)
tensor(200.0201, grad_fn=<MseLossBackward0>)
tensor(0.1361, grad_fn=<MulBackward0>)


Epoch 8/50: 32it [00:00, 91.47it/s]

tensor(108.3899, grad_fn=<MseLossBackward0>)
tensor(0.0251, grad_fn=<MulBackward0>)
tensor(263.4070, grad_fn=<MseLossBackward0>)
tensor(0.0776, grad_fn=<MulBackward0>)
tensor(97.0699, grad_fn=<MseLossBackward0>)
tensor(0.0204, grad_fn=<MulBackward0>)
tensor(100.0008, grad_fn=<MseLossBackward0>)
tensor(0.0223, grad_fn=<MulBackward0>)
tensor(101.8573, grad_fn=<MseLossBackward0>)
tensor(0.0340, grad_fn=<MulBackward0>)
tensor(102.1373, grad_fn=<MseLossBackward0>)
tensor(0.0266, grad_fn=<MulBackward0>)
tensor(99.8853, grad_fn=<MseLossBackward0>)
tensor(0.0212, grad_fn=<MulBackward0>)
tensor(106.4036, grad_fn=<MseLossBackward0>)
tensor(0.0203, grad_fn=<MulBackward0>)
tensor(103.9548, grad_fn=<MseLossBackward0>)
tensor(0.0239, grad_fn=<MulBackward0>)
tensor(116.5537, grad_fn=<MseLossBackward0>)
tensor(0.0821, grad_fn=<MulBackward0>)
tensor(104.7951, grad_fn=<MseLossBackward0>)
tensor(0.0262, grad_fn=<MulBackward0>)


Epoch 8/50: 42it [00:00, 93.29it/s]

tensor(180.7971, grad_fn=<MseLossBackward0>)
tensor(0.0725, grad_fn=<MulBackward0>)
tensor(99.5162, grad_fn=<MseLossBackward0>)
tensor(0.0194, grad_fn=<MulBackward0>)
tensor(104.3297, grad_fn=<MseLossBackward0>)
tensor(0.0402, grad_fn=<MulBackward0>)
tensor(110.7501, grad_fn=<MseLossBackward0>)
tensor(0.0326, grad_fn=<MulBackward0>)
tensor(98.6244, grad_fn=<MseLossBackward0>)
tensor(0.0164, grad_fn=<MulBackward0>)
tensor(100.1000, grad_fn=<MseLossBackward0>)
tensor(0.0309, grad_fn=<MulBackward0>)
tensor(96.7553, grad_fn=<MseLossBackward0>)
tensor(0.0321, grad_fn=<MulBackward0>)
tensor(98.3115, grad_fn=<MseLossBackward0>)
tensor(0.0299, grad_fn=<MulBackward0>)
tensor(110.2256, grad_fn=<MseLossBackward0>)
tensor(0.0386, grad_fn=<MulBackward0>)


Epoch 8/50: 53it [00:00, 96.59it/s]

tensor(106.2882, grad_fn=<MseLossBackward0>)
tensor(0.0240, grad_fn=<MulBackward0>)
tensor(103.1213, grad_fn=<MseLossBackward0>)
tensor(0.0318, grad_fn=<MulBackward0>)
tensor(103.3764, grad_fn=<MseLossBackward0>)
tensor(0.0202, grad_fn=<MulBackward0>)
tensor(99.7124, grad_fn=<MseLossBackward0>)
tensor(0.0199, grad_fn=<MulBackward0>)
tensor(104.7515, grad_fn=<MseLossBackward0>)
tensor(0.0247, grad_fn=<MulBackward0>)
tensor(104.9393, grad_fn=<MseLossBackward0>)
tensor(0.0238, grad_fn=<MulBackward0>)
tensor(114.9258, grad_fn=<MseLossBackward0>)
tensor(0.0449, grad_fn=<MulBackward0>)
tensor(104.4043, grad_fn=<MseLossBackward0>)
tensor(0.0287, grad_fn=<MulBackward0>)
tensor(109.4054, grad_fn=<MseLossBackward0>)
tensor(0.0300, grad_fn=<MulBackward0>)
tensor(107.4891, grad_fn=<MseLossBackward0>)
tensor(0.0253, grad_fn=<MulBackward0>)


Epoch 8/50: 60it [00:00, 95.81it/s]


tensor(104.2783, grad_fn=<MseLossBackward0>)
tensor(0.0244, grad_fn=<MulBackward0>)
Epoch 8, Loss: 142.15800933837892


Epoch 9/50: 0it [00:00, ?it/s]

tensor(112.3139, grad_fn=<MseLossBackward0>)
tensor(0.0385, grad_fn=<MulBackward0>)
tensor(106.4245, grad_fn=<MseLossBackward0>)
tensor(0.0281, grad_fn=<MulBackward0>)
tensor(105.2544, grad_fn=<MseLossBackward0>)
tensor(0.0230, grad_fn=<MulBackward0>)
tensor(90.6426, grad_fn=<MseLossBackward0>)
tensor(0.0178, grad_fn=<MulBackward0>)
tensor(101.4163, grad_fn=<MseLossBackward0>)
tensor(0.0178, grad_fn=<MulBackward0>)
tensor(107.4370, grad_fn=<MseLossBackward0>)
tensor(0.0232, grad_fn=<MulBackward0>)
tensor(110.7872, grad_fn=<MseLossBackward0>)
tensor(0.0360, grad_fn=<MulBackward0>)
tensor(100.5888, grad_fn=<MseLossBackward0>)
tensor(0.0197, grad_fn=<MulBackward0>)
tensor(105.7061, grad_fn=<MseLossBackward0>)
tensor(0.0184, grad_fn=<MulBackward0>)
tensor(102.0385, grad_fn=<MseLossBackward0>)
tensor(0.0224, grad_fn=<MulBackward0>)


Epoch 9/50: 11it [00:00, 104.26it/s]

tensor(99.9861, grad_fn=<MseLossBackward0>)
tensor(0.0301, grad_fn=<MulBackward0>)
tensor(101.8154, grad_fn=<MseLossBackward0>)
tensor(0.0214, grad_fn=<MulBackward0>)
tensor(95.8362, grad_fn=<MseLossBackward0>)
tensor(0.0158, grad_fn=<MulBackward0>)
tensor(107.0970, grad_fn=<MseLossBackward0>)
tensor(0.0222, grad_fn=<MulBackward0>)
tensor(98.4234, grad_fn=<MseLossBackward0>)
tensor(0.0190, grad_fn=<MulBackward0>)
tensor(103.1911, grad_fn=<MseLossBackward0>)
tensor(0.0290, grad_fn=<MulBackward0>)
tensor(99.1182, grad_fn=<MseLossBackward0>)
tensor(0.0185, grad_fn=<MulBackward0>)
tensor(107.7032, grad_fn=<MseLossBackward0>)
tensor(0.0243, grad_fn=<MulBackward0>)
tensor(103.5079, grad_fn=<MseLossBackward0>)
tensor(0.0235, grad_fn=<MulBackward0>)
tensor(117.4920, grad_fn=<MseLossBackward0>)
tensor(0.0871, grad_fn=<MulBackward0>)
tensor(1127.9458, grad_fn=<MseLossBackward0>)
tensor(0.0752, grad_fn=<MulBackward0>)


Epoch 9/50: 22it [00:00, 105.71it/s]

tensor(99.7665, grad_fn=<MseLossBackward0>)
tensor(0.0230, grad_fn=<MulBackward0>)
tensor(123.7532, grad_fn=<MseLossBackward0>)
tensor(0.0799, grad_fn=<MulBackward0>)
tensor(104.6000, grad_fn=<MseLossBackward0>)
tensor(0.0185, grad_fn=<MulBackward0>)
tensor(105.3745, grad_fn=<MseLossBackward0>)
tensor(0.0191, grad_fn=<MulBackward0>)
tensor(101.2083, grad_fn=<MseLossBackward0>)
tensor(0.0244, grad_fn=<MulBackward0>)
tensor(112.9228, grad_fn=<MseLossBackward0>)
tensor(0.0616, grad_fn=<MulBackward0>)
tensor(103.4113, grad_fn=<MseLossBackward0>)
tensor(0.0275, grad_fn=<MulBackward0>)
tensor(103.9169, grad_fn=<MseLossBackward0>)
tensor(0.0265, grad_fn=<MulBackward0>)
tensor(508.9117, grad_fn=<MseLossBackward0>)
tensor(0.0747, grad_fn=<MulBackward0>)
tensor(100.3109, grad_fn=<MseLossBackward0>)
tensor(0.0261, grad_fn=<MulBackward0>)
tensor(98.4075, grad_fn=<MseLossBackward0>)
tensor(0.0232, grad_fn=<MulBackward0>)


Epoch 9/50: 34it [00:00, 108.43it/s]

tensor(100.0752, grad_fn=<MseLossBackward0>)
tensor(0.0182, grad_fn=<MulBackward0>)
tensor(98.0082, grad_fn=<MseLossBackward0>)
tensor(0.0172, grad_fn=<MulBackward0>)
tensor(394.0420, grad_fn=<MseLossBackward0>)
tensor(0.0819, grad_fn=<MulBackward0>)
tensor(104.9092, grad_fn=<MseLossBackward0>)
tensor(0.0192, grad_fn=<MulBackward0>)
tensor(107.1543, grad_fn=<MseLossBackward0>)
tensor(0.0291, grad_fn=<MulBackward0>)
tensor(95.1846, grad_fn=<MseLossBackward0>)
tensor(0.0157, grad_fn=<MulBackward0>)
tensor(106.5147, grad_fn=<MseLossBackward0>)
tensor(0.0199, grad_fn=<MulBackward0>)
tensor(100.8723, grad_fn=<MseLossBackward0>)
tensor(0.0189, grad_fn=<MulBackward0>)
tensor(101.8507, grad_fn=<MseLossBackward0>)
tensor(0.0181, grad_fn=<MulBackward0>)
tensor(107.3030, grad_fn=<MseLossBackward0>)
tensor(0.0225, grad_fn=<MulBackward0>)
tensor(116.4940, grad_fn=<MseLossBackward0>)
tensor(0.0853, grad_fn=<MulBackward0>)
tensor(104.5626, grad_fn=<MseLossBackward0>)
tensor(0.0263, grad_fn=<MulBackwa

Epoch 9/50: 45it [00:00, 103.05it/s]

tensor(106.4834, grad_fn=<MseLossBackward0>)
tensor(0.0187, grad_fn=<MulBackward0>)
tensor(103.1398, grad_fn=<MseLossBackward0>)
tensor(0.0176, grad_fn=<MulBackward0>)
tensor(103.8335, grad_fn=<MseLossBackward0>)
tensor(0.0163, grad_fn=<MulBackward0>)
tensor(121.9291, grad_fn=<MseLossBackward0>)
tensor(0.0885, grad_fn=<MulBackward0>)
tensor(104.1678, grad_fn=<MseLossBackward0>)
tensor(0.0222, grad_fn=<MulBackward0>)
tensor(104.8041, grad_fn=<MseLossBackward0>)
tensor(0.0190, grad_fn=<MulBackward0>)
tensor(98.9454, grad_fn=<MseLossBackward0>)
tensor(0.0160, grad_fn=<MulBackward0>)
tensor(300.3012, grad_fn=<MseLossBackward0>)
tensor(0.0705, grad_fn=<MulBackward0>)
tensor(169.7698, grad_fn=<MseLossBackward0>)
tensor(0.0720, grad_fn=<MulBackward0>)


Epoch 9/50: 60it [00:00, 104.63it/s]


tensor(255.8303, grad_fn=<MseLossBackward0>)
tensor(0.0716, grad_fn=<MulBackward0>)
tensor(139.1340, grad_fn=<MseLossBackward0>)
tensor(0.0689, grad_fn=<MulBackward0>)
tensor(103.8060, grad_fn=<MseLossBackward0>)
tensor(0.0215, grad_fn=<MulBackward0>)
tensor(178.4978, grad_fn=<MseLossBackward0>)
tensor(0.0708, grad_fn=<MulBackward0>)
tensor(100.9738, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(127.5034, grad_fn=<MseLossBackward0>)
tensor(0.0747, grad_fn=<MulBackward0>)
tensor(103.7147, grad_fn=<MseLossBackward0>)
tensor(0.0164, grad_fn=<MulBackward0>)
Epoch 9, Loss: 142.15297292073566


Epoch 10/50: 0it [00:00, ?it/s]

tensor(106.0494, grad_fn=<MseLossBackward0>)
tensor(0.0239, grad_fn=<MulBackward0>)
tensor(108.4948, grad_fn=<MseLossBackward0>)
tensor(0.0675, grad_fn=<MulBackward0>)
tensor(105.3312, grad_fn=<MseLossBackward0>)
tensor(0.0142, grad_fn=<MulBackward0>)
tensor(179.7471, grad_fn=<MseLossBackward0>)
tensor(0.0764, grad_fn=<MulBackward0>)
tensor(110.1440, grad_fn=<MseLossBackward0>)
tensor(0.0424, grad_fn=<MulBackward0>)


Epoch 10/50: 11it [00:00, 103.30it/s]

tensor(107.8335, grad_fn=<MseLossBackward0>)
tensor(0.0233, grad_fn=<MulBackward0>)
tensor(105.8762, grad_fn=<MseLossBackward0>)
tensor(0.0259, grad_fn=<MulBackward0>)
tensor(99.8626, grad_fn=<MseLossBackward0>)
tensor(0.0144, grad_fn=<MulBackward0>)
tensor(97.2506, grad_fn=<MseLossBackward0>)
tensor(0.0131, grad_fn=<MulBackward0>)
tensor(105.8451, grad_fn=<MseLossBackward0>)
tensor(0.0148, grad_fn=<MulBackward0>)
tensor(104.0214, grad_fn=<MseLossBackward0>)
tensor(0.0152, grad_fn=<MulBackward0>)
tensor(101.8490, grad_fn=<MseLossBackward0>)
tensor(0.0189, grad_fn=<MulBackward0>)
tensor(101.2319, grad_fn=<MseLossBackward0>)
tensor(0.0148, grad_fn=<MulBackward0>)
tensor(109.6970, grad_fn=<MseLossBackward0>)
tensor(0.0309, grad_fn=<MulBackward0>)
tensor(1285.1066, grad_fn=<MseLossBackward0>)
tensor(0.1280, grad_fn=<MulBackward0>)
tensor(99.4326, grad_fn=<MseLossBackward0>)
tensor(0.0248, grad_fn=<MulBackward0>)
tensor(101.3028, grad_fn=<MseLossBackward0>)
tensor(0.0219, grad_fn=<MulBackwa

Epoch 10/50: 23it [00:00, 107.51it/s]

tensor(117.7171, grad_fn=<MseLossBackward0>)
tensor(0.0687, grad_fn=<MulBackward0>)
tensor(103.5870, grad_fn=<MseLossBackward0>)
tensor(0.0180, grad_fn=<MulBackward0>)
tensor(125.8730, grad_fn=<MseLossBackward0>)
tensor(0.0679, grad_fn=<MulBackward0>)
tensor(115.6365, grad_fn=<MseLossBackward0>)
tensor(0.0718, grad_fn=<MulBackward0>)
tensor(304.5061, grad_fn=<MseLossBackward0>)
tensor(0.0709, grad_fn=<MulBackward0>)
tensor(505.4028, grad_fn=<MseLossBackward0>)
tensor(0.0486, grad_fn=<MulBackward0>)
tensor(108.2144, grad_fn=<MseLossBackward0>)
tensor(0.0131, grad_fn=<MulBackward0>)
tensor(103.1465, grad_fn=<MseLossBackward0>)
tensor(0.0145, grad_fn=<MulBackward0>)
tensor(112.5090, grad_fn=<MseLossBackward0>)
tensor(0.0700, grad_fn=<MulBackward0>)
tensor(105.8964, grad_fn=<MseLossBackward0>)
tensor(0.0250, grad_fn=<MulBackward0>)
tensor(107.6279, grad_fn=<MseLossBackward0>)
tensor(0.0188, grad_fn=<MulBackward0>)
tensor(104.8947, grad_fn=<MseLossBackward0>)
tensor(0.0137, grad_fn=<MulBack

Epoch 10/50: 34it [00:00, 107.57it/s]

tensor(375.1009, grad_fn=<MseLossBackward0>)
tensor(0.0683, grad_fn=<MulBackward0>)
tensor(105.8671, grad_fn=<MseLossBackward0>)
tensor(0.0214, grad_fn=<MulBackward0>)
tensor(106.8929, grad_fn=<MseLossBackward0>)
tensor(0.0374, grad_fn=<MulBackward0>)
tensor(99.8732, grad_fn=<MseLossBackward0>)
tensor(0.0261, grad_fn=<MulBackward0>)
tensor(99.5561, grad_fn=<MseLossBackward0>)
tensor(0.0164, grad_fn=<MulBackward0>)
tensor(109.4216, grad_fn=<MseLossBackward0>)
tensor(0.0188, grad_fn=<MulBackward0>)
tensor(110.0743, grad_fn=<MseLossBackward0>)
tensor(0.0215, grad_fn=<MulBackward0>)
tensor(101.6601, grad_fn=<MseLossBackward0>)
tensor(0.0252, grad_fn=<MulBackward0>)
tensor(106.3918, grad_fn=<MseLossBackward0>)
tensor(0.0168, grad_fn=<MulBackward0>)
tensor(173.2826, grad_fn=<MseLossBackward0>)
tensor(0.0459, grad_fn=<MulBackward0>)
tensor(102.1481, grad_fn=<MseLossBackward0>)
tensor(0.0208, grad_fn=<MulBackward0>)


Epoch 10/50: 46it [00:00, 108.94it/s]

tensor(143.5093, grad_fn=<MseLossBackward0>)
tensor(0.0726, grad_fn=<MulBackward0>)
tensor(99.6537, grad_fn=<MseLossBackward0>)
tensor(0.0318, grad_fn=<MulBackward0>)
tensor(97.6330, grad_fn=<MseLossBackward0>)
tensor(0.0161, grad_fn=<MulBackward0>)
tensor(102.3601, grad_fn=<MseLossBackward0>)
tensor(0.0271, grad_fn=<MulBackward0>)
tensor(99.3024, grad_fn=<MseLossBackward0>)
tensor(0.0149, grad_fn=<MulBackward0>)
tensor(95.3065, grad_fn=<MseLossBackward0>)
tensor(0.0145, grad_fn=<MulBackward0>)
tensor(107.4613, grad_fn=<MseLossBackward0>)
tensor(0.0371, grad_fn=<MulBackward0>)
tensor(100.6996, grad_fn=<MseLossBackward0>)
tensor(0.0206, grad_fn=<MulBackward0>)
tensor(101.0014, grad_fn=<MseLossBackward0>)
tensor(0.0154, grad_fn=<MulBackward0>)
tensor(108.0113, grad_fn=<MseLossBackward0>)
tensor(0.0183, grad_fn=<MulBackward0>)
tensor(107.3817, grad_fn=<MseLossBackward0>)
tensor(0.0133, grad_fn=<MulBackward0>)


Epoch 10/50: 57it [00:00, 105.72it/s]

tensor(106.2506, grad_fn=<MseLossBackward0>)
tensor(0.0143, grad_fn=<MulBackward0>)
tensor(97.2259, grad_fn=<MseLossBackward0>)
tensor(0.0191, grad_fn=<MulBackward0>)
tensor(101.3738, grad_fn=<MseLossBackward0>)


Epoch 10/50: 60it [00:00, 105.73it/s]


tensor(0.0152, grad_fn=<MulBackward0>)
tensor(112.0562, grad_fn=<MseLossBackward0>)
tensor(0.0206, grad_fn=<MulBackward0>)
Epoch 10, Loss: 142.1490083058675


Epoch 11/50: 0it [00:00, ?it/s]

tensor(104.8084, grad_fn=<MseLossBackward0>)
tensor(0.0178, grad_fn=<MulBackward0>)
tensor(309.3094, grad_fn=<MseLossBackward0>)
tensor(0.0556, grad_fn=<MulBackward0>)
tensor(99.7985, grad_fn=<MseLossBackward0>)
tensor(0.0242, grad_fn=<MulBackward0>)
tensor(154.7757, grad_fn=<MseLossBackward0>)
tensor(0.0718, grad_fn=<MulBackward0>)
tensor(383.7717, grad_fn=<MseLossBackward0>)
tensor(0.0397, grad_fn=<MulBackward0>)
tensor(109.8005, grad_fn=<MseLossBackward0>)
tensor(0.0123, grad_fn=<MulBackward0>)
tensor(101.4401, grad_fn=<MseLossBackward0>)
tensor(0.0259, grad_fn=<MulBackward0>)
tensor(101.2552, grad_fn=<MseLossBackward0>)
tensor(0.0232, grad_fn=<MulBackward0>)
tensor(106.3656, grad_fn=<MseLossBackward0>)
tensor(0.0184, grad_fn=<MulBackward0>)
tensor(114.1005, grad_fn=<MseLossBackward0>)
tensor(0.0658, grad_fn=<MulBackward0>)


Epoch 11/50: 11it [00:00, 103.95it/s]

tensor(100.1305, grad_fn=<MseLossBackward0>)
tensor(0.0153, grad_fn=<MulBackward0>)
tensor(104.0320, grad_fn=<MseLossBackward0>)
tensor(0.0175, grad_fn=<MulBackward0>)
tensor(101.7278, grad_fn=<MseLossBackward0>)
tensor(0.0150, grad_fn=<MulBackward0>)
tensor(107.0740, grad_fn=<MseLossBackward0>)
tensor(0.0171, grad_fn=<MulBackward0>)
tensor(107.7036, grad_fn=<MseLossBackward0>)
tensor(0.0113, grad_fn=<MulBackward0>)
tensor(105.1735, grad_fn=<MseLossBackward0>)
tensor(0.0201, grad_fn=<MulBackward0>)
tensor(102.2653, grad_fn=<MseLossBackward0>)
tensor(0.0213, grad_fn=<MulBackward0>)
tensor(114.2094, grad_fn=<MseLossBackward0>)
tensor(0.0515, grad_fn=<MulBackward0>)
tensor(105.0089, grad_fn=<MseLossBackward0>)
tensor(0.0166, grad_fn=<MulBackward0>)
tensor(101.4230, grad_fn=<MseLossBackward0>)
tensor(0.0173, grad_fn=<MulBackward0>)
tensor(104.5993, grad_fn=<MseLossBackward0>)
tensor(0.0246, grad_fn=<MulBackward0>)


Epoch 11/50: 22it [00:00, 97.56it/s] 

tensor(99.5141, grad_fn=<MseLossBackward0>)
tensor(0.0149, grad_fn=<MulBackward0>)
tensor(100.9276, grad_fn=<MseLossBackward0>)
tensor(0.0175, grad_fn=<MulBackward0>)
tensor(103.1447, grad_fn=<MseLossBackward0>)
tensor(0.0172, grad_fn=<MulBackward0>)
tensor(100.1176, grad_fn=<MseLossBackward0>)
tensor(0.0283, grad_fn=<MulBackward0>)
tensor(103.7627, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)
tensor(101.2867, grad_fn=<MseLossBackward0>)
tensor(0.0171, grad_fn=<MulBackward0>)
tensor(104.8261, grad_fn=<MseLossBackward0>)
tensor(0.0165, grad_fn=<MulBackward0>)
tensor(93.5163, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(102.1452, grad_fn=<MseLossBackward0>)
tensor(0.0140, grad_fn=<MulBackward0>)


Epoch 11/50: 32it [00:00, 98.54it/s]

tensor(100.7912, grad_fn=<MseLossBackward0>)
tensor(0.0179, grad_fn=<MulBackward0>)
tensor(100.1528, grad_fn=<MseLossBackward0>)
tensor(0.0150, grad_fn=<MulBackward0>)
tensor(103.8640, grad_fn=<MseLossBackward0>)
tensor(0.0201, grad_fn=<MulBackward0>)
tensor(262.3083, grad_fn=<MseLossBackward0>)
tensor(0.0391, grad_fn=<MulBackward0>)
tensor(108.7200, grad_fn=<MseLossBackward0>)
tensor(0.0175, grad_fn=<MulBackward0>)
tensor(102.1529, grad_fn=<MseLossBackward0>)
tensor(0.0178, grad_fn=<MulBackward0>)
tensor(107.3742, grad_fn=<MseLossBackward0>)
tensor(0.0181, grad_fn=<MulBackward0>)
tensor(98.2486, grad_fn=<MseLossBackward0>)
tensor(0.0193, grad_fn=<MulBackward0>)
tensor(103.5722, grad_fn=<MseLossBackward0>)
tensor(0.0133, grad_fn=<MulBackward0>)
tensor(96.9759, grad_fn=<MseLossBackward0>)
tensor(0.0192, grad_fn=<MulBackward0>)
tensor(111.7209, grad_fn=<MseLossBackward0>)
tensor(0.0662, grad_fn=<MulBackward0>)
tensor(119.3014, grad_fn=<MseLossBackward0>)
tensor(0.0698, grad_fn=<MulBackwa

Epoch 11/50: 44it [00:00, 102.41it/s]

tensor(1213.7523, grad_fn=<MseLossBackward0>)
tensor(0.1890, grad_fn=<MulBackward0>)
tensor(100.5705, grad_fn=<MseLossBackward0>)
tensor(0.0197, grad_fn=<MulBackward0>)
tensor(97.5857, grad_fn=<MseLossBackward0>)
tensor(0.0250, grad_fn=<MulBackward0>)
tensor(105.6292, grad_fn=<MseLossBackward0>)
tensor(0.0264, grad_fn=<MulBackward0>)
tensor(129.2687, grad_fn=<MseLossBackward0>)
tensor(0.0695, grad_fn=<MulBackward0>)
tensor(107.1228, grad_fn=<MseLossBackward0>)
tensor(0.0183, grad_fn=<MulBackward0>)
tensor(99.1182, grad_fn=<MseLossBackward0>)
tensor(0.0184, grad_fn=<MulBackward0>)
tensor(506.2039, grad_fn=<MseLossBackward0>)
tensor(0.0756, grad_fn=<MulBackward0>)


Epoch 11/50: 55it [00:00, 98.03it/s] 

tensor(98.0747, grad_fn=<MseLossBackward0>)
tensor(0.0225, grad_fn=<MulBackward0>)
tensor(109.0294, grad_fn=<MseLossBackward0>)
tensor(0.0349, grad_fn=<MulBackward0>)
tensor(94.9470, grad_fn=<MseLossBackward0>)
tensor(0.0174, grad_fn=<MulBackward0>)
tensor(106.3390, grad_fn=<MseLossBackward0>)
tensor(0.0243, grad_fn=<MulBackward0>)
tensor(102.9806, grad_fn=<MseLossBackward0>)
tensor(0.0203, grad_fn=<MulBackward0>)
tensor(103.2849, grad_fn=<MseLossBackward0>)
tensor(0.0162, grad_fn=<MulBackward0>)


Epoch 11/50: 60it [00:00, 99.49it/s]


tensor(104.9059, grad_fn=<MseLossBackward0>)
tensor(0.0210, grad_fn=<MulBackward0>)
tensor(174.2711, grad_fn=<MseLossBackward0>)
tensor(0.0702, grad_fn=<MulBackward0>)
tensor(99.2635, grad_fn=<MseLossBackward0>)
tensor(0.0161, grad_fn=<MulBackward0>)
Epoch 11, Loss: 142.14764582316081


Epoch 12/50: 0it [00:00, ?it/s]

tensor(173.6328, grad_fn=<MseLossBackward0>)
tensor(0.0772, grad_fn=<MulBackward0>)
tensor(99.1302, grad_fn=<MseLossBackward0>)
tensor(0.0183, grad_fn=<MulBackward0>)
tensor(103.9748, grad_fn=<MseLossBackward0>)
tensor(0.0184, grad_fn=<MulBackward0>)
tensor(101.3946, grad_fn=<MseLossBackward0>)
tensor(0.0161, grad_fn=<MulBackward0>)
tensor(1130.2603, grad_fn=<MseLossBackward0>)
tensor(0.0642, grad_fn=<MulBackward0>)
tensor(103.2987, grad_fn=<MseLossBackward0>)
tensor(0.0154, grad_fn=<MulBackward0>)
tensor(112.0955, grad_fn=<MseLossBackward0>)
tensor(0.0706, grad_fn=<MulBackward0>)
tensor(102.0411, grad_fn=<MseLossBackward0>)
tensor(0.0159, grad_fn=<MulBackward0>)
tensor(106.8340, grad_fn=<MseLossBackward0>)
tensor(0.0142, grad_fn=<MulBackward0>)


Epoch 12/50: 11it [00:00, 104.86it/s]

tensor(102.6240, grad_fn=<MseLossBackward0>)
tensor(0.0153, grad_fn=<MulBackward0>)
tensor(107.3235, grad_fn=<MseLossBackward0>)
tensor(0.0158, grad_fn=<MulBackward0>)
tensor(304.9813, grad_fn=<MseLossBackward0>)
tensor(0.0671, grad_fn=<MulBackward0>)
tensor(110.7089, grad_fn=<MseLossBackward0>)
tensor(0.0300, grad_fn=<MulBackward0>)
tensor(100.8683, grad_fn=<MseLossBackward0>)
tensor(0.0125, grad_fn=<MulBackward0>)
tensor(111.2164, grad_fn=<MseLossBackward0>)
tensor(0.0215, grad_fn=<MulBackward0>)
tensor(105.8501, grad_fn=<MseLossBackward0>)
tensor(0.0160, grad_fn=<MulBackward0>)
tensor(113.5604, grad_fn=<MseLossBackward0>)
tensor(0.0638, grad_fn=<MulBackward0>)
tensor(100.9755, grad_fn=<MseLossBackward0>)
tensor(0.0217, grad_fn=<MulBackward0>)
tensor(102.1613, grad_fn=<MseLossBackward0>)
tensor(0.0206, grad_fn=<MulBackward0>)
tensor(99.1664, grad_fn=<MseLossBackward0>)
tensor(0.0137, grad_fn=<MulBackward0>)
tensor(97.3047, grad_fn=<MseLossBackward0>)


Epoch 12/50: 22it [00:00, 98.77it/s] 

tensor(0.0095, grad_fn=<MulBackward0>)
tensor(104.3673, grad_fn=<MseLossBackward0>)
tensor(0.0212, grad_fn=<MulBackward0>)
tensor(101.8539, grad_fn=<MseLossBackward0>)
tensor(0.0197, grad_fn=<MulBackward0>)
tensor(100.2899, grad_fn=<MseLossBackward0>)
tensor(0.0138, grad_fn=<MulBackward0>)
tensor(101.3605, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(106.6973, grad_fn=<MseLossBackward0>)
tensor(0.0110, grad_fn=<MulBackward0>)
tensor(104.3803, grad_fn=<MseLossBackward0>)
tensor(0.0176, grad_fn=<MulBackward0>)
tensor(119.7782, grad_fn=<MseLossBackward0>)
tensor(0.0814, grad_fn=<MulBackward0>)
tensor(260.0909, grad_fn=<MseLossBackward0>)
tensor(0.0702, grad_fn=<MulBackward0>)
tensor(101.4954, grad_fn=<MseLossBackward0>)
tensor(0.0127, grad_fn=<MulBackward0>)
tensor(101.2068, grad_fn=<MseLossBackward0>)
tensor(0.0139, grad_fn=<MulBackward0>)


Epoch 12/50: 32it [00:00, 95.99it/s]

tensor(103.4350, grad_fn=<MseLossBackward0>)
tensor(0.0143, grad_fn=<MulBackward0>)
tensor(101.5377, grad_fn=<MseLossBackward0>)
tensor(0.0187, grad_fn=<MulBackward0>)
tensor(106.1026, grad_fn=<MseLossBackward0>)
tensor(0.0138, grad_fn=<MulBackward0>)
tensor(502.7422, grad_fn=<MseLossBackward0>)
tensor(0.0608, grad_fn=<MulBackward0>)
tensor(101.2970, grad_fn=<MseLossBackward0>)
tensor(0.0190, grad_fn=<MulBackward0>)
tensor(101.9696, grad_fn=<MseLossBackward0>)
tensor(0.0161, grad_fn=<MulBackward0>)
tensor(112.5554, grad_fn=<MseLossBackward0>)
tensor(0.0329, grad_fn=<MulBackward0>)
tensor(105.7492, grad_fn=<MseLossBackward0>)
tensor(0.0140, grad_fn=<MulBackward0>)
tensor(102.5038, grad_fn=<MseLossBackward0>)
tensor(0.0110, grad_fn=<MulBackward0>)


Epoch 12/50: 42it [00:00, 96.87it/s]

tensor(104.0338, grad_fn=<MseLossBackward0>)
tensor(0.0117, grad_fn=<MulBackward0>)
tensor(111.2161, grad_fn=<MseLossBackward0>)
tensor(0.0247, grad_fn=<MulBackward0>)
tensor(103.7927, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(101.7943, grad_fn=<MseLossBackward0>)
tensor(0.0119, grad_fn=<MulBackward0>)
tensor(387.0529, grad_fn=<MseLossBackward0>)
tensor(0.0809, grad_fn=<MulBackward0>)
tensor(175.0469, grad_fn=<MseLossBackward0>)
tensor(0.0664, grad_fn=<MulBackward0>)
tensor(102.4834, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(99.5975, grad_fn=<MseLossBackward0>)
tensor(0.0175, grad_fn=<MulBackward0>)
tensor(105.8108, grad_fn=<MseLossBackward0>)
tensor(0.0180, grad_fn=<MulBackward0>)
tensor(105.0366, grad_fn=<MseLossBackward0>)
tensor(0.0137, grad_fn=<MulBackward0>)
tensor(110.3006, grad_fn=<MseLossBackward0>)
tensor(0.0119, grad_fn=<MulBackward0>)


Epoch 12/50: 52it [00:00, 95.33it/s]

tensor(117.1984, grad_fn=<MseLossBackward0>)
tensor(0.0622, grad_fn=<MulBackward0>)
tensor(95.9577, grad_fn=<MseLossBackward0>)
tensor(0.0140, grad_fn=<MulBackward0>)
tensor(150.8642, grad_fn=<MseLossBackward0>)
tensor(0.0636, grad_fn=<MulBackward0>)
tensor(101.6438, grad_fn=<MseLossBackward0>)
tensor(0.0142, grad_fn=<MulBackward0>)
tensor(100.3434, grad_fn=<MseLossBackward0>)
tensor(0.0143, grad_fn=<MulBackward0>)
tensor(93.7653, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(125.9122, grad_fn=<MseLossBackward0>)
tensor(0.0656, grad_fn=<MulBackward0>)
tensor(110.1142, grad_fn=<MseLossBackward0>)
tensor(0.0125, grad_fn=<MulBackward0>)
tensor(96.3126, grad_fn=<MseLossBackward0>)
tensor(0.0104, grad_fn=<MulBackward0>)


Epoch 12/50: 60it [00:00, 96.53it/s]


Epoch 12, Loss: 142.1454703013102


Epoch 13/50: 0it [00:00, ?it/s]

tensor(101.5729, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(95.2503, grad_fn=<MseLossBackward0>)
tensor(0.0106, grad_fn=<MulBackward0>)
tensor(125.9245, grad_fn=<MseLossBackward0>)
tensor(0.0624, grad_fn=<MulBackward0>)
tensor(109.1735, grad_fn=<MseLossBackward0>)
tensor(0.0164, grad_fn=<MulBackward0>)
tensor(113.3035, grad_fn=<MseLossBackward0>)
tensor(0.0235, grad_fn=<MulBackward0>)
tensor(104.5696, grad_fn=<MseLossBackward0>)
tensor(0.0216, grad_fn=<MulBackward0>)
tensor(110.0464, grad_fn=<MseLossBackward0>)
tensor(0.0156, grad_fn=<MulBackward0>)
tensor(101.6350, grad_fn=<MseLossBackward0>)
tensor(0.0142, grad_fn=<MulBackward0>)


Epoch 13/50: 10it [00:00, 91.73it/s]

tensor(118.5667, grad_fn=<MseLossBackward0>)
tensor(0.0645, grad_fn=<MulBackward0>)
tensor(97.4242, grad_fn=<MseLossBackward0>)
tensor(0.0140, grad_fn=<MulBackward0>)
tensor(99.7630, grad_fn=<MseLossBackward0>)
tensor(0.0114, grad_fn=<MulBackward0>)
tensor(102.9305, grad_fn=<MseLossBackward0>)
tensor(0.0114, grad_fn=<MulBackward0>)
tensor(109.4441, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(1131.2430, grad_fn=<MseLossBackward0>)
tensor(0.0707, grad_fn=<MulBackward0>)
tensor(105.9010, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(384.5990, grad_fn=<MseLossBackward0>)
tensor(0.0584, grad_fn=<MulBackward0>)
tensor(144.7544, grad_fn=<MseLossBackward0>)
tensor(0.0598, grad_fn=<MulBackward0>)
tensor(102.3472, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(173.8239, grad_fn=<MseLossBackward0>)
tensor(0.0600, grad_fn=<MulBackward0>)
tensor(103.7146, grad_fn=<MseLossBackward0>)
tensor(0.0153, grad_fn=<MulBackw

Epoch 13/50: 21it [00:00, 96.23it/s]

tensor(133.5359, grad_fn=<MseLossBackward0>)
tensor(0.0608, grad_fn=<MulBackward0>)
tensor(98.5410, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)
tensor(102.0405, grad_fn=<MseLossBackward0>)
tensor(0.0126, grad_fn=<MulBackward0>)
tensor(255.3083, grad_fn=<MseLossBackward0>)
tensor(0.0602, grad_fn=<MulBackward0>)
tensor(100.3134, grad_fn=<MseLossBackward0>)
tensor(0.0101, grad_fn=<MulBackward0>)
tensor(114.8829, grad_fn=<MseLossBackward0>)
tensor(0.0447, grad_fn=<MulBackward0>)
tensor(101.9355, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(100.7809, grad_fn=<MseLossBackward0>)
tensor(0.0101, grad_fn=<MulBackward0>)


Epoch 13/50: 31it [00:00, 97.40it/s]

tensor(101.4239, grad_fn=<MseLossBackward0>)
tensor(0.0149, grad_fn=<MulBackward0>)
tensor(100.5312, grad_fn=<MseLossBackward0>)
tensor(0.0126, grad_fn=<MulBackward0>)
tensor(101.2581, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MulBackward0>)
tensor(99.5055, grad_fn=<MseLossBackward0>)
tensor(0.0112, grad_fn=<MulBackward0>)
tensor(96.7639, grad_fn=<MseLossBackward0>)
tensor(0.0130, grad_fn=<MulBackward0>)
tensor(93.3871, grad_fn=<MseLossBackward0>)
tensor(0.0121, grad_fn=<MulBackward0>)
tensor(98.2871, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(100.0981, grad_fn=<MseLossBackward0>)
tensor(0.0188, grad_fn=<MulBackward0>)
tensor(99.0776, grad_fn=<MseLossBackward0>)
tensor(0.0134, grad_fn=<MulBackward0>)
tensor(172.4128, grad_fn=<MseLossBackward0>)
tensor(0.0624, grad_fn=<MulBackward0>)
tensor(109.1678, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
tensor(101.5693, grad_fn=<MseLossBackward0>)
tensor(0.0125, grad_fn=<MulBackward0

Epoch 13/50: 42it [00:00, 98.47it/s]

tensor(109.6936, grad_fn=<MseLossBackward0>)
tensor(0.0149, grad_fn=<MulBackward0>)
tensor(106.8718, grad_fn=<MseLossBackward0>)
tensor(0.0151, grad_fn=<MulBackward0>)
tensor(103.8458, grad_fn=<MseLossBackward0>)
tensor(0.0222, grad_fn=<MulBackward0>)
tensor(103.2955, grad_fn=<MseLossBackward0>)
tensor(0.0136, grad_fn=<MulBackward0>)
tensor(114.3281, grad_fn=<MseLossBackward0>)
tensor(0.0123, grad_fn=<MulBackward0>)
tensor(120.4816, grad_fn=<MseLossBackward0>)
tensor(0.0407, grad_fn=<MulBackward0>)
tensor(92.6168, grad_fn=<MseLossBackward0>)
tensor(0.0123, grad_fn=<MulBackward0>)
tensor(102.2924, grad_fn=<MseLossBackward0>)
tensor(0.0082, grad_fn=<MulBackward0>)
tensor(102.7601, grad_fn=<MseLossBackward0>)
tensor(0.0107, grad_fn=<MulBackward0>)
tensor(98.4893, grad_fn=<MseLossBackward0>)
tensor(0.0117, grad_fn=<MulBackward0>)


Epoch 13/50: 52it [00:00, 96.13it/s]

tensor(99.8267, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(104.6568, grad_fn=<MseLossBackward0>)
tensor(0.0177, grad_fn=<MulBackward0>)
tensor(100.8857, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(108.6458, grad_fn=<MseLossBackward0>)
tensor(0.0121, grad_fn=<MulBackward0>)
tensor(102.5251, grad_fn=<MseLossBackward0>)
tensor(0.0093, grad_fn=<MulBackward0>)
tensor(119.5184, grad_fn=<MseLossBackward0>)
tensor(0.0674, grad_fn=<MulBackward0>)


Epoch 13/50: 60it [00:00, 97.20it/s]

tensor(105.9693, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(100.2712, grad_fn=<MseLossBackward0>)
tensor(0.0118, grad_fn=<MulBackward0>)
tensor(713.7312, grad_fn=<MseLossBackward0>)
tensor(0.1063, grad_fn=<MulBackward0>)





Epoch 13, Loss: 142.14184176127117


Epoch 14/50: 0it [00:00, ?it/s]

tensor(102.1394, grad_fn=<MseLossBackward0>)
tensor(0.0107, grad_fn=<MulBackward0>)
tensor(98.5596, grad_fn=<MseLossBackward0>)
tensor(0.0075, grad_fn=<MulBackward0>)
tensor(100.5775, grad_fn=<MseLossBackward0>)
tensor(0.0134, grad_fn=<MulBackward0>)
tensor(103.6466, grad_fn=<MseLossBackward0>)
tensor(0.0142, grad_fn=<MulBackward0>)
tensor(102.5558, grad_fn=<MseLossBackward0>)
tensor(0.0108, grad_fn=<MulBackward0>)
tensor(100.6705, grad_fn=<MseLossBackward0>)
tensor(0.0089, grad_fn=<MulBackward0>)
tensor(103.3247, grad_fn=<MseLossBackward0>)
tensor(0.0123, grad_fn=<MulBackward0>)
tensor(110.5889, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)


Epoch 14/50: 11it [00:00, 103.38it/s]

tensor(124.7739, grad_fn=<MseLossBackward0>)
tensor(0.0565, grad_fn=<MulBackward0>)
tensor(110.2713, grad_fn=<MseLossBackward0>)
tensor(0.0105, grad_fn=<MulBackward0>)
tensor(383.5873, grad_fn=<MseLossBackward0>)
tensor(0.0665, grad_fn=<MulBackward0>)
tensor(108.2224, grad_fn=<MseLossBackward0>)
tensor(0.0190, grad_fn=<MulBackward0>)
tensor(106.3549, grad_fn=<MseLossBackward0>)
tensor(0.0111, grad_fn=<MulBackward0>)
tensor(111.7177, grad_fn=<MseLossBackward0>)
tensor(0.0158, grad_fn=<MulBackward0>)
tensor(103.5375, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(103.9469, grad_fn=<MseLossBackward0>)
tensor(0.0134, grad_fn=<MulBackward0>)
tensor(106.0736, grad_fn=<MseLossBackward0>)
tensor(0.0119, grad_fn=<MulBackward0>)
tensor(103.6098, grad_fn=<MseLossBackward0>)
tensor(0.0107, grad_fn=<MulBackward0>)
tensor(109.4491, grad_fn=<MseLossBackward0>)
tensor(0.0180, grad_fn=<MulBackward0>)
tensor(170.4483, grad_fn=<MseLossBackward0>)
tensor(0.0563, grad_fn=<MulBack

Epoch 14/50: 22it [00:00, 106.72it/s]

tensor(106.7620, grad_fn=<MseLossBackward0>)
tensor(0.0103, grad_fn=<MulBackward0>)
tensor(117.1367, grad_fn=<MseLossBackward0>)
tensor(0.0577, grad_fn=<MulBackward0>)
tensor(107.2687, grad_fn=<MseLossBackward0>)
tensor(0.0152, grad_fn=<MulBackward0>)
tensor(301.4209, grad_fn=<MseLossBackward0>)
tensor(0.0540, grad_fn=<MulBackward0>)
tensor(99.4553, grad_fn=<MseLossBackward0>)
tensor(0.0124, grad_fn=<MulBackward0>)
tensor(100.7363, grad_fn=<MseLossBackward0>)
tensor(0.0110, grad_fn=<MulBackward0>)
tensor(95.3362, grad_fn=<MseLossBackward0>)
tensor(0.0129, grad_fn=<MulBackward0>)
tensor(266.7125, grad_fn=<MseLossBackward0>)
tensor(0.0600, grad_fn=<MulBackward0>)
tensor(105.5291, grad_fn=<MseLossBackward0>)
tensor(0.0095, grad_fn=<MulBackward0>)


Epoch 14/50: 33it [00:00, 104.82it/s]

tensor(145.2223, grad_fn=<MseLossBackward0>)
tensor(0.0575, grad_fn=<MulBackward0>)
tensor(102.0115, grad_fn=<MseLossBackward0>)
tensor(0.0117, grad_fn=<MulBackward0>)
tensor(106.3699, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(98.7648, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)
tensor(119.6349, grad_fn=<MseLossBackward0>)
tensor(0.0386, grad_fn=<MulBackward0>)
tensor(109.4678, grad_fn=<MseLossBackward0>)
tensor(0.0147, grad_fn=<MulBackward0>)
tensor(103.1703, grad_fn=<MseLossBackward0>)
tensor(0.0122, grad_fn=<MulBackward0>)
tensor(106.9703, grad_fn=<MseLossBackward0>)
tensor(0.0153, grad_fn=<MulBackward0>)
tensor(92.1922, grad_fn=<MseLossBackward0>)
tensor(0.0083, grad_fn=<MulBackward0>)
tensor(118.2184, grad_fn=<MseLossBackward0>)
tensor(0.0610, grad_fn=<MulBackward0>)
tensor(93.5250, grad_fn=<MseLossBackward0>)
tensor(0.0083, grad_fn=<MulBackward0>)
tensor(101.4877, grad_fn=<MseLossBackward0>)
tensor(0.0114, grad_fn=<MulBackwar

Epoch 14/50: 44it [00:00, 97.08it/s] 

tensor(122.9590, grad_fn=<MseLossBackward0>)
tensor(0.0563, grad_fn=<MulBackward0>)
tensor(99.6826, grad_fn=<MseLossBackward0>)
tensor(0.0165, grad_fn=<MulBackward0>)
tensor(98.6918, grad_fn=<MseLossBackward0>)
tensor(0.0136, grad_fn=<MulBackward0>)
tensor(105.4754, grad_fn=<MseLossBackward0>)
tensor(0.0225, grad_fn=<MulBackward0>)
tensor(102.3408, grad_fn=<MseLossBackward0>)
tensor(0.0104, grad_fn=<MulBackward0>)
tensor(99.7760, grad_fn=<MseLossBackward0>)
tensor(0.0121, grad_fn=<MulBackward0>)
tensor(99.5837, grad_fn=<MseLossBackward0>)
tensor(0.0176, grad_fn=<MulBackward0>)
tensor(103.1536, grad_fn=<MseLossBackward0>)
tensor(0.0129, grad_fn=<MulBackward0>)
tensor(105.5172, grad_fn=<MseLossBackward0>)
tensor(0.0133, grad_fn=<MulBackward0>)
tensor(174.8947, grad_fn=<MseLossBackward0>)
tensor(0.0552, grad_fn=<MulBackward0>)
tensor(110.0227, grad_fn=<MseLossBackward0>)
tensor(0.0238, grad_fn=<MulBackward0>)
tensor(96.7513, grad_fn=<MseLossBackward0>)
tensor(0.0102, grad_fn=<MulBackward0

Epoch 14/50: 60it [00:00, 101.59it/s]


tensor(94.4335, grad_fn=<MseLossBackward0>)
tensor(0.0078, grad_fn=<MulBackward0>)
tensor(103.9436, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MulBackward0>)
tensor(508.8578, grad_fn=<MseLossBackward0>)
tensor(0.0576, grad_fn=<MulBackward0>)
tensor(1129.0591, grad_fn=<MseLossBackward0>)
tensor(0.0604, grad_fn=<MulBackward0>)
tensor(105.2140, grad_fn=<MseLossBackward0>)
tensor(0.0112, grad_fn=<MulBackward0>)
tensor(101.0428, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
Epoch 14, Loss: 142.14009653727214


Epoch 15/50: 0it [00:00, ?it/s]

tensor(105.7366, grad_fn=<MseLossBackward0>)
tensor(0.0148, grad_fn=<MulBackward0>)
tensor(107.8671, grad_fn=<MseLossBackward0>)
tensor(0.0106, grad_fn=<MulBackward0>)
tensor(103.1140, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(102.9770, grad_fn=<MseLossBackward0>)
tensor(0.0083, grad_fn=<MulBackward0>)
tensor(103.8305, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(100.8773, grad_fn=<MseLossBackward0>)
tensor(0.0125, grad_fn=<MulBackward0>)
tensor(108.7454, grad_fn=<MseLossBackward0>)
tensor(0.0082, grad_fn=<MulBackward0>)
tensor(116.8142, grad_fn=<MseLossBackward0>)
tensor(0.0236, grad_fn=<MulBackward0>)
tensor(93.7546, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)


Epoch 15/50: 11it [00:00, 103.96it/s]

tensor(97.1157, grad_fn=<MseLossBackward0>)
tensor(0.0109, grad_fn=<MulBackward0>)
tensor(102.2025, grad_fn=<MseLossBackward0>)
tensor(0.0101, grad_fn=<MulBackward0>)
tensor(520.9307, grad_fn=<MseLossBackward0>)
tensor(0.1001, grad_fn=<MulBackward0>)
tensor(101.6892, grad_fn=<MseLossBackward0>)
tensor(0.0121, grad_fn=<MulBackward0>)
tensor(104.5377, grad_fn=<MseLossBackward0>)
tensor(0.0129, grad_fn=<MulBackward0>)
tensor(102.6035, grad_fn=<MseLossBackward0>)
tensor(0.0095, grad_fn=<MulBackward0>)
tensor(102.3761, grad_fn=<MseLossBackward0>)
tensor(0.0096, grad_fn=<MulBackward0>)


Epoch 15/50: 22it [00:00, 106.87it/s]

tensor(99.2503, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(99.9862, grad_fn=<MseLossBackward0>)
tensor(0.0091, grad_fn=<MulBackward0>)
tensor(111.2135, grad_fn=<MseLossBackward0>)
tensor(0.0295, grad_fn=<MulBackward0>)
tensor(1199.5630, grad_fn=<MseLossBackward0>)
tensor(0.1078, grad_fn=<MulBackward0>)
tensor(98.9525, grad_fn=<MseLossBackward0>)
tensor(0.0101, grad_fn=<MulBackward0>)
tensor(144.2847, grad_fn=<MseLossBackward0>)
tensor(0.0590, grad_fn=<MulBackward0>)
tensor(103.8011, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)
tensor(97.9280, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(101.3505, grad_fn=<MseLossBackward0>)
tensor(0.0114, grad_fn=<MulBackward0>)
tensor(304.9159, grad_fn=<MseLossBackward0>)
tensor(0.0608, grad_fn=<MulBackward0>)
tensor(105.5610, grad_fn=<MseLossBackward0>)
tensor(0.0107, grad_fn=<MulBackward0>)
tensor(106.7373, grad_fn=<MseLossBackward0>)
tensor(0.0157, grad_fn=<MulBackwar

Epoch 15/50: 33it [00:00, 102.33it/s]

tensor(102.2950, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)
tensor(102.3996, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(252.3008, grad_fn=<MseLossBackward0>)
tensor(0.0535, grad_fn=<MulBackward0>)
tensor(108.8443, grad_fn=<MseLossBackward0>)
tensor(0.0126, grad_fn=<MulBackward0>)
tensor(101.9522, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(99.8407, grad_fn=<MseLossBackward0>)
tensor(0.0101, grad_fn=<MulBackward0>)
tensor(99.9205, grad_fn=<MseLossBackward0>)
tensor(0.0082, grad_fn=<MulBackward0>)
tensor(102.1805, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(102.0572, grad_fn=<MseLossBackward0>)
tensor(0.0132, grad_fn=<MulBackward0>)
tensor(145.8991, grad_fn=<MseLossBackward0>)
tensor(0.1014, grad_fn=<MulBackward0>)
tensor(382.0997, grad_fn=<MseLossBackward0>)
tensor(0.0602, grad_fn=<MulBackward0>)
tensor(102.1130, grad_fn=<MseLossBackward0>)
tensor(0.0091, grad_fn=<MulBackwa

Epoch 15/50: 45it [00:00, 105.19it/s]

tensor(177.7057, grad_fn=<MseLossBackward0>)
tensor(0.0524, grad_fn=<MulBackward0>)
tensor(114.4233, grad_fn=<MseLossBackward0>)
tensor(0.0179, grad_fn=<MulBackward0>)
tensor(110.3849, grad_fn=<MseLossBackward0>)
tensor(0.0170, grad_fn=<MulBackward0>)
tensor(105.6924, grad_fn=<MseLossBackward0>)
tensor(0.0145, grad_fn=<MulBackward0>)
tensor(101.8396, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(109.3197, grad_fn=<MseLossBackward0>)
tensor(0.0093, grad_fn=<MulBackward0>)
tensor(103.7404, grad_fn=<MseLossBackward0>)
tensor(0.0108, grad_fn=<MulBackward0>)
tensor(108.3372, grad_fn=<MseLossBackward0>)
tensor(0.0181, grad_fn=<MulBackward0>)


Epoch 15/50: 56it [00:00, 96.53it/s] 

tensor(122.9660, grad_fn=<MseLossBackward0>)
tensor(0.0639, grad_fn=<MulBackward0>)
tensor(109.2880, grad_fn=<MseLossBackward0>)
tensor(0.0138, grad_fn=<MulBackward0>)
tensor(102.8929, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)


Epoch 15/50: 60it [00:00, 99.57it/s]


tensor(100.6411, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(99.1439, grad_fn=<MseLossBackward0>)
tensor(0.0118, grad_fn=<MulBackward0>)
tensor(99.9479, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(102.0237, grad_fn=<MseLossBackward0>)
tensor(0.0118, grad_fn=<MulBackward0>)
Epoch 15, Loss: 142.13871205647786


Epoch 16/50: 0it [00:00, ?it/s]

tensor(108.1836, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(102.9673, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(173.0003, grad_fn=<MseLossBackward0>)
tensor(0.1222, grad_fn=<MulBackward0>)
tensor(99.9565, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(98.6967, grad_fn=<MseLossBackward0>)
tensor(0.0113, grad_fn=<MulBackward0>)
tensor(93.6293, grad_fn=<MseLossBackward0>)
tensor(0.0106, grad_fn=<MulBackward0>)
tensor(310.7157, grad_fn=<MseLossBackward0>)
tensor(0.0512, grad_fn=<MulBackward0>)
tensor(103.5174, grad_fn=<MseLossBackward0>)
tensor(0.0102, grad_fn=<MulBackward0>)


Epoch 16/50: 10it [00:00, 99.32it/s]

tensor(107.4525, grad_fn=<MseLossBackward0>)
tensor(0.0090, grad_fn=<MulBackward0>)
tensor(533.7302, grad_fn=<MseLossBackward0>)
tensor(0.1020, grad_fn=<MulBackward0>)
tensor(97.2648, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(117.6422, grad_fn=<MseLossBackward0>)
tensor(0.0529, grad_fn=<MulBackward0>)
tensor(107.1097, grad_fn=<MseLossBackward0>)
tensor(0.0230, grad_fn=<MulBackward0>)
tensor(381.3362, grad_fn=<MseLossBackward0>)
tensor(0.0546, grad_fn=<MulBackward0>)
tensor(103.9896, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)
tensor(173.3143, grad_fn=<MseLossBackward0>)
tensor(0.0533, grad_fn=<MulBackward0>)


Epoch 16/50: 20it [00:00, 95.50it/s]

tensor(100.3152, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)
tensor(106.1120, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(96.1690, grad_fn=<MseLossBackward0>)
tensor(0.0075, grad_fn=<MulBackward0>)
tensor(1126.7144, grad_fn=<MseLossBackward0>)
tensor(0.0533, grad_fn=<MulBackward0>)
tensor(104.8700, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(104.0496, grad_fn=<MseLossBackward0>)
tensor(0.0128, grad_fn=<MulBackward0>)
tensor(103.8655, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(260.0700, grad_fn=<MseLossBackward0>)
tensor(0.0644, grad_fn=<MulBackward0>)
tensor(99.6421, grad_fn=<MseLossBackward0>)
tensor(0.0089, grad_fn=<MulBackward0>)
tensor(95.3643, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(105.5442, grad_fn=<MseLossBackward0>)
tensor(0.0152, grad_fn=<MulBackward0>)
tensor(96.2491, grad_fn=<MseLossBackward0>)
tensor(0.0073, grad_fn=<MulBackwar

Epoch 16/50: 30it [00:00, 97.20it/s]

tensor(98.1753, grad_fn=<MseLossBackward0>)
tensor(0.0083, grad_fn=<MulBackward0>)
tensor(103.5772, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(105.3118, grad_fn=<MseLossBackward0>)
tensor(0.0106, grad_fn=<MulBackward0>)
tensor(101.7812, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(103.5627, grad_fn=<MseLossBackward0>)
tensor(0.0083, grad_fn=<MulBackward0>)
tensor(102.7118, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)
tensor(103.3841, grad_fn=<MseLossBackward0>)
tensor(0.0110, grad_fn=<MulBackward0>)
tensor(105.9490, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(101.4098, grad_fn=<MseLossBackward0>)
tensor(0.0105, grad_fn=<MulBackward0>)


Epoch 16/50: 41it [00:00, 100.77it/s]

tensor(104.3911, grad_fn=<MseLossBackward0>)
tensor(0.0141, grad_fn=<MulBackward0>)
tensor(107.6739, grad_fn=<MseLossBackward0>)
tensor(0.0088, grad_fn=<MulBackward0>)
tensor(98.9284, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)
tensor(105.3878, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(105.7993, grad_fn=<MseLossBackward0>)
tensor(0.0197, grad_fn=<MulBackward0>)
tensor(99.4094, grad_fn=<MseLossBackward0>)
tensor(0.0102, grad_fn=<MulBackward0>)
tensor(105.2397, grad_fn=<MseLossBackward0>)
tensor(0.0137, grad_fn=<MulBackward0>)
tensor(113.3098, grad_fn=<MseLossBackward0>)
tensor(0.0098, grad_fn=<MulBackward0>)
tensor(97.2907, grad_fn=<MseLossBackward0>)
tensor(0.0122, grad_fn=<MulBackward0>)
tensor(100.7192, grad_fn=<MseLossBackward0>)
tensor(0.0075, grad_fn=<MulBackward0>)
tensor(102.6109, grad_fn=<MseLossBackward0>)
tensor(0.0089, grad_fn=<MulBackward0>)
tensor(102.3930, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackwar

Epoch 16/50: 52it [00:00, 101.58it/s]

tensor(0.0122, grad_fn=<MulBackward0>)
tensor(101.3338, grad_fn=<MseLossBackward0>)
tensor(0.0105, grad_fn=<MulBackward0>)
tensor(106.2491, grad_fn=<MseLossBackward0>)
tensor(0.0090, grad_fn=<MulBackward0>)
tensor(108.0848, grad_fn=<MseLossBackward0>)
tensor(0.0529, grad_fn=<MulBackward0>)
tensor(103.7524, grad_fn=<MseLossBackward0>)
tensor(0.0095, grad_fn=<MulBackward0>)
tensor(106.7973, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(112.3655, grad_fn=<MseLossBackward0>)
tensor(0.0176, grad_fn=<MulBackward0>)
tensor(99.1499, grad_fn=<MseLossBackward0>)
tensor(0.0098, grad_fn=<MulBackward0>)
tensor(186.5709, grad_fn=<MseLossBackward0>)
tensor(0.0588, grad_fn=<MulBackward0>)
tensor(108.4201, grad_fn=<MseLossBackward0>)
tensor(0.0105, grad_fn=<MulBackward0>)


Epoch 16/50: 60it [00:00, 100.19it/s]


tensor(108.4061, grad_fn=<MseLossBackward0>)
tensor(0.0104, grad_fn=<MulBackward0>)
Epoch 16, Loss: 142.1374673207601


Epoch 17/50: 0it [00:00, ?it/s]

tensor(106.9931, grad_fn=<MseLossBackward0>)
tensor(0.0082, grad_fn=<MulBackward0>)
tensor(103.6023, grad_fn=<MseLossBackward0>)
tensor(0.0106, grad_fn=<MulBackward0>)
tensor(102.1695, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(103.1653, grad_fn=<MseLossBackward0>)
tensor(0.0078, grad_fn=<MulBackward0>)
tensor(102.0018, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(103.1288, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(108.2941, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
tensor(104.2380, grad_fn=<MseLossBackward0>)
tensor(0.0108, grad_fn=<MulBackward0>)
tensor(181.9751, grad_fn=<MseLossBackward0>)
tensor(0.0553, grad_fn=<MulBackward0>)


Epoch 17/50: 11it [00:00, 101.97it/s]

tensor(104.4762, grad_fn=<MseLossBackward0>)
tensor(0.0102, grad_fn=<MulBackward0>)
tensor(503.3855, grad_fn=<MseLossBackward0>)
tensor(0.0531, grad_fn=<MulBackward0>)
tensor(1132.8658, grad_fn=<MseLossBackward0>)
tensor(0.0517, grad_fn=<MulBackward0>)
tensor(104.4418, grad_fn=<MseLossBackward0>)
tensor(0.0121, grad_fn=<MulBackward0>)
tensor(117.5502, grad_fn=<MseLossBackward0>)
tensor(0.0533, grad_fn=<MulBackward0>)
tensor(104.1645, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(97.1484, grad_fn=<MseLossBackward0>)
tensor(0.0099, grad_fn=<MulBackward0>)
tensor(102.0549, grad_fn=<MseLossBackward0>)
tensor(0.0102, grad_fn=<MulBackward0>)
tensor(116.1408, grad_fn=<MseLossBackward0>)
tensor(0.0509, grad_fn=<MulBackward0>)
tensor(98.7670, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(180.0280, grad_fn=<MseLossBackward0>)
tensor(0.0514, grad_fn=<MulBackward0>)
tensor(106.5150, grad_fn=<MseLossBackward0>)
tensor(0.0083, grad_fn=<MulBackw

Epoch 17/50: 22it [00:00, 101.02it/s]

tensor(103.8802, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(98.2375, grad_fn=<MseLossBackward0>)
tensor(0.0199, grad_fn=<MulBackward0>)
tensor(101.8697, grad_fn=<MseLossBackward0>)
tensor(0.0084, grad_fn=<MulBackward0>)
tensor(99.9184, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(98.8664, grad_fn=<MseLossBackward0>)
tensor(0.0082, grad_fn=<MulBackward0>)
tensor(106.5637, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(99.0960, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(109.8557, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)


Epoch 17/50: 33it [00:00, 100.80it/s]

tensor(103.3594, grad_fn=<MseLossBackward0>)
tensor(0.0090, grad_fn=<MulBackward0>)
tensor(111.4086, grad_fn=<MseLossBackward0>)
tensor(0.0103, grad_fn=<MulBackward0>)
tensor(107.9798, grad_fn=<MseLossBackward0>)
tensor(0.0182, grad_fn=<MulBackward0>)
tensor(128.5440, grad_fn=<MseLossBackward0>)
tensor(0.0801, grad_fn=<MulBackward0>)
tensor(99.4812, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(309.5465, grad_fn=<MseLossBackward0>)
tensor(0.0510, grad_fn=<MulBackward0>)
tensor(99.0731, grad_fn=<MseLossBackward0>)
tensor(0.0084, grad_fn=<MulBackward0>)
tensor(96.0812, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(99.4234, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(106.8552, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(102.8796, grad_fn=<MseLossBackward0>)
tensor(0.0088, grad_fn=<MulBackward0>)
tensor(101.6736, grad_fn=<MseLossBackward0>)
tensor(0.0131, grad_fn=<MulBackward

Epoch 17/50: 44it [00:00, 102.25it/s]

tensor(150.9557, grad_fn=<MseLossBackward0>)
tensor(0.0502, grad_fn=<MulBackward0>)
tensor(105.8232, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(101.1271, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(105.7155, grad_fn=<MseLossBackward0>)
tensor(0.0113, grad_fn=<MulBackward0>)
tensor(103.7713, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
tensor(106.6929, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)
tensor(105.5560, grad_fn=<MseLossBackward0>)
tensor(0.0113, grad_fn=<MulBackward0>)
tensor(99.7083, grad_fn=<MseLossBackward0>)


Epoch 17/50: 55it [00:00, 102.18it/s]

tensor(0.0093, grad_fn=<MulBackward0>)
tensor(101.5846, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(99.2362, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(101.8860, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(108.7700, grad_fn=<MseLossBackward0>)
tensor(0.0101, grad_fn=<MulBackward0>)
tensor(102.8977, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(541.6782, grad_fn=<MseLossBackward0>)
tensor(0.0932, grad_fn=<MulBackward0>)


Epoch 17/50: 60it [00:00, 98.50it/s] 


tensor(102.5762, grad_fn=<MseLossBackward0>)
tensor(0.0107, grad_fn=<MulBackward0>)
tensor(98.5601, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(89.4014, grad_fn=<MseLossBackward0>)
tensor(0.0078, grad_fn=<MulBackward0>)
Epoch 17, Loss: 142.1360248565674


Epoch 18/50: 0it [00:00, ?it/s]

tensor(105.3448, grad_fn=<MseLossBackward0>)
tensor(0.0080, grad_fn=<MulBackward0>)
tensor(307.2662, grad_fn=<MseLossBackward0>)
tensor(0.0549, grad_fn=<MulBackward0>)
tensor(96.5810, grad_fn=<MseLossBackward0>)
tensor(0.0073, grad_fn=<MulBackward0>)
tensor(100.5887, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(101.9579, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(104.4449, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(103.6630, grad_fn=<MseLossBackward0>)
tensor(0.0147, grad_fn=<MulBackward0>)
tensor(98.6702, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(103.2958, grad_fn=<MseLossBackward0>)
tensor(0.0096, grad_fn=<MulBackward0>)


Epoch 18/50: 11it [00:00, 103.26it/s]

tensor(102.2694, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(107.8439, grad_fn=<MseLossBackward0>)
tensor(0.0103, grad_fn=<MulBackward0>)
tensor(127.3386, grad_fn=<MseLossBackward0>)
tensor(0.0329, grad_fn=<MulBackward0>)
tensor(101.5189, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(94.9210, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(103.1060, grad_fn=<MseLossBackward0>)
tensor(0.0073, grad_fn=<MulBackward0>)
tensor(95.6031, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(107.6251, grad_fn=<MseLossBackward0>)
tensor(0.0180, grad_fn=<MulBackward0>)
tensor(101.0625, grad_fn=<MseLossBackward0>)


Epoch 18/50: 22it [00:00, 106.21it/s]

tensor(0.0097, grad_fn=<MulBackward0>)
tensor(103.1761, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(103.9706, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(129.1834, grad_fn=<MseLossBackward0>)
tensor(0.0903, grad_fn=<MulBackward0>)
tensor(104.7194, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(108.4982, grad_fn=<MseLossBackward0>)
tensor(0.0195, grad_fn=<MulBackward0>)
tensor(103.1134, grad_fn=<MseLossBackward0>)
tensor(0.0089, grad_fn=<MulBackward0>)
tensor(104.2146, grad_fn=<MseLossBackward0>)
tensor(0.0064, grad_fn=<MulBackward0>)
tensor(101.7536, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(106.5738, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(503.6707, grad_fn=<MseLossBackward0>)
tensor(0.0473, grad_fn=<MulBackward0>)
tensor(302.0215, grad_fn=<MseLossBackward0>)
tensor(0.0911, grad_fn=<MulBackward0>)
tensor(101.3862, grad_fn=<MseLossBack

Epoch 18/50: 33it [00:00, 103.65it/s]

tensor(103.4159, grad_fn=<MseLossBackward0>)
tensor(0.0100, grad_fn=<MulBackward0>)
tensor(104.1310, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(102.7126, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(106.0537, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(103.2168, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(127.0557, grad_fn=<MseLossBackward0>)
tensor(0.0514, grad_fn=<MulBackward0>)
tensor(129.6839, grad_fn=<MseLossBackward0>)
tensor(0.0480, grad_fn=<MulBackward0>)


Epoch 18/50: 44it [00:00, 105.52it/s]

tensor(101.7180, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(166.5054, grad_fn=<MseLossBackward0>)
tensor(0.0505, grad_fn=<MulBackward0>)
tensor(180.6008, grad_fn=<MseLossBackward0>)
tensor(0.0480, grad_fn=<MulBackward0>)
tensor(99.4857, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(107.4326, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(104.4168, grad_fn=<MseLossBackward0>)
tensor(0.0144, grad_fn=<MulBackward0>)
tensor(98.5709, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(1130.1799, grad_fn=<MseLossBackward0>)
tensor(0.0466, grad_fn=<MulBackward0>)
tensor(104.5438, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(95.1395, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(107.5633, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MulBackward0>)
tensor(97.8919, grad_fn=<MseLossBackward0>)
tensor(0.0108, grad_fn=<MulBackwar

Epoch 18/50: 55it [00:00, 104.22it/s]

tensor(99.3463, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
tensor(102.3087, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(111.6640, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(106.1823, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)
tensor(109.3872, grad_fn=<MseLossBackward0>)
tensor(0.0075, grad_fn=<MulBackward0>)
tensor(101.9356, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(102.8920, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(99.3797, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)


Epoch 18/50: 60it [00:00, 104.24it/s]


Epoch 18, Loss: 142.13519096374512


Epoch 19/50: 0it [00:00, ?it/s]

tensor(104.3089, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
tensor(100.6171, grad_fn=<MseLossBackward0>)
tensor(0.0075, grad_fn=<MulBackward0>)
tensor(107.4841, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(104.2863, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(104.1746, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(109.4015, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(104.1896, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)


Epoch 19/50: 12it [00:00, 113.89it/s]

tensor(125.0600, grad_fn=<MseLossBackward0>)
tensor(0.0609, grad_fn=<MulBackward0>)
tensor(99.1790, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(92.4602, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(98.6602, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(108.9185, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(120.1357, grad_fn=<MseLossBackward0>)
tensor(0.0461, grad_fn=<MulBackward0>)
tensor(100.2324, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(270.1400, grad_fn=<MseLossBackward0>)
tensor(0.0896, grad_fn=<MulBackward0>)
tensor(91.9404, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(128.2073, grad_fn=<MseLossBackward0>)
tensor(0.0457, grad_fn=<MulBackward0>)
tensor(94.6639, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(104.9763, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0

Epoch 19/50: 24it [00:00, 103.67it/s]

tensor(98.2433, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(103.7886, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(105.6118, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(97.6149, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(186.7205, grad_fn=<MseLossBackward0>)
tensor(0.0500, grad_fn=<MulBackward0>)
tensor(105.1361, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(103.9626, grad_fn=<MseLossBackward0>)
tensor(0.0105, grad_fn=<MulBackward0>)
tensor(300.3227, grad_fn=<MseLossBackward0>)
tensor(0.0483, grad_fn=<MulBackward0>)
tensor(96.8722, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackward0>)
tensor(103.3567, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(106.4519, grad_fn=<MseLossBackward0>)
tensor(0.0109, grad_fn=<MulBackward0>)
tensor(105.1978, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackwar

Epoch 19/50: 36it [00:00, 107.71it/s]

tensor(102.5423, grad_fn=<MseLossBackward0>)
tensor(0.0090, grad_fn=<MulBackward0>)
tensor(93.9603, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(107.2700, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(105.6683, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(172.2236, grad_fn=<MseLossBackward0>)
tensor(0.0451, grad_fn=<MulBackward0>)
tensor(98.9512, grad_fn=<MseLossBackward0>)
tensor(0.0093, grad_fn=<MulBackward0>)
tensor(101.6115, grad_fn=<MseLossBackward0>)
tensor(0.0089, grad_fn=<MulBackward0>)
tensor(508.3038, grad_fn=<MseLossBackward0>)
tensor(0.0479, grad_fn=<MulBackward0>)
tensor(105.0772, grad_fn=<MseLossBackward0>)


Epoch 19/50: 47it [00:00, 99.72it/s] 

tensor(0.0056, grad_fn=<MulBackward0>)
tensor(103.3505, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(97.2731, grad_fn=<MseLossBackward0>)
tensor(0.0112, grad_fn=<MulBackward0>)
tensor(112.7267, grad_fn=<MseLossBackward0>)
tensor(0.0256, grad_fn=<MulBackward0>)
tensor(102.1650, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(105.2085, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(103.0958, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(102.0920, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(105.0577, grad_fn=<MseLossBackward0>)
tensor(0.0080, grad_fn=<MulBackward0>)
tensor(98.4885, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)
tensor(107.0154, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(103.1752, grad_fn=<MseLossBackward0>)
tensor(0.0155, grad_fn=<MulBackward0>)
tensor(1134.1473, grad_fn=<MseLossBackw

Epoch 19/50: 60it [00:00, 102.68it/s]


tensor(110.4093, grad_fn=<MseLossBackward0>)
tensor(0.0082, grad_fn=<MulBackward0>)
tensor(101.5579, grad_fn=<MseLossBackward0>)
tensor(0.0124, grad_fn=<MulBackward0>)
tensor(111.2538, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(106.0057, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
Epoch 19, Loss: 142.13404324849446


Epoch 20/50: 0it [00:00, ?it/s]

tensor(102.3143, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(102.4481, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(101.9192, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(335.2971, grad_fn=<MseLossBackward0>)
tensor(0.0859, grad_fn=<MulBackward0>)
tensor(109.9457, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(99.9524, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(104.8074, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MulBackward0>)
tensor(107.3261, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(103.3128, grad_fn=<MseLossBackward0>)
tensor(0.0080, grad_fn=<MulBackward0>)


Epoch 20/50: 11it [00:00, 101.87it/s]

tensor(102.4565, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(103.2602, grad_fn=<MseLossBackward0>)
tensor(0.0096, grad_fn=<MulBackward0>)
tensor(105.3220, grad_fn=<MseLossBackward0>)
tensor(0.0078, grad_fn=<MulBackward0>)
tensor(96.4188, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(172.5139, grad_fn=<MseLossBackward0>)
tensor(0.0491, grad_fn=<MulBackward0>)
tensor(97.3052, grad_fn=<MseLossBackward0>)
tensor(0.0140, grad_fn=<MulBackward0>)
tensor(106.1408, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(100.3674, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(102.5620, grad_fn=<MseLossBackward0>)
tensor(0.0075, grad_fn=<MulBackward0>)
tensor(110.0946, grad_fn=<MseLossBackward0>)
tensor(0.0154, grad_fn=<MulBackward0>)
tensor(105.1174, grad_fn=<MseLossBackward0>)
tensor(0.0091, grad_fn=<MulBackward0>)
tensor(104.5118, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackwa

Epoch 20/50: 22it [00:00, 104.34it/s]

tensor(97.6376, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(96.1682, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(104.6867, grad_fn=<MseLossBackward0>)
tensor(0.0089, grad_fn=<MulBackward0>)
tensor(99.6464, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(104.5355, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(118.7124, grad_fn=<MseLossBackward0>)
tensor(0.0451, grad_fn=<MulBackward0>)
tensor(104.7729, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(107.7353, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(183.2017, grad_fn=<MseLossBackward0>)
tensor(0.0459, grad_fn=<MulBackward0>)
tensor(165.7127, grad_fn=<MseLossBackward0>)
tensor(0.0852, grad_fn=<MulBackward0>)


Epoch 20/50: 33it [00:00, 104.07it/s]

tensor(97.2300, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(103.4870, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(106.5621, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(108.7848, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(98.3076, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(109.5495, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)
tensor(102.4840, grad_fn=<MseLossBackward0>)
tensor(0.0064, grad_fn=<MulBackward0>)
tensor(99.9673, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(101.4321, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(105.9102, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(100.9316, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(99.5469, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward

Epoch 20/50: 44it [00:00, 105.25it/s]

tensor(113.9108, grad_fn=<MseLossBackward0>)
tensor(0.0470, grad_fn=<MulBackward0>)
tensor(513.2989, grad_fn=<MseLossBackward0>)
tensor(0.0460, grad_fn=<MulBackward0>)
tensor(269.4026, grad_fn=<MseLossBackward0>)
tensor(0.0633, grad_fn=<MulBackward0>)
tensor(1126.9769, grad_fn=<MseLossBackward0>)
tensor(0.0457, grad_fn=<MulBackward0>)
tensor(104.3585, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(107.5856, grad_fn=<MseLossBackward0>)
tensor(0.0097, grad_fn=<MulBackward0>)
tensor(99.0387, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackward0>)
tensor(105.1409, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)
tensor(100.7113, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(374.9394, grad_fn=<MseLossBackward0>)
tensor(0.0443, grad_fn=<MulBackward0>)


Epoch 20/50: 55it [00:00, 104.02it/s]

tensor(93.3182, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(108.9967, grad_fn=<MseLossBackward0>)
tensor(0.0085, grad_fn=<MulBackward0>)
tensor(99.8818, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(103.8209, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(103.2404, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)


Epoch 20/50: 60it [00:00, 102.75it/s]

tensor(111.4545, grad_fn=<MseLossBackward0>)
tensor(0.0192, grad_fn=<MulBackward0>)
tensor(100.6073, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)





Epoch 20, Loss: 142.1332219441732


Epoch 21/50: 0it [00:00, ?it/s]

tensor(105.3157, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(101.1276, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(100.2793, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(103.9719, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)
tensor(120.5662, grad_fn=<MseLossBackward0>)
tensor(0.0435, grad_fn=<MulBackward0>)
tensor(98.9711, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(101.6521, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)


Epoch 21/50: 11it [00:00, 103.75it/s]

tensor(1135.6952, grad_fn=<MseLossBackward0>)
tensor(0.0435, grad_fn=<MulBackward0>)
tensor(95.9622, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(101.0348, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(107.2652, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(102.1183, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(101.5704, grad_fn=<MseLossBackward0>)
tensor(0.0116, grad_fn=<MulBackward0>)
tensor(108.0245, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(399.3697, grad_fn=<MseLossBackward0>)
tensor(0.0845, grad_fn=<MulBackward0>)
tensor(101.1357, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(103.0197, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(104.5085, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(97.4365, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackw

Epoch 21/50: 22it [00:00, 96.66it/s] 

tensor(114.1209, grad_fn=<MseLossBackward0>)
tensor(0.0172, grad_fn=<MulBackward0>)
tensor(128.2321, grad_fn=<MseLossBackward0>)
tensor(0.0436, grad_fn=<MulBackward0>)
tensor(103.8269, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(100.5399, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(102.4715, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(107.2246, grad_fn=<MseLossBackward0>)
tensor(0.0064, grad_fn=<MulBackward0>)
tensor(104.6435, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(99.5828, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(107.3406, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(104.7870, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(119.5821, grad_fn=<MseLossBackward0>)
tensor(0.0200, grad_fn=<MulBackward0>)


Epoch 21/50: 33it [00:00, 99.01it/s]

tensor(99.2839, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(103.8241, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(93.7539, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(99.4872, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(311.8076, grad_fn=<MseLossBackward0>)
tensor(0.0578, grad_fn=<MulBackward0>)
tensor(93.6556, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(97.1950, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(108.7283, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(104.3608, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(104.5602, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)


Epoch 21/50: 43it [00:00, 98.81it/s]

tensor(101.7879, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(105.8484, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(107.9133, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(106.4612, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(97.6259, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(227.7472, grad_fn=<MseLossBackward0>)
tensor(0.0819, grad_fn=<MulBackward0>)
tensor(114.2871, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(103.4708, grad_fn=<MseLossBackward0>)
tensor(0.0144, grad_fn=<MulBackward0>)
tensor(100.1788, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(505.2039, grad_fn=<MseLossBackward0>)
tensor(0.0427, grad_fn=<MulBackward0>)
tensor(94.9459, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)


Epoch 21/50: 54it [00:00, 100.46it/s]

tensor(187.4851, grad_fn=<MseLossBackward0>)
tensor(0.0796, grad_fn=<MulBackward0>)
tensor(102.0464, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(104.4802, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(101.3532, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(102.9549, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(255.8924, grad_fn=<MseLossBackward0>)
tensor(0.0407, grad_fn=<MulBackward0>)
tensor(105.5709, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)


Epoch 21/50: 60it [00:00, 99.84it/s] 


tensor(96.1358, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
Epoch 21, Loss: 142.13238182067872


Epoch 22/50: 0it [00:00, ?it/s]

tensor(110.7688, grad_fn=<MseLossBackward0>)
tensor(0.0422, grad_fn=<MulBackward0>)
tensor(105.4775, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(112.8172, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(100.0300, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(100.5646, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(108.3268, grad_fn=<MseLossBackward0>)
tensor(0.0080, grad_fn=<MulBackward0>)
tensor(108.7328, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(96.8795, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(121.0779, grad_fn=<MseLossBackward0>)
tensor(0.0462, grad_fn=<MulBackward0>)


Epoch 22/50: 11it [00:00, 103.66it/s]

tensor(101.7731, grad_fn=<MseLossBackward0>)
tensor(0.0104, grad_fn=<MulBackward0>)
tensor(380.5918, grad_fn=<MseLossBackward0>)
tensor(0.0415, grad_fn=<MulBackward0>)
tensor(115.3988, grad_fn=<MseLossBackward0>)
tensor(0.0422, grad_fn=<MulBackward0>)
tensor(103.0676, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(107.7170, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(102.6303, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(99.3312, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(101.6278, grad_fn=<MseLossBackward0>)
tensor(0.0098, grad_fn=<MulBackward0>)
tensor(105.1965, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(97.7297, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(501.6035, grad_fn=<MseLossBackward0>)
tensor(0.0424, grad_fn=<MulBackward0>)


Epoch 22/50: 22it [00:00, 104.75it/s]

tensor(99.5458, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(101.1235, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(102.3306, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(102.9290, grad_fn=<MseLossBackward0>)
tensor(0.0123, grad_fn=<MulBackward0>)
tensor(102.0720, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(100.6633, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(1128.7678, grad_fn=<MseLossBackward0>)
tensor(0.0422, grad_fn=<MulBackward0>)
tensor(99.6200, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(99.7561, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(106.7320, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(98.4265, grad_fn=<MseLossBackward0>)


Epoch 22/50: 33it [00:00, 104.44it/s]

tensor(0.0039, grad_fn=<MulBackward0>)
tensor(145.4095, grad_fn=<MseLossBackward0>)
tensor(0.0446, grad_fn=<MulBackward0>)
tensor(102.1781, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(105.7149, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(101.1393, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(104.5440, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(261.1581, grad_fn=<MseLossBackward0>)
tensor(0.0423, grad_fn=<MulBackward0>)
tensor(104.9169, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(101.4860, grad_fn=<MseLossBackward0>)
tensor(0.0064, grad_fn=<MulBackward0>)
tensor(181.3881, grad_fn=<MseLossBackward0>)
tensor(0.0407, grad_fn=<MulBackward0>)
tensor(178.9373, grad_fn=<MseLossBackward0>)
tensor(0.0422, grad_fn=<MulBackward0>)
tensor(126.3105, grad_fn=<MseLossBackward0>)
tensor(0.0488, grad_fn=<MulBackward0>)


Epoch 22/50: 44it [00:00, 105.47it/s]

tensor(100.3040, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(106.8653, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(105.8075, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(99.6230, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(94.1462, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(102.1155, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)
tensor(102.2447, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(102.4933, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(104.2516, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)


Epoch 22/50: 55it [00:00, 103.20it/s]

tensor(106.5190, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(119.5736, grad_fn=<MseLossBackward0>)
tensor(0.0274, grad_fn=<MulBackward0>)
tensor(98.3524, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(104.0930, grad_fn=<MseLossBackward0>)
tensor(0.0127, grad_fn=<MulBackward0>)
tensor(105.7624, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackward0>)
tensor(312.9301, grad_fn=<MseLossBackward0>)
tensor(0.0432, grad_fn=<MulBackward0>)
tensor(111.8354, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)


Epoch 22/50: 60it [00:00, 102.58it/s]


tensor(104.5153, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(109.1560, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
Epoch 22, Loss: 142.13191833496094


Epoch 23/50: 0it [00:00, ?it/s]

tensor(109.0046, grad_fn=<MseLossBackward0>)
tensor(0.0135, grad_fn=<MulBackward0>)
tensor(1124.0017, grad_fn=<MseLossBackward0>)
tensor(0.0407, grad_fn=<MulBackward0>)
tensor(100.4510, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(100.2473, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(101.4935, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(103.6009, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(100.4611, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(100.4770, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)


Epoch 23/50: 9it [00:00, 89.28it/s]

tensor(102.5284, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(98.1257, grad_fn=<MseLossBackward0>)
tensor(0.0080, grad_fn=<MulBackward0>)
tensor(95.0620, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(108.8814, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(121.7339, grad_fn=<MseLossBackward0>)
tensor(0.0415, grad_fn=<MulBackward0>)
tensor(104.4231, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(97.6392, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(101.5938, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(383.0591, grad_fn=<MseLossBackward0>)
tensor(0.0395, grad_fn=<MulBackward0>)


Epoch 23/50: 20it [00:00, 96.78it/s]

tensor(98.6792, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(106.5304, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(97.6809, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(110.9308, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(120.9245, grad_fn=<MseLossBackward0>)
tensor(0.0402, grad_fn=<MulBackward0>)
tensor(102.7583, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(95.2572, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(109.4662, grad_fn=<MseLossBackward0>)
tensor(0.0108, grad_fn=<MulBackward0>)
tensor(256.7169, grad_fn=<MseLossBackward0>)
tensor(0.0400, grad_fn=<MulBackward0>)
tensor(98.6564, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(110.6922, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(98.0588, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0

Epoch 23/50: 31it [00:00, 101.98it/s]

tensor(0.0044, grad_fn=<MulBackward0>)
tensor(98.3248, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(111.1930, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(99.1927, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(185.3804, grad_fn=<MseLossBackward0>)
tensor(0.0404, grad_fn=<MulBackward0>)
tensor(108.1974, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(111.5716, grad_fn=<MseLossBackward0>)
tensor(0.0224, grad_fn=<MulBackward0>)
tensor(105.4931, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(101.7275, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(106.7268, grad_fn=<MseLossBackward0>)
tensor(0.0135, grad_fn=<MulBackward0>)


Epoch 23/50: 42it [00:00, 103.33it/s]

tensor(102.1563, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(98.0260, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(106.3490, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(107.0867, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MulBackward0>)
tensor(149.0979, grad_fn=<MseLossBackward0>)
tensor(0.0750, grad_fn=<MulBackward0>)
tensor(100.4506, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(146.1436, grad_fn=<MseLossBackward0>)
tensor(0.0417, grad_fn=<MulBackward0>)
tensor(304.4500, grad_fn=<MseLossBackward0>)
tensor(0.0415, grad_fn=<MulBackward0>)
tensor(110.7024, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(101.6893, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(94.8625, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(103.8642, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackwa

Epoch 23/50: 53it [00:00, 103.46it/s]

tensor(104.6047, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(106.0472, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(95.4665, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(515.0566, grad_fn=<MseLossBackward0>)
tensor(0.0390, grad_fn=<MulBackward0>)
tensor(101.5320, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(100.6631, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackward0>)
tensor(175.3241, grad_fn=<MseLossBackward0>)
tensor(0.0407, grad_fn=<MulBackward0>)
tensor(102.0314, grad_fn=<MseLossBackward0>)
tensor(0.0078, grad_fn=<MulBackward0>)


Epoch 23/50: 60it [00:00, 101.96it/s]


tensor(108.3592, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
Epoch 23, Loss: 142.13098080952963


Epoch 24/50: 0it [00:00, ?it/s]

tensor(98.8161, grad_fn=<MseLossBackward0>)
tensor(0.0084, grad_fn=<MulBackward0>)
tensor(95.5922, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(98.7040, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(98.9294, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(108.3581, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(100.7113, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)


Epoch 24/50: 8it [00:00, 77.94it/s]

tensor(97.3303, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(113.8589, grad_fn=<MseLossBackward0>)
tensor(0.0122, grad_fn=<MulBackward0>)
tensor(104.7937, grad_fn=<MseLossBackward0>)
tensor(0.0077, grad_fn=<MulBackward0>)
tensor(105.8854, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(102.1985, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(100.5071, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(104.0918, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(1133.2373, grad_fn=<MseLossBackward0>)
tensor(0.0384, grad_fn=<MulBackward0>)
tensor(100.7326, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(108.8624, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(99.7142, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackward0>)


Epoch 24/50: 18it [00:00, 89.10it/s]

tensor(101.0213, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(97.1486, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(109.9206, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(103.0948, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(102.7137, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(106.8017, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(97.4302, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(101.9255, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(100.9720, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)


Epoch 24/50: 29it [00:00, 97.93it/s]

tensor(105.9725, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(103.3532, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(105.1691, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(101.9675, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(98.2639, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(97.3838, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(101.4214, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(174.4695, grad_fn=<MseLossBackward0>)
tensor(0.0411, grad_fn=<MulBackward0>)
tensor(104.9543, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(105.0206, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(168.8853, grad_fn=<MseLossBackward0>)
tensor(0.0394, grad_fn=<MulBackward0>)
tensor(108.3968, grad_fn=<MseLossBackward0>)
tensor(0.0092, grad_fn=<MulBackwa

Epoch 24/50: 39it [00:00, 98.37it/s]

tensor(109.4522, grad_fn=<MseLossBackward0>)
tensor(0.0069, grad_fn=<MulBackward0>)
tensor(107.1504, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(303.2652, grad_fn=<MseLossBackward0>)
tensor(0.0401, grad_fn=<MulBackward0>)
tensor(109.7249, grad_fn=<MseLossBackward0>)
tensor(0.0062, grad_fn=<MulBackward0>)
tensor(106.7662, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(102.6000, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(120.5889, grad_fn=<MseLossBackward0>)
tensor(0.0389, grad_fn=<MulBackward0>)
tensor(100.6055, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)


Epoch 24/50: 49it [00:00, 95.25it/s]

tensor(110.8529, grad_fn=<MseLossBackward0>)
tensor(0.0064, grad_fn=<MulBackward0>)
tensor(112.7910, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(122.9632, grad_fn=<MseLossBackward0>)
tensor(0.0514, grad_fn=<MulBackward0>)
tensor(102.8983, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(253.2612, grad_fn=<MseLossBackward0>)
tensor(0.0374, grad_fn=<MulBackward0>)
tensor(96.1995, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(104.9549, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(135.1111, grad_fn=<MseLossBackward0>)
tensor(0.0461, grad_fn=<MulBackward0>)
tensor(143.7340, grad_fn=<MseLossBackward0>)
tensor(0.0375, grad_fn=<MulBackward0>)
tensor(387.4583, grad_fn=<MseLossBackward0>)
tensor(0.0415, grad_fn=<MulBackward0>)
tensor(96.4839, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(107.2677, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackwa

Epoch 24/50: 60it [00:00, 95.84it/s]


Epoch 24, Loss: 142.13025932312013


Epoch 25/50: 0it [00:00, ?it/s]

tensor(104.3573, grad_fn=<MseLossBackward0>)
tensor(0.0132, grad_fn=<MulBackward0>)
tensor(101.5290, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(141.7251, grad_fn=<MseLossBackward0>)
tensor(0.0407, grad_fn=<MulBackward0>)
tensor(502.4254, grad_fn=<MseLossBackward0>)
tensor(0.0372, grad_fn=<MulBackward0>)
tensor(102.3947, grad_fn=<MseLossBackward0>)
tensor(0.0102, grad_fn=<MulBackward0>)
tensor(102.3022, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(106.1580, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)


Epoch 25/50: 11it [00:00, 107.49it/s]

tensor(102.0292, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(106.6604, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(95.7388, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(103.8563, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(102.7580, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(99.0929, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(110.4187, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(310.2756, grad_fn=<MseLossBackward0>)
tensor(0.0384, grad_fn=<MulBackward0>)
tensor(102.3828, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(100.5563, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(99.4982, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(101.7714, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackwar

Epoch 25/50: 22it [00:00, 105.58it/s]

tensor(110.6936, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(108.9987, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(106.9560, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(100.9347, grad_fn=<MseLossBackward0>)
tensor(0.0096, grad_fn=<MulBackward0>)
tensor(105.7335, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(166.7137, grad_fn=<MseLossBackward0>)
tensor(0.0360, grad_fn=<MulBackward0>)
tensor(102.7260, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(102.4188, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)


Epoch 25/50: 33it [00:00, 104.85it/s]

tensor(107.6988, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(108.3407, grad_fn=<MseLossBackward0>)
tensor(0.0373, grad_fn=<MulBackward0>)
tensor(186.1896, grad_fn=<MseLossBackward0>)
tensor(0.0366, grad_fn=<MulBackward0>)
tensor(104.6971, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(102.1746, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(97.1333, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(102.2156, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(105.0845, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(107.3742, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(103.7638, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(102.7279, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(110.2816, grad_fn=<MseLossBackward0>)
tensor(0.0128, grad_fn=<MulBackw

Epoch 25/50: 44it [00:00, 105.43it/s]

tensor(105.2855, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(98.2988, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(95.9442, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(99.2910, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(99.4566, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(380.4792, grad_fn=<MseLossBackward0>)
tensor(0.0389, grad_fn=<MulBackward0>)
tensor(132.8501, grad_fn=<MseLossBackward0>)
tensor(0.0482, grad_fn=<MulBackward0>)
tensor(110.8856, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(99.0817, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(116.6012, grad_fn=<MseLossBackward0>)
tensor(0.0374, grad_fn=<MulBackward0>)
tensor(101.7261, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(106.1533, grad_fn=<MseLossBackward0>)
tensor(0.0088, grad_fn=<MulBackward0

Epoch 25/50: 55it [00:00, 103.65it/s]

tensor(1136.1201, grad_fn=<MseLossBackward0>)
tensor(0.0395, grad_fn=<MulBackward0>)
tensor(259.5913, grad_fn=<MseLossBackward0>)
tensor(0.0395, grad_fn=<MulBackward0>)
tensor(104.1265, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(105.0201, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)


Epoch 25/50: 60it [00:00, 104.97it/s]


tensor(109.0112, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
Epoch 25, Loss: 142.12972666422527


Epoch 26/50: 0it [00:00, ?it/s]

tensor(101.7445, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(95.0367, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(106.8092, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(111.8544, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(387.6734, grad_fn=<MseLossBackward0>)
tensor(0.0357, grad_fn=<MulBackward0>)
tensor(106.5155, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(98.9135, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(1131.5569, grad_fn=<MseLossBackward0>)
tensor(0.0354, grad_fn=<MulBackward0>)


Epoch 26/50: 11it [00:00, 104.67it/s]

tensor(112.7689, grad_fn=<MseLossBackward0>)
tensor(0.0147, grad_fn=<MulBackward0>)
tensor(96.0963, grad_fn=<MseLossBackward0>)
tensor(0.0088, grad_fn=<MulBackward0>)
tensor(120.7110, grad_fn=<MseLossBackward0>)
tensor(0.0394, grad_fn=<MulBackward0>)
tensor(104.6571, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)
tensor(103.5263, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(105.6608, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(304.7647, grad_fn=<MseLossBackward0>)
tensor(0.0345, grad_fn=<MulBackward0>)
tensor(104.1836, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(102.6617, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(102.8285, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(97.0945, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(99.5790, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackwar

Epoch 26/50: 22it [00:00, 98.64it/s] 

tensor(151.2106, grad_fn=<MseLossBackward0>)
tensor(0.0353, grad_fn=<MulBackward0>)
tensor(97.6651, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(101.3258, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(507.8281, grad_fn=<MseLossBackward0>)
tensor(0.0380, grad_fn=<MulBackward0>)
tensor(102.6757, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(102.5836, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)
tensor(103.6262, grad_fn=<MseLossBackward0>)


Epoch 26/50: 32it [00:00, 98.04it/s]

tensor(0.0047, grad_fn=<MulBackward0>)
tensor(98.2693, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(100.3515, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(97.9296, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(94.3845, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(174.9211, grad_fn=<MseLossBackward0>)
tensor(0.0362, grad_fn=<MulBackward0>)
tensor(101.8146, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(108.2428, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(107.0417, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(100.6872, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(108.6089, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(110.0260, grad_fn=<MseLossBackward0>)
tensor(0.0120, grad_fn=<MulBackward0>)
tensor(104.9153, grad_fn=<MseLossBackwar

Epoch 26/50: 43it [00:00, 100.67it/s]

tensor(97.2941, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(97.3591, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(116.7728, grad_fn=<MseLossBackward0>)
tensor(0.0132, grad_fn=<MulBackward0>)
tensor(259.1267, grad_fn=<MseLossBackward0>)
tensor(0.0362, grad_fn=<MulBackward0>)
tensor(108.2932, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(100.2700, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(118.0879, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)
tensor(191.3197, grad_fn=<MseLossBackward0>)
tensor(0.0668, grad_fn=<MulBackward0>)
tensor(108.3029, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(122.3823, grad_fn=<MseLossBackward0>)
tensor(0.0349, grad_fn=<MulBackward0>)
tensor(104.9939, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(100.2731, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackwa

Epoch 26/50: 54it [00:00, 100.69it/s]

tensor(100.6903, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(100.8042, grad_fn=<MseLossBackward0>)
tensor(0.0067, grad_fn=<MulBackward0>)
tensor(102.1465, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)


Epoch 26/50: 60it [00:00, 100.55it/s]


tensor(106.0036, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(102.5748, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(102.2448, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackward0>)
Epoch 26, Loss: 142.12907218933105


Epoch 27/50: 0it [00:00, ?it/s]

tensor(99.7042, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(102.0064, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(104.0694, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(100.5780, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(100.2896, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(98.2667, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(93.2602, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(97.6537, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(102.3546, grad_fn=<MseLossBackward0>)
tensor(0.0060, grad_fn=<MulBackward0>)
tensor(115.5190, grad_fn=<MseLossBackward0>)


Epoch 27/50: 11it [00:00, 107.76it/s]

tensor(0.0075, grad_fn=<MulBackward0>)
tensor(100.4772, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(104.8186, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(1132.7283, grad_fn=<MseLossBackward0>)
tensor(0.0348, grad_fn=<MulBackward0>)
tensor(98.5708, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(100.6942, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(119.3038, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)
tensor(103.6746, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(110.4917, grad_fn=<MseLossBackward0>)
tensor(0.0052, grad_fn=<MulBackward0>)
tensor(174.6161, grad_fn=<MseLossBackward0>)
tensor(0.0341, grad_fn=<MulBackward0>)


Epoch 27/50: 22it [00:00, 105.86it/s]

tensor(100.4738, grad_fn=<MseLossBackward0>)
tensor(0.0048, grad_fn=<MulBackward0>)
tensor(107.8002, grad_fn=<MseLossBackward0>)
tensor(0.0098, grad_fn=<MulBackward0>)
tensor(99.5974, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(119.1682, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(104.7456, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(111.2988, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(109.3616, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(99.7986, grad_fn=<MseLossBackward0>)
tensor(0.0068, grad_fn=<MulBackward0>)
tensor(101.4272, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(182.1709, grad_fn=<MseLossBackward0>)
tensor(0.0344, grad_fn=<MulBackward0>)
tensor(106.1345, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)


Epoch 27/50: 33it [00:00, 102.02it/s]

tensor(121.7072, grad_fn=<MseLossBackward0>)
tensor(0.0357, grad_fn=<MulBackward0>)
tensor(99.0830, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(105.2500, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)
tensor(101.8332, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(103.4052, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(115.8103, grad_fn=<MseLossBackward0>)
tensor(0.0141, grad_fn=<MulBackward0>)
tensor(97.6525, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(99.8919, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(106.1016, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(506.0579, grad_fn=<MseLossBackward0>)
tensor(0.0336, grad_fn=<MulBackward0>)
tensor(103.2922, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(96.1444, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward

Epoch 27/50: 44it [00:00, 100.69it/s]

tensor(102.4483, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(99.7719, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(385.5566, grad_fn=<MseLossBackward0>)
tensor(0.0332, grad_fn=<MulBackward0>)
tensor(107.0569, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(115.9515, grad_fn=<MseLossBackward0>)
tensor(0.0107, grad_fn=<MulBackward0>)
tensor(101.1841, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(108.1649, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(107.3183, grad_fn=<MseLossBackward0>)
tensor(0.0338, grad_fn=<MulBackward0>)


Epoch 27/50: 55it [00:00, 102.22it/s]

tensor(124.4853, grad_fn=<MseLossBackward0>)
tensor(0.0329, grad_fn=<MulBackward0>)
tensor(100.8548, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(98.3045, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(95.0959, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(97.0715, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(106.1479, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(105.9383, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(262.3563, grad_fn=<MseLossBackward0>)
tensor(0.0352, grad_fn=<MulBackward0>)
tensor(351.7090, grad_fn=<MseLossBackward0>)
tensor(0.0665, grad_fn=<MulBackward0>)


Epoch 27/50: 60it [00:00, 101.62it/s]


Epoch 27, Loss: 142.1285124460856


Epoch 28/50: 0it [00:00, ?it/s]

tensor(105.1353, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(175.5342, grad_fn=<MseLossBackward0>)
tensor(0.0355, grad_fn=<MulBackward0>)
tensor(99.6890, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(100.9998, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(100.8285, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(104.1318, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(263.1254, grad_fn=<MseLossBackward0>)
tensor(0.0369, grad_fn=<MulBackward0>)
tensor(180.4024, grad_fn=<MseLossBackward0>)
tensor(0.0330, grad_fn=<MulBackward0>)
tensor(99.6507, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)


Epoch 28/50: 10it [00:00, 95.52it/s]

tensor(110.0639, grad_fn=<MseLossBackward0>)
tensor(0.0051, grad_fn=<MulBackward0>)
tensor(105.0140, grad_fn=<MseLossBackward0>)
tensor(0.0066, grad_fn=<MulBackward0>)
tensor(105.7281, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(139.1892, grad_fn=<MseLossBackward0>)
tensor(0.0333, grad_fn=<MulBackward0>)
tensor(98.6757, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(105.5398, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(99.8770, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(102.2955, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(110.0617, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(107.0991, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(103.1673, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(103.6324, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackwa

Epoch 28/50: 21it [00:00, 99.86it/s]

tensor(110.9012, grad_fn=<MseLossBackward0>)
tensor(0.0061, grad_fn=<MulBackward0>)
tensor(102.8840, grad_fn=<MseLossBackward0>)
tensor(0.0055, grad_fn=<MulBackward0>)
tensor(95.7302, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(102.1964, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(113.5517, grad_fn=<MseLossBackward0>)
tensor(0.0094, grad_fn=<MulBackward0>)
tensor(101.2125, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(103.5864, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(380.2003, grad_fn=<MseLossBackward0>)
tensor(0.0326, grad_fn=<MulBackward0>)
tensor(108.2540, grad_fn=<MseLossBackward0>)
tensor(0.0135, grad_fn=<MulBackward0>)


Epoch 28/50: 31it [00:00, 98.12it/s]

tensor(511.6960, grad_fn=<MseLossBackward0>)
tensor(0.0333, grad_fn=<MulBackward0>)
tensor(304.8557, grad_fn=<MseLossBackward0>)
tensor(0.0326, grad_fn=<MulBackward0>)
tensor(104.0410, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(99.1127, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(130.9950, grad_fn=<MseLossBackward0>)
tensor(0.0322, grad_fn=<MulBackward0>)
tensor(99.7617, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(103.8959, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(99.1711, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(1146.3602, grad_fn=<MseLossBackward0>)
tensor(0.0634, grad_fn=<MulBackward0>)
tensor(104.0226, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)


Epoch 28/50: 41it [00:00, 98.79it/s]

tensor(104.4696, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(96.6623, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(104.6172, grad_fn=<MseLossBackward0>)
tensor(0.0071, grad_fn=<MulBackward0>)
tensor(97.5078, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(108.1272, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(98.8154, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(110.0262, grad_fn=<MseLossBackward0>)
tensor(0.0315, grad_fn=<MulBackward0>)
tensor(100.3904, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(104.0733, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(104.0351, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(108.3382, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)


Epoch 28/50: 52it [00:00, 101.86it/s]

tensor(100.6168, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(106.9902, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(110.9846, grad_fn=<MseLossBackward0>)
tensor(0.0103, grad_fn=<MulBackward0>)
tensor(116.2500, grad_fn=<MseLossBackward0>)
tensor(0.0316, grad_fn=<MulBackward0>)
tensor(104.8730, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(102.4064, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(98.0118, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(106.7230, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)


Epoch 28/50: 60it [00:00, 101.56it/s]


tensor(100.8877, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
Epoch 28, Loss: 142.1280216217041


Epoch 29/50: 0it [00:00, ?it/s]

tensor(112.0544, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(105.7410, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(116.7600, grad_fn=<MseLossBackward0>)
tensor(0.0311, grad_fn=<MulBackward0>)
tensor(99.5186, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(106.7338, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(109.5019, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(102.2128, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(104.7521, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(100.6253, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(101.9410, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(97.7100, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)


Epoch 29/50: 11it [00:00, 103.09it/s]

tensor(105.9334, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(102.6241, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(103.5994, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(95.6880, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(99.8876, grad_fn=<MseLossBackward0>)
tensor(0.0042, grad_fn=<MulBackward0>)
tensor(266.3503, grad_fn=<MseLossBackward0>)
tensor(0.0327, grad_fn=<MulBackward0>)
tensor(97.9824, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(97.4414, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(122.9557, grad_fn=<MseLossBackward0>)
tensor(0.0339, grad_fn=<MulBackward0>)


Epoch 29/50: 22it [00:00, 105.60it/s]

tensor(996.2375, grad_fn=<MseLossBackward0>)
tensor(0.0882, grad_fn=<MulBackward0>)
tensor(118.1399, grad_fn=<MseLossBackward0>)
tensor(0.0152, grad_fn=<MulBackward0>)
tensor(98.7950, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(100.9915, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(96.0155, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(104.5268, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(115.0873, grad_fn=<MseLossBackward0>)
tensor(0.0317, grad_fn=<MulBackward0>)
tensor(108.0330, grad_fn=<MseLossBackward0>)
tensor(0.0074, grad_fn=<MulBackward0>)
tensor(102.2831, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(106.5692, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(104.1107, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(183.3206, grad_fn=<MseLossBackward0>)
tensor(0.0314, grad_fn=<MulBackwa

Epoch 29/50: 33it [00:00, 96.73it/s] 

tensor(99.9835, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(105.2035, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)
tensor(106.1207, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(119.4932, grad_fn=<MseLossBackward0>)
tensor(0.0310, grad_fn=<MulBackward0>)
tensor(105.1759, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(100.3523, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(99.2555, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(1124.1572, grad_fn=<MseLossBackward0>)
tensor(0.0309, grad_fn=<MulBackward0>)
tensor(98.1077, grad_fn=<MseLossBackward0>)
tensor(0.0063, grad_fn=<MulBackward0>)
tensor(100.8463, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)


Epoch 29/50: 43it [00:00, 93.30it/s]

tensor(104.8613, grad_fn=<MseLossBackward0>)
tensor(0.0044, grad_fn=<MulBackward0>)
tensor(168.4700, grad_fn=<MseLossBackward0>)
tensor(0.0309, grad_fn=<MulBackward0>)
tensor(102.3766, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(104.8044, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(103.4683, grad_fn=<MseLossBackward0>)
tensor(0.0045, grad_fn=<MulBackward0>)
tensor(102.9160, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(105.2333, grad_fn=<MseLossBackward0>)
tensor(0.0065, grad_fn=<MulBackward0>)
tensor(114.9784, grad_fn=<MseLossBackward0>)
tensor(0.0090, grad_fn=<MulBackward0>)
tensor(103.0055, grad_fn=<MseLossBackward0>)
tensor(0.0035, grad_fn=<MulBackward0>)


Epoch 29/50: 53it [00:00, 90.05it/s]

tensor(99.2543, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(110.7700, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(103.5746, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(102.2029, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(102.4255, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(101.4228, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(109.2829, grad_fn=<MseLossBackward0>)
tensor(0.0087, grad_fn=<MulBackward0>)


Epoch 29/50: 60it [00:00, 94.08it/s]


tensor(102.5955, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(142.6117, grad_fn=<MseLossBackward0>)
tensor(0.0313, grad_fn=<MulBackward0>)
Epoch 29, Loss: 142.1274575551351


Epoch 30/50: 0it [00:00, ?it/s]

tensor(101.0152, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(102.1326, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(99.5903, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(103.6168, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(145.7060, grad_fn=<MseLossBackward0>)
tensor(0.0310, grad_fn=<MulBackward0>)
tensor(130.5027, grad_fn=<MseLossBackward0>)
tensor(0.0315, grad_fn=<MulBackward0>)
tensor(110.3478, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(113.5248, grad_fn=<MseLossBackward0>)
tensor(0.0301, grad_fn=<MulBackward0>)
tensor(96.6489, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(105.6614, grad_fn=<MseLossBackward0>)
tensor(0.0096, grad_fn=<MulBackward0>)


Epoch 30/50: 11it [00:00, 101.66it/s]

tensor(308.1064, grad_fn=<MseLossBackward0>)
tensor(0.0298, grad_fn=<MulBackward0>)
tensor(110.7867, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(102.5018, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(104.4131, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(105.8649, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(171.6053, grad_fn=<MseLossBackward0>)
tensor(0.0332, grad_fn=<MulBackward0>)
tensor(103.3422, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(97.5490, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(97.8243, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(392.3569, grad_fn=<MseLossBackward0>)
tensor(0.0302, grad_fn=<MulBackward0>)
tensor(99.9193, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(95.5047, grad_fn=<MseLossBackward0>)


Epoch 30/50: 22it [00:00, 103.21it/s]

tensor(0.0033, grad_fn=<MulBackward0>)
tensor(103.5068, grad_fn=<MseLossBackward0>)
tensor(0.0020, grad_fn=<MulBackward0>)
tensor(101.7989, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(100.3442, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(99.7714, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(106.9865, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(102.8880, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(100.6187, grad_fn=<MseLossBackward0>)
tensor(0.0070, grad_fn=<MulBackward0>)
tensor(108.2644, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(102.2327, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)


Epoch 30/50: 33it [00:00, 96.02it/s] 

tensor(104.9785, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(97.7692, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(99.9657, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(107.8556, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(116.3113, grad_fn=<MseLossBackward0>)
tensor(0.0104, grad_fn=<MulBackward0>)
tensor(103.0763, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(103.9667, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(107.9453, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(108.0178, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)
tensor(105.1573, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(102.6870, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(106.0953, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackwa

Epoch 30/50: 44it [00:00, 98.62it/s]

tensor(105.1709, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(103.5356, grad_fn=<MseLossBackward0>)
tensor(0.0079, grad_fn=<MulBackward0>)
tensor(100.1524, grad_fn=<MseLossBackward0>)
tensor(0.0054, grad_fn=<MulBackward0>)
tensor(96.5587, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(113.3919, grad_fn=<MseLossBackward0>)
tensor(0.0295, grad_fn=<MulBackward0>)
tensor(110.0423, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(173.2401, grad_fn=<MseLossBackward0>)
tensor(0.0308, grad_fn=<MulBackward0>)
tensor(1129.6226, grad_fn=<MseLossBackward0>)
tensor(0.0291, grad_fn=<MulBackward0>)
tensor(121.2431, grad_fn=<MseLossBackward0>)
tensor(0.0308, grad_fn=<MulBackward0>)


Epoch 30/50: 55it [00:00, 100.36it/s]

tensor(510.4774, grad_fn=<MseLossBackward0>)
tensor(0.0349, grad_fn=<MulBackward0>)
tensor(102.8587, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(100.6193, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(104.4463, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(105.3315, grad_fn=<MseLossBackward0>)
tensor(0.0057, grad_fn=<MulBackward0>)
tensor(107.2199, grad_fn=<MseLossBackward0>)
tensor(0.0056, grad_fn=<MulBackward0>)
tensor(101.3073, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(253.1007, grad_fn=<MseLossBackward0>)


Epoch 30/50: 60it [00:00, 100.26it/s]


tensor(0.0298, grad_fn=<MulBackward0>)
Epoch 30, Loss: 142.12713381449382


Epoch 31/50: 0it [00:00, ?it/s]

tensor(98.7742, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(98.4344, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(111.6718, grad_fn=<MseLossBackward0>)
tensor(0.0038, grad_fn=<MulBackward0>)
tensor(96.7086, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(105.7084, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(102.3106, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(101.9060, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(107.4335, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(105.9867, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)


Epoch 31/50: 10it [00:00, 97.99it/s]

tensor(103.4526, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(98.3525, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(98.2535, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(98.0020, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(107.7565, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(124.0712, grad_fn=<MseLossBackward0>)
tensor(0.0295, grad_fn=<MulBackward0>)
tensor(107.8676, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(102.2568, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(97.5277, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(104.1644, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)


Epoch 31/50: 20it [00:00, 87.96it/s]

tensor(103.8993, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(178.0245, grad_fn=<MseLossBackward0>)
tensor(0.0298, grad_fn=<MulBackward0>)
tensor(96.9108, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(102.0886, grad_fn=<MseLossBackward0>)
tensor(0.0050, grad_fn=<MulBackward0>)
tensor(218.3229, grad_fn=<MseLossBackward0>)
tensor(0.0552, grad_fn=<MulBackward0>)
tensor(102.8383, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(96.5727, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)
tensor(1405.4834, grad_fn=<MseLossBackward0>)
tensor(0.0580, grad_fn=<MulBackward0>)
tensor(501.9626, grad_fn=<MseLossBackward0>)
tensor(0.0287, grad_fn=<MulBackward0>)
tensor(103.1061, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(116.7171, grad_fn=<MseLossBackward0>)
tensor(0.0300, grad_fn=<MulBackward0>)


Epoch 31/50: 31it [00:00, 92.88it/s]

tensor(306.2600, grad_fn=<MseLossBackward0>)
tensor(0.0290, grad_fn=<MulBackward0>)
tensor(103.7057, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(98.9912, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(98.7424, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(265.6866, grad_fn=<MseLossBackward0>)
tensor(0.0293, grad_fn=<MulBackward0>)
tensor(112.1226, grad_fn=<MseLossBackward0>)
tensor(0.0289, grad_fn=<MulBackward0>)
tensor(99.0106, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(113.7447, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(107.5803, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(97.3515, grad_fn=<MseLossBackward0>)
tensor(0.0058, grad_fn=<MulBackward0>)


Epoch 31/50: 41it [00:00, 83.16it/s]

tensor(106.4932, grad_fn=<MseLossBackward0>)
tensor(0.0053, grad_fn=<MulBackward0>)
tensor(99.5116, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(121.5900, grad_fn=<MseLossBackward0>)
tensor(0.0086, grad_fn=<MulBackward0>)
tensor(99.7210, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(102.0167, grad_fn=<MseLossBackward0>)
tensor(0.0020, grad_fn=<MulBackward0>)
tensor(109.5336, grad_fn=<MseLossBackward0>)
tensor(0.0115, grad_fn=<MulBackward0>)
tensor(97.9870, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)


Epoch 31/50: 50it [00:00, 85.15it/s]

tensor(100.1327, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(105.5237, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(110.3442, grad_fn=<MseLossBackward0>)
tensor(0.0081, grad_fn=<MulBackward0>)
tensor(105.0156, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(101.0707, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(101.7499, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(104.3813, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(115.2194, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(98.3122, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(136.0944, grad_fn=<MseLossBackward0>)
tensor(0.0315, grad_fn=<MulBackward0>)
tensor(102.9877, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(104.1381, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackw

Epoch 31/50: 60it [00:00, 88.79it/s]


Epoch 31, Loss: 142.12666028340658


Epoch 32/50: 0it [00:00, ?it/s]

tensor(108.0811, grad_fn=<MseLossBackward0>)
tensor(0.0072, grad_fn=<MulBackward0>)
tensor(171.7683, grad_fn=<MseLossBackward0>)
tensor(0.0299, grad_fn=<MulBackward0>)
tensor(103.7257, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(101.1338, grad_fn=<MseLossBackward0>)
tensor(0.0020, grad_fn=<MulBackward0>)
tensor(102.9306, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(104.0136, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(99.1311, grad_fn=<MseLossBackward0>)
tensor(0.0019, grad_fn=<MulBackward0>)


Epoch 32/50: 10it [00:00, 93.63it/s]

tensor(99.5618, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(102.9903, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(101.4895, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(109.7350, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(99.1846, grad_fn=<MseLossBackward0>)
tensor(0.0046, grad_fn=<MulBackward0>)
tensor(378.5487, grad_fn=<MseLossBackward0>)
tensor(0.0282, grad_fn=<MulBackward0>)
tensor(103.1606, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(99.3486, grad_fn=<MseLossBackward0>)
tensor(0.0043, grad_fn=<MulBackward0>)
tensor(106.6929, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)
tensor(111.3300, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(121.4470, grad_fn=<MseLossBackward0>)
tensor(0.0291, grad_fn=<MulBackward0>)
tensor(97.0296, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward

Epoch 32/50: 20it [00:00, 93.58it/s]

tensor(0.0031, grad_fn=<MulBackward0>)
tensor(97.7427, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(1134.6160, grad_fn=<MseLossBackward0>)
tensor(0.0347, grad_fn=<MulBackward0>)
tensor(107.5448, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(108.8864, grad_fn=<MseLossBackward0>)
tensor(0.0036, grad_fn=<MulBackward0>)
tensor(334.3663, grad_fn=<MseLossBackward0>)
tensor(0.0549, grad_fn=<MulBackward0>)
tensor(103.5479, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(101.3109, grad_fn=<MseLossBackward0>)
tensor(0.0039, grad_fn=<MulBackward0>)


Epoch 32/50: 30it [00:00, 95.65it/s]

tensor(101.8610, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(133.9284, grad_fn=<MseLossBackward0>)
tensor(0.0328, grad_fn=<MulBackward0>)
tensor(96.5627, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(137.8808, grad_fn=<MseLossBackward0>)
tensor(0.0285, grad_fn=<MulBackward0>)
tensor(99.4757, grad_fn=<MseLossBackward0>)
tensor(0.0059, grad_fn=<MulBackward0>)
tensor(98.5564, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(98.2922, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(105.8444, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(103.2314, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(506.6159, grad_fn=<MseLossBackward0>)
tensor(0.0274, grad_fn=<MulBackward0>)
tensor(106.5316, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(96.8365, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0

Epoch 32/50: 41it [00:00, 100.42it/s]

tensor(102.8048, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(100.2420, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(105.5660, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(261.1255, grad_fn=<MseLossBackward0>)
tensor(0.0287, grad_fn=<MulBackward0>)
tensor(181.8666, grad_fn=<MseLossBackward0>)
tensor(0.0285, grad_fn=<MulBackward0>)
tensor(95.4033, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(101.1389, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(102.1023, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)


Epoch 32/50: 52it [00:00, 94.86it/s] 

tensor(117.9932, grad_fn=<MseLossBackward0>)
tensor(0.0031, grad_fn=<MulBackward0>)
tensor(104.4363, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(103.3132, grad_fn=<MseLossBackward0>)
tensor(0.0018, grad_fn=<MulBackward0>)
tensor(109.4881, grad_fn=<MseLossBackward0>)
tensor(0.0073, grad_fn=<MulBackward0>)
tensor(102.5117, grad_fn=<MseLossBackward0>)
tensor(0.0028, grad_fn=<MulBackward0>)
tensor(101.5565, grad_fn=<MseLossBackward0>)
tensor(0.0019, grad_fn=<MulBackward0>)
tensor(99.7946, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(104.5579, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(104.9679, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(101.2315, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(101.7578, grad_fn=<MseLossBackward0>)
tensor(0.0049, grad_fn=<MulBackward0>)


Epoch 32/50: 60it [00:00, 93.37it/s]

tensor(125.3276, grad_fn=<MseLossBackward0>)
tensor(0.0264, grad_fn=<MulBackward0>)





Epoch 32, Loss: 142.12619082132974


Epoch 33/50: 0it [00:00, ?it/s]

tensor(101.5716, grad_fn=<MseLossBackward0>)
tensor(0.0024, grad_fn=<MulBackward0>)
tensor(99.7894, grad_fn=<MseLossBackward0>)
tensor(0.0033, grad_fn=<MulBackward0>)
tensor(105.4983, grad_fn=<MseLossBackward0>)
tensor(0.0047, grad_fn=<MulBackward0>)
tensor(102.2250, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(104.8081, grad_fn=<MseLossBackward0>)
tensor(0.0041, grad_fn=<MulBackward0>)
tensor(106.4191, grad_fn=<MseLossBackward0>)
tensor(0.0027, grad_fn=<MulBackward0>)
tensor(110.0451, grad_fn=<MseLossBackward0>)
tensor(0.0034, grad_fn=<MulBackward0>)


Epoch 33/50: 11it [00:00, 108.04it/s]

tensor(99.6087, grad_fn=<MseLossBackward0>)
tensor(0.0017, grad_fn=<MulBackward0>)
tensor(176.6364, grad_fn=<MseLossBackward0>)
tensor(0.0313, grad_fn=<MulBackward0>)
tensor(103.0309, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(101.2923, grad_fn=<MseLossBackward0>)
tensor(0.0032, grad_fn=<MulBackward0>)
tensor(109.6973, grad_fn=<MseLossBackward0>)
tensor(0.0040, grad_fn=<MulBackward0>)
tensor(100.0365, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(102.3545, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)
tensor(101.7813, grad_fn=<MseLossBackward0>)
tensor(0.0016, grad_fn=<MulBackward0>)
tensor(95.7333, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(101.6213, grad_fn=<MseLossBackward0>)
tensor(0.0030, grad_fn=<MulBackward0>)
tensor(104.1940, grad_fn=<MseLossBackward0>)
tensor(0.0037, grad_fn=<MulBackward0>)
tensor(104.8363, grad_fn=<MseLossBackward0>)
tensor(0.0019, grad_fn=<MulBackwa

Epoch 33/50: 22it [00:00, 98.88it/s] 

tensor(102.9565, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(101.4712, grad_fn=<MseLossBackward0>)
tensor(0.0019, grad_fn=<MulBackward0>)
tensor(116.6988, grad_fn=<MseLossBackward0>)
tensor(0.0076, grad_fn=<MulBackward0>)
tensor(122.6440, grad_fn=<MseLossBackward0>)
tensor(0.0265, grad_fn=<MulBackward0>)
tensor(110.7377, grad_fn=<MseLossBackward0>)
tensor(0.0029, grad_fn=<MulBackward0>)
tensor(114.8324, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)


Epoch 33/50: 32it [00:00, 99.28it/s]

tensor(153.3581, grad_fn=<MseLossBackward0>)
tensor(0.0522, grad_fn=<MulBackward0>)
tensor(103.0278, grad_fn=<MseLossBackward0>)
tensor(0.0023, grad_fn=<MulBackward0>)
tensor(113.1070, grad_fn=<MseLossBackward0>)
tensor(0.0258, grad_fn=<MulBackward0>)
tensor(105.4720, grad_fn=<MseLossBackward0>)
tensor(0.0022, grad_fn=<MulBackward0>)
tensor(102.2342, grad_fn=<MseLossBackward0>)
tensor(0.0025, grad_fn=<MulBackward0>)
tensor(98.8203, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)


Epoch 33/50: 37it [00:00, 97.63it/s]

tensor(313.9956, grad_fn=<MseLossBackward0>)
tensor(0.0263, grad_fn=<MulBackward0>)
tensor(102.9308, grad_fn=<MseLossBackward0>)
tensor(0.0019, grad_fn=<MulBackward0>)
tensor(129.6376, grad_fn=<MseLossBackward0>)
tensor(0.0278, grad_fn=<MulBackward0>)
tensor(98.9569, grad_fn=<MseLossBackward0>)
tensor(0.0021, grad_fn=<MulBackward0>)
tensor(103.7599, grad_fn=<MseLossBackward0>)
tensor(0.0026, grad_fn=<MulBackward0>)





KeyboardInterrupt: 