In [154]:
import torch
import random
import time
import numpy as np
import pandas as pd
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pickle
import glob
import math
import pandas as pd
import collections
import random
from torch_geometric.nn import GCNConv
from torch_geometric.data import Batch

## Read demand data

In [155]:
def data_reader(filename):
    f = open(filename,'rb')
    data = pickle.load(f)
    return data

In [156]:
filename = glob.glob('../data/demand/*')
all_data = {}
node_feature = []
for i in filename:
    num = i.split('_')[1]
    data = data_reader(i)
    all_data[int(num)] = data
    node_feature.append(data)
sorted_data = collections.OrderedDict(sorted(all_data.items()))
node_feature = np.array(node_feature)

In [157]:
node_feature.shape

(262, 35064)

In [158]:
all_key = list(sorted_data.keys())

In [159]:
oldkey2newkey = {all_key[i] : i for i in range(len(all_key))}

## Read graph structure¶

In [160]:
file = open('../data/proxList','rb')
data = pickle.load(file)

In [161]:
edge_index = [[], []]
for item in data:
    edge_index[0].append(item[0])
    edge_index[1].append(item[1])

In [162]:
initial_feature = node_feature.mean()

In [163]:
node_feature.mean(1).shape

(262,)

## Clean graph

In [164]:
new_graph = [[], []]
for i in range(len(edge_index[0])):
    if edge_index[0][i] in all_key and edge_index[1][i] in all_key:
        new_graph[0].append(oldkey2newkey[edge_index[0][i]])
        new_graph[1].append(oldkey2newkey[edge_index[1][i]])

In [165]:
edge_index = torch.tensor(new_graph, dtype=torch.long)
node_feature = torch.tensor(node_feature, dtype=torch.float32)

## Hyperparameters

In [279]:
class Args():
    def __init__(self):
        self.split_ratio = [0.7, 0.2, 0.1]
        self.hist_length = 24 
        self.pred_length = 12
        self.batch_size = 1
        self.num_node_features = 1
        self.gcn_hidden_dim = 16
        self.encoder_hidden_dim = 16
        self.decoder_hidden_dim = 16
        self.zone_num = 1
        # self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.device = torch.device("cpu")
        self.epoch_num = 30

## Dataset

In [280]:
class GraphDataset(Dataset):
    def __init__(self, node_feature, hist_length, pred_length):
        self.node_feature = node_feature
        self.total_len = hist_length + pred_length
        self.hist_len = hist_length
        self.pred_len = pred_length
        
    def __len__(self):
        return int(self.node_feature.shape[1] - self.total_len)
    
    def __getitem__(self, index):
        return (self.node_feature[:, index:index + self.hist_len], 
            self.node_feature[:, index + self.hist_len: index + self.total_len])

In [281]:
total_len = node_feature.shape[1]
train_st = 0
train_ed = math.floor(total_len * args.split_ratio[0])
valid_st = math.floor(total_len * args.split_ratio[0])
valid_ed = math.floor(total_len * (args.split_ratio[0] + args.split_ratio[1]))
test_st = math.floor(total_len * (args.split_ratio[0] + args.split_ratio[1]))
test_ed = total_len
train_dataset = GraphDataset(node_feature[:, train_st:train_ed], args.hist_length, args.pred_length)
val_dataset = GraphDataset(node_feature[:, valid_st:valid_ed], args.hist_length, args.pred_length)
test_dataset = GraphDataset(node_feature[:, test_st:test_ed], args.hist_length, args.pred_length)

In [282]:
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

In [283]:
len(train_dataset)

24508

In [284]:
train_dataset[24510][0].shape

torch.Size([262, 24])

## Model

In [285]:
class GCN(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.conv1 = GCNConv(args.num_node_features, args.gcn_hidden_dim)
        self.conv2 = GCNConv(args.gcn_hidden_dim, args.gcn_hidden_dim)

    def forward(self, node_feature, edge_index):
        x = self.conv1(node_feature, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return x

In [286]:
class GRU_Encoder(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.feature_extractor = GCN(args)
        self.gru1 = nn.GRU(input_size=args.gcn_hidden_dim, hidden_size=args.encoder_hidden_dim)
        self.gru2 = nn.GRU(input_size=args.encoder_hidden_dim, hidden_size=args.encoder_hidden_dim)
        
    def forward(self, batchx, edge_index):
        GCN_output = []
        for i in range(batchx[0].shape[1]):
            gcn_feature = self.feature_extractor(torch.unsqueeze(batchx[0][:, i], 1), edge_index)
            GCN_output.append(gcn_feature)
        
        out = torch.stack(GCN_output)
        out, h1 = self.gru1(out)
        out, h2 = self.gru2(out)
        return h1, h2

In [287]:
class GRU_Decoder(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        self.gcn = GCN(args)
        self.gru1 = nn.GRUCell(input_size=args.gcn_hidden_dim, hidden_size=args.decoder_hidden_dim)
        self.gru2 = nn.GRUCell(input_size=args.decoder_hidden_dim, hidden_size=args.decoder_hidden_dim)
        self.pred = nn.Linear(args.decoder_hidden_dim, args.zone_num)
    
    def forward(self, input_graph, edge_index, h1, h2):
        graph_embed = self.gcn(input_graph, edge_index)
        h1 = self.gru1(graph_embed, h1)
        h2 = self.gru2(torch.squeeze(h1), h2)
        out_graph = self.pred(torch.squeeze(h2))
        return (out_graph, h1, h2)

In [288]:
class Graph_Seq2Seq(torch.nn.Module):
    def __init__(self, args, initial_feature):
        super().__init__()
        self.initial_feature = initial_feature
        self.encoder = GRU_Encoder(args)
        self.decoder = GRU_Decoder(args)
        
    def forward(self, input_seq, edge_index, pred_len):
        graph_li = []
        h1, h2 = self.encoder(input_seq, edge_index)
        graph = self.initial_feature
        for i in range(pred_len):
            out_graph, h1, h2 = self.decoder(graph, edge_index, torch.squeeze(h1), torch.squeeze(h2))
            graph_li.append(out_graph)
        pred = torch.stack(graph_li)
        return pred

In [289]:
#initial_feature = torch.unsqueeze(node_feature.mean(1), 1)
#model = Graph_Seq2Seq(args, initial_feature)
#for (batchx, batchy) in train_loader:
#    x = model(batchx, edge_index, 12)
#    print(x)
#    i += 1

In [290]:
args.device

device(type='cpu')

In [294]:
def train(model, criterion, optimizer, edge_index, args):
    model.train()
    eval_loss = []
    edge_index = edge_index.to(args.device)
    for batchIdx, (batchx, batchy) in enumerate(train_loader):
        optimizer.zero_grad()
        batchx = batchx.to(args.device)
        batchy = batchy.to(args.device)
        predictions = model(batchx, edge_index, args.pred_length)
        loss = criterion(predictions, batchy.permute(2, 1, 0))
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        optimizer.step()
        eval_loss.append(loss.detach().numpy())
        end = time.time()
        print('train loss:', loss)
    return np.mean(eval_loss)

In [295]:
def valid(model, criterion, edge_index, args):
    model.eval()
    eval_loss = []
    edge_index = edge_index.to(args.device)
    for batchIdx, (batchx, batchy) in enumerate(train_loader):
        optimizer.zero_grad()
        batchx = batchx.to(args.device)
        batchy = batchy.to(args.device)
        predictions = model(batchx, edge_index, args.pred_length)
        loss = criterion(predictions, batchy.permute(2, 1, 0))
        eval_loss.append(loss.detach().numpy())
        end = time.time()
    return np.mean(eval_loss)

In [None]:
model = Graph_Seq2Seq(args, initial_feature).to(args.device)
criterion = F.mse_loss
optimizer = optim.Adam(model.parameters(), amsgrad=True)

for i in range(args.epoch_num):
    train_loss = train(model, criterion, optimizer, edge_index, args)
    print('----------------------------- epoch {} train loss {} ------------------------------'.format(i, train_loss))
    eval_loss = valid(model, criterion, edge_index, args)
    print('----------------------------- epoch {} eval loss {} ------------------------------'.format(i, eval_loss))

train loss: tensor(13674.5430, grad_fn=<MseLossBackward0>)
train loss: tensor(13369.1904, grad_fn=<MseLossBackward0>)
train loss: tensor(15390.6553, grad_fn=<MseLossBackward0>)
train loss: tensor(24052.2910, grad_fn=<MseLossBackward0>)
train loss: tensor(18861.7793, grad_fn=<MseLossBackward0>)
train loss: tensor(4009.0461, grad_fn=<MseLossBackward0>)
train loss: tensor(8033.1475, grad_fn=<MseLossBackward0>)
train loss: tensor(29382.9355, grad_fn=<MseLossBackward0>)
train loss: tensor(9519.3779, grad_fn=<MseLossBackward0>)
train loss: tensor(9859.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(21745.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(32618.3633, grad_fn=<MseLossBackward0>)
train loss: tensor(21214.6113, grad_fn=<MseLossBackward0>)
train loss: tensor(15384.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(14029.1504, grad_fn=<MseLossBackward0>)
train loss: tensor(7277.2632, grad_fn=<MseLossBackward0>)
train loss: tensor(11530.6338, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(15649.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(9899.0996, grad_fn=<MseLossBackward0>)
train loss: tensor(11736.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(21200.2754, grad_fn=<MseLossBackward0>)
train loss: tensor(6441.0562, grad_fn=<MseLossBackward0>)
train loss: tensor(13285.6768, grad_fn=<MseLossBackward0>)
train loss: tensor(12105.6230, grad_fn=<MseLossBackward0>)
train loss: tensor(30710.2383, grad_fn=<MseLossBackward0>)
train loss: tensor(14461.2852, grad_fn=<MseLossBackward0>)
train loss: tensor(18063.6230, grad_fn=<MseLossBackward0>)
train loss: tensor(26250.0781, grad_fn=<MseLossBackward0>)
train loss: tensor(7253.3721, grad_fn=<MseLossBackward0>)
train loss: tensor(11006.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(7810.1851, grad_fn=<MseLossBackward0>)
train loss: tensor(9176.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(14970.5156, grad_fn=<MseLossBackward0>)
train loss: tensor(26498.7988, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(10136.6143, grad_fn=<MseLossBackward0>)
train loss: tensor(12623.7695, grad_fn=<MseLossBackward0>)
train loss: tensor(13178.4922, grad_fn=<MseLossBackward0>)
train loss: tensor(13673.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(34211.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(11794.6348, grad_fn=<MseLossBackward0>)
train loss: tensor(29963.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(21524.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(29249.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(11504.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(6943.4517, grad_fn=<MseLossBackward0>)
train loss: tensor(18603.4297, grad_fn=<MseLossBackward0>)
train loss: tensor(9534.0693, grad_fn=<MseLossBackward0>)
train loss: tensor(8240.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(11216.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(12501.8877, grad_fn=<MseLossBackward0>)
train loss: tensor(9532.6484, grad_fn=<MseLossBackward0>)
t

train loss: tensor(20038.7969, grad_fn=<MseLossBackward0>)
train loss: tensor(19260.3477, grad_fn=<MseLossBackward0>)
train loss: tensor(9622.4717, grad_fn=<MseLossBackward0>)
train loss: tensor(27476.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(9506.5967, grad_fn=<MseLossBackward0>)
train loss: tensor(10849.9365, grad_fn=<MseLossBackward0>)
train loss: tensor(14942.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(9016.5225, grad_fn=<MseLossBackward0>)
train loss: tensor(18647.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(10854.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(17451.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(9253.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(10960.0752, grad_fn=<MseLossBackward0>)
train loss: tensor(25792.0664, grad_fn=<MseLossBackward0>)
train loss: tensor(39678.6797, grad_fn=<MseLossBackward0>)
train loss: tensor(7124.7900, grad_fn=<MseLossBackward0>)
train loss: tensor(10025.5537, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(19785.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(20295.9727, grad_fn=<MseLossBackward0>)
train loss: tensor(11260.7363, grad_fn=<MseLossBackward0>)
train loss: tensor(4353.3838, grad_fn=<MseLossBackward0>)
train loss: tensor(26254.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(29785.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(11142.2314, grad_fn=<MseLossBackward0>)
train loss: tensor(19795.4707, grad_fn=<MseLossBackward0>)
train loss: tensor(8172.7661, grad_fn=<MseLossBackward0>)
train loss: tensor(8922.6543, grad_fn=<MseLossBackward0>)
train loss: tensor(7964.5864, grad_fn=<MseLossBackward0>)
train loss: tensor(7558.7925, grad_fn=<MseLossBackward0>)
train loss: tensor(12297.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(24394.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(22630.2715, grad_fn=<MseLossBackward0>)
train loss: tensor(19485.2891, grad_fn=<MseLossBackward0>)
train loss: tensor(14106.6582, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(29032.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(14650.3486, grad_fn=<MseLossBackward0>)
train loss: tensor(17050.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(10304.0723, grad_fn=<MseLossBackward0>)
train loss: tensor(11665.7002, grad_fn=<MseLossBackward0>)
train loss: tensor(8126.3130, grad_fn=<MseLossBackward0>)
train loss: tensor(18693.3945, grad_fn=<MseLossBackward0>)
train loss: tensor(10888.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(15807.3447, grad_fn=<MseLossBackward0>)
train loss: tensor(17506.6777, grad_fn=<MseLossBackward0>)
train loss: tensor(14493.0459, grad_fn=<MseLossBackward0>)
train loss: tensor(21518.7168, grad_fn=<MseLossBackward0>)
train loss: tensor(24232.5059, grad_fn=<MseLossBackward0>)
train loss: tensor(8839.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(29974.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(23083.6699, grad_fn=<MseLossBackward0>)
train loss: tensor(16716.2559, grad_fn=<MseLossBackward0>)

train loss: tensor(13841.8018, grad_fn=<MseLossBackward0>)
train loss: tensor(14884.7061, grad_fn=<MseLossBackward0>)
train loss: tensor(12819.8037, grad_fn=<MseLossBackward0>)
train loss: tensor(19567.1230, grad_fn=<MseLossBackward0>)
train loss: tensor(27208.4766, grad_fn=<MseLossBackward0>)
train loss: tensor(13921.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(9135.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(20493.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(8172.6792, grad_fn=<MseLossBackward0>)
train loss: tensor(26787.0449, grad_fn=<MseLossBackward0>)
train loss: tensor(10219.6104, grad_fn=<MseLossBackward0>)
train loss: tensor(8718.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(5348.9390, grad_fn=<MseLossBackward0>)
train loss: tensor(21901.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(111277.1875, grad_fn=<MseLossBackward0>)
train loss: tensor(26628.2930, grad_fn=<MseLossBackward0>)
train loss: tensor(19191.6426, grad_fn=<MseLossBackward0>)


train loss: tensor(16182.3311, grad_fn=<MseLossBackward0>)
train loss: tensor(2289.2600, grad_fn=<MseLossBackward0>)
train loss: tensor(13909.2939, grad_fn=<MseLossBackward0>)
train loss: tensor(21088.6777, grad_fn=<MseLossBackward0>)
train loss: tensor(8212.5547, grad_fn=<MseLossBackward0>)
train loss: tensor(15200.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(17545.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(16650.9492, grad_fn=<MseLossBackward0>)
train loss: tensor(8315.2373, grad_fn=<MseLossBackward0>)
train loss: tensor(11296.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(17754.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(30078.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(1812.6002, grad_fn=<MseLossBackward0>)
train loss: tensor(10145.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(29637.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(12175.7266, grad_fn=<MseLossBackward0>)
train loss: tensor(21941.6523, grad_fn=<MseLossBackward0>)
t

train loss: tensor(27487.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(8850.6797, grad_fn=<MseLossBackward0>)
train loss: tensor(20727.1152, grad_fn=<MseLossBackward0>)
train loss: tensor(13592.4824, grad_fn=<MseLossBackward0>)
train loss: tensor(19578.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(22324.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(15414.7852, grad_fn=<MseLossBackward0>)
train loss: tensor(24716.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(14865.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(20099.2324, grad_fn=<MseLossBackward0>)
train loss: tensor(25448.9824, grad_fn=<MseLossBackward0>)
train loss: tensor(11617.3320, grad_fn=<MseLossBackward0>)
train loss: tensor(15381.6357, grad_fn=<MseLossBackward0>)
train loss: tensor(8258.2217, grad_fn=<MseLossBackward0>)
train loss: tensor(9716.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(10032.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(9786.4639, grad_fn=<MseLossBackward0>)
t

train loss: tensor(7778.9897, grad_fn=<MseLossBackward0>)
train loss: tensor(20414.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(16189.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(16966.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(12772.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(10391.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(114829.4219, grad_fn=<MseLossBackward0>)
train loss: tensor(30892.4199, grad_fn=<MseLossBackward0>)
train loss: tensor(14457.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(15131.6133, grad_fn=<MseLossBackward0>)
train loss: tensor(12958.4932, grad_fn=<MseLossBackward0>)
train loss: tensor(13464.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(3162.5002, grad_fn=<MseLossBackward0>)
train loss: tensor(18946.2168, grad_fn=<MseLossBackward0>)
train loss: tensor(20505.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(3783.9409, grad_fn=<MseLossBackward0>)
train loss: tensor(27079.0566, grad_fn=<MseLossBackward0>)

train loss: tensor(10063.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(16486.4297, grad_fn=<MseLossBackward0>)
train loss: tensor(17906.0859, grad_fn=<MseLossBackward0>)
train loss: tensor(14159.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(9214.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(18493.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(17811.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(18956.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(19735.7930, grad_fn=<MseLossBackward0>)
train loss: tensor(14056.3076, grad_fn=<MseLossBackward0>)
train loss: tensor(18786.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(21915.7070, grad_fn=<MseLossBackward0>)
train loss: tensor(13493.0293, grad_fn=<MseLossBackward0>)
train loss: tensor(7132.0903, grad_fn=<MseLossBackward0>)
train loss: tensor(13126.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(14807.5293, grad_fn=<MseLossBackward0>)
train loss: tensor(7789.2456, grad_fn=<MseLossBackward0>)


train loss: tensor(35928.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(11613.5078, grad_fn=<MseLossBackward0>)
train loss: tensor(26350.4258, grad_fn=<MseLossBackward0>)
train loss: tensor(61880.3047, grad_fn=<MseLossBackward0>)
train loss: tensor(14062.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(16668.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(7719.4175, grad_fn=<MseLossBackward0>)
train loss: tensor(14114.8779, grad_fn=<MseLossBackward0>)
train loss: tensor(6279.8511, grad_fn=<MseLossBackward0>)
train loss: tensor(14929.7617, grad_fn=<MseLossBackward0>)
train loss: tensor(26214.8457, grad_fn=<MseLossBackward0>)
train loss: tensor(4521.7388, grad_fn=<MseLossBackward0>)
train loss: tensor(16989.5605, grad_fn=<MseLossBackward0>)
train loss: tensor(4572.9331, grad_fn=<MseLossBackward0>)
train loss: tensor(14396.2256, grad_fn=<MseLossBackward0>)
train loss: tensor(11286.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(5620.5801, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(18586.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(15196.2100, grad_fn=<MseLossBackward0>)
train loss: tensor(16800.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(13583.1045, grad_fn=<MseLossBackward0>)
train loss: tensor(8481.0342, grad_fn=<MseLossBackward0>)
train loss: tensor(9406.4824, grad_fn=<MseLossBackward0>)
train loss: tensor(16661.4961, grad_fn=<MseLossBackward0>)
train loss: tensor(8961.0078, grad_fn=<MseLossBackward0>)
train loss: tensor(10967.1592, grad_fn=<MseLossBackward0>)
train loss: tensor(46975.3086, grad_fn=<MseLossBackward0>)
train loss: tensor(14412.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(8788.2070, grad_fn=<MseLossBackward0>)
train loss: tensor(11402.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(23469.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(35835.2773, grad_fn=<MseLossBackward0>)
train loss: tensor(27543.9688, grad_fn=<MseLossBackward0>)
train loss: tensor(5348.8115, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(12133.6016, grad_fn=<MseLossBackward0>)
train loss: tensor(11462.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(13367.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(13289.8350, grad_fn=<MseLossBackward0>)
train loss: tensor(18673.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(10178.1914, grad_fn=<MseLossBackward0>)
train loss: tensor(13119.5283, grad_fn=<MseLossBackward0>)
train loss: tensor(21021.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(15505.8604, grad_fn=<MseLossBackward0>)
train loss: tensor(30579.2520, grad_fn=<MseLossBackward0>)
train loss: tensor(7968.3252, grad_fn=<MseLossBackward0>)
train loss: tensor(11492.2510, grad_fn=<MseLossBackward0>)
train loss: tensor(5126.6865, grad_fn=<MseLossBackward0>)
train loss: tensor(10446.1621, grad_fn=<MseLossBackward0>)
train loss: tensor(10286.3809, grad_fn=<MseLossBackward0>)
train loss: tensor(17537.1113, grad_fn=<MseLossBackward0>)
train loss: tensor(11771.8604, grad_fn=<MseLossBackward0>)

train loss: tensor(9468.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(8043.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(13451.4824, grad_fn=<MseLossBackward0>)
train loss: tensor(8725.3984, grad_fn=<MseLossBackward0>)
train loss: tensor(6447.5947, grad_fn=<MseLossBackward0>)
train loss: tensor(12433.3398, grad_fn=<MseLossBackward0>)
train loss: tensor(19104.7617, grad_fn=<MseLossBackward0>)
train loss: tensor(11964.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(7809.7437, grad_fn=<MseLossBackward0>)
train loss: tensor(16925.3594, grad_fn=<MseLossBackward0>)
train loss: tensor(28754.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(15330.1895, grad_fn=<MseLossBackward0>)
train loss: tensor(22789.7676, grad_fn=<MseLossBackward0>)
train loss: tensor(28023.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(20039.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(4369.4756, grad_fn=<MseLossBackward0>)
train loss: tensor(16925.3438, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(28318.0410, grad_fn=<MseLossBackward0>)
train loss: tensor(6985.7778, grad_fn=<MseLossBackward0>)
train loss: tensor(3949.4976, grad_fn=<MseLossBackward0>)
train loss: tensor(17672.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(11925.1426, grad_fn=<MseLossBackward0>)
train loss: tensor(18472.4395, grad_fn=<MseLossBackward0>)
train loss: tensor(3929.9146, grad_fn=<MseLossBackward0>)
train loss: tensor(7200.0112, grad_fn=<MseLossBackward0>)
train loss: tensor(10163.4570, grad_fn=<MseLossBackward0>)
train loss: tensor(17779.4316, grad_fn=<MseLossBackward0>)
train loss: tensor(28484.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(7696.9185, grad_fn=<MseLossBackward0>)
train loss: tensor(10128.6924, grad_fn=<MseLossBackward0>)
train loss: tensor(4748.6685, grad_fn=<MseLossBackward0>)
train loss: tensor(22514.1660, grad_fn=<MseLossBackward0>)
train loss: tensor(31891.2266, grad_fn=<MseLossBackward0>)
train loss: tensor(11956.7129, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(9051.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(6498.6821, grad_fn=<MseLossBackward0>)
train loss: tensor(13693.5752, grad_fn=<MseLossBackward0>)
train loss: tensor(30252.0234, grad_fn=<MseLossBackward0>)
train loss: tensor(17536.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(22563.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(15794.5010, grad_fn=<MseLossBackward0>)
train loss: tensor(23825.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(9310.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(19643.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(16959.4707, grad_fn=<MseLossBackward0>)
train loss: tensor(114556.7344, grad_fn=<MseLossBackward0>)
train loss: tensor(16804.2246, grad_fn=<MseLossBackward0>)
train loss: tensor(3895.2449, grad_fn=<MseLossBackward0>)
train loss: tensor(26896.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(13765.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(8439.3145, grad_fn=<MseLossBackward0>)
t

train loss: tensor(21254.9434, grad_fn=<MseLossBackward0>)
train loss: tensor(9291.1143, grad_fn=<MseLossBackward0>)
train loss: tensor(78763.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(42430.5156, grad_fn=<MseLossBackward0>)
train loss: tensor(17463.9355, grad_fn=<MseLossBackward0>)
train loss: tensor(19243.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(13866.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(16155.9482, grad_fn=<MseLossBackward0>)
train loss: tensor(16781.4746, grad_fn=<MseLossBackward0>)
train loss: tensor(5523.9956, grad_fn=<MseLossBackward0>)
train loss: tensor(21611.8379, grad_fn=<MseLossBackward0>)
train loss: tensor(13222.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(11992.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(15686.4365, grad_fn=<MseLossBackward0>)
train loss: tensor(13100.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(7099.3320, grad_fn=<MseLossBackward0>)
train loss: tensor(28239.0449, grad_fn=<MseLossBackward0>)


train loss: tensor(2784.2710, grad_fn=<MseLossBackward0>)
train loss: tensor(15340.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(5306.5732, grad_fn=<MseLossBackward0>)
train loss: tensor(25254.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(32384.4004, grad_fn=<MseLossBackward0>)
train loss: tensor(13750.5049, grad_fn=<MseLossBackward0>)
train loss: tensor(19350.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(8976.8838, grad_fn=<MseLossBackward0>)
train loss: tensor(11941.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(13400.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(6960.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(10351.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(9667.1680, grad_fn=<MseLossBackward0>)
train loss: tensor(22526.7285, grad_fn=<MseLossBackward0>)
train loss: tensor(15243.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(20390.0605, grad_fn=<MseLossBackward0>)
train loss: tensor(10681.9609, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(24398.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(6237.9731, grad_fn=<MseLossBackward0>)
train loss: tensor(8919.9443, grad_fn=<MseLossBackward0>)
train loss: tensor(25098.7793, grad_fn=<MseLossBackward0>)
train loss: tensor(8750.5996, grad_fn=<MseLossBackward0>)
train loss: tensor(11627.2764, grad_fn=<MseLossBackward0>)
train loss: tensor(21411.6543, grad_fn=<MseLossBackward0>)
train loss: tensor(16178.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(11480.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(15338.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(25888.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(18257.3379, grad_fn=<MseLossBackward0>)
train loss: tensor(22616.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(3687.0400, grad_fn=<MseLossBackward0>)
train loss: tensor(12857.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(5795.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(6032.0581, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(12343.4229, grad_fn=<MseLossBackward0>)
train loss: tensor(16530.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(13734.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(19543.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(12871.0166, grad_fn=<MseLossBackward0>)
train loss: tensor(14188.4834, grad_fn=<MseLossBackward0>)
train loss: tensor(18204.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(67993.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(12918.2109, grad_fn=<MseLossBackward0>)
train loss: tensor(10236.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(10396.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(7351.7051, grad_fn=<MseLossBackward0>)
train loss: tensor(14657.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(19812.8867, grad_fn=<MseLossBackward0>)
train loss: tensor(103418.0781, grad_fn=<MseLossBackward0>)
train loss: tensor(9552.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(15707.0811, grad_fn=<MseLossBackward0>

train loss: tensor(7583.6890, grad_fn=<MseLossBackward0>)
train loss: tensor(14469.5322, grad_fn=<MseLossBackward0>)
train loss: tensor(6211.4033, grad_fn=<MseLossBackward0>)
train loss: tensor(17768.4258, grad_fn=<MseLossBackward0>)
train loss: tensor(9603.6914, grad_fn=<MseLossBackward0>)
train loss: tensor(4695.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(11212.8662, grad_fn=<MseLossBackward0>)
train loss: tensor(10885.3730, grad_fn=<MseLossBackward0>)
train loss: tensor(18088.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(18218.0312, grad_fn=<MseLossBackward0>)
train loss: tensor(16155.9482, grad_fn=<MseLossBackward0>)
train loss: tensor(12879.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(8926.6396, grad_fn=<MseLossBackward0>)
train loss: tensor(12623.4756, grad_fn=<MseLossBackward0>)
train loss: tensor(86752.3281, grad_fn=<MseLossBackward0>)
train loss: tensor(15420.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(24085.5938, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(30826.1152, grad_fn=<MseLossBackward0>)
train loss: tensor(13318.3955, grad_fn=<MseLossBackward0>)
train loss: tensor(13311.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(12133.5801, grad_fn=<MseLossBackward0>)
train loss: tensor(16896.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(35881.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(3794.1125, grad_fn=<MseLossBackward0>)
train loss: tensor(29150.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(14521.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(16858.3379, grad_fn=<MseLossBackward0>)
train loss: tensor(14549.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(9073.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(16743.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(13023.0498, grad_fn=<MseLossBackward0>)
train loss: tensor(28415.0527, grad_fn=<MseLossBackward0>)
train loss: tensor(9199.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(7878.3359, grad_fn=<MseLossBackward0>)
t

train loss: tensor(11521.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(34465.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(15233.7520, grad_fn=<MseLossBackward0>)
train loss: tensor(15839.4697, grad_fn=<MseLossBackward0>)
train loss: tensor(17430.0332, grad_fn=<MseLossBackward0>)
train loss: tensor(26513.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(6951.3110, grad_fn=<MseLossBackward0>)
train loss: tensor(30842.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(22128.6777, grad_fn=<MseLossBackward0>)
train loss: tensor(13164.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(15582.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(8542.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(7219.8486, grad_fn=<MseLossBackward0>)
train loss: tensor(19392.2031, grad_fn=<MseLossBackward0>)
train loss: tensor(12473.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(26459.4590, grad_fn=<MseLossBackward0>)
train loss: tensor(9977.5029, grad_fn=<MseLossBackward0>)
t

train loss: tensor(18035.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(6596.4302, grad_fn=<MseLossBackward0>)
train loss: tensor(15780.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(7009.4365, grad_fn=<MseLossBackward0>)
train loss: tensor(11489.5381, grad_fn=<MseLossBackward0>)
train loss: tensor(21294.8555, grad_fn=<MseLossBackward0>)
train loss: tensor(6737.0796, grad_fn=<MseLossBackward0>)
train loss: tensor(7518.0293, grad_fn=<MseLossBackward0>)
train loss: tensor(6407.4985, grad_fn=<MseLossBackward0>)
train loss: tensor(6716.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(10946.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(27689.3105, grad_fn=<MseLossBackward0>)
train loss: tensor(12767.0830, grad_fn=<MseLossBackward0>)
train loss: tensor(14669.8691, grad_fn=<MseLossBackward0>)
train loss: tensor(125015.6562, grad_fn=<MseLossBackward0>)
train loss: tensor(14529.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(20330.1230, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(4766.2061, grad_fn=<MseLossBackward0>)
train loss: tensor(10274.0615, grad_fn=<MseLossBackward0>)
train loss: tensor(11712.5830, grad_fn=<MseLossBackward0>)
train loss: tensor(5973.1826, grad_fn=<MseLossBackward0>)
train loss: tensor(11104.6738, grad_fn=<MseLossBackward0>)
train loss: tensor(31786.4863, grad_fn=<MseLossBackward0>)
train loss: tensor(9564.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(20643.0938, grad_fn=<MseLossBackward0>)
train loss: tensor(14166.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(8667.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(10500.8906, grad_fn=<MseLossBackward0>)
train loss: tensor(11996.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(12984.5010, grad_fn=<MseLossBackward0>)
train loss: tensor(9098.8721, grad_fn=<MseLossBackward0>)
train loss: tensor(8973.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(26783.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(9302.0195, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(7218.7534, grad_fn=<MseLossBackward0>)
train loss: tensor(5951.9688, grad_fn=<MseLossBackward0>)
train loss: tensor(18437.5996, grad_fn=<MseLossBackward0>)
train loss: tensor(17140.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(15166.2480, grad_fn=<MseLossBackward0>)
train loss: tensor(9280.7256, grad_fn=<MseLossBackward0>)
train loss: tensor(15197.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(18770.8633, grad_fn=<MseLossBackward0>)
train loss: tensor(18917.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(13213.6338, grad_fn=<MseLossBackward0>)
train loss: tensor(10260.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(16785.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(18797.8164, grad_fn=<MseLossBackward0>)
train loss: tensor(9507.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(25760.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(7290.5732, grad_fn=<MseLossBackward0>)
train loss: tensor(8985.4111, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(3228.3066, grad_fn=<MseLossBackward0>)
train loss: tensor(10294.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(5576.1792, grad_fn=<MseLossBackward0>)
train loss: tensor(11479.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(17343.5410, grad_fn=<MseLossBackward0>)
train loss: tensor(16343.3740, grad_fn=<MseLossBackward0>)
train loss: tensor(4225.8369, grad_fn=<MseLossBackward0>)
train loss: tensor(10419.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(15107.7598, grad_fn=<MseLossBackward0>)
train loss: tensor(25565.6191, grad_fn=<MseLossBackward0>)
train loss: tensor(19443.6777, grad_fn=<MseLossBackward0>)
train loss: tensor(16236.7764, grad_fn=<MseLossBackward0>)
train loss: tensor(11628.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(24309.8770, grad_fn=<MseLossBackward0>)
train loss: tensor(11618.2197, grad_fn=<MseLossBackward0>)
train loss: tensor(6920.0806, grad_fn=<MseLossBackward0>)
train loss: tensor(7311.8818, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(10846.9375, grad_fn=<MseLossBackward0>)
train loss: tensor(7499.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(3678.9832, grad_fn=<MseLossBackward0>)
train loss: tensor(10640.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(14965.2002, grad_fn=<MseLossBackward0>)
train loss: tensor(18105.1797, grad_fn=<MseLossBackward0>)
train loss: tensor(34235.8867, grad_fn=<MseLossBackward0>)
train loss: tensor(9916.1084, grad_fn=<MseLossBackward0>)
train loss: tensor(25469.2070, grad_fn=<MseLossBackward0>)
train loss: tensor(11787.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(29852.1738, grad_fn=<MseLossBackward0>)
train loss: tensor(6570.9917, grad_fn=<MseLossBackward0>)
train loss: tensor(6491.1450, grad_fn=<MseLossBackward0>)
train loss: tensor(6882.9409, grad_fn=<MseLossBackward0>)
train loss: tensor(8923.0762, grad_fn=<MseLossBackward0>)
train loss: tensor(74324.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(16718.8242, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(8057.5571, grad_fn=<MseLossBackward0>)
train loss: tensor(16675.7422, grad_fn=<MseLossBackward0>)
train loss: tensor(18877.5840, grad_fn=<MseLossBackward0>)
train loss: tensor(14688.0625, grad_fn=<MseLossBackward0>)
train loss: tensor(18952.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(5836.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(21102.0566, grad_fn=<MseLossBackward0>)
train loss: tensor(11901.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(15396.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(11933.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(22508.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(23911.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(27740.6191, grad_fn=<MseLossBackward0>)
train loss: tensor(8456.5039, grad_fn=<MseLossBackward0>)
train loss: tensor(12604.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(17872.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(9883.8496, grad_fn=<MseLossBackward0>)
t

train loss: tensor(53510.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(9116.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(18577.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(11543.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(77441.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(15845.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(4223.4385, grad_fn=<MseLossBackward0>)
train loss: tensor(3039.9114, grad_fn=<MseLossBackward0>)
train loss: tensor(8189.9556, grad_fn=<MseLossBackward0>)
train loss: tensor(3329.0640, grad_fn=<MseLossBackward0>)
train loss: tensor(14637.3389, grad_fn=<MseLossBackward0>)
train loss: tensor(13685.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(9091.8223, grad_fn=<MseLossBackward0>)
train loss: tensor(10073.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(5481.8818, grad_fn=<MseLossBackward0>)
train loss: tensor(22362.2949, grad_fn=<MseLossBackward0>)
train loss: tensor(3992.2292, grad_fn=<MseLossBackward0>)
train

train loss: tensor(9865.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(14886.1602, grad_fn=<MseLossBackward0>)
train loss: tensor(3559.5923, grad_fn=<MseLossBackward0>)
train loss: tensor(18802.9688, grad_fn=<MseLossBackward0>)
train loss: tensor(40956.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(17928.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(11214.3076, grad_fn=<MseLossBackward0>)
train loss: tensor(16470.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(6207.8252, grad_fn=<MseLossBackward0>)
train loss: tensor(19279.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(22083.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(13575.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(11724.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(12216.2451, grad_fn=<MseLossBackward0>)
train loss: tensor(15763.3037, grad_fn=<MseLossBackward0>)
train loss: tensor(20306.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(4553.2554, grad_fn=<MseLossBackward0>)
t

train loss: tensor(58681.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(37007.2773, grad_fn=<MseLossBackward0>)
train loss: tensor(8691.9531, grad_fn=<MseLossBackward0>)
train loss: tensor(23830.7656, grad_fn=<MseLossBackward0>)
train loss: tensor(11003.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(14968.6230, grad_fn=<MseLossBackward0>)
train loss: tensor(15679.7900, grad_fn=<MseLossBackward0>)
train loss: tensor(5024.8335, grad_fn=<MseLossBackward0>)
train loss: tensor(13331.6299, grad_fn=<MseLossBackward0>)
train loss: tensor(4951.7593, grad_fn=<MseLossBackward0>)
train loss: tensor(6003.3633, grad_fn=<MseLossBackward0>)
train loss: tensor(10340.5361, grad_fn=<MseLossBackward0>)
train loss: tensor(20863.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(18900.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(10620.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(8244.7842, grad_fn=<MseLossBackward0>)
train loss: tensor(26029.8242, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(22700.2793, grad_fn=<MseLossBackward0>)
train loss: tensor(16458.8613, grad_fn=<MseLossBackward0>)
train loss: tensor(12649.1836, grad_fn=<MseLossBackward0>)
train loss: tensor(14707.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(82143.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(4573.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(24297.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(10616.4131, grad_fn=<MseLossBackward0>)
train loss: tensor(5088.8696, grad_fn=<MseLossBackward0>)
train loss: tensor(66295.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(11713.1875, grad_fn=<MseLossBackward0>)
train loss: tensor(18289.8340, grad_fn=<MseLossBackward0>)
train loss: tensor(6854.6958, grad_fn=<MseLossBackward0>)
train loss: tensor(22850.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(3996.7275, grad_fn=<MseLossBackward0>)
train loss: tensor(3446.9937, grad_fn=<MseLossBackward0>)
train loss: tensor(18553.2129, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(17170.5176, grad_fn=<MseLossBackward0>)
train loss: tensor(12221.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(3823.5486, grad_fn=<MseLossBackward0>)
train loss: tensor(14498.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(8006.6284, grad_fn=<MseLossBackward0>)
train loss: tensor(17388.9824, grad_fn=<MseLossBackward0>)
train loss: tensor(5834.3882, grad_fn=<MseLossBackward0>)
train loss: tensor(3707.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(7957.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(20465.6074, grad_fn=<MseLossBackward0>)
train loss: tensor(12427.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(10283.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(14223.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(11093.8604, grad_fn=<MseLossBackward0>)
train loss: tensor(13465.1191, grad_fn=<MseLossBackward0>)
train loss: tensor(15739.4785, grad_fn=<MseLossBackward0>)
train loss: tensor(3838.8438, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(15500.8096, grad_fn=<MseLossBackward0>)
train loss: tensor(28037.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(11356.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(10616.8584, grad_fn=<MseLossBackward0>)
train loss: tensor(22869.2969, grad_fn=<MseLossBackward0>)
train loss: tensor(10997.8076, grad_fn=<MseLossBackward0>)
train loss: tensor(4836.6460, grad_fn=<MseLossBackward0>)
train loss: tensor(9797.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(9878.1670, grad_fn=<MseLossBackward0>)
train loss: tensor(11454.3389, grad_fn=<MseLossBackward0>)
train loss: tensor(21382.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(15686.2021, grad_fn=<MseLossBackward0>)
train loss: tensor(17324.4766, grad_fn=<MseLossBackward0>)
train loss: tensor(14122.0547, grad_fn=<MseLossBackward0>)
train loss: tensor(30020.5527, grad_fn=<MseLossBackward0>)
train loss: tensor(9145.1250, grad_fn=<MseLossBackward0>)
train loss: tensor(6367.1748, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(22364.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(19133.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(21752.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(4287.6929, grad_fn=<MseLossBackward0>)
train loss: tensor(4471.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(3026.5623, grad_fn=<MseLossBackward0>)
train loss: tensor(16282.6143, grad_fn=<MseLossBackward0>)
train loss: tensor(16476.3145, grad_fn=<MseLossBackward0>)
train loss: tensor(7526.5269, grad_fn=<MseLossBackward0>)
train loss: tensor(14131.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(26358.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(3093.9788, grad_fn=<MseLossBackward0>)
train loss: tensor(6035.4048, grad_fn=<MseLossBackward0>)
train loss: tensor(16711.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(18609.9023, grad_fn=<MseLossBackward0>)
train loss: tensor(18109.8184, grad_fn=<MseLossBackward0>)
train loss: tensor(15071.1299, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(5162.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(5815.9502, grad_fn=<MseLossBackward0>)
train loss: tensor(3511.1379, grad_fn=<MseLossBackward0>)
train loss: tensor(9764.3965, grad_fn=<MseLossBackward0>)
train loss: tensor(4625.2056, grad_fn=<MseLossBackward0>)
train loss: tensor(4038.7974, grad_fn=<MseLossBackward0>)
train loss: tensor(9740.4854, grad_fn=<MseLossBackward0>)
train loss: tensor(83122.9453, grad_fn=<MseLossBackward0>)
train loss: tensor(12397.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(13876.9990, grad_fn=<MseLossBackward0>)
train loss: tensor(8968.1162, grad_fn=<MseLossBackward0>)
train loss: tensor(13793.7471, grad_fn=<MseLossBackward0>)
train loss: tensor(12144.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(16575.0117, grad_fn=<MseLossBackward0>)
train loss: tensor(18250.4863, grad_fn=<MseLossBackward0>)
train loss: tensor(13046.5557, grad_fn=<MseLossBackward0>)
train loss: tensor(4435.7163, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(17518.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(6821.5449, grad_fn=<MseLossBackward0>)
train loss: tensor(8553.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(9285.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(7765.5376, grad_fn=<MseLossBackward0>)
train loss: tensor(3738.7266, grad_fn=<MseLossBackward0>)
train loss: tensor(18996.2832, grad_fn=<MseLossBackward0>)
train loss: tensor(12497.6436, grad_fn=<MseLossBackward0>)
train loss: tensor(11048.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(15780.7100, grad_fn=<MseLossBackward0>)
train loss: tensor(21737.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(8845.9189, grad_fn=<MseLossBackward0>)
train loss: tensor(12877.0430, grad_fn=<MseLossBackward0>)
train loss: tensor(15381.0029, grad_fn=<MseLossBackward0>)
train loss: tensor(8274.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(5980.5981, grad_fn=<MseLossBackward0>)
train loss: tensor(8132.7881, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(7986.9678, grad_fn=<MseLossBackward0>)
train loss: tensor(15024.8027, grad_fn=<MseLossBackward0>)
train loss: tensor(4309.6260, grad_fn=<MseLossBackward0>)
train loss: tensor(10441.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(4816.2622, grad_fn=<MseLossBackward0>)
train loss: tensor(21279.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(3326.2712, grad_fn=<MseLossBackward0>)
train loss: tensor(6549.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(9746.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(24805.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(14164.5576, grad_fn=<MseLossBackward0>)
train loss: tensor(6800.6675, grad_fn=<MseLossBackward0>)
train loss: tensor(5150.1025, grad_fn=<MseLossBackward0>)
train loss: tensor(4106.2754, grad_fn=<MseLossBackward0>)
train loss: tensor(9922.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(8307.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(7765.6255, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(16236.3242, grad_fn=<MseLossBackward0>)
train loss: tensor(13593.6963, grad_fn=<MseLossBackward0>)
train loss: tensor(21503.9180, grad_fn=<MseLossBackward0>)
train loss: tensor(9385.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(22831.8672, grad_fn=<MseLossBackward0>)
train loss: tensor(12496.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(18727.6016, grad_fn=<MseLossBackward0>)
train loss: tensor(10505.2119, grad_fn=<MseLossBackward0>)
train loss: tensor(2891.4771, grad_fn=<MseLossBackward0>)
train loss: tensor(18315.3887, grad_fn=<MseLossBackward0>)
train loss: tensor(5871.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(11954.0811, grad_fn=<MseLossBackward0>)
train loss: tensor(20871.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(39129.3633, grad_fn=<MseLossBackward0>)
train loss: tensor(26381.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(10193.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(11382.0596, grad_fn=<MseLossBackward0>)


train loss: tensor(8491.9678, grad_fn=<MseLossBackward0>)
train loss: tensor(32473.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(4076.0232, grad_fn=<MseLossBackward0>)
train loss: tensor(20723.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(7580.3711, grad_fn=<MseLossBackward0>)
train loss: tensor(24280.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(93841.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(20463.0625, grad_fn=<MseLossBackward0>)
train loss: tensor(7409.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(9649.5576, grad_fn=<MseLossBackward0>)
train loss: tensor(6949.5273, grad_fn=<MseLossBackward0>)
train loss: tensor(20701.5273, grad_fn=<MseLossBackward0>)
train loss: tensor(2269.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(3131.0229, grad_fn=<MseLossBackward0>)
train loss: tensor(26058.3047, grad_fn=<MseLossBackward0>)
train loss: tensor(1622.6879, grad_fn=<MseLossBackward0>)
train loss: tensor(49949.8516, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(9416.8223, grad_fn=<MseLossBackward0>)
train loss: tensor(9388.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(77775.6406, grad_fn=<MseLossBackward0>)
train loss: tensor(10407.7451, grad_fn=<MseLossBackward0>)
train loss: tensor(11805.7051, grad_fn=<MseLossBackward0>)
train loss: tensor(4957.4263, grad_fn=<MseLossBackward0>)
train loss: tensor(5038.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(7879.5386, grad_fn=<MseLossBackward0>)
train loss: tensor(25973.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(2413.4592, grad_fn=<MseLossBackward0>)
train loss: tensor(10499.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(10977.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(8126.5864, grad_fn=<MseLossBackward0>)
train loss: tensor(23721.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(6990.8486, grad_fn=<MseLossBackward0>)
train loss: tensor(19075.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(5963.9771, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(16118.0977, grad_fn=<MseLossBackward0>)
train loss: tensor(8620.0430, grad_fn=<MseLossBackward0>)
train loss: tensor(8979.2910, grad_fn=<MseLossBackward0>)
train loss: tensor(9084.2324, grad_fn=<MseLossBackward0>)
train loss: tensor(20699.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(8946.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(16108.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(7226.0337, grad_fn=<MseLossBackward0>)
train loss: tensor(4094.6362, grad_fn=<MseLossBackward0>)
train loss: tensor(8724.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(9447.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(2726.9905, grad_fn=<MseLossBackward0>)
train loss: tensor(16573.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(17569.8457, grad_fn=<MseLossBackward0>)
train loss: tensor(21443.3184, grad_fn=<MseLossBackward0>)
train loss: tensor(16828.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(8043.7329, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(17363.7539, grad_fn=<MseLossBackward0>)
train loss: tensor(9159.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(18135.7754, grad_fn=<MseLossBackward0>)
train loss: tensor(12831.6143, grad_fn=<MseLossBackward0>)
train loss: tensor(13632.8164, grad_fn=<MseLossBackward0>)
train loss: tensor(16633.7969, grad_fn=<MseLossBackward0>)
train loss: tensor(11382.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(17569.3887, grad_fn=<MseLossBackward0>)
train loss: tensor(9764.6377, grad_fn=<MseLossBackward0>)
train loss: tensor(8529.9375, grad_fn=<MseLossBackward0>)
train loss: tensor(11927.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(8802.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(5528.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(2974.3257, grad_fn=<MseLossBackward0>)
train loss: tensor(19936.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(8883.8818, grad_fn=<MseLossBackward0>)
train loss: tensor(7382.5342, grad_fn=<MseLossBackward0>)
train

train loss: tensor(19302.3086, grad_fn=<MseLossBackward0>)
train loss: tensor(15355.2979, grad_fn=<MseLossBackward0>)
train loss: tensor(7447.1978, grad_fn=<MseLossBackward0>)
train loss: tensor(10485.5869, grad_fn=<MseLossBackward0>)
train loss: tensor(13311.5664, grad_fn=<MseLossBackward0>)
train loss: tensor(14919.6377, grad_fn=<MseLossBackward0>)
train loss: tensor(3222.1755, grad_fn=<MseLossBackward0>)
train loss: tensor(14737.6064, grad_fn=<MseLossBackward0>)
train loss: tensor(11722.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(9090.5820, grad_fn=<MseLossBackward0>)
train loss: tensor(18271.6875, grad_fn=<MseLossBackward0>)
train loss: tensor(15691.7295, grad_fn=<MseLossBackward0>)
train loss: tensor(85885.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(14716.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(7430.8887, grad_fn=<MseLossBackward0>)
train loss: tensor(4955.3765, grad_fn=<MseLossBackward0>)
train loss: tensor(6457.4219, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(9210.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(5483.0923, grad_fn=<MseLossBackward0>)
train loss: tensor(12707.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(4267.4766, grad_fn=<MseLossBackward0>)
train loss: tensor(51040.8789, grad_fn=<MseLossBackward0>)
train loss: tensor(11359.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(4518.9263, grad_fn=<MseLossBackward0>)
train loss: tensor(11133.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(6681.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(13328.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(10422.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(24513.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(11555.8193, grad_fn=<MseLossBackward0>)
train loss: tensor(7406.2163, grad_fn=<MseLossBackward0>)
train loss: tensor(2100.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(7680.3159, grad_fn=<MseLossBackward0>)
train loss: tensor(6464.5024, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(6691.9180, grad_fn=<MseLossBackward0>)
train loss: tensor(7827.0986, grad_fn=<MseLossBackward0>)
train loss: tensor(6657.8320, grad_fn=<MseLossBackward0>)
train loss: tensor(1741.8557, grad_fn=<MseLossBackward0>)
train loss: tensor(4231.0981, grad_fn=<MseLossBackward0>)
train loss: tensor(9110.8076, grad_fn=<MseLossBackward0>)
train loss: tensor(12792.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(21958.8301, grad_fn=<MseLossBackward0>)
train loss: tensor(8550.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(17887.7832, grad_fn=<MseLossBackward0>)
train loss: tensor(10970.9111, grad_fn=<MseLossBackward0>)
train loss: tensor(23149.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(10916.8438, grad_fn=<MseLossBackward0>)
train loss: tensor(24838.3867, grad_fn=<MseLossBackward0>)
train loss: tensor(9788.2979, grad_fn=<MseLossBackward0>)
train loss: tensor(17935.9219, grad_fn=<MseLossBackward0>)
train loss: tensor(2634.9465, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(7895.6694, grad_fn=<MseLossBackward0>)
train loss: tensor(2583.0764, grad_fn=<MseLossBackward0>)
train loss: tensor(6831.8765, grad_fn=<MseLossBackward0>)
train loss: tensor(20667.8711, grad_fn=<MseLossBackward0>)
train loss: tensor(16501.0938, grad_fn=<MseLossBackward0>)
train loss: tensor(12700.0518, grad_fn=<MseLossBackward0>)
train loss: tensor(13602.9229, grad_fn=<MseLossBackward0>)
train loss: tensor(8382.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(8539.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(18334.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(19020.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(10402.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(5777.6528, grad_fn=<MseLossBackward0>)
train loss: tensor(45624.7109, grad_fn=<MseLossBackward0>)
train loss: tensor(18104.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(11106.2637, grad_fn=<MseLossBackward0>)
train loss: tensor(14042.8164, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(9131.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(8367.2939, grad_fn=<MseLossBackward0>)
train loss: tensor(10306.6504, grad_fn=<MseLossBackward0>)
train loss: tensor(4658.6582, grad_fn=<MseLossBackward0>)
train loss: tensor(6756.0615, grad_fn=<MseLossBackward0>)
train loss: tensor(6894.2480, grad_fn=<MseLossBackward0>)
train loss: tensor(7649.3354, grad_fn=<MseLossBackward0>)
train loss: tensor(24337.2910, grad_fn=<MseLossBackward0>)
train loss: tensor(10837.7158, grad_fn=<MseLossBackward0>)
train loss: tensor(9585.8057, grad_fn=<MseLossBackward0>)
train loss: tensor(6795.1602, grad_fn=<MseLossBackward0>)
train loss: tensor(10614.2432, grad_fn=<MseLossBackward0>)
train loss: tensor(3191.5637, grad_fn=<MseLossBackward0>)
train loss: tensor(5507.1621, grad_fn=<MseLossBackward0>)
train loss: tensor(11495.2158, grad_fn=<MseLossBackward0>)
train loss: tensor(6990.9468, grad_fn=<MseLossBackward0>)
train loss: tensor(16289.5010, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(8631.9824, grad_fn=<MseLossBackward0>)
train loss: tensor(3902.0925, grad_fn=<MseLossBackward0>)
train loss: tensor(12484.3906, grad_fn=<MseLossBackward0>)
train loss: tensor(5038.0186, grad_fn=<MseLossBackward0>)
train loss: tensor(12756.7070, grad_fn=<MseLossBackward0>)
train loss: tensor(14935.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(8318.3428, grad_fn=<MseLossBackward0>)
train loss: tensor(14397.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(9002.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(13489.4150, grad_fn=<MseLossBackward0>)
train loss: tensor(3949.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(12673.5049, grad_fn=<MseLossBackward0>)
train loss: tensor(8231.5703, grad_fn=<MseLossBackward0>)
train loss: tensor(5190.9028, grad_fn=<MseLossBackward0>)
train loss: tensor(7364.9966, grad_fn=<MseLossBackward0>)
train loss: tensor(13160.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(5498.8027, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(14165.8604, grad_fn=<MseLossBackward0>)
train loss: tensor(9233.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(16854.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(4512.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(9209.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(13616.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(17100.9512, grad_fn=<MseLossBackward0>)
train loss: tensor(7886.2607, grad_fn=<MseLossBackward0>)
train loss: tensor(3738.3904, grad_fn=<MseLossBackward0>)
train loss: tensor(24560.5547, grad_fn=<MseLossBackward0>)
train loss: tensor(6797.0879, grad_fn=<MseLossBackward0>)
train loss: tensor(20322.4004, grad_fn=<MseLossBackward0>)
train loss: tensor(12936.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(11711.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(12958.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(18255.6133, grad_fn=<MseLossBackward0>)
train loss: tensor(12673.0410, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(4032.0229, grad_fn=<MseLossBackward0>)
train loss: tensor(8801.8955, grad_fn=<MseLossBackward0>)
train loss: tensor(13171.1006, grad_fn=<MseLossBackward0>)
train loss: tensor(12545.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(21620.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(11184.0137, grad_fn=<MseLossBackward0>)
train loss: tensor(8589.5693, grad_fn=<MseLossBackward0>)
train loss: tensor(10610.2041, grad_fn=<MseLossBackward0>)
train loss: tensor(10241.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(6950.7100, grad_fn=<MseLossBackward0>)
train loss: tensor(7421.2686, grad_fn=<MseLossBackward0>)
train loss: tensor(7992.3066, grad_fn=<MseLossBackward0>)
train loss: tensor(10390.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(17877.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(4683.3892, grad_fn=<MseLossBackward0>)
train loss: tensor(8977.3916, grad_fn=<MseLossBackward0>)
train loss: tensor(6540.5415, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(12488.4062, grad_fn=<MseLossBackward0>)
train loss: tensor(49473.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(9839.4004, grad_fn=<MseLossBackward0>)
train loss: tensor(18075.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(14127.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(46348.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(9724.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(7354.6421, grad_fn=<MseLossBackward0>)
train loss: tensor(9521.5986, grad_fn=<MseLossBackward0>)
train loss: tensor(13939.4863, grad_fn=<MseLossBackward0>)
train loss: tensor(3772.1189, grad_fn=<MseLossBackward0>)
train loss: tensor(7485.9766, grad_fn=<MseLossBackward0>)
train loss: tensor(26399.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(19503.4258, grad_fn=<MseLossBackward0>)
train loss: tensor(14852.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(14036.5068, grad_fn=<MseLossBackward0>)
train loss: tensor(10793.0127, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(12877.5244, grad_fn=<MseLossBackward0>)
train loss: tensor(13634.9502, grad_fn=<MseLossBackward0>)
train loss: tensor(4281.4282, grad_fn=<MseLossBackward0>)
train loss: tensor(6316.0840, grad_fn=<MseLossBackward0>)
train loss: tensor(4039.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(12821.0225, grad_fn=<MseLossBackward0>)
train loss: tensor(10651.8633, grad_fn=<MseLossBackward0>)
train loss: tensor(8617.4150, grad_fn=<MseLossBackward0>)
train loss: tensor(5657.8872, grad_fn=<MseLossBackward0>)
train loss: tensor(7515.6001, grad_fn=<MseLossBackward0>)
train loss: tensor(1353.6824, grad_fn=<MseLossBackward0>)
train loss: tensor(11128.7256, grad_fn=<MseLossBackward0>)
train loss: tensor(5764.3911, grad_fn=<MseLossBackward0>)
train loss: tensor(10327.9668, grad_fn=<MseLossBackward0>)
train loss: tensor(5067.9487, grad_fn=<MseLossBackward0>)
train loss: tensor(4735.4702, grad_fn=<MseLossBackward0>)
train loss: tensor(9898.6582, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(2342.3416, grad_fn=<MseLossBackward0>)
train loss: tensor(13061.5293, grad_fn=<MseLossBackward0>)
train loss: tensor(8873.1670, grad_fn=<MseLossBackward0>)
train loss: tensor(7619.3774, grad_fn=<MseLossBackward0>)
train loss: tensor(3593.8560, grad_fn=<MseLossBackward0>)
train loss: tensor(18209.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(20269.0918, grad_fn=<MseLossBackward0>)
train loss: tensor(16005.2100, grad_fn=<MseLossBackward0>)
train loss: tensor(10791.3936, grad_fn=<MseLossBackward0>)
train loss: tensor(8322.2100, grad_fn=<MseLossBackward0>)
train loss: tensor(5781.2534, grad_fn=<MseLossBackward0>)
train loss: tensor(9699.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(11103.3320, grad_fn=<MseLossBackward0>)
train loss: tensor(7049.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(5171.5161, grad_fn=<MseLossBackward0>)
train loss: tensor(5543.9707, grad_fn=<MseLossBackward0>)
train loss: tensor(61745.0625, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(4364.1187, grad_fn=<MseLossBackward0>)
train loss: tensor(15808.3896, grad_fn=<MseLossBackward0>)
train loss: tensor(13874.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(20577.6504, grad_fn=<MseLossBackward0>)
train loss: tensor(6539.3906, grad_fn=<MseLossBackward0>)
train loss: tensor(22734.6660, grad_fn=<MseLossBackward0>)
train loss: tensor(8120.9321, grad_fn=<MseLossBackward0>)
train loss: tensor(2625.1936, grad_fn=<MseLossBackward0>)
train loss: tensor(18625.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(7546.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(8871.6846, grad_fn=<MseLossBackward0>)
train loss: tensor(13752.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(15027.3291, grad_fn=<MseLossBackward0>)
train loss: tensor(6171.6514, grad_fn=<MseLossBackward0>)
train loss: tensor(2274.0266, grad_fn=<MseLossBackward0>)
train loss: tensor(7953.9702, grad_fn=<MseLossBackward0>)
train loss: tensor(19076.2461, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(20030.2324, grad_fn=<MseLossBackward0>)
train loss: tensor(19372.3086, grad_fn=<MseLossBackward0>)
train loss: tensor(9959.2344, grad_fn=<MseLossBackward0>)
train loss: tensor(8914.2432, grad_fn=<MseLossBackward0>)
train loss: tensor(5795.0610, grad_fn=<MseLossBackward0>)
train loss: tensor(13077.6807, grad_fn=<MseLossBackward0>)
train loss: tensor(3209.0742, grad_fn=<MseLossBackward0>)
train loss: tensor(13051.4482, grad_fn=<MseLossBackward0>)
train loss: tensor(10889.0811, grad_fn=<MseLossBackward0>)
train loss: tensor(18366.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(4045.0911, grad_fn=<MseLossBackward0>)
train loss: tensor(6322.2476, grad_fn=<MseLossBackward0>)
train loss: tensor(14596.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(12033.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(10352.6104, grad_fn=<MseLossBackward0>)
train loss: tensor(5964.3384, grad_fn=<MseLossBackward0>)
train loss: tensor(11860.5635, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(11480.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(14556.4492, grad_fn=<MseLossBackward0>)
train loss: tensor(13221.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(5788.4346, grad_fn=<MseLossBackward0>)
train loss: tensor(16583.2207, grad_fn=<MseLossBackward0>)
train loss: tensor(14170.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(12536.3818, grad_fn=<MseLossBackward0>)
train loss: tensor(6498.1011, grad_fn=<MseLossBackward0>)
train loss: tensor(4677.9341, grad_fn=<MseLossBackward0>)
train loss: tensor(3290.9873, grad_fn=<MseLossBackward0>)
train loss: tensor(7532.9897, grad_fn=<MseLossBackward0>)
train loss: tensor(14819.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(6940.1572, grad_fn=<MseLossBackward0>)
train loss: tensor(8969.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(2898.4751, grad_fn=<MseLossBackward0>)
train loss: tensor(8624.8438, grad_fn=<MseLossBackward0>)
train loss: tensor(7735.1709, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(10971.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(12301.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(5528.9976, grad_fn=<MseLossBackward0>)
train loss: tensor(4768.2759, grad_fn=<MseLossBackward0>)
train loss: tensor(15071.1553, grad_fn=<MseLossBackward0>)
train loss: tensor(6850.0483, grad_fn=<MseLossBackward0>)
train loss: tensor(20176.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(4353.8179, grad_fn=<MseLossBackward0>)
train loss: tensor(8040.8032, grad_fn=<MseLossBackward0>)
train loss: tensor(3887.9790, grad_fn=<MseLossBackward0>)
train loss: tensor(6341.8149, grad_fn=<MseLossBackward0>)
train loss: tensor(12417.3291, grad_fn=<MseLossBackward0>)
train loss: tensor(10361.0312, grad_fn=<MseLossBackward0>)
train loss: tensor(9567.5850, grad_fn=<MseLossBackward0>)
train loss: tensor(17329.8848, grad_fn=<MseLossBackward0>)
train loss: tensor(17131.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(12382.8779, grad_fn=<MseLossBackward0>)
train

train loss: tensor(4251.4849, grad_fn=<MseLossBackward0>)
train loss: tensor(12696.7002, grad_fn=<MseLossBackward0>)
train loss: tensor(15130.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(4156.3008, grad_fn=<MseLossBackward0>)
train loss: tensor(11980.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(17304.5664, grad_fn=<MseLossBackward0>)
train loss: tensor(23174.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(6256.6572, grad_fn=<MseLossBackward0>)
train loss: tensor(8612.7148, grad_fn=<MseLossBackward0>)
train loss: tensor(11126.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(4162.4448, grad_fn=<MseLossBackward0>)
train loss: tensor(8374.8994, grad_fn=<MseLossBackward0>)
train loss: tensor(17992.6074, grad_fn=<MseLossBackward0>)
train loss: tensor(9619.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(13619.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(7561.5801, grad_fn=<MseLossBackward0>)
train loss: tensor(87230.8672, grad_fn=<MseLossBackward0>)
train

train loss: tensor(17097.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(20540.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(24903.0195, grad_fn=<MseLossBackward0>)
train loss: tensor(11932.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(7755.7661, grad_fn=<MseLossBackward0>)
train loss: tensor(15292.4951, grad_fn=<MseLossBackward0>)
train loss: tensor(15170.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(64081.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(20587.0215, grad_fn=<MseLossBackward0>)
train loss: tensor(15016.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(9471.7783, grad_fn=<MseLossBackward0>)
train loss: tensor(4617.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(12551.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(11376.0078, grad_fn=<MseLossBackward0>)
train loss: tensor(2446.5806, grad_fn=<MseLossBackward0>)
train loss: tensor(8642.9297, grad_fn=<MseLossBackward0>)
train loss: tensor(14935.3418, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(15209.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(9705.6377, grad_fn=<MseLossBackward0>)
train loss: tensor(11820.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(26129.8086, grad_fn=<MseLossBackward0>)
train loss: tensor(4751.1724, grad_fn=<MseLossBackward0>)
train loss: tensor(5110.0288, grad_fn=<MseLossBackward0>)
train loss: tensor(26054.3848, grad_fn=<MseLossBackward0>)
train loss: tensor(21978.7363, grad_fn=<MseLossBackward0>)
train loss: tensor(11981.9375, grad_fn=<MseLossBackward0>)
train loss: tensor(8712.9756, grad_fn=<MseLossBackward0>)
train loss: tensor(14592.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(6438.8501, grad_fn=<MseLossBackward0>)
train loss: tensor(10351.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(4883.1123, grad_fn=<MseLossBackward0>)
train loss: tensor(15768.6963, grad_fn=<MseLossBackward0>)
train loss: tensor(12441.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(4888.6860, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(4052.8416, grad_fn=<MseLossBackward0>)
train loss: tensor(10821.6172, grad_fn=<MseLossBackward0>)
train loss: tensor(18694.8555, grad_fn=<MseLossBackward0>)
train loss: tensor(7093.7729, grad_fn=<MseLossBackward0>)
train loss: tensor(6404.5063, grad_fn=<MseLossBackward0>)
train loss: tensor(3882.7048, grad_fn=<MseLossBackward0>)
train loss: tensor(21927.3926, grad_fn=<MseLossBackward0>)
train loss: tensor(7407.8931, grad_fn=<MseLossBackward0>)
train loss: tensor(14844.5342, grad_fn=<MseLossBackward0>)
train loss: tensor(5772.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(5706.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(18799.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(6151.1328, grad_fn=<MseLossBackward0>)
train loss: tensor(9455.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(16196.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(17981.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(10149.3770, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(8863.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(7772.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(12021.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(17388.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(15339.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(5314.5610, grad_fn=<MseLossBackward0>)
train loss: tensor(10418.3906, grad_fn=<MseLossBackward0>)
train loss: tensor(19402.7188, grad_fn=<MseLossBackward0>)
train loss: tensor(28262.5195, grad_fn=<MseLossBackward0>)
train loss: tensor(19129.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(5188.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(13553.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(11220.8027, grad_fn=<MseLossBackward0>)
train loss: tensor(5426.9033, grad_fn=<MseLossBackward0>)
train loss: tensor(10757.1309, grad_fn=<MseLossBackward0>)
train loss: tensor(7900.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(10721.0547, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(7374.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(7359.2744, grad_fn=<MseLossBackward0>)
train loss: tensor(7728.5801, grad_fn=<MseLossBackward0>)
train loss: tensor(16769.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(5815.7559, grad_fn=<MseLossBackward0>)
train loss: tensor(5139.9473, grad_fn=<MseLossBackward0>)
train loss: tensor(16280.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(22893.9297, grad_fn=<MseLossBackward0>)
train loss: tensor(3885.6604, grad_fn=<MseLossBackward0>)
train loss: tensor(8338.2178, grad_fn=<MseLossBackward0>)
train loss: tensor(4927.6426, grad_fn=<MseLossBackward0>)
train loss: tensor(1726.1676, grad_fn=<MseLossBackward0>)
train loss: tensor(17186.3926, grad_fn=<MseLossBackward0>)
train loss: tensor(7354.7456, grad_fn=<MseLossBackward0>)
train loss: tensor(14001.4863, grad_fn=<MseLossBackward0>)
train loss: tensor(36224.9531, grad_fn=<MseLossBackward0>)
train loss: tensor(3900.1079, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(7872.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(15587.4082, grad_fn=<MseLossBackward0>)
train loss: tensor(14621.9580, grad_fn=<MseLossBackward0>)
train loss: tensor(10562.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(6928.7861, grad_fn=<MseLossBackward0>)
train loss: tensor(3214.9177, grad_fn=<MseLossBackward0>)
train loss: tensor(13642.5547, grad_fn=<MseLossBackward0>)
train loss: tensor(7511.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(17312.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(9346.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(23348.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(39105.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(8799.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(12035.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(4109.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(10260.6758, grad_fn=<MseLossBackward0>)
train loss: tensor(5656.4976, grad_fn=<MseLossBackward0>)
train

train loss: tensor(7805.7759, grad_fn=<MseLossBackward0>)
train loss: tensor(18744.8672, grad_fn=<MseLossBackward0>)
train loss: tensor(23230.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(7873.5396, grad_fn=<MseLossBackward0>)
train loss: tensor(15764.9658, grad_fn=<MseLossBackward0>)
train loss: tensor(12565.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(15336.4482, grad_fn=<MseLossBackward0>)
train loss: tensor(28610.4883, grad_fn=<MseLossBackward0>)
train loss: tensor(6488.5571, grad_fn=<MseLossBackward0>)
train loss: tensor(2935.8774, grad_fn=<MseLossBackward0>)
train loss: tensor(16778.9551, grad_fn=<MseLossBackward0>)
train loss: tensor(4952.8618, grad_fn=<MseLossBackward0>)
train loss: tensor(9003.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(20186.1191, grad_fn=<MseLossBackward0>)
train loss: tensor(8877.2783, grad_fn=<MseLossBackward0>)
train loss: tensor(4554.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(10408.1973, grad_fn=<MseLossBackward0>)
train

train loss: tensor(2554.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(9854.9326, grad_fn=<MseLossBackward0>)
train loss: tensor(10254.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(13329.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(21128.2930, grad_fn=<MseLossBackward0>)
train loss: tensor(11777.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(20807.0195, grad_fn=<MseLossBackward0>)
train loss: tensor(14581.2451, grad_fn=<MseLossBackward0>)
train loss: tensor(2902.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(6718.4995, grad_fn=<MseLossBackward0>)
train loss: tensor(24007.4844, grad_fn=<MseLossBackward0>)
train loss: tensor(4274.3809, grad_fn=<MseLossBackward0>)
train loss: tensor(10087.5176, grad_fn=<MseLossBackward0>)
train loss: tensor(6959.7905, grad_fn=<MseLossBackward0>)
train loss: tensor(11290.3613, grad_fn=<MseLossBackward0>)
train loss: tensor(13973.0996, grad_fn=<MseLossBackward0>)
train loss: tensor(10692.2158, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(20308.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(15193.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(14993.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(20486.1504, grad_fn=<MseLossBackward0>)
train loss: tensor(5158.8315, grad_fn=<MseLossBackward0>)
train loss: tensor(10544.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(11898.9238, grad_fn=<MseLossBackward0>)
train loss: tensor(14671.7607, grad_fn=<MseLossBackward0>)
train loss: tensor(16011.2979, grad_fn=<MseLossBackward0>)
train loss: tensor(4593.3379, grad_fn=<MseLossBackward0>)
train loss: tensor(5814.0859, grad_fn=<MseLossBackward0>)
train loss: tensor(3036.9558, grad_fn=<MseLossBackward0>)
train loss: tensor(17688.4062, grad_fn=<MseLossBackward0>)
train loss: tensor(5761.7114, grad_fn=<MseLossBackward0>)
train loss: tensor(10272.9551, grad_fn=<MseLossBackward0>)
train loss: tensor(15066.2686, grad_fn=<MseLossBackward0>)
train loss: tensor(19607.8047, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(4308.1362, grad_fn=<MseLossBackward0>)
train loss: tensor(4973.2397, grad_fn=<MseLossBackward0>)
train loss: tensor(4302.1846, grad_fn=<MseLossBackward0>)
train loss: tensor(10954.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(12061.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(8368.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(11891.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(10178.0977, grad_fn=<MseLossBackward0>)
train loss: tensor(15499.2344, grad_fn=<MseLossBackward0>)
train loss: tensor(17908.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(17921.4082, grad_fn=<MseLossBackward0>)
train loss: tensor(17576.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(10662.9678, grad_fn=<MseLossBackward0>)
train loss: tensor(16041.0889, grad_fn=<MseLossBackward0>)
train loss: tensor(98256.6016, grad_fn=<MseLossBackward0>)
train loss: tensor(12205.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(6858.1089, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(19297.4473, grad_fn=<MseLossBackward0>)
train loss: tensor(12135.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(6778.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(6528.7080, grad_fn=<MseLossBackward0>)
train loss: tensor(6640.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(15548.4912, grad_fn=<MseLossBackward0>)
train loss: tensor(3130.8435, grad_fn=<MseLossBackward0>)
train loss: tensor(6756.9580, grad_fn=<MseLossBackward0>)
train loss: tensor(4001.3318, grad_fn=<MseLossBackward0>)
train loss: tensor(17844.8672, grad_fn=<MseLossBackward0>)
train loss: tensor(6575.7358, grad_fn=<MseLossBackward0>)
train loss: tensor(12499.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(105088.5703, grad_fn=<MseLossBackward0>)
train loss: tensor(18998.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(5809.1665, grad_fn=<MseLossBackward0>)
train loss: tensor(48404.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(12581.8906, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(56135.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(14731.9736, grad_fn=<MseLossBackward0>)
train loss: tensor(18746.0605, grad_fn=<MseLossBackward0>)
train loss: tensor(13828.7695, grad_fn=<MseLossBackward0>)
train loss: tensor(7505.8335, grad_fn=<MseLossBackward0>)
train loss: tensor(9036.4854, grad_fn=<MseLossBackward0>)
train loss: tensor(8674., grad_fn=<MseLossBackward0>)
train loss: tensor(2082.6499, grad_fn=<MseLossBackward0>)
train loss: tensor(17333.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(7261.4834, grad_fn=<MseLossBackward0>)
train loss: tensor(7192.6631, grad_fn=<MseLossBackward0>)
train loss: tensor(10645.0977, grad_fn=<MseLossBackward0>)
train loss: tensor(16348.9834, grad_fn=<MseLossBackward0>)
train loss: tensor(4091.3206, grad_fn=<MseLossBackward0>)
train loss: tensor(5833.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(3913.1782, grad_fn=<MseLossBackward0>)
train loss: tensor(4540.2158, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(8043.1611, grad_fn=<MseLossBackward0>)
train loss: tensor(9574.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(7052.8921, grad_fn=<MseLossBackward0>)
train loss: tensor(2558.5383, grad_fn=<MseLossBackward0>)
train loss: tensor(8267.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(4978.3018, grad_fn=<MseLossBackward0>)
train loss: tensor(20627.9863, grad_fn=<MseLossBackward0>)
train loss: tensor(19353.6660, grad_fn=<MseLossBackward0>)
train loss: tensor(19268.9023, grad_fn=<MseLossBackward0>)
train loss: tensor(7605.1704, grad_fn=<MseLossBackward0>)
train loss: tensor(5073.6426, grad_fn=<MseLossBackward0>)
train loss: tensor(13699.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(11629.2354, grad_fn=<MseLossBackward0>)
train loss: tensor(13624.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(5045.2017, grad_fn=<MseLossBackward0>)
train loss: tensor(3568.2830, grad_fn=<MseLossBackward0>)
train loss: tensor(6006.8462, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(8459.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(11492.9619, grad_fn=<MseLossBackward0>)
train loss: tensor(9272.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(13277.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(5680.2773, grad_fn=<MseLossBackward0>)
train loss: tensor(16502.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(12247.6104, grad_fn=<MseLossBackward0>)
train loss: tensor(3950.5984, grad_fn=<MseLossBackward0>)
train loss: tensor(22007.5410, grad_fn=<MseLossBackward0>)
train loss: tensor(1886.4081, grad_fn=<MseLossBackward0>)
train loss: tensor(6778.9722, grad_fn=<MseLossBackward0>)
train loss: tensor(14801.1777, grad_fn=<MseLossBackward0>)
train loss: tensor(7623.9224, grad_fn=<MseLossBackward0>)
train loss: tensor(3460.6321, grad_fn=<MseLossBackward0>)
train loss: tensor(5522.0068, grad_fn=<MseLossBackward0>)
train loss: tensor(8102.3110, grad_fn=<MseLossBackward0>)
train loss: tensor(16315.3477, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(8736.4268, grad_fn=<MseLossBackward0>)
train loss: tensor(9360.1162, grad_fn=<MseLossBackward0>)
train loss: tensor(4940.5903, grad_fn=<MseLossBackward0>)
train loss: tensor(8318.7705, grad_fn=<MseLossBackward0>)
train loss: tensor(11625.4668, grad_fn=<MseLossBackward0>)
train loss: tensor(5521.1699, grad_fn=<MseLossBackward0>)
train loss: tensor(10223.2100, grad_fn=<MseLossBackward0>)
train loss: tensor(8343.5029, grad_fn=<MseLossBackward0>)
train loss: tensor(8360.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(17472.4766, grad_fn=<MseLossBackward0>)
train loss: tensor(20488.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(15433.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(10885.5332, grad_fn=<MseLossBackward0>)
train loss: tensor(7063.3926, grad_fn=<MseLossBackward0>)
train loss: tensor(3200.3318, grad_fn=<MseLossBackward0>)
train loss: tensor(7586.9116, grad_fn=<MseLossBackward0>)
train loss: tensor(7495.5195, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(12481.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(10163.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(5312.1270, grad_fn=<MseLossBackward0>)
train loss: tensor(6061.1079, grad_fn=<MseLossBackward0>)
train loss: tensor(13397.9072, grad_fn=<MseLossBackward0>)
train loss: tensor(17208.3945, grad_fn=<MseLossBackward0>)
train loss: tensor(9201.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(6778.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(3164.4434, grad_fn=<MseLossBackward0>)
train loss: tensor(8306.7949, grad_fn=<MseLossBackward0>)
train loss: tensor(8376.1523, grad_fn=<MseLossBackward0>)
train loss: tensor(2017.9344, grad_fn=<MseLossBackward0>)
train loss: tensor(7493.5942, grad_fn=<MseLossBackward0>)
train loss: tensor(4933.0405, grad_fn=<MseLossBackward0>)
train loss: tensor(12807.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(7334.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(11940.4170, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(10860.8643, grad_fn=<MseLossBackward0>)
train loss: tensor(20272.1562, grad_fn=<MseLossBackward0>)
train loss: tensor(16807.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(4368.9868, grad_fn=<MseLossBackward0>)
train loss: tensor(6403.0669, grad_fn=<MseLossBackward0>)
train loss: tensor(11105.1436, grad_fn=<MseLossBackward0>)
train loss: tensor(10399.3398, grad_fn=<MseLossBackward0>)
train loss: tensor(6066.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(2807.8909, grad_fn=<MseLossBackward0>)
train loss: tensor(19618.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(8703.6797, grad_fn=<MseLossBackward0>)
train loss: tensor(15117.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(7863.2378, grad_fn=<MseLossBackward0>)
train loss: tensor(5151.1597, grad_fn=<MseLossBackward0>)
train loss: tensor(16537.5312, grad_fn=<MseLossBackward0>)
train loss: tensor(5085.4438, grad_fn=<MseLossBackward0>)
train loss: tensor(8923.9561, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(19509.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(4386.0073, grad_fn=<MseLossBackward0>)
train loss: tensor(5819.5166, grad_fn=<MseLossBackward0>)
train loss: tensor(10190.5215, grad_fn=<MseLossBackward0>)
train loss: tensor(16070.2490, grad_fn=<MseLossBackward0>)
train loss: tensor(5951.8613, grad_fn=<MseLossBackward0>)
train loss: tensor(7969.6602, grad_fn=<MseLossBackward0>)
train loss: tensor(8264.6963, grad_fn=<MseLossBackward0>)
train loss: tensor(6585.6528, grad_fn=<MseLossBackward0>)
train loss: tensor(6861.2964, grad_fn=<MseLossBackward0>)
train loss: tensor(4652.6782, grad_fn=<MseLossBackward0>)
train loss: tensor(4450.3452, grad_fn=<MseLossBackward0>)
train loss: tensor(6408.6382, grad_fn=<MseLossBackward0>)
train loss: tensor(9299.2578, grad_fn=<MseLossBackward0>)
train loss: tensor(14603.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(8713.7598, grad_fn=<MseLossBackward0>)
train loss: tensor(9385.0830, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(17137.4258, grad_fn=<MseLossBackward0>)
train loss: tensor(19970.0371, grad_fn=<MseLossBackward0>)
train loss: tensor(11021.1670, grad_fn=<MseLossBackward0>)
train loss: tensor(11579.0635, grad_fn=<MseLossBackward0>)
train loss: tensor(9599.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(8488.7002, grad_fn=<MseLossBackward0>)
train loss: tensor(10546.0068, grad_fn=<MseLossBackward0>)
train loss: tensor(2286.5818, grad_fn=<MseLossBackward0>)
train loss: tensor(13865.3906, grad_fn=<MseLossBackward0>)
train loss: tensor(7509.1616, grad_fn=<MseLossBackward0>)
train loss: tensor(7181.7163, grad_fn=<MseLossBackward0>)
train loss: tensor(12896.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(6352.4897, grad_fn=<MseLossBackward0>)
train loss: tensor(8322.8008, grad_fn=<MseLossBackward0>)
train loss: tensor(15340.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(8550.0410, grad_fn=<MseLossBackward0>)
train loss: tensor(6207.6631, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(9203.9932, grad_fn=<MseLossBackward0>)
train loss: tensor(13246.2510, grad_fn=<MseLossBackward0>)
train loss: tensor(18570.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(7246.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(14517.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(9383.1553, grad_fn=<MseLossBackward0>)
train loss: tensor(5854.0562, grad_fn=<MseLossBackward0>)
train loss: tensor(8723.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(5564.2959, grad_fn=<MseLossBackward0>)
train loss: tensor(2892.5815, grad_fn=<MseLossBackward0>)
train loss: tensor(6506.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(23213.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(3311.1565, grad_fn=<MseLossBackward0>)
train loss: tensor(10779.0625, grad_fn=<MseLossBackward0>)
train loss: tensor(5366.8193, grad_fn=<MseLossBackward0>)
train loss: tensor(15012.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(6344.9111, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(10337.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(11604.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(13304.9502, grad_fn=<MseLossBackward0>)
train loss: tensor(78084.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(19712.5801, grad_fn=<MseLossBackward0>)
train loss: tensor(7400.1475, grad_fn=<MseLossBackward0>)
train loss: tensor(7814.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(6508.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(4786.9683, grad_fn=<MseLossBackward0>)
train loss: tensor(17461.3672, grad_fn=<MseLossBackward0>)
train loss: tensor(108257.8594, grad_fn=<MseLossBackward0>)
train loss: tensor(4373.4272, grad_fn=<MseLossBackward0>)
train loss: tensor(24921.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(4354.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(19303.4082, grad_fn=<MseLossBackward0>)
train loss: tensor(16602.3809, grad_fn=<MseLossBackward0>)
train loss: tensor(6063.9331, grad_fn=<MseLossBackward0>)
tra

train loss: tensor(12289.2764, grad_fn=<MseLossBackward0>)
train loss: tensor(14312.1182, grad_fn=<MseLossBackward0>)
train loss: tensor(9580.0137, grad_fn=<MseLossBackward0>)
train loss: tensor(4086.5894, grad_fn=<MseLossBackward0>)
train loss: tensor(6888.5864, grad_fn=<MseLossBackward0>)
train loss: tensor(11353.1924, grad_fn=<MseLossBackward0>)
train loss: tensor(4189.3276, grad_fn=<MseLossBackward0>)
train loss: tensor(14437.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(21223.9512, grad_fn=<MseLossBackward0>)
train loss: tensor(8359.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(16763.2168, grad_fn=<MseLossBackward0>)
train loss: tensor(4153.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(9683.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(2298.2612, grad_fn=<MseLossBackward0>)
train loss: tensor(7332.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(12178.9756, grad_fn=<MseLossBackward0>)
train loss: tensor(3933.2637, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(13485.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(14148.4121, grad_fn=<MseLossBackward0>)
train loss: tensor(13408.0762, grad_fn=<MseLossBackward0>)
train loss: tensor(6987.1958, grad_fn=<MseLossBackward0>)
train loss: tensor(7029.8442, grad_fn=<MseLossBackward0>)
train loss: tensor(12241.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(14032.0459, grad_fn=<MseLossBackward0>)
train loss: tensor(12585.4951, grad_fn=<MseLossBackward0>)
train loss: tensor(53395.4766, grad_fn=<MseLossBackward0>)
train loss: tensor(17627.1074, grad_fn=<MseLossBackward0>)
train loss: tensor(15797.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(24557.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(10611.9033, grad_fn=<MseLossBackward0>)
train loss: tensor(5095.2646, grad_fn=<MseLossBackward0>)
train loss: tensor(6714.2505, grad_fn=<MseLossBackward0>)
train loss: tensor(3737.3762, grad_fn=<MseLossBackward0>)
train loss: tensor(21342.3125, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(12484.5850, grad_fn=<MseLossBackward0>)
train loss: tensor(52428.2461, grad_fn=<MseLossBackward0>)
train loss: tensor(7384.5649, grad_fn=<MseLossBackward0>)
train loss: tensor(7510.2402, grad_fn=<MseLossBackward0>)
train loss: tensor(7726.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(19506.0234, grad_fn=<MseLossBackward0>)
train loss: tensor(13069.7852, grad_fn=<MseLossBackward0>)
train loss: tensor(4190.0615, grad_fn=<MseLossBackward0>)
train loss: tensor(10145.8955, grad_fn=<MseLossBackward0>)
train loss: tensor(3813.7808, grad_fn=<MseLossBackward0>)
train loss: tensor(2372.1733, grad_fn=<MseLossBackward0>)
train loss: tensor(10330.5664, grad_fn=<MseLossBackward0>)
train loss: tensor(7199.3804, grad_fn=<MseLossBackward0>)
train loss: tensor(13498.0576, grad_fn=<MseLossBackward0>)
train loss: tensor(3575.1619, grad_fn=<MseLossBackward0>)
train loss: tensor(7712.1616, grad_fn=<MseLossBackward0>)
train loss: tensor(11529.6006, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(7589.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(2394.8513, grad_fn=<MseLossBackward0>)
train loss: tensor(10133.5283, grad_fn=<MseLossBackward0>)
train loss: tensor(6603.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(3906.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(4332.9443, grad_fn=<MseLossBackward0>)
train loss: tensor(15436.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(7316.5322, grad_fn=<MseLossBackward0>)
train loss: tensor(10640.8457, grad_fn=<MseLossBackward0>)
train loss: tensor(80952.8359, grad_fn=<MseLossBackward0>)
train loss: tensor(6345.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(5125.2285, grad_fn=<MseLossBackward0>)
train loss: tensor(9986.4248, grad_fn=<MseLossBackward0>)
train loss: tensor(12124.6230, grad_fn=<MseLossBackward0>)
train loss: tensor(11130.5498, grad_fn=<MseLossBackward0>)
train loss: tensor(3356.4465, grad_fn=<MseLossBackward0>)
train loss: tensor(4693.1191, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(11874.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(5261.6284, grad_fn=<MseLossBackward0>)
train loss: tensor(14127.9248, grad_fn=<MseLossBackward0>)
train loss: tensor(24400.3301, grad_fn=<MseLossBackward0>)
train loss: tensor(13385.3955, grad_fn=<MseLossBackward0>)
train loss: tensor(11613.6553, grad_fn=<MseLossBackward0>)
train loss: tensor(12839.9238, grad_fn=<MseLossBackward0>)
train loss: tensor(5908.4346, grad_fn=<MseLossBackward0>)
train loss: tensor(10058.5273, grad_fn=<MseLossBackward0>)
train loss: tensor(6111.5918, grad_fn=<MseLossBackward0>)
train loss: tensor(11885.2070, grad_fn=<MseLossBackward0>)
train loss: tensor(8807.4434, grad_fn=<MseLossBackward0>)
train loss: tensor(100061.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(12269.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(20021.4395, grad_fn=<MseLossBackward0>)
train loss: tensor(6672.9180, grad_fn=<MseLossBackward0>)
train loss: tensor(12780.8711, grad_fn=<MseLossBackward0>)
t

train loss: tensor(18832.9199, grad_fn=<MseLossBackward0>)
train loss: tensor(28332.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(5989.7837, grad_fn=<MseLossBackward0>)
train loss: tensor(13718.0938, grad_fn=<MseLossBackward0>)
train loss: tensor(983.9518, grad_fn=<MseLossBackward0>)
train loss: tensor(13870.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(13415.5205, grad_fn=<MseLossBackward0>)
train loss: tensor(8794.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(10638.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(7863.0444, grad_fn=<MseLossBackward0>)
train loss: tensor(15991.2354, grad_fn=<MseLossBackward0>)
train loss: tensor(24434.9590, grad_fn=<MseLossBackward0>)
train loss: tensor(7408.6621, grad_fn=<MseLossBackward0>)
train loss: tensor(6318.8745, grad_fn=<MseLossBackward0>)
train loss: tensor(10283.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(15017.0742, grad_fn=<MseLossBackward0>)
train loss: tensor(5201.0571, grad_fn=<MseLossBackward0>)
train

train loss: tensor(16605.5605, grad_fn=<MseLossBackward0>)
train loss: tensor(19357.1914, grad_fn=<MseLossBackward0>)
train loss: tensor(3859.0806, grad_fn=<MseLossBackward0>)
train loss: tensor(8871.6221, grad_fn=<MseLossBackward0>)
train loss: tensor(11731.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(14145.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(10695.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(17380.0742, grad_fn=<MseLossBackward0>)
train loss: tensor(15063.6963, grad_fn=<MseLossBackward0>)
train loss: tensor(9227.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(16719.6328, grad_fn=<MseLossBackward0>)
train loss: tensor(3644.6011, grad_fn=<MseLossBackward0>)
train loss: tensor(14397.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(8132.2085, grad_fn=<MseLossBackward0>)
train loss: tensor(4066.5459, grad_fn=<MseLossBackward0>)
train loss: tensor(8465.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(4313.9619, grad_fn=<MseLossBackward0>)
train

train loss: tensor(16578.1094, grad_fn=<MseLossBackward0>)
train loss: tensor(4808.6738, grad_fn=<MseLossBackward0>)
train loss: tensor(8401.6318, grad_fn=<MseLossBackward0>)
train loss: tensor(6430.8569, grad_fn=<MseLossBackward0>)
train loss: tensor(47467.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(9709.9258, grad_fn=<MseLossBackward0>)
train loss: tensor(7507.4712, grad_fn=<MseLossBackward0>)
train loss: tensor(13580.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(7276.5669, grad_fn=<MseLossBackward0>)
train loss: tensor(10373.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(6568.0532, grad_fn=<MseLossBackward0>)
train loss: tensor(11155.8877, grad_fn=<MseLossBackward0>)
train loss: tensor(7977.8755, grad_fn=<MseLossBackward0>)
train loss: tensor(4467.4810, grad_fn=<MseLossBackward0>)
train loss: tensor(19010.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(19795.9570, grad_fn=<MseLossBackward0>)
train loss: tensor(11854.9443, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(16919.0117, grad_fn=<MseLossBackward0>)
train loss: tensor(10953.6729, grad_fn=<MseLossBackward0>)
train loss: tensor(7129.4878, grad_fn=<MseLossBackward0>)
train loss: tensor(6338.6646, grad_fn=<MseLossBackward0>)
train loss: tensor(6585.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(15690.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(12636.1807, grad_fn=<MseLossBackward0>)
train loss: tensor(9892.5010, grad_fn=<MseLossBackward0>)
train loss: tensor(4799.4199, grad_fn=<MseLossBackward0>)
train loss: tensor(2031.7488, grad_fn=<MseLossBackward0>)
train loss: tensor(8635.2480, grad_fn=<MseLossBackward0>)
train loss: tensor(77849.1875, grad_fn=<MseLossBackward0>)
train loss: tensor(17239.5352, grad_fn=<MseLossBackward0>)
train loss: tensor(11582.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(9273.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(5621.2524, grad_fn=<MseLossBackward0>)
train loss: tensor(4819.6631, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5587.0854, grad_fn=<MseLossBackward0>)
train loss: tensor(11610.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(9221.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(12794.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(9558.3252, grad_fn=<MseLossBackward0>)
train loss: tensor(5330.3398, grad_fn=<MseLossBackward0>)
train loss: tensor(5296.4868, grad_fn=<MseLossBackward0>)
train loss: tensor(12675.5703, grad_fn=<MseLossBackward0>)
train loss: tensor(3954.9041, grad_fn=<MseLossBackward0>)
train loss: tensor(9776.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(4287.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(5546.1914, grad_fn=<MseLossBackward0>)
train loss: tensor(10430.1436, grad_fn=<MseLossBackward0>)
train loss: tensor(15390.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(3375.7646, grad_fn=<MseLossBackward0>)
train loss: tensor(18799.2617, grad_fn=<MseLossBackward0>)
train loss: tensor(10432.0283, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5580.2744, grad_fn=<MseLossBackward0>)
train loss: tensor(4740.7925, grad_fn=<MseLossBackward0>)
train loss: tensor(12845.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(8263.7949, grad_fn=<MseLossBackward0>)
train loss: tensor(4937.0371, grad_fn=<MseLossBackward0>)
train loss: tensor(9763.4717, grad_fn=<MseLossBackward0>)
train loss: tensor(4316.8032, grad_fn=<MseLossBackward0>)
train loss: tensor(9020.8936, grad_fn=<MseLossBackward0>)
train loss: tensor(5606.5400, grad_fn=<MseLossBackward0>)
train loss: tensor(13751.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(7828.6406, grad_fn=<MseLossBackward0>)
train loss: tensor(20807.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(16103.3945, grad_fn=<MseLossBackward0>)
train loss: tensor(14259.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(17500.8672, grad_fn=<MseLossBackward0>)
train loss: tensor(7057.3569, grad_fn=<MseLossBackward0>)
train loss: tensor(12173.8525, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(16232.7969, grad_fn=<MseLossBackward0>)
train loss: tensor(16905.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(3179.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(6545.7690, grad_fn=<MseLossBackward0>)
train loss: tensor(7606.1655, grad_fn=<MseLossBackward0>)
train loss: tensor(16615.0547, grad_fn=<MseLossBackward0>)
train loss: tensor(15154.7441, grad_fn=<MseLossBackward0>)
train loss: tensor(7367.3296, grad_fn=<MseLossBackward0>)
train loss: tensor(11638.9873, grad_fn=<MseLossBackward0>)
train loss: tensor(8709.5850, grad_fn=<MseLossBackward0>)
train loss: tensor(17978.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(4624.2466, grad_fn=<MseLossBackward0>)
train loss: tensor(23808.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(4423.0132, grad_fn=<MseLossBackward0>)
train loss: tensor(6208.3335, grad_fn=<MseLossBackward0>)
train loss: tensor(5053.1699, grad_fn=<MseLossBackward0>)
train loss: tensor(6183.7603, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5680.8472, grad_fn=<MseLossBackward0>)
train loss: tensor(10353.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(5253.2881, grad_fn=<MseLossBackward0>)
train loss: tensor(8961.8096, grad_fn=<MseLossBackward0>)
train loss: tensor(10266.9072, grad_fn=<MseLossBackward0>)
train loss: tensor(7026.6528, grad_fn=<MseLossBackward0>)
train loss: tensor(10552.5391, grad_fn=<MseLossBackward0>)
train loss: tensor(21437.7246, grad_fn=<MseLossBackward0>)
train loss: tensor(7673.7964, grad_fn=<MseLossBackward0>)
train loss: tensor(8816.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(9363.1426, grad_fn=<MseLossBackward0>)
train loss: tensor(4374.2959, grad_fn=<MseLossBackward0>)
train loss: tensor(27361.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(6529.9028, grad_fn=<MseLossBackward0>)
train loss: tensor(18768.9980, grad_fn=<MseLossBackward0>)
train loss: tensor(10164.0869, grad_fn=<MseLossBackward0>)
train loss: tensor(10400.5098, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(12463.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(6369.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(9052.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(4431.0151, grad_fn=<MseLossBackward0>)
train loss: tensor(10002.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(11436.7744, grad_fn=<MseLossBackward0>)
train loss: tensor(8109.5864, grad_fn=<MseLossBackward0>)
train loss: tensor(7394.8188, grad_fn=<MseLossBackward0>)
train loss: tensor(8036.5566, grad_fn=<MseLossBackward0>)
train loss: tensor(13649.9590, grad_fn=<MseLossBackward0>)
train loss: tensor(7487.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(13550.5664, grad_fn=<MseLossBackward0>)
train loss: tensor(10349.2910, grad_fn=<MseLossBackward0>)
train loss: tensor(15314.6357, grad_fn=<MseLossBackward0>)
train loss: tensor(7773.0249, grad_fn=<MseLossBackward0>)
train loss: tensor(5888.5356, grad_fn=<MseLossBackward0>)
train loss: tensor(7756.5879, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5956.1245, grad_fn=<MseLossBackward0>)
train loss: tensor(81175.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(13920.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(6770.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(9047.4736, grad_fn=<MseLossBackward0>)
train loss: tensor(3093.2014, grad_fn=<MseLossBackward0>)
train loss: tensor(22335.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(21443.9727, grad_fn=<MseLossBackward0>)
train loss: tensor(22958.2246, grad_fn=<MseLossBackward0>)
train loss: tensor(9938.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(2753.9619, grad_fn=<MseLossBackward0>)
train loss: tensor(3432.6011, grad_fn=<MseLossBackward0>)
train loss: tensor(13964.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(4199.1372, grad_fn=<MseLossBackward0>)
train loss: tensor(9012.3711, grad_fn=<MseLossBackward0>)
train loss: tensor(4223.2681, grad_fn=<MseLossBackward0>)
train loss: tensor(9192.4824, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(13982.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(19059.8320, grad_fn=<MseLossBackward0>)
train loss: tensor(4875.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(9115.2646, grad_fn=<MseLossBackward0>)
train loss: tensor(9151.9736, grad_fn=<MseLossBackward0>)
train loss: tensor(17562.2461, grad_fn=<MseLossBackward0>)
train loss: tensor(5144.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(6855.9585, grad_fn=<MseLossBackward0>)
train loss: tensor(18760.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(4380.1235, grad_fn=<MseLossBackward0>)
train loss: tensor(12100.5166, grad_fn=<MseLossBackward0>)
train loss: tensor(13364.1230, grad_fn=<MseLossBackward0>)
train loss: tensor(10312.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(10390.3184, grad_fn=<MseLossBackward0>)
train loss: tensor(14836.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(9258.0947, grad_fn=<MseLossBackward0>)
train loss: tensor(4194.5542, grad_fn=<MseLossBackward0>)
train

train loss: tensor(5823.0649, grad_fn=<MseLossBackward0>)
train loss: tensor(4008.2144, grad_fn=<MseLossBackward0>)
train loss: tensor(4012.5381, grad_fn=<MseLossBackward0>)
train loss: tensor(8232.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(11373.7393, grad_fn=<MseLossBackward0>)
train loss: tensor(9018.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(5886.8096, grad_fn=<MseLossBackward0>)
train loss: tensor(14425.8486, grad_fn=<MseLossBackward0>)
train loss: tensor(20206.2930, grad_fn=<MseLossBackward0>)
train loss: tensor(4723.7192, grad_fn=<MseLossBackward0>)
train loss: tensor(5081.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(5122.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(7618.3501, grad_fn=<MseLossBackward0>)
train loss: tensor(10175.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(10752.6133, grad_fn=<MseLossBackward0>)
train loss: tensor(8137.9033, grad_fn=<MseLossBackward0>)
train loss: tensor(14303.5488, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(18311.0723, grad_fn=<MseLossBackward0>)
train loss: tensor(7609.2290, grad_fn=<MseLossBackward0>)
train loss: tensor(11506.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(16355.9492, grad_fn=<MseLossBackward0>)
train loss: tensor(6639.0386, grad_fn=<MseLossBackward0>)
train loss: tensor(11766.5254, grad_fn=<MseLossBackward0>)
train loss: tensor(5719.6982, grad_fn=<MseLossBackward0>)
train loss: tensor(17505.4316, grad_fn=<MseLossBackward0>)
train loss: tensor(19292.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(19625.2637, grad_fn=<MseLossBackward0>)
train loss: tensor(11950.8320, grad_fn=<MseLossBackward0>)
train loss: tensor(6817.6763, grad_fn=<MseLossBackward0>)
train loss: tensor(5557.6704, grad_fn=<MseLossBackward0>)
train loss: tensor(11116.5967, grad_fn=<MseLossBackward0>)
train loss: tensor(3588.4548, grad_fn=<MseLossBackward0>)
train loss: tensor(18445.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(5304.1494, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(8687.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(6323.0630, grad_fn=<MseLossBackward0>)
train loss: tensor(22613.2266, grad_fn=<MseLossBackward0>)
train loss: tensor(9879.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(14478.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(8463.5449, grad_fn=<MseLossBackward0>)
train loss: tensor(9147.9541, grad_fn=<MseLossBackward0>)
train loss: tensor(8549.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(2807.1138, grad_fn=<MseLossBackward0>)
train loss: tensor(10047.7217, grad_fn=<MseLossBackward0>)
train loss: tensor(3748.8081, grad_fn=<MseLossBackward0>)
train loss: tensor(4449.4912, grad_fn=<MseLossBackward0>)
train loss: tensor(4155.2173, grad_fn=<MseLossBackward0>)
train loss: tensor(3951.6921, grad_fn=<MseLossBackward0>)
train loss: tensor(3791.0427, grad_fn=<MseLossBackward0>)
train loss: tensor(17828.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(5483.5776, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(17141.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(16468.3672, grad_fn=<MseLossBackward0>)
train loss: tensor(6040.2495, grad_fn=<MseLossBackward0>)
train loss: tensor(9092.6631, grad_fn=<MseLossBackward0>)
train loss: tensor(4556.0015, grad_fn=<MseLossBackward0>)
train loss: tensor(56335.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(6381.7017, grad_fn=<MseLossBackward0>)
train loss: tensor(18786.1445, grad_fn=<MseLossBackward0>)
train loss: tensor(10421.8545, grad_fn=<MseLossBackward0>)
train loss: tensor(3515.6526, grad_fn=<MseLossBackward0>)
train loss: tensor(3772.2068, grad_fn=<MseLossBackward0>)
train loss: tensor(9950.4189, grad_fn=<MseLossBackward0>)
train loss: tensor(3197.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(12580.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(3156.2290, grad_fn=<MseLossBackward0>)
train loss: tensor(5184.2236, grad_fn=<MseLossBackward0>)
train loss: tensor(3134.8506, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(6709.1108, grad_fn=<MseLossBackward0>)
train loss: tensor(12223.0850, grad_fn=<MseLossBackward0>)
train loss: tensor(3960.3635, grad_fn=<MseLossBackward0>)
train loss: tensor(4030.3132, grad_fn=<MseLossBackward0>)
train loss: tensor(11660.8770, grad_fn=<MseLossBackward0>)
train loss: tensor(14372.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(4041.3506, grad_fn=<MseLossBackward0>)
train loss: tensor(12433.8525, grad_fn=<MseLossBackward0>)
train loss: tensor(8571.4697, grad_fn=<MseLossBackward0>)
train loss: tensor(12132.4580, grad_fn=<MseLossBackward0>)
train loss: tensor(7010.8633, grad_fn=<MseLossBackward0>)
train loss: tensor(6728.6079, grad_fn=<MseLossBackward0>)
train loss: tensor(3901.5427, grad_fn=<MseLossBackward0>)
train loss: tensor(9853.9072, grad_fn=<MseLossBackward0>)
train loss: tensor(11261.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(4707.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(13122.2646, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(2976.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(15906., grad_fn=<MseLossBackward0>)
train loss: tensor(7977.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(13686.1260, grad_fn=<MseLossBackward0>)
train loss: tensor(2579.9771, grad_fn=<MseLossBackward0>)
train loss: tensor(12589.7480, grad_fn=<MseLossBackward0>)
train loss: tensor(10230.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(8960.1045, grad_fn=<MseLossBackward0>)
train loss: tensor(9548.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(7338.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(13020.5000, grad_fn=<MseLossBackward0>)
train loss: tensor(7352.0405, grad_fn=<MseLossBackward0>)
train loss: tensor(2996.4653, grad_fn=<MseLossBackward0>)
train loss: tensor(18449.9395, grad_fn=<MseLossBackward0>)
train loss: tensor(6914.2148, grad_fn=<MseLossBackward0>)
train loss: tensor(5547.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(15408.5293, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(11020.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(15120.0430, grad_fn=<MseLossBackward0>)
train loss: tensor(4929.9858, grad_fn=<MseLossBackward0>)
train loss: tensor(9600.8086, grad_fn=<MseLossBackward0>)
train loss: tensor(9050.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(13415.3096, grad_fn=<MseLossBackward0>)
train loss: tensor(17921.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(6788.8315, grad_fn=<MseLossBackward0>)
train loss: tensor(14837.7178, grad_fn=<MseLossBackward0>)
train loss: tensor(11628.4121, grad_fn=<MseLossBackward0>)
train loss: tensor(13057.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(7019.6948, grad_fn=<MseLossBackward0>)
train loss: tensor(4689.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(13823.1191, grad_fn=<MseLossBackward0>)
train loss: tensor(6108.6426, grad_fn=<MseLossBackward0>)
train loss: tensor(6938.9683, grad_fn=<MseLossBackward0>)
train loss: tensor(11553.4404, grad_fn=<MseLossBackward0>)
train

train loss: tensor(6744.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(5044.0225, grad_fn=<MseLossBackward0>)
train loss: tensor(4526.2949, grad_fn=<MseLossBackward0>)
train loss: tensor(14103.5254, grad_fn=<MseLossBackward0>)
train loss: tensor(19282.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(11845.6064, grad_fn=<MseLossBackward0>)
train loss: tensor(4404.8428, grad_fn=<MseLossBackward0>)
train loss: tensor(2961.6418, grad_fn=<MseLossBackward0>)
train loss: tensor(8345.6035, grad_fn=<MseLossBackward0>)
train loss: tensor(6028.2808, grad_fn=<MseLossBackward0>)
train loss: tensor(6341.4912, grad_fn=<MseLossBackward0>)
train loss: tensor(9608.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(5774.9517, grad_fn=<MseLossBackward0>)
train loss: tensor(16655.6328, grad_fn=<MseLossBackward0>)
train loss: tensor(35883.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(8694.4746, grad_fn=<MseLossBackward0>)
train loss: tensor(11856.8779, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(7518.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(4669.2437, grad_fn=<MseLossBackward0>)
train loss: tensor(8366.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(12628.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(6727.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(15990.0381, grad_fn=<MseLossBackward0>)
train loss: tensor(4308.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(16021.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(1028.9296, grad_fn=<MseLossBackward0>)
train loss: tensor(3231.5886, grad_fn=<MseLossBackward0>)
train loss: tensor(10324.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(16717.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(4189.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(4759.5205, grad_fn=<MseLossBackward0>)
train loss: tensor(7530.8003, grad_fn=<MseLossBackward0>)
train loss: tensor(5663.1885, grad_fn=<MseLossBackward0>)
train loss: tensor(70479.6875, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5269.9590, grad_fn=<MseLossBackward0>)
train loss: tensor(8088.3096, grad_fn=<MseLossBackward0>)
train loss: tensor(6382.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(2769.5569, grad_fn=<MseLossBackward0>)
train loss: tensor(6934.7539, grad_fn=<MseLossBackward0>)
train loss: tensor(7442.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(9612.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(16971.8086, grad_fn=<MseLossBackward0>)
train loss: tensor(6805.9941, grad_fn=<MseLossBackward0>)
train loss: tensor(4475.2959, grad_fn=<MseLossBackward0>)
train loss: tensor(4259.6079, grad_fn=<MseLossBackward0>)
train loss: tensor(9401.1699, grad_fn=<MseLossBackward0>)
train loss: tensor(7825.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(13121.0918, grad_fn=<MseLossBackward0>)
train loss: tensor(19464.2656, grad_fn=<MseLossBackward0>)
train loss: tensor(18728.6113, grad_fn=<MseLossBackward0>)
train loss: tensor(3228.6636, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(18015.5352, grad_fn=<MseLossBackward0>)
train loss: tensor(4188.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(3830.2310, grad_fn=<MseLossBackward0>)
train loss: tensor(10305.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(9218.6270, grad_fn=<MseLossBackward0>)
train loss: tensor(11154.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(18122.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(10865.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(10620.1416, grad_fn=<MseLossBackward0>)
train loss: tensor(7897.9541, grad_fn=<MseLossBackward0>)
train loss: tensor(9804.6299, grad_fn=<MseLossBackward0>)
train loss: tensor(2287.1462, grad_fn=<MseLossBackward0>)
train loss: tensor(15757.2568, grad_fn=<MseLossBackward0>)
train loss: tensor(8175.9478, grad_fn=<MseLossBackward0>)
train loss: tensor(7784.2505, grad_fn=<MseLossBackward0>)
train loss: tensor(12137.7354, grad_fn=<MseLossBackward0>)
train loss: tensor(8650.2490, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(6521.8208, grad_fn=<MseLossBackward0>)
train loss: tensor(5715.2817, grad_fn=<MseLossBackward0>)
train loss: tensor(6721.1724, grad_fn=<MseLossBackward0>)
train loss: tensor(10789.6943, grad_fn=<MseLossBackward0>)
train loss: tensor(5927.4741, grad_fn=<MseLossBackward0>)
train loss: tensor(7480.2744, grad_fn=<MseLossBackward0>)
train loss: tensor(9423.3271, grad_fn=<MseLossBackward0>)
train loss: tensor(10881.5869, grad_fn=<MseLossBackward0>)
train loss: tensor(2694.6499, grad_fn=<MseLossBackward0>)
train loss: tensor(10394.0615, grad_fn=<MseLossBackward0>)
train loss: tensor(11922.3457, grad_fn=<MseLossBackward0>)
train loss: tensor(5009.1216, grad_fn=<MseLossBackward0>)
train loss: tensor(17692.8340, grad_fn=<MseLossBackward0>)
train loss: tensor(15516.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(20351.8184, grad_fn=<MseLossBackward0>)
train loss: tensor(12099.3604, grad_fn=<MseLossBackward0>)
train loss: tensor(12579.3408, grad_fn=<MseLossBackward0>)
train

train loss: tensor(14729.3584, grad_fn=<MseLossBackward0>)
train loss: tensor(13407.5459, grad_fn=<MseLossBackward0>)
train loss: tensor(18911.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(13071.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(14598.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(4256.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(15271.3662, grad_fn=<MseLossBackward0>)
train loss: tensor(5902.6709, grad_fn=<MseLossBackward0>)
train loss: tensor(6274.8364, grad_fn=<MseLossBackward0>)
train loss: tensor(3984.6731, grad_fn=<MseLossBackward0>)
train loss: tensor(21325.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(6132.2563, grad_fn=<MseLossBackward0>)
train loss: tensor(11757.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(16824.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(9745.4834, grad_fn=<MseLossBackward0>)
train loss: tensor(6918.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(9347.7930, grad_fn=<MseLossBackward0>)
train

train loss: tensor(3063.3669, grad_fn=<MseLossBackward0>)
train loss: tensor(9498.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(4998.5220, grad_fn=<MseLossBackward0>)
train loss: tensor(6401.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(10255.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(6803.1245, grad_fn=<MseLossBackward0>)
train loss: tensor(769.7097, grad_fn=<MseLossBackward0>)
train loss: tensor(12660.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(3309.3025, grad_fn=<MseLossBackward0>)
train loss: tensor(8108.3218, grad_fn=<MseLossBackward0>)
train loss: tensor(11565.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(5868.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(2099.1509, grad_fn=<MseLossBackward0>)
train loss: tensor(14902.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(4554.7520, grad_fn=<MseLossBackward0>)
train loss: tensor(9364.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(18462.3887, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(10061.3018, grad_fn=<MseLossBackward0>)
train loss: tensor(8585.2578, grad_fn=<MseLossBackward0>)
train loss: tensor(6580.5024, grad_fn=<MseLossBackward0>)
train loss: tensor(13472.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(4707.3892, grad_fn=<MseLossBackward0>)
train loss: tensor(6666.8765, grad_fn=<MseLossBackward0>)
train loss: tensor(8579.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(4031.5007, grad_fn=<MseLossBackward0>)
train loss: tensor(2488.1233, grad_fn=<MseLossBackward0>)
train loss: tensor(1405.8761, grad_fn=<MseLossBackward0>)
train loss: tensor(8411.1562, grad_fn=<MseLossBackward0>)
train loss: tensor(6544.4771, grad_fn=<MseLossBackward0>)
train loss: tensor(6653.8047, grad_fn=<MseLossBackward0>)
train loss: tensor(9080.3350, grad_fn=<MseLossBackward0>)
train loss: tensor(15336.3955, grad_fn=<MseLossBackward0>)
train loss: tensor(7435.7119, grad_fn=<MseLossBackward0>)
train loss: tensor(14255.5762, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(11759.7100, grad_fn=<MseLossBackward0>)
train loss: tensor(6769.6958, grad_fn=<MseLossBackward0>)
train loss: tensor(4844.0288, grad_fn=<MseLossBackward0>)
train loss: tensor(12005.9463, grad_fn=<MseLossBackward0>)
train loss: tensor(3965.2629, grad_fn=<MseLossBackward0>)
train loss: tensor(9293.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(7940.9956, grad_fn=<MseLossBackward0>)
train loss: tensor(3913.1670, grad_fn=<MseLossBackward0>)
train loss: tensor(3277.5396, grad_fn=<MseLossBackward0>)
train loss: tensor(11539.5596, grad_fn=<MseLossBackward0>)
train loss: tensor(6173.4404, grad_fn=<MseLossBackward0>)
train loss: tensor(7511.0269, grad_fn=<MseLossBackward0>)
train loss: tensor(6870.5566, grad_fn=<MseLossBackward0>)
train loss: tensor(4719.9956, grad_fn=<MseLossBackward0>)
train loss: tensor(9311.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(18666.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(9467.4736, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(5083.6421, grad_fn=<MseLossBackward0>)
train loss: tensor(8763.3252, grad_fn=<MseLossBackward0>)
train loss: tensor(2846.9297, grad_fn=<MseLossBackward0>)
train loss: tensor(5235.8252, grad_fn=<MseLossBackward0>)
train loss: tensor(15098.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(7601.9976, grad_fn=<MseLossBackward0>)
train loss: tensor(9183.3271, grad_fn=<MseLossBackward0>)
train loss: tensor(5463.9248, grad_fn=<MseLossBackward0>)
train loss: tensor(6101.9058, grad_fn=<MseLossBackward0>)
train loss: tensor(7199.1821, grad_fn=<MseLossBackward0>)
train loss: tensor(12910.9443, grad_fn=<MseLossBackward0>)
train loss: tensor(5054.1865, grad_fn=<MseLossBackward0>)
train loss: tensor(4160.4702, grad_fn=<MseLossBackward0>)
train loss: tensor(9940.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(9687.9980, grad_fn=<MseLossBackward0>)
train loss: tensor(4774.1890, grad_fn=<MseLossBackward0>)
train loss: tensor(8203.3096, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(10288.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(8993.5352, grad_fn=<MseLossBackward0>)
train loss: tensor(13701.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(16509.2051, grad_fn=<MseLossBackward0>)
train loss: tensor(8270.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(6337.5991, grad_fn=<MseLossBackward0>)
train loss: tensor(4274.5825, grad_fn=<MseLossBackward0>)
train loss: tensor(13939.9590, grad_fn=<MseLossBackward0>)
train loss: tensor(11190.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(10137.6348, grad_fn=<MseLossBackward0>)
train loss: tensor(6795.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(13787.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(6058.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(4600.6045, grad_fn=<MseLossBackward0>)
train loss: tensor(7697.4390, grad_fn=<MseLossBackward0>)
train loss: tensor(8280.7236, grad_fn=<MseLossBackward0>)
train loss: tensor(7934.3613, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(9293.6963, grad_fn=<MseLossBackward0>)
train loss: tensor(17634.3457, grad_fn=<MseLossBackward0>)
train loss: tensor(8290.4932, grad_fn=<MseLossBackward0>)
train loss: tensor(7279.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(9585.5303, grad_fn=<MseLossBackward0>)
train loss: tensor(7971.1362, grad_fn=<MseLossBackward0>)
train loss: tensor(7042.1240, grad_fn=<MseLossBackward0>)
train loss: tensor(18431.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(7330.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(11819.8525, grad_fn=<MseLossBackward0>)
train loss: tensor(3714.0254, grad_fn=<MseLossBackward0>)
train loss: tensor(6053.2690, grad_fn=<MseLossBackward0>)
train loss: tensor(10416.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(8541.5840, grad_fn=<MseLossBackward0>)
train loss: tensor(9534.7803, grad_fn=<MseLossBackward0>)
train loss: tensor(13253.3408, grad_fn=<MseLossBackward0>)
train loss: tensor(7035.2598, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(5631.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(12248.4746, grad_fn=<MseLossBackward0>)
train loss: tensor(6132.3105, grad_fn=<MseLossBackward0>)
train loss: tensor(7245.3208, grad_fn=<MseLossBackward0>)
train loss: tensor(11143.5283, grad_fn=<MseLossBackward0>)
train loss: tensor(9585.6201, grad_fn=<MseLossBackward0>)
train loss: tensor(15405.1270, grad_fn=<MseLossBackward0>)
train loss: tensor(15547.7148, grad_fn=<MseLossBackward0>)
train loss: tensor(11723.2637, grad_fn=<MseLossBackward0>)
train loss: tensor(3000.2026, grad_fn=<MseLossBackward0>)
train loss: tensor(9146.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(6325.4888, grad_fn=<MseLossBackward0>)
train loss: tensor(4314.2881, grad_fn=<MseLossBackward0>)
train loss: tensor(3888.0061, grad_fn=<MseLossBackward0>)
train loss: tensor(6440.0498, grad_fn=<MseLossBackward0>)
train loss: tensor(46292.2383, grad_fn=<MseLossBackward0>)
train loss: tensor(15095.9229, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(2366.5527, grad_fn=<MseLossBackward0>)
train loss: tensor(5292.3892, grad_fn=<MseLossBackward0>)
train loss: tensor(11868.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(13572.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(10943.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(13235.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(7789.8784, grad_fn=<MseLossBackward0>)
train loss: tensor(7553.9287, grad_fn=<MseLossBackward0>)
train loss: tensor(11534.1289, grad_fn=<MseLossBackward0>)
train loss: tensor(9016.5439, grad_fn=<MseLossBackward0>)
train loss: tensor(12866.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(8567.2070, grad_fn=<MseLossBackward0>)
train loss: tensor(7431.2964, grad_fn=<MseLossBackward0>)
train loss: tensor(13976.9326, grad_fn=<MseLossBackward0>)
train loss: tensor(9549.1875, grad_fn=<MseLossBackward0>)
train loss: tensor(8739.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(16316.7598, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(7373.4248, grad_fn=<MseLossBackward0>)
train loss: tensor(6110.2759, grad_fn=<MseLossBackward0>)
train loss: tensor(2324.7075, grad_fn=<MseLossBackward0>)
train loss: tensor(17337.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(16555.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(6208.6157, grad_fn=<MseLossBackward0>)
train loss: tensor(3479.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(12734.8359, grad_fn=<MseLossBackward0>)
train loss: tensor(7989.9897, grad_fn=<MseLossBackward0>)
train loss: tensor(10357.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(11127.9551, grad_fn=<MseLossBackward0>)
train loss: tensor(4169.0972, grad_fn=<MseLossBackward0>)
train loss: tensor(8054.5190, grad_fn=<MseLossBackward0>)
train loss: tensor(6112.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(7249.4517, grad_fn=<MseLossBackward0>)
train loss: tensor(13146.3330, grad_fn=<MseLossBackward0>)
train loss: tensor(8839.4990, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(8144.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(26569.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(9128.3057, grad_fn=<MseLossBackward0>)
train loss: tensor(22943.3340, grad_fn=<MseLossBackward0>)
train loss: tensor(8521.1680, grad_fn=<MseLossBackward0>)
train loss: tensor(13080.5547, grad_fn=<MseLossBackward0>)
train loss: tensor(16974.1309, grad_fn=<MseLossBackward0>)
train loss: tensor(14242.0830, grad_fn=<MseLossBackward0>)
train loss: tensor(4862.8052, grad_fn=<MseLossBackward0>)
train loss: tensor(5749.4668, grad_fn=<MseLossBackward0>)
train loss: tensor(17176.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(12573.9629, grad_fn=<MseLossBackward0>)
train loss: tensor(14641.3643, grad_fn=<MseLossBackward0>)
train loss: tensor(8066.5825, grad_fn=<MseLossBackward0>)
train loss: tensor(4099.0015, grad_fn=<MseLossBackward0>)
train loss: tensor(13114.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(23223.8398, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(13267.7100, grad_fn=<MseLossBackward0>)
train loss: tensor(6662.3169, grad_fn=<MseLossBackward0>)
train loss: tensor(20820.1738, grad_fn=<MseLossBackward0>)
train loss: tensor(2276.1997, grad_fn=<MseLossBackward0>)
train loss: tensor(2174.4934, grad_fn=<MseLossBackward0>)
train loss: tensor(5870.1978, grad_fn=<MseLossBackward0>)
train loss: tensor(17261.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(2435.6877, grad_fn=<MseLossBackward0>)
train loss: tensor(9493.8623, grad_fn=<MseLossBackward0>)
train loss: tensor(8026.2192, grad_fn=<MseLossBackward0>)
train loss: tensor(3678.2793, grad_fn=<MseLossBackward0>)
train loss: tensor(14136.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(7214.9121, grad_fn=<MseLossBackward0>)
train loss: tensor(12910.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(2581.1128, grad_fn=<MseLossBackward0>)
train loss: tensor(29294.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(3320.6826, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(101156.9766, grad_fn=<MseLossBackward0>)
train loss: tensor(5680.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(6688.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(3116.1511, grad_fn=<MseLossBackward0>)
train loss: tensor(8743.2129, grad_fn=<MseLossBackward0>)
train loss: tensor(8250.4346, grad_fn=<MseLossBackward0>)
train loss: tensor(7805.0649, grad_fn=<MseLossBackward0>)
train loss: tensor(28631.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(7029.2222, grad_fn=<MseLossBackward0>)
train loss: tensor(4886.6108, grad_fn=<MseLossBackward0>)
train loss: tensor(11536.0674, grad_fn=<MseLossBackward0>)
train loss: tensor(13154.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(4647.1118, grad_fn=<MseLossBackward0>)
train loss: tensor(8156.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(7799.6602, grad_fn=<MseLossBackward0>)
train loss: tensor(15311.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(3977.0913, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3469.1428, grad_fn=<MseLossBackward0>)
train loss: tensor(6136.5356, grad_fn=<MseLossBackward0>)
train loss: tensor(15149.3877, grad_fn=<MseLossBackward0>)
train loss: tensor(7876.3369, grad_fn=<MseLossBackward0>)
train loss: tensor(8018.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(5378.7725, grad_fn=<MseLossBackward0>)
train loss: tensor(5876.5654, grad_fn=<MseLossBackward0>)
train loss: tensor(8681.4580, grad_fn=<MseLossBackward0>)
train loss: tensor(14435.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(6102.4131, grad_fn=<MseLossBackward0>)
train loss: tensor(7915.1812, grad_fn=<MseLossBackward0>)
train loss: tensor(5169.9312, grad_fn=<MseLossBackward0>)
train loss: tensor(19764.2461, grad_fn=<MseLossBackward0>)
train loss: tensor(6105.8945, grad_fn=<MseLossBackward0>)
train loss: tensor(7484.4385, grad_fn=<MseLossBackward0>)
train loss: tensor(11551.7803, grad_fn=<MseLossBackward0>)
train loss: tensor(10397.2852, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(5570.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(4134.5039, grad_fn=<MseLossBackward0>)
train loss: tensor(3279.2520, grad_fn=<MseLossBackward0>)
train loss: tensor(13102.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(11510.8818, grad_fn=<MseLossBackward0>)
train loss: tensor(15923.1777, grad_fn=<MseLossBackward0>)
train loss: tensor(9849.4893, grad_fn=<MseLossBackward0>)
train loss: tensor(5313.3887, grad_fn=<MseLossBackward0>)
train loss: tensor(5113.0728, grad_fn=<MseLossBackward0>)
train loss: tensor(8798.0283, grad_fn=<MseLossBackward0>)
train loss: tensor(9727.5146, grad_fn=<MseLossBackward0>)
train loss: tensor(5286.0342, grad_fn=<MseLossBackward0>)
train loss: tensor(37547.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(10116.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(6339.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(10028.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(6549.5376, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(4912.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(6097.0732, grad_fn=<MseLossBackward0>)
train loss: tensor(7079.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(7298.4712, grad_fn=<MseLossBackward0>)
train loss: tensor(22539.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(4843.2783, grad_fn=<MseLossBackward0>)
train loss: tensor(14700.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(6052.3647, grad_fn=<MseLossBackward0>)
train loss: tensor(10168.9658, grad_fn=<MseLossBackward0>)
train loss: tensor(11347.3096, grad_fn=<MseLossBackward0>)
train loss: tensor(7230.0806, grad_fn=<MseLossBackward0>)
train loss: tensor(5122.0908, grad_fn=<MseLossBackward0>)
train loss: tensor(6247.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(7646.7119, grad_fn=<MseLossBackward0>)
train loss: tensor(12344.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(7514.4619, grad_fn=<MseLossBackward0>)
train loss: tensor(2858.0442, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(9689.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(7937.2114, grad_fn=<MseLossBackward0>)
train loss: tensor(14867.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(4813.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(3098.3909, grad_fn=<MseLossBackward0>)
train loss: tensor(11888.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(69029.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(6012.3472, grad_fn=<MseLossBackward0>)
train loss: tensor(7583.3315, grad_fn=<MseLossBackward0>)
train loss: tensor(7832.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(4960.4253, grad_fn=<MseLossBackward0>)
train loss: tensor(11681.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(9772.9434, grad_fn=<MseLossBackward0>)
train loss: tensor(6601.3774, grad_fn=<MseLossBackward0>)
train loss: tensor(9391.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(7938.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(3730.0791, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(4605.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(7428.8345, grad_fn=<MseLossBackward0>)
train loss: tensor(9742.2109, grad_fn=<MseLossBackward0>)
train loss: tensor(3183.9758, grad_fn=<MseLossBackward0>)
train loss: tensor(20748.1250, grad_fn=<MseLossBackward0>)
train loss: tensor(6262.9595, grad_fn=<MseLossBackward0>)
train loss: tensor(3734.4651, grad_fn=<MseLossBackward0>)
train loss: tensor(23905.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(4588.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(20918.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(9483.3105, grad_fn=<MseLossBackward0>)
train loss: tensor(7981.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(14670.9199, grad_fn=<MseLossBackward0>)
train loss: tensor(7579.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(8600.0273, grad_fn=<MseLossBackward0>)
train loss: tensor(50493.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(1911.9830, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(11104.9287, grad_fn=<MseLossBackward0>)
train loss: tensor(10751.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(5792.5195, grad_fn=<MseLossBackward0>)
train loss: tensor(6641.5449, grad_fn=<MseLossBackward0>)
train loss: tensor(4092.5073, grad_fn=<MseLossBackward0>)
train loss: tensor(9710.6338, grad_fn=<MseLossBackward0>)
train loss: tensor(5877.5386, grad_fn=<MseLossBackward0>)
train loss: tensor(16453.6191, grad_fn=<MseLossBackward0>)
train loss: tensor(17999.4707, grad_fn=<MseLossBackward0>)
train loss: tensor(6732.7271, grad_fn=<MseLossBackward0>)
train loss: tensor(12308.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(7534.4658, grad_fn=<MseLossBackward0>)
train loss: tensor(10826.4971, grad_fn=<MseLossBackward0>)
train loss: tensor(6317.4263, grad_fn=<MseLossBackward0>)
train loss: tensor(53689.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(11436.7109, grad_fn=<MseLossBackward0>)
train loss: tensor(19513.6035, grad_fn=<MseLossBackward0>)
train

train loss: tensor(12500.1426, grad_fn=<MseLossBackward0>)
train loss: tensor(19570.4434, grad_fn=<MseLossBackward0>)
train loss: tensor(5518.1025, grad_fn=<MseLossBackward0>)
train loss: tensor(11619.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(4282.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(9768.4619, grad_fn=<MseLossBackward0>)
train loss: tensor(74564.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(17257.5566, grad_fn=<MseLossBackward0>)
train loss: tensor(11597.6035, grad_fn=<MseLossBackward0>)
train loss: tensor(12206.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(6243.2627, grad_fn=<MseLossBackward0>)
train loss: tensor(9705.8164, grad_fn=<MseLossBackward0>)
train loss: tensor(7812.0845, grad_fn=<MseLossBackward0>)
train loss: tensor(13214.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(3505.1311, grad_fn=<MseLossBackward0>)
train loss: tensor(4176.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(1997.9552, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(5628.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(14835.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(5881.8564, grad_fn=<MseLossBackward0>)
train loss: tensor(3927.9045, grad_fn=<MseLossBackward0>)
train loss: tensor(7481.2456, grad_fn=<MseLossBackward0>)
train loss: tensor(7667.1533, grad_fn=<MseLossBackward0>)
train loss: tensor(3282.7563, grad_fn=<MseLossBackward0>)
train loss: tensor(4690.2324, grad_fn=<MseLossBackward0>)
train loss: tensor(10574.9678, grad_fn=<MseLossBackward0>)
train loss: tensor(9848.2568, grad_fn=<MseLossBackward0>)
train loss: tensor(7169.1260, grad_fn=<MseLossBackward0>)
train loss: tensor(4042.7786, grad_fn=<MseLossBackward0>)
train loss: tensor(12160.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(7243.9937, grad_fn=<MseLossBackward0>)
train loss: tensor(8033.7085, grad_fn=<MseLossBackward0>)
train loss: tensor(11897.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(5293.8755, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(3178.5435, grad_fn=<MseLossBackward0>)
train loss: tensor(7881.2124, grad_fn=<MseLossBackward0>)
train loss: tensor(9836.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(42135.8789, grad_fn=<MseLossBackward0>)
train loss: tensor(9589.0244, grad_fn=<MseLossBackward0>)
train loss: tensor(8258.7275, grad_fn=<MseLossBackward0>)
train loss: tensor(11197.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(5918.0122, grad_fn=<MseLossBackward0>)
train loss: tensor(9104.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(38013.0781, grad_fn=<MseLossBackward0>)
train loss: tensor(7423.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(13377.7900, grad_fn=<MseLossBackward0>)
train loss: tensor(7793.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(10263.3857, grad_fn=<MseLossBackward0>)
train loss: tensor(17910.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(3319.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(5031.9438, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5960.9771, grad_fn=<MseLossBackward0>)
train loss: tensor(8217.6436, grad_fn=<MseLossBackward0>)
train loss: tensor(9728.4297, grad_fn=<MseLossBackward0>)
train loss: tensor(9966.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(2143.0842, grad_fn=<MseLossBackward0>)
train loss: tensor(8957.5264, grad_fn=<MseLossBackward0>)
train loss: tensor(5654.3276, grad_fn=<MseLossBackward0>)
train loss: tensor(6927.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(9795.0811, grad_fn=<MseLossBackward0>)
train loss: tensor(5057.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(12160.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(3827.3245, grad_fn=<MseLossBackward0>)
train loss: tensor(8022.6953, grad_fn=<MseLossBackward0>)
train loss: tensor(5576.4771, grad_fn=<MseLossBackward0>)
train loss: tensor(41937.9219, grad_fn=<MseLossBackward0>)
train loss: tensor(5403.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(10904.2744, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(10565.8271, grad_fn=<MseLossBackward0>)
train loss: tensor(7685.8882, grad_fn=<MseLossBackward0>)
train loss: tensor(6202.2856, grad_fn=<MseLossBackward0>)
train loss: tensor(13338.1348, grad_fn=<MseLossBackward0>)
train loss: tensor(11977.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(5434.2148, grad_fn=<MseLossBackward0>)
train loss: tensor(17039.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(5497.6353, grad_fn=<MseLossBackward0>)
train loss: tensor(7729.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(6704.4097, grad_fn=<MseLossBackward0>)
train loss: tensor(3431.6050, grad_fn=<MseLossBackward0>)
train loss: tensor(4085.6672, grad_fn=<MseLossBackward0>)
train loss: tensor(5610.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(5024.0381, grad_fn=<MseLossBackward0>)
train loss: tensor(14096.8350, grad_fn=<MseLossBackward0>)
train loss: tensor(12203.4619, grad_fn=<MseLossBackward0>)
train loss: tensor(4339.7129, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(10988.6084, grad_fn=<MseLossBackward0>)
train loss: tensor(8822.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(8389.9199, grad_fn=<MseLossBackward0>)
train loss: tensor(10207.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(7056.2256, grad_fn=<MseLossBackward0>)
train loss: tensor(8648.7783, grad_fn=<MseLossBackward0>)
train loss: tensor(7067.9287, grad_fn=<MseLossBackward0>)
train loss: tensor(7260.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(3863.7920, grad_fn=<MseLossBackward0>)
train loss: tensor(9850.1650, grad_fn=<MseLossBackward0>)
train loss: tensor(12886.3945, grad_fn=<MseLossBackward0>)
train loss: tensor(12832.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(6282.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(8898.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(2802.3391, grad_fn=<MseLossBackward0>)
train loss: tensor(14013.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(4922.2246, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(10081.7656, grad_fn=<MseLossBackward0>)
train loss: tensor(10548.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(7037.7466, grad_fn=<MseLossBackward0>)
train loss: tensor(3704.5779, grad_fn=<MseLossBackward0>)
train loss: tensor(12773.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(18376.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(9341.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(5935.7202, grad_fn=<MseLossBackward0>)
train loss: tensor(5841.4146, grad_fn=<MseLossBackward0>)
train loss: tensor(8887.5293, grad_fn=<MseLossBackward0>)
train loss: tensor(15117.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(6066.5107, grad_fn=<MseLossBackward0>)
train loss: tensor(7983.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(1832.6661, grad_fn=<MseLossBackward0>)
train loss: tensor(5868.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(18708.4473, grad_fn=<MseLossBackward0>)
train loss: tensor(9866.5273, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5808.4033, grad_fn=<MseLossBackward0>)
train loss: tensor(6090.9727, grad_fn=<MseLossBackward0>)
train loss: tensor(8712.5410, grad_fn=<MseLossBackward0>)
train loss: tensor(14911.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(16462.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(7156.1235, grad_fn=<MseLossBackward0>)
train loss: tensor(10022.1777, grad_fn=<MseLossBackward0>)
train loss: tensor(2641.2109, grad_fn=<MseLossBackward0>)
train loss: tensor(5435.7119, grad_fn=<MseLossBackward0>)
train loss: tensor(61882.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(16652.0273, grad_fn=<MseLossBackward0>)
train loss: tensor(19470.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(17498.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(9328.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(6432.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(5458.2056, grad_fn=<MseLossBackward0>)
train loss: tensor(18285.8691, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(8116.4741, grad_fn=<MseLossBackward0>)
train loss: tensor(9180.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(8921.1123, grad_fn=<MseLossBackward0>)
train loss: tensor(4479.5156, grad_fn=<MseLossBackward0>)
train loss: tensor(7483.5103, grad_fn=<MseLossBackward0>)
train loss: tensor(8665.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(15544.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(8726.9678, grad_fn=<MseLossBackward0>)
train loss: tensor(11254.0264, grad_fn=<MseLossBackward0>)
train loss: tensor(5274.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(10209.5107, grad_fn=<MseLossBackward0>)
train loss: tensor(19309.0996, grad_fn=<MseLossBackward0>)
train loss: tensor(14392.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(7087.6992, grad_fn=<MseLossBackward0>)
train loss: tensor(14339.8779, grad_fn=<MseLossBackward0>)
train loss: tensor(10779.6172, grad_fn=<MseLossBackward0>)
train loss: tensor(2576.0103, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(4347.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(2411.9966, grad_fn=<MseLossBackward0>)
train loss: tensor(12928.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(9986.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(13004.6562, grad_fn=<MseLossBackward0>)
train loss: tensor(4382.1411, grad_fn=<MseLossBackward0>)
train loss: tensor(11300.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(11747.9111, grad_fn=<MseLossBackward0>)
train loss: tensor(5161.4526, grad_fn=<MseLossBackward0>)
train loss: tensor(9546.9736, grad_fn=<MseLossBackward0>)
train loss: tensor(4042.3181, grad_fn=<MseLossBackward0>)
train loss: tensor(14986.2490, grad_fn=<MseLossBackward0>)
train loss: tensor(1543.0798, grad_fn=<MseLossBackward0>)
train loss: tensor(5524.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(9436.2256, grad_fn=<MseLossBackward0>)
train loss: tensor(3550.0442, grad_fn=<MseLossBackward0>)
train loss: tensor(4543.5635, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(11865.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(4946.3843, grad_fn=<MseLossBackward0>)
train loss: tensor(6594.0605, grad_fn=<MseLossBackward0>)
train loss: tensor(9576.6895, grad_fn=<MseLossBackward0>)
train loss: tensor(10528.4521, grad_fn=<MseLossBackward0>)
train loss: tensor(9630.4746, grad_fn=<MseLossBackward0>)
train loss: tensor(7943.1108, grad_fn=<MseLossBackward0>)
train loss: tensor(8956.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(18173.6699, grad_fn=<MseLossBackward0>)
train loss: tensor(9262.0195, grad_fn=<MseLossBackward0>)
train loss: tensor(12097.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(13749.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(2196.2046, grad_fn=<MseLossBackward0>)
train loss: tensor(4776.0469, grad_fn=<MseLossBackward0>)
train loss: tensor(3351.0413, grad_fn=<MseLossBackward0>)
train loss: tensor(4854.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(8300.0244, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(4107.4995, grad_fn=<MseLossBackward0>)
train loss: tensor(10278.6680, grad_fn=<MseLossBackward0>)
train loss: tensor(13836.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(4639.7549, grad_fn=<MseLossBackward0>)
train loss: tensor(4215.7822, grad_fn=<MseLossBackward0>)
train loss: tensor(6743.7881, grad_fn=<MseLossBackward0>)
train loss: tensor(5725.5835, grad_fn=<MseLossBackward0>)
train loss: tensor(12567.9277, grad_fn=<MseLossBackward0>)
train loss: tensor(4105.7505, grad_fn=<MseLossBackward0>)
train loss: tensor(4251.1455, grad_fn=<MseLossBackward0>)
train loss: tensor(9914.4004, grad_fn=<MseLossBackward0>)
train loss: tensor(10617.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(17298.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(3847.5747, grad_fn=<MseLossBackward0>)
train loss: tensor(7639.3613, grad_fn=<MseLossBackward0>)
train loss: tensor(8239.8369, grad_fn=<MseLossBackward0>)
train loss: tensor(4829.2461, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(7117.4473, grad_fn=<MseLossBackward0>)
train loss: tensor(9603.7461, grad_fn=<MseLossBackward0>)
train loss: tensor(8452.8672, grad_fn=<MseLossBackward0>)
train loss: tensor(12121.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(9356.7354, grad_fn=<MseLossBackward0>)
train loss: tensor(77668.3672, grad_fn=<MseLossBackward0>)
train loss: tensor(2829.9976, grad_fn=<MseLossBackward0>)
train loss: tensor(6281.1436, grad_fn=<MseLossBackward0>)
train loss: tensor(2358.9702, grad_fn=<MseLossBackward0>)
train loss: tensor(7266.0259, grad_fn=<MseLossBackward0>)
train loss: tensor(17336.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(8201.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(12088.4697, grad_fn=<MseLossBackward0>)
train loss: tensor(5301.5439, grad_fn=<MseLossBackward0>)
train loss: tensor(3497.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(16375.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(9531.6572, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6149.0420, grad_fn=<MseLossBackward0>)
train loss: tensor(13653.0244, grad_fn=<MseLossBackward0>)
train loss: tensor(12916.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(8060.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(6594.3818, grad_fn=<MseLossBackward0>)
train loss: tensor(5006.9292, grad_fn=<MseLossBackward0>)
train loss: tensor(48450.1562, grad_fn=<MseLossBackward0>)
train loss: tensor(8388.2627, grad_fn=<MseLossBackward0>)
train loss: tensor(12605.2744, grad_fn=<MseLossBackward0>)
train loss: tensor(8624.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(6502.5112, grad_fn=<MseLossBackward0>)
train loss: tensor(15297.5889, grad_fn=<MseLossBackward0>)
train loss: tensor(9425.4492, grad_fn=<MseLossBackward0>)
train loss: tensor(6189.7876, grad_fn=<MseLossBackward0>)
train loss: tensor(4547.0918, grad_fn=<MseLossBackward0>)
train loss: tensor(9822.7695, grad_fn=<MseLossBackward0>)
train loss: tensor(11731.1211, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3764.6313, grad_fn=<MseLossBackward0>)
train loss: tensor(7967.9619, grad_fn=<MseLossBackward0>)
train loss: tensor(10549.4033, grad_fn=<MseLossBackward0>)
train loss: tensor(8505.1436, grad_fn=<MseLossBackward0>)
train loss: tensor(18022.0645, grad_fn=<MseLossBackward0>)
train loss: tensor(11032.8076, grad_fn=<MseLossBackward0>)
train loss: tensor(22129.2793, grad_fn=<MseLossBackward0>)
train loss: tensor(9101.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(1364.9149, grad_fn=<MseLossBackward0>)
train loss: tensor(21858.2441, grad_fn=<MseLossBackward0>)
train loss: tensor(9899.8945, grad_fn=<MseLossBackward0>)
train loss: tensor(3981.4329, grad_fn=<MseLossBackward0>)
train loss: tensor(10940.5322, grad_fn=<MseLossBackward0>)
train loss: tensor(7085.5479, grad_fn=<MseLossBackward0>)
train loss: tensor(10940.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(3624.7920, grad_fn=<MseLossBackward0>)
train loss: tensor(5501.1680, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(19959.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(6157.4829, grad_fn=<MseLossBackward0>)
train loss: tensor(7665.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(3283.2786, grad_fn=<MseLossBackward0>)
train loss: tensor(6821.2861, grad_fn=<MseLossBackward0>)
train loss: tensor(10111.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(7491.2964, grad_fn=<MseLossBackward0>)
train loss: tensor(4343.9810, grad_fn=<MseLossBackward0>)
train loss: tensor(4840.5459, grad_fn=<MseLossBackward0>)
train loss: tensor(5284.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(3740.5259, grad_fn=<MseLossBackward0>)
train loss: tensor(7491.5840, grad_fn=<MseLossBackward0>)
train loss: tensor(6140.8838, grad_fn=<MseLossBackward0>)
train loss: tensor(8444.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(4198.8716, grad_fn=<MseLossBackward0>)
train loss: tensor(10528.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(6905.0151, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(13261.5264, grad_fn=<MseLossBackward0>)
train loss: tensor(13210.4873, grad_fn=<MseLossBackward0>)
train loss: tensor(10412.3066, grad_fn=<MseLossBackward0>)
train loss: tensor(12282.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(5920.0679, grad_fn=<MseLossBackward0>)
train loss: tensor(4443.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(7977.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(12109.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(12576.9375, grad_fn=<MseLossBackward0>)
train loss: tensor(6034.3755, grad_fn=<MseLossBackward0>)
train loss: tensor(3700.6543, grad_fn=<MseLossBackward0>)
train loss: tensor(4077.8782, grad_fn=<MseLossBackward0>)
train loss: tensor(11554.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(41649.7500, grad_fn=<MseLossBackward0>)
train loss: tensor(5742.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(15469.3584, grad_fn=<MseLossBackward0>)
train loss: tensor(7196.9497, grad_fn=<MseLossBackward0>)
train

train loss: tensor(4842.5674, grad_fn=<MseLossBackward0>)
train loss: tensor(10431.8643, grad_fn=<MseLossBackward0>)
train loss: tensor(18828.7656, grad_fn=<MseLossBackward0>)
train loss: tensor(11466.9775, grad_fn=<MseLossBackward0>)
train loss: tensor(7274.0356, grad_fn=<MseLossBackward0>)
train loss: tensor(17282.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(16222.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(12727.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(18902.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(6666.4390, grad_fn=<MseLossBackward0>)
train loss: tensor(10254.7002, grad_fn=<MseLossBackward0>)
train loss: tensor(18537.6953, grad_fn=<MseLossBackward0>)
train loss: tensor(7491.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(7231.6475, grad_fn=<MseLossBackward0>)
train loss: tensor(8273.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(8097.3130, grad_fn=<MseLossBackward0>)
train loss: tensor(5495.1772, grad_fn=<MseLossBackward0>)
train

train loss: tensor(13724.9482, grad_fn=<MseLossBackward0>)
train loss: tensor(12428.0010, grad_fn=<MseLossBackward0>)
train loss: tensor(16506.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(12693.2578, grad_fn=<MseLossBackward0>)
train loss: tensor(10371.6318, grad_fn=<MseLossBackward0>)
train loss: tensor(8257.3408, grad_fn=<MseLossBackward0>)
train loss: tensor(7683.3032, grad_fn=<MseLossBackward0>)
train loss: tensor(1961.4011, grad_fn=<MseLossBackward0>)
train loss: tensor(8210.6357, grad_fn=<MseLossBackward0>)
train loss: tensor(10362.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(11347.8994, grad_fn=<MseLossBackward0>)
train loss: tensor(8113.3506, grad_fn=<MseLossBackward0>)
train loss: tensor(13712.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(6914.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(7714.5044, grad_fn=<MseLossBackward0>)
train loss: tensor(3860.8110, grad_fn=<MseLossBackward0>)
train loss: tensor(11965.3027, grad_fn=<MseLossBackward0>)
train

train loss: tensor(3863.2935, grad_fn=<MseLossBackward0>)
train loss: tensor(16599.4023, grad_fn=<MseLossBackward0>)
train loss: tensor(5462.2808, grad_fn=<MseLossBackward0>)
train loss: tensor(12857.4971, grad_fn=<MseLossBackward0>)
train loss: tensor(11602.2969, grad_fn=<MseLossBackward0>)
train loss: tensor(9595.7109, grad_fn=<MseLossBackward0>)
train loss: tensor(7792.7920, grad_fn=<MseLossBackward0>)
train loss: tensor(20244.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(10146.4062, grad_fn=<MseLossBackward0>)
train loss: tensor(11800.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(3659.3118, grad_fn=<MseLossBackward0>)
train loss: tensor(8497.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(14688.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(8429.0576, grad_fn=<MseLossBackward0>)
train loss: tensor(5400.1846, grad_fn=<MseLossBackward0>)
train loss: tensor(9106.1230, grad_fn=<MseLossBackward0>)
train loss: tensor(2510.1929, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(4832.9604, grad_fn=<MseLossBackward0>)
train loss: tensor(10487.3984, grad_fn=<MseLossBackward0>)
train loss: tensor(10182.6221, grad_fn=<MseLossBackward0>)
train loss: tensor(12398.8096, grad_fn=<MseLossBackward0>)
train loss: tensor(18115.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(9542.5215, grad_fn=<MseLossBackward0>)
train loss: tensor(7437.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(11446.1523, grad_fn=<MseLossBackward0>)
train loss: tensor(6059.2539, grad_fn=<MseLossBackward0>)
train loss: tensor(9133.6162, grad_fn=<MseLossBackward0>)
train loss: tensor(12193.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(8159.0527, grad_fn=<MseLossBackward0>)
train loss: tensor(6420.9136, grad_fn=<MseLossBackward0>)
train loss: tensor(10828.7217, grad_fn=<MseLossBackward0>)
train loss: tensor(4826.8188, grad_fn=<MseLossBackward0>)
train loss: tensor(11373.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(5016.1284, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(10716.7529, grad_fn=<MseLossBackward0>)
train loss: tensor(7730.8638, grad_fn=<MseLossBackward0>)
train loss: tensor(5147.4790, grad_fn=<MseLossBackward0>)
train loss: tensor(6285.2710, grad_fn=<MseLossBackward0>)
train loss: tensor(1901.0269, grad_fn=<MseLossBackward0>)
train loss: tensor(8440.0889, grad_fn=<MseLossBackward0>)
train loss: tensor(4891.6646, grad_fn=<MseLossBackward0>)
train loss: tensor(8325.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(11778.7705, grad_fn=<MseLossBackward0>)
train loss: tensor(7632.5571, grad_fn=<MseLossBackward0>)
train loss: tensor(2257.3132, grad_fn=<MseLossBackward0>)
train loss: tensor(5583.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(12201.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(78557.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(13515.0166, grad_fn=<MseLossBackward0>)
train loss: tensor(10900.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(7339.6475, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(13814.8223, grad_fn=<MseLossBackward0>)
train loss: tensor(3167.2302, grad_fn=<MseLossBackward0>)
train loss: tensor(2548.9243, grad_fn=<MseLossBackward0>)
train loss: tensor(60833.7500, grad_fn=<MseLossBackward0>)
train loss: tensor(6117.2856, grad_fn=<MseLossBackward0>)
train loss: tensor(8913.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(15968.3486, grad_fn=<MseLossBackward0>)
train loss: tensor(19490.1777, grad_fn=<MseLossBackward0>)
train loss: tensor(5184.2876, grad_fn=<MseLossBackward0>)
train loss: tensor(13626.2852, grad_fn=<MseLossBackward0>)
train loss: tensor(6738.7793, grad_fn=<MseLossBackward0>)
train loss: tensor(12088.5635, grad_fn=<MseLossBackward0>)
train loss: tensor(5843.2231, grad_fn=<MseLossBackward0>)
train loss: tensor(2864.0945, grad_fn=<MseLossBackward0>)
train loss: tensor(19416.1270, grad_fn=<MseLossBackward0>)
train loss: tensor(14633.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(4687.7388, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(6505.7051, grad_fn=<MseLossBackward0>)
train loss: tensor(3485.7346, grad_fn=<MseLossBackward0>)
train loss: tensor(5680.9287, grad_fn=<MseLossBackward0>)
train loss: tensor(4404.7627, grad_fn=<MseLossBackward0>)
train loss: tensor(7239.3379, grad_fn=<MseLossBackward0>)
train loss: tensor(18139.8184, grad_fn=<MseLossBackward0>)
train loss: tensor(14086.3828, grad_fn=<MseLossBackward0>)
train loss: tensor(10505.7822, grad_fn=<MseLossBackward0>)
train loss: tensor(4710.4395, grad_fn=<MseLossBackward0>)
train loss: tensor(7918.4956, grad_fn=<MseLossBackward0>)
train loss: tensor(6968.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(10736.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(10501.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(7390.2158, grad_fn=<MseLossBackward0>)
train loss: tensor(13937.7480, grad_fn=<MseLossBackward0>)
train loss: tensor(19880., grad_fn=<MseLossBackward0>)
train loss: tensor(3158.8984, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(4983.7339, grad_fn=<MseLossBackward0>)
train loss: tensor(10616.8955, grad_fn=<MseLossBackward0>)
train loss: tensor(10536.5225, grad_fn=<MseLossBackward0>)
train loss: tensor(6553.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(1892.2496, grad_fn=<MseLossBackward0>)
train loss: tensor(79052.2266, grad_fn=<MseLossBackward0>)
train loss: tensor(8523.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(3958.0859, grad_fn=<MseLossBackward0>)
train loss: tensor(8166.5396, grad_fn=<MseLossBackward0>)
train loss: tensor(6263.3936, grad_fn=<MseLossBackward0>)
train loss: tensor(8758.3574, grad_fn=<MseLossBackward0>)
train loss: tensor(5060.2051, grad_fn=<MseLossBackward0>)
train loss: tensor(2866.7393, grad_fn=<MseLossBackward0>)
train loss: tensor(8664.0859, grad_fn=<MseLossBackward0>)
train loss: tensor(4626.6812, grad_fn=<MseLossBackward0>)
train loss: tensor(32875.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(7011.3926, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(11741.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(6324.0386, grad_fn=<MseLossBackward0>)
train loss: tensor(3220.2507, grad_fn=<MseLossBackward0>)
train loss: tensor(7116.2729, grad_fn=<MseLossBackward0>)
train loss: tensor(55787.1250, grad_fn=<MseLossBackward0>)
train loss: tensor(9554.1025, grad_fn=<MseLossBackward0>)
train loss: tensor(3959.0325, grad_fn=<MseLossBackward0>)
train loss: tensor(12037.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(19744.7676, grad_fn=<MseLossBackward0>)
train loss: tensor(12701.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(4994.9395, grad_fn=<MseLossBackward0>)
train loss: tensor(2726.2493, grad_fn=<MseLossBackward0>)
train loss: tensor(43013.6836, grad_fn=<MseLossBackward0>)
train loss: tensor(9992.7871, grad_fn=<MseLossBackward0>)
train loss: tensor(4374.1611, grad_fn=<MseLossBackward0>)
train loss: tensor(11074.0869, grad_fn=<MseLossBackward0>)
train loss: tensor(13426.3965, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(10003.1182, grad_fn=<MseLossBackward0>)
train loss: tensor(3390.7825, grad_fn=<MseLossBackward0>)
train loss: tensor(2366.8821, grad_fn=<MseLossBackward0>)
train loss: tensor(7623.5962, grad_fn=<MseLossBackward0>)
train loss: tensor(10376.6113, grad_fn=<MseLossBackward0>)
train loss: tensor(6598.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(5861.5278, grad_fn=<MseLossBackward0>)
train loss: tensor(16487.2344, grad_fn=<MseLossBackward0>)
train loss: tensor(7651.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(16727.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(8137.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(9194.7783, grad_fn=<MseLossBackward0>)
train loss: tensor(6528.8257, grad_fn=<MseLossBackward0>)
train loss: tensor(9951.3574, grad_fn=<MseLossBackward0>)
train loss: tensor(12915.3916, grad_fn=<MseLossBackward0>)
train loss: tensor(9799.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(11327.5332, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(11697.1123, grad_fn=<MseLossBackward0>)
train loss: tensor(18719.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(12037.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(10036.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(3990.6355, grad_fn=<MseLossBackward0>)
train loss: tensor(9320.3467, grad_fn=<MseLossBackward0>)
train loss: tensor(4236.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(8053.6802, grad_fn=<MseLossBackward0>)
train loss: tensor(6943.6240, grad_fn=<MseLossBackward0>)
train loss: tensor(56232.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(4447.8252, grad_fn=<MseLossBackward0>)
train loss: tensor(10300.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(9712.0996, grad_fn=<MseLossBackward0>)
train loss: tensor(73669.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(8149.7725, grad_fn=<MseLossBackward0>)
train loss: tensor(3507.0525, grad_fn=<MseLossBackward0>)
train loss: tensor(6696.5386, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(16699.4121, grad_fn=<MseLossBackward0>)
train loss: tensor(2290.6494, grad_fn=<MseLossBackward0>)
train loss: tensor(5263.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(3945.2417, grad_fn=<MseLossBackward0>)
train loss: tensor(8329.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(3359.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(10021.9922, grad_fn=<MseLossBackward0>)
train loss: tensor(5723.2329, grad_fn=<MseLossBackward0>)
train loss: tensor(10473.9980, grad_fn=<MseLossBackward0>)
train loss: tensor(18614.7266, grad_fn=<MseLossBackward0>)
train loss: tensor(6478.0757, grad_fn=<MseLossBackward0>)
train loss: tensor(15545.7275, grad_fn=<MseLossBackward0>)
train loss: tensor(8109.2114, grad_fn=<MseLossBackward0>)
train loss: tensor(14757.3838, grad_fn=<MseLossBackward0>)
train loss: tensor(14687.1396, grad_fn=<MseLossBackward0>)
train loss: tensor(11208.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(5740.3252, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(10206.7344, grad_fn=<MseLossBackward0>)
train loss: tensor(10616.6406, grad_fn=<MseLossBackward0>)
train loss: tensor(8701.0449, grad_fn=<MseLossBackward0>)
train loss: tensor(12499.8311, grad_fn=<MseLossBackward0>)
train loss: tensor(20416.9863, grad_fn=<MseLossBackward0>)
train loss: tensor(3613.6677, grad_fn=<MseLossBackward0>)
train loss: tensor(6288.6616, grad_fn=<MseLossBackward0>)
train loss: tensor(35044.1445, grad_fn=<MseLossBackward0>)
train loss: tensor(15975.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(9956.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(5561.3843, grad_fn=<MseLossBackward0>)
train loss: tensor(10513.1611, grad_fn=<MseLossBackward0>)
train loss: tensor(12562.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(5785.2583, grad_fn=<MseLossBackward0>)
train loss: tensor(2965.8384, grad_fn=<MseLossBackward0>)
train loss: tensor(6643.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(6487.0737, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(4187.8130, grad_fn=<MseLossBackward0>)
train loss: tensor(12411.9111, grad_fn=<MseLossBackward0>)
train loss: tensor(15826.2842, grad_fn=<MseLossBackward0>)
train loss: tensor(9494.5010, grad_fn=<MseLossBackward0>)
train loss: tensor(2126.4414, grad_fn=<MseLossBackward0>)
train loss: tensor(15272.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(10639.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(15200.2891, grad_fn=<MseLossBackward0>)
train loss: tensor(14296.6270, grad_fn=<MseLossBackward0>)
train loss: tensor(12888.3936, grad_fn=<MseLossBackward0>)
train loss: tensor(8061.0293, grad_fn=<MseLossBackward0>)
train loss: tensor(17288.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(6546.9312, grad_fn=<MseLossBackward0>)
train loss: tensor(71101.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(11990.0752, grad_fn=<MseLossBackward0>)
train loss: tensor(16002.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(12036.1162, grad_fn=<MseLossBackward0>)
tr

train loss: tensor(7852.1616, grad_fn=<MseLossBackward0>)
train loss: tensor(6948.1118, grad_fn=<MseLossBackward0>)
train loss: tensor(10771.5420, grad_fn=<MseLossBackward0>)
train loss: tensor(3457.9326, grad_fn=<MseLossBackward0>)
train loss: tensor(5086.2959, grad_fn=<MseLossBackward0>)
train loss: tensor(17202.8496, grad_fn=<MseLossBackward0>)
train loss: tensor(7119.2354, grad_fn=<MseLossBackward0>)
train loss: tensor(1222.5107, grad_fn=<MseLossBackward0>)
train loss: tensor(11355.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(4647.4326, grad_fn=<MseLossBackward0>)
train loss: tensor(6711.3901, grad_fn=<MseLossBackward0>)
train loss: tensor(5025.6489, grad_fn=<MseLossBackward0>)
train loss: tensor(12462.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(5155.0278, grad_fn=<MseLossBackward0>)
train loss: tensor(2185.0696, grad_fn=<MseLossBackward0>)
train loss: tensor(11503.6631, grad_fn=<MseLossBackward0>)
train loss: tensor(5764.2773, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(2656.4968, grad_fn=<MseLossBackward0>)
train loss: tensor(9764.3447, grad_fn=<MseLossBackward0>)
train loss: tensor(16984.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(95492.1250, grad_fn=<MseLossBackward0>)
train loss: tensor(13524.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(8805.4180, grad_fn=<MseLossBackward0>)
train loss: tensor(6930.5728, grad_fn=<MseLossBackward0>)
train loss: tensor(4074.4094, grad_fn=<MseLossBackward0>)
train loss: tensor(9255.4590, grad_fn=<MseLossBackward0>)
train loss: tensor(9518.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(6458.8320, grad_fn=<MseLossBackward0>)
train loss: tensor(11849.9043, grad_fn=<MseLossBackward0>)
train loss: tensor(19108.0312, grad_fn=<MseLossBackward0>)
train loss: tensor(3950.2009, grad_fn=<MseLossBackward0>)
train loss: tensor(11676.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(13460.3945, grad_fn=<MseLossBackward0>)
train loss: tensor(4638.3501, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(6613.9712, grad_fn=<MseLossBackward0>)
train loss: tensor(16382.8613, grad_fn=<MseLossBackward0>)
train loss: tensor(3672.2131, grad_fn=<MseLossBackward0>)
train loss: tensor(9023.3047, grad_fn=<MseLossBackward0>)
train loss: tensor(6104.7642, grad_fn=<MseLossBackward0>)
train loss: tensor(6105.3647, grad_fn=<MseLossBackward0>)
train loss: tensor(11299.5420, grad_fn=<MseLossBackward0>)
train loss: tensor(5707.5337, grad_fn=<MseLossBackward0>)
train loss: tensor(12870.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(7298.5215, grad_fn=<MseLossBackward0>)
train loss: tensor(6528.7495, grad_fn=<MseLossBackward0>)
train loss: tensor(4002.5427, grad_fn=<MseLossBackward0>)
train loss: tensor(10375.6777, grad_fn=<MseLossBackward0>)
train loss: tensor(8712.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(8503.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(3394.8022, grad_fn=<MseLossBackward0>)
train loss: tensor(3169.1162, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(11378.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(8479.2891, grad_fn=<MseLossBackward0>)
train loss: tensor(10566.9365, grad_fn=<MseLossBackward0>)
train loss: tensor(12110.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(4143.3433, grad_fn=<MseLossBackward0>)
train loss: tensor(5249.7495, grad_fn=<MseLossBackward0>)
train loss: tensor(98289.1094, grad_fn=<MseLossBackward0>)
train loss: tensor(8630.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(2601.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(14776.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(4414.2930, grad_fn=<MseLossBackward0>)
train loss: tensor(18172.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(2032.2875, grad_fn=<MseLossBackward0>)
train loss: tensor(11365.2021, grad_fn=<MseLossBackward0>)
train loss: tensor(18703.2832, grad_fn=<MseLossBackward0>)
train loss: tensor(16952.0645, grad_fn=<MseLossBackward0>)
train loss: tensor(3393.5728, grad_fn=<MseLossBackward0>)
train

train loss: tensor(7753.0757, grad_fn=<MseLossBackward0>)
train loss: tensor(4279.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(8063.6772, grad_fn=<MseLossBackward0>)
train loss: tensor(1408.1414, grad_fn=<MseLossBackward0>)
train loss: tensor(3578.5710, grad_fn=<MseLossBackward0>)
train loss: tensor(11874.2676, grad_fn=<MseLossBackward0>)
train loss: tensor(10254.8945, grad_fn=<MseLossBackward0>)
train loss: tensor(16668.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(10195.9492, grad_fn=<MseLossBackward0>)
train loss: tensor(63232.3477, grad_fn=<MseLossBackward0>)
train loss: tensor(12595.4023, grad_fn=<MseLossBackward0>)
train loss: tensor(1343.9095, grad_fn=<MseLossBackward0>)
train loss: tensor(8159.2041, grad_fn=<MseLossBackward0>)
train loss: tensor(3336.3857, grad_fn=<MseLossBackward0>)
train loss: tensor(13440.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(4580.8521, grad_fn=<MseLossBackward0>)
train loss: tensor(13728.6475, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(3424.1711, grad_fn=<MseLossBackward0>)
train loss: tensor(15778.9512, grad_fn=<MseLossBackward0>)
train loss: tensor(4610.4194, grad_fn=<MseLossBackward0>)
train loss: tensor(15676.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(3742.9275, grad_fn=<MseLossBackward0>)
train loss: tensor(6043.7158, grad_fn=<MseLossBackward0>)
train loss: tensor(8391.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(2937.5566, grad_fn=<MseLossBackward0>)
train loss: tensor(10436.1240, grad_fn=<MseLossBackward0>)
train loss: tensor(6742.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(8481.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(11720.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(6423.4951, grad_fn=<MseLossBackward0>)
train loss: tensor(6199.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(13139.8281, grad_fn=<MseLossBackward0>)
train loss: tensor(5205.2759, grad_fn=<MseLossBackward0>)
train loss: tensor(15234.7324, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(8460.3496, grad_fn=<MseLossBackward0>)
train loss: tensor(6920.6147, grad_fn=<MseLossBackward0>)
train loss: tensor(5077.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(5761.5625, grad_fn=<MseLossBackward0>)
train loss: tensor(16677.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(3524.5400, grad_fn=<MseLossBackward0>)
train loss: tensor(10671.8867, grad_fn=<MseLossBackward0>)
train loss: tensor(6621.1245, grad_fn=<MseLossBackward0>)
train loss: tensor(9336.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(3337.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(5925.9976, grad_fn=<MseLossBackward0>)
train loss: tensor(2061.2671, grad_fn=<MseLossBackward0>)
train loss: tensor(10723.6777, grad_fn=<MseLossBackward0>)
train loss: tensor(7595.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(2192.8767, grad_fn=<MseLossBackward0>)
train loss: tensor(81138.6328, grad_fn=<MseLossBackward0>)
train loss: tensor(6683.7661, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(4200.0537, grad_fn=<MseLossBackward0>)
train loss: tensor(5131.3062, grad_fn=<MseLossBackward0>)
train loss: tensor(14567.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(10567.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(14335.6270, grad_fn=<MseLossBackward0>)
train loss: tensor(7920.3711, grad_fn=<MseLossBackward0>)
train loss: tensor(7017.2397, grad_fn=<MseLossBackward0>)
train loss: tensor(5824.6538, grad_fn=<MseLossBackward0>)
train loss: tensor(3327.8074, grad_fn=<MseLossBackward0>)
train loss: tensor(6942.9038, grad_fn=<MseLossBackward0>)
train loss: tensor(3637.8340, grad_fn=<MseLossBackward0>)
train loss: tensor(13247.0840, grad_fn=<MseLossBackward0>)
train loss: tensor(17014.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(4498.0889, grad_fn=<MseLossBackward0>)
train loss: tensor(13650.1045, grad_fn=<MseLossBackward0>)
train loss: tensor(11811.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(6947.3491, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(6392.3594, grad_fn=<MseLossBackward0>)
train loss: tensor(12070.8867, grad_fn=<MseLossBackward0>)
train loss: tensor(8914.5264, grad_fn=<MseLossBackward0>)
train loss: tensor(4579.2676, grad_fn=<MseLossBackward0>)
train loss: tensor(15240.9443, grad_fn=<MseLossBackward0>)
train loss: tensor(12681.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(7676.6548, grad_fn=<MseLossBackward0>)
train loss: tensor(13019.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(6003.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(3744.3237, grad_fn=<MseLossBackward0>)
train loss: tensor(2385.9592, grad_fn=<MseLossBackward0>)
train loss: tensor(10941.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(4832.5288, grad_fn=<MseLossBackward0>)
train loss: tensor(3727.2178, grad_fn=<MseLossBackward0>)
train loss: tensor(8545.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(5075.3828, grad_fn=<MseLossBackward0>)
train loss: tensor(9997.6309, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(12684.9990, grad_fn=<MseLossBackward0>)
train loss: tensor(12261.0010, grad_fn=<MseLossBackward0>)
train loss: tensor(11529.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(10570.0410, grad_fn=<MseLossBackward0>)
train loss: tensor(7401.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(6136.0308, grad_fn=<MseLossBackward0>)
train loss: tensor(4275.3975, grad_fn=<MseLossBackward0>)
train loss: tensor(5317.0356, grad_fn=<MseLossBackward0>)
train loss: tensor(15303.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(4204.9346, grad_fn=<MseLossBackward0>)
train loss: tensor(1257.1155, grad_fn=<MseLossBackward0>)
train loss: tensor(5080.1592, grad_fn=<MseLossBackward0>)
train loss: tensor(11924.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(4912.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(14238.6768, grad_fn=<MseLossBackward0>)
train loss: tensor(4211.5737, grad_fn=<MseLossBackward0>)
train loss: tensor(11183.9365, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(20015.8164, grad_fn=<MseLossBackward0>)
train loss: tensor(11392.2529, grad_fn=<MseLossBackward0>)
train loss: tensor(11600.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(17371.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(3240.3081, grad_fn=<MseLossBackward0>)
train loss: tensor(9013.3291, grad_fn=<MseLossBackward0>)
train loss: tensor(6951.6235, grad_fn=<MseLossBackward0>)
train loss: tensor(3947.9968, grad_fn=<MseLossBackward0>)
train loss: tensor(35998.3672, grad_fn=<MseLossBackward0>)
train loss: tensor(5527.8003, grad_fn=<MseLossBackward0>)
train loss: tensor(12834.5166, grad_fn=<MseLossBackward0>)
train loss: tensor(3701.1323, grad_fn=<MseLossBackward0>)
train loss: tensor(12662.1553, grad_fn=<MseLossBackward0>)
train loss: tensor(10642.9932, grad_fn=<MseLossBackward0>)
train loss: tensor(13894.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(7307.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(7067.9419, grad_fn=<MseLossBackward0>)
train

train loss: tensor(7886.8765, grad_fn=<MseLossBackward0>)
train loss: tensor(17630.0312, grad_fn=<MseLossBackward0>)
train loss: tensor(10010.6865, grad_fn=<MseLossBackward0>)
train loss: tensor(2062.3188, grad_fn=<MseLossBackward0>)
train loss: tensor(19879.9297, grad_fn=<MseLossBackward0>)
train loss: tensor(10128.5000, grad_fn=<MseLossBackward0>)
train loss: tensor(12306.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(8958.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(1676.6729, grad_fn=<MseLossBackward0>)
train loss: tensor(16071.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(6812.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(1946.0057, grad_fn=<MseLossBackward0>)
train loss: tensor(4761.3179, grad_fn=<MseLossBackward0>)
train loss: tensor(7149.3975, grad_fn=<MseLossBackward0>)
train loss: tensor(12488.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(3315.0042, grad_fn=<MseLossBackward0>)
train loss: tensor(45999.5078, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(4200.0444, grad_fn=<MseLossBackward0>)
train loss: tensor(8167.6743, grad_fn=<MseLossBackward0>)
train loss: tensor(9526.8076, grad_fn=<MseLossBackward0>)
train loss: tensor(8398.3779, grad_fn=<MseLossBackward0>)
train loss: tensor(14339.6592, grad_fn=<MseLossBackward0>)
train loss: tensor(8425.9883, grad_fn=<MseLossBackward0>)
train loss: tensor(7320.9707, grad_fn=<MseLossBackward0>)
train loss: tensor(6610.1997, grad_fn=<MseLossBackward0>)
train loss: tensor(4964.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(3404.4717, grad_fn=<MseLossBackward0>)
train loss: tensor(9254.4980, grad_fn=<MseLossBackward0>)
train loss: tensor(6720.4434, grad_fn=<MseLossBackward0>)
train loss: tensor(6635.0337, grad_fn=<MseLossBackward0>)
train loss: tensor(16432.9766, grad_fn=<MseLossBackward0>)
train loss: tensor(5259.2749, grad_fn=<MseLossBackward0>)
train loss: tensor(5044.9937, grad_fn=<MseLossBackward0>)
train loss: tensor(8265.9541, grad_fn=<MseLossBackward0>)
train loss: 

----------------------------- epoch 0 eval loss 11620.138671875 ------------------------------
train loss: tensor(4110.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(6549.0210, grad_fn=<MseLossBackward0>)
train loss: tensor(7862.2949, grad_fn=<MseLossBackward0>)
train loss: tensor(9338.1680, grad_fn=<MseLossBackward0>)
train loss: tensor(5030.8213, grad_fn=<MseLossBackward0>)
train loss: tensor(5978.6997, grad_fn=<MseLossBackward0>)
train loss: tensor(13228.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(3447.9202, grad_fn=<MseLossBackward0>)
train loss: tensor(3643.0642, grad_fn=<MseLossBackward0>)
train loss: tensor(7676.5161, grad_fn=<MseLossBackward0>)
train loss: tensor(16106.1777, grad_fn=<MseLossBackward0>)
train loss: tensor(4213.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(5774.9365, grad_fn=<MseLossBackward0>)
train loss: tensor(15780.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(8970.9277, grad_fn=<MseLossBackward0>)
train loss: tensor(15653.1016, g

train loss: tensor(5873.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(10358.4834, grad_fn=<MseLossBackward0>)
train loss: tensor(108292.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(3019.4277, grad_fn=<MseLossBackward0>)
train loss: tensor(6372.7256, grad_fn=<MseLossBackward0>)
train loss: tensor(10687.1602, grad_fn=<MseLossBackward0>)
train loss: tensor(12154.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(3636.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(15037.5537, grad_fn=<MseLossBackward0>)
train loss: tensor(5019.3472, grad_fn=<MseLossBackward0>)
train loss: tensor(5938.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(79432.4297, grad_fn=<MseLossBackward0>)
train loss: tensor(2478.2109, grad_fn=<MseLossBackward0>)
train loss: tensor(3045.3794, grad_fn=<MseLossBackward0>)
train loss: tensor(16974.8320, grad_fn=<MseLossBackward0>)
train loss: tensor(8189.1118, grad_fn=<MseLossBackward0>)
train loss: tensor(6121.3159, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(11894.2842, grad_fn=<MseLossBackward0>)
train loss: tensor(4633.5757, grad_fn=<MseLossBackward0>)
train loss: tensor(3290.7083, grad_fn=<MseLossBackward0>)
train loss: tensor(18120.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(2962.2119, grad_fn=<MseLossBackward0>)
train loss: tensor(8364.6855, grad_fn=<MseLossBackward0>)
train loss: tensor(11409.0723, grad_fn=<MseLossBackward0>)
train loss: tensor(7771.5420, grad_fn=<MseLossBackward0>)
train loss: tensor(14255.7393, grad_fn=<MseLossBackward0>)
train loss: tensor(4482.7690, grad_fn=<MseLossBackward0>)
train loss: tensor(7611.3472, grad_fn=<MseLossBackward0>)
train loss: tensor(2064.4993, grad_fn=<MseLossBackward0>)
train loss: tensor(7689.6616, grad_fn=<MseLossBackward0>)
train loss: tensor(4067.9368, grad_fn=<MseLossBackward0>)
train loss: tensor(79959.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(6600.7588, grad_fn=<MseLossBackward0>)
train loss: tensor(15211.0381, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(8424.9883, grad_fn=<MseLossBackward0>)
train loss: tensor(8990.6631, grad_fn=<MseLossBackward0>)
train loss: tensor(4091.2488, grad_fn=<MseLossBackward0>)
train loss: tensor(5046.3306, grad_fn=<MseLossBackward0>)
train loss: tensor(6551.1348, grad_fn=<MseLossBackward0>)
train loss: tensor(2745.4653, grad_fn=<MseLossBackward0>)
train loss: tensor(1253.2517, grad_fn=<MseLossBackward0>)
train loss: tensor(8114.4062, grad_fn=<MseLossBackward0>)
train loss: tensor(9409.7490, grad_fn=<MseLossBackward0>)
train loss: tensor(6866.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(3020.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(11019.2031, grad_fn=<MseLossBackward0>)
train loss: tensor(10291.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(11967.5195, grad_fn=<MseLossBackward0>)
train loss: tensor(8958.1924, grad_fn=<MseLossBackward0>)
train loss: tensor(9533.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(11586.8750, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(4494.4932, grad_fn=<MseLossBackward0>)
train loss: tensor(11870.4463, grad_fn=<MseLossBackward0>)
train loss: tensor(5018.5830, grad_fn=<MseLossBackward0>)
train loss: tensor(6457.4556, grad_fn=<MseLossBackward0>)
train loss: tensor(4885.5703, grad_fn=<MseLossBackward0>)
train loss: tensor(2944.3848, grad_fn=<MseLossBackward0>)
train loss: tensor(4306.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(4495.4888, grad_fn=<MseLossBackward0>)
train loss: tensor(10853.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(12413.0635, grad_fn=<MseLossBackward0>)
train loss: tensor(4810.0410, grad_fn=<MseLossBackward0>)
train loss: tensor(1446.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(6893.5986, grad_fn=<MseLossBackward0>)
train loss: tensor(8807.2646, grad_fn=<MseLossBackward0>)
train loss: tensor(10790.8369, grad_fn=<MseLossBackward0>)
train loss: tensor(13999.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(10097.9990, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5661.2524, grad_fn=<MseLossBackward0>)
train loss: tensor(18548.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(7024.8989, grad_fn=<MseLossBackward0>)
train loss: tensor(7557.9272, grad_fn=<MseLossBackward0>)
train loss: tensor(5744.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(16826.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(7045.1221, grad_fn=<MseLossBackward0>)
train loss: tensor(10072.2285, grad_fn=<MseLossBackward0>)
train loss: tensor(8641.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(10286.2061, grad_fn=<MseLossBackward0>)
train loss: tensor(16267.4736, grad_fn=<MseLossBackward0>)
train loss: tensor(10140.6416, grad_fn=<MseLossBackward0>)
train loss: tensor(14831.0635, grad_fn=<MseLossBackward0>)
train loss: tensor(14687.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(3361.4268, grad_fn=<MseLossBackward0>)
train loss: tensor(8308.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(814.8734, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(6782.4492, grad_fn=<MseLossBackward0>)
train loss: tensor(9717.9277, grad_fn=<MseLossBackward0>)
train loss: tensor(16059.8604, grad_fn=<MseLossBackward0>)
train loss: tensor(9868.8496, grad_fn=<MseLossBackward0>)
train loss: tensor(15550.2129, grad_fn=<MseLossBackward0>)
train loss: tensor(5801.3975, grad_fn=<MseLossBackward0>)
train loss: tensor(10147.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(9384.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(10698.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(11914.4043, grad_fn=<MseLossBackward0>)
train loss: tensor(11890.6260, grad_fn=<MseLossBackward0>)
train loss: tensor(4120.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(10983.5078, grad_fn=<MseLossBackward0>)
train loss: tensor(3270.4460, grad_fn=<MseLossBackward0>)
train loss: tensor(18588.3984, grad_fn=<MseLossBackward0>)
train loss: tensor(5147.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(7812.9116, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(11733.7822, grad_fn=<MseLossBackward0>)
train loss: tensor(6426.4146, grad_fn=<MseLossBackward0>)
train loss: tensor(6755.5161, grad_fn=<MseLossBackward0>)
train loss: tensor(4892.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(11231.1875, grad_fn=<MseLossBackward0>)
train loss: tensor(13092.8770, grad_fn=<MseLossBackward0>)
train loss: tensor(2966.9739, grad_fn=<MseLossBackward0>)
train loss: tensor(5085.4092, grad_fn=<MseLossBackward0>)
train loss: tensor(4928.0879, grad_fn=<MseLossBackward0>)
train loss: tensor(7807.4858, grad_fn=<MseLossBackward0>)
train loss: tensor(2871.8792, grad_fn=<MseLossBackward0>)
train loss: tensor(8624.5410, grad_fn=<MseLossBackward0>)
train loss: tensor(7384.7710, grad_fn=<MseLossBackward0>)
train loss: tensor(1551.1838, grad_fn=<MseLossBackward0>)
train loss: tensor(10529.3115, grad_fn=<MseLossBackward0>)
train loss: tensor(1454.1915, grad_fn=<MseLossBackward0>)
train loss: tensor(9766.4277, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(4931.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(2906.8911, grad_fn=<MseLossBackward0>)
train loss: tensor(86112.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(6246.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(3497.6018, grad_fn=<MseLossBackward0>)
train loss: tensor(8013.0942, grad_fn=<MseLossBackward0>)
train loss: tensor(5132.9673, grad_fn=<MseLossBackward0>)
train loss: tensor(2888.6238, grad_fn=<MseLossBackward0>)
train loss: tensor(7763.5039, grad_fn=<MseLossBackward0>)
train loss: tensor(8566.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(6262.2222, grad_fn=<MseLossBackward0>)
train loss: tensor(2470.7388, grad_fn=<MseLossBackward0>)
train loss: tensor(16105.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(10173.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(1734.2194, grad_fn=<MseLossBackward0>)
train loss: tensor(5180.4712, grad_fn=<MseLossBackward0>)
train loss: tensor(6435.7256, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(18011.9590, grad_fn=<MseLossBackward0>)
train loss: tensor(6673.6362, grad_fn=<MseLossBackward0>)
train loss: tensor(13486.1094, grad_fn=<MseLossBackward0>)
train loss: tensor(3397.4036, grad_fn=<MseLossBackward0>)
train loss: tensor(5147.1953, grad_fn=<MseLossBackward0>)
train loss: tensor(7086.4009, grad_fn=<MseLossBackward0>)
train loss: tensor(2793.4202, grad_fn=<MseLossBackward0>)
train loss: tensor(9592.3242, grad_fn=<MseLossBackward0>)
train loss: tensor(7637.9629, grad_fn=<MseLossBackward0>)
train loss: tensor(9284.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(4345.7930, grad_fn=<MseLossBackward0>)
train loss: tensor(11405.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(33206.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(4316.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(12105.2344, grad_fn=<MseLossBackward0>)
train loss: tensor(7762.8589, grad_fn=<MseLossBackward0>)
train loss: tensor(3149.7739, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(11728.6943, grad_fn=<MseLossBackward0>)
train loss: tensor(11979.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(18756.1465, grad_fn=<MseLossBackward0>)
train loss: tensor(9091.6475, grad_fn=<MseLossBackward0>)
train loss: tensor(14188.4248, grad_fn=<MseLossBackward0>)
train loss: tensor(11817.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(4697.5220, grad_fn=<MseLossBackward0>)
train loss: tensor(10165.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(4818.2905, grad_fn=<MseLossBackward0>)
train loss: tensor(6944.1006, grad_fn=<MseLossBackward0>)
train loss: tensor(8082.5322, grad_fn=<MseLossBackward0>)
train loss: tensor(7324.7939, grad_fn=<MseLossBackward0>)
train loss: tensor(11956.5420, grad_fn=<MseLossBackward0>)
train loss: tensor(6572.4282, grad_fn=<MseLossBackward0>)
train loss: tensor(14132.7305, grad_fn=<MseLossBackward0>)
train loss: tensor(7553.5820, grad_fn=<MseLossBackward0>)
train loss: tensor(5951.0845, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(5007.4248, grad_fn=<MseLossBackward0>)
train loss: tensor(8977.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(3634.5325, grad_fn=<MseLossBackward0>)
train loss: tensor(9285.6377, grad_fn=<MseLossBackward0>)
train loss: tensor(8078.9185, grad_fn=<MseLossBackward0>)
train loss: tensor(8226.8076, grad_fn=<MseLossBackward0>)
train loss: tensor(8045.9941, grad_fn=<MseLossBackward0>)
train loss: tensor(10157.1377, grad_fn=<MseLossBackward0>)
train loss: tensor(6040.7246, grad_fn=<MseLossBackward0>)
train loss: tensor(16453.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(4720.5835, grad_fn=<MseLossBackward0>)
train loss: tensor(4727.0474, grad_fn=<MseLossBackward0>)
train loss: tensor(4378.7231, grad_fn=<MseLossBackward0>)
train loss: tensor(5164.1699, grad_fn=<MseLossBackward0>)
train loss: tensor(19050.3770, grad_fn=<MseLossBackward0>)
train loss: tensor(12047.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(4205.6621, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(6521.1572, grad_fn=<MseLossBackward0>)
train loss: tensor(6310.0132, grad_fn=<MseLossBackward0>)
train loss: tensor(6393.1953, grad_fn=<MseLossBackward0>)
train loss: tensor(8899.0371, grad_fn=<MseLossBackward0>)
train loss: tensor(4144.3848, grad_fn=<MseLossBackward0>)
train loss: tensor(7983.0005, grad_fn=<MseLossBackward0>)
train loss: tensor(10181.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(3927.3276, grad_fn=<MseLossBackward0>)
train loss: tensor(5767.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(2633.7729, grad_fn=<MseLossBackward0>)
train loss: tensor(13059.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(16568.6758, grad_fn=<MseLossBackward0>)
train loss: tensor(3356.9915, grad_fn=<MseLossBackward0>)
train loss: tensor(7181.2085, grad_fn=<MseLossBackward0>)
train loss: tensor(9983.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(6470.1157, grad_fn=<MseLossBackward0>)
train loss: tensor(3883.6301, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(1784.5931, grad_fn=<MseLossBackward0>)
train loss: tensor(5179.0918, grad_fn=<MseLossBackward0>)
train loss: tensor(10115.8203, grad_fn=<MseLossBackward0>)
train loss: tensor(3504.5959, grad_fn=<MseLossBackward0>)
train loss: tensor(11774.6592, grad_fn=<MseLossBackward0>)
train loss: tensor(5634.4937, grad_fn=<MseLossBackward0>)
train loss: tensor(7697.0581, grad_fn=<MseLossBackward0>)
train loss: tensor(7223.4341, grad_fn=<MseLossBackward0>)
train loss: tensor(8558.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(8229.7559, grad_fn=<MseLossBackward0>)
train loss: tensor(5503.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(4002.9304, grad_fn=<MseLossBackward0>)
train loss: tensor(7106.5581, grad_fn=<MseLossBackward0>)
train loss: tensor(3055.8337, grad_fn=<MseLossBackward0>)
train loss: tensor(7794.4580, grad_fn=<MseLossBackward0>)
train loss: tensor(9733.6104, grad_fn=<MseLossBackward0>)
train loss: tensor(13731.4678, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(16632.7910, grad_fn=<MseLossBackward0>)
train loss: tensor(9472.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(2613.1233, grad_fn=<MseLossBackward0>)
train loss: tensor(13653.9189, grad_fn=<MseLossBackward0>)
train loss: tensor(16060.7549, grad_fn=<MseLossBackward0>)
train loss: tensor(15459.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(11715.6230, grad_fn=<MseLossBackward0>)
train loss: tensor(6574.9370, grad_fn=<MseLossBackward0>)
train loss: tensor(4769.8086, grad_fn=<MseLossBackward0>)
train loss: tensor(5636.1528, grad_fn=<MseLossBackward0>)
train loss: tensor(4620.9209, grad_fn=<MseLossBackward0>)
train loss: tensor(8197.9307, grad_fn=<MseLossBackward0>)
train loss: tensor(14325.1426, grad_fn=<MseLossBackward0>)
train loss: tensor(45192.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(12345.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(3547.4771, grad_fn=<MseLossBackward0>)
train loss: tensor(12724.5596, grad_fn=<MseLossBackward0>)
train

train loss: tensor(7190.6470, grad_fn=<MseLossBackward0>)
train loss: tensor(16192.6191, grad_fn=<MseLossBackward0>)
train loss: tensor(47398.2344, grad_fn=<MseLossBackward0>)
train loss: tensor(4416.0908, grad_fn=<MseLossBackward0>)
train loss: tensor(7107.8022, grad_fn=<MseLossBackward0>)
train loss: tensor(7928.5298, grad_fn=<MseLossBackward0>)
train loss: tensor(9538.6836, grad_fn=<MseLossBackward0>)
train loss: tensor(11411.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(11966.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(6686.6758, grad_fn=<MseLossBackward0>)
train loss: tensor(43425.5625, grad_fn=<MseLossBackward0>)
train loss: tensor(9844.7588, grad_fn=<MseLossBackward0>)
train loss: tensor(3344.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(5467.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(12762.2773, grad_fn=<MseLossBackward0>)
train loss: tensor(2313.6323, grad_fn=<MseLossBackward0>)
train loss: tensor(10467.4072, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5757.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(4034.4421, grad_fn=<MseLossBackward0>)
train loss: tensor(4161.0005, grad_fn=<MseLossBackward0>)
train loss: tensor(6224.3599, grad_fn=<MseLossBackward0>)
train loss: tensor(3592.0806, grad_fn=<MseLossBackward0>)
train loss: tensor(10936.6855, grad_fn=<MseLossBackward0>)
train loss: tensor(18199.6035, grad_fn=<MseLossBackward0>)
train loss: tensor(7792.3682, grad_fn=<MseLossBackward0>)
train loss: tensor(16442.4727, grad_fn=<MseLossBackward0>)
train loss: tensor(7105.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(4700.5176, grad_fn=<MseLossBackward0>)
train loss: tensor(4642.3271, grad_fn=<MseLossBackward0>)
train loss: tensor(7057.3315, grad_fn=<MseLossBackward0>)
train loss: tensor(6505.5864, grad_fn=<MseLossBackward0>)
train loss: tensor(8671.0918, grad_fn=<MseLossBackward0>)
train loss: tensor(4794.7261, grad_fn=<MseLossBackward0>)
train loss: tensor(13516.0723, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8428.1650, grad_fn=<MseLossBackward0>)
train loss: tensor(10447.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(4252.7026, grad_fn=<MseLossBackward0>)
train loss: tensor(9163.2139, grad_fn=<MseLossBackward0>)
train loss: tensor(3294.8623, grad_fn=<MseLossBackward0>)
train loss: tensor(13566.2354, grad_fn=<MseLossBackward0>)
train loss: tensor(3041.3525, grad_fn=<MseLossBackward0>)
train loss: tensor(6764.2837, grad_fn=<MseLossBackward0>)
train loss: tensor(6229.8071, grad_fn=<MseLossBackward0>)
train loss: tensor(6802.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(3254.4631, grad_fn=<MseLossBackward0>)
train loss: tensor(3034.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(10611.9209, grad_fn=<MseLossBackward0>)
train loss: tensor(6888.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(6347.4185, grad_fn=<MseLossBackward0>)
train loss: tensor(10895.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(4859.1240, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(1724.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(14536.4658, grad_fn=<MseLossBackward0>)
train loss: tensor(4499.7354, grad_fn=<MseLossBackward0>)
train loss: tensor(7516.8047, grad_fn=<MseLossBackward0>)
train loss: tensor(5871.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(9896.1416, grad_fn=<MseLossBackward0>)
train loss: tensor(4615.0864, grad_fn=<MseLossBackward0>)
train loss: tensor(10199.8604, grad_fn=<MseLossBackward0>)
train loss: tensor(16718.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(6909.6118, grad_fn=<MseLossBackward0>)
train loss: tensor(8011.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(9431.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(3315.2839, grad_fn=<MseLossBackward0>)
train loss: tensor(11464.2803, grad_fn=<MseLossBackward0>)
train loss: tensor(17419.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(7921.9053, grad_fn=<MseLossBackward0>)
train loss: tensor(5748.5854, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(3553.2881, grad_fn=<MseLossBackward0>)
train loss: tensor(4038.5432, grad_fn=<MseLossBackward0>)
train loss: tensor(51654.4219, grad_fn=<MseLossBackward0>)
train loss: tensor(7440.5991, grad_fn=<MseLossBackward0>)
train loss: tensor(16808.8281, grad_fn=<MseLossBackward0>)
train loss: tensor(9906.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(18457.0020, grad_fn=<MseLossBackward0>)
train loss: tensor(3425.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(8637.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(18564.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(17823.7246, grad_fn=<MseLossBackward0>)
train loss: tensor(4204.0444, grad_fn=<MseLossBackward0>)
train loss: tensor(11443.0283, grad_fn=<MseLossBackward0>)
train loss: tensor(10385.2588, grad_fn=<MseLossBackward0>)
train loss: tensor(12273.5342, grad_fn=<MseLossBackward0>)
train loss: tensor(5394.6992, grad_fn=<MseLossBackward0>)
train loss: tensor(6427.8149, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(10903.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(6797.7568, grad_fn=<MseLossBackward0>)
train loss: tensor(11746.3730, grad_fn=<MseLossBackward0>)
train loss: tensor(4627.5967, grad_fn=<MseLossBackward0>)
train loss: tensor(5383.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(14295.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(5760.4580, grad_fn=<MseLossBackward0>)
train loss: tensor(7727.8276, grad_fn=<MseLossBackward0>)
train loss: tensor(3080.9507, grad_fn=<MseLossBackward0>)
train loss: tensor(12088.1348, grad_fn=<MseLossBackward0>)
train loss: tensor(4561.7837, grad_fn=<MseLossBackward0>)
train loss: tensor(13139.7275, grad_fn=<MseLossBackward0>)
train loss: tensor(3551.1599, grad_fn=<MseLossBackward0>)
train loss: tensor(5968.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(7082.4644, grad_fn=<MseLossBackward0>)
train loss: tensor(4340.6953, grad_fn=<MseLossBackward0>)
train loss: tensor(3790.5742, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(8110.1108, grad_fn=<MseLossBackward0>)
train loss: tensor(7470.8555, grad_fn=<MseLossBackward0>)
train loss: tensor(7611.2578, grad_fn=<MseLossBackward0>)
train loss: tensor(17682.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(3304.5955, grad_fn=<MseLossBackward0>)
train loss: tensor(9072.7705, grad_fn=<MseLossBackward0>)
train loss: tensor(7264.1553, grad_fn=<MseLossBackward0>)
train loss: tensor(5189.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(7560.4292, grad_fn=<MseLossBackward0>)
train loss: tensor(13986.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(5496.4468, grad_fn=<MseLossBackward0>)
train loss: tensor(5447.2241, grad_fn=<MseLossBackward0>)
train loss: tensor(9539.7051, grad_fn=<MseLossBackward0>)
train loss: tensor(10399.3369, grad_fn=<MseLossBackward0>)
train loss: tensor(9687.0625, grad_fn=<MseLossBackward0>)
train loss: tensor(4602.6191, grad_fn=<MseLossBackward0>)
train loss: tensor(11801.5537, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(4282.2158, grad_fn=<MseLossBackward0>)
train loss: tensor(5068.6206, grad_fn=<MseLossBackward0>)
train loss: tensor(5404.8306, grad_fn=<MseLossBackward0>)
train loss: tensor(4209.1655, grad_fn=<MseLossBackward0>)
train loss: tensor(7126.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(11428.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(10696.0557, grad_fn=<MseLossBackward0>)
train loss: tensor(4852.7104, grad_fn=<MseLossBackward0>)
train loss: tensor(4839.7368, grad_fn=<MseLossBackward0>)
train loss: tensor(16752.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(16809.3594, grad_fn=<MseLossBackward0>)
train loss: tensor(14364.7939, grad_fn=<MseLossBackward0>)
train loss: tensor(7718.6875, grad_fn=<MseLossBackward0>)
train loss: tensor(11913.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(10517.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(58110.2070, grad_fn=<MseLossBackward0>)
train loss: tensor(5685.9248, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(9383.4229, grad_fn=<MseLossBackward0>)
train loss: tensor(23345.6035, grad_fn=<MseLossBackward0>)
train loss: tensor(16126.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(7576.0854, grad_fn=<MseLossBackward0>)
train loss: tensor(4001.2480, grad_fn=<MseLossBackward0>)
train loss: tensor(13003.4570, grad_fn=<MseLossBackward0>)
train loss: tensor(2856.3362, grad_fn=<MseLossBackward0>)
train loss: tensor(5790.2583, grad_fn=<MseLossBackward0>)
train loss: tensor(5978.7920, grad_fn=<MseLossBackward0>)
train loss: tensor(8157.8389, grad_fn=<MseLossBackward0>)
train loss: tensor(3086.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(14424.9023, grad_fn=<MseLossBackward0>)
train loss: tensor(3427.3245, grad_fn=<MseLossBackward0>)
train loss: tensor(4008.2771, grad_fn=<MseLossBackward0>)
train loss: tensor(7305.0229, grad_fn=<MseLossBackward0>)
train loss: tensor(6867.5312, grad_fn=<MseLossBackward0>)
train loss: tensor(4105.0522, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(9966.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(7769.0522, grad_fn=<MseLossBackward0>)
train loss: tensor(12226.8906, grad_fn=<MseLossBackward0>)
train loss: tensor(8006.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(13081.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(12901.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(10107.7168, grad_fn=<MseLossBackward0>)
train loss: tensor(4177.1274, grad_fn=<MseLossBackward0>)
train loss: tensor(3057.0874, grad_fn=<MseLossBackward0>)
train loss: tensor(9678.7959, grad_fn=<MseLossBackward0>)
train loss: tensor(3225.2017, grad_fn=<MseLossBackward0>)
train loss: tensor(6231.0024, grad_fn=<MseLossBackward0>)
train loss: tensor(6187.2104, grad_fn=<MseLossBackward0>)
train loss: tensor(2142.6536, grad_fn=<MseLossBackward0>)
train loss: tensor(3612.6147, grad_fn=<MseLossBackward0>)
train loss: tensor(14751.7686, grad_fn=<MseLossBackward0>)
train loss: tensor(9705.0391, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(32330.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(6312.5112, grad_fn=<MseLossBackward0>)
train loss: tensor(3442.1438, grad_fn=<MseLossBackward0>)
train loss: tensor(3841.2314, grad_fn=<MseLossBackward0>)
train loss: tensor(2625.9644, grad_fn=<MseLossBackward0>)
train loss: tensor(10394.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(13879.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(7782.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(9626.2324, grad_fn=<MseLossBackward0>)
train loss: tensor(11481.9297, grad_fn=<MseLossBackward0>)
train loss: tensor(4701.4595, grad_fn=<MseLossBackward0>)
train loss: tensor(6195.8423, grad_fn=<MseLossBackward0>)
train loss: tensor(7734.8906, grad_fn=<MseLossBackward0>)
train loss: tensor(14425.8193, grad_fn=<MseLossBackward0>)
train loss: tensor(2948.0105, grad_fn=<MseLossBackward0>)
train loss: tensor(3721.3013, grad_fn=<MseLossBackward0>)
train loss: tensor(6422.3296, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(5994.5415, grad_fn=<MseLossBackward0>)
train loss: tensor(7902.1431, grad_fn=<MseLossBackward0>)
train loss: tensor(8393.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(6631.7212, grad_fn=<MseLossBackward0>)
train loss: tensor(12359.9199, grad_fn=<MseLossBackward0>)
train loss: tensor(3708.6458, grad_fn=<MseLossBackward0>)
train loss: tensor(11035.8945, grad_fn=<MseLossBackward0>)
train loss: tensor(6604.4932, grad_fn=<MseLossBackward0>)
train loss: tensor(78589.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(12256.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(6133.0591, grad_fn=<MseLossBackward0>)
train loss: tensor(9467.6104, grad_fn=<MseLossBackward0>)
train loss: tensor(6991.1577, grad_fn=<MseLossBackward0>)
train loss: tensor(9489.3574, grad_fn=<MseLossBackward0>)
train loss: tensor(5975.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(5667.4204, grad_fn=<MseLossBackward0>)
train loss: tensor(12833.2354, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(3377.4507, grad_fn=<MseLossBackward0>)
train loss: tensor(21080.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(4282.1938, grad_fn=<MseLossBackward0>)
train loss: tensor(7499.0361, grad_fn=<MseLossBackward0>)
train loss: tensor(1986.6206, grad_fn=<MseLossBackward0>)
train loss: tensor(7480.0884, grad_fn=<MseLossBackward0>)
train loss: tensor(3540.1436, grad_fn=<MseLossBackward0>)
train loss: tensor(5738.7671, grad_fn=<MseLossBackward0>)
train loss: tensor(5567.3018, grad_fn=<MseLossBackward0>)
train loss: tensor(4023.8560, grad_fn=<MseLossBackward0>)
train loss: tensor(5218.2485, grad_fn=<MseLossBackward0>)
train loss: tensor(7673.3423, grad_fn=<MseLossBackward0>)
train loss: tensor(15956.6729, grad_fn=<MseLossBackward0>)
train loss: tensor(6180.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(3738.5125, grad_fn=<MseLossBackward0>)
train loss: tensor(12336.2061, grad_fn=<MseLossBackward0>)
train loss: tensor(5095.4175, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5035.2554, grad_fn=<MseLossBackward0>)
train loss: tensor(12782.6318, grad_fn=<MseLossBackward0>)
train loss: tensor(3405.2612, grad_fn=<MseLossBackward0>)
train loss: tensor(5046.0439, grad_fn=<MseLossBackward0>)
train loss: tensor(4979.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(15475.2891, grad_fn=<MseLossBackward0>)
train loss: tensor(9037.2119, grad_fn=<MseLossBackward0>)
train loss: tensor(3525.4834, grad_fn=<MseLossBackward0>)
train loss: tensor(8282.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(8866.3350, grad_fn=<MseLossBackward0>)
train loss: tensor(13746.4463, grad_fn=<MseLossBackward0>)
train loss: tensor(7789.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(2112.7969, grad_fn=<MseLossBackward0>)
train loss: tensor(13353.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(1430.4854, grad_fn=<MseLossBackward0>)
train loss: tensor(6574.2183, grad_fn=<MseLossBackward0>)
train loss: tensor(6901.7324, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(15881.6855, grad_fn=<MseLossBackward0>)
train loss: tensor(10995.4736, grad_fn=<MseLossBackward0>)
train loss: tensor(4039.0474, grad_fn=<MseLossBackward0>)
train loss: tensor(4566.9663, grad_fn=<MseLossBackward0>)
train loss: tensor(18642.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(7984.5693, grad_fn=<MseLossBackward0>)
train loss: tensor(8894.4795, grad_fn=<MseLossBackward0>)
train loss: tensor(9962.9941, grad_fn=<MseLossBackward0>)
train loss: tensor(10286.2178, grad_fn=<MseLossBackward0>)
train loss: tensor(6386.3218, grad_fn=<MseLossBackward0>)
train loss: tensor(3320.7488, grad_fn=<MseLossBackward0>)
train loss: tensor(2323.4187, grad_fn=<MseLossBackward0>)
train loss: tensor(2248.8333, grad_fn=<MseLossBackward0>)
train loss: tensor(7150.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(4934.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(7926.2437, grad_fn=<MseLossBackward0>)
train loss: tensor(9106.0234, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8340.8428, grad_fn=<MseLossBackward0>)
train loss: tensor(32726.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(11152.2686, grad_fn=<MseLossBackward0>)
train loss: tensor(10850.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(8662.0771, grad_fn=<MseLossBackward0>)
train loss: tensor(8647.2891, grad_fn=<MseLossBackward0>)
train loss: tensor(8559.5752, grad_fn=<MseLossBackward0>)
train loss: tensor(5985.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(13433.5381, grad_fn=<MseLossBackward0>)
train loss: tensor(3027.5774, grad_fn=<MseLossBackward0>)
train loss: tensor(16317.1963, grad_fn=<MseLossBackward0>)
train loss: tensor(6621.5679, grad_fn=<MseLossBackward0>)
train loss: tensor(5471.1768, grad_fn=<MseLossBackward0>)
train loss: tensor(12681.8037, grad_fn=<MseLossBackward0>)
train loss: tensor(4660.0845, grad_fn=<MseLossBackward0>)
train loss: tensor(7113.7158, grad_fn=<MseLossBackward0>)
train loss: tensor(6316.6787, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(9756.5195, grad_fn=<MseLossBackward0>)
train loss: tensor(6425.6997, grad_fn=<MseLossBackward0>)
train loss: tensor(6386.8159, grad_fn=<MseLossBackward0>)
train loss: tensor(9354.9072, grad_fn=<MseLossBackward0>)
train loss: tensor(9239.2451, grad_fn=<MseLossBackward0>)
train loss: tensor(3514.7134, grad_fn=<MseLossBackward0>)
train loss: tensor(6548.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(3500.9702, grad_fn=<MseLossBackward0>)
train loss: tensor(80673.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(4925.7651, grad_fn=<MseLossBackward0>)
train loss: tensor(1576.2698, grad_fn=<MseLossBackward0>)
train loss: tensor(15513.6729, grad_fn=<MseLossBackward0>)
train loss: tensor(10878.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(5235.8643, grad_fn=<MseLossBackward0>)
train loss: tensor(45064.1836, grad_fn=<MseLossBackward0>)
train loss: tensor(49560.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(3620.3655, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(3561.8657, grad_fn=<MseLossBackward0>)
train loss: tensor(15715.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(5473.0269, grad_fn=<MseLossBackward0>)
train loss: tensor(7476.9077, grad_fn=<MseLossBackward0>)
train loss: tensor(4114.4302, grad_fn=<MseLossBackward0>)
train loss: tensor(16863.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(8630.1807, grad_fn=<MseLossBackward0>)
train loss: tensor(6420.9395, grad_fn=<MseLossBackward0>)
train loss: tensor(7025.1411, grad_fn=<MseLossBackward0>)
train loss: tensor(5943.8872, grad_fn=<MseLossBackward0>)
train loss: tensor(4061.8677, grad_fn=<MseLossBackward0>)
train loss: tensor(5518.6978, grad_fn=<MseLossBackward0>)
train loss: tensor(3835.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(8446.2266, grad_fn=<MseLossBackward0>)
train loss: tensor(6075.1470, grad_fn=<MseLossBackward0>)
train loss: tensor(14465.9189, grad_fn=<MseLossBackward0>)
train loss: tensor(4434.4922, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3638.5713, grad_fn=<MseLossBackward0>)
train loss: tensor(4587.5527, grad_fn=<MseLossBackward0>)
train loss: tensor(3217.0188, grad_fn=<MseLossBackward0>)
train loss: tensor(3216.6494, grad_fn=<MseLossBackward0>)
train loss: tensor(5776.4165, grad_fn=<MseLossBackward0>)
train loss: tensor(5263.6694, grad_fn=<MseLossBackward0>)
train loss: tensor(6351.4756, grad_fn=<MseLossBackward0>)
train loss: tensor(9391.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(10998.8389, grad_fn=<MseLossBackward0>)
train loss: tensor(11717.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(8376.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(4805.9395, grad_fn=<MseLossBackward0>)
train loss: tensor(1573.0879, grad_fn=<MseLossBackward0>)
train loss: tensor(11644.2764, grad_fn=<MseLossBackward0>)
train loss: tensor(3357.9341, grad_fn=<MseLossBackward0>)
train loss: tensor(8890.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(4424.4829, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(11409.4492, grad_fn=<MseLossBackward0>)
train loss: tensor(2570.9209, grad_fn=<MseLossBackward0>)
train loss: tensor(9767.8389, grad_fn=<MseLossBackward0>)
train loss: tensor(4816.8022, grad_fn=<MseLossBackward0>)
train loss: tensor(3112.5847, grad_fn=<MseLossBackward0>)
train loss: tensor(6119.0952, grad_fn=<MseLossBackward0>)
train loss: tensor(2645.4351, grad_fn=<MseLossBackward0>)
train loss: tensor(10609.9502, grad_fn=<MseLossBackward0>)
train loss: tensor(10686.3613, grad_fn=<MseLossBackward0>)
train loss: tensor(12282.0498, grad_fn=<MseLossBackward0>)
train loss: tensor(9294.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(6592.5898, grad_fn=<MseLossBackward0>)
train loss: tensor(6218.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(2644.9460, grad_fn=<MseLossBackward0>)
train loss: tensor(3013.6233, grad_fn=<MseLossBackward0>)
train loss: tensor(8648.5303, grad_fn=<MseLossBackward0>)
train loss: tensor(10819.2568, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6093.3589, grad_fn=<MseLossBackward0>)
train loss: tensor(8152.6436, grad_fn=<MseLossBackward0>)
train loss: tensor(3476.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(5795.3398, grad_fn=<MseLossBackward0>)
train loss: tensor(12012.4082, grad_fn=<MseLossBackward0>)
train loss: tensor(10435.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(5021.5308, grad_fn=<MseLossBackward0>)
train loss: tensor(12722.3877, grad_fn=<MseLossBackward0>)
train loss: tensor(4558.3818, grad_fn=<MseLossBackward0>)
train loss: tensor(5280.5972, grad_fn=<MseLossBackward0>)
train loss: tensor(39328.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(10526.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(3500.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(10482.1953, grad_fn=<MseLossBackward0>)
train loss: tensor(6290.1616, grad_fn=<MseLossBackward0>)
train loss: tensor(3158.7029, grad_fn=<MseLossBackward0>)
train loss: tensor(4187.3579, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5921.0425, grad_fn=<MseLossBackward0>)
train loss: tensor(5933.9551, grad_fn=<MseLossBackward0>)
train loss: tensor(9552.9990, grad_fn=<MseLossBackward0>)
train loss: tensor(6404.2715, grad_fn=<MseLossBackward0>)
train loss: tensor(5498.8872, grad_fn=<MseLossBackward0>)
train loss: tensor(17199.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(3545.6663, grad_fn=<MseLossBackward0>)
train loss: tensor(22118.5391, grad_fn=<MseLossBackward0>)
train loss: tensor(3248.5808, grad_fn=<MseLossBackward0>)
train loss: tensor(11165.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(10298.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(5264.7949, grad_fn=<MseLossBackward0>)
train loss: tensor(6662.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(11890.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(7439.8213, grad_fn=<MseLossBackward0>)
train loss: tensor(11227.4990, grad_fn=<MseLossBackward0>)
train loss: tensor(4395.3169, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(8910.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(12556.1162, grad_fn=<MseLossBackward0>)
train loss: tensor(7351.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(6054.2671, grad_fn=<MseLossBackward0>)
train loss: tensor(15099.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(6638.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(3637.6514, grad_fn=<MseLossBackward0>)
train loss: tensor(6493.5161, grad_fn=<MseLossBackward0>)
train loss: tensor(14597.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(8384.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(5055.9585, grad_fn=<MseLossBackward0>)
train loss: tensor(11129.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(12122.1455, grad_fn=<MseLossBackward0>)
train loss: tensor(10595.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(8190.5581, grad_fn=<MseLossBackward0>)
train loss: tensor(4952.4443, grad_fn=<MseLossBackward0>)
train loss: tensor(2081.6084, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(14865.2549, grad_fn=<MseLossBackward0>)
train loss: tensor(8066.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(5598.7495, grad_fn=<MseLossBackward0>)
train loss: tensor(3800.4314, grad_fn=<MseLossBackward0>)
train loss: tensor(8995.5176, grad_fn=<MseLossBackward0>)
train loss: tensor(11639.7607, grad_fn=<MseLossBackward0>)
train loss: tensor(6250.5464, grad_fn=<MseLossBackward0>)
train loss: tensor(8695.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(8929.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(6882.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(10177.7422, grad_fn=<MseLossBackward0>)
train loss: tensor(6778.3550, grad_fn=<MseLossBackward0>)
train loss: tensor(14435.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(16508.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(2547.2686, grad_fn=<MseLossBackward0>)
train loss: tensor(14604.3711, grad_fn=<MseLossBackward0>)
train loss: tensor(2729.3762, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(11808.6650, grad_fn=<MseLossBackward0>)
train loss: tensor(5000.0195, grad_fn=<MseLossBackward0>)
train loss: tensor(13064.5869, grad_fn=<MseLossBackward0>)
train loss: tensor(6808.4263, grad_fn=<MseLossBackward0>)
train loss: tensor(4096.9463, grad_fn=<MseLossBackward0>)
train loss: tensor(12749.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(5246.5542, grad_fn=<MseLossBackward0>)
train loss: tensor(9125.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(12496.8691, grad_fn=<MseLossBackward0>)
train loss: tensor(7363.8564, grad_fn=<MseLossBackward0>)
train loss: tensor(13004.1611, grad_fn=<MseLossBackward0>)
train loss: tensor(3369.8867, grad_fn=<MseLossBackward0>)
train loss: tensor(14312.8525, grad_fn=<MseLossBackward0>)
train loss: tensor(5674.1938, grad_fn=<MseLossBackward0>)
train loss: tensor(5896.8765, grad_fn=<MseLossBackward0>)
train loss: tensor(5999.6558, grad_fn=<MseLossBackward0>)
train loss: tensor(4784.3931, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3988.0527, grad_fn=<MseLossBackward0>)
train loss: tensor(7405.0864, grad_fn=<MseLossBackward0>)
train loss: tensor(7209.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(13332.7852, grad_fn=<MseLossBackward0>)
train loss: tensor(8459.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(6226.8931, grad_fn=<MseLossBackward0>)
train loss: tensor(13871.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(11291.5205, grad_fn=<MseLossBackward0>)
train loss: tensor(7204.5166, grad_fn=<MseLossBackward0>)
train loss: tensor(11416.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(18508.2930, grad_fn=<MseLossBackward0>)
train loss: tensor(20676.4023, grad_fn=<MseLossBackward0>)
train loss: tensor(11034.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(11903.5771, grad_fn=<MseLossBackward0>)
train loss: tensor(8909.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(10414.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(13457.1064, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(6031.7612, grad_fn=<MseLossBackward0>)
train loss: tensor(3973.5452, grad_fn=<MseLossBackward0>)
train loss: tensor(4177.3345, grad_fn=<MseLossBackward0>)
train loss: tensor(4121.8091, grad_fn=<MseLossBackward0>)
train loss: tensor(5531.8804, grad_fn=<MseLossBackward0>)
train loss: tensor(4801.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(6517.5439, grad_fn=<MseLossBackward0>)
train loss: tensor(5820.7798, grad_fn=<MseLossBackward0>)
train loss: tensor(4077.7834, grad_fn=<MseLossBackward0>)
train loss: tensor(8512.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(5488.9019, grad_fn=<MseLossBackward0>)
train loss: tensor(12252.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(15663.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(12765.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(6728.0420, grad_fn=<MseLossBackward0>)
train loss: tensor(6479.8359, grad_fn=<MseLossBackward0>)
train loss: tensor(4035.9226, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5824.7388, grad_fn=<MseLossBackward0>)
train loss: tensor(3908.6545, grad_fn=<MseLossBackward0>)
train loss: tensor(7269.7759, grad_fn=<MseLossBackward0>)
train loss: tensor(4133.2021, grad_fn=<MseLossBackward0>)
train loss: tensor(3086.7437, grad_fn=<MseLossBackward0>)
train loss: tensor(8459.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(15967.6982, grad_fn=<MseLossBackward0>)
train loss: tensor(4619.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(2731.6384, grad_fn=<MseLossBackward0>)
train loss: tensor(8942.2607, grad_fn=<MseLossBackward0>)
train loss: tensor(6132.9263, grad_fn=<MseLossBackward0>)
train loss: tensor(10042.0137, grad_fn=<MseLossBackward0>)
train loss: tensor(2668.3137, grad_fn=<MseLossBackward0>)
train loss: tensor(5055.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(5169.7261, grad_fn=<MseLossBackward0>)
train loss: tensor(8606.9531, grad_fn=<MseLossBackward0>)
train loss: tensor(6910.9375, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(7537.5972, grad_fn=<MseLossBackward0>)
train loss: tensor(6083.9097, grad_fn=<MseLossBackward0>)
train loss: tensor(4727.0771, grad_fn=<MseLossBackward0>)
train loss: tensor(7782.1929, grad_fn=<MseLossBackward0>)
train loss: tensor(35287.0938, grad_fn=<MseLossBackward0>)
train loss: tensor(11151.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(6113.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(4593.6436, grad_fn=<MseLossBackward0>)
train loss: tensor(8028.5176, grad_fn=<MseLossBackward0>)
train loss: tensor(17419.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(5661.9824, grad_fn=<MseLossBackward0>)
train loss: tensor(6261.3145, grad_fn=<MseLossBackward0>)
train loss: tensor(2817.0613, grad_fn=<MseLossBackward0>)
train loss: tensor(8903.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(6910.0234, grad_fn=<MseLossBackward0>)
train loss: tensor(6990.8174, grad_fn=<MseLossBackward0>)
train loss: tensor(15545.3945, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(10366.9463, grad_fn=<MseLossBackward0>)
train loss: tensor(6956.3989, grad_fn=<MseLossBackward0>)
train loss: tensor(6420.7876, grad_fn=<MseLossBackward0>)
train loss: tensor(4425.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(12295.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(12013.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(8074.4824, grad_fn=<MseLossBackward0>)
train loss: tensor(16330.1123, grad_fn=<MseLossBackward0>)
train loss: tensor(9076.1797, grad_fn=<MseLossBackward0>)
train loss: tensor(5904.8638, grad_fn=<MseLossBackward0>)
train loss: tensor(8442.7441, grad_fn=<MseLossBackward0>)
train loss: tensor(12551.2646, grad_fn=<MseLossBackward0>)
train loss: tensor(5070.1895, grad_fn=<MseLossBackward0>)
train loss: tensor(15780.6348, grad_fn=<MseLossBackward0>)
train loss: tensor(5621.1602, grad_fn=<MseLossBackward0>)
train loss: tensor(5721.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(2830.4080, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3548.3005, grad_fn=<MseLossBackward0>)
train loss: tensor(6466.3491, grad_fn=<MseLossBackward0>)
train loss: tensor(5394.5903, grad_fn=<MseLossBackward0>)
train loss: tensor(3198.0459, grad_fn=<MseLossBackward0>)
train loss: tensor(16510.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(6787.1128, grad_fn=<MseLossBackward0>)
train loss: tensor(13659.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(2598.7051, grad_fn=<MseLossBackward0>)
train loss: tensor(1754.0496, grad_fn=<MseLossBackward0>)
train loss: tensor(14746.2383, grad_fn=<MseLossBackward0>)
train loss: tensor(6912.9468, grad_fn=<MseLossBackward0>)
train loss: tensor(11231.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(6496.5171, grad_fn=<MseLossBackward0>)
train loss: tensor(12269.6680, grad_fn=<MseLossBackward0>)
train loss: tensor(4307.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(11603.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(10959.2822, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(9004.0996, grad_fn=<MseLossBackward0>)
train loss: tensor(9218.5205, grad_fn=<MseLossBackward0>)
train loss: tensor(4360.4951, grad_fn=<MseLossBackward0>)
train loss: tensor(15039.0918, grad_fn=<MseLossBackward0>)
train loss: tensor(9836.6299, grad_fn=<MseLossBackward0>)
train loss: tensor(5944.9771, grad_fn=<MseLossBackward0>)
train loss: tensor(5037.3389, grad_fn=<MseLossBackward0>)
train loss: tensor(3237.2678, grad_fn=<MseLossBackward0>)
train loss: tensor(10484.0215, grad_fn=<MseLossBackward0>)
train loss: tensor(17352.2324, grad_fn=<MseLossBackward0>)
train loss: tensor(5500.9731, grad_fn=<MseLossBackward0>)
train loss: tensor(8366.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(5606.6719, grad_fn=<MseLossBackward0>)
train loss: tensor(9244.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(6560.4961, grad_fn=<MseLossBackward0>)
train loss: tensor(4893.3643, grad_fn=<MseLossBackward0>)
train loss: tensor(6460.8701, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(7552.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(1603.6564, grad_fn=<MseLossBackward0>)
train loss: tensor(9758.4990, grad_fn=<MseLossBackward0>)
train loss: tensor(10573.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(12609.7598, grad_fn=<MseLossBackward0>)
train loss: tensor(3956.6536, grad_fn=<MseLossBackward0>)
train loss: tensor(5019.0366, grad_fn=<MseLossBackward0>)
train loss: tensor(3587.4226, grad_fn=<MseLossBackward0>)
train loss: tensor(6227.0630, grad_fn=<MseLossBackward0>)
train loss: tensor(6705.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(8860.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(9323.1289, grad_fn=<MseLossBackward0>)
train loss: tensor(5366.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(7761.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(6970.4146, grad_fn=<MseLossBackward0>)
train loss: tensor(4084.3210, grad_fn=<MseLossBackward0>)
train loss: tensor(8210.4453, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(5563.7305, grad_fn=<MseLossBackward0>)
train loss: tensor(10936.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(11964.0410, grad_fn=<MseLossBackward0>)
train loss: tensor(19280.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(3736.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(9241.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(8215.4727, grad_fn=<MseLossBackward0>)
train loss: tensor(8441.1670, grad_fn=<MseLossBackward0>)
train loss: tensor(16738.3203, grad_fn=<MseLossBackward0>)
train loss: tensor(8639.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(4476.8301, grad_fn=<MseLossBackward0>)
train loss: tensor(7146.1411, grad_fn=<MseLossBackward0>)
train loss: tensor(12338.0635, grad_fn=<MseLossBackward0>)
train loss: tensor(12337.4736, grad_fn=<MseLossBackward0>)
train loss: tensor(6313.5122, grad_fn=<MseLossBackward0>)
train loss: tensor(7592.3022, grad_fn=<MseLossBackward0>)
train loss: tensor(1168.5610, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(12625.7910, grad_fn=<MseLossBackward0>)
train loss: tensor(1889.1727, grad_fn=<MseLossBackward0>)
train loss: tensor(15321.5625, grad_fn=<MseLossBackward0>)
train loss: tensor(6690.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(8471.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(6403.7837, grad_fn=<MseLossBackward0>)
train loss: tensor(3898.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(4846.1128, grad_fn=<MseLossBackward0>)
train loss: tensor(4218.0112, grad_fn=<MseLossBackward0>)
train loss: tensor(4104.4849, grad_fn=<MseLossBackward0>)
train loss: tensor(3469.9170, grad_fn=<MseLossBackward0>)
train loss: tensor(5695.8257, grad_fn=<MseLossBackward0>)
train loss: tensor(4590.3784, grad_fn=<MseLossBackward0>)
train loss: tensor(6254.6304, grad_fn=<MseLossBackward0>)
train loss: tensor(8242.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(2205.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(9000.0996, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(67963.1953, grad_fn=<MseLossBackward0>)
train loss: tensor(15271.3662, grad_fn=<MseLossBackward0>)
train loss: tensor(7606.9048, grad_fn=<MseLossBackward0>)
train loss: tensor(3436.6064, grad_fn=<MseLossBackward0>)
train loss: tensor(4250.4019, grad_fn=<MseLossBackward0>)
train loss: tensor(2198.7864, grad_fn=<MseLossBackward0>)
train loss: tensor(15501.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(7674.1680, grad_fn=<MseLossBackward0>)
train loss: tensor(5171.8633, grad_fn=<MseLossBackward0>)
train loss: tensor(17822.1738, grad_fn=<MseLossBackward0>)
train loss: tensor(3982.2297, grad_fn=<MseLossBackward0>)
train loss: tensor(9169.2803, grad_fn=<MseLossBackward0>)
train loss: tensor(10096.3242, grad_fn=<MseLossBackward0>)
train loss: tensor(12486.6416, grad_fn=<MseLossBackward0>)
train loss: tensor(6337.0435, grad_fn=<MseLossBackward0>)
train loss: tensor(4636.3828, grad_fn=<MseLossBackward0>)
train loss: tensor(2940.9031, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(6222.1919, grad_fn=<MseLossBackward0>)
train loss: tensor(16688.1113, grad_fn=<MseLossBackward0>)
train loss: tensor(3330.3267, grad_fn=<MseLossBackward0>)
train loss: tensor(4260.9756, grad_fn=<MseLossBackward0>)
train loss: tensor(4750.3628, grad_fn=<MseLossBackward0>)
train loss: tensor(6869.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(14432.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(10992.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(6112.5024, grad_fn=<MseLossBackward0>)
train loss: tensor(7160.9312, grad_fn=<MseLossBackward0>)
train loss: tensor(6162.2622, grad_fn=<MseLossBackward0>)
train loss: tensor(10573.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(4771.9585, grad_fn=<MseLossBackward0>)
train loss: tensor(6572.4263, grad_fn=<MseLossBackward0>)
train loss: tensor(15642.1943, grad_fn=<MseLossBackward0>)
train loss: tensor(4316.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(13021.1963, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(26931.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(4048.4495, grad_fn=<MseLossBackward0>)
train loss: tensor(12270.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(15261.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(9298.5039, grad_fn=<MseLossBackward0>)
train loss: tensor(18976.7461, grad_fn=<MseLossBackward0>)
train loss: tensor(5814.4292, grad_fn=<MseLossBackward0>)
train loss: tensor(8700.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(4975.4819, grad_fn=<MseLossBackward0>)
train loss: tensor(3126.4780, grad_fn=<MseLossBackward0>)
train loss: tensor(14683.4287, grad_fn=<MseLossBackward0>)
train loss: tensor(13471.3184, grad_fn=<MseLossBackward0>)
train loss: tensor(5808.1704, grad_fn=<MseLossBackward0>)
train loss: tensor(12935.6797, grad_fn=<MseLossBackward0>)
train loss: tensor(4502.6499, grad_fn=<MseLossBackward0>)
train loss: tensor(5560.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(8470.0352, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(9481.4297, grad_fn=<MseLossBackward0>)
train loss: tensor(4923.5039, grad_fn=<MseLossBackward0>)
train loss: tensor(6727.7979, grad_fn=<MseLossBackward0>)
train loss: tensor(2367.5247, grad_fn=<MseLossBackward0>)
train loss: tensor(6151.9248, grad_fn=<MseLossBackward0>)
train loss: tensor(15907.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(2751.2964, grad_fn=<MseLossBackward0>)
train loss: tensor(9806.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(2210.3374, grad_fn=<MseLossBackward0>)
train loss: tensor(2346.1453, grad_fn=<MseLossBackward0>)
train loss: tensor(9280.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(13336.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(62508.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(18173.1543, grad_fn=<MseLossBackward0>)
train loss: tensor(9179.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(6466.3486, grad_fn=<MseLossBackward0>)
train loss: tensor(17984.0996, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(15837.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(15705.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(13955.3311, grad_fn=<MseLossBackward0>)
train loss: tensor(38948.3906, grad_fn=<MseLossBackward0>)
train loss: tensor(10661.3408, grad_fn=<MseLossBackward0>)
train loss: tensor(11843.9248, grad_fn=<MseLossBackward0>)
train loss: tensor(12063.5996, grad_fn=<MseLossBackward0>)
train loss: tensor(16383.8369, grad_fn=<MseLossBackward0>)
train loss: tensor(2079.3594, grad_fn=<MseLossBackward0>)
train loss: tensor(3632.5703, grad_fn=<MseLossBackward0>)
train loss: tensor(7015.2192, grad_fn=<MseLossBackward0>)
train loss: tensor(6465.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(5733.2461, grad_fn=<MseLossBackward0>)
train loss: tensor(5127.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(10411.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(6338.4302, grad_fn=<MseLossBackward0>)
train loss: tensor(14856.1230, grad_fn=<MseLossBackward0>)
trai

train loss: tensor(7621.4766, grad_fn=<MseLossBackward0>)
train loss: tensor(7703.6064, grad_fn=<MseLossBackward0>)
train loss: tensor(9558.9863, grad_fn=<MseLossBackward0>)
train loss: tensor(11639.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(46493.1875, grad_fn=<MseLossBackward0>)
train loss: tensor(2536.2197, grad_fn=<MseLossBackward0>)
train loss: tensor(2541.5435, grad_fn=<MseLossBackward0>)
train loss: tensor(4848.6567, grad_fn=<MseLossBackward0>)
train loss: tensor(1459.2360, grad_fn=<MseLossBackward0>)
train loss: tensor(4846.4429, grad_fn=<MseLossBackward0>)
train loss: tensor(7310.8628, grad_fn=<MseLossBackward0>)
train loss: tensor(9415.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(6585.2075, grad_fn=<MseLossBackward0>)
train loss: tensor(5622.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(3911.2983, grad_fn=<MseLossBackward0>)
train loss: tensor(9452.4102, grad_fn=<MseLossBackward0>)
train loss: tensor(13606.4512, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5202.8999, grad_fn=<MseLossBackward0>)
train loss: tensor(5213.3843, grad_fn=<MseLossBackward0>)
train loss: tensor(73926.1953, grad_fn=<MseLossBackward0>)
train loss: tensor(19470.3926, grad_fn=<MseLossBackward0>)
train loss: tensor(3274.8447, grad_fn=<MseLossBackward0>)
train loss: tensor(5837.3213, grad_fn=<MseLossBackward0>)
train loss: tensor(12099.8623, grad_fn=<MseLossBackward0>)
train loss: tensor(7138.4409, grad_fn=<MseLossBackward0>)
train loss: tensor(7961.5225, grad_fn=<MseLossBackward0>)
train loss: tensor(13890.5449, grad_fn=<MseLossBackward0>)
train loss: tensor(6867.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(9766.9883, grad_fn=<MseLossBackward0>)
train loss: tensor(1596.9294, grad_fn=<MseLossBackward0>)
train loss: tensor(14306.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(4137.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(6422.1802, grad_fn=<MseLossBackward0>)
train loss: tensor(6272.7876, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6671.9058, grad_fn=<MseLossBackward0>)
train loss: tensor(5977.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(9410.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(7249.2373, grad_fn=<MseLossBackward0>)
train loss: tensor(11402.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(9020.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(3778.9304, grad_fn=<MseLossBackward0>)
train loss: tensor(7430.2368, grad_fn=<MseLossBackward0>)
train loss: tensor(2957.0208, grad_fn=<MseLossBackward0>)
train loss: tensor(7457.1260, grad_fn=<MseLossBackward0>)
train loss: tensor(3448.7019, grad_fn=<MseLossBackward0>)
train loss: tensor(7141.3276, grad_fn=<MseLossBackward0>)
train loss: tensor(17310.4688, grad_fn=<MseLossBackward0>)
train loss: tensor(5582.7964, grad_fn=<MseLossBackward0>)
train loss: tensor(13456.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(11270.7979, grad_fn=<MseLossBackward0>)
train loss: tensor(6863.5044, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(6681.0332, grad_fn=<MseLossBackward0>)
train loss: tensor(3970.9792, grad_fn=<MseLossBackward0>)
train loss: tensor(8138.4146, grad_fn=<MseLossBackward0>)
train loss: tensor(2517.3464, grad_fn=<MseLossBackward0>)
train loss: tensor(2734.8909, grad_fn=<MseLossBackward0>)
train loss: tensor(15371.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(3668.0496, grad_fn=<MseLossBackward0>)
train loss: tensor(9946.0518, grad_fn=<MseLossBackward0>)
train loss: tensor(6277.8257, grad_fn=<MseLossBackward0>)
train loss: tensor(4839.4141, grad_fn=<MseLossBackward0>)
train loss: tensor(11081.3809, grad_fn=<MseLossBackward0>)
train loss: tensor(7592.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(6481.3145, grad_fn=<MseLossBackward0>)
train loss: tensor(8555.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(10182.0771, grad_fn=<MseLossBackward0>)
train loss: tensor(7631.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(6695.4136, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3358.3254, grad_fn=<MseLossBackward0>)
train loss: tensor(6273.8892, grad_fn=<MseLossBackward0>)
train loss: tensor(4384.9058, grad_fn=<MseLossBackward0>)
train loss: tensor(4350.5562, grad_fn=<MseLossBackward0>)
train loss: tensor(5349.3276, grad_fn=<MseLossBackward0>)
train loss: tensor(16508.4121, grad_fn=<MseLossBackward0>)
train loss: tensor(12284.3330, grad_fn=<MseLossBackward0>)
train loss: tensor(28644.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(6404.8657, grad_fn=<MseLossBackward0>)
train loss: tensor(2794.9412, grad_fn=<MseLossBackward0>)
train loss: tensor(6593.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(2150.8210, grad_fn=<MseLossBackward0>)
train loss: tensor(15548.9707, grad_fn=<MseLossBackward0>)
train loss: tensor(2472.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(5162.7808, grad_fn=<MseLossBackward0>)
train loss: tensor(6022.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(10270.9580, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6312.8853, grad_fn=<MseLossBackward0>)
train loss: tensor(3937.7542, grad_fn=<MseLossBackward0>)
train loss: tensor(9965.3887, grad_fn=<MseLossBackward0>)
train loss: tensor(2582.1072, grad_fn=<MseLossBackward0>)
train loss: tensor(34544.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(75735.4688, grad_fn=<MseLossBackward0>)
train loss: tensor(7944.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(10917.5039, grad_fn=<MseLossBackward0>)
train loss: tensor(14763.6230, grad_fn=<MseLossBackward0>)
train loss: tensor(10126.5430, grad_fn=<MseLossBackward0>)
train loss: tensor(8892.5244, grad_fn=<MseLossBackward0>)
train loss: tensor(4122.9297, grad_fn=<MseLossBackward0>)
train loss: tensor(15354.2783, grad_fn=<MseLossBackward0>)
train loss: tensor(13749.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(7863.9858, grad_fn=<MseLossBackward0>)
train loss: tensor(7899.2212, grad_fn=<MseLossBackward0>)
train loss: tensor(16076.1904, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(2070.5908, grad_fn=<MseLossBackward0>)
train loss: tensor(78588.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(2840.2251, grad_fn=<MseLossBackward0>)
train loss: tensor(4968.2368, grad_fn=<MseLossBackward0>)
train loss: tensor(5319.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(12000.0469, grad_fn=<MseLossBackward0>)
train loss: tensor(11729.6582, grad_fn=<MseLossBackward0>)
train loss: tensor(10573.2803, grad_fn=<MseLossBackward0>)
train loss: tensor(14037.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(6502.0005, grad_fn=<MseLossBackward0>)
train loss: tensor(10678.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(2908.0732, grad_fn=<MseLossBackward0>)
train loss: tensor(12714.3809, grad_fn=<MseLossBackward0>)
train loss: tensor(4638.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(3707.0178, grad_fn=<MseLossBackward0>)
train loss: tensor(6022.7163, grad_fn=<MseLossBackward0>)
train loss: tensor(9719.3223, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5580.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(5976.3491, grad_fn=<MseLossBackward0>)
train loss: tensor(65673.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(8640.0469, grad_fn=<MseLossBackward0>)
train loss: tensor(11121.9580, grad_fn=<MseLossBackward0>)
train loss: tensor(15434.9785, grad_fn=<MseLossBackward0>)
train loss: tensor(14206.0459, grad_fn=<MseLossBackward0>)
train loss: tensor(13495.4082, grad_fn=<MseLossBackward0>)
train loss: tensor(39516.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(6227.3281, grad_fn=<MseLossBackward0>)
train loss: tensor(13665.4883, grad_fn=<MseLossBackward0>)
train loss: tensor(8072.0459, grad_fn=<MseLossBackward0>)
train loss: tensor(9020.1592, grad_fn=<MseLossBackward0>)
train loss: tensor(7725.0317, grad_fn=<MseLossBackward0>)
train loss: tensor(14202.4404, grad_fn=<MseLossBackward0>)
train loss: tensor(8434.1807, grad_fn=<MseLossBackward0>)
train loss: tensor(16056.8223, grad_fn=<MseLossBackward0>)
train

train loss: tensor(4196.6826, grad_fn=<MseLossBackward0>)
train loss: tensor(6855.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(2574.8064, grad_fn=<MseLossBackward0>)
train loss: tensor(6719.0981, grad_fn=<MseLossBackward0>)
train loss: tensor(6822.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(10330.6943, grad_fn=<MseLossBackward0>)
train loss: tensor(6616.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(3531.6882, grad_fn=<MseLossBackward0>)
train loss: tensor(12698.2051, grad_fn=<MseLossBackward0>)
train loss: tensor(10282.1680, grad_fn=<MseLossBackward0>)
train loss: tensor(5306.3228, grad_fn=<MseLossBackward0>)
train loss: tensor(10692.3115, grad_fn=<MseLossBackward0>)
train loss: tensor(6878.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(5344.0581, grad_fn=<MseLossBackward0>)
train loss: tensor(8430.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(12673.2480, grad_fn=<MseLossBackward0>)
train loss: tensor(5249.4341, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(19127.5703, grad_fn=<MseLossBackward0>)
train loss: tensor(2283.1731, grad_fn=<MseLossBackward0>)
train loss: tensor(7968.4341, grad_fn=<MseLossBackward0>)
train loss: tensor(3482.3489, grad_fn=<MseLossBackward0>)
train loss: tensor(6770.4878, grad_fn=<MseLossBackward0>)
train loss: tensor(8323.8223, grad_fn=<MseLossBackward0>)
train loss: tensor(9653.4824, grad_fn=<MseLossBackward0>)
train loss: tensor(15739.6943, grad_fn=<MseLossBackward0>)
train loss: tensor(7963.6426, grad_fn=<MseLossBackward0>)
train loss: tensor(3083.5083, grad_fn=<MseLossBackward0>)
train loss: tensor(8361.2402, grad_fn=<MseLossBackward0>)
train loss: tensor(15685.4004, grad_fn=<MseLossBackward0>)
train loss: tensor(6475.6582, grad_fn=<MseLossBackward0>)
train loss: tensor(5083.0879, grad_fn=<MseLossBackward0>)
train loss: tensor(7155.4683, grad_fn=<MseLossBackward0>)
train loss: tensor(4646.7896, grad_fn=<MseLossBackward0>)
train loss: tensor(4534.7520, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(14970.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(3268.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(5892.9692, grad_fn=<MseLossBackward0>)
train loss: tensor(4506.1123, grad_fn=<MseLossBackward0>)
train loss: tensor(8040.3843, grad_fn=<MseLossBackward0>)
train loss: tensor(7706.1353, grad_fn=<MseLossBackward0>)
train loss: tensor(9529.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(6918.9321, grad_fn=<MseLossBackward0>)
train loss: tensor(3750.2476, grad_fn=<MseLossBackward0>)
train loss: tensor(12501.9492, grad_fn=<MseLossBackward0>)
train loss: tensor(12228.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(12534.3604, grad_fn=<MseLossBackward0>)
train loss: tensor(9810.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(3359.8623, grad_fn=<MseLossBackward0>)
train loss: tensor(3551.9714, grad_fn=<MseLossBackward0>)
train loss: tensor(9801.4473, grad_fn=<MseLossBackward0>)
train loss: tensor(7238.4619, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(9534.4414, grad_fn=<MseLossBackward0>)
train loss: tensor(4291.2617, grad_fn=<MseLossBackward0>)
train loss: tensor(10037.1201, grad_fn=<MseLossBackward0>)
train loss: tensor(12426.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(6671.5776, grad_fn=<MseLossBackward0>)
train loss: tensor(77978.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(9335.3643, grad_fn=<MseLossBackward0>)
train loss: tensor(8529.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(10446.9980, grad_fn=<MseLossBackward0>)
train loss: tensor(5541.1870, grad_fn=<MseLossBackward0>)
train loss: tensor(2352.1685, grad_fn=<MseLossBackward0>)
train loss: tensor(3774.7090, grad_fn=<MseLossBackward0>)
train loss: tensor(1061.0463, grad_fn=<MseLossBackward0>)
train loss: tensor(14617.4365, grad_fn=<MseLossBackward0>)
train loss: tensor(2551.1438, grad_fn=<MseLossBackward0>)
train loss: tensor(5060.2485, grad_fn=<MseLossBackward0>)
train loss: tensor(15220.5088, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5208.9438, grad_fn=<MseLossBackward0>)
train loss: tensor(10415.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(6204.7080, grad_fn=<MseLossBackward0>)
train loss: tensor(6372.5132, grad_fn=<MseLossBackward0>)
train loss: tensor(10952.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(10154.4170, grad_fn=<MseLossBackward0>)
train loss: tensor(5777.4219, grad_fn=<MseLossBackward0>)
train loss: tensor(6341.5186, grad_fn=<MseLossBackward0>)
train loss: tensor(5188.3350, grad_fn=<MseLossBackward0>)
train loss: tensor(9237.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(2304.3557, grad_fn=<MseLossBackward0>)
train loss: tensor(3705.4939, grad_fn=<MseLossBackward0>)
train loss: tensor(15582.5293, grad_fn=<MseLossBackward0>)
train loss: tensor(4198.4229, grad_fn=<MseLossBackward0>)
train loss: tensor(4109.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(4650.9346, grad_fn=<MseLossBackward0>)
train loss: tensor(6044.5591, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8211.8350, grad_fn=<MseLossBackward0>)
train loss: tensor(13165.6689, grad_fn=<MseLossBackward0>)
train loss: tensor(8216.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(21299.6738, grad_fn=<MseLossBackward0>)
train loss: tensor(8855.5068, grad_fn=<MseLossBackward0>)
train loss: tensor(6919.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(11184.4785, grad_fn=<MseLossBackward0>)
train loss: tensor(6202.6992, grad_fn=<MseLossBackward0>)
train loss: tensor(4514.6699, grad_fn=<MseLossBackward0>)
train loss: tensor(7540.2158, grad_fn=<MseLossBackward0>)
train loss: tensor(6147.4604, grad_fn=<MseLossBackward0>)
train loss: tensor(12533.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(9144.9766, grad_fn=<MseLossBackward0>)
train loss: tensor(15333.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(3655.1125, grad_fn=<MseLossBackward0>)
train loss: tensor(89720.9531, grad_fn=<MseLossBackward0>)
train loss: tensor(1647.5509, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5068.4980, grad_fn=<MseLossBackward0>)
train loss: tensor(3839.3110, grad_fn=<MseLossBackward0>)
train loss: tensor(4575.8545, grad_fn=<MseLossBackward0>)
train loss: tensor(8189.6772, grad_fn=<MseLossBackward0>)
train loss: tensor(6837.7095, grad_fn=<MseLossBackward0>)
train loss: tensor(29062.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(12428.9531, grad_fn=<MseLossBackward0>)
train loss: tensor(22293.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(3537.1074, grad_fn=<MseLossBackward0>)
train loss: tensor(12473.8486, grad_fn=<MseLossBackward0>)
train loss: tensor(43368.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(4462.1562, grad_fn=<MseLossBackward0>)
train loss: tensor(3997.5559, grad_fn=<MseLossBackward0>)
train loss: tensor(3195.8069, grad_fn=<MseLossBackward0>)
train loss: tensor(4136.7114, grad_fn=<MseLossBackward0>)
train loss: tensor(67614.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(5084.0864, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(6109.7437, grad_fn=<MseLossBackward0>)
train loss: tensor(6894.8408, grad_fn=<MseLossBackward0>)
train loss: tensor(8086.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(9227.1055, grad_fn=<MseLossBackward0>)
train loss: tensor(4162.1987, grad_fn=<MseLossBackward0>)
train loss: tensor(3436.2114, grad_fn=<MseLossBackward0>)
train loss: tensor(5701.9810, grad_fn=<MseLossBackward0>)
train loss: tensor(5089.6411, grad_fn=<MseLossBackward0>)
train loss: tensor(5577.5967, grad_fn=<MseLossBackward0>)
train loss: tensor(6275.4229, grad_fn=<MseLossBackward0>)
train loss: tensor(2767.5085, grad_fn=<MseLossBackward0>)
train loss: tensor(4794.1465, grad_fn=<MseLossBackward0>)
train loss: tensor(9332.8291, grad_fn=<MseLossBackward0>)
train loss: tensor(10693.7070, grad_fn=<MseLossBackward0>)
train loss: tensor(31273.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(8020.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(6481.2832, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4651.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(8857.2852, grad_fn=<MseLossBackward0>)
train loss: tensor(4555.9717, grad_fn=<MseLossBackward0>)
train loss: tensor(6292.8862, grad_fn=<MseLossBackward0>)
train loss: tensor(11734.6309, grad_fn=<MseLossBackward0>)
train loss: tensor(4907.4268, grad_fn=<MseLossBackward0>)
train loss: tensor(11912.5635, grad_fn=<MseLossBackward0>)
train loss: tensor(10089.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(4171.6411, grad_fn=<MseLossBackward0>)
train loss: tensor(3955.5710, grad_fn=<MseLossBackward0>)
train loss: tensor(5525.3169, grad_fn=<MseLossBackward0>)
train loss: tensor(5514.1694, grad_fn=<MseLossBackward0>)
train loss: tensor(4157.8711, grad_fn=<MseLossBackward0>)
train loss: tensor(3493.6880, grad_fn=<MseLossBackward0>)
train loss: tensor(5021.5459, grad_fn=<MseLossBackward0>)
train loss: tensor(2541.0073, grad_fn=<MseLossBackward0>)
train loss: tensor(7390.9224, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5870.0552, grad_fn=<MseLossBackward0>)
train loss: tensor(3702.7170, grad_fn=<MseLossBackward0>)
train loss: tensor(3364.5229, grad_fn=<MseLossBackward0>)
train loss: tensor(12249.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(7767.5835, grad_fn=<MseLossBackward0>)
train loss: tensor(3799.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(6180.3257, grad_fn=<MseLossBackward0>)
train loss: tensor(5756.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(3931.9548, grad_fn=<MseLossBackward0>)
train loss: tensor(11254.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(3263.9294, grad_fn=<MseLossBackward0>)
train loss: tensor(3006.8511, grad_fn=<MseLossBackward0>)
train loss: tensor(4282.6743, grad_fn=<MseLossBackward0>)
train loss: tensor(10564.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(10312.0459, grad_fn=<MseLossBackward0>)
train loss: tensor(7807.0889, grad_fn=<MseLossBackward0>)
train loss: tensor(6228.2622, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(5700.0225, grad_fn=<MseLossBackward0>)
train loss: tensor(9318.9980, grad_fn=<MseLossBackward0>)
train loss: tensor(39591.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(3871.4504, grad_fn=<MseLossBackward0>)
train loss: tensor(5287.3760, grad_fn=<MseLossBackward0>)
train loss: tensor(943.8601, grad_fn=<MseLossBackward0>)
train loss: tensor(9779.3154, grad_fn=<MseLossBackward0>)
train loss: tensor(4248.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(6136.9209, grad_fn=<MseLossBackward0>)
train loss: tensor(13809.6426, grad_fn=<MseLossBackward0>)
train loss: tensor(4204.8994, grad_fn=<MseLossBackward0>)
train loss: tensor(10958.8711, grad_fn=<MseLossBackward0>)
train loss: tensor(2392.9265, grad_fn=<MseLossBackward0>)
train loss: tensor(4219.3784, grad_fn=<MseLossBackward0>)
train loss: tensor(7795.6704, grad_fn=<MseLossBackward0>)
train loss: tensor(8469.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(7147.1792, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(5963.0605, grad_fn=<MseLossBackward0>)
train loss: tensor(7121.7456, grad_fn=<MseLossBackward0>)
train loss: tensor(3139.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(9036.5293, grad_fn=<MseLossBackward0>)
train loss: tensor(2230.7202, grad_fn=<MseLossBackward0>)
train loss: tensor(12884.2441, grad_fn=<MseLossBackward0>)
train loss: tensor(18607.9961, grad_fn=<MseLossBackward0>)
train loss: tensor(2907.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(5492.1904, grad_fn=<MseLossBackward0>)
train loss: tensor(9040.6113, grad_fn=<MseLossBackward0>)
train loss: tensor(7419.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(8366.0293, grad_fn=<MseLossBackward0>)
train loss: tensor(3323.9468, grad_fn=<MseLossBackward0>)
train loss: tensor(5318.7583, grad_fn=<MseLossBackward0>)
train loss: tensor(15925.7354, grad_fn=<MseLossBackward0>)
train loss: tensor(2730.7188, grad_fn=<MseLossBackward0>)
train loss: tensor(9214.0107, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(9115.8350, grad_fn=<MseLossBackward0>)
train loss: tensor(10768.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(9704.0811, grad_fn=<MseLossBackward0>)
train loss: tensor(80707.7031, grad_fn=<MseLossBackward0>)
train loss: tensor(6898.7251, grad_fn=<MseLossBackward0>)
train loss: tensor(4768.9502, grad_fn=<MseLossBackward0>)
train loss: tensor(3114.1711, grad_fn=<MseLossBackward0>)
train loss: tensor(8129.2671, grad_fn=<MseLossBackward0>)
train loss: tensor(3853.3340, grad_fn=<MseLossBackward0>)
train loss: tensor(12985.9756, grad_fn=<MseLossBackward0>)
train loss: tensor(6811.5596, grad_fn=<MseLossBackward0>)
train loss: tensor(4556.5874, grad_fn=<MseLossBackward0>)
train loss: tensor(8272.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(8367.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(7946.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(8933.8535, grad_fn=<MseLossBackward0>)
train loss: tensor(4099.6631, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3419.1218, grad_fn=<MseLossBackward0>)
train loss: tensor(6855.2207, grad_fn=<MseLossBackward0>)
train loss: tensor(14846.6562, grad_fn=<MseLossBackward0>)
train loss: tensor(9660.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(2848.6260, grad_fn=<MseLossBackward0>)
train loss: tensor(8086.2241, grad_fn=<MseLossBackward0>)
train loss: tensor(7747.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(7092.6826, grad_fn=<MseLossBackward0>)
train loss: tensor(10077.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(48325.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(17752.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(10513.1592, grad_fn=<MseLossBackward0>)
train loss: tensor(5196.8579, grad_fn=<MseLossBackward0>)
train loss: tensor(8690.5244, grad_fn=<MseLossBackward0>)
train loss: tensor(1847.3943, grad_fn=<MseLossBackward0>)
train loss: tensor(5923.2017, grad_fn=<MseLossBackward0>)
train loss: tensor(13493.4121, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(11692.4824, grad_fn=<MseLossBackward0>)
train loss: tensor(13435.1133, grad_fn=<MseLossBackward0>)
train loss: tensor(5112.2896, grad_fn=<MseLossBackward0>)
train loss: tensor(7201.0210, grad_fn=<MseLossBackward0>)
train loss: tensor(11191.9043, grad_fn=<MseLossBackward0>)
train loss: tensor(3493.4011, grad_fn=<MseLossBackward0>)
train loss: tensor(11443.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(3915.3169, grad_fn=<MseLossBackward0>)
train loss: tensor(3601.7297, grad_fn=<MseLossBackward0>)
train loss: tensor(8824.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(6891.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(23802.8633, grad_fn=<MseLossBackward0>)
train loss: tensor(6710.5415, grad_fn=<MseLossBackward0>)
train loss: tensor(12441.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(6214.7539, grad_fn=<MseLossBackward0>)
train loss: tensor(6219.8628, grad_fn=<MseLossBackward0>)
train loss: tensor(3330.8748, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(7400.6846, grad_fn=<MseLossBackward0>)
train loss: tensor(7458.4644, grad_fn=<MseLossBackward0>)
train loss: tensor(4767.4365, grad_fn=<MseLossBackward0>)
train loss: tensor(12759.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(7319.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(11262.0811, grad_fn=<MseLossBackward0>)
train loss: tensor(5771.6846, grad_fn=<MseLossBackward0>)
train loss: tensor(10410.8057, grad_fn=<MseLossBackward0>)
train loss: tensor(6455.7144, grad_fn=<MseLossBackward0>)
train loss: tensor(9853.0986, grad_fn=<MseLossBackward0>)
train loss: tensor(15976.8779, grad_fn=<MseLossBackward0>)
train loss: tensor(7346.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(7854.9429, grad_fn=<MseLossBackward0>)
train loss: tensor(6760.0356, grad_fn=<MseLossBackward0>)
train loss: tensor(6286.4521, grad_fn=<MseLossBackward0>)
train loss: tensor(6599.9448, grad_fn=<MseLossBackward0>)
train loss: tensor(14967.1104, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(9591.3906, grad_fn=<MseLossBackward0>)
train loss: tensor(4248.4424, grad_fn=<MseLossBackward0>)
train loss: tensor(13109.7441, grad_fn=<MseLossBackward0>)
train loss: tensor(4955.8042, grad_fn=<MseLossBackward0>)
train loss: tensor(9781.1748, grad_fn=<MseLossBackward0>)
train loss: tensor(8402.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(19226.0020, grad_fn=<MseLossBackward0>)
train loss: tensor(3864.0884, grad_fn=<MseLossBackward0>)
train loss: tensor(3359.5559, grad_fn=<MseLossBackward0>)
train loss: tensor(3272.8879, grad_fn=<MseLossBackward0>)
train loss: tensor(9591.3496, grad_fn=<MseLossBackward0>)
train loss: tensor(4941.8989, grad_fn=<MseLossBackward0>)
train loss: tensor(8416.3633, grad_fn=<MseLossBackward0>)
train loss: tensor(12114.3105, grad_fn=<MseLossBackward0>)
train loss: tensor(10514.6709, grad_fn=<MseLossBackward0>)
train loss: tensor(3761.7839, grad_fn=<MseLossBackward0>)
train loss: tensor(2978.8562, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(7952.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(4922.6768, grad_fn=<MseLossBackward0>)
train loss: tensor(5732.9541, grad_fn=<MseLossBackward0>)
train loss: tensor(7775.1094, grad_fn=<MseLossBackward0>)
train loss: tensor(10330.5010, grad_fn=<MseLossBackward0>)
train loss: tensor(3746.9109, grad_fn=<MseLossBackward0>)
train loss: tensor(13160.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(7884.9731, grad_fn=<MseLossBackward0>)
train loss: tensor(4000.4299, grad_fn=<MseLossBackward0>)
train loss: tensor(6898.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(5342.5356, grad_fn=<MseLossBackward0>)
train loss: tensor(7534.8130, grad_fn=<MseLossBackward0>)
train loss: tensor(4555.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(6858.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(5533.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(7721.8853, grad_fn=<MseLossBackward0>)
train loss: tensor(4633.5518, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(3221.6978, grad_fn=<MseLossBackward0>)
train loss: tensor(4283.2456, grad_fn=<MseLossBackward0>)
train loss: tensor(6927.7188, grad_fn=<MseLossBackward0>)
train loss: tensor(14266.1738, grad_fn=<MseLossBackward0>)
train loss: tensor(2967.5581, grad_fn=<MseLossBackward0>)
train loss: tensor(7231.9365, grad_fn=<MseLossBackward0>)
train loss: tensor(10832.1143, grad_fn=<MseLossBackward0>)
train loss: tensor(43732.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(5705.5630, grad_fn=<MseLossBackward0>)
train loss: tensor(8160.1841, grad_fn=<MseLossBackward0>)
train loss: tensor(8186.0269, grad_fn=<MseLossBackward0>)
train loss: tensor(6843.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(5700.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(1984.7163, grad_fn=<MseLossBackward0>)
train loss: tensor(3300.3931, grad_fn=<MseLossBackward0>)
train loss: tensor(5984.3501, grad_fn=<MseLossBackward0>)
train loss: tensor(4151.1699, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(18557.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(7780.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(5770.9868, grad_fn=<MseLossBackward0>)
train loss: tensor(7385.2256, grad_fn=<MseLossBackward0>)
train loss: tensor(8385.6260, grad_fn=<MseLossBackward0>)
train loss: tensor(6838.9326, grad_fn=<MseLossBackward0>)
train loss: tensor(11987.7871, grad_fn=<MseLossBackward0>)
train loss: tensor(6870.0396, grad_fn=<MseLossBackward0>)
train loss: tensor(12632.0938, grad_fn=<MseLossBackward0>)
train loss: tensor(13702.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(1516.1681, grad_fn=<MseLossBackward0>)
train loss: tensor(6490.4624, grad_fn=<MseLossBackward0>)
train loss: tensor(9895.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(4046.7507, grad_fn=<MseLossBackward0>)
train loss: tensor(2031.5477, grad_fn=<MseLossBackward0>)
train loss: tensor(5688.2437, grad_fn=<MseLossBackward0>)
train loss: tensor(6930.5400, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(71311.4062, grad_fn=<MseLossBackward0>)
train loss: tensor(4116.5186, grad_fn=<MseLossBackward0>)
train loss: tensor(7847.5718, grad_fn=<MseLossBackward0>)
train loss: tensor(6677.9175, grad_fn=<MseLossBackward0>)
train loss: tensor(16463.4434, grad_fn=<MseLossBackward0>)
train loss: tensor(2612.2883, grad_fn=<MseLossBackward0>)
train loss: tensor(5534.2886, grad_fn=<MseLossBackward0>)
train loss: tensor(4714.3892, grad_fn=<MseLossBackward0>)
train loss: tensor(9064.3242, grad_fn=<MseLossBackward0>)
train loss: tensor(6201.7178, grad_fn=<MseLossBackward0>)
train loss: tensor(8230.6348, grad_fn=<MseLossBackward0>)
train loss: tensor(7255.7725, grad_fn=<MseLossBackward0>)
train loss: tensor(18023.1445, grad_fn=<MseLossBackward0>)
train loss: tensor(2590.0859, grad_fn=<MseLossBackward0>)
train loss: tensor(7743.0645, grad_fn=<MseLossBackward0>)
train loss: tensor(9199.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(9366.5820, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3850.3384, grad_fn=<MseLossBackward0>)
train loss: tensor(7165.0342, grad_fn=<MseLossBackward0>)
train loss: tensor(10823.1123, grad_fn=<MseLossBackward0>)
train loss: tensor(4748.0410, grad_fn=<MseLossBackward0>)
train loss: tensor(8152.9995, grad_fn=<MseLossBackward0>)
train loss: tensor(6415.6577, grad_fn=<MseLossBackward0>)
train loss: tensor(11627.4150, grad_fn=<MseLossBackward0>)
train loss: tensor(15799.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(5154.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(11244.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(6770.8921, grad_fn=<MseLossBackward0>)
train loss: tensor(10181.5283, grad_fn=<MseLossBackward0>)
train loss: tensor(14420.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(3265.3281, grad_fn=<MseLossBackward0>)
train loss: tensor(9117.7754, grad_fn=<MseLossBackward0>)
train loss: tensor(5062.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(4566.7441, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(7422.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(6180.8389, grad_fn=<MseLossBackward0>)
train loss: tensor(2851.9336, grad_fn=<MseLossBackward0>)
train loss: tensor(12457.8496, grad_fn=<MseLossBackward0>)
train loss: tensor(5195.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(2553.1150, grad_fn=<MseLossBackward0>)
train loss: tensor(6710.1284, grad_fn=<MseLossBackward0>)
train loss: tensor(16565.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(4252.3931, grad_fn=<MseLossBackward0>)
train loss: tensor(9000.7344, grad_fn=<MseLossBackward0>)
train loss: tensor(6621.3188, grad_fn=<MseLossBackward0>)
train loss: tensor(14453.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(3446.1545, grad_fn=<MseLossBackward0>)
train loss: tensor(5883.5835, grad_fn=<MseLossBackward0>)
train loss: tensor(9943.0283, grad_fn=<MseLossBackward0>)
train loss: tensor(5798.0845, grad_fn=<MseLossBackward0>)
train loss: tensor(4405.1245, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3497.8159, grad_fn=<MseLossBackward0>)
train loss: tensor(10442.8955, grad_fn=<MseLossBackward0>)
train loss: tensor(14893.6475, grad_fn=<MseLossBackward0>)
train loss: tensor(5593.0972, grad_fn=<MseLossBackward0>)
train loss: tensor(11646.5850, grad_fn=<MseLossBackward0>)
train loss: tensor(6031.4707, grad_fn=<MseLossBackward0>)
train loss: tensor(16183.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(2316.2068, grad_fn=<MseLossBackward0>)
train loss: tensor(10011.7627, grad_fn=<MseLossBackward0>)
train loss: tensor(14864.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(11192.6875, grad_fn=<MseLossBackward0>)
train loss: tensor(4877.7124, grad_fn=<MseLossBackward0>)
train loss: tensor(8559.4307, grad_fn=<MseLossBackward0>)
train loss: tensor(5304.6865, grad_fn=<MseLossBackward0>)
train loss: tensor(46198.3320, grad_fn=<MseLossBackward0>)
train loss: tensor(9133.4189, grad_fn=<MseLossBackward0>)
train loss: tensor(5539.3828, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(4585.5156, grad_fn=<MseLossBackward0>)
train loss: tensor(3393.2156, grad_fn=<MseLossBackward0>)
train loss: tensor(9681.9736, grad_fn=<MseLossBackward0>)
train loss: tensor(14724.3486, grad_fn=<MseLossBackward0>)
train loss: tensor(6973.7485, grad_fn=<MseLossBackward0>)
train loss: tensor(9146.2744, grad_fn=<MseLossBackward0>)
train loss: tensor(15764.1738, grad_fn=<MseLossBackward0>)
train loss: tensor(13708.4590, grad_fn=<MseLossBackward0>)
train loss: tensor(75669.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(4131.0029, grad_fn=<MseLossBackward0>)
train loss: tensor(5412.0161, grad_fn=<MseLossBackward0>)
train loss: tensor(11190.4795, grad_fn=<MseLossBackward0>)
train loss: tensor(13158.8350, grad_fn=<MseLossBackward0>)
train loss: tensor(6582.2402, grad_fn=<MseLossBackward0>)
train loss: tensor(8754.2793, grad_fn=<MseLossBackward0>)
train loss: tensor(5997.9790, grad_fn=<MseLossBackward0>)
train loss: tensor(6310.4619, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3663.8054, grad_fn=<MseLossBackward0>)
train loss: tensor(6885.9287, grad_fn=<MseLossBackward0>)
train loss: tensor(1473.6409, grad_fn=<MseLossBackward0>)
train loss: tensor(1777.4460, grad_fn=<MseLossBackward0>)
train loss: tensor(6361.5825, grad_fn=<MseLossBackward0>)
train loss: tensor(3066.8352, grad_fn=<MseLossBackward0>)
train loss: tensor(5831.1978, grad_fn=<MseLossBackward0>)
train loss: tensor(8136.3574, grad_fn=<MseLossBackward0>)
train loss: tensor(14683.4385, grad_fn=<MseLossBackward0>)
train loss: tensor(14977.0879, grad_fn=<MseLossBackward0>)
train loss: tensor(10944.8018, grad_fn=<MseLossBackward0>)
train loss: tensor(5905.5078, grad_fn=<MseLossBackward0>)
train loss: tensor(9769.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(4436.0742, grad_fn=<MseLossBackward0>)
train loss: tensor(5724.0278, grad_fn=<MseLossBackward0>)
train loss: tensor(14187.3057, grad_fn=<MseLossBackward0>)
train loss: tensor(4401.3975, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(29293.3379, grad_fn=<MseLossBackward0>)
train loss: tensor(7146.0293, grad_fn=<MseLossBackward0>)
train loss: tensor(2921.1421, grad_fn=<MseLossBackward0>)
train loss: tensor(7792.2124, grad_fn=<MseLossBackward0>)
train loss: tensor(6785.5376, grad_fn=<MseLossBackward0>)
train loss: tensor(6164.2979, grad_fn=<MseLossBackward0>)
train loss: tensor(14799.0752, grad_fn=<MseLossBackward0>)
train loss: tensor(5592.8545, grad_fn=<MseLossBackward0>)
train loss: tensor(4392.2480, grad_fn=<MseLossBackward0>)
train loss: tensor(5971.7612, grad_fn=<MseLossBackward0>)
train loss: tensor(7707.9243, grad_fn=<MseLossBackward0>)
train loss: tensor(6880.4575, grad_fn=<MseLossBackward0>)
train loss: tensor(5900.3276, grad_fn=<MseLossBackward0>)
train loss: tensor(4659.9438, grad_fn=<MseLossBackward0>)
train loss: tensor(12492.0068, grad_fn=<MseLossBackward0>)
train loss: tensor(1717.9685, grad_fn=<MseLossBackward0>)
train loss: tensor(6226.8398, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(10018.7832, grad_fn=<MseLossBackward0>)
train loss: tensor(2220.9116, grad_fn=<MseLossBackward0>)
train loss: tensor(6353.4458, grad_fn=<MseLossBackward0>)
train loss: tensor(47611.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(6166.9497, grad_fn=<MseLossBackward0>)
train loss: tensor(6275.6177, grad_fn=<MseLossBackward0>)
train loss: tensor(2559.7764, grad_fn=<MseLossBackward0>)
train loss: tensor(4405.6733, grad_fn=<MseLossBackward0>)
train loss: tensor(9339.7979, grad_fn=<MseLossBackward0>)
train loss: tensor(7742.0728, grad_fn=<MseLossBackward0>)
train loss: tensor(3973.3228, grad_fn=<MseLossBackward0>)
train loss: tensor(12161., grad_fn=<MseLossBackward0>)
train loss: tensor(7466.8428, grad_fn=<MseLossBackward0>)
train loss: tensor(6840.5151, grad_fn=<MseLossBackward0>)
train loss: tensor(14085.0430, grad_fn=<MseLossBackward0>)
train loss: tensor(12389.7930, grad_fn=<MseLossBackward0>)
train loss: tensor(2007.7642, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(10956.1191, grad_fn=<MseLossBackward0>)
train loss: tensor(5332.3042, grad_fn=<MseLossBackward0>)
train loss: tensor(25782.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(11004.9531, grad_fn=<MseLossBackward0>)
train loss: tensor(4140.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(6790.5796, grad_fn=<MseLossBackward0>)
train loss: tensor(8390.5459, grad_fn=<MseLossBackward0>)
train loss: tensor(10459.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(5232.0234, grad_fn=<MseLossBackward0>)
train loss: tensor(12061.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(14994.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(5223.4199, grad_fn=<MseLossBackward0>)
train loss: tensor(11436.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(1538.6843, grad_fn=<MseLossBackward0>)
train loss: tensor(3169.1438, grad_fn=<MseLossBackward0>)
train loss: tensor(10765.9512, grad_fn=<MseLossBackward0>)
train loss: tensor(7476.8276, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(7037.9448, grad_fn=<MseLossBackward0>)
train loss: tensor(4882.3291, grad_fn=<MseLossBackward0>)
train loss: tensor(6231.2949, grad_fn=<MseLossBackward0>)
train loss: tensor(15371.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(6782.9370, grad_fn=<MseLossBackward0>)
train loss: tensor(9252.3184, grad_fn=<MseLossBackward0>)
train loss: tensor(2479.1262, grad_fn=<MseLossBackward0>)
train loss: tensor(9467.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(36875.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(6752.3174, grad_fn=<MseLossBackward0>)
train loss: tensor(3436.9583, grad_fn=<MseLossBackward0>)
train loss: tensor(3530.8684, grad_fn=<MseLossBackward0>)
train loss: tensor(7659.0938, grad_fn=<MseLossBackward0>)
train loss: tensor(7758.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(5554.2842, grad_fn=<MseLossBackward0>)
train loss: tensor(3073.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(7336.3784, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(10596.9355, grad_fn=<MseLossBackward0>)
train loss: tensor(9503.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(12775.8057, grad_fn=<MseLossBackward0>)
train loss: tensor(4209.3198, grad_fn=<MseLossBackward0>)
train loss: tensor(3072.8772, grad_fn=<MseLossBackward0>)
train loss: tensor(1903.7946, grad_fn=<MseLossBackward0>)
train loss: tensor(4488.7754, grad_fn=<MseLossBackward0>)
train loss: tensor(6197.7339, grad_fn=<MseLossBackward0>)
train loss: tensor(10881.1006, grad_fn=<MseLossBackward0>)
train loss: tensor(4906.7095, grad_fn=<MseLossBackward0>)
train loss: tensor(3181.1968, grad_fn=<MseLossBackward0>)
train loss: tensor(10675.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(5396.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(9891.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(7114.0649, grad_fn=<MseLossBackward0>)
train loss: tensor(10101.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(3914.0669, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(7631.9326, grad_fn=<MseLossBackward0>)
train loss: tensor(9940.8711, grad_fn=<MseLossBackward0>)
train loss: tensor(74973.7266, grad_fn=<MseLossBackward0>)
train loss: tensor(13558.5557, grad_fn=<MseLossBackward0>)
train loss: tensor(12501.5869, grad_fn=<MseLossBackward0>)
train loss: tensor(3009.0376, grad_fn=<MseLossBackward0>)
train loss: tensor(9895.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(8571.2617, grad_fn=<MseLossBackward0>)
train loss: tensor(12547.4229, grad_fn=<MseLossBackward0>)
train loss: tensor(4431.2637, grad_fn=<MseLossBackward0>)
train loss: tensor(9060.3369, grad_fn=<MseLossBackward0>)
train loss: tensor(4517.0562, grad_fn=<MseLossBackward0>)
train loss: tensor(5910.3999, grad_fn=<MseLossBackward0>)
train loss: tensor(6212.6450, grad_fn=<MseLossBackward0>)
train loss: tensor(7438.7168, grad_fn=<MseLossBackward0>)
train loss: tensor(13048.0195, grad_fn=<MseLossBackward0>)
train loss: tensor(9547.2236, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(7243.5967, grad_fn=<MseLossBackward0>)
train loss: tensor(14518.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(4535.5571, grad_fn=<MseLossBackward0>)
train loss: tensor(13599.0684, grad_fn=<MseLossBackward0>)
train loss: tensor(4632.4995, grad_fn=<MseLossBackward0>)
train loss: tensor(4404.8091, grad_fn=<MseLossBackward0>)
train loss: tensor(10066.9199, grad_fn=<MseLossBackward0>)
train loss: tensor(8041.4194, grad_fn=<MseLossBackward0>)
train loss: tensor(6946.5991, grad_fn=<MseLossBackward0>)
train loss: tensor(2668.4546, grad_fn=<MseLossBackward0>)
train loss: tensor(94121.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(6928.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(3874.8669, grad_fn=<MseLossBackward0>)
train loss: tensor(5037.0435, grad_fn=<MseLossBackward0>)
train loss: tensor(4812.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(7988.6299, grad_fn=<MseLossBackward0>)
train loss: tensor(8001.7100, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(2520.5701, grad_fn=<MseLossBackward0>)
train loss: tensor(6184.7793, grad_fn=<MseLossBackward0>)
train loss: tensor(6412.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(1535.5024, grad_fn=<MseLossBackward0>)
train loss: tensor(3807.4641, grad_fn=<MseLossBackward0>)
train loss: tensor(12071.4717, grad_fn=<MseLossBackward0>)
train loss: tensor(4946.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(3822.7000, grad_fn=<MseLossBackward0>)
train loss: tensor(11748.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(1064.5035, grad_fn=<MseLossBackward0>)
train loss: tensor(2172.3552, grad_fn=<MseLossBackward0>)
train loss: tensor(5253.3154, grad_fn=<MseLossBackward0>)
train loss: tensor(3322.8743, grad_fn=<MseLossBackward0>)
train loss: tensor(10147.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(8761.4717, grad_fn=<MseLossBackward0>)
train loss: tensor(2894.4568, grad_fn=<MseLossBackward0>)
train loss: tensor(8729.5195, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(8745.0986, grad_fn=<MseLossBackward0>)
train loss: tensor(5894.1050, grad_fn=<MseLossBackward0>)
train loss: tensor(12206.2969, grad_fn=<MseLossBackward0>)
train loss: tensor(6398.0552, grad_fn=<MseLossBackward0>)
train loss: tensor(7213.4731, grad_fn=<MseLossBackward0>)
train loss: tensor(7475.8574, grad_fn=<MseLossBackward0>)
train loss: tensor(10482.2451, grad_fn=<MseLossBackward0>)
train loss: tensor(10860.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(14090.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(15597.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(11635.1543, grad_fn=<MseLossBackward0>)
train loss: tensor(13000.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(6334.2451, grad_fn=<MseLossBackward0>)
train loss: tensor(43347.6758, grad_fn=<MseLossBackward0>)
train loss: tensor(4233.1050, grad_fn=<MseLossBackward0>)
train loss: tensor(4400.2134, grad_fn=<MseLossBackward0>)
train loss: tensor(1988.0038, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(3747.1038, grad_fn=<MseLossBackward0>)
train loss: tensor(1843.3655, grad_fn=<MseLossBackward0>)
train loss: tensor(1019.9470, grad_fn=<MseLossBackward0>)
train loss: tensor(5605.0298, grad_fn=<MseLossBackward0>)
train loss: tensor(5090.6514, grad_fn=<MseLossBackward0>)
train loss: tensor(6387.4795, grad_fn=<MseLossBackward0>)
train loss: tensor(22450.2539, grad_fn=<MseLossBackward0>)
train loss: tensor(10202.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(5935.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(6626.7241, grad_fn=<MseLossBackward0>)
train loss: tensor(1681.3036, grad_fn=<MseLossBackward0>)
train loss: tensor(10727.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(5347.8550, grad_fn=<MseLossBackward0>)
train loss: tensor(9062.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(6537.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(9514.8496, grad_fn=<MseLossBackward0>)
train loss: tensor(5868.2759, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(6049.2524, grad_fn=<MseLossBackward0>)
train loss: tensor(16307.4668, grad_fn=<MseLossBackward0>)
train loss: tensor(9806.9141, grad_fn=<MseLossBackward0>)
train loss: tensor(4486.8618, grad_fn=<MseLossBackward0>)
train loss: tensor(14262.4912, grad_fn=<MseLossBackward0>)
train loss: tensor(12645.6523, grad_fn=<MseLossBackward0>)
train loss: tensor(2833.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(4218.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(88095.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(3995.3330, grad_fn=<MseLossBackward0>)
train loss: tensor(7325.9751, grad_fn=<MseLossBackward0>)
train loss: tensor(3186.2268, grad_fn=<MseLossBackward0>)
train loss: tensor(5983.3198, grad_fn=<MseLossBackward0>)
train loss: tensor(6678.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(10977.6582, grad_fn=<MseLossBackward0>)
train loss: tensor(3086.8140, grad_fn=<MseLossBackward0>)
train loss: tensor(9557.3496, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(5505.7612, grad_fn=<MseLossBackward0>)
train loss: tensor(8094.4595, grad_fn=<MseLossBackward0>)
train loss: tensor(7207.7549, grad_fn=<MseLossBackward0>)
train loss: tensor(7643.8545, grad_fn=<MseLossBackward0>)
train loss: tensor(8550.1650, grad_fn=<MseLossBackward0>)
train loss: tensor(5490.2495, grad_fn=<MseLossBackward0>)
train loss: tensor(7944.3647, grad_fn=<MseLossBackward0>)
train loss: tensor(61250.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(4844.8062, grad_fn=<MseLossBackward0>)
train loss: tensor(4674.3096, grad_fn=<MseLossBackward0>)
train loss: tensor(8967.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(7024.6914, grad_fn=<MseLossBackward0>)
train loss: tensor(4461.6792, grad_fn=<MseLossBackward0>)
train loss: tensor(10041.7148, grad_fn=<MseLossBackward0>)
train loss: tensor(3979.4561, grad_fn=<MseLossBackward0>)
train loss: tensor(5639.5825, grad_fn=<MseLossBackward0>)
train loss: tensor(4095.2251, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4157.7437, grad_fn=<MseLossBackward0>)
train loss: tensor(4503.4463, grad_fn=<MseLossBackward0>)
train loss: tensor(4535.4336, grad_fn=<MseLossBackward0>)
train loss: tensor(5935.0166, grad_fn=<MseLossBackward0>)
train loss: tensor(7780.6890, grad_fn=<MseLossBackward0>)
train loss: tensor(8493.8311, grad_fn=<MseLossBackward0>)
train loss: tensor(7068.5752, grad_fn=<MseLossBackward0>)
train loss: tensor(4028.6379, grad_fn=<MseLossBackward0>)
train loss: tensor(9997.5225, grad_fn=<MseLossBackward0>)
train loss: tensor(3964.0874, grad_fn=<MseLossBackward0>)
train loss: tensor(9900.2891, grad_fn=<MseLossBackward0>)
train loss: tensor(15339.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(7832.4136, grad_fn=<MseLossBackward0>)
train loss: tensor(2386.0159, grad_fn=<MseLossBackward0>)
train loss: tensor(9248.5674, grad_fn=<MseLossBackward0>)
train loss: tensor(6758.8901, grad_fn=<MseLossBackward0>)
train loss: tensor(6809.5630, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(5077.4751, grad_fn=<MseLossBackward0>)
train loss: tensor(9249.4658, grad_fn=<MseLossBackward0>)
train loss: tensor(9523.1836, grad_fn=<MseLossBackward0>)
train loss: tensor(1470.9869, grad_fn=<MseLossBackward0>)
train loss: tensor(8201.5635, grad_fn=<MseLossBackward0>)
train loss: tensor(2143.4219, grad_fn=<MseLossBackward0>)
train loss: tensor(8229.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(3124.9885, grad_fn=<MseLossBackward0>)
train loss: tensor(3174.4587, grad_fn=<MseLossBackward0>)
train loss: tensor(3963.2073, grad_fn=<MseLossBackward0>)
train loss: tensor(3362.7542, grad_fn=<MseLossBackward0>)
train loss: tensor(15064.7383, grad_fn=<MseLossBackward0>)
train loss: tensor(14538.7656, grad_fn=<MseLossBackward0>)
train loss: tensor(3036.1841, grad_fn=<MseLossBackward0>)
train loss: tensor(5215.9590, grad_fn=<MseLossBackward0>)
train loss: tensor(4698.4956, grad_fn=<MseLossBackward0>)
train loss: tensor(6821.9146, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(18061.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(4685.0542, grad_fn=<MseLossBackward0>)
train loss: tensor(11276.3828, grad_fn=<MseLossBackward0>)
train loss: tensor(3747.7817, grad_fn=<MseLossBackward0>)
train loss: tensor(7131.4414, grad_fn=<MseLossBackward0>)
train loss: tensor(8258.1279, grad_fn=<MseLossBackward0>)
train loss: tensor(9863.1260, grad_fn=<MseLossBackward0>)
train loss: tensor(9161.0166, grad_fn=<MseLossBackward0>)
train loss: tensor(6135.9585, grad_fn=<MseLossBackward0>)
train loss: tensor(10250.8584, grad_fn=<MseLossBackward0>)
train loss: tensor(11857.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(4460.9009, grad_fn=<MseLossBackward0>)
train loss: tensor(3025.4636, grad_fn=<MseLossBackward0>)
train loss: tensor(6381.0508, grad_fn=<MseLossBackward0>)
train loss: tensor(4296.6558, grad_fn=<MseLossBackward0>)
train loss: tensor(4502.9575, grad_fn=<MseLossBackward0>)
train loss: tensor(3114.4905, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(9351.1045, grad_fn=<MseLossBackward0>)
train loss: tensor(10024.6982, grad_fn=<MseLossBackward0>)
train loss: tensor(6301.9731, grad_fn=<MseLossBackward0>)
train loss: tensor(5165.5381, grad_fn=<MseLossBackward0>)
train loss: tensor(10171.1230, grad_fn=<MseLossBackward0>)
train loss: tensor(7578.4688, grad_fn=<MseLossBackward0>)
train loss: tensor(14723.4287, grad_fn=<MseLossBackward0>)
train loss: tensor(7551.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(4162.8901, grad_fn=<MseLossBackward0>)
train loss: tensor(7462.0591, grad_fn=<MseLossBackward0>)
train loss: tensor(5322.5449, grad_fn=<MseLossBackward0>)
train loss: tensor(2530.5830, grad_fn=<MseLossBackward0>)
train loss: tensor(11092.6523, grad_fn=<MseLossBackward0>)
train loss: tensor(6617.0674, grad_fn=<MseLossBackward0>)
train loss: tensor(5279.3784, grad_fn=<MseLossBackward0>)
train loss: tensor(12050.4033, grad_fn=<MseLossBackward0>)
train loss: tensor(17753.3281, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(7081.2231, grad_fn=<MseLossBackward0>)
train loss: tensor(2961.5906, grad_fn=<MseLossBackward0>)
train loss: tensor(6381.4810, grad_fn=<MseLossBackward0>)
train loss: tensor(12363.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(8324.7598, grad_fn=<MseLossBackward0>)
train loss: tensor(4249.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(7414.2231, grad_fn=<MseLossBackward0>)
train loss: tensor(2368.8142, grad_fn=<MseLossBackward0>)
train loss: tensor(10382.2354, grad_fn=<MseLossBackward0>)
train loss: tensor(11183.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(3069.0618, grad_fn=<MseLossBackward0>)
train loss: tensor(4483.6138, grad_fn=<MseLossBackward0>)
train loss: tensor(4856.3682, grad_fn=<MseLossBackward0>)
train loss: tensor(6741.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(9082.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(4974.2075, grad_fn=<MseLossBackward0>)
train loss: tensor(4125.0381, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(6654.5649, grad_fn=<MseLossBackward0>)
train loss: tensor(11125.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(5156.6216, grad_fn=<MseLossBackward0>)
train loss: tensor(11155.2061, grad_fn=<MseLossBackward0>)
train loss: tensor(11967.6504, grad_fn=<MseLossBackward0>)
train loss: tensor(3789.1899, grad_fn=<MseLossBackward0>)
train loss: tensor(3811.5286, grad_fn=<MseLossBackward0>)
train loss: tensor(16512.9023, grad_fn=<MseLossBackward0>)
train loss: tensor(6372.4849, grad_fn=<MseLossBackward0>)
train loss: tensor(13029.1768, grad_fn=<MseLossBackward0>)
train loss: tensor(7015.2671, grad_fn=<MseLossBackward0>)
train loss: tensor(2546.8337, grad_fn=<MseLossBackward0>)
train loss: tensor(9456.8291, grad_fn=<MseLossBackward0>)
train loss: tensor(11914.3525, grad_fn=<MseLossBackward0>)
train loss: tensor(2071.3042, grad_fn=<MseLossBackward0>)
train loss: tensor(4800.1401, grad_fn=<MseLossBackward0>)
train loss: tensor(2991.8889, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(4137.8159, grad_fn=<MseLossBackward0>)
train loss: tensor(5650.6201, grad_fn=<MseLossBackward0>)
train loss: tensor(8524.2764, grad_fn=<MseLossBackward0>)
train loss: tensor(3343.2593, grad_fn=<MseLossBackward0>)
train loss: tensor(14656.4990, grad_fn=<MseLossBackward0>)
train loss: tensor(7348.4771, grad_fn=<MseLossBackward0>)
train loss: tensor(6323.0215, grad_fn=<MseLossBackward0>)
train loss: tensor(10356.3096, grad_fn=<MseLossBackward0>)
train loss: tensor(9082.8154, grad_fn=<MseLossBackward0>)
train loss: tensor(7084.0610, grad_fn=<MseLossBackward0>)
train loss: tensor(3097.0872, grad_fn=<MseLossBackward0>)
train loss: tensor(16816.5859, grad_fn=<MseLossBackward0>)
train loss: tensor(6306.5923, grad_fn=<MseLossBackward0>)
train loss: tensor(6759.8062, grad_fn=<MseLossBackward0>)
train loss: tensor(9075.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(3136.0989, grad_fn=<MseLossBackward0>)
train loss: tensor(10420.5547, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(75656.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(4571.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(11373.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(3725.0330, grad_fn=<MseLossBackward0>)
train loss: tensor(3850.9023, grad_fn=<MseLossBackward0>)
train loss: tensor(4618.9482, grad_fn=<MseLossBackward0>)
train loss: tensor(2884.5671, grad_fn=<MseLossBackward0>)
train loss: tensor(13794.4668, grad_fn=<MseLossBackward0>)
train loss: tensor(4157.5625, grad_fn=<MseLossBackward0>)
train loss: tensor(7541.7036, grad_fn=<MseLossBackward0>)
train loss: tensor(11709.6377, grad_fn=<MseLossBackward0>)
train loss: tensor(5505.4263, grad_fn=<MseLossBackward0>)
train loss: tensor(10510.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(8485.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(8290.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(9975.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(5211.0132, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(7738.8628, grad_fn=<MseLossBackward0>)
train loss: tensor(3882.6221, grad_fn=<MseLossBackward0>)
train loss: tensor(13008.1963, grad_fn=<MseLossBackward0>)
train loss: tensor(7228.5562, grad_fn=<MseLossBackward0>)
train loss: tensor(5893.8179, grad_fn=<MseLossBackward0>)
train loss: tensor(4926.8789, grad_fn=<MseLossBackward0>)
train loss: tensor(14688.9248, grad_fn=<MseLossBackward0>)
train loss: tensor(10244.3496, grad_fn=<MseLossBackward0>)
train loss: tensor(11416.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(4377.2271, grad_fn=<MseLossBackward0>)
train loss: tensor(3467.4460, grad_fn=<MseLossBackward0>)
train loss: tensor(6434.4746, grad_fn=<MseLossBackward0>)
train loss: tensor(4488.1006, grad_fn=<MseLossBackward0>)
train loss: tensor(10021.3115, grad_fn=<MseLossBackward0>)
train loss: tensor(10556.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(4752.0317, grad_fn=<MseLossBackward0>)
train loss: tensor(5024.7100, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(4566.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(9438.3076, grad_fn=<MseLossBackward0>)
train loss: tensor(5149.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(5752.0562, grad_fn=<MseLossBackward0>)
train loss: tensor(7742.1172, grad_fn=<MseLossBackward0>)
train loss: tensor(14074.0547, grad_fn=<MseLossBackward0>)
train loss: tensor(12590.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(3181.8140, grad_fn=<MseLossBackward0>)
train loss: tensor(4110.4795, grad_fn=<MseLossBackward0>)
train loss: tensor(4679.7744, grad_fn=<MseLossBackward0>)
train loss: tensor(7625.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(7349.1958, grad_fn=<MseLossBackward0>)
train loss: tensor(4032.6731, grad_fn=<MseLossBackward0>)
train loss: tensor(5421.9849, grad_fn=<MseLossBackward0>)
train loss: tensor(4814.6035, grad_fn=<MseLossBackward0>)
train loss: tensor(6138.0308, grad_fn=<MseLossBackward0>)
train loss: tensor(5566.6099, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(5325.6328, grad_fn=<MseLossBackward0>)
train loss: tensor(10200.1768, grad_fn=<MseLossBackward0>)
train loss: tensor(12826.1689, grad_fn=<MseLossBackward0>)
train loss: tensor(4455.6978, grad_fn=<MseLossBackward0>)
train loss: tensor(10933.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(15875.5557, grad_fn=<MseLossBackward0>)
train loss: tensor(6955.8892, grad_fn=<MseLossBackward0>)
train loss: tensor(5398.9370, grad_fn=<MseLossBackward0>)
train loss: tensor(2794.4917, grad_fn=<MseLossBackward0>)
train loss: tensor(5907.3442, grad_fn=<MseLossBackward0>)
train loss: tensor(6994.4517, grad_fn=<MseLossBackward0>)
train loss: tensor(9274.5654, grad_fn=<MseLossBackward0>)
train loss: tensor(3761.1838, grad_fn=<MseLossBackward0>)
train loss: tensor(8778.2510, grad_fn=<MseLossBackward0>)
train loss: tensor(3384.1252, grad_fn=<MseLossBackward0>)
train loss: tensor(9281.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(4826.0752, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(11485.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(8731.0967, grad_fn=<MseLossBackward0>)
train loss: tensor(5176.7417, grad_fn=<MseLossBackward0>)
train loss: tensor(10987.7490, grad_fn=<MseLossBackward0>)
train loss: tensor(3507.1462, grad_fn=<MseLossBackward0>)
train loss: tensor(10748.5957, grad_fn=<MseLossBackward0>)
train loss: tensor(2229.2861, grad_fn=<MseLossBackward0>)
train loss: tensor(6249.9756, grad_fn=<MseLossBackward0>)
train loss: tensor(4775.3481, grad_fn=<MseLossBackward0>)
train loss: tensor(15759.9238, grad_fn=<MseLossBackward0>)
train loss: tensor(8552.8398, grad_fn=<MseLossBackward0>)
train loss: tensor(4523.0908, grad_fn=<MseLossBackward0>)
train loss: tensor(3595.4119, grad_fn=<MseLossBackward0>)
train loss: tensor(4124.6172, grad_fn=<MseLossBackward0>)
train loss: tensor(6909.2139, grad_fn=<MseLossBackward0>)
train loss: tensor(3640.1682, grad_fn=<MseLossBackward0>)
train loss: tensor(2113.4104, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(14131.2617, grad_fn=<MseLossBackward0>)
train loss: tensor(12170.7656, grad_fn=<MseLossBackward0>)
train loss: tensor(8708.9756, grad_fn=<MseLossBackward0>)
train loss: tensor(13023.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(8663.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(36824.7656, grad_fn=<MseLossBackward0>)
train loss: tensor(6148.5566, grad_fn=<MseLossBackward0>)
train loss: tensor(7002.5132, grad_fn=<MseLossBackward0>)
train loss: tensor(3919.7036, grad_fn=<MseLossBackward0>)
train loss: tensor(3227.0681, grad_fn=<MseLossBackward0>)
train loss: tensor(8423.6035, grad_fn=<MseLossBackward0>)
train loss: tensor(6762.7695, grad_fn=<MseLossBackward0>)
train loss: tensor(5625.8838, grad_fn=<MseLossBackward0>)
train loss: tensor(8907.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(13217.8320, grad_fn=<MseLossBackward0>)
train loss: tensor(10589.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(5473.9355, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5766.1260, grad_fn=<MseLossBackward0>)
train loss: tensor(3577.3457, grad_fn=<MseLossBackward0>)
train loss: tensor(8379.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(13093.7031, grad_fn=<MseLossBackward0>)
train loss: tensor(5914.3408, grad_fn=<MseLossBackward0>)
train loss: tensor(3874.9036, grad_fn=<MseLossBackward0>)
train loss: tensor(12156.7256, grad_fn=<MseLossBackward0>)
train loss: tensor(3253.5286, grad_fn=<MseLossBackward0>)
train loss: tensor(10464.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(2884.2991, grad_fn=<MseLossBackward0>)
train loss: tensor(5341.2935, grad_fn=<MseLossBackward0>)
train loss: tensor(6003.3594, grad_fn=<MseLossBackward0>)
train loss: tensor(11202.5664, grad_fn=<MseLossBackward0>)
train loss: tensor(13846.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(4016.9551, grad_fn=<MseLossBackward0>)
train loss: tensor(5934.2061, grad_fn=<MseLossBackward0>)
train loss: tensor(2326.8367, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(9538.4434, grad_fn=<MseLossBackward0>)
train loss: tensor(8321.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(9864.7676, grad_fn=<MseLossBackward0>)
train loss: tensor(2446.0947, grad_fn=<MseLossBackward0>)
train loss: tensor(9095.8652, grad_fn=<MseLossBackward0>)
train loss: tensor(10751.8691, grad_fn=<MseLossBackward0>)
train loss: tensor(2642.6755, grad_fn=<MseLossBackward0>)
train loss: tensor(8634.9336, grad_fn=<MseLossBackward0>)
train loss: tensor(7450.1489, grad_fn=<MseLossBackward0>)
train loss: tensor(19806.8887, grad_fn=<MseLossBackward0>)
train loss: tensor(5131.1494, grad_fn=<MseLossBackward0>)
train loss: tensor(9767.5811, grad_fn=<MseLossBackward0>)
train loss: tensor(3701.7236, grad_fn=<MseLossBackward0>)
train loss: tensor(12550.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(7559.9248, grad_fn=<MseLossBackward0>)
train loss: tensor(10166.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(11663.2725, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6851.4224, grad_fn=<MseLossBackward0>)
train loss: tensor(9466.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(8320.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(2959.3647, grad_fn=<MseLossBackward0>)
train loss: tensor(3428.1809, grad_fn=<MseLossBackward0>)
train loss: tensor(9856.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(3559.9695, grad_fn=<MseLossBackward0>)
train loss: tensor(4737.0752, grad_fn=<MseLossBackward0>)
train loss: tensor(12296.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(4715.7651, grad_fn=<MseLossBackward0>)
train loss: tensor(6663.0547, grad_fn=<MseLossBackward0>)
train loss: tensor(12581.0303, grad_fn=<MseLossBackward0>)
train loss: tensor(67383.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(64333.3984, grad_fn=<MseLossBackward0>)
train loss: tensor(12151.4258, grad_fn=<MseLossBackward0>)
train loss: tensor(1201.4581, grad_fn=<MseLossBackward0>)
train loss: tensor(4806.6509, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(3630.9221, grad_fn=<MseLossBackward0>)
train loss: tensor(8398.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(3925.0554, grad_fn=<MseLossBackward0>)
train loss: tensor(6954.5024, grad_fn=<MseLossBackward0>)
train loss: tensor(9431.4863, grad_fn=<MseLossBackward0>)
train loss: tensor(8681.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(4587.7935, grad_fn=<MseLossBackward0>)
train loss: tensor(10978.1904, grad_fn=<MseLossBackward0>)
train loss: tensor(42690.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(12551.1348, grad_fn=<MseLossBackward0>)
train loss: tensor(16256.3701, grad_fn=<MseLossBackward0>)
train loss: tensor(25477.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(5056.3955, grad_fn=<MseLossBackward0>)
train loss: tensor(6440.0903, grad_fn=<MseLossBackward0>)
train loss: tensor(8173.9272, grad_fn=<MseLossBackward0>)
train loss: tensor(5545.1465, grad_fn=<MseLossBackward0>)
train loss: tensor(4516.4814, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6722.7837, grad_fn=<MseLossBackward0>)
train loss: tensor(8657.7803, grad_fn=<MseLossBackward0>)
train loss: tensor(4283.6118, grad_fn=<MseLossBackward0>)
train loss: tensor(3438.8474, grad_fn=<MseLossBackward0>)
train loss: tensor(8210.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(6337.0649, grad_fn=<MseLossBackward0>)
train loss: tensor(7890.8091, grad_fn=<MseLossBackward0>)
train loss: tensor(8003.4683, grad_fn=<MseLossBackward0>)
train loss: tensor(3508.5549, grad_fn=<MseLossBackward0>)
train loss: tensor(9181.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(3456.0569, grad_fn=<MseLossBackward0>)
train loss: tensor(3210.4026, grad_fn=<MseLossBackward0>)
train loss: tensor(8506.5049, grad_fn=<MseLossBackward0>)
train loss: tensor(11156.4170, grad_fn=<MseLossBackward0>)
train loss: tensor(5982.6138, grad_fn=<MseLossBackward0>)
train loss: tensor(5013.8994, grad_fn=<MseLossBackward0>)
train loss: tensor(6074.8779, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(2307.2178, grad_fn=<MseLossBackward0>)
train loss: tensor(12227.0576, grad_fn=<MseLossBackward0>)
train loss: tensor(14185.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(4838.1372, grad_fn=<MseLossBackward0>)
train loss: tensor(15131.7549, grad_fn=<MseLossBackward0>)
train loss: tensor(6110.8696, grad_fn=<MseLossBackward0>)
train loss: tensor(44585.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(11329.6807, grad_fn=<MseLossBackward0>)
train loss: tensor(9698.3311, grad_fn=<MseLossBackward0>)
train loss: tensor(6716.4663, grad_fn=<MseLossBackward0>)
train loss: tensor(8393.3486, grad_fn=<MseLossBackward0>)
train loss: tensor(7059.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(14175.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(3028.6682, grad_fn=<MseLossBackward0>)
train loss: tensor(2783.1514, grad_fn=<MseLossBackward0>)
train loss: tensor(6618.7485, grad_fn=<MseLossBackward0>)
train loss: tensor(12184.4111, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(6368.4985, grad_fn=<MseLossBackward0>)
train loss: tensor(7662.1113, grad_fn=<MseLossBackward0>)
train loss: tensor(5473.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(4750.6001, grad_fn=<MseLossBackward0>)
train loss: tensor(5617.4209, grad_fn=<MseLossBackward0>)
train loss: tensor(9814.7295, grad_fn=<MseLossBackward0>)
train loss: tensor(11138.2998, grad_fn=<MseLossBackward0>)
train loss: tensor(7573.8989, grad_fn=<MseLossBackward0>)
train loss: tensor(30723.8926, grad_fn=<MseLossBackward0>)
train loss: tensor(2438.5408, grad_fn=<MseLossBackward0>)
train loss: tensor(6268.3662, grad_fn=<MseLossBackward0>)
train loss: tensor(8815.6318, grad_fn=<MseLossBackward0>)
train loss: tensor(4982.0737, grad_fn=<MseLossBackward0>)
train loss: tensor(2420.5142, grad_fn=<MseLossBackward0>)
train loss: tensor(5323.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(14488.7783, grad_fn=<MseLossBackward0>)
train loss: tensor(3864.4067, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(1713.1707, grad_fn=<MseLossBackward0>)
train loss: tensor(3980.2764, grad_fn=<MseLossBackward0>)
train loss: tensor(10176.0674, grad_fn=<MseLossBackward0>)
train loss: tensor(9030.9463, grad_fn=<MseLossBackward0>)
train loss: tensor(4007.2729, grad_fn=<MseLossBackward0>)
train loss: tensor(8793.5850, grad_fn=<MseLossBackward0>)
train loss: tensor(4094.9434, grad_fn=<MseLossBackward0>)
train loss: tensor(9489.9814, grad_fn=<MseLossBackward0>)
train loss: tensor(13119.9717, grad_fn=<MseLossBackward0>)
train loss: tensor(7949.1118, grad_fn=<MseLossBackward0>)
train loss: tensor(4211.6138, grad_fn=<MseLossBackward0>)
train loss: tensor(10202.2969, grad_fn=<MseLossBackward0>)
train loss: tensor(10749.9551, grad_fn=<MseLossBackward0>)
train loss: tensor(9829.2715, grad_fn=<MseLossBackward0>)
train loss: tensor(7911.3110, grad_fn=<MseLossBackward0>)
train loss: tensor(7880.4565, grad_fn=<MseLossBackward0>)
train loss: tensor(4499.5835, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8376.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(6817.6572, grad_fn=<MseLossBackward0>)
train loss: tensor(11351.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(2501.8699, grad_fn=<MseLossBackward0>)
train loss: tensor(11506.8164, grad_fn=<MseLossBackward0>)
train loss: tensor(8264.6475, grad_fn=<MseLossBackward0>)
train loss: tensor(5265.4888, grad_fn=<MseLossBackward0>)
train loss: tensor(12130.0342, grad_fn=<MseLossBackward0>)
train loss: tensor(12016.9951, grad_fn=<MseLossBackward0>)
train loss: tensor(6334.0376, grad_fn=<MseLossBackward0>)
train loss: tensor(5591.0195, grad_fn=<MseLossBackward0>)
train loss: tensor(6291.0962, grad_fn=<MseLossBackward0>)
train loss: tensor(8437.1689, grad_fn=<MseLossBackward0>)
train loss: tensor(4915.8188, grad_fn=<MseLossBackward0>)
train loss: tensor(4340.0601, grad_fn=<MseLossBackward0>)
train loss: tensor(14998.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(7667.4033, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(2075.5537, grad_fn=<MseLossBackward0>)
train loss: tensor(12158.3359, grad_fn=<MseLossBackward0>)
train loss: tensor(7292.0762, grad_fn=<MseLossBackward0>)
train loss: tensor(6891.0532, grad_fn=<MseLossBackward0>)
train loss: tensor(12510.1426, grad_fn=<MseLossBackward0>)
train loss: tensor(17533.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(5243.4312, grad_fn=<MseLossBackward0>)
train loss: tensor(12986.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(6949.3579, grad_fn=<MseLossBackward0>)
train loss: tensor(5263.2427, grad_fn=<MseLossBackward0>)
train loss: tensor(4032.5596, grad_fn=<MseLossBackward0>)
train loss: tensor(3335.2278, grad_fn=<MseLossBackward0>)
train loss: tensor(4418.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(8237.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(7418.3926, grad_fn=<MseLossBackward0>)
train loss: tensor(1979.2968, grad_fn=<MseLossBackward0>)
train loss: tensor(3778.9541, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(13337.6318, grad_fn=<MseLossBackward0>)
train loss: tensor(5180.0713, grad_fn=<MseLossBackward0>)
train loss: tensor(7955.8179, grad_fn=<MseLossBackward0>)
train loss: tensor(6476.1509, grad_fn=<MseLossBackward0>)
train loss: tensor(2549.6814, grad_fn=<MseLossBackward0>)
train loss: tensor(5932.2080, grad_fn=<MseLossBackward0>)
train loss: tensor(34146.2070, grad_fn=<MseLossBackward0>)
train loss: tensor(4907.7676, grad_fn=<MseLossBackward0>)
train loss: tensor(4908.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(4935.5430, grad_fn=<MseLossBackward0>)
train loss: tensor(4376.7295, grad_fn=<MseLossBackward0>)
train loss: tensor(6476.8745, grad_fn=<MseLossBackward0>)
train loss: tensor(4309.6553, grad_fn=<MseLossBackward0>)
train loss: tensor(6934.5337, grad_fn=<MseLossBackward0>)
train loss: tensor(9902.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(15288.7803, grad_fn=<MseLossBackward0>)
train loss: tensor(8472.5078, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(7691.8818, grad_fn=<MseLossBackward0>)
train loss: tensor(3615.6101, grad_fn=<MseLossBackward0>)
train loss: tensor(5100.1899, grad_fn=<MseLossBackward0>)
train loss: tensor(14609.0928, grad_fn=<MseLossBackward0>)
train loss: tensor(6041.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(7082.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(9615.7822, grad_fn=<MseLossBackward0>)
train loss: tensor(7215.5498, grad_fn=<MseLossBackward0>)
train loss: tensor(2140.8264, grad_fn=<MseLossBackward0>)
train loss: tensor(4413.0327, grad_fn=<MseLossBackward0>)
train loss: tensor(4308.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(9806.2217, grad_fn=<MseLossBackward0>)
train loss: tensor(7166.3882, grad_fn=<MseLossBackward0>)
train loss: tensor(3351.7532, grad_fn=<MseLossBackward0>)
train loss: tensor(10342.2510, grad_fn=<MseLossBackward0>)
train loss: tensor(7300.8218, grad_fn=<MseLossBackward0>)
train loss: tensor(14961.9531, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(6967.0005, grad_fn=<MseLossBackward0>)
train loss: tensor(9335.9238, grad_fn=<MseLossBackward0>)
train loss: tensor(3877.5840, grad_fn=<MseLossBackward0>)
train loss: tensor(4561.8413, grad_fn=<MseLossBackward0>)
train loss: tensor(7448.0273, grad_fn=<MseLossBackward0>)
train loss: tensor(4986.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(19172.0020, grad_fn=<MseLossBackward0>)
train loss: tensor(12759.5244, grad_fn=<MseLossBackward0>)
train loss: tensor(9536.9775, grad_fn=<MseLossBackward0>)
train loss: tensor(6305.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(12374.7148, grad_fn=<MseLossBackward0>)
train loss: tensor(11827.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(9796.8457, grad_fn=<MseLossBackward0>)
train loss: tensor(8561.2979, grad_fn=<MseLossBackward0>)
train loss: tensor(7102.6118, grad_fn=<MseLossBackward0>)
train loss: tensor(5745.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(6882.9600, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(1583.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(11698.9043, grad_fn=<MseLossBackward0>)
train loss: tensor(7761.1851, grad_fn=<MseLossBackward0>)
train loss: tensor(4088.5349, grad_fn=<MseLossBackward0>)
train loss: tensor(8690.2656, grad_fn=<MseLossBackward0>)
train loss: tensor(6237.1118, grad_fn=<MseLossBackward0>)
train loss: tensor(7196.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(6572.9385, grad_fn=<MseLossBackward0>)
train loss: tensor(6486.7544, grad_fn=<MseLossBackward0>)
train loss: tensor(5327.8911, grad_fn=<MseLossBackward0>)
train loss: tensor(9243.0352, grad_fn=<MseLossBackward0>)
train loss: tensor(4922.8281, grad_fn=<MseLossBackward0>)
train loss: tensor(3922.6025, grad_fn=<MseLossBackward0>)
train loss: tensor(5785.3818, grad_fn=<MseLossBackward0>)
train loss: tensor(12133.1016, grad_fn=<MseLossBackward0>)
train loss: tensor(8056.4521, grad_fn=<MseLossBackward0>)
train loss: tensor(5069.6992, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(5146.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(2895.0449, grad_fn=<MseLossBackward0>)
train loss: tensor(4324.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(8276.8037, grad_fn=<MseLossBackward0>)
train loss: tensor(5653.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(4704.1235, grad_fn=<MseLossBackward0>)
train loss: tensor(7876.8062, grad_fn=<MseLossBackward0>)
train loss: tensor(3756.9255, grad_fn=<MseLossBackward0>)
train loss: tensor(4442.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(7882.1309, grad_fn=<MseLossBackward0>)
train loss: tensor(15066.4668, grad_fn=<MseLossBackward0>)
train loss: tensor(17211.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(7723.4897, grad_fn=<MseLossBackward0>)
train loss: tensor(6354.3384, grad_fn=<MseLossBackward0>)
train loss: tensor(7469.4053, grad_fn=<MseLossBackward0>)
train loss: tensor(7468.5249, grad_fn=<MseLossBackward0>)
train loss: tensor(6890.7549, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(2164.7422, grad_fn=<MseLossBackward0>)
train loss: tensor(3374.6443, grad_fn=<MseLossBackward0>)
train loss: tensor(8594.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(4777.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(8618.0137, grad_fn=<MseLossBackward0>)
train loss: tensor(2018.1382, grad_fn=<MseLossBackward0>)
train loss: tensor(6579.8945, grad_fn=<MseLossBackward0>)
train loss: tensor(4455.3955, grad_fn=<MseLossBackward0>)
train loss: tensor(3844.6714, grad_fn=<MseLossBackward0>)
train loss: tensor(4916.8608, grad_fn=<MseLossBackward0>)
train loss: tensor(21244.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(14952.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(10029.4990, grad_fn=<MseLossBackward0>)
train loss: tensor(6816.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(2345.2886, grad_fn=<MseLossBackward0>)
train loss: tensor(7714.7603, grad_fn=<MseLossBackward0>)
train loss: tensor(5544.5732, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(4174.8193, grad_fn=<MseLossBackward0>)
train loss: tensor(4495.0342, grad_fn=<MseLossBackward0>)
train loss: tensor(5742.4517, grad_fn=<MseLossBackward0>)
train loss: tensor(29643.8379, grad_fn=<MseLossBackward0>)
train loss: tensor(6104.1099, grad_fn=<MseLossBackward0>)
train loss: tensor(5463.2651, grad_fn=<MseLossBackward0>)
train loss: tensor(5251.7720, grad_fn=<MseLossBackward0>)
train loss: tensor(11874.3711, grad_fn=<MseLossBackward0>)
train loss: tensor(8399.6670, grad_fn=<MseLossBackward0>)
train loss: tensor(4001.7102, grad_fn=<MseLossBackward0>)
train loss: tensor(8759.3652, grad_fn=<MseLossBackward0>)
train loss: tensor(5751.7598, grad_fn=<MseLossBackward0>)
train loss: tensor(6804.9365, grad_fn=<MseLossBackward0>)
train loss: tensor(6396.1821, grad_fn=<MseLossBackward0>)
train loss: tensor(8573.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(1576.5873, grad_fn=<MseLossBackward0>)
train loss: tensor(3430.5713, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4740.4976, grad_fn=<MseLossBackward0>)
train loss: tensor(3811.5662, grad_fn=<MseLossBackward0>)
train loss: tensor(9689.3652, grad_fn=<MseLossBackward0>)
train loss: tensor(2257.6506, grad_fn=<MseLossBackward0>)
train loss: tensor(10859.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(5201.6992, grad_fn=<MseLossBackward0>)
train loss: tensor(4851.6875, grad_fn=<MseLossBackward0>)
train loss: tensor(8154.6172, grad_fn=<MseLossBackward0>)
train loss: tensor(5183.6880, grad_fn=<MseLossBackward0>)
train loss: tensor(5011.7671, grad_fn=<MseLossBackward0>)
train loss: tensor(3705.0701, grad_fn=<MseLossBackward0>)
train loss: tensor(6094.4414, grad_fn=<MseLossBackward0>)
train loss: tensor(15492.2783, grad_fn=<MseLossBackward0>)
train loss: tensor(6656.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(8454.6797, grad_fn=<MseLossBackward0>)
train loss: tensor(9699.6562, grad_fn=<MseLossBackward0>)
train loss: tensor(56161.7188, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(11773.5645, grad_fn=<MseLossBackward0>)
train loss: tensor(5070.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(10380.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(3822.5916, grad_fn=<MseLossBackward0>)
train loss: tensor(7648.2119, grad_fn=<MseLossBackward0>)
train loss: tensor(2244.4185, grad_fn=<MseLossBackward0>)
train loss: tensor(5658.0591, grad_fn=<MseLossBackward0>)
train loss: tensor(12757.7021, grad_fn=<MseLossBackward0>)
train loss: tensor(9191.8213, grad_fn=<MseLossBackward0>)
train loss: tensor(2051.4702, grad_fn=<MseLossBackward0>)
train loss: tensor(8772.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(6453.7100, grad_fn=<MseLossBackward0>)
train loss: tensor(9730.8164, grad_fn=<MseLossBackward0>)
train loss: tensor(8224.0381, grad_fn=<MseLossBackward0>)
train loss: tensor(6377.2158, grad_fn=<MseLossBackward0>)
train loss: tensor(6656.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(8482.9629, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(14224.7393, grad_fn=<MseLossBackward0>)
train loss: tensor(8761.3223, grad_fn=<MseLossBackward0>)
train loss: tensor(10497.8936, grad_fn=<MseLossBackward0>)
train loss: tensor(1999.8680, grad_fn=<MseLossBackward0>)
train loss: tensor(5307.4292, grad_fn=<MseLossBackward0>)
train loss: tensor(4362.1411, grad_fn=<MseLossBackward0>)
train loss: tensor(10654.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(6794.4448, grad_fn=<MseLossBackward0>)
train loss: tensor(4341.3477, grad_fn=<MseLossBackward0>)
train loss: tensor(6010.4731, grad_fn=<MseLossBackward0>)
train loss: tensor(4382.5615, grad_fn=<MseLossBackward0>)
train loss: tensor(3554.6758, grad_fn=<MseLossBackward0>)
train loss: tensor(4326.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(5132.2676, grad_fn=<MseLossBackward0>)
train loss: tensor(4578.7612, grad_fn=<MseLossBackward0>)
train loss: tensor(42859.4180, grad_fn=<MseLossBackward0>)
train loss: tensor(3268.2217, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8863.6963, grad_fn=<MseLossBackward0>)
train loss: tensor(2708.3386, grad_fn=<MseLossBackward0>)
train loss: tensor(10237.3975, grad_fn=<MseLossBackward0>)
train loss: tensor(70891.1094, grad_fn=<MseLossBackward0>)
train loss: tensor(8795.8955, grad_fn=<MseLossBackward0>)
train loss: tensor(8612.4795, grad_fn=<MseLossBackward0>)
train loss: tensor(3282.4026, grad_fn=<MseLossBackward0>)
train loss: tensor(7967.9165, grad_fn=<MseLossBackward0>)
train loss: tensor(8188.5205, grad_fn=<MseLossBackward0>)
train loss: tensor(6235.9419, grad_fn=<MseLossBackward0>)
train loss: tensor(4282.8218, grad_fn=<MseLossBackward0>)
train loss: tensor(3714.6814, grad_fn=<MseLossBackward0>)
train loss: tensor(14327.7988, grad_fn=<MseLossBackward0>)
train loss: tensor(6589.6406, grad_fn=<MseLossBackward0>)
train loss: tensor(2673.8455, grad_fn=<MseLossBackward0>)
train loss: tensor(13004.1768, grad_fn=<MseLossBackward0>)
train loss: tensor(5729.5298, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8468.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(6017.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(8873.1934, grad_fn=<MseLossBackward0>)
train loss: tensor(6772.2710, grad_fn=<MseLossBackward0>)
train loss: tensor(4992.4858, grad_fn=<MseLossBackward0>)
train loss: tensor(7117.8535, grad_fn=<MseLossBackward0>)
train loss: tensor(5284.9219, grad_fn=<MseLossBackward0>)
train loss: tensor(4383.8228, grad_fn=<MseLossBackward0>)
train loss: tensor(9468.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(9946.7764, grad_fn=<MseLossBackward0>)
train loss: tensor(6080.1475, grad_fn=<MseLossBackward0>)
train loss: tensor(5742.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(10825.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(14768.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(5994.6069, grad_fn=<MseLossBackward0>)
train loss: tensor(7162.9458, grad_fn=<MseLossBackward0>)
train loss: tensor(6336.4854, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(12521.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(2938.9966, grad_fn=<MseLossBackward0>)
train loss: tensor(7409.0356, grad_fn=<MseLossBackward0>)
train loss: tensor(75024.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(4436.5630, grad_fn=<MseLossBackward0>)
train loss: tensor(5060.5215, grad_fn=<MseLossBackward0>)
train loss: tensor(4753.6245, grad_fn=<MseLossBackward0>)
train loss: tensor(1274.7841, grad_fn=<MseLossBackward0>)
train loss: tensor(13735.6680, grad_fn=<MseLossBackward0>)
train loss: tensor(10793.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(4828.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(1845.4807, grad_fn=<MseLossBackward0>)
train loss: tensor(4539.7705, grad_fn=<MseLossBackward0>)
train loss: tensor(11426.8037, grad_fn=<MseLossBackward0>)
train loss: tensor(8928.6572, grad_fn=<MseLossBackward0>)
train loss: tensor(3539.9746, grad_fn=<MseLossBackward0>)
train loss: tensor(7599.9238, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(2177.8247, grad_fn=<MseLossBackward0>)
train loss: tensor(5701.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(4316.1074, grad_fn=<MseLossBackward0>)
train loss: tensor(9721.0430, grad_fn=<MseLossBackward0>)
train loss: tensor(9336.8877, grad_fn=<MseLossBackward0>)
train loss: tensor(2768.4893, grad_fn=<MseLossBackward0>)
train loss: tensor(3210.9153, grad_fn=<MseLossBackward0>)
train loss: tensor(10912.0811, grad_fn=<MseLossBackward0>)
train loss: tensor(4035.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(10023.6807, grad_fn=<MseLossBackward0>)
train loss: tensor(17057.0527, grad_fn=<MseLossBackward0>)
train loss: tensor(10829.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(3248.4873, grad_fn=<MseLossBackward0>)
train loss: tensor(8694.1963, grad_fn=<MseLossBackward0>)
train loss: tensor(6279.6938, grad_fn=<MseLossBackward0>)
train loss: tensor(4043.0547, grad_fn=<MseLossBackward0>)
train loss: tensor(4581.0054, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(6083.4741, grad_fn=<MseLossBackward0>)
train loss: tensor(6190.0679, grad_fn=<MseLossBackward0>)
train loss: tensor(2550.2024, grad_fn=<MseLossBackward0>)
train loss: tensor(3216.5676, grad_fn=<MseLossBackward0>)
train loss: tensor(5149.6543, grad_fn=<MseLossBackward0>)
train loss: tensor(7977.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(2135.8015, grad_fn=<MseLossBackward0>)
train loss: tensor(7938.1846, grad_fn=<MseLossBackward0>)
train loss: tensor(9537.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(7282.9985, grad_fn=<MseLossBackward0>)
train loss: tensor(2870.0232, grad_fn=<MseLossBackward0>)
train loss: tensor(8105.6831, grad_fn=<MseLossBackward0>)
train loss: tensor(8098.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(6124.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(33523.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(15665.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(5381.3052, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(5759.0879, grad_fn=<MseLossBackward0>)
train loss: tensor(9215.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(17735.1348, grad_fn=<MseLossBackward0>)
train loss: tensor(4047.5591, grad_fn=<MseLossBackward0>)
train loss: tensor(3945.7700, grad_fn=<MseLossBackward0>)
train loss: tensor(8370.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(10546.8506, grad_fn=<MseLossBackward0>)
train loss: tensor(6542.3838, grad_fn=<MseLossBackward0>)
train loss: tensor(13480.4883, grad_fn=<MseLossBackward0>)
train loss: tensor(8394.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(9230.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(10917.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(8185.3779, grad_fn=<MseLossBackward0>)
train loss: tensor(3384.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(6241.0400, grad_fn=<MseLossBackward0>)
train loss: tensor(4676.5952, grad_fn=<MseLossBackward0>)
train loss: tensor(14216.8438, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(8172.4097, grad_fn=<MseLossBackward0>)
train loss: tensor(2591.3052, grad_fn=<MseLossBackward0>)
train loss: tensor(9963.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(6064.9243, grad_fn=<MseLossBackward0>)
train loss: tensor(2916.0942, grad_fn=<MseLossBackward0>)
train loss: tensor(14655.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(2769.0398, grad_fn=<MseLossBackward0>)
train loss: tensor(9346.2285, grad_fn=<MseLossBackward0>)
train loss: tensor(6316.9258, grad_fn=<MseLossBackward0>)
train loss: tensor(5999.4497, grad_fn=<MseLossBackward0>)
train loss: tensor(2064.2097, grad_fn=<MseLossBackward0>)
train loss: tensor(4963.7676, grad_fn=<MseLossBackward0>)
train loss: tensor(1880.6066, grad_fn=<MseLossBackward0>)
train loss: tensor(8649.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(7082.7285, grad_fn=<MseLossBackward0>)
train loss: tensor(1719.7075, grad_fn=<MseLossBackward0>)
train loss: tensor(4399.7686, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(4071.1841, grad_fn=<MseLossBackward0>)
train loss: tensor(14251.8223, grad_fn=<MseLossBackward0>)
train loss: tensor(4945.5815, grad_fn=<MseLossBackward0>)
train loss: tensor(8309.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(7637.1772, grad_fn=<MseLossBackward0>)
train loss: tensor(4678.4292, grad_fn=<MseLossBackward0>)
train loss: tensor(3218.5471, grad_fn=<MseLossBackward0>)
train loss: tensor(10765.3828, grad_fn=<MseLossBackward0>)
train loss: tensor(5454.6099, grad_fn=<MseLossBackward0>)
train loss: tensor(9733.6689, grad_fn=<MseLossBackward0>)
train loss: tensor(10395.8818, grad_fn=<MseLossBackward0>)
train loss: tensor(12406.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(10289.2549, grad_fn=<MseLossBackward0>)
train loss: tensor(10731.9189, grad_fn=<MseLossBackward0>)
train loss: tensor(14231.3691, grad_fn=<MseLossBackward0>)
train loss: tensor(55837.6758, grad_fn=<MseLossBackward0>)
train loss: tensor(12462.9551, grad_fn=<MseLossBackward0>)
train

train loss: tensor(6112.1919, grad_fn=<MseLossBackward0>)
train loss: tensor(4401.2861, grad_fn=<MseLossBackward0>)
train loss: tensor(8142.4224, grad_fn=<MseLossBackward0>)
train loss: tensor(5235.2163, grad_fn=<MseLossBackward0>)
train loss: tensor(2694.4463, grad_fn=<MseLossBackward0>)
train loss: tensor(5454.5278, grad_fn=<MseLossBackward0>)
train loss: tensor(5043.2319, grad_fn=<MseLossBackward0>)
train loss: tensor(13725.3545, grad_fn=<MseLossBackward0>)
train loss: tensor(4708.2930, grad_fn=<MseLossBackward0>)
train loss: tensor(16025.3037, grad_fn=<MseLossBackward0>)
train loss: tensor(3327.5388, grad_fn=<MseLossBackward0>)
train loss: tensor(10388.9424, grad_fn=<MseLossBackward0>)
train loss: tensor(11454.3105, grad_fn=<MseLossBackward0>)
train loss: tensor(9179.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(6459.0991, grad_fn=<MseLossBackward0>)
train loss: tensor(13044.4326, grad_fn=<MseLossBackward0>)
train loss: tensor(7289.9326, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(3962.7239, grad_fn=<MseLossBackward0>)
train loss: tensor(11301.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(3252.3213, grad_fn=<MseLossBackward0>)
train loss: tensor(9128.9912, grad_fn=<MseLossBackward0>)
train loss: tensor(5125.0571, grad_fn=<MseLossBackward0>)
train loss: tensor(3714.6882, grad_fn=<MseLossBackward0>)
train loss: tensor(5456.1089, grad_fn=<MseLossBackward0>)
train loss: tensor(7192.4370, grad_fn=<MseLossBackward0>)
train loss: tensor(4993.3433, grad_fn=<MseLossBackward0>)
train loss: tensor(5674.5381, grad_fn=<MseLossBackward0>)
train loss: tensor(4891.5493, grad_fn=<MseLossBackward0>)
train loss: tensor(4302.7305, grad_fn=<MseLossBackward0>)
train loss: tensor(7736.6099, grad_fn=<MseLossBackward0>)
train loss: tensor(10854.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(11121.5576, grad_fn=<MseLossBackward0>)
train loss: tensor(3949.2595, grad_fn=<MseLossBackward0>)
train loss: tensor(16476.5156, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(7772.9985, grad_fn=<MseLossBackward0>)
train loss: tensor(12982.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(13790.0635, grad_fn=<MseLossBackward0>)
train loss: tensor(6111.5796, grad_fn=<MseLossBackward0>)
train loss: tensor(7884.7666, grad_fn=<MseLossBackward0>)
train loss: tensor(3747.5620, grad_fn=<MseLossBackward0>)
train loss: tensor(10385.4404, grad_fn=<MseLossBackward0>)
train loss: tensor(1913.1440, grad_fn=<MseLossBackward0>)
train loss: tensor(80440.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(4936.2397, grad_fn=<MseLossBackward0>)
train loss: tensor(5298.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(4131.0371, grad_fn=<MseLossBackward0>)
train loss: tensor(6212.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(73365.2578, grad_fn=<MseLossBackward0>)
train loss: tensor(7126.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(4709.2212, grad_fn=<MseLossBackward0>)
train loss: tensor(12941.4453, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(5940.4834, grad_fn=<MseLossBackward0>)
train loss: tensor(7710.8438, grad_fn=<MseLossBackward0>)
train loss: tensor(6088.4287, grad_fn=<MseLossBackward0>)
train loss: tensor(6987.4229, grad_fn=<MseLossBackward0>)
train loss: tensor(6261.7114, grad_fn=<MseLossBackward0>)
train loss: tensor(7289.5034, grad_fn=<MseLossBackward0>)
train loss: tensor(53825.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(9428.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(2552.9504, grad_fn=<MseLossBackward0>)
train loss: tensor(7893.9556, grad_fn=<MseLossBackward0>)
train loss: tensor(8597.1270, grad_fn=<MseLossBackward0>)
train loss: tensor(4612.3496, grad_fn=<MseLossBackward0>)
train loss: tensor(3900.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(5864.6069, grad_fn=<MseLossBackward0>)
train loss: tensor(8865.2949, grad_fn=<MseLossBackward0>)
train loss: tensor(1906.0404, grad_fn=<MseLossBackward0>)
train loss: tensor(66520.7500, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(7732.3091, grad_fn=<MseLossBackward0>)
train loss: tensor(10153.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(5185.5596, grad_fn=<MseLossBackward0>)
train loss: tensor(3195.9851, grad_fn=<MseLossBackward0>)
train loss: tensor(2837.1787, grad_fn=<MseLossBackward0>)
train loss: tensor(10211.9561, grad_fn=<MseLossBackward0>)
train loss: tensor(5950.6401, grad_fn=<MseLossBackward0>)
train loss: tensor(14632.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(4530.6436, grad_fn=<MseLossBackward0>)
train loss: tensor(6110.8047, grad_fn=<MseLossBackward0>)
train loss: tensor(11191.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(2187.0278, grad_fn=<MseLossBackward0>)
train loss: tensor(7913.9370, grad_fn=<MseLossBackward0>)
train loss: tensor(3767.7520, grad_fn=<MseLossBackward0>)
train loss: tensor(8114.8281, grad_fn=<MseLossBackward0>)
train loss: tensor(11803.9580, grad_fn=<MseLossBackward0>)
train loss: tensor(5607.9727, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6788.6240, grad_fn=<MseLossBackward0>)
train loss: tensor(6370.6782, grad_fn=<MseLossBackward0>)
train loss: tensor(7389.1597, grad_fn=<MseLossBackward0>)
train loss: tensor(4200.3638, grad_fn=<MseLossBackward0>)
train loss: tensor(6470.4893, grad_fn=<MseLossBackward0>)
train loss: tensor(4896.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(8340.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(8189.9165, grad_fn=<MseLossBackward0>)
train loss: tensor(8674.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(5644.0552, grad_fn=<MseLossBackward0>)
train loss: tensor(10264.0059, grad_fn=<MseLossBackward0>)
train loss: tensor(6203.6191, grad_fn=<MseLossBackward0>)
train loss: tensor(12207.5410, grad_fn=<MseLossBackward0>)
train loss: tensor(3080.2754, grad_fn=<MseLossBackward0>)
train loss: tensor(7151.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(6636.6177, grad_fn=<MseLossBackward0>)
train loss: tensor(2411.1609, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(3666.5808, grad_fn=<MseLossBackward0>)
train loss: tensor(5970.4668, grad_fn=<MseLossBackward0>)
train loss: tensor(12537.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(10024.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(11554.1836, grad_fn=<MseLossBackward0>)
train loss: tensor(15320.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(7081.8354, grad_fn=<MseLossBackward0>)
train loss: tensor(14265.3535, grad_fn=<MseLossBackward0>)
train loss: tensor(3761.4590, grad_fn=<MseLossBackward0>)
train loss: tensor(8294.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(10438.2256, grad_fn=<MseLossBackward0>)
train loss: tensor(11687.2275, grad_fn=<MseLossBackward0>)
train loss: tensor(6315.8369, grad_fn=<MseLossBackward0>)
train loss: tensor(2420.8269, grad_fn=<MseLossBackward0>)
train loss: tensor(4761.5571, grad_fn=<MseLossBackward0>)
train loss: tensor(6470.4639, grad_fn=<MseLossBackward0>)
train loss: tensor(10068.3096, grad_fn=<MseLossBackward0>)
train 

train loss: tensor(6969.8843, grad_fn=<MseLossBackward0>)
train loss: tensor(2078.5193, grad_fn=<MseLossBackward0>)
train loss: tensor(7020.4165, grad_fn=<MseLossBackward0>)
train loss: tensor(15358.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(1075.0614, grad_fn=<MseLossBackward0>)
train loss: tensor(6256.1851, grad_fn=<MseLossBackward0>)
train loss: tensor(1863.9431, grad_fn=<MseLossBackward0>)
train loss: tensor(12260.0869, grad_fn=<MseLossBackward0>)
train loss: tensor(5515.4370, grad_fn=<MseLossBackward0>)
train loss: tensor(12919.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(5356.9976, grad_fn=<MseLossBackward0>)
train loss: tensor(6967.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(12371.5879, grad_fn=<MseLossBackward0>)
train loss: tensor(11796.8096, grad_fn=<MseLossBackward0>)
train loss: tensor(5838.4058, grad_fn=<MseLossBackward0>)
train loss: tensor(8209.6084, grad_fn=<MseLossBackward0>)
train loss: tensor(7022.4429, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(2973.0608, grad_fn=<MseLossBackward0>)
train loss: tensor(6659.9565, grad_fn=<MseLossBackward0>)
train loss: tensor(4418.1450, grad_fn=<MseLossBackward0>)
train loss: tensor(6425.3843, grad_fn=<MseLossBackward0>)
train loss: tensor(13383.9004, grad_fn=<MseLossBackward0>)
train loss: tensor(10701.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(5494.1885, grad_fn=<MseLossBackward0>)
train loss: tensor(1551.9307, grad_fn=<MseLossBackward0>)
train loss: tensor(8091.2129, grad_fn=<MseLossBackward0>)
train loss: tensor(7818.7788, grad_fn=<MseLossBackward0>)
train loss: tensor(6521.6372, grad_fn=<MseLossBackward0>)
train loss: tensor(4122.2637, grad_fn=<MseLossBackward0>)
train loss: tensor(9886.8311, grad_fn=<MseLossBackward0>)
train loss: tensor(4716.1226, grad_fn=<MseLossBackward0>)
train loss: tensor(2575.2568, grad_fn=<MseLossBackward0>)
train loss: tensor(3721.9087, grad_fn=<MseLossBackward0>)
train loss: tensor(1824.6000, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(3816.3748, grad_fn=<MseLossBackward0>)
train loss: tensor(12932.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(10364.1396, grad_fn=<MseLossBackward0>)
train loss: tensor(3221.4331, grad_fn=<MseLossBackward0>)
train loss: tensor(2724.1025, grad_fn=<MseLossBackward0>)
train loss: tensor(5983.8296, grad_fn=<MseLossBackward0>)
train loss: tensor(6615.9282, grad_fn=<MseLossBackward0>)
train loss: tensor(6003.4966, grad_fn=<MseLossBackward0>)
train loss: tensor(9553.9170, grad_fn=<MseLossBackward0>)
train loss: tensor(9116.4922, grad_fn=<MseLossBackward0>)
train loss: tensor(14181.9121, grad_fn=<MseLossBackward0>)
train loss: tensor(10113.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(13921.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(11429.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(14426.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(34645.9180, grad_fn=<MseLossBackward0>)
train loss: tensor(11162.1064, grad_fn=<MseLossBackward0>)
train

train loss: tensor(5360.3525, grad_fn=<MseLossBackward0>)
train loss: tensor(9599.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(12213.8350, grad_fn=<MseLossBackward0>)
train loss: tensor(4748.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(3074.5884, grad_fn=<MseLossBackward0>)
train loss: tensor(7791.5752, grad_fn=<MseLossBackward0>)
train loss: tensor(8358.1455, grad_fn=<MseLossBackward0>)
train loss: tensor(6819.4365, grad_fn=<MseLossBackward0>)
train loss: tensor(7307.4028, grad_fn=<MseLossBackward0>)
train loss: tensor(4092.1985, grad_fn=<MseLossBackward0>)
train loss: tensor(5010.9199, grad_fn=<MseLossBackward0>)
train loss: tensor(11947.4873, grad_fn=<MseLossBackward0>)
train loss: tensor(5302.6768, grad_fn=<MseLossBackward0>)
train loss: tensor(4771.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(5625.5005, grad_fn=<MseLossBackward0>)
train loss: tensor(10212.6084, grad_fn=<MseLossBackward0>)
train loss: tensor(4725.3267, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5860.0498, grad_fn=<MseLossBackward0>)
train loss: tensor(19466.0020, grad_fn=<MseLossBackward0>)
train loss: tensor(13059.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(2362.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(2517.8269, grad_fn=<MseLossBackward0>)
train loss: tensor(2800.7913, grad_fn=<MseLossBackward0>)
train loss: tensor(6334.6665, grad_fn=<MseLossBackward0>)
train loss: tensor(8664.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(17144.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(2980.4268, grad_fn=<MseLossBackward0>)
train loss: tensor(4084.5461, grad_fn=<MseLossBackward0>)
train loss: tensor(2467.4651, grad_fn=<MseLossBackward0>)
train loss: tensor(9437.9893, grad_fn=<MseLossBackward0>)
train loss: tensor(14519.5771, grad_fn=<MseLossBackward0>)
train loss: tensor(68411.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(7541.4922, grad_fn=<MseLossBackward0>)
train loss: tensor(4947.3496, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(5653.0312, grad_fn=<MseLossBackward0>)
train loss: tensor(4941.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(5242.6177, grad_fn=<MseLossBackward0>)
train loss: tensor(3254.3853, grad_fn=<MseLossBackward0>)
train loss: tensor(4042.5872, grad_fn=<MseLossBackward0>)
train loss: tensor(8710.3730, grad_fn=<MseLossBackward0>)
train loss: tensor(6965.1914, grad_fn=<MseLossBackward0>)
train loss: tensor(10351.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(5299.3428, grad_fn=<MseLossBackward0>)
train loss: tensor(1689.1716, grad_fn=<MseLossBackward0>)
train loss: tensor(6132.6348, grad_fn=<MseLossBackward0>)
train loss: tensor(5411.4976, grad_fn=<MseLossBackward0>)
train loss: tensor(19983.0488, grad_fn=<MseLossBackward0>)
train loss: tensor(8025.2148, grad_fn=<MseLossBackward0>)
train loss: tensor(13108.5068, grad_fn=<MseLossBackward0>)
train loss: tensor(5737.7178, grad_fn=<MseLossBackward0>)
train loss: tensor(8220.6660, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(2298.4475, grad_fn=<MseLossBackward0>)
train loss: tensor(6788.9810, grad_fn=<MseLossBackward0>)
train loss: tensor(9394.4727, grad_fn=<MseLossBackward0>)
train loss: tensor(18582.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(3694.1694, grad_fn=<MseLossBackward0>)
train loss: tensor(1741.1832, grad_fn=<MseLossBackward0>)
train loss: tensor(3524.1343, grad_fn=<MseLossBackward0>)
train loss: tensor(42241.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(6317.1870, grad_fn=<MseLossBackward0>)
train loss: tensor(3870.2749, grad_fn=<MseLossBackward0>)
train loss: tensor(2358.9807, grad_fn=<MseLossBackward0>)
train loss: tensor(4940.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(4310.0024, grad_fn=<MseLossBackward0>)
train loss: tensor(3038.9194, grad_fn=<MseLossBackward0>)
train loss: tensor(5501.4351, grad_fn=<MseLossBackward0>)
train loss: tensor(7574.7163, grad_fn=<MseLossBackward0>)
train loss: tensor(6779.9053, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4664.5112, grad_fn=<MseLossBackward0>)
train loss: tensor(5587.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(2201.9587, grad_fn=<MseLossBackward0>)
train loss: tensor(9299.3037, grad_fn=<MseLossBackward0>)
train loss: tensor(5501.5527, grad_fn=<MseLossBackward0>)
train loss: tensor(5126.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(14468.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(10444.7783, grad_fn=<MseLossBackward0>)
train loss: tensor(4840.1841, grad_fn=<MseLossBackward0>)
train loss: tensor(12963.5557, grad_fn=<MseLossBackward0>)
train loss: tensor(5109.3945, grad_fn=<MseLossBackward0>)
train loss: tensor(6953.3608, grad_fn=<MseLossBackward0>)
train loss: tensor(5928.9888, grad_fn=<MseLossBackward0>)
train loss: tensor(5535.2715, grad_fn=<MseLossBackward0>)
train loss: tensor(7147.8188, grad_fn=<MseLossBackward0>)
train loss: tensor(6526.9888, grad_fn=<MseLossBackward0>)
train loss: tensor(2439.1335, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(6614.9932, grad_fn=<MseLossBackward0>)
train loss: tensor(6862.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(25637.9473, grad_fn=<MseLossBackward0>)
train loss: tensor(5184.8184, grad_fn=<MseLossBackward0>)
train loss: tensor(1702.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(8204.7295, grad_fn=<MseLossBackward0>)
train loss: tensor(3239.5376, grad_fn=<MseLossBackward0>)
train loss: tensor(2937.8708, grad_fn=<MseLossBackward0>)
train loss: tensor(2709.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(6241.8804, grad_fn=<MseLossBackward0>)
train loss: tensor(4281.2222, grad_fn=<MseLossBackward0>)
train loss: tensor(3686.0725, grad_fn=<MseLossBackward0>)
train loss: tensor(4968.1289, grad_fn=<MseLossBackward0>)
train loss: tensor(11713.2656, grad_fn=<MseLossBackward0>)
train loss: tensor(3930.9189, grad_fn=<MseLossBackward0>)
train loss: tensor(14314.1377, grad_fn=<MseLossBackward0>)
train loss: tensor(8665.4043, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5648.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(3267.9709, grad_fn=<MseLossBackward0>)
train loss: tensor(7417.5918, grad_fn=<MseLossBackward0>)
train loss: tensor(6042.2485, grad_fn=<MseLossBackward0>)
train loss: tensor(3077.6069, grad_fn=<MseLossBackward0>)
train loss: tensor(5837.9600, grad_fn=<MseLossBackward0>)
train loss: tensor(9120.8105, grad_fn=<MseLossBackward0>)
train loss: tensor(2621.2649, grad_fn=<MseLossBackward0>)
train loss: tensor(9276.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(4445.3701, grad_fn=<MseLossBackward0>)
train loss: tensor(13463.3877, grad_fn=<MseLossBackward0>)
train loss: tensor(3267.2173, grad_fn=<MseLossBackward0>)
train loss: tensor(4599.1094, grad_fn=<MseLossBackward0>)
train loss: tensor(13046.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(5210.9727, grad_fn=<MseLossBackward0>)
train loss: tensor(6780.1147, grad_fn=<MseLossBackward0>)
train loss: tensor(3731.1514, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(10986.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(10193.7305, grad_fn=<MseLossBackward0>)
train loss: tensor(11422.0713, grad_fn=<MseLossBackward0>)
train loss: tensor(7453.9321, grad_fn=<MseLossBackward0>)
train loss: tensor(8106.1157, grad_fn=<MseLossBackward0>)
train loss: tensor(4012.8743, grad_fn=<MseLossBackward0>)
train loss: tensor(8307.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(7822.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(7497.2002, grad_fn=<MseLossBackward0>)
train loss: tensor(4807.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(2824.2878, grad_fn=<MseLossBackward0>)
train loss: tensor(10888.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(6411.0942, grad_fn=<MseLossBackward0>)
train loss: tensor(4928.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(4304.5762, grad_fn=<MseLossBackward0>)
train loss: tensor(11672.0684, grad_fn=<MseLossBackward0>)
train loss: tensor(6469.5312, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(12438.3750, grad_fn=<MseLossBackward0>)
train loss: tensor(5728.9155, grad_fn=<MseLossBackward0>)
train loss: tensor(5643.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(4922.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(4724.8608, grad_fn=<MseLossBackward0>)
train loss: tensor(6239.9541, grad_fn=<MseLossBackward0>)
train loss: tensor(5010.5674, grad_fn=<MseLossBackward0>)
train loss: tensor(12428.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(11396.2295, grad_fn=<MseLossBackward0>)
train loss: tensor(4229.7432, grad_fn=<MseLossBackward0>)
train loss: tensor(7125.5674, grad_fn=<MseLossBackward0>)
train loss: tensor(4956.0752, grad_fn=<MseLossBackward0>)
train loss: tensor(3954.2983, grad_fn=<MseLossBackward0>)
train loss: tensor(3749.2700, grad_fn=<MseLossBackward0>)
train loss: tensor(5965.7632, grad_fn=<MseLossBackward0>)
train loss: tensor(10042.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(13901.2637, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(8432.2969, grad_fn=<MseLossBackward0>)
train loss: tensor(14356.0156, grad_fn=<MseLossBackward0>)
train loss: tensor(5200.1812, grad_fn=<MseLossBackward0>)
train loss: tensor(6783.8042, grad_fn=<MseLossBackward0>)
train loss: tensor(9352.3740, grad_fn=<MseLossBackward0>)
train loss: tensor(4539.1533, grad_fn=<MseLossBackward0>)
train loss: tensor(2204.0496, grad_fn=<MseLossBackward0>)
train loss: tensor(4938.3271, grad_fn=<MseLossBackward0>)
train loss: tensor(4612.5620, grad_fn=<MseLossBackward0>)
train loss: tensor(9794.4092, grad_fn=<MseLossBackward0>)
train loss: tensor(9635.5244, grad_fn=<MseLossBackward0>)
train loss: tensor(6194.2905, grad_fn=<MseLossBackward0>)
train loss: tensor(5987.9351, grad_fn=<MseLossBackward0>)
train loss: tensor(3653.7886, grad_fn=<MseLossBackward0>)
train loss: tensor(7024.3179, grad_fn=<MseLossBackward0>)
train loss: tensor(15795.1064, grad_fn=<MseLossBackward0>)
train loss: tensor(7858.9180, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(6198.8428, grad_fn=<MseLossBackward0>)
train loss: tensor(7170.7358, grad_fn=<MseLossBackward0>)
train loss: tensor(16338.0293, grad_fn=<MseLossBackward0>)
train loss: tensor(5790.5991, grad_fn=<MseLossBackward0>)
train loss: tensor(12541.0889, grad_fn=<MseLossBackward0>)
train loss: tensor(7439.9346, grad_fn=<MseLossBackward0>)
train loss: tensor(3667.6104, grad_fn=<MseLossBackward0>)
train loss: tensor(4975.8218, grad_fn=<MseLossBackward0>)
train loss: tensor(14060.0713, grad_fn=<MseLossBackward0>)
train loss: tensor(36957.8750, grad_fn=<MseLossBackward0>)
train loss: tensor(3812.4473, grad_fn=<MseLossBackward0>)
train loss: tensor(7362.3428, grad_fn=<MseLossBackward0>)
train loss: tensor(3537.7864, grad_fn=<MseLossBackward0>)
train loss: tensor(3666.5649, grad_fn=<MseLossBackward0>)
train loss: tensor(4959.3423, grad_fn=<MseLossBackward0>)
train loss: tensor(3603.3245, grad_fn=<MseLossBackward0>)
train loss: tensor(4028.0842, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(3813.7258, grad_fn=<MseLossBackward0>)
train loss: tensor(8020.1455, grad_fn=<MseLossBackward0>)
train loss: tensor(39014.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(5784.3765, grad_fn=<MseLossBackward0>)
train loss: tensor(4142.6221, grad_fn=<MseLossBackward0>)
train loss: tensor(5069.2842, grad_fn=<MseLossBackward0>)
train loss: tensor(5211.6118, grad_fn=<MseLossBackward0>)
train loss: tensor(42977.5742, grad_fn=<MseLossBackward0>)
train loss: tensor(2572.6919, grad_fn=<MseLossBackward0>)
train loss: tensor(4043.5430, grad_fn=<MseLossBackward0>)
train loss: tensor(8319.8740, grad_fn=<MseLossBackward0>)
train loss: tensor(4907.0903, grad_fn=<MseLossBackward0>)
train loss: tensor(8870.8584, grad_fn=<MseLossBackward0>)
train loss: tensor(5110.2905, grad_fn=<MseLossBackward0>)
train loss: tensor(4550.0225, grad_fn=<MseLossBackward0>)
train loss: tensor(2938.5830, grad_fn=<MseLossBackward0>)
train loss: tensor(2025.6570, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4093.5723, grad_fn=<MseLossBackward0>)
train loss: tensor(3664.4121, grad_fn=<MseLossBackward0>)
train loss: tensor(16138.7676, grad_fn=<MseLossBackward0>)
train loss: tensor(9749.2949, grad_fn=<MseLossBackward0>)
train loss: tensor(8469.0605, grad_fn=<MseLossBackward0>)
train loss: tensor(12983.8564, grad_fn=<MseLossBackward0>)
train loss: tensor(5217.3652, grad_fn=<MseLossBackward0>)
train loss: tensor(4906.2295, grad_fn=<MseLossBackward0>)
train loss: tensor(7099.2002, grad_fn=<MseLossBackward0>)
train loss: tensor(7407.8306, grad_fn=<MseLossBackward0>)
train loss: tensor(8511.3428, grad_fn=<MseLossBackward0>)
train loss: tensor(5005.0845, grad_fn=<MseLossBackward0>)
train loss: tensor(9591.2393, grad_fn=<MseLossBackward0>)
train loss: tensor(5265.8315, grad_fn=<MseLossBackward0>)
train loss: tensor(2707.5789, grad_fn=<MseLossBackward0>)
train loss: tensor(7504.8926, grad_fn=<MseLossBackward0>)
train loss: tensor(8538.8350, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(6161.0532, grad_fn=<MseLossBackward0>)
train loss: tensor(2918.7163, grad_fn=<MseLossBackward0>)
train loss: tensor(3160.3069, grad_fn=<MseLossBackward0>)
train loss: tensor(8465.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(2146.5632, grad_fn=<MseLossBackward0>)
train loss: tensor(7148.1748, grad_fn=<MseLossBackward0>)
train loss: tensor(10612.0166, grad_fn=<MseLossBackward0>)
train loss: tensor(7522.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(33140.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(8486.3027, grad_fn=<MseLossBackward0>)
train loss: tensor(3280.1746, grad_fn=<MseLossBackward0>)
train loss: tensor(6137.7368, grad_fn=<MseLossBackward0>)
train loss: tensor(5027.1699, grad_fn=<MseLossBackward0>)
train loss: tensor(3530.2922, grad_fn=<MseLossBackward0>)
train loss: tensor(30878.2773, grad_fn=<MseLossBackward0>)
train loss: tensor(1218.9484, grad_fn=<MseLossBackward0>)
train loss: tensor(7425.9810, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(32297.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(2721.2959, grad_fn=<MseLossBackward0>)
train loss: tensor(6422.3779, grad_fn=<MseLossBackward0>)
train loss: tensor(5167.1289, grad_fn=<MseLossBackward0>)
train loss: tensor(7497.9517, grad_fn=<MseLossBackward0>)
train loss: tensor(4819.2568, grad_fn=<MseLossBackward0>)
train loss: tensor(8197.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(5576.9639, grad_fn=<MseLossBackward0>)
train loss: tensor(8139.2051, grad_fn=<MseLossBackward0>)
train loss: tensor(2961.0591, grad_fn=<MseLossBackward0>)
train loss: tensor(14332.7637, grad_fn=<MseLossBackward0>)
train loss: tensor(12335.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(8256.6602, grad_fn=<MseLossBackward0>)
train loss: tensor(6404.4888, grad_fn=<MseLossBackward0>)
train loss: tensor(6137.8062, grad_fn=<MseLossBackward0>)
train loss: tensor(9192.8535, grad_fn=<MseLossBackward0>)
train loss: tensor(5258.9956, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(6549.0771, grad_fn=<MseLossBackward0>)
train loss: tensor(7229.4722, grad_fn=<MseLossBackward0>)
train loss: tensor(8111.5415, grad_fn=<MseLossBackward0>)
train loss: tensor(5060.4165, grad_fn=<MseLossBackward0>)
train loss: tensor(32199.0449, grad_fn=<MseLossBackward0>)
train loss: tensor(82414.8984, grad_fn=<MseLossBackward0>)
train loss: tensor(3314.8928, grad_fn=<MseLossBackward0>)
train loss: tensor(5369.3838, grad_fn=<MseLossBackward0>)
train loss: tensor(4324.2124, grad_fn=<MseLossBackward0>)
train loss: tensor(5213.7075, grad_fn=<MseLossBackward0>)
train loss: tensor(5075.4478, grad_fn=<MseLossBackward0>)
train loss: tensor(3277.3484, grad_fn=<MseLossBackward0>)
train loss: tensor(4428.4487, grad_fn=<MseLossBackward0>)
train loss: tensor(4108.7070, grad_fn=<MseLossBackward0>)
train loss: tensor(6435.8726, grad_fn=<MseLossBackward0>)
train loss: tensor(5213.1450, grad_fn=<MseLossBackward0>)
train loss: tensor(6906.5752, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(13533.3057, grad_fn=<MseLossBackward0>)
train loss: tensor(7556.8550, grad_fn=<MseLossBackward0>)
train loss: tensor(6499.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(4875.2461, grad_fn=<MseLossBackward0>)
train loss: tensor(8736.0703, grad_fn=<MseLossBackward0>)
train loss: tensor(4817.8784, grad_fn=<MseLossBackward0>)
train loss: tensor(4542.5513, grad_fn=<MseLossBackward0>)
train loss: tensor(4636.6875, grad_fn=<MseLossBackward0>)
train loss: tensor(29367.8594, grad_fn=<MseLossBackward0>)
train loss: tensor(4814.5806, grad_fn=<MseLossBackward0>)
train loss: tensor(11913.9082, grad_fn=<MseLossBackward0>)
train loss: tensor(3374.7004, grad_fn=<MseLossBackward0>)
train loss: tensor(2252.5303, grad_fn=<MseLossBackward0>)
train loss: tensor(5177.9287, grad_fn=<MseLossBackward0>)
train loss: tensor(16508.2246, grad_fn=<MseLossBackward0>)
train loss: tensor(4237.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(5196.8750, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(5765.1499, grad_fn=<MseLossBackward0>)
train loss: tensor(9709.3857, grad_fn=<MseLossBackward0>)
train loss: tensor(3994.9395, grad_fn=<MseLossBackward0>)
train loss: tensor(8154.6919, grad_fn=<MseLossBackward0>)
train loss: tensor(6930.8628, grad_fn=<MseLossBackward0>)
train loss: tensor(6307.6665, grad_fn=<MseLossBackward0>)
train loss: tensor(5950.8662, grad_fn=<MseLossBackward0>)
train loss: tensor(6239.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(11521.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(2711.9399, grad_fn=<MseLossBackward0>)
train loss: tensor(7388.1465, grad_fn=<MseLossBackward0>)
train loss: tensor(4130.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(14524.2344, grad_fn=<MseLossBackward0>)
train loss: tensor(2347.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(6664.9243, grad_fn=<MseLossBackward0>)
train loss: tensor(9925.3721, grad_fn=<MseLossBackward0>)
train loss: tensor(3329.5044, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(3947.4282, grad_fn=<MseLossBackward0>)
train loss: tensor(2673.4426, grad_fn=<MseLossBackward0>)
train loss: tensor(6438.8389, grad_fn=<MseLossBackward0>)
train loss: tensor(3323.5642, grad_fn=<MseLossBackward0>)
train loss: tensor(9423.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(3031.4041, grad_fn=<MseLossBackward0>)
train loss: tensor(4370.5581, grad_fn=<MseLossBackward0>)
train loss: tensor(6406.3066, grad_fn=<MseLossBackward0>)
train loss: tensor(4000.4358, grad_fn=<MseLossBackward0>)
train loss: tensor(5046.5464, grad_fn=<MseLossBackward0>)
train loss: tensor(6231.8931, grad_fn=<MseLossBackward0>)
train loss: tensor(6410.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(14086.6318, grad_fn=<MseLossBackward0>)
train loss: tensor(6756.2329, grad_fn=<MseLossBackward0>)
train loss: tensor(3386.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(5017.3516, grad_fn=<MseLossBackward0>)
train loss: tensor(11687.7236, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4436.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(7925.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(6526.4932, grad_fn=<MseLossBackward0>)
train loss: tensor(5439.0337, grad_fn=<MseLossBackward0>)
train loss: tensor(9409.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(6556.3018, grad_fn=<MseLossBackward0>)
train loss: tensor(3197.7986, grad_fn=<MseLossBackward0>)
train loss: tensor(5989.1636, grad_fn=<MseLossBackward0>)
train loss: tensor(6559.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(1768.8063, grad_fn=<MseLossBackward0>)
train loss: tensor(2426.1475, grad_fn=<MseLossBackward0>)
train loss: tensor(2223.6555, grad_fn=<MseLossBackward0>)
train loss: tensor(6120.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(2325.2866, grad_fn=<MseLossBackward0>)
train loss: tensor(6537.8535, grad_fn=<MseLossBackward0>)
train loss: tensor(11033.7480, grad_fn=<MseLossBackward0>)
train loss: tensor(16150.8184, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4266.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(5381.6646, grad_fn=<MseLossBackward0>)
train loss: tensor(9304.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(8101.7559, grad_fn=<MseLossBackward0>)
train loss: tensor(4985.4487, grad_fn=<MseLossBackward0>)
train loss: tensor(2947.1335, grad_fn=<MseLossBackward0>)
train loss: tensor(4056.4841, grad_fn=<MseLossBackward0>)
train loss: tensor(14093.1514, grad_fn=<MseLossBackward0>)
train loss: tensor(10394.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(3993.6450, grad_fn=<MseLossBackward0>)
train loss: tensor(8722.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(8636.4844, grad_fn=<MseLossBackward0>)
train loss: tensor(3631.3674, grad_fn=<MseLossBackward0>)
train loss: tensor(8720.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(7372.6812, grad_fn=<MseLossBackward0>)
train loss: tensor(7538.7949, grad_fn=<MseLossBackward0>)
train loss: tensor(3142.2334, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(7794.2080, grad_fn=<MseLossBackward0>)
train loss: tensor(2127.6919, grad_fn=<MseLossBackward0>)
train loss: tensor(4643.0415, grad_fn=<MseLossBackward0>)
train loss: tensor(8550.1270, grad_fn=<MseLossBackward0>)
train loss: tensor(6934.2144, grad_fn=<MseLossBackward0>)
train loss: tensor(5811.3931, grad_fn=<MseLossBackward0>)
train loss: tensor(4528.3530, grad_fn=<MseLossBackward0>)
train loss: tensor(5351.0479, grad_fn=<MseLossBackward0>)
train loss: tensor(2768.2732, grad_fn=<MseLossBackward0>)
train loss: tensor(14771.4355, grad_fn=<MseLossBackward0>)
train loss: tensor(5321.0171, grad_fn=<MseLossBackward0>)
train loss: tensor(10512.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(3246.8318, grad_fn=<MseLossBackward0>)
train loss: tensor(2762.1877, grad_fn=<MseLossBackward0>)
train loss: tensor(3149.0261, grad_fn=<MseLossBackward0>)
train loss: tensor(10906.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(6644.7646, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(8961.2383, grad_fn=<MseLossBackward0>)
train loss: tensor(7292.5190, grad_fn=<MseLossBackward0>)
train loss: tensor(7673.1001, grad_fn=<MseLossBackward0>)
train loss: tensor(16031.3477, grad_fn=<MseLossBackward0>)
train loss: tensor(17446.4551, grad_fn=<MseLossBackward0>)
train loss: tensor(14309.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(6681.1226, grad_fn=<MseLossBackward0>)
train loss: tensor(4425.1841, grad_fn=<MseLossBackward0>)
train loss: tensor(5823.2881, grad_fn=<MseLossBackward0>)
train loss: tensor(10577.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(8137.2417, grad_fn=<MseLossBackward0>)
train loss: tensor(2643.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(4316.5161, grad_fn=<MseLossBackward0>)
train loss: tensor(4103.3232, grad_fn=<MseLossBackward0>)
train loss: tensor(2770.9456, grad_fn=<MseLossBackward0>)
train loss: tensor(6632.9966, grad_fn=<MseLossBackward0>)
train loss: tensor(11204.8262, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(4638.5513, grad_fn=<MseLossBackward0>)
train loss: tensor(5214.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(4202.3765, grad_fn=<MseLossBackward0>)
train loss: tensor(4188.0151, grad_fn=<MseLossBackward0>)
train loss: tensor(3408.1960, grad_fn=<MseLossBackward0>)
train loss: tensor(7974.6099, grad_fn=<MseLossBackward0>)
train loss: tensor(3897.4612, grad_fn=<MseLossBackward0>)
train loss: tensor(14161.0449, grad_fn=<MseLossBackward0>)
train loss: tensor(4016.5559, grad_fn=<MseLossBackward0>)
train loss: tensor(5528.7935, grad_fn=<MseLossBackward0>)
train loss: tensor(1805.7673, grad_fn=<MseLossBackward0>)
train loss: tensor(5115.4731, grad_fn=<MseLossBackward0>)
train loss: tensor(3015.1292, grad_fn=<MseLossBackward0>)
train loss: tensor(11585.0801, grad_fn=<MseLossBackward0>)
train loss: tensor(8796.5059, grad_fn=<MseLossBackward0>)
train loss: tensor(8064.3564, grad_fn=<MseLossBackward0>)
train loss: tensor(7182.2202, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(3801.3811, grad_fn=<MseLossBackward0>)
train loss: tensor(7415.3271, grad_fn=<MseLossBackward0>)
train loss: tensor(3357.5354, grad_fn=<MseLossBackward0>)
train loss: tensor(5374.6401, grad_fn=<MseLossBackward0>)
train loss: tensor(3996.6787, grad_fn=<MseLossBackward0>)
train loss: tensor(4688.4600, grad_fn=<MseLossBackward0>)
train loss: tensor(3203.5527, grad_fn=<MseLossBackward0>)
train loss: tensor(4226.3550, grad_fn=<MseLossBackward0>)
train loss: tensor(4785.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(6654.1035, grad_fn=<MseLossBackward0>)
train loss: tensor(17964.6738, grad_fn=<MseLossBackward0>)
train loss: tensor(4583.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(12881.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(7274.5674, grad_fn=<MseLossBackward0>)
train loss: tensor(67538.2109, grad_fn=<MseLossBackward0>)
train loss: tensor(6300.6538, grad_fn=<MseLossBackward0>)
train loss: tensor(5386.2744, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(8863.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(5363.0542, grad_fn=<MseLossBackward0>)
train loss: tensor(1627.5856, grad_fn=<MseLossBackward0>)
train loss: tensor(15652.9355, grad_fn=<MseLossBackward0>)
train loss: tensor(7812.4438, grad_fn=<MseLossBackward0>)
train loss: tensor(4086.4199, grad_fn=<MseLossBackward0>)
train loss: tensor(2663.4597, grad_fn=<MseLossBackward0>)
train loss: tensor(4475.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(18356.8965, grad_fn=<MseLossBackward0>)
train loss: tensor(4479.4385, grad_fn=<MseLossBackward0>)
train loss: tensor(6430.6367, grad_fn=<MseLossBackward0>)
train loss: tensor(9305.2607, grad_fn=<MseLossBackward0>)
train loss: tensor(9466.4951, grad_fn=<MseLossBackward0>)
train loss: tensor(8479.7461, grad_fn=<MseLossBackward0>)
train loss: tensor(5406.5273, grad_fn=<MseLossBackward0>)
train loss: tensor(4242.1558, grad_fn=<MseLossBackward0>)
train loss: tensor(2882.9265, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(7541.8779, grad_fn=<MseLossBackward0>)
train loss: tensor(10684.9512, grad_fn=<MseLossBackward0>)
train loss: tensor(3220.8374, grad_fn=<MseLossBackward0>)
train loss: tensor(5121.3608, grad_fn=<MseLossBackward0>)
train loss: tensor(2606.5718, grad_fn=<MseLossBackward0>)
train loss: tensor(5606.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(5347.1235, grad_fn=<MseLossBackward0>)
train loss: tensor(3773.6829, grad_fn=<MseLossBackward0>)
train loss: tensor(6590.1113, grad_fn=<MseLossBackward0>)
train loss: tensor(10394.6436, grad_fn=<MseLossBackward0>)
train loss: tensor(2910.2185, grad_fn=<MseLossBackward0>)
train loss: tensor(6629.7559, grad_fn=<MseLossBackward0>)
train loss: tensor(8765.5430, grad_fn=<MseLossBackward0>)
train loss: tensor(3173.0444, grad_fn=<MseLossBackward0>)
train loss: tensor(3049.9514, grad_fn=<MseLossBackward0>)
train loss: tensor(8707.6055, grad_fn=<MseLossBackward0>)
train loss: tensor(11875.1777, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5871.5317, grad_fn=<MseLossBackward0>)
train loss: tensor(8095.5269, grad_fn=<MseLossBackward0>)
train loss: tensor(6978.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(10008.9863, grad_fn=<MseLossBackward0>)
train loss: tensor(14963.2812, grad_fn=<MseLossBackward0>)
train loss: tensor(11939.0576, grad_fn=<MseLossBackward0>)
train loss: tensor(2138.2126, grad_fn=<MseLossBackward0>)
train loss: tensor(6007.7583, grad_fn=<MseLossBackward0>)
train loss: tensor(11800.6377, grad_fn=<MseLossBackward0>)
train loss: tensor(5816.3530, grad_fn=<MseLossBackward0>)
train loss: tensor(3999.9871, grad_fn=<MseLossBackward0>)
train loss: tensor(3958.8601, grad_fn=<MseLossBackward0>)
train loss: tensor(5072.2021, grad_fn=<MseLossBackward0>)
train loss: tensor(3053.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(12849.1191, grad_fn=<MseLossBackward0>)
train loss: tensor(7156.5942, grad_fn=<MseLossBackward0>)
train loss: tensor(4292.6362, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(6527.4121, grad_fn=<MseLossBackward0>)
train loss: tensor(4528.0112, grad_fn=<MseLossBackward0>)
train loss: tensor(5217.4409, grad_fn=<MseLossBackward0>)
train loss: tensor(6394.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(3899.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(7708.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(5892.0103, grad_fn=<MseLossBackward0>)
train loss: tensor(3186.6033, grad_fn=<MseLossBackward0>)
train loss: tensor(6232.3018, grad_fn=<MseLossBackward0>)
train loss: tensor(8144.6675, grad_fn=<MseLossBackward0>)
train loss: tensor(74615.5625, grad_fn=<MseLossBackward0>)
train loss: tensor(5577.2271, grad_fn=<MseLossBackward0>)
train loss: tensor(5738.1685, grad_fn=<MseLossBackward0>)
train loss: tensor(5131.2705, grad_fn=<MseLossBackward0>)
train loss: tensor(8786.3350, grad_fn=<MseLossBackward0>)
train loss: tensor(5219.5522, grad_fn=<MseLossBackward0>)
train loss: tensor(5846.6201, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(3849.9436, grad_fn=<MseLossBackward0>)
train loss: tensor(2098.0564, grad_fn=<MseLossBackward0>)
train loss: tensor(5674.1831, grad_fn=<MseLossBackward0>)
train loss: tensor(6465.7031, grad_fn=<MseLossBackward0>)
train loss: tensor(7145.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(5534.6157, grad_fn=<MseLossBackward0>)
train loss: tensor(16197.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(2980.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(6148.5190, grad_fn=<MseLossBackward0>)
train loss: tensor(2733.6782, grad_fn=<MseLossBackward0>)
train loss: tensor(3768.0171, grad_fn=<MseLossBackward0>)
train loss: tensor(3584.4861, grad_fn=<MseLossBackward0>)
train loss: tensor(7227.9014, grad_fn=<MseLossBackward0>)
train loss: tensor(4019.0549, grad_fn=<MseLossBackward0>)
train loss: tensor(5270.3081, grad_fn=<MseLossBackward0>)
train loss: tensor(4255.0381, grad_fn=<MseLossBackward0>)
train loss: tensor(10236.6836, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(11376.5869, grad_fn=<MseLossBackward0>)
train loss: tensor(10307.5391, grad_fn=<MseLossBackward0>)
train loss: tensor(2191.3767, grad_fn=<MseLossBackward0>)
train loss: tensor(3762.7275, grad_fn=<MseLossBackward0>)
train loss: tensor(8608.9062, grad_fn=<MseLossBackward0>)
train loss: tensor(4401.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(9284.9316, grad_fn=<MseLossBackward0>)
train loss: tensor(5252.5601, grad_fn=<MseLossBackward0>)
train loss: tensor(4231.0171, grad_fn=<MseLossBackward0>)
train loss: tensor(15513.9541, grad_fn=<MseLossBackward0>)
train loss: tensor(7308.3257, grad_fn=<MseLossBackward0>)
train loss: tensor(9587.4150, grad_fn=<MseLossBackward0>)
train loss: tensor(6032.5000, grad_fn=<MseLossBackward0>)
train loss: tensor(5709.9707, grad_fn=<MseLossBackward0>)
train loss: tensor(2308.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(10943.3701, grad_fn=<MseLossBackward0>)
train loss: tensor(6746.2393, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(10057.6396, grad_fn=<MseLossBackward0>)
train loss: tensor(8331.8311, grad_fn=<MseLossBackward0>)
train loss: tensor(4278.5034, grad_fn=<MseLossBackward0>)
train loss: tensor(11152.2871, grad_fn=<MseLossBackward0>)
train loss: tensor(13633.2764, grad_fn=<MseLossBackward0>)
train loss: tensor(25160.1113, grad_fn=<MseLossBackward0>)
train loss: tensor(8868.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(6211.9209, grad_fn=<MseLossBackward0>)
train loss: tensor(8518.7744, grad_fn=<MseLossBackward0>)
train loss: tensor(6520.7271, grad_fn=<MseLossBackward0>)
train loss: tensor(9021.7031, grad_fn=<MseLossBackward0>)
train loss: tensor(6134.5845, grad_fn=<MseLossBackward0>)
train loss: tensor(7344.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(5323.8623, grad_fn=<MseLossBackward0>)
train loss: tensor(8339.7188, grad_fn=<MseLossBackward0>)
train loss: tensor(9185.2012, grad_fn=<MseLossBackward0>)
train loss: tensor(7068.1387, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(12826.5244, grad_fn=<MseLossBackward0>)
train loss: tensor(6375.7729, grad_fn=<MseLossBackward0>)
train loss: tensor(3236.1323, grad_fn=<MseLossBackward0>)
train loss: tensor(3191.7468, grad_fn=<MseLossBackward0>)
train loss: tensor(11339.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(13665.3896, grad_fn=<MseLossBackward0>)
train loss: tensor(4027.3992, grad_fn=<MseLossBackward0>)
train loss: tensor(2775.4487, grad_fn=<MseLossBackward0>)
train loss: tensor(5437.8257, grad_fn=<MseLossBackward0>)
train loss: tensor(11111.4424, grad_fn=<MseLossBackward0>)
train loss: tensor(7906.9312, grad_fn=<MseLossBackward0>)
train loss: tensor(7148.4194, grad_fn=<MseLossBackward0>)
train loss: tensor(3654.6453, grad_fn=<MseLossBackward0>)
train loss: tensor(5201.0244, grad_fn=<MseLossBackward0>)
train loss: tensor(9686.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(10074.3789, grad_fn=<MseLossBackward0>)
train loss: tensor(8447.2354, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(8479.2734, grad_fn=<MseLossBackward0>)
train loss: tensor(2966.4688, grad_fn=<MseLossBackward0>)
train loss: tensor(8976.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(6577.4053, grad_fn=<MseLossBackward0>)
train loss: tensor(3880.7493, grad_fn=<MseLossBackward0>)
train loss: tensor(4018.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(7035.1455, grad_fn=<MseLossBackward0>)
train loss: tensor(7114.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(4438.1309, grad_fn=<MseLossBackward0>)
train loss: tensor(2963.6331, grad_fn=<MseLossBackward0>)
train loss: tensor(4504.3984, grad_fn=<MseLossBackward0>)
train loss: tensor(9793.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(12259.8037, grad_fn=<MseLossBackward0>)
train loss: tensor(5248.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(2990.1372, grad_fn=<MseLossBackward0>)
train loss: tensor(2357.8059, grad_fn=<MseLossBackward0>)
train loss: tensor(7798.9561, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(4148.3594, grad_fn=<MseLossBackward0>)
train loss: tensor(8873.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(3726.8606, grad_fn=<MseLossBackward0>)
train loss: tensor(4685.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(9437.7480, grad_fn=<MseLossBackward0>)
train loss: tensor(8480.2383, grad_fn=<MseLossBackward0>)
train loss: tensor(3195.9834, grad_fn=<MseLossBackward0>)
train loss: tensor(7458.1138, grad_fn=<MseLossBackward0>)
train loss: tensor(3207.4692, grad_fn=<MseLossBackward0>)
train loss: tensor(11691.4531, grad_fn=<MseLossBackward0>)
train loss: tensor(3230.3774, grad_fn=<MseLossBackward0>)
train loss: tensor(1661.1440, grad_fn=<MseLossBackward0>)
train loss: tensor(2419.5220, grad_fn=<MseLossBackward0>)
train loss: tensor(6302.7480, grad_fn=<MseLossBackward0>)
train loss: tensor(6770.5229, grad_fn=<MseLossBackward0>)
train loss: tensor(6352.5195, grad_fn=<MseLossBackward0>)
train loss: tensor(8867.0850, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(4509.6465, grad_fn=<MseLossBackward0>)
train loss: tensor(9955.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(9771.2490, grad_fn=<MseLossBackward0>)
train loss: tensor(8740.8496, grad_fn=<MseLossBackward0>)
train loss: tensor(5476.9458, grad_fn=<MseLossBackward0>)
train loss: tensor(8012.0669, grad_fn=<MseLossBackward0>)
train loss: tensor(2135.8301, grad_fn=<MseLossBackward0>)
train loss: tensor(2878.7371, grad_fn=<MseLossBackward0>)
train loss: tensor(7490.2598, grad_fn=<MseLossBackward0>)
train loss: tensor(6357.6226, grad_fn=<MseLossBackward0>)
train loss: tensor(7588.4287, grad_fn=<MseLossBackward0>)
train loss: tensor(7444.5972, grad_fn=<MseLossBackward0>)
train loss: tensor(9816.6514, grad_fn=<MseLossBackward0>)
train loss: tensor(6563.0942, grad_fn=<MseLossBackward0>)
train loss: tensor(4014.3699, grad_fn=<MseLossBackward0>)
train loss: tensor(6892.5278, grad_fn=<MseLossBackward0>)
train loss: tensor(13974.3203, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(10255.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(2969.3743, grad_fn=<MseLossBackward0>)
train loss: tensor(4939.2144, grad_fn=<MseLossBackward0>)
train loss: tensor(9132.9424, grad_fn=<MseLossBackward0>)
train loss: tensor(2245.9336, grad_fn=<MseLossBackward0>)
train loss: tensor(6064.7373, grad_fn=<MseLossBackward0>)
train loss: tensor(14646.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(5512.8042, grad_fn=<MseLossBackward0>)
train loss: tensor(17114.0723, grad_fn=<MseLossBackward0>)
train loss: tensor(2466.8203, grad_fn=<MseLossBackward0>)
train loss: tensor(5081.3062, grad_fn=<MseLossBackward0>)
train loss: tensor(10038.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(7553.4873, grad_fn=<MseLossBackward0>)
train loss: tensor(9162.7490, grad_fn=<MseLossBackward0>)
train loss: tensor(10435.1982, grad_fn=<MseLossBackward0>)
train loss: tensor(9479.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(14718.3643, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(11887.3457, grad_fn=<MseLossBackward0>)
train loss: tensor(9778.9268, grad_fn=<MseLossBackward0>)
train loss: tensor(3263.0217, grad_fn=<MseLossBackward0>)
train loss: tensor(10221.0820, grad_fn=<MseLossBackward0>)
train loss: tensor(10273.7646, grad_fn=<MseLossBackward0>)
train loss: tensor(5934.0649, grad_fn=<MseLossBackward0>)
train loss: tensor(3930.9351, grad_fn=<MseLossBackward0>)
train loss: tensor(5857.7852, grad_fn=<MseLossBackward0>)
train loss: tensor(71047.5391, grad_fn=<MseLossBackward0>)
train loss: tensor(5789.4561, grad_fn=<MseLossBackward0>)
train loss: tensor(8457.0205, grad_fn=<MseLossBackward0>)
train loss: tensor(2837.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(13104.1553, grad_fn=<MseLossBackward0>)
train loss: tensor(7735.2549, grad_fn=<MseLossBackward0>)
train loss: tensor(3681.4324, grad_fn=<MseLossBackward0>)
train loss: tensor(10104.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(5215.7168, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3630.6848, grad_fn=<MseLossBackward0>)
train loss: tensor(29326.6445, grad_fn=<MseLossBackward0>)
train loss: tensor(3990.7031, grad_fn=<MseLossBackward0>)
train loss: tensor(4170.7578, grad_fn=<MseLossBackward0>)
train loss: tensor(6140.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(10093.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(8659.0742, grad_fn=<MseLossBackward0>)
train loss: tensor(2837.2100, grad_fn=<MseLossBackward0>)
train loss: tensor(3305.5608, grad_fn=<MseLossBackward0>)
train loss: tensor(4138.7998, grad_fn=<MseLossBackward0>)
train loss: tensor(6742.8296, grad_fn=<MseLossBackward0>)
train loss: tensor(9434.5127, grad_fn=<MseLossBackward0>)
train loss: tensor(4930.8970, grad_fn=<MseLossBackward0>)
train loss: tensor(7704.4575, grad_fn=<MseLossBackward0>)
train loss: tensor(7383.4370, grad_fn=<MseLossBackward0>)
train loss: tensor(5515.7178, grad_fn=<MseLossBackward0>)
train loss: tensor(75356.1641, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(59473.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(3759.9739, grad_fn=<MseLossBackward0>)
train loss: tensor(3353.1367, grad_fn=<MseLossBackward0>)
train loss: tensor(9053.1533, grad_fn=<MseLossBackward0>)
train loss: tensor(6572.5479, grad_fn=<MseLossBackward0>)
train loss: tensor(9487.9482, grad_fn=<MseLossBackward0>)
train loss: tensor(9468.2783, grad_fn=<MseLossBackward0>)
train loss: tensor(5366.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(6040.9229, grad_fn=<MseLossBackward0>)
train loss: tensor(4067.5261, grad_fn=<MseLossBackward0>)
train loss: tensor(8899.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(5151.6836, grad_fn=<MseLossBackward0>)
train loss: tensor(2294.6111, grad_fn=<MseLossBackward0>)
train loss: tensor(8147.1353, grad_fn=<MseLossBackward0>)
train loss: tensor(2780.1162, grad_fn=<MseLossBackward0>)
train loss: tensor(2620.9771, grad_fn=<MseLossBackward0>)
train loss: tensor(6418.5112, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(2751.5330, grad_fn=<MseLossBackward0>)
train loss: tensor(3314.9556, grad_fn=<MseLossBackward0>)
train loss: tensor(7144.4888, grad_fn=<MseLossBackward0>)
train loss: tensor(6357.9692, grad_fn=<MseLossBackward0>)
train loss: tensor(8692.2998, grad_fn=<MseLossBackward0>)
train loss: tensor(5724.7183, grad_fn=<MseLossBackward0>)
train loss: tensor(3633.5129, grad_fn=<MseLossBackward0>)
train loss: tensor(4455.2935, grad_fn=<MseLossBackward0>)
train loss: tensor(8514.7764, grad_fn=<MseLossBackward0>)
train loss: tensor(10889.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(12721.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(4415.8745, grad_fn=<MseLossBackward0>)
train loss: tensor(5126.1133, grad_fn=<MseLossBackward0>)
train loss: tensor(6532.3154, grad_fn=<MseLossBackward0>)
train loss: tensor(2714.0642, grad_fn=<MseLossBackward0>)
train loss: tensor(6798.2314, grad_fn=<MseLossBackward0>)
train loss: tensor(3884.6248, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(8389.3164, grad_fn=<MseLossBackward0>)
train loss: tensor(5176.5166, grad_fn=<MseLossBackward0>)
train loss: tensor(11494.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(9717.1201, grad_fn=<MseLossBackward0>)
train loss: tensor(4867.0327, grad_fn=<MseLossBackward0>)
train loss: tensor(4689.5142, grad_fn=<MseLossBackward0>)
train loss: tensor(5867.5098, grad_fn=<MseLossBackward0>)
train loss: tensor(5511.8354, grad_fn=<MseLossBackward0>)
train loss: tensor(62573.6680, grad_fn=<MseLossBackward0>)
train loss: tensor(9842.7910, grad_fn=<MseLossBackward0>)
train loss: tensor(3984.0767, grad_fn=<MseLossBackward0>)
train loss: tensor(3561.5605, grad_fn=<MseLossBackward0>)
train loss: tensor(3217.9856, grad_fn=<MseLossBackward0>)
train loss: tensor(6857.3672, grad_fn=<MseLossBackward0>)
train loss: tensor(12213.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(5390.6582, grad_fn=<MseLossBackward0>)
train loss: tensor(4721.2954, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5595.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(9145.3076, grad_fn=<MseLossBackward0>)
train loss: tensor(6427.9512, grad_fn=<MseLossBackward0>)
train loss: tensor(5910.7793, grad_fn=<MseLossBackward0>)
train loss: tensor(5905.5405, grad_fn=<MseLossBackward0>)
train loss: tensor(17577.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(3932.9055, grad_fn=<MseLossBackward0>)
train loss: tensor(3239.0540, grad_fn=<MseLossBackward0>)
train loss: tensor(13036.1846, grad_fn=<MseLossBackward0>)
train loss: tensor(4308.5718, grad_fn=<MseLossBackward0>)
train loss: tensor(8210.8809, grad_fn=<MseLossBackward0>)
train loss: tensor(6966.1011, grad_fn=<MseLossBackward0>)
train loss: tensor(5517.0132, grad_fn=<MseLossBackward0>)
train loss: tensor(3091.5208, grad_fn=<MseLossBackward0>)
train loss: tensor(3830.6995, grad_fn=<MseLossBackward0>)
train loss: tensor(3938.5759, grad_fn=<MseLossBackward0>)
train loss: tensor(1967.0767, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(2243.1541, grad_fn=<MseLossBackward0>)
train loss: tensor(11960.7363, grad_fn=<MseLossBackward0>)
train loss: tensor(5002.4331, grad_fn=<MseLossBackward0>)
train loss: tensor(5032.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(10714.5537, grad_fn=<MseLossBackward0>)
train loss: tensor(10069.0078, grad_fn=<MseLossBackward0>)
train loss: tensor(8428.7852, grad_fn=<MseLossBackward0>)
train loss: tensor(8581.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(3553.6252, grad_fn=<MseLossBackward0>)
train loss: tensor(9358.9375, grad_fn=<MseLossBackward0>)
train loss: tensor(10667.4961, grad_fn=<MseLossBackward0>)
train loss: tensor(7223.0308, grad_fn=<MseLossBackward0>)
train loss: tensor(13587.9844, grad_fn=<MseLossBackward0>)
train loss: tensor(3761.6128, grad_fn=<MseLossBackward0>)
train loss: tensor(7751.6006, grad_fn=<MseLossBackward0>)
train loss: tensor(13759.6885, grad_fn=<MseLossBackward0>)
train loss: tensor(8770.2129, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(7057.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(3639.3926, grad_fn=<MseLossBackward0>)
train loss: tensor(3815.1458, grad_fn=<MseLossBackward0>)
train loss: tensor(5649.6660, grad_fn=<MseLossBackward0>)
train loss: tensor(4564.5269, grad_fn=<MseLossBackward0>)
train loss: tensor(6973.9766, grad_fn=<MseLossBackward0>)
train loss: tensor(11503.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(11423.3662, grad_fn=<MseLossBackward0>)
train loss: tensor(7375.7388, grad_fn=<MseLossBackward0>)
train loss: tensor(3203.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(3259.1221, grad_fn=<MseLossBackward0>)
train loss: tensor(5724.1006, grad_fn=<MseLossBackward0>)
train loss: tensor(4566.0571, grad_fn=<MseLossBackward0>)
train loss: tensor(3650.3694, grad_fn=<MseLossBackward0>)
train loss: tensor(7062.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(6230.8872, grad_fn=<MseLossBackward0>)
train loss: tensor(13439.3936, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(4341.6250, grad_fn=<MseLossBackward0>)
train loss: tensor(2079.7461, grad_fn=<MseLossBackward0>)
train loss: tensor(6122.9751, grad_fn=<MseLossBackward0>)
train loss: tensor(7455.1299, grad_fn=<MseLossBackward0>)
train loss: tensor(1422.1240, grad_fn=<MseLossBackward0>)
train loss: tensor(3220.2017, grad_fn=<MseLossBackward0>)
train loss: tensor(2747.3152, grad_fn=<MseLossBackward0>)
train loss: tensor(2086.8459, grad_fn=<MseLossBackward0>)
train loss: tensor(4404.6841, grad_fn=<MseLossBackward0>)
train loss: tensor(3717.2268, grad_fn=<MseLossBackward0>)
train loss: tensor(11056.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(3569.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(9191.5273, grad_fn=<MseLossBackward0>)
train loss: tensor(3660.2412, grad_fn=<MseLossBackward0>)
train loss: tensor(11107.5967, grad_fn=<MseLossBackward0>)
train loss: tensor(3998.5706, grad_fn=<MseLossBackward0>)
train loss: tensor(2876.9800, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(6367.2607, grad_fn=<MseLossBackward0>)
train loss: tensor(1738.6180, grad_fn=<MseLossBackward0>)
train loss: tensor(85616.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(12354.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(3648.6211, grad_fn=<MseLossBackward0>)
train loss: tensor(3026.9189, grad_fn=<MseLossBackward0>)
train loss: tensor(5824.9810, grad_fn=<MseLossBackward0>)
train loss: tensor(6157.6533, grad_fn=<MseLossBackward0>)
train loss: tensor(3810.4045, grad_fn=<MseLossBackward0>)
train loss: tensor(13846.6562, grad_fn=<MseLossBackward0>)
train loss: tensor(7844.3950, grad_fn=<MseLossBackward0>)
train loss: tensor(10824.5049, grad_fn=<MseLossBackward0>)
train loss: tensor(3910.3665, grad_fn=<MseLossBackward0>)
train loss: tensor(7695.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(6439.9395, grad_fn=<MseLossBackward0>)
train loss: tensor(18264.8203, grad_fn=<MseLossBackward0>)
train loss: tensor(10706.1270, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(3708.6016, grad_fn=<MseLossBackward0>)
train loss: tensor(3731.8518, grad_fn=<MseLossBackward0>)
train loss: tensor(12311.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(17186.5566, grad_fn=<MseLossBackward0>)
train loss: tensor(4094.0146, grad_fn=<MseLossBackward0>)
train loss: tensor(13849.7344, grad_fn=<MseLossBackward0>)
train loss: tensor(5142.0005, grad_fn=<MseLossBackward0>)
train loss: tensor(2050.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(15059.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(2791.5364, grad_fn=<MseLossBackward0>)
train loss: tensor(6392.7808, grad_fn=<MseLossBackward0>)
train loss: tensor(7476.3989, grad_fn=<MseLossBackward0>)
train loss: tensor(2891.9177, grad_fn=<MseLossBackward0>)
train loss: tensor(50409.7266, grad_fn=<MseLossBackward0>)
train loss: tensor(7690.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(7753.5728, grad_fn=<MseLossBackward0>)
train loss: tensor(2776.7561, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(12154.4238, grad_fn=<MseLossBackward0>)
train loss: tensor(3902.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(11281.4189, grad_fn=<MseLossBackward0>)
train loss: tensor(3796.2280, grad_fn=<MseLossBackward0>)
train loss: tensor(4733.3013, grad_fn=<MseLossBackward0>)
train loss: tensor(9450.2578, grad_fn=<MseLossBackward0>)
train loss: tensor(5317.5444, grad_fn=<MseLossBackward0>)
train loss: tensor(3362.2205, grad_fn=<MseLossBackward0>)
train loss: tensor(1847.7626, grad_fn=<MseLossBackward0>)
train loss: tensor(4559.7871, grad_fn=<MseLossBackward0>)
train loss: tensor(5748.4062, grad_fn=<MseLossBackward0>)
train loss: tensor(6473.0962, grad_fn=<MseLossBackward0>)
train loss: tensor(4662.7158, grad_fn=<MseLossBackward0>)
train loss: tensor(2520.5505, grad_fn=<MseLossBackward0>)
train loss: tensor(4877.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(11207.6045, grad_fn=<MseLossBackward0>)
train loss: tensor(2060.4263, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(8847.8428, grad_fn=<MseLossBackward0>)
train loss: tensor(10366.8477, grad_fn=<MseLossBackward0>)
train loss: tensor(10902.7842, grad_fn=<MseLossBackward0>)
train loss: tensor(4646.1089, grad_fn=<MseLossBackward0>)
train loss: tensor(10249.2109, grad_fn=<MseLossBackward0>)
train loss: tensor(14862.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(2434.6509, grad_fn=<MseLossBackward0>)
train loss: tensor(9179.7139, grad_fn=<MseLossBackward0>)
train loss: tensor(3038.7153, grad_fn=<MseLossBackward0>)
train loss: tensor(3116.6943, grad_fn=<MseLossBackward0>)
train loss: tensor(74355.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(5116.4673, grad_fn=<MseLossBackward0>)
train loss: tensor(4832.7632, grad_fn=<MseLossBackward0>)
train loss: tensor(8604.9229, grad_fn=<MseLossBackward0>)
train loss: tensor(6050.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(10251.8271, grad_fn=<MseLossBackward0>)
train loss: tensor(7781.0908, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(6106.0098, grad_fn=<MseLossBackward0>)
train loss: tensor(2503.2617, grad_fn=<MseLossBackward0>)
train loss: tensor(8534.7090, grad_fn=<MseLossBackward0>)
train loss: tensor(1729.5115, grad_fn=<MseLossBackward0>)
train loss: tensor(5342.1855, grad_fn=<MseLossBackward0>)
train loss: tensor(8100.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(1688.5132, grad_fn=<MseLossBackward0>)
train loss: tensor(3018.6877, grad_fn=<MseLossBackward0>)
train loss: tensor(3099.7898, grad_fn=<MseLossBackward0>)
train loss: tensor(7294.3662, grad_fn=<MseLossBackward0>)
train loss: tensor(3075.2256, grad_fn=<MseLossBackward0>)
train loss: tensor(6847.9878, grad_fn=<MseLossBackward0>)
train loss: tensor(7878.0361, grad_fn=<MseLossBackward0>)
train loss: tensor(6461.6284, grad_fn=<MseLossBackward0>)
train loss: tensor(2715.7480, grad_fn=<MseLossBackward0>)
train loss: tensor(2709.8213, grad_fn=<MseLossBackward0>)
train loss: tensor(10623.8096, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(9561.9463, grad_fn=<MseLossBackward0>)
train loss: tensor(13523.8564, grad_fn=<MseLossBackward0>)
train loss: tensor(2951.7583, grad_fn=<MseLossBackward0>)
train loss: tensor(14769.6973, grad_fn=<MseLossBackward0>)
train loss: tensor(5496.2461, grad_fn=<MseLossBackward0>)
train loss: tensor(21165.7715, grad_fn=<MseLossBackward0>)
train loss: tensor(3455.2522, grad_fn=<MseLossBackward0>)
train loss: tensor(11800.2363, grad_fn=<MseLossBackward0>)
train loss: tensor(4832.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(12550.4160, grad_fn=<MseLossBackward0>)
train loss: tensor(7093.2979, grad_fn=<MseLossBackward0>)
train loss: tensor(6163.0083, grad_fn=<MseLossBackward0>)
train loss: tensor(11514.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(10636.0088, grad_fn=<MseLossBackward0>)
train loss: tensor(2531.3523, grad_fn=<MseLossBackward0>)
train loss: tensor(5474.2412, grad_fn=<MseLossBackward0>)
train loss: tensor(5627.9873, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(5626.4868, grad_fn=<MseLossBackward0>)
train loss: tensor(3204.2952, grad_fn=<MseLossBackward0>)
train loss: tensor(10655.7686, grad_fn=<MseLossBackward0>)
train loss: tensor(4048.2952, grad_fn=<MseLossBackward0>)
train loss: tensor(37090.4609, grad_fn=<MseLossBackward0>)
train loss: tensor(4128.1226, grad_fn=<MseLossBackward0>)
train loss: tensor(4213.3647, grad_fn=<MseLossBackward0>)
train loss: tensor(7745.5869, grad_fn=<MseLossBackward0>)
train loss: tensor(4908.9702, grad_fn=<MseLossBackward0>)
train loss: tensor(4281.7070, grad_fn=<MseLossBackward0>)
train loss: tensor(13588.9951, grad_fn=<MseLossBackward0>)
train loss: tensor(4837.6592, grad_fn=<MseLossBackward0>)
train loss: tensor(6979.2314, grad_fn=<MseLossBackward0>)
train loss: tensor(11928.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(7708.2583, grad_fn=<MseLossBackward0>)
train loss: tensor(10756.2852, grad_fn=<MseLossBackward0>)
train loss: tensor(7612.6846, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(11053.3242, grad_fn=<MseLossBackward0>)
train loss: tensor(5646.5229, grad_fn=<MseLossBackward0>)
train loss: tensor(6097.9849, grad_fn=<MseLossBackward0>)
train loss: tensor(3717.0693, grad_fn=<MseLossBackward0>)
train loss: tensor(14911.4072, grad_fn=<MseLossBackward0>)
train loss: tensor(2818.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(7698.0498, grad_fn=<MseLossBackward0>)
train loss: tensor(3619.8757, grad_fn=<MseLossBackward0>)
train loss: tensor(3687.6851, grad_fn=<MseLossBackward0>)
train loss: tensor(5291.1372, grad_fn=<MseLossBackward0>)
train loss: tensor(4607.5347, grad_fn=<MseLossBackward0>)
train loss: tensor(3855.0671, grad_fn=<MseLossBackward0>)
train loss: tensor(3115.3154, grad_fn=<MseLossBackward0>)
train loss: tensor(11473.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(6477.5938, grad_fn=<MseLossBackward0>)
train loss: tensor(5163.6362, grad_fn=<MseLossBackward0>)
train loss: tensor(4081.0112, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(5098.7715, grad_fn=<MseLossBackward0>)
train loss: tensor(13699.5166, grad_fn=<MseLossBackward0>)
train loss: tensor(2921.6162, grad_fn=<MseLossBackward0>)
train loss: tensor(7399.1489, grad_fn=<MseLossBackward0>)
train loss: tensor(3775.0566, grad_fn=<MseLossBackward0>)
train loss: tensor(4171.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(3265.6323, grad_fn=<MseLossBackward0>)
train loss: tensor(11645.6738, grad_fn=<MseLossBackward0>)
train loss: tensor(9158.1240, grad_fn=<MseLossBackward0>)
train loss: tensor(6447.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(4367.6553, grad_fn=<MseLossBackward0>)
train loss: tensor(53474.4922, grad_fn=<MseLossBackward0>)
train loss: tensor(5657.3379, grad_fn=<MseLossBackward0>)
train loss: tensor(3503.9702, grad_fn=<MseLossBackward0>)
train loss: tensor(6215.4805, grad_fn=<MseLossBackward0>)
train loss: tensor(5011.5288, grad_fn=<MseLossBackward0>)
train loss: tensor(12089.6836, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(5748.7041, grad_fn=<MseLossBackward0>)
train loss: tensor(6560.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(6291.3525, grad_fn=<MseLossBackward0>)
train loss: tensor(7302.9951, grad_fn=<MseLossBackward0>)
train loss: tensor(2742.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(1654.6051, grad_fn=<MseLossBackward0>)
train loss: tensor(68791.7031, grad_fn=<MseLossBackward0>)
train loss: tensor(8267.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(2993.8120, grad_fn=<MseLossBackward0>)
train loss: tensor(5407.8369, grad_fn=<MseLossBackward0>)
train loss: tensor(5510.3447, grad_fn=<MseLossBackward0>)
train loss: tensor(5655.3682, grad_fn=<MseLossBackward0>)
train loss: tensor(70217.8906, grad_fn=<MseLossBackward0>)
train loss: tensor(3764.7732, grad_fn=<MseLossBackward0>)
train loss: tensor(4647.6792, grad_fn=<MseLossBackward0>)
train loss: tensor(5272.0122, grad_fn=<MseLossBackward0>)
train loss: tensor(8370.6602, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(8413.2314, grad_fn=<MseLossBackward0>)
train loss: tensor(12080.9404, grad_fn=<MseLossBackward0>)
train loss: tensor(3866.2961, grad_fn=<MseLossBackward0>)
train loss: tensor(7693.5503, grad_fn=<MseLossBackward0>)
train loss: tensor(7243.2588, grad_fn=<MseLossBackward0>)
train loss: tensor(7025.7871, grad_fn=<MseLossBackward0>)
train loss: tensor(6714.5234, grad_fn=<MseLossBackward0>)
train loss: tensor(8629.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(3167.5330, grad_fn=<MseLossBackward0>)
train loss: tensor(3833.2346, grad_fn=<MseLossBackward0>)
train loss: tensor(5498.7412, grad_fn=<MseLossBackward0>)
train loss: tensor(3927.6934, grad_fn=<MseLossBackward0>)
train loss: tensor(5277.9585, grad_fn=<MseLossBackward0>)
train loss: tensor(70385.8125, grad_fn=<MseLossBackward0>)
train loss: tensor(2386.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(4117.4976, grad_fn=<MseLossBackward0>)
train loss: tensor(4986.7402, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(7270.7778, grad_fn=<MseLossBackward0>)
train loss: tensor(6547.1069, grad_fn=<MseLossBackward0>)
train loss: tensor(6096.2686, grad_fn=<MseLossBackward0>)
train loss: tensor(9444.0332, grad_fn=<MseLossBackward0>)
train loss: tensor(76014.7969, grad_fn=<MseLossBackward0>)
train loss: tensor(10731.3203, grad_fn=<MseLossBackward0>)
train loss: tensor(8994.9023, grad_fn=<MseLossBackward0>)
train loss: tensor(7027.7920, grad_fn=<MseLossBackward0>)
train loss: tensor(10822.4883, grad_fn=<MseLossBackward0>)
train loss: tensor(5063.5068, grad_fn=<MseLossBackward0>)
train loss: tensor(3504.6946, grad_fn=<MseLossBackward0>)
train loss: tensor(5209.6274, grad_fn=<MseLossBackward0>)
train loss: tensor(3654.6394, grad_fn=<MseLossBackward0>)
train loss: tensor(9831.1953, grad_fn=<MseLossBackward0>)
train loss: tensor(4252.9536, grad_fn=<MseLossBackward0>)
train loss: tensor(20695.3047, grad_fn=<MseLossBackward0>)
train loss: tensor(3043.1484, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(5642.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(5035.1836, grad_fn=<MseLossBackward0>)
train loss: tensor(4878.8018, grad_fn=<MseLossBackward0>)
train loss: tensor(6672.8281, grad_fn=<MseLossBackward0>)
train loss: tensor(13425.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(6011.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(15686.8779, grad_fn=<MseLossBackward0>)
train loss: tensor(3302.2686, grad_fn=<MseLossBackward0>)
train loss: tensor(5406.1768, grad_fn=<MseLossBackward0>)
train loss: tensor(3006.3728, grad_fn=<MseLossBackward0>)
train loss: tensor(5559.1309, grad_fn=<MseLossBackward0>)
train loss: tensor(13496.8779, grad_fn=<MseLossBackward0>)
train loss: tensor(6741.3618, grad_fn=<MseLossBackward0>)
train loss: tensor(4172.5840, grad_fn=<MseLossBackward0>)
train loss: tensor(4476.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(3368.4917, grad_fn=<MseLossBackward0>)
train loss: tensor(12844.4375, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(5229.1885, grad_fn=<MseLossBackward0>)
train loss: tensor(18145.5488, grad_fn=<MseLossBackward0>)
train loss: tensor(3240.3855, grad_fn=<MseLossBackward0>)
train loss: tensor(4099.5347, grad_fn=<MseLossBackward0>)
train loss: tensor(6387.6387, grad_fn=<MseLossBackward0>)
train loss: tensor(4813.5889, grad_fn=<MseLossBackward0>)
train loss: tensor(11922.1592, grad_fn=<MseLossBackward0>)
train loss: tensor(5073.0029, grad_fn=<MseLossBackward0>)
train loss: tensor(5806.9839, grad_fn=<MseLossBackward0>)
train loss: tensor(6831.7290, grad_fn=<MseLossBackward0>)
train loss: tensor(7714.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(3344.1006, grad_fn=<MseLossBackward0>)
train loss: tensor(3987.5071, grad_fn=<MseLossBackward0>)
train loss: tensor(2645.6956, grad_fn=<MseLossBackward0>)
train loss: tensor(7645.1665, grad_fn=<MseLossBackward0>)
train loss: tensor(3654.3386, grad_fn=<MseLossBackward0>)
train loss: tensor(4428.4067, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(9054.8057, grad_fn=<MseLossBackward0>)
train loss: tensor(7059.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(14027.1709, grad_fn=<MseLossBackward0>)
train loss: tensor(7589.8892, grad_fn=<MseLossBackward0>)
train loss: tensor(6642.9639, grad_fn=<MseLossBackward0>)
train loss: tensor(12543.6182, grad_fn=<MseLossBackward0>)
train loss: tensor(9155.6338, grad_fn=<MseLossBackward0>)
train loss: tensor(4038.2041, grad_fn=<MseLossBackward0>)
train loss: tensor(4866.3589, grad_fn=<MseLossBackward0>)
train loss: tensor(11336.3496, grad_fn=<MseLossBackward0>)
train loss: tensor(4750.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(5161.5669, grad_fn=<MseLossBackward0>)
train loss: tensor(8859.7314, grad_fn=<MseLossBackward0>)
train loss: tensor(6613.1074, grad_fn=<MseLossBackward0>)
train loss: tensor(5058.0698, grad_fn=<MseLossBackward0>)
train loss: tensor(6868.4731, grad_fn=<MseLossBackward0>)
train loss: tensor(1664.1859, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(10451.9795, grad_fn=<MseLossBackward0>)
train loss: tensor(8552.5576, grad_fn=<MseLossBackward0>)
train loss: tensor(12398.5352, grad_fn=<MseLossBackward0>)
train loss: tensor(3460.5212, grad_fn=<MseLossBackward0>)
train loss: tensor(7434.6851, grad_fn=<MseLossBackward0>)
train loss: tensor(10493.5664, grad_fn=<MseLossBackward0>)
train loss: tensor(7038.3994, grad_fn=<MseLossBackward0>)
train loss: tensor(10882.9043, grad_fn=<MseLossBackward0>)
train loss: tensor(10269.5811, grad_fn=<MseLossBackward0>)
train loss: tensor(5504.3066, grad_fn=<MseLossBackward0>)
train loss: tensor(9712.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(6283.7773, grad_fn=<MseLossBackward0>)
train loss: tensor(7293.8242, grad_fn=<MseLossBackward0>)
train loss: tensor(5400.8276, grad_fn=<MseLossBackward0>)
train loss: tensor(12851.0469, grad_fn=<MseLossBackward0>)
train loss: tensor(4934.7012, grad_fn=<MseLossBackward0>)
train loss: tensor(6621.0386, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(4813.1929, grad_fn=<MseLossBackward0>)
train loss: tensor(83620.6328, grad_fn=<MseLossBackward0>)
train loss: tensor(3894.2761, grad_fn=<MseLossBackward0>)
train loss: tensor(6645.2588, grad_fn=<MseLossBackward0>)
train loss: tensor(9475.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(3908.4814, grad_fn=<MseLossBackward0>)
train loss: tensor(2971.3792, grad_fn=<MseLossBackward0>)
train loss: tensor(5815.1572, grad_fn=<MseLossBackward0>)
train loss: tensor(15259.2676, grad_fn=<MseLossBackward0>)
train loss: tensor(4546.4902, grad_fn=<MseLossBackward0>)
train loss: tensor(3498.6628, grad_fn=<MseLossBackward0>)
train loss: tensor(6610.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(6406.7051, grad_fn=<MseLossBackward0>)
train loss: tensor(7081.5596, grad_fn=<MseLossBackward0>)
train loss: tensor(36576.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(5089.7134, grad_fn=<MseLossBackward0>)
train loss: tensor(3896.5427, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(6399.8882, grad_fn=<MseLossBackward0>)
train loss: tensor(3991.1758, grad_fn=<MseLossBackward0>)
train loss: tensor(3761.0681, grad_fn=<MseLossBackward0>)
train loss: tensor(5537.9229, grad_fn=<MseLossBackward0>)
train loss: tensor(22915.7715, grad_fn=<MseLossBackward0>)
train loss: tensor(4696.2212, grad_fn=<MseLossBackward0>)
train loss: tensor(7455.2412, grad_fn=<MseLossBackward0>)
train loss: tensor(3680.2048, grad_fn=<MseLossBackward0>)
train loss: tensor(3744.6641, grad_fn=<MseLossBackward0>)
train loss: tensor(5434.7251, grad_fn=<MseLossBackward0>)
train loss: tensor(9005.4971, grad_fn=<MseLossBackward0>)
train loss: tensor(7819.8804, grad_fn=<MseLossBackward0>)
train loss: tensor(8057.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(7516.8828, grad_fn=<MseLossBackward0>)
train loss: tensor(3800.3806, grad_fn=<MseLossBackward0>)
train loss: tensor(3541.8694, grad_fn=<MseLossBackward0>)
train loss: tensor(6981.8130, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(9455.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(6038.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(10953.8896, grad_fn=<MseLossBackward0>)
train loss: tensor(5402.6934, grad_fn=<MseLossBackward0>)
train loss: tensor(16023.8516, grad_fn=<MseLossBackward0>)
train loss: tensor(6044.5562, grad_fn=<MseLossBackward0>)
train loss: tensor(2954.2678, grad_fn=<MseLossBackward0>)
train loss: tensor(2764.7290, grad_fn=<MseLossBackward0>)
train loss: tensor(6761.3511, grad_fn=<MseLossBackward0>)
train loss: tensor(25092.3867, grad_fn=<MseLossBackward0>)
train loss: tensor(7779.8364, grad_fn=<MseLossBackward0>)
train loss: tensor(3366.4180, grad_fn=<MseLossBackward0>)
train loss: tensor(9083.2900, grad_fn=<MseLossBackward0>)
train loss: tensor(4250.8115, grad_fn=<MseLossBackward0>)
train loss: tensor(2685.3423, grad_fn=<MseLossBackward0>)
train loss: tensor(10394.8691, grad_fn=<MseLossBackward0>)
train loss: tensor(12758.2490, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(13921.1221, grad_fn=<MseLossBackward0>)
train loss: tensor(5195.4868, grad_fn=<MseLossBackward0>)
train loss: tensor(8130.2559, grad_fn=<MseLossBackward0>)
train loss: tensor(4889.3647, grad_fn=<MseLossBackward0>)
train loss: tensor(7784.9238, grad_fn=<MseLossBackward0>)
train loss: tensor(4798.7759, grad_fn=<MseLossBackward0>)
train loss: tensor(7143.3325, grad_fn=<MseLossBackward0>)
train loss: tensor(2968.5713, grad_fn=<MseLossBackward0>)
train loss: tensor(24623.4941, grad_fn=<MseLossBackward0>)
train loss: tensor(7213.6772, grad_fn=<MseLossBackward0>)
train loss: tensor(9132.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(2461.7920, grad_fn=<MseLossBackward0>)
train loss: tensor(3138.8210, grad_fn=<MseLossBackward0>)
train loss: tensor(2230.2766, grad_fn=<MseLossBackward0>)
train loss: tensor(8519.0039, grad_fn=<MseLossBackward0>)
train loss: tensor(7285.5527, grad_fn=<MseLossBackward0>)
train loss: tensor(11494.8525, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(2802.4319, grad_fn=<MseLossBackward0>)
train loss: tensor(4999.1636, grad_fn=<MseLossBackward0>)
train loss: tensor(5811.4678, grad_fn=<MseLossBackward0>)
train loss: tensor(5882.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(6217.1997, grad_fn=<MseLossBackward0>)
train loss: tensor(5725.9370, grad_fn=<MseLossBackward0>)
train loss: tensor(3703.1470, grad_fn=<MseLossBackward0>)
train loss: tensor(3982.2153, grad_fn=<MseLossBackward0>)
train loss: tensor(1159.9012, grad_fn=<MseLossBackward0>)
train loss: tensor(3970.9048, grad_fn=<MseLossBackward0>)
train loss: tensor(9286.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(10820.3730, grad_fn=<MseLossBackward0>)
train loss: tensor(10966.9648, grad_fn=<MseLossBackward0>)
train loss: tensor(7260.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(12329.9697, grad_fn=<MseLossBackward0>)
train loss: tensor(13805.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(3234.9109, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(1365.0027, grad_fn=<MseLossBackward0>)
train loss: tensor(10701.7734, grad_fn=<MseLossBackward0>)
train loss: tensor(8783.4365, grad_fn=<MseLossBackward0>)
train loss: tensor(4342.7393, grad_fn=<MseLossBackward0>)
train loss: tensor(5769.1973, grad_fn=<MseLossBackward0>)
train loss: tensor(4799.2954, grad_fn=<MseLossBackward0>)
train loss: tensor(4137.2212, grad_fn=<MseLossBackward0>)
train loss: tensor(6319.6235, grad_fn=<MseLossBackward0>)
train loss: tensor(3966.4089, grad_fn=<MseLossBackward0>)
train loss: tensor(8372.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(3525.7805, grad_fn=<MseLossBackward0>)
train loss: tensor(2246.9580, grad_fn=<MseLossBackward0>)
train loss: tensor(9092.0957, grad_fn=<MseLossBackward0>)
train loss: tensor(6758.5176, grad_fn=<MseLossBackward0>)
train loss: tensor(9241.0596, grad_fn=<MseLossBackward0>)
train loss: tensor(27317.1914, grad_fn=<MseLossBackward0>)
train loss: tensor(9735.6045, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4742.6274, grad_fn=<MseLossBackward0>)
train loss: tensor(2320.0542, grad_fn=<MseLossBackward0>)
train loss: tensor(8757.5371, grad_fn=<MseLossBackward0>)
train loss: tensor(12022.5791, grad_fn=<MseLossBackward0>)
train loss: tensor(8556.8848, grad_fn=<MseLossBackward0>)
train loss: tensor(9430.8857, grad_fn=<MseLossBackward0>)
train loss: tensor(6079.6289, grad_fn=<MseLossBackward0>)
train loss: tensor(9884.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(4247.6387, grad_fn=<MseLossBackward0>)
train loss: tensor(4458.4448, grad_fn=<MseLossBackward0>)
train loss: tensor(4938.3662, grad_fn=<MseLossBackward0>)
train loss: tensor(5474.5522, grad_fn=<MseLossBackward0>)
train loss: tensor(5108.2104, grad_fn=<MseLossBackward0>)
train loss: tensor(2304.6658, grad_fn=<MseLossBackward0>)
train loss: tensor(4801.3291, grad_fn=<MseLossBackward0>)
train loss: tensor(9313.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(13009.1045, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(9263.1270, grad_fn=<MseLossBackward0>)
train loss: tensor(7017.7056, grad_fn=<MseLossBackward0>)
train loss: tensor(4165.2007, grad_fn=<MseLossBackward0>)
train loss: tensor(9597.4922, grad_fn=<MseLossBackward0>)
train loss: tensor(3587.1577, grad_fn=<MseLossBackward0>)
train loss: tensor(6967.8545, grad_fn=<MseLossBackward0>)
train loss: tensor(6109.9229, grad_fn=<MseLossBackward0>)
train loss: tensor(6772.4097, grad_fn=<MseLossBackward0>)
train loss: tensor(3823.1379, grad_fn=<MseLossBackward0>)
train loss: tensor(3279.8484, grad_fn=<MseLossBackward0>)
train loss: tensor(2783.2834, grad_fn=<MseLossBackward0>)
train loss: tensor(9979.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(6874.8569, grad_fn=<MseLossBackward0>)
train loss: tensor(2219.1243, grad_fn=<MseLossBackward0>)
train loss: tensor(6718.0103, grad_fn=<MseLossBackward0>)
train loss: tensor(2922.1260, grad_fn=<MseLossBackward0>)
train loss: tensor(9095.7568, grad_fn=<MseLossBackward0>)
train loss: te

train loss: tensor(8989.9805, grad_fn=<MseLossBackward0>)
train loss: tensor(3588.7646, grad_fn=<MseLossBackward0>)
train loss: tensor(1933.7946, grad_fn=<MseLossBackward0>)
train loss: tensor(2849.4819, grad_fn=<MseLossBackward0>)
train loss: tensor(10824.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(5497.3657, grad_fn=<MseLossBackward0>)
train loss: tensor(1832.4622, grad_fn=<MseLossBackward0>)
train loss: tensor(6707.0308, grad_fn=<MseLossBackward0>)
train loss: tensor(2795.8621, grad_fn=<MseLossBackward0>)
train loss: tensor(6603.3682, grad_fn=<MseLossBackward0>)
train loss: tensor(2473.2722, grad_fn=<MseLossBackward0>)
train loss: tensor(9321.2715, grad_fn=<MseLossBackward0>)
train loss: tensor(4440.0513, grad_fn=<MseLossBackward0>)
train loss: tensor(8505.0693, grad_fn=<MseLossBackward0>)
train loss: tensor(10827.9336, grad_fn=<MseLossBackward0>)
train loss: tensor(4694.2534, grad_fn=<MseLossBackward0>)
train loss: tensor(5522.1025, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(7501.3438, grad_fn=<MseLossBackward0>)
train loss: tensor(38414.5859, grad_fn=<MseLossBackward0>)
train loss: tensor(3680.3921, grad_fn=<MseLossBackward0>)
train loss: tensor(7695.2852, grad_fn=<MseLossBackward0>)
train loss: tensor(2800.6162, grad_fn=<MseLossBackward0>)
train loss: tensor(11291.1387, grad_fn=<MseLossBackward0>)
train loss: tensor(2736.0056, grad_fn=<MseLossBackward0>)
train loss: tensor(8498.2305, grad_fn=<MseLossBackward0>)
train loss: tensor(7804.1284, grad_fn=<MseLossBackward0>)
train loss: tensor(4902.7842, grad_fn=<MseLossBackward0>)
train loss: tensor(8699.5449, grad_fn=<MseLossBackward0>)
train loss: tensor(5583.7559, grad_fn=<MseLossBackward0>)
train loss: tensor(1982.9353, grad_fn=<MseLossBackward0>)
train loss: tensor(11971.7422, grad_fn=<MseLossBackward0>)
train loss: tensor(8956.2656, grad_fn=<MseLossBackward0>)
train loss: tensor(3293.4290, grad_fn=<MseLossBackward0>)
train loss: tensor(12774.1182, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(8804.2695, grad_fn=<MseLossBackward0>)
train loss: tensor(13267.3525, grad_fn=<MseLossBackward0>)
train loss: tensor(3441.1409, grad_fn=<MseLossBackward0>)
train loss: tensor(6474.3745, grad_fn=<MseLossBackward0>)
train loss: tensor(7390.2705, grad_fn=<MseLossBackward0>)
train loss: tensor(6396.3970, grad_fn=<MseLossBackward0>)
train loss: tensor(8546.4590, grad_fn=<MseLossBackward0>)
train loss: tensor(11442.9912, grad_fn=<MseLossBackward0>)
train loss: tensor(9997.4316, grad_fn=<MseLossBackward0>)
train loss: tensor(14194.1162, grad_fn=<MseLossBackward0>)
train loss: tensor(12752.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(6341.1582, grad_fn=<MseLossBackward0>)
train loss: tensor(5480.8975, grad_fn=<MseLossBackward0>)
train loss: tensor(12729.5205, grad_fn=<MseLossBackward0>)
train loss: tensor(4826.3125, grad_fn=<MseLossBackward0>)
train loss: tensor(9428.1104, grad_fn=<MseLossBackward0>)
train loss: tensor(5503.6641, grad_fn=<MseLossBackward0>)
train los

train loss: tensor(4192.7671, grad_fn=<MseLossBackward0>)
train loss: tensor(5936.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(6864.8071, grad_fn=<MseLossBackward0>)
train loss: tensor(13327.7910, grad_fn=<MseLossBackward0>)
train loss: tensor(6505.0215, grad_fn=<MseLossBackward0>)
train loss: tensor(70362.5781, grad_fn=<MseLossBackward0>)
train loss: tensor(3210.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(10109.3936, grad_fn=<MseLossBackward0>)
train loss: tensor(6802.7979, grad_fn=<MseLossBackward0>)
train loss: tensor(13200.0830, grad_fn=<MseLossBackward0>)
train loss: tensor(7357.2988, grad_fn=<MseLossBackward0>)
train loss: tensor(7773.6787, grad_fn=<MseLossBackward0>)
train loss: tensor(10851.7744, grad_fn=<MseLossBackward0>)
train loss: tensor(13089.7275, grad_fn=<MseLossBackward0>)
train loss: tensor(5808.1372, grad_fn=<MseLossBackward0>)
train loss: tensor(14560.4287, grad_fn=<MseLossBackward0>)
train loss: tensor(9554.5664, grad_fn=<MseLossBackward0>)
train l

train loss: tensor(4388.0605, grad_fn=<MseLossBackward0>)
train loss: tensor(4137.3511, grad_fn=<MseLossBackward0>)
train loss: tensor(10398.3369, grad_fn=<MseLossBackward0>)
train loss: tensor(11260.5283, grad_fn=<MseLossBackward0>)
train loss: tensor(6964.8818, grad_fn=<MseLossBackward0>)
train loss: tensor(5343.0186, grad_fn=<MseLossBackward0>)
train loss: tensor(3923.8259, grad_fn=<MseLossBackward0>)
train loss: tensor(3427.0464, grad_fn=<MseLossBackward0>)
train loss: tensor(2798.5137, grad_fn=<MseLossBackward0>)
train loss: tensor(10992.0127, grad_fn=<MseLossBackward0>)
train loss: tensor(2052.7512, grad_fn=<MseLossBackward0>)
train loss: tensor(3338.9634, grad_fn=<MseLossBackward0>)
train loss: tensor(11694.4648, grad_fn=<MseLossBackward0>)
train loss: tensor(13218.4453, grad_fn=<MseLossBackward0>)
train loss: tensor(7909.3628, grad_fn=<MseLossBackward0>)
train loss: tensor(8110.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(11012.3057, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(2717.8330, grad_fn=<MseLossBackward0>)
train loss: tensor(8417.9092, grad_fn=<MseLossBackward0>)
train loss: tensor(6710.2920, grad_fn=<MseLossBackward0>)
train loss: tensor(4180.0762, grad_fn=<MseLossBackward0>)
train loss: tensor(13928.0078, grad_fn=<MseLossBackward0>)
train loss: tensor(12908.1719, grad_fn=<MseLossBackward0>)
train loss: tensor(4142.0161, grad_fn=<MseLossBackward0>)
train loss: tensor(7569.0527, grad_fn=<MseLossBackward0>)
train loss: tensor(13828.6045, grad_fn=<MseLossBackward0>)
train loss: tensor(6822.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(14255.8945, grad_fn=<MseLossBackward0>)
train loss: tensor(5094.1890, grad_fn=<MseLossBackward0>)
train loss: tensor(4422.3638, grad_fn=<MseLossBackward0>)
train loss: tensor(9891.8916, grad_fn=<MseLossBackward0>)
train loss: tensor(6180.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(4545.1479, grad_fn=<MseLossBackward0>)
train loss: tensor(6363.9014, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(6177.6304, grad_fn=<MseLossBackward0>)
train loss: tensor(5061.9595, grad_fn=<MseLossBackward0>)
train loss: tensor(12344.8262, grad_fn=<MseLossBackward0>)
train loss: tensor(10133.5107, grad_fn=<MseLossBackward0>)
train loss: tensor(4546.3428, grad_fn=<MseLossBackward0>)
train loss: tensor(8102.1182, grad_fn=<MseLossBackward0>)
train loss: tensor(3887.6135, grad_fn=<MseLossBackward0>)
train loss: tensor(9114.6602, grad_fn=<MseLossBackward0>)
train loss: tensor(11967.8564, grad_fn=<MseLossBackward0>)
train loss: tensor(14443.7090, grad_fn=<MseLossBackward0>)
train loss: tensor(5199.5249, grad_fn=<MseLossBackward0>)
train loss: tensor(7644.7202, grad_fn=<MseLossBackward0>)
train loss: tensor(8244.7461, grad_fn=<MseLossBackward0>)
train loss: tensor(15841.8955, grad_fn=<MseLossBackward0>)
train loss: tensor(9228.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(10326.7217, grad_fn=<MseLossBackward0>)
train loss: tensor(3187.5942, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(10465.9160, grad_fn=<MseLossBackward0>)
train loss: tensor(2210.6362, grad_fn=<MseLossBackward0>)
train loss: tensor(4084.9619, grad_fn=<MseLossBackward0>)
train loss: tensor(3416.9529, grad_fn=<MseLossBackward0>)
train loss: tensor(3920.1353, grad_fn=<MseLossBackward0>)
train loss: tensor(10578.2227, grad_fn=<MseLossBackward0>)
train loss: tensor(1902.9985, grad_fn=<MseLossBackward0>)
train loss: tensor(6904.3843, grad_fn=<MseLossBackward0>)
train loss: tensor(5675.0161, grad_fn=<MseLossBackward0>)
train loss: tensor(3698.3154, grad_fn=<MseLossBackward0>)
train loss: tensor(8879.1230, grad_fn=<MseLossBackward0>)
train loss: tensor(23553.7168, grad_fn=<MseLossBackward0>)
train loss: tensor(3363.0774, grad_fn=<MseLossBackward0>)
train loss: tensor(31832.7305, grad_fn=<MseLossBackward0>)
train loss: tensor(8532.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(6871.2769, grad_fn=<MseLossBackward0>)
train loss: tensor(4135.8105, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(7741.2334, grad_fn=<MseLossBackward0>)
train loss: tensor(37658.2188, grad_fn=<MseLossBackward0>)
train loss: tensor(7536.9414, grad_fn=<MseLossBackward0>)
train loss: tensor(4421.6279, grad_fn=<MseLossBackward0>)
train loss: tensor(9915.5713, grad_fn=<MseLossBackward0>)
train loss: tensor(11163.5156, grad_fn=<MseLossBackward0>)
train loss: tensor(2434.2927, grad_fn=<MseLossBackward0>)
train loss: tensor(5047.0381, grad_fn=<MseLossBackward0>)
train loss: tensor(32177.8496, grad_fn=<MseLossBackward0>)
train loss: tensor(8542.9600, grad_fn=<MseLossBackward0>)
train loss: tensor(9900.9971, grad_fn=<MseLossBackward0>)
train loss: tensor(6070.8345, grad_fn=<MseLossBackward0>)
train loss: tensor(12043.5557, grad_fn=<MseLossBackward0>)
train loss: tensor(5990.1841, grad_fn=<MseLossBackward0>)
train loss: tensor(8136.3555, grad_fn=<MseLossBackward0>)
train loss: tensor(13221.2822, grad_fn=<MseLossBackward0>)
train loss: tensor(54390.3711, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(11181.0322, grad_fn=<MseLossBackward0>)
train loss: tensor(10225.1406, grad_fn=<MseLossBackward0>)
train loss: tensor(6031.7056, grad_fn=<MseLossBackward0>)
train loss: tensor(4932.0469, grad_fn=<MseLossBackward0>)
train loss: tensor(6134.8862, grad_fn=<MseLossBackward0>)
train loss: tensor(2695.0637, grad_fn=<MseLossBackward0>)
train loss: tensor(7832.5752, grad_fn=<MseLossBackward0>)
train loss: tensor(5715.1978, grad_fn=<MseLossBackward0>)
train loss: tensor(11946.8643, grad_fn=<MseLossBackward0>)
train loss: tensor(61703.1211, grad_fn=<MseLossBackward0>)
train loss: tensor(1768.1544, grad_fn=<MseLossBackward0>)
train loss: tensor(5787.0396, grad_fn=<MseLossBackward0>)
train loss: tensor(5500.3042, grad_fn=<MseLossBackward0>)
train loss: tensor(6445.7129, grad_fn=<MseLossBackward0>)
train loss: tensor(7077.6851, grad_fn=<MseLossBackward0>)
train loss: tensor(3651.9736, grad_fn=<MseLossBackward0>)
train loss: tensor(5023.3730, grad_fn=<MseLossBackward0>)
train loss

train loss: tensor(6784.7305, grad_fn=<MseLossBackward0>)
train loss: tensor(8937.0557, grad_fn=<MseLossBackward0>)
train loss: tensor(2571.0557, grad_fn=<MseLossBackward0>)
train loss: tensor(3442.2629, grad_fn=<MseLossBackward0>)
train loss: tensor(8353.6816, grad_fn=<MseLossBackward0>)
train loss: tensor(5809.5884, grad_fn=<MseLossBackward0>)
train loss: tensor(11019.4209, grad_fn=<MseLossBackward0>)
train loss: tensor(2647.4622, grad_fn=<MseLossBackward0>)
train loss: tensor(9133.3281, grad_fn=<MseLossBackward0>)
train loss: tensor(5985.2964, grad_fn=<MseLossBackward0>)
train loss: tensor(2977.6619, grad_fn=<MseLossBackward0>)
train loss: tensor(8188.0049, grad_fn=<MseLossBackward0>)
train loss: tensor(5354.5229, grad_fn=<MseLossBackward0>)
train loss: tensor(6040.2212, grad_fn=<MseLossBackward0>)
train loss: tensor(7026.8032, grad_fn=<MseLossBackward0>)
train loss: tensor(9705.7227, grad_fn=<MseLossBackward0>)
train loss: tensor(6406.6138, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(9247.2441, grad_fn=<MseLossBackward0>)
train loss: tensor(4509.9077, grad_fn=<MseLossBackward0>)
train loss: tensor(3937.9121, grad_fn=<MseLossBackward0>)
train loss: tensor(7849.2290, grad_fn=<MseLossBackward0>)
train loss: tensor(4543.2148, grad_fn=<MseLossBackward0>)
train loss: tensor(7553.7798, grad_fn=<MseLossBackward0>)
train loss: tensor(3480.1965, grad_fn=<MseLossBackward0>)
train loss: tensor(2726.7554, grad_fn=<MseLossBackward0>)
train loss: tensor(5447.2524, grad_fn=<MseLossBackward0>)
train loss: tensor(2537.9131, grad_fn=<MseLossBackward0>)
train loss: tensor(12936.4316, grad_fn=<MseLossBackward0>)
train loss: tensor(13099.0176, grad_fn=<MseLossBackward0>)
train loss: tensor(6056.4077, grad_fn=<MseLossBackward0>)
train loss: tensor(2054.5710, grad_fn=<MseLossBackward0>)
train loss: tensor(9023.8896, grad_fn=<MseLossBackward0>)
train loss: tensor(10920.8877, grad_fn=<MseLossBackward0>)
train loss: tensor(4442.4502, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3881.9751, grad_fn=<MseLossBackward0>)
train loss: tensor(4116.8613, grad_fn=<MseLossBackward0>)
train loss: tensor(2924.3235, grad_fn=<MseLossBackward0>)
train loss: tensor(2535.7402, grad_fn=<MseLossBackward0>)
train loss: tensor(6597.2725, grad_fn=<MseLossBackward0>)
train loss: tensor(9232.8428, grad_fn=<MseLossBackward0>)
train loss: tensor(7216.5503, grad_fn=<MseLossBackward0>)
train loss: tensor(11348.3105, grad_fn=<MseLossBackward0>)
train loss: tensor(6118.7485, grad_fn=<MseLossBackward0>)
train loss: tensor(5233.1484, grad_fn=<MseLossBackward0>)
train loss: tensor(6258.0737, grad_fn=<MseLossBackward0>)
train loss: tensor(1726.1072, grad_fn=<MseLossBackward0>)
train loss: tensor(13254.1924, grad_fn=<MseLossBackward0>)
train loss: tensor(5870.0435, grad_fn=<MseLossBackward0>)
train loss: tensor(3671.6753, grad_fn=<MseLossBackward0>)
train loss: tensor(4902.1992, grad_fn=<MseLossBackward0>)
train loss: tensor(3054.5459, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(9243.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(15053.0537, grad_fn=<MseLossBackward0>)
train loss: tensor(4709.4028, grad_fn=<MseLossBackward0>)
train loss: tensor(4202.9492, grad_fn=<MseLossBackward0>)
train loss: tensor(7396.5151, grad_fn=<MseLossBackward0>)
train loss: tensor(10072.4375, grad_fn=<MseLossBackward0>)
train loss: tensor(15963.2744, grad_fn=<MseLossBackward0>)
train loss: tensor(4093.5776, grad_fn=<MseLossBackward0>)
train loss: tensor(11733.0791, grad_fn=<MseLossBackward0>)
train loss: tensor(4720.1353, grad_fn=<MseLossBackward0>)
train loss: tensor(6692.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(6588.1592, grad_fn=<MseLossBackward0>)
train loss: tensor(12841.7002, grad_fn=<MseLossBackward0>)
train loss: tensor(5837.8232, grad_fn=<MseLossBackward0>)
train loss: tensor(8598.9258, grad_fn=<MseLossBackward0>)
train loss: tensor(5482.6904, grad_fn=<MseLossBackward0>)
train loss: tensor(44101.9961, grad_fn=<MseLossBackward0>)
train lo

train loss: tensor(6526.3989, grad_fn=<MseLossBackward0>)
train loss: tensor(6905.8110, grad_fn=<MseLossBackward0>)
train loss: tensor(8448.0898, grad_fn=<MseLossBackward0>)
train loss: tensor(4945.7686, grad_fn=<MseLossBackward0>)
train loss: tensor(4807.6475, grad_fn=<MseLossBackward0>)
train loss: tensor(4041.4084, grad_fn=<MseLossBackward0>)
train loss: tensor(10102.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(3722.8628, grad_fn=<MseLossBackward0>)
train loss: tensor(1681.1749, grad_fn=<MseLossBackward0>)
train loss: tensor(3875.5032, grad_fn=<MseLossBackward0>)
train loss: tensor(4399.6484, grad_fn=<MseLossBackward0>)
train loss: tensor(5763.2539, grad_fn=<MseLossBackward0>)
train loss: tensor(5860.3242, grad_fn=<MseLossBackward0>)
train loss: tensor(1968.8082, grad_fn=<MseLossBackward0>)
train loss: tensor(6612.0015, grad_fn=<MseLossBackward0>)
train loss: tensor(5801.0396, grad_fn=<MseLossBackward0>)
train loss: tensor(6076.2979, grad_fn=<MseLossBackward0>)
train loss: t

train loss: tensor(4542.8960, grad_fn=<MseLossBackward0>)
train loss: tensor(3475.7334, grad_fn=<MseLossBackward0>)
train loss: tensor(1966.6002, grad_fn=<MseLossBackward0>)
train loss: tensor(3416.0469, grad_fn=<MseLossBackward0>)
train loss: tensor(4807.9932, grad_fn=<MseLossBackward0>)
train loss: tensor(7974.1338, grad_fn=<MseLossBackward0>)
train loss: tensor(1950.3588, grad_fn=<MseLossBackward0>)
train loss: tensor(9794.1436, grad_fn=<MseLossBackward0>)
train loss: tensor(8068.3882, grad_fn=<MseLossBackward0>)
train loss: tensor(7389.7812, grad_fn=<MseLossBackward0>)
train loss: tensor(13434.0518, grad_fn=<MseLossBackward0>)
train loss: tensor(4143.1440, grad_fn=<MseLossBackward0>)
train loss: tensor(7707.3569, grad_fn=<MseLossBackward0>)
train loss: tensor(9151.6455, grad_fn=<MseLossBackward0>)
train loss: tensor(5390.0122, grad_fn=<MseLossBackward0>)
train loss: tensor(10528.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(3119.9634, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(3287.8669, grad_fn=<MseLossBackward0>)
train loss: tensor(6187.1631, grad_fn=<MseLossBackward0>)
train loss: tensor(2549.9294, grad_fn=<MseLossBackward0>)
train loss: tensor(2563.4541, grad_fn=<MseLossBackward0>)
train loss: tensor(5329.4292, grad_fn=<MseLossBackward0>)
train loss: tensor(8559.5117, grad_fn=<MseLossBackward0>)
train loss: tensor(3497.9651, grad_fn=<MseLossBackward0>)
train loss: tensor(5836.3726, grad_fn=<MseLossBackward0>)
train loss: tensor(8585.2588, grad_fn=<MseLossBackward0>)
train loss: tensor(2226.5508, grad_fn=<MseLossBackward0>)
train loss: tensor(3021.3301, grad_fn=<MseLossBackward0>)
train loss: tensor(11174.6523, grad_fn=<MseLossBackward0>)
train loss: tensor(8805.6172, grad_fn=<MseLossBackward0>)
train loss: tensor(5416.4897, grad_fn=<MseLossBackward0>)
train loss: tensor(6917.0737, grad_fn=<MseLossBackward0>)
train loss: tensor(75094.5469, grad_fn=<MseLossBackward0>)
train loss: tensor(12105.8145, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(4823.5679, grad_fn=<MseLossBackward0>)
train loss: tensor(5824.4053, grad_fn=<MseLossBackward0>)
train loss: tensor(7554.0659, grad_fn=<MseLossBackward0>)
train loss: tensor(5451.9902, grad_fn=<MseLossBackward0>)
train loss: tensor(5056.4023, grad_fn=<MseLossBackward0>)
train loss: tensor(4384.7256, grad_fn=<MseLossBackward0>)
train loss: tensor(8166.8169, grad_fn=<MseLossBackward0>)
train loss: tensor(6528.3262, grad_fn=<MseLossBackward0>)
train loss: tensor(6556.7324, grad_fn=<MseLossBackward0>)
train loss: tensor(3975.4714, grad_fn=<MseLossBackward0>)
train loss: tensor(2901.9236, grad_fn=<MseLossBackward0>)
train loss: tensor(11323.0215, grad_fn=<MseLossBackward0>)
train loss: tensor(59262.3203, grad_fn=<MseLossBackward0>)
train loss: tensor(4449.5684, grad_fn=<MseLossBackward0>)
train loss: tensor(8753.5361, grad_fn=<MseLossBackward0>)
train loss: tensor(9570.7422, grad_fn=<MseLossBackward0>)
train loss: tensor(1479.0588, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(5689.1812, grad_fn=<MseLossBackward0>)
train loss: tensor(5068.2061, grad_fn=<MseLossBackward0>)
train loss: tensor(3626.5842, grad_fn=<MseLossBackward0>)
train loss: tensor(4647.4629, grad_fn=<MseLossBackward0>)
train loss: tensor(6750.9351, grad_fn=<MseLossBackward0>)
train loss: tensor(4552.3701, grad_fn=<MseLossBackward0>)
train loss: tensor(9880.5635, grad_fn=<MseLossBackward0>)
train loss: tensor(4950.3618, grad_fn=<MseLossBackward0>)
train loss: tensor(5536.1318, grad_fn=<MseLossBackward0>)
train loss: tensor(12282.2090, grad_fn=<MseLossBackward0>)
train loss: tensor(7240.8047, grad_fn=<MseLossBackward0>)
train loss: tensor(5238.6113, grad_fn=<MseLossBackward0>)
train loss: tensor(4805.5518, grad_fn=<MseLossBackward0>)
train loss: tensor(4839.6172, grad_fn=<MseLossBackward0>)
train loss: tensor(5856.4116, grad_fn=<MseLossBackward0>)
train loss: tensor(59747.9609, grad_fn=<MseLossBackward0>)
train loss: tensor(3848.0754, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4792.0542, grad_fn=<MseLossBackward0>)
train loss: tensor(6157.7207, grad_fn=<MseLossBackward0>)
train loss: tensor(4275.8906, grad_fn=<MseLossBackward0>)
train loss: tensor(6218.6982, grad_fn=<MseLossBackward0>)
train loss: tensor(3012.6621, grad_fn=<MseLossBackward0>)
train loss: tensor(4216.1411, grad_fn=<MseLossBackward0>)
train loss: tensor(5709.6094, grad_fn=<MseLossBackward0>)
train loss: tensor(1363.2976, grad_fn=<MseLossBackward0>)
train loss: tensor(10401.4092, grad_fn=<MseLossBackward0>)
train loss: tensor(4802.7334, grad_fn=<MseLossBackward0>)
train loss: tensor(9393.0586, grad_fn=<MseLossBackward0>)
train loss: tensor(4020.0349, grad_fn=<MseLossBackward0>)
train loss: tensor(3917.6453, grad_fn=<MseLossBackward0>)
train loss: tensor(6675.4575, grad_fn=<MseLossBackward0>)
train loss: tensor(15172.1729, grad_fn=<MseLossBackward0>)
train loss: tensor(2755.9329, grad_fn=<MseLossBackward0>)
train loss: tensor(3422.3843, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(8065.5864, grad_fn=<MseLossBackward0>)
train loss: tensor(9550.8682, grad_fn=<MseLossBackward0>)
train loss: tensor(6542.6538, grad_fn=<MseLossBackward0>)
train loss: tensor(38517.7891, grad_fn=<MseLossBackward0>)
train loss: tensor(5510.4731, grad_fn=<MseLossBackward0>)
train loss: tensor(10910.8701, grad_fn=<MseLossBackward0>)
train loss: tensor(6930.9302, grad_fn=<MseLossBackward0>)
train loss: tensor(8926.5410, grad_fn=<MseLossBackward0>)
train loss: tensor(3079.1672, grad_fn=<MseLossBackward0>)
train loss: tensor(2736.8594, grad_fn=<MseLossBackward0>)
train loss: tensor(3541.0337, grad_fn=<MseLossBackward0>)
train loss: tensor(7407.6895, grad_fn=<MseLossBackward0>)
train loss: tensor(9513.8418, grad_fn=<MseLossBackward0>)
train loss: tensor(8466.5088, grad_fn=<MseLossBackward0>)
train loss: tensor(5272.9985, grad_fn=<MseLossBackward0>)
train loss: tensor(12403.5928, grad_fn=<MseLossBackward0>)
train loss: tensor(4546.8926, grad_fn=<MseLossBackward0>)
train loss:

train loss: tensor(3232.4460, grad_fn=<MseLossBackward0>)
train loss: tensor(13341.4502, grad_fn=<MseLossBackward0>)
train loss: tensor(12541.5977, grad_fn=<MseLossBackward0>)
train loss: tensor(6753.8066, grad_fn=<MseLossBackward0>)
train loss: tensor(5704.5586, grad_fn=<MseLossBackward0>)
train loss: tensor(2553.5305, grad_fn=<MseLossBackward0>)
train loss: tensor(5253.9087, grad_fn=<MseLossBackward0>)
train loss: tensor(4834.4175, grad_fn=<MseLossBackward0>)
train loss: tensor(6891.3779, grad_fn=<MseLossBackward0>)
train loss: tensor(4809.1411, grad_fn=<MseLossBackward0>)
train loss: tensor(5713.0132, grad_fn=<MseLossBackward0>)
train loss: tensor(2684.3633, grad_fn=<MseLossBackward0>)
train loss: tensor(4144.4731, grad_fn=<MseLossBackward0>)
train loss: tensor(8263.9883, grad_fn=<MseLossBackward0>)
train loss: tensor(9282.8730, grad_fn=<MseLossBackward0>)
train loss: tensor(2877.8047, grad_fn=<MseLossBackward0>)
train loss: tensor(5570.9531, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(8782.0527, grad_fn=<MseLossBackward0>)
train loss: tensor(9515.5029, grad_fn=<MseLossBackward0>)
train loss: tensor(5968.9019, grad_fn=<MseLossBackward0>)
train loss: tensor(6471.1138, grad_fn=<MseLossBackward0>)
train loss: tensor(4079.5745, grad_fn=<MseLossBackward0>)
train loss: tensor(3813.7871, grad_fn=<MseLossBackward0>)
train loss: tensor(4413.0903, grad_fn=<MseLossBackward0>)
train loss: tensor(5747.3252, grad_fn=<MseLossBackward0>)
train loss: tensor(6962.7231, grad_fn=<MseLossBackward0>)
train loss: tensor(6341.3701, grad_fn=<MseLossBackward0>)
train loss: tensor(7284.6870, grad_fn=<MseLossBackward0>)
train loss: tensor(8027.4873, grad_fn=<MseLossBackward0>)
train loss: tensor(6730.9829, grad_fn=<MseLossBackward0>)
train loss: tensor(45594.9766, grad_fn=<MseLossBackward0>)
train loss: tensor(13518.3682, grad_fn=<MseLossBackward0>)
train loss: tensor(2653.4778, grad_fn=<MseLossBackward0>)
train loss: tensor(6802.2295, grad_fn=<MseLossBackward0>)
train loss: 

train loss: tensor(4526.3418, grad_fn=<MseLossBackward0>)
train loss: tensor(8633.1807, grad_fn=<MseLossBackward0>)
train loss: tensor(5611.3096, grad_fn=<MseLossBackward0>)
train loss: tensor(4709.4512, grad_fn=<MseLossBackward0>)
train loss: tensor(5498.8545, grad_fn=<MseLossBackward0>)
train loss: tensor(6281.4111, grad_fn=<MseLossBackward0>)
train loss: tensor(87302.0391, grad_fn=<MseLossBackward0>)
train loss: tensor(6482.1343, grad_fn=<MseLossBackward0>)
train loss: tensor(3481.8145, grad_fn=<MseLossBackward0>)
train loss: tensor(6733.2168, grad_fn=<MseLossBackward0>)
train loss: tensor(5511.4038, grad_fn=<MseLossBackward0>)
train loss: tensor(14392.2422, grad_fn=<MseLossBackward0>)
train loss: tensor(4440.6611, grad_fn=<MseLossBackward0>)
train loss: tensor(7058.2969, grad_fn=<MseLossBackward0>)
train loss: tensor(1688.1133, grad_fn=<MseLossBackward0>)
train loss: tensor(4733.5923, grad_fn=<MseLossBackward0>)
train loss: tensor(9057.3730, grad_fn=<MseLossBackward0>)
train loss: 